Changeset View
Changeset View
Standalone View
Standalone View
swh/deposit/tests/api/test_parser.py
# Copyright (C) 2018-2019 The Software Heritage developers | # Copyright (C) 2018-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from collections import OrderedDict | from collections import OrderedDict | ||||
import io | import io | ||||
from swh.deposit.parsers import SWHXMLParser | import pytest | ||||
from swh.deposit.parsers import SWHXMLParser, parse_swh_reference, parse_xml | |||||
from swh.deposit.utils import clean_swhid | |||||
from swh.model.identifiers import parse_swhid | |||||
def test_parsing_without_duplicates(): | def test_parsing_without_duplicates(): | ||||
xml_no_duplicate = io.BytesIO( | xml_no_duplicate = io.BytesIO( | ||||
b"""<?xml version="1.0"?> | b"""<?xml version="1.0"?> | ||||
<entry xmlns="http://www.w3.org/2005/Atom" | <entry xmlns="http://www.w3.org/2005/Atom" | ||||
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"> | xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"> | ||||
<title>Awesome Compiler</title> | <title>Awesome Compiler</title> | ||||
▲ Show 20 Lines • Show All 104 Lines • ▼ Show 20 Lines | expected_dict = OrderedDict( | ||||
] | ] | ||||
), | ), | ||||
], | ], | ||||
), | ), | ||||
("codemeta:programmingLanguage", ["ocaml", "haskell", "python3"]), | ("codemeta:programmingLanguage", ["ocaml", "haskell", "python3"]), | ||||
] | ] | ||||
) | ) | ||||
assert expected_dict == actual_result | assert expected_dict == actual_result | ||||
@pytest.fixture
def xml_with_origin_reference():
    """Return an Atom entry template referencing an origin.

    The template declares the Atom, codemeta and swh namespaces and wraps a
    single ``<swh:origin>`` element inside ``<swh:deposit>/<swh:reference>``.
    It contains a ``{url}`` placeholder that callers fill in with
    ``str.format``.
    """
    xml_data = """<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
<swh:deposit>
<swh:reference>
<swh:origin url="{url}"/>
</swh:reference>
</swh:deposit>
</entry>
"""
    # Drop the trailing newline left by the closing triple quote.
    return xml_data.strip()
def test_parse_swh_reference_origin(xml_with_origin_reference):
    """An ``<swh:origin url=...>`` reference is returned as the url itself."""
    url = "https://url"
    xml_data = xml_with_origin_reference.format(url=url)
    metadata = parse_xml(xml_data)

    actual_origin = parse_swh_reference(metadata)

    assert actual_origin == url
@pytest.fixture
def xml_with_empty_reference():
    """Return an entry template with a pluggable ``<swh:deposit>`` body.

    Only the swh namespace is declared. The ``{swh_reference}`` placeholder
    is meant to be filled in with ``str.format``, using either an empty
    string or a ``<swh:reference>`` fragment.
    """
    xml_data = """<?xml version="1.0"?>
<entry xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
<swh:deposit>
{swh_reference}
</swh:deposit>
</entry>
"""
    # Drop the trailing newline left by the closing triple quote.
    return xml_data.strip()
@pytest.mark.parametrize(
    "xml_ref",
    [
        # No reference element at all.
        "",
        # Reference element without any child.
        "<swh:reference></swh:reference>",
        # Object child without a swhid attribute.
        "<swh:reference><swh:object /></swh:reference>",
        # Object child with an empty swhid attribute.
        """<swh:reference><swh:object swhid="" /></swh:reference>""",
    ],
)
def test_parse_swh_reference_empty(xml_with_empty_reference, xml_ref):
    """Missing or empty references make ``parse_swh_reference`` return None."""
    xml_body = xml_with_empty_reference.format(swh_reference=xml_ref)
    metadata = parse_xml(xml_body)

    assert parse_swh_reference(metadata) is None
@pytest.fixture
def xml_with_swhid():
    """Return an Atom entry template referencing an object by SWHID.

    The template declares the Atom, codemeta and swh namespaces and wraps a
    single ``<swh:object>`` element inside ``<swh:deposit>/<swh:reference>``.
    It contains a ``{swhid}`` placeholder that callers fill in with
    ``str.format``.
    """
    xml_data = """<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
<swh:deposit>
<swh:reference>
<swh:object swhid="{swhid}" />
</swh:reference>
</swh:deposit>
</entry>
"""
    # Drop the trailing newline left by the closing triple quote.
    return xml_data.strip()
@pytest.mark.parametrize(
    "swhid",
    [
        # Qualified SWHID spread over several lines; the expected value below
        # is computed from the same raw string through clean_swhid.
        """swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49;
origin=https://hal.archives-ouvertes.fr/hal-01243573;
visit=swh:1:snp:4fc1e36fca86b2070204bedd51106014a614f321;
anchor=swh:1:rev:9c5de20cfb54682370a398fcc733e829903c8cba;
path=/moranegg-AffectationRO-df7f68b/""",
        # Bare core SWHID without qualifiers.
        "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
    ],
)
def test_parse_swh_reference_swhid(swhid, xml_with_swhid):
    """An ``<swh:object swhid=...>`` reference is returned as a parsed SWHID.

    The result must equal ``parse_swhid(clean_swhid(swhid))`` for the raw
    string that was injected into the XML.
    """
    xml_data = xml_with_swhid.format(swhid=swhid)
    metadata = parse_xml(xml_data)

    actual_swhid = parse_swh_reference(metadata)
    assert actual_swhid is not None

    expected_swhid = parse_swhid(clean_swhid(swhid))
    assert actual_swhid == expected_swhid