Changeset View
Changeset View
Standalone View
Standalone View
swh/deposit/tests/api/test_parsers.py
# Copyright (C) 2018-2020 The Software Heritage developers | # Copyright (C) 2018-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from collections import OrderedDict | |||||
import io | import io | ||||
from swh.deposit.parsers import SWHXMLParser | from swh.deposit.parsers import SWHXMLParser | ||||
from swh.deposit.utils import NAMESPACES | |||||
def test_parsing_without_duplicates(): | def test_parsing_without_duplicates(): | ||||
xml_no_duplicate = io.BytesIO( | xml_no_duplicate = io.BytesIO( | ||||
b"""<?xml version="1.0"?> | b"""<?xml version="1.0"?> | ||||
<entry xmlns="http://www.w3.org/2005/Atom" | <entry xmlns="http://www.w3.org/2005/Atom" | ||||
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"> | xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"> | ||||
<title>Awesome Compiler</title> | <title>Awesome Compiler</title> | ||||
<codemeta:license> | <codemeta:license> | ||||
<codemeta:name>GPL3.0</codemeta:name> | <codemeta:name>GPL3.0</codemeta:name> | ||||
<codemeta:url>https://opensource.org/licenses/GPL-3.0</codemeta:url> | <codemeta:url>https://opensource.org/licenses/GPL-3.0</codemeta:url> | ||||
</codemeta:license> | </codemeta:license> | ||||
<codemeta:runtimePlatform>Python3</codemeta:runtimePlatform> | <codemeta:runtimePlatform>Python3</codemeta:runtimePlatform> | ||||
<codemeta:author> | <codemeta:author> | ||||
<codemeta:name>author1</codemeta:name> | <codemeta:name>author1</codemeta:name> | ||||
<codemeta:affiliation>Inria</codemeta:affiliation> | <codemeta:affiliation>Inria</codemeta:affiliation> | ||||
</codemeta:author> | </codemeta:author> | ||||
<codemeta:programmingLanguage>ocaml</codemeta:programmingLanguage> | <codemeta:programmingLanguage>ocaml</codemeta:programmingLanguage> | ||||
<codemeta:issueTracker>http://issuetracker.com</codemeta:issueTracker> | <codemeta:issueTracker>http://issuetracker.com</codemeta:issueTracker> | ||||
</entry>""" | </entry>""" | ||||
) | ) | ||||
actual_result = SWHXMLParser().parse(xml_no_duplicate) | actual_result = SWHXMLParser().parse(xml_no_duplicate) | ||||
expected_dict = OrderedDict( | |||||
[ | assert ( | ||||
("atom:title", "Awesome Compiler"), | actual_result.findtext( | ||||
( | "codemeta:license/codemeta:name", | ||||
"codemeta:license", | namespaces={"codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"}, | ||||
OrderedDict( | ) | ||||
[ | == "GPL3.0" | ||||
("codemeta:name", "GPL3.0"), | ) | ||||
("codemeta:url", "https://opensource.org/licenses/GPL-3.0"), | assert ( | ||||
] | actual_result.findtext("codemeta:license/codemeta:name", namespaces=NAMESPACES) | ||||
), | == "GPL3.0" | ||||
), | ) | ||||
("codemeta:runtimePlatform", "Python3"), | authors = actual_result.findall( | ||||
( | "codemeta:author/codemeta:name", namespaces=NAMESPACES | ||||
"codemeta:author", | |||||
OrderedDict( | |||||
[("codemeta:name", "author1"), ("codemeta:affiliation", "Inria")] | |||||
), | |||||
), | |||||
("codemeta:programmingLanguage", "ocaml"), | |||||
("codemeta:issueTracker", "http://issuetracker.com"), | |||||
] | |||||
) | ) | ||||
assert expected_dict == actual_result | assert len(authors) == 1 | ||||
assert authors[0].text == "author1" | |||||
def test_parsing_with_duplicates(): | def test_parsing_with_duplicates(): | ||||
xml_with_duplicates = io.BytesIO( | xml_with_duplicates = io.BytesIO( | ||||
b"""<?xml version="1.0"?> | b"""<?xml version="1.0"?> | ||||
<entry xmlns="http://www.w3.org/2005/Atom" | <entry xmlns="http://www.w3.org/2005/Atom" | ||||
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"> | xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"> | ||||
<title>Another Compiler</title> | <title>Another Compiler</title> | ||||
Show All 18 Lines | <codemeta:license> | ||||
<codemeta:url>http://spdx.org</codemeta:url> | <codemeta:url>http://spdx.org</codemeta:url> | ||||
</codemeta:license> | </codemeta:license> | ||||
<codemeta:programmingLanguage>python3</codemeta:programmingLanguage> | <codemeta:programmingLanguage>python3</codemeta:programmingLanguage> | ||||
</entry>""" | </entry>""" | ||||
) | ) | ||||
actual_result = SWHXMLParser().parse(xml_with_duplicates) | actual_result = SWHXMLParser().parse(xml_with_duplicates) | ||||
expected_dict = OrderedDict( | assert ( | ||||
[ | actual_result.findtext( | ||||
("atom:title", "Another Compiler"), | "codemeta:license/codemeta:name", | ||||
("codemeta:runtimePlatform", ["GNU/Linux", "Un*x"]), | namespaces={"codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"}, | ||||
( | ) | ||||
"codemeta:license", | == "GPL3.0" | ||||
[ | ) | ||||
OrderedDict( | assert ( | ||||
[ | actual_result.findtext("codemeta:license/codemeta:name", namespaces=NAMESPACES) | ||||
("codemeta:name", "GPL3.0"), | == "GPL3.0" | ||||
("codemeta:url", "https://opensource.org/licenses/GPL-3.0"), | ) | ||||
] | authors = actual_result.findall( | ||||
), | "codemeta:author/codemeta:name", namespaces=NAMESPACES | ||||
OrderedDict( | |||||
[("codemeta:name", "spdx"), ("codemeta:url", "http://spdx.org")] | |||||
), | |||||
], | |||||
), | |||||
( | |||||
"codemeta:author", | |||||
[ | |||||
OrderedDict( | |||||
[ | |||||
("codemeta:name", "author1"), | |||||
("codemeta:affiliation", "Inria"), | |||||
] | |||||
), | |||||
OrderedDict( | |||||
[ | |||||
("codemeta:name", "author2"), | |||||
("codemeta:affiliation", "Inria"), | |||||
] | |||||
), | |||||
], | |||||
), | |||||
("codemeta:programmingLanguage", ["ocaml", "haskell", "python3"]), | |||||
] | |||||
) | ) | ||||
assert expected_dict == actual_result | assert len(authors) == 2 | ||||
assert authors[0].text == "author1" | |||||
assert authors[1].text == "author2" |