Page MenuHomeSoftware Heritage

test_utils.py
No OneTemporary

test_utils.py

# Copyright (C) 2018-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import pytest
from swh.deposit import utils
from swh.deposit.parsers import parse_xml
from swh.model.exceptions import ValidationError
from swh.model.swhids import CoreSWHID, QualifiedSWHID
@pytest.fixture
def xml_with_origin_reference():
    """Provide an Atom entry template whose deposit references an origin.

    The returned text has its surrounding whitespace stripped and keeps a
    ``{url}`` placeholder that callers fill in through ``str.format``.
    """
    return """<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
<swh:deposit>
<swh:reference>
<swh:origin url="{url}"/>
</swh:reference>
</swh:deposit>
</entry>
""".strip()
def test_normalize_date_0():
    """Given a list of dates, the first entry is the one that gets normalized."""
    candidates = ["2017-10-12", "date1"]
    expected = {
        "offset": 0,
        "timestamp": {"seconds": 1507766400, "microseconds": 0},
    }

    assert utils.normalize_date(candidates) == expected
def test_normalize_date_1():
    """A full ``YYYY-MM-DD HH:MM:SS`` date string is normalized as-is."""
    expected = {
        "offset": 0,
        "timestamp": {"seconds": 1528736522, "microseconds": 0},
    }

    assert utils.normalize_date("2018-06-11 17:02:02") == expected
def test_normalize_date_doing_irrelevant_stuff():
    """A year-only date string still normalizes to a usable timestamp."""
    expected = {
        "offset": 0,
        "timestamp": {"microseconds": 0, "seconds": 1483228800},
    }

    assert utils.normalize_date("2017") == expected
@pytest.mark.parametrize(
    "swhid,expected_metadata_context",
    [
        # core SWHID without any qualifier
        (
            "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49",
            {"origin": None},
        ),
        # origin qualifier only
        (
            "swh:1:snp:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=http://blah",
            {"path": None, "origin": "http://blah"},
        ),
        # path qualifier only; the expected path is a bytes value
        (
            "swh:1:dir:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;path=/path",
            {"path": b"/path", "origin": None},
        ),
        # visit qualifier, expected under the "snapshot" key
        (
            "swh:1:rev:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;visit=swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",  # noqa
            {
                "snapshot": CoreSWHID.from_string(
                    "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49"
                ),
                "path": None,
                "origin": None,
            },
        ),
        # directory anchor qualifier, expected under the "directory" key
        (
            "swh:1:rel:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:dir:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",  # noqa
            {
                "directory": CoreSWHID.from_string(
                    "swh:1:dir:41b5c8cc985d190b5a7ef4878128ebfdc2358f49"
                ),
                "path": None,
                "origin": None,
            },
        ),
    ],
)
def test_compute_metadata_context(swhid: str, expected_metadata_context):
    """The metadata context computed from a qualified SWHID matches expectations."""
    reference = QualifiedSWHID.from_string(swhid)
    actual_metadata_context = utils.compute_metadata_context(reference)

    assert expected_metadata_context == actual_metadata_context
def test_parse_swh_reference_origin(xml_with_origin_reference):
    """An origin reference in the deposit XML is parsed back to its URL."""
    origin_url = "https://url"
    parsed = parse_xml(xml_with_origin_reference.format(url=origin_url))

    assert utils.parse_swh_reference(parsed) == origin_url
@pytest.fixture
def xml_swh_deposit_template():
    """Provide an entry template with an empty ``<swh:deposit>`` element.

    The returned text has its surrounding whitespace stripped and keeps a
    ``{swh_deposit}`` placeholder for the deposit element's content.
    """
    return """<?xml version="1.0"?>
<entry xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit"
xmlns:schema="http://schema.org/">
<swh:deposit>
{swh_deposit}
</swh:deposit>
</entry>
""".strip()
@pytest.mark.parametrize(
    "xml_ref",
    [
        # nothing inside <swh:deposit>
        "",
        # reference element without children
        "<swh:reference></swh:reference>",
        # object element without a swhid attribute
        "<swh:reference><swh:object /></swh:reference>",
        # object element with an empty swhid attribute
        '<swh:reference><swh:object swhid="" /></swh:reference>',
    ],
)
def test_parse_swh_reference_empty(xml_swh_deposit_template, xml_ref):
    """Missing or empty references are parsed as ``None``."""
    document = xml_swh_deposit_template.format(swh_deposit=xml_ref)
    parsed = utils.parse_xml(document)

    assert utils.parse_swh_reference(parsed) is None
@pytest.fixture
def xml_with_swhid(atom_dataset):
    """Expose the ``entry-data-with-swhid`` sample of the ``atom_dataset`` fixture."""
    sample = atom_dataset["entry-data-with-swhid"]
    return sample
@pytest.mark.parametrize(
    "swhid",
    [
        # content with origin, visit, anchor and path qualifiers
        "swh:1:cnt:31b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=https://hal.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:4fc1e36fca86b2070204bedd51106014a614f321;anchor=swh:1:rev:9c5de20cfb54682370a398fcc733e829903c8cba;path=/moranegg-AffectationRO-df7f68b/",  # noqa
        # one anchor qualifier per accepted anchor type: dir, rev, rel, snp
        "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:dir:9c5de20cfb54682370a398fcc733e829903c8cba",  # noqa
        "swh:1:rev:31b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:rev:9c5de20cfb54682370a398fcc733e829903c8cba",  # noqa
        "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:rel:9c5de20cfb54682370a398fcc733e829903c8cba",  # noqa
        "swh:1:snp:31b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:snp:9c5de20cfb54682370a398fcc733e829903c8cba",  # noqa
        # core SWHID without qualifiers
        "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
    ],
)
def test_parse_swh_reference_swhid(swhid, xml_with_swhid):
    """A valid SWHID reference is parsed into the matching ``QualifiedSWHID``."""
    parsed = utils.parse_xml(xml_with_swhid.format(swhid=swhid))
    reference = utils.parse_swh_reference(parsed)

    assert reference is not None
    assert reference == QualifiedSWHID.from_string(swhid)
@pytest.mark.parametrize(
    "invalid_swhid",
    [
        # incorrect length (truncated object id).
        # NOTE: the trailing comma matters; without it this literal is
        # implicitly concatenated with the next one and both cases are lost.
        "swh:1:cnt:31b5c8cc985d190b5a7ef4878128ebfdc235",
        # visit qualifier targets a rev here (valid cases in this file use snp)
        "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;visit=swh:1:rev:0175049fc45055a3824a1675ac06e3711619a55a",  # noqa
        # anchor qualifier targets a cnt here (valid cases in this file use
        # dir, rev, rel or snp)
        "swh:1:rev:c4993c872593e960dc84e4430dbbfbc34fd706d0;anchor=swh:1:cnt:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/",  # noqa
        # both the visit and the anchor qualifiers target a snapshot
        "swh:1:rev:c4993c872593e960dc84e4430dbbfbc34fd706d0;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:snp:b5f505b005435fa5c4fa4c279792bd7b17167c04",  # noqa
    ],
)
def test_parse_swh_reference_invalid_swhid(invalid_swhid, xml_with_swhid):
    """An invalid SWHID reference makes ``parse_swh_reference`` raise.

    Each parametrized value is injected into the ``xml_with_swhid`` sample,
    parsed, and expected to trigger a ``ValidationError``.
    """
    xml_invalid_swhid = xml_with_swhid.format(swhid=invalid_swhid)
    metadata = utils.parse_xml(xml_invalid_swhid)

    with pytest.raises(ValidationError):
        utils.parse_swh_reference(metadata)
@pytest.mark.parametrize(
    "xml_ref",
    [
        # nothing inside <swh:deposit>
        "",
        # provenance element without children
        "<swh:metadata-provenance></swh:metadata-provenance>",
        # provenance element with an empty schema:url
        "<swh:metadata-provenance><schema:url /></swh:metadata-provenance>",
    ],
)
def test_parse_swh_metatada_provenance_empty(xml_swh_deposit_template, xml_ref):
    """Missing or empty metadata provenance is parsed as ``None``."""
    document = xml_swh_deposit_template.format(swh_deposit=xml_ref)
    parsed = utils.parse_xml(document)

    assert utils.parse_swh_metadata_provenance(parsed) is None
@pytest.fixture
def xml_with_metadata_provenance(atom_dataset):
    """Expose the ``entry-data-with-metadata-provenance`` sample of ``atom_dataset``."""
    sample = atom_dataset["entry-data-with-metadata-provenance"]
    return sample
def test_parse_swh_metadata_provenance2(xml_with_metadata_provenance):
    """The metadata provenance URL injected in the XML is parsed back unchanged."""
    provenance_url = "https://url.org/metadata/url"
    parsed = utils.parse_xml(xml_with_metadata_provenance.format(url=provenance_url))

    assert utils.parse_swh_metadata_provenance(parsed) == provenance_url

File Metadata

Mime Type
text/x-python
Expires
Jun 4 2025, 7:47 PM (11 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3382281

Event Timeline