Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/archive/tests/test_archive.py
# Copyright (C) 2019-2021 The Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import hashlib | |||||
import string | |||||
import attr | import attr | ||||
import pytest | |||||
from swh.loader.package.archive.loader import ArchiveLoader, ArchivePackageInfo | from swh.loader.package.archive.loader import ArchiveLoader, ArchivePackageInfo | ||||
from swh.loader.package.tests.common import check_metadata_paths | from swh.loader.package.tests.common import check_metadata_paths | ||||
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.model import Snapshot, SnapshotBranch, TargetType | from swh.model.model import Snapshot, SnapshotBranch, TargetType | ||||
URL = "https://ftp.gnu.org/gnu/8sync/" | URL = "https://ftp.gnu.org/gnu/8sync/" | ||||
▲ Show 20 Lines • Show All 299 Lines • ▼ Show 20 Lines | def test_archive_2_visits_without_change_not_gnu(swh_storage, requests_mock_datadir): | ||||
] | ] | ||||
# Here the loader defines the id_keys to use for existence in the snapshot | # Here the loader defines the id_keys to use for existence in the snapshot | ||||
# It's not the default archive loader which | # It's not the default archive loader which | ||||
loader = ArchiveLoader( | loader = ArchiveLoader( | ||||
swh_storage, | swh_storage, | ||||
url, | url, | ||||
artifacts=artifacts, | artifacts=artifacts, | ||||
identity_artifact_keys=["sha256", "length", "url"], | extid_manifest_format="$sha256 $length $url", | ||||
) | ) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status["status"] == "eventful" | assert actual_load_status["status"] == "eventful" | ||||
assert actual_load_status["snapshot_id"] is not None | assert actual_load_status["snapshot_id"] is not None | ||||
assert_last_visit_matches(swh_storage, url, status="full", type="tar") | assert_last_visit_matches(swh_storage, url, status="full", type="tar") | ||||
actual_load_status2 = loader.load() | actual_load_status2 = loader.load() | ||||
assert actual_load_status2["status"] == "uneventful" | assert actual_load_status2["status"] == "uneventful" | ||||
assert actual_load_status2["snapshot_id"] == actual_load_status["snapshot_id"] | assert actual_load_status2["snapshot_id"] == actual_load_status["snapshot_id"] | ||||
assert_last_visit_matches(swh_storage, url, status="full", type="tar") | assert_last_visit_matches(swh_storage, url, status="full", type="tar") | ||||
urls = [ | urls = [ | ||||
m.url | m.url | ||||
for m in requests_mock_datadir.request_history | for m in requests_mock_datadir.request_history | ||||
if m.url.startswith("https://ftp.gnu.org") | if m.url.startswith("https://ftp.gnu.org") | ||||
] | ] | ||||
assert len(urls) == 1 | assert len(urls) == 1 | ||||
def test_archive_artifact_identity(): | def test_archive_extid(): | ||||
"""Compute primary key should return the right identity | """Compute primary key should return the right identity | ||||
""" | """ | ||||
@attr.s | @attr.s | ||||
class TestPackageInfo(ArchivePackageInfo): | class TestPackageInfo(ArchivePackageInfo): | ||||
a = attr.ib() | a = attr.ib() | ||||
b = attr.ib() | b = attr.ib() | ||||
metadata = GNU_ARTIFACTS[0] | metadata = GNU_ARTIFACTS[0] | ||||
p_info = TestPackageInfo( | p_info = TestPackageInfo( | ||||
raw_info={**metadata, "a": 1, "b": 2}, a=1, b=2, **metadata, | raw_info={**metadata, "a": 1, "b": 2}, a=1, b=2, **metadata, | ||||
) | ) | ||||
for id_keys, expected_id in [ | for manifest_format, expected_manifest in [ | ||||
(["a", "b"], [1, 2]), | (string.Template("$a $b"), b"1 2"), | ||||
([], []), | (string.Template(""), b""), | ||||
(["a", "key-that-does-not-exist"], [1, None]), | (None, "{time} {length} {version} {url}".format(**metadata).encode()), | ||||
( | |||||
None, | |||||
[ | |||||
metadata["time"], | |||||
metadata["url"], | |||||
metadata["length"], | |||||
metadata["version"], | |||||
], | |||||
), | |||||
]: | ]: | ||||
actual_id = p_info.artifact_identity(id_keys=id_keys) | actual_id = p_info.extid(manifest_format=manifest_format) | ||||
assert actual_id == expected_id | assert actual_id == hashlib.sha256(expected_manifest).digest() | ||||
with pytest.raises(KeyError): | |||||
p_info.extid(manifest_format=string.Template("$a $unknown_key")) |