Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/archive/tests/test_archive.py
# Copyright (C) 2019-2021 The Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import hashlib | import hashlib | ||||
import string | import string | ||||
import attr | import attr | ||||
import pytest | import pytest | ||||
from swh.loader.package.archive.loader import ArchiveLoader, ArchivePackageInfo | from swh.loader.package.archive.loader import ArchiveLoader, ArchivePackageInfo | ||||
from swh.loader.package.tests.common import check_metadata_paths | |||||
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.model import Snapshot, SnapshotBranch, TargetType | from swh.model.model import Snapshot, SnapshotBranch, TargetType | ||||
URL = "https://ftp.gnu.org/gnu/8sync/" | URL = "https://ftp.gnu.org/gnu/8sync/" | ||||
GNU_ARTIFACTS = [ | GNU_ARTIFACTS = [ | ||||
{ | { | ||||
"time": 944729610, | "time": 944729610, | ||||
▲ Show 20 Lines • Show All 83 Lines • ▼ Show 20 Lines | assert { | ||||
"revision": 0, | "revision": 0, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} == stats | } == stats | ||||
assert_last_visit_matches(swh_storage, url, status="partial", type="tar") | assert_last_visit_matches(swh_storage, url, status="partial", type="tar") | ||||
def test_archive_check_revision_metadata_structure(swh_storage, requests_mock_datadir):
    """Load the GNU artifacts and verify the layout of a revision's metadata.

    The load must be eventful, produce a snapshot and leave a "full" visit
    of type "tar". The revision with the expected identifier is then read
    back from storage, and its metadata (plus every entry listed under
    ``original_artifact``) is handed to ``check_metadata_paths`` together
    with the expected ``(path, type)`` pairs.
    """
    load_result = ArchiveLoader(swh_storage, URL, artifacts=GNU_ARTIFACTS).load()
    assert load_result["status"] == "eventful"
    assert load_result["snapshot_id"] is not None

    assert_last_visit_matches(swh_storage, URL, status="full", type="tar")

    # Identifier of the revision this test expects the load to have stored.
    revision_id = hash_to_bytes("44183488c0774ce3c957fa19ba695cf18a4a42b3")
    fetched_revisions = swh_storage.revision_get([revision_id])
    revision = fetched_revisions[0]
    assert revision is not None

    # Top-level metadata layout: dotted path -> expected Python type.
    revision_metadata_paths = [
        ("intrinsic", dict),
        ("extrinsic.provider", str),
        ("extrinsic.when", str),
        ("extrinsic.raw", dict),
        ("original_artifact", list),
    ]
    check_metadata_paths(revision.metadata, paths=revision_metadata_paths)

    # Each recorded artifact entry is checked against the same three fields.
    for artifact_entry in revision.metadata["original_artifact"]:
        check_metadata_paths(
            artifact_entry,
            paths=[
                ("filename", str),
                ("length", int),
                ("checksums", dict),
            ],
        )
def test_archive_visit_with_release_artifact_no_prior_visit( | def test_archive_visit_with_release_artifact_no_prior_visit( | ||||
swh_storage, requests_mock_datadir | swh_storage, requests_mock_datadir | ||||
): | ): | ||||
"""With no prior visit, load a gnu project ends up with 1 snapshot | """With no prior visit, load a gnu project ends up with 1 snapshot | ||||
""" | """ | ||||
loader = ArchiveLoader(swh_storage, URL, artifacts=GNU_ARTIFACTS) | loader = ArchiveLoader(swh_storage, URL, artifacts=GNU_ARTIFACTS) | ||||
▲ Show 20 Lines • Show All 224 Lines • Show Last 20 Lines |