Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/archive/tests/test_archive.py
# Copyright (C) 2019-2021 The Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import hashlib | import hashlib | ||||
from io import BytesIO | from io import BytesIO | ||||
from pathlib import Path | from pathlib import Path | ||||
import string | import string | ||||
import attr | import attr | ||||
import pytest | import pytest | ||||
from requests.exceptions import ContentDecodingError | from requests.exceptions import ContentDecodingError | ||||
from swh.loader.package.archive.loader import ArchiveLoader, ArchivePackageInfo | from swh.loader.package.archive.loader import ArchiveLoader, ArchivePackageInfo | ||||
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes, hash_to_hex | ||||
from swh.model.model import Snapshot, SnapshotBranch, TargetType | from swh.model.model import ( | ||||
ObjectType, | |||||
Person, | |||||
Release, | |||||
Snapshot, | |||||
SnapshotBranch, | |||||
TargetType, | |||||
Timestamp, | |||||
TimestampWithTimezone, | |||||
) | |||||
URL = "https://ftp.gnu.org/gnu/8sync/" | URL = "https://ftp.gnu.org/gnu/8sync/" | ||||
GNU_ARTIFACTS = [ | GNU_ARTIFACTS = [ | ||||
{ | { | ||||
"time": 944729610, | "time": 944729610, | ||||
"url": "https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz", | "url": "https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz", | ||||
"length": 221837, | "length": 221837, | ||||
"filename": "8sync-0.1.0.tar.gz", | "filename": "8sync-0.1.0.tar.gz", | ||||
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | _expected_new_directories_first_visit = [ | ||||
"7f6e63ba6eb3e2236f65892cd822041f1a01dd5c", | "7f6e63ba6eb3e2236f65892cd822041f1a01dd5c", | ||||
"4db0a3ecbc976083e2dac01a62f93729698429a3", | "4db0a3ecbc976083e2dac01a62f93729698429a3", | ||||
"dfef1c80e1098dd5deda664bb44a9ab1f738af13", | "dfef1c80e1098dd5deda664bb44a9ab1f738af13", | ||||
"eca971d346ea54d95a6e19d5051f900237fafdaa", | "eca971d346ea54d95a6e19d5051f900237fafdaa", | ||||
"3aebc29ed1fccc4a6f2f2010fb8e57882406b528", | "3aebc29ed1fccc4a6f2f2010fb8e57882406b528", | ||||
] | ] | ||||
_expected_new_releases_first_visit = { | _expected_new_releases_first_visit = { | ||||
"c9786c1e3b46f52779c727d3509d66ebf8948d88": ( | "97c2ada10ca9b7876a8b5b17858b0518309170fd": ( | ||||
"3aebc29ed1fccc4a6f2f2010fb8e57882406b528" | "3aebc29ed1fccc4a6f2f2010fb8e57882406b528" | ||||
) | ) | ||||
} | } | ||||
def test_archive_visit_with_no_artifact_found(swh_storage, requests_mock_datadir): | def test_archive_visit_with_no_artifact_found(swh_storage, requests_mock_datadir): | ||||
url = URL | url = URL | ||||
unknown_artifact_url = "https://ftp.g.o/unknown/8sync-0.1.0.tar.gz" | unknown_artifact_url = "https://ftp.g.o/unknown/8sync-0.1.0.tar.gz" | ||||
Show All 37 Lines | ): | ||||
""" | """ | ||||
loader = ArchiveLoader(swh_storage, URL, artifacts=GNU_ARTIFACTS[:1]) | loader = ArchiveLoader(swh_storage, URL, artifacts=GNU_ARTIFACTS[:1]) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status["status"] == "eventful" | assert actual_load_status["status"] == "eventful" | ||||
expected_snapshot_first_visit_id = hash_to_bytes( | expected_snapshot_first_visit_id = hash_to_bytes( | ||||
"cdf8f335fa0c81c8ad089870ec14f52b1980eb6c" | "af62f6f6d464f9b29f270d1bbefa355af38946c4" | ||||
) | ) | ||||
assert ( | assert actual_load_status["snapshot_id"] == hash_to_hex( | ||||
hash_to_bytes(actual_load_status["snapshot_id"]) | expected_snapshot_first_visit_id | ||||
== expected_snapshot_first_visit_id | |||||
) | ) | ||||
assert_last_visit_matches(swh_storage, URL, status="full", type="tar") | assert_last_visit_matches(swh_storage, URL, status="full", type="tar") | ||||
stats = get_stats(swh_storage) | stats = get_stats(swh_storage) | ||||
assert { | assert { | ||||
"content": len(_expected_new_contents_first_visit), | "content": len(_expected_new_contents_first_visit), | ||||
"directory": len(_expected_new_directories_first_visit), | "directory": len(_expected_new_directories_first_visit), | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"release": len(_expected_new_releases_first_visit), | "release": len(_expected_new_releases_first_visit), | ||||
"revision": 0, | "revision": 0, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} == stats | } == stats | ||||
release_id = hash_to_bytes(list(_expected_new_releases_first_visit)[0]) | |||||
expected_snapshot = Snapshot( | expected_snapshot = Snapshot( | ||||
id=expected_snapshot_first_visit_id, | id=expected_snapshot_first_visit_id, | ||||
branches={ | branches={ | ||||
b"HEAD": SnapshotBranch( | b"HEAD": SnapshotBranch( | ||||
target_type=TargetType.ALIAS, target=b"releases/0.1.0", | target_type=TargetType.ALIAS, target=b"releases/0.1.0", | ||||
), | ), | ||||
b"releases/0.1.0": SnapshotBranch( | b"releases/0.1.0": SnapshotBranch( | ||||
target_type=TargetType.RELEASE, | target_type=TargetType.RELEASE, target=release_id, | ||||
target=hash_to_bytes(list(_expected_new_releases_first_visit)[0]), | |||||
), | ), | ||||
}, | }, | ||||
) | ) | ||||
check_snapshot(expected_snapshot, swh_storage) | check_snapshot(expected_snapshot, swh_storage) | ||||
assert swh_storage.release_get([release_id])[0] == Release( | |||||
id=release_id, | |||||
name=b"0.1.0", | |||||
message=( | |||||
b"Synthetic release for archive at " | |||||
b"https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz" | |||||
), | |||||
target=hash_to_bytes("3aebc29ed1fccc4a6f2f2010fb8e57882406b528"), | |||||
target_type=ObjectType.DIRECTORY, | |||||
synthetic=True, | |||||
author=Person.from_fullname(b""), | |||||
date=TimestampWithTimezone( | |||||
timestamp=Timestamp(seconds=944729610, microseconds=0), | |||||
offset=0, | |||||
negative_utc=False, | |||||
), | |||||
) | |||||
expected_contents = map(hash_to_bytes, _expected_new_contents_first_visit) | expected_contents = map(hash_to_bytes, _expected_new_contents_first_visit) | ||||
assert list(swh_storage.content_missing_per_sha1(expected_contents)) == [] | assert list(swh_storage.content_missing_per_sha1(expected_contents)) == [] | ||||
expected_dirs = map(hash_to_bytes, _expected_new_directories_first_visit) | expected_dirs = map(hash_to_bytes, _expected_new_directories_first_visit) | ||||
assert list(swh_storage.directory_missing(expected_dirs)) == [] | assert list(swh_storage.directory_missing(expected_dirs)) == [] | ||||
expected_rels = map(hash_to_bytes, _expected_new_releases_first_visit) | expected_rels = map(hash_to_bytes, _expected_new_releases_first_visit) | ||||
assert list(swh_storage.release_missing(expected_rels)) == [] | assert list(swh_storage.release_missing(expected_rels)) == [] | ||||
▲ Show 20 Lines • Show All 287 Lines • Show Last 20 Lines |