Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/archive/tests/test_archive.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.loader.package.archive.loader import ArchiveLoader | from swh.loader.package.archive.loader import ArchiveLoader | ||||
from swh.loader.package.tests.common import ( | from swh.loader.package.tests.common import ( | ||||
assert_last_visit_ok, | |||||
check_snapshot, | check_snapshot, | ||||
check_metadata_paths, | check_metadata_paths, | ||||
get_stats, | get_stats, | ||||
) | ) | ||||
URL = "https://ftp.gnu.org/gnu/8sync/" | URL = "https://ftp.gnu.org/gnu/8sync/" | ||||
GNU_ARTIFACTS = [ | GNU_ARTIFACTS = [ | ||||
▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines | loader = ArchiveLoader( | ||||
"version": "0.1.0", | "version": "0.1.0", | ||||
} | } | ||||
], | ], | ||||
) | ) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status["status"] == "uneventful" | assert actual_load_status["status"] == "uneventful" | ||||
assert actual_load_status["snapshot_id"] is not None | assert actual_load_status["snapshot_id"] is not None | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert { | assert { | ||||
"content": 0, | "content": 0, | ||||
"directory": 0, | "directory": 0, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"person": 0, | "person": 0, | ||||
"release": 0, | "release": 0, | ||||
"revision": 0, | "revision": 0, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} == stats | } == stats | ||||
origin_visit = loader.storage.origin_visit_get_latest(url) | assert_last_visit_ok(loader.storage, url, status="partial", type="tar") | ||||
assert origin_visit["status"] == "partial" | |||||
assert origin_visit["type"] == "tar" | |||||
def test_check_revision_metadata_structure(swh_config, requests_mock_datadir): | def test_check_revision_metadata_structure(swh_config, requests_mock_datadir): | ||||
loader = ArchiveLoader(url=URL, artifacts=GNU_ARTIFACTS) | loader = ArchiveLoader(url=URL, artifacts=GNU_ARTIFACTS) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status["status"] == "eventful" | assert actual_load_status["status"] == "eventful" | ||||
assert actual_load_status["snapshot_id"] is not None | assert actual_load_status["snapshot_id"] is not None | ||||
assert_last_visit_ok(loader.storage, URL, status="full", type="tar") | |||||
expected_revision_id = hash_to_bytes("44183488c0774ce3c957fa19ba695cf18a4a42b3") | expected_revision_id = hash_to_bytes("44183488c0774ce3c957fa19ba695cf18a4a42b3") | ||||
revision = list(loader.storage.revision_get([expected_revision_id]))[0] | revision = list(loader.storage.revision_get([expected_revision_id]))[0] | ||||
assert revision is not None | assert revision is not None | ||||
check_metadata_paths( | check_metadata_paths( | ||||
revision["metadata"], | revision["metadata"], | ||||
paths=[ | paths=[ | ||||
Show All 19 Lines | def test_visit_with_release_artifact_no_prior_visit(swh_config, requests_mock_datadir): | ||||
loader = ArchiveLoader(url=URL, artifacts=GNU_ARTIFACTS) | loader = ArchiveLoader(url=URL, artifacts=GNU_ARTIFACTS) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status["status"] == "eventful" | assert actual_load_status["status"] == "eventful" | ||||
assert ( | assert ( | ||||
actual_load_status["snapshot_id"] == _expected_new_snapshot_first_visit_id | actual_load_status["snapshot_id"] == _expected_new_snapshot_first_visit_id | ||||
) # noqa | ) # noqa | ||||
assert_last_visit_ok(loader.storage, URL, status="full", type="tar") | |||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert { | assert { | ||||
"content": len(_expected_new_contents_first_visit), | "content": len(_expected_new_contents_first_visit), | ||||
"directory": len(_expected_new_directories_first_visit), | "directory": len(_expected_new_directories_first_visit), | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"person": 1, | "person": 1, | ||||
"release": 0, | "release": 0, | ||||
Show All 24 Lines | def test_2_visits_without_change(swh_config, requests_mock_datadir): | ||||
""" | """ | ||||
url = URL | url = URL | ||||
loader = ArchiveLoader(url, artifacts=GNU_ARTIFACTS) | loader = ArchiveLoader(url, artifacts=GNU_ARTIFACTS) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status["status"] == "eventful" | assert actual_load_status["status"] == "eventful" | ||||
assert actual_load_status["snapshot_id"] is not None | assert actual_load_status["snapshot_id"] is not None | ||||
origin_visit = loader.storage.origin_visit_get_latest(url) | |||||
assert origin_visit["status"] == "full" | assert_last_visit_ok(loader.storage, url, status="full", type="tar") | ||||
assert origin_visit["type"] == "tar" | |||||
actual_load_status2 = loader.load() | actual_load_status2 = loader.load() | ||||
assert actual_load_status2["status"] == "uneventful" | assert actual_load_status2["status"] == "uneventful" | ||||
assert actual_load_status2["snapshot_id"] is not None | assert actual_load_status2["snapshot_id"] is not None | ||||
assert actual_load_status["snapshot_id"] == actual_load_status2["snapshot_id"] | assert actual_load_status["snapshot_id"] == actual_load_status2["snapshot_id"] | ||||
origin_visit2 = loader.storage.origin_visit_get_latest(url) | assert_last_visit_ok(loader.storage, url, status="full", type="tar") | ||||
assert origin_visit2["status"] == "full" | |||||
assert origin_visit2["type"] == "tar" | |||||
urls = [ | urls = [ | ||||
m.url | m.url | ||||
for m in requests_mock_datadir.request_history | for m in requests_mock_datadir.request_history | ||||
if m.url.startswith("https://ftp.gnu.org") | if m.url.startswith("https://ftp.gnu.org") | ||||
] | ] | ||||
assert len(urls) == 1 | assert len(urls) == 1 | ||||
def test_2_visits_with_new_artifact(swh_config, requests_mock_datadir): | def test_2_visits_with_new_artifact(swh_config, requests_mock_datadir): | ||||
"""With no prior visit, load a gnu project ends up with 1 snapshot | """With no prior visit, load a gnu project ends up with 1 snapshot | ||||
""" | """ | ||||
url = URL | url = URL | ||||
artifact1 = GNU_ARTIFACTS[0] | artifact1 = GNU_ARTIFACTS[0] | ||||
loader = ArchiveLoader(url, [artifact1]) | loader = ArchiveLoader(url, [artifact1]) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status["status"] == "eventful" | assert actual_load_status["status"] == "eventful" | ||||
assert actual_load_status["snapshot_id"] is not None | assert actual_load_status["snapshot_id"] is not None | ||||
origin_visit = loader.storage.origin_visit_get_latest(url) | assert_last_visit_ok(loader.storage, url, status="full", type="tar") | ||||
assert origin_visit["status"] == "full" | |||||
assert origin_visit["type"] == "tar" | |||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert { | assert { | ||||
"content": len(_expected_new_contents_first_visit), | "content": len(_expected_new_contents_first_visit), | ||||
"directory": len(_expected_new_directories_first_visit), | "directory": len(_expected_new_directories_first_visit), | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"person": 1, | "person": 1, | ||||
Show All 36 Lines | assert { | ||||
"origin_visit": 1 + 1, | "origin_visit": 1 + 1, | ||||
"person": 1, | "person": 1, | ||||
"release": 0, | "release": 0, | ||||
"revision": len(_expected_new_revisions_first_visit) + 1, | "revision": len(_expected_new_revisions_first_visit) + 1, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1 + 1, | "snapshot": 1 + 1, | ||||
} == stats2 | } == stats2 | ||||
origin_visit2 = loader.storage.origin_visit_get_latest(url) | assert_last_visit_ok(loader.storage, url, status="full", type="tar") | ||||
assert origin_visit2["status"] == "full" | |||||
assert origin_visit2["type"] == "tar" | |||||
urls = [ | urls = [ | ||||
m.url | m.url | ||||
for m in requests_mock_datadir.request_history | for m in requests_mock_datadir.request_history | ||||
if m.url.startswith("https://ftp.gnu.org") | if m.url.startswith("https://ftp.gnu.org") | ||||
] | ] | ||||
# 1 artifact (2nd time no modification) + 1 new artifact | # 1 artifact (2nd time no modification) + 1 new artifact | ||||
assert len(urls) == 2 | assert len(urls) == 2 | ||||
Show All 20 Lines | def test_2_visits_without_change_not_gnu(swh_config, requests_mock_datadir): | ||||
# It's not the default archive loader which | # It's not the default archive loader which | ||||
loader = ArchiveLoader( | loader = ArchiveLoader( | ||||
url, artifacts=artifacts, identity_artifact_keys=["sha256", "length", "url"] | url, artifacts=artifacts, identity_artifact_keys=["sha256", "length", "url"] | ||||
) | ) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status["status"] == "eventful" | assert actual_load_status["status"] == "eventful" | ||||
assert actual_load_status["snapshot_id"] is not None | assert actual_load_status["snapshot_id"] is not None | ||||
origin_visit = loader.storage.origin_visit_get_latest(url) | assert_last_visit_ok(loader.storage, url, status="full", type="tar") | ||||
assert origin_visit["status"] == "full" | |||||
assert origin_visit["type"] == "tar" | |||||
actual_load_status2 = loader.load() | actual_load_status2 = loader.load() | ||||
assert actual_load_status2["status"] == "uneventful" | assert actual_load_status2["status"] == "uneventful" | ||||
assert actual_load_status2["snapshot_id"] == actual_load_status["snapshot_id"] | assert actual_load_status2["snapshot_id"] == actual_load_status["snapshot_id"] | ||||
origin_visit2 = loader.storage.origin_visit_get_latest(url) | assert_last_visit_ok(loader.storage, url, status="full", type="tar") | ||||
assert origin_visit2["status"] == "full" | |||||
assert origin_visit2["type"] == "tar" | |||||
urls = [ | urls = [ | ||||
m.url | m.url | ||||
for m in requests_mock_datadir.request_history | for m in requests_mock_datadir.request_history | ||||
if m.url.startswith("https://ftp.gnu.org") | if m.url.startswith("https://ftp.gnu.org") | ||||
] | ] | ||||
assert len(urls) == 1 | assert len(urls) == 1 |