diff --git a/swh/loader/package/debian.py b/swh/loader/package/debian.py --- a/swh/loader/package/debian.py +++ b/swh/loader/package/debian.py @@ -29,7 +29,7 @@ """Load debian origins into swh archive. """ - visit_type = 'debian' + visit_type = 'deb' def __init__(self, url: str, date: str, packages: Mapping[str, Any]): """Debian Loader implementation. diff --git a/swh/loader/package/deposit.py b/swh/loader/package/deposit.py --- a/swh/loader/package/deposit.py +++ b/swh/loader/package/deposit.py @@ -7,7 +7,7 @@ from typing import Any, Dict, Generator, Mapping, Sequence, Tuple -from swh.model.hashutil import hash_to_hex +from swh.model.hashutil import hash_to_hex, hash_to_bytes from swh.loader.package.loader import PackageLoader from swh.deposit.client import PrivateApiDepositClient as ApiClient @@ -116,7 +116,7 @@ self.deposit_update_url, status='failed') return r - snapshot_id = r['snapshot_id'] + snapshot_id = hash_to_bytes(r['snapshot_id']) branches = self.storage.snapshot_get(snapshot_id)['branches'] logger.debug('branches: %s', branches) if not branches: diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -15,6 +15,7 @@ from swh.core.tarball import uncompress from swh.core.config import SWHConfig from swh.model.from_disk import Directory +from swh.model.hashutil import hash_to_hex from swh.model.identifiers import ( revision_identifier, snapshot_identifier, identifier_to_bytes ) @@ -390,5 +391,5 @@ 'status': status_load, } # type: Dict[str, Any] if snapshot: - result['snapshot_id'] = snapshot['id'] + result['snapshot_id'] = hash_to_hex(snapshot['id']) return result diff --git a/swh/loader/package/tests/test_archive.py b/swh/loader/package/tests/test_archive.py --- a/swh/loader/package/tests/test_archive.py +++ b/swh/loader/package/tests/test_archive.py @@ -99,6 +99,7 @@ actual_load_status = loader.load() assert actual_load_status['status'] == 'uneventful' + assert actual_load_status['snapshot_id'] is not None stats = get_stats(loader.storage) assert { @@ -115,6 +116,7 @@ origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'partial' + assert origin_visit['type'] == 'tar' def test_check_revision_metadata_structure(swh_config, requests_mock_datadir): @@ -122,6 +124,7 @@ actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' + assert actual_load_status['snapshot_id'] is not None expected_revision_id = hash_to_bytes( '44183488c0774ce3c957fa19ba695cf18a4a42b3') @@ -154,6 +157,7 @@ actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' + assert actual_load_status['snapshot_id'] == _expected_new_snapshot_first_visit_id # noqa stats = get_stats(loader.storage) assert { @@ -195,13 +199,21 @@ actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' + assert actual_load_status['snapshot_id'] is not None origin_visit = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit['status'] == 'full' + assert origin_visit['type'] == 'tar' actual_load_status2 = loader.load() assert actual_load_status2['status'] == 'uneventful' + assert actual_load_status2['snapshot_id'] is not None + + assert actual_load_status['snapshot_id'] == actual_load_status2[ + 'snapshot_id'] + origin_visit2 = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit2['status'] == 'full' + assert origin_visit2['type'] == 'tar' urls = [ m.url for m in requests_mock_datadir.request_history @@ -220,8 +232,11 @@ actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' + assert actual_load_status['snapshot_id'] is not None + origin_visit = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit['status'] == 'full' + assert origin_visit['type'] == 'tar' stats = get_stats(loader.storage) assert { @@ -258,6 +273,7 @@ actual_load_status2 = loader2.load() assert actual_load_status2['status'] == 'eventful' + assert actual_load_status2['snapshot_id'] is not None stats2 = get_stats(loader.storage) assert { @@ -274,6 +290,7 @@ origin_visit2 = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit2['status'] == 'full' + assert origin_visit2['type'] == 'tar' urls = [ m.url for m in requests_mock_datadir.request_history @@ -329,13 +346,18 @@ actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' + assert actual_load_status['snapshot_id'] is not None origin_visit = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit['status'] == 'full' + assert origin_visit['type'] == 'tar' actual_load_status2 = loader.load() assert actual_load_status2['status'] == 'uneventful' + assert actual_load_status2['snapshot_id'] == actual_load_status[ + 'snapshot_id'] origin_visit2 = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit2['status'] == 'full' + assert origin_visit2['type'] == 'tar' urls = [ m.url for m in requests_mock_datadir.request_history diff --git a/swh/loader/package/tests/test_debian.py b/swh/loader/package/tests/test_debian.py --- a/swh/loader/package/tests/test_debian.py +++ b/swh/loader/package/tests/test_debian.py @@ -96,7 +96,11 @@ packages=PACKAGE_PER_VERSION) actual_load_status = loader.load() - assert actual_load_status['status'] == 'eventful' + expected_snapshot_id = '3b6b66e6ee4e7d903a379a882684a2a50480c0b4' + assert actual_load_status == { + 'status': 'eventful', + 'snapshot_id': expected_snapshot_id + } stats = get_stats(loader.storage) assert { @@ -112,7 +116,7 @@ } == stats expected_snapshot = { - 'id': '3b6b66e6ee4e7d903a379a882684a2a50480c0b4', + 'id': expected_snapshot_id, 'branches': { 'releases/stretch/contrib/0.7.2-3': { 'target_type': 'revision', @@ -126,7 +130,7 @@ def test_debian_first_visit_then_another_visit( swh_config, requests_mock_datadir): - """With no prior visit, load a gnu project ends up with 1 snapshot + """With no prior visit, load a debian project ends up with 1 snapshot """ url = 'deb://Debian/packages/cicero' @@ -136,9 +140,16 @@ packages=PACKAGE_PER_VERSION) actual_load_status = loader.load() - assert actual_load_status['status'] == 'eventful' + + expected_snapshot_id = '3b6b66e6ee4e7d903a379a882684a2a50480c0b4' + assert actual_load_status == { + 'status': 'eventful', + 'snapshot_id': expected_snapshot_id + } + origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'full' + assert origin_visit['type'] == 'deb' stats = get_stats(loader.storage) assert { @@ -154,7 +165,7 @@ } == stats expected_snapshot = { - 'id': '3b6b66e6ee4e7d903a379a882684a2a50480c0b4', + 'id': expected_snapshot_id, 'branches': { 'releases/stretch/contrib/0.7.2-3': { 'target_type': 'revision', @@ -170,6 +181,7 @@ assert actual_load_status2['status'] == 'uneventful' origin_visit2 = list(loader.storage.origin_visit_get(url)) assert origin_visit2[-1]['status'] == 'full' + assert origin_visit2[-1]['type'] == 'deb' stats2 = get_stats(loader.storage) assert { @@ -349,13 +361,18 @@ packages=PACKAGES_PER_VERSION) actual_load_status = loader.load() - assert actual_load_status['status'] == 'eventful' + expected_snapshot_id = 'defc19021187f3727293121fcf6c5c82cb923604' + assert actual_load_status == { + 'status': 'eventful', + 'snapshot_id': expected_snapshot_id + } origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'full' + assert origin_visit['type'] == 'deb' expected_snapshot = { - 'id': 'defc19021187f3727293121fcf6c5c82cb923604', + 'id': expected_snapshot_id, 'branches': { 'releases/stretch/contrib/0.7.2-3': { 'target_type': 'revision', diff --git a/swh/loader/package/tests/test_deposit.py b/swh/loader/package/tests/test_deposit.py --- a/swh/loader/package/tests/test_deposit.py +++ b/swh/loader/package/tests/test_deposit.py @@ -38,7 +38,7 @@ loader = DepositLoader(url, unknown_deposit_id) # does not exist actual_load_status = loader.load() - assert actual_load_status['status'] == 'failed' + assert actual_load_status == {'status': 'failed'} stats = get_stats(loader.storage) @@ -56,6 +56,7 @@ origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'partial' + assert origin_visit['type'] == 'deposit' requests_mock_datadir_missing_one = requests_mock_datadir_factory(ignore_urls=[ @@ -76,6 +77,7 @@ assert loader.archive_url actual_load_status = loader.load() assert actual_load_status['status'] == 'uneventful' + assert actual_load_status['snapshot_id'] is not None stats = get_stats(loader.storage) assert { @@ -92,6 +94,7 @@ origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'partial' + assert origin_visit['type'] == 'deposit' def test_revision_metadata_structure(swh_config, requests_mock_datadir): @@ -105,6 +108,7 @@ assert loader.archive_url actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' + assert actual_load_status['snapshot_id'] is not None expected_revision_id = hash_to_bytes( '9471c606239bccb1f269564c9ea114e1eeab9eb4') @@ -136,7 +140,11 @@ assert loader.archive_url actual_load_status = loader.load() - assert actual_load_status['status'] == 'eventful' + expected_snapshot_id = '453f455d0efb69586143cd6b6e5897f9906b53a7' + assert actual_load_status == { + 'status': 'eventful', + 'snapshot_id': expected_snapshot_id, + } stats = get_stats(loader.storage) assert { @@ -153,6 +161,7 @@ origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'full' + assert origin_visit['type'] == 'deposit' expected_branches = { 'HEAD': { @@ -162,7 +171,7 @@ } expected_snapshot = { - 'id': '453f455d0efb69586143cd6b6e5897f9906b53a7', + 'id': expected_snapshot_id, 'branches': expected_branches, } check_snapshot(expected_snapshot, storage=loader.storage) diff --git a/swh/loader/package/tests/test_npm.py b/swh/loader/package/tests/test_npm.py --- a/swh/loader/package/tests/test_npm.py +++ b/swh/loader/package/tests/test_npm.py @@ -389,6 +389,7 @@ actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' + assert actual_load_status['snapshot_id'] is not None expected_revision_id = hash_to_bytes( 'd8a1c7474d2956ac598a19f0f27d52f7015f117e') @@ -421,7 +422,11 @@ package_metadata_url(package)) actual_load_status = loader.load() - assert actual_load_status['status'] == 'eventful' + expected_snapshot_id = 'd0587e1195aed5a8800411a008f2f2d627f18e2d' + assert actual_load_status == { + 'status': 'eventful', + 'snapshot_id': expected_snapshot_id + } stats = get_stats(loader.storage) @@ -448,7 +453,7 @@ _expected_new_revisions_first_visit)) == [] expected_snapshot = { - 'id': 'd0587e1195aed5a8800411a008f2f2d627f18e2d', + 'id': expected_snapshot_id, 'branches': { 'HEAD': { 'target': 'releases/0.0.4', @@ -479,10 +484,11 @@ loader = NpmLoader(package, url, metadata_url) actual_load_status = loader.load() - assert actual_load_status['status'] == 'eventful' + assert actual_load_status['status'] is not None origin_visit = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit['status'] == 'full' + assert origin_visit['type'] == 'npm' stats = get_stats(loader.storage) @@ -500,10 +506,14 @@ loader._info = None # reset loader internal state actual_load_status2 = loader.load() - assert actual_load_status2['status'] == 'eventful' + snap_id2 = actual_load_status2['snapshot_id'] + assert snap_id2 is not None + assert snap_id2 != actual_load_status['snapshot_id'] + origin_visit2 = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit2['status'] == 'full' + assert origin_visit2['type'] == 'npm' stats = get_stats(loader.storage) diff --git a/swh/loader/package/tests/test_pypi.py b/swh/loader/package/tests/test_pypi.py --- a/swh/loader/package/tests/test_pypi.py +++ b/swh/loader/package/tests/test_pypi.py @@ -198,6 +198,7 @@ actual_load_status = loader.load() assert actual_load_status['status'] == 'uneventful' + assert actual_load_status['snapshot_id'] is not None stats = get_stats(loader.storage) assert { @@ -214,6 +215,7 @@ origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'partial' + assert origin_visit['type'] == 'pypi' # problem during loading: @@ -227,7 +229,7 @@ loader = PyPILoader(url) actual_load_status = loader.load() - assert actual_load_status['status'] == 'failed' + assert actual_load_status == {'status': 'failed'} stats = get_stats(loader.storage) @@ -245,6 +247,7 @@ origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'partial' + assert origin_visit['type'] == 'pypi' # problem during loading: failure early enough in between swh contents... @@ -272,6 +275,7 @@ actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' + assert actual_load_status['snapshot_id'] is not None expected_revision_id = hash_to_bytes( 'e445da4da22b31bfebb6ffc4383dbf839a074d21') @@ -305,7 +309,11 @@ loader = PyPILoader(url) actual_load_status = loader.load() - assert actual_load_status['status'] == 'eventful' + expected_snapshot_id = 'dd0e4201a232b1c104433741dbf45895b8ac9355' + assert actual_load_status == { + 'status': 'eventful', + 'snapshot_id': expected_snapshot_id + } stats = get_stats(loader.storage) @@ -355,13 +363,14 @@ } expected_snapshot = { - 'id': 'dd0e4201a232b1c104433741dbf45895b8ac9355', + 'id': expected_snapshot_id, 'branches': expected_branches, } check_snapshot(expected_snapshot, storage=loader.storage) origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'partial' + assert origin_visit['type'] == 'pypi' def test_visit_with_1_release_artifact(swh_config, requests_mock_datadir): @@ -372,7 +381,11 @@ loader = PyPILoader(url) actual_load_status = loader.load() - assert actual_load_status['status'] == 'eventful' + expected_snapshot_id = 'ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a' + assert actual_load_status == { + 'status': 'eventful', + 'snapshot_id': expected_snapshot_id + } stats = get_stats(loader.storage) assert { @@ -431,13 +444,14 @@ } expected_snapshot = { - 'id': 'ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a', + 'id': expected_snapshot_id, 'branches': expected_branches, } check_snapshot(expected_snapshot, loader.storage) origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'full' + assert origin_visit['type'] == 'pypi' def test_multiple_visits_with_no_change(swh_config, requests_mock_datadir): @@ -448,7 +462,11 @@ loader = PyPILoader(url) actual_load_status = loader.load() - assert actual_load_status['status'] == 'eventful' + snapshot_id = 'ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a' + assert actual_load_status == { + 'status': 'eventful', + 'snapshot_id': snapshot_id, + } stats = get_stats(loader.storage) @@ -479,7 +497,6 @@ }, } - snapshot_id = 'ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a' expected_snapshot = { 'id': snapshot_id, 'branches': expected_branches, @@ -488,9 +505,13 @@ origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'full' + assert origin_visit['type'] == 'pypi' actual_load_status2 = loader.load() - assert actual_load_status2['status'] == 'uneventful' + assert actual_load_status2 == { + 'status': 'uneventful', + 'snapshot_id': actual_load_status2['snapshot_id'] + } stats2 = get_stats(loader.storage) expected_stats2 = stats.copy() @@ -511,9 +532,15 @@ visit1_actual_load_status = loader.load() visit1_stats = get_stats(loader.storage) - assert visit1_actual_load_status['status'] == 'eventful' + expected_snapshot_id = 'ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a' + assert visit1_actual_load_status == { + 'status': 'eventful', + 'snapshot_id': expected_snapshot_id + } + origin_visit1 = next(loader.storage.origin_visit_get(url)) assert origin_visit1['status'] == 'full' + assert origin_visit1['type'] == 'pypi' assert { 'content': 6, @@ -534,9 +561,16 @@ visit2_stats = get_stats(loader.storage) assert visit2_actual_load_status['status'] == 'eventful' + expected_snapshot_id2 = '2e5149a7b0725d18231a37b342e9b7c4e121f283' + assert visit2_actual_load_status == { + 'status': 'eventful', + 'snapshot_id': expected_snapshot_id2 + } + visits = list(loader.storage.origin_visit_get(url)) assert len(visits) == 2 assert visits[1]['status'] == 'full' + assert visits[1]['type'] == 'pypi' assert { 'content': 6 + 1, # 1 more content @@ -603,7 +637,7 @@ }, } expected_snapshot = { - 'id': '2e5149a7b0725d18231a37b342e9b7c4e121f283', + 'id': expected_snapshot_id2, 'branches': expected_branches, } @@ -611,6 +645,7 @@ origin_visit = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit['status'] == 'full' + assert origin_visit['type'] == 'pypi' urls = [ m.url for m in requests_mock_datadir_visits.request_history @@ -638,7 +673,11 @@ loader = PyPILoader(url) actual_load_status = loader.load() - assert actual_load_status['status'] == 'eventful' + expected_snapshot_id = 'a27e638a4dad6fbfa273c6ebec1c4bf320fb84c6' + assert actual_load_status == { + 'status': 'eventful', + 'snapshot_id': expected_snapshot_id, + } expected_branches = { 'releases/1.1.0/nexter-1.1.0.zip': { @@ -652,10 +691,11 @@ } expected_snapshot = { - 'id': 'a27e638a4dad6fbfa273c6ebec1c4bf320fb84c6', + 'id': expected_snapshot_id, 'branches': expected_branches, } check_snapshot(expected_snapshot, loader.storage) origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'full' + assert origin_visit['type'] == 'pypi'