diff --git a/swh/loader/package/archive/tests/test_archive.py b/swh/loader/package/archive/tests/test_archive.py --- a/swh/loader/package/archive/tests/test_archive.py +++ b/swh/loader/package/archive/tests/test_archive.py @@ -76,8 +76,8 @@ "3aebc29ed1fccc4a6f2f2010fb8e57882406b528", ] -_expected_new_revisions_first_visit = { - "44183488c0774ce3c957fa19ba695cf18a4a42b3": ( +_expected_new_releases_first_visit = { + "c9786c1e3b46f52779c727d3509d66ebf8948d88": ( "3aebc29ed1fccc4a6f2f2010fb8e57882406b528" ) } @@ -131,7 +131,7 @@ assert actual_load_status["status"] == "eventful" expected_snapshot_first_visit_id = hash_to_bytes( - "c419397fd912039825ebdbea378bc6283f006bf5" + "cdf8f335fa0c81c8ad089870ec14f52b1980eb6c" ) assert ( @@ -147,8 +147,8 @@ "directory": len(_expected_new_directories_first_visit), "origin": 1, "origin_visit": 1, - "release": 0, - "revision": len(_expected_new_revisions_first_visit), + "release": len(_expected_new_releases_first_visit), + "revision": 0, "skipped_content": 0, "snapshot": 1, } == stats @@ -160,8 +160,8 @@ target_type=TargetType.ALIAS, target=b"releases/0.1.0", ), b"releases/0.1.0": SnapshotBranch( - target_type=TargetType.REVISION, - target=hash_to_bytes(list(_expected_new_revisions_first_visit)[0]), + target_type=TargetType.RELEASE, + target=hash_to_bytes(list(_expected_new_releases_first_visit)[0]), ), }, ) @@ -174,8 +174,8 @@ expected_dirs = map(hash_to_bytes, _expected_new_directories_first_visit) assert list(swh_storage.directory_missing(expected_dirs)) == [] - expected_revs = map(hash_to_bytes, _expected_new_revisions_first_visit) - assert list(swh_storage.revision_missing(expected_revs)) == [] + expected_rels = map(hash_to_bytes, _expected_new_releases_first_visit) + assert list(swh_storage.release_missing(expected_rels)) == [] def test_archive_2_visits_without_change(swh_storage, requests_mock_datadir): @@ -226,8 +226,8 @@ "directory": len(_expected_new_directories_first_visit), "origin": 1, "origin_visit": 1, - "release": 0, - "revision": len(_expected_new_revisions_first_visit), + "release": len(_expected_new_releases_first_visit), + "revision": 0, "skipped_content": 0, "snapshot": 1, } == stats @@ -255,8 +255,8 @@ "directory": len(_expected_new_directories_first_visit) + 8, "origin": 1, "origin_visit": 1 + 1, - "release": 0, - "revision": len(_expected_new_revisions_first_visit) + 1, + "release": len(_expected_new_releases_first_visit) + 1, + "revision": 0, "skipped_content": 0, "snapshot": 1 + 1, } == stats2 diff --git a/swh/loader/package/cran/tests/test_cran.py b/swh/loader/package/cran/tests/test_cran.py --- a/swh/loader/package/cran/tests/test_cran.py +++ b/swh/loader/package/cran/tests/test_cran.py @@ -23,14 +23,14 @@ from swh.model.model import Snapshot, SnapshotBranch, TargetType, TimestampWithTimezone SNAPSHOT = Snapshot( - id=hash_to_bytes("920adcccc78aaeedd3cfa4459dd900d8c3431a21"), + id=hash_to_bytes("56ed00938d83892bd5b42f2f368ae38a1dbfa718"), branches={ b"HEAD": SnapshotBranch( target=b"releases/2.22-6", target_type=TargetType.ALIAS ), b"releases/2.22-6": SnapshotBranch( - target=hash_to_bytes("42bdb16facd5140424359c8ce89a28ecfa1ce603"), - target_type=TargetType.REVISION, + target=hash_to_bytes("42993a72eac50a4a83523c9327a52be3593755a8"), + target_type=TargetType.RELEASE, ), }, ) @@ -194,8 +194,8 @@ "directory": 7, "origin": 1, "origin_visit": 1, - "release": 0, - "revision": 1, + "release": 1, + "revision": 0, "skipped_content": 0, "snapshot": 1, } == visit_stats @@ -241,8 +241,8 @@ "directory": 7, "origin": 1, "origin_visit": 1, - 
"release": 0, - "revision": 1, + "release": 1, + "revision": 0, "skipped_content": 0, "snapshot": 1, } == visit_stats @@ -358,8 +358,8 @@ "directory": 7, "origin": 1, "origin_visit": 1, - "release": 0, - "revision": 1, + "release": 1, + "revision": 0, "skipped_content": 0, "snapshot": 1, } == visit_stats diff --git a/swh/loader/package/debian/tests/test_debian.py b/swh/loader/package/debian/tests/test_debian.py --- a/swh/loader/package/debian/tests/test_debian.py +++ b/swh/loader/package/debian/tests/test_debian.py @@ -110,7 +110,7 @@ ) actual_load_status = loader.load() - expected_snapshot_id = "3b6b66e6ee4e7d903a379a882684a2a50480c0b4" + expected_snapshot_id = "8bc5d12e2443ab216fdd2f969b25b39e96c20fef" assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id, @@ -128,8 +128,8 @@ id=hash_to_bytes(expected_snapshot_id), branches={ b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( - target_type=TargetType.REVISION, - target=hash_to_bytes("2807f5b3f84368b4889a9ae827fe85854ffecf07"), + target_type=TargetType.RELEASE, + target=hash_to_bytes("5a99736512d381700c5f54d7fdd6b46e136535a2"), ) }, ) # different than the previous loader as no release is done @@ -142,8 +142,8 @@ "directory": 2, "origin": 1, "origin_visit": 1, - "release": 0, - "revision": 1, # all artifacts under 1 revision + "release": 1, # all artifacts under 1 release + "revision": 0, "skipped_content": 0, "snapshot": 1, } == stats @@ -162,7 +162,7 @@ actual_load_status = loader.load() - expected_snapshot_id = "3b6b66e6ee4e7d903a379a882684a2a50480c0b4" + expected_snapshot_id = "8bc5d12e2443ab216fdd2f969b25b39e96c20fef" assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id, @@ -180,8 +180,8 @@ id=hash_to_bytes(expected_snapshot_id), branches={ b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( - target_type=TargetType.REVISION, - target=hash_to_bytes("2807f5b3f84368b4889a9ae827fe85854ffecf07"), + target_type=TargetType.RELEASE, + target=hash_to_bytes("5a99736512d381700c5f54d7fdd6b46e136535a2"), ) }, ) # different than the previous loader as no release is done @@ -194,8 +194,8 @@ "directory": 2, "origin": 1, "origin_visit": 1, - "release": 0, - "revision": 1, # all artifacts under 1 revision + "release": 1, # all artifacts under 1 release + "revision": 0, "skipped_content": 0, "snapshot": 1, } == stats @@ -217,8 +217,8 @@ "directory": 2 + 0, "origin": 1, "origin_visit": 1 + 1, # a new visit occurred - "release": 0, - "revision": 1, + "release": 1, + "revision": 0, "skipped_content": 0, "snapshot": 1, # same snapshot across 2 visits } == stats2 @@ -418,7 +418,7 @@ ) actual_load_status = loader.load() - expected_snapshot_id = "defc19021187f3727293121fcf6c5c82cb923604" + expected_snapshot_id = "3d26243c91eb084c350627a5a102cfe039c5b92a" assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id, @@ -436,12 +436,12 @@ id=hash_to_bytes(expected_snapshot_id), branches={ b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( - target_type=TargetType.REVISION, - target=hash_to_bytes("2807f5b3f84368b4889a9ae827fe85854ffecf07"), + target_type=TargetType.RELEASE, + target=hash_to_bytes("5a99736512d381700c5f54d7fdd6b46e136535a2"), ), b"releases/buster/contrib/0.7.2-4": SnapshotBranch( - target_type=TargetType.REVISION, - target=hash_to_bytes("8224139c274c984147ef4b09aa0e462c55a10bd3"), + target_type=TargetType.RELEASE, + target=hash_to_bytes("192fc7ccce80f64a0d3cf33d379133af067ec721"), ), }, ) diff --git a/swh/loader/package/deposit/loader.py 
b/swh/loader/package/deposit/loader.py --- a/swh/loader/package/deposit/loader.py +++ b/swh/loader/package/deposit/loader.py @@ -65,7 +65,7 @@ # Note: # `date` and `committer_date` are always transmitted by the deposit read api # which computes itself the values. The loader needs to use those to create the - # revision. + # release. all_metadata_raw: List[str] = metadata["metadata_raw"] raw_info = { @@ -270,19 +270,19 @@ logger.debug("branches: %s", branches) if not branches: return r - rev_id = branches[b"HEAD"].target + rel_id = branches[b"HEAD"].target - revision = self.storage.revision_get([rev_id])[0] - if not revision: + release = self.storage.release_get([rel_id])[0] + if not release: return r # update the deposit's status to success with its - # revision-id and directory-id + # release-id and directory-id self.client.status_update( self.deposit_id, status="done", - revision_id=hash_to_hex(rev_id), - directory_id=hash_to_hex(revision.directory), + release_id=hash_to_hex(rel_id), + directory_id=hash_to_hex(release.target), snapshot_id=r["snapshot_id"], origin_url=self.url, ) @@ -358,7 +358,7 @@ deposit_id: Union[int, str], status: str, errors: Optional[List[str]] = None, - revision_id: Optional[str] = None, + release_id: Optional[str] = None, directory_id: Optional[str] = None, snapshot_id: Optional[str] = None, origin_url: Optional[str] = None, @@ -369,8 +369,8 @@ """ url = f"{self.base_url}/{deposit_id}/update/" payload: Dict[str, Any] = {"status": status} - if revision_id: - payload["revision_id"] = revision_id + if release_id: + payload["release_id"] = release_id if directory_id: payload["directory_id"] = directory_id if snapshot_id: diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py --- a/swh/loader/package/deposit/tests/test_deposit.py +++ b/swh/loader/package/deposit/tests/test_deposit.py @@ -15,20 +15,18 @@ from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats from swh.model.hashutil import hash_to_bytes, hash_to_hex from swh.model.model import ( - MetadataAuthority, - MetadataAuthorityType, - MetadataFetcher, Origin, Person, RawExtrinsicMetadata, - Revision, - RevisionType, + Release, Snapshot, SnapshotBranch, TargetType, Timestamp, TimestampWithTimezone, ) +from swh.model.model import MetadataAuthority, MetadataAuthorityType, MetadataFetcher +from swh.model.model import ObjectType as ModelObjectType from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType DEPOSIT_URL = "https://deposit.softwareheritage.org/1/private" @@ -171,7 +169,7 @@ ) actual_load_status = loader.load() - expected_snapshot_id = "b2b327b33dc85818bd23c3ccda8b7e675a66ecbd" + expected_snapshot_id = "1090aaadc9fd1a77798bf6187d309145cbd23c53" assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id, @@ -185,20 +183,18 @@ snapshot=hash_to_bytes(expected_snapshot_id), ) - revision_id_hex = "637318680351f5d78856d13264faebbd91efe9bb" - revision_id = hash_to_bytes(revision_id_hex) + release_id_hex = "77c127bff4f9137baf26774fe19e29d82a41f69d" + release_id = hash_to_bytes(release_id_hex) expected_snapshot = Snapshot( id=hash_to_bytes(expected_snapshot_id), branches={ - b"HEAD": SnapshotBranch( - target=revision_id, target_type=TargetType.REVISION, - ), + b"HEAD": SnapshotBranch(target=release_id, target_type=TargetType.RELEASE,), }, ) check_snapshot(expected_snapshot, storage=loader.storage) - revision = loader.storage.revision_get([revision_id])[0] + release = 
loader.storage.release_get([release_id])[0] date = TimestampWithTimezone( timestamp=Timestamp(seconds=1507389428, microseconds=0), offset=0, @@ -209,19 +205,16 @@ name=b"Software Heritage", email=b"robot@softwareheritage.org", ) - assert revision == Revision( - id=revision_id, + assert release == Release( + id=release_id, + name=b"HEAD", message=b"hal: Deposit 666 in collection hal", author=person, - committer=person, date=date, - committer_date=date, - type=RevisionType.TAR, - directory=b"\xfd-\xf1-\xc5SL\x1d\xa1\xe9\x18\x0b\x91Q\x02\xfbo`\x1d\x19", + target_type=ModelObjectType.DIRECTORY, + target=b"\xfd-\xf1-\xc5SL\x1d\xa1\xe9\x18\x0b\x91Q\x02\xfbo`\x1d\x19", synthetic=True, metadata=None, - parents=(), - extra_headers=(), ) # check metadata @@ -247,8 +240,9 @@ assert orig_meta0.fetcher == fetcher # Check directory metadata + assert release.target_type == ModelObjectType.DIRECTORY directory_swhid = CoreSWHID( - object_type=ObjectType.DIRECTORY, object_id=revision.directory + object_type=ObjectType.DIRECTORY, object_id=release.target ) actual_dir_meta = loader.storage.raw_extrinsic_metadata_get( directory_swhid, authority @@ -273,8 +267,8 @@ body = update_query.json() expected_body = { "status": "done", - "revision_id": revision_id_hex, - "directory_id": hash_to_hex(revision.directory), + "release_id": release_id_hex, + "directory_id": hash_to_hex(release.target), "snapshot_id": expected_snapshot_id, "origin_url": url, } @@ -287,8 +281,8 @@ "directory": 12, "origin": 1, "origin_visit": 1, - "release": 0, - "revision": 1, + "release": 1, + "revision": 0, "skipped_content": 0, "snapshot": 1, } == stats @@ -306,7 +300,7 @@ ) actual_load_status = loader.load() - expected_snapshot_id = "3e68440fdd7c81d283f8f3aebb6f0c8657864192" + expected_snapshot_id = "f87b25c121d9ab3ff0219b04b92d83f8c6f368f4" assert actual_load_status == { "status": "eventful", @@ -320,12 +314,12 @@ snapshot=hash_to_bytes(expected_snapshot_id), ) - revision_id = "564d18943d71be80d0d73b43a77cfb205bcde96c" + release_id = "c6891941d4033f4fb1dbf39b501c819ac618f957" expected_snapshot = Snapshot( id=hash_to_bytes(expected_snapshot_id), branches={ b"HEAD": SnapshotBranch( - target=hash_to_bytes(revision_id), target_type=TargetType.REVISION + target=hash_to_bytes(release_id), target_type=TargetType.RELEASE ) }, ) @@ -333,14 +327,13 @@ check_snapshot(expected_snapshot, storage=loader.storage) raw_meta = loader.client.metadata_get(deposit_id) - # Ensure the date fields are set appropriately in the revision + # Ensure the date fields are set appropriately in the release - # Retrieve the revision - revision = loader.storage.revision_get([hash_to_bytes(revision_id)])[0] - assert revision - assert revision.date.to_dict() == raw_meta["deposit"]["author_date"] - assert revision.committer_date.to_dict() == raw_meta["deposit"]["committer_date"] - assert not revision.metadata + # Retrieve the release + release = loader.storage.release_get([hash_to_bytes(release_id)])[0] + assert release + assert release.date.to_dict() == raw_meta["deposit"]["author_date"] + assert not release.metadata provider = { "provider_name": "hal", @@ -404,9 +397,10 @@ assert sorted(origin_extrinsic_metadata.results) == sorted(expected_metadata) - # Check the revision metadata swh side + # Check the release metadata swh side + assert release.target_type == ModelObjectType.DIRECTORY directory_swhid = ExtendedSWHID( - object_type=ExtendedObjectType.DIRECTORY, object_id=revision.directory + object_type=ExtendedObjectType.DIRECTORY, object_id=release.target ) 
actual_directory_metadata = loader.storage.raw_extrinsic_metadata_get( directory_swhid, authority @@ -415,8 +409,8 @@ assert actual_directory_metadata.next_page_token is None assert len(actual_directory_metadata.results) == len(all_metadata_raw) - revision_swhid = CoreSWHID( - object_type=ObjectType.REVISION, object_id=hash_to_bytes(revision_id) + release_swhid = CoreSWHID( + object_type=ObjectType.RELEASE, object_id=hash_to_bytes(release_id) ) dir_metadata_template = RawExtrinsicMetadata( target=directory_swhid, @@ -424,7 +418,7 @@ authority=authority, fetcher=fetcher, origin=url, - revision=revision_swhid, + release=release_swhid, # to satisfy the constructor discovery_date=now(), metadata=b"", @@ -464,8 +458,8 @@ body = update_query.json() expected_body = { "status": "done", - "revision_id": revision_id, - "directory_id": hash_to_hex(revision.directory), + "release_id": release_id, + "directory_id": hash_to_hex(release.target), "snapshot_id": expected_snapshot_id, "origin_url": url, } @@ -485,7 +479,7 @@ loader = DepositLoader(swh_storage, url, deposit_id, deposit_client) actual_load_status = loader.load() - expected_snapshot_id = "0ac7b54c042a026389f2087dc16f1d5c644ed0e4" + expected_snapshot_id = "212228fe041c763471c14545cf11dbec8003d6b4" assert actual_load_status == { "status": "eventful", diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -26,6 +26,7 @@ Tuple, TypeVar, ) +import warnings import attr from requests.exceptions import ContentDecodingError @@ -42,14 +43,18 @@ MetadataAuthority, MetadataAuthorityType, MetadataFetcher, +) +from swh.model.model import ( Origin, OriginVisit, OriginVisitStatus, RawExtrinsicMetadata, + Release, Revision, Sha1Git, Snapshot, ) +from swh.model.model import ObjectType as ModelObjectType from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType from swh.storage.algos.snapshot import snapshot_get_latest from swh.storage.interface import StorageInterface @@ -118,7 +123,7 @@ """:term:`extrinsic metadata` collected by the loader, that will be attached to the loaded directory and added to the Metadata storage.""" - # TODO: add support for metadata for revisions and contents + # TODO: add support for metadata for releases and contents def extid(self) -> Optional[PartialExtID]: """Returns a unique intrinsic identifier of this package info, @@ -192,15 +197,39 @@ """Build the revision from the archive metadata (extrinsic artifact metadata) and the intrinsic metadata. + This method is deprecated, :meth:`build_release` should be implemented instead. + Args: p_info: Package information uncompressed_path: Artifact uncompressed path on disk + """ + raise NotImplementedError("build_revision") - Returns: - Revision object + def build_release( + self, + version: str, + p_info: TPackageInfo, + uncompressed_path: str, + directory: Sha1Git, + ) -> Optional[Release]: + """Build the release from the archive metadata (extrinsic + artifact metadata) and the intrinsic metadata. + Args: + p_info: Package information + uncompressed_path: Artifact uncompressed path on disk """ - raise NotImplementedError("build_revision") + warnings.warn( + f"{self.get_loader_name()} is missing a build_release() method. 
" + f"Falling back to `build_revision` + automatic conversion to release.", + DeprecationWarning, + ) + + rev = self.build_revision(p_info, uncompressed_path, directory) + if rev is None: + return None + else: + return rev2rel(rev, version) def get_default_version(self) -> str: """Retrieve the latest release version if any. @@ -245,33 +274,34 @@ return known_extids - def resolve_revision_from_extids( + def resolve_object_from_extids( self, known_extids: Dict[PartialExtID, List[CoreSWHID]], p_info: TPackageInfo, - revision_whitelist: Set[Sha1Git], - ) -> Optional[Sha1Git]: - """Resolve the revision from known ExtIDs and a package info object. + whitelist: Set[Sha1Git], + ) -> Optional[CoreSWHID]: + """Resolve the revision/release from known ExtIDs and a package info object. If the artifact has already been downloaded, this will return the - existing revision targeting that uncompressed artifact directory. + existing release (or revision) targeting that uncompressed artifact directory. Otherwise, this returns None. Args: known_extids: Dict built from a list of ExtID, with the target as value p_info: Package information - revision_whitelist: Any ExtID with target not in this set is filtered out + whitelist: Any ExtID with target not in this set is filtered out Returns: - None or revision identifier + None or release/revision SWHID """ new_extid = p_info.extid() if new_extid is None: return None + extid_targets = [] for extid_target in known_extids.get(new_extid, []): - if extid_target.object_id not in revision_whitelist: + if extid_target.object_id not in whitelist: # There is a known ExtID for this package, but its target is not # in the snapshot. # This can happen for three reasons: @@ -290,22 +320,43 @@ # # In case of 1, we must actually load the package now, # so let's do it. - # TODO: detect when we are in case 3 using revision_missing instead - # of the snapshot. + # TODO: detect when we are in case 3 using release_missing + # or revision_missing instead of the snapshot. continue - elif extid_target.object_type != ObjectType.REVISION: - # We only support revisions for now. + elif extid_target.object_type in (ObjectType.RELEASE, ObjectType.REVISION): + extid_targets.append(extid_target) + else: # Note that this case should never be reached unless there is a # collision between a revision hash and some non-revision object's # hash, but better safe than sorry. logger.warning( - "%s is in the revision whitelist, but is not a revision.", + "%s is in the whitelist, but is not a revision/release.", hash_to_hex(extid_target.object_type), ) - continue - return extid_target.object_id - return None + if extid_targets: + # This is a known package version, as we have an extid to reference it. + # Let's return one of them. + + # If there is a release extid, return it. + release_extid_targets = [ + extid_target + for extid_target in extid_targets + if extid_target.object_type == ObjectType.RELEASE + ] + if release_extid_targets: + assert len(release_extid_targets) == 1, release_extid_targets + return release_extid_targets[0] + + # If there is no release extid (ie. if the package was only loaded with + # older versions of this loader, which produced revision objects instead + # of releases), return a revision extid. 
+ assert len(extid_targets) == 1, extid_targets + assert extid_targets[0].object_type == ObjectType.REVISION, extid_targets + return extid_targets[0] + else: + # No target found (this is probably a new package version) + return None def download_package( self, p_info: TPackageInfo, tmpdir: str @@ -542,7 +593,7 @@ } new_extids: Set[ExtID] = set() - tmp_revisions: Dict[str, List[Tuple[str, Sha1Git]]] = { + tmp_releases: Dict[str, List[Tuple[str, Sha1Git]]] = { version: [] for version in versions } errors = [] @@ -550,20 +601,39 @@ logger.debug("package_info: %s", p_info) # Check if the package was already loaded, using its ExtID - revision_id = self.resolve_revision_from_extids( + swhid = self.resolve_object_from_extids( known_extids, p_info, last_snapshot_targets ) - if revision_id is None: - # No matching revision found in the last snapshot, load it. + if swhid is not None and swhid.object_type == ObjectType.REVISION: + # This package was already loaded, but by an older version + # of this loader, which produced revisions instead of releases. + # Let's fetch the revision's data, and "upgrade" it into a release. + (rev,) = self.storage.revision_get([swhid.object_id]) + if not rev: + logger.error( + "Failed to upgrade branch %s from revision to " + "release, %s is missing from the storage. " + "Falling back to re-loading from the origin.", + branch_name, + swhid, + ) + else: + rev = None + + if swhid is None or (swhid.object_type == ObjectType.REVISION and not rev): + # No matching revision or release found in the last snapshot, load it. + + release_id = None + try: - res = self._load_revision(p_info, origin) + res = self._load_release(version, p_info, origin) if res: - (revision_id, directory_id) = res - assert revision_id + (release_id, directory_id) = res + assert release_id assert directory_id self._load_extrinsic_directory_metadata( - p_info, revision_id, directory_id + p_info, release_id, directory_id ) self.storage.flush() status_load = "eventful" @@ -577,26 +647,49 @@ errors.append(f"{error}: {e}") continue - if revision_id is None: + if release_id is None: continue + add_extid = True + elif swhid.object_type == ObjectType.REVISION: + # If 'rev' was None, the previous block would have run. + assert rev is not None + rel = rev2rel(rev, version) + self.storage.release_add([rel]) + logger.debug("Upgraded %s to %s", swhid, rel.swhid()) + release_id = rel.id + + # Create a new extid for this package, so the next run of this loader + # will be able to find the new release, and use it (instead of the + # old revision) + add_extid = True + elif swhid.object_type == ObjectType.RELEASE: + # This package was already loaded, nothing to do. 
+ release_id = swhid.object_id + add_extid = False + else: + assert False, f"Unexpected object type: {swhid}" + + assert release_id is not None + + if add_extid: partial_extid = p_info.extid() if partial_extid is not None: (extid_type, extid) = partial_extid - revision_swhid = CoreSWHID( - object_type=ObjectType.REVISION, object_id=revision_id + release_swhid = CoreSWHID( + object_type=ObjectType.RELEASE, object_id=release_id ) new_extids.add( - ExtID(extid_type=extid_type, extid=extid, target=revision_swhid) + ExtID(extid_type=extid_type, extid=extid, target=release_swhid) ) - tmp_revisions[version].append((branch_name, revision_id)) + tmp_releases[version].append((branch_name, release_id)) if load_exceptions: status_visit = "partial" - if not tmp_revisions: - # We could not load any revisions; fail completely + if not tmp_releases: + # We could not load any releases; fail completely return self.finalize_visit( snapshot=snapshot, visit=visit, @@ -615,7 +708,7 @@ logger.debug("extra branches: %s", extra_branches) snapshot = self._load_snapshot( - default_version, tmp_revisions, extra_branches + default_version, tmp_releases, extra_branches ) self.storage.flush() except Exception as e: @@ -683,15 +776,15 @@ return (uncompressed_path, directory) - def _load_revision( - self, p_info: TPackageInfo, origin + def _load_release( + self, version: str, p_info: TPackageInfo, origin ) -> Optional[Tuple[Sha1Git, Sha1Git]]: - """Does all the loading of a revision itself: + """Does all the loading of a release itself: * downloads a package and uncompresses it * loads it from disk - * adds contents, directories, and revision to self.storage - * returns (revision_id, directory_id) + * adds contents, directories, and release to self.storage + * returns (release_id, directory_id) Raises exception when unable to download or uncompress artifacts @@ -703,54 +796,57 @@ (uncompressed_path, directory) = self._load_directory(dl_artifacts, tmpdir) # FIXME: This should be release. cf. 
D409 - revision = self.build_revision( - p_info, uncompressed_path, directory=directory.hash + release = self.build_release( + version, p_info, uncompressed_path, directory=directory.hash ) - if not revision: + if not release: # Some artifacts are missing intrinsic metadata # skipping those return None metadata = [metadata for (filepath, metadata) in dl_artifacts] + assert release.target is not None, release + assert release.target_type == ModelObjectType.DIRECTORY, release + metadata_target = ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=release.target + ) original_artifact_metadata = RawExtrinsicMetadata( - target=ExtendedSWHID( - object_type=ExtendedObjectType.DIRECTORY, object_id=revision.directory - ), + target=metadata_target, discovery_date=self.visit_date, authority=SWH_METADATA_AUTHORITY, fetcher=self.get_metadata_fetcher(), format="original-artifacts-json", metadata=json.dumps(metadata).encode(), origin=self.url, - revision=CoreSWHID(object_type=ObjectType.REVISION, object_id=revision.id), + release=release.swhid(), ) self._load_metadata_objects([original_artifact_metadata]) - logger.debug("Revision: %s", revision) + logger.debug("Release: %s", release) - self.storage.revision_add([revision]) + self.storage.release_add([release]) assert directory.hash - return (revision.id, directory.hash) + return (release.id, directory.hash) def _load_snapshot( self, default_version: str, - revisions: Dict[str, List[Tuple[str, bytes]]], + releases: Dict[str, List[Tuple[str, bytes]]], extra_branches: Dict[bytes, Mapping[str, Any]], ) -> Optional[Snapshot]: - """Build snapshot out of the current revisions stored and extra branches. + """Build snapshot out of the current releases stored and extra branches. Then load it in the storage. 
""" - logger.debug("revisions: %s", revisions) + logger.debug("releases: %s", releases) # Build and load the snapshot branches = {} # type: Dict[bytes, Mapping[str, Any]] - for version, branch_name_revisions in revisions.items(): - if version == default_version and len(branch_name_revisions) == 1: + for version, branch_name_releases in releases.items(): + if version == default_version and len(branch_name_releases) == 1: # only 1 branch (no ambiguity), we can create an alias # branch 'HEAD' - branch_name, _ = branch_name_revisions[0] + branch_name, _ = branch_name_releases[0] # except for some corner case (deposit) if branch_name != "HEAD": branches[b"HEAD"] = { @@ -758,9 +854,9 @@ "target": branch_name.encode("utf-8"), } - for branch_name, target in branch_name_revisions: + for branch_name, target in branch_name_releases: branches[branch_name.encode("utf-8")] = { - "target_type": "revision", + "target_type": "release", "target": target, } @@ -886,7 +982,7 @@ return metadata_objects def build_extrinsic_directory_metadata( - self, p_info: TPackageInfo, revision_id: Sha1Git, directory_id: Sha1Git, + self, p_info: TPackageInfo, release_id: Sha1Git, directory_id: Sha1Git, ) -> List[RawExtrinsicMetadata]: if not p_info.directory_extrinsic_metadata: # If this package loader doesn't write metadata, no need to require @@ -910,8 +1006,8 @@ format=item.format, metadata=item.metadata, origin=self.url, - revision=CoreSWHID( - object_type=ObjectType.REVISION, object_id=revision_id + release=CoreSWHID( + object_type=ObjectType.RELEASE, object_id=release_id ), ) ) @@ -919,10 +1015,10 @@ return metadata_objects def _load_extrinsic_directory_metadata( - self, p_info: TPackageInfo, revision_id: Sha1Git, directory_id: Sha1Git, + self, p_info: TPackageInfo, release_id: Sha1Git, directory_id: Sha1Git, ) -> None: metadata_objects = self.build_extrinsic_directory_metadata( - p_info, revision_id, directory_id + p_info, release_id, directory_id ) self._load_metadata_objects(metadata_objects) @@ -963,3 +1059,16 @@ sentry_sdk.capture_exception(e) # No big deal, it just means the next visit will load the same versions # again. 
+ + +def rev2rel(rev: Revision, version: str) -> Release: + """Converts a revision to a release.""" + return Release( + name=version.encode(), + message=rev.message, + target=rev.directory, + target_type=ModelObjectType.DIRECTORY, + synthetic=rev.synthetic, + author=rev.author, + date=rev.date, + ) diff --git a/swh/loader/package/nixguix/tests/test_nixguix.py b/swh/loader/package/nixguix/tests/test_nixguix.py --- a/swh/loader/package/nixguix/tests/test_nixguix.py +++ b/swh/loader/package/nixguix/tests/test_nixguix.py @@ -54,15 +54,15 @@ SNAPSHOT1 = Snapshot( - id=hash_to_bytes("0c5881c74283793ebe9a09a105a9381e41380383"), + id=hash_to_bytes("771d13ae4e799755c22d1e05da8fc39cf215de58"), branches={ b"evaluation": SnapshotBranch( target=hash_to_bytes("cc4e04c26672dd74e5fd0fecb78b435fb55368f7"), target_type=TargetType.REVISION, ), b"https://github.com/owner-1/repository-1/revision-1.tgz": SnapshotBranch( - target=hash_to_bytes("488ad4e7b8e2511258725063cf43a2b897c503b4"), - target_type=TargetType.REVISION, + target=hash_to_bytes("24853190589d26d0ea2b6c0330b553ff39176e0c"), + target_type=TargetType.RELEASE, ), }, ) @@ -282,8 +282,8 @@ "directory": 3, "origin": 1, "origin_visit": 1, - "release": 0, - "revision": 1, + "release": 1, + "revision": 0, "skipped_content": 0, "snapshot": 1, } == stats @@ -405,15 +405,15 @@ "directory": 3, "origin": 1, "origin_visit": 1, - "release": 0, - "revision": 1, + "release": 1, + "revision": 0, "skipped_content": 0, "snapshot": 1, } == stats loader = NixGuixLoader(swh_storage, sources_url) load_status = loader.load() - expected_snapshot_id_hex = "b0bfa75cbd0cc90aac3b9e95fb0f59c731176d97" + expected_snapshot_id_hex = "c5bba84fd5ac3342566effb86190619092d34e79" expected_snapshot_id = hash_to_bytes(expected_snapshot_id_hex) assert load_status == { "status": "eventful", @@ -439,12 +439,12 @@ target_type=TargetType.REVISION, ), b"https://github.com/owner-1/repository-1/revision-1.tgz": SnapshotBranch( - target=hash_to_bytes("488ad4e7b8e2511258725063cf43a2b897c503b4"), - target_type=TargetType.REVISION, + target=hash_to_bytes("24853190589d26d0ea2b6c0330b553ff39176e0c"), + target_type=TargetType.RELEASE, ), b"https://github.com/owner-2/repository-1/revision-1.tgz": SnapshotBranch( - target=hash_to_bytes("85e0bad74e33e390aaeb74f139853ae3863ee544"), - target_type=TargetType.REVISION, + target=hash_to_bytes("3d44fbe814ba802cfd77f83975e45766d3a2ba85"), + target_type=TargetType.RELEASE, ), }, ) @@ -456,8 +456,8 @@ "directory": 5, "origin": 1, "origin_visit": 2, - "release": 0, - "revision": 2, + "release": 2, + "revision": 0, "skipped_content": 0, "snapshot": 2, } == stats @@ -573,7 +573,7 @@ ] archive_loader = ArchiveLoader(swh_storage, url=gnu_url, artifacts=gnu_artifacts) actual_load_status = archive_loader.load() - expected_snapshot_id = "c419397fd912039825ebdbea378bc6283f006bf5" + expected_snapshot_id = "cdf8f335fa0c81c8ad089870ec14f52b1980eb6c" assert actual_load_status["status"] == "eventful" assert actual_load_status["snapshot_id"] == expected_snapshot_id # noqa diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py --- a/swh/loader/package/npm/tests/test_npm.py +++ b/swh/loader/package/npm/tests/test_npm.py @@ -17,15 +17,14 @@ from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats from swh.model.hashutil import hash_to_bytes from swh.model.model import ( - MetadataAuthority, - MetadataAuthorityType, - MetadataFetcher, Person, RawExtrinsicMetadata, Snapshot, SnapshotBranch, TargetType, ) +from 
swh.model.model import MetadataAuthority, MetadataAuthorityType, MetadataFetcher +from swh.model.model import ObjectType as ModelObjectType from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType from swh.storage.interface import PagedResult @@ -279,15 +278,15 @@ ] ) -_expected_new_revisions_first_visit = normalize_hashes( +_expected_new_releases_first_visit = normalize_hashes( { - "d8a1c7474d2956ac598a19f0f27d52f7015f117e": ( + "d25e722a32c145b3eb88b416049dd35d27759a87": ( "42753c0c2ab00c4501b552ac4671c68f3cf5aece" ), - "5f9eb78af37ffd12949f235e86fac04898f9f72a": ( + "3522e846b97c0b8434c565fe891c0f082a357e5d": ( "3370d20d6f96dc1c9e50f083e2134881db110f4f" ), - "ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a": ( + "54f6c1711c6aedb6de3cf2d6347b9f772e343784": ( "d7895533ef5edbcffdea3f057d9fef3a1ef845ce" ), } @@ -308,7 +307,7 @@ loader = NpmLoader(swh_storage, url) actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("d0587e1195aed5a8800411a008f2f2d627f18e2d") + expected_snapshot_id = hash_to_bytes("ddaad89b0b4edb7eefe7c92e9b1166caa776ebbc") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -319,9 +318,9 @@ ) versions = [ - ("0.0.2", "d8a1c7474d2956ac598a19f0f27d52f7015f117e"), - ("0.0.3", "5f9eb78af37ffd12949f235e86fac04898f9f72a"), - ("0.0.4", "ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a"), + ("0.0.2", "d25e722a32c145b3eb88b416049dd35d27759a87"), + ("0.0.3", "3522e846b97c0b8434c565fe891c0f082a357e5d"), + ("0.0.4", "54f6c1711c6aedb6de3cf2d6347b9f772e343784"), ] expected_snapshot = Snapshot( @@ -333,7 +332,7 @@ **{ b"releases/" + version_name.encode(): SnapshotBranch( - target=hash_to_bytes(version_id), target_type=TargetType.REVISION, + target=hash_to_bytes(version_id), target_type=TargetType.RELEASE, ) for (version_name, version_id) in versions }, @@ -349,20 +348,21 @@ list(swh_storage.directory_missing(_expected_new_directories_first_visit)) == [] ) - assert list(swh_storage.revision_missing(_expected_new_revisions_first_visit)) == [] + assert list(swh_storage.release_missing(_expected_new_releases_first_visit)) == [] metadata_authority = MetadataAuthority( type=MetadataAuthorityType.FORGE, url="https://npmjs.com/", ) - for (version_name, revision_id) in versions: - revision = swh_storage.revision_get([hash_to_bytes(revision_id)])[0] - directory_id = revision.directory + for (version_name, release_id) in versions: + release = swh_storage.release_get([hash_to_bytes(release_id)])[0] + assert release.target_type == ModelObjectType.DIRECTORY + directory_id = release.target directory_swhid = ExtendedSWHID( object_type=ExtendedObjectType.DIRECTORY, object_id=directory_id, ) - revision_swhid = CoreSWHID( - object_type=ObjectType.REVISION, object_id=hash_to_bytes(revision_id), + release_swhid = CoreSWHID( + object_type=ObjectType.RELEASE, object_id=hash_to_bytes(release_id), ) expected_metadata = [ RawExtrinsicMetadata( @@ -377,7 +377,7 @@ json.loads(org_api_info)["versions"][version_name] ).encode(), origin="https://www.npmjs.com/package/org", - revision=revision_swhid, + release=release_swhid, ) ] assert swh_storage.raw_extrinsic_metadata_get( @@ -391,8 +391,8 @@ "directory": len(_expected_new_directories_first_visit), "origin": 1, "origin_visit": 1, - "release": 0, - "revision": len(_expected_new_revisions_first_visit), + "release": len(_expected_new_releases_first_visit), + "revision": 0, "skipped_content": 0, "snapshot": 1, } == stats @@ -403,7 +403,7 @@ url = package_url(package) loader = 
NpmLoader(swh_storage, url) - expected_snapshot_id = hash_to_bytes("d0587e1195aed5a8800411a008f2f2d627f18e2d") + expected_snapshot_id = hash_to_bytes("ddaad89b0b4edb7eefe7c92e9b1166caa776ebbc") actual_load_status = loader.load() assert actual_load_status == { "status": "eventful", @@ -420,8 +420,8 @@ "directory": len(_expected_new_directories_first_visit), "origin": 1, "origin_visit": 1, - "release": 0, - "revision": len(_expected_new_revisions_first_visit), + "release": len(_expected_new_releases_first_visit), + "revision": 0, "skipped_content": 0, "snapshot": 1, } == stats @@ -445,8 +445,8 @@ "directory": len(_expected_new_directories_first_visit) + 15, "origin": 1, "origin_visit": 2, - "release": 0, - "revision": len(_expected_new_revisions_first_visit) + 3, + "release": len(_expected_new_releases_first_visit) + 3, + "revision": 0, "skipped_content": 0, "snapshot": 2, } == stats @@ -466,7 +466,7 @@ loader = NpmLoader(swh_storage, url) actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("b11ebac8c9d0c9e5063a2df693a18e3aba4b2f92") + expected_snapshot_id = hash_to_bytes("7a89bc3cb51ff1d3213b2151c745d82c3b9d69b1") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -482,12 +482,12 @@ target_type=TargetType.ALIAS, target=b"releases/0.1.0" ), b"releases/0.1.0": SnapshotBranch( - target_type=TargetType.REVISION, - target=hash_to_bytes("845673bfe8cbd31b1eaf757745a964137e6f9116"), + target_type=TargetType.RELEASE, + target=hash_to_bytes("103fa6d0a1abb405468e3590dcf634bcb77f67be"), ), b"releases/0.1.1-alpha.14": SnapshotBranch( - target_type=TargetType.REVISION, - target=hash_to_bytes("05181c12cd8c22035dd31155656826b85745da37"), + target_type=TargetType.RELEASE, + target=hash_to_bytes("c00b54143582a4e963e0b86e8dfa58eedd260020"), ), }, ) @@ -500,8 +500,8 @@ "directory": 153, "origin": 1, "origin_visit": 1, - "release": 0, - "revision": 2, + "release": 2, + "revision": 0, "skipped_content": 0, "snapshot": 1, } == stats @@ -566,7 +566,7 @@ loader = NpmLoader(swh_storage, url) actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("d6e08e19159f77983242877c373c75222d5ae9dd") + expected_snapshot_id = hash_to_bytes("7f5e591dd3c4754abca4db1cc18355671e2c014c") assert actual_load_status == { "status": "eventful", @@ -581,8 +581,8 @@ target_type=TargetType.ALIAS, target=b"releases/0.0.1" ), b"releases/0.0.1": SnapshotBranch( - target_type=TargetType.REVISION, - target=hash_to_bytes("9e4dd2b40d1b46b70917c0949aa2195c823a648e"), + target_type=TargetType.RELEASE, + target=hash_to_bytes("199bf0ad020617357d608655e6549e526a65dc36"), ), }, ) diff --git a/swh/loader/package/opam/tests/test_opam.py b/swh/loader/package/opam/tests/test_opam.py --- a/swh/loader/package/opam/tests/test_opam.py +++ b/swh/loader/package/opam/tests/test_opam.py @@ -110,20 +110,19 @@ actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("4e4bf977312460329d7f769b0be89937c9827efc") + expected_snapshot_id = hash_to_bytes("50b5961c27dd4f8b138acce8bac4f90d1e33081f") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), } - target = b"S\x8c\x8aq\xdcy\xa4/0\xa0\xb2j\xeb\xc1\x16\xad\xce\x06\xeaV" - expected_snapshot = Snapshot( id=expected_snapshot_id, branches={ b"HEAD": SnapshotBranch(target=b"agrid.0.1", target_type=TargetType.ALIAS,), b"agrid.0.1": SnapshotBranch( - target=target, target_type=TargetType.REVISION, + target=hash_to_bytes("efcb9ef9d0f2a85312463251732b42f9e45a5c12"), + 
target_type=TargetType.RELEASE, ), }, ) @@ -141,8 +140,8 @@ "directory": 8, "origin": 1, "origin_visit": 1, - "release": 0, - "revision": 1, + "release": 1, + "revision": 0, "skipped_content": 0, "snapshot": 1, } == stats @@ -168,7 +167,7 @@ actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("1b49be175dcf17c0f568bcd7aac3d4faadc41249") + expected_snapshot_id = hash_to_bytes("f0a974e47999e74d323f1fb9604fde72527bda28") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -181,17 +180,16 @@ target=b"directories.0.3", target_type=TargetType.ALIAS, ), b"directories.0.1": SnapshotBranch( - target=b"N\x92jA\xb2\x892\xeb\xcc\x9c\xa9\xb3\xea\xa7kz\xb08\xa6V", - target_type=TargetType.REVISION, + target=hash_to_bytes("1f839cb1f4720d6b33fdd856e3ff1119497979d9"), + target_type=TargetType.RELEASE, ), b"directories.0.2": SnapshotBranch( - target=b"yj\xc9\x1a\x8f\xe0\xaa\xff[\x88\xffz" - b"\x91C\xcc\x96\xb7\xd4\xf65", - target_type=TargetType.REVISION, + target=hash_to_bytes("4133834d966381804347efbc41e35dd2bdd48962"), + target_type=TargetType.RELEASE, ), b"directories.0.3": SnapshotBranch( - target=b"hA \xc4\xb5\x18A8\xb8C\x12\xa3\xa5T\xb7/v\x85X\xcb", - target_type=TargetType.REVISION, + target=hash_to_bytes("2f20cabfbacfe447b80dc2a4eb14d461775100c8"), + target_type=TargetType.RELEASE, ), }, ) @@ -203,7 +201,7 @@ check_snapshot(expected_snapshot, swh_storage) -def test_opam_revision(tmpdir, requests_mock_datadir, swh_storage, datadir): +def test_opam_release(tmpdir, requests_mock_datadir, swh_storage, datadir): opam_url = f"file://{datadir}/fake_opam_repo" opam_root = tmpdir @@ -224,7 +222,7 @@ actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("398df115b9feb2f463efd21941d69b7d59cd9025") + expected_snapshot_id = hash_to_bytes("987425c6fe94d3972c4c4e97ee27a6a7c8b68e82") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -253,14 +251,29 @@ assert branch_name == expected_branch_name assert package_info == expected_package_info - revision_id = b"o\xad\x7f=\x07\xbb\xaah\xdbI(\xb0'\x10z\xfc\xff\x06x\x1b" + release_id = hash_to_bytes("8d0612cdf172e5dff3d876ca2bbc0f6003cc36cc") + + expected_snapshot = Snapshot( + id=hash_to_bytes(actual_load_status["snapshot_id"]), + branches={ + b"HEAD": SnapshotBranch(target=b"ocb.0.1", target_type=TargetType.ALIAS,), + b"ocb.0.1": SnapshotBranch( + target=release_id, target_type=TargetType.RELEASE, + ), + }, + ) + + assert_last_visit_matches( + swh_storage, url, status="full", type="opam", snapshot=expected_snapshot.id + ) + + check_snapshot(expected_snapshot, swh_storage) - revision = swh_storage.revision_get([revision_id])[0] + release = swh_storage.release_get([release_id])[0] - assert revision is not None + assert release is not None - assert revision.author == expected_package_info.author - assert revision.committer == expected_package_info.committer + assert release.author == expected_package_info.author def test_opam_metadata(tmpdir, requests_mock_datadir, swh_storage, datadir): @@ -285,16 +298,32 @@ assert actual_load_status["status"] == "eventful" - expected_revision_id = b"o\xad\x7f=\x07\xbb\xaah\xdbI(\xb0'\x10z\xfc\xff\x06x\x1b" + expected_release_id = hash_to_bytes("8d0612cdf172e5dff3d876ca2bbc0f6003cc36cc") + + expected_snapshot = Snapshot( + id=hash_to_bytes(actual_load_status["snapshot_id"]), + branches={ + b"HEAD": SnapshotBranch(target=b"ocb.0.1", target_type=TargetType.ALIAS,), + b"ocb.0.1": SnapshotBranch( + 
target=expected_release_id, target_type=TargetType.RELEASE, + ), + }, + ) + + assert_last_visit_matches( + swh_storage, url, status="full", type="opam", snapshot=expected_snapshot.id + ) + + check_snapshot(expected_snapshot, swh_storage) - revision = swh_storage.revision_get([expected_revision_id])[0] - assert revision is not None + release = swh_storage.release_get([expected_release_id])[0] + assert release is not None - revision_swhid = CoreSWHID( - object_type=ObjectType.REVISION, object_id=expected_revision_id + release_swhid = CoreSWHID( + object_type=ObjectType.RELEASE, object_id=expected_release_id ) directory_swhid = ExtendedSWHID( - object_type=ExtendedObjectType.DIRECTORY, object_id=revision.directory + object_type=ExtendedObjectType.DIRECTORY, object_id=release.target ) metadata_authority = MetadataAuthority( type=MetadataAuthorityType.FORGE, url=opam_url, @@ -310,7 +339,7 @@ format="opam-package-definition", metadata=OCB_METADATA, origin=url, - revision=revision_swhid, + release=release_swhid, ) ] assert swh_storage.raw_extrinsic_metadata_get( diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py --- a/swh/loader/package/pypi/tests/test_pypi.py +++ b/swh/loader/package/pypi/tests/test_pypi.py @@ -260,8 +260,8 @@ "directory": 4, "origin": 1, "origin_visit": 1, - "release": 0, - "revision": 2, + "release": 2, + "revision": 0, "skipped_content": 0, "snapshot": 0, } == stats @@ -319,7 +319,7 @@ # {visit partial, status: eventful, 1 snapshot} -def test_pypi_revision_metadata_structure( +def test_pypi_release_metadata_structure( swh_storage, requests_mock_datadir, _0805nexter_api_info ): url = "https://pypi.org/project/0805nexter" @@ -329,15 +329,39 @@ assert actual_load_status["status"] == "eventful" assert actual_load_status["snapshot_id"] is not None - expected_revision_id = hash_to_bytes("e445da4da22b31bfebb6ffc4383dbf839a074d21") - revision = swh_storage.revision_get([expected_revision_id])[0] - assert revision is not None + expected_release_id = hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c") + + expected_snapshot = Snapshot( + id=hash_to_bytes(actual_load_status["snapshot_id"]), + branches={ + b"HEAD": SnapshotBranch( + target=b"releases/1.2.0", target_type=TargetType.ALIAS, + ), + b"releases/1.1.0": SnapshotBranch( + target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"), + target_type=TargetType.RELEASE, + ), + b"releases/1.2.0": SnapshotBranch( + target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"), + target_type=TargetType.RELEASE, + ), + }, + ) - revision_swhid = CoreSWHID( - object_type=ObjectType.REVISION, object_id=expected_revision_id + assert_last_visit_matches( + swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot.id + ) + + check_snapshot(expected_snapshot, swh_storage) + + release = swh_storage.release_get([expected_release_id])[0] + assert release is not None + + release_swhid = CoreSWHID( + object_type=ObjectType.RELEASE, object_id=expected_release_id ) directory_swhid = ExtendedSWHID( - object_type=ExtendedObjectType.DIRECTORY, object_id=revision.directory + object_type=ExtendedObjectType.DIRECTORY, object_id=release.target ) metadata_authority = MetadataAuthority( type=MetadataAuthorityType.FORGE, url="https://pypi.org/", @@ -355,7 +379,7 @@ json.loads(_0805nexter_api_info)["releases"]["1.2.0"][0] ).encode(), origin=url, - revision=revision_swhid, + release=release_swhid, ) ] assert swh_storage.raw_extrinsic_metadata_get( @@ -373,7 +397,7 @@ loader = 
PyPILoader(swh_storage, url) actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("dd0e4201a232b1c104433741dbf45895b8ac9355") + expected_snapshot_id = hash_to_bytes("eee24d5b0c156ebb4ece0c810c9dce636ebe881f") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -387,8 +411,8 @@ id=hash_to_bytes(expected_snapshot_id), branches={ b"releases/1.2.0": SnapshotBranch( - target=hash_to_bytes("e445da4da22b31bfebb6ffc4383dbf839a074d21"), - target_type=TargetType.REVISION, + target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"), + target_type=TargetType.RELEASE, ), b"HEAD": SnapshotBranch( target=b"releases/1.2.0", target_type=TargetType.ALIAS, @@ -404,8 +428,8 @@ "directory": 2, "origin": 1, "origin_visit": 1, - "release": 0, - "revision": 1, + "release": 1, + "revision": 0, "skipped_content": 0, "snapshot": 1, } == stats @@ -419,7 +443,7 @@ loader = PyPILoader(swh_storage, url) actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a") + expected_snapshot_id = hash_to_bytes("62d957f2b5cdc515bea0a46252a3ab29ee271636") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -433,12 +457,12 @@ id=expected_snapshot_id, branches={ b"releases/1.1.0": SnapshotBranch( - target=hash_to_bytes("4c99891f93b81450385777235a37b5e966dd1571"), - target_type=TargetType.REVISION, + target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"), + target_type=TargetType.RELEASE, ), b"releases/1.2.0": SnapshotBranch( - target=hash_to_bytes("e445da4da22b31bfebb6ffc4383dbf839a074d21"), - target_type=TargetType.REVISION, + target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"), + target_type=TargetType.RELEASE, ), b"HEAD": SnapshotBranch( target=b"releases/1.2.0", target_type=TargetType.ALIAS, @@ -453,8 +477,8 @@ "directory": 4, "origin": 1, "origin_visit": 1, - "release": 0, - "revision": 2, + "release": 2, + "revision": 0, "skipped_content": 0, "snapshot": 1, } == stats @@ -468,7 +492,7 @@ loader = PyPILoader(swh_storage, url) actual_load_status = loader.load() - snapshot_id = hash_to_bytes("ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a") + snapshot_id = hash_to_bytes("62d957f2b5cdc515bea0a46252a3ab29ee271636") assert actual_load_status == { "status": "eventful", "snapshot_id": snapshot_id.hex(), @@ -481,12 +505,12 @@ id=snapshot_id, branches={ b"releases/1.1.0": SnapshotBranch( - target=hash_to_bytes("4c99891f93b81450385777235a37b5e966dd1571"), - target_type=TargetType.REVISION, + target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"), + target_type=TargetType.RELEASE, ), b"releases/1.2.0": SnapshotBranch( - target=hash_to_bytes("e445da4da22b31bfebb6ffc4383dbf839a074d21"), - target_type=TargetType.REVISION, + target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"), + target_type=TargetType.RELEASE, ), b"HEAD": SnapshotBranch( target=b"releases/1.2.0", target_type=TargetType.ALIAS, @@ -502,8 +526,8 @@ "directory": 4, "origin": 1, "origin_visit": 1, - "release": 0, - "revision": 2, + "release": 2, + "revision": 0, "skipped_content": 0, "snapshot": 1, } == stats @@ -536,7 +560,7 @@ visit1_actual_load_status = loader.load() visit1_stats = get_stats(swh_storage) - expected_snapshot_id = hash_to_bytes("ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a") + expected_snapshot_id = hash_to_bytes("62d957f2b5cdc515bea0a46252a3ab29ee271636") assert visit1_actual_load_status == { "status": "eventful", "snapshot_id": 
expected_snapshot_id.hex(), @@ -551,8 +575,8 @@ "directory": 4, "origin": 1, "origin_visit": 1, - "release": 0, - "revision": 2, + "release": 2, + "revision": 0, "skipped_content": 0, "snapshot": 1, } == visit1_stats @@ -565,7 +589,7 @@ visit2_stats = get_stats(swh_storage) assert visit2_actual_load_status["status"] == "eventful", visit2_actual_load_status - expected_snapshot_id2 = hash_to_bytes("2e5149a7b0725d18231a37b342e9b7c4e121f283") + expected_snapshot_id2 = hash_to_bytes("6a8a84e7f765bed4362315fb054adb2466598636") assert visit2_actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id2.hex(), @@ -579,16 +603,16 @@ id=expected_snapshot_id2, branches={ b"releases/1.1.0": SnapshotBranch( - target=hash_to_bytes("4c99891f93b81450385777235a37b5e966dd1571"), - target_type=TargetType.REVISION, + target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"), + target_type=TargetType.RELEASE, ), b"releases/1.2.0": SnapshotBranch( - target=hash_to_bytes("e445da4da22b31bfebb6ffc4383dbf839a074d21"), - target_type=TargetType.REVISION, + target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"), + target_type=TargetType.RELEASE, ), b"releases/1.3.0": SnapshotBranch( - target=hash_to_bytes("51247143b01445c9348afa9edfae31bf7c5d86b1"), - target_type=TargetType.REVISION, + target=hash_to_bytes("d46442e99bb6e05df5f75a7f0f7f61a4f2098147"), + target_type=TargetType.RELEASE, ), b"HEAD": SnapshotBranch( target=b"releases/1.3.0", target_type=TargetType.ALIAS, @@ -607,8 +631,8 @@ "directory": 4 + 2, # 2 more directories "origin": 1, "origin_visit": 1 + 1, - "release": 0, - "revision": 2 + 1, # 1 more revision + "release": 2 + 1, # 1 more release + "revision": 0, "skipped_content": 0, "snapshot": 1 + 1, # 1 more snapshot } == visit2_stats @@ -641,7 +665,7 @@ loader = PyPILoader(swh_storage, url) actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("a27e638a4dad6fbfa273c6ebec1c4bf320fb84c6") + expected_snapshot_id = hash_to_bytes("a136ee226316276c347d7be3da07df5828605927") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -655,12 +679,12 @@ id=expected_snapshot_id, branches={ b"releases/1.1.0/nexter-1.1.0.zip": SnapshotBranch( - target=hash_to_bytes("4c99891f93b81450385777235a37b5e966dd1571"), - target_type=TargetType.REVISION, + target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"), + target_type=TargetType.RELEASE, ), b"releases/1.1.0/nexter-1.1.0.tar.gz": SnapshotBranch( - target=hash_to_bytes("0bf88f5760cca7665d0af4d6575d9301134fe11a"), - target_type=TargetType.REVISION, + target=hash_to_bytes("b3391cb4007fb6872c4dfab476a7cfe7443a1bb4"), + target_type=TargetType.RELEASE, ), }, ) @@ -702,8 +726,8 @@ ) -def test_pypi_build_revision_missing_version_in_pkg_info(swh_storage, tmp_path): - """Simulate revision build when Version field is missing in PKG-INFO file.""" +def test_pypi_build_release_missing_version_in_pkg_info(swh_storage, tmp_path): + """Simulate release build when Version field is missing in PKG-INFO file.""" url = "https://pypi.org/project/GermlineFilter" # create package info p_info = PyPIPackageInfo( @@ -732,12 +756,12 @@ ) directory = hash_to_bytes("8b864d66f356afe35033d58f8e03b7c23a66751f") - # attempt to build revision + # attempt to build release loader = PyPILoader(swh_storage, url) - revision = loader.build_revision(p_info, str(tmp_path), directory) + release = loader.build_release("1.2", p_info, str(tmp_path), directory) # without comment_text and version in PKG-INFO, message 
should be empty - assert revision.message == b"" + assert release.message == b"" def test_filter_out_invalid_sdists(swh_storage, requests_mock): diff --git a/swh/loader/package/tests/test_loader.py b/swh/loader/package/tests/test_loader.py --- a/swh/loader/package/tests/test_loader.py +++ b/swh/loader/package/tests/test_loader.py @@ -5,6 +5,7 @@ import datetime import hashlib +import logging import string from unittest.mock import Mock, call, patch @@ -13,14 +14,20 @@ from swh.loader.package.loader import BasePackageInfo, PackageLoader from swh.model.model import ( - ExtID, Origin, OriginVisit, OriginVisitStatus, + Person, + Release, + Revision, + RevisionType, Snapshot, SnapshotBranch, TargetType, + TimestampWithTimezone, ) +from swh.model.model import ExtID +from swh.model.model import ObjectType as ModelObjectType from swh.model.swhids import CoreSWHID, ObjectType from swh.storage import get_storage from swh.storage.algos.snapshot import snapshot_get_latest @@ -63,7 +70,7 @@ ).start() yield (f"branch-{version}", p_info) - def _load_revision(self, p_info, origin): + def _load_release(self, version, p_info, origin): return None @@ -83,7 +90,7 @@ assert actual_load_status2 == {"status": "failed"} -def test_resolve_revision_from_extids() -> None: +def test_resolve_object_from_extids() -> None: loader = PackageLoader(None, None) # type: ignore p_info = Mock(wraps=BasePackageInfo(None, None)) # type: ignore @@ -92,51 +99,40 @@ p_info.extid.return_value = None known_extids = { ("extid-type", b"extid-of-aaaa"): [ - CoreSWHID(object_type=ObjectType.REVISION, object_id=b"a" * 20), + CoreSWHID(object_type=ObjectType.RELEASE, object_id=b"a" * 20), ] } - revision_whitelist = {b"unused"} - assert ( - loader.resolve_revision_from_extids(known_extids, p_info, revision_whitelist) - is None - ) + whitelist = {b"unused"} + assert loader.resolve_object_from_extids(known_extids, p_info, whitelist) is None # Some known extid, and the PackageInfo is not one of them (ie. cache miss) p_info.extid.return_value = ("extid-type", b"extid-of-cccc") - assert ( - loader.resolve_revision_from_extids(known_extids, p_info, revision_whitelist) - is None - ) + assert loader.resolve_object_from_extids(known_extids, p_info, whitelist) is None # Some known extid, and the PackageInfo is one of them (ie. cache hit), - # but the target revision was not in the previous snapshot + # but the target release was not in the previous snapshot p_info.extid.return_value = ("extid-type", b"extid-of-aaaa") - assert ( - loader.resolve_revision_from_extids(known_extids, p_info, revision_whitelist) - is None - ) + assert loader.resolve_object_from_extids(known_extids, p_info, whitelist) is None # Some known extid, and the PackageInfo is one of them (ie. 
@@ -63,7 +70,7 @@
         ).start()
         yield (f"branch-{version}", p_info)
 
-    def _load_revision(self, p_info, origin):
+    def _load_release(self, version, p_info, origin):
         return None
 
@@ -83,7 +90,7 @@
     assert actual_load_status2 == {"status": "failed"}
 
 
-def test_resolve_revision_from_extids() -> None:
+def test_resolve_object_from_extids() -> None:
     loader = PackageLoader(None, None)  # type: ignore
     p_info = Mock(wraps=BasePackageInfo(None, None))  # type: ignore
 
@@ -92,51 +99,40 @@
     p_info.extid.return_value = None
     known_extids = {
         ("extid-type", b"extid-of-aaaa"): [
-            CoreSWHID(object_type=ObjectType.REVISION, object_id=b"a" * 20),
+            CoreSWHID(object_type=ObjectType.RELEASE, object_id=b"a" * 20),
         ]
     }
-    revision_whitelist = {b"unused"}
-    assert (
-        loader.resolve_revision_from_extids(known_extids, p_info, revision_whitelist)
-        is None
-    )
+    whitelist = {b"unused"}
+    assert loader.resolve_object_from_extids(known_extids, p_info, whitelist) is None
 
     # Some known extid, and the PackageInfo is not one of them (ie. cache miss)
     p_info.extid.return_value = ("extid-type", b"extid-of-cccc")
-    assert (
-        loader.resolve_revision_from_extids(known_extids, p_info, revision_whitelist)
-        is None
-    )
+    assert loader.resolve_object_from_extids(known_extids, p_info, whitelist) is None
 
     # Some known extid, and the PackageInfo is one of them (ie. cache hit),
-    # but the target revision was not in the previous snapshot
+    # but the target release was not in the previous snapshot
     p_info.extid.return_value = ("extid-type", b"extid-of-aaaa")
-    assert (
-        loader.resolve_revision_from_extids(known_extids, p_info, revision_whitelist)
-        is None
-    )
+    assert loader.resolve_object_from_extids(known_extids, p_info, whitelist) is None
 
     # Some known extid, and the PackageInfo is one of them (ie. cache hit),
-    # and the target revision was in the previous snapshot
-    revision_whitelist = {b"a" * 20}
-    assert (
-        loader.resolve_revision_from_extids(known_extids, p_info, revision_whitelist)
-        == b"a" * 20
-    )
+    # and the target release was in the previous snapshot
+    whitelist = {b"a" * 20}
+    assert loader.resolve_object_from_extids(
+        known_extids, p_info, whitelist
+    ) == CoreSWHID(object_type=ObjectType.RELEASE, object_id=b"a" * 20)
 
     # Same as before, but there is more than one extid, and only one is an allowed
-    # revision
-    revision_whitelist = {b"a" * 20}
+    # release
+    whitelist = {b"a" * 20}
     known_extids = {
         ("extid-type", b"extid-of-aaaa"): [
-            CoreSWHID(object_type=ObjectType.REVISION, object_id=b"b" * 20),
-            CoreSWHID(object_type=ObjectType.REVISION, object_id=b"a" * 20),
+            CoreSWHID(object_type=ObjectType.RELEASE, object_id=b"b" * 20),
+            CoreSWHID(object_type=ObjectType.RELEASE, object_id=b"a" * 20),
         ]
     }
-    assert (
-        loader.resolve_revision_from_extids(known_extids, p_info, revision_whitelist)
-        == b"a" * 20
-    )
+    assert loader.resolve_object_from_extids(
+        known_extids, p_info, whitelist
+    ) == CoreSWHID(object_type=ObjectType.RELEASE, object_id=b"a" * 20)
 
 
 def test_load_get_known_extids() -> None:
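
The assertions above pin down the lookup rule behind the rename: the method now returns the whole CoreSWHID of a previously loaded object, and only when that object's id is whitelisted, that is, present in the last snapshot. A minimal sketch of the rule under illustrative names (not the loader's actual implementation):

from typing import Dict, List, Optional, Set, Tuple

from swh.model.swhids import CoreSWHID


def resolve_object_from_extids_sketch(
    known_extids: Dict[Tuple[str, bytes], List[CoreSWHID]],
    extid: Optional[Tuple[str, bytes]],
    whitelist: Set[bytes],
) -> Optional[CoreSWHID]:
    """Illustrative only: return a previously loaded object for this package,
    if its id is whitelisted (i.e. it appeared in the previous snapshot)."""
    if extid is None:
        # The package computes no extid: nothing to deduplicate against.
        return None
    for swhid in known_extids.get(extid, []):
        if swhid.object_id in whitelist:
            # Cache hit: the caller now gets the whole SWHID, not a bare id.
            return swhid
    # Known extid, but its target was not in the previous snapshot.
    return None
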
@@ -163,29 +159,29 @@
     storage = get_storage("memory")
 
     origin = "http://example.org"
-    rev1_swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=b"a" * 20)
-    rev2_swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=b"b" * 20)
-    rev3_swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=b"c" * 20)
-    rev4_swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=b"d" * 20)
+    rel1_swhid = CoreSWHID(object_type=ObjectType.RELEASE, object_id=b"a" * 20)
+    rel2_swhid = CoreSWHID(object_type=ObjectType.RELEASE, object_id=b"b" * 20)
+    rel3_swhid = CoreSWHID(object_type=ObjectType.RELEASE, object_id=b"c" * 20)
+    rel4_swhid = CoreSWHID(object_type=ObjectType.RELEASE, object_id=b"d" * 20)
     dir_swhid = CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=b"e" * 20)
 
     # Results of a previous load
     storage.extid_add(
         [
-            ExtID("extid-type1", b"extid-of-v1.0", rev1_swhid),
-            ExtID("extid-type2", b"extid-of-v2.0", rev2_swhid),
+            ExtID("extid-type1", b"extid-of-v1.0", rel1_swhid),
+            ExtID("extid-type2", b"extid-of-v2.0", rel2_swhid),
         ]
     )
     last_snapshot = Snapshot(
         branches={
             b"v1.0": SnapshotBranch(
-                target_type=TargetType.REVISION, target=rev1_swhid.object_id
+                target_type=TargetType.RELEASE, target=rel1_swhid.object_id
             ),
             b"v2.0": SnapshotBranch(
-                target_type=TargetType.REVISION, target=rev2_swhid.object_id
+                target_type=TargetType.RELEASE, target=rel2_swhid.object_id
             ),
             b"v3.0": SnapshotBranch(
-                target_type=TargetType.REVISION, target=rev3_swhid.object_id
+                target_type=TargetType.RELEASE, target=rel3_swhid.object_id
             ),
         }
     )
@@ -210,65 +206,202 @@
     loader = StubPackageLoader(storage, "http://example.org")
     patch.object(
         loader,
-        "_load_revision",
-        return_value=(rev4_swhid.object_id, dir_swhid.object_id),
+        "_load_release",
+        return_value=(rel4_swhid.object_id, dir_swhid.object_id),
        autospec=True,
     ).start()
 
     loader.load()
 
-    assert loader._load_revision.mock_calls == [  # type: ignore
-        # v1.0: not loaded because there is already its (extid_type, extid, rev)
+    assert loader._load_release.mock_calls == [  # type: ignore
+        # v1.0: not loaded because there is already its (extid_type, extid, rel)
         # in the storage.
         # v2.0: loaded, because there is already a similar extid, but different type
-        call(StubPackageInfo(origin, "example-v2.0.tar"), Origin(url=origin)),
+        call("v2.0", StubPackageInfo(origin, "example-v2.0.tar"), Origin(url=origin)),
         # v3.0: loaded despite having an (extid_type, extid) in storage, because
         # the target of the extid is not in the previous snapshot
-        call(StubPackageInfo(origin, "example-v3.0.tar"), Origin(url=origin)),
+        call("v3.0", StubPackageInfo(origin, "example-v3.0.tar"), Origin(url=origin)),
         # v4.0: loaded, because there isn't its extid
-        call(StubPackageInfo(origin, "example-v4.0.tar"), Origin(url=origin)),
+        call("v4.0", StubPackageInfo(origin, "example-v4.0.tar"), Origin(url=origin)),
     ]
 
     # then check the snapshot has all the branches.
-    # versions 2.0 to 4.0 all point to rev4_swhid (instead of the value of the last
+    # versions 2.0 to 4.0 all point to rel4_swhid (instead of the value of the last
     # snapshot), because they had to be loaded (mismatched extid), and the mocked
-    # _load_revision always returns rev4_swhid.
+    # _load_release always returns rel4_swhid.
     snapshot = Snapshot(
         branches={
             b"branch-v1.0": SnapshotBranch(
-                target_type=TargetType.REVISION, target=rev1_swhid.object_id
+                target_type=TargetType.RELEASE, target=rel1_swhid.object_id
             ),
             b"branch-v2.0": SnapshotBranch(
-                target_type=TargetType.REVISION, target=rev4_swhid.object_id
+                target_type=TargetType.RELEASE, target=rel4_swhid.object_id
             ),
             b"branch-v3.0": SnapshotBranch(
-                target_type=TargetType.REVISION, target=rev4_swhid.object_id
+                target_type=TargetType.RELEASE, target=rel4_swhid.object_id
             ),
             b"branch-v4.0": SnapshotBranch(
-                target_type=TargetType.REVISION, target=rev4_swhid.object_id
+                target_type=TargetType.RELEASE, target=rel4_swhid.object_id
             ),
         }
     )
     assert snapshot_get_latest(storage, origin) == snapshot
 
     extids = storage.extid_get_from_target(
-        ObjectType.REVISION,
+        ObjectType.RELEASE,
         [
-            rev1_swhid.object_id,
-            rev2_swhid.object_id,
-            rev3_swhid.object_id,
-            rev4_swhid.object_id,
+            rel1_swhid.object_id,
+            rel2_swhid.object_id,
+            rel3_swhid.object_id,
+            rel4_swhid.object_id,
         ],
     )
     assert set(extids) == {
         # What we inserted at the beginning of the test:
-        ExtID("extid-type1", b"extid-of-v1.0", rev1_swhid),
-        ExtID("extid-type2", b"extid-of-v2.0", rev2_swhid),
+        ExtID("extid-type1", b"extid-of-v1.0", rel1_swhid),
+        ExtID("extid-type2", b"extid-of-v2.0", rel2_swhid),
         # Added by the loader:
-        ExtID("extid-type1", b"extid-of-v2.0", rev4_swhid),
-        ExtID("extid-type2", b"extid-of-v3.0", rev4_swhid),
-        ExtID("extid-type2", b"extid-of-v4.0", rev4_swhid),
+        ExtID("extid-type1", b"extid-of-v2.0", rel4_swhid),
+        ExtID("extid-type2", b"extid-of-v3.0", rel4_swhid),
+        ExtID("extid-type2", b"extid-of-v4.0", rel4_swhid),
+    }
+
+
+def test_load_upgrade_from_revision_extids(caplog):
+    """Tests that, when loading incrementally based on a snapshot made by an old
+    version of the loader, the loader will convert revisions to releases
+    and add them to the storage.
+
+    Also checks that, if an extid exists pointing to a non-existent revision
+    (which should never happen, but you never know...), the release is loaded from
+    scratch."""
+
+    storage = get_storage("memory")
+
+    origin = "http://example.org"
+    dir1_swhid = CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=b"d" * 20)
+    dir2_swhid = CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=b"e" * 20)
+
+    date = TimestampWithTimezone.from_datetime(
+        datetime.datetime.now(tz=datetime.timezone.utc)
+    )
+    person = Person.from_fullname(b"Jane Doe ")
+
+    rev1 = Revision(
+        message=b"blah",
+        author=person,
+        date=date,
+        committer=person,
+        committer_date=date,
+        directory=dir1_swhid.object_id,
+        type=RevisionType.TAR,
+        synthetic=True,
+    )
+
+    rel1 = Release(
+        name=b"v1.0",
+        message=b"blah",
+        author=person,
+        date=date,
+        target=dir1_swhid.object_id,
+        target_type=ModelObjectType.DIRECTORY,
+        synthetic=True,
+    )
+
+    rev1_swhid = rev1.swhid()
+    rel1_swhid = rel1.swhid()
+    rev2_swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=b"b" * 20)
+    rel2_swhid = CoreSWHID(object_type=ObjectType.RELEASE, object_id=b"c" * 20)
+
+    # Results of a previous load
+    storage.extid_add(
+        [
+            ExtID("extid-type1", b"extid-of-v1.0", rev1_swhid),
+            ExtID("extid-type1", b"extid-of-v2.0", rev2_swhid),
+        ]
+    )
+    storage.revision_add([rev1])
+    last_snapshot = Snapshot(
+        branches={
+            b"v1.0": SnapshotBranch(
+                target_type=TargetType.REVISION, target=rev1_swhid.object_id
+            ),
+            b"v2.0": SnapshotBranch(
+                target_type=TargetType.REVISION, target=rev2_swhid.object_id
+            ),
+        }
+    )
+    storage.snapshot_add([last_snapshot])
+    date = datetime.datetime.now(tz=datetime.timezone.utc)
+    storage.origin_add([Origin(url=origin)])
+    storage.origin_visit_add(
+        [OriginVisit(origin="http://example.org", visit=1, date=date, type="tar")]
+    )
+    storage.origin_visit_status_add(
+        [
+            OriginVisitStatus(
+                origin=origin,
+                visit=1,
+                status="full",
+                date=date,
+                snapshot=last_snapshot.id,
+            )
+        ]
+    )
+
+    loader = StubPackageLoader(storage, "http://example.org")
+    patch.object(
+        loader,
+        "_load_release",
+        return_value=(rel2_swhid.object_id, dir2_swhid.object_id),
+        autospec=True,
+    ).start()
+    patch.object(
+        loader, "get_versions", return_value=["v1.0", "v2.0", "v3.0"], autospec=True,
+    ).start()
+
+    caplog.set_level(logging.ERROR)
+
+    loader.load()
+
+    assert len(caplog.records) == 1
+    (record,) = caplog.records
+    assert record.levelname == "ERROR"
+    assert "Failed to upgrade branch branch-v2.0" in record.message
+
+    assert loader._load_release.mock_calls == [
+        # v1.0: not loaded because there is already a revision matching it
+        # v2.0: loaded, as the revision is missing from the storage even though there
+        # is an extid
+        call("v2.0", StubPackageInfo(origin, "example-v2.0.tar"), Origin(url=origin)),
+        # v3.0: loaded (did not exist yet)
+        call("v3.0", StubPackageInfo(origin, "example-v3.0.tar"), Origin(url=origin)),
+    ]
+
+    snapshot = Snapshot(
+        branches={
+            b"branch-v1.0": SnapshotBranch(
+                target_type=TargetType.RELEASE, target=rel1_swhid.object_id
+            ),
+            b"branch-v2.0": SnapshotBranch(
+                target_type=TargetType.RELEASE, target=rel2_swhid.object_id
+            ),
+            b"branch-v3.0": SnapshotBranch(
+                target_type=TargetType.RELEASE, target=rel2_swhid.object_id
+            ),
+        }
+    )
+    assert snapshot_get_latest(storage, origin) == snapshot
+
+    extids = storage.extid_get_from_target(
+        ObjectType.RELEASE, [rel1_swhid.object_id, rel2_swhid.object_id,],
+    )
+
+    assert set(extids) == {
+        ExtID("extid-type1", b"extid-of-v1.0", rel1_swhid),
+        ExtID("extid-type1", b"extid-of-v2.0", rel2_swhid),
+        ExtID("extid-type2", b"extid-of-v3.0", rel2_swhid),
+    }
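
The rev1/rel1 pair above fixes the field-for-field mapping the upgrade path needs: an old synthetic revision is re-expressed as a release targeting the same directory. A minimal sketch of that conversion (an illustrative helper, not the loader's literal code; `name` would come from the version/branch being upgraded):

from swh.model.model import ObjectType, Release, Revision


def release_from_revision_sketch(name: bytes, revision: Revision) -> Release:
    # Mirror the rev1 -> rel1 correspondence exercised by the test above:
    # same message, author, and date; the release targets the revision's
    # directory and stays synthetic.
    return Release(
        name=name,
        message=revision.message,
        author=revision.author,
        date=revision.date,
        target=revision.directory,
        target_type=ObjectType.DIRECTORY,
        synthetic=True,
    )
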
diff --git a/swh/loader/package/tests/test_loader_metadata.py b/swh/loader/package/tests/test_loader_metadata.py
--- a/swh/loader/package/tests/test_loader_metadata.py
+++ b/swh/loader/package/tests/test_loader_metadata.py
@@ -29,7 +29,7 @@
 from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType
 
 EMPTY_SNAPSHOT_ID = "1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"
-FULL_SNAPSHOT_ID = "4a9b608c9f01860a627237dd2409d1d50ec4b054"
+FULL_SNAPSHOT_ID = "4ac5730a9393f5099b63a35a17b6c33d36d70c3a"
 
 AUTHORITY = MetadataAuthority(
     type=MetadataAuthorityType.FORGE, url="http://example.org/",
@@ -38,7 +38,8 @@
 ORIGIN_SWHID = Origin(ORIGIN_URL).swhid()
 
 REVISION_ID = hash_to_bytes("8ff44f081d43176474b267de5451f2c2e88089d0")
-REVISION_SWHID = CoreSWHID(object_type=ObjectType.REVISION, object_id=REVISION_ID)
+RELEASE_ID = hash_to_bytes("9477a708196b44e59efb4e47b7d979a4146bd428")
+RELEASE_SWHID = CoreSWHID(object_type=ObjectType.RELEASE, object_id=RELEASE_ID)
 DIRECTORY_ID = hash_to_bytes("aa" * 20)
 DIRECTORY_SWHID = ExtendedSWHID(
     object_type=ExtendedObjectType.DIRECTORY, object_id=DIRECTORY_ID
 )
@@ -61,7 +62,7 @@
         format="test-format1",
         metadata=b"foo bar",
         origin=ORIGIN_URL,
-        revision=REVISION_SWHID,
+        release=RELEASE_SWHID,
     ),
     RawExtrinsicMetadata(
         target=DIRECTORY_SWHID,
@@ -71,7 +72,7 @@
         format="test-format2",
         metadata=b"bar baz",
         origin=ORIGIN_URL,
-        revision=REVISION_SWHID,
+        release=RELEASE_SWHID,
     ),
 ]
@@ -95,7 +96,7 @@
         class directory:
             hash = DIRECTORY_ID
 
-        return (None, directory)  # just enough for _load_revision to work
+        return (None, directory)  # just enough for _load_release to work
 
     def download_package(self, p_info: BasePackageInfo, tmpdir: str):
         return [("path", {"artifact_key": "value", "length": 0})]
@@ -161,7 +162,7 @@
         format="original-artifacts-json",
         metadata=b'[{"artifact_key": "value", "length": 0}]',
         origin=ORIGIN_URL,
-        revision=REVISION_SWHID,
+        release=RELEASE_SWHID,
     )