diff --git a/swh/loader/mercurial/from_disk.py b/swh/loader/mercurial/from_disk.py --- a/swh/loader/mercurial/from_disk.py +++ b/swh/loader/mercurial/from_disk.py @@ -15,7 +15,7 @@ from swh.loader.mercurial.utils import parse_visit_date from swh.model import identifiers from swh.model.from_disk import Content, DentryPerms, Directory -from swh.model.hashutil import hash_to_bytehex, hash_to_bytes +from swh.model.hashutil import hash_to_bytehex from swh.model.model import ( ExtID, ObjectType, @@ -206,18 +206,8 @@ def _set_latest_heads(self, latest_snapshot: Snapshot) -> None: """ - Looks up the nodeid for all revisions in the snapshot, and adds them to - self._latest_heads. - - This works in two steps: - - 1. Query the revisions with extid_get_from_target, to find nodeids from - revision ids, using the new ExtID architecture - 2. For all revisions that were not found this way, fetch the revision - and look for the nodeid in its metadata. - - This is a temporary process. When we are done migrating away from revision - metadata, step 2 will be removed. + Looks up the nodeid for all revisions in the snapshot via extid_get_from_target, + and adds them to self._latest_heads. """ # TODO: add support for releases snapshot_branches = [ @@ -248,17 +238,6 @@ # Add the found nodeids to self.latest_heads self._latest_heads.extend(extid.extid for extid in extids) - # For each revision without a nodeid, get the revision metadata - # to see if it is found there. - found_revisions = {extid.target.object_id for extid in extids if extid} - revisions_without_extid = list(set(snapshot_branches) - found_revisions) - - self._latest_heads.extend( - hash_to_bytes(revision.metadata["node"]) - for revision in self.storage.revision_get(revisions_without_extid) - if revision and revision.metadata - ) - def fetch_data(self) -> bool: """Fetch the data from the source the loader is currently loading @@ -372,16 +351,14 @@ target=name, target_type=TargetType.ALIAS, ) - # TODO: do not write an ExtID if we got this branch from an ExtID that - # already exists. - # When we are done migrating away from revision metadata, this will - # be as simple as checking if the target is in self._latest_heads - revision_swhid = identifiers.CoreSWHID( - object_type=identifiers.ObjectType.REVISION, object_id=revision_sha1git - ) - extids.append( - ExtID(extid_type=EXTID_TYPE, extid=hg_nodeid, target=revision_swhid) - ) + if hg_nodeid not in self._latest_heads: + revision_swhid = identifiers.CoreSWHID( + object_type=identifiers.ObjectType.REVISION, + object_id=revision_sha1git, + ) + extids.append( + ExtID(extid_type=EXTID_TYPE, extid=hg_nodeid, target=revision_swhid) + ) snapshot = Snapshot(branches=snapshot_branches) self.storage.snapshot_add([snapshot]) @@ -486,7 +463,6 @@ type=RevisionType.MERCURIAL, directory=root_sha1git, message=rev_ctx.description(), - metadata={"node": hg_nodeid.hex()}, extra_headers=tuple(extra_headers), synthetic=False, parents=self.get_revision_parents(rev_ctx), diff --git a/swh/loader/mercurial/tests/test_from_disk.py b/swh/loader/mercurial/tests/test_from_disk.py --- a/swh/loader/mercurial/tests/test_from_disk.py +++ b/swh/loader/mercurial/tests/test_from_disk.py @@ -18,7 +18,7 @@ prepare_repository_from_archive, ) from swh.model.from_disk import Content, DentryPerms -from swh.model.hashutil import hash_to_bytes +from swh.model.hashutil import hash_to_bytes, hash_to_hex from swh.model.identifiers import ObjectType from swh.model.model import RevisionType, Snapshot, SnapshotBranch, TargetType from swh.storage import get_storage @@ -242,7 +242,11 @@ hg_changesets = set() transplant_sources = set() for rev in loader.storage.revision_log(revisions): - hg_changesets.add(rev["metadata"]["node"]) + extids = list( + loader.storage.extid_get_from_target(ObjectType.REVISION, [rev["id"]]) + ) + assert len(extids) == 1 + hg_changesets.add(hash_to_hex(extids[0].extid)) for k, v in rev["extra_headers"]: if k == b"transplant_source": transplant_sources.add(v.decode("ascii")) @@ -250,7 +254,7 @@ # check extracted data are valid assert len(hg_changesets) > 0 assert len(transplant_sources) > 0 - assert transplant_sources.issubset(hg_changesets) + assert transplant_sources <= hg_changesets def _partial_copy_storage( @@ -275,13 +279,6 @@ ] new_storage.revision_add(revisions) - elif mechanism == "revision metadata": - assert ( - copy_revisions - ), "copy_revisions must be True if mechanism='revision metadata'" - revisions = [rev for rev in old_storage.revision_get(heads) if rev] - new_storage.revision_add(revisions) - else: assert mechanism == "same storage" return old_storage @@ -297,12 +294,11 @@ return new_storage -@pytest.mark.parametrize("mechanism", ("extid", "revision metadata", "same storage")) +@pytest.mark.parametrize("mechanism", ("extid", "same storage")) def test_load_unchanged_repo_should_be_uneventful( swh_storage, datadir, tmp_path, mechanism ): - """Checks the loader can find which revisions it already loaded, using either - ExtIDs or revision metadata.""" + """Checks the loader can find which revisions it already loaded, using ExtIDs.""" archive_name = "hello" archive_path = os.path.join(datadir, f"{archive_name}.tgz") repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)