diff --git a/swh/loader/mercurial/identify.py b/swh/loader/mercurial/identify.py --- a/swh/loader/mercurial/identify.py +++ b/swh/loader/mercurial/identify.py @@ -16,7 +16,7 @@ from swh.loader.mercurial.utils import get_minimum_env from swh.model.cli import identify_object -from swh.model.hashutil import hash_to_bytehex, hash_to_bytes +from swh.model.hashutil import hash_to_bytehex from swh.model.identifiers import CoreSWHID, ObjectType, normalize_timestamp from swh.model.model import RevisionType @@ -310,39 +310,40 @@ ctx.obj["HG_ROOT"] = root -def identify_directory(path: Path) -> str: +def identify_directory(path: Path) -> CoreSWHID: """Return the SWHID of the given path.""" - uri = identify_object( - "directory", follow_symlinks=True, exclude_patterns=[".hg"], obj=str(path) + return CoreSWHID.from_string( + identify_object( + "directory", follow_symlinks=True, exclude_patterns=[".hg"], obj=str(path) + ) ) - return uri.split(":")[-1] class RevisionIdentity(NamedTuple): """Represent a swh revision identity.""" - swhid: bytes - """SWHID raw bytes""" + swhid: CoreSWHID + """SWH Identifier of the revision.""" node_id: bytes """node_id hex bytes""" - directory_swhid: bytes + directory_swhid: CoreSWHID + """SWH Identifier of the directory""" def dir_uri(self) -> str: """Return the SWHID uri of the revision's directory.""" - return f"swh:1:dir:{self.directory_swhid.hex()}\t{self.node_id.decode()}" + return f"{self.directory_swhid}\t{self.node_id.decode()}" def __str__(self) -> str: """Return the string representation of a RevisionIdentity.""" - uri = CoreSWHID(object_type=ObjectType.REVISION, object_id=self.swhid) - return f"{uri}\t{self.node_id.decode()}" + return f"{self.swhid}\t{self.node_id.decode()}" def identify_revision( hg: Hg, rev: Optional[bytes] = None, - node_id_2_swhid: Optional[Dict[bytes, bytes]] = None, + node_id_2_swhid: Optional[Dict[bytes, CoreSWHID]] = None, ) -> Iterator[RevisionIdentity]: """Return the repository revision identities. @@ -352,7 +353,6 @@ node_id_2_swhid: An optional cache mapping hg node ids to SWHIDs It will be updated in place with new mappings. """ - from swh.model.hashutil import hash_to_bytes from swh.model.model import Revision if node_id_2_swhid is None: @@ -362,18 +362,19 @@ data = revision.to_dict() hg.up(revision.node_id) - directory_swhid = hash_to_bytes(identify_directory(hg.root())) - data["directory"] = directory_swhid + directory_swhid = identify_directory(hg.root()) + data["directory"] = directory_swhid.object_id parents = [] for parent in data["parents"]: if parent not in node_id_2_swhid: parent_revision = next(identify_revision(hg, parent, node_id_2_swhid)) node_id_2_swhid[parent] = parent_revision.swhid - parents.append(node_id_2_swhid[parent]) + assert node_id_2_swhid[parent].object_type == ObjectType.REVISION + parents.append(node_id_2_swhid[parent].object_id) data["parents"] = parents - revision_swhid = hash_to_bytes(Revision.from_dict(data).id) + revision_swhid = Revision.from_dict(data).swhid() node_id_2_swhid[revision.node_id] = revision_swhid yield RevisionIdentity( @@ -386,8 +387,8 @@ class ReleaseIdentity(NamedTuple): """Represent a swh release identity.""" - swhid: str - """SWHID hex string""" + swhid: CoreSWHID + """SWH Identifier of the release.""" node_id: bytes """node_id hex bytes""" @@ -397,14 +398,11 @@ def __str__(self) -> str: """Return the string representation of a ReleaseIdentity.""" - uri = CoreSWHID( - object_type=ObjectType.RELEASE, object_id=hash_to_bytes(self.swhid) - ) - return f"{uri}\t{self.name.decode()}" + return f"{self.swhid}\t{self.name.decode()}" def identify_release( - hg: Hg, node_id_2_swhid: Optional[Dict[bytes, bytes]] = None, + hg: Hg, node_id_2_swhid: Optional[Dict[bytes, CoreSWHID]] = None, ) -> Iterator[ReleaseIdentity]: """Return the repository's release identities. @@ -421,9 +419,10 @@ } for tag in hg.tags(): + assert node_id_2_swhid[tag.node_id].object_type == ObjectType.REVISION data = { "name": tag.name, - "target": node_id_2_swhid[tag.node_id], + "target": node_id_2_swhid[tag.node_id].object_id, "target_type": ModelObjectType.REVISION.value, "message": None, "metadata": None, @@ -432,7 +431,7 @@ "date": None, } - release_swhid = Release.from_dict(data).id + release_swhid = Release.from_dict(data).swhid() yield ReleaseIdentity( swhid=release_swhid, node_id=tag.node_id, name=tag.name, @@ -441,9 +440,9 @@ def identify_snapshot( hg: Hg, - node_id_2_swhid: Optional[Dict[bytes, bytes]] = None, + node_id_2_swhid: Optional[Dict[bytes, CoreSWHID]] = None, releases: Optional[List[ReleaseIdentity]] = None, -) -> str: +) -> CoreSWHID: """Return the repository snapshot identity. hg: A `Hg` repository instance @@ -471,18 +470,20 @@ } for branch in hg.branches(): + assert node_id_2_swhid[branch.node_id].object_type == ObjectType.REVISION branches[branch.name] = { - "target": node_id_2_swhid[branch.node_id], + "target": node_id_2_swhid[branch.node_id].object_id, "target_type": TargetType.REVISION.value, } for release in releases: + assert release.swhid.object_type == ObjectType.RELEASE branches[release.name] = { - "target": release.swhid, + "target": release.swhid.object_id, "target_type": TargetType.RELEASE.value, } - return Snapshot.from_dict({"branches": branches}).id + return Snapshot.from_dict({"branches": branches}).swhid() @main.command() @@ -509,8 +510,7 @@ snapshot_swhid = identify_snapshot(hg) - uri = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snapshot_swhid) - click.echo(f"{uri}\t{root}") + click.echo(f"{snapshot_swhid}\t{root}") @main.command() @@ -540,8 +540,7 @@ for uri in dir_uris + rev_uris + rel_uris: click.echo(uri) - uri = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snapshot_swhid) - click.echo(f"{uri}\t{root}") + click.echo(f"{snapshot_swhid}\t{root}") if __name__ == "__main__":