# Patch summary: introduce an "extid" object type and an ExtID model class.
#
# NOTE(review): this patch arrived with its line breaks and indentation
# collapsed; the layout below was reconstructed from the hunk counts and from
# Python syntax. Verify the context lines against the target files before
# applying.
#
# swh/model/hashutil.py
#   Adds the string "extid" to the literal that closes just before the
#   `if git_type not in git_object_types:` check (presumably the
#   git_object_types set itself -- its opening line is outside the hunk).
#
# swh/model/identifiers.py
#   Adds extid_identifier(extid) -> str. It builds three (key, value) headers:
#     extid_type  -> extid["extid_type"] encoded as ASCII
#     extid       -> extid["extid"] passed through unchanged
#     target      -> str(extid["target"]) encoded as ASCII
#   and returns identifier_to_str(hash_manifest("extid", headers)).
#   NOTE(review): the docstring says extid_type "may not contain spaces", but
#   nothing in this function checks that; .encode("ascii") only rejects
#   non-ASCII characters. Confirm whether validation is wanted.
#   NOTE(review): the docstring uses Markdown ``` fences next to reST-style
#   ``extid`` literals; if these docstrings are rendered by Sphinx, a `::`
#   literal block may be the intended form -- confirm.
#
# swh/model/model.py
#   Imports extid_identifier and adds the attrs class
#   ExtID(HashableObject, BaseModel), declared frozen with slots, with
#   object_type "extid" and the validated fields extid_type (str),
#   extid (bytes), target (CoreSWHID) and id (Sha1Git, default b"").
#   ExtID.from_dict reads the keys "extid", "extid_type" and "target" (the
#   latter parsed with CoreSWHID.from_string); compute_hash returns
#   hash_to_bytes(extid_identifier(self.to_dict())).
#   NOTE(review): from_dict never reads an "id" key, so id always starts at
#   its default b""; presumably HashableObject fills it in from
#   compute_hash() -- confirm against the base class.
#
# swh/model/tests/swh_model_data.py
#   Imports ExtID, adds an EXTIDS list with two fixtures (a 32-byte "git256"
#   extid targeting REVISIONS[0].swhid() and a 20-byte "hg" extid targeting
#   REVISIONS[1].swhid()), and registers it in TEST_OBJECTS under "extid".
diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
--- a/swh/model/hashutil.py
+++ b/swh/model/hashutil.py
@@ -296,6 +296,7 @@
         "tag",
         "snapshot",
         "raw_extrinsic_metadata",
+        "extid",
     }
 
     if git_type not in git_object_types:
diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -823,6 +823,37 @@
     )
 
 
+def extid_identifier(extid: Dict[str, Any]) -> str:
+    """Return the intrinsic identifier for an ExtID object.
+
+    An ExtID identifier is a salted sha1 (using the git hashing algorithm with
+    the ``extid`` object type) of a manifest following the format:
+
+    ```
+    extid_type $StrWithoutSpaces
+    extid $Bytes
+    target $CoreSwhid
+    ```
+
+    $StrWithoutSpaces is an ASCII string, and may not contain spaces.
+
+    Newlines in $Bytes are escaped as with other git fields, ie. by adding a
+    space after them.
+
+    Returns:
+        str: the intrinsic identifier for `extid`
+
+    """
+
+    headers = [
+        (b"extid_type", extid["extid_type"].encode("ascii")),
+        (b"extid", extid["extid"]),
+        (b"target", str(extid["target"]).encode("ascii")),
+    ]
+
+    return identifier_to_str(hash_manifest("extid", headers))
+
+
 # type of the "object_type" attribute of the SWHID class; either
 # ObjectType or ExtendedObjectType
 _TObjectType = TypeVar("_TObjectType", ObjectType, ExtendedObjectType)
diff --git a/swh/model/model.py b/swh/model/model.py
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -19,6 +19,7 @@
 from .hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_bytes
 from .identifiers import (
     directory_identifier,
+    extid_identifier,
     normalize_timestamp,
     origin_identifier,
     raw_extrinsic_metadata_identifier,
@@ -1096,3 +1097,25 @@
                 d[swhid_key] = CoreSWHID.from_string(d[swhid_key])
 
         return super().from_dict(d)
+
+
+@attr.s(frozen=True, slots=True)
+class ExtID(HashableObject, BaseModel):
+    object_type: Final = "extid"
+
+    extid_type = attr.ib(type=str, validator=type_validator())
+    extid = attr.ib(type=bytes, validator=type_validator())
+    target = attr.ib(type=CoreSWHID, validator=type_validator())
+
+    id = attr.ib(type=Sha1Git, validator=type_validator(), default=b"")
+
+    @classmethod
+    def from_dict(cls, d):
+        return cls(
+            extid=d["extid"],
+            extid_type=d["extid_type"],
+            target=CoreSWHID.from_string(d["target"]),
+        )
+
+    def compute_hash(self) -> bytes:
+        return hash_to_bytes(extid_identifier(self.to_dict()))
diff --git a/swh/model/tests/swh_model_data.py b/swh/model/tests/swh_model_data.py
--- a/swh/model/tests/swh_model_data.py
+++ b/swh/model/tests/swh_model_data.py
@@ -15,6 +15,7 @@
     Content,
     Directory,
     DirectoryEntry,
+    ExtID,
     MetadataAuthority,
     MetadataAuthorityType,
     MetadataFetcher,
@@ -131,6 +132,11 @@
     ),
 ]
 
+EXTIDS = [
+    ExtID(extid_type="git256", extid=b"\x03" * 32, target=REVISIONS[0].swhid(),),
+    ExtID(extid_type="hg", extid=b"\x04" * 20, target=REVISIONS[1].swhid(),),
+]
+
 RELEASES = [
     Release(
         id=hash_to_bytes("8059dc4e17fcd0e51ca3bcd6b80f4577d281fd08"),
@@ -330,6 +336,7 @@
 TEST_OBJECTS: Dict[str, Sequence[BaseModel]] = {
     "content": CONTENTS,
     "directory": DIRECTORIES,
+    "extid": EXTIDS,
     "metadata_authority": METADATA_AUTHORITIES,
     "metadata_fetcher": METADATA_FETCHERS,
     "origin": ORIGINS,