diff --git a/swh/model/model.py b/swh/model/model.py --- a/swh/model/model.py +++ b/swh/model/model.py @@ -18,14 +18,18 @@ from .collections import ImmutableDict from .hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_bytes from .identifiers import ( - SWHID, directory_identifier, normalize_timestamp, + origin_identifier, parse_swhid, release_identifier, revision_identifier, snapshot_identifier, ) +from .identifiers import ExtendedObjectType as SwhidExtendedObjectType +from .identifiers import ExtendedSWHID +from .identifiers import ObjectType as SwhidObjectType +from .identifiers import SWHID, CoreSWHID class MissingData(Exception): @@ -274,6 +278,13 @@ def unique_key(self) -> KeyType: return {"url": self.url} + def swhid(self) -> ExtendedSWHID: + """Returns a SWHID representing this origin.""" + return ExtendedSWHID( + object_type=SwhidExtendedObjectType.ORIGIN, + object_id=hash_to_bytes(origin_identifier(self.unique_key())), + ) + @attr.s(frozen=True, slots=True) class OriginVisit(BaseModel): @@ -415,6 +426,10 @@ **d, ) + def swhid(self) -> CoreSWHID: + """Returns a SWHID representing this object.""" + return CoreSWHID(object_type=SwhidObjectType.SNAPSHOT, object_id=self.id) + @attr.s(frozen=True, slots=True) class Release(HashableObject, BaseModel): @@ -461,6 +476,10 @@ d["date"] = TimestampWithTimezone.from_dict(d["date"]) return cls(target_type=ObjectType(d.pop("target_type")), **d) + def swhid(self) -> CoreSWHID: + """Returns a SWHID representing this object.""" + return CoreSWHID(object_type=SwhidObjectType.RELEASE, object_id=self.id) + def anonymize(self) -> "Release": """Returns an anonymized version of the Release object. @@ -549,6 +568,10 @@ **d, ) + def swhid(self) -> CoreSWHID: + """Returns a SWHID representing this object.""" + return CoreSWHID(object_type=SwhidObjectType.REVISION, object_id=self.id) + def anonymize(self) -> "Revision": """Returns an anonymized version of the Revision object. @@ -591,6 +614,10 @@ **d, ) + def swhid(self) -> CoreSWHID: + """Returns a SWHID representing this object.""" + return CoreSWHID(object_type=SwhidObjectType.DIRECTORY, object_id=self.id) + @attr.s(frozen=True, slots=True) class BaseContent(BaseModel): @@ -706,6 +733,10 @@ def unique_key(self) -> KeyType: return self.sha1 # TODO: use a dict of hashes + def swhid(self) -> CoreSWHID: + """Returns a SWHID representing this object.""" + return CoreSWHID(object_type=SwhidObjectType.CONTENT, object_id=self.sha1_git) + @attr.s(frozen=True, slots=True) class SkippedContent(BaseContent): diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -108,21 +108,22 @@ self.assertEqual(identifiers.format_offset(offset), res) +content_example = { + "status": "visible", + "length": 5, + "data": b"1984\n", + "ctime": datetime.datetime(2015, 11, 22, 16, 33, 56, tzinfo=datetime.timezone.utc), +} + + class ContentIdentifier(unittest.TestCase): def setUp(self): - self.content = { - "status": "visible", - "length": 5, - "data": b"1984\n", - "ctime": datetime.datetime( - 2015, 11, 22, 16, 33, 56, tzinfo=datetime.timezone.utc - ), - } - - self.content_id = hashutil.MultiHash.from_data(self.content["data"]).digest() + self.content_id = hashutil.MultiHash.from_data(content_example["data"]).digest() def test_content_identifier(self): - self.assertEqual(identifiers.content_identifier(self.content), self.content_id) + self.assertEqual( + identifiers.content_identifier(content_example), self.content_id + ) directory_example = { @@ -772,15 +773,15 @@ ) -class OriginIdentifier(unittest.TestCase): - def setUp(self): - self.origin = { - "url": "https://github.com/torvalds/linux", - } +origin_example = { + "url": "https://github.com/torvalds/linux", +} + +class OriginIdentifier(unittest.TestCase): def test_content_identifier(self): self.assertEqual( - identifiers.origin_identifier(self.origin), + identifiers.origin_identifier(origin_example), "b63a575fe3faab7692c9f38fb09d4bb45651bb0f", ) diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py --- a/swh/model/tests/test_model.py +++ b/swh/model/tests/test_model.py @@ -12,11 +12,13 @@ from hypothesis.strategies import binary import pytest -from swh.model.hashutil import MultiHash, hash_to_bytes +from swh.model.hashutil import MultiHash, hash_to_bytes, hash_to_hex import swh.model.hypothesis_strategies as strategies from swh.model.identifiers import ( SWHID, + content_identifier, directory_identifier, + origin_identifier, parse_swhid, release_identifier, revision_identifier, @@ -44,7 +46,9 @@ TimestampWithTimezone, ) from swh.model.tests.test_identifiers import ( + content_example, directory_example, + origin_example, release_example, revision_example, snapshot_example, @@ -702,22 +706,34 @@ # ID computation +def test_content_model_id_computation(): + cnt_dict = content_example.copy() + + cnt_id_str = hash_to_hex(content_identifier(cnt_dict)["sha1_git"]) + cnt_model = Content.from_data(cnt_dict["data"]) + assert str(cnt_model.swhid()) == "swh:1:cnt:" + cnt_id_str + + def test_directory_model_id_computation(): dir_dict = directory_example.copy() del dir_dict["id"] - dir_id = hash_to_bytes(directory_identifier(dir_dict)) + dir_id_str = directory_identifier(dir_dict) + dir_id = hash_to_bytes(dir_id_str) dir_model = Directory.from_dict(dir_dict) assert dir_model.id == dir_id + assert str(dir_model.swhid()) == "swh:1:dir:" + dir_id_str def test_revision_model_id_computation(): rev_dict = revision_example.copy() del rev_dict["id"] - rev_id = hash_to_bytes(revision_identifier(rev_dict)) + rev_id_str = revision_identifier(rev_dict) + rev_id = hash_to_bytes(rev_id_str) rev_model = Revision.from_dict(rev_dict) assert rev_model.id == rev_id + assert str(rev_model.swhid()) == "swh:1:rev:" + rev_id_str def test_revision_model_id_computation_with_no_date(): @@ -740,19 +756,31 @@ rel_dict = release_example.copy() del rel_dict["id"] - rel_id = hash_to_bytes(release_identifier(rel_dict)) + rel_id_str = release_identifier(rel_dict) + rel_id = hash_to_bytes(rel_id_str) rel_model = Release.from_dict(rel_dict) assert isinstance(rel_model.date, TimestampWithTimezone) assert rel_model.id == hash_to_bytes(rel_id) + assert str(rel_model.swhid()) == "swh:1:rel:" + rel_id_str def test_snapshot_model_id_computation(): snp_dict = snapshot_example.copy() del snp_dict["id"] - snp_id = hash_to_bytes(snapshot_identifier(snp_dict)) + snp_id_str = snapshot_identifier(snp_dict) + snp_id = hash_to_bytes(snp_id_str) snp_model = Snapshot.from_dict(snp_dict) assert snp_model.id == snp_id + assert str(snp_model.swhid()) == "swh:1:snp:" + snp_id_str + + +def test_origin_model_id_computation(): + ori_dict = origin_example.copy() + + ori_id_str = origin_identifier(ori_dict) + ori_model = Origin.from_dict(ori_dict) + assert str(ori_model.swhid()) == "swh:1:ori:" + ori_id_str @given(strategies.objects(split_content=True))