diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py --- a/swh/model/hypothesis_strategies.py +++ b/swh/model/hypothesis_strategies.py @@ -91,7 +91,6 @@ tuples(timestamps_with_timezone(), persons()))) rel = draw(builds( Release, - id=sha1_git(), author=none(), date=none(), target=sha1_git())) @@ -108,7 +107,6 @@ def revisions(): return builds( Revision, - id=sha1_git(), date=timestamps_with_timezone(), committer_date=timestamps_with_timezone(), parents=lists(sha1_git()), @@ -127,7 +125,6 @@ def directories(): return builds( Directory, - id=sha1_git(), entries=lists(directory_entries())) diff --git a/swh/model/model.py b/swh/model/model.py --- a/swh/model/model.py +++ b/swh/model/model.py @@ -4,14 +4,19 @@ # See top-level LICENSE file for more information import datetime + +from abc import ABCMeta, abstractmethod from enum import Enum from typing import List, Optional, Dict import attr import dateutil.parser -from .identifiers import normalize_timestamp -from .hashutil import DEFAULT_ALGORITHMS +from .identifiers import ( + normalize_timestamp, directory_identifier, revision_identifier, + release_identifier, snapshot_identifier +) +from .hashutil import DEFAULT_ALGORITHMS, hash_to_bytes SHA1_SIZE = 20 @@ -51,6 +56,23 @@ return cls(**d) +class HashableObject(metaclass=ABCMeta): + """Mixin to automatically compute object identifier hash when + the associated model is instantiated.""" + + @staticmethod + @abstractmethod + def compute_hash(object_dict): + """Derived model classes must implement this to compute + the object hash from its dict representation.""" + pass + + def __attrs_post_init__(self): + if not self.id: + obj_id = hash_to_bytes(self.compute_hash(self.to_dict())) + object.__setattr__(self, 'id', obj_id) + + @attr.s(frozen=True) class Person(BaseModel): """Represents the author/committer of a revision or release.""" @@ -185,7 +207,7 @@ def check_target(self, attribute, value): """Checks the target type is not an alias, 
checks the target is a valid sha1_git.""" - if self.target_type != TargetType.ALIAS: + if self.target_type != TargetType.ALIAS and self.target is not None: if len(value) != 20: raise ValueError('Wrong length for bytes identifier: %d' % len(value)) @@ -198,24 +220,28 @@ @attr.s(frozen=True) -class Snapshot(BaseModel): +class Snapshot(BaseModel, HashableObject): """Represents the full state of an origin at a given point in time.""" - id = attr.ib(type=Sha1Git) branches = attr.ib(type=Dict[bytes, Optional[SnapshotBranch]]) + id = attr.ib(type=Sha1Git, default=b'') + + @staticmethod + def compute_hash(object_dict): + return snapshot_identifier(object_dict) @classmethod def from_dict(cls, d): + d = d.copy() return cls( - id=d['id'], branches={ name: SnapshotBranch.from_dict(branch) if branch else None - for (name, branch) in d['branches'].items() - }) + for (name, branch) in d.pop('branches').items() + }, + **d) @attr.s(frozen=True) -class Release(BaseModel): - id = attr.ib(type=Sha1Git) +class Release(BaseModel, HashableObject): name = attr.ib(type=bytes) message = attr.ib(type=bytes) target = attr.ib(type=Optional[Sha1Git]) @@ -227,6 +253,11 @@ default=None) metadata = attr.ib(type=Optional[Dict[str, object]], default=None) + id = attr.ib(type=Sha1Git, default=b'') + + @staticmethod + def compute_hash(object_dict): + return release_identifier(object_dict) @author.validator def check_author(self, attribute, value): @@ -261,8 +292,7 @@ @attr.s(frozen=True) -class Revision(BaseModel): - id = attr.ib(type=Sha1Git) +class Revision(BaseModel, HashableObject): message = attr.ib(type=bytes) author = attr.ib(type=Person) committer = attr.ib(type=Person) @@ -275,12 +305,16 @@ default=None) parents = attr.ib(type=List[Sha1Git], default=attr.Factory(list)) + id = attr.ib(type=Sha1Git, default=b'') + + @staticmethod + def compute_hash(object_dict): + return revision_identifier(object_dict) @classmethod def from_dict(cls, d): d = d.copy() return cls( - id=d.pop('id'), 
author=Person.from_dict(d.pop('author')), committer=Person.from_dict(d.pop('committer')), date=TimestampWithTimezone.from_dict(d.pop('date')), @@ -301,16 +335,21 @@ @attr.s(frozen=True) -class Directory(BaseModel): - id = attr.ib(type=Sha1Git) +class Directory(BaseModel, HashableObject): entries = attr.ib(type=List[DirectoryEntry]) + id = attr.ib(type=Sha1Git, default=b'') + + @staticmethod + def compute_hash(object_dict): + return directory_identifier(object_dict) @classmethod def from_dict(cls, d): + d = d.copy() return cls( - id=d['id'], entries=[DirectoryEntry.from_dict(entry) - for entry in d['entries']]) + for entry in d.pop('entries')], + **d) @attr.s(frozen=True) diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -9,6 +9,7 @@ from swh.model import hashutil, identifiers from swh.model.exceptions import ValidationError +from swh.model.hashutil import hash_to_bytes from swh.model.identifiers import (CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT, PersistentId) @@ -111,97 +112,100 @@ self.content_id) +directory_example = { + 'id': 'c2e41aae41ac17bd4a650770d6ee77f62e52235b', + 'entries': [ + { + 'type': 'file', + 'perms': 33188, + 'name': b'README', + 'target': '37ec8ea2110c0b7a32fbb0e872f6e7debbf95e21' + }, + { + 'type': 'file', + 'perms': 33188, + 'name': b'Rakefile', + 'target': '3bb0e8592a41ae3185ee32266c860714980dbed7' + }, + { + 'type': 'dir', + 'perms': 16384, + 'name': b'app', + 'target': '61e6e867f5d7ba3b40540869bc050b0c4fed9e95' + }, + { + 'type': 'file', + 'perms': 33188, + 'name': b'1.megabyte', + 'target': '7c2b2fbdd57d6765cdc9d84c2d7d333f11be7fb3' + }, + { + 'type': 'dir', + 'perms': 16384, + 'name': b'config', + 'target': '591dfe784a2e9ccc63aaba1cb68a765734310d98' + }, + { + 'type': 'dir', + 'perms': 16384, + 'name': b'public', + 'target': '9588bf4522c2b4648bfd1c61d175d1f88c1ad4a5' + }, + { + 'type': 'file', + 'perms': 33188, 
+ 'name': b'development.sqlite3', + 'target': 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391' + }, + { + 'type': 'dir', + 'perms': 16384, + 'name': b'doc', + 'target': '154705c6aa1c8ead8c99c7915373e3c44012057f' + }, + { + 'type': 'dir', + 'perms': 16384, + 'name': b'db', + 'target': '85f157bdc39356b7bc7de9d0099b4ced8b3b382c' + }, + { + 'type': 'dir', + 'perms': 16384, + 'name': b'log', + 'target': '5e3d3941c51cce73352dff89c805a304ba96fffe' + }, + { + 'type': 'dir', + 'perms': 16384, + 'name': b'script', + 'target': '1b278423caf176da3f3533592012502aa10f566c' + }, + { + 'type': 'dir', + 'perms': 16384, + 'name': b'test', + 'target': '035f0437c080bfd8711670b3e8677e686c69c763' + }, + { + 'type': 'dir', + 'perms': 16384, + 'name': b'vendor', + 'target': '7c0dc9ad978c1af3f9a4ce061e50f5918bd27138' + }, + { + 'type': 'rev', + 'perms': 57344, + 'name': b'will_paginate', + 'target': '3d531e169db92a16a9a8974f0ae6edf52e52659e' + } + ], +} + + class DirectoryIdentifier(unittest.TestCase): def setUp(self): - self.directory = { - 'id': 'c2e41aae41ac17bd4a650770d6ee77f62e52235b', - 'entries': [ - { - 'type': 'file', - 'perms': 33188, - 'name': b'README', - 'target': '37ec8ea2110c0b7a32fbb0e872f6e7debbf95e21' - }, - { - 'type': 'file', - 'perms': 33188, - 'name': b'Rakefile', - 'target': '3bb0e8592a41ae3185ee32266c860714980dbed7' - }, - { - 'type': 'dir', - 'perms': 16384, - 'name': b'app', - 'target': '61e6e867f5d7ba3b40540869bc050b0c4fed9e95' - }, - { - 'type': 'file', - 'perms': 33188, - 'name': b'1.megabyte', - 'target': '7c2b2fbdd57d6765cdc9d84c2d7d333f11be7fb3' - }, - { - 'type': 'dir', - 'perms': 16384, - 'name': b'config', - 'target': '591dfe784a2e9ccc63aaba1cb68a765734310d98' - }, - { - 'type': 'dir', - 'perms': 16384, - 'name': b'public', - 'target': '9588bf4522c2b4648bfd1c61d175d1f88c1ad4a5' - }, - { - 'type': 'file', - 'perms': 33188, - 'name': b'development.sqlite3', - 'target': 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391' - }, - { - 'type': 'dir', - 'perms': 16384, - 'name': 
b'doc', - 'target': '154705c6aa1c8ead8c99c7915373e3c44012057f' - }, - { - 'type': 'dir', - 'perms': 16384, - 'name': b'db', - 'target': '85f157bdc39356b7bc7de9d0099b4ced8b3b382c' - }, - { - 'type': 'dir', - 'perms': 16384, - 'name': b'log', - 'target': '5e3d3941c51cce73352dff89c805a304ba96fffe' - }, - { - 'type': 'dir', - 'perms': 16384, - 'name': b'script', - 'target': '1b278423caf176da3f3533592012502aa10f566c' - }, - { - 'type': 'dir', - 'perms': 16384, - 'name': b'test', - 'target': '035f0437c080bfd8711670b3e8677e686c69c763' - }, - { - 'type': 'dir', - 'perms': 16384, - 'name': b'vendor', - 'target': '7c0dc9ad978c1af3f9a4ce061e50f5918bd27138' - }, - { - 'type': 'rev', - 'perms': 57344, - 'name': b'will_paginate', - 'target': '3d531e169db92a16a9a8974f0ae6edf52e52659e' - } - ], - } + self.directory = directory_example self.empty_directory = { 'id': '4b825dc642cb6eb9a060e54bf8d69288fbee4904', @@ -219,11 +223,34 @@ self.empty_directory['id']) -class RevisionIdentifier(unittest.TestCase): - def setUp(self): +linus_tz = datetime.timezone(datetime.timedelta(minutes=-420)) + +revision_example = { + 'id': 'bc0195aad0daa2ad5b0d76cce22b167bc3435590', + 'directory': '85a74718d377195e1efd0843ba4f3260bad4fe07', + 'parents': ['01e2d0627a9a6edb24c37db45db5ecb31e9de808'], + 'author': { + 'name': b'Linus Torvalds', + 'email': b'torvalds@linux-foundation.org', + 'fullname': b'Linus Torvalds <torvalds@linux-foundation.org>' + }, + 'date': datetime.datetime(2015, 7, 12, 15, 10, 30, + tzinfo=linus_tz), + 'committer': { + 'name': b'Linus Torvalds', + 'email': b'torvalds@linux-foundation.org', + 'fullname': b'Linus Torvalds <torvalds@linux-foundation.org>' + }, + 'committer_date': datetime.datetime(2015, 7, 12, 15, 10, 30, + tzinfo=linus_tz), + 'message': b'Linux 4.2-rc2\n', + 'type': 'git', + 'synthetic': False +} - linus_tz = datetime.timezone(datetime.timedelta(minutes=-420)) +class RevisionIdentifier(unittest.TestCase): + def setUp(self): gpgsig = b'''\ -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.13 (Darwin) @@ -243,24 +270,7 @@ =od6m
-----END PGP SIGNATURE-----''' - self.revision = { - 'id': 'bc0195aad0daa2ad5b0d76cce22b167bc3435590', - 'directory': '85a74718d377195e1efd0843ba4f3260bad4fe07', - 'parents': ['01e2d0627a9a6edb24c37db45db5ecb31e9de808'], - 'author': { - 'name': b'Linus Torvalds', - 'email': b'torvalds@linux-foundation.org', - }, - 'date': datetime.datetime(2015, 7, 12, 15, 10, 30, - tzinfo=linus_tz), - 'committer': { - 'name': b'Linus Torvalds', - 'email': b'torvalds@linux-foundation.org', - }, - 'committer_date': datetime.datetime(2015, 7, 12, 15, 10, 30, - tzinfo=linus_tz), - 'message': b'Linux 4.2-rc2\n', - } + self.revision = revision_example self.revision_none_metadata = { 'id': 'bc0195aad0daa2ad5b0d76cce22b167bc3435590', @@ -509,22 +519,19 @@ ) -class ReleaseIdentifier(unittest.TestCase): - def setUp(self): - linus_tz = datetime.timezone(datetime.timedelta(minutes=-420)) - - self.release = { - 'id': '2b10839e32c4c476e9d94492756bb1a3e1ec4aa8', - 'target': b't\x1b"R\xa5\xe1Ml`\xa9\x13\xc7z`\x99\xab\xe7:\x85J', - 'target_type': 'revision', - 'name': b'v2.6.14', - 'author': { - 'name': b'Linus Torvalds', - 'email': b'torvalds@g5.osdl.org', - }, - 'date': datetime.datetime(2005, 10, 27, 17, 2, 33, - tzinfo=linus_tz), - 'message': b'''\ +release_example = { + 'id': '2b10839e32c4c476e9d94492756bb1a3e1ec4aa8', + 'target': b't\x1b"R\xa5\xe1Ml`\xa9\x13\xc7z`\x99\xab\xe7:\x85J', + 'target_type': 'revision', + 'name': b'v2.6.14', + 'author': { + 'name': b'Linus Torvalds', + 'email': b'torvalds@g5.osdl.org', + 'fullname': b'Linus Torvalds <torvalds@g5.osdl.org>' + }, + 'date': datetime.datetime(2005, 10, 27, 17, 2, 33, + tzinfo=linus_tz), + 'message': b'''\ Linux 2.6.14 release -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.1 (GNU/Linux) @@ -534,8 +541,15 @@ =7VeT -----END PGP SIGNATURE----- ''', - 'synthetic': False, - } + 'synthetic': False, +} + + +class ReleaseIdentifier(unittest.TestCase): + def setUp(self): + linus_tz = datetime.timezone(datetime.timedelta(minutes=-420)) + + self.release =
release_example self.release_no_author = { 'id': b'&y\x1a\x8b\xcf\x0em3\xf4:\xefv\x82\xbd\xb5U#mV\xde', @@ -673,6 +687,44 @@ ) +snapshot_example = { + 'id': hash_to_bytes('6e65b86363953b780d92b0a928f3e8fcdd10db36'), + 'branches': { + b'directory': { + 'target': hash_to_bytes( + '1bd0e65f7d2ff14ae994de17a1e7fe65111dcad8'), + 'target_type': 'directory', + }, + b'content': { + 'target': hash_to_bytes( + 'fe95a46679d128ff167b7c55df5d02356c5a1ae1'), + 'target_type': 'content', + }, + b'alias': { + 'target': b'revision', + 'target_type': 'alias', + }, + b'revision': { + 'target': hash_to_bytes( + 'aafb16d69fd30ff58afdd69036a26047f3aebdc6'), + 'target_type': 'revision', + }, + b'release': { + 'target': hash_to_bytes( + '7045404f3d1c54e6473c71bbb716529fbad4be24'), + 'target_type': 'release', + }, + b'snapshot': { + 'target': hash_to_bytes( + '1a8893e6a86f444e8be8e7bda6cb34fb1735a00e' + ), + 'target_type': 'snapshot', + }, + b'dangling': None, + } +} + + class SnapshotIdentifier(unittest.TestCase): def setUp(self): super().setUp() @@ -699,36 +751,7 @@ }, } - self.all_types = { - 'id': '6e65b86363953b780d92b0a928f3e8fcdd10db36', - 'branches': { - b'directory': { - 'target': '1bd0e65f7d2ff14ae994de17a1e7fe65111dcad8', - 'target_type': 'directory', - }, - b'content': { - 'target': 'fe95a46679d128ff167b7c55df5d02356c5a1ae1', - 'target_type': 'content', - }, - b'alias': { - 'target': b'revision', - 'target_type': 'alias', - }, - b'revision': { - 'target': 'aafb16d69fd30ff58afdd69036a26047f3aebdc6', - 'target_type': 'revision', - }, - b'release': { - 'target': '7045404f3d1c54e6473c71bbb716529fbad4be24', - 'target_type': 'release', - }, - b'snapshot': { - 'target': '1a8893e6a86f444e8be8e7bda6cb34fb1735a00e', - 'target_type': 'snapshot', - }, - b'dangling': None, - } - } + self.all_types = snapshot_example def test_empty_snapshot(self): self.assertEqual( diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py --- a/swh/model/tests/test_model.py +++ 
b/swh/model/tests/test_model.py @@ -7,8 +7,16 @@ from hypothesis import given -from swh.model.model import Content +from swh.model.model import Content, Directory, Revision, Release, Snapshot +from swh.model.hashutil import hash_to_bytes from swh.model.hypothesis_strategies import objects, origins, origin_visits +from swh.model.identifiers import ( + directory_identifier, revision_identifier, release_identifier, + snapshot_identifier +) +from swh.model.tests.test_identifiers import ( + directory_example, revision_example, release_example, snapshot_example +) @given(objects()) @@ -52,3 +60,55 @@ c = Content(length=42, status='visible', **hashes) for (hash_name, hash_) in hashes.items(): assert c.get_hash(hash_name) == hash_ + + +def test_directory_model_id_computation(): + dir_dict = dict(directory_example) + del dir_dict['id'] + + dir_model = Directory(**dir_dict) + assert dir_model.id + assert dir_model.id == hash_to_bytes(directory_identifier(dir_dict)) + + dir_model = Directory.from_dict(dir_dict) + assert dir_model.id + assert dir_model.id == hash_to_bytes(directory_identifier(dir_dict)) + + +def test_revision_model_id_computation(): + rev_dict = dict(revision_example) + del rev_dict['id'] + + rev_model = Revision(**rev_dict) + assert rev_model.id + assert rev_model.id == hash_to_bytes(revision_identifier(rev_dict)) + + rev_model = Revision.from_dict(rev_dict) + assert rev_model.id + assert rev_model.id == hash_to_bytes(revision_identifier(rev_dict)) + + +def test_release_model_id_computation(): + rel_dict = dict(release_example) + del rel_dict['id'] + + rel_model = Release(**rel_dict) + assert rel_model.id + assert rel_model.id == hash_to_bytes(release_identifier(rel_dict)) + + rel_model = Release.from_dict(rel_dict) + assert rel_model.id + assert rel_model.id == hash_to_bytes(release_identifier(rel_dict)) + + +def test_snapshot_model_id_computation(): + snp_dict = dict(snapshot_example) + del snp_dict['id'] + + snp_model = Snapshot(**snp_dict) + assert 
snp_model.id + assert snp_model.id == hash_to_bytes(snapshot_identifier(snp_dict)) + + snp_model = Snapshot.from_dict(snp_dict) + assert snp_model.id + assert snp_model.id == hash_to_bytes(snapshot_identifier(snp_dict))