Patch summary (free text ahead of the first "diff --git" header):

* swh/model/hypothesis_strategies.py: adds sha1() and revision_metadata()
  strategies, narrows the timestamp/offset ranges, draws release
  (date, author) and content (status, data, reason) as consistent tuples,
  and uses sha1_git() for revision parents/directory.
* swh/model/model.py: adds an offset validator, the ObjectType and
  RevisionType enums, an optional Release.date with an author/date
  validator, and Content.reason with its validator.
* swh/model/tests/test_hypothesis_strategies.py: checks the generated
  content dict keys separately for each status.

diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py
--- a/swh/model/hypothesis_strategies.py
+++ b/swh/model/hypothesis_strategies.py
@@ -3,9 +3,11 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

+import datetime
+
 from hypothesis.strategies import (
-    lists, one_of, composite, builds, integers, sampled_from, binary,
-    dictionaries, none, from_regex, just
+    binary, builds, characters, composite, dictionaries, from_regex,
+    integers, just, lists, none, one_of, sampled_from, text, tuples,
 )


@@ -22,6 +24,10 @@
     return binary(min_size=20, max_size=20)


+def sha1():
+    return binary(min_size=20, max_size=20)
+
+
 @composite
 def urls(draw):
     protocol = draw(sampled_from(['git', 'http', 'https', 'deb']))
@@ -35,9 +41,11 @@


 def timestamps():
+    max_seconds = datetime.datetime.max.timestamp()
+    min_seconds = datetime.datetime.min.timestamp()
     return builds(
         Timestamp,
-        seconds=integers(-2**63, 2**63-1),
+        seconds=integers(min_seconds, max_seconds),
         microseconds=integers(0, 1000000))


@@ -45,7 +53,7 @@
     return builds(
         TimestampWithTimezone,
         timestamp=timestamps(),
-        offset=integers(-2**16, 2**16-1))
+        offset=integers(min_value=-14*60, max_value=14*60))


 def origins():
@@ -62,13 +70,27 @@
         origin=origins())


-def releases():
-    return builds(
+@composite
+def releases(draw):
+    (date, author) = draw(one_of(
+        tuples(none(), none()),
+        tuples(timestamps_with_timezone(), persons())))
+    rel = draw(builds(
         Release,
         id=sha1_git(),
-        date=timestamps_with_timezone(),
-        author=one_of(none(), persons()),
-        target=one_of(none(), sha1_git()))
+        author=none(),
+        date=none(),
+        target=sha1_git()))
+    rel.date = date
+    rel.author = author
+    return rel
+
+
+def revision_metadata():
+    alphabet = characters(
+        blacklist_categories=('Cs', ),
+        blacklist_characters=['\u0000'])  # postgresql does not like these
+    return dictionaries(text(alphabet=alphabet), text(alphabet=alphabet))


 def revisions():
@@ -77,9 +99,10 @@
         id=sha1_git(),
         date=timestamps_with_timezone(),
         committer_date=timestamps_with_timezone(),
-        parents=lists(binary()),
-        directory=binary(),
-        metadata=one_of(none(), dictionaries(binary(), binary())))
+        parents=lists(sha1_git()),
+        directory=sha1_git(),
+        metadata=one_of(none(), revision_metadata()))
+    # TODO: metadata['extra_headers'] can have binary keys and values


 def directory_entries():
@@ -96,18 +119,25 @@
         entries=lists(directory_entries()))


-def contents():
-    def filter_data(content):
-        if content.status != 'visible':
-            content.data = None
-        return content
+@composite
+def contents(draw):
+    (status, data, reason) = draw(one_of(
+        tuples(just('visible'), binary(), none()),
+        tuples(just('absent'), none(), text()),
+        tuples(just('hidden'), none(), none()),
+    ))

-    return builds(
+    return draw(builds(
         Content,
         length=integers(0),
-        data=binary(),
+        sha1=sha1(),
         sha1_git=sha1_git(),
-    ).map(filter_data)
+        sha256=binary(min_size=32, max_size=32),
+        blake2s256=binary(min_size=32, max_size=32),
+        status=just(status),
+        data=just(data),
+        reason=just(reason),
+    ))


 def branch_names():
diff --git a/swh/model/model.py b/swh/model/model.py
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -48,6 +48,13 @@
     def to_dict(self):
         return attr.asdict(self)

+    @offset.validator
+    def check_offset(self, attribute, value):
+        if not (-2**15 <= value < 2**15):
+            # max 14 hours offset in theory, but you never know what
+            # you'll find in the wild...
+            raise ValueError('offset too large: %d minutes' % value)
+

 @attr.s
 class Origin:
@@ -83,6 +90,14 @@
     ALIAS = 'alias'


+class ObjectType(Enum):
+    CONTENT = 'content'
+    DIRECTORY = 'directory'
+    REVISION = 'revision'
+    RELEASE = 'release'
+    SNAPSHOT = 'snapshot'
+
+
 @attr.s
 class SnapshotBranch:
     target = attr.ib(type=bytes)
@@ -121,18 +136,31 @@
     id = attr.ib(type=Sha1Git)
     name = attr.ib(type=bytes)
     message = attr.ib(type=bytes)
-    date = attr.ib(type=TimestampWithTimezone)
+    date = attr.ib(type=Optional[TimestampWithTimezone])
     author = attr.ib(type=Optional[Person])
     target = attr.ib(type=Optional[Sha1Git])
-    target_type = attr.ib(type=TargetType)
+    target_type = attr.ib(type=ObjectType)
     synthetic = attr.ib(type=bool)

     def to_dict(self):
         rel = attr.asdict(self)
-        rel['date'] = self.date.to_dict()
+        rel['date'] = self.date.to_dict() if self.date is not None else None
         rel['target_type'] = rel['target_type'].value
         return rel

+    @author.validator
+    def check_author(self, attribute, value):
+        if self.author is None and self.date is not None:
+            raise ValueError('release date must be None if author is None.')
+
+
+class RevisionType(Enum):
+    GIT = 'git'
+    TAR = 'tar'
+    DSC = 'dsc'
+    SUBVERSION = 'svn'
+    MERCURIAL = 'hg'
+

 @attr.s
 class Revision:
@@ -143,15 +171,16 @@
     date = attr.ib(type=TimestampWithTimezone)
     committer_date = attr.ib(type=TimestampWithTimezone)
     parents = attr.ib(type=List[Sha1Git])
-    type = attr.ib(type=str)
+    type = attr.ib(type=RevisionType)
     directory = attr.ib(type=Sha1Git)
-    metadata = attr.ib(type=Optional[dict])
+    metadata = attr.ib(type=Optional[Dict[str, object]])
     synthetic = attr.ib(type=bool)

     def to_dict(self):
         rev = attr.asdict(self)
         rev['date'] = self.date.to_dict()
         rev['committer_date'] = self.committer_date.to_dict()
+        rev['type'] = rev['type'].value
         return rev


@@ -191,6 +220,7 @@
     status = attr.ib(
         type=str,
         validator=attr.validators.in_(['visible', 'absent', 'hidden']))
+    reason = attr.ib(type=Optional[str])

     @length.validator
     def check_length(self, attribute, value):
@@ -198,8 +228,20 @@
         if value < 0:
             raise ValueError('Length must be positive.')

+    @reason.validator
+    def check_reason(self, attribute, value):
+        """Checks the reason is set iff status == absent."""
+        assert self.reason == value
+        if self.status == 'absent' and value is None:
+            raise ValueError('Must provide a reason if content is absent.')
+        elif self.status != 'absent' and value is not None:
+            raise ValueError(
+                'Must not provide a reason if content is not absent.')
+
     def to_dict(self):
         content = attr.asdict(self)
         if content['data'] is None:
             del content['data']
+        if content['reason'] is None:
+            del content['reason']
         return content
diff --git a/swh/model/tests/test_hypothesis_strategies.py b/swh/model/tests/test_hypothesis_strategies.py
--- a/swh/model/tests/test_hypothesis_strategies.py
+++ b/swh/model/tests/test_hypothesis_strategies.py
@@ -44,9 +44,14 @@
         if object_['status'] == 'visible':
             assert set(object_) == \
                 set(DEFAULT_ALGORITHMS) | {'length', 'status', 'data'}
-        else:
+        elif object_['status'] == 'absent':
+            assert set(object_) == \
+                set(DEFAULT_ALGORITHMS) | {'length', 'status', 'reason'}
+        elif object_['status'] == 'hidden':
             assert set(object_) == \
                 set(DEFAULT_ALGORITHMS) | {'length', 'status'}
+        else:
+            assert False, object_
     elif obj_type == 'release':
         assert object_['target_type'] in target_types
     elif obj_type == 'snapshot':