diff --git a/swh/model/git_objects.py b/swh/model/git_objects.py --- a/swh/model/git_objects.py +++ b/swh/model/git_objects.py @@ -340,10 +340,15 @@ if parent: headers.append((b"parent", hash_to_bytehex(parent))) - headers.append((b"author", format_author_data(revision.author, revision.date))) - headers.append( - (b"committer", format_author_data(revision.committer, revision.committer_date),) - ) + if revision.author is not None: + headers.append((b"author", format_author_data(revision.author, revision.date))) + if revision.committer is not None: + headers.append( + ( + b"committer", + format_author_data(revision.committer, revision.committer_date), + ) + ) # Handle extra headers metadata = revision.metadata or ImmutableDict() diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py --- a/swh/model/hypothesis_strategies.py +++ b/swh/model/hypothesis_strategies.py @@ -217,6 +217,7 @@ d = draw( one_of( + # None author/date: builds( dict, name=name, @@ -228,6 +229,7 @@ target_type=target_type, metadata=metadata, ), + # non-None author/date: builds( dict, name=name, @@ -239,6 +241,8 @@ target_type=target_type, metadata=metadata, ), + # it is also possible for date to be None but not author, but let's not + # overwhelm hypothesis with this edge case ) ) @@ -264,19 +268,39 @@ @composite def revisions_d(draw): d = draw( - builds( - dict, - message=optional(binary()), - synthetic=booleans(), - author=persons_d(), - committer=persons_d(), - date=timestamps_with_timezone_d(), - committer_date=timestamps_with_timezone_d(), - parents=tuples(sha1_git()), - directory=sha1_git(), - type=sampled_from([x.value for x in RevisionType]), - metadata=optional(revision_metadata()), - extra_headers=extra_headers(), + one_of( + # None author/committer/date/committer_date + builds( + dict, + message=optional(binary()), + synthetic=booleans(), + author=none(), + committer=none(), + date=none(), + committer_date=none(), + parents=tuples(sha1_git()), + directory=sha1_git(), + type=sampled_from([x.value for x in RevisionType]), + metadata=optional(revision_metadata()), + extra_headers=extra_headers(), + ), + # non-None author/committer/date/committer_date + builds( + dict, + message=optional(binary()), + synthetic=booleans(), + author=persons_d(), + committer=persons_d(), + date=timestamps_with_timezone_d(), + committer_date=timestamps_with_timezone_d(), + parents=tuples(sha1_git()), + directory=sha1_git(), + type=sampled_from([x.value for x in RevisionType]), + metadata=optional(revision_metadata()), + extra_headers=extra_headers(), + ), + # There are many other combinations, but let's not overwhelm hypothesis + # with these edge cases ) ) # TODO: metadata['extra_headers'] can have binary keys and values diff --git a/swh/model/model.py b/swh/model/model.py --- a/swh/model/model.py +++ b/swh/model/model.py @@ -835,8 +835,8 @@ object_type: Final = "revision" message = attr.ib(type=Optional[bytes], validator=type_validator()) - author = attr.ib(type=Person, validator=type_validator()) - committer = attr.ib(type=Person, validator=type_validator()) + author = attr.ib(type=Optional[Person], validator=type_validator()) + committer = attr.ib(type=Optional[Person], validator=type_validator()) date = attr.ib(type=Optional[TimestampWithTimezone], validator=type_validator()) committer_date = attr.ib( type=Optional[TimestampWithTimezone], validator=type_validator() @@ -877,6 +877,20 @@ def _compute_hash_from_attributes(self) -> bytes: return _compute_hash_from_manifest(git_objects.revision_git_object(self)) + @author.validator + def check_author(self, attribute, value): + """If the author is `None`, checks the date is `None` too.""" + if self.author is None and self.date is not None: + raise ValueError("revision date must be None if author is None.") + + @committer.validator + def check_committer(self, attribute, value): + """If the committer is `None`, checks the committer_date is `None` too.""" + if self.committer is None and self.committer_date is not None: + raise ValueError( + "revision committer_date must be None if committer is None." + ) + @classmethod def from_dict(cls, d): d = d.copy() @@ -888,9 +902,17 @@ if committer_date: committer_date = TimestampWithTimezone.from_dict(committer_date) + author = d.pop("author") + if author: + author = Person.from_dict(author) + + committer = d.pop("committer") + if committer: + committer = Person.from_dict(committer) + return cls( - author=Person.from_dict(d.pop("author")), - committer=Person.from_dict(d.pop("committer")), + author=author, + committer=committer, date=date, committer_date=committer_date, type=RevisionType(d.pop("type")), @@ -909,7 +931,9 @@ Person object. """ return attr.evolve( - self, author=self.author.anonymize(), committer=self.committer.anonymize() + self, + author=None if self.author is None else self.author.anonymize(), + committer=None if self.committer is None else self.committer.anonymize(), ) diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py --- a/swh/model/tests/test_model.py +++ b/swh/model/tests/test_model.py @@ -1124,6 +1124,33 @@ assert rev_model.extra_headers == extra_headers +def test_revision_no_author_or_committer_from_dict(): + rev_dict = revision_example.copy() + rev_dict["author"] = rev_dict["date"] = None + rev_dict["committer"] = rev_dict["committer_date"] = None + rev_model = Revision.from_dict(rev_dict) + assert rev_model.to_dict() == { + **rev_dict, + "parents": tuple(rev_dict["parents"]), + "extra_headers": (), + "metadata": None, + } + + +def test_revision_none_author_or_committer(): + rev_dict = revision_example.copy() + rev_dict["author"] = None + with pytest.raises(ValueError, match=".*date must be None if author is None.*"): + Revision.from_dict(rev_dict) + + rev_dict = revision_example.copy() + rev_dict["committer"] = None + with pytest.raises( + ValueError, match=".*committer_date must be None if committer is None.*" + ): + Revision.from_dict(rev_dict) + + @given(strategies.objects(split_content=True)) def test_object_type(objtype_and_obj): obj_type, obj = objtype_and_obj