diff --git a/swh/loader/git/converters.py b/swh/loader/git/converters.py
--- a/swh/loader/git/converters.py
+++ b/swh/loader/git/converters.py
@@ -147,19 +147,23 @@
     return dir_
 
 
-def parse_author(name_email: bytes) -> Person:
-    """Parse an author line"""
+def parse_author(name_email: Optional[bytes]) -> Optional[Person]:
+    """Parse an author line; return None when no author line is given."""
+    if name_email is None:
+        return None
     return Person.from_fullname(name_email)
 
 
 def dulwich_tsinfo_to_timestamp(
-    timestamp,
+    timestamp: Optional[str],
     timezone: int,
     timezone_neg_utc: bool,
     timezone_bytes: Optional[bytes],
-) -> TimestampWithTimezone:
+) -> Optional[TimestampWithTimezone]:
     """Convert the dulwich timestamp information to a structure compatible with
-    Software Heritage."""
+    Software Heritage, or return None when there is no timestamp."""
+    if timestamp is None:
+        return None
     ts = Timestamp(
         seconds=int(timestamp),
         microseconds=0,
diff --git a/swh/loader/git/tests/test_converters.py b/swh/loader/git/tests/test_converters.py
--- a/swh/loader/git/tests/test_converters.py
+++ b/swh/loader/git/tests/test_converters.py
@@ -326,6 +326,80 @@
 
         assert revision == expected_revision
 
+    def test_commit_without_author(self):
+        """A manifest with no author/committer lines gives None persons and dates."""
+        target = b"641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"
+        message = b"some commit message"
+        author = None
+        raw_manifest = (
+            b"tree 641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce\n" b"some commit message"
+        )
+        commit = dulwich.objects.Commit.from_raw_string(b"commit", raw_manifest)
+        assert converters.dulwich_commit_to_revision(commit) == Revision(
+            message=message,
+            author=author,
+            committer=author,
+            date=None,
+            committer_date=None,
+            type=RevisionType.GIT,
+            directory=hash_to_bytes(target.decode()),
+            synthetic=False,
+            metadata=None,
+            parents=(),
+        )
+
+    def test_commit_without_author_name(self):
+        """Author/committer lines that carry no name part are still converted."""
+        target = b"641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"
+        message = b"some commit message"
+        author = Person(fullname=b"foo@example.org", name=b"", email=b"")
+        raw_manifest = (
+            b"tree 641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce\n"
+            b"author foo@example.org 1640191028 +0200\n"
+            b"committer foo@example.org 1640191028 +0200\n\n"
+            b"some commit message"
+        )
+        commit = dulwich.objects.Commit.from_raw_string(b"commit", raw_manifest)
+        assert converters.dulwich_commit_to_revision(commit) == Revision(
+            message=message,
+            author=author,
+            committer=author,
+            date=None,
+            committer_date=None,
+            type=RevisionType.GIT,
+            directory=hash_to_bytes(target.decode()),
+            synthetic=False,
+            metadata=None,
+            parents=(),
+        )
+
+    def test_commit_without_dates(self):
+        """Author/committer lines without a timestamp give None dates."""
+        target = b"641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"
+        message = b"some commit message"
+        author = Person(
+            fullname=b"Foo <foo@example.org>", name=b"Foo", email=b"foo@example.org"
+        )
+        raw_manifest = (
+            b"tree 641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce\n"
+            b"author Foo <foo@example.org>\n"
+            b"committer Foo <foo@example.org>\n\n"
+            b"some commit message"
+        )
+        commit = dulwich.objects.Commit.from_raw_string(b"commit", raw_manifest)
+        assert converters.dulwich_commit_to_revision(commit) == Revision(
+            message=message,
+            author=author,
+            committer=author,
+            date=None,
+            committer_date=None,
+            type=RevisionType.GIT,
+            directory=hash_to_bytes(target.decode()),
+            synthetic=False,
+            metadata=None,
+            parents=(),
+        )
+
     def test_commit_without_manifest(self):
         """Tests a Release can still be produced when the manifest is not understood
         by the custom parser in dulwich_commit_to_revision."""