diff --git a/swh/model/git_objects.py b/swh/model/git_objects.py --- a/swh/model/git_objects.py +++ b/swh/model/git_objects.py @@ -39,6 +39,19 @@ from .hashutil import git_object_header, hash_to_bytehex +def content_git_object(content: model.Content) -> bytes: + """Formats a content as a git blob. + + A content's identifier is the blob sha1 à la git of the tagged content. + """ + content = cast(model.Content, content) + + if content.data is None: + raise model.MissingData("Content data is None, cannot format.") + + return git_object_header("blob", len(content.data)) + content.data + + def directory_entry_sort_key(entry: model.DirectoryEntry): """The sorting key for tree entries""" if isinstance(entry, dict): diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py --- a/swh/model/tests/test_model.py +++ b/swh/model/tests/test_model.py @@ -841,6 +841,20 @@ ) +@given(strategies.contents().filter(lambda cnt: cnt.data is not None)) +def test_content_check(content): + content.check() + + content2 = attr.evolve(content, id=b"\x00" * 20) + with pytest.raises(ValueError, match="does not match recomputed hash"): + content2.check() + + content2 = attr.evolve( + content, raw_manifest=swh.model.git_objects.content_git_object(content) + ) + content2.check() + + # SkippedContent