diff --git a/swh/model/git_objects.py b/swh/model/git_objects.py
--- a/swh/model/git_objects.py
+++ b/swh/model/git_objects.py
@@ -39,6 +39,27 @@
 from .hashutil import git_object_header, hash_to_bytehex
 
 
+def content_git_object(content: model.Content) -> bytes:
+    """Formats a content as a git blob.
+
+    A content's identifier is the blob sha1 à la git of the tagged content.
+
+    Args:
+        content: the content to serialize; its ``data`` must be loaded.
+
+    Returns:
+        The git blob header (as built by ``git_object_header("blob", size)``)
+        followed by the raw content data.
+
+    Raises:
+        model.MissingData: if ``content.data`` is None.
+    """
+    if content.data is None:
+        raise model.MissingData("Content data is None, cannot format.")
+
+    return git_object_header("blob", len(content.data)) + content.data
+
+
 def directory_entry_sort_key(entry: model.DirectoryEntry):
     """The sorting key for tree entries"""
     if isinstance(entry, dict):
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -841,4 +841,13 @@
     )
 
 
+@given(strategies.present_contents().filter(lambda cnt: cnt.data is not None))
+def test_content_git_roundtrip(content):
+    """The sha1 of the serialized git blob must equal the content's sha1_git."""
+    assert content.data is not None
+    raw = swh.model.git_objects.content_git_object(content)
+    sha1_git = hashlib.new("sha1", raw).digest()
+    assert content.sha1_git == sha1_git
+
+
 # SkippedContent