diff --git a/swh/dataset/exporters/orc.py b/swh/dataset/exporters/orc.py --- a/swh/dataset/exporters/orc.py +++ b/swh/dataset/exporters/orc.py @@ -211,6 +211,7 @@ release["target_type"], (release.get("author") or {}).get("fullname"), *swh_date_to_tuple(release["date"]), + release.get("raw_manifest"), ) ) @@ -226,6 +227,7 @@ *swh_date_to_tuple(revision["committer_date"]), hash_to_hex_or_none(revision["directory"]), revision["type"], + revision.get("raw_manifest"), ) ) @@ -255,7 +257,12 @@ def process_directory(self, directory): directory_writer = self.get_writer_for("directory") - directory_writer.write((hash_to_hex_or_none(directory["id"]),)) + directory_writer.write( + ( + hash_to_hex_or_none(directory["id"]), + directory.get("raw_manifest"), + ) + ) directory_entry_writer = self.get_writer_for( "directory_entry", diff --git a/swh/dataset/relational.py b/swh/dataset/relational.py --- a/swh/dataset/relational.py +++ b/swh/dataset/relational.py @@ -41,6 +41,7 @@ ("date", "timestamp"), ("date_offset", "smallint"), ("date_raw_offset_bytes", "binary"), + ("raw_manifest", "binary"), ], "revision": [ ("id", "string"), @@ -55,6 +56,7 @@ ("committer_date_raw_offset_bytes", "binary"), ("directory", "string"), ("type", "string"), + ("raw_manifest", "binary"), ], "revision_history": [ ("id", "string"), @@ -68,6 +70,7 @@ ], "directory": [ ("id", "string"), + ("raw_manifest", "binary"), ], "directory_entry": [ ("directory_id", "string"), diff --git a/swh/dataset/test/test_orc.py b/swh/dataset/test/test_orc.py --- a/swh/dataset/test/test_orc.py +++ b/swh/dataset/test/test_orc.py @@ -105,6 +105,7 @@ obj.target_type.value, obj.author.fullname if obj.author else None, *swh_date_to_tuple(obj.date.to_dict() if obj.date is not None else None), + obj.raw_manifest, ) in output[obj_type] @@ -123,6 +124,7 @@ ), hash_to_hex_or_none(obj.directory), obj.type.value, + obj.raw_manifest, ) in output["revision"] for i, parent in enumerate(obj.parents): assert ( @@ -136,7 +138,7 @@ obj_type = "directory" output = exporter({obj_type: TEST_OBJECTS[obj_type]}) for obj in TEST_OBJECTS[obj_type]: - assert (hash_to_hex_or_none(obj.id),) in output["directory"] + assert (hash_to_hex_or_none(obj.id), obj.raw_manifest) in output["directory"] for entry in obj.entries: assert ( hash_to_hex_or_none(obj.id),