diff --git a/swh/dataset/exporters/orc.py b/swh/dataset/exporters/orc.py --- a/swh/dataset/exporters/orc.py +++ b/swh/dataset/exporters/orc.py @@ -163,6 +163,7 @@ release["target_type"], (release.get("author") or {}).get("fullname"), *swh_date_to_tuple(release["date"]), + release.get("raw_manifest"), ) ) @@ -178,6 +179,7 @@ *swh_date_to_tuple(revision["committer_date"]), hash_to_hex_or_none(revision["directory"]), revision["type"], + revision.get("raw_manifest"), ) ) @@ -207,7 +209,12 @@ def process_directory(self, directory): directory_writer = self.get_writer_for("directory") - directory_writer.write((hash_to_hex_or_none(directory["id"]),)) + directory_writer.write( + ( + hash_to_hex_or_none(directory["id"]), + directory.get("raw_manifest"), + ) + ) directory_entry_writer = self.get_writer_for( "directory_entry", diff --git a/swh/dataset/relational.py b/swh/dataset/relational.py --- a/swh/dataset/relational.py +++ b/swh/dataset/relational.py @@ -41,6 +41,7 @@ ("date_seconds", "bigint"), ("date_microseconds", "int"), ("date_offset_bytes", "binary"), + ("raw_manifest", "binary"), ], "revision": [ ("id", "string"), @@ -55,6 +56,7 @@ ("committer_date_offset_bytes", "binary"), ("directory", "string"), ("type", "string"), + ("raw_manifest", "binary"), ], "revision_history": [ ("id", "string"), @@ -68,6 +70,7 @@ ], "directory": [ ("id", "string"), + ("raw_manifest", "binary"), ], "directory_entry": [ ("id", "string"), diff --git a/swh/dataset/test/test_orc.py b/swh/dataset/test/test_orc.py --- a/swh/dataset/test/test_orc.py +++ b/swh/dataset/test/test_orc.py @@ -98,6 +98,7 @@ obj.target_type.value, obj.author.fullname if obj.author else None, *swh_date_to_tuple(obj.date.to_dict() if obj.date is not None else None), + obj.raw_manifest, ) in output[obj_type] @@ -116,6 +117,7 @@ ), hash_to_hex_or_none(obj.directory), obj.type.value, + obj.raw_manifest, ) in output["revision"] for i, parent in enumerate(obj.parents): assert ( @@ -129,7 +131,7 @@ obj_type = "directory" output = exporter({obj_type: TEST_OBJECTS[obj_type]}) for obj in TEST_OBJECTS[obj_type]: - assert (hash_to_hex_or_none(obj.id),) in output["directory"] + assert (hash_to_hex_or_none(obj.id), obj.raw_manifest) in output["directory"] for entry in obj.entries: assert ( hash_to_hex_or_none(obj.id),