diff --git a/mypy.ini b/mypy.ini --- a/mypy.ini +++ b/mypy.ini @@ -17,6 +17,9 @@ [mypy-confluent_kafka.*] ignore_missing_imports = True +[mypy-pkginfo.*] +ignore_missing_imports = True + [mypy-pyorc.*] ignore_missing_imports = True @@ -29,5 +32,8 @@ [mypy-botocore.*] ignore_missing_imports = True +[mypy-msgpack.*] +ignore_missing_imports = True + # [mypy-add_your_lib_here.*] # ignore_missing_imports = True diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -4,5 +4,6 @@ boto3 click tqdm -pyorc +pkginfo plyvel +pyorc diff --git a/swh/dataset/exporters/orc.py b/swh/dataset/exporters/orc.py --- a/swh/dataset/exporters/orc.py +++ b/swh/dataset/exporters/orc.py @@ -5,6 +5,7 @@ import uuid +import pkginfo from pyorc import ( BigInt, Binary, @@ -17,9 +18,11 @@ Writer, ) +import swh.dataset from swh.dataset.exporter import ExporterDispatch from swh.dataset.relational import TABLES from swh.dataset.utils import remove_pull_requests +import swh.model from swh.model.hashutil import hash_to_hex import swh.model.model as swhmodel @@ -83,6 +86,13 @@ compression=CompressionKind.ZSTD, ) ) + self.writers[table_name].set_user_metadata( + swh_object_type=table_name.encode(), + swh_uuid=unique_id.encode(), + swh_model_version=pkginfo.get_metadata(swh.model).version.encode(), + swh_dataset_version=pkginfo.get_metadata(swh.dataset).version.encode(), + # maybe put a copy of the config (redacted) also? + ) return self.writers[table_name] def process_origin(self, origin):