diff --git a/swh/dataset/exporters/orc.py b/swh/dataset/exporters/orc.py --- a/swh/dataset/exporters/orc.py +++ b/swh/dataset/exporters/orc.py @@ -116,7 +116,8 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.max_rows = self.config.get("max-rows", {}) + config = self.config.get('orc', {}) + self.max_rows = config.get("max_rows", {}) self._reset() def _reset(self): @@ -227,7 +228,7 @@ ) def process_snapshot(self, snapshot): - if self.config.get("remove_pull_requests"): + if self.config.get("orc", {}).get("remove_pull_requests"): remove_pull_requests(snapshot) snapshot_writer = self.get_writer_for("snapshot") snapshot_writer.write((hash_to_hex_or_none(snapshot["id"]),)) diff --git a/swh/dataset/test/test_orc.py b/swh/dataset/test/test_orc.py --- a/swh/dataset/test/test_orc.py +++ b/swh/dataset/test/test_orc.py @@ -244,7 +244,7 @@ def test_export_related_files(max_rows, obj_type, tmpdir): config = {} if max_rows is not None: - config["max-rows"] = {obj_type: max_rows} + config["orc"] = {"max_rows": {obj_type: max_rows}} exporter({obj_type: TEST_OBJECTS[obj_type]}, config=config, tmpdir=tmpdir) # check there are as many ORC files as objects orcfiles = [fname for fname in (tmpdir / obj_type).listdir(f"{obj_type}-*.orc")]