diff --git a/swh/dataset/exporters/orc.py b/swh/dataset/exporters/orc.py --- a/swh/dataset/exporters/orc.py +++ b/swh/dataset/exporters/orc.py @@ -116,7 +116,8 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.max_rows = self.config.get("max_rows", {}) + config = self.config.get("orc", {}) + self.max_rows = config.get("max_rows", {}) invalid_tables = [ table_name for table_name in self.max_rows if table_name not in MAIN_TABLES @@ -237,7 +238,7 @@ ) def process_snapshot(self, snapshot): - if self.config.get("remove_pull_requests"): + if self.config.get("orc", {}).get("remove_pull_requests"): remove_pull_requests(snapshot) snapshot_writer = self.get_writer_for("snapshot") snapshot_writer.write((hash_to_hex_or_none(snapshot["id"]),)) diff --git a/swh/dataset/test/test_orc.py b/swh/dataset/test/test_orc.py --- a/swh/dataset/test/test_orc.py +++ b/swh/dataset/test/test_orc.py @@ -239,7 +239,7 @@ def test_export_related_files(max_rows, obj_type, tmpdir): config = {} if max_rows is not None: - config["max_rows"] = {obj_type: max_rows} + config["orc"] = {"max_rows": {obj_type: max_rows}} exporter({obj_type: TEST_OBJECTS[obj_type]}, config=config, tmpdir=tmpdir) # check there are as many ORC files as objects orcfiles = [fname for fname in (tmpdir / obj_type).listdir(f"{obj_type}-*.orc")] @@ -283,6 +283,6 @@ @pytest.mark.parametrize("table_name", RELATION_TABLES.keys()) def test_export_invalid_max_rows(table_name): - config = {"max_rows": {table_name: 10}} + config = {"orc": {"max_rows": {table_name: 10}}} with pytest.raises(ValueError): exporter({}, config=config)