Page Menu | Home | Software Heritage

D7519.diff
No One | Temporary

D7519.diff

diff --git a/swh/dataset/cli.py b/swh/dataset/cli.py
--- a/swh/dataset/cli.py
+++ b/swh/dataset/cli.py
@@ -13,9 +13,7 @@
from swh.core.cli import CONTEXT_SETTINGS
from swh.core.cli import swh as swh_cli_group
-from swh.dataset.exporters.edges import GraphEdgesExporter
-from swh.dataset.exporters.orc import ORCExporter
-from swh.dataset.journalprocessor import ParallelJournalProcessor
+from swh.dataset.relational import MAIN_TABLES
@swh_cli_group.group(name="dataset", context_settings=CONTEXT_SETTINGS)
@@ -28,7 +26,12 @@
)
@click.pass_context
def dataset_cli_group(ctx, config_file):
- """Software Heritage Dataset Tools"""
+ """Dataset Tools.
+
+ A set of tools to export datasets from the Software Heritage Archive in
+ various formats.
+
+ """
from swh.core import config
ctx.ensure_object(dict)
@@ -44,8 +47,8 @@
AVAILABLE_EXPORTERS = {
- "edges": GraphEdgesExporter,
- "orc": ORCExporter,
+ "edges": "swh.dataset.exporters.edges:GraphEdgesExporter",
+ "orc": "swh.dataset.exporters.orc:ORCExporter",
}
@@ -78,6 +81,8 @@
def export_graph(ctx, export_path, export_id, formats, exclude, processes):
"""Export the Software Heritage graph as an edge dataset."""
import uuid
+ from importlib import import_module
+ from swh.dataset.journalprocessor import ParallelJournalProcessor
config = ctx.obj["config"]
if not export_id:
@@ -91,23 +96,22 @@
option_name="formats", message=f"{f} is not an available format."
)
+ def importcls(clspath):
+ mod, cls = clspath.split(":")
+ m = import_module(mod)
+ return getattr(m, cls)
+
+ exporter_cls = dict(
+ (fmt, importcls(clspath))
+ for (fmt, clspath) in AVAILABLE_EXPORTERS.items()
+ if fmt in export_formats
+ )
# Run the exporter for each edge type.
- object_types = [
- "origin",
- "origin_visit",
- "origin_visit_status",
- "snapshot",
- "release",
- "revision",
- "directory",
- "content",
- "skipped_content",
- ]
- for obj_type in object_types:
+ for obj_type in MAIN_TABLES:
if obj_type in exclude_obj_types:
continue
exporters = [
- (AVAILABLE_EXPORTERS[f], {"export_path": os.path.join(export_path, f)},)
+ (exporter_cls[f], {"export_path": os.path.join(export_path, f)},)
for f in export_formats
]
parallel_exporter = ParallelJournalProcessor(

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 4:19 AM (19 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224625

Event Timeline