Changeset View
Changeset View
Standalone View
Standalone View
swh/dataset/cli.py
Show First 20 Lines • Show All 60 Lines • ▼ Show 20 Lines | @click.option( | ||||
help="Formats to export.", | help="Formats to export.", | ||||
) | ) | ||||
@click.option("--processes", "-p", default=1, help="Number of parallel processes") | @click.option("--processes", "-p", default=1, help="Number of parallel processes") | ||||
@click.option( | @click.option( | ||||
"--exclude", | "--exclude", | ||||
type=click.STRING, | type=click.STRING, | ||||
help="Comma-separated list of object types to exclude", | help="Comma-separated list of object types to exclude", | ||||
) | ) | ||||
@click.option( | |||||
"--reset", | |||||
type=bool, | |||||
is_flag=True, | |||||
vlorentz: copy-pasted too fast | |||||
help=( | |||||
"Consume the kafka journal from the beginning instead of current " | |||||
"(committed) offsets" | |||||
), | |||||
) | |||||
@click.pass_context | @click.pass_context | ||||
def export_graph(ctx, export_path, export_id, formats, exclude, processes): | def export_graph(ctx, export_path, export_id, formats, exclude, processes, reset): | ||||
"""Export the Software Heritage graph as an edge dataset.""" | """Export the Software Heritage graph as an edge dataset.""" | ||||
import uuid | import uuid | ||||
config = ctx.obj["config"] | config = ctx.obj["config"] | ||||
if not export_id: | if not export_id: | ||||
export_id = str(uuid.uuid4()) | export_id = str(uuid.uuid4()) | ||||
exclude_obj_types = {o.strip() for o in (exclude.split(",") if exclude else [])} | exclude_obj_types = {o.strip() for o in (exclude.split(",") if exclude else [])} | ||||
Show All 25 Lines | for obj_type in object_types: | ||||
] | ] | ||||
parallel_exporter = ParallelJournalProcessor( | parallel_exporter = ParallelJournalProcessor( | ||||
config, | config, | ||||
exporters, | exporters, | ||||
export_id, | export_id, | ||||
obj_type, | obj_type, | ||||
node_sets_path=pathlib.Path(export_path) / ".node_sets" / obj_type, | node_sets_path=pathlib.Path(export_path) / ".node_sets" / obj_type, | ||||
processes=processes, | processes=processes, | ||||
reset_offsets=reset, | |||||
) | ) | ||||
print("Exporting {}:".format(obj_type)) | print("Exporting {}:".format(obj_type)) | ||||
parallel_exporter.run() | parallel_exporter.run() | ||||
@graph.command("sort") | @graph.command("sort") | ||||
@click.argument("export-path", type=click.Path()) | @click.argument("export-path", type=click.Path()) | ||||
@click.pass_context | @click.pass_context | ||||
▲ Show 20 Lines • Show All 64 Lines • Show Last 20 Lines |
copy-pasted too fast