Changeset View
Changeset View
Standalone View
Standalone View
swh/graph/luigi/misc_datasets.py
| Show All 33 Lines | |||||
| # WARNING: do not import unnecessary things here to keep cli startup time under | # WARNING: do not import unnecessary things here to keep cli startup time under | ||||
| # control | # control | ||||
| from typing import List | from typing import List | ||||
| import luigi | import luigi | ||||
| from .compressed_graph import LocalGraph | from .compressed_graph import LocalGraph | ||||
| from .utils import run_script | from .utils import run_script, silence_webgraph_cache_warning | ||||
class TopoSort(luigi.Task):
    """Creates a file that contains all SWHIDs in topological order from a compressed
    graph."""

    # Location of the compressed graph; ``<local_graph_path>/<graph_name>`` is the
    # graph path handed to the Java TopoSort tool.
    local_graph_path = luigi.PathParameter()
    # Path of the file declared as this task's output.
    topological_order_path = luigi.PathParameter()
    # Name of the graph inside ``local_graph_path``.
    graph_name = luigi.Parameter(default="graph")

    def requires(self) -> List[luigi.Task]:
        """Returns an instance of :class:`LocalGraph`."""
        return [LocalGraph(local_graph_path=self.local_graph_path)]

    def output(self) -> luigi.Target:
        """.csv.zst file that contains the topological order."""
        return luigi.LocalTarget(self.topological_order_path)

    def run(self) -> None:
        """Runs org.softwareheritage.graph.utils.TopoSort and compresses its
        output with zstd."""
        # Imported locally to keep cli startup time under control (see the
        # warning at the top of this module).
        import shlex

        object_types = "rev,rel,snp,ori"
        class_name = "org.softwareheritage.graph.utils.TopoSort"

        # NOTE(review): presumably suppresses WebGraph's cache warning for this
        # graph directory -- confirm against .utils.
        silence_webgraph_cache_warning(self.local_graph_path)

        # Let shlex do the quoting: hand-written single quotes break as soon as
        # the path itself contains a single quote.
        graph_path = shlex.quote(f"{self.local_graph_path}/{self.graph_name}")

        # The trailing backslashes are consumed by Python (the f-string is not
        # raw), so the shell receives the whole pipeline on a single line.
        script = f"""
        java {class_name} {graph_path} '{object_types}' \
            | pv --line-mode --wait \
            | zstdmt -19
        """
        # NOTE(review): run_script is assumed to write the pipeline's stdout to
        # topological_order_path -- confirm against .utils.
        run_script(script, self.topological_order_path)