Changeset View
Changeset View
Standalone View
Standalone View
swh/graph/luigi/misc_datasets.py
Show All 33 Lines | |||||
# WARNING: do not import unnecessary things here to keep cli startup time under
# control
from typing import List

import luigi

from .compressed_graph import LocalGraph
from .utils import run_script, silence_webgraph_cache_warning
class TopoSort(luigi.Task):
    """Creates a file that contains all SWHIDs in topological order from a compressed
    graph.

    Parameters:
        local_graph_path: directory holding the compressed graph; it is passed to
            :class:`LocalGraph` and used as the prefix of the graph basename.
        topological_order_path: path of the file this task produces.
        graph_name: basename of the graph files inside ``local_graph_path``
            (defaults to ``"graph"``).
    """

    local_graph_path = luigi.PathParameter()
    topological_order_path = luigi.PathParameter()
    graph_name = luigi.Parameter(default="graph")

    def requires(self) -> List[luigi.Task]:
        """Returns an instance of :class:`LocalGraph`."""
        return [LocalGraph(local_graph_path=self.local_graph_path)]

    def output(self) -> luigi.Target:
        """.csv.zst file that contains the topological order."""
        return luigi.LocalTarget(self.topological_order_path)

    def run(self) -> None:
        """Runs org.softwareheritage.graph.utils.TopoSort and compresses its output.

        The Java class is given the graph basename
        (``<local_graph_path>/<graph_name>``) and the list of object types to
        sort; its standard output is piped through ``pv`` and then ``zstdmt -19``.
        """
        # Imported locally to keep module import (and CLI startup) cheap.
        import shlex

        object_types = "rev,rel,snp,ori"
        class_name = "org.softwareheritage.graph.utils.TopoSort"

        # Quote every interpolated value so that paths or names containing
        # spaces, quotes or other shell metacharacters reach java as a single,
        # unmodified argument.
        graph_basename = shlex.quote(f"{self.local_graph_path}/{self.graph_name}")
        quoted_object_types = shlex.quote(object_types)

        # NOTE(review): helper defined in .utils; presumably prevents webgraph's
        # cache warning for this graph directory -- confirm there.
        silence_webgraph_cache_warning(self.local_graph_path)

        # The trailing backslashes are consumed by Python (backslash-newline in
        # a non-raw string literal), so the shell receives one single-line
        # pipeline.
        script = f"""
        java {class_name} {graph_basename} {quoted_object_types} \
            | pv --line-mode --wait \
            | zstdmt -19
        """
        # NOTE(review): run_script presumably executes the script in a shell and
        # stores its stdout at topological_order_path -- confirm in .utils.
        run_script(script, self.topological_order_path)