diff --git a/swh/graph/luigi/misc_datasets.py b/swh/graph/luigi/misc_datasets.py --- a/swh/graph/luigi/misc_datasets.py +++ b/swh/graph/luigi/misc_datasets.py @@ -39,7 +39,7 @@ import luigi from .compressed_graph import LocalGraph -from .utils import run_script +from .utils import run_script, silence_webgraph_cache_warning class TopoSort(luigi.Task): @@ -62,6 +62,7 @@ """Runs org.softwareheritage.graph.utils.TopoSort and compresses""" object_types = "rev,rel,snp,ori" class_name = "org.softwareheritage.graph.utils.TopoSort" + silence_webgraph_cache_warning(self.local_graph_path) script = f""" java {class_name} '{self.local_graph_path}/{self.graph_name}' '{object_types}' \ | pv --line-mode --wait \ diff --git a/swh/graph/luigi/origin_contributors.py b/swh/graph/luigi/origin_contributors.py --- a/swh/graph/luigi/origin_contributors.py +++ b/swh/graph/luigi/origin_contributors.py @@ -20,7 +20,7 @@ from .compressed_graph import LocalGraph from .misc_datasets import TopoSort -from .utils import run_script +from .utils import run_script, silence_webgraph_cache_warning class ListOriginContributors(luigi.Task): @@ -51,6 +51,7 @@ def run(self) -> None: """Runs org.softwareheritage.graph.utils.TopoSort and compresses""" class_name = "org.softwareheritage.graph.utils.ListOriginContributors" + silence_webgraph_cache_warning(self.local_graph_path) script = f""" zstdcat {self.topological_order_path} \ | java {class_name} '{self.local_graph_path}/{self.graph_name}' \ diff --git a/swh/graph/luigi/utils.py b/swh/graph/luigi/utils.py --- a/swh/graph/luigi/utils.py +++ b/swh/graph/luigi/utils.py @@ -33,3 +33,13 @@ # Atomically write the output file tmp_output_path.replace(output_path) + + +def silence_webgraph_cache_warning(local_graph_path: Path): + """Touches .obl files so WebGraph does not warn about them being older than + .offset files. 
+ + This is a workaround for https://github.com/vigna/webgraph-big/pull/6 + """ + for path in local_graph_path.glob("*.obl"): + path.touch()