Changeset View
Changeset View
Standalone View
Standalone View
swh/graph/luigi/utils.py
# Copyright (C) 2022 The Software Heritage developers | # Copyright (C) 2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from pathlib import Path | from pathlib import Path | ||||
from typing import Dict | from typing import Dict | ||||
# Logback configuration, kept as bytes so it can be written directly to a
# binary-mode temporary file.
LOGBACK_CONF = b"""\
<configuration>
<appender name="STDERR" class="ch.qos.logback.core.ConsoleAppender">
<target>System.err</target>
<encoder>
<pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} %msg%n</pattern>
</encoder>
</appender>
<root level="debug">
<appender-ref ref="STDERR" />
</root>
</configuration>
"""
"""Overrides the default config, to log to stderr instead of stdout"""
def run_script(script: str, output_path: Path) -> None:
    """Run ``script`` with bash and atomically store its stdout in ``output_path``.

    The script runs with ``JAVA_TOOL_OPTIONS`` and ``CLASSPATH`` taken from the
    checked configuration, plus a ``-Dlogback.configurationFile`` option pointing
    at a temporary copy of :data:`LOGBACK_CONF`.

    The output is first written to ``<output_path>.tmp`` and only renamed to
    ``output_path`` once the script has succeeded, so a failed run never leaves
    a partial ``output_path`` behind.

    Args:
        script: shell code passed to ``bash -c``; surrounding whitespace is
            stripped.
        output_path: file receiving the script's standard output; missing
            parent directories are created.

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero
            status.
    """
    import os
    import subprocess
    import tempfile

    from ..config import check_config

    conf: Dict = {}  # TODO: configurable
    # check_config() must supply "java_tool_options" and "classpath", as both
    # are read below from what starts out as an empty dict.
    conf = check_config(conf)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_output_path = Path(f"{output_path}.tmp")

    # The temporary file is deleted when this context exits, so the subprocess
    # must run inside it.
    with tempfile.NamedTemporaryFile(prefix="logback_", suffix=".xml") as logback_conf:
        logback_conf.write(LOGBACK_CONF)
        # The file object is buffered: flush so the JVM, which opens the file
        # by name from another process, sees the complete configuration.
        logback_conf.flush()

        java_tool_options = [
            # Must be the file's path, not the repr of the file object.
            f"-Dlogback.configurationFile={logback_conf.name}",
            conf["java_tool_options"],
        ]

        env = {
            **os.environ.copy(),
            "JAVA_TOOL_OPTIONS": " ".join(java_tool_options),
            "CLASSPATH": conf["classpath"],
        }

        with tmp_output_path.open("wb") as tmp_output:
            subprocess.run(
                ["bash", "-c", script.strip()],
                stdout=tmp_output,
                env=env,
                check=True,
            )

    # Atomically write the output file
    tmp_output_path.replace(output_path)
def silence_webgraph_cache_warning(local_graph_path: Path) -> None:
    """Touches .obl files so WebGraph does not warn about them being older than
    .offset files.

    Only ``*.obl`` files directly inside ``local_graph_path`` are touched;
    subdirectories are not searched.

    This is a workaround for https://github.com/vigna/webgraph-big/pull/6
    """
    for obl_path in local_graph_path.glob("*.obl"):
        # touch() bumps the modification time of the existing file.
        obl_path.touch()