Page MenuHomeSoftware Heritage

D7758.diff
No OneTemporary

D7758.diff

diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@
version.txt
.tox
.mypy_cache/
+compressed/logs
diff --git a/swh/graph/webgraph.py b/swh/graph/webgraph.py
--- a/swh/graph/webgraph.py
+++ b/swh/graph/webgraph.py
@@ -17,6 +17,8 @@
from swh.graph.config import check_config_compress
+logger = logging.getLogger(__name__)
+
class CompressionStep(Enum):
EXTRACT_NODES = 1
@@ -262,13 +264,27 @@
def do_step(step, conf):
- cmd = " ".join(STEP_ARGV[step]).format(**conf)
+ log_dir = Path(conf["out_dir"]) / "logs"
+ log_dir.mkdir(exist_ok=True)
+ step_logger = logger.getChild(f"steps.{step.name.lower()}")
+ step_handler = logging.FileHandler(
+ log_dir
+ / (
+ f"{conf['graph_name']}"
+ f"-{int(datetime.now().timestamp() * 1000)}"
+ f"-{str(step).lower()}.log"
+ )
+ )
+ step_logger.addHandler(step_handler)
+
+ step_start_time = datetime.now()
+ step_logger.info("Starting compression step %s at %s", step, step_start_time)
+
+ cmd = " ".join(STEP_ARGV[step]).format(**conf)
cmd_env = os.environ.copy()
cmd_env["JAVA_TOOL_OPTIONS"] = conf["java_tool_options"]
cmd_env["CLASSPATH"] = conf["classpath"]
-
- logging.info(f"running: {cmd}")
process = subprocess.Popen(
["/bin/bash", "-c", cmd],
env=cmd_env,
@@ -276,16 +292,25 @@
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
)
+ step_logger.info("Running: %s", cmd)
+
with process.stdout as stdout:
for line in stdout:
- logging.info(line.rstrip())
+ step_logger.info(line.rstrip())
rc = process.wait()
if rc != 0:
- raise RuntimeError(
- f"compression step {step} returned non-zero " f"exit code {rc}"
- )
- else:
- return rc
+ raise RuntimeError(f"Compression step {step} returned non-zero exit code {rc}")
+ step_end_time = datetime.now()
+ step_duration = step_end_time - step_start_time
+ step_logger.info(
+ "Compression step %s finished at %s (in %s)",
+ step,
+ step_end_time,
+ step_duration,
+ )
+ step_logger.removeHandler(step_handler)
+ step_handler.close()
+ return rc
def compress(
@@ -327,26 +352,15 @@
conf = check_config_compress(conf, graph_name, in_dir, out_dir)
compression_start_time = datetime.now()
- logging.info(f"starting compression at {compression_start_time}")
+ logger.info("Starting compression at %s", compression_start_time)
seq_no = 0
for step in COMP_SEQ:
if step not in steps:
- logging.debug(f"skipping compression step {step}")
+ logger.debug("Skipping compression step %s", step)
continue
seq_no += 1
- step_start_time = datetime.now()
- logging.info(
- f"starting compression step {step} "
- f"({seq_no}/{len(steps)}) at {step_start_time}"
- )
+ logger.info("Running compression step %s (%s/%s)", step, seq_no, len(steps))
do_step(step, conf)
- step_end_time = datetime.now()
- step_duration = step_end_time - step_start_time
- logging.info(
- f"completed compression step {step} "
- f"({seq_no}/{len(steps)}) "
- f"at {step_end_time} in {step_duration}"
- )
compression_end_time = datetime.now()
compression_duration = compression_end_time - compression_start_time
- logging.info(f"completed compression in {compression_duration}")
+ logger.info("Completed compression in %s", compression_duration)

File Metadata

Mime Type
text/plain
Expires
Jul 3 2025, 10:23 AM (4 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3222845

Event Timeline