diff --git a/swh/graph/cli.py b/swh/graph/cli.py --- a/swh/graph/cli.py +++ b/swh/graph/cli.py @@ -6,6 +6,7 @@ import aiohttp import click import logging +import shutil import sys from pathlib import Path @@ -334,6 +335,37 @@ webgraph.compress(graph_name, in_dir, out_dir, steps, conf) +@cli.command(name="memcache") +@click.option( + "--graph", "-g", required=True, metavar="GRAPH", help="compressed graph basename" +) +@click.option( + "--cache", + "-c", + required=True, + metavar="CACHE", + type=PathlibPath(), + help="memory cache path", +) +@click.pass_context +def memcache(ctx, graph, cache): + """ + Cache the mmapped files of the compressed graph in a tmpfs. + + This command creates a new directory at the path given by CACHE that has the + same structure as the compressed graph basename, except it copies the files + that require fast/mmap access (.graph, .obl, .offsets) but uses symlinks + from the source for all the other files (.map, .bin, ...). + """ + cache.mkdir() + for src in Path(graph).parent.glob("*"): + dst = cache / src.name + if src.suffix in (".graph", ".obl", ".offsets"): + shutil.copy2(src, dst) + else: + dst.symlink_to(src.resolve()) + + def main(): return cli(auto_envvar_prefix="SWH_GRAPH")