diff --git a/setup.py b/setup.py index 5708a18..316d6ab 100755 --- a/setup.py +++ b/setup.py @@ -1,77 +1,77 @@ #!/usr/bin/env python3 # Copyright (C) 2015-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from setuptools import setup, find_packages - -from os import path -from io import open from glob import glob +from io import open +from os import path + +from setuptools import find_packages, setup here = path.abspath(path.dirname(__file__)) # Get the long description from the README file with open(path.join(here, "README.rst"), encoding="utf-8") as f: long_description = f.read() def parse_requirements(name=None): if name: reqf = "requirements-%s.txt" % name else: reqf = "requirements.txt" requirements = [] if not path.exists(reqf): return requirements with open(reqf) as f: for line in f.readlines(): line = line.strip() if not line or line.startswith("#"): continue requirements.append(line) return requirements JAR_PATHS = list(glob("java/target/swh-graph-*.jar")) setup( name="swh.graph", description="Software Heritage graph service", long_description=long_description, long_description_content_type="text/x-rst", python_requires=">=3.7", author="Software Heritage developers", author_email="swh-devel@inria.fr", url="https://forge.softwareheritage.org/diffusion/DGRPH", packages=find_packages(), install_requires=parse_requirements() + parse_requirements("swh"), tests_require=parse_requirements("test"), setup_requires=["setuptools-scm"], use_scm_version=True, extras_require={"testing": parse_requirements("test")}, include_package_data=True, data_files=[("share/swh-graph", JAR_PATHS)], entry_points=""" [console_scripts] swh-graph=swh.graph.cli:main [swh.cli.subcommands] graph=swh.graph.cli:cli """, classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Development Status :: 3 - Alpha", ], project_urls={ "Bug Reports": "https://forge.softwareheritage.org/maniphest", "Funding": "https://www.softwareheritage.org/donate", "Source": "https://forge.softwareheritage.org/source/swh-graph", "Documentation": "https://docs.softwareheritage.org/devel/swh-graph/", }, ) diff --git a/swh/graph/cli.py b/swh/graph/cli.py index 770899c..5265ac2 100644 --- a/swh/graph/cli.py +++ b/swh/graph/cli.py @@ -1,444 +1,445 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -# WARNING: do not import unnecessary things here to keep cli startup time under -# control -import click import logging +from pathlib import Path import sys +from typing import TYPE_CHECKING, Any, Dict, Set, Tuple -from pathlib import Path -from typing import Any, Dict, Tuple, Set, TYPE_CHECKING +# WARNING: do not import unnecessary things here to keep cli startup time under +# control +import click from swh.core.cli import CONTEXT_SETTINGS, AliasedGroup if TYPE_CHECKING: from swh.graph.webgraph import CompressionStep # noqa class StepOption(click.ParamType): """click type for specifying a compression step on the CLI parse either individual steps, specified as step names or integers, or step ranges """ name = "compression step" def convert(self, 
value, param, ctx): # type: (...) -> Set[CompressionStep] - from swh.graph.webgraph import CompressionStep, COMP_SEQ # noqa + from swh.graph.webgraph import COMP_SEQ, CompressionStep # noqa steps: Set[CompressionStep] = set() specs = value.split(",") for spec in specs: if "-" in spec: # step range (raw_l, raw_r) = spec.split("-", maxsplit=1) if raw_l == "": # no left endpoint raw_l = COMP_SEQ[0].name if raw_r == "": # no right endpoint raw_r = COMP_SEQ[-1].name l_step = self.convert(raw_l, param, ctx) r_step = self.convert(raw_r, param, ctx) if len(l_step) != 1 or len(r_step) != 1: self.fail(f"invalid step specification: {value}, " f"see --help") l_idx = l_step.pop() r_idx = r_step.pop() steps = steps.union( set(map(CompressionStep, range(l_idx.value, r_idx.value + 1))) ) else: # singleton step try: steps.add(CompressionStep(int(spec))) # integer step except ValueError: try: steps.add(CompressionStep[spec.upper()]) # step name except KeyError: self.fail( f"invalid step specification: {value}, " f"see --help" ) return steps class PathlibPath(click.Path): """A Click path argument that returns a pathlib Path, not a string""" def convert(self, value, param, ctx): return Path(super().convert(value, param, ctx)) DEFAULT_CONFIG: Dict[str, Tuple[str, Any]] = {"graph": ("dict", {})} @click.group(name="graph", context_settings=CONTEXT_SETTINGS, cls=AliasedGroup) @click.option( "--config-file", "-C", default=None, type=click.Path(exists=True, dir_okay=False,), help="YAML configuration file", ) @click.pass_context def cli(ctx, config_file): """Software Heritage graph tools.""" from swh.core import config ctx.ensure_object(dict) conf = config.read(config_file, DEFAULT_CONFIG) if "graph" not in conf: raise ValueError( 'no "graph" stanza found in configuration file %s' % config_file ) ctx.obj["config"] = conf @cli.command("api-client") @click.option("--host", default="localhost", help="Graph server host") @click.option("--port", default="5009", help="Graph server port") @click.pass_context def api_client(ctx, host, port): """client for the graph REST service""" from swh.graph import client url = "http://{}:{}".format(host, port) app = client.RemoteGraphClient(url) # TODO: run web app print(app.stats()) @cli.group("map") @click.pass_context def map(ctx): """Manage swh-graph on-disk maps""" pass def dump_pid2node(filename): from swh.graph.pid import PidToNodeMap for (pid, int) in PidToNodeMap(filename): print("{}\t{}".format(pid, int)) def dump_node2pid(filename): from swh.graph.pid import NodeToPidMap for (int, pid) in NodeToPidMap(filename): print("{}\t{}".format(int, pid)) def restore_pid2node(filename): """read a textual PID->int map from stdin and write its binary version to filename """ from swh.graph.pid import PidToNodeMap with open(filename, "wb") as dst: for line in sys.stdin: (str_pid, str_int) = line.split() PidToNodeMap.write_record(dst, str_pid, int(str_int)) def restore_node2pid(filename, length): """read a textual int->PID map from stdin and write its binary version to filename """ from swh.graph.pid import NodeToPidMap node2pid = NodeToPidMap(filename, mode="wb", length=length) for line in sys.stdin: (str_int, str_pid) = line.split() node2pid[int(str_int)] = str_pid node2pid.close() @map.command("dump") @click.option( "--type", "-t", "map_type", required=True, type=click.Choice(["pid2node", "node2pid"]), help="type of map to dump", ) @click.argument("filename", required=True, type=click.Path(exists=True)) @click.pass_context def dump_map(ctx, map_type, filename): """Dump a binary 
PID<->node map to textual format.""" if map_type == "pid2node": dump_pid2node(filename) elif map_type == "node2pid": dump_node2pid(filename) else: raise ValueError("invalid map type: " + map_type) @map.command("restore") @click.option( "--type", "-t", "map_type", required=True, type=click.Choice(["pid2node", "node2pid"]), help="type of map to restore", ) @click.option( "--length", "-l", type=int, help="""map size in number of logical records (required for node2pid maps)""", ) @click.argument("filename", required=True, type=click.Path()) @click.pass_context def restore_map(ctx, map_type, length, filename): """Restore a binary PID<->node map from textual format.""" if map_type == "pid2node": restore_pid2node(filename) elif map_type == "node2pid": if length is None: raise click.UsageError( "map length is required when restoring {} maps".format(map_type), ctx ) restore_node2pid(filename, length) else: raise ValueError("invalid map type: " + map_type) @map.command("write") @click.option( "--type", "-t", "map_type", required=True, type=click.Choice(["pid2node", "node2pid"]), help="type of map to write", ) @click.argument("filename", required=True, type=click.Path()) @click.pass_context def write(ctx, map_type, filename): """Write a map to disk sequentially. Read from stdin a textual PID->node mapping (for pid2node), or a simple sequence of PIDs (for node2pid), and write it to disk in the requested binary map format. Note that no sorting is applied, so the input should already be sorted as required by the chosen map type (by PID for pid2node, by int for node2pid). """ - from swh.graph.pid import PidToNodeMap, NodeToPidMap + from swh.graph.pid import NodeToPidMap, PidToNodeMap with open(filename, "wb") as f: if map_type == "pid2node": for line in sys.stdin: (pid, int_str) = line.rstrip().split(maxsplit=1) PidToNodeMap.write_record(f, pid, int(int_str)) elif map_type == "node2pid": for line in sys.stdin: pid = line.rstrip() NodeToPidMap.write_record(f, pid) else: raise ValueError("invalid map type: " + map_type) @map.command("lookup") @click.option( "--graph", "-g", required=True, metavar="GRAPH", help="compressed graph basename" ) @click.argument("identifiers", nargs=-1) def map_lookup(graph, identifiers): """Lookup identifiers using on-disk maps. Depending on the identifier type, lookup either a PID into a PID->node map (and return the node integer identifier) or, vice-versa, lookup a node integer identifier into a node->PID map (and return the PID). The desired behavior is chosen depending on the syntax of each given identifier. Identifiers can be passed either directly on the command line or on standard input, separated by blanks. Logical lines (as returned by readline()) in stdin will be preserved in stdout.
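A minimal sketch of the same lookups done programmatically, assuming a hypothetical compressed graph basename ``graph`` with its companion map files on disk:

```python
# Sketch: programmatic PID <-> node lookups, mirroring this command.
# "graph" is a hypothetical compressed graph basename.
from swh.graph.backend import NODE2PID_EXT, PID2NODE_EXT
from swh.graph.pid import NodeToPidMap, PidToNodeMap

pid2node = PidToNodeMap(f"graph.{PID2NODE_EXT}")
node2pid = NodeToPidMap(f"graph.{NODE2PID_EXT}")

node = pid2node["swh:1:rev:0000000000000000000000000000000000000009"]  # PID -> int
print(node2pid[node])  # int -> PID, round-trips to the input identifier
```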
""" - import swh.model.exceptions from swh.graph.backend import NODE2PID_EXT, PID2NODE_EXT - from swh.graph.pid import PidToNodeMap, NodeToPidMap + from swh.graph.pid import NodeToPidMap, PidToNodeMap + import swh.model.exceptions from swh.model.identifiers import parse_persistent_identifier success = True # no identifiers failed to be looked up pid2node = PidToNodeMap(f"{graph}.{PID2NODE_EXT}") node2pid = NodeToPidMap(f"{graph}.{NODE2PID_EXT}") def lookup(identifier): nonlocal success, pid2node, node2pid is_pid = None try: int(identifier) is_pid = False except ValueError: try: parse_persistent_identifier(identifier) is_pid = True except swh.model.exceptions.ValidationError: success = False logging.error(f'invalid identifier: "{identifier}", skipping') try: if is_pid: return str(pid2node[identifier]) else: return node2pid[int(identifier)] except KeyError: success = False logging.error(f'identifier not found: "{identifier}", skipping') if identifiers: # lookup identifiers passed via CLI for identifier in identifiers: print(lookup(identifier)) else: # lookup identifiers passed via stdin, preserving logical lines for line in sys.stdin: results = [lookup(id) for id in line.rstrip().split()] if results: # might be empty if all IDs on the same line failed print(" ".join(results)) sys.exit(0 if success else 1) @cli.command(name="rpc-serve") @click.option( "--host", "-h", default="0.0.0.0", metavar="IP", show_default=True, help="host IP address to bind the server on", ) @click.option( "--port", "-p", default=5009, type=click.INT, metavar="PORT", show_default=True, help="port to bind the server on", ) @click.option( "--graph", "-g", required=True, metavar="GRAPH", help="compressed graph basename" ) @click.pass_context def serve(ctx, host, port, graph): """run the graph REST service""" import aiohttp - from swh.graph.server.app import make_app + from swh.graph.backend import Backend + from swh.graph.server.app import make_app backend = Backend(graph_path=graph, config=ctx.obj["config"]) app = make_app(backend=backend) with backend: aiohttp.web.run_app(app, host=host, port=port) @cli.command() @click.option( "--graph", "-g", required=True, metavar="GRAPH", type=PathlibPath(), help="input graph basename", ) @click.option( "--outdir", "-o", "out_dir", required=True, metavar="DIR", type=PathlibPath(), help="directory where to store compressed graph", ) @click.option( "--steps", "-s", metavar="STEPS", type=StepOption(), help="run only these compression steps (default: all steps)", ) @click.pass_context def compress(ctx, graph, out_dir, steps): """Compress a graph using WebGraph Input: a pair of files g.nodes.csv.gz, g.edges.csv.gz Output: a directory containing a WebGraph compressed graph Compression steps are: (1) mph, (2) bv, (3) bv_obl, (4) bfs, (5) permute, (6) permute_obl, (7) stats, (8) transpose, (9) transpose_obl, (10) maps, (11) clean_tmp. Compression steps can be selected by name or number using --steps, separating them with commas; step ranges (e.g., 3-9, 6-, etc.) are also supported. 
""" from swh.graph import webgraph graph_name = graph.name in_dir = graph.parent try: conf = ctx.obj["config"]["graph"]["compress"] except KeyError: conf = {} # use defaults webgraph.compress(graph_name, in_dir, out_dir, steps, conf) @cli.command(name="cachemount") @click.option( "--graph", "-g", required=True, metavar="GRAPH", help="compressed graph basename" ) @click.option( "--cache", "-c", default="/dev/shm/swh-graph/default", metavar="CACHE", type=PathlibPath(), help="Memory cache path (defaults to /dev/shm/swh-graph/default)", ) @click.pass_context def cachemount(ctx, graph, cache): """ Cache the mmapped files of the compressed graph in a tmpfs. This command creates a new directory at the path given by CACHE that has the same structure as the compressed graph basename, except it copies the files that require mmap access (*.graph) but uses symlinks from the source for all the other files (.map, .bin, ...). The command outputs the path to the memory cache directory (particularly useful when relying on the default value). """ import shutil cache.mkdir(parents=True) for src in Path(graph).parent.glob("*"): dst = cache / src.name if src.suffix == ".graph": shutil.copy2(src, dst) else: dst.symlink_to(src.resolve()) print(cache) def main(): return cli(auto_envvar_prefix="SWH_GRAPH") if __name__ == "__main__": main() diff --git a/swh/graph/config.py b/swh/graph/config.py index 93c17c4..4487753 100644 --- a/swh/graph/config.py +++ b/swh/graph/config.py @@ -1,110 +1,111 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging -import psutil -import sys from pathlib import Path +import sys + +import psutil def find_graph_jar(): """find swh-graph.jar, containing the Java part of swh-graph look both in development directories and installed data (for in-production deployments who fecthed the JAR from pypi) """ swh_graph_root = Path(__file__).parents[2] try_paths = [ swh_graph_root / "java/target/", Path(sys.prefix) / "share/swh-graph/", Path(sys.prefix) / "local/share/swh-graph/", ] for path in try_paths: glob = list(path.glob("swh-graph-*.jar")) if glob: if len(glob) > 1: logging.warn( "found multiple swh-graph JARs, " "arbitrarily picking one" ) logging.info("using swh-graph JAR: {0}".format(glob[0])) return str(glob[0]) raise RuntimeError("swh-graph JAR not found. Have you run `make java`?") def check_config(conf): """check configuration and propagate defaults """ conf = conf.copy() if "batch_size" not in conf: conf["batch_size"] = "1000000000" # 1 billion if "max_ram" not in conf: conf["max_ram"] = str(psutil.virtual_memory().total) if "java_tool_options" not in conf: conf["java_tool_options"] = " ".join( [ "-Xmx{max_ram}", "-XX:PretenureSizeThreshold=512M", "-XX:MaxNewSize=4G", "-XX:+UseLargePages", "-XX:+UseTransparentHugePages", "-XX:+UseNUMA", "-XX:+UseTLAB", "-XX:+ResizeTLAB", ] ) conf["java_tool_options"] = conf["java_tool_options"].format( max_ram=conf["max_ram"] ) if "java" not in conf: conf["java"] = "java" if "classpath" not in conf: conf["classpath"] = find_graph_jar() return conf def check_config_compress(config, graph_name, in_dir, out_dir): """check compression-specific configuration and initialize its execution environment. 
""" conf = check_config(config) conf["graph_name"] = graph_name conf["in_dir"] = str(in_dir) conf["out_dir"] = str(out_dir) out_dir.mkdir(parents=True, exist_ok=True) if "tmp_dir" not in conf: tmp_dir = out_dir / "tmp" conf["tmp_dir"] = str(tmp_dir) else: tmp_dir = Path(conf["tmp_dir"]) tmp_dir.mkdir(parents=True, exist_ok=True) if "logback" not in conf: logback_confpath = tmp_dir / "logback.xml" with open(logback_confpath, "w") as conffile: conffile.write( """ %d %r %p [%t] %logger{1} - %m%n """ ) conf["logback"] = str(logback_confpath) conf["java_tool_options"] += " -Dlogback.configurationFile={logback}" conf["java_tool_options"] = conf["java_tool_options"].format( logback=conf["logback"] ) return conf diff --git a/swh/graph/dot.py b/swh/graph/dot.py index 505ec65..6a17150 100644 --- a/swh/graph/dot.py +++ b/swh/graph/dot.py @@ -1,69 +1,68 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import collections from functools import lru_cache import subprocess -import collections - KIND_TO_SHAPE = { "ori": "egg", "snp": "doubleoctagon", "rel": "octagon", "rev": "diamond", "dir": "folder", "cnt": "oval", } @lru_cache() def dot_to_svg(dot): try: p = subprocess.run( ["dot", "-Tsvg"], input=dot, universal_newlines=True, capture_output=True, check=True, ) except subprocess.CalledProcessError as e: raise RuntimeError(e.stderr) from e return p.stdout def graph_dot(nodes): ids = {n.id for n in nodes} by_kind = collections.defaultdict(list) for n in nodes: by_kind[n.kind].append(n) forward_edges = [ (node.id, child.id) for node in nodes for child in node.children() if child.id in ids ] backward_edges = [ (parent.id, node.id) for node in nodes for parent in node.parents() if parent.id in ids ] edges = set(forward_edges + backward_edges) edges_fmt = "\n".join("{} -> {};".format(a, b) for a, b in edges) nodes_fmt = "\n".join(node.dot_fragment() for node in nodes) s = """digraph G {{ ranksep=1; nodesep=0.5; {nodes} {edges} }}""".format( nodes=nodes_fmt, edges=edges_fmt ) return s diff --git a/swh/graph/graph.py b/swh/graph/graph.py index fcc71ba..726ab6d 100644 --- a/swh/graph/graph.py +++ b/swh/graph/graph.py @@ -1,185 +1,185 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import asyncio import contextlib import functools -from swh.graph.backend import Backend -from swh.graph.dot import dot_to_svg, graph_dot, KIND_TO_SHAPE +from swh.graph.backend import Backend +from swh.graph.dot import KIND_TO_SHAPE, dot_to_svg, graph_dot BASE_URL = "https://archive.softwareheritage.org/browse" KIND_TO_URL_FRAGMENT = { "ori": "/origin/{}", "snp": "/snapshot/{}", "rel": "/release/{}", "rev": "/revision/{}", "dir": "/directory/{}", "cnt": "/content/sha1_git:{}/", } def call_async_gen(generator, *args, **kwargs): loop = asyncio.get_event_loop() it = generator(*args, **kwargs).__aiter__() while True: try: res = loop.run_until_complete(it.__anext__()) yield res except StopAsyncIteration: break class Neighbors: """Neighbor iterator with custom O(1) length method""" def __init__(self, graph, iterator, length_func): self.graph = graph self.iterator = iterator self.length_func = length_func def __iter__(self): return self def 
__next__(self): succ = self.iterator.nextLong() if succ == -1: raise StopIteration return GraphNode(self.graph, succ) def __len__(self): return self.length_func() class GraphNode: """Node in the SWH graph""" def __init__(self, graph, node_id): self.graph = graph self.id = node_id def children(self): return Neighbors( self.graph, self.graph.java_graph.successors(self.id), lambda: self.graph.java_graph.outdegree(self.id), ) def parents(self): return Neighbors( self.graph, self.graph.java_graph.predecessors(self.id), lambda: self.graph.java_graph.indegree(self.id), ) def simple_traversal(self, ttype, direction="forward", edges="*"): for node in call_async_gen( self.graph.backend.simple_traversal, ttype, direction, edges, self.id ): yield self.graph[node] def leaves(self, *args, **kwargs): yield from self.simple_traversal("leaves", *args, **kwargs) def visit_nodes(self, *args, **kwargs): yield from self.simple_traversal("visit_nodes", *args, **kwargs) def visit_edges(self, direction="forward", edges="*"): for src, dst in call_async_gen( self.graph.backend.visit_edges, direction, edges, self.id ): yield (self.graph[src], self.graph[dst]) def visit_paths(self, direction="forward", edges="*"): for path in call_async_gen( self.graph.backend.visit_paths, direction, edges, self.id ): yield [self.graph[node] for node in path] def walk(self, dst, direction="forward", edges="*", traversal="dfs"): for node in call_async_gen( self.graph.backend.walk, direction, edges, traversal, self.id, dst ): yield self.graph[node] def _count(self, ttype, direction="forward", edges="*"): return self.graph.backend.count(ttype, direction, edges, self.id) count_leaves = functools.partialmethod(_count, ttype="leaves") count_neighbors = functools.partialmethod(_count, ttype="neighbors") count_visit_nodes = functools.partialmethod(_count, ttype="visit_nodes") @property def pid(self): return self.graph.node2pid[self.id] @property def kind(self): return self.pid.split(":")[2] def __str__(self): return self.pid def __repr__(self): return "<{}>".format(self.pid) def dot_fragment(self): swh, version, kind, hash = self.pid.split(":") label = "{}:{}..{}".format(kind, hash[0:2], hash[-2:]) url = BASE_URL + KIND_TO_URL_FRAGMENT[kind].format(hash) shape = KIND_TO_SHAPE[kind] return '{} [label="{}", href="{}", target="_blank", shape="{}"];'.format( self.id, label, url, shape ) def _repr_svg_(self): nodes = [self, *list(self.children()), *list(self.parents())] dot = graph_dot(nodes) svg = dot_to_svg(dot) return svg class Graph: def __init__(self, backend, node2pid, pid2node): self.backend = backend self.java_graph = backend.entry.get_graph() self.node2pid = node2pid self.pid2node = pid2node def stats(self): return self.backend.stats() @property def path(self): return self.java_graph.getPath() def __len__(self): return self.java_graph.numNodes() def __getitem__(self, node_id): if isinstance(node_id, int): self.node2pid[node_id] # check existence return GraphNode(self, node_id) elif isinstance(node_id, str): node_id = self.pid2node[node_id] return GraphNode(self, node_id) def __iter__(self): for pid, pos in self.backend.pid2node: yield self[pid] def iter_prefix(self, prefix): for pid, pos in self.backend.pid2node.iter_prefix(prefix): yield self[pid] def iter_type(self, pid_type): for pid, pos in self.backend.pid2node.iter_type(pid_type): yield self[pid] @contextlib.contextmanager def load(graph_path): with Backend(graph_path) as backend: yield Graph(backend, backend.node2pid, backend.pid2node) diff --git a/swh/graph/pid.py 
b/swh/graph/pid.py index 138e25f..f0e4a67 100644 --- a/swh/graph/pid.py +++ b/swh/graph/pid.py @@ -1,406 +1,404 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import mmap -import os -import struct - from collections.abc import MutableMapping from enum import Enum +import mmap from mmap import MAP_SHARED, PROT_READ, PROT_WRITE +import os +import struct from typing import BinaryIO, Iterator, Tuple from swh.model.identifiers import SWHID, parse_swhid - PID_BIN_FMT = "BB20s" # 2 unsigned chars + 20 bytes INT_BIN_FMT = ">q" # big endian, 8-byte integer PID_BIN_SIZE = 22 # in bytes INT_BIN_SIZE = 8 # in bytes class PidType(Enum): """types of existing PIDs, used to serialize PID type as a (char) integer Note that the order does matter also for driving the binary search in PID-indexed maps. Integer values also matter, for compatibility with the Java layer. """ content = 0 directory = 1 origin = 2 release = 3 revision = 4 snapshot = 5 def str_to_bytes(pid_str: str) -> bytes: """Convert a PID to a byte sequence The binary format used to represent PIDs as 22-byte long byte sequences as follows: - 1 byte for the namespace version represented as a C `unsigned char` - 1 byte for the object type, as the int value of :class:`PidType` enums, represented as a C `unsigned char` - 20 bytes for the SHA1 digest as a byte sequence Args: pid: persistent identifier Returns: bytes: byte sequence representation of pid """ pid = parse_swhid(pid_str) return struct.pack( PID_BIN_FMT, pid.scheme_version, PidType[pid.object_type].value, bytes.fromhex(pid.object_id), ) def bytes_to_str(bytes: bytes) -> str: """Inverse function of :func:`str_to_bytes` See :func:`str_to_bytes` for a description of the binary PID format. Args: bytes: byte sequence representation of pid Returns: pid: persistent identifier """ (version, type, bin_digest) = struct.unpack(PID_BIN_FMT, bytes) pid = SWHID(object_type=PidType(type).name, object_id=bin_digest) return str(pid) class _OnDiskMap: """mmap-ed on-disk sequence of fixed size records """ def __init__( self, record_size: int, fname: str, mode: str = "rb", length: int = None ): """open an existing on-disk map Args: record_size: size of each record in bytes fname: path to the on-disk map mode: file open mode, usually either 'rb' for read-only maps, 'wb' for creating new maps, or 'rb+' for updating existing ones (default: 'rb') length: map size in number of logical records; used to initialize writable maps at creation time. 
Must be given when mode is 'wb' and the map doesn't exist on disk; ignored otherwise """ os_modes = {"rb": os.O_RDONLY, "wb": os.O_RDWR | os.O_CREAT, "rb+": os.O_RDWR} if mode not in os_modes: raise ValueError("invalid file open mode: " + mode) new_map = mode == "wb" writable_map = mode in ["wb", "rb+"] self.record_size = record_size self.fd = os.open(fname, os_modes[mode]) if new_map: if length is None: raise ValueError("missing length when creating new map") os.truncate(self.fd, length * self.record_size) self.size = os.path.getsize(fname) (self.length, remainder) = divmod(self.size, record_size) if remainder: raise ValueError( "map size {} is not a multiple of the record size {}".format( self.size, record_size ) ) self.mm = mmap.mmap( self.fd, self.size, prot=(PROT_READ | PROT_WRITE if writable_map else PROT_READ), flags=MAP_SHARED, ) def close(self) -> None: """close the map shuts down both the mmap and the underlying file descriptor """ if not self.mm.closed: self.mm.close() os.close(self.fd) def __len__(self) -> int: return self.length def __delitem__(self, pos: int) -> None: raise NotImplementedError("cannot delete records from fixed-size map") class PidToNodeMap(_OnDiskMap, MutableMapping): """memory mapped map from :ref:`SWHIDs ` to a continuous range 0..N of (8-byte long) integers This is the converse mapping of :class:`NodeToPidMap`. The on-disk serialization format is a sequence of fixed length (30 bytes) records with the following fields: - PID (22 bytes): binary PID representation as per :func:`str_to_bytes` - long (8 bytes): big endian long integer The records are sorted lexicographically by PID type and checksum, where type is the integer value of :class:`PidType`. PID lookup in the map is performed via binary search. Hence a huge map with, say, 11 B entries, will require ~30 disk seeks. Note that, due to fixed size + ordering, it is not possible to create these maps by random writing. Hence, __setitem__ can be used only to *update* the value associated to an existing key, rather than to add a missing item. To create an entire map from scratch, you should do so *sequentially*, using static method :meth:`write_record` (or, at your own risk, by hand via the mmap :attr:`mm`). """ # record binary format: PID + a big endian 8-byte big endian integer RECORD_BIN_FMT = ">" + PID_BIN_FMT + "q" RECORD_SIZE = PID_BIN_SIZE + INT_BIN_SIZE def __init__(self, fname: str, mode: str = "rb", length: int = None): """open an existing on-disk map Args: fname: path to the on-disk map mode: file open mode, usually either 'rb' for read-only maps, 'wb' for creating new maps, or 'rb+' for updating existing ones (default: 'rb') length: map size in number of logical records; used to initialize read-write maps at creation time. 
Must be given when mode is 'wb'; ignored otherwise """ super().__init__(self.RECORD_SIZE, fname, mode=mode, length=length) def _get_bin_record(self, pos: int) -> Tuple[bytes, bytes]: """seek and return the (binary) record at a given (logical) position see :func:`_get_record` for an equivalent function with additional deserialization Args: pos: 0-based record number Returns: a pair `(pid, int)`, where pid and int are bytes """ rec_pos = pos * self.RECORD_SIZE int_pos = rec_pos + PID_BIN_SIZE return (self.mm[rec_pos:int_pos], self.mm[int_pos : int_pos + INT_BIN_SIZE]) def _get_record(self, pos: int) -> Tuple[str, int]: """seek and return the record at a given (logical) position moral equivalent of :func:`_get_bin_record`, with additional deserialization to non-bytes types Args: pos: 0-based record number Returns: a pair `(pid, int)`, where pid is a string-based PID and int the corresponding integer identifier """ (pid_bytes, int_bytes) = self._get_bin_record(pos) return (bytes_to_str(pid_bytes), struct.unpack(INT_BIN_FMT, int_bytes)[0]) @classmethod def write_record(cls, f: BinaryIO, pid: str, int: int) -> None: """write a logical record to a file-like object Args: f: file-like object to write the record to pid: textual PID int: PID integer identifier """ f.write(str_to_bytes(pid)) f.write(struct.pack(INT_BIN_FMT, int)) def _bisect_pos(self, pid_str: str) -> int: """bisect the position of the given identifier. If the identifier is not found, the position of the pid immediately after is returned. Args: pid_str: the pid as a string Returns: the logical record of the bisected position in the map """ if not isinstance(pid_str, str): raise TypeError("PID must be a str, not {}".format(type(pid_str))) try: target = str_to_bytes(pid_str) # desired PID as bytes except ValueError: raise ValueError('invalid PID: "{}"'.format(pid_str)) lo = 0 hi = self.length - 1 while lo < hi: mid = (lo + hi) // 2 (pid, _value) = self._get_bin_record(mid) if pid < target: lo = mid + 1 else: hi = mid return lo def _find(self, pid_str: str) -> Tuple[int, int]: """lookup the integer identifier of a pid and its position Args: pid_str: the pid as a string Returns: a pair `(pid, pos)` with pid integer identifier and its logical record position in the map """ pos = self._bisect_pos(pid_str) pid_found, value = self._get_record(pos) if pid_found == pid_str: return (value, pos) raise KeyError(pid_str) def __getitem__(self, pid_str: str) -> int: """lookup the integer identifier of a PID Args: pid: the PID as a string Returns: the integer identifier of pid """ return self._find(pid_str)[0] # return element, ignore position def __setitem__(self, pid_str: str, int: str) -> None: (_pid, pos) = self._find(pid_str) # might raise KeyError and that's OK rec_pos = pos * self.RECORD_SIZE int_pos = rec_pos + PID_BIN_SIZE self.mm[rec_pos:int_pos] = str_to_bytes(pid_str) self.mm[int_pos : int_pos + INT_BIN_SIZE] = struct.pack(INT_BIN_FMT, int) def __iter__(self) -> Iterator[Tuple[str, int]]: for pos in range(self.length): yield self._get_record(pos) def iter_prefix(self, prefix: str): swh, n, t, sha = prefix.split(":") sha = sha.ljust(40, "0") start_pid = ":".join([swh, n, t, sha]) start = self._bisect_pos(start_pid) for pos in range(start, self.length): pid, value = self._get_record(pos) if not pid.startswith(prefix): break yield pid, value def iter_type(self, pid_type: str) -> Iterator[Tuple[str, int]]: prefix = "swh:1:{}:".format(pid_type) yield from self.iter_prefix(prefix) class NodeToPidMap(_OnDiskMap, MutableMapping): """memory mapped 
map from a continuous range of 0..N (8-byte long) integers to :ref:`SWHIDs ` This is the converse mapping of :class:`PidToNodeMap`. The on-disk serialization format is a sequence of fixed length records (22 bytes), each being the binary representation of a PID as per :func:`str_to_bytes`. The records are sorted by long integer, so that integer lookup is possible via fixed-offset seek. """ RECORD_BIN_FMT = PID_BIN_FMT RECORD_SIZE = PID_BIN_SIZE def __init__(self, fname: str, mode: str = "rb", length: int = None): """open an existing on-disk map Args: fname: path to the on-disk map mode: file open mode, usually either 'rb' for read-only maps, 'wb' for creating new maps, or 'rb+' for updating existing ones (default: 'rb') size: map size in number of logical records; used to initialize read-write maps at creation time. Must be given when mode is 'wb'; ignored otherwise length: passed to :class:`_OnDiskMap` """ super().__init__(self.RECORD_SIZE, fname, mode=mode, length=length) def _get_bin_record(self, pos: int) -> bytes: """seek and return the (binary) PID at a given (logical) position Args: pos: 0-based record number Returns: PID as a byte sequence """ rec_pos = pos * self.RECORD_SIZE return self.mm[rec_pos : rec_pos + self.RECORD_SIZE] @classmethod def write_record(cls, f: BinaryIO, pid: str) -> None: """write a PID to a file-like object Args: f: file-like object to write the record to pid: textual PID """ f.write(str_to_bytes(pid)) def __getitem__(self, pos: int) -> str: orig_pos = pos if pos < 0: pos = len(self) + pos if not (0 <= pos < len(self)): raise IndexError(orig_pos) return bytes_to_str(self._get_bin_record(pos)) def __setitem__(self, pos: int, pid: str) -> None: rec_pos = pos * self.RECORD_SIZE self.mm[rec_pos : rec_pos + self.RECORD_SIZE] = str_to_bytes(pid) def __iter__(self) -> Iterator[Tuple[int, str]]: for pos in range(self.length): yield (pos, self[pos]) diff --git a/swh/graph/server/app.py b/swh/graph/server/app.py index acca30d..a45fad9 100644 --- a/swh/graph/server/app.py +++ b/swh/graph/server/app.py @@ -1,312 +1,313 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """ A proxy HTTP server for swh-graph, talking to the Java code via py4j, and using FIFO as a transport to stream integers between the two languages. """ import asyncio -import json -import aiohttp.web from collections import deque +import json from typing import Optional +import aiohttp.web + from swh.core.api.asynchronous import RPCServerApp -from swh.model.identifiers import PID_TYPES from swh.model.exceptions import ValidationError +from swh.model.identifiers import PID_TYPES try: from contextlib import asynccontextmanager except ImportError: # Compatibility with 3.6 backport from async_generator import asynccontextmanager # type: ignore # maximum number of retries for random walks RANDOM_RETRIES = 5 # TODO make this configurable via rpc-serve configuration async def index(request): return aiohttp.web.Response( content_type="text/html", body=""" Software Heritage storage server

<p>You have reached the <a href="https://www.softwareheritage.org/">Software Heritage</a>
graph API server.</p>

<p>See its <a href="https://docs.softwareheritage.org/devel/swh-graph/">API
documentation</a> for more information.</p>

""", ) class GraphView(aiohttp.web.View): """Base class for views working on the graph, with utility functions""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.backend = self.request.app["backend"] def node_of_pid(self, pid): """Lookup a PID in a pid2node map, failing in an HTTP-nice way if needed.""" try: return self.backend.pid2node[pid] except KeyError: raise aiohttp.web.HTTPNotFound(body=f"PID not found: {pid}") except ValidationError: raise aiohttp.web.HTTPBadRequest(body=f"malformed PID: {pid}") def pid_of_node(self, node): """Lookup a node in a node2pid map, failing in an HTTP-nice way if needed.""" try: return self.backend.node2pid[node] except KeyError: raise aiohttp.web.HTTPInternalServerError( body=f"reverse lookup failed for node id: {node}" ) def get_direction(self): """Validate HTTP query parameter `direction`""" s = self.request.query.get("direction", "forward") if s not in ("forward", "backward"): raise aiohttp.web.HTTPBadRequest(body=f"invalid direction: {s}") return s def get_edges(self): """Validate HTTP query parameter `edges`, i.e., edge restrictions""" s = self.request.query.get("edges", "*") if any( [ node_type != "*" and node_type not in PID_TYPES for edge in s.split(":") for node_type in edge.split(",", maxsplit=1) ] ): raise aiohttp.web.HTTPBadRequest(body=f"invalid edge restriction: {s}") return s def get_traversal(self): """Validate HTTP query parameter `traversal`, i.e., visit order""" s = self.request.query.get("traversal", "dfs") if s not in ("bfs", "dfs"): raise aiohttp.web.HTTPBadRequest(body=f"invalid traversal order: {s}") return s def get_limit(self): """Validate HTTP query parameter `limit`, i.e., number of results""" s = self.request.query.get("limit", "0") try: return int(s) except ValueError: raise aiohttp.web.HTTPBadRequest(body=f"invalid limit value: {s}") class StreamingGraphView(GraphView): """Base class for views streaming their response line by line.""" content_type = "text/plain" @asynccontextmanager async def response_streamer(self, *args, **kwargs): """Context manager to prepare then close a StreamResponse""" response = aiohttp.web.StreamResponse(*args, **kwargs) response.content_type = self.content_type await response.prepare(self.request) yield response await response.write_eof() async def get(self): await self.prepare_response() async with self.response_streamer() as self.response_stream: await self.stream_response() return self.response_stream async def prepare_response(self): """This can be overridden with some setup to be run before the response actually starts streaming. """ pass async def stream_response(self): """Override this to perform the response streaming. Implementations of this should await self.stream_line(line) to write each line. 
""" raise NotImplementedError async def stream_line(self, line): """Write a line in the response stream.""" await self.response_stream.write((line + "\n").encode()) class StatsView(GraphView): """View showing some statistics on the graph""" async def get(self): stats = self.backend.stats() return aiohttp.web.Response(body=stats, content_type="application/json") class SimpleTraversalView(StreamingGraphView): """Base class for views of simple traversals""" simple_traversal_type: Optional[str] = None async def prepare_response(self): src = self.request.match_info["src"] self.src_node = self.node_of_pid(src) self.edges = self.get_edges() self.direction = self.get_direction() async def stream_response(self): async for res_node in self.backend.simple_traversal( self.simple_traversal_type, self.direction, self.edges, self.src_node ): res_pid = self.pid_of_node(res_node) await self.stream_line(res_pid) class LeavesView(SimpleTraversalView): simple_traversal_type = "leaves" class NeighborsView(SimpleTraversalView): simple_traversal_type = "neighbors" class VisitNodesView(SimpleTraversalView): simple_traversal_type = "visit_nodes" class WalkView(StreamingGraphView): async def prepare_response(self): src = self.request.match_info["src"] dst = self.request.match_info["dst"] self.src_node = self.node_of_pid(src) if dst not in PID_TYPES: self.dst_thing = self.node_of_pid(dst) else: self.dst_thing = dst self.edges = self.get_edges() self.direction = self.get_direction() self.algo = self.get_traversal() self.limit = self.get_limit() async def get_walk_iterator(self): return self.backend.walk( self.direction, self.edges, self.algo, self.src_node, self.dst_thing ) async def stream_response(self): it = self.get_walk_iterator() if self.limit < 0: queue = deque(maxlen=-self.limit) async for res_node in it: res_pid = self.pid_of_node(res_node) queue.append(res_pid) while queue: await self.stream_line(queue.popleft()) else: count = 0 async for res_node in it: if self.limit == 0 or count < self.limit: res_pid = self.pid_of_node(res_node) await self.stream_line(res_pid) count += 1 else: break class RandomWalkView(WalkView): def get_walk_iterator(self): return self.backend.random_walk( self.direction, self.edges, RANDOM_RETRIES, self.src_node, self.dst_thing ) class VisitEdgesView(SimpleTraversalView): async def stream_response(self): it = self.backend.visit_edges(self.direction, self.edges, self.src_node) async for (res_src, res_dst) in it: res_src_pid = self.pid_of_node(res_src) res_dst_pid = self.pid_of_node(res_dst) await self.stream_line("{} {}".format(res_src_pid, res_dst_pid)) class VisitPathsView(SimpleTraversalView): content_type = "application/x-ndjson" async def stream_response(self): it = self.backend.visit_paths(self.direction, self.edges, self.src_node) async for res_path in it: res_path_pid = [self.pid_of_node(n) for n in res_path] line = json.dumps(res_path_pid) await self.stream_line(line) class CountView(GraphView): """Base class for counting views.""" count_type: Optional[str] = None async def get(self): src = self.request.match_info["src"] self.src_node = self.node_of_pid(src) self.edges = self.get_edges() self.direction = self.get_direction() loop = asyncio.get_event_loop() cnt = await loop.run_in_executor( None, self.backend.count, self.count_type, self.direction, self.edges, self.src_node, ) return aiohttp.web.Response(body=str(cnt), content_type="application/json") class CountNeighborsView(CountView): count_type = "neighbors" class CountLeavesView(CountView): count_type = "leaves" class 
CountVisitNodesView(CountView): count_type = "visit_nodes" def make_app(backend, **kwargs): app = RPCServerApp(**kwargs) app.add_routes( [ aiohttp.web.get("/", index), aiohttp.web.get("/graph", index), aiohttp.web.view("/graph/stats", StatsView), aiohttp.web.view("/graph/leaves/{src}", LeavesView), aiohttp.web.view("/graph/neighbors/{src}", NeighborsView), aiohttp.web.view("/graph/visit/nodes/{src}", VisitNodesView), aiohttp.web.view("/graph/visit/edges/{src}", VisitEdgesView), aiohttp.web.view("/graph/visit/paths/{src}", VisitPathsView), # temporarily disabled in wait of a proper fix for T1969 # aiohttp.web.view("/graph/walk/{src}/{dst}", WalkView) aiohttp.web.view("/graph/randomwalk/{src}/{dst}", RandomWalkView), aiohttp.web.view("/graph/neighbors/count/{src}", CountNeighborsView), aiohttp.web.view("/graph/leaves/count/{src}", CountLeavesView), aiohttp.web.view("/graph/visit/nodes/count/{src}", CountVisitNodesView), ] ) app["backend"] = backend return app diff --git a/swh/graph/tests/conftest.py b/swh/graph/tests/conftest.py index 442e32c..497062e 100644 --- a/swh/graph/tests/conftest.py +++ b/swh/graph/tests/conftest.py @@ -1,51 +1,51 @@ import multiprocessing -import pytest - -from aiohttp.test_utils import TestServer, TestClient, loop_context from pathlib import Path -from swh.graph.graph import load as graph_load -from swh.graph.client import RemoteGraphClient +from aiohttp.test_utils import TestClient, TestServer, loop_context +import pytest + from swh.graph.backend import Backend +from swh.graph.client import RemoteGraphClient +from swh.graph.graph import load as graph_load from swh.graph.server.app import make_app SWH_GRAPH_TESTS_ROOT = Path(__file__).parents[0] TEST_GRAPH_PATH = SWH_GRAPH_TESTS_ROOT / "dataset/output/example" class GraphServerProcess(multiprocessing.Process): def __init__(self, q, *args, **kwargs): self.q = q super().__init__(*args, **kwargs) def run(self): try: backend = Backend(graph_path=str(TEST_GRAPH_PATH)) with backend: with loop_context() as loop: app = make_app(backend=backend, debug=True) client = TestClient(TestServer(app), loop=loop) loop.run_until_complete(client.start_server()) url = client.make_url("/graph/") self.q.put(url) loop.run_forever() except Exception as e: self.q.put(e) @pytest.fixture(scope="module") def graph_client(): queue = multiprocessing.Queue() server = GraphServerProcess(queue) server.start() res = queue.get() if isinstance(res, Exception): raise res yield RemoteGraphClient(str(res)) server.terminate() @pytest.fixture(scope="module") def graph(): with graph_load(str(TEST_GRAPH_PATH)) as g: yield g diff --git a/swh/graph/tests/test_api_client.py b/swh/graph/tests/test_api_client.py index 96279d0..4bed140 100644 --- a/swh/graph/tests/test_api_client.py +++ b/swh/graph/tests/test_api_client.py @@ -1,305 +1,303 @@ import pytest - - from pytest import raises from swh.core.api import RemoteException def test_stats(graph_client): stats = graph_client.stats() assert set(stats.keys()) == {"counts", "ratios", "indegree", "outdegree"} assert set(stats["counts"].keys()) == {"nodes", "edges"} assert set(stats["ratios"].keys()) == { "compression", "bits_per_node", "bits_per_edge", "avg_locality", } assert set(stats["indegree"].keys()) == {"min", "max", "avg"} assert set(stats["outdegree"].keys()) == {"min", "max", "avg"} assert stats["counts"]["nodes"] == 21 assert stats["counts"]["edges"] == 23 assert isinstance(stats["ratios"]["compression"], float) assert isinstance(stats["ratios"]["bits_per_node"], float) assert 
isinstance(stats["ratios"]["bits_per_edge"], float) assert isinstance(stats["ratios"]["avg_locality"], float) assert stats["indegree"]["min"] == 0 assert stats["indegree"]["max"] == 3 assert isinstance(stats["indegree"]["avg"], float) assert stats["outdegree"]["min"] == 0 assert stats["outdegree"]["max"] == 3 assert isinstance(stats["outdegree"]["avg"], float) def test_leaves(graph_client): actual = list( graph_client.leaves("swh:1:ori:0000000000000000000000000000000000000021") ) expected = [ "swh:1:cnt:0000000000000000000000000000000000000001", "swh:1:cnt:0000000000000000000000000000000000000004", "swh:1:cnt:0000000000000000000000000000000000000005", "swh:1:cnt:0000000000000000000000000000000000000007", ] assert set(actual) == set(expected) def test_neighbors(graph_client): actual = list( graph_client.neighbors( "swh:1:rev:0000000000000000000000000000000000000009", direction="backward" ) ) expected = [ "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000013", ] assert set(actual) == set(expected) def test_visit_nodes(graph_client): actual = list( graph_client.visit_nodes( "swh:1:rel:0000000000000000000000000000000000000010", edges="rel:rev,rev:rev", ) ) expected = [ "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", ] assert set(actual) == set(expected) def test_visit_edges(graph_client): actual = list( graph_client.visit_edges( "swh:1:rel:0000000000000000000000000000000000000010", edges="rel:rev,rev:rev,rev:dir", ) ) expected = [ ( "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", ), ( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", ), ( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", ), ( "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", ), ] assert set(actual) == set(expected) def test_visit_edges_diamond_pattern(graph_client): actual = list( graph_client.visit_edges( "swh:1:rev:0000000000000000000000000000000000000009", edges="*", ) ) expected = [ ( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", ), ( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", ), ( "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", ), ( "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:cnt:0000000000000000000000000000000000000001", ), ( "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000001", ), ( "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000007", ), ( "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", ), ( "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000004", ), ( "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005", ), ] assert set(actual) == set(expected) def test_visit_paths(graph_client): actual = list( graph_client.visit_paths( "swh:1:snp:0000000000000000000000000000000000000020", edges="snp:*,rev:*" ) ) actual = 
[tuple(path) for path in actual] expected = [ ( "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", ), ( "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", ), ( "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", ), ] assert set(actual) == set(expected) @pytest.mark.skip(reason="currently disabled due to T1969") def test_walk(graph_client): args = ("swh:1:dir:0000000000000000000000000000000000000016", "rel") kwargs = { "edges": "dir:dir,dir:rev,rev:*", "direction": "backward", "traversal": "bfs", } actual = list(graph_client.walk(*args, **kwargs)) expected = [ "swh:1:dir:0000000000000000000000000000000000000016", "swh:1:dir:0000000000000000000000000000000000000017", "swh:1:rev:0000000000000000000000000000000000000018", "swh:1:rel:0000000000000000000000000000000000000019", ] assert set(actual) == set(expected) kwargs2 = kwargs.copy() kwargs2["limit"] = -1 actual = list(graph_client.walk(*args, **kwargs2)) expected = ["swh:1:rel:0000000000000000000000000000000000000019"] assert set(actual) == set(expected) kwargs2 = kwargs.copy() kwargs2["limit"] = 2 actual = list(graph_client.walk(*args, **kwargs2)) expected = [ "swh:1:dir:0000000000000000000000000000000000000016", "swh:1:dir:0000000000000000000000000000000000000017", ] assert set(actual) == set(expected) def test_random_walk(graph_client): """as the walk is random, we test a visit from a cnt node to the only origin in the dataset, and only check the final node of the path (i.e., the origin) """ args = ("swh:1:cnt:0000000000000000000000000000000000000001", "ori") kwargs = {"direction": "backward"} expected_root = "swh:1:ori:0000000000000000000000000000000000000021" actual = list(graph_client.random_walk(*args, **kwargs)) assert len(actual) > 1 # no origin directly links to a content assert actual[0] == args[0] assert actual[-1] == expected_root kwargs2 = kwargs.copy() kwargs2["limit"] = -1 actual = list(graph_client.random_walk(*args, **kwargs2)) assert actual == [expected_root] kwargs2["limit"] = -2 actual = list(graph_client.random_walk(*args, **kwargs2)) assert len(actual) == 2 assert actual[-1] == expected_root kwargs2["limit"] = 3 actual = list(graph_client.random_walk(*args, **kwargs2)) assert len(actual) == 3 def test_count(graph_client): actual = graph_client.count_leaves( "swh:1:ori:0000000000000000000000000000000000000021" ) assert actual == 4 actual = graph_client.count_visit_nodes( "swh:1:rel:0000000000000000000000000000000000000010", edges="rel:rev,rev:rev" ) assert actual == 3 actual = graph_client.count_neighbors( "swh:1:rev:0000000000000000000000000000000000000009", direction="backward" ) assert actual == 3 def test_param_validation(graph_client): with raises(RemoteException) as exc_info: # PID not found list(graph_client.leaves("swh:1:ori:fff0000000000000000000000000000000000021")) assert exc_info.value.response.status_code == 404 with raises(RemoteException) as exc_info: # malformed PID list( graph_client.neighbors("swh:1:ori:fff000000zzzzzz0000000000000000000000021") ) assert exc_info.value.response.status_code == 400 with raises(RemoteException) as exc_info: # malformed edge specificaiton list( graph_client.visit_nodes( "swh:1:dir:0000000000000000000000000000000000000016", 
edges="dir:notanodetype,dir:rev,rev:*", direction="backward", ) ) assert exc_info.value.response.status_code == 400 with raises(RemoteException) as exc_info: # malformed direction list( graph_client.visit_nodes( "swh:1:dir:0000000000000000000000000000000000000016", edges="dir:dir,dir:rev,rev:*", direction="notadirection", ) ) assert exc_info.value.response.status_code == 400 @pytest.mark.skip(reason="currently disabled due to T1969") def test_param_validation_walk(graph_client): """test validation of walk-specific parameters only""" with raises(RemoteException) as exc_info: # malformed traversal order list( graph_client.walk( "swh:1:dir:0000000000000000000000000000000000000016", "rel", edges="dir:dir,dir:rev,rev:*", direction="backward", traversal="notatraversalorder", ) ) assert exc_info.value.response.status_code == 400 diff --git a/swh/graph/tests/test_cli.py b/swh/graph/tests/test_cli.py index 009a23d..4eaa389 100644 --- a/swh/graph/tests/test_cli.py +++ b/swh/graph/tests/test_cli.py @@ -1,46 +1,45 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from pathlib import Path from tempfile import TemporaryDirectory from typing import Dict from click.testing import CliRunner from swh.graph.cli import cli - DATA_DIR = Path(__file__).parents[0] / "dataset" def read_properties(properties_fname) -> Dict[str, str]: """read a Java .properties file""" with open(properties_fname) as f: keyvalues = ( line.split("=", maxsplit=1) for line in f if not line.strip().startswith("#") ) return dict((k.strip(), v.strip()) for (k, v) in keyvalues) def test_pipeline(): """run full compression pipeline""" # bare bone configuration, to allow testing the compression pipeline # with minimum RAM requirements on trivial graphs config = {"graph": {"compress": {"batch_size": 1000}}} runner = CliRunner() with TemporaryDirectory(suffix=".swh-graph-test") as tmpdir: result = runner.invoke( cli, ["compress", "--graph", DATA_DIR / "example", "--outdir", tmpdir], obj={"config": config}, ) assert result.exit_code == 0, result properties = read_properties(Path(tmpdir) / "example.properties") assert int(properties["nodes"]) == 21 assert int(properties["arcs"]) == 23 diff --git a/swh/graph/tests/test_pid.py b/swh/graph/tests/test_pid.py index 110c61e..f937729 100644 --- a/swh/graph/tests/test_pid.py +++ b/swh/graph/tests/test_pid.py @@ -1,202 +1,200 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from itertools import islice import os import shutil import tempfile import unittest -from itertools import islice - -from swh.graph.pid import str_to_bytes, bytes_to_str -from swh.graph.pid import PidToNodeMap, NodeToPidMap +from swh.graph.pid import NodeToPidMap, PidToNodeMap, bytes_to_str, str_to_bytes from swh.model.identifiers import PID_TYPES class TestPidSerialization(unittest.TestCase): pairs = [ ( "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", bytes.fromhex("01" + "00" + "94a9ed024d3859793618152ea559a168bbcbb5e2"), ), ( "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505", bytes.fromhex("01" + "01" + "d198bc9d7a6bcf6db04f476d29314f157507d505"), ), ( "swh:1:ori:b63a575fe3faab7692c9f38fb09d4bb45651bb0f", bytes.fromhex("01" + "02" 
+ "b63a575fe3faab7692c9f38fb09d4bb45651bb0f"), ), ( "swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f", bytes.fromhex("01" + "03" + "22ece559cc7cc2364edc5e5593d63ae8bd229f9f"), ), ( "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d", bytes.fromhex("01" + "04" + "309cf2674ee7a0749978cf8265ab91a60aea0f7d"), ), ( "swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453", bytes.fromhex("01" + "05" + "c7c108084bc0bf3d81436bf980b46e98bd338453"), ), ] def test_str_to_bytes(self): for (pid_str, pid_bytes) in self.pairs: self.assertEqual(str_to_bytes(pid_str), pid_bytes) def test_bytes_to_str(self): for (pid_str, pid_bytes) in self.pairs: self.assertEqual(bytes_to_str(pid_bytes), pid_str) def test_round_trip(self): for (pid_str, pid_bytes) in self.pairs: self.assertEqual(pid_str, bytes_to_str(str_to_bytes(pid_str))) self.assertEqual(pid_bytes, str_to_bytes(bytes_to_str(pid_bytes))) def gen_records(types=["cnt", "dir", "ori", "rel", "rev", "snp"], length=10000): """generate sequential PID/int records, suitable for filling int<->pid maps for testing swh-graph on-disk binary databases Args: types (list): list of PID types to be generated, specified as the corresponding 3-letter component in PIDs length (int): number of PIDs to generate *per type* Yields: pairs (pid, int) where pid is a textual PID and int its sequential integer identifier """ pos = 0 for t in sorted(types): for i in range(0, length): seq = format(pos, "x") # current position as hex string pid = "swh:1:{}:{}{}".format(t, "0" * (40 - len(seq)), seq) yield (pid, pos) pos += 1 # pairs PID/position in the sequence generated by :func:`gen_records` above MAP_PAIRS = [ ("swh:1:cnt:0000000000000000000000000000000000000000", 0), ("swh:1:cnt:000000000000000000000000000000000000002a", 42), ("swh:1:dir:0000000000000000000000000000000000002afc", 11004), ("swh:1:ori:00000000000000000000000000000000000056ce", 22222), ("swh:1:rel:0000000000000000000000000000000000008235", 33333), ("swh:1:rev:000000000000000000000000000000000000ad9c", 44444), ("swh:1:snp:000000000000000000000000000000000000ea5f", 59999), ] class TestPidToNodeMap(unittest.TestCase): @classmethod def setUpClass(cls): """create reasonably sized (~2 MB) PID->int map to test on-disk DB """ cls.tmpdir = tempfile.mkdtemp(prefix="swh.graph.test.") cls.fname = os.path.join(cls.tmpdir, "pid2int.bin") with open(cls.fname, "wb") as f: for (pid, i) in gen_records(length=10000): PidToNodeMap.write_record(f, pid, i) @classmethod def tearDownClass(cls): shutil.rmtree(cls.tmpdir) def setUp(self): self.map = PidToNodeMap(self.fname) def tearDown(self): self.map.close() def test_lookup(self): for (pid, pos) in MAP_PAIRS: self.assertEqual(self.map[pid], pos) def test_missing(self): with self.assertRaises(KeyError): self.map["swh:1:ori:0101010100000000000000000000000000000000"], with self.assertRaises(KeyError): self.map["swh:1:cnt:0101010100000000000000000000000000000000"], def test_type_error(self): with self.assertRaises(TypeError): self.map[42] with self.assertRaises(TypeError): self.map[1.2] def test_update(self): fname2 = self.fname + ".update" shutil.copy(self.fname, fname2) # fresh map copy map2 = PidToNodeMap(fname2, mode="rb+") for (pid, int) in islice(map2, 11): # update the first N items new_int = int + 42 map2[pid] = new_int self.assertEqual(map2[pid], new_int) # check updated value os.unlink(fname2) # tmpdir will be cleaned even if we don't reach this def test_iter_type(self): for t in PID_TYPES: first_20 = list(islice(self.map.iter_type(t), 20)) k = first_20[0][1] expected = 
[("swh:1:{}:{:040x}".format(t, i), i) for i in range(k, k + 20)] assert first_20 == expected def test_iter_prefix(self): for t in PID_TYPES: prefix = self.map.iter_prefix("swh:1:{}:00".format(t)) first_20 = list(islice(prefix, 20)) k = first_20[0][1] expected = [("swh:1:{}:{:040x}".format(t, i), i) for i in range(k, k + 20)] assert first_20 == expected class TestNodeToPidMap(unittest.TestCase): @classmethod def setUpClass(cls): """create reasonably sized (~1 MB) int->PID map to test on-disk DB """ cls.tmpdir = tempfile.mkdtemp(prefix="swh.graph.test.") cls.fname = os.path.join(cls.tmpdir, "int2pid.bin") with open(cls.fname, "wb") as f: for (pid, _i) in gen_records(length=10000): NodeToPidMap.write_record(f, pid) @classmethod def tearDownClass(cls): shutil.rmtree(cls.tmpdir) def setUp(self): self.map = NodeToPidMap(self.fname) def tearDown(self): self.map.close() def test_lookup(self): for (pid, pos) in MAP_PAIRS: self.assertEqual(self.map[pos], pid) def test_out_of_bounds(self): with self.assertRaises(IndexError): self.map[1000000] with self.assertRaises(IndexError): self.map[-1000000] def test_update(self): fname2 = self.fname + ".update" shutil.copy(self.fname, fname2) # fresh map copy map2 = NodeToPidMap(fname2, mode="rb+") for (int, pid) in islice(map2, 11): # update the first N items new_pid = pid.replace(":0", ":f") # mangle first hex digit map2[int] = new_pid self.assertEqual(map2[int], new_pid) # check updated value os.unlink(fname2) # tmpdir will be cleaned even if we don't reach this diff --git a/swh/graph/webgraph.py b/swh/graph/webgraph.py index c27aaac..6390014 100644 --- a/swh/graph/webgraph.py +++ b/swh/graph/webgraph.py @@ -1,226 +1,225 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """WebGraph driver """ +from datetime import datetime +from enum import Enum import logging import os -import subprocess - -from enum import Enum -from datetime import datetime from pathlib import Path +import subprocess from typing import Dict, List, Set from swh.graph.config import check_config_compress class CompressionStep(Enum): MPH = 1 BV = 2 BV_OBL = 3 BFS = 4 PERMUTE = 5 PERMUTE_OBL = 6 STATS = 7 TRANSPOSE = 8 TRANSPOSE_OBL = 9 MAPS = 10 CLEAN_TMP = 11 def __str__(self): return self.name # full compression pipeline COMP_SEQ = list(CompressionStep) # Mapping from compression steps to shell commands implementing them. Commands # will be executed by the shell, so be careful with meta characters. They are # specified here as lists of tokens that will be joined together only for ease # of line splitting. In commands, {tokens} will be interpolated with # configuration values, see :func:`compress`. 
diff --git a/swh/graph/webgraph.py b/swh/graph/webgraph.py
index c27aaac..6390014 100644
--- a/swh/graph/webgraph.py
+++ b/swh/graph/webgraph.py
@@ -1,226 +1,225 @@
 # Copyright (C) 2019 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 """WebGraph driver"""

+from datetime import datetime
+from enum import Enum
 import logging
 import os
-import subprocess
-
-from enum import Enum
-from datetime import datetime
 from pathlib import Path
+import subprocess
 from typing import Dict, List, Set

 from swh.graph.config import check_config_compress


 class CompressionStep(Enum):
     MPH = 1
     BV = 2
     BV_OBL = 3
     BFS = 4
     PERMUTE = 5
     PERMUTE_OBL = 6
     STATS = 7
     TRANSPOSE = 8
     TRANSPOSE_OBL = 9
     MAPS = 10
     CLEAN_TMP = 11

     def __str__(self):
         return self.name


 # full compression pipeline
 COMP_SEQ = list(CompressionStep)

 # Mapping from compression steps to shell commands implementing them. Commands
 # will be executed by the shell, so be careful with meta characters. They are
 # specified here as lists of tokens that will be joined together only for ease
 # of line splitting. In commands, {tokens} will be interpolated with
 # configuration values, see :func:`compress`.
 STEP_ARGV: Dict[CompressionStep, List[str]] = {
     CompressionStep.MPH: [
         "{java}",
         "it.unimi.dsi.sux4j.mph.GOVMinimalPerfectHashFunction",
         "--temp-dir",
         "{tmp_dir}",
         "{out_dir}/{graph_name}.mph",
         "<( zstdcat {in_dir}/{graph_name}.nodes.csv.zst )",
     ],
     # use process substitution (and hence FIFO) above as the MPH class loads
     # the entire file in memory when reading from stdin
     CompressionStep.BV: [
         "zstdcat",
         "{in_dir}/{graph_name}.edges.csv.zst",
         "|",
         "{java}",
         "it.unimi.dsi.big.webgraph.ScatteredArcsASCIIGraph",
         "--temp-dir",
         "{tmp_dir}",
         "--function",
         "{out_dir}/{graph_name}.mph",
         "{out_dir}/{graph_name}-bv",
     ],
     CompressionStep.BV_OBL: [
         "{java}",
         "it.unimi.dsi.big.webgraph.BVGraph",
         "--list",
         "{out_dir}/{graph_name}-bv",
     ],
     CompressionStep.BFS: [
         "{java}",
         "it.unimi.dsi.law.big.graph.BFS",
         "{out_dir}/{graph_name}-bv",
         "{out_dir}/{graph_name}.order",
     ],
     CompressionStep.PERMUTE: [
         "{java}",
         "it.unimi.dsi.big.webgraph.Transform",
         "mapOffline",
         "{out_dir}/{graph_name}-bv",
         "{out_dir}/{graph_name}",
         "{out_dir}/{graph_name}.order",
         "{batch_size}",
         "{tmp_dir}",
     ],
     CompressionStep.PERMUTE_OBL: [
         "{java}",
         "it.unimi.dsi.big.webgraph.BVGraph",
         "--list",
         "{out_dir}/{graph_name}",
     ],
     CompressionStep.STATS: [
         "{java}",
         "it.unimi.dsi.big.webgraph.Stats",
         "{out_dir}/{graph_name}",
     ],
     CompressionStep.TRANSPOSE: [
         "{java}",
         "it.unimi.dsi.big.webgraph.Transform",
         "transposeOffline",
         "{out_dir}/{graph_name}",
         "{out_dir}/{graph_name}-transposed",
         "{batch_size}",
         "{tmp_dir}",
     ],
     CompressionStep.TRANSPOSE_OBL: [
         "{java}",
         "it.unimi.dsi.big.webgraph.BVGraph",
         "--list",
         "{out_dir}/{graph_name}-transposed",
     ],
     CompressionStep.MAPS: [
         "zstdcat",
         "{in_dir}/{graph_name}.nodes.csv.zst",
         "|",
         "{java}",
         "org.softwareheritage.graph.maps.NodeMapBuilder",
         "{out_dir}/{graph_name}",
         "{tmp_dir}",
     ],
     CompressionStep.CLEAN_TMP: [
         "rm",
         "-rf",
         "{out_dir}/{graph_name}-bv.graph",
         "{out_dir}/{graph_name}-bv.obl",
         "{out_dir}/{graph_name}-bv.offsets",
         "{tmp_dir}",
     ],
 }


 def do_step(step, conf):
     cmd = " ".join(STEP_ARGV[step]).format(**conf)

     cmd_env = os.environ.copy()
     cmd_env["JAVA_TOOL_OPTIONS"] = conf["java_tool_options"]
     cmd_env["CLASSPATH"] = conf["classpath"]

     logging.info(f"running: {cmd}")
     process = subprocess.Popen(
         ["/bin/bash", "-c", cmd],
         env=cmd_env,
         encoding="utf8",
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
     )
     with process.stdout as stdout:
         for line in stdout:
             logging.info(line.rstrip())
     rc = process.wait()
     if rc != 0:
         raise RuntimeError(
             f"compression step {step} returned non-zero " f"exit code {rc}"
         )
     else:
         return rc


 def compress(
     graph_name: str,
     in_dir: Path,
     out_dir: Path,
     steps: Set[CompressionStep] = set(COMP_SEQ),
     conf: Dict[str, str] = {},
 ):
     """graph compression pipeline driver from nodes/edges files to compressed
     on-disk representation

     Args:
         graph_name: graph base name, relative to in_dir
         in_dir: input directory, where the uncompressed graph can be found
         out_dir: output directory, where the compressed graph will be stored
         steps: compression steps to run (default: all steps)
         conf: compression configuration, supporting the following keys (all
             are optional, so an empty configuration is fine and is the
             default)

             - batch_size: batch size for `WebGraph transformations `_;
               defaults to 1 billion
             - classpath: java classpath, defaults to swh-graph JAR only
             - java: command to run java VM, defaults to "java"
             - java_tool_options: value for JAVA_TOOL_OPTIONS environment
               variable; defaults to various settings for high memory machines
             - logback: path to a logback.xml configuration file; if not
               provided a temporary one will be created and used
             - max_ram: maximum RAM to use for compression; defaults to
               available virtual memory
             - tmp_dir: temporary directory, defaults to the "tmp" subdir of
               out_dir

     """
     if not steps:
         steps = set(COMP_SEQ)

     conf = check_config_compress(conf, graph_name, in_dir, out_dir)

     compression_start_time = datetime.now()
     logging.info(f"starting compression at {compression_start_time}")
     seq_no = 0
     for step in COMP_SEQ:
         if step not in steps:
             logging.debug(f"skipping compression step {step}")
             continue
         seq_no += 1
         step_start_time = datetime.now()
         logging.info(
             f"starting compression step {step} "
             f"({seq_no}/{len(steps)}) at {step_start_time}"
         )
         do_step(step, conf)
         step_end_time = datetime.now()
         step_duration = step_end_time - step_start_time
         logging.info(
             f"completed compression step {step} "
             f"({seq_no}/{len(steps)}) "
             f"at {step_end_time} in {step_duration}"
         )
     compression_end_time = datetime.now()
     compression_duration = compression_end_time - compression_start_time
     logging.info(f"completed compression in {compression_duration}")
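
To close the loop with `test_pipeline` from the CLI tests above, the driver
can also be invoked directly rather than through the CLI. A minimal sketch
(the paths and the batch_size value are illustrative; the conf keys are those
listed in the docstring):

```python
# Sketch: driving the compression pipeline directly; the CLI compress
# subcommand wraps this same entry point.
from pathlib import Path

from swh.graph.webgraph import COMP_SEQ, compress

compress(
    graph_name="example",
    in_dir=Path("swh/graph/tests/dataset"),  # illustrative input path
    out_dir=Path("/tmp/compressed-graph"),  # illustrative output path
    steps=set(COMP_SEQ),  # the default: run every compression step
    conf={"batch_size": 1000},  # small batches, as in test_pipeline
)
```

Passing a subset such as `steps={CompressionStep.MPH, CompressionStep.BV}`
(importing `CompressionStep` alongside `compress`) would run only those steps.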