diff --git a/docs/api.rst b/docs/api.rst
index 70982fd..b618b5d 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -1,360 +1,357 @@
 Graph REST API
 ==============
 
 
 Terminology
 -----------
 
 This API uses the following notions:
 
-- **Node**: a node in the `Software Heritage graph
-  <https://docs.softwareheritage.org/devel/swh-model/data-model.html>`_,
-  represented by a `persistent identifier
-  <https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html#persistent-identifiers>`_
-  (abbreviated as *SWH PID*, or simply *PID*).
+- **Node**: a node in the :ref:`Software Heritage graph <data-model>`,
+  represented by a :ref:`SWHID <persistent-identifiers>`.
 
-- **Node type**: the 3-letter specifier from the node PID (``cnt``, ``dir``,
+- **Node type**: the 3-letter specifier from the node SWHID (``cnt``, ``dir``,
   ``rel``, ``rev``, ``snp``, ``ori``), or ``*`` for all node types.
 
 - **Edge type**: a pair ``src:dst`` where ``src`` and ``dst`` are either node
   types, or ``*`` to denote all node types.
 
 - **Edge restrictions**: a textual specification of which edges can be followed
   during graph traversal. Either ``*`` to denote that all edges can be followed
   or a comma separated list of edge types to allow following only those edges.
 
   Note that when traversing the *backward* (i.e., transposed) graph, edge types
   are reversed too. So, for instance, ``ori:snp`` makes sense when traversing
   the forward graph, but useless (due to lack of matching edges in the graph)
   when traversing the backward graph; conversely ``snp:ori`` is useful when
   traversing the backward graph, but not in the forward one. For the same
   reason ``dir:dir`` allows following edges from parent directories to
   sub-directories when traversing the forward graph, but the same restriction
   allows following edges from sub-directories to parent directories.
 
 
 Examples
 ~~~~~~~~
 
-- ``swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2`` the PID of a node of
+- ``swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2`` the SWHID of a node of
   type content containing the full text of the GPL3 license.
-- ``swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35`` the PID of a node of
+- ``swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35`` the SWHID of a node of
   type revision corresponding to the commit in Linux that merged the
   'x86/urgent' branch on 31 December 2017.
 - ``"dir:dir,dir:cnt"`` node types allowing edges from directories to
   directories nodes, or directories to contents nodes.
 - ``"rev:rev,dir:*"`` node types allowing edges from revisions to revisions
   nodes, or from directories nodes.
 - ``"*:rel"`` node types allowing all edges to releases.
 
 
 Leaves
 ------
 
 .. http:get:: /graph/leaves/:src
 
     Performs a graph traversal and returns the leaves of the subgraph rooted at
     the specified source node.
 
-    :param string src: source node specified as a SWH PID
+    :param string src: source node specified as a SWHID
 
     :query string edges: edges types the traversal can follow; default to
         ``"*"``
     :query string direction: direction in which graph edges will be followed;
         can be either ``forward`` or ``backward``, default to ``forward``
 
     :statuscode 200: success
     :statuscode 400: invalid query string provided
     :statuscode 404: starting node cannot be found
 
     **Example:**
 
     .. sourcecode:: http
 
         GET /graph/leaves/swh:1:dir:432d1b21c1256f7408a07c577b6974bbdbcc1323 HTTP/1.1
 
         Content-Type: text/plain
         Transfer-Encoding: chunked
 
     .. sourcecode:: http
 
         HTTP/1.1 200 OK
 
         swh:1:cnt:540faad6b1e02e2db4f349a4845192db521ff2bd
         swh:1:cnt:630585fc6d34e5e121139e2aee0a64e83dc9aae6
         swh:1:cnt:f8634ced669f0a9155c8cab1b2621d57d778215e
         swh:1:cnt:ba6daa801ad3ea587904b1abe9161dceedb2e0bd
         ...
 
 
 Neighbors
 ---------
 
 .. http:get:: /graph/neighbors/:src
 
     Returns node direct neighbors (linked with exactly one edge) in the graph.
 
-    :param string src: source node specified as a SWH PID
+    :param string src: source node specified as a SWHID
 
     :query string edges: edges types allowed to be listed as neighbors; default
         to ``"*"``
     :query string direction: direction in which graph edges will be followed;
         can be either ``forward`` or ``backward``, default to ``forward``
 
     :statuscode 200: success
     :statuscode 400: invalid query string provided
     :statuscode 404: starting node cannot be found
 
     **Example:**
 
     .. sourcecode:: http
 
         GET /graph/neighbors/swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35 HTTP/1.1
 
         Content-Type: text/plain
         Transfer-Encoding: chunked
 
     .. sourcecode:: http
 
         HTTP/1.1 200 OK
 
         swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961
         swh:1:dir:b5d2aa0746b70300ebbca82a8132af386cc5986d
         swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd
         ...
 
 
 Walk
 ----
 
 ..
   .. http:get:: /graph/walk/:src/:dst
 
       Performs a graph traversal and returns the first found path from source to
       destination (final destination node included).
 
-      :param string src: starting node specified as a SWH PID
-      :param string dst: destination node, either as a node PID or a node type.
-          The traversal will stop at the first node encountered matching the
-          desired destination.
+      :param string src: starting node specified as a SWHID
+      :param string dst: destination node, either as a node SWHID or a node
+          type.  The traversal will stop at the first node encountered matching
+          the desired destination.
 
       :query string edges: edges types the traversal can follow; default to
           ``"*"``
       :query string traversal: traversal algorithm; can be either ``dfs`` or
           ``bfs``, default to ``dfs``
       :query string direction: direction in which graph edges will be followed;
           can be either ``forward`` or ``backward``, default to ``forward``
 
       :statuscode 200: success
       :statuscode 400: invalid query string provided
       :statuscode 404: starting node cannot be found
 
       **Example:**
 
       .. sourcecode:: http
 
           HTTP/1.1 200 OK
 
           swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35
           swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd
           swh:1:rev:cea92e843e40452c08ba313abc39f59efbb4c29c
           swh:1:rev:8d517bdfb57154b8a11d7f1682ecc0f79abf8e02
           ...
 
 .. http:get:: /graph/randomwalk/:src/:dst
 
     Performs a graph *random* traversal, i.e., picking one random successor
     node at each hop, from source to destination (final destination node
     included).
 
-    :param string src: starting node specified as a SWH PID
-    :param string dst: destination node, either as a node PID or a node type.
+    :param string src: starting node specified as a SWHID
+    :param string dst: destination node, either as a node SWHID or a node type.
         The traversal will stop at the first node encountered matching the
         desired destination.
 
     :query string edges: edges types the traversal can follow; default to
         ``"*"``
     :query string direction: direction in which graph edges will be followed;
         can be either ``forward`` or ``backward``, default to ``forward``
     :query int limit: limit the number of nodes returned. You can use positive
         numbers to get the first N results, or negative numbers to get the last
         N results starting from the tail;
         default to ``0``, meaning no limit.
 
     :statuscode 200: success
     :statuscode 400: invalid query string provided
     :statuscode 404: starting node cannot be found
 
     **Example:**
 
     .. sourcecode:: http
 
         GET /graph/randomwalk/swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2/ori?direction=backward HTTP/1.1
 
         Content-Type: text/plain
         Transfer-Encoding: chunked
 
     .. sourcecode:: http
 
         HTTP/1.1 200 OK
 
         swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2
         swh:1:dir:8de8a8823a0780524529c94464ee6ef60b98e2ed
         swh:1:dir:7146ea6cbd5ffbfec58cc8df5e0552da45e69cb7
         swh:1:rev:b12563e00026b48b817fd3532fc3df2db2a0f460
         swh:1:rev:13e8ebe80fb878bade776131e738d5772aa0ad1b
         swh:1:rev:cb39b849f167c70c1f86d4356f02d1285d49ee13
         ...
         swh:1:rev:ff70949f336593d6c59b18e4989edf24d7f0f254
         swh:1:snp:a511810642b7795e725033febdd82075064ed863
         swh:1:ori:98aa0e71f5c789b12673717a97f6e9fa20aa1161
 
     **Limit example:**
 
     .. sourcecode:: http
 
         GET /graph/randomwalk/swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2/ori?direction=backward&limit=-2 HTTP/1.1
 
         Content-Type: text/plain
         Transfer-Encoding: chunked
 
     .. sourcecode:: http
 
         HTTP/1.1 200 OK
 
         swh:1:ori:98aa0e71f5c789b12673717a97f6e9fa20aa1161
         swh:1:snp:a511810642b7795e725033febdd82075064ed863
 
 
 Visit
 -----
 
 .. http:get:: /graph/visit/nodes/:src
 .. http:get:: /graph/visit/paths/:src
 
     Performs a graph traversal and returns explored nodes or paths (in the order
     of the traversal).
 
-    :param string src: starting node specified as a SWH PID
+    :param string src: starting node specified as a SWHID
 
     :query string edges: edges types the traversal can follow; default to
         ``"*"``
     :query string direction: direction in which graph edges will be followed;
         can be either ``forward`` or ``backward``, default to ``forward``
 
     :statuscode 200: success
     :statuscode 400: invalid query string provided
     :statuscode 404: starting node cannot be found
 
     **Example:**
 
     .. sourcecode:: http
 
         GET /graph/visit/nodes/swh:1:snp:40f9f177b8ab0b7b3d70ee14bbc8b214e2b2dcfc HTTP/1.1
 
         Content-Type: text/plain
         Transfer-Encoding: chunked
 
     .. sourcecode:: http
 
         HTTP/1.1 200 OK
 
         swh:1:snp:40f9f177b8ab0b7b3d70ee14bbc8b214e2b2dcfc
         swh:1:rev:cfab784723a6c2d33468c9ed8a566fd5e2abd8c9
         swh:1:rev:53e5df0e7a6b7bd4919074c081a173655c0da164
         swh:1:rev:f85647f14b8243532283eff3e08f4ee96c35945f
         swh:1:rev:fe5f9ef854715fc59b9ec22f9878f11498cfcdbf
         swh:1:dir:644dd466d8ad527ea3a609bfd588a3244e6dafcb
         swh:1:cnt:c8cece50beae7a954f4ea27e3ae7bf941dc6d0c0
         swh:1:dir:a358d0cf89821227d4c00b0ced5e0a8b3756b5db
         swh:1:cnt:cc407b7e24dd300d2e1a77d8f04af89b3f962a51
         swh:1:cnt:701bd0a63e11b3390a547ce8515d28c6bab8a201
         ...
 
     **Example:**
 
     .. sourcecode:: http
 
         GET /graph/visit/nodes/swh:1:dir:644dd466d8ad527ea3a609bfd588a3244e6dafcb HTTP/1.1
 
         Content-Type: application/x-ndjson
         Transfer-Encoding: chunked
 
     .. sourcecode:: http
 
         HTTP/1.1 200 OK
 
         ["swh:1:dir:644dd466d8ad527ea3a609bfd588a3244e6dafcb", "swh:1:cnt:acfb7cabd63b368a03a9df87670ece1488c8bce0"]
         ["swh:1:dir:644dd466d8ad527ea3a609bfd588a3244e6dafcb", "swh:1:cnt:2a0837708151d76edf28fdbb90dc3eabc676cff3"]
         ["swh:1:dir:644dd466d8ad527ea3a609bfd588a3244e6dafcb", "swh:1:cnt:eaf025ad54b94b2fdda26af75594cfae3491ec75"]
         ...
         ["swh:1:dir:644dd466d8ad527ea3a609bfd588a3244e6dafcb", "swh:1:dir:2ebd4b96fa5665ff74f2b27ae41aecdc43af4463", "swh:1:cnt:1d3b6575fb7bf2a147d228e78ffd77ea193c3639"]
         ...
 
 
 Counting results
 ----------------
 
 The following method variants, with trailing `/count` added, behave like their
 already discussed counterparts but, instead of returning results, return the
 *amount* of results that would have been returned:
 
 
 .. http:get:: /graph/leaves/count/:src
 
    Return the amount of :http:get:`/graph/leaves/:src` results
 
 
 .. http:get:: /graph/neighbors/count/:src
 
    Return the amount of :http:get:`/graph/neighbors/:src` results
 
 
 .. http:get:: /graph/visit/nodes/count/:src
 
    Return the amount of :http:get:`/graph/visit/nodes/:src` results
 
 
 Stats
 -----
 
 .. http:get:: /graph/stats
 
     Returns statistics on the compressed graph.
 
     :statuscode 200: success
 
     **Example**
 
     .. sourcecode:: http
 
         GET /graph/stats HTTP/1.1
 
         Content-Type: application/json
 
     .. sourcecode:: http
 
         HTTP/1.1 200 OK
 
         {
             "counts": {
                 "nodes": 16222788,
                 "edges": 9907464
             },
             "ratios": {
                 "compression": 0.367,
                 "bits_per_node": 5.846,
                 "bits_per_edge": 9.573,
                 "avg_locality": 270.369
             },
             "indegree": {
                 "min": 0,
                 "max": 12382,
                 "avg": 0.6107127825377487
             },
             "outdegree": {
                 "min": 0,
                 "max": 1,
                 "avg": 0.6107127825377487
             }
         }
diff --git a/docs/compression.rst b/docs/compression.rst
index 8d9ebcd..e46e7ec 100644
--- a/docs/compression.rst
+++ b/docs/compression.rst
@@ -1,122 +1,123 @@
 Graph compression
 =================
 
 The compression process is a pipeline implemented for the most part on top of
 the `WebGraph framework <http://webgraph.di.unimi.it/>`_ and ecosystem
 libraries. The compression pipeline consists of the following steps:
 
 .. figure:: images/compression_steps.png
     :align: center
     :alt: Compression steps
 
     Compression steps
 
 Each of these steps is briefly described below. For more details see the
 following paper:
 
 .. note::
 
   Paolo Boldi, Antoine Pietri, Sebastiano Vigna, Stefano Zacchiroli.
   `Ultra-Large-Scale Repository Analysis via Graph Compression
   <https://upsilon.cc/~zack/research/publications/saner-2020-swh-graph.pdf>`_. In
   proceedings of `SANER 2020 <https://saner2020.csd.uwo.ca/>`_: The 27th IEEE
   International Conference on Software Analysis, Evolution and
   Reengineering. IEEE 2020.
 
   Links: `preprint
   <https://upsilon.cc/~zack/research/publications/saner-2020-swh-graph.pdf>`_,
   `bibtex
   <https://upsilon.cc/~zack/research/publications/saner-2020-swh-graph.bib>`_.
 
 In order to practically perform graph compression, install the ``swh.graph``
 module and use the ``swh graph compress`` command line interface of the
 compression driver, that will conduct the various steps in the right order.
 See ``swh graph compress --help`` for usage details.
 
 
 1. MPH
 ------
 
-A node in the Software Heritage :ref:`data-model` is identified using its PID
-(see :ref:`persistent-identifiers`). However, WebGraph internally uses integers
-to refer to node ids.
+A node in the Software Heritage :ref:`data model <data-model>` is identified
+using its SWHID (see :ref:`persistent identifiers
+<persistent-identifiers>`). However, WebGraph internally uses integers to refer
+to node ids.
 
 Mapping between the strings and longs ids is needed before compressing the
 graph. From the `Sux4J <http://sux.di.unimi.it/>`_ utility tool, we use the
 `GOVMinimalPerfectHashFunction
 <http://sux.di.unimi.it/docs/it/unimi/dsi/sux4j/mph/GOVMinimalPerfectHashFunction.html>`_
 class, mapping with no collisions N keys to N consecutive integers.
 
 The step produces a ``.mph`` file (MPH stands for *Minimal Perfect
 Hash-function*) storing the hash function taking as input a string and returning
 a unique integer.
 
 
 2. BV compress
 --------------
 
 This is the first actual compression step, building a compressed version of the
 input graph using WebGraph techniques presented in the framework paper. We use
 the `ScatteredArcsASCIIGraph
 <http://webgraph.di.unimi.it/docs-big/it/unimi/dsi/big/webgraph/ScatteredArcsASCIIGraph.html>`_
 class, from WebGraph.
 
 The resulting BV graph is stored as a set of files:
 
 - ``.graph``: the compressed graph in the BV format
 - ``.offsets``: offsets values to read the bit stream graph file
 - ``.obl``: offsets cache to load the graph faster
 - ``.properties``: entries used to correctly decode graph and offset files
 
 
 3. BFS
 -------
 
 In the LLP paper, authors propose an empirical analysis linking node ordering
 and high compression ratio: it is important to use an ordering of nodes ids such
 that vertices from the same host are close to one another.
 
 Building on this insight, the previous compression results in the BV compress
 step are improved by re-ordering nodes ids using a BFS traversal order. We use
 the `BFS
 <http://law.di.unimi.it/software/law-docs/it/unimi/dsi/law/big/graph/BFS.html>`_
 class from the `LAW <http://law.di.unimi.it/>`_ library.
 
 The resulting ordering is stored in the ``.order`` file, listing nodes ids in
 order of traversal.
 
 
 4. Permute
 ----------
 
 Once the order is computed (BFS or another ordering technique), the final
 compressed graph is created based on the initial BV compress result, and using
 the new node order mapping. The permutation uses the `Transform
 <http://webgraph.di.unimi.it/docs-big/it/unimi/dsi/big/webgraph/Transform.html>`_
 class from WebGraph framework.
 
 The final compressed graph is only stored in the resulting ``.graph``,
 ``.offsets``, ``.obl``, and ``.properties`` files.
 
 
 5. Stats
 --------
 
 Compute various statistics on the final compressed graph:
 
 - ``.stats``: entries such as number of nodes, edges, avg/min/max degree,
   average locality, etc.
 - ``.indegree``: graph indegree distribution
 - ``.outdegree``: graph outdegree distribution
 
 This step uses the `Stats
 <http://webgraph.di.unimi.it/docs-big/it/unimi/dsi/big/webgraph/Stats.html>`_
 class from WebGraph.
 
 
 6. Transpose
 ------------
 
 Create a transposed graph to allow backward traversal, using the `Transform
 <http://webgraph.di.unimi.it/docs-big/it/unimi/dsi/big/webgraph/Transform.html>`_
 class from WebGraph.
diff --git a/docs/docker.rst b/docs/docker.rst
index ddaccaf..4931a6b 100644
--- a/docs/docker.rst
+++ b/docs/docker.rst
@@ -1,70 +1,70 @@
 Docker environment
 ==================
 
 
 Build
 -----
 
 .. code:: bash
 
     $ git clone https://forge.softwareheritage.org/source/swh-graph.git
     $ cd swh-graph
     $ docker build --tag swh-graph docker/
 
 ``docker/build.sh`` also exists as an alias for the last line.
 
 
 Run
 ---
 
 Given a graph ``g`` specified by:
 
 - ``g.edges.csv.zst``: zstd-compressed CSV file with one edge per line, as a
-  "SRC_ID SPACE DST_ID" string, where identifiers are the
-  :ref:`persistent-identifiers` of each node.
+  "SRC_ID SPACE DST_ID" string, where identifiers are the :ref:`SWHIDs
+  <persistent-identifiers>` of each node.
 - ``g.nodes.csv.zst``: sorted list of unique node identifiers appearing in the
   corresponding ``g.edges.csv.zst`` file. The format is a zst-compressed CSV
   file (single column) with one persistent identifier per line.
 
 .. code:: bash
 
     $ docker run -ti \
         --volume /PATH/TO/GRAPH/:/srv/softwareheritage/graph/data \
         --publish 127.0.0.1:5009:5009 \
         swh-graph:latest \
         bash
 
 or, as a shortcut:
 
 .. code:: bash
 
     $ docker/run.sh /PATH/TO/GRAPH/
 
 Where ``/PATH/TO/GRAPH`` is a directory containing the ``g.edges.csv.zst`` and
 ``g.nodes.csv.zst`` files.  By default, when entering the container the current
 working directory will be ``/srv/softwareheritage/graph``; all relative paths
 found below are intended to be relative to that dir.
 
 
 Graph compression
 ~~~~~~~~~~~~~~~~~
 
 To compress the graph (from within the docker container):
 
 .. code:: bash
 
     $ docker/run.sh /PATH/TO/GRAPH/
     root@7f3306806861:/srv/softwareheritage/graph# \
         swh graph compress --graph data/g --outdir data/compressed
 
 
 Graph server
 ~~~~~~~~~~~~
 
 To start the swh-graph server (from within the docker container):
 
 .. code:: bash
 
     $ docker/run.sh /PATH/TO/GRAPH/
     root@7f3306806861:/srv/softwareheritage/graph# \
         swh graph rpc-serve --graph data/compressed/g
diff --git a/docs/use-cases.rst b/docs/use-cases.rst
index 983568a..ce01d8c 100644
--- a/docs/use-cases.rst
+++ b/docs/use-cases.rst
@@ -1,167 +1,167 @@
 =========
 Use cases
 =========
 
 
 This document lists use cases and benchmark scenarios for the Software Heritage
 graph service.
 
 
 Conventions
 ===========
 
 - **Node identification**: in the following, nodes are always identified by
-  their Software Heritage :ref:`persistent-identifiers` (SWH PIDs).
+  their :ref:`SWHIDs <persistent-identifiers>`.
 
 
 Use cases
 =========
 
 
 Browsing
 --------
 
 The following use cases require traversing the *forward graph*.
 
 - **ls**: given a directory node, list (non recursively) all linked nodes of
   type directory and content
 
   Implementation::
 
     /graph/neighbors/:DIR_ID?edges=dir:cnt,dir:dir
 
 - **ls -R**: given a directory node, recursively list all linked nodes of type
   directory and content
 
   Implementation::
 
     /graph/visit/paths/:DIR_ID?edges=dir:cnt,dir:dir
 
 - **git log**: given a revision node, recursively list all linked nodes of type
   revision
 
   Implementation::
 
     /graph/visit/nodes/:REV_ID?edges=rev:rev
 
 
 Vault
 -----
 
 The following use cases require traversing the *forward graph*.
 
 - **tarball** (same as *ls -R* above)
 
 - **git bundle**: given a node, recursively list all linked nodes of any kind
 
   Implementation::
 
      /graph/visit/nodes/:NODE_ID?edges=*
 
 
 Provenance
 ----------
 
 The following use cases require traversing the *backward (transposed)
 graph*.
 
 - **commit provenance**: given a content or directory node, return *a* commit
   whose directory (recursively) contains it
 
   Implementation::
 
     /graph/walk/:NODE_ID/rev?direction=backward&edges=dir:dir,cnt:dir,dir:rev
 
 - **complete commit provenance**: given a content or directory node, return
   *all* commits whose directory (recursively) contains it
 
   Implementation::
 
     /graph/leaves/:NODE_ID?direction=backward&edges=dir:dir,cnt:dir,dir:rev
 
 - **origin provenance**: given a content, directory, or commit node, return
   *an* origin that has at least one snapshot that (recursively) contains it
 
   Implementation::
 
     /graph/walk/:NODE_ID/ori?direction=backward&edges=*
 
 - **complete origin provenance**: given a content, directory, or commit node,
   return *all* origins that have at least one snapshot that (recursively)
   contains it
 
   Implementation::
 
     /graph/leaves/:NODE_ID?direction=backward&edges=*
 
 - *SLOC tracking*: left as future work
 
 
 Provenance statistics
 ~~~~~~~~~~~~~~~~~~~~~
 
 The following use cases require traversing the *backward (transposed)
 graph*.
 
 - **content popularity across commits**: for each content, count the number of
   commits (or *commit popularity*) that link to a directory that (recursively)
   includes it. Plot the distribution of content popularity across commits.
 
   Implementation: apply *complete commit provenance* to each content node,
   count the returned commits, aggregate.
 
 - **commit popularity across origins**: for each commit, count the number of
   origins (or *origin popularity*) that have a snapshot that (recursively)
   includes it. Plot the distribution of commit popularity across origins.
 
   Implementation: apply *complete origin provenance* to each commit node, count
   the returned origins, aggregate.
 
 - *SLOC popularity across contents*: left as future work
 
 The following use cases require traversing the *forward graph*.
 
 - **revision size** (as n. of contents) distribution: for each revision, count
   the number of contents that are (recursively) reachable from it. Plot the
   distribution of revision sizes.
 
 - **origin size** (as n. of revisions) distribution: for each origin, count the
   number of revisions that are (recursively) reachable from it. Plot the
   distribution of origin sizes.
 
 
 Benchmarks
 ==========
 
 Notes on how to benchmark graph access:
 
 - separate pure-graph timings from timings related to use additional mappings
   (e.g., node types), no matter if the mappings are in-memory or on-disk
 
 - separate in-memory timings from on-disk timings; in particular, separate the
-  timing of translating node identifiers between internal integers and SWH PIDs
+  timing of translating node identifiers between internal integers and SWHIDs
 
 - for each use case that requires a node as input, we will randomize the choice
   of the input node and repeat the experiment a suitable number of times; where
   possible we will aggregate results computing basic statistics (average,
   standard deviation), as well as normalize results w.r.t. the “size” of the
   chosen node (e.g., number of nodes/path length in the resulting visit)
 
 
 Basic benchmarks
 ----------------
 
 - **Edge traversal**: given a node, retrieve the first node in its adjacency
   list.
 
   For reference: Apostolico, Drovandi in *Graph Compression by BFS* report
   times to retrieve the adjacency list of a node (and/or test if an edge exists
   between two nodes) in the 2-3 us range, for the largest graph in their
   experiments (22 M nodes, 600 M edges).
 
 
 Each use case is a benchmark
 ----------------------------
 
 In addition to abstract benchmark, we will use each use case above as a
 scenario-based benchmark.
diff --git a/swh/graph/pid.py b/swh/graph/pid.py
index 330ba25..83a1dff 100644
--- a/swh/graph/pid.py
+++ b/swh/graph/pid.py
@@ -1,403 +1,403 @@
 # Copyright (C) 2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import mmap
 import os
 import struct
 
 from collections.abc import MutableMapping
 from enum import Enum
 from mmap import MAP_SHARED, MAP_PRIVATE
 from typing import BinaryIO, Iterator, Tuple
 
 from swh.model.identifiers import PersistentId, parse_persistent_identifier
 
 
 PID_BIN_FMT = "BB20s"  # 2 unsigned chars + 20 bytes
 INT_BIN_FMT = ">q"  # big endian, 8-byte integer
 PID_BIN_SIZE = 22  # in bytes
 INT_BIN_SIZE = 8  # in bytes
 
 
 class PidType(Enum):
     """types of existing PIDs, used to serialize PID type as a (char) integer
 
     Note that the order does matter also for driving the binary search in
     PID-indexed maps. Integer values also matter, for compatibility with the
     Java layer.
 
     """
 
     content = 0
     directory = 1
     origin = 2
     release = 3
     revision = 4
     snapshot = 5
 
 
 def str_to_bytes(pid_str: str) -> bytes:
     """Convert a PID to a byte sequence
 
     The binary format used to represent PIDs as 22-byte long byte sequences as
     follows:
 
     - 1 byte for the namespace version represented as a C `unsigned char`
     - 1 byte for the object type, as the int value of :class:`PidType` enums,
       represented as a C `unsigned char`
     - 20 bytes for the SHA1 digest as a byte sequence
 
     Args:
         pid: persistent identifier
 
     Returns:
         bytes: byte sequence representation of pid
 
     """
     pid = parse_persistent_identifier(pid_str)
     return struct.pack(
         PID_BIN_FMT,
         pid.scheme_version,
         PidType[pid.object_type].value,
         bytes.fromhex(pid.object_id),
     )
 
 
 def bytes_to_str(bytes: bytes) -> str:
     """Inverse function of :func:`str_to_bytes`
 
     See :func:`str_to_bytes` for a description of the binary PID format.
 
     Args:
         bytes: byte sequence representation of pid
 
     Returns:
         pid: persistent identifier
 
     """
     (version, type, bin_digest) = struct.unpack(PID_BIN_FMT, bytes)
     pid = PersistentId(object_type=PidType(type).name, object_id=bin_digest)
     return str(pid)
 
 
 class _OnDiskMap:
     """mmap-ed on-disk sequence of fixed size records
 
     """
 
     def __init__(
         self, record_size: int, fname: str, mode: str = "rb", length: int = None
     ):
         """open an existing on-disk map
 
         Args:
             record_size: size of each record in bytes
             fname: path to the on-disk map
             mode: file open mode, usually either 'rb' for read-only maps, 'wb'
                 for creating new maps, or 'rb+' for updating existing ones
                 (default: 'rb')
             length: map size in number of logical records; used to initialize
                 writable maps at creation time. Must be given when mode is 'wb'
                 and the map doesn't exist on disk; ignored otherwise
 
         """
         os_modes = {"rb": os.O_RDONLY, "wb": os.O_RDWR | os.O_CREAT, "rb+": os.O_RDWR}
         if mode not in os_modes:
             raise ValueError("invalid file open mode: " + mode)
         new_map = mode == "wb"
         writable_map = mode in ["wb", "rb+"]
 
         self.record_size = record_size
         self.fd = os.open(fname, os_modes[mode])
         if new_map:
             if length is None:
                 raise ValueError("missing length when creating new map")
             os.truncate(self.fd, length * self.record_size)
 
         self.size = os.path.getsize(fname)
         (self.length, remainder) = divmod(self.size, record_size)
         if remainder:
             raise ValueError(
                 "map size {} is not a multiple of the record size {}".format(
                     self.size, record_size
                 )
             )
 
         self.mm = mmap.mmap(
             self.fd, self.size, flags=MAP_SHARED if writable_map else MAP_PRIVATE
         )
 
     def close(self) -> None:
         """close the map
 
         shuts down both the mmap and the underlying file descriptor
 
         """
         if not self.mm.closed:
             self.mm.close()
         os.close(self.fd)
 
     def __len__(self) -> int:
         return self.length
 
     def __delitem__(self, pos: int) -> None:
         raise NotImplementedError("cannot delete records from fixed-size map")
 
 
 class PidToNodeMap(_OnDiskMap, MutableMapping):
-    """memory mapped map from PID (:ref:`persistent-identifiers`) to a continuous
-    range 0..N of (8-byte long) integers
+    """memory mapped map from :ref:`SWHIDs <persistent-identifiers>` to a
+    continuous range 0..N of (8-byte long) integers
 
     This is the converse mapping of :class:`NodeToPidMap`.
 
     The on-disk serialization format is a sequence of fixed length (30 bytes)
     records with the following fields:
 
     - PID (22 bytes): binary PID representation as per :func:`str_to_bytes`
     - long (8 bytes): big endian long integer
 
     The records are sorted lexicographically by PID type and checksum, where
     type is the integer value of :class:`PidType`. PID lookup in the map is
     performed via binary search.  Hence a huge map with, say, 11 B entries,
     will require ~30 disk seeks.
 
     Note that, due to fixed size + ordering, it is not possible to create these
     maps by random writing. Hence, __setitem__ can be used only to *update* the
     value associated to an existing key, rather than to add a missing item. To
     create an entire map from scratch, you should do so *sequentially*, using
     static method :meth:`write_record` (or, at your own risk, by hand via the
     mmap :attr:`mm`).
 
     """
 
     # record binary format: PID + a big endian 8-byte big endian integer
     RECORD_BIN_FMT = ">" + PID_BIN_FMT + "q"
     RECORD_SIZE = PID_BIN_SIZE + INT_BIN_SIZE
 
     def __init__(self, fname: str, mode: str = "rb", length: int = None):
         """open an existing on-disk map
 
         Args:
             fname: path to the on-disk map
             mode: file open mode, usually either 'rb' for read-only maps, 'wb'
                 for creating new maps, or 'rb+' for updating existing ones
                 (default: 'rb')
             length: map size in number of logical records; used to initialize
                 read-write maps at creation time. Must be given when mode is
                 'wb'; ignored otherwise
 
         """
         super().__init__(self.RECORD_SIZE, fname, mode=mode, length=length)
 
     def _get_bin_record(self, pos: int) -> Tuple[bytes, bytes]:
         """seek and return the (binary) record at a given (logical) position
 
         see :func:`_get_record` for an equivalent function with additional
         deserialization
 
         Args:
             pos: 0-based record number
 
         Returns:
             a pair `(pid, int)`, where pid and int are bytes
 
         """
         rec_pos = pos * self.RECORD_SIZE
         int_pos = rec_pos + PID_BIN_SIZE
 
         return (self.mm[rec_pos:int_pos], self.mm[int_pos : int_pos + INT_BIN_SIZE])
 
     def _get_record(self, pos: int) -> Tuple[str, int]:
         """seek and return the record at a given (logical) position
 
         moral equivalent of :func:`_get_bin_record`, with additional
         deserialization to non-bytes types
 
         Args:
             pos: 0-based record number
 
         Returns:
             a pair `(pid, int)`, where pid is a string-based PID and int the
             corresponding integer identifier
 
         """
         (pid_bytes, int_bytes) = self._get_bin_record(pos)
         return (bytes_to_str(pid_bytes), struct.unpack(INT_BIN_FMT, int_bytes)[0])
 
     @classmethod
     def write_record(cls, f: BinaryIO, pid: str, int: int) -> None:
         """write a logical record to a file-like object
 
         Args:
             f: file-like object to write the record to
             pid: textual PID
             int: PID integer identifier
 
         """
         f.write(str_to_bytes(pid))
         f.write(struct.pack(INT_BIN_FMT, int))
 
     def _bisect_pos(self, pid_str: str) -> int:
         """bisect the position of the given identifier. If the identifier is
         not found, the position of the pid immediately after is returned.
 
         Args:
             pid_str: the pid as a string
 
         Returns:
             the logical record of the bisected position in the map
 
         """
         if not isinstance(pid_str, str):
             raise TypeError("PID must be a str, not {}".format(type(pid_str)))
         try:
             target = str_to_bytes(pid_str)  # desired PID as bytes
         except ValueError:
             raise ValueError('invalid PID: "{}"'.format(pid_str))
 
         lo = 0
         hi = self.length - 1
         while lo < hi:
             mid = (lo + hi) // 2
             (pid, _value) = self._get_bin_record(mid)
             if pid < target:
                 lo = mid + 1
             else:
                 hi = mid
         return lo
 
     def _find(self, pid_str: str) -> Tuple[int, int]:
         """lookup the integer identifier of a pid and its position
 
         Args:
             pid_str: the pid as a string
 
         Returns:
             a pair `(pid, pos)` with pid integer identifier and its logical
             record position in the map
 
         """
         pos = self._bisect_pos(pid_str)
         pid_found, value = self._get_record(pos)
         if pid_found == pid_str:
             return (value, pos)
         raise KeyError(pid_str)
 
     def __getitem__(self, pid_str: str) -> int:
         """lookup the integer identifier of a PID
 
         Args:
             pid: the PID as a string
 
         Returns:
             the integer identifier of pid
 
         """
         return self._find(pid_str)[0]  # return element, ignore position
 
     def __setitem__(self, pid_str: str, int: str) -> None:
         (_pid, pos) = self._find(pid_str)  # might raise KeyError and that's OK
 
         rec_pos = pos * self.RECORD_SIZE
         int_pos = rec_pos + PID_BIN_SIZE
         self.mm[rec_pos:int_pos] = str_to_bytes(pid_str)
         self.mm[int_pos : int_pos + INT_BIN_SIZE] = struct.pack(INT_BIN_FMT, int)
 
     def __iter__(self) -> Iterator[Tuple[str, int]]:
         for pos in range(self.length):
             yield self._get_record(pos)
 
     def iter_prefix(self, prefix: str):
         swh, n, t, sha = prefix.split(":")
         sha = sha.ljust(40, "0")
         start_pid = ":".join([swh, n, t, sha])
         start = self._bisect_pos(start_pid)
         for pos in range(start, self.length):
             pid, value = self._get_record(pos)
             if not pid.startswith(prefix):
                 break
             yield pid, value
 
     def iter_type(self, pid_type: str) -> Iterator[Tuple[str, int]]:
         prefix = "swh:1:{}:".format(pid_type)
         yield from self.iter_prefix(prefix)
 
 
 class NodeToPidMap(_OnDiskMap, MutableMapping):
     """memory mapped map from a continuous range of 0..N (8-byte long) integers to
-    PIDs (:ref:`persistent-identifiers`)
+    :ref:`SWHIDs <persistent-identifiers>`
 
     This is the converse mapping of :class:`PidToNodeMap`.
 
     The on-disk serialization format is a sequence of fixed length records (22
     bytes), each being the binary representation of a PID as per
     :func:`str_to_bytes`.
 
     The records are sorted by long integer, so that integer lookup is possible
     via fixed-offset seek.
 
     """
 
     RECORD_BIN_FMT = PID_BIN_FMT
     RECORD_SIZE = PID_BIN_SIZE
 
     def __init__(self, fname: str, mode: str = "rb", length: int = None):
         """open an existing on-disk map
 
         Args:
             fname: path to the on-disk map
             mode: file open mode, usually either 'rb' for read-only maps, 'wb'
                 for creating new maps, or 'rb+' for updating existing ones
                 (default: 'rb')
             size: map size in number of logical records; used to initialize
                 read-write maps at creation time. Must be given when mode is
                 'wb'; ignored otherwise
             length: passed to :class:`_OnDiskMap`
 
         """
 
         super().__init__(self.RECORD_SIZE, fname, mode=mode, length=length)
 
     def _get_bin_record(self, pos: int) -> bytes:
         """seek and return the (binary) PID at a given (logical) position
 
         Args:
             pos: 0-based record number
 
         Returns:
             PID as a byte sequence
 
         """
         rec_pos = pos * self.RECORD_SIZE
 
         return self.mm[rec_pos : rec_pos + self.RECORD_SIZE]
 
     @classmethod
     def write_record(cls, f: BinaryIO, pid: str) -> None:
         """write a PID to a file-like object
 
         Args:
             f: file-like object to write the record to
             pid: textual PID
 
         """
         f.write(str_to_bytes(pid))
 
     def __getitem__(self, pos: int) -> str:
         orig_pos = pos
         if pos < 0:
             pos = len(self) + pos
         if not (0 <= pos < len(self)):
             raise IndexError(orig_pos)
 
         return bytes_to_str(self._get_bin_record(pos))
 
     def __setitem__(self, pos: int, pid: str) -> None:
         rec_pos = pos * self.RECORD_SIZE
         self.mm[rec_pos : rec_pos + self.RECORD_SIZE] = str_to_bytes(pid)
 
     def __iter__(self) -> Iterator[Tuple[int, str]]:
         for pos in range(self.length):
             yield (pos, self[pos])