diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..09a6f12 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,13 @@ +FROM openjdk:11-slim-buster + +# Install swh.graph (both Python and Java parts) +RUN apt-get update && \ + apt-get install --no-install-recommends --yes \ + curl time \ + gcc pkg-config libsystemd-dev python3-dev \ + python3-pip python3-setuptools && \ + rm -rf /var/lib/apt/lists/* && \ + pip3 install swh.graph + +# Default dir +WORKDIR /srv/softwareheritage/graph diff --git a/dockerfiles/Dockerfile b/dockerfiles/Dockerfile deleted file mode 100644 index a71d493..0000000 --- a/dockerfiles/Dockerfile +++ /dev/null @@ -1,31 +0,0 @@ -FROM openjdk:13-slim-buster - -# Java global config -ARG MAX_RAM=2800G -ENV JAVA_TOOL_OPTIONS \ - -Xmx${MAX_RAM} -XX:PretenureSizeThreshold=512M -XX:MaxNewSize=4G \ - -XX:+UseLargePages -XX:+UseTransparentHugePages -XX:+UseNUMA \ - -XX:+UseTLAB -XX:+ResizeTLAB \ - -Dlogback.configurationFile=app/configuration/logback.xml - -# Install swh.graph (both Python and Java parts) -RUN apt-get update && \ - apt-get install --no-install-recommends --yes \ - curl time \ - gcc pkg-config libsystemd-dev python3-dev \ - python3-pip python3-setuptools && \ - rm -rf /var/lib/apt/lists/* && \ - pip3 install swh.graph - -# Install 3rd party dependencies (not shipped with swh.graph) -WORKDIR /srv/softwareheritage/graph/lib - -RUN ln /usr/local/share/swh-graph/*.jar . - -# Add user files -WORKDIR /srv/softwareheritage/graph/app -COPY configuration configuration/ -COPY scripts scripts/ - -# Default dir -WORKDIR /srv/softwareheritage/graph diff --git a/dockerfiles/configuration/logback.xml b/dockerfiles/configuration/logback.xml deleted file mode 100644 index 76a4c7a..0000000 --- a/dockerfiles/configuration/logback.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - %d %r %p [%t] %logger{1} - %m%n - - - - - - - diff --git a/dockerfiles/scripts/compress_graph.sh b/dockerfiles/scripts/compress_graph.sh deleted file mode 100755 index 1194689..0000000 --- a/dockerfiles/scripts/compress_graph.sh +++ /dev/null @@ -1,112 +0,0 @@ -#!/bin/bash - -usage() { - echo "Usage: compress_graph.sh --lib --input " - echo "Options:" - echo " -o, --outdir (Default: GRAPH_DIR/compressed)" - echo " -t, --tmp (Default: OUT_DIR/tmp)" - echo " --stdout (Default: OUT_DIR/stdout)" - echo " --stderr (Default: OUT_DIR/stderr)" - echo " --batch-size (Default: 10^6): WebGraph internals" - exit 1 -} - -graph_path="" -out_dir="" -lib_dir="" -stdout_file="" -stderr_file="" -batch_size=1000000 -while (( "$#" )); do - case "$1" in - -i|--input) shift; graph_path=$1 ;; - -o|--outdir) shift; out_dir=$1 ;; - -l|--lib) shift; lib_dir=$1 ;; - -t|--tmp) shift; tmp_dir=$1 ;; - --stdout) shift; stdout_file=$1 ;; - --stderr) shift; stderr_file=$1 ;; - --batch-size) shift; batch_size=$1 ;; - *) usage ;; - esac - shift -done - -if [[ -z "$graph_path" || ! -d "$lib_dir" ]]; then - usage -fi -if [ -z "$out_dir" ] ; then - out_dir="$(dirname $graph_path)/compressed" -fi -if [ -z "$tmp_dir" ] ; then - tmp_dir="${out_dir}/tmp" -fi -if [ -z "$stdout_file" ] ; then - stdout_file="${out_dir}/stdout" -fi -if [ -z "$stderr_file" ] ; then - stderr_file="${out_dir}/stderr" -fi - -dataset=$(basename $graph_path) -compr_graph_path="${out_dir}/${dataset}" - -test -d "$out_dir" || mkdir -p "$out_dir" -test -d "$tmp_dir" || mkdir -p "$tmp_dir" - -step_info() { - echo -e "\n* swh-graph: $1 step... 
($2)\n" -} - -java_cmd () { - /usr/bin/time -v java -cp $lib_dir/'*' $* -} - -{ - # Build a function (MPH) that maps node names to node numbers in - # lexicographic order (output: .mph) - step_info "MPH" "1/6" && - java_cmd it.unimi.dsi.sux4j.mph.GOVMinimalPerfectHashFunction \ - --zipped $compr_graph_path.mph --temp-dir $tmp_dir \ - $graph_path.nodes.csv.gz && - - # Build the graph in BVGraph format (output: .{graph,offsets,properties}) - step_info "BV compress" "2/6" && - java_cmd it.unimi.dsi.big.webgraph.ScatteredArcsASCIIGraph \ - --function $compr_graph_path.mph --temp-dir $tmp_dir \ - --zipped $compr_graph_path-bv < $graph_path.edges.csv.gz && - # Build the offset big-list file to load the graph faster (output: .obl) - java_cmd it.unimi.dsi.big.webgraph.BVGraph \ - --list $compr_graph_path-bv && - - # Find a better permutation using a BFS traversal order (output: .order) - step_info "BFS" "3/6" && - java_cmd it.unimi.dsi.law.big.graph.BFS \ - $compr_graph_path-bv $compr_graph_path.order && - - # Permute the graph accordingly - step_info "Permute" "4/6" && - java_cmd it.unimi.dsi.big.webgraph.Transform mapOffline \ - $compr_graph_path-bv $compr_graph_path \ - $compr_graph_path.order $batch_size $tmp_dir && - java_cmd it.unimi.dsi.big.webgraph.BVGraph \ - --list $compr_graph_path && - - # Compute graph statistics (output: .{indegree,outdegree,stats}) - step_info "Stats" "5/6" && - java_cmd it.unimi.dsi.big.webgraph.Stats $compr_graph_path && - - # Create transposed graph (to allow backward traversal) - step_info "Transpose" "6/6" && - java_cmd it.unimi.dsi.big.webgraph.Transform transposeOffline \ - $compr_graph_path $compr_graph_path-transposed \ - $batch_size $tmp_dir && - java_cmd it.unimi.dsi.big.webgraph.BVGraph \ - --list $compr_graph_path-transposed -} > $stdout_file 2> $stderr_file - -if [[ $? -eq 0 ]]; then - echo "Graph compression done." -else - echo "Graph compression failed: see $stderr_file for more info." - exit 1 -fi diff --git a/docs/docker.rst b/docs/docker.rst index c0d6ba5..62434c2 100644 --- a/docs/docker.rst +++ b/docs/docker.rst @@ -1,79 +1,58 @@ Docker environment ================== Build ----- .. code:: bash $ git clone https://forge.softwareheritage.org/source/swh-graph.git $ cd swh-graph - $ docker build --tag swh-graph dockerfiles + $ docker build --tag swh-graph docker/ Run --- Given a graph ``g`` specified by: -- ``g.edges.csv.gz``: gzip-compressed csv file with one edge per line, as a +- ``g.edges.csv.zst``: zstd-compressed CSV file with one edge per line, as a "SRC_ID SPACE DST_ID" string, where identifiers are the :ref:`persistent-identifiers` of each node. -- ``g.nodes.csv.gz``: sorted list of unique node identifiers appearing in the - corresponding ``g.edges.csv.gz`` file. The format is a gzip-compressed csv - file with one persistent identifier per line. +- ``g.nodes.csv.zst``: sorted list of unique node identifiers appearing in the + corresponding ``g.edges.csv.zst`` file. The format is a zst-compressed CSV + file (single column) with one persistent identifier per line. .. code:: bash $ docker run -ti \ --volume /PATH/TO/GRAPH/:/srv/softwareheritage/graph/data \ --publish 127.0.0.1:5009:5009 \ swh-graph:latest \ bash -Where ``/PATH/TO/GRAPH`` is a directory containing the ``g.edges.csv.gz`` and -``g.nodes.csv.gz`` files. By default, when entering the container the current +Where ``/PATH/TO/GRAPH`` is a directory containing the ``g.edges.csv.zst`` and +``g.nodes.csv.zst`` files. 
By default, when entering the container the current working directory will be ``/srv/softwareheritage/graph``; all relative paths found below are intended to be relative to that dir. Graph compression ~~~~~~~~~~~~~~~~~ To compress the graph: .. code:: bash - $ app/scripts/compress_graph.sh --lib lib/ --input data/g - -Warning: very large graphs may need a bigger batch size parameter for WebGraph -internals (you can specify a value when running the compression script using: -``--batch-size 1000000000``). - - -Node identifier mappings -~~~~~~~~~~~~~~~~~~~~~~~~ - -To dump the mapping files (i.e., various node id <-> other info mapping files, -in either ``.csv.gz`` or ad-hoc ``.map`` format): - -.. code:: bash - - $ java -cp lib/swh-graph-*.jar \ - org.softwareheritage.graph.backend.MapBuilder \ - data/g.nodes.csv.gz data/compressed/g + $ swh graph compress --graph data/g --outdir data/compressed Graph server ~~~~~~~~~~~~ To start the swh-graph server: .. code:: bash - $ java -cp lib/swh-graph-*.jar \ - org.softwareheritage.graph.App data/compressed/g - -To specify the port on which the server will run, use the `--port` or `-p` flag -(default is 5009). + $ swh graph rpc-serve --graph data/compressed/g diff --git a/reports/experiments/experiments.tex b/reports/experiments/experiments.tex index fcf1131..fce997e 100644 --- a/reports/experiments/experiments.tex +++ b/reports/experiments/experiments.tex @@ -1,233 +1,233 @@ \documentclass[11pt,a4paper]{article} \usepackage[english]{babel} \usepackage{a4wide} \usepackage{booktabs} \usepackage{minted} \usepackage{siunitx} \usepackage[colorlinks,urlcolor=blue,linkcolor=magenta,citecolor=red,linktocpage=true]{hyperref} \title{Google Summer of Code 2019} \author{Thibault Allançon} \date{8 April 2019} \begin{document} \maketitle Early experiments running WebGraph framework on the Software Heritage datasets. \section{Environment} Docker environment and compression script can be found here: -\url{https://forge.softwareheritage.org/source/swh-graph/browse/master/dockerfiles/}. +\url{https://forge.softwareheritage.org/source/swh-graph/browse/master/docker/}. \section{Datasets analysis} \begin{center} \begin{tabular}{@{} l *4r @{}} \toprule \multicolumn{1}{c}{} & \textbf{\mintinline{text}{.nodes.csv.gz}} & \textbf{\mintinline{text}{.edges.csv.gz}} & \textbf{\# of nodes} & \textbf{\# of edges} \\ \midrule \texttt{rel\_to\_obj} & 344M & 382M & \num{16222788} & \num{9907464} \\ \texttt{ori\_to\_snp} & 1.3G & 3.7G & \num{112564374} & \num{194970670} \\ \texttt{dir\_to\_rev} & 745M & 12G & \num{35399184} & \num{481829426} \\ \texttt{snp\_to\_obj} & 3.5G & 21G & \num{170999796} & \num{831089515} \\ \texttt{rev\_to\_rev} & 22G & 33G & \num{1117498391} & \num{1165813689} \\ \texttt{rev\_to\_dir} & 41G & 48G & \num{2047888941} & \num{1125083793} \\ \texttt{dir\_to\_dir} & 95G & 1.3T & \num{4805057515} & \num{48341950415} \\ \texttt{dir\_to\_cnt} & 180G & 3T & \num{9231457233} & \num{112363058067} \\ \midrule Entire graph (\texttt{all}) & 340G & 4.5T & \num{11595403407} & \num{164513703039} \\ \bottomrule \end{tabular} \end{center} \section{Individual datasets compression} The first experiments were done on individual datasets. 
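As a quick cross-check of the datasets table above: the per-dataset edge counts sum
exactly to the full-graph total, since every edge belongs to exactly one dataset,
\[
  \num{9907464} + \num{194970670} + \dots + \num{112363058067}
  = \num{164513703039},
\]
whereas the node counts overlap across datasets (e.g.\ the same directory node is
counted in \texttt{dir\_to\_rev}, \texttt{dir\_to\_dir} and \texttt{dir\_to\_cnt}),
which is why they add up to more than the \num{11595403407} nodes of the entire
graph.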
\subsection{Results}

Datasets were compressed on different VMs (depending on availability):

\begin{itemize}
    \item \textit{(sexus)} 1TB of RAM and 40vCPU: \mintinline{text}{dir_to_dir}
    \item \textit{(monster)} 700GB of RAM and 72vCPU: \mintinline{text}{dir_to_cnt}
    \item \textit{(chub)} 2TB of RAM and 128vCPU: all the other datasets
\end{itemize}

Note: the results may vary because random permutations are used in the graph
compression process.

\begin{center}
\begin{tabular}{@{} l *3r @{}}
    \toprule
    \multicolumn{1}{c}{} &
    \textbf{compr ratio} &
    \textbf{bit/edge} &
    \textbf{compr size\footnotemark} \\
    \midrule
    \texttt{rel\_to\_obj} & 0.367 & 9.573 & 23M \\
    \texttt{ori\_to\_snp} & 0.291 & 8.384 & 140M \\
    \texttt{dir\_to\_rev} & 0.07 & 1.595 & 120M \\
    \texttt{snp\_to\_obj} & 0.067 & 1.798 & 253M \\
    \texttt{rev\_to\_rev} & 0.288 & 9.063 & 2.2G \\
    \texttt{rev\_to\_dir} & 0.291 & 9.668 & 2.6G \\
    \texttt{dir\_to\_dir} & 0.336 & 10.178 & 61G \\
    \texttt{dir\_to\_cnt} & 0.228 & 7.054 & 97G \\
    \midrule
    Entire graph (estimated) & & & 163G \\
    \bottomrule
\end{tabular}
\end{center}

\footnotetext{calculated as: size of \mintinline{bash}{*.graph} + size of
  \mintinline{bash}{*.offsets}}

\subsection{Timings}

\begin{center}
\begin{tabular}{@{} l *6r @{}}
    \toprule
    \multicolumn{1}{c}{} &
    \textbf{MPH} &
    \textbf{BV Compress} &
    \textbf{Symmetrized} &
    \textbf{LLP} &
    \textbf{Permute} &
    \textbf{Total} \\
    \midrule
    \texttt{rel\_to\_obj} & 14s & 25s & 18s & 8min & 10s & \textbf{9min} \\
    \texttt{ori\_to\_snp} & 1min & 5min & 3min & 1h30 & 1min & \textbf{1h40} \\
    \texttt{dir\_to\_rev} & 56s & 22min & 6min & 41min & 2min & \textbf{1h13} \\
    \texttt{snp\_to\_obj} & 3min & 22min & 8min & 2h50 & 5min & \textbf{3h30} \\
    \texttt{rev\_to\_rev} & 11min & 56min & 24min & 31h52 & 20min & \textbf{33h42} \\
    \texttt{rev\_to\_dir} & 20min & 1h & 30min & 52h45 & 23min & \textbf{55h} \\
    \bottomrule
\end{tabular}
\end{center}

\vspace{0.5cm}

For the \mintinline{text}{dir_to_*} datasets we decided not to use the LLP
algorithm because it would take too long, and instead used a BFS traversal
order for the node re-ordering. This allows \textbf{much} faster computation
and yields similar results (thanks to our graph topology).

\vspace{0.5cm}

\begin{center}
\begin{tabular}{@{} l *5r @{}}
    \toprule
    \multicolumn{1}{c}{} &
    \textbf{MPH} &
    \textbf{BV Compress} &
    \textbf{BFS} &
    \textbf{Permute} &
    \textbf{Total} \\
    \midrule
    \texttt{dir\_to\_dir} & 4h36 & 50h & 4h44 & 12h38 & \textbf{72h} \\
    \texttt{dir\_to\_cnt} & 3h07 & 101h & 17h18 & 20h38 & \textbf{142h} \\
    \bottomrule
\end{tabular}
\end{center}

\subsection{Memory usage}

Memory usage monitoring during the compression process:

\begin{center}
\begin{tabular}{@{} l c @{}}
    \toprule
    \multicolumn{1}{c}{} &
    \textbf{Maximum resident set size} \\
    \midrule
    \texttt{rel\_to\_obj} & 11G \\
    \texttt{ori\_to\_snp} & 15G \\
    \texttt{dir\_to\_rev} & 22G \\
    \texttt{snp\_to\_obj} & 23G \\
    \texttt{rev\_to\_rev} & 86G \\
    \texttt{rev\_to\_dir} & 154G \\
    \texttt{dir\_to\_dir} & 345G \\
    \texttt{dir\_to\_cnt} & 764G \\
    \midrule
    Entire graph (estimated) & 1.4T \\
    \bottomrule
\end{tabular}
\end{center}

\section{Entire graph compression}

After studying feasibility on the individual datasets and estimating the final
results, we assembled the entire graph into a single dataset and launched the
compression process on it.

\subsection{Results}

Two different VMs were used depending on the compression step:

\begin{itemize}
    \item \textit{(monster)} 700GB of RAM and 72vCPU: for the BV compress step.
    \item \textit{(rioc)} 3TB of RAM and 48vCPU: all the other steps.
\end{itemize}

The reason to use monster instead of rioc for the BV compress step was that
the I/O on rioc was too slow for the job to complete within the time limit
allowed on the cluster.

\begin{center}
\begin{tabular}{@{} l *3r @{}}
    \toprule
    \multicolumn{1}{c}{} &
    \textbf{compr ratio} &
    \textbf{bit/edge} &
    \textbf{compr size} \\
    \midrule
    \texttt{all} & 0.158 & 4.913 & 101G \\
    \texttt{all-transposed} & 0.144 & 4.481 & 94G \\
    \bottomrule
\end{tabular}
\end{center}

\subsection{Timings and max memory usage}

\begin{center}
\begin{tabular}{@{} r *2r @{}}
    \toprule
    \multicolumn{1}{c}{} &
    \textbf{Timings} &
    \textbf{Max mem usage} \\
    \midrule
    \texttt{MPH} & 3h30 & 10GB \\
    \texttt{BV Compress} & 103h & 16GB \\
    \texttt{BFS} & 10h & 1057GB \\
    \texttt{Permute} & 25h & 115GB \\
    \texttt{Stats} & 4h & 102GB \\
    \texttt{Transpose} & 22h & 19GB \\
    \midrule
    Total &
    \begin{tabular}{@{}r@{}}\textbf{168h} \\ \scriptsize{(7 days)}\end{tabular} &
    \textbf{1TB} \\
    \bottomrule
\end{tabular}
\end{center}

\end{document}
diff --git a/swh/graph/tests/dataset/.gitignore b/swh/graph/tests/dataset/.gitignore
index cf41cb1..531c841 100644
--- a/swh/graph/tests/dataset/.gitignore
+++ b/swh/graph/tests/dataset/.gitignore
@@ -1,4 +1,5 @@
-dockerfiles/
+docker/
 output/*-bv.*
 output/stderr
 output/stdout
+output/compression.log
diff --git a/swh/graph/tests/dataset/generate_graph.sh b/swh/graph/tests/dataset/generate_graph.sh
index e1a72ee..7b78d36 100755
--- a/swh/graph/tests/dataset/generate_graph.sh
+++ b/swh/graph/tests/dataset/generate_graph.sh
@@ -1,27 +1,23 @@
 #!/bin/bash

 # Clean previous run
-rm -rf dockerfiles output
+rm -rf docker/ output
 mkdir output

 # Build Docker work environment
 toplevel_dir=`git rev-parse --show-toplevel`
-mkdir -p dockerfiles
-cp -r $toplevel_dir/dockerfiles/ .
-docker build --tag swh-graph-test dockerfiles
+mkdir -p docker
+cp -r $toplevel_dir/docker/ .
+docker build --tag swh-graph-test docker

 # Setup input for compression script
 tr ' ' '\n' < example.edges.csv | sort -u > example.nodes.csv
 zstd < example.nodes.csv > example.nodes.csv.zst
 zstd < example.edges.csv > example.edges.csv.zst

-docker run \
-    --user $(id -u):$(id -g) \
+docker run \
+    --user $(id -u):$(id -g) \
     --name swh-graph-test --rm --tty --interactive \
-    --volume $(pwd):/input \
-    --volume $(pwd)/output:/output \
-    swh-graph-test:latest \
-    app/scripts/compress_graph.sh \
-    --lib lib/ \
-    --input /input/example \
-    --outdir /output
+    --volume $(pwd):/input --volume $(pwd)/output:/output \
+    swh-graph-test:latest \
+    swh graph compress --graph /input/example --outdir /output
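The updated test run above leaves its results in ``output/``. Assuming ``swh graph
compress`` produces the same WebGraph artifacts as the removed
``compress_graph.sh`` pipeline (the ``.graph``/``.offsets``/``.properties``/``.obl``
files, the ``.mph`` and ``.order`` files, the degree/stats files, and a
``-transposed`` graph), a minimal sketch of a post-run sanity check could be:

#!/bin/bash
# Hypothetical check of the compression output for the test dataset (a sketch,
# assuming `swh graph compress` emits the same files as the removed
# compress_graph.sh: BVGraph files, MPH, node order, stats, transposed graph).
out_dir=output
base=example

for ext in graph offsets properties obl mph order indegree outdegree stats; do
    if [ ! -f "$out_dir/$base.$ext" ]; then
        echo "missing $out_dir/$base.$ext" >&2
        exit 1
    fi
done

for ext in graph offsets properties; do
    if [ ! -f "$out_dir/$base-transposed.$ext" ]; then
        echo "missing $out_dir/$base-transposed.$ext" >&2
        exit 1
    fi
done

echo "All expected compression artifacts are present."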