diff --git a/common/modules/dataset.org b/common/modules/dataset.org index 43e26fb..3f4e249 100644 --- a/common/modules/dataset.org +++ b/common/modules/dataset.org @@ -1,260 +1,261 @@ #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) #+INCLUDE: "prelude.org" :minlevel 1 * Open Datasets :PROPERTIES: :CUSTOM_ID: main :END: ** Software Heritage Graph dataset :PROPERTIES: :CUSTOM_ID: graphdataset :END: #+BEAMER: \vspace{-1mm} **Use case:** large scale analyses of the most comprehensive corpus on the development history of free/open source software. *** #+BEGIN_EXPORT latex \vspace{-2mm} \begin{thebibliography}{Foo Bar, 1969} \small \bibitem{Pietri2019} Antoine Pietri, Diomidis Spinellis, Stefano Zacchiroli\newblock The Software Heritage Graph Dataset: Public software development under one roof\newblock MSR 2019: 16th Intl. Conf. on Mining Software Repositories. IEEE\newblock preprint: \url{http://deb.li/swhmsr19} \end{thebibliography} #+END_EXPORT #+BEAMER: \vspace{-1mm} *** Dataset - Relational representation of the full graph as a set of tables - Available as open data: https://doi.org/10.5281/zenodo.2583978 - Chosen as subject for the *MSR 2020 Mining Challenge* #+BEAMER: \vspace{-1mm} *** Formats - Local use: PostgreSQL dumps, or Apache Parquet files (~1 TiB each) - Live usage: Amazon Athena (SQL-queriable), Azure Data Lake ** Sample query --- most frequent first commit words :PROPERTIES: :CUSTOM_ID: graphquery1 :END: *** #+begin_src sql SELECT COUNT(*) AS c, word FROM ( SELECT LOWER(REGEXP_EXTRACT(FROM_UTF8( message), '^\w+')) AS word FROM revision) WHERE word != '' GROUP BY word ORDER BY COUNT(*) DESC LIMIT 5; #+end_src #+BEAMER: \pause *** | *Count* | *Word* | |------------+--------| | 71 338 310 | update | | 64 980 346 | merge | | 56 854 372 | add | | 44 971 954 | added | | 33 222 056 | fix | ** Sample query --- fork and merge arities :PROPERTIES: :CUSTOM_ID: 
graphquery2 :END: *** Fork arity :PROPERTIES: :BEAMER_env: block :BEAMER_COL: 0.5 :END: i.e., how often is a commit based upon? #+BEAMER: \scriptsize #+begin_src sql SELECT fork_deg, count(*) FROM ( SELECT id, count(*) AS fork_deg FROM revision_history GROUP BY id) t GROUP BY fork_deg ORDER BY fork_deg; #+end_src #+BEAMER: \includegraphics[width=\linewidth]{fork-degree} #+BEAMER: \pause *** Merge arity :PROPERTIES: :BEAMER_env: block :BEAMER_COL: 0.5 :END: i.e., how large are merges? #+BEAMER: \scriptsize #+begin_src sql SELECT merge_deg, COUNT(*) FROM ( SELECT parent_id, COUNT(*) AS merge_deg FROM revision_history GROUP BY parent_id) t GROUP BY merge_deg ORDER BY merge_deg; #+end_src #+BEAMER: \includegraphics[width=\linewidth]{merge-degree} * Other queries :PROPERTIES: :CUSTOM_ID: morequery :END: ** Sample query --- ratio of commits performed during weekends :PROPERTIES: :CUSTOM_ID: weekendsrc :END: #+BEGIN_SRC sql WITH revision_date AS (SELECT FROM_UNIXTIME(date / 1000000) AS date FROM revision) SELECT yearly_rev.year AS year, CAST(yearly_weekend_rev.number AS DOUBLE) / yearly_rev.number * 100.0 AS weekend_pc FROM (SELECT YEAR(date) AS year, COUNT(*) AS number FROM revision_date WHERE YEAR(date) BETWEEN 1971 AND 2018 GROUP BY YEAR(date) ) AS yearly_rev JOIN (SELECT YEAR(date) AS year, COUNT(*) AS number FROM revision_date WHERE DAY_OF_WEEK(date) >= 6 AND YEAR(date) BETWEEN 1971 AND 2018 GROUP BY YEAR(date) ) AS yearly_weekend_rev ON yearly_rev.year = yearly_weekend_rev.year ORDER BY year DESC; #+END_SRC ** Sample query --- ratio of commits performed during weekends (cont.) 
:PROPERTIES: :CUSTOM_ID: weekendout :END: | *Year* | *Weekend* | *Total* | *Weekend percentage* | |--------+-----------+-----------+----------------------| | 2018 | 15130065 | 78539158 | 19.26 | | 2017 | 33776451 | 168074276 | 20.09 | | 2016 | 43890325 | 209442130 | 20.95 | | 2015 | 35781159 | 166884920 | 21.44 | | 2014 | 24591048 | 122341275 | 20.10 | | 2013 | 17792778 | 88524430 | 20.09 | | 2012 | 12794430 | 64516008 | 19.83 | | 2011 | 9765190 | 48479321 | 20.14 | | 2010 | 7766348 | 38561515 | 20.14 | | 2009 | 6352253 | 31053219 | 20.45 | | 2008 | 4568373 | 22474882 | 20.32 | | 2007 | 3318881 | 16289632 | 20.37 | | 2006 | 2597142 | 12224905 | 21.24 | | 2005 | 2086697 | 9603804 | 21.72 | | 2004 | 1752400 | 7948104 | 22.04 | | 2003 | 1426033 | 6941593 | 20.54 | | 2002 | 1159294 | 5378538 | 21.55 | | 2001 | 849905 | 4098587 | 20.73 | | 2000 | 2091770 | 4338842 | 48.21 | | 1999 | 438540 | 2026906 | 21.63 | | 1998 | 311888 | 1430567 | 21.80 | | 1997 | 263995 | 1129249 | 23.37 | | 1996 | 192543 | 795827 | 24.19 | | 1995 | 176270 | 670417 | 26.29 | | 1994 | 137811 | 581563 | 23.69 | | 1993 | 169767 | 697343 | 24.34 | | 1992 | 74923 | 422068 | 17.75 | | 1991 | 92782 | 484547 | 19.14 | | 1990 | 113201 | 340489 | 33.24 | | 1989 | 31742 | 182325 | 17.40 | | 1988 | 44983 | 206275 | 21.80 | | 1987 | 27892 | 146157 | 19.08 | | 1986 | 54200 | 237330 | 22.83 | | 1985 | 75595 | 306564 | 24.65 | | 1984 | 26391 | 95506 | 27.63 | | 1983 | 89776 | 370687 | 24.21 | | 1982 | 51524 | 191933 | 26.84 | | 1981 | 32995 | 123618 | 26.69 | | 1980 | 31832 | 133733 | 23.80 | | 1979 | 20943 | 175164 | 11.95 | | 1978 | 3773 | 33677 | 11.20 | | 1977 | 4783 | 19376 | 24.68 | | 1976 | 1907 | 7048 | 27.05 | | 1975 | 2089 | 26579 | 7.85 | | 1974 | 2095 | 14290 | 14.66 | | 1973 | 2988 | 15580 | 19.17 | | 1972 | 1755 | 6552 | 26.78 | | 1971 | 1723 | 6125 | 28.13 | ** Sample query --- average size of the most popular file types :PROPERTIES: :CUSTOM_ID: popfilesrc :END: #+BEGIN_SRC sql SELECT suffix, 
ROUND(COUNT(*) * 100 / 1e6) AS Million_files, ROUND(AVG(length) / 1024) AS Average_k_length FROM (SELECT length, suffix FROM -- File length in joinable form (SELECT TO_BASE64(sha1_git) AS sha1_git64, length FROM content ) AS content_length JOIN -- Sample of files with popular suffixes (SELECT target64, file_suffix_sample.suffix AS suffix FROM -- Popular suffixes (SELECT suffix FROM ( SELECT REGEXP_EXTRACT(FROM_UTF8(name), '\.[^.]+$') AS suffix FROM directory_entry_file) AS file_suffix GROUP BY suffix ORDER BY COUNT(*) DESC LIMIT 20 ) AS pop_suffix JOIN -- Sample of files and suffixes (SELECT TO_BASE64(target) AS target64, REGEXP_EXTRACT(FROM_UTF8(name), '\.[^.]+$') AS suffix FROM directory_entry_file TABLESAMPLE BERNOULLI(1)) AS file_suffix_sample ON file_suffix_sample.suffix = pop_suffix.suffix) AS pop_suffix_sample ON pop_suffix_sample.target64 = content_length.sha1_git64) GROUP BY suffix ORDER BY AVG(length) DESC; #+END_SRC * Discussion :PROPERTIES: :CUSTOM_ID: discussion :END: ** Discussion - one /can/ query such a corpus SQL-style - but relational representation shows its limits at this scale - ...at least as deployed on commercial SQL offerings such as Athena - note: (naive) sharding is ineffective, due to the pseudo-random distribution of node identifiers - experiments with Google BigQuery are ongoing - (we broke it at the first import attempt..., due to very large arrays in directory entry tables) * License Dataset ** Software Heritage License Blob Dataset :PROPERTIES: :CUSTOM_ID: licensedataset :END: + #+BEAMER: \vspace{-2mm} *** #+BEGIN_EXPORT latex \vspace{-2mm} \begin{thebibliography}{Foo Bar, 1969} \footnotesize \bibitem{Zacchiroli2022LicenseBlobs} Stefano Zacchiroli \newblock A Large-scale Dataset of (Open Source) License Text Variants \newblock MSR 2022 (best dataset paper award)\newblock preprint: \url{https://arxiv.org/abs/2204.00256} \end{thebibliography} #+END_EXPORT *** Dataset #+BEAMER: \vspace{-1mm} - 6.5 million unique full texts of FOSS 
license variants - Detected using filename patterns across the entire SWH archive - =LICENSE=, =COPYRIGHT=, =NOTICE=, etc. - Metadata: file lengths measures, detected MIME type, detected SPDX license (via ScanCode), example origin repository, oldest public commit of origin *** Use cases #+BEAMER: \vspace{-1mm} - Empirical studies on FOSS licensing, including phylogenetics - Training of automated license classifiers - NLP analyses of legal texts diff --git a/common/modules/graph-compression.org b/common/modules/graph-compression.org index a6d79e5..e42d9ae 100644 --- a/common/modules/graph-compression.org +++ b/common/modules/graph-compression.org @@ -1,316 +1,345 @@ #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) #+INCLUDE: "prelude.org" :minlevel 1 # Depends: \usepackage{pdfpages} * Graph Compression :PROPERTIES: :CUSTOM_ID: main :END: +** Graph compression on the Software Heritage archive :noexport: + :PROPERTIES: + :CUSTOM_ID: oneslide + :END: +*** + #+BEGIN_EXPORT latex + \vspace{-3mm} + \footnotesize + \begin{thebibliography}{Foo Bar, 1969} + \bibitem{Boldi2020} Paolo Boldi, Antoine Pietri, Sebastiano Vigna, Stefano Zacchiroli + \newblock Ultra-Large-Scale Repository Analysis via Graph Compression + \newblock SANER 2020, 27th Intl. Conf. on Software Analysis, Evolution and Reengineering. IEEE + \end{thebibliography} + #+END_EXPORT + #+BEAMER: \vspace{-1mm} +*** Research question + Is it possible to efficiently perform software development history analyses + at the scale of Software Heritage archive on a single, relatively cheap + machine? + #+BEAMER: \vspace{-1mm} +*** Idea + Apply state-of-the-art graph compression techniques from the field of Web + graph / social network analysis. 
+ #+BEAMER: \vspace{-1mm} +*** Results + The entire archive graph (25 B nodes, 350 B edges) can be loaded in 200 GiB + and then traversed at the cost of tens of nanoseconds per edge (= a few + hours for a complete single-thread traversal of the archive). + ** Graph compression on the Software Heritage archive :PROPERTIES: :CUSTOM_ID: intro :END: *** #+BEGIN_EXPORT latex \vspace{-3mm} \begin{thebibliography}{Foo Bar, 1969} \bibitem{Boldi2020} Paolo Boldi, Antoine Pietri, Sebastiano Vigna, Stefano Zacchiroli \newblock Ultra-Large-Scale Repository Analysis via Graph Compression \newblock SANER 2020, 27th Intl. Conf. on Software Analysis, Evolution and Reengineering. IEEE \end{thebibliography} #+END_EXPORT *** Research question Is it possible to efficiently perform software development history analyses at ultra large scale (= the scale of Software Heritage archive or more), on a single, relatively cheap machine? *** Idea Apply state-of-the-art graph compression techniques from the field of Web graph / social network analysis. ** Background --- (Web) graph compression :PROPERTIES: :CUSTOM_ID: background1 :END: *** The graph of the Web :PROPERTIES: :BEAMER_env: definition :END: Directed graph that has Web pages as nodes and hyperlinks between them as edges. *** Properties (1) - **Locality:** pages link to pages whose URLs are lexicographically similar. URLs share long common prefixes. → use *D-gap compression* *** Adjacency lists :PROPERTIES: :BEAMER_env: block :BEAMER_col: 0.51 :END: #+BEAMER: \scriptsize | *Node* | *Outdegree* | *Successors* | |--------+-------------+--------------------------------------| | ... | ... | ... | | 15 | 11 | 13,15,16,17,18,19,23,24,203,315,1034 | | 16 | 10 | 15,16,17,22,23,24,315,316,317,3041 | | 17 | 0 | | | 18 | 5 | 13,15,16,17,50 | | ... | ... | ... 
| *** D-gapped adjacency lists :PROPERTIES: :BEAMER_env: block :BEAMER_col: 0.48 :END: #+BEAMER: \scriptsize | *Node* | *Outdegree* | *Successors* | |--------+-------------+-----------------------------| | ... | ... | ... | | 15 | 11 | 3,1,0,0,0,0,3,0,178,111,718 | | 16 | 10 | 1,0,0,4,0,0,290,0,0,2723 | | 17 | 0 | | | 18 | 5 | 9,1,0,0,32 | | ... | ... | ... | ** Background --- (Web) graph compression (cont.) :PROPERTIES: :CUSTOM_ID: background2 :END: *** The graph of the Web :PROPERTIES: :BEAMER_env: definition :END: Directed graph that has Web pages as nodes and hyperlinks between them as edges. *** Properties (2) - **Similarity:** pages that are close together in lexicographic order tend to have many common successors. → use *reference compression* *** Adjacency lists :PROPERTIES: :BEAMER_env: block :BEAMER_col: 0.47 :END: #+BEAMER: \scriptsize | *Node* | *Outd.* | *Successors* | |--------+---------+--------------------------------------| | ... | ... | ... | | 15 | 11 | 13,15,16,17,18,19,23,24,203,315,1034 | | 16 | 10 | 15,16,17,22,23,24,315,316,317,3041 | | 17 | 0 | | | 18 | 5 | 13,15,16,17,50 | | ... | ... | ... | *** Copy lists :PROPERTIES: :BEAMER_env: block :BEAMER_col: 0.60 :END: #+BEAMER: \scriptsize | *Node* | *Ref.* | *Copy list* | *Extra nodes* | |--------+--------+-------------+--------------------------------------| | ... | ... | ... | ... | | 15 | 0 | | 13,15,16,17,18,19,23,24,203,315,1034 | | 16 | 1 | 01110011010 | 22,316,317,3041 | | 17 | | | | | 18 | 3 | 11110000000 | 50 | | ... | ... | ... | | ** Background --- Web graph compression (OLD) :noexport: Borrowing (great!) 
slides from: #+BEGIN_EXPORT latex \begin{thebibliography}{} \bibitem{Pibiri2018} Giulio Ermanno Pibiri \newblock Effective Web Graph Representations, 2018 \newblock \url{http://pages.di.unipi.it/pibiri/slides/webgraphs\_compression.pdf} \end{thebibliography} #+END_EXPORT ** Background -- Web graph compression (imported slides) (OLD) :noexport: :PROPERTIES: :BEAMER_env: ignoreheading :END: #+BEGIN_EXPORT latex { \setbeamercolor{background canvas}{bg=} \setbeamertemplate{background}{} \includepdf[pages={4,11,12,13}]{webgraphs_compression.pdf} \addtocounter{framenumber}{4} } #+END_EXPORT ** Corpus :PROPERTIES: :CUSTOM_ID: corpus :END: *** Nodes :PROPERTIES: :BEAMER_env: block :BEAMER_col: 0.5 :END: #+BEAMER: \small | *Node type* | *N. of nodes* | |-------------+---------------| | origins | 88 M | | snapshots | 57 M | | releases | 9.9 M | | revisions | 1.1 B | | directories | 4.9 B | | contents | 5.5 B | |-------------+---------------| | Total nodes | 12 B | *** Edges :PROPERTIES: :BEAMER_env: block :BEAMER_col: 0.5 :END: #+BEAMER: \footnotesize | *Edge type* | *N. of edges* | |-----------------------+---------------| | origin → snapshot | 195 M | | snapshot → revision | 616 M | | snapshot → release | 215 M | | release → revision | 9.9 M | | revision → revision | 1.2 B | | revision → directory | 1.1 B | | directory → directory | 48 B | | directory → revision | 482 M | | directory → content | 112 B | |-----------------------+---------------| | Total edges | 165 B | *** :PROPERTIES: :BEAMER_env: ignoreheading :END: #+BEAMER: \vspace{1mm} Stats for archive snapshot 2018-09-25, from the Software Heritage graph dataset. 
Growth rate: exponential, doubling every 22-30 months, cf.: #+BEGIN_EXPORT latex \begin{thebibliography}{Foo Bar, 1969} \footnotesize \bibitem{Rousseau2020} Roberto Di Cosmo, Guillaume Rousseau, Stefano Zacchiroli \newblock Software Provenance Tracking at the Scale of Public Source Code \newblock Empirical Software Engineering 25(4): 2930-2959 (2020) \end{thebibliography} #+END_EXPORT ** Graph compression pipeline :PROPERTIES: :CUSTOM_ID: pipeline :END: #+BEAMER: \hspace*{-0.1\linewidth} \includegraphics[width=1.2\linewidth]{compression/compression-steps} #+BEAMER: \vspace{-1cm} *** - *MPH*: minimal perfect hash, mapping Merkle IDs to 0..N-1 integers - *BV* compress: Boldi-Vigna compression (based on MPH order) - *BFS*: breadth-first visit to renumber - *Permute*: update BV compression according to BFS order *** (Re)establishing locality - key for good compression is a node ordering that ensures locality and similarity - which is very much /not/ the case with Merkle IDs, ...but is the case /again/ after BFS reordering ** Compression experiment :PROPERTIES: :CUSTOM_ID: compexp :END: | *Step* | *Wall time* (hours) | |-------------+---------------------| | MPH | 2 | | BV Compress | 84 | | BFS | 19 | | Permute | 18 | | Transpose | 15 | |-------------+---------------------| | Total | 138 (6 days) | - server equipped with 24 CPUs and 750 GB of RAM - RAM mostly used as I/O cache for the BFS step - /minimum/ memory requirements are close to the RAM needed to load the final compressed graph in memory ** Compression efficiency (space) :PROPERTIES: :CUSTOM_ID: spaceefficiency :END: *** :PROPERTIES: :BEAMER_env: block :BEAMER_col: 0.4 :END: | *Forward graph* | | | total size | 91 GiB | | bits per edge | 4.91 | | compression ratio | 15.8% | *** :PROPERTIES: :BEAMER_env: block :BEAMER_col: 0.4 :END: | *Backward graph* | | | total size | 83 GiB | | bits per edge | 4.49 | | compression ratio | 14.4% | *** Operating cost The structure of a full bidirectional archive graph fits in 
less than 200 GiB of RAM, for a hardware cost of ~300 USD. ** Compression efficiency (time) :PROPERTIES: :CUSTOM_ID: timeefficiency :END: *** Benchmark --- Full BFS visit (single thread) #+BEAMER: \begin{columns}\begin{column}{0.45\textwidth} | *Forward graph* | | |------------------+----------------| | wall time | 1h48m | | throughput | 1.81 M nodes/s | | | (553 ns/node) | #+BEAMER: \end{column}\begin{column}{0.45\textwidth} | *Backward graph* | | |------------------+----------------| | wall time | 3h17m | | throughput | 988 K nodes/s | | | (1.01 µs/node) | #+BEAMER: \end{column}\end{columns} *** Benchmark --- Edge lookup random sample: 1 B nodes (8.3% of entire graph); then enumeration of all successors #+BEAMER: \begin{columns}\begin{column}{0.45\textwidth} | *Forward graph* | | |------------------+----------------| | visited edges | 13.6 B | | throughput | 12.0 M edges/s | | | (83 ns/edge) | #+BEAMER: \end{column}\begin{column}{0.45\textwidth} | *Backward graph* | | |------------------+----------------| | visited edges | 13.6 B | | throughput | 9.45 M edges/s | | | (106 ns/edge) | #+BEAMER: \end{column}\end{columns} *** :PROPERTIES: :BEAMER_env: ignoreheading :END: Note how edge lookup time is close to DRAM random access time (50-60 ns). ** Discussion :PROPERTIES: :CUSTOM_ID: discussion :END: *** Incrementality compression is *not incremental*, due to the use of contiguous integer ranges - but the graph is append-only, so... - ...based on expected graph growth rate it should be possible to pre-allocate enough free space in the integer ranges to support *amortized incrementality* (future work) #+BEAMER: \pause *** In-memory v. 
on-disk the compressed in-memory graph structure has *no attributes* - usual design is to exploit the 0..N-1 integer ranges to *memory map node attributes* to disk for efficient access - works well for queries that does graph traversal first and "join" node attributes last; ping-pong between the two is expensive - edge attributes are more problematic (work in progress) diff --git a/common/modules/swh-fuse.org b/common/modules/swh-fuse.org index 461ee9d..74f6bbf 100644 --- a/common/modules/swh-fuse.org +++ b/common/modules/swh-fuse.org @@ -1,114 +1,139 @@ #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) #+INCLUDE: "prelude.org" :minlevel 1 ** Software Heritage Filesystem (SwhFS) :PROPERTIES: :CUSTOM_ID: oneslide :END: *** The *Software Heritage Filesystem (SwhFS)* is a user-space POSIX filesystem that enables browsing parts of the Software Heritage archive as if it were locally available. *** - code: https://forge.softwareheritage.org/source/swh-fuse/ - documentation: https://docs.softwareheritage.org/devel/swh-fuse/ *** #+BEGIN_EXPORT latex \begin{thebibliography}{Foo Bar, 1969} \bibitem{Allancon2021} Thibault Allançon, Antoine Pietri, Stefano Zacchiroli \newblock The Software Heritage Filesystem (SwhFS): Integrating Source Code Archival with Development \newblock ICSE 2021: The 43rd International Conference on Software Engineering \newblock \url{https://arxiv.org/abs/2102.06390} \end{thebibliography} #+END_EXPORT +** Software Heritage Filesystem (SwhFS) --- Example :noexport: + :PROPERTIES: + :CUSTOM_ID: examplemini + :END: +*** +#+BEAMER: \footnotesize +#+BEGIN_SRC +$ mkdir swhfs +$ swh fs mount swhfs/ # mount the archive +$ cd swhfs/ + +$ cat archive/swh:1:cnt:c839dea9e8e6f0528b468214348fee8669b305b2 +#include + +int main(void) { + printf("Hello, World!\n"); +} + +$ cd archive/swh:1:dir:1fee702c7e6d14395bbf5ac3598e73bcbf97b030 +$ ls | wc -l +127 +$ grep -i antenna 
THE_LUNAR_LANDING.s | cut -f 5 +# IS THE LR ANTENNA IN POSITION 1 YET +# BRANCH IF ANTENNA ALREADY IN POSITION 1 +#+END_SRC * Software Heritage Filesystem (SwhFS) --- Tutorial :PROPERTIES: :CUSTOM_ID: tutorial :END: ** Software Heritage Filesystem (SwhFS) --- Tutorial *** #+BEGIN_SRC $ pip install swh.fuse # install SwhFS $ mkdir swhfs $ swh fs mount swhfs/ # mount the archive $ ls -1F swhfs/ # list entry points archive/ # <- start browsing from here cache/ origin/ README #+END_SRC ** Software Heritage Filesystem (SwhFS) --- Tutorial (cont.) *** #+BEAMER: \footnotesize #+BEGIN_SRC $ cd swhfs/ $ cat archive/swh:1:cnt:c839dea9e8e6f0528b468214348fee8669b305b2 #include int main(void) { printf("Hello, World!\n"); } #+END_SRC ** Software Heritage Filesystem (SwhFS) --- Tutorial (cont.) *** #+BEAMER: \footnotesize #+BEGIN_SRC $ cd archive/swh:1:dir:1fee702c7e6d14395bbf5ac3598e73bcbf97b030 $ ls | wc -l 127 $ grep -i antenna THE_LUNAR_LANDING.s | cut -f 5 # IS THE LR ANTENNA IN POSITION 1 YET # BRANCH IF ANTENNA ALREADY IN POSITION 1 #+END_SRC ** Software Heritage Filesystem (SwhFS) --- Tutorial (cont.) *** #+BEAMER: \footnotesize #+BEGIN_SRC $ cd archive/swh:1:rev:9d76c0b163675505d1a901e5fe5249a2c55609bc $ ls -F history/ meta.json@ parent@ parents/ root@ $ jq '.author.name, .date, .message' meta.json "Michal Golebiowski-Owczarek" "2020-03-02T23:02:42+01:00" "Data:Event:Manipulation: Prevent collisions with Object.prototype ..." $ find root/src/ -type f -name '*.js' | xargs cat | wc -l 10136 #+END_SRC ** Software Heritage Filesystem (SwhFS) --- Tutorial (cont.) *** #+BEAMER: \footnotesize #+BEGIN_SRC $ swh web search git-annex --limit 1 ... git://git.joeyh.name/git-annex.git \ https://archive.softwareheritage.org/api/1/origin/git://git.joeyh.name/git-annex.git/visits/ ... 
$ swh web search git-annex --url-encode | cut -f 1 git%3A%2F%2Fgit.joeyh.name%2Fgit-annex.git $ cd origin/git%3A%2F%2Fgit.joeyh.name%2Fgit-annex.git $ ls -F 2020-12-19/ $ ls 2020-12-19/snapshot/refs/heads/master/root/ Annex/ COPYRIGHT NEWS Annex.hs Creds.hs P2P/ Assistant/ Crypto.hs README Assistant.hs Database/ Remote/ Backend/ debian/ RemoteDaemon/ #+END_SRC diff --git a/talks-public/2022-09-28-ese-research/2022-09-28-ese-research.org b/talks-public/2022-09-28-ese-research/2022-09-28-ese-research.org index a7eb802..1e45f9e 100644 --- a/talks-public/2022-09-28-ese-research/2022-09-28-ese-research.org +++ b/talks-public/2022-09-28-ese-research/2022-09-28-ese-research.org @@ -1,16 +1,30 @@ #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) #+TITLE: Empirical Software Engineering Research with Software Heritage #+BEAMER_HEADER: \date[2022-09-28]{28 September 2022} +#+BEAMER_HEADER: \title[Empirical Software Eng. 
Research with Software Heritage]{Empirical Software Engineering Research with Software Heritage} #+AUTHOR: Stefano Zacchiroli #+DATE: 28 September 2022 #+EMAIL: stefano.zacchiroli@telecom-paris.fr #+INCLUDE: "../../common/modules/prelude-toc.org" :minlevel 1 #+INCLUDE: "../../common/modules/169.org" #+BEAMER_HEADER: \institute[Télécom Paris]{Télécom Paris, Polytechnic Institute of Paris\\ {\tt stefano.zacchiroli@telecom-paris.fr}} #+BEAMER_HEADER: \author{Stefano Zacchiroli} * Datasets -#+INCLUDE: "../../common/modules/dataset.org::#graphdataset" -#+INCLUDE: "../../common/modules/dataset.org::#graphquery1" -#+INCLUDE: "../../common/modules/dataset.org::#licensedataset" +** Graph dataset +#+INCLUDE: "../../common/modules/dataset.org::#graphdataset" :only-contents t +** Graph dataset --- example +#+INCLUDE: "../../common/modules/dataset.org::#graphquery1" :only-contents t +** License dataset +#+INCLUDE: "../../common/modules/dataset.org::#licensedataset" :only-contents t +* Accessing source code artifacts +** The Software Heritage Filesystem (SwhFS) +#+INCLUDE: "../../common/modules/swh-fuse.org::#oneslide" :only-contents t +** The Software Heritage Filesystem (SwhFS) --- example +#+INCLUDE: "../../common/modules/swh-fuse.org::#examplemini" :only-contents t +** Graph compression +#+INCLUDE: "../../common/modules/graph-compression.org::#oneslide" :only-contents t +* Software provenance and evolution +* Software forks +* Diversity, equity, and inclusion