diff --git a/common/modules/dataset.org b/common/modules/dataset.org index 6bb3d90..32525a1 100644 --- a/common/modules/dataset.org +++ b/common/modules/dataset.org @@ -1,214 +1,227 @@ #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) #+INCLUDE: "prelude.org" :minlevel 1 * Open Datasets :PROPERTIES: :CUSTOM_ID: main :END: ** Software Heritage Graph dataset :PROPERTIES: :CUSTOM_ID: graphdataset :END: #+BEAMER: \vspace{-1mm} **Use case:** large scale analyses of the most comprehensive corpus on the development history of free/open source software. *** #+BEGIN_EXPORT latex \vspace{-2mm} \begin{thebibliography}{Foo Bar, 1969} \small \bibitem{Pietri2019} Antoine Pietri, Diomidis Spinellis, Stefano Zacchiroli\newblock The Software Heritage Graph Dataset: Public software development under one roof\newblock MSR 2019: 16th Intl. Conf. on Mining Software Repositories. IEEE\newblock preprint: \url{http://deb.li/swhmsr19} \end{thebibliography} #+END_EXPORT #+BEAMER: \vspace{-1mm} *** Dataset - Relational representation of the full graph as a set of tables - Available as open data: https://doi.org/10.5281/zenodo.2583978 - Chosen as subject for the *MSR 2020 Mining Challenge* #+BEAMER: \vspace{-1mm} *** Formats - Local use: PostgreSQL dumps, or Apache Parquet files (~1 TiB each) - Live usage: Amazon Athena (SQL-queriable), Azure Data Lake (soon) ** Sample query --- most frequent first commit words :PROPERTIES: :CUSTOM_ID: graphquery1 :END: *** #+begin_src sql SELECT COUNT(*) AS c, word FROM ( SELECT LOWER(REGEXP_EXTRACT(FROM_UTF8( message), '^\w+')) AS word FROM revision) WHERE word != '' GROUP BY word ORDER BY COUNT(*) DESC LIMIT 5; #+end_src #+BEAMER: \pause *** | *Count* | *Word* | |------------+--------| | 71 338 310 | update | | 64 980 346 | merge | | 56 854 372 | add | | 44 971 954 | added | | 33 222 056 | fix | ** Sample query --- fork and merge arities :PROPERTIES: :CUSTOM_ID: graphquery2 :END: *** Fork arity :PROPERTIES: :BEAMER_env: block :BEAMER_COL: 0.5 :END: i.e., how often is a commit built upon? #+BEAMER: \scriptsize #+begin_src sql SELECT fork_deg, count(*) FROM ( SELECT id, count(*) AS fork_deg FROM revision_history GROUP BY id) t GROUP BY fork_deg ORDER BY fork_deg; #+end_src #+BEAMER: \includegraphics[width=\linewidth]{fork-degree} #+BEAMER: \pause *** Merge arity :PROPERTIES: :BEAMER_env: block :BEAMER_COL: 0.5 :END: i.e., how large are merges?
#+BEAMER: \scriptsize #+begin_src sql SELECT merge_deg, COUNT(*) FROM ( SELECT parent_id, COUNT(*) AS merge_deg FROM revision_history GROUP BY parent_id) t GROUP BY merge_deg ORDER BY merge_deg; #+end_src #+BEAMER: \includegraphics[width=\linewidth]{merge-degree} * Other queries :PROPERTIES: :CUSTOM_ID: morequery :END: ** Sample query --- ratio of commits performed during weekends :PROPERTIES: :CUSTOM_ID: weekendsrc :END: #+BEGIN_SRC sql WITH revision_date AS (SELECT FROM_UNIXTIME(date / 1000000) AS date FROM revision) SELECT yearly_rev.year AS year, CAST(yearly_weekend_rev.number AS DOUBLE) / yearly_rev.number * 100.0 AS weekend_pc FROM (SELECT YEAR(date) AS year, COUNT(*) AS number FROM revision_date WHERE YEAR(date) BETWEEN 1971 AND 2018 GROUP BY YEAR(date) ) AS yearly_rev JOIN (SELECT YEAR(date) AS year, COUNT(*) AS number FROM revision_date WHERE DAY_OF_WEEK(date) >= 6 AND YEAR(date) BETWEEN 1971 AND 2018 GROUP BY YEAR(date) ) AS yearly_weekend_rev ON yearly_rev.year = yearly_weekend_rev.year ORDER BY year DESC; #+END_SRC ** Sample query --- ratio of commits performed during weekends (cont.) :PROPERTIES: :CUSTOM_ID: weekendout :END: | *Year* | *Weekend* | *Total* | *Weekend percentage* | |--------+-----------+-----------+----------------------| | 2018 | 15130065 | 78539158 | 19.26 | | 2017 | 33776451 | 168074276 | 20.09 | | 2016 | 43890325 | 209442130 | 20.95 | | 2015 | 35781159 | 166884920 | 21.44 | | 2014 | 24591048 | 122341275 | 20.10 | | 2013 | 17792778 | 88524430 | 20.09 | | 2012 | 12794430 | 64516008 | 19.83 | | 2011 | 9765190 | 48479321 | 20.14 | | 2010 | 7766348 | 38561515 | 20.14 | | 2009 | 6352253 | 31053219 | 20.45 | | 2008 | 4568373 | 22474882 | 20.32 | | 2007 | 3318881 | 16289632 | 20.37 | | 2006 | 2597142 | 12224905 | 21.24 | | 2005 | 2086697 | 9603804 | 21.72 | | 2004 | 1752400 | 7948104 | 22.04 | | 2003 | 1426033 | 6941593 | 20.54 | | 2002 | 1159294 | 5378538 | 21.55 | | 2001 | 849905 | 4098587 | 20.73 | | 2000 | 2091770 | 4338842 | 48.21 | | 1999 | 438540 | 2026906 | 21.63 | | 1998 | 311888 | 1430567 | 21.80 | | 1997 | 263995 | 1129249 | 23.37 | | 1996 | 192543 | 795827 | 24.19 | | 1995 | 176270 | 670417 | 26.29 | | 1994 | 137811 | 581563 | 23.69 | | 1993 | 169767 | 697343 | 24.34 | | 1992 | 74923 | 422068 | 17.75 | | 1991 | 92782 | 484547 | 19.14 | | 1990 | 113201 | 340489 | 33.24 | | 1989 | 31742 | 182325 | 17.40 | | 1988 | 44983 | 206275 | 21.80 | | 1987 | 27892 | 146157 | 19.08 | | 1986 | 54200 | 237330 | 22.83 | | 1985 | 75595 | 306564 | 24.65 | | 1984 | 26391 | 95506 | 27.63 | | 1983 | 89776 | 370687 | 24.21 | | 1982 | 51524 | 191933 | 26.84 | | 1981 | 32995 | 123618 | 26.69 | | 1980 | 31832 | 133733 | 23.80 | | 1979 | 20943 | 175164 | 11.95 | | 1978 | 3773 | 33677 | 11.20 | | 1977 | 4783 | 19376 | 24.68 | | 1976 | 1907 | 7048 | 27.05 | | 1975 | 2089 | 26579 | 7.85 | | 1974 | 2095 | 14290 | 14.66 | | 1973 | 2988 | 15580 | 19.17 | | 1972 | 1755 | 6552 | 26.78 | | 1971 | 1723 | 6125 | 28.13 | ** Sample query --- average size of the most popular file types :PROPERTIES: :CUSTOM_ID: popfilesrc :END: #+BEGIN_SRC sql SELECT suffix, ROUND(COUNT(*) * 100 / 1e6) AS Million_files, ROUND(AVG(length) / 1024) AS Average_k_length FROM (SELECT length, suffix FROM -- File length in joinable form (SELECT TO_BASE64(sha1_git) AS sha1_git64, length FROM content ) AS content_length JOIN -- Sample of files with popular suffixes (SELECT target64, file_suffix_sample.suffix AS suffix FROM -- Popular suffixes (SELECT suffix FROM ( SELECT REGEXP_EXTRACT(FROM_UTF8(name), '\.[^.]+$') AS suffix FROM
directory_entry_file) AS file_suffix GROUP BY suffix ORDER BY COUNT(*) DESC LIMIT 20 ) AS pop_suffix JOIN -- Sample of files and suffixes (SELECT TO_BASE64(target) AS target64, REGEXP_EXTRACT(FROM_UTF8(name), '\.[^.]+$') AS suffix FROM directory_entry_file TABLESAMPLE BERNOULLI(1)) AS file_suffix_sample ON file_suffix_sample.suffix = pop_suffix.suffix) AS pop_suffix_sample ON pop_suffix_sample.target64 = content_length.sha1_git64) GROUP BY suffix ORDER BY AVG(length) DESC; #+END_SRC +* Discussion + :PROPERTIES: + :CUSTOM_ID: discussion + :END: +** Discussion + - one /can/ query such a corpus SQL-style + - but relational representation shows its limits at this scale + - ...at least as deployed on commercial SQL offerings such as Athena + - note: (naive) sharding is ineffective, due to the pseudo-random + distribution of node identifiers + - experiments with Google BigQuery are ongoing + - (we broke it at the first import attempt..., due to very large arrays in + directory entry tables) diff --git a/common/modules/foss-commons.org b/common/modules/foss-commons.org index 3ca554b..002f37a 100644 --- a/common/modules/foss-commons.org +++ b/common/modules/foss-commons.org @@ -1,67 +1,85 @@ #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) #+INCLUDE: "prelude.org" :minlevel 1 * The Free Software Commons :PROPERTIES: :CUSTOM_ID: main :END: ** Free Software -*** Definition (Free Software) + :PROPERTIES: + :CUSTOM_ID: freeswdef + :END: +*** [#B] Free Software + :PROPERTIES: + :BEAMER_env: definition + :END: A program is *free software* if the program's users have the four /essential freedoms/: - Freedom #0, to *run* the program, for any purpose - Freedom #1, to *study* how the program works, and change it - Freedom #2, to *redistribute* copies - Freedom #3, to *improve* the program, and *release* improvements *** :B_ignoreheading: :PROPERTIES: :BEAMER_env: ignoreheading :BEAMER_ref: :END: Free Software also comes with *obligations*, which vary according to the license: BSD, GPL, Apache, AGPL, . . . ** Why bother? + :PROPERTIES: + :CUSTOM_ID: whybother + :END: Why, as computer scientists/teachers/students, should we bother about Free/Open Source Software (FOSS)? #+BEAMER: \pause \vfill FOSS has /radically changed/ the way software is: - developed - tested - proven - conceived - marketed - sold - maintained - taught - deployed - ... ** The Commons and FOSS :PROPERTIES: :CUSTOM_ID: commonsdef :END: *** Definition (Commons) + :PROPERTIES: + :CUSTOM_ID: commonsdef1 + :END: The *commons* is the cultural and natural resources accessible to all members of a society, including natural materials such as air, water, and a habitable earth. These resources are held in common, not owned privately. #+BEAMER: {\tiny\url{https://en.wikipedia.org/wiki/Commons}} *** Definition (Software Commons) + :PROPERTIES: + :CUSTOM_ID: commonsdef2 + :END: The *software commons* consists of all computer software which is available at little or no cost and which can be altered and reused with few restrictions. Thus /all open source software and all free software are part of the [software] commons/. [...] #+BEAMER: {\tiny\url{https://en.wikipedia.org/wiki/Software_Commons}} ** But /where/ is this commons? 
+ :PROPERTIES: + :CUSTOM_ID: wherefoss + :END: #+latex: \begin{flushleft} #+ATTR_LATEX: :width \extblockscale{.5\linewidth} file:myriadsources.png #+latex: \end{flushleft} #+BEAMER: \pause *** Fashion victims - many disparate development platforms - a myriad of places where distribution may happen - projects tend to migrate from one place to another over time #+BEAMER: \pause *** One place... :B_block: :PROPERTIES: :BEAMER_env: block :END: \hfill ... where can we find, track and search /all/ source code? diff --git a/common/modules/graph-compression.org b/common/modules/graph-compression.org index c59fd5e..8c2d452 100644 --- a/common/modules/graph-compression.org +++ b/common/modules/graph-compression.org @@ -1,279 +1,306 @@ #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) #+INCLUDE: "prelude.org" :minlevel 1 # Depends: \usepackage{pdfpages} * Graph Compression :PROPERTIES: :CUSTOM_ID: main :END: ** Graph compression on the Software Heritage archive + :PROPERTIES: + :CUSTOM_ID: intro + :END: *** #+BEGIN_EXPORT latex \vspace{-3mm} \begin{thebibliography}{Foo Bar, 1969} \bibitem{Boldi2020} Paolo Boldi, Antoine Pietri, Sebastiano Vigna, Stefano Zacchiroli \newblock Ultra-Large-Scale Repository Analysis via Graph Compression \newblock SANER 2020, 27th Intl. Conf. on Software Analysis, Evolution and Reengineering. IEEE \end{thebibliography} #+END_EXPORT *** Research question Is it possible to efficiently perform software development history analyses at ultra large scale (= the scale of the Software Heritage archive or more), on a single, relatively cheap machine? *** Idea Apply state-of-the-art graph compression techniques from the field of Web graph / social network analysis. ** Background --- (Web) graph compression + :PROPERTIES: + :CUSTOM_ID: background1 + :END: *** The graph of the Web :PROPERTIES: :BEAMER_env: definition :END: Directed graph that has Web pages as nodes and hyperlinks between them as edges. *** Properties (1) - **Locality:** pages link to pages whose URLs are lexicographically similar. URLs share long common prefixes. → use *D-gap compression* *** Adjacency lists :PROPERTIES: :BEAMER_env: block :BEAMER_col: 0.51 :END: #+BEAMER: \scriptsize | *Node* | *Outdegree* | *Successors* | |--------+-------------+--------------------------------------| | ... | ... | ... | | 15 | 11 | 13,15,16,17,18,19,23,24,203,315,1034 | | 16 | 10 | 15,16,17,22,23,24,315,316,317,3041 | | 17 | 0 | | | 18 | 5 | 13,15,16,17,50 | | ... | ... | ... | *** D-gapped adjacency lists :PROPERTIES: :BEAMER_env: block :BEAMER_col: 0.48 :END: #+BEAMER: \scriptsize | *Node* | *Outdegree* | *Successors* | |--------+-------------+-----------------------------| | ... | ... | ... | | 15 | 11 | 3,1,0,0,0,0,3,0,178,111,718 | | 16 | 10 | 1,0,0,4,0,0,290,0,0,2723 | | 17 | 0 | | | 18 | 5 | 9,1,0,0,32 | | ... | ... | ... | ** Background --- (Web) graph compression (cont.) + :PROPERTIES: + :CUSTOM_ID: background2 + :END: *** The graph of the Web :PROPERTIES: :BEAMER_env: definition :END: Directed graph that has Web pages as nodes and hyperlinks between them as edges. *** Properties (2) - **Similarity:** pages that are close together in lexicographic order tend to have many common successors. → use *reference compression* *** Adjacency lists :PROPERTIES: :BEAMER_env: block :BEAMER_col: 0.47 :END: #+BEAMER: \scriptsize | *Node* | *Outd.* | *Successors* | |--------+---------+--------------------------------------| | ... | ... | ...
| | 15 | 11 | 13,15,16,17,18,19,23,24,203,315,1034 | | 16 | 10 | 15,16,17,22,23,24,315,316,317,3041 | | 17 | 0 | | | 18 | 5 | 13,15,16,17,50 | | ... | ... | ... | *** Copy lists :PROPERTIES: :BEAMER_env: block :BEAMER_col: 0.60 :END: #+BEAMER: \scriptsize | *Node* | *Ref.* | *Copy list* | *Extra nodes* | |--------+--------+-------------+--------------------------------------| | ... | ... | ... | ... | | 15 | 0 | | 13,15,16,17,18,19,23,24,203,315,1034 | | 16 | 1 | 01110011010 | 22,316,317,3041 | | 17 | | | | | 18 | 3 | 11110000000 | 50 | | ... | ... | ... | | ** Background --- Web graph compression (OLD) :noexport: Borrowing (great!) slides from: #+BEGIN_EXPORT latex \begin{thebibliography}{} \bibitem{Pibiri2018} Giulio Ermanno Pibiri \newblock Effective Web Graph Representations, 2018 \newblock \url{http://pages.di.unipi.it/pibiri/slides/webgraphs\_compression.pdf} \end{thebibliography} #+END_EXPORT ** Background -- Web graph compression (imported slides) (OLD) :noexport: :PROPERTIES: :BEAMER_env: ignoreheading :END: #+BEGIN_EXPORT latex { \setbeamercolor{background canvas}{bg=} \setbeamertemplate{background}{} \includepdf[pages={4,11,12,13}]{webgraphs_compression.pdf} \addtocounter{framenumber}{4} } #+END_EXPORT ** Corpus + :PROPERTIES: + :CUSTOM_ID: corpus + :END: *** Nodes :PROPERTIES: :BEAMER_env: block :BEAMER_col: 0.5 :END: | *Node type* | *N. of nodes* | |-------------+---------------| | origins | 88 M | | snapshots | 57 M | | releases | 9.9 M | | revisions | 1.1 B | | directories | 4.9 B | | contents | 5.5 B | |-------------+---------------| | Total nodes | 12 B | *** Edges :PROPERTIES: :BEAMER_env: block :BEAMER_col: 0.5 :END: | *Edge type* | *N. of edges* | |-----------------------+---------------| | origin → snapshot | 195 M | | snapshot → revision | 616 M | | snapshot → release | 215 M | | release → revision | 9.9 M | | revision → revision | 1.2 B | | revision → directory | 1.1 B | | directory → directory | 48 B | | directory → revision | 482 M | | directory → content | 112 B | |-----------------------+---------------| | Total edges | 165 B | *** :PROPERTIES: :BEAMER_env: ignoreheading :END: Archive snapshot 2018-09-25, from the Software Heritage graph dataset.\\ Growth rate: exponential, doubling every 22-30 months (Rousseau, Di Cosmo, Zacchiroli; ESE 2020, to appear).
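+** Corpus counts via the graph dataset (sketch) :noexport:
+   The node counts above come from the Software Heritage graph dataset
+   introduced in the dataset module. As a rough illustration only, counts of
+   this kind can be recomputed with one query per node type on Athena; note
+   that =revision= and =content= are the table names used in the dataset
+   module's queries, while other table names (e.g., =release=) are
+   assumptions about the dataset schema.
+   #+begin_src sql
+   -- per-type node counts, one table per node type (table names partly assumed)
+   SELECT 'revisions' AS node_type, COUNT(*) AS nodes FROM revision
+   UNION ALL
+   SELECT 'releases'  AS node_type, COUNT(*) AS nodes FROM release
+   UNION ALL
+   SELECT 'contents'  AS node_type, COUNT(*) AS nodes FROM content;
+   #+end_src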
** Graph compression pipeline + :PROPERTIES: + :CUSTOM_ID: pipeline + :END: #+BEAMER: \hspace*{-0.1\linewidth} \includegraphics[width=1.2\linewidth]{compression/compression-steps} #+BEAMER: \vspace{-1cm} *** - *MPH*: minimal perfect hash, mapping Merkle IDs to 0..N-1 integers - *BV* compress: Boldi-Vigna compression (based on MPH order) - *BFS*: breadth-first visit to renumber - *Permute*: update BV compression according to BFS order *** (Re)establishing locality - key for good compression is a node ordering that ensures locality and similarity - which is very much /not/ the case with Merkle IDs, ...but is the case /again/ after BFS reordering ** Compression experiment + :PROPERTIES: + :CUSTOM_ID: compexp + :END: | *Step* | *Wall time* (hours) | |-------------+---------------------| | MPH | 2 | | BV Compress | 84 | | BFS | 19 | | Permute | 18 | | Transpose | 15 | |-------------+---------------------| | Total | 138 (6 days) | - server equipped with 24 CPUs and 750 GB of RAM - RAM mostly used as I/O cache for the BFS step - /minimum/ memory requirements are close to the RAM needed to load the final compressed graph in memory ** Compression efficiency (space) + :PROPERTIES: + :CUSTOM_ID: spaceefficiency + :END: *** :PROPERTIES: :BEAMER_env: block :BEAMER_col: 0.4 :END: | *Forward graph* | | | total size | 91 GiB | | bits per edge | 4.91 | | compression ratio | 15.8% | *** :PROPERTIES: :BEAMER_env: block :BEAMER_col: 0.4 :END: | *Backward graph* | | | total size | 83 GiB | | bits per edge | 4.49 | | compression ratio | 14.4% | *** Operating cost The structure of a full bidirectional archive graph fits in less than 200 GiB of RAM, for a hardware cost of ~300 USD. ** Compression efficiency (time) + :PROPERTIES: + :CUSTOM_ID: timeefficiency + :END: *** Benchmark --- Full BFS visit (single thread) #+BEAMER: \begin{columns}\begin{column}{0.45\textwidth} | *Forward graph* | | |------------------+----------------| | wall time | 1h48m | | throughput | 1.81 M nodes/s | | | (553 ns/node) | #+BEAMER: \end{column}\begin{column}{0.45\textwidth} | *Backward graph* | | |------------------+----------------| | wall time | 3h17m | | throughput | 988 K nodes/s | | | (1.01 µs/node) | #+BEAMER: \end{column}\end{columns} *** Benchmark --- Edge lookup random sample: 1 B nodes (8.3% of entire graph); then enumeration of all successors #+BEAMER: \begin{columns}\begin{column}{0.45\textwidth} | *Forward graph* | | |------------------+----------------| | visited edges | 13.6 B | | throughput | 12.0 M edges/s | | | (83 ns/edge) | #+BEAMER: \end{column}\begin{column}{0.45\textwidth} | *Backward graph* | | |------------------+----------------| | visited edges | 13.6 B | | throughput | 9.45 M edges/s | | | (106 ns/edge) | #+BEAMER: \end{column}\end{columns} *** :PROPERTIES: :BEAMER_env: ignoreheading :END: Note how edge lookup time is close to DRAM random access time (50-60 ns). ** Discussion + :PROPERTIES: + :CUSTOM_ID: discussion + :END: *** Incrementality compression is *not incremental*, due to the use of contiguous integer ranges - but the graph is append-only, so... - ...based on expected graph growth rate it should be possible to pre-allocate enough free space in the integer ranges to support *amortized incrementality* (future work) #+BEAMER: \pause *** In-memory v.
on-disk the compressed in-memory graph structure has *no attributes* - usual design is to exploit the 0..N-1 integer ranges to *memory map node attributes* to disk for efficient access - works well for queries that do graph traversal first and "join" node attributes last; ping-pong between the two is expensive - edge attributes are more problematic diff --git a/talks-public/2020-07-03-soheal/2020-07-03-soheal.org b/talks-public/2020-07-03-soheal/2020-07-03-soheal.org new file mode 100644 index 0000000..01d2e84 --- /dev/null +++ b/talks-public/2020-07-03-soheal/2020-07-03-soheal.org @@ -0,0 +1,237 @@ +#+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) +#+TITLE: Global Software Health +#+SUBTITLE: a Unified View of how our Software Commons is Doing +#+BEAMER_HEADER: \date[3 July 2020, SoHeal]{3 July 2020\\SoHeal 2020\\ (via conf call)\\[-2ex]} +#+AUTHOR: Stefano Zacchiroli +#+DATE: 3 July 2020 +#+EMAIL: zack@upsilon.cc + +#+INCLUDE: "../../common/modules/prelude-toc.org" :minlevel 1 +#+INCLUDE: "../../common/modules/169.org" +#+BEAMER_HEADER: \institute[UParis \& Inria]{Université de Paris \& Inria --- {\tt zack@upsilon.cc, @zacchiro}} +#+BEAMER_HEADER: \author{Stefano Zacchiroli} + +# Required by graph-compression.org module +#+LATEX_HEADER_EXTRA: \usepackage{pdfpages} + +# Syntax highlighting setup +#+LATEX_HEADER_EXTRA: \usepackage{minted} +#+LaTeX_HEADER_EXTRA: \usemintedstyle{tango} +#+LaTeX_HEADER_EXTRA: \newminted{sql}{fontsize=\scriptsize} +#+name: setup-minted +#+begin_src emacs-lisp :exports results :results silent + (setq org-latex-listings 'minted) + (setq org-latex-minted-options + '(("fontsize" "\\scriptsize"))) + (setq org-latex-to-pdf-process + '("pdflatex -shell-escape -interaction nonstopmode -output-directory %o %f" + "pdflatex -shell-escape -interaction nonstopmode -output-directory %o %f" + "pdflatex -shell-escape -interaction nonstopmode -output-directory %o %f")) +#+end_src # End syntax highlighting setup + +* Software Health +** Software Health + #+BEAMER: \pause +*** Software Health + :PROPERTIES: + :BEAMER_env: definition + :END: + One of the hardest research fields to search the Web for. +*** Proof (empirical, trivial) + :PROPERTIES: + :BEAMER_env: proof + :END: + Exhibit: https://www.google.com/search?q=software+health +*** :B_ignoreheading: + :PROPERTIES: + :BEAMER_env: ignoreheading + :END: + + #+BEAMER: \vfill \pause + More seriously… +*** + The SoHeal community has pioneered the exploration of the notion of + *Software Health*. + + By now we have evidence of interest in several /dimensions/ of the notion, + we have /tools & techniques/ that are routinely used to explore them, and + we have been doing that at various /scopes/. + +** Software Health --- dimensions +*** /What/ are we looking at + Several *dimensions* have been explored thus far, e.g.: + - software evolution and "liveliness" + - quality (cf. SoHeal 2019 keynote by Jesus M. Gonzalez-Barahona) + - community + - both static structure + - and dynamics over time + (non-exhaustive list) + +** Software Health --- tools & techniques +*** /How/ we are exploring the topic + - classic software evolution & MSR techniques + - quantitative analysis (stats!) + - qualitative analysis + - e.g., interviews, ethnography, Delphi method + - community metrics & their standardization (cf. CHAOSS) + - raising awareness in relevant communities: FOSS + scholars + the SoHeal workshop series!
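+*** Quantitative analysis, concretely (sketch) :noexport:
+    A minimal example of the kind of quantitative, MSR-style analysis meant
+    above: yearly commit counts as a crude evolution/liveliness indicator,
+    written against the =revision= table of the Software Heritage graph
+    dataset used later in this talk (the microsecond =date= encoding follows
+    the dataset module's queries; per-project or per-community breakdowns
+    would need joins that are omitted here).
+    #+begin_src sql
+    -- commits per year, a crude liveliness indicator over the whole corpus
+    SELECT YEAR(FROM_UNIXTIME(date / 1000000)) AS year,
+           COUNT(*) AS commits
+    FROM revision
+    GROUP BY YEAR(FROM_UNIXTIME(date / 1000000))
+    ORDER BY year DESC;
+    #+end_src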
+ +** Software Health --- scope +*** How /far/ are we looking + 1. a single project + 2. a set of inter-dependent projects + - e.g., a specific framework with plugins, a software stack, etc. + - also a community of contributors working on said projects + 3. an ecosystem + - e.g., Debian, PyPI, NPM, etc. + #+BEAMER: \pause +*** Going further + - can we go further in terms of software health scope? how far? + - is there a meaningful notion of *"global software health"*? + - if there is, which *tools* can we use to explore global software + health? + - if they exist and are practical, what is the *current status* of global + software health? + +* Software Commons +** Free Software + (I know you all know this, but bear with me. I pinky promise it's gonna be + useful!) +*** Free Software + :PROPERTIES: + :BEAMER_env: definition + :END: + A program is *free software* if the program's users have the four + /essential freedoms/: + - Freedom #0, to *run* the program, for any purpose + - Freedom #1, to *study* how the program works, and change it + - Freedom #2, to *redistribute* copies + - Freedom #3, to *improve* the program, and *release* improvements +*** ChangeLog :B_ignoreheading: + :PROPERTIES: + :BEAMER_env: ignoreheading + :END: + by the Free Software Foundation\\ + ChangeLog: 2-freedom version: 1986; 3-freedom: 1990; 4-freedom: early 90s +** Software Commons + #+INCLUDE: "../../common/modules/foss-commons.org::#commonsdef1" + #+BEAMER: \pause + #+INCLUDE: "../../common/modules/foss-commons.org::#commonsdef2" +** Global Software Health +*** Proposition #1 + The full extent of our shared software commons is the ultimate scope for + software health.\\ + #+BEAMER: \centering \Large + global software health = software health + software commons +*** Global Software Health (tentative) + :PROPERTIES: + :BEAMER_env: definition + :END: + The investigation of *software health* at the scale of the entire *software + commons*. + #+BEAMER: \pause \vfill +*** Proposition #2 + As a starting point for global software health analysis, we need the + equivalent of ancient world libraries, i.e., *great libraries of software + artifacts* that encompass the software commons as much as possible.
+*** Great library options :B_ignoreheading: + :PROPERTIES: + :BEAMER_env: ignoreheading + :END: + - GHTorrent + - World of Code + - Software Heritage (← my focus for the rest of this talk) + +* Software Heritage +** Software Heritage in a nutshell \hfill [[https://softwareheritage.org][softwareheritage.org]] + #+INCLUDE: "../../common/modules/swh-goals-oneslide-vertical.org::#goals" :only-contents t :minlevel 3 +** An international, non profit initiative\hfill built for the long term + :PROPERTIES: + :CUSTOM_ID: support + :END: +*** Sharing the vision :B_block: + :PROPERTIES: + :CUSTOM_ID: endorsement + :BEAMER_COL: .5 + :BEAMER_env: block + :END: + #+LATEX: \begin{center}{\includegraphics[width=\extblockscale{.4\linewidth}]{unesco_logo_en_285}}\end{center} + #+LATEX: \vspace{-0.8cm} + #+LATEX: \begin{center}\vskip 1em \includegraphics[width=\extblockscale{1.4\linewidth}]{support.pdf}\end{center} + #+latex:\mbox{}~~~~~~~\tiny\url{www.softwareheritage.org/support/testimonials} +*** Donors, members, sponsors :B_block: + :PROPERTIES: + :CUSTOM_ID: sponsors + :BEAMER_COL: .5 + :BEAMER_env: block + :END: + #+LATEX: \begin{center}\includegraphics[width=\extblockscale{.4\linewidth}]{inria-logo-new}\end{center} + #+LATEX: \begin{center} + #+LATEX: \colorbox{white}{\includegraphics[width=\extblockscale{1.4\linewidth}]{sponsors.pdf}} + #+latex:\mbox{}~~~~~~~\tiny\url{www.softwareheritage.org/support/sponsors} + #+LATEX: \end{center} +** Status :B_ignoreheading: + :PROPERTIES: + :BEAMER_env: ignoreheading + :END: +#+INCLUDE: "../../common/modules/status-extended.org::#archivinggoals" :minlevel 2 +#+INCLUDE: "../../common/modules/status-extended.org::#architecture" :minlevel 2 :only-contents t +#+INCLUDE: "../../common/modules/status-extended.org::#merkletree" :minlevel 2 +#+INCLUDE: "../../common/modules/status-extended.org::#datamodel" :minlevel 2 :only-contents t +#+INCLUDE: "../../common/modules/status-extended.org::#dagdetailsmall" :minlevel 2 :only-contents t +#+INCLUDE: "../../common/modules/status-extended.org::#archive" :minlevel 2 +* Exploring the Software Commons +** Early days +*** + - We are in the *early days* of full-scale explorations of the entire + software commons, for both /software health/ and other research or + practical needs. + - We are also not yet *capable* of performing analyses at such scale, due + to a lack of /resources/ (including time!) and/or appropriate /tools/ and + /techniques/. 
+*** :B_ignoreheading: + :PROPERTIES: + :BEAMER_env: ignoreheading + :END: + In the following I'll review some related work: + - a large-scale *dataset* encompassing a decent chunk of the software commons + - a *technique* to exploit such a dataset /on a budget/ + - a long-term exploration of the *growth rate* of the software commons + + #+INCLUDE: "../../common/modules/dataset.org::#main" :minlevel 2 :only-contents t + #+INCLUDE: "../../common/modules/dataset.org::#morequery" :minlevel 2 :only-contents t + #+INCLUDE: "../../common/modules/dataset.org::#discussion" :minlevel 2 :only-contents t + + #+INCLUDE: "../../common/modules/graph-compression.org::#intro" :minlevel 2 + #+INCLUDE: "../../common/modules/graph-compression.org::#background1" :minlevel 2 + #+INCLUDE: "../../common/modules/graph-compression.org::#background2" :minlevel 2 + #+INCLUDE: "../../common/modules/graph-compression.org::#pipeline" :minlevel 2 + #+INCLUDE: "../../common/modules/graph-compression.org::#compexp" :minlevel 2 + #+INCLUDE: "../../common/modules/graph-compression.org::#spaceefficiency" :minlevel 2 + #+INCLUDE: "../../common/modules/graph-compression.org::#timeefficiency" :minlevel 2 + #+INCLUDE: "../../common/modules/graph-compression.org::#discussion" :minlevel 2 + + #+INCLUDE: "this/original-content-growth.org::#oneslide" :minlevel 2 + +* Conclusion +** Wrapping up +*** + - the notion of *software health* is shaping up nicely, with several + dimensions to it and more and more established tools and techniques + - *global software health*, i.e., the study of software health at the scale + of the full software commons, is an open challenge that requires + exhaustive code libraries, tools, and techniques + - *Software Heritage* is one such library, containing a significant span of + the software commons; tools and techniques to analyze it are now badly + needed + - meanwhile, the *software commons* seems to be doing well in terms of + *growth*; let's dig into it further to assess its health! +*** Contacts + [[https://upsilon.cc/~zack/][Stefano Zacchiroli]] / [[mailto:zack@upsilon.cc][zack@upsilon.cc]] / [[https://twitter.com/zacchiro][@zacchiro]] / [[https://mastodon.xyz/@zacchiro][@zacchiro@mastodon.xyz]] + +* Appendix :B_appendix: + :PROPERTIES: + :BEAMER_env: appendix + :END: diff --git a/talks-public/2020-07-03-soheal/Makefile b/talks-public/2020-07-03-soheal/Makefile new file mode 100644 index 0000000..68fbee7 --- /dev/null +++ b/talks-public/2020-07-03-soheal/Makefile @@ -0,0 +1 @@ +include ../Makefile.slides diff --git a/talks-public/2020-07-03-soheal/this/original-content-growth.org b/talks-public/2020-07-03-soheal/this/original-content-growth.org new file mode 100644 index 0000000..58c7cac --- /dev/null +++ b/talks-public/2020-07-03-soheal/this/original-content-growth.org @@ -0,0 +1,33 @@ +* Original content growth +** Original content growth + :PROPERTIES: + :CUSTOM_ID: oneslide + :END: + #+BEAMER: \vspace{-1mm} \includegraphics[width=\textwidth]{revision_content_growth_wide} +*** strut :B_ignoreheading: + :PROPERTIES: + :BEAMER_env: ignoreheading + :END: + #+BEAMER: \vspace{-5mm} +*** + - *50 years of software commons* history.
50 M projects, 4 B blobs, 1 B + commits (Software Heritage snapshot, Feb 2018) + - */original/ artifacts* explored over time, after deduplication + - evidence of *exponential growth*: original commits double every 30 + months; blobs every 22 months; original blobs /per commit/ doubles every + 7 years +*** strut :B_ignoreheading: + :PROPERTIES: + :BEAMER_env: ignoreheading + :END: + #+BEAMER: \vspace{-1mm} +*** + #+BEGIN_EXPORT latex + \vspace{-3mm} + \begin{thebibliography}{Foo Bar, 1969} + \footnotesize + \bibitem{Rousseau2020} Roberto Di Cosmo, Guillaume Rousseau, Stefano Zacchiroli + \newblock Software Provenance Tracking at the Scale of Public Source Code + \newblock Empirical Software Engineering, 2020 + \end{thebibliography} + #+END_EXPORT
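+*** Doubling times as yearly growth rates (back of the envelope) :noexport:
+    A small worked conversion of the doubling times quoted above, assuming
+    steady exponential growth (the assumption is ours, not a result of the
+    paper cited above): "doubles every $d$ months" corresponds to a yearly
+    growth factor of $2^{12/d}$.
+    #+BEGIN_EXPORT latex
+    \[
+      \text{commits: } 2^{12/30} \approx 1.32 \;(\approx +32\%/\text{year})
+      \qquad
+      \text{blobs: } 2^{12/22} \approx 1.46 \;(\approx +46\%/\text{year})
+    \]
+    #+END_EXPORT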