diff --git a/common/modules/dataset.org b/common/modules/dataset.org
index 83f5188..43e26fb 100644
--- a/common/modules/dataset.org
+++ b/common/modules/dataset.org
@@ -1,227 +1,260 @@
 #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt)
 #+INCLUDE: "prelude.org" :minlevel 1
 
 * Open Datasets
   :PROPERTIES:
   :CUSTOM_ID: main
   :END:
 ** Software Heritage Graph dataset
    :PROPERTIES:
    :CUSTOM_ID: graphdataset
    :END:
    #+BEAMER: \vspace{-1mm}
 
    **Use case:** large scale analyses of the most comprehensive corpus on the
    development history of free/open source software.
 
 *** 
     #+BEGIN_EXPORT latex
     \vspace{-2mm}
     \begin{thebibliography}{Foo Bar, 1969}
     \small
     \bibitem{Pietri2019} Antoine Pietri, Diomidis Spinellis, Stefano Zacchiroli\newblock
     The Software Heritage Graph Dataset: Public software development under one roof\newblock
     MSR 2019: 16th Intl. Conf. on Mining Software Repositories. IEEE\newblock
     preprint: \url{http://deb.li/swhmsr19}
     \end{thebibliography}
     #+END_EXPORT
 
   #+BEAMER: \vspace{-1mm}
 *** Dataset
     - Relational representation of the full graph as a set of tables
     - Available as open data: https://doi.org/10.5281/zenodo.2583978
     - Chosen as subject for the *MSR 2020 Mining Challenge*
 
   #+BEAMER: \vspace{-1mm}
 *** Formats
     - Local use: PostgreSQL dumps, or Apache Parquet files (~1 TiB each)
     - Live usage: Amazon Athena (SQL-queriable), Azure Data Lake
 
 ** Sample query --- most frequent first commit words
    :PROPERTIES:
    :CUSTOM_ID: graphquery1
    :END:
 *** 
     #+begin_src sql
     SELECT COUNT(*) AS c, word FROM (
       SELECT LOWER(REGEXP_EXTRACT(FROM_UTF8(
       message), 'ˆ\w+')) AS word FROM revision)
     WHERE word != ''
     GROUP BY word ORDER BY COUNT(*) DESC LIMIT 5;
     #+end_src
   #+BEAMER: \pause
 *** 
     | *Count*    | *Word* |
     |------------+--------|
     | 71 338 310 | update |
     | 64 980 346 | merge  |
     | 56 854 372 | add    |
     | 44 971 954 | added  |
     | 33 222 056 | fix    |
 
 ** Sample query --- fork and merge arities
    :PROPERTIES:
    :CUSTOM_ID: graphquery2
    :END:
 *** Fork arity
     :PROPERTIES:
     :BEAMER_env: block
     :BEAMER_COL: 0.5
     :END:
     i.e., how often is a commit based upon?
     #+BEAMER: \scriptsize
     #+begin_src sql
     SELECT fork_deg, count(*) FROM (
       SELECT id, count(*) AS fork_deg
       FROM revision_history GROUP BY id) t
     GROUP BY fork_deg ORDER BY fork_deg;
     #+end_src
     #+BEAMER: \includegraphics[width=\linewidth]{fork-degree}
   #+BEAMER: \pause
 *** Merge arity
     :PROPERTIES:
     :BEAMER_env: block
     :BEAMER_COL: 0.5
     :END:
     i.e., how large are merges?
     #+BEAMER: \scriptsize
     #+begin_src sql
     SELECT merge_deg, COUNT(*) FROM (
       SELECT parent_id, COUNT(*) AS merge_deg
       FROM revision_history GROUP BY parent_id) t
     GROUP BY deg ORDER BY deg;
     #+end_src
     #+BEAMER: \includegraphics[width=\linewidth]{merge-degree}
 * Other queries
   :PROPERTIES:
   :CUSTOM_ID: morequery
   :END:
 ** Sample query --- ratio of commits performed during weekends
    :PROPERTIES:
    :CUSTOM_ID: weekendsrc
    :END:
    #+BEGIN_SRC sql
    WITH revision_date AS
      (SELECT FROM_UNIXTIME(date / 1000000) AS date
      FROM revision)
    SELECT yearly_rev.year AS year,
      CAST(yearly_weekend_rev.number AS DOUBLE)
      / yearly_rev.number * 100.0 AS weekend_pc
    FROM
      (SELECT YEAR(date) AS year, COUNT(*) AS number
      FROM revision_date
      WHERE YEAR(date) BETWEEN 1971 AND 2018
      GROUP BY  YEAR(date) ) AS yearly_rev
    JOIN
      (SELECT YEAR(date) AS year, COUNT(*) AS number
      FROM revision_date
      WHERE DAY_OF_WEEK(date) >= 6
          AND YEAR(date) BETWEEN 1971 AND 2018
      GROUP BY  YEAR(date) ) AS yearly_weekend_rev
      ON yearly_rev.year = yearly_weekend_rev.year
    ORDER BY  year DESC;
    #+END_SRC
 
 ** Sample query --- ratio of commits performed during weekends (cont.)
    :PROPERTIES:
    :CUSTOM_ID: weekendout
    :END:
    | *Year* | *Weekend* |   *Total* | *Weekend percentage* |
    |--------+-----------+-----------+----------------------|
    |   2018 |  15130065 |  78539158 |                19.26 |
    |   2017 |  33776451 | 168074276 |                20.09 |
    |   2016 |  43890325 | 209442130 |                20.95 |
    |   2015 |  35781159 | 166884920 |                21.44 |
    |   2014 |  24591048 | 122341275 |                20.10 |
    |   2013 |  17792778 |  88524430 |                20.09 |
    |   2012 |  12794430 |  64516008 |                19.83 |
    |   2011 |   9765190 |  48479321 |                20.14 |
    |   2010 |   7766348 |  38561515 |                20.14 |
    |   2009 |   6352253 |  31053219 |                20.45 |
    |   2008 |   4568373 |  22474882 |                20.32 |
    |   2007 |   3318881 |  16289632 |                20.37 |
    |   2006 |   2597142 |  12224905 |                21.24 |
    |   2005 |   2086697 |   9603804 |                21.72 |
    |   2004 |   1752400 |   7948104 |                22.04 |
    |   2003 |   1426033 |   6941593 |                20.54 |
    |   2002 |   1159294 |   5378538 |                21.55 |
    |   2001 |    849905 |   4098587 |                20.73 |
    |   2000 |   2091770 |   4338842 |                48.21 |
    |   1999 |    438540 |   2026906 |                21.63 |
    |   1998 |    311888 |   1430567 |                21.80 |
    |   1997 |    263995 |   1129249 |                23.37 |
    |   1996 |    192543 |    795827 |                24.19 |
    |   1995 |    176270 |    670417 |                26.29 |
    |   1994 |    137811 |    581563 |                23.69 |
    |   1993 |    169767 |    697343 |                24.34 |
    |   1992 |     74923 |    422068 |                17.75 |
    |   1991 |     92782 |    484547 |                19.14 |
    |   1990 |    113201 |    340489 |                33.24 |
    |   1989 |     31742 |    182325 |                17.40 |
    |   1988 |     44983 |    206275 |                21.80 |
    |   1987 |     27892 |    146157 |                19.08 |
    |   1986 |     54200 |    237330 |                22.83 |
    |   1985 |     75595 |    306564 |                24.65 |
    |   1984 |     26391 |     95506 |                27.63 |
    |   1983 |     89776 |    370687 |                24.21 |
    |   1982 |     51524 |    191933 |                26.84 |
    |   1981 |     32995 |    123618 |                26.69 |
    |   1980 |     31832 |    133733 |                23.80 |
    |   1979 |     20943 |    175164 |                11.95 |
    |   1978 |      3773 |     33677 |                11.20 |
    |   1977 |      4783 |     19376 |                24.68 |
    |   1976 |      1907 |      7048 |                27.05 |
    |   1975 |      2089 |     26579 |                 7.85 |
    |   1974 |      2095 |     14290 |                14.66 |
    |   1973 |      2988 |     15580 |                19.17 |
    |   1972 |      1755 |      6552 |                26.78 |
    |   1971 |      1723 |      6125 |                28.13 |
 
 ** Sample query --- average size of the most popular file types
    :PROPERTIES:
    :CUSTOM_ID: popfilesrc
    :END:
    #+BEGIN_SRC sql
    SELECT suffix,
      ROUND(COUNT(*) * 100 / 1e6) AS Million_files,
      ROUND(AVG(length) / 1024) AS Average_k_length
    FROM
      (SELECT length, suffix
      FROM  -- File length in joinable form
        (SELECT TO_BASE64(sha1_git) AS sha1_git64, length
        FROM content ) AS content_length
      JOIN  -- Sample of files with popular suffixes
      (SELECT target64, file_suffix_sample.suffix AS suffix
      FROM  -- Popular suffixes
        (SELECT suffix FROM (
 	 SELECT REGEXP_EXTRACT(FROM_UTF8(name),
 	  '\.[^.]+$') AS suffix
        FROM directory_entry_file) AS file_suffix
        GROUP BY  suffix
        ORDER BY  COUNT(*) DESC LIMIT 20 ) AS pop_suffix
      JOIN  -- Sample of files and suffixes
        (SELECT TO_BASE64(target) AS target64,
 	 REGEXP_EXTRACT(FROM_UTF8(name),
 	   '\.[^.]+$') AS suffix
        FROM directory_entry_file TABLESAMPLE BERNOULLI(1))
        AS file_suffix_sample
      ON file_suffix_sample.suffix = pop_suffix.suffix)
      AS pop_suffix_sample
      ON pop_suffix_sample.target64 = content_length.sha1_git64)
    GROUP BY suffix
    ORDER BY AVG(length) DESC;
    #+END_SRC
 
 * Discussion
   :PROPERTIES:
   :CUSTOM_ID: discussion
   :END:
 ** Discussion
    - one /can/ query such a corpus SQL-style
    - but relational representation shows its limits at this scale
      - ...at least as deployed on commercial SQL offerings such as Athena
      - note: (naive) sharding is ineffective, due to the pseudo-random
        distribution of node identifiers
    - experiments with Google BigQuery are ongoing
      - (we broke it at the first import attempt..., due to very large arrays in
        directory entry tables)
+
+* License Dataset
+** Software Heritage License Blob Dataset
+   :PROPERTIES:
+   :CUSTOM_ID: licensedataset
+   :END:
+
+*** 
+    #+BEGIN_EXPORT latex
+    \vspace{-2mm}
+    \begin{thebibliography}{Foo Bar, 1969}
+    \footnotesize
+    \bibitem{Zacchiroli2022LicenseBlobs} Stefano Zacchiroli \newblock
+    A Large-scale Dataset of (Open Source) License Text Variants \newblock
+    MSR 2022 (best dataset paper award)\newblock
+    preprint: \url{https://arxiv.org/abs/2204.00256}
+    \end{thebibliography}
+    #+END_EXPORT
+
+*** Dataset
+    #+BEAMER: \vspace{-1mm}
+    - 6.5 million unique full texts of FOSS license variants
+    - Detected using filename patterns across the entire SWH archive
+      - =LICENSE=, =COPYRIGHT=, =NOTICE=, etc.
+    - Metadata: file lengths measures, detected MIME type, detected SPDX
+      license (via ScanCode), example origin repository, oldest public commit
+      of origin
+
+*** Use cases
+    #+BEAMER: \vspace{-1mm}
+    - Empirical studies on FOSS licensing, including phylogenetics
+    - Training of automated license classifiers
+    - NLP analyses of legal texts
diff --git a/talks-public/2022-09-28-ese-research/2022-09-28-ese-research.org b/talks-public/2022-09-28-ese-research/2022-09-28-ese-research.org
new file mode 100644
index 0000000..a7eb802
--- /dev/null
+++ b/talks-public/2022-09-28-ese-research/2022-09-28-ese-research.org
@@ -0,0 +1,16 @@
+#+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt)
+#+TITLE: Empirical Software Engineering Research with Software Heritage
+#+BEAMER_HEADER: \date[2022-09-28]{28 September 2022}
+#+AUTHOR: Stefano Zacchiroli
+#+DATE: 28 September 2022
+#+EMAIL: stefano.zacchiroli@telecom-paris.fr
+
+#+INCLUDE: "../../common/modules/prelude-toc.org" :minlevel 1
+#+INCLUDE: "../../common/modules/169.org"
+#+BEAMER_HEADER: \institute[Télécom Paris]{Télécom Paris, Polytechnic Institute of Paris\\ {\tt stefano.zacchiroli@telecom-paris.fr}}
+#+BEAMER_HEADER: \author{Stefano Zacchiroli}
+
+* Datasets
+#+INCLUDE: "../../common/modules/dataset.org::#graphdataset"
+#+INCLUDE: "../../common/modules/dataset.org::#graphquery1"
+#+INCLUDE: "../../common/modules/dataset.org::#licensedataset"
diff --git a/talks-public/2022-09-28-ese-research/Makefile b/talks-public/2022-09-28-ese-research/Makefile
new file mode 100644
index 0000000..68fbee7
--- /dev/null
+++ b/talks-public/2022-09-28-ese-research/Makefile
@@ -0,0 +1 @@
+include ../Makefile.slides