diff --git a/common/images/Rlogo.pdf b/common/images/Rlogo.pdf
new file mode 100644
index 0000000..60fb87a
Binary files /dev/null and b/common/images/Rlogo.pdf differ
diff --git a/common/images/Rlogo.svg b/common/images/Rlogo.svg
new file mode 100644
index 0000000..78281f7
--- /dev/null
+++ b/common/images/Rlogo.svg
@@ -0,0 +1,14 @@
+
diff --git a/common/images/mhh-sweden.3-190x300.jpg b/common/images/mhh-sweden.3-190x300.jpg
new file mode 100644
index 0000000..44bd269
Binary files /dev/null and b/common/images/mhh-sweden.3-190x300.jpg differ
diff --git a/common/modules/prelude-toc.org b/common/modules/prelude-toc.org
index ea21dcf..9df83cd 100644
--- a/common/modules/prelude-toc.org
+++ b/common/modules/prelude-toc.org
@@ -1,99 +1,98 @@
#+BEAMER_HEADER: \titlegraphic{\includegraphics[width=\extblockscale{0.7\textwidth}]{SWH-logo+motto}}
#+STARTUP: hidestars
# activate org-beamer-mode minor mode automatically
#+STARTUP: beamer
# org export options
#+LANGUAGE: en
#+OPTIONS: H:2 num:t toc:nil \n:nil @:t ::t |:t ^:t -:t f:t *:t <:t
#+OPTIONS: TeX:t LaTeX:t skip:nil d:nil todo:t pri:nil tags:not-in-toc
#+EXPORT_SELECT_TAGS: export
#+EXPORT_EXCLUDE_TAGS: noexport
#+LINK_UP:
#+LINK_HOME:
#+LaTeX_CLASS: beamer
#+LaTeX_CLASS_OPTIONS: [presentation,xcolor=table]
#
# important font choice!
#
#+LaTeX_HEADER: \usepackage{libertine}
#
# Let's move that logo...
#
#+LaTeX_HEADER: \usepackage{animate}
#+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt)
# have the theme desired
#+latex_header: \mode<presentation>{\usetheme{swh} \beamertemplatenavigationsymbolsempty \setbeamertemplate{navigation symbols}{} \setbeamertemplate{headline}{}
#+latex_header: \setbeamertemplate{footline}
#+latex_header: {
#+latex_header: \leavevmode%
#+latex_header: \hbox{%
#+latex_header: \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.25ex,dp=1ex,center]{author in head/foot}%
#+latex_header: \usebeamerfont{author in head/foot}\insertshortauthor%~~\beamer@ifempty{\insertshortinstitute}{}{(\insertshortinstitute)}
#+latex_header: \end{beamercolorbox}%
#+latex_header: \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.25ex,dp=1ex,right]{title in head/foot}%
#+latex_header: \usebeamerfont{title in head/foot}\insertshorttitle{}\hspace*{2em}
#+latex_header: \usebeamerfont{date in head/foot}\insertshortdate{}\hspace*{2em}
#+latex_header: \insertframenumber{} / \inserttotalframenumber\hspace*{2ex}
#+latex_header: \end{beamercolorbox}}%
#+latex_header: \vskip0pt%
#+latex_header: }
#+latex_header: }
# some color
#+latex_header: \rowcolors[]{1}{blue!10}{blue!05}
#
# to have a toc for each section
#
# Use suggestions from http://web.stanford.edu/~dgleich/notebook/2009/05/appendix_slides_in_beamer_cont_1.html to avoid counting tocs in page number
#
#+latex_header: \AtBeginSection[] {\begin{frame}<*> \frametitle{Outline} \tableofcontents[currentsection]\end{frame} \addtocounter{framenumber}{-1}}
# set the paths for images
#+latex_header: \graphicspath{%
#+latex_header: {../../common/images/}{../../common/logos/}%
#+latex_header: {pics/}{../images/}{../../images/}{../pics/}{../../pics/}%
#+latex_header: {../figures/}{../../figures/}{../logos/}{../../logos/}{../../../logos/}%
#+latex_header: {../../communication/web/graphics/carousel/}%
#+latex_header: {../../communication/web/graphics/pictos/png/400x400/}%
#+latex_header: }
# some default information I did not find how to set this in org-mode
-#+latex_header: \institute[Irill/INRIA/UPD]{\url{roberto@dicosmo.org}}
# to add the picblock macro
#+latex_header: \usepackage{extblocks}
#+latex_header: \usepackage{pgfpages}
#+latex_header: \usepackage{animate}
#+latex_header: \usepackage{alltt}
#
# Itemize in multiple columns
#
#+latex_header: \usepackage{multicol}
#
# Requires
#
# http://www-ljk.imag.fr/membres/Jerome.Lelong/latex/appendixnumberbeamer.sty
#+latex_header: \usepackage{appendixnumberbeamer}
#
# Colors, color boxes
#
#+latex_header: \usepackage{color}
#+latex_header: \usepackage{soul}
# http://tex.stackexchange.com/questions/41683/why-is-it-that-coloring-in-soul-in-beamer-is-not-visible
#+latex_header: \makeatletter
#+latex_header: \newcommand\SoulColor{%
#+latex_header: \let\set@color\beamerorig@set@color
#+latex_header: \let\reset@color\beamerorig@reset@color}
#+latex_header: \makeatother
#+latex_header: \SoulColor
#+LATEX_HEADER: \usepackage{listings}
#+LATEX_HEADER: \usepackage{forcebeamermode}
diff --git a/common/modules/prelude.org b/common/modules/prelude.org
index 8bda5de..8eb0903 100644
--- a/common/modules/prelude.org
+++ b/common/modules/prelude.org
@@ -1,92 +1,91 @@
#+BEAMER_HEADER: \titlegraphic{\includegraphics[width=\extblockscale{0.7\textwidth}]{SWH-logo+motto}}
#+STARTUP: hidestars
# activate org-beamer-mode minor mode automatically
#+STARTUP: beamer
# org export options
#+LANGUAGE: en
#+OPTIONS: H:2 num:t toc:nil \n:nil @:t ::t |:t ^:t -:t f:t *:t <:t
#+OPTIONS: TeX:t LaTeX:t skip:nil d:nil todo:t pri:nil tags:not-in-toc
#+EXPORT_SELECT_TAGS: export
#+EXPORT_EXCLUDE_TAGS: noexport
#+LINK_UP:
#+LINK_HOME:
#+LaTeX_CLASS: beamer
#+LaTeX_CLASS_OPTIONS: [presentation,xcolor=table]
#
# important font choice!
#
#+LaTeX_HEADER: \usepackage{libertine}
#
# Let's move that logo...
#
#+LaTeX_HEADER: \usepackage{animate}
#+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt)
# have the theme desired
#+latex_header: \mode<presentation>{\usetheme{swh} \beamertemplatenavigationsymbolsempty \setbeamertemplate{navigation symbols}{} \setbeamertemplate{headline}{}
#+latex_header: \setbeamertemplate{footline}
#+latex_header: {
#+latex_header: \leavevmode%
#+latex_header: \hbox{%
#+latex_header: \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.25ex,dp=1ex,center]{author in head/foot}%
#+latex_header: \usebeamerfont{author in head/foot}\insertshortauthor%~~\beamer@ifempty{\insertshortinstitute}{}{(\insertshortinstitute)}
#+latex_header: \end{beamercolorbox}%
#+latex_header: \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.25ex,dp=1ex,right]{title in head/foot}%
#+latex_header: \usebeamerfont{title in head/foot}\insertshorttitle{}\hspace*{2em}
#+latex_header: \usebeamerfont{date in head/foot}\insertshortdate{}\hspace*{2em}
#+latex_header: \insertframenumber{} / \inserttotalframenumber\hspace*{2ex}
#+latex_header: \end{beamercolorbox}}%
#+latex_header: \vskip0pt%
#+latex_header: }
#+latex_header: }
# some color
#+latex_header: \rowcolors[]{1}{blue!10}{blue!05}
# set the paths for images
#+latex_header: \graphicspath{%
#+latex_header: {../../common/images/}{../../common/logos/}%
#+latex_header: {pics/}{../images/}{../../images/}{../pics/}{../../pics/}%
#+latex_header: {../figures/}{../../figures/}{../logos/}{../../logos/}{../../../logos/}%
#+latex_header: {../../communication/web/graphics/carousel/}%
#+latex_header: {../../communication/web/graphics/pictos/png/400x400/}%
#+latex_header: }
# some default information I did not find how to set this in org-mode
-#+latex_header: \institute[Irill/INRIA/UPD]{\url{roberto@dicosmo.org}}
# to add the picblock macro
#+latex_header: \usepackage{extblocks}
#+latex_header: \usepackage{pgfpages}
#+latex_header: \usepackage{animate}
#+latex_header: \usepackage{alltt}
#
# Itemize in multiple columns
#
#+latex_header: \usepackage{multicol}
#
# Requires
#
# http://www-ljk.imag.fr/membres/Jerome.Lelong/latex/appendixnumberbeamer.sty
#+latex_header: \usepackage{appendixnumberbeamer}
#
# Colors, color boxes
#
#+latex_header: \usepackage{color}
#+latex_header: \usepackage{soul}
# http://tex.stackexchange.com/questions/41683/why-is-it-that-coloring-in-soul-in-beamer-is-not-visible
#+latex_header: \makeatletter
#+latex_header: \newcommand\SoulColor{%
#+latex_header: \let\set@color\beamerorig@set@color
#+latex_header: \let\reset@color\beamerorig@reset@color}
#+latex_header: \makeatother
#+latex_header: \SoulColor
#+LATEX_HEADER: \usepackage{listings}
#+LATEX_HEADER: \usepackage{forcebeamermode}
diff --git a/common/modules/status-extended.org b/common/modules/status-extended.org
index 624dc06..54d1435 100644
--- a/common/modules/status-extended.org
+++ b/common/modules/status-extended.org
@@ -1,459 +1,461 @@
#+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt)
#+INCLUDE: "prelude.org" :minlevel 1
# not to be included as a whole, just pick individual slides as you see fit
* Status
:PROPERTIES:
:CUSTOM_ID: main
:END:
** The people
:PROPERTIES:
:CUSTOM_ID: people
:END:
*** The core team :B_picblock:
:PROPERTIES:
:CUSTOM_ID: core-team-formal
:BEAMER_env: picblock
:BEAMER_opt: pic=team,width=.4\linewidth
:END:
- Roberto Di Cosmo
- Stefano Zacchiroli
- Nicolas Dandrimont (Engineer)
- Antoine Dumont (Engineer)
# - and /Jordi, Quentin and Guillaume/
*** Scientific advisors
- Serge Abiteboul (French Science Academy)
- Jean-François Abramatic (former W3C director)
- Gérard Berry (CNRS Gold Medal, French Science Academy)
- Julia Lawall (Coccinelle, Linux Kernel, Outreachy)
** Archive coverage --- archive.softwareheritage.org
:PROPERTIES:
:CUSTOM_ID: archive
:END:
#+BEAMER: \vspace{-1mm}
#+BEAMER: \begin{center}\includegraphics[width=\extblockscale{1.1\linewidth}]{2019-01-archive-growth.png}\end{center}
#+BEAMER: \vspace{-2mm}
***
#+BEAMER: \includegraphics[width=0.19\linewidth]{coverage/github} \hfill
#+BEAMER: \includegraphics[width=0.2\linewidth]{coverage/debian} \hfill
#+BEAMER: \includegraphics[width=0.2\linewidth]{coverage/gitlab} \hfill
#+BEAMER: \includegraphics[width=0.2\linewidth]{coverage/googlecode} \\
#+BEAMER: \includegraphics[width=0.2\linewidth]{coverage/gitorious} \hfill
#+BEAMER: \includegraphics[width=0.15\linewidth]{coverage/gnu} \hfill
#+BEAMER: \includegraphics[width=0.13\linewidth]{coverage/hal} \hfill
#+BEAMER: \includegraphics[width=0.16\linewidth]{coverage/inria} \hfill
#+BEAMER: \includegraphics[width=0.13\linewidth]{coverage/pypi}
#+BEAMER: \pause
***
- 200 TB (compressed) blobs, 6 TB database (as a graph: 10 B nodes + 100 B edges)
- The /richest/ public source code archive, ... and growing daily!
** The structure of the archive :noexport:
*** On-disk storage
- flat file storage for contents
- postgres database for the metadata
*** Data model: /one/ big Merkle DAG, inspired by the git model
- Origins (= repositories)
- Occurrences (= branches)
- Releases (= tags)
- Revisions (= commits)
- Directories (= trees)
- Contents (= blobs)
** Archiving goals
:PROPERTIES:
:CUSTOM_ID: archivinggoals
:END:
Targets: VCS repositories & source code releases (e.g., tarballs)
*** We DO archive
- file *content* (= blobs)
- *revisions* (= commits), with full metadata
- *releases* (= tags), ditto
- where (*origin*) & when (*visit*) we found any of the above
# - time-indexed repo *snapshots* (i.e., we never delete anything)
… in a VCS-/archive-agnostic *canonical data model*
*** We DON'T archive
# - diffs → derived data from related contents
- homepages, wikis
- BTS/issues/code reviews/etc.
- mailing lists
Long term vision: play our part in a /"semantic wikipedia of software"/
** Architecture
:PROPERTIES:
:CUSTOM_ID: architecture
:END:
*** Data flow
:PROPERTIES:
:CUSTOM_ID: dataflow
:END:
#
#+BEAMER: \begin{center}\includegraphics[width=\extblockscale{1.2\textwidth}]{swh-dataflow.pdf}\end{center}
** Data model :noexport:
*** General schema
- VCS-independent
- fully deduplicated
+ files, directories and commits are /shared/
- biggest git-like /graph/ in the world
***
\begin{center}
\url{http://deb.li/swhdm}
\end{center}
*** full hash index (sha1, sha256, ...)
Some funny facts:
- the GPL2 licence appears under more than 500 names
+ including /aa.css.txt/ and /FullSync.txt/ ~ :-)
** Merkle DAG
*** Merkle structure
:PROPERTIES:
:CUSTOM_ID: merkle
:END:
**** Merkle trees
:PROPERTIES:
:CUSTOM_ID: merkletree
:END:
# R. C. Merkle, A digital signature based on a conventional encryption
# function, Crypto '87
#+BEAMER: \vspace{-3mm}
***** Merkle tree (R. C. Merkle, Crypto 1979) :B_picblock:
:PROPERTIES:
:BEAMER_opt: pic=merkle, leftpic=true, width=.7\linewidth
:BEAMER_env: picblock
:BEAMER_act:
:END:
Combination of
- tree
- hash function
#+BEAMER: \pause
#+BEAMER: \footnotesize
***** Classical cryptographic construction
- fast, parallel signature of large data structures
- widely used (e.g., Git, blockchains, IPFS, ...)
- built-in deduplication
#+BEAMER: \vspace{-1mm}
**** The archive in a few pictures
:PROPERTIES:
:CUSTOM_ID: merkledemo
:END:
***** A giant (extended) Merkle DAG
#+LATEX: \only<1>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/merkle_1.pdf}}}
#+LATEX: \only<2>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/contents.pdf}}}
#+LATEX: \only<3>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/merkle_2_contents.pdf}}}
#+LATEX: \only<4>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/directories.pdf}}}
#+LATEX: \only<5>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/merkle_3_directories.pdf}}}
#+LATEX: \only<6>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/revisions.pdf}}}
#+LATEX: \only<7>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/merkle_4_revisions.pdf}}}
#+LATEX: \only<8>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/releases.pdf}}}
#+LATEX: \only<9>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/merkle_5_releases.pdf}}}
# #+LATEX: {\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/merkle_1.pdf}}}
*** A revision node
:PROPERTIES:
:CUSTOM_ID: merklerevision
:END:
**** Example: a Software Heritage revision
*****
#+BEAMER: \vspace{-.5cm}\centering\includegraphics[width=0.9\textwidth]{git-merkle/revisions}
*****
Note: most object kinds currently have Git-compatible identifiers
*** Giant DAG
:PROPERTIES:
:CUSTOM_ID: giantdag
:END:
**** The archive: a (giant) Merkle DAG
# Using an empty frame because the image is difficult to read on swh bg.
# Finding a way to override image bg for just this frame would be better.
*****
#+BEAMER: \centering \includegraphics[width=\extblockscale{\textwidth}]{git-merkle/merkle_5_releases}
*** Giant DAG (single slide)
:PROPERTIES:
:CUSTOM_ID: giantdag1slide
:END:
**** The Software Heritage archive: a gigantic Merkle DAG
#+LATEX: \centering\forcebeamerstart{}
#+LATEX: \only<1>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/merkle_1}}}
#+LATEX: \only<2>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/contents}}}
#+LATEX: \only<3>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/merkle_2_contents}}}
#+LATEX: \only<4>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/directories}}}
#+LATEX: \only<5>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/merkle_3_directories}}}
#+LATEX: \only<6>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/revisions}}}
#+LATEX: \only<7>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/merkle_4_revisions}}}
#+LATEX: \only<8>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/releases}}}
#+LATEX: \only<9>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/merkle_5_releases}}}
#+LATEX: \forcebeamerend{}
*** Giant DAG (detailed)
:PROPERTIES:
:CUSTOM_ID: dagdetail
:END:
**** The archive: a (giant) Merkle DAG
#+BEAMER: \vspace{-3mm}
#+BEAMER: \centering \includegraphics[width=\textwidth]{swh-merkle-dag-wide}
** Technology :noexport:
:PROPERTIES:
:CUSTOM_ID: technology
:END:
*** Software stack
**** 3rd party
- Debian, Puppet, Ceph
- PostgreSQL for metadata storage, with barman & pglogical
- Celery (RabbitMQ backend) for task scheduling
- Python3 and psycopg2 for the backend
- Django, Bootstrap, D3.js for Web stuff
**** in house
- /ad hoc/ object storage (to avoid imposing tech on mirrors)
- data model implementation, listers, loaders, scheduler
- ~60 Git repositories (~20 Python packages, ~30 Puppet modules)
- ~30 kSLOC Python / ~12 kSLOC SQL / ~4 kSLOC Puppet
- licence choice: GPLv3 (backend) / AGPLv3 (frontend)
*** Hardware stack
**** in house
- 2x hypervisors with ~20 VMs
- 2x high density storage array (60 * 6TB => 300TB usable each)
- Prototype: ceph storage cluster for blobs
**** on Azure
- full object storage mirror
- full mirror of the database containing the graph
- workers for content indexing
- workers for download bundle preparation
**** at the University of Bologna
- backend storage (60TB) for the bundles available for download
*** Software architecture :noexport:
**** Module dependencies (internal + external) :B_picblock:
:PROPERTIES:
:BEAMER_env: picblock
:BEAMER_opt: pic=swh-modules-deps-all,width=\linewidth
:END:
****
let's zoom in: http://deb.li/swhdeps
** Technology :noexport:
:PROPERTIES:
:CUSTOM_ID: technology-short
:END:
*** Deployment and resource usage
**** Software
- around 30k SLOC of custom Python code, running on Debian Stable
- PostgreSQL database for the metadata storage
+ - Full docker-compose development environment
+ - Work in progress: scale-out metadata storage (Cassandra?)
+ - Work in progress: mirroring infrastructure (Kafka)
**** Hardware
- - 3 hypervisors with mass storage and a backup server at Inria
- - Work in progress: in-house Ceph deployment for object storage
+ - 12 servers (hypervisors, database, storage, staging and testing infrastructure) and 40 virtual machines, with mass storage and a backup server, at Inria
- In-kind sponsorship of cloud and storage resources (Microsoft, University of Bologna)
** Software development :noexport:
:PROPERTIES:
:CUSTOM_ID: development
:END:
*** Software development
**** classic FOSS development
- language: English
- development mailing list
#+BEAMER: \\{\small \url{https://sympa.inria.fr/sympa/info/swh-devel}}
- IRC
#+BEAMER: \\
#swh-devel / FreeNode
- Forge
#+BEAMER: \\{\small \url{https://forge.softwareheritage.org}}
- Git, tasks, code review, etc.
**** for more information
#+BEAMER: \scriptsize
https://www.softwareheritage.org/community/developers/
** Roadmap
:PROPERTIES:
:CUSTOM_ID: features
:END:
*** Features...
- (done) *lookup* by content hash
- (done) *browsing*: "wayback machine" for source code (API + UI)
- (early access) *deposit* of source code bundles directly to the archive
- (early access) *save code now*, on-demand archive
- (done) *download*: =wget= / =git clone= from the archive
- (todo) *provenance* lookup for all archived content
- (todo) *full-text search* on all archived source code files
#+BEAMER: \pause
*** ... and much more than one could possibly imagine
all the world's software development history at hand's reach!
** Web API :noexport:
:PROPERTIES:
:CUSTOM_ID: api
:END:
*** Web API
:PROPERTIES:
:CUSTOM_ID: apiintro
:END:
****
RESTful API to programmatically access the Software Heritage archive \\
*\url{https://archive.softwareheritage.org/api/}*
**** Features
- pointwise *browsing* of the archive
- … snapshots → revisions → directories → contents …
- full access to the *metadata* of archived objects
- *crawling* information
- /when have you last visited this Git repository I care about?/
- /where were its branches/tags pointing to at the time?/
# - derived information about archived contents (WIP)
# - MIME type, programming language, license, etc.
**** Endpoint index
\url{https://archive.softwareheritage.org/api/1/}
*** A tour of the Web API --- origins & visits
:PROPERTIES:
:CUSTOM_ID: apitourvisits
:END:
#+BEAMER: \footnotesize
#+BEGIN_SRC
GET https://archive.softwareheritage.org/api/1/origin/ \
git/url/https://github.com/hylang/hy
{ "id": 1,
"origin_visits_url": "/api/1/origin/1/visits/",
"type": "git",
"url": "https://github.com/hylang/hy"
}
#+END_SRC
#+BEAMER: \vfill
#+BEGIN_SRC
GET https://archive.softwareheritage.org/api/1/origin/ \
1/visits/
[ ...,
{ "date": "2016-09-14T11:04:26.769266+00:00",
"origin": 1,
"origin_visit_url": "/api/1/origin/1/visit/13/",
"status": "full",
"visit": 13
}, ...
]
#+END_SRC
*** A tour of the Web API --- snapshots
:PROPERTIES:
:CUSTOM_ID: apitoursnapshots
:END:
#+BEAMER: \footnotesize
#+BEGIN_SRC
GET https://archive.softwareheritage.org/api/1/origin/ \
1/visit/13/
{ ...,
"occurrences": { ...,
"refs/heads/master": {
"target": "b94211251...",
"target_type": "revision",
"target_url": "/api/1/revision/b94211251.../"
},
"refs/tags/0.10.0": {
"target": "7045404f3...",
"target_type": "release",
"target_url": "/api/1/release/7045404f3.../"
}, ...
},
"origin": 1,
"origin_url": "/api/1/origin/1/",
"status": "full",
"visit": 13
}
#+END_SRC
*** A tour of the Web API --- releases :noexport:
:PROPERTIES:
:CUSTOM_ID: apitourreleases
:END:
#+BEAMER: \footnotesize
#+BEGIN_SRC
GET https://archive.softwareheritage.org/api/1/release/ \
7045404f3d1c54e6473c71bbb716529fbad4be24/
{
"author": {
"email": "tag@pault.ag",
"fullname": "Paul Tagliamonte <tag@pault.ag>",
"id": 96,
"name": "Paul Tagliamonte"
},
"date": "2014-04-10T23:01:28-04:00",
"message": "0.10: The Oh f*ck it's PyCon release",
"name": "0.10.0",
"synthetic": false,
"target": "6072557b6...",
"target_type": "revision",
"target_url": "/api/1/revision/6072557b6.../",
...
}
#+END_SRC
*** A tour of the Web API --- revisions
:PROPERTIES:
:CUSTOM_ID: apitourrevisions
:END:
#+BEAMER: \footnotesize
#+BEGIN_SRC
GET https://archive.softwareheritage.org/api/1/revision/ \
6072557b6c10cd9a21145781e26ad1f978ed14b9/
{
"author": {
"email": "tag@pault.ag",
"fullname": "Paul Tagliamonte <tag@pault.ag>",
"id": 96,
"name": "Paul Tagliamonte"
},
"committer": { ... },
"date": "2014-04-10T23:01:11-04:00",
"committer_date": "2014-04-10T23:01:11-04:00",
"directory": "2df4cd84e...",
"directory_url": "/api/1/directory/2df4cd84e.../",
"history_url": "/api/1/revision/6072557b6.../log/",
"merge": false,
"message": "0.10: The Oh f*ck it's PyCon release",
"parents": [ {
"id": "10149f66e...",
"url": "/api/1/revision/10149f66e.../"
} ],
...
}
#+END_SRC
*** A tour of the Web API --- contents
:PROPERTIES:
:CUSTOM_ID: apitourcontents
:END:
#+BEAMER: \footnotesize
#+BEGIN_SRC
GET https://archive.softwareheritage.org/api/1/content/ \
adc83b19e793491b1c6ea0fd8b46cd9f32e592fc/
{
"data_url": "/api/1/content/sha1:adc83b19e.../raw/",
"filetype_url": "/api/1/content/sha1:.../filetype/",
"language_url": "/api/1/content/sha1:.../language/",
"length": 1,
"license_url": "/api/1/content/sha1:.../license/",
"sha1": "adc83b19e...",
"sha1_git": "8b1378917...",
"sha256": "01ba4719c...",
"status": "visible"
}
#+END_SRC
#+BEAMER: \normalsize \vfill \pause
**** Caveats
- rate limits apply throughout the API
- raw download available for textual contents
** Accessing the archive :noexport:
:PROPERTIES:
:CUSTOM_ID: accessing-short
:END:
*** Browse :B_block:BMCOL:
:PROPERTIES:
:BEAMER_col: 0.4
:BEAMER_env: block
:END:
#+BEAMER: \begin{center}\includegraphics[width=0.5\textwidth]{archive-browse}\end{center}
- https://archive.softwareheritage.org/browse
- way back machine for software source code
#+BEAMER: \pause
*** Web API :B_block:BMCOL:
:PROPERTIES:
:BEAMER_col: 0.4
:BEAMER_env: block
:END:
#+BEAMER: \begin{center}\includegraphics[width=0.5\textwidth]{archive-webapi}\end{center}
- https://archive.softwareheritage.org/api
- point-wise navigation of the archive as a graph
** Some technical challenges
:PROPERTIES:
:CUSTOM_ID: techchallenges
:END:
*** Expanding the archive
- discover and classify /all/ the software sources
- importers for other VCSs (SVN, Hg, ...)
\hfill /We need your help!/
*** Staying current
get new repositories and commits ASAP\\
\hfill /We need reliable, standardised event feeds./
*** Handling the backlog
ingesting all the pre-existing data\\
\hfill /Decades of software development are waiting!/
diff --git a/talks-public/2019-05-18-ubuntuparty/2019-05-18-ubuntuparty.org b/talks-public/2019-05-18-ubuntuparty/2019-05-18-ubuntuparty.org
new file mode 100644
index 0000000..7c77fb6
--- /dev/null
+++ b/talks-public/2019-05-18-ubuntuparty/2019-05-18-ubuntuparty.org
@@ -0,0 +1,283 @@
+#+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt)
+#+TITLE: Software Heritage: The Great Library of Source Code
+# org-mode's #+TITLE does not allow a short title, so we override it for beamer as follows:
+#+BEAMER_HEADER: \title[Software Heritage]{Software Heritage\\The Great Library of Source Code}
+#+BEAMER_HEADER: \author{Nicolas Dandrimont}
+#+BEAMER_HEADER: \date[2019-05-18 Ubuntu Party]{18 May 2019\\Ubuntu Party - Paris}
+#+AUTHOR: Nicolas Dandrimont
+#+DATE: 2019-05-18
+#+EMAIL: nicolas@dandrimont.eu
+#+DESCRIPTION: Software Heritage: The Great Library of Source Code
+#+KEYWORDS: software heritage legacy preservation knowledge mankind technology
+
+#+INCLUDE: "../../common/modules/prelude.org" :minlevel 1
+#+INCLUDE: "../../common/modules/169.org"
+#+BEAMER_HEADER: \institute[Software Heritage]{Software Engineer - Software Heritage\\\href{mailto:nicolas@dandrimont.eu}{\tt nicolas@dandrimont.eu}}
+
+#+LATEX_HEADER_EXTRA: \usepackage{bbding}
+#+LATEX_HEADER_EXTRA: \DeclareUnicodeCharacter{66D}{\FiveStar}
+#+LATEX_HEADER_EXTRA: \usepackage{tikz}
+#+LATEX_HEADER_EXTRA: \usetikzlibrary{arrows,shapes}
+#+LATEX_HEADER_EXTRA: \definecolor{swh-orange}{RGB}{254,205,27}
+#+LATEX_HEADER_EXTRA: \definecolor{swh-red}{RGB}{226,0,38}
+#+LATEX_HEADER_EXTRA: \definecolor{swh-green}{RGB}{77,181,174}
+
+
+* Software is everywhere around us
+** Software is everywhere
+ :PROPERTIES:
+ :CUSTOM_ID: softwareispervasive
+ :END:
+ #+latex: \begin{center}
+ #+ATTR_LATEX: :width .5\linewidth
+file:software-center.pdf
+ #+latex: \end{center}
+#+BEGIN_EXPORT latex
+\note{If we look around us, we see software everywhere.\\[1em]
+It powers our industries, fuels innovation, mediates access to all digital information,
+and is a pillar of modern scientific research.\\[1em] %
+%Our industry, our society, our own lives depend on software!\\[1em]
+Software in general, and Free and Open Source software in particular, is at the heart of our society.\\[1em]
+The source code of this software embodies our collective knowledge, and is a growing part of our cultural heritage.\\
+%Now the question is: are we taking care of it?
+}
+#+END_EXPORT
+#+BEAMER: \pause
+*** Software embodies a growing part of...
+ \hfill ... our scientific, /technical/ and Cultural Heritage!
+** Source Code: /executable/ and /human readable/ knowledge
+#+INCLUDE: "../../common/modules/source-code-different-short.org::#thesourcecode" :only-contents t :minlevel 3
+
+** ~ 50 years, a lightning fast growth
+ # #+INCLUDE: "../../common/modules/50years-source-code.org::#apollolinux" :only-contents t :minlevel 3
+*** Apollo 11 (~60,000 lines), 1969 :B_picblock:
+ :PROPERTIES:
+ :BEAMER_opt: pic=Margaret_Hamilton, width=.4\linewidth, leftpic=true
+ :BEAMER_env: picblock
+ :BEAMER_act:
+ :BEAMER_COL: .58
+ :END:
+ "When I first got into it, nobody knew what it was that we were doing. It was like the Wild West."\\
+ \mbox{}\hfill Margaret Hamilton\\
+# https://github.com/chrislgarry/Apollo-11
+# https://archive.softwareheritage.org/api/1/origin/git/url/https://github.com/chrislgarry/Apollo-11
+ #+BEAMER: \pause
+*** Linux Kernel (in your pockets!) :B_picblock:
+ :PROPERTIES:
+ :BEAMER_opt: pic=Linuxlinecount, width=1.3\linewidth, leftpic=true
+ :BEAMER_env: picblock
+ :BEAMER_COL: .42
+ :BEAMER_act:
+ :END:
+# \mbox{}\\
+ \vfill
+ #+BEAMER: \pause
+*** Harold Abelson, Structure and Interpretation of Computer Programs \hfill (1985)
+ /“Programs must be written for people to read, and only incidentally for machines to execute.”/
+ #+BEAMER: \pause
+*** Len Shustek, Computer History Museum \hfill (2006)
+ \hfill /“Source code provides a view into the mind of the designer.”/
+#+INCLUDE: "../../common/modules/swh-motivations-foss.org::#fragile" :minlevel 2
+** We are at a turning point
+*** Preserve the past
+ \hfill Only a few years left to recover the history of software technology
+#+BEAMER: \pause
+*** Improve the future
+ \hfill We need a \alert{universal} platform for all the future software source code
+* Software Heritage
+ #+INCLUDE: "../../common/modules/swh-overview-sourcecode.org::#mission" :minlevel 2
+** Core principles
+ #+latex: \begin{center}
+ #+ATTR_LATEX: :width .9\linewidth
+ file:SWH-as-foundation-slim.png
+ #+latex: \end{center}
+ #+BEAMER: \pause
+*** Open approach :B_block:BMCOL:
+ :PROPERTIES:
+ :BEAMER_col: 0.4
+ :BEAMER_env: block
+ :END:
+ - 100% Free and Open Source Software
+ - transparency
+*** In for the long haul :B_block:BMCOL:
+ :PROPERTIES:
+ :BEAMER_col: 0.4
+ :BEAMER_env: block
+ :END:
+ - replication
+ - non profit
+ #+INCLUDE: "../../common/modules/status-extended.org::#archivinggoals" :minlevel 2
+* Architecture
+ #+INCLUDE: "../../common/modules/status-extended.org::#architecture" :only-contents t
+# #+INCLUDE: "../../common/modules/status-extended.org::#merkletree" :minlevel 2
+ #+INCLUDE: "../../common/modules/status-extended.org::#merklerevision" :only-contents t
+ #+INCLUDE: "../../common/modules/status-extended.org::#archive" :minlevel 2
+# #+INCLUDE: "../../common/modules/status-extended.org::#technology" :only-contents t
+ #+INCLUDE: "../../common/modules/status-extended.org::#technology-short" :only-contents t
+ #+INCLUDE: "../../common/modules/status-extended.org::#development" :only-contents t
+# #+INCLUDE: "../../common/modules/status-extended.org::#features" :minlevel 2
+
+* Accessing the archive
+ #+INCLUDE: "../../common/modules/webui.org::#intro"
+
+** Visiting the archive: the Apollo 11 source code
+*** Margaret Hamilton today
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_COL: .42
+ :END:
+#+ATTR_LATEX: :width 0.4\linewidth
+ file:mhh-sweden.3-190x300.jpg
+*** The Apollo 11 source code in SWH
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_COL: .42
+ :END:
+ file:swh-apollo11.png
+*** :B_ignoreheading:
+ :PROPERTIES:
+ :BEAMER_env: ignoreheading
+ :END:
+*** Some pointers
+# Source code entry point
+ - [[https://archive.softwareheritage.org/swh:1:dir:3c235a1a8223727a964c154eb8f2273176c48c88;origin=https://github.com/chrislgarry/Apollo-11/][Entry point]]
+ - [[https://archive.softwareheritage.org/swh:1:cnt:41ddb23118f92d7218099a5e7a990cf58f1d07fa;origin=https://github.com/chrislgarry/Apollo-11;lines=53-87/][Burn, baby, burn!]]
+** Visiting the archive: the Quake 3 source code
+*** John Carmack
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_COL: .42
+ :END:
+#+ATTR_LATEX: :width .7\linewidth
+ file:John_Carmack_GDC_2010.jpg
+*** The Quake 3 source code in SWH
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_COL: .52
+ :END:
+ file:swh-quake3.png
+*** :B_ignoreheading:
+ :PROPERTIES:
+ :BEAMER_env: ignoreheading
+ :END:
+*** Some pointers
+# Source code entry point
+ - [[https://archive.softwareheritage.org/swh:1:dir:c6f07c2173a458d098de45d4c459a8f1916d900f;origin=https://github.com/id-Software/Quake-III-Arena/][Entry point]]
+ - [[https://archive.softwareheritage.org/swh:1:cnt:bb0faf6919fc60636b2696f32ec9b3c2adb247fe;origin=https://github.com/id-Software/Quake-III-Arena;lines=552-572/][What the f...]]
+ # #+INCLUDE: "../../common/modules/status-extended.org::#apiintro" :minlevel 2
+ # #+INCLUDE: "../../common/modules/vault.org::#vault-short" :minlevel 2 :only-contents t
+ # #+INCLUDE: "../../common/modules/vault.org::#main" :minlevel 2 :only-contents t
+
+ #+INCLUDE: "../../common/modules/webui.org" :minlevel 2 :lines "21-"
+
+* Today
+** Yes, now you can!
+*** Wayback machine for source code :B_block:
+ :PROPERTIES:
+ :BEAMER_COL: .46
+ :BEAMER_env: block
+ :END:
+ \hfill *retrieve* the source code as it was
+*** Reference catalog :B_block:
+ :PROPERTIES:
+ :BEAMER_COL: .5
+ :BEAMER_env: block
+ :END:
+ \hfill use *intrinsic identifiers* for software
+*** :B_ignoreheading:
+ :PROPERTIES:
+ :BEAMER_env: ignoreheading
+ :END:
+*** Open science :B_block:
+ :PROPERTIES:
+ :BEAMER_COL: .46
+ :BEAMER_env: block
+ :END:
+ \hfill *deposit* scientific software (via HAL)
+*** Universal knowledge base :B_block:
+ :PROPERTIES:
+ :BEAMER_COL: .5
+ :BEAMER_env: block
+ :END:
+ \hfill store the *knowledge* about source code
+*** :B_ignoreheading:
+ :PROPERTIES:
+ :BEAMER_env: ignoreheading
+ :END:
+*** And much, much more is in store!
+ \hfill With your help?
+** The next steps
+*** The Software Heritage Foundation :B_block:
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_COL: .4
+ :END:
+ - independent
+ - long term mission
+ - multistakeholder
+*** The community :B_block:
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_COL: .58
+ :END:
+ - academia: Open Access, research
+ - industry: better software
+ - cultural heritage: *all* the software history
+*** The mirror network :B_block:
+ :PROPERTIES:
+ :BEAMER_env: block
+ :END:
+ - resilience
+ - biodiversity
+#+BEGIN_EXPORT latex
+\begin{quote}
+ “Let us save what remains: not by vaults and locks which fence them from the
+ public eye and use in consigning them to the waste of time, but by such a
+ multiplication of copies, as shall place them beyond the reach of accident.”\\
+ \hfill Thomas Jefferson
+\end{quote}
+#+END_EXPORT
+* Conclusion
+** Come in, we're open! \hfill www.softwareheritage.org
+#+BEGIN_EXPORT latex
+ \begin{center}
+ \includegraphics[width=.6\linewidth]{SWH-logo.pdf}
+ \end{center}
+ \begin{center}
+ {\large \url{www.softwareheritage.org} \hspace{4em} \url{@swheritage}}
+ \end{center}
+#+END_EXPORT
+#+LATEX: {\begin{center}\Large Everybody is concerned, everybody can help build\end{center}}
+*** The Library of Alexandria of code :B_picblock:
+ :PROPERTIES:
+ :BEAMER_env: picblock
+ :BEAMER_COL: 0.42
+ :BEAMER_OPT: pic=clock-spring-forward.png,width=.45\linewidth,leftpic=true
+ :END:
+ - recover the past
+ - structure the future
+*** A CERN for Software :B_picblock:
+ :PROPERTIES:
+ :BEAMER_env: picblock
+ :BEAMER_COL: 0.5
+ :BEAMER_OPT: pic=atacama-telescope.jpg,width=.5\linewidth,leftpic=true
+ :END:
+ - build better software
+ + for industry
+ + for society as a whole
+* FAQ :B_appendix:
+ :PROPERTIES:
+ :BEAMER_env: appendix
+ :END:
+** Q: do you archive /only/ Free Software?
+ - We only crawl origins /meant/ to host source code (e.g., forges)
+ - Most (~90%) of what we /actually/ retrieve is textual content
+ #+BEAMER: \vfill
+*** Our goal
+ Archive *the entire Free Software Commons*
+
+ #+BEAMER: \vfill
+***
+ - Large parts of what we retrieve are /already/ Free Software, today
+ - Most of the rest /will become/ Free Software in the long term
+ - e.g., at copyright expiration
diff --git a/talks-public/2019-05-18-ubuntuparty/Makefile b/talks-public/2019-05-18-ubuntuparty/Makefile
new file mode 100644
index 0000000..68fbee7
--- /dev/null
+++ b/talks-public/2019-05-18-ubuntuparty/Makefile
@@ -0,0 +1 @@
+include ../Makefile.slides