Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/common/images/Rlogo.pdf b/common/images/Rlogo.pdf
new file mode 100644
index 0000000..60fb87a
Binary files /dev/null and b/common/images/Rlogo.pdf differ
diff --git a/common/images/Rlogo.svg b/common/images/Rlogo.svg
new file mode 100644
index 0000000..78281f7
--- /dev/null
+++ b/common/images/Rlogo.svg
@@ -0,0 +1,14 @@
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" preserveAspectRatio="xMidYMid" width="724" height="561" viewBox="0 0 724 561">
+ <defs>
+ <linearGradient id="gradientFill-1" x1="0" x2="1" y1="0" y2="1" gradientUnits="objectBoundingBox" spreadMethod="pad">
+ <stop offset="0" stop-color="rgb(203,206,208)" stop-opacity="1"/>
+ <stop offset="1" stop-color="rgb(132,131,139)" stop-opacity="1"/>
+ </linearGradient>
+ <linearGradient id="gradientFill-2" x1="0" x2="1" y1="0" y2="1" gradientUnits="objectBoundingBox" spreadMethod="pad">
+ <stop offset="0" stop-color="rgb(39,109,195)" stop-opacity="1"/>
+ <stop offset="1" stop-color="rgb(22,92,170)" stop-opacity="1"/>
+ </linearGradient>
+ </defs>
+ <path d="M361.453,485.937 C162.329,485.937 0.906,377.828 0.906,244.469 C0.906,111.109 162.329,3.000 361.453,3.000 C560.578,3.000 722.000,111.109 722.000,244.469 C722.000,377.828 560.578,485.937 361.453,485.937 ZM416.641,97.406 C265.289,97.406 142.594,171.314 142.594,262.484 C142.594,353.654 265.289,427.562 416.641,427.562 C567.992,427.562 679.687,377.033 679.687,262.484 C679.687,147.971 567.992,97.406 416.641,97.406 Z" fill="url(#gradientFill-1)" fill-rule="evenodd"/>
+ <path d="M550.000,377.000 C550.000,377.000 571.822,383.585 584.500,390.000 C588.899,392.226 596.510,396.668 602.000,402.500 C607.378,408.212 610.000,414.000 610.000,414.000 L696.000,559.000 L557.000,559.062 L492.000,437.000 C492.000,437.000 478.690,414.131 470.500,407.500 C463.668,401.969 460.755,400.000 454.000,400.000 C449.298,400.000 420.974,400.000 420.974,400.000 L421.000,558.974 L298.000,559.026 L298.000,152.938 L545.000,152.938 C545.000,152.938 657.500,154.967 657.500,262.000 C657.500,369.033 550.000,377.000 550.000,377.000 ZM496.500,241.024 L422.037,240.976 L422.000,310.026 L496.500,310.002 C496.500,310.002 531.000,309.895 531.000,274.877 C531.000,239.155 496.500,241.024 496.500,241.024 Z" fill="url(#gradientFill-2)" fill-rule="evenodd"/>
+</svg>
diff --git a/common/images/mhh-sweden.3-190x300.jpg b/common/images/mhh-sweden.3-190x300.jpg
new file mode 100644
index 0000000..44bd269
Binary files /dev/null and b/common/images/mhh-sweden.3-190x300.jpg differ
diff --git a/common/modules/prelude-toc.org b/common/modules/prelude-toc.org
index ea21dcf..9df83cd 100644
--- a/common/modules/prelude-toc.org
+++ b/common/modules/prelude-toc.org
@@ -1,99 +1,98 @@
#+BEAMER_HEADER: \titlegraphic{\includegraphics[width=\extblockscale{0.7\textwidth}]{SWH-logo+motto}}
#+STARTUP: hidestars
# activate org-beamer-mode minor mode automatically
#+STARTUP: beamer
# org export options
#+LANGUAGE: en
#+OPTIONS: H:2 num:t toc:nil \n:nil @:t ::t |:t ^:t -:t f:t *:t <:t
#+OPTIONS: TeX:t LaTeX:t skip:nil d:nil todo:t pri:nil tags:not-in-toc
#+EXPORT_SELECT_TAGS: export
#+EXPORT_EXCLUDE_TAGS: noexport
#+LINK_UP:
#+LINK_HOME:
#+LaTeX_CLASS: beamer
#+LaTeX_CLASS_OPTIONS: [presentation,xcolor=table]
#
# important font choice!
#
#+LaTeX_HEADER: \usepackage{libertine}
#
# Let's move that logo...
#
#+LaTeX_HEADER: \usepackage{animate}
#+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt)
# have the theme desired
#+latex_header: \mode<presentation>{\usetheme{swh} \beamertemplatenavigationsymbolsempty \setbeamertemplate{navigation symbols}{} \setbeamertemplate{headline}{}
#+latex_header: \setbeamertemplate{footline}
#+latex_header: {
#+latex_header: \leavevmode%
#+latex_header: \hbox{%
#+latex_header: \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.25ex,dp=1ex,center]{author in head/foot}%
#+latex_header: \usebeamerfont{author in head/foot}\insertshortauthor%~~\beamer@ifempty{\insertshortinstitute}{}{(\insertshortinstitute)}
#+latex_header: \end{beamercolorbox}%
#+latex_header: \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.25ex,dp=1ex,right]{title in head/foot}%
#+latex_header: \usebeamerfont{title in head/foot}\insertshorttitle{}\hspace*{2em}
#+latex_header: \usebeamerfont{date in head/foot}\insertshortdate{}\hspace*{2em}
#+latex_header: \insertframenumber{} / \inserttotalframenumber\hspace*{2ex}
#+latex_header: \end{beamercolorbox}}%
#+latex_header: \vskip0pt%
#+latex_header: }
#+latex_header: }
# some color
#+latex_header: \rowcolors[]{1}{blue!10}{blue!05}
#
# to have a toc for each section
#
# Use suggestions from http://web.stanford.edu/~dgleich/notebook/2009/05/appendix_slides_in_beamer_cont_1.html to avoid counting tocs in page number
#
#+latex_header: \AtBeginSection[] {\begin{frame}<*> \frametitle{Outline} \tableofcontents[currentsection]\end{frame} \addtocounter{framenumber}{-1}}
# set the paths for images
#+latex_header: \graphicspath{%
#+latex_header: {../../common/images/}{../../common/logos/}%
#+latex_header: {pics/}{../images/}{../../images/}{../pics/}{../../pics/}%
#+latex_header: {../figures/}{../../figures/}{../logos/}{../../logos/}{../../../logos/}%
#+latex_header: {../../communication/web/graphics/carousel/}%
#+latex_header: {../../communication/web/graphics/pictos/png/400x400/}%
#+latex_header: }
# some default information I did not find how to set this in org-mode
-#+latex_header: \institute[Irill/INRIA/UPD]{\url{roberto@dicosmo.org}}
# to add the picblock macro
#+latex_header: \usepackage{extblocks}
#+latex_header: \usepackage{pgfpages}
#+latex_header: \usepackage{animate}
#+latex_header: \usepackage{alltt}
#
# Itemize in multiple columns
#
#+latex_header: \usepackage{multicol}
#
# Requires
#
# http://www-ljk.imag.fr/membres/Jerome.Lelong/latex/appendixnumberbeamer.sty
#+latex_header: \usepackage{appendixnumberbeamer}
#
# Colors, color boxes
#
#+latex_header: \usepackage{color}
#+latex_header: \usepackage{soul}
# http://tex.stackexchange.com/questions/41683/why-is-it-that-coloring-in-soul-in-beamer-is-not-visible
#+latex_header: \makeatletter
#+latex_header: \newcommand\SoulColor{%
#+latex_header: \let\set@color\beamerorig@set@color
#+latex_header: \let\reset@color\beamerorig@reset@color}
#+latex_header: \makeatother
#+latex_header: \SoulColor
#+LATEX_HEADER: \usepackage{listings}
#+LATEX_HEADER: \usepackage{forcebeamermode}
diff --git a/common/modules/prelude.org b/common/modules/prelude.org
index 8bda5de..8eb0903 100644
--- a/common/modules/prelude.org
+++ b/common/modules/prelude.org
@@ -1,92 +1,91 @@
#+BEAMER_HEADER: \titlegraphic{\includegraphics[width=\extblockscale{0.7\textwidth}]{SWH-logo+motto}}
#+STARTUP: hidestars
# activate org-beamer-mode minor mode automatically
#+STARTUP: beamer
# org export options
#+LANGUAGE: en
#+OPTIONS: H:2 num:t toc:nil \n:nil @:t ::t |:t ^:t -:t f:t *:t <:t
#+OPTIONS: TeX:t LaTeX:t skip:nil d:nil todo:t pri:nil tags:not-in-toc
#+EXPORT_SELECT_TAGS: export
#+EXPORT_EXCLUDE_TAGS: noexport
#+LINK_UP:
#+LINK_HOME:
#+LaTeX_CLASS: beamer
#+LaTeX_CLASS_OPTIONS: [presentation,xcolor=table]
#
# important font choice!
#
#+LaTeX_HEADER: \usepackage{libertine}
#
# Let's move that logo...
#
#+LaTeX_HEADER: \usepackage{animate}
#+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt)
# have the theme desired
#+latex_header: \mode<presentation>{\usetheme{swh} \beamertemplatenavigationsymbolsempty \setbeamertemplate{navigation symbols}{} \setbeamertemplate{headline}{}
#+latex_header: \setbeamertemplate{footline}
#+latex_header: {
#+latex_header: \leavevmode%
#+latex_header: \hbox{%
#+latex_header: \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.25ex,dp=1ex,center]{author in head/foot}%
#+latex_header: \usebeamerfont{author in head/foot}\insertshortauthor%~~\beamer@ifempty{\insertshortinstitute}{}{(\insertshortinstitute)}
#+latex_header: \end{beamercolorbox}%
#+latex_header: \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.25ex,dp=1ex,right]{title in head/foot}%
#+latex_header: \usebeamerfont{title in head/foot}\insertshorttitle{}\hspace*{2em}
#+latex_header: \usebeamerfont{date in head/foot}\insertshortdate{}\hspace*{2em}
#+latex_header: \insertframenumber{} / \inserttotalframenumber\hspace*{2ex}
#+latex_header: \end{beamercolorbox}}%
#+latex_header: \vskip0pt%
#+latex_header: }
#+latex_header: }
# some color
#+latex_header: \rowcolors[]{1}{blue!10}{blue!05}
# set the paths for images
#+latex_header: \graphicspath{%
#+latex_header: {../../common/images/}{../../common/logos/}%
#+latex_header: {pics/}{../images/}{../../images/}{../pics/}{../../pics/}%
#+latex_header: {../figures/}{../../figures/}{../logos/}{../../logos/}{../../../logos/}%
#+latex_header: {../../communication/web/graphics/carousel/}%
#+latex_header: {../../communication/web/graphics/pictos/png/400x400/}%
#+latex_header: }
# some default information I did not find how to set this in org-mode
-#+latex_header: \institute[Irill/INRIA/UPD]{\url{roberto@dicosmo.org}}
# to add the picblock macro
#+latex_header: \usepackage{extblocks}
#+latex_header: \usepackage{pgfpages}
#+latex_header: \usepackage{animate}
#+latex_header: \usepackage{alltt}
#
# Itemize in multiple columns
#
#+latex_header: \usepackage{multicol}
#
# Requires
#
# http://www-ljk.imag.fr/membres/Jerome.Lelong/latex/appendixnumberbeamer.sty
#+latex_header: \usepackage{appendixnumberbeamer}
#
# Colors, color boxes
#
#+latex_header: \usepackage{color}
#+latex_header: \usepackage{soul}
# http://tex.stackexchange.com/questions/41683/why-is-it-that-coloring-in-soul-in-beamer-is-not-visible
#+latex_header: \makeatletter
#+latex_header: \newcommand\SoulColor{%
#+latex_header: \let\set@color\beamerorig@set@color
#+latex_header: \let\reset@color\beamerorig@reset@color}
#+latex_header: \makeatother
#+latex_header: \SoulColor
#+LATEX_HEADER: \usepackage{listings}
#+LATEX_HEADER: \usepackage{forcebeamermode}
diff --git a/common/modules/status-extended.org b/common/modules/status-extended.org
index 624dc06..54d1435 100644
--- a/common/modules/status-extended.org
+++ b/common/modules/status-extended.org
@@ -1,459 +1,461 @@
#+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt)
#+INCLUDE: "prelude.org" :minlevel 1
# not to be included as a whole, just pick individual slides as you see fit
* Status
:PROPERTIES:
:CUSTOM_ID: main
:END:
** The people
:PROPERTIES:
:CUSTOM_ID: people
:END:
*** The core team :B_picblock:
:PROPERTIES:
:CUSTOM_ID: core-team-formal
:BEAMER_env: picblock
:BEAMER_opt: pic=team,width=.4\linewidth
:END:
- Roberto Di Cosmo
- Stefano Zacchiroli
- Nicolas Dandrimont (Engineer)
- Antoine Dumont (Engineer)
# - and /Jordi, Quentin and Guillaume/
*** Scientific advisors
- Serge Abiteboul (French Science Academy)
- Jean-François Abramatic (former W3C director)
- Gérard Berry (CNRS Gold Medal, French Science Academy)
- Julia Lawall (Coccinelle, Linux Kernel, Outreachy)
** Archive coverage --- archive.softwareheritage.org
:PROPERTIES:
:CUSTOM_ID: archive
:END:
#+BEAMER: \vspace{-1mm}
#+BEAMER: \begin{center}\includegraphics[width=\extblockscale{1.1\linewidth}]{2019-01-archive-growth.png}\end{center}
#+BEAMER: \vspace{-2mm}
***
#+BEAMER: \includegraphics[width=0.19\linewidth]{coverage/github} \hfill
#+BEAMER: \includegraphics[width=0.2\linewidth]{coverage/debian} \hfill
#+BEAMER: \includegraphics[width=0.2\linewidth]{coverage/gitlab} \hfill
#+BEAMER: \includegraphics[width=0.2\linewidth]{coverage/googlecode} \\
#+BEAMER: \includegraphics[width=0.2\linewidth]{coverage/gitorious} \hfill
#+BEAMER: \includegraphics[width=0.15\linewidth]{coverage/gnu} \hfill
#+BEAMER: \includegraphics[width=0.13\linewidth]{coverage/hal} \hfill
#+BEAMER: \includegraphics[width=0.16\linewidth]{coverage/inria} \hfill
#+BEAMER: \includegraphics[width=0.13\linewidth]{coverage/pypi}
#+BEAMER: \pause
***
- 200 TB (compressed) blobs, 6 TB database (as a graph: 10 B nodes + 100 B edges)
- The /richest/ public source code archive, ... and growing daily!
** The structure of the archive :noexport:
*** On-disk storage
- flat file storage for contents
- postgres database for the metadata
*** Data model: /one/ big Merkle DAG, inspired by the git model
- Origins (= repositories)
- Occurrences (= branches)
- Releases (= tags)
- Revisions (= commits)
- Directories (= trees)
- Contents (= blobs)
** Archiving goals
:PROPERTIES:
:CUSTOM_ID: archivinggoals
:END:
Targets: VCS repositories & source code releases (e.g., tarballs)
*** We DO archive
- file *content* (= blobs)
- *revisions* (= commits), with full metadata
- *releases* (= tags), ditto
- where (*origin*) & when (*visit*) we found any of the above
# - time-indexed repo *snapshots* (i.e., we never delete anything)
… in a VCS-/archive-agnostic *canonical data model*
*** We DON'T archive
# - diffs → derived data from related contents
- homepages, wikis
- BTS/issues/code reviews/etc.
- mailing lists
Long term vision: play our part in a /"semantic wikipedia of software"/
** Architecture
:PROPERTIES:
:CUSTOM_ID: architecture
:END:
*** Data flow
:PROPERTIES:
:CUSTOM_ID: dataflow
:END:
#
#+BEAMER: \begin{center}\includegraphics[width=\extblockscale{1.2\textwidth}]{swh-dataflow.pdf}\end{center}
** Data model :noexport:
*** General schema
- VCS-independent
- fully deduplicated
+ files, directories and commits are /shared/
- biggest git-like /graph/ in the world
***
\begin{center}
\url{http://deb.li/swhdm}
\end{center}
*** full hash index (sha1, sha256, ...)
Some funny facts:
- the GPL2 licence appears under more than 500 names
+ including /aa.css.txt/ and /FullSync.txt/ ~ :-)
** Merkle DAG
*** Merkle structure
:PROPERTIES:
:CUSTOM_ID: merkle
:END:
**** Merkle trees
:PROPERTIES:
:CUSTOM_ID: merkletree
:END:
# R. C. Merkle, A digital signature based on a conventional encryption
# function, Crypto '87
#+BEAMER: \vspace{-3mm}
***** Merkle tree (R. C. Merkle, Crypto 1979) :B_picblock:
:PROPERTIES:
:BEAMER_opt: pic=merkle, leftpic=true, width=.7\linewidth
:BEAMER_env: picblock
:BEAMER_act:
:END:
Combination of
- tree
- hash function
#+BEAMER: \pause
#+BEAMER: \footnotesize
***** Classical cryptographic construction
- fast, parallel signature of large data structures
- widely used (e.g., Git, blockchains, IPFS, ...)
- built-in deduplication
#+BEAMER: \vspace{-1mm}
**** The archive in a few pictures
:PROPERTIES:
:CUSTOM_ID: merkledemo
:END:
***** A giant (extended) Merkle DAG
#+LATEX: \only<1>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/merkle_1.pdf}}}
#+LATEX: \only<2>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/contents.pdf}}}
#+LATEX: \only<3>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/merkle_2_contents.pdf}}}
#+LATEX: \only<4>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/directories.pdf}}}
#+LATEX: \only<5>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/merkle_3_directories.pdf}}}
#+LATEX: \only<6>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/revisions.pdf}}}
#+LATEX: \only<7>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/merkle_4_revisions.pdf}}}
#+LATEX: \only<8>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/releases.pdf}}}
#+LATEX: \only<9>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/merkle_5_releases.pdf}}}
# #+LATEX: {\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/merkle_1.pdf}}}
*** A revision node
:PROPERTIES:
:CUSTOM_ID: merklerevision
:END:
**** Example: a Software Heritage revision
*****
#+BEAMER: \vspace{-.5cm}\centering\includegraphics[width=0.9\textwidth]{git-merkle/revisions}
*****
Note: most object kinds currently have Git-compatible identifiers
*** Giant DAG
:PROPERTIES:
:CUSTOM_ID: giantdag
:END:
**** The archive: a (giant) Merkle DAG
# Using an empty frame because the image is difficult to read on swh bg.
# Finding a way to override image bg for just this frame would be better.
*****
#+BEAMER: \centering \includegraphics[width=\extblockscale{\textwidth}]{git-merkle/merkle_5_releases}
*** Giant DAG (single slide)
:PROPERTIES:
:CUSTOM_ID: giantdag1slide
:END:
**** The Software Heritage archive: a gigantic Merkle DAG
#+LATEX: \centering\forcebeamerstart{}
#+LATEX: \only<1>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/merkle_1}}}
#+LATEX: \only<2>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/contents}}}
#+LATEX: \only<3>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/merkle_2_contents}}}
#+LATEX: \only<4>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/directories}}}
#+LATEX: \only<5>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/merkle_3_directories}}}
#+LATEX: \only<6>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/revisions}}}
#+LATEX: \only<7>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/merkle_4_revisions}}}
#+LATEX: \only<8>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/releases}}}
#+LATEX: \only<9>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/merkle_5_releases}}}
#+LATEX: \forcebeamerend{}
*** Giant DAG (detailed)
:PROPERTIES:
:CUSTOM_ID: dagdetail
:END:
**** The archive: a (giant) Merkle DAG
#+BEAMER: \vspace{-3mm}
#+BEAMER: \centering \includegraphics[width=\textwidth]{swh-merkle-dag-wide}
** Technology :noexport:
:PROPERTIES:
:CUSTOM_ID: technology
:END:
*** Software stack
**** 3rd party
- Debian, Puppet, Ceph
- PostgreSQL for metadata storage, with barman & pglogical
- Celery (RabbitMQ backend) for task scheduling
- Python3 and psycopg2 for the backend
- Django, Bootstrap, D3.js for Web stuff
**** in house
- /ad hoc/ object storage (to avoid imposing tech on mirrors)
- data model implementation, listers, loaders, scheduler
- ~60 Git repositories (~20 Python packages, ~30 Puppet modules)
- ~30 kSLOC Python / ~12 kSLOC SQL / ~4 kSLOC Puppet
- licence choice: GPLv3 (backend) / AGPLv3 (frontend)
*** Hardware stack
**** in house
- 2x hypervisors with ~20 VMs
- 2x high density storage array (60 * 6TB => 300TB usable each)
- Prototype: ceph storage cluster for blobs
**** on Azure
- full object storage mirror
- full mirror of the database containing the graph
- workers for content indexing
- workers for download bundle preparation
**** at the University of Bologna
- backend storage (60TB) for the bundles available for download
*** Software architecture :noexport:
**** Module dependencies (internal + external) :B_picblock:
:PROPERTIES:
:BEAMER_env: picblock
:BEAMER_opt: pic=swh-modules-deps-all,width=\linewidth
:END:
****
let's zoom in: http://deb.li/swhdeps
** Technology :noexport:
:PROPERTIES:
:CUSTOM_ID: technology-short
:END:
*** Deployment and resource usage
**** Software
- around 30k SLOC of custom Python code, running on Debian Stable
- PostgreSQL database for the metadata storage
+ - Full docker-compose development environment
+ - Work in progress: scale-out metadata storage (Cassandra?)
+ - Work in progress: mirroring infrastructure (Kafka)
**** Hardware
- - 3 hypervisors with mass storage and a backup server at Inria
- - Work in progress: in-house Ceph deployment for object storage
+ - 12 servers (hypervisors, database, storage, staging and testing infrastructure) / 40 virtual machines with mass storage and a backup server at Inria
- In-kind sponsorship of cloud and storage resources (Microsoft, University of Bologna)
** Software development :noexport:
:PROPERTIES:
:CUSTOM_ID: development
:END:
*** Software development
**** classic FOSS development
- language: English
- development mailing list
#+BEAMER: \\{\small \url{https://sympa.inria.fr/sympa/info/swh-devel}}
- IRC
#+BEAMER: \\
#swh-devel / FreeNode
- Forge
#+BEAMER: \\{\small \url{https://forge.softwareheritage.org}}
- Git, tasks, code review, etc.
**** for more information
#+BEAMER: \scriptsize
https://www.softwareheritage.org/community/developers/
** Roadmap
:PROPERTIES:
:CUSTOM_ID: features
:END:
*** Features...
- (done) *lookup* by content hash
- (done) *browsing*: "wayback machine" for source code (API + UI)
- (early access) *deposit* of source code bundles directly to the archive
- (early access) *save code now*, on-demand archive
- (done) *download*: =wget= / =git clone= from the archive
- (todo) *provenance* lookup for all archived content
- (todo) *full-text search* on all archived source code files
#+BEAMER: \pause
*** ... and much more than one could possibly imagine
all the world's software development history at hand's reach!
** Web API :noexport:
:PROPERTIES:
:CUSTOM_ID: api
:END:
*** Web API
:PROPERTIES:
:CUSTOM_ID: apiintro
:END:
****
RESTful API to programmatically access the Software Heritage archive \\
*\url{https://archive.softwareheritage.org/api/}*
**** Features
- pointwise *browsing* of the archive
- … snapshots → revisions → directories → contents …
- full access to the *metadata* of archived objects
- *crawling* information
- /when did you last visit this Git repository I care about?/
- /where were its branches/tags pointing to at the time?/
# - derived information about archived contents (WIP)
# - MIME type, programming language, license, etc.
**** Endpoint index
\url{https://archive.softwareheritage.org/api/1/}
*** A tour of the Web API --- origins & visits
:PROPERTIES:
:CUSTOM_ID: apitourvisits
:END:
#+BEAMER: \footnotesize
#+BEGIN_SRC
GET https://archive.softwareheritage.org/api/1/origin/ \
git/url/https://github.com/hylang/hy
{ "id": 1,
"origin_visits_url": "/api/1/origin/1/visits/",
"type": "git",
"url": "https://github.com/hylang/hy"
}
#+END_SRC
#+BEAMER: \vfill
#+BEGIN_SRC
GET https://archive.softwareheritage.org/api/1/origin/ \
1/visits/
[ ...,
{ "date": "2016-09-14T11:04:26.769266+00:00",
"origin": 1,
"origin_visit_url": "/api/1/origin/1/visit/13/",
"status": "full",
"visit": 13
}, ...
]
#+END_SRC
*** A tour of the Web API --- snapshots
:PROPERTIES:
:CUSTOM_ID: apitoursnapshots
:END:
#+BEAMER: \footnotesize
#+BEGIN_SRC
GET https://archive.softwareheritage.org/api/1/origin/ \
1/visit/13/
{ ...,
"occurrences": { ...,
"refs/heads/master": {
"target": "b94211251...",
"target_type": "revision",
"target_url": "/api/1/revision/b94211251.../"
},
"refs/tags/0.10.0": {
"target": "7045404f3...",
"target_type": "release",
"target_url": "/api/1/release/7045404f3.../"
}, ...
},
"origin": 1,
"origin_url": "/api/1/origin/1/",
"status": "full",
"visit": 13
}
#+END_SRC
*** A tour of the Web API --- releases :noexport:
:PROPERTIES:
:CUSTOM_ID: apitourreleases
:END:
#+BEAMER: \footnotesize
#+BEGIN_SRC
GET https://archive.softwareheritage.org/api/1/release/ \
7045404f3d1c54e6473c71bbb716529fbad4be24/
{
"author": {
"email": "tag@pault.ag",
"fullname": "Paul Tagliamonte <tag@pault.ag>",
"id": 96,
"name": "Paul Tagliamonte"
},
"date": "2014-04-10T23:01:28-04:00",
"message": "0.10: The Oh f*ck it's PyCon release",
"name": "0.10.0",
"synthetic": false,
"target": "6072557b6...",
"target_type": "revision",
"target_url": "/api/1/revision/6072557b6.../",
...
}
#+END_SRC
*** A tour of the Web API --- revisions
:PROPERTIES:
:CUSTOM_ID: apitourrevisions
:END:
#+BEAMER: \footnotesize
#+BEGIN_SRC
GET https://archive.softwareheritage.org/api/1/revision/ \
6072557b6c10cd9a21145781e26ad1f978ed14b9/
{
"author": {
"email": "tag@pault.ag",
"fullname": "Paul Tagliamonte <tag@pault.ag>",
"id": 96,
"name": "Paul Tagliamonte"
},
"committer": { ... },
"date": "2014-04-10T23:01:11-04:00",
"committer_date": "2014-04-10T23:01:11-04:00",
"directory": "2df4cd84e...",
"directory_url": "/api/1/directory/2df4cd84e.../",
"history_url": "/api/1/revision/6072557b6.../log/",
"merge": false,
"message": "0.10: The Oh f*ck it's PyCon release",
"parents": [ {
"id": "10149f66e...",
"url": "/api/1/revision/10149f66e.../"
} ],
...
}
#+END_SRC
*** A tour of the Web API --- contents
:PROPERTIES:
:CUSTOM_ID: apitourcontents
:END:
#+BEAMER: \footnotesize
#+BEGIN_SRC
GET https://archive.softwareheritage.org/api/1/content/ \
adc83b19e793491b1c6ea0fd8b46cd9f32e592fc/
{
"data_url": "/api/1/content/sha1:adc83b19e.../raw/",
"filetype_url": "/api/1/content/sha1:.../filetype/",
"language_url": "/api/1/content/sha1:.../language/",
"length": 1,
"license_url": "/api/1/content/sha1:.../license/",
"sha1": "adc83b19e...",
"sha1_git": "8b1378917...",
"sha256": "01ba4719c...",
"status": "visible"
}
#+END_SRC
#+BEAMER: \normalsize \vfill \pause
**** Caveats
- rate limits apply throughout the API
- raw download available for textual contents
** Accessing the archive :noexport:
:PROPERTIES:
:CUSTOM_ID: accessing-short
:END:
*** Browse :B_block:BMCOL:
:PROPERTIES:
:BEAMER_col: 0.4
:BEAMER_env: block
:END:
#+BEAMER: \begin{center}\includegraphics[width=0.5\textwidth]{archive-browse}\end{center}
- https://archive.softwareheritage.org/browse
- wayback machine for software source code
#+BEAMER: \pause
*** Web API :B_block:BMCOL:
:PROPERTIES:
:BEAMER_col: 0.4
:BEAMER_env: block
:END:
#+BEAMER: \begin{center}\includegraphics[width=0.5\textwidth]{archive-webapi}\end{center}
- https://archive.softwareheritage.org/api
- point-wise navigation of the archive as a graph
** Some technical challenges
:PROPERTIES:
:CUSTOM_ID: techchallenges
:END:
*** Expanding the archive
- discover and classify /all/ the software sources
- importers for other VCSs (SVN, Hg, ...)
\hfill /We need your help!/
*** Staying current
get new repositories and commits ASAP\\
\hfill /We need reliable, standardised event feeds./
*** Handling the backlog
ingesting all the pre-existing data\\
\hfill /Decades of software development are waiting!/
diff --git a/talks-public/2019-05-18-ubuntuparty/2019-05-18-ubuntuparty.org b/talks-public/2019-05-18-ubuntuparty/2019-05-18-ubuntuparty.org
new file mode 100644
index 0000000..7c77fb6
--- /dev/null
+++ b/talks-public/2019-05-18-ubuntuparty/2019-05-18-ubuntuparty.org
@@ -0,0 +1,283 @@
+#+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt)
+#+TITLE: Software Heritage: The Great Library of Source Code
+# the TITLE keyword does not allow a short title, so we override it for beamer as follows:
+#+BEAMER_HEADER: \title[Software Heritage]{Software Heritage\\The Great Library of Source Code}
+#+BEAMER_HEADER: \author{Nicolas Dandrimont}
+#+BEAMER_HEADER: \date[2019-05-18 Ubuntu Party]{18 mai 2019\\Ubuntu Party - Paris}
+#+AUTHOR: Nicolas Dandrimont
+#+DATE: 2019-05-18
+#+EMAIL: nicolas@dandrimont.eu
+#+DESCRIPTION: Software Heritage: The Great Library of Source Code
+#+KEYWORDS: software heritage legacy preservation knowledge mankind technology
+
+#+INCLUDE: "../../common/modules/prelude.org" :minlevel 1
+#+INCLUDE: "../../common/modules/169.org"
+#+BEAMER_HEADER: \institute[Software Heritage]{Software Engineer - Software Heritage\\\href{mailto:nicolas@dandrimont.eu}{\tt nicolas@dandrimont.eu}}
+
+#+LATEX_HEADER_EXTRA: \usepackage{bbding}
+#+LATEX_HEADER_EXTRA: \DeclareUnicodeCharacter{66D}{\FiveStar}
+#+LATEX_HEADER_EXTRA: \usepackage{tikz}
+#+LATEX_HEADER_EXTRA: \usetikzlibrary{arrows,shapes}
+#+LATEX_HEADER_EXTRA: \definecolor{swh-orange}{RGB}{254,205,27}
+#+LATEX_HEADER_EXTRA: \definecolor{swh-red}{RGB}{226,0,38}
+#+LATEX_HEADER_EXTRA: \definecolor{swh-green}{RGB}{77,181,174}
+
+
+* Software is everywhere around us
+** Software is everywhere
+ :PROPERTIES:
+ :CUSTOM_ID: softwareispervasive
+ :END:
+ #+latex: \begin{center}
+ #+ATTR_LATEX: :width .5\linewidth
+file:software-center.pdf
+ #+latex: \end{center}
+#+BEGIN_EXPORT latex
+\note{If we look around us, we see software everywhere.\\[1em]
+It powers our industries, fuels innovation, mediates access to all digital information,
+and is a pillar of modern scientific research.\\[1em] %
+%Our industry, our society, our own lives depend on software!\\[1em]
+Software in general, and Free and Open Source software in particular, is at the heart of our society.\\[1em]
+The source code of this software embodies our collective knowledge, and is a growing part of our cultural heritage.\\
+%Now the question is: are we taking care of it?
+}
+#+END_EXPORT
+#+BEAMER: \pause
+*** Software embodies a growing part of...
+ \hfill ... our scientific, /technical/ and Cultural Heritage!
+** Source Code: /executable/ and /human readable/ knowledge
+#+INCLUDE: "../../common/modules/source-code-different-short.org::#thesourcecode" :only-contents t :minlevel 3
+
+** ~ 50 years, a lightning fast growth
+ # #+INCLUDE: "../../common/modules/50years-source-code.org::#apollolinux" :only-contents t :minlevel 3
+*** Apollo 11 (~60.000 lines), 1969 :B_picblock:
+ :PROPERTIES:
+ :BEAMER_opt: pic=Margaret_Hamilton, width=.4\linewidth, leftpic=true
+ :BEAMER_env: picblock
+ :BEAMER_act:
+ :BEAMER_COL: .58
+ :END:
+ "When I first got into it, nobody knew what it was that we were doing. It was like the Wild West."\\
+ \mbox{}\hfill Margaret Hamilton\\
+# https://github.com/chrislgarry/Apollo-11
+# https://archive.softwareheritage.org/api/1/origin/git/url/https://github.com/chrislgarry/Apollo-11
+ #+BEAMER: \pause
+*** Linux Kernel (in your pockets!) :B_picblock:
+ :PROPERTIES:
+ :BEAMER_opt: pic=Linuxlinecount, width=1.3\linewidth, leftpic=true
+ :BEAMER_env: picblock
+ :BEAMER_COL: .42
+ :BEAMER_act:
+ :END:
+# \mbox{}\\
+ \vfill
+ #+BEAMER: \pause
+*** Harold Abelson, Structure and Interpretation of Computer Programs \hfill (1985)
+ /“Programs must be written for people to read, and only incidentally for machines to execute.”/
+ #+BEAMER: \pause
+*** Len Shustek, Computer History Museum \hfill (2006)
+ \hfill /“Source code provides a view into the mind of the designer.”/
+#+INCLUDE: "../../common/modules/swh-motivations-foss.org::#fragile" :minlevel 2
+** We are at a turning point
+*** Preserve the past
+ \hfill Only a few years left to recover the history of software technology
+#+BEAMER: \pause
+*** Improve the future
+ \hfill We need a \alert{universal} platform for all the future software source code
+* Software Heritage
+ #+INCLUDE: "../../common/modules/swh-overview-sourcecode.org::#mission" :minlevel 2
+** Core principles
+ #+latex: \begin{center}
+ #+ATTR_LATEX: :width .9\linewidth
+ file:SWH-as-foundation-slim.png
+ #+latex: \end{center}
+ #+BEAMER: \pause
+*** Open approach :B_block:BMCOL:
+ :PROPERTIES:
+ :BEAMER_col: 0.4
+ :BEAMER_env: block
+ :END:
+ - 100% Free and Open Source Software
+ - transparency
+*** In for the long haul :B_block:BMCOL:
+ :PROPERTIES:
+ :BEAMER_col: 0.4
+ :BEAMER_env: block
+ :END:
+ - replication
+ - non profit
+ #+INCLUDE: "../../common/modules/status-extended.org::#archivinggoals" :minlevel 2
+* Architecture
+ #+INCLUDE: "../../common/modules/status-extended.org::#architecture" :only-contents t
+# #+INCLUDE: "../../common/modules/status-extended.org::#merkletree" :minlevel 2
+ #+INCLUDE: "../../common/modules/status-extended.org::#merklerevision" :only-contents t
+ #+INCLUDE: "../../common/modules/status-extended.org::#archive" :minlevel 2
+# #+INCLUDE: "../../common/modules/status-extended.org::#technology" :only-contents t
+ #+INCLUDE: "../../common/modules/status-extended.org::#technology-short" :only-contents t
+ #+INCLUDE: "../../common/modules/status-extended.org::#development" :only-contents t
+# #+INCLUDE: "../../common/modules/status-extended.org::#features" :minlevel 2
+
+* Accessing the archive
+ #+INCLUDE: "../../common/modules/webui.org::#intro"
+
+** Visiting the archive: the Apollo 11 source code
+*** Margaret Hamilton today
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_COL: .42
+ :END:
+#+ATTR_LATEX: :width 0.4\linewidth
+ file:mhh-sweden.3-190x300.jpg
+*** The Apollo 11 source code in SWH
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_COL: .42
+ :END:
+ file:swh-apollo11.png
+*** :B_ignoreheading:
+ :PROPERTIES:
+ :BEAMER_env: ignoreheading
+ :END:
+*** Some pointers
+# Source code entry point
+ - [[https://archive.softwareheritage.org/swh:1:dir:3c235a1a8223727a964c154eb8f2273176c48c88;origin=https://github.com/chrislgarry/Apollo-11/][Entry point]]
+ - [[https://archive.softwareheritage.org/swh:1:cnt:41ddb23118f92d7218099a5e7a990cf58f1d07fa;origin=https://github.com/chrislgarry/Apollo-11;lines=53-87/][Burn, baby, burn!]]
+** Visiting the archive: the Quake 3 source code
+*** John Carmack
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_COL: .42
+ :END:
+#+ATTR_LATEX: :width .7\linewidth
+ file:John_Carmack_GDC_2010.jpg
+*** The Quake 3 source code in SWH
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_COL: .52
+ :END:
+ file:swh-quake3.png
+*** :B_ignoreheading:
+ :PROPERTIES:
+ :BEAMER_env: ignoreheading
+ :END:
+*** Some pointers
+# Source code entry point
+ - [[https://archive.softwareheritage.org/swh:1:dir:c6f07c2173a458d098de45d4c459a8f1916d900f;origin=https://github.com/id-Software/Quake-III-Arena/][Entry point]]
+ - [[https://archive.softwareheritage.org/swh:1:cnt:bb0faf6919fc60636b2696f32ec9b3c2adb247fe;origin=https://github.com/id-Software/Quake-III-Arena;lines=552-572/][What the f...]]
+ # #+INCLUDE: "../../common/modules/status-extended.org::#apiintro" :minlevel 2
+ # #+INCLUDE: "../../common/modules/vault.org::#vault-short" :minlevel 2 :only-contents t
+ # #+INCLUDE: "../../common/modules/vault.org::#main" :minlevel 2 :only-contents t
+
+ #+INCLUDE: "../../common/modules/webui.org" :minlevel 2 :lines "21-"
+
+* Today
+** Yes, now you can!
+*** Wayback machine for source code :B_block:
+ :PROPERTIES:
+ :BEAMER_COL: .46
+ :BEAMER_env: block
+ :END:
+ \hfill *retrieve* the source code as it was
+*** Reference catalog :B_block:
+ :PROPERTIES:
+ :BEAMER_COL: .5
+ :BEAMER_env: block
+ :END:
+ \hfill use *intrinsic identifiers* for software
+*** :B_ignoreheading:
+ :PROPERTIES:
+ :BEAMER_env: ignoreheading
+ :END:
+*** Open science :B_block:
+ :PROPERTIES:
+ :BEAMER_COL: .46
+ :BEAMER_env: block
+ :END:
+ \hfill *deposit* scientific software (via HAL)
+*** Universal knowledge base :B_block:
+ :PROPERTIES:
+ :BEAMER_COL: .5
+ :BEAMER_env: block
+ :END:
+ \hfill store the *knowledge* about source code
+*** :B_ignoreheading:
+ :PROPERTIES:
+ :BEAMER_env: ignoreheading
+ :END:
+*** And much, much more is in store!
+ \hfill With your help?
+** The next steps
+*** The Software Heritage Foundation :B_block:
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_COL: .4
+ :END:
+ - independent
+ - long term mission
+ - multistakeholder
+*** The community :B_block:
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_COL: .58
+ :END:
+ - academia: Open Access, research
+ - industry: better software
+ - cultural heritage: *all* the software history
+*** The mirror network :B_block:
+ :PROPERTIES:
+ :BEAMER_env: block
+ :END:
+ - resilience
+ - biodiversity
+#+BEGIN_EXPORT latex
+\begin{quote}
+ “Let us save what remains: not by vaults and locks which fence them from the
+ public eye and use in consigning them to the waste of time, but by such a
+ multiplication of copies, as shall place them beyond the reach of accident.”\\
+ \hfill Thomas Jefferson
+\end{quote}
+#+END_EXPORT
+* Conclusion
+** Come in, we're open! \hfill www.softwareheritage.org
+#+BEGIN_EXPORT latex
+ \begin{center}
+ \includegraphics[width=.6\linewidth]{SWH-logo.pdf}
+ \end{center}
+ \begin{center}
+ {\large \url{www.softwareheritage.org} \hspace{4em} \url{@swheritage}}
+ \end{center}
+#+END_EXPORT
+#+LATEX: {\begin{center}\Large Everybody is concerned, everybody can help build\end{center}}
+*** The Library of Alexandria of code :B_picblock:
+ :PROPERTIES:
+ :BEAMER_env: picblock
+ :BEAMER_COL: 0.42
+ :BEAMER_OPT: pic=clock-spring-forward.png,width=.45\linewidth,leftpic=true
+ :END:
+ - recover the past
+ - structure the future
+*** A CERN for Software :B_picblock:
+ :PROPERTIES:
+ :BEAMER_env: picblock
+ :BEAMER_COL: 0.5
+ :BEAMER_OPT: pic=atacama-telescope.jpg,width=.5\linewidth,leftpic=true
+ :END:
+ - build better software
+ + for industry
+ + for society as a whole
+* FAQ :B_appendix:
+ :PROPERTIES:
+ :BEAMER_env: appendix
+ :END:
+** Q: do you archive /only/ Free Software?
+ - We only crawl origins /meant/ to host source code (e.g., forges)
+ - Most (~90%) of what we /actually/ retrieve is textual content
+ #+BEAMER: \vfill
+*** Our goal
+ Archive *the entire Free Software Commons*
+
+ #+BEAMER: \vfill
+***
+ - Large parts of what we retrieve are /already/ Free Software, today
+ - Most of the rest /will become/ Free Software in the long term
+ - e.g., at copyright expiration
diff --git a/talks-public/2019-05-18-ubuntuparty/Makefile b/talks-public/2019-05-18-ubuntuparty/Makefile
new file mode 100644
index 0000000..68fbee7
--- /dev/null
+++ b/talks-public/2019-05-18-ubuntuparty/Makefile
@@ -0,0 +1 @@
+include ../Makefile.slides

File Metadata

Mime Type
text/x-diff
Expires
Jul 4 2025, 7:46 AM (10 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3452463

Event Timeline