diff --git a/common/modules/prelude-toc.org b/common/modules/prelude-toc.org index 8f33244..ea21dcf 100644 --- a/common/modules/prelude-toc.org +++ b/common/modules/prelude-toc.org @@ -1,98 +1,99 @@ #+BEAMER_HEADER: \titlegraphic{\includegraphics[width=\extblockscale{0.7\textwidth}]{SWH-logo+motto}} #+STARTUP: hidestars # activate org-beamer-mode minor mode automatically #+STARTUP: beamer # org export options #+LANGUAGE: en #+OPTIONS: H:2 num:t toc:nil \n:nil @:t ::t |:t ^:t -:t f:t *:t <:t #+OPTIONS: TeX:t LaTeX:t skip:nil d:nil todo:t pri:nil tags:not-in-toc #+EXPORT_SELECT_TAGS: export #+EXPORT_EXCLUDE_TAGS: noexport #+LINK_UP: #+LINK_HOME: #+LaTeX_CLASS: beamer #+LaTeX_CLASS_OPTIONS: [presentation,xcolor=table] # # important font choice! # #+LaTeX_HEADER: \usepackage{libertine} # # Let's move that logo... # #+LaTeX_HEADER: \usepackage{animate} #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) # have the theme desired #+latex_header: \mode{\usetheme{swh} \beamertemplatenavigationsymbolsempty \setbeamertemplate{navigation symbols}{} \setbeamertemplate{headline}{} #+latex_header: \setbeamertemplate{footline} #+latex_header: { #+latex_header: \leavevmode% #+latex_header: \hbox{% #+latex_header: \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.25ex,dp=1ex,center]{author in head/foot}% #+latex_header: \usebeamerfont{author in head/foot}\insertshortauthor%~~\beamer@ifempty{\insertshortinstitute}{}{(\insertshortinstitute)} #+latex_header: \end{beamercolorbox}% #+latex_header: \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.25ex,dp=1ex,right]{title in head/foot}% #+latex_header: \usebeamerfont{title in head/foot}\insertshorttitle{}\hspace*{2em} #+latex_header: \usebeamerfont{date in head/foot}\insertshortdate{}\hspace*{2em} #+latex_header: \insertframenumber{} / \inserttotalframenumber\hspace*{2ex} #+latex_header: \end{beamercolorbox}}% #+latex_header: \vskip0pt% #+latex_header: } 
#+latex_header: } # some color #+latex_header: \rowcolors[]{1}{blue!10}{blue!05} # # to have a toc for each section # # Use suggestions from http://web.stanford.edu/~dgleich/notebook/2009/05/appendix_slides_in_beamer_cont_1.html to avoid counting tocs in page number # #+latex_header: \AtBeginSection[] {\begin{frame}<*> \frametitle{Outline} \tableofcontents[currentsection]\end{frame} \addtocounter{framenumber}{-1}} # set the paths for images #+latex_header: \graphicspath{% #+latex_header: {../../common/images/}{../../common/logos/}% #+latex_header: {pics/}{../images/}{../../images/}{../pics/}{../../pics/}% #+latex_header: {../figures/}{../../figures/}{../logos/}{../../logos/}{../../../logos/}% #+latex_header: {../../communication/web/graphics/carousel/}% #+latex_header: {../../communication/web/graphics/pictos/png/400x400/}% #+latex_header: } # some default information I did not find how to set this in org-mode #+latex_header: \institute[Irill/INRIA/UPD]{\url{roberto@dicosmo.org}} # to add the picblock macro #+latex_header: \usepackage{extblocks} #+latex_header: \usepackage{pgfpages} #+latex_header: \usepackage{animate} #+latex_header: \usepackage{alltt} # # Itemize in multiple columns # #+latex_header: \usepackage{multicol} # # Requires # # http://www-ljk.imag.fr/membres/Jerome.Lelong/latex/appendixnumberbeamer.sty #+latex_header: \usepackage{appendixnumberbeamer} # # Colors, color boxes # #+latex_header: \usepackage{color} #+latex_header: \usepackage{soul} # http://tex.stackexchange.com/questions/41683/why-is-it-that-coloring-in-soul-in-beamer-is-not-visible #+latex_header: \makeatletter #+latex_header: \newcommand\SoulColor{% #+latex_header: \let\set@color\beamerorig@set@color #+latex_header: \let\reset@color\beamerorig@reset@color} #+latex_header: \makeatother #+latex_header: \SoulColor #+LATEX_HEADER: \usepackage{listings} +#+LATEX_HEADER: \usepackage{forcebeamermode} diff --git a/common/modules/prelude.org b/common/modules/prelude.org index 2594760..8bda5de 
100644 --- a/common/modules/prelude.org +++ b/common/modules/prelude.org @@ -1,91 +1,92 @@ #+BEAMER_HEADER: \titlegraphic{\includegraphics[width=\extblockscale{0.7\textwidth}]{SWH-logo+motto}} #+STARTUP: hidestars # activate org-beamer-mode minor mode automatically #+STARTUP: beamer # org export options #+LANGUAGE: en #+OPTIONS: H:2 num:t toc:nil \n:nil @:t ::t |:t ^:t -:t f:t *:t <:t #+OPTIONS: TeX:t LaTeX:t skip:nil d:nil todo:t pri:nil tags:not-in-toc #+EXPORT_SELECT_TAGS: export #+EXPORT_EXCLUDE_TAGS: noexport #+LINK_UP: #+LINK_HOME: #+LaTeX_CLASS: beamer #+LaTeX_CLASS_OPTIONS: [presentation,xcolor=table] # # important font choice! # #+LaTeX_HEADER: \usepackage{libertine} # # Let's move that logo... # #+LaTeX_HEADER: \usepackage{animate} #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) # have the theme desired #+latex_header: \mode{\usetheme{swh} \beamertemplatenavigationsymbolsempty \setbeamertemplate{navigation symbols}{} \setbeamertemplate{headline}{} #+latex_header: \setbeamertemplate{footline} #+latex_header: { #+latex_header: \leavevmode% #+latex_header: \hbox{% #+latex_header: \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.25ex,dp=1ex,center]{author in head/foot}% #+latex_header: \usebeamerfont{author in head/foot}\insertshortauthor%~~\beamer@ifempty{\insertshortinstitute}{}{(\insertshortinstitute)} #+latex_header: \end{beamercolorbox}% #+latex_header: \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.25ex,dp=1ex,right]{title in head/foot}% #+latex_header: \usebeamerfont{title in head/foot}\insertshorttitle{}\hspace*{2em} #+latex_header: \usebeamerfont{date in head/foot}\insertshortdate{}\hspace*{2em} #+latex_header: \insertframenumber{} / \inserttotalframenumber\hspace*{2ex} #+latex_header: \end{beamercolorbox}}% #+latex_header: \vskip0pt% #+latex_header: } #+latex_header: } # some color #+latex_header: \rowcolors[]{1}{blue!10}{blue!05} # set the paths for images 
#+latex_header: \graphicspath{% #+latex_header: {../../common/images/}{../../common/logos/}% #+latex_header: {pics/}{../images/}{../../images/}{../pics/}{../../pics/}% #+latex_header: {../figures/}{../../figures/}{../logos/}{../../logos/}{../../../logos/}% #+latex_header: {../../communication/web/graphics/carousel/}% #+latex_header: {../../communication/web/graphics/pictos/png/400x400/}% #+latex_header: } # some default information I did not find how to set this in org-mode #+latex_header: \institute[Irill/INRIA/UPD]{\url{roberto@dicosmo.org}} # to add the picblock macro #+latex_header: \usepackage{extblocks} #+latex_header: \usepackage{pgfpages} #+latex_header: \usepackage{animate} #+latex_header: \usepackage{alltt} # # Itemize in multiple columns # #+latex_header: \usepackage{multicol} # # Requires # # http://www-ljk.imag.fr/membres/Jerome.Lelong/latex/appendixnumberbeamer.sty #+latex_header: \usepackage{appendixnumberbeamer} # # Colors, color boxes # #+latex_header: \usepackage{color} #+latex_header: \usepackage{soul} # http://tex.stackexchange.com/questions/41683/why-is-it-that-coloring-in-soul-in-beamer-is-not-visible #+latex_header: \makeatletter #+latex_header: \newcommand\SoulColor{% #+latex_header: \let\set@color\beamerorig@set@color #+latex_header: \let\reset@color\beamerorig@reset@color} #+latex_header: \makeatother #+latex_header: \SoulColor #+LATEX_HEADER: \usepackage{listings} +#+LATEX_HEADER: \usepackage{forcebeamermode} diff --git a/common/modules/rdc-bio.org b/common/modules/rdc-bio.org index cb67a14..1329f17 100644 --- a/common/modules/rdc-bio.org +++ b/common/modules/rdc-bio.org @@ -1,33 +1,34 @@ #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) #+INCLUDE: "prelude.org" :minlevel 1 * Introductions :PROPERTIES: :CUSTOM_ID: main :END: ** Short Bio: Roberto Di Cosmo # +BEAMER: \raisebox{-.5\height}{\includegraphics[width=.28\linewidth]{rdc}} Computer Science 
professor in Paris, now working at INRIA\\ - /30 years/ of research (Theor. CS, Programming, Software Engineering, Erdos #: 3)\\ - /20 years/ of Free and Open Source Software\\ - /10 years/ building and directing structures for the common good\\ \mbox{}\\ \begin{minipage}[c]{0.18\linewidth} \includegraphics[width=1.0\linewidth]{rdc} \end{minipage} \begin{minipage}[c]{0.8\linewidth} \begin{description} % \item[1998] \emph{Cybersnare} -- voice of French FOSS \item[1999] \emph{DemoLinux} -- first live GNU/Linux distro % \item[2004] \emph{EDOS} -- check package dependencies \item[2007] \emph{Free Software Thematic Group}\\ %\tiny{\url{http://www.systematic-paris-region.org/fr/logiciel-libre}}\\ ~150 members ~40 projects ~200Me \item[2008] \emph{Mancoosi project} \url{www.mancoosi.org} \item[2010] \emph{IRILL} \url{www.irill.org} \item[2015] \emph{Software Heritage} at INRIA + \item[2018] \emph{National Committee for Open Science}, France \end{description} \end{minipage} diff --git a/common/modules/status-extended.org b/common/modules/status-extended.org index 9165af8..b98ca87 100644 --- a/common/modules/status-extended.org +++ b/common/modules/status-extended.org @@ -1,431 +1,432 @@ #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) #+INCLUDE: "prelude.org" :minlevel 1 # not to be included as a whole, just pick individual slides as you see fit * Status :PROPERTIES: :CUSTOM_ID: main :END: ** The people :PROPERTIES: :CUSTOM_ID: people :END: *** The core team :B_picblock: :PROPERTIES: :CUSTOM_ID: core-team-formal :BEAMER_env: picblock :BEAMER_opt: pic=team,width=.4\linewidth :END: - Roberto Di Cosmo - Stefano Zacchiroli - Nicolas Dandrimont (Engineer) - Antoine Dumont (Engineer) # - and /Jordi, Quentin and Guillaume/ *** Scientific advisors - Serge Abiteboul (French Science Academy) - Jean-François Abramatic (former W3C director) - Gerard Berry (CNRS Gold Medal, French Science 
Academy) - Julia Lawall (Coccinelle, Linux Kernel, Outreachy) ** Archive coverage :PROPERTIES: :CUSTOM_ID: archive :END: #+BEAMER: \vspace{-2mm} #+BEAMER: \begin{center}\includegraphics[width=\extblockscale{1.1\linewidth}]{2018-03-archive-growth.png}\end{center} #+BEAMER: \vspace{-2mm} *** Current sources - live: GitHub, Debian - one-off: Gitorious, Google Code, GNU - WIP: GitLab, PyPI, Bitbucket #+BEAMER: \pause *** 175 TB (compressed) blobs, 6 TB database (as a graph: 10 B nodes + 100 B edges) #+BEAMER: \pause *** \hfill The /richest/ public source code archive, ... and growing daily! ** The structure of the archive :noexport: *** On-disk storage - flat file storage for contents - postgres database for the metadata *** Data model: /one/ big Merkle DAG, inspired by the git model - Origins (= repositories) - Occurrences (= branches) - Releases (= tags) - Revisions (= commits) - Directories (= trees) - Contents (= blobs) ** Archiving goals :PROPERTIES: :CUSTOM_ID: archivinggoals :END: Targets: VCS repositories & source code releases (e.g., tarballs) *** We DO archive - file *content* (= blobs) - *revisions* (= commits), with full metadata - *releases* (= tags), ditto - where (*origin*) & when (*visit*) we found any of the above # - time-indexed repo *snapshots* (i.e., we never delete anything) … in a VCS-/archive-agnostic *canonical data model* *** We DON'T archive # - diffs → derived data from related contents - homepages, wikis - BTS/issues/code reviews/etc. 
- mailing lists Long term vision: play our part in a /"semantic wikipedia of software"/ ** Architecture :PROPERTIES: :CUSTOM_ID: architecture :END: *** Data flow :PROPERTIES: :CUSTOM_ID: dataflow :END: # #+BEAMER: \begin{center}\includegraphics[width=\extblockscale{1.2\textwidth}]{swh-dataflow.pdf}\end{center} ** Data model :noexport: *** General schema - VCS-independent - fully deduplicated + files, directories and commits are /shared/ - biggest git-like /graph/ in the world *** \begin{center} \url{http://deb.li/swhdm} \end{center} *** full hash index (sha1, sha256, ...) Some funny facts: - the GPL2 licence appears under more than 500 names + including /aa.css.txt/ and /FullSync.txt/ ~ :-) ** Merkle DAG *** Merkle structure :PROPERTIES: :CUSTOM_ID: merkle :END: **** Merkle trees :PROPERTIES: :CUSTOM_ID: merkletree :END: # R. C. Merkle, A digital signature based on a conventional encryption # function, Crypto '87 #+BEAMER: \vspace{-3mm} ***** Merkle tree (R. C. Merkle, Crypto 1979) :B_picblock: :PROPERTIES: :BEAMER_opt: pic=merkle, leftpic=true, width=.7\linewidth :BEAMER_env: picblock :BEAMER_act: :END: Combination of - tree - hash function #+BEAMER: \pause #+BEAMER: \footnotesize ***** Classical cryptographic construction - fast, parallel signature of large data structures - widely used (e.g., Git, blockchains, IPFS, ...) 
- built-in deduplication **** The archive in a few pictures :PROPERTIES: :CUSTOM_ID: merkledemo :END: ***** A giant (extended) Merkle DAG #+LATEX: \only<1>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/merkle_1.pdf}}} #+LATEX: \only<2>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/contents.pdf}}} #+LATEX: \only<3>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/merkle_2_contents.pdf}}} #+LATEX: \only<4>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/directories.pdf}}} #+LATEX: \only<5>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/merkle_3_directories.pdf}}} #+LATEX: \only<6>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/revisions.pdf}}} #+LATEX: \only<7>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/merkle_4_revisions.pdf}}} #+LATEX: \only<8>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/releases.pdf}}} #+LATEX: \only<9>{\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/merkle_5_releases.pdf}}} # #+LATEX: {\colorbox{white}{\includegraphics[width=\extblockscale{.9\linewidth}]{git-merkle/merkle_1.pdf}}} *** A revision node :PROPERTIES: :CUSTOM_ID: merklerevision :END: **** Example: a Software Heritage revision ***** #+BEAMER: \vspace{-.5cm}\centering\includegraphics[width=0.9\textwidth]{git-merkle/revisions} ***** Note: most object kinds currently have Git-compatible identifiers *** Giant DAG :PROPERTIES: :CUSTOM_ID: giantdag :END: **** The archive: a (giant) Merkle DAG # Using an empty frame because the image is difficult to read on swh bg. # Finding a way to override image bg for just this frame would be better. 
***** #+BEAMER: \centering \includegraphics[width=\extblockscale{\textwidth}]{git-merkle/merkle_5_releases} *** Giant DAG (single slide) :PROPERTIES: :CUSTOM_ID: giantdag1slide :END: **** The Software Heritage archive: a gigantic Merkle DAG - #+LATEX: \centering + #+LATEX: \centering\forcebeamerstart{} #+LATEX: \only<1>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/merkle_1}}} #+LATEX: \only<2>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/contents}}} #+LATEX: \only<3>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/merkle_2_contents}}} #+LATEX: \only<4>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/directories}}} #+LATEX: \only<5>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/merkle_3_directories}}} #+LATEX: \only<6>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/revisions}}} #+LATEX: \only<7>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/merkle_4_revisions}}} #+LATEX: \only<8>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/releases}}} #+LATEX: \only<9>{\colorbox{white}{\includegraphics[width=.75\linewidth]{git-merkle/merkle_5_releases}}} + #+LATEX: \forcebeamerend{} *** Giant DAG (detailed) :PROPERTIES: :CUSTOM_ID: dagdetail :END: **** The archive: a (giant) Merkle DAG #+BEAMER: \vspace{-3mm} #+BEAMER: \centering \includegraphics[width=\textwidth]{swh-merkle-dag-wide} ** Technology :noexport: :PROPERTIES: :CUSTOM_ID: technology :END: *** Software stack **** 3rd party - Debian, Puppet, Ceph - PostgreSQL for metadata storage, with barman & pglogical - Celery (RabbitMQ backend) for task scheduling - Python3 and psycopg2 for the backend - Django, Bootstrap, D3.js for Web stuff **** in house - /ad hoc/ object storage (to avoid imposing tech on mirrors) - data model implementation, listers, loaders, scheduler - ~60 Git repositories (~20 Python packages, ~30 Puppet modules) - ~30 kSLOC Python / ~12 kSLOC SQL / ~4 
kSLOC Puppet - licence choice: GPLv3 (backend) / AGPLv3 (frontend) *** Hardware stack **** in house - 2x hypervisors with ~20 VMs - 2x high density storage array (60 * 6TB => 300TB usable each) - Prototype: ceph storage cluster for blobs **** on Azure - full object storage mirror - full mirror of the database containing the graph - workers for content indexing - workers for download bundle preparation **** at the University of Bologna - backend storage (60TB) for the bundles available for download *** Software architecture :noexport: **** Module dependencies (internal + external) :B_picblock: :PROPERTIES: :BEAMER_env: picblock :BEAMER_opt: pic=swh-modules-deps-all,width=\linewidth :END: **** let's zoom in: http://deb.li/swhdeps ** Technology :noexport: :PROPERTIES: :CUSTOM_ID: technology-short :END: *** Deployment and resource usage **** Software - around 30k SLOC of custom Python code, running on Debian Stable - PostgreSQL database for the metadata storage **** Hardware - 3 hypervisors with mass storage and a backup server at Inria - Work in progress: in-house Ceph deployment for object storage - In-kind sponsorship of cloud and storage resources (Microsoft, University of Bologna) ** Software development :noexport: :PROPERTIES: :CUSTOM_ID: development :END: *** Software development **** classic FOSS development - language: English - development mailing list #+BEAMER: \\{\small \url{https://sympa.inria.fr/sympa/info/swh-devel}} - IRC #+BEAMER: \\ #swh-devel / FreeNode - Forge #+BEAMER: \\{\small \url{https://forge.softwareheritage.org}} - Git, tasks, code review, etc. **** for more information #+BEAMER: \scriptsize https://www.softwareheritage.org/community/developers/ ** Roadmap :PROPERTIES: :CUSTOM_ID: features :END: *** Features... 
- (done) *lookup* by content hash - (done) *browsing*: "wayback machine" for source code (API + UI) - (early access) *deposit* of source code bundles directly to the archive - (done) *download*: =wget= / =git clone= from the archive - (todo) *provenance* lookup for all archived content - (todo) *full-text search* on all archived source code files #+BEAMER: \pause *** ... and much more than one could possibly imagine all the world's software development history at hand's reach! ** Web API :noexport: :PROPERTIES: :CUSTOM_ID: api :END: *** Web API :PROPERTIES: :CUSTOM_ID: apiintro :END: **** RESTful API to programmatically access the Software Heritage archive \\ *\url{https://archive.softwareheritage.org/api/}* **** Features - pointwise *browsing* of the archive - … snapshots → revisions → directories → contents … - full access to the *metadata* of archived objects - *crawling* information - /when have you last visited this Git repository I care about?/ - /where were its branches/tags pointing to at the time?/ # - derived information about archived contents (WIP) # - MIME type, programming language, license, etc. **** Endpoint index \url{https://archive.softwareheritage.org/api/1/} *** A tour of the Web API --- origins & visits :PROPERTIES: :CUSTOM_ID: apitourvisits :END: #+BEAMER: \footnotesize #+BEGIN_SRC GET https://archive.softwareheritage.org/api/1/origin/ \ git/url/https://github.com/hylang/hy { "id": 1, "origin_visits_url": "/api/1/origin/1/visits/", "type": "git", "url": "https://github.com/hylang/hy" } #+END_SRC #+BEAMER: \vfill #+BEGIN_SRC GET https://archive.softwareheritage.org/api/1/origin/ \ 1/visits/ [ ..., { "date": "2016-09-14T11:04:26.769266+00:00", "origin": 1, "origin_visit_url": "/api/1/origin/1/visit/13/", "status": "full", "visit": 13 }, ... 
] #+END_SRC *** A tour of the Web API --- snapshots :PROPERTIES: :CUSTOM_ID: apitoursnapshots :END: #+BEAMER: \footnotesize #+BEGIN_SRC GET https://archive.softwareheritage.org/api/1/origin/ \ 1/visit/13/ { ..., "occurrences": { ..., "refs/heads/master": { "target": "b94211251...", "target_type": "revision", "target_url": "/api/1/revision/b94211251.../" }, "refs/tags/0.10.0": { "target": "7045404f3...", "target_type": "release", "target_url": "/api/1/release/7045404f3.../" }, ... }, "origin": 1, "origin_url": "/api/1/origin/1/", "status": "full", "visit": 13 } #+END_SRC *** A tour of the Web API --- releases :noexport: :PROPERTIES: :CUSTOM_ID: apitourreleases :END: #+BEAMER: \footnotesize #+BEGIN_SRC GET https://archive.softwareheritage.org/api/1/release/ \ 7045404f3d1c54e6473c71bbb716529fbad4be24/ { "author": { "email": "tag@pault.ag", "fullname": "Paul Tagliamonte <tag@pault.ag>", "id": 96, "name": "Paul Tagliamonte" }, "date": "2014-04-10T23:01:28-04:00", "message": "0.10: The Oh f*ck it's PyCon release", "name": "0.10.0", "synthetic": false, "target": "6072557b6...", "target_type": "revision", "target_url": "/api/1/revision/6072557b6.../", ... } #+END_SRC *** A tour of the Web API --- revisions :PROPERTIES: :CUSTOM_ID: apitourrevisions :END: #+BEAMER: \footnotesize #+BEGIN_SRC GET https://archive.softwareheritage.org/api/1/revision/ \ 6072557b6c10cd9a21145781e26ad1f978ed14b9/ { "author": { "email": "tag@pault.ag", "fullname": "Paul Tagliamonte <tag@pault.ag>", "id": 96, "name": "Paul Tagliamonte" }, "committer": { ... }, "date": "2014-04-10T23:01:11-04:00", "committer_date": "2014-04-10T23:01:11-04:00", "directory": "2df4cd84e...", "directory_url": "/api/1/directory/2df4cd84e.../", "history_url": "/api/1/revision/6072557b6.../log/", "merge": false, "message": "0.10: The Oh f*ck it's PyCon release", "parents": [ { "id": "10149f66e...", "url": "/api/1/revision/10149f66e.../" } ], ... 
} #+END_SRC *** A tour of the Web API --- contents :PROPERTIES: :CUSTOM_ID: apitourcontents :END: #+BEAMER: \footnotesize #+BEGIN_SRC GET https://archive.softwareheritage.org/api/1/content/ \ adc83b19e793491b1c6ea0fd8b46cd9f32e592fc/ { "data_url": "/api/1/content/sha1:adc83b19e.../raw/", "filetype_url": "/api/1/content/sha1:.../filetype/", "language_url": "/api/1/content/sha1:.../language/", "length": 1, "license_url": "/api/1/content/sha1:.../license/", "sha1": "adc83b19e...", "sha1_git": "8b1378917...", "sha256": "01ba4719c...", "status": "visible" } #+END_SRC #+BEAMER: \normalsize \vfill \pause **** Caveats - rate limits apply throughout the API - blob download available for selected contents ** Some technical challenges :PROPERTIES: :CUSTOM_ID: techchallenges :END: *** Expanding the archive - discover and classify /all/ the software sources - importers for other VCSs (SVN, Hg, ...) \hfill /We need your help!/ *** Staying current get new repositories and commits ASAP\\ \hfill /We need reliable, standardised event feeds./ *** Handling the backlog ingesting all the pre-existing data\\ \hfill /Decades of software development are waiting!/ diff --git a/common/modules/swh-pids.org b/common/modules/swh-pids.org index bba5e05..a01abd8 100644 --- a/common/modules/swh-pids.org +++ b/common/modules/swh-pids.org @@ -1,146 +1,147 @@ #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) # # Software Heritage PIDs: here we present our rationale for introducing a new identifier schema, and the identifier schema itself # #+INCLUDE: "prelude.org" :minlevel 1 # # We need tcolorbox here: add the following lines to your main .org document! 
# #+LATEX_HEADER: \usepackage{tcolorbox} #+BEAMER_HEADER: \usepackage{tcolorbox} * The quest for a PID :PROPERTIES: :CUSTOM_ID: main :END: ** Systems of identifiers :PROPERTIES: :CUSTOM_ID: definition :END: *** A /system of identifiers/ is - a set of labels (the identifiers) - mechanisms to perform: |------------------------+---------------------------| | /Generation (minting)/ | create a new label | | /Assignment/ | associate label to object | | /Retrieval/ | get object from a label | |------------------------+---------------------------| - optionally, mechanisms to perform: |------------------+---------------------------| | /Verification/ | check label and object | | /Reverse Lookup/ | get label from an object | | /Description/ | get metadata of an object | |------------------+---------------------------| ** Mechanisms offered in some systems of identifiers :PROPERTIES: :CUSTOM_ID: survey :END: |--------------------+----------+-------+-------+--------| | *Mech.* / *System* | *Handle* | *DOI* | *ARK* | *PURL* | |--------------------+----------+-------+-------+--------| | Generation | Yes | Yes | Yes | Yes | | Assignment | Yes | Yes | Yes | Yes | | Retrieval | Yes | Yes | Yes | Yes | | Verification | N.A. | N.A. | N.A. | N.A. | | Reverse Lookup | N.A. | N.A. | N.A. | N.A. | | Description | Yes | Yes | Yes | N.A. 
| |--------------------+----------+-------+-------+--------| ** Our challenges in the PID landscape :PROPERTIES: :CUSTOM_ID: challenges :END: *** Typical properties of systems of identifiers \hfill uniqueness, non ambiguity, persistence, abstraction (opacity) #+BEAMER: \pause *** Key needed properties from our use cases - gratis :: identifiers are free (billions of objects) - - integrity :: the associated object cannot been changed (sw dev, reproducibility) - - no middle man :: no central authority is needed (sw dev, reproducibility) + - integrity :: the associated object cannot be changed (sw dev, /reproducibility/) + - no middle man :: no central authority is needed (sw dev, /reproducibility/) #+BEAMER: \pause *** \hfill we could not find systems with both *integrity* and *no middle man* ! ** An important distinction: DIOs vs. IDOs :PROPERTIES: :CUSTOM_ID: diovsido :END: #+BEGIN_EXPORT latex \begin{quote} The term “Digital Object Identifier” is construed as “digital identifier of an object,” rather than “identifier of a digital object” \hfill Norman Paskin, 2010 \end{quote} #+END_EXPORT #+BEAMER: \pause *** DIO (Digital Identifier of an Object) digital identifiers for (potentially) *non digital objects* - epistemic complexity (manifestations, versions, locations, etc.) - need an authority to ensure persistence and uniqueness #+BEAMER: \pause *** IDO (Identifier of a Digital Object) digital identifiers (only) for *digital objects* - can provide both *integrity* and *no middle man* - broadly used in modern software development (git, etc.) #+BEAMER: \pause *** \hfill for the core Software Heritage archive, *IDOs are enough* ** IDOs in Software Development: the origins # R. C. Merkle, A digital signature based on a conventional encryption # function, Crypto '87 #+BEAMER: \vspace{-3mm} ***** Merkle tree (R. C. 
Merkle, Crypto 1979) :B_picblock: :PROPERTIES: :BEAMER_opt: pic=merkle, leftpic=true, width=.5\linewidth :BEAMER_env: picblock :BEAMER_act: :END: Combination of - tree - hash function ***** Classical cryptographic construction fast, parallel signature of large data structures, built-in deduplication #+BEAMER: \pause - satisfies all three criteria: *gratis, integrity, no middle man*! - widely used in industry (e.g., Git, nix, blockchains, IPFS, ...) ** IDOs in Software Heritage: a worked example - #+LATEX: \centering + #+LATEX: \centering\forcebeamerstart #+LATEX: \only<1>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_1}}} #+LATEX: \only<2>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/contents}}} #+LATEX: \only<3>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_2_contents}}} #+LATEX: \only<4>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/directories}}} #+LATEX: \only<5>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_3_directories}}} #+LATEX: \only<6>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/revisions}}} #+LATEX: \only<7>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_4_revisions}}} #+LATEX: \only<8>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/releases}}} #+LATEX: \only<9>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_5_releases}}} #+LATEX: \only<10>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/snapshots.pdf}}} + #+LATEX: \forcebeamerend ** The Software Heritage IDO schema \hfill (see *\url{http://bit.ly/swhpids}*) #+BEGIN_EXPORT latex \small \begin{tcolorbox} \href{https://archive.softwareheritage.org/swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2} {swh:1:{\bf 
cnt}:94a9ed024d3859793618152ea559a168bbcbb5e2} \hfill full text of the GPL3 license \end{tcolorbox} \pause \begin{tcolorbox} \href{https://archive.softwareheritage.org/swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505} {swh:1:{\bf dir}:d198bc9d7a6bcf6db04f476d29314f157507d505} \hfill Darktable source code \end{tcolorbox} \pause \begin{tcolorbox} \href{https://archive.softwareheritage.org/swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d} {swh:1:{\bf rev}:309cf2674ee7a0749978cf8265ab91a60aea0f7d} \end{tcolorbox} \hfill a {\bf revision} in the development history of Darktable\\\pause \begin{tcolorbox} \href{https://archive.softwareheritage.org/swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f} {swh:1:{\bf rel}:22ece559cc7cc2364edc5e5593d63ae8bd229f9f} \end{tcolorbox} \hfill {\bf release} 2.3.0 of Darktable, dated 24 December 2016\\\pause \begin{tcolorbox} \href{https://archive.softwareheritage.org/swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453} {swh:1:{\bf snp}:c7c108084bc0bf3d81436bf980b46e98bd338453} \end{tcolorbox} \hfill a {\bf snapshot} of the entire Darktable repository (4 May 2017, GitHub) #+END_EXPORT #+LATEX: \pause *** *Current resolvers:* \url{archive.softwareheritage.org} and \url{n2t.org}