diff --git a/talks-public/2017-12-07-ACM/2017-12-07-ACM.org b/talks-public/2017-12-07-ACM/2017-12-07-ACM.org new file mode 100644 index 0000000..da223b1 --- /dev/null +++ b/talks-public/2017-12-07-ACM/2017-12-07-ACM.org @@ -0,0 +1,317 @@ +#+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) +#+TITLE: Software Heritage +#+SUBTITLE: A new essential infrastructure for Software Source Code +#+BEAMER_HEADER: \title{Software Heritage} +#+AUTHOR: Roberto Di Cosmo +#+EMAIL: roberto@dicosmo.org +#+BEAMER_HEADER: \date{December 7th, 2017} +#+BEAMER_HEADER: \title[www.softwareheritage.org]{Software Heritage} +#+BEAMER_HEADER: \author[Roberto Di Cosmo \hspace{5em} www.dicosmo.org]{Roberto Di Cosmo\\[1em]% +#+BEAMER_HEADER: Computer Science full professor, Inria and IRIF} +# #+BEAMER_HEADER: \setbeameroption{show notes on second screen} +#+BEAMER_HEADER: \setbeameroption{hide notes} +#+KEYWORDS: software heritage legacy preservation knowledge mankind technology + +# +# prelude.org contains all the information needed to export the main beamer latex source +# use prelude-toc.org to get the table of contents +# + +#+INCLUDE: "../../common/modules/prelude-toc.org" :minlevel 1 + + +#+INCLUDE: "../../common/modules/169.org" + +# +LaTeX_CLASS_OPTIONS: [aspectratio=169,handout,xcolor=table] + + +# +# If you want to change the title logo it's here +# +# +BEAMER_HEADER: \titlegraphic{\includegraphics[width=0.7\textwidth]{SWH-logo}} + +# aspect ratio can be changed, but the slides need to be adapted +# - compute a "resizing factor" for the images (macro for picblocks?) 
+# +# set the background image +# +# https://pacoup.com/2011/06/12/list-of-true-169-resolutions/ +# +#+BEAMER_HEADER: \pgfdeclareimage[height=90mm,width=160mm]{bgd}{swh-world-169.png} +#+BEAMER_HEADER: \setbeamertemplate{background}{\pgfuseimage{bgd}} + +#+LATEX_HEADER: \usepackage{supertabular} +#+LATEX_HEADER: \newcommand{\sponsor}[2]{{\bf #1}, #2} +#+LATEX_HEADER: \newcommand{\teamster}[2]{{\textcolor{red}{#1}}, #2} +* Introductions :noexport: +** Short Bio + # +BEAMER: \raisebox{-.5\height}{\includegraphics[width=.28\linewidth]{rdc}} + Roberto Di Cosmo\\ + Computer Science professor in Paris\\ + now working at INRIA\\ + /20 years/ of Free and Open Source Software\\ + \mbox{}\\ + \begin{minipage}[c]{0.18\linewidth} + \includegraphics[width=1.0\linewidth]{rdc} + \end{minipage} + \begin{minipage}[c]{0.8\linewidth} + \begin{description} +% \item[1998] \emph{Cybersnare} -- voice of French FOSS + \item[1999] \emph{DemoLinux} -- first live GNU/Linux distro +% \item[2004] \emph{EDOS} -- check package dependencies + \item[2007] \emph{Free Software Thematic Group}\\ + %\tiny{\url{http://www.systematic-paris-region.org/fr/logiciel-libre}}\\ + ~150 members ~40 projects ~200Me +% \item[2008] \emph{Mancoosi project} \url{www.mancoosi.org} + \item[2010] \emph{IRILL} \url{www.irill.org} + \item[2015] \emph{Software Heritage} at INRIA + \end{description} + \end{minipage} + +* Software is everywhere... 
:noexport: +** Software is everywhere :noexport: + :PROPERTIES: + :CUSTOM_ID: softwareispervasive + :END: + #+latex: \begin{center} + #+ATTR_LATEX: :width .6\linewidth +file:software-center.pdf + #+latex: \end{center} +#+BEAMER: \pause +*** + :PROPERTIES: + :BEAMER_env: block + :END: + \hfill Software embodies our collective *Knowledge* and *Cultural Heritage* +# why software source code is special (2 slides) +# +#+INCLUDE: "../../common/modules/source-code-different-long.org::#thesourcecode" :minlevel 2 +** Source code is essential :noexport: +#+INCLUDE: "../../common/modules/source-code-different-long.org::#softwareisdifferent" :only-contents t :minlevel 3 +** 50 years of software source code +#+INCLUDE: "../../common/modules/50years-source-code.org::#apollolinux" :only-contents t :minlevel 3 + +* Software Heritage +# +# One slide motivation + goals +# +#+INCLUDE: "../../common/modules/swh-goals-oneslide-vertical.org::#goals" :minlevel 2 +# + +# * Building the network +# Where we are today: endorsement +# ** Our principles \hfill iPres 2017 - \url{http://bit.ly/swhpaper} +# #+INCLUDE: "../../common/modules/principles-compact.org::#principlesstatus" :only-contents t :minlevel 3 +** Our principles \hfill iPres 2017 - \url{http://bit.ly/swhpaper} + :PROPERTIES: + :CUSTOM_ID: principlesstatus + :END: +#+latex: \begin{center} +#+ATTR_LATEX: :width .6\linewidth +file:SWH-as-foundation-slim.png +#+latex: \end{center} +#+latex: \footnotesize\vspace{-3mm} + # + # #+BEAMER: \pause + + #+BEAMER: \pause + #+latex: \centering + #+ATTR_LATEX: :width \extblockscale{.8\linewidth} + file:growth-latest.png +#+BEAMER: \pause +*** Open approach :B_block:BMCOL: + :PROPERTIES: + :BEAMER_col: 0.3 + :BEAMER_env: block + :END: + - open source + - transparency +*** In for the long haul :B_block:BMCOL: + :PROPERTIES: + :BEAMER_col: 0.3 + :BEAMER_env: block + :END: + - non profit, replication + - *intrinsic* identifiers +# +*** Exhaustive :B_block:BMCOL: + :PROPERTIES: + :BEAMER_col: 0.3 + 
:BEAMER_env: block + :END: + - *all* software + - open to *all* communities + +*** :B_ignoreheading: + :PROPERTIES: + :BEAMER_env: ignoreheading + :END: + +*** + +** Growing Support +#+INCLUDE: "../../common/modules/support-compact.org::#support" :only-contents t :minlevel 3 +* Relevance for research software publishing +** Zoom on the collection phase +# #+BEAMER: \begin{center}\includegraphics[width=\extblockscale{.8\textwidth}]{swh-dataflow.pdf}\end{center} +*** Much more than an archive! :B_picblock: + :PROPERTIES: + :BEAMER_env: picblock + :BEAMER_OPT: pic=swh-dataflow.pdf,width=.65\linewidth,leftpic=true + :END: + - GitHub + - Debian, GNU + - Gitorious, Google Code + - Bitbucket (WIP), FusionForge (WIP) + - /add your own plugins!/ + #+BEAMER: \pause +*** Important properties + - mission: *exhaustive* and *up to date* collection + - strategy: *automatic* harvesting + /deposit/ from /selected/ sources + #+BEAMER: \pause +*** + \hfill The /richest/ source code archive already, ... and growing daily! +** The research software (deposit) use case +*** Software deposit (beta test ongoing with HAL) :B_picblock: + :PROPERTIES: + :BEAMER_env: picblock + :BEAMER_OPT: pic=software_publication_state_diagram.png,width=.63\linewidth,leftpic=true + :END: +#+LATEX: \pause + *\hspace{1em}Generic mechanism:* + - SWORD based + - review process + - versioning + - /industry chimes in/ +#+BEAMER: \pause + *\hspace{1em}Variants:* + - skip deposit, just provide SWH hash and metadata + - ... +** Selected unique benefits \hfill there are more! +*** All features of Software Heritage /for free/ + - *intrinsic ID* (not DIOs), browse, download (now) + - metadata, licenses, provenance analysis (plagiarism detection), classification (wip) + - and many more (powerful connections with SE and Industry) +#+LATEX: \pause +*** Coverage and uniformity + - *one* archive for *all* domains (industry included) + - you can reference /any/ software, not just the deposited one\\ + \hfill /(thanks D. 
Katz for pointing this out)/ + - *git-compatible* identifiers greatly simplify workflows +#+LATEX: \pause +*** Sustainability \hfill ... ambassadors welcome! + \mbox{}\hfill /one/ infrastructure \hfill /independent/ non profit foundation \hfill /worldwide/ mirrors\hfill\mbox{} +# * Conclusion +** Come in, we're open! +#+latex: \begin{center} +#+ATTR_LATEX: :width \linewidth +file:SWH-logo.pdf +#+latex: \end{center} +#+latex: \begin{center} +#+latex: {\large \url{www.softwareheritage.org} \hspace{4em} \url{@swheritage}} +#+latex: \end{center} +*** Get involved + - sponsoring / partnership :: \hfill \url{sponsorship.softwareheritage.org} + - working groups, leads :: \hfill \url{wiki.softwareheritage.org} + - our own code :: \hfill \url{forge.softwareheritage.org} + - metadata :: \hfill RDA source code IG +* Appendix :B_appendix: + :PROPERTIES: + :BEAMER_env: appendix + :END: +* Intrinsic PID +** Our challenge in the PID arena +*** Long term + Identifiers must be there for the long term +*** No middle man + Identifiers must be meaningful even if resolvers go away +*** Integrity, not just naming + Identifiers must ensure that the retrieved object is the intended one +*** Uniqueness by design + one name identifies a single object, and each object has only one name +** Exploring the PID landscape +*** A lot of options out there... + URL, URI, PURL, URN, ARK, DOI, ... +*** ... some are widely used + - articles + - data + - even software artefacts! +#+BEAMER: \pause +*** We can get no satisfaction + \hfill of all the key criteria +#+BEAMER: \pause +*** + \hfill we adopted something radically different \hfill +** Intrinsic identifiers in Software Heritage + # R. C. Merkle, A digital signature based on a conventional encryption + # function, Crypto '87 + #+BEAMER: \vspace{-3mm} +***** Merkle tree (R. C. 
Merkle, Crypto 1979) :B_picblock: + :PROPERTIES: + :BEAMER_opt: pic=merkle, leftpic=true, width=.5\linewidth + :BEAMER_env: picblock + :BEAMER_act: + :END: + Combination of + - tree + - hash function +***** Classical cryptographic construction + fast, parallel signature of large data structures, built-in deduplication +#+BEAMER: \pause + - satisfies all the key criteria + - widely used in industry (e.g., Git, nix, blockchains, IPFS, ...) +** Back to basics: DIOs vs. IDOs +*** DIO (digital identifier of an object) + - digital identifiers for traditional (non digital) objects + - epistemic complications (manifestations, versions, locations, etc.) + - significant governance issues, ... +#+BEAMER: \pause +*** IDO (identifier of a digital object) + - (digital) identifier for digital objects + - much simpler to build/handle + - can (and must) be intrinsic +#+BEAMER: \pause +*** Separation of concerns + - yes, we \alert{need both} DIOs and IDOs + - no, we \alert{must not mistake} DIOs for IDOs (and vice versa) +#+BEAMER: \pause +** Working together +*** Example: links to /software source code/ in an article + Leveraging the Software Heritage universal archive: + - set of files :: \small\url{swh:1:tree:06741c8c37c5a384083082b99f4c5ad94cd0cd1f}\\ + id of tree object listing all the files in a project (at a given time) + - revision :: \url{swh:1:rev:7598fb94d59178d65bd8d2892c19356290f5d4e3}\\ + id of commit object which references a tree and (a pointer to) the history +#+BEAMER: \pause + - metadata :: this /will/ involve some form of DIO + - and we get all the complications back +* Our role in the publication workflow +** Our role: handle /all/ the /software source code/ +*** At the end of the process + Explicit deposit, coordinated with the publisher + - store the /final/ source code (no garbage) + - store only public source code + - *N.B.:* no embargo or access control (yet) +*** During the review + Access to the largest available source code base + + provenance, plagiarism detection 
(for new code) + + metrics (for long standing projects) +#+BEAMER: \pause +*** Later on + - Support embargo/access control +* The Metadata challenge +** Collecting metadata for 60+ million projects +*** Landscape of Software Ontologies + #+latex: \begin{center} + #+ATTR_LATEX: :width .75\linewidth +file:metadata_landscape6.png + #+latex: \end{center} +*** It's the real world! + reconcile metadata from different origins, handle conflicts, synthesise missing information, classify (automatically) the projects, etc. +* Collection strategies +** All the source code + #+BEAMER: \begin{center}\includegraphics[width=\extblockscale{\linewidth}]{swh-collect-axes}\end{center} +** All the source code, strategies + #+BEAMER: \begin{center}\includegraphics[width=\extblockscale{\linewidth}]{swh-collect-strategies}\end{center} +** Online, open source code: automation overview + #+BEAMER: \begin{center}\includegraphics[width=\extblockscale{\linewidth}]{swh-automation}\end{center} diff --git a/talks-public/2017-12-07-ACM/Makefile b/talks-public/2017-12-07-ACM/Makefile new file mode 100644 index 0000000..68fbee7 --- /dev/null +++ b/talks-public/2017-12-07-ACM/Makefile @@ -0,0 +1 @@ +include ../Makefile.slides