diff --git a/common/modules/swh-scientific-publishing.org b/common/modules/swh-scientific-publishing.org index 5823332..467850b 100644 --- a/common/modules/swh-scientific-publishing.org +++ b/common/modules/swh-scientific-publishing.org @@ -1,61 +1,75 @@ #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) #+INCLUDE: "prelude.org" :minlevel 1 * Software Heritage for Scientific Publishing :PROPERTIES: :CUSTOM_ID: main :END: ** The research software (deposit) use case :PROPERTIES: :CUSTOM_ID: hal :END: *** Deposit software in HAL \hfill \url{http://bit.ly/swhdeposithalen} :B_picblock: :PROPERTIES: :BEAMER_env: picblock :BEAMER_OPT: pic=deposit-communication.png,width=.63\linewidth,leftpic=true :END: #+LATEX: \pause *\hspace{1em}Generic mechanism:* - SWORD based - review process - versioning # - /industry chimes in/ (details on demand) #+BEAMER: \pause *\hspace{1em} How to do it:* - - today: deposit .zip file - - tomorrow: + - *today*: deposit .zip or .tar.gz file + - *tomorrow*: - /provide SWH id and metadata/ - /provide SWH id, metadata is extracted/ - ... 
-*** Feedback is welcome :noexport: - \hfill drop me a line if you want to join the test group -** Coming soon: access using intrinsic IDs +** Identifying and retrieving source code +*** Intrinsic identifiers \hfill (spec: http://bit.ly/swhpids) + - provide *integrity* guarantees + - *all software and VCS* (not just git or GitHub) + - use for identifying *a precise version* of source code + - learn more in the forthcoming iPres 2018 paper + e.g.: *swh:1:cnt:52dba04fcffb3b7c0206b45a3f0640c841a2c459* +#+BEAMER: \pause +*** "Wayback-machine-style" identifiers + - point to software *origins* + - expose the SWH crawling history + - use *when no precise version is known* +** +#+BEAMER: \vfill\begin{center}\Huge Demo Time!\end{center}\vfill + - example deposits in HAL + - example use of https://archive.softwareheritage.org +** Access using intrinsic IDs :PROPERTIES: :CUSTOM_ID: codereferences :END: -*** Getting close to it ... \hfill click in the paper and view the source - "Our *Parmap.parmap* and *Parmap.parfold* functions may be used to seamlessly ..." +*** Click on the links in the paper and view the source code! + "Our *[[https://archive.softwareheritage.org/swh:1:cnt:52dba04fcffb3b7c0206b45a3f0640c841a2c459;origin=https://github.com/rdicosmo/parmap;lines=90-101/][Parmap.parmap]]* and *[[https://archive.softwareheritage.org/swh:1:cnt:52dba04fcffb3b7c0206b45a3f0640c841a2c459;origin=https://github.com/rdicosmo/parmap;lines=60-73/][Parmap.parfold]]* functions may be used to seamlessly ..." # \mbox{} \hfill https://doi.org/10.1016/j.procs.2012.04.202 # replace OCaml List map and fold standard functions preserving their full # functional semantics..." 
-*** :B_ignoreheading: +*** :B_ignoreheading: :PROPERTIES: :BEAMER_env: ignoreheading :END: - #+BEAMER: \begin{center}\includegraphics[width=\extblockscale{1.4\textwidth}]{Parmap-browse-contextless-path.png}\end{center} + #+BEAMER: \begin{center}\includegraphics[width=\extblockscale{1.3\textwidth}]{Parmap-swhid-lines.png}\end{center} ** Selected unique benefits \hfill there are more! :PROPERTIES: :CUSTOM_ID: benefits :END: *** All features of Software Heritage /for free/ - *intrinsic IDs* (integrity, not just DIOs!), browse, download (now) - metadata, licenses, provenance analysis (plagiarism detection), classification (wip) - and many more (powerful connections with SE and Industry) #+LATEX: \pause *** Coverage and uniformity - *one* archive for *all* domains (industry included) - you can reference /any/ software, not just the deposited one\\ \hfill /(thanks D. Katz for pointing this out)/ - *git-compatible* identifiers greatly simplify workflows #+LATEX: \pause *** Sustainability \hfill ... doors are open! 
\mbox{}\hfill /one/ infrastructure \hfill /independent/ non profit foundation \hfill /worldwide/ mirrors\hfill\mbox{} diff --git a/talks-public/2018-07-11-SWDepositSSI/2018-07-11-SWDepositSSI.org b/talks-public/2018-07-11-SWDepositSSI/2018-07-11-SWDepositSSI.org new file mode 100644 index 0000000..b4623bb --- /dev/null +++ b/talks-public/2018-07-11-SWDepositSSI/2018-07-11-SWDepositSSI.org @@ -0,0 +1,298 @@ +#+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) +#+TITLE: Software Heritage +#+SUBTITLE: The universal source code archive +#+BEAMER_HEADER: \title{Software Heritage} +#+AUTHOR: *Roberto Di Cosmo*, Morane Gruenpeter +#+EMAIL: roberto@dicosmo.org +#+BEAMER_HEADER: \date[July 11th, 2018]{July 11th, 2018\\[-1em]} +#+BEAMER_HEADER: \title[www.softwareheritage.org]{Software Heritage} +#+BEAMER_HEADER: \author[{\bf Roberto Di Cosmo}, Morane Gruenpeter]{Roberto Di Cosmo\\[1em]% +#+BEAMER_HEADER: Director, Software Heritage\\Computer Science full professor, Inria and IRIF\\[-1em]} +# #+BEAMER_HEADER: \setbeameroption{show notes on second screen} +#+BEAMER_HEADER: \setbeameroption{hide notes} +#+KEYWORDS: software heritage legacy preservation knowledge mankind technology + +# +# prelude.org contains all the information needed to export the main beamer latex source +# use prelude-toc.org to get the table of contents +# + +#+INCLUDE: "../../common/modules/prelude-toc.org" :minlevel 1 + + +#+INCLUDE: "../../common/modules/169.org" + +# +LaTeX_CLASS_OPTIONS: [aspectratio=169,handout,xcolor=table] + + +# +# If you want to change the title logo it's here +# +# +BEAMER_HEADER: \titlegraphic{\includegraphics[width=0.7\textwidth]{SWH-logo}} + +# aspect ratio can be changed, but the slides need to be adapted +# - compute a "resizing factor" for the images (macro for picblocks?) 
+# +# set the background image +# +# https://pacoup.com/2011/06/12/list-of-true-169-resolutions/ +# +#+BEAMER_HEADER: \pgfdeclareimage[height=90mm,width=160mm]{bgd}{swh-world-169.png} +#+BEAMER_HEADER: \setbeamertemplate{background}{\pgfuseimage{bgd}} + +#+LATEX_HEADER: \usepackage{supertabular} +#+LATEX_HEADER: \newcommand{\sponsor}[2]{{\bf #1}, #2} +#+LATEX_HEADER: \newcommand{\teamster}[2]{{\textcolor{red}{#1}}, #2} +* Introductions :noexport: +** Short Bio + # +BEAMER: \raisebox{-.5\height}{\includegraphics[width=.28\linewidth]{rdc}} + Roberto Di Cosmo\\ + Computer Science professor in Paris\\ + now working at INRIA\\ + /20 years/ of Free and Open Source Software\\ + \mbox{}\\ + \begin{minipage}[c]{0.18\linewidth} + \includegraphics[width=1.0\linewidth]{rdc} + \end{minipage} + \begin{minipage}[c]{0.8\linewidth} + \begin{description} +% \item[1998] \emph{Cybersnare} -- voice of French FOSS + \item[1999] \emph{DemoLinux} -- first live GNU/Linux distro +% \item[2004] \emph{EDOS} -- check package dependencies + \item[2007] \emph{Free Software Thematic Group}\\ + %\tiny{\url{http://www.systematic-paris-region.org/fr/logiciel-libre}}\\ + ~150 members ~40 projects ~200Me +% \item[2008] \emph{Mancoosi project} \url{www.mancoosi.org} + \item[2010] \emph{IRILL} \url{www.irill.org} + \item[2015] \emph{Software Heritage} at INRIA + \end{description} + \end{minipage} + +* Software is everywhere... 
:noexport: +** Software is everywhere :noexport: + :PROPERTIES: + :CUSTOM_ID: softwareispervasive + :END: + #+latex: \begin{center} + #+ATTR_LATEX: :width .6\linewidth +file:software-center.pdf + #+latex: \end{center} +#+BEAMER: \pause +*** + :PROPERTIES: + :BEAMER_env: block + :END: + \hfill Software embodies our collective *Knowledge* and *Cultural Heritage* +# why software source code is special (2 slides) +# +#+INCLUDE: "../../common/modules/source-code-different-long.org::#thesourcecode" :minlevel 2 +** Source code is essential :noexport: +#+INCLUDE: "../../common/modules/source-code-different-long.org::#softwareisdifferent" :only-contents t :minlevel 3 +** 50 years of software source code +#+INCLUDE: "../../common/modules/50years-source-code.org::#apollolinux" :only-contents t :minlevel 3 + +* Software Heritage +# +# One slide motivation + goals +# +#+INCLUDE: "../../common/modules/swh-goals-oneslide-vertical.org::#goals" :minlevel 2 +# + +# * Building the network +# Where we are today: endorsement +# ** Our principles \hfill iPres 2017 - \url{http://bit.ly/swhpaper} +# #+INCLUDE: "../../common/modules/principles-compact.org::#principlesstatus" :only-contents t :minlevel 3 +** Our principles \hfill iPres 2017 - \url{http://bit.ly/swhpaper} + :PROPERTIES: + :CUSTOM_ID: principlesstatus + :END: +#+latex: \begin{center} +#+ATTR_LATEX: :width .6\linewidth +file:SWH-as-foundation-slim.png +#+latex: \end{center} +#+latex: \footnotesize\vspace{-3mm} + # + # #+BEAMER: \pause + + #+BEAMER: \pause + #+latex: \centering + #+ATTR_LATEX: :width \extblockscale{.8\linewidth} + file:growth.png +#+BEAMER: \pause +*** Open approach :B_block:BMCOL: + :PROPERTIES: + :BEAMER_col: 0.3 + :BEAMER_env: block + :END: + - open source + - transparency +*** In for the long haul :B_block:BMCOL: + :PROPERTIES: + :BEAMER_col: 0.3 + :BEAMER_env: block + :END: + - non profit, replication + - *intrinsic* identifiers +# +*** Exhaustive :B_block:BMCOL: + :PROPERTIES: + :BEAMER_col: 0.3 + 
:BEAMER_env: block + :END: + - *all* software + - open to *all* communities + +*** :B_ignoreheading: + :PROPERTIES: + :BEAMER_env: ignoreheading + :END: + +*** + +** Growing Support +#+INCLUDE: "../../common/modules/support-compact.org::#support" :only-contents t :minlevel 3 +* Relevance for research software publishing +** Zoom on the collection phase +*** Much more than an archive! :B_picblock: + :PROPERTIES: + :BEAMER_env: picblock + :BEAMER_OPT: pic=swh-dataflow.pdf,width=.65\linewidth,leftpic=true + :END: + - GitHub + - Debian, GNU + - Gitorious, Google Code + - WIP: Bitbucket, FusionForge, GitLab.com + - /add your own plugins!/ + #+BEAMER: \pause +*** Important properties + - mission: *exhaustive* and *up to date* collection of *source code*, /specifically/ + - strategy: *automatic* harvesting + /deposit/ from /selected/ sources + #+BEAMER: \pause +*** + \hfill The /richest/ source code archive already, ... and growing daily! +#+INCLUDE: "../../common/modules/swh-scientific-publishing.org::#main" :only-contents t :minlevel 2 +** Breaking news +*** Operational adoption + - June 7th :: swMath.org points into SWH for the source code + \hfill see "Code" link in, e.g. http://swmath.org/software/7116 + - ongoing :: OpenAIRE detects source code links in articles, resolves them to SWH + - September :: HAL opens the software deposit doors to all +*** Institutional adoption + - July 4th :: Software Heritage is part of the French National Plan for Open Science +#+ATTR_LATEX: :width \extblockscale{.8\linewidth} +file:swhplanopenscience.png +** Come in, we're open! 
+#+latex: \begin{center} +#+ATTR_LATEX: :width .7\linewidth +file:SWH-logo.pdf +#+latex: \end{center} +#+latex: \begin{center} +#+latex: {\large \url{www.softwareheritage.org} \hspace{4em} \url{@swheritage}}\\ +#+latex: \mbox{}\hfill Talks, slides: {\large \url{annex.softwareheritage.org/public/talks}} +#+latex: \end{center} +*** Get involved + - sponsoring / partnership :: \hfill \url{sponsorship.softwareheritage.org} + - donations :: \hfill \url{softwareheritage.org/donate} + - our own code :: \hfill \url{forge.softwareheritage.org} + - metadata :: \hfill RDA source code IG + - identifiers :: \hfill RDA source code identification WG +* Appendix :B_appendix: + :PROPERTIES: + :BEAMER_env: appendix + :END: +* Intrinsic PID +** Our challenge in the PID arena +*** Long term + Identifiers must be there for the long term +*** No middle man + Identifiers must be meaningful even if resolvers go away +*** Integrity, not just naming + Identifier must ensure that the retrieved object is the intended one +*** Uniqueness by design + one name identifies a single object, and each object has only one name +** Exploring the PID landscape +*** A lot of options out there... + URL, URI, PURL, URN, ARK, DOI, ... +*** ... some are widely used + - articles + - data + - even software artefacts! +#+BEAMER: \pause +*** We can get no satisfaction + \hfill of all the key criteria +#+BEAMER: \pause +*** + \hfill we adopted something radically different \hfill +** Intrinsic identifiers in Software Heritage + # R. C. Merkle, A digital signature based on a conventional encryption + # function, Crypto '87 + #+BEAMER: \vspace{-3mm} +***** Merkle tree (R. C. 
Merkle, Crypto 1979) :B_picblock: + :PROPERTIES: + :BEAMER_opt: pic=merkle, leftpic=true, width=.5\linewidth + :BEAMER_env: picblock + :BEAMER_act: + :END: + Combination of + - tree + - hash function +***** Classical cryptographic construction + fast, parallel signature of large data structures, built-in deduplication +#+BEAMER: \pause + - satisfies all three criteria + - widely used in industry (e.g., Git, nix, blockchains, IPFS, ...) +** Back to basics: DIOs vs. IDOs +*** DIO (digital identifier of an object) + - digital identifiers for traditional (non digital) objects + - epistemic complications (manifestations, versions, locations, etc.) + - significant governance issues, ... +#+BEAMER: \pause +*** IDO (identifier of a digital object) + - (digital) identifier for digital objects + - much simpler to build/handle + - can (and must) be intrinsic +#+BEAMER: \pause +*** Separation of concerns + - yes, we \alert{need both} DIOs and IDOs + - no, we \alert{must not mistake} DIOs for IDOs (and vice versa) +#+BEAMER: \pause +** Working together +*** Example: links to /software source code/ in an article + Leveraging the Software Heritage universal archive: + - set of files :: \small\url{swh:1:tree:06741c8c37c5a384083082b99f4c5ad94cd0cd1f}\\ + id of tree object listing all the files in a project (at a given time) + - revision :: \url{swh:1:rev:7598fb94d59178d65bd8d2892c19356290f5d4e3}\\ + id of commit object which contains a tree and (a pointer to) the history +#+BEAMER: \pause + - metadata :: this /will/ involve some form of DIO + - and we get all the complications back +* Our role in the publication workflow +** Our role: handle /all/ the /software source code/ +*** At the end of the process + Explicit deposit, coordinated with the publisher + - store the /final/ source code (no garbage) + - store only public source code + - *N.B.:* no embargo or access control (yet) +*** During the review + Access to the largest available source code base + + provenance, plagiarism detection 
(for new code) + + metrics (for long standing projects) +#+BEAMER: \pause +*** Later on + - Support embargo/access control +* The Metadata challenge +** Collecting metadata for 60+ million projects +*** Landscape of Software Ontologies + #+latex: \begin{center} + #+ATTR_LATEX: :width .75\linewidth +file:metadata_landscape6.png + #+latex: \end{center} +*** It's the real world! + reconcile metadata from different origins, handle conflicts, synthesise missing information, classify (automatically) the projects, etc. +* Collection strategies +** All the source code + #+BEAMER: \begin{center}\includegraphics[width=\extblockscale{\linewidth}]{swh-collect-axes}\end{center} +** All the source code, strategies + #+BEAMER: \begin{center}\includegraphics[width=\extblockscale{\linewidth}]{swh-collect-strategies}\end{center} +** Online, open source code: automation overview + #+BEAMER: \begin{center}\includegraphics[width=\extblockscale{\linewidth}]{swh-automation}\end{center} diff --git a/talks-public/2018-07-11-SWDepositSSI/METADATA b/talks-public/2018-07-11-SWDepositSSI/METADATA new file mode 100644 index 0000000..96b588a --- /dev/null +++ b/talks-public/2018-07-11-SWDepositSSI/METADATA @@ -0,0 +1,18 @@ +Title: + Software Heritage: the universal source code archive + +Abstract: + + Software source code is everywhere, with tens of millions of developers + worldwide, and it is of paramount importance to provide a universal + archive and reference system for all its applications. Software Heritage + has taken over this task. + + Since software source code has been recently recognised as an important + asset also in the field of scientific research, complementing publications + and research data, Software Heritage is now providing the infrastructure + for depositing and referencing software source code, in collaboration with + national and international open access portals. 
+ + In this short presentation we will provide an overview of the available + functionalities, and an update on the latest collaborations. diff --git a/talks-public/2018-07-11-SWDepositSSI/Makefile b/talks-public/2018-07-11-SWDepositSSI/Makefile new file mode 100644 index 0000000..68fbee7 --- /dev/null +++ b/talks-public/2018-07-11-SWDepositSSI/Makefile @@ -0,0 +1 @@ +include ../Makefile.slides