diff --git a/common/modules/last-slide-references.org b/common/modules/last-slide-references.org new file mode 100644 index 0000000..b98577e --- /dev/null +++ b/common/modules/last-slide-references.org @@ -0,0 +1,38 @@ +#+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) +#+INCLUDE: "prelude.org" :minlevel 1 + +* References + :PROPERTIES: + :CUSTOM_ID: main + :END: +** Come in, we're open ! + :PROPERTIES: + :CUSTOM_ID: references-identifiers + :END: +*** + \url{www.softwareheritage.org} --- learn more \\ + \url{save.softwareheritage.org} --- save code now \\ + \url{www.softwareheritage.org/swhap} --- legacy software acquisition process \\ + \url{forge.softwareheritage.org} --- our own code + #+BEAMER: \vspace{-1mm} \flushright {\Huge Questions?} \vfill + +*** References :B_block: + :PROPERTIES: + :BEAMER_env: block + :END: + #+BEGIN_EXPORT latex + \begin{thebibliography}{Foo Bar, 1969} + \footnotesize + \bibitem{Abramatic2018} Jean-François Abramatic, Roberto Di Cosmo, Stefano Zacchiroli\newblock + \emph{Building the Universal Archive of Source Code},\\ + Communications of the ACM, October 2018 + \href{https://doi.org/10.1145/3183558}{(10.1145/3183558)} + \bibitem{DiCosmo2019} Roberto Di Cosmo, Morane Gruenpeter, Stefano Zacchiroli\newblock + \emph{Referencing Source Code Artifacts: a Separate Concern in Software Citation},\\ + Computing in Science and Engineering, IEEE, pp.1-9. 
\href{https://dx.doi.org/10.1109/MCSE.2019.2963148}{(10.1109/MCSE.2019.2963148)} + \href{https://hal.archives-ouvertes.fr/hal-02446202}{(hal-02446202)} + \end{thebibliography} + #+END_EXPORT + + + diff --git a/common/modules/swh-scientific-preservation.org b/common/modules/swh-scientific-preservation.org index 4695b2a..346ee1c 100644 --- a/common/modules/swh-scientific-preservation.org +++ b/common/modules/swh-scientific-preservation.org @@ -1,65 +1,73 @@ #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) #+INCLUDE: "prelude.org" :minlevel 1 * Software Heritage for Scientific Publishing :PROPERTIES: :CUSTOM_ID: main :END: ** Save and reference research software \hfill \href{https://www.softwareheritage.org/save-and-reference-research-software/}{SWH guidelines} :PROPERTIES: - :CUSTOM_ID: save research software + :CUSTOM_ID: prepare :END: *** Prepare your public repository with: :B_picblock: - README, LICENSE, AUTHORS & codemeta.json files #+LATEX: \pause *** What's a good README \hfill extracted from \href{https://www.tldp.org/HOWTO/html_single/Software-Release-Practice-HOWTO/}{Eric Steven Raymond} and \href{https://www.makeareadme.com/}{Make a README} /MUST/ include: - **Name** and a **description** of the software. 
+ #+BEAMER: \pause /SHOULD/ include: - how to **run** and **use** the source code - build **environment**, installation, requirements + #+BEAMER: \pause /CAN/ include: - project **website** or **documentation** pointer and recent **news** - **visuals** ** Save and reference research software \hfill \href{https://www.softwareheritage.org/save-and-reference-research-software/}{SWH guidelines} - +:PROPERTIES: +:CUSTOM_ID: save +:END: *** Save code now on \hfill \url{https://archive.softwareheritage.org/save/} - git, svn or mercurial - intrinsic metadata files - complete history #+latex: \begin{center} #+ATTR_LATEX: :width \linewidth file:webui-save-code-now.png #+latex: \end{center} ** Save and reference research software \hfill \href{https://www.softwareheritage.org/save-and-reference-research-software/}{SWH guidelines} - +:PROPERTIES: +:CUSTOM_ID: reference +:END: *** Choose the granularity level for the reference - code fragment, specific version or full repository #+BEGIN_EXPORT latex \small \begin{tcolorbox} \href{https://archive.softwareheritage.org/swh:1:cnt:c60366bc03936eede6509b23307321faf1035e23;origin=https://github.com/sagemath/sage;lines=473-537} {swh:1:{\bf cnt}:c60366bc03936eede6509b23307321faf1035e23;lines=473-537} \end{tcolorbox} \hfill James McCaffrey's {\bf algorithm} in sageMath + \pause \begin{tcolorbox} \href{https://archive.softwareheritage.org/swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f} {swh:1:{\bf rel}:22ece559cc7cc2364edc5e5593d63ae8bd229f9f} \end{tcolorbox} \hfill {\bf release} 2.3.0 of Darktable, dated 24 December 2016 + \pause \begin{tcolorbox} \href{https://archive.softwareheritage.org/swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453} {swh:1:{\bf snp}:c7c108084bc0bf3d81436bf980b46e98bd338453} \end{tcolorbox} \hfill a {\bf snapshot} of the entire Darktable repository (4 May 2017, GitHub) #+END_EXPORT diff --git a/talks-public/2019-11-26-UPMC/2019-11-26-UPMC.org b/talks-public/2019-11-26-UPMC/2019-11-26-UPMC.org index 
6ea0873..9f26cb4 100644 --- a/talks-public/2019-11-26-UPMC/2019-11-26-UPMC.org +++ b/talks-public/2019-11-26-UPMC/2019-11-26-UPMC.org @@ -1,280 +1,280 @@ #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) #+TITLE: Software Heritage #+SUBTITLE: The universal source code archive #+BEAMER_HEADER: \title{Software Heritage} #+AUTHOR: Morane Gruenpeter #+EMAIL: morane@softwareheritage.org #+BEAMER_HEADER: \date[November 26th, 2019]{November 26th, 2019\\[-1em]} #+BEAMER_HEADER: \title[www.softwareheritage.org]{Software Heritage} #+BEAMER_HEADER: \institute[]{\\\href{mailto:morane@softwareheritage.org}{\tt morane@softwareheritage.org}} #+BEAMER_HEADER: \author[Morane Gruenpeter]{ Morane Gruenpeter\\[1em]% #+BEAMER_HEADER: Metadata specialist, Software Heritage\\Crossminer, University of L'Aquila\\[-1em]} # #+BEAMER_HEADER: \setbeameroption{show notes on second screen} #+BEAMER_HEADER: \setbeameroption{hide notes} #+KEYWORDS: software heritage legacy preservation knowledge mankind technology deposit # # prelude.org contains all the information needed to export the main beamer latex source # use prelude-toc.org to get the table of contents # #+INCLUDE: "../../common/modules/prelude-toc.org" :minlevel 1 #+INCLUDE: "../../common/modules/169.org" # +LaTeX_CLASS_OPTIONS: [aspectratio=169,handout,xcolor=table] #+LATEX_HEADER: \usepackage{bbding} #+LATEX_HEADER: \usepackage{tcolorbox} #+LATEX_HEADER: \DeclareUnicodeCharacter{66D}{\FiveStar} # # If you want to change the title logo it's here # # +BEAMER_HEADER: \titlegraphic{\includegraphics[width=0.7\textwidth]{SWH-logo}} # aspect ratio can be changed, but the slides need to be adapted # - compute a "resizing factor" for the images (macro for picblocks?) 
# # set the background image # # https://pacoup.com/2011/06/12/list-of-true-169-resolutions/ # #+BEAMER_HEADER: \pgfdeclareimage[height=90mm,width=160mm]{bgd}{swh-world-169.png} #+BEAMER_HEADER: \setbeamertemplate{background}{\pgfuseimage{bgd}} #+LATEX_HEADER: \usepackage{supertabular} #+LATEX_HEADER: \newcommand{\sponsor}[2]{{\bf #1}, #2} #+LATEX_HEADER: \newcommand{\teamster}[2]{{\textcolor{red}{#1}}, #2} * Introduction # # One slide motivation + goals #+INCLUDE: "../../common/modules/swh-goals-oneslide-vertical.org::#goals" :minlevel 2 # # Where we are today: endorsement # ** Our principles \hfill iPres 2017 - \url{http://bit.ly/swhpaper} # #+INCLUDE: "../../common/modules/principles-compact.org::#principlesstatus" :only-contents t :minlevel 3 ** Our principles \hfill iPres 2017 - \url{http://bit.ly/swhpaper} :PROPERTIES: :CUSTOM_ID: principlesstatus :END: #+latex: \begin{center} #+ATTR_LATEX: :width .8\linewidth file:SWH-as-foundation-slim.png #+latex: \end{center} #+latex: \footnotesize\vspace{-3mm} # # #+BEAMER: \pause #+BEAMER: \pause #+latex: \centering #+ATTR_LATEX: :width \extblockscale{.8\linewidth} file:2019-09-archive-growth.png ** Growing Support #+INCLUDE: "../../common/modules/support-compact.org::#support" :only-contents t :minlevel 3 * The knowledge is in the source code ! ** The knowledge is in the source code! 
#+INCLUDE: "../../common/modules/source-code-different-short.org::#thesourcecode" :only-contents t :minlevel 3 ** Source code is /special/ *** /Executable/ and /human readable/ knowledge \hfill copyright law /“Programs must be written for people to read, and only incidentally for machines to execute.”/\\ \hfill Harold Abelson #+BEAMER: \pause *** Software /evolves/ over time - projects may last decades - the /development history/ is key to its /understanding/ #+BEAMER: \pause *** Complexity :B_picblock: :PROPERTIES: :BEAMER_env: picblock :BEAMER_OPT: pic=python3-matplotlib.pdf, width=.6\linewidth :END: - /millions/ of lines of code - large /web of dependencies/ + easy to break, difficult to maintain - sophisticated /developer communities/ * SWH data model and persistent identifiers # under the hood: automation and storage, the archive in pictures #+INCLUDE: "../../common/modules/under-the-hood-pictures.org::#main" :only-contents t :minlevel 2 ** Under the hood: identifying billions of objects \hfill \url{https://bit.ly/2wOOmyV} #+latex: \begin{center} #+ATTR_LATEX: :width .85\linewidth file:swh-merkle-dag-wide.pdf #+latex: \end{center} #+latex: \footnotesize\vspace{-3mm} ** Our challenges in the PID landscape :PROPERTIES: :CUSTOM_ID: challenges :END: *** Typical properties of systems of identifiers \hfill uniqueness, non ambiguity, persistence, abstraction (opacity) #+BEAMER: \pause *** Key needed properties from our use cases - gratis :: identifiers are free (billions of objects) - integrity :: the associated object cannot be changed (sw dev, /reproducibility/) - no middle man :: no central authority is needed (sw dev, /reproducibility/) #+BEAMER: \pause *** \hfill we could not find systems with both *integrity* and *no middle man* ! 
# metadata challenge- questions about a software entity and where to find metadata (one slide) #+INCLUDE: "../../common/modules/identifiers-arena.org::#main" :only-contents t :minlevel 2 * The missing piece- the Metadata # metadata challenge- questions about a software entity and where to find metadata (one slide) #+INCLUDE: "../../common/modules/metadata-challenge.org::#main" :only-contents t :minlevel 2 ** The Software Ontology /Touchstone/ *** Software Citation Principles \tiny ( FORCE11's 2015 conference and WG) :B_block: :PROPERTIES: :BEAMER_env: block :BEAMER_opt: :END: - *Importance* : first class citizen in the scholarly ecosystem - *Credit and attribution* : authors, maintainer - *Unique identification*: points to a unique, specific software version (DOI, Git SHA1 hash, etc..) - *Persistence* : identification beyond the lifespan of the software (swh-id) - *Accessibility*: url, publisher - *Specificity* : version, environment # metadata landscape (one decomposed slide) #+INCLUDE: "../../common/modules/metadata-landscape.org::#main" :only-contents t :minlevel 2 ** Software Metadata Terms *** identify :B_block:BMCOL: :PROPERTIES: :BEAMER_col: 0.2 :BEAMER_env: block :END: - identifier - title - authors - version - type - origin source #+BEAMER: \pause *** execute :B_block:BMCOL: :PROPERTIES: :BEAMER_opt: :BEAMER_env: block :BEAMER_col: 0.2 :END: - link to a compiled version - repository - compiler - environment - examples #+BEAMER: \pause *** classify :B_block:BMCOL: :PROPERTIES: :BEAMER_col: 0.2 :BEAMER_env: block :END: - description - keywords - in/out data - references - algorithms - docs url #+BEAMER: \pause *** administrate :B_block:BMCOL: :PROPERTIES: :BEAMER_col: 0.2 :BEAMER_env: block :END: - contact - authorship - funders - license - editor (publisher) - dates - status ** Much more complex than it seems *** Software is complex - Structure :: monolithic/composite; self-contained/external dependencies - Lifetime :: one-shot/long term - Community :: one 
man/one team/distributed community - Authorship :: complex set of roles - Authority :: institutions/organizations/communities/single person #+BEAMER: \pause *** Various granularities - Exact status of the source code :: for reproducibility, e.g. #+latex: \emph{``you can find at \href{https://archive.softwareheritage.org/swh:1:cnt:cdf19c4487c43c76f3612557d4dc61f9131790a4;lines=146-187/}{swh:1:cnt:cdf19c4487c43c76f3612557d4dc61f9131790a4;lines=146-187} the core algorithm used in this article''} - (Major) release :: \emph{``This functionality is available in OCaml version 4''} - Project :: \emph{``Inria has created OCaml and Scikit-Learn''}. * Software Source code as a research output # reproducibility and scientific knowledge pillars (one slide) #+INCLUDE: "../../common/modules/swh-scientific-reproducibility.org::#main" :only-contents t :minlevel 2 # # scientific software (deposit) use-case (one slide) #+INCLUDE: "../../common/modules/swh-scientific-deposit.org::#main" :only-contents t :minlevel 2 -# scientific software (save code now) use-case (one slide) +# scientific software (save code now) use-case (three slides) #+INCLUDE: "../../common/modules/swh-scientific-preservation.org::#main" :only-contents t :minlevel 2 * Conclusion ** Research Software Engineer tips *** Use a forge for your academic and personal projects \hfill Github, Gitlab or Bitbucket are the best way to create your *source code cv* #+BEAMER: \pause *** Put in your projects metadata files \hfill *README*, *LICENSE*, *AUTHORS* and *codemeta.json* to describe your project #+BEAMER: \pause *** Archive your projects on SWH \hfill Use the *Save Code Now* feature #+BEAMER: \pause *** Contribute to other projects \hfill When you contribute you learn how to *read code* #+BEAMER: \pause *** Ask \hfill Don't be afraid to ask on an *issue*, *mailing list* or *irc channel* (or your teachers) ** Come in, we're open! 
#+BEGIN_EXPORT latex \begin{center} \includegraphics[width=.5\linewidth]{SWH-logo.pdf} \end{center} \begin{center} \vfill {\Large Thank you! Any questions?} \end{center} #+END_EXPORT *** Join us on https://forge.softwareheritage.org/ :B_block: :PROPERTIES: :BEAMER_env: block :END: #+BEGIN_EXPORT latex \begin{thebibliography}{Foo Bar, 1969} \footnotesize \bibitem{Abramatic2018} Jean-François Abramatic, Roberto Di Cosmo, Stefano Zacchiroli\newblock \emph{Building the Universal Archive of Source Code}, Communications of the ACM, October 2018 \bibitem{DiCosmo2018} Roberto Di Cosmo, Morane Gruenpeter, Stefano Zacchiroli\newblock \emph{Identifiers for Digital Objects: the Case of Software Source Code Preservation}, iPRES 2018: Intl. Conf. on Digital Preservation \end{thebibliography} #+END_EXPORT *** contact: morane@softwareheritage.org diff --git a/talks-public/2020-01-29-Pidapalooza/2020-01-29-Pidapalooza.org b/talks-public/2020-01-29-Pidapalooza/2020-01-29-Pidapalooza.org index e64b677..d3e6629 100644 --- a/talks-public/2020-01-29-Pidapalooza/2020-01-29-Pidapalooza.org +++ b/talks-public/2020-01-29-Pidapalooza/2020-01-29-Pidapalooza.org @@ -1,302 +1,302 @@ #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) #+TITLE: The SWH-ID #+SUBTITLE: A digital fingerprint identifying software source code #+AUTHOR: Roberto Di Cosmo #+EMAIL: roberto@dicosmo.org @rdicosmo @swheritage #+BEAMER_HEADER: \date[January 29th, 2020]{January 29th, 2020\\[-1em]} #+BEAMER_HEADER: \title[The SWH-ID]{The SWH-ID} #+BEAMER_HEADER: \author[Roberto Di Cosmo, Morane Gruenpeter]{Roberto Di Cosmo, Morane Gruenpeter\\[1em]} #+KEYWORDS: software heritage legacy preservation knowledge mankind technology #+LATEX_HEADER: \usepackage{tcolorbox} #+LATEX_HEADER: \definecolor{links}{HTML}{2A1B81} #+LATEX_HEADER: \hypersetup{colorlinks,linkcolor=,urlcolor=links} # # prelude.org contains all the information needed to 
export the main beamer latex source # use prelude-toc.org to get the table of contents # #+INCLUDE: "../../common/modules/prelude-toc.org" :minlevel 1 #+INCLUDE: "../../common/modules/169.org" # +LaTeX_CLASS_OPTIONS: [aspectratio=169,handout,xcolor=table] #+LATEX_HEADER: \usepackage{bbding} #+LATEX_HEADER: \usepackage{tcolorbox} #+LATEX_HEADER: \DeclareUnicodeCharacter{66D}{\FiveStar} # # If you want to change the title logo it's here # # +BEAMER_HEADER: \titlegraphic{\includegraphics[width=0.7\textwidth]{SWH-logo}} # aspect ratio can be changed, but the slides need to be adapted # - compute a "resizing factor" for the images (macro for picblocks?) # # set the background image # # https://pacoup.com/2011/06/12/list-of-true-169-resolutions/ # #+BEAMER_HEADER: \pgfdeclareimage[height=90mm,width=160mm]{bgd}{swh-world-169.png} #+BEAMER_HEADER: \setbeamertemplate{background}{\pgfuseimage{bgd}} #+LATEX: \addtocounter{framenumber}{-1} * Software is our heritage ** Source Code: /executable/ and /human readable/ knowledge #+INCLUDE: "../../common/modules/source-code-different-short.org::#thesourcecode" :only-contents t :minlevel 3 #+Beamer: \pause *** Len Shustek, CHM\hfill /“Source code provides a view into the mind of the designer.”/ ** The Paris call: Software Source Code is part of our Heritage #+INCLUDE: "../../common/modules/paris-call-2019.org::#pariscall2019" :only-contents t :minlevel 3 * Preserving all software source code #+INCLUDE: "../../common/modules/swh-goals-oneslide-vertical.org::#goals" :minlevel 2 ** A principled infrastructure \hfill \url{http://bit.ly/swhpaper} :noexport: #+latex: \begin{center} #+ATTR_LATEX: :width 0.5\linewidth file:SWH-as-foundation-slim.png #+latex: \end{center} #+BEAMER: \pause #+latex: \centering #+ATTR_LATEX: :width \extblockscale{.7\linewidth} file:growth.png #+BEAMER: \pause *** Technology :PROPERTIES: :BEAMER_col: 0.34 :BEAMER_env: block :END: - transparency and FOSS - replicas all the way down *** Content (billions!) 
:PROPERTIES: :BEAMER_col: 0.32 :BEAMER_env: block :END: - *intrinsic identifiers* - facts and provenance *** Organization :PROPERTIES: :BEAMER_col: 0.33 :BEAMER_env: block :END: - non-profit - multi-stakeholder ** Source code is /special/ *** :PROPERTIES: :BEAMER_env: picblock :BEAMER_OPT: pic=python3-matplotlib.pdf, width=.51\linewidth :END: Software **evolves** over time - projects may last decades - the /development history/ is key to its /understanding/ #+BEAMER: \pause Layers of **complexity** - /millions/ of lines of code - large /web of dependencies/ - sophisticated /developer communities/ *** :B_ignoreheading: :PROPERTIES: :BEAMER_env: ignoreheading :END: #+BEAMER: \pause *** Bottomline - we must archive /all/ the source code - we must preserve /all/ the history of its development - we must **identify** /all/ the archived software artifacts (more than 20 billions today!) #+BEAMER: \pause \hfill how can we do this? ** Evolution of software development *** Version control system (VCS) - records changes made to a (set of) /source code file/ (s) - allows to operate on versions: diff/merge/fork/recover etc. 
- *essential* tool for software development #+BEAMER: \pause *** Three decades of evolution #+LATEX: \centering #+LATEX: \includegraphics[width=.8\linewidth]{VCS_history_timeline.png} ** In a picture \hfill (from https://github.com/progit/progit2) :noexport: #+BEGIN_EXPORT latex \centering\forcebeamerstart \only<1>{\colorbox{white}{\includegraphics[width=\extblockscale{.5\linewidth}]{localvcs}}\mbox{}\\[2em] \texttt{co -r1.2 file.c} } \only<2>{\colorbox{white}{\includegraphics[width=\extblockscale{.5\linewidth}]{centralisedvcs}}\mbox{}\\[2em] \texttt{cvs co -r Rel-1A ProgABC} } \only<3>{\colorbox{white}{\includegraphics[width=\extblockscale{.5\linewidth}]{distvcs}}\mbox{}\\[2em] \texttt{git checkout df3b1b08f756569eff0919e37d8af1f403515b31} } \forcebeamerend #+END_EXPORT ** Foundations of modern DVCS **** Requirements for the D in DVCS - *intrinsic* unique identifiers... \hfill (here: /cryptographic signature/, aka "hash") - ... that work for *tree structures* (software directories) #+BEAMER: \pause # R. C. Merkle, A digital signature based on a conventional encryption # function, Crypto '87 **** Merkle tree to the rescue (R. C. 
Merkle, Crypto 1979) :B_picblock: :PROPERTIES: :BEAMER_opt: pic=merkle, leftpic=true, width=.7\linewidth :BEAMER_env: picblock :BEAMER_act: :END: Combination of - tree - hash function ** A massive adoption *** Stack Overflow \hfill \href{https://insights.stackoverflow.com/survey/2018}{[Survey 2018]} :PROPERTIES: :BEAMER_col: 0.47 :BEAMER_env: block :END: #+latex: \centering #+ATTR_LATEX: :width \extblockscale{1.4\linewidth} file:stackoverflow-survey-VCS.png #+BEAMER: \pause *** In numbers :PROPERTIES: :BEAMER_col: 0.45 :BEAMER_env: block :END: GitHub \hfill \href{https://octoverse.github.com/2017/}{[Octoverse 2017]} \href{https://github.blog/2018-11-08-100m-repos/}{[Blog 2018]} - *100.000.000+* repositories - *40.000.000+* developers worldwide Bitbucket \hfill \href{https://bitbucket.org/blog/celebrating-10-million-bitbucket-cloud-registered-users}{[Blog 2019]} - - *28.000.000+* repositories + - *28.000.000+* repositories - *10.000.000+* developers worldwide GitLab \hfill \href{https://about.gitlab.com/blog/2019/06/06/1-mil-merge-requests/}{[Blog 2019]} - *1.000.000* MRs March 19' #+BEAMER: \pause *** \hfill Let's use it! 
* The SWH-ID: the source code fingerprint ** The SWH-ID schema # TODO: drawing with swh:1:cnt:xxxxxxx "exploded" and explained #+LATEX: \centering\forcebeamerstart #+LATEX: \only<1>{\includegraphics[width=\linewidth]{SWH-ID-1.png}} #+LATEX: \only<2>{\includegraphics[width=\linewidth]{SWH-ID-2.png}} #+LATEX: \only<3>{\includegraphics[width=\linewidth]{SWH-ID-3.png}} #+LATEX: \forcebeamerend ** A worked example #+LATEX: \centering\forcebeamerstart #+LATEX: \only<1>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_1.pdf}}} #+LATEX: \only<2>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/contents.pdf}}} #+LATEX: \only<3>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_2_contents.pdf}}} #+LATEX: \only<4>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/directories.pdf}}} #+LATEX: \only<5>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_3_directories.pdf}}} #+LATEX: \only<6>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/revisions.pdf}}} #+LATEX: \only<7>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_4_revisions.pdf}}} #+LATEX: \only<8>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/releases.pdf}}} #+LATEX: \only<9>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_5_releases.pdf}}} #+LATEX: \only<10>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/snapshots.pdf}}} #+LATEX: \forcebeamerend ** Demo time *** Let's look at some famous exceprts of source code #+BEAMER: \pause *** Apollo 11 source code ([[https://archive.softwareheritage.org/swh:1:cnt:64582b78792cd6c2d67d35da5a11bb80886a6409;origin=https://github.com/virtualagc/virtualagc;lines=245-261/][excerpt]]) :B_block:BMCOL: :PROPERTIES: :BEAMER_col: 0.48 :BEAMER_env: block :END: 
#+LATEX: \includegraphics[width=\linewidth]{apollo-11-cranksilly.png} # excerpt of routine that asks astronaut to turn around the LEM #+BEAMER: \pause *** Quake III source code ([[https://archive.softwareheritage.org/swh:1:cnt:bb0faf6919fc60636b2696f32ec9b3c2adb247fe;origin=https://github.com/id-Software/Quake-III-Arena;lines=549-572/][excerpt]]) :B_block:BMCOL: :PROPERTIES: :BEAMER_col: 0.45 :BEAMER_env: block :END: #+LATEX: \includegraphics[width=\linewidth]{quake-carmack-sqrt-1.png} # smart efficient implementation of 1/sqrt(x) on a CPU without special support #+BEAMER: \pause *** :B_ignoreheading: :PROPERTIES: :BEAMER_env: ignoreheading :END: *** It works! we have /intrinsic/ identifiers for all 20+ billion objects in the archive * Conclusion ** Food for thought *** Intrinsic identifiers... - can be extracted from the *object itself*, hence: - no need for a /central authority/, nor maintenance - any modification to the object changes the identifier - identifies the /object/, not the /metadata/ ! #+BEAMER: \pause *** ... /for source code/ - Distributed Version Control Systems made them popular - massively used every day by millions of software developers - Software Heritage provides *SWH-IDs* for billions of software artifacts ** Come in, we're open! 
*** \url{www.softwareheritage.org} --- learn more \\ \url{save.softwareheritage.org} --- save code now \\ \url{www.softwareheritage.org/swhap} --- legacy software acquisition process \\ \url{forge.softwareheritage.org} --- our own code #+BEAMER: \vspace{-1mm} \flushright {\Huge Questions?} \vfill *** References :B_block: :PROPERTIES: :BEAMER_env: block :END: #+BEGIN_EXPORT latex \begin{thebibliography}{Foo Bar, 1969} \footnotesize \bibitem{Abramatic2018} Jean-François Abramatic, Roberto Di Cosmo, Stefano Zacchiroli\newblock \emph{Building the Universal Archive of Source Code},\\ Communications of the ACM, October 2018 \href{https://doi.org/10.1145/3183558}{(10.1145/3183558)} \bibitem{DiCosmo2019} Roberto Di Cosmo, Morane Gruenpeter, Stefano Zacchiroli\newblock \emph{Referencing Source Code Artifacts: a Separate Concern in Software Citation},\\ Computing in Science and Engineering, IEEE, pp.1-9. \href{https://dx.doi.org/10.1109/MCSE.2019.2963148}{(10.1109/MCSE.2019.2963148)} \href{https://hal.archives-ouvertes.fr/hal-02446202}{(hal-02446202)} \end{thebibliography} #+END_EXPORT * Extrinsic vs Intrinsic identifiers :noexport: ** An important distinction: DIOs vs. IDOs :PROPERTIES: :CUSTOM_ID: diovsido :END: #+BEGIN_EXPORT latex \begin{quote} The term “Digital Object Identifier” is construed as “digital identifier of an object," rather than “identifier of a digital object” \hfill Norman Paskin. 2010 \end{quote} #+END_EXPORT #+BEAMER: \pause *** DIO (Digital Identifier of an Object) digital identifiers for (potentially) *non digital objects* - epistemic complexity (manifestations, versions, locations, etc.) - need an authority to ensure persistence and uniqueness #+BEAMER: \pause *** IDO (Identifier of a Digital Object) digital identifiers (only) for *digital objects* - can provide both *integrity* and *no middle man* - broadly used in modern software development (git, etc.) ** An important distinction: DIOs vs. 
IDOs #+latex: \begin{center} #+ATTR_LATEX: :width 0.859\linewidth file:DIOvsIDO.png #+latex: \end{center} #+BEAMER: \pause \hfill for the core Software Heritage archive, *IDOs are enough* ** Intrinsic: what does it really mean? Examples of intrinsic identifiers (DNA, music notes, etc.) diff --git a/talks-public/2020-02-19-RDA-AMA/2020-02-19-RDA-AMA.org b/talks-public/2020-02-19-RDA-AMA/2020-02-19-RDA-AMA.org new file mode 100644 index 0000000..b9c94c8 --- /dev/null +++ b/talks-public/2020-02-19-RDA-AMA/2020-02-19-RDA-AMA.org @@ -0,0 +1,393 @@ +#+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) +#+TITLE: Archiving And Referencing All Software Source Code Using Software Heritage +#+SUBTITLE: +#+AUTHOR: Roberto Di Cosmo +#+EMAIL: roberto@dicosmo.org @rdicosmo @swheritage +#+BEAMER_HEADER: \date{February 19th, 2020} +#+BEAMER_HEADER: \title[Archiving And Referencing All Software Source Code]{Archiving And Referencing All Software Source Code} +#+BEAMER_HEADER: \author[Roberto Di Cosmo \hspace{5em} www.dicosmo.org]{Roberto Di Cosmo\\[2em]} +# #+BEAMER_HEADER: \setbeameroption{show notes on second screen} +#+BEAMER_HEADER: \setbeameroption{hide notes} +#+KEYWORDS: software heritage legacy preservation knowledge mankind technology +#+LATEX_HEADER: \usepackage{tcolorbox} +#+LATEX_HEADER: \definecolor{links}{HTML}{2A1B81} +#+LATEX_HEADER: \hypersetup{colorlinks,linkcolor=,urlcolor=links} +# +# prelude.org contains all the information needed to export the main beamer latex source +# use prelude-toc.org to get the table of contents +# + +#+INCLUDE: "../../common/modules/prelude-toc.org" :minlevel 1 + + +#+INCLUDE: "../../common/modules/169.org" + +# +LaTeX_CLASS_OPTIONS: [aspectratio=169,handout,xcolor=table] + +#+LATEX_HEADER: \usepackage{bbding} +#+LATEX_HEADER: \DeclareUnicodeCharacter{66D}{\FiveStar} + +# +# If you want to change the title logo it's here +# +# +BEAMER_HEADER: 
\titlegraphic{\includegraphics[width=0.7\textwidth]{SWH-logo}} + +# aspect ratio can be changed, but the slides need to be adapted +# - compute a "resizing factor" for the images (macro for picblocks?) +# +# set the background image +# +# https://pacoup.com/2011/06/12/list-of-true-169-resolutions/ +# +#+BEAMER_HEADER: \pgfdeclareimage[height=90mm,width=160mm]{bgd}{swh-world-169.png} +#+BEAMER_HEADER: \setbeamertemplate{background}{\pgfuseimage{bgd}} +#+LATEX: \addtocounter{framenumber}{-1} + +* Introduction + #+INCLUDE: "../../common/modules/rdc-bio.org::#main" :only-contents t :minlevel 2 +** Why we are here +*** Software is everywhere in modern research :B_picblock: + :PROPERTIES: + :BEAMER_opt: pic=papermountain, leftpic=true, width=.3\linewidth + :BEAMER_env: picblock + :BEAMER_COL: .6 + :END: +#+BEGIN_QUOTE +[...] software [...] essential in their fields. + +\mbox{}\hfill Top 100 papers (Nature, 2014) +#+END_QUOTE +#+BEGIN_QUOTE +Sometimes, if you don't have the software, you don't have the data + +\mbox{}\hfill Christine Borgman, Paris, 2018 +#+END_QUOTE +# http://www.nature.com/news/the-top-100-papers-1.16224 +#+BEAMER: \pause +*** Open Science: three pillars :B_block: + :PROPERTIES: + :BEAMER_COL: .45 + :BEAMER_env: block + :END: +#+latex: \begin{center} +#+ATTR_LATEX: :width \extblockscale{\linewidth} +file:PreservationTriangle.png +#+latex: \end{center} +#+BEAMER: \pause +*** :B_ignoreheading: + :PROPERTIES: + :BEAMER_env: ignoreheading + :END: +*** Nota bene + \hfill The links in the picture are *essential* +** The knowledge is in the source code! 
+#+INCLUDE: "../../common/modules/source-code-different-short.org::#thesourcecode" :only-contents t :minlevel 3 +** Source code is /special/ +*** /Executable/ and /human readable/ knowledge \hfill copyright law + /“Programs must be written for people to read, and only incidentally for machines to execute.”/\\ + \hfill Harold Abelson +#+BEAMER: \pause +*** Software /evolves/ over time + - projects may last decades + - the /development history/ is key to its /understanding/ +#+BEAMER: \pause +*** Complexity :B_picblock: + :PROPERTIES: + :BEAMER_env: picblock + :BEAMER_OPT: pic=python3-matplotlib.pdf, width=.6\linewidth + :END: + - /millions/ of lines of code + - large /web of dependencies/ + + easy to break, difficult to maintain + - sophisticated /developer communities/ +* Academia's evolving practice +** Pressure to make the source code available is rising +*** Why + Necessary to + - /reproduce/ and verify, + - /modify/ and /evolve/, *building new experiments* from old ones +#+BEAMER: \pause +*** When and where + - debate started at the end of the 2000s (biology, statistics, medicine, etc.) + - growing in Computer Science since the [[https://www.artifact-eval.org/about.html][ESEC/FSE 2011 Artifact Evaluation context]] (winner: Vouillon and Di Cosmo) + +** What is at stake \hfill in increasing order of difficulty +*** Archival + Research software artifacts must be properly *archived*\\ + \hfill make sure we can /retrieve/ them (/reproducibility/) +#+BEAMER: \pause +*** Identification + Research software artifacts must be properly *referenced*\\ + \hfill make sure we can /identify/ them (/reproducibility/) +#+BEAMER: \pause +*** Metadata + Research software artifacts must be properly *described*\\ + \hfill make it easy to /discover/ them (/visibility/) +#+BEAMER: \pause +*** Citation + Research software artifacts must be properly *cited* /(not the same as referenced!)/\\ + \hfill to give /credit/ to authors (/evaluation/!) 
+** Where we stand +*** Lack of recognition + :PROPERTIES: + :BEAMER_env: block + :BEAMER_COL: .5 + :END: + not (yet) a first class citizen + - in the EOSC plan +# - in the EU copyright reform + - in the scholarly world +#+BEAMER: \pause +*** Lack of consensus on how to + :PROPERTIES: + :BEAMER_env: block + :BEAMER_COL: .5 + :END: + - /archive/ software + - /choose/ a license + - /cite/ a software project +# #+BEAMER: \pause +# *** :B_ignoreheading: +# :PROPERTIES: +# :BEAMER_env: ignoreheading +# :END: +# *** Lack of basic prerequisites to reproducibility +# See a discussion in \url{annex.softwareheritage.org/talks/2018/2018-09-17-STScI_public.pdf} + +*** :B_ignoreheading: + :PROPERTIES: + :BEAMER_env: ignoreheading + :END: +#+BEAMER: \pause +*** ... but a wealth of initiatives! + - Policies: ACM [[https://www.acm.org/publications/policies/artifact-review-badging][Artifact Review and Badging]], AEC, ... + - Working groups: [[https://www.force11.org/software-citation-principles][FORCE11]], [[https://www.rd-alliance.org/groups/software-source-code-ig][RDA]], [[https://www.ouvrirlascience.fr/logiciels-libres-et-open-source/][SPSO]], ... +# - Metrics: [[https://www.ouvrirlascience.fr/about-the-proposal-for-software-indicators-in-open-science-monitor-3/][Open Science Monitor]] (Elsevier!), ... + - Journals: [[https://www.ipol.im/][IPOL]], ReScience, InsightJournal, JOSS, eLife, ACM DL, ... + - Repositories: FigShare, Zenodo, ... 
+ - Common infrastructures: [[https://www.softwareheritage.org][Software Heritage]] + +* Preserving all software source code +#+INCLUDE: "../../common/modules/swh-goals-oneslide-vertical.org::#goals" :minlevel 2 +** A principled infrastructure \hfill \url{http://bit.ly/swhpaper} :noexport: + #+latex: \begin{center} + #+ATTR_LATEX: :width 0.5\linewidth + file:SWH-as-foundation-slim.png + #+latex: \end{center} + #+BEAMER: \pause + #+latex: \centering + #+ATTR_LATEX: :width \extblockscale{.7\linewidth} + file:growth.png + #+BEAMER: \pause +*** Technology + :PROPERTIES: + :BEAMER_col: 0.34 + :BEAMER_env: block + :END: + - transparency and FOSS + - replicas all the way down +*** Content (billions!) + :PROPERTIES: + :BEAMER_col: 0.32 + :BEAMER_env: block + :END: + - *intrinsic identifiers* + - facts and provenance +*** Organization + :PROPERTIES: + :BEAMER_col: 0.33 + :BEAMER_env: block + :END: + - non-profit + - multi-stakeholder + +** Source code is /special/ +*** + :PROPERTIES: + :BEAMER_env: picblock + :BEAMER_OPT: pic=python3-matplotlib.pdf, width=.51\linewidth + :END: + Software **evolves** over time + - projects may last decades + - the /development history/ is key to its /understanding/ + #+BEAMER: \pause + Layers of **complexity** + - /millions/ of lines of code + - large /web of dependencies/ + - sophisticated /developer communities/ +*** :B_ignoreheading: + :PROPERTIES: + :BEAMER_env: ignoreheading + :END: + #+BEAMER: \pause +*** Bottom line + - we must archive /all/ the source code + - we must preserve /all/ the history of its development + - we must **identify** /all/ the archived software artifacts (more than 20 billion today!) +#+BEAMER: \pause + \hfill how can we do this?
+** Evolution of software development +#+INCLUDE: "../../common/modules/vcs-history.org::#timeline" :only-contents t :minlevel 3 + +** Foundations of modern DVCS +#+INCLUDE: "../../common/modules/vcs-history.org::#dvcs-to-merkle" :only-contents t :minlevel 3 + +** In a picture \hfill (from https://github.com/progit/progit2) +#+INCLUDE: "../../common/modules/vcs-history.org::#vcs-explained" :only-contents t :minlevel 3 + +** A massive adoption +#+INCLUDE: "../../common/modules/vcs-history.org::#adoption" :only-contents t :minlevel 3 + +* The SWH-ID +** The SWH-ID schema: syntax and semantics +#+INCLUDE: "../../common/modules/swh-id-syntax.org::#swh-id-syntax" :only-contents t :minlevel 3 +** A worked example + #+LATEX: \centering\forcebeamerstart + #+LATEX: \only<1>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_1.pdf}}} + #+LATEX: \only<2>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/contents.pdf}}} + #+LATEX: \only<3>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_2_contents.pdf}}} + #+LATEX: \only<4>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/directories.pdf}}} + #+LATEX: \only<5>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_3_directories.pdf}}} + #+LATEX: \only<6>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/revisions.pdf}}} + #+LATEX: \only<7>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_4_revisions.pdf}}} + #+LATEX: \only<8>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/releases.pdf}}} + #+LATEX: \only<9>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_5_releases.pdf}}} + #+LATEX: \only<10>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/snapshots.pdf}}} + #+LATEX: \forcebeamerend +** Demo time +*** + 
Let's look at some famous excerpts of source code +#+BEAMER: \pause +*** Apollo 11 source code ([[https://archive.softwareheritage.org/swh:1:cnt:64582b78792cd6c2d67d35da5a11bb80886a6409;origin=https://github.com/virtualagc/virtualagc;lines=245-261/][excerpt]]) :B_block:BMCOL: + :PROPERTIES: + :BEAMER_col: 0.48 + :BEAMER_env: block + :END: + #+LATEX: \includegraphics[width=\linewidth]{apollo-11-cranksilly.png} + # excerpt of routine that asks astronaut to turn around the LEM +#+BEAMER: \pause +*** Quake III source code ([[https://archive.softwareheritage.org/swh:1:cnt:bb0faf6919fc60636b2696f32ec9b3c2adb247fe;origin=https://github.com/id-Software/Quake-III-Arena;lines=549-572/][excerpt]]) :B_block:BMCOL: + :PROPERTIES: + :BEAMER_col: 0.45 + :BEAMER_env: block + :END: + #+LATEX: \includegraphics[width=\linewidth]{quake-carmack-sqrt-1.png} + # smart efficient implementation of 1/sqrt(x) on a CPU without special support +#+BEAMER: \pause +*** :B_ignoreheading: + :PROPERTIES: + :BEAMER_env: ignoreheading + :END: +*** It works!
+ we have /intrinsic/ identifiers for all 20+ billion objects in the archive + +* Save and reference your software +** Prepare your software source code \hfill \href{https://www.softwareheritage.org/save-and-reference-research-software/}{SWH guidelines} +# scientific software (save code now) use-case (one slide)- prepare +#+INCLUDE: "../../common/modules/swh-scientific-preservation.org::#prepare" :only-contents t :minlevel 3 + +** Submit save request on SWH \hfill \href{https://www.softwareheritage.org/save-and-reference-research-software/}{SWH guidelines} +# scientific software (save code now) use-case (one slide) +#+INCLUDE: "../../common/modules/swh-scientific-preservation.org::#save" :only-contents t :minlevel 3 + +** Reference software artifacts in your articles \hfill \href{https://www.softwareheritage.org/save-and-reference-research-software/}{SWH guidelines} +# scientific software (save code now) use-case (one slide) +#+INCLUDE: "../../common/modules/swh-scientific-preservation.org::#reference" :only-contents t :minlevel 3 + +* Challenges +** Much more complex than it seems +*** Software is complex + - Structure :: monolithic/composite; self-contained/external dependencies + - Lifetime :: one-shot/long term + - Community :: one man/one team/distributed community + - Authorship :: complex set of roles + - Authority :: institutions/organizations/communities/single person +#+BEAMER: \pause +*** Various granularities + - Exact status of the source code :: for reproducibility, e.g. +#+latex: \emph{``you can find at \href{https://archive.softwareheritage.org/swh:1:cnt:cdf19c4487c43c76f3612557d4dc61f9131790a4;lines=146-187/}{swh:1:cnt:cdf19c4487c43c76f3612557d4dc61f9131790a4;lines=146-187} the core algorithm used in this article''} + + - (Major) release :: \emph{``This functionality is available in OCaml version 4''} + + - Project :: \emph{``Inria has created OCaml and Scikit-Learn''}.
+** We are not alone +*** Research Software does not exist in isolation :B_picblock: + :PROPERTIES: + :BEAMER_env: picblock + :BEAMER_OPT: pic=python3-matplotlib.pdf, width=.6\linewidth, leftpic=true + :END: + large /web of dependencies/ on non-research software +#+BEAMER: \pause +*** Industry and developers have been here :B_block: + :PROPERTIES: + :BEAMER_env: block + :BEAMER_COL: .5 + :END: + - NSRL (NIST) + - SPDX (Linux Foundation) + - SWH-ID (Software Heritage) + - SWID (ISO Standard) + - Wikidata Software Properties +#+BEAMER: \pause +*** We must :B_block: + :PROPERTIES: + :BEAMER_env: block + :BEAMER_COL: .5 + :END: + - accept the complexity + - avoid reinventing the wheel + - connect with existing communities of practice +** Proposals for the scholarly world +take from https://annex.softwareheritage.org/public/talks/2019/2019-09-24-ENS-Cachan.pdf + + +* Connecting communities +** FORCE11 Software Citation Implementation WG +*** Spawned from the FORCE11 Software Citation WG (2/2016) + led by Daniel Katz, Kyle Niemeyer and Arfon Smith +*** Co-chairs + Neil Chue Hong, Martin Fenner, Daniel Katz +#+TODO:fill in with links + +** RDA Software Source Code Interest Group +*** Co-chairs + Roberto Di Cosmo, Neil Chue Hong, Mingfang Wu, Julia Collins +*** Objectives + a forum for discussing /software/ inside RDA +*** Chronology + - RDA 10, Montreal 9/2017 :: motivations, survey of ontologies, metadata use cases + - RDA 11, Berlin 3/2018 :: identification of gaps in metadata + - RDA 13, Philadelphia 4/2019 :: FAIR for Software Source Code + - RDA 15, Melbourne 3/2020 :: Should we create a FAIR4Software WG? 
+*** Web page + https://www.rd-alliance.org/groups/software-source-code-ig +** RDA WG on Software Source Code Identification +*** Joint RDA & FORCE11 WG which spawned from + RDA's Software Source Code IG & FORCE11's SCIWG +*** Co-chairs + Roberto Di Cosmo, Daniel Katz, Martin Fenner +*** Objectives + - bring together people involved/interested in /software identification/ + - produce concrete recommendations for the academic community +*** Chronology + - FORCE2019, Edinburgh 10/2019 :: Research Software Hackathon - identification track + - RDA 15, Melbourne 3/2020 :: Software identification use cases +*** + https://www.rd-alliance.org/groups/software-source-code-identification-wg +** Inria's Software Citation Working Group +*** Members + \hfill task force of Inria's scientific council +*** Mission + - map the landscape + - collect best practices + - identify potential Inria contributions + - make recommendations +*** First outcome + Position paper available from + \hfill https://hal.archives-ouvertes.fr/hal-02135891 + +* The road ahead +** Conclusions +Conclusions from: https://annex.softwareheritage.org/public/talks/2019/2019-09-24-ENS-Cachan.pdf +** Come in, we're open ! +#+INCLUDE: "../../common/modules/last-slide-references.org::#references-identifiers" :only-contents t :minlevel 3