diff --git a/talks-public/2021-01-27-Pidapalooza/2021-01-27-Pidapalooza.org b/talks-public/2021-01-27-Pidapalooza/2021-01-27-Pidapalooza.org new file mode 100644 index 0000000..2657773 --- /dev/null +++ b/talks-public/2021-01-27-Pidapalooza/2021-01-27-Pidapalooza.org @@ -0,0 +1,308 @@ +#+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) +#+TITLE: Intrinsic identifiers +#+SUBTITLE: A digital fingerprint identifying software source code +#+AUTHOR: Roberto Di Cosmo +#+EMAIL: roberto@dicosmo.org @rdicosmo @swheritage +#+BEAMER_HEADER: \date[January 27th, 2021]{January 27th, 2021\\[-1em]} +#+BEAMER_HEADER: \title[The SWHID intrinsic identifier for software]{Intrinsic identifiers and the SWHID} +#+BEAMER_HEADER: \author[Roberto Di Cosmo~~~~CC-BY 4.0]{Roberto Di Cosmo\\[1em]} +#+KEYWORDS: software heritage legacy preservation knowledge mankind technology +#+LATEX_HEADER: \usepackage{tcolorbox} +#+LATEX_HEADER: \definecolor{links}{HTML}{2A1B81} +#+LATEX_HEADER: \hypersetup{colorlinks,linkcolor=,urlcolor=links} +# +# prelude.org contains all the information needed to export the main beamer latex source +# use prelude-toc.org to get the table of contents +# + +#+INCLUDE: "../../common/modules/prelude-toc.org" :minlevel 1 + + +#+INCLUDE: "../../common/modules/169.org" + +# +LaTeX_CLASS_OPTIONS: [aspectratio=169,handout,xcolor=table] +#+LATEX_HEADER: \usepackage{bbding} +#+LATEX_HEADER: \usepackage{tcolorbox} +#+LATEX_HEADER: \DeclareUnicodeCharacter{66D}{\FiveStar} + + +# +# If you want to change the title logo it's here +# +# +BEAMER_HEADER: \titlegraphic{\includegraphics[width=0.7\textwidth]{SWH-logo}} + +# aspect ratio can be changed, but the slides need to be adapted +# - compute a "resizing factor" for the images (macro for picblocks?) 
+# +# set the background image +# +# https://pacoup.com/2011/06/12/list-of-true-169-resolutions/ +# +#+BEAMER_HEADER: \pgfdeclareimage[height=90mm,width=160mm]{bgd}{swh-world-169.png} +#+BEAMER_HEADER: \setbeamertemplate{background}{\pgfuseimage{bgd}} +#+LATEX: \addtocounter{framenumber}{-1} + +* Extrinsic and Intrinsic identifiers in a nutshell +** Extrinsic identifiers + \vspace{-.4em} +*** The Identifier has no /per se/ relation with the designated Object + A /register/ keeps the correspondence between the identifier and the object + - pre-internet era :: passport number, social security number, ISBN, ISSN, etc. + - internet era :: DOI, Handle, Ark, PURLs, RRID, etc.\pause +*** A word about the /Persistent/ adjective in Persistent Identifiers + - this technology /cannot guarantee/ persistence by itself! \pause Example: + #+ATTR_LATEX: :width .4\linewidth +**** + :PROPERTIES: + :BEAMER_COL: .4 + :END: + file:DOI.png +**** + :PROPERTIES: + :BEAMER_COL: .5 + :END: + - DOI resolution can change + - content at URL can change + - no way for the user to notice any of these changes from the outside\\ + \mbox{} \pause +**** :B_ignoreheading: + :PROPERTIES: + :BEAMER_env: ignoreheading + :END: + \vspace{.4em} + \hfill "persistence... 
is a function of /administrative/ care"\hfill\mbox{}\\ + \hfill [[https://tools.ietf.org/html/rfc3650][RFC 3650 (Handle System Overview, 2003)]] +** Intrinsic identifiers +*** The Identifier is derived from the designated Object + /No register/ needed to keep the correspondence between the identifier and the object + - pre-internet era :: musical notation, chemical notation (/NaCl/ is table salt)\pause + - internet era :: cryptographic hashes for distributed software development, Bitcoin\pause +**** + :PROPERTIES: + :BEAMER_COL: .5 + :END: + file:VCS_history_timeline.png +**** + :PROPERTIES: + :BEAMER_COL: .5 + :END: + - scientific breakthrough in the 1990's + - massive adoption in the 2010's + - 150+M repositories (GitHub, BitBucket, GitLab) + - 40.000.000 users +**** :B_ignoreheading: + :PROPERTIES: + :BEAMER_env: ignoreheading + :END: + \mbox{}\\ + \pause + /Persistence is built-in:/ nobody can change the designated object, and get away unnoticed!\pause +*** + Good news: now easily available for you via the Software Heritage Identifiers (SWHID)! +* Software Heritage and the SWHID +** Software Heritage in a nutshell + :PROPERTIES: + :CUSTOM_ID: goals + :END: +#+latex: \begin{center} +#+ATTR_LATEX: :width \extblockscale{.8\linewidth} +file:SWH-logo+motto.pdf +#+latex: \end{center} +*** Collect, preserve and share /all/ software source code + \hfill Over 150 million repositories already, ... and counting! +#+BEAMER: \pause +*** Addressing the ARDC key needs for (research) software source code... +**** + :PROPERTIES: + :BEAMER_COL: .5 + :END: + - Archive :: ensure it is not lost + - Reference :: identify the object +**** + :PROPERTIES: + :BEAMER_COL: .5 + :END: + - Describe :: make it findable + - Cite :: give credit to authors +#+BEAMER: \pause +*** ... long term, non profit initiative with broad support + Academia and Government: Inria, UNESCO, CNRS, French National Open Science Fund, + DANS, universities... 
+ Industry: Intel, Microsoft, GitHub, VMware, Societe Generale, ... +** Getting the source code in the archive +#+INCLUDE: "../../common/modules/swh-ardc.org::#swh-a" :only-contents t :minlevel 3 +** SWHID: the source code fingerprint +#+INCLUDE: "../../common/modules/swh-ardc.org::#swh-r" :only-contents t :minlevel 3 +** Adoption highlights ... + #+INCLUDE: "../../common/modules/swh-adoption-academic.org::#adoption" :only-contents t :minlevel 3 +** Come in, we're open! +*** + \url{https://www.youtube.com/watch?v=8nlSvYh7VpI} --- full talk \\ + \url{www.softwareheritage.org/blog} --- learn more \\ + \url{save.softwareheritage.org} --- save code now \\ + \url{deposit.softwareheritage.org} --- deposit code and metadata \\ + \url{www.softwareheritage.org/swhap} --- legacy software acquisition process \\ +# #+BEAMER: \vspace{-1mm} \flushright {\Huge Questions?} \vfill + +*** References :B_block: + :PROPERTIES: + :BEAMER_env: block + :END: + #+BEGIN_EXPORT latex + \begin{thebibliography}{Foo Bar, 1969} + \footnotesize + \bibitem{EOSCSirs2020} EOSC SIRS Task Force + \newblock \emph{Scholarly Infrastructures for Research Software} + \newblock 2020, European Commission, https://doi.org/10.2777/28598 + + \bibitem{Abramatic2018} Jean-François Abramatic, Roberto Di Cosmo, Stefano Zacchiroli\newblock + \emph{Building the Universal Archive of Source Code},\\ + Communications of the ACM, October 2018 + \href{https://doi.org/10.1145/3183558}{(10.1145/3183558)} + \bibitem{DiCosmo2019} Roberto Di Cosmo, Morane Gruenpeter, Stefano Zacchiroli\newblock + \emph{Referencing Source Code Artifacts: a Separate Concern in Software Citation},\\ + Computing in Science and Engineering, IEEE, pp.1-9. 
\href{https://dx.doi.org/10.1109/MCSE.2019.2963148}{(10.1109/MCSE.2019.2963148)} + \href{https://hal.archives-ouvertes.fr/hal-02446202}{(hal-02446202)} + \end{thebibliography} + #+END_EXPORT + + +* Appendix :B_appendix: + :PROPERTIES: + :BEAMER_env: appendix + :END: +** + \vfill + \centerline{\Huge Appendix} + \vfill +** Mirrors +*** Thomas Jefferson, February 18, 1791 :B_block: + :PROPERTIES: + :BEAMER_ACT: + :BEAMER_env: block + :END: +#+latex: {\em + ...let us save what remains: not by vaults and locks which fence them + from the public eye and use, in consigning them to the waste of time, + but by such a multiplication of copies, as shall place them beyond + the reach of accident. +#+latex: } + #+BEAMER: \pause +*** Welcoming ENEA :B_block: + :PROPERTIES: + :BEAMER_env: picblock + :BEAMER_OPT: pic=LogoENEAcompletoENG.png, leftpic=true, width=.7\linewidth + :END: + - first *institutional* mirror + - increased resilience + - *AI infrastructure* for researchers + - stepping stone to \endgraf + \hfill a European joint effort +** A revolutionary infrastructure for software source code + #+BEAMER: \vspace{-2mm} +*** The /graph/ of Software Development :B_picblock: + :PROPERTIES: + :BEAMER_env: picblock + :BEAMER_COL: .5 + :BEAMER_OPT: pic=git-merkle/merkle-vertical, leftpic=true, width=.4\linewidth + :END: + All software development with its history, + in *a single graph* ... + #+BEAMER: \pause \vspace{-2mm} +*** Reference platform for /Big Code/ :B_picblock: + :PROPERTIES: + :BEAMER_opt: pic=universal, leftpic=true, width=.4\linewidth + :BEAMER_env: picblock + :BEAMER_COL: .5 + :BEAMER_act: + :END: + *One uniform data structure* enables /massive/ machine learning + for *quality, cybersecurity*, etc. + +*** :B_ignoreheading: + :PROPERTIES: + :BEAMER_env: ignoreheading + :END: +#+BEAMER: \pause +*** First datasets are available! 
+ - full graph of software development (~20Bn nodes, ~200Bn edges) + see Pietri et al., MSR 2019 https://dx.doi.org/10.1109/MSR.2019.00030 + - MSR 2020 mining competition +** Automation, and storage :maybemove: + #+BEAMER: \begin{center} + #+BEAMER: \mode<beamer>{\only<1>{\includegraphics[width=\extblockscale{.9\textwidth}]{swh-dataflow-merkle-listers.pdf}}} + #+BEAMER: \only<2-3>{\includegraphics[width=\extblockscale{.9\textwidth}]{swh-dataflow-merkle.pdf}} + #+BEAMER: \end{center} + #+BEAMER: \pause + #+BEAMER: \pause + - full development history *permanently archived!* + - over 8 billion unique source files from 140+ million origins +** Archiving /public/ code + #+latex: \begin{center} + #+ATTR_LATEX: :width 0.7\linewidth + file:codeetalab.png + #+latex: \end{center} +#+BEAMER: \pause + https://code.etalab.gouv.fr +** ENEA mirror +*** Thomas Jefferson, February 18, 1791 :B_block: + :PROPERTIES: + :BEAMER_ACT: + :BEAMER_env: block + :END: +#+latex: {\em + ...let us save what remains: not by vaults and locks which fence them + from the public eye and use, in consigning them to the waste of time, + but by such a multiplication of copies, as shall place them beyond + the reach of accident. 
+#+latex: } + #+BEAMER: \pause +*** Welcoming ENEA :B_block: + :PROPERTIES: + :BEAMER_env: picblock + :BEAMER_OPT: pic=LogoENEAcompletoENG.png, leftpic=true, width=.7\linewidth + :END: + - first *institutional* mirror + - increased resilience + - *AI infrastructure* for researchers + - stepping stone to \endgraf + \hfill a European joint effort +** The Software Heritage Acquisition Process (SWHAP) +*** Paris Call on Software Source Code + “[We call to] support efforts to gather and preserve the artifacts and + narratives of the history of computing, while the earlier creators are still + alive” +#+BEAMER: \pause +*** SWHAP: an important step forward + - detailed guidelines to *curate* landmark legacy source code + and *archive* it on Software Heritage + - intense cooperation with *Università di Pisa* and *UNESCO* + - open to all, we'll promote it worldwide +*** + https://www.softwareheritage.org/swhap +** A walkthrough + - Browse [[https://archive.softwareheritage.org][the archive]] + - [[https://save.softwareheritage.org][Trigger archival]] of your preferred software in a breeze + - Get and use SWHIDs ([[https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html][full specification available online]]) + - Cite software [[https://www.softwareheritage.org/2020/05/26/citing-software-with-style/][with the biblatex-software style]] from CTAN + - Example use in a research article: compare Fig. 
1 and conclusions + - in [[http://www.dicosmo.org/Articles/2012-DaneluttoDiCosmo-Pcs.pdf][the 2012 version]] + - in [[https://www.dicosmo.org/share/parmap_swh.pdf][the updated version]] using SWHIDs and Software Heritage + - Example use in a research article: extensive use of SWHIDs in [[https://www.dicosmo.org/Articles/2020-ReScienceC.pdf][a replication experiment]] + - [[https://doc.archives-ouvertes.fr/en/deposit/deposit-software-source-code/][Curated deposit in SWH via HAL]], see for example: + [[https://hal.archives-ouvertes.fr/hal-02130801][LinBox]], [[https://hal.archives-ouvertes.fr/hal-01897934][SLALOM]], [[https://hal.archives-ouvertes.fr/hal-02130729][Givaro]], [[https://hal.archives-ouvertes.fr/hal-02137040][NS2DDV]], [[https://hal.archives-ouvertes.fr/lirmm-02136558][SumGra]], [[https://hal.archives-ouvertes.fr/hal-02155786][Coq proof]], ... + - Rescue landmark legacy software, see the [[https://www.softwareheritage.org/swhap/][SWHAP process with UNESCO]] +** A worked example + #+LATEX: \centering\forcebeamerstart + #+LATEX: \only<1>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_1.pdf}}} + #+LATEX: \only<2>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/contents.pdf}}} + #+LATEX: \only<3>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_2_contents.pdf}}} + #+LATEX: \only<4>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/directories.pdf}}} + #+LATEX: \only<5>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_3_directories.pdf}}} + #+LATEX: \only<6>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/revisions.pdf}}} + #+LATEX: \only<7>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_4_revisions.pdf}}} + #+LATEX: \only<8>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/releases.pdf}}} + 
#+LATEX: \only<9>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/merkle_5_releases.pdf}}} + #+LATEX: \only<10>{\colorbox{white}{\includegraphics[width=\extblockscale{\linewidth}]{git-merkle/snapshots.pdf}}} + #+LATEX: \forcebeamerend + diff --git a/talks-public/2021-01-27-Pidapalooza/Makefile b/talks-public/2021-01-27-Pidapalooza/Makefile new file mode 100644 index 0000000..68fbee7 --- /dev/null +++ b/talks-public/2021-01-27-Pidapalooza/Makefile @@ -0,0 +1 @@ +include ../Makefile.slides