diff --git a/common/modules/vault.org b/common/modules/vault.org index 0ccc37a..1b908cd 100644 --- a/common/modules/vault.org +++ b/common/modules/vault.org @@ -1,154 +1,157 @@ #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) #+INCLUDE: "prelude.org" :minlevel 1 # Requirements: # #+LATEX_HEADER_EXTRA: \usepackage{tikz} #+LATEX_HEADER_EXTRA: \usetikzlibrary{arrows,shapes} #+LATEX_HEADER_EXTRA: \definecolor{swh-orange}{RGB}{254,205,27} #+LATEX_HEADER_EXTRA: \definecolor{swh-red}{RGB}{226,0,38} #+LATEX_HEADER_EXTRA: \definecolor{swh-green}{RGB}{77,181,174} * Vault :PROPERTIES: :CUSTOM_ID: main :END: ** Bulk download :PROPERTIES: :CUSTOM_ID: overview :END: **** Vault service - source code is thoroughly deduplicated within the Software Heritage archive - bulk download of large artefacts (e.g., a Linux kernel release) requires collecting millions of objects - the *Software Heritage Vault* cooks and caches source code bundles for bulk download needs **** Tech bits - *RESTful API* to request downloads, notifications, and monitoring - \url{docs.softwareheritage.org/devel/swh-vault} ** Request cooking :PROPERTIES: :CUSTOM_ID: cooking :END: *** #+BEAMER: \footnotesize #+BEGIN_SRC sh $ curl -X POST /api/1/vault/revision/a86747d2.../gitfast #+END_SRC #+BEGIN_SRC json { 'fetch_url': '/api/1/vault/revision/a86747d2.../gitfast/raw/', 'progress_message': None, 'status': 'new', 'id': 4, 'obj_id': 'a86747d201ab8f8657d145df4376676d5e47cf9f', 'obj_type': 'revision_gitfast' } #+END_SRC *** Email notification an optional email POST parameter can be used to request notification of bundle availability ** Cooking progress :PROPERTIES: :CUSTOM_ID: progress :END: *** #+BEAMER: \small \tikzstyle{fail} = [draw, thin, fill=swh-red!80!swh-orange, minimum height=1.5em] \tikzstyle{processing} = [draw, thin, fill=swh-orange!100, minimum height=1.5em] \tikzstyle{success} = [draw, thin, 
fill=swh-green!80!swh-orange, minimum height=1.5em] \begin{figure} \begin{tikzpicture}[node distance=1.8cm, auto,>=latex', thick] % We need to set a bounding box first. Otherwise the diagram % will change position for each frame. %\path[use as bounding box] (-1,0) rectangle (10,-2); \path[->]<1-> node[processing] (new) {new} node[processing, right of=new] (pending) {pending} node[processing, right of=pending] (loading) {} node[success, right of=loading] (done) {done} node[fail, below of=loading] (failed) {failed} (new) edge node {} (pending) (pending) edge node {} (loading) (loading) edge node {} (failed) (loading) edge node {} (done); \end{tikzpicture} \end{figure} *** #+BEAMER: \footnotesize #+BEGIN_SRC sh $ curl /api/1/vault/revision/a86747d2.../gitfast #+END_SRC #+BEGIN_SRC json { 'fetch_url': '/api/1/vault/revision/a86747d2.../gitfast/raw/', 'progress_message': None, 'status': 'pending', 'id': 4, 'obj_id': 'a86747d201ab8f8657d145df4376676d5e47cf9f', 'obj_type': 'revision_gitfast' } #+END_SRC ** Download :PROPERTIES: :CUSTOM_ID: download :END: *** #+BEAMER: \footnotesize #+BEGIN_SRC sh $ curl /api/1/vault/revision/a86747d2.../gitfast #+END_SRC #+BEGIN_SRC json { 'fetch_url': '/api/1/vault/revision/a86747d2.../gitfast/raw/', 'progress_message': None, 'status': 'done', 'id': 4, 'obj_id': 'a86747d201ab8f8657d145df4376676d5e47cf9f', 'obj_type': 'revision_gitfast' } #+END_SRC #+BEAMER: \pause *** #+BEAMER: \footnotesize #+BEGIN_SRC sh $ curl /api/1/vault/revision/a86747d2.../gitfast/raw/ \ -o path/to/revision.gitfast.gz $ git init $ zcat path/to/revision.gitfast.gz | git fast-import $ git checkout HEAD #+END_SRC * Vault - short :noexport: :PROPERTIES: :CUSTOM_ID: vault-short :END: ** Bulk download + :PROPERTIES: + :CUSTOM_ID: bulk-download + :END: - source code is thoroughly deduplicated within the Software Heritage archive - bulk download of large artefacts (e.g., a Linux kernel release) requires collecting millions of objects - the *Software Heritage Vault* cooks 
and caches source code bundles for bulk download needs #+BEAMER: \pause *** #+BEAMER: \scriptsize #+BEGIN_SRC $ curl -X POST /api/1/vault/revision/a86747d2.../gitfast { 'fetch_url': '/api/1/vault/revision/a86747d2.../gitfast/raw/', 'progress_message': None, 'status': 'new', 'id': 4, 'obj_id': 'a86747d201ab8f8657d145df4376676d5e47cf9f', 'obj_type': 'revision_gitfast' } #+END_SRC #+BEAMER: \pause #+BEGIN_SRC $ curl -o dump.gz /api/1/vault/revision/a86747d2.../gitfast/raw/ $ git init $ zcat dump.gz | git fast-import $ git checkout HEAD #+END_SRC