diff --git a/common/modules/vault.org b/common/modules/vault.org index 96d2dad..1ec6745 100644 --- a/common/modules/vault.org +++ b/common/modules/vault.org @@ -1,122 +1,125 @@ #+COLUMNS: %40ITEM %10BEAMER_env(Env) %9BEAMER_envargs(Env Args) %10BEAMER_act(Act) %4BEAMER_col(Col) %10BEAMER_extra(Extra) %8BEAMER_opt(Opt) #+INCLUDE: "prelude.org" :minlevel 1 # Requirements: # #+LATEX_HEADER_EXTRA: \usepackage{tikz} #+LATEX_HEADER_EXTRA: \usetikzlibrary{arrows,shapes} #+LATEX_HEADER_EXTRA: \definecolor{swh-orange}{RGB}{254,205,27} #+LATEX_HEADER_EXTRA: \definecolor{swh-red}{RGB}{226,0,38} #+LATEX_HEADER_EXTRA: \definecolor{swh-green}{RGB}{77,181,174} * Vault :PROPERTIES: :CUSTOM_ID: main :END: ** Bulk download :PROPERTIES: :CUSTOM_ID: overview :END: **** Vault service - source code is thoroughly deduplicated within the Software Heritage archive - bulk download of large artefacts (e.g., a Linux kernel release) requires collecting millions of objects - the *Software Heritage Vault* cooks and caches source code bundles for bulk download needs **** Tech bits - *RESTful API* to request downloads, notifications, and monitoring - \url{docs.softwareheritage.org/devel/swh-vault} ** Vault walkthrough *** Software identifier to request download -#+BEAMER: \footnotesize -The swh-id *swh:1:rev:a86747d201ab8f8657d145df4376676d5e47cf9f* - -is composed of: -- the object type *swh:1:rev* -- and the sha1 hash as the object identifier \\ - *a86747d201ab8f8657d145df4376676d5e47cf9f* -We will use the object identifier to create a bundle to download -#+BEAMER: \pause *** Requesting download with swh-id #+BEAMER: \tiny #+BEGIN_SRC sh $ curl -X POST /api/1/vault/revision/a86747d2.../gitfast #+END_SRC #+BEAMER: \pause *** Email notification #+BEAMER: \footnotesize Optionally, pass an email POST parameter containing the e-mail address to notify when the bundle cooking has ended. 
+*** Response +#+BEAMER: \tiny +#+BEGIN_SRC json +{ + 'fetch_url': '/api/1/vault/revision/a86747d2.../gitfast/raw/', + 'progress_message': None, + 'status': 'new', + 'id': 4, + 'obj_id': 'a86747d201ab8f8657d145df4376676d5e47cf9f', + 'obj_type': 'revision_gitfast' +} +#+END_SRC ** Vault walkthrough *** Checking progress #+BEAMER: \tiny #+BEGIN_SRC sh $ curl /api/1/vault/revision/a86747d2.../gitfast #+END_SRC # can we cook objects that aren't revisions? #+BEAMER: \pause *** Response #+BEAMER: \tiny #+BEGIN_SRC json { 'fetch_url': '/api/1/vault/revision/a86747d2.../gitfast/raw/', 'progress_message': None, 'status': 'pending', 'id': 4, 'obj_id': 'a86747d201ab8f8657d145df4376676d5e47cf9f', 'obj_type': 'revision_gitfast' } #+END_SRC #+BEAMER: \pause *** What's your status? #+BEAMER: \small \tikzstyle{fail} = [draw, thin, fill=swh-red!80!swh-orange, minimum height=1.5em] \tikzstyle{processing} = [draw, thin, fill=swh-orange!100, minimum height=1.5em] \tikzstyle{success} = [draw, thin, fill=swh-green!80!swh-orange, minimum height=1.5em] \begin{figure} \begin{tikzpicture}[node distance=1.8cm, auto,>=latex', thick] % We need to set a bounding box first. Otherwise the diagram % will change position for each frame. 
%\path[use as bounding box] (-1,0) rectangle (10,-2); \path[->]<1-> node[processing] (new) {new} node[processing, right of=new] (pending) {pending} node[processing, right of=pending] (loading) {} node[success, right of=loading] (done) {done} node[fail, below of=loading] (failed) {failed} (new) edge node {} (pending) (pending) edge node {} (loading) (loading) edge node {} (failed) (loading) edge node {} (done); \end{tikzpicture} \end{figure} ** Vault walkthrough *** Checking progress #+BEAMER: \tiny #+BEGIN_SRC sh $ curl /api/1/vault/revision/a86747d2.../gitfast #+END_SRC #+BEAMER: \pause *** Response #+BEAMER: \tiny #+BEGIN_SRC json { 'fetch_url': '/api/1/vault/revision/a86747d2.../gitfast/raw/', 'progress_message': None, 'status': 'done', 'id': 4, 'obj_id': 'a86747d201ab8f8657d145df4376676d5e47cf9f', 'obj_type': 'revision_gitfast' } #+END_SRC #+BEAMER: \pause *** Download available when status is marked /done/ #+BEAMER: \tiny #+BEGIN_SRC sh $ curl /api/1/vault/revision/a86747d2.../gitfast/raw/ \ -o path/to/revision.gitfast.gz $ git init $ zcat path/to/revision.gitfast.gz | git fast-import $ git checkout HEAD #+END_SRC