diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..5ed5651 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,5 @@ +# Enable black +be318c7fc864410fb44187fdaeade22ca3ee9914 + +# python: Reformat code with black 22.3.0 +19fc56a7ffa2a7715b8b0dcb1673f0d6f697313a diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 05398bb..1c95e3d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,42 +1,40 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.1.0 hooks: - id: trailing-whitespace - id: check-json - id: check-yaml - repo: https://gitlab.com/pycqa/flake8 rev: 4.0.1 hooks: - id: flake8 + additional_dependencies: [flake8-bugbear==22.3.23] - repo: https://github.com/codespell-project/codespell rev: v2.1.0 hooks: - id: codespell name: Check source code spelling stages: [commit] - - id: codespell - name: Check commit message spelling - stages: [commit-msg] - repo: local hooks: - id: mypy name: mypy entry: mypy args: [swh] pass_filenames: false language: system types: [python] - repo: https://github.com/PyCQA/isort rev: 5.10.1 hooks: - id: isort - repo: https://github.com/python/black - rev: 19.10b0 + rev: 22.3.0 hooks: - id: black diff --git a/PKG-INFO b/PKG-INFO index 085f9fe..a08850f 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,42 +1,38 @@ Metadata-Version: 2.1 Name: swh.vault -Version: 1.6.1 +Version: 1.7.2 Summary: Software Heritage vault Home-page: https://forge.softwareheritage.org/diffusion/DVAU/ Author: Software Heritage developers Author-email: swh-devel@inria.fr -License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-vault Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-vault/ -Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/x-rst Provides-Extra: testing Provides-Extra: graph License-File: LICENSE License-File: AUTHORS Software Heritage - Vault ========================= User-facing service that allows to retrieve parts of the archive as self-contained bundles (e.g., individual releases, entire repository snapshots, etc.) The creation of a bundle is called "cooking" a bundle. Architecture ------------ The vault is made of two main parts: 1. a stateful RPC server called the **backend** 2. 
Celery tasks, called **cookers** - - diff --git a/debian/changelog b/debian/changelog index a811bf4..d173696 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,548 +1,568 @@ -swh-vault (1.6.1-1~swh1~bpo10+1) buster-swh; urgency=medium +swh-vault (1.7.2-1~swh1) unstable-swh; urgency=medium - * Rebuild for buster-swh + * New upstream release 1.7.2 - (tagged by David Douard + on 2022-08-04 16:33:28 +0200) + * Upstream changes: - v1.7.2 - -- Software Heritage autobuilder (on jenkins-debian1) Fri, 04 Mar 2022 15:57:33 +0000 + -- Software Heritage autobuilder (on jenkins-debian1) Thu, 04 Aug 2022 14:42:53 +0000 + +swh-vault (1.7.1-1~swh1) unstable-swh; urgency=medium + + * New upstream release 1.7.1 - (tagged by David Douard + on 2022-08-04 15:50:00 +0200) + * Upstream changes: - v1.7.1 + + -- Software Heritage autobuilder (on jenkins-debian1) Thu, 04 Aug 2022 13:56:27 +0000 + +swh-vault (1.7.0-1~swh1) unstable-swh; urgency=medium + + * New upstream release 1.7.0 - (tagged by Nicolas Dandrimont + on 2022-06-22 14:15:01 +0200) + * Upstream changes: - Release swh.vault 1.7 - Improve sentry + captures - Enhance test execution times - Update linters and + other tools - cache: Prepare for objstorage 2.0 API changes + + -- Software Heritage autobuilder (on jenkins-debian1) Wed, 22 Jun 2022 12:20:01 +0000 swh-vault (1.6.1-1~swh1) unstable-swh; urgency=medium * New upstream release 1.6.1 - (tagged by Valentin Lorentz on 2022-03-04 16:51:54 +0100) * Upstream changes: - v1.6.1 - * conftest: drop deprecated args from objstorage initializers - * server tests: refactor config fixtures to match production - * server: ensure check_config is called during app instantiation - * Finish removing aiohttp -- Software Heritage autobuilder (on jenkins-debian1) Fri, 04 Mar 2022 15:55:44 +0000 swh-vault (1.6.0-1~swh1) unstable-swh; urgency=medium * New upstream release 1.6.0 - (tagged by Valentin Lorentz on 2022-03-04 13:28:24 +0100) * Upstream changes: - v1.6.0 - * cache: Remove unused method add_stream - * Rewrite the server to use flask instead of aiohttp -- Software Heritage autobuilder (on jenkins-debian1) Fri, 04 Mar 2022 12:32:32 +0000 swh-vault (1.5.0-1~swh1) unstable-swh; urgency=medium * New upstream release 1.5.0 - (tagged by David Douard on 2022-02-24 17:20:11 +0100) * Upstream changes: - v1.5.0 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 24 Feb 2022 16:23:28 +0000 swh-vault (1.4.2-1~swh1) unstable-swh; urgency=medium * New upstream release 1.4.2 - (tagged by Valentin Lorentz on 2022-02-08 11:43:01 +0100) * Upstream changes: - v1.4.2 - * Fix PyPI upload -- Software Heritage autobuilder (on jenkins-debian1) Tue, 08 Feb 2022 10:45:44 +0000 swh-vault (1.3.0-1~swh1) unstable-swh; urgency=medium * New upstream release 1.3.0 - (tagged by Valentin Lorentz on 2021-11-16 15:02:00 +0100) * Upstream changes: - v1.3.0 - * docs: Various fixes - * Remove references to swh.model.identifiers - * git_bare: Send progress updates while cooking -- Software Heritage autobuilder (on jenkins-debian1) Tue, 16 Nov 2021 14:05:28 +0000 swh-vault (1.2.0-1~swh1) unstable-swh; urgency=medium * New upstream release 1.2.0 - (tagged by Valentin Lorentz on 2021-09-17 17:44:51 +0200) * Upstream changes: - v1.2.0 - * Add support for custom SMTP configuration - * Add the whole traceback in error messages. - * Make object corruption non-fatal. 
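The architecture described in PKG-INFO above maps onto a small client-facing API: the backend exposes cook/progress/fetch, and the cooking itself happens in Celery workers. A minimal sketch of that flow, assuming the standard swh remote-client convention for `get_vault` (the URL and SWHID are placeholders; the three methods mirror the `VaultBackend` code further down in this diff):

```python
# Ask the stateful backend to cook a bundle (this enqueues a Celery task for
# a cooker), then poll progress and fetch the finished bytes.
from swh.model.swhids import CoreSWHID
from swh.vault import get_vault

vault = get_vault(cls="remote", url="http://localhost:5005/")  # placeholder URL
swhid = CoreSWHID.from_string("swh:1:dir:" + "00" * 20)        # placeholder SWHID

vault.cook("flat", swhid, email="user@example.org")  # e-mailed when ready
if vault.progress("flat", swhid)["task_status"] == "done":
    bundle = vault.fetch("flat", swhid)  # the cooked tarball, as bytes
```

For one-off debugging, the `swh vault cook` CLI command shown later in this diff runs a cooker in-process against an in-memory backend, without Celery.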
-- Software Heritage autobuilder (on jenkins-debian1) Fri, 17 Sep 2021 15:47:52 +0000 swh-vault (1.1.0-1~swh1) unstable-swh; urgency=medium * New upstream release 1.1.0 - (tagged by Valentin Lorentz on 2021-09-08 14:29:18 +0200) * Upstream changes: - v1.1.0 - * git_bare: Remove sample git hooks from output - * git_bare: Fix crash on submodules -- Software Heritage autobuilder (on jenkins-debian1) Wed, 08 Sep 2021 12:32:28 +0000 swh-vault (1.0.2-1~swh1) unstable-swh; urgency=medium * New upstream release 1.0.2 - (tagged by Valentin Lorentz on 2021-08-26 14:23:53 +0200) * Upstream changes: - v1.0.2 - * Fix compatibility with dulwich 0.19.11 - (needed for builds on debian 10) -- Software Heritage autobuilder (on jenkins-debian1) Thu, 26 Aug 2021 12:27:30 +0000 swh-vault (1.0.1-1~swh1) unstable-swh; urgency=medium * New upstream release 1.0.1 - (tagged by Valentin Lorentz on 2021-08-26 14:13:53 +0200) * Upstream changes: - v1.0.1 - * Re-add pytest.mark.graph to fix debian builds. -- Software Heritage autobuilder (on jenkins-debian1) Thu, 26 Aug 2021 12:17:30 +0000 swh-vault (1.0.0-1~swh1) unstable-swh; urgency=medium * New upstream release 1.0.0 - (tagged by Valentin Lorentz on 2021-08-26 11:54:21 +0200) * Upstream changes: - v1.0.0 - * Feature-complete git-bare cooker - * Rename bundle types and use SWHIDs everywhere instead of raw sha1_git -- Software Heritage autobuilder (on jenkins-debian1) Thu, 26 Aug 2021 09:57:07 +0000 swh-vault (0.6.4-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.4 - (tagged by Antoine R. Dumont (@ardumont) on 2021-06-29 13:18:27 +0200) * Upstream changes: - v0.6.4 - Fix tests when the umask is not 022 - tests: Fix support of Dulwich < 0.20 - conftest: Use postgresql keyword for the configuration -- Software Heritage autobuilder (on jenkins-debian1) Tue, 29 Jun 2021 11:20:54 +0000 swh-vault (0.6.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.3 - (tagged by Antoine R. Dumont (@ardumont) on 2021-06-29 11:55:43 +0200) * Upstream changes: - v0.6.3 - git_bare: Add support for filtered content with Git >= 2.21 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 29 Jun 2021 09:59:47 +0000 swh-vault (0.6.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.2 - (tagged by Antoine R. Dumont (@ardumont) on 2021-06-29 11:08:44 +0200) * Upstream changes: - v0.6.2 - Make swh.graph dependency optional 2/2 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 29 Jun 2021 09:12:28 +0000 swh-vault (0.6.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.1 - (tagged by Antoine R. Dumont (@ardumont) on 2021-06-29 10:12:19 +0200) * Upstream changes: - v0.6.1 - Make swh.graph dependency optional -- Software Heritage autobuilder (on jenkins-debian1) Tue, 29 Jun 2021 08:15:52 +0000 swh-vault (0.6.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-06-28 12:18:51 +0200) * Upstream changes: - v0.6.0 - git_bare: Add support for skipped/missing/absent/hidden contents - git_bare: Optionally access the objstorage directly - git_bare: Use batched content_get() instead of content_find() - git_bare: Use directory_get_entries instead of directory_ls, it should be faster - git_bare: Refactor the graph descent using explicit stacks instead of the call stack. 
- git_bare: When possible, use swh-graph instead of swh-storage to query revision - history - git_bare: Deduplicate object downloads and writes - Add a naive git bare cooker - cli: Add 'cook' command, to run cookers without Celery - tests: Run all directory tests on the gitfast cooker - tests: Add in_memory_backend.py - tests: Make test_directory_bogus_perms/test_revision_bogus_perms/ actually test the - cookers -- Software Heritage autobuilder (on jenkins-debian1) Mon, 28 Jun 2021 10:25:48 +0000 swh-vault (0.5.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.1 - (tagged by Antoine Lambert on 2021-04-29 14:42:43 +0200) * Upstream changes: - version 0.5.1 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 29 Apr 2021 12:48:13 +0000 swh-vault (0.5.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-12-08 15:58:26 +0100) * Upstream changes: - v0.5.0 - vault: Remove deprecated services default config - cli: Remove deprecated logging configuration -- Software Heritage autobuilder (on jenkins-debian1) Tue, 08 Dec 2020 15:01:11 +0000 swh-vault (0.4.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.4.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-23 13:50:22 +0100) * Upstream changes: - v0.4.0 - requirements-test.txt: Drop no longer needed test dependency - swh.vault.tests.conftest: Drop dead code -- Software Heritage autobuilder (on jenkins-debian1) Mon, 23 Nov 2020 12:52:25 +0000 swh-vault (0.3.4-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.4 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-23 11:35:47 +0100) * Upstream changes: - v0.3.4 - test_server: Fix exception structure - conftest: Explicitely declare aiohttp pytest plugin use -- Software Heritage autobuilder (on jenkins-debian1) Mon, 23 Nov 2020 10:37:50 +0000 swh-vault (0.3.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.3 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-18 18:02:35 +0100) * Upstream changes: - v0.3.3 - Fix api.server configuration adaptation issue -- Software Heritage autobuilder (on jenkins-debian1) Wed, 18 Nov 2020 18:40:45 +0000 swh-vault (0.3.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.1 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-17 17:46:37 +0100) * Upstream changes: - v0.3.1 - test_server: Simplify test server initialization to the minimum -- Software Heritage autobuilder (on jenkins-debian1) Tue, 17 Nov 2020 16:54:22 +0000 swh-vault (0.3.0-1~swh2) unstable-swh; urgency=medium * Fix dependency release -- Antoine R. Dumont (@ardumont) Tue, 17 Nov 2020 16:54:03 +0000 swh-vault (0.3.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-13 12:10:09 +0100) * Upstream changes: - v0.3.0 - Refactor vault configuration without the args indirection - vault.server: Introduce typed VaultInterface - Replace file modes literals to DentryPerms enum - Add tests on current configuration check for cooker instantiation - api.server: Add types and tests on configuration checks - swh.vault: Unify get_vault factory function with other factories - vault.tests: Make postgresql fixture faster -- Software Heritage autobuilder (on jenkins-debian1) Tue, 17 Nov 2020 16:22:52 +0000 swh-vault (0.2.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.2.0 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-10-19 09:52:04 +0200) * Upstream changes: - v0.2.0 - vault.config: Adapt scheduler configuration structure - test_cookers: Turn git_loader into a pytest fixture - tests: Fix loader git instantiation - tox.ini: pin black to the pre-commit version (19.10b0) to avoid flip- flops - Run isort after the CLI import changes -- Software Heritage autobuilder (on jenkins-debian1) Mon, 19 Oct 2020 07:54:03 +0000 swh-vault (0.1.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.1.0 - (tagged by David Douard on 2020-09-25 12:34:43 +0200) * Upstream changes: - v0.1.0 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 25 Sep 2020 10:37:22 +0000 swh-vault (0.0.35-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.35 - (tagged by David Douard on 2020-09-11 15:15:26 +0200) * Upstream changes: - v0.0.35 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 11 Sep 2020 13:18:50 +0000 swh-vault (0.0.34-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.34 - (tagged by Antoine Lambert on 2020-08-18 13:55:51 +0200) * Upstream changes: - version 0.0.34 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 18 Aug 2020 11:58:22 +0000 swh-vault (0.0.33-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.33 - (tagged by Valentin Lorentz on 2020-05-05 17:56:33 +0200) * Upstream changes: - v0.0.33 - * Use swh-storage validation proxy. - * Use model objects to send to storage - * Add a pyproject.toml file to target py37 for black - * setup: Update the minimum required runtime python3 version - * setup.py: add documentation link - * Raise NotFoundExc within our RPC framework instead of returning 404. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 05 May 2020 15:59:51 +0000 swh-vault (0.0.32-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.32 - (tagged by Antoine Lambert on 2020-02-05 13:00:19 +0100) * Upstream changes: - version 0.0.32 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 05 Feb 2020 12:16:16 +0000 swh-vault (0.0.31-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.31 - (tagged by Stefano Zacchiroli on 2019-11-05 17:24:43 +0100) * Upstream changes: - v0.0.31 - * typing: minimal changes to make a no-op mypy run pass - * Remove indirection swh.vault.api.wsgi - * tox.ini: Fix py3 environment to use packaged tests - * CLI: drop obsolete alias "serve" for "rpc- serve" -- Software Heritage autobuilder (on jenkins-debian1) Tue, 05 Nov 2019 16:44:29 +0000 swh-vault (0.0.30-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.30 - (tagged by Antoine Lambert on 2019-07-29 11:17:23 +0200) * Upstream changes: - version 0.0.30 -- Software Heritage autobuilder (on jenkins-debian1) Mon, 29 Jul 2019 09:22:02 +0000 swh-vault (0.0.29-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.29 - (tagged by Antoine Lambert on 2019-05-23 11:39:12 +0200) * Upstream changes: - version 0.0.29 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 23 May 2019 09:46:57 +0000 swh-vault (0.0.28-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.28 - (tagged by Antoine Lambert on 2019-05-23 11:00:51 +0200) * Upstream changes: - version 0.0.28 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 23 May 2019 09:05:34 +0000 swh-vault (0.0.27-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.27 - (tagged by Antoine Lambert on 2019-05-07 14:44:26 +0200) * Upstream changes: - version 0.0.27 -- Software Heritage autobuilder (on jenkins-debian1) 
Tue, 07 May 2019 12:54:35 +0000 swh-vault (0.0.26-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.26 - (tagged by Antoine Lambert on 2019-04-26 11:59:23 +0200) * Upstream changes: - version 0.0.26 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 26 Apr 2019 10:06:45 +0000 swh-vault (0.0.25-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.25 - (tagged by Antoine R. Dumont (@ardumont) on 2019-03-29 12:19:19 +0100) * Upstream changes: - v0.0.25 - master vault.backend: Migrate email address to bot@swh.org - API: use default's APIError exception instead of the VaultAPIError - Remove debian packaging from master branch -- Software Heritage autobuilder (on jenkins-debian1) Fri, 29 Mar 2019 11:28:28 +0000 swh-vault (0.0.24-1~swh3) unstable-swh; urgency=low * d/control: Update missing build dependency on postgresql-contrib -- Antoine Romain Dumont Mon, 18 Feb 2019 16:20:50 +0100 swh-vault (0.0.24-1~swh2) unstable-swh; urgency=low * d/control: Update missing build dependency on git * d/rules: Sanitize build locale -- Antoine Romain Dumont Mon, 18 Feb 2019 16:04:50 +0100 swh-vault (0.0.24-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.24 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-18 15:21:31 +0100) * Upstream changes: - v0.0.24 - MANIFEST.in: Fix packaging to include the sql schema definitions -- Software Heritage autobuilder (on jenkins-debian1) Mon, 18 Feb 2019 14:25:33 +0000 swh-vault (0.0.23-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.23 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-18 14:39:25 +0100) * Upstream changes: - v0.0.23 - test_cookers: Fix commit behavior when committing to another branch - Rewrite tests using pytest's fixtures and adapt them to recent refactorings - Normalize the configuration of VaultBackend and cooker - Make it possible to specify the config file via SWH_CONFIG_FILENAME env var - Refactor the VaultBackend to use BaseDb and pool-based db access - Add a swh.vault.api.wsgi module to instanciate the (singleton) wsgi app object -- Software Heritage autobuilder (on jenkins-debian1) Mon, 18 Feb 2019 13:48:28 +0000 swh-vault (0.0.22-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.22 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-14 10:12:41 +0100) * Upstream changes: - v0.0.22 - api/server: Do not read configuration at each request -- Software Heritage autobuilder (on jenkins-debian1) Thu, 14 Feb 2019 09:16:23 +0000 swh-vault (0.0.21-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.21 - (tagged by David Douard on 2019-02-07 17:38:49 +0100) * Upstream changes: - v0.0.21 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 07 Feb 2019 16:44:51 +0000 swh-vault (0.0.20-1~swh1) unstable-swh; urgency=medium * v0.0.20 * swh.vault: Open a get_vault instantiation function * swh.vault.api.client: Permit to specify the query timeout option * swh.storage doesn't expose a db attribute any longer -- Antoine R. 
Dumont (@ardumont) Thu, 24 May 2018 12:31:50 +0200 swh-vault (0.0.19-1~swh1) unstable-swh; urgency=medium * version 0.0.19 -- Antoine Pietri Thu, 03 May 2018 17:49:18 +0200 swh-vault (0.0.18-1~swh1) unstable-swh; urgency=medium * version 0.0.18 -- Antoine Pietri Thu, 03 May 2018 17:10:24 +0200 swh-vault (0.0.17-1~swh1) unstable-swh; urgency=medium * version 0.0.17 -- Antoine Pietri Thu, 03 May 2018 13:16:59 +0200 swh-vault (0.0.16-1~swh1) unstable-swh; urgency=medium * version 0.0.16 -- Antoine Pietri Wed, 02 May 2018 13:41:05 +0200 swh-vault (0.0.15-1~swh1) unstable-swh; urgency=medium * version 0.0.15 -- Antoine Pietri Fri, 27 Apr 2018 18:46:06 +0200 swh-vault (0.0.14-1~swh1) unstable-swh; urgency=medium * version 0.0.14 -- Antoine Pietri Fri, 27 Apr 2018 17:11:50 +0200 swh-vault (0.0.13-1~swh1) unstable-swh; urgency=medium * version 0.0.13 -- Antoine Pietri Wed, 25 Apr 2018 15:52:33 +0200 swh-vault (0.0.12-1~swh1) unstable-swh; urgency=medium * version 0.0.12 -- Antoine Pietri Wed, 21 Feb 2018 15:30:25 +0100 swh-vault (0.0.11-1~swh1) unstable-swh; urgency=medium * version 0.0.11 -- Antoine Pietri Fri, 16 Feb 2018 16:09:10 +0100 swh-vault (0.0.10-1~swh1) unstable-swh; urgency=medium * version 0.0.10 -- Antoine Pietri Thu, 15 Feb 2018 16:08:05 +0100 swh-vault (0.0.9-1~swh1) unstable-swh; urgency=medium * version 0.0.9 -- Antoine Pietri Thu, 01 Feb 2018 18:21:29 +0100 swh-vault (0.0.8-1~swh1) unstable-swh; urgency=medium * version 0.0.8 -- Antoine Pietri Wed, 31 Jan 2018 17:54:55 +0100 swh-vault (0.0.7-1~swh1) unstable-swh; urgency=medium * version 0.0.7 -- Antoine Pietri Tue, 30 Jan 2018 18:21:07 +0100 swh-vault (0.0.6-1~swh1) unstable-swh; urgency=medium * version 0.0.6 -- Antoine Pietri Tue, 09 Jan 2018 16:37:41 +0100 swh-vault (0.0.5-1~swh1) unstable-swh; urgency=medium * version 0.0.5 -- Antoine Pietri Thu, 14 Dec 2017 19:33:01 +0100 swh-vault (0.0.4-1~swh1) unstable-swh; urgency=medium * version 0.0.4 -- Antoine Pietri Fri, 08 Dec 2017 15:33:54 +0100 swh-vault (0.0.3-1~swh1) unstable-swh; urgency=medium * version 0.0.3 -- Antoine Pietri Fri, 01 Dec 2017 15:31:34 +0100 swh-vault (0.0.2-1~swh1) unstable-swh; urgency=medium * version 0.0.2 -- Antoine Pietri Thu, 30 Nov 2017 15:50:43 +0100 swh-vault (0.0.1-1~swh1) unstable-swh; urgency=medium * Initial release * version 0.0.1 -- Antoine Pietri Mon, 13 Nov 2017 16:22:47 +0100 diff --git a/pytest.ini b/pytest.ini index d811f93..7a65a50 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,6 @@ [pytest] -norecursedirs = docs .* +norecursedirs = build docs .* markers = graph: execute tests that depend on swh-graph + +asyncio_mode = strict diff --git a/requirements-test.txt b/requirements-test.txt index 4d5fa7a..319374d 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,10 +1,11 @@ -pytest < 7.0.0 # v7.0.0 removed _pytest.tmpdir.TempdirFactory, which is used by some of the pytest plugins we use +attrs dulwich >= 0.18.7 +pytest +pytest-mock swh.loader.core swh.loader.git >= 0.8 swh.storage[testing] -pytest-mock types-click types-python-dateutil types-pyyaml types-requests diff --git a/requirements.txt b/requirements.txt index bbc6f49..9bfddc4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ click +fastimport flask psycopg2 python-dateutil -fastimport +sentry-sdk typing-extensions diff --git a/setup.cfg b/setup.cfg index 1d722c2..f65ba0a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,8 +1,9 @@ [flake8] -ignore = E203,E231,W503 +select = C,E,F,W,B950 +ignore = E203,E231,E501,W503 max-line-length = 88 [egg_info] 
tag_build = tag_date = 0 diff --git a/swh.vault.egg-info/PKG-INFO b/swh.vault.egg-info/PKG-INFO index 085f9fe..a08850f 100644 --- a/swh.vault.egg-info/PKG-INFO +++ b/swh.vault.egg-info/PKG-INFO @@ -1,42 +1,38 @@ Metadata-Version: 2.1 Name: swh.vault -Version: 1.6.1 +Version: 1.7.2 Summary: Software Heritage vault Home-page: https://forge.softwareheritage.org/diffusion/DVAU/ Author: Software Heritage developers Author-email: swh-devel@inria.fr -License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-vault Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-vault/ -Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/x-rst Provides-Extra: testing Provides-Extra: graph License-File: LICENSE License-File: AUTHORS Software Heritage - Vault ========================= User-facing service that allows to retrieve parts of the archive as self-contained bundles (e.g., individual releases, entire repository snapshots, etc.) The creation of a bundle is called "cooking" a bundle. Architecture ------------ The vault is made of two main parts: 1. a stateful RPC server called the **backend** 2. Celery tasks, called **cookers** - - diff --git a/swh.vault.egg-info/SOURCES.txt b/swh.vault.egg-info/SOURCES.txt index d48f9a9..e42146e 100644 --- a/swh.vault.egg-info/SOURCES.txt +++ b/swh.vault.egg-info/SOURCES.txt @@ -1,75 +1,76 @@ +.git-blame-ignore-revs .gitignore .pre-commit-config.yaml AUTHORS CODE_OF_CONDUCT.md CONTRIBUTORS LICENSE MANIFEST.in Makefile README.rst conftest.py mypy.ini pyproject.toml pytest.ini requirements-swh-graph.txt requirements-swh.txt requirements-test.txt requirements.txt setup.cfg setup.py tox.ini docs/.gitignore docs/Makefile docs/README.rst docs/api.rst docs/cli.rst docs/conf.py docs/getting-started.rst docs/index.rst docs/_static/.placeholder docs/_templates/.placeholder swh/__init__.py swh.vault.egg-info/PKG-INFO swh.vault.egg-info/SOURCES.txt swh.vault.egg-info/dependency_links.txt swh.vault.egg-info/entry_points.txt swh.vault.egg-info/not-zip-safe swh.vault.egg-info/requires.txt swh.vault.egg-info/top_level.txt swh/vault/__init__.py swh/vault/backend.py swh/vault/cache.py swh/vault/cli.py swh/vault/cooking_tasks.py swh/vault/exc.py swh/vault/in_memory_backend.py swh/vault/interface.py swh/vault/py.typed swh/vault/to_disk.py swh/vault/api/__init__.py swh/vault/api/client.py swh/vault/api/serializers.py swh/vault/api/server.py swh/vault/cookers/__init__.py swh/vault/cookers/base.py swh/vault/cookers/directory.py swh/vault/cookers/git_bare.py swh/vault/cookers/revision_flat.py swh/vault/cookers/revision_gitfast.py swh/vault/cookers/utils.py swh/vault/sql/30-schema.sql swh/vault/sql/upgrades/002.sql swh/vault/sql/upgrades/003.sql swh/vault/tests/__init__.py swh/vault/tests/conftest.py swh/vault/tests/test_backend.py swh/vault/tests/test_cache.py swh/vault/tests/test_cli.py swh/vault/tests/test_cookers.py swh/vault/tests/test_cookers_base.py swh/vault/tests/test_git_bare_cooker.py swh/vault/tests/test_init.py swh/vault/tests/test_init_cookers.py swh/vault/tests/test_server.py swh/vault/tests/test_to_disk.py 
swh/vault/tests/vault_testing.py \ No newline at end of file diff --git a/swh.vault.egg-info/requires.txt b/swh.vault.egg-info/requires.txt index 55158dd..4479f7c 100644 --- a/swh.vault.egg-info/requires.txt +++ b/swh.vault.egg-info/requires.txt @@ -1,26 +1,28 @@ click +fastimport flask psycopg2 python-dateutil -fastimport +sentry-sdk typing-extensions swh.core[db,http]>=2 swh.model>=3.0.0 swh.objstorage>=0.0.17 swh.scheduler>=0.7.0 swh.storage>=0.43.1 [graph] swh.graph>=0.3.2 [testing] -pytest<7.0.0 +attrs dulwich>=0.18.7 +pytest +pytest-mock swh.loader.core swh.loader.git>=0.8 swh.storage[testing] -pytest-mock types-click types-python-dateutil types-pyyaml types-requests diff --git a/swh/vault/api/server.py b/swh/vault/api/server.py index 0630a0c..5b58d61 100644 --- a/swh/vault/api/server.py +++ b/swh/vault/api/server.py @@ -1,117 +1,121 @@ # Copyright (C) 2016-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import annotations import os from typing import Any, Dict, Optional from swh.core.api import RPCServerApp from swh.core.api import encode_data_server as encode_data from swh.core.api import error_handler from swh.core.config import config_basepath, merge_configs, read_raw_config from swh.vault import get_vault as get_swhvault from swh.vault.backend import NotFoundExc from swh.vault.interface import VaultInterface from .serializers import DECODERS, ENCODERS # do not define default services here DEFAULT_CONFIG = { - "client_max_size": 1024 ** 3, + "client_max_size": 1024**3, } def get_vault(): global vault if not vault: vault = get_swhvault(**app.config["vault"]) return vault class VaultServerApp(RPCServerApp): - client_exception_classes = (NotFoundExc,) extra_type_decoders = DECODERS extra_type_encoders = ENCODERS vault = None -app = VaultServerApp(__name__, backend_class=VaultInterface, backend_factory=get_vault,) +app = VaultServerApp( + __name__, + backend_class=VaultInterface, + backend_factory=get_vault, +) @app.errorhandler(NotFoundExc) def argument_error_handler(exception): return error_handler(exception, encode_data, status_code=400) @app.errorhandler(Exception) def my_error_handler(exception): return error_handler(exception, encode_data) @app.route("/") def index(): return "SWH Vault API server" def check_config(cfg: Dict[str, Any]) -> Dict[str, Any]: - """Ensure the configuration is ok to run a local vault server, and propagate defaults. + """Ensure the configuration is ok to run a postgresql vault server, and propagate + defaults. Raises: - EnvironmentError if the configuration is not for local instance + EnvironmentError if the configuration is not for postgresql instance ValueError if one of the following keys is missing: vault, cache, storage, scheduler Returns: - New configuration dict to instantiate a local vault server instance. + New configuration dict to instantiate a postgresql vault server instance. """ cfg = cfg.copy() if "vault" not in cfg: raise ValueError("missing 'vault' configuration") vcfg = cfg["vault"] - if vcfg["cls"] != "local": + if vcfg["cls"] not in ("local", "postgresql"): raise EnvironmentError( - "The vault backend can only be started with a 'local' configuration", + "The vault backend can only be started with a 'postgresql' configuration", ) # TODO: Soft-deprecation of args key. Remove when ready. 
vcfg.update(vcfg.get("args", {})) # Default to top-level value if any vcfg = {**cfg, **vcfg} for key in ("cache", "storage", "scheduler"): if not vcfg.get(key): raise ValueError(f"invalid configuration: missing {key} config entry.") return vcfg def make_app_from_configfile( config_path: Optional[str] = None, **kwargs ) -> VaultServerApp: """Load and check configuration if ok, then instantiate (once) a vault server - application. + application. """ config_path = os.environ.get("SWH_CONFIG_FILENAME", config_path) if not config_path: raise ValueError("Missing configuration path.") if not os.path.isfile(config_path): raise ValueError(f"Configuration path {config_path} should exist.") app_config = read_raw_config(config_basepath(config_path)) app_config["vault"] = check_config(app_config) app.config.update(merge_configs(DEFAULT_CONFIG, app_config)) return app if __name__ == "__main__": print("Deprecated. Use swh-vault ") diff --git a/swh/vault/backend.py b/swh/vault/backend.py index c75bd00..b7d9bf3 100644 --- a/swh/vault/backend.py +++ b/swh/vault/backend.py @@ -1,531 +1,536 @@ -# Copyright (C) 2017-2020 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import collections from email.mime.text import MIMEText import smtplib from typing import Any, Dict, List, Optional, Tuple import psycopg2.extras import psycopg2.pool from swh.core.db import BaseDb from swh.core.db.common import db_transaction from swh.model.swhids import CoreSWHID from swh.scheduler import get_scheduler from swh.scheduler.utils import create_oneshot_task_dict from swh.storage import get_storage from swh.vault.cache import VaultCache from swh.vault.cookers import COOKER_TYPES, get_cooker_cls from swh.vault.exc import NotFoundExc cooking_task_name = "swh.vault.cooking_tasks.SWHCookingTask" NOTIF_EMAIL_FROM = '"Software Heritage Vault" ' "" NOTIF_EMAIL_SUBJECT_SUCCESS = "Bundle ready: {bundle_type} {short_id}" NOTIF_EMAIL_SUBJECT_FAILURE = "Bundle failed: {bundle_type} {short_id}" NOTIF_EMAIL_BODY_SUCCESS = """ You have requested the following bundle from the Software Heritage Vault: Bundle Type: {bundle_type} Object SWHID: {swhid} This bundle is now available for download at the following address: {url} Please keep in mind that this link might expire at some point, in which case you will need to request the bundle again. --\x20 The Software Heritage Developers """ NOTIF_EMAIL_BODY_FAILURE = """ You have requested the following bundle from the Software Heritage Vault: Bundle Type: {bundle_type} Object SWHID: {swhid} This bundle could not be cooked for the following reason: {progress_msg} We apologize for the inconvenience. --\x20 The Software Heritage Developers """ class VaultBackend: """ Backend for the Software Heritage Vault. 
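With `check_config` now accepting both the legacy "local" and the new "postgresql" backend class, and `VaultBackend` validating its "db" entry explicitly, a server-side configuration needs at least vault/cache/storage/scheduler plus a database connection string. A sketch of a dict that would pass the checks above — every URL, DSN and class choice below is a placeholder, not a recommended deployment:

```python
from swh.vault.api.server import check_config

cfg = {
    "vault": {
        "cls": "postgresql",         # "local" is still accepted as an alias
        "db": "dbname=swh-vault",    # required by VaultBackend.__init__
        "cache": {"cls": "memory"},  # any objstorage configuration works here
        "storage": {"cls": "remote", "url": "http://localhost:5002/"},
        "scheduler": {"cls": "remote", "url": "http://localhost:5008/"},
        "smtp": {"host": "localhost", "port": 25},  # optional notification relay
    }
}
vcfg = check_config(cfg)  # ValueError if cache/storage/scheduler is missing
```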
""" current_version = 4 def __init__(self, **config): self.config = config self.cache = VaultCache(**config["cache"]) self.scheduler = get_scheduler(**config["scheduler"]) self.storage = get_storage(**config["storage"]) self.smtp_server = smtplib.SMTP(**config.get("smtp", {})) + if "db" not in self.config: + raise ValueError( + "The 'db' configuration entry is missing " + "in the vault configuration file" + ) db_conn = config["db"] self._pool = psycopg2.pool.ThreadedConnectionPool( config.get("min_pool_conns", 1), config.get("max_pool_conns", 10), db_conn, cursor_factory=psycopg2.extras.RealDictCursor, ) self._db = None def get_db(self): if self._db: return self._db return BaseDb.from_pool(self._pool) def put_db(self, db): if db is not self._db: db.put_conn() @db_transaction() def progress( self, bundle_type: str, swhid: CoreSWHID, raise_notfound: bool = True, db=None, cur=None, ) -> Optional[Dict[str, Any]]: cur.execute( """ SELECT id, type, swhid, task_id, task_status, sticky, ts_created, ts_done, ts_last_access, progress_msg FROM vault_bundle WHERE type = %s AND swhid = %s""", (bundle_type, str(swhid)), ) res = cur.fetchone() if not res: if raise_notfound: raise NotFoundExc(f"{bundle_type} {swhid} was not found.") return None res["swhid"] = CoreSWHID.from_string(res["swhid"]) return res def _send_task(self, bundle_type: str, swhid: CoreSWHID): """Send a cooking task to the celery scheduler""" task = create_oneshot_task_dict("cook-vault-bundle", bundle_type, str(swhid)) added_tasks = self.scheduler.create_tasks([task]) return added_tasks[0]["id"] @db_transaction() def create_task( self, bundle_type: str, swhid: CoreSWHID, sticky: bool = False, db=None, cur=None, ): """Create and send a cooking task""" cooker_class = get_cooker_cls(bundle_type, swhid.object_type) cooker = cooker_class(swhid, backend=self, storage=self.storage) if not cooker.check_exists(): raise NotFoundExc(f"{bundle_type} {swhid} was not found.") cur.execute( """ INSERT INTO vault_bundle (type, swhid, sticky) VALUES (%s, %s, %s)""", (bundle_type, str(swhid), sticky), ) db.conn.commit() task_id = self._send_task(bundle_type, swhid) cur.execute( """ UPDATE vault_bundle SET task_id = %s WHERE type = %s AND swhid = %s""", (task_id, bundle_type, str(swhid)), ) @db_transaction() def add_notif_email( self, bundle_type: str, swhid: CoreSWHID, email: str, db=None, cur=None ): """Add an e-mail address to notify when a given bundle is ready""" cur.execute( """ INSERT INTO vault_notif_email (email, bundle_id) VALUES (%s, (SELECT id FROM vault_bundle WHERE type = %s AND swhid = %s))""", (email, bundle_type, str(swhid)), ) def put_bundle(self, bundle_type: str, swhid: CoreSWHID, bundle) -> bool: self.cache.add(bundle_type, swhid, bundle) return True @db_transaction() def cook( self, bundle_type: str, swhid: CoreSWHID, *, sticky: bool = False, email: Optional[str] = None, db=None, cur=None, ) -> Dict[str, Any]: info = self.progress(bundle_type, swhid, raise_notfound=False) if bundle_type not in COOKER_TYPES: raise NotFoundExc(f"{bundle_type} is an unknown type.") # If there's a failed bundle entry, delete it first. if info is not None and info["task_status"] == "failed": cur.execute( "DELETE FROM vault_bundle WHERE type = %s AND swhid = %s", (bundle_type, str(swhid)), ) db.conn.commit() info = None # If there's no bundle entry, create the task. 
if info is None: self.create_task(bundle_type, swhid, sticky) if email is not None: # If the task is already done, send the email directly if info is not None and info["task_status"] == "done": self.send_notification( None, email, bundle_type, swhid, info["task_status"] ) # Else, add it to the notification queue else: self.add_notif_email(bundle_type, swhid, email) return self.progress(bundle_type, swhid) @db_transaction() def batch_cook( self, batch: List[Tuple[str, str]], db=None, cur=None ) -> Dict[str, int]: # Import execute_values at runtime only, because it requires # psycopg2 >= 2.7 (only available on postgresql servers) from psycopg2.extras import execute_values for bundle_type, _ in batch: if bundle_type not in COOKER_TYPES: raise NotFoundExc(f"{bundle_type} is an unknown type.") cur.execute( """ INSERT INTO vault_batch (id) VALUES (DEFAULT) RETURNING id""" ) batch_id = cur.fetchone()["id"] # Delete all failed bundles from the batch cur.execute( """ DELETE FROM vault_bundle WHERE task_status = 'failed' AND (type, swhid) IN %s""", (tuple(batch),), ) # Insert all the bundles, return the new ones execute_values( cur, """ INSERT INTO vault_bundle (type, swhid) VALUES %s ON CONFLICT DO NOTHING""", batch, ) # Get the bundle ids and task status cur.execute( """ SELECT id, type, swhid, task_id FROM vault_bundle WHERE (type, swhid) IN %s""", (tuple(batch),), ) bundles = cur.fetchall() # Insert the batch-bundle entries batch_id_bundle_ids = [(batch_id, row["id"]) for row in bundles] execute_values( cur, """ INSERT INTO vault_batch_bundle (batch_id, bundle_id) VALUES %s ON CONFLICT DO NOTHING""", batch_id_bundle_ids, ) db.conn.commit() # Get the tasks to fetch batch_new = [ (row["type"], CoreSWHID.from_string(row["swhid"])) for row in bundles if row["task_id"] is None ] # Send the tasks args_batch = [(bundle_type, swhid) for bundle_type, swhid in batch_new] # TODO: change once the scheduler handles priority tasks tasks = [ create_oneshot_task_dict("swh-vault-batch-cooking", *args) for args in args_batch ] added_tasks = self.scheduler.create_tasks(tasks) tasks_ids_bundle_ids = [ (task_id, bundle_type, swhid) for task_id, (bundle_type, swhid) in zip( [task["id"] for task in added_tasks], batch_new ) ] # Update the task ids execute_values( cur, """ UPDATE vault_bundle SET task_id = s_task_id FROM (VALUES %s) AS sub (s_task_id, s_type, s_swhid) WHERE type = s_type::cook_type AND swhid = s_swhid """, tasks_ids_bundle_ids, ) return {"id": batch_id} @db_transaction() def batch_progress(self, batch_id: int, db=None, cur=None) -> Dict[str, Any]: cur.execute( """ SELECT vault_bundle.id as id, type, swhid, task_id, task_status, sticky, ts_created, ts_done, ts_last_access, progress_msg FROM vault_batch_bundle LEFT JOIN vault_bundle ON vault_bundle.id = bundle_id WHERE batch_id = %s""", (batch_id,), ) bundles = cur.fetchall() if not bundles: raise NotFoundExc(f"Batch {batch_id} does not exist.") for bundle in bundles: bundle["swhid"] = CoreSWHID.from_string(bundle["swhid"]) counter = collections.Counter(b["status"] for b in bundles) res = { "bundles": bundles, "total": len(bundles), **{k: 0 for k in ("new", "pending", "done", "failed")}, **dict(counter), } return res @db_transaction() def is_available(self, bundle_type: str, swhid: CoreSWHID, db=None, cur=None): """Check whether a bundle is available for retrieval""" info = self.progress(bundle_type, swhid, raise_notfound=False, cur=cur) return ( info is not None and info["task_status"] == "done" and self.cache.is_cached(bundle_type, swhid) ) 
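The batch API above mirrors the single-bundle flow: `batch_cook()` registers the bundles, schedules one-shot "swh-vault-batch-cooking" tasks for those that have no task yet, and hands back a batch id; `batch_progress()` then reports per-status counters. A hypothetical round-trip, reusing the `vcfg` sketch from earlier (the SWHIDs are placeholders):

```python
from swh.vault.backend import VaultBackend

backend = VaultBackend(**vcfg)  # vcfg as returned by check_config() above

batch = [
    ("flat", "swh:1:dir:" + "00" * 20),
    ("gitfast", "swh:1:rev:" + "00" * 20),
]
batch_id = backend.batch_cook(batch)["id"]

status = backend.batch_progress(batch_id)
print(status["total"], status["done"], status["failed"])  # aggregate counters
```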
@db_transaction() def fetch( self, bundle_type: str, swhid: CoreSWHID, raise_notfound=True, db=None, cur=None ) -> Optional[bytes]: """Retrieve a bundle from the cache""" available = self.is_available(bundle_type, swhid, cur=cur) if not available: if raise_notfound: raise NotFoundExc(f"{bundle_type} {swhid} is not available.") return None self.update_access_ts(bundle_type, swhid, cur=cur) return self.cache.get(bundle_type, swhid) @db_transaction() def update_access_ts(self, bundle_type: str, swhid: CoreSWHID, db=None, cur=None): """Update the last access timestamp of a bundle""" cur.execute( """ UPDATE vault_bundle SET ts_last_access = NOW() WHERE type = %s AND swhid = %s""", (bundle_type, str(swhid)), ) @db_transaction() def set_status( self, bundle_type: str, swhid: CoreSWHID, status: str, db=None, cur=None ) -> bool: req = ( """ UPDATE vault_bundle SET task_status = %s """ + (""", ts_done = NOW() """ if status == "done" else "") + """WHERE type = %s AND swhid = %s""" ) cur.execute(req, (status, bundle_type, str(swhid))) return True @db_transaction() def set_progress( self, bundle_type: str, swhid: CoreSWHID, progress: str, db=None, cur=None ) -> bool: cur.execute( """ UPDATE vault_bundle SET progress_msg = %s WHERE type = %s AND swhid = %s""", (progress, bundle_type, str(swhid)), ) return True @db_transaction() def send_notif(self, bundle_type: str, swhid: CoreSWHID, db=None, cur=None) -> bool: cur.execute( """ SELECT vault_notif_email.id AS id, email, task_status, progress_msg FROM vault_notif_email INNER JOIN vault_bundle ON bundle_id = vault_bundle.id WHERE vault_bundle.type = %s AND vault_bundle.swhid = %s""", (bundle_type, str(swhid)), ) for d in cur: self.send_notification( d["id"], d["email"], bundle_type, swhid, status=d["task_status"], progress_msg=d["progress_msg"], ) return True @db_transaction() def send_notification( self, n_id: Optional[int], email: str, bundle_type: str, swhid: CoreSWHID, status: str, progress_msg: Optional[str] = None, db=None, cur=None, ) -> None: """Send the notification of a bundle to a specific e-mail""" short_id = swhid.object_id.hex()[:7] # TODO: instead of hardcoding this, we should probably: # * add a "fetch_url" field in the vault_notif_email table # * generate the url with flask.url_for() on the web-ui side # * send this url as part of the cook request and store it in # the table # * use this url for the notification e-mail url = "https://archive.softwareheritage.org/api/1/vault/{}/{}/" "raw".format( bundle_type, swhid ) if status == "done": text = NOTIF_EMAIL_BODY_SUCCESS.strip() text = text.format(bundle_type=bundle_type, swhid=swhid, url=url) msg = MIMEText(text) msg["Subject"] = NOTIF_EMAIL_SUBJECT_SUCCESS.format( bundle_type=bundle_type, short_id=short_id ) elif status == "failed": text = NOTIF_EMAIL_BODY_FAILURE.strip() text = text.format( bundle_type=bundle_type, swhid=swhid, progress_msg=progress_msg ) msg = MIMEText(text) msg["Subject"] = NOTIF_EMAIL_SUBJECT_FAILURE.format( bundle_type=bundle_type, short_id=short_id ) else: raise RuntimeError( "send_notification called on a '{}' bundle".format(status) ) msg["From"] = NOTIF_EMAIL_FROM msg["To"] = email self._smtp_send(msg) if n_id is not None: cur.execute( """ DELETE FROM vault_notif_email WHERE id = %s""", (n_id,), ) def _smtp_send(self, msg: MIMEText): # Reconnect if needed try: status = self.smtp_server.noop()[0] except smtplib.SMTPException: status = -1 if status != 250: self.smtp_server.connect("localhost", 25) # Send the message self.smtp_server.send_message(msg) @db_transaction() 
def _cache_expire(self, cond, *args, db=None, cur=None) -> None: """Low-level expiration method, used by cache_expire_* methods""" # Embedded SELECT query to be able to use ORDER BY and LIMIT cur.execute( """ DELETE FROM vault_bundle WHERE ctid IN ( SELECT ctid FROM vault_bundle WHERE sticky = false {} ) RETURNING type, swhid """.format( cond ), args, ) for d in cur: self.cache.delete(d["type"], CoreSWHID.from_string(d["swhid"])) @db_transaction() def cache_expire_oldest(self, n=1, by="last_access", db=None, cur=None) -> None: """Expire the `n` oldest bundles""" assert by in ("created", "done", "last_access") filter = """ORDER BY ts_{} LIMIT {}""".format(by, n) return self._cache_expire(filter) @db_transaction() def cache_expire_until(self, date, by="last_access", db=None, cur=None) -> None: """Expire all the bundles until a certain date""" assert by in ("created", "done", "last_access") filter = """AND ts_{} <= %s""".format(by) return self._cache_expire(filter, date) diff --git a/swh/vault/cache.py b/swh/vault/cache.py index 5599c29..bd88b9f 100644 --- a/swh/vault/cache.py +++ b/swh/vault/cache.py @@ -1,43 +1,39 @@ # Copyright (C) 2016-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.model import hashutil from swh.model.swhids import CoreSWHID from swh.objstorage.factory import get_objstorage from swh.objstorage.objstorage import compute_hash class VaultCache: """The Vault cache is an object storage that stores Vault bundles. This implementation computes sha1(':') as the internal identifiers used in the underlying objstorage. """ def __init__(self, **objstorage): self.objstorage = get_objstorage(**objstorage) - def add(self, bundle_type, swhid: CoreSWHID, content): + def add(self, bundle_type, swhid: CoreSWHID, content) -> None: sid = self._get_internal_id(bundle_type, swhid) - return self.objstorage.add(content, sid) + self.objstorage.add(content, sid) - def get(self, bundle_type, swhid: CoreSWHID): + def get(self, bundle_type, swhid: CoreSWHID) -> bytes: sid = self._get_internal_id(bundle_type, swhid) return self.objstorage.get(hashutil.hash_to_bytes(sid)) def delete(self, bundle_type, swhid: CoreSWHID): sid = self._get_internal_id(bundle_type, swhid) return self.objstorage.delete(hashutil.hash_to_bytes(sid)) - def get_stream(self, bundle_type, swhid: CoreSWHID): - sid = self._get_internal_id(bundle_type, swhid) - return self.objstorage.get_stream(hashutil.hash_to_bytes(sid)) - - def is_cached(self, bundle_type, swhid: CoreSWHID): + def is_cached(self, bundle_type, swhid: CoreSWHID) -> bool: sid = self._get_internal_id(bundle_type, swhid) return hashutil.hash_to_bytes(sid) in self.objstorage def _get_internal_id(self, bundle_type, swhid: CoreSWHID): return compute_hash("{}:{}".format(bundle_type, swhid).encode()) diff --git a/swh/vault/cli.py b/swh/vault/cli.py index 2881117..e274405 100644 --- a/swh/vault/cli.py +++ b/swh/vault/cli.py @@ -1,189 +1,198 @@ # Copyright (C) 2015-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import annotations # WARNING: do not import unnecessary things here to keep cli startup time under # control import logging from typing import TYPE_CHECKING, Optional import click from 
swh.core.cli import CONTEXT_SETTINGS, AliasedGroup from swh.core.cli import swh as swh_cli_group if TYPE_CHECKING: import io from swh.model.swhids import CoreSWHID class SwhidParamType(click.ParamType): name = "swhid" def convert(self, value, param, ctx): from swh.model.exceptions import ValidationError from swh.model.swhids import CoreSWHID try: return CoreSWHID.from_string(value) except ValidationError: self.fail(f"expected core SWHID, got {value!r}", param, ctx) @swh_cli_group.group(name="vault", context_settings=CONTEXT_SETTINGS, cls=AliasedGroup) @click.pass_context def vault(ctx): """Software Heritage Vault tools.""" @vault.command() @click.option( "--config-file", "-C", default=None, metavar="CONFIGFILE", - type=click.Path(exists=True, dir_okay=False,), + type=click.Path( + exists=True, + dir_okay=False, + ), help="Configuration file.", ) @click.argument("swhid", type=SwhidParamType()) @click.argument("outfile", type=click.File("wb")) @click.option( "--bundle-type", type=click.Choice(["flat", "gitfast", "git_bare"]), help="Selects which cooker to use, when there is more than one available " "for the given object type.", ) @click.pass_context def cook( ctx, config_file: str, swhid: CoreSWHID, outfile: io.RawIOBase, bundle_type: Optional[str], ): """ Runs a vault cooker for a single object (identified by a SWHID), and outputs it to the given file. """ from swh.core import config from swh.model.swhids import ObjectType from swh.objstorage.exc import ObjNotFoundError from swh.objstorage.factory import get_objstorage from swh.storage import get_storage from .cookers import get_cooker_cls from .in_memory_backend import InMemoryVaultBackend conf = config.read(config_file) try: from swh.graph.client import RemoteGraphClient # optional dependency graph = RemoteGraphClient(**conf["graph"]) if conf.get("graph") else None except ModuleNotFoundError: if conf.get("graph"): raise EnvironmentError( "Graph configuration required but module is not installed." 
) else: graph = None backend = InMemoryVaultBackend() if bundle_type is None: - if swhid.object_type in (ObjectType.RELEASE, ObjectType.SNAPSHOT,): + if swhid.object_type in ( + ObjectType.RELEASE, + ObjectType.SNAPSHOT, + ): bundle_type = "git_bare" elif swhid.object_type in (ObjectType.DIRECTORY,): bundle_type = "flat" else: raise click.ClickException( "No default bundle type for this kind of object, " "use --bundle-type to choose one" ) try: cooker_cls = get_cooker_cls(bundle_type, swhid.object_type) except ValueError as e: raise click.ClickException(*e.args) storage = get_storage(**conf["storage"]) objstorage = get_objstorage(**conf["objstorage"]) if "objstorage" in conf else None cooker = cooker_cls( swhid=swhid, backend=backend, storage=storage, graph=graph, objstorage=objstorage, max_bundle_size=None, # No need for a size limit, we are running locally ) cooker.cook() try: bundle = backend.fetch(cooker_cls.BUNDLE_TYPE, swhid) except ObjNotFoundError: bundle = None if bundle is None: import pdb pdb.set_trace() raise click.ClickException("Cooker did not write a bundle to the backend.") outfile.write(bundle) @vault.command(name="rpc-serve") @click.option( "--config-file", "-C", default=None, metavar="CONFIGFILE", - type=click.Path(exists=True, dir_okay=False,), + type=click.Path( + exists=True, + dir_okay=False, + ), help="Configuration file.", ) @click.option( "--host", default="0.0.0.0", metavar="IP", show_default=True, help="Host ip address to bind the server on", ) @click.option( "--port", default=5005, type=click.INT, metavar="PORT", help="Binding port of the server", ) @click.option( "--debug/--no-debug", default=True, help="Indicates if the server should run in debug mode", ) @click.pass_context def serve(ctx, config_file, host, port, debug): """Software Heritage Vault RPC server.""" from swh.vault.api.server import make_app_from_configfile ctx.ensure_object(dict) if "log_level" in ctx.obj: logging.getLogger("werkzeug").setLevel(ctx.obj["log_level"]) try: app = make_app_from_configfile(config_file, debug=debug) except EnvironmentError as e: click.echo(e.msg, err=True) ctx.exit(1) app.run(host, port=int(port), debug=debug) def main(): logging.basicConfig() return serve(auto_envvar_prefix="SWH_VAULT") if __name__ == "__main__": main() diff --git a/swh/vault/cookers/__init__.py b/swh/vault/cookers/__init__.py index c583695..878c77c 100644 --- a/swh/vault/cookers/__init__.py +++ b/swh/vault/cookers/__init__.py @@ -1,129 +1,135 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import annotations import os from typing import Any, Dict, List, Type from swh.core.config import load_named_config from swh.core.config import read as read_config from swh.model.swhids import CoreSWHID, ObjectType from swh.storage import get_storage from swh.vault import get_vault from swh.vault.cookers.base import DEFAULT_CONFIG, DEFAULT_CONFIG_PATH, BaseVaultCooker from swh.vault.cookers.directory import DirectoryCooker from swh.vault.cookers.git_bare import GitBareCooker from swh.vault.cookers.revision_flat import RevisionFlatCooker from swh.vault.cookers.revision_gitfast import RevisionGitfastCooker _COOKER_CLS: List[Type[BaseVaultCooker]] = [ DirectoryCooker, RevisionFlatCooker, RevisionGitfastCooker, GitBareCooker, ] COOKER_TYPES: Dict[str, List[Type[BaseVaultCooker]]] = {} for _cooker_cls 
in _COOKER_CLS: COOKER_TYPES.setdefault(_cooker_cls.BUNDLE_TYPE, []).append(_cooker_cls) def get_cooker_cls(bundle_type: str, object_type: ObjectType): cookers = COOKER_TYPES.get(bundle_type) if not cookers: raise ValueError(f"{bundle_type} is not a valid bundle type.") for cooker in cookers: try: cooker.check_object_type(object_type) except ValueError: pass else: return cooker raise ValueError( f"{object_type.name.lower()} objects do not have a {bundle_type} cooker" ) def check_config(cfg: Dict[str, Any]) -> Dict[str, Any]: """Ensure the configuration is ok to run a vault worker, and propagate defaults Raises: EnvironmentError if the configuration is not for remote instance ValueError if one of the following keys is missing: vault, storage Returns: New configuration dict to instantiate a vault worker instance """ cfg = cfg.copy() if "vault" not in cfg: raise ValueError("missing 'vault' configuration") vcfg = cfg["vault"] if vcfg["cls"] != "remote": raise EnvironmentError( "This vault backend can only be a 'remote' configuration" ) # TODO: Soft-deprecation of args key. Remove when ready. vcfg.update(vcfg.get("args", {})) # Default to top-level value if any if "storage" not in vcfg: vcfg["storage"] = cfg.get("storage") if not vcfg.get("storage"): raise ValueError("invalid configuration: missing 'storage' config entry.") return cfg def get_cooker(bundle_type: str, swhid: CoreSWHID): """Instantiate a cooker class of type bundle_type. Returns: Cooker class in charge of cooking the bundle_type with id swhid. Raises: ValueError in case of a missing top-level vault key configuration or a storage key. EnvironmentError in case the vault configuration reference a non remote class. """ if "SWH_CONFIG_FILENAME" in os.environ: cfg = read_config(os.environ["SWH_CONFIG_FILENAME"], DEFAULT_CONFIG) else: cfg = load_named_config(DEFAULT_CONFIG_PATH, DEFAULT_CONFIG) cooker_cls = get_cooker_cls(bundle_type, swhid.object_type) cfg = check_config(cfg) vcfg = cfg["vault"] storage = get_storage(**vcfg.pop("storage")) backend = get_vault(**vcfg) try: from swh.graph.client import RemoteGraphClient # optional dependency graph = RemoteGraphClient(**vcfg["graph"]) if vcfg.get("graph") else None except ModuleNotFoundError: if vcfg.get("graph"): raise EnvironmentError( "Graph configuration required but module is not installed." 
) else: graph = None kwargs = { k: v for (k, v) in cfg.items() if k in ("max_bundle_size", "thread_pool_size") } - return cooker_cls(swhid, backend=backend, storage=storage, graph=graph, **kwargs,) + return cooker_cls( + swhid, + backend=backend, + storage=storage, + graph=graph, + **kwargs, + ) diff --git a/swh/vault/cookers/base.py b/swh/vault/cookers/base.py index 38549db..2c974e8 100644 --- a/swh/vault/cookers/base.py +++ b/swh/vault/cookers/base.py @@ -1,159 +1,161 @@ # Copyright (C) 2016-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import abc import io import logging import traceback from typing import ClassVar, Set from psycopg2.extensions import QueryCanceledError +import sentry_sdk +import swh.model.swhids from swh.model.swhids import CoreSWHID, ObjectType from swh.storage.interface import StorageInterface -MAX_BUNDLE_SIZE = 2 ** 29 # 512 MiB +MAX_BUNDLE_SIZE = 2**29 # 512 MiB DEFAULT_CONFIG_PATH = "vault/cooker" DEFAULT_CONFIG = { "max_bundle_size": ("int", MAX_BUNDLE_SIZE), } class PolicyError(Exception): """Raised when the bundle violates the cooking policy.""" pass class BundleTooLargeError(PolicyError): """Raised when the bundle is too large to be cooked.""" pass class BytesIOBundleSizeLimit(io.BytesIO): def __init__(self, *args, size_limit=None, **kwargs): super().__init__(*args, **kwargs) self.size_limit = size_limit def write(self, chunk): if ( self.size_limit is not None and self.getbuffer().nbytes + len(chunk) > self.size_limit ): raise BundleTooLargeError( "The requested bundle exceeds the maximum allowed " "size of {} bytes.".format(self.size_limit) ) return super().write(chunk) class BaseVaultCooker(metaclass=abc.ABCMeta): """Abstract base class for the vault's bundle creators This class describes a common API for the cookers. To define a new cooker, inherit from this class and override: - CACHE_TYPE_KEY: key to use for the bundle to reference in cache - def cook(): cook the object into a bundle """ - SUPPORTED_OBJECT_TYPES: ClassVar[Set[ObjectType]] + SUPPORTED_OBJECT_TYPES: ClassVar[Set[swh.model.swhids.ObjectType]] BUNDLE_TYPE: ClassVar[str] def __init__( self, swhid: CoreSWHID, backend, storage: StorageInterface, graph=None, objstorage=None, max_bundle_size: int = MAX_BUNDLE_SIZE, thread_pool_size: int = 10, ): """Initialize the cooker. The type of the object represented by the id depends on the concrete class. Very likely, each type of bundle will have its own cooker class. Args: swhid: id of the object to be cooked into a bundle. backend: the vault backend (swh.vault.backend.VaultBackend). """ self.check_object_type(swhid.object_type) self.swhid = swhid self.obj_id = swhid.object_id self.backend = backend self.storage = storage self.objstorage = objstorage self.graph = graph self.max_bundle_size = max_bundle_size self.thread_pool_size = thread_pool_size @classmethod def check_object_type(cls, object_type: ObjectType) -> None: if object_type not in cls.SUPPORTED_OBJECT_TYPES: raise ValueError(f"{cls.__name__} does not support {object_type} objects.") @abc.abstractmethod def check_exists(self): """Checks that the requested object exists and can be cooked. Override this in the cooker implementation. """ raise NotImplementedError @abc.abstractmethod def prepare_bundle(self): """Implementation of the cooker. Yields chunks of the bundle bytes. 
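The MAX_BUNDLE_SIZE cap (2**29, i.e. 512 MiB) is enforced by BytesIOBundleSizeLimit: a write that would grow the buffer past size_limit is rejected up front. A toy run showing the behavior, using only the classes defined just above:

```python
from swh.vault.cookers.base import BundleTooLargeError, BytesIOBundleSizeLimit

buf = BytesIOBundleSizeLimit(size_limit=4)
buf.write(b"ab")         # 2 bytes: fits
try:
    buf.write(b"cde")    # 2 + 3 > 4: raised before anything is written
except BundleTooLargeError as exc:
    print(exc)           # cook() turns this PolicyError into a "failed" status
```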
Override this with the cooker implementation. """ raise NotImplementedError def cache_type_key(self) -> str: assert self.BUNDLE_TYPE return self.BUNDLE_TYPE def write(self, chunk): self.fileobj.write(chunk) def cook(self): - """Cook the requested object into a bundle - """ + """Cook the requested object into a bundle""" self.backend.set_status(self.BUNDLE_TYPE, self.swhid, "pending") self.backend.set_progress(self.BUNDLE_TYPE, self.swhid, "Processing...") self.fileobj = BytesIOBundleSizeLimit(size_limit=self.max_bundle_size) try: try: self.prepare_bundle() except QueryCanceledError: raise PolicyError( "Timeout reached while assembling the requested bundle" ) bundle = self.fileobj.getvalue() # TODO: use proper content streaming instead of put_bundle() self.backend.put_bundle(self.cache_type_key(), self.swhid, bundle) except PolicyError as e: logging.info("Bundle cooking violated policy: %s", e) self.backend.set_status(self.BUNDLE_TYPE, self.swhid, "failed") self.backend.set_progress(self.BUNDLE_TYPE, self.swhid, str(e)) except Exception: self.backend.set_status(self.BUNDLE_TYPE, self.swhid, "failed") tb = traceback.format_exc() self.backend.set_progress( self.BUNDLE_TYPE, self.swhid, f"Internal Server Error. This incident will be reported.\n" f"The full error was:\n\n{tb}", ) logging.exception("Bundle cooking failed.") + sentry_sdk.capture_exception() else: self.backend.set_status(self.BUNDLE_TYPE, self.swhid, "done") self.backend.set_progress(self.BUNDLE_TYPE, self.swhid, None) finally: self.backend.send_notif(self.BUNDLE_TYPE, self.swhid) diff --git a/swh/vault/cookers/directory.py b/swh/vault/cookers/directory.py index e3540ef..d096320 100644 --- a/swh/vault/cookers/directory.py +++ b/swh/vault/cookers/directory.py @@ -1,28 +1,28 @@ # Copyright (C) 2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import tarfile import tempfile from swh.model.swhids import ObjectType from swh.vault.cookers.base import BaseVaultCooker from swh.vault.to_disk import DirectoryBuilder class DirectoryCooker(BaseVaultCooker): - """Cooker to create a directory bundle """ + """Cooker to create a directory bundle""" BUNDLE_TYPE = "flat" SUPPORTED_OBJECT_TYPES = {ObjectType.DIRECTORY} def check_exists(self): return not list(self.storage.directory_missing([self.obj_id])) def prepare_bundle(self): with tempfile.TemporaryDirectory(prefix="tmp-vault-directory-") as td: directory_builder = DirectoryBuilder(self.storage, td.encode(), self.obj_id) directory_builder.build() with tarfile.open(fileobj=self.fileobj, mode="w:gz") as tar: tar.add(td, arcname=str(self.swhid)) diff --git a/swh/vault/cookers/git_bare.py b/swh/vault/cookers/git_bare.py index bb1d748..c45b96a 100644 --- a/swh/vault/cookers/git_bare.py +++ b/swh/vault/cookers/git_bare.py @@ -1,720 +1,729 @@ # Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """ This cooker creates tarballs containing a bare .git directory, that can be unpacked and cloned like any git repository. It works in three steps: 1. Write objects one by one in :file:`.git/objects/` 2. Calls ``git repack`` to pack all these objects into git packfiles. 3. 
Creates a tarball of the resulting repository It keeps a set of all written (or about-to-be-written) object hashes in memory to avoid downloading and writing the same objects twice. The first step is the most complex. When swh-graph is available, this roughly does the following: 1. Find all the revisions and releases in the induced subgraph, adds them to todo-lists 2. Grab a batch from (release/revision/directory/content) todo-lists, and load them. Add directory and content objects they reference to the todo-list 3. If any todo-list is not empty, goto 1 When swh-graph is not available, steps 1 and 2 are merged, because revisions need to be loaded in order to compute the subgraph. """ import datetime import enum import glob import logging import multiprocessing.dummy import os.path import re import subprocess import tarfile import tempfile from typing import Any, Dict, Iterable, Iterator, List, NoReturn, Optional, Set, Tuple import zlib +import sentry_sdk + from swh.core.api.classes import stream_results_optional from swh.model import git_objects from swh.model.hashutil import hash_to_bytehex, hash_to_hex from swh.model.model import ( Person, Release, Revision, RevisionType, Sha1Git, Snapshot, SnapshotBranch, TargetType, TimestampWithTimezone, ) from swh.model.model import Content, Directory, DirectoryEntry from swh.model.model import ObjectType as ModelObjectType from swh.model.swhids import CoreSWHID, ObjectType from swh.storage.algos.revisions_walker import DFSRevisionsWalker from swh.storage.algos.snapshot import snapshot_get_all_branches from swh.vault.cookers.base import BaseVaultCooker from swh.vault.to_disk import HIDDEN_MESSAGE, SKIPPED_MESSAGE RELEASE_BATCH_SIZE = 10000 REVISION_BATCH_SIZE = 10000 DIRECTORY_BATCH_SIZE = 10000 CONTENT_BATCH_SIZE = 100 logger = logging.getLogger(__name__) class RootObjectType(enum.Enum): DIRECTORY = "directory" REVISION = "revision" RELEASE = "release" SNAPSHOT = "snapshot" def assert_never(value: NoReturn, msg) -> NoReturn: """mypy makes sure this function is never called, through exhaustive checking of ``value`` in the parent function. See https://mypy.readthedocs.io/en/latest/literal_types.html#exhaustive-checks for details. 
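For example, with an exhaustive ``if``/``elif`` chain over a made-up enum, mypy accepts the ``assert_never`` call because the ``else`` branch is provably unreachable; adding a new member to the enum turns it into a type error::

    import enum

    class Color(enum.Enum):
        RED = "red"
        BLUE = "blue"

    def handle(color: Color) -> str:
        if color is Color.RED:
            return "stop"
        elif color is Color.BLUE:
            return "go"
        else:
            # mypy narrows `color` to NoReturn here, so this type-checks
            assert_never(color, f"Unexpected color: {color}")
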
""" assert False, msg class GitBareCooker(BaseVaultCooker): BUNDLE_TYPE = "git_bare" SUPPORTED_OBJECT_TYPES = {ObjectType[obj_type.name] for obj_type in RootObjectType} use_fsck = True obj_type: RootObjectType def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.obj_type = RootObjectType[self.swhid.object_type.name] def check_exists(self) -> bool: """Returns whether the root object is present in the archive.""" if self.obj_type is RootObjectType.REVISION: return not list(self.storage.revision_missing([self.obj_id])) elif self.obj_type is RootObjectType.RELEASE: return not list(self.storage.release_missing([self.obj_id])) elif self.obj_type is RootObjectType.DIRECTORY: return not list(self.storage.directory_missing([self.obj_id])) elif self.obj_type is RootObjectType.SNAPSHOT: return not list(self.storage.snapshot_missing([self.obj_id])) else: assert_never(self.obj_type, f"Unexpected root object type: {self.obj_type}") def _push(self, stack: List[Sha1Git], obj_ids: Iterable[Sha1Git]) -> None: """Adds all the given ``obj_ids`` to the given ``stack``, unless they are already in ``self._seen``, and adds them to ``self._seen``.""" assert not isinstance(obj_ids, bytes) revision_ids = [id_ for id_ in obj_ids if id_ not in self._seen] self._seen.update(revision_ids) stack.extend(revision_ids) def _pop(self, stack: List[Sha1Git], n: int) -> List[Sha1Git]: """Removes ``n`` object from the ``stack`` and returns them.""" obj_ids = stack[-n:] stack[-n:] = [] return obj_ids def prepare_bundle(self): """Main entry point. Initializes the state, creates the bundle, and sends it to the backend.""" # Objects we will visit soon (aka. "todo-lists"): self._rel_stack: List[Sha1Git] = [] self._rev_stack: List[Sha1Git] = [] self._dir_stack: List[Sha1Git] = [] self._cnt_stack: List[Sha1Git] = [] # Set of objects already in any of the stacks: self._seen: Set[Sha1Git] = set() self._walker_state: Optional[Any] = None # Set of errors we expect git-fsck to raise at the end: self._expected_fsck_errors = set() with tempfile.TemporaryDirectory(prefix="swh-vault-gitbare-") as workdir: # Initialize a Git directory self.workdir = workdir self.gitdir = os.path.join(workdir, "clone.git") os.mkdir(self.gitdir) self.init_git() self.nb_loaded = 0 # Add the root object to the stack of objects to visit self.push_subgraph(self.obj_type, self.obj_id) # Load and write all the objects to disk self.load_objects() self.backend.set_progress( self.BUNDLE_TYPE, self.swhid, "Writing references..." ) # Write the root object as a ref (this step is skipped if it's a snapshot) # This must be done before repacking; git-repack ignores orphan objects. self.write_refs() self.backend.set_progress( self.BUNDLE_TYPE, self.swhid, "Checking content integrity" ) if self.use_fsck: self.git_fsck() self.backend.set_progress( self.BUNDLE_TYPE, self.swhid, "Creating final bundle" ) self.repack() self.write_archive() self.backend.set_progress(self.BUNDLE_TYPE, self.swhid, "Uploading bundle") def init_git(self) -> None: """Creates an empty :file:`.git` directory.""" subprocess.run(["git", "-C", self.gitdir, "init", "--bare"], check=True) self.create_object_dirs() # Remove example hooks; they take ~40KB and we don't use them for filename in glob.glob(os.path.join(self.gitdir, "hooks", "*.sample")): os.unlink(filename) def create_object_dirs(self) -> None: """Creates all possible subdirectories of :file:`.git/objects/`""" # Create all possible dirs ahead of time, so we don't have to check for # existence every time. 
for byte in range(256): try: os.mkdir(os.path.join(self.gitdir, "objects", f"{byte:02x}")) except FileExistsError: pass def repack(self) -> None: """Moves all objects from :file:`.git/objects/` to a packfile.""" try: subprocess.run(["git", "-C", self.gitdir, "repack", "-d"], check=True) except subprocess.CalledProcessError: logging.exception("git-repack failed with:") + sentry_sdk.capture_exception() # Remove their non-packed originals subprocess.run(["git", "-C", self.gitdir, "prune-packed"], check=True) def git_fsck(self) -> None: """Runs git-fsck and ignores expected errors (eg. because of missing objects).""" proc = subprocess.run( ["git", "-C", self.gitdir, "fsck"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env={"LANG": "C.utf8"}, ) # Split on newlines not followed by a space errors = re.split("\n(?! )", proc.stdout.decode()) errors = [ error for error in errors if error and not error.startswith("warning ") ] unexpected_errors = set(errors) - self._expected_fsck_errors if unexpected_errors: logging.error( "Unexpected errors from git-fsck after cooking %s: %s", self.swhid, "\n".join(sorted(unexpected_errors)), ) def _make_stub_directory_revision(self, dir_id: Sha1Git) -> Sha1Git: author = Person.from_fullname( b"swh-vault, git-bare cooker " ) dt = datetime.datetime.now(tz=datetime.timezone.utc) dt = dt.replace(microsecond=0) # not supported by git date = TimestampWithTimezone.from_datetime(dt) revision = Revision( author=author, committer=author, date=date, committer_date=date, message=b"Initial commit", type=RevisionType.GIT, directory=self.obj_id, synthetic=True, ) self.write_revision_node(revision) return revision.id def write_refs(self, snapshot=None): """Writes all files in :file:`.git/refs/`. For non-snapshot objects, this is only ``master``.""" refs: Dict[bytes, bytes] # ref name -> target if self.obj_type == RootObjectType.DIRECTORY: # We need a synthetic revision pointing to the directory rev_id = self._make_stub_directory_revision(self.obj_id) refs = {b"refs/heads/master": hash_to_bytehex(rev_id)} elif self.obj_type == RootObjectType.REVISION: refs = {b"refs/heads/master": hash_to_bytehex(self.obj_id)} elif self.obj_type == RootObjectType.RELEASE: (release,) = self.storage.release_get([self.obj_id]) - if release.name and re.match(br"^[a-zA-Z0-9_.-]+$", release.name): + if release.name and re.match(rb"^[a-zA-Z0-9_.-]+$", release.name): release_name = release.name else: release_name = b"release" refs = { b"refs/tags/" + release_name: hash_to_bytehex(self.obj_id), } if release.target_type.value == ModelObjectType.REVISION: # Not necessary, but makes it easier to browse refs[b"ref/heads/master"] = hash_to_bytehex(release.target) # TODO: synthetize a master branch for other target types elif self.obj_type == RootObjectType.SNAPSHOT: if snapshot is None: # refs were already written in a previous step return branches = [] for (branch_name, branch) in snapshot.branches.items(): if branch is None: logging.error( "%s has dangling branch: %r", snapshot.swhid(), branch_name ) else: branches.append((branch_name, branch)) refs = { branch_name: ( b"ref: " + branch.target if branch.target_type == TargetType.ALIAS else hash_to_bytehex(branch.target) ) for (branch_name, branch) in branches } else: assert_never(self.obj_type, f"Unexpected root object type: {self.obj_type}") for (ref_name, ref_target) in refs.items(): path = os.path.join(self.gitdir.encode(), ref_name) os.makedirs(os.path.dirname(path), exist_ok=True) with open(path, "wb") as fd: fd.write(ref_target) def 
write_archive(self): """Creates the final .tar file.""" with tarfile.TarFile(mode="w", fileobj=self.fileobj) as tf: tf.add(self.gitdir, arcname=f"{self.swhid}.git", recursive=True) def _obj_path(self, obj_id: Sha1Git): """Returns the absolute path of the file (in :file:`.git/objects/`) that will contain the git object identified by the ``obj_id``.""" return os.path.join(self.gitdir, self._obj_relative_path(obj_id)) def _obj_relative_path(self, obj_id: Sha1Git): """Same as :meth:`_obj_path`, but relative.""" obj_id_hex = hash_to_hex(obj_id) directory = obj_id_hex[0:2] filename = obj_id_hex[2:] return os.path.join("objects", directory, filename) def object_exists(self, obj_id: Sha1Git) -> bool: """Returns whether the object identified by the given ``obj_id`` was already written to a file in :file:`.git/objects/`. This function ignores objects contained in a git pack.""" return os.path.exists(self._obj_path(obj_id)) def write_object(self, obj_id: Sha1Git, obj: bytes) -> bool: """Writes a git object on disk and returns ``True``.""" # Git requires objects to be zlib-compressed; but repacking decompresses and # removes them, so we don't need to compress them too much. data = zlib.compress(obj, level=1) with open(self._obj_path(obj_id), "wb") as fd: fd.write(data) return True def push_subgraph(self, obj_type: RootObjectType, obj_id) -> None: """Adds the graph induced by the given ``obj_id`` without recursing through directories, to the todo-lists. If swh-graph is not available, this immediately loads revisions, as they need to be fetched in order to compute the subgraph, and fetching them immediately avoids duplicate fetches.""" if self.obj_type is RootObjectType.REVISION: self.push_revision_subgraph(obj_id) elif self.obj_type is RootObjectType.DIRECTORY: self._push(self._dir_stack, [obj_id]) elif self.obj_type is RootObjectType.SNAPSHOT: self.push_snapshot_subgraph(obj_id) elif self.obj_type is RootObjectType.RELEASE: self.push_releases_subgraphs([obj_id]) else: assert_never(self.obj_type, f"Unexpected root object type: {self.obj_type}") def load_objects(self) -> None: """Repeatedly loads objects in the todo-lists, until all lists are empty.""" while self._rel_stack or self._rev_stack or self._dir_stack or self._cnt_stack: nb_remaining = ( len(self._rel_stack) + len(self._rev_stack) + len(self._dir_stack) + len(self._cnt_stack) ) # We assume nb_remaining is a lower bound. # When the snapshot was loaded with swh-graph, this should be the exact # value, though. self.backend.set_progress( self.BUNDLE_TYPE, self.swhid, f"Processing... {self.nb_loaded} objects processed\n" f"Over {nb_remaining} remaining", ) release_ids = self._pop(self._rel_stack, RELEASE_BATCH_SIZE) if release_ids: self.load_releases(release_ids) self.nb_loaded += len(release_ids) revision_ids = self._pop(self._rev_stack, REVISION_BATCH_SIZE) if revision_ids: self.load_revisions(revision_ids) self.nb_loaded += len(revision_ids) directory_ids = self._pop(self._dir_stack, DIRECTORY_BATCH_SIZE) if directory_ids: self.load_directories(directory_ids) self.nb_loaded += len(directory_ids) content_ids = self._pop(self._cnt_stack, CONTENT_BATCH_SIZE) if content_ids: self.load_contents(content_ids) self.nb_loaded += len(content_ids) def push_revision_subgraph(self, obj_id: Sha1Git) -> None: """Fetches the graph of revisions induced by the given ``obj_id`` and adds them to ``self._rev_stack``.
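The de-duplicating todo-list machinery of ``_push``/``_pop`` used above can be sketched standalone, with plain integers standing in for object ids::

    seen = set()
    stack = []

    def push(ids):
        new = [i for i in ids if i not in seen]
        seen.update(new)  # never queue the same object twice
        stack.extend(new)

    def pop(n):
        batch = stack[-n:]  # at most n of the most recently pushed ids
        del stack[-n:]
        return batch

    push([1, 2, 3])
    push([2, 3, 4])  # 2 and 3 are already seen, only 4 is queued
    assert pop(10) == [1, 2, 3, 4]
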
If swh-graph is not available, this requires fetching the revisions themselves, so they are directly loaded instead.""" loaded_from_graph = False if self.graph: from swh.graph.client import GraphArgumentException # First, try to cook using swh-graph, as it is more efficient than # swh-storage for querying the history - obj_swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=obj_id,) + obj_swhid = CoreSWHID( + object_type=ObjectType.REVISION, + object_id=obj_id, + ) try: revision_ids = ( swhid.object_id for swhid in map( CoreSWHID.from_string, self.graph.visit_nodes(str(obj_swhid), edges="rev:rev"), ) ) self._push(self._rev_stack, revision_ids) except GraphArgumentException as e: logger.info( "Revision %s not found in swh-graph, falling back to fetching " "history using swh-storage. %s", hash_to_hex(obj_id), e.args[0], ) else: loaded_from_graph = True if not loaded_from_graph: # If swh-graph is not available, or the revision is not yet in # swh-graph, fall back to self.storage.revision_log. # self.storage.revision_log also gives us the full revisions, # so we load them right now instead of just pushing them on the stack. walker = DFSRevisionsWalker( self.storage, obj_id, state=self._walker_state, ignore_displayname=True ) for revision in walker: self.write_revision_node(Revision.from_dict(revision)) self.nb_loaded += 1 self._push(self._dir_stack, [revision["directory"]]) # Save the state, so the next call to the walker won't return the same # revisions self._walker_state = walker.export_state() def push_snapshot_subgraph(self, obj_id: Sha1Git) -> None: """Fetches a snapshot and all its children, excluding directories and contents, and pushes them to the todo-lists. Also loads revisions if swh-graph is not available, see :meth:`push_revision_subgraph`.""" loaded_from_graph = False if self.graph: revision_ids = [] release_ids = [] directory_ids = [] content_ids = [] from swh.graph.client import GraphArgumentException # First, try to cook using swh-graph, as it is more efficient than # swh-storage for querying the history - obj_swhid = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=obj_id,) + obj_swhid = CoreSWHID( + object_type=ObjectType.SNAPSHOT, + object_id=obj_id, + ) try: swhids: Iterable[CoreSWHID] = map( CoreSWHID.from_string, self.graph.visit_nodes(str(obj_swhid), edges="snp:*,rel:*,rev:rev"), ) for swhid in swhids: if swhid.object_type is ObjectType.REVISION: revision_ids.append(swhid.object_id) elif swhid.object_type is ObjectType.RELEASE: release_ids.append(swhid.object_id) elif swhid.object_type is ObjectType.DIRECTORY: directory_ids.append(swhid.object_id) elif swhid.object_type is ObjectType.CONTENT: content_ids.append(swhid.object_id) elif swhid.object_type is ObjectType.SNAPSHOT: assert ( swhid.object_id == obj_id ), f"Snapshot {obj_id.hex()} references a different snapshot" else: assert_never( swhid.object_type, f"Unexpected SWHID object type: {swhid}" ) except GraphArgumentException as e: logger.info( "Snapshot %s not found in swh-graph, falling back to fetching " "history for each branch. %s", hash_to_hex(obj_id), e.args[0], ) else: self._push(self._rev_stack, revision_ids) self._push(self._rel_stack, release_ids) self._push(self._dir_stack, directory_ids) self._push(self._cnt_stack, content_ids) loaded_from_graph = True # TODO: when self.graph is available and supports edge labels, use it # directly to get branch names. 
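# Whether or not the subgraph came from swh-graph, the snapshot object
# itself is always loaded from storage, so its branches can be resolved
# and written as git refs by write_refs() below.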
snapshot: Optional[Snapshot] = snapshot_get_all_branches(self.storage, obj_id) assert snapshot, "Unknown snapshot" # should have been caught by check_exists() for branch in snapshot.branches.values(): if not loaded_from_graph: if branch is None: logging.warning("Dangling branch: %r", branch) continue assert isinstance(branch, SnapshotBranch) # for mypy if branch.target_type is TargetType.REVISION: self.push_revision_subgraph(branch.target) elif branch.target_type is TargetType.RELEASE: self.push_releases_subgraphs([branch.target]) elif branch.target_type is TargetType.ALIAS: # Nothing to do, this for loop also iterates on the target branch # (if it exists) pass elif branch.target_type is TargetType.DIRECTORY: self._push(self._dir_stack, [branch.target]) elif branch.target_type is TargetType.CONTENT: self._push(self._cnt_stack, [branch.target]) elif branch.target_type is TargetType.SNAPSHOT: if swhid.object_id != obj_id: raise NotImplementedError( f"{swhid} has a snapshot as a branch." ) else: assert_never( branch.target_type, f"Unexpected target type: {self.obj_type}" ) self.write_refs(snapshot=snapshot) def load_revisions(self, obj_ids: List[Sha1Git]) -> None: """Given a list of revision ids, loads these revisions and their directories; but not their parent revisions (i.e. this is not recursive).""" ret: List[Optional[Revision]] = self.storage.revision_get( obj_ids, ignore_displayname=True ) revisions: List[Revision] = list(filter(None, ret)) if len(ret) != len(revisions): logger.error("Missing revision(s), ignoring them.") for revision in revisions: self.write_revision_node(revision) self._push(self._dir_stack, (rev.directory for rev in revisions)) def write_revision_node(self, revision: Revision) -> bool: """Writes a revision object to disk""" git_object = revision.raw_manifest or git_objects.revision_git_object(revision) return self.write_object(revision.id, git_object) def load_releases(self, obj_ids: List[Sha1Git]) -> List[Release]: """Loads release objects, and returns them.""" ret = self.storage.release_get(obj_ids, ignore_displayname=True) releases = list(filter(None, ret)) if len(ret) != len(releases): logger.error("Missing release(s), ignoring them.") for release in releases: self.write_release_node(release) return releases def push_releases_subgraphs(self, obj_ids: List[Sha1Git]) -> None: """Given a list of release ids, loads these releases and adds their target to the list of objects to visit""" for release in self.load_releases(obj_ids): self.nb_loaded += 1 assert release.target, f"{release.swhid()} has no target" if release.target_type is ModelObjectType.REVISION: self.push_revision_subgraph(release.target) elif release.target_type is ModelObjectType.DIRECTORY: self._push(self._dir_stack, [release.target]) elif release.target_type is ModelObjectType.CONTENT: self._push(self._cnt_stack, [release.target]) elif release.target_type is ModelObjectType.RELEASE: self.push_releases_subgraphs([release.target]) elif release.target_type is ModelObjectType.SNAPSHOT: raise NotImplementedError( f"{release.swhid()} targets a snapshot: {release.target!r}" ) else: assert_never( release.target_type, f"Unexpected release target type: {release.target_type}", ) def write_release_node(self, release: Release) -> bool: """Writes a release object to disk""" git_object = release.raw_manifest or git_objects.release_git_object(release) return self.write_object(release.id, git_object) def load_directories(self, obj_ids: List[Sha1Git]) -> None: if not obj_ids: return raw_manifests =
self.storage.directory_get_raw_manifest(obj_ids) with multiprocessing.dummy.Pool(min(self.thread_pool_size, len(obj_ids))) as p: for _ in p.imap_unordered( lambda obj_id: self.load_directory(obj_id, raw_manifests.get(obj_id)), obj_ids, ): pass def load_directory(self, obj_id: Sha1Git, raw_manifest: Optional[bytes]) -> None: # Load the directory entries_it: Optional[Iterable[DirectoryEntry]] = stream_results_optional( self.storage.directory_get_entries, obj_id ) if entries_it is None: logger.error("Missing swh:1:dir:%s, ignoring.", hash_to_hex(obj_id)) return directory = Directory( id=obj_id, entries=tuple(entries_it), raw_manifest=raw_manifest ) git_object = raw_manifest or git_objects.directory_git_object(directory) self.write_object(obj_id, git_object) # Add children to the stack entry_loaders: Dict[str, Optional[List[Sha1Git]]] = { "file": self._cnt_stack, "dir": self._dir_stack, "rev": None, # Do not include submodule targets (rejected by git-fsck) } for entry in directory.entries: stack = entry_loaders[entry.type] if stack is not None: self._push(stack, [entry.target]) def load_contents(self, obj_ids: List[Sha1Git]) -> None: # TODO: add support of filtered objects, somehow? # It's tricky, because, by definition, we can't write a git object with # the expected hash, so git-fsck *will* choke on it. contents = self.storage.content_get(obj_ids, "sha1_git") visible_contents = [] for (obj_id, content) in zip(obj_ids, contents): if content is None: # FIXME: this may also happen for missing content self.write_content(obj_id, SKIPPED_MESSAGE) self._expect_mismatched_object_error(obj_id) elif content.status == "visible": visible_contents.append(content) elif content.status == "hidden": self.write_content(obj_id, HIDDEN_MESSAGE) self._expect_mismatched_object_error(obj_id) elif content.status == "absent": assert False, f"content_get returned absent content {content.swhid()}" else: # TODO: When content.status will have type Literal, replace this with # assert_never assert False, f"{content.swhid} has status: {content.status!r}" contents_and_data: Iterator[Tuple[Content, Optional[bytes]]] if self.objstorage is None: contents_and_data = ( (content, self.storage.content_get_data(content.sha1)) for content in visible_contents ) else: contents_and_data = zip( visible_contents, self.objstorage.get_batch(c.sha1 for c in visible_contents), ) for (content, datum) in contents_and_data: if datum is None: logger.error( "%s is visible, but is missing data. 
Skipping.", content.swhid() ) continue self.write_content(content.sha1_git, datum) def write_content(self, obj_id: Sha1Git, content: bytes) -> None: header = git_objects.git_object_header("blob", len(content)) self.write_object(obj_id, header + content) def _expect_mismatched_object_error(self, obj_id): obj_id_hex = hash_to_hex(obj_id) obj_path = self._obj_relative_path(obj_id) # For Git < 2.21: self._expected_fsck_errors.add( f"error: sha1 mismatch for ./{obj_path} (expected {obj_id_hex})" ) # For Git >= 2.21: self._expected_fsck_errors.add( f"error: hash mismatch for ./{obj_path} (expected {obj_id_hex})" ) self._expected_fsck_errors.add( f"error: {obj_id_hex}: object corrupt or missing: ./{obj_path}" ) self._expected_fsck_errors.add(f"missing blob {obj_id_hex}") diff --git a/swh/vault/cookers/revision_flat.py b/swh/vault/cookers/revision_flat.py index 8bd9418..09c2ac6 100644 --- a/swh/vault/cookers/revision_flat.py +++ b/swh/vault/cookers/revision_flat.py @@ -1,37 +1,37 @@ # Copyright (C) 2016-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from pathlib import Path import tarfile import tempfile from swh.model import hashutil from swh.model.swhids import ObjectType from swh.vault.cookers.base import BaseVaultCooker from swh.vault.cookers.utils import revision_log from swh.vault.to_disk import DirectoryBuilder class RevisionFlatCooker(BaseVaultCooker): - """Cooker to create a revision_flat bundle """ + """Cooker to create a revision_flat bundle""" BUNDLE_TYPE = "flat" SUPPORTED_OBJECT_TYPES = {ObjectType.REVISION} def check_exists(self): return not list(self.storage.revision_missing([self.swhid.object_id])) def prepare_bundle(self): with tempfile.TemporaryDirectory(prefix="tmp-vault-revision-") as td: root = Path(td) for revision in revision_log(self.storage, self.swhid.object_id): revdir = root / hashutil.hash_to_hex(revision["id"]) revdir.mkdir() directory_builder = DirectoryBuilder( self.storage, str(revdir).encode(), revision["directory"] ) directory_builder.build() with tarfile.open(fileobj=self.fileobj, mode="w:gz") as tar: tar.add(td, arcname=self.swhid) diff --git a/swh/vault/cookers/revision_gitfast.py b/swh/vault/cookers/revision_gitfast.py index 1741869..19be69d 100644 --- a/swh/vault/cookers/revision_gitfast.py +++ b/swh/vault/cookers/revision_gitfast.py @@ -1,221 +1,219 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import functools import os import time import zlib from fastimport.commands import ( BlobCommand, CommitCommand, FileDeleteCommand, FileModifyCommand, ResetCommand, ) from swh.model import hashutil from swh.model.from_disk import DentryPerms, mode_to_perms from swh.model.swhids import ObjectType from swh.model.toposort import toposort from swh.vault.cookers.base import BaseVaultCooker from swh.vault.cookers.utils import revision_log from swh.vault.to_disk import get_filtered_files_content class RevisionGitfastCooker(BaseVaultCooker): - """Cooker to create a git fast-import bundle """ + """Cooker to create a git fast-import bundle""" BUNDLE_TYPE = "gitfast" SUPPORTED_OBJECT_TYPES = {ObjectType.REVISION} def check_exists(self): return not 
list(self.storage.revision_missing([self.obj_id])) def prepare_bundle(self): self.log = list(toposort(revision_log(self.storage, self.obj_id))) self.gzobj = zlib.compressobj(9, zlib.DEFLATED, zlib.MAX_WBITS | 16) self.fastexport() self.write(self.gzobj.flush()) def write_cmd(self, cmd): chunk = bytes(cmd) + b"\n" super().write(self.gzobj.compress(chunk)) def fastexport(self): - """Generate all the git fast-import commands from a given log. - """ + """Generate all the git fast-import commands from a given log.""" self.rev_by_id = {r["id"]: r for r in self.log} self.obj_done = set() self.obj_to_mark = {} self.next_available_mark = 1 last_progress_report = None for i, rev in enumerate(self.log, 1): # Update progress if needed ct = time.time() if last_progress_report is None or last_progress_report + 2 <= ct: last_progress_report = ct pg = "Computing revision {}/{}".format(i, len(self.log)) self.backend.set_progress(self.BUNDLE_TYPE, self.swhid, pg) # Compute the current commit self._compute_commit_command(rev) def mark(self, obj_id): """Get the mark ID as bytes of a git object. If the object has not yet been marked, assign a new ID and add it to the mark dictionary. """ if obj_id not in self.obj_to_mark: self.obj_to_mark[obj_id] = self.next_available_mark self.next_available_mark += 1 return str(self.obj_to_mark[obj_id]).encode() def _compute_blob_command_content(self, file_data): """Compute the blob command of a file entry if it has not been computed yet. """ obj_id = file_data["sha1"] if obj_id in self.obj_done: return contents = list(get_filtered_files_content(self.storage, [file_data])) content = contents[0]["content"] self.write_cmd(BlobCommand(mark=self.mark(obj_id), data=content)) self.obj_done.add(obj_id) def _author_tuple_format(self, author, date): # We never want to have None values here so we replace null entries # by ''. if author is not None: author_tuple = (author.get("name") or b"", author.get("email") or b"") else: author_tuple = (b"", b"") if date is not None: date_tuple = ( date.get("timestamp", {}).get("seconds") or 0, (date.get("offset") or 0) * 60, ) else: date_tuple = (0, 0) return author_tuple + date_tuple def _compute_commit_command(self, rev): - """Compute a commit command from a specific revision. - """ + """Compute a commit command from a specific revision.""" if "parents" in rev and rev["parents"]: from_ = b":" + self.mark(rev["parents"][0]) merges = [b":" + self.mark(r) for r in rev["parents"][1:]] parent = self.rev_by_id[rev["parents"][0]] else: # We issue a reset command before all the new roots so that they # are not automatically added as children of the current branch. self.write_cmd(ResetCommand(b"refs/heads/master", None)) from_ = None merges = None parent = None # Retrieve the file commands while yielding new blob commands if # needed. files = list(self._compute_file_commands(rev, parent)) # Construct and write the commit command author = self._author_tuple_format(rev["author"], rev["date"]) committer = self._author_tuple_format(rev["committer"], rev["committer_date"]) self.write_cmd( CommitCommand( ref=b"refs/heads/master", mark=self.mark(rev["id"]), author=author, committer=committer, message=rev["message"] or b"", from_=from_, merges=merges, file_iter=files, ) ) @functools.lru_cache(maxsize=4096) def _get_dir_ents(self, dir_id=None): """Get the entities of a directory as a dictionary (name -> entity). This function has a cache to avoid doing multiple requests to retrieve the same entities, as doing a directory_ls() is expensive. 
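The caching here is plain ``functools.lru_cache`` memoization, e.g.::

    import functools

    @functools.lru_cache(maxsize=4096)
    def expensive_lookup(key):
        print("computing", key)
        return key * 2

    expensive_lookup(21)  # prints "computing 21"
    expensive_lookup(21)  # answered from the cache, prints nothing
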
""" data = self.storage.directory_ls(dir_id) if dir_id is not None else [] return {f["name"]: f for f in data} def _compute_file_commands(self, rev, parent=None): """Compute all the file commands of a revision. Generate a diff of the files between the revision and its main parent to find the necessary file commands to apply. """ # Initialize the stack with the root of the tree. cur_dir = rev["directory"] parent_dir = parent["directory"] if parent else None stack = [(b"", cur_dir, parent_dir)] while stack: # Retrieve the current directory and the directory of the parent # commit in order to compute the diff of the trees. root, cur_dir_id, prev_dir_id = stack.pop() cur_dir = self._get_dir_ents(cur_dir_id) prev_dir = self._get_dir_ents(prev_dir_id) # Find subtrees to delete: # - Subtrees that are not in the new tree (file or directory # deleted). # - Subtrees that do not have the same type in the new tree # (file -> directory or directory -> file) # After this step, every node remaining in the previous directory # has the same type than the one in the current directory. for fname, f in prev_dir.items(): if fname not in cur_dir or f["type"] != cur_dir[fname]["type"]: yield FileDeleteCommand(path=os.path.join(root, fname)) # Find subtrees to modify: # - Leaves (files) will be added or modified using `filemodify` # - Other subtrees (directories) will be added to the stack and # processed in the next iteration. for fname, f in cur_dir.items(): # A file is added or modified if it was not in the tree, if its # permissions changed or if its content changed. if f["type"] == "file" and ( fname not in prev_dir or f["sha1"] != prev_dir[fname]["sha1"] or f["perms"] != prev_dir[fname]["perms"] ): # Issue a blob command for the new blobs if needed. self._compute_blob_command_content(f) yield FileModifyCommand( path=os.path.join(root, fname), mode=mode_to_perms(f["perms"]).value, dataref=(b":" + self.mark(f["sha1"])), data=None, ) # A revision is added or modified if it was not in the tree or # if its target changed elif f["type"] == "rev" and ( fname not in prev_dir or f["target"] != prev_dir[fname]["target"] ): yield FileModifyCommand( path=os.path.join(root, fname), mode=DentryPerms.revision, dataref=hashutil.hash_to_hex(f["target"]).encode(), data=None, ) # A directory is added or modified if it was not in the tree or # if its target changed. elif f["type"] == "dir": f_prev_target = None if fname in prev_dir and prev_dir[fname]["type"] == "dir": f_prev_target = prev_dir[fname]["target"] if f_prev_target is None or f["target"] != f_prev_target: stack.append( (os.path.join(root, fname), f["target"], f_prev_target) ) diff --git a/swh/vault/interface.py b/swh/vault/interface.py index 8ea68ac..e6991ee 100644 --- a/swh/vault/interface.py +++ b/swh/vault/interface.py @@ -1,69 +1,69 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Any, Dict, List, Optional, Tuple from typing_extensions import Protocol, runtime_checkable from swh.core.api import remote_api_endpoint from swh.model.swhids import CoreSWHID @runtime_checkable class VaultInterface(Protocol): """ Backend Interface for the Software Heritage vault. """ @remote_api_endpoint("fetch") def fetch(self, bundle_type: str, swhid: CoreSWHID) -> Optional[bytes]: """Fetch information from a bundle""" ... 
@remote_api_endpoint("cook") def cook( self, bundle_type: str, swhid: CoreSWHID, email: Optional[str] = None ) -> Dict[str, Any]: """Main entry point for cooking requests. This starts a cooking task if - needed, and add the given e-mail to the notify list""" + needed, and add the given e-mail to the notify list""" ... @remote_api_endpoint("progress") def progress(self, bundle_type: str, swhid: CoreSWHID): ... # Cookers endpoints @remote_api_endpoint("set_progress") def set_progress(self, bundle_type: str, swhid: CoreSWHID, progress: str) -> None: """Set the cooking progress of a bundle""" ... @remote_api_endpoint("set_status") def set_status(self, bundle_type: str, swhid: CoreSWHID, status: str) -> bool: """Set the cooking status of a bundle""" ... @remote_api_endpoint("put_bundle") def put_bundle(self, bundle_type: str, swhid: CoreSWHID, bundle): """Store bundle in vault cache""" ... @remote_api_endpoint("send_notif") def send_notif(self, bundle_type: str, swhid: CoreSWHID): """Send all the e-mails in the notification list of a bundle""" ... # Batch endpoints @remote_api_endpoint("batch_cook") def batch_cook(self, batch: List[Tuple[str, str]]) -> int: """Cook a batch of bundles and returns the cooking id.""" ... @remote_api_endpoint("batch_progress") def batch_progress(self, batch_id: int) -> Dict[str, Any]: """Fetch information from a batch of bundles""" ... diff --git a/swh/vault/tests/conftest.py b/swh/vault/tests/conftest.py index ef893c8..9c4b0a6 100644 --- a/swh/vault/tests/conftest.py +++ b/swh/vault/tests/conftest.py @@ -1,89 +1,74 @@ -# Copyright (C) 2020 The Software Heritage developers +# Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from functools import partial import os from typing import Any, Dict import pkg_resources.extern.packaging.version import pytest from pytest_postgresql import factories -from swh.core.db.pytest_plugin import initialize_database_for_module, postgresql_fact -from swh.storage.postgresql.db import Db as StorageDb +from swh.core.db.pytest_plugin import initialize_database_for_module from swh.vault import get_vault from swh.vault.backend import VaultBackend os.environ["LC_ALL"] = "C.UTF-8" # needed for directory tests on git-cloned repositories # 022 is usually the default value, but some environments (eg. Debian builds) have # a different one. 
os.umask(0o022) pytest_v = pkg_resources.get_distribution("pytest").parsed_version if pytest_v < pkg_resources.extern.packaging.version.parse("3.9"): @pytest.fixture def tmp_path(): import pathlib import tempfile with tempfile.TemporaryDirectory() as tmpdir: yield pathlib.Path(tmpdir) -storage_postgresql_proc = factories.postgresql_proc( - dbname="storage", - load=[ - partial(initialize_database_for_module, "storage", StorageDb.current_version) - ], -) - vault_postgresql_proc = factories.postgresql_proc( - dbname="vault", load=[ partial(initialize_database_for_module, "vault", VaultBackend.current_version) ], ) -postgres_vault = postgresql_fact("vault_postgresql_proc") -postgres_storage = postgresql_fact( - "storage_postgresql_proc", no_db_drop=True, # keep the db for performance reasons -) +postgres_vault = factories.postgresql("vault_postgresql_proc") @pytest.fixture -def swh_vault_config(postgres_vault, postgres_storage, tmp_path) -> Dict[str, Any]: +def swh_vault_config(postgres_vault, tmp_path) -> Dict[str, Any]: tmp_path = str(tmp_path) return { "db": postgres_vault.dsn, "storage": { - "cls": "postgresql", - "db": postgres_storage.dsn, - "objstorage": { - "cls": "pathslicing", - "root": tmp_path, - "slicing": "0:1/1:5", - }, + "cls": "memory", }, "cache": { "cls": "pathslicing", "root": tmp_path, "slicing": "0:1/1:5", "allow_delete": True, }, - "scheduler": {"cls": "remote", "url": "http://swh-scheduler:5008",}, + "scheduler": { + "cls": "remote", + "url": "http://swh-scheduler:5008", + }, } @pytest.fixture def swh_vault(swh_vault_config): return get_vault("local", **swh_vault_config) @pytest.fixture def swh_storage(swh_vault): return swh_vault.storage diff --git a/swh/vault/tests/test_cli.py b/swh/vault/tests/test_cli.py index fd65167..60f18a6 100644 --- a/swh/vault/tests/test_cli.py +++ b/swh/vault/tests/test_cli.py @@ -1,102 +1,160 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import tempfile from unittest.mock import MagicMock import click import click.testing import pytest +from swh.core.cli.db import db as swhdb +from swh.core.db import BaseDb +from swh.core.db.db_utils import swh_db_module, swh_db_version +from swh.core.db.tests.test_cli import craft_conninfo from swh.model.swhids import CoreSWHID +from swh.vault.backend import VaultBackend from swh.vault.cli import vault as vault_cli_group from swh.vault.cookers.base import BaseVaultCooker from swh.vault.in_memory_backend import InMemoryVaultBackend def test_cook_unsupported_swhid(): runner = click.testing.CliRunner() result = runner.invoke(vault_cli_group, ["cook", "swh:1:dir:f00b4r", "-"]) assert isinstance(result.exception, SystemExit) assert "expected core SWHID" in result.stdout result = runner.invoke(vault_cli_group, ["cook", "swh:1:ori:" + "0" * 40, "-"]) assert isinstance(result.exception, SystemExit) assert "expected core SWHID" in result.stdout def test_cook_unknown_cooker(): runner = click.testing.CliRunner() result = runner.invoke( vault_cli_group, ["cook", "swh:1:dir:" + "0" * 40, "-", "--bundle-type", "gitfast"], ) assert isinstance(result.exception, SystemExit) assert "do not have a gitfast cooker" in result.stdout result = runner.invoke(vault_cli_group, ["cook", "swh:1:rev:" + "0" * 40, "-"]) assert isinstance(result.exception, SystemExit) assert "use --bundle-type" in result.stdout @pytest.mark.parametrize( 
"bundle_type,cooker_name_suffix,swhid_type", - [("directory", "", "dir"), ("revision", "gitfast", "rev"),], + [ + ("directory", "", "dir"), + ("revision", "gitfast", "rev"), + ], ) def test_cook_directory(bundle_type, cooker_name_suffix, swhid_type, mocker): storage = object() mocker.patch("swh.storage.get_storage", return_value=storage) backend = MagicMock(spec=InMemoryVaultBackend) backend.fetch.return_value = b"bundle content" mocker.patch( "swh.vault.in_memory_backend.InMemoryVaultBackend", return_value=backend ) cooker = MagicMock(spec=BaseVaultCooker) cooker_cls = MagicMock(return_value=cooker) mocker.patch("swh.vault.cookers.get_cooker_cls", return_value=cooker_cls) runner = click.testing.CliRunner() swhid = CoreSWHID.from_string(f"swh:1:{swhid_type}:{'0'*40}") with tempfile.NamedTemporaryFile("a", suffix=".yml") as config_fd: config_fd.write('{"storage": {}}') config_fd.seek(0) if cooker_name_suffix: result = runner.invoke( vault_cli_group, [ "cook", f"swh:1:{swhid_type}:{'0'*40}", "-", "-C", config_fd.name, "--bundle-type", cooker_name_suffix, ], ) else: result = runner.invoke( - vault_cli_group, ["cook", str(swhid), "-", "-C", config_fd.name], + vault_cli_group, + ["cook", str(swhid), "-", "-C", config_fd.name], ) if result.exception is not None: raise result.exception cooker_cls.assert_called_once_with( swhid=swhid, backend=backend, storage=storage, graph=None, objstorage=None, max_bundle_size=None, ) cooker.cook.assert_called_once_with() assert result.stdout_bytes == b"bundle content" + + +def test_cli_swh_vault_db_create_and_init_db(postgresql, tmp_path): + """Test that 'swh db init vault' works""" + module_name = "vault" + conninfo = craft_conninfo(postgresql, "new-db") + + cfgfile = tmp_path / "config.yml" + CFG = f""" +vault: + cls: postgresql + db: {conninfo} + cache: + cls: memory + storage: + cls: memory + scheduler: + cls: remote + url: mock://scheduler + """ + cfgfile.write_text(CFG) + + cli_runner = click.testing.CliRunner() + # This creates the db and installs the necessary admin extensions + result = cli_runner.invoke(swhdb, ["create", module_name, "--dbname", conninfo]) + assert result.exit_code == 0, f"Unexpected output: {result.output}" + + result = cli_runner.invoke(swhdb, ["init-admin", module_name, "--dbname", conninfo]) + assert result.exit_code == 0, f"Unexpected output: {result.output}" + + # This initializes the schema and data + result = cli_runner.invoke(swhdb, ["-C", cfgfile, "init", module_name]) + assert result.exit_code == 0, f"Unexpected output: {result.output}" + + assert swh_db_module(conninfo) == "vault" + assert swh_db_version(conninfo) == VaultBackend.current_version + + with BaseDb.connect(conninfo).cursor() as cur: + cur.execute("select tablename from pg_tables where schemaname='public'") + tables = {table for table, in cur.fetchall()} + + assert tables == { + "dbmodule", + "dbversion", + "vault_bundle", + "vault_notif_email", + "vault_batch", + "vault_batch_bundle", + } diff --git a/swh/vault/tests/test_cookers.py b/swh/vault/tests/test_cookers.py index 12844b2..6654380 100644 --- a/swh/vault/tests/test_cookers.py +++ b/swh/vault/tests/test_cookers.py @@ -1,1189 +1,1200 @@ # Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import contextlib import datetime import glob import gzip import io import os import pathlib import shutil import subprocess 
import tarfile import tempfile import unittest import unittest.mock +import attr import dulwich.fastexport import dulwich.index import dulwich.objects import dulwich.porcelain import dulwich.repo import pytest from swh.loader.git.from_disk import GitLoaderFromDisk from swh.model import from_disk, hashutil from swh.model.model import ( Person, Release, Revision, RevisionType, + SkippedContent, Snapshot, SnapshotBranch, TargetType, Timestamp, TimestampWithTimezone, ) from swh.model.model import Content, Directory, DirectoryEntry from swh.model.model import ObjectType as ModelObjectType from swh.model.swhids import CoreSWHID, ObjectType from swh.vault.cookers import DirectoryCooker, GitBareCooker, RevisionGitfastCooker from swh.vault.tests.vault_testing import hash_content from swh.vault.to_disk import HIDDEN_MESSAGE, SKIPPED_MESSAGE class TestRepo: """A tiny context manager for a test git repository, with some utility functions to perform basic git stuff. """ def __init__(self, repo_dir=None): self.repo_dir = repo_dir def __enter__(self): if self.repo_dir: self.tmp_dir = None self.repo = dulwich.repo.Repo(self.repo_dir) else: self.tmp_dir = tempfile.TemporaryDirectory(prefix="tmp-vault-repo-") self.repo_dir = self.tmp_dir.__enter__() self.repo = dulwich.repo.Repo.init(self.repo_dir) self.author_name = b"Test Author" self.author_email = b"test@softwareheritage.org" self.author = b"%s <%s>" % (self.author_name, self.author_email) self.base_date = 258244200 self.counter = 0 return pathlib.Path(self.repo_dir) def __exit__(self, exc, value, tb): if self.tmp_dir is not None: self.tmp_dir.__exit__(exc, value, tb) self.repo_dir = None def checkout(self, rev_sha): rev = self.repo[rev_sha] dulwich.index.build_index_from_tree( str(self.repo_dir), self.repo.index_path(), self.repo.object_store, rev.tree ) def git_shell(self, *cmd, stdout=subprocess.DEVNULL, **kwargs): name = self.author_name email = self.author_email date = "%d +0000" % (self.base_date + self.counter) env = { # Set git commit format "GIT_AUTHOR_NAME": name, "GIT_AUTHOR_EMAIL": email, "GIT_AUTHOR_DATE": date, "GIT_COMMITTER_NAME": name, "GIT_COMMITTER_EMAIL": email, "GIT_COMMITTER_DATE": date, # Ignore all the system-wide and user configurations "GIT_CONFIG_NOSYSTEM": "1", "HOME": str(self.tmp_dir), "XDG_CONFIG_HOME": str(self.tmp_dir), } kwargs.setdefault("env", {}).update(env) subprocess.check_call( ("git", "-C", self.repo_dir) + cmd, stdout=stdout, **kwargs ) def commit(self, message="Commit test\n", ref=b"HEAD"): """Commit the current working tree in a new commit with message on the branch 'ref'. At the end of the commit, the reference should stay the same and the index should be clean. 
""" paths = [ os.path.relpath(path, self.repo_dir) for path in glob.glob(self.repo_dir + "/**/*", recursive=True) ] self.repo.stage(paths) message = message.encode() + b"\n" ret = self.repo.do_commit( message=message, committer=self.author, commit_timestamp=self.base_date + self.counter, commit_timezone=0, ref=ref, ) self.counter += 1 # committing on another branch leaves # dangling files in index if ref != b"HEAD": # XXX this should work (but does not) # dulwich.porcelain.reset(self.repo, 'hard') self.git_shell("reset", "--hard", "HEAD") return ret def tag(self, name, target=b"HEAD", message=None): dulwich.porcelain.tag_create( self.repo, name, message=message, annotated=message is not None, objectish=target, ) def merge(self, parent_sha_list, message="Merge branches."): self.git_shell( "merge", "--allow-unrelated-histories", "-m", message, *[p.decode() for p in parent_sha_list], ) self.counter += 1 return self.repo.refs[b"HEAD"] def print_debug_graph(self, reflog=False): args = ["log", "--all", "--graph", "--decorate"] if reflog: args.append("--reflog") self.git_shell(*args, stdout=None) @pytest.fixture -def git_loader(swh_storage,): - """Instantiate a Git Loader using the storage instance as storage. - - """ +def git_loader( + swh_storage, +): + """Instantiate a Git Loader using the storage instance as storage.""" def _create_loader(directory): return GitLoaderFromDisk( swh_storage, "fake_origin", directory=directory, visit_date=datetime.datetime.now(datetime.timezone.utc), ) return _create_loader @contextlib.contextmanager def cook_extract_directory_dircooker(storage, swhid, fsck=True): """Context manager that cooks a directory and extract it.""" backend = unittest.mock.MagicMock() backend.storage = storage cooker = DirectoryCooker(swhid, backend=backend, storage=storage) cooker.fileobj = io.BytesIO() assert cooker.check_exists() cooker.prepare_bundle() cooker.fileobj.seek(0) with tempfile.TemporaryDirectory(prefix="tmp-vault-extract-") as td: with tarfile.open(fileobj=cooker.fileobj, mode="r") as tar: tar.extractall(td) yield pathlib.Path(td) / str(swhid) cooker.storage = None @contextlib.contextmanager def cook_extract_directory_gitfast(storage, swhid, fsck=True): """Context manager that cooks a revision containing a directory and extract it, using RevisionGitfastCooker""" test_repo = TestRepo() with test_repo as p: date = TimestampWithTimezone.from_datetime( datetime.datetime.now(datetime.timezone.utc) ) revision = Revision( directory=swhid.object_id, message=b"dummy message", author=Person.from_fullname(b"someone"), committer=Person.from_fullname(b"someone"), date=date, committer_date=date, type=RevisionType.GIT, synthetic=False, ) storage.revision_add([revision]) with cook_stream_revision_gitfast( storage, revision.swhid() ) as stream, test_repo as p: processor = dulwich.fastexport.GitImportProcessor(test_repo.repo) processor.import_stream(stream) test_repo.checkout(b"HEAD") shutil.rmtree(p / ".git") yield p @contextlib.contextmanager def cook_extract_directory_git_bare(storage, swhid, fsck=True, direct_objstorage=False): """Context manager that cooks a revision and extract it, using GitBareCooker""" backend = unittest.mock.MagicMock() backend.storage = storage # Cook the object cooker = GitBareCooker( swhid, backend=backend, storage=storage, objstorage=storage.objstorage if direct_objstorage else None, ) cooker.use_fsck = fsck # Some tests try edge-cases that git-fsck rejects cooker.fileobj = io.BytesIO() assert cooker.check_exists() cooker.prepare_bundle() cooker.fileobj.seek(0) 
# Extract it with tempfile.TemporaryDirectory(prefix="tmp-vault-extract-") as td: with tarfile.open(fileobj=cooker.fileobj, mode="r") as tar: tar.extractall(td) # Clone it with Dulwich with tempfile.TemporaryDirectory(prefix="tmp-vault-clone-") as clone_dir: clone_dir = pathlib.Path(clone_dir) subprocess.check_call( - ["git", "clone", os.path.join(td, f"{swhid}.git"), clone_dir,] + [ + "git", + "clone", + os.path.join(td, f"{swhid}.git"), + clone_dir, + ] ) shutil.rmtree(clone_dir / ".git") yield clone_dir @pytest.fixture( scope="module", params=[ cook_extract_directory_dircooker, cook_extract_directory_gitfast, cook_extract_directory_git_bare, ], ) def cook_extract_directory(request): """A fixture that is instantiated as either cook_extract_directory_dircooker or cook_extract_directory_git_bare.""" return request.param @contextlib.contextmanager def cook_stream_revision_gitfast(storage, swhid): """Context manager that cooks a revision and stream its fastexport.""" backend = unittest.mock.MagicMock() backend.storage = storage cooker = RevisionGitfastCooker(swhid, backend=backend, storage=storage) cooker.fileobj = io.BytesIO() assert cooker.check_exists() cooker.prepare_bundle() cooker.fileobj.seek(0) fastexport_stream = gzip.GzipFile(fileobj=cooker.fileobj) yield fastexport_stream cooker.storage = None @contextlib.contextmanager def cook_extract_revision_gitfast(storage, swhid, fsck=True): """Context manager that cooks a revision and extract it, using RevisionGitfastCooker""" test_repo = TestRepo() with cook_stream_revision_gitfast(storage, swhid) as stream, test_repo as p: processor = dulwich.fastexport.GitImportProcessor(test_repo.repo) processor.import_stream(stream) yield test_repo, p @contextlib.contextmanager def cook_extract_git_bare(storage, swhid, fsck=True): """Context manager that cooks a revision and extract it, using GitBareCooker""" backend = unittest.mock.MagicMock() backend.storage = storage # Cook the object cooker = GitBareCooker(swhid, backend=backend, storage=storage) cooker.use_fsck = fsck # Some tests try edge-cases that git-fsck rejects cooker.fileobj = io.BytesIO() assert cooker.check_exists() cooker.prepare_bundle() cooker.fileobj.seek(0) # Extract it with tempfile.TemporaryDirectory(prefix="tmp-vault-extract-") as td: with tarfile.open(fileobj=cooker.fileobj, mode="r") as tar: tar.extractall(td) # Clone it with Dulwich with tempfile.TemporaryDirectory(prefix="tmp-vault-clone-") as clone_dir: clone_dir = pathlib.Path(clone_dir) subprocess.check_call( - ["git", "clone", os.path.join(td, f"{swhid}.git"), clone_dir,] + [ + "git", + "clone", + os.path.join(td, f"{swhid}.git"), + clone_dir, + ] ) test_repo = TestRepo(clone_dir) with test_repo: yield test_repo, clone_dir @contextlib.contextmanager def cook_extract_revision_git_bare(storage, swhid, fsck=True): - with cook_extract_git_bare(storage, swhid, fsck=fsck,) as res: + with cook_extract_git_bare( + storage, + swhid, + fsck=fsck, + ) as res: yield res @pytest.fixture( scope="module", params=[cook_extract_revision_gitfast, cook_extract_revision_git_bare], ) def cook_extract_revision(request): """A fixture that is instantiated as either cook_extract_revision_gitfast or cook_extract_revision_git_bare.""" return request.param @contextlib.contextmanager def cook_extract_snapshot_git_bare(storage, swhid, fsck=True): - with cook_extract_git_bare(storage, swhid, fsck=fsck,) as res: + with cook_extract_git_bare( + storage, + swhid, + fsck=fsck, + ) as res: yield res @pytest.fixture( - scope="module", 
params=[cook_extract_snapshot_git_bare], + scope="module", + params=[cook_extract_snapshot_git_bare], ) def cook_extract_snapshot(request): """Equivalent to cook_extract_snapshot_git_bare; but analogous to cook_extract_revision in case we ever have more cookers supporting snapshots""" return request.param TEST_CONTENT = ( " test content\n" "and unicode \N{BLACK HEART SUIT}\n" " and trailing spaces " ) TEST_EXECUTABLE = b"\x42\x40\x00\x00\x05" class TestDirectoryCooker: def test_directory_simple(self, git_loader, cook_extract_directory): repo = TestRepo() with repo as rp: (rp / "file").write_text(TEST_CONTENT) (rp / "executable").write_bytes(TEST_EXECUTABLE) (rp / "executable").chmod(0o755) (rp / "link").symlink_to("file") (rp / "dir1/dir2").mkdir(parents=True) (rp / "dir1/dir2/file").write_text(TEST_CONTENT) c = repo.commit() loader = git_loader(str(rp)) loader.load() obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) swhid = CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=obj_id) with cook_extract_directory(loader.storage, swhid) as p: assert (p / "file").stat().st_mode == 0o100644 assert (p / "file").read_text() == TEST_CONTENT assert (p / "executable").stat().st_mode == 0o100755 assert (p / "executable").read_bytes() == TEST_EXECUTABLE assert (p / "link").is_symlink() assert os.readlink(str(p / "link")) == "file" assert (p / "dir1/dir2/file").stat().st_mode == 0o100644 assert (p / "dir1/dir2/file").read_text() == TEST_CONTENT directory = from_disk.Directory.from_disk(path=bytes(p)) assert obj_id_hex == hashutil.hash_to_hex(directory.hash) def test_directory_filtered_objects(self, git_loader, cook_extract_directory): repo = TestRepo() with repo as rp: file_1, id_1 = hash_content(b"test1") file_2, id_2 = hash_content(b"test2") file_3, id_3 = hash_content(b"test3") (rp / "file").write_bytes(file_1) (rp / "hidden_file").write_bytes(file_2) (rp / "absent_file").write_bytes(file_3) c = repo.commit() loader = git_loader(str(rp)) loader.load() obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) swhid = CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=obj_id) - # FIXME: storage.content_update() should be changed to allow things - # like that - with loader.storage.get_db().transaction() as cur: - cur.execute( - """update content set status = 'visible' - where sha1 = %s""", - (id_1,), - ) - cur.execute( - """update content set status = 'hidden' - where sha1 = %s""", - (id_2,), - ) - - cur.execute( - """ - insert into skipped_content - (sha1, sha1_git, sha256, blake2s256, length, reason) - select sha1, sha1_git, sha256, blake2s256, length, 'no reason' - from content - where sha1 = %s - """, - (id_3,), - ) - - cur.execute("delete from content where sha1 = %s", (id_3,)) + # alter the content of the storage + # 1/ make file 2 a hidden file object + loader.storage._allow_overwrite = True + cnt2 = attr.evolve( + loader.storage.content_get([id_2])[0], status="hidden", data=file_2 + ) + loader.storage.content_add([cnt2]) + assert loader.storage.content_get([id_2])[0].status == "hidden" + + # 2/ make file 3 a skipped file object + cnt3 = loader.storage.content_get([id_3])[0].to_dict() + cnt3["status"] = "absent" + cnt3["reason"] = "no reason" + sk_cnt3 = SkippedContent.from_dict(cnt3) + loader.storage.skipped_content_add([sk_cnt3]) + # dirty dirty dirty...
let's pretend it is the equivalent of writing sql + # queries in the postgresql backend + for hashkey in loader.storage._cql_runner._content_indexes: + loader.storage._cql_runner._content_indexes[hashkey].pop(cnt3[hashkey]) with cook_extract_directory(loader.storage, swhid) as p: assert (p / "file").read_bytes() == b"test1" assert (p / "hidden_file").read_bytes() == HIDDEN_MESSAGE assert (p / "absent_file").read_bytes() == SKIPPED_MESSAGE def test_directory_bogus_perms(self, git_loader, cook_extract_directory): # Some early git repositories have 664/775 permissions... let's check # if all the weird modes are properly normalized in the directory # cooker. repo = TestRepo() with repo as rp: (rp / "file").write_text(TEST_CONTENT) (rp / "file").chmod(0o664) (rp / "executable").write_bytes(TEST_EXECUTABLE) (rp / "executable").chmod(0o775) (rp / "wat").write_text(TEST_CONTENT) (rp / "wat").chmod(0o604) # Disable mode cleanup with unittest.mock.patch("dulwich.index.cleanup_mode", lambda mode: mode): c = repo.commit() # Make sure Dulwich didn't normalize the permissions itself. # (if it did, then the test can't check the cooker normalized them) tree_id = repo.repo[c].tree assert {entry.mode for entry in repo.repo[tree_id].items()} == { 0o100775, 0o100664, 0o100604, } # Disable mode checks with unittest.mock.patch("dulwich.objects.Tree.check", lambda self: None): loader = git_loader(str(rp)) loader.load() # Make sure swh-loader didn't normalize them either dir_entries = loader.storage.directory_ls(hashutil.bytehex_to_hash(tree_id)) assert {entry["perms"] for entry in dir_entries} == { 0o100664, 0o100775, 0o100604, } obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) swhid = CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=obj_id) with cook_extract_directory(loader.storage, swhid) as p: assert (p / "file").stat().st_mode == 0o100644 assert (p / "executable").stat().st_mode == 0o100755 assert (p / "wat").stat().st_mode == 0o100644 @pytest.mark.parametrize("direct_objstorage", [True, False]) def test_directory_objstorage( self, swh_storage, git_loader, mocker, direct_objstorage ): """Like test_directory_simple, but using swh_objstorage directly, without going through swh_storage.content_get_data()""" repo = TestRepo() with repo as rp: (rp / "file").write_text(TEST_CONTENT) (rp / "executable").write_bytes(TEST_EXECUTABLE) (rp / "executable").chmod(0o755) (rp / "link").symlink_to("file") (rp / "dir1/dir2").mkdir(parents=True) (rp / "dir1/dir2/file").write_text(TEST_CONTENT) c = repo.commit() loader = git_loader(str(rp)) loader.load() obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) swhid = CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=obj_id) # Set-up spies storage_content_get_data = mocker.patch.object( swh_storage, "content_get_data", wraps=swh_storage.content_get_data ) objstorage_content_batch = mocker.patch.object( swh_storage.objstorage, "get_batch", wraps=swh_storage.objstorage.get_batch ) with cook_extract_directory_git_bare( loader.storage, swhid, direct_objstorage=direct_objstorage ) as p: assert (p / "file").stat().st_mode == 0o100644 assert (p / "file").read_text() == TEST_CONTENT assert (p / "executable").stat().st_mode == 0o100755 assert (p / "executable").read_bytes() == TEST_EXECUTABLE assert (p / "link").is_symlink() assert os.readlink(str(p / "link")) == "file" assert (p / "dir1/dir2/file").stat().st_mode == 0o100644 assert (p / "dir1/dir2/file").read_text() == TEST_CONTENT directory = 
from_disk.Directory.from_disk(path=bytes(p)) assert obj_id_hex == hashutil.hash_to_hex(directory.hash) if direct_objstorage: storage_content_get_data.assert_not_called() objstorage_content_batch.assert_called() else: storage_content_get_data.assert_called() objstorage_content_batch.assert_not_called() def test_directory_revision_data(self, swh_storage): target_rev = "0e8a3ad980ec179856012b7eecf4327e99cd44cd" dir = Directory( entries=( DirectoryEntry( name=b"submodule", type="rev", target=hashutil.hash_to_bytes(target_rev), perms=0o100644, ), ), ) swh_storage.directory_add([dir]) with cook_extract_directory_dircooker( swh_storage, dir.swhid(), fsck=False ) as p: assert (p / "submodule").is_symlink() assert os.readlink(str(p / "submodule")) == target_rev class RepoFixtures: """Shared loading and checking methods that can be reused by different types of tests.""" def load_repo_simple(self, git_loader): # # 1--2--3--4--5--6--7 # repo = TestRepo() with repo as rp: (rp / "file1").write_text(TEST_CONTENT) repo.commit("add file1") (rp / "file2").write_text(TEST_CONTENT) repo.commit("add file2") (rp / "dir1/dir2").mkdir(parents=True) (rp / "dir1/dir2/file").write_text(TEST_CONTENT) (rp / "bin1").write_bytes(TEST_EXECUTABLE) (rp / "bin1").chmod(0o755) repo.commit("add bin1") (rp / "link1").symlink_to("file1") repo.commit("link link1 to file1") (rp / "file2").unlink() repo.commit("remove file2") (rp / "bin1").rename(rp / "bin") repo.commit("rename bin1 to bin") loader = git_loader(str(rp)) loader.load() obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=obj_id) return (loader, swhid) def check_revision_simple(self, ert, p, swhid): ert.checkout(b"HEAD") assert (p / "file1").stat().st_mode == 0o100644 assert (p / "file1").read_text() == TEST_CONTENT assert (p / "link1").is_symlink() assert os.readlink(str(p / "link1")) == "file1" assert (p / "bin").stat().st_mode == 0o100755 assert (p / "bin").read_bytes() == TEST_EXECUTABLE assert (p / "dir1/dir2/file").read_text() == TEST_CONTENT assert (p / "dir1/dir2/file").stat().st_mode == 0o100644 assert ert.repo.refs[b"HEAD"].decode() == swhid.object_id.hex() def load_repo_two_roots(self, git_loader): # # 1----3---4 # / # 2---- # repo = TestRepo() with repo as rp: (rp / "file1").write_text(TEST_CONTENT) c1 = repo.commit("Add file1") del repo.repo.refs[b"refs/heads/master"] # git update-ref -d HEAD (rp / "file2").write_text(TEST_CONTENT) repo.commit("Add file2") repo.merge([c1]) (rp / "file3").write_text(TEST_CONTENT) repo.commit("add file3") obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=obj_id) loader = git_loader(str(rp)) loader.load() return (loader, swhid) def check_revision_two_roots(self, ert, p, swhid): assert ert.repo.refs[b"HEAD"].decode() == swhid.object_id.hex() (c3,) = ert.repo[hashutil.hash_to_bytehex(swhid.object_id)].parents assert len(ert.repo[c3].parents) == 2 def load_repo_two_heads(self, git_loader): # # 1---2----4 <-- master and b1 # \ # ----3 <-- b2 # repo = TestRepo() with repo as rp: (rp / "file1").write_text(TEST_CONTENT) repo.commit("Add file1") (rp / "file2").write_text(TEST_CONTENT) c2 = repo.commit("Add file2") repo.repo.refs[b"refs/heads/b2"] = c2 # branch b2 from master (rp / "file3").write_text(TEST_CONTENT) repo.commit("add file3", ref=b"refs/heads/b2") (rp / "file4").write_text(TEST_CONTENT) c4 = repo.commit("add file4", 
ref=b"refs/heads/master") repo.repo.refs[b"refs/heads/b1"] = c4 # branch b1 from master obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=obj_id) loader = git_loader(str(rp)) loader.load() return (loader, swhid) def check_snapshot_two_heads(self, ert, p, swhid): assert ( hashutil.hash_to_bytehex(swhid.object_id) == ert.repo.refs[b"HEAD"] == ert.repo.refs[b"refs/heads/master"] == ert.repo.refs[b"refs/remotes/origin/HEAD"] == ert.repo.refs[b"refs/remotes/origin/master"] == ert.repo.refs[b"refs/remotes/origin/b1"] ) c4_id = hashutil.hash_to_bytehex(swhid.object_id) c3_id = ert.repo.refs[b"refs/remotes/origin/b2"] assert ert.repo[c3_id].parents == ert.repo[c4_id].parents def load_repo_two_double_fork_merge(self, git_loader): # # 2---4---6 # / / / # 1---3---5 # repo = TestRepo() with repo as rp: (rp / "file1").write_text(TEST_CONTENT) c1 = repo.commit("Add file1") # create commit 1 repo.repo.refs[b"refs/heads/c1"] = c1 # branch c1 from master (rp / "file2").write_text(TEST_CONTENT) repo.commit("Add file2") # create commit 2 (rp / "file3").write_text(TEST_CONTENT) c3 = repo.commit("Add file3", ref=b"refs/heads/c1") # create commit 3 on c1 repo.repo.refs[b"refs/heads/c3"] = c3 # branch c3 from c1 repo.merge([c3]) # create commit 4 (rp / "file5").write_text(TEST_CONTENT) c5 = repo.commit("Add file3", ref=b"refs/heads/c3") # create commit 5 on c3 repo.merge([c5]) # create commit 6 obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=obj_id) loader = git_loader(str(rp)) loader.load() return (loader, swhid) def check_revision_two_double_fork_merge(self, ert, p, swhid): assert ert.repo.refs[b"HEAD"].decode() == swhid.object_id.hex() def check_snapshot_two_double_fork_merge(self, ert, p, swhid): assert ( hashutil.hash_to_bytehex(swhid.object_id) == ert.repo.refs[b"HEAD"] == ert.repo.refs[b"refs/heads/master"] == ert.repo.refs[b"refs/remotes/origin/HEAD"] == ert.repo.refs[b"refs/remotes/origin/master"] ) (c4_id, c5_id) = ert.repo[swhid.object_id.hex().encode()].parents assert c5_id == ert.repo.refs[b"refs/remotes/origin/c3"] (c2_id, c3_id) = ert.repo[c4_id].parents assert c3_id == ert.repo.refs[b"refs/remotes/origin/c1"] def load_repo_triple_merge(self, git_loader): # # .---.---5 # / / / # 2 3 4 # / / / # 1---.---. 
# repo = TestRepo() with repo as rp: (rp / "file1").write_text(TEST_CONTENT) c1 = repo.commit("Commit 1") repo.repo.refs[b"refs/heads/b1"] = c1 repo.repo.refs[b"refs/heads/b2"] = c1 repo.commit("Commit 2") c3 = repo.commit("Commit 3", ref=b"refs/heads/b1") c4 = repo.commit("Commit 4", ref=b"refs/heads/b2") repo.merge([c3, c4]) obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=obj_id) loader = git_loader(str(rp)) loader.load() return (loader, swhid) def check_revision_triple_merge(self, ert, p, swhid): assert ert.repo.refs[b"HEAD"].decode() == swhid.object_id.hex() def check_snapshot_triple_merge(self, ert, p, swhid): assert ( hashutil.hash_to_bytehex(swhid.object_id) == ert.repo.refs[b"HEAD"] == ert.repo.refs[b"refs/heads/master"] == ert.repo.refs[b"refs/remotes/origin/HEAD"] == ert.repo.refs[b"refs/remotes/origin/master"] ) (c2_id, c3_id, c4_id) = ert.repo[swhid.object_id.hex().encode()].parents assert c3_id == ert.repo.refs[b"refs/remotes/origin/b1"] assert c4_id == ert.repo.refs[b"refs/remotes/origin/b2"] assert ( ert.repo[c2_id].parents == ert.repo[c3_id].parents == ert.repo[c4_id].parents ) def load_repo_filtered_objects(self, git_loader): repo = TestRepo() with repo as rp: file_1, id_1 = hash_content(b"test1") file_2, id_2 = hash_content(b"test2") file_3, id_3 = hash_content(b"test3") (rp / "file").write_bytes(file_1) (rp / "hidden_file").write_bytes(file_2) (rp / "absent_file").write_bytes(file_3) repo.commit() obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=obj_id) loader = git_loader(str(rp)) loader.load() - # FIXME: storage.content_update() should be changed to allow things - # like that - with loader.storage.get_db().transaction() as cur: - cur.execute( - """update content set status = 'visible' - where sha1 = %s""", - (id_1,), - ) - cur.execute( - """update content set status = 'hidden' - where sha1 = %s""", - (id_2,), - ) - - cur.execute( - """ - insert into skipped_content - (sha1, sha1_git, sha256, blake2s256, length, reason) - select sha1, sha1_git, sha256, blake2s256, length, 'no reason' - from content - where sha1 = %s - """, - (id_3,), - ) + # alter the content of the storage + # 1/ make file 2 a hidden file object + loader.storage._allow_overwrite = True + cnt2 = attr.evolve( + loader.storage.content_get([id_2])[0], status="hidden", data=file_2 + ) + loader.storage.content_add([cnt2]) + assert loader.storage.content_get([id_2])[0].status == "hidden" + + # 2/ make file 3 a skipped file object + cnt3 = loader.storage.content_get([id_3])[0].to_dict() + cnt3["status"] = "absent" + cnt3["reason"] = "no reason" + sk_cnt3 = SkippedContent.from_dict(cnt3) + loader.storage.skipped_content_add([sk_cnt3]) + # dirty dirty dirty... let's pretend it is the equivalent of writing sql + # queries in the postgresql backend + for hashkey in loader.storage._cql_runner._content_indexes: + loader.storage._cql_runner._content_indexes[hashkey].pop(cnt3[hashkey]) - cur.execute("delete from content where sha1 = %s", (id_3,)) return (loader, swhid) def check_revision_filtered_objects(self, ert, p, swhid): ert.checkout(b"HEAD") assert (p / "file").read_bytes() == b"test1" assert (p / "hidden_file").read_bytes() == HIDDEN_MESSAGE assert (p / "absent_file").read_bytes() == SKIPPED_MESSAGE def load_repo_null_fields(self, git_loader): # Our schema doesn't enforce a lot of non-null revision fields.
We need # to check these cases don't break the cooker. repo = TestRepo() with repo as rp: (rp / "file").write_text(TEST_CONTENT) c = repo.commit("initial commit") loader = git_loader(str(rp)) loader.load() repo.repo.refs[b"HEAD"].decode() dir_id_hex = repo.repo[c].tree.decode() dir_id = hashutil.hash_to_bytes(dir_id_hex) test_revision = Revision( message=b"", author=Person(name=None, email=None, fullname=b""), date=None, committer=Person(name=None, email=None, fullname=b""), committer_date=None, parents=(), type=RevisionType.GIT, directory=dir_id, metadata={}, synthetic=True, ) storage = loader.storage storage.revision_add([test_revision]) return (loader, test_revision.swhid()) def check_revision_null_fields(self, ert, p, swhid): ert.checkout(b"HEAD") assert (p / "file").stat().st_mode == 0o100644 def load_repo_tags(self, git_loader): # v-- t2 # # 1---2----5 <-- master, t5, and t5a (annotated) # \ # ----3----4 <-- t4a (annotated) # repo = TestRepo() with repo as rp: (rp / "file1").write_text(TEST_CONTENT) repo.commit("Add file1") (rp / "file2").write_text(TEST_CONTENT) repo.commit("Add file2") # create c2 repo.tag(b"t2") (rp / "file3").write_text(TEST_CONTENT) repo.commit("add file3") (rp / "file4").write_text(TEST_CONTENT) repo.commit("add file4") repo.tag(b"t4a", message=b"tag 4") # Go back to c2 repo.git_shell("reset", "--hard", "HEAD^^") (rp / "file5").write_text(TEST_CONTENT) repo.commit("add file5") # create c5 repo.tag(b"t5") repo.tag(b"t5a", message=b"tag 5") obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=obj_id) loader = git_loader(str(rp)) loader.load() return (loader, swhid) def check_snapshot_tags(self, ert, p, swhid): assert ( hashutil.hash_to_bytehex(swhid.object_id) == ert.repo.refs[b"HEAD"] == ert.repo.refs[b"refs/heads/master"] == ert.repo.refs[b"refs/remotes/origin/HEAD"] == ert.repo.refs[b"refs/remotes/origin/master"] == ert.repo.refs[b"refs/tags/t5"] ) c2_id = ert.repo.refs[b"refs/tags/t2"] c5_id = hashutil.hash_to_bytehex(swhid.object_id) assert ert.repo[c5_id].parents == [c2_id] t5a = ert.repo[ert.repo.refs[b"refs/tags/t5a"]] # TODO: investigate why new dulwich adds \n assert t5a.message in (b"tag 5", b"tag 5\n") assert t5a.object == (dulwich.objects.Commit, c5_id) t4a = ert.repo[ert.repo.refs[b"refs/tags/t4a"]] (_, c4_id) = t4a.object assert ert.repo[c4_id].message == b"add file4\n" # TODO: ditto (c3_id,) = ert.repo[c4_id].parents assert ert.repo[c3_id].message == b"add file3\n" # TODO: ditto assert ert.repo[c3_id].parents == [c2_id] class TestRevisionCooker(RepoFixtures): def test_revision_simple(self, git_loader, cook_extract_revision): (loader, swhid) = self.load_repo_simple(git_loader) with cook_extract_revision(loader.storage, swhid) as (ert, p): self.check_revision_simple(ert, p, swhid) def test_revision_two_roots(self, git_loader, cook_extract_revision): (loader, swhid) = self.load_repo_two_roots(git_loader) with cook_extract_revision(loader.storage, swhid) as (ert, p): self.check_revision_two_roots(ert, p, swhid) def test_revision_two_double_fork_merge(self, git_loader, cook_extract_revision): (loader, swhid) = self.load_repo_two_double_fork_merge(git_loader) with cook_extract_revision(loader.storage, swhid) as (ert, p): self.check_revision_two_double_fork_merge(ert, p, swhid) def test_revision_triple_merge(self, git_loader, cook_extract_revision): (loader, swhid) = self.load_repo_triple_merge(git_loader) with cook_extract_revision(loader.storage, swhid) as 
(ert, p): self.check_revision_triple_merge(ert, p, swhid) def test_revision_filtered_objects(self, git_loader, cook_extract_revision): (loader, swhid) = self.load_repo_filtered_objects(git_loader) with cook_extract_revision(loader.storage, swhid) as (ert, p): self.check_revision_filtered_objects(ert, p, swhid) def test_revision_null_fields(self, git_loader, cook_extract_revision): (loader, swhid) = self.load_repo_null_fields(git_loader) with cook_extract_revision(loader.storage, swhid, fsck=False) as (ert, p): self.check_revision_null_fields(ert, p, swhid) @pytest.mark.parametrize("ingest_target_revision", [False, True]) def test_revision_submodule( self, swh_storage, cook_extract_revision, ingest_target_revision ): date = TimestampWithTimezone.from_datetime( datetime.datetime.now(datetime.timezone.utc).replace(microsecond=0) ) target_rev = Revision( message=b"target_rev", author=Person.from_fullname(b"me "), date=date, committer=Person.from_fullname(b"me "), committer_date=date, parents=(), type=RevisionType.GIT, directory=bytes.fromhex("3333333333333333333333333333333333333333"), metadata={}, synthetic=True, ) if ingest_target_revision: swh_storage.revision_add([target_rev]) dir = Directory( entries=( DirectoryEntry( - name=b"submodule", type="rev", target=target_rev.id, perms=0o160000, + name=b"submodule", + type="rev", + target=target_rev.id, + perms=0o160000, ), ), ) swh_storage.directory_add([dir]) rev = Revision( message=b"msg", author=Person.from_fullname(b"me "), date=date, committer=Person.from_fullname(b"me "), committer_date=date, parents=(), type=RevisionType.GIT, directory=dir.id, metadata={}, synthetic=True, ) swh_storage.revision_add([rev]) with cook_extract_revision(swh_storage, rev.swhid()) as (ert, p): ert.checkout(b"HEAD") pattern = b"160000 submodule\x00%s" % target_rev.id tree = ert.repo[b"HEAD"].tree assert pattern in ert.repo[tree].as_raw_string() class TestSnapshotCooker(RepoFixtures): def test_snapshot_simple(self, git_loader, cook_extract_snapshot): (loader, main_rev_id) = self.load_repo_simple(git_loader) snp_id = loader.loaded_snapshot_id swhid = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snp_id) with cook_extract_snapshot(loader.storage, swhid) as (ert, p): self.check_revision_simple(ert, p, main_rev_id) def test_snapshot_two_roots(self, git_loader, cook_extract_snapshot): (loader, main_rev_id) = self.load_repo_two_roots(git_loader) snp_id = loader.loaded_snapshot_id swhid = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snp_id) with cook_extract_snapshot(loader.storage, swhid) as (ert, p): self.check_revision_two_roots(ert, p, main_rev_id) def test_snapshot_two_heads(self, git_loader, cook_extract_snapshot): (loader, main_rev_id) = self.load_repo_two_heads(git_loader) snp_id = loader.loaded_snapshot_id swhid = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snp_id) with cook_extract_snapshot(loader.storage, swhid) as (ert, p): self.check_snapshot_two_heads(ert, p, main_rev_id) def test_snapshot_two_double_fork_merge(self, git_loader, cook_extract_snapshot): (loader, main_rev_id) = self.load_repo_two_double_fork_merge(git_loader) snp_id = loader.loaded_snapshot_id swhid = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snp_id) with cook_extract_snapshot(loader.storage, swhid) as (ert, p): self.check_revision_two_double_fork_merge(ert, p, main_rev_id) self.check_snapshot_two_double_fork_merge(ert, p, main_rev_id) def test_snapshot_triple_merge(self, git_loader, cook_extract_snapshot): (loader, main_rev_id) = 
self.load_repo_triple_merge(git_loader) snp_id = loader.loaded_snapshot_id swhid = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snp_id) with cook_extract_snapshot(loader.storage, swhid) as (ert, p): self.check_revision_triple_merge(ert, p, main_rev_id) self.check_snapshot_triple_merge(ert, p, main_rev_id) def test_snapshot_filtered_objects(self, git_loader, cook_extract_snapshot): (loader, main_rev_id) = self.load_repo_filtered_objects(git_loader) snp_id = loader.loaded_snapshot_id swhid = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snp_id) with cook_extract_snapshot(loader.storage, swhid) as (ert, p): self.check_revision_filtered_objects(ert, p, main_rev_id) def test_snapshot_tags(self, git_loader, cook_extract_snapshot): (loader, main_rev_id) = self.load_repo_tags(git_loader) snp_id = loader.loaded_snapshot_id swhid = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snp_id) with cook_extract_snapshot(loader.storage, swhid) as (ert, p): self.check_snapshot_tags(ert, p, main_rev_id) def test_original_malformed_objects(self, swh_storage, cook_extract_snapshot): """Tests that objects that were originally malformed: * are still interpreted somewhat correctly (if the loader could make sense of them), especially that they still have links to children * have their original manifest in the bundle """ date = TimestampWithTimezone.from_numeric_offset( Timestamp(1643819927, 0), 0, False ) content = Content.from_data(b"foo") swh_storage.content_add([content]) # disordered # fmt: off malformed_dir_manifest = ( b"" + b"100644 file2\x00" + content.sha1_git + b"100644 file1\x00" + content.sha1_git ) # fmt: on directory = Directory( entries=( DirectoryEntry( name=b"file1", type="file", perms=0o100644, target=content.sha1_git ), DirectoryEntry( name=b"file2", type="file", perms=0o100644, target=content.sha1_git ), ), raw_manifest=f"tree {len(malformed_dir_manifest)}\x00".encode() + malformed_dir_manifest, ) swh_storage.directory_add([directory]) # 'committer' and 'author' swapped # fmt: off malformed_rev_manifest = ( b"tree " + hashutil.hash_to_bytehex(directory.id) + b"\n" + b"committer me 1643819927 +0000\n" + b"author me 1643819927 +0000\n" + b"\n" + b"rev" ) # fmt: on revision = Revision( message=b"rev", author=Person.from_fullname(b"me "), date=date, committer=Person.from_fullname(b"me "), committer_date=date, parents=(), type=RevisionType.GIT, directory=directory.id, synthetic=True, raw_manifest=f"commit {len(malformed_rev_manifest)}\x00".encode() + malformed_rev_manifest, ) swh_storage.revision_add([revision]) # 'tag' and 'tagger' swapped # fmt: off malformed_rel_manifest = ( b"object " + hashutil.hash_to_bytehex(revision.id) + b"\n" + b"type commit\n" + b"tagger me 1643819927 +0000\n" + b"tag v1.1.0\n" ) # fmt: on release = Release( name=b"v1.1.0", message=None, author=Person.from_fullname(b"me "), date=date, target=revision.id, target_type=ModelObjectType.REVISION, synthetic=True, raw_manifest=f"tag {len(malformed_rel_manifest)}\x00".encode() + malformed_rel_manifest, ) swh_storage.release_add([release]) snapshot = Snapshot( branches={ b"refs/tags/v1.1.0": SnapshotBranch( target=release.id, target_type=TargetType.RELEASE ), b"HEAD": SnapshotBranch( target=revision.id, target_type=TargetType.REVISION ), } ) swh_storage.snapshot_add([snapshot]) with cook_extract_snapshot(swh_storage, snapshot.swhid()) as (ert, p): tag = ert.repo[b"refs/tags/v1.1.0"] assert tag.as_raw_string() == malformed_rel_manifest commit = ert.repo[tag.object[1]] assert commit.as_raw_string() == 
malformed_rev_manifest tree = ert.repo[commit.tree] assert tree.as_raw_string() == malformed_dir_manifest diff --git a/swh/vault/tests/test_git_bare_cooker.py b/swh/vault/tests/test_git_bare_cooker.py index e16fa9b..923f77a 100644 --- a/swh/vault/tests/test_git_bare_cooker.py +++ b/swh/vault/tests/test_git_bare_cooker.py @@ -1,668 +1,697 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """ This module contains additional tests for the bare cooker. Generic cooker tests (eg. without swh-graph) in test_cookers.py also run on the bare cooker. """ import datetime import enum +from functools import partial import io import subprocess import tarfile import tempfile import unittest.mock import attr import dulwich.repo import pytest from pytest import param +from pytest_postgresql import factories +from swh.core.db.pytest_plugin import initialize_database_for_module from swh.model.from_disk import DentryPerms from swh.model.model import ( Content, Directory, DirectoryEntry, ObjectType, Person, Release, Revision, RevisionType, Snapshot, SnapshotBranch, TargetType, Timestamp, TimestampWithTimezone, ) +from swh.storage import get_storage +from swh.storage.postgresql.storage import Storage from swh.vault.cookers.git_bare import GitBareCooker from swh.vault.in_memory_backend import InMemoryVaultBackend +storage_postgresql_proc = factories.postgresql_proc( + load=[partial(initialize_database_for_module, "storage", Storage.current_version)], +) + +storage_postgresql = factories.postgresql("storage_postgresql_proc") + + +@pytest.fixture +def swh_storage(storage_postgresql): + return get_storage("local", db=storage_postgresql.dsn, objstorage={"cls": "memory"}) + class RootObjects(enum.Enum): REVISION = enum.auto() SNAPSHOT = enum.auto() RELEASE = enum.auto() WEIRD_RELEASE = enum.auto() # has a : in the name + points to another release @pytest.mark.graph @pytest.mark.parametrize( "root_object,up_to_date_graph,tag,weird_branches", [ param( RootObjects.REVISION, False, False, False, id="rev, outdated graph, no tag/tree/blob", ), param( RootObjects.REVISION, True, False, False, id="rev, updated graph, no tag/tree/blob", ), param( RootObjects.RELEASE, False, False, False, id="rel, outdated graph, no tag/tree/blob", ), param( RootObjects.RELEASE, True, False, False, id="rel, updated graph, no tag/tree/blob", ), param( RootObjects.WEIRD_RELEASE, True, False, False, id="weird rel, updated graph, no tag/tree/blob", ), param( RootObjects.SNAPSHOT, False, False, False, id="snp, outdated graph, no tag/tree/blob", ), param( RootObjects.SNAPSHOT, True, False, False, id="snp, updated graph, no tag/tree/blob", ), param( RootObjects.SNAPSHOT, False, True, False, id="snp, outdated graph, w/ tag, no tree/blob", ), param( RootObjects.SNAPSHOT, True, True, False, id="snp, updated graph, w/ tag, no tree/blob", ), param( RootObjects.SNAPSHOT, False, True, True, id="snp, outdated graph, w/ tag, tree, and blob", ), param( RootObjects.SNAPSHOT, True, True, True, id="snp, updated graph, w/ tag, tree, and blob", ), ], ) def test_graph_revisions( swh_storage, up_to_date_graph, root_object, tag, weird_branches ): r""" Build objects:: snp /|||\ / ||| \ rel2 <----° /|\ \----> rel4 | / | \ | v / v \ v rev1 <------ rev2 <----° dir4 \ rel3 | | | \ | v v v \ | dir1 dir2 dir3 | | | / | | 
| | v / v v v v cnt1 <----° cnt2 cnt3 cnt4 cnt5 If up_to_date_graph is true, then swh-graph contains all objects. Else, cnt4, cnt5, dir4, rev2, rel2, rel3, and snp are missing from the graph. If tag is False, rel2 is excluded. If weird_branches is False, dir4, cnt4, rel3, rel4, and cnt5 are excluded. """ from swh.graph.naive_client import NaiveClient as GraphClient # Create objects: date = TimestampWithTimezone.from_datetime( datetime.datetime(2021, 5, 7, 8, 43, 59, tzinfo=datetime.timezone.utc) ) author = Person.from_fullname(b"Foo ") cnt1 = Content.from_data(b"correct") cnt2 = Content.from_data(b"horse") cnt3 = Content.from_data(b"battery") cnt4 = Content.from_data(b"staple") cnt5 = Content.from_data(b"Tr0ub4dor&3") dir1 = Directory( entries=( DirectoryEntry( name=b"file1", type="file", perms=DentryPerms.content, target=cnt1.sha1_git, ), ) ) dir2 = Directory( entries=( DirectoryEntry( name=b"file1", type="file", perms=DentryPerms.content, target=cnt1.sha1_git, ), DirectoryEntry( name=b"file2", type="file", perms=DentryPerms.content, target=cnt2.sha1_git, ), ) ) dir3 = Directory( entries=( DirectoryEntry( name=b"file3", type="file", perms=DentryPerms.content, target=cnt3.sha1_git, ), ) ) dir4 = Directory( entries=( DirectoryEntry( name=b"directory3", type="dir", perms=DentryPerms.directory, target=dir3.id, ), ) ) rev1 = Revision( message=b"msg1", date=date, committer_date=date, author=author, committer=author, directory=dir1.id, type=RevisionType.GIT, synthetic=True, ) rev2 = Revision( message=b"msg2", date=date, committer_date=date, author=author, committer=author, directory=dir2.id, parents=(rev1.id,), type=RevisionType.GIT, synthetic=True, ) rel2 = Release( name=b"1.0.0", message=b"tag2", target_type=ObjectType.REVISION, target=rev2.id, synthetic=True, ) rel3 = Release( name=b"1.0.0-blob", message=b"tagged-blob", target_type=ObjectType.CONTENT, target=cnt5.sha1_git, synthetic=True, ) rel4 = Release( name=b"1.0.0-weird", message=b"weird release", target_type=ObjectType.RELEASE, target=rel3.id, synthetic=True, ) rel5 = Release( name=b"1.0.0:weirdname", message=b"weird release", target_type=ObjectType.RELEASE, target=rel2.id, synthetic=True, ) # Create snapshot: branches = { b"refs/heads/master": SnapshotBranch( target=rev2.id, target_type=TargetType.REVISION ), } if tag: branches[b"refs/tags/1.0.0"] = SnapshotBranch( target=rel2.id, target_type=TargetType.RELEASE ) if weird_branches: branches[b"refs/heads/tree-ref"] = SnapshotBranch( target=dir4.id, target_type=TargetType.DIRECTORY ) branches[b"refs/heads/blob-ref"] = SnapshotBranch( target=cnt4.sha1_git, target_type=TargetType.CONTENT ) branches[b"refs/tags/1.0.0-weird"] = SnapshotBranch( target=rel4.id, target_type=TargetType.RELEASE ) snp = Snapshot(branches=branches) # "Fill" swh-graph if up_to_date_graph: nodes = [cnt1, cnt2, dir1, dir2, rev1, rev2, snp] edges = [ (dir1, cnt1), (dir2, cnt1), (dir2, cnt2), (rev1, dir1), (rev2, dir2), (rev2, rev1), (snp, rev2), ] if tag: nodes.append(rel2) edges.append((rel2, rev2)) edges.append((snp, rel2)) if weird_branches: nodes.extend([cnt3, cnt4, cnt5, dir3, dir4, rel3, rel4, rel5]) edges.extend( [ (dir3, cnt3), (dir4, dir3), (snp, dir4), (snp, cnt4), (snp, rel4), (rel4, rel3), (rel3, cnt5), (rel5, rev2), ] ) else: nodes = [cnt1, cnt2, cnt3, dir1, dir2, dir3, rev1] edges = [ (dir1, cnt1), (dir2, cnt1), (dir2, cnt2), (dir3, cnt3), (rev1, dir1), ] if tag: nodes.append(rel2) if weird_branches: nodes.extend([cnt3, dir3]) edges.extend([(dir3, cnt3)]) nodes = [str(n.swhid()) for n in nodes] edges = 
[(str(s.swhid()), str(d.swhid())) for (s, d) in edges] # Add all objects to storage swh_storage.content_add([cnt1, cnt2, cnt3, cnt4, cnt5]) swh_storage.directory_add([dir1, dir2, dir3, dir4]) swh_storage.revision_add([rev1, rev2]) swh_storage.release_add([rel2, rel3, rel4, rel5]) swh_storage.snapshot_add([snp]) # Add spy on swh_storage, to make sure revision_log is not called # (the graph must be used instead) swh_storage = unittest.mock.MagicMock(wraps=swh_storage) # Add all objects to graph swh_graph = unittest.mock.Mock(wraps=GraphClient(nodes=nodes, edges=edges)) # Cook backend = InMemoryVaultBackend() cooked_swhid = { RootObjects.SNAPSHOT: snp.swhid(), RootObjects.REVISION: rev2.swhid(), RootObjects.RELEASE: rel2.swhid(), RootObjects.WEIRD_RELEASE: rel5.swhid(), }[root_object] cooker = GitBareCooker( - cooked_swhid, backend=backend, storage=swh_storage, graph=swh_graph, + cooked_swhid, + backend=backend, + storage=swh_storage, + graph=swh_graph, ) if weird_branches: # git-fsck now rejects refs pointing to trees and blobs, # but some old git repos have them. cooker.use_fsck = False cooker.cook() # Get bundle bundle = backend.fetch("git_bare", cooked_swhid) # Extract bundle and make sure both revisions are in it with tempfile.TemporaryDirectory("swh-vault-test-bare") as tempdir: with tarfile.open(fileobj=io.BytesIO(bundle)) as tf: tf.extractall(tempdir) if root_object in (RootObjects.SNAPSHOT, RootObjects.REVISION): log_head = "master" elif root_object == RootObjects.RELEASE: log_head = "1.0.0" elif root_object == RootObjects.WEIRD_RELEASE: log_head = "release" else: assert False, root_object output = subprocess.check_output( [ "git", "-C", f"{tempdir}/{cooked_swhid}.git", "log", "--format=oneline", "--decorate=", log_head, ] ) assert output.decode() == f"{rev2.id.hex()} msg2\n{rev1.id.hex()} msg1\n" # Make sure the graph was used instead of swh_storage.revision_log if root_object == RootObjects.SNAPSHOT: if up_to_date_graph: # The graph has everything, so the first call succeeds and returns # all objects transitively pointed by the snapshot swh_graph.visit_nodes.assert_has_calls( - [unittest.mock.call(str(snp.swhid()), edges="snp:*,rel:*,rev:rev"),] + [ + unittest.mock.call(str(snp.swhid()), edges="snp:*,rel:*,rev:rev"), + ] ) else: # The graph does not have everything, so the first call returns nothing. 
# However, the second call (on the top rev) succeeds and returns # all objects but the rev and the rel swh_graph.visit_nodes.assert_has_calls( [ unittest.mock.call(str(snp.swhid()), edges="snp:*,rel:*,rev:rev"), unittest.mock.call(str(rev2.swhid()), edges="rev:rev"), ] ) elif root_object in ( RootObjects.REVISION, RootObjects.RELEASE, RootObjects.WEIRD_RELEASE, ): swh_graph.visit_nodes.assert_has_calls( [unittest.mock.call(str(rev2.swhid()), edges="rev:rev")] ) else: assert False, root_object if up_to_date_graph: swh_storage.revision_log.assert_not_called() swh_storage.revision_shortlog.assert_not_called() else: swh_storage.revision_log.assert_called() @pytest.mark.parametrize( "mismatch_on", ["content", "directory", "revision1", "revision2", "none"] ) def test_checksum_mismatch(swh_storage, mismatch_on): date = TimestampWithTimezone.from_datetime( datetime.datetime(2021, 5, 7, 8, 43, 59, tzinfo=datetime.timezone.utc) ) author = Person.from_fullname(b"Foo ") wrong_hash = b"\x12\x34" * 10 cnt1 = Content.from_data(b"Tr0ub4dor&3") if mismatch_on == "content": cnt1 = attr.evolve(cnt1, sha1_git=wrong_hash) dir1 = Directory( entries=( DirectoryEntry( name=b"file1", type="file", perms=DentryPerms.content, target=cnt1.sha1_git, ), ) ) if mismatch_on == "directory": dir1 = attr.evolve(dir1, id=wrong_hash) rev1 = Revision( message=b"msg1", date=date, committer_date=date, author=author, committer=author, directory=dir1.id, type=RevisionType.GIT, synthetic=True, ) if mismatch_on == "revision1": rev1 = attr.evolve(rev1, id=wrong_hash) rev2 = Revision( message=b"msg2", date=date, committer_date=date, author=author, committer=author, directory=dir1.id, parents=(rev1.id,), type=RevisionType.GIT, synthetic=True, ) if mismatch_on == "revision2": rev2 = attr.evolve(rev2, id=wrong_hash) cooked_swhid = rev2.swhid() swh_storage.content_add([cnt1]) swh_storage.directory_add([dir1]) swh_storage.revision_add([rev1, rev2]) backend = InMemoryVaultBackend() cooker = GitBareCooker( - cooked_swhid, backend=backend, storage=swh_storage, graph=None, + cooked_swhid, + backend=backend, + storage=swh_storage, + graph=None, ) cooker.cook() # Get bundle bundle = backend.fetch("git_bare", cooked_swhid) # Extract bundle and make sure both revisions are in it with tempfile.TemporaryDirectory("swh-vault-test-bare") as tempdir: with tarfile.open(fileobj=io.BytesIO(bundle)) as tf: tf.extractall(tempdir) if mismatch_on != "revision2": # git-log fails if the head revision is corrupted # TODO: we need to find a way to make this somewhat usable output = subprocess.check_output( [ "git", "-C", f"{tempdir}/{cooked_swhid}.git", "log", "--format=oneline", "--decorate=", ] ) assert output.decode() == f"{rev2.id.hex()} msg2\n{rev1.id.hex()} msg1\n" @pytest.mark.parametrize( "use_graph", [ pytest.param(False, id="without-graph"), pytest.param(True, id="with-graph", marks=pytest.mark.graph), ], ) def test_ignore_displayname(swh_storage, use_graph): """Tests the original authorship information is used instead of configured display names; otherwise objects would not match their hash, and git-fsck/git-clone would fail. This tests both with and without swh-graph, as both configurations use different code paths to fetch revisions. 
""" date = TimestampWithTimezone.from_numeric_offset(Timestamp(1643882820, 0), 0, False) legacy_person = Person.from_fullname(b"old me ") current_person = Person.from_fullname(b"me ") content = Content.from_data(b"foo") swh_storage.content_add([content]) directory = Directory( entries=( DirectoryEntry( name=b"file1", type="file", perms=0o100644, target=content.sha1_git ), ), ) swh_storage.directory_add([directory]) revision = Revision( message=b"rev", author=legacy_person, date=date, committer=legacy_person, committer_date=date, parents=(), type=RevisionType.GIT, directory=directory.id, synthetic=True, ) swh_storage.revision_add([revision]) release = Release( name=b"v1.1.0", message=None, author=legacy_person, date=date, target=revision.id, target_type=ObjectType.REVISION, synthetic=True, ) swh_storage.release_add([release]) snapshot = Snapshot( branches={ b"refs/tags/v1.1.0": SnapshotBranch( target=release.id, target_type=TargetType.RELEASE ), b"HEAD": SnapshotBranch( target=revision.id, target_type=TargetType.REVISION ), } ) swh_storage.snapshot_add([snapshot]) # Add all objects to graph if use_graph: from swh.graph.naive_client import NaiveClient as GraphClient nodes = [ str(x.swhid()) for x in [content, directory, revision, release, snapshot] ] edges = [ (str(x.swhid()), str(y.swhid())) for (x, y) in [ (directory, content), (revision, directory), (release, revision), (snapshot, release), (snapshot, revision), ] ] swh_graph = unittest.mock.Mock(wraps=GraphClient(nodes=nodes, edges=edges)) else: swh_graph = None # Set a display name with swh_storage.db() as db: with db.transaction() as cur: cur.execute( "UPDATE person set displayname = %s where fullname = %s", (current_person.fullname, legacy_person.fullname), ) # Check the display name did apply in the storage assert swh_storage.revision_get([revision.id])[0] == attr.evolve( - revision, author=current_person, committer=current_person, + revision, + author=current_person, + committer=current_person, ) # Cook cooked_swhid = snapshot.swhid() backend = InMemoryVaultBackend() cooker = GitBareCooker( - cooked_swhid, backend=backend, storage=swh_storage, graph=swh_graph, + cooked_swhid, + backend=backend, + storage=swh_storage, + graph=swh_graph, ) cooker.cook() # Get bundle bundle = backend.fetch("git_bare", cooked_swhid) # Extract bundle and make sure both revisions are in it with tempfile.TemporaryDirectory("swh-vault-test-bare") as tempdir: with tarfile.open(fileobj=io.BytesIO(bundle)) as tf: tf.extractall(tempdir) # If we are here, it means git-fsck succeeded when called by cooker.cook(), # so we already know the original person was used. Let's double-check. 
repo = dulwich.repo.Repo(f"{tempdir}/{cooked_swhid}.git") tag = repo[b"refs/tags/v1.1.0"] assert tag.tagger == legacy_person.fullname commit = repo[tag.object[1]] assert commit.author == legacy_person.fullname diff --git a/swh/vault/tests/test_init_cookers.py b/swh/vault/tests/test_init_cookers.py index 583a20d..f0e5790 100644 --- a/swh/vault/tests/test_init_cookers.py +++ b/swh/vault/tests/test_init_cookers.py @@ -1,112 +1,116 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from typing import Dict import pytest import yaml from swh.vault.cookers import COOKER_TYPES, get_cooker from swh.vault.tests.test_backend import TEST_SWHID @pytest.fixture def swh_cooker_config(): return { "vault": { "cls": "remote", "args": { "url": "mock://vault-backend", "storage": {"cls": "remote", "url": "mock://storage-url"}, }, } } def write_config_to_env(config: Dict, tmp_path, monkeypatch) -> str: """Write the configuration dict into a temporary file, then reference that path to - SWH_CONFIG_FILENAME environment variable. + SWH_CONFIG_FILENAME environment variable. """ conf_path = os.path.join(str(tmp_path), "cooker.yml") with open(conf_path, "w") as f: f.write(yaml.dump(config)) monkeypatch.setenv("SWH_CONFIG_FILENAME", conf_path) return conf_path def test_write_to_env(swh_cooker_config, tmp_path, monkeypatch): actual_path = write_config_to_env(swh_cooker_config, tmp_path, monkeypatch) assert os.path.exists(actual_path) is True assert os.environ["SWH_CONFIG_FILENAME"] == actual_path with open(actual_path, "r") as f: actual_config = yaml.safe_load(f.read()) assert actual_config == swh_cooker_config @pytest.mark.parametrize( "config_ko,exception_class,exception_msg", [ ({}, ValueError, "missing 'vault' configuration"), ( {"vault": {"cls": "local"}}, EnvironmentError, "This vault backend can only be a 'remote' configuration", ), ( {"vault": {"cls": "remote", "args": {"missing-storage-key": ""}}}, ValueError, "invalid configuration: missing 'storage' config entry", ), ], ) def test_get_cooker_config_ko( config_ko, exception_class, exception_msg, monkeypatch, tmp_path ): - """Misconfigured cooker should fail the instantiation with exception message - - """ + """Misconfigured cooker should fail the instantiation with exception message""" write_config_to_env(config_ko, tmp_path, monkeypatch) with pytest.raises(exception_class, match=exception_msg): get_cooker("flat", TEST_SWHID) @pytest.mark.parametrize( "config_ok", [ { "vault": { "cls": "remote", "args": { "url": "mock://vault-backend", "storage": {"cls": "remote", "url": "mock://storage-url"}, }, } }, { - "vault": {"cls": "remote", "args": {"url": "mock://vault-backend",},}, + "vault": { + "cls": "remote", + "args": { + "url": "mock://vault-backend", + }, + }, "storage": {"cls": "remote", "url": "mock://storage-url"}, }, { - "vault": {"cls": "remote", "url": "mock://vault-backend",}, + "vault": { + "cls": "remote", + "url": "mock://vault-backend", + }, "storage": {"cls": "remote", "url": "mock://storage-url"}, }, ], ) def test_get_cooker_nominal(config_ok, tmp_path, monkeypatch): - """Correct configuration should allow the instantiation of the cookers - - """ + """Correct configuration should allow the instantiation of the cookers""" for cooker_type in COOKER_TYPES.keys(): write_config_to_env(config_ok, tmp_path, monkeypatch) cooker = 
get_cooker(cooker_type, TEST_SWHID) assert cooker is not None assert isinstance(cooker, tuple(COOKER_TYPES[cooker_type])) diff --git a/swh/vault/tests/test_server.py b/swh/vault/tests/test_server.py index 28a20c6..1f67ccf 100644 --- a/swh/vault/tests/test_server.py +++ b/swh/vault/tests/test_server.py @@ -1,188 +1,187 @@ # Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import copy import os from typing import Any, Dict import pytest import yaml from swh.core.api.serializers import json_dumps, msgpack_dumps, msgpack_loads from swh.vault.api.serializers import ENCODERS import swh.vault.api.server from swh.vault.api.server import app, check_config, get_vault, make_app_from_configfile from swh.vault.tests.test_backend import TEST_SWHID @pytest.fixture def swh_vault_server_config(swh_vault_config: Dict[str, Any]) -> Dict[str, Any]: """Returns a vault server configuration, with ``storage``, ``scheduler`` and ``cache`` set at the toplevel""" return { - "vault": {"cls": "local", "db": swh_vault_config["db"]}, - "client_max_size": 1024 ** 3, + "vault": {"cls": "postgresql", "db": swh_vault_config["db"]}, + "client_max_size": 1024**3, **{k: v for k, v in swh_vault_config.items() if k != "db"}, } @pytest.fixture def swh_vault_server_config_file(swh_vault_server_config, monkeypatch, tmp_path): """Creates a vault server configuration file and sets it into SWH_CONFIG_FILENAME""" conf_path = os.path.join(str(tmp_path), "vault-server.yml") with open(conf_path, "w") as f: f.write(yaml.dump(swh_vault_server_config)) monkeypatch.setenv("SWH_CONFIG_FILENAME", conf_path) return conf_path def test_make_app_from_file_missing(): with pytest.raises(ValueError, match="Missing configuration path."): make_app_from_configfile() def test_make_app_from_file_does_not_exist(tmp_path): conf_path = os.path.join(str(tmp_path), "vault-server.yml") assert os.path.exists(conf_path) is False with pytest.raises( ValueError, match=f"Configuration path {conf_path} should exist." 
): make_app_from_configfile(conf_path) def test_make_app_from_env_variable(swh_vault_server_config_file): - """Server initialization happens through env variable when no path is provided - - """ + """Server initialization happens through env variable when no path is provided""" app = make_app_from_configfile() assert app is not None assert get_vault() is not None # Cleanup app del app.config["vault"] swh.vault.api.server.vault = None def test_make_app_from_file(swh_vault_server_config, tmp_path): - """Server initialization happens through path if provided - - """ + """Server initialization happens through path if provided""" conf_path = os.path.join(str(tmp_path), "vault-server.yml") with open(conf_path, "w") as f: f.write(yaml.dump(swh_vault_server_config)) app = make_app_from_configfile(conf_path) assert app is not None assert get_vault() is not None # Cleanup app del app.config["vault"] swh.vault.api.server.vault = None @pytest.fixture def vault_app(swh_vault_server_config_file): yield make_app_from_configfile() # Cleanup app del app.config["vault"] swh.vault.api.server.vault = None @pytest.fixture def cli(vault_app): cli = vault_app.test_client() return cli def test_client_index(cli): resp = cli.get("/") assert resp.status == "200 OK" def test_client_cook_notfound(cli): resp = cli.post( "/cook", data=json_dumps( {"bundle_type": "flat", "swhid": TEST_SWHID}, extra_encoders=ENCODERS ), headers=[("Content-Type", "application/json")], ) assert resp.status == "400 BAD REQUEST" content = msgpack_loads(resp.data) assert content["type"] == "NotFoundExc" assert content["args"] == [f"flat {TEST_SWHID} was not found."] def test_client_progress_notfound(cli): resp = cli.post( "/progress", data=json_dumps( {"bundle_type": "flat", "swhid": TEST_SWHID}, extra_encoders=ENCODERS ), headers=[("Content-Type", "application/json")], ) assert resp.status == "400 BAD REQUEST" content = msgpack_loads(resp.data) assert content["type"] == "NotFoundExc" assert content["args"] == [f"flat {TEST_SWHID} was not found."] def test_client_batch_cook_invalid_type(cli): resp = cli.post( "/batch_cook", data=msgpack_dumps({"batch": [("foobar", [])]}), headers={"Content-Type": "application/x-msgpack"}, ) assert resp.status == "400 BAD REQUEST" content = msgpack_loads(resp.data) assert content["type"] == "NotFoundExc" assert content["args"] == ["foobar is an unknown type."] def test_client_batch_progress_notfound(cli): resp = cli.post( "/batch_progress", data=msgpack_dumps({"batch_id": 1}), headers={"Content-Type": "application/x-msgpack"}, ) assert resp.status == "400 BAD REQUEST" content = msgpack_loads(resp.data) assert content["type"] == "NotFoundExc" assert content["args"] == ["Batch 1 does not exist."] def test_check_config_missing_vault_configuration() -> None: """Irrelevant configuration file path raises""" with pytest.raises(ValueError, match="missing 'vault' configuration"): check_config({}) def test_check_config_not_local() -> None: """Wrong configuration raises""" expected_error = ( - "The vault backend can only be started with a 'local' configuration" + "The vault backend can only be started with a 'postgresql' configuration" ) with pytest.raises(EnvironmentError, match=expected_error): check_config({"vault": {"cls": "remote"}}) -def test_check_config_ok(swh_vault_server_config) -> None: +@pytest.mark.parametrize("clazz", ["local", "postgresql"]) +def test_check_config_ok(swh_vault_server_config, clazz) -> None: """Check that the default config is accepted""" + config = swh_vault_server_config.copy() + 
config["vault"]["cls"] = clazz assert check_config(swh_vault_server_config) is not None @pytest.mark.parametrize("missing_key", ["storage", "cache", "scheduler"]) def test_check_config_missing_key(missing_key, swh_vault_server_config) -> None: """Check that configs with a missing key get rejected""" config_ok = swh_vault_server_config config_ko = copy.deepcopy(config_ok) config_ko["vault"].pop(missing_key, None) config_ko.pop(missing_key, None) expected_error = f"invalid configuration: missing {missing_key} config entry" with pytest.raises(ValueError, match=expected_error): check_config(config_ko) diff --git a/swh/vault/tests/test_to_disk.py b/swh/vault/tests/test_to_disk.py index 7596dd7..71424e5 100644 --- a/swh/vault/tests/test_to_disk.py +++ b/swh/vault/tests/test_to_disk.py @@ -1,72 +1,184 @@ -# Copyright (C) 2020 The Software Heritage developers +# Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest -from swh.model.model import Content, SkippedContent -from swh.vault.to_disk import get_filtered_files_content +from swh.model.from_disk import DentryPerms +from swh.model.model import Content, Directory, DirectoryEntry, SkippedContent +from swh.vault.to_disk import DirectoryBuilder, get_filtered_files_content def test_get_filtered_files_content(swh_storage): content = Content.from_data(b"foo bar") skipped_content = SkippedContent( sha1=None, sha1_git=b"c" * 20, sha256=None, blake2s256=None, length=42, status="absent", reason="for some reason", ) swh_storage.content_add([content]) swh_storage.skipped_content_add([skipped_content]) files_data = [ { "status": "visible", "sha1": content.sha1, "sha1_git": content.sha1_git, "target": content.sha1_git, }, - {"status": "absent", "target": skipped_content.sha1_git,}, + { + "status": "absent", + "target": skipped_content.sha1_git, + }, ] res = list(get_filtered_files_content(swh_storage, files_data)) assert res == [ { "content": content.data, "status": "visible", "sha1": content.sha1, "sha1_git": content.sha1_git, "target": content.sha1_git, }, { "content": ( b"This content has not been retrieved in the " b"Software Heritage archive due to its size." 
), "status": "absent", "target": skipped_content.sha1_git, }, ] def test_get_filtered_files_content__unknown_status(swh_storage): content = Content.from_data(b"foo bar") swh_storage.content_add([content]) files_data = [ { "status": "visible", "sha1": content.sha1, "sha1_git": content.sha1_git, "target": content.sha1_git, }, - {"status": None, "target": b"c" * 20,}, + { + "status": "blah", + "target": b"c" * 20, + }, ] - with pytest.raises(AssertionError, match="unexpected status None"): + with pytest.raises(AssertionError, match="unexpected status 'blah'"): list(get_filtered_files_content(swh_storage, files_data)) + + +def _fill_storage(swh_storage, exclude_cnt3=False, exclude_dir1=False): + cnt1 = Content.from_data(b"foo bar") + cnt2 = Content.from_data(b"bar baz") + cnt3 = Content.from_data(b"baz qux") + dir1 = Directory( + entries=( + DirectoryEntry( + name=b"content1", + type="file", + target=cnt1.sha1_git, + perms=DentryPerms.content, + ), + DirectoryEntry( + name=b"content2", + type="file", + target=cnt2.sha1_git, + perms=DentryPerms.content, + ), + ) + ) + dir2 = Directory( + entries=( + DirectoryEntry( + name=b"content3", + type="file", + target=cnt3.sha1_git, + perms=DentryPerms.content, + ), + DirectoryEntry( + name=b"subdirectory", + type="dir", + target=dir1.id, + perms=DentryPerms.directory, + ), + ) + ) + if exclude_cnt3: + swh_storage.content_add([cnt1, cnt2]) + else: + swh_storage.content_add([cnt1, cnt2, cnt3]) + if exclude_dir1: + swh_storage.directory_add([dir2]) + else: + swh_storage.directory_add([dir1, dir2]) + + return dir2 + + +def test_directory_builder(swh_storage, tmp_path): + dir2 = _fill_storage(swh_storage) + + root = tmp_path / "root" + builder = DirectoryBuilder(swh_storage, bytes(root), dir2.id) + + assert not root.exists() + + builder.build() + + assert root.is_dir() + assert set(root.glob("**/*")) == { + root / "subdirectory", + root / "subdirectory" / "content1", + root / "subdirectory" / "content2", + root / "content3", + } + + assert (root / "subdirectory" / "content1").open().read() == "foo bar" + assert (root / "subdirectory" / "content2").open().read() == "bar baz" + assert (root / "content3").open().read() == "baz qux" + + +def test_directory_builder_missing_content(swh_storage, tmp_path): + dir2 = _fill_storage(swh_storage, exclude_cnt3=True) + + root = tmp_path / "root" + builder = DirectoryBuilder(swh_storage, bytes(root), dir2.id) + + assert not root.exists() + + builder.build() + + assert root.is_dir() + + assert "This content is missing" in (root / "content3").open().read() + + +def test_directory_builder_missing_directory(swh_storage, tmp_path): + dir2 = _fill_storage(swh_storage, exclude_dir1=True) + + root = tmp_path / "root" + builder = DirectoryBuilder(swh_storage, bytes(root), dir2.id) + + assert not root.exists() + + builder.build() + + assert root.is_dir() + assert set(root.glob("**/*")) == { + root / "subdirectory", + root / "content3", + } + + assert (root / "content3").open().read() == "baz qux" diff --git a/swh/vault/to_disk.py b/swh/vault/to_disk.py index 797bb31..2721642 100644 --- a/swh/vault/to_disk.py +++ b/swh/vault/to_disk.py @@ -1,138 +1,146 @@ # Copyright (C) 2016-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import collections import functools import os from typing import Any, Dict, Iterator, List from swh.model import hashutil from 
swh.model.from_disk import DentryPerms, mode_to_perms from swh.storage.algos.dir_iterators import dir_iterator from swh.storage.interface import StorageInterface +MISSING_MESSAGE = ( + b"This content is missing from the Software Heritage archive " + b"(or from the mirror used while retrieving it)." +) + SKIPPED_MESSAGE = ( b"This content has not been retrieved in the " b"Software Heritage archive due to its size." ) HIDDEN_MESSAGE = b"This content is hidden." def get_filtered_files_content( storage: StorageInterface, files_data: List[Dict] ) -> Iterator[Dict[str, Any]]: """Retrieve the files specified by files_data and apply filters for skipped and missing contents. Args: storage: the storage from which to retrieve the objects files_data: list of file entries as returned by directory_ls() Yields: The entries given in files_data with a new 'content' key that points to the file content in bytes. The contents can be replaced by a specific message to indicate that they could not be retrieved (either due to privacy policy or because their sizes were too big for us to archive them). """ for file_data in files_data: status = file_data["status"] - if status == "absent": - content = SKIPPED_MESSAGE - elif status == "hidden": - content = HIDDEN_MESSAGE - elif status == "visible": + if status == "visible": sha1 = file_data["sha1"] data = storage.content_get_data(sha1) if data is None: content = SKIPPED_MESSAGE else: content = data + elif status == "absent": + content = SKIPPED_MESSAGE + elif status == "hidden": + content = HIDDEN_MESSAGE + elif status is None: + content = MISSING_MESSAGE else: assert False, ( f"unexpected status {status!r} " f"for content {hashutil.hash_to_hex(file_data['target'])}" ) yield {"content": content, **file_data} def apply_chunked(func, input_list, chunk_size): """Apply func on input_list, divided into chunks of size chunk_size""" for i in range(0, len(input_list), chunk_size): yield from func(input_list[i : i + chunk_size]) class DirectoryBuilder: - """Reconstructs the on-disk representation of a directory in the storage. - """ + """Reconstructs the on-disk representation of a directory in the storage.""" - def __init__(self, storage, root, dir_id): + def __init__(self, storage: StorageInterface, root: bytes, dir_id: bytes): """Initialize the directory builder. Args: storage: the storage object root: the path where the directory should be reconstructed dir_id: the identifier of the directory in the storage """ self.storage = storage self.root = root self.dir_id = dir_id - def build(self): + def build(self) -> None: """Perform the reconstruction of the directory in the given root.""" # Retrieve data from the database. # Split into files, revisions and directory data. entries = collections.defaultdict(list) for entry in dir_iterator(self.storage, self.dir_id): entries[entry["type"]].append(entry) # Recreate the directory's subtree and then the files inside it. self._create_tree(entries["dir"]) self._create_files(entries["file"]) self._create_revisions(entries["rev"]) - def _create_tree(self, directories): + def _create_tree(self, directories: List[Dict[str, Any]]) -> None: """Create a directory tree from the given paths. The tree is created from `root` and each given directory in `directories` will be created.
""" # Directories are sorted by depth so they are created in the # right order bsep = os.path.sep.encode() directories = sorted(directories, key=lambda x: len(x["path"].split(bsep))) for dir in directories: os.makedirs(os.path.join(self.root, dir["path"])) - def _create_files(self, files_data): + def _create_files(self, files_data: List[Dict[str, Any]]) -> None: """Create the files in the tree and fetch their contents.""" f = functools.partial(get_filtered_files_content, self.storage) files_data = apply_chunked(f, files_data, 1000) for file_data in files_data: path = os.path.join(self.root, file_data["path"]) self._create_file(path, file_data["content"], file_data["perms"]) - def _create_revisions(self, revs_data): + def _create_revisions(self, revs_data: List[Dict[str, Any]]) -> None: """Create the revisions in the tree as broken symlinks to the target identifier.""" for file_data in revs_data: path = os.path.join(self.root, file_data["path"]) - target = hashutil.hash_to_hex(file_data["target"]) + target = hashutil.hash_to_bytehex(file_data["target"]) self._create_file(path, target, mode=DentryPerms.symlink) - def _create_file(self, path, content, mode=DentryPerms.content): + def _create_file( + self, path: bytes, content: bytes, mode: int = DentryPerms.content + ) -> None: """Create the given file and fill it with content.""" perms = mode_to_perms(mode) if perms == DentryPerms.symlink: os.symlink(content, path) else: with open(path, "wb") as f: f.write(content) os.chmod(path, perms.value) diff --git a/tox.ini b/tox.ini index 7c64f9e..65f7fe3 100644 --- a/tox.ini +++ b/tox.ini @@ -1,76 +1,77 @@ [tox] envlist=black,flake8,mypy,py3 [testenv] extras = testing graph deps = pytest-cov commands = pytest --cov={envsitepackagesdir}/swh/vault \ {envsitepackagesdir}/swh/vault \ --cov-branch {posargs} [testenv:black] skip_install = true deps = - black==19.10b0 + black==22.3.0 commands = {envpython} -m black --check swh [testenv:flake8] skip_install = true deps = - flake8 + flake8==4.0.1 + flake8-bugbear==22.3.23 commands = {envpython} -m flake8 [testenv:mypy] extras = testing graph deps = - mypy==0.920 + mypy==0.942 commands = mypy swh # build documentation outside swh-environment using the current # git HEAD of swh-docs, is executed on CI for each diff to prevent # breaking doc build [testenv:sphinx] whitelist_externals = make usedevelop = true extras = testing graph deps = # fetch and install swh-docs in develop mode -e git+https://forge.softwareheritage.org/source/swh-docs#egg=swh.docs setenv = SWH_PACKAGE_DOC_TOX_BUILD = 1 # turn warnings into errors SPHINXOPTS = -W commands = make -I ../.tox/sphinx/src/swh-docs/swh/ -C docs # build documentation only inside swh-environment using local state # of swh-docs package [testenv:sphinx-dev] whitelist_externals = make usedevelop = true extras = testing graph deps = # install swh-docs in develop mode -e ../swh-docs setenv = SWH_PACKAGE_DOC_TOX_BUILD = 1 # turn warnings into errors SPHINXOPTS = -W commands = make -I ../.tox/sphinx-dev/src/swh-docs/swh/ -C docs