diff --git a/.gitignore b/.gitignore
index a1f7d837f..dda3e7a86 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,11 +1,12 @@
 *.pyc
 *.sw?
 *~
 .coverage
 .eggs/
 __pycache__
 build/
 dist/
 *.egg-info
 version.txt
 .vscode/
+.hypothesis/
diff --git a/MANIFEST.in b/MANIFEST.in
index 2a92ebecc..808326604 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,8 +1,8 @@
 include Makefile
 include Makefile.local
-include README.db_testing
-include README.dev
+include README.md
 include requirements.txt
 include requirements-swh.txt
 include version.txt
 recursive-include sql *
+recursive-include swh/storage/sql *
diff --git a/PKG-INFO b/PKG-INFO
index 390d8a2b3..1926a6943 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,156 +1,153 @@
 Metadata-Version: 2.1
 Name: swh.storage
-Version: 0.0.107
+Version: 0.0.108
 Summary: Software Heritage storage manager
 Home-page: https://forge.softwareheritage.org/diffusion/DSTO/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
 Project-URL: Funding, https://www.softwareheritage.org/donate
 Project-URL: Source, https://forge.softwareheritage.org/source/swh-storage
 Description: swh-storage
         ===========
         
         Abstraction layer over the archive, allowing to access all stored source code
         artifacts as well as their metadata.
         
         See the
         [documentation](https://docs.softwareheritage.org/devel/swh-storage/index.html)
         for more details.
         
         Tests
         -----
         
         Python tests for this module include tests that cannot be run without a local
         Postgres database. You are not obliged to run those tests though:
         
         - `make test`:      will run all tests
         - `make test-nodb`: will run only tests that do not need a local DB
         - `make test-db`:   will run only tests that do need a local DB
         
         If you do want to run DB-related tests, you should ensure you have access zith
         sufficient privileges to a Postgresql database.
         
         ### Using your system database
         
-        You need to:
+        You need to ensure that your user is authorized to create and drop DBs, and in
+        particular DBs named "softwareheritage-test" and "softwareheritage-dev"
         
-        - ensure that your user is authorized to create and drop DBs, and in particular
-          DBs named "softwareheritage-test" and "softwareheritage-dev"
-        
-        - ensure that you have the storage testdata repository checked out in
-          ../swh-storage-testdata
+        Note: the testdata repository (swh-storage-testdata) is not required any more.
         
         ### Using pifpaf
         
         [pifpaf](https://github.com/jd/pifpaf) is a suite of fixtures and a
         command-line tool that allows to start and stop daemons for a quick throw-away
         usage.
         
         It can be used to run tests that need a Postgres database without any other
         configuration reauired nor the need to have special access to a running
         database:
         
         ```bash
         
         $ pifpaf run postgresql make test-db
         [snip]
         ----------------------------------------------------------------------
         Ran 124 tests in 56.203s
         
         OK
         ```
         
         Note that pifpaf is not yet available as a Debian package, so you may have to
         install it in a venv.
         
         
         Development
         -----------
         
         A test server could locally be running for tests.
         
         ### Sample configuration
         
         In either /etc/softwareheritage/storage/storage.yml,
         ~/.config/swh/storage.yml or ~/.swh/storage.yml:
         
         ```
         storage:
           cls: local
           args:
             db: "dbname=softwareheritage-dev user=<user>"
             objstorage:
               cls: pathslicing
               args:
                 root: /home/storage/swh-storage/
                 slicing: 0:2/2:4/4:6
         ```
         
         which means, this uses:
         
         - a local storage instance whose db connection is to
           softwareheritage-dev local instance
         
         - the objstorage uses a local objstorage instance whose:
         
           - root path is /home/storage/swh-storage
         
           - slicing scheme is 0:2/2:4/4:6. This means that the identifier of
             the content (sha1) which will be stored on disk at first level
             with the first 2 hex characters, the second level with the next 2
             hex characters and the third level with the next 2 hex
             characters. And finally the complete hash file holding the raw
             content. For example: 00062f8bd330715c4f819373653d97b3cd34394c
             will be stored at 00/06/2f/00062f8bd330715c4f819373653d97b3cd34394c
         
         Note that the 'root' path should exist on disk.
         
         
         ### Run server
         
         Command:
         ```
         python3 -m swh.storage.api.server ~/.config/swh/storage.yml
         ```
         
         This runs a local swh-storage api at 5002 port.
         
         
         ### And then what?
         
         In your upper layer (loader-git, loader-svn, etc...), you can define a
         remote storage with this snippet of yaml configuration.
         
         ```
         storage:
           cls: remote
           args:
             url: http://localhost:5002/
         ```
         
         You could directly define a local storage with the following snippet:
         
         ```
         storage:
           cls: local
           args:
             db: service=swh-dev
             objstorage:
               cls: pathslicing
               args:
                 root: /home/storage/swh-storage/
                 slicing: 0:2/2:4/4:6
         ```
         
 Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Operating System :: OS Independent
 Classifier: Development Status :: 5 - Production/Stable
 Description-Content-Type: text/markdown
 Provides-Extra: listener
 Provides-Extra: schemata
 Provides-Extra: testing
diff --git a/README.md b/README.md
index 53f5a62bd..0dbe6ac44 100644
--- a/README.md
+++ b/README.md
@@ -1,134 +1,131 @@
 swh-storage
 ===========
 
 Abstraction layer over the archive, allowing to access all stored source code
 artifacts as well as their metadata.
 
 See the
 [documentation](https://docs.softwareheritage.org/devel/swh-storage/index.html)
 for more details.
 
 Tests
 -----
 
 Python tests for this module include tests that cannot be run without a local
 Postgres database. You are not obliged to run those tests though:
 
 - `make test`:      will run all tests
 - `make test-nodb`: will run only tests that do not need a local DB
 - `make test-db`:   will run only tests that do need a local DB
 
 If you do want to run DB-related tests, you should ensure you have access zith
 sufficient privileges to a Postgresql database.
 
 ### Using your system database
 
-You need to:
+You need to ensure that your user is authorized to create and drop DBs, and in
+particular DBs named "softwareheritage-test" and "softwareheritage-dev".
 
-- ensure that your user is authorized to create and drop DBs, and in particular
-  DBs named "softwareheritage-test" and "softwareheritage-dev"
-
-- ensure that you have the storage testdata repository checked out in
-  ../swh-storage-testdata
+Note: the testdata repository (swh-storage-testdata) is not required any more.
 
 ### Using pifpaf
 
 [pifpaf](https://github.com/jd/pifpaf) is a suite of fixtures and a
 command-line tool that allows to start and stop daemons for a quick throw-away
 usage.
 
 It can be used to run tests that need a Postgres database without any other
 configuration reauired nor the need to have special access to a running
 database:
 
 ```bash
 
 $ pifpaf run postgresql make test-db
 [snip]
 ----------------------------------------------------------------------
 Ran 124 tests in 56.203s
 
 OK
 ```
 
 Note that pifpaf is not yet available as a Debian package, so you may have to
 install it in a venv.
 
 
 Development
 -----------
 
 A test server could locally be running for tests.
 
 ### Sample configuration
 
 In either /etc/softwareheritage/storage/storage.yml,
 ~/.config/swh/storage.yml or ~/.swh/storage.yml:
 
 ```
 storage:
   cls: local
   args:
     db: "dbname=softwareheritage-dev user=<user>"
     objstorage:
       cls: pathslicing
       args:
         root: /home/storage/swh-storage/
         slicing: 0:2/2:4/4:6
 ```
 
 which means, this uses:
 
 - a local storage instance whose db connection is to
   softwareheritage-dev local instance
 
 - the objstorage uses a local objstorage instance whose:
 
   - root path is /home/storage/swh-storage
 
   - slicing scheme is 0:2/2:4/4:6. This means that the identifier of
     the content (sha1) which will be stored on disk at first level
     with the first 2 hex characters, the second level with the next 2
     hex characters and the third level with the next 2 hex
     characters. And finally the complete hash file holding the raw
     content. For example: 00062f8bd330715c4f819373653d97b3cd34394c
     will be stored at 00/06/2f/00062f8bd330715c4f819373653d97b3cd34394c
 
 Note that the 'root' path should exist on disk.
 
 
 ### Run server
 
 Command:
 ```
 python3 -m swh.storage.api.server ~/.config/swh/storage.yml
 ```
 
 This runs a local swh-storage api at 5002 port.
 
 
 ### And then what?
 
 In your upper layer (loader-git, loader-svn, etc...), you can define a
 remote storage with this snippet of yaml configuration.
 
 ```
 storage:
   cls: remote
   args:
     url: http://localhost:5002/
 ```
 
 You could directly define a local storage with the following snippet:
 
 ```
 storage:
   cls: local
   args:
     db: service=swh-dev
     objstorage:
       cls: pathslicing
       args:
         root: /home/storage/swh-storage/
         slicing: 0:2/2:4/4:6
 ```
diff --git a/requirements-swh.txt b/requirements-swh.txt
index b4673f274..3e809cceb 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,4 @@
-swh.core >= 0.0.40
+swh.core >= 0.0.44
 swh.model >= 0.0.27
 swh.objstorage >= 0.0.17
 swh.scheduler >= 0.0.14
diff --git a/requirements-test.txt b/requirements-test.txt
index f3c7e8e6f..4af3b14a4 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1 +1,2 @@
+hypothesis >= 3.11.0
 nose
diff --git a/sql/Makefile b/sql/Makefile
index 5d52a958e..c7caa91da 100644
--- a/sql/Makefile
+++ b/sql/Makefile
@@ -1,61 +1,78 @@
 # Depends: postgresql-client, postgresql-autodoc
 
 DBNAME = softwareheritage-dev
 DOCDIR = autodoc
 
-SQL_INIT    = swh-init.sql
-SQL_ENUMS   = swh-enums.sql
-SQL_SCHEMA  = swh-schema.sql
-SQL_FUNC    = swh-func.sql
-SQL_INDEX   = swh-indexes.sql
-SQL_TRIGGER = swh-triggers.sql
+SQL_INIT    = 10-swh-init.sql
+SQL_ENUMS   = 20-swh-enums.sql
+SQL_SCHEMA  = 30-swh-schema.sql
+SQL_FUNC    = 40-swh-func.sql
+SQL_INDEX   = 60-swh-indexes.sql
+SQL_TRIGGER = 70-swh-triggers.sql
 SQLS = $(SQL_INIT) $(SQL_ENUMS) $(SQL_SCHEMA) $(SQL_FUNC) $(SQL_INDEX) $(SQL_TRIGGER)
+SQL_FILES = $(abspath $(addprefix $(CURDIR)/../swh/storage/sql/,$(SQLS)))
 
 PSQL_BIN = psql
 PSQL_FLAGS = --echo-all -X -v ON_ERROR_STOP=1
 PSQL = $(PSQL_BIN) $(PSQL_FLAGS)
 
+# ':=' so ifndef tests the expanded (possibly empty) value, not the raw text
+PIFPAF := $(findstring postgresql://,$(PIFPAF_URLS))
 all:
 
 createdb: createdb-stamp
-createdb-stamp: $(SQL_INIT)
+createdb-stamp: $(SQL_FILES)
+ifndef PIFPAF
 	-dropdb $(DBNAME)
+endif
 	createdb $(DBNAME)
+ifndef PIFPAF
 	touch $@
+else
+	rm -f $@
+endif
 
 filldb: filldb-stamp
 filldb-stamp: createdb-stamp
-	cat $(SQLS) | $(PSQL) $(DBNAME)
+	cat $(SQL_FILES) | $(PSQL) $(DBNAME)
+ifndef PIFPAF
 	touch $@
+else
+	rm -f $@
+endif
 
 dropdb:
 	-dropdb $(DBNAME)
 
 dumpdb: swh.dump
 swh.dump: filldb-stamp
 	pg_dump -Fc $(DBNAME) > $@
 
 $(DOCDIR):
 	test -d $(DOCDIR)/ || mkdir $(DOCDIR)
 
 doc: autodoc-stamp $(DOCDIR)/db-schema.pdf $(DOCDIR)/db-schema.svg
 autodoc-stamp: filldb-stamp $(DOCDIR)
 	postgresql_autodoc -d $(DBNAME) -f $(DOCDIR)/db-schema
 	cp -a $(DOCDIR)/db-schema.dot $(DOCDIR)/db-schema.dot.orig
+ifndef PIFPAF
 	touch $@
+else
+	rm -f $@
+endif
 
 $(DOCDIR)/db-schema.dot: clusters.dot autodoc-stamp $(DOCDIR)
 	bin/dot_add_content $(DOCDIR)/db-schema.dot.orig clusters.dot > $(DOCDIR)/db-schema.dot
 
 $(DOCDIR)/db-schema.pdf: $(DOCDIR)/db-schema.dot autodoc-stamp
 	dot -T pdf $< > $@
 $(DOCDIR)/db-schema.svg: $(DOCDIR)/db-schema.dot autodoc-stamp
 	dot -T svg $< > $@
 
 clean:
 	rm -rf *-stamp $(DOCDIR)/
 
 distclean: clean dropdb
 	rm -f swh.dump
 
 .PHONY: all initdb createdb dropdb doc clean
diff --git a/sql/bin/db-init b/sql/bin/db-init
deleted file mode 100755
index 7802eff61..000000000
--- a/sql/bin/db-init
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/bash
-set -e
-
-# Must be run as (a) Postgres super user
-
-SQL_INIT=swh-init.sql
-SQL_ENUMS=swh-enums.sql
-SQL_SCHEMA=swh-schema.sql
-SQL_FUNC=swh-func.sql
-SQL_INDEX=swh-indexes.sql
-SQL_TRIGGER=swh-triggers.sql
-SQL_DATA=swh-data.sql
-ROOT_SQLS="$SQL_INIT"
-USER_SQLS="$SQL_ENUMS $SQL_SCHEMA $SQL_FUNC $SQL_INDEX $SQL_TRIGGER $SQL_DATA"
-SQLS="$ROOT_SQLS $USER_SQLS"
-
-DB_ENCODING="UTF-8"
-DB_LOCALE="C.UTF-8"
-DB_TEMPLATE="template0"
-
-cd "$( dirname $0 )/.."
-
-if ! [ -f "$SQL_INIT" ] ; then
-    echo "Cannot find $SQL_INIT. Abort."
-    exit 2
-fi
-
-if [ -z "$1" ] ; then
-    echo "Usage: bin/db-init DB_NAME [DB_PORT]"
-    echo "Example: bin/db-init softwareheritage-dev"
-    echo "Note: DB_NAME should not exist and will be created"
-    exit 2
-fi
-db_name="$1"
-port=${2:-5432}
-
-conn_flags="--port $port"
-
-echo "I: creating Postgres database ${db_name} ..."
-createdb $conn_flags \
-         --encoding "$DB_ENCODING" --locale "$DB_LOCALE" \
-         --template "$DB_TEMPLATE" "$db_name"
-
-sqls_flags=''
-for f in $SQLS ; do
-    sqls_flags="${sqls_flags} --file ${f}"
-done
-
-echo "I: initializing DB ${db_name} ..."
-psql $conn_flags ${sqls_flags} "$db_name"
-
-echo "I: all done."
diff --git a/swh.storage.egg-info/PKG-INFO b/swh.storage.egg-info/PKG-INFO
index 390d8a2b3..1926a6943 100644
--- a/swh.storage.egg-info/PKG-INFO
+++ b/swh.storage.egg-info/PKG-INFO
@@ -1,156 +1,153 @@
 Metadata-Version: 2.1
 Name: swh.storage
-Version: 0.0.107
+Version: 0.0.108
 Summary: Software Heritage storage manager
 Home-page: https://forge.softwareheritage.org/diffusion/DSTO/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
 Project-URL: Funding, https://www.softwareheritage.org/donate
 Project-URL: Source, https://forge.softwareheritage.org/source/swh-storage
 Description: swh-storage
         ===========
         
         Abstraction layer over the archive, allowing to access all stored source code
         artifacts as well as their metadata.
         
         See the
         [documentation](https://docs.softwareheritage.org/devel/swh-storage/index.html)
         for more details.
         
         Tests
         -----
         
         Python tests for this module include tests that cannot be run without a local
         Postgres database. You are not obliged to run those tests though:
         
         - `make test`:      will run all tests
         - `make test-nodb`: will run only tests that do not need a local DB
         - `make test-db`:   will run only tests that do need a local DB
         
         If you do want to run DB-related tests, you should ensure you have access zith
         sufficient privileges to a Postgresql database.
         
         ### Using your system database
         
-        You need to:
+        You need to ensure that your user is authorized to create and drop DBs, and in
+        particular DBs named "softwareheritage-test" and "softwareheritage-dev"
         
-        - ensure that your user is authorized to create and drop DBs, and in particular
-          DBs named "softwareheritage-test" and "softwareheritage-dev"
-        
-        - ensure that you have the storage testdata repository checked out in
-          ../swh-storage-testdata
+        Note: the testdata repository (swh-storage-testdata) is not required any more.
         
         ### Using pifpaf
         
         [pifpaf](https://github.com/jd/pifpaf) is a suite of fixtures and a
         command-line tool that allows to start and stop daemons for a quick throw-away
         usage.
         
         It can be used to run tests that need a Postgres database without any other
         configuration reauired nor the need to have special access to a running
         database:
         
         ```bash
         
         $ pifpaf run postgresql make test-db
         [snip]
         ----------------------------------------------------------------------
         Ran 124 tests in 56.203s
         
         OK
         ```
         
         Note that pifpaf is not yet available as a Debian package, so you may have to
         install it in a venv.
         
         
         Development
         -----------
         
         A test server could locally be running for tests.
         
         ### Sample configuration
         
         In either /etc/softwareheritage/storage/storage.yml,
         ~/.config/swh/storage.yml or ~/.swh/storage.yml:
         
         ```
         storage:
           cls: local
           args:
             db: "dbname=softwareheritage-dev user=<user>"
             objstorage:
               cls: pathslicing
               args:
                 root: /home/storage/swh-storage/
                 slicing: 0:2/2:4/4:6
         ```
         
         which means, this uses:
         
         - a local storage instance whose db connection is to
           softwareheritage-dev local instance
         
         - the objstorage uses a local objstorage instance whose:
         
           - root path is /home/storage/swh-storage
         
           - slicing scheme is 0:2/2:4/4:6. This means that the identifier of
             the content (sha1) which will be stored on disk at first level
             with the first 2 hex characters, the second level with the next 2
             hex characters and the third level with the next 2 hex
             characters. And finally the complete hash file holding the raw
             content. For example: 00062f8bd330715c4f819373653d97b3cd34394c
             will be stored at 00/06/2f/00062f8bd330715c4f819373653d97b3cd34394c
         
         Note that the 'root' path should exist on disk.
         
         
         ### Run server
         
         Command:
         ```
         python3 -m swh.storage.api.server ~/.config/swh/storage.yml
         ```
         
         This runs a local swh-storage api at 5002 port.
         
         
         ### And then what?
         
         In your upper layer (loader-git, loader-svn, etc...), you can define a
         remote storage with this snippet of yaml configuration.
         
         ```
         storage:
           cls: remote
           args:
             url: http://localhost:5002/
         ```
         
         You could directly define a local storage with the following snippet:
         
         ```
         storage:
           cls: local
           args:
             db: service=swh-dev
             objstorage:
               cls: pathslicing
               args:
                 root: /home/storage/swh-storage/
                 slicing: 0:2/2:4/4:6
         ```
         
 Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Operating System :: OS Independent
 Classifier: Development Status :: 5 - Production/Stable
 Description-Content-Type: text/markdown
 Provides-Extra: listener
 Provides-Extra: schemata
 Provides-Extra: testing
diff --git a/swh.storage.egg-info/SOURCES.txt b/swh.storage.egg-info/SOURCES.txt
index e00eb9914..0b23e3c35 100644
--- a/swh.storage.egg-info/SOURCES.txt
+++ b/swh.storage.egg-info/SOURCES.txt
@@ -1,214 +1,215 @@
 .gitignore
 AUTHORS
 LICENSE
 MANIFEST.in
 Makefile
 Makefile.local
 README.md
 requirements-swh.txt
 requirements-test.txt
 requirements.txt
 setup.py
 version.txt
 bin/swh-storage-add-dir
 debian/changelog
 debian/compat
 debian/control
 debian/copyright
 debian/rules
 debian/source/format
 docs/.gitignore
 docs/Makefile
 docs/Makefile.local
 docs/archive-copies.rst
 docs/conf.py
 docs/index.rst
 docs/sql-storage.rst
 docs/_static/.placeholder
 docs/_templates/.placeholder
 docs/images/.gitignore
 docs/images/Makefile
 docs/images/swh-archive-copies.dia
 docs/images/swh-archive-copies.pdf
 docs/images/swh-archive-copies.svg
 sql/.gitignore
 sql/Makefile
 sql/TODO
 sql/clusters.dot
-sql/swh-enums.sql
-sql/swh-func.sql
-sql/swh-indexes.sql
-sql/swh-init.sql
-sql/swh-schema.sql
-sql/swh-triggers.sql
-sql/bin/db-init
 sql/bin/db-upgrade
 sql/bin/dot_add_content
 sql/doc/json
 sql/doc/json/.gitignore
 sql/doc/json/Makefile
 sql/doc/json/entity.lister_metadata.schema.json
 sql/doc/json/entity.metadata.schema.json
 sql/doc/json/entity_history.lister_metadata.schema.json
 sql/doc/json/entity_history.metadata.schema.json
 sql/doc/json/fetch_history.result.schema.json
 sql/doc/json/list_history.result.schema.json
 sql/doc/json/listable_entity.list_params.schema.json
 sql/doc/json/origin_visit.metadata.json
 sql/doc/json/tool.tool_configuration.schema.json
 sql/json/.gitignore
 sql/json/Makefile
 sql/json/entity.lister_metadata.schema.json
 sql/json/entity.metadata.schema.json
 sql/json/entity_history.lister_metadata.schema.json
 sql/json/entity_history.metadata.schema.json
 sql/json/fetch_history.result.schema.json
 sql/json/list_history.result.schema.json
 sql/json/listable_entity.list_params.schema.json
 sql/json/origin_visit.metadata.json
 sql/json/tool.tool_configuration.schema.json
 sql/upgrades/015.sql
 sql/upgrades/016.sql
 sql/upgrades/017.sql
 sql/upgrades/018.sql
 sql/upgrades/019.sql
 sql/upgrades/020.sql
 sql/upgrades/021.sql
 sql/upgrades/022.sql
 sql/upgrades/023.sql
 sql/upgrades/024.sql
 sql/upgrades/025.sql
 sql/upgrades/026.sql
 sql/upgrades/027.sql
 sql/upgrades/028.sql
 sql/upgrades/029.sql
 sql/upgrades/030.sql
 sql/upgrades/032.sql
 sql/upgrades/033.sql
 sql/upgrades/034.sql
 sql/upgrades/035.sql
 sql/upgrades/036.sql
 sql/upgrades/037.sql
 sql/upgrades/038.sql
 sql/upgrades/039.sql
 sql/upgrades/040.sql
 sql/upgrades/041.sql
 sql/upgrades/042.sql
 sql/upgrades/043.sql
 sql/upgrades/044.sql
 sql/upgrades/045.sql
 sql/upgrades/046.sql
 sql/upgrades/047.sql
 sql/upgrades/048.sql
 sql/upgrades/049.sql
 sql/upgrades/050.sql
 sql/upgrades/051.sql
 sql/upgrades/052.sql
 sql/upgrades/053.sql
 sql/upgrades/054.sql
 sql/upgrades/055.sql
 sql/upgrades/056.sql
 sql/upgrades/057.sql
 sql/upgrades/058.sql
 sql/upgrades/059.sql
 sql/upgrades/060.sql
 sql/upgrades/061.sql
 sql/upgrades/062.sql
 sql/upgrades/063.sql
 sql/upgrades/064.sql
 sql/upgrades/065.sql
 sql/upgrades/066.sql
 sql/upgrades/067.sql
 sql/upgrades/068.sql
 sql/upgrades/069.sql
 sql/upgrades/070.sql
 sql/upgrades/071.sql
 sql/upgrades/072.sql
 sql/upgrades/073.sql
 sql/upgrades/074.sql
 sql/upgrades/075.sql
 sql/upgrades/076.sql
 sql/upgrades/077.sql
 sql/upgrades/078.sql
 sql/upgrades/079.sql
 sql/upgrades/080.sql
 sql/upgrades/081.sql
 sql/upgrades/082.sql
 sql/upgrades/083.sql
 sql/upgrades/084.sql
 sql/upgrades/085.sql
 sql/upgrades/086.sql
 sql/upgrades/087.sql
 sql/upgrades/088.sql
 sql/upgrades/089.sql
 sql/upgrades/090.sql
 sql/upgrades/091.sql
 sql/upgrades/092.sql
 sql/upgrades/093.sql
 sql/upgrades/094.sql
 sql/upgrades/095.sql
 sql/upgrades/096.sql
 sql/upgrades/097.sql
 sql/upgrades/098.sql
 sql/upgrades/099.sql
 sql/upgrades/100.sql
 sql/upgrades/101.sql
 sql/upgrades/102.sql
 sql/upgrades/103.sql
 sql/upgrades/104.sql
 sql/upgrades/105.sql
 sql/upgrades/106.sql
 sql/upgrades/107.sql
 sql/upgrades/108.sql
 sql/upgrades/109.sql
 sql/upgrades/110.sql
 sql/upgrades/111.sql
 sql/upgrades/112.sql
 sql/upgrades/113.sql
 sql/upgrades/114.sql
 sql/upgrades/115.sql
 sql/upgrades/116.sql
 sql/upgrades/117.sql
 sql/upgrades/118.sql
 sql/upgrades/119.sql
 sql/upgrades/120.sql
 sql/upgrades/121.sql
 sql/upgrades/122.sql
 sql/upgrades/123.sql
 sql/upgrades/124.sql
 sql/upgrades/125.sql
 sql/upgrades/126.sql
 sql/upgrades/127.sql
 swh/__init__.py
 swh.storage.egg-info/PKG-INFO
 swh.storage.egg-info/SOURCES.txt
 swh.storage.egg-info/dependency_links.txt
 swh.storage.egg-info/requires.txt
 swh.storage.egg-info/top_level.txt
 swh/storage/__init__.py
 swh/storage/common.py
 swh/storage/converters.py
 swh/storage/db.py
 swh/storage/db_utils.py
 swh/storage/exc.py
 swh/storage/listener.py
 swh/storage/storage.py
 swh/storage/algos/__init__.py
 swh/storage/algos/diff.py
 swh/storage/algos/dir_iterators.py
+swh/storage/algos/snapshot.py
 swh/storage/api/__init__.py
 swh/storage/api/client.py
 swh/storage/api/server.py
 swh/storage/schemata/__init__.py
 swh/storage/schemata/distribution.py
+swh/storage/sql/10-swh-init.sql
+swh/storage/sql/20-swh-enums.sql
+swh/storage/sql/30-swh-schema.sql
+swh/storage/sql/40-swh-func.sql
+swh/storage/sql/60-swh-indexes.sql
+swh/storage/sql/70-swh-triggers.sql
 swh/storage/tests/__init__.py
 swh/storage/tests/storage_testing.py
 swh/storage/tests/test_api_client.py
 swh/storage/tests/test_converters.py
 swh/storage/tests/test_db.py
 swh/storage/tests/test_storage.py
 swh/storage/tests/algos/__init__.py
 swh/storage/tests/algos/test_diff.py
 swh/storage/tests/algos/test_dir_iterator.py
+swh/storage/tests/algos/test_snapshot.py
 utils/dump_revisions.py
 utils/fix_revisions_from_dump.py
\ No newline at end of file
diff --git a/swh.storage.egg-info/requires.txt b/swh.storage.egg-info/requires.txt
index b3c903a36..18155fc81 100644
--- a/swh.storage.egg-info/requires.txt
+++ b/swh.storage.egg-info/requires.txt
@@ -1,19 +1,20 @@
 aiohttp
 click
 flask
 psycopg2
 python-dateutil
-swh.core>=0.0.40
+swh.core>=0.0.44
 swh.model>=0.0.27
 swh.objstorage>=0.0.17
 swh.scheduler>=0.0.14
 vcversioner
 
 [listener]
 kafka_python
 
 [schemata]
 SQLAlchemy
 
 [testing]
+hypothesis>=3.11.0
 nose
diff --git a/swh/storage/algos/snapshot.py b/swh/storage/algos/snapshot.py
new file mode 100644
index 000000000..76eca6183
--- /dev/null
+++ b/swh/storage/algos/snapshot.py
@@ -0,0 +1,31 @@
+# Copyright (C) 2018  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def snapshot_get_all_branches(storage, snapshot_id):
+    """Fetch a snapshot together with the complete set of its branches.
+
+    Follows the `next_branch` cursor handed back by the storage until no
+    further page of branches is advertised.
+
+    Args:
+        storage (swh.storage.storage.Storage): the storage to query
+        snapshot_id (bytes): identifier of the snapshot to fetch
+    Returns:
+        dict: the snapshot, whose **branches** dict (keyed by branch name)
+        is completed from every page; None when `snapshot_get` returns a
+        falsy value.
+    """
+    snapshot = storage.snapshot_get(snapshot_id)
+    if not snapshot:
+        return None
+
+    cursor = snapshot.pop('next_branch', None)
+    while cursor:
+        page = storage.snapshot_get_branches(snapshot_id,
+                                             branches_from=cursor)
+        snapshot['branches'].update(page['branches'])
+        cursor = page.get('next_branch')
+    return snapshot
diff --git a/swh/storage/api/client.py b/swh/storage/api/client.py
index 2460ff54f..038a3a66f 100644
--- a/swh/storage/api/client.py
+++ b/swh/storage/api/client.py
@@ -1,233 +1,233 @@
 # Copyright (C) 2015-2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 
 from swh.core.api import SWHRemoteAPI
 
 from ..exc import StorageAPIError
 
 
 class RemoteStorage(SWHRemoteAPI):
     """Proxy to a remote storage API"""
     def __init__(self, url, timeout=None):
         super().__init__(
             api_exception=StorageAPIError, url=url, timeout=timeout)
 
     def check_config(self, *, check_write):
         return self.post('check_config', {'check_write': check_write})
 
     def content_add(self, content):
         return self.post('content/add', {'content': content})
 
     def content_update(self, content, keys=[]):
         return self.post('content/update', {'content': content,
                                             'keys': keys})
 
     def content_missing(self, content, key_hash='sha1'):
         return self.post('content/missing', {'content': content,
                                              'key_hash': key_hash})
 
     def content_missing_per_sha1(self, contents):
         return self.post('content/missing/sha1', {'contents': contents})
 
     def content_get(self, content):
         return self.post('content/data', {'content': content})
 
     def content_get_metadata(self, content):
         return self.post('content/metadata', {'content': content})
 
     def content_find(self, content):
         return self.post('content/present', {'content': content})
 
     def directory_add(self, directories):
         return self.post('directory/add', {'directories': directories})
 
     def directory_missing(self, directories):
         return self.post('directory/missing', {'directories': directories})
 
     def directory_ls(self, directory, recursive=False):
         return self.get('directory/ls', {'directory': directory,
                                          'recursive': recursive})
 
     def revision_get(self, revisions):
         return self.post('revision', {'revisions': revisions})
 
     def revision_log(self, revisions, limit=None):
         return self.post('revision/log', {'revisions': revisions,
                                           'limit': limit})
 
     def revision_shortlog(self, revisions, limit=None):
         return self.post('revision/shortlog', {'revisions': revisions,
                                                'limit': limit})
 
     def revision_add(self, revisions):
         return self.post('revision/add', {'revisions': revisions})
 
     def revision_missing(self, revisions):
         return self.post('revision/missing', {'revisions': revisions})
 
     def release_add(self, releases):
         return self.post('release/add', {'releases': releases})
 
     def release_get(self, releases):
         return self.post('release', {'releases': releases})
 
     def release_missing(self, releases):
         return self.post('release/missing', {'releases': releases})
 
     def object_find_by_sha1_git(self, ids):
         return self.post('object/find_by_sha1_git', {'ids': ids})
 
     def snapshot_add(self, origin, visit, snapshot):
         return self.post('snapshot/add', {
             'origin': origin, 'visit': visit, 'snapshot': snapshot,
         })
 
     def snapshot_get(self, snapshot_id):
         return self.post('snapshot', {
             'snapshot_id': snapshot_id
         })
 
     def snapshot_get_by_origin_visit(self, origin, visit):
         return self.post('snapshot/by_origin_visit', {
             'origin': origin,
             'visit': visit
         })
 
     def snapshot_get_latest(self, origin, allowed_statuses=None):
         return self.post('snapshot/latest', {
             'origin': origin,
             'allowed_statuses': allowed_statuses
         })
 
     def snapshot_count_branches(self, snapshot_id):
         return self.post('snapshot/count_branches', {
             'snapshot_id': snapshot_id
         })
 
     def snapshot_get_branches(self, snapshot_id, branches_from=b'',
-                              branches_count=None, target_types=None):
+                              branches_count=1000, target_types=None):
         return self.post('snapshot/get_branches', {
             'snapshot_id': snapshot_id,
             'branches_from': branches_from,
             'branches_count': branches_count,
             'target_types': target_types
         })
 
     def origin_get(self, origin):
         return self.post('origin/get', {'origin': origin})
 
     def origin_search(self, url_pattern, offset=0, limit=50, regexp=False,
                       with_visit=False):
         return self.post('origin/search', {'url_pattern': url_pattern,
                                            'offset': offset,
                                            'limit': limit,
                                            'regexp': regexp,
                                            'with_visit': with_visit})
 
     def origin_add(self, origins):
         return self.post('origin/add_multi', {'origins': origins})
 
     def origin_add_one(self, origin):
         return self.post('origin/add', {'origin': origin})
 
     def origin_visit_add(self, origin, ts):
         return self.post('origin/visit/add', {'origin': origin, 'ts': ts})
 
     def origin_visit_update(self, origin, visit_id, status, metadata=None):
         return self.post('origin/visit/update', {'origin': origin,
                                                  'visit_id': visit_id,
                                                  'status': status,
                                                  'metadata': metadata})
 
     def origin_visit_get(self, origin, last_visit=None, limit=None):
         return self.post('origin/visit/get', {
             'origin': origin, 'last_visit': last_visit, 'limit': limit})
 
     def origin_visit_get_by(self, origin, visit):
         return self.post('origin/visit/getby', {'origin': origin,
                                                 'visit': visit})
 
     def person_get(self, person):
         return self.post('person', {'person': person})
 
     def fetch_history_start(self, origin_id):
         return self.post('fetch_history/start', {'origin_id': origin_id})
 
     def fetch_history_end(self, fetch_history_id, data):
         return self.post('fetch_history/end',
                          {'fetch_history_id': fetch_history_id,
                           'data': data})
 
     def fetch_history_get(self, fetch_history_id):
         return self.get('fetch_history', {'id': fetch_history_id})
 
     def entity_add(self, entities):
         return self.post('entity/add', {'entities': entities})
 
     def entity_get(self, uuid):
         return self.post('entity/get', {'uuid': uuid})
 
     def entity_get_one(self, uuid):
         return self.get('entity', {'uuid': uuid})
 
     def entity_get_from_lister_metadata(self, entities):
         return self.post('entity/from_lister_metadata', {'entities': entities})
 
     def stat_counters(self):
         return self.get('stat/counters')
 
     def directory_entry_get_by_path(self, directory, paths):
         return self.post('directory/path', dict(directory=directory,
                                                 paths=paths))
 
     def tool_add(self, tools):
         return self.post('tool/add', {'tools': tools})
 
     def tool_get(self, tool):
         return self.post('tool/data', {'tool': tool})
 
     def origin_metadata_add(self, origin_id, ts, provider, tool, metadata):
         return self.post('origin/metadata/add', {'origin_id': origin_id,
                                                  'ts': ts,
                                                  'provider': provider,
                                                  'tool': tool,
                                                  'metadata': metadata})
 
     def origin_metadata_get_by(self, origin_id, provider_type=None):
         return self.post('origin/metadata/get', {
             'origin_id': origin_id,
             'provider_type': provider_type
         })
 
     def metadata_provider_add(self, provider_name, provider_type, provider_url,
                               metadata):
         return self.post('provider/add', {'provider_name': provider_name,
                                           'provider_type': provider_type,
                                           'provider_url': provider_url,
                                           'metadata': metadata})
 
     def metadata_provider_get(self, provider_id):
         return self.post('provider/get', {'provider_id': provider_id})
 
     def metadata_provider_get_by(self, provider):
         return self.post('provider/getby', {'provider': provider})
 
     def diff_directories(self, from_dir, to_dir, track_renaming=False):
         return self.post('algos/diff_directories',
                          {'from_dir': from_dir,
                           'to_dir': to_dir,
                           'track_renaming': track_renaming})
 
     def diff_revisions(self, from_rev, to_rev, track_renaming=False):
         return self.post('algos/diff_revisions',
                          {'from_rev': from_rev,
                           'to_rev': to_rev,
                           'track_renaming': track_renaming})
 
     def diff_revision(self, revision, track_renaming=False):
         return self.post('algos/diff_revision',
                          {'revision': revision,
                           'track_renaming': track_renaming})
diff --git a/sql/swh-init.sql b/swh/storage/sql/10-swh-init.sql
similarity index 100%
rename from sql/swh-init.sql
rename to swh/storage/sql/10-swh-init.sql
diff --git a/sql/swh-enums.sql b/swh/storage/sql/20-swh-enums.sql
similarity index 100%
rename from sql/swh-enums.sql
rename to swh/storage/sql/20-swh-enums.sql
diff --git a/sql/swh-schema.sql b/swh/storage/sql/30-swh-schema.sql
similarity index 100%
rename from sql/swh-schema.sql
rename to swh/storage/sql/30-swh-schema.sql
diff --git a/sql/swh-func.sql b/swh/storage/sql/40-swh-func.sql
similarity index 100%
rename from sql/swh-func.sql
rename to swh/storage/sql/40-swh-func.sql
diff --git a/sql/swh-indexes.sql b/swh/storage/sql/60-swh-indexes.sql
similarity index 100%
rename from sql/swh-indexes.sql
rename to swh/storage/sql/60-swh-indexes.sql
diff --git a/sql/swh-triggers.sql b/swh/storage/sql/70-swh-triggers.sql
similarity index 100%
rename from sql/swh-triggers.sql
rename to swh/storage/sql/70-swh-triggers.sql
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
index 7726895cc..6d7942f44 100644
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -1,1373 +1,1357 @@
 # Copyright (C) 2015-2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 
 from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor
 import datetime
 import itertools
 import json
 
 import dateutil.parser
 import psycopg2
 import psycopg2.pool
 
 from . import converters
 from .common import db_transaction_generator, db_transaction
 from .db import Db
 from .exc import StorageDBError
 from .algos import diff
 
-from swh.model.hashutil import ALGORITHMS
+from swh.model.hashutil import ALGORITHMS, hash_to_bytes
 from swh.objstorage import get_objstorage
 from swh.objstorage.exc import ObjNotFoundError
 
 # Max block size of contents to return
 BULK_BLOCK_CONTENT_LEN_MAX = 10000
 
+EMPTY_SNAPSHOT_ID = hash_to_bytes('1a8893e6a86f444e8be8e7bda6cb34fb1735a00e')
+"""Identifier for the empty snapshot"""
+
 
 class Storage():
     """SWH storage proxy, encompassing DB and object storage
 
     """
 
     def __init__(self, db, objstorage, min_pool_conns=1, max_pool_conns=10):
         """
         Args:
             db_conn: either a libpq connection string, or a psycopg2 connection
             obj_root: path to the root of the object storage
 
         """
         try:
             if isinstance(db, psycopg2.extensions.connection):
                 self._pool = None
                 self._db = Db(db)
             else:
                 self._pool = psycopg2.pool.ThreadedConnectionPool(
                     min_pool_conns, max_pool_conns, db
                 )
                 self._db = None
         except psycopg2.OperationalError as e:
             raise StorageDBError(e)
 
         self.objstorage = get_objstorage(**objstorage)
 
     def get_db(self):
         if self._db:
             return self._db
         else:
             return Db.from_pool(self._pool)
 
     def check_config(self, *, check_write):
         """Check that the storage is configured and ready to go."""
 
         if not self.objstorage.check_config(check_write=check_write):
             return False
 
         # Check permissions on one of the tables
         with self.get_db().transaction() as cur:
             if check_write:
                 check = 'INSERT'
             else:
                 check = 'SELECT'
 
             cur.execute(
                 "select has_table_privilege(current_user, 'content', %s)",
                 (check,)
             )
             return cur.fetchone()[0]
 
         return True
 
     def content_add(self, content):
         """Add content blobs to the storage
 
         Note: in case of DB errors, objects might have already been added to
         the object storage and will not be removed. Since addition to the
         object storage is idempotent, that should not be a problem.
 
         Args:
             content (iterable): iterable of dictionaries representing
                 individual pieces of content to add. Each dictionary has the
                 following keys:
 
                 - data (bytes): the actual content
                 - length (int): content length (default: -1)
                 - one key for each checksum algorithm in
                   :data:`swh.model.hashutil.ALGORITHMS`, mapped to the
                   corresponding checksum
                 - status (str): one of visible, hidden, absent
                 - reason (str): if status = absent, the reason why
                 - origin (int): if status = absent, the origin we saw the
                   content in
 
         """
         db = self.get_db()
 
         def _unique_key(hash, keys=db.content_hash_keys):
             """Given a hash (tuple or dict), return a unique key from the
                aggregation of keys.
 
             """
             if isinstance(hash, tuple):
                 return hash
             return tuple([hash[k] for k in keys])
 
         content_by_status = defaultdict(list)
         for d in content:
             if 'status' not in d:
                 d['status'] = 'visible'
             if 'length' not in d:
                 d['length'] = -1
             content_by_status[d['status']].append(d)
 
         content_with_data = content_by_status['visible']
         content_without_data = content_by_status['absent']
 
         missing_content = set(self.content_missing(content_with_data))
         missing_skipped = set(_unique_key(hashes) for hashes
                               in self.skipped_content_missing(
                                   content_without_data))
 
         def add_to_objstorage():
             data = {
                 cont['sha1']: cont['data']
                 for cont in content_with_data
                 if cont['sha1'] in missing_content
             }
             self.objstorage.add_batch(data)
 
         with db.transaction() as cur:
             with ThreadPoolExecutor(max_workers=1) as executor:
                 added_to_objstorage = executor.submit(add_to_objstorage)
                 if missing_content:
                     # create temporary table for metadata injection
                     db.mktemp('content', cur)
 
                     content_filtered = (cont for cont in content_with_data
                                         if cont['sha1'] in missing_content)
 
                     db.copy_to(content_filtered, 'tmp_content',
                                db.content_get_metadata_keys, cur)
 
                     # move metadata in place
                     db.content_add_from_temp(cur)
 
                 if missing_skipped:
                     missing_filtered = (
                         cont for cont in content_without_data
                         if _unique_key(cont) in missing_skipped
                     )
 
                     db.mktemp('skipped_content', cur)
                     db.copy_to(missing_filtered, 'tmp_skipped_content',
                                db.skipped_content_keys, cur)
 
                     # move metadata in place
                     db.skipped_content_add_from_temp(cur)
 
                 # Wait for objstorage addition before returning from the
                 # transaction, bubbling up any exception
                 added_to_objstorage.result()
 
     @db_transaction()
     def content_update(self, content, keys=[], db=None, cur=None):
         """Update content blobs to the storage. Does nothing for unknown
         contents or skipped ones.
 
         Args:
             content (iterable): iterable of dictionaries representing
                 individual pieces of content to update. Each dictionary has the
                 following keys:
 
                 - data (bytes): the actual content
                 - length (int): content length (default: -1)
                 - one key for each checksum algorithm in
                   :data:`swh.model.hashutil.ALGORITHMS`, mapped to the
                   corresponding checksum
                 - status (str): one of visible, hidden, absent
 
             keys (list): List of keys (str) whose values needs an update, e.g.,
                 new hash column
 
         """
         # TODO: Add a check on input keys. How to properly implement
         # this? We don't know yet the new columns.
 
         db.mktemp('content', cur)
         select_keys = list(set(db.content_get_metadata_keys).union(set(keys)))
         db.copy_to(content, 'tmp_content', select_keys, cur)
         db.content_update_from_temp(keys_to_update=keys,
                                     cur=cur)
 
     def content_get(self, content):
         """Retrieve in bulk contents and their data.
 
         Args:
             content: iterables of sha1
 
         Yields:
             dict: Generates streams of contents as dict with their raw data:
 
                 - sha1: sha1's content
                 - data: bytes data of the content
 
         Raises:
             ValueError in case of too much contents are required.
             cf. BULK_BLOCK_CONTENT_LEN_MAX
 
         """
         # FIXME: Improve on server module to slice the result
         if len(content) > BULK_BLOCK_CONTENT_LEN_MAX:
             raise ValueError(
                 "Send at maximum %s contents." % BULK_BLOCK_CONTENT_LEN_MAX)
 
         for obj_id in content:
             try:
                 data = self.objstorage.get(obj_id)
             except ObjNotFoundError:
                 yield None
                 continue
 
             yield {'sha1': obj_id, 'data': data}
 
     @db_transaction_generator(statement_timeout=500)
     def content_get_metadata(self, content, db=None, cur=None):
         """Retrieve content metadata in bulk
 
         Args:
             content: iterable of content identifiers (sha1)
 
         Returns:
             an iterable with content metadata corresponding to the given ids
         """
         for metadata in db.content_get_metadata_from_sha1s(content, cur):
             yield dict(zip(db.content_get_metadata_keys, metadata))
 
     @db_transaction_generator()
     def content_missing(self, content, key_hash='sha1', db=None, cur=None):
         """List content missing from storage
 
         Args:
             content ([dict]): iterable of dictionaries containing one
                               key for each checksum algorithm in
                               :data:`swh.model.hashutil.ALGORITHMS`,
                               mapped to the corresponding checksum,
                               and a length key mapped to the content
                               length.
 
             key_hash (str): name of the column to use as hash id
                             result (default: 'sha1')
 
         Returns:
             iterable ([bytes]): missing content ids (as per the
             key_hash column)
 
         Raises:
             TODO: an exception when we get a hash collision.
 
         """
         keys = db.content_hash_keys
 
         if key_hash not in keys:
             raise ValueError("key_hash should be one of %s" % keys)
 
         key_hash_idx = keys.index(key_hash)
 
         if not content:
             return
 
         for obj in db.content_missing_from_list(content, cur):
             yield obj[key_hash_idx]
 
     @db_transaction_generator()
     def content_missing_per_sha1(self, contents, db=None, cur=None):
         """List content missing from storage based only on sha1.
 
         Args:
             contents: Iterable of sha1 to check for absence.
 
         Returns:
             iterable: missing ids
 
         Raises:
             TODO: an exception when we get a hash collision.
 
         """
         for obj in db.content_missing_per_sha1(contents, cur):
             yield obj[0]
 
     @db_transaction_generator()
     def skipped_content_missing(self, content, db=None, cur=None):
         """List skipped_content missing from storage
 
         Args:
             content: iterable of dictionaries containing the data for each
                 checksum algorithm.
 
         Returns:
             iterable: missing signatures
 
         """
         keys = db.content_hash_keys
 
         db.mktemp('skipped_content', cur)
         db.copy_to(content, 'tmp_skipped_content',
                    keys + ['length', 'reason'], cur)
 
         yield from db.skipped_content_missing_from_temp(cur)
 
     @db_transaction()
     def content_find(self, content, db=None, cur=None):
         """Find a content hash in db.
 
         Args:
             content: a dictionary representing one content hash, mapping
                 checksum algorithm names (see swh.model.hashutil.ALGORITHMS) to
                 checksum values
 
         Returns:
             a triplet (sha1, sha1_git, sha256) if the content exist
             or None otherwise.
 
         Raises:
             ValueError: in case the key of the dictionary is not sha1, sha1_git
                 nor sha256.
 
         """
         if not set(content).intersection(ALGORITHMS):
             raise ValueError('content keys must contain at least one of: '
                              'sha1, sha1_git, sha256, blake2s256')
 
         c = db.content_find(sha1=content.get('sha1'),
                             sha1_git=content.get('sha1_git'),
                             sha256=content.get('sha256'),
                             blake2s256=content.get('blake2s256'),
                             cur=cur)
         if c:
             return dict(zip(db.content_find_cols, c))
         return None
 
     def directory_add(self, directories):
         """Add directories to the storage
 
         Args:
             directories (iterable): iterable of dictionaries representing the
                 individual directories to add. Each dict has the following
                 keys:
 
                 - id (sha1_git): the id of the directory to add
                 - entries (list): list of dicts for each entry in the
                       directory.  Each dict has the following keys:
 
                       - name (bytes)
                       - type (one of 'file', 'dir', 'rev'): type of the
                         directory entry (file, directory, revision)
                       - target (sha1_git): id of the object pointed at by the
                         directory entry
                       - perms (int): entry permissions
         """
         dirs = set()
         dir_entries = {
             'file': defaultdict(list),
             'dir': defaultdict(list),
             'rev': defaultdict(list),
         }
 
         for cur_dir in directories:
             dir_id = cur_dir['id']
             dirs.add(dir_id)
             for src_entry in cur_dir['entries']:
                 entry = src_entry.copy()
                 entry['dir_id'] = dir_id
                 dir_entries[entry['type']][dir_id].append(entry)
 
         dirs_missing = set(self.directory_missing(dirs))
         if not dirs_missing:
             return
 
         db = self.get_db()
         with db.transaction() as cur:
             # Copy directory ids
             dirs_missing_dict = ({'id': dir} for dir in dirs_missing)
             db.mktemp('directory', cur)
             db.copy_to(dirs_missing_dict, 'tmp_directory', ['id'], cur)
 
             # Copy entries
             for entry_type, entry_list in dir_entries.items():
                 entries = itertools.chain.from_iterable(
                     entries_for_dir
                     for dir_id, entries_for_dir
                     in entry_list.items()
                     if dir_id in dirs_missing)
 
                 db.mktemp_dir_entry(entry_type)
 
                 db.copy_to(
                     entries,
                     'tmp_directory_entry_%s' % entry_type,
                     ['target', 'name', 'perms', 'dir_id'],
                     cur,
                 )
 
             # Do the final copy
             db.directory_add_from_temp(cur)
 
     @db_transaction_generator()
     def directory_missing(self, directories, db=None, cur=None):
         """List directories missing from storage
 
         Args:
             directories (iterable): an iterable of directory ids
 
         Yields:
             missing directory ids
 
         """
         for obj in db.directory_missing_from_list(directories, cur):
             yield obj[0]
 
     @db_transaction_generator(statement_timeout=20000)
     def directory_ls(self, directory, recursive=False, db=None, cur=None):
         """Get entries for one directory.
 
         Args:
             - directory: the directory to list entries from.
             - recursive: if flag on, this list recursively from this directory.
 
         Returns:
             List of entries for such directory.
 
         """
         if recursive:
             res_gen = db.directory_walk(directory, cur=cur)
         else:
             res_gen = db.directory_walk_one(directory, cur=cur)
 
         for line in res_gen:
             yield dict(zip(db.directory_ls_cols, line))
 
     @db_transaction(statement_timeout=2000)
     def directory_entry_get_by_path(self, directory, paths, db=None, cur=None):
         """Get the directory entry (either file or dir) from directory with path.
 
         Args:
             - directory: sha1 of the top level directory
             - paths: path to lookup from the top level directory. From left
               (top) to right (bottom).
 
         Returns:
             The corresponding directory entry if found, None otherwise.
 
         """
         res = db.directory_entry_get_by_path(directory, paths, cur)
         if res:
             return dict(zip(db.directory_ls_cols, res))
 
     def revision_add(self, revisions):
         """Add revisions to the storage
 
         Args:
             revisions (iterable): iterable of dictionaries representing the
                 individual revisions to add. Each dict has the following keys:
 
                 - id (sha1_git): id of the revision to add
                 - date (datetime.DateTime): date the revision was written
                 - date_offset (int): offset from UTC in minutes the revision
                   was written
                 - date_neg_utc_offset (boolean): whether a null date_offset
                   represents a negative UTC offset
                 - committer_date (datetime.DateTime): date the revision got
                   added to the origin
                 - committer_date_offset (int): offset from UTC in minutes the
                   revision was added to the origin
                 - committer_date_neg_utc_offset (boolean): whether a null
                   committer_date_offset represents a negative UTC offset
                 - type (one of 'git', 'tar'): type of the revision added
                 - directory (sha1_git): the directory the revision points at
                 - message (bytes): the message associated with the revision
                 - author_name (bytes): the name of the revision author
                 - author_email (bytes): the email of the revision author
                 - committer_name (bytes): the name of the revision committer
                 - committer_email (bytes): the email of the revision committer
                 - metadata (jsonb): extra information as dictionary
                 - synthetic (bool): revision's nature (tarball, directory
                   creates synthetic revision)
                 - parents (list of sha1_git): the parents of this revision
 
         """
         db = self.get_db()
 
         revisions_missing = set(self.revision_missing(
             set(revision['id'] for revision in revisions)))
 
         if not revisions_missing:
             return
 
         with db.transaction() as cur:
             db.mktemp_revision(cur)
 
             revisions_filtered = (
                 converters.revision_to_db(revision) for revision in revisions
                 if revision['id'] in revisions_missing)
 
             parents_filtered = []
 
             db.copy_to(
                 revisions_filtered, 'tmp_revision', db.revision_add_cols,
                 cur,
                 lambda rev: parents_filtered.extend(rev['parents']))
 
             db.revision_add_from_temp(cur)
 
             db.copy_to(parents_filtered, 'revision_history',
                        ['id', 'parent_id', 'parent_rank'], cur)
 
     @db_transaction_generator()
     def revision_missing(self, revisions, db=None, cur=None):
         """List revisions missing from storage
 
         Args:
             revisions (iterable): revision ids
 
         Yields:
             missing revision ids
 
         """
         if not revisions:
             return
 
         for obj in db.revision_missing_from_list(revisions, cur):
             yield obj[0]
 
     @db_transaction_generator(statement_timeout=500)
     def revision_get(self, revisions, db=None, cur=None):
         """Get all revisions from storage
 
         Args:
             revisions: an iterable of revision ids
 
         Returns:
             iterable: an iterable of revisions as dictionaries (or None if the
                 revision doesn't exist)
 
         """
         for line in db.revision_get_from_list(revisions, cur):
             data = converters.db_to_revision(
                 dict(zip(db.revision_get_cols, line))
             )
             if not data['type']:
                 yield None
                 continue
             yield data
 
     @db_transaction_generator(statement_timeout=2000)
     def revision_log(self, revisions, limit=None, db=None, cur=None):
         """Fetch revision entry from the given root revisions.
 
         Args:
             revisions: array of root revision to lookup
             limit: limitation on the output result. Default to None.
 
         Yields:
             List of revision log from such revisions root.
 
         """
         for line in db.revision_log(revisions, limit, cur):
             data = converters.db_to_revision(
                 dict(zip(db.revision_get_cols, line))
             )
             if not data['type']:
                 yield None
                 continue
             yield data
 
     @db_transaction_generator(statement_timeout=2000)
     def revision_shortlog(self, revisions, limit=None, db=None, cur=None):
         """Fetch the shortlog for the given revisions
 
         Args:
             revisions: list of root revisions to lookup
             limit: depth limitation for the output
 
         Yields:
             a list of (id, parents) tuples.
 
         """
 
         yield from db.revision_shortlog(revisions, limit, cur)
 
     def release_add(self, releases):
         """Add releases to the storage
 
         Args:
             releases (iterable): iterable of dictionaries representing the
                 individual releases to add. Each dict has the following keys:
 
                 - id (sha1_git): id of the release to add
                 - revision (sha1_git): id of the revision the release points to
                 - date (datetime.DateTime): the date the release was made
                 - date_offset (int): offset from UTC in minutes the release was
                   made
                 - date_neg_utc_offset (boolean): whether a null date_offset
                   represents a negative UTC offset
                 - name (bytes): the name of the release
                 - comment (bytes): the comment associated with the release
                 - author_name (bytes): the name of the release author
                 - author_email (bytes): the email of the release author
 
         """
         db = self.get_db()
 
         release_ids = set(release['id'] for release in releases)
         releases_missing = set(self.release_missing(release_ids))
 
         if not releases_missing:
             return
 
         with db.transaction() as cur:
             db.mktemp_release(cur)
 
             releases_filtered = (
                 converters.release_to_db(release) for release in releases
                 if release['id'] in releases_missing
             )
 
             db.copy_to(releases_filtered, 'tmp_release', db.release_add_cols,
                        cur)
 
             db.release_add_from_temp(cur)
 
     @db_transaction_generator()
     def release_missing(self, releases, db=None, cur=None):
         """List releases missing from storage
 
         Args:
             releases: an iterable of release ids
 
         Returns:
             a list of missing release ids
 
         """
         if not releases:
             return
 
         for obj in db.release_missing_from_list(releases, cur):
             yield obj[0]
 
     @db_transaction_generator(statement_timeout=500)
     def release_get(self, releases, db=None, cur=None):
         """Given a list of sha1, return the releases's information
 
         Args:
             releases: list of sha1s
 
         Yields:
             releases: list of releases as dicts with the following keys:
 
             - id: origin's id
             - revision: origin's type
             - url: origin's url
 
         Raises:
             ValueError: if the keys does not match (url and type) nor id.
 
         """
         for release in db.release_get_from_list(releases, cur):
             yield converters.db_to_release(
                 dict(zip(db.release_get_cols, release))
             )
 
     @db_transaction()
     def snapshot_add(self, origin, visit, snapshot,
                      db=None, cur=None):
         """Add a snapshot for the given origin/visit couple
 
         Args:
             origin (int): id of the origin
             visit (int): id of the visit
             snapshot (dict): the snapshot to add to the visit, containing the
               following keys:
 
               - **id** (:class:`bytes`): id of the snapshot
               - **branches** (:class:`dict`): branches the snapshot contains,
                 mapping the branch name (:class:`bytes`) to the branch target,
                 itself a :class:`dict` (or ``None`` if the branch points to an
                 unknown object)
 
                 - **target_type** (:class:`str`): one of ``content``,
                   ``directory``, ``revision``, ``release``,
                   ``snapshot``, ``alias``
                 - **target** (:class:`bytes`): identifier of the target
                   (currently a ``sha1_git`` for all object kinds, or the name
                   of the target branch for aliases)
         """
         if not db.snapshot_exists(snapshot['id'], cur):
             db.mktemp_snapshot_branch(cur)
             db.copy_to(
                 (
                     {
                         'name': name,
                         'target': info['target'] if info else None,
                         'target_type': info['target_type'] if info else None,
                     }
                     for name, info in snapshot['branches'].items()
                 ),
                 'tmp_snapshot_branch',
                 ['name', 'target', 'target_type'],
                 cur,
             )
 
         db.snapshot_add(origin, visit, snapshot['id'], cur)
 
     @db_transaction(statement_timeout=2000)
     def snapshot_get(self, snapshot_id, db=None, cur=None):
         """Get the content, possibly partial, of a snapshot with the given id
 
         The branches of the snapshot are iterated in the lexicographical
         order of their names.
 
         .. warning:: At most 1000 branches contained in the snapshot will be
             returned for performance reasons. In order to browse the whole
             set of branches, the method :meth:`snapshot_get_branches`
             should be used instead.
 
         Args:
             snapshot_id (bytes): identifier of the snapshot
         Returns:
             dict: a dict with three keys:
                 * **id**: identifier of the snapshot
                 * **branches**: a dict of branches contained in the snapshot
                   whose keys are the branches' names.
                 * **next_branch**: the name of the first branch not returned
                   or :const:`None` if the snapshot has less than 1000
                   branches.
         """
-        max_branches = 1000
-        branches = {}
-        next_branch = None
-        fetched_branches = list(db.snapshot_get_by_id(
-            snapshot_id, branches_count=max_branches+1, cur=cur))
-        for branch in fetched_branches[:max_branches]:
-            branch = dict(zip(db.snapshot_get_cols, branch))
-            del branch['snapshot_id']
-            name = branch.pop('name')
-            if branch == {'target': None, 'target_type': None}:
-                branch = None
-            branches[name] = branch
-
-        if len(fetched_branches) > max_branches:
-            branch = dict(zip(db.snapshot_get_cols, fetched_branches[-1]))
-            next_branch = branch['name']
-
-        if branches:
-            return {
-                'id': snapshot_id,
-                'branches': branches,
-                'next_branch': next_branch
-            }
-
-        if db.snapshot_exists(snapshot_id, cur):
-            # empty snapshot
-            return {
-                'id': snapshot_id,
-                'branches': {},
-                'next_branch': None
-            }
 
-        return None
+        return self.snapshot_get_branches(snapshot_id, db=db, cur=cur)
 
     @db_transaction(statement_timeout=2000)
     def snapshot_get_by_origin_visit(self, origin, visit, db=None, cur=None):
         """Get the content, possibly partial, of a snapshot for the given origin visit
 
         The branches of the snapshot are iterated in the lexicographical
         order of their names.
 
         .. warning:: At most 1000 branches contained in the snapshot will be
             returned for performance reasons. In order to browse the whole
             set of branches, the method :meth:`snapshot_get_branches`
             should be used instead.
 
         Args:
             origin (int): the origin identifier
             visit (int): the visit identifier
         Returns:
             dict: a dict with three keys:
                 * **id**: identifier of the snapshot
                 * **branches**: a dict of branches contained in the snapshot
                   whose keys are the branches' names.
                 * **next_branch**: the name of the first branch not returned
                   or :const:`None` if the snapshot has less than 1000
                   branches.
 
         """
         snapshot_id = db.snapshot_get_by_origin_visit(origin, visit, cur)
 
         if snapshot_id:
             return self.snapshot_get(snapshot_id, db=db, cur=cur)
-        else:
-            # compatibility code during the snapshot migration
-            origin_visit_info = self.origin_visit_get_by(origin, visit,
-                                                         db=db, cur=cur)
-            if origin_visit_info is None:
-                return None
-            ret = {'id': None}
-            ret['branches'] = origin_visit_info['occurrences']
-            return ret
 
         return None
 
     @db_transaction(statement_timeout=2000)
     def snapshot_get_latest(self, origin, allowed_statuses=None, db=None,
                             cur=None):
         """Get the content, possibly partial, of the latest snapshot for the
         given origin, optionally only from visits that have one of the given
         allowed_statuses
 
         The branches of the snapshot are iterated in the lexicographical
         order of their names.
 
         .. warning:: At most 1000 branches contained in the snapshot will be
             returned for performance reasons. In order to browse the whole
             set of branches, the method :meth:`snapshot_get_branches`
             should be used instead.
 
         Args:
             origin (int): the origin identifier
             allowed_statuses (list of str): list of visit statuses considered
                 to find the latest snapshot for the visit. For instance,
                 ``allowed_statuses=['full']`` will only consider visits that
                 have successfully run to completion.
         Returns:
             dict: a dict with three keys:
                 * **id**: identifier of the snapshot
                 * **branches**: a dict of branches contained in the snapshot
                   whose keys are the branches' names.
                 * **next_branch**: the name of the first branch not returned
                   or :const:`None` if the snapshot has less than 1000
                   branches.
         """
         origin_visit = db.origin_visit_get_latest_snapshot(
             origin, allowed_statuses=allowed_statuses, cur=cur)
         if origin_visit:
             origin_visit = dict(zip(db.origin_visit_get_cols, origin_visit))
             return self.snapshot_get(origin_visit['snapshot'], db=db, cur=cur)
 
     @db_transaction(statement_timeout=2000)
     def snapshot_count_branches(self, snapshot_id, db=None, cur=None):
         """Count the number of branches in the snapshot with the given id
 
         Args:
             snapshot_id (bytes): identifier of the snapshot
 
         Returns:
             dict: A dict whose keys are the target types of branches and
             values their corresponding amount
         """
         return dict([bc for bc in
                      db.snapshot_count_branches(snapshot_id, cur)])
 
     @db_transaction(statement_timeout=2000)
     def snapshot_get_branches(self, snapshot_id, branches_from=b'',
-                              branches_count=None, target_types=None,
+                              branches_count=1000, target_types=None,
                               db=None, cur=None):
         """Get the content, possibly partial, of a snapshot with the given id
 
         The branches of the snapshot are iterated in the lexicographical
         order of their names.
 
         Args:
             snapshot_id (bytes): identifier of the snapshot
             branches_from (bytes): optional parameter used to skip branches
                 whose name is lesser than it before returning them
             branches_count (int): optional parameter used to restrain
                 the amount of returned branches
             target_types (list): optional parameter used to filter the
                 target types of branch to return (possible values that can be
                 contained in that list are `'content', 'directory',
                 'revision', 'release', 'snapshot', 'alias'`)
         Returns:
-            dict: a dict with two keys:
+            dict: a dict with three keys:
                 * **id**: identifier of the snapshot
                 * **branches**: a dict of branches contained in the snapshot
                   whose keys are the branches' names.
+                * **next_branch**: the name of the first branch not returned
+                  or :const:`None` if the snapshot has at most
+                  `branches_count` branches from `branches_from` onwards.
         """
+        if snapshot_id == EMPTY_SNAPSHOT_ID:
+            return {
+                'id': snapshot_id,
+                'branches': {},
+                'next_branch': None,
+            }
+
         branches = {}
-        for branch in db.snapshot_get_by_id(snapshot_id, branches_from,
-                                            branches_count, target_types, cur):
+        next_branch = None
+
+        fetched_branches = list(db.snapshot_get_by_id(
+            snapshot_id, branches_from=branches_from,
+            branches_count=branches_count+1, target_types=target_types,
+            cur=cur,
+        ))
+        for branch in fetched_branches[:branches_count]:
             branch = dict(zip(db.snapshot_get_cols, branch))
             del branch['snapshot_id']
             name = branch.pop('name')
             if branch == {'target': None, 'target_type': None}:
                 branch = None
             branches[name] = branch
 
-        if branches:
-            return {'id': snapshot_id, 'branches': branches}
+        if len(fetched_branches) > branches_count:
+            branch = dict(zip(db.snapshot_get_cols, fetched_branches[-1]))
+            next_branch = branch['name']
 
-        if db.snapshot_exists(snapshot_id, cur):
-            return {'id': snapshot_id, 'branches': {}}
+        if branches:
+            return {
+                'id': snapshot_id,
+                'branches': branches,
+                'next_branch': next_branch,
+            }
 
         return None
 
     @db_transaction()
     def origin_visit_add(self, origin, ts, db=None, cur=None):
         """Add an origin_visit for the origin at ts with status 'ongoing'.
 
         Args:
             origin: Visited Origin id
             ts: timestamp of such visit
 
         Returns:
             dict: dictionary with keys origin and visit where:
 
             - origin: origin identifier
             - visit: the visit identifier for the new visit occurrence
             - ts (datetime.DateTime): the visit date
 
         """
         if isinstance(ts, str):
             ts = dateutil.parser.parse(ts)
 
         return {
             'origin': origin,
             'visit': db.origin_visit_add(origin, ts, cur)
         }
 
     @db_transaction()
     def origin_visit_update(self, origin, visit_id, status, metadata=None,
                             db=None, cur=None):
         """Update an origin_visit's status.
 
         Args:
             origin: Visited Origin id
             visit_id: Visit's id
             status: Visit's new status
             metadata: Data associated to the visit
 
         Returns:
             None
 
         """
         return db.origin_visit_update(origin, visit_id, status, metadata, cur)
 
     @db_transaction_generator(statement_timeout=500)
     def origin_visit_get(self, origin, last_visit=None, limit=None, db=None,
                          cur=None):
         """Retrieve all the origin's visit's information.
 
         Args:
             origin (int): The occurrence's origin (identifier).
             last_visit (int): Starting point from which listing the next visits
                 Default to None
             limit (int): Number of results to return from the last visit.
                 Default to None
 
         Yields:
             List of visits.
 
         """
         for line in db.origin_visit_get_all(
                 origin, last_visit=last_visit, limit=limit, cur=cur):
             data = dict(zip(db.origin_visit_get_cols, line))
             yield data
 
     @db_transaction(statement_timeout=500)
     def origin_visit_get_by(self, origin, visit, db=None, cur=None):
         """Retrieve origin visit's information.
 
         Args:
             origin: The occurrence's origin (identifier).
 
         Returns:
             The information on that particular (origin, visit)
 
         """
         ori_visit = db.origin_visit_get(origin, visit, cur)
         if not ori_visit:
             return None
 
         return dict(zip(db.origin_visit_get_cols, ori_visit))
 
     @db_transaction(statement_timeout=2000)
     def object_find_by_sha1_git(self, ids, db=None, cur=None):
         """Return the objects found with the given ids.
 
         Args:
             ids: a generator of sha1_gits
 
         Returns:
             dict: a mapping from id to the list of objects found. Each object
             found is itself a dict with keys:
 
             - sha1_git: the input id
             - type: the type of object found
             - id: the id of the object found
             - object_id: the numeric id of the object found.
 
         """
         ret = {id: [] for id in ids}
 
         for retval in db.object_find_by_sha1_git(ids, cur=cur):
             if retval[1]:
                 ret[retval[0]].append(dict(zip(db.object_find_by_sha1_git_cols,
                                                retval)))
 
         return ret
 
     origin_keys = ['id', 'type', 'url']
 
     @db_transaction(statement_timeout=500)
     def origin_get(self, origin, db=None, cur=None):
         """Return the origin either identified by its id or its tuple
         (type, url).
 
         Args:
             origin: dictionary representing the individual origin to find.
                 This dict has either the keys type and url:
 
                 - type (FIXME: enum TBD): the origin type ('git', 'wget', ...)
                 - url (bytes): the url the origin points to
 
                 or the id:
 
                 - id: the origin id
 
         Returns:
             dict: the origin dictionary with the keys:
 
             - id: origin's id
             - type: origin's type
             - url: origin's url
 
         Raises:
             ValueError: if the keys does not match (url and type) nor id.
 
         """
         origin_id = origin.get('id')
         if origin_id:  # check lookup per id first
             ori = db.origin_get(origin_id, cur)
         elif 'type' in origin and 'url' in origin:  # or lookup per type, url
             ori = db.origin_get_with(origin['type'], origin['url'], cur)
         else:  # unsupported lookup
             raise ValueError('Origin must have either id or (type and url).')
 
         if ori:
             return dict(zip(self.origin_keys, ori))
         return None
 
     @db_transaction_generator()
     def origin_search(self, url_pattern, offset=0, limit=50,
                       regexp=False, with_visit=False, db=None, cur=None):
         """Search for origins whose urls contain a provided string pattern
         or match a provided regular expression.
         The search is performed in a case insensitive way.
 
         Args:
             url_pattern (str): the string pattern to search for in origin urls
             offset (int): number of found origins to skip before returning
                 results
             limit (int): the maximum number of found origins to return
             regexp (bool): if True, consider the provided pattern as a regular
                 expression and return origins whose urls match it
             with_visit (bool): if True, filter out origins with no visit
 
         Returns:
             An iterable of dict containing origin information as returned
             by :meth:`swh.storage.storage.Storage.origin_get`.
         """
         for origin in db.origin_search(url_pattern, offset, limit,
                                        regexp, with_visit, cur):
             yield dict(zip(self.origin_keys, origin))
 
     @db_transaction()
     def _person_add(self, person, db=None, cur=None):
         """Add a person in storage.
 
         Note: Internal function for now, do not use outside of this module.
 
         Do not do anything fancy in case a person already exists.
         Please adapt code if more checks are needed.
 
         Args:
             person: dictionary with keys name and email.
 
         Returns:
             Id of the new person.
 
         """
         return db.person_add(person)
 
     @db_transaction_generator(statement_timeout=500)
     def person_get(self, person, db=None, cur=None):
         """Return the persons identified by their ids.
 
         Args:
             person: array of ids.
 
         Returns:
             The array of persons corresponding of the ids.
 
         """
         for person in db.person_get(person):
             yield dict(zip(db.person_get_cols, person))
 
     @db_transaction()
     def origin_add(self, origins, db=None, cur=None):
         """Add origins to the storage
 
         Args:
             origins: list of dictionaries representing the individual origins,
                 with the following keys:
 
                 - type: the origin type ('git', 'svn', 'deb', ...)
                 - url (bytes): the url the origin points to
 
         Returns:
             list: given origins as dict updated with their id
 
         """
         for origin in origins:
             origin['id'] = self.origin_add_one(origin, db=db, cur=cur)
         return origins
 
     @db_transaction()
     def origin_add_one(self, origin, db=None, cur=None):
         """Add origin to the storage
 
         Args:
             origin: dictionary representing the individual origin to add. This
                 dict has the following keys:
 
                 - type (FIXME: enum TBD): the origin type ('git', 'wget', ...)
                 - url (bytes): the url the origin points to
 
         Returns:
             the id of the added origin, or of the identical one that already
             exists.
 
         """
         data = db.origin_get_with(origin['type'], origin['url'], cur)
         if data:
             return data[0]
 
         return db.origin_add(origin['type'], origin['url'], cur)
 
     @db_transaction()
     def fetch_history_start(self, origin_id, db=None, cur=None):
         """Add an entry for origin origin_id in fetch_history. Returns the id
         of the added fetch_history entry
         """
         fetch_history = {
             'origin': origin_id,
             'date': datetime.datetime.now(tz=datetime.timezone.utc),
         }
 
         return db.create_fetch_history(fetch_history, cur)
 
     @db_transaction()
     def fetch_history_end(self, fetch_history_id, data, db=None, cur=None):
         """Close the fetch_history entry with id `fetch_history_id`, replacing
            its data with `data`.
         """
         now = datetime.datetime.now(tz=datetime.timezone.utc)
         fetch_history = db.get_fetch_history(fetch_history_id, cur)
 
         if not fetch_history:
             raise ValueError('No fetch_history with id %d' % fetch_history_id)
 
         fetch_history['duration'] = now - fetch_history['date']
 
         fetch_history.update(data)
 
         db.update_fetch_history(fetch_history, cur)
 
     @db_transaction()
     def fetch_history_get(self, fetch_history_id, db=None, cur=None):
         """Get the fetch_history entry with id `fetch_history_id`.
         """
         return db.get_fetch_history(fetch_history_id, cur)
 
     @db_transaction(statement_timeout=500)
     def stat_counters(self, db=None, cur=None):
         """compute statistics about the number of tuples in various tables
 
         Returns:
             dict: a dictionary mapping textual labels (e.g., content) to
             integer values (e.g., the number of tuples in table content)
 
         """
         return {k: v for (k, v) in db.stat_counters()}
 
     @db_transaction()
     def origin_metadata_add(self, origin_id, ts, provider, tool, metadata,
                             db=None, cur=None):
         """ Add an origin_metadata for the origin at ts with provenance and
         metadata.
 
         Args:
             origin_id (int): the origin's id for which the metadata is added
             ts (datetime): timestamp of the found metadata
             provider (int): the provider of metadata (ex:'hal')
             tool (int): tool used to extract metadata
             metadata (jsonb): the metadata retrieved at the time and location
 
         Returns:
             id (int): the origin_metadata unique id
         """
         if isinstance(ts, str):
             ts = dateutil.parser.parse(ts)
 
         return db.origin_metadata_add(origin_id, ts, provider, tool,
                                       metadata, cur)
 
     @db_transaction_generator(statement_timeout=500)
     def origin_metadata_get_by(self, origin_id, provider_type=None, db=None,
                                cur=None):
         """Retrieve list of all origin_metadata entries for the origin_id
 
         Args:
             origin_id (int): the unique origin identifier
             provider_type (str): (optional) type of provider
 
         Returns:
             list of dicts: the origin_metadata dictionary with the keys:
 
             - id (int): origin_metadata's id
             - origin_id (int): origin's id
             - discovery_date (datetime): timestamp of discovery
             - tool_id (int): metadata's extracting tool
             - metadata (jsonb)
             - provider_id (int): metadata's provider
             - provider_name (str)
             - provider_type (str)
             - provider_url (str)
 
         """
         for line in db.origin_metadata_get_by(origin_id, provider_type, cur):
             yield dict(zip(db.origin_metadata_get_cols, line))
 
     @db_transaction_generator()
     def tool_add(self, tools, db=None, cur=None):
         """Add new tools to the storage.
 
         Args:
             tools (iterable of :class:`dict`): Tool information to add to
               storage. Each tool is a :class:`dict` with the following keys:
 
               - name (:class:`str`): name of the tool
               - version (:class:`str`): version of the tool
               - configuration (:class:`dict`): configuration of the tool,
                 must be json-encodable
 
         Returns:
             `iterable` of :class:`dict`: All the tools inserted in storage
             (including the internal ``id``). The order of the list is not
             guaranteed to match the order of the initial list.
 
         """
         db.mktemp_tool(cur)
         db.copy_to(tools, 'tmp_tool',
                    ['name', 'version', 'configuration'],
                    cur)
 
         tools = db.tool_add_from_temp(cur)
         for line in tools:
             yield dict(zip(db.tool_cols, line))
 
     @db_transaction(statement_timeout=500)
     def tool_get(self, tool, db=None, cur=None):
         """Retrieve tool information.
 
         Args:
             tool (dict): Tool information we want to retrieve from storage.
               The dicts have the same keys as those used in :func:`tool_add`.
 
         Returns:
             dict: The full tool information if it exists (``id`` included),
             None otherwise.
 
         """
         tool_conf = tool['configuration']
         if isinstance(tool_conf, dict):
             tool_conf = json.dumps(tool_conf)
 
         idx = db.tool_get(tool['name'],
                           tool['version'],
                           tool_conf)
         if not idx:
             return None
         return dict(zip(db.tool_cols, idx))
 
     @db_transaction()
     def metadata_provider_add(self, provider_name, provider_type, provider_url,
                               metadata, db=None, cur=None):
         return db.metadata_provider_add(provider_name, provider_type,
                                         provider_url, metadata, cur)
 
     @db_transaction()
     def metadata_provider_get(self, provider_id, db=None, cur=None):
         result = db.metadata_provider_get(provider_id)
         if not result:
             return None
         return dict(zip(db.metadata_provider_cols, result))
 
     @db_transaction()
     def metadata_provider_get_by(self, provider, db=None, cur=None):
         result = db.metadata_provider_get_by(provider['provider_name'],
                                              provider['provider_url'])
         if not result:
             return None
         return dict(zip(db.metadata_provider_cols, result))
 
     def diff_directories(self, from_dir, to_dir, track_renaming=False):
         """Compute the list of file changes introduced between two arbitrary
         directories (insertion / deletion / modification / renaming of files).
 
         Args:
             from_dir (bytes): identifier of the directory to compare from
             to_dir (bytes): identifier of the directory to compare to
             track_renaming (bool): whether or not to track files renaming
 
         Returns:
             A list of dict describing the introduced file changes
             (see :func:`swh.storage.algos.diff.diff_directories`
             for more details).
         """
         return diff.diff_directories(self, from_dir, to_dir, track_renaming)
 
     def diff_revisions(self, from_rev, to_rev, track_renaming=False):
         """Compute the list of file changes introduced between two arbitrary
         revisions (insertion / deletion / modification / renaming of files).
 
         Args:
             from_rev (bytes): identifier of the revision to compare from
             to_rev (bytes): identifier of the revision to compare to
             track_renaming (bool): whether or not to track files renaming
 
         Returns:
             A list of dict describing the introduced file changes
             (see :func:`swh.storage.algos.diff.diff_directories`
             for more details).
         """
         return diff.diff_revisions(self, from_rev, to_rev, track_renaming)
 
     def diff_revision(self, revision, track_renaming=False):
         """Compute the list of file changes introduced by a specific revision
         (insertion / deletion / modification / renaming of files) by comparing
         it against its first parent.
 
         Args:
             revision (bytes): identifier of the revision from which to
                 compute the list of files changes
             track_renaming (bool): whether or not to track files renaming
 
         Returns:
             A list of dict describing the introduced file changes
             (see :func:`swh.storage.algos.diff.diff_directories`
             for more details).
         """
         return diff.diff_revision(self, revision, track_renaming)
diff --git a/swh/storage/tests/__init__.py b/swh/storage/tests/__init__.py
index e69de29bb..68b15fce7 100644
--- a/swh/storage/tests/__init__.py
+++ b/swh/storage/tests/__init__.py
@@ -0,0 +1,5 @@
+from os import path
+import swh.storage
+
+
+SQL_DIR = path.join(path.dirname(swh.storage.__file__), 'sql')
diff --git a/swh/storage/tests/algos/test_snapshot.py b/swh/storage/tests/algos/test_snapshot.py
new file mode 100644
index 000000000..ceff24376
--- /dev/null
+++ b/swh/storage/tests/algos/test_snapshot.py
@@ -0,0 +1,128 @@
+# Copyright (C) 2018  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import unittest
+
+from nose.plugins.attrib import attr
+
+from hypothesis import given, settings
+from hypothesis.strategies import (binary, composite, datetimes, dictionaries,
+                                   from_regex, none, one_of, sampled_from)
+
+from swh.model.identifiers import snapshot_identifier, identifier_to_bytes
+from swh.storage.tests.storage_testing import StorageTestFixture
+
+from swh.storage.algos.snapshot import snapshot_get_all_branches
+
+
+def branch_names():  # strategy: random byte strings of 5 to 10 bytes
+    return binary(min_size=5, max_size=10)
+
+
+@composite
+def branch_targets_object(draw):
+    """Draw a branch target: 20 random bytes plus an object target type."""
+    kinds = ['content', 'directory', 'revision', 'release', 'snapshot']
+
+    object_id = draw(binary(min_size=20, max_size=20))
+    return {
+        'target': object_id,
+        'target_type': draw(sampled_from(kinds)),
+    }
+
+
+@composite
+def branch_targets_alias(draw):
+    """Draw an alias branch target whose target is a generated branch name."""
+    alias_target = draw(branch_names())
+
+    return {'target': alias_target, 'target_type': 'alias'}
+
+
+def branch_targets(*, only_objects=False):
+    """Branch target strategy: objects only, or None/aliases as well."""
+    if only_objects:
+        return branch_targets_object()
+    return one_of(none(), branch_targets_alias(), branch_targets_object())
+
+
+@composite
+def snapshots(draw, *, min_size=0, max_size=100, only_objects=False):
+    """Draw a snapshot dict: generated 'branches' plus the matching 'id'."""
+    branches = draw(dictionaries(
+        keys=branch_names(),
+        values=branch_targets(only_objects=only_objects),
+        min_size=min_size,
+        max_size=max_size,
+    ))
+
+    if not only_objects:
+        # Make sure aliases point to actual branches; the object branches
+        # added for that purpose may push the count above max_size.
+        unresolved_aliases = {
+            target['target']
+            for target in branches.values()
+            if (target
+                and target['target_type'] == 'alias'
+                and target['target'] not in branches)
+        }
+
+        for alias in unresolved_aliases:
+            branches[alias] = draw(branch_targets(only_objects=True))
+
+    ret = {'branches': branches}
+    ret['id'] = identifier_to_bytes(snapshot_identifier(ret))
+    return ret
+
+
+@composite
+def urls(draw):
+    """Draw a '<protocol>://<domain>' URL string."""
+    scheme = draw(sampled_from(['git', 'http', 'https', 'deb']))
+    host = draw(from_regex(r'\A([a-z]([a-z0-9-]*)\.){1,3}[a-z0-9]+\Z'))
+    return '%s://%s' % (scheme, host)
+
+
+@composite
+def origins(draw):
+    """Draw an origin dict with a 'type' and a generated 'url'."""
+    origin_type = draw(sampled_from(['git', 'hg', 'svn', 'pypi', 'deb']))
+    origin_url = draw(urls())
+    return {'type': origin_type, 'url': origin_url}
+
+
+@attr('db')
+class TestSnapshotAllBranches(StorageTestFixture, unittest.TestCase):
+    @given(origins(), datetimes(), snapshots(min_size=0, max_size=10,
+                                             only_objects=False))
+    def test_snapshot_small(self, origin, ts, snapshot):
+        """A small snapshot is read back identical to what was stored."""
+        origin_id = self.storage.origin_add_one(origin)
+        visit = self.storage.origin_visit_add(origin_id, ts)
+        self.storage.snapshot_add(origin_id, visit['visit'], snapshot)
+
+        returned = snapshot_get_all_branches(self.storage, snapshot['id'])
+        self.assertEqual(snapshot, returned)
+
+    @settings(max_examples=5, deadline=1000)
+    @given(origins(), datetimes(),
+           branch_names(), branch_targets(only_objects=True))
+    def test_snapshot_large(self, origin, ts, branch_name, branch_target):
+        """A snapshot with 10000 branches is read back unchanged."""
+        origin_id = self.storage.origin_add_one(origin)
+        visit = self.storage.origin_visit_add(origin_id, ts)
+
+        snapshot = {
+            'branches': {
+                b'%s%05d' % (branch_name, i): branch_target
+                for i in range(10000)
+            }
+        }
+        snapshot['id'] = identifier_to_bytes(snapshot_identifier(snapshot))
+
+        self.storage.snapshot_add(origin_id, visit['visit'], snapshot)
+
+        returned = snapshot_get_all_branches(self.storage, snapshot['id'])
+        self.assertEqual(snapshot, returned)
diff --git a/swh/storage/tests/storage_testing.py b/swh/storage/tests/storage_testing.py
index bc6d471ce..338a0aecd 100644
--- a/swh/storage/tests/storage_testing.py
+++ b/swh/storage/tests/storage_testing.py
@@ -1,63 +1,57 @@
 # Copyright (C) 2015-2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import os
 import tempfile
-import pathlib
 
 from swh.storage import get_storage
 
+from swh.core.tests.db_testing import SingleDbTestFixture
+from swh.storage.tests import SQL_DIR
 
-class StorageTestFixture:
+
+class StorageTestFixture(SingleDbTestFixture):
     """Mix this in a test subject class to get Storage testing support.
 
-    This fixture requires to come before DbTestFixture in the inheritance list
-    as it uses its methods to setup its own internal database.
+    This fixture derives from SingleDbTestFixture, configured through the
+    TEST_DB_NAME and TEST_DB_DUMP attributes below, so test classes only
+    need to list it before unittest.TestCase.
 
     Usage example:
 
-        class TestStorage(StorageTestFixture, DbTestFixture):
+        class MyTestStorage(StorageTestFixture, unittest.TestCase):
             ...
-    """
-    TEST_STORAGE_DB_NAME = 'softwareheritage-test-storage'
 
-    @classmethod
-    def setUpClass(cls):
-        if not hasattr(cls, 'DB_TEST_FIXTURE_IMPORTED'):
-            raise RuntimeError("StorageTestFixture needs to be followed by "
-                               "DbTestFixture in the inheritance list.")
-
-        test_dir = pathlib.Path(__file__).absolute().parent
-        test_data_dir = test_dir / '../../../../swh-storage-testdata'
-        test_db_dump = (test_data_dir / 'dumps/swh.dump').absolute()
-        cls.add_db(cls.TEST_STORAGE_DB_NAME, str(test_db_dump), 'pg_dump')
-        super().setUpClass()
+    """
+    TEST_DB_NAME = 'softwareheritage-test-storage'
+    TEST_DB_DUMP = os.path.join(SQL_DIR, '*.sql')
 
     def setUp(self):
         super().setUp()
         self.objtmp = tempfile.TemporaryDirectory()
 
         self.storage_config = {
             'cls': 'local',
             'args': {
-                'db': 'dbname=%s' % self.TEST_STORAGE_DB_NAME,
+                'db': 'dbname=%s' % self.TEST_DB_NAME,
                 'objstorage': {
                     'cls': 'pathslicing',
                     'args': {
                         'root': self.objtmp.name,
                         'slicing': '0:1/1:5',
                     },
                 },
             },
         }
         self.storage = get_storage(**self.storage_config)
 
     def tearDown(self):
         self.objtmp.cleanup()
         self.storage = None
         super().tearDown()
 
     def reset_storage_tables(self):
         excluded = {'dbversion', 'tool'}
-        self.reset_db_tables(self.TEST_STORAGE_DB_NAME, excluded=excluded)
+        self.reset_db_tables(self.TEST_DB_NAME, excluded=excluded)
diff --git a/swh/storage/tests/test_api_client.py b/swh/storage/tests/test_api_client.py
index 2a49e39d2..6735541e5 100644
--- a/swh/storage/tests/test_api_client.py
+++ b/swh/storage/tests/test_api_client.py
@@ -1,54 +1,54 @@
 # Copyright (C) 2015-2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import shutil
 import tempfile
 import unittest
 
 from swh.core.tests.server_testing import ServerTestFixture
 from swh.storage.api.client import RemoteStorage
 from swh.storage.api.server import app
 from swh.storage.tests.test_storage import CommonTestStorage
 
 
 class TestRemoteStorage(CommonTestStorage, ServerTestFixture,
                         unittest.TestCase):
     """Test the remote storage API.
 
     This class doesn't define any tests as we want identical
     functionality between local and remote storage. All the tests are
     therefore defined in CommonTestStorage.
     """
 
     def setUp(self):
         # ServerTestFixture needs to have self.objroot for
         # setUp() method, but this field is defined in
         # AbstractTestStorage's setUp()
         # To avoid confusion, override the self.objroot to a
         # one chosen in this class.
         self.storage_base = tempfile.mkdtemp()
         self.config = {
             'storage': {
                 'cls': 'local',
                 'args': {
-                    'db': 'dbname=%s' % self.TEST_STORAGE_DB_NAME,
+                    'db': 'dbname=%s' % self.TEST_DB_NAME,
                     'objstorage': {
                         'cls': 'pathslicing',
                         'args': {
                             'root': self.storage_base,
                             'slicing': '0:2',
                         },
                     },
                 }
             }
         }
         self.app = app
         super().setUp()
         self.storage = RemoteStorage(self.url())
         self.objroot = self.storage_base
 
     def tearDown(self):
         super().tearDown()
         shutil.rmtree(self.storage_base)
diff --git a/swh/storage/tests/test_db.py b/swh/storage/tests/test_db.py
index f0952ffca..5b3c2ec0e 100644
--- a/swh/storage/tests/test_db.py
+++ b/swh/storage/tests/test_db.py
@@ -1,52 +1,50 @@
 # Copyright (C) 2015-2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import os
 import unittest
 
 from nose.plugins.attrib import attr
 
 from swh.core.tests.db_testing import SingleDbTestFixture
 from swh.model.hashutil import hash_to_bytes
 from swh.storage.db import Db
-
-TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-TEST_DATA_DIR = os.path.join(TEST_DIR, '../../../../swh-storage-testdata')
+from . import SQL_DIR
 
 
 @attr('db')
 class TestDb(SingleDbTestFixture, unittest.TestCase):
-
-    TEST_DB_DUMP = os.path.join(TEST_DATA_DIR, 'dumps/swh.dump')
+    TEST_DB_NAME = 'softwareheritage-test-storage'
+    TEST_DB_DUMP = os.path.join(SQL_DIR, '*.sql')
 
     def setUp(self):
         super().setUp()
         self.db = Db(self.conn)
 
     def tearDown(self):
         self.db.conn.close()
         super().tearDown()
 
     def test_add_content(self):
         cur = self.cursor
         sha1 = hash_to_bytes('34973274ccef6ab4dfaaf86599792fa9c3fe4689')
         self.db.mktemp('content', cur)
         self.db.copy_to([{
             'sha1': sha1,
             'sha1_git': hash_to_bytes(
                 'd81cc0710eb6cf9efd5b920a8453e1e07157b6cd'),
             'sha256': hash_to_bytes(
                 '673650f936cb3b0a2f93ce09d81be107'
                 '48b1b203c19e8176b4eefc1964a0cf3a'),
             'blake2s256': hash_to_bytes('69217a3079908094e11121d042354a7c'
                                         '1f55b6482ca1a51e1b250dfd1ed0eef9'),
             'length': 3}],
                         'tmp_content',
                         ['sha1', 'sha1_git', 'sha256', 'blake2s256', 'length'],
                         cur)
         self.db.content_add_from_temp(cur)
         self.cursor.execute('SELECT sha1 FROM content WHERE sha1 = %s',
                             (sha1,))
         self.assertEqual(self.cursor.fetchone()[0].tobytes(), sha1)
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
index d65f85a10..e002e0e7c 100644
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -1,1960 +1,1982 @@
 # Copyright (C) 2015-2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import copy
 import datetime
 import unittest
 from collections import defaultdict
 from operator import itemgetter
 from unittest.mock import Mock, patch
 
 import psycopg2
 from nose.plugins.attrib import attr
 
 from swh.core.tests.db_testing import DbTestFixture
 from swh.model import from_disk, identifiers
 from swh.model.hashutil import hash_to_bytes
 from swh.storage.tests.storage_testing import StorageTestFixture
 
 
 @attr('db')
-class BaseTestStorage(StorageTestFixture, DbTestFixture):
+class BaseTestStorage(StorageTestFixture):
     def setUp(self):
         super().setUp()
 
-        db = self.test_db[self.TEST_STORAGE_DB_NAME]
+        db = self.test_db[self.TEST_DB_NAME]
         self.conn = db.conn
         self.cursor = db.cursor
 
         self.maxDiff = None
 
         self.cont = {
             'data': b'42\n',
             'length': 3,
             'sha1': hash_to_bytes(
                 '34973274ccef6ab4dfaaf86599792fa9c3fe4689'),
             'sha1_git': hash_to_bytes(
                 'd81cc0710eb6cf9efd5b920a8453e1e07157b6cd'),
             'sha256': hash_to_bytes(
                 '673650f936cb3b0a2f93ce09d81be107'
                 '48b1b203c19e8176b4eefc1964a0cf3a'),
             'blake2s256': hash_to_bytes('d5fe1939576527e42cfd76a9455a2'
                                         '432fe7f56669564577dd93c4280e76d661d'),
             'status': 'visible',
         }
 
         self.cont2 = {
             'data': b'4242\n',
             'length': 5,
             'sha1': hash_to_bytes(
                 '61c2b3a30496d329e21af70dd2d7e097046d07b7'),
             'sha1_git': hash_to_bytes(
                 '36fade77193cb6d2bd826161a0979d64c28ab4fa'),
             'sha256': hash_to_bytes(
                 '859f0b154fdb2d630f45e1ecae4a8629'
                 '15435e663248bb8461d914696fc047cd'),
             'blake2s256': hash_to_bytes('849c20fad132b7c2d62c15de310adfe87be'
                                         '94a379941bed295e8141c6219810d'),
             'status': 'visible',
         }
 
         self.cont3 = {
             'data': b'424242\n',
             'length': 7,
             'sha1': hash_to_bytes(
                 '3e21cc4942a4234c9e5edd8a9cacd1670fe59f13'),
             'sha1_git': hash_to_bytes(
                 'c932c7649c6dfa4b82327d121215116909eb3bea'),
             'sha256': hash_to_bytes(
                 '92fb72daf8c6818288a35137b72155f5'
                 '07e5de8d892712ab96277aaed8cf8a36'),
             'blake2s256': hash_to_bytes('76d0346f44e5a27f6bafdd9c2befd304af'
                                         'f83780f93121d801ab6a1d4769db11'),
             'status': 'visible',
         }
 
         self.missing_cont = {
             'data': b'missing\n',
             'length': 8,
             'sha1': hash_to_bytes(
                 'f9c24e2abb82063a3ba2c44efd2d3c797f28ac90'),
             'sha1_git': hash_to_bytes(
                 '33e45d56f88993aae6a0198013efa80716fd8919'),
             'sha256': hash_to_bytes(
                 '6bbd052ab054ef222c1c87be60cd191a'
                 'ddedd24cc882d1f5f7f7be61dc61bb3a'),
             'blake2s256': hash_to_bytes('306856b8fd879edb7b6f1aeaaf8db9bbecc9'
                                         '93cd7f776c333ac3a782fa5c6eba'),
             'status': 'absent',
         }
 
         self.skipped_cont = {
             'length': 1024 * 1024 * 200,
             'sha1_git': hash_to_bytes(
                 '33e45d56f88993aae6a0198013efa80716fd8920'),
             'sha1': hash_to_bytes(
                 '43e45d56f88993aae6a0198013efa80716fd8920'),
             'sha256': hash_to_bytes(
                 '7bbd052ab054ef222c1c87be60cd191a'
                 'ddedd24cc882d1f5f7f7be61dc61bb3a'),
             'blake2s256': hash_to_bytes(
                 'ade18b1adecb33f891ca36664da676e1'
                 '2c772cc193778aac9a137b8dc5834b9b'),
             'reason': 'Content too long',
             'status': 'absent',
         }
 
         self.skipped_cont2 = {
             'length': 1024 * 1024 * 300,
             'sha1_git': hash_to_bytes(
                 '44e45d56f88993aae6a0198013efa80716fd8921'),
             'sha1': hash_to_bytes(
                 '54e45d56f88993aae6a0198013efa80716fd8920'),
             'sha256': hash_to_bytes(
                 '8cbd052ab054ef222c1c87be60cd191a'
                 'ddedd24cc882d1f5f7f7be61dc61bb3a'),
             'blake2s256': hash_to_bytes(
                 '9ce18b1adecb33f891ca36664da676e1'
                 '2c772cc193778aac9a137b8dc5834b9b'),
             'reason': 'Content too long',
             'status': 'absent',
         }
 
         self.dir = {
             'id': b'4\x013\x422\x531\x000\xf51\xe62\xa73\xff7\xc3\xa90',
             'entries': [
                 {
                     'name': b'foo',
                     'type': 'file',
                     'target': self.cont['sha1_git'],
                     'perms': from_disk.DentryPerms.content,
                 },
                 {
                     'name': b'bar\xc3',
                     'type': 'dir',
                     'target': b'12345678901234567890',
                     'perms': from_disk.DentryPerms.directory,
                 },
             ],
         }
 
         self.dir2 = {
             'id': b'4\x013\x422\x531\x000\xf51\xe62\xa73\xff7\xc3\xa95',
             'entries': [
                 {
                     'name': b'oof',
                     'type': 'file',
                     'target': self.cont2['sha1_git'],
                     'perms': from_disk.DentryPerms.content,
                 }
             ],
         }
 
         self.dir3 = {
             'id': hash_to_bytes('33e45d56f88993aae6a0198013efa80716fd8921'),
             'entries': [
                 {
                     'name': b'foo',
                     'type': 'file',
                     'target': self.cont['sha1_git'],
                     'perms': from_disk.DentryPerms.content,
                 },
                 {
                     'name': b'bar',
                     'type': 'dir',
                     'target': b'12345678901234560000',
                     'perms': from_disk.DentryPerms.directory,
                 },
                 {
                     'name': b'hello',
                     'type': 'file',
                     'target': b'12345678901234567890',
                     'perms': from_disk.DentryPerms.content,
                 },
 
             ],
         }
 
         self.minus_offset = datetime.timezone(datetime.timedelta(minutes=-120))
         self.plus_offset = datetime.timezone(datetime.timedelta(minutes=120))
 
         self.revision = {
             'id': b'56789012345678901234',
             'message': b'hello',
             'author': {
                 'name': b'Nicolas Dandrimont',
                 'email': b'nicolas@example.com',
                 'fullname': b'Nicolas Dandrimont <nicolas@example.com> ',
             },
             'date': {
                 'timestamp': 1234567890,
                 'offset': 120,
                 'negative_utc': None,
             },
             'committer': {
                 'name': b'St\xc3fano Zacchiroli',
                 'email': b'stefano@example.com',
                 'fullname': b'St\xc3fano Zacchiroli <stefano@example.com>'
             },
             'committer_date': {
                 'timestamp': 1123456789,
                 'offset': 0,
                 'negative_utc': True,
             },
             'parents': [b'01234567890123456789', b'23434512345123456789'],
             'type': 'git',
             'directory': self.dir['id'],
             'metadata': {
                 'checksums': {
                     'sha1': 'tarball-sha1',
                     'sha256': 'tarball-sha256',
                 },
                 'signed-off-by': 'some-dude',
                 'extra_headers': [
                     ['gpgsig', b'test123'],
                     ['mergetags', [b'foo\\bar', b'\x22\xaf\x89\x80\x01\x00']],
                 ],
             },
             'synthetic': True
         }
 
         self.revision2 = {
             'id': b'87659012345678904321',
             'message': b'hello again',
             'author': {
                 'name': b'Roberto Dicosmo',
                 'email': b'roberto@example.com',
                 'fullname': b'Roberto Dicosmo <roberto@example.com>',
             },
             'date': {
                 'timestamp': {
                     'seconds': 1234567843,
                     'microseconds': 220000,
                 },
                 'offset': -720,
                 'negative_utc': None,
             },
             'committer': {
                 'name': b'tony',
                 'email': b'ar@dumont.fr',
                 'fullname': b'tony <ar@dumont.fr>',
             },
             'committer_date': {
                 'timestamp': 1123456789,
                 'offset': 0,
                 'negative_utc': False,
             },
             'parents': [b'01234567890123456789'],
             'type': 'git',
             'directory': self.dir2['id'],
             'metadata': None,
             'synthetic': False
         }
 
         self.revision3 = {
             'id': hash_to_bytes('7026b7c1a2af56521e951c01ed20f255fa054238'),
             'message': b'a simple revision with no parents this time',
             'author': {
                 'name': b'Roberto Dicosmo',
                 'email': b'roberto@example.com',
                 'fullname': b'Roberto Dicosmo <roberto@example.com>',
             },
             'date': {
                 'timestamp': {
                     'seconds': 1234567843,
                     'microseconds': 220000,
                 },
                 'offset': -720,
                 'negative_utc': None,
             },
             'committer': {
                 'name': b'tony',
                 'email': b'ar@dumont.fr',
                 'fullname': b'tony <ar@dumont.fr>',
             },
             'committer_date': {
                 'timestamp': 1127351742,
                 'offset': 0,
                 'negative_utc': False,
             },
             'parents': [],
             'type': 'git',
             'directory': self.dir2['id'],
             'metadata': None,
             'synthetic': True
         }
 
         self.revision4 = {
             'id': hash_to_bytes('368a48fe15b7db2383775f97c6b247011b3f14f4'),
             'message': b'parent of self.revision2',
             'author': {
                 'name': b'me',
                 'email': b'me@soft.heri',
                 'fullname': b'me <me@soft.heri>',
             },
             'date': {
                 'timestamp': {
                     'seconds': 1244567843,
                     'microseconds': 220000,
                 },
                 'offset': -720,
                 'negative_utc': None,
             },
             'committer': {
                 'name': b'committer-dude',
                 'email': b'committer@dude.com',
                 'fullname': b'committer-dude <committer@dude.com>',
             },
             'committer_date': {
                 'timestamp': {
                     'seconds': 1244567843,
                     'microseconds': 220000,
                 },
                 'offset': -720,
                 'negative_utc': None,
             },
             'parents': [self.revision3['id']],
             'type': 'git',
             'directory': self.dir['id'],
             'metadata': None,
             'synthetic': False
         }
 
         self.origin = {
             'url': 'file:///dev/null',
             'type': 'git',
         }
 
         self.origin2 = {
             'url': 'file:///dev/zero',
             'type': 'git',
         }
 
         self.provider = {
             'name': 'hal',
             'type': 'deposit-client',
             'url': 'http:///hal/inria',
             'metadata': {
                 'location': 'France'
             }
         }
 
         self.metadata_tool = {
             'name': 'swh-deposit',
             'version': '0.0.1',
             'configuration': {
                 'sword_version': '2'
             }
         }
 
         self.origin_metadata = {
             'origin': self.origin,
             'discovery_date': datetime.datetime(2015, 1, 1, 23, 0, 0,
                                                 tzinfo=datetime.timezone.utc),
             'provider': self.provider,
             'tool': 'swh-deposit',
             'metadata': {
                 'name': 'test_origin_metadata',
                 'version': '0.0.1'
              }
         }
 
         self.origin_metadata2 = {
             'origin': self.origin,
             'discovery_date': datetime.datetime(2017, 1, 1, 23, 0, 0,
                                                 tzinfo=datetime.timezone.utc),
             'provider': self.provider,
             'tool': 'swh-deposit',
             'metadata': {
                 'name': 'test_origin_metadata',
                 'version': '0.0.1'
              }
         }
 
         self.date_visit1 = datetime.datetime(2015, 1, 1, 23, 0, 0,
                                              tzinfo=datetime.timezone.utc)
 
         self.date_visit2 = datetime.datetime(2017, 1, 1, 23, 0, 0,
                                              tzinfo=datetime.timezone.utc)
 
         self.date_visit3 = datetime.datetime(2018, 1, 1, 23, 0, 0,
                                              tzinfo=datetime.timezone.utc)
 
         self.release = {
             'id': b'87659012345678901234',
             'name': b'v0.0.1',
             'author': {
                 'name': b'olasd',
                 'email': b'nic@olasd.fr',
                 'fullname': b'olasd <nic@olasd.fr>',
             },
             'date': {
                 'timestamp': 1234567890,
                 'offset': 42,
                 'negative_utc': None,
             },
             'target': b'43210987654321098765',
             'target_type': 'revision',
             'message': b'synthetic release',
             'synthetic': True,
         }
 
         self.release2 = {
             'id': b'56789012348765901234',
             'name': b'v0.0.2',
             'author': {
                 'name': b'tony',
                 'email': b'ar@dumont.fr',
                 'fullname': b'tony <ar@dumont.fr>',
             },
             'date': {
                 'timestamp': 1634366813,
                 'offset': -120,
                 'negative_utc': None,
             },
             'target': b'432109\xa9765432\xc309\x00765',
             'target_type': 'revision',
             'message': b'v0.0.2\nMisc performance improvements + bug fixes',
             'synthetic': False
         }
 
         self.release3 = {
             'id': b'87659012345678904321',
             'name': b'v0.0.2',
             'author': {
                 'name': b'tony',
                 'email': b'tony@ardumont.fr',
                 'fullname': b'tony <tony@ardumont.fr>',
             },
             'date': {
                 'timestamp': 1634336813,
                 'offset': 0,
                 'negative_utc': False,
             },
             'target': self.revision2['id'],
             'target_type': 'revision',
             'message': b'yet another synthetic release',
             'synthetic': True,
         }
 
         self.fetch_history_date = datetime.datetime(
             2015, 1, 2, 21, 0, 0,
             tzinfo=datetime.timezone.utc)
         self.fetch_history_end = datetime.datetime(
             2015, 1, 2, 23, 0, 0,
             tzinfo=datetime.timezone.utc)
 
         self.fetch_history_duration = (self.fetch_history_end -
                                        self.fetch_history_date)
 
         self.fetch_history_data = {
             'status': True,
             'result': {'foo': 'bar'},
             'stdout': 'blabla',
             'stderr': 'blablabla',
         }
 
         self.snapshot = {
             'id': hash_to_bytes('2498dbf535f882bc7f9a18fb16c9ad27fda7bab7'),
             'branches': {
                 b'master': {
                     'target': self.revision['id'],
                     'target_type': 'revision',
                 },
             },
             'next_branch': None
         }
 
         self.empty_snapshot = {
             'id': hash_to_bytes('1a8893e6a86f444e8be8e7bda6cb34fb1735a00e'),
             'branches': {},
             'next_branch': None
         }
 
         self.complete_snapshot = {
             'id': hash_to_bytes('6e65b86363953b780d92b0a928f3e8fcdd10db36'),
             'branches': {
                 b'directory': {
                     'target': hash_to_bytes(
                         '1bd0e65f7d2ff14ae994de17a1e7fe65111dcad8'),
                     'target_type': 'directory',
                 },
                 b'content': {
                     'target': hash_to_bytes(
                         'fe95a46679d128ff167b7c55df5d02356c5a1ae1'),
                     'target_type': 'content',
                 },
                 b'alias': {
                     'target': b'revision',
                     'target_type': 'alias',
                 },
                 b'revision': {
                     'target': hash_to_bytes(
                         'aafb16d69fd30ff58afdd69036a26047f3aebdc6'),
                     'target_type': 'revision',
                 },
                 b'release': {
                     'target': hash_to_bytes(
                         '7045404f3d1c54e6473c71bbb716529fbad4be24'),
                     'target_type': 'release',
                 },
                 b'snapshot': {
                     'target': hash_to_bytes(
                         '1a8893e6a86f444e8be8e7bda6cb34fb1735a00e'),
                     'target_type': 'snapshot',
                 },
                 b'dangling': None,
             },
             'next_branch': None
         }
 
     def tearDown(self):
         self.reset_storage_tables()
         super().tearDown()
 
 
 class CommonTestStorage(BaseTestStorage):
     """Base class for Storage testing.
 
     This class is used as-is to test local storage (see TestLocalStorage
     below) and remote storage (see TestRemoteStorage in
     test_remote_storage.py.
 
     We need to have the two classes inherit from this base class
     separately to avoid nosetests running the tests from the base
     class twice.
 
     """
 
     @staticmethod
     def normalize_entity(entity):
         entity = copy.deepcopy(entity)
         for key in ('date', 'committer_date'):
             if key in entity:
                 entity[key] = identifiers.normalize_timestamp(entity[key])
 
         return entity
 
     def test_check_config(self):
         self.assertTrue(self.storage.check_config(check_write=True))
         self.assertTrue(self.storage.check_config(check_write=False))
 
     def test_content_add(self):
         cont = self.cont
 
         self.storage.content_add([cont])
         if hasattr(self.storage, 'objstorage'):
             self.assertIn(cont['sha1'], self.storage.objstorage)
         self.cursor.execute('SELECT sha1, sha1_git, sha256, length, status'
                             ' FROM content WHERE sha1 = %s',
                             (cont['sha1'],))
         datum = self.cursor.fetchone()
         self.assertEqual(
             (datum[0].tobytes(), datum[1].tobytes(), datum[2].tobytes(),
              datum[3], datum[4]),
             (cont['sha1'], cont['sha1_git'], cont['sha256'],
              cont['length'], 'visible'))
 
     def test_content_add_collision(self):
         cont1 = self.cont
 
         # create (corrupted) content with same sha1{,_git} but != sha256
         cont1b = cont1.copy()
         sha256_array = bytearray(cont1b['sha256'])
         sha256_array[0] += 1
         cont1b['sha256'] = bytes(sha256_array)
 
         with self.assertRaises(psycopg2.IntegrityError):
             self.storage.content_add([cont1, cont1b])
 
     def test_skipped_content_add(self):
         cont = self.skipped_cont.copy()
         cont2 = self.skipped_cont2.copy()
         cont2['blake2s256'] = None
 
         self.storage.content_add([cont, cont, cont2])
 
         self.cursor.execute('SELECT sha1, sha1_git, sha256, blake2s256, '
                             'length, status, reason '
                             'FROM skipped_content ORDER BY sha1_git')
 
         datums = self.cursor.fetchall()
 
         self.assertEquals(2, len(datums))
         datum = datums[0]
         self.assertEqual(
             (datum[0].tobytes(), datum[1].tobytes(), datum[2].tobytes(),
              datum[3].tobytes(), datum[4], datum[5], datum[6]),
             (cont['sha1'], cont['sha1_git'], cont['sha256'],
              cont['blake2s256'], cont['length'], 'absent',
              'Content too long')
         )
 
         datum2 = datums[1]
         self.assertEqual(
             (datum2[0].tobytes(), datum2[1].tobytes(), datum2[2].tobytes(),
              datum2[3], datum2[4], datum2[5], datum2[6]),
             (cont2['sha1'], cont2['sha1_git'], cont2['sha256'],
              cont2['blake2s256'], cont2['length'], 'absent',
              'Content too long')
         )
 
     def test_content_missing(self):
         cont2 = self.cont2
         missing_cont = self.missing_cont
         self.storage.content_add([cont2])
         test_contents = [cont2]
         missing_per_hash = defaultdict(list)
         for i in range(256):
             test_content = missing_cont.copy()
             for hash in ['sha1', 'sha256', 'sha1_git', 'blake2s256']:
                 test_content[hash] = bytes([i]) + test_content[hash][1:]
                 missing_per_hash[hash].append(test_content[hash])
             test_contents.append(test_content)
 
         self.assertCountEqual(
             self.storage.content_missing(test_contents),
             missing_per_hash['sha1']
         )
 
         for hash in ['sha1', 'sha256', 'sha1_git', 'blake2s256']:
             self.assertCountEqual(
                 self.storage.content_missing(test_contents, key_hash=hash),
                 missing_per_hash[hash]
             )
 
     def test_content_missing_per_sha1(self):
         # given
         cont2 = self.cont2
         missing_cont = self.missing_cont
         self.storage.content_add([cont2])
         # when
         gen = self.storage.content_missing_per_sha1([cont2['sha1'],
                                                      missing_cont['sha1']])
 
         # then
         self.assertEqual(list(gen), [missing_cont['sha1']])
 
     def test_content_get_metadata(self):
         cont1 = self.cont.copy()
         cont2 = self.cont2.copy()
 
         self.storage.content_add([cont1, cont2])
 
         gen = self.storage.content_get_metadata([cont1['sha1'], cont2['sha1']])
 
         # we only retrieve the metadata
         cont1.pop('data')
         cont2.pop('data')
 
         self.assertCountEqual(list(gen), [cont1, cont2])
 
     def test_content_get_metadata_missing_sha1(self):
         cont1 = self.cont.copy()
         cont2 = self.cont2.copy()
 
         missing_cont = self.missing_cont.copy()
 
         self.storage.content_add([cont1, cont2])
 
         gen = self.storage.content_get_metadata([missing_cont['sha1']])
 
         # All the metadata keys are None
         missing_cont.pop('data')
         for key in list(missing_cont):
             if key != 'sha1':
                 missing_cont[key] = None
 
         self.assertEqual(list(gen), [missing_cont])
 
     def test_directory_add(self):
         init_missing = list(self.storage.directory_missing([self.dir['id']]))
         self.assertEqual([self.dir['id']], init_missing)
 
         self.storage.directory_add([self.dir])
 
         stored_data = list(self.storage.directory_ls(self.dir['id']))
 
         data_to_store = []
         for ent in sorted(self.dir['entries'], key=itemgetter('name')):
             data_to_store.append({
                 'dir_id': self.dir['id'],
                 'type': ent['type'],
                 'target': ent['target'],
                 'name': ent['name'],
                 'perms': ent['perms'],
                 'status': None,
                 'sha1': None,
                 'sha1_git': None,
                 'sha256': None,
                 'length': None,
             })
 
         self.assertEqual(data_to_store, stored_data)
 
         after_missing = list(self.storage.directory_missing([self.dir['id']]))
         self.assertEqual([], after_missing)
 
     def test_directory_entry_get_by_path(self):
         # given
         init_missing = list(self.storage.directory_missing([self.dir3['id']]))
         self.assertEqual([self.dir3['id']], init_missing)
 
         self.storage.directory_add([self.dir3])
 
         expected_entries = [
             {
                 'dir_id': self.dir3['id'],
                 'name': b'foo',
                 'type': 'file',
                 'target': self.cont['sha1_git'],
                 'sha1': None,
                 'sha1_git': None,
                 'sha256': None,
                 'status': None,
                 'perms': from_disk.DentryPerms.content,
                 'length': None,
             },
             {
                 'dir_id': self.dir3['id'],
                 'name': b'bar',
                 'type': 'dir',
                 'target': b'12345678901234560000',
                 'sha1': None,
                 'sha1_git': None,
                 'sha256': None,
                 'status': None,
                 'perms': from_disk.DentryPerms.directory,
                 'length': None,
             },
             {
                 'dir_id': self.dir3['id'],
                 'name': b'hello',
                 'type': 'file',
                 'target': b'12345678901234567890',
                 'sha1': None,
                 'sha1_git': None,
                 'sha256': None,
                 'status': None,
                 'perms': from_disk.DentryPerms.content,
                 'length': None,
             },
         ]
 
         # when (all must be found here)
         for entry, expected_entry in zip(self.dir3['entries'],
                                          expected_entries):
             actual_entry = self.storage.directory_entry_get_by_path(
                 self.dir3['id'],
                 [entry['name']])
             self.assertEqual(actual_entry, expected_entry)
 
         # when (nothing should be found here since self.dir is not persisted.)
         for entry in self.dir['entries']:
             actual_entry = self.storage.directory_entry_get_by_path(
                 self.dir['id'],
                 [entry['name']])
             self.assertIsNone(actual_entry)
 
     def test_revision_add(self):
         init_missing = self.storage.revision_missing([self.revision['id']])
         self.assertEqual([self.revision['id']], list(init_missing))
 
         self.storage.revision_add([self.revision])
 
         end_missing = self.storage.revision_missing([self.revision['id']])
         self.assertEqual([], list(end_missing))
 
     def test_revision_log(self):
         # given
         # self.revision4 -is-child-of-> self.revision3
         self.storage.revision_add([self.revision3,
                                    self.revision4])
 
         # when
         actual_results = list(self.storage.revision_log(
             [self.revision4['id']]))
 
         # hack: ids generated
         for actual_result in actual_results:
             del actual_result['author']['id']
             del actual_result['committer']['id']
 
         self.assertEqual(len(actual_results), 2)  # rev4 -child-> rev3
         self.assertEquals(actual_results[0],
                           self.normalize_entity(self.revision4))
         self.assertEquals(actual_results[1],
                           self.normalize_entity(self.revision3))
 
     def test_revision_log_with_limit(self):
         # given
         # self.revision4 -is-child-of-> self.revision3
         self.storage.revision_add([self.revision3,
                                    self.revision4])
         actual_results = list(self.storage.revision_log(
             [self.revision4['id']], 1))
 
         # hack: ids generated
         for actual_result in actual_results:
             del actual_result['author']['id']
             del actual_result['committer']['id']
 
         self.assertEqual(len(actual_results), 1)
         self.assertEquals(actual_results[0], self.revision4)
 
     @staticmethod
     def _short_revision(revision):
         return [revision['id'], revision['parents']]
 
     def test_revision_shortlog(self):
         # given
         # self.revision4 -is-child-of-> self.revision3
         self.storage.revision_add([self.revision3,
                                    self.revision4])
 
         # when
         actual_results = list(self.storage.revision_shortlog(
             [self.revision4['id']]))
 
         self.assertEqual(len(actual_results), 2)  # rev4 -child-> rev3
         self.assertEquals(list(actual_results[0]),
                           self._short_revision(self.revision4))
         self.assertEquals(list(actual_results[1]),
                           self._short_revision(self.revision3))
 
     def test_revision_shortlog_with_limit(self):
         # given
         # self.revision4 -is-child-of-> self.revision3
         self.storage.revision_add([self.revision3,
                                    self.revision4])
         actual_results = list(self.storage.revision_shortlog(
             [self.revision4['id']], 1))
 
         self.assertEqual(len(actual_results), 1)
         self.assertEquals(list(actual_results[0]),
                           self._short_revision(self.revision4))
 
     def test_revision_get(self):
         self.storage.revision_add([self.revision])
 
         actual_revisions = list(self.storage.revision_get(
             [self.revision['id'], self.revision2['id']]))
 
         # when
         del actual_revisions[0]['author']['id']  # hack: ids are generated
         del actual_revisions[0]['committer']['id']
 
         self.assertEqual(len(actual_revisions), 2)
         self.assertEqual(actual_revisions[0],
                          self.normalize_entity(self.revision))
         self.assertIsNone(actual_revisions[1])
 
     def test_revision_get_no_parents(self):
         self.storage.revision_add([self.revision3])
 
         get = list(self.storage.revision_get([self.revision3['id']]))
 
         self.assertEqual(len(get), 1)
         self.assertEqual(get[0]['parents'], [])  # no parents on this one
 
     def test_release_add(self):
         init_missing = self.storage.release_missing([self.release['id'],
                                                      self.release2['id']])
         self.assertEqual([self.release['id'], self.release2['id']],
                          list(init_missing))
 
         self.storage.release_add([self.release, self.release2])
 
         end_missing = self.storage.release_missing([self.release['id'],
                                                     self.release2['id']])
         self.assertEqual([], list(end_missing))
 
     def test_release_get(self):
         # given
         self.storage.release_add([self.release, self.release2])
 
         # when
         actual_releases = list(self.storage.release_get([self.release['id'],
                                                          self.release2['id']]))
 
         # then
         for actual_release in actual_releases:
             del actual_release['author']['id']  # hack: ids are generated
 
         self.assertEquals([self.normalize_entity(self.release),
                            self.normalize_entity(self.release2)],
                           [actual_releases[0], actual_releases[1]])
 
     def test_origin_add_one(self):
         origin0 = self.storage.origin_get(self.origin)
         self.assertIsNone(origin0)
 
         id = self.storage.origin_add_one(self.origin)
 
         actual_origin = self.storage.origin_get({'url': self.origin['url'],
                                                  'type': self.origin['type']})
         self.assertEqual(actual_origin['id'], id)
 
         id2 = self.storage.origin_add_one(self.origin)
 
         self.assertEqual(id, id2)
 
     def test_origin_add(self):
         origin0 = self.storage.origin_get(self.origin)
         self.assertIsNone(origin0)
 
         origin1, origin2 = self.storage.origin_add([self.origin, self.origin2])
 
         actual_origin = self.storage.origin_get({
             'url': self.origin['url'],
             'type': self.origin['type'],
         })
         self.assertEqual(actual_origin['id'], origin1['id'])
 
         actual_origin2 = self.storage.origin_get({
             'url': self.origin2['url'],
             'type': self.origin2['type'],
         })
         self.assertEqual(actual_origin2['id'], origin2['id'])
 
     def test_origin_add_twice(self):
         add1 = self.storage.origin_add([self.origin, self.origin2])
         add2 = self.storage.origin_add([self.origin, self.origin2])
 
         self.assertEqual(add1, add2)
 
     def test_origin_get(self):
         self.assertIsNone(self.storage.origin_get(self.origin))
         id = self.storage.origin_add_one(self.origin)
 
         # lookup per type and url (returns id)
         actual_origin0 = self.storage.origin_get({'url': self.origin['url'],
                                                   'type': self.origin['type']})
         self.assertEqual(actual_origin0['id'], id)
 
         # lookup per id (returns dict)
         actual_origin1 = self.storage.origin_get({'id': id})
 
         self.assertEqual(actual_origin1, {'id': id,
                                           'type': self.origin['type'],
                                           'url': self.origin['url']})
 
     def test_origin_search(self):
         found_origins = list(self.storage.origin_search(self.origin['url']))
         self.assertEqual(len(found_origins), 0)
 
         found_origins = list(self.storage.origin_search(self.origin['url'],
                                                         regexp=True))
         self.assertEqual(len(found_origins), 0)
 
         id = self.storage.origin_add_one(self.origin)
         origin_data = {'id': id,
                        'type': self.origin['type'],
                        'url': self.origin['url']}
         found_origins = list(self.storage.origin_search(self.origin['url']))
         self.assertEqual(len(found_origins), 1)
         self.assertEqual(found_origins[0], origin_data)
 
         found_origins = list(self.storage.origin_search(
             '.' + self.origin['url'][1:-1] + '.', regexp=True))
         self.assertEqual(len(found_origins), 1)
         self.assertEqual(found_origins[0], origin_data)
 
         id2 = self.storage.origin_add_one(self.origin2)
         origin2_data = {'id': id2,
                         'type': self.origin2['type'],
                         'url': self.origin2['url']}
         found_origins = list(self.storage.origin_search(self.origin2['url']))
         self.assertEqual(len(found_origins), 1)
         self.assertEqual(found_origins[0], origin2_data)
 
         found_origins = list(self.storage.origin_search(
             '.' + self.origin2['url'][1:-1] + '.', regexp=True))
         self.assertEqual(len(found_origins), 1)
         self.assertEqual(found_origins[0], origin2_data)
 
         found_origins = list(self.storage.origin_search('/'))
         self.assertEqual(len(found_origins), 2)
 
         found_origins = list(self.storage.origin_search('.*/.*', regexp=True))
         self.assertEqual(len(found_origins), 2)
 
         found_origins = list(self.storage.origin_search('/', offset=0, limit=1)) # noqa
         self.assertEqual(len(found_origins), 1)
         self.assertEqual(found_origins[0], origin_data)
 
         found_origins = list(self.storage.origin_search('.*/.*', offset=0, limit=1, regexp=True)) # noqa
         self.assertEqual(len(found_origins), 1)
         self.assertEqual(found_origins[0], origin_data)
 
         found_origins = list(self.storage.origin_search('/', offset=1, limit=1)) # noqa
         self.assertEqual(len(found_origins), 1)
         self.assertEqual(found_origins[0], origin2_data)
 
         found_origins = list(self.storage.origin_search('.*/.*', offset=1, limit=1, regexp=True)) # noqa
         self.assertEqual(len(found_origins), 1)
         self.assertEqual(found_origins[0], origin2_data)
 
     def test_origin_visit_add(self):
         # given
         self.assertIsNone(self.storage.origin_get(self.origin2))
 
         origin_id = self.storage.origin_add_one(self.origin2)
         self.assertIsNotNone(origin_id)
 
         # when
         origin_visit1 = self.storage.origin_visit_add(
             origin_id,
             ts=self.date_visit2)
 
         # then
         self.assertEquals(origin_visit1['origin'], origin_id)
         self.assertIsNotNone(origin_visit1['visit'])
         self.assertTrue(origin_visit1['visit'] > 0)
 
         actual_origin_visits = list(self.storage.origin_visit_get(origin_id))
         self.assertEquals(actual_origin_visits,
                           [{
                               'origin': origin_id,
                               'date': self.date_visit2,
                               'visit': origin_visit1['visit'],
                               'status': 'ongoing',
                               'metadata': None,
                               'snapshot': None,
                           }])
 
     def test_origin_visit_update(self):
         # given
         origin_id = self.storage.origin_add_one(self.origin2)
         origin_id2 = self.storage.origin_add_one(self.origin)
 
         origin_visit1 = self.storage.origin_visit_add(
             origin_id,
             ts=self.date_visit2)
 
         origin_visit2 = self.storage.origin_visit_add(
             origin_id,
             ts=self.date_visit3)
 
         origin_visit3 = self.storage.origin_visit_add(
             origin_id2,
             ts=self.date_visit3)
 
         # when
         visit1_metadata = {
             'contents': 42,
             'directories': 22,
         }
         self.storage.origin_visit_update(
             origin_id, origin_visit1['visit'], status='full',
             metadata=visit1_metadata)
         self.storage.origin_visit_update(origin_id2, origin_visit3['visit'],
                                          status='partial')
 
         # then
         actual_origin_visits = list(self.storage.origin_visit_get(origin_id))
         self.assertEquals(actual_origin_visits, [{
             'origin': origin_visit2['origin'],
             'date': self.date_visit2,
             'visit': origin_visit1['visit'],
             'status': 'full',
             'metadata': visit1_metadata,
             'snapshot': None,
         }, {
             'origin': origin_visit2['origin'],
             'date': self.date_visit3,
             'visit': origin_visit2['visit'],
             'status': 'ongoing',
             'metadata': None,
             'snapshot': None,
         }])
 
         actual_origin_visits_bis = list(self.storage.origin_visit_get(
             origin_id, limit=1))
         self.assertEquals(actual_origin_visits_bis,
                           [{
                               'origin': origin_visit2['origin'],
                               'date': self.date_visit2,
                               'visit': origin_visit1['visit'],
                               'status': 'full',
                               'metadata': visit1_metadata,
                               'snapshot': None,
                           }])
 
         actual_origin_visits_ter = list(self.storage.origin_visit_get(
             origin_id, last_visit=origin_visit1['visit']))
         self.assertEquals(actual_origin_visits_ter,
                           [{
                               'origin': origin_visit2['origin'],
                               'date': self.date_visit3,
                               'visit': origin_visit2['visit'],
                               'status': 'ongoing',
                               'metadata': None,
                               'snapshot': None,
                           }])
 
         actual_origin_visits2 = list(self.storage.origin_visit_get(origin_id2))
         self.assertEquals(actual_origin_visits2,
                           [{
                               'origin': origin_visit3['origin'],
                               'date': self.date_visit3,
                               'visit': origin_visit3['visit'],
                               'status': 'partial',
                               'metadata': None,
                               'snapshot': None,
                           }])
 
     def test_origin_visit_get_by(self):
         origin_id = self.storage.origin_add_one(self.origin2)
         origin_id2 = self.storage.origin_add_one(self.origin)
 
         origin_visit1 = self.storage.origin_visit_add(
             origin_id,
             ts=self.date_visit2)
 
         self.storage.snapshot_add(origin_id, origin_visit1['visit'],
                                   self.snapshot)
 
         # Add some other {origin, visit} entries
         self.storage.origin_visit_add(origin_id, ts=self.date_visit3)
         self.storage.origin_visit_add(origin_id2, ts=self.date_visit3)
 
         # when
         visit1_metadata = {
             'contents': 42,
             'directories': 22,
         }
 
         self.storage.origin_visit_update(
             origin_id, origin_visit1['visit'], status='full',
             metadata=visit1_metadata)
 
         expected_origin_visit = origin_visit1.copy()
         expected_origin_visit.update({
             'origin': origin_id,
             'visit': origin_visit1['visit'],
             'date': self.date_visit2,
             'metadata': visit1_metadata,
             'status': 'full',
             'snapshot': self.snapshot['id'],
         })
 
         # when
         actual_origin_visit1 = self.storage.origin_visit_get_by(
             origin_visit1['origin'], origin_visit1['visit'])
 
         # then
         self.assertEquals(actual_origin_visit1, expected_origin_visit)
 
     def test_origin_visit_get_by_no_result(self):
         # No result
         actual_origin_visit = self.storage.origin_visit_get_by(
             10, 999)
 
         self.assertIsNone(actual_origin_visit)
 
     def test_snapshot_add_get_empty(self):
         origin_id = self.storage.origin_add_one(self.origin)
         origin_visit1 = self.storage.origin_visit_add(origin_id,
                                                       self.date_visit1)
         visit_id = origin_visit1['visit']
 
         self.storage.snapshot_add(origin_id, visit_id, self.empty_snapshot)
 
         by_id = self.storage.snapshot_get(self.empty_snapshot['id'])
         self.assertEqual(by_id, self.empty_snapshot)
 
         by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id)
         self.assertEqual(by_ov, self.empty_snapshot)
 
     def test_snapshot_add_get_complete(self):
         origin_id = self.storage.origin_add_one(self.origin)
         origin_visit1 = self.storage.origin_visit_add(origin_id,
                                                       self.date_visit1)
         visit_id = origin_visit1['visit']
 
         self.storage.snapshot_add(origin_id, visit_id, self.complete_snapshot)
 
         by_id = self.storage.snapshot_get(self.complete_snapshot['id'])
         self.assertEqual(by_id, self.complete_snapshot)
 
         by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id)
         self.assertEqual(by_ov, self.complete_snapshot)
 
     def test_snapshot_add_count_branches(self):
         origin_id = self.storage.origin_add_one(self.origin)
         origin_visit1 = self.storage.origin_visit_add(origin_id,
                                                       self.date_visit1)
         visit_id = origin_visit1['visit']
 
         self.storage.snapshot_add(origin_id, visit_id, self.complete_snapshot)
 
         snp_id = self.complete_snapshot['id']
         snp_size = self.storage.snapshot_count_branches(snp_id)
 
         expected_snp_size = {
             'alias': 1,
             'content': 1,
             'directory': 1,
             'release': 1,
             'revision': 1,
             'snapshot': 1,
             None: 1
         }
 
         self.assertEqual(snp_size, expected_snp_size)
 
     def test_snapshot_add_get_paginated(self):
         origin_id = self.storage.origin_add_one(self.origin)
         origin_visit1 = self.storage.origin_visit_add(origin_id,
                                                       self.date_visit1)
         visit_id = origin_visit1['visit']
 
         self.storage.snapshot_add(origin_id, visit_id, self.complete_snapshot)
 
         snp_id = self.complete_snapshot['id']
+        branches = self.complete_snapshot['branches']
+        branch_names = sorted(branches)  # order the expectations rely on
 
         snapshot = self.storage.snapshot_get_branches(snp_id,
                                                       branches_from=b'release')
 
-        expected_snapshot = copy.deepcopy(self.complete_snapshot)
-        del expected_snapshot['next_branch']
-        for name in [b'alias', b'content', b'dangling', b'directory']:
-            del expected_snapshot['branches'][name]
+        rel_idx = branch_names.index(b'release')
+        expected_snapshot = {
+            'id': snp_id,
+            'branches': {
+                name: branches[name]
+                for name in branch_names[rel_idx:]
+            },
+            'next_branch': None,
+        }
 
         self.assertEqual(snapshot, expected_snapshot)
 
         snapshot = self.storage.snapshot_get_branches(snp_id,
                                                       branches_count=1)
 
-        expected_snapshot = copy.deepcopy(self.complete_snapshot)
-        del expected_snapshot['next_branch']
-        for name in [b'content', b'dangling', b'directory',
-                     b'release', b'revision', b'snapshot']:
-            del expected_snapshot['branches'][name]
-
+        expected_snapshot = {
+            'id': snp_id,
+            'branches': {
+                branch_names[0]: branches[branch_names[0]],
+            },
+            'next_branch': branch_names[1],  # first name not returned
+        }
         self.assertEqual(snapshot, expected_snapshot)
 
         snapshot = self.storage.snapshot_get_branches(
             snp_id, branches_from=b'directory', branches_count=3)
 
-        expected_snapshot = copy.deepcopy(self.complete_snapshot)
-        del expected_snapshot['next_branch']
-        for name in [b'alias', b'content', b'dangling', b'snapshot']:
-            del expected_snapshot['branches'][name]
+        dir_idx = branch_names.index(b'directory')
+        expected_snapshot = {
+            'id': snp_id,
+            'branches': {
+                name: branches[name]
+                for name in branch_names[dir_idx:dir_idx + 3]
+            },
+            'next_branch': branch_names[dir_idx + 3],
+        }
 
         self.assertEqual(snapshot, expected_snapshot)
 
     def test_snapshot_add_get_filtered(self):
         origin_id = self.storage.origin_add_one(self.origin)
         origin_visit1 = self.storage.origin_visit_add(origin_id,
                                                       self.date_visit1)
         visit_id = origin_visit1['visit']
 
         self.storage.snapshot_add(origin_id, visit_id, self.complete_snapshot)
 
         snp_id = self.complete_snapshot['id']
+        branches = self.complete_snapshot['branches']  # name -> target
 
         snapshot = self.storage.snapshot_get_branches(
             snp_id, target_types=['release', 'revision'])
 
-        expected_snapshot = copy.deepcopy(self.complete_snapshot)
-        del expected_snapshot['next_branch']
-        for name in [b'alias', b'content', b'dangling', b'directory',
-                     b'snapshot']:
-            del expected_snapshot['branches'][name]
+        expected_snapshot = {
+            'id': snp_id,
+            'branches': {
+                name: tgt
+                for name, tgt in branches.items()
+                if tgt and tgt['target_type'] in ['release', 'revision']
+            },
+            'next_branch': None,
+        }
 
         self.assertEqual(snapshot, expected_snapshot)
 
         snapshot = self.storage.snapshot_get_branches(snp_id,
                                                       target_types=['alias'])
 
-        expected_snapshot = copy.deepcopy(self.complete_snapshot)
-        del expected_snapshot['next_branch']
-        for name in [b'content', b'dangling', b'directory', b'release',
-                     b'revision', b'snapshot']:
-            del expected_snapshot['branches'][name]
+        expected_snapshot = {
+            'id': snp_id,
+            'branches': {
+                name: tgt
+                for name, tgt in branches.items()
+                if tgt and tgt['target_type'] == 'alias'  # skip None targets
+            },
+            'next_branch': None,
+        }
 
         self.assertEqual(snapshot, expected_snapshot)
 
     def test_snapshot_add_get(self):
         origin_id = self.storage.origin_add_one(self.origin)
         origin_visit1 = self.storage.origin_visit_add(origin_id,
                                                       self.date_visit1)
         visit_id = origin_visit1['visit']
 
         self.storage.snapshot_add(origin_id, visit_id, self.snapshot)
 
         by_id = self.storage.snapshot_get(self.snapshot['id'])
         self.assertEqual(by_id, self.snapshot)
 
         by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id)
         self.assertEqual(by_ov, self.snapshot)
 
         origin_visit_info = self.storage.origin_visit_get_by(origin_id,
                                                              visit_id)
         self.assertEqual(origin_visit_info['snapshot'], self.snapshot['id'])
 
     def test_snapshot_add_twice(self):
         origin_id = self.storage.origin_add_one(self.origin)
         origin_visit1 = self.storage.origin_visit_add(origin_id,
                                                       self.date_visit1)
         visit1_id = origin_visit1['visit']
         self.storage.snapshot_add(origin_id, visit1_id, self.snapshot)
 
         by_ov1 = self.storage.snapshot_get_by_origin_visit(origin_id,
                                                            visit1_id)
         self.assertEqual(by_ov1, self.snapshot)
 
         origin_visit2 = self.storage.origin_visit_add(origin_id,
                                                       self.date_visit2)
         visit2_id = origin_visit2['visit']
 
         self.storage.snapshot_add(origin_id, visit2_id, self.snapshot)
 
         by_ov2 = self.storage.snapshot_get_by_origin_visit(origin_id,
                                                            visit2_id)
         self.assertEqual(by_ov2, self.snapshot)
 
     def test_snapshot_get_nonexistent(self):
         bogus_snapshot_id = b'bogus snapshot id 00'
         bogus_origin_id = 1
         bogus_visit_id = 1
 
         by_id = self.storage.snapshot_get(bogus_snapshot_id)
         self.assertIsNone(by_id)
 
         by_ov = self.storage.snapshot_get_by_origin_visit(bogus_origin_id,
                                                           bogus_visit_id)
         self.assertIsNone(by_ov)
 
     def test_snapshot_get_latest(self):
         """Check snapshot_get_latest, with and without a status filter.
 
         Two visits are created for one origin.  Snapshots are then attached
         to them one at a time and the first visit is marked 'full'; after
         each step the latest snapshot is queried, both unfiltered and
         restricted to allowed_statuses=['full'].
         """
         origin_id = self.storage.origin_add_one(self.origin)
         origin_visit1 = self.storage.origin_visit_add(origin_id,
                                                       self.date_visit1)
         visit1_id = origin_visit1['visit']
         origin_visit2 = self.storage.origin_visit_add(origin_id,
                                                       self.date_visit2)
         visit2_id = origin_visit2['visit']
 
         # Two visits, both with no snapshot: latest snapshot is None
         self.assertIsNone(self.storage.snapshot_get_latest(origin_id))
 
         # Add snapshot to visit1, latest snapshot = visit 1 snapshot
         self.storage.snapshot_add(origin_id, visit1_id, self.complete_snapshot)
         # assertEqual, not the assertEquals alias: the alias is deprecated
         # and no longer exists in recent unittest versions.
         self.assertEqual(self.complete_snapshot,
                          self.storage.snapshot_get_latest(origin_id))
 
         # Status filter: both visits are status=ongoing, so no snapshot
         # returned
         self.assertIsNone(
             self.storage.snapshot_get_latest(origin_id,
                                              allowed_statuses=['full'])
         )
 
         # Mark the first visit as completed and check status filter again
         self.storage.origin_visit_update(origin_id, visit1_id, status='full')
         self.assertEqual(
             self.complete_snapshot,
             self.storage.snapshot_get_latest(origin_id,
                                              allowed_statuses=['full']),
         )
 
         # Add snapshot to visit2 and check that the new snapshot is returned
         self.storage.snapshot_add(origin_id, visit2_id, self.empty_snapshot)
         self.assertEqual(self.empty_snapshot,
                          self.storage.snapshot_get_latest(origin_id))
 
         # Check that the status filter is still working
         self.assertEqual(
             self.complete_snapshot,
             self.storage.snapshot_get_latest(origin_id,
                                              allowed_statuses=['full']),
         )
 
     def test_stat_counters(self):
         """Run swh_update_counter for a few keys, then read the counters.
 
         Every updated key must be present in the result of stat_counters,
         and the value of the first key must be an int.
         """
         updated_keys = ['content', 'directory', 'directory_entry_dir',
                         'origin', 'person', 'revision']
 
         for counter_name in updated_keys:
             self.cursor.execute('select * from swh_update_counter(%s)',
                                 (counter_name,))
         self.conn.commit()
 
         counters = self.storage.stat_counters()
 
         self.assertTrue(set(updated_keys).issubset(set(counters)))
         self.assertIsInstance(counters[updated_keys[0]], int)
 
     def test_content_find_with_present_content(self):
         """content_find returns the added content for every hash lookup.
 
         The same content is looked up by sha1, by sha1_git, by sha256 and
         by all four hashes at once.  'ctime' is removed from each result
         before it is compared with the expected dict.
         """
         cont = self.cont
         self.storage.content_add([cont])
 
         # What every lookup must return once 'ctime' has been removed
         expected = {
             'sha1': cont['sha1'],
             'sha256': cont['sha256'],
             'sha1_git': cont['sha1_git'],
             'blake2s256': cont['blake2s256'],
             'length': cont['length'],
             'status': 'visible'
         }
 
         lookups = [
             # 1. by sha1
             {'sha1': cont['sha1']},
             # 2. by sha1_git
             {'sha1_git': cont['sha1_git']},
             # 3. by sha256
             {'sha256': cont['sha256']},
             # 4. by all hashes at once
             {
                 'sha1': cont['sha1'],
                 'sha1_git': cont['sha1_git'],
                 'sha256': cont['sha256'],
                 'blake2s256': cont['blake2s256'],
             },
         ]
 
         for lookup in lookups:
             found = self.storage.content_find(lookup)
 
             found.pop('ctime')
             self.assertEqual(found, expected)
 
     def test_content_find_with_non_present_content(self):
         """content_find returns None for a content that was not added.
 
         The lookup is tried by sha1, then by sha1_git, then by sha256.
         """
         missing_cont = self.missing_cont
 
         for hash_name in ('sha1', 'sha1_git', 'sha256'):
             found = self.storage.content_find(
                 {hash_name: missing_cont[hash_name]})
 
             self.assertIsNone(found)
 
     def test_content_find_bad_input(self):
         """content_find raises ValueError on an unusable lookup dict."""
         bad_inputs = (
             {},  # empty is bad
             {'unknown-sha1': 'something'},  # not the right key
         )
 
         for bad_input in bad_inputs:
             with self.assertRaises(ValueError):
                 self.storage.content_find(bad_input)
 
     def test_object_find_by_sha1_git(self):
         """Look up a content, directory, revision and release by sha1_git.
 
         One id that was never added is queried as well and must map to an
         empty list.  'object_id' is removed from every returned object
         before the comparison.
         """
         unknown_sha1_git = b'00000000000000000000'
         sha1_gits = [unknown_sha1_git]
         expected = {unknown_sha1_git: []}
 
         # (add method, object to add, expected type,
         #  key holding the sha1_git, key holding the expected id)
         additions = [
             (self.storage.content_add, self.cont, 'content',
              'sha1_git', 'sha1'),
             (self.storage.directory_add, self.dir, 'directory', 'id', 'id'),
             (self.storage.revision_add, self.revision, 'revision',
              'id', 'id'),
             (self.storage.release_add, self.release, 'release', 'id', 'id'),
         ]
 
         for add, obj, obj_type, sha1_git_key, id_key in additions:
             add([obj])
             sha1_gits.append(obj[sha1_git_key])
             expected[obj[sha1_git_key]] = [{
                 'sha1_git': obj[sha1_git_key],
                 'type': obj_type,
                 'id': obj[id_key],
             }]
 
         found = self.storage.object_find_by_sha1_git(sha1_gits)
         for matches in found.values():
             for match in matches:
                 del match['object_id']
 
         self.assertEqual(expected, found)
 
     def test_tool_add(self):
         """Add an unknown tool twice and compare the two results.
 
         tool_get must return None before the tool is added.  Each call to
         tool_add must then return the tool with an 'id', and that id must
         be the same for both calls.
         """
         tool = {
             'name': 'some-unknown-tool',
             'version': 'some-version',
             'configuration': {"debian-package": "some-package"},
         }
 
         actual_tool = self.storage.tool_get(tool)
         self.assertIsNone(actual_tool)  # does not exist
 
         # add it
         actual_tools = list(self.storage.tool_add([tool]))
 
         # assertEqual, not the assertEquals alias: the alias is deprecated
         # and no longer exists in recent unittest versions.
         self.assertEqual(len(actual_tools), 1)
         actual_tool = actual_tools[0]
         self.assertIsNotNone(actual_tool)  # now it exists
         new_id = actual_tool.pop('id')
         self.assertEqual(actual_tool, tool)
 
         # add it a second time
         actual_tools2 = list(self.storage.tool_add([tool]))
         actual_tool2 = actual_tools2[0]
         self.assertIsNotNone(actual_tool2)  # now it exists
         new_id2 = actual_tool2.pop('id')
 
         self.assertEqual(new_id, new_id2)
         self.assertEqual(actual_tool, actual_tool2)
 
     def test_tool_add_multiple(self):
         """tool_add returns one result per tool passed in.
 
         A single tool is added first, then a list holding that same tool
         and a second one.
         """
         first_tool = {
             'name': 'some-unknown-tool',
             'version': 'some-version',
             'configuration': {"debian-package": "some-package"},
         }
         second_tool = {
             'name': 'yet-another-tool',
             'version': 'version',
             'configuration': {},
         }
 
         added = list(self.storage.tool_add([first_tool]))
         self.assertEqual(len(added), 1)
 
         new_tools = [first_tool, second_tool]
 
         added = list(self.storage.tool_add(new_tools))
         self.assertEqual(len(added), 2)
 
         # order not guaranteed, so we iterate over results to check
         for added_tool in added:
             tool_id = added_tool.pop('id')
             self.assertIsNotNone(tool_id)
             self.assertIn(added_tool, new_tools)
 
     def test_tool_get_missing(self):
         """tool_get returns None for a tool this test never added."""
         unknown_tool = {
             'name': 'unknown-tool',
             'version': '3.1.0rc2-31-ga2cbb8c',
             'configuration': {"command_line": "nomossa <filepath>"},
         }
 
         self.assertIsNone(self.storage.tool_get(unknown_tool))
 
     def test_tool_metadata_get_missing_context(self):
         """tool_get returns None when the configuration context is unknown."""
         tool_with_unknown_context = {
             'name': 'swh-metadata-translator',
             'version': '0.0.1',
             'configuration': {"context": "unknown-context"},
         }
 
         self.assertIsNone(self.storage.tool_get(tool_with_unknown_context))
 
     def test_tool_metadata_get(self):
         """tool_get returns the same tool that tool_add returned."""
         # given
         tool = {
             'name': 'swh-metadata-translator',
             'version': '0.0.1',
             'configuration': {"type": "local", "context": "npm"},
         }
         expected_tool = list(self.storage.tool_add([tool]))[0]
 
         # when
         actual_tool = self.storage.tool_get(tool)
 
         # then
         self.assertEqual(expected_tool, actual_tool)
 
     def test_metadata_provider_get_by(self):
         """Look a metadata provider up by name and url, before and after
         it is added.
 
         Before metadata_provider_add is called the lookup must return
         None; afterwards the 'id' of the provider found must be the value
         metadata_provider_add returned.
         """
         # given
         no_provider = self.storage.metadata_provider_get_by({
             'provider_name': self.provider['name'],
             'provider_url': self.provider['url']
         })
         self.assertIsNone(no_provider)
         # when
         provider_id = self.storage.metadata_provider_add(
             self.provider['name'],
             self.provider['type'],
             self.provider['url'],
             self.provider['metadata'])
 
         actual_provider = self.storage.metadata_provider_get_by({
             'provider_name': self.provider['name'],
             'provider_url': self.provider['url']
         })
         # then
         # assertEqual, not assertTrue: assertTrue(a, b) only checks that
         # `a` is truthy and uses `b` as the failure message, so the two
         # ids were never actually compared.
         self.assertEqual(provider_id, actual_provider['id'])
 
     def test_origin_metadata_add(self):
         """Add one metadata entry to an origin and read it back.
 
         The origin must have no metadata to begin with.  After one call
         to origin_metadata_add, origin_metadata_get_by must return exactly
         one entry, carrying the returned id and the origin id.
         """
         # given
         origin_id = self.storage.origin_add([self.origin])[0]['id']
         origin_metadata0 = list(self.storage.origin_metadata_get_by(origin_id))
         # Comparing against [] shows the unexpected entries on failure.
         self.assertEqual(origin_metadata0, [])
 
         # The result is not needed (the tool is fetched with tool_get
         # below), but list() is kept so that a lazily evaluated result is
         # still fully consumed.
         list(self.storage.tool_add([self.metadata_tool]))
 
         self.storage.metadata_provider_add(
                            self.provider['name'],
                            self.provider['type'],
                            self.provider['url'],
                            self.provider['metadata'])
         provider = self.storage.metadata_provider_get_by({
                             'provider_name': self.provider['name'],
                             'provider_url': self.provider['url']
                       })
         tool = self.storage.tool_get(self.metadata_tool)
 
         # when adding one metadata entry for the origin
         o_m1 = self.storage.origin_metadata_add(
                     origin_id,
                     self.origin_metadata['discovery_date'],
                     provider['id'],
                     tool['id'],
                     self.origin_metadata['metadata'])
         actual_om1 = list(self.storage.origin_metadata_get_by(origin_id))
         # then
         # Check the length first, so that an empty result is reported as
         # a failed assertion instead of an IndexError.
         self.assertEqual(len(actual_om1), 1)
         self.assertEqual(actual_om1[0]['id'], o_m1)
         self.assertEqual(actual_om1[0]['origin_id'], origin_id)
 
     def test_origin_metadata_get(self):
         """origin_metadata_get_by only returns the requested origin's data.
 
         Three metadata entries are added: two for the first origin and one
         for the second.  The first origin's entries are expected with the
         2017 entry ahead of the 2015 one.
         """
         # given
         origin_id = self.storage.origin_add([self.origin])[0]['id']
         origin_id2 = self.storage.origin_add([self.origin2])[0]['id']
 
         self.storage.metadata_provider_add(
             self.provider['name'], self.provider['type'],
             self.provider['url'], self.provider['metadata'])
         provider = self.storage.metadata_provider_get_by({
             'provider_name': self.provider['name'],
             'provider_url': self.provider['url'],
         })
         tool = self.storage.tool_get(self.metadata_tool)
 
         # when adding for the same origin 2 metadatas
         o_m1 = self.storage.origin_metadata_add(
             origin_id, self.origin_metadata['discovery_date'],
             provider['id'], tool['id'], self.origin_metadata['metadata'])
         o_m2 = self.storage.origin_metadata_add(
             origin_id2, self.origin_metadata2['discovery_date'],
             provider['id'], tool['id'], self.origin_metadata2['metadata'])
         o_m3 = self.storage.origin_metadata_add(
             origin_id, self.origin_metadata2['discovery_date'],
             provider['id'], tool['id'], self.origin_metadata2['metadata'])
 
         all_metadatas = list(self.storage.origin_metadata_get_by(origin_id))
         metadatas_for_origin2 = list(
             self.storage.origin_metadata_get_by(origin_id2))
 
         def expected_entry(metadata_id, year):
             # Expected entry of the first origin; only the entry id and
             # the discovery year differ between the two entries.
             return {
                 'origin_id': origin_id,
                 'discovery_date': datetime.datetime(
                     year, 1, 2, 0, 0,
                     tzinfo=psycopg2.tz.FixedOffsetTimezone(
                         offset=60,
                         name=None)),
                 'metadata': {
                     'name': 'test_origin_metadata',
                     'version': '0.0.1'
                 },
                 'id': metadata_id,
                 'provider_id': provider['id'],
                 'provider_name': 'hal',
                 'provider_type': 'deposit-client',
                 'provider_url': 'http:///hal/inria',
                 'tool_id': tool['id']
             }
 
         expected_results = [
             expected_entry(o_m3, 2017),
             expected_entry(o_m1, 2015),
         ]
 
         # then
         self.assertEqual(len(all_metadatas), 2)
         self.assertEqual(len(metadatas_for_origin2), 1)
         self.assertEqual(metadatas_for_origin2[0]['id'], o_m2)
         self.assertEqual(all_metadatas, expected_results)
 
     def test_origin_metadata_get_by_provider_type(self):
         """origin_metadata_get_by accepts a provider type as 2nd argument.
 
         Two providers are registered, the fixture one and a 'registry'
         one, and each gets one metadata entry on a different origin.  The
         second origin is then queried with provider type 'registry' and
         must yield only the entry added with the 'registry' provider.
         """
         # given
         origin_id = self.storage.origin_add([self.origin])[0]['id']
         origin_id2 = self.storage.origin_add([self.origin2])[0]['id']
 
         self.storage.metadata_provider_add(
             self.provider['name'], self.provider['type'],
             self.provider['url'], self.provider['metadata'])
         provider1 = self.storage.metadata_provider_get_by({
             'provider_name': self.provider['name'],
             'provider_url': self.provider['url'],
         })
 
         swmath_metadata = {'email': 'contact@swmath.org',
                            'license': 'All rights reserved'}
         self.storage.metadata_provider_add(
             'swMATH', 'registry', 'http://www.swmath.org/', swmath_metadata)
         provider2 = self.storage.metadata_provider_get_by({
             'provider_name': 'swMATH',
             'provider_url': 'http://www.swmath.org/',
         })
 
         # using the only tool now inserted in the data.sql, but for this
         # provider should be a crawler tool (not yet implemented)
         tool = self.storage.tool_get(self.metadata_tool)
 
         # when adding one metadata entry per origin, each with its own
         # provider
         o_m1 = self.storage.origin_metadata_add(
             origin_id, self.origin_metadata['discovery_date'],
             provider1['id'], tool['id'], self.origin_metadata['metadata'])
         o_m2 = self.storage.origin_metadata_add(
             origin_id2, self.origin_metadata2['discovery_date'],
             provider2['id'], tool['id'], self.origin_metadata2['metadata'])
 
         provider_type = 'registry'
         m_by_provider = list(
             self.storage.origin_metadata_get_by(origin_id2, provider_type))
 
         expected_date = datetime.datetime(
             2017, 1, 2, 0, 0,
             tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=60, name=None))
         expected_results = [{
             'origin_id': origin_id2,
             'discovery_date': expected_date,
             'metadata': {
                 'name': 'test_origin_metadata',
                 'version': '0.0.1'
             },
             'id': o_m2,
             'provider_id': provider2['id'],
             'provider_name': 'swMATH',
             'provider_type': provider_type,
             'provider_url': 'http://www.swmath.org/',
             'tool_id': tool['id']
         }]
 
         # then
         self.assertEqual(len(m_by_provider), 1)
         self.assertEqual(m_by_provider, expected_results)
         self.assertEqual(m_by_provider[0]['id'], o_m2)
         self.assertIsNotNone(o_m1)
 
 
 class TestLocalStorage(CommonTestStorage, unittest.TestCase):
     """Common storage tests, plus tests that only apply to local storage"""
 
     # Can only be tested with local storage as you can't mock
     # datetimes for the remote server
     def test_fetch_history(self):
         """Start and end a fetch history entry with datetime.datetime
         patched, then compare the stored entry with the fixture values.
         """
         origin = self.storage.origin_add_one(self.origin)
 
         # patch() hands back the object it installed as datetime.datetime
         with patch('datetime.datetime') as patched_datetime:
             patched_datetime.now.return_value = self.fetch_history_date
             fetch_history_id = self.storage.fetch_history_start(origin)
             patched_datetime.now.assert_called_with(tz=datetime.timezone.utc)
 
         with patch('datetime.datetime') as patched_datetime:
             patched_datetime.now.return_value = self.fetch_history_end
             self.storage.fetch_history_end(fetch_history_id,
                                            self.fetch_history_data)
 
         fetch_history = self.storage.fetch_history_get(fetch_history_id)
 
         expected_fetch_history = self.fetch_history_data.copy()
         expected_fetch_history.update({
             'id': fetch_history_id,
             'origin': origin,
             'date': self.fetch_history_date,
             'duration': self.fetch_history_duration,
         })
 
         self.assertEqual(expected_fetch_history, fetch_history)
 
     # The remote API doesn't expose _person_add
     def test_person_get(self):
         """Add two persons with _person_add and fetch them by id."""
         # given
         persons = [
             {
                 'fullname': b'bob <alice@bob>',
                 'name': b'bob',
                 'email': b'alice@bob',
             },
             {
                 'fullname': b'tony <tony@bob>',
                 'name': b'tony',
                 'email': b'tony@bob',
             },
         ]
         person_ids = [self.storage._person_add(person) for person in persons]
 
         # when
         actual_persons = self.storage.person_get(person_ids)
 
         # then: each person comes back with its id, in the order requested
         self.assertEqual(
             list(actual_persons), [
                 {
                     'id': person_id,
                     'fullname': person['fullname'],
                     'name': person['name'],
                     'email': person['email'],
                 }
                 for person_id, person in zip(person_ids, persons)
             ])
 
     # This test is only relevant on the local storage, with an actual
     # objstorage raising an exception
     def test_content_add_objstorage_exception(self):
         """An exception raised by objstorage.add comes out of content_add,
         and the content is then still reported as missing.
         """
         broken_add = Mock(side_effect=Exception('mocked broken objstorage'))
         self.storage.objstorage.add = broken_add
 
         with self.assertRaises(Exception) as raised:
             self.storage.content_add([self.cont])
 
         self.assertEqual(raised.exception.args, ('mocked broken objstorage',))
         still_missing = list(self.storage.content_missing([self.cont]))
         self.assertEqual(still_missing, [self.cont['sha1']])
 
 
 class AlteringSchemaTest(BaseTestStorage, unittest.TestCase):
     """This class is dedicated for the rare case where the schema needs to
        be altered dynamically.
 
        Otherwise, the tests could be blocking when ran altogether.
 
     """
     def test_content_update(self):
         """Update the sha1_git of an added content and check the row.
 
         The row is read straight from the `content` table, selected by
         sha1, and must hold the new sha1_git with every other selected
         column unchanged.
         """
         cont = copy.deepcopy(self.cont)
 
         self.storage.content_add([cont])
         # alter the sha1_git for example
         cont['sha1_git'] = hash_to_bytes(
             '3a60a5275d0333bf13468e8b3dcab90f4046e654')
 
         self.storage.content_update([cont], keys=['sha1_git'])
 
         with self.storage.get_db().transaction() as cur:
             cur.execute('SELECT sha1, sha1_git, sha256, length, status'
                         ' FROM content WHERE sha1 = %s',
                         (cont['sha1'],))
             datum = cur.fetchone()
 
         # The three hash columns are converted with tobytes() before
         # being compared with the bytes held in `cont`.
         self.assertEqual(
             (datum[0].tobytes(), datum[1].tobytes(), datum[2].tobytes(),
              datum[3], datum[4]),
             (cont['sha1'], cont['sha1_git'], cont['sha256'],
              cont['length'], 'visible'))
 
     def test_content_update_with_new_cols(self):
         """Update columns that are added to `content` for this test only.
 
         Two text columns, `test` and `test2`, are added to the table,
         filled through content_update, checked, and dropped again.  The
         drop runs in a `finally` so that a failure in between does not
         leave the extra columns in the schema.
         """
         with self.storage.get_db().transaction() as cur:
             cur.execute("""alter table content
                            add column test text default null,
                            add column test2 text default null""")
 
         drop_new_cols = """alter table content drop column test,
                            drop column test2"""
 
         try:
             cont = copy.deepcopy(self.cont2)
             self.storage.content_add([cont])
             cont['test'] = 'value-1'
             cont['test2'] = 'value-2'
 
             self.storage.content_update([cont], keys=['test', 'test2'])
             with self.storage.get_db().transaction() as cur:
                 cur.execute(
                     'SELECT sha1, sha1_git, sha256, length, status, test, test2'
                     ' FROM content WHERE sha1 = %s',
                     (cont['sha1'],))
 
                 datum = cur.fetchone()
 
             self.assertEqual(
                 (datum[0].tobytes(), datum[1].tobytes(), datum[2].tobytes(),
                  datum[3], datum[4], datum[5], datum[6]),
                 (cont['sha1'], cont['sha1_git'], cont['sha256'],
                  cont['length'], 'visible', cont['test'], cont['test2']))
         finally:
             # Restore the schema whether or not the checks above passed.
             with self.storage.get_db().transaction() as cur:
                 cur.execute(drop_new_cols)
diff --git a/version.txt b/version.txt
index 969c2f5a9..1cea4f661 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-v0.0.107-0-g2d5efac
\ No newline at end of file
+v0.0.108-0-gd211615
\ No newline at end of file