diff --git a/PKG-INFO b/PKG-INFO
index 1b7d378..abd445b 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,93 +1,93 @@
 Metadata-Version: 2.1
 Name: swh.core
-Version: 0.4.0
+Version: 0.5.0
 Summary: Software Heritage core utilities
 Home-page: https://forge.softwareheritage.org/diffusion/DCORE/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
 Project-URL: Funding, https://www.softwareheritage.org/donate
 Project-URL: Source, https://forge.softwareheritage.org/source/swh-core
 Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-core/
 Description: swh-core
         ========
         
         core library for swh's modules:
         - config parser
         - hash computations
         - serialization
         - logging mechanism
         - database connection
         - http-based RPC client/server
         
         Development
         -----------
         
         We strongly recommend you to use a [virtualenv][1] if you want to run tests or
         hack the code.
         
         To set up your development environment:
         
         ```
         (swh) user@host:~/swh-environment/swh-core$ pip install -e .[testing]
         ```
         
         This will install every Python package needed to run this package's tests.
         
         Unit tests can be executed using [pytest][2] or [tox][3].
         
         ```
         (swh) user@host:~/swh-environment/swh-core$ pytest
         ============================== test session starts ==============================
         platform linux -- Python 3.7.3, pytest-3.10.1, py-1.8.0, pluggy-0.12.0
         hypothesis profile 'default' -> database=DirectoryBasedExampleDatabase('/home/ddouard/src/swh-environment/swh-core/.hypothesis/examples')
         rootdir: /home/ddouard/src/swh-environment/swh-core, inifile: pytest.ini
         plugins: requests-mock-1.6.0, hypothesis-4.26.4, celery-4.3.0, postgresql-1.4.1
         collected 89 items
         
         swh/core/api/tests/test_api.py ..                                         [  2%]
         swh/core/api/tests/test_async.py ....                                     [  6%]
         swh/core/api/tests/test_serializers.py .....                              [ 12%]
         swh/core/db/tests/test_db.py ....                                         [ 16%]
         swh/core/tests/test_cli.py ......                                         [ 23%]
         swh/core/tests/test_config.py ..............                              [ 39%]
         swh/core/tests/test_statsd.py ........................................... [ 87%]
         ....                                                                      [ 92%]
         swh/core/tests/test_utils.py .......                                      [100%]
         ===================== 89 passed, 9 warnings in 6.94 seconds =====================
         ```
         
         Note: this git repository uses [pre-commit][4] hooks to ensure better and more
         consistent code. It should already be installed in your virtualenv (if not,
         just type `pip install pre-commit`). Make sure to activate it in your local
         copy of the git repository:
         
         ```
         (swh) user@host:~/swh-environment/swh-core$ pre-commit install
         pre-commit installed at .git/hooks/pre-commit
         ```
         
         Please read the [developer setup manual][5] for more information on how to hack
         on Software Heritage.
         
         [1]: https://virtualenv.pypa.io
         [2]: https://docs.pytest.org
         [3]: https://tox.readthedocs.io
         [4]: https://pre-commit.com
         [5]: https://docs.softwareheritage.org/devel/developer-setup.html
         
 Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Operating System :: OS Independent
 Classifier: Development Status :: 5 - Production/Stable
 Requires-Python: >=3.7
 Description-Content-Type: text/markdown
 Provides-Extra: testing-core
 Provides-Extra: logging
 Provides-Extra: db
 Provides-Extra: testing-db
 Provides-Extra: http
 Provides-Extra: testing
diff --git a/mypy.ini b/mypy.ini
index 5d30edb..696abf0 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,45 +1,48 @@
 [mypy]
 namespace_packages = True
 warn_unused_ignores = True
 
 
 # 3rd party libraries without stubs (yet)
 
 [mypy-aiohttp_utils.*]
 ignore_missing_imports = True
 
 [mypy-arrow.*]
 ignore_missing_imports = True
 
 [mypy-celery.*]
 ignore_missing_imports = True
 
 [mypy-decorator.*]
 ignore_missing_imports = True
 
 [mypy-deprecated.*]
 ignore_missing_imports = True
 
 [mypy-django.*]  # false positive, only used my hypotesis' extras
 ignore_missing_imports = True
 
 [mypy-iso8601.*]
 ignore_missing_imports = True
 
 [mypy-msgpack.*]
 ignore_missing_imports = True
 
 [mypy-pkg_resources.*]
 ignore_missing_imports = True
 
 [mypy-psycopg2.*]
 ignore_missing_imports = True
 
 [mypy-pytest.*]
 ignore_missing_imports = True
 
+[mypy-pytest_postgresql.*]
+ignore_missing_imports = True
+
 [mypy-requests_mock.*]
 ignore_missing_imports = True
 
 [mypy-systemd.*]
 ignore_missing_imports = True
diff --git a/requirements-db.txt b/requirements-db.txt
index 921e04d..d0f0975 100644
--- a/requirements-db.txt
+++ b/requirements-db.txt
@@ -1,3 +1,4 @@
 # requirements for swh.core.db
 psycopg2
 typing-extensions
+pytest-postgresql
diff --git a/requirements-test-db.txt b/requirements-test-db.txt
index cfd42eb..8b13789 100644
--- a/requirements-test-db.txt
+++ b/requirements-test-db.txt
@@ -1 +1 @@
-pytest-postgresql
+
diff --git a/swh.core.egg-info/PKG-INFO b/swh.core.egg-info/PKG-INFO
index 1b7d378..abd445b 100644
--- a/swh.core.egg-info/PKG-INFO
+++ b/swh.core.egg-info/PKG-INFO
@@ -1,93 +1,93 @@
 Metadata-Version: 2.1
 Name: swh.core
-Version: 0.4.0
+Version: 0.5.0
 Summary: Software Heritage core utilities
 Home-page: https://forge.softwareheritage.org/diffusion/DCORE/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
 Project-URL: Funding, https://www.softwareheritage.org/donate
 Project-URL: Source, https://forge.softwareheritage.org/source/swh-core
 Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-core/
 Description: swh-core
         ========
         
         core library for swh's modules:
         - config parser
         - hash computations
         - serialization
         - logging mechanism
         - database connection
         - http-based RPC client/server
         
         Development
         -----------
         
         We strongly recommend you to use a [virtualenv][1] if you want to run tests or
         hack the code.
         
         To set up your development environment:
         
         ```
         (swh) user@host:~/swh-environment/swh-core$ pip install -e .[testing]
         ```
         
         This will install every Python package needed to run this package's tests.
         
         Unit tests can be executed using [pytest][2] or [tox][3].
         
         ```
         (swh) user@host:~/swh-environment/swh-core$ pytest
         ============================== test session starts ==============================
         platform linux -- Python 3.7.3, pytest-3.10.1, py-1.8.0, pluggy-0.12.0
         hypothesis profile 'default' -> database=DirectoryBasedExampleDatabase('/home/ddouard/src/swh-environment/swh-core/.hypothesis/examples')
         rootdir: /home/ddouard/src/swh-environment/swh-core, inifile: pytest.ini
         plugins: requests-mock-1.6.0, hypothesis-4.26.4, celery-4.3.0, postgresql-1.4.1
         collected 89 items
         
         swh/core/api/tests/test_api.py ..                                         [  2%]
         swh/core/api/tests/test_async.py ....                                     [  6%]
         swh/core/api/tests/test_serializers.py .....                              [ 12%]
         swh/core/db/tests/test_db.py ....                                         [ 16%]
         swh/core/tests/test_cli.py ......                                         [ 23%]
         swh/core/tests/test_config.py ..............                              [ 39%]
         swh/core/tests/test_statsd.py ........................................... [ 87%]
         ....                                                                      [ 92%]
         swh/core/tests/test_utils.py .......                                      [100%]
         ===================== 89 passed, 9 warnings in 6.94 seconds =====================
         ```
         
         Note: this git repository uses [pre-commit][4] hooks to ensure better and more
         consistent code. It should already be installed in your virtualenv (if not,
         just type `pip install pre-commit`). Make sure to activate it in your local
         copy of the git repository:
         
         ```
         (swh) user@host:~/swh-environment/swh-core$ pre-commit install
         pre-commit installed at .git/hooks/pre-commit
         ```
         
         Please read the [developer setup manual][5] for more information on how to hack
         on Software Heritage.
         
         [1]: https://virtualenv.pypa.io
         [2]: https://docs.pytest.org
         [3]: https://tox.readthedocs.io
         [4]: https://pre-commit.com
         [5]: https://docs.softwareheritage.org/devel/developer-setup.html
         
 Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Operating System :: OS Independent
 Classifier: Development Status :: 5 - Production/Stable
 Requires-Python: >=3.7
 Description-Content-Type: text/markdown
 Provides-Extra: testing-core
 Provides-Extra: logging
 Provides-Extra: db
 Provides-Extra: testing-db
 Provides-Extra: http
 Provides-Extra: testing
diff --git a/swh.core.egg-info/SOURCES.txt b/swh.core.egg-info/SOURCES.txt
index 4a3e3a0..200cc64 100644
--- a/swh.core.egg-info/SOURCES.txt
+++ b/swh.core.egg-info/SOURCES.txt
@@ -1,102 +1,106 @@
 .gitignore
 .pre-commit-config.yaml
 AUTHORS
 CODE_OF_CONDUCT.md
 CONTRIBUTORS
 LICENSE
 MANIFEST.in
 Makefile
 Makefile.local
 README.md
 conftest.py
 mypy.ini
 pyproject.toml
 pytest.ini
 requirements-db.txt
 requirements-http.txt
 requirements-logging.txt
 requirements-swh.txt
 requirements-test-db.txt
 requirements-test.txt
 requirements.txt
 setup.cfg
 setup.py
 tox.ini
 docs/.gitignore
 docs/Makefile
 docs/cli.rst
 docs/conf.py
 docs/index.rst
 docs/_static/.placeholder
 docs/_templates/.placeholder
 swh/__init__.py
 swh.core.egg-info/PKG-INFO
 swh.core.egg-info/SOURCES.txt
 swh.core.egg-info/dependency_links.txt
 swh.core.egg-info/entry_points.txt
 swh.core.egg-info/requires.txt
 swh.core.egg-info/top_level.txt
 swh/core/__init__.py
 swh/core/api_async.py
 swh/core/collections.py
 swh/core/config.py
 swh/core/logger.py
 swh/core/py.typed
 swh/core/pytest_plugin.py
 swh/core/sentry.py
 swh/core/statsd.py
 swh/core/tarball.py
 swh/core/utils.py
 swh/core/api/__init__.py
 swh/core/api/asynchronous.py
 swh/core/api/classes.py
 swh/core/api/gunicorn_config.py
 swh/core/api/negotiation.py
 swh/core/api/serializers.py
 swh/core/api/tests/__init__.py
 swh/core/api/tests/server_testing.py
 swh/core/api/tests/test_async.py
 swh/core/api/tests/test_classes.py
 swh/core/api/tests/test_gunicorn.py
 swh/core/api/tests/test_rpc_client.py
 swh/core/api/tests/test_rpc_client_server.py
 swh/core/api/tests/test_rpc_server.py
 swh/core/api/tests/test_serializers.py
 swh/core/cli/__init__.py
 swh/core/cli/db.py
 swh/core/db/__init__.py
 swh/core/db/common.py
 swh/core/db/db_utils.py
+swh/core/db/pytest_plugin.py
 swh/core/db/tests/__init__.py
 swh/core/db/tests/conftest.py
 swh/core/db/tests/db_testing.py
 swh/core/db/tests/test_cli.py
 swh/core/db/tests/test_db.py
+swh/core/db/tests/test_db_utils.py
+swh/core/db/tests/data/0-schema.sql
+swh/core/db/tests/data/1-data.sql
 swh/core/sql/log-schema.sql
 swh/core/tests/__init__.py
 swh/core/tests/test_cli.py
 swh/core/tests/test_collections.py
 swh/core/tests/test_config.py
 swh/core/tests/test_logger.py
 swh/core/tests/test_pytest_plugin.py
 swh/core/tests/test_statsd.py
 swh/core/tests/test_tarball.py
 swh/core/tests/test_utils.py
 swh/core/tests/data/archives/groff-1.02.tar.Z
 swh/core/tests/data/archives/hello.tar
 swh/core/tests/data/archives/hello.tar.bz2
 swh/core/tests/data/archives/hello.tar.gz
 swh/core/tests/data/archives/hello.tar.lz
 swh/core/tests/data/archives/hello.tar.x
 swh/core/tests/data/archives/hello.zip
 swh/core/tests/data/http_example.com/something.json
 swh/core/tests/data/https_example.com/file.json
 swh/core/tests/data/https_example.com/file.json,name=doe,firstname=jane
 swh/core/tests/data/https_example.com/file.json_visit1
 swh/core/tests/data/https_example.com/other.json
 swh/core/tests/data/https_forge.s.o/api_diffusion,attachments[uris]=1
 swh/core/tests/data/https_www.reference.com/web,q=What+Is+an+Example+of+a+URL?,qo=contentPageRelatedSearch,o=600605,l=dir,sga=1
 swh/core/tests/fixture/__init__.py
 swh/core/tests/fixture/conftest.py
 swh/core/tests/fixture/test_pytest_plugin.py
 swh/core/tests/fixture/data/https_example.com/file.json
\ No newline at end of file
diff --git a/swh.core.egg-info/requires.txt b/swh.core.egg-info/requires.txt
index 2ad0f15..d0b3e47 100644
--- a/swh.core.egg-info/requires.txt
+++ b/swh.core.egg-info/requires.txt
@@ -1,54 +1,54 @@
 Click
 Deprecated
 PyYAML
 sentry-sdk
 
 [db]
 psycopg2
 typing-extensions
+pytest-postgresql
 
 [http]
 aiohttp
 aiohttp_utils>=3.1.1
 arrow
 decorator
 Flask
 iso8601
 msgpack>0.5
 requests
 blinker
 
 [logging]
 systemd-python
 
 [testing]
 pytest
 pytest-mock
 requests-mock
 hypothesis>=3.11.0
 pre-commit
 pytz
-pytest-postgresql
 psycopg2
 typing-extensions
+pytest-postgresql
 aiohttp
 aiohttp_utils>=3.1.1
 arrow
 decorator
 Flask
 iso8601
 msgpack>0.5
 requests
 blinker
 systemd-python
 
 [testing-core]
 pytest
 pytest-mock
 requests-mock
 hypothesis>=3.11.0
 pre-commit
 pytz
 
 [testing-db]
-pytest-postgresql
diff --git a/swh/core/api/tests/test_async.py b/swh/core/api/tests/test_async.py
index 4b25aa3..8923222 100644
--- a/swh/core/api/tests/test_async.py
+++ b/swh/core/api/tests/test_async.py
@@ -1,243 +1,233 @@
 # Copyright (C) 2019-2020  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import datetime
 import json
 
 import msgpack
 import pytest
 
 from swh.core.api.asynchronous import (
     Response,
     RPCServerApp,
     decode_request,
     encode_msgpack,
 )
 from swh.core.api.serializers import SWHJSONEncoder, msgpack_dumps
 
 pytest_plugins = ["aiohttp.pytest_plugin", "pytester"]
 
 
 class TestServerException(Exception):
     pass
 
 
 class TestClientError(Exception):
     pass
 
 
 async def root(request):
     return Response("toor")
 
 
 STRUCT = {
     "txt": "something stupid",
     # 'date': datetime.date(2019, 6, 9),  # not supported
     "datetime": datetime.datetime(2019, 6, 9, 10, 12, tzinfo=datetime.timezone.utc),
     "timedelta": datetime.timedelta(days=-2, hours=3),
     "int": 42,
     "float": 3.14,
     "subdata": {
         "int": 42,
         "datetime": datetime.datetime(
             2019, 6, 10, 11, 12, tzinfo=datetime.timezone.utc
         ),
     },
     "list": [
         42,
         datetime.datetime(2019, 9, 10, 11, 12, tzinfo=datetime.timezone.utc),
         "ok",
     ],
 }
 
 
 async def struct(request):
     return Response(STRUCT)
 
 
 async def echo(request):
     data = await decode_request(request)
     return Response(data)
 
 
 async def server_exception(request):
     raise TestServerException()
 
 
 async def client_error(request):
     raise TestClientError()
 
 
 async def echo_no_nego(request):
     # let the content negotiation handle the serialization for us...
     data = await decode_request(request)
     ret = encode_msgpack(data)
     return ret
 
 
 def check_mimetype(src, dst):
     src = src.split(";")[0].strip()
     dst = dst.split(";")[0].strip()
     assert src == dst
 
 
 @pytest.fixture
 def async_app():
     app = RPCServerApp()
     app.client_exception_classes = (TestClientError,)
     app.router.add_route("GET", "/", root)
     app.router.add_route("GET", "/struct", struct)
     app.router.add_route("POST", "/echo", echo)
     app.router.add_route("GET", "/server_exception", server_exception)
     app.router.add_route("GET", "/client_error", client_error)
     app.router.add_route("POST", "/echo-no-nego", echo_no_nego)
     return app
 
 
-async def test_get_simple(async_app, aiohttp_client) -> None:
-    assert async_app is not None
+@pytest.fixture
+def cli(async_app, aiohttp_client, loop):
+    return loop.run_until_complete(aiohttp_client(async_app))
+
 
-    cli = await aiohttp_client(async_app)
+async def test_get_simple(cli) -> None:
     resp = await cli.get("/")
     assert resp.status == 200
     check_mimetype(resp.headers["Content-Type"], "application/x-msgpack")
     data = await resp.read()
     value = msgpack.unpackb(data, raw=False)
     assert value == "toor"
 
 
-async def test_get_server_exception(async_app, aiohttp_client) -> None:
-    cli = await aiohttp_client(async_app)
+async def test_get_server_exception(cli) -> None:
     resp = await cli.get("/server_exception")
     assert resp.status == 500
     data = await resp.read()
     data = msgpack.unpackb(data, raw=False)
     assert data["exception"]["type"] == "TestServerException"
 
 
-async def test_get_client_error(async_app, aiohttp_client) -> None:
-    cli = await aiohttp_client(async_app)
+async def test_get_client_error(cli) -> None:
     resp = await cli.get("/client_error")
     assert resp.status == 400
     data = await resp.read()
     data = msgpack.unpackb(data, raw=False)
     assert data["exception"]["type"] == "TestClientError"
 
 
-async def test_get_simple_nego(async_app, aiohttp_client) -> None:
-    cli = await aiohttp_client(async_app)
+async def test_get_simple_nego(cli) -> None:
     for ctype in ("x-msgpack", "json"):
         resp = await cli.get("/", headers={"Accept": "application/%s" % ctype})
         assert resp.status == 200
         check_mimetype(resp.headers["Content-Type"], "application/%s" % ctype)
         assert (await decode_request(resp)) == "toor"
 
 
-async def test_get_struct(async_app, aiohttp_client) -> None:
+async def test_get_struct(cli) -> None:
     """Test returned structured from a simple GET data is OK"""
-    cli = await aiohttp_client(async_app)
     resp = await cli.get("/struct")
     assert resp.status == 200
     check_mimetype(resp.headers["Content-Type"], "application/x-msgpack")
     assert (await decode_request(resp)) == STRUCT
 
 
-async def test_get_struct_nego(async_app, aiohttp_client) -> None:
+async def test_get_struct_nego(cli) -> None:
     """Test returned structured from a simple GET data is OK"""
-    cli = await aiohttp_client(async_app)
     for ctype in ("x-msgpack", "json"):
         resp = await cli.get("/struct", headers={"Accept": "application/%s" % ctype})
         assert resp.status == 200
         check_mimetype(resp.headers["Content-Type"], "application/%s" % ctype)
         assert (await decode_request(resp)) == STRUCT
 
 
-async def test_post_struct_msgpack(async_app, aiohttp_client) -> None:
+async def test_post_struct_msgpack(cli) -> None:
     """Test that msgpack encoded posted struct data is returned as is"""
-    cli = await aiohttp_client(async_app)
     # simple struct
     resp = await cli.post(
         "/echo",
         headers={"Content-Type": "application/x-msgpack"},
         data=msgpack_dumps({"toto": 42}),
     )
     assert resp.status == 200
     check_mimetype(resp.headers["Content-Type"], "application/x-msgpack")
     assert (await decode_request(resp)) == {"toto": 42}
     # complex struct
     resp = await cli.post(
         "/echo",
         headers={"Content-Type": "application/x-msgpack"},
         data=msgpack_dumps(STRUCT),
     )
     assert resp.status == 200
     check_mimetype(resp.headers["Content-Type"], "application/x-msgpack")
     assert (await decode_request(resp)) == STRUCT
 
 
-async def test_post_struct_json(async_app, aiohttp_client) -> None:
+async def test_post_struct_json(cli) -> None:
     """Test that json encoded posted struct data is returned as is"""
-    cli = await aiohttp_client(async_app)
-
     resp = await cli.post(
         "/echo",
         headers={"Content-Type": "application/json"},
         data=json.dumps({"toto": 42}, cls=SWHJSONEncoder),
     )
     assert resp.status == 200
     check_mimetype(resp.headers["Content-Type"], "application/x-msgpack")
     assert (await decode_request(resp)) == {"toto": 42}
 
     resp = await cli.post(
         "/echo",
         headers={"Content-Type": "application/json"},
         data=json.dumps(STRUCT, cls=SWHJSONEncoder),
     )
     assert resp.status == 200
     check_mimetype(resp.headers["Content-Type"], "application/x-msgpack")
     # assert resp.headers['Content-Type'] == 'application/x-msgpack'
     assert (await decode_request(resp)) == STRUCT
 
 
-async def test_post_struct_nego(async_app, aiohttp_client) -> None:
+async def test_post_struct_nego(cli) -> None:
     """Test that json encoded posted struct data is returned as is
 
     using content negotiation (accept json or msgpack).
     """
-    cli = await aiohttp_client(async_app)
-
     for ctype in ("x-msgpack", "json"):
         resp = await cli.post(
             "/echo",
             headers={
                 "Content-Type": "application/json",
                 "Accept": "application/%s" % ctype,
             },
             data=json.dumps(STRUCT, cls=SWHJSONEncoder),
         )
         assert resp.status == 200
         check_mimetype(resp.headers["Content-Type"], "application/%s" % ctype)
         assert (await decode_request(resp)) == STRUCT
 
 
-async def test_post_struct_no_nego(async_app, aiohttp_client) -> None:
+async def test_post_struct_no_nego(cli) -> None:
     """Test that json encoded posted struct data is returned as msgpack
 
     when using non-negotiation-compatible handlers.
     """
-    cli = await aiohttp_client(async_app)
-
     for ctype in ("x-msgpack", "json"):
         resp = await cli.post(
             "/echo-no-nego",
             headers={
                 "Content-Type": "application/json",
                 "Accept": "application/%s" % ctype,
             },
             data=json.dumps(STRUCT, cls=SWHJSONEncoder),
         )
         assert resp.status == 200
         check_mimetype(resp.headers["Content-Type"], "application/x-msgpack")
         assert (await decode_request(resp)) == STRUCT
diff --git a/swh/core/config.py b/swh/core/config.py
index 7490ef5..119eadc 100644
--- a/swh/core/config.py
+++ b/swh/core/config.py
@@ -1,378 +1,308 @@
 # Copyright (C) 2015-2020  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from copy import deepcopy
 from itertools import chain
 import logging
 import os
 from typing import Any, Callable, Dict, List, Optional, Tuple
 
-from deprecated import deprecated
 import yaml
 
 logger = logging.getLogger(__name__)
 
 
 SWH_CONFIG_DIRECTORIES = [
     "~/.config/swh",
     "~/.swh",
     "/etc/softwareheritage",
 ]
 
 SWH_GLOBAL_CONFIG = "global.yml"
 
 SWH_DEFAULT_GLOBAL_CONFIG = {
     "max_content_size": ("int", 100 * 1024 * 1024),
 }
 
 SWH_CONFIG_EXTENSIONS = [
     ".yml",
 ]
 
 # conversion per type
 _map_convert_fn: Dict[str, Callable] = {
     "int": int,
     "bool": lambda x: x.lower() == "true",
     "list[str]": lambda x: [value.strip() for value in x.split(",")],
     "list[int]": lambda x: [int(value.strip()) for value in x.split(",")],
 }
 
 _map_check_fn: Dict[str, Callable] = {
     "int": lambda x: isinstance(x, int),
     "bool": lambda x: isinstance(x, bool),
     "list[str]": lambda x: (isinstance(x, list) and all(isinstance(y, str) for y in x)),
     "list[int]": lambda x: (isinstance(x, list) and all(isinstance(y, int) for y in x)),
 }
 
 
 def exists_accessible(filepath: str) -> bool:
     """Check whether a file exists, and is accessible.
 
     Returns:
         True if the file exists and is accessible
         False if the file does not exist
 
     Raises:
         PermissionError if the file cannot be read.
     """
 
     try:
         os.stat(filepath)
     except PermissionError:
         raise
     except FileNotFoundError:
         return False
     else:
         if os.access(filepath, os.R_OK):
             return True
         else:
             raise PermissionError("Permission denied: {filepath!r}")
 
 
 def config_basepath(config_path: str) -> str:
     """Return the base path of a configuration file"""
     if config_path.endswith(".yml"):
         return config_path[:-4]
 
     return config_path
 
 
 def read_raw_config(base_config_path: str) -> Dict[str, Any]:
     """Read the raw config corresponding to base_config_path.
 
     Can read yml files.
     """
     yml_file = f"{base_config_path}.yml"
     if exists_accessible(yml_file):
         logger.debug("Loading config file %s", yml_file)
         with open(yml_file) as f:
             return yaml.safe_load(f)
 
     return {}
 
 
 def config_exists(config_path):
     """Check whether the given config exists"""
     basepath = config_basepath(config_path)
     return any(
         exists_accessible(basepath + extension) for extension in SWH_CONFIG_EXTENSIONS
     )
 
 
 def read(
     conf_file: Optional[str] = None,
     default_conf: Optional[Dict[str, Tuple[str, Any]]] = None,
 ) -> Dict[str, Any]:
     """Read the user's configuration file.
 
     Fill in the gap using `default_conf`.  `default_conf` is similar to this::
 
         DEFAULT_CONF = {
             'a': ('str', '/tmp/swh-loader-git/log'),
             'b': ('str', 'dbname=swhloadergit')
             'c': ('bool', true)
             'e': ('bool', None)
             'd': ('int', 10)
         }
 
     If conf_file is None, return the default config.
 
     """
     conf: Dict[str, Any] = {}
 
     if conf_file:
         base_config_path = config_basepath(os.path.expanduser(conf_file))
         conf = read_raw_config(base_config_path) or {}
 
     if not default_conf:
         return conf
 
     # remaining missing default configuration key are set
     # also type conversion is enforced for underneath layer
     for key, (nature_type, default_value) in default_conf.items():
         val = conf.get(key, None)
         if val is None:  # fallback to default value
             conf[key] = default_value
         elif not _map_check_fn.get(nature_type, lambda x: True)(val):
             # value present but not in the proper format, force type conversion
             conf[key] = _map_convert_fn.get(nature_type, lambda x: x)(val)
 
     return conf
 
 
 def priority_read(
     conf_filenames: List[str], default_conf: Optional[Dict[str, Tuple[str, Any]]] = None
 ):
     """Try reading the configuration files from conf_filenames, in order,
        and return the configuration from the first one that exists.
 
        default_conf has the same specification as it does in read.
     """
 
     # Try all the files in order
     for filename in conf_filenames:
         full_filename = os.path.expanduser(filename)
         if config_exists(full_filename):
             return read(full_filename, default_conf)
 
     # Else, return the default configuration
     return read(None, default_conf)
 
 
 def merge_default_configs(base_config, *other_configs):
     """Merge several default config dictionaries, from left to right"""
     full_config = base_config.copy()
 
     for config in other_configs:
         full_config.update(config)
 
     return full_config
 
 
 def merge_configs(base: Optional[Dict[str, Any]], other: Optional[Dict[str, Any]]):
     """Merge two config dictionaries
 
     This does merge config dicts recursively, with the rules, for every value
     of the dicts (with 'val' not being a dict):
 
     - None + type -> type
     - type + None -> None
     - dict + dict -> dict (merged)
     - val + dict -> TypeError
     - dict + val -> TypeError
     - val + val -> val (other)
 
     for instance:
 
     >>> d1 = {
     ...   'key1': {
     ...     'skey1': 'value1',
     ...     'skey2': {'sskey1': 'value2'},
     ...   },
     ...   'key2': 'value3',
     ... }
 
     with
 
     >>> d2 = {
     ...   'key1': {
     ...     'skey1': 'value4',
     ...     'skey2': {'sskey2': 'value5'},
     ...   },
     ...   'key3': 'value6',
     ... }
 
     will give:
 
     >>> d3 = {
     ...   'key1': {
     ...     'skey1': 'value4',  # <-- note this
     ...     'skey2': {
     ...       'sskey1': 'value2',
     ...       'sskey2': 'value5',
     ...     },
     ...   },
     ...   'key2': 'value3',
     ...   'key3': 'value6',
     ... }
     >>> assert merge_configs(d1, d2) == d3
 
     Note that no type checking is done for anything but dicts.
     """
     if not isinstance(base, dict) or not isinstance(other, dict):
         raise TypeError("Cannot merge a %s with a %s" % (type(base), type(other)))
 
     output = {}
     allkeys = set(chain(base.keys(), other.keys()))
     for k in allkeys:
         vb = base.get(k)
         vo = other.get(k)
 
         if isinstance(vo, dict):
             output[k] = merge_configs(vb is not None and vb or {}, vo)
         elif isinstance(vb, dict) and k in other and other[k] is not None:
             output[k] = merge_configs(vb, vo is not None and vo or {})
         elif k in other:
             output[k] = deepcopy(vo)
         else:
             output[k] = deepcopy(vb)
 
     return output
 
 
 def swh_config_paths(base_filename: str) -> List[str]:
     """Return the Software Heritage specific configuration paths for the given
        filename."""
 
     return [os.path.join(dirname, base_filename) for dirname in SWH_CONFIG_DIRECTORIES]
 
 
 def prepare_folders(conf, *keys):
     """Prepare the folder mentioned in config under keys.
     """
 
     def makedir(folder):
         if not os.path.exists(folder):
             os.makedirs(folder)
 
     for key in keys:
         makedir(conf[key])
 
 
 def load_global_config():
     """Load the global Software Heritage config"""
 
     return priority_read(
         swh_config_paths(SWH_GLOBAL_CONFIG), SWH_DEFAULT_GLOBAL_CONFIG,
     )
 
 
 def load_named_config(name, default_conf=None, global_conf=True):
     """Load the config named `name` from the Software Heritage
        configuration paths.
 
        If global_conf is True (default), read the global configuration
        too.
     """
 
     conf = {}
 
     if global_conf:
         conf.update(load_global_config())
 
     conf.update(priority_read(swh_config_paths(name), default_conf))
 
     return conf
 
 
 def load_from_envvar(default_config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
     """Load configuration yaml file from the environment variable SWH_CONFIG_FILENAME,
     eventually enriched with default configuration key/value from the default_config
     dict if provided.
 
     Returns:
         Configuration dict
 
     Raises:
         AssertionError if SWH_CONFIG_FILENAME is undefined
 
     """
     assert (
         "SWH_CONFIG_FILENAME" in os.environ
     ), "SWH_CONFIG_FILENAME environment variable is undefined."
 
     cfg_path = os.environ["SWH_CONFIG_FILENAME"]
     cfg = read_raw_config(config_basepath(cfg_path))
     cfg = merge_configs(default_config or {}, cfg)
     return cfg
-
-
-@deprecated(version="0.3.2", reason="Use swh.core.config.load_from_envvar instead")
-class SWHConfig:
-    """Mixin to add configuration parsing abilities to classes
-
-    The class should override the class attributes:
-        - DEFAULT_CONFIG (default configuration to be parsed)
-        - CONFIG_BASE_FILENAME (the filename of the configuration to be used)
-
-    This class defines one classmethod, parse_config_file, which
-    parses a configuration file using the default config as set in the
-    class attribute.
-
-    """
-
-    DEFAULT_CONFIG = {}  # type: Dict[str, Tuple[str, Any]]
-    CONFIG_BASE_FILENAME = ""  # type: Optional[str]
-
-    @classmethod
-    def parse_config_file(
-        cls,
-        base_filename=None,
-        config_filename=None,
-        additional_configs=None,
-        global_config=True,
-    ):
-        """Parse the configuration file associated to the current class.
-
-        By default, parse_config_file will load the configuration
-        cls.CONFIG_BASE_FILENAME from one of the Software Heritage
-        configuration directories, in order, unless it is overridden
-        by base_filename or config_filename (which shortcuts the file
-        lookup completely).
-
-        Args:
-            - base_filename (str): overrides the default
-              cls.CONFIG_BASE_FILENAME
-            - config_filename (str): sets the file to parse instead of
-              the defaults set from cls.CONFIG_BASE_FILENAME
-            - additional_configs: (list of default configuration dicts)
-              allows to override or extend the configuration set in
-              cls.DEFAULT_CONFIG.
-            - global_config (bool): Load the global configuration (default:
-              True)
-        """
-
-        if config_filename:
-            config_filenames = [config_filename]
-        elif "SWH_CONFIG_FILENAME" in os.environ:
-            config_filenames = [os.environ["SWH_CONFIG_FILENAME"]]
-        else:
-            if not base_filename:
-                base_filename = cls.CONFIG_BASE_FILENAME
-            config_filenames = swh_config_paths(base_filename)
-        if not additional_configs:
-            additional_configs = []
-
-        full_default_config = merge_default_configs(
-            cls.DEFAULT_CONFIG, *additional_configs
-        )
-
-        config = {}
-        if global_config:
-            config = load_global_config()
-
-        config.update(priority_read(config_filenames, full_default_config))
-
-        return config
diff --git a/swh/core/db/db_utils.py b/swh/core/db/db_utils.py
index 77ddb59..95b79ff 100644
--- a/swh/core/db/db_utils.py
+++ b/swh/core/db/db_utils.py
@@ -1,252 +1,366 @@
-# Copyright (C) 2015-2019  The Software Heritage developers
+# Copyright (C) 2015-2020  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import functools
+import glob
 import logging
 import re
-from typing import Optional, Union
+import subprocess
+from typing import Optional, Set, Union
 
 import psycopg2
 import psycopg2.extensions
+from pytest_postgresql.janitor import DatabaseJanitor, Version
+
+from swh.core.utils import numfile_sortkey as sortkey
 
 logger = logging.getLogger(__name__)
 
 
 def stored_procedure(stored_proc):
     """decorator to execute remote stored procedure, specified as argument
 
     Generally, the body of the decorated function should be empty. If it is
     not, the stored procedure will be executed first; the function body then.
 
     """
 
     def wrap(meth):
         @functools.wraps(meth)
         def _meth(self, *args, **kwargs):
             cur = kwargs.get("cur", None)
             self._cursor(cur).execute("SELECT %s()" % stored_proc)
             meth(self, *args, **kwargs)
 
         return _meth
 
     return wrap
 
 
 def jsonize(value):
     """Convert a value to a psycopg2 JSON object if necessary"""
     if isinstance(value, dict):
         return psycopg2.extras.Json(value)
 
     return value
 
 
 def connect_to_conninfo(
     db_or_conninfo: Union[str, psycopg2.extensions.connection]
 ) -> psycopg2.extensions.connection:
     """Connect to the database passed in argument
 
     Args:
         db_or_conninfo: A database connection, or a database connection info string
 
     Returns:
         a connected database handle
 
     Raises:
         psycopg2.Error if the database doesn't exist
     """
     if isinstance(db_or_conninfo, psycopg2.extensions.connection):
         return db_or_conninfo
 
     if "=" not in db_or_conninfo and "//" not in db_or_conninfo:
         # Database name
         db_or_conninfo = f"dbname={db_or_conninfo}"
 
     db = psycopg2.connect(db_or_conninfo)
 
     return db
 
 
 def swh_db_version(
     db_or_conninfo: Union[str, psycopg2.extensions.connection]
 ) -> Optional[int]:
     """Retrieve the swh version of the database.
 
     If the database is not initialized, this logs a warning and returns None.
 
     Args:
       db_or_conninfo: A database connection, or a database connection info string
 
     Returns:
         Either the version of the database, or None if it couldn't be detected
     """
     try:
         db = connect_to_conninfo(db_or_conninfo)
     except psycopg2.Error:
         logger.exception("Failed to connect to `%s`", db_or_conninfo)
         # Database not initialized
         return None
 
     try:
         with db.cursor() as c:
             query = "select version from dbversion order by dbversion desc limit 1"
             try:
                 c.execute(query)
                 return c.fetchone()[0]
             except psycopg2.errors.UndefinedTable:
                 return None
     except Exception:
         logger.exception("Could not get version from `%s`", db_or_conninfo)
         return None
 
 
 def swh_db_flavor(
     db_or_conninfo: Union[str, psycopg2.extensions.connection]
 ) -> Optional[str]:
     """Retrieve the swh flavor of the database.
 
     If the database is not initialized, or the database doesn't support
     flavors, this returns None.
 
     Args:
       db_or_conninfo: A database connection, or a database connection info string
 
     Returns:
         The flavor of the database, or None if it could not be detected.
     """
     try:
         db = connect_to_conninfo(db_or_conninfo)
     except psycopg2.Error:
         logger.exception("Failed to connect to `%s`", db_or_conninfo)
         # Database not initialized
         return None
 
     try:
         with db.cursor() as c:
             query = "select swh_get_dbflavor()"
             try:
                 c.execute(query)
                 return c.fetchone()[0]
             except psycopg2.errors.UndefinedFunction:
                 # function not found: no flavor
                 return None
     except Exception:
         logger.exception("Could not get flavor from `%s`", db_or_conninfo)
         return None
 
 
 # The following code has been imported from psycopg2, version 2.7.4,
 # https://github.com/psycopg/psycopg2/tree/5afb2ce803debea9533e293eef73c92ffce95bcd
 # and modified by Software Heritage.
 #
 # Original file: lib/extras.py
 #
 # psycopg2 is free software: you can redistribute it and/or modify it under the
 # terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option) any
 # later version.
 
 
 def _paginate(seq, page_size):
     """Consume an iterable and return it in chunks.
     Every chunk is at most `page_size`. Never return an empty chunk.
     """
     page = []
     it = iter(seq)
     while 1:
         try:
             for i in range(page_size):
                 page.append(next(it))
             yield page
             page = []
         except StopIteration:
             if page:
                 yield page
             return
 
 
 def _split_sql(sql):
     """Split *sql* on a single ``%s`` placeholder.
     Split on the %s, perform %% replacement and return pre, post lists of
     snippets.
     """
     curr = pre = []
     post = []
     tokens = re.split(br"(%.)", sql)
     for token in tokens:
         if len(token) != 2 or token[:1] != b"%":
             curr.append(token)
             continue
 
         if token[1:] == b"s":
             if curr is pre:
                 curr = post
             else:
                 raise ValueError("the query contains more than one '%s' placeholder")
         elif token[1:] == b"%":
             curr.append(b"%")
         else:
             raise ValueError(
                 "unsupported format character: '%s'"
                 % token[1:].decode("ascii", "replace")
             )
 
     if curr is pre:
         raise ValueError("the query doesn't contain any '%s' placeholder")
 
     return pre, post
 
 
 def execute_values_generator(cur, sql, argslist, template=None, page_size=100):
     """Execute a statement using SQL ``VALUES`` with a sequence of parameters.
     Rows returned by the query are returned through a generator.
     You need to consume the generator for the queries to be executed!
 
     :param cur: the cursor to use to execute the query.
     :param sql: the query to execute. It must contain a single ``%s``
         placeholder, which will be replaced by a `VALUES list`__.
         Example: ``"INSERT INTO mytable (id, f1, f2) VALUES %s"``.
     :param argslist: sequence of sequences or dictionaries with the arguments
         to send to the query. The type and content must be consistent with
         *template*.
     :param template: the snippet to merge to every item in *argslist* to
         compose the query.
 
         - If the *argslist* items are sequences it should contain positional
           placeholders (e.g. ``"(%s, %s, %s)"``, or ``"(%s, %s, 42)``" if there
           are constants value...).
         - If the *argslist* items are mappings it should contain named
           placeholders (e.g. ``"(%(id)s, %(f1)s, 42)"``).
 
         If not specified, assume the arguments are sequence and use a simple
         positional template (i.e.  ``(%s, %s, ...)``), with the number of
         placeholders sniffed by the first element in *argslist*.
     :param page_size: maximum number of *argslist* items to include in every
         statement. If there are more items the function will execute more than
         one statement.
     :param yield_from_cur: Whether to yield results from the cursor in this
         function directly.
 
     .. __: https://www.postgresql.org/docs/current/static/queries-values.html
 
     After the execution of the function the `cursor.rowcount` property will
     **not** contain a total result.
     """
     # we can't just use sql % vals because vals is bytes: if sql is bytes
     # there will be some decoding error because of stupid codec used, and Py3
     # doesn't implement % on bytes.
     if not isinstance(sql, bytes):
         sql = sql.encode(psycopg2.extensions.encodings[cur.connection.encoding])
     pre, post = _split_sql(sql)
 
     for page in _paginate(argslist, page_size=page_size):
         if template is None:
             template = b"(" + b",".join([b"%s"] * len(page[0])) + b")"
         parts = pre[:]
         for args in page:
             parts.append(cur.mogrify(template, args))
             parts.append(b",")
         parts[-1:] = post
         cur.execute(b"".join(parts))
         yield from cur
+
+
+class SWHDatabaseJanitor(DatabaseJanitor):
+    """SWH database janitor implementation with a a different setup/teardown policy than
+    than the stock one. Instead of dropping, creating and initializing the database for
+    each test, it creates and initializes the db once, then truncates the tables (and
+    sequences) in between tests.
+
+    This is needed to have acceptable test performances.
+
+    """
+
+    def __init__(
+        self,
+        user: str,
+        host: str,
+        port: str,
+        db_name: str,
+        version: Union[str, float, Version],
+        dump_files: Optional[str] = None,
+        no_truncate_tables: Set[str] = set(),
+    ) -> None:
+        super().__init__(user, host, port, db_name, version)
+        if dump_files:
+            self.dump_files = sorted(glob.glob(dump_files), key=sortkey)
+        else:
+            self.dump_files = []
+        # do no truncate the following tables
+        self.no_truncate_tables = set(no_truncate_tables)
+
+    def db_setup(self):
+        conninfo = (
+            f"host={self.host} user={self.user} port={self.port} dbname={self.db_name}"
+        )
+
+        for fname in self.dump_files:
+            subprocess.check_call(
+                [
+                    "psql",
+                    "--quiet",
+                    "--no-psqlrc",
+                    "-v",
+                    "ON_ERROR_STOP=1",
+                    "-d",
+                    conninfo,
+                    "-f",
+                    fname,
+                ]
+            )
+
+    def db_reset(self):
+        """Truncate tables (all but self.no_truncate_tables set) and sequences
+
+        """
+        with psycopg2.connect(
+            dbname=self.db_name, user=self.user, host=self.host, port=self.port,
+        ) as cnx:
+            with cnx.cursor() as cur:
+                cur.execute(
+                    "SELECT table_name FROM information_schema.tables "
+                    "WHERE table_schema = %s",
+                    ("public",),
+                )
+                all_tables = set(table for (table,) in cur.fetchall())
+                tables_to_truncate = all_tables - self.no_truncate_tables
+
+                for table in tables_to_truncate:
+                    cur.execute("TRUNCATE TABLE %s CASCADE" % table)
+
+                cur.execute(
+                    "SELECT sequence_name FROM information_schema.sequences "
+                    "WHERE sequence_schema = %s",
+                    ("public",),
+                )
+                seqs = set(seq for (seq,) in cur.fetchall())
+                for seq in seqs:
+                    cur.execute("ALTER SEQUENCE %s RESTART;" % seq)
+            cnx.commit()
+
+    def init(self):
+        """Initialize db. Create the db if it does not exist. Reset it if it exists."""
+        with self.cursor() as cur:
+            cur.execute(
+                "SELECT COUNT(1) FROM pg_database WHERE datname=%s;", (self.db_name,)
+            )
+            db_exists = cur.fetchone()[0] == 1
+            if db_exists:
+                cur.execute(
+                    "UPDATE pg_database SET datallowconn=true WHERE datname = %s;",
+                    (self.db_name,),
+                )
+                self.db_reset()
+                return
+
+        # initialize the inexistent db
+        with self.cursor() as cur:
+            cur.execute('CREATE DATABASE "{}";'.format(self.db_name))
+        self.db_setup()
+
+    def drop(self):
+        """The original DatabaseJanitor implementation prevents new connections from happening,
+           destroys current opened connections and finally drops the database.
+
+           We actually do not want to drop the db so we instead do nothing and resets
+           (truncate most tables and sequences) the db instead, in order to have some
+           acceptable performance.
+
+        """
+        pass
diff --git a/swh/core/db/pytest_plugin.py b/swh/core/db/pytest_plugin.py
new file mode 100644
index 0000000..56e3305
--- /dev/null
+++ b/swh/core/db/pytest_plugin.py
@@ -0,0 +1,62 @@
+# Copyright (C) 2020  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import logging
+from typing import Optional, Set
+
+import psycopg2
+import pytest
+from pytest_postgresql import factories
+
+from swh.core.db.db_utils import SWHDatabaseJanitor
+
+logger = logging.getLogger(__name__)
+
+
+# the postgres_fact factory fixture below is mostly a copy of the code
+# from pytest-postgresql. We need a custom version here to be able to
+# specify our version of the DBJanitor we use.
+def postgresql_fact(
+    process_fixture_name: str,
+    db_name: Optional[str] = None,
+    dump_files: str = "",
+    no_truncate_tables: Set[str] = {"dbversion"},
+):
+    @pytest.fixture
+    def postgresql_factory(request):
+        """Fixture factory for PostgreSQL.
+
+        :param FixtureRequest request: fixture request object
+        :rtype: psycopg2.connection
+        :returns: postgresql client
+        """
+        config = factories.get_config(request)
+        proc_fixture = request.getfixturevalue(process_fixture_name)
+
+        pg_host = proc_fixture.host
+        pg_port = proc_fixture.port
+        pg_user = proc_fixture.user
+        pg_options = proc_fixture.options
+        pg_db = db_name or config["dbname"]
+        with SWHDatabaseJanitor(
+            pg_user,
+            pg_host,
+            pg_port,
+            pg_db,
+            proc_fixture.version,
+            dump_files=dump_files,
+            no_truncate_tables=no_truncate_tables,
+        ):
+            connection = psycopg2.connect(
+                dbname=pg_db,
+                user=pg_user,
+                host=pg_host,
+                port=pg_port,
+                options=pg_options,
+            )
+            yield connection
+            connection.close()
+
+    return postgresql_factory
diff --git a/swh/core/db/tests/data/0-schema.sql b/swh/core/db/tests/data/0-schema.sql
new file mode 100644
index 0000000..e6b008b
--- /dev/null
+++ b/swh/core/db/tests/data/0-schema.sql
@@ -0,0 +1,19 @@
+-- schema version table which won't get truncated
+create table dbversion (
+  version     int primary key,
+  release     timestamptz,
+  description text
+);
+
+-- a people table which won't get truncated
+create table people (
+  fullname text not null
+);
+
+-- a fun table which will get truncated for each test
+create table fun (
+    time timestamptz not null
+);
+
+-- one sequence to check for reset as well
+create sequence serial;
diff --git a/swh/core/db/tests/data/1-data.sql b/swh/core/db/tests/data/1-data.sql
new file mode 100644
index 0000000..8680263
--- /dev/null
+++ b/swh/core/db/tests/data/1-data.sql
@@ -0,0 +1,15 @@
+-- insert some values in dbversion
+insert into dbversion(version, release, description) values (1, '2016-02-22 15:56:28.358587+00', 'Work In Progress');
+insert into dbversion(version, release, description) values (2, '2016-02-24 18:05:54.887217+00', 'Work In Progress');
+insert into dbversion(version, release, description) values (3, '2016-10-21 14:10:18.629763+00', 'Work In Progress');
+insert into dbversion(version, release, description) values (4, '2017-08-08 19:01:11.723113+00', 'Work In Progress');
+insert into dbversion(version, release, description) values (7, '2018-03-30 12:58:39.256679+00', 'Work In Progress');
+
+insert into fun(time) values ('2020-10-19 09:00:00.666999+00');
+insert into fun(time) values ('2020-10-18 09:00:00.666999+00');
+insert into fun(time) values ('2020-10-17 09:00:00.666999+00');
+
+select nextval('serial');
+
+insert into people(fullname) values ('dudess');
+insert into people(fullname) values ('dude');
diff --git a/swh/core/db/tests/test_db_utils.py b/swh/core/db/tests/test_db_utils.py
new file mode 100644
index 0000000..d4cb0fa
--- /dev/null
+++ b/swh/core/db/tests/test_db_utils.py
@@ -0,0 +1,142 @@
+# Copyright (C) 2020  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+
+from swh.core.db import BaseDb
+from swh.core.db.pytest_plugin import postgresql_fact
+
+SQL_DIR = os.path.join(os.path.dirname(__file__), "data")
+
+
+# db with special policy for tables dbversion and people
+postgres_fun = postgresql_fact(
+    "postgresql_proc",
+    db_name="fun",
+    dump_files=f"{SQL_DIR}/*.sql",
+    no_truncate_tables={"dbversion", "people"},
+)
+
+
+def test_smoke_test_fun_db_is_up(postgres_fun):
+    """This ensures the db is created and configured according to its dumps files.
+
+    """
+    with BaseDb.connect(postgres_fun.dsn).cursor() as cur:
+        cur.execute("select count(*) from dbversion")
+        nb_rows = cur.fetchone()[0]
+        assert nb_rows == 5
+
+        cur.execute("select count(*) from fun")
+        nb_rows = cur.fetchone()[0]
+        assert nb_rows == 3
+
+        cur.execute("select count(*) from people")
+        nb_rows = cur.fetchone()[0]
+        assert nb_rows == 2
+
+        # in data, we requested a value already so it starts at 2
+        cur.execute("select nextval('serial')")
+        val = cur.fetchone()[0]
+        assert val == 2
+
+
+def test_smoke_test_fun_db_is_still_up_and_got_reset(postgres_fun):
+    """This ensures that within another tests, the 'fun' db is still up, created (and not
+    configured again). This time, most of the data has been reset:
+    - except for tables 'dbversion' and 'people' which were left as is
+    - the other tables from the schema (here only "fun") got truncated
+    - the sequences got truncated as well
+
+    """
+    with BaseDb.connect(postgres_fun.dsn).cursor() as cur:
+        # db version is excluded from the truncate
+        cur.execute("select count(*) from dbversion")
+        nb_rows = cur.fetchone()[0]
+        assert nb_rows == 5
+
+        # people is also allowed not to be truncated
+        cur.execute("select count(*) from people")
+        nb_rows = cur.fetchone()[0]
+        assert nb_rows == 2
+
+        # table and sequence are reset
+        cur.execute("select count(*) from fun")
+        nb_rows = cur.fetchone()[0]
+        assert nb_rows == 0
+
+        cur.execute("select nextval('serial')")
+        val = cur.fetchone()[0]
+        assert val == 1
+
+
+# db with no special policy for tables truncation, all tables are reset
+postgres_people = postgresql_fact(
+    "postgresql_proc",
+    db_name="people",
+    dump_files=f"{SQL_DIR}/*.sql",
+    no_truncate_tables=set(),
+)
+
+
+def test_smoke_test_people_db_up(postgres_people):
+    """'people' db is up and configured
+
+    """
+    with BaseDb.connect(postgres_people.dsn).cursor() as cur:
+        cur.execute("select count(*) from dbversion")
+        nb_rows = cur.fetchone()[0]
+        assert nb_rows == 5
+
+        cur.execute("select count(*) from people")
+        nb_rows = cur.fetchone()[0]
+        assert nb_rows == 2
+
+        cur.execute("select count(*) from fun")
+        nb_rows = cur.fetchone()[0]
+        assert nb_rows == 3
+
+        cur.execute("select nextval('serial')")
+        val = cur.fetchone()[0]
+        assert val == 2
+
+
+def test_smoke_test_people_db_up_and_reset(postgres_people):
+    """'people' db is up and got reset on every tables and sequences
+
+    """
+    with BaseDb.connect(postgres_people.dsn).cursor() as cur:
+        # tables are truncated after the first round
+        cur.execute("select count(*) from dbversion")
+        nb_rows = cur.fetchone()[0]
+        assert nb_rows == 0
+
+        # tables are truncated after the first round
+        cur.execute("select count(*) from people")
+        nb_rows = cur.fetchone()[0]
+        assert nb_rows == 0
+
+        # table and sequence are reset
+        cur.execute("select count(*) from fun")
+        nb_rows = cur.fetchone()[0]
+        assert nb_rows == 0
+
+        cur.execute("select nextval('serial')")
+        val = cur.fetchone()[0]
+        assert val == 1
+
+
+# db with no initialization step, an empty db
+postgres_no_init = postgresql_fact("postgresql_proc", db_name="something")
+
+
+def test_smoke_test_db_no_init(postgres_no_init):
+    """We can connect to the db nonetheless
+
+    """
+    with BaseDb.connect(postgres_no_init.dsn).cursor() as cur:
+        cur.execute("select now()")
+        data = cur.fetchone()[0]
+        assert data is not None