diff --git a/MANIFEST.in b/MANIFEST.in index e7c46fc..e19b7ae 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,5 @@ include Makefile include requirements.txt include requirements-swh.txt include version.txt +recursive-include swh/core/sql *.sql diff --git a/PKG-INFO b/PKG-INFO index 7f11cae..dd31e6c 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,59 +1,59 @@ Metadata-Version: 2.1 Name: swh.core -Version: 0.0.43 +Version: 0.0.44 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Description: swh-core ======== core library for swh's modules: - config parser - hash computations - serialization - logging mechanism Defines also a celery application to run concurrency tasks Celery use ---------- ### configuration file worker.ini file which looks like: [main] task_broker = amqp://guest@localhost// task_modules = swh.loader.dir.tasks, swh.loader.tar.tasks, swh.loader.git.tasks task_queues = swh_loader_tar, swh_loader_git, swh_loader_dir task_soft_time_limit = 0 This file can be set in the following location: - ~/.swh - ~/.config/swh - /etc/softwareheritage ### run celery worker Sample command: celery worker --app=swh.core.worker \ --pool=prefork \ --autoscale=2,2 \ -Ofair \ --loglevel=info 2>&1 | tee -a swh-core-worker.log Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing diff --git a/requirements-test.txt b/requirements-test.txt index 145e520..4dfbdaa 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,2 +1,2 @@ -nose +pytest requests-mock diff --git a/setup.py b/setup.py index 5755b09..25fc474 100755 --- a/setup.py +++ b/setup.py @@ -1,66 +1,69 @@ #!/usr/bin/env python3 # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from setuptools import setup, find_packages from os import path from io import open here = path.abspath(path.dirname(__file__)) # Get the long description from the README file with open(path.join(here, 'README.md'), encoding='utf-8') as f: long_description = f.read() def parse_requirements(name=None): if name: reqf = 'requirements-%s.txt' % name else: reqf = 'requirements.txt' requirements = [] if not os.path.exists(reqf): return requirements with open(reqf) as f: for line in f.readlines(): line = line.strip() if not line or line.startswith('#'): continue requirements.append(line) return requirements setup( name='swh.core', description='Software Heritage core utilities', long_description=long_description, long_description_content_type='text/markdown', author='Software Heritage developers', author_email='swh-devel@inria.fr', url='https://forge.softwareheritage.org/diffusion/DCORE/', packages=find_packages(), scripts=[], install_requires=parse_requirements() + parse_requirements('swh'), setup_requires=['vcversioner'], 
extras_require={'testing': parse_requirements('test')}, vcversioner={}, include_package_data=True, + entry_points={ + 'console_scripts': ['swh-db-init=swh.core.cli:db_init'], + }, classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Development Status :: 5 - Production/Stable", ], project_urls={ 'Bug Reports': 'https://forge.softwareheritage.org/maniphest', 'Funding': 'https://www.softwareheritage.org/donate', 'Source': 'https://forge.softwareheritage.org/source/swh-core', }, ) diff --git a/swh.core.egg-info/PKG-INFO b/swh.core.egg-info/PKG-INFO index 7f11cae..dd31e6c 100644 --- a/swh.core.egg-info/PKG-INFO +++ b/swh.core.egg-info/PKG-INFO @@ -1,59 +1,59 @@ Metadata-Version: 2.1 Name: swh.core -Version: 0.0.43 +Version: 0.0.44 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Description: swh-core ======== core library for swh's modules: - config parser - hash computations - serialization - logging mechanism Defines also a celery application to run concurrency tasks Celery use ---------- ### configuration file worker.ini file which looks like: [main] task_broker = amqp://guest@localhost// task_modules = swh.loader.dir.tasks, swh.loader.tar.tasks, swh.loader.git.tasks task_queues = swh_loader_tar, swh_loader_git, swh_loader_dir task_soft_time_limit = 0 This file can be set in the following location: - ~/.swh - ~/.config/swh - /etc/softwareheritage ### run celery worker Sample command: celery worker --app=swh.core.worker \ --pool=prefork \ --autoscale=2,2 \ -Ofair \ --loglevel=info 2>&1 | tee -a swh-core-worker.log Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing diff --git a/swh.core.egg-info/SOURCES.txt b/swh.core.egg-info/SOURCES.txt index e17ec53..6893a6b 100644 --- a/swh.core.egg-info/SOURCES.txt +++ b/swh.core.egg-info/SOURCES.txt @@ -1,47 +1,49 @@ .gitignore AUTHORS LICENSE MANIFEST.in Makefile README.md requirements-swh.txt requirements-test.txt requirements.txt setup.py tox.ini version.txt debian/changelog debian/compat debian/control debian/copyright debian/rules debian/source/format docs/.gitignore docs/Makefile docs/conf.py docs/index.rst docs/_static/.placeholder docs/_templates/.placeholder -sql/log-schema.sql swh/__init__.py swh.core.egg-info/PKG-INFO swh.core.egg-info/SOURCES.txt swh.core.egg-info/dependency_links.txt +swh.core.egg-info/entry_points.txt swh.core.egg-info/requires.txt swh.core.egg-info/top_level.txt swh/core/__init__.py swh/core/api.py swh/core/api_async.py +swh/core/cli.py swh/core/config.py swh/core/logger.py swh/core/serializers.py swh/core/tarball.py swh/core/utils.py +swh/core/sql/log-schema.sql swh/core/tests/__init__.py swh/core/tests/db_testing.py swh/core/tests/server_testing.py swh/core/tests/test_api.py swh/core/tests/test_config.py swh/core/tests/test_logger.py 
swh/core/tests/test_serializers.py swh/core/tests/test_utils.py \ No newline at end of file diff --git a/swh.core.egg-info/entry_points.txt b/swh.core.egg-info/entry_points.txt new file mode 100644 index 0000000..6b673cb --- /dev/null +++ b/swh.core.egg-info/entry_points.txt @@ -0,0 +1,3 @@ +[console_scripts] +swh-db-init = swh.core.cli:db_init + diff --git a/swh.core.egg-info/requires.txt b/swh.core.egg-info/requires.txt index 0f40f15..bd2ef50 100644 --- a/swh.core.egg-info/requires.txt +++ b/swh.core.egg-info/requires.txt @@ -1,14 +1,14 @@ Flask PyYAML aiohttp arrow msgpack-python psycopg2 python-dateutil requests systemd-python vcversioner [testing] -nose +pytest requests-mock diff --git a/swh/core/cli.py b/swh/core/cli.py new file mode 100755 index 0000000..1da8bdb --- /dev/null +++ b/swh/core/cli.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +# Copyright (C) 2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import warnings +warnings.filterwarnings("ignore") # noqa prevent psycopg from telling us sh*t + +from os import path +import glob + +import click +from importlib import import_module + +from swh.core.utils import numfile_sortkey as sortkey +from swh.core.tests.db_testing import pg_createdb, pg_restore, DB_DUMP_TYPES + + +@click.command() +@click.argument('module', nargs=-1, required=True) +@click.option('--db-name', '-d', help='Database name.', + default='softwareheritage-dev', show_default=True) +@click.option('--no-create', '-C', is_flag=True, + help='Do not attempt to create the database', default=False) +def db_init(module, db_name=None, no_create=None): + """Create and initialise a database for the given Software Heritage module(s). + + Example: + + swh-db-init storage -d swh-test + + If you want to specify non-default PostgreSQL connection parameters, + please provide them using standard environment variables. + See the psql(1) man page (section ENVIRONMENT) for details.
+ + Example: + + PGPORT=5434 swh-db-init indexer -d swh-indexer + + """ + + dump_files = [] + + for modname in module: + if not modname.startswith('swh.'): + modname = 'swh.{}'.format(modname) + try: + m = import_module(modname) + except ImportError: + raise click.BadParameter( + 'Unable to load module {}'.format(modname)) + + sqldir = path.join(path.dirname(m.__file__), 'sql') + if not path.isdir(sqldir): + raise click.BadParameter( + 'Module {} does not provide a db schema ' + '(no sql/ dir)'.format(modname)) + dump_files.extend(sorted(glob.glob(path.join(sqldir, '*.sql')), + key=sortkey)) + if not no_create: + pg_createdb(db_name) + + dump_files = [(x, DB_DUMP_TYPES[path.splitext(x)[1]]) + for x in dump_files] + for dump, dtype in dump_files: + click.secho('Loading {}'.format(dump), fg='yellow') + pg_restore(db_name, dump, dtype) + + click.secho('DONE database is {}'.format(db_name), fg='green', bold=True) diff --git a/swh/core/logger.py b/swh/core/logger.py index 06e0847..ecced63 100644 --- a/swh/core/logger.py +++ b/swh/core/logger.py @@ -1,192 +1,192 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import logging import os import socket import psycopg2 from psycopg2.extras import Json from systemd.journal import JournalHandler as _JournalHandler, send try: from celery import current_task except ImportError: current_task = None EXTRA_LOGDATA_PREFIX = 'swh_' def db_level_of_py_level(lvl): """convert a log level of the logging module to a log level suitable for the logging Postgres DB """ return logging.getLevelName(lvl).lower() def get_extra_data(record, task_args=True): """Get the extra data to insert into the database from the logging record""" log_data = record.__dict__ extra_data = {k[len(EXTRA_LOGDATA_PREFIX):]: v for k, v in log_data.items() if k.startswith(EXTRA_LOGDATA_PREFIX)} args = log_data.get('args') if args: extra_data['logging_args'] = args # Retrieve Celery task info if current_task and current_task.request: extra_data['task'] = { 'id': current_task.request.id, 'name': current_task.name, } if task_args: extra_data['task'].update({ 'kwargs': current_task.request.kwargs, 'args': current_task.request.args, }) return extra_data def flatten(data, separator='_'): """Flatten the data dictionary into a flat structure""" def inner_flatten(data, prefix): if isinstance(data, dict): for key, value in data.items(): yield from inner_flatten(value, prefix + [key]) elif isinstance(data, (list, tuple)): for key, value in enumerate(data): yield from inner_flatten(value, prefix + [str(key)]) else: yield prefix, data for path, value in inner_flatten(data, []): yield separator.join(path), value def stringify(value): """Convert value to string""" if isinstance(value, datetime.datetime): return value.isoformat() return str(value) class PostgresHandler(logging.Handler): """log handler that stores messages in a Postgres DB - See swh-core/sql/log-schema.sql for the DB schema. + See swh-core/swh/core/sql/log-schema.sql for the DB schema. All logging methods can be used as usual. Additionally, arbitrary metadata can be passed to logging methods, which will be stored in the DB as a single JSONB value.
To do so, pass a dictionary to the 'extra' kwarg of any logging method; all keys in that dictionary that start with EXTRA_LOGDATA_PREFIX (currently: 'swh\_') will be extracted to form the JSONB dictionary. The prefix will be stripped and not included in the DB. Note: the logger name will be used to fill the 'module' DB column. Sample usage:: logging.basicConfig(level=logging.INFO) h = PostgresHandler('dbname=softwareheritage-log') logger = logging.getLogger() logger.addHandler(h) logger.info('not so important notice', extra={'swh_type': 'swh_logging_test', 'swh_meditation': 'guru'}) logger.warning('something weird just happened, did you see that?') """ def __init__(self, connstring): """ Create a Postgres log handler. Args: connstring: libpq connection string to the log DB """ super().__init__() self.connstring = connstring self.fqdn = socket.getfqdn() # cache FQDN value def _connect(self): return psycopg2.connect(self.connstring) def emit(self, record): msg = self.format(record) extra_data = get_extra_data(record) if 'task' in extra_data: task_args = { 'args': extra_data['task']['args'], 'kwargs': extra_data['task']['kwargs'], } try: json_args = Json(task_args).getquoted() except TypeError: task_args = { 'args': [''], 'kwargs': {}, } else: json_args_length = len(json_args) if json_args_length >= 1000: task_args = { 'args': [''], 'kwargs': {}, } extra_data['task'].update(task_args) log_entry = (db_level_of_py_level(record.levelno), msg, Json(extra_data), record.name, self.fqdn, os.getpid()) db = self._connect() with db.cursor() as cur: cur.execute('INSERT INTO log ' '(level, message, data, src_module, src_host, src_pid)' 'VALUES (%s, %s, %s, %s, %s, %s)', log_entry) db.commit() db.close() class JournalHandler(_JournalHandler): def emit(self, record): """Write `record` as a journal event. MESSAGE is taken from the message provided by the user, and PRIORITY, LOGGER, THREAD_NAME, CODE_{FILE,LINE,FUNC} fields are appended automatically. In addition, record.MESSAGE_ID will be used if present. """ try: extra_data = flatten(get_extra_data(record, task_args=False)) extra_data = { (EXTRA_LOGDATA_PREFIX + key).upper(): stringify(value) for key, value in extra_data } msg = self.format(record) pri = self.mapPriority(record.levelno) send(msg, PRIORITY=format(pri), LOGGER=record.name, THREAD_NAME=record.threadName, CODE_FILE=record.pathname, CODE_LINE=record.lineno, CODE_FUNC=record.funcName, **extra_data) except Exception: self.handleError(record) diff --git a/swh/core/serializers.py b/swh/core/serializers.py index b3c4a19..49bd7e0 100644 --- a/swh/core/serializers.py +++ b/swh/core/serializers.py @@ -1,175 +1,186 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import arrow import base64 import datetime from json import JSONDecoder, JSONEncoder import types from uuid import UUID import dateutil.parser import msgpack def encode_data_client(data): try: return msgpack_dumps(data) except OverflowError as e: raise ValueError('Limits were reached.
Please check your input.\n' + str(e)) def decode_response(response): content_type = response.headers['content-type'] if content_type.startswith('application/x-msgpack'): r = msgpack_loads(response.content) elif content_type.startswith('application/json'): r = response.json(cls=SWHJSONDecoder) else: raise ValueError('Wrong content type `%s` for API response' % content_type) return r class SWHJSONEncoder(JSONEncoder): """JSON encoder for data structures generated by Software Heritage. This JSON encoder extends the default Python JSON encoder and adds awareness for the following specific types: - bytes (get encoded as a Base85 string); - datetime.datetime (get encoded as an ISO8601 string). Non-standard types get encoded as a dictionary with two keys: - swhtype with value 'bytes' or 'datetime'; - d containing the encoded value. SWHJSONEncoder also encodes arbitrary iterables as a list (allowing serialization of generators). Caveats: Limitations in the JSONEncoder extension mechanism prevent us from "escaping" dictionaries that only contain the swhtype and d keys, and therefore arbitrary data structures can't be round-tripped through SWHJSONEncoder and SWHJSONDecoder. """ def default(self, o): if isinstance(o, bytes): return { 'swhtype': 'bytes', 'd': base64.b85encode(o).decode('ascii'), } elif isinstance(o, datetime.datetime): return { 'swhtype': 'datetime', 'd': o.isoformat(), } elif isinstance(o, UUID): return { 'swhtype': 'uuid', 'd': str(o), } elif isinstance(o, datetime.timedelta): return { 'swhtype': 'timedelta', - 'd': repr(o), + 'd': { + 'days': o.days, + 'seconds': o.seconds, + 'microseconds': o.microseconds, + }, } elif isinstance(o, arrow.Arrow): return { 'swhtype': 'arrow', 'd': o.isoformat(), } try: return super().default(o) except TypeError as e: try: iterable = iter(o) except TypeError: raise e from None else: return list(iterable) class SWHJSONDecoder(JSONDecoder): """JSON decoder for data structures encoded with SWHJSONEncoder. This JSON decoder extends the default Python JSON decoder, allowing the decoding of: - bytes (encoded as a Base85 string); - datetime.datetime (encoded as an ISO8601 string). Non-standard types must be encoded as a dictionary with exactly two keys: - swhtype with value 'bytes' or 'datetime'; - d containing the encoded value. To limit the impact of our encoding, if the swhtype key doesn't contain a known value, the dictionary is decoded as-is.
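For instance (an illustrative sketch of the dict-based encoding introduced above): json.dumps(datetime.timedelta(days=64), cls=SWHJSONEncoder) produces {"swhtype": "timedelta", "d": {"days": 64, "seconds": 0, "microseconds": 0}}, and this decoder rebuilds the value with datetime.timedelta(**d) instead of eval()-ing a repr() string.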
""" def decode_data(self, o): if isinstance(o, dict): if set(o.keys()) == {'d', 'swhtype'}: datatype = o['swhtype'] if datatype == 'bytes': return base64.b85decode(o['d']) elif datatype == 'datetime': return dateutil.parser.parse(o['d']) elif datatype == 'uuid': return UUID(o['d']) elif datatype == 'timedelta': - return eval(o['d']) + return datetime.timedelta(**o['d']) elif datatype == 'arrow': return arrow.get(o['d']) return {key: self.decode_data(value) for key, value in o.items()} if isinstance(o, list): return [self.decode_data(value) for value in o] else: return o def raw_decode(self, s, idx=0): data, index = super().raw_decode(s, idx) return self.decode_data(data), index def msgpack_dumps(data): """Write data as a msgpack stream""" def encode_types(obj): if isinstance(obj, datetime.datetime): return {b'__datetime__': True, b's': obj.isoformat()} if isinstance(obj, types.GeneratorType): return list(obj) if isinstance(obj, UUID): return {b'__uuid__': True, b's': str(obj)} if isinstance(obj, datetime.timedelta): - return {b'__timedelta__': True, b's': repr(obj)} + return { + b'__timedelta__': True, + b's': { + 'days': obj.days, + 'seconds': obj.seconds, + 'microseconds': obj.microseconds, + }, + } if isinstance(obj, arrow.Arrow): return {b'__arrow__': True, b's': obj.isoformat()} return obj return msgpack.packb(data, use_bin_type=True, default=encode_types) def msgpack_loads(data): """Read data as a msgpack stream""" def decode_types(obj): if b'__datetime__' in obj and obj[b'__datetime__']: return dateutil.parser.parse(obj[b's']) if b'__uuid__' in obj and obj[b'__uuid__']: return UUID(obj[b's']) if b'__timedelta__' in obj and obj[b'__timedelta__']: - return eval(obj[b's']) + return datetime.timedelta(**obj[b's']) if b'__arrow__' in obj and obj[b'__arrow__']: return arrow.get(obj[b's']) return obj return msgpack.unpackb(data, encoding='utf-8', object_hook=decode_types) diff --git a/sql/log-schema.sql b/swh/core/sql/log-schema.sql similarity index 100% rename from sql/log-schema.sql rename to swh/core/sql/log-schema.sql diff --git a/swh/core/tests/__init__.py b/swh/core/tests/__init__.py index e69de29..0e407c3 100644 --- a/swh/core/tests/__init__.py +++ b/swh/core/tests/__init__.py @@ -0,0 +1,5 @@ +from os import path +import swh.core + + +SQL_DIR = path.join(path.dirname(swh.core.__file__), 'sql') diff --git a/swh/core/tests/db_testing.py b/swh/core/tests/db_testing.py index 035945d..7c7971d 100644 --- a/swh/core/tests/db_testing.py +++ b/swh/core/tests/db_testing.py @@ -1,266 +1,281 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import os +import glob import psycopg2 import subprocess +from swh.core.utils import numfile_sortkey as sortkey + +DB_DUMP_TYPES = {'.sql': 'psql', '.dump': 'pg_dump'} + def pg_restore(dbname, dumpfile, dumptype='pg_dump'): """ Args: dbname: name of the DB to restore into dumpfile: path fo the dump file dumptype: one of 'pg_dump' (for binary dumps), 'psql' (for SQL dumps) """ assert dumptype in ['pg_dump', 'psql'] if dumptype == 'pg_dump': subprocess.check_call(['pg_restore', '--no-owner', '--no-privileges', '--dbname', dbname, dumpfile]) elif dumptype == 'psql': subprocess.check_call(['psql', '--quiet', '--no-psqlrc', '-v', 'ON_ERROR_STOP=1', '-f', dumpfile, dbname]) def pg_dump(dbname, dumpfile): subprocess.check_call(['pg_dump', '--no-owner', 
'--no-privileges', '-Fc', '-f', dumpfile, dbname]) def pg_dropdb(dbname): subprocess.check_call(['dropdb', dbname]) def pg_createdb(dbname): subprocess.check_call(['createdb', dbname]) -def db_create(dbname, dump=None, dumptype='pg_dump'): - """create the test DB and load the test data dump into it +def db_create(dbname, dumps=None): + """create the test DB and load the test data dumps into it + + dumps is an iterable of pairs (dump_file, dump_type). context: setUpClass """ try: pg_createdb(dbname) except subprocess.CalledProcessError: # try recovering once, in case pg_dropdb(dbname) # the db already existed pg_createdb(dbname) - if dump: - pg_restore(dbname, dump, dumptype) + for dump, dtype in dumps: + pg_restore(dbname, dump, dtype) return dbname def db_destroy(dbname): """destroy the test DB context: tearDownClass """ pg_dropdb(dbname) def db_connect(dbname): """connect to the test DB and open a cursor context: setUp """ conn = psycopg2.connect('dbname=' + dbname) return { 'conn': conn, 'cursor': conn.cursor() } def db_close(conn): """rollback current transaction and disconnect from the test DB context: tearDown """ if not conn.closed: conn.rollback() conn.close() class DbTestConn: def __init__(self, dbname): self.dbname = dbname def __enter__(self): self.db_setup = db_connect(self.dbname) self.conn = self.db_setup['conn'] self.cursor = self.db_setup['cursor'] return self def __exit__(self, *_): db_close(self.conn) class DbTestContext: - def __init__(self, name='softwareheritage-test', dump=None, - dump_type='pg_dump'): + def __init__(self, name='softwareheritage-test', dumps=None): self.dbname = name - self.dump = dump - self.dump_type = dump_type + self.dumps = dumps def __enter__(self): db_create(dbname=self.dbname, - dump=self.dump, - dumptype=self.dump_type) + dumps=self.dumps) return self def __exit__(self, *_): db_destroy(self.dbname) class DbTestFixture: """Mix this in a test subject class to get DB testing support. Use the class method add_db() to add a new database to be tested. Using this will create a DbTestConn entry in the `test_db` dictionary for all the tests, indexed by the name of the database. Example: class TestDb(DbTestFixture, unittest.TestCase): @classmethod def setUpClass(cls): cls.add_db('db_name', DUMP) super().setUpClass() def setUp(self): db = self.test_db['db_name'] print('conn: {}, cursor: {}'.format(db.conn, db.cursor)) To ensure test isolation, each test method of the test case class will execute in its own connection, cursor, and transaction. Note that if you want to define setup/teardown methods, you need to explicitly call super() to ensure that the fixture setup/teardown methods are invoked.
Here is an example where all setup/teardown methods are defined in a test case: class TestDb(DbTestFixture, unittest.TestCase): @classmethod def setUpClass(cls): # your add_db() calls here super().setUpClass() # your class setup code here def setUp(self): super().setUp() # your instance setup code here def tearDown(self): # your instance teardown code here super().tearDown() @classmethod def tearDownClass(cls): # your class teardown code here super().tearDownClass() """ _DB_DUMP_LIST = {} _DB_LIST = {} DB_TEST_FIXTURE_IMPORTED = True @classmethod - def add_db(cls, name='softwareheritage-test', dump=None, - dump_type='pg_dump'): - cls._DB_DUMP_LIST[name] = (dump, dump_type) + def add_db(cls, name='softwareheritage-test', dumps=None): + cls._DB_DUMP_LIST[name] = dumps @classmethod def setUpClass(cls): - for name, (dump, dump_type) in cls._DB_DUMP_LIST.items(): - cls._DB_LIST[name] = DbTestContext(name, dump, dump_type) + for name, dumps in cls._DB_DUMP_LIST.items(): + cls._DB_LIST[name] = DbTestContext(name, dumps) cls._DB_LIST[name].__enter__() super().setUpClass() @classmethod def tearDownClass(cls): super().tearDownClass() for name, context in cls._DB_LIST.items(): context.__exit__() - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.test_db = {} - def setUp(self): self.test_db = {} for name in self._DB_LIST.keys(): self.test_db[name] = DbTestConn(name) self.test_db[name].__enter__() super().setUp() def tearDown(self): super().tearDown() for name in self._DB_LIST.keys(): self.test_db[name].__exit__() def reset_db_tables(self, name, excluded=None): db = self.test_db[name] conn = db.conn cursor = db.cursor cursor.execute("""SELECT table_name FROM information_schema.tables WHERE table_schema = %s""", ('public',)) tables = set(table for (table,) in cursor.fetchall()) if excluded is not None: tables -= set(excluded) for table in tables: cursor.execute('truncate table %s cascade' % table) conn.commit() class SingleDbTestFixture(DbTestFixture): """Simplified fixture like DbTestFixture but that can only handle a single DB. Gives access to shortcuts like self.cursor and self.conn. DO NOT use this with other fixtures that need to access databases, like StorageTestFixture. The class can override the following class attributes: TEST_DB_NAME: name of the DB used for testing TEST_DB_DUMP: DB dump to be restored before running test methods; can - be set to None if no restore from dump is required - TEST_DB_DUMP_TYPE: one of 'pg_dump' (binary dump) or 'psql' (SQL dump) + be set to None if no restore from dump is required. + If the dump file name ends with: + - '.sql' it will be loaded via psql, + - '.dump' it will be loaded via pg_restore. + Other file extensions will be ignored. + Can be a string or a list of strings; each path will be expanded + using glob pattern matching. The test case class will then have the following attributes, accessible via self: dbname: name of the test database conn: psycopg2 connection object cursor: open psycopg2 cursor to the DB """ TEST_DB_NAME = 'softwareheritage-test' TEST_DB_DUMP = None - TEST_DB_DUMP_TYPE = 'pg_dump' @classmethod def setUpClass(cls): - cls.dbname = cls.TEST_DB_NAME + cls.dbname = cls.TEST_DB_NAME # XXX to kill?
+ + dump_files = cls.TEST_DB_DUMP + if isinstance(dump_files, str): + dump_files = [dump_files] + all_dump_files = [] + for files in dump_files: + all_dump_files.extend( + sorted(glob.glob(files), key=sortkey)) + + all_dump_files = [(x, DB_DUMP_TYPES[os.path.splitext(x)[1]]) + for x in all_dump_files] + cls.add_db(name=cls.TEST_DB_NAME, - dump=cls.TEST_DB_DUMP, - dump_type=cls.TEST_DB_DUMP_TYPE) + dumps=all_dump_files) super().setUpClass() def setUp(self): super().setUp() db = self.test_db[self.TEST_DB_NAME] self.conn = db.conn self.cursor = db.cursor diff --git a/swh/core/tests/test_config.py b/swh/core/tests/test_config.py index bdabb1a..b0461b4 100644 --- a/swh/core/tests/test_config.py +++ b/swh/core/tests/test_config.py @@ -1,210 +1,210 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import shutil import tempfile import unittest from swh.core import config class ConfReaderTest(unittest.TestCase): @classmethod def setUpClass(cls): # create a temporary folder cls.tmpdir = tempfile.mkdtemp(prefix='test-swh-core.') cls.conffile = os.path.join(cls.tmpdir, 'config.ini') conf_contents = """[main] a = 1 b = this is a string c = true h = false ls = list, of, strings li = 1, 2, 3, 4 """ with open(cls.conffile, 'w') as conf: conf.write(conf_contents) cls.non_existing_conffile = os.path.join(cls.tmpdir, 'config-nonexisting.ini') # Create an unreadable, proper configuration file cls.perms_broken_file = os.path.join(cls.tmpdir, 'unreadable.ini') with open(cls.perms_broken_file, 'w') as conf: conf.write(conf_contents) os.chmod(cls.perms_broken_file, 0o000) # Create a proper configuration file in an unreadable directory cls.perms_broken_dir = os.path.join(cls.tmpdir, 'unreadabledir') cls.file_in_broken_dir = os.path.join(cls.perms_broken_dir, 'unreadable.ini') os.makedirs(cls.perms_broken_dir) with open(cls.file_in_broken_dir, 'w') as conf: conf.write(conf_contents) os.chmod(cls.perms_broken_dir, 0o000) cls.empty_conffile = os.path.join(cls.tmpdir, 'empty.ini') open(cls.empty_conffile, 'w').close() cls.default_conf = { 'a': ('int', 2), 'b': ('string', 'default-string'), 'c': ('bool', True), 'd': ('int', 10), 'e': ('int', None), 'f': ('bool', None), 'g': ('string', None), 'h': ('bool', True), 'i': ('bool', True), 'ls': ('list[str]', ['a', 'b', 'c']), 'li': ('list[int]', [42, 43]), } cls.other_default_conf = { 'a': ('int', 3), } cls.full_default_conf = cls.default_conf.copy() cls.full_default_conf['a'] = cls.other_default_conf['a'] cls.parsed_default_conf = { key: value for key, (type, value) in cls.default_conf.items() } cls.parsed_conffile = { 'a': 1, 'b': 'this is a string', 'c': True, 'd': 10, 'e': None, 'f': None, 'g': None, 'h': False, 'i': True, 'ls': ['list', 'of', 'strings'], 'li': [1, 2, 3, 4], } @classmethod def tearDownClass(cls): # Make the broken perms items readable again to be able to remove them os.chmod(cls.perms_broken_dir, 0o755) os.chmod(cls.perms_broken_file, 0o644) shutil.rmtree(cls.tmpdir) def test_read(self): # when res = config.read(self.conffile, self.default_conf) # then - self.assertEquals(res, self.parsed_conffile) + self.assertEqual(res, self.parsed_conffile) def test_read_empty_file(self): # when res = config.read(None, self.default_conf) # then - self.assertEquals(res, self.parsed_default_conf) + self.assertEqual(res, self.parsed_default_conf) def 
test_support_non_existing_conffile(self): # when res = config.read(self.non_existing_conffile, self.default_conf) # then - self.assertEquals(res, self.parsed_default_conf) + self.assertEqual(res, self.parsed_default_conf) def test_support_empty_conffile(self): # when res = config.read(self.empty_conffile, self.default_conf) # then - self.assertEquals(res, self.parsed_default_conf) + self.assertEqual(res, self.parsed_default_conf) def test_raise_on_broken_directory_perms(self): with self.assertRaises(PermissionError): config.read(self.file_in_broken_dir, self.default_conf) def test_raise_on_broken_file_perms(self): with self.assertRaises(PermissionError): config.read(self.perms_broken_file, self.default_conf) def test_merge_default_configs(self): # when res = config.merge_default_configs(self.default_conf, self.other_default_conf) # then - self.assertEquals(res, self.full_default_conf) + self.assertEqual(res, self.full_default_conf) def test_priority_read_nonexist_conf(self): # when res = config.priority_read([self.non_existing_conffile, self.conffile], self.default_conf) # then - self.assertEquals(res, self.parsed_conffile) + self.assertEqual(res, self.parsed_conffile) def test_priority_read_conf_nonexist_empty(self): # when res = config.priority_read([ self.conffile, self.non_existing_conffile, self.empty_conffile, ], self.default_conf) # then - self.assertEquals(res, self.parsed_conffile) + self.assertEqual(res, self.parsed_conffile) def test_priority_read_empty_conf_nonexist(self): # when res = config.priority_read([ self.empty_conffile, self.conffile, self.non_existing_conffile, ], self.default_conf) # then - self.assertEquals(res, self.parsed_default_conf) + self.assertEqual(res, self.parsed_default_conf) def test_swh_config_paths(self): res = config.swh_config_paths('foo/bar.ini') self.assertEqual(res, [ '~/.config/swh/foo/bar.ini', '~/.swh/foo/bar.ini', '/etc/softwareheritage/foo/bar.ini', ]) def test_prepare_folder(self): # given conf = {'path1': os.path.join(self.tmpdir, 'path1'), 'path2': os.path.join(self.tmpdir, 'path2', 'depth1')} # the folders does not exists self.assertFalse(os.path.exists(conf['path1']), "path1 should not exist.") self.assertFalse(os.path.exists(conf['path2']), "path2 should not exist.") # when config.prepare_folders(conf, 'path1') # path1 exists but not path2 self.assertTrue(os.path.exists(conf['path1']), "path1 should now exist!") self.assertFalse(os.path.exists(conf['path2']), "path2 should not exist.") # path1 already exists, skips it but creates path2 config.prepare_folders(conf, 'path1', 'path2') self.assertTrue(os.path.exists(conf['path1']), "path1 should still exist!") self.assertTrue(os.path.exists(conf['path2']), "path2 should now exist.") diff --git a/swh/core/tests/test_logger.py b/swh/core/tests/test_logger.py index 1cbe308..ba4c9f6 100644 --- a/swh/core/tests/test_logger.py +++ b/swh/core/tests/test_logger.py @@ -1,47 +1,45 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import os import unittest -from nose.plugins.attrib import attr +import pytest from swh.core.logger import PostgresHandler from swh.core.tests.db_testing import SingleDbTestFixture -TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -SQL_DIR = os.path.join(TEST_DIR, '../../../sql') +from swh.core.tests import SQL_DIR -@attr('db') +@pytest.mark.db class 
PgLogHandler(SingleDbTestFixture, unittest.TestCase): TEST_DB_DUMP = os.path.join(SQL_DIR, 'log-schema.sql') - TEST_DB_DUMP_TYPE = 'psql' def setUp(self): super().setUp() self.modname = 'swh.core.tests.test_logger' self.logger = logging.Logger(self.modname, logging.DEBUG) self.logger.addHandler(PostgresHandler('dbname=' + self.TEST_DB_NAME)) def tearDown(self): logging.shutdown() super().tearDown() def test_log(self): self.logger.info('notice', extra={'swh_type': 'test entry', 'swh_data': 42}) - self.logger.warn('warning') + self.logger.warning('warning') with self.conn.cursor() as cur: cur.execute('SELECT level, message, data, src_module FROM log') db_log_entries = cur.fetchall() self.assertIn(('info', 'notice', {'type': 'test entry', 'data': 42}, self.modname), db_log_entries) self.assertIn(('warning', 'warning', {}, self.modname), db_log_entries) diff --git a/swh/core/tests/test_serializers.py b/swh/core/tests/test_serializers.py index 166744a..f9e80e9 100644 --- a/swh/core/tests/test_serializers.py +++ b/swh/core/tests/test_serializers.py @@ -1,80 +1,81 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import json import unittest from uuid import UUID import arrow from swh.core.serializers import ( SWHJSONDecoder, SWHJSONEncoder, msgpack_dumps, msgpack_loads ) class Serializers(unittest.TestCase): def setUp(self): self.tz = datetime.timezone(datetime.timedelta(minutes=118)) self.data = { 'bytes': b'123456789\x99\xaf\xff\x00\x12', 'datetime_naive': datetime.datetime(2015, 1, 1, 12, 4, 42, 231455), 'datetime_tz': datetime.datetime(2015, 3, 4, 18, 25, 13, 1234, tzinfo=self.tz), 'datetime_utc': datetime.datetime(2015, 3, 4, 18, 25, 13, 1234, tzinfo=datetime.timezone.utc), 'datetime_delta': datetime.timedelta(64), 'arrow_date': arrow.get('2018-04-25T16:17:53.533672+00:00'), 'swhtype': 'fake', 'swh_dict': {'swhtype': 42, 'd': 'test'}, 'random_dict': {'swhtype': 43}, 'uuid': UUID('cdd8f804-9db6-40c3-93ab-5955d3836234'), } self.encoded_data = { 'bytes': {'swhtype': 'bytes', 'd': 'F)}kWH8wXmIhn8j01^'}, 'datetime_naive': {'swhtype': 'datetime', 'd': '2015-01-01T12:04:42.231455'}, 'datetime_tz': {'swhtype': 'datetime', 'd': '2015-03-04T18:25:13.001234+01:58'}, 'datetime_utc': {'swhtype': 'datetime', 'd': '2015-03-04T18:25:13.001234+00:00'}, 'datetime_delta': {'swhtype': 'timedelta', - 'd': 'datetime.timedelta(64)'}, + 'd': {'days': 64, 'seconds': 0, + 'microseconds': 0}}, 'arrow_date': {'swhtype': 'arrow', 'd': '2018-04-25T16:17:53.533672+00:00'}, 'swhtype': 'fake', 'swh_dict': {'swhtype': 42, 'd': 'test'}, 'random_dict': {'swhtype': 43}, 'uuid': {'swhtype': 'uuid', 'd': 'cdd8f804-9db6-40c3-93ab-5955d3836234'}, } self.generator = (i for i in range(5)) self.gen_lst = list(range(5)) def test_round_trip_json(self): data = json.dumps(self.data, cls=SWHJSONEncoder) self.assertEqual(self.data, json.loads(data, cls=SWHJSONDecoder)) def test_encode_swh_json(self): data = json.dumps(self.data, cls=SWHJSONEncoder) self.assertEqual(self.encoded_data, json.loads(data)) def test_round_trip_msgpack(self): data = msgpack_dumps(self.data) self.assertEqual(self.data, msgpack_loads(data)) def test_generator_json(self): data = json.dumps(self.generator, cls=SWHJSONEncoder) self.assertEqual(self.gen_lst, json.loads(data, cls=SWHJSONDecoder)) def test_generator_msgpack(self): data = 
msgpack_dumps(self.generator) self.assertEqual(self.gen_lst, msgpack_loads(data)) diff --git a/swh/core/tests/test_utils.py b/swh/core/tests/test_utils.py index 48dab5d..a2a4397 100644 --- a/swh/core/tests/test_utils.py +++ b/swh/core/tests/test_utils.py @@ -1,116 +1,116 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from swh.core import utils class UtilsLib(unittest.TestCase): def test_grouper(self): # given actual_data = utils.grouper((i for i in range(0, 9)), 2) out = [] for d in actual_data: out.append(list(d)) # force generator resolution for checks self.assertEqual(out, [[0, 1], [2, 3], [4, 5], [6, 7], [8]]) # given actual_data = utils.grouper((i for i in range(9, 0, -1)), 4) out = [] for d in actual_data: out.append(list(d)) # force generator resolution for checks self.assertEqual(out, [[9, 8, 7, 6], [5, 4, 3, 2], [1]]) def test_backslashescape_errors(self): raw_data_err = b'abcd\x80' with self.assertRaises(UnicodeDecodeError): raw_data_err.decode('utf-8', 'strict') - self.assertEquals( + self.assertEqual( raw_data_err.decode('utf-8', 'backslashescape'), 'abcd\\x80', ) raw_data_ok = b'abcd\xc3\xa9' - self.assertEquals( + self.assertEqual( raw_data_ok.decode('utf-8', 'backslashescape'), raw_data_ok.decode('utf-8', 'strict'), ) unicode_data = 'abcdef\u00a3' - self.assertEquals( + self.assertEqual( unicode_data.encode('ascii', 'backslashescape'), b'abcdef\\xa3', ) def test_encode_with_unescape(self): valid_data = '\\x01020304\\x00' valid_data_encoded = b'\x01020304\x00' - self.assertEquals( + self.assertEqual( valid_data_encoded, utils.encode_with_unescape(valid_data) ) def test_encode_with_unescape_invalid_escape(self): invalid_data = 'test\\abcd' with self.assertRaises(ValueError) as exc: utils.encode_with_unescape(invalid_data) self.assertIn('invalid escape', exc.exception.args[0]) self.assertIn('position 4', exc.exception.args[0]) def test_decode_with_escape(self): backslashes = b'foo\\bar\\\\baz' backslashes_escaped = 'foo\\\\bar\\\\\\\\baz' - self.assertEquals( + self.assertEqual( backslashes_escaped, utils.decode_with_escape(backslashes), ) valid_utf8 = b'foo\xc3\xa2' valid_utf8_escaped = 'foo\u00e2' - self.assertEquals( + self.assertEqual( valid_utf8_escaped, utils.decode_with_escape(valid_utf8), ) invalid_utf8 = b'foo\xa2' invalid_utf8_escaped = 'foo\\xa2' - self.assertEquals( + self.assertEqual( invalid_utf8_escaped, utils.decode_with_escape(invalid_utf8), ) valid_utf8_nul = b'foo\xc3\xa2\x00' valid_utf8_nul_escaped = 'foo\u00e2\\x00' - self.assertEquals( + self.assertEqual( valid_utf8_nul_escaped, utils.decode_with_escape(valid_utf8_nul), ) def test_commonname(self): # when actual_commonname = utils.commonname('/some/where/to/', '/some/where/to/go/to') # then - self.assertEquals('go/to', actual_commonname) + self.assertEqual('go/to', actual_commonname) # when actual_commonname2 = utils.commonname(b'/some/where/to/', b'/some/where/to/go/to') # then - self.assertEquals(b'go/to', actual_commonname2) + self.assertEqual(b'go/to', actual_commonname2) diff --git a/swh/core/utils.py b/swh/core/utils.py index 0e748a1..857f8f6 100644 --- a/swh/core/utils.py +++ b/swh/core/utils.py @@ -1,103 +1,117 @@ # Copyright (C) 2016-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License 
version 3, or any later version # See top-level LICENSE file for more information import os import itertools import codecs +import re from contextlib import contextmanager @contextmanager def cwd(path): """Contextually change the working directory to do thy bidding. Then gets back to the original location. """ prev_cwd = os.getcwd() os.chdir(path) try: yield finally: os.chdir(prev_cwd) def grouper(iterable, n): """Collect data into fixed-length chunks or blocks. Args: iterable: an iterable n: size of block Returns: fixed-length chunks as iterables """ args = [iter(iterable)] * n for _data in itertools.zip_longest(*args, fillvalue=None): yield (d for d in _data if d is not None) def backslashescape_errors(exception): if isinstance(exception, UnicodeDecodeError): bad_data = exception.object[exception.start:exception.end] escaped = ''.join(r'\x%02x' % x for x in bad_data) return escaped, exception.end return codecs.backslashreplace_errors(exception) codecs.register_error('backslashescape', backslashescape_errors) def encode_with_unescape(value): """Encode a unicode string containing \\x backslash escapes""" slices = [] start = 0 odd_backslashes = False i = 0 while i < len(value): if value[i] == '\\': odd_backslashes = not odd_backslashes else: if odd_backslashes: if value[i] != 'x': raise ValueError('invalid escape for %r at position %d' % (value, i-1)) slices.append( value[start:i-1].replace('\\\\', '\\').encode('utf-8') ) slices.append(bytes.fromhex(value[i+1:i+3])) odd_backslashes = False start = i = i + 3 continue i += 1 slices.append( value[start:i].replace('\\\\', '\\').encode('utf-8') ) return b''.join(slices) def decode_with_escape(value): """Decode a bytestring as utf-8, escaping the bytes of invalid utf-8 sequences as \\x. We also escape NUL bytes as they are invalid in JSON strings. """ # escape backslashes value = value.replace(b'\\', b'\\\\') value = value.replace(b'\x00', b'\\x00') return value.decode('utf-8', 'backslashescape') def commonname(path0, path1, as_str=False): """Compute the commonname between path0 and path1. """ return path1.split(path0)[1] + + +def numfile_sortkey(fname): + """Sort key for filenames of the form: + + nnxxx.ext + + where nn is a number. Filenames are ordered by that leading number; + filenames without one are given key 99 so they sort after the + numbered ones. + + Typically used to sort sql/nn-swh-xxx.sql files. + """ + num, rem = re.match(r'(\d*)(.*)', fname).groups() + return (num and int(num) or 99, rem) diff --git a/tox.ini b/tox.ini index 7a3f221..c1e2f8b 100644 --- a/tox.ini +++ b/tox.ini @@ -1,23 +1,17 @@ [tox] -envlist=check-manifest,flake8,py3 +envlist=flake8,py3 [testenv:py3] deps = + -r requirements-test.txt + pytest-cov pifpaf - nose commands = - pifpaf run postgresql -- nosetests + pifpaf run postgresql -- pytest --cov=swh --cov-branch {posargs} [testenv:flake8] skip_install = true deps = flake8 commands = {envpython} -m flake8 - -[testenv:check-manifest] -skip_install = true -deps = - check-manifest -commands = - {envpython} -m check_manifest {toxinidir} diff --git a/version.txt b/version.txt index 6e57d71..2595a75 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.43-0-g8363433 \ No newline at end of file +v0.0.44-0-ge118202 \ No newline at end of file
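The new numfile_sortkey utility is what makes the glob-based dump loading in swh-db-init and SingleDbTestFixture deterministic. A minimal sketch of its behaviour (illustrative file names, not taken from the repository):

    >>> from swh.core.utils import numfile_sortkey
    >>> sorted(['30-schema.sql', '10-init.sql', '2-extra.sql', 'notes.txt'],
    ...        key=numfile_sortkey)
    ['2-extra.sql', '10-init.sql', '30-schema.sql', 'notes.txt']

Files are ordered by their numeric prefix rather than lexicographically (plain string sorting would put '10-init.sql' before '2-extra.sql'), and files without a numeric prefix get key 99, so they sort after the numbered ones.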