diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 380c658..69b3349 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,46 +1,40 @@
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
   rev: v2.4.0
   hooks:
   - id: trailing-whitespace
   - id: flake8
   - id: check-json
   - id: check-yaml
 
 - repo: https://github.com/codespell-project/codespell
   rev: v1.16.0
   hooks:
   - id: codespell
 
 - repo: local
   hooks:
   - id: mypy
     name: mypy
     entry: mypy
     args: [swh]
     pass_filenames: false
     language: system
     types: [python]
 
+- repo: https://github.com/python/black
+  rev: 19.10b0
+  hooks:
+  - id: black
+
 # unfortunately, we are far from being able to enable this...
 # - repo: https://github.com/PyCQA/pydocstyle.git
 #   rev: 4.0.0
 #   hooks:
 #   - id: pydocstyle
 #     name: pydocstyle
 #     description: pydocstyle is a static analysis tool for checking compliance with Python docstring conventions.
 #     entry: pydocstyle --convention=google
 #     language: python
 #     types: [python]
 
-# black requires py3.6+
-#- repo: https://github.com/python/black
-#  rev: 19.3b0
-#  hooks:
-#  - id: black
-#    language_version: python3
-#- repo: https://github.com/asottile/blacken-docs
-#  rev: v1.0.0-1
-#  hooks:
-#  - id: blacken-docs
-#    additional_dependencies: [black==19.3b0]
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..8d79b7e
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,6 @@
+[flake8]
+# E203: whitespaces before ':'
+# E231: missing whitespace after ','
+# W503: line break before binary operator
+ignore = E203,E231,W503
+max-line-length = 88
diff --git a/setup.py b/setup.py
index 7d4fd32..69e80c8 100755
--- a/setup.py
+++ b/setup.py
@@ -1,69 +1,69 @@
 #!/usr/bin/env python3
 # Copyright (C) 2015-2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from setuptools import setup, find_packages
 
 from os import path
 from io import open
 
 here = path.abspath(path.dirname(__file__))
 
 # Get the long description from the README file
-with open(path.join(here, 'README.md'), encoding='utf-8') as f:
+with open(path.join(here, "README.md"), encoding="utf-8") as f:
     long_description = f.read()
 
 
 def parse_requirements(name=None):
     if name:
-        reqf = 'requirements-%s.txt' % name
+        reqf = "requirements-%s.txt" % name
     else:
-        reqf = 'requirements.txt'
+        reqf = "requirements.txt"
 
     requirements = []
     if not path.exists(reqf):
         return requirements
 
     with open(reqf) as f:
         for line in f.readlines():
             line = line.strip()
-            if not line or line.startswith('#'):
+            if not line or line.startswith("#"):
                 continue
             requirements.append(line)
     return requirements
 
 
 setup(
-    name='swh.search',
-    description='Software Heritage search service',
+    name="swh.search",
+    description="Software Heritage search service",
     long_description=long_description,
-    long_description_content_type='text/markdown',
-    author='Software Heritage developers',
-    author_email='swh-devel@inria.fr',
-    url='https://forge.softwareheritage.org/diffusion/DSEA',
+    long_description_content_type="text/markdown",
+    author="Software Heritage developers",
+    author_email="swh-devel@inria.fr",
+    url="https://forge.softwareheritage.org/diffusion/DSEA",
     packages=find_packages(),  # packages's modules
-    install_requires=parse_requirements() + parse_requirements('swh'),
-    tests_require=parse_requirements('test'),
-    entry_points='''
+    install_requires=parse_requirements() + parse_requirements("swh"),
+    tests_require=parse_requirements("test"),
+    entry_points="""
         [swh.cli.subcommands]
         search=swh.search.cli:cli
-    ''',
-    setup_requires=['vcversioner'],
-    extras_require={'testing': parse_requirements('test')},
+    """,
+    setup_requires=["vcversioner"],
+    extras_require={"testing": parse_requirements("test")},
     vcversioner={},
     include_package_data=True,
     classifiers=[
         "Programming Language :: Python :: 3",
         "Intended Audience :: Developers",
         "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
         "Operating System :: OS Independent",
         "Development Status :: 3 - Alpha",
     ],
     project_urls={
-        'Bug Reports': 'https://forge.softwareheritage.org/maniphest',
-        'Funding': 'https://www.softwareheritage.org/donate',
-        'Source': 'https://forge.softwareheritage.org/source/swh-search',
+        "Bug Reports": "https://forge.softwareheritage.org/maniphest",
+        "Funding": "https://www.softwareheritage.org/donate",
+        "Source": "https://forge.softwareheritage.org/source/swh-search",
     },
 )
diff --git a/swh/search/__init__.py b/swh/search/__init__.py
index 7474665..d66b553 100644
--- a/swh/search/__init__.py
+++ b/swh/search/__init__.py
@@ -1,32 +1,32 @@
 # Copyright (C) 2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 
 def get_search(cls, args):
     """Get an search object of class `search_class` with arguments
     `search_args`.
 
     Args:
         cls (str): search's class, either 'local' or 'remote'
         args (dict): dictionary of arguments passed to the
             search class constructor
 
     Returns:
         an instance of swh.search's classes (either local or remote)
 
     Raises:
         ValueError if passed an unknown search class.
 
     """
-    if cls == 'remote':
+    if cls == "remote":
         from .api.client import RemoteSearch as Search
-    elif cls == 'elasticsearch':
+    elif cls == "elasticsearch":
         from .elasticsearch import ElasticSearch as Search
-    elif cls == 'memory':
+    elif cls == "memory":
         from .in_memory import InMemorySearch as Search
     else:
-        raise ValueError('Unknown indexer search class `%s`' % cls)
+        raise ValueError("Unknown indexer search class `%s`" % cls)
 
     return Search(**args)
diff --git a/swh/search/api/client.py b/swh/search/api/client.py
index fbe2433..786efad 100644
--- a/swh/search/api/client.py
+++ b/swh/search/api/client.py
@@ -1,13 +1,14 @@
 # Copyright (C) 2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from swh.core.api import RPCClient
 
 from ..elasticsearch import ElasticSearch
 
 
 class RemoteSearch(RPCClient):
     """Proxy to a remote search API"""
+
     backend_class = ElasticSearch
diff --git a/swh/search/api/server.py b/swh/search/api/server.py
index adf0402..bf994dc 100644
--- a/swh/search/api/server.py
+++ b/swh/search/api/server.py
@@ -1,90 +1,86 @@
 # Copyright (C) 2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import logging
 import os
 
 from swh.core import config
-from swh.core.api import (RPCServerApp, error_handler,
-                          encode_data_server as encode_data)
+from swh.core.api import RPCServerApp, error_handler, encode_data_server as encode_data
 
 from .. import get_search
 from ..elasticsearch import ElasticSearch
 
 
 def _get_search():
     global search
     if not search:
-        search = get_search(**app.config['search'])
+        search = get_search(**app.config["search"])
     return search
 
 
-app = RPCServerApp(__name__,
-                   backend_class=ElasticSearch,
-                   backend_factory=_get_search)
+app = RPCServerApp(__name__, backend_class=ElasticSearch, backend_factory=_get_search)
 
 search = None
 
 
 @app.errorhandler(Exception)
 def my_error_handler(exception):
     return error_handler(exception, encode_data)
 
 
-@app.route('/')
+@app.route("/")
 def index():
-    return 'SWH Search API server'
+    return "SWH Search API server"
 
 
 api_cfg = None
 
 
-def load_and_check_config(config_file, type='elasticsearch'):
+def load_and_check_config(config_file, type="elasticsearch"):
     """Check the minimal configuration is set to run the api or raise an
        error explanation.
 
     Args:
         config_file (str): Path to the configuration file to load
         type (str): configuration type. For 'local' type, more checks are
                     done.
 
     Raises:
         Error if the setup is not as expected
 
     Returns:
         configuration as a dict
 
     """
     if not config_file:
-        raise EnvironmentError('Configuration file must be defined')
+        raise EnvironmentError("Configuration file must be defined")
 
     if not os.path.exists(config_file):
-        raise FileNotFoundError('Configuration file %s does not exist' % (
-            config_file, ))
+        raise FileNotFoundError("Configuration file %s does not exist" % (config_file,))
 
     cfg = config.read(config_file)
-    if 'search' not in cfg:
+    if "search" not in cfg:
         raise KeyError("Missing 'search' configuration")
 
     return cfg
 
 
 def make_app_from_configfile():
     """Run the WSGI app from the webserver, loading the configuration from
        a configuration file.
 
        SWH_CONFIG_FILENAME environment variable defines the
        configuration path to load.
 
     """
     global api_cfg
     if not api_cfg:
-        config_file = os.environ.get('SWH_CONFIG_FILENAME')
+        config_file = os.environ.get("SWH_CONFIG_FILENAME")
         api_cfg = load_and_check_config(config_file)
         app.config.update(api_cfg)
     handler = logging.StreamHandler()
     app.logger.addHandler(handler)
     return app
diff --git a/swh/search/cli.py b/swh/search/cli.py
index b43d113..0926d90 100644
--- a/swh/search/cli.py
+++ b/swh/search/cli.py
@@ -1,89 +1,98 @@
 # Copyright (C) 2019-2020  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import functools
 
 import click
 
 from swh.core import config
 from swh.core.cli import CONTEXT_SETTINGS
 from swh.journal.cli import get_journal_client
 
 from . import get_search
 from .journal_client import process_journal_objects
 from .api.server import load_and_check_config, app
 
 
-@click.group(name='search', context_settings=CONTEXT_SETTINGS)
-@click.option('--config-file', '-C', default=None,
-              type=click.Path(exists=True, dir_okay=False,),
-              help="Configuration file.")
+@click.group(name="search", context_settings=CONTEXT_SETTINGS)
+@click.option(
+    "--config-file",
+    "-C",
+    default=None,
+    type=click.Path(exists=True, dir_okay=False,),
+    help="Configuration file.",
+)
 @click.pass_context
 def cli(ctx, config_file):
-    '''Software Heritage Search tools.'''
+    """Software Heritage Search tools."""
     ctx.ensure_object(dict)
 
     conf = config.read(config_file)
-    ctx.obj['config'] = conf
+    ctx.obj["config"] = conf
 
 
-@cli.command('initialize')
+@cli.command("initialize")
 @click.pass_context
 def initialize(ctx):
     """Creates Elasticsearch indices."""
-    search = get_search(**ctx.obj['config']['search'])
+    search = get_search(**ctx.obj["config"]["search"])
     search.initialize()
-    print('Done.')
+    print("Done.")
 
 
-@cli.group('journal-client')
+@cli.group("journal-client")
 @click.pass_context
 def journal_client(ctx):
     """"""
     pass
 
 
-@journal_client.command('objects')
-@click.option('--stop-after-objects', '-s', default=None, type=int,
-              help='Maximum number of objects to replay. Default is to '
-                   'run forever.')
+@journal_client.command("objects")
+@click.option(
+    "--stop-after-objects",
+    "-s",
+    default=None,
+    type=int,
+    help="Maximum number of objects to replay. Default is to " "run forever.",
+)
 @click.pass_context
 def journal_client_objects(ctx, stop_after_objects):
     """Listens for new objects from the SWH Journal, and schedules tasks
     to run relevant indexers (currently, only origin)
     on these new objects."""
     client = get_journal_client(
-        ctx, object_types=['origin', 'origin_visit'],
-        stop_after_objects=stop_after_objects)
-    search = get_search(**ctx.obj['config']['search'])
-
-    worker_fn = functools.partial(
-        process_journal_objects,
-        search=search,
+        ctx,
+        object_types=["origin", "origin_visit"],
+        stop_after_objects=stop_after_objects,
     )
+    search = get_search(**ctx.obj["config"]["search"])
+
+    worker_fn = functools.partial(process_journal_objects, search=search,)
     nb_messages = 0
     try:
         nb_messages = client.process(worker_fn)
-        print('Processed %d messages.' % nb_messages)
+        print("Processed %d messages."
% nb_messages) except KeyboardInterrupt: ctx.exit(0) else: - print('Done.') + print("Done.") finally: client.close() -@cli.command('rpc-serve') -@click.argument('config-path', required=True) -@click.option('--host', default='0.0.0.0', help="Host to run the server") -@click.option('--port', default=5010, type=click.INT, - help="Binding port of the server") -@click.option('--debug/--nodebug', default=True, - help="Indicates if the server should run in debug mode") +@cli.command("rpc-serve") +@click.argument("config-path", required=True) +@click.option("--host", default="0.0.0.0", help="Host to run the server") +@click.option("--port", default=5010, type=click.INT, help="Binding port of the server") +@click.option( + "--debug/--nodebug", + default=True, + help="Indicates if the server should run in debug mode", +) def rpc_server(config_path, host, port, debug): """Starts a Software Heritage Indexer RPC HTTP server.""" - api_cfg = load_and_check_config(config_path, type='any') + api_cfg = load_and_check_config(config_path, type="any") app.config.update(api_cfg) app.run(host, port=int(port), debug=bool(debug)) diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py index 5365db3..9a74266 100644 --- a/swh/search/elasticsearch.py +++ b/swh/search/elasticsearch.py @@ -1,231 +1,213 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import base64 from typing import Any, Iterable, Dict, List, Iterator, Optional from elasticsearch import Elasticsearch from elasticsearch.helpers import bulk, scan import msgpack from swh.core.api import remote_api_endpoint from swh.model import model from swh.model.identifiers import origin_identifier def _sanitize_origin(origin): origin = origin.copy() - res = { - 'url': origin.pop('url') - } - for field_name in ('intrinsic_metadata', 'has_visits'): + res = {"url": origin.pop("url")} + for field_name in ("intrinsic_metadata", "has_visits"): if field_name in origin: res[field_name] = origin.pop(field_name) return res class ElasticSearch: def __init__(self, hosts: List[str]): self._backend = Elasticsearch(hosts=hosts) - @remote_api_endpoint('check') + @remote_api_endpoint("check") def check(self): return self._backend.ping() def deinitialize(self) -> None: """Removes all indices from the Elasticsearch backend""" - self._backend.indices.delete(index='*') + self._backend.indices.delete(index="*") def initialize(self) -> None: """Declare Elasticsearch indices and mappings""" - if not self._backend.indices.exists(index='origin'): - self._backend.indices.create(index='origin') + if not self._backend.indices.exists(index="origin"): + self._backend.indices.create(index="origin") self._backend.indices.put_mapping( - index='origin', + index="origin", body={ - 'properties': { - 'sha1': { - 'type': 'keyword', - 'doc_values': True, - }, - 'url': { - 'type': 'text', + "properties": { + "sha1": {"type": "keyword", "doc_values": True,}, + "url": { + "type": "text", # To split URLs into token on any character # that is not alphanumerical - 'analyzer': 'simple', - 'fields': { - 'as_you_type': { - 'type': 'search_as_you_type', - 'analyzer': 'simple', + "analyzer": "simple", + "fields": { + "as_you_type": { + "type": "search_as_you_type", + "analyzer": "simple", } - } - }, - 'has_visits': { - 'type': 'boolean', + }, }, - 'intrinsic_metadata': { - 'type': 'nested', - 'properties': { 
- '@context': { + "has_visits": {"type": "boolean",}, + "intrinsic_metadata": { + "type": "nested", + "properties": { + "@context": { # don't bother indexing tokens - 'type': 'keyword', + "type": "keyword", } }, }, } - } + }, ) - @remote_api_endpoint('flush') + @remote_api_endpoint("flush") def flush(self) -> None: """Blocks until all previous calls to _update() are completely applied.""" - self._backend.indices.refresh(index='_all') + self._backend.indices.refresh(index="_all") - @remote_api_endpoint('origin/update') + @remote_api_endpoint("origin/update") def origin_update(self, documents: Iterable[dict]) -> None: documents = map(_sanitize_origin, documents) - documents_with_sha1 = ((origin_identifier(document), document) - for document in documents) + documents_with_sha1 = ( + (origin_identifier(document), document) for document in documents + ) actions = [ { - '_op_type': 'update', - '_id': sha1, - '_index': 'origin', - 'doc': { - **document, - 'sha1': sha1, - }, - 'doc_as_upsert': True, + "_op_type": "update", + "_id": sha1, + "_index": "origin", + "doc": {**document, "sha1": sha1,}, + "doc_as_upsert": True, } for (sha1, document) in documents_with_sha1 ] - bulk(self._backend, actions, index='origin') + bulk(self._backend, actions, index="origin") def origin_dump(self) -> Iterator[model.Origin]: """Returns all content in Elasticsearch's index. Not exposed publicly; but useful for tests.""" - results = scan(self._backend, index='*') + results = scan(self._backend, index="*") for hit in results: - yield self._backend.termvectors( - index='origin', id=hit['_id'], - fields=['*']) + yield self._backend.termvectors(index="origin", id=hit["_id"], fields=["*"]) - @remote_api_endpoint('origin/search') + @remote_api_endpoint("origin/search") def origin_search( - self, *, - url_pattern: str = None, metadata_pattern: str = None, - with_visit: bool = False, - page_token: str = None, count: int = 50 - ) -> Dict[str, object]: + self, + *, + url_pattern: str = None, + metadata_pattern: str = None, + with_visit: bool = False, + page_token: str = None, + count: int = 50 + ) -> Dict[str, object]: """Searches for origins matching the `url_pattern`. Args: url_pattern (str): Part of thr URL to search for with_visit (bool): Whether origins with no visit are to be filtered out page_token (str): Opaque value used for pagination. count (int): number of results to return. Returns: a dictionary with keys: * `next_page_token`: opaque value used for fetching more results. `None` if there are no more result. 
* `results`: list of dictionaries with key: * `url`: URL of a matching origin """ query_clauses = [] # type: List[Dict[str, Any]] if url_pattern: - query_clauses.append({ - 'multi_match': { - 'query': url_pattern, - 'type': 'bool_prefix', - 'operator': 'and', - 'fields': [ - 'url.as_you_type', - 'url.as_you_type._2gram', - 'url.as_you_type._3gram', - ] + query_clauses.append( + { + "multi_match": { + "query": url_pattern, + "type": "bool_prefix", + "operator": "and", + "fields": [ + "url.as_you_type", + "url.as_you_type._2gram", + "url.as_you_type._3gram", + ], + } } - }) + ) if metadata_pattern: - query_clauses.append({ - 'nested': { - 'path': 'intrinsic_metadata', - 'query': { - 'multi_match': { - 'query': metadata_pattern, - 'operator': 'and', - 'fields': ['intrinsic_metadata.*'] - } - }, + query_clauses.append( + { + "nested": { + "path": "intrinsic_metadata", + "query": { + "multi_match": { + "query": metadata_pattern, + "operator": "and", + "fields": ["intrinsic_metadata.*"], + } + }, + } } - }) + ) if not query_clauses: raise ValueError( - 'At least one of url_pattern and metadata_pattern ' - 'must be provided.') + "At least one of url_pattern and metadata_pattern " "must be provided." + ) if with_visit: - query_clauses.append({ - 'term': { - 'has_visits': True, - } - }) + query_clauses.append({"term": {"has_visits": True,}}) body = { - 'query': { - 'bool': { - 'must': query_clauses, - } - }, - 'size': count, - 'sort': [ - {'_score': 'desc'}, - {'sha1': 'asc'}, - ] + "query": {"bool": {"must": query_clauses,}}, + "size": count, + "sort": [{"_score": "desc"}, {"sha1": "asc"},], } if page_token: # TODO: use ElasticSearch's scroll API? - page_token_content = msgpack.loads( - base64.b64decode(page_token), raw=True) - body['search_after'] = \ - [page_token_content[b'score'], - page_token_content[b'sha1'].decode('ascii')] - - res = self._backend.search( - index='origin', - body=body, - size=count, - ) + page_token_content = msgpack.loads(base64.b64decode(page_token), raw=True) + body["search_after"] = [ + page_token_content[b"score"], + page_token_content[b"sha1"].decode("ascii"), + ] - hits = res['hits']['hits'] + res = self._backend.search(index="origin", body=body, size=count,) + + hits = res["hits"]["hits"] if len(hits) == count: last_hit = hits[-1] next_page_token_content = { - b'score': last_hit['_score'], - b'sha1': last_hit['_source']['sha1'], + b"score": last_hit["_score"], + b"sha1": last_hit["_source"]["sha1"], } - next_page_token = base64.b64encode(msgpack.dumps( - next_page_token_content)) # type: Optional[bytes] + next_page_token = base64.b64encode( + msgpack.dumps(next_page_token_content) + ) # type: Optional[bytes] else: next_page_token = None return { - 'next_page_token': next_page_token, - 'results': [ + "next_page_token": next_page_token, + "results": [ { # TODO: also add 'id'? 
- 'url': hit['_source']['url'], + "url": hit["_source"]["url"], } for hit in hits - ] + ], } diff --git a/swh/search/in_memory.py b/swh/search/in_memory.py index 01ac4ff..f5fc665 100644 --- a/swh/search/in_memory.py +++ b/swh/search/in_memory.py @@ -1,128 +1,127 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import base64 from collections import defaultdict import itertools import re from typing import Any, Dict, Iterable, Iterator, List, Optional import msgpack from swh.core.api import remote_api_endpoint from swh.model.identifiers import origin_identifier def _sanitize_origin(origin): origin = origin.copy() - res = { - 'url': origin.pop('url') - } - for field_name in ('type', 'intrinsic_metadata'): + res = {"url": origin.pop("url")} + for field_name in ("type", "intrinsic_metadata"): if field_name in origin: res[field_name] = origin.pop(field_name) return res class InMemorySearch: def __init__(self): pass - @remote_api_endpoint('check') + @remote_api_endpoint("check") def check(self): return True def deinitialize(self) -> None: - if hasattr(self, '_origins'): + if hasattr(self, "_origins"): del self._origins del self._origin_ids def initialize(self) -> None: self._origins = defaultdict(dict) # type: Dict[str, Dict[str, Any]] self._origin_ids = [] # type: List[str] def flush(self) -> None: pass - _url_splitter = re.compile(r'\W') + _url_splitter = re.compile(r"\W") - @remote_api_endpoint('origin/update') + @remote_api_endpoint("origin/update") def origin_update(self, documents: Iterable[dict]) -> None: for document in documents: document = document.copy() id_ = origin_identifier(document) - if 'url' in document: - document['_url_tokens'] = \ - set(self._url_splitter.split(document['url'])) + if "url" in document: + document["_url_tokens"] = set(self._url_splitter.split(document["url"])) self._origins[id_].update(document) if id_ not in self._origin_ids: self._origin_ids.append(id_) - @remote_api_endpoint('origin/search') + @remote_api_endpoint("origin/search") def origin_search( - self, *, - url_pattern: str = None, metadata_pattern: str = None, - with_visit: bool = False, - page_token: str = None, count: int = 50 - ) -> Dict[str, object]: - matches = \ - (self._origins[id_] - for id_ in self._origin_ids) # type: Iterator[Dict[str, Any]] + self, + *, + url_pattern: str = None, + metadata_pattern: str = None, + with_visit: bool = False, + page_token: str = None, + count: int = 50 + ) -> Dict[str, object]: + matches = ( + self._origins[id_] for id_ in self._origin_ids + ) # type: Iterator[Dict[str, Any]] if url_pattern: tokens = set(self._url_splitter.split(url_pattern)) def predicate(match): - missing_tokens = tokens - match['_url_tokens'] + missing_tokens = tokens - match["_url_tokens"] if len(missing_tokens) == 0: return True elif len(missing_tokens) > 1: return False else: # There is one missing token, look up by prefix. (missing_token,) = missing_tokens - return any(token.startswith(missing_token) - for token in match['_url_tokens']) + return any( + token.startswith(missing_token) + for token in match["_url_tokens"] + ) matches = filter(predicate, matches) if metadata_pattern: raise NotImplementedError( - 'Metadata search is not implemented in the in-memory backend.') + "Metadata search is not implemented in the in-memory backend." 
+ ) if not url_pattern and not metadata_pattern: raise ValueError( - 'At least one of url_pattern and metadata_pattern ' - 'must be provided.') + "At least one of url_pattern and metadata_pattern " "must be provided." + ) if with_visit: - matches = filter(lambda o: o.get('has_visits'), matches) + matches = filter(lambda o: o.get("has_visits"), matches) if page_token: - page_token_content = msgpack.loads( - base64.b64decode(page_token)) - start_at_index = page_token_content[b'start_at_index'] + page_token_content = msgpack.loads(base64.b64decode(page_token)) + start_at_index = page_token_content[b"start_at_index"] else: start_at_index = 0 - hits = list(itertools.islice( - matches, start_at_index, start_at_index+count)) + hits = list(itertools.islice(matches, start_at_index, start_at_index + count)) if len(hits) == count: next_page_token_content = { - b'start_at_index': start_at_index+count, + b"start_at_index": start_at_index + count, } - next_page_token = base64.b64encode(msgpack.dumps( - next_page_token_content)) # type: Optional[bytes] + next_page_token = base64.b64encode( + msgpack.dumps(next_page_token_content) + ) # type: Optional[bytes] else: next_page_token = None return { - 'next_page_token': next_page_token, - 'results': [ - {'url': hit['url']} - for hit in hits - ] + "next_page_token": next_page_token, + "results": [{"url": hit["url"]} for hit in hits], } diff --git a/swh/search/journal_client.py b/swh/search/journal_client.py index 4e38c5e..660a0f9 100644 --- a/swh/search/journal_client.py +++ b/swh/search/journal_client.py @@ -1,59 +1,63 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging EXPECTED_MESSAGE_TYPES = { - 'origin', 'origin_visit', 'origin_intrinsic_metadata', + "origin", + "origin_visit", + "origin_intrinsic_metadata", } def process_journal_objects(messages, *, search): """Worker function for `JournalClient.process(worker_fn)`, after currification of `scheduler` and `task_names`.""" assert set(messages) <= EXPECTED_MESSAGE_TYPES, set(messages) - if 'origin' in messages: - process_origins(messages['origin'], search) + if "origin" in messages: + process_origins(messages["origin"], search) - if 'origin_visit' in messages: - process_origin_visits(messages['origin_visit'], search) + if "origin_visit" in messages: + process_origin_visits(messages["origin_visit"], search) - if 'origin_intrinsic_metadata' in messages: - process_origin_intrinsic_metadata( - messages['origin_intrinsic_metadata'], search) + if "origin_intrinsic_metadata" in messages: + process_origin_intrinsic_metadata(messages["origin_intrinsic_metadata"], search) def process_origins(origins, search): - logging.debug('processing origins %r', origins) + logging.debug("processing origins %r", origins) search.origin_update(origins) def process_origin_visits(visits, search): - logging.debug('processing origin visits %r', visits) - - search.origin_update([ - { - 'url': (visit['origin'] if isinstance(visit['origin'], str) - else visit['origin']['url']), - 'has_visits': True - } - for visit in visits - ]) + logging.debug("processing origin visits %r", visits) + + search.origin_update( + [ + { + "url": ( + visit["origin"] + if isinstance(visit["origin"], str) + else visit["origin"]["url"] + ), + "has_visits": True, + } + for visit in visits + ] + ) def process_origin_intrinsic_metadata(origin_metadata, 
search): - logging.debug('processing origin intrinsic_metadata %r', origin_metadata) + logging.debug("processing origin intrinsic_metadata %r", origin_metadata) origin_metadata = [ - { - 'url': item['origin_url'], - 'intrinsic_metadata': item['metadata'], - } - for item in origin_metadata] + {"url": item["origin_url"], "intrinsic_metadata": item["metadata"],} + for item in origin_metadata + ] search.origin_update(origin_metadata) diff --git a/swh/search/tests/conftest.py b/swh/search/tests/conftest.py index ba1c0f8..9077aa0 100644 --- a/swh/search/tests/conftest.py +++ b/swh/search/tests/conftest.py @@ -1,108 +1,111 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import socket import subprocess import time import elasticsearch import pytest def free_port(): sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.bind(('127.0.0.1', 0)) + sock.bind(("127.0.0.1", 0)) port = sock.getsockname()[1] sock.close() return port def wait_for_peer(addr, port): while True: try: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect((addr, port)) except ConnectionRefusedError: time.sleep(0.1) else: sock.close() break -CONFIG_TEMPLATE = ''' +CONFIG_TEMPLATE = """ node.name: node-1 path.data: {data} path.logs: {logs} network.host: 127.0.0.1 http.port: {http_port} transport.port: {transport_port} -''' +""" -def _run_elasticsearch( - conf_dir, data_dir, logs_dir, http_port, transport_port): - es_home = '/usr/share/elasticsearch' +def _run_elasticsearch(conf_dir, data_dir, logs_dir, http_port, transport_port): + es_home = "/usr/share/elasticsearch" - with open(conf_dir + '/elasticsearch.yml', 'w') as fd: - fd.write(CONFIG_TEMPLATE.format( - data=data_dir, - logs=logs_dir, - http_port=http_port, - transport_port=transport_port)) + with open(conf_dir + "/elasticsearch.yml", "w") as fd: + fd.write( + CONFIG_TEMPLATE.format( + data=data_dir, + logs=logs_dir, + http_port=http_port, + transport_port=transport_port, + ) + ) - with open(conf_dir + '/log4j2.properties', 'w') as fd: + with open(conf_dir + "/log4j2.properties", "w") as fd: pass cmd = [ - '/usr/share/elasticsearch/jdk/bin/java', - '-Des.path.home={}'.format(es_home), - '-Des.path.conf={}'.format(conf_dir), - '-Des.bundled_jdk=true', - '-Dlog4j2.disable.jmx=true', - '-cp', '{}/lib/*'.format(es_home), - 'org.elasticsearch.bootstrap.Elasticsearch', + "/usr/share/elasticsearch/jdk/bin/java", + "-Des.path.home={}".format(es_home), + "-Des.path.conf={}".format(conf_dir), + "-Des.bundled_jdk=true", + "-Dlog4j2.disable.jmx=true", + "-cp", + "{}/lib/*".format(es_home), + "org.elasticsearch.bootstrap.Elasticsearch", ] - host = '127.0.0.1:{}'.format(http_port) + host = "127.0.0.1:{}".format(http_port) - with open(logs_dir + '/output.txt', 'w') as fd: + with open(logs_dir + "/output.txt", "w") as fd: p = subprocess.Popen(cmd) - wait_for_peer('127.0.0.1', http_port) + wait_for_peer("127.0.0.1", http_port) client = elasticsearch.Elasticsearch([host]) assert client.ping() return p -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def elasticsearch_session(tmpdir_factory): - tmpdir = tmpdir_factory.mktemp('elasticsearch') - es_conf = tmpdir.mkdir('conf') + tmpdir = tmpdir_factory.mktemp("elasticsearch") + es_conf = tmpdir.mkdir("conf") http_port = free_port() transport_port = free_port() p = _run_elasticsearch( conf_dir=str(es_conf), 
- data_dir=str(tmpdir.mkdir('data')), - logs_dir=str(tmpdir.mkdir('logs')), + data_dir=str(tmpdir.mkdir("data")), + logs_dir=str(tmpdir.mkdir("logs")), http_port=http_port, transport_port=transport_port, ) - yield '127.0.0.1:{}'.format(http_port) + yield "127.0.0.1:{}".format(http_port) # Check ES didn't stop assert p.returncode is None, p.returncode p.kill() p.wait() -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def elasticsearch_host(elasticsearch_session): yield elasticsearch_session diff --git a/swh/search/tests/test_api_client.py b/swh/search/tests/test_api_client.py index c38cb8d..a1fe8e3 100644 --- a/swh/search/tests/test_api_client.py +++ b/swh/search/tests/test_api_client.py @@ -1,48 +1,43 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import pytest from swh.core.api.tests.server_testing import ServerTestFixture from swh.search import get_search from swh.search.api.server import app from .test_search import CommonSearchTest class TestRemoteSearch(CommonSearchTest, ServerTestFixture, unittest.TestCase): @pytest.fixture(autouse=True) def _instantiate_search(self, elasticsearch_host): self._elasticsearch_host = elasticsearch_host def setUp(self): self.config = { - 'search': { - 'cls': 'elasticsearch', - 'args': { - 'hosts': [self._elasticsearch_host], - } + "search": { + "cls": "elasticsearch", + "args": {"hosts": [self._elasticsearch_host],}, } } self.app = app super().setUp() self.reset() - self.search = get_search('remote', { - 'url': self.url(), - }) + self.search = get_search("remote", {"url": self.url(),}) def reset(self): - search = get_search('elasticsearch', { - 'hosts': [self._elasticsearch_host], - }) + search = get_search("elasticsearch", {"hosts": [self._elasticsearch_host],}) search.deinitialize() search.initialize() - @pytest.mark.skip('Elasticsearch also returns close matches, ' - 'so this test would fail') + @pytest.mark.skip( + "Elasticsearch also returns close matches, " "so this test would fail" + ) def test_origin_url_paging(self, count): pass diff --git a/swh/search/tests/test_cli.py b/swh/search/tests/test_cli.py index 677b8af..dc1d77a 100644 --- a/swh/search/tests/test_cli.py +++ b/swh/search/tests/test_cli.py @@ -1,133 +1,125 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import tempfile from unittest.mock import patch, MagicMock from click.testing import CliRunner from swh.journal.serializers import value_to_kafka from swh.journal.tests.utils import MockedKafkaConsumer from swh.search.cli import cli from .test_elasticsearch import BaseElasticsearchTest -CLI_CONFIG = ''' +CLI_CONFIG = """ search: cls: elasticsearch args: hosts: - '{elasticsearch_host}' -''' +""" -JOURNAL_OBJECTS_CONFIG = ''' +JOURNAL_OBJECTS_CONFIG = """ journal: brokers: - 192.0.2.1 prefix: swh.journal.objects group_id: test-consumer -''' +""" class MockedKafkaConsumerWithTopics(MockedKafkaConsumer): def list_topics(self, timeout=None): return { - 'swh.journal.objects.origin', - 'swh.journal.objects.origin_visit', + "swh.journal.objects.origin", + "swh.journal.objects.origin_visit", } -def invoke(catch_exceptions, args, config='', *, elasticsearch_host): +def 
invoke(catch_exceptions, args, config="", *, elasticsearch_host): runner = CliRunner() - with tempfile.NamedTemporaryFile('a', suffix='.yml') as config_fd: - config_fd.write((CLI_CONFIG + config).format( - elasticsearch_host=elasticsearch_host - )) + with tempfile.NamedTemporaryFile("a", suffix=".yml") as config_fd: + config_fd.write( + (CLI_CONFIG + config).format(elasticsearch_host=elasticsearch_host) + ) config_fd.seek(0) - result = runner.invoke(cli, ['-C' + config_fd.name] + args) + result = runner.invoke(cli, ["-C" + config_fd.name] + args) if not catch_exceptions and result.exception: print(result.output) raise result.exception return result class CliTestCase(BaseElasticsearchTest): def test__journal_client__origin(self): """Tests the re-indexing when origin_batch_size*task_batch_size is a divisor of nb_origins.""" - topic = 'swh.journal.objects.origin' - value = value_to_kafka({ - 'url': 'http://foobar.baz', - }) + topic = "swh.journal.objects.origin" + value = value_to_kafka({"url": "http://foobar.baz",}) message = MagicMock() message.error.return_value = None message.topic.return_value = topic message.value.return_value = value mock_consumer = MockedKafkaConsumerWithTopics([message]) - with patch('swh.journal.client.Consumer', - return_value=mock_consumer): - result = invoke(False, [ - 'journal-client', 'objects', - '--stop-after-objects', '1', - ], JOURNAL_OBJECTS_CONFIG, - elasticsearch_host=self._elasticsearch_host) + with patch("swh.journal.client.Consumer", return_value=mock_consumer): + result = invoke( + False, + ["journal-client", "objects", "--stop-after-objects", "1",], + JOURNAL_OBJECTS_CONFIG, + elasticsearch_host=self._elasticsearch_host, + ) # Check the output - expected_output = ( - 'Processed 1 messages.\n' - 'Done.\n' - ) + expected_output = "Processed 1 messages.\n" "Done.\n" assert result.exit_code == 0, result.output assert result.output == expected_output self.search.flush() - results = self.search.origin_search(url_pattern='foobar') - assert results == {'next_page_token': None, 'results': [ - {'url': 'http://foobar.baz'}]} + results = self.search.origin_search(url_pattern="foobar") + assert results == { + "next_page_token": None, + "results": [{"url": "http://foobar.baz"}], + } - results = self.search.origin_search(url_pattern='foobar', - with_visit=True) - assert results == {'next_page_token': None, 'results': []} + results = self.search.origin_search(url_pattern="foobar", with_visit=True) + assert results == {"next_page_token": None, "results": []} def test__journal_client__origin_visit(self): """Tests the re-indexing when origin_batch_size*task_batch_size is a divisor of nb_origins.""" - topic = 'swh.journal.objects.origin_visit' - value = value_to_kafka({ - 'origin': 'http://foobar.baz', - }) + topic = "swh.journal.objects.origin_visit" + value = value_to_kafka({"origin": "http://foobar.baz",}) message = MagicMock() message.error.return_value = None message.topic.return_value = topic message.value.return_value = value mock_consumer = MockedKafkaConsumerWithTopics([message]) - with patch('swh.journal.client.Consumer', - return_value=mock_consumer): - result = invoke(False, [ - 'journal-client', 'objects', - '--stop-after-objects', '1', - ], JOURNAL_OBJECTS_CONFIG, - elasticsearch_host=self._elasticsearch_host) + with patch("swh.journal.client.Consumer", return_value=mock_consumer): + result = invoke( + False, + ["journal-client", "objects", "--stop-after-objects", "1",], + JOURNAL_OBJECTS_CONFIG, + elasticsearch_host=self._elasticsearch_host, + ) # Check 
the output - expected_output = ( - 'Processed 1 messages.\n' - 'Done.\n' - ) + expected_output = "Processed 1 messages.\n" "Done.\n" assert result.exit_code == 0, result.output assert result.output == expected_output self.search.flush() - results = self.search.origin_search(url_pattern='foobar', - with_visit=True) - assert results == {'next_page_token': None, 'results': [ - {'url': 'http://foobar.baz'}]} + results = self.search.origin_search(url_pattern="foobar", with_visit=True) + assert results == { + "next_page_token": None, + "results": [{"url": "http://foobar.baz"}], + } diff --git a/swh/search/tests/test_elasticsearch.py b/swh/search/tests/test_elasticsearch.py index 73b4486..c5c185b 100644 --- a/swh/search/tests/test_elasticsearch.py +++ b/swh/search/tests/test_elasticsearch.py @@ -1,31 +1,29 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import pytest from swh.search import get_search from .test_search import CommonSearchTest class BaseElasticsearchTest(unittest.TestCase): @pytest.fixture(autouse=True) def _instantiate_search(self, elasticsearch_host): self._elasticsearch_host = elasticsearch_host - self.search = get_search('elasticsearch', { - 'hosts': [elasticsearch_host], - }) + self.search = get_search("elasticsearch", {"hosts": [elasticsearch_host],}) def setUp(self): self.reset() def reset(self): self.search.deinitialize() self.search.initialize() class TestElasticsearchSearch(CommonSearchTest, BaseElasticsearchTest): pass diff --git a/swh/search/tests/test_in_memory.py b/swh/search/tests/test_in_memory.py index 57312bb..f48eb02 100644 --- a/swh/search/tests/test_in_memory.py +++ b/swh/search/tests/test_in_memory.py @@ -1,40 +1,40 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import pytest from swh.search import get_search from .test_search import CommonSearchTest class InmemorySearchTest(unittest.TestCase, CommonSearchTest): @pytest.fixture(autouse=True) def _instantiate_search(self): - self.search = get_search('memory', {}) + self.search = get_search("memory", {}) def setUp(self): self.reset() def reset(self): self.search.deinitialize() self.search.initialize() - @pytest.mark.skip('Not implemented in the in-memory search') + @pytest.mark.skip("Not implemented in the in-memory search") def test_origin_intrinsic_metadata_description(self): pass - @pytest.mark.skip('Not implemented in the in-memory search') + @pytest.mark.skip("Not implemented in the in-memory search") def test_origin_intrinsic_metadata_all_terms(self): pass - @pytest.mark.skip('Not implemented in the in-memory search') + @pytest.mark.skip("Not implemented in the in-memory search") def test_origin_intrinsic_metadata_nested(self): pass - @pytest.mark.skip('Not implemented in the in-memory search') + @pytest.mark.skip("Not implemented in the in-memory search") def test_origin_intrinsic_metadata_paging(self): pass diff --git a/swh/search/tests/test_journal_client.py b/swh/search/tests/test_journal_client.py index dcb4566..b8d92f9 100644 --- a/swh/search/tests/test_journal_client.py +++ b/swh/search/tests/test_journal_client.py @@ -1,82 +1,71 @@ # Copyright (C) 2019 The Software Heritage 
developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import functools import unittest from unittest.mock import MagicMock from swh.search.journal_client import process_journal_objects class SearchJournalClientTest(unittest.TestCase): def test_origin_from_journal(self): search_mock = MagicMock() - worker_fn = functools.partial( - process_journal_objects, - search=search_mock, - ) + worker_fn = functools.partial(process_journal_objects, search=search_mock,) - worker_fn({'origin': [ - {'url': 'http://foobar.baz'}, - ]}) - search_mock.origin_update.assert_called_once_with([ - {'url': 'http://foobar.baz'}, - ]) + worker_fn({"origin": [{"url": "http://foobar.baz"},]}) + search_mock.origin_update.assert_called_once_with( + [{"url": "http://foobar.baz"},] + ) search_mock.reset_mock() - worker_fn({'origin': [ - {'url': 'http://foobar.baz'}, - {'url': 'http://barbaz.qux'}, - ]}) - search_mock.origin_update.assert_called_once_with([ - {'url': 'http://foobar.baz'}, - {'url': 'http://barbaz.qux'}, - ]) + worker_fn( + {"origin": [{"url": "http://foobar.baz"}, {"url": "http://barbaz.qux"},]} + ) + search_mock.origin_update.assert_called_once_with( + [{"url": "http://foobar.baz"}, {"url": "http://barbaz.qux"},] + ) def test_origin_visit_from_journal(self): search_mock = MagicMock() - worker_fn = functools.partial( - process_journal_objects, - search=search_mock, - ) + worker_fn = functools.partial(process_journal_objects, search=search_mock,) - worker_fn({'origin_visit': [ - { - 'origin': {'url': 'http://foobar.baz'}, - } - ]}) - search_mock.origin_update.assert_called_once_with([ - {'url': 'http://foobar.baz', 'has_visits': True}, - ]) + worker_fn({"origin_visit": [{"origin": {"url": "http://foobar.baz"},}]}) + search_mock.origin_update.assert_called_once_with( + [{"url": "http://foobar.baz", "has_visits": True},] + ) def test_origin_metadata_from_journal(self): search_mock = MagicMock() - worker_fn = functools.partial( - process_journal_objects, - search=search_mock, - ) + worker_fn = functools.partial(process_journal_objects, search=search_mock,) - worker_fn({'origin_intrinsic_metadata': [ - { - 'origin_url': 'http://foobar.baz', - 'metadata': { - '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', - 'description': 'foo bar', - }, - }, - ]}) - search_mock.origin_update.assert_called_once_with([ + worker_fn( { - 'url': 'http://foobar.baz', - 'intrinsic_metadata': { - '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', - 'description': 'foo bar', + "origin_intrinsic_metadata": [ + { + "origin_url": "http://foobar.baz", + "metadata": { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "description": "foo bar", + }, + }, + ] + } + ) + search_mock.origin_update.assert_called_once_with( + [ + { + "url": "http://foobar.baz", + "intrinsic_metadata": { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "description": "foo bar", + }, }, - }, - ]) + ] + ) diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py index 0105777..b0f0914 100644 --- a/swh/search/tests/test_search.py +++ b/swh/search/tests/test_search.py @@ -1,294 +1,320 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from hypothesis import given, 
strategies, settings from swh.search.utils import stream_results class CommonSearchTest: def test_origin_url_unique_word_prefix(self): - self.search.origin_update([ - {'url': 'http://foobar.baz'}, - {'url': 'http://barbaz.qux'}, - {'url': 'http://qux.quux'}, - ]) + self.search.origin_update( + [ + {"url": "http://foobar.baz"}, + {"url": "http://barbaz.qux"}, + {"url": "http://qux.quux"}, + ] + ) self.search.flush() - results = self.search.origin_search(url_pattern='foobar') - assert results == {'next_page_token': None, 'results': [ - {'url': 'http://foobar.baz'}]} + results = self.search.origin_search(url_pattern="foobar") + assert results == { + "next_page_token": None, + "results": [{"url": "http://foobar.baz"}], + } - results = self.search.origin_search(url_pattern='barb') - assert results == {'next_page_token': None, 'results': [ - {'url': 'http://barbaz.qux'}]} + results = self.search.origin_search(url_pattern="barb") + assert results == { + "next_page_token": None, + "results": [{"url": "http://barbaz.qux"}], + } # 'bar' is part of 'foobar', but is not the beginning of it - results = self.search.origin_search(url_pattern='bar') - assert results == {'next_page_token': None, 'results': [ - {'url': 'http://barbaz.qux'}]} - - results = self.search.origin_search(url_pattern='barbaz') - assert results == {'next_page_token': None, 'results': [ - {'url': 'http://barbaz.qux'}]} + results = self.search.origin_search(url_pattern="bar") + assert results == { + "next_page_token": None, + "results": [{"url": "http://barbaz.qux"}], + } + + results = self.search.origin_search(url_pattern="barbaz") + assert results == { + "next_page_token": None, + "results": [{"url": "http://barbaz.qux"}], + } def test_origin_url_unique_word_prefix_multiple_results(self): - self.search.origin_update([ - {'url': 'http://foobar.baz'}, - {'url': 'http://barbaz.qux'}, - {'url': 'http://qux.quux'}, - ]) + self.search.origin_update( + [ + {"url": "http://foobar.baz"}, + {"url": "http://barbaz.qux"}, + {"url": "http://qux.quux"}, + ] + ) self.search.flush() - results = self.search.origin_search(url_pattern='qu') - assert results['next_page_token'] is None + results = self.search.origin_search(url_pattern="qu") + assert results["next_page_token"] is None - results = [res['url'] for res in results['results']] - expected_results = ['http://qux.quux', 'http://barbaz.qux'] + results = [res["url"] for res in results["results"]] + expected_results = ["http://qux.quux", "http://barbaz.qux"] assert sorted(results) == sorted(expected_results) - results = self.search.origin_search(url_pattern='qux') - assert results['next_page_token'] is None + results = self.search.origin_search(url_pattern="qux") + assert results["next_page_token"] is None - results = [res['url'] for res in results['results']] - expected_results = ['http://barbaz.qux', 'http://qux.quux'] + results = [res["url"] for res in results["results"]] + expected_results = ["http://barbaz.qux", "http://qux.quux"] assert sorted(results) == sorted(expected_results) def test_origin_url_all_terms(self): - self.search.origin_update([ - {'url': 'http://foo.bar/baz'}, - {'url': 'http://foo.bar/foo.bar'}, - ]) + self.search.origin_update( + [{"url": "http://foo.bar/baz"}, {"url": "http://foo.bar/foo.bar"},] + ) self.search.flush() # Only results containing all terms should be returned. 
- results = self.search.origin_search(url_pattern='foo bar baz') - assert results == {'next_page_token': None, 'results': [ - {'url': 'http://foo.bar/baz'}, - ]} + results = self.search.origin_search(url_pattern="foo bar baz") + assert results == { + "next_page_token": None, + "results": [{"url": "http://foo.bar/baz"},], + } def test_origin_with_visit(self): - self.search.origin_update([ - {'url': 'http://foobar.baz', 'has_visits': True}, - ]) + self.search.origin_update( + [{"url": "http://foobar.baz", "has_visits": True},] + ) self.search.flush() - results = self.search.origin_search( - url_pattern='foobar', with_visit=True) - assert results == {'next_page_token': None, 'results': [ - {'url': 'http://foobar.baz'}]} + results = self.search.origin_search(url_pattern="foobar", with_visit=True) + assert results == { + "next_page_token": None, + "results": [{"url": "http://foobar.baz"}], + } def test_origin_with_visit_added(self): - self.search.origin_update([ - {'url': 'http://foobar.baz'}, - ]) + self.search.origin_update( + [{"url": "http://foobar.baz"},] + ) self.search.flush() - results = self.search.origin_search( - url_pattern='foobar', with_visit=True) - assert results == {'next_page_token': None, 'results': []} + results = self.search.origin_search(url_pattern="foobar", with_visit=True) + assert results == {"next_page_token": None, "results": []} - self.search.origin_update([ - {'url': 'http://foobar.baz', 'has_visits': True}, - ]) + self.search.origin_update( + [{"url": "http://foobar.baz", "has_visits": True},] + ) self.search.flush() - results = self.search.origin_search( - url_pattern='foobar', with_visit=True) - assert results == {'next_page_token': None, 'results': [ - {'url': 'http://foobar.baz'}]} + results = self.search.origin_search(url_pattern="foobar", with_visit=True) + assert results == { + "next_page_token": None, + "results": [{"url": "http://foobar.baz"}], + } def test_origin_intrinsic_metadata_description(self): - self.search.origin_update([ - { - 'url': 'http://origin1', - 'intrinsic_metadata': {}, - }, - { - 'url': 'http://origin2', - 'intrinsic_metadata': { - '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', - 'description': 'foo bar', + self.search.origin_update( + [ + {"url": "http://origin1", "intrinsic_metadata": {},}, + { + "url": "http://origin2", + "intrinsic_metadata": { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "description": "foo bar", + }, + }, + { + "url": "http://origin3", + "intrinsic_metadata": { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "description": "bar baz", + }, }, - }, - { - 'url': 'http://origin3', - 'intrinsic_metadata': { - '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', - 'description': 'bar baz', - } - }, - ]) + ] + ) self.search.flush() - results = self.search.origin_search(metadata_pattern='foo') - assert results == {'next_page_token': None, 'results': [ - {'url': 'http://origin2'}]} + results = self.search.origin_search(metadata_pattern="foo") + assert results == { + "next_page_token": None, + "results": [{"url": "http://origin2"}], + } - results = self.search.origin_search(metadata_pattern='foo bar') - assert results == {'next_page_token': None, 'results': [ - {'url': 'http://origin2'}]} + results = self.search.origin_search(metadata_pattern="foo bar") + assert results == { + "next_page_token": None, + "results": [{"url": "http://origin2"}], + } - results = self.search.origin_search(metadata_pattern='bar baz') - assert results == {'next_page_token': None, 'results': [ - 
{'url': 'http://origin3'}]} + results = self.search.origin_search(metadata_pattern="bar baz") + assert results == { + "next_page_token": None, + "results": [{"url": "http://origin3"}], + } def test_origin_intrinsic_metadata_all_terms(self): - self.search.origin_update([ - { - 'url': 'http://origin1', - 'intrinsic_metadata': { - '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', - 'description': 'foo bar foo bar', + self.search.origin_update( + [ + { + "url": "http://origin1", + "intrinsic_metadata": { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "description": "foo bar foo bar", + }, }, - }, - { - 'url': 'http://origin3', - 'intrinsic_metadata': { - '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', - 'description': 'foo bar baz', - } - }, - ]) + { + "url": "http://origin3", + "intrinsic_metadata": { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "description": "foo bar baz", + }, + }, + ] + ) self.search.flush() - results = self.search.origin_search(metadata_pattern='foo bar baz') - assert results == {'next_page_token': None, 'results': [ - {'url': 'http://origin3'}]} + results = self.search.origin_search(metadata_pattern="foo bar baz") + assert results == { + "next_page_token": None, + "results": [{"url": "http://origin3"}], + } def test_origin_intrinsic_metadata_nested(self): - self.search.origin_update([ - { - 'url': 'http://origin1', - 'intrinsic_metadata': {}, - }, - { - 'url': 'http://origin2', - 'intrinsic_metadata': { - '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', - 'keywords': ['foo', 'bar'], + self.search.origin_update( + [ + {"url": "http://origin1", "intrinsic_metadata": {},}, + { + "url": "http://origin2", + "intrinsic_metadata": { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "keywords": ["foo", "bar"], + }, + }, + { + "url": "http://origin3", + "intrinsic_metadata": { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "keywords": ["bar", "baz"], + }, }, - }, - { - 'url': 'http://origin3', - 'intrinsic_metadata': { - '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', - 'keywords': ['bar', 'baz'], - } - }, - ]) + ] + ) self.search.flush() - results = self.search.origin_search(metadata_pattern='foo') - assert results == {'next_page_token': None, 'results': [ - {'url': 'http://origin2'}]} + results = self.search.origin_search(metadata_pattern="foo") + assert results == { + "next_page_token": None, + "results": [{"url": "http://origin2"}], + } - results = self.search.origin_search(metadata_pattern='foo bar') - assert results == {'next_page_token': None, 'results': [ - {'url': 'http://origin2'}]} + results = self.search.origin_search(metadata_pattern="foo bar") + assert results == { + "next_page_token": None, + "results": [{"url": "http://origin2"}], + } - results = self.search.origin_search(metadata_pattern='bar baz') - assert results == {'next_page_token': None, 'results': [ - {'url': 'http://origin3'}]} + results = self.search.origin_search(metadata_pattern="bar baz") + assert results == { + "next_page_token": None, + "results": [{"url": "http://origin3"}], + } # TODO: add more tests with more codemeta terms # TODO: add more tests with edge cases @settings(deadline=None) @given(strategies.integers(min_value=1, max_value=4)) def test_origin_url_paging(self, count): # TODO: no hypothesis self.reset() - self.search.origin_update([ - {'url': 'http://origin1/foo'}, - {'url': 'http://origin2/foo/bar'}, - {'url': 'http://origin3/foo/bar/baz'}, - ]) + self.search.origin_update( + [ + 
{"url": "http://origin1/foo"}, + {"url": "http://origin2/foo/bar"}, + {"url": "http://origin3/foo/bar/baz"}, + ] + ) self.search.flush() results = stream_results( - self.search.origin_search, - url_pattern='foo bar baz', count=count) - results = [res['url'] for res in results] + self.search.origin_search, url_pattern="foo bar baz", count=count + ) + results = [res["url"] for res in results] expected_results = [ - 'http://origin3/foo/bar/baz', + "http://origin3/foo/bar/baz", ] - assert sorted(results[0:len(expected_results)]) == \ - sorted(expected_results) + assert sorted(results[0 : len(expected_results)]) == sorted(expected_results) results = stream_results( - self.search.origin_search, - url_pattern='foo bar', count=count) + self.search.origin_search, url_pattern="foo bar", count=count + ) expected_results = [ - 'http://origin2/foo/bar', - 'http://origin3/foo/bar/baz', + "http://origin2/foo/bar", + "http://origin3/foo/bar/baz", ] - results = [res['url'] for res in results] - assert sorted(results[0:len(expected_results)]) == \ - sorted(expected_results) + results = [res["url"] for res in results] + assert sorted(results[0 : len(expected_results)]) == sorted(expected_results) results = stream_results( - self.search.origin_search, - url_pattern='foo', count=count) + self.search.origin_search, url_pattern="foo", count=count + ) expected_results = [ - 'http://origin1/foo', - 'http://origin2/foo/bar', - 'http://origin3/foo/bar/baz', + "http://origin1/foo", + "http://origin2/foo/bar", + "http://origin3/foo/bar/baz", ] - results = [res['url'] for res in results] - assert sorted(results[0:len(expected_results)]) == \ - sorted(expected_results) + results = [res["url"] for res in results] + assert sorted(results[0 : len(expected_results)]) == sorted(expected_results) @settings(deadline=None) @given(strategies.integers(min_value=1, max_value=4)) def test_origin_intrinsic_metadata_paging(self, count): # TODO: no hypothesis self.reset() - self.search.origin_update([ - { - 'url': 'http://origin1', - 'intrinsic_metadata': { - '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', - 'keywords': ['foo'], + self.search.origin_update( + [ + { + "url": "http://origin1", + "intrinsic_metadata": { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "keywords": ["foo"], + }, }, - }, - { - 'url': 'http://origin2', - 'intrinsic_metadata': { - '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', - 'keywords': ['foo', 'bar'], + { + "url": "http://origin2", + "intrinsic_metadata": { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "keywords": ["foo", "bar"], + }, }, - }, - { - 'url': 'http://origin3', - 'intrinsic_metadata': { - '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', - 'keywords': ['foo', 'bar', 'baz'], - } - }, - ]) + { + "url": "http://origin3", + "intrinsic_metadata": { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "keywords": ["foo", "bar", "baz"], + }, + }, + ] + ) self.search.flush() results = stream_results( - self.search.origin_search, - metadata_pattern='foo bar baz', count=count) - assert list(results) == [ - {'url': 'http://origin3'}] + self.search.origin_search, metadata_pattern="foo bar baz", count=count + ) + assert list(results) == [{"url": "http://origin3"}] results = stream_results( - self.search.origin_search, - metadata_pattern='foo bar', count=count) - assert list(results) == [ - {'url': 'http://origin2'}, - {'url': 'http://origin3'}] + self.search.origin_search, metadata_pattern="foo bar", count=count + ) + assert 
list(results) == [{"url": "http://origin2"}, {"url": "http://origin3"}] results = stream_results( - self.search.origin_search, - metadata_pattern='foo', count=count) + self.search.origin_search, metadata_pattern="foo", count=count + ) assert list(results) == [ - {'url': 'http://origin1'}, - {'url': 'http://origin2'}, - {'url': 'http://origin3'}] + {"url": "http://origin1"}, + {"url": "http://origin2"}, + {"url": "http://origin3"}, + ] diff --git a/swh/search/utils.py b/swh/search/utils.py index b224573..fce8c4e 100644 --- a/swh/search/utils.py +++ b/swh/search/utils.py @@ -1,16 +1,16 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information def stream_results(f, *args, **kwargs): - if 'page_token' in kwargs: + if "page_token" in kwargs: raise TypeError('stream_results has no argument "page_token".') page_token = None while True: results = f(*args, page_token=page_token, **kwargs) - yield from results['results'] - page_token = results['next_page_token'] + yield from results["results"] + page_token = results["next_page_token"] if page_token is None: break diff --git a/tox.ini b/tox.ini index 04fb628..df58667 100644 --- a/tox.ini +++ b/tox.ini @@ -1,27 +1,34 @@ [tox] -envlist=flake8,mypy,py3 +envlist=black,flake8,mypy,py3 [testenv] extras = testing deps = pytest-cov commands = pytest --cov={envsitepackagesdir}/swh/search \ {envsitepackagesdir}/swh/search \ --cov-branch {posargs} +[testenv:black] +skip_install = true +deps = + black +commands = + {envpython} -m black --check swh + [testenv:flake8] skip_install = true deps = flake8 commands = {envpython} -m flake8 [testenv:mypy] extras = testing deps = mypy commands = mypy swh