diff --git a/PKG-INFO b/PKG-INFO index a3444b3..a2920a6 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,70 +1,71 @@ Metadata-Version: 2.1 Name: swh.indexer -Version: 0.0.171 +Version: 0.1.0 Summary: Software Heritage Content Indexer Home-page: https://forge.softwareheritage.org/diffusion/78/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-indexer +Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-indexer/ Description: swh-indexer ============ Tools to compute multiple indexes on SWH's raw contents: - content: - mimetype - ctags - language - fossology-license - metadata - revision: - metadata An indexer is in charge of: - looking up objects - extracting information from those objects - store those information in the swh-indexer db There are multiple indexers working on different object types: - content indexer: works with content sha1 hashes - revision indexer: works with revision sha1 hashes - origin indexer: works with origin identifiers Indexation procedure: - receive batch of ids - retrieve the associated data depending on object type - compute for that object some index - store the result to swh's storage Current content indexers: - mimetype (queue swh_indexer_content_mimetype): detect the encoding and mimetype - language (queue swh_indexer_content_language): detect the programming language - ctags (queue swh_indexer_content_ctags): compute tags information - fossology-license (queue swh_indexer_fossology_license): compute the license - metadata: translate file into translated_metadata dict Current revision indexers: - metadata: detects files containing metadata and retrieves translated_metadata in content_metadata table in storage or run content indexer to translate files. 
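The four-step indexation procedure above boils down to a small fetch/compute/store loop. The block below is a minimal illustrative sketch only, not the actual swh.indexer API: `compute_mimetype` is a hypothetical placeholder for the real detection step (the mimetype indexer relies on python-magic), and `objstorage`/`idx_storage` are assumed handles following the object-storage and indexer-storage interfaces used elsewhere in this patch.

```python
# Illustrative sketch of the indexation procedure described above, using the
# mimetype indexer as an example. `objstorage`, `idx_storage` and `tool_id` are
# assumed handles; `compute_mimetype` is a hypothetical stand-in for the real
# detection step (python-magic in the actual indexer).

def compute_mimetype(raw: bytes):
    """Placeholder detection step; returns (mimetype, encoding)."""
    return "text/plain", "us-ascii"

def index_mimetype_batch(ids, objstorage, idx_storage, tool_id):
    results = []
    for sha1 in ids:                                  # 1. receive a batch of ids
        raw = objstorage.get(sha1)                    # 2. retrieve the associated data
        mimetype, encoding = compute_mimetype(raw)    # 3. compute the index for that object
        results.append(
            {
                "id": sha1,
                "mimetype": mimetype,
                "encoding": encoding,
                "indexer_configuration_id": tool_id,
            }
        )
    # 4. store the result in the indexer storage
    return idx_storage.content_mimetype_add(results)
```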
Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing diff --git a/requirements-swh.txt b/requirements-swh.txt index c19936c..32c8593 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,6 +1,6 @@ swh.core[db,http] >= 0.0.87 swh.model >= 0.0.15 -swh.objstorage >= 0.0.28 +swh.objstorage >= 0.0.43 swh.scheduler >= 0.0.47 -swh.storage >= 0.0.156 -swh.journal >= 0.0.31 +swh.storage >= 0.6.0 +swh.journal >= 0.1.0 diff --git a/requirements-test.txt b/requirements-test.txt index c609823..68bb694 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,4 +1,5 @@ +confluent-kafka pytest pytest-postgresql hypothesis>=3.11.0 swh.storage>= 0.0.178 diff --git a/setup.py b/setup.py index 397491b..1f6fd99 100755 --- a/setup.py +++ b/setup.py @@ -1,72 +1,73 @@ #!/usr/bin/env python3 # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from setuptools import setup, find_packages from os import path from io import open here = path.abspath(path.dirname(__file__)) # Get the long description from the README file with open(path.join(here, "README.md"), encoding="utf-8") as f: long_description = f.read() def parse_requirements(name=None): if name: reqf = "requirements-%s.txt" % name else: reqf = "requirements.txt" requirements = [] if not path.exists(reqf): return requirements with open(reqf) as f: for line in f.readlines(): line = line.strip() if not line or line.startswith("#"): continue requirements.append(line) return requirements setup( name="swh.indexer", description="Software Heritage Content Indexer", long_description=long_description, long_description_content_type="text/markdown", python_requires=">=3.7", author="Software Heritage developers", author_email="swh-devel@inria.fr", url="https://forge.softwareheritage.org/diffusion/78/", packages=find_packages(), scripts=[], install_requires=parse_requirements() + parse_requirements("swh"), setup_requires=["vcversioner"], extras_require={"testing": parse_requirements("test")}, vcversioner={}, include_package_data=True, entry_points=""" [console_scripts] swh-indexer=swh.indexer.cli:main [swh.cli.subcommands] indexer=swh.indexer.cli:cli """, classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Development Status :: 5 - Production/Stable", ], project_urls={ "Bug Reports": "https://forge.softwareheritage.org/maniphest", "Funding": "https://www.softwareheritage.org/donate", "Source": "https://forge.softwareheritage.org/source/swh-indexer", + "Documentation": "https://docs.softwareheritage.org/devel/swh-indexer/", }, ) diff --git a/swh.indexer.egg-info/PKG-INFO b/swh.indexer.egg-info/PKG-INFO index a3444b3..a2920a6 100644 --- a/swh.indexer.egg-info/PKG-INFO +++ b/swh.indexer.egg-info/PKG-INFO @@ -1,70 +1,71 @@ Metadata-Version: 2.1 Name: swh.indexer -Version: 0.0.171 +Version: 0.1.0 Summary: Software Heritage Content Indexer Home-page: https://forge.softwareheritage.org/diffusion/78/ 
Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-indexer +Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-indexer/ Description: swh-indexer ============ Tools to compute multiple indexes on SWH's raw contents: - content: - mimetype - ctags - language - fossology-license - metadata - revision: - metadata An indexer is in charge of: - looking up objects - extracting information from those objects - store those information in the swh-indexer db There are multiple indexers working on different object types: - content indexer: works with content sha1 hashes - revision indexer: works with revision sha1 hashes - origin indexer: works with origin identifiers Indexation procedure: - receive batch of ids - retrieve the associated data depending on object type - compute for that object some index - store the result to swh's storage Current content indexers: - mimetype (queue swh_indexer_content_mimetype): detect the encoding and mimetype - language (queue swh_indexer_content_language): detect the programming language - ctags (queue swh_indexer_content_ctags): compute tags information - fossology-license (queue swh_indexer_fossology_license): compute the license - metadata: translate file into translated_metadata dict Current revision indexers: - metadata: detects files containing metadata and retrieves translated_metadata in content_metadata table in storage or run content indexer to translate files. Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing diff --git a/swh.indexer.egg-info/requires.txt b/swh.indexer.egg-info/requires.txt index 54018e5..0d7adeb 100644 --- a/swh.indexer.egg-info/requires.txt +++ b/swh.indexer.egg-info/requires.txt @@ -1,17 +1,18 @@ vcversioner click python-magic>=0.4.13 pyld xmltodict swh.core[db,http]>=0.0.87 swh.model>=0.0.15 -swh.objstorage>=0.0.28 +swh.objstorage>=0.0.43 swh.scheduler>=0.0.47 -swh.storage>=0.0.156 -swh.journal>=0.0.31 +swh.storage>=0.6.0 +swh.journal>=0.1.0 [testing] +confluent-kafka pytest pytest-postgresql hypothesis>=3.11.0 swh.storage>=0.0.178 diff --git a/swh/indexer/cli.py b/swh/indexer/cli.py index e2c904d..986f2f3 100644 --- a/swh/indexer/cli.py +++ b/swh/indexer/cli.py @@ -1,284 +1,284 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import functools import json import click from swh.core import config from swh.core.cli import CONTEXT_SETTINGS, AliasedGroup from swh.journal.client import get_journal_client from swh.scheduler import get_scheduler from swh.scheduler.cli_utils import schedule_origin_batches from swh.storage import get_storage from swh.indexer import metadata_dictionary from swh.indexer.journal_client import process_journal_objects from swh.indexer.storage import get_indexer_storage from swh.indexer.storage.api.server import load_and_check_config, app 
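# Illustrative sketch (not part of the original module): a condensed view of how
# the journal-client command further down wires the imports above together with
# the new swh.journal client API. The broker, prefix, group id and scheduler URL
# below are placeholder values, not defaults.
def _journal_client_overview():
    scheduler = get_scheduler(cls="remote", args={"url": "http://localhost:5008/"})
    client = get_journal_client(
        cls="kafka",
        brokers=["localhost:9092"],
        prefix="swh.journal.objects",
        group_id="swh.indexer.journal-client",
        object_types=["origin_visit"],
        stop_after_objects=None,
    )
    worker_fn = functools.partial(
        process_journal_objects,
        scheduler=scheduler,
        task_names={"origin_metadata": "index-origin-metadata"},
    )
    try:
        client.process(worker_fn)
    finally:
        client.close()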
@click.group(name="indexer", context_settings=CONTEXT_SETTINGS, cls=AliasedGroup) @click.option( "--config-file", "-C", default=None, type=click.Path(exists=True, dir_okay=False,), help="Configuration file.", ) @click.pass_context def cli(ctx, config_file): """Software Heritage Indexer tools. The Indexer is used to mine the content of the archive and extract derived information from archive source code artifacts. """ ctx.ensure_object(dict) conf = config.read(config_file) ctx.obj["config"] = conf def _get_api(getter, config, config_key, url): if url: config[config_key] = {"cls": "remote", "args": {"url": url}} elif config_key not in config: raise click.ClickException("Missing configuration for {}".format(config_key)) return getter(**config[config_key]) @cli.group("mapping") def mapping(): """Manage Software Heritage Indexer mappings.""" pass @mapping.command("list") def mapping_list(): """Prints the list of known mappings.""" mapping_names = [mapping.name for mapping in metadata_dictionary.MAPPINGS.values()] mapping_names.sort() for mapping_name in mapping_names: click.echo(mapping_name) @mapping.command("list-terms") @click.option( "--exclude-mapping", multiple=True, help="Exclude the given mapping from the output" ) @click.option( "--concise", is_flag=True, default=False, help="Don't print the list of mappings supporting each term.", ) def mapping_list_terms(concise, exclude_mapping): """Prints the list of known CodeMeta terms, and which mappings support them.""" properties = metadata_dictionary.list_terms() for (property_name, supported_mappings) in sorted(properties.items()): supported_mappings = {m.name for m in supported_mappings} supported_mappings -= set(exclude_mapping) if supported_mappings: if concise: click.echo(property_name) else: click.echo("{}:".format(property_name)) click.echo("\t" + ", ".join(sorted(supported_mappings))) @mapping.command("translate") @click.argument("mapping-name") @click.argument("file", type=click.File("rb")) def mapping_translate(mapping_name, file): """Prints the list of known mappings.""" mapping_cls = [ cls for cls in metadata_dictionary.MAPPINGS.values() if cls.name == mapping_name ] if not mapping_cls: raise click.ClickException("Unknown mapping {}".format(mapping_name)) assert len(mapping_cls) == 1 mapping_cls = mapping_cls[0] mapping = mapping_cls() codemeta_doc = mapping.translate(file.read()) click.echo(json.dumps(codemeta_doc, indent=4)) @cli.group("schedule") @click.option("--scheduler-url", "-s", default=None, help="URL of the scheduler API") @click.option( "--indexer-storage-url", "-i", default=None, help="URL of the indexer storage API" ) @click.option( "--storage-url", "-g", default=None, help="URL of the (graph) storage API" ) @click.option( "--dry-run/--no-dry-run", is_flag=True, default=False, help="List only what would be scheduled.", ) @click.pass_context def schedule(ctx, scheduler_url, storage_url, indexer_storage_url, dry_run): """Manipulate Software Heritage Indexer tasks. 
Via SWH Scheduler's API.""" ctx.obj["indexer_storage"] = _get_api( get_indexer_storage, ctx.obj["config"], "indexer_storage", indexer_storage_url ) ctx.obj["storage"] = _get_api( get_storage, ctx.obj["config"], "storage", storage_url ) ctx.obj["scheduler"] = _get_api( get_scheduler, ctx.obj["config"], "scheduler", scheduler_url ) if dry_run: ctx.obj["scheduler"] = None def list_origins_by_producer(idx_storage, mappings, tool_ids): next_page_token = "" limit = 10000 while next_page_token is not None: result = idx_storage.origin_intrinsic_metadata_search_by_producer( page_token=next_page_token, limit=limit, ids_only=True, mappings=mappings or None, tool_ids=tool_ids or None, ) next_page_token = result.get("next_page_token") yield from result["origins"] @schedule.command("reindex_origin_metadata") @click.option( "--batch-size", "-b", "origin_batch_size", default=10, show_default=True, type=int, help="Number of origins per task", ) @click.option( "--tool-id", "-t", "tool_ids", type=int, multiple=True, help="Restrict search of old metadata to this/these tool ids.", ) @click.option( "--mapping", "-m", "mappings", multiple=True, - help="Mapping(s) that should be re-scheduled (eg. 'npm', " "'gemspec', 'maven')", + help="Mapping(s) that should be re-scheduled (eg. 'npm', 'gemspec', 'maven')", ) @click.option( "--task-type", default="index-origin-metadata", show_default=True, help="Name of the task type to schedule.", ) @click.pass_context def schedule_origin_metadata_reindex( ctx, origin_batch_size, tool_ids, mappings, task_type ): """Schedules indexing tasks for origins that were already indexed.""" idx_storage = ctx.obj["indexer_storage"] scheduler = ctx.obj["scheduler"] origins = list_origins_by_producer(idx_storage, mappings, tool_ids) kwargs = {"policy_update": "update-dups"} schedule_origin_batches(scheduler, task_type, origins, origin_batch_size, kwargs) @cli.command("journal-client") @click.option("--scheduler-url", "-s", default=None, help="URL of the scheduler API") @click.option( "--origin-metadata-task-type", default="index-origin-metadata", help="Name of the task running the origin metadata indexer.", ) @click.option( "--broker", "brokers", type=str, multiple=True, help="Kafka broker to connect to." ) @click.option( "--prefix", type=str, default=None, help="Prefix of Kafka topic names to read from." ) @click.option("--group-id", type=str, help="Consumer/group id for reading from Kafka.") @click.option( "--stop-after-objects", "-m", default=None, type=int, - help="Maximum number of objects to replay. Default is to " "run forever.", + help="Maximum number of objects to replay. 
Default is to run forever.", ) @click.pass_context def journal_client( ctx, scheduler_url, origin_metadata_task_type, brokers, prefix, group_id, stop_after_objects, ): """Listens for new objects from the SWH Journal, and schedules tasks to run relevant indexers (currently, only origin-intrinsic-metadata) on these new objects.""" scheduler = _get_api(get_scheduler, ctx.obj["config"], "scheduler", scheduler_url) client = get_journal_client( cls="kafka", brokers=brokers, prefix=prefix, group_id=group_id, object_types=["origin_visit"], stop_after_objects=stop_after_objects, ) worker_fn = functools.partial( process_journal_objects, scheduler=scheduler, task_names={"origin_metadata": origin_metadata_task_type,}, ) try: client.process(worker_fn) except KeyboardInterrupt: ctx.exit(0) else: print("Done.") finally: client.close() @cli.command("rpc-serve") @click.argument("config-path", required=True) @click.option("--host", default="0.0.0.0", help="Host to run the server") @click.option("--port", default=5007, type=click.INT, help="Binding port of the server") @click.option( "--debug/--nodebug", default=True, help="Indicates if the server should run in debug mode", ) def rpc_server(config_path, host, port, debug): """Starts a Software Heritage Indexer RPC HTTP server.""" api_cfg = load_and_check_config(config_path, type="any") app.config.update(api_cfg) app.run(host, port=int(port), debug=bool(debug)) def main(): return cli(auto_envvar_prefix="SWH_INDEXER") if __name__ == "__main__": main() diff --git a/swh/indexer/codemeta.py b/swh/indexer/codemeta.py index a0fb805..a900104 100644 --- a/swh/indexer/codemeta.py +++ b/swh/indexer/codemeta.py @@ -1,203 +1,203 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import collections import csv import itertools import json import os.path import re import swh.indexer from pyld import jsonld _DATA_DIR = os.path.join(os.path.dirname(swh.indexer.__file__), "data") CROSSWALK_TABLE_PATH = os.path.join(_DATA_DIR, "codemeta", "crosswalk.csv") CODEMETA_CONTEXT_PATH = os.path.join(_DATA_DIR, "codemeta", "codemeta.jsonld") with open(CODEMETA_CONTEXT_PATH) as fd: CODEMETA_CONTEXT = json.load(fd) CODEMETA_CONTEXT_URL = "https://doi.org/10.5063/schema/codemeta-2.0" CODEMETA_ALTERNATE_CONTEXT_URLS = { - ("https://raw.githubusercontent.com/codemeta/codemeta/" "master/codemeta.jsonld") + ("https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld") } CODEMETA_URI = "https://codemeta.github.io/terms/" SCHEMA_URI = "http://schema.org/" PROPERTY_BLACKLIST = { # CodeMeta properties that we cannot properly represent. 
SCHEMA_URI + "softwareRequirements", CODEMETA_URI + "softwareSuggestions", # Duplicate of 'author' SCHEMA_URI + "creator", } _codemeta_field_separator = re.compile(r"\s*[,/]\s*") def make_absolute_uri(local_name): definition = CODEMETA_CONTEXT["@context"][local_name] if isinstance(definition, str): return definition elif isinstance(definition, dict): prefixed_name = definition["@id"] (prefix, local_name) = prefixed_name.split(":") if prefix == "schema": canonical_name = SCHEMA_URI + local_name elif prefix == "codemeta": canonical_name = CODEMETA_URI + local_name else: assert False, prefix return canonical_name else: assert False, definition def _read_crosstable(fd): reader = csv.reader(fd) try: header = next(reader) except StopIteration: raise ValueError("empty file") data_sources = set(header) - {"Parent Type", "Property", "Type", "Description"} assert "codemeta-V1" in data_sources codemeta_translation = {data_source: {} for data_source in data_sources} terms = set() for line in reader: # For each canonical name local_name = dict(zip(header, line))["Property"] if not local_name: continue canonical_name = make_absolute_uri(local_name) if canonical_name in PROPERTY_BLACKLIST: continue terms.add(canonical_name) for (col, value) in zip(header, line): # For each cell in the row if col in data_sources: # If that's not the parentType/property/type/description for local_name in _codemeta_field_separator.split(value): # For each of the data source's properties that maps # to this canonical name if local_name.strip(): codemeta_translation[col][local_name.strip()] = canonical_name return (terms, codemeta_translation) with open(CROSSWALK_TABLE_PATH) as fd: (CODEMETA_TERMS, CROSSWALK_TABLE) = _read_crosstable(fd) def _document_loader(url, options=None): """Document loader for pyld. Reads the local codemeta.jsonld file instead of fetching it from the Internet every single time.""" if url == CODEMETA_CONTEXT_URL or url in CODEMETA_ALTERNATE_CONTEXT_URLS: return { "contextUrl": None, "documentUrl": url, "document": CODEMETA_CONTEXT, } elif url == CODEMETA_URI: raise Exception( "{} is CodeMeta's URI, use {} as context url".format( CODEMETA_URI, CODEMETA_CONTEXT_URL ) ) else: raise Exception(url) def compact(doc): """Same as `pyld.jsonld.compact`, but in the context of CodeMeta.""" return jsonld.compact( doc, CODEMETA_CONTEXT_URL, options={"documentLoader": _document_loader} ) def expand(doc): """Same as `pyld.jsonld.expand`, but in the context of CodeMeta.""" return jsonld.expand(doc, options={"documentLoader": _document_loader}) def merge_values(v1, v2): """If v1 and v2 are of the form `{"@list": l1}` and `{"@list": l2}`, returns `{"@list": l1 + l2}`. Otherwise, make them lists (if they are not already) and concatenate them. 
>>> merge_values('a', 'b') ['a', 'b'] >>> merge_values(['a', 'b'], 'c') ['a', 'b', 'c'] >>> merge_values({'@list': ['a', 'b']}, {'@list': ['c']}) {'@list': ['a', 'b', 'c']} """ if v1 is None: return v2 elif v2 is None: return v1 elif isinstance(v1, dict) and set(v1) == {"@list"}: assert isinstance(v1["@list"], list) if isinstance(v2, dict) and set(v2) == {"@list"}: assert isinstance(v2["@list"], list) return {"@list": v1["@list"] + v2["@list"]} else: raise ValueError("Cannot merge %r and %r" % (v1, v2)) else: if isinstance(v2, dict) and "@list" in v2: raise ValueError("Cannot merge %r and %r" % (v1, v2)) if not isinstance(v1, list): v1 = [v1] if not isinstance(v2, list): v2 = [v2] return v1 + v2 def merge_documents(documents): """Takes a list of metadata dicts, each generated from a different metadata file, and merges them. Removes duplicates, if any.""" documents = list(itertools.chain.from_iterable(map(expand, documents))) merged_document = collections.defaultdict(list) for document in documents: for (key, values) in document.items(): if key == "@id": # @id does not get expanded to a list value = values # Only one @id is allowed, move it to sameAs if "@id" not in merged_document: merged_document["@id"] = value elif value != merged_document["@id"]: if value not in merged_document[SCHEMA_URI + "sameAs"]: merged_document[SCHEMA_URI + "sameAs"].append(value) else: for value in values: if isinstance(value, dict) and set(value) == {"@list"}: # Value is of the form {'@list': [item1, item2]} # instead of the usual [item1, item2]. # We need to merge the inner lists (and mostly # preserve order). merged_value = merged_document.setdefault(key, {"@list": []}) for subvalue in value["@list"]: # merged_value must be of the form # {'@list': [item1, item2]}; as it is the same # type as value, which is an @list. if subvalue not in merged_value["@list"]: merged_value["@list"].append(subvalue) elif value not in merged_document[key]: merged_document[key].append(value) return compact(merged_document) diff --git a/swh/indexer/metadata_dictionary/npm.py b/swh/indexer/metadata_dictionary/npm.py index 5876e58..dfd41c5 100644 --- a/swh/indexer/metadata_dictionary/npm.py +++ b/swh/indexer/metadata_dictionary/npm.py @@ -1,160 +1,160 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import re from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI from .base import JsonMapping class NpmMapping(JsonMapping): """ dedicated class for NPM (package.json) mapping and translation """ name = "npm" mapping = CROSSWALK_TABLE["NodeJS"] filename = b"package.json" string_fields = ["name", "version", "homepage", "description", "email"] _schema_shortcuts = { "github": "git+https://github.com/%s.git", "gist": "git+https://gist.github.com/%s.git", "gitlab": "git+https://gitlab.com/%s.git", # Bitbucket supports both hg and git, and the shortcut does not # tell which one to use. # 'bitbucket': 'https://bitbucket.org/', } def normalize_repository(self, d): """https://docs.npmjs.com/files/package.json#repository >>> NpmMapping().normalize_repository({ ... 'type': 'git', ... 'url': 'https://example.org/foo.git' ... }) {'@id': 'git+https://example.org/foo.git'} >>> NpmMapping().normalize_repository( ... 'gitlab:foo/bar') {'@id': 'git+https://gitlab.com/foo/bar.git'} >>> NpmMapping().normalize_repository( ... 
'foo/bar') {'@id': 'git+https://github.com/foo/bar.git'} """ if ( isinstance(d, dict) and isinstance(d.get("type"), str) and isinstance(d.get("url"), str) ): url = "{type}+{url}".format(**d) elif isinstance(d, str): if "://" in d: url = d elif ":" in d: (schema, rest) = d.split(":", 1) if schema in self._schema_shortcuts: url = self._schema_shortcuts[schema] % rest else: return None else: url = self._schema_shortcuts["github"] % d else: return None return {"@id": url} def normalize_bugs(self, d): """https://docs.npmjs.com/files/package.json#bugs >>> NpmMapping().normalize_bugs({ ... 'url': 'https://example.org/bugs/', ... 'email': 'bugs@example.org' ... }) {'@id': 'https://example.org/bugs/'} >>> NpmMapping().normalize_bugs( ... 'https://example.org/bugs/') {'@id': 'https://example.org/bugs/'} """ if isinstance(d, dict) and isinstance(d.get("url"), str): return {"@id": d["url"]} elif isinstance(d, str): return {"@id": d} else: return None _parse_author = re.compile( r"^ *" r"(?P<name>.*?)" r"( +<(?P<email>.*)>)?" r"( +\((?P<url>.*)\))?" r" *$" ) def normalize_author(self, d): """https://docs.npmjs.com/files/package.json#people-fields-author-contributors' >>> from pprint import pprint >>> pprint(NpmMapping().normalize_author({ ... 'name': 'John Doe', ... 'email': 'john.doe@example.org', ... 'url': 'https://example.org/~john.doe', ... })) {'@list': [{'@type': 'http://schema.org/Person', 'http://schema.org/email': 'john.doe@example.org', 'http://schema.org/name': 'John Doe', 'http://schema.org/url': {'@id': 'https://example.org/~john.doe'}}]} >>> pprint(NpmMapping().normalize_author( ... 'John Doe <john.doe@example.org> (https://example.org/~john.doe)' ... )) {'@list': [{'@type': 'http://schema.org/Person', 'http://schema.org/email': 'john.doe@example.org', 'http://schema.org/name': 'John Doe', 'http://schema.org/url': {'@id': 'https://example.org/~john.doe'}}]} """ # noqa author = {"@type": SCHEMA_URI + "Person"} if isinstance(d, dict): name = d.get("name", None) email = d.get("email", None) url = d.get("url", None) elif isinstance(d, str): match = self._parse_author.match(d) if not match: return None name = match.group("name") email = match.group("email") url = match.group("url") else: return None if name and isinstance(name, str): author[SCHEMA_URI + "name"] = name if email and isinstance(email, str): author[SCHEMA_URI + "email"] = email if url and isinstance(url, str): author[SCHEMA_URI + "url"] = {"@id": url} return {"@list": [author]} def normalize_license(self, s): """https://docs.npmjs.com/files/package.json#license >>> NpmMapping().normalize_license('MIT') {'@id': 'https://spdx.org/licenses/MIT'} """ if isinstance(s, str): return {"@id": "https://spdx.org/licenses/" + s} def normalize_homepage(self, s): """https://docs.npmjs.com/files/package.json#homepage >>> NpmMapping().normalize_homepage('https://example.org/~john.doe') {'@id': 'https://example.org/~john.doe'} """ if isinstance(s, str): return {"@id": s} - def normalize_keywords(self, l): + def normalize_keywords(self, lst): """https://docs.npmjs.com/files/package.json#homepage >>> NpmMapping().normalize_keywords(['foo', 'bar']) ['foo', 'bar'] """ - if isinstance(l, list): - return [x for x in l if isinstance(x, str)] + if isinstance(lst, list): + return [x for x in lst if isinstance(x, str)] diff --git a/swh/indexer/origin_head.py b/swh/indexer/origin_head.py index a0ce14d..dde9be3 100644 --- a/swh/indexer/origin_head.py +++ b/swh/indexer/origin_head.py @@ -1,155 +1,159 @@ # Copyright (C) 2018-2020 The Software Heritage developers # See the AUTHORS file at the
top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import List, Tuple, Any, Dict, Union import re import click import logging from swh.indexer.indexer import OriginIndexer +from swh.storage.algos.origin import origin_get_latest_visit_status class OriginHeadIndexer(OriginIndexer): """Origin-level indexer. This indexer is in charge of looking up the revision that acts as the "head" of an origin. In git, this is usually the commit pointed to by the 'master' branch.""" USE_TOOLS = False def persist_index_computations( self, results: Any, policy_update: str ) -> Dict[str, int]: """Do nothing. The indexer's results are not persistent, they should only be piped to another indexer.""" return {} # Dispatch def index(self, origin_url): - latest_visit = self.storage.origin_visit_get_latest( - origin_url, allowed_statuses=["full"], require_snapshot=True + visit_and_status = origin_get_latest_visit_status( + self.storage, origin_url, allowed_statuses=["full"], require_snapshot=True ) - if latest_visit is None: + if not visit_and_status: + return None + visit, visit_status = visit_and_status + latest_snapshot = self.storage.snapshot_get(visit_status.snapshot) + if latest_snapshot is None: return None - latest_snapshot = self.storage.snapshot_get(latest_visit["snapshot"]) method = getattr( - self, "_try_get_%s_head" % latest_visit["type"], self._try_get_head_generic + self, "_try_get_%s_head" % visit.type, self._try_get_head_generic ) rev_id = method(latest_snapshot) if rev_id is not None: return { "origin_url": origin_url, "revision_id": rev_id, } # could not find a head revision return None # Tarballs _archive_filename_re = re.compile( rb"^" rb"(?P<pkgname>.*)[-_]" rb"(?P<version>[0-9]+(\.[0-9])*)" rb"(?P<preversion>[-+][a-zA-Z0-9.~]+?)?" rb"(?P<extension>(\.[a-zA-Z0-9]+)+)" rb"$" ) @classmethod def _parse_version(cls: Any, filename: str) -> Tuple[Union[float, int], ...]: """Extracts the release version from an archive filename, to get an ordering whose maximum is likely to be the last version of the software >>> OriginHeadIndexer._parse_version(b'foo') (-inf,) >>> OriginHeadIndexer._parse_version(b'foo.tar.gz') (-inf,) >>> OriginHeadIndexer._parse_version(b'gnu-hello-0.0.1.tar.gz') (0, 0, 1, 0) >>> OriginHeadIndexer._parse_version(b'gnu-hello-0.0.1-beta2.tar.gz') (0, 0, 1, -1, 'beta2') >>> OriginHeadIndexer._parse_version(b'gnu-hello-0.0.1+foobar.tar.gz') (0, 0, 1, 1, 'foobar') """ res = cls._archive_filename_re.match(filename) if res is None: return (float("-infinity"),) version = [int(n) for n in res.group("version").decode().split(".")] if res.group("preversion") is None: version.append(0) else: preversion = res.group("preversion").decode() if preversion.startswith("-"): version.append(-1) version.append(preversion[1:]) elif preversion.startswith("+"): version.append(1) version.append(preversion[1:]) else: assert False, res.group("preversion") return tuple(version) def _try_get_ftp_head(self, snapshot: Dict[str, Any]) -> Any: archive_names = list(snapshot["branches"]) max_archive_name = max(archive_names, key=self._parse_version) r = self._try_resolve_target(snapshot["branches"], max_archive_name) return r # Generic def _try_get_head_generic(self, snapshot: Dict[str, Any]) -> Any: # Works on 'deposit', 'pypi', and VCSs.
try: branches = snapshot["branches"] except KeyError: return None else: return self._try_resolve_target( branches, b"HEAD" ) or self._try_resolve_target(branches, b"master") def _try_resolve_target(self, branches: Dict, target_name: bytes) -> Any: try: target = branches[target_name] if target is None: return None while target["target_type"] == "alias": target = branches[target["target"]] if target is None: return None if target["target_type"] == "revision": return target["target"] elif target["target_type"] == "content": return None # TODO elif target["target_type"] == "directory": return None # TODO elif target["target_type"] == "release": return None # TODO else: assert False except KeyError: return None @click.command() @click.option( "--origins", "-i", help='Origins to lookup, in the "type+url" format', multiple=True ) def main(origins: List[str]) -> None: rev_metadata_indexer = OriginHeadIndexer() rev_metadata_indexer.run(origins) if __name__ == "__main__": logging.basicConfig(level=logging.INFO) main() diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py index 7542b28..9cad65d 100644 --- a/swh/indexer/storage/__init__.py +++ b/swh/indexer/storage/__init__.py @@ -1,599 +1,599 @@ # Copyright (C) 2015-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import psycopg2 import psycopg2.pool from collections import defaultdict, Counter from typing import Dict, List from swh.storage.common import db_transaction_generator, db_transaction from swh.storage.exc import StorageDBError from . import converters from .db import Db from .exc import IndexerStorageArgumentException, DuplicateId from .metrics import process_metrics, send_metric, timed INDEXER_CFG_KEY = "indexer_storage" MAPPING_NAMES = ["codemeta", "gemspec", "maven", "npm", "pkg-info"] def get_indexer_storage(cls, args): """Get an indexer storage object of class `storage_class` with arguments `storage_args`. Args: cls (str): storage's class, either 'local' or 'remote' args (dict): dictionary of arguments passed to the storage class constructor Returns: an instance of swh.indexer's storage (either local or remote) Raises: ValueError if passed an unknown storage class. """ if cls == "remote": from .api.client import RemoteStorage as IndexerStorage elif cls == "local": from . import IndexerStorage elif cls == "memory": from .in_memory import IndexerStorage else: raise ValueError("Unknown indexer storage class `%s`" % cls) return IndexerStorage(**args) def check_id_duplicates(data): """ If any two dictionaries in `data` have the same id, raises a `ValueError`. Values associated to the key must be hashable. Args: data (List[dict]): List of dictionaries to be inserted >>> check_id_duplicates([ ... {'id': 'foo', 'data': 'spam'}, ... {'id': 'bar', 'data': 'egg'}, ... ]) >>> check_id_duplicates([ ... {'id': 'foo', 'data': 'spam'}, ... {'id': 'foo', 'data': 'egg'}, ... ]) Traceback (most recent call last): ... 
swh.indexer.storage.exc.DuplicateId: ['foo'] """ counter = Counter(item["id"] for item in data) duplicates = [id_ for (id_, count) in counter.items() if count >= 2] if duplicates: raise DuplicateId(duplicates) class IndexerStorage: """SWH Indexer Storage """ def __init__(self, db, min_pool_conns=1, max_pool_conns=10): """ Args: db_conn: either a libpq connection string, or a psycopg2 connection """ try: if isinstance(db, psycopg2.extensions.connection): self._pool = None self._db = Db(db) else: self._pool = psycopg2.pool.ThreadedConnectionPool( min_pool_conns, max_pool_conns, db ) self._db = None except psycopg2.OperationalError as e: raise StorageDBError(e) def get_db(self): if self._db: return self._db return Db.from_pool(self._pool) def put_db(self, db): if db is not self._db: db.put_conn() @timed @db_transaction() def check_config(self, *, check_write, db=None, cur=None): # Check permissions on one of the tables if check_write: check = "INSERT" else: check = "SELECT" cur.execute( "select has_table_privilege(current_user, 'content_mimetype', %s)", # noqa (check,), ) return cur.fetchone()[0] @timed @db_transaction_generator() def content_mimetype_missing(self, mimetypes, db=None, cur=None): for obj in db.content_mimetype_missing_from_list(mimetypes, cur): yield obj[0] def _content_get_range( self, content_type, start, end, indexer_configuration_id, limit=1000, with_textual_data=False, db=None, cur=None, ): if limit is None: raise IndexerStorageArgumentException("limit should not be None") if content_type not in db.content_indexer_names: err = "Wrong type. Should be one of [%s]" % ( ",".join(db.content_indexer_names) ) raise IndexerStorageArgumentException(err) ids = [] next_id = None for counter, obj in enumerate( db.content_get_range( content_type, start, end, indexer_configuration_id, limit=limit + 1, with_textual_data=with_textual_data, cur=cur, ) ): _id = obj[0] if counter >= limit: next_id = _id break ids.append(_id) return {"ids": ids, "next": next_id} @timed @db_transaction() def content_mimetype_get_range( self, start, end, indexer_configuration_id, limit=1000, db=None, cur=None ): return self._content_get_range( "mimetype", start, end, indexer_configuration_id, limit=limit, db=db, cur=cur, ) @timed @process_metrics @db_transaction() def content_mimetype_add( self, mimetypes: List[Dict], conflict_update: bool = False, db=None, cur=None ) -> Dict[str, int]: """Add mimetypes to the storage (if conflict_update is True, this will override existing data if any). Returns: A dict with the number of new elements added to the storage. 
""" check_id_duplicates(mimetypes) mimetypes.sort(key=lambda m: m["id"]) db.mktemp_content_mimetype(cur) db.copy_to( mimetypes, "tmp_content_mimetype", ["id", "mimetype", "encoding", "indexer_configuration_id"], cur, ) count = db.content_mimetype_add_from_temp(conflict_update, cur) return {"content_mimetype:add": count} @timed @db_transaction_generator() def content_mimetype_get(self, ids, db=None, cur=None): for c in db.content_mimetype_get_from_list(ids, cur): yield converters.db_to_mimetype(dict(zip(db.content_mimetype_cols, c))) @timed @db_transaction_generator() def content_language_missing(self, languages, db=None, cur=None): for obj in db.content_language_missing_from_list(languages, cur): yield obj[0] @timed @db_transaction_generator() def content_language_get(self, ids, db=None, cur=None): for c in db.content_language_get_from_list(ids, cur): yield converters.db_to_language(dict(zip(db.content_language_cols, c))) @timed @process_metrics @db_transaction() def content_language_add( self, languages: List[Dict], conflict_update: bool = False, db=None, cur=None ) -> Dict[str, int]: check_id_duplicates(languages) languages.sort(key=lambda m: m["id"]) db.mktemp_content_language(cur) # empty language is mapped to 'unknown' db.copy_to( ( { - "id": l["id"], - "lang": "unknown" if not l["lang"] else l["lang"], - "indexer_configuration_id": l["indexer_configuration_id"], + "id": lang["id"], + "lang": "unknown" if not lang["lang"] else lang["lang"], + "indexer_configuration_id": lang["indexer_configuration_id"], } - for l in languages + for lang in languages ), "tmp_content_language", ["id", "lang", "indexer_configuration_id"], cur, ) count = db.content_language_add_from_temp(conflict_update, cur) return {"content_language:add": count} @timed @db_transaction_generator() def content_ctags_missing(self, ctags, db=None, cur=None): for obj in db.content_ctags_missing_from_list(ctags, cur): yield obj[0] @timed @db_transaction_generator() def content_ctags_get(self, ids, db=None, cur=None): for c in db.content_ctags_get_from_list(ids, cur): yield converters.db_to_ctags(dict(zip(db.content_ctags_cols, c))) @timed @process_metrics @db_transaction() def content_ctags_add( self, ctags: List[Dict], conflict_update: bool = False, db=None, cur=None ) -> Dict[str, int]: check_id_duplicates(ctags) ctags.sort(key=lambda m: m["id"]) def _convert_ctags(__ctags): """Convert ctags dict to list of ctags. 
""" for ctags in __ctags: yield from converters.ctags_to_db(ctags) db.mktemp_content_ctags(cur) db.copy_to( list(_convert_ctags(ctags)), tblname="tmp_content_ctags", columns=["id", "name", "kind", "line", "lang", "indexer_configuration_id"], cur=cur, ) count = db.content_ctags_add_from_temp(conflict_update, cur) return {"content_ctags:add": count} @timed @db_transaction_generator() def content_ctags_search( self, expression, limit=10, last_sha1=None, db=None, cur=None ): for obj in db.content_ctags_search(expression, last_sha1, limit, cur=cur): yield converters.db_to_ctags(dict(zip(db.content_ctags_cols, obj))) @timed @db_transaction_generator() def content_fossology_license_get(self, ids, db=None, cur=None): d = defaultdict(list) for c in db.content_fossology_license_get_from_list(ids, cur): license = dict(zip(db.content_fossology_license_cols, c)) id_ = license["id"] d[id_].append(converters.db_to_fossology_license(license)) for id_, facts in d.items(): yield {id_: facts} @timed @process_metrics @db_transaction() def content_fossology_license_add( self, licenses: List[Dict], conflict_update: bool = False, db=None, cur=None ) -> Dict[str, int]: check_id_duplicates(licenses) licenses.sort(key=lambda m: m["id"]) db.mktemp_content_fossology_license(cur) db.copy_to( ( { "id": sha1["id"], "indexer_configuration_id": sha1["indexer_configuration_id"], "license": license, } for sha1 in licenses for license in sha1["licenses"] ), tblname="tmp_content_fossology_license", columns=["id", "license", "indexer_configuration_id"], cur=cur, ) count = db.content_fossology_license_add_from_temp(conflict_update, cur) return {"content_fossology_license:add": count} @timed @db_transaction() def content_fossology_license_get_range( self, start, end, indexer_configuration_id, limit=1000, db=None, cur=None ): return self._content_get_range( "fossology_license", start, end, indexer_configuration_id, limit=limit, with_textual_data=True, db=db, cur=cur, ) @timed @db_transaction_generator() def content_metadata_missing(self, metadata, db=None, cur=None): for obj in db.content_metadata_missing_from_list(metadata, cur): yield obj[0] @timed @db_transaction_generator() def content_metadata_get(self, ids, db=None, cur=None): for c in db.content_metadata_get_from_list(ids, cur): yield converters.db_to_metadata(dict(zip(db.content_metadata_cols, c))) @timed @process_metrics @db_transaction() def content_metadata_add( self, metadata: List[Dict], conflict_update: bool = False, db=None, cur=None ) -> Dict[str, int]: check_id_duplicates(metadata) metadata.sort(key=lambda m: m["id"]) db.mktemp_content_metadata(cur) db.copy_to( metadata, "tmp_content_metadata", ["id", "metadata", "indexer_configuration_id"], cur, ) count = db.content_metadata_add_from_temp(conflict_update, cur) return { "content_metadata:add": count, } @timed @db_transaction_generator() def revision_intrinsic_metadata_missing(self, metadata, db=None, cur=None): for obj in db.revision_intrinsic_metadata_missing_from_list(metadata, cur): yield obj[0] @timed @db_transaction_generator() def revision_intrinsic_metadata_get(self, ids, db=None, cur=None): for c in db.revision_intrinsic_metadata_get_from_list(ids, cur): yield converters.db_to_metadata( dict(zip(db.revision_intrinsic_metadata_cols, c)) ) @timed @process_metrics @db_transaction() def revision_intrinsic_metadata_add( self, metadata: List[Dict], conflict_update: bool = False, db=None, cur=None ) -> Dict[str, int]: check_id_duplicates(metadata) metadata.sort(key=lambda m: m["id"]) 
db.mktemp_revision_intrinsic_metadata(cur) db.copy_to( metadata, "tmp_revision_intrinsic_metadata", ["id", "metadata", "mappings", "indexer_configuration_id"], cur, ) count = db.revision_intrinsic_metadata_add_from_temp(conflict_update, cur) return { "revision_intrinsic_metadata:add": count, } @timed @process_metrics @db_transaction() def revision_intrinsic_metadata_delete( self, entries: List[Dict], db=None, cur=None ) -> Dict: count = db.revision_intrinsic_metadata_delete(entries, cur) return {"revision_intrinsic_metadata:del": count} @timed @db_transaction_generator() def origin_intrinsic_metadata_get(self, ids, db=None, cur=None): for c in db.origin_intrinsic_metadata_get_from_list(ids, cur): yield converters.db_to_metadata( dict(zip(db.origin_intrinsic_metadata_cols, c)) ) @timed @process_metrics @db_transaction() def origin_intrinsic_metadata_add( self, metadata: List[Dict], conflict_update: bool = False, db=None, cur=None ) -> Dict[str, int]: check_id_duplicates(metadata) metadata.sort(key=lambda m: m["id"]) db.mktemp_origin_intrinsic_metadata(cur) db.copy_to( metadata, "tmp_origin_intrinsic_metadata", ["id", "metadata", "indexer_configuration_id", "from_revision", "mappings"], cur, ) count = db.origin_intrinsic_metadata_add_from_temp(conflict_update, cur) return { "origin_intrinsic_metadata:add": count, } @timed @process_metrics @db_transaction() def origin_intrinsic_metadata_delete( self, entries: List[Dict], db=None, cur=None ) -> Dict: count = db.origin_intrinsic_metadata_delete(entries, cur) return { "origin_intrinsic_metadata:del": count, } @timed @db_transaction_generator() def origin_intrinsic_metadata_search_fulltext( self, conjunction, limit=100, db=None, cur=None ): for c in db.origin_intrinsic_metadata_search_fulltext( conjunction, limit=limit, cur=cur ): yield converters.db_to_metadata( dict(zip(db.origin_intrinsic_metadata_cols, c)) ) @timed @db_transaction() def origin_intrinsic_metadata_search_by_producer( self, page_token="", limit=100, ids_only=False, mappings=None, tool_ids=None, db=None, cur=None, ): assert isinstance(page_token, str) # we go to limit+1 to check whether we should add next_page_token in # the response res = db.origin_intrinsic_metadata_search_by_producer( page_token, limit + 1, ids_only, mappings, tool_ids, cur ) result = {} if ids_only: result["origins"] = [origin for (origin,) in res] if len(result["origins"]) > limit: result["origins"][limit:] = [] result["next_page_token"] = result["origins"][-1] else: result["origins"] = [ converters.db_to_metadata( dict(zip(db.origin_intrinsic_metadata_cols, c)) ) for c in res ] if len(result["origins"]) > limit: result["origins"][limit:] = [] result["next_page_token"] = result["origins"][-1]["id"] return result @timed @db_transaction() def origin_intrinsic_metadata_stats(self, db=None, cur=None): mapping_names = [m for m in MAPPING_NAMES] select_parts = [] # Count rows for each mapping for mapping_name in mapping_names: select_parts.append( ( "sum(case when (mappings @> ARRAY['%s']) " " then 1 else 0 end)" ) % mapping_name ) # Total select_parts.append("sum(1)") # Rows whose metadata has at least one key that is not '@context' select_parts.append( "sum(case when ('{}'::jsonb @> (metadata - '@context')) " " then 0 else 1 end)" ) cur.execute( "select " + ", ".join(select_parts) + " from origin_intrinsic_metadata" ) results = dict(zip(mapping_names + ["total", "non_empty"], cur.fetchone())) return { "total": results.pop("total"), "non_empty": results.pop("non_empty"), "per_mapping": results, } @timed 
@db_transaction_generator() def indexer_configuration_add(self, tools, db=None, cur=None): db.mktemp_indexer_configuration(cur) db.copy_to( tools, "tmp_indexer_configuration", ["tool_name", "tool_version", "tool_configuration"], cur, ) tools = db.indexer_configuration_add_from_temp(cur) count = 0 for line in tools: yield dict(zip(db.indexer_configuration_cols, line)) count += 1 send_metric( "indexer_configuration:add", count, method_name="indexer_configuration_add" ) @timed @db_transaction() def indexer_configuration_get(self, tool, db=None, cur=None): tool_conf = tool["tool_configuration"] if isinstance(tool_conf, dict): tool_conf = json.dumps(tool_conf) idx = db.indexer_configuration_get( tool["tool_name"], tool["tool_version"], tool_conf ) if not idx: return None return dict(zip(db.indexer_configuration_cols, idx)) diff --git a/swh/indexer/tests/conftest.py b/swh/indexer/tests/conftest.py index 3891b36..fb25abd 100644 --- a/swh/indexer/tests/conftest.py +++ b/swh/indexer/tests/conftest.py @@ -1,84 +1,86 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import timedelta from unittest.mock import patch import pytest from swh.objstorage import get_objstorage from swh.scheduler.tests.conftest import * # noqa from swh.storage import get_storage from swh.indexer.storage import get_indexer_storage from .utils import fill_storage, fill_obj_storage TASK_NAMES = ["revision_intrinsic_metadata", "origin_intrinsic_metadata"] storage_config = {"cls": "pipeline", "steps": [{"cls": "validate"}, {"cls": "memory"},]} @pytest.fixture def indexer_scheduler(swh_scheduler): for taskname in TASK_NAMES: swh_scheduler.create_task_type( { "type": taskname, "description": "The {} indexer testing task".format(taskname), "backend_name": "swh.indexer.tests.tasks.{}".format(taskname), "default_interval": timedelta(days=1), "min_interval": timedelta(hours=6), "max_interval": timedelta(days=12), "num_retries": 3, } ) return swh_scheduler @pytest.fixture def idx_storage(): """An instance of in-memory indexer storage that gets injected into all indexers classes. """ idx_storage = get_indexer_storage("memory", {}) with patch("swh.indexer.storage.in_memory.IndexerStorage") as idx_storage_mock: idx_storage_mock.return_value = idx_storage yield idx_storage @pytest.fixture def storage(): """An instance of in-memory storage that gets injected into all indexers classes. """ storage = get_storage(**storage_config) fill_storage(storage) with patch("swh.storage.in_memory.InMemoryStorage") as storage_mock: storage_mock.return_value = storage yield storage @pytest.fixture def obj_storage(): """An instance of in-memory objstorage that gets injected into all indexers classes. 
""" objstorage = get_objstorage("memory", {}) fill_obj_storage(objstorage) - with patch.dict("swh.objstorage._STORAGE_CLASSES", {"memory": lambda: objstorage}): + with patch.dict( + "swh.objstorage.factory._STORAGE_CLASSES", {"memory": lambda: objstorage} + ): yield objstorage @pytest.fixture(scope="session") # type: ignore # expected redefinition def celery_includes(): return [ "swh.indexer.tests.tasks", "swh.indexer.tasks", ] diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py index 08ad477..ba1c098 100644 --- a/swh/indexer/tests/test_cli.py +++ b/swh/indexer/tests/test_cli.py @@ -1,373 +1,382 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from functools import reduce import re import tempfile from unittest.mock import patch from click.testing import CliRunner +from confluent_kafka import Consumer, Producer -from swh.journal.tests.utils import FakeKafkaMessage, MockedKafkaConsumer +from swh.journal.serializers import value_to_kafka from swh.model.hashutil import hash_to_bytes from swh.indexer.cli import cli CLI_CONFIG = """ scheduler: cls: foo args: {} storage: cls: memory indexer_storage: cls: memory args: {} """ def fill_idx_storage(idx_storage, nb_rows): tools = [ {"tool_name": "tool %d" % i, "tool_version": "0.0.1", "tool_configuration": {},} for i in range(2) ] tools = idx_storage.indexer_configuration_add(tools) origin_metadata = [ { "id": "file://dev/%04d" % origin_id, "from_revision": hash_to_bytes("abcd{:0>4}".format(origin_id)), "indexer_configuration_id": tools[origin_id % 2]["id"], "metadata": {"name": "origin %d" % origin_id}, "mappings": ["mapping%d" % (origin_id % 10)], } for origin_id in range(nb_rows) ] revision_metadata = [ { "id": hash_to_bytes("abcd{:0>4}".format(origin_id)), "indexer_configuration_id": tools[origin_id % 2]["id"], "metadata": {"name": "origin %d" % origin_id}, "mappings": ["mapping%d" % (origin_id % 10)], } for origin_id in range(nb_rows) ] idx_storage.revision_intrinsic_metadata_add(revision_metadata) idx_storage.origin_intrinsic_metadata_add(origin_metadata) return [tool["id"] for tool in tools] def _origins_in_task_args(tasks): """Returns the set of origins contained in the arguments of the provided tasks (assumed to be of type index-origin-metadata).""" return reduce( set.union, (set(task["arguments"]["args"][0]) for task in tasks), set() ) def _assert_tasks_for_origins(tasks, origins): expected_kwargs = {"policy_update": "update-dups"} assert {task["type"] for task in tasks} == {"index-origin-metadata"} assert all(len(task["arguments"]["args"]) == 1 for task in tasks) for task in tasks: assert task["arguments"]["kwargs"] == expected_kwargs, task assert _origins_in_task_args(tasks) == set(["file://dev/%04d" % i for i in origins]) def invoke(scheduler, catch_exceptions, args): runner = CliRunner() with patch( "swh.indexer.cli.get_scheduler" ) as get_scheduler_mock, tempfile.NamedTemporaryFile( "a", suffix=".yml" ) as config_fd: config_fd.write(CLI_CONFIG) config_fd.seek(0) get_scheduler_mock.return_value = scheduler result = runner.invoke(cli, ["-C" + config_fd.name] + args) if not catch_exceptions and result.exception: print(result.output) raise result.exception return result def test_mapping_list(indexer_scheduler): result = invoke(indexer_scheduler, False, ["mapping", "list",]) expected_output = "\n".join( ["codemeta", 
"gemspec", "maven", "npm", "pkg-info", "",] ) assert result.exit_code == 0, result.output assert result.output == expected_output def test_mapping_list_terms(indexer_scheduler): result = invoke(indexer_scheduler, False, ["mapping", "list-terms",]) assert result.exit_code == 0, result.output assert re.search(r"http://schema.org/url:\n.*npm", result.output) assert re.search(r"http://schema.org/url:\n.*codemeta", result.output) assert re.search( r"https://codemeta.github.io/terms/developmentStatus:\n\tcodemeta", result.output, ) def test_mapping_list_terms_exclude(indexer_scheduler): result = invoke( indexer_scheduler, False, ["mapping", "list-terms", "--exclude-mapping", "codemeta"], ) assert result.exit_code == 0, result.output assert re.search(r"http://schema.org/url:\n.*npm", result.output) assert not re.search(r"http://schema.org/url:\n.*codemeta", result.output) assert not re.search( r"https://codemeta.github.io/terms/developmentStatus:\n\tcodemeta", result.output, ) @patch("swh.scheduler.cli.utils.TASK_BATCH_SIZE", 3) @patch("swh.scheduler.cli_utils.TASK_BATCH_SIZE", 3) def test_origin_metadata_reindex_empty_db(indexer_scheduler, idx_storage, storage): result = invoke(indexer_scheduler, False, ["schedule", "reindex_origin_metadata",]) expected_output = "Nothing to do (no origin metadata matched the criteria).\n" assert result.exit_code == 0, result.output assert result.output == expected_output tasks = indexer_scheduler.search_tasks() assert len(tasks) == 0 @patch("swh.scheduler.cli.utils.TASK_BATCH_SIZE", 3) @patch("swh.scheduler.cli_utils.TASK_BATCH_SIZE", 3) def test_origin_metadata_reindex_divisor(indexer_scheduler, idx_storage, storage): """Tests the re-indexing when origin_batch_size*task_batch_size is a divisor of nb_origins.""" fill_idx_storage(idx_storage, 90) result = invoke(indexer_scheduler, False, ["schedule", "reindex_origin_metadata",]) # Check the output expected_output = ( "Scheduled 3 tasks (30 origins).\n" "Scheduled 6 tasks (60 origins).\n" "Scheduled 9 tasks (90 origins).\n" "Done.\n" ) assert result.exit_code == 0, result.output assert result.output == expected_output # Check scheduled tasks tasks = indexer_scheduler.search_tasks() assert len(tasks) == 9 _assert_tasks_for_origins(tasks, range(90)) @patch("swh.scheduler.cli.utils.TASK_BATCH_SIZE", 3) @patch("swh.scheduler.cli_utils.TASK_BATCH_SIZE", 3) def test_origin_metadata_reindex_dry_run(indexer_scheduler, idx_storage, storage): """Tests the re-indexing when origin_batch_size*task_batch_size is a divisor of nb_origins.""" fill_idx_storage(idx_storage, 90) result = invoke( indexer_scheduler, False, ["schedule", "--dry-run", "reindex_origin_metadata",] ) # Check the output expected_output = ( "Scheduled 3 tasks (30 origins).\n" "Scheduled 6 tasks (60 origins).\n" "Scheduled 9 tasks (90 origins).\n" "Done.\n" ) assert result.exit_code == 0, result.output assert result.output == expected_output # Check scheduled tasks tasks = indexer_scheduler.search_tasks() assert len(tasks) == 0 @patch("swh.scheduler.cli.utils.TASK_BATCH_SIZE", 3) @patch("swh.scheduler.cli_utils.TASK_BATCH_SIZE", 3) def test_origin_metadata_reindex_nondivisor(indexer_scheduler, idx_storage, storage): """Tests the re-indexing when neither origin_batch_size or task_batch_size is a divisor of nb_origins.""" fill_idx_storage(idx_storage, 70) result = invoke( indexer_scheduler, False, ["schedule", "reindex_origin_metadata", "--batch-size", "20",], ) # Check the output expected_output = ( "Scheduled 3 tasks (60 origins).\n" "Scheduled 4 tasks (70 
origins).\n" "Done.\n" ) assert result.exit_code == 0, result.output assert result.output == expected_output # Check scheduled tasks tasks = indexer_scheduler.search_tasks() assert len(tasks) == 4 _assert_tasks_for_origins(tasks, range(70)) @patch("swh.scheduler.cli.utils.TASK_BATCH_SIZE", 3) @patch("swh.scheduler.cli_utils.TASK_BATCH_SIZE", 3) def test_origin_metadata_reindex_filter_one_mapping( indexer_scheduler, idx_storage, storage ): """Tests the re-indexing when origin_batch_size*task_batch_size is a divisor of nb_origins.""" fill_idx_storage(idx_storage, 110) result = invoke( indexer_scheduler, False, ["schedule", "reindex_origin_metadata", "--mapping", "mapping1",], ) # Check the output - expected_output = "Scheduled 2 tasks (11 origins).\n" "Done.\n" + expected_output = "Scheduled 2 tasks (11 origins).\nDone.\n" assert result.exit_code == 0, result.output assert result.output == expected_output # Check scheduled tasks tasks = indexer_scheduler.search_tasks() assert len(tasks) == 2 _assert_tasks_for_origins(tasks, [1, 11, 21, 31, 41, 51, 61, 71, 81, 91, 101]) @patch("swh.scheduler.cli.utils.TASK_BATCH_SIZE", 3) @patch("swh.scheduler.cli_utils.TASK_BATCH_SIZE", 3) def test_origin_metadata_reindex_filter_two_mappings( indexer_scheduler, idx_storage, storage ): """Tests the re-indexing when origin_batch_size*task_batch_size is a divisor of nb_origins.""" fill_idx_storage(idx_storage, 110) result = invoke( indexer_scheduler, False, [ "schedule", "reindex_origin_metadata", "--mapping", "mapping1", "--mapping", "mapping2", ], ) # Check the output - expected_output = "Scheduled 3 tasks (22 origins).\n" "Done.\n" + expected_output = "Scheduled 3 tasks (22 origins).\nDone.\n" assert result.exit_code == 0, result.output assert result.output == expected_output # Check scheduled tasks tasks = indexer_scheduler.search_tasks() assert len(tasks) == 3 _assert_tasks_for_origins( tasks, [ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91, 101, 2, 12, 22, 32, 42, 52, 62, 72, 82, 92, 102, ], ) @patch("swh.scheduler.cli.utils.TASK_BATCH_SIZE", 3) @patch("swh.scheduler.cli_utils.TASK_BATCH_SIZE", 3) def test_origin_metadata_reindex_filter_one_tool( indexer_scheduler, idx_storage, storage ): """Tests the re-indexing when origin_batch_size*task_batch_size is a divisor of nb_origins.""" tool_ids = fill_idx_storage(idx_storage, 110) result = invoke( indexer_scheduler, False, ["schedule", "reindex_origin_metadata", "--tool-id", str(tool_ids[0]),], ) # Check the output expected_output = ( "Scheduled 3 tasks (30 origins).\n" "Scheduled 6 tasks (55 origins).\n" "Done.\n" ) assert result.exit_code == 0, result.output assert result.output == expected_output # Check scheduled tasks tasks = indexer_scheduler.search_tasks() assert len(tasks) == 6 _assert_tasks_for_origins(tasks, [x * 2 for x in range(55)]) -def test_journal_client(storage, indexer_scheduler): +def test_journal_client( + storage, indexer_scheduler, kafka_prefix: str, kafka_server, consumer: Consumer +): """Test the 'swh indexer journal-client' cli tool.""" - message = FakeKafkaMessage( - "swh.journal.objects.origin_visit", - "bogus", - {"status": "full", "origin": {"url": "file://dev/0000",}}, + producer = Producer( + { + "bootstrap.servers": kafka_server, + "client.id": "test producer", + "acks": "all", + } ) - consumer = MockedKafkaConsumer([message]) - - with patch("swh.journal.client.Consumer", return_value=consumer): - result = invoke( - indexer_scheduler, - False, - [ - "journal-client", - "--stop-after-objects", - "1", - "--broker", - "192.0.2.1", - 
"--prefix", - "swh.journal.objects", - "--group-id", - "test-consumer", - ], - ) + STATUS = {"status": "full", "origin": {"url": "file://dev/0000",}} + producer.produce( + topic=kafka_prefix + ".origin_visit", + key=b"bogus", + value=value_to_kafka(STATUS), + ) + + result = invoke( + indexer_scheduler, + False, + [ + "journal-client", + "--stop-after-objects", + "1", + "--broker", + kafka_server, + "--prefix", + kafka_prefix, + "--group-id", + "test-consumer", + ], + ) # Check the output expected_output = "Done.\n" assert result.exit_code == 0, result.output assert result.output == expected_output # Check scheduled tasks tasks = indexer_scheduler.search_tasks() assert len(tasks) == 1 _assert_tasks_for_origins(tasks, [0]) diff --git a/swh/indexer/tests/test_ctags.py b/swh/indexer/tests/test_ctags.py index fca6498..baec382 100644 --- a/swh/indexer/tests/test_ctags.py +++ b/swh/indexer/tests/test_ctags.py @@ -1,155 +1,154 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import unittest from unittest.mock import patch import pytest import swh.indexer.ctags from swh.indexer.ctags import CtagsIndexer, run_ctags from swh.indexer.tests.utils import ( CommonContentIndexerTest, SHA1_TO_CTAGS, BASE_TEST_CONFIG, OBJ_STORAGE_DATA, fill_storage, fill_obj_storage, filter_dict, ) class BasicTest(unittest.TestCase): @patch("swh.indexer.ctags.subprocess") def test_run_ctags(self, mock_subprocess): """Computing licenses from a raw content should return results """ output0 = """ {"name":"defun","kind":"function","line":1,"language":"scheme"} {"name":"name","kind":"symbol","line":5,"language":"else"}""" output1 = """ {"name":"let","kind":"var","line":10,"language":"something"}""" expected_result0 = [ {"name": "defun", "kind": "function", "line": 1, "lang": "scheme"}, {"name": "name", "kind": "symbol", "line": 5, "lang": "else"}, ] expected_result1 = [ {"name": "let", "kind": "var", "line": 10, "lang": "something"} ] for path, lang, intermediary_result, expected_result in [ (b"some/path", "lisp", output0, expected_result0), (b"some/path/2", "markdown", output1, expected_result1), ]: mock_subprocess.check_output.return_value = intermediary_result actual_result = list(run_ctags(path, lang=lang)) self.assertEqual(actual_result, expected_result) class InjectCtagsIndexer: """Override ctags computations. """ def compute_ctags(self, path, lang): """Inject fake ctags given path (sha1 identifier). 
""" return {"lang": lang, **SHA1_TO_CTAGS.get(path)} CONFIG = { **BASE_TEST_CONFIG, "tools": { "name": "universal-ctags", "version": "~git7859817b", "configuration": { "command_line": """ctags --fields=+lnz --sort=no """ """ --links=no """, "max_content_size": 1000, }, }, "languages": {"python": "python", "haskell": "haskell", "bar": "bar",}, "workdir": "/tmp", } class TestCtagsIndexer(CommonContentIndexerTest, unittest.TestCase): """Ctags indexer test scenarios: - Known sha1s in the input list have their data indexed - Unknown sha1 in the input list are not indexed """ legacy_get_format = True def get_indexer_results(self, ids): yield from self.idx_storage.content_ctags_get(ids) def setUp(self): super().setUp() self.indexer = CtagsIndexer(config=CONFIG) self.indexer.catch_exceptions = False self.idx_storage = self.indexer.idx_storage fill_storage(self.indexer.storage) fill_obj_storage(self.indexer.objstorage) # Prepare test input self.id0 = "01c9379dfc33803963d07c1ccc748d3fe4c96bb5" self.id1 = "d4c647f0fc257591cc9ba1722484229780d1c607" self.id2 = "688a5ef812c53907562fe379d4b3851e69c7cb15" tool = {k.replace("tool_", ""): v for (k, v) in self.indexer.tool.items()} self.expected_results = { self.id0: {"id": self.id0, "tool": tool, **SHA1_TO_CTAGS[self.id0][0],}, self.id1: {"id": self.id1, "tool": tool, **SHA1_TO_CTAGS[self.id1][0],}, self.id2: {"id": self.id2, "tool": tool, **SHA1_TO_CTAGS[self.id2][0],}, } self._set_mocks() def _set_mocks(self): def find_ctags_for_content(raw_content): for (sha1, ctags) in SHA1_TO_CTAGS.items(): if OBJ_STORAGE_DATA[sha1] == raw_content: return ctags else: raise ValueError( - ("%r not found in objstorage, can't mock " "its ctags.") - % raw_content + ("%r not found in objstorage, can't mock its ctags.") % raw_content ) def fake_language(raw_content, *args, **kwargs): ctags = find_ctags_for_content(raw_content) return {"lang": ctags[0]["lang"]} self._real_compute_language = swh.indexer.ctags.compute_language swh.indexer.ctags.compute_language = fake_language def fake_check_output(cmd, *args, **kwargs): id_ = cmd[-1].split("/")[-1] return "\n".join( json.dumps({"language": ctag["lang"], **ctag}) for ctag in SHA1_TO_CTAGS[id_] ) self._real_check_output = swh.indexer.ctags.subprocess.check_output swh.indexer.ctags.subprocess.check_output = fake_check_output def tearDown(self): swh.indexer.ctags.compute_language = self._real_compute_language swh.indexer.ctags.subprocess.check_output = self._real_check_output super().tearDown() def test_ctags_w_no_tool(): with pytest.raises(ValueError): CtagsIndexer(config=filter_dict(CONFIG, "tools")) diff --git a/swh/indexer/tests/test_origin_head.py b/swh/indexer/tests/test_origin_head.py index 483ebca..a5ed93c 100644 --- a/swh/indexer/tests/test_origin_head.py +++ b/swh/indexer/tests/test_origin_head.py @@ -1,171 +1,199 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest -from datetime import datetime +from datetime import datetime, timezone +from swh.model.model import OriginVisit, OriginVisitStatus from swh.indexer.origin_head import OriginHeadIndexer from swh.indexer.tests.utils import BASE_TEST_CONFIG, fill_storage +from swh.storage.utils import now ORIGIN_HEAD_CONFIG = { **BASE_TEST_CONFIG, "tools": {"name": "origin-metadata", "version": "0.0.1", "configuration": {},}, "tasks": {"revision_intrinsic_metadata": 
None, "origin_intrinsic_metadata": None,}, } class OriginHeadTestIndexer(OriginHeadIndexer): """Specific indexer whose configuration is enough to satisfy the indexing tests. """ def parse_config_file(self, *args, **kwargs): return ORIGIN_HEAD_CONFIG def persist_index_computations(self, results, policy_update): self.results = results class OriginHead(unittest.TestCase): def setUp(self): self.indexer = OriginHeadTestIndexer() self.indexer.catch_exceptions = False fill_storage(self.indexer.storage) def test_git(self): self.indexer.run(["https://github.com/SoftwareHeritage/swh-storage"]) self.assertEqual( self.indexer.results, [ { "revision_id": b"8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{" b"\xd7}\xac\xefrm", "origin_url": "https://github.com/SoftwareHeritage/swh-storage", } ], ) def test_git_partial_snapshot(self): """Checks partial snapshots are ignored.""" origin_url = "https://github.com/SoftwareHeritage/swh-core" self.indexer.storage.origin_add_one( {"url": origin_url,} ) visit = self.indexer.storage.origin_visit_add( - origin_url, datetime(2019, 2, 27), type="git", - ) + [ + OriginVisit( + origin=origin_url, + date=datetime(2019, 2, 27, tzinfo=timezone.utc), + type="git", + status="ongoing", + snapshot=None, + ) + ] + )[0] self.indexer.storage.snapshot_add( [ { "id": b"foo", "branches": { b"foo": None, b"HEAD": {"target_type": "alias", "target": b"foo",}, }, } ] ) - self.indexer.storage.origin_visit_update( - origin_url, visit.visit, status="partial", snapshot=b"foo" + visit_status = OriginVisitStatus( + origin=origin_url, + visit=visit.visit, + date=now(), + status="partial", + snapshot=b"foo", ) + self.indexer.storage.origin_visit_status_add([visit_status]) self.indexer.run([origin_url]) self.assertEqual(self.indexer.results, []) def test_vcs_missing_snapshot(self): self.indexer.storage.origin_add( [{"url": "https://github.com/SoftwareHeritage/swh-indexer",}] ) self.indexer.run(["https://github.com/SoftwareHeritage/swh-indexer"]) self.assertEqual(self.indexer.results, []) def test_pypi_missing_branch(self): origin_url = "https://pypi.org/project/abcdef/" self.indexer.storage.origin_add_one( {"url": origin_url,} ) visit = self.indexer.storage.origin_visit_add( - origin_url, datetime(2019, 2, 27), type="pypi" - ) + [ + OriginVisit( + origin=origin_url, + date=datetime(2019, 2, 27, tzinfo=timezone.utc), + type="pypi", + status="ongoing", + snapshot=None, + ) + ] + )[0] self.indexer.storage.snapshot_add( [ { "id": b"foo", "branches": { b"foo": None, b"HEAD": {"target_type": "alias", "target": b"foo",}, }, } ] ) - self.indexer.storage.origin_visit_update( - origin_url, visit.visit, status="full", snapshot=b"foo" + visit_status = OriginVisitStatus( + origin=origin_url, + visit=visit.visit, + date=now(), + status="full", + snapshot=b"foo", ) + self.indexer.storage.origin_visit_status_add([visit_status]) self.indexer.run(["https://pypi.org/project/abcdef/"]) self.assertEqual(self.indexer.results, []) def test_ftp(self): self.indexer.run(["rsync://ftp.gnu.org/gnu/3dldf"]) self.assertEqual( self.indexer.results, [ { "revision_id": b"\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee" b"\xcc\x1a\xb4`\x8c\x8by", "origin_url": "rsync://ftp.gnu.org/gnu/3dldf", } ], ) def test_ftp_missing_snapshot(self): self.indexer.storage.origin_add([{"url": "rsync://ftp.gnu.org/gnu/foobar",}]) self.indexer.run(["rsync://ftp.gnu.org/gnu/foobar"]) self.assertEqual(self.indexer.results, []) def test_deposit(self): - self.indexer.run(["https://forge.softwareheritage.org/source/" "jesuisgpl/"]) + 
self.indexer.run(["https://forge.softwareheritage.org/source/jesuisgpl/"]) self.assertEqual( self.indexer.results, [ { "revision_id": b"\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{" b"\xa6\xe9\x99\xb1\x9e]q\xeb", "origin_url": "https://forge.softwareheritage.org/source/" "jesuisgpl/", } ], ) def test_deposit_missing_snapshot(self): self.indexer.storage.origin_add( [{"url": "https://forge.softwareheritage.org/source/foobar",}] ) self.indexer.run(["https://forge.softwareheritage.org/source/foobar"]) self.assertEqual(self.indexer.results, []) def test_pypi(self): self.indexer.run(["https://pypi.org/project/limnoria/"]) self.assertEqual( self.indexer.results, [ { "revision_id": b"\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8k" b"A\x10\x9d\xc5\xfa2\xf8t", "origin_url": "https://pypi.org/project/limnoria/", } ], ) def test_svn(self): self.indexer.run(["http://0-512-md.googlecode.com/svn/"]) self.assertEqual( self.indexer.results, [ { "revision_id": b"\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8" b"\xc9\xad#.\x1bw=\x18", "origin_url": "http://0-512-md.googlecode.com/svn/", } ], ) diff --git a/swh/indexer/tests/utils.py b/swh/indexer/tests/utils.py index 0dbe993..3a39558 100644 --- a/swh/indexer/tests/utils.py +++ b/swh/indexer/tests/utils.py @@ -1,724 +1,740 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import abc -import datetime import functools import random from typing import Dict, Any import unittest from hypothesis import strategies from swh.model import hashutil from swh.model.hashutil import hash_to_bytes, hash_to_hex +from swh.model.model import OriginVisit, OriginVisitStatus +from swh.storage.utils import now from swh.indexer.storage import INDEXER_CFG_KEY + BASE_TEST_CONFIG: Dict[str, Dict[str, Any]] = { "storage": {"cls": "pipeline", "steps": [{"cls": "validate"}, {"cls": "memory"},]}, "objstorage": {"cls": "memory", "args": {},}, INDEXER_CFG_KEY: {"cls": "memory", "args": {},}, } ORIGIN_VISITS = [ {"type": "git", "url": "https://github.com/SoftwareHeritage/swh-storage"}, {"type": "ftp", "url": "rsync://ftp.gnu.org/gnu/3dldf"}, {"type": "deposit", "url": "https://forge.softwareheritage.org/source/jesuisgpl/"}, {"type": "pypi", "url": "https://pypi.org/project/limnoria/"}, {"type": "svn", "url": "http://0-512-md.googlecode.com/svn/"}, {"type": "git", "url": "https://github.com/librariesio/yarn-parser"}, {"type": "git", "url": "https://github.com/librariesio/yarn-parser.git"}, ] SNAPSHOTS = [ { "origin": "https://github.com/SoftwareHeritage/swh-storage", "branches": { b"refs/heads/add-revision-origin-cache": { "target": b'L[\xce\x1c\x88\x8eF\t\xf1"\x19\x1e\xfb\xc0' b"s\xe7/\xe9l\x1e", "target_type": "revision", }, b"refs/head/master": { "target": b"8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{\xd7}" b"\xac\xefrm", "target_type": "revision", }, b"HEAD": {"target": b"refs/head/master", "target_type": "alias"}, b"refs/tags/v0.0.103": { "target": b'\xb6"Im{\xfdLb\xb0\x94N\xea\x96m\x13x\x88+' b"\x0f\xdd", "target_type": "release", }, }, }, { "origin": "rsync://ftp.gnu.org/gnu/3dldf", "branches": { b"3DLDF-1.1.4.tar.gz": { "target": b"dJ\xfb\x1c\x91\xf4\x82B%]6\xa2\x90|\xd3\xfc" b'"G\x99\x11', "target_type": "revision", }, b"3DLDF-2.0.2.tar.gz": { "target": b"\xb6\x0e\xe7\x9e9\xac\xaa\x19\x9e=" b"\xd1\xc5\x00\\\xc6\xfc\xe0\xa6\xb4V", "target_type": "revision", }, b"3DLDF-2.0.3-examples.tar.gz": { "target": 
b"!H\x19\xc0\xee\x82-\x12F1\xbd\x97" b"\xfe\xadZ\x80\x80\xc1\x83\xff", "target_type": "revision", }, b"3DLDF-2.0.3.tar.gz": { "target": b"\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee" b"\xcc\x1a\xb4`\x8c\x8by", "target_type": "revision", }, b"3DLDF-2.0.tar.gz": { "target": b"F6*\xff(?\x19a\xef\xb6\xc2\x1fv$S\xe3G" b"\xd3\xd1m", "target_type": "revision", }, }, }, { "origin": "https://forge.softwareheritage.org/source/jesuisgpl/", "branches": { b"master": { "target": b"\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{" b"\xa6\xe9\x99\xb1\x9e]q\xeb", "target_type": "revision", } }, "id": b"h\xc0\xd2a\x04\xd4~'\x8d\xd6\xbe\x07\xeda\xfa\xfbV" b"\x1d\r ", }, { "origin": "https://pypi.org/project/limnoria/", "branches": { b"HEAD": {"target": b"releases/2018.09.09", "target_type": "alias"}, b"releases/2018.09.01": { "target": b"<\xee1(\xe8\x8d_\xc1\xc9\xa6rT\xf1\x1d" b"\xbb\xdfF\xfdw\xcf", "target_type": "revision", }, b"releases/2018.09.09": { "target": b"\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8k" b"A\x10\x9d\xc5\xfa2\xf8t", "target_type": "revision", }, }, "id": b"{\xda\x8e\x84\x7fX\xff\x92\x80^\x93V\x18\xa3\xfay" b"\x12\x9e\xd6\xb3", }, { "origin": "http://0-512-md.googlecode.com/svn/", "branches": { b"master": { "target": b"\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8" b"\xc9\xad#.\x1bw=\x18", "target_type": "revision", } }, "id": b"\xa1\xa2\x8c\n\xb3\x87\xa8\xf9\xe0a\x8c\xb7" b"\x05\xea\xb8\x1f\xc4H\xf4s", }, { "origin": "https://github.com/librariesio/yarn-parser", "branches": { b"HEAD": { "target": hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f"), "target_type": "revision", } }, }, { "origin": "https://github.com/librariesio/yarn-parser.git", "branches": { b"HEAD": { "target": hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f"), "target_type": "revision", } }, }, ] REVISIONS = [ { "id": hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f"), "message": b"Improve search functionality", "author": { "name": b"Andrew Nesbitt", "fullname": b"Andrew Nesbitt ", "email": b"andrewnez@gmail.com", }, "committer": { "name": b"Andrew Nesbitt", "fullname": b"Andrew Nesbitt ", "email": b"andrewnez@gmail.com", }, "committer_date": { "negative_utc": False, "offset": 120, "timestamp": {"microseconds": 0, "seconds": 1380883849,}, }, "type": "git", "synthetic": False, "date": { "negative_utc": False, "timestamp": {"seconds": 1487596456, "microseconds": 0,}, "offset": 0, }, "directory": b"10", + "parents": (), } ] DIRECTORY_ID = b"10" DIRECTORY_ENTRIES = [ {"name": b"index.js", "type": "file", "target": b"abc", "perms": 33188,}, {"name": b"package.json", "type": "file", "target": b"cde", "perms": 33188,}, {"name": b".github", "type": "dir", "target": b"11", "perms": 16384,}, ] SHA1_TO_LICENSES = { "01c9379dfc33803963d07c1ccc748d3fe4c96bb5": ["GPL"], "02fb2c89e14f7fab46701478c83779c7beb7b069": ["Apache2.0"], "103bc087db1d26afc3a0283f38663d081e9b01e6": ["MIT"], "688a5ef812c53907562fe379d4b3851e69c7cb15": ["AGPL"], "da39a3ee5e6b4b0d3255bfef95601890afd80709": [], } SHA1_TO_CTAGS = { "01c9379dfc33803963d07c1ccc748d3fe4c96bb5": [ {"name": "foo", "kind": "str", "line": 10, "lang": "bar",} ], "d4c647f0fc257591cc9ba1722484229780d1c607": [ {"name": "let", "kind": "int", "line": 100, "lang": "haskell",} ], "688a5ef812c53907562fe379d4b3851e69c7cb15": [ {"name": "symbol", "kind": "float", "line": 99, "lang": "python",} ], } OBJ_STORAGE_DATA = { "01c9379dfc33803963d07c1ccc748d3fe4c96bb5": b"this is some text", "688a5ef812c53907562fe379d4b3851e69c7cb15": b"another text", "8986af901dd2043044ce8f0d8fc039153641cf17": b"yet another 
text", "02fb2c89e14f7fab46701478c83779c7beb7b069": b""" import unittest import logging from swh.indexer.mimetype import MimetypeIndexer from swh.indexer.tests.test_utils import MockObjStorage class MockStorage(): def content_mimetype_add(self, mimetypes): self.state = mimetypes self.conflict_update = conflict_update def indexer_configuration_add(self, tools): return [{ 'id': 10, }] """, "103bc087db1d26afc3a0283f38663d081e9b01e6": b""" #ifndef __AVL__ #define __AVL__ typedef struct _avl_tree avl_tree; typedef struct _data_t { int content; } data_t; """, "93666f74f1cf635c8c8ac118879da6ec5623c410": b""" (should 'pygments (recognize 'lisp 'easily)) """, "26a9f72a7c87cc9205725cfd879f514ff4f3d8d5": b""" { "name": "test_metadata", "version": "0.0.1", "description": "Simple package.json test for indexer", "repository": { "type": "git", "url": "https://github.com/moranegg/metadata_test" } } """, "d4c647f0fc257591cc9ba1722484229780d1c607": b""" { "version": "5.0.3", "name": "npm", "description": "a package manager for JavaScript", "keywords": [ "install", "modules", "package manager", "package.json" ], "preferGlobal": true, "config": { "publishtest": false }, "homepage": "https://docs.npmjs.com/", "author": "Isaac Z. Schlueter (http://blog.izs.me)", "repository": { "type": "git", "url": "https://github.com/npm/npm" }, "bugs": { "url": "https://github.com/npm/npm/issues" }, "dependencies": { "JSONStream": "~1.3.1", "abbrev": "~1.1.0", "ansi-regex": "~2.1.1", "ansicolors": "~0.3.2", "ansistyles": "~0.1.3" }, "devDependencies": { "tacks": "~1.2.6", "tap": "~10.3.2" }, "license": "Artistic-2.0" } """, "a7ab314d8a11d2c93e3dcf528ca294e7b431c449": b""" """, "da39a3ee5e6b4b0d3255bfef95601890afd80709": b"", # 626364 hash_to_hex(b"bcd"): b"unimportant content for bcd", # 636465 hash_to_hex( b"cde" ): b""" { "name": "yarn-parser", "version": "1.0.0", "description": "Tiny web service for parsing yarn.lock files", "main": "index.js", "scripts": { "start": "node index.js", "test": "mocha" }, "engines": { "node": "9.8.0" }, "repository": { "type": "git", "url": "git+https://github.com/librariesio/yarn-parser.git" }, "keywords": [ "yarn", "parse", "lock", "dependencies" ], "author": "Andrew Nesbitt", "license": "AGPL-3.0", "bugs": { "url": "https://github.com/librariesio/yarn-parser/issues" }, "homepage": "https://github.com/librariesio/yarn-parser#readme", "dependencies": { "@yarnpkg/lockfile": "^1.0.0", "body-parser": "^1.15.2", "express": "^4.14.0" }, "devDependencies": { "chai": "^4.1.2", "mocha": "^5.2.0", "request": "^2.87.0", "test": "^0.6.0" } } """, } YARN_PARSER_METADATA = { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "url": "https://github.com/librariesio/yarn-parser#readme", "codeRepository": "git+git+https://github.com/librariesio/yarn-parser.git", "author": [{"type": "Person", "name": "Andrew Nesbitt"}], "license": "https://spdx.org/licenses/AGPL-3.0", "version": "1.0.0", "description": "Tiny web service for parsing yarn.lock files", "issueTracker": "https://github.com/librariesio/yarn-parser/issues", "name": "yarn-parser", "keywords": ["yarn", "parse", "lock", "dependencies"], "type": "SoftwareSourceCode", } json_dict_keys = strategies.one_of( strategies.characters(), strategies.just("type"), strategies.just("url"), strategies.just("name"), strategies.just("email"), strategies.just("@id"), strategies.just("@context"), strategies.just("repository"), strategies.just("license"), strategies.just("repositories"), strategies.just("licenses"), ) """Hypothesis strategy that generates strings, 
with an emphasis on those that are often used as dictionary keys in metadata files.""" generic_json_document = strategies.recursive( strategies.none() | strategies.booleans() | strategies.floats() | strategies.characters(), lambda children: ( strategies.lists(children, min_size=1) | strategies.dictionaries(json_dict_keys, children, min_size=1) ), ) """Hypothesis strategy that generates possible values for values of JSON metadata files.""" def json_document_strategy(keys=None): """Generates an hypothesis strategy that generates metadata files for a JSON-based format that uses the given keys.""" if keys is None: keys = strategies.characters() else: keys = strategies.one_of(map(strategies.just, keys)) return strategies.dictionaries(keys, generic_json_document, min_size=1) def _tree_to_xml(root, xmlns, data): def encode(s): "Skips unpaired surrogates generated by json_document_strategy" return s.encode("utf8", "replace") def to_xml(data, indent=b" "): if data is None: return b"" elif isinstance(data, (bool, str, int, float)): return indent + encode(str(data)) elif isinstance(data, list): return b"\n".join(to_xml(v, indent=indent) for v in data) elif isinstance(data, dict): lines = [] for (key, value) in data.items(): lines.append(indent + encode("<{}>".format(key))) lines.append(to_xml(value, indent=indent + b" ")) lines.append(indent + encode("".format(key))) return b"\n".join(lines) else: raise TypeError(data) return b"\n".join( [ '<{} xmlns="{}">'.format(root, xmlns).encode(), to_xml(data), "".format(root).encode(), ] ) class TreeToXmlTest(unittest.TestCase): def test_leaves(self): self.assertEqual( _tree_to_xml("root", "http://example.com", None), b'\n\n', ) self.assertEqual( _tree_to_xml("root", "http://example.com", True), b'\n True\n', ) self.assertEqual( _tree_to_xml("root", "http://example.com", "abc"), b'\n abc\n', ) self.assertEqual( _tree_to_xml("root", "http://example.com", 42), b'\n 42\n', ) self.assertEqual( _tree_to_xml("root", "http://example.com", 3.14), b'\n 3.14\n', ) def test_dict(self): self.assertIn( _tree_to_xml("root", "http://example.com", {"foo": "bar", "baz": "qux"}), [ b'\n' b" \n bar\n \n" b" \n qux\n \n" b"", b'\n' b" \n qux\n \n" b" \n bar\n \n" b"", ], ) def test_list(self): self.assertEqual( _tree_to_xml( "root", "http://example.com", [{"foo": "bar"}, {"foo": "baz"},] ), b'\n' b" \n bar\n \n" b" \n baz\n \n" b"", ) def xml_document_strategy(keys, root, xmlns): """Generates an hypothesis strategy that generates metadata files for an XML format that uses the given keys.""" return strategies.builds( functools.partial(_tree_to_xml, root, xmlns), json_document_strategy(keys) ) def filter_dict(d, keys): "return a copy of the dict with keys deleted" if not isinstance(keys, (list, tuple)): keys = (keys,) return dict((k, v) for (k, v) in d.items() if k not in keys) def fill_obj_storage(obj_storage): """Add some content in an object storage.""" for (obj_id, content) in OBJ_STORAGE_DATA.items(): obj_storage.add(content, obj_id=hash_to_bytes(obj_id)) def fill_storage(storage): visit_types = {} for visit in ORIGIN_VISITS: storage.origin_add_one({"url": visit["url"]}) visit_types[visit["url"]] = visit["type"] for snap in SNAPSHOTS: origin_url = snap["origin"] visit = storage.origin_visit_add( - origin_url, date=datetime.datetime.now(), type=visit_types[origin_url] - ) + [ + OriginVisit( + origin=origin_url, + date=now(), + type=visit_types[origin_url], + status="ongoing", + snapshot=None, + ) + ] + )[0] snap_id = snap.get("id") or bytes([random.randint(0, 255) for _ in 
range(32)]) storage.snapshot_add([{"id": snap_id, "branches": snap["branches"]}]) - storage.origin_visit_update( - origin_url, visit.visit, status="full", snapshot=snap_id + visit_status = OriginVisitStatus( + origin=origin_url, + visit=visit.visit, + date=now(), + status="full", + snapshot=snap_id, ) + storage.origin_visit_status_add([visit_status]) storage.revision_add(REVISIONS) contents = [] for (obj_id, content) in OBJ_STORAGE_DATA.items(): content_hashes = hashutil.MultiHash.from_data(content).digest() contents.append( { "data": content, "length": len(content), "status": "visible", "sha1": hash_to_bytes(obj_id), "sha1_git": hash_to_bytes(obj_id), "sha256": content_hashes["sha256"], "blake2s256": content_hashes["blake2s256"], } ) storage.content_add(contents) storage.directory_add([{"id": DIRECTORY_ID, "entries": DIRECTORY_ENTRIES,}]) class CommonContentIndexerTest(metaclass=abc.ABCMeta): legacy_get_format = False """True if and only if the tested indexer uses the legacy format. see: https://forge.softwareheritage.org/T1433 """ def get_indexer_results(self, ids): """Override this for indexers that don't have a mock storage.""" return self.indexer.idx_storage.state def assert_legacy_results_ok(self, sha1s, expected_results=None): # XXX old format, remove this when all endpoints are # updated to the new one # see: https://forge.softwareheritage.org/T1433 sha1s = [ sha1 if isinstance(sha1, bytes) else hash_to_bytes(sha1) for sha1 in sha1s ] actual_results = list(self.get_indexer_results(sha1s)) if expected_results is None: expected_results = self.expected_results self.assertEqual( len(expected_results), len(actual_results), (expected_results, actual_results), ) for indexed_data in actual_results: _id = indexed_data["id"] expected_data = expected_results[hashutil.hash_to_hex(_id)].copy() expected_data["id"] = _id self.assertEqual(indexed_data, expected_data) def assert_results_ok(self, sha1s, expected_results=None): if self.legacy_get_format: self.assert_legacy_results_ok(sha1s, expected_results) return sha1s = [ sha1 if isinstance(sha1, bytes) else hash_to_bytes(sha1) for sha1 in sha1s ] actual_results = list(self.get_indexer_results(sha1s)) if expected_results is None: expected_results = self.expected_results self.assertEqual( len(expected_results), len(actual_results), (expected_results, actual_results), ) for indexed_data in actual_results: (_id, indexed_data) = list(indexed_data.items())[0] expected_data = expected_results[hashutil.hash_to_hex(_id)].copy() expected_data = [expected_data] self.assertEqual(indexed_data, expected_data) def test_index(self): """Known sha1 have their data indexed """ sha1s = [self.id0, self.id1, self.id2] # when self.indexer.run(sha1s, policy_update="update-dups") self.assert_results_ok(sha1s) # 2nd pass self.indexer.run(sha1s, policy_update="ignore-dups") self.assert_results_ok(sha1s) def test_index_one_unknown_sha1(self): """Unknown sha1 are not indexed""" sha1s = [ self.id1, "799a5ef812c53907562fe379d4b3851e69c7cb15", # unknown "800a5ef812c53907562fe379d4b3851e69c7cb15", ] # unknown # when self.indexer.run(sha1s, policy_update="update-dups") # then expected_results = { k: v for k, v in self.expected_results.items() if k in sha1s } self.assert_results_ok(sha1s, expected_results) class CommonContentIndexerRangeTest: """Allows to factorize tests on range indexer. 
""" def setUp(self): self.contents = sorted(OBJ_STORAGE_DATA) def assert_results_ok(self, start, end, actual_results, expected_results=None): if expected_results is None: expected_results = self.expected_results actual_results = list(actual_results) for indexed_data in actual_results: _id = indexed_data["id"] assert isinstance(_id, bytes) indexed_data = indexed_data.copy() indexed_data["id"] = hash_to_hex(indexed_data["id"]) self.assertEqual(indexed_data, expected_results[hash_to_hex(_id)]) self.assertTrue(start <= _id <= end) _tool_id = indexed_data["indexer_configuration_id"] self.assertEqual(_tool_id, self.indexer.tool["id"]) def test__index_contents(self): """Indexing contents without existing data results in indexed data """ _start, _end = [self.contents[0], self.contents[2]] # output hex ids start, end = map(hashutil.hash_to_bytes, (_start, _end)) # given actual_results = list(self.indexer._index_contents(start, end, indexed={})) self.assert_results_ok(start, end, actual_results) def test__index_contents_with_indexed_data(self): """Indexing contents with existing data results in less indexed data """ _start, _end = [self.contents[0], self.contents[2]] # output hex ids start, end = map(hashutil.hash_to_bytes, (_start, _end)) data_indexed = [self.id0, self.id2] # given actual_results = self.indexer._index_contents( start, end, indexed=set(map(hash_to_bytes, data_indexed)) ) # craft the expected results expected_results = self.expected_results.copy() for already_indexed_key in data_indexed: expected_results.pop(already_indexed_key) self.assert_results_ok(start, end, actual_results, expected_results) def test_generate_content_get(self): """Optimal indexing should result in indexed data """ _start, _end = [self.contents[0], self.contents[2]] # output hex ids start, end = map(hashutil.hash_to_bytes, (_start, _end)) # given actual_results = self.indexer.run(start, end) # then self.assertEqual(actual_results, {"status": "uneventful"}) def test_generate_content_get_input_as_bytes(self): """Optimal indexing should result in indexed data Input are in bytes here. """ _start, _end = [self.contents[0], self.contents[2]] # output hex ids start, end = map(hashutil.hash_to_bytes, (_start, _end)) # given actual_results = self.indexer.run(start, end, skip_existing=False) # no already indexed data so same result as prior test # then self.assertEqual(actual_results, {"status": "uneventful"}) def test_generate_content_get_no_result(self): """No result indexed returns False""" _start, _end = [ "0000000000000000000000000000000000000000", "0000000000000000000000000000000000000001", ] start, end = map(hashutil.hash_to_bytes, (_start, _end)) # given actual_results = self.indexer.run(start, end, incremental=False) # then self.assertEqual(actual_results, {"status": "uneventful"}) diff --git a/version.txt b/version.txt index 2e807e8..a538b5a 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.171-0-g1d06000 \ No newline at end of file +v0.1.0-0-ga8307fc \ No newline at end of file