diff --git a/swh/indexer/cli.py b/swh/indexer/cli.py
--- a/swh/indexer/cli.py
+++ b/swh/indexer/cli.py
@@ -215,7 +215,15 @@
 @indexer_cli_group.command("journal-client")
 @click.argument(
     "indexer",
-    type=click.Choice(["origin-intrinsic-metadata", "extrinsic-metadata", "*"]),
+    type=click.Choice(
+        [
+            "origin-intrinsic-metadata",
+            "extrinsic-metadata",
+            "content-mimetype",
+            "content-fossology-license",
+            "*",
+        ]
+    ),
     required=False
     # TODO: remove required=False after we stop using it
 )
@@ -321,6 +329,22 @@
         idx.catch_exceptions = False  # don't commit offsets if indexation failed
         worker_fns.append(idx.process_journal_objects)
 
+    if indexer in ("content-mimetype", "*"):
+        from swh.indexer.mimetype import MimetypeIndexer
+
+        object_types.add("content")
+        idx = MimetypeIndexer()
+        idx.catch_exceptions = False  # don't commit offsets if indexation failed
+        worker_fns.append(idx.process_journal_objects)
+
+    if indexer in ("content-fossology-license", "*"):
+        from swh.indexer.fossology_license import FossologyLicenseIndexer
+
+        object_types.add("content")
+        idx = FossologyLicenseIndexer()
+        idx.catch_exceptions = False  # don't commit offsets if indexation failed
+        worker_fns.append(idx.process_journal_objects)
+
     if not worker_fns:
         raise click.ClickException(f"Unknown indexer: {indexer}")
 
diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py
--- a/swh/indexer/indexer.py
+++ b/swh/indexer/indexer.py
@@ -41,6 +41,9 @@
 
 
 class ObjectsDict(TypedDict, total=False):
+    """Journal objects to index, grouped by object type; every key is optional."""
+
+    content: List[Dict]
     directory: List[Dict]
     origin: List[Dict]
     origin_visit_status: List[Dict]
@@ -282,12 +285,23 @@
         """
         return {}
 
+    def process_journal_objects(self, objects: ObjectsDict) -> Dict:
+        """Read swh message objects (content, origin, ...) from the journal to:
+
+        - retrieve the associated objects from their backend (storage, objstorage...)
+        - execute the associated indexing computations
+        - store the results in the indexer storage
+
+        This base implementation only raises NotImplementedError.
+        """
+        raise NotImplementedError()
+
 
 class ContentIndexer(BaseIndexer[Sha1, bytes, TResult], Generic[TResult]):
-    """A content indexer working on a list of ids directly.
+    """A content indexer working on the journal (method `process_journal_objects`) or on
+    a list of ids directly (method `run`).
 
-    To work on indexer partition, use the :class:`ContentPartitionIndexer`
-    instead.
+    To work on indexer partition, use the :class:`ContentPartitionIndexer` instead.
 
     Note: :class:`ContentIndexer` is not an instantiable object. To
     use it, one should inherit from this class and override the
@@ -295,6 +309,58 @@
 
     """
 
+    def process_journal_objects(self, objects: ObjectsDict) -> Dict:
+        """Read content objects from the journal, retrieve their raw content and compute
+        content indexing (e.g. mimetype, fossology license, ...).
+
+        Note that once this is deployed, this supersedes the main ContentIndexer.run
+        method call and the class ContentPartitionIndexer.
+
+        Args:
+            objects: journal objects grouped by type; only the ``content`` entries
+              are read, each one being a dict holding at least a ``sha1`` key
+
+        Returns:
+            A summary dict whose ``status`` is ``uneventful``, ``eventful`` or
+            ``failed``, updated with the summary of the persisted results. Indexing
+            errors are reported as ``failed`` only when ``catch_exceptions`` is
+            set, they are re-raised otherwise
+
+        """
+        summary: Dict[str, Any] = {"status": "uneventful"}
+        try:
+            results = []
+            contents = objects.get("content", [])
+            # FIXME: with swh.objstorage > v2.0: self.objstorage.get_batch(contents)
+            content_data = self.objstorage.get_batch(c["sha1"] for c in contents)
+            for item, raw_content in zip(contents, content_data):
+                id_ = item["sha1"]
+                # Skip missing objects only (assumes get_batch yields None for them,
+                # TODO confirm): empty data is falsy but is a content worth indexing
+                if raw_content is None:
+                    self.log.warning(
+                        "Content %s not found in objstorage", hashutil.hash_to_hex(id_)
+                    )
+                    continue
+
+                results.extend(self.index(id_, data=raw_content))
+        except Exception:
+            if not self.catch_exceptions:
+                raise
+            # Do not fail silently: keep the traceback in the logs
+            self.log.exception("Problem when indexing contents read from the journal")
+            summary["status"] = "failed"
+            return summary
+
+        summary_persist = self.persist_index_computations(results)
+        self.results = results
+        if summary_persist:
+            for value in summary_persist.values():
+                if value > 0:
+                    summary["status"] = "eventful"
+            summary.update(summary_persist)
+        return summary
+
     def run(self, ids: List[Sha1], **kwargs) -> Dict:
         """Given a list of ids:
 
diff --git a/swh/indexer/tests/conftest.py b/swh/indexer/tests/conftest.py
--- a/swh/indexer/tests/conftest.py
+++ b/swh/indexer/tests/conftest.py
@@ -72,11 +72,22 @@
 
 @pytest.fixture
 def swh_indexer_config(
-    swh_storage_backend_config, idx_storage_backend_config, swh_scheduler_config
+    swh_storage_backend_config,
+    idx_storage_backend_config,
+    swh_scheduler_config,
+    tmp_path,
 ):
+    from os import makedirs
+
+    objstore_rootdir = f"{tmp_path}/objstorage/objects"
+    makedirs(objstore_rootdir)
     return {
         "storage": swh_storage_backend_config,
-        "objstorage": {"cls": "memory"},
+        "objstorage": {
+            "cls": "pathslicing",
+            "root": objstore_rootdir,
+            "slicing": "0:2/0:5",
+        },
         "indexer_storage": idx_storage_backend_config,
         "scheduler": {"cls": "local", **swh_scheduler_config},
         "tools": {
diff --git a/swh/indexer/tests/metadata_dictionary/test_npm.py b/swh/indexer/tests/metadata_dictionary/test_npm.py
--- a/swh/indexer/tests/metadata_dictionary/test_npm.py
+++ b/swh/indexer/tests/metadata_dictionary/test_npm.py
@@ -11,13 +11,11 @@
 from swh.indexer.metadata_detector import detect_metadata
 from swh.indexer.metadata_dictionary import MAPPINGS
 from swh.indexer.storage.model import ContentMetadataRow
-from swh.model.hashutil import hash_to_bytes
 
 from ..test_metadata import TRANSLATOR_TOOL, ContentMetadataTestIndexer
 from ..utils import (
     BASE_TEST_CONFIG,
-    fill_obj_storage,
-    fill_storage,
+    MAPPING_DESCRIPTION_CONTENT_SHA1,
     json_document_strategy,
 )
 
@@ -96,31 +94,29 @@
     assert declared_metadata == result
 
 
-def test_index_content_metadata_npm():
+def test_index_content_metadata_npm(storage, obj_storage):
     """
     testing NPM with package.json
     - one sha1 uses a file that can't be translated to metadata and
       should return None in the translated metadata
     """
     sha1s = [
-        hash_to_bytes("26a9f72a7c87cc9205725cfd879f514ff4f3d8d5"),
-        hash_to_bytes("d4c647f0fc257591cc9ba1722484229780d1c607"),
-        hash_to_bytes("02fb2c89e14f7fab46701478c83779c7beb7b069"),
+        MAPPING_DESCRIPTION_CONTENT_SHA1["json:test-metadata-package.json"],
+        MAPPING_DESCRIPTION_CONTENT_SHA1["json:npm-package.json"],
+        MAPPING_DESCRIPTION_CONTENT_SHA1["python:code"],
     ]
+
     # this metadata indexer computes only metadata for package.json
     # in npm context with a hard mapping
     config = BASE_TEST_CONFIG.copy()
     config["tools"] = [TRANSLATOR_TOOL]
     metadata_indexer = ContentMetadataTestIndexer(config=config)
-    fill_obj_storage(metadata_indexer.objstorage)
-    fill_storage(metadata_indexer.storage)
-
-    metadata_indexer.run(sha1s)
+    metadata_indexer.run(sha1s, log_suffix="unknown content")
     results = list(metadata_indexer.idx_storage.content_metadata_get(sha1s))
 
     expected_results = [
         ContentMetadataRow(
-            id=hash_to_bytes("26a9f72a7c87cc9205725cfd879f514ff4f3d8d5"),
+            id=sha1s[0],
             tool=TRANSLATOR_TOOL,
             metadata={
                 "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
@@ -132,7 +128,7 @@
             },
         ),
         ContentMetadataRow(
-            id=hash_to_bytes("d4c647f0fc257591cc9ba1722484229780d1c607"),
+            id=sha1s[1],
             tool=TRANSLATOR_TOOL,
             metadata={
                 "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py
--- a/swh/indexer/tests/test_cli.py
+++ b/swh/indexer/tests/test_cli.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020  The Software Heritage developers
+# Copyright (C) 2019-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -17,16 +17,17 @@
 from swh.indexer.cli import indexer_cli_group
 from swh.indexer.storage.interface import IndexerStorageInterface
 from swh.indexer.storage.model import (
+    ContentMimetypeRow,
     DirectoryIntrinsicMetadataRow,
     OriginExtrinsicMetadataRow,
     OriginIntrinsicMetadataRow,
 )
 from swh.journal.writer import get_journal_writer
 from swh.model.hashutil import hash_to_bytes
-from swh.model.model import Origin, OriginVisitStatus
+from swh.model.model import Content, Origin, OriginVisitStatus
 
 from .test_metadata import REMD
-from .utils import DIRECTORY2, REVISION
+from .utils import DIRECTORY2, RAW_CONTENTS, REVISION
 
 
 def fill_idx_storage(idx_storage: IndexerStorageInterface, nb_rows: int) -> List[int]:
@@ -731,3 +732,87 @@
         )
     ]
     assert sorted(results, key=lambda r: r.id) == expected_results
+
+
+def test_cli_journal_client_index__content_mimetype(
+    cli_runner,
+    swh_config,
+    kafka_prefix: str,
+    kafka_server,
+    consumer: Consumer,
+    idx_storage,
+    obj_storage,
+    storage,
+    mocker,
+    swh_indexer_config,
+):
+    """Index content mimetypes read from the journal with the journal-client cli."""
+    journal_writer = get_journal_writer(
+        "kafka",
+        brokers=[kafka_server],
+        prefix=kafka_prefix,
+        client_id="test producer",
+        value_sanitizer=lambda object_type, value: value,
+        flush_timeout=3,  # fail early if something is going wrong
+    )
+
+    contents = []
+    expected_results = []
+    content_ids = []
+    for content_id, content_d in RAW_CONTENTS.items():
+        raw_content = content_d[0]
+        content = Content.from_data(raw_content)
+
+        assert content_id == content.sha1
+
+        contents.append(content)
+        content_ids.append(content_id)
+
+        # When several mimetypes are listed for a content, expect the second one
+        if isinstance(content_d[1], tuple):
+            mimetype = content_d[1][1]
+        else:
+            mimetype = content_d[1]
+        encoding = content_d[2]
+        content_mimetype_row = ContentMimetypeRow(
+            id=content.sha1,
+            tool={"id": 1, **swh_indexer_config["tools"]},
+            mimetype=mimetype,
+            encoding=encoding,
+        )
+        expected_results.append(content_mimetype_row)
+
+    assert len(contents) == len(RAW_CONTENTS)
+
+    storage.content_add(contents)
+    journal_writer.write_additions("content", contents)
+
+    result = cli_runner.invoke(
+        indexer_cli_group,
+        [
+            "-C",
+            swh_config,
+            "journal-client",
+            "content-mimetype",
+            "--broker",
+            kafka_server,
+            "--prefix",
+            kafka_prefix,
+            "--group-id",
+            "test-consumer",
+            "--stop-after-objects",
+            str(len(contents)),  # command-line arguments are strings
+        ],
+        catch_exceptions=False,
+    )
+
+    # Check the output
+    expected_output = "Done.\n"
+    assert result.exit_code == 0, result.output
+    assert result.output == expected_output
+
+    # Check the mimetypes written in the indexer storage
+    results = idx_storage.content_mimetype_get(content_ids)
+    assert len(results) == len(expected_results)
+    for row in results:
+        assert row in expected_results
diff --git a/swh/indexer/tests/test_ctags.py b/swh/indexer/tests/test_ctags.py
--- a/swh/indexer/tests/test_ctags.py
+++ b/swh/indexer/tests/test_ctags.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018  The Software Heritage developers
+# Copyright (C) 2017-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -15,6 +15,7 @@
 from swh.indexer.tests.utils import (
     BASE_TEST_CONFIG,
     OBJ_STORAGE_DATA,
+    RAW_CONTENT_IDS,
     SHA1_TO_CTAGS,
     CommonContentIndexerTest,
     fill_obj_storage,
@@ -99,16 +100,14 @@
         fill_obj_storage(self.indexer.objstorage)
 
         # Prepare test input
-        self.id0 = "01c9379dfc33803963d07c1ccc748d3fe4c96bb5"
-        self.id1 = "d4c647f0fc257591cc9ba1722484229780d1c607"
-        self.id2 = "688a5ef812c53907562fe379d4b3851e69c7cb15"
+        self.id0, self.id1, self.id2 = RAW_CONTENT_IDS
 
         tool = {k.replace("tool_", ""): v for (k, v) in self.indexer.tool.items()}
 
         self.expected_results = [
             *[
                 ContentCtagsRow(
-                    id=hash_to_bytes(self.id0),
+                    id=self.id0,
                     tool=tool,
                     **kwargs,
                 )
@@ -116,7 +115,7 @@
             ],
             *[
                 ContentCtagsRow(
-                    id=hash_to_bytes(self.id1),
+                    id=self.id1,
                     tool=tool,
                     **kwargs,
                 )
@@ -124,7 +123,7 @@
             ],
             *[
                 ContentCtagsRow(
-                    id=hash_to_bytes(self.id2),
+                    id=self.id2,
                     tool=tool,
                     **kwargs,
                 )
@@ -137,7 +136,7 @@
     def _set_mocks(self):
         def find_ctags_for_content(raw_content):
             for (sha1, ctags) in SHA1_TO_CTAGS.items():
-                if OBJ_STORAGE_DATA[sha1] == raw_content:
+                if OBJ_STORAGE_DATA[hash_to_bytes(sha1)] == raw_content:
                     return ctags
             else:
                 raise ValueError(
@@ -155,7 +154,7 @@
             id_ = cmd[-1].split("/")[-1]
             return "\n".join(
                 json.dumps({"language": ctag["lang"], **ctag})
-                for ctag in SHA1_TO_CTAGS[id_]
+                for ctag in SHA1_TO_CTAGS[hash_to_bytes(id_)]
             )
 
         self._real_check_output = swh.indexer.ctags.subprocess.check_output
diff --git a/swh/indexer/tests/test_fossology_license.py b/swh/indexer/tests/test_fossology_license.py
--- a/swh/indexer/tests/test_fossology_license.py
+++ b/swh/indexer/tests/test_fossology_license.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018  The Software Heritage developers
+# Copyright (C) 2017-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -18,6 +18,7 @@
 from swh.indexer.storage.model import ContentLicenseRow
 from swh.indexer.tests.utils import (
     BASE_TEST_CONFIG,
+    RAW_CONTENT_IDS,
     SHA1_TO_LICENSES,
     CommonContentIndexerPartitionTest,
     CommonContentIndexerTest,
@@ -55,8 +56,8 @@
     if isinstance(id, bytes):
         path = path.decode("utf-8")
     # path is something like /tmp/tmpXXX/<sha1> so we keep only the sha1 part
-    path = path.split("/")[-1]
-    return {"licenses": SHA1_TO_LICENSES.get(path, [])}
+    id_ = path.split("/")[-1]
+    return {"licenses": SHA1_TO_LICENSES.get(hash_to_bytes(id_), [])}
 
 
 CONFIG = {
@@ -97,23 +98,18 @@
         fill_storage(self.indexer.storage)
         fill_obj_storage(self.indexer.objstorage)
 
-        self.id0 = "01c9379dfc33803963d07c1ccc748d3fe4c96bb5"
-        self.id1 = "688a5ef812c53907562fe379d4b3851e69c7cb15"
-        self.id2 = "da39a3ee5e6b4b0d3255bfef95601890afd80709"  # empty content
+        self.id0, self.id1, self.id2 = RAW_CONTENT_IDS
 
         tool = {k.replace("tool_", ""): v for (k, v) in self.indexer.tool.items()}
+
         # then
         self.expected_results = [
             *[
-                ContentLicenseRow(
-                    id=hash_to_bytes(self.id0), tool=tool, license=license
-                )
+                ContentLicenseRow(id=self.id0, tool=tool, license=license)
                 for license in SHA1_TO_LICENSES[self.id0]
             ],
             *[
-                ContentLicenseRow(
-                    id=hash_to_bytes(self.id1), tool=tool, license=license
-                )
+                ContentLicenseRow(id=self.id1, tool=tool, license=license)
                 for license in SHA1_TO_LICENSES[self.id1]
             ],
             *[],  # self.id2
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -31,6 +31,8 @@
 
 from .utils import (
     BASE_TEST_CONFIG,
+    MAPPING_DESCRIPTION_CONTENT_SHA1,
+    MAPPING_DESCRIPTION_CONTENT_SHA1GIT,
     YARN_PARSER_METADATA,
     fill_obj_storage,
     fill_storage,
@@ -92,10 +94,17 @@
         assert tool is not None
         dir_ = DIRECTORY2
 
+        assert (
+            dir_.entries[0].target
+            == MAPPING_DESCRIPTION_CONTENT_SHA1GIT["json:yarn-parser-package.json"]
+        )
+
         metadata_indexer.idx_storage.content_metadata_add(
             [
                 ContentMetadataRow(
-                    id=DIRECTORY2.entries[0].target,
+                    id=MAPPING_DESCRIPTION_CONTENT_SHA1[
+                        "json:yarn-parser-package.json"
+                    ],
                     indexer_configuration_id=tool["id"],
                     metadata=YARN_PARSER_METADATA,
                 )
@@ -105,9 +114,7 @@
         metadata_indexer.run([dir_.id])
 
         results = list(
-            metadata_indexer.idx_storage.directory_intrinsic_metadata_get(
-                [DIRECTORY2.id]
-            )
+            metadata_indexer.idx_storage.directory_intrinsic_metadata_get([dir_.id])
         )
 
         expected_results = [
@@ -132,6 +139,10 @@
         # Add a parent directory, that is the only directory at the root
         # of the directory
         dir_ = DIRECTORY2
+        assert (
+            dir_.entries[0].target
+            == MAPPING_DESCRIPTION_CONTENT_SHA1GIT["json:yarn-parser-package.json"]
+        )
 
         new_dir = Directory(
             entries=(
@@ -154,7 +165,9 @@
         metadata_indexer.idx_storage.content_metadata_add(
             [
                 ContentMetadataRow(
-                    id=DIRECTORY2.entries[0].target,
+                    id=MAPPING_DESCRIPTION_CONTENT_SHA1[
+                        "json:yarn-parser-package.json"
+                    ],
                     indexer_configuration_id=tool["id"],
                     metadata=YARN_PARSER_METADATA,
                 )
diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py
--- a/swh/indexer/tests/test_mimetype.py
+++ b/swh/indexer/tests/test_mimetype.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2020  The Software Heritage developers
+# Copyright (C) 2017-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -16,22 +16,19 @@
 from swh.indexer.storage.model import ContentMimetypeRow
 from swh.indexer.tests.utils import (
     BASE_TEST_CONFIG,
+    RAW_CONTENT_IDS,
+    RAW_CONTENTS,
     CommonContentIndexerPartitionTest,
     CommonContentIndexerTest,
     fill_obj_storage,
     fill_storage,
     filter_dict,
 )
-from swh.model.hashutil import hash_to_bytes
 
 
 @pytest.mark.parametrize(
     "raw_text,mimetype,encoding",
-    [
-        ("du français".encode(), "text/plain", "utf-8"),
-        (b"def __init__(self):", ("text/x-python", "text/x-script.python"), "us-ascii"),
-        (b"\xff\xfe\x00\x00\x00\x00\xff\xfe\xff\xff", "application/octet-stream", ""),
-    ],
+    RAW_CONTENTS.values(),
 )
 def test_compute_mimetype_encoding(raw_text, mimetype, encoding):
     """Compute mimetype encoding should return results"""
@@ -79,32 +76,25 @@
         fill_storage(self.indexer.storage)
         fill_obj_storage(self.indexer.objstorage)
 
-        self.id0 = "01c9379dfc33803963d07c1ccc748d3fe4c96bb5"
-        self.id1 = "688a5ef812c53907562fe379d4b3851e69c7cb15"
-        self.id2 = "da39a3ee5e6b4b0d3255bfef95601890afd80709"
+        self.id0, self.id1, self.id2 = RAW_CONTENT_IDS
 
         tool = {k.replace("tool_", ""): v for (k, v) in self.indexer.tool.items()}
 
-        self.expected_results = [
-            ContentMimetypeRow(
-                id=hash_to_bytes(self.id0),
-                tool=tool,
-                mimetype="text/plain",
-                encoding="us-ascii",
-            ),
-            ContentMimetypeRow(
-                id=hash_to_bytes(self.id1),
-                tool=tool,
-                mimetype="text/plain",
-                encoding="us-ascii",
-            ),
-            ContentMimetypeRow(
-                id=hash_to_bytes(self.id2),
-                tool=tool,
-                mimetype="application/x-empty",
-                encoding="binary",
-            ),
-        ]
+        results = []
+        for raw_content_id in RAW_CONTENT_IDS:
+            content_t = RAW_CONTENTS[raw_content_id]
+            # New magic version can return different results, this deals with such a case
+            if isinstance(content_t[1], tuple):
+                mimetype = content_t[1][1]
+            else:
+                mimetype = content_t[1]
+            encoding = content_t[2]
+            mimetype_row = ContentMimetypeRow(
+                id=raw_content_id, tool=tool, mimetype=mimetype, encoding=encoding
+            )
+            results.append(mimetype_row)
+
+        self.expected_results = results
 
 
 RANGE_CONFIG = dict(list(CONFIG.items()) + [("write_batch_size", 100)])
diff --git a/swh/indexer/tests/utils.py b/swh/indexer/tests/utils.py
--- a/swh/indexer/tests/utils.py
+++ b/swh/indexer/tests/utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2020  The Software Heritage developers
+# Copyright (C) 2017-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -6,14 +6,13 @@
 import abc
 import datetime
 import functools
-from typing import Any, Dict
+from typing import Any, Dict, List, Tuple
 import unittest
 
 from hypothesis import strategies
 
 from swh.core.api.classes import stream_results
 from swh.indexer.storage import INDEXER_CFG_KEY
-from swh.model import hashutil
 from swh.model.hashutil import hash_to_bytes
 from swh.model.model import (
     Content,
@@ -40,7 +39,6 @@
     INDEXER_CFG_KEY: {"cls": "memory"},
 }
 
-
 ORIGIN_VISITS = [
     {"type": "git", "origin": "https://github.com/SoftwareHeritage/swh-storage"},
     {"type": "ftp", "origin": "rsync://ftp.gnu.org/gnu/3dldf"},
@@ -61,20 +59,230 @@
 
 ORIGINS = [Origin(url=visit["origin"]) for visit in ORIGIN_VISITS]
 
+OBJ_STORAGE_RAW_CONTENT: Dict[str, bytes] = {
+    "text:some": b"this is some text",
+    "text:another": b"another text",
+    "text:yet": b"yet another text",
+    "python:code": b"""
+    import unittest
+    import logging
+    from swh.indexer.mimetype import MimetypeIndexer
+    from swh.indexer.tests.test_utils import MockObjStorage
+
+    class MockStorage():
+        def content_mimetype_add(self, mimetypes):
+            self.state = mimetypes
+
+        def indexer_configuration_add(self, tools):
+            return [{
+                'id': 10,
+            }]
+    """,
+    "c:struct": b"""
+        #ifndef __AVL__
+        #define __AVL__
+
+        typedef struct _avl_tree avl_tree;
+
+        typedef struct _data_t {
+          int content;
+        } data_t;
+    """,
+    "lisp:assertion": b"""
+    (should 'pygments (recognize 'lisp 'easily))
+
+    """,
+    "json:test-metadata-package.json": b"""
+    {
+        "name": "test_metadata",
+        "version": "0.0.1",
+        "description": "Simple package.json test for indexer",
+        "repository": {
+          "type": "git",
+          "url": "https://github.com/moranegg/metadata_test"
+      }
+    }
+    """,
+    "json:npm-package.json": b"""
+    {
+      "version": "5.0.3",
+      "name": "npm",
+      "description": "a package manager for JavaScript",
+      "keywords": [
+        "install",
+        "modules",
+        "package manager",
+        "package.json"
+      ],
+      "preferGlobal": true,
+      "config": {
+        "publishtest": false
+      },
+      "homepage": "https://docs.npmjs.com/",
+      "author": "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
+      "repository": {
+        "type": "git",
+        "url": "https://github.com/npm/npm"
+      },
+      "bugs": {
+        "url": "https://github.com/npm/npm/issues"
+      },
+      "dependencies": {
+        "JSONStream": "~1.3.1",
+        "abbrev": "~1.1.0",
+        "ansi-regex": "~2.1.1",
+        "ansicolors": "~0.3.2",
+        "ansistyles": "~0.1.3"
+      },
+      "devDependencies": {
+        "tacks": "~1.2.6",
+        "tap": "~10.3.2"
+      },
+      "license": "Artistic-2.0"
+    }
+
+    """,
+    "text:carriage-return": b"""
+    """,
+    "text:empty": b"",
+    # was 626364 / b'bcd'
+    "text:unimportant": b"unimportant content for bcd",
+    # was 636465 / b'cde' now yarn-parser package.json
+    "json:yarn-parser-package.json": b"""
+    {
+      "name": "yarn-parser",
+      "version": "1.0.0",
+      "description": "Tiny web service for parsing yarn.lock files",
+      "main": "index.js",
+      "scripts": {
+        "start": "node index.js",
+        "test": "mocha"
+      },
+      "engines": {
+        "node": "9.8.0"
+      },
+      "repository": {
+        "type": "git",
+        "url": "git+https://github.com/librariesio/yarn-parser.git"
+      },
+      "keywords": [
+        "yarn",
+        "parse",
+        "lock",
+        "dependencies"
+      ],
+      "author": "Andrew Nesbitt",
+      "license": "AGPL-3.0",
+      "bugs": {
+        "url": "https://github.com/librariesio/yarn-parser/issues"
+      },
+      "homepage": "https://github.com/librariesio/yarn-parser#readme",
+      "dependencies": {
+        "@yarnpkg/lockfile": "^1.0.0",
+        "body-parser": "^1.15.2",
+        "express": "^4.14.0"
+      },
+      "devDependencies": {
+        "chai": "^4.1.2",
+        "mocha": "^5.2.0",
+        "request": "^2.87.0",
+        "test": "^0.6.0"
+      }
+    }
+
+""",
+}
+
+MAPPING_DESCRIPTION_CONTENT_SHA1GIT: Dict[str, bytes] = {}
+MAPPING_DESCRIPTION_CONTENT_SHA1: Dict[str, bytes] = {}
+OBJ_STORAGE_DATA: Dict[bytes, bytes] = {}
+
+for key_description, data in OBJ_STORAGE_RAW_CONTENT.items():
+    content = Content.from_data(data)
+    MAPPING_DESCRIPTION_CONTENT_SHA1GIT[key_description] = content.sha1_git
+    MAPPING_DESCRIPTION_CONTENT_SHA1[key_description] = content.sha1
+    OBJ_STORAGE_DATA[content.sha1] = data
+
+
+RAW_CONTENT_METADATA = [
+    (
+        "du français".encode(),
+        "text/plain",
+        "utf-8",
+    ),
+    (
+        b"def __init__(self):",
+        ("text/x-python", "text/x-script.python"),
+        "us-ascii",
+    ),
+    (
+        b"\xff\xfe\x00\x00\x00\x00\xff\xfe\xff\xff",
+        "application/octet-stream",
+        "",
+    ),
+]
+
+RAW_CONTENTS: Dict[bytes, Tuple] = {}
+RAW_CONTENT_IDS: List[bytes] = []
+
+for raw_content_d in RAW_CONTENT_METADATA:
+    raw_content = raw_content_d[0]
+    content = Content.from_data(raw_content)
+    RAW_CONTENTS[content.sha1] = raw_content_d
+    RAW_CONTENT_IDS.append(content.sha1)
+    # also register the raw data so that it ends up in the test objstorage
+    OBJ_STORAGE_DATA[content.sha1] = raw_content
+
+
+SHA1_TO_LICENSES: Dict[bytes, List[str]] = {
+    RAW_CONTENT_IDS[0]: ["GPL"],
+    RAW_CONTENT_IDS[1]: ["AGPL"],
+    RAW_CONTENT_IDS[2]: [],
+}
+
+
+SHA1_TO_CTAGS: Dict[bytes, List[Dict[str, Any]]] = {
+    RAW_CONTENT_IDS[0]: [
+        {
+            "name": "foo",
+            "kind": "str",
+            "line": 10,
+            "lang": "bar",
+        }
+    ],
+    RAW_CONTENT_IDS[1]: [
+        {
+            "name": "symbol",
+            "kind": "float",
+            "line": 99,
+            "lang": "python",
+        }
+    ],
+    RAW_CONTENT_IDS[2]: [
+        {
+            "name": "let",
+            "kind": "int",
+            "line": 100,
+            "lang": "haskell",
+        }
+    ],
+}
+
 
 DIRECTORY = Directory(
-    id=hash_to_bytes("34f335a750111ca0a8b64d8034faec9eedc396be"),
     entries=(
         DirectoryEntry(
             name=b"index.js",
             type="file",
-            target=hash_to_bytes("01c9379dfc33803963d07c1ccc748d3fe4c96bb5"),
+            target=MAPPING_DESCRIPTION_CONTENT_SHA1GIT["text:some"],
             perms=0o100644,
         ),
         DirectoryEntry(
             name=b"package.json",
             type="file",
-            target=hash_to_bytes("26a9f72a7c87cc9205725cfd879f514ff4f3d8d5"),
+            target=MAPPING_DESCRIPTION_CONTENT_SHA1GIT[
+                "json:test-metadata-package.json"
+            ],
             perms=0o100644,
         ),
         DirectoryEntry(
@@ -87,12 +295,11 @@
 )
 
 DIRECTORY2 = Directory(
-    id=b"\xf8zz\xa1\x12`<1$\xfav\xf9\x01\xfd5\x85F`\xf2\xb6",
     entries=(
         DirectoryEntry(
             name=b"package.json",
             type="file",
-            target=hash_to_bytes("f5305243b3ce7ef8dc864ebc73794da304025beb"),
+            target=MAPPING_DESCRIPTION_CONTENT_SHA1GIT["json:yarn-parser-package.json"],
             perms=0o100644,
         ),
     ),
@@ -101,7 +308,6 @@
 _utc_plus_2 = datetime.timezone(datetime.timedelta(minutes=120))
 
 REVISION = Revision(
-    id=hash_to_bytes("c6201cb1b9b9df9a7542f9665c3b5dfab85e9775"),
     message=b"Improve search functionality",
     author=Person(
         name=b"Andrew Nesbitt",
@@ -148,7 +354,6 @@
 SNAPSHOTS = [
     # https://github.com/SoftwareHeritage/swh-storage
     Snapshot(
-        id=hash_to_bytes("a50fde72265343b7d28cecf6db20d98a81d21965"),
         branches={
             b"refs/heads/add-revision-origin-cache": SnapshotBranch(
                 target=b'L[\xce\x1c\x88\x8eF\t\xf1"\x19\x1e\xfb\xc0s\xe7/\xe9l\x1e',
@@ -169,7 +374,6 @@
     ),
     # rsync://ftp.gnu.org/gnu/3dldf
     Snapshot(
-        id=hash_to_bytes("2c67f69a416bca4e1f3fcd848c588fab88ad0642"),
         branches={
             b"3DLDF-1.1.4.tar.gz": SnapshotBranch(
                 target=b'dJ\xfb\x1c\x91\xf4\x82B%]6\xa2\x90|\xd3\xfc"G\x99\x11',
@@ -195,7 +399,6 @@
     ),
     # https://forge.softwareheritage.org/source/jesuisgpl/",
     Snapshot(
-        id=hash_to_bytes("68c0d26104d47e278dd6be07ed61fafb561d0d20"),
         branches={
             b"master": SnapshotBranch(
                 target=b"\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{\xa6\xe9\x99\xb1\x9e]q\xeb",  # noqa
@@ -205,7 +408,6 @@
     ),
     # https://old-pypi.example.org/project/limnoria/
     Snapshot(
-        id=hash_to_bytes("f255245269e15fc99d284affd79f766668de0b67"),
         branches={
             b"HEAD": SnapshotBranch(
                 target=b"releases/2018.09.09", target_type=TargetType.ALIAS
@@ -238,7 +440,6 @@
     ),
     # http://0-512-md.googlecode.com/svn/
     Snapshot(
-        id=hash_to_bytes("a1a28c0ab387a8f9e0618cb705eab81fc448f473"),
         branches={
             b"master": SnapshotBranch(
                 target=b"\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8\xc9\xad#.\x1bw=\x18",
@@ -248,7 +449,6 @@
     ),
     # https://github.com/librariesio/yarn-parser
     Snapshot(
-        id=hash_to_bytes("bb4fd3a836930ce629d912864319637040ff3040"),
         branches={
             b"HEAD": SnapshotBranch(
                 target=REVISION.id,
@@ -258,7 +458,6 @@
     ),
     # https://github.com/librariesio/yarn-parser.git
     Snapshot(
-        id=hash_to_bytes("bb4fd3a836930ce629d912864319637040ff3040"),
         branches={
             b"HEAD": SnapshotBranch(
                 target=REVISION.id,
@@ -280,178 +479,6 @@
 assert len(SNAPSHOTS) == len(ORIGIN_VISITS)
 
 
-SHA1_TO_LICENSES = {
-    "01c9379dfc33803963d07c1ccc748d3fe4c96bb5": ["GPL"],
-    "02fb2c89e14f7fab46701478c83779c7beb7b069": ["Apache2.0"],
-    "103bc087db1d26afc3a0283f38663d081e9b01e6": ["MIT"],
-    "688a5ef812c53907562fe379d4b3851e69c7cb15": ["AGPL"],
-    "da39a3ee5e6b4b0d3255bfef95601890afd80709": [],
-}
-
-
-SHA1_TO_CTAGS = {
-    "01c9379dfc33803963d07c1ccc748d3fe4c96bb5": [
-        {
-            "name": "foo",
-            "kind": "str",
-            "line": 10,
-            "lang": "bar",
-        }
-    ],
-    "d4c647f0fc257591cc9ba1722484229780d1c607": [
-        {
-            "name": "let",
-            "kind": "int",
-            "line": 100,
-            "lang": "haskell",
-        }
-    ],
-    "688a5ef812c53907562fe379d4b3851e69c7cb15": [
-        {
-            "name": "symbol",
-            "kind": "float",
-            "line": 99,
-            "lang": "python",
-        }
-    ],
-}
-
-
-OBJ_STORAGE_DATA = {
-    "01c9379dfc33803963d07c1ccc748d3fe4c96bb5": b"this is some text",
-    "688a5ef812c53907562fe379d4b3851e69c7cb15": b"another text",
-    "8986af901dd2043044ce8f0d8fc039153641cf17": b"yet another text",
-    "02fb2c89e14f7fab46701478c83779c7beb7b069": b"""
-    import unittest
-    import logging
-    from swh.indexer.mimetype import MimetypeIndexer
-    from swh.indexer.tests.test_utils import MockObjStorage
-
-    class MockStorage():
-        def content_mimetype_add(self, mimetypes):
-            self.state = mimetypes
-
-        def indexer_configuration_add(self, tools):
-            return [{
-                'id': 10,
-            }]
-    """,
-    "103bc087db1d26afc3a0283f38663d081e9b01e6": b"""
-        #ifndef __AVL__
-        #define __AVL__
-
-        typedef struct _avl_tree avl_tree;
-
-        typedef struct _data_t {
-          int content;
-        } data_t;
-    """,
-    "93666f74f1cf635c8c8ac118879da6ec5623c410": b"""
-    (should 'pygments (recognize 'lisp 'easily))
-
-    """,
-    "26a9f72a7c87cc9205725cfd879f514ff4f3d8d5": b"""
-    {
-        "name": "test_metadata",
-        "version": "0.0.1",
-        "description": "Simple package.json test for indexer",
-        "repository": {
-          "type": "git",
-          "url": "https://github.com/moranegg/metadata_test"
-      }
-    }
-    """,
-    "d4c647f0fc257591cc9ba1722484229780d1c607": b"""
-    {
-      "version": "5.0.3",
-      "name": "npm",
-      "description": "a package manager for JavaScript",
-      "keywords": [
-        "install",
-        "modules",
-        "package manager",
-        "package.json"
-      ],
-      "preferGlobal": true,
-      "config": {
-        "publishtest": false
-      },
-      "homepage": "https://docs.npmjs.com/",
-      "author": "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
-      "repository": {
-        "type": "git",
-        "url": "https://github.com/npm/npm"
-      },
-      "bugs": {
-        "url": "https://github.com/npm/npm/issues"
-      },
-      "dependencies": {
-        "JSONStream": "~1.3.1",
-        "abbrev": "~1.1.0",
-        "ansi-regex": "~2.1.1",
-        "ansicolors": "~0.3.2",
-        "ansistyles": "~0.1.3"
-      },
-      "devDependencies": {
-        "tacks": "~1.2.6",
-        "tap": "~10.3.2"
-      },
-      "license": "Artistic-2.0"
-    }
-
-    """,
-    "a7ab314d8a11d2c93e3dcf528ca294e7b431c449": b"""
-    """,
-    "da39a3ee5e6b4b0d3255bfef95601890afd80709": b"",
-    # was 626364 / b'bcd'
-    "e3e40fee6ff8a52f06c3b428bfe7c0ed2ef56e92": b"unimportant content for bcd",
-    # was 636465 / b'cde' now yarn-parser package.json
-    "f5305243b3ce7ef8dc864ebc73794da304025beb": b"""
-    {
-      "name": "yarn-parser",
-      "version": "1.0.0",
-      "description": "Tiny web service for parsing yarn.lock files",
-      "main": "index.js",
-      "scripts": {
-        "start": "node index.js",
-        "test": "mocha"
-      },
-      "engines": {
-        "node": "9.8.0"
-      },
-      "repository": {
-        "type": "git",
-        "url": "git+https://github.com/librariesio/yarn-parser.git"
-      },
-      "keywords": [
-        "yarn",
-        "parse",
-        "lock",
-        "dependencies"
-      ],
-      "author": "Andrew Nesbitt",
-      "license": "AGPL-3.0",
-      "bugs": {
-        "url": "https://github.com/librariesio/yarn-parser/issues"
-      },
-      "homepage": "https://github.com/librariesio/yarn-parser#readme",
-      "dependencies": {
-        "@yarnpkg/lockfile": "^1.0.0",
-        "body-parser": "^1.15.2",
-        "express": "^4.14.0"
-      },
-      "devDependencies": {
-        "chai": "^4.1.2",
-        "mocha": "^5.2.0",
-        "request": "^2.87.0",
-        "test": "^0.6.0"
-      }
-    }
-
-""",
-}
-
-
 YARN_PARSER_METADATA = {
     "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
     "url": "https://github.com/librariesio/yarn-parser#readme",
@@ -613,17 +640,19 @@
 
 def fill_obj_storage(obj_storage):
     """Add some content in an object storage."""
-    for (obj_id, content) in OBJ_STORAGE_DATA.items():
-        obj_storage.add(content, obj_id=hash_to_bytes(obj_id))
+    for obj_id, content in OBJ_STORAGE_DATA.items():
+        obj_storage.add(content, obj_id)
 
 
 def fill_storage(storage):
-    storage.origin_add(ORIGINS)
+    """Fill in storage with consistent test dataset."""
+    storage.content_add([Content.from_data(data) for data in OBJ_STORAGE_DATA.values()])
     storage.directory_add([DIRECTORY, DIRECTORY2])
     storage.revision_add(REVISIONS)
     storage.release_add(RELEASES)
     storage.snapshot_add(SNAPSHOTS)
 
+    storage.origin_add(ORIGINS)
     for visit, snapshot in zip(ORIGIN_VISITS, SNAPSHOTS):
         assert snapshot.id is not None
 
@@ -639,22 +668,6 @@
         )
         storage.origin_visit_status_add([visit_status])
 
-    contents = []
-    for (obj_id, content) in OBJ_STORAGE_DATA.items():
-        content_hashes = hashutil.MultiHash.from_data(content).digest()
-        contents.append(
-            Content(
-                data=content,
-                length=len(content),
-                status="visible",
-                sha1=hash_to_bytes(obj_id),
-                sha1_git=hash_to_bytes(obj_id),
-                sha256=content_hashes["sha256"],
-                blake2s256=content_hashes["blake2s256"],
-            )
-        )
-    storage.content_add(contents)
-
 
 class CommonContentIndexerTest(metaclass=abc.ABCMeta):
     def get_indexer_results(self, ids):
@@ -662,9 +675,7 @@
         return self.indexer.idx_storage.state
 
     def assert_results_ok(self, sha1s, expected_results=None):
-        sha1s = [
-            sha1 if isinstance(sha1, bytes) else hash_to_bytes(sha1) for sha1 in sha1s
-        ]
+        sha1s = [hash_to_bytes(sha1) for sha1 in sha1s]
         actual_results = list(self.get_indexer_results(sha1s))
 
         if expected_results is None:
@@ -698,11 +709,7 @@
         self.indexer.run(sha1s)
 
         # then
-        expected_results = [
-            res
-            for res in self.expected_results
-            if hashutil.hash_to_hex(res.id) in sha1s
-        ]
+        expected_results = [res for res in self.expected_results if res.id in sha1s]
 
         self.assert_results_ok(sha1s, expected_results)
 
@@ -711,6 +718,7 @@
     """Allows to factorize tests on range indexer."""
 
     def setUp(self):
+        # TODO: check whether self.contents is still useful
         self.contents = sorted(OBJ_STORAGE_DATA)
 
     def assert_results_ok(self, partition_id, nb_partitions, actual_results):