diff --git a/swh/indexer/cli.py b/swh/indexer/cli.py
--- a/swh/indexer/cli.py
+++ b/swh/indexer/cli.py
@@ -215,7 +215,15 @@
 @indexer_cli_group.command("journal-client")
 @click.argument(
     "indexer",
-    type=click.Choice(["origin-intrinsic-metadata", "extrinsic-metadata", "*"]),
+    type=click.Choice(
+        [
+            "origin-intrinsic-metadata",
+            "extrinsic-metadata",
+            "content-mimetype",
+            "content-fossology-license",
+            "*",
+        ]
+    ),
     required=False
     # TODO: remove required=False after we stop using it
 )
diff --git a/swh/indexer/tests/conftest.py b/swh/indexer/tests/conftest.py
--- a/swh/indexer/tests/conftest.py
+++ b/swh/indexer/tests/conftest.py
@@ -72,11 +72,22 @@
 
 @pytest.fixture
 def swh_indexer_config(
-    swh_storage_backend_config, idx_storage_backend_config, swh_scheduler_config
+    swh_storage_backend_config,
+    idx_storage_backend_config,
+    swh_scheduler_config,
+    tmp_path,
 ):
+    from os import makedirs
+
+    objstore_rootdir = f"{tmp_path}/objstorage/objects"
+    makedirs(objstore_rootdir)
     return {
         "storage": swh_storage_backend_config,
-        "objstorage": {"cls": "memory"},
+        "objstorage": {
+            "cls": "pathslicing",
+            "root": objstore_rootdir,
+            "slicing": "0:2/0:5",
+        },
         "indexer_storage": idx_storage_backend_config,
         "scheduler": {"cls": "local", **swh_scheduler_config},
         "tools": {
diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py
--- a/swh/indexer/tests/test_cli.py
+++ b/swh/indexer/tests/test_cli.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020  The Software Heritage developers
+# Copyright (C) 2019-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -17,16 +17,17 @@
 from swh.indexer.cli import indexer_cli_group
 from swh.indexer.storage.interface import IndexerStorageInterface
 from swh.indexer.storage.model import (
+    ContentMimetypeRow,
     DirectoryIntrinsicMetadataRow,
     OriginExtrinsicMetadataRow,
     OriginIntrinsicMetadataRow,
 )
 from swh.journal.writer import get_journal_writer
 from swh.model.hashutil import hash_to_bytes
-from swh.model.model import Origin, OriginVisitStatus
+from swh.model.model import Content, Origin, OriginVisitStatus
 
 from .test_metadata import REMD
-from .utils import DIRECTORY2, REVISION
+from .utils import DIRECTORY2, RAW_CONTENTS, REVISION, fill_obj_storage
 
 
 def fill_idx_storage(idx_storage: IndexerStorageInterface, nb_rows: int) -> List[int]:
@@ -731,3 +732,87 @@
         )
     ]
     assert sorted(results, key=lambda r: r.id) == expected_results
+
+
+def test_cli_journal_client_index__content_mimetype(
+    cli_runner,
+    swh_config,
+    kafka_prefix: str,
+    kafka_server,
+    consumer: Consumer,
+    idx_storage,
+    obj_storage,
+    storage,
+    mocker,
+    swh_indexer_config,
+):
+    """Index contents read from the journal with the content-mimetype indexer."""
+    journal_writer = get_journal_writer(
+        "kafka",
+        brokers=[kafka_server],
+        prefix=kafka_prefix,
+        client_id="test producer",
+        value_sanitizer=lambda object_type, value: value,
+        flush_timeout=3,  # fail early if something is going wrong
+    )
+
+    fill_obj_storage(obj_storage)
+
+    contents = []
+    expected_results = []
+    content_ids = []
+    for content_id, content_d in RAW_CONTENTS.items():
+        raw_content = content_d[0]
+        content = Content.from_data(raw_content)
+        # RAW_CONTENTS is expected to be keyed by the sha1 of each payload
+        assert content_id == content.sha1
+
+        contents.append(content)
+        content_ids.append(content_id)
+        # the mimetype may be a tuple of alternatives; index 1 is the one checked
+        if isinstance(content_d[1], tuple):
+            mimetype = content_d[1][1]
+        else:
+            mimetype = content_d[1]
+        encoding = content_d[2]
+        content_mimetype_row = ContentMimetypeRow(
+            id=content.sha1,
+            tool={"id": 1, **swh_indexer_config["tools"]},
+            mimetype=mimetype,
+            encoding=encoding,
+        )
+        expected_results.append(content_mimetype_row)
+
+    assert len(contents) == len(RAW_CONTENTS)
+
+    storage.content_add(contents)
+    journal_writer.write_additions("content", contents)
+
+    result = cli_runner.invoke(
+        indexer_cli_group,
+        [
+            "-C",
+            swh_config,
+            "journal-client",
+            "content-mimetype",
+            "--broker",
+            kafka_server,
+            "--prefix",
+            kafka_prefix,
+            "--group-id",
+            "test-consumer",
+            "--stop-after-objects",
+            str(len(contents)),  # argv entries are strings, as on a real CLI
+        ],
+        catch_exceptions=False,
+    )
+
+    # Check the output
+    expected_output = "Done.\n"
+    assert result.exit_code == 0, result.output
+    assert result.output == expected_output
+
+    results = idx_storage.content_mimetype_get(content_ids)
+    assert len(results) == len(expected_results)
+    for row in results:  # distinct name: do not shadow the CLI `result` above
+        assert row in expected_results
diff --git a/swh/indexer/tests/test_ctags.py b/swh/indexer/tests/test_ctags.py
--- a/swh/indexer/tests/test_ctags.py
+++ b/swh/indexer/tests/test_ctags.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018  The Software Heritage developers
+# Copyright (C) 2017-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -15,6 +15,7 @@
 from swh.indexer.tests.utils import (
     BASE_TEST_CONFIG,
     OBJ_STORAGE_DATA,
+    RAW_CONTENT_IDS,
     SHA1_TO_CTAGS,
     CommonContentIndexerTest,
     fill_obj_storage,
@@ -99,16 +100,14 @@
         fill_obj_storage(self.indexer.objstorage)
 
         # Prepare test input
-        self.id0 = "01c9379dfc33803963d07c1ccc748d3fe4c96bb5"
-        self.id1 = "d4c647f0fc257591cc9ba1722484229780d1c607"
-        self.id2 = "688a5ef812c53907562fe379d4b3851e69c7cb15"
+        self.id0, self.id1, self.id2 = RAW_CONTENT_IDS
 
         tool = {k.replace("tool_", ""): v for (k, v) in self.indexer.tool.items()}
 
         self.expected_results = [
             *[
                 ContentCtagsRow(
-                    id=hash_to_bytes(self.id0),
+                    id=self.id0,
                     tool=tool,
                     **kwargs,
                 )
@@ -116,7 +115,7 @@
             ],
             *[
                 ContentCtagsRow(
-                    id=hash_to_bytes(self.id1),
+                    id=self.id1,
                     tool=tool,
                     **kwargs,
                 )
@@ -124,7 +123,7 @@
             ],
             *[
                 ContentCtagsRow(
-                    id=hash_to_bytes(self.id2),
+                    id=self.id2,
                     tool=tool,
                     **kwargs,
                 )
@@ -137,7 +136,7 @@
     def _set_mocks(self):
         def find_ctags_for_content(raw_content):
             for (sha1, ctags) in SHA1_TO_CTAGS.items():
-                if OBJ_STORAGE_DATA[sha1] == raw_content:
+                if OBJ_STORAGE_DATA[hash_to_bytes(sha1)] == raw_content:
                     return ctags
             else:
                 raise ValueError(
@@ -155,7 +154,7 @@
             id_ = cmd[-1].split("/")[-1]
             return "\n".join(
                 json.dumps({"language": ctag["lang"], **ctag})
-                for ctag in SHA1_TO_CTAGS[id_]
+                for ctag in SHA1_TO_CTAGS[hash_to_bytes(id_)]
             )
 
         self._real_check_output = swh.indexer.ctags.subprocess.check_output
diff --git a/swh/indexer/tests/test_fossology_license.py b/swh/indexer/tests/test_fossology_license.py
--- a/swh/indexer/tests/test_fossology_license.py
+++ b/swh/indexer/tests/test_fossology_license.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018  The Software Heritage developers
+# Copyright (C) 2017-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -18,6 +18,7 @@
 from swh.indexer.storage.model import ContentLicenseRow
 from swh.indexer.tests.utils import (
     BASE_TEST_CONFIG,
+    RAW_CONTENT_IDS,
     SHA1_TO_LICENSES,
     CommonContentIndexerPartitionTest,
     CommonContentIndexerTest,
@@ -55,8 +56,8 @@
     if isinstance(id, bytes):
         path = path.decode("utf-8")
     # path is something like /tmp/tmpXXX/<sha1> so we keep only the sha1 part
-    path = path.split("/")[-1]
-    return {"licenses": SHA1_TO_LICENSES.get(path, [])}
+    id_ = path.split("/")[-1]
+    return {"licenses": SHA1_TO_LICENSES.get(hash_to_bytes(id_), [])}
 
 
 CONFIG = {
@@ -97,23 +98,18 @@
         fill_storage(self.indexer.storage)
         fill_obj_storage(self.indexer.objstorage)
 
-        self.id0 = "01c9379dfc33803963d07c1ccc748d3fe4c96bb5"
-        self.id1 = "688a5ef812c53907562fe379d4b3851e69c7cb15"
-        self.id2 = "da39a3ee5e6b4b0d3255bfef95601890afd80709"  # empty content
+        self.id0, self.id1, self.id2 = RAW_CONTENT_IDS
 
         tool = {k.replace("tool_", ""): v for (k, v) in self.indexer.tool.items()}
+
         # then
         self.expected_results = [
             *[
-                ContentLicenseRow(
-                    id=hash_to_bytes(self.id0), tool=tool, license=license
-                )
+                ContentLicenseRow(id=self.id0, tool=tool, license=license)
                 for license in SHA1_TO_LICENSES[self.id0]
             ],
             *[
-                ContentLicenseRow(
-                    id=hash_to_bytes(self.id1), tool=tool, license=license
-                )
+                ContentLicenseRow(id=self.id1, tool=tool, license=license)
                 for license in SHA1_TO_LICENSES[self.id1]
             ],
             *[],  # self.id2
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -95,7 +95,7 @@
         metadata_indexer.idx_storage.content_metadata_add(
             [
                 ContentMetadataRow(
-                    id=DIRECTORY2.entries[0].target,
+                    id=dir_.entries[0].target,
                     indexer_configuration_id=tool["id"],
                     metadata=YARN_PARSER_METADATA,
                 )
@@ -105,9 +105,7 @@
         metadata_indexer.run([dir_.id])
 
         results = list(
-            metadata_indexer.idx_storage.directory_intrinsic_metadata_get(
-                [DIRECTORY2.id]
-            )
+            metadata_indexer.idx_storage.directory_intrinsic_metadata_get([dir_.id])
         )
 
         expected_results = [
diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py
--- a/swh/indexer/tests/test_mimetype.py
+++ b/swh/indexer/tests/test_mimetype.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2020  The Software Heritage developers
+# Copyright (C) 2017-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -16,22 +16,19 @@
 from swh.indexer.storage.model import ContentMimetypeRow
 from swh.indexer.tests.utils import (
     BASE_TEST_CONFIG,
+    RAW_CONTENT_IDS,
+    RAW_CONTENTS,
     CommonContentIndexerPartitionTest,
     CommonContentIndexerTest,
     fill_obj_storage,
     fill_storage,
     filter_dict,
 )
-from swh.model.hashutil import hash_to_bytes
 
 
 @pytest.mark.parametrize(
     "raw_text,mimetype,encoding",
-    [
-        ("du français".encode(), "text/plain", "utf-8"),
-        (b"def __init__(self):", ("text/x-python", "text/x-script.python"), "us-ascii"),
-        (b"\xff\xfe\x00\x00\x00\x00\xff\xfe\xff\xff", "application/octet-stream", ""),
-    ],
+    RAW_CONTENTS.values(),
 )
 def test_compute_mimetype_encoding(raw_text, mimetype, encoding):
     """Compute mimetype encoding should return results"""
@@ -79,32 +76,25 @@
         fill_storage(self.indexer.storage)
         fill_obj_storage(self.indexer.objstorage)
 
-        self.id0 = "01c9379dfc33803963d07c1ccc748d3fe4c96bb5"
-        self.id1 = "688a5ef812c53907562fe379d4b3851e69c7cb15"
-        self.id2 = "da39a3ee5e6b4b0d3255bfef95601890afd80709"
+        self.id0, self.id1, self.id2 = RAW_CONTENT_IDS
 
         tool = {k.replace("tool_", ""): v for (k, v) in self.indexer.tool.items()}
 
-        self.expected_results = [
-            ContentMimetypeRow(
-                id=hash_to_bytes(self.id0),
-                tool=tool,
-                mimetype="text/plain",
-                encoding="us-ascii",
-            ),
-            ContentMimetypeRow(
-                id=hash_to_bytes(self.id1),
-                tool=tool,
-                mimetype="text/plain",
-                encoding="us-ascii",
-            ),
-            ContentMimetypeRow(
-                id=hash_to_bytes(self.id2),
-                tool=tool,
-                mimetype="application/x-empty",
-                encoding="binary",
-            ),
-        ]
+        results = []
+        for raw_content_id in RAW_CONTENT_IDS:
+            content_t = RAW_CONTENTS[raw_content_id]
+            # New magic version can return different results, this deals with such a case
+            if isinstance(content_t[1], tuple):
+                mimetype = content_t[1][1]
+            else:
+                mimetype = content_t[1]
+            encoding = content_t[2]
+            mimetype_row = ContentMimetypeRow(
+                id=raw_content_id, tool=tool, mimetype=mimetype, encoding=encoding
+            )
+            results.append(mimetype_row)
+
+        self.expected_results = results
 
 
 RANGE_CONFIG = dict(list(CONFIG.items()) + [("write_batch_size", 100)])
diff --git a/swh/indexer/tests/utils.py b/swh/indexer/tests/utils.py
--- a/swh/indexer/tests/utils.py
+++ b/swh/indexer/tests/utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2020  The Software Heritage developers
+# Copyright (C) 2017-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -6,14 +6,13 @@
 import abc
 import datetime
 import functools
-from typing import Any, Dict
+from typing import Any, Dict, List, Tuple
 import unittest
 
 from hypothesis import strategies
 
 from swh.core.api.classes import stream_results
 from swh.indexer.storage import INDEXER_CFG_KEY
-from swh.model import hashutil
 from swh.model.hashutil import hash_to_bytes
 from swh.model.model import (
     Content,
@@ -40,7 +39,6 @@
     INDEXER_CFG_KEY: {"cls": "memory"},
 }
 
-
 ORIGIN_VISITS = [
     {"type": "git", "origin": "https://github.com/SoftwareHeritage/swh-storage"},
     {"type": "ftp", "origin": "rsync://ftp.gnu.org/gnu/3dldf"},
@@ -61,20 +59,229 @@
 
 ORIGINS = [Origin(url=visit["origin"]) for visit in ORIGIN_VISITS]
 
+OBJ_STORAGE_RAW_CONTENT: Dict[str, bytes] = {  # description key -> raw payload
+    "text:some": b"this is some text",
+    "text:another": b"another text",
+    "text:yet": b"yet another text",
+    "python:code": b"""
+    import unittest
+    import logging
+    from swh.indexer.mimetype import MimetypeIndexer
+    from swh.indexer.tests.test_utils import MockObjStorage
+
+    class MockStorage():
+        def content_mimetype_add(self, mimetypes):
+            self.state = mimetypes
+
+        def indexer_configuration_add(self, tools):
+            return [{
+                'id': 10,
+            }]
+    """,
+    "c:struct": b"""
+        #ifndef __AVL__
+        #define __AVL__
+
+        typedef struct _avl_tree avl_tree;
+
+        typedef struct _data_t {
+          int content;
+        } data_t;
+    """,
+    "lisp:assertion": b"""
+    (should 'pygments (recognize 'lisp 'easily))
+
+    """,
+    "json:test-metadata-package.json": b"""
+    {
+        "name": "test_metadata",
+        "version": "0.0.1",
+        "description": "Simple package.json test for indexer",
+        "repository": {
+          "type": "git",
+          "url": "https://github.com/moranegg/metadata_test"
+      }
+    }
+    """,
+    "json:npm-package.json": b"""
+    {
+      "version": "5.0.3",
+      "name": "npm",
+      "description": "a package manager for JavaScript",
+      "keywords": [
+        "install",
+        "modules",
+        "package manager",
+        "package.json"
+      ],
+      "preferGlobal": true,
+      "config": {
+        "publishtest": false
+      },
+      "homepage": "https://docs.npmjs.com/",
+      "author": "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
+      "repository": {
+        "type": "git",
+        "url": "https://github.com/npm/npm"
+      },
+      "bugs": {
+        "url": "https://github.com/npm/npm/issues"
+      },
+      "dependencies": {
+        "JSONStream": "~1.3.1",
+        "abbrev": "~1.1.0",
+        "ansi-regex": "~2.1.1",
+        "ansicolors": "~0.3.2",
+        "ansistyles": "~0.1.3"
+      },
+      "devDependencies": {
+        "tacks": "~1.2.6",
+        "tap": "~10.3.2"
+      },
+      "license": "Artistic-2.0"
+    }
+
+    """,
+    "text:carriage-return": b"""
+    """,
+    "text:empty": b"",
+    # was 626364 / b'bcd'
+    "text:unimportant": b"unimportant content for bcd",
+    # was 636465 / b'cde' now yarn-parser package.json
+    "json:yarn-parser-package.json": b"""
+    {
+      "name": "yarn-parser",
+      "version": "1.0.0",
+      "description": "Tiny web service for parsing yarn.lock files",
+      "main": "index.js",
+      "scripts": {
+        "start": "node index.js",
+        "test": "mocha"
+      },
+      "engines": {
+        "node": "9.8.0"
+      },
+      "repository": {
+        "type": "git",
+        "url": "git+https://github.com/librariesio/yarn-parser.git"
+      },
+      "keywords": [
+        "yarn",
+        "parse",
+        "lock",
+        "dependencies"
+      ],
+      "author": "Andrew Nesbitt",
+      "license": "AGPL-3.0",
+      "bugs": {
+        "url": "https://github.com/librariesio/yarn-parser/issues"
+      },
+      "homepage": "https://github.com/librariesio/yarn-parser#readme",
+      "dependencies": {
+        "@yarnpkg/lockfile": "^1.0.0",
+        "body-parser": "^1.15.2",
+        "express": "^4.14.0"
+      },
+      "devDependencies": {
+        "chai": "^4.1.2",
+        "mocha": "^5.2.0",
+        "request": "^2.87.0",
+        "test": "^0.6.0"
+      }
+    }
+
+""",
+}
+
+MAPPING_CONTENT_ID: Dict[str, bytes] = {}  # description key -> content SHA1
+OBJ_STORAGE_DATA: Dict[bytes, bytes] = {}  # content SHA1 -> raw payload
+# Fill both tables from OBJ_STORAGE_RAW_CONTENT.
+for key_description, data in OBJ_STORAGE_RAW_CONTENT.items():
+    content = Content.from_data(data)
+    MAPPING_CONTENT_ID[key_description] = content.sha1
+    OBJ_STORAGE_DATA[content.sha1] = data
+
+
+# Mimetype fixtures, one tuple per content: (data, mimetype, encoding).
+# A tuple of mimetypes lists alternatives that different magic versions may report.
+RAW_CONTENT_METADATA = [
+    ("du français".encode(), "text/plain", "utf-8"),
+    (
+        b"def __init__(self):",
+        ("text/x-python", "text/x-script.python"),
+        "us-ascii",
+    ),
+    (
+        b"\xff\xfe\x00\x00\x00\x00\xff\xfe\xff\xff",
+        "application/octet-stream",
+        "",
+    ),
+]
+
+# SHA1 -> fixture tuple, plus the SHA1s in RAW_CONTENT_METADATA order.
+RAW_CONTENTS: Dict[bytes, Tuple] = {}
+RAW_CONTENT_IDS: List[bytes] = []
+
+for index, raw_content_d in enumerate(RAW_CONTENT_METADATA):
+    raw_content = raw_content_d[0]
+    content = Content.from_data(raw_content)
+    RAW_CONTENTS[content.sha1] = raw_content_d
+    RAW_CONTENT_IDS.append(content.sha1)
+    # Register the payload as well so fill_obj_storage/fill_storage load it.
+    # The key must be an f-string: a plain literal reuses one key for all entries.
+    MAPPING_CONTENT_ID[f"text-key-{index}"] = content.sha1
+    OBJ_STORAGE_DATA[content.sha1] = raw_content
+
+
+SHA1_TO_LICENSES: Dict[bytes, List[str]] = {  # content SHA1 -> canned license list
+    RAW_CONTENT_IDS[0]: ["GPL"],
+    MAPPING_CONTENT_ID["python:code"]: ["Apache2.0"],
+    MAPPING_CONTENT_ID["c:struct"]: ["MIT"],
+    RAW_CONTENT_IDS[1]: ["AGPL"],
+    RAW_CONTENT_IDS[2]: [],
+}
+
+# Canned ctags per content SHA1, served by the mocks in test_ctags.
+SHA1_TO_CTAGS: Dict[bytes, List[Dict[str, Any]]] = {
+    RAW_CONTENT_IDS[0]: [
+        {
+            "name": "foo",
+            "kind": "str",
+            "line": 10,
+            "lang": "bar",
+        }
+    ],
+    RAW_CONTENT_IDS[1]: [
+        {
+            "name": "symbol",
+            "kind": "float",
+            "line": 99,
+            "lang": "python",
+        }
+    ],
+    RAW_CONTENT_IDS[2]: [
+        {
+            "name": "let",
+            "kind": "int",
+            "line": 100,
+            "lang": "haskell",
+        }
+    ],
+}
+
 
 DIRECTORY = Directory(
-    id=hash_to_bytes("34f335a750111ca0a8b64d8034faec9eedc396be"),
     entries=(
         DirectoryEntry(
             name=b"index.js",
             type="file",
-            target=hash_to_bytes("01c9379dfc33803963d07c1ccc748d3fe4c96bb5"),
+            target=RAW_CONTENT_IDS[0],
             perms=0o100644,
         ),
         DirectoryEntry(
             name=b"package.json",
             type="file",
-            target=hash_to_bytes("26a9f72a7c87cc9205725cfd879f514ff4f3d8d5"),
+            target=MAPPING_CONTENT_ID["json:test-metadata-package.json"],
             perms=0o100644,
         ),
         DirectoryEntry(
@@ -87,12 +294,11 @@
 )
 
 DIRECTORY2 = Directory(
-    id=b"\xf8zz\xa1\x12`<1$\xfav\xf9\x01\xfd5\x85F`\xf2\xb6",
     entries=(
         DirectoryEntry(
             name=b"package.json",
             type="file",
-            target=hash_to_bytes("f5305243b3ce7ef8dc864ebc73794da304025beb"),
+            target=MAPPING_CONTENT_ID["json:yarn-parser-package.json"],
             perms=0o100644,
         ),
     ),
@@ -280,178 +486,6 @@
 assert len(SNAPSHOTS) == len(ORIGIN_VISITS)
 
 
-SHA1_TO_LICENSES = {
-    "01c9379dfc33803963d07c1ccc748d3fe4c96bb5": ["GPL"],
-    "02fb2c89e14f7fab46701478c83779c7beb7b069": ["Apache2.0"],
-    "103bc087db1d26afc3a0283f38663d081e9b01e6": ["MIT"],
-    "688a5ef812c53907562fe379d4b3851e69c7cb15": ["AGPL"],
-    "da39a3ee5e6b4b0d3255bfef95601890afd80709": [],
-}
-
-
-SHA1_TO_CTAGS = {
-    "01c9379dfc33803963d07c1ccc748d3fe4c96bb5": [
-        {
-            "name": "foo",
-            "kind": "str",
-            "line": 10,
-            "lang": "bar",
-        }
-    ],
-    "d4c647f0fc257591cc9ba1722484229780d1c607": [
-        {
-            "name": "let",
-            "kind": "int",
-            "line": 100,
-            "lang": "haskell",
-        }
-    ],
-    "688a5ef812c53907562fe379d4b3851e69c7cb15": [
-        {
-            "name": "symbol",
-            "kind": "float",
-            "line": 99,
-            "lang": "python",
-        }
-    ],
-}
-
-
-OBJ_STORAGE_DATA = {
-    "01c9379dfc33803963d07c1ccc748d3fe4c96bb5": b"this is some text",
-    "688a5ef812c53907562fe379d4b3851e69c7cb15": b"another text",
-    "8986af901dd2043044ce8f0d8fc039153641cf17": b"yet another text",
-    "02fb2c89e14f7fab46701478c83779c7beb7b069": b"""
-    import unittest
-    import logging
-    from swh.indexer.mimetype import MimetypeIndexer
-    from swh.indexer.tests.test_utils import MockObjStorage
-
-    class MockStorage():
-        def content_mimetype_add(self, mimetypes):
-            self.state = mimetypes
-
-        def indexer_configuration_add(self, tools):
-            return [{
-                'id': 10,
-            }]
-    """,
-    "103bc087db1d26afc3a0283f38663d081e9b01e6": b"""
-        #ifndef __AVL__
-        #define __AVL__
-
-        typedef struct _avl_tree avl_tree;
-
-        typedef struct _data_t {
-          int content;
-        } data_t;
-    """,
-    "93666f74f1cf635c8c8ac118879da6ec5623c410": b"""
-    (should 'pygments (recognize 'lisp 'easily))
-
-    """,
-    "26a9f72a7c87cc9205725cfd879f514ff4f3d8d5": b"""
-    {
-        "name": "test_metadata",
-        "version": "0.0.1",
-        "description": "Simple package.json test for indexer",
-        "repository": {
-          "type": "git",
-          "url": "https://github.com/moranegg/metadata_test"
-      }
-    }
-    """,
-    "d4c647f0fc257591cc9ba1722484229780d1c607": b"""
-    {
-      "version": "5.0.3",
-      "name": "npm",
-      "description": "a package manager for JavaScript",
-      "keywords": [
-        "install",
-        "modules",
-        "package manager",
-        "package.json"
-      ],
-      "preferGlobal": true,
-      "config": {
-        "publishtest": false
-      },
-      "homepage": "https://docs.npmjs.com/",
-      "author": "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
-      "repository": {
-        "type": "git",
-        "url": "https://github.com/npm/npm"
-      },
-      "bugs": {
-        "url": "https://github.com/npm/npm/issues"
-      },
-      "dependencies": {
-        "JSONStream": "~1.3.1",
-        "abbrev": "~1.1.0",
-        "ansi-regex": "~2.1.1",
-        "ansicolors": "~0.3.2",
-        "ansistyles": "~0.1.3"
-      },
-      "devDependencies": {
-        "tacks": "~1.2.6",
-        "tap": "~10.3.2"
-      },
-      "license": "Artistic-2.0"
-    }
-
-    """,
-    "a7ab314d8a11d2c93e3dcf528ca294e7b431c449": b"""
-    """,
-    "da39a3ee5e6b4b0d3255bfef95601890afd80709": b"",
-    # was 626364 / b'bcd'
-    "e3e40fee6ff8a52f06c3b428bfe7c0ed2ef56e92": b"unimportant content for bcd",
-    # was 636465 / b'cde' now yarn-parser package.json
-    "f5305243b3ce7ef8dc864ebc73794da304025beb": b"""
-    {
-      "name": "yarn-parser",
-      "version": "1.0.0",
-      "description": "Tiny web service for parsing yarn.lock files",
-      "main": "index.js",
-      "scripts": {
-        "start": "node index.js",
-        "test": "mocha"
-      },
-      "engines": {
-        "node": "9.8.0"
-      },
-      "repository": {
-        "type": "git",
-        "url": "git+https://github.com/librariesio/yarn-parser.git"
-      },
-      "keywords": [
-        "yarn",
-        "parse",
-        "lock",
-        "dependencies"
-      ],
-      "author": "Andrew Nesbitt",
-      "license": "AGPL-3.0",
-      "bugs": {
-        "url": "https://github.com/librariesio/yarn-parser/issues"
-      },
-      "homepage": "https://github.com/librariesio/yarn-parser#readme",
-      "dependencies": {
-        "@yarnpkg/lockfile": "^1.0.0",
-        "body-parser": "^1.15.2",
-        "express": "^4.14.0"
-      },
-      "devDependencies": {
-        "chai": "^4.1.2",
-        "mocha": "^5.2.0",
-        "request": "^2.87.0",
-        "test": "^0.6.0"
-      }
-    }
-
-""",
-}
-
-
 YARN_PARSER_METADATA = {
     "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
     "url": "https://github.com/librariesio/yarn-parser#readme",
@@ -613,8 +647,8 @@
 
 def fill_obj_storage(obj_storage):
     """Add some content in an object storage."""
-    for (obj_id, content) in OBJ_STORAGE_DATA.items():
-        obj_storage.add(content, obj_id=hash_to_bytes(obj_id))
+    for obj_id, content in OBJ_STORAGE_DATA.items():
+        obj_storage.add(content, obj_id)
 
 
 def fill_storage(storage):
@@ -639,21 +673,7 @@
         )
         storage.origin_visit_status_add([visit_status])
 
-    contents = []
-    for (obj_id, content) in OBJ_STORAGE_DATA.items():
-        content_hashes = hashutil.MultiHash.from_data(content).digest()
-        contents.append(
-            Content(
-                data=content,
-                length=len(content),
-                status="visible",
-                sha1=hash_to_bytes(obj_id),
-                sha1_git=hash_to_bytes(obj_id),
-                sha256=content_hashes["sha256"],
-                blake2s256=content_hashes["blake2s256"],
-            )
-        )
-    storage.content_add(contents)
+    storage.content_add([Content.from_data(data) for data in OBJ_STORAGE_DATA.values()])
 
 
 class CommonContentIndexerTest(metaclass=abc.ABCMeta):
@@ -662,9 +682,7 @@
         return self.indexer.idx_storage.state
 
     def assert_results_ok(self, sha1s, expected_results=None):
-        sha1s = [
-            sha1 if isinstance(sha1, bytes) else hash_to_bytes(sha1) for sha1 in sha1s
-        ]
+        sha1s = [hash_to_bytes(sha1) for sha1 in sha1s]
         actual_results = list(self.get_indexer_results(sha1s))
 
         if expected_results is None:
@@ -698,11 +716,7 @@
         self.indexer.run(sha1s)
 
         # then
-        expected_results = [
-            res
-            for res in self.expected_results
-            if hashutil.hash_to_hex(res.id) in sha1s
-        ]
+        expected_results = [res for res in self.expected_results if res.id in sha1s]
 
         self.assert_results_ok(sha1s, expected_results)
 
@@ -711,6 +725,7 @@
     """Allows to factorize tests on range indexer."""
 
     def setUp(self):
+        # TODO(review): is self.contents still used? Remove it if no test reads it.
         self.contents = sorted(OBJ_STORAGE_DATA)
 
     def assert_results_ok(self, partition_id, nb_partitions, actual_results):