diff --git a/swh/indexer/codemeta.py b/swh/indexer/codemeta.py index 1148f7e..b279320 100644 --- a/swh/indexer/codemeta.py +++ b/swh/indexer/codemeta.py @@ -1,206 +1,205 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import collections import csv import itertools import json import os.path import re from pyld import jsonld import swh.indexer _DATA_DIR = os.path.join(os.path.dirname(swh.indexer.__file__), "data") CROSSWALK_TABLE_PATH = os.path.join(_DATA_DIR, "codemeta", "crosswalk.csv") CODEMETA_CONTEXT_PATH = os.path.join(_DATA_DIR, "codemeta", "codemeta.jsonld") with open(CODEMETA_CONTEXT_PATH) as fd: CODEMETA_CONTEXT = json.load(fd) CODEMETA_CONTEXT_URL = "https://doi.org/10.5063/schema/codemeta-2.0" CODEMETA_ALTERNATE_CONTEXT_URLS = { ("https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld") } CODEMETA_URI = "https://codemeta.github.io/terms/" SCHEMA_URI = "http://schema.org/" FORGEFED_URI = "https://forgefed.org/ns#" ACTIVITYSTREAMS_URI = "https://www.w3.org/ns/activitystreams#" PROPERTY_BLACKLIST = { # CodeMeta properties that we cannot properly represent. SCHEMA_URI + "softwareRequirements", CODEMETA_URI + "softwareSuggestions", # Duplicate of 'author' SCHEMA_URI + "creator", } _codemeta_field_separator = re.compile(r"\s*[,/]\s*") def make_absolute_uri(local_name): definition = CODEMETA_CONTEXT["@context"][local_name] if isinstance(definition, str): return definition elif isinstance(definition, dict): prefixed_name = definition["@id"] (prefix, local_name) = prefixed_name.split(":") if prefix == "schema": canonical_name = SCHEMA_URI + local_name elif prefix == "codemeta": canonical_name = CODEMETA_URI + local_name else: assert False, prefix return canonical_name else: assert False, definition def _read_crosstable(fd): reader = csv.reader(fd) try: header = next(reader) except StopIteration: raise ValueError("empty file") data_sources = set(header) - {"Parent Type", "Property", "Type", "Description"} - assert "codemeta-V1" in data_sources codemeta_translation = {data_source: {} for data_source in data_sources} terms = set() for line in reader: # For each canonical name local_name = dict(zip(header, line))["Property"] if not local_name: continue canonical_name = make_absolute_uri(local_name) if canonical_name in PROPERTY_BLACKLIST: continue terms.add(canonical_name) for (col, value) in zip(header, line): # For each cell in the row if col in data_sources: # If that's not the parentType/property/type/description for local_name in _codemeta_field_separator.split(value): # For each of the data source's properties that maps # to this canonical name if local_name.strip(): codemeta_translation[col][local_name.strip()] = canonical_name return (terms, codemeta_translation) with open(CROSSWALK_TABLE_PATH) as fd: (CODEMETA_TERMS, CROSSWALK_TABLE) = _read_crosstable(fd) def _document_loader(url, options=None): """Document loader for pyld. 
Reads the local codemeta.jsonld file instead of fetching it from the Internet every single time.""" if url == CODEMETA_CONTEXT_URL or url in CODEMETA_ALTERNATE_CONTEXT_URLS: return { "contextUrl": None, "documentUrl": url, "document": CODEMETA_CONTEXT, } elif url == CODEMETA_URI: raise Exception( "{} is CodeMeta's URI, use {} as context url".format( CODEMETA_URI, CODEMETA_CONTEXT_URL ) ) else: raise Exception(url) def compact(doc): """Same as `pyld.jsonld.compact`, but in the context of CodeMeta.""" return jsonld.compact( doc, CODEMETA_CONTEXT_URL, options={"documentLoader": _document_loader} ) def expand(doc): """Same as `pyld.jsonld.expand`, but in the context of CodeMeta.""" return jsonld.expand(doc, options={"documentLoader": _document_loader}) def merge_values(v1, v2): """If v1 and v2 are of the form `{"@list": l1}` and `{"@list": l2}`, returns `{"@list": l1 + l2}`. Otherwise, make them lists (if they are not already) and concatenate them. >>> merge_values('a', 'b') ['a', 'b'] >>> merge_values(['a', 'b'], 'c') ['a', 'b', 'c'] >>> merge_values({'@list': ['a', 'b']}, {'@list': ['c']}) {'@list': ['a', 'b', 'c']} """ if v1 is None: return v2 elif v2 is None: return v1 elif isinstance(v1, dict) and set(v1) == {"@list"}: assert isinstance(v1["@list"], list) if isinstance(v2, dict) and set(v2) == {"@list"}: assert isinstance(v2["@list"], list) return {"@list": v1["@list"] + v2["@list"]} else: raise ValueError("Cannot merge %r and %r" % (v1, v2)) else: if isinstance(v2, dict) and "@list" in v2: raise ValueError("Cannot merge %r and %r" % (v1, v2)) if not isinstance(v1, list): v1 = [v1] if not isinstance(v2, list): v2 = [v2] return v1 + v2 def merge_documents(documents): """Takes a list of metadata dicts, each generated from a different metadata file, and merges them. Removes duplicates, if any.""" documents = list(itertools.chain.from_iterable(map(expand, documents))) merged_document = collections.defaultdict(list) for document in documents: for (key, values) in document.items(): if key == "@id": # @id does not get expanded to a list value = values # Only one @id is allowed, move it to sameAs if "@id" not in merged_document: merged_document["@id"] = value elif value != merged_document["@id"]: if value not in merged_document[SCHEMA_URI + "sameAs"]: merged_document[SCHEMA_URI + "sameAs"].append(value) else: for value in values: if isinstance(value, dict) and set(value) == {"@list"}: # Value is of the form {'@list': [item1, item2]} # instead of the usual [item1, item2]. # We need to merge the inner lists (and mostly # preserve order). merged_value = merged_document.setdefault(key, {"@list": []}) for subvalue in value["@list"]: # merged_value must be of the form # {'@list': [item1, item2]}; as it is the same # type as value, which is an @list. 
                            if subvalue not in merged_value["@list"]:
                                merged_value["@list"].append(subvalue)
                    elif value not in merged_document[key]:
                        merged_document[key].append(value)

    return compact(merged_document)
diff --git a/swh/indexer/data/composer.csv b/swh/indexer/data/composer.csv
new file mode 100644
index 0000000..599a931
--- /dev/null
+++ b/swh/indexer/data/composer.csv
@@ -0,0 +1,68 @@
+Property,Composer
+codeRepository,support.source
+programmingLanguage,
+runtimePlatform,
+targetProduct,
+applicationCategory,
+applicationSubCategory,
+downloadUrl,
+fileSize,
+installUrl,
+memoryRequirements,
+operatingSystem,
+permissions,
+processorRequirements,
+releaseNotes,
+softwareHelp,
+softwareRequirements,require
+softwareVersion,version
+storageRequirements,
+supportingData,
+author,authors
+citation,
+contributor,
+copyrightHolder,
+copyrightYear,
+dateCreated,
+dateModified,
+datePublished,
+editor,
+encoding,
+fileFormat,
+funder,
+keywords,keywords
+license,license
+producer,
+provider,
+publisher,
+sponsor,
+version,version
+isAccessibleForFree,
+isPartOf,
+hasPart,
+position,
+description,description
+identifier,name
+name,name
+sameAs,
+url,homepage
+relatedLink,
+givenName,
+familyName,
+email,author.email
+affiliation,
+identifier,
+name,author.name
+address,
+type,
+id,
+softwareSuggestions,suggest
+maintainer,
+contIntegration,
+buildInstructions,
+developmentStatus,
+embargoDate,
+funding,
+issueTracker,support.issues
+referencePublication,
+readme,
\ No newline at end of file
diff --git a/swh/indexer/metadata_dictionary/__init__.py b/swh/indexer/metadata_dictionary/__init__.py
index fafd74f..4d4f4de 100644
--- a/swh/indexer/metadata_dictionary/__init__.py
+++ b/swh/indexer/metadata_dictionary/__init__.py
@@ -1,46 +1,47 @@
# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import collections

import click

-from . import cff, codemeta, github, maven, npm, python, ruby
+from . import cff, codemeta, composer, github, maven, npm, python, ruby

MAPPINGS = {
    "CffMapping": cff.CffMapping,
    "CodemetaMapping": codemeta.CodemetaMapping,
    "GemspecMapping": ruby.GemspecMapping,
    "GitHubMapping": github.GitHubMapping,
    "MavenMapping": maven.MavenMapping,
    "NpmMapping": npm.NpmMapping,
    "PythonPkginfoMapping": python.PythonPkginfoMapping,
+    "ComposerMapping": composer.ComposerMapping,
}


def list_terms():
    """Returns a dictionary with all supported CodeMeta terms as keys,
    and the mappings that support each of them as values."""
    d = collections.defaultdict(set)
    for mapping in MAPPINGS.values():
        for term in mapping.supported_terms():
            d[term].add(mapping)
    return d


@click.command()
@click.argument("mapping_name")
@click.argument("file_name")
def main(mapping_name: str, file_name: str):
    from pprint import pprint

    with open(file_name, "rb") as fd:
        file_content = fd.read()
    res = MAPPINGS[mapping_name]().translate(file_content)
    pprint(res)


if __name__ == "__main__":
    main()
diff --git a/swh/indexer/metadata_dictionary/composer.py b/swh/indexer/metadata_dictionary/composer.py
new file mode 100644
index 0000000..5d0f2ee
--- /dev/null
+++ b/swh/indexer/metadata_dictionary/composer.py
@@ -0,0 +1,56 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os.path
+
+from swh.indexer.codemeta import _DATA_DIR, SCHEMA_URI, _read_crosstable
+
+from .base import JsonMapping
+
+COMPOSER_TABLE_PATH = os.path.join(_DATA_DIR, "composer.csv")
+
+with open(COMPOSER_TABLE_PATH) as fd:
+    (CODEMETA_TERMS, COMPOSER_TABLE) = _read_crosstable(fd)
+
+
+class ComposerMapping(JsonMapping):
+    """Dedicated class for Packagist (composer.json) mapping and translation"""
+
+    name = "composer"
+    mapping = COMPOSER_TABLE["Composer"]
+    filename = b"composer.json"
+    string_fields = [
+        "name",
+        "description",
+        "version",
+        "keywords",
+        "homepage",
+        "license",
+        "author",
+        "authors",
+    ]
+
+    def normalize_homepage(self, s):
+        if isinstance(s, str):
+            return {"@id": s}
+
+    def normalize_license(self, s):
+        if isinstance(s, str):
+            return {"@id": "https://spdx.org/licenses/" + s}
+
+    def normalize_authors(self, author_list):
+        authors = []
+        for author in author_list:
+            author_obj = {"@type": SCHEMA_URI + "Person"}
+
+            if isinstance(author, dict):
+                if isinstance(author.get("name", None), str):
+                    author_obj[SCHEMA_URI + "name"] = author.get("name", None)
+                if isinstance(author.get("email", None), str):
+                    author_obj[SCHEMA_URI + "email"] = author.get("email", None)
+
+            authors.append(author_obj)
+
+        return {"@list": authors}
diff --git a/swh/indexer/tests/metadata_dictionary/test_composer.py b/swh/indexer/tests/metadata_dictionary/test_composer.py
new file mode 100644
index 0000000..9513938
--- /dev/null
+++ b/swh/indexer/tests/metadata_dictionary/test_composer.py
@@ -0,0 +1,84 @@
+# Copyright (C) 2017-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.indexer.metadata_dictionary import MAPPINGS
+
+
+def test_compute_metadata_composer():
+    raw_content = """{
+"name": "symfony/polyfill-mbstring",
+"type": "library",
+"description": "Symfony polyfill for the Mbstring extension",
+"keywords": [
+    "polyfill",
+    "shim",
+    "compatibility",
+ "portable" +], +"homepage": "https://symfony.com", +"license": "MIT", +"authors": [ + { + "name": "Nicolas Grekas", + "email": "p@tchwork.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } +], +"require": { + "php": ">=7.1" +}, +"provide": { + "ext-mbstring": "*" +}, +"autoload": { + "files": [ + "bootstrap.php" + ] +}, +"suggest": { + "ext-mbstring": "For best performance" +}, +"minimum-stability": "dev", +"extra": { + "branch-alias": { + "dev-main": "1.26-dev" + }, + "thanks": { + "name": "symfony/polyfill", + "url": "https://github.com/symfony/polyfill" + } +} +} + """.encode( + "utf-8" + ) + + result = MAPPINGS["ComposerMapping"]().translate(raw_content) + + expected = { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "name": "symfony/polyfill-mbstring", + "keywords": ["polyfill", "shim", "compatibility", "portable"], + "description": "Symfony polyfill for the Mbstring extension", + "url": "https://symfony.com", + "license": "https://spdx.org/licenses/MIT", + "author": [ + { + "type": "Person", + "name": "Nicolas Grekas", + "email": "p@tchwork.com", + }, + { + "type": "Person", + "name": "Symfony Community", + }, + ], + } + + assert result == expected diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py index 8e21163..24bde04 100644 --- a/swh/indexer/tests/test_cli.py +++ b/swh/indexer/tests/test_cli.py @@ -1,656 +1,657 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime from functools import reduce import re from typing import Any, Dict, List from unittest.mock import patch from click.testing import CliRunner from confluent_kafka import Consumer import pytest from swh.indexer.cli import indexer_cli_group from swh.indexer.storage.interface import IndexerStorageInterface from swh.indexer.storage.model import ( DirectoryIntrinsicMetadataRow, OriginIntrinsicMetadataRow, ) from swh.journal.writer import get_journal_writer from swh.model.hashutil import hash_to_bytes from swh.model.model import OriginVisitStatus from .utils import DIRECTORY2, REVISION def fill_idx_storage(idx_storage: IndexerStorageInterface, nb_rows: int) -> List[int]: tools: List[Dict[str, Any]] = [ { "tool_name": "tool %d" % i, "tool_version": "0.0.1", "tool_configuration": {}, } for i in range(2) ] tools = idx_storage.indexer_configuration_add(tools) origin_metadata = [ OriginIntrinsicMetadataRow( id="file://dev/%04d" % origin_id, from_directory=hash_to_bytes("abcd{:0>36}".format(origin_id)), indexer_configuration_id=tools[origin_id % 2]["id"], metadata={"name": "origin %d" % origin_id}, mappings=["mapping%d" % (origin_id % 10)], ) for origin_id in range(nb_rows) ] directory_metadata = [ DirectoryIntrinsicMetadataRow( id=hash_to_bytes("abcd{:0>36}".format(origin_id)), indexer_configuration_id=tools[origin_id % 2]["id"], metadata={"name": "origin %d" % origin_id}, mappings=["mapping%d" % (origin_id % 10)], ) for origin_id in range(nb_rows) ] idx_storage.directory_intrinsic_metadata_add(directory_metadata) idx_storage.origin_intrinsic_metadata_add(origin_metadata) return [tool["id"] for tool in tools] def _origins_in_task_args(tasks): """Returns the set of origins contained in the arguments of the provided tasks (assumed to be of type index-origin-metadata).""" return reduce( set.union, 
(set(task["arguments"]["args"][0]) for task in tasks), set() ) def _assert_tasks_for_origins(tasks, origins): expected_kwargs = {} assert {task["type"] for task in tasks} == {"index-origin-metadata"} assert all(len(task["arguments"]["args"]) == 1 for task in tasks) for task in tasks: assert task["arguments"]["kwargs"] == expected_kwargs, task assert _origins_in_task_args(tasks) == set(["file://dev/%04d" % i for i in origins]) @pytest.fixture def cli_runner(): return CliRunner() def test_cli_mapping_list(cli_runner, swh_config): result = cli_runner.invoke( indexer_cli_group, ["-C", swh_config, "mapping", "list"], catch_exceptions=False, ) expected_output = "\n".join( [ "cff", "codemeta", + "composer", "gemspec", "github", "maven", "npm", "pkg-info", "", ] # must be sorted for test to pass ) assert result.exit_code == 0, result.output assert result.output == expected_output def test_cli_mapping_list_terms(cli_runner, swh_config): result = cli_runner.invoke( indexer_cli_group, ["-C", swh_config, "mapping", "list-terms"], catch_exceptions=False, ) assert result.exit_code == 0, result.output assert re.search(r"http://schema.org/url:\n.*npm", result.output) assert re.search(r"http://schema.org/url:\n.*codemeta", result.output) assert re.search( r"https://codemeta.github.io/terms/developmentStatus:\n\tcodemeta", result.output, ) def test_cli_mapping_list_terms_exclude(cli_runner, swh_config): result = cli_runner.invoke( indexer_cli_group, ["-C", swh_config, "mapping", "list-terms", "--exclude-mapping", "codemeta"], catch_exceptions=False, ) assert result.exit_code == 0, result.output assert re.search(r"http://schema.org/url:\n.*npm", result.output) assert not re.search(r"http://schema.org/url:\n.*codemeta", result.output) assert not re.search( r"https://codemeta.github.io/terms/developmentStatus:\n\tcodemeta", result.output, ) @patch("swh.scheduler.cli.utils.TASK_BATCH_SIZE", 3) @patch("swh.scheduler.cli_utils.TASK_BATCH_SIZE", 3) def test_cli_origin_metadata_reindex_empty_db( cli_runner, swh_config, indexer_scheduler, idx_storage, storage ): result = cli_runner.invoke( indexer_cli_group, [ "-C", swh_config, "schedule", "reindex_origin_metadata", ], catch_exceptions=False, ) expected_output = "Nothing to do (no origin metadata matched the criteria).\n" assert result.exit_code == 0, result.output assert result.output == expected_output tasks = indexer_scheduler.search_tasks() assert len(tasks) == 0 @patch("swh.scheduler.cli.utils.TASK_BATCH_SIZE", 3) @patch("swh.scheduler.cli_utils.TASK_BATCH_SIZE", 3) def test_cli_origin_metadata_reindex_divisor( cli_runner, swh_config, indexer_scheduler, idx_storage, storage ): """Tests the re-indexing when origin_batch_size*task_batch_size is a divisor of nb_origins.""" fill_idx_storage(idx_storage, 90) result = cli_runner.invoke( indexer_cli_group, [ "-C", swh_config, "schedule", "reindex_origin_metadata", ], catch_exceptions=False, ) # Check the output expected_output = ( "Scheduled 3 tasks (30 origins).\n" "Scheduled 6 tasks (60 origins).\n" "Scheduled 9 tasks (90 origins).\n" "Done.\n" ) assert result.exit_code == 0, result.output assert result.output == expected_output # Check scheduled tasks tasks = indexer_scheduler.search_tasks() assert len(tasks) == 9 _assert_tasks_for_origins(tasks, range(90)) @patch("swh.scheduler.cli.utils.TASK_BATCH_SIZE", 3) @patch("swh.scheduler.cli_utils.TASK_BATCH_SIZE", 3) def test_cli_origin_metadata_reindex_dry_run( cli_runner, swh_config, indexer_scheduler, idx_storage, storage ): """Tests the re-indexing when 
origin_batch_size*task_batch_size is a divisor of nb_origins.""" fill_idx_storage(idx_storage, 90) result = cli_runner.invoke( indexer_cli_group, [ "-C", swh_config, "schedule", "--dry-run", "reindex_origin_metadata", ], catch_exceptions=False, ) # Check the output expected_output = ( "Scheduled 3 tasks (30 origins).\n" "Scheduled 6 tasks (60 origins).\n" "Scheduled 9 tasks (90 origins).\n" "Done.\n" ) assert result.exit_code == 0, result.output assert result.output == expected_output # Check scheduled tasks tasks = indexer_scheduler.search_tasks() assert len(tasks) == 0 @patch("swh.scheduler.cli.utils.TASK_BATCH_SIZE", 3) @patch("swh.scheduler.cli_utils.TASK_BATCH_SIZE", 3) def test_cli_origin_metadata_reindex_nondivisor( cli_runner, swh_config, indexer_scheduler, idx_storage, storage ): """Tests the re-indexing when neither origin_batch_size or task_batch_size is a divisor of nb_origins.""" fill_idx_storage(idx_storage, 70) result = cli_runner.invoke( indexer_cli_group, [ "-C", swh_config, "schedule", "reindex_origin_metadata", "--batch-size", "20", ], catch_exceptions=False, ) # Check the output expected_output = ( "Scheduled 3 tasks (60 origins).\n" "Scheduled 4 tasks (70 origins).\n" "Done.\n" ) assert result.exit_code == 0, result.output assert result.output == expected_output # Check scheduled tasks tasks = indexer_scheduler.search_tasks() assert len(tasks) == 4 _assert_tasks_for_origins(tasks, range(70)) @patch("swh.scheduler.cli.utils.TASK_BATCH_SIZE", 3) @patch("swh.scheduler.cli_utils.TASK_BATCH_SIZE", 3) def test_cli_origin_metadata_reindex_filter_one_mapping( cli_runner, swh_config, indexer_scheduler, idx_storage, storage ): """Tests the re-indexing when origin_batch_size*task_batch_size is a divisor of nb_origins.""" fill_idx_storage(idx_storage, 110) result = cli_runner.invoke( indexer_cli_group, [ "-C", swh_config, "schedule", "reindex_origin_metadata", "--mapping", "mapping1", ], catch_exceptions=False, ) # Check the output expected_output = "Scheduled 2 tasks (11 origins).\nDone.\n" assert result.exit_code == 0, result.output assert result.output == expected_output # Check scheduled tasks tasks = indexer_scheduler.search_tasks() assert len(tasks) == 2 _assert_tasks_for_origins(tasks, [1, 11, 21, 31, 41, 51, 61, 71, 81, 91, 101]) @patch("swh.scheduler.cli.utils.TASK_BATCH_SIZE", 3) @patch("swh.scheduler.cli_utils.TASK_BATCH_SIZE", 3) def test_cli_origin_metadata_reindex_filter_two_mappings( cli_runner, swh_config, indexer_scheduler, idx_storage, storage ): """Tests the re-indexing when origin_batch_size*task_batch_size is a divisor of nb_origins.""" fill_idx_storage(idx_storage, 110) result = cli_runner.invoke( indexer_cli_group, [ "--config-file", swh_config, "schedule", "reindex_origin_metadata", "--mapping", "mapping1", "--mapping", "mapping2", ], catch_exceptions=False, ) # Check the output expected_output = "Scheduled 3 tasks (22 origins).\nDone.\n" assert result.exit_code == 0, result.output assert result.output == expected_output # Check scheduled tasks tasks = indexer_scheduler.search_tasks() assert len(tasks) == 3 _assert_tasks_for_origins( tasks, [ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91, 101, 2, 12, 22, 32, 42, 52, 62, 72, 82, 92, 102, ], ) @patch("swh.scheduler.cli.utils.TASK_BATCH_SIZE", 3) @patch("swh.scheduler.cli_utils.TASK_BATCH_SIZE", 3) def test_cli_origin_metadata_reindex_filter_one_tool( cli_runner, swh_config, indexer_scheduler, idx_storage, storage ): """Tests the re-indexing when origin_batch_size*task_batch_size is a divisor of nb_origins.""" 
tool_ids = fill_idx_storage(idx_storage, 110) result = cli_runner.invoke( indexer_cli_group, [ "-C", swh_config, "schedule", "reindex_origin_metadata", "--tool-id", str(tool_ids[0]), ], catch_exceptions=False, ) # Check the output expected_output = ( "Scheduled 3 tasks (30 origins).\n" "Scheduled 6 tasks (55 origins).\n" "Done.\n" ) assert result.exit_code == 0, result.output assert result.output == expected_output # Check scheduled tasks tasks = indexer_scheduler.search_tasks() assert len(tasks) == 6 _assert_tasks_for_origins(tasks, [x * 2 for x in range(55)]) def now(): return datetime.datetime.now(tz=datetime.timezone.utc) def test_cli_journal_client_schedule( cli_runner, swh_config, indexer_scheduler, kafka_prefix: str, kafka_server, consumer: Consumer, ): """Test the 'swh indexer journal-client' cli tool.""" journal_writer = get_journal_writer( "kafka", brokers=[kafka_server], prefix=kafka_prefix, client_id="test producer", value_sanitizer=lambda object_type, value: value, flush_timeout=3, # fail early if something is going wrong ) visit_statuses = [ OriginVisitStatus( origin="file:///dev/zero", visit=1, date=now(), status="full", snapshot=None, ), OriginVisitStatus( origin="file:///dev/foobar", visit=2, date=now(), status="full", snapshot=None, ), OriginVisitStatus( origin="file:///tmp/spamegg", visit=3, date=now(), status="full", snapshot=None, ), OriginVisitStatus( origin="file:///dev/0002", visit=6, date=now(), status="full", snapshot=None, ), OriginVisitStatus( # will be filtered out due to its 'partial' status origin="file:///dev/0000", visit=4, date=now(), status="partial", snapshot=None, ), OriginVisitStatus( # will be filtered out due to its 'ongoing' status origin="file:///dev/0001", visit=5, date=now(), status="ongoing", snapshot=None, ), ] journal_writer.write_additions("origin_visit_status", visit_statuses) visit_statuses_full = [vs for vs in visit_statuses if vs.status == "full"] result = cli_runner.invoke( indexer_cli_group, [ "-C", swh_config, "journal-client", "--broker", kafka_server, "--prefix", kafka_prefix, "--group-id", "test-consumer", "--stop-after-objects", len(visit_statuses), "--origin-metadata-task-type", "index-origin-metadata", ], catch_exceptions=False, ) # Check the output expected_output = "Done.\n" assert result.exit_code == 0, result.output assert result.output == expected_output # Check scheduled tasks tasks = indexer_scheduler.search_tasks(task_type="index-origin-metadata") # This can be split into multiple tasks but no more than the origin-visit-statuses # written in the journal assert len(tasks) <= len(visit_statuses_full) actual_origins = [] for task in tasks: actual_task = dict(task) assert actual_task["type"] == "index-origin-metadata" scheduled_origins = actual_task["arguments"]["args"][0] actual_origins.extend(scheduled_origins) assert set(actual_origins) == {vs.origin for vs in visit_statuses_full} def test_cli_journal_client_without_brokers( cli_runner, swh_config, kafka_prefix: str, kafka_server, consumer: Consumer ): """Without brokers configuration, the cli fails.""" with pytest.raises(ValueError, match="brokers"): cli_runner.invoke( indexer_cli_group, [ "-C", swh_config, "journal-client", ], catch_exceptions=False, ) @pytest.mark.parametrize("indexer_name", ["origin-intrinsic-metadata", "*"]) def test_cli_journal_client_index( cli_runner, swh_config, kafka_prefix: str, kafka_server, consumer: Consumer, idx_storage, storage, mocker, swh_indexer_config, indexer_name: str, ): """Test the 'swh indexer journal-client' cli tool.""" 
journal_writer = get_journal_writer( "kafka", brokers=[kafka_server], prefix=kafka_prefix, client_id="test producer", value_sanitizer=lambda object_type, value: value, flush_timeout=3, # fail early if something is going wrong ) visit_statuses = [ OriginVisitStatus( origin="file:///dev/zero", visit=1, date=now(), status="full", snapshot=None, ), OriginVisitStatus( origin="file:///dev/foobar", visit=2, date=now(), status="full", snapshot=None, ), OriginVisitStatus( origin="file:///tmp/spamegg", visit=3, date=now(), status="full", snapshot=None, ), OriginVisitStatus( origin="file:///dev/0002", visit=6, date=now(), status="full", snapshot=None, ), OriginVisitStatus( # will be filtered out due to its 'partial' status origin="file:///dev/0000", visit=4, date=now(), status="partial", snapshot=None, ), OriginVisitStatus( # will be filtered out due to its 'ongoing' status origin="file:///dev/0001", visit=5, date=now(), status="ongoing", snapshot=None, ), ] journal_writer.write_additions("origin_visit_status", visit_statuses) visit_statuses_full = [vs for vs in visit_statuses if vs.status == "full"] storage.revision_add([REVISION]) mocker.patch( "swh.indexer.metadata.get_head_swhid", return_value=REVISION.swhid(), ) mocker.patch( "swh.indexer.metadata.DirectoryMetadataIndexer.index", return_value=[ DirectoryIntrinsicMetadataRow( id=DIRECTORY2.id, indexer_configuration_id=1, mappings=["cff"], metadata={"foo": "bar"}, ) ], ) result = cli_runner.invoke( indexer_cli_group, [ "-C", swh_config, "journal-client", indexer_name, "--broker", kafka_server, "--prefix", kafka_prefix, "--group-id", "test-consumer", "--stop-after-objects", len(visit_statuses), ], catch_exceptions=False, ) # Check the output expected_output = "Done.\n" assert result.exit_code == 0, result.output assert result.output == expected_output results = idx_storage.origin_intrinsic_metadata_get( [status.origin for status in visit_statuses] ) expected_results = [ OriginIntrinsicMetadataRow( id=status.origin, from_directory=DIRECTORY2.id, tool={"id": 1, **swh_indexer_config["tools"]}, mappings=["cff"], metadata={"foo": "bar"}, ) for status in sorted(visit_statuses_full, key=lambda r: r.origin) ] assert sorted(results, key=lambda r: r.id) == expected_results
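
Reviewer note: the snippet below is a minimal sketch, not part of the patch, showing how the new mapping can be driven through the MAPPINGS registry entry point that the added test and the module's main() command both use. The "composer.json" path is a placeholder, not a file shipped with this change.

from swh.indexer.metadata_dictionary import MAPPINGS

# Hypothetical path; point it at any composer.json on disk.
with open("composer.json", "rb") as fd:
    raw_content = fd.read()

# translate() parses the JSON bytes and maps the recognized Composer
# fields (name, description, license, authors, homepage, ...) to
# CodeMeta terms, returning a compacted JSON-LD dict like the
# `expected` value in test_compute_metadata_composer above.
codemeta_doc = MAPPINGS["ComposerMapping"]().translate(raw_content)
print(codemeta_doc)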