diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -8,13 +8,14 @@
 import psycopg2
 import psycopg2.pool
 
-from collections import defaultdict
+from collections import defaultdict, Counter
 
 from swh.storage.common import db_transaction_generator, db_transaction
 from swh.storage.exc import StorageDBError
 
-from .db import Db
 from . import converters
+from .db import Db
+from .exc import IndexerStorageArgumentException, DuplicateId
 
 
 INDEXER_CFG_KEY = 'indexer_storage'
@@ -51,7 +52,7 @@
     return IndexerStorage(**args)
 
 
-def _check_id_duplicates(data):
+def check_id_duplicates(data):
     """
     If any two dictionaries in `data` have the same id, raises
-    a `ValueError`.
+    a `DuplicateId`.
@@ -61,20 +62,22 @@
     Args:
         data (List[dict]): List of dictionaries to be inserted
 
-    >>> _check_id_duplicates([
+    >>> check_id_duplicates([
    ...     {'id': 'foo', 'data': 'spam'},
    ...     {'id': 'bar', 'data': 'egg'},
    ... ])
-    >>> _check_id_duplicates([
+    >>> check_id_duplicates([
    ...     {'id': 'foo', 'data': 'spam'},
    ...     {'id': 'foo', 'data': 'egg'},
    ... ])
     Traceback (most recent call last):
       ...
-    ValueError: The same id is present more than once.
+    swh.indexer.storage.exc.DuplicateId: ['foo']
     """
-    if len({item['id'] for item in data}) < len(data):
-        raise ValueError('The same id is present more than once.')
+    counter = Counter(item['id'] for item in data)
+    duplicates = [id_ for (id_, count) in counter.items() if count >= 2]
+    if duplicates:
+        raise DuplicateId(duplicates)
 
 
 class IndexerStorage:
@@ -132,11 +135,11 @@
                           with_textual_data=False,
                           db=None, cur=None):
         if limit is None:
-            raise ValueError('Development error: limit should not be None')
+            raise IndexerStorageArgumentException('limit should not be None')
         if content_type not in db.content_indexer_names:
-            err = 'Development error: Wrong type. Should be one of [%s]' % (
+            err = 'Wrong type. Should be one of [%s]' % (
                 ','.join(db.content_indexer_names))
-            raise ValueError(err)
+            raise IndexerStorageArgumentException(err)
 
         ids = []
         next_id = None
@@ -165,7 +168,7 @@
     @db_transaction()
     def content_mimetype_add(self, mimetypes, conflict_update=False,
                              db=None, cur=None):
-        _check_id_duplicates(mimetypes)
+        check_id_duplicates(mimetypes)
         mimetypes.sort(key=lambda m: m['id'])
         db.mktemp_content_mimetype(cur)
         db.copy_to(mimetypes, 'tmp_content_mimetype',
@@ -193,7 +196,7 @@
     @db_transaction()
     def content_language_add(self, languages, conflict_update=False,
                              db=None, cur=None):
-        _check_id_duplicates(languages)
+        check_id_duplicates(languages)
         languages.sort(key=lambda m: m['id'])
         db.mktemp_content_language(cur)
         # empty language is mapped to 'unknown'
@@ -221,7 +224,7 @@
     @db_transaction()
     def content_ctags_add(self, ctags, conflict_update=False,
                           db=None, cur=None):
-        _check_id_duplicates(ctags)
+        check_id_duplicates(ctags)
         ctags.sort(key=lambda m: m['id'])
 
         def _convert_ctags(__ctags):
@@ -262,7 +265,7 @@
     @db_transaction()
     def content_fossology_license_add(self, licenses, conflict_update=False,
                                       db=None, cur=None):
-        _check_id_duplicates(licenses)
+        check_id_duplicates(licenses)
         licenses.sort(key=lambda m: m['id'])
         db.mktemp_content_fossology_license(cur)
         db.copy_to(
@@ -299,7 +302,7 @@
     @db_transaction()
     def content_metadata_add(self, metadata, conflict_update=False,
                              db=None, cur=None):
-        _check_id_duplicates(metadata)
+        check_id_duplicates(metadata)
         metadata.sort(key=lambda m: m['id'])
 
         db.mktemp_content_metadata(cur)
@@ -324,7 +327,7 @@
     @db_transaction()
     def revision_intrinsic_metadata_add(self, metadata, conflict_update=False,
                                         db=None, cur=None):
-        _check_id_duplicates(metadata)
+        check_id_duplicates(metadata)
         metadata.sort(key=lambda m: m['id'])
 
         db.mktemp_revision_intrinsic_metadata(cur)
@@ -349,7 +352,7 @@
     def origin_intrinsic_metadata_add(self, metadata,
                                       conflict_update=False, db=None,
                                       cur=None):
-        _check_id_duplicates(metadata)
+        check_id_duplicates(metadata)
         metadata.sort(key=lambda m: m['id'])
 
         db.mktemp_origin_intrinsic_metadata(cur)
diff --git a/swh/indexer/storage/api/client.py b/swh/indexer/storage/api/client.py
--- a/swh/indexer/storage/api/client.py
+++ b/swh/indexer/storage/api/client.py
@@ -5,7 +5,10 @@
 
 from swh.core.api import RPCClient
 
-from swh.storage.exc import StorageAPIError
+from swh.indexer.storage.exc import (
+    IndexerStorageAPIError, IndexerStorageArgumentException,
+    DuplicateId,
+)
 
 from ..interface import IndexerStorageInterface
 
@@ -14,4 +17,7 @@
     """Proxy to a remote storage API"""
 
     backend_class = IndexerStorageInterface
-    api_exception = StorageAPIError
+    api_exception = IndexerStorageAPIError
+    reraise_exceptions = [
+        IndexerStorageArgumentException, DuplicateId
+    ]
diff --git a/swh/indexer/storage/api/server.py b/swh/indexer/storage/api/server.py
--- a/swh/indexer/storage/api/server.py
+++ b/swh/indexer/storage/api/server.py
@@ -12,6 +12,7 @@
 from swh.indexer.storage import (
     get_indexer_storage, INDEXER_CFG_KEY
 )
+from swh.indexer.storage.exc import IndexerStorageArgumentException
 from swh.indexer.storage.interface import IndexerStorageInterface
 
 
@@ -34,6 +35,11 @@
     return error_handler(exception, encode_data)
 
 
+@app.errorhandler(IndexerStorageArgumentException)
+def argument_error_handler(exception):
+    return error_handler(exception, encode_data, status_code=400)
+
+
 @app.route('/')
 def index():
     return 'SWH Indexer Storage API server'
diff --git a/swh/indexer/storage/exc.py b/swh/indexer/storage/exc.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/storage/exc.py
@@ -0,0 +1,19 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+class IndexerStorageAPIError(Exception):
+    """Generic error of the indexer storage."""
+    pass
+
+
+class IndexerStorageArgumentException(Exception):
+    """Argument passed to an IndexerStorage endpoint is invalid."""
+    pass
+
+
+class DuplicateId(IndexerStorageArgumentException):
+    """The same identifier is present more than once."""
+    pass
diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py
--- a/swh/indexer/storage/in_memory.py
+++ b/swh/indexer/storage/in_memory.py
@@ -10,8 +10,10 @@
 import operator
 import math
 import re
+from typing import Any, Dict, List
 
-from . import MAPPING_NAMES
+from . import MAPPING_NAMES, check_id_duplicates
+from .exc import IndexerStorageArgumentException
 
 SHA1_DIGEST_SIZE = 160
 
@@ -25,6 +27,12 @@
 }
 
 
+def check_id_types(data: List[Dict[str, Any]]):
+    """Checks all elements of the list have an 'id' whose type is 'bytes'."""
+    if not all(isinstance(item.get('id'), bytes) for item in data):
+        raise IndexerStorageArgumentException('identifiers must be bytes.')
+
+
 class SubStorage:
     """Implements common missing/get/add logic for each indexer type."""
     def __init__(self, tools):
@@ -91,7 +99,7 @@
             **limit** (int): Limit result
 
         Raises:
-            ValueError for limit to None
+            IndexerStorageArgumentException if limit is None
 
         Returns:
             a dict with keys:
@@ -101,7 +109,7 @@
 
         """
         if limit is None:
-            raise ValueError('Development error: limit should not be None')
+            raise IndexerStorageArgumentException('limit should not be None')
         from_index = bisect.bisect_left(self._sorted_ids, start)
         to_index = bisect.bisect_right(self._sorted_ids, end, lo=from_index)
         if to_index - from_index >= limit:
@@ -131,9 +139,7 @@
 
         """
         data = list(data)
-        if len({x['id'] for x in data}) < len(data):
-            # For "exception-compatibility" with the pgsql backend
-            raise ValueError('The same id is present more than once.')
+        check_id_duplicates(data)
         for item in data:
             item = item.copy()
             tool_id = item.pop('indexer_configuration_id')
@@ -211,8 +217,7 @@
             start, end, indexer_configuration_id, limit)
 
     def content_mimetype_add(self, mimetypes, conflict_update=False):
-        if not all(isinstance(x['id'], bytes) for x in mimetypes):
-            raise TypeError('identifiers must be bytes.')
+        check_id_types(mimetypes)
         self._mimetypes.add(mimetypes, conflict_update)
 
     def content_mimetype_get(self, ids):
@@ -225,8 +230,7 @@
         yield from self._languages.get(ids)
 
     def content_language_add(self, languages, conflict_update=False):
-        if not all(isinstance(x['id'], bytes) for x in languages):
-            raise TypeError('identifiers must be bytes.')
+        check_id_types(languages)
         self._languages.add(languages, conflict_update)
 
     def content_ctags_missing(self, ctags):
@@ -242,8 +246,7 @@
         }
 
     def content_ctags_add(self, ctags, conflict_update=False):
-        if not all(isinstance(x['id'], bytes) for x in ctags):
-            raise TypeError('identifiers must be bytes.')
+        check_id_types(ctags)
         self._content_ctags.add_merge(ctags, conflict_update, 'ctags')
 
     def content_ctags_search(self, expression,
@@ -277,8 +280,7 @@
             yield {id_: facts}
 
     def content_fossology_license_add(self, licenses, conflict_update=False):
-        if not all(isinstance(x['id'], bytes) for x in licenses):
-            raise TypeError('identifiers must be bytes.')
+        check_id_types(licenses)
         self._licenses.add_merge(licenses, conflict_update, 'licenses')
 
     def content_fossology_license_get_range(
@@ -293,8 +295,7 @@
         yield from self._content_metadata.get(ids)
 
     def content_metadata_add(self, metadata, conflict_update=False):
-        if not all(isinstance(x['id'], bytes) for x in metadata):
-            raise TypeError('identifiers must be bytes.')
+        check_id_types(metadata)
         self._content_metadata.add(metadata, conflict_update)
 
     def revision_intrinsic_metadata_missing(self, metadata):
@@ -304,8 +305,7 @@
         yield from self._revision_intrinsic_metadata.get(ids)
 
     def revision_intrinsic_metadata_add(self, metadata, conflict_update=False):
-        if not all(isinstance(x['id'], bytes) for x in metadata):
-            raise TypeError('identifiers must be bytes.')
+        check_id_types(metadata)
         self._revision_intrinsic_metadata.add(metadata, conflict_update)
 
     def revision_intrinsic_metadata_delete(self, entries):
diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py
--- a/swh/indexer/tests/storage/test_storage.py
+++ b/swh/indexer/tests/storage/test_storage.py
@@ -10,6 +10,9 @@
 
 from swh.model.hashutil import hash_to_bytes
 
+from swh.indexer.storage.exc import (
+    IndexerStorageArgumentException, DuplicateId,
+)
 from swh.indexer.storage.interface import IndexerStorageInterface
 
 
@@ -253,7 +256,7 @@
         # when
         endpoint(storage, etype, 'add')([data_rev1])
 
-        with pytest.raises(ValueError):
+        with pytest.raises(DuplicateId):
             endpoint(storage, etype, 'add')(
                 [data_rev2, data_rev2],
                 conflict_update=True)
@@ -317,13 +320,12 @@
             self, swh_indexer_storage):
         """mimetype_get_range call with wrong limit input should fail"""
         storage = swh_indexer_storage
-        with pytest.raises(ValueError) as e:
+        with pytest.raises(IndexerStorageArgumentException) as e:
             storage.content_mimetype_get_range(
                 start=None, end=None, indexer_configuration_id=None,
                 limit=None)
 
-        assert e.value.args == (
-            'Development error: limit should not be None',)
+        assert e.value.args == ('limit should not be None',)
 
     def test_generate_content_mimetype_get_range_no_limit(
             self, swh_indexer_storage_with_data):
@@ -925,13 +927,12 @@
             self, swh_indexer_storage_with_data):
         storage, data = swh_indexer_storage_with_data
         """license_get_range call with wrong limit input should fail"""
-        with pytest.raises(ValueError) as e:
+        with pytest.raises(IndexerStorageArgumentException) as e:
             storage.content_fossology_license_get_range(
                 start=None, end=None, indexer_configuration_id=None,
                 limit=None)
 
-        assert e.value.args == (
-            'Development error: limit should not be None',)
+        assert e.value.args == ('limit should not be None',)
 
     def test_generate_content_fossology_license_get_range_no_limit(
             self, swh_indexer_storage_with_data):
@@ -1424,7 +1425,7 @@
 
         # when
         storage.revision_intrinsic_metadata_add([metadata_rev])
 
-        with pytest.raises(ValueError):
+        with pytest.raises(DuplicateId):
             storage.origin_intrinsic_metadata_add([
                 metadata_origin, metadata_origin])
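
For reviewers, a minimal sketch of the error behaviour this patch introduces
(it assumes the patched tree is importable; the row payloads are illustrative):

    from swh.indexer.storage import check_id_duplicates
    from swh.indexer.storage.exc import (
        DuplicateId, IndexerStorageArgumentException,
    )

    rows = [
        {'id': b'foo', 'data': 'spam'},
        {'id': b'foo', 'data': 'egg'},
    ]

    try:
        check_id_duplicates(rows)
    except DuplicateId as e:
        # DuplicateId subclasses IndexerStorageArgumentException, so callers
        # catching the broader class keep working; the offending ids travel
        # in the exception arguments.
        assert isinstance(e, IndexerStorageArgumentException)
        print(e.args)  # ([b'foo'],)

The same contract holds over RPC: the server maps
IndexerStorageArgumentException (and therefore DuplicateId) to HTTP 400, and
the client lists both in reraise_exceptions, so remote callers see the
original exception type rather than a generic IndexerStorageAPIError.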