diff --git a/sql/upgrades/172.sql b/sql/upgrades/172.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/172.sql
@@ -0,0 +1,25 @@
+-- SWH DB schema upgrade
+-- from_version: 171
+-- to_version: 172
+-- description: add raw_extrinsic_metadata.id
+
+insert into dbversion(version, release, description)
+  values(172, now(), 'Work In Progress');
+-- 1. restart swh-storage, so that it stops relying on
+--    raw_extrinsic_metadata_content_authority_date_fetcher being a unique index
+
+-- 2. rename old index
+
+alter index raw_extrinsic_metadata_content_authority_date_fetcher
+  rename to raw_extrinsic_metadata_content_authority_date_fetcher_unique;
+
+-- 3. create new (non-unique) index (excluding fetcher, because it was only needed
+--    for uniqueness and not for accesses
+
+create index concurrently raw_extrinsic_metadata_content_authority_date
+  on raw_extrinsic_metadata(target, authority_id, discovery_date);
+
+-- 4. remove old index
+
+drop index raw_extrinsic_metadata_content_authority_date_fetcher_unique;
+
diff --git a/swh/storage/cassandra/cql.py b/swh/storage/cassandra/cql.py
--- a/swh/storage/cassandra/cql.py
+++ b/swh/storage/cassandra/cql.py
@@ -1016,16 +1016,15 @@
     @_prepared_select_statement(
         RawExtrinsicMetadataRow,
         "WHERE target=? AND authority_type=? AND authority_url=? "
-        "AND (discovery_date, fetcher_name, fetcher_version) > (?, ?, ?)",
+        "AND (discovery_date, id) > (?, ?)",
     )
-    def raw_extrinsic_metadata_get_after_date_and_fetcher(
+    def raw_extrinsic_metadata_get_after_date_and_id(
         self,
         target: str,
         authority_type: str,
         authority_url: str,
         after_date: datetime.datetime,
-        after_fetcher_name: str,
-        after_fetcher_version: str,
+        after_id: bytes,
         *,
         statement,
     ) -> Iterable[RawExtrinsicMetadataRow]:
@@ -1033,14 +1032,7 @@
             RawExtrinsicMetadataRow.from_dict,
             self._execute_with_retries(
                 statement,
-                [
-                    target,
-                    authority_type,
-                    authority_url,
-                    after_date,
-                    after_fetcher_name,
-                    after_fetcher_version,
-                ],
+                [target, authority_type, authority_url, after_date, after_id,],
             ),
         )
 
diff --git a/swh/storage/cassandra/model.py b/swh/storage/cassandra/model.py
--- a/swh/storage/cassandra/model.py
+++ b/swh/storage/cassandra/model.py
@@ -246,10 +246,11 @@
         "authority_type",
         "authority_url",
         "discovery_date",
-        "fetcher_name",
-        "fetcher_version",
+        "id",
     )
 
+    id: bytes
+
     type: str
     target: str
 
diff --git a/swh/storage/cassandra/schema.py b/swh/storage/cassandra/schema.py
--- a/swh/storage/cassandra/schema.py
+++ b/swh/storage/cassandra/schema.py
@@ -188,6 +188,8 @@
 );""",
     """
 CREATE TABLE IF NOT EXISTS raw_extrinsic_metadata (
+    id              blob,
+
     type            text,
     target          text,
 
@@ -211,8 +213,33 @@
     path            blob,
     directory       text,
 
-    PRIMARY KEY ((target), authority_type, authority_url, discovery_date,
-                           fetcher_name, fetcher_version)
+    PRIMARY KEY ((target), authority_type, authority_url, discovery_date, id)
+
+    -- An explanation is in order for this primary key:
+    --
+    -- Intuitively, the primary key should only be 'id', because two metadata
+    -- entries are the same iff the id is the same; and 'id' is used for
+    -- deduplication.
+    --
+    -- However, we also want to query by
+    -- (target, authority_type, authority_url, discovery_date)
+    -- The naive solution to this would be an extra table, to use as index;
+    -- but it means 1. extra code to keep them in sync 2. overhead when writing
+    -- 3. overhead + random reads (instead of linear) when reading.
+    --
+    -- Therefore, we use a single table for both, by adding the column
+    -- we want to query with before the id.
+    -- It solves both a) the query/order issues and b) the uniqueness issue because:
+    --
+    -- a) adding the id at the end of the primary key does not change the rows' order:
+    --    for two different rows, id1 != id2, so
+    --    (target1, ..., date1) < (target2, ..., date2)
+    --    <=> (target1, ..., date1, id1) < (target2, ..., date2, id2)
+    --
+    -- b) the id is a hash of all the columns, so:
+    --    rows are the same
+    --    <=> id1 == id2
+    --    <=> (target1, ..., date1, id1) == (target2, ..., date2, id2)
 );""",
     """
 CREATE TABLE IF NOT EXISTS object_count (
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -1196,6 +1196,7 @@
 
             try:
                 row = RawExtrinsicMetadataRow(
+                    id=metadata_entry.id,
                     type=metadata_entry.target.object_type.name.lower(),
                     target=str(metadata_entry.target),
                     authority_type=metadata_entry.authority.type.value,
@@ -1226,20 +1227,13 @@
         limit: int = 1000,
     ) -> PagedResult[RawExtrinsicMetadata]:
         if page_token is not None:
-            (after_date, after_fetcher_name, after_fetcher_url) = msgpack_loads(
-                base64.b64decode(page_token)
-            )
+            (after_date, id_) = msgpack_loads(base64.b64decode(page_token))
             if after and after_date < after:
                 raise StorageArgumentException(
                     "page_token is inconsistent with the value of 'after'."
                 )
-            entries = self._cql_runner.raw_extrinsic_metadata_get_after_date_and_fetcher(  # noqa
-                str(target),
-                authority.type.value,
-                authority.url,
-                after_date,
-                after_fetcher_name,
-                after_fetcher_url,
+            entries = self._cql_runner.raw_extrinsic_metadata_get_after_date_and_id(
+                str(target), authority.type.value, authority.url, after_date, id_,
             )
         elif after is not None:
             entries = self._cql_runner.raw_extrinsic_metadata_get_after_date(
@@ -1287,13 +1281,7 @@
             assert len(results) == limit
             last_result = results[-1]
             next_page_token: Optional[str] = base64.b64encode(
-                msgpack_dumps(
-                    (
-                        last_result.discovery_date,
-                        last_result.fetcher.name,
-                        last_result.fetcher.version,
-                    )
-                )
+                msgpack_dumps((last_result.discovery_date, last_result.id,))
             ).decode()
         else:
             next_page_token = None
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -599,23 +599,22 @@
         )
         return (m for m in metadata if m.discovery_date > after)
 
-    def raw_extrinsic_metadata_get_after_date_and_fetcher(
+    def raw_extrinsic_metadata_get_after_date_and_id(
         self,
         target: str,
         authority_type: str,
         authority_url: str,
         after_date: datetime.datetime,
-        after_fetcher_name: str,
-        after_fetcher_version: str,
+        after_id: bytes,
     ) -> Iterable[RawExtrinsicMetadataRow]:
         metadata = self._raw_extrinsic_metadata.get_from_partition_key((target,))
-        after_tuple = (after_date, after_fetcher_name, after_fetcher_version)
+        after_tuple = (after_date, after_id)
         return (
             m
             for m in metadata
             if m.authority_type == authority_type
             and m.authority_url == authority_url
-            and (m.discovery_date, m.fetcher_name, m.fetcher_version) > after_tuple
+            and (m.discovery_date, m.id) > after_tuple
         )
 
     def raw_extrinsic_metadata_get(
diff --git a/swh/storage/postgresql/db.py b/swh/storage/postgresql/db.py
--- a/swh/storage/postgresql/db.py
+++ b/swh/storage/postgresql/db.py
@@ -29,7 +29,7 @@
 
     """
 
-    current_version = 171
+    current_version = 172
 
     def mktemp_dir_entry(self, entry_type, cur=None):
         self._cursor(cur).execute(
@@ -1226,7 +1226,7 @@
         INSERT INTO raw_extrinsic_metadata
             ({', '.join(_raw_extrinsic_metadata_insert_cols)})
         VALUES ({', '.join('%s' for _ in _raw_extrinsic_metadata_insert_cols)})
-        ON CONFLICT (target, authority_id, discovery_date, fetcher_id)
+        ON CONFLICT (id)
         DO NOTHING
     """
 
diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql
--- a/swh/storage/sql/30-schema.sql
+++ b/swh/storage/sql/30-schema.sql
@@ -17,7 +17,7 @@
 
 -- latest schema version
 insert into dbversion(version, release, description)
-      values(171, now(), 'Work In Progress');
+      values(172, now(), 'Work In Progress');
 
 -- a SHA1 checksum
 create domain sha1 as bytea check (length(value) = 20);
diff --git a/swh/storage/sql/60-indexes.sql b/swh/storage/sql/60-indexes.sql
--- a/swh/storage/sql/60-indexes.sql
+++ b/swh/storage/sql/60-indexes.sql
@@ -268,7 +268,7 @@
 create unique index concurrently raw_extrinsic_metadata_pkey on raw_extrinsic_metadata(id);
 alter table raw_extrinsic_metadata add primary key using index raw_extrinsic_metadata_pkey;
 
-create unique index concurrently raw_extrinsic_metadata_content_authority_date_fetcher on raw_extrinsic_metadata(target, authority_id, discovery_date, fetcher_id);
+create index concurrently raw_extrinsic_metadata_content_authority_date on raw_extrinsic_metadata(target, authority_id, discovery_date);
 
 \if :dbflavor_default
   alter table raw_extrinsic_metadata add constraint raw_extrinsic_metadata_authority_fkey foreign key (authority_id) references metadata_authority(id) not valid;
diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py
--- a/swh/storage/tests/storage_tests.py
+++ b/swh/storage/tests/storage_tests.py
@@ -3771,37 +3771,28 @@
             assert obj in actual_objects
 
     def test_content_metadata_add_duplicate(self, swh_storage, sample_data):
-        """Duplicates should be silently updated."""
+        """Duplicates should be silently ignored."""
         content = sample_data.content
         fetcher = sample_data.metadata_fetcher
         authority = sample_data.metadata_authority
         content_metadata, content_metadata2 = sample_data.content_metadata[:2]
 
-        new_content_metadata2 = RawExtrinsicMetadata.from_dict(
-            {
-                **remove_keys(content_metadata2.to_dict(), ("id",)),  # recompute id
-                "format": "new-format",
-                "metadata": b"new-metadata",
-            }
-        )
-
         swh_storage.metadata_fetcher_add([fetcher])
         swh_storage.metadata_authority_add([authority])
 
         swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
-        swh_storage.raw_extrinsic_metadata_add([new_content_metadata2])
+        swh_storage.raw_extrinsic_metadata_add([content_metadata2, content_metadata])
 
         result = swh_storage.raw_extrinsic_metadata_get(
             content.swhid().to_extended(), authority
         )
         assert result.next_page_token is None
 
-        expected_results1 = (content_metadata, new_content_metadata2)
-        expected_results2 = (content_metadata, content_metadata2)
+        expected_results = (content_metadata, content_metadata2)
 
-        assert tuple(sorted(result.results, key=lambda x: x.discovery_date,)) in (
-            expected_results1,  # cassandra
-            expected_results2,  # postgresql
+        assert (
+            tuple(sorted(result.results, key=lambda x: x.discovery_date,))
+            == expected_results
         )
 
     def test_content_metadata_get(self, swh_storage, sample_data):
@@ -3996,19 +3987,11 @@
         origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
         assert swh_storage.origin_add([origin]) == {"origin:add": 1}
 
-        new_origin_metadata2 = RawExtrinsicMetadata.from_dict(
-            {
-                **remove_keys(origin_metadata2.to_dict(), ("id",)),  # recompute id
-                "format": "new-format",
-                "metadata": b"new-metadata",
-            }
-        )
-
         swh_storage.metadata_fetcher_add([fetcher])
         swh_storage.metadata_authority_add([authority])
 
         swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
-        swh_storage.raw_extrinsic_metadata_add([new_origin_metadata2])
+        swh_storage.raw_extrinsic_metadata_add([origin_metadata2, origin_metadata])
 
         result = swh_storage.raw_extrinsic_metadata_get(
             Origin(origin.url).swhid(), authority
@@ -4016,12 +3999,11 @@
         assert result.next_page_token is None
 
         # which of the two behavior happens is backend-specific.
-        expected_results1 = (origin_metadata, new_origin_metadata2)
-        expected_results2 = (origin_metadata, origin_metadata2)
+        expected_results = (origin_metadata, origin_metadata2)
 
-        assert tuple(sorted(result.results, key=lambda x: x.discovery_date,)) in (
-            expected_results1,  # cassandra
-            expected_results2,  # postgresql
+        assert (
+            tuple(sorted(result.results, key=lambda x: x.discovery_date,))
+            == expected_results
         )
 
     def test_origin_metadata_get(self, swh_storage, sample_data):