diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py
--- a/swh/search/elasticsearch.py
+++ b/swh/search/elasticsearch.py
@@ -46,14 +46,12 @@
             index='origin',
             body={
                 'properties': {
+                    'sha1': {
+                        'type': 'keyword',
+                        'doc_values': True,
+                    },
                     'url': {
                         'type': 'text',
-                        # TODO: consider removing fielddata when
-                        # swh-storage allows querying by hash, so the
-                        # full URL does not have to be stored in ES'
-                        # memory. See:
-                        # https://www.elastic.co/guide/en/elasticsearch/reference/current/fielddata.html#before-enabling-fielddata
-                        'fielddata': True,
                         # To split URLs into token on any character
                         # that is not alphanumerical
                         'analyzer': 'simple',
@@ -83,15 +81,20 @@
     @remote_api_endpoint('origin/update')
     def origin_update(self, documents: Iterable[dict]) -> None:
         documents = map(_sanitize_origin, documents)
+        documents_with_sha1 = ((origin_identifier(document), document)
+                               for document in documents)
         actions = [
             {
                 '_op_type': 'update',
-                '_id': origin_identifier(document),
+                '_id': sha1,
                 '_index': 'origin',
-                'doc': document,
+                'doc': {
+                    **document,
+                    'sha1': sha1,
+                },
                 'doc_as_upsert': True,
             }
-            for document in documents
+            for (sha1, document) in documents_with_sha1
         ]
         # TODO: make refresh='wait_for' configurable (we don't need it
         # in production, it will probably be a performance issue)
@@ -180,7 +183,7 @@
             'size': count,
             'sort': [
                 {'_score': 'desc'},
-                {'_id': 'asc'},
+                {'sha1': 'asc'},
             ]
         }
         if page_token:
@@ -189,7 +192,7 @@
                 base64.b64decode(page_token))
             body['search_after'] = \
                 [page_token_content[b'score'],
-                 page_token_content[b'id'].decode('ascii')]
+                 page_token_content[b'sha1'].decode('ascii')]
 
         res = self._backend.search(
             index='origin',
@@ -203,7 +206,7 @@
             last_hit = hits[-1]
             next_page_token_content = {
                 b'score': last_hit['_score'],
-                b'id': last_hit['_id'],
+                b'sha1': last_hit['_source']['sha1'],
             }
             next_page_token = base64.b64encode(msgpack.dumps(
                 next_page_token_content))  # type: Optional[bytes]