diff --git a/PKG-INFO b/PKG-INFO
index 922684c..22a5020 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,52 +1,52 @@
 Metadata-Version: 2.1
 Name: swh.search
-Version: 0.7.0
+Version: 0.7.1
 Summary: Software Heritage search service
 Home-page: https://forge.softwareheritage.org/diffusion/DSEA
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
 Project-URL: Funding, https://www.softwareheritage.org/donate
 Project-URL: Source, https://forge.softwareheritage.org/source/swh-search
 Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-search/
 Description: swh-search
         ==========
         
         Search service for the Software Heritage archive.
         
         It is similar to swh-storage in what it contains, but provides different
         ways to query it: while swh-storage is mostly a key-value store that
         returns an object from a primary key, swh-search is focused on reverse
         indices, allowing objects to be found by criteria such as full-text
         search.
         
         Currently uses ElasticSearch, and provides only origin search (by URL
         and metadata).
         
         # Dependencies
         
         The Python tests for this module include tests that cannot run without
         a local ElasticSearch instance, so the ElasticSearch server executable
         must be present on your machine (no need for a running ElasticSearch
         server).
         
         ## Debian-like host
         
         The elasticsearch package is required. As it is not part of
         debian-stable, [another Debian repository has to be
         configured](https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html#deb-repo)
         
         ## Non Debian-like host
         
         The tests expect:
         - `/usr/share/elasticsearch/jdk/bin/java` to exist.
         - `org.elasticsearch.bootstrap.Elasticsearch` to be in java's classpath.
         
 Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Operating System :: OS Independent
 Classifier: Development Status :: 3 - Alpha
 Requires-Python: >=3.7
 Description-Content-Type: text/markdown
 Provides-Extra: testing
diff --git a/debian/changelog b/debian/changelog
index 1ff8ce4..5c22961 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,210 +1,213 @@
-swh-search (0.7.0-1~swh1~bpo10+1) buster-swh; urgency=medium
+swh-search (0.7.1-1~swh1) unstable-swh; urgency=medium
 
-  * Rebuild for buster-swh
+  * New upstream release 0.7.1 - (tagged by Vincent SELLIER
+    on 2021-03-04 15:59:28 +0100)
+  * Upstream changes: - v0.7.1 - Changelog: - * Allow to
+    instantiate the service with default indexes configuration
 
- -- Software Heritage autobuilder (on jenkins-debian1)  Thu, 04 Mar 2021 11:26:16 +0000
+ -- Software Heritage autobuilder (on jenkins-debian1)  Thu, 04 Mar 2021 15:06:34 +0000
 
 swh-search (0.7.0-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.7.0 - (tagged by Vincent SELLIER
     on 2021-03-04 12:09:12 +0100)
   * Upstream changes: - v0.7.0 - Changelog: - * Ensure the
     elasticsearch indexes are initialized before the first request
     - * Use elasticsearch aliases to simplify maintenance operations
     - * search.cli: Drop unused and untested rpc-serve cli entrypoint
     - * api.wsgi: Drop unused wsgi module - * Add missing server
     tests - * Add typing to origin_update's argument and
     origin_search's return
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Thu, 04 Mar 2021 11:19:29 +0000
 
 swh-search (0.6.1-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.6.1 - (tagged by Antoine Lambert
     on 2021-02-18 18:55:56 +0100)
   * Upstream changes: - version 0.6.1
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Thu, 18 Feb 2021 18:00:51 +0000
 
 swh-search (0.6.0-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.6.0 - (tagged by Antoine Lambert
     on 2021-02-18 15:28:07 +0100)
   * Upstream changes: - version 0.6.0
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Thu, 18 Feb 2021 14:31:07 +0000
 
 swh-search (0.5.0-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.5.0 - (tagged by Vincent SELLIER
     on 2021-02-18 11:20:43 +0100)
   * Upstream changes: - v0.5.0 - Add monitoring metrics
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Thu, 18 Feb 2021 10:25:39 +0000
 
 swh-search (0.4.2-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.4.2 - (tagged by Antoine Lambert
     on 2021-02-17 11:09:21 +0100)
   * Upstream changes: - version 0.4.2
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Wed, 17 Feb 2021 10:14:16 +0000
 
 swh-search (0.4.1-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.4.1 - (tagged by Vincent SELLIER
     on 2021-01-07 16:15:23 +0100)
   * Upstream changes: - v0.4.1
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Thu, 07 Jan 2021 15:18:24 +0000
 
 swh-search (0.4.0-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.4.0 - (tagged by Vincent SELLIER
     on 2020-12-23 16:37:18 +0100)
   * Upstream changes: - Support an index name prefix
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Wed, 23 Dec 2020 15:41:09 +0000
 
 swh-search (0.3.5-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.3.5 - (tagged by Valentin Lorentz
     on 2020-12-22 17:32:26 +0100)
   * Upstream changes: - v0.3.5 - * Write some basic documentation
     to describe what swh-search is. - * Add more comments in
     elasticsearch.py
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Tue, 22 Dec 2020 16:38:29 +0000
 
 swh-search (0.3.4-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.3.4 - (tagged by Antoine R. Dumont
     (@ardumont) on 2020-12-17 12:13:49 +0100)
   * Upstream changes: - v0.3.4 - search.journal_client: Actually
     filter on full origin_visit_status
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Thu, 17 Dec 2020 11:16:32 +0000
 
 swh-search (0.3.3-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.3.3 - (tagged by Antoine R. Dumont
     (@ardumont) on 2020-12-11 15:20:01 +0100)
   * Upstream changes: - v0.3.3 - Use cross-field search. - Normalize
     Codemeta documents by expanding them. - Add test for long
     descriptions.
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Fri, 11 Dec 2020 14:22:59 +0000
 
 swh-search (0.3.2-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.3.2 - (tagged by Antoine R. Dumont
     (@ardumont) on 2020-12-10 09:49:35 +0100)
   * Upstream changes: - v0.3.2 - search.journal_client: Fix key
     error - test_journal_client: Migrate to pytest
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Thu, 10 Dec 2020 08:54:53 +0000
 
 swh-search (0.3.1-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.3.1 - (tagged by Antoine R. Dumont
     (@ardumont) on 2020-12-09 18:21:33 +0100)
   * Upstream changes: - v0.3.1 - Allow configuration through cli or
     config file
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Wed, 09 Dec 2020 18:53:39 +0000
 
 swh-search (0.3.0-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.3.0 - (tagged by Antoine R. Dumont
     (@ardumont) on 2020-12-08 11:30:33 +0100)
   * Upstream changes: - v0.3.0 - cli: Subscribe journal client to
     origin_intrinsic_metadata topic - cli: Subscribe journal client
     to origin_visit_status - cli: Allow topic prefix declaration
     through cli or configuration - cli: Allow object-type declaration
     through cli or configuration - tox.ini: pin black to the
     pre-commit version (19.10b0) to avoid flip-flops - Run isort
     after the CLI import changes
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Tue, 08 Dec 2020 10:33:30 +0000
 
 swh-search (0.2.3-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.2.3 - (tagged by David Douard
     on 2020-09-25 12:51:11 +0200)
   * Upstream changes: - v0.2.3
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Fri, 25 Sep 2020 10:53:12 +0000
 
 swh-search (0.2.2-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.2.2 - (tagged by Antoine R. Dumont
     (@ardumont) on 2020-08-03 11:58:53 +0200)
   * Upstream changes: - v0.2.2 - Fix test_cli.invoke for old PyYAML
     versions (such as 3.13, in Debian 10).
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Mon, 03 Aug 2020 10:00:05 +0000
 
 swh-search (0.2.1-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.2.1 - (tagged by Antoine R. Dumont
     (@ardumont) on 2020-08-03 10:59:31 +0200)
   * Upstream changes: - v0.2.1 - setup.py: Migrate from vcversioner
     to setuptools-scm
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Mon, 03 Aug 2020 09:00:39 +0000
 
 swh-search (0.2.0-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.2.0 - (tagged by Antoine R. Dumont
     (@ardumont) on 2020-08-03 10:40:39 +0200)
   * Upstream changes: - v0.2.0 - swh.search: Define an interface for
     search backends and use it - swh.search.get_search: Simplify
     instantiation
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Mon, 03 Aug 2020 08:42:45 +0000
 
 swh-search (0.1.0-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.1.0 - (tagged by Antoine R. Dumont
     (@ardumont) on 2020-07-31 14:05:22 +0200)
   * Upstream changes: - v0.1.0 - Type origin_search(...) ->
     PagedResult[Dict] - README: Update necessary dependencies for
     test purposes - Fixes on journal updates - Blackify strings -
     setup: Update the minimum required runtime python3 version
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Fri, 31 Jul 2020 12:10:22 +0000
 
 swh-search (0.0.4-1~swh1) unstable-swh; urgency=medium
 
   * New upstream release 0.0.4 - (tagged by Antoine R. Dumont
     (@ardumont) on 2020-01-23 15:00:50 +0100)
   * Upstream changes: - v0.0.4 docs: Remove swh-py-template label -
     Only return results where all terms match. - Don't use
     refresh='wait_for' when updating origins. - Add a 'sha1' field
     to origin documents, used for sorting. - Add a pre-commit config
     file - Migrate tox.ini to extras = xxx instead of deps =
     .[testing] - De-specify testenv:py3 - Include all requirements
     in MANIFEST.in
 
  -- Software Heritage autobuilder (on jenkins-debian1)  Thu, 23 Jan 2020 14:04:17 +0000
 
 swh-search (0.0.3-1~swh2) unstable-swh; urgency=medium
 
   * Filter out swh/__init__.py from package
 
  -- Nicolas Dandrimont  Tue, 14 Jan 2020 16:38:23 +0100
 
 swh-search (0.0.3-1~swh1) unstable-swh; urgency=medium
 
   * Initial packaging
 
  -- Nicolas Dandrimont  Mon, 13 Jan 2020 16:59:11 +0100
diff --git a/swh.search.egg-info/PKG-INFO b/swh.search.egg-info/PKG-INFO
index 922684c..22a5020 100644
--- a/swh.search.egg-info/PKG-INFO
+++ b/swh.search.egg-info/PKG-INFO
@@ -1,52 +1,52 @@
 Metadata-Version: 2.1
 Name: swh.search
-Version: 0.7.0
+Version: 0.7.1
 Summary: Software Heritage search service
 Home-page: https://forge.softwareheritage.org/diffusion/DSEA
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
 Project-URL: Funding, https://www.softwareheritage.org/donate
 Project-URL: Source, https://forge.softwareheritage.org/source/swh-search
 Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-search/
 Description: swh-search
         ==========
         
         Search service for the Software Heritage archive.
         
         It is similar to swh-storage in what it contains, but provides different
         ways to query it: while swh-storage is mostly a key-value store that
         returns an object from a primary key, swh-search is focused on reverse
         indices, allowing objects to be found by criteria such as full-text
         search.
         
         Currently uses ElasticSearch, and provides only origin search (by URL
         and metadata).
         
         # Dependencies
         
         The Python tests for this module include tests that cannot run without
         a local ElasticSearch instance, so the ElasticSearch server executable
         must be present on your machine (no need for a running ElasticSearch
         server).
         
         ## Debian-like host
         
         The elasticsearch package is required. As it is not part of
         debian-stable, [another Debian repository has to be
         configured](https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html#deb-repo)
         
         ## Non Debian-like host
         
         The tests expect:
         - `/usr/share/elasticsearch/jdk/bin/java` to exist.
         - `org.elasticsearch.bootstrap.Elasticsearch` to be in java's classpath.
         
 Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Operating System :: OS Independent
 Classifier: Development Status :: 3 - Alpha
 Requires-Python: >=3.7
 Description-Content-Type: text/markdown
 Provides-Extra: testing
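The elasticsearch.py hunk below carries the change summarized in the 0.7.1 changelog entry: the `indexes` constructor argument becomes optional, and any value passed is merged over `ORIGIN_DEFAULT_CONFIG`. As a minimal sketch of what this enables (illustration only, not part of the diff; it assumes an Elasticsearch node reachable on localhost:9200):

    # Illustration only, not part of the diff. Assumes an Elasticsearch
    # node is reachable on localhost:9200.
    from swh.search.elasticsearch import ElasticSearch

    # Since 0.7.1 the `indexes` argument may be omitted; the backend then
    # falls back to ORIGIN_DEFAULT_CONFIG (index "origin", aliases
    # "origin-read" / "origin-write").
    search = ElasticSearch(hosts=["localhost:9200"])
    search.initialize()  # creates the index, aliases and mapping if missing

    # Partial configuration is merged over the defaults, so unspecified
    # keys (here, "write_alias") keep their default values.
    search = ElasticSearch(
        hosts=["localhost:9200"],
        indexes={"origin": {"index": "test-origin", "read_alias": "test-origin-read"}},
    )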
diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py
index b90885d..842c837 100644
--- a/swh/search/elasticsearch.py
+++ b/swh/search/elasticsearch.py
@@ -1,312 +1,312 @@
 # Copyright (C) 2019-2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import base64
 from typing import Any, Dict, Iterable, Iterator, List, Optional
 
 from elasticsearch import Elasticsearch, helpers
 import msgpack
 
 from swh.indexer import codemeta
 from swh.model import model
 from swh.model.identifiers import origin_identifier
 from swh.search.interface import MinimalOriginDict, OriginDict, PagedResult
 from swh.search.metrics import send_metric, timed
 
 INDEX_NAME_PARAM = "index"
 READ_ALIAS_PARAM = "read_alias"
 WRITE_ALIAS_PARAM = "write_alias"
 
 ORIGIN_DEFAULT_CONFIG = {
     INDEX_NAME_PARAM: "origin",
     READ_ALIAS_PARAM: "origin-read",
     WRITE_ALIAS_PARAM: "origin-write",
 }
 
 
 def _sanitize_origin(origin):
     origin = origin.copy()
 
     # Whitelist fields to be saved in Elasticsearch
     res = {"url": origin.pop("url")}
     for field_name in ("intrinsic_metadata", "has_visits", "visit_types"):
         if field_name in origin:
             res[field_name] = origin.pop(field_name)
 
     # Run the JSON-LD expansion algorithm to normalize the Codemeta metadata.
     # This is required as Elasticsearch needs each field to have a consistent
     # type across documents to be searchable; and non-expanded JSON-LD
     # documents can have various types in the same field. For example, all
     # these are equivalent in JSON-LD:
     # * {"author": "Jane Doe"}
     # * {"author": ["Jane Doe"]}
     # * {"author": {"@value": "Jane Doe"}}
     # * {"author": [{"@value": "Jane Doe"}]}
     # and JSON-LD expansion will convert them all to the last one.
     if "intrinsic_metadata" in res:
         res["intrinsic_metadata"] = codemeta.expand(res["intrinsic_metadata"])
 
     return res
 
 
 def token_encode(index_to_tokenize: Dict[bytes, Any]) -> str:
     """Tokenize as string an index page result from a search"""
     page_token = base64.b64encode(msgpack.dumps(index_to_tokenize))
     return page_token.decode()
 
 
 def token_decode(page_token: str) -> Dict[bytes, Any]:
     """Read the page_token"""
     return msgpack.loads(base64.b64decode(page_token.encode()), raw=True)
 
 
 class ElasticSearch:
-    def __init__(self, hosts: List[str], indexes: Dict[str, Dict[str, str]]):
+    def __init__(self, hosts: List[str], indexes: Dict[str, Dict[str, str]] = {}):
         self._backend = Elasticsearch(hosts=hosts)
 
         # Merge current configuration with default values
         origin_config = indexes.get("origin", {})
         self.origin_config = {**ORIGIN_DEFAULT_CONFIG, **origin_config}
 
     def _get_origin_index(self) -> str:
         return self.origin_config[INDEX_NAME_PARAM]
 
     def _get_origin_read_alias(self) -> str:
         return self.origin_config[READ_ALIAS_PARAM]
 
     def _get_origin_write_alias(self) -> str:
         return self.origin_config[WRITE_ALIAS_PARAM]
 
     @timed
     def check(self):
         return self._backend.ping()
 
     def deinitialize(self) -> None:
         """Removes all indices from the Elasticsearch backend"""
         self._backend.indices.delete(index="*")
 
     def initialize(self) -> None:
         """Declare Elasticsearch indices, aliases and mappings"""
         if not self._backend.indices.exists(index=self._get_origin_index()):
             self._backend.indices.create(index=self._get_origin_index())
 
         if not self._backend.indices.exists_alias(self._get_origin_read_alias()):
             self._backend.indices.put_alias(
                 index=self._get_origin_index(), name=self._get_origin_read_alias()
             )
 
         if not self._backend.indices.exists_alias(self._get_origin_write_alias()):
             self._backend.indices.put_alias(
                 index=self._get_origin_index(), name=self._get_origin_write_alias()
             )
 
         self._backend.indices.put_mapping(
             index=self._get_origin_index(),
             body={
                 "date_detection": False,
                 "properties": {
                     # sha1 of the URL; used as the document id
                     "sha1": {"type": "keyword", "doc_values": True,},
                     # Used both to search URLs, and as the result to return
                     # as a response to queries
                     "url": {
                         "type": "text",
                         # To split URLs into tokens on any character
                         # that is not alphanumerical
                         "analyzer": "simple",
                         # 2-gram and partial-3-gram search (ie. with the end
                         # of the third word potentially missing)
                         "fields": {
                             "as_you_type": {
                                 "type": "search_as_you_type",
                                 "analyzer": "simple",
                             }
                         },
                     },
                     "visit_types": {"type": "keyword"},
                     # used to filter out origins that were never visited
                     "has_visits": {"type": "boolean",},
                     "intrinsic_metadata": {
                         "type": "nested",
                         "properties": {
                             "@context": {
                                 # don't bother indexing tokens in these URIs,
                                 # as they are used as namespaces
                                 "type": "keyword",
                             }
                         },
                     },
                 },
             },
         )
 
     @timed
     def flush(self) -> None:
         self._backend.indices.refresh(index=self._get_origin_write_alias())
 
     @timed
     def origin_update(self, documents: Iterable[OriginDict]) -> None:
         write_index = self._get_origin_write_alias()
         documents = map(_sanitize_origin, documents)
         documents_with_sha1 = (
             (origin_identifier(document), document) for document in documents
         )
         # painless script that will be executed when updating an origin document
         update_script = """
         // backup current visit_types field value
         List visit_types = ctx._source.getOrDefault("visit_types", []);
         // update origin document with new field values
         ctx._source.putAll(params);
         // restore previous visit types after visit_types field overriding
         if (ctx._source.containsKey("visit_types")) {
             for (int i = 0; i < visit_types.length; ++i) {
                 if (!ctx._source.visit_types.contains(visit_types[i])) {
                     ctx._source.visit_types.add(visit_types[i]);
                 }
             }
         }
         """
 
         actions = [
             {
                 "_op_type": "update",
                 "_id": sha1,
                 "_index": write_index,
                 "scripted_upsert": True,
                 "upsert": {**document, "sha1": sha1,},
                 "script": {
                     "source": update_script,
                     "lang": "painless",
                     "params": document,
                 },
             }
             for (sha1, document) in documents_with_sha1
         ]
 
         indexed_count, errors = helpers.bulk(self._backend, actions, index=write_index)
         assert isinstance(errors, List)  # Make mypy happy
 
         send_metric("document:index", count=indexed_count, method_name="origin_update")
         send_metric(
             "document:index_error", count=len(errors), method_name="origin_update"
         )
 
     def origin_dump(self) -> Iterator[model.Origin]:
         results = helpers.scan(self._backend, index=self._get_origin_read_alias())
         for hit in results:
             yield self._backend.termvectors(
                 index=self._get_origin_read_alias(), id=hit["_id"], fields=["*"]
             )
 
     @timed
     def origin_search(
         self,
         *,
         url_pattern: Optional[str] = None,
         metadata_pattern: Optional[str] = None,
         with_visit: bool = False,
         visit_types: Optional[List[str]] = None,
         page_token: Optional[str] = None,
         limit: int = 50,
     ) -> PagedResult[MinimalOriginDict]:
         query_clauses: List[Dict[str, Any]] = []
 
         if url_pattern:
             query_clauses.append(
                 {
                     "multi_match": {
                         "query": url_pattern,
                         "type": "bool_prefix",
                         "operator": "and",
                         "fields": [
                             "url.as_you_type",
                             "url.as_you_type._2gram",
                             "url.as_you_type._3gram",
                         ],
                     }
                 }
             )
 
         if metadata_pattern:
             query_clauses.append(
                 {
                     "nested": {
                         "path": "intrinsic_metadata",
                         "query": {
                             "multi_match": {
                                 "query": metadata_pattern,
                                 # Makes it so that the "foo bar" query returns
                                 # documents which contain "foo" in a field and
                                 # "bar" in a different field
                                 "type": "cross_fields",
                                 # All keywords must be found in a document for
                                 # it to be considered a match.
                                 # TODO: allow missing keywords?
                                 "operator": "and",
                                 # Searches on all fields of the
                                 # intrinsic_metadata dict, recursively.
                                 "fields": ["intrinsic_metadata.*"],
                             }
                         },
                     }
                 }
             )
 
         if not query_clauses:
             raise ValueError(
                 "At least one of url_pattern and metadata_pattern must be provided."
             )
 
         if with_visit:
             query_clauses.append({"term": {"has_visits": True,}})
 
         if visit_types is not None:
             query_clauses.append({"terms": {"visit_types": visit_types}})
 
         body = {
             "query": {"bool": {"must": query_clauses,}},
             "sort": [{"_score": "desc"}, {"sha1": "asc"},],
         }
         if page_token:
             # TODO: use ElasticSearch's scroll API?
             page_token_content = token_decode(page_token)
             body["search_after"] = [
                 page_token_content[b"score"],
                 page_token_content[b"sha1"].decode("ascii"),
             ]
 
         res = self._backend.search(
             index=self._get_origin_read_alias(), body=body, size=limit
         )
 
         hits = res["hits"]["hits"]
 
         next_page_token: Optional[str] = None
 
         if len(hits) == limit:
             # There are more results after this page; return a pagination token
             # to get them in a future query
             last_hit = hits[-1]
             next_page_token_content = {
                 b"score": last_hit["_score"],
                 b"sha1": last_hit["_source"]["sha1"],
             }
             next_page_token = token_encode(next_page_token_content)
 
         assert len(hits) <= limit
         return PagedResult(
             results=[{"url": hit["_source"]["url"]} for hit in hits],
             next_page_token=next_page_token,
         )
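For reference, the `search_after` pagination in `origin_search` round-trips through the msgpack/base64 helpers `token_encode`/`token_decode` shown above: the caller simply hands the opaque `next_page_token` back on the next call. A sketch of paging through every matching origin (illustration only, not part of the diff; it reuses the `search` instance from the earlier sketch and assumes the index was populated via `origin_update`):

    # Illustration only: walk all pages of a URL search. `search` is the
    # ElasticSearch instance from the previous sketch; the index is assumed
    # to have been populated through origin_update().
    page_token = None
    while True:
        page = search.origin_search(
            url_pattern="github.com", with_visit=True, limit=100, page_token=page_token
        )
        for origin in page.results:
            print(origin["url"])
        if page.next_page_token is None:
            # Fewer than `limit` hits came back, so this was the last page.
            break
        page_token = page.next_page_token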