diff --git a/.gitignore b/.gitignore index d8e3b91..0c1c008 100644 --- a/.gitignore +++ b/.gitignore @@ -1,14 +1,18 @@ *.pyc *.sw? *~ .coverage .eggs/ __pycache__ *.egg-info/ build/ dist/ version.txt .tox .mypy_cache/ .hypothesis/ .vscode/ +node_modules/ +static/ +*.wasm +*.so diff --git a/CONTRIBUTORS b/CONTRIBUTORS index e69de29..b97d981 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -0,0 +1 @@ +Kumar Shivendu diff --git a/Makefile.local b/Makefile.local new file mode 100644 index 0000000..1d1fd9d --- /dev/null +++ b/Makefile.local @@ -0,0 +1,31 @@ +YARN ?= yarn +PYTHON ?= python3 + + +ts-install: package.json + $(PYTHON) setup.py ts_install + +ts-generate: ts-install query_language/grammar.js + $(PYTHON) setup.py ts_generate + +ts-dev: ts-install +ifdef sanitize + $(YARN) dev | sed '5,$$s/[[0-9]\+, [0-9]\+]/ /g' | sed '5,$$s/ *- *//g'; +else + $(YARN) dev; +endif + +ts-test: ts-install + $(YARN) test + +ts-repl: ts-generate + $(YARN) repl + +ts-build-so: ts-generate query_language/src/ + $(PYTHON) setup.py ts_build_so + +ts-build-wasm: ts-generate query_language/src/ + $(PYTHON) setup.py ts_build_wasm + +ts-build: ts-build-so ts-build-wasm + @echo 'Build completed' diff --git a/PKG-INFO b/PKG-INFO index 8d7a6a7..351e528 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,56 +1,91 @@ Metadata-Version: 2.1 Name: swh.search -Version: 0.10.0 +Version: 0.11.0 Summary: Software Heritage search service Home-page: https://forge.softwareheritage.org/diffusion/DSEA Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-search Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-search/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 3 - Alpha Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing License-File: LICENSE License-File: AUTHORS swh-search ========== Search service for the Software Heritage archive. It is similar to swh-storage in what it contains, but provides different ways to query it: while swh-storage is mostly a key-value store that returns an object from a primary key, swh-search is focused on reverse indices, to allow finding objects that match some criteria; for example full-text search. Currently uses ElasticSearch, and provides only origin search (by URL and metadata) -# Dependencies +## Dependencies -Python tests for this module include tests that cannot be run without a local +- Python tests for this module include tests that cannot be run without a local ElasticSearch instance, so you need the ElasticSearch server executable on your machine (no need to have a running ElasticSearch server). -## Debian-like host + - Debian-like host -The elasticsearch package is required. As it's not part of debian-stable, -[another debian repository is required to be -configured](https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html#deb-repo) + The elasticsearch package is required. 
As it's not part of debian-stable, + [another debian repository is required to be + configured](https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html#deb-repo) -## Non Debian-like host + - Non Debian-like host + + The tests expect: + - `/usr/share/elasticsearch/jdk/bin/java` to exist. + - `org.elasticsearch.bootstrap.Elasticsearch` to be in java's classpath. +- Emscripten is required for generating tree-sitter WASM module. The following commands need to be executed for the setup: + ```bash + cd /opt && git clone https://github.com/emscripten-core/emsdk.git && cd emsdk && \ + ./emsdk install latest && ./emsdk activate latest + PATH="${PATH}:/opt/emsdk/upstream/emscripten" + ``` + + **Note:** If emsdk isn't found in the PATH, the tree-sitter cli automatically pulls `emscripten/emsdk` image from docker hub when `make ts-build-wasm` or `make ts-build` is used. + + +## Make targets + +Below is the list of available make targets that can be executed from the root directory of swh-search in order to build and/or execute the swh-search under various configurations: + +* **ts-install**: Install node_modules and emscripten SDK required for TreeSitter + +* **ts-generate**: Generate parser files(C and JSON) from the grammar + +* **ts-repl**: Starts a web based playground for the TreeSitter grammar. It's the recommended way for developing TreeSitter grammar. + +* **ts-dev**: Parse the `query_language/sample_query` and print the corresponding syntax expression +along with the start and end positions of all the nodes. + +* **ts-dev sanitize=1**: Same as **ts-dev** but without start and end position of the nodes. +This format is expected by TreeSitter's native test command. `sanitize=1` cleans the output +of **ts-dev** using `sed` to achieve the desired format. + +* **ts-test**: executes TreeSitter's native tests + +* **ts-build-so**: Generates `swh_ql.so` file from the previously generated parser using py-tree-sitter + +* **ts-build-so**: Generates `swh_ql.wasm` file from the previously generated parser using emscripten + +* **ts-build**: Executes both **ts-build-so** and **ts-build-so** -The tests expect: -- `/usr/share/elasticsearch/jdk/bin/java` to exist. -- `org.elasticsearch.bootstrap.Elasticsearch` to be in java's classpath. diff --git a/README.md b/README.md index 4292fe2..71c17b6 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,65 @@ swh-search ========== Search service for the Software Heritage archive. It is similar to swh-storage in what it contains, but provides different ways to query it: while swh-storage is mostly a key-value store that returns an object from a primary key, swh-search is focused on reverse indices, to allow finding objects that match some criteria; for example full-text search. Currently uses ElasticSearch, and provides only origin search (by URL and metadata) -# Dependencies +## Dependencies -Python tests for this module include tests that cannot be run without a local +- Python tests for this module include tests that cannot be run without a local ElasticSearch instance, so you need the ElasticSearch server executable on your machine (no need to have a running ElasticSearch server). -## Debian-like host + - Debian-like host -The elasticsearch package is required. As it's not part of debian-stable, -[another debian repository is required to be -configured](https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html#deb-repo) + The elasticsearch package is required. 
As it's not part of debian-stable, + [another debian repository is required to be + configured](https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html#deb-repo) -## Non Debian-like host + + - Non Debian-like host + + The tests expect: + - `/usr/share/elasticsearch/jdk/bin/java` to exist. + - `org.elasticsearch.bootstrap.Elasticsearch` to be in java's classpath. +- Emscripten is required for generating the tree-sitter WASM module. The following commands need to be executed for the setup: + ```bash + cd /opt && git clone https://github.com/emscripten-core/emsdk.git && cd emsdk && \ + ./emsdk install latest && ./emsdk activate latest + PATH="${PATH}:/opt/emsdk/upstream/emscripten" + ``` + + **Note:** If emsdk isn't found in the PATH, the tree-sitter CLI automatically pulls the `emscripten/emsdk` image from Docker Hub when `make ts-build-wasm` or `make ts-build` is used. + + +## Make targets + +Below is the list of make targets that can be run from the root directory of swh-search to build and/or run swh-search under various configurations: + +* **ts-install**: Installs node_modules and the emscripten SDK required for TreeSitter + +* **ts-generate**: Generates the parser files (C and JSON) from the grammar + +* **ts-repl**: Starts a web-based playground for the TreeSitter grammar. It's the recommended way to develop the TreeSitter grammar. + +* **ts-dev**: Parses `query_language/sample_query` and prints the corresponding syntax expression +along with the start and end positions of all the nodes. + +* **ts-dev sanitize=1**: Same as **ts-dev** but without the start and end positions of the nodes. +This format is expected by TreeSitter's native test command. `sanitize=1` cleans the output +of **ts-dev** using `sed` to achieve the desired format. + +* **ts-test**: Executes TreeSitter's native tests + +* **ts-build-so**: Generates the `swh_ql.so` file from the previously generated parser using py-tree-sitter + +* **ts-build-wasm**: Generates the `swh_ql.wasm` file from the previously generated parser using emscripten + +* **ts-build**: Executes both **ts-build-so** and **ts-build-wasm** -The tests expect: -- `/usr/share/elasticsearch/jdk/bin/java` to exist. -- `org.elasticsearch.bootstrap.Elasticsearch` to be in java's classpath. diff --git a/docs/index.rst b/docs/index.rst index d8c7a4e..f4c6087 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,36 +1,37 @@ .. _swh-search: Software Heritage - Search service ================================== The SWH search service, or swh-search, stores a partial copy of the |swh| archive. It is similar to swh-storage in what it contains, but provides different ways to query it: while swh-storage is mostly a key-value store that returns an object from a primary key, swh-search is focused on reverse indices, to allow finding objects that match some criteria; for example full-text search. It is currently based on Elasticsearch, allowing full-text search on both URLs (using a ``search_as_you_type`` index) and on intrinsic metadata of their head revision in CodeMeta_ format. It is kept in sync with :ref:`the main SWH storage ` via :ref:`the SWH journal `. .. _CodeMeta: https://codemeta.github.io/ .. toctree:: :maxdepth: 2 :caption: Contents: cli + query-language Reference Documentation ----------------------- ..
toctree:: :maxdepth: 2 /apidoc/swh.search diff --git a/docs/query-language.rst b/docs/query-language.rst new file mode 100644 index 0000000..ed6623a --- /dev/null +++ b/docs/query-language.rst @@ -0,0 +1,190 @@ +Search Query Language +===================== + + +Every query is composed of filters separated by ``and`` or ``or``. +These filters have three components, in this order: ``Name Operator Value`` + +Some examples: + * ``origin = django and language in [python] and visits >= 5`` + * ``last_revision > 2020-01-01 and limit = 10`` + * ``last_visit > 2021-01-01 or last_visit < 2020-01-01`` + * ``visited = false and metadata = "kubernetes" or origin = "minikube"`` + * ``keyword in ["orchestration", "kubectl"] and language in ["go", "rust"]`` + * ``(origin = debian or visit_type = ["deb"]) and license in ["GPL-3"]`` + +**Note**: + * Whitespace is optional between the three components of a filter. + * The conjunction operators are left-associative. Therefore ``foo and bar and baz`` means ``(foo and bar) and baz`` + * ``and`` has higher precedence than ``or``. Therefore ``foo or bar and baz`` means ``foo or (bar and baz)`` + * Precedence can be overridden using parentheses: ``(`` and ``)``. For example, you can override the default precedence in the previous query as: ``(foo or bar) and baz`` + * To actually search for ``and`` or ``or`` as strings, put them within quotes. Example: ``metadata = "vcs history and metadata"``, or even just ``metadata = "and"`` to search for the string ``and`` in the metadata + +The filters below are classified based on the type of value they expect. + + +Pattern filters +--------------- +Returns origins having the given keywords in their URL or intrinsic metadata + + * Name: + * ``origin``: Keywords from the origin URL + * ``metadata``: Keywords from all the intrinsic metadata fields + * Operator: ``=`` + * Value: String wrapped in quotation marks (``"`` or ``'``) + +**Note:** If a string has no whitespace, the quotation marks are optional. + +**Examples:** + + * ``origin = https://github.com/Django/django`` + * ``origin = kubernetes`` + * ``origin = "github python"`` + * ``metadata = orchestration`` + * ``metadata = "javascript language"`` + +Boolean filters +--------------- +Returns origins having their boolean type values equal to the given values + + * Name: ``visited`` : Whether the origin has been visited + * Operator: ``=`` + * Value: ``true`` or ``false`` + +**Examples:** + + * ``visited = true`` + * ``visited = false`` + + +Numeric filters +--------------- +Returns origins having their numeric type values in the given range + + * Name: ``visits`` : Number of visits of an origin + * Operator: ``<`` ``<=`` ``=`` ``!=`` ``>`` ``>=`` + * Value: Positive integer + +**Examples:** + + + * ``visits > 2`` + * ``visits = 5`` + * ``visits <= 10`` + + +Un-bounded List filters +----------------------- + +Returns origins that satisfy the criteria based on a given list + + * Name: + * ``language`` : Programming languages used + * ``license`` : License used + * ``keyword`` : keywords (often the same as tags) or description (includes README) from the metadata + * Operator: ``in`` ``not in`` + * Value: Array of strings + +**Note:** + * If a string has no whitespace, the quotation marks are optional. + + * The ``keyword`` filter gives higher priority to the keywords field of the intrinsic metadata than to the description field. So origins having the queried term in their intrinsic metadata keywords will appear first.
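For reference, below is a minimal sketch of running such a query through the Python backend touched by this patch (the host value is illustrative; a reachable Elasticsearch instance and a prior call to ``initialize()`` are assumed):

.. code-block:: python

    from swh.search.elasticsearch import ElasticSearch

    search = ElasticSearch(hosts=["localhost:9200"])
    search.initialize()
    # The `query` argument accepts the language described in this document;
    # the other keyword arguments are translated to the same language internally.
    page = search.origin_search(
        url_pattern="kubernetes",
        query='keyword in ["orchestration"] and language in ["go", "rust"] and limit = 10',
    )
    print([result["url"] for result in page.results])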
+ + +**Examples:** + + * ``language in [python, js]`` + * ``license in ["GPL 3.0 or later", MIT]`` + * ``keyword in ["Software Heritage", swh]`` + + +Bounded List filters +-------------------- + +Returns origins that satisfy the criteria based on a list of fixed options + + **visit_type** + + * Name: ``visit_type`` : Returns only origins with at least one of the specified visit types + * Operator: ``=`` + * Value: Array of the following values + + ``any`` + ``cran`` + ``deb`` + ``deposit`` + ``ftp`` + ``hg`` + ``git`` + ``nixguix`` + ``npm`` + ``pypi`` + ``svn`` + ``tar`` + + **sort_by** + + * Name: ``sort_by`` : Sorts origins based on the given list of origin attributes + * Operator: ``=`` + * Value: Array of the following values + + ``visits`` + ``last_visit`` + ``last_eventful_visit`` + ``last_revision`` + ``last_release`` + ``created`` + ``modified`` + ``published`` + +**Examples:** + + + * ``visit_type = [svn, npm]`` + * ``visit_type = [nixguix, "ftp"]`` + * ``sort_by = ["last_visit", created]`` + * ``sort_by = [visits, modified]`` + +Date filters +------------ + +Returns origins having their date type values in the given range + + * Name: + + * ``last_visit`` : Latest visit date + * ``last_eventful_visit`` : Latest visit date where a new snapshot was detected + * ``last_revision`` : Latest commit date + * ``last_release`` : Latest release date + * ``created`` Creation date + * ``modified`` Modification date + * ``published`` Published date + + * Operator: ``<`` ``<=`` ``=`` ``!=`` ``>`` ``>=`` + * Value: Date in ``Standard ISO`` format + + **Note:** The last three date filters are based on metadata that has to be manually entered + by the repository authors. So they might not be correct or up-to-date. + +**Examples:** + + * ``last_visit > 2001-01-01 and last_visit < 2101-01-01`` + * ``last_revision = "2000-01-01 18:35Z"`` + * ``last_release != "2021-07-17T18:35:00Z"`` + * ``created <= "2021-07-17 18:35"`` + +Limit filter +------------ + +Limits the number of results to at most N + + * Name: ``limit`` + * Operator: ``=`` + * Value: Positive Integer + +**Note:** The default value of the limit is 50 + +**Examples:** + + * ``limit = 1`` + * ``limit = 15`` diff --git a/mypy.ini b/mypy.ini index 5c756c5..02b0e9f 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,21 +1,24 @@ [mypy] namespace_packages = True warn_unused_ignores = True # 3rd party libraries without stubs (yet) [mypy-confluent_kafka.*] ignore_missing_imports = True [mypy-elasticsearch.*] ignore_missing_imports = True [mypy-msgpack.*] ignore_missing_imports = True [mypy-pkg_resources.*] ignore_missing_imports = True [mypy-pytest.*] ignore_missing_imports = True + +[mypy-tree_sitter.*] +ignore_missing_imports = True diff --git a/package.json b/package.json new file mode 100644 index 0000000..71c6ebb --- /dev/null +++ b/package.json @@ -0,0 +1,36 @@ +{ + "name": "swh-search-query-language-parser", + "version": "1.0.0", + "description": "Parser for Software Heritage archive search query language", + "scripts": { + "generate": "cd query_language && tree-sitter generate --no-bindings && echo 'Generated parser files '", + "dev": "yarn generate && cd query_language && tree-sitter parse sample_query", + "test": "yarn generate && cd query_language && tree-sitter test", + "build-so": "yarn generate && cd query_language && python3 build.py", + "build-wasm": "yarn generate && cd query_language && tree-sitter build-wasm . 
&& mv tree-sitter-swh_search_ql.wasm swh_ql.wasm", + "build": "yarn build-so && yarn build-wasm", + "repl": "yarn generate && cd query_language && tree-sitter build-wasm && tree-sitter playground" + }, + "repository": { + "type": "git", + "url": "https://forge.softwareheritage.org/source/swh-search.git" + }, + "keywords": [ + "swh", + "Software Heritage", + "treesitter", + "parser", + "custom", + "search", + "query", + "language" + ], + "author": "The Software Heritage developers", + "license": "GPL-3.0-only", + "dependencies": { + "nan": "^2.14.2" + }, + "devDependencies": { + "tree-sitter-cli": "^0.20.0" + } +} diff --git a/pyproject.toml b/pyproject.toml index 69b8f4d..4785edb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,14 @@ [tool.black] target-version = ['py37'] [tool.isort] multi_line_output = 3 include_trailing_comma = true force_grid_wrap = 0 use_parentheses = true ensure_newline_before_comments = true line_length = 88 force_sort_within_sections = true + +[build-system] +requires = ["setuptools", "wheel", "tree_sitter"] diff --git a/query_language/.gitignore b/query_language/.gitignore new file mode 100644 index 0000000..4368455 --- /dev/null +++ b/query_language/.gitignore @@ -0,0 +1,7 @@ +src +build +bindings +binding.gyp +Cargo.toml +package.json +log.html diff --git a/query_language/build.py b/query_language/build.py new file mode 100644 index 0000000..62c3de2 --- /dev/null +++ b/query_language/build.py @@ -0,0 +1,3 @@ +from tree_sitter import Language + +Language.build_library("swh_ql.so", ["."]) diff --git a/query_language/grammar.js b/query_language/grammar.js new file mode 100644 index 0000000..ab5320b --- /dev/null +++ b/query_language/grammar.js @@ -0,0 +1,216 @@ +// Copyright (C) 2019-2021 The Software Heritage developers +// See the AUTHORS file at the top-level directory of this distribution +// License: GNU General Public License version 3, or any later version +// See top-level LICENSE file for more information + + +const PRECEDENCE = { + or: 2, + and: 3, + bracket: 4, +} + +module.exports = grammar({ + name: 'swh_search_ql', + + rules: { + query: $ => seq( + $.filters, + optional($.and), + choice( + seq(optional($.sortBy), optional($.and), optional($.limit)), + seq(optional($.limit), optional($.and), optional($.sortBy)), + ), + ), + + + + filters: $ => choice( + prec.left(PRECEDENCE.and, + seq( + field('left', $.filters), + field('operator', $.and), + field('right', $.filters), + ) + ), + prec.left(PRECEDENCE.or, + seq( + field('left', $.filters), + field('operator', $.or), + field('right', $.filters), + ) + ), + prec.left(PRECEDENCE.bracket, + seq("(", $.filters, ")"), + ), + $.filter + ), + + sortBy: $ => seq($.sortByField, $.sortByOp, $.sortByVal), + sortByField: $ => token('sort_by'), + sortByOp: $ => $.equalOp, + sortByVal: $ => createArray(optionalWrapWith($.sortByOptions, ["'", '"'])), + sortByOptions: $ => seq(optional(token.immediate('-')), choice( + 'visits', + 'last_visit', + 'last_eventful_visit', + 'last_revision', + 'last_release', + 'created', + 'modified', + 'published' + )), + + limit: $ => seq('limit', $.equalOp, $.number), + + filter: $ => choice( + $.patternFilter, + $.booleanFilter, + $.numericFilter, + $.boundedListFilter, + $.unboundedListFilter, + $.dateFilter + ), + + patternFilter: $ => seq($.patternField, $.patternOp, $.patternVal), + patternField: $ => token(choice('origin', 'metadata')), + patternOp: $ => $.equalOp, + patternVal: $ => $.string, + + booleanFilter: $ => seq($.booleanField, $.booleanOp, $.booleanVal), + 
booleanField: $ => token(choice('visited')), + booleanOp: $ => $.equalOp, + booleanVal: $ => choice($.booleanTrue, $.booleanFalse), + + numericFilter: $ => seq($.numericField, $.numericOp, $.numberVal), + numericField: $ => token(choice('visits')), + numericOp: $ => $.rangeOp, + numberVal: $ => $.number, + + boundedListFilter: $ => choice($.visitTypeFilter), + + visitTypeFilter: $ => seq($.visitTypeField, $.visitTypeOp, $.visitTypeVal), + visitTypeField: $ => token(choice('visit_type')), + visitTypeOp: $ => $.equalOp, + visitTypeVal: $ => createArray(optionalWrapWith($.visitTypeOptions, ["'", '"'])), + visitTypeOptions: $ => choice( + "any", + "cran", + "deb", + "deposit", + "ftp", + "hg", + "git", + "nixguix", + "npm", + "pypi", + "svn", + "tar" + ), // TODO: fetch this list dynamically from other swh services? + + sortBy: $ => seq($.sortByField, $.sortByOp, $.sortByVal), + sortByField: $ => token(choice('sort_by')), + sortByOp: $ => $.equalOp, + sortByVal: $ => createArray(optionalWrapWith($.sortByOptions, ["'", '"'])), + sortByOptions: $ => seq( + optional('-'), + choice( + 'visits', + 'last_visit', + 'last_eventful_visit', + 'last_revision', + 'last_release', + 'created', + 'modified', + 'published' + ) + ), + + unboundedListFilter: $ => seq($.listField, $.listOp, $.listVal), + listField: $ => token(choice('language', 'license', 'keyword')), + listOp: $ => $.choiceOp, + listVal: $ => createArray($.string), + + + dateFilter: $ => seq($.dateField, $.dateOp, $.dateVal), + dateField: $ => token(choice( + 'last_visit', + 'last_eventful_visit', + 'last_revision', + 'last_release', + 'created', + 'modified', + 'published' + )), + dateOp: $ => $.rangeOp, + dateVal: $ => $.isoDateTime, + + limit: $ => seq('limit', $.equalOp, $.number), + + + rangeOp: $ => token(choice('<', '<=', '=', '!=', '>=', '>')), + equalOp: $ => token('='), + choiceOp: $ => token(choice('in', 'not in')), + + isoDateTime: $ => { + const dateRegex = (/\d{4}[-]\d{2}[-]\d{2}/).source + const dateTimeSepRegex = (/(\s|T)*/).source + const timeRegex = (/(\d{2}:\d{2}(:\d{2}(\.\d{6})?)?)?/).source + const timezoneRegex = (/(\+\d{2}:\d{2}|Z)?/).source + return new RegExp(dateRegex + dateTimeSepRegex + timeRegex + timezoneRegex) + }, + + string: $ => choice(wrapWith($.stringContent, ["'", '"']), $.singleWord), + number: $ => /\d+/, + booleanTrue: $ => "true", + booleanFalse: $ => "false", + + or: $ => "or", + and: $ => "and", + + stringContent: $ => repeat1(choice( + token.immediate(/[^\\'"\n]+/), + $.escape_sequence + )), + singleWord: $ => /[^\s"'\[\]\(\),]+/, + escape_sequence: $ => token.immediate(seq( + '\\', + /(\"|\'|\\|\/|b|n|r|t|u)/ + )), + + } +}); + + +function joinBySep1(rule, sep) { + // At least one repetition of the rule separated by `sep` + return seq(rule, repeat(seq(sep, optional(rule)))) +} + +function joinBySep(rule, sep = ",") { + // Any number of repetitions of the rule separated by `sep` + return optional(joinBySep1(rule, sep)) +} + +function createArray(rule) { + // An array having `rule` as its member + return seq( + "[", + joinBySep( + field('array_member', rule), + "," + ), + "]" + ) +} + +function wrapWith(rule, wrappers = ["'", '"']) { + // The rule must be wrapped with one of the wrappers + const wrappedRules = wrappers.map(wrapper => seq(wrapper, rule, wrapper)) + return choice(...wrappedRules) +} + +function optionalWrapWith(rule, wrappers = ["'", '"']) { + // The rule may or may not be wrapped with the wrappers + return choice(wrapWith(rule, wrappers), rule) +} diff --git 
a/query_language/sample_query b/query_language/sample_query new file mode 100644 index 0000000..3d8c08d --- /dev/null +++ b/query_language/sample_query @@ -0,0 +1,6 @@ +(origin = django/django and language in ["python"] or visits >= 5) or +(last_revision > 2020-01-01 and limit = 10) or +(last_visit > 2021-01-01 or last_visit < 2020-01-01) or +(visited = false and metadata = "gitlab") or +(keyword in ["orchestration", "kubectl"] and language in ["go", "rust"]) or +(visit_type = [deb] and license in ["GPL-3"]) diff --git a/query_language/test/corpus/combinations.txt b/query_language/test/corpus/combinations.txt new file mode 100644 index 0000000..07802ea --- /dev/null +++ b/query_language/test/corpus/combinations.txt @@ -0,0 +1,75 @@ +============================== +Empty query (should throw error) +============================== + +--- + +(ERROR) + + +================== +Origins with django as keyword, python language, and more than 5 visits +================== + +origin = django and language in ["python"] and visits >= 5 + +--- +(query (filters (filters (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (singleWord)))))) (and) (filters (filter (unboundedListFilter (listField) (listOp (choiceOp)) (listVal (string (stringContent))))))) (and) (filters (filter (numericFilter (numericField) (numericOp (rangeOp)) (numberVal (number))))))) + +================== +10 origins with latest revision after 2020-01-01 +================== +last_revision > 2020-01-01 limit = 10 +--- +(query (filters (filter (dateFilter (dateField) (dateOp (rangeOp)) (dateVal (isoDateTime))))) (limit (equalOp) (number))) + +================== +Origins with last visit date not in 2020-2021 (sorted by number of visits) +================== + +last_visit > 2021-01-01 or last_visit < 2020-01-01 sort_by = ["visits"] +--- +(query (filters (filters (filter (dateFilter (dateField) (dateOp (rangeOp)) (dateVal (isoDateTime))))) (or) (filters (filter (dateFilter (dateField) (dateOp (rangeOp)) (dateVal (isoDateTime)))))) (sortBy (sortByField) (sortByOp (equalOp)) (sortByVal (sortByOptions)))) + +================== +Unvisited origins with kubernetes in metadata or minikube in url +================== + +visited = false and metadata = "kubernetes" or origin = "minikube" + +--- +(query (filters (filters (filters (filter (booleanFilter (booleanField) (booleanOp (equalOp)) (booleanVal (booleanFalse))))) (and) (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (stringContent))))))) (or) (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (stringContent)))))))) + +================== +Origins with "orchestration" or "kubectl" as keywords and language as "go" or "rust" +================== + +keyword in ["orchestration", "kubectl"] and language in ["go", "rust"] + +--- +(query (filters (filters (filter (unboundedListFilter (listField) (listOp (choiceOp)) (listVal (string (stringContent)) (string (stringContent)))))) (and) (filters (filter (unboundedListFilter (listField) (listOp (choiceOp)) (listVal (string (stringContent)) (string (stringContent)))))))) + +================== +Origins with a GPL-3 license that have "debian" in their url or have visit type as "deb" +================== +(origin = debian or visit_type = ["deb"]) and license in ["GPL-3"] +--- + +(query (filters (filters (filters (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (singleWord)))))) (or) (filters (filter (boundedListFilter 
(visitTypeFilter (visitTypeField) (visitTypeOp (equalOp)) (visitTypeVal (visitTypeOptions)))))))) (and) (filters (filter (unboundedListFilter (listField) (listOp (choiceOp)) (listVal (string (stringContent)))))))) + +================== +Origins with `and` and `or` inside filter values +================== +(origin = "foo and bar or baz") +--- + +(query (filters (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (stringContent)))))))) + + +================== +Origins with `'` and `"` inside filter values +================== +(origin = "foo \\ \'bar\' \"baz\" ") +--- + +(query (filters (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (stringContent (escape_sequence) (escape_sequence) (escape_sequence) (escape_sequence) (escape_sequence))))))))) diff --git a/requirements-test.txt b/requirements-test.txt index d0c4f08..7b5b9a1 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,7 +1,8 @@ pytest pytest-mock confluent-kafka types-click types-pytz types-pyyaml types-requests +types-setuptools diff --git a/requirements.txt b/requirements.txt index 12608a1..422247f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ # Add here external Python modules dependencies, one per line. Module names # should match https://pypi.python.org/pypi names. For the full spec or # dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html click elasticsearch>=7.0.0,<8.0.0 typing-extensions +tree_sitter diff --git a/setup.py b/setup.py index c6fef58..122a92d 100755 --- a/setup.py +++ b/setup.py @@ -1,71 +1,188 @@ #!/usr/bin/env python3 # Copyright (C) 2015-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from distutils.cmd import Command from io import open -from os import path +from os import environ, path, system from setuptools import find_packages, setup +from setuptools.command.build_py import build_py +from setuptools.command.sdist import sdist here = path.abspath(path.dirname(__file__)) # Get the long description from the README file with open(path.join(here, "README.md"), encoding="utf-8") as f: long_description = f.read() def parse_requirements(name=None): if name: reqf = "requirements-%s.txt" % name else: reqf = "requirements.txt" requirements = [] if not path.exists(reqf): return requirements with open(reqf) as f: for line in f.readlines(): line = line.strip() if not line or line.startswith("#"): continue requirements.append(line) return requirements +yarn = environ.get("YARN", "yarn") + + +class TSCommand(Command): + user_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + +class TSInstallCommand(TSCommand): + description = "Installs node_modules related to query language" + + def run(self): + system(f"{yarn} install") + + +class TSGenerateCommand(TSCommand): + description = "Generates parser related files from grammar.js" + + def run(self): + system(f"{yarn} generate") + + +class TSBuildSoCommand(TSCommand): + description = "Builds swh_ql.so" + + def run(self): + system(f"{yarn} build-so && echo 'swh_ql.so file generated'") + + +class TSBuildWasmCommand(TSCommand): + description = "Builds swh_ql.wasm" + + def run(self): + system(f"{yarn} build-wasm && echo 'swh_ql.wasm file generated'") + + +class TSBuildCommand(TSCommand): + description = "Builds swh_ql.so 
and swh_ql.wasm" + + def run(self): + self.run_command("ts_build_so") + self.run_command("ts_build_wasm") + + +class TSBuildExportCommand(TSCommand): + description = "Builds swh_ql.so and swh_ql.wasm and exports them to static/" + + def initialize_options(self): + self.build_lib = None + super().initialize_options() + + def finalize_options(self): + self.set_undefined_options("build", ("build_lib", "build_lib")) + super().finalize_options() + + def run(self): + self.run_command("ts_install") + self.run_command("ts_build") + + system("echo 'static files generated. copying them to package dir'") + system(f"mkdir {self.build_lib}/swh/search/static") + system( + f"cp query_language/swh_ql.so {self.build_lib}/swh/search/static/swh_ql.so" + ) + system( + f"cp query_language/swh_ql.wasm " + f"{self.build_lib}/swh/search/static/swh_ql.wasm" + ) + + +class custom_build(build_py): + def run(self): + super().run() + + if not self.dry_run: + self.run_command("ts_build_export") + + +class custom_sdist(sdist): + def make_release_tree(self, base_dir, files): + super().make_release_tree(base_dir, files) + # TODO: build the .c file and .wasm but not .so, because it's architecture- + # dependent, and shouldn't be in a sdist (aka *source* distribution) + if not self.dry_run: + self.run_command("ts_install") + self.run_command("ts_build") + + system("echo 'static files generated. copying them to package dir'") + system(f"mkdir {base_dir}/swh/search/static") + system( + f"cp query_language/swh_ql.so {base_dir}/swh/search/static/swh_ql.so" + ) + system( + f"cp query_language/swh_ql.wasm " + f"{base_dir}/swh/search/static/swh_ql.wasm" + ) + + setup( name="swh.search", description="Software Heritage search service", long_description=long_description, long_description_content_type="text/markdown", python_requires=">=3.7", author="Software Heritage developers", author_email="swh-devel@inria.fr", url="https://forge.softwareheritage.org/diffusion/DSEA", packages=find_packages(), # packages's modules install_requires=parse_requirements() + parse_requirements("swh"), tests_require=parse_requirements("test"), entry_points=""" [swh.cli.subcommands] search=swh.search.cli """, setup_requires=["setuptools-scm"], use_scm_version=True, extras_require={"testing": parse_requirements("test")}, include_package_data=True, classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Development Status :: 3 - Alpha", ], project_urls={ "Bug Reports": "https://forge.softwareheritage.org/maniphest", "Funding": "https://www.softwareheritage.org/donate", "Source": "https://forge.softwareheritage.org/source/swh-search", "Documentation": "https://docs.softwareheritage.org/devel/swh-search/", }, + cmdclass={ + "build_py": custom_build, + "sdist": custom_sdist, + "ts_install": TSInstallCommand, + "ts_generate": TSGenerateCommand, + "ts_build_so": TSBuildSoCommand, + "ts_build_wasm": TSBuildWasmCommand, + "ts_build": TSBuildCommand, + "ts_build_export": TSBuildExportCommand, + }, + zip_safe=False, ) diff --git a/swh.search.egg-info/PKG-INFO b/swh.search.egg-info/PKG-INFO index 8d7a6a7..351e528 100644 --- a/swh.search.egg-info/PKG-INFO +++ b/swh.search.egg-info/PKG-INFO @@ -1,56 +1,91 @@ Metadata-Version: 2.1 Name: swh.search -Version: 0.10.0 +Version: 0.11.0 Summary: Software Heritage search service Home-page: https://forge.softwareheritage.org/diffusion/DSEA Author: Software Heritage developers Author-email: 
swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-search Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-search/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 3 - Alpha Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing License-File: LICENSE License-File: AUTHORS swh-search ========== Search service for the Software Heritage archive. It is similar to swh-storage in what it contains, but provides different ways to query it: while swh-storage is mostly a key-value store that returns an object from a primary key, swh-search is focused on reverse indices, to allow finding objects that match some criteria; for example full-text search. Currently uses ElasticSearch, and provides only origin search (by URL and metadata) -# Dependencies +## Dependencies -Python tests for this module include tests that cannot be run without a local +- Python tests for this module include tests that cannot be run without a local ElasticSearch instance, so you need the ElasticSearch server executable on your machine (no need to have a running ElasticSearch server). -## Debian-like host + - Debian-like host -The elasticsearch package is required. As it's not part of debian-stable, -[another debian repository is required to be -configured](https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html#deb-repo) + The elasticsearch package is required. As it's not part of debian-stable, + [another debian repository is required to be + configured](https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html#deb-repo) -## Non Debian-like host + - Non Debian-like host + + The tests expect: + - `/usr/share/elasticsearch/jdk/bin/java` to exist. + - `org.elasticsearch.bootstrap.Elasticsearch` to be in java's classpath. +- Emscripten is required for generating tree-sitter WASM module. The following commands need to be executed for the setup: + ```bash + cd /opt && git clone https://github.com/emscripten-core/emsdk.git && cd emsdk && \ + ./emsdk install latest && ./emsdk activate latest + PATH="${PATH}:/opt/emsdk/upstream/emscripten" + ``` + + **Note:** If emsdk isn't found in the PATH, the tree-sitter cli automatically pulls `emscripten/emsdk` image from docker hub when `make ts-build-wasm` or `make ts-build` is used. + + +## Make targets + +Below is the list of available make targets that can be executed from the root directory of swh-search in order to build and/or execute the swh-search under various configurations: + +* **ts-install**: Install node_modules and emscripten SDK required for TreeSitter + +* **ts-generate**: Generate parser files(C and JSON) from the grammar + +* **ts-repl**: Starts a web based playground for the TreeSitter grammar. It's the recommended way for developing TreeSitter grammar. + +* **ts-dev**: Parse the `query_language/sample_query` and print the corresponding syntax expression +along with the start and end positions of all the nodes. + +* **ts-dev sanitize=1**: Same as **ts-dev** but without start and end position of the nodes. +This format is expected by TreeSitter's native test command. 
`sanitize=1` cleans the output +of **ts-dev** using `sed` to achieve the desired format. + +* **ts-test**: executes TreeSitter's native tests + +* **ts-build-so**: Generates `swh_ql.so` file from the previously generated parser using py-tree-sitter + +* **ts-build-so**: Generates `swh_ql.wasm` file from the previously generated parser using emscripten + +* **ts-build**: Executes both **ts-build-so** and **ts-build-so** -The tests expect: -- `/usr/share/elasticsearch/jdk/bin/java` to exist. -- `org.elasticsearch.bootstrap.Elasticsearch` to be in java's classpath. diff --git a/swh.search.egg-info/SOURCES.txt b/swh.search.egg-info/SOURCES.txt index b01eb1d..6d24cc4 100644 --- a/swh.search.egg-info/SOURCES.txt +++ b/swh.search.egg-info/SOURCES.txt @@ -1,58 +1,70 @@ .gitignore .pre-commit-config.yaml AUTHORS CODE_OF_CONDUCT.md CONTRIBUTORS LICENSE MANIFEST.in Makefile +Makefile.local README.md mypy.ini +package.json pyproject.toml pytest.ini requirements-swh.txt requirements-test.txt requirements.txt setup.cfg setup.py tox.ini +yarn.lock docs/.gitignore docs/Makefile docs/cli.rst docs/conf.py docs/index.rst +docs/query-language.rst docs/_static/.placeholder docs/_templates/.placeholder es_config/elasticsearch.keystore es_config/elasticsearch.yml es_config/jvm.options es_config/log4j2.properties +query_language/.gitignore +query_language/build.py +query_language/grammar.js +query_language/sample_query +query_language/test/corpus/combinations.txt swh/__init__.py swh.search.egg-info/PKG-INFO swh.search.egg-info/SOURCES.txt swh.search.egg-info/dependency_links.txt swh.search.egg-info/entry_points.txt +swh.search.egg-info/not-zip-safe swh.search.egg-info/requires.txt swh.search.egg-info/top_level.txt swh/search/__init__.py swh/search/cli.py swh/search/elasticsearch.py swh/search/in_memory.py swh/search/interface.py swh/search/journal_client.py swh/search/metrics.py swh/search/py.typed +swh/search/translator.py swh/search/utils.py swh/search/api/__init__.py swh/search/api/client.py swh/search/api/server.py swh/search/tests/__init__.py swh/search/tests/conftest.py swh/search/tests/test_api_client.py swh/search/tests/test_cli.py swh/search/tests/test_elasticsearch.py swh/search/tests/test_in_memory.py swh/search/tests/test_init.py swh/search/tests/test_journal_client.py swh/search/tests/test_search.py -swh/search/tests/test_server.py \ No newline at end of file +swh/search/tests/test_server.py +swh/search/tests/test_translator.py \ No newline at end of file diff --git a/swh.search.egg-info/not-zip-safe b/swh.search.egg-info/not-zip-safe new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/swh.search.egg-info/not-zip-safe @@ -0,0 +1 @@ + diff --git a/swh.search.egg-info/requires.txt b/swh.search.egg-info/requires.txt index fb6c09c..1e51e34 100644 --- a/swh.search.egg-info/requires.txt +++ b/swh.search.egg-info/requires.txt @@ -1,16 +1,18 @@ click elasticsearch<8.0.0,>=7.0.0 typing-extensions +tree_sitter swh.core[http]>=0.3.0 swh.indexer swh.journal>=0.1.0 swh.model [testing] pytest pytest-mock confluent-kafka types-click types-pytz types-pyyaml types-requests +types-setuptools diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py index 6853226..efc2b66 100644 --- a/swh/search/elasticsearch.py +++ b/swh/search/elasticsearch.py @@ -1,614 +1,529 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for 
more information import base64 +import logging +import pprint from textwrap import dedent from typing import Any, Dict, Iterable, Iterator, List, Optional from elasticsearch import Elasticsearch, helpers import msgpack from swh.indexer import codemeta from swh.model import model from swh.model.identifiers import origin_identifier from swh.search.interface import ( SORT_BY_OPTIONS, MinimalOriginDict, OriginDict, PagedResult, ) from swh.search.metrics import send_metric, timed -from swh.search.utils import get_expansion, is_date_parsable +from swh.search.translator import Translator +from swh.search.utils import escape, get_expansion, is_date_parsable + +logger = logging.getLogger(__name__) INDEX_NAME_PARAM = "index" READ_ALIAS_PARAM = "read_alias" WRITE_ALIAS_PARAM = "write_alias" ORIGIN_DEFAULT_CONFIG = { INDEX_NAME_PARAM: "origin", READ_ALIAS_PARAM: "origin-read", WRITE_ALIAS_PARAM: "origin-write", } def _sanitize_origin(origin): origin = origin.copy() # Whitelist fields to be saved in Elasticsearch res = {"url": origin.pop("url")} for field_name in ( "blocklisted", "has_visits", "intrinsic_metadata", "visit_types", "nb_visits", "snapshot_id", "last_visit_date", "last_eventful_visit_date", "last_revision_date", "last_release_date", ): if field_name in origin: res[field_name] = origin.pop(field_name) # Run the JSON-LD expansion algorithm # # to normalize the Codemeta metadata. # This is required as Elasticsearch will needs each field to have a consistent # type across documents to be searchable; and non-expanded JSON-LD documents # can have various types in the same field. For example, all these are # equivalent in JSON-LD: # * {"author": "Jane Doe"} # * {"author": ["Jane Doe"]} # * {"author": {"@value": "Jane Doe"}} # * {"author": [{"@value": "Jane Doe"}]} # and JSON-LD expansion will convert them all to the last one. 
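# Illustrative example (the exact shape depends on the CodeMeta context):
#     {"author": "Jane Doe", "dateCreated": "2021-01-01"}
# expands to roughly:
#     [{"http://schema.org/author": [{"@value": "Jane Doe"}],
#       "http://schema.org/dateCreated": [{"@value": "2021-01-01"}]}]
# i.e. every key becomes a full IRI and every value a list of objects,
# giving Elasticsearch one consistent type per field.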
if "intrinsic_metadata" in res: intrinsic_metadata = res["intrinsic_metadata"] for date_field in ["dateCreated", "dateModified", "datePublished"]: if date_field in intrinsic_metadata: date = intrinsic_metadata[date_field] # If date{Created,Modified,Published} value isn't parsable # It gets rejected and isn't stored (unlike other fields) if not is_date_parsable(date): intrinsic_metadata.pop(date_field) res["intrinsic_metadata"] = codemeta.expand(intrinsic_metadata) return res def token_encode(index_to_tokenize: Dict[bytes, Any]) -> str: """Tokenize as string an index page result from a search""" page_token = base64.b64encode(msgpack.dumps(index_to_tokenize)) return page_token.decode() def token_decode(page_token: str) -> Dict[bytes, Any]: """Read the page_token""" return msgpack.loads(base64.b64decode(page_token.encode()), raw=True) class ElasticSearch: def __init__(self, hosts: List[str], indexes: Dict[str, Dict[str, str]] = {}): self._backend = Elasticsearch(hosts=hosts) + self._translator = Translator() # Merge current configuration with default values origin_config = indexes.get("origin", {}) self.origin_config = {**ORIGIN_DEFAULT_CONFIG, **origin_config} def _get_origin_index(self) -> str: return self.origin_config[INDEX_NAME_PARAM] def _get_origin_read_alias(self) -> str: return self.origin_config[READ_ALIAS_PARAM] def _get_origin_write_alias(self) -> str: return self.origin_config[WRITE_ALIAS_PARAM] @timed def check(self): return self._backend.ping() def deinitialize(self) -> None: """Removes all indices from the Elasticsearch backend""" self._backend.indices.delete(index="*") def initialize(self) -> None: """Declare Elasticsearch indices, aliases and mappings""" if not self._backend.indices.exists(index=self._get_origin_index()): self._backend.indices.create(index=self._get_origin_index()) if not self._backend.indices.exists_alias(self._get_origin_read_alias()): self._backend.indices.put_alias( index=self._get_origin_index(), name=self._get_origin_read_alias() ) if not self._backend.indices.exists_alias(self._get_origin_write_alias()): self._backend.indices.put_alias( index=self._get_origin_index(), name=self._get_origin_write_alias() ) self._backend.indices.put_mapping( index=self._get_origin_index(), body={ "dynamic_templates": [ { "booleans_as_string": { # All fields stored as string in the metadata # even the booleans "match_mapping_type": "boolean", "path_match": "intrinsic_metadata.*", "mapping": {"type": "keyword"}, } } ], "date_detection": False, "properties": { # sha1 of the URL; used as the document id "sha1": {"type": "keyword", "doc_values": True,}, # Used both to search URLs, and as the result to return # as a response to queries "url": { "type": "text", # To split URLs into token on any character # that is not alphanumerical "analyzer": "simple", # 2-gram and partial-3-gram search (ie. 
with the end of the # third word potentially missing) "fields": { "as_you_type": { "type": "search_as_you_type", "analyzer": "simple", } }, }, "visit_types": {"type": "keyword"}, # used to filter out origins that were never visited "has_visits": {"type": "boolean",}, "nb_visits": {"type": "integer"}, "snapshot_id": {"type": "keyword"}, "last_visit_date": {"type": "date"}, "last_eventful_visit_date": {"type": "date"}, "last_release_date": {"type": "date"}, "last_revision_date": {"type": "date"}, "intrinsic_metadata": { "type": "nested", "properties": { "@context": { # don't bother indexing tokens in these URIs, as the # are used as namespaces "type": "keyword", }, "http://schema": { "properties": { "org/dateCreated": { "properties": {"@value": {"type": "date",}} }, "org/dateModified": { "properties": {"@value": {"type": "date",}} }, "org/datePublished": { "properties": {"@value": {"type": "date",}} }, } }, }, }, # Has this origin been taken down? "blocklisted": {"type": "boolean",}, }, }, ) @timed def flush(self) -> None: self._backend.indices.refresh(index=self._get_origin_write_alias()) @timed def origin_update(self, documents: Iterable[OriginDict]) -> None: write_index = self._get_origin_write_alias() documents = map(_sanitize_origin, documents) documents_with_sha1 = ( (origin_identifier(document), document) for document in documents ) # painless script that will be executed when updating an origin document update_script = dedent( """ // utility function to get and parse date ZonedDateTime getDate(def ctx, String date_field) { String default_date = "0001-01-01T00:00:00Z"; String date = ctx._source.getOrDefault(date_field, default_date); return ZonedDateTime.parse(date); } // backup current visit_types field value List visit_types = ctx._source.getOrDefault("visit_types", []); int nb_visits = ctx._source.getOrDefault("nb_visits", 0); ZonedDateTime last_visit_date = getDate(ctx, "last_visit_date"); String snapshot_id = ctx._source.getOrDefault("snapshot_id", ""); ZonedDateTime last_eventful_visit_date = getDate(ctx, "last_eventful_visit_date"); ZonedDateTime last_revision_date = getDate(ctx, "last_revision_date"); ZonedDateTime last_release_date = getDate(ctx, "last_release_date"); // update origin document with new field values ctx._source.putAll(params); // restore previous visit types after visit_types field overriding if (ctx._source.containsKey("visit_types")) { for (int i = 0; i < visit_types.length; ++i) { if (!ctx._source.visit_types.contains(visit_types[i])) { ctx._source.visit_types.add(visit_types[i]); } } } // Undo overwrite if incoming nb_visits is smaller if (ctx._source.containsKey("nb_visits")) { int incoming_nb_visits = ctx._source.getOrDefault("nb_visits", 0); if(incoming_nb_visits < nb_visits){ ctx._source.nb_visits = nb_visits; } } // Undo overwrite if incoming last_visit_date is older if (ctx._source.containsKey("last_visit_date")) { ZonedDateTime incoming_last_visit_date = getDate(ctx, "last_visit_date"); int difference = // returns -1, 0 or 1 incoming_last_visit_date.compareTo(last_visit_date); if(difference < 0){ ctx._source.last_visit_date = last_visit_date; } } // Undo update of last_eventful_date and snapshot_id if // snapshot_id hasn't changed OR incoming_last_eventful_visit_date is older if (ctx._source.containsKey("snapshot_id")) { String incoming_snapshot_id = ctx._source.getOrDefault("snapshot_id", ""); ZonedDateTime incoming_last_eventful_visit_date = getDate(ctx, "last_eventful_visit_date"); int difference = // returns -1, 0 or 1 
incoming_last_eventful_visit_date.compareTo(last_eventful_visit_date); if(snapshot_id == incoming_snapshot_id || difference < 0){ ctx._source.snapshot_id = snapshot_id; ctx._source.last_eventful_visit_date = last_eventful_visit_date; } } // Undo overwrite if incoming last_revision_date is older if (ctx._source.containsKey("last_revision_date")) { ZonedDateTime incoming_last_revision_date = getDate(ctx, "last_revision_date"); int difference = // returns -1, 0 or 1 incoming_last_revision_date.compareTo(last_revision_date); if(difference < 0){ ctx._source.last_revision_date = last_revision_date; } } // Undo overwrite if incoming last_release_date is older if (ctx._source.containsKey("last_release_date")) { ZonedDateTime incoming_last_release_date = getDate(ctx, "last_release_date"); // returns -1, 0 or 1 int difference = incoming_last_release_date.compareTo(last_release_date); if(difference < 0){ ctx._source.last_release_date = last_release_date; } } """ # noqa ) actions = [ { "_op_type": "update", "_id": sha1, "_index": write_index, "scripted_upsert": True, "upsert": {**document, "sha1": sha1,}, "script": { "source": update_script, "lang": "painless", "params": document, }, } for (sha1, document) in documents_with_sha1 ] indexed_count, errors = helpers.bulk(self._backend, actions, index=write_index) assert isinstance(errors, List) # Make mypy happy send_metric("document:index", count=indexed_count, method_name="origin_update") send_metric( "document:index_error", count=len(errors), method_name="origin_update" ) def origin_dump(self) -> Iterator[model.Origin]: results = helpers.scan(self._backend, index=self._get_origin_read_alias()) for hit in results: yield self._backend.termvectors( index=self._get_origin_read_alias(), id=hit["_id"], fields=["*"] ) @timed def origin_search( self, *, + query: str = "", url_pattern: Optional[str] = None, metadata_pattern: Optional[str] = None, with_visit: bool = False, visit_types: Optional[List[str]] = None, min_nb_visits: int = 0, min_last_visit_date: str = "", min_last_eventful_visit_date: str = "", min_last_revision_date: str = "", min_last_release_date: str = "", min_date_created: str = "", min_date_modified: str = "", min_date_published: str = "", programming_languages: Optional[List[str]] = None, licenses: Optional[List[str]] = None, keywords: Optional[List[str]] = None, sort_by: Optional[List[str]] = None, page_token: Optional[str] = None, limit: int = 50, ) -> PagedResult[MinimalOriginDict]: query_clauses: List[Dict[str, Any]] = [] + query_filters = [] if url_pattern: - query_clauses.append( - { - "multi_match": { - "query": url_pattern, - "type": "bool_prefix", - "operator": "and", - "fields": [ - "url.as_you_type", - "url.as_you_type._2gram", - "url.as_you_type._3gram", - ], - } - } - ) + query_filters.append(f"origin = {escape(url_pattern)}") if metadata_pattern: - query_clauses.append( - { - "nested": { - "path": "intrinsic_metadata", - "query": { - "multi_match": { - "query": metadata_pattern, - # Makes it so that the "foo bar" query returns - # documents which contain "foo" in a field and "bar" - # in a different field - "type": "cross_fields", - # All keywords must be found in a document for it to - # be considered a match. - # TODO: allow missing keywords? - "operator": "and", - # Searches on all fields of the intrinsic_metadata dict, - # recursively. 
- "fields": ["intrinsic_metadata.*"], - # date{Created,Modified,Published} are of type date - "lenient": True, - } - }, - } - } - ) + query_filters.append(f"metadata = {escape(metadata_pattern)}") - if not query_clauses: - raise ValueError( - "At least one of url_pattern and metadata_pattern must be provided." - ) + # if not query_clauses: + # raise ValueError( + # "At least one of url_pattern and metadata_pattern must be provided." + # ) if with_visit: - query_clauses.append({"term": {"has_visits": True,}}) + query_filters.append(f"visited = {'true' if with_visit else 'false'}") if min_nb_visits: - query_clauses.append({"range": {"nb_visits": {"gte": min_nb_visits,},}}) + query_filters.append(f"visits >= {min_nb_visits}") if min_last_visit_date: - query_clauses.append( - { - "range": { - "last_visit_date": { - "gte": min_last_visit_date.replace("Z", "+00:00"), - } - } - } + query_filters.append( + f"last_visit >= {min_last_visit_date.replace('Z', '+00:00')}" ) if min_last_eventful_visit_date: - query_clauses.append( - { - "range": { - "last_eventful_visit_date": { - "gte": min_last_eventful_visit_date.replace("Z", "+00:00"), - } - } - } + query_filters.append( + "last_eventful_visit >= " + f"{min_last_eventful_visit_date.replace('Z', '+00:00')}" ) if min_last_revision_date: - query_clauses.append( - { - "range": { - "last_revision_date": { - "gte": min_last_revision_date.replace("Z", "+00:00"), - } - } - } + query_filters.append( + f"last_revision >= {min_last_revision_date.replace('Z', '+00:00')}" ) if min_last_release_date: - query_clauses.append( - { - "range": { - "last_release_date": { - "gte": min_last_release_date.replace("Z", "+00:00"), - } - } - } + query_filters.append( + f"last_release >= {min_last_release_date.replace('Z', '+00:00')}" ) if keywords: - query_clauses.append( - { - "nested": { - "path": "intrinsic_metadata", - "query": { - "multi_match": { - "query": " ".join(keywords), - "fields": [ - get_expansion("keywords", ".") + "^2", - get_expansion("descriptions", "."), - # "^2" boosts an origin's score by 2x - # if it the queried keywords are - # found in its intrinsic_metadata.keywords - ], - } - }, - } - } - ) - - intrinsic_metadata_filters: List[Dict[str, Dict]] = [] - + query_filters.append(f"keyword in {escape(keywords)}") if licenses: - license_filters: List[Dict[str, Any]] = [] - for license in licenses: - license_filters.append( - {"match": {get_expansion("licenses", "."): license}} - ) - intrinsic_metadata_filters.append({"bool": {"should": license_filters}}) + query_filters.append(f"license in {escape(licenses)}") if programming_languages: - language_filters: List[Dict[str, Any]] = [] - for language in programming_languages: - language_filters.append( - {"match": {get_expansion("programming_languages", "."): language}} - ) - intrinsic_metadata_filters.append({"bool": {"should": language_filters}}) + query_filters.append(f"language in {escape(programming_languages)}") if min_date_created: - intrinsic_metadata_filters.append( - { - "range": { - get_expansion("date_created", "."): {"gte": min_date_created,} - } - } + query_filters.append( + f"created >= {min_date_created.replace('Z', '+00:00')}" ) if min_date_modified: - intrinsic_metadata_filters.append( - { - "range": { - get_expansion("date_modified", "."): {"gte": min_date_modified,} - } - } + query_filters.append( + f"modified >= {min_date_modified.replace('Z', '+00:00')}" ) if min_date_published: - intrinsic_metadata_filters.append( - { - "range": { - get_expansion("date_published", "."): { - "gte": 
min_date_published, - } - } - } - ) - - if intrinsic_metadata_filters: - query_clauses.append( - { - "nested": { - "path": "intrinsic_metadata", - "query": {"bool": {"must": intrinsic_metadata_filters,}}, - # "must" is equivalent to "AND" - # "should" is equivalent to "OR" - # Resulting origins must return true for the following: - # (license_1 OR license_2 ..) AND (lang_1 OR lang_2 ..) - # This is equivalent to {"must": [ - # {"should": [license_1,license_2] }, - # {"should": [lang_1,lang_2]}] } - # ]} - # Note: Usage of "bool" has been omitted for readability - } - } + query_filters.append( + f"published >= {min_date_published.replace('Z', '+00:00')}" ) if visit_types is not None: - query_clauses.append({"terms": {"visit_types": visit_types}}) + query_filters.append(f"visit_type = {escape(visit_types)}") + + combined_filters = f"({' and '.join(query_filters)})" + query = f"{combined_filters}{' and ' if query != '' else ' '}{query}" + parsed_query = self._translator.parse_query(query) + query_clauses.append(parsed_query["filters"]) + + field_map = { + "visits": "nb_visits", + "last_visit": "last_visit_date", + "last_eventful_visit": "last_eventful_visit_date", + "last_revision": "last_revision_date", + "last_release": "last_release_date", + "created": "date_created", + "modified": "date_modified", + "published": "date_published", + } + + if "sortBy" in parsed_query: + if sort_by is None: + sort_by = [] + for sort_by_option in parsed_query["sortBy"]: + if sort_by_option[0] == "-": + sort_by.append("-" + field_map[sort_by_option[1:]]) + else: + sort_by.append(field_map[sort_by_option]) + if parsed_query.get("limit", 0): + limit = parsed_query["limit"] sorting_params: List[Dict[str, Any]] = [] if sort_by: for field in sort_by: order = "asc" if field and field[0] == "-": field = field[1:] order = "desc" if field in ["date_created", "date_modified", "date_published"]: sorting_params.append( { get_expansion(field, "."): { "nested_path": "intrinsic_metadata", "order": order, } } ) elif field in SORT_BY_OPTIONS: sorting_params.append({field: order}) sorting_params.extend( [{"_score": "desc"}, {"sha1": "asc"},] ) body = { "query": { "bool": { "must": query_clauses, "must_not": [{"term": {"blocklisted": True}}], } }, "sort": sorting_params, } if page_token: # TODO: use ElasticSearch's scroll API? 
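
The replacement above drops the hand-written Elasticsearch clauses: each keyword argument now becomes a swh-search query-language filter string, the filters are joined with `and`, an optional caller-supplied `query` is appended, and the combined string is handed to the translator. A minimal sketch of that assembly step is shown below; it assumes a naive `quote` stand-in for the `escape` helper used in the diff, and does not reproduce the real translator or `parse_query`.

```python
# Illustrative sketch only: mirrors how the keyword arguments above are folded
# into a single query-language string before parsing. `quote` is a naive
# stand-in for the `escape` helper used in the diff, not the real function.
from typing import List, Optional


def quote(value: str) -> str:
    return '"' + value.replace('"', '\\"') + '"'


def build_query(
    query: str = "",
    url_pattern: Optional[str] = None,
    min_nb_visits: int = 0,
    visit_types: Optional[List[str]] = None,
) -> str:
    filters = []
    if url_pattern:
        filters.append(f"origin = {quote(url_pattern)}")
    if min_nb_visits:
        filters.append(f"visits >= {min_nb_visits}")
    if visit_types is not None:
        filters.append(f"visit_type = [{', '.join(quote(v) for v in visit_types)}]")
    combined = f"({' and '.join(filters)})"
    # Same joining rule as above: append the free-form query, if any.
    return f"{combined}{' and ' if query else ' '}{query}"


print(build_query(url_pattern="foobar", min_nb_visits=2, query="sort_by = [-visits]"))
# -> (origin = "foobar" and visits >= 2) and sort_by = [-visits]
```
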
page_token_content = token_decode(page_token) body["search_after"] = [ page_token_content[b"score"], page_token_content[b"sha1"].decode("ascii"), ] + if logger.isEnabledFor(logging.DEBUG): + formatted_body = pprint.pformat(body) + logger.debug("Search query body: %s", formatted_body) + res = self._backend.search( index=self._get_origin_read_alias(), body=body, size=limit ) hits = res["hits"]["hits"] next_page_token: Optional[str] = None if len(hits) == limit: # There are more results after this page; return a pagination token # to get them in a future query last_hit = hits[-1] next_page_token_content = { b"score": last_hit["_score"], b"sha1": last_hit["_source"]["sha1"], } next_page_token = token_encode(next_page_token_content) assert len(hits) <= limit return PagedResult( results=[{"url": hit["_source"]["url"]} for hit in hits], next_page_token=next_page_token, ) diff --git a/swh/search/in_memory.py b/swh/search/in_memory.py index 3a5938e..4492f5b 100644 --- a/swh/search/in_memory.py +++ b/swh/search/in_memory.py @@ -1,507 +1,508 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from collections import defaultdict from datetime import datetime, timezone import re from typing import Any, Dict, Iterable, Iterator, List, Optional from swh.indexer import codemeta from swh.model.identifiers import origin_identifier from swh.search.interface import ( SORT_BY_OPTIONS, MinimalOriginDict, OriginDict, PagedResult, ) from swh.search.utils import get_expansion, is_date_parsable _words_regexp = re.compile(r"\w+") def _dict_words_set(d): """Recursively extract set of words from dict content.""" values = set() def extract(obj, words): if isinstance(obj, dict): for k, v in obj.items(): extract(v, words) elif isinstance(obj, list): for item in obj: extract(item, words) else: words.update(_words_regexp.findall(str(obj).lower())) return words return extract(d, values) def _nested_get(nested_dict, nested_keys, default=""): """Extracts values from deeply nested dictionary nested_dict using the nested_keys and returns a list of all of the values discovered in the process. >>> nested_dict = [ ... {"name": [{"@value": {"first": "f1", "last": "l1"}}], "address": "XYZ"}, ... {"name": [{"@value": {"first": "f2", "last": "l2"}}], "address": "ABC"}, ... 
] >>> _nested_get(nested_dict, ["name", "@value", "last"]) ['l1', 'l2'] >>> _nested_get(nested_dict, ["address"]) ['XYZ', 'ABC'] It doesn't allow fetching intermediate values and returns "" for such cases >>> _nested_get(nested_dict, ["name", "@value"]) ['', ''] """ def _nested_get_recursive(nested_dict, nested_keys): try: curr_obj = nested_dict type_curr_obj = type(curr_obj) for i, key in enumerate(nested_keys): if key in curr_obj: curr_obj = curr_obj[key] type_curr_obj = type(curr_obj) else: if type_curr_obj == list: curr_obj = [ _nested_get_recursive(obj, nested_keys[i:]) for obj in curr_obj ] # If value isn't a list or string or integer elif type_curr_obj != str and type_curr_obj != int: return default # If only one element is present in the list, take it out # This ensures a flat array every time if type_curr_obj == list and len(curr_obj) == 1: curr_obj = curr_obj[0] return curr_obj except Exception: return default res = _nested_get_recursive(nested_dict, nested_keys) if type(res) != list: return [res] return res def _tokenize(x): return x.lower().replace(",", " ").split() def _get_sorting_key(origin, field): """Get value of the field from an origin for sorting origins. Here field should be a member of SORT_BY_OPTIONS. If "-" is present at the start of field then invert the value in a way that it reverses the sorting order. """ reversed = False if field[0] == "-": field = field[1:] reversed = True DATETIME_OBJ_MAX = datetime.max.replace(tzinfo=timezone.utc) DATETIME_MIN = "0001-01-01T00:00:00Z" DATE_OBJ_MAX = datetime.max DATE_MIN = "0001-01-01" if field == "score": if reversed: return -origin.get(field, 0) else: return origin.get(field, 0) if field in ["date_created", "date_modified", "date_published"]: date = datetime.strptime( _nested_get(origin, get_expansion(field), DATE_MIN)[0], "%Y-%m-%d" ) if reversed: return DATE_OBJ_MAX - date else: return date elif field in ["nb_visits"]: # unlike other options, nb_visits is of type integer if reversed: return -origin.get(field, 0) else: return origin.get(field, 0) elif field in SORT_BY_OPTIONS: date = datetime.fromisoformat( origin.get(field, DATETIME_MIN).replace("Z", "+00:00") ) if reversed: return DATETIME_OBJ_MAX - date else: return date class InMemorySearch: def __init__(self): pass def check(self): return True def deinitialize(self) -> None: if hasattr(self, "_origins"): del self._origins del self._origin_ids def initialize(self) -> None: self._origins: Dict[str, Dict[str, Any]] = defaultdict(dict) self._origin_ids: List[str] = [] def flush(self) -> None: pass _url_splitter = re.compile(r"\W") def origin_update(self, documents: Iterable[OriginDict]) -> None: for source_document in documents: document: Dict[str, Any] = dict(source_document) id_ = origin_identifier(document) if "url" in document: document["_url_tokens"] = set( self._url_splitter.split(source_document["url"]) ) if "visit_types" in document: document["visit_types"] = set(source_document["visit_types"]) if "visit_types" in self._origins[id_]: document["visit_types"].update(self._origins[id_]["visit_types"]) if "nb_visits" in document: document["nb_visits"] = max( document["nb_visits"], self._origins[id_].get("nb_visits", 0) ) if "last_visit_date" in document: document["last_visit_date"] = max( datetime.fromisoformat(document["last_visit_date"]), datetime.fromisoformat( self._origins[id_] .get("last_visit_date", "0001-01-01T00:00:00.000000Z",) .replace("Z", "+00:00") ), ).isoformat() if "snapshot_id" in document and "last_eventful_visit_date" in document: incoming_date = 
datetime.fromisoformat( document["last_eventful_visit_date"] ) current_date = datetime.fromisoformat( self._origins[id_] .get("last_eventful_visit_date", "0001-01-01T00:00:00Z",) .replace("Z", "+00:00") ) incoming_snapshot_id = document["snapshot_id"] current_snapshot_id = self._origins[id_].get("snapshot_id", "") if ( incoming_snapshot_id == current_snapshot_id or incoming_date < current_date ): # update not required so override the incoming_values document["snapshot_id"] = current_snapshot_id document["last_eventful_visit_date"] = current_date.isoformat() if "last_revision_date" in document: document["last_revision_date"] = max( datetime.fromisoformat(document["last_revision_date"]), datetime.fromisoformat( self._origins[id_] .get("last_revision_date", "0001-01-01T00:00:00Z",) .replace("Z", "+00:00") ), ).isoformat() if "last_release_date" in document: document["last_release_date"] = max( datetime.fromisoformat(document["last_release_date"]), datetime.fromisoformat( self._origins[id_] .get("last_release_date", "0001-01-01T00:00:00Z",) .replace("Z", "+00:00") ), ).isoformat() if "intrinsic_metadata" in document: intrinsic_metadata = document["intrinsic_metadata"] for date_field in ["dateCreated", "dateModified", "datePublished"]: if date_field in intrinsic_metadata: date = intrinsic_metadata[date_field] # If date{Created,Modified,Published} value isn't parsable # It gets rejected and isn't stored (unlike other fields) if not is_date_parsable(date): intrinsic_metadata.pop(date_field) document["intrinsic_metadata"] = codemeta.expand(intrinsic_metadata) if len(document["intrinsic_metadata"]) != 1: continue metadata = document["intrinsic_metadata"][0] if "http://schema.org/license" in metadata: metadata["http://schema.org/license"] = [ {"@id": license["@id"].lower()} for license in metadata["http://schema.org/license"] ] if "http://schema.org/programmingLanguage" in metadata: metadata["http://schema.org/programmingLanguage"] = [ {"@value": license["@value"].lower()} for license in metadata["http://schema.org/programmingLanguage"] ] self._origins[id_].update(document) if id_ not in self._origin_ids: self._origin_ids.append(id_) def origin_search( self, *, + query: str = "", url_pattern: Optional[str] = None, metadata_pattern: Optional[str] = None, with_visit: bool = False, visit_types: Optional[List[str]] = None, min_nb_visits: int = 0, min_last_visit_date: str = "", min_last_eventful_visit_date: str = "", min_last_revision_date: str = "", min_last_release_date: str = "", min_date_created: str = "", min_date_modified: str = "", min_date_published: str = "", programming_languages: Optional[List[str]] = None, licenses: Optional[List[str]] = None, keywords: Optional[List[str]] = None, sort_by: Optional[List[str]] = None, page_token: Optional[str] = None, limit: int = 50, ) -> PagedResult[MinimalOriginDict]: hits: Iterator[Dict[str, Any]] = ( self._origins[id_] for id_ in self._origin_ids if not self._origins[id_].get("blocklisted") ) if url_pattern: tokens = set(self._url_splitter.split(url_pattern)) def predicate(match): missing_tokens = tokens - match["_url_tokens"] if len(missing_tokens) == 0: return True elif len(missing_tokens) > 1: return False else: # There is one missing token, look up by prefix. 
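
The missing-token branch here implements the in-memory URL matching rule: every token of the search pattern must already be one of the origin URL's tokens, and at most one remaining token may still match as a prefix of a URL token. A standalone sketch of that rule, for illustration only (it mirrors the `_url_splitter` regexp defined above rather than the backend code itself):

```python
# Illustrative sketch of the token/prefix URL matching used by the in-memory
# backend; `_url_splitter` mirrors the re.compile(r"\W") defined above.
import re

_url_splitter = re.compile(r"\W")


def url_matches(url_pattern: str, origin_url: str) -> bool:
    tokens = set(_url_splitter.split(url_pattern))
    origin_tokens = set(_url_splitter.split(origin_url))
    missing = tokens - origin_tokens
    if not missing:
        return True
    if len(missing) > 1:
        return False
    (missing_token,) = missing
    # A single missing token may still match as a prefix of some URL token.
    return any(token.startswith(missing_token) for token in origin_tokens)


assert url_matches("foobar", "http://foobar.baz")
assert url_matches("barb", "http://barbaz.qux")     # prefix match on "barbaz"
assert not url_matches("bar", "http://foobar.baz")  # "bar" is not a prefix of any token
```
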
(missing_token,) = missing_tokens return any( token.startswith(missing_token) for token in match["_url_tokens"] ) hits = filter(predicate, hits) if metadata_pattern: metadata_pattern_words = set( _words_regexp.findall(metadata_pattern.lower()) ) def predicate(match): if "intrinsic_metadata" not in match: return False return metadata_pattern_words.issubset( _dict_words_set(match["intrinsic_metadata"]) ) hits = filter(predicate, hits) if not url_pattern and not metadata_pattern: raise ValueError( "At least one of url_pattern and metadata_pattern must be provided." ) next_page_token: Optional[str] = None if with_visit: hits = filter(lambda o: o.get("has_visits"), hits) if min_nb_visits: hits = filter(lambda o: o.get("nb_visits", 0) >= min_nb_visits, hits) if min_last_visit_date: hits = filter( lambda o: datetime.fromisoformat( o.get("last_visit_date", "0001-01-01T00:00:00Z").replace( "Z", "+00:00" ) ) >= datetime.fromisoformat(min_last_visit_date), hits, ) if min_last_eventful_visit_date: hits = filter( lambda o: datetime.fromisoformat( o.get("last_eventful_visit_date", "0001-01-01T00:00:00Z").replace( "Z", "+00:00" ) ) >= datetime.fromisoformat(min_last_eventful_visit_date), hits, ) if min_last_revision_date: hits = filter( lambda o: datetime.fromisoformat( o.get("last_revision_date", "0001-01-01T00:00:00Z").replace( "Z", "+00:00" ) ) >= datetime.fromisoformat(min_last_revision_date), hits, ) if min_last_release_date: hits = filter( lambda o: datetime.fromisoformat( o.get("last_release_date", "0001-01-01T00:00:00Z").replace( "Z", "+00:00" ) ) >= datetime.fromisoformat(min_last_release_date), hits, ) if min_date_created: min_date_created_obj = datetime.strptime(min_date_created, "%Y-%m-%d") hits = filter( lambda o: datetime.strptime( _nested_get(o, get_expansion("date_created"))[0], "%Y-%m-%d" ) >= min_date_created_obj, hits, ) if min_date_modified: min_date_modified_obj = datetime.strptime(min_date_modified, "%Y-%m-%d") hits = filter( lambda o: datetime.strptime( _nested_get(o, get_expansion("date_modified"))[0], "%Y-%m-%d" ) >= min_date_modified_obj, hits, ) if min_date_published: min_date_published_obj = datetime.strptime(min_date_published, "%Y-%m-%d") hits = filter( lambda o: datetime.strptime( _nested_get(o, get_expansion("date_published"))[0], "%Y-%m-%d" ) >= min_date_published_obj, hits, ) if licenses: queried_licenses = [license_keyword.lower() for license_keyword in licenses] hits = filter( lambda o: any( # If any of the queried licenses are found, include the origin any( # returns True if queried_license_keyword is found # in any of the licenses of the origin queried_license_keyword in origin_license for origin_license in _nested_get(o, get_expansion("licenses")) ) for queried_license_keyword in queried_licenses ), hits, ) if programming_languages: queried_programming_languages = [ lang_keyword.lower() for lang_keyword in programming_languages ] hits = filter( lambda o: any( # If any of the queried languages are found, include the origin any( # returns True if queried_lang_keyword is found # in any of the langs of the origin queried_lang_keyword in origin_lang for origin_lang in _nested_get( o, get_expansion("programming_languages") ) ) for queried_lang_keyword in queried_programming_languages ), hits, ) if keywords: if sort_by: sort_by.append("-score") else: sort_by = ["-score"] from copy import deepcopy hits_list = deepcopy(list(hits)) for origin in hits_list: origin_keywords = [ _tokenize(keyword) for keyword in _nested_get(origin, get_expansion("keywords")) ] 
origin_descriptions = [ _tokenize(description) for description in _nested_get( origin, get_expansion("descriptions") ) ] for q_keyword in keywords: for origin_keyword_tokens in origin_keywords: if q_keyword in origin_keyword_tokens: origin["score"] = origin.get("score", 0) + 2 for origin_description_token in origin_descriptions: if q_keyword in origin_description_token: origin["score"] = origin.get("score", 0) + 1 hits = (origin for origin in hits_list if origin.get("score", 0) > 0) if visit_types is not None: visit_types_set = set(visit_types) hits = filter( lambda o: visit_types_set.intersection(o.get("visit_types", set())), hits, ) hits_list = list(hits) if sort_by: sort_by_list = list(sort_by) hits_list.sort( key=lambda o: tuple( _get_sorting_key(o, field) for field in sort_by_list ) ) start_at_index = int(page_token) if page_token else 0 origins = [ {"url": hit["url"]} for hit in hits_list[start_at_index : start_at_index + limit] ] if len(origins) == limit: next_page_token = str(start_at_index + limit) assert len(origins) <= limit return PagedResult(results=origins, next_page_token=next_page_token,) diff --git a/swh/search/interface.py b/swh/search/interface.py index b2fb4ab..73bb3a2 100644 --- a/swh/search/interface.py +++ b/swh/search/interface.py @@ -1,133 +1,135 @@ # Copyright (C) 2020-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Iterable, List, Optional, TypeVar from typing_extensions import TypedDict from swh.core.api import remote_api_endpoint from swh.core.api.classes import PagedResult as CorePagedResult TResult = TypeVar("TResult") PagedResult = CorePagedResult[TResult, str] SORT_BY_OPTIONS = [ "nb_visits", "last_visit_date", "last_eventful_visit_date", "last_revision_date", "last_release_date", "date_created", "date_modified", "date_published", ] class MinimalOriginDict(TypedDict): """Mandatory keys of an :class:`OriginDict`""" url: str class OriginDict(MinimalOriginDict, total=False): """Argument passed to :meth:`SearchInterface.origin_update`.""" visit_types: List[str] has_visits: bool class SearchInterface: @remote_api_endpoint("check") def check(self): """Dedicated method to execute some specific check per implementation. """ ... @remote_api_endpoint("flush") def flush(self) -> None: """Blocks until all previous calls to _update() are completely applied. """ ... @remote_api_endpoint("origin/update") def origin_update(self, documents: Iterable[OriginDict]) -> None: """Persist documents to the search backend. """ ... @remote_api_endpoint("origin/search") def origin_search( self, *, + query: str = "", url_pattern: Optional[str] = None, metadata_pattern: Optional[str] = None, with_visit: bool = False, visit_types: Optional[List[str]] = None, min_nb_visits: int = 0, min_last_visit_date: str = "", min_last_eventful_visit_date: str = "", min_last_revision_date: str = "", min_last_release_date: str = "", min_date_created: str = "", min_date_modified: str = "", min_date_published: str = "", programming_languages: Optional[List[str]] = None, licenses: Optional[List[str]] = None, keywords: Optional[List[str]] = None, sort_by: Optional[List[str]] = None, page_token: Optional[str] = None, limit: int = 50, ) -> PagedResult[MinimalOriginDict]: """Searches for origins matching the `url_pattern`. 
Args: + query: Find origins matching the given query, written in the + swh-search query language syntax. url_pattern: Part of the URL to search for metadata_pattern: Keywords to look for (across all the fields of intrinsic_metadata) - with_visit: Whether origins with no visit are to be - filtered out + with_visit: Whether origins with no visits are to be filtered out visit_types: Only origins having any of the provided visit types (e.g. git, svn, pypi) will be returned min_nb_visits: Filter origins that have number of visits >= the provided value min_last_visit_date: Filter origins that have last_visit_date on or after the provided date (ISO format) min_last_eventful_visit_date: Filter origins that have last_eventful_visit_date (eventful = snapshot_id changed) on or after the provided date (ISO format) min_last_revision_date: Filter origins that have last_revision_date on or after the provided date (ISO format) min_last_release_date: Filter origins that have last_release_date on or after the provided date (ISO format) min_date_created: Filter origins that have date_created from intrinsic_metadata on or after the provided date min_date_modified: Filter origins that have date_modified from intrinsic_metadata on or after the provided date min_date_published: Filter origins that have date_published from intrinsic_metadata on or after the provided date programming_languages: Filter origins with programming languages present in the given list (based on intrinsic_metadata) licenses: Filter origins with licenses present in the given list (based on intrinsic_metadata) keywords: Filter origins having description/keywords (extracted from intrinsic_metadata) that match given values sort_by: Sort results based on a list of fields mentioned in SORT_BY_OPTIONS (nb_visits, last_visit_date, last_eventful_visit_date, last_revision_date, last_release_date). Return results in descending order if "-" is present at the beginning, otherwise in ascending order. page_token: Opaque value used for pagination limit: Number of results to return Returns: PagedResult of origin dicts matching the search criteria. If next_page_token is None, there is no more data to retrieve. """ ... diff --git a/swh/search/journal_client.py b/swh/search/journal_client.py index 95922f6..2884e10 100644 --- a/swh/search/journal_client.py +++ b/swh/search/journal_client.py @@ -1,120 +1,131 @@ # Copyright (C) 2018-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging +import sys +from typing import Dict, Optional from swh.model.model import TargetType from swh.storage.algos.snapshot import snapshot_get_all_branches +from swh.storage.interface import StorageInterface EXPECTED_MESSAGE_TYPES = { "origin", - "origin_visit", "origin_visit_status", "origin_intrinsic_metadata", } -def fetch_last_revision_release_date(snapshot_id, storage): +def fetch_last_revision_release_date( + snapshot_id: bytes, storage: StorageInterface +) -> Dict[str, str]: + if "pytest" not in sys.modules: + # FIXME: This function is too slow to be reasonably used in the journal-client + # (at least the main one), we need to figure out a solution before this can + # be enabled again.
+ return {} + if not snapshot_id: return {} - branches = snapshot_get_all_branches(storage, snapshot_id).branches.values() + snapshot = snapshot_get_all_branches(storage, snapshot_id) + if not snapshot: + return {} + + branches = snapshot.branches.values() tip_revision_ids = [] tip_release_ids = [] for branch in branches: if branch.target_type == TargetType.REVISION: tip_revision_ids.append(branch.target) elif branch.target_type == TargetType.RELEASE: tip_release_ids.append(branch.target) revision_datetimes = [ revision.date.to_datetime() for revision in storage.revision_get(tip_revision_ids) + if revision and revision.date ] release_datetimes = [ - release.date.to_datetime() for release in storage.release_get(tip_release_ids) + release.date.to_datetime() + for release in storage.release_get(tip_release_ids) + if release and release.date ] - return { - "last_revision_date": max(revision_datetimes).isoformat(), - "last_release_date": max(release_datetimes).isoformat(), - } + ret = {} + if revision_datetimes: + ret["last_revision_date"] = max(revision_datetimes).isoformat() + if release_datetimes: + ret["last_release_date"] = max(release_datetimes).isoformat() + + return ret def process_journal_objects(messages, *, search, storage=None): """Worker function for `JournalClient.process(worker_fn)`, after currification of `scheduler` and `task_names`.""" assert set(messages) <= EXPECTED_MESSAGE_TYPES, set(messages) if "origin" in messages: process_origins(messages["origin"], search) - if "origin_visit" in messages: - process_origin_visits(messages["origin_visit"], search) - if "origin_visit_status" in messages: process_origin_visit_statuses(messages["origin_visit_status"], search, storage) if "origin_intrinsic_metadata" in messages: process_origin_intrinsic_metadata(messages["origin_intrinsic_metadata"], search) def process_origins(origins, search): logging.debug("processing origins %r", origins) search.origin_update(origins) -def process_origin_visits(visits, search): - logging.debug("processing origin visits %r", visits) - - search.origin_update( - [ - { - "url": ( - visit["origin"] - if isinstance(visit["origin"], str) - else visit["origin"]["url"] - ), - "visit_types": [visit["type"]], - } - for visit in visits - ] - ) - - def process_origin_visit_statuses(visit_statuses, search, storage): logging.debug("processing origin visit statuses %r", visit_statuses) - full_visit_status = [ - { + def hexify(b: Optional[bytes]) -> Optional[str]: + if b is None: + return None + return b.hex() + + processed_visit_statuses = [] + for visit_status in visit_statuses: + processed_status = { "url": visit_status["origin"], - "has_visits": True, - "nb_visits": visit_status["visit"], - "snapshot_id": visit_status.get("snapshot"), - "last_visit_date": visit_status["date"].isoformat(), - "last_eventful_visit_date": visit_status["date"].isoformat(), - **fetch_last_revision_release_date(visit_status.get("snapshot"), storage), + "visit_types": [visit_status["type"]], } - for visit_status in visit_statuses - if visit_status["status"] == "full" - ] - - if full_visit_status: - search.origin_update(full_visit_status) + if visit_status["status"] == "full": + processed_status.update( + { + "has_visits": True, + "nb_visits": visit_status["visit"], + "snapshot_id": hexify(visit_status.get("snapshot")), + "last_visit_date": visit_status["date"].isoformat(), + "last_eventful_visit_date": visit_status["date"].isoformat(), + **fetch_last_revision_release_date( + visit_status.get("snapshot"), storage + ), + } + ) + 
processed_visit_statuses.append(processed_status) + + if processed_visit_statuses: + search.origin_update(processed_visit_statuses) def process_origin_intrinsic_metadata(origin_metadata, search): logging.debug("processing origin intrinsic_metadata %r", origin_metadata) origin_metadata = [ {"url": item["id"], "intrinsic_metadata": item["metadata"],} for item in origin_metadata ] search.origin_update(origin_metadata) diff --git a/swh/search/static/swh_ql.so b/swh/search/static/swh_ql.so new file mode 100755 index 0000000..e193991 Binary files /dev/null and b/swh/search/static/swh_ql.so differ diff --git a/swh/search/static/swh_ql.wasm b/swh/search/static/swh_ql.wasm new file mode 100755 index 0000000..a5f111b Binary files /dev/null and b/swh/search/static/swh_ql.wasm differ diff --git a/swh/search/tests/test_cli.py b/swh/search/tests/test_cli.py index 136c2a7..d4d2392 100644 --- a/swh/search/tests/test_cli.py +++ b/swh/search/tests/test_cli.py @@ -1,444 +1,397 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import copy from datetime import datetime, timezone import tempfile from click.testing import CliRunner from confluent_kafka import Producer import pytest import yaml from swh.journal.serializers import value_to_kafka from swh.model.hashutil import hash_to_bytes from swh.search import get_search from swh.search.cli import search_cli_group CLI_CONFIG = """ search: cls: elasticsearch hosts: - '%(elasticsearch_host)s' indexes: origin: index: test read_alias: test-read write_alias: test-write storage: cls: memory """ JOURNAL_OBJECTS_CONFIG_TEMPLATE = """ journal: brokers: - {broker} prefix: {prefix} group_id: {group_id} """ def invoke(catch_exceptions, args, config="", *, elasticsearch_host): runner = CliRunner() with tempfile.NamedTemporaryFile("a", suffix=".yml") as config_fd: config_fd.write( (CLI_CONFIG + config) % {"elasticsearch_host": elasticsearch_host} ) config_fd.seek(0) result = runner.invoke(search_cli_group, ["-C" + config_fd.name] + args) if not catch_exceptions and result.exception: print(result.output) raise result.exception return result def test__journal_client__origin( swh_search, elasticsearch_host: str, kafka_prefix: str, kafka_server ): """Tests the re-indexing when origin_batch_size*task_batch_size is a divisor of nb_origins.""" producer = Producer( { "bootstrap.servers": kafka_server, "client.id": "test search origin producer", "acks": "all", } ) origin_foobar_baz = { "url": "http://foobar.baz", } value = value_to_kafka(origin_foobar_baz) topic = f"{kafka_prefix}.origin" producer.produce(topic=topic, key=b"bogus-origin", value=value) + producer.flush() journal_objects_config = JOURNAL_OBJECTS_CONFIG_TEMPLATE.format( broker=kafka_server, prefix=kafka_prefix, group_id="test-consumer" ) result = invoke( False, [ "journal-client", "objects", "--stop-after-objects", "1", "--object-type", "origin", "--prefix", kafka_prefix, ], journal_objects_config, elasticsearch_host=elasticsearch_host, ) # Check the output expected_output = "Processed 1 messages.\nDone.\n" assert result.exit_code == 0, result.output assert result.output == expected_output swh_search.flush() # searching origin without visit as requirement actual_page = swh_search.origin_search(url_pattern="foobar") # We find it assert actual_page.next_page_token is None assert actual_page.results == [origin_foobar_baz] # 
It's an origin with no visit, searching for it with visit actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=True) # returns nothing assert actual_page.next_page_token is None assert actual_page.results == [] -def test__journal_client__origin_visit( - swh_search, elasticsearch_host, kafka_prefix: str, kafka_server -): - """Tests the re-indexing when origin_batch_size*task_batch_size is a - divisor of nb_origins.""" - origin_foobar = {"url": "http://baz.foobar"} - producer = Producer( - { - "bootstrap.servers": kafka_server, - "client.id": "test search origin visit producer", - "acks": "all", - } - ) - topic = f"{kafka_prefix}.origin_visit" - value = value_to_kafka({"origin": origin_foobar["url"], "type": "git"}) - producer.produce(topic=topic, key=b"bogus-origin-visit", value=value) - - journal_objects_config = JOURNAL_OBJECTS_CONFIG_TEMPLATE.format( - broker=kafka_server, prefix=kafka_prefix, group_id="test-consumer" - ) - result = invoke( - False, - [ - "journal-client", - "objects", - "--stop-after-objects", - "1", - "--object-type", - "origin_visit", - ], - journal_objects_config, - elasticsearch_host=elasticsearch_host, - ) - - # Check the output - expected_output = "Processed 1 messages.\nDone.\n" - assert result.exit_code == 0, result.output - assert result.output == expected_output - - swh_search.flush() - - actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=False) - assert actual_page.next_page_token is None - assert actual_page.results == [origin_foobar] - - # Not considered visited unless the visit is full - actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=True) - assert actual_page.next_page_token is None - assert actual_page.results == [] - - def test__journal_client__origin_visit_status( swh_search, elasticsearch_host, kafka_prefix: str, kafka_server ): """Subscribing to origin-visit-status should result in swh-search indexation """ origin_foobar = {"url": "http://baz.foobar"} producer = Producer( { "bootstrap.servers": kafka_server, "client.id": "test search origin visit status producer", "acks": "all", } ) topic = f"{kafka_prefix}.origin_visit_status" value = value_to_kafka( { "origin": origin_foobar["url"], "visit": 1, + "type": "git", "date": datetime.now(tz=timezone.utc), "snapshot": None, "status": "full", } ) producer.produce(topic=topic, key=b"bogus-origin-visit-status", value=value) + producer.flush() journal_objects_config = JOURNAL_OBJECTS_CONFIG_TEMPLATE.format( broker=kafka_server, prefix=kafka_prefix, group_id="test-consumer" ) result = invoke( False, [ "journal-client", "objects", "--stop-after-objects", "1", "--prefix", kafka_prefix, "--object-type", "origin_visit_status", ], journal_objects_config, elasticsearch_host=elasticsearch_host, ) # Check the output expected_output = "Processed 1 messages.\nDone.\n" assert result.exit_code == 0, result.output assert result.output == expected_output swh_search.flush() # Both search returns the visit actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=False) assert actual_page.next_page_token is None assert actual_page.results == [origin_foobar] actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=True) assert actual_page.next_page_token is None assert actual_page.results == [origin_foobar] def test__journal_client__origin_intrinsic_metadata( swh_search, elasticsearch_host, kafka_prefix: str, kafka_server ): """Subscribing to origin-intrinsic-metadata should result in swh-search indexation """ origin_foobar = {"url": 
"https://github.com/clojure/clojure"} origin_intrinsic_metadata = { "id": origin_foobar["url"], "metadata": { "name": "clojure", "type": "SoftwareSourceCode", "license": "http://opensource.org/licenses/eclipse-1.0.php", "version": "1.10.2-master-SNAPSHOT", "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "identifier": "org.clojure", "description": "Clojure core environment and runtime library.", "codeRepository": "https://repo.maven.apache.org/maven2/org/clojure/clojure", # noqa }, "indexer_configuration_id": 1, "from_revision": hash_to_bytes("f47c139e20970ee0852166f48ee2a4626632b86e"), "mappings": ["maven"], } producer = Producer( { "bootstrap.servers": kafka_server, "client.id": "test search origin intrinsic metadata producer", "acks": "all", } ) topic = f"{kafka_prefix}.origin_intrinsic_metadata" value = value_to_kafka(origin_intrinsic_metadata) producer.produce(topic=topic, key=b"bogus-origin-intrinsic-metadata", value=value) + producer.flush() journal_objects_config = JOURNAL_OBJECTS_CONFIG_TEMPLATE.format( broker=kafka_server, prefix=kafka_prefix, group_id="test-consumer" ) result = invoke( False, [ "journal-client", "objects", "--stop-after-objects", "1", "--object-type", "origin_intrinsic_metadata", ], journal_objects_config, elasticsearch_host=elasticsearch_host, ) # Check the output expected_output = "Processed 1 messages.\nDone.\n" assert result.exit_code == 0, result.output assert result.output == expected_output swh_search.flush() # search without visit returns the metadata actual_page = swh_search.origin_search(url_pattern="clojure", with_visit=False) assert actual_page.next_page_token is None assert actual_page.results == [origin_foobar] # no visit associated so it does not return anything actual_page = swh_search.origin_search(url_pattern="clojure", with_visit=True) assert actual_page.next_page_token is None assert actual_page.results == [] def test__journal_client__missing_main_journal_config_key(elasticsearch_host): """Missing configuration on journal should raise""" with pytest.raises(KeyError, match="journal"): invoke( catch_exceptions=False, args=["journal-client", "objects", "--stop-after-objects", "1",], config="", # missing config will make it raise elasticsearch_host=elasticsearch_host, ) def test__journal_client__missing_journal_config_keys(elasticsearch_host): """Missing configuration on mandatory journal keys should raise""" kafka_prefix = "swh.journal.objects" journal_objects_config = JOURNAL_OBJECTS_CONFIG_TEMPLATE.format( broker="192.0.2.1", prefix=kafka_prefix, group_id="test-consumer" ) journal_config = yaml.safe_load(journal_objects_config) for key in journal_config["journal"].keys(): if key == "prefix": # optional continue cfg = copy.deepcopy(journal_config) del cfg["journal"][key] # make config incomplete yaml_cfg = yaml.dump(cfg) with pytest.raises(TypeError, match=f"{key}"): invoke( catch_exceptions=False, args=[ "journal-client", "objects", "--stop-after-objects", "1", "--prefix", kafka_prefix, "--object-type", "origin_visit_status", ], config=yaml_cfg, # incomplete config will make the cli raise elasticsearch_host=elasticsearch_host, ) def test__journal_client__missing_prefix_config_key( swh_search, elasticsearch_host, kafka_server ): """Missing configuration on mandatory prefix key should raise""" journal_cfg_template = """ journal: brokers: - {broker} group_id: {group_id} """ journal_cfg = journal_cfg_template.format( broker=kafka_server, group_id="test-consumer" ) with pytest.raises(ValueError, match="prefix"): invoke( False, # Missing 
--prefix (and no config key) will make the cli raise [ "journal-client", "objects", "--stop-after-objects", "1", "--object-type", "origin_visit_status", ], journal_cfg, elasticsearch_host=elasticsearch_host, ) def test__journal_client__missing_object_types_config_key( swh_search, elasticsearch_host, kafka_server ): """Missing configuration on mandatory object-types key should raise""" journal_cfg_template = """ journal: brokers: - {broker} prefix: swh.journal.objects group_id: {group_id} """ journal_cfg = journal_cfg_template.format( broker=kafka_server, group_id="test-consumer" ) with pytest.raises(ValueError, match="object_types"): invoke( False, # Missing --object-types (and no config key) will make the cli raise ["journal-client", "objects", "--stop-after-objects", "1"], journal_cfg, elasticsearch_host=elasticsearch_host, ) def test__initialize__with_index_name(elasticsearch_host): """Initializing the index with an index name should create the right index""" search = get_search( "elasticsearch", hosts=[elasticsearch_host], indexes={"origin": {"index": "test"}}, ) assert search._get_origin_index() == "test" assert search._get_origin_read_alias() == "origin-read" assert search._get_origin_write_alias() == "origin-write" def test__initialize__with_read_alias(elasticsearch_host): """Initializing the index with a search alias name should create the right search alias""" search = get_search( "elasticsearch", hosts=[elasticsearch_host], indexes={"origin": {"read_alias": "test"}}, ) assert search._get_origin_index() == "origin" assert search._get_origin_read_alias() == "test" assert search._get_origin_write_alias() == "origin-write" def test__initialize__with_write_alias(elasticsearch_host): """Initializing the index with an indexing alias name should create the right indexing alias""" search = get_search( "elasticsearch", hosts=[elasticsearch_host], indexes={"origin": {"write_alias": "test"}}, ) assert search._get_origin_index() == "origin" assert search._get_origin_read_alias() == "origin-read" assert search._get_origin_write_alias() == "test" diff --git a/swh/search/tests/test_elasticsearch.py b/swh/search/tests/test_elasticsearch.py index c59e173..943f9ed 100644 --- a/swh/search/tests/test_elasticsearch.py +++ b/swh/search/tests/test_elasticsearch.py @@ -1,119 +1,167 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from datetime import datetime, timedelta, timezone from textwrap import dedent import types import unittest from elasticsearch.helpers.errors import BulkIndexError import pytest from swh.search.metrics import OPERATIONS_METRIC from .test_search import CommonSearchTest class BaseElasticsearchTest(unittest.TestCase): @pytest.fixture(autouse=True) def _instantiate_search(self, swh_search, elasticsearch_host, mocker): self._elasticsearch_host = elasticsearch_host self.search = swh_search self.mocker = mocker # override self.search.origin_update to catch painless script errors # and pretty print them origin_update = self.search.origin_update def _origin_update(self, *args, **kwargs): script_error = False error_detail = "" try: origin_update(*args, **kwargs) except BulkIndexError as e: error = e.errors[0].get("update", {}).get("error", {}).get("caused_by") if error and "script_stack" in error: script_error = True error_detail = dedent( f""" Painless update script failed 
({error.get('reason')}). error type: {error.get('caused_by', {}).get('type')} error reason: {error.get('caused_by', {}).get('reason')} script stack: """ ) error_detail += "\n".join(error["script_stack"]) else: raise e assert script_error is False, error_detail[1:] self.search.origin_update = types.MethodType(_origin_update, self.search) def reset(self): self.search.deinitialize() self.search.initialize() class TestElasticsearchSearch(CommonSearchTest, BaseElasticsearchTest): def test_metrics_update_duration(self): mock = self.mocker.patch("swh.search.metrics.statsd.timing") for url in ["http://foobar.bar", "http://foobar.baz"]: self.search.origin_update([{"url": url}]) assert mock.call_count == 2 def test_metrics_search_duration(self): mock = self.mocker.patch("swh.search.metrics.statsd.timing") for url_pattern in ["foobar", "foobaz"]: self.search.origin_search(url_pattern=url_pattern, with_visit=True) assert mock.call_count == 2 def test_metrics_indexation_counters(self): mock_es = self.mocker.patch("elasticsearch.helpers.bulk") mock_es.return_value = 2, ["error"] mock_metrics = self.mocker.patch("swh.search.metrics.statsd.increment") self.search.origin_update([{"url": "http://foobar.baz"}]) assert mock_metrics.call_count == 2 mock_metrics.assert_any_call( OPERATIONS_METRIC, 2, tags={ "endpoint": "origin_update", "object_type": "document", "operation": "index", }, ) mock_metrics.assert_any_call( OPERATIONS_METRIC, 1, tags={ "endpoint": "origin_update", "object_type": "document", "operation": "index_error", }, ) def test_write_alias_usage(self): mock = self.mocker.patch("elasticsearch.helpers.bulk") mock.return_value = 2, ["result"] self.search.origin_update([{"url": "http://foobar.baz"}]) assert mock.call_args[1]["index"] == "test-write" def test_read_alias_usage(self): mock = self.mocker.patch("elasticsearch.Elasticsearch.search") mock.return_value = {"hits": {"hits": []}} self.search.origin_search(url_pattern="foobar.baz") assert mock.call_args[1]["index"] == "test-read" + + def test_sort_by_and_limit_query(self): + now = datetime.now(tz=timezone.utc).isoformat() + now_minus_5_hours = ( + datetime.now(tz=timezone.utc) - timedelta(hours=5) + ).isoformat() + now_plus_5_hours = ( + datetime.now(tz=timezone.utc) + timedelta(hours=5) + ).isoformat() + + ORIGINS = [ + { + "url": "http://foobar.1.com", + "nb_visits": 1, + "last_visit_date": now_minus_5_hours, + "last_eventful_visit_date": now_minus_5_hours, + }, + { + "url": "http://foobar.2.com", + "nb_visits": 2, + "last_visit_date": now, + "last_eventful_visit_date": now, + }, + { + "url": "http://foobar.3.com", + "nb_visits": 3, + "last_visit_date": now_plus_5_hours, + "last_eventful_visit_date": now_minus_5_hours, + }, + ] + + self.search.origin_update(ORIGINS) + self.search.flush() + + def _check_results(query, origin_indices): + page = self.search.origin_search(url_pattern="foobar", query=query) + results = [r["url"] for r in page.results] + assert results == [ORIGINS[index]["url"] for index in origin_indices] + + _check_results("sort_by = [-visits]", [2, 1, 0]) + _check_results("sort_by = [last_visit]", [0, 1, 2]) + _check_results("sort_by = [-last_eventful_visit, visits]", [1, 0, 2]) + _check_results("sort_by = [last_eventful_visit,-last_visit]", [2, 0, 1]) + + _check_results("sort_by = [-visits] limit = 1", [2]) + _check_results("sort_by = [last_visit] and limit = 2", [0, 1]) + _check_results("sort_by = [-last_eventful_visit, visits] limit = 3", [1, 0, 2]) diff --git a/swh/search/tests/test_journal_client.py 
b/swh/search/tests/test_journal_client.py index fd353eb..c225c55 100644 --- a/swh/search/tests/test_journal_client.py +++ b/swh/search/tests/test_journal_client.py @@ -1,276 +1,300 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime, timezone import functools from unittest.mock import MagicMock +import pytest + from swh.model.model import ( ObjectType, Person, Release, Revision, RevisionType, Snapshot, SnapshotBranch, TargetType, Timestamp, TimestampWithTimezone, hash_to_bytes, ) -from swh.search.journal_client import process_journal_objects +from swh.search.journal_client import ( + fetch_last_revision_release_date, + process_journal_objects, +) from swh.storage import get_storage DATES = [ TimestampWithTimezone( timestamp=Timestamp(seconds=1234567891, microseconds=0,), offset=120, negative_utc=False, ), TimestampWithTimezone( timestamp=Timestamp(seconds=1234567892, microseconds=0,), offset=120, negative_utc=False, ), TimestampWithTimezone( timestamp=Timestamp(seconds=1234567893, microseconds=0,), offset=120, negative_utc=False, ), TimestampWithTimezone( timestamp=Timestamp(seconds=1234567894, microseconds=0,), offset=120, negative_utc=False, ), ] COMMITTERS = [ Person(fullname=b"foo", name=b"foo", email=b""), Person(fullname=b"bar", name=b"bar", email=b""), ] REVISIONS = [ Revision( message=b"revision_1_message", date=DATES[0], committer=COMMITTERS[0], author=COMMITTERS[0], committer_date=DATES[0], type=RevisionType.GIT, directory=b"\x01" * 20, synthetic=False, metadata=None, parents=( hash_to_bytes("9b918dd063cec85c2bc63cc7f167e29f5894dcbc"), hash_to_bytes("757f38bdcd8473aaa12df55357f5e2f1a318e672"), ), ), Revision( message=b"revision_2_message", date=DATES[1], committer=COMMITTERS[1], author=COMMITTERS[1], committer_date=DATES[1], type=RevisionType.MERCURIAL, directory=b"\x02" * 20, synthetic=False, metadata=None, parents=(), extra_headers=((b"foo", b"bar"),), ), Revision( message=b"revision_3_message", date=DATES[2], committer=COMMITTERS[0], author=COMMITTERS[0], committer_date=DATES[2], type=RevisionType.GIT, directory=b"\x03" * 20, synthetic=False, metadata=None, parents=(), ), ] RELEASES = [ Release( name=b"v0.0.1", date=DATES[1], author=COMMITTERS[0], target_type=ObjectType.REVISION, target=b"\x04" * 20, message=b"foo", synthetic=False, ), Release( name=b"v0.0.2", date=DATES[2], author=COMMITTERS[1], target_type=ObjectType.REVISION, target=b"\x05" * 20, message=b"bar", synthetic=False, ), Release( name=b"v0.0.3", date=DATES[3], author=COMMITTERS[1], target_type=ObjectType.REVISION, target=b"\x05" * 20, message=b"foobar", synthetic=False, ), ] SNAPSHOTS = [ Snapshot( branches={ b"target/revision1": SnapshotBranch( target_type=TargetType.REVISION, target=REVISIONS[0].id, ), b"target/revision2": SnapshotBranch( target_type=TargetType.REVISION, target=REVISIONS[1].id, ), b"target/revision3": SnapshotBranch( target_type=TargetType.REVISION, target=REVISIONS[2].id, ), b"target/release1": SnapshotBranch( target_type=TargetType.RELEASE, target=RELEASES[0].id ), b"target/release2": SnapshotBranch( target_type=TargetType.RELEASE, target=RELEASES[1].id ), b"target/release3": SnapshotBranch( target_type=TargetType.RELEASE, target=RELEASES[2].id ), b"target/alias": SnapshotBranch( target_type=TargetType.ALIAS, target=b"target/revision1" ), }, ), + Snapshot( + 
branches={ + b"target/revision1": SnapshotBranch( + target_type=TargetType.REVISION, target=REVISIONS[0].id, + ) + }, + ), + Snapshot( + branches={ + b"target/release1": SnapshotBranch( + target_type=TargetType.RELEASE, target=RELEASES[0].id + ) + }, + ), + Snapshot(branches={}), ] +@pytest.fixture +def storage(): + storage = get_storage("memory") + + storage.revision_add(REVISIONS) + storage.release_add(RELEASES) + storage.snapshot_add(SNAPSHOTS) + return storage + + def test_journal_client_origin_from_journal(): search_mock = MagicMock() worker_fn = functools.partial(process_journal_objects, search=search_mock,) worker_fn({"origin": [{"url": "http://foobar.baz"},]}) search_mock.origin_update.assert_called_once_with( [{"url": "http://foobar.baz"},] ) search_mock.reset_mock() worker_fn({"origin": [{"url": "http://foobar.baz"}, {"url": "http://barbaz.qux"},]}) search_mock.origin_update.assert_called_once_with( [{"url": "http://foobar.baz"}, {"url": "http://barbaz.qux"},] ) -def test_journal_client_origin_visit_from_journal(): +def test_journal_client_origin_visit_status_from_journal(storage): search_mock = MagicMock() - worker_fn = functools.partial(process_journal_objects, search=search_mock,) - - worker_fn({"origin_visit": [{"origin": "http://foobar.baz", "type": "git"},]}) - search_mock.origin_update.assert_called_once_with( - [{"url": "http://foobar.baz", "visit_types": ["git"]},] - ) - - -def test_journal_client_origin_visit_status_from_journal(): - search_mock = MagicMock() - storage = get_storage("memory") - - storage.revision_add(REVISIONS) - storage.release_add(RELEASES) - storage.snapshot_add(SNAPSHOTS) - worker_fn = functools.partial( process_journal_objects, search=search_mock, storage=storage ) current_datetime = datetime.now(tz=timezone.utc) worker_fn( { "origin_visit_status": [ { "origin": "http://foobar.baz", "status": "full", + "type": "git", "visit": 5, "date": current_datetime, "snapshot": SNAPSHOTS[0].id, } # full visits ok ] } ) search_mock.origin_update.assert_called_once_with( [ { "url": "http://foobar.baz", + "visit_types": ["git"], "has_visits": True, "nb_visits": 5, - "snapshot_id": SNAPSHOTS[0].id, + "snapshot_id": SNAPSHOTS[0].id.hex(), "last_visit_date": current_datetime.isoformat(), "last_eventful_visit_date": current_datetime.isoformat(), "last_revision_date": "2009-02-14T01:31:33+02:00", "last_release_date": "2009-02-14T01:31:34+02:00", }, ] ) search_mock.reset_mock() - # non-full visits are filtered out + # non-full visits only set the visit_types attribute worker_fn( { "origin_visit_status": [ { "origin": "http://foobar.baz", + "type": "git", "status": "partial", "visit": 5, "date": current_datetime, } ] } ) - search_mock.origin_update.assert_not_called() + search_mock.origin_update.assert_called_once_with( + [{"url": "http://foobar.baz", "visit_types": ["git"]}] + ) def test_journal_client_origin_metadata_from_journal(): search_mock = MagicMock() worker_fn = functools.partial(process_journal_objects, search=search_mock,) worker_fn( { "origin_intrinsic_metadata": [ { "id": "http://foobar.baz", "metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "description": "foo bar", "programmingLanguage": "python", "license": "MIT", }, }, ] } ) search_mock.origin_update.assert_called_once_with( [ { "url": "http://foobar.baz", "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "description": "foo bar", "programmingLanguage": "python", "license": "MIT", }, }, ] ) + + +def test_fetch_last_revision_release_date(storage): 
+ for snapshot in SNAPSHOTS: + assert fetch_last_revision_release_date(snapshot.id, storage) is not None diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py index b5bfa06..3655445 100644 --- a/swh/search/tests/test_search.py +++ b/swh/search/tests/test_search.py @@ -1,1153 +1,1168 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime, timedelta, timezone from itertools import permutations from hypothesis import given, settings, strategies import pytest from swh.core.api.classes import stream_results class CommonSearchTest: def test_origin_url_unique_word_prefix(self): origin_foobar_baz = {"url": "http://foobar.baz"} origin_barbaz_qux = {"url": "http://barbaz.qux"} origin_qux_quux = {"url": "http://qux.quux"} origins = [origin_foobar_baz, origin_barbaz_qux, origin_qux_quux] self.search.origin_update(origins) self.search.flush() actual_page = self.search.origin_search(url_pattern="foobar") assert actual_page.next_page_token is None assert actual_page.results == [origin_foobar_baz] actual_page = self.search.origin_search(url_pattern="barb") assert actual_page.next_page_token is None assert actual_page.results == [origin_barbaz_qux] # 'bar' is part of 'foobar', but is not the beginning of it actual_page = self.search.origin_search(url_pattern="bar") assert actual_page.next_page_token is None assert actual_page.results == [origin_barbaz_qux] actual_page = self.search.origin_search(url_pattern="barbaz") assert actual_page.next_page_token is None assert actual_page.results == [origin_barbaz_qux] def test_origin_url_unique_word_prefix_multiple_results(self): origin_foobar_baz = {"url": "http://foobar.baz"} origin_barbaz_qux = {"url": "http://barbaz.qux"} origin_qux_quux = {"url": "http://qux.quux"} self.search.origin_update( [origin_foobar_baz, origin_barbaz_qux, origin_qux_quux] ) self.search.flush() actual_page = self.search.origin_search(url_pattern="qu") assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [o["url"] for o in [origin_qux_quux, origin_barbaz_qux]] assert sorted(results) == sorted(expected_results) actual_page = self.search.origin_search(url_pattern="qux") assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [o["url"] for o in [origin_qux_quux, origin_barbaz_qux]] assert sorted(results) == sorted(expected_results) def test_origin_url_all_terms(self): origin_foo_bar_baz = {"url": "http://foo.bar/baz"} origin_foo_bar_foo_bar = {"url": "http://foo.bar/foo.bar"} origins = [origin_foo_bar_baz, origin_foo_bar_foo_bar] self.search.origin_update(origins) self.search.flush() # Only results containing all terms should be returned. 
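
Later in this test module (see the visit-status permutation test further down), the suite exercises the "eventful visit" rule shared by the Elasticsearch painless script and the in-memory backend above: the stored `snapshot_id` and `last_eventful_visit_date` only move forward when the incoming snapshot differs from the stored one and the incoming date is newer. A minimal sketch of that merge rule, using hypothetical document dicts rather than the backend code:

```python
# Minimal sketch of the "eventful visit" merge rule: keep the stored values
# unless the snapshot changed and the incoming date is newer. Field names
# mirror the documents handled above; this is an illustration only.
from datetime import datetime
from typing import Dict


def merge_eventful_visit(current: Dict[str, str], incoming: Dict[str, str]) -> Dict[str, str]:
    cur_date = datetime.fromisoformat(current["last_eventful_visit_date"])
    new_date = datetime.fromisoformat(incoming["last_eventful_visit_date"])
    if incoming["snapshot_id"] == current["snapshot_id"] or new_date < cur_date:
        # Same snapshot (not eventful) or stale message: keep the stored values.
        return current
    return incoming


current = {"snapshot_id": "snap1", "last_eventful_visit_date": "2021-01-01T00:00:00+00:00"}
revisit = {"snapshot_id": "snap1", "last_eventful_visit_date": "2021-02-01T00:00:00+00:00"}
eventful = {"snapshot_id": "snap2", "last_eventful_visit_date": "2021-03-01T00:00:00+00:00"}

assert merge_eventful_visit(current, revisit) == current    # snapshot unchanged
assert merge_eventful_visit(current, eventful) == eventful  # new snapshot, newer date
```
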
actual_page = self.search.origin_search(url_pattern="foo bar baz") assert actual_page.next_page_token is None assert actual_page.results == [origin_foo_bar_baz] def test_origin_with_visit(self): origin_foobar_baz = {"url": "http://foobar/baz"} self.search.origin_update( [{**o, "has_visits": True} for o in [origin_foobar_baz]] ) self.search.flush() actual_page = self.search.origin_search(url_pattern="foobar", with_visit=True) assert actual_page.next_page_token is None assert actual_page.results == [origin_foobar_baz] def test_origin_with_visit_added(self): origin_foobar_baz = {"url": "http://foobar.baz"} self.search.origin_update([origin_foobar_baz]) self.search.flush() actual_page = self.search.origin_search(url_pattern="foobar", with_visit=True) assert actual_page.next_page_token is None assert actual_page.results == [] self.search.origin_update( [{**o, "has_visits": True} for o in [origin_foobar_baz]] ) self.search.flush() actual_page = self.search.origin_search(url_pattern="foobar", with_visit=True) assert actual_page.next_page_token is None assert actual_page.results == [origin_foobar_baz] def test_origin_no_visit_types_search(self): origins = [{"url": "http://foobar.baz"}] self.search.origin_update(origins) self.search.flush() actual_page = self.search.origin_search(url_pattern="http", visit_types=["git"]) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [] assert sorted(results) == sorted(expected_results) actual_page = self.search.origin_search(url_pattern="http", visit_types=None) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [origin["url"] for origin in origins] assert sorted(results) == sorted(expected_results) def test_origin_visit_types_search(self): origins = [ {"url": "http://foobar.baz", "visit_types": ["git"]}, {"url": "http://barbaz.qux", "visit_types": ["svn"]}, {"url": "http://qux.quux", "visit_types": ["hg"]}, ] self.search.origin_update(origins) self.search.flush() for origin in origins: actual_page = self.search.origin_search( url_pattern="http", visit_types=origin["visit_types"] ) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [origin["url"]] assert sorted(results) == sorted(expected_results) actual_page = self.search.origin_search(url_pattern="http", visit_types=None) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [origin["url"] for origin in origins] assert sorted(results) == sorted(expected_results) def test_origin_visit_types_update_search(self): origin_url = "http://foobar.baz" self.search.origin_update([{"url": origin_url}]) self.search.flush() def _add_visit_type(visit_type): self.search.origin_update( [{"url": origin_url, "visit_types": [visit_type]}] ) self.search.flush() def _check_visit_types(visit_types_list): for visit_types in visit_types_list: actual_page = self.search.origin_search( url_pattern="http", visit_types=visit_types ) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [origin_url] assert sorted(results) == sorted(expected_results) _add_visit_type("git") _check_visit_types([["git"], ["git", "hg"]]) _add_visit_type("svn") _check_visit_types([["git"], ["svn"], ["svn", "git"], ["git", "hg", "svn"]]) _add_visit_type("hg") _check_visit_types( [ ["git"], ["svn"], ["hg"], ["svn", "git"], ["hg", "git"], ["hg", "svn"], ["git", "hg", 
"svn"], ] ) def test_origin_nb_visits_update_search(self): origin_url = "http://foobar.baz" self.search.origin_update([{"url": origin_url}]) self.search.flush() def _update_nb_visits(nb_visits): self.search.origin_update([{"url": origin_url, "nb_visits": nb_visits}]) self.search.flush() def _check_min_nb_visits(min_nb_visits): actual_page = self.search.origin_search( url_pattern=origin_url, min_nb_visits=min_nb_visits, ) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [origin_url] assert sorted(results) == sorted(expected_results) _update_nb_visits(2) _check_min_nb_visits(2) # Works for = 2 _check_min_nb_visits(1) # Works for < 2 with pytest.raises(AssertionError): _check_min_nb_visits( 5 ) # No results for nb_visits >= 5 (should throw error) _update_nb_visits(5) _check_min_nb_visits(5) # Works for = 5 _check_min_nb_visits(3) # Works for < 5 def test_origin_last_visit_date_update_search(self): origin_url = "http://foobar.baz" self.search.origin_update([{"url": origin_url}]) self.search.flush() def _update_last_visit_date(last_visit_date): self.search.origin_update( [{"url": origin_url, "last_visit_date": last_visit_date}] ) self.search.flush() def _check_min_last_visit_date(min_last_visit_date): actual_page = self.search.origin_search( url_pattern=origin_url, min_last_visit_date=min_last_visit_date, ) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [origin_url] assert sorted(results) == sorted(expected_results) now = datetime.now(tz=timezone.utc).isoformat() now_minus_5_hours = ( datetime.now(tz=timezone.utc) - timedelta(hours=5) ).isoformat() now_plus_5_hours = ( datetime.now(tz=timezone.utc) + timedelta(hours=5) ).isoformat() _update_last_visit_date(now) _check_min_last_visit_date(now) # Works for = _check_min_last_visit_date(now_minus_5_hours) # Works for < with pytest.raises(AssertionError): _check_min_last_visit_date(now_plus_5_hours) # Fails for > _update_last_visit_date(now_plus_5_hours) _check_min_last_visit_date(now_plus_5_hours) # Works for = _check_min_last_visit_date(now) # Works for < def test_journal_client_origin_visit_status_permutation(self): NOW = datetime.now(tz=timezone.utc).isoformat() NOW_MINUS_5_HOURS = ( datetime.now(tz=timezone.utc) - timedelta(hours=5) ).isoformat() NOW_PLUS_5_HOURS = ( datetime.now(tz=timezone.utc) + timedelta(hours=5) ).isoformat() VISIT_STATUSES = [ { "url": "http://foobar.baz", "snapshot_id": "SNAPSHOT_1", "last_eventful_visit_date": NOW, }, { "url": "http://foobar.baz", "snapshot_id": "SNAPSHOT_1", "last_eventful_visit_date": NOW_MINUS_5_HOURS, }, { "url": "http://foobar.baz", "snapshot_id": "SNAPSHOT_2", "last_eventful_visit_date": NOW_PLUS_5_HOURS, }, ] for visit_statuses in permutations(VISIT_STATUSES, len(VISIT_STATUSES)): self.search.origin_update(visit_statuses) self.search.flush() origin_url = "http://foobar.baz" actual_page = self.search.origin_search( url_pattern=origin_url, min_last_eventful_visit_date=NOW_PLUS_5_HOURS, ) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [origin_url] assert sorted(results) == sorted(expected_results) self.reset() def test_origin_last_eventful_visit_date_update_search(self): origin_url = "http://foobar.baz" self.search.origin_update([{"url": origin_url}]) self.search.flush() def _update_last_eventful_visit_date(snapshot_id, last_eventful_visit_date): self.search.origin_update( [ { "url": origin_url, "snapshot_id": 
snapshot_id,
                        "last_eventful_visit_date": last_eventful_visit_date,
                    }
                ]
            )
            self.search.flush()

        def _check_min_last_eventful_visit_date(min_last_eventful_visit_date):
            actual_page = self.search.origin_search(
                url_pattern=origin_url,
                min_last_eventful_visit_date=min_last_eventful_visit_date,
            )
            assert actual_page.next_page_token is None
            results = [r["url"] for r in actual_page.results]
            expected_results = [origin_url]
            assert sorted(results) == sorted(expected_results)

        now = datetime.now(tz=timezone.utc).isoformat()
        now_minus_5_hours = (
            datetime.now(tz=timezone.utc) - timedelta(hours=5)
        ).isoformat()
        now_plus_5_hours = (
            datetime.now(tz=timezone.utc) + timedelta(hours=5)
        ).isoformat()

        snapshot_1 = "SNAPSHOT_1"
        snapshot_2 = "SNAPSHOT_2"

        _update_last_eventful_visit_date(snapshot_1, now)

        _check_min_last_eventful_visit_date(now)  # Works for =
        _check_min_last_eventful_visit_date(now_minus_5_hours)  # Works for <

        with pytest.raises(AssertionError):
            _check_min_last_eventful_visit_date(now_plus_5_hours)  # Fails for >

        _update_last_eventful_visit_date(
            snapshot_1, now_plus_5_hours
        )  # Revisit (not eventful) of the same origin

        _check_min_last_eventful_visit_date(
            now
        )  # Should remain the same because the recent visit wasn't eventful

        with pytest.raises(AssertionError):
            _check_min_last_eventful_visit_date(now_plus_5_hours)

        _update_last_eventful_visit_date(
            snapshot_2, now_plus_5_hours
        )  # Revisit (eventful) of the same origin

        _check_min_last_eventful_visit_date(now_plus_5_hours)  # Works for =
        _check_min_last_eventful_visit_date(now)  # Works for <

    def _test_origin_last_revision_release_date_update_search(self, date_type):
        origin_url = "http://foobar.baz"
        self.search.origin_update([{"url": origin_url}])
        self.search.flush()

        def _update_last_revision_release_date(date):
            self.search.origin_update([{"url": origin_url, date_type: date,}])
            self.search.flush()

        def _check_min_last_revision_release_date(date):
            actual_page = self.search.origin_search(
                url_pattern=origin_url, **{f"min_{date_type}": date},
            )
            assert actual_page.next_page_token is None
            results = [r["url"] for r in actual_page.results]
            expected_results = [origin_url]
            assert sorted(results) == sorted(expected_results)

        now = datetime.now(tz=timezone.utc).isoformat()
        now_minus_5_hours = (
            datetime.now(tz=timezone.utc) - timedelta(hours=5)
        ).isoformat()
        now_plus_5_hours = (
            datetime.now(tz=timezone.utc) + timedelta(hours=5)
        ).isoformat()

        _update_last_revision_release_date(now)

        _check_min_last_revision_release_date(now)
        _check_min_last_revision_release_date(now_minus_5_hours)

        with pytest.raises(AssertionError):
            _check_min_last_revision_release_date(now_plus_5_hours)

        _update_last_revision_release_date(now_plus_5_hours)

        _check_min_last_revision_release_date(now_plus_5_hours)
        _check_min_last_revision_release_date(now)

    def test_origin_last_revision_date_update_search(self):
        self._test_origin_last_revision_release_date_update_search(
            date_type="last_revision_date"
        )

    def test_origin_last_release_date_update_search(self):
        self._test_origin_last_revision_release_date_update_search(
            date_type="last_release_date"
        )

    def test_origin_intrinsic_metadata_dates_filter_sorting_search(self):
        DATE_0 = "1999-06-28"
        DATE_1 = "2001-02-13"
        DATE_2 = "2005-10-02"

        ORIGINS = [
            {
                "url": "http://foobar.0.com",
                "intrinsic_metadata": {
                    "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                    "dateCreated": DATE_0,
                    "dateModified": DATE_1,
                    "datePublished": DATE_2,
                },
            },
            {
                "url": "http://foobar.1.com",
                "intrinsic_metadata": {
                    "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                    "dateCreated": DATE_1,
"dateModified": DATE_2, "datePublished": DATE_2, }, }, { "url": "http://foobar.2.com", "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "dateCreated": DATE_2, "dateModified": DATE_2, "datePublished": DATE_2, }, }, ] self.search.origin_update(ORIGINS) self.search.flush() def _check_results(origin_indices, sort_results=True, **kwargs): page = self.search.origin_search(url_pattern="foobar", **kwargs) results = [r["url"] for r in page.results] if sort_results: assert sorted(results) == sorted( [ORIGINS[index]["url"] for index in origin_indices] ) else: assert results == [ORIGINS[index]["url"] for index in origin_indices] _check_results(min_date_created=DATE_0, origin_indices=[0, 1, 2]) _check_results(min_date_created=DATE_1, origin_indices=[1, 2]) _check_results(min_date_created=DATE_2, origin_indices=[2]) _check_results(min_date_modified=DATE_0, origin_indices=[0, 1, 2]) _check_results(min_date_modified=DATE_1, origin_indices=[0, 1, 2]) _check_results(min_date_modified=DATE_2, origin_indices=[1, 2]) _check_results(min_date_published=DATE_0, origin_indices=[0, 1, 2]) _check_results(min_date_published=DATE_1, origin_indices=[0, 1, 2]) _check_results(min_date_published=DATE_2, origin_indices=[0, 1, 2]) # Sorting _check_results( sort_by=["-date_created"], origin_indices=[2, 1, 0], sort_results=False ) _check_results( sort_by=["date_created"], origin_indices=[0, 1, 2], sort_results=False ) def test_origin_keywords_search(self): ORIGINS = [ { "url": "http://foobar.1.com", "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "description": "Django is a backend framework for applications", "keywords": "django,backend,server,web,framework", }, }, { "url": "http://foobar.2.com", "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "description": "Native Android applications are fast", "keywords": "android,mobile,ui", }, }, { "url": "http://foobar.3.com", "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "description": "React framework helps you build web applications", "keywords": "react,web,ui", }, }, ] self.search.origin_update(ORIGINS) self.search.flush() def _check_results(keywords, origin_indices, sorting=False): page = self.search.origin_search(url_pattern="foobar", keywords=keywords) results = [r["url"] for r in page.results] if sorting: assert sorted(results) == sorted( [ORIGINS[index]["url"] for index in origin_indices] ) else: assert results == [ORIGINS[index]["url"] for index in origin_indices] _check_results(["build"], [2]) _check_results(["web"], [2, 0]) _check_results(["ui"], [1, 2]) # Following tests ensure that boosts work properly # Baseline: "applications" is common in all origin descriptions _check_results(["applications"], [1, 0, 2], True) # ORIGINS[0] has 'framework' in: keyword + description # ORIGINS[2] has 'framework' in: description # ORIGINS[1] has 'framework' in: None _check_results(["framework", "applications"], [0, 2, 1]) # ORIGINS[1] has 'ui' in: keyword # ORIGINS[1] has 'ui' in: keyword # ORIGINS[0] has 'ui' in: None _check_results(["applications", "ui"], [1, 2, 0]) # ORIGINS[2] has 'web' in: keyword + description # ORIGINS[0] has 'web' in: keyword # ORIGINS[1] has 'web' in: None _check_results(["web", "applications"], [2, 0, 1]) def test_origin_sort_by_search(self): now = datetime.now(tz=timezone.utc).isoformat() now_minus_5_hours = ( datetime.now(tz=timezone.utc) - timedelta(hours=5) ).isoformat() now_plus_5_hours = ( 
datetime.now(tz=timezone.utc) + timedelta(hours=5) ).isoformat() ORIGINS = [ { "url": "http://foobar.1.com", "nb_visits": 1, "last_visit_date": now_minus_5_hours, }, {"url": "http://foobar.2.com", "nb_visits": 2, "last_visit_date": now,}, { "url": "http://foobar.3.com", "nb_visits": 3, "last_visit_date": now_plus_5_hours, }, ] self.search.origin_update(ORIGINS) self.search.flush() def _check_results(sort_by, origins): page = self.search.origin_search(url_pattern="foobar", sort_by=sort_by) results = [r["url"] for r in page.results] assert results == [origin["url"] for origin in origins] _check_results(["nb_visits"], ORIGINS) _check_results(["-nb_visits"], ORIGINS[::-1]) _check_results(["last_visit_date"], ORIGINS) _check_results(["-last_visit_date"], ORIGINS[::-1]) _check_results(["nb_visits", "-last_visit_date"], ORIGINS) _check_results(["-last_visit_date", "nb_visits"], ORIGINS[::-1]) def test_origin_instrinsic_metadata_license_search(self): ORIGINS = [ { "url": "http://foobar.1.com", "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "description": "foo bar", "license": "https://spdx.org/licenses/MIT", }, }, { "url": "http://foobar.2.com", "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "description": "foo bar", "license": "BSD-3-Clause", }, }, ] self.search.origin_update(ORIGINS) self.search.flush() def _check_results(licenses, origin_indices): page = self.search.origin_search(url_pattern="foobar", licenses=licenses) results = [r["url"] for r in page.results] assert sorted(results) == sorted( [ORIGINS[i]["url"] for i in origin_indices] ) _check_results(["MIT"], [0]) _check_results(["bsd"], [1]) _check_results(["mit", "3-Clause"], [0, 1]) def test_origin_instrinsic_metadata_programming_language_search(self): ORIGINS = [ { "url": "http://foobar.1.com", "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "description": "foo bar", "programmingLanguage": "python", }, }, { "url": "http://foobar.2.com", "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "description": "foo bar", "programmingLanguage": "javascript", }, }, ] self.search.origin_update(ORIGINS) self.search.flush() def _check_results(programming_languages, origin_indices): page = self.search.origin_search( url_pattern="foobar", programming_languages=programming_languages ) results = [r["url"] for r in page.results] assert sorted(results) == sorted( [ORIGINS[i]["url"] for i in origin_indices] ) _check_results(["python"], [0]) _check_results(["javascript"], [1]) _check_results(["python", "javascript"], [0, 1]) def test_origin_instrinsic_metadata_multiple_field_search(self): ORIGINS = [ { "url": "http://foobar.1.com", "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "description": "foo bar 1", "programmingLanguage": "python", "license": "https://spdx.org/licenses/MIT", }, }, { "url": "http://foobar.2.com", "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "description": "foo bar 2", "programmingLanguage": ["javascript", "html", "css"], "license": [ "https://spdx.org/licenses/CC-BY-1.0", "https://spdx.org/licenses/Apache-1.0", ], }, }, { "url": "http://foobar.3.com", "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "description": "foo bar 3", "programmingLanguage": ["Cpp", "c"], "license": "https://spdx.org/licenses/LGPL-2.0-only", }, }, ] self.search.origin_update(ORIGINS) self.search.flush() def 
_check_result(programming_languages, licenses, origin_indices): page = self.search.origin_search( url_pattern="foobar", programming_languages=programming_languages, licenses=licenses, ) results = [r["url"] for r in page.results] assert sorted(results) == sorted( [ORIGINS[i]["url"] for i in origin_indices] ) _check_result(["javascript"], ["CC"], [1]) _check_result(["css"], ["CC"], [1]) _check_result(["css"], ["CC", "apache"], [1]) _check_result(["python", "javascript"], ["MIT"], [0]) _check_result(["c", "python"], ["LGPL", "mit"], [2, 0]) def test_origin_update_with_no_visit_types(self): """ Update an origin with visit types first then with no visit types, check origin can still be searched with visit types afterwards. """ origin_url = "http://foobar.baz" self.search.origin_update([{"url": origin_url, "visit_types": ["git"]}]) self.search.flush() self.search.origin_update([{"url": origin_url}]) self.search.flush() actual_page = self.search.origin_search(url_pattern="http", visit_types=["git"]) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [origin_url] assert results == expected_results def test_origin_intrinsic_metadata_description(self): origin1_nothin = {"url": "http://origin1"} origin2_foobar = {"url": "http://origin2"} origin3_barbaz = {"url": "http://origin3"} self.search.origin_update( [ {**origin1_nothin, "intrinsic_metadata": {},}, { **origin2_foobar, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "description": "foo bar", }, }, { **origin3_barbaz, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "description": "bar baz", }, }, ] ) self.search.flush() actual_page = self.search.origin_search(metadata_pattern="foo") assert actual_page.next_page_token is None assert actual_page.results == [origin2_foobar] actual_page = self.search.origin_search(metadata_pattern="foo bar") assert actual_page.next_page_token is None assert actual_page.results == [origin2_foobar] actual_page = self.search.origin_search(metadata_pattern="bar baz") assert actual_page.next_page_token is None assert actual_page.results == [origin3_barbaz] def test_origin_intrinsic_metadata_all_terms(self): origin1_foobarfoobar = {"url": "http://origin1"} origin3_foobarbaz = {"url": "http://origin2"} self.search.origin_update( [ { **origin1_foobarfoobar, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "description": "foo bar foo bar", }, }, { **origin3_foobarbaz, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "description": "foo bar baz", }, }, ] ) self.search.flush() actual_page = self.search.origin_search(metadata_pattern="foo bar baz") assert actual_page.next_page_token is None assert actual_page.results == [origin3_foobarbaz] def test_origin_intrinsic_metadata_long_description(self): """Checks ElasticSearch does not try to store large values untokenize, which would be inefficient and crash it with: Document contains at least one immense term in field="intrinsic_metadata.http://schema.org/description.@value" (whose UTF8 encoding is longer than the max length 32766), all of which were skipped. 
""" # noqa origin1 = {"url": "http://origin1"} self.search.origin_update( [ { **origin1, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "description": " ".join(f"foo{i}" for i in range(100000)), }, }, ] ) self.search.flush() actual_page = self.search.origin_search(metadata_pattern="foo42") assert actual_page.next_page_token is None assert actual_page.results == [origin1] def test_origin_intrinsic_metadata_matches_cross_fields(self): """Checks the backend finds results even if the two words in the query are each in a different field.""" origin1 = {"url": "http://origin1"} self.search.origin_update( [ { **origin1, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "description": "foo bar", "author": "John Doe", }, }, ] ) self.search.flush() actual_page = self.search.origin_search(metadata_pattern="foo John") assert actual_page.next_page_token is None assert actual_page.results == [origin1] def test_origin_intrinsic_metadata_nested(self): origin1_nothin = {"url": "http://origin1"} origin2_foobar = {"url": "http://origin2"} origin3_barbaz = {"url": "http://origin3"} self.search.origin_update( [ {**origin1_nothin, "intrinsic_metadata": {},}, { **origin2_foobar, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "keywords": ["foo", "bar"], }, }, { **origin3_barbaz, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "keywords": ["bar", "baz"], }, }, ] ) self.search.flush() actual_page = self.search.origin_search(metadata_pattern="foo") assert actual_page.next_page_token is None assert actual_page.results == [origin2_foobar] actual_page = self.search.origin_search(metadata_pattern="foo bar") assert actual_page.next_page_token is None assert actual_page.results == [origin2_foobar] actual_page = self.search.origin_search(metadata_pattern="bar baz") assert actual_page.next_page_token is None assert actual_page.results == [origin3_barbaz] def test_origin_intrinsic_metadata_inconsistent_type(self): """Checks the same field can have a concrete value, an object, or an array in different documents.""" origin1_foobar = {"url": "http://origin1"} origin2_barbaz = {"url": "http://origin2"} origin3_bazqux = {"url": "http://origin3"} self.search.origin_update( [ { **origin1_foobar, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "author": {"familyName": "Foo", "givenName": "Bar",}, }, }, ] ) self.search.flush() self.search.origin_update( [ { **origin2_barbaz, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "author": "Bar Baz", }, }, { **origin3_bazqux, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "author": ["Baz", "Qux"], }, }, ] ) self.search.flush() actual_page = self.search.origin_search(metadata_pattern="bar") assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [o["url"] for o in [origin2_barbaz, origin1_foobar]] assert sorted(results) == sorted(expected_results) actual_page = self.search.origin_search(metadata_pattern="baz") assert actual_page.next_page_token is None assert actual_page.results == [origin2_barbaz, origin3_bazqux] actual_page = self.search.origin_search(metadata_pattern="foo") assert actual_page.next_page_token is None assert actual_page.results == [origin1_foobar] actual_page = self.search.origin_search(metadata_pattern="bar baz") assert actual_page.next_page_token is None assert 
actual_page.results == [origin2_barbaz] actual_page = self.search.origin_search(metadata_pattern="qux") assert actual_page.next_page_token is None assert actual_page.results == [origin3_bazqux] actual_page = self.search.origin_search(metadata_pattern="baz qux") assert actual_page.next_page_token is None assert actual_page.results == [origin3_bazqux] actual_page = self.search.origin_search(metadata_pattern="foo bar") assert actual_page.next_page_token is None assert actual_page.results == [origin1_foobar] def test_origin_intrinsic_metadata_string_mapping(self): """Checks inserting a date-like in a field does not update the mapping to require every document uses a date in that field; or that search queries use a date either. Likewise for numeric and boolean fields.""" origin1 = {"url": "http://origin1"} origin2 = {"url": "http://origin2"} self.search.origin_update( [ { **origin1, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "dateCreated": "2021-02-18T10:16:52", "version": "1.0", "isAccessibleForFree": True, }, } ] ) self.search.flush() self.search.origin_update( [ { **origin2, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "dateCreated": "a long time ago", "address": "in a galaxy far, far away", "version": "a new hope", "isAccessibleForFree": "it depends", }, }, ] ) self.search.flush() actual_page = self.search.origin_search(metadata_pattern="1.0") assert actual_page.next_page_token is None assert actual_page.results == [origin1] actual_page = self.search.origin_search(metadata_pattern="long") assert actual_page.next_page_token is None assert ( actual_page.results == [] ) # "%Y-%m-%d" not followed, so value is rejected actual_page = self.search.origin_search(metadata_pattern="true") assert actual_page.next_page_token is None assert actual_page.results == [origin1] actual_page = self.search.origin_search(metadata_pattern="it depends") assert actual_page.next_page_token is None assert actual_page.results == [origin2] def test_origin_intrinsic_metadata_update(self): origin = {"url": "http://origin1"} origin_data = { **origin, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "author": "John Doe", }, } self.search.origin_update([origin_data]) self.search.flush() actual_page = self.search.origin_search(metadata_pattern="John") assert actual_page.next_page_token is None assert actual_page.results == [origin] origin_data["intrinsic_metadata"]["author"] = "Jane Doe" self.search.origin_update([origin_data]) self.search.flush() actual_page = self.search.origin_search(metadata_pattern="Jane") assert actual_page.next_page_token is None assert actual_page.results == [origin] # TODO: add more tests with more codemeta terms # TODO: add more tests with edge cases @settings(deadline=None) @given(strategies.integers(min_value=1, max_value=4)) def test_origin_url_paging(self, limit): # TODO: no hypothesis origin1_foo = {"url": "http://origin1/foo"} origin2_foobar = {"url": "http://origin2/foo/bar"} origin3_foobarbaz = {"url": "http://origin3/foo/bar/baz"} self.reset() self.search.origin_update([origin1_foo, origin2_foobar, origin3_foobarbaz]) self.search.flush() results = stream_results( self.search.origin_search, url_pattern="foo bar baz", limit=limit ) results = [res["url"] for res in results] expected_results = [o["url"] for o in [origin3_foobarbaz]] assert sorted(results[0 : len(expected_results)]) == sorted(expected_results) results = stream_results( self.search.origin_search, url_pattern="foo bar", 
limit=limit ) results = [res["url"] for res in results] expected_results = [o["url"] for o in [origin2_foobar, origin3_foobarbaz]] assert sorted(results[0 : len(expected_results)]) == sorted(expected_results) results = stream_results( self.search.origin_search, url_pattern="foo", limit=limit ) results = [res["url"] for res in results] expected_results = [ o["url"] for o in [origin1_foo, origin2_foobar, origin3_foobarbaz] ] assert sorted(results[0 : len(expected_results)]) == sorted(expected_results) @settings(deadline=None) @given(strategies.integers(min_value=1, max_value=4)) def test_origin_intrinsic_metadata_paging(self, limit): # TODO: no hypothesis origin1_foo = {"url": "http://origin1"} origin2_foobar = {"url": "http://origin2"} origin3_foobarbaz = {"url": "http://origin3"} self.reset() self.search.origin_update( [ { **origin1_foo, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "keywords": ["foo"], }, }, { **origin2_foobar, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "keywords": ["foo", "bar"], }, }, { **origin3_foobarbaz, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "keywords": ["foo", "bar", "baz"], }, }, ] ) self.search.flush() results = stream_results( self.search.origin_search, metadata_pattern="foo bar baz", limit=limit ) assert list(results) == [origin3_foobarbaz] results = stream_results( self.search.origin_search, metadata_pattern="foo bar", limit=limit ) assert list(results) == [origin2_foobar, origin3_foobarbaz] results = stream_results( self.search.origin_search, metadata_pattern="foo", limit=limit ) assert list(results) == [origin1_foo, origin2_foobar, origin3_foobarbaz] def test_search_blocklisted_results(self): origin1 = {"url": "http://origin1"} origin2 = {"url": "http://origin2", "blocklisted": True} self.search.origin_update([origin1, origin2]) self.search.flush() actual_page = self.search.origin_search(url_pattern="origin") assert actual_page.next_page_token is None assert actual_page.results == [origin1] def test_search_blocklisted_update(self): origin1 = {"url": "http://origin1"} self.search.origin_update([origin1]) self.search.flush() result_page = self.search.origin_search(url_pattern="origin") assert result_page.next_page_token is None assert result_page.results == [origin1] self.search.origin_update([{**origin1, "blocklisted": True}]) self.search.flush() result_page = self.search.origin_search(url_pattern="origin") assert result_page.next_page_token is None assert result_page.results == [] self.search.origin_update( [{**origin1, "has_visits": True, "visit_types": ["git"]}] ) self.search.flush() result_page = self.search.origin_search(url_pattern="origin") assert result_page.next_page_token is None assert result_page.results == [] + + def test_filter_keyword_in_filter(self): + origin1 = { + "url": "foo language in ['foo baz'] bar", + } + self.search.origin_update([origin1]) + self.search.flush() + + result_page = self.search.origin_search(url_pattern="language in ['foo bar']") + assert result_page.next_page_token is None + assert result_page.results == [origin1] + + result_page = self.search.origin_search(url_pattern="baaz") + assert result_page.next_page_token is None + assert result_page.results == [] diff --git a/swh/search/tests/test_translator.py b/swh/search/tests/test_translator.py new file mode 100644 index 0000000..9dcf018 --- /dev/null +++ b/swh/search/tests/test_translator.py @@ -0,0 +1,351 @@ +import pytest + +from swh.search.translator 
import Translator +from swh.search.utils import get_expansion + + +def _test_results(query, expected): + output = Translator().parse_query(query) + assert output == expected + + +def test_empty_query(): + query = "" + with pytest.raises(Exception): + _test_results(query, {}) + + +def test_conjunction_operators(): + query = "visited = true or visits > 2 and visits < 5" + expected = { + "filters": { + "bool": { + "should": [ + {"term": {"has_visits": True}}, + { + "bool": { + "must": [ + {"range": {"nb_visits": {"gt": 2}}}, + {"range": {"nb_visits": {"lt": 5}}}, + ] + } + }, + ] + } + } + } + _test_results(query, expected) + + +def test_conjunction_op_precedence_override(): + query = "(visited = false or visits > 2) and visits < 5" + expected = { + "filters": { + "bool": { + "must": [ + { + "bool": { + "should": [ + {"term": {"has_visits": False}}, + {"range": {"nb_visits": {"gt": 2}}}, + ] + } + }, + {"range": {"nb_visits": {"lt": 5}}}, + ] + } + } + } + + _test_results(query, expected) + + +def test_limit_and_sortby(): + query = "visited = true sort_by = [-visits,last_visit] limit = 15" + expected = { + "filters": {"term": {"has_visits": True}}, + "sortBy": ["-visits", "last_visit"], + "limit": 15, + } + + _test_results(query, expected) + + +def test_deeply_nested_filters(): + query = "(((visited = true and visits > 0)))" + expected = { + "filters": { + "bool": { + "must": [ + {"term": {"has_visits": True},}, + {"range": {"nb_visits": {"gt": 0}}}, + ] + } + }, + } + + _test_results(query, expected) + + +def test_origin_and_metadata_filters(): + query = 'origin = django or metadata = "framework and web"' + expected = { + "filters": { + "bool": { + "should": [ + { + "multi_match": { + "query": "django", + "type": "bool_prefix", + "operator": "and", + "fields": [ + "url.as_you_type", + "url.as_you_type._2gram", + "url.as_you_type._3gram", + ], + } + }, + { + "nested": { + "path": "intrinsic_metadata", + "query": { + "multi_match": { + "query": "framework and web", + "type": "cross_fields", + "operator": "and", + "fields": ["intrinsic_metadata.*"], + "lenient": True, + } + }, + } + }, + ] + } + } + } + + _test_results(query, expected) + + +def test_visits_not_equal_to_filter(): + query = "visits != 5" + expected = { + "filters": { + "bool": {"must_not": [{"range": {"nb_visits": {"gte": 5, "lte": 5}}},]} + }, + } + + _test_results(query, expected) + + +def test_visit_type_filter(): + query = 'visit_type = [git,"pypi"]' + expected = {"filters": {"terms": {"visit_types": ["git", "pypi"]}}} + + _test_results(query, expected) + + +def test_keyword_filter(): + query = r"""keyword in [word1, "word2 \" \' word3"]""" + expected = { + "filters": { + "nested": { + "path": "intrinsic_metadata", + "query": { + "multi_match": { + "query": r"""word1 word2 " ' word3""", + "fields": [ + get_expansion("keywords", ".") + "^2", + get_expansion("descriptions", "."), + ], + } + }, + } + } + } + + _test_results(query, expected) + + +def test_language_filter(): + query = 'language in [python, "go lang", cpp]' + expected = { + "filters": { + "nested": { + "path": "intrinsic_metadata", + "query": { + "bool": { + "should": [ + { + "match": { + get_expansion( + "programming_languages", "." + ): "python" + } + }, + { + "match": { + get_expansion( + "programming_languages", "." 
+ ): "go lang" + } + }, + { + "match": { + get_expansion("programming_languages", "."): "cpp" + } + }, + ] + } + }, + } + } + } + + _test_results(query, expected) + + +def test_license_filter(): + query = 'license in ["GPL 3", Apache, MIT]' + expected = { + "filters": { + "nested": { + "path": "intrinsic_metadata", + "query": { + "bool": { + "should": [ + {"match": {get_expansion("licenses", "."): "GPL 3"}}, + {"match": {get_expansion("licenses", "."): "Apache"}}, + {"match": {get_expansion("licenses", "."): "MIT"}}, + ] + } + }, + } + } + } + + _test_results(query, expected) + + +def test_date_created_not_equal_to_filter(): + query = "created != 2020-01-01" + expected = { + "filters": { + "nested": { + "path": "intrinsic_metadata", + "query": { + "bool": { + "must_not": [ + { + "range": { + get_expansion("date_created", "."): { + "gte": "2020-01-01", + "lte": "2020-01-01", + } + } + } + ] + } + }, + } + } + } + + _test_results(query, expected) + + +def test_date_created_greater_than_filter(): + query = "created >= 2020-01-01" + expected = { + "filters": { + "nested": { + "path": "intrinsic_metadata", + "query": { + "bool": { + "must": [ + { + "range": { + get_expansion("date_created", "."): { + "gte": "2020-01-01", + } + } + } + ] + } + }, + } + } + } + + _test_results(query, expected) + + +def test_last_eventful_visit_not_equal_to_filter(): + query = "last_visit != 2020-01-01" + expected = { + "filters": { + "bool": { + "must_not": [ + { + "range": { + "last_visit_date": { + "gte": "2020-01-01", + "lte": "2020-01-01", + } + } + } + ] + } + } + } + + _test_results(query, expected) + + +def test_last_eventful_visit_less_than_to_filter(): + query = "last_visit < 2020-01-01" + expected = {"filters": {"range": {"last_visit_date": {"lt": "2020-01-01"}}}} + + _test_results(query, expected) + + +def test_keyword_no_escape_inside_filter(): + # any keyword (filter name/operator/value) inside a filter + # must be considered a string. 
+ query = r'''origin = "language in [\'go lang\', python]"''' + expected = { + "filters": { + "multi_match": { + "query": r"""language in ['go lang', python]""", + "type": "bool_prefix", + "operator": "and", + "fields": [ + "url.as_you_type", + "url.as_you_type._2gram", + "url.as_you_type._3gram", + ], + } + } + } + _test_results(query, expected) + + +def test_escaped_punctutation_parsing(): + query = r"""keyword in ["foo \'\" bar"]""" + expected = { + "filters": { + "nested": { + "path": "intrinsic_metadata", + "query": { + "multi_match": { + "query": r"""foo '" bar""", + "fields": [ + get_expansion("keywords", ".") + "^2", + get_expansion("descriptions", "."), + ], + } + }, + } + } + } + _test_results(query, expected) diff --git a/swh/search/translator.py b/swh/search/translator.py new file mode 100644 index 0000000..9a607bf --- /dev/null +++ b/swh/search/translator.py @@ -0,0 +1,301 @@ +import os + +from pkg_resources import resource_filename +from tree_sitter import Language, Parser + +from swh.search.utils import get_expansion, unescape + + +class Translator: + + RANGE_OPERATOR_MAP = { + ">": "gt", + "<": "lt", + ">=": "gte", + "<=": "lte", + } + + def __init__(self): + ql_rel_paths = [ + "static/swh_ql.so", # installed + "../../query_language/static/swh_ql.so", # development + ] + for ql_rel_path in ql_rel_paths: + ql_path = resource_filename("swh.search", ql_rel_path) + if os.path.exists(ql_path): + break + else: + assert False, "swh_ql.so was not found in any of the expected paths" + + search_ql = Language(ql_path, "swh_search_ql") + + self.parser = Parser() + self.parser.set_language(search_ql) + self.query = "" + + def parse_query(self, query): + self.query = query + tree = self.parser.parse(query.encode("utf8")) + self.query_node = tree.root_node + + if self.query_node.has_error: + raise Exception("Invalid query") + + return self._traverse(self.query_node) + + def _traverse(self, node): + if len(node.children) == 3 and node.children[1].type == "filters": + # filters => ( filters ) + return self._traverse(node.children[1]) # Go past the () brackets + if node.type == "query": + result = {} + for child in node.children: + # query => filters sort_by limit + result[child.type] = self._traverse(child) + + return result + + if node.type == "filters": + if len(node.children) == 1: + # query => filters + # filters => filters + # filters => filter + # Current node is just a wrapper, so go one level deep + return self._traverse(node.children[0]) + + if len(node.children) == 3: + # filters => filters conj_op filters + filters1 = self._traverse(node.children[0]) + conj_op = self._get_value(node.children[1]) + filters2 = self._traverse(node.children[2]) + + if conj_op == "and": + # "must" is equivalent to "AND" + return {"bool": {"must": [filters1, filters2]}} + if conj_op == "or": + # "should" is equivalent to "OR" + return {"bool": {"should": [filters1, filters2]}} + + if node.type == "filter": + filter_category = node.children[0] + return self._parse_filter(filter_category) + + if node.type == "sortBy": + return self._parse_filter(node) + + if node.type == "limit": + return self._parse_filter(node) + + return Exception( + f"Unknown node type ({node.type}) " + f"or unexpected number of children ({node.children})" + ) + + def _get_value(self, node): + if ( + len(node.children) > 0 + and node.children[0].type == "[" + and node.children[-1].type == "]" + ): + # array + return [self._get_value(child) for child in node.children if child.is_named] + + start = node.start_point[1] + end = 
node.end_point[1] + + value = self.query[start:end] + + if len(value) > 1 and ( + (value[0] == "'" and value[-1] == "'") or (value[0] and value[-1] == '"') + ): + return unescape(value[1:-1]) + + if node.type in ["number", "numberVal"]: + return int(value) + return unescape(value) + + def _parse_filter(self, filter): + + if filter.type == "boundedListFilter": + filter = filter.children[0] + + children = filter.children + assert len(children) == 3 + + category = filter.type + name, op, value = [self._get_value(child) for child in children] + + if category == "patternFilter": + if name == "origin": + return { + "multi_match": { + "query": value, + "type": "bool_prefix", + "operator": "and", + "fields": [ + "url.as_you_type", + "url.as_you_type._2gram", + "url.as_you_type._3gram", + ], + } + } + elif name == "metadata": + return { + "nested": { + "path": "intrinsic_metadata", + "query": { + "multi_match": { + "query": value, + # Makes it so that the "foo bar" query returns + # documents which contain "foo" in a field and "bar" + # in a different field + "type": "cross_fields", + # All keywords must be found in a document for it to + # be considered a match. + # TODO: allow missing keywords? + "operator": "and", + # Searches on all fields of the intrinsic_metadata dict, + # recursively. + "fields": ["intrinsic_metadata.*"], + # date{Created,Modified,Published} are of type date + "lenient": True, + } + }, + } + } + + if category == "booleanFilter": + if name == "visited": + return {"term": {"has_visits": value == "true"}} + + if category == "numericFilter": + if name == "visits": + if op in ["=", "!="]: + return { + "bool": { + ("must" if op == "=" else "must_not"): [ + {"range": {"nb_visits": {"gte": value, "lte": value}}} + ] + } + } + else: + return { + "range": {"nb_visits": {self.RANGE_OPERATOR_MAP[op]: value}} + } + + if category == "visitTypeFilter": + if name == "visit_type": + return {"terms": {"visit_types": value}} + + if category == "unboundedListFilter": + value_array = value + + if name == "keyword": + return { + "nested": { + "path": "intrinsic_metadata", + "query": { + "multi_match": { + "query": " ".join(value_array), + "fields": [ + get_expansion("keywords", ".") + "^2", + get_expansion("descriptions", "."), + # "^2" boosts an origin's score by 2x + # if it the queried keywords are + # found in its intrinsic_metadata.keywords + ], + } + }, + } + } + elif name in ["language", "license"]: + name_mapping = { + "language": "programming_languages", + "license": "licenses", + } + name = name_mapping[name] + + return { + "nested": { + "path": "intrinsic_metadata", + "query": { + "bool": { + "should": [ + {"match": {get_expansion(name, "."): val}} + for val in value_array + ], + } + }, + } + } + + if category == "dateFilter": + + if name in ["created", "modified", "published"]: + if op in ["=", "!="]: + return { + "nested": { + "path": "intrinsic_metadata", + "query": { + "bool": { + ("must" if op == "=" else "must_not"): [ + { + "range": { + get_expansion(f"date_{name}", "."): { + "gte": value, + "lte": value, + } + } + } + ], + } + }, + } + } + + return { + "nested": { + "path": "intrinsic_metadata", + "query": { + "bool": { + "must": [ + { + "range": { + get_expansion(f"date_{name}", "."): { + self.RANGE_OPERATOR_MAP[op]: value, + } + } + } + ], + } + }, + } + } + else: + if op in ["=", "!="]: + return { + "bool": { + ("must" if op == "=" else "must_not"): [ + { + "range": { + f"{name}_date": {"gte": value, "lte": value,} + } + } + ], + } + } + return { + "range": { + 
f"{name}_date": { + self.RANGE_OPERATOR_MAP[op]: value.replace("Z", "+00:00"), + } + } + } + + if category == "sortBy": + return value + + if category == "limit": + return value + + raise Exception(f"Unknown filter {category}.{name}") diff --git a/swh/search/utils.py b/swh/search/utils.py index 54ae2b1..aaa9ed8 100644 --- a/swh/search/utils.py +++ b/swh/search/utils.py @@ -1,57 +1,105 @@ from datetime import datetime import iso8601 # type: ignore def get_expansion(field, sep=None): METADATA_FIELDS = { "licenses": ["intrinsic_metadata", "http://schema.org/license", "@id"], "programming_languages": [ "intrinsic_metadata", "http://schema.org/programmingLanguage", "@value", ], "keywords": ["intrinsic_metadata", "http://schema.org/keywords", "@value",], "descriptions": [ "intrinsic_metadata", "http://schema.org/description", "@value", ], "date_created": [ "intrinsic_metadata", "http://schema.org/dateCreated", "@value", ], "date_modified": [ "intrinsic_metadata", "http://schema.org/dateModified", "@value", ], "date_published": [ "intrinsic_metadata", "http://schema.org/datePublished", "@value", ], } if sep: return sep.join(METADATA_FIELDS[field]) return METADATA_FIELDS[field] def is_date_parsable(date_str): """ Return True if date_str is in the format %Y-%m-%d or the standard ISO format. Otherwise return False. """ try: datetime.strptime(date_str, "%Y-%m-%d") return True except Exception: try: iso8601.parse_date(date_str) return True except Exception: return False + + +def escape(obj): + r"""Makes the object directly injectable into the + query language by converting the escapable parts of + the object into escape sequences. + + For strings, appends \ before special characters like ', ", and \ + + For arrays, applies the same transformation on each element, joins the + elements and returns a string-like representation of the list. 
+ + >>> print(escape("foo ' bar")) + "foo \' bar" + + >>> print(escape([r"foo ' bar", r"bar \\\' baz", r'foo " baz'])) + ["foo \' bar", "bar \\\\\\\' baz", "foo \" baz"] + + """ + if type(obj) == list: + items = [escape(item) for item in obj] + return "[" + ", ".join(items) + "]" + elif type(obj) == str: + return ( + '"' + + obj.translate({ord("'"): r"\'", ord('"'): r"\"", ord("\\"): r"\\",}) + + '"' + ) + else: + raise Exception(f"Unexpected item type {type(obj)}") + + +def unescape(string): + r"""Processes the escaped special characters + + >>> unescape(r'''foo " bar''') == r'''foo " bar''' + True + >>> unescape(r'''foo \" bar''') == r'''foo " bar''' + True + >>> unescape(r'''foo \\" bar''') == r'''foo \" bar''' + True + >>> unescape(r'''foo \\\" bar''') == r'''foo \" bar''' + True + >>> unescape(r'''foo \\\\" bar''') == r'''foo \\" bar''' + True + """ + + return bytes(string, "utf-8").decode("unicode_escape") diff --git a/tox.ini b/tox.ini index aef0583..b70d51c 100644 --- a/tox.ini +++ b/tox.ini @@ -1,73 +1,74 @@ [tox] envlist=black,flake8,mypy,py3 [testenv] +passenv = YARN extras = testing deps = pytest-cov commands = pytest --doctest-modules \ {envsitepackagesdir}/swh/search \ --cov={envsitepackagesdir}/swh/search \ --cov-branch {posargs} [testenv:black] skip_install = true deps = black==19.10b0 commands = {envpython} -m black --check swh [testenv:flake8] skip_install = true deps = flake8 commands = {envpython} -m flake8 [testenv:mypy] extras = testing deps = mypy commands = mypy swh # build documentation outside swh-environment using the current # git HEAD of swh-docs, is executed on CI for each diff to prevent # breaking doc build [testenv:sphinx] whitelist_externals = make usedevelop = true extras = testing deps = # fetch and install swh-docs in develop mode -e git+https://forge.softwareheritage.org/source/swh-docs#egg=swh.docs setenv = SWH_PACKAGE_DOC_TOX_BUILD = 1 # turn warnings into errors SPHINXOPTS = -W commands = make -I ../.tox/sphinx/src/swh-docs/swh/ -C docs # build documentation only inside swh-environment using local state # of swh-docs package [testenv:sphinx-dev] whitelist_externals = make usedevelop = true extras = testing deps = # install swh-docs in develop mode -e ../swh-docs setenv = SWH_PACKAGE_DOC_TOX_BUILD = 1 # turn warnings into errors SPHINXOPTS = -W commands = make -I ../.tox/sphinx-dev/src/swh-docs/swh/ -C docs diff --git a/yarn.lock b/yarn.lock new file mode 100644 index 0000000..a9f8eaf --- /dev/null +++ b/yarn.lock @@ -0,0 +1,13 @@ +# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. +# yarn lockfile v1 + + +nan@^2.14.2: + version "2.14.2" + resolved "https://registry.yarnpkg.com/nan/-/nan-2.14.2.tgz#f5376400695168f4cc694ac9393d0c9585eeea19" + integrity sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ== + +tree-sitter-cli@^0.20.0: + version "0.20.0" + resolved "https://registry.yarnpkg.com/tree-sitter-cli/-/tree-sitter-cli-0.20.0.tgz#feaaa11c7ecf44a6e236aa1e2963b85d045d33cc" + integrity sha512-4D1qapWbJXZ5rrSUGM5rcw5Vuq/smzn9KbiFRhlON6KeuuXjra+KAtDYVrDgAoLIG4ku+jbEEGrJxCptUGi3dg==
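The `Translator` introduced in this patch loads a compiled tree-sitter grammar from `static/swh_ql.so` (or `query_language/static/swh_ql.so` in development). In this repository that file is produced by `setup.py ts_build_so`; purely as an illustrative sketch, py-tree-sitter's `Language.build_library` can compile a generated grammar into such a shared object (the `query_language` directory layout is assumed here, not taken from the patch):

```python
from tree_sitter import Language

# Sketch only: compile the generated parser (query_language/src/parser.c)
# into the shared object that Translator probes for at construction time.
Language.build_library(
    "query_language/static/swh_ql.so",  # output path probed by Translator
    ["query_language"],                 # grammar directory (assumed layout)
)

# The language name must match the one Translator passes to Language().
swh_ql = Language("query_language/static/swh_ql.so", "swh_search_ql")
```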
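Once the grammar is available, `Translator.parse_query` turns a query string into a dict holding an Elasticsearch filter (plus optional `sortBy` and `limit` entries). A minimal usage sketch, reusing the conjunction example from `test_translator.py` above:

```python
from swh.search.translator import Translator

translator = Translator()

# "or" becomes a bool/should clause, "and" a bool/must clause,
# and numeric comparisons become range queries on nb_visits.
output = translator.parse_query("visited = true or visits > 2 and visits < 5")
assert output == {
    "filters": {
        "bool": {
            "should": [
                {"term": {"has_visits": True}},
                {
                    "bool": {
                        "must": [
                            {"range": {"nb_visits": {"gt": 2}}},
                            {"range": {"nb_visits": {"lt": 5}}},
                        ]
                    }
                },
            ]
        }
    }
}
```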
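`get_expansion` is what produces the dotted `intrinsic_metadata` field paths used in the nested queries above. A short illustration of the values it returns for two of the `METADATA_FIELDS` entries:

```python
from swh.search.utils import get_expansion

# With a separator, the codemeta path is joined into the dotted form used in
# match/multi_match clauses; the translator appends "^2" to this value to
# boost keyword matches over description matches.
assert (
    get_expansion("keywords", ".")
    == "intrinsic_metadata.http://schema.org/keywords.@value"
)

# Without a separator, the raw path components are returned as a list.
assert get_expansion("licenses") == [
    "intrinsic_metadata",
    "http://schema.org/license",
    "@id",
]
```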
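Finally, `escape` and `unescape` in `swh.search.utils` are intended to be inverses for quoting user input in the query language. A small sketch of that round trip (consistent with the docstrings above, though not part of the patch itself):

```python
from swh.search.utils import escape, unescape

# escape() wraps a string in double quotes and backslash-escapes ', " and \;
# unescape() decodes those backslash sequences again, so stripping the added
# quotes and unescaping should give back the original (ASCII) value.
value = "foo ' \" \\ bar"
quoted = escape(value)
assert quoted.startswith('"') and quoted.endswith('"')
assert unescape(quoted[1:-1]) == value
```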