diff --git a/Makefile.local b/Makefile.local index 1d1fd9d..b363dbb 100644 --- a/Makefile.local +++ b/Makefile.local @@ -1,31 +1,32 @@ YARN ?= yarn PYTHON ?= python3 - ts-install: package.json $(PYTHON) setup.py ts_install ts-generate: ts-install query_language/grammar.js $(PYTHON) setup.py ts_generate ts-dev: ts-install ifdef sanitize $(YARN) dev | sed '5,$$s/[[0-9]\+, [0-9]\+]/ /g' | sed '5,$$s/ *- *//g'; else $(YARN) dev; endif ts-test: ts-install $(YARN) test ts-repl: ts-generate $(YARN) repl ts-build-so: ts-generate query_language/src/ $(PYTHON) setup.py ts_build_so ts-build-wasm: ts-generate query_language/src/ $(PYTHON) setup.py ts_build_wasm ts-build: ts-build-so ts-build-wasm @echo 'Build completed' + +test: ts-build-so diff --git a/PKG-INFO b/PKG-INFO index 94c6798..d9f99ac 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,90 +1,90 @@ Metadata-Version: 2.1 Name: swh.search -Version: 0.11.2 +Version: 0.11.3 Summary: Software Heritage search service Home-page: https://forge.softwareheritage.org/diffusion/DSEA Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-search Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-search/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 3 - Alpha Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing License-File: LICENSE License-File: AUTHORS swh-search ========== Search service for the Software Heritage archive. It is similar to swh-storage in what it contains, but provides different ways to query it: while swh-storage is mostly a key-value store that returns an object from a primary key, swh-search is focused on reverse indices, to allow finding objects that match some criteria; for example full-text search. It currently uses ElasticSearch, and provides only origin search (by URL and metadata). ## Dependencies - Python tests for this module include tests that cannot be run without a local ElasticSearch instance, so you need the ElasticSearch server executable on your machine (no need to have a running ElasticSearch server). - Debian-like host The `elasticsearch` package is required. As it's not part of Debian stable, [another Debian repository needs to be configured](https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html#deb-repo) - Non-Debian-like host The tests expect: - `/usr/share/elasticsearch/jdk/bin/java` to exist. - `org.elasticsearch.bootstrap.Elasticsearch` to be in Java's classpath. - Emscripten is required for generating the tree-sitter WASM module. The following commands need to be executed for the setup: ```bash cd /opt && git clone https://github.com/emscripten-core/emsdk.git && cd emsdk && \ ./emsdk install latest && ./emsdk activate latest PATH="${PATH}:/opt/emsdk/upstream/emscripten" ``` **Note:** If emsdk isn't found in the PATH, the tree-sitter CLI automatically pulls the `emscripten/emsdk` image from Docker Hub when `make ts-build-wasm` or `make ts-build` is used.
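Once built, the parser is loaded through py-tree-sitter. A minimal sketch of what that looks like, assuming `swh_ql.so` was built into `swh/search/static/` as this diff's `setup.py` does, and using the grammar name `swh_search_ql` from the grammar's `name` field:

```python
from tree_sitter import Language, Parser

# swh_ql.so is produced by `make ts-build-so` (the ts_build_so setup.py command)
language = Language("swh/search/static/swh_ql.so", "swh_search_ql")

parser = Parser()
parser.set_language(language)

# Parse a query written in the swh-search query language
tree = parser.parse(b"origin = django and visits >= 5")
print(tree.root_node.sexp())
```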
## Make targets Below is the list of available make targets that can be executed from the root directory of swh-search in order to build and/or run swh-search under various configurations: * **ts-install**: Installs node_modules and the emscripten SDK required for TreeSitter * **ts-generate**: Generates parser files (C and JSON) from the grammar * **ts-repl**: Starts a web-based playground for the TreeSitter grammar. It's the recommended way to develop the TreeSitter grammar. * **ts-dev**: Parses `query_language/sample_query` and prints the corresponding syntax expression along with the start and end positions of all the nodes. * **ts-dev sanitize=1**: Same as **ts-dev** but without the start and end positions of the nodes. This format is expected by TreeSitter's native test command. `sanitize=1` cleans the output of **ts-dev** using `sed` to achieve the desired format. * **ts-test**: Executes TreeSitter's native tests * **ts-build-so**: Generates the `swh_ql.so` file from the previously generated parser using py-tree-sitter * **ts-build-wasm**: Generates the `swh_ql.wasm` file from the previously generated parser using emscripten * **ts-build**: Executes both **ts-build-so** and **ts-build-wasm** diff --git a/package.json b/package.json index 523b762..e8b665e 100644 --- a/package.json +++ b/package.json @@ -1,36 +1,34 @@ { "name": "swh-search-query-language-parser", "version": "1.0.0", "description": "Parser for Software Heritage archive search query language", "scripts": { - "generate": "cd query_language && tree-sitter generate --no-bindings && echo 'Generated parser files '", - "dev": "yarn generate && cd query_language && tree-sitter parse sample_query", - "test": "yarn generate && cd query_language && tree-sitter test", - "build-so": "yarn generate && cd query_language && python3 build.py", - "build-wasm": "yarn generate && cd query_language && tree-sitter build-wasm .
&& mv tree-sitter-swh_search_ql.wasm swh_ql.wasm", + "generate": "cd swh/search/query_language && tree-sitter generate --no-bindings && echo 'Generated parser files '", + "dev": "yarn generate && cd swh/search/query_language && tree-sitter parse sample_query", + "test": "yarn generate && cd swh/search/query_language && tree-sitter test", "build": "echo 'use `pip3 install .` or `pip3 wheel .` instead.'", - "repl": "yarn generate && cd query_language && tree-sitter build-wasm && tree-sitter playground" + "repl": "yarn generate && cd swh/search/query_language && tree-sitter build-wasm && tree-sitter playground" }, "repository": { "type": "git", "url": "https://forge.softwareheritage.org/source/swh-search.git" }, "keywords": [ "swh", "Software Heritage", "treesitter", "parser", "custom", "search", "query", "language" ], "author": "The Software Heritage developers", "license": "GPL-3.0-only", "dependencies": { "nan": "^2.14.2" }, "devDependencies": { "tree-sitter-cli": "^0.20.0" } } diff --git a/query_language/build.py b/query_language/build.py deleted file mode 100644 index 62c3de2..0000000 --- a/query_language/build.py +++ /dev/null @@ -1,3 +0,0 @@ -from tree_sitter import Language - -Language.build_library("swh_ql.so", ["."]) diff --git a/setup.py b/setup.py index 52de412..a8d4c08 100755 --- a/setup.py +++ b/setup.py @@ -1,201 +1,185 @@ #!/usr/bin/env python3 # Copyright (C) 2015-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from distutils.cmd import Command +from distutils.command.build import build from io import open import os import shutil import subprocess -import sys from setuptools import find_packages, setup -from setuptools.command.build_py import build_py +from setuptools.command.develop import develop from setuptools.command.sdist import sdist here = os.path.abspath(os.path.dirname(__file__)) # Get the long description from the README file with open(os.path.join(here, "README.md"), encoding="utf-8") as f: long_description = f.read() def parse_requirements(name=None): if name: reqf = "requirements-%s.txt" % name else: reqf = "requirements.txt" requirements = [] if not os.path.exists(reqf): return requirements with open(reqf) as f: for line in f.readlines(): line = line.strip() if not line or line.startswith("#"): continue requirements.append(line) return requirements yarn = os.environ.get("YARN", "yarn") class TSCommand(Command): user_options = [] def initialize_options(self): pass def finalize_options(self): pass class TSInstallCommand(TSCommand): description = "Installs node_modules related to query language" def run(self): subprocess.run([yarn, "install"], check=True) -class TSGenerateCommand(TSCommand): - description = "Generates parser related files from grammar.js" - - def run(self): - subprocess.run([yarn, "generate"], check=True) - - class TSBuildSoCommand(TSCommand): description = "Builds swh_ql.so" + def initialize_options(self): + self.build_lib = None + super().initialize_options() + + def finalize_options(self): + self.set_undefined_options("build", ("build_lib", "build_lib")) + super().finalize_options() + def run(self): - # setup_requires changes sys.path so the build dependencies - # can be imported even though they are in a temporary - # directory (usually `.eggs`). 
We need to pass this updated sys.path to - # 'yarn build-so', as it invokes a Python script that needs to import - # tree_sitter - env = {**os.environ, "PYTHONPATH": os.pathsep.join(sys.path)} - subprocess.run([yarn, "build-so"], check=True, env=env) - print("swh_ql.so file generated") + self.run_command("ts_install") + ql_dir = os.path.join(self.build_lib, "swh/search/query_language") + if not os.path.exists(os.path.join(ql_dir, "src/parser.c")): + generate_parser(ql_dir, copy_tree=True) -class TSBuildWasmCommand(TSCommand): - description = "Builds swh_ql.wasm" + static_dir = os.path.join(self.build_lib, "swh/search/static") + os.makedirs(static_dir, exist_ok=True) - def run(self): - subprocess.run([yarn, "build-wasm"], check=True) - print("swh_ql.wasm file generated") + # This import cannot be toplevel, as setuptools installs it after the script + # starts running + from tree_sitter import Language + + Language.build_library(os.path.join(static_dir, "swh_ql.so"), [ql_dir]) + print("swh_ql.so file generated") class TSBuildCommand(TSCommand): description = "Builds swh_ql.so and swh_ql.wasm" def run(self): self.run_command("ts_build_so") - self.run_command("ts_build_wasm") -class TSBuildExportCommand(TSCommand): - description = "Builds swh_ql.so and swh_ql.wasm and exports them to static/" +class custom_build(build): + def run(self): + super().run() - def initialize_options(self): - self.build_lib = None - super().initialize_options() + if not self.dry_run: + self.run_command("ts_build") - def finalize_options(self): - self.set_undefined_options("build", ("build_lib", "build_lib")) - super().finalize_options() - def run(self): - self.run_command("ts_install") - self.run_command("ts_build") +class custom_sdist(sdist): + def make_release_tree(self, base_dir, files): + super().make_release_tree(base_dir, files) - print("static files generated. copying them to package dir") - os.makedirs(os.path.join(self.build_lib, "swh/search/static"), exist_ok=True) - shutil.copyfile( - "query_language/swh_ql.so", - os.path.join(self.build_lib, "swh/search/static/swh_ql.so"), - ) - shutil.copyfile( - "query_language/swh_ql.wasm", - os.path.join(self.build_lib, "swh/search/static/swh_ql.wasm"), - ) + dist_ql_path = os.path.join(base_dir, "swh/search/query_language") + if not self.dry_run: + self.run_command("ts_install") + + generate_parser(dist_ql_path, copy_tree=True) -class custom_build(build_py): + +class custom_develop(develop): def run(self): super().run() if not self.dry_run: - self.run_command("ts_build_export") + generate_parser("swh/search/query_language", copy_tree=False) -class custom_sdist(sdist): - def make_release_tree(self, base_dir, files): - super().make_release_tree(base_dir, files) - # TODO: build the .c file and .wasm but not .so, because it's architecture- - # dependent, and shouldn't be in a sdist (aka *source* distribution) - if not self.dry_run: - self.run_command("ts_install") - self.run_command("ts_build") +def generate_parser(dest_path, copy_tree): + if copy_tree: + # FIXME: setuptools should copy this itself... + print("Copying parser files") + if os.path.exists(dest_path): + shutil.rmtree(dest_path) + shutil.copytree("swh/search/query_language", dest_path) - print("static files generated. 
copying them to package dir") - os.makedirs(os.path.join(base_dir, "swh/search/static"), exist_ok=True) - shutil.copyfile( - "query_language/swh_ql.so", - os.path.join(base_dir, "swh/search/static/swh_ql.so"), - ) - shutil.copyfile( - "query_language/swh_ql.wasm", - os.path.join(base_dir, "swh/search/static/swh_ql.wasm"), - ) + print("Getting path") + path = subprocess.check_output([yarn, "bin"]).decode().strip() + env = {**os.environ, "PATH": os.pathsep.join([path, os.environ["PATH"]])} + print("Generating") + subprocess.run(["tree-sitter", "generate", "--no-bindings"], cwd=dest_path, env=env) setup( name="swh.search", description="Software Heritage search service", long_description=long_description, long_description_content_type="text/markdown", python_requires=">=3.7", author="Software Heritage developers", author_email="swh-devel@inria.fr", url="https://forge.softwareheritage.org/diffusion/DSEA", packages=find_packages(), # packages's modules install_requires=parse_requirements() + parse_requirements("swh"), tests_require=parse_requirements("test"), entry_points=""" [swh.cli.subcommands] search=swh.search.cli """, setup_requires=["setuptools-scm", "tree-sitter==0.19.0"], use_scm_version=True, extras_require={"testing": parse_requirements("test")}, include_package_data=True, classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Development Status :: 3 - Alpha", ], project_urls={ "Bug Reports": "https://forge.softwareheritage.org/maniphest", "Funding": "https://www.softwareheritage.org/donate", "Source": "https://forge.softwareheritage.org/source/swh-search", "Documentation": "https://docs.softwareheritage.org/devel/swh-search/", }, cmdclass={ - "build_py": custom_build, + "build": custom_build, "sdist": custom_sdist, + "develop": custom_develop, "ts_install": TSInstallCommand, - "ts_generate": TSGenerateCommand, "ts_build_so": TSBuildSoCommand, - "ts_build_wasm": TSBuildWasmCommand, "ts_build": TSBuildCommand, - "ts_build_export": TSBuildExportCommand, }, zip_safe=False, ) diff --git a/swh.search.egg-info/PKG-INFO b/swh.search.egg-info/PKG-INFO index 94c6798..d9f99ac 100644 --- a/swh.search.egg-info/PKG-INFO +++ b/swh.search.egg-info/PKG-INFO @@ -1,90 +1,90 @@ Metadata-Version: 2.1 Name: swh.search -Version: 0.11.2 +Version: 0.11.3 Summary: Software Heritage search service Home-page: https://forge.softwareheritage.org/diffusion/DSEA Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-search Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-search/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 3 - Alpha Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing License-File: LICENSE License-File: AUTHORS swh-search ========== Search service for the Software Heritage archive. 
It is similar to swh-storage in what it contains, but provides different ways to query it: while swh-storage is mostly a key-value store that returns an object from a primary key, swh-search is focused on reverse indices, to allow finding objects that match some criteria; for example full-text search. It currently uses ElasticSearch, and provides only origin search (by URL and metadata). ## Dependencies - Python tests for this module include tests that cannot be run without a local ElasticSearch instance, so you need the ElasticSearch server executable on your machine (no need to have a running ElasticSearch server). - Debian-like host The `elasticsearch` package is required. As it's not part of Debian stable, [another Debian repository needs to be configured](https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html#deb-repo) - Non-Debian-like host The tests expect: - `/usr/share/elasticsearch/jdk/bin/java` to exist. - `org.elasticsearch.bootstrap.Elasticsearch` to be in Java's classpath. - Emscripten is required for generating the tree-sitter WASM module. The following commands need to be executed for the setup: ```bash cd /opt && git clone https://github.com/emscripten-core/emsdk.git && cd emsdk && \ ./emsdk install latest && ./emsdk activate latest PATH="${PATH}:/opt/emsdk/upstream/emscripten" ``` **Note:** If emsdk isn't found in the PATH, the tree-sitter CLI automatically pulls the `emscripten/emsdk` image from Docker Hub when `make ts-build-wasm` or `make ts-build` is used. ## Make targets Below is the list of available make targets that can be executed from the root directory of swh-search in order to build and/or run swh-search under various configurations: * **ts-install**: Installs node_modules and the emscripten SDK required for TreeSitter * **ts-generate**: Generates parser files (C and JSON) from the grammar * **ts-repl**: Starts a web-based playground for the TreeSitter grammar. It's the recommended way to develop the TreeSitter grammar. * **ts-dev**: Parses `query_language/sample_query` and prints the corresponding syntax expression along with the start and end positions of all the nodes. * **ts-dev sanitize=1**: Same as **ts-dev** but without the start and end positions of the nodes. This format is expected by TreeSitter's native test command. `sanitize=1` cleans the output of **ts-dev** using `sed` to achieve the desired format.
* **ts-test**: Executes TreeSitter's native tests * **ts-build-so**: Generates the `swh_ql.so` file from the previously generated parser using py-tree-sitter * **ts-build-wasm**: Generates the `swh_ql.wasm` file from the previously generated parser using emscripten * **ts-build**: Executes both **ts-build-so** and **ts-build-wasm** diff --git a/swh.search.egg-info/SOURCES.txt b/swh.search.egg-info/SOURCES.txt index f4609a7..34e7969 100644 --- a/swh.search.egg-info/SOURCES.txt +++ b/swh.search.egg-info/SOURCES.txt @@ -1,71 +1,70 @@ .gitignore .pre-commit-config.yaml AUTHORS CODE_OF_CONDUCT.md CONTRIBUTORS LICENSE MANIFEST.in Makefile Makefile.local README.md mypy.ini package.json pyproject.toml pytest.ini requirements-swh.txt requirements-test.txt requirements.txt setup.cfg setup.py tox.ini yarn.lock docs/.gitignore docs/Makefile docs/cli.rst docs/conf.py docs/index.rst docs/query-language.rst docs/_static/.placeholder docs/_templates/.placeholder es_config/elasticsearch.keystore es_config/elasticsearch.yml es_config/jvm.options es_config/log4j2.properties -query_language/.gitignore -query_language/build.py -query_language/grammar.js -query_language/sample_query -query_language/tokens.js -query_language/test/corpus/combinations.txt swh/__init__.py swh.search.egg-info/PKG-INFO swh.search.egg-info/SOURCES.txt swh.search.egg-info/dependency_links.txt swh.search.egg-info/entry_points.txt swh.search.egg-info/not-zip-safe swh.search.egg-info/requires.txt swh.search.egg-info/top_level.txt swh/search/__init__.py swh/search/cli.py swh/search/elasticsearch.py swh/search/in_memory.py swh/search/interface.py swh/search/journal_client.py swh/search/metrics.py swh/search/py.typed swh/search/translator.py swh/search/utils.py swh/search/api/__init__.py swh/search/api/client.py swh/search/api/server.py +swh/search/query_language/.gitignore +swh/search/query_language/grammar.js +swh/search/query_language/sample_query +swh/search/query_language/tokens.js +swh/search/query_language/test/corpus/combinations.txt swh/search/tests/__init__.py swh/search/tests/conftest.py swh/search/tests/test_api_client.py swh/search/tests/test_cli.py swh/search/tests/test_elasticsearch.py swh/search/tests/test_in_memory.py swh/search/tests/test_init.py swh/search/tests/test_journal_client.py swh/search/tests/test_search.py swh/search/tests/test_server.py swh/search/tests/test_translator.py \ No newline at end of file diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py index efc2b66..05ccf1f 100644 --- a/swh/search/elasticsearch.py +++ b/swh/search/elasticsearch.py @@ -1,529 +1,554 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import base64 +from collections import Counter import logging import pprint from textwrap import dedent from typing import Any, Dict, Iterable, Iterator, List, Optional from elasticsearch import Elasticsearch, helpers import msgpack from swh.indexer import codemeta from swh.model import model from swh.model.identifiers import origin_identifier from swh.search.interface import ( SORT_BY_OPTIONS, MinimalOriginDict, OriginDict, PagedResult, ) from swh.search.metrics import send_metric, timed from swh.search.translator import Translator from swh.search.utils import escape, get_expansion, is_date_parsable logger = logging.getLogger(__name__) INDEX_NAME_PARAM = "index" READ_ALIAS_PARAM = "read_alias"
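# Keeping distinct read and write aliases over the same default "origin" index
# presumably allows zero-downtime reindexing: a fresh index can be populated
# through the write alias while searches keep resolving through the read alias.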
WRITE_ALIAS_PARAM = "write_alias" ORIGIN_DEFAULT_CONFIG = { INDEX_NAME_PARAM: "origin", READ_ALIAS_PARAM: "origin-read", WRITE_ALIAS_PARAM: "origin-write", } def _sanitize_origin(origin): origin = origin.copy() # Whitelist fields to be saved in Elasticsearch res = {"url": origin.pop("url")} for field_name in ( "blocklisted", "has_visits", "intrinsic_metadata", "visit_types", "nb_visits", "snapshot_id", "last_visit_date", "last_eventful_visit_date", "last_revision_date", "last_release_date", ): if field_name in origin: res[field_name] = origin.pop(field_name) # Run the JSON-LD expansion algorithm # to normalize the Codemeta metadata. # This is required as Elasticsearch needs each field to have a consistent # type across documents to be searchable; and non-expanded JSON-LD documents # can have various types in the same field. For example, all these are # equivalent in JSON-LD: # * {"author": "Jane Doe"} # * {"author": ["Jane Doe"]} # * {"author": {"@value": "Jane Doe"}} # * {"author": [{"@value": "Jane Doe"}]} # and JSON-LD expansion will convert them all to the last one. if "intrinsic_metadata" in res: intrinsic_metadata = res["intrinsic_metadata"] for date_field in ["dateCreated", "dateModified", "datePublished"]: if date_field in intrinsic_metadata: date = intrinsic_metadata[date_field] # If the date{Created,Modified,Published} value isn't parsable, # it gets rejected and isn't stored (unlike other fields) if not is_date_parsable(date): intrinsic_metadata.pop(date_field) res["intrinsic_metadata"] = codemeta.expand(intrinsic_metadata) return res def token_encode(index_to_tokenize: Dict[bytes, Any]) -> str: """Encode an index page result from a search as a string token""" page_token = base64.b64encode(msgpack.dumps(index_to_tokenize)) return page_token.decode() def token_decode(page_token: str) -> Dict[bytes, Any]: """Read the page_token""" return msgpack.loads(base64.b64decode(page_token.encode()), raw=True) class ElasticSearch: def __init__(self, hosts: List[str], indexes: Dict[str, Dict[str, str]] = {}): self._backend = Elasticsearch(hosts=hosts) self._translator = Translator() # Merge current configuration with default values origin_config = indexes.get("origin", {}) self.origin_config = {**ORIGIN_DEFAULT_CONFIG, **origin_config} def _get_origin_index(self) -> str: return self.origin_config[INDEX_NAME_PARAM] def _get_origin_read_alias(self) -> str: return self.origin_config[READ_ALIAS_PARAM] def _get_origin_write_alias(self) -> str: return self.origin_config[WRITE_ALIAS_PARAM] @timed def check(self): return self._backend.ping() def deinitialize(self) -> None: """Removes all indices from the Elasticsearch backend""" self._backend.indices.delete(index="*") def initialize(self) -> None: """Declare Elasticsearch indices, aliases and mappings""" if not self._backend.indices.exists(index=self._get_origin_index()): self._backend.indices.create(index=self._get_origin_index()) if not self._backend.indices.exists_alias(self._get_origin_read_alias()): self._backend.indices.put_alias( index=self._get_origin_index(), name=self._get_origin_read_alias() ) if not self._backend.indices.exists_alias(self._get_origin_write_alias()): self._backend.indices.put_alias( index=self._get_origin_index(), name=self._get_origin_write_alias() ) self._backend.indices.put_mapping( index=self._get_origin_index(), body={ "dynamic_templates": [ { "booleans_as_string": { # All fields are stored as strings in the metadata, # even the booleans "match_mapping_type": "boolean", "path_match": "intrinsic_metadata.*", "mapping": {"type":
"keyword"}, } } ], "date_detection": False, "properties": { # sha1 of the URL; used as the document id "sha1": {"type": "keyword", "doc_values": True,}, # Used both to search URLs, and as the result to return # as a response to queries "url": { "type": "text", # To split URLs into token on any character # that is not alphanumerical "analyzer": "simple", # 2-gram and partial-3-gram search (ie. with the end of the # third word potentially missing) "fields": { "as_you_type": { "type": "search_as_you_type", "analyzer": "simple", } }, }, "visit_types": {"type": "keyword"}, # used to filter out origins that were never visited "has_visits": {"type": "boolean",}, "nb_visits": {"type": "integer"}, "snapshot_id": {"type": "keyword"}, "last_visit_date": {"type": "date"}, "last_eventful_visit_date": {"type": "date"}, "last_release_date": {"type": "date"}, "last_revision_date": {"type": "date"}, "intrinsic_metadata": { "type": "nested", "properties": { "@context": { # don't bother indexing tokens in these URIs, as the # are used as namespaces "type": "keyword", }, "http://schema": { "properties": { "org/dateCreated": { "properties": {"@value": {"type": "date",}} }, "org/dateModified": { "properties": {"@value": {"type": "date",}} }, "org/datePublished": { "properties": {"@value": {"type": "date",}} }, } }, }, }, # Has this origin been taken down? "blocklisted": {"type": "boolean",}, }, }, ) @timed def flush(self) -> None: self._backend.indices.refresh(index=self._get_origin_write_alias()) @timed def origin_update(self, documents: Iterable[OriginDict]) -> None: write_index = self._get_origin_write_alias() documents = map(_sanitize_origin, documents) documents_with_sha1 = ( (origin_identifier(document), document) for document in documents ) # painless script that will be executed when updating an origin document update_script = dedent( """ // utility function to get and parse date ZonedDateTime getDate(def ctx, String date_field) { String default_date = "0001-01-01T00:00:00Z"; String date = ctx._source.getOrDefault(date_field, default_date); return ZonedDateTime.parse(date); } // backup current visit_types field value List visit_types = ctx._source.getOrDefault("visit_types", []); int nb_visits = ctx._source.getOrDefault("nb_visits", 0); ZonedDateTime last_visit_date = getDate(ctx, "last_visit_date"); String snapshot_id = ctx._source.getOrDefault("snapshot_id", ""); ZonedDateTime last_eventful_visit_date = getDate(ctx, "last_eventful_visit_date"); ZonedDateTime last_revision_date = getDate(ctx, "last_revision_date"); ZonedDateTime last_release_date = getDate(ctx, "last_release_date"); // update origin document with new field values ctx._source.putAll(params); // restore previous visit types after visit_types field overriding if (ctx._source.containsKey("visit_types")) { for (int i = 0; i < visit_types.length; ++i) { if (!ctx._source.visit_types.contains(visit_types[i])) { ctx._source.visit_types.add(visit_types[i]); } } } // Undo overwrite if incoming nb_visits is smaller if (ctx._source.containsKey("nb_visits")) { int incoming_nb_visits = ctx._source.getOrDefault("nb_visits", 0); if(incoming_nb_visits < nb_visits){ ctx._source.nb_visits = nb_visits; } } // Undo overwrite if incoming last_visit_date is older if (ctx._source.containsKey("last_visit_date")) { ZonedDateTime incoming_last_visit_date = getDate(ctx, "last_visit_date"); int difference = // returns -1, 0 or 1 incoming_last_visit_date.compareTo(last_visit_date); if(difference < 0){ ctx._source.last_visit_date = last_visit_date; } } // Undo update 
of last_eventful_date and snapshot_id if // snapshot_id hasn't changed OR incoming_last_eventful_visit_date is older if (ctx._source.containsKey("snapshot_id")) { String incoming_snapshot_id = ctx._source.getOrDefault("snapshot_id", ""); ZonedDateTime incoming_last_eventful_visit_date = getDate(ctx, "last_eventful_visit_date"); int difference = // returns -1, 0 or 1 incoming_last_eventful_visit_date.compareTo(last_eventful_visit_date); if(snapshot_id == incoming_snapshot_id || difference < 0){ ctx._source.snapshot_id = snapshot_id; ctx._source.last_eventful_visit_date = last_eventful_visit_date; } } // Undo overwrite if incoming last_revision_date is older if (ctx._source.containsKey("last_revision_date")) { ZonedDateTime incoming_last_revision_date = getDate(ctx, "last_revision_date"); int difference = // returns -1, 0 or 1 incoming_last_revision_date.compareTo(last_revision_date); if(difference < 0){ ctx._source.last_revision_date = last_revision_date; } } // Undo overwrite if incoming last_release_date is older if (ctx._source.containsKey("last_release_date")) { ZonedDateTime incoming_last_release_date = getDate(ctx, "last_release_date"); // returns -1, 0 or 1 int difference = incoming_last_release_date.compareTo(last_release_date); if(difference < 0){ ctx._source.last_release_date = last_release_date; } } """ # noqa ) actions = [ { "_op_type": "update", "_id": sha1, "_index": write_index, "scripted_upsert": True, "upsert": {**document, "sha1": sha1,}, "script": { "source": update_script, "lang": "painless", "params": document, }, } for (sha1, document) in documents_with_sha1 ] indexed_count, errors = helpers.bulk(self._backend, actions, index=write_index) assert isinstance(errors, List) # Make mypy happy send_metric("document:index", count=indexed_count, method_name="origin_update") send_metric( "document:index_error", count=len(errors), method_name="origin_update" ) def origin_dump(self) -> Iterator[model.Origin]: results = helpers.scan(self._backend, index=self._get_origin_read_alias()) for hit in results: yield self._backend.termvectors( index=self._get_origin_read_alias(), id=hit["_id"], fields=["*"] ) @timed def origin_search( self, *, query: str = "", url_pattern: Optional[str] = None, metadata_pattern: Optional[str] = None, with_visit: bool = False, visit_types: Optional[List[str]] = None, min_nb_visits: int = 0, min_last_visit_date: str = "", min_last_eventful_visit_date: str = "", min_last_revision_date: str = "", min_last_release_date: str = "", min_date_created: str = "", min_date_modified: str = "", min_date_published: str = "", programming_languages: Optional[List[str]] = None, licenses: Optional[List[str]] = None, keywords: Optional[List[str]] = None, sort_by: Optional[List[str]] = None, page_token: Optional[str] = None, limit: int = 50, ) -> PagedResult[MinimalOriginDict]: query_clauses: List[Dict[str, Any]] = [] query_filters = [] if url_pattern: query_filters.append(f"origin = {escape(url_pattern)}") if metadata_pattern: query_filters.append(f"metadata = {escape(metadata_pattern)}") # if not query_clauses: # raise ValueError( # "At least one of url_pattern and metadata_pattern must be provided." 
# ) if with_visit: query_filters.append(f"visited = {'true' if with_visit else 'false'}") if min_nb_visits: query_filters.append(f"visits >= {min_nb_visits}") if min_last_visit_date: query_filters.append( f"last_visit >= {min_last_visit_date.replace('Z', '+00:00')}" ) if min_last_eventful_visit_date: query_filters.append( "last_eventful_visit >= " f"{min_last_eventful_visit_date.replace('Z', '+00:00')}" ) if min_last_revision_date: query_filters.append( f"last_revision >= {min_last_revision_date.replace('Z', '+00:00')}" ) if min_last_release_date: query_filters.append( f"last_release >= {min_last_release_date.replace('Z', '+00:00')}" ) if keywords: query_filters.append(f"keyword in {escape(keywords)}") if licenses: query_filters.append(f"license in {escape(licenses)}") if programming_languages: query_filters.append(f"language in {escape(programming_languages)}") if min_date_created: query_filters.append( f"created >= {min_date_created.replace('Z', '+00:00')}" ) if min_date_modified: query_filters.append( f"modified >= {min_date_modified.replace('Z', '+00:00')}" ) if min_date_published: query_filters.append( f"published >= {min_date_published.replace('Z', '+00:00')}" ) if visit_types is not None: query_filters.append(f"visit_type = {escape(visit_types)}") combined_filters = f"({' and '.join(query_filters)})" query = f"{combined_filters}{' and ' if query != '' else ' '}{query}" parsed_query = self._translator.parse_query(query) query_clauses.append(parsed_query["filters"]) field_map = { "visits": "nb_visits", "last_visit": "last_visit_date", "last_eventful_visit": "last_eventful_visit_date", "last_revision": "last_revision_date", "last_release": "last_release_date", "created": "date_created", "modified": "date_modified", "published": "date_published", } if "sortBy" in parsed_query: if sort_by is None: sort_by = [] for sort_by_option in parsed_query["sortBy"]: if sort_by_option[0] == "-": sort_by.append("-" + field_map[sort_by_option[1:]]) else: sort_by.append(field_map[sort_by_option]) if parsed_query.get("limit", 0): limit = parsed_query["limit"] sorting_params: List[Dict[str, Any]] = [] if sort_by: for field in sort_by: order = "asc" if field and field[0] == "-": field = field[1:] order = "desc" if field in ["date_created", "date_modified", "date_published"]: sorting_params.append( { get_expansion(field, "."): { "nested_path": "intrinsic_metadata", "order": order, } } ) elif field in SORT_BY_OPTIONS: sorting_params.append({field: order}) sorting_params.extend( [{"_score": "desc"}, {"sha1": "asc"},] ) body = { "query": { "bool": { "must": query_clauses, "must_not": [{"term": {"blocklisted": True}}], } }, "sort": sorting_params, } if page_token: # TODO: use ElasticSearch's scroll API? 
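# A page_token round-trips the (score, sha1) pair of the previous page's last
# hit through msgpack + base64 (see token_encode/token_decode above); decoding
# it into "search_after" lets Elasticsearch resume right after that hit
# instead of using deep from/size pagination.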
page_token_content = token_decode(page_token) body["search_after"] = [ page_token_content[b"score"], page_token_content[b"sha1"].decode("ascii"), ] if logger.isEnabledFor(logging.DEBUG): formatted_body = pprint.pformat(body) logger.debug("Search query body: %s", formatted_body) res = self._backend.search( index=self._get_origin_read_alias(), body=body, size=limit ) hits = res["hits"]["hits"] next_page_token: Optional[str] = None if len(hits) == limit: # There are more results after this page; return a pagination token # to get them in a future query last_hit = hits[-1] next_page_token_content = { b"score": last_hit["_score"], b"sha1": last_hit["_source"]["sha1"], } next_page_token = token_encode(next_page_token_content) assert len(hits) <= limit return PagedResult( results=[{"url": hit["_source"]["url"]} for hit in hits], next_page_token=next_page_token, ) + + def visit_types_count(self) -> Counter: + body = { + "aggs": { + "not_blocklisted": { + "filter": {"bool": {"must_not": [{"term": {"blocklisted": True}}]}}, + "aggs": { + "visit_types": {"terms": {"field": "visit_types", "size": 1000}} + }, + } + } + } + + res = self._backend.search( + index=self._get_origin_read_alias(), body=body, size=0 + ) + + buckets = ( + res.get("aggregations", {}) + .get("not_blocklisted", {}) + .get("visit_types", {}) + .get("buckets", []) + ) + return Counter({bucket["key"]: bucket["doc_count"] for bucket in buckets}) diff --git a/swh/search/in_memory.py b/swh/search/in_memory.py index 4492f5b..dfc0e4e 100644 --- a/swh/search/in_memory.py +++ b/swh/search/in_memory.py @@ -1,508 +1,516 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from collections import defaultdict +from collections import Counter, defaultdict from datetime import datetime, timezone +from itertools import chain import re from typing import Any, Dict, Iterable, Iterator, List, Optional from swh.indexer import codemeta from swh.model.identifiers import origin_identifier from swh.search.interface import ( SORT_BY_OPTIONS, MinimalOriginDict, OriginDict, PagedResult, ) from swh.search.utils import get_expansion, is_date_parsable _words_regexp = re.compile(r"\w+") def _dict_words_set(d): """Recursively extract set of words from dict content.""" values = set() def extract(obj, words): if isinstance(obj, dict): for k, v in obj.items(): extract(v, words) elif isinstance(obj, list): for item in obj: extract(item, words) else: words.update(_words_regexp.findall(str(obj).lower())) return words return extract(d, values) def _nested_get(nested_dict, nested_keys, default=""): """Extracts values from deeply nested dictionary nested_dict using the nested_keys and returns a list of all of the values discovered in the process. >>> nested_dict = [ ... {"name": [{"@value": {"first": "f1", "last": "l1"}}], "address": "XYZ"}, ... {"name": [{"@value": {"first": "f2", "last": "l2"}}], "address": "ABC"}, ... 
] >>> _nested_get(nested_dict, ["name", "@value", "last"]) ['l1', 'l2'] >>> _nested_get(nested_dict, ["address"]) ['XYZ', 'ABC'] It doesn't allow fetching intermediate values and returns "" for such cases >>> _nested_get(nested_dict, ["name", "@value"]) ['', ''] """ def _nested_get_recursive(nested_dict, nested_keys): try: curr_obj = nested_dict type_curr_obj = type(curr_obj) for i, key in enumerate(nested_keys): if key in curr_obj: curr_obj = curr_obj[key] type_curr_obj = type(curr_obj) else: if type_curr_obj == list: curr_obj = [ _nested_get_recursive(obj, nested_keys[i:]) for obj in curr_obj ] # If value isn't a list or string or integer elif type_curr_obj != str and type_curr_obj != int: return default # If only one element is present in the list, take it out # This ensures a flat array every time if type_curr_obj == list and len(curr_obj) == 1: curr_obj = curr_obj[0] return curr_obj except Exception: return default res = _nested_get_recursive(nested_dict, nested_keys) if type(res) != list: return [res] return res def _tokenize(x): return x.lower().replace(",", " ").split() def _get_sorting_key(origin, field): """Get value of the field from an origin for sorting origins. Here field should be a member of SORT_BY_OPTIONS. If "-" is present at the start of field then invert the value in a way that it reverses the sorting order. """ reversed = False if field[0] == "-": field = field[1:] reversed = True DATETIME_OBJ_MAX = datetime.max.replace(tzinfo=timezone.utc) DATETIME_MIN = "0001-01-01T00:00:00Z" DATE_OBJ_MAX = datetime.max DATE_MIN = "0001-01-01" if field == "score": if reversed: return -origin.get(field, 0) else: return origin.get(field, 0) if field in ["date_created", "date_modified", "date_published"]: date = datetime.strptime( _nested_get(origin, get_expansion(field), DATE_MIN)[0], "%Y-%m-%d" ) if reversed: return DATE_OBJ_MAX - date else: return date elif field in ["nb_visits"]: # unlike other options, nb_visits is of type integer if reversed: return -origin.get(field, 0) else: return origin.get(field, 0) elif field in SORT_BY_OPTIONS: date = datetime.fromisoformat( origin.get(field, DATETIME_MIN).replace("Z", "+00:00") ) if reversed: return DATETIME_OBJ_MAX - date else: return date class InMemorySearch: def __init__(self): pass def check(self): return True def deinitialize(self) -> None: if hasattr(self, "_origins"): del self._origins del self._origin_ids def initialize(self) -> None: self._origins: Dict[str, Dict[str, Any]] = defaultdict(dict) self._origin_ids: List[str] = [] def flush(self) -> None: pass _url_splitter = re.compile(r"\W") def origin_update(self, documents: Iterable[OriginDict]) -> None: for source_document in documents: document: Dict[str, Any] = dict(source_document) id_ = origin_identifier(document) if "url" in document: document["_url_tokens"] = set( self._url_splitter.split(source_document["url"]) ) if "visit_types" in document: document["visit_types"] = set(source_document["visit_types"]) if "visit_types" in self._origins[id_]: document["visit_types"].update(self._origins[id_]["visit_types"]) if "nb_visits" in document: document["nb_visits"] = max( document["nb_visits"], self._origins[id_].get("nb_visits", 0) ) if "last_visit_date" in document: document["last_visit_date"] = max( datetime.fromisoformat(document["last_visit_date"]), datetime.fromisoformat( self._origins[id_] .get("last_visit_date", "0001-01-01T00:00:00.000000Z",) .replace("Z", "+00:00") ), ).isoformat() if "snapshot_id" in document and "last_eventful_visit_date" in document: incoming_date = 
datetime.fromisoformat( document["last_eventful_visit_date"] ) current_date = datetime.fromisoformat( self._origins[id_] .get("last_eventful_visit_date", "0001-01-01T00:00:00Z",) .replace("Z", "+00:00") ) incoming_snapshot_id = document["snapshot_id"] current_snapshot_id = self._origins[id_].get("snapshot_id", "") if ( incoming_snapshot_id == current_snapshot_id or incoming_date < current_date ): # update not required so override the incoming_values document["snapshot_id"] = current_snapshot_id document["last_eventful_visit_date"] = current_date.isoformat() if "last_revision_date" in document: document["last_revision_date"] = max( datetime.fromisoformat(document["last_revision_date"]), datetime.fromisoformat( self._origins[id_] .get("last_revision_date", "0001-01-01T00:00:00Z",) .replace("Z", "+00:00") ), ).isoformat() if "last_release_date" in document: document["last_release_date"] = max( datetime.fromisoformat(document["last_release_date"]), datetime.fromisoformat( self._origins[id_] .get("last_release_date", "0001-01-01T00:00:00Z",) .replace("Z", "+00:00") ), ).isoformat() if "intrinsic_metadata" in document: intrinsic_metadata = document["intrinsic_metadata"] for date_field in ["dateCreated", "dateModified", "datePublished"]: if date_field in intrinsic_metadata: date = intrinsic_metadata[date_field] # If date{Created,Modified,Published} value isn't parsable # It gets rejected and isn't stored (unlike other fields) if not is_date_parsable(date): intrinsic_metadata.pop(date_field) document["intrinsic_metadata"] = codemeta.expand(intrinsic_metadata) if len(document["intrinsic_metadata"]) != 1: continue metadata = document["intrinsic_metadata"][0] if "http://schema.org/license" in metadata: metadata["http://schema.org/license"] = [ {"@id": license["@id"].lower()} for license in metadata["http://schema.org/license"] ] if "http://schema.org/programmingLanguage" in metadata: metadata["http://schema.org/programmingLanguage"] = [ {"@value": license["@value"].lower()} for license in metadata["http://schema.org/programmingLanguage"] ] self._origins[id_].update(document) if id_ not in self._origin_ids: self._origin_ids.append(id_) def origin_search( self, *, query: str = "", url_pattern: Optional[str] = None, metadata_pattern: Optional[str] = None, with_visit: bool = False, visit_types: Optional[List[str]] = None, min_nb_visits: int = 0, min_last_visit_date: str = "", min_last_eventful_visit_date: str = "", min_last_revision_date: str = "", min_last_release_date: str = "", min_date_created: str = "", min_date_modified: str = "", min_date_published: str = "", programming_languages: Optional[List[str]] = None, licenses: Optional[List[str]] = None, keywords: Optional[List[str]] = None, sort_by: Optional[List[str]] = None, page_token: Optional[str] = None, limit: int = 50, ) -> PagedResult[MinimalOriginDict]: - hits: Iterator[Dict[str, Any]] = ( - self._origins[id_] - for id_ in self._origin_ids - if not self._origins[id_].get("blocklisted") - ) + hits = self._get_hits() if url_pattern: tokens = set(self._url_splitter.split(url_pattern)) def predicate(match): missing_tokens = tokens - match["_url_tokens"] if len(missing_tokens) == 0: return True elif len(missing_tokens) > 1: return False else: # There is one missing token, look up by prefix. 
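# e.g. the pattern "github.com/swh" tokenizes to {"github", "com", "swh"}; an
# origin whose URL tokens include {"github", "com", "swheritage"} still
# matches, since the one missing token "swh" is a prefix of "swheritage".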
(missing_token,) = missing_tokens return any( token.startswith(missing_token) for token in match["_url_tokens"] ) hits = filter(predicate, hits) if metadata_pattern: metadata_pattern_words = set( _words_regexp.findall(metadata_pattern.lower()) ) def predicate(match): if "intrinsic_metadata" not in match: return False return metadata_pattern_words.issubset( _dict_words_set(match["intrinsic_metadata"]) ) hits = filter(predicate, hits) if not url_pattern and not metadata_pattern: raise ValueError( "At least one of url_pattern and metadata_pattern must be provided." ) next_page_token: Optional[str] = None if with_visit: hits = filter(lambda o: o.get("has_visits"), hits) if min_nb_visits: hits = filter(lambda o: o.get("nb_visits", 0) >= min_nb_visits, hits) if min_last_visit_date: hits = filter( lambda o: datetime.fromisoformat( o.get("last_visit_date", "0001-01-01T00:00:00Z").replace( "Z", "+00:00" ) ) >= datetime.fromisoformat(min_last_visit_date), hits, ) if min_last_eventful_visit_date: hits = filter( lambda o: datetime.fromisoformat( o.get("last_eventful_visit_date", "0001-01-01T00:00:00Z").replace( "Z", "+00:00" ) ) >= datetime.fromisoformat(min_last_eventful_visit_date), hits, ) if min_last_revision_date: hits = filter( lambda o: datetime.fromisoformat( o.get("last_revision_date", "0001-01-01T00:00:00Z").replace( "Z", "+00:00" ) ) >= datetime.fromisoformat(min_last_revision_date), hits, ) if min_last_release_date: hits = filter( lambda o: datetime.fromisoformat( o.get("last_release_date", "0001-01-01T00:00:00Z").replace( "Z", "+00:00" ) ) >= datetime.fromisoformat(min_last_release_date), hits, ) if min_date_created: min_date_created_obj = datetime.strptime(min_date_created, "%Y-%m-%d") hits = filter( lambda o: datetime.strptime( _nested_get(o, get_expansion("date_created"))[0], "%Y-%m-%d" ) >= min_date_created_obj, hits, ) if min_date_modified: min_date_modified_obj = datetime.strptime(min_date_modified, "%Y-%m-%d") hits = filter( lambda o: datetime.strptime( _nested_get(o, get_expansion("date_modified"))[0], "%Y-%m-%d" ) >= min_date_modified_obj, hits, ) if min_date_published: min_date_published_obj = datetime.strptime(min_date_published, "%Y-%m-%d") hits = filter( lambda o: datetime.strptime( _nested_get(o, get_expansion("date_published"))[0], "%Y-%m-%d" ) >= min_date_published_obj, hits, ) if licenses: queried_licenses = [license_keyword.lower() for license_keyword in licenses] hits = filter( lambda o: any( # If any of the queried licenses are found, include the origin any( # returns True if queried_license_keyword is found # in any of the licenses of the origin queried_license_keyword in origin_license for origin_license in _nested_get(o, get_expansion("licenses")) ) for queried_license_keyword in queried_licenses ), hits, ) if programming_languages: queried_programming_languages = [ lang_keyword.lower() for lang_keyword in programming_languages ] hits = filter( lambda o: any( # If any of the queried languages are found, include the origin any( # returns True if queried_lang_keyword is found # in any of the langs of the origin queried_lang_keyword in origin_lang for origin_lang in _nested_get( o, get_expansion("programming_languages") ) ) for queried_lang_keyword in queried_programming_languages ), hits, ) if keywords: if sort_by: sort_by.append("-score") else: sort_by = ["-score"] from copy import deepcopy hits_list = deepcopy(list(hits)) for origin in hits_list: origin_keywords = [ _tokenize(keyword) for keyword in _nested_get(origin, get_expansion("keywords")) ] 
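# The scoring loop below weights the two match kinds differently: a query
# keyword found among an origin's keywords adds 2 to the score, while one
# found in its description adds only 1.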
origin_descriptions = [ _tokenize(description) for description in _nested_get( origin, get_expansion("descriptions") ) ] for q_keyword in keywords: for origin_keyword_tokens in origin_keywords: if q_keyword in origin_keyword_tokens: origin["score"] = origin.get("score", 0) + 2 for origin_description_token in origin_descriptions: if q_keyword in origin_description_token: origin["score"] = origin.get("score", 0) + 1 hits = (origin for origin in hits_list if origin.get("score", 0) > 0) if visit_types is not None: visit_types_set = set(visit_types) hits = filter( lambda o: visit_types_set.intersection(o.get("visit_types", set())), hits, ) hits_list = list(hits) if sort_by: sort_by_list = list(sort_by) hits_list.sort( key=lambda o: tuple( _get_sorting_key(o, field) for field in sort_by_list ) ) start_at_index = int(page_token) if page_token else 0 origins = [ {"url": hit["url"]} for hit in hits_list[start_at_index : start_at_index + limit] ] if len(origins) == limit: next_page_token = str(start_at_index + limit) assert len(origins) <= limit return PagedResult(results=origins, next_page_token=next_page_token,) + + def visit_types_count(self) -> Counter: + hits = self._get_hits() + return Counter(chain(*[hit.get("visit_types", []) for hit in hits])) + + def _get_hits(self) -> Iterator[Dict[str, Any]]: + return ( + self._origins[id_] + for id_ in self._origin_ids + if not self._origins[id_].get("blocklisted") + ) diff --git a/swh/search/interface.py b/swh/search/interface.py index 73bb3a2..03d148e 100644 --- a/swh/search/interface.py +++ b/swh/search/interface.py @@ -1,135 +1,142 @@ # Copyright (C) 2020-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from collections import Counter from typing import Iterable, List, Optional, TypeVar from typing_extensions import TypedDict from swh.core.api import remote_api_endpoint from swh.core.api.classes import PagedResult as CorePagedResult TResult = TypeVar("TResult") PagedResult = CorePagedResult[TResult, str] SORT_BY_OPTIONS = [ "nb_visits", "last_visit_date", "last_eventful_visit_date", "last_revision_date", "last_release_date", "date_created", "date_modified", "date_published", ] class MinimalOriginDict(TypedDict): """Mandatory keys of an :class:`OriginDict`""" url: str class OriginDict(MinimalOriginDict, total=False): """Argument passed to :meth:`SearchInterface.origin_update`.""" visit_types: List[str] has_visits: bool class SearchInterface: @remote_api_endpoint("check") def check(self): """Dedicated method to execute some specific check per implementation. """ ... @remote_api_endpoint("flush") def flush(self) -> None: """Blocks until all previous calls to _update() are completely applied. """ ... @remote_api_endpoint("origin/update") def origin_update(self, documents: Iterable[OriginDict]) -> None: """Persist documents to the search backend. """ ... 
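# Note on update semantics, shared by both backends: origin_update merges
# rather than overwrites. visit_types are unioned, nb_visits keeps the
# maximum, and the last_*_date fields keep the most recent value
# (last_eventful_visit_date additionally checks that snapshot_id changed).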
@remote_api_endpoint("origin/search") def origin_search( self, *, query: str = "", url_pattern: Optional[str] = None, metadata_pattern: Optional[str] = None, with_visit: bool = False, visit_types: Optional[List[str]] = None, min_nb_visits: int = 0, min_last_visit_date: str = "", min_last_eventful_visit_date: str = "", min_last_revision_date: str = "", min_last_release_date: str = "", min_date_created: str = "", min_date_modified: str = "", min_date_published: str = "", programming_languages: Optional[List[str]] = None, licenses: Optional[List[str]] = None, keywords: Optional[List[str]] = None, sort_by: Optional[List[str]] = None, page_token: Optional[str] = None, limit: int = 50, ) -> PagedResult[MinimalOriginDict]: """Searches for origins matching the `url_pattern`. Args: query: Find origins according the queries written as per the swh-search query language syntax. url_pattern: Part of the URL to search for metadata_pattern: Keywords to look for (across all the fields of intrinsic_metadata) with_visit: Whether origins with no visits are to be filtered out visit_types: Only origins having any of the provided visit types (e.g. git, svn, pypi) will be returned min_nb_visits: Filter origins that have number of visits >= the provided value min_last_visit_date: Filter origins that have last_visit_date on or after the provided date(ISO format) min_last_eventful_visit_date: Filter origins that have last_eventful_visit_date (eventful = snapshot_id changed) on or after the provided date(ISO format) min_last_revision_date: Filter origins that have last_revision_date on or after the provided date(ISO format) min_last_release_date: Filter origins that have last_release_date on or after the provided date(ISO format) min_date_created: Filter origins that have date_created from intrinsic_metadata on or after the provided date min_date_modified: Filter origins that have date_modified from intrinsic_metadata on or after the provided date min_date_published: Filter origins that have date_published from intrinsic_metadata on or after the provided date programming_languages: Filter origins with programming languages present in the given list (based on instrinsic_metadata) licenses: Filter origins with licenses present in the given list (based on instrinsic_metadata) keywords: Filter origins having description/keywords (extracted from instrinsic_metadata) that match given values sort_by: Sort results based on a list of fields mentioned in SORT_BY_OPTIONS (nb_visits,last_visit_date, last_eventful_visit_date, last_revision_date, last_release_date). Return results in descending order if "-" is present at the beginning otherwise in ascending order. page_token: Opaque value used for pagination limit: number of results to return Returns: PagedResult of origin dicts matching the search criteria. If next_page_token is None, there is no longer data to retrieve. """ ... + + @remote_api_endpoint("visit_types_count") + def visit_types_count(self) -> Counter: + """Returns origin counts per visit type (git, hg, svn, ...). + """ + ... 
diff --git a/query_language/.gitignore b/swh/search/query_language/.gitignore similarity index 100% rename from query_language/.gitignore rename to swh/search/query_language/.gitignore diff --git a/query_language/grammar.js b/swh/search/query_language/grammar.js similarity index 100% rename from query_language/grammar.js rename to swh/search/query_language/grammar.js diff --git a/query_language/sample_query b/swh/search/query_language/sample_query similarity index 100% rename from query_language/sample_query rename to swh/search/query_language/sample_query diff --git a/swh/search/query_language/src/grammar.json b/swh/search/query_language/src/grammar.json new file mode 100644 index 0000000..916a506 --- /dev/null +++ b/swh/search/query_language/src/grammar.json @@ -0,0 +1,1302 @@ +{ + "name": "swh_search_ql", + "rules": { + "query": { + "type": "SEQ", + "members": [ + { + "type": "SYMBOL", + "name": "filters" + }, + { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "and" + }, + { + "type": "BLANK" + } + ] + }, + { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + { + "type": "SYMBOL", + "name": "sortBy" + }, + { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "and" + }, + { + "type": "BLANK" + } + ] + }, + { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "limit" + }, + { + "type": "BLANK" + } + ] + } + ] + }, + { + "type": "SEQ", + "members": [ + { + "type": "SYMBOL", + "name": "limit" + }, + { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "and" + }, + { + "type": "BLANK" + } + ] + }, + { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "sortBy" + }, + { + "type": "BLANK" + } + ] + } + ] + } + ] + } + ] + }, + { + "type": "BLANK" + } + ] + } + ] + }, + "filters": { + "type": "CHOICE", + "members": [ + { + "type": "PREC_LEFT", + "value": 3, + "content": { + "type": "SEQ", + "members": [ + { + "type": "FIELD", + "name": "left", + "content": { + "type": "SYMBOL", + "name": "filters" + } + }, + { + "type": "FIELD", + "name": "operator", + "content": { + "type": "SYMBOL", + "name": "and" + } + }, + { + "type": "FIELD", + "name": "right", + "content": { + "type": "SYMBOL", + "name": "filters" + } + } + ] + } + }, + { + "type": "PREC_LEFT", + "value": 2, + "content": { + "type": "SEQ", + "members": [ + { + "type": "FIELD", + "name": "left", + "content": { + "type": "SYMBOL", + "name": "filters" + } + }, + { + "type": "FIELD", + "name": "operator", + "content": { + "type": "SYMBOL", + "name": "or" + } + }, + { + "type": "FIELD", + "name": "right", + "content": { + "type": "SYMBOL", + "name": "filters" + } + } + ] + } + }, + { + "type": "PREC_LEFT", + "value": 4, + "content": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "(" + }, + { + "type": "SYMBOL", + "name": "filters" + }, + { + "type": "STRING", + "value": ")" + } + ] + } + }, + { + "type": "SYMBOL", + "name": "filter" + } + ] + }, + "sortBy": { + "type": "SEQ", + "members": [ + { + "type": "FIELD", + "name": "field", + "content": { + "type": "SYMBOL", + "name": "sortByField" + } + }, + { + "type": "FIELD", + "name": "op", + "content": { + "type": "SYMBOL", + "name": "sortByOp" + } + }, + { + "type": "FIELD", + "name": "value", + "content": { + "type": "SYMBOL", + "name": "sortByVal" + } + } + ] + }, + "sortByField": { + "type": "TOKEN", + "content": { + "type": "STRING", + "value": "sort_by" + } + }, + "sortByOp": { + 
"type": "SYMBOL", + "name": "equalOp" + }, + "sortByVal": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "[" + }, + { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + { + "type": "FIELD", + "name": "array_member", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "'" + }, + { + "type": "SYMBOL", + "name": "sortByOptions" + }, + { + "type": "STRING", + "value": "'" + } + ] + }, + { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "\"" + }, + { + "type": "SYMBOL", + "name": "sortByOptions" + }, + { + "type": "STRING", + "value": "\"" + } + ] + } + ] + }, + { + "type": "SYMBOL", + "name": "sortByOptions" + } + ] + } + }, + { + "type": "REPEAT", + "content": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "," + }, + { + "type": "CHOICE", + "members": [ + { + "type": "FIELD", + "name": "array_member", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "'" + }, + { + "type": "SYMBOL", + "name": "sortByOptions" + }, + { + "type": "STRING", + "value": "'" + } + ] + }, + { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "\"" + }, + { + "type": "SYMBOL", + "name": "sortByOptions" + }, + { + "type": "STRING", + "value": "\"" + } + ] + } + ] + }, + { + "type": "SYMBOL", + "name": "sortByOptions" + } + ] + } + }, + { + "type": "BLANK" + } + ] + } + ] + } + } + ] + }, + { + "type": "BLANK" + } + ] + }, + { + "type": "STRING", + "value": "]" + } + ] + }, + "sortByOptions": { + "type": "SEQ", + "members": [ + { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": "-" + }, + { + "type": "BLANK" + } + ] + }, + { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": "visits" + }, + { + "type": "STRING", + "value": "last_visit" + }, + { + "type": "STRING", + "value": "last_eventful_visit" + }, + { + "type": "STRING", + "value": "last_revision" + }, + { + "type": "STRING", + "value": "last_release" + }, + { + "type": "STRING", + "value": "created" + }, + { + "type": "STRING", + "value": "modified" + }, + { + "type": "STRING", + "value": "published" + } + ] + } + ] + }, + "limit": { + "type": "SEQ", + "members": [ + { + "type": "FIELD", + "name": "field", + "content": { + "type": "SYMBOL", + "name": "limitField" + } + }, + { + "type": "FIELD", + "name": "op", + "content": { + "type": "SYMBOL", + "name": "equalOp" + } + }, + { + "type": "FIELD", + "name": "value", + "content": { + "type": "SYMBOL", + "name": "number" + } + } + ] + }, + "limitField": { + "type": "TOKEN", + "content": { + "type": "STRING", + "value": "limit" + } + }, + "filter": { + "type": "FIELD", + "name": "category", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "patternFilter" + }, + { + "type": "SYMBOL", + "name": "booleanFilter" + }, + { + "type": "SYMBOL", + "name": "numericFilter" + }, + { + "type": "SYMBOL", + "name": "boundedListFilter" + }, + { + "type": "SYMBOL", + "name": "unboundedListFilter" + }, + { + "type": "SYMBOL", + "name": "dateFilter" + } + ] + } + }, + "patternFilter": { + "type": "SEQ", + "members": [ + { + "type": "FIELD", + "name": "field", + "content": { + "type": "SYMBOL", + "name": "patternField" + } + }, + { + "type": "FIELD", + "name": "op", + "content": { + "type": "SYMBOL", + "name": "patternOp" + } + }, + { + "type": "FIELD", 
+ "name": "value", + "content": { + "type": "SYMBOL", + "name": "patternVal" + } + } + ] + }, + "patternField": { + "type": "TOKEN", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": "origin" + }, + { + "type": "STRING", + "value": "metadata" + } + ] + } + }, + "patternOp": { + "type": "SYMBOL", + "name": "equalOp" + }, + "patternVal": { + "type": "SYMBOL", + "name": "string" + }, + "booleanFilter": { + "type": "SEQ", + "members": [ + { + "type": "FIELD", + "name": "field", + "content": { + "type": "SYMBOL", + "name": "booleanField" + } + }, + { + "type": "FIELD", + "name": "op", + "content": { + "type": "SYMBOL", + "name": "booleanOp" + } + }, + { + "type": "FIELD", + "name": "value", + "content": { + "type": "SYMBOL", + "name": "booleanVal" + } + } + ] + }, + "booleanField": { + "type": "TOKEN", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": "visited" + } + ] + } + }, + "booleanOp": { + "type": "SYMBOL", + "name": "equalOp" + }, + "booleanVal": { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "booleanTrue" + }, + { + "type": "SYMBOL", + "name": "booleanFalse" + } + ] + }, + "numericFilter": { + "type": "SEQ", + "members": [ + { + "type": "FIELD", + "name": "field", + "content": { + "type": "SYMBOL", + "name": "numericField" + } + }, + { + "type": "FIELD", + "name": "op", + "content": { + "type": "SYMBOL", + "name": "numericOp" + } + }, + { + "type": "FIELD", + "name": "value", + "content": { + "type": "SYMBOL", + "name": "numberVal" + } + } + ] + }, + "numericField": { + "type": "TOKEN", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": "visits" + } + ] + } + }, + "numericOp": { + "type": "SYMBOL", + "name": "rangeOp" + }, + "numberVal": { + "type": "SYMBOL", + "name": "number" + }, + "boundedListFilter": { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "visitTypeFilter" + } + ] + }, + "visitTypeFilter": { + "type": "SEQ", + "members": [ + { + "type": "FIELD", + "name": "field", + "content": { + "type": "SYMBOL", + "name": "visitTypeField" + } + }, + { + "type": "FIELD", + "name": "op", + "content": { + "type": "SYMBOL", + "name": "visitTypeOp" + } + }, + { + "type": "FIELD", + "name": "value", + "content": { + "type": "SYMBOL", + "name": "visitTypeVal" + } + } + ] + }, + "visitTypeField": { + "type": "TOKEN", + "content": { + "type": "STRING", + "value": "visit_type" + } + }, + "visitTypeOp": { + "type": "SYMBOL", + "name": "equalOp" + }, + "visitTypeVal": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "[" + }, + { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + { + "type": "FIELD", + "name": "array_member", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "'" + }, + { + "type": "SYMBOL", + "name": "visitTypeOptions" + }, + { + "type": "STRING", + "value": "'" + } + ] + }, + { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "\"" + }, + { + "type": "SYMBOL", + "name": "visitTypeOptions" + }, + { + "type": "STRING", + "value": "\"" + } + ] + } + ] + }, + { + "type": "SYMBOL", + "name": "visitTypeOptions" + } + ] + } + }, + { + "type": "REPEAT", + "content": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "," + }, + { + "type": "CHOICE", + "members": [ + { + "type": "FIELD", + "name": "array_member", + "content": { + "type": "CHOICE", + 
"members": [ + { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "'" + }, + { + "type": "SYMBOL", + "name": "visitTypeOptions" + }, + { + "type": "STRING", + "value": "'" + } + ] + }, + { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "\"" + }, + { + "type": "SYMBOL", + "name": "visitTypeOptions" + }, + { + "type": "STRING", + "value": "\"" + } + ] + } + ] + }, + { + "type": "SYMBOL", + "name": "visitTypeOptions" + } + ] + } + }, + { + "type": "BLANK" + } + ] + } + ] + } + } + ] + }, + { + "type": "BLANK" + } + ] + }, + { + "type": "STRING", + "value": "]" + } + ] + }, + "visitTypeOptions": { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": "any" + }, + { + "type": "STRING", + "value": "cran" + }, + { + "type": "STRING", + "value": "deb" + }, + { + "type": "STRING", + "value": "deposit" + }, + { + "type": "STRING", + "value": "ftp" + }, + { + "type": "STRING", + "value": "hg" + }, + { + "type": "STRING", + "value": "git" + }, + { + "type": "STRING", + "value": "nixguix" + }, + { + "type": "STRING", + "value": "npm" + }, + { + "type": "STRING", + "value": "pypi" + }, + { + "type": "STRING", + "value": "svn" + }, + { + "type": "STRING", + "value": "tar" + } + ] + }, + "unboundedListFilter": { + "type": "SEQ", + "members": [ + { + "type": "FIELD", + "name": "field", + "content": { + "type": "SYMBOL", + "name": "listField" + } + }, + { + "type": "FIELD", + "name": "op", + "content": { + "type": "SYMBOL", + "name": "listOp" + } + }, + { + "type": "FIELD", + "name": "value", + "content": { + "type": "SYMBOL", + "name": "listVal" + } + } + ] + }, + "listField": { + "type": "TOKEN", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": "language" + }, + { + "type": "STRING", + "value": "license" + }, + { + "type": "STRING", + "value": "keyword" + } + ] + } + }, + "listOp": { + "type": "SYMBOL", + "name": "choiceOp" + }, + "listVal": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "[" + }, + { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + { + "type": "FIELD", + "name": "array_member", + "content": { + "type": "SYMBOL", + "name": "string" + } + }, + { + "type": "REPEAT", + "content": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "," + }, + { + "type": "CHOICE", + "members": [ + { + "type": "FIELD", + "name": "array_member", + "content": { + "type": "SYMBOL", + "name": "string" + } + }, + { + "type": "BLANK" + } + ] + } + ] + } + } + ] + }, + { + "type": "BLANK" + } + ] + }, + { + "type": "STRING", + "value": "]" + } + ] + }, + "dateFilter": { + "type": "SEQ", + "members": [ + { + "type": "FIELD", + "name": "field", + "content": { + "type": "SYMBOL", + "name": "dateField" + } + }, + { + "type": "FIELD", + "name": "op", + "content": { + "type": "SYMBOL", + "name": "dateOp" + } + }, + { + "type": "FIELD", + "name": "value", + "content": { + "type": "SYMBOL", + "name": "dateVal" + } + } + ] + }, + "dateField": { + "type": "TOKEN", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": "last_visit" + }, + { + "type": "STRING", + "value": "last_eventful_visit" + }, + { + "type": "STRING", + "value": "last_revision" + }, + { + "type": "STRING", + "value": "last_release" + }, + { + "type": "STRING", + "value": "created" + }, + { + "type": "STRING", + "value": "modified" + }, + { + "type": "STRING", + "value": "published" + } + ] + } + }, + "dateOp": { + "type": "SYMBOL", + "name": 
"rangeOp" + }, + "dateVal": { + "type": "SYMBOL", + "name": "isoDateTime" + }, + "rangeOp": { + "type": "TOKEN", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": "<" + }, + { + "type": "STRING", + "value": "<=" + }, + { + "type": "STRING", + "value": "=" + }, + { + "type": "STRING", + "value": "!=" + }, + { + "type": "STRING", + "value": ">=" + }, + { + "type": "STRING", + "value": ">" + } + ] + } + }, + "equalOp": { + "type": "TOKEN", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": "=" + } + ] + } + }, + "choiceOp": { + "type": "TOKEN", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": "in" + }, + { + "type": "STRING", + "value": "not in" + } + ] + } + }, + "isoDateTime": { + "type": "PATTERN", + "value": "\\d{4}[-]\\d{2}[-]\\d{2}(\\s|T)*(\\d{2}:\\d{2}(:\\d{2}(\\.\\d{6})?)?)?(\\+\\d{2}:\\d{2}|Z)?" + }, + "string": { + "type": "CHOICE", + "members": [ + { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "'" + }, + { + "type": "SYMBOL", + "name": "stringContent" + }, + { + "type": "STRING", + "value": "'" + } + ] + }, + { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "\"" + }, + { + "type": "SYMBOL", + "name": "stringContent" + }, + { + "type": "STRING", + "value": "\"" + } + ] + } + ] + }, + { + "type": "SYMBOL", + "name": "singleWord" + } + ] + }, + "number": { + "type": "PATTERN", + "value": "\\d+" + }, + "booleanTrue": { + "type": "STRING", + "value": "true" + }, + "booleanFalse": { + "type": "STRING", + "value": "false" + }, + "or": { + "type": "STRING", + "value": "or" + }, + "and": { + "type": "STRING", + "value": "and" + }, + "singleWord": { + "type": "PATTERN", + "value": "[^\\s\"'\\[\\]\\(\\),]+" + }, + "stringContent": { + "type": "REPEAT1", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "IMMEDIATE_TOKEN", + "content": { + "type": "PATTERN", + "value": "[^\\\\'\"\\n]+" + } + }, + { + "type": "SYMBOL", + "name": "escape_sequence" + } + ] + } + }, + "escape_sequence": { + "type": "IMMEDIATE_TOKEN", + "content": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "\\" + }, + { + "type": "PATTERN", + "value": "(\\\"|\\'|\\\\|\\/|b|n|r|t|u)" + } + ] + } + } + }, + "extras": [ + { + "type": "PATTERN", + "value": "\\s" + } + ], + "conflicts": [], + "precedences": [], + "externals": [], + "inline": [], + "supertypes": [] +} + diff --git a/swh/search/query_language/src/node-types.json b/swh/search/query_language/src/node-types.json new file mode 100644 index 0000000..0ff9ffd --- /dev/null +++ b/swh/search/query_language/src/node-types.json @@ -0,0 +1,887 @@ +[ + { + "type": "booleanFilter", + "named": true, + "fields": { + "field": { + "multiple": false, + "required": true, + "types": [ + { + "type": "booleanField", + "named": true + } + ] + }, + "op": { + "multiple": false, + "required": true, + "types": [ + { + "type": "booleanOp", + "named": true + } + ] + }, + "value": { + "multiple": false, + "required": true, + "types": [ + { + "type": "booleanVal", + "named": true + } + ] + } + } + }, + { + "type": "booleanOp", + "named": true, + "fields": {}, + "children": { + "multiple": false, + "required": true, + "types": [ + { + "type": "equalOp", + "named": true + } + ] + } + }, + { + "type": "booleanVal", + "named": true, + "fields": {}, + "children": { + "multiple": false, + "required": true, + "types": [ + { + "type": "booleanFalse", + "named": true + }, + { + "type": 
"booleanTrue", + "named": true + } + ] + } + }, + { + "type": "boundedListFilter", + "named": true, + "fields": {}, + "children": { + "multiple": false, + "required": true, + "types": [ + { + "type": "visitTypeFilter", + "named": true + } + ] + } + }, + { + "type": "dateFilter", + "named": true, + "fields": { + "field": { + "multiple": false, + "required": true, + "types": [ + { + "type": "dateField", + "named": true + } + ] + }, + "op": { + "multiple": false, + "required": true, + "types": [ + { + "type": "dateOp", + "named": true + } + ] + }, + "value": { + "multiple": false, + "required": true, + "types": [ + { + "type": "dateVal", + "named": true + } + ] + } + } + }, + { + "type": "dateOp", + "named": true, + "fields": {}, + "children": { + "multiple": false, + "required": true, + "types": [ + { + "type": "rangeOp", + "named": true + } + ] + } + }, + { + "type": "dateVal", + "named": true, + "fields": {}, + "children": { + "multiple": false, + "required": true, + "types": [ + { + "type": "isoDateTime", + "named": true + } + ] + } + }, + { + "type": "filter", + "named": true, + "fields": { + "category": { + "multiple": false, + "required": true, + "types": [ + { + "type": "booleanFilter", + "named": true + }, + { + "type": "boundedListFilter", + "named": true + }, + { + "type": "dateFilter", + "named": true + }, + { + "type": "numericFilter", + "named": true + }, + { + "type": "patternFilter", + "named": true + }, + { + "type": "unboundedListFilter", + "named": true + } + ] + } + } + }, + { + "type": "filters", + "named": true, + "fields": { + "left": { + "multiple": false, + "required": false, + "types": [ + { + "type": "filters", + "named": true + } + ] + }, + "operator": { + "multiple": false, + "required": false, + "types": [ + { + "type": "and", + "named": true + }, + { + "type": "or", + "named": true + } + ] + }, + "right": { + "multiple": false, + "required": false, + "types": [ + { + "type": "filters", + "named": true + } + ] + } + }, + "children": { + "multiple": false, + "required": false, + "types": [ + { + "type": "filter", + "named": true + }, + { + "type": "filters", + "named": true + } + ] + } + }, + { + "type": "limit", + "named": true, + "fields": { + "field": { + "multiple": false, + "required": true, + "types": [ + { + "type": "limitField", + "named": true + } + ] + }, + "op": { + "multiple": false, + "required": true, + "types": [ + { + "type": "equalOp", + "named": true + } + ] + }, + "value": { + "multiple": false, + "required": true, + "types": [ + { + "type": "number", + "named": true + } + ] + } + } + }, + { + "type": "listOp", + "named": true, + "fields": {}, + "children": { + "multiple": false, + "required": true, + "types": [ + { + "type": "choiceOp", + "named": true + } + ] + } + }, + { + "type": "listVal", + "named": true, + "fields": { + "array_member": { + "multiple": true, + "required": false, + "types": [ + { + "type": "string", + "named": true + } + ] + } + } + }, + { + "type": "numberVal", + "named": true, + "fields": {}, + "children": { + "multiple": false, + "required": true, + "types": [ + { + "type": "number", + "named": true + } + ] + } + }, + { + "type": "numericFilter", + "named": true, + "fields": { + "field": { + "multiple": false, + "required": true, + "types": [ + { + "type": "numericField", + "named": true + } + ] + }, + "op": { + "multiple": false, + "required": true, + "types": [ + { + "type": "numericOp", + "named": true + } + ] + }, + "value": { + "multiple": false, + "required": true, + "types": [ + { + "type": "numberVal", + "named": 
true + } + ] + } + } + }, + { + "type": "numericOp", + "named": true, + "fields": {}, + "children": { + "multiple": false, + "required": true, + "types": [ + { + "type": "rangeOp", + "named": true + } + ] + } + }, + { + "type": "patternFilter", + "named": true, + "fields": { + "field": { + "multiple": false, + "required": true, + "types": [ + { + "type": "patternField", + "named": true + } + ] + }, + "op": { + "multiple": false, + "required": true, + "types": [ + { + "type": "patternOp", + "named": true + } + ] + }, + "value": { + "multiple": false, + "required": true, + "types": [ + { + "type": "patternVal", + "named": true + } + ] + } + } + }, + { + "type": "patternOp", + "named": true, + "fields": {}, + "children": { + "multiple": false, + "required": true, + "types": [ + { + "type": "equalOp", + "named": true + } + ] + } + }, + { + "type": "patternVal", + "named": true, + "fields": {}, + "children": { + "multiple": false, + "required": true, + "types": [ + { + "type": "string", + "named": true + } + ] + } + }, + { + "type": "query", + "named": true, + "fields": {}, + "children": { + "multiple": true, + "required": true, + "types": [ + { + "type": "and", + "named": true + }, + { + "type": "filters", + "named": true + }, + { + "type": "limit", + "named": true + }, + { + "type": "sortBy", + "named": true + } + ] + } + }, + { + "type": "sortBy", + "named": true, + "fields": { + "field": { + "multiple": false, + "required": true, + "types": [ + { + "type": "sortByField", + "named": true + } + ] + }, + "op": { + "multiple": false, + "required": true, + "types": [ + { + "type": "sortByOp", + "named": true + } + ] + }, + "value": { + "multiple": false, + "required": true, + "types": [ + { + "type": "sortByVal", + "named": true + } + ] + } + } + }, + { + "type": "sortByOp", + "named": true, + "fields": {}, + "children": { + "multiple": false, + "required": true, + "types": [ + { + "type": "equalOp", + "named": true + } + ] + } + }, + { + "type": "sortByOptions", + "named": true, + "fields": {} + }, + { + "type": "sortByVal", + "named": true, + "fields": { + "array_member": { + "multiple": true, + "required": false, + "types": [ + { + "type": "\"", + "named": false + }, + { + "type": "'", + "named": false + }, + { + "type": "sortByOptions", + "named": true + } + ] + } + } + }, + { + "type": "string", + "named": true, + "fields": {}, + "children": { + "multiple": false, + "required": true, + "types": [ + { + "type": "singleWord", + "named": true + }, + { + "type": "stringContent", + "named": true + } + ] + } + }, + { + "type": "stringContent", + "named": true, + "fields": {}, + "children": { + "multiple": true, + "required": false, + "types": [ + { + "type": "escape_sequence", + "named": true + } + ] + } + }, + { + "type": "unboundedListFilter", + "named": true, + "fields": { + "field": { + "multiple": false, + "required": true, + "types": [ + { + "type": "listField", + "named": true + } + ] + }, + "op": { + "multiple": false, + "required": true, + "types": [ + { + "type": "listOp", + "named": true + } + ] + }, + "value": { + "multiple": false, + "required": true, + "types": [ + { + "type": "listVal", + "named": true + } + ] + } + } + }, + { + "type": "visitTypeFilter", + "named": true, + "fields": { + "field": { + "multiple": false, + "required": true, + "types": [ + { + "type": "visitTypeField", + "named": true + } + ] + }, + "op": { + "multiple": false, + "required": true, + "types": [ + { + "type": "visitTypeOp", + "named": true + } + ] + }, + "value": { + "multiple": false, + "required": 
true, + "types": [ + { + "type": "visitTypeVal", + "named": true + } + ] + } + } + }, + { + "type": "visitTypeOp", + "named": true, + "fields": {}, + "children": { + "multiple": false, + "required": true, + "types": [ + { + "type": "equalOp", + "named": true + } + ] + } + }, + { + "type": "visitTypeOptions", + "named": true, + "fields": {} + }, + { + "type": "visitTypeVal", + "named": true, + "fields": { + "array_member": { + "multiple": true, + "required": false, + "types": [ + { + "type": "\"", + "named": false + }, + { + "type": "'", + "named": false + }, + { + "type": "visitTypeOptions", + "named": true + } + ] + } + } + }, + { + "type": "\"", + "named": false + }, + { + "type": "'", + "named": false + }, + { + "type": "(", + "named": false + }, + { + "type": ")", + "named": false + }, + { + "type": ",", + "named": false + }, + { + "type": "-", + "named": false + }, + { + "type": "[", + "named": false + }, + { + "type": "]", + "named": false + }, + { + "type": "and", + "named": true + }, + { + "type": "any", + "named": false + }, + { + "type": "booleanFalse", + "named": true + }, + { + "type": "booleanField", + "named": true + }, + { + "type": "booleanTrue", + "named": true + }, + { + "type": "choiceOp", + "named": true + }, + { + "type": "cran", + "named": false + }, + { + "type": "created", + "named": false + }, + { + "type": "dateField", + "named": true + }, + { + "type": "deb", + "named": false + }, + { + "type": "deposit", + "named": false + }, + { + "type": "equalOp", + "named": true + }, + { + "type": "escape_sequence", + "named": true + }, + { + "type": "ftp", + "named": false + }, + { + "type": "git", + "named": false + }, + { + "type": "hg", + "named": false + }, + { + "type": "isoDateTime", + "named": true + }, + { + "type": "last_eventful_visit", + "named": false + }, + { + "type": "last_release", + "named": false + }, + { + "type": "last_revision", + "named": false + }, + { + "type": "last_visit", + "named": false + }, + { + "type": "limitField", + "named": true + }, + { + "type": "listField", + "named": true + }, + { + "type": "modified", + "named": false + }, + { + "type": "nixguix", + "named": false + }, + { + "type": "npm", + "named": false + }, + { + "type": "number", + "named": true + }, + { + "type": "numericField", + "named": true + }, + { + "type": "or", + "named": true + }, + { + "type": "patternField", + "named": true + }, + { + "type": "published", + "named": false + }, + { + "type": "pypi", + "named": false + }, + { + "type": "rangeOp", + "named": true + }, + { + "type": "singleWord", + "named": true + }, + { + "type": "sortByField", + "named": true + }, + { + "type": "svn", + "named": false + }, + { + "type": "tar", + "named": false + }, + { + "type": "visitTypeField", + "named": true + }, + { + "type": "visits", + "named": false + } +] \ No newline at end of file diff --git a/swh/search/query_language/src/parser.c b/swh/search/query_language/src/parser.c new file mode 100644 index 0000000..0865d2d --- /dev/null +++ b/swh/search/query_language/src/parser.c @@ -0,0 +1,3307 @@ +#include + +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#endif + +#define LANGUAGE_VERSION 13 +#define STATE_COUNT 126 +#define LARGE_STATE_COUNT 2 +#define SYMBOL_COUNT 83 +#define ALIAS_COUNT 0 +#define TOKEN_COUNT 49 +#define EXTERNAL_TOKEN_COUNT 0 +#define FIELD_COUNT 8 +#define MAX_ALIAS_SEQUENCE_LENGTH 6 +#define PRODUCTION_ID_COUNT 9 + +enum { + anon_sym_LPAREN = 1, + anon_sym_RPAREN = 2, 
+ sym_sortByField = 3, + anon_sym_LBRACK = 4, + anon_sym_SQUOTE = 5, + anon_sym_DQUOTE = 6, + anon_sym_COMMA = 7, + anon_sym_RBRACK = 8, + anon_sym_DASH = 9, + anon_sym_visits = 10, + anon_sym_last_visit = 11, + anon_sym_last_eventful_visit = 12, + anon_sym_last_revision = 13, + anon_sym_last_release = 14, + anon_sym_created = 15, + anon_sym_modified = 16, + anon_sym_published = 17, + sym_limitField = 18, + sym_patternField = 19, + sym_booleanField = 20, + sym_numericField = 21, + sym_visitTypeField = 22, + anon_sym_any = 23, + anon_sym_cran = 24, + anon_sym_deb = 25, + anon_sym_deposit = 26, + anon_sym_ftp = 27, + anon_sym_hg = 28, + anon_sym_git = 29, + anon_sym_nixguix = 30, + anon_sym_npm = 31, + anon_sym_pypi = 32, + anon_sym_svn = 33, + anon_sym_tar = 34, + sym_listField = 35, + sym_dateField = 36, + sym_rangeOp = 37, + sym_equalOp = 38, + sym_choiceOp = 39, + sym_isoDateTime = 40, + sym_number = 41, + sym_booleanTrue = 42, + sym_booleanFalse = 43, + sym_or = 44, + sym_and = 45, + sym_singleWord = 46, + aux_sym_stringContent_token1 = 47, + sym_escape_sequence = 48, + sym_query = 49, + sym_filters = 50, + sym_sortBy = 51, + sym_sortByOp = 52, + sym_sortByVal = 53, + sym_sortByOptions = 54, + sym_limit = 55, + sym_filter = 56, + sym_patternFilter = 57, + sym_patternOp = 58, + sym_patternVal = 59, + sym_booleanFilter = 60, + sym_booleanOp = 61, + sym_booleanVal = 62, + sym_numericFilter = 63, + sym_numericOp = 64, + sym_numberVal = 65, + sym_boundedListFilter = 66, + sym_visitTypeFilter = 67, + sym_visitTypeOp = 68, + sym_visitTypeVal = 69, + sym_visitTypeOptions = 70, + sym_unboundedListFilter = 71, + sym_listOp = 72, + sym_listVal = 73, + sym_dateFilter = 74, + sym_dateOp = 75, + sym_dateVal = 76, + sym_string = 77, + sym_stringContent = 78, + aux_sym_sortByVal_repeat1 = 79, + aux_sym_visitTypeVal_repeat1 = 80, + aux_sym_listVal_repeat1 = 81, + aux_sym_stringContent_repeat1 = 82, +}; + +static const char * const ts_symbol_names[] = { + [ts_builtin_sym_end] = "end", + [anon_sym_LPAREN] = "(", + [anon_sym_RPAREN] = ")", + [sym_sortByField] = "sortByField", + [anon_sym_LBRACK] = "[", + [anon_sym_SQUOTE] = "'", + [anon_sym_DQUOTE] = "\"", + [anon_sym_COMMA] = ",", + [anon_sym_RBRACK] = "]", + [anon_sym_DASH] = "-", + [anon_sym_visits] = "visits", + [anon_sym_last_visit] = "last_visit", + [anon_sym_last_eventful_visit] = "last_eventful_visit", + [anon_sym_last_revision] = "last_revision", + [anon_sym_last_release] = "last_release", + [anon_sym_created] = "created", + [anon_sym_modified] = "modified", + [anon_sym_published] = "published", + [sym_limitField] = "limitField", + [sym_patternField] = "patternField", + [sym_booleanField] = "booleanField", + [sym_numericField] = "numericField", + [sym_visitTypeField] = "visitTypeField", + [anon_sym_any] = "any", + [anon_sym_cran] = "cran", + [anon_sym_deb] = "deb", + [anon_sym_deposit] = "deposit", + [anon_sym_ftp] = "ftp", + [anon_sym_hg] = "hg", + [anon_sym_git] = "git", + [anon_sym_nixguix] = "nixguix", + [anon_sym_npm] = "npm", + [anon_sym_pypi] = "pypi", + [anon_sym_svn] = "svn", + [anon_sym_tar] = "tar", + [sym_listField] = "listField", + [sym_dateField] = "dateField", + [sym_rangeOp] = "rangeOp", + [sym_equalOp] = "equalOp", + [sym_choiceOp] = "choiceOp", + [sym_isoDateTime] = "isoDateTime", + [sym_number] = "number", + [sym_booleanTrue] = "booleanTrue", + [sym_booleanFalse] = "booleanFalse", + [sym_or] = "or", + [sym_and] = "and", + [sym_singleWord] = "singleWord", + [aux_sym_stringContent_token1] = "stringContent_token1", + 
[sym_escape_sequence] = "escape_sequence", + [sym_query] = "query", + [sym_filters] = "filters", + [sym_sortBy] = "sortBy", + [sym_sortByOp] = "sortByOp", + [sym_sortByVal] = "sortByVal", + [sym_sortByOptions] = "sortByOptions", + [sym_limit] = "limit", + [sym_filter] = "filter", + [sym_patternFilter] = "patternFilter", + [sym_patternOp] = "patternOp", + [sym_patternVal] = "patternVal", + [sym_booleanFilter] = "booleanFilter", + [sym_booleanOp] = "booleanOp", + [sym_booleanVal] = "booleanVal", + [sym_numericFilter] = "numericFilter", + [sym_numericOp] = "numericOp", + [sym_numberVal] = "numberVal", + [sym_boundedListFilter] = "boundedListFilter", + [sym_visitTypeFilter] = "visitTypeFilter", + [sym_visitTypeOp] = "visitTypeOp", + [sym_visitTypeVal] = "visitTypeVal", + [sym_visitTypeOptions] = "visitTypeOptions", + [sym_unboundedListFilter] = "unboundedListFilter", + [sym_listOp] = "listOp", + [sym_listVal] = "listVal", + [sym_dateFilter] = "dateFilter", + [sym_dateOp] = "dateOp", + [sym_dateVal] = "dateVal", + [sym_string] = "string", + [sym_stringContent] = "stringContent", + [aux_sym_sortByVal_repeat1] = "sortByVal_repeat1", + [aux_sym_visitTypeVal_repeat1] = "visitTypeVal_repeat1", + [aux_sym_listVal_repeat1] = "listVal_repeat1", + [aux_sym_stringContent_repeat1] = "stringContent_repeat1", +}; + +static const TSSymbol ts_symbol_map[] = { + [ts_builtin_sym_end] = ts_builtin_sym_end, + [anon_sym_LPAREN] = anon_sym_LPAREN, + [anon_sym_RPAREN] = anon_sym_RPAREN, + [sym_sortByField] = sym_sortByField, + [anon_sym_LBRACK] = anon_sym_LBRACK, + [anon_sym_SQUOTE] = anon_sym_SQUOTE, + [anon_sym_DQUOTE] = anon_sym_DQUOTE, + [anon_sym_COMMA] = anon_sym_COMMA, + [anon_sym_RBRACK] = anon_sym_RBRACK, + [anon_sym_DASH] = anon_sym_DASH, + [anon_sym_visits] = anon_sym_visits, + [anon_sym_last_visit] = anon_sym_last_visit, + [anon_sym_last_eventful_visit] = anon_sym_last_eventful_visit, + [anon_sym_last_revision] = anon_sym_last_revision, + [anon_sym_last_release] = anon_sym_last_release, + [anon_sym_created] = anon_sym_created, + [anon_sym_modified] = anon_sym_modified, + [anon_sym_published] = anon_sym_published, + [sym_limitField] = sym_limitField, + [sym_patternField] = sym_patternField, + [sym_booleanField] = sym_booleanField, + [sym_numericField] = sym_numericField, + [sym_visitTypeField] = sym_visitTypeField, + [anon_sym_any] = anon_sym_any, + [anon_sym_cran] = anon_sym_cran, + [anon_sym_deb] = anon_sym_deb, + [anon_sym_deposit] = anon_sym_deposit, + [anon_sym_ftp] = anon_sym_ftp, + [anon_sym_hg] = anon_sym_hg, + [anon_sym_git] = anon_sym_git, + [anon_sym_nixguix] = anon_sym_nixguix, + [anon_sym_npm] = anon_sym_npm, + [anon_sym_pypi] = anon_sym_pypi, + [anon_sym_svn] = anon_sym_svn, + [anon_sym_tar] = anon_sym_tar, + [sym_listField] = sym_listField, + [sym_dateField] = sym_dateField, + [sym_rangeOp] = sym_rangeOp, + [sym_equalOp] = sym_equalOp, + [sym_choiceOp] = sym_choiceOp, + [sym_isoDateTime] = sym_isoDateTime, + [sym_number] = sym_number, + [sym_booleanTrue] = sym_booleanTrue, + [sym_booleanFalse] = sym_booleanFalse, + [sym_or] = sym_or, + [sym_and] = sym_and, + [sym_singleWord] = sym_singleWord, + [aux_sym_stringContent_token1] = aux_sym_stringContent_token1, + [sym_escape_sequence] = sym_escape_sequence, + [sym_query] = sym_query, + [sym_filters] = sym_filters, + [sym_sortBy] = sym_sortBy, + [sym_sortByOp] = sym_sortByOp, + [sym_sortByVal] = sym_sortByVal, + [sym_sortByOptions] = sym_sortByOptions, + [sym_limit] = sym_limit, + [sym_filter] = sym_filter, + [sym_patternFilter] = 
sym_patternFilter, + [sym_patternOp] = sym_patternOp, + [sym_patternVal] = sym_patternVal, + [sym_booleanFilter] = sym_booleanFilter, + [sym_booleanOp] = sym_booleanOp, + [sym_booleanVal] = sym_booleanVal, + [sym_numericFilter] = sym_numericFilter, + [sym_numericOp] = sym_numericOp, + [sym_numberVal] = sym_numberVal, + [sym_boundedListFilter] = sym_boundedListFilter, + [sym_visitTypeFilter] = sym_visitTypeFilter, + [sym_visitTypeOp] = sym_visitTypeOp, + [sym_visitTypeVal] = sym_visitTypeVal, + [sym_visitTypeOptions] = sym_visitTypeOptions, + [sym_unboundedListFilter] = sym_unboundedListFilter, + [sym_listOp] = sym_listOp, + [sym_listVal] = sym_listVal, + [sym_dateFilter] = sym_dateFilter, + [sym_dateOp] = sym_dateOp, + [sym_dateVal] = sym_dateVal, + [sym_string] = sym_string, + [sym_stringContent] = sym_stringContent, + [aux_sym_sortByVal_repeat1] = aux_sym_sortByVal_repeat1, + [aux_sym_visitTypeVal_repeat1] = aux_sym_visitTypeVal_repeat1, + [aux_sym_listVal_repeat1] = aux_sym_listVal_repeat1, + [aux_sym_stringContent_repeat1] = aux_sym_stringContent_repeat1, +}; + +static const TSSymbolMetadata ts_symbol_metadata[] = { + [ts_builtin_sym_end] = { + .visible = false, + .named = true, + }, + [anon_sym_LPAREN] = { + .visible = true, + .named = false, + }, + [anon_sym_RPAREN] = { + .visible = true, + .named = false, + }, + [sym_sortByField] = { + .visible = true, + .named = true, + }, + [anon_sym_LBRACK] = { + .visible = true, + .named = false, + }, + [anon_sym_SQUOTE] = { + .visible = true, + .named = false, + }, + [anon_sym_DQUOTE] = { + .visible = true, + .named = false, + }, + [anon_sym_COMMA] = { + .visible = true, + .named = false, + }, + [anon_sym_RBRACK] = { + .visible = true, + .named = false, + }, + [anon_sym_DASH] = { + .visible = true, + .named = false, + }, + [anon_sym_visits] = { + .visible = true, + .named = false, + }, + [anon_sym_last_visit] = { + .visible = true, + .named = false, + }, + [anon_sym_last_eventful_visit] = { + .visible = true, + .named = false, + }, + [anon_sym_last_revision] = { + .visible = true, + .named = false, + }, + [anon_sym_last_release] = { + .visible = true, + .named = false, + }, + [anon_sym_created] = { + .visible = true, + .named = false, + }, + [anon_sym_modified] = { + .visible = true, + .named = false, + }, + [anon_sym_published] = { + .visible = true, + .named = false, + }, + [sym_limitField] = { + .visible = true, + .named = true, + }, + [sym_patternField] = { + .visible = true, + .named = true, + }, + [sym_booleanField] = { + .visible = true, + .named = true, + }, + [sym_numericField] = { + .visible = true, + .named = true, + }, + [sym_visitTypeField] = { + .visible = true, + .named = true, + }, + [anon_sym_any] = { + .visible = true, + .named = false, + }, + [anon_sym_cran] = { + .visible = true, + .named = false, + }, + [anon_sym_deb] = { + .visible = true, + .named = false, + }, + [anon_sym_deposit] = { + .visible = true, + .named = false, + }, + [anon_sym_ftp] = { + .visible = true, + .named = false, + }, + [anon_sym_hg] = { + .visible = true, + .named = false, + }, + [anon_sym_git] = { + .visible = true, + .named = false, + }, + [anon_sym_nixguix] = { + .visible = true, + .named = false, + }, + [anon_sym_npm] = { + .visible = true, + .named = false, + }, + [anon_sym_pypi] = { + .visible = true, + .named = false, + }, + [anon_sym_svn] = { + .visible = true, + .named = false, + }, + [anon_sym_tar] = { + .visible = true, + .named = false, + }, + [sym_listField] = { + .visible = true, + .named = true, + }, + [sym_dateField] = { + .visible = 
true, + .named = true, + }, + [sym_rangeOp] = { + .visible = true, + .named = true, + }, + [sym_equalOp] = { + .visible = true, + .named = true, + }, + [sym_choiceOp] = { + .visible = true, + .named = true, + }, + [sym_isoDateTime] = { + .visible = true, + .named = true, + }, + [sym_number] = { + .visible = true, + .named = true, + }, + [sym_booleanTrue] = { + .visible = true, + .named = true, + }, + [sym_booleanFalse] = { + .visible = true, + .named = true, + }, + [sym_or] = { + .visible = true, + .named = true, + }, + [sym_and] = { + .visible = true, + .named = true, + }, + [sym_singleWord] = { + .visible = true, + .named = true, + }, + [aux_sym_stringContent_token1] = { + .visible = false, + .named = false, + }, + [sym_escape_sequence] = { + .visible = true, + .named = true, + }, + [sym_query] = { + .visible = true, + .named = true, + }, + [sym_filters] = { + .visible = true, + .named = true, + }, + [sym_sortBy] = { + .visible = true, + .named = true, + }, + [sym_sortByOp] = { + .visible = true, + .named = true, + }, + [sym_sortByVal] = { + .visible = true, + .named = true, + }, + [sym_sortByOptions] = { + .visible = true, + .named = true, + }, + [sym_limit] = { + .visible = true, + .named = true, + }, + [sym_filter] = { + .visible = true, + .named = true, + }, + [sym_patternFilter] = { + .visible = true, + .named = true, + }, + [sym_patternOp] = { + .visible = true, + .named = true, + }, + [sym_patternVal] = { + .visible = true, + .named = true, + }, + [sym_booleanFilter] = { + .visible = true, + .named = true, + }, + [sym_booleanOp] = { + .visible = true, + .named = true, + }, + [sym_booleanVal] = { + .visible = true, + .named = true, + }, + [sym_numericFilter] = { + .visible = true, + .named = true, + }, + [sym_numericOp] = { + .visible = true, + .named = true, + }, + [sym_numberVal] = { + .visible = true, + .named = true, + }, + [sym_boundedListFilter] = { + .visible = true, + .named = true, + }, + [sym_visitTypeFilter] = { + .visible = true, + .named = true, + }, + [sym_visitTypeOp] = { + .visible = true, + .named = true, + }, + [sym_visitTypeVal] = { + .visible = true, + .named = true, + }, + [sym_visitTypeOptions] = { + .visible = true, + .named = true, + }, + [sym_unboundedListFilter] = { + .visible = true, + .named = true, + }, + [sym_listOp] = { + .visible = true, + .named = true, + }, + [sym_listVal] = { + .visible = true, + .named = true, + }, + [sym_dateFilter] = { + .visible = true, + .named = true, + }, + [sym_dateOp] = { + .visible = true, + .named = true, + }, + [sym_dateVal] = { + .visible = true, + .named = true, + }, + [sym_string] = { + .visible = true, + .named = true, + }, + [sym_stringContent] = { + .visible = true, + .named = true, + }, + [aux_sym_sortByVal_repeat1] = { + .visible = false, + .named = false, + }, + [aux_sym_visitTypeVal_repeat1] = { + .visible = false, + .named = false, + }, + [aux_sym_listVal_repeat1] = { + .visible = false, + .named = false, + }, + [aux_sym_stringContent_repeat1] = { + .visible = false, + .named = false, + }, +}; + +enum { + field_array_member = 1, + field_category = 2, + field_field = 3, + field_left = 4, + field_op = 5, + field_operator = 6, + field_right = 7, + field_value = 8, +}; + +static const char * const ts_field_names[] = { + [0] = NULL, + [field_array_member] = "array_member", + [field_category] = "category", + [field_field] = "field", + [field_left] = "left", + [field_op] = "op", + [field_operator] = "operator", + [field_right] = "right", + [field_value] = "value", +}; + +static const TSFieldMapSlice 
ts_field_map_slices[PRODUCTION_ID_COUNT] = { + [1] = {.index = 0, .length = 1}, + [2] = {.index = 1, .length = 3}, + [3] = {.index = 4, .length = 3}, + [4] = {.index = 7, .length = 1}, + [5] = {.index = 8, .length = 2}, + [6] = {.index = 10, .length = 2}, + [7] = {.index = 12, .length = 3}, + [8] = {.index = 15, .length = 4}, +}; + +static const TSFieldMapEntry ts_field_map_entries[] = { + [0] = + {field_category, 0}, + [1] = + {field_field, 0}, + {field_op, 1}, + {field_value, 2}, + [4] = + {field_left, 0}, + {field_operator, 1}, + {field_right, 2}, + [7] = + {field_array_member, 1}, + [8] = + {field_array_member, 1}, + {field_array_member, 2, .inherited = true}, + [10] = + {field_array_member, 0, .inherited = true}, + {field_array_member, 1, .inherited = true}, + [12] = + {field_array_member, 1}, + {field_array_member, 2}, + {field_array_member, 3}, + [15] = + {field_array_member, 1}, + {field_array_member, 2}, + {field_array_member, 3}, + {field_array_member, 4, .inherited = true}, +}; + +static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = { + [0] = {0}, +}; + +static const uint16_t ts_non_terminal_alias_map[] = { + 0, +}; + +static inline bool sym_singleWord_character_set_1(int32_t c) { + return (c < '"' + ? (c < '\r' + ? (c < '\t' + ? c == 0 + : c <= '\n') + : (c <= '\r' || c == ' ')) + : (c <= '"' || (c < '[' + ? (c < ',' + ? (c >= '\'' && c <= ')') + : c <= ',') + : (c <= '[' || c == ']')))); +} + +static bool ts_lex(TSLexer *lexer, TSStateId state) { + START_LEXER(); + eof = lexer->eof(lexer); + switch (state) { + case 0: + if (eof) ADVANCE(236); + if (lookahead == '!') ADVANCE(10); + if (lookahead == '"') ADVANCE(242); + if (lookahead == '\'') ADVANCE(241); + if (lookahead == '(') ADVANCE(237); + if (lookahead == ')') ADVANCE(238); + if (lookahead == ',') ADVANCE(243); + if (lookahead == '-') ADVANCE(245); + if (lookahead == '<') ADVANCE(274); + if (lookahead == '=') ADVANCE(273); + if (lookahead == '>') ADVANCE(274); + if (lookahead == '[') ADVANCE(240); + if (lookahead == '\\') ADVANCE(211); + if (lookahead == ']') ADVANCE(244); + if (lookahead == 'a') ADVANCE(124); + if (lookahead == 'c') ADVANCE(147); + if (lookahead == 'd') ADVANCE(48); + if (lookahead == 'f') ADVANCE(18); + if (lookahead == 'g') ADVANCE(84); + if (lookahead == 'h') ADVANCE(77); + if (lookahead == 'i') ADVANCE(125); + if (lookahead == 'k') ADVANCE(49); + if (lookahead == 'l') ADVANCE(19); + if (lookahead == 'm') ADVANCE(59); + if (lookahead == 'n') ADVANCE(85); + if (lookahead == 'o') ADVANCE(148); + if (lookahead == 'p') ADVANCE(194); + if (lookahead == 's') ADVANCE(139); + if (lookahead == 't') ADVANCE(23); + if (lookahead == 'v') ADVANCE(86); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') SKIP(234) + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(285); + END_STATE(); + case 1: + if (lookahead == '\n') SKIP(4) + if (lookahead == '"') ADVANCE(242); + if (lookahead == '\'') ADVANCE(241); + if (lookahead == '\\') ADVANCE(211); + if (lookahead == '\t' || + lookahead == '\r' || + lookahead == ' ') ADVANCE(293); + if (lookahead != 0) ADVANCE(294); + END_STATE(); + case 2: + if (lookahead == ' ') ADVANCE(89); + END_STATE(); + case 3: + if (lookahead == '"') ADVANCE(242); + if (lookahead == '\'') ADVANCE(241); + if (lookahead == ',') ADVANCE(243); + if (lookahead == ']') ADVANCE(244); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') SKIP(3) + if (lookahead != 0 && + lookahead != '(' 
&& + lookahead != ')' && + lookahead != '[') ADVANCE(292); + END_STATE(); + case 4: + if (lookahead == '"') ADVANCE(242); + if (lookahead == '\'') ADVANCE(241); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') SKIP(4) + END_STATE(); + case 5: + if (lookahead == '(') ADVANCE(237); + if (lookahead == '=') ADVANCE(275); + if (lookahead == 'c') ADVANCE(154); + if (lookahead == 'k') ADVANCE(49); + if (lookahead == 'l') ADVANCE(31); + if (lookahead == 'm') ADVANCE(60); + if (lookahead == 'o') ADVANCE(153); + if (lookahead == 'p') ADVANCE(199); + if (lookahead == 's') ADVANCE(138); + if (lookahead == 'v') ADVANCE(112); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') SKIP(5) + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(286); + END_STATE(); + case 6: + if (lookahead == '-') ADVANCE(219); + END_STATE(); + case 7: + if (lookahead == '-') ADVANCE(220); + END_STATE(); + case 8: + if (lookahead == ':') ADVANCE(221); + END_STATE(); + case 9: + if (lookahead == ':') ADVANCE(222); + END_STATE(); + case 10: + if (lookahead == '=') ADVANCE(273); + END_STATE(); + case 11: + if (lookahead == '_') ADVANCE(53); + END_STATE(); + case 12: + if (lookahead == '_') ADVANCE(34); + END_STATE(); + case 13: + if (lookahead == '_') ADVANCE(202); + END_STATE(); + case 14: + if (lookahead == '_') ADVANCE(186); + if (lookahead == 'e') ADVANCE(41); + if (lookahead == 's') ADVANCE(246); + END_STATE(); + case 15: + if (lookahead == '_') ADVANCE(186); + if (lookahead == 'e') ADVANCE(41); + if (lookahead == 's') ADVANCE(257); + END_STATE(); + case 16: + if (lookahead == '_') ADVANCE(72); + END_STATE(); + case 17: + if (lookahead == '_') ADVANCE(203); + END_STATE(); + case 18: + if (lookahead == 'a') ADVANCE(115); + if (lookahead == 't') ADVANCE(144); + END_STATE(); + case 19: + if (lookahead == 'a') ADVANCE(131); + if (lookahead == 'i') ADVANCE(36); + END_STATE(); + case 20: + if (lookahead == 'a') ADVANCE(255); + END_STATE(); + case 21: + if (lookahead == 'a') ADVANCE(46); + END_STATE(); + case 22: + if (lookahead == 'a') ADVANCE(127); + if (lookahead == 'e') ADVANCE(26); + END_STATE(); + case 23: + if (lookahead == 'a') ADVANCE(149); + if (lookahead == 'r') ADVANCE(197); + END_STATE(); + case 24: + if (lookahead == 'a') ADVANCE(81); + END_STATE(); + case 25: + if (lookahead == 'a') ADVANCE(158); + if (lookahead == 'i') ADVANCE(123); + END_STATE(); + case 26: + if (lookahead == 'a') ADVANCE(188); + END_STATE(); + case 27: + if (lookahead == 'a') ADVANCE(190); + END_STATE(); + case 28: + if (lookahead == 'a') ADVANCE(187); + END_STATE(); + case 29: + if (lookahead == 'a') ADVANCE(162); + END_STATE(); + case 30: + if (lookahead == 'a') ADVANCE(163); + END_STATE(); + case 31: + if (lookahead == 'a') ADVANCE(132); + if (lookahead == 'i') ADVANCE(36); + END_STATE(); + case 32: + if (lookahead == 'b') ADVANCE(261); + if (lookahead == 'p') ADVANCE(143); + END_STATE(); + case 33: + if (lookahead == 'b') ADVANCE(117); + END_STATE(); + case 34: + if (lookahead == 'b') ADVANCE(209); + END_STATE(); + case 35: + if (lookahead == 'b') ADVANCE(119); + END_STATE(); + case 36: + if (lookahead == 'c') ADVANCE(62); + if (lookahead == 'm') ADVANCE(96); + END_STATE(); + case 37: + if (lookahead == 'd') ADVANCE(291); + END_STATE(); + case 38: + if (lookahead == 'd') ADVANCE(291); + if (lookahead == 'y') ADVANCE(259); + END_STATE(); + case 39: + if (lookahead == 'd') ADVANCE(251); + END_STATE(); + case 40: + if (lookahead == 'd') ADVANCE(271); + 
END_STATE(); + case 41: + if (lookahead == 'd') ADVANCE(256); + END_STATE(); + case 42: + if (lookahead == 'd') ADVANCE(252); + END_STATE(); + case 43: + if (lookahead == 'd') ADVANCE(253); + END_STATE(); + case 44: + if (lookahead == 'd') ADVANCE(272); + END_STATE(); + case 45: + if (lookahead == 'd') ADVANCE(87); + END_STATE(); + case 46: + if (lookahead == 'd') ADVANCE(28); + END_STATE(); + case 47: + if (lookahead == 'd') ADVANCE(108); + END_STATE(); + case 48: + if (lookahead == 'e') ADVANCE(32); + END_STATE(); + case 49: + if (lookahead == 'e') ADVANCE(208); + END_STATE(); + case 50: + if (lookahead == 'e') ADVANCE(26); + END_STATE(); + case 51: + if (lookahead == 'e') ADVANCE(287); + END_STATE(); + case 52: + if (lookahead == 'e') ADVANCE(288); + END_STATE(); + case 53: + if (lookahead == 'e') ADVANCE(201); + if (lookahead == 'r') ADVANCE(55); + if (lookahead == 'v') ADVANCE(110); + END_STATE(); + case 54: + if (lookahead == 'e') ADVANCE(271); + END_STATE(); + case 55: + if (lookahead == 'e') ADVANCE(118); + END_STATE(); + case 56: + if (lookahead == 'e') ADVANCE(258); + END_STATE(); + case 57: + if (lookahead == 'e') ADVANCE(250); + END_STATE(); + case 58: + if (lookahead == 'e') ADVANCE(272); + END_STATE(); + case 59: + if (lookahead == 'e') ADVANCE(182); + if (lookahead == 'o') ADVANCE(45); + END_STATE(); + case 60: + if (lookahead == 'e') ADVANCE(182); + if (lookahead == 'o') ADVANCE(47); + END_STATE(); + case 61: + if (lookahead == 'e') ADVANCE(39); + END_STATE(); + case 62: + if (lookahead == 'e') ADVANCE(135); + END_STATE(); + case 63: + if (lookahead == 'e') ADVANCE(29); + END_STATE(); + case 64: + if (lookahead == 'e') ADVANCE(42); + END_STATE(); + case 65: + if (lookahead == 'e') ADVANCE(134); + END_STATE(); + case 66: + if (lookahead == 'e') ADVANCE(43); + END_STATE(); + case 67: + if (lookahead == 'e') ADVANCE(44); + END_STATE(); + case 68: + if (lookahead == 'e') ADVANCE(27); + END_STATE(); + case 69: + if (lookahead == 'e') ADVANCE(30); + END_STATE(); + case 70: + if (lookahead == 'e') ADVANCE(120); + END_STATE(); + case 71: + if (lookahead == 'e') ADVANCE(136); + END_STATE(); + case 72: + if (lookahead == 'e') ADVANCE(204); + if (lookahead == 'r') ADVANCE(70); + if (lookahead == 'v') ADVANCE(113); + END_STATE(); + case 73: + if (lookahead == 'f') ADVANCE(195); + END_STATE(); + case 74: + if (lookahead == 'f') ADVANCE(98); + END_STATE(); + case 75: + if (lookahead == 'f') ADVANCE(102); + END_STATE(); + case 76: + if (lookahead == 'f') ADVANCE(200); + END_STATE(); + case 77: + if (lookahead == 'g') ADVANCE(264); + END_STATE(); + case 78: + if (lookahead == 'g') ADVANCE(196); + END_STATE(); + case 79: + if (lookahead == 'g') ADVANCE(198); + END_STATE(); + case 80: + if (lookahead == 'g') ADVANCE(95); + END_STATE(); + case 81: + if (lookahead == 'g') ADVANCE(54); + END_STATE(); + case 82: + if (lookahead == 'h') ADVANCE(66); + END_STATE(); + case 83: + if (lookahead == 'h') ADVANCE(67); + END_STATE(); + case 84: + if (lookahead == 'i') ADVANCE(173); + END_STATE(); + case 85: + if (lookahead == 'i') ADVANCE(207); + if (lookahead == 'o') ADVANCE(174); + if (lookahead == 'p') ADVANCE(122); + END_STATE(); + case 86: + if (lookahead == 'i') ADVANCE(166); + END_STATE(); + case 87: + if (lookahead == 'i') ADVANCE(74); + END_STATE(); + case 88: + if (lookahead == 'i') ADVANCE(268); + END_STATE(); + case 89: + if (lookahead == 'i') ADVANCE(125); + END_STATE(); + case 90: + if (lookahead == 'i') ADVANCE(206); + END_STATE(); + case 91: + if (lookahead == 'i') ADVANCE(80); + 
END_STATE(); + case 92: + if (lookahead == 'i') ADVANCE(156); + END_STATE(); + case 93: + if (lookahead == 'i') ADVANCE(141); + END_STATE(); + case 94: + if (lookahead == 'i') ADVANCE(161); + END_STATE(); + case 95: + if (lookahead == 'i') ADVANCE(128); + END_STATE(); + case 96: + if (lookahead == 'i') ADVANCE(176); + END_STATE(); + case 97: + if (lookahead == 'i') ADVANCE(177); + END_STATE(); + case 98: + if (lookahead == 'i') ADVANCE(64); + END_STATE(); + case 99: + if (lookahead == 'i') ADVANCE(178); + END_STATE(); + case 100: + if (lookahead == 'i') ADVANCE(179); + END_STATE(); + case 101: + if (lookahead == 'i') ADVANCE(180); + END_STATE(); + case 102: + if (lookahead == 'i') ADVANCE(67); + END_STATE(); + case 103: + if (lookahead == 'i') ADVANCE(185); + END_STATE(); + case 104: + if (lookahead == 'i') ADVANCE(181); + END_STATE(); + case 105: + if (lookahead == 'i') ADVANCE(189); + END_STATE(); + case 106: + if (lookahead == 'i') ADVANCE(164); + END_STATE(); + case 107: + if (lookahead == 'i') ADVANCE(142); + END_STATE(); + case 108: + if (lookahead == 'i') ADVANCE(75); + END_STATE(); + case 109: + if (lookahead == 'i') ADVANCE(165); + END_STATE(); + case 110: + if (lookahead == 'i') ADVANCE(168); + END_STATE(); + case 111: + if (lookahead == 'i') ADVANCE(169); + END_STATE(); + case 112: + if (lookahead == 'i') ADVANCE(170); + END_STATE(); + case 113: + if (lookahead == 'i') ADVANCE(171); + END_STATE(); + case 114: + if (lookahead == 'i') ADVANCE(172); + END_STATE(); + case 115: + if (lookahead == 'l') ADVANCE(159); + END_STATE(); + case 116: + if (lookahead == 'l') ADVANCE(13); + END_STATE(); + case 117: + if (lookahead == 'l') ADVANCE(92); + END_STATE(); + case 118: + if (lookahead == 'l') ADVANCE(63); + if (lookahead == 'v') ADVANCE(94); + END_STATE(); + case 119: + if (lookahead == 'l') ADVANCE(106); + END_STATE(); + case 120: + if (lookahead == 'l') ADVANCE(69); + if (lookahead == 'v') ADVANCE(109); + END_STATE(); + case 121: + if (lookahead == 'l') ADVANCE(17); + END_STATE(); + case 122: + if (lookahead == 'm') ADVANCE(267); + END_STATE(); + case 123: + if (lookahead == 'm') ADVANCE(96); + END_STATE(); + case 124: + if (lookahead == 'n') ADVANCE(38); + END_STATE(); + case 125: + if (lookahead == 'n') ADVANCE(276); + END_STATE(); + case 126: + if (lookahead == 'n') ADVANCE(269); + END_STATE(); + case 127: + if (lookahead == 'n') ADVANCE(260); + END_STATE(); + case 128: + if (lookahead == 'n') ADVANCE(255); + END_STATE(); + case 129: + if (lookahead == 'n') ADVANCE(249); + END_STATE(); + case 130: + if (lookahead == 'n') ADVANCE(272); + END_STATE(); + case 131: + if (lookahead == 'n') ADVANCE(78); + if (lookahead == 's') ADVANCE(175); + END_STATE(); + case 132: + if (lookahead == 'n') ADVANCE(78); + if (lookahead == 's') ADVANCE(191); + END_STATE(); + case 133: + if (lookahead == 'n') ADVANCE(37); + END_STATE(); + case 134: + if (lookahead == 'n') ADVANCE(184); + END_STATE(); + case 135: + if (lookahead == 'n') ADVANCE(160); + END_STATE(); + case 136: + if (lookahead == 'n') ADVANCE(192); + END_STATE(); + case 137: + if (lookahead == 'o') ADVANCE(45); + END_STATE(); + case 138: + if (lookahead == 'o') ADVANCE(152); + END_STATE(); + case 139: + if (lookahead == 'o') ADVANCE(152); + if (lookahead == 'v') ADVANCE(126); + END_STATE(); + case 140: + if (lookahead == 'o') ADVANCE(151); + END_STATE(); + case 141: + if (lookahead == 'o') ADVANCE(129); + END_STATE(); + case 142: + if (lookahead == 'o') ADVANCE(130); + END_STATE(); + case 143: + if (lookahead == 'o') ADVANCE(167); + 
END_STATE(); + case 144: + if (lookahead == 'p') ADVANCE(263); + END_STATE(); + case 145: + if (lookahead == 'p') ADVANCE(88); + END_STATE(); + case 146: + if (lookahead == 'p') ADVANCE(56); + END_STATE(); + case 147: + if (lookahead == 'r') ADVANCE(22); + END_STATE(); + case 148: + if (lookahead == 'r') ADVANCE(290); + END_STATE(); + case 149: + if (lookahead == 'r') ADVANCE(270); + END_STATE(); + case 150: + if (lookahead == 'r') ADVANCE(289); + END_STATE(); + case 151: + if (lookahead == 'r') ADVANCE(40); + END_STATE(); + case 152: + if (lookahead == 'r') ADVANCE(183); + END_STATE(); + case 153: + if (lookahead == 'r') ADVANCE(91); + END_STATE(); + case 154: + if (lookahead == 'r') ADVANCE(68); + END_STATE(); + case 155: + if (lookahead == 'r') ADVANCE(50); + END_STATE(); + case 156: + if (lookahead == 's') ADVANCE(82); + END_STATE(); + case 157: + if (lookahead == 's') ADVANCE(246); + END_STATE(); + case 158: + if (lookahead == 's') ADVANCE(175); + END_STATE(); + case 159: + if (lookahead == 's') ADVANCE(52); + END_STATE(); + case 160: + if (lookahead == 's') ADVANCE(54); + END_STATE(); + case 161: + if (lookahead == 's') ADVANCE(93); + END_STATE(); + case 162: + if (lookahead == 's') ADVANCE(57); + END_STATE(); + case 163: + if (lookahead == 's') ADVANCE(58); + END_STATE(); + case 164: + if (lookahead == 's') ADVANCE(83); + END_STATE(); + case 165: + if (lookahead == 's') ADVANCE(107); + END_STATE(); + case 166: + if (lookahead == 's') ADVANCE(97); + END_STATE(); + case 167: + if (lookahead == 's') ADVANCE(99); + END_STATE(); + case 168: + if (lookahead == 's') ADVANCE(100); + END_STATE(); + case 169: + if (lookahead == 's') ADVANCE(101); + END_STATE(); + case 170: + if (lookahead == 's') ADVANCE(103); + END_STATE(); + case 171: + if (lookahead == 's') ADVANCE(104); + END_STATE(); + case 172: + if (lookahead == 's') ADVANCE(105); + END_STATE(); + case 173: + if (lookahead == 't') ADVANCE(265); + END_STATE(); + case 174: + if (lookahead == 't') ADVANCE(2); + END_STATE(); + case 175: + if (lookahead == 't') ADVANCE(11); + END_STATE(); + case 176: + if (lookahead == 't') ADVANCE(254); + END_STATE(); + case 177: + if (lookahead == 't') ADVANCE(14); + END_STATE(); + case 178: + if (lookahead == 't') ADVANCE(262); + END_STATE(); + case 179: + if (lookahead == 't') ADVANCE(247); + END_STATE(); + case 180: + if (lookahead == 't') ADVANCE(248); + END_STATE(); + case 181: + if (lookahead == 't') ADVANCE(272); + END_STATE(); + case 182: + if (lookahead == 't') ADVANCE(21); + END_STATE(); + case 183: + if (lookahead == 't') ADVANCE(12); + END_STATE(); + case 184: + if (lookahead == 't') ADVANCE(73); + END_STATE(); + case 185: + if (lookahead == 't') ADVANCE(15); + END_STATE(); + case 186: + if (lookahead == 't') ADVANCE(210); + END_STATE(); + case 187: + if (lookahead == 't') ADVANCE(20); + END_STATE(); + case 188: + if (lookahead == 't') ADVANCE(61); + END_STATE(); + case 189: + if (lookahead == 't') ADVANCE(157); + END_STATE(); + case 190: + if (lookahead == 't') ADVANCE(67); + END_STATE(); + case 191: + if (lookahead == 't') ADVANCE(16); + END_STATE(); + case 192: + if (lookahead == 't') ADVANCE(76); + END_STATE(); + case 193: + if (lookahead == 'u') ADVANCE(33); + END_STATE(); + case 194: + if (lookahead == 'u') ADVANCE(33); + if (lookahead == 'y') ADVANCE(145); + END_STATE(); + case 195: + if (lookahead == 'u') ADVANCE(116); + END_STATE(); + case 196: + if (lookahead == 'u') ADVANCE(24); + END_STATE(); + case 197: + if (lookahead == 'u') ADVANCE(51); + END_STATE(); + case 198: + if 
(lookahead == 'u') ADVANCE(90); + END_STATE(); + case 199: + if (lookahead == 'u') ADVANCE(35); + END_STATE(); + case 200: + if (lookahead == 'u') ADVANCE(121); + END_STATE(); + case 201: + if (lookahead == 'v') ADVANCE(65); + END_STATE(); + case 202: + if (lookahead == 'v') ADVANCE(111); + END_STATE(); + case 203: + if (lookahead == 'v') ADVANCE(113); + END_STATE(); + case 204: + if (lookahead == 'v') ADVANCE(71); + END_STATE(); + case 205: + if (lookahead == 'w') ADVANCE(140); + END_STATE(); + case 206: + if (lookahead == 'x') ADVANCE(266); + END_STATE(); + case 207: + if (lookahead == 'x') ADVANCE(79); + END_STATE(); + case 208: + if (lookahead == 'y') ADVANCE(205); + END_STATE(); + case 209: + if (lookahead == 'y') ADVANCE(239); + END_STATE(); + case 210: + if (lookahead == 'y') ADVANCE(146); + END_STATE(); + case 211: + if (lookahead == '"' || + lookahead == '\'' || + lookahead == '/' || + lookahead == '\\' || + lookahead == 'b' || + lookahead == 'n' || + lookahead == 'r' || + lookahead == 't' || + lookahead == 'u') ADVANCE(295); + END_STATE(); + case 212: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(7); + END_STATE(); + case 213: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(281); + END_STATE(); + case 214: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(277); + END_STATE(); + case 215: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(8); + END_STATE(); + case 216: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(279); + END_STATE(); + case 217: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(278); + END_STATE(); + case 218: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(280); + END_STATE(); + case 219: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(212); + END_STATE(); + case 220: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(213); + END_STATE(); + case 221: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(216); + END_STATE(); + case 222: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(214); + END_STATE(); + case 223: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(217); + END_STATE(); + case 224: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(218); + END_STATE(); + case 225: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(6); + END_STATE(); + case 226: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(225); + END_STATE(); + case 227: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(224); + END_STATE(); + case 228: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(226); + END_STATE(); + case 229: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(9); + END_STATE(); + case 230: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(229); + END_STATE(); + case 231: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(227); + END_STATE(); + case 232: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(231); + END_STATE(); + case 233: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(232); + END_STATE(); + case 234: + if (eof) ADVANCE(236); + if (lookahead == '!') ADVANCE(10); + if (lookahead == '"') ADVANCE(242); + if (lookahead == '\'') ADVANCE(241); + if (lookahead == '(') ADVANCE(237); + if (lookahead == ')') ADVANCE(238); + if (lookahead == ',') ADVANCE(243); + if (lookahead == '-') ADVANCE(245); + if (lookahead == '<') ADVANCE(274); + if (lookahead == '=') ADVANCE(273); + if (lookahead == '>') ADVANCE(274); + if (lookahead == '[') ADVANCE(240); + if (lookahead == ']') ADVANCE(244); + if (lookahead == 'a') ADVANCE(124); + if (lookahead == 'c') ADVANCE(147); + if 
(lookahead == 'd') ADVANCE(48); + if (lookahead == 'f') ADVANCE(18); + if (lookahead == 'g') ADVANCE(84); + if (lookahead == 'h') ADVANCE(77); + if (lookahead == 'i') ADVANCE(125); + if (lookahead == 'k') ADVANCE(49); + if (lookahead == 'l') ADVANCE(19); + if (lookahead == 'm') ADVANCE(59); + if (lookahead == 'n') ADVANCE(85); + if (lookahead == 'o') ADVANCE(148); + if (lookahead == 'p') ADVANCE(194); + if (lookahead == 's') ADVANCE(139); + if (lookahead == 't') ADVANCE(23); + if (lookahead == 'v') ADVANCE(86); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') SKIP(234) + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(285); + END_STATE(); + case 235: + if (eof) ADVANCE(236); + if (lookahead == '!') ADVANCE(10); + if (lookahead == '"') ADVANCE(242); + if (lookahead == '\'') ADVANCE(241); + if (lookahead == ')') ADVANCE(238); + if (lookahead == ',') ADVANCE(243); + if (lookahead == '-') ADVANCE(245); + if (lookahead == '<') ADVANCE(274); + if (lookahead == '=') ADVANCE(273); + if (lookahead == '>') ADVANCE(274); + if (lookahead == ']') ADVANCE(244); + if (lookahead == 'a') ADVANCE(133); + if (lookahead == 'c') ADVANCE(155); + if (lookahead == 'l') ADVANCE(25); + if (lookahead == 'm') ADVANCE(137); + if (lookahead == 'o') ADVANCE(150); + if (lookahead == 'p') ADVANCE(193); + if (lookahead == 's') ADVANCE(138); + if (lookahead == 'v') ADVANCE(114); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') SKIP(235) + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(228); + END_STATE(); + case 236: + ACCEPT_TOKEN(ts_builtin_sym_end); + END_STATE(); + case 237: + ACCEPT_TOKEN(anon_sym_LPAREN); + END_STATE(); + case 238: + ACCEPT_TOKEN(anon_sym_RPAREN); + END_STATE(); + case 239: + ACCEPT_TOKEN(sym_sortByField); + END_STATE(); + case 240: + ACCEPT_TOKEN(anon_sym_LBRACK); + END_STATE(); + case 241: + ACCEPT_TOKEN(anon_sym_SQUOTE); + END_STATE(); + case 242: + ACCEPT_TOKEN(anon_sym_DQUOTE); + END_STATE(); + case 243: + ACCEPT_TOKEN(anon_sym_COMMA); + END_STATE(); + case 244: + ACCEPT_TOKEN(anon_sym_RBRACK); + END_STATE(); + case 245: + ACCEPT_TOKEN(anon_sym_DASH); + END_STATE(); + case 246: + ACCEPT_TOKEN(anon_sym_visits); + END_STATE(); + case 247: + ACCEPT_TOKEN(anon_sym_last_visit); + END_STATE(); + case 248: + ACCEPT_TOKEN(anon_sym_last_eventful_visit); + END_STATE(); + case 249: + ACCEPT_TOKEN(anon_sym_last_revision); + END_STATE(); + case 250: + ACCEPT_TOKEN(anon_sym_last_release); + END_STATE(); + case 251: + ACCEPT_TOKEN(anon_sym_created); + END_STATE(); + case 252: + ACCEPT_TOKEN(anon_sym_modified); + END_STATE(); + case 253: + ACCEPT_TOKEN(anon_sym_published); + END_STATE(); + case 254: + ACCEPT_TOKEN(sym_limitField); + END_STATE(); + case 255: + ACCEPT_TOKEN(sym_patternField); + END_STATE(); + case 256: + ACCEPT_TOKEN(sym_booleanField); + END_STATE(); + case 257: + ACCEPT_TOKEN(sym_numericField); + END_STATE(); + case 258: + ACCEPT_TOKEN(sym_visitTypeField); + END_STATE(); + case 259: + ACCEPT_TOKEN(anon_sym_any); + END_STATE(); + case 260: + ACCEPT_TOKEN(anon_sym_cran); + END_STATE(); + case 261: + ACCEPT_TOKEN(anon_sym_deb); + END_STATE(); + case 262: + ACCEPT_TOKEN(anon_sym_deposit); + END_STATE(); + case 263: + ACCEPT_TOKEN(anon_sym_ftp); + END_STATE(); + case 264: + ACCEPT_TOKEN(anon_sym_hg); + END_STATE(); + case 265: + ACCEPT_TOKEN(anon_sym_git); + END_STATE(); + case 266: + ACCEPT_TOKEN(anon_sym_nixguix); + END_STATE(); + case 267: + ACCEPT_TOKEN(anon_sym_npm); + END_STATE(); + case 268: + 
ACCEPT_TOKEN(anon_sym_pypi); + END_STATE(); + case 269: + ACCEPT_TOKEN(anon_sym_svn); + END_STATE(); + case 270: + ACCEPT_TOKEN(anon_sym_tar); + END_STATE(); + case 271: + ACCEPT_TOKEN(sym_listField); + END_STATE(); + case 272: + ACCEPT_TOKEN(sym_dateField); + END_STATE(); + case 273: + ACCEPT_TOKEN(sym_rangeOp); + END_STATE(); + case 274: + ACCEPT_TOKEN(sym_rangeOp); + if (lookahead == '=') ADVANCE(273); + END_STATE(); + case 275: + ACCEPT_TOKEN(sym_equalOp); + END_STATE(); + case 276: + ACCEPT_TOKEN(sym_choiceOp); + END_STATE(); + case 277: + ACCEPT_TOKEN(sym_isoDateTime); + END_STATE(); + case 278: + ACCEPT_TOKEN(sym_isoDateTime); + if (lookahead == '+') ADVANCE(230); + if (lookahead == '.') ADVANCE(233); + if (lookahead == 'Z') ADVANCE(277); + END_STATE(); + case 279: + ACCEPT_TOKEN(sym_isoDateTime); + if (lookahead == '+') ADVANCE(230); + if (lookahead == ':') ADVANCE(223); + if (lookahead == 'Z') ADVANCE(277); + END_STATE(); + case 280: + ACCEPT_TOKEN(sym_isoDateTime); + if (lookahead == '+') ADVANCE(230); + if (lookahead == 'Z') ADVANCE(277); + END_STATE(); + case 281: + ACCEPT_TOKEN(sym_isoDateTime); + if (lookahead == '+') ADVANCE(230); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ' || + lookahead == 'T') ADVANCE(281); + if (lookahead == 'Z') ADVANCE(277); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(215); + END_STATE(); + case 282: + ACCEPT_TOKEN(sym_number); + if (lookahead == '-') ADVANCE(219); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(286); + END_STATE(); + case 283: + ACCEPT_TOKEN(sym_number); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(282); + END_STATE(); + case 284: + ACCEPT_TOKEN(sym_number); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(283); + END_STATE(); + case 285: + ACCEPT_TOKEN(sym_number); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(284); + END_STATE(); + case 286: + ACCEPT_TOKEN(sym_number); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(286); + END_STATE(); + case 287: + ACCEPT_TOKEN(sym_booleanTrue); + END_STATE(); + case 288: + ACCEPT_TOKEN(sym_booleanFalse); + END_STATE(); + case 289: + ACCEPT_TOKEN(sym_or); + END_STATE(); + case 290: + ACCEPT_TOKEN(sym_or); + if (lookahead == 'i') ADVANCE(80); + END_STATE(); + case 291: + ACCEPT_TOKEN(sym_and); + END_STATE(); + case 292: + ACCEPT_TOKEN(sym_singleWord); + if (!sym_singleWord_character_set_1(lookahead)) ADVANCE(292); + END_STATE(); + case 293: + ACCEPT_TOKEN(aux_sym_stringContent_token1); + if (lookahead == '\t' || + lookahead == '\r' || + lookahead == ' ') ADVANCE(293); + if (lookahead != 0 && + lookahead != '\n' && + lookahead != '"' && + lookahead != '\'' && + lookahead != '\\') ADVANCE(294); + END_STATE(); + case 294: + ACCEPT_TOKEN(aux_sym_stringContent_token1); + if (lookahead != 0 && + lookahead != '\n' && + lookahead != '"' && + lookahead != '\'' && + lookahead != '\\') ADVANCE(294); + END_STATE(); + case 295: + ACCEPT_TOKEN(sym_escape_sequence); + END_STATE(); + default: + return false; + } +} + +static const TSLexMode ts_lex_modes[STATE_COUNT] = { + [0] = {.lex_state = 0}, + [1] = {.lex_state = 5}, + [2] = {.lex_state = 5}, + [3] = {.lex_state = 0}, + [4] = {.lex_state = 5}, + [5] = {.lex_state = 0}, + [6] = {.lex_state = 5}, + [7] = {.lex_state = 5}, + [8] = {.lex_state = 235}, + [9] = {.lex_state = 0}, + [10] = {.lex_state = 0}, + [11] = {.lex_state = 235}, + [12] = {.lex_state = 0}, + [13] = {.lex_state = 0}, + [14] = {.lex_state = 235}, + [15] = {.lex_state = 235}, + [16] = {.lex_state = 235}, + 
[17] = {.lex_state = 235}, + [18] = {.lex_state = 235}, + [19] = {.lex_state = 235}, + [20] = {.lex_state = 235}, + [21] = {.lex_state = 235}, + [22] = {.lex_state = 235}, + [23] = {.lex_state = 235}, + [24] = {.lex_state = 235}, + [25] = {.lex_state = 235}, + [26] = {.lex_state = 235}, + [27] = {.lex_state = 235}, + [28] = {.lex_state = 235}, + [29] = {.lex_state = 235}, + [30] = {.lex_state = 235}, + [31] = {.lex_state = 235}, + [32] = {.lex_state = 235}, + [33] = {.lex_state = 235}, + [34] = {.lex_state = 235}, + [35] = {.lex_state = 235}, + [36] = {.lex_state = 235}, + [37] = {.lex_state = 235}, + [38] = {.lex_state = 235}, + [39] = {.lex_state = 235}, + [40] = {.lex_state = 235}, + [41] = {.lex_state = 235}, + [42] = {.lex_state = 235}, + [43] = {.lex_state = 235}, + [44] = {.lex_state = 3}, + [45] = {.lex_state = 235}, + [46] = {.lex_state = 235}, + [47] = {.lex_state = 3}, + [48] = {.lex_state = 3}, + [49] = {.lex_state = 1}, + [50] = {.lex_state = 1}, + [51] = {.lex_state = 0}, + [52] = {.lex_state = 1}, + [53] = {.lex_state = 0}, + [54] = {.lex_state = 0}, + [55] = {.lex_state = 0}, + [56] = {.lex_state = 0}, + [57] = {.lex_state = 0}, + [58] = {.lex_state = 0}, + [59] = {.lex_state = 1}, + [60] = {.lex_state = 0}, + [61] = {.lex_state = 0}, + [62] = {.lex_state = 0}, + [63] = {.lex_state = 0}, + [64] = {.lex_state = 0}, + [65] = {.lex_state = 0}, + [66] = {.lex_state = 0}, + [67] = {.lex_state = 0}, + [68] = {.lex_state = 0}, + [69] = {.lex_state = 0}, + [70] = {.lex_state = 0}, + [71] = {.lex_state = 0}, + [72] = {.lex_state = 0}, + [73] = {.lex_state = 0}, + [74] = {.lex_state = 0}, + [75] = {.lex_state = 0}, + [76] = {.lex_state = 0}, + [77] = {.lex_state = 0}, + [78] = {.lex_state = 0}, + [79] = {.lex_state = 0}, + [80] = {.lex_state = 235}, + [81] = {.lex_state = 3}, + [82] = {.lex_state = 0}, + [83] = {.lex_state = 0}, + [84] = {.lex_state = 0}, + [85] = {.lex_state = 0}, + [86] = {.lex_state = 0}, + [87] = {.lex_state = 0}, + [88] = {.lex_state = 0}, + [89] = {.lex_state = 5}, + [90] = {.lex_state = 0}, + [91] = {.lex_state = 5}, + [92] = {.lex_state = 0}, + [93] = {.lex_state = 0}, + [94] = {.lex_state = 5}, + [95] = {.lex_state = 0}, + [96] = {.lex_state = 0}, + [97] = {.lex_state = 0}, + [98] = {.lex_state = 0}, + [99] = {.lex_state = 0}, + [100] = {.lex_state = 5}, + [101] = {.lex_state = 235}, + [102] = {.lex_state = 5}, + [103] = {.lex_state = 235}, + [104] = {.lex_state = 235}, + [105] = {.lex_state = 0}, + [106] = {.lex_state = 0}, + [107] = {.lex_state = 235}, + [108] = {.lex_state = 0}, + [109] = {.lex_state = 0}, + [110] = {.lex_state = 0}, + [111] = {.lex_state = 0}, + [112] = {.lex_state = 0}, + [113] = {.lex_state = 0}, + [114] = {.lex_state = 0}, + [115] = {.lex_state = 5}, + [116] = {.lex_state = 0}, + [117] = {.lex_state = 5}, + [118] = {.lex_state = 0}, + [119] = {.lex_state = 0}, + [120] = {.lex_state = 0}, + [121] = {.lex_state = 0}, + [122] = {.lex_state = 0}, + [123] = {.lex_state = 0}, + [124] = {.lex_state = 0}, + [125] = {.lex_state = 5}, +}; + +static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { + [0] = { + [ts_builtin_sym_end] = ACTIONS(1), + [anon_sym_LPAREN] = ACTIONS(1), + [anon_sym_RPAREN] = ACTIONS(1), + [sym_sortByField] = ACTIONS(1), + [anon_sym_LBRACK] = ACTIONS(1), + [anon_sym_SQUOTE] = ACTIONS(1), + [anon_sym_DQUOTE] = ACTIONS(1), + [anon_sym_COMMA] = ACTIONS(1), + [anon_sym_RBRACK] = ACTIONS(1), + [anon_sym_DASH] = ACTIONS(1), + [anon_sym_visits] = ACTIONS(1), + [anon_sym_last_visit] = ACTIONS(1), + 
[anon_sym_last_eventful_visit] = ACTIONS(1), + [anon_sym_last_revision] = ACTIONS(1), + [anon_sym_last_release] = ACTIONS(1), + [anon_sym_created] = ACTIONS(1), + [anon_sym_modified] = ACTIONS(1), + [anon_sym_published] = ACTIONS(1), + [sym_limitField] = ACTIONS(1), + [sym_patternField] = ACTIONS(1), + [sym_booleanField] = ACTIONS(1), + [sym_numericField] = ACTIONS(1), + [sym_visitTypeField] = ACTIONS(1), + [anon_sym_any] = ACTIONS(1), + [anon_sym_cran] = ACTIONS(1), + [anon_sym_deb] = ACTIONS(1), + [anon_sym_deposit] = ACTIONS(1), + [anon_sym_ftp] = ACTIONS(1), + [anon_sym_hg] = ACTIONS(1), + [anon_sym_git] = ACTIONS(1), + [anon_sym_nixguix] = ACTIONS(1), + [anon_sym_npm] = ACTIONS(1), + [anon_sym_pypi] = ACTIONS(1), + [anon_sym_svn] = ACTIONS(1), + [anon_sym_tar] = ACTIONS(1), + [sym_listField] = ACTIONS(1), + [sym_dateField] = ACTIONS(1), + [sym_rangeOp] = ACTIONS(1), + [sym_equalOp] = ACTIONS(1), + [sym_choiceOp] = ACTIONS(1), + [sym_isoDateTime] = ACTIONS(1), + [sym_number] = ACTIONS(1), + [sym_booleanTrue] = ACTIONS(1), + [sym_booleanFalse] = ACTIONS(1), + [sym_or] = ACTIONS(1), + [sym_and] = ACTIONS(1), + [sym_escape_sequence] = ACTIONS(1), + }, + [1] = { + [sym_query] = STATE(116), + [sym_filters] = STATE(21), + [sym_filter] = STATE(31), + [sym_patternFilter] = STATE(30), + [sym_booleanFilter] = STATE(30), + [sym_numericFilter] = STATE(30), + [sym_boundedListFilter] = STATE(30), + [sym_visitTypeFilter] = STATE(29), + [sym_unboundedListFilter] = STATE(30), + [sym_dateFilter] = STATE(30), + [anon_sym_LPAREN] = ACTIONS(3), + [sym_patternField] = ACTIONS(5), + [sym_booleanField] = ACTIONS(7), + [sym_numericField] = ACTIONS(9), + [sym_visitTypeField] = ACTIONS(11), + [sym_listField] = ACTIONS(13), + [sym_dateField] = ACTIONS(15), + }, +}; + +static const uint16_t ts_small_parse_table[] = { + [0] = 15, + ACTIONS(3), 1, + anon_sym_LPAREN, + ACTIONS(5), 1, + sym_patternField, + ACTIONS(7), 1, + sym_booleanField, + ACTIONS(9), 1, + sym_numericField, + ACTIONS(11), 1, + sym_visitTypeField, + ACTIONS(13), 1, + sym_listField, + ACTIONS(15), 1, + sym_dateField, + ACTIONS(17), 1, + sym_sortByField, + ACTIONS(19), 1, + sym_limitField, + STATE(25), 1, + sym_filters, + STATE(29), 1, + sym_visitTypeFilter, + STATE(31), 1, + sym_filter, + STATE(55), 1, + sym_sortBy, + STATE(56), 1, + sym_limit, + STATE(30), 6, + sym_patternFilter, + sym_booleanFilter, + sym_numericFilter, + sym_boundedListFilter, + sym_unboundedListFilter, + sym_dateFilter, + [51] = 5, + ACTIONS(21), 1, + anon_sym_SQUOTE, + ACTIONS(23), 1, + anon_sym_DQUOTE, + STATE(96), 1, + sym_visitTypeOptions, + ACTIONS(25), 2, + anon_sym_COMMA, + anon_sym_RBRACK, + ACTIONS(27), 12, + anon_sym_any, + anon_sym_cran, + anon_sym_deb, + anon_sym_deposit, + anon_sym_ftp, + anon_sym_hg, + anon_sym_git, + anon_sym_nixguix, + anon_sym_npm, + anon_sym_pypi, + anon_sym_svn, + anon_sym_tar, + [79] = 11, + ACTIONS(3), 1, + anon_sym_LPAREN, + ACTIONS(5), 1, + sym_patternField, + ACTIONS(7), 1, + sym_booleanField, + ACTIONS(9), 1, + sym_numericField, + ACTIONS(11), 1, + sym_visitTypeField, + ACTIONS(13), 1, + sym_listField, + ACTIONS(15), 1, + sym_dateField, + STATE(29), 1, + sym_visitTypeFilter, + STATE(31), 1, + sym_filter, + STATE(80), 1, + sym_filters, + STATE(30), 6, + sym_patternFilter, + sym_booleanFilter, + sym_numericFilter, + sym_boundedListFilter, + sym_unboundedListFilter, + sym_dateFilter, + [118] = 5, + ACTIONS(29), 1, + anon_sym_SQUOTE, + ACTIONS(31), 1, + anon_sym_DQUOTE, + ACTIONS(33), 1, + anon_sym_RBRACK, + STATE(83), 1, + 
sym_visitTypeOptions, + ACTIONS(27), 12, + anon_sym_any, + anon_sym_cran, + anon_sym_deb, + anon_sym_deposit, + anon_sym_ftp, + anon_sym_hg, + anon_sym_git, + anon_sym_nixguix, + anon_sym_npm, + anon_sym_pypi, + anon_sym_svn, + anon_sym_tar, + [145] = 11, + ACTIONS(3), 1, + anon_sym_LPAREN, + ACTIONS(5), 1, + sym_patternField, + ACTIONS(7), 1, + sym_booleanField, + ACTIONS(9), 1, + sym_numericField, + ACTIONS(11), 1, + sym_visitTypeField, + ACTIONS(13), 1, + sym_listField, + ACTIONS(15), 1, + sym_dateField, + STATE(25), 1, + sym_filters, + STATE(29), 1, + sym_visitTypeFilter, + STATE(31), 1, + sym_filter, + STATE(30), 6, + sym_patternFilter, + sym_booleanFilter, + sym_numericFilter, + sym_boundedListFilter, + sym_unboundedListFilter, + sym_dateFilter, + [184] = 11, + ACTIONS(3), 1, + anon_sym_LPAREN, + ACTIONS(5), 1, + sym_patternField, + ACTIONS(7), 1, + sym_booleanField, + ACTIONS(9), 1, + sym_numericField, + ACTIONS(11), 1, + sym_visitTypeField, + ACTIONS(13), 1, + sym_listField, + ACTIONS(15), 1, + sym_dateField, + STATE(29), 1, + sym_visitTypeFilter, + STATE(31), 1, + sym_filter, + STATE(35), 1, + sym_filters, + STATE(30), 6, + sym_patternFilter, + sym_booleanFilter, + sym_numericFilter, + sym_boundedListFilter, + sym_unboundedListFilter, + sym_dateFilter, + [223] = 6, + ACTIONS(35), 1, + anon_sym_SQUOTE, + ACTIONS(37), 1, + anon_sym_DQUOTE, + ACTIONS(41), 1, + anon_sym_DASH, + STATE(88), 1, + sym_sortByOptions, + ACTIONS(39), 2, + anon_sym_COMMA, + anon_sym_RBRACK, + ACTIONS(43), 8, + anon_sym_visits, + anon_sym_last_visit, + anon_sym_last_eventful_visit, + anon_sym_last_revision, + anon_sym_last_release, + anon_sym_created, + anon_sym_modified, + anon_sym_published, + [250] = 2, + STATE(106), 1, + sym_visitTypeOptions, + ACTIONS(27), 12, + anon_sym_any, + anon_sym_cran, + anon_sym_deb, + anon_sym_deposit, + anon_sym_ftp, + anon_sym_hg, + anon_sym_git, + anon_sym_nixguix, + anon_sym_npm, + anon_sym_pypi, + anon_sym_svn, + anon_sym_tar, + [268] = 2, + STATE(114), 1, + sym_visitTypeOptions, + ACTIONS(27), 12, + anon_sym_any, + anon_sym_cran, + anon_sym_deb, + anon_sym_deposit, + anon_sym_ftp, + anon_sym_hg, + anon_sym_git, + anon_sym_nixguix, + anon_sym_npm, + anon_sym_pypi, + anon_sym_svn, + anon_sym_tar, + [286] = 6, + ACTIONS(41), 1, + anon_sym_DASH, + ACTIONS(45), 1, + anon_sym_SQUOTE, + ACTIONS(47), 1, + anon_sym_DQUOTE, + ACTIONS(49), 1, + anon_sym_RBRACK, + STATE(86), 1, + sym_sortByOptions, + ACTIONS(43), 8, + anon_sym_visits, + anon_sym_last_visit, + anon_sym_last_eventful_visit, + anon_sym_last_revision, + anon_sym_last_release, + anon_sym_created, + anon_sym_modified, + anon_sym_published, + [312] = 2, + STATE(111), 1, + sym_visitTypeOptions, + ACTIONS(27), 12, + anon_sym_any, + anon_sym_cran, + anon_sym_deb, + anon_sym_deposit, + anon_sym_ftp, + anon_sym_hg, + anon_sym_git, + anon_sym_nixguix, + anon_sym_npm, + anon_sym_pypi, + anon_sym_svn, + anon_sym_tar, + [330] = 2, + STATE(110), 1, + sym_visitTypeOptions, + ACTIONS(27), 12, + anon_sym_any, + anon_sym_cran, + anon_sym_deb, + anon_sym_deposit, + anon_sym_ftp, + anon_sym_hg, + anon_sym_git, + anon_sym_nixguix, + anon_sym_npm, + anon_sym_pypi, + anon_sym_svn, + anon_sym_tar, + [348] = 3, + ACTIONS(41), 1, + anon_sym_DASH, + STATE(108), 1, + sym_sortByOptions, + ACTIONS(43), 8, + anon_sym_visits, + anon_sym_last_visit, + anon_sym_last_eventful_visit, + anon_sym_last_revision, + anon_sym_last_release, + anon_sym_created, + anon_sym_modified, + anon_sym_published, + [365] = 3, + ACTIONS(41), 1, + anon_sym_DASH, + STATE(123), 
1, + sym_sortByOptions, + ACTIONS(43), 8, + anon_sym_visits, + anon_sym_last_visit, + anon_sym_last_eventful_visit, + anon_sym_last_revision, + anon_sym_last_release, + anon_sym_created, + anon_sym_modified, + anon_sym_published, + [382] = 3, + ACTIONS(41), 1, + anon_sym_DASH, + STATE(122), 1, + sym_sortByOptions, + ACTIONS(43), 8, + anon_sym_visits, + anon_sym_last_visit, + anon_sym_last_eventful_visit, + anon_sym_last_revision, + anon_sym_last_release, + anon_sym_created, + anon_sym_modified, + anon_sym_published, + [399] = 3, + ACTIONS(41), 1, + anon_sym_DASH, + STATE(113), 1, + sym_sortByOptions, + ACTIONS(43), 8, + anon_sym_visits, + anon_sym_last_visit, + anon_sym_last_eventful_visit, + anon_sym_last_revision, + anon_sym_last_release, + anon_sym_created, + anon_sym_modified, + anon_sym_published, + [416] = 1, + ACTIONS(51), 8, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + anon_sym_COMMA, + anon_sym_RBRACK, + sym_limitField, + sym_or, + sym_and, + [427] = 1, + ACTIONS(53), 8, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + anon_sym_COMMA, + anon_sym_RBRACK, + sym_limitField, + sym_or, + sym_and, + [438] = 1, + ACTIONS(55), 8, + anon_sym_visits, + anon_sym_last_visit, + anon_sym_last_eventful_visit, + anon_sym_last_revision, + anon_sym_last_release, + anon_sym_created, + anon_sym_modified, + anon_sym_published, + [449] = 7, + ACTIONS(17), 1, + sym_sortByField, + ACTIONS(19), 1, + sym_limitField, + ACTIONS(57), 1, + ts_builtin_sym_end, + ACTIONS(59), 1, + sym_or, + ACTIONS(61), 1, + sym_and, + STATE(51), 1, + sym_sortBy, + STATE(57), 1, + sym_limit, + [471] = 1, + ACTIONS(63), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [480] = 1, + ACTIONS(65), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [489] = 1, + ACTIONS(67), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [498] = 1, + ACTIONS(69), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [507] = 1, + ACTIONS(71), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [516] = 1, + ACTIONS(73), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [525] = 1, + ACTIONS(75), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [534] = 1, + ACTIONS(77), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [543] = 1, + ACTIONS(79), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [552] = 1, + ACTIONS(81), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [561] = 1, + ACTIONS(83), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [570] = 1, + ACTIONS(85), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [579] = 1, + ACTIONS(87), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [588] = 2, + ACTIONS(89), 1, + sym_and, + ACTIONS(69), 5, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + [599] = 1, + ACTIONS(91), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + 
sym_limitField, + sym_or, + sym_and, + [608] = 1, + ACTIONS(93), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [617] = 1, + ACTIONS(95), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [626] = 1, + ACTIONS(97), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [635] = 1, + ACTIONS(99), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [644] = 1, + ACTIONS(101), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [653] = 1, + ACTIONS(103), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [662] = 1, + ACTIONS(105), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [671] = 5, + ACTIONS(107), 1, + anon_sym_SQUOTE, + ACTIONS(109), 1, + anon_sym_DQUOTE, + ACTIONS(113), 1, + sym_singleWord, + STATE(99), 1, + sym_string, + ACTIONS(111), 2, + anon_sym_COMMA, + anon_sym_RBRACK, + [688] = 1, + ACTIONS(115), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [697] = 1, + ACTIONS(117), 6, + ts_builtin_sym_end, + anon_sym_RPAREN, + sym_sortByField, + sym_limitField, + sym_or, + sym_and, + [706] = 5, + ACTIONS(107), 1, + anon_sym_SQUOTE, + ACTIONS(109), 1, + anon_sym_DQUOTE, + ACTIONS(113), 1, + sym_singleWord, + ACTIONS(119), 1, + anon_sym_RBRACK, + STATE(69), 1, + sym_string, + [722] = 5, + ACTIONS(107), 1, + anon_sym_SQUOTE, + ACTIONS(109), 1, + anon_sym_DQUOTE, + ACTIONS(113), 1, + sym_singleWord, + STATE(38), 1, + sym_patternVal, + STATE(39), 1, + sym_string, + [738] = 3, + STATE(50), 1, + aux_sym_stringContent_repeat1, + ACTIONS(121), 2, + anon_sym_SQUOTE, + anon_sym_DQUOTE, + ACTIONS(123), 2, + aux_sym_stringContent_token1, + sym_escape_sequence, + [750] = 3, + STATE(50), 1, + aux_sym_stringContent_repeat1, + ACTIONS(125), 2, + anon_sym_SQUOTE, + anon_sym_DQUOTE, + ACTIONS(127), 2, + aux_sym_stringContent_token1, + sym_escape_sequence, + [762] = 4, + ACTIONS(19), 1, + sym_limitField, + ACTIONS(130), 1, + ts_builtin_sym_end, + ACTIONS(132), 1, + sym_and, + STATE(119), 1, + sym_limit, + [775] = 3, + STATE(49), 1, + aux_sym_stringContent_repeat1, + STATE(124), 1, + sym_stringContent, + ACTIONS(134), 2, + aux_sym_stringContent_token1, + sym_escape_sequence, + [786] = 1, + ACTIONS(136), 4, + anon_sym_SQUOTE, + anon_sym_DQUOTE, + anon_sym_COMMA, + anon_sym_RBRACK, + [793] = 1, + ACTIONS(138), 4, + anon_sym_SQUOTE, + anon_sym_DQUOTE, + anon_sym_COMMA, + anon_sym_RBRACK, + [800] = 4, + ACTIONS(19), 1, + sym_limitField, + ACTIONS(140), 1, + ts_builtin_sym_end, + ACTIONS(142), 1, + sym_and, + STATE(109), 1, + sym_limit, + [813] = 4, + ACTIONS(17), 1, + sym_sortByField, + ACTIONS(140), 1, + ts_builtin_sym_end, + ACTIONS(144), 1, + sym_and, + STATE(109), 1, + sym_sortBy, + [826] = 4, + ACTIONS(17), 1, + sym_sortByField, + ACTIONS(130), 1, + ts_builtin_sym_end, + ACTIONS(146), 1, + sym_and, + STATE(119), 1, + sym_sortBy, + [839] = 1, + ACTIONS(148), 4, + anon_sym_SQUOTE, + anon_sym_DQUOTE, + anon_sym_COMMA, + anon_sym_RBRACK, + [846] = 3, + STATE(49), 1, + aux_sym_stringContent_repeat1, + STATE(120), 1, + sym_stringContent, + ACTIONS(134), 2, + aux_sym_stringContent_token1, + sym_escape_sequence, + [857] = 2, + STATE(41), 1, + sym_booleanVal, + ACTIONS(150), 2, + sym_booleanTrue, + 
sym_booleanFalse, + [865] = 3, + ACTIONS(19), 1, + sym_limitField, + ACTIONS(152), 1, + ts_builtin_sym_end, + STATE(112), 1, + sym_limit, + [875] = 1, + ACTIONS(154), 3, + ts_builtin_sym_end, + sym_limitField, + sym_and, + [881] = 3, + ACTIONS(17), 1, + sym_sortByField, + ACTIONS(140), 1, + ts_builtin_sym_end, + STATE(109), 1, + sym_sortBy, + [891] = 3, + ACTIONS(156), 1, + anon_sym_COMMA, + ACTIONS(158), 1, + anon_sym_RBRACK, + STATE(66), 1, + aux_sym_sortByVal_repeat1, + [901] = 1, + ACTIONS(160), 3, + ts_builtin_sym_end, + sym_limitField, + sym_and, + [907] = 3, + ACTIONS(162), 1, + anon_sym_COMMA, + ACTIONS(165), 1, + anon_sym_RBRACK, + STATE(66), 1, + aux_sym_sortByVal_repeat1, + [917] = 3, + ACTIONS(167), 1, + anon_sym_COMMA, + ACTIONS(169), 1, + anon_sym_RBRACK, + STATE(73), 1, + aux_sym_visitTypeVal_repeat1, + [927] = 1, + ACTIONS(171), 3, + ts_builtin_sym_end, + sym_limitField, + sym_and, + [933] = 3, + ACTIONS(173), 1, + anon_sym_COMMA, + ACTIONS(175), 1, + anon_sym_RBRACK, + STATE(85), 1, + aux_sym_listVal_repeat1, + [943] = 3, + ACTIONS(156), 1, + anon_sym_COMMA, + ACTIONS(177), 1, + anon_sym_RBRACK, + STATE(64), 1, + aux_sym_sortByVal_repeat1, + [953] = 1, + ACTIONS(179), 3, + ts_builtin_sym_end, + sym_limitField, + sym_and, + [959] = 1, + ACTIONS(181), 3, + ts_builtin_sym_end, + sym_sortByField, + sym_and, + [965] = 3, + ACTIONS(167), 1, + anon_sym_COMMA, + ACTIONS(183), 1, + anon_sym_RBRACK, + STATE(79), 1, + aux_sym_visitTypeVal_repeat1, + [975] = 3, + ACTIONS(19), 1, + sym_limitField, + ACTIONS(140), 1, + ts_builtin_sym_end, + STATE(109), 1, + sym_limit, + [985] = 3, + ACTIONS(17), 1, + sym_sortByField, + ACTIONS(152), 1, + ts_builtin_sym_end, + STATE(112), 1, + sym_sortBy, + [995] = 3, + ACTIONS(156), 1, + anon_sym_COMMA, + ACTIONS(185), 1, + anon_sym_RBRACK, + STATE(66), 1, + aux_sym_sortByVal_repeat1, + [1005] = 1, + ACTIONS(187), 3, + ts_builtin_sym_end, + sym_limitField, + sym_and, + [1011] = 3, + ACTIONS(189), 1, + anon_sym_COMMA, + ACTIONS(192), 1, + anon_sym_RBRACK, + STATE(78), 1, + aux_sym_listVal_repeat1, + [1021] = 3, + ACTIONS(194), 1, + anon_sym_COMMA, + ACTIONS(197), 1, + anon_sym_RBRACK, + STATE(79), 1, + aux_sym_visitTypeVal_repeat1, + [1031] = 3, + ACTIONS(59), 1, + sym_or, + ACTIONS(89), 1, + sym_and, + ACTIONS(199), 1, + anon_sym_RPAREN, + [1041] = 1, + ACTIONS(201), 3, + anon_sym_SQUOTE, + anon_sym_DQUOTE, + sym_singleWord, + [1047] = 3, + ACTIONS(167), 1, + anon_sym_COMMA, + ACTIONS(203), 1, + anon_sym_RBRACK, + STATE(79), 1, + aux_sym_visitTypeVal_repeat1, + [1057] = 3, + ACTIONS(167), 1, + anon_sym_COMMA, + ACTIONS(205), 1, + anon_sym_RBRACK, + STATE(82), 1, + aux_sym_visitTypeVal_repeat1, + [1067] = 1, + ACTIONS(207), 3, + ts_builtin_sym_end, + sym_limitField, + sym_and, + [1073] = 3, + ACTIONS(173), 1, + anon_sym_COMMA, + ACTIONS(209), 1, + anon_sym_RBRACK, + STATE(78), 1, + aux_sym_listVal_repeat1, + [1083] = 3, + ACTIONS(156), 1, + anon_sym_COMMA, + ACTIONS(211), 1, + anon_sym_RBRACK, + STATE(76), 1, + aux_sym_sortByVal_repeat1, + [1093] = 2, + ACTIONS(213), 1, + sym_choiceOp, + STATE(98), 1, + sym_listOp, + [1100] = 1, + ACTIONS(215), 2, + anon_sym_COMMA, + anon_sym_RBRACK, + [1105] = 2, + ACTIONS(217), 1, + sym_number, + STATE(43), 1, + sym_numberVal, + [1112] = 1, + ACTIONS(219), 2, + anon_sym_COMMA, + anon_sym_RBRACK, + [1117] = 2, + ACTIONS(221), 1, + sym_equalOp, + STATE(48), 1, + sym_patternOp, + [1124] = 2, + ACTIONS(223), 1, + anon_sym_LBRACK, + STATE(71), 1, + sym_sortByVal, + [1131] = 1, + ACTIONS(225), 2, + anon_sym_COMMA, + 
anon_sym_RBRACK, + [1136] = 2, + ACTIONS(227), 1, + sym_equalOp, + STATE(60), 1, + sym_booleanOp, + [1143] = 2, + ACTIONS(229), 1, + anon_sym_LBRACK, + STATE(45), 1, + sym_visitTypeVal, + [1150] = 1, + ACTIONS(231), 2, + anon_sym_COMMA, + anon_sym_RBRACK, + [1155] = 1, + ACTIONS(233), 2, + sym_booleanTrue, + sym_booleanFalse, + [1160] = 2, + ACTIONS(235), 1, + anon_sym_LBRACK, + STATE(46), 1, + sym_listVal, + [1167] = 1, + ACTIONS(237), 2, + anon_sym_COMMA, + anon_sym_RBRACK, + [1172] = 2, + ACTIONS(239), 1, + sym_equalOp, + STATE(92), 1, + sym_sortByOp, + [1179] = 2, + ACTIONS(241), 1, + sym_rangeOp, + STATE(89), 1, + sym_numericOp, + [1186] = 2, + ACTIONS(243), 1, + sym_equalOp, + STATE(95), 1, + sym_visitTypeOp, + [1193] = 2, + ACTIONS(245), 1, + sym_isoDateTime, + STATE(26), 1, + sym_dateVal, + [1200] = 2, + ACTIONS(247), 1, + sym_rangeOp, + STATE(103), 1, + sym_dateOp, + [1207] = 1, + ACTIONS(249), 1, + anon_sym_LBRACK, + [1211] = 1, + ACTIONS(251), 1, + anon_sym_SQUOTE, + [1215] = 1, + ACTIONS(253), 1, + sym_isoDateTime, + [1219] = 1, + ACTIONS(255), 1, + anon_sym_DQUOTE, + [1223] = 1, + ACTIONS(152), 1, + ts_builtin_sym_end, + [1227] = 1, + ACTIONS(257), 1, + anon_sym_SQUOTE, + [1231] = 1, + ACTIONS(257), 1, + anon_sym_DQUOTE, + [1235] = 1, + ACTIONS(259), 1, + ts_builtin_sym_end, + [1239] = 1, + ACTIONS(255), 1, + anon_sym_SQUOTE, + [1243] = 1, + ACTIONS(251), 1, + anon_sym_DQUOTE, + [1247] = 1, + ACTIONS(261), 1, + sym_number, + [1251] = 1, + ACTIONS(263), 1, + ts_builtin_sym_end, + [1255] = 1, + ACTIONS(265), 1, + sym_equalOp, + [1259] = 1, + ACTIONS(267), 1, + anon_sym_LBRACK, + [1263] = 1, + ACTIONS(140), 1, + ts_builtin_sym_end, + [1267] = 1, + ACTIONS(269), 1, + anon_sym_DQUOTE, + [1271] = 1, + ACTIONS(271), 1, + anon_sym_LBRACK, + [1275] = 1, + ACTIONS(273), 1, + anon_sym_SQUOTE, + [1279] = 1, + ACTIONS(273), 1, + anon_sym_DQUOTE, + [1283] = 1, + ACTIONS(269), 1, + anon_sym_SQUOTE, + [1287] = 1, + ACTIONS(275), 1, + sym_number, +}; + +static const uint32_t ts_small_parse_table_map[] = { + [SMALL_STATE(2)] = 0, + [SMALL_STATE(3)] = 51, + [SMALL_STATE(4)] = 79, + [SMALL_STATE(5)] = 118, + [SMALL_STATE(6)] = 145, + [SMALL_STATE(7)] = 184, + [SMALL_STATE(8)] = 223, + [SMALL_STATE(9)] = 250, + [SMALL_STATE(10)] = 268, + [SMALL_STATE(11)] = 286, + [SMALL_STATE(12)] = 312, + [SMALL_STATE(13)] = 330, + [SMALL_STATE(14)] = 348, + [SMALL_STATE(15)] = 365, + [SMALL_STATE(16)] = 382, + [SMALL_STATE(17)] = 399, + [SMALL_STATE(18)] = 416, + [SMALL_STATE(19)] = 427, + [SMALL_STATE(20)] = 438, + [SMALL_STATE(21)] = 449, + [SMALL_STATE(22)] = 471, + [SMALL_STATE(23)] = 480, + [SMALL_STATE(24)] = 489, + [SMALL_STATE(25)] = 498, + [SMALL_STATE(26)] = 507, + [SMALL_STATE(27)] = 516, + [SMALL_STATE(28)] = 525, + [SMALL_STATE(29)] = 534, + [SMALL_STATE(30)] = 543, + [SMALL_STATE(31)] = 552, + [SMALL_STATE(32)] = 561, + [SMALL_STATE(33)] = 570, + [SMALL_STATE(34)] = 579, + [SMALL_STATE(35)] = 588, + [SMALL_STATE(36)] = 599, + [SMALL_STATE(37)] = 608, + [SMALL_STATE(38)] = 617, + [SMALL_STATE(39)] = 626, + [SMALL_STATE(40)] = 635, + [SMALL_STATE(41)] = 644, + [SMALL_STATE(42)] = 653, + [SMALL_STATE(43)] = 662, + [SMALL_STATE(44)] = 671, + [SMALL_STATE(45)] = 688, + [SMALL_STATE(46)] = 697, + [SMALL_STATE(47)] = 706, + [SMALL_STATE(48)] = 722, + [SMALL_STATE(49)] = 738, + [SMALL_STATE(50)] = 750, + [SMALL_STATE(51)] = 762, + [SMALL_STATE(52)] = 775, + [SMALL_STATE(53)] = 786, + [SMALL_STATE(54)] = 793, + [SMALL_STATE(55)] = 800, + [SMALL_STATE(56)] = 813, + [SMALL_STATE(57)] = 826, + 
[SMALL_STATE(58)] = 839, + [SMALL_STATE(59)] = 846, + [SMALL_STATE(60)] = 857, + [SMALL_STATE(61)] = 865, + [SMALL_STATE(62)] = 875, + [SMALL_STATE(63)] = 881, + [SMALL_STATE(64)] = 891, + [SMALL_STATE(65)] = 901, + [SMALL_STATE(66)] = 907, + [SMALL_STATE(67)] = 917, + [SMALL_STATE(68)] = 927, + [SMALL_STATE(69)] = 933, + [SMALL_STATE(70)] = 943, + [SMALL_STATE(71)] = 953, + [SMALL_STATE(72)] = 959, + [SMALL_STATE(73)] = 965, + [SMALL_STATE(74)] = 975, + [SMALL_STATE(75)] = 985, + [SMALL_STATE(76)] = 995, + [SMALL_STATE(77)] = 1005, + [SMALL_STATE(78)] = 1011, + [SMALL_STATE(79)] = 1021, + [SMALL_STATE(80)] = 1031, + [SMALL_STATE(81)] = 1041, + [SMALL_STATE(82)] = 1047, + [SMALL_STATE(83)] = 1057, + [SMALL_STATE(84)] = 1067, + [SMALL_STATE(85)] = 1073, + [SMALL_STATE(86)] = 1083, + [SMALL_STATE(87)] = 1093, + [SMALL_STATE(88)] = 1100, + [SMALL_STATE(89)] = 1105, + [SMALL_STATE(90)] = 1112, + [SMALL_STATE(91)] = 1117, + [SMALL_STATE(92)] = 1124, + [SMALL_STATE(93)] = 1131, + [SMALL_STATE(94)] = 1136, + [SMALL_STATE(95)] = 1143, + [SMALL_STATE(96)] = 1150, + [SMALL_STATE(97)] = 1155, + [SMALL_STATE(98)] = 1160, + [SMALL_STATE(99)] = 1167, + [SMALL_STATE(100)] = 1172, + [SMALL_STATE(101)] = 1179, + [SMALL_STATE(102)] = 1186, + [SMALL_STATE(103)] = 1193, + [SMALL_STATE(104)] = 1200, + [SMALL_STATE(105)] = 1207, + [SMALL_STATE(106)] = 1211, + [SMALL_STATE(107)] = 1215, + [SMALL_STATE(108)] = 1219, + [SMALL_STATE(109)] = 1223, + [SMALL_STATE(110)] = 1227, + [SMALL_STATE(111)] = 1231, + [SMALL_STATE(112)] = 1235, + [SMALL_STATE(113)] = 1239, + [SMALL_STATE(114)] = 1243, + [SMALL_STATE(115)] = 1247, + [SMALL_STATE(116)] = 1251, + [SMALL_STATE(117)] = 1255, + [SMALL_STATE(118)] = 1259, + [SMALL_STATE(119)] = 1263, + [SMALL_STATE(120)] = 1267, + [SMALL_STATE(121)] = 1271, + [SMALL_STATE(122)] = 1275, + [SMALL_STATE(123)] = 1279, + [SMALL_STATE(124)] = 1283, + [SMALL_STATE(125)] = 1287, +}; + +static const TSParseActionEntry ts_parse_actions[] = { + [0] = {.entry = {.count = 0, .reusable = false}}, + [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(), + [3] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4), + [5] = {.entry = {.count = 1, .reusable = true}}, SHIFT(91), + [7] = {.entry = {.count = 1, .reusable = true}}, SHIFT(94), + [9] = {.entry = {.count = 1, .reusable = true}}, SHIFT(101), + [11] = {.entry = {.count = 1, .reusable = true}}, SHIFT(102), + [13] = {.entry = {.count = 1, .reusable = true}}, SHIFT(87), + [15] = {.entry = {.count = 1, .reusable = true}}, SHIFT(104), + [17] = {.entry = {.count = 1, .reusable = true}}, SHIFT(100), + [19] = {.entry = {.count = 1, .reusable = true}}, SHIFT(117), + [21] = {.entry = {.count = 1, .reusable = true}}, SHIFT(13), + [23] = {.entry = {.count = 1, .reusable = true}}, SHIFT(12), + [25] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_visitTypeVal_repeat1, 1), + [27] = {.entry = {.count = 1, .reusable = true}}, SHIFT(58), + [29] = {.entry = {.count = 1, .reusable = true}}, SHIFT(9), + [31] = {.entry = {.count = 1, .reusable = true}}, SHIFT(10), + [33] = {.entry = {.count = 1, .reusable = true}}, SHIFT(28), + [35] = {.entry = {.count = 1, .reusable = true}}, SHIFT(16), + [37] = {.entry = {.count = 1, .reusable = true}}, SHIFT(15), + [39] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_sortByVal_repeat1, 1), + [41] = {.entry = {.count = 1, .reusable = true}}, SHIFT(20), + [43] = {.entry = {.count = 1, .reusable = true}}, SHIFT(54), + [45] = {.entry = {.count = 1, .reusable = true}}, SHIFT(17), + [47] = {.entry = {.count 
= 1, .reusable = true}}, SHIFT(14), + [49] = {.entry = {.count = 1, .reusable = true}}, SHIFT(84), + [51] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string, 1), + [53] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string, 3), + [55] = {.entry = {.count = 1, .reusable = true}}, SHIFT(53), + [57] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_query, 1), + [59] = {.entry = {.count = 1, .reusable = true}}, SHIFT(7), + [61] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), + [63] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_listVal, 4, .production_id = 5), + [65] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_visitTypeVal, 5, .production_id = 7), + [67] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_visitTypeVal, 3, .production_id = 4), + [69] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_filters, 3, .production_id = 3), + [71] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_dateFilter, 3, .production_id = 2), + [73] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_listVal, 2), + [75] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_visitTypeVal, 2), + [77] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_boundedListFilter, 1), + [79] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_filter, 1, .production_id = 1), + [81] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_filters, 1), + [83] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_listVal, 3, .production_id = 4), + [85] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_filters, 3), + [87] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_visitTypeVal, 4, .production_id = 5), + [89] = {.entry = {.count = 1, .reusable = true}}, SHIFT(6), + [91] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_visitTypeVal, 6, .production_id = 8), + [93] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_dateVal, 1), + [95] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_patternFilter, 3, .production_id = 2), + [97] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_patternVal, 1), + [99] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_booleanVal, 1), + [101] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_booleanFilter, 3, .production_id = 2), + [103] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_numberVal, 1), + [105] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_numericFilter, 3, .production_id = 2), + [107] = {.entry = {.count = 1, .reusable = true}}, SHIFT(52), + [109] = {.entry = {.count = 1, .reusable = true}}, SHIFT(59), + [111] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_listVal_repeat1, 1), + [113] = {.entry = {.count = 1, .reusable = true}}, SHIFT(18), + [115] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_visitTypeFilter, 3, .production_id = 2), + [117] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_unboundedListFilter, 3, .production_id = 2), + [119] = {.entry = {.count = 1, .reusable = true}}, SHIFT(27), + [121] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_stringContent, 1), + [123] = {.entry = {.count = 1, .reusable = true}}, SHIFT(50), + [125] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym_stringContent_repeat1, 2), + [127] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_stringContent_repeat1, 2), SHIFT_REPEAT(50), + [130] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_query, 2), + [132] = {.entry = {.count = 1, .reusable = true}}, SHIFT(74), + [134] = {.entry = 
{.count = 1, .reusable = true}}, SHIFT(49), + [136] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_sortByOptions, 2), + [138] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_sortByOptions, 1), + [140] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_query, 3), + [142] = {.entry = {.count = 1, .reusable = true}}, SHIFT(61), + [144] = {.entry = {.count = 1, .reusable = true}}, SHIFT(75), + [146] = {.entry = {.count = 1, .reusable = true}}, SHIFT(63), + [148] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_visitTypeOptions, 1), + [150] = {.entry = {.count = 1, .reusable = true}}, SHIFT(40), + [152] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_query, 4), + [154] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_sortByVal, 6, .production_id = 8), + [156] = {.entry = {.count = 1, .reusable = true}}, SHIFT(8), + [158] = {.entry = {.count = 1, .reusable = true}}, SHIFT(62), + [160] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_sortByVal, 5, .production_id = 7), + [162] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_sortByVal_repeat1, 2, .production_id = 6), SHIFT_REPEAT(8), + [165] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_sortByVal_repeat1, 2, .production_id = 6), + [167] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), + [169] = {.entry = {.count = 1, .reusable = true}}, SHIFT(23), + [171] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_sortByVal, 4, .production_id = 5), + [173] = {.entry = {.count = 1, .reusable = true}}, SHIFT(44), + [175] = {.entry = {.count = 1, .reusable = true}}, SHIFT(32), + [177] = {.entry = {.count = 1, .reusable = true}}, SHIFT(65), + [179] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_sortBy, 3, .production_id = 2), + [181] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_limit, 3, .production_id = 2), + [183] = {.entry = {.count = 1, .reusable = true}}, SHIFT(36), + [185] = {.entry = {.count = 1, .reusable = true}}, SHIFT(68), + [187] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_sortByVal, 3, .production_id = 4), + [189] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_listVal_repeat1, 2, .production_id = 6), SHIFT_REPEAT(44), + [192] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_listVal_repeat1, 2, .production_id = 6), + [194] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_visitTypeVal_repeat1, 2, .production_id = 6), SHIFT_REPEAT(3), + [197] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_visitTypeVal_repeat1, 2, .production_id = 6), + [199] = {.entry = {.count = 1, .reusable = true}}, SHIFT(33), + [201] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_patternOp, 1), + [203] = {.entry = {.count = 1, .reusable = true}}, SHIFT(34), + [205] = {.entry = {.count = 1, .reusable = true}}, SHIFT(24), + [207] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_sortByVal, 2), + [209] = {.entry = {.count = 1, .reusable = true}}, SHIFT(22), + [211] = {.entry = {.count = 1, .reusable = true}}, SHIFT(77), + [213] = {.entry = {.count = 1, .reusable = true}}, SHIFT(118), + [215] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_sortByVal_repeat1, 2, .production_id = 4), + [217] = {.entry = {.count = 1, .reusable = true}}, SHIFT(42), + [219] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_sortByVal_repeat1, 4, .production_id = 7), + [221] = {.entry = {.count = 1, .reusable = true}}, SHIFT(81), + [223] = {.entry = {.count = 1, .reusable = true}}, SHIFT(11), + [225] = 
{.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_visitTypeVal_repeat1, 4, .production_id = 7), + [227] = {.entry = {.count = 1, .reusable = true}}, SHIFT(97), + [229] = {.entry = {.count = 1, .reusable = true}}, SHIFT(5), + [231] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_visitTypeVal_repeat1, 2, .production_id = 4), + [233] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_booleanOp, 1), + [235] = {.entry = {.count = 1, .reusable = true}}, SHIFT(47), + [237] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_listVal_repeat1, 2, .production_id = 4), + [239] = {.entry = {.count = 1, .reusable = true}}, SHIFT(121), + [241] = {.entry = {.count = 1, .reusable = true}}, SHIFT(125), + [243] = {.entry = {.count = 1, .reusable = true}}, SHIFT(105), + [245] = {.entry = {.count = 1, .reusable = true}}, SHIFT(37), + [247] = {.entry = {.count = 1, .reusable = true}}, SHIFT(107), + [249] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_visitTypeOp, 1), + [251] = {.entry = {.count = 1, .reusable = true}}, SHIFT(67), + [253] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_dateOp, 1), + [255] = {.entry = {.count = 1, .reusable = true}}, SHIFT(70), + [257] = {.entry = {.count = 1, .reusable = true}}, SHIFT(93), + [259] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_query, 5), + [261] = {.entry = {.count = 1, .reusable = true}}, SHIFT(72), + [263] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), + [265] = {.entry = {.count = 1, .reusable = true}}, SHIFT(115), + [267] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_listOp, 1), + [269] = {.entry = {.count = 1, .reusable = true}}, SHIFT(19), + [271] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_sortByOp, 1), + [273] = {.entry = {.count = 1, .reusable = true}}, SHIFT(90), + [275] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_numericOp, 1), +}; + +#ifdef __cplusplus +extern "C" { +#endif +#ifdef _WIN32 +#define extern __declspec(dllexport) +#endif + +extern const TSLanguage *tree_sitter_swh_search_ql(void) { + static const TSLanguage language = { + .version = LANGUAGE_VERSION, + .symbol_count = SYMBOL_COUNT, + .alias_count = ALIAS_COUNT, + .token_count = TOKEN_COUNT, + .external_token_count = EXTERNAL_TOKEN_COUNT, + .state_count = STATE_COUNT, + .large_state_count = LARGE_STATE_COUNT, + .production_id_count = PRODUCTION_ID_COUNT, + .field_count = FIELD_COUNT, + .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH, + .parse_table = &ts_parse_table[0][0], + .small_parse_table = ts_small_parse_table, + .small_parse_table_map = ts_small_parse_table_map, + .parse_actions = ts_parse_actions, + .symbol_names = ts_symbol_names, + .field_names = ts_field_names, + .field_map_slices = ts_field_map_slices, + .field_map_entries = ts_field_map_entries, + .symbol_metadata = ts_symbol_metadata, + .public_symbol_map = ts_symbol_map, + .alias_map = ts_non_terminal_alias_map, + .alias_sequences = &ts_alias_sequences[0][0], + .lex_modes = ts_lex_modes, + .lex_fn = ts_lex, + }; + return &language; +} +#ifdef __cplusplus +} +#endif diff --git a/swh/search/query_language/src/tree_sitter/parser.h b/swh/search/query_language/src/tree_sitter/parser.h new file mode 100644 index 0000000..cbbc7b4 --- /dev/null +++ b/swh/search/query_language/src/tree_sitter/parser.h @@ -0,0 +1,223 @@ +#ifndef TREE_SITTER_PARSER_H_ +#define TREE_SITTER_PARSER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> + +#define ts_builtin_sym_error ((TSSymbol)-1) +#define
ts_builtin_sym_end 0 +#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 + +typedef uint16_t TSStateId; + +#ifndef TREE_SITTER_API_H_ +typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; +typedef struct TSLanguage TSLanguage; +#endif + +typedef struct { + TSFieldId field_id; + uint8_t child_index; + bool inherited; +} TSFieldMapEntry; + +typedef struct { + uint16_t index; + uint16_t length; +} TSFieldMapSlice; + +typedef struct { + bool visible; + bool named; + bool supertype; +} TSSymbolMetadata; + +typedef struct TSLexer TSLexer; + +struct TSLexer { + int32_t lookahead; + TSSymbol result_symbol; + void (*advance)(TSLexer *, bool); + void (*mark_end)(TSLexer *); + uint32_t (*get_column)(TSLexer *); + bool (*is_at_included_range_start)(const TSLexer *); + bool (*eof)(const TSLexer *); +}; + +typedef enum { + TSParseActionTypeShift, + TSParseActionTypeReduce, + TSParseActionTypeAccept, + TSParseActionTypeRecover, +} TSParseActionType; + +typedef union { + struct { + uint8_t type; + TSStateId state; + bool extra; + bool repetition; + } shift; + struct { + uint8_t type; + uint8_t child_count; + TSSymbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; +} TSParseAction; + +typedef struct { + uint16_t lex_state; + uint16_t external_lex_state; +} TSLexMode; + +typedef union { + TSParseAction action; + struct { + uint8_t count; + bool reusable; + } entry; +} TSParseActionEntry; + +struct TSLanguage { + uint32_t version; + uint32_t symbol_count; + uint32_t alias_count; + uint32_t token_count; + uint32_t external_token_count; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; + uint16_t max_alias_sequence_length; + const uint16_t *parse_table; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; + const TSParseActionEntry *parse_actions; + const char * const *symbol_names; + const char * const *field_names; + const TSFieldMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; + const TSSymbolMetadata *symbol_metadata; + const TSSymbol *public_symbol_map; + const uint16_t *alias_map; + const TSSymbol *alias_sequences; + const TSLexMode *lex_modes; + bool (*lex_fn)(TSLexer *, TSStateId); + bool (*keyword_lex_fn)(TSLexer *, TSStateId); + TSSymbol keyword_capture_token; + struct { + const bool *states; + const TSSymbol *symbol_map; + void *(*create)(void); + void (*destroy)(void *); + bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); + unsigned (*serialize)(void *, char *); + void (*deserialize)(void *, const char *, unsigned); + } external_scanner; +}; + +/* + * Lexer Macros + */ + +#define START_LEXER() \ + bool result = false; \ + bool skip = false; \ + bool eof = false; \ + int32_t lookahead; \ + goto start; \ + next_state: \ + lexer->advance(lexer, skip); \ + start: \ + skip = false; \ + lookahead = lexer->lookahead; + +#define ADVANCE(state_value) \ + { \ + state = state_value; \ + goto next_state; \ + } + +#define SKIP(state_value) \ + { \ + skip = true; \ + state = state_value; \ + goto next_state; \ + } + +#define ACCEPT_TOKEN(symbol_value) \ + result = true; \ + lexer->result_symbol = symbol_value; \ + lexer->mark_end(lexer); + +#define END_STATE() return result; + +/* + * Parse Table Macros + */ + +#define SMALL_STATE(id) id - LARGE_STATE_COUNT + +#define STATE(id) id + +#define ACTIONS(id) id + +#define SHIFT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = state_value \ + } \ + }} + +#define 
SHIFT_REPEAT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = state_value, \ + .repetition = true \ + } \ + }} + +#define SHIFT_EXTRA() \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .extra = true \ + } \ + }} + +#define REDUCE(symbol_val, child_count_val, ...) \ + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_val, \ + .child_count = child_count_val, \ + __VA_ARGS__ \ + }, \ + }} + +#define RECOVER() \ + {{ \ + .type = TSParseActionTypeRecover \ + }} + +#define ACCEPT_INPUT() \ + {{ \ + .type = TSParseActionTypeAccept \ + }} + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_PARSER_H_ diff --git a/query_language/test/corpus/combinations.txt b/swh/search/query_language/test/corpus/combinations.txt similarity index 100% rename from query_language/test/corpus/combinations.txt rename to swh/search/query_language/test/corpus/combinations.txt diff --git a/query_language/tokens.js b/swh/search/query_language/tokens.js similarity index 100% rename from query_language/tokens.js rename to swh/search/query_language/tokens.js diff --git a/swh/search/static/swh_ql.so b/swh/search/static/swh_ql.so deleted file mode 100644 index 28b901d..0000000 Binary files a/swh/search/static/swh_ql.so and /dev/null differ diff --git a/swh/search/static/swh_ql.wasm b/swh/search/static/swh_ql.wasm deleted file mode 100644 index cd6914b..0000000 Binary files a/swh/search/static/swh_ql.wasm and /dev/null differ diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py index 3655445..1559ddb 100644 --- a/swh/search/tests/test_search.py +++ b/swh/search/tests/test_search.py @@ -1,1168 +1,1189 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from collections import Counter from datetime import datetime, timedelta, timezone from itertools import permutations from hypothesis import given, settings, strategies import pytest from swh.core.api.classes import stream_results class CommonSearchTest: def test_origin_url_unique_word_prefix(self): origin_foobar_baz = {"url": "http://foobar.baz"} origin_barbaz_qux = {"url": "http://barbaz.qux"} origin_qux_quux = {"url": "http://qux.quux"} origins = [origin_foobar_baz, origin_barbaz_qux, origin_qux_quux] self.search.origin_update(origins) self.search.flush() actual_page = self.search.origin_search(url_pattern="foobar") assert actual_page.next_page_token is None assert actual_page.results == [origin_foobar_baz] actual_page = self.search.origin_search(url_pattern="barb") assert actual_page.next_page_token is None assert actual_page.results == [origin_barbaz_qux] # 'bar' is part of 'foobar', but is not the beginning of it actual_page = self.search.origin_search(url_pattern="bar") assert actual_page.next_page_token is None assert actual_page.results == [origin_barbaz_qux] actual_page = self.search.origin_search(url_pattern="barbaz") assert actual_page.next_page_token is None assert actual_page.results == [origin_barbaz_qux] def test_origin_url_unique_word_prefix_multiple_results(self): origin_foobar_baz = {"url": "http://foobar.baz"} origin_barbaz_qux = {"url": "http://barbaz.qux"} origin_qux_quux = {"url": "http://qux.quux"} self.search.origin_update( [origin_foobar_baz, origin_barbaz_qux, origin_qux_quux] ) self.search.flush() actual_page = 
self.search.origin_search(url_pattern="qu") assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [o["url"] for o in [origin_qux_quux, origin_barbaz_qux]] assert sorted(results) == sorted(expected_results) actual_page = self.search.origin_search(url_pattern="qux") assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [o["url"] for o in [origin_qux_quux, origin_barbaz_qux]] assert sorted(results) == sorted(expected_results) def test_origin_url_all_terms(self): origin_foo_bar_baz = {"url": "http://foo.bar/baz"} origin_foo_bar_foo_bar = {"url": "http://foo.bar/foo.bar"} origins = [origin_foo_bar_baz, origin_foo_bar_foo_bar] self.search.origin_update(origins) self.search.flush() # Only results containing all terms should be returned. actual_page = self.search.origin_search(url_pattern="foo bar baz") assert actual_page.next_page_token is None assert actual_page.results == [origin_foo_bar_baz] def test_origin_with_visit(self): origin_foobar_baz = {"url": "http://foobar/baz"} self.search.origin_update( [{**o, "has_visits": True} for o in [origin_foobar_baz]] ) self.search.flush() actual_page = self.search.origin_search(url_pattern="foobar", with_visit=True) assert actual_page.next_page_token is None assert actual_page.results == [origin_foobar_baz] def test_origin_with_visit_added(self): origin_foobar_baz = {"url": "http://foobar.baz"} self.search.origin_update([origin_foobar_baz]) self.search.flush() actual_page = self.search.origin_search(url_pattern="foobar", with_visit=True) assert actual_page.next_page_token is None assert actual_page.results == [] self.search.origin_update( [{**o, "has_visits": True} for o in [origin_foobar_baz]] ) self.search.flush() actual_page = self.search.origin_search(url_pattern="foobar", with_visit=True) assert actual_page.next_page_token is None assert actual_page.results == [origin_foobar_baz] def test_origin_no_visit_types_search(self): origins = [{"url": "http://foobar.baz"}] self.search.origin_update(origins) self.search.flush() actual_page = self.search.origin_search(url_pattern="http", visit_types=["git"]) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [] assert sorted(results) == sorted(expected_results) actual_page = self.search.origin_search(url_pattern="http", visit_types=None) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [origin["url"] for origin in origins] assert sorted(results) == sorted(expected_results) def test_origin_visit_types_search(self): origins = [ {"url": "http://foobar.baz", "visit_types": ["git"]}, {"url": "http://barbaz.qux", "visit_types": ["svn"]}, {"url": "http://qux.quux", "visit_types": ["hg"]}, ] self.search.origin_update(origins) self.search.flush() for origin in origins: actual_page = self.search.origin_search( url_pattern="http", visit_types=origin["visit_types"] ) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [origin["url"]] assert sorted(results) == sorted(expected_results) actual_page = self.search.origin_search(url_pattern="http", visit_types=None) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [origin["url"] for origin in origins] assert sorted(results) == sorted(expected_results) def test_origin_visit_types_update_search(self): origin_url = 
"http://foobar.baz" self.search.origin_update([{"url": origin_url}]) self.search.flush() def _add_visit_type(visit_type): self.search.origin_update( [{"url": origin_url, "visit_types": [visit_type]}] ) self.search.flush() def _check_visit_types(visit_types_list): for visit_types in visit_types_list: actual_page = self.search.origin_search( url_pattern="http", visit_types=visit_types ) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [origin_url] assert sorted(results) == sorted(expected_results) _add_visit_type("git") _check_visit_types([["git"], ["git", "hg"]]) _add_visit_type("svn") _check_visit_types([["git"], ["svn"], ["svn", "git"], ["git", "hg", "svn"]]) _add_visit_type("hg") _check_visit_types( [ ["git"], ["svn"], ["hg"], ["svn", "git"], ["hg", "git"], ["hg", "svn"], ["git", "hg", "svn"], ] ) def test_origin_nb_visits_update_search(self): origin_url = "http://foobar.baz" self.search.origin_update([{"url": origin_url}]) self.search.flush() def _update_nb_visits(nb_visits): self.search.origin_update([{"url": origin_url, "nb_visits": nb_visits}]) self.search.flush() def _check_min_nb_visits(min_nb_visits): actual_page = self.search.origin_search( url_pattern=origin_url, min_nb_visits=min_nb_visits, ) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [origin_url] assert sorted(results) == sorted(expected_results) _update_nb_visits(2) _check_min_nb_visits(2) # Works for = 2 _check_min_nb_visits(1) # Works for < 2 with pytest.raises(AssertionError): _check_min_nb_visits( 5 ) # No results for nb_visits >= 5 (should throw error) _update_nb_visits(5) _check_min_nb_visits(5) # Works for = 5 _check_min_nb_visits(3) # Works for < 5 def test_origin_last_visit_date_update_search(self): origin_url = "http://foobar.baz" self.search.origin_update([{"url": origin_url}]) self.search.flush() def _update_last_visit_date(last_visit_date): self.search.origin_update( [{"url": origin_url, "last_visit_date": last_visit_date}] ) self.search.flush() def _check_min_last_visit_date(min_last_visit_date): actual_page = self.search.origin_search( url_pattern=origin_url, min_last_visit_date=min_last_visit_date, ) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [origin_url] assert sorted(results) == sorted(expected_results) now = datetime.now(tz=timezone.utc).isoformat() now_minus_5_hours = ( datetime.now(tz=timezone.utc) - timedelta(hours=5) ).isoformat() now_plus_5_hours = ( datetime.now(tz=timezone.utc) + timedelta(hours=5) ).isoformat() _update_last_visit_date(now) _check_min_last_visit_date(now) # Works for = _check_min_last_visit_date(now_minus_5_hours) # Works for < with pytest.raises(AssertionError): _check_min_last_visit_date(now_plus_5_hours) # Fails for > _update_last_visit_date(now_plus_5_hours) _check_min_last_visit_date(now_plus_5_hours) # Works for = _check_min_last_visit_date(now) # Works for < def test_journal_client_origin_visit_status_permutation(self): NOW = datetime.now(tz=timezone.utc).isoformat() NOW_MINUS_5_HOURS = ( datetime.now(tz=timezone.utc) - timedelta(hours=5) ).isoformat() NOW_PLUS_5_HOURS = ( datetime.now(tz=timezone.utc) + timedelta(hours=5) ).isoformat() VISIT_STATUSES = [ { "url": "http://foobar.baz", "snapshot_id": "SNAPSHOT_1", "last_eventful_visit_date": NOW, }, { "url": "http://foobar.baz", "snapshot_id": "SNAPSHOT_1", "last_eventful_visit_date": NOW_MINUS_5_HOURS, }, { "url": 
"http://foobar.baz", "snapshot_id": "SNAPSHOT_2", "last_eventful_visit_date": NOW_PLUS_5_HOURS, }, ] for visit_statuses in permutations(VISIT_STATUSES, len(VISIT_STATUSES)): self.search.origin_update(visit_statuses) self.search.flush() origin_url = "http://foobar.baz" actual_page = self.search.origin_search( url_pattern=origin_url, min_last_eventful_visit_date=NOW_PLUS_5_HOURS, ) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [origin_url] assert sorted(results) == sorted(expected_results) self.reset() def test_origin_last_eventful_visit_date_update_search(self): origin_url = "http://foobar.baz" self.search.origin_update([{"url": origin_url}]) self.search.flush() def _update_last_eventful_visit_date(snapshot_id, last_eventful_visit_date): self.search.origin_update( [ { "url": origin_url, "snapshot_id": snapshot_id, "last_eventful_visit_date": last_eventful_visit_date, } ] ) self.search.flush() def _check_min_last_eventful_visit_date(min_last_eventful_visit_date): actual_page = self.search.origin_search( url_pattern=origin_url, min_last_eventful_visit_date=min_last_eventful_visit_date, ) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [origin_url] assert sorted(results) == sorted(expected_results) now = datetime.now(tz=timezone.utc).isoformat() now_minus_5_hours = ( datetime.now(tz=timezone.utc) - timedelta(hours=5) ).isoformat() now_plus_5_hours = ( datetime.now(tz=timezone.utc) + timedelta(hours=5) ).isoformat() snapshot_1 = "SNAPSHOT_1" snapshot_2 = "SNAPSHOT_2" _update_last_eventful_visit_date(snapshot_1, now) _check_min_last_eventful_visit_date(now) # Works for = _check_min_last_eventful_visit_date(now_minus_5_hours) # Works for < with pytest.raises(AssertionError): _check_min_last_eventful_visit_date(now_plus_5_hours) # Fails for > _update_last_eventful_visit_date( snapshot_1, now_plus_5_hours ) # Revisit(not eventful) same origin _check_min_last_eventful_visit_date( now ) # Should remain the same because recent visit wasn't eventful with pytest.raises(AssertionError): _check_min_last_eventful_visit_date(now_plus_5_hours) _update_last_eventful_visit_date( snapshot_2, now_plus_5_hours ) # Revisit(eventful) same origin _check_min_last_eventful_visit_date(now_plus_5_hours) # Works for = _check_min_last_eventful_visit_date(now) # Works for < def _test_origin_last_revision_release_date_update_search(self, date_type): origin_url = "http://foobar.baz" self.search.origin_update([{"url": origin_url}]) self.search.flush() def _update_last_revision_release_date(date): self.search.origin_update([{"url": origin_url, date_type: date,}]) self.search.flush() def _check_min_last_revision_release_date(date): actual_page = self.search.origin_search( url_pattern=origin_url, **{f"min_{date_type}": date}, ) assert actual_page.next_page_token is None results = [r["url"] for r in actual_page.results] expected_results = [origin_url] assert sorted(results) == sorted(expected_results) now = datetime.now(tz=timezone.utc).isoformat() now_minus_5_hours = ( datetime.now(tz=timezone.utc) - timedelta(hours=5) ).isoformat() now_plus_5_hours = ( datetime.now(tz=timezone.utc) + timedelta(hours=5) ).isoformat() _update_last_revision_release_date(now) _check_min_last_revision_release_date(now) _check_min_last_revision_release_date(now_minus_5_hours) with pytest.raises(AssertionError): _check_min_last_revision_release_date(now_plus_5_hours) _update_last_revision_release_date(now_plus_5_hours) 

    def _test_origin_last_revision_release_date_update_search(self, date_type):
        origin_url = "http://foobar.baz"
        self.search.origin_update([{"url": origin_url}])
        self.search.flush()

        def _update_last_revision_release_date(date):
            self.search.origin_update([{"url": origin_url, date_type: date}])
            self.search.flush()

        def _check_min_last_revision_release_date(date):
            actual_page = self.search.origin_search(
                url_pattern=origin_url,
                **{f"min_{date_type}": date},
            )
            assert actual_page.next_page_token is None
            results = [r["url"] for r in actual_page.results]
            expected_results = [origin_url]
            assert sorted(results) == sorted(expected_results)

        now = datetime.now(tz=timezone.utc).isoformat()
        now_minus_5_hours = (
            datetime.now(tz=timezone.utc) - timedelta(hours=5)
        ).isoformat()
        now_plus_5_hours = (
            datetime.now(tz=timezone.utc) + timedelta(hours=5)
        ).isoformat()

        _update_last_revision_release_date(now)
        _check_min_last_revision_release_date(now)
        _check_min_last_revision_release_date(now_minus_5_hours)
        with pytest.raises(AssertionError):
            _check_min_last_revision_release_date(now_plus_5_hours)

        _update_last_revision_release_date(now_plus_5_hours)
        _check_min_last_revision_release_date(now_plus_5_hours)
        _check_min_last_revision_release_date(now)

    def test_origin_last_revision_date_update_search(self):
        self._test_origin_last_revision_release_date_update_search(
            date_type="last_revision_date"
        )

    def test_origin_last_release_date_update_search(self):
        self._test_origin_last_revision_release_date_update_search(
            date_type="last_release_date"
        )

    def test_origin_intrinsic_metadata_dates_filter_sorting_search(self):
        DATE_0 = "1999-06-28"
        DATE_1 = "2001-02-13"
        DATE_2 = "2005-10-02"

        ORIGINS = [
            {
                "url": "http://foobar.0.com",
                "intrinsic_metadata": {
                    "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                    "dateCreated": DATE_0,
                    "dateModified": DATE_1,
                    "datePublished": DATE_2,
                },
            },
            {
                "url": "http://foobar.1.com",
                "intrinsic_metadata": {
                    "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                    "dateCreated": DATE_1,
                    "dateModified": DATE_2,
                    "datePublished": DATE_2,
                },
            },
            {
                "url": "http://foobar.2.com",
                "intrinsic_metadata": {
                    "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                    "dateCreated": DATE_2,
                    "dateModified": DATE_2,
                    "datePublished": DATE_2,
                },
            },
        ]
        self.search.origin_update(ORIGINS)
        self.search.flush()

        def _check_results(origin_indices, sort_results=True, **kwargs):
            page = self.search.origin_search(url_pattern="foobar", **kwargs)
            results = [r["url"] for r in page.results]
            if sort_results:
                assert sorted(results) == sorted(
                    [ORIGINS[index]["url"] for index in origin_indices]
                )
            else:
                assert results == [ORIGINS[index]["url"] for index in origin_indices]

        _check_results(min_date_created=DATE_0, origin_indices=[0, 1, 2])
        _check_results(min_date_created=DATE_1, origin_indices=[1, 2])
        _check_results(min_date_created=DATE_2, origin_indices=[2])

        _check_results(min_date_modified=DATE_0, origin_indices=[0, 1, 2])
        _check_results(min_date_modified=DATE_1, origin_indices=[0, 1, 2])
        _check_results(min_date_modified=DATE_2, origin_indices=[1, 2])

        _check_results(min_date_published=DATE_0, origin_indices=[0, 1, 2])
        _check_results(min_date_published=DATE_1, origin_indices=[0, 1, 2])
        _check_results(min_date_published=DATE_2, origin_indices=[0, 1, 2])

        # Sorting
        _check_results(
            sort_by=["-date_created"], origin_indices=[2, 1, 0], sort_results=False
        )
        _check_results(
            sort_by=["date_created"], origin_indices=[0, 1, 2], sort_results=False
        )
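
+    # The keyword checks below assert a relevance *ordering*, not just
+    # membership: per the translator, matches in intrinsic_metadata.keywords
+    # are boosted ("^2") over matches in descriptions, so an origin matching
+    # a term in both fields should rank above one matching in only one.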

    def test_origin_keywords_search(self):
        ORIGINS = [
            {
                "url": "http://foobar.1.com",
                "intrinsic_metadata": {
                    "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                    "description": "Django is a backend framework for applications",
                    "keywords": "django,backend,server,web,framework",
                },
            },
            {
                "url": "http://foobar.2.com",
                "intrinsic_metadata": {
                    "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                    "description": "Native Android applications are fast",
                    "keywords": "android,mobile,ui",
                },
            },
            {
                "url": "http://foobar.3.com",
                "intrinsic_metadata": {
                    "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                    "description": "React framework helps you build web applications",
                    "keywords": "react,web,ui",
                },
            },
        ]
        self.search.origin_update(ORIGINS)
        self.search.flush()

        def _check_results(keywords, origin_indices, sorting=False):
            page = self.search.origin_search(url_pattern="foobar", keywords=keywords)
            results = [r["url"] for r in page.results]
            if sorting:
                assert sorted(results) == sorted(
                    [ORIGINS[index]["url"] for index in origin_indices]
                )
            else:
                assert results == [ORIGINS[index]["url"] for index in origin_indices]

        _check_results(["build"], [2])
        _check_results(["web"], [2, 0])
        _check_results(["ui"], [1, 2])

        # The following checks ensure that boosts work properly

        # Baseline: "applications" is common to all origin descriptions
        _check_results(["applications"], [1, 0, 2], True)

        # ORIGINS[0] has 'framework' in: keyword + description
        # ORIGINS[2] has 'framework' in: description
        # ORIGINS[1] has 'framework' in: None
        _check_results(["framework", "applications"], [0, 2, 1])

        # ORIGINS[1] has 'ui' in: keyword
        # ORIGINS[2] has 'ui' in: keyword
        # ORIGINS[0] has 'ui' in: None
        _check_results(["applications", "ui"], [1, 2, 0])

        # ORIGINS[2] has 'web' in: keyword + description
        # ORIGINS[0] has 'web' in: keyword
        # ORIGINS[1] has 'web' in: None
        _check_results(["web", "applications"], [2, 0, 1])

    def test_origin_sort_by_search(self):
        now = datetime.now(tz=timezone.utc).isoformat()
        now_minus_5_hours = (
            datetime.now(tz=timezone.utc) - timedelta(hours=5)
        ).isoformat()
        now_plus_5_hours = (
            datetime.now(tz=timezone.utc) + timedelta(hours=5)
        ).isoformat()

        ORIGINS = [
            {
                "url": "http://foobar.1.com",
                "nb_visits": 1,
                "last_visit_date": now_minus_5_hours,
            },
            {
                "url": "http://foobar.2.com",
                "nb_visits": 2,
                "last_visit_date": now,
            },
            {
                "url": "http://foobar.3.com",
                "nb_visits": 3,
                "last_visit_date": now_plus_5_hours,
            },
        ]
        self.search.origin_update(ORIGINS)
        self.search.flush()

        def _check_results(sort_by, origins):
            page = self.search.origin_search(url_pattern="foobar", sort_by=sort_by)
            results = [r["url"] for r in page.results]
            assert results == [origin["url"] for origin in origins]

        _check_results(["nb_visits"], ORIGINS)
        _check_results(["-nb_visits"], ORIGINS[::-1])

        _check_results(["last_visit_date"], ORIGINS)
        _check_results(["-last_visit_date"], ORIGINS[::-1])

        _check_results(["nb_visits", "-last_visit_date"], ORIGINS)
        _check_results(["-last_visit_date", "nb_visits"], ORIGINS[::-1])

    def test_origin_intrinsic_metadata_license_search(self):
        ORIGINS = [
            {
                "url": "http://foobar.1.com",
                "intrinsic_metadata": {
                    "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                    "description": "foo bar",
                    "license": "https://spdx.org/licenses/MIT",
                },
            },
            {
                "url": "http://foobar.2.com",
                "intrinsic_metadata": {
                    "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                    "description": "foo bar",
                    "license": "BSD-3-Clause",
                },
            },
        ]
        self.search.origin_update(ORIGINS)
        self.search.flush()

        def _check_results(licenses, origin_indices):
            page = self.search.origin_search(url_pattern="foobar", licenses=licenses)
            results = [r["url"] for r in page.results]
            assert sorted(results) == sorted(
                [ORIGINS[i]["url"] for i in origin_indices]
            )

        _check_results(["MIT"], [0])
        _check_results(["bsd"], [1])
        _check_results(["mit", "3-Clause"], [0, 1])

    def test_origin_intrinsic_metadata_programming_language_search(self):
        ORIGINS = [
            {
                "url": "http://foobar.1.com",
                "intrinsic_metadata": {
                    "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                    "description": "foo bar",
                    "programmingLanguage": "python",
                },
            },
            {
                "url": "http://foobar.2.com",
                "intrinsic_metadata": {
                    "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                    "description": "foo bar",
                    "programmingLanguage": "javascript",
                },
            },
        ]
        self.search.origin_update(ORIGINS)
        self.search.flush()

        def _check_results(programming_languages, origin_indices):
            page = self.search.origin_search(
                url_pattern="foobar", programming_languages=programming_languages
            )
            results = [r["url"] for r in page.results]
            assert sorted(results) == sorted(
                [ORIGINS[i]["url"] for i in origin_indices]
            )

        _check_results(["python"], [0])
        _check_results(["javascript"], [1])
        _check_results(["python", "javascript"], [0, 1])
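
+    # When several metadata filters are combined, each must be satisfied:
+    # below, origins are expected to match at least one of the queried
+    # programming languages AND at least one of the queried licenses.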

    def test_origin_intrinsic_metadata_multiple_field_search(self):
        ORIGINS = [
            {
                "url": "http://foobar.1.com",
                "intrinsic_metadata": {
                    "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                    "description": "foo bar 1",
                    "programmingLanguage": "python",
                    "license": "https://spdx.org/licenses/MIT",
                },
            },
            {
                "url": "http://foobar.2.com",
                "intrinsic_metadata": {
                    "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                    "description": "foo bar 2",
                    "programmingLanguage": ["javascript", "html", "css"],
                    "license": [
                        "https://spdx.org/licenses/CC-BY-1.0",
                        "https://spdx.org/licenses/Apache-1.0",
                    ],
                },
            },
            {
                "url": "http://foobar.3.com",
                "intrinsic_metadata": {
                    "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                    "description": "foo bar 3",
                    "programmingLanguage": ["Cpp", "c"],
                    "license": "https://spdx.org/licenses/LGPL-2.0-only",
                },
            },
        ]
        self.search.origin_update(ORIGINS)
        self.search.flush()

        def _check_result(programming_languages, licenses, origin_indices):
            page = self.search.origin_search(
                url_pattern="foobar",
                programming_languages=programming_languages,
                licenses=licenses,
            )
            results = [r["url"] for r in page.results]
            assert sorted(results) == sorted(
                [ORIGINS[i]["url"] for i in origin_indices]
            )

        _check_result(["javascript"], ["CC"], [1])
        _check_result(["css"], ["CC"], [1])
        _check_result(["css"], ["CC", "apache"], [1])

        _check_result(["python", "javascript"], ["MIT"], [0])
        _check_result(["c", "python"], ["LGPL", "mit"], [2, 0])

    def test_origin_update_with_no_visit_types(self):
        """
        Update an origin with visit types first, then without any;
        check that the origin can still be searched by visit type afterwards.
        """
        origin_url = "http://foobar.baz"
        self.search.origin_update([{"url": origin_url, "visit_types": ["git"]}])
        self.search.flush()

        self.search.origin_update([{"url": origin_url}])
        self.search.flush()

        actual_page = self.search.origin_search(
            url_pattern="http", visit_types=["git"]
        )
        assert actual_page.next_page_token is None
        results = [r["url"] for r in actual_page.results]
        expected_results = [origin_url]
        assert results == expected_results

    def test_origin_intrinsic_metadata_description(self):
        origin1_nothin = {"url": "http://origin1"}
        origin2_foobar = {"url": "http://origin2"}
        origin3_barbaz = {"url": "http://origin3"}

        self.search.origin_update(
            [
                {**origin1_nothin, "intrinsic_metadata": {}},
                {
                    **origin2_foobar,
                    "intrinsic_metadata": {
                        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                        "description": "foo bar",
                    },
                },
                {
                    **origin3_barbaz,
                    "intrinsic_metadata": {
                        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                        "description": "bar baz",
                    },
                },
            ]
        )
        self.search.flush()

        actual_page = self.search.origin_search(metadata_pattern="foo")
        assert actual_page.next_page_token is None
        assert actual_page.results == [origin2_foobar]

        actual_page = self.search.origin_search(metadata_pattern="foo bar")
        assert actual_page.next_page_token is None
        assert actual_page.results == [origin2_foobar]

        actual_page = self.search.origin_search(metadata_pattern="bar baz")
        assert actual_page.next_page_token is None
        assert actual_page.results == [origin3_barbaz]
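
+    # metadata_pattern has "all terms" semantics, presumably via the same
+    # operator-"and" multi_match the translator builds for metadata filters:
+    # every word of the pattern must be found somewhere in the origin's
+    # intrinsic metadata, as the next test demonstrates.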

    def test_origin_intrinsic_metadata_all_terms(self):
        origin1_foobarfoobar = {"url": "http://origin1"}
        origin3_foobarbaz = {"url": "http://origin2"}

        self.search.origin_update(
            [
                {
                    **origin1_foobarfoobar,
                    "intrinsic_metadata": {
                        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                        "description": "foo bar foo bar",
                    },
                },
                {
                    **origin3_foobarbaz,
                    "intrinsic_metadata": {
                        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                        "description": "foo bar baz",
                    },
                },
            ]
        )
        self.search.flush()

        actual_page = self.search.origin_search(metadata_pattern="foo bar baz")
        assert actual_page.next_page_token is None
        assert actual_page.results == [origin3_foobarbaz]

    def test_origin_intrinsic_metadata_long_description(self):
        """Checks ElasticSearch does not try to store large values untokenized,
        which would be inefficient and crash it with:

            Document contains at least one immense term in
            field="intrinsic_metadata.http://schema.org/description.@value"
            (whose UTF8 encoding is longer than the max length 32766),
            all of which were skipped.
        """  # noqa
        origin1 = {"url": "http://origin1"}

        self.search.origin_update(
            [
                {
                    **origin1,
                    "intrinsic_metadata": {
                        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                        "description": " ".join(f"foo{i}" for i in range(100000)),
                    },
                },
            ]
        )
        self.search.flush()

        actual_page = self.search.origin_search(metadata_pattern="foo42")
        assert actual_page.next_page_token is None
        assert actual_page.results == [origin1]

    def test_origin_intrinsic_metadata_matches_cross_fields(self):
        """Checks the backend finds results even if the two words in
        the query are each in a different field."""
        origin1 = {"url": "http://origin1"}

        self.search.origin_update(
            [
                {
                    **origin1,
                    "intrinsic_metadata": {
                        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                        "description": "foo bar",
                        "author": "John Doe",
                    },
                },
            ]
        )
        self.search.flush()

        actual_page = self.search.origin_search(metadata_pattern="foo John")
        assert actual_page.next_page_token is None
        assert actual_page.results == [origin1]

    def test_origin_intrinsic_metadata_nested(self):
        origin1_nothin = {"url": "http://origin1"}
        origin2_foobar = {"url": "http://origin2"}
        origin3_barbaz = {"url": "http://origin3"}

        self.search.origin_update(
            [
                {**origin1_nothin, "intrinsic_metadata": {}},
                {
                    **origin2_foobar,
                    "intrinsic_metadata": {
                        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                        "keywords": ["foo", "bar"],
                    },
                },
                {
                    **origin3_barbaz,
                    "intrinsic_metadata": {
                        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                        "keywords": ["bar", "baz"],
                    },
                },
            ]
        )
        self.search.flush()

        actual_page = self.search.origin_search(metadata_pattern="foo")
        assert actual_page.next_page_token is None
        assert actual_page.results == [origin2_foobar]

        actual_page = self.search.origin_search(metadata_pattern="foo bar")
        assert actual_page.next_page_token is None
        assert actual_page.results == [origin2_foobar]

        actual_page = self.search.origin_search(metadata_pattern="bar baz")
        assert actual_page.next_page_token is None
        assert actual_page.results == [origin3_barbaz]
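
+    # A dynamic ES mapping would normally reject a field whose type changes
+    # across documents; the next test presumably relies on the backend
+    # normalizing intrinsic_metadata so that a plain string, an object, and
+    # an array can coexist in the same logical field.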
= [o["url"] for o in [origin2_barbaz, origin1_foobar]] assert sorted(results) == sorted(expected_results) actual_page = self.search.origin_search(metadata_pattern="baz") assert actual_page.next_page_token is None assert actual_page.results == [origin2_barbaz, origin3_bazqux] actual_page = self.search.origin_search(metadata_pattern="foo") assert actual_page.next_page_token is None assert actual_page.results == [origin1_foobar] actual_page = self.search.origin_search(metadata_pattern="bar baz") assert actual_page.next_page_token is None assert actual_page.results == [origin2_barbaz] actual_page = self.search.origin_search(metadata_pattern="qux") assert actual_page.next_page_token is None assert actual_page.results == [origin3_bazqux] actual_page = self.search.origin_search(metadata_pattern="baz qux") assert actual_page.next_page_token is None assert actual_page.results == [origin3_bazqux] actual_page = self.search.origin_search(metadata_pattern="foo bar") assert actual_page.next_page_token is None assert actual_page.results == [origin1_foobar] def test_origin_intrinsic_metadata_string_mapping(self): """Checks inserting a date-like in a field does not update the mapping to require every document uses a date in that field; or that search queries use a date either. Likewise for numeric and boolean fields.""" origin1 = {"url": "http://origin1"} origin2 = {"url": "http://origin2"} self.search.origin_update( [ { **origin1, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "dateCreated": "2021-02-18T10:16:52", "version": "1.0", "isAccessibleForFree": True, }, } ] ) self.search.flush() self.search.origin_update( [ { **origin2, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "dateCreated": "a long time ago", "address": "in a galaxy far, far away", "version": "a new hope", "isAccessibleForFree": "it depends", }, }, ] ) self.search.flush() actual_page = self.search.origin_search(metadata_pattern="1.0") assert actual_page.next_page_token is None assert actual_page.results == [origin1] actual_page = self.search.origin_search(metadata_pattern="long") assert actual_page.next_page_token is None assert ( actual_page.results == [] ) # "%Y-%m-%d" not followed, so value is rejected actual_page = self.search.origin_search(metadata_pattern="true") assert actual_page.next_page_token is None assert actual_page.results == [origin1] actual_page = self.search.origin_search(metadata_pattern="it depends") assert actual_page.next_page_token is None assert actual_page.results == [origin2] def test_origin_intrinsic_metadata_update(self): origin = {"url": "http://origin1"} origin_data = { **origin, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "author": "John Doe", }, } self.search.origin_update([origin_data]) self.search.flush() actual_page = self.search.origin_search(metadata_pattern="John") assert actual_page.next_page_token is None assert actual_page.results == [origin] origin_data["intrinsic_metadata"]["author"] = "Jane Doe" self.search.origin_update([origin_data]) self.search.flush() actual_page = self.search.origin_search(metadata_pattern="Jane") assert actual_page.next_page_token is None assert actual_page.results == [origin] # TODO: add more tests with more codemeta terms # TODO: add more tests with edge cases @settings(deadline=None) @given(strategies.integers(min_value=1, max_value=4)) def test_origin_url_paging(self, limit): # TODO: no hypothesis origin1_foo = {"url": "http://origin1/foo"} origin2_foobar = 
{"url": "http://origin2/foo/bar"} origin3_foobarbaz = {"url": "http://origin3/foo/bar/baz"} self.reset() self.search.origin_update([origin1_foo, origin2_foobar, origin3_foobarbaz]) self.search.flush() results = stream_results( self.search.origin_search, url_pattern="foo bar baz", limit=limit ) results = [res["url"] for res in results] expected_results = [o["url"] for o in [origin3_foobarbaz]] assert sorted(results[0 : len(expected_results)]) == sorted(expected_results) results = stream_results( self.search.origin_search, url_pattern="foo bar", limit=limit ) results = [res["url"] for res in results] expected_results = [o["url"] for o in [origin2_foobar, origin3_foobarbaz]] assert sorted(results[0 : len(expected_results)]) == sorted(expected_results) results = stream_results( self.search.origin_search, url_pattern="foo", limit=limit ) results = [res["url"] for res in results] expected_results = [ o["url"] for o in [origin1_foo, origin2_foobar, origin3_foobarbaz] ] assert sorted(results[0 : len(expected_results)]) == sorted(expected_results) @settings(deadline=None) @given(strategies.integers(min_value=1, max_value=4)) def test_origin_intrinsic_metadata_paging(self, limit): # TODO: no hypothesis origin1_foo = {"url": "http://origin1"} origin2_foobar = {"url": "http://origin2"} origin3_foobarbaz = {"url": "http://origin3"} self.reset() self.search.origin_update( [ { **origin1_foo, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "keywords": ["foo"], }, }, { **origin2_foobar, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "keywords": ["foo", "bar"], }, }, { **origin3_foobarbaz, "intrinsic_metadata": { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "keywords": ["foo", "bar", "baz"], }, }, ] ) self.search.flush() results = stream_results( self.search.origin_search, metadata_pattern="foo bar baz", limit=limit ) assert list(results) == [origin3_foobarbaz] results = stream_results( self.search.origin_search, metadata_pattern="foo bar", limit=limit ) assert list(results) == [origin2_foobar, origin3_foobarbaz] results = stream_results( self.search.origin_search, metadata_pattern="foo", limit=limit ) assert list(results) == [origin1_foo, origin2_foobar, origin3_foobarbaz] def test_search_blocklisted_results(self): origin1 = {"url": "http://origin1"} origin2 = {"url": "http://origin2", "blocklisted": True} self.search.origin_update([origin1, origin2]) self.search.flush() actual_page = self.search.origin_search(url_pattern="origin") assert actual_page.next_page_token is None assert actual_page.results == [origin1] def test_search_blocklisted_update(self): origin1 = {"url": "http://origin1"} self.search.origin_update([origin1]) self.search.flush() result_page = self.search.origin_search(url_pattern="origin") assert result_page.next_page_token is None assert result_page.results == [origin1] self.search.origin_update([{**origin1, "blocklisted": True}]) self.search.flush() result_page = self.search.origin_search(url_pattern="origin") assert result_page.next_page_token is None assert result_page.results == [] self.search.origin_update( [{**origin1, "has_visits": True, "visit_types": ["git"]}] ) self.search.flush() result_page = self.search.origin_search(url_pattern="origin") assert result_page.next_page_token is None assert result_page.results == [] def test_filter_keyword_in_filter(self): origin1 = { "url": "foo language in ['foo baz'] bar", } self.search.origin_update([origin1]) self.search.flush() result_page = 
self.search.origin_search(url_pattern="language in ['foo bar']") assert result_page.next_page_token is None assert result_page.results == [origin1] result_page = self.search.origin_search(url_pattern="baaz") assert result_page.next_page_token is None assert result_page.results == [] + + def test_visit_types_count(self): + assert self.search.visit_types_count() == Counter() + + origins = [ + {"url": "http://foobar.baz", "visit_types": ["git"], "blocklisted": True} + ] + + for idx, visit_type in enumerate(["git", "hg", "svn"]): + for i in range(idx + 1): + origins.append( + { + "url": f"http://{visit_type}.foobar.baz.{i}", + "visit_types": [visit_type], + } + ) + self.search.origin_update(origins) + self.search.flush() + + assert self.search.visit_types_count() == Counter(git=1, hg=2, svn=3) diff --git a/swh/search/translator.py b/swh/search/translator.py index 03c6344..4229bde 100644 --- a/swh/search/translator.py +++ b/swh/search/translator.py @@ -1,301 +1,307 @@ +# Copyright (C) 2021 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import logging import os +import tempfile from pkg_resources import resource_filename from tree_sitter import Language, Parser from swh.search.utils import get_expansion, unescape +logger = logging.getLogger(__name__) + class Translator: RANGE_OPERATOR_MAP = { ">": "gt", "<": "lt", ">=": "gte", "<=": "lte", } def __init__(self): - ql_rel_paths = [ - "static/swh_ql.so", # installed - "../../query_language/swh_ql.so", # development - ] - for ql_rel_path in ql_rel_paths: - ql_path = resource_filename("swh.search", ql_rel_path) - if os.path.exists(ql_path): - break - else: - assert False, "swh_ql.so was not found in any of the expected paths" + ql_path = resource_filename("swh.search", "static/swh_ql.so") + if not os.path.exists(ql_path): + logging.info("%s does not exist, building in temporary directory", ql_path) + self._build_dir = tempfile.TemporaryDirectory(prefix="swh.search-build") + source_path = resource_filename("swh.search", "query_language") + ql_path = os.path.join(self._build_dir.name, "swh_ql.so") + Language.build_library(ql_path, [source_path]) search_ql = Language(ql_path, "swh_search_ql") self.parser = Parser() self.parser.set_language(search_ql) self.query = "" def parse_query(self, query): self.query = query tree = self.parser.parse(query.encode("utf8")) self.query_node = tree.root_node if self.query_node.has_error: raise Exception("Invalid query") return self._traverse(self.query_node) def _traverse(self, node): if len(node.children) == 3 and node.children[1].type == "filters": # filters => ( filters ) return self._traverse(node.children[1]) # Go past the () brackets if node.type == "query": result = {} for child in node.children: # query => filters sort_by limit result[child.type] = self._traverse(child) return result if node.type == "filters": if len(node.children) == 1: # query => filters # filters => filters # filters => filter # Current node is just a wrapper, so go one level deep return self._traverse(node.children[0]) if len(node.children) == 3: # filters => filters conj_op filters filters1 = self._traverse(node.children[0]) conj_op = self._get_value(node.children[1]) filters2 = self._traverse(node.children[2]) if conj_op == "and": # "must" is equivalent to "AND" return {"bool": {"must": [filters1, filters2]}} if conj_op == "or": # "should" is equivalent to 
"OR" return {"bool": {"should": [filters1, filters2]}} if node.type == "filter": filter_category = node.children[0] return self._parse_filter(filter_category) if node.type == "sortBy": return self._parse_filter(node) if node.type == "limit": return self._parse_filter(node) return Exception( f"Unknown node type ({node.type}) " f"or unexpected number of children ({node.children})" ) def _get_value(self, node): if ( len(node.children) > 0 and node.children[0].type == "[" and node.children[-1].type == "]" ): # array return [self._get_value(child) for child in node.children if child.is_named] start = node.start_point[1] end = node.end_point[1] value = self.query[start:end] if len(value) > 1 and ( (value[0] == "'" and value[-1] == "'") or (value[0] and value[-1] == '"') ): return unescape(value[1:-1]) if node.type in ["number", "numberVal"]: return int(value) return unescape(value) def _parse_filter(self, filter): if filter.type == "boundedListFilter": filter = filter.children[0] children = filter.children assert len(children) == 3 category = filter.type name, op, value = [self._get_value(child) for child in children] if category == "patternFilter": if name == "origin": return { "multi_match": { "query": value, "type": "bool_prefix", "operator": "and", "fields": [ "url.as_you_type", "url.as_you_type._2gram", "url.as_you_type._3gram", ], } } elif name == "metadata": return { "nested": { "path": "intrinsic_metadata", "query": { "multi_match": { "query": value, # Makes it so that the "foo bar" query returns # documents which contain "foo" in a field and "bar" # in a different field "type": "cross_fields", # All keywords must be found in a document for it to # be considered a match. # TODO: allow missing keywords? "operator": "and", # Searches on all fields of the intrinsic_metadata dict, # recursively. 
"fields": ["intrinsic_metadata.*"], # date{Created,Modified,Published} are of type date "lenient": True, } }, } } if category == "booleanFilter": if name == "visited": return {"term": {"has_visits": value == "true"}} if category == "numericFilter": if name == "visits": if op in ["=", "!="]: return { "bool": { ("must" if op == "=" else "must_not"): [ {"range": {"nb_visits": {"gte": value, "lte": value}}} ] } } else: return { "range": {"nb_visits": {self.RANGE_OPERATOR_MAP[op]: value}} } if category == "visitTypeFilter": if name == "visit_type": return {"terms": {"visit_types": value}} if category == "unboundedListFilter": value_array = value if name == "keyword": return { "nested": { "path": "intrinsic_metadata", "query": { "multi_match": { "query": " ".join(value_array), "fields": [ get_expansion("keywords", ".") + "^2", get_expansion("descriptions", "."), # "^2" boosts an origin's score by 2x # if it the queried keywords are # found in its intrinsic_metadata.keywords ], } }, } } elif name in ["language", "license"]: name_mapping = { "language": "programming_languages", "license": "licenses", } name = name_mapping[name] return { "nested": { "path": "intrinsic_metadata", "query": { "bool": { "should": [ {"match": {get_expansion(name, "."): val}} for val in value_array ], } }, } } if category == "dateFilter": if name in ["created", "modified", "published"]: if op in ["=", "!="]: return { "nested": { "path": "intrinsic_metadata", "query": { "bool": { ("must" if op == "=" else "must_not"): [ { "range": { get_expansion(f"date_{name}", "."): { "gte": value, "lte": value, } } } ], } }, } } return { "nested": { "path": "intrinsic_metadata", "query": { "bool": { "must": [ { "range": { get_expansion(f"date_{name}", "."): { self.RANGE_OPERATOR_MAP[op]: value, } } } ], } }, } } else: if op in ["=", "!="]: return { "bool": { ("must" if op == "=" else "must_not"): [ { "range": { f"{name}_date": {"gte": value, "lte": value,} } } ], } } return { "range": { f"{name}_date": { self.RANGE_OPERATOR_MAP[op]: value.replace("Z", "+00:00"), } } } if category == "sortBy": return value if category == "limit": return value raise Exception(f"Unknown filter {category}.{name}")