Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7122958
D5990.id21645.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
23 KB
Subscribers
None
D5990.id21645.diff
View Options
diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,5 @@
.mypy_cache/
.hypothesis/
.vscode/
+node_modules/
+generated/
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -0,0 +1 @@
+Kumar Shivendu
diff --git a/Makefile.local b/Makefile.local
new file mode 100644
--- /dev/null
+++ b/Makefile.local
@@ -0,0 +1,15 @@
+YARN ?= yarn
+PYTHON ?= python3
+
+
+ts-install: package.json
+ $(YARN) install
+
+ts-build:
+ cd search_language && $(PYTHON) build.py
+
+ts-test:
+ $(YARN) test
+
+ts-dev:
+ $(YARN) dev
diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -19,3 +19,6 @@
[mypy-pytest.*]
ignore_missing_imports = True
+
+[mypy-tree_sitter.*]
+ignore_missing_imports = True
diff --git a/package.json b/package.json
new file mode 100644
--- /dev/null
+++ b/package.json
@@ -0,0 +1,33 @@
+{
+ "name": "swh-search-query-language-parser",
+ "version": "1.0.0",
+ "description": "Parser for Software Heritage archive search query language",
+ "scripts": {
+ "generate": "cd search_language && tree-sitter generate",
+ "dev": "cd search_language && tree-sitter generate && tree-sitter parse sample_query",
+ "test": "cd search_language && tree-sitter generate && tree-sitter test",
+ "repl": "cd search_language && tree-sitter build-wasm && tree-sitter playground"
+ },
+ "repository": {
+ "type": "git",
+ "url": "https://forge.softwareheritage.org/source/swh-search.git"
+ },
+ "keywords": [
+ "swh",
+ "Software Heritage",
+ "treesitter",
+ "parser",
+ "custom",
+ "search",
+ "query",
+ "language"
+ ],
+ "author": "The Software Heritage developers",
+ "license": "GPL-3.0-only",
+ "dependencies": {
+ "nan": "^2.14.2"
+ },
+ "devDependencies": {
+ "tree-sitter-cli": "^0.20.0"
+ }
+}
diff --git a/pyproject.toml b/pyproject.toml
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,3 +9,6 @@
ensure_newline_before_comments = true
line_length = 88
force_sort_within_sections = true
+
+[build-system]
+requires = ["setuptools", "wheel", "tree_sitter"]
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@
click
elasticsearch>=7.0.0,<8.0.0
typing-extensions
+tree_sitter
diff --git a/search_language/.gitignore b/search_language/.gitignore
new file mode 100644
--- /dev/null
+++ b/search_language/.gitignore
@@ -0,0 +1,6 @@
+src
+build
+bindings
+binding.gyp
+Cargo.toml
+package.json
diff --git a/search_language/build.py b/search_language/build.py
new file mode 100644
--- /dev/null
+++ b/search_language/build.py
@@ -0,0 +1,3 @@
+from tree_sitter import Language
+
+Language.build_library("../generated/swh_ql.so", ["."])
diff --git a/search_language/grammar.js b/search_language/grammar.js
new file mode 100644
--- /dev/null
+++ b/search_language/grammar.js
@@ -0,0 +1,132 @@
+// Copyright (C) 2019-2021 The Software Heritage developers
+// See the AUTHORS file at the top-level directory of this distribution
+// License: GNU General Public License version 3, or any later version
+// See top-level LICENSE file for more information
+
+module.exports = grammar({
+ name: 'swh_search_query_language',
+
+ rules: {
+ query: $ => repeat(
+ choice(
+ $.patternFilter,
+ $.booleanFilter,
+ $.numericFilter,
+ $.unboundedListFilter,
+ $.boundedListFilter,
+ $.dateFilter,
+ $.limitFilter
+ )
+ ),
+
+ patternFilter: $ => seq($.patternField, $.patternOp, $.patternVal),
+ patternField: $ => token(choice('url', 'metadata')),
+ patternOp: $ => $.colonOp,
+ patternVal: $ => $.string,
+
+ booleanFilter: $ => seq($.booleanField, $.booleanOp, $.booleanVal),
+ booleanField: $ => token(choice('with_visit')),
+ booleanOp: $ => $.colonOp,
+ booleanVal: $ => choice($.booleanTrue, $.booleanFalse),
+
+ numericFilter: $ => seq($.numericField, $.numericOp, $.numberVal),
+ numericField: $ => token(choice('nb_visits')),
+ numericOp: $ => $.rangeOp,
+ numberVal: $ => $.number,
+
+ boundedListFilter: $ => choice($.visitTypeFilter, $.sortByFilter),
+
+ visitTypeFilter: $ => seq($.visitTypeField, $.visitTypeOp, $.visitTypeVal),
+ visitTypeField: $ => token(choice('visit_types')),
+ visitTypeOp: $ => $.colonOp,
+ visitTypeVal: $ => createArray($.visitTypeOptions),
+ visitTypeOptions: $ => choice(
+ "any",
+ "cran",
+ "deb",
+ "deposit",
+ "ftp",
+ "hg",
+ "git",
+ "nixguix",
+ "npm",
+ "pypi",
+ "svn",
+ "tar"
+ ),
+
+ sortByFilter: $ => seq($.sortByField, $.sortByOp, $.sortByVal),
+ sortByField: $ => token(choice('sort_by')),
+ sortByOp: $ => $.colonOp,
+ sortByVal: $ => createArray($.sortByOptions),
+ sortByOptions: $ => choice(
+ 'nb_visits',
+ 'last_visit_date',
+ 'last_eventful_visit_date',
+ 'last_revision_date',
+ 'last_release_date',
+ 'date_created',
+ 'date_modified',
+ 'date_published'
+ ),
+
+ unboundedListFilter: $ => seq($.listField, $.listOp, $.listVal),
+ listField: $ => token(choice('programming_languages', 'licenses', 'keywords')),
+ listOp: $ => $.choiceOp,
+ listVal: $ => createArray($.words), // Needs to be fixed !!
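+ // FIXME: unquoted members are mis-parsed, e.g. `licenses in ["MIT", BSD,]`: the `word` pattern /[^\s"']+/ also matches `,` and `]`, so `BSD,]` is consumed as a single word and the closing bracket is never seen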
+
+
+ dateFilter: $ => seq($.dateField, $.dateOp, $.dateVal),
+ dateField: $ => token(choice(
+ 'last_visit_date',
+ 'last_eventful_visit_date',
+ 'last_revision_date',
+ 'last_release_date',
+ 'date_created',
+ 'date_modified',
+ 'date_published'
+ )),
+ dateOp: $ => $.rangeOp,
+ dateVal: $ => $.dateWithOptionalTime,
+
+ limitFilter: $ => seq('limit', $.colonOp, $.number),
+
+
+ rangeOp: $ => token(choice('<', '<=', '=', '!=', '>=', '>')),
+ colonOp: $ => token(':'),
+ choiceOp: $ => token(choice('in', 'not in')),
+
+ dateWithOptionalTime: $ => /\d{4}[-]\d{2}[-]\d{2}(\s|T)*(\d{2}:\d{2}(:\d{2})?)?/,
+
+ string: $ => choice(wrapWithInvertedComma($.words), $.word),
+ number: $ => /\d+/,
+ booleanTrue: $ => "true",
+ booleanFalse: $ => "false",
+
+ words: $ => repeat1(seq($.word)),
+ word: $ => /[^\s"']+/,
+
+ }
+});
+
+function commaSep1(rule) {
+ return seq(rule, repeat(seq(",", optional(rule))))
+}
+
+function commaSep(rule) {
+ return optional(commaSep1(rule))
+}
+
+function createArray(rule) {
+ return seq('[', commaSep(
+ field('array_member', (choice(wrapWithInvertedComma(rule), rule)))
+ ), ']')
+}
+
+function wrapWithInvertedComma(rule) {
+ return choice(
+ seq("'", rule, "'"),
+ seq('"', rule, '"')
+ )
+}
diff --git a/search_language/sample_query b/search_language/sample_query
new file mode 100644
--- /dev/null
+++ b/search_language/sample_query
@@ -0,0 +1,6 @@
+url : "github.com/django/Django" metadata : "Repo description"
+with_visit : true with_visit : false
+nb_visits >= 0 nb_visits = 10 nb_visits != 256 nb_visits < 1000
+sort_by : ["nb_visits", "last_revision_date nb_visits", last_release_date,]
+last_release_date < 2001-02-13 15:54:21
+licenses in ["MIT", "BSD X", "Apache XZY ABC", ]
diff --git a/search_language/test/corpus/booleanFilters.txt b/search_language/test/corpus/booleanFilters.txt
new file mode 100644
--- /dev/null
+++ b/search_language/test/corpus/booleanFilters.txt
@@ -0,0 +1,42 @@
+==================
+boolean filter
+==================
+
+with_visit: true
+with_visit: false
+with_visit :"true"
+with_visit : 'false'
+
+---
+
+
+(query [0, 0] - [4, 0]
+ (booleanFilter [0, 0] - [0, 16]
+ (booleanField [0, 0] - [0, 10])
+ (booleanOp [0, 10] - [0, 11]
+ (colonOp [0, 10] - [0, 11]))
+ (booleanVal [0, 12] - [0, 16]
+ (booleanTrue [0, 12] - [0, 16])))
+ (booleanFilter [1, 0] - [1, 17]
+ (booleanField [1, 0] - [1, 10])
+ (booleanOp [1, 10] - [1, 11]
+ (colonOp [1, 10] - [1, 11]))
+ (booleanVal [1, 12] - [1, 17]
+ (booleanFalse [1, 12] - [1, 17])))
+ (booleanFilter [2, 0] - [2, 17]
+ (booleanField [2, 0] - [2, 10])
+ (booleanOp [2, 11] - [2, 12]
+ (colonOp [2, 11] - [2, 12]))
+ (ERROR [2, 12] - [2, 13])
+ (booleanVal [2, 13] - [2, 17]
+ (booleanTrue [2, 13] - [2, 17])))
+ (ERROR [2, 17] - [2, 18])
+ (booleanFilter [3, 0] - [3, 19]
+ (booleanField [3, 0] - [3, 10])
+ (booleanOp [3, 11] - [3, 12]
+ (colonOp [3, 11] - [3, 12]))
+ (ERROR [3, 13] - [3, 14])
+ (booleanVal [3, 14] - [3, 19]
+ (booleanFalse [3, 14] - [3, 19])))
+ (ERROR [3, 19] - [3, 20]))
+sample_query 0 ms (ERROR [2, 12] - [2, 13])
diff --git a/search_language/test/corpus/boundedListFilters.txt b/search_language/test/corpus/boundedListFilters.txt
new file mode 100644
--- /dev/null
+++ b/search_language/test/corpus/boundedListFilters.txt
@@ -0,0 +1,51 @@
+==================
+sort_by filter
+==================
+
+sort_by : ["nb_visits", "last_revision_date", last_release_date]
+sort_by :[some_invalid_field, "last_eventful_visit_date", "date_created"]
+sort_by: ["date_published", 'date_modified']
+sort_by:['date_published', "last_visit_date"]
+
+---
+
+
+(query [0, 0] - [4, 0]
+ (boundedListFilter [0, 0] - [0, 64]
+ (sortByFilter [0, 0] - [0, 64]
+ (sortByField [0, 0] - [0, 7])
+ (sortByOp [0, 8] - [0, 9]
+ (colonOp [0, 8] - [0, 9]))
+ (sortByVal [0, 10] - [0, 64]
+ array_member: (sortByOptions [0, 12] - [0, 21])
+ array_member: (sortByOptions [0, 25] - [0, 43])
+ array_member: (sortByOptions [0, 46] - [0, 63]))))
+ (boundedListFilter [1, 0] - [1, 73]
+ (sortByFilter [1, 0] - [1, 73]
+ (sortByField [1, 0] - [1, 7])
+ (sortByOp [1, 8] - [1, 9]
+ (colonOp [1, 8] - [1, 9]))
+ (sortByVal [1, 9] - [1, 73]
+ (ERROR [1, 10] - [1, 29]
+ (ERROR [1, 10] - [1, 15])
+ (choiceOp [1, 15] - [1, 17])
+ (ERROR [1, 17] - [1, 28]))
+ array_member: (sortByOptions [1, 31] - [1, 55])
+ array_member: (sortByOptions [1, 59] - [1, 71]))))
+ (boundedListFilter [2, 0] - [2, 44]
+ (sortByFilter [2, 0] - [2, 44]
+ (sortByField [2, 0] - [2, 7])
+ (sortByOp [2, 7] - [2, 8]
+ (colonOp [2, 7] - [2, 8]))
+ (sortByVal [2, 9] - [2, 44]
+ array_member: (sortByOptions [2, 11] - [2, 25])
+ array_member: (sortByOptions [2, 29] - [2, 42]))))
+ (boundedListFilter [3, 0] - [3, 45]
+ (sortByFilter [3, 0] - [3, 45]
+ (sortByField [3, 0] - [3, 7])
+ (sortByOp [3, 7] - [3, 8]
+ (colonOp [3, 7] - [3, 8]))
+ (sortByVal [3, 8] - [3, 45]
+ array_member: (sortByOptions [3, 10] - [3, 24])
+ array_member: (sortByOptions [3, 28] - [3, 43])))))
+sample_query 0 ms (ERROR [1, 10] - [1, 29])
diff --git a/search_language/test/corpus/combinations.txt b/search_language/test/corpus/combinations.txt
new file mode 100644
--- /dev/null
+++ b/search_language/test/corpus/combinations.txt
@@ -0,0 +1,82 @@
+==================
+Combinations
+==================
+
+url = "github.com/django/Django" metadata = something in metadata
+with_visit = true with_visit = false
+nb_visits >= 0 nb_visits = 10 nb_visits != 256 nb_visits < 1000
+sort_by = ["nb_visits", "last_revision_date", last_release_date, ]
+last_release_date < 2001-02-13 15:54:21
+licenses in ["MIT","BSD X","Apache", ]
+
+---
+
+(query [0, 0] - [6, 0]
+ (patternFilter [0, 0] - [0, 32]
+ (patternField [0, 0] - [0, 3])
+ (patternOp [0, 3] - [0, 6])
+ (patternVal [0, 6] - [0, 32]
+ (string [0, 6] - [0, 32]
+ (string_content [0, 7] - [0, 31]))))
+ (patternFilter [0, 33] - [0, 65]
+ (patternField [0, 33] - [0, 41])
+ (patternOp [0, 41] - [0, 44])
+ (patternVal [0, 44] - [0, 65]
+ (string [0, 44] - [0, 65]
+ (string_content [0, 44] - [0, 65]))))
+ (booleanFilter [1, 0] - [1, 17]
+ (booleanField [1, 0] - [1, 10])
+ (booleanOp [1, 10] - [1, 13])
+ (booleanVal [1, 13] - [1, 17]
+ (booleanTrue [1, 13] - [1, 17])))
+ (booleanFilter [1, 18] - [1, 36]
+ (booleanField [1, 18] - [1, 28])
+ (booleanOp [1, 28] - [1, 31])
+ (booleanVal [1, 31] - [1, 36]
+ (booleanFalse [1, 31] - [1, 36])))
+ (numericFilter [2, 0] - [2, 14]
+ (numericField [2, 0] - [2, 9])
+ (numericOp [2, 10] - [2, 12]
+ (rangeOp [2, 10] - [2, 12]))
+ (numberVal [2, 13] - [2, 14]
+ (number [2, 13] - [2, 14])))
+ (numericFilter [2, 16] - [2, 30]
+ (numericField [2, 16] - [2, 25])
+ (numericOp [2, 26] - [2, 27]
+ (rangeOp [2, 26] - [2, 27]))
+ (numberVal [2, 28] - [2, 30]
+ (number [2, 28] - [2, 30])))
+ (numericFilter [2, 31] - [2, 47]
+ (numericField [2, 31] - [2, 40])
+ (numericOp [2, 41] - [2, 43]
+ (rangeOp [2, 41] - [2, 43]))
+ (numberVal [2, 44] - [2, 47]
+ (number [2, 44] - [2, 47])))
+ (numericFilter [2, 48] - [2, 64]
+ (numericField [2, 48] - [2, 57])
+ (numericOp [2, 58] - [2, 59]
+ (rangeOp [2, 58] - [2, 59]))
+ (numberVal [2, 60] - [2, 64]
+ (number [2, 60] - [2, 64])))
+ (boundedListFilter [3, 0] - [3, 66]
+ (sortByFilter [3, 0] - [3, 66]
+ (sortByField [3, 0] - [3, 7])
+ (sortByOp [3, 7] - [3, 10])
+ (sortByVal [3, 10] - [3, 66]
+ array_member: (sortByOptions [3, 12] - [3, 21])
+ array_member: (sortByOptions [3, 25] - [3, 43])
+ array_member: (sortByOptions [3, 46] - [3, 63]))))
+ (dateFilter [4, 0] - [4, 39]
+ (dateField [4, 0] - [4, 17])
+ (dateOp [4, 18] - [4, 19]
+ (rangeOp [4, 18] - [4, 19]))
+ (dateVal [4, 20] - [4, 39]
+ (dateWithOptionalTime [4, 20] - [4, 39])))
+ (unboundedListFilter [5, 0] - [5, 38]
+ (listField [5, 0] - [5, 8])
+ (listOp [5, 9] - [5, 11])
+ (listVal [5, 12] - [5, 38]
+ array_member: (string_content [5, 14] - [5, 17])
+ array_member: (string_content [5, 20] - [5, 25])
+ array_member: (string_content [5, 28] - [5, 34]))))
+
diff --git a/search_language/test/corpus/dateFilters.txt b/search_language/test/corpus/dateFilters.txt
new file mode 100644
--- /dev/null
+++ b/search_language/test/corpus/dateFilters.txt
@@ -0,0 +1,58 @@
+==================
+date filters
+==================
+
+
+last_release_date< 2001-02-13 15:54:21
+last_revision_date <=2001-02-13 15:54
+date_created=2001-02-13
+last_eventful_visit_date!=2001-02-13
+date_modified>2010-02-13 15:54:21
+date_published>=2010-02-13
+some_non_existent_field>=2010-02-13
+
+
+---
+
+(query [0, 0] - [7, 0]
+ (dateFilter [0, 0] - [0, 38]
+ (dateField [0, 0] - [0, 17])
+ (dateOp [0, 17] - [0, 18]
+ (rangeOp [0, 17] - [0, 18]))
+ (dateVal [0, 19] - [0, 38]
+ (dateWithOptionalTime [0, 19] - [0, 38])))
+ (dateFilter [1, 0] - [1, 40]
+ (dateField [1, 0] - [1, 18])
+ (dateOp [1, 19] - [1, 21]
+ (rangeOp [1, 19] - [1, 21]))
+ (dateVal [1, 21] - [1, 40]
+ (dateWithOptionalTime [1, 21] - [1, 40])))
+ (dateFilter [2, 0] - [3, 0]
+ (dateField [2, 0] - [2, 12])
+ (dateOp [2, 12] - [2, 13]
+ (rangeOp [2, 12] - [2, 13]))
+ (dateVal [2, 13] - [3, 0]
+ (dateWithOptionalTime [2, 13] - [3, 0])))
+ (dateFilter [3, 0] - [4, 0]
+ (dateField [3, 0] - [3, 24])
+ (dateOp [3, 24] - [3, 26]
+ (rangeOp [3, 24] - [3, 26]))
+ (dateVal [3, 26] - [4, 0]
+ (dateWithOptionalTime [3, 26] - [4, 0])))
+ (dateFilter [4, 0] - [5, 0]
+ (dateField [4, 0] - [4, 13])
+ (dateOp [4, 13] - [4, 14]
+ (rangeOp [4, 13] - [4, 14]))
+ (dateVal [4, 14] - [5, 0]
+ (dateWithOptionalTime [4, 14] - [5, 0])))
+ (dateFilter [5, 0] - [6, 0]
+ (dateField [5, 0] - [5, 14])
+ (dateOp [5, 14] - [5, 16]
+ (rangeOp [5, 14] - [5, 16]))
+ (dateVal [5, 16] - [6, 0]
+ (dateWithOptionalTime [5, 16] - [6, 0])))
+ (ERROR [6, 0] - [7, 0]
+ (ERROR [6, 0] - [6, 23])
+ (rangeOp [6, 23] - [6, 25])
+ (dateWithOptionalTime [6, 25] - [7, 0])))
+sample_query 0 ms (ERROR [6, 0] - [7, 0])
diff --git a/search_language/test/corpus/numericFilters.txt b/search_language/test/corpus/numericFilters.txt
new file mode 100644
--- /dev/null
+++ b/search_language/test/corpus/numericFilters.txt
@@ -0,0 +1,51 @@
+==================
+numeric filter
+==================
+
+nb_visits< 1000
+nb_visits <= 0
+nb_visits =10
+nb_visits != 256
+nb_visits> 1000
+nb_visits>=1000
+
+---
+
+
+(query [0, 0] - [5, 15]
+ (numericFilter [0, 0] - [0, 15]
+ (numericField [0, 0] - [0, 9])
+ (numericOp [0, 9] - [0, 10]
+ (rangeOp [0, 9] - [0, 10]))
+ (numberVal [0, 11] - [0, 15]
+ (number [0, 11] - [0, 15])))
+ (numericFilter [1, 0] - [1, 14]
+ (numericField [1, 0] - [1, 9])
+ (numericOp [1, 10] - [1, 12]
+ (rangeOp [1, 10] - [1, 12]))
+ (numberVal [1, 13] - [1, 14]
+ (number [1, 13] - [1, 14])))
+ (numericFilter [2, 0] - [2, 13]
+ (numericField [2, 0] - [2, 9])
+ (numericOp [2, 10] - [2, 11]
+ (rangeOp [2, 10] - [2, 11]))
+ (numberVal [2, 11] - [2, 13]
+ (number [2, 11] - [2, 13])))
+ (numericFilter [3, 0] - [3, 16]
+ (numericField [3, 0] - [3, 9])
+ (numericOp [3, 10] - [3, 12]
+ (rangeOp [3, 10] - [3, 12]))
+ (numberVal [3, 13] - [3, 16]
+ (number [3, 13] - [3, 16])))
+ (numericFilter [4, 0] - [4, 15]
+ (numericField [4, 0] - [4, 9])
+ (numericOp [4, 9] - [4, 10]
+ (rangeOp [4, 9] - [4, 10]))
+ (numberVal [4, 11] - [4, 15]
+ (number [4, 11] - [4, 15])))
+ (numericFilter [5, 0] - [5, 15]
+ (numericField [5, 0] - [5, 9])
+ (numericOp [5, 9] - [5, 11]
+ (rangeOp [5, 9] - [5, 11]))
+ (numberVal [5, 11] - [5, 15]
+ (number [5, 11] - [5, 15]))))
diff --git a/search_language/test/corpus/patternFilters.txt b/search_language/test/corpus/patternFilters.txt
new file mode 100644
--- /dev/null
+++ b/search_language/test/corpus/patternFilters.txt
@@ -0,0 +1,94 @@
+==================
+url filter
+==================
+
+url: github.com/django/Django
+url:github.com/\django/Django
+url :"github.com/\django/nDjango"
+url : 'github com \/ django Django'
+
+---
+
+(query [1, 0] - [5, 0]
+ (patternFilter [1, 0] - [1, 29]
+ (patternField [1, 0] - [1, 3])
+ (patternOp [1, 3] - [1, 4]
+ (colonOp [1, 3] - [1, 4]))
+ (patternVal [1, 5] - [1, 29]
+ (string [1, 5] - [1, 29]
+ (word [1, 5] - [1, 29]))))
+ (patternFilter [2, 0] - [2, 29]
+ (patternField [2, 0] - [2, 3])
+ (patternOp [2, 3] - [2, 4]
+ (colonOp [2, 3] - [2, 4]))
+ (patternVal [2, 4] - [2, 29]
+ (string [2, 4] - [2, 29]
+ (word [2, 4] - [2, 29]))))
+ (patternFilter [3, 0] - [3, 33]
+ (patternField [3, 0] - [3, 3])
+ (patternOp [3, 4] - [3, 5]
+ (colonOp [3, 4] - [3, 5]))
+ (patternVal [3, 5] - [3, 33]
+ (string [3, 5] - [3, 33]
+ (words [3, 6] - [3, 32]
+ (word [3, 6] - [3, 32])))))
+ (patternFilter [4, 0] - [4, 35]
+ (patternField [4, 0] - [4, 3])
+ (patternOp [4, 4] - [4, 5]
+ (colonOp [4, 4] - [4, 5]))
+ (patternVal [4, 6] - [4, 35]
+ (string [4, 6] - [4, 35]
+ (words [4, 7] - [4, 34]
+ (word [4, 7] - [4, 13])
+ (word [4, 14] - [4, 17])
+ (word [4, 18] - [4, 20])
+ (word [4, 21] - [4, 27])
+ (word [4, 28] - [4, 34]))))))
+
+
+==================
+metadata filter
+==================
+
+metadata: https://github.com/python/mypy
+metadata: static,typing
+metadata :"static typing"
+metadata : 'python join us issues'
+
+---
+
+(query [1, 0] - [5, 0]
+ (patternFilter [1, 0] - [1, 40]
+ (patternField [1, 0] - [1, 8])
+ (patternOp [1, 8] - [1, 9]
+ (colonOp [1, 8] - [1, 9]))
+ (patternVal [1, 10] - [1, 40]
+ (string [1, 10] - [1, 40]
+ (word [1, 10] - [1, 40]))))
+ (patternFilter [2, 0] - [2, 23]
+ (patternField [2, 0] - [2, 8])
+ (patternOp [2, 8] - [2, 9]
+ (colonOp [2, 8] - [2, 9]))
+ (patternVal [2, 10] - [2, 23]
+ (string [2, 10] - [2, 23]
+ (word [2, 10] - [2, 23]))))
+ (patternFilter [3, 0] - [3, 25]
+ (patternField [3, 0] - [3, 8])
+ (patternOp [3, 9] - [3, 10]
+ (colonOp [3, 9] - [3, 10]))
+ (patternVal [3, 10] - [3, 25]
+ (string [3, 10] - [3, 25]
+ (words [3, 11] - [3, 24]
+ (word [3, 11] - [3, 17])
+ (word [3, 18] - [3, 24])))))
+ (patternFilter [4, 0] - [4, 34]
+ (patternField [4, 0] - [4, 8])
+ (patternOp [4, 9] - [4, 10]
+ (colonOp [4, 9] - [4, 10]))
+ (patternVal [4, 11] - [4, 34]
+ (string [4, 11] - [4, 34]
+ (words [4, 12] - [4, 33]
+ (word [4, 12] - [4, 18])
+ (word [4, 19] - [4, 23])
+ (word [4, 24] - [4, 26])
+ (word [4, 27] - [4, 33]))))))
diff --git a/search_language/test/corpus/unboundedListFilter.txt b/search_language/test/corpus/unboundedListFilter.txt
new file mode 100644
--- /dev/null
+++ b/search_language/test/corpus/unboundedListFilter.txt
@@ -0,0 +1,40 @@
+==================
+unbounded list filter
+==================
+
+
+licenses in ["MIT", "BSD X","Apache version 1.0"]
+licenses in ["MIT", BSD,]
+licenses in []
+
+---
+
+
+(query [0, 0] - [2, 14]
+ (unboundedListFilter [0, 0] - [0, 50]
+ (listField [0, 0] - [0, 8])
+ (listOp [0, 9] - [0, 11]
+ (choiceOp [0, 9] - [0, 11]))
+ (listVal [0, 12] - [0, 50]
+ array_member: (words [0, 14] - [0, 17]
+ (word [0, 14] - [0, 17]))
+ array_member: (words [0, 21] - [0, 26]
+ (word [0, 21] - [0, 24])
+ (word [0, 25] - [0, 26]))
+ array_member: (words [0, 29] - [0, 48]
+ (word [0, 29] - [0, 35])
+ (word [0, 36] - [0, 40])
+ (word [0, 41] - [0, 48]))))
+ (unboundedListFilter [1, 0] - [2, 14]
+ (listField [1, 0] - [1, 8])
+ (listOp [1, 9] - [1, 11]
+ (choiceOp [1, 9] - [1, 11]))
+ (listVal [1, 12] - [2, 14]
+ array_member: (words [1, 14] - [1, 17]
+ (word [1, 14] - [1, 17]))
+ array_member: (words [1, 20] - [2, 14]
+ (word [1, 20] - [1, 25])
+ (word [2, 0] - [2, 8])
+ (word [2, 9] - [2, 11])
+ (word [2, 12] - [2, 14])))))
+sample_query 0 ms (MISSING "]" [2, 14] - [2, 14])
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -4,8 +4,9 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from distutils.command.build_py import build_py
from io import open
-from os import path
+from os import path, system
from setuptools import find_packages, setup
@@ -35,6 +36,17 @@
return requirements
+class custom_build(build_py):
+ def run(self):
+ if not self.dry_run:
+ from tree_sitter import Language # type: ignore
+
+ system("yarn install && yarn generate")
+ Language.build_library("generated/swh_ql.so", ["search_language"])
+
+ build_py.run(self)
+
+
setup(
name="swh.search",
description="Software Heritage search service",
@@ -68,4 +80,6 @@
"Source": "https://forge.softwareheritage.org/source/swh-search",
"Documentation": "https://docs.softwareheritage.org/devel/swh-search/",
},
+ cmdclass={"build_py": custom_build},
+ data_files=[("share/swh-search", ["generated/swh_ql.so"])],
)
diff --git a/yarn.lock b/yarn.lock
new file mode 100644
--- /dev/null
+++ b/yarn.lock
@@ -0,0 +1,13 @@
+# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
+# yarn lockfile v1
+
+
+nan@^2.14.2:
+ version "2.14.2"
+ resolved "https://registry.yarnpkg.com/nan/-/nan-2.14.2.tgz#f5376400695168f4cc694ac9393d0c9585eeea19"
+ integrity sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ==
+
+tree-sitter-cli@^0.20.0:
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/tree-sitter-cli/-/tree-sitter-cli-0.20.0.tgz#feaaa11c7ecf44a6e236aa1e2963b85d045d33cc"
+ integrity sha512-4D1qapWbJXZ5rrSUGM5rcw5Vuq/smzn9KbiFRhlON6KeuuXjra+KAtDYVrDgAoLIG4ku+jbEEGrJxCptUGi3dg==
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Dec 17, 1:30 PM (2 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3231661
Attached To
D5990: query_language: Setup tree-sitter and grammar.js
Event Timeline
Log In to Comment