diff --git a/docs/query-language.rst b/docs/query-language.rst --- a/docs/query-language.rst +++ b/docs/query-language.rst @@ -6,12 +6,12 @@ These filters have 3 components in the order : ``Name Operator Value`` Some of the examples are : - * ``origin = django and language in [python] and visits >= 5`` + * ``origin : plasma and language in [python] and visits >= 5`` * ``last_revision > 2020-01-01 and limit = 10`` * ``last_visit > 2021-01-01 or last_visit < 2020-01-01`` - * ``visited = false and metadata = "kubernetes" or origin = "minikube"`` + * ``visited = false and metadata = "kubernetes" or origin : "minikube"`` * ``keyword in ["orchestration", "kubectl"] and language in ["go", "rust"]`` - * ``(origin = debian or visit_type = ["deb"]) and license in ["GPL-3"]`` + * ``(origin : debian or visit_type = ["deb"]) and license in ["GPL-3"]`` **Note**: * Whitespaces are optional between the three components of a filter. @@ -30,18 +30,18 @@ * Name: * ``origin``: Keywords from the origin url * ``metadata``: Keywords from all the intrinsic metadata fields - * Operator: ``=`` + * Operator: ``:`` * Value: String wrapped in quotation marks(``"`` or ``'``) **Note:** If a string has no whitespace then the quotation marks become optional. **Examples:** - * ``origin = https://github.com/Django/django`` - * ``origin = kubernetes`` - * ``origin = "github python"`` - * ``metadata = orchestration`` - * ``metadata = "javascript language"`` + * ``origin : https://github.com/Django/django`` + * ``origin : kubernetes`` + * ``origin : "github python"`` + * ``metadata : orchestration`` + * ``metadata : "javascript language"`` Boolean filters --------------- diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py --- a/swh/search/elasticsearch.py +++ b/swh/search/elasticsearch.py @@ -373,10 +373,10 @@ query_filters = [] if url_pattern: - query_filters.append(f"origin = {escape(url_pattern)}") + query_filters.append(f"origin : {escape(url_pattern)}") if metadata_pattern: - query_filters.append(f"metadata = {escape(metadata_pattern)}") + query_filters.append(f"metadata : {escape(metadata_pattern)}") # if not query_clauses: # raise ValueError( diff --git a/swh/search/query_language/grammar.js b/swh/search/query_language/grammar.js --- a/swh/search/query_language/grammar.js +++ b/swh/search/query_language/grammar.js @@ -11,7 +11,7 @@ listFields, dateFields } = require("./tokens.js"); -const { equalOp, rangeOp, choiceOp } = require("./tokens.js"); +const { equalOp, containOp, rangeOp, choiceOp } = require("./tokens.js"); const { sortByOptions, visitTypeOptions } = require("./tokens.js"); const { OR, AND, TRUE, FALSE } = require("./tokens.js"); @@ -80,7 +80,7 @@ patternFilter: $ => annotateFilter($.patternField, $.patternOp, $.patternVal), patternField: $ => token(choice(...patternFields)), - patternOp: $ => $.equalOp, + patternOp: $ => $.containOp, patternVal: $ => $.string, booleanFilter: $ => annotateFilter($.booleanField, $.booleanOp, $.booleanVal), @@ -116,6 +116,7 @@ rangeOp: $ => token(choice(...rangeOp)), equalOp: $ => token(choice(...equalOp)), + containOp: $ => token(choice(...containOp)), choiceOp: $ => token(choice(...choiceOp)), isoDateTime: $ => { diff --git a/swh/search/query_language/sample_query b/swh/search/query_language/sample_query --- a/swh/search/query_language/sample_query +++ b/swh/search/query_language/sample_query @@ -1,6 +1,6 @@ -(origin = django/django and language in ["python"] or visits >= 5) or +(origin : django/django and language in ["python"] or visits >= 5) or (last_revision > 2020-01-01 and limit = 10) or (last_visit > 2021-01-01 or last_visit < 2020-01-01) or -(visited = false and metadata = "gitlab") or +(visited = false and metadata : "gitlab") or (keyword in ["orchestration", "kubectl"] and language in ["go", "rust"]) or (visit_type = [deb] and license in ["GPL-3"]) diff --git a/swh/search/query_language/test/corpus/combinations.txt b/swh/search/query_language/test/corpus/combinations.txt --- a/swh/search/query_language/test/corpus/combinations.txt +++ b/swh/search/query_language/test/corpus/combinations.txt @@ -11,10 +11,10 @@ Origins with django as keyword, python language, and more than 5 visits ================== -origin = django and language in ["python"] and visits >= 5 +origin : django and language in ["python"] and visits >= 5 --- -(query (filters (filters (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (singleWord)))))) (and) (filters (filter (unboundedListFilter (listField) (listOp (choiceOp)) (listVal (string (stringContent))))))) (and) (filters (filter (numericFilter (numericField) (numericOp (rangeOp)) (numberVal (number))))))) +(query (filters (filters (filters (filter (patternFilter (patternField) (patternOp (containOp)) (patternVal (string (singleWord)))))) (and) (filters (filter (unboundedListFilter (listField) (listOp (choiceOp)) (listVal (string (stringContent))))))) (and) (filters (filter (numericFilter (numericField) (numericOp (rangeOp)) (numberVal (number))))))) ================== 10 origins with latest revision after 2020-01-01 @@ -35,10 +35,10 @@ Unvisited origins with kubernetes in metadata or minikube in url ================== -visited = false and metadata = "kubernetes" or origin = "minikube" +visited = false and metadata : "kubernetes" or origin : "minikube" --- -(query (filters (filters (filters (filter (booleanFilter (booleanField) (booleanOp (equalOp)) (booleanVal (booleanFalse))))) (and) (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (stringContent))))))) (or) (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (stringContent)))))))) +(query (filters (filters (filters (filter (booleanFilter (booleanField) (booleanOp (equalOp)) (booleanVal (booleanFalse))))) (and) (filters (filter (patternFilter (patternField) (patternOp (containOp)) (patternVal (string (stringContent))))))) (or) (filters (filter (patternFilter (patternField) (patternOp (containOp)) (patternVal (string (stringContent)))))))) ================== Origins with "orchestration" or "kubectl" as keywords and language as "go" or "rust" @@ -52,27 +52,27 @@ ================== Origins with a GPL-3 license that have "debian" in their url or have visit type as "deb" ================== -(origin = debian or visit_type = ["deb"]) and license in ["GPL-3"] +(origin : debian or visit_type = ["deb"]) and license in ["GPL-3"] --- -(query (filters (filters (filters (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (singleWord)))))) (or) (filters (filter (boundedListFilter (visitTypeFilter (visitTypeField) (visitTypeOp (equalOp)) (visitTypeVal (visitTypeOptions)))))))) (and) (filters (filter (unboundedListFilter (listField) (listOp (choiceOp)) (listVal (string (stringContent)))))))) +(query (filters (filters (filters (filters (filter (patternFilter (patternField) (patternOp (containOp)) (patternVal (string (singleWord)))))) (or) (filters (filter (boundedListFilter (visitTypeFilter (visitTypeField) (visitTypeOp (equalOp)) (visitTypeVal (visitTypeOptions)))))))) (and) (filters (filter (unboundedListFilter (listField) (listOp (choiceOp)) (listVal (string (stringContent)))))))) ================== Origins with `and` and `or` inside filter values ================== -(origin = "foo and bar or baz") +(origin : "foo and bar or baz") --- -(query (filters (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (stringContent)))))))) +(query (filters (filters (filter (patternFilter (patternField) (patternOp (containOp)) (patternVal (string (stringContent)))))))) ================== Origins with `'` and `"` inside filter values ================== -(origin = "foo \\ \'bar\' \"baz\" ") +(origin : "foo \\ \'bar\' \"baz\" ") --- -(query (filters (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (stringContent (escape_sequence) (escape_sequence) (escape_sequence) (escape_sequence) (escape_sequence))))))))) +(query (filters (filters (filter (patternFilter (patternField) (patternOp (containOp)) (patternVal (string (stringContent (escape_sequence) (escape_sequence) (escape_sequence) (escape_sequence) (escape_sequence))))))))) ================== Incomplete conjunction operators should throw error diff --git a/swh/search/query_language/tokens.js b/swh/search/query_language/tokens.js --- a/swh/search/query_language/tokens.js +++ b/swh/search/query_language/tokens.js @@ -35,6 +35,7 @@ // Operators const equalOp = ['=']; +const containOp = [':']; const rangeOp = ['<', '<=', '=', '!=', '>=', '>']; const choiceOp = ['in', 'not in']; @@ -93,6 +94,7 @@ // Operators equalOp, + containOp, rangeOp, choiceOp, diff --git a/swh/search/tests/test_elasticsearch.py b/swh/search/tests/test_elasticsearch.py --- a/swh/search/tests/test_elasticsearch.py +++ b/swh/search/tests/test_elasticsearch.py @@ -176,7 +176,7 @@ results = { r["url"] - for r in self.search.origin_search(query='origin = "foobar"').results + for r in self.search.origin_search(query='origin : "foobar"').results } assert results == { "http://foobar.1.com", diff --git a/swh/search/tests/test_translator.py b/swh/search/tests/test_translator.py --- a/swh/search/tests/test_translator.py +++ b/swh/search/tests/test_translator.py @@ -93,7 +93,7 @@ def test_origin_and_metadata_filters(): - query = 'origin = django or metadata = "framework and web"' + query = 'origin : django or metadata : "framework and web"' expected = { "filters": { "bool": { @@ -317,7 +317,7 @@ def test_keyword_no_escape_inside_filter(): # any keyword (filter name/operator/value) inside a filter # must be considered a string. - query = r'''origin = "language in [\'go lang\', python]"''' + query = r'''origin : "language in [\'go lang\', python]"''' expected = { "filters": { "multi_match": {