Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9340347
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
10 KB
Subscribers
None
View Options
diff --git a/query_language/grammar.js b/query_language/grammar.js
index 4a02fe3..aa94745 100644
--- a/query_language/grammar.js
+++ b/query_language/grammar.js
@@ -1,184 +1,200 @@
// Copyright (C) 2019-2021 The Software Heritage developers
// See the AUTHORS file at the top-level directory of this distribution
// License: GNU General Public License version 3, or any later version
// See top-level LICENSE file for more information
// Precedence levels handed to `prec.left` by the alternatives of the
// `filters` rule. `or` has the lowest value and `bracket` the highest, so
// `a and b or c` groups as `(a and b) or c`, and a parenthesised group is
// kept together before either operator is applied.
const PRECEDENCE = {
or: 2,
and: 3,
bracket: 4,
}
module.exports = grammar({
name: 'swh_search_ql',
rules: {
- query: $ => $.filters,
+ query: $ => seq($.filters, optional($.sortBy) ,optional($.limit)),
filters: $ => choice(
prec.left(PRECEDENCE.and,
seq(
field('left', $.filters),
field('operator', $.and),
field('right', $.filters),
)
),
prec.left(PRECEDENCE.or,
seq(
field('left', $.filters),
field('operator', $.or),
field('right', $.filters),
)
),
prec.left(PRECEDENCE.bracket,
seq("(", $.filters, ")"),
),
$.filter
),
+ sortBy: $ => seq($.sortByField, $.sortByOp, $.sortByVal),
+ sortByField: $ => token('sort_by'),
+ sortByOp: $ => $.equalOp,
+ sortByVal: $ => createArray(optionalWrapWith($.sortByOptions, ["'", '"'])),
+ sortByOptions: $ => seq(optional(token.immediate('-')) ,choice(
+ 'visits',
+ 'last_visit',
+ 'last_eventful_visit',
+ 'last_revision',
+ 'last_release',
+ 'created',
+ 'modified',
+ 'published'
+ )),
+
+ limit: $ => seq('limit', $.equalOp, $.number),
+
filter: $ => choice(
$.patternFilter,
$.booleanFilter,
$.numericFilter,
$.boundedListFilter,
$.unboundedListFilter,
- $.dateFilter,
- $.limitFilter
+ $.dateFilter
),
patternFilter: $ => seq($.patternField, $.patternOp, $.patternVal),
patternField: $ => token(choice('origin', 'metadata')),
patternOp: $ => $.equalOp,
patternVal: $ => $.string,
booleanFilter: $ => seq($.booleanField, $.booleanOp, $.booleanVal),
booleanField: $ => token(choice('visited')),
booleanOp: $ => $.equalOp,
booleanVal: $ => choice($.booleanTrue, $.booleanFalse),
numericFilter: $ => seq($.numericField, $.numericOp, $.numberVal),
numericField: $ => token(choice('visits')),
numericOp: $ => $.rangeOp,
numberVal: $ => $.number,
- boundedListFilter: $ => choice($.visitTypeFilter, $.sortByFilter),
+ boundedListFilter: $ => choice($.visitTypeFilter),
visitTypeFilter: $ => seq($.visitTypeField, $.visitTypeOp, $.visitTypeVal),
visitTypeField: $ => token(choice('visit_type')),
visitTypeOp: $ => $.equalOp,
visitTypeVal: $ => createArray(optionalWrapWith($.visitTypeOptions, ["'", '"'])),
visitTypeOptions: $ => choice(
"any",
"cran",
"deb",
"deposit",
"ftp",
"hg",
"git",
"nixguix",
"npm",
"pypi",
"svn",
"tar"
), // TODO: fetch this list dynamically from other swh services?
- sortByFilter: $ => seq($.sortByField, $.sortByOp, $.sortByVal),
+ sortBy: $ => seq($.sortByField, $.sortByOp, $.sortByVal),
sortByField: $ => token(choice('sort_by')),
sortByOp: $ => $.equalOp,
sortByVal: $ => createArray(optionalWrapWith($.sortByOptions, ["'", '"'])),
sortByOptions: $ => seq(
optional(token.immediate('-')),
choice(
'visits',
'last_visit',
'last_eventful_visit',
'last_revision',
'last_release',
'created',
'modified',
'published'
)),
unboundedListFilter: $ => seq($.listField, $.listOp, $.listVal),
listField: $ => token(choice('language', 'license', 'keyword')),
listOp: $ => $.choiceOp,
listVal: $ => createArray($.string),
dateFilter: $ => seq($.dateField, $.dateOp, $.dateVal),
dateField: $ => token(choice(
'last_visit',
'last_eventful_visit',
'last_revision',
'last_release',
'created',
'modified',
'published'
)),
dateOp: $ => $.rangeOp,
dateVal: $ => $.isoDateTime,
- limitFilter: $ => seq('limit', $.equalOp, $.number),
+ limit: $ => seq('limit', $.equalOp, $.number),
rangeOp: $ => token(choice('<', '<=', '=', '!=', '>=', '>')),
equalOp: $ => token('='),
choiceOp: $ => token(choice('in', 'not in')),
isoDateTime: $ => /\d{4}[-]\d{2}[-]\d{2}(\s|T)*(\d{2}:\d{2}(:\d{2})?)?(Z)?/,
string: $ => choice(wrapWith($.stringContent, ["'", '"']), $.singleWord),
number: $ => /\d+/,
booleanTrue: $ => "true",
booleanFalse: $ => "false",
or: $ => "or",
and: $ => "and",
stringContent: $ => repeat1(choice(
token.immediate(/[^\\"\n]+/),
$.escape_sequence
)),
singleWord: $ => /[^\s"'\[\]\(\)]+/,
escape_sequence: $ => token.immediate(seq(
'\\',
/(\"|\'|\\|\/|b|n|r|t|u)/
)),
}
});
/**
 * Builds a rule that matches `rule` once, followed by any number of `sep`
 * occurrences, each of which may (but need not) be followed by another
 * `rule`. Trailing and consecutive separators are therefore accepted.
 *
 * @param rule - grammar rule for a single list element
 * @param sep - grammar rule or literal used as the separator
 * @returns the combined sequence rule
 */
function joinBySep1(rule, sep) {
  const separatedTail = seq(sep, optional(rule))
  return seq(rule, repeat(separatedTail))
}
/**
 * Builds a rule matching a possibly empty list of `rule` joined by `sep`:
 * the non-empty list produced by `joinBySep1`, made optional.
 *
 * @param rule - grammar rule for a single list element
 * @param sep - separator between elements (defaults to a comma)
 * @returns the optional list rule
 */
function joinBySep(rule, sep = ",") {
  const nonEmptyList = joinBySep1(rule, sep)
  return optional(nonEmptyList)
}
/**
 * Builds a rule for a bracketed, comma-separated array whose elements are
 * `rule`. Each element is exposed under the field name `array_member`.
 *
 * @param rule - grammar rule for a single array element
 * @returns the sequence rule `[` members `]`
 */
function createArray(rule) {
  const member = field('array_member', rule)
  const members = joinBySep(member, ",")
  return seq("[", members, "]")
}
/**
 * Builds a rule that requires `rule` to be enclosed by the same wrapper on
 * both sides, offering one alternative per entry in `wrappers`.
 *
 * @param rule - grammar rule to enclose
 * @param wrappers - candidate delimiters (defaults to single and double quote)
 * @returns a choice over the wrapped alternatives
 */
function wrapWith(rule, wrappers = ["'", '"']) {
  const surround = delimiter => seq(delimiter, rule, delimiter)
  return choice(...wrappers.map(surround))
}
/**
 * Builds a rule that accepts `rule` either enclosed by one of `wrappers`
 * (via `wrapWith`) or bare; the wrapped form is listed first.
 *
 * @param rule - grammar rule to match
 * @param wrappers - candidate delimiters (defaults to single and double quote)
 * @returns a choice between the wrapped and the bare rule
 */
function optionalWrapWith(rule, wrappers = ["'", '"']) {
  const quoted = wrapWith(rule, wrappers)
  return choice(quoted, rule)
}
diff --git a/query_language/test/corpus/combinations.txt b/query_language/test/corpus/combinations.txt
index cf66e84..07802ea 100644
--- a/query_language/test/corpus/combinations.txt
+++ b/query_language/test/corpus/combinations.txt
@@ -1,76 +1,75 @@
==============================
Empty query (should throw error)
==============================
---
(ERROR)
==================
Origins with django in their url, python language, and at least 5 visits
==================
origin = django and language in ["python"] and visits >= 5
---
(query (filters (filters (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (singleWord)))))) (and) (filters (filter (unboundedListFilter (listField) (listOp (choiceOp)) (listVal (string (stringContent))))))) (and) (filters (filter (numericFilter (numericField) (numericOp (rangeOp)) (numberVal (number)))))))
==================
10 origins with latest revision after 2020-01-01
==================
-last_revision > 2020-01-01 and limit = 10
+last_revision > 2020-01-01 limit = 10
---
-
-(query (filters (filters (filter (dateFilter (dateField) (dateOp (rangeOp)) (dateVal (isoDateTime))))) (and) (filters (filter (limitFilter (equalOp) (number))))))
+(query (filters (filter (dateFilter (dateField) (dateOp (rangeOp)) (dateVal (isoDateTime))))) (limit (equalOp) (number)))
==================
-Origins with last visit date not in 2020-2021
+Origins with last visit date not in 2020-2021 (sorted by number of visits)
==================
-last_visit > 2021-01-01 or last_visit < 2020-01-01
+last_visit > 2021-01-01 or last_visit < 2020-01-01 sort_by = ["visits"]
---
-(query (filters (filters (filter (dateFilter (dateField) (dateOp (rangeOp)) (dateVal (isoDateTime))))) (or) (filters (filter (dateFilter (dateField) (dateOp (rangeOp)) (dateVal (isoDateTime)))))))
+(query (filters (filters (filter (dateFilter (dateField) (dateOp (rangeOp)) (dateVal (isoDateTime))))) (or) (filters (filter (dateFilter (dateField) (dateOp (rangeOp)) (dateVal (isoDateTime)))))) (sortBy (sortByField) (sortByOp (equalOp)) (sortByVal (sortByOptions))))
==================
Unvisited origins with kubernetes in metadata or minikube in url
==================
visited = false and metadata = "kubernetes" or origin = "minikube"
---
(query (filters (filters (filters (filter (booleanFilter (booleanField) (booleanOp (equalOp)) (booleanVal (booleanFalse))))) (and) (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (stringContent))))))) (or) (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (stringContent))))))))
==================
Origins with "orchestration" or "kubectl" as keywords and language as "go" or "rust"
==================
keyword in ["orchestration", "kubectl"] and language in ["go", "rust"]
---
(query (filters (filters (filter (unboundedListFilter (listField) (listOp (choiceOp)) (listVal (string (stringContent)) (string (stringContent)))))) (and) (filters (filter (unboundedListFilter (listField) (listOp (choiceOp)) (listVal (string (stringContent)) (string (stringContent))))))))
==================
Origins with a GPL-3 license that have "debian" in their url or have visit type as "deb"
==================
(origin = debian or visit_type = ["deb"]) and license in ["GPL-3"]
---
(query (filters (filters (filters (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (singleWord)))))) (or) (filters (filter (boundedListFilter (visitTypeFilter (visitTypeField) (visitTypeOp (equalOp)) (visitTypeVal (visitTypeOptions)))))))) (and) (filters (filter (unboundedListFilter (listField) (listOp (choiceOp)) (listVal (string (stringContent))))))))
==================
-Origins with 'and' and 'or' inside filter values
+Origins with `and` and `or` inside filter values
==================
(origin = "foo and bar or baz")
---
(query (filters (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (stringContent))))))))
==================
Origins with `'` and `"` inside filter values
==================
(origin = "foo \\ \'bar\' \"baz\" ")
---
(query (filters (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (stringContent (escape_sequence) (escape_sequence) (escape_sequence) (escape_sequence) (escape_sequence)))))))))
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Jul 4 2025, 10:34 AM (4 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3351891
Attached To
rDSEA Archive search
Event Timeline
Log In to Comment