diff --git a/query_language/grammar.js b/query_language/grammar.js --- a/query_language/grammar.js +++ b/query_language/grammar.js @@ -1,8 +1,19 @@ -// Copyright (C) 2019-2021 The Software Heritage developers +// Copyright (C) 2021 The Software Heritage developers // See the AUTHORS file at the top-level directory of this distribution // License: GNU General Public License version 3, or any later version // See top-level LICENSE file for more information +const { visitTypeField, sortByField, limitField } = require("./tokens.js"); +const { + patternFields, + booleanFields, + numericFields, + listFields, + dateFields +} = require("./tokens.js"); +const { equalOp, rangeOp, choiceOp } = require("./tokens.js"); +const { sortByOptions, visitTypeOptions } = require("./tokens.js"); +const { OR, AND, TRUE, FALSE } = require("./tokens.js"); const PRECEDENCE = { or: 2, @@ -16,15 +27,15 @@ rules: { query: $ => seq( $.filters, - optional($.and), - choice( - seq(optional($.sortBy), optional($.and), optional($.limit)), - seq(optional($.limit), optional($.and), optional($.sortBy)), - ), + optional(seq( + optional($.and), + choice( + seq($.sortBy, optional($.and), optional($.limit)), + seq($.limit, optional($.and), optional($.sortBy)), + ), + )) ), - - filters: $ => choice( prec.left(PRECEDENCE.and, seq( @@ -46,111 +57,66 @@ $.filter ), - sortBy: $ => seq($.sortByField, $.sortByOp, $.sortByVal), - sortByField: $ => token('sort_by'), + sortBy: $ => annotateFilter($.sortByField, $.sortByOp, $.sortByVal), + sortByField: $ => token(sortByField), sortByOp: $ => $.equalOp, sortByVal: $ => createArray(optionalWrapWith($.sortByOptions, ["'", '"'])), - sortByOptions: $ => seq(optional(token.immediate('-')), choice( - 'visits', - 'last_visit', - 'last_eventful_visit', - 'last_revision', - 'last_release', - 'created', - 'modified', - 'published' - )), + sortByOptions: $ => seq( + optional('-'), + choice(...sortByOptions) + ), - limit: $ => seq('limit', $.equalOp, $.number), + limit: $ => annotateFilter($.limitField, $.equalOp, $.number), + limitField: $ => token(limitField), - filter: $ => choice( + filter: $ => field('category', choice( $.patternFilter, $.booleanFilter, $.numericFilter, $.boundedListFilter, $.unboundedListFilter, $.dateFilter - ), + )), - patternFilter: $ => seq($.patternField, $.patternOp, $.patternVal), - patternField: $ => token(choice('origin', 'metadata')), + patternFilter: $ => annotateFilter($.patternField, $.patternOp, $.patternVal), + patternField: $ => token(choice(...patternFields)), patternOp: $ => $.equalOp, patternVal: $ => $.string, - booleanFilter: $ => seq($.booleanField, $.booleanOp, $.booleanVal), - booleanField: $ => token(choice('visited')), + booleanFilter: $ => annotateFilter($.booleanField, $.booleanOp, $.booleanVal), + booleanField: $ => token(choice(...booleanFields)), booleanOp: $ => $.equalOp, booleanVal: $ => choice($.booleanTrue, $.booleanFalse), - numericFilter: $ => seq($.numericField, $.numericOp, $.numberVal), - numericField: $ => token(choice('visits')), + numericFilter: $ => annotateFilter($.numericField, $.numericOp, $.numberVal), + numericField: $ => token(choice(...numericFields)), numericOp: $ => $.rangeOp, numberVal: $ => $.number, + // Array members must be from the given options boundedListFilter: $ => choice($.visitTypeFilter), - visitTypeFilter: $ => seq($.visitTypeField, $.visitTypeOp, $.visitTypeVal), - visitTypeField: $ => token(choice('visit_type')), + visitTypeFilter: $ => annotateFilter($.visitTypeField, $.visitTypeOp, $.visitTypeVal), + visitTypeField: $ => token(visitTypeField), visitTypeOp: $ => $.equalOp, visitTypeVal: $ => createArray(optionalWrapWith($.visitTypeOptions, ["'", '"'])), - visitTypeOptions: $ => choice( - "any", - "cran", - "deb", - "deposit", - "ftp", - "hg", - "git", - "nixguix", - "npm", - "pypi", - "svn", - "tar" - ), // TODO: fetch this list dynamically from other swh services? - - sortBy: $ => seq($.sortByField, $.sortByOp, $.sortByVal), - sortByField: $ => token(choice('sort_by')), - sortByOp: $ => $.equalOp, - sortByVal: $ => createArray(optionalWrapWith($.sortByOptions, ["'", '"'])), - sortByOptions: $ => seq( - optional('-'), - choice( - 'visits', - 'last_visit', - 'last_eventful_visit', - 'last_revision', - 'last_release', - 'created', - 'modified', - 'published' - ) - ), + visitTypeOptions: $ => choice(...visitTypeOptions), + // TODO: fetch visitTypeOptions choices dynamically from other swh services? - unboundedListFilter: $ => seq($.listField, $.listOp, $.listVal), - listField: $ => token(choice('language', 'license', 'keyword')), + // Array members can be any string + unboundedListFilter: $ => annotateFilter($.listField, $.listOp, $.listVal), + listField: $ => token(choice(...listFields)), listOp: $ => $.choiceOp, listVal: $ => createArray($.string), - - dateFilter: $ => seq($.dateField, $.dateOp, $.dateVal), - dateField: $ => token(choice( - 'last_visit', - 'last_eventful_visit', - 'last_revision', - 'last_release', - 'created', - 'modified', - 'published' - )), + dateFilter: $ => annotateFilter($.dateField, $.dateOp, $.dateVal), + dateField: $ => token(choice(...dateFields)), dateOp: $ => $.rangeOp, dateVal: $ => $.isoDateTime, - limit: $ => seq('limit', $.equalOp, $.number), - - - rangeOp: $ => token(choice('<', '<=', '=', '!=', '>=', '>')), - equalOp: $ => token('='), - choiceOp: $ => token(choice('in', 'not in')), + rangeOp: $ => token(choice(...rangeOp)), + equalOp: $ => token(choice(...equalOp)), + choiceOp: $ => token(choice(...choiceOp)), isoDateTime: $ => { const dateRegex = (/\d{4}[-]\d{2}[-]\d{2}/).source @@ -162,17 +128,19 @@ string: $ => choice(wrapWith($.stringContent, ["'", '"']), $.singleWord), number: $ => /\d+/, - booleanTrue: $ => "true", - booleanFalse: $ => "false", + booleanTrue: $ => TRUE, + booleanFalse: $ => FALSE, - or: $ => "or", - and: $ => "and", + or: $ => OR, + and: $ => AND, + singleWord: $ => /[^\s"'\[\]\(\),]+/, + + // Based on tree-sitter-json grammar: stringContent: $ => repeat1(choice( token.immediate(/[^\\'"\n]+/), $.escape_sequence )), - singleWord: $ => /[^\s"'\[\]\(\),]+/, escape_sequence: $ => token.immediate(seq( '\\', /(\"|\'|\\|\/|b|n|r|t|u)/ @@ -214,3 +182,11 @@ // The rule may or may not be wrapped with the wrappers return choice(wrapWith(rule, wrappers), rule) } + +function annotateFilter(filterField, filterOp, filterVal) { + return seq( + field('field', filterField), + field('op', filterOp), + field('value', filterVal) + ); +} diff --git a/query_language/test/corpus/combinations.txt b/query_language/test/corpus/combinations.txt --- a/query_language/test/corpus/combinations.txt +++ b/query_language/test/corpus/combinations.txt @@ -21,7 +21,7 @@ ================== last_revision > 2020-01-01 limit = 10 --- -(query (filters (filter (dateFilter (dateField) (dateOp (rangeOp)) (dateVal (isoDateTime))))) (limit (equalOp) (number))) +(query (filters (filter (dateFilter (dateField) (dateOp (rangeOp)) (dateVal (isoDateTime))))) (limit (limitField) (equalOp) (number))) ================== Origins with last visit date not in 2020-2021 (sorted by number of visits) @@ -73,3 +73,10 @@ --- (query (filters (filters (filter (patternFilter (patternField) (patternOp (equalOp)) (patternVal (string (stringContent (escape_sequence) (escape_sequence) (escape_sequence) (escape_sequence) (escape_sequence))))))))) + +================== +Incomplete conjunction operators should throw error +================== +visits > 5 and +--- +(query (filters (filter (numericFilter (numericField) (numericOp (rangeOp)) (numberVal (number))))) (ERROR (and))) diff --git a/query_language/tokens.js b/query_language/tokens.js new file mode 100644 --- /dev/null +++ b/query_language/tokens.js @@ -0,0 +1,105 @@ +// Copyright (C) 2021 The Software Heritage developers +// See the AUTHORS file at the top-level directory of this distribution +// License: GNU General Public License version 3, or any later version +// See top-level LICENSE file for more information + +// Field tokens +const visitTypeField = 'visit_type'; +const sortByField = 'sort_by'; +const limitField = 'limit'; + +// Field categories +const patternFields = ['origin', 'metadata']; +const booleanFields = ['visited']; +const numericFields = ['visits']; +const boundedListFields = [visitTypeField]; +const listFields = ['language', 'license', 'keyword']; +const dateFields = [ + 'last_visit', + 'last_eventful_visit', + 'last_revision', + 'last_release', + 'created', + 'modified', + 'published' +]; + +const fields = [].concat( + patternFields, + booleanFields, + numericFields, + boundedListFields, + listFields, + dateFields +); + +// Operators +const equalOp = ['=']; +const rangeOp = ['<', '<=', '=', '!=', '>=', '>']; +const choiceOp = ['in', 'not in']; + + +// Values +const sortByOptions = [ + 'visits', + 'last_visit', + 'last_eventful_visit', + 'last_revision', + 'last_release', + 'created', + 'modified', + 'published' +]; + +const visitTypeOptions = [ + "any", + "cran", + "deb", + "deposit", + "ftp", + "hg", + "git", + "nixguix", + "npm", + "pypi", + "svn", + "tar" +]; + +// Extra tokens +const OR = "or"; +const AND = "and"; + +const TRUE = "true"; +const FALSE = "false"; + +module.exports = { + // Field tokens + visitTypeField, + sortByField, + limitField, + + // Field categories + patternFields, + booleanFields, + numericFields, + boundedListFields, + listFields, + dateFields, + fields, + + // Operators + equalOp, + rangeOp, + choiceOp, + + // Values + sortByOptions, + visitTypeOptions, + + // Extra tokens + OR, + AND, + TRUE, + FALSE +}