module.exports = grammar({ name: 'swh_search_query_language', rules: { program: $ => repeat( choice( $.patternFilter, $.booleanFilter, $.numericFilter, $.unboundedListFilter, $.boundedListFilter, $.dateFilter, ) ), patternFilter: $ => seq($.patternField, $.patternOp, $.patternVal), patternField: $ => token(choice('url', 'metadata')), patternOp: $ => choice(' : ', ' = '), patternVal: $ => $.string, // should be REGEX booleanFilter: $ => seq($.booleanField, $.booleanOp, $.booleanVal), booleanField: $ => choice('with_visit'), booleanOp: $ => choice(' = ', ' : '), booleanVal: $ => choice($.true, $.false), numericFilter: $ => seq($.numericField, $.numericOp, $.numberVal), numericField: $ => choice('nb_visits', 'limit'), numericOp : $ => $.rangeOp, numberVal: $ => $.number, boundedListFilter: $ => choice($.visitTypeFilter, $.sortByFilter), visitTypeFilter: $ => seq($.visitTypeField, $.visitTypeOp, $.visitTypeVal), visitTypeField: $ => 'visit_types', visitTypeOp : $ => choice(':'), visitTypeVal: $ => seq('[', commaSepStr($.visitTypeOptions), ']'), visitTypeOptions: $ => choice( "any", "cran", "deb", "deposit", "ftp", "hg", "git", "nixguix", "npm", "pypi", "svn", "tar" ), sortByFilter: $ => seq($.sortByField, $.sortByOp, $.sortByVal), sortByField: $ => 'sort_by', sortByOp: $ => choice(':'), sortByVal: $ => seq('[', commaSepStr($.sortByOptions), ']'), sortByOptions: $ => choice( 'nb_visits', 'last_visit_date', 'last_eventful_visit_date', 'last_revision_date', 'last_release_date', 'date_created', 'date_modified', 'date_published' ), unboundedListFilter: $ => seq($.listField, $.listOp, $.listVal), listField: $ => choice('programming_languages', 'licenses', 'keywords'), listOp: $ => token(choice('in', 'not in')), listVal: $ => $.array, dateFilter: $ => seq($.dateField, $.rangeOp, $.dateWithOptionalTime), dateField: $ => choice( 'last_visit_date', 'last_eventful_visit_date', 'last_revision_date', 'last_release_date', 'date_created', 'date_modified', 'date_published' ), dateOp: $ => $.rangeOp, dateVal: $ => $.dateWithOptionalTime, rangeOp: $ => choice('<', '<=', '=', '!=', '>=', '>'), array: $ => seq( "[", commaSepStr($.string_content), "]" ), dateWithOptionalTime: $ => /\d{4}[-]\d{2}[-]\d{2} (\d{2}:\d{2}(:\d{2})*)*/, // ^\d{2}\/\d{2}\/\d{4}\s*(?:\d{2}:\d{2}(?::\d{2})?)?$ // Reference : https://stackoverflow.com/questions/23786905/regex-for-validating-dd-mm-yyyy-with-optional-time // Matches : // 21/05/2014 // 21/05/2014 15:54 // 21/05/2014 15:54:12 string: $ => wrapWithInvertedComma($.string_content), number : $ => /\d+/, true: $ => "true", false: $ => "false", string_content: $ => repeat1(choice( token.immediate(/[^\\"\n]+/), $.escape_sequence )), escape_sequence: $ => token.immediate(seq( '\\', /(\"|\\|\/|b|n|r|t|u)/ )), } }); function commaSep1(rule) { return seq(rule, repeat(seq(",", rule))) } function commaSep(rule) { return optional(commaSep1(rule)) } function commaSepStr(rule){ return commaSep(wrapWithInvertedComma(rule)) } function wrapWithInvertedComma(rule) { return choice( seq("'", rule, "'"), seq('"', rule, '"'), rule, ) }