diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -39,3 +39,4 @@
 cypress/junit/
 cypress/downloads/
 .eslintcache
+tree-sitter-swh_search_ql.wasm
diff --git a/Makefile.local b/Makefile.local
--- a/Makefile.local
+++ b/Makefile.local
@@ -5,12 +5,13 @@
 SETTINGS_TEST ?= swh.web.settings.tests
 SETTINGS_DEV ?= swh.web.settings.development
 SETTINGS_PROD = swh.web.settings.production
+SWH_SEARCH_DIR := $(shell python3 -c "import os;from swh import search; print(os.path.dirname(search.__file__))")
 
 yarn-install: package.json
 	$(YARN) install --frozen-lockfile
 
 .PHONY: build-webpack-dev
-build-webpack-dev: yarn-install
+build-webpack-dev:
 	$(YARN) build-dev
 
 .PHONY: build-webpack-test
@@ -123,3 +124,8 @@
 # https://github.com/typeddjango/django-stubs/issues/166
 check-mypy:
 	DJANGO_SETTINGS_MODULE=$(SETTINGS_DEV) $(MYPY) $(MYPYFLAGS) swh
+
+.PHONY: build-ts-wasm
+build-ts-wasm:
+	yarn run tree-sitter build-wasm $(SWH_SEARCH_DIR)/query_language
+	cp $(SWH_SEARCH_DIR)/query_language/tokens.js assets/tokens.js
diff --git a/assets/config/webpack.config.development.js b/assets/config/webpack.config.development.js
--- a/assets/config/webpack.config.development.js
+++ b/assets/config/webpack.config.development.js
@@ -149,10 +149,16 @@
     alias: {
       'pdfjs-dist': 'pdfjs-dist/build/pdf.min.js'
     },
+    // for web-tree-sitter
+    fallback: {
+      'path': false,
+      'fs': false
+    },
     // configure base paths for resolving modules with webpack
     modules: [
       'node_modules',
-      path.resolve(__dirname, '../src')
+      path.resolve(__dirname, '../src'),
+      path.resolve(__dirname, '../../../swh-search/query_language/')
     ]
   },
   stats: 'errors-warnings',
@@ -212,6 +218,18 @@
         }
       }]
     },
+    {
+      test: /\.wasm$/,
+      type: 'javascript/auto',
+      use: [{
+        loader: 'file-loader',
+        options: {
+          name: '[name].[ext]',
+          outputPath: 'js/'
+        }
+      }]
+    },
+
     // expose jquery to the global context as $ and jQuery when importing it
     {
       test: require.resolve('jquery'),
@@ -370,6 +388,14 @@
         {
           from: path.resolve(nodeModules, 'mathjax/es5/output/chtml/fonts/woff-v2/**'),
           to: path.resolve(__dirname, '../../static/fonts/[name][ext]')
+        },
+        {
+          from: path.resolve(__dirname, '../../tree-sitter-swh_search_ql.wasm'),
+          to: path.resolve(__dirname, '../../static/js/swh_ql.wasm')
+        },
+        {
+          from: path.resolve(__dirname, '../tokens.js'),
+          to: path.resolve(__dirname, '../../static/js/')
         }
       ]
     }),
diff --git a/assets/src/bundles/browse/origin-search.js b/assets/src/bundles/browse/origin-search.js
--- a/assets/src/bundles/browse/origin-search.js
+++ b/assets/src/bundles/browse/origin-search.js
@@ -6,6 +6,7 @@
  */
 
 import {handleFetchError, errorMessageFromResponse, isArchivedOrigin} from 'utils/functions';
+import {initAutocomplete} from 'utils/search-ql-autocomplete';
 
 const limit = 100;
 const linksPrev = [];
@@ -195,6 +196,15 @@
 
 export function initOriginSearch() {
   $(document).ready(() => {
+    const inputBox = document.querySelector('#swh-origins-url-patterns');
+    const submitBtn = document.querySelector('#swh-search-submit');
+    if (inputBox && submitBtn) {
+      // the submit button stays disabled while the query does not parse
+      initAutocomplete(inputBox, (isValid) => {
+        submitBtn.disabled = !isValid;
+      });
+    }
+
     $('#swh-search-origins').submit(event => {
       event.preventDefault();
       if (event.target.checkValidity()) {
diff --git a/assets/src/utils/autocomplete.css b/assets/src/utils/autocomplete.css
new file mode 100644
--- /dev/null
+++ b/assets/src/utils/autocomplete.css
@@ -0,0 +1,50 @@
+.autocomplete {
+  position: relative;
+  display: inline-block;
+}
+
+input.invalid {
+  outline: none !important;
+
+  /* border: 2px solid red; */
+}
+
+/* position the autocomplete items to be the same width as the container: */
+.autocomplete-items {
+  position: absolute;
+  border: 1px solid #d4d4d4;
+  width: 200px;
+  border-top: none;
+  z-index: 99998;
+  top: 100%;
+  left: 0;
+  right: 0;
+
+  /*
+  z-index: 99999; is taken by swh-top-bar
+  overflow-y: scroll;
+  */
+}
+
+.autocomplete-items div {
+  padding: 3px;
+  padding-left: 5px;
+  cursor: pointer;
+  background-color: #fff;
+
+  /*
+  font-size: 15px;
+  border-bottom: 1px solid #d4d4d4;
+  */
+}
+
+/* when hovering an item: */
+.autocomplete-items div:hover {
+  background-color: #e9e9e9;
+}
+
+/* when navigating through the items using the arrow keys: */
+.autocomplete-active {
+  background-color: #e20026 !important;
+  color: #fff;
+}
diff --git a/assets/src/utils/autocomplete.js b/assets/src/utils/autocomplete.js
new file mode 100644
--- /dev/null
+++ b/assets/src/utils/autocomplete.js
@@ -0,0 +1,178 @@
+import 'utils/autocomplete.css';
+
+// maximum number of suggestions rendered in the dropdown
+const MAX_DISPLAYED_SUGGESTIONS = 10;
+
+// legacy key codes handled by the widget
+const KEY_TAB = 9;
+const KEY_ENTER = 13;
+const KEY_ESCAPE = 27;
+const KEY_UP = 38;
+const KEY_DOWN = 40;
+
+/**
+ * Dropdown of suggestions attached to a text input.
+ *
+ * A `div.autocomplete-items` element is appended to the parent of the
+ * input and filled with the entries of `suggestions` that contain the
+ * token currently being typed (the text following the last whitespace).
+ * `suggestions` is a plain property: its owner may reassign it at any
+ * time and call updateLists() to refresh the dropdown.
+ */
+export class Autocomplete {
+  /**
+   * @param {Object} params
+   * @param {HTMLInputElement} params.inputBox input the dropdown is bound to
+   * @param {string[]} params.suggestions initial candidate strings
+   */
+  constructor(params) {
+    const {inputBox, suggestions} = params;
+    this.inputBox = inputBox;
+    this.suggestions = suggestions;
+    // index of the highlighted item, -1 when no item is highlighted
+    this.currentIndex = -1;
+
+    this.autocompleteList = document.createElement('div');
+    this.autocompleteList.setAttribute('class', 'autocomplete-items');
+    this.inputBox.parentNode.appendChild(this.autocompleteList);
+
+    this.initListeners();
+  }
+
+  /** Register the input, keyboard and document level event handlers. */
+  initListeners() {
+    this.inputBox.addEventListener('focus', this.updateLists.bind(this));
+    this.inputBox.addEventListener('input', this.updateLists.bind(this));
+
+    this.inputBox.addEventListener('keydown', (e) => {
+      if (e.keyCode === KEY_DOWN) {
+        e.preventDefault();
+        this.currentIndex++;
+        this.addActive();
+      } else if (e.keyCode === KEY_UP) {
+        e.preventDefault();
+        this.currentIndex--;
+        this.addActive();
+      } else if (e.keyCode === KEY_ENTER || e.keyCode === KEY_TAB) {
+        // Only hijack the key when an item is highlighted: otherwise Enter
+        // must still submit the form and Tab must still move the focus.
+        const activeItem = this.autocompleteList.children[this.currentIndex];
+        if (activeItem) {
+          e.preventDefault();
+          // Simulate a click on the "active" item
+          activeItem.click();
+        }
+      } else if (e.keyCode === KEY_ESCAPE) {
+        e.preventDefault();
+        // No argument on purpose: the event target is the input box, which
+        // removeAllItems() would interpret as "keep the list open".
+        this.removeAllItems();
+      }
+    });
+
+    document.addEventListener('click', (e) => { this.removeAllItems(e.target); });
+  }
+
+  /**
+   * Rebuild the dropdown from the current value of the input: keep the
+   * suggestions containing the token being typed, render at most
+   * MAX_DISPLAYED_SUGGESTIONS of them and highlight the first one.
+   */
+  updateLists() {
+    const inputValue = this.inputBox.value;
+    // A trailing whitespace means the previous token is complete, so every
+    // suggestion is a candidate for the next one (empty token matches all).
+    const lastToken = /\s$/.test(inputValue) ? '' : inputValue.split(/\s+/).pop();
+
+    // close any already open list of autocompleted values
+    this.removeAllItems();
+
+    const matches = (this.suggestions ?? []).filter(s => s.includes(lastToken));
+
+    matches.slice(0, MAX_DISPLAYED_SUGGESTIONS).forEach(suggestion => {
+      const itemDiv = document.createElement('div');
+      const matchStart = suggestion.indexOf(lastToken);
+      const matchEnd = matchStart + lastToken.length;
+
+      // Build the label from text nodes instead of innerHTML so that
+      // suggestions such as '<' or '<=' are never parsed as markup.
+      if (lastToken === '') {
+        itemDiv.textContent = suggestion;
+      } else {
+        // NOTE(review): emphasis of the matched part is assumed to be <strong>
+        const matched = document.createElement('strong');
+        matched.textContent = suggestion.slice(matchStart, matchEnd);
+        itemDiv.append(suggestion.slice(0, matchStart), matched, suggestion.slice(matchEnd));
+      }
+
+      itemDiv.setAttribute('data-value', suggestion);
+      itemDiv.setAttribute('data-editable-suggestion', 'false');
+      itemDiv.setAttribute('title', 'Include repos with the provided term in their url (origin)');
+
+      // The suggestion is captured by the closure: reading it back from
+      // e.target would fail when the click lands on the <strong> child.
+      itemDiv.addEventListener('click', () => { this.insertSuggestion(suggestion); });
+
+      this.autocompleteList.appendChild(itemDiv);
+    });
+
+    if (matches.length > 0) {
+      // Select first element on each update
+      this.currentIndex = 0;
+      this.addActive();
+    }
+  }
+
+  /**
+   * Insert a suggestion in the input: append it when the input is empty or
+   * ends with a whitespace, otherwise replace the token being typed.
+   *
+   * @param {string} toInsert text of the selected suggestion
+   */
+  insertSuggestion(toInsert) {
+    const oldValue = this.inputBox.value;
+    // strip the partially typed token (if any), keep what precedes it
+    this.inputBox.value = oldValue.replace(/\S+$/, '') + toInsert;
+    // blur then focus fires the 'focus' listeners, which refresh the list
+    this.inputBox.blur();
+    this.inputBox.focus();
+  }
+
+  /**
+   * Highlight the item at `currentIndex`, wrapping around both ends of
+   * the list. Does nothing when the list is empty.
+   */
+  addActive() {
+    if (!this.autocompleteList) return false;
+    const n = this.autocompleteList.childElementCount;
+    // start by removing the "active" class on all items
+    this.removeActive();
+    if (n === 0) {
+      this.currentIndex = -1;
+      return false;
+    }
+    if (this.currentIndex >= n) this.currentIndex = 0;
+    if (this.currentIndex < 0) this.currentIndex = n - 1;
+    this.autocompleteList.children[this.currentIndex].classList.add('autocomplete-active');
+  }
+
+  /** Remove the "active" class from all autocomplete items. */
+  removeActive() {
+    Array.from(this.autocompleteList.children).forEach(autocompleteItem => {
+      autocompleteItem.classList.remove('autocomplete-active');
+    });
+  }
+
+  /**
+   * Empty the dropdown, unless `element` is the input box itself (a click
+   * inside the input must not close the list).
+   *
+   * @param {EventTarget} [element] target of the event closing the list
+   */
+  removeAllItems(element) {
+    if (element !== this.inputBox && this.autocompleteList) {
+      this.autocompleteList.innerHTML = '';
+      this.currentIndex = -1;
+    }
+  }
+}
diff --git a/assets/src/utils/search-ql-autocomplete.js b/assets/src/utils/search-ql-autocomplete.js
new file mode 100644
--- /dev/null
+++ b/assets/src/utils/search-ql-autocomplete.js
@@ -0,0 +1,240 @@
+import {staticAsset} from 'utils/functions';
+import 'web-tree-sitter/tree-sitter.wasm';
+import Parser from 'web-tree-sitter';
+import {Autocomplete} from 'utils/autocomplete.js';
+import {
+  fields, limitField, sortByField, // fields
+  sortByOptions, visitTypeOptions, // options
+  equalOp, containOp, rangeOp, choiceOp, // operators
+  AND, OR, TRUE, FALSE // special tokens
+} from '../../tokens.js';
+
+const filterNames = fields.concat(sortByField, limitField);
+
+// Description of each filter of the grammar: node types of its field,
+// operator and value, plus the values proposed to the user.
+const languageSyntax = [
+  {
+    category: 'patternFilter',
+    field: 'patternField',
+    operator: 'containOp',
+    value: 'patternVal',
+    suggestion: ['string', '"string"']
+  },
+  {
+    category: 'booleanFilter',
+    field: 'booleanField',
+    operator: 'equalOp',
+    value: 'booleanVal',
+    suggestion: [TRUE, FALSE]
+  },
+  {
+    category: 'numericFilter',
+    field: 'numericField',
+    operator: 'rangeOp',
+    value: 'numberVal',
+    suggestion: ['15']
+  },
+  {
+    category: 'boundedListFilter',
+    field: 'visitTypeField',
+    operator: 'equalOp',
+    value: 'visitTypeVal',
+    options: visitTypeOptions,
+    suggestion: ['[']
+  },
+  {
+    category: 'unboundedListFilter',
+    field: 'listField',
+    operator: 'choiceOp',
+    value: 'listVal',
+    options: ['string', '"string"'],
+    suggestion: ['[']
+  },
+  {
+    category: 'dateFilter',
+    field: 'dateField',
+    operator: 'rangeOp',
+    value: 'dateVal',
+    suggestion: ['2000-01-01', '2000-01-01T00:00Z']
+  },
+  {
+    category: 'sortBy',
+    field: 'sortByField',
+    operator: 'equalOp',
+    value: 'sortByVal',
+    options: sortByOptions,
+    suggestion: ['[']
+  },
+  {
+    category: 'limit',
+    field: 'limit',
+    operator: 'equalOp',
+    value: 'number',
+    suggestion: ['50']
+  }
+];
+
+const filterOperators = {equalOp, containOp, choiceOp, rangeOp};
+
+// Depth-first search of the first node tree-sitter flagged as missing,
+// null when the subtree rooted at `node` has none.
+const findMissingNode = (node) => {
+  if (node.isMissing()) {
+    return node;
+  }
+  for (const child of node.children) {
+    const missingNode = findMissingNode(child);
+    if (missingNode !== null) {
+      return missingNode;
+    }
+  }
+  return null;
+};
+
+// True when `parent` merely wraps `child`: it has a single named child
+// (and is not an ERROR node) or spans exactly the same columns.
+const isWrapperNode = (child, parent) => {
+  if (!child || !parent) return false;
+  if (parent.namedChildren.length === 1 && parent.type !== 'ERROR') return true;
+  return (
+    (child.startPosition.column === parent.startPosition.column) &&
+    (child.endPosition.column === parent.endPosition.column)
+  );
+};
+
+// True for ERROR nodes and for nodes whose type is a filter category.
+const isCategoryNode = (node) => {
+  if (!node) return false;
+  return node.type === 'ERROR' || languageSyntax.some(f => f.category === node.type);
+};
+
+/**
+ * Compute the tokens that may follow the last token located before the
+ * caret of `inputBox`, given the syntax tree of its value.
+ *
+ * @param {Parser.Tree} tree syntax tree of the current query
+ * @param {HTMLInputElement} inputBox input holding the query
+ * @returns {string[]} suggestions, possibly empty
+ */
+const suggestNextNode = (tree, inputBox) => {
+  const query = inputBox.value;
+
+  // index of the last non blank character located before the caret
+  let lastTokenIndex = inputBox.selectionStart - 1;
+  while (lastTokenIndex >= 0 && query[lastTokenIndex] === ' ') {
+    lastTokenIndex--;
+  }
+  if (lastTokenIndex < 0) {
+    // nothing before the caret: a query starts with a filter or a group
+    return filterNames.concat('(');
+  }
+
+  const lastTokenPosition = {row: 0, column: lastTokenIndex};
+  const lastTokenNode = tree.rootNode.descendantForPosition(lastTokenPosition, lastTokenPosition);
+
+  const missingNode = findMissingNode(tree.rootNode);
+
+  // Find last token node wrapper
+  let lastTokenNodeWrapper = lastTokenNode;
+  while (isWrapperNode(lastTokenNodeWrapper, lastTokenNodeWrapper.parent)) {
+    lastTokenNodeWrapper = lastTokenNodeWrapper.parent;
+  }
+
+  // Find last token node wrapper sibling
+  const lastTokenNodeWrapperSibling = lastTokenNodeWrapper.previousSibling;
+
+  // Find current filter category; stays null when no ancestor is one
+  let currentFilterCategory = lastTokenNode;
+  while (currentFilterCategory && !isCategoryNode(currentFilterCategory)) {
+    currentFilterCategory = currentFilterCategory.parent;
+  }
+
+  // Suggest options for array valued filters
+  if ((lastTokenNode.type === ',' && lastTokenNodeWrapper.type.indexOf('Val') > 0) ||
+      (lastTokenNode.type === '[' && currentFilterCategory)
+  ) {
+    // no filter matches when the category is an ERROR node (or is missing)
+    const filter = languageSyntax.find(f => f.category === currentFilterCategory?.type);
+    return filter?.options ?? [];
+  }
+  if (
+    (!tree.rootNode.hasError() && (lastTokenNodeWrapper.type.indexOf('Val') > 0)) ||
+    (lastTokenNode.type === ')' || lastTokenNode.type === ']')
+  ) {
+    // Suggest AND/OR
+    return [AND, OR];
+  }
+  if (missingNode) {
+    // Suggest missing nodes (Automatically suggested by Tree-sitter)
+    if (missingNode.type === ')') {
+      return [AND, OR, ')'];
+    } else if (missingNode.type === ']') {
+      return [',', ']'];
+    }
+  }
+
+  if (lastTokenNode.type === 'ERROR' ||
+      lastTokenNode.type === '(' ||
+      lastTokenNode.type === AND ||
+      lastTokenNode.type === OR
+  ) {
+    // Suggest field names
+    return filterNames.concat('(');
+  }
+  const fieldFilter = languageSyntax.find(f => f.field === lastTokenNode.type);
+  if (fieldFilter) {
+    // Suggest operators
+    return filterOperators[fieldFilter.operator];
+  }
+  if (lastTokenNode.type in filterOperators) {
+    // Suggest values; the sibling preceding the operator is the field
+    const filter = languageSyntax.find(f => f.field === lastTokenNodeWrapperSibling?.type);
+    return filter?.suggestion ?? [];
+  }
+
+  return [];
+};
+
+/**
+ * Bind the search query language autocompletion to an input.
+ *
+ * The tree-sitter parser and the grammar are loaded asynchronously; once
+ * ready, each 'input' and 'focus' event parses the query, reports its
+ * validity and refreshes the suggestions of the dropdown.
+ *
+ * @param {HTMLInputElement} inputBox input holding the query
+ * @param {function(boolean): void} validQueryCallback called with true
+ *   when the query parses without error, false otherwise
+ */
+export const initAutocomplete = (inputBox, validQueryCallback) => {
+  Parser.init().then(async() => {
+    const parser = new Parser();
+    const swhSearchQL = await Parser.Language.load(staticAsset('js/swh_ql.wasm'));
+    parser.setLanguage(swhSearchQL);
+
+    const autocomplete = new Autocomplete(
+      {inputBox, suggestions: ['('].concat(filterNames)}
+    );
+
+    const refreshSuggestions = () => {
+      const tree = parser.parse(inputBox.value);
+      try {
+        validQueryCallback(!tree.rootNode.hasError());
+        autocomplete.suggestions = suggestNextNode(tree, inputBox);
+      } finally {
+        // trees live in the WebAssembly heap and are not garbage collected
+        tree.delete();
+      }
+      autocomplete.updateLists();
+    };
+
+    // 'input' (not 'keydown') so that the value parsed is the one resulting
+    // from the keystroke; 'focus' also fires after a suggestion is inserted.
+    inputBox.addEventListener('input', refreshSuggestions);
+    inputBox.addEventListener('focus', refreshSuggestions);
+  }).catch((error) => {
+    // autocompletion is a best-effort feature: report and leave the form usable
+    console.error('Unable to initialize the search query autocompletion', error);
+  });
+};
diff --git a/assets/src/utils/tokens.js b/assets/src/utils/tokens.js
new file mode 100644
--- /dev/null
+++ b/assets/src/utils/tokens.js
@@ -0,0 +1,109 @@
+// Copyright (C) 2021 The Software Heritage developers
+// See the AUTHORS file at the top-level directory of this distribution
+// License: GNU General Public License version 3, or any later version
+// See top-level LICENSE file for more information
+
+// Field tokens
+const visitTypeField = 'visit_type';
+const sortByField = 'sort_by';
+const limitField = 'limit';
+
+// Field categories
+const patternFields = ['origin', 'metadata'];
+const booleanFields = ['visited'];
+const numericFields = ['visits'];
+const boundedListFields = [visitTypeField];
+const listFields = ['language', 'license', 'keyword'];
+const dateFields = [
+  'last_visit',
+  'last_eventful_visit',
+  'last_revision',
+  'last_release',
+  'created',
+  'modified',
+  'published'
+];
+
+const fields = [].concat(
+  patternFields,
+  booleanFields,
+  numericFields,
+  boundedListFields,
+  listFields,
+  dateFields
+);
+
+// Operators
+const equalOp = ['='];
+const containOp = [':'];
+const rangeOp = ['<', '<=', '=', '!=', '>=', '>'];
+const choiceOp = ['in', 'not in'];
+
+// Values
+const sortByOptions = [
+  'visits',
+  'last_visit',
+  'last_eventful_visit',
+  'last_revision',
+  'last_release',
+  'created',
+  'modified',
+  'published'
+];
+
+const visitTypeOptions = [
+  'any',
+  'bzr',
+  'cran',
+  'cvs',
+  'deb',
+  'deposit',
+  'ftp',
+  'hg',
+  'git',
+  'nixguix',
+  'npm',
+  'opam',
+  'pypi',
+  'svn',
+  'tar'
+];
+
+// Extra tokens
+const OR = 'or';
+const AND = 'and';
+
+const TRUE = 'true';
+const FALSE = 'false';
+
+module.exports = {
+  // Field tokens
+  visitTypeField,
+  sortByField,
+  limitField,
+
+  // Field categories
+  patternFields,
+  booleanFields,
+  numericFields,
+  boundedListFields,
+  listFields,
+  dateFields,
+  fields,
+
+  
equalOp, + containOp, + rangeOp, + choiceOp, + + // Values + sortByOptions, + visitTypeOptions, + + // Extra tokens + OR, + AND, + TRUE, + FALSE +}; diff --git a/assets/tokens.js b/assets/tokens.js new file mode 100644 --- /dev/null +++ b/assets/tokens.js @@ -0,0 +1,109 @@ +// Copyright (C) 2021 The Software Heritage developers +// See the AUTHORS file at the top-level directory of this distribution +// License: GNU General Public License version 3, or any later version +// See top-level LICENSE file for more information + +// Field tokens +const visitTypeField = 'visit_type'; +const sortByField = 'sort_by'; +const limitField = 'limit'; + +// Field categories +const patternFields = ['origin', 'metadata']; +const booleanFields = ['visited']; +const numericFields = ['visits']; +const boundedListFields = [visitTypeField]; +const listFields = ['language', 'license', 'keyword']; +const dateFields = [ + 'last_visit', + 'last_eventful_visit', + 'last_revision', + 'last_release', + 'created', + 'modified', + 'published' +]; + +const fields = [].concat( + patternFields, + booleanFields, + numericFields, + boundedListFields, + listFields, + dateFields +); + +// Operators +const equalOp = ['=']; +const containOp = [':']; +const rangeOp = ['<', '<=', '=', '!=', '>=', '>']; +const choiceOp = ['in', 'not in']; + +// Values +const sortByOptions = [ + 'visits', + 'last_visit', + 'last_eventful_visit', + 'last_revision', + 'last_release', + 'created', + 'modified', + 'published' +]; + +const visitTypeOptions = [ + 'any', + 'bzr', + 'cran', + 'cvs', + 'deb', + 'deposit', + 'ftp', + 'hg', + 'git', + 'nixguix', + 'npm', + 'opam', + 'pypi', + 'svn', + 'tar' +]; + +// Extra tokens +const OR = 'or'; +const AND = 'and'; + +const TRUE = 'true'; +const FALSE = 'false'; + +module.exports = { + // Field tokens + visitTypeField, + sortByField, + limitField, + + // Field categories + patternFields, + booleanFields, + numericFields, + boundedListFields, + listFields, + dateFields, + fields, + + 
// Operators
+  equalOp,
+  containOp,
+  rangeOp,
+  choiceOp,
+
+  // Values
+  sortByOptions,
+  visitTypeOptions,
+
+  // Extra tokens
+  OR,
+  AND,
+  TRUE,
+  FALSE
+};
diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -18,6 +18,12 @@
 [mypy-django_js_reverse.*]
 ignore_missing_imports = True
 
+[mypy-django_plugin.*]
+no_implicit_reexport = False
+
+[mypy-drf_plugin.main]
+no_implicit_reexport = False
+
 [mypy-htmlmin.*]
 ignore_missing_imports = True
 
diff --git a/package.json b/package.json
--- a/package.json
+++ b/package.json
@@ -9,7 +9,8 @@
     "build": "NODE_ENV=production webpack --config assets/config/webpack.config.production.js --color",
     "mochawesome": "mochawesome-merge cypress/mochawesome/results/*.json > cypress/mochawesome/mochawesome.json && marge -o cypress/mochawesome/report cypress/mochawesome/mochawesome.json",
     "eslint": "eslint -c assets/config/.eslintrc --fix assets/** cypress/integration/** cypress/plugins/** cypress/support/**",
     "preinstall": "npm -v || (SWH_WEB=$PWD && cd /tmp && yarn add npm && cd node_modules/npm && yarn link && cd $SWH_WEB && yarn link npm)",
+    "postinstall": "SWH_SEARCH_DIR=$(python3 -c 'import os;from swh import search; print(os.path.dirname(search.__file__))') && yarn run tree-sitter build-wasm $SWH_SEARCH_DIR/query_language --docker && cp $SWH_SEARCH_DIR/query_language/tokens.js assets/tokens.js",
     "nyc-report": "nyc report --reporter=lcov"
   },
   "repository": {
@@ -89,6 +90,7 @@
     "typeface-alegreya": "^1.1.13",
     "typeface-alegreya-sans": "^1.1.13",
     "waypoints": "^4.0.1",
+    "web-tree-sitter": "^0.20.5",
     "whatwg-fetch": "^3.6.2"
   },
   "devDependencies": {
@@ -121,6 +123,7 @@
     "eslint-webpack-plugin": "^3.1.1",
     "exports-loader": "^3.1.0",
     "expose-loader": "^3.1.0",
+    "file-loader": "^6.2.0",
     "imports-loader": "^3.1.1",
     "istanbul-lib-coverage": "^3.2.0",
     "json-stable-stringify": "^1.0.1",
@@ -149,6 +152,7 @@
     "stylelint": "^14.6.1",
     "stylelint-config-standard": "^25.0.0",
     "terser-webpack-plugin": "^5.3.1",
+    "tree-sitter-cli": "^0.20.6",
     "url-loader": "^4.1.1",
     "webpack": "^5.72.0",
     "webpack-bundle-tracker": "^1.5.0",
diff --git a/swh/web/settings/common.py b/swh/web/settings/common.py
--- a/swh/web/settings/common.py
+++ b/swh/web/settings/common.py
@@ -145,6 +145,7 @@
 if not os.path.exists(STATIC_DIR):
     # static folder location when developping swh-web
     STATIC_DIR = os.path.join(PROJECT_DIR, "../../../static")
+
 STATICFILES_DIRS = [STATIC_DIR]
 
 INTERNAL_IPS = ["127.0.0.1"]
diff --git a/swh/web/templates/includes/origin-search-form.html b/swh/web/templates/includes/origin-search-form.html
--- a/swh/web/templates/includes/origin-search-form.html
+++ b/swh/web/templates/includes/origin-search-form.html
@@ -10,9 +10,9 @@
+ oninput="swh.webapp.validateSWHIDInput(this)" autofocus required autocomplete="off">
- +
@@ -48,7 +48,6 @@ search in metadata (instead of URL) - {% if user.is_authenticated and user.is_staff or "swh.web.search_ql" in user.get_all_permissions %}
@@ -60,7 +59,6 @@
- {% endif %}