diff --git a/assets/src/bundles/browse/origin-search.js b/assets/src/bundles/browse/origin-search.js --- a/assets/src/bundles/browse/origin-search.js +++ b/assets/src/bundles/browse/origin-search.js @@ -109,7 +109,9 @@ baseSearchUrl = new URL(Urls.api_1_origin_metadata_search(), window.location); baseSearchUrl.searchParams.append('fulltext', searchQueryText); } else { + const useSearchQL = $('#swh-search-use-ql').prop('checked'); baseSearchUrl = new URL(Urls.api_1_origin_search(searchQueryText), window.location); + baseSearchUrl.searchParams.append('use_ql', useSearchQL ?? false); } const withVisit = $('#swh-search-origins-with-visit').prop('checked'); @@ -198,6 +200,7 @@ const searchQueryText = $('#swh-origins-url-patterns').val().trim(); const withVisit = $('#swh-search-origins-with-visit').prop('checked'); const withContent = $('#swh-filter-empty-visits').prop('checked'); + const useSearchQL = $('#swh-search-use-ql').prop('checked'); const searchMetadata = $('#swh-search-origin-metadata').prop('checked'); const visitType = $('#swh-search-visit-type').val(); const queryParameters = new URLSearchParams(); @@ -208,6 +211,9 @@ if (withContent) { queryParameters.append('with_content', withContent); } + if (useSearchQL) { + queryParameters.append('use_ql', useSearchQL ?? false); + } if (searchMetadata) { queryParameters.append('search_metadata', searchMetadata); } @@ -243,12 +249,14 @@ const urlParams = new URLSearchParams(window.location.search); const query = urlParams.get('q'); const withVisit = urlParams.has('with_visit'); + const useSearchQL = urlParams.has('use_ql'); const withContent = urlParams.has('with_content'); const searchMetadata = urlParams.has('search_metadata'); const visitType = urlParams.get('visit_type'); if (query) { $('#swh-origins-url-patterns').val(query); $('#swh-search-origins-with-visit').prop('checked', withVisit); + $('#swh-search-use-ql').prop('checked', useSearchQL ?? false); $('#swh-filter-empty-visits').prop('checked', withContent); $('#swh-search-origin-metadata').prop('checked', searchMetadata); if (visitType) { diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py --- a/swh/web/api/views/origin.py +++ b/swh/web/api/views/origin.py @@ -168,6 +168,7 @@ results. :param string url_pattern: a string pattern + :query boolean use_ql: whether to use swh search query language or not :query int limit: the maximum number of found origins to return (bounded to 1000) :query boolean with_visit: if true, only return origins with at least @@ -189,12 +190,14 @@ result = {} limit = min(int(request.query_params.get("limit", "70")), 1000) page_token = request.query_params.get("page_token") + use_ql = request.query_params.get("use_ql", "false") with_visit = request.query_params.get("with_visit", "false") visit_type = request.query_params.get("visit_type") (results, page_token) = api_lookup( archive.search_origin, url_pattern, + bool(strtobool(use_ql)), limit, bool(strtobool(with_visit)), [visit_type] if visit_type else None, diff --git a/swh/web/browse/urls.py b/swh/web/browse/urls.py --- a/swh/web/browse/urls.py +++ b/swh/web/browse/urls.py @@ -6,6 +6,7 @@ from django.conf.urls import url from django.shortcuts import redirect, render +from swh.web import config from swh.web.browse.browseurls import BrowseUrls from swh.web.browse.identifiers import swhid_browse import swh.web.browse.views.content # noqa @@ -25,7 +26,12 @@ def _browse_search_view(request): return render( - request, "browse/search.html", {"heading": "Search software origins to browse"} + request, + "browse/search.html", + { + "heading": "Search software origins to browse", + "enable_ql": config.get_config()["search_config"].get("enable_ql", False), + }, ) diff --git a/swh/web/common/archive.py b/swh/web/common/archive.py --- a/swh/web/common/archive.py +++ b/swh/web/common/archive.py @@ -297,6 +297,7 @@ def search_origin( url_pattern: str, + use_ql: bool = False, limit: int = 50, with_visit: bool = False, visit_types: Optional[List[str]] = None, @@ -307,6 +308,7 @@ Args: url_pattern: the string pattern to search for in origin urls + use_ql: whether to use swh search query language or not limit: the maximum number of found origins to return with_visit: Whether origins with no visit are to be filtered out visit_types: Only origins having any of the provided visit types @@ -321,13 +323,22 @@ assert isinstance(page_token, str) if search: - page_result = search.origin_search( - url_pattern=url_pattern, - page_token=page_token, - with_visit=with_visit, - visit_types=visit_types, - limit=limit, - ) + if config.get_config()["search_config"].get("enable_ql") and use_ql: + page_result = search.origin_search( + query=url_pattern, + page_token=page_token, + with_visit=with_visit, + visit_types=visit_types, + limit=limit, + ) + else: + page_result = search.origin_search( + url_pattern=url_pattern, + page_token=page_token, + with_visit=with_visit, + visit_types=visit_types, + limit=limit, + ) origins = [converters.from_origin(ori_dict) for ori_dict in page_result.results] else: # Fallback to swh-storage if swh-search is not configured @@ -367,7 +378,7 @@ """ results = [] - if search and config.get_config()["metadata_search_backend"] == "swh-search": + if search and config.get_config()["search_config"]["backend"] == "swh-search": page_result = search.origin_search(metadata_pattern=fulltext, limit=limit,) matches = idx_storage.origin_intrinsic_metadata_get( [r["url"] for r in page_result.results] diff --git a/swh/web/config.py b/swh/web/config.py --- a/swh/web/config.py +++ b/swh/web/config.py @@ -41,10 +41,6 @@ DEFAULT_CONFIG = { "allowed_hosts": ("list", []), - "search": ( - "dict", - {"cls": "remote", "url": "http://127.0.0.1:5010/", "timeout": 10,}, - ), "storage": ( "dict", {"cls": "remote", "url": "http://127.0.0.1:5002/", "timeout": 10,}, @@ -57,6 +53,14 @@ "dict", {"cls": "remote", "url": "http://127.0.0.1:5011/", "timeout": 1,}, ), + "search": ( + "dict", + {"cls": "remote", "url": "http://127.0.0.1:5010/", "timeout": 10,}, + ), + "search_config": ( + "dict", + {"backend": "swh-indexer-storage", "enable_ql": False}, # or "swh-search" + ), "log_dir": ("string", "/tmp/swh/log"), "debug": ("bool", False), "serve_assets": ("bool", False), @@ -128,7 +132,6 @@ "json_path": "1.0/status/578e5eddcdc0cc7951000520", }, ), - "metadata_search_backend": ("string", "swh-indexer-storage"), # or "swh-search" "counters_backend": ("string", "swh-storage"), # or "swh-counters" "staging_server_names": ("list", STAGING_SERVER_NAMES), "instance_name": ("str", "archive-test.softwareheritage.org"), diff --git a/swh/web/templates/includes/origin-search-form.html b/swh/web/templates/includes/origin-search-form.html --- a/swh/web/templates/includes/origin-search-form.html +++ b/swh/web/templates/includes/origin-search-form.html @@ -48,6 +48,15 @@ search in metadata (instead of URL) + {% if enable_ql %} +
+ + +
+ {% endif %}