diff --git a/cypress/integration/origin-search.spec.js b/cypress/integration/origin-search.spec.js --- a/cypress/integration/origin-search.spec.js +++ b/cypress/integration/origin-search.spec.js @@ -106,6 +106,31 @@ }); + it('should filter origins by visit type', function() { + cy.intercept('**/visit/latest/**').as('checkOriginVisits'); + cy.get('#swh-origins-url-patterns') + .type('http'); + + for (let visitType of ['git', 'tar']) { + cy.get('#swh-search-visit-type') + .select(visitType); + + cy.get('.swh-search-icon') + .click(); + + cy.wait('@checkOriginVisits'); + + cy.get('#origin-search-results') + .should('be.visible'); + + cy.get('tbody tr td.swh-origin-visit-type').then(elts => { + for (let elt of elts) { + cy.get(elt).should('have.text', visitType); + } + }); + } + }); + it('should show not found message when no repo matches', function() { searchShouldShowNotFound(nonExistentText, 'No origins matching the search criteria were found.'); diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py --- a/swh/web/api/views/origin.py +++ b/swh/web/api/views/origin.py @@ -190,12 +190,14 @@ limit = min(int(request.query_params.get("limit", "70")), 1000) page_token = request.query_params.get("page_token") with_visit = request.query_params.get("with_visit", "false") + visit_type = request.query_params.get("visit_type") (results, page_token) = api_lookup( archive.search_origin, url_pattern, limit, bool(strtobool(with_visit)), + [visit_type] if visit_type else None, page_token, enrich_fn=enrich_origin_search_result, request=request, @@ -205,6 +207,7 @@ query_params = {} query_params["limit"] = limit query_params["page_token"] = page_token + query_params["visit_type"] = visit_type result["headers"] = { "link-next": reverse( diff --git a/swh/web/assets/src/bundles/browse/origin-search.js b/swh/web/assets/src/bundles/browse/origin-search.js --- a/swh/web/assets/src/bundles/browse/origin-search.js +++ b/swh/web/assets/src/bundles/browse/origin-search.js @@ -39,7 +39,7 @@ let tableRow = ``; tableRow += - `` + + `` + '' + 'Checking'; tableRow += @@ -119,6 +119,10 @@ let withVisit = $('#swh-search-origins-with-visit').prop('checked'); baseSearchUrl.searchParams.append('limit', limit); baseSearchUrl.searchParams.append('with_visit', withVisit); + const visitType = $('#swh-search-visit-type').val(); + if (visitType !== 'any') { + baseSearchUrl.searchParams.append('visit_type', visitType); + } let searchUrl = baseSearchUrl.toString(); searchOrigins(searchUrl); } @@ -200,6 +204,7 @@ let withVisit = $('#swh-search-origins-with-visit').prop('checked'); let withContent = $('#swh-filter-empty-visits').prop('checked'); let searchMetadata = $('#swh-search-origin-metadata').prop('checked'); + const visitType = $('#swh-search-visit-type').val(); let queryParameters = new URLSearchParams(); queryParameters.append('q', searchQueryText); if (withVisit) { @@ -211,6 +216,9 @@ if (searchMetadata) { queryParameters.append('search_metadata', searchMetadata); } + if (visitType !== 'any') { + queryParameters.append('visit_type', visitType); + } // Update the url, triggering page reload and effective search window.location = `${Urls.browse_search()}?${queryParameters.toString()}`; }); @@ -239,11 +247,15 @@ let withVisit = urlParams.has('with_visit'); let withContent = urlParams.has('with_content'); let searchMetadata = urlParams.has('search_metadata'); + let visitType = urlParams.get('visit_type'); if (query) { $('#swh-origins-url-patterns').val(query); $('#swh-search-origins-with-visit').prop('checked', withVisit); $('#swh-filter-empty-visits').prop('checked', withContent); $('#swh-search-origin-metadata').prop('checked', searchMetadata); + if (visitType) { + $('#swh-search-visit-type').val(visitType); + } doSearch(); } }); diff --git a/swh/web/common/archive.py b/swh/web/common/archive.py --- a/swh/web/common/archive.py +++ b/swh/web/common/archive.py @@ -298,6 +298,7 @@ url_pattern: str, limit: int = 50, with_visit: bool = False, + visit_types: Optional[List[str]] = None, page_token: Optional[str] = None, ) -> Tuple[List[OriginInfo], Optional[str]]: """Search for origins whose urls contain a provided string pattern @@ -306,6 +307,9 @@ Args: url_pattern: the string pattern to search for in origin urls limit: the maximum number of found origins to return + with_visit: Whether origins with no visit are to be filtered out + visit_types: Only origins having any of the provided visit types + (e.g. git, svn, pypi) will be returned page_token: opaque string used to get the next results of a search Returns: @@ -320,6 +324,7 @@ url_pattern=url_pattern, page_token=page_token, with_visit=with_visit, + visit_types=visit_types, limit=limit, ) origins = [converters.from_origin(ori_dict) for ori_dict in page_result.results] @@ -339,6 +344,7 @@ page_token=page_token, with_visit=with_visit, limit=limit, + visit_types=visit_types, regexp=True, ) origins = [converters.from_origin(ori.to_dict()) for ori in page_result.results] diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py --- a/swh/web/common/utils.py +++ b/swh/web/common/utils.py @@ -22,7 +22,7 @@ from swh.web.common.exc import BadInputExc from swh.web.common.typing import QueryParameters -from swh.web.config import get_config +from swh.web.config import ORIGIN_VISIT_TYPES, get_config SWH_WEB_METRICS_REGISTRY = CollectorRegistry(auto_describe=True) @@ -273,6 +273,7 @@ for server_name in config["staging_server_names"] ] ), + "visit_types": ORIGIN_VISIT_TYPES, } diff --git a/swh/web/config.py b/swh/web/config.py --- a/swh/web/config.py +++ b/swh/web/config.py @@ -21,6 +21,21 @@ "webapp.internal.staging.swh.network", ] +ORIGIN_VISIT_TYPES = [ + "cran", + "deb", + "deposit", + "ftp", + "hg", + "git", + "nixguix", + "npm", + "pypi", + "svn", + "tar", +] + + SETTINGS_DIR = os.path.dirname(settings.__file__) DEFAULT_CONFIG = { diff --git a/swh/web/templates/includes/origin-search-form.html b/swh/web/templates/includes/origin-search-form.html --- a/swh/web/templates/includes/origin-search-form.html +++ b/swh/web/templates/includes/origin-search-form.html @@ -14,6 +14,17 @@ +
+
+ + +
+
diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py --- a/swh/web/tests/api/views/test_origin.py +++ b/swh/web/tests/api/views/test_origin.py @@ -518,6 +518,34 @@ } +@pytest.mark.parametrize("backend", ["swh-search", "swh-storage"]) +def test_api_origin_search_visit_type(api_client, mocker, backend): + if backend != "swh-search": + # equivalent to not configuring search in the config + mocker.patch("swh.web.common.archive.search", None) + + expected_origins = { + "https://github.com/wcoder/highlightjs-line-numbers.js", + "https://github.com/memononen/libtess2", + } + + url = reverse( + "api-1-origin-search", + url_args={"url_pattern": "github com",}, + query_params={"visit_type": "git"}, + ) + rv = check_api_get_responses(api_client, url, status_code=200) + assert {origin["url"] for origin in rv.data} == expected_origins + + url = reverse( + "api-1-origin-search", + url_args={"url_pattern": "github com",}, + query_params={"visit_type": "foo"}, + ) + rv = check_api_get_responses(api_client, url, status_code=200) + assert rv.data == [] + + @pytest.mark.parametrize("backend", ["swh-search", "swh-storage"]) @pytest.mark.parametrize("limit", [1, 2, 3, 10]) def test_api_origin_search_scroll(api_client, archive_data, mocker, limit, backend): diff --git a/swh/web/tests/data.py b/swh/web/tests/data.py --- a/swh/web/tests/data.py +++ b/swh/web/tests/data.py @@ -198,13 +198,15 @@ ori = storage.origin_get([origin["url"]])[0] origin.update(ori.to_dict()) # add an 'id' key if enabled - search.origin_update([{"url": origin["url"], "has_visits": True}]) + search.origin_update( + [{"url": origin["url"], "has_visits": True, "visit_types": ["git"]}] + ) for i in range(250): url = "https://many.origins/%d" % (i + 1) # storage.origin_add([{'url': url}]) storage.origin_add([Origin(url=url)]) - search.origin_update([{"url": url, "has_visits": True}]) + search.origin_update([{"url": url, "has_visits": True, "visit_types": ["tar"]}]) date = now() visit = OriginVisit(origin=url, date=date, type="tar") visit = storage.origin_visit_add([visit])[0]