diff --git a/assets/src/bundles/browse/origin-search.js b/assets/src/bundles/browse/origin-search.js index 06ac97f7..7b547457 100644 --- a/assets/src/bundles/browse/origin-search.js +++ b/assets/src/bundles/browse/origin-search.js @@ -1,270 +1,271 @@ /** * Copyright (C) 2018-2021 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ import {handleFetchError, errorMessageFromResponse, isArchivedOrigin} from 'utils/functions'; const limit = 100; const linksPrev = []; let linkNext = null; let linkCurrent = null; let inSearch = false; function parseLinkHeader(s) { const re = /<(.+)>; rel="next"/; return s.match(re)[1]; } function fixTableRowsStyle() { setTimeout(() => { $('#origin-search-results tbody tr').removeAttr('style'); }); } function clearOriginSearchResultsTable() { $('#origin-search-results tbody tr').remove(); } async function populateOriginSearchResultsTable(origins) { if (origins.length > 0) { $('#swh-origin-search-results').show(); $('#swh-no-result').hide(); clearOriginSearchResultsTable(); const table = $('#origin-search-results tbody'); const promises = []; for (const [i, origin] of origins.entries()) { const browseUrl = `${Urls.browse_origin()}?origin_url=${encodeURIComponent(origin.url)}`; let tableRow = ``; tableRow += `` + '' + 'Checking'; tableRow += '' + `${origin.url}`; tableRow += `` + '' + 'Checking'; tableRow += ''; table.append(tableRow); // get async latest visit snapshot and update visit status icon let latestSnapshotUrl = Urls.api_1_origin_visit_latest(origin.url); latestSnapshotUrl += '?require_snapshot=true'; promises.push(fetch(latestSnapshotUrl)); } const responses = await Promise.all(promises); const responsesData = await Promise.all(responses.map(r => r.json())); for (let i = 0; i < responses.length; ++i) { const response = responses[i]; const data = responsesData[i]; if (response.status !== 404 && data.type) { $(`#visit-type-origin-${i}`).html(data.type); $(`#visit-status-origin-${i}`).html( 'Archived'); } else { $(`#visit-type-origin-${i}`).html('unknown'); $(`#visit-status-origin-${i}`).html( 'Pending archival'); if ($('#swh-filter-empty-visits').prop('checked')) { $(`#origin-${i}`).remove(); } } } fixTableRowsStyle(); } else { $('#swh-origin-search-results').hide(); $('#swh-no-result').text('No origins matching the search criteria were found.'); $('#swh-no-result').show(); } if (linkNext === null) { $('#origins-next-results-button').addClass('disabled'); } else { $('#origins-next-results-button').removeClass('disabled'); } if (linksPrev.length === 0) { $('#origins-prev-results-button').addClass('disabled'); } else { $('#origins-prev-results-button').removeClass('disabled'); } inSearch = false; setTimeout(() => { window.scrollTo(0, 0); }); } function searchOriginsFirst(searchQueryText, limit) { let baseSearchUrl; const searchMetadata = $('#swh-search-origin-metadata').prop('checked'); if (searchMetadata) { baseSearchUrl = new URL(Urls.api_1_origin_metadata_search(), window.location); baseSearchUrl.searchParams.append('fulltext', searchQueryText); } else { const useSearchQL = $('#swh-search-use-ql').prop('checked'); baseSearchUrl = new URL(Urls.api_1_origin_search(searchQueryText), window.location); baseSearchUrl.searchParams.append('use_ql', useSearchQL ?? false); } const withVisit = $('#swh-search-origins-with-visit').prop('checked'); baseSearchUrl.searchParams.append('limit', limit); baseSearchUrl.searchParams.append('with_visit', withVisit); const visitType = $('#swh-search-visit-type').val(); if (visitType !== 'any') { baseSearchUrl.searchParams.append('visit_type', visitType); } const searchUrl = baseSearchUrl.toString(); searchOrigins(searchUrl); } async function searchOrigins(searchUrl) { clearOriginSearchResultsTable(); $('.swh-loading').addClass('show'); try { const response = await fetch(searchUrl); handleFetchError(response); const data = await response.json(); // Save link to the current results page linkCurrent = searchUrl; // Save link to the next results page. linkNext = null; if (response.headers.has('Link')) { const parsedLink = parseLinkHeader(response.headers.get('Link')); if (parsedLink !== undefined) { linkNext = parsedLink; } } // prevLinks is updated by the caller, which is the one to know if // we're going forward or backward in the pages. $('.swh-loading').removeClass('show'); populateOriginSearchResultsTable(data); } catch (errorResponse) { const errorData = await errorResponse.json(); $('.swh-loading').removeClass('show'); inSearch = false; $('#swh-origin-search-results').hide(); $('#swh-no-result').text(errorMessageFromResponse( errorData, 'An unknown error occurred while searching origins')); $('#swh-no-result').show(); } } async function doSearch() { $('#swh-no-result').hide(); const searchQueryText = $('#swh-origins-url-patterns').val(); inSearch = true; if (searchQueryText.startsWith('swh:')) { try { // searchQueryText may be a PID so sending search queries to PID resolve endpoint const resolveSWHIDUrl = Urls.api_1_resolve_swhid(searchQueryText); const response = await fetch(resolveSWHIDUrl); handleFetchError(response); const data = await response.json(); // SWHID has been successfully resolved, // so redirect to browse page window.location = data.browse_url; } catch (response) { // display a useful error message if the input // looks like a SWHID const data = await response.json(); $('#swh-origin-search-results').hide(); $('.swh-search-pagination').hide(); $('#swh-no-result').text(data.reason); $('#swh-no-result').show(); } } else if (await isArchivedOrigin(searchQueryText)) { // redirect to the browse origin window.location.href = `${Urls.browse_origin()}?origin_url=${encodeURIComponent(searchQueryText)}`; } else { // otherwise, proceed with origins search irrespective of the error $('#swh-origin-search-results').show(); $('.swh-search-pagination').show(); searchOriginsFirst(searchQueryText, limit); } } export function initOriginSearch() { $(document).ready(() => { $('#swh-search-origins').submit(event => { event.preventDefault(); if (event.target.checkValidity()) { $(event.target).removeClass('was-validated'); const searchQueryText = $('#swh-origins-url-patterns').val().trim(); const withVisit = $('#swh-search-origins-with-visit').prop('checked'); const withContent = $('#swh-filter-empty-visits').prop('checked'); const useSearchQL = $('#swh-search-use-ql').prop('checked'); const searchMetadata = $('#swh-search-origin-metadata').prop('checked'); const visitType = $('#swh-search-visit-type').val(); const queryParameters = new URLSearchParams(); queryParameters.append('q', searchQueryText); if (withVisit) { queryParameters.append('with_visit', withVisit); } if (withContent) { queryParameters.append('with_content', withContent); } if (useSearchQL) { queryParameters.append('use_ql', useSearchQL ?? false); } if (searchMetadata) { queryParameters.append('search_metadata', searchMetadata); } if (visitType !== 'any') { queryParameters.append('visit_type', visitType); } // Update the url, triggering page reload and effective search window.location = `${Urls.browse_search()}?${queryParameters.toString()}`; } else { $(event.target).addClass('was-validated'); } }); $('#origins-next-results-button').click(event => { if ($('#origins-next-results-button').hasClass('disabled') || inSearch) { return; } inSearch = true; linksPrev.push(linkCurrent); searchOrigins(linkNext); event.preventDefault(); }); $('#origins-prev-results-button').click(event => { if ($('#origins-prev-results-button').hasClass('disabled') || inSearch) { return; } inSearch = true; searchOrigins(linksPrev.pop()); event.preventDefault(); }); - const urlParams = new URLSearchParams(window.location.search); - const query = urlParams.get('q'); - const withVisit = urlParams.has('with_visit'); - const useSearchQL = urlParams.has('use_ql'); - const withContent = urlParams.has('with_content'); - const searchMetadata = urlParams.has('search_metadata'); - const visitType = urlParams.get('visit_type'); - if (query) { + if (window.location.search) { + const urlParams = new URLSearchParams(window.location.search); + const query = urlParams.get('q'); + const withVisit = urlParams.has('with_visit'); + const useSearchQL = urlParams.has('use_ql'); + const withContent = urlParams.has('with_content'); + const searchMetadata = urlParams.has('search_metadata'); + const visitType = urlParams.get('visit_type'); + $('#swh-origins-url-patterns').val(query); $('#swh-search-origins-with-visit').prop('checked', withVisit); $('#swh-search-use-ql').prop('checked', useSearchQL ?? false); $('#swh-filter-empty-visits').prop('checked', withContent); $('#swh-search-origin-metadata').prop('checked', searchMetadata); if (visitType) { $('#swh-search-visit-type').val(visitType); } doSearch(); } }); } diff --git a/cypress/integration/origin-search.spec.js b/cypress/integration/origin-search.spec.js index 4589adbf..3e56ca82 100644 --- a/cypress/integration/origin-search.spec.js +++ b/cypress/integration/origin-search.spec.js @@ -1,608 +1,653 @@ /** * Copyright (C) 2019-2021 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ const nonExistentText = 'NoMatchExists'; let origin; let url; function doSearch(searchText, searchInputElt = '#swh-origins-url-patterns') { if (searchText.startsWith('swh:')) { cy.intercept('**/api/1/resolve/**') .as('swhidResolve'); } cy.get(searchInputElt) // to avoid sending too much SWHID validation requests // as cypress insert character one by one when using type .invoke('val', searchText.slice(0, -1)) .type(searchText.slice(-1)) .get('.swh-search-icon') .click({force: true}); if (searchText.startsWith('swh:')) { cy.wait('@swhidResolve'); } } function searchShouldRedirect(searchText, redirectUrl) { doSearch(searchText); cy.location('pathname') .should('equal', redirectUrl); } function searchShouldShowNotFound(searchText, msg) { doSearch(searchText); if (searchText.startsWith('swh:')) { cy.get('.invalid-feedback') .should('be.visible') .and('contain', msg); } } function stubOriginVisitLatestRequests(status = 200, response = {type: 'tar'}, aliasSuffix = '') { cy.intercept({url: '**/visit/latest/**'}, { body: response, statusCode: status }).as(`originVisitLatest${aliasSuffix}`); } describe('Test origin-search', function() { before(function() { origin = this.origin[0]; url = this.Urls.browse_search(); }); beforeEach(function() { cy.visit(url); }); it('should have focus on search form after page load', function() { cy.get('#swh-origins-url-patterns') .should('have.attr', 'autofocus'); // for some reason, autofocus is not honored when running cypress tests // while it is in non controlled browsers // .should('have.focus'); }); it('should redirect to browse when archived URL is searched', function() { cy.get('#swh-origins-url-patterns') .type(origin.url); cy.get('.swh-search-icon') .click(); cy.location('pathname') .should('eq', this.Urls.browse_origin_directory()); cy.location('search') .should('eq', `?origin_url=${origin.url}`); }); it('should not redirect for non valid URL', function() { cy.get('#swh-origins-url-patterns') .type('www.example'); // Invalid URL cy.get('.swh-search-icon') .click(); cy.location('pathname') .should('eq', this.Urls.browse_search()); // Stay in the current page }); it('should not redirect for valid non archived URL', function() { cy.get('#swh-origins-url-patterns') .type('http://eaxmple.com/test/'); // Valid URL, but not archived cy.get('.swh-search-icon') .click(); cy.location('pathname') .should('eq', this.Urls.browse_search()); // Stay in the current page }); it('should remove origin URL with no archived content', function() { stubOriginVisitLatestRequests(404); // Using a non full origin URL here // This is because T3354 redirects to the origin in case of a valid, archived URL cy.get('#swh-origins-url-patterns') .type(origin.url.slice(0, -1)); cy.get('.swh-search-icon') .click(); cy.wait('@originVisitLatest'); cy.get('#origin-search-results') .should('be.visible') .find('tbody tr').should('have.length', 0); stubOriginVisitLatestRequests(200, {}, '2'); cy.get('.swh-search-icon') .click(); cy.wait('@originVisitLatest2'); cy.get('#origin-search-results') .should('be.visible') .find('tbody tr').should('have.length', 0); }); it('should filter origins by visit type', function() { cy.intercept('**/visit/latest/**').as('checkOriginVisits'); cy.get('#swh-origins-url-patterns') .type('http'); for (const visitType of ['git', 'tar']) { cy.get('#swh-search-visit-type') .select(visitType); cy.get('.swh-search-icon') .click(); cy.wait('@checkOriginVisits'); cy.get('#origin-search-results') .should('be.visible'); cy.get('tbody tr td.swh-origin-visit-type').then(elts => { for (const elt of elts) { cy.get(elt).should('have.text', visitType); } }); } }); it('should show not found message when no repo matches', function() { searchShouldShowNotFound(nonExistentText, 'No origins matching the search criteria were found.'); }); it('should add appropriate URL parameters', function() { // Check all three checkboxes and check if // correct url params are added cy.get('#swh-search-origins-with-visit') .check({force: true}) .get('#swh-filter-empty-visits') .check({force: true}) .get('#swh-search-origin-metadata') .check({force: true}) .then(() => { const searchText = origin.url.slice(0, -1); doSearch(searchText); cy.location('search').then(locationSearch => { const urlParams = new URLSearchParams(locationSearch); const query = urlParams.get('q'); const withVisit = urlParams.has('with_visit'); const withContent = urlParams.has('with_content'); const searchMetadata = urlParams.has('search_metadata'); assert.strictEqual(query, searchText); assert.strictEqual(withVisit, true); assert.strictEqual(withContent, true); assert.strictEqual(searchMetadata, true); }); }); }); it('should search in origin intrinsic metadata', function() { cy.intercept('GET', '**/origin/metadata-search/**').as( 'originMetadataSearch' ); cy.get('#swh-search-origins-with-visit') .check({force: true}) .get('#swh-filter-empty-visits') .check({force: true}) .get('#swh-search-origin-metadata') .check({force: true}) .then(() => { const searchText = 'plugin'; doSearch(searchText); cy.wait('@originMetadataSearch').then((req) => { expect(req.response.body[0].metadata.metadata.description).to.equal( 'Line numbering plugin for Highlight.js' // metadata is defined in _TEST_ORIGINS variable in swh/web/tests/data.py ); }); }); }); it('should not send request to the resolve endpoint', function() { cy.intercept(`${this.Urls.api_1_resolve_swhid('').slice(0, -1)}**`) .as('resolveSWHID'); cy.intercept(`${this.Urls.api_1_origin_search(origin.url.slice(0, -1))}**`) .as('searchOrigin'); cy.get('#swh-origins-url-patterns') .type(origin.url.slice(0, -1), {delay: 0, force: true}); cy.get('.swh-search-icon') .click(); cy.wait('@searchOrigin'); cy.xhrShouldBeCalled('resolveSWHID', 0); cy.xhrShouldBeCalled('searchOrigin', 1); }); it('should add query language support for staff users', function() { cy.get('#swh-search-use-ql') .should('not.exist'); cy.adminLogin(); cy.visit(url); cy.get('#swh-search-use-ql') .should('exist'); }); it('should show error messages when using the query language', function() { cy.adminLogin(); cy.visit(url); cy.intercept('GET', `${this.Urls.api_1_origin_search('**')}**`, { body: { 'exception': 'BadInputExc', 'reason': 'Syntax error in search query: Invalid query' }, statusCode: 400 }) .as('searchOrigin'); cy.get('#swh-search-use-ql') .should('exist') .click({force: true}); // Covered by label cy.get('#swh-origins-url-patterns') .type('this is not a valid query') .type('{enter}'); cy.wait('@searchOrigin').then((xhr) => { cy.get('#swh-no-result') .should('contain', 'Syntax error in search query'); }); }); + function checkSearchHasResults() { + cy.get('.swh-search-icon') + .click(); + + cy.wait('@checkOriginVisits'); + + cy.get('#origin-search-results') + .should('be.visible'); + + cy.get('tbody tr td.swh-origin-visit-type') + .should('exist'); + } + + it('should search all origins when no pattern is provided', function() { + cy.intercept('**/visit/latest/**').as('checkOriginVisits'); + + // with default filters + checkSearchHasResults(); + + // remove filters + cy.get('#swh-search-origins-with-visit') + .uncheck({force: true}) + .get('#swh-filter-empty-visits') + .uncheck({force: true}); + checkSearchHasResults(); + + }); + + it('should search all origins for a visit type', function() { + cy.intercept('**/visit/latest/**').as('checkOriginVisits'); + + for (const visitType of ['git', 'tar']) { + cy.get('#swh-search-visit-type') + .select(visitType); + + checkSearchHasResults(); + + cy.get('tbody tr td.swh-origin-visit-type').then(elts => { + for (const elt of elts) { + cy.get(elt).should('have.text', visitType); + } + }); + } + }); + context('Test pagination', function() { it('should not paginate if there are not many results', function() { // Setup search cy.get('#swh-search-origins-with-visit') .uncheck({force: true}) .get('#swh-filter-empty-visits') .uncheck({force: true}) .then(() => { const searchText = 'libtess'; // Get first page of results doSearch(searchText); cy.get('.swh-search-result-entry') .should('have.length', 1); cy.get('.swh-search-result-entry#origin-0 td a') .should('have.text', 'https://github.com/memononen/libtess2'); cy.get('#origins-prev-results-button') .should('have.class', 'disabled'); cy.get('#origins-next-results-button') .should('have.class', 'disabled'); }); }); it('should paginate forward when there are many results', function() { stubOriginVisitLatestRequests(); // Setup search cy.get('#swh-search-origins-with-visit') .uncheck({force: true}) .get('#swh-filter-empty-visits') .uncheck({force: true}) .then(() => { const searchText = 'many.origins'; // Get first page of results doSearch(searchText); cy.wait('@originVisitLatest'); cy.get('.swh-search-result-entry') .should('have.length', 100); cy.get('.swh-search-result-entry#origin-0 td a') .should('have.text', 'https://many.origins/1'); cy.get('.swh-search-result-entry#origin-99 td a') .should('have.text', 'https://many.origins/100'); cy.get('#origins-prev-results-button') .should('have.class', 'disabled'); cy.get('#origins-next-results-button') .should('not.have.class', 'disabled'); // Get second page of results cy.get('#origins-next-results-button a') .click(); cy.wait('@originVisitLatest'); cy.get('.swh-search-result-entry') .should('have.length', 100); cy.get('.swh-search-result-entry#origin-0 td a') .should('have.text', 'https://many.origins/101'); cy.get('.swh-search-result-entry#origin-99 td a') .should('have.text', 'https://many.origins/200'); cy.get('#origins-prev-results-button') .should('not.have.class', 'disabled'); cy.get('#origins-next-results-button') .should('not.have.class', 'disabled'); // Get third (and last) page of results cy.get('#origins-next-results-button a') .click(); cy.wait('@originVisitLatest'); cy.get('.swh-search-result-entry') .should('have.length', 50); cy.get('.swh-search-result-entry#origin-0 td a') .should('have.text', 'https://many.origins/201'); cy.get('.swh-search-result-entry#origin-49 td a') .should('have.text', 'https://many.origins/250'); cy.get('#origins-prev-results-button') .should('not.have.class', 'disabled'); cy.get('#origins-next-results-button') .should('have.class', 'disabled'); }); }); it('should paginate backward from a middle page', function() { stubOriginVisitLatestRequests(); // Setup search cy.get('#swh-search-origins-with-visit') .uncheck({force: true}) .get('#swh-filter-empty-visits') .uncheck({force: true}) .then(() => { const searchText = 'many.origins'; // Get first page of results doSearch(searchText); cy.wait('@originVisitLatest'); cy.get('#origins-prev-results-button') .should('have.class', 'disabled'); cy.get('#origins-next-results-button') .should('not.have.class', 'disabled'); // Get second page of results cy.get('#origins-next-results-button a') .click(); cy.wait('@originVisitLatest'); cy.get('#origins-prev-results-button') .should('not.have.class', 'disabled'); cy.get('#origins-next-results-button') .should('not.have.class', 'disabled'); // Get first page of results again cy.get('#origins-prev-results-button a') .click(); cy.wait('@originVisitLatest'); cy.get('.swh-search-result-entry') .should('have.length', 100); cy.get('.swh-search-result-entry#origin-0 td a') .should('have.text', 'https://many.origins/1'); cy.get('.swh-search-result-entry#origin-99 td a') .should('have.text', 'https://many.origins/100'); cy.get('#origins-prev-results-button') .should('have.class', 'disabled'); cy.get('#origins-next-results-button') .should('not.have.class', 'disabled'); }); }); it('should paginate backward from the last page', function() { stubOriginVisitLatestRequests(); // Setup search cy.get('#swh-search-origins-with-visit') .uncheck({force: true}) .get('#swh-filter-empty-visits') .uncheck({force: true}) .then(() => { const searchText = 'many.origins'; // Get first page of results doSearch(searchText); cy.wait('@originVisitLatest'); cy.get('#origins-prev-results-button') .should('have.class', 'disabled'); cy.get('#origins-next-results-button') .should('not.have.class', 'disabled'); // Get second page of results cy.get('#origins-next-results-button a') .click(); cy.wait('@originVisitLatest'); cy.get('#origins-prev-results-button') .should('not.have.class', 'disabled'); cy.get('#origins-next-results-button') .should('not.have.class', 'disabled'); // Get third (and last) page of results cy.get('#origins-next-results-button a') .click(); cy.get('#origins-prev-results-button') .should('not.have.class', 'disabled'); cy.get('#origins-next-results-button') .should('have.class', 'disabled'); // Get second page of results again cy.get('#origins-prev-results-button a') .click(); cy.wait('@originVisitLatest'); cy.get('.swh-search-result-entry') .should('have.length', 100); cy.get('.swh-search-result-entry#origin-0 td a') .should('have.text', 'https://many.origins/101'); cy.get('.swh-search-result-entry#origin-99 td a') .should('have.text', 'https://many.origins/200'); cy.get('#origins-prev-results-button') .should('not.have.class', 'disabled'); cy.get('#origins-next-results-button') .should('not.have.class', 'disabled'); // Get first page of results again cy.get('#origins-prev-results-button a') .click(); cy.wait('@originVisitLatest'); cy.get('.swh-search-result-entry') .should('have.length', 100); cy.get('.swh-search-result-entry#origin-0 td a') .should('have.text', 'https://many.origins/1'); cy.get('.swh-search-result-entry#origin-99 td a') .should('have.text', 'https://many.origins/100'); cy.get('#origins-prev-results-button') .should('have.class', 'disabled'); cy.get('#origins-next-results-button') .should('not.have.class', 'disabled'); }); }); }); context('Test valid SWHIDs', function() { it('should resolve directory', function() { const redirectUrl = this.Urls.browse_directory(origin.content[0].directory); const swhid = `swh:1:dir:${origin.content[0].directory}`; searchShouldRedirect(swhid, redirectUrl); }); it('should resolve revision', function() { const redirectUrl = this.Urls.browse_revision(origin.revisions[0]); const swhid = `swh:1:rev:${origin.revisions[0]}`; searchShouldRedirect(swhid, redirectUrl); }); it('should resolve snapshot', function() { const redirectUrl = this.Urls.browse_snapshot_directory(origin.snapshot); const swhid = `swh:1:snp:${origin.snapshot}`; searchShouldRedirect(swhid, redirectUrl); }); it('should resolve content', function() { const redirectUrl = this.Urls.browse_content(`sha1_git:${origin.content[0].sha1git}`); const swhid = `swh:1:cnt:${origin.content[0].sha1git}`; searchShouldRedirect(swhid, redirectUrl); }); it('should not send request to the search endpoint', function() { const swhid = `swh:1:rev:${origin.revisions[0]}`; cy.intercept(this.Urls.api_1_resolve_swhid(swhid)) .as('resolveSWHID'); cy.intercept(`${this.Urls.api_1_origin_search('').slice(0, -1)}**`) .as('searchOrigin'); cy.get('#swh-origins-url-patterns') .type(swhid, {delay: 0, force: true}); cy.get('.swh-search-icon') .click(); cy.wait('@resolveSWHID'); cy.xhrShouldBeCalled('resolveSWHID', 1); cy.xhrShouldBeCalled('searchOrigin', 0); }); }); context('Test invalid SWHIDs', function() { it('should show not found for directory', function() { const swhid = `swh:1:dir:${this.unarchivedRepo.rootDirectory}`; const msg = `Directory with sha1_git ${this.unarchivedRepo.rootDirectory} not found`; searchShouldShowNotFound(swhid, msg); }); it('should show not found for snapshot', function() { const swhid = `swh:1:snp:${this.unarchivedRepo.snapshot}`; const msg = `Snapshot with id ${this.unarchivedRepo.snapshot} not found!`; searchShouldShowNotFound(swhid, msg); }); it('should show not found for revision', function() { const swhid = `swh:1:rev:${this.unarchivedRepo.revision}`; const msg = `Revision with sha1_git ${this.unarchivedRepo.revision} not found.`; searchShouldShowNotFound(swhid, msg); }); it('should show not found for content', function() { const swhid = `swh:1:cnt:${this.unarchivedRepo.content[0].sha1git}`; const msg = `Content with sha1_git checksum equals to ${this.unarchivedRepo.content[0].sha1git} not found!`; searchShouldShowNotFound(swhid, msg); }); function checkInvalidSWHIDReport(url, searchInputElt, swhidInput, validationMessagePattern = '') { cy.visit(url); doSearch(swhidInput, searchInputElt); cy.get(searchInputElt) .then($el => $el[0].checkValidity()).should('be.false'); cy.get(searchInputElt) .invoke('prop', 'validationMessage') .should('not.equal', '') .should('contain', validationMessagePattern); } it('should report invalid SWHID in search page input', function() { const swhidInput = `swh:1:cnt:${this.unarchivedRepo.content[0].sha1git};lines=45-60/`; checkInvalidSWHIDReport(this.Urls.browse_search(), '#swh-origins-url-patterns', swhidInput); cy.get('.invalid-feedback') .should('be.visible'); }); it('should report invalid SWHID in top right search input', function() { const swhidInput = `swh:1:cnt:${this.unarchivedRepo.content[0].sha1git};lines=45-60/`; checkInvalidSWHIDReport(this.Urls.browse_help(), '#swh-origins-search-top-input', swhidInput); }); it('should report SWHID with uppercase chars in search page input', function() { const swhidInput = `swh:1:cnt:${this.unarchivedRepo.content[0].sha1git}`.toUpperCase(); checkInvalidSWHIDReport(this.Urls.browse_search(), '#swh-origins-url-patterns', swhidInput, swhidInput.toLowerCase()); cy.get('.invalid-feedback') .should('be.visible'); }); it('should report SWHID with uppercase chars in top right search input', function() { let swhidInput = `swh:1:cnt:${this.unarchivedRepo.content[0].sha1git}`.toUpperCase(); swhidInput += ';lines=45-60/'; checkInvalidSWHIDReport(this.Urls.browse_help(), '#swh-origins-search-top-input', swhidInput.toLowerCase()); }); }); }); diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py index 0683fa02..84fba5d0 100644 --- a/swh/web/api/views/origin.py +++ b/swh/web/api/views/origin.py @@ -1,498 +1,498 @@ # Copyright (C) 2015-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from distutils.util import strtobool from functools import partial from swh.search.exc import SearchQuerySyntaxError from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.api.utils import ( enrich_origin, enrich_origin_search_result, enrich_origin_visit, ) from swh.web.api.views.utils import api_lookup from swh.web.common import archive from swh.web.common.exc import BadInputExc from swh.web.common.origin_visits import get_origin_visits from swh.web.common.utils import origin_visit_types, reverse DOC_RETURN_ORIGIN = """ :>json string origin_visits_url: link to in order to get information about the visits for that origin :>json string url: the origin canonical url """ DOC_RETURN_ORIGIN_ARRAY = DOC_RETURN_ORIGIN.replace(":>json", ":>jsonarr") DOC_RETURN_ORIGIN_VISIT = """ :>json string date: ISO8601/RFC3339 representation of the visit date (in UTC) :>json str origin: the origin canonical url :>json string origin_url: link to get information about the origin :>jsonarr string snapshot: the snapshot identifier of the visit (may be null if status is not **full**). :>jsonarr string snapshot_url: link to :http:get:`/api/1/snapshot/(snapshot_id)/` in order to get information about the snapshot of the visit (may be null if status is not **full**). :>json string status: status of the visit (either **full**, **partial** or **ongoing**) :>json number visit: the unique identifier of the visit """ DOC_RETURN_ORIGIN_VISIT_ARRAY = DOC_RETURN_ORIGIN_VISIT.replace(":>json", ":>jsonarr") DOC_RETURN_ORIGIN_VISIT_ARRAY += """ :>jsonarr number id: the unique identifier of the origin :>jsonarr string origin_visit_url: link to :http:get:`/api/1/origin/(origin_url)/visit/(visit_id)/` in order to get information about the visit """ @api_route(r"/origins/", "api-1-origins") @api_doc("/origins/", noargs=True) @format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY) def api_origins(request): """ .. http:get:: /api/1/origins/ Get list of archived software origins. .. warning:: This endpoint used to provide an ``origin_from`` query parameter, and guarantee an order on results. This is no longer true, and only the Link header should be used for paginating through results. :query int origin_count: The maximum number of origins to return (default to 100, can not exceed 10000) {return_origin_array} {common_headers} {resheader_link} :statuscode 200: no error **Example:** .. parsed-literal:: :swh_web_api:`origins?origin_count=500` """ old_param_origin_from = request.query_params.get("origin_from") if old_param_origin_from: raise BadInputExc("Please use the Link header to browse through result") page_token = request.query_params.get("page_token", None) limit = min(int(request.query_params.get("origin_count", "100")), 10000) page_result = archive.lookup_origins(page_token, limit) origins = [enrich_origin(o, request=request) for o in page_result.results] next_page_token = page_result.next_page_token response = {"results": origins, "headers": {}} if next_page_token is not None: response["headers"]["link-next"] = reverse( "api-1-origins", query_params={"page_token": next_page_token, "origin_count": limit}, request=request, ) return response @api_route(r"/origin/(?P.+)/get/", "api-1-origin") @api_doc("/origin/") @format_docstring(return_origin=DOC_RETURN_ORIGIN) def api_origin(request, origin_url): """ .. http:get:: /api/1/origin/(origin_url)/get/ Get information about a software origin. :param string origin_url: the origin url {return_origin} {common_headers} :statuscode 200: no error :statuscode 404: requested origin can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`origin/https://github.com/python/cpython/get/` """ ori_dict = {"url": origin_url} error_msg = "Origin with url %s not found." % ori_dict["url"] return api_lookup( archive.lookup_origin, ori_dict, notfound_msg=error_msg, enrich_fn=enrich_origin, request=request, ) def _visit_types(): docstring = "" # available visit types are queried using swh-search so we do it in a try # block in case of failure (for instance in docker environment when # elasticsearch service is not available) try: visit_types = [f"**{visit_type}**" for visit_type in origin_visit_types()] docstring = ", ".join(visit_types[:-1]) + f", and {visit_types[-1]}" except Exception: docstring = "???" pass return docstring @api_route( - r"/origin/search/(?P.+)/", + r"/origin/search/(?P.*)/", "api-1-origin-search", throttle_scope="swh_api_origin_search", ) @api_doc("/origin/search/") @format_docstring( return_origin_array=DOC_RETURN_ORIGIN_ARRAY, visit_types=_visit_types() ) def api_origin_search(request, url_pattern): """ .. http:get:: /api/1/origin/search/(url_pattern)/ Search for software origins whose urls contain a provided string pattern or match a provided regular expression. The search is performed in a case insensitive way. .. warning:: This endpoint used to provide an ``offset`` query parameter, and guarantee an order on results. This is no longer true, and only the Link header should be used for paginating through results. :param string url_pattern: a string pattern :query boolean use_ql: whether to use swh search query language or not :query int limit: the maximum number of found origins to return (bounded to 1000) :query boolean with_visit: if true, only return origins with at least one visit by Software heritage :query string visit_type: if provided, only return origins with that specific visit type (currently the supported types are {visit_types}) {return_origin_array} {common_headers} {resheader_link} :statuscode 200: no error **Example:** .. parsed-literal:: :swh_web_api:`origin/search/python/?limit=2` """ result = {} limit = min(int(request.query_params.get("limit", "70")), 1000) page_token = request.query_params.get("page_token") use_ql = request.query_params.get("use_ql", "false") with_visit = request.query_params.get("with_visit", "false") visit_type = request.query_params.get("visit_type") try: (results, page_token) = api_lookup( archive.search_origin, url_pattern, bool(strtobool(use_ql)), limit, bool(strtobool(with_visit)), [visit_type] if visit_type else None, page_token, enrich_fn=enrich_origin_search_result, request=request, ) except SearchQuerySyntaxError as e: raise BadInputExc(f"Syntax error in search query: {e.args[0]}") if page_token is not None: query_params = {k: v for (k, v) in request.GET.dict().items()} query_params["page_token"] = page_token result["headers"] = { "link-next": reverse( "api-1-origin-search", url_args={"url_pattern": url_pattern}, query_params=query_params, request=request, ) } result.update({"results": results}) return result @api_route(r"/origin/metadata-search/", "api-1-origin-metadata-search") @api_doc("/origin/metadata-search/", noargs=True) @format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY) def api_origin_metadata_search(request): """ .. http:get:: /api/1/origin/metadata-search/ Search for software origins whose metadata (expressed as a JSON-LD/CodeMeta dictionary) match the provided criteria. For now, only full-text search on this dictionary is supported. :query str fulltext: a string that will be matched against origin metadata; results are ranked and ordered starting with the best ones. :query int limit: the maximum number of found origins to return (bounded to 100) {return_origin_array} {common_headers} :statuscode 200: no error **Example:** .. parsed-literal:: :swh_web_api:`origin/metadata-search/?limit=2&fulltext=Jane%20Doe` """ fulltext = request.query_params.get("fulltext", None) limit = min(int(request.query_params.get("limit", "70")), 100) if not fulltext: content = '"fulltext" must be provided and non-empty.' raise BadInputExc(content) results = api_lookup( archive.search_origin_metadata, fulltext, limit, request=request ) return { "results": results, } @api_route(r"/origin/(?P.*)/visits/", "api-1-origin-visits") @api_doc("/origin/visits/") @format_docstring(return_origin_visit_array=DOC_RETURN_ORIGIN_VISIT_ARRAY) def api_origin_visits(request, origin_url): """ .. http:get:: /api/1/origin/(origin_url)/visits/ Get information about all visits of a software origin. Visits are returned sorted in descending order according to their date. :param str origin_url: a software origin URL :query int per_page: specify the number of visits to list, for pagination purposes :query int last_visit: visit to start listing from, for pagination purposes {common_headers} {resheader_link} {return_origin_visit_array} :statuscode 200: no error :statuscode 404: requested origin can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`origin/https://github.com/hylang/hy/visits/` """ result = {} origin_query = {"url": origin_url} notfound_msg = "No origin {} found".format(origin_url) url_args_next = {"origin_url": origin_url} per_page = int(request.query_params.get("per_page", "10")) last_visit = request.query_params.get("last_visit") if last_visit: last_visit = int(last_visit) def _lookup_origin_visits(origin_query, last_visit=last_visit, per_page=per_page): all_visits = get_origin_visits(origin_query) all_visits.reverse() visits = [] if not last_visit: visits = all_visits[:per_page] else: for i, v in enumerate(all_visits): if v["visit"] == last_visit: visits = all_visits[i + 1 : i + 1 + per_page] break for v in visits: yield v results = api_lookup( _lookup_origin_visits, origin_query, notfound_msg=notfound_msg, enrich_fn=partial( enrich_origin_visit, with_origin_link=False, with_origin_visit_link=True ), request=request, ) if results: nb_results = len(results) if nb_results == per_page: new_last_visit = results[-1]["visit"] query_params = {} query_params["last_visit"] = new_last_visit if request.query_params.get("per_page"): query_params["per_page"] = per_page result["headers"] = { "link-next": reverse( "api-1-origin-visits", url_args=url_args_next, query_params=query_params, request=request, ) } result.update({"results": results}) return result @api_route( r"/origin/(?P.*)/visit/latest/", "api-1-origin-visit-latest", throttle_scope="swh_api_origin_visit_latest", ) @api_doc("/origin/visit/latest/") @format_docstring(return_origin_visit=DOC_RETURN_ORIGIN_VISIT) def api_origin_visit_latest(request, origin_url=None): """ .. http:get:: /api/1/origin/(origin_url)/visit/latest/ Get information about the latest visit of a software origin. :param str origin_url: a software origin URL :query boolean require_snapshot: if true, only return a visit with a snapshot {common_headers} {return_origin_visit} :statuscode 200: no error :statuscode 404: requested origin or visit can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`origin/https://github.com/hylang/hy/visit/latest/` """ require_snapshot = request.query_params.get("require_snapshot", "false") return api_lookup( archive.lookup_origin_visit_latest, origin_url, bool(strtobool(require_snapshot)), notfound_msg=("No visit for origin {} found".format(origin_url)), enrich_fn=partial( enrich_origin_visit, with_origin_link=True, with_origin_visit_link=False ), request=request, ) @api_route( r"/origin/(?P.*)/visit/(?P[0-9]+)/", "api-1-origin-visit" ) @api_doc("/origin/visit/") @format_docstring(return_origin_visit=DOC_RETURN_ORIGIN_VISIT) def api_origin_visit(request, visit_id, origin_url): """ .. http:get:: /api/1/origin/(origin_url)/visit/(visit_id)/ Get information about a specific visit of a software origin. :param str origin_url: a software origin URL :param int visit_id: a visit identifier {common_headers} {return_origin_visit} :statuscode 200: no error :statuscode 404: requested origin or visit can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`origin/https://github.com/hylang/hy/visit/1/` """ return api_lookup( archive.lookup_origin_visit, origin_url, int(visit_id), notfound_msg=("No visit {} for origin {} found".format(visit_id, origin_url)), enrich_fn=partial( enrich_origin_visit, with_origin_link=True, with_origin_visit_link=False ), request=request, ) @api_route( r"/origin/(?P.+)/intrinsic-metadata/", "api-origin-intrinsic-metadata" ) @api_doc("/origin/intrinsic-metadata/") @format_docstring() def api_origin_intrinsic_metadata(request, origin_url): """ .. http:get:: /api/1/origin/(origin_url)/intrinsic-metadata Get intrinsic metadata of a software origin (as a JSON-LD/CodeMeta dictionary). :param string origin_url: the origin url :>json string ???: intrinsic metadata field of the origin {common_headers} :statuscode 200: no error :statuscode 404: requested origin can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`origin/https://github.com/python/cpython/intrinsic-metadata` """ return api_lookup( archive.lookup_origin_intrinsic_metadata, origin_url, notfound_msg=f"Origin with url {origin_url} not found", enrich_fn=enrich_origin, request=request, ) diff --git a/swh/web/templates/includes/origin-search-form.html b/swh/web/templates/includes/origin-search-form.html index 3356bba6..18dc1afa 100644 --- a/swh/web/templates/includes/origin-search-form.html +++ b/swh/web/templates/includes/origin-search-form.html @@ -1,68 +1,68 @@ {% comment %} Copyright (C) 2020-2021 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information {% endcomment %}
+ oninput="swh.webapp.validateSWHIDInput(this)" autofocus>
{% if user.is_authenticated and user.is_staff or "swh.web.search_ql" in user.get_all_permissions %} {% endif %}
diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py index aec07e32..326ebc26 100644 --- a/swh/web/tests/api/views/test_origin.py +++ b/swh/web/tests/api/views/test_origin.py @@ -1,795 +1,838 @@ # Copyright (C) 2015-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import timedelta import json from hypothesis import given import pytest from swh.indexer.storage.model import OriginIntrinsicMetadataRow from swh.model.hashutil import hash_to_bytes from swh.model.model import Origin, OriginVisit, OriginVisitStatus from swh.search.exc import SearchQuerySyntaxError from swh.search.interface import PagedResult from swh.storage.exc import StorageAPIError, StorageDBError from swh.storage.utils import now from swh.web.api.utils import enrich_origin, enrich_origin_visit from swh.web.common.exc import BadInputExc from swh.web.common.origin_visits import get_origin_visits from swh.web.common.utils import reverse from swh.web.tests.api.views.utils import scroll_results from swh.web.tests.data import ( INDEXER_TOOL, ORIGIN_MASTER_REVISION, ORIGIN_METADATA_KEY, ORIGIN_METADATA_VALUE, ) from swh.web.tests.strategies import new_origin, new_snapshots, visit_dates from swh.web.tests.utils import check_api_get_responses def test_api_lookup_origin_visits_raise_error(api_client, mocker): mock_get_origin_visits = mocker.patch("swh.web.api.views.origin.get_origin_visits") err_msg = "voluntary error to check the bad request middleware." mock_get_origin_visits.side_effect = BadInputExc(err_msg) url = reverse("api-1-origin-visits", url_args={"origin_url": "http://foo"}) rv = check_api_get_responses(api_client, url, status_code=400) assert rv.data == {"exception": "BadInputExc", "reason": err_msg} def test_api_lookup_origin_visits_raise_swh_storage_error_db(api_client, mocker): mock_get_origin_visits = mocker.patch("swh.web.api.views.origin.get_origin_visits") err_msg = "Storage exploded! Will be back online shortly!" mock_get_origin_visits.side_effect = StorageDBError(err_msg) url = reverse("api-1-origin-visits", url_args={"origin_url": "http://foo"}) rv = check_api_get_responses(api_client, url, status_code=503) assert rv.data == { "exception": "StorageDBError", "reason": "An unexpected error occurred in the backend: %s" % err_msg, } def test_api_lookup_origin_visits_raise_swh_storage_error_api(api_client, mocker): mock_get_origin_visits = mocker.patch("swh.web.api.views.origin.get_origin_visits") err_msg = "Storage API dropped dead! Will resurrect asap!" mock_get_origin_visits.side_effect = StorageAPIError(err_msg) url = reverse("api-1-origin-visits", url_args={"origin_url": "http://foo"}) rv = check_api_get_responses(api_client, url, status_code=503) assert rv.data == { "exception": "StorageAPIError", "reason": "An unexpected error occurred in the api backend: %s" % err_msg, } @given(new_origin(), visit_dates(3), new_snapshots(3)) def test_api_lookup_origin_visits( api_client, subtest, new_origin, visit_dates, new_snapshots ): # ensure archive_data fixture will be reset between each hypothesis # example test run @subtest def test_inner(archive_data): archive_data.origin_add([new_origin]) for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( [ OriginVisit( origin=new_origin.url, date=visit_date, type="git", ) ] )[0] archive_data.snapshot_add([new_snapshots[i]]) visit_status = OriginVisitStatus( origin=new_origin.url, visit=origin_visit.visit, date=now(), status="full", snapshot=new_snapshots[i].id, ) archive_data.origin_visit_status_add([visit_status]) all_visits = list(reversed(get_origin_visits(new_origin.to_dict()))) for last_visit, expected_visits in ( (None, all_visits[:2]), (all_visits[1]["visit"], all_visits[2:]), ): url = reverse( "api-1-origin-visits", url_args={"origin_url": new_origin.url}, query_params={"per_page": 2, "last_visit": last_visit}, ) rv = check_api_get_responses(api_client, url, status_code=200) for i in range(len(expected_visits)): expected_visits[i] = enrich_origin_visit( expected_visits[i], with_origin_link=False, with_origin_visit_link=True, request=rv.wsgi_request, ) assert rv.data == expected_visits @given(new_origin(), visit_dates(3), new_snapshots(3)) def test_api_lookup_origin_visits_by_id( api_client, subtest, new_origin, visit_dates, new_snapshots ): # ensure archive_data fixture will be reset between each hypothesis # example test run @subtest def test_inner(archive_data): archive_data.origin_add([new_origin]) for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( [ OriginVisit( origin=new_origin.url, date=visit_date, type="git", ) ] )[0] archive_data.snapshot_add([new_snapshots[i]]) visit_status = OriginVisitStatus( origin=new_origin.url, visit=origin_visit.visit, date=now(), status="full", snapshot=new_snapshots[i].id, ) archive_data.origin_visit_status_add([visit_status]) all_visits = list(reversed(get_origin_visits(new_origin.to_dict()))) for last_visit, expected_visits in ( (None, all_visits[:2]), (all_visits[1]["visit"], all_visits[2:4]), ): url = reverse( "api-1-origin-visits", url_args={"origin_url": new_origin.url}, query_params={"per_page": 2, "last_visit": last_visit}, ) rv = check_api_get_responses(api_client, url, status_code=200) for i in range(len(expected_visits)): expected_visits[i] = enrich_origin_visit( expected_visits[i], with_origin_link=False, with_origin_visit_link=True, request=rv.wsgi_request, ) assert rv.data == expected_visits @given(new_origin(), visit_dates(3), new_snapshots(3)) def test_api_lookup_origin_visit( api_client, subtest, new_origin, visit_dates, new_snapshots ): # ensure archive_data fixture will be reset between each hypothesis # example test run @subtest def test_inner(archive_data): archive_data.origin_add([new_origin]) for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( [ OriginVisit( origin=new_origin.url, date=visit_date, type="git", ) ] )[0] visit_id = origin_visit.visit archive_data.snapshot_add([new_snapshots[i]]) visit_status = OriginVisitStatus( origin=new_origin.url, visit=origin_visit.visit, date=visit_date + timedelta(minutes=5), status="full", snapshot=new_snapshots[i].id, ) archive_data.origin_visit_status_add([visit_status]) url = reverse( "api-1-origin-visit", url_args={"origin_url": new_origin.url, "visit_id": visit_id}, ) rv = check_api_get_responses(api_client, url, status_code=200) expected_visit = archive_data.origin_visit_get_by(new_origin.url, visit_id) expected_visit = enrich_origin_visit( expected_visit, with_origin_link=True, with_origin_visit_link=False, request=rv.wsgi_request, ) assert rv.data == expected_visit @given(new_origin()) def test_api_lookup_origin_visit_latest_no_visit(api_client, archive_data, new_origin): archive_data.origin_add([new_origin]) url = reverse("api-1-origin-visit-latest", url_args={"origin_url": new_origin.url}) rv = check_api_get_responses(api_client, url, status_code=404) assert rv.data == { "exception": "NotFoundExc", "reason": "No visit for origin %s found" % new_origin.url, } @given(new_origin(), visit_dates(2), new_snapshots(1)) def test_api_lookup_origin_visit_latest( api_client, subtest, new_origin, visit_dates, new_snapshots ): # ensure archive_data fixture will be reset between each hypothesis # example test run @subtest def test_inner(archive_data): archive_data.origin_add([new_origin]) visit_dates.sort() visit_ids = [] for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( [ OriginVisit( origin=new_origin.url, date=visit_date, type="git", ) ] )[0] visit_ids.append(origin_visit.visit) archive_data.snapshot_add([new_snapshots[0]]) visit_status = OriginVisitStatus( origin=new_origin.url, visit=visit_ids[0], date=now(), status="full", snapshot=new_snapshots[0].id, ) archive_data.origin_visit_status_add([visit_status]) url = reverse( "api-1-origin-visit-latest", url_args={"origin_url": new_origin.url} ) rv = check_api_get_responses(api_client, url, status_code=200) expected_visit = archive_data.origin_visit_status_get_latest( new_origin.url, type="git" ) expected_visit = enrich_origin_visit( expected_visit, with_origin_link=True, with_origin_visit_link=False, request=rv.wsgi_request, ) assert rv.data == expected_visit @given(new_origin(), visit_dates(2), new_snapshots(1)) def test_api_lookup_origin_visit_latest_with_snapshot( api_client, subtest, new_origin, visit_dates, new_snapshots ): # ensure archive_data fixture will be reset between each hypothesis # example test run @subtest def test_inner(archive_data): archive_data.origin_add([new_origin]) visit_dates.sort() visit_ids = [] for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( [ OriginVisit( origin=new_origin.url, date=visit_date, type="git", ) ] )[0] visit_ids.append(origin_visit.visit) archive_data.snapshot_add([new_snapshots[0]]) # Add snapshot to the latest visit visit_id = visit_ids[-1] visit_status = OriginVisitStatus( origin=new_origin.url, visit=visit_id, date=now(), status="full", snapshot=new_snapshots[0].id, ) archive_data.origin_visit_status_add([visit_status]) url = reverse( "api-1-origin-visit-latest", url_args={"origin_url": new_origin.url}, query_params={"require_snapshot": True}, ) rv = check_api_get_responses(api_client, url, status_code=200) expected_visit = archive_data.origin_visit_status_get_latest( new_origin.url, type="git", require_snapshot=True ) expected_visit = enrich_origin_visit( expected_visit, with_origin_link=True, with_origin_visit_link=False, request=rv.wsgi_request, ) assert rv.data == expected_visit def test_api_lookup_origin_visit_not_found(api_client, origin): all_visits = list(reversed(get_origin_visits(origin))) max_visit_id = max([v["visit"] for v in all_visits]) url = reverse( "api-1-origin-visit", url_args={"origin_url": origin["url"], "visit_id": max_visit_id + 1}, ) rv = check_api_get_responses(api_client, url, status_code=404) assert rv.data == { "exception": "NotFoundExc", "reason": "Origin %s or its visit with id %s not found!" % (origin["url"], max_visit_id + 1), } def test_api_origins_wrong_input(api_client, archive_data): """Should fail with 400 if the input is deprecated.""" # fail if wrong input url = reverse("api-1-origins", query_params={"origin_from": 1}) rv = check_api_get_responses(api_client, url, status_code=400) assert rv.data == { "exception": "BadInputExc", "reason": "Please use the Link header to browse through result", } def test_api_origins(api_client, archive_data): page_result = archive_data.origin_list(limit=10000) origins = page_result.results origin_urls = {origin.url for origin in origins} # Get only one url = reverse("api-1-origins", query_params={"origin_count": 1}) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == 1 assert {origin["url"] for origin in rv.data} <= origin_urls # Get all url = reverse("api-1-origins", query_params={"origin_count": len(origins)}) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == len(origins) assert {origin["url"] for origin in rv.data} == origin_urls # Get "all + 10" url = reverse("api-1-origins", query_params={"origin_count": len(origins) + 10}) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == len(origins) assert {origin["url"] for origin in rv.data} == origin_urls @pytest.mark.parametrize("origin_count", [1, 2, 10, 100]) def test_api_origins_scroll(api_client, archive_data, origin_count): page_result = archive_data.origin_list(limit=10000) origins = page_result.results origin_urls = {origin.url for origin in origins} url = reverse("api-1-origins", query_params={"origin_count": origin_count}) results = scroll_results(api_client, url) assert len(results) == len(origins) assert {origin["url"] for origin in results} == origin_urls def test_api_origin_by_url(api_client, archive_data, origin): origin_url = origin["url"] url = reverse("api-1-origin", url_args={"origin_url": origin_url}) rv = check_api_get_responses(api_client, url, status_code=200) expected_origin = archive_data.origin_get([origin_url])[0] expected_origin = enrich_origin(expected_origin, rv.wsgi_request) assert rv.data == expected_origin @given(new_origin()) def test_api_origin_not_found(api_client, new_origin): url = reverse("api-1-origin", url_args={"origin_url": new_origin.url}) rv = check_api_get_responses(api_client, url, status_code=404) assert rv.data == { "exception": "NotFoundExc", "reason": "Origin with url %s not found!" % new_origin.url, } @pytest.mark.parametrize("backend", ["swh-search", "swh-storage"]) def test_api_origin_search(api_client, mocker, backend): if backend != "swh-search": # equivalent to not configuring search in the config mocker.patch("swh.web.common.archive.search", None) expected_origins = { "https://github.com/wcoder/highlightjs-line-numbers.js", "https://github.com/memononen/libtess2", } # Search for 'github.com', get only one url = reverse( "api-1-origin-search", url_args={"url_pattern": "github.com"}, query_params={"limit": 1}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == 1 assert {origin["url"] for origin in rv.data} <= expected_origins assert rv.data == [ enrich_origin({"url": origin["url"]}, request=rv.wsgi_request) for origin in rv.data ] # Search for 'github.com', get all url = reverse( "api-1-origin-search", url_args={"url_pattern": "github.com"}, query_params={"limit": 2}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert {origin["url"] for origin in rv.data} == expected_origins assert rv.data == [ enrich_origin({"url": origin["url"]}, request=rv.wsgi_request) for origin in rv.data ] # Search for 'github.com', get more than available url = reverse( "api-1-origin-search", url_args={"url_pattern": "github.com"}, query_params={"limit": 10}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert {origin["url"] for origin in rv.data} == expected_origins assert rv.data == [ enrich_origin({"url": origin["url"]}, request=rv.wsgi_request) for origin in rv.data ] @pytest.mark.parametrize("backend", ["swh-search", "swh-storage"]) def test_api_origin_search_words(api_client, mocker, backend): if backend != "swh-search": # equivalent to not configuring search in the config mocker.patch("swh.web.common.archive.search", None) expected_origins = { "https://github.com/wcoder/highlightjs-line-numbers.js", "https://github.com/memononen/libtess2", } url = reverse( "api-1-origin-search", url_args={"url_pattern": "github com"}, query_params={"limit": 2}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert {origin["url"] for origin in rv.data} == expected_origins url = reverse( "api-1-origin-search", url_args={"url_pattern": "com github"}, query_params={"limit": 2}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert {origin["url"] for origin in rv.data} == expected_origins url = reverse( "api-1-origin-search", url_args={"url_pattern": "memononen libtess2"}, query_params={"limit": 2}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == 1 assert {origin["url"] for origin in rv.data} == { "https://github.com/memononen/libtess2" } url = reverse( "api-1-origin-search", url_args={"url_pattern": "libtess2 memononen"}, query_params={"limit": 2}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == 1 assert {origin["url"] for origin in rv.data} == { "https://github.com/memononen/libtess2" } @pytest.mark.parametrize("backend", ["swh-search", "swh-storage"]) def test_api_origin_search_visit_type(api_client, mocker, backend): if backend != "swh-search": # equivalent to not configuring search in the config mocker.patch("swh.web.common.archive.search", None) expected_origins = { "https://github.com/wcoder/highlightjs-line-numbers.js", "https://github.com/memononen/libtess2", } url = reverse( "api-1-origin-search", url_args={ "url_pattern": "github com", }, query_params={"visit_type": "git"}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert {origin["url"] for origin in rv.data} == expected_origins url = reverse( "api-1-origin-search", url_args={ "url_pattern": "github com", }, query_params={"visit_type": "foo"}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert rv.data == [] def test_api_origin_search_use_ql(api_client, mocker): expected_origins = { "https://github.com/wcoder/highlightjs-line-numbers.js", "https://github.com/memononen/libtess2", } ORIGINS = [{"url": origin} for origin in expected_origins] mock_archive_search = mocker.patch("swh.web.common.archive.search") mock_archive_search.origin_search.return_value = PagedResult( results=ORIGINS, next_page_token=None, ) query = "origin : 'github.com'" url = reverse( "api-1-origin-search", url_args={"url_pattern": query}, query_params={"visit_type": "git", "use_ql": "true"}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert {origin["url"] for origin in rv.data} == expected_origins mock_archive_search.origin_search.assert_called_with( query=query, page_token=None, with_visit=False, visit_types=["git"], limit=70 ) def test_api_origin_search_ql_syntax_error(api_client, mocker): mock_archive_search = mocker.patch("swh.web.common.archive.search") mock_archive_search.origin_search.side_effect = SearchQuerySyntaxError( "Invalid syntax" ) query = "this is not a valid query" url = reverse( "api-1-origin-search", url_args={"url_pattern": query}, query_params={"visit_type": "git", "use_ql": "true"}, ) rv = check_api_get_responses(api_client, url, status_code=400) assert rv.data == { "exception": "BadInputExc", "reason": "Syntax error in search query: Invalid syntax", } mock_archive_search.origin_search.assert_called_with( query=query, page_token=None, with_visit=False, visit_types=["git"], limit=70 ) @pytest.mark.parametrize("backend", ["swh-search", "swh-storage"]) @pytest.mark.parametrize("limit", [1, 2, 3, 10]) def test_api_origin_search_scroll(api_client, archive_data, mocker, limit, backend): if backend != "swh-search": # equivalent to not configuring search in the config mocker.patch("swh.web.common.archive.search", None) expected_origins = { "https://github.com/wcoder/highlightjs-line-numbers.js", "https://github.com/memononen/libtess2", } url = reverse( "api-1-origin-search", url_args={"url_pattern": "github.com"}, query_params={"limit": limit}, ) results = scroll_results(api_client, url) assert {origin["url"] for origin in results} == expected_origins @pytest.mark.parametrize("backend", ["swh-search", "swh-storage"]) def test_api_origin_search_limit(api_client, archive_data, tests_data, mocker, backend): if backend == "swh-search": tests_data["search"].origin_update( [{"url": "http://foobar/{}".format(i)} for i in range(2000)] ) else: # equivalent to not configuring search in the config mocker.patch("swh.web.common.archive.search", None) archive_data.origin_add( [Origin(url="http://foobar/{}".format(i)) for i in range(2000)] ) url = reverse( "api-1-origin-search", url_args={"url_pattern": "foobar"}, query_params={"limit": 1050}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == 1000 @pytest.mark.parametrize("backend", ["swh-search", "swh-indexer-storage"]) def test_api_origin_metadata_search(api_client, mocker, backend): mock_config = mocker.patch("swh.web.common.archive.config") mock_config.get_config.return_value = { "search_config": {"metadata_backend": backend} } url = reverse( "api-1-origin-metadata-search", query_params={"fulltext": ORIGIN_METADATA_VALUE} ) rv = check_api_get_responses(api_client, url, status_code=200) rv.data = sorted(rv.data, key=lambda d: d["url"]) expected_data = sorted( [ { "url": origin_url, "metadata": { "from_revision": ORIGIN_MASTER_REVISION[origin_url], "tool": { "name": INDEXER_TOOL["tool_name"], "version": INDEXER_TOOL["tool_version"], "configuration": INDEXER_TOOL["tool_configuration"], "id": INDEXER_TOOL["id"], }, "mappings": [], }, } for origin_url in sorted(ORIGIN_MASTER_REVISION.keys()) ], key=lambda d: d["url"], ) for i in range(len(expected_data)): expected = expected_data[i] response = rv.data[i] metadata = response["metadata"].pop("metadata") assert any( [ORIGIN_METADATA_VALUE in json.dumps(val) for val in metadata.values()] ) assert response == expected def test_api_origin_metadata_search_limit(api_client, mocker): mock_idx_storage = mocker.patch("swh.web.common.archive.idx_storage") oimsft = mock_idx_storage.origin_intrinsic_metadata_search_fulltext oimsft.side_effect = lambda conjunction, limit: [ OriginIntrinsicMetadataRow( id=origin_url, from_revision=hash_to_bytes(master_rev), indexer_configuration_id=INDEXER_TOOL["id"], metadata={ORIGIN_METADATA_KEY: ORIGIN_METADATA_VALUE}, mappings=[], ) for origin_url, master_rev in ORIGIN_MASTER_REVISION.items() ] url = reverse( "api-1-origin-metadata-search", query_params={"fulltext": ORIGIN_METADATA_VALUE} ) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == len(ORIGIN_MASTER_REVISION) oimsft.assert_called_with(conjunction=[ORIGIN_METADATA_VALUE], limit=70) url = reverse( "api-1-origin-metadata-search", query_params={"fulltext": ORIGIN_METADATA_VALUE, "limit": 10}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == len(ORIGIN_MASTER_REVISION) oimsft.assert_called_with(conjunction=[ORIGIN_METADATA_VALUE], limit=10) url = reverse( "api-1-origin-metadata-search", query_params={"fulltext": ORIGIN_METADATA_VALUE, "limit": 987}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == len(ORIGIN_MASTER_REVISION) oimsft.assert_called_with(conjunction=[ORIGIN_METADATA_VALUE], limit=100) def test_api_origin_intrinsic_metadata(api_client, origin): url = reverse( "api-origin-intrinsic-metadata", url_args={"origin_url": origin["url"]} ) rv = check_api_get_responses(api_client, url, status_code=200) assert ORIGIN_METADATA_KEY in rv.data assert rv.data[ORIGIN_METADATA_KEY] == ORIGIN_METADATA_VALUE def test_api_origin_metadata_search_invalid(api_client, mocker): mock_idx_storage = mocker.patch("swh.web.common.archive.idx_storage") url = reverse("api-1-origin-metadata-search") check_api_get_responses(api_client, url, status_code=400) mock_idx_storage.assert_not_called() @pytest.mark.parametrize("backend", ["swh-counters", "swh-storage"]) def test_api_stat_counters(api_client, mocker, backend): mock_config = mocker.patch("swh.web.common.archive.config") mock_config.get_config.return_value = {"counters_backend": backend} url = reverse("api-1-stat-counters") rv = check_api_get_responses(api_client, url, status_code=200) counts = json.loads(rv.content) for obj in ["content", "origin", "release", "directory", "revision"]: assert counts.get(obj, 0) > 0 + + +@pytest.fixture +def archived_origins(archive_data): + page_result = archive_data.origin_list(page_token=None, limit=10000) + origins = [origin.to_dict() for origin in page_result.results] + for origin in origins: + ovs = archive_data.origin_visit_get_with_statuses(origin["url"]).results + del origin["id"] + origin["type"] = ovs[0].visit.type + + return origins + + +def test_api_origin_search_empty_pattern(api_client, archived_origins): + url = reverse( + "api-1-origin-search", + url_args={"url_pattern": ""}, + query_params={"limit": 10000}, + ) + + rv = check_api_get_responses(api_client, url, status_code=200) + + assert {o["url"] for o in rv.data} == {o["url"] for o in archived_origins} + + +def test_api_origin_search_empty_pattern_and_visit_type(api_client, archived_origins): + + visit_types = {o["type"] for o in archived_origins} + + for visit_type in visit_types: + + url = reverse( + "api-1-origin-search", + url_args={"url_pattern": ""}, + query_params={"visit_type": visit_type, "limit": 10000}, + ) + + rv = check_api_get_responses(api_client, url, status_code=200) + + assert {o["url"] for o in rv.data} == { + o["url"] for o in archived_origins if o["type"] == visit_type + }