diff --git a/assets/src/bundles/browse/origin-search.js b/assets/src/bundles/browse/origin-search.js
index 06ac97f7..7b547457 100644
--- a/assets/src/bundles/browse/origin-search.js
+++ b/assets/src/bundles/browse/origin-search.js
@@ -1,270 +1,271 @@
/**
* Copyright (C) 2018-2021 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU Affero General Public License version 3, or any later version
* See top-level LICENSE file for more information
*/
import {handleFetchError, errorMessageFromResponse, isArchivedOrigin} from 'utils/functions';
const limit = 100;
const linksPrev = [];
let linkNext = null;
let linkCurrent = null;
let inSearch = false;
function parseLinkHeader(s) {
  const re = /<(.+)>; rel="next"/;
  const match = s.match(re);
  // return undefined instead of throwing when there is no next link,
  // so callers can simply test the result
  return match ? match[1] : undefined;
}
function fixTableRowsStyle() {
setTimeout(() => {
$('#origin-search-results tbody tr').removeAttr('style');
});
}
function clearOriginSearchResultsTable() {
$('#origin-search-results tbody tr').remove();
}
async function populateOriginSearchResultsTable(origins) {
if (origins.length > 0) {
$('#swh-origin-search-results').show();
$('#swh-no-result').hide();
clearOriginSearchResultsTable();
const table = $('#origin-search-results tbody');
const promises = [];
for (const [i, origin] of origins.entries()) {
const browseUrl = `${Urls.browse_origin()}?origin_url=${encodeURIComponent(origin.url)}`;
let tableRow =
  `<tr id="origin-${i}" class="swh-search-result-entry">`;
tableRow +=
  `<td id="visit-type-origin-${i}" class="swh-origin-visit-type">` +
  'Checking</td>';
tableRow +=
  '<td>' +
  `<a href="${browseUrl}">${origin.url}</a></td>`;
tableRow +=
  `<td id="visit-status-origin-${i}">` +
  'Checking</td>';
tableRow += '</tr>';
table.append(tableRow);
// get async latest visit snapshot and update visit status icon
let latestSnapshotUrl = Urls.api_1_origin_visit_latest(origin.url);
latestSnapshotUrl += '?require_snapshot=true';
promises.push(fetch(latestSnapshotUrl));
}
const responses = await Promise.all(promises);
const responsesData = await Promise.all(responses.map(r => r.json()));
for (let i = 0; i < responses.length; ++i) {
const response = responses[i];
const data = responsesData[i];
if (response.status !== 404 && data.type) {
$(`#visit-type-origin-${i}`).html(data.type);
$(`#visit-status-origin-${i}`).html(
'Archived');
} else {
$(`#visit-type-origin-${i}`).html('unknown');
$(`#visit-status-origin-${i}`).html(
'Pending archival');
if ($('#swh-filter-empty-visits').prop('checked')) {
$(`#origin-${i}`).remove();
}
}
}
fixTableRowsStyle();
} else {
$('#swh-origin-search-results').hide();
$('#swh-no-result').text('No origins matching the search criteria were found.');
$('#swh-no-result').show();
}
if (linkNext === null) {
$('#origins-next-results-button').addClass('disabled');
} else {
$('#origins-next-results-button').removeClass('disabled');
}
if (linksPrev.length === 0) {
$('#origins-prev-results-button').addClass('disabled');
} else {
$('#origins-prev-results-button').removeClass('disabled');
}
inSearch = false;
setTimeout(() => {
window.scrollTo(0, 0);
});
}
function searchOriginsFirst(searchQueryText, limit) {
let baseSearchUrl;
const searchMetadata = $('#swh-search-origin-metadata').prop('checked');
if (searchMetadata) {
baseSearchUrl = new URL(Urls.api_1_origin_metadata_search(), window.location);
baseSearchUrl.searchParams.append('fulltext', searchQueryText);
} else {
const useSearchQL = $('#swh-search-use-ql').prop('checked');
baseSearchUrl = new URL(Urls.api_1_origin_search(searchQueryText), window.location);
baseSearchUrl.searchParams.append('use_ql', useSearchQL ?? false);
}
const withVisit = $('#swh-search-origins-with-visit').prop('checked');
baseSearchUrl.searchParams.append('limit', limit);
baseSearchUrl.searchParams.append('with_visit', withVisit);
const visitType = $('#swh-search-visit-type').val();
if (visitType !== 'any') {
baseSearchUrl.searchParams.append('visit_type', visitType);
}
const searchUrl = baseSearchUrl.toString();
searchOrigins(searchUrl);
}
async function searchOrigins(searchUrl) {
clearOriginSearchResultsTable();
$('.swh-loading').addClass('show');
try {
const response = await fetch(searchUrl);
handleFetchError(response);
const data = await response.json();
// Save link to the current results page
linkCurrent = searchUrl;
// Save link to the next results page.
linkNext = null;
if (response.headers.has('Link')) {
const parsedLink = parseLinkHeader(response.headers.get('Link'));
if (parsedLink !== undefined) {
linkNext = parsedLink;
}
}
// linksPrev is updated by the caller, which is the one to know
// whether we're going forward or backward through the pages.
$('.swh-loading').removeClass('show');
populateOriginSearchResultsTable(data);
} catch (errorResponse) {
const errorData = await errorResponse.json();
$('.swh-loading').removeClass('show');
inSearch = false;
$('#swh-origin-search-results').hide();
$('#swh-no-result').text(errorMessageFromResponse(
errorData, 'An unknown error occurred while searching origins'));
$('#swh-no-result').show();
}
}
async function doSearch() {
$('#swh-no-result').hide();
const searchQueryText = $('#swh-origins-url-patterns').val();
inSearch = true;
if (searchQueryText.startsWith('swh:')) {
try {
// searchQueryText may be a SWHID, so send it to the SWHID resolve endpoint
const resolveSWHIDUrl = Urls.api_1_resolve_swhid(searchQueryText);
const response = await fetch(resolveSWHIDUrl);
handleFetchError(response);
const data = await response.json();
// SWHID has been successfully resolved,
// so redirect to browse page
window.location = data.browse_url;
} catch (response) {
// display a useful error message if the input
// looks like a SWHID
const data = await response.json();
$('#swh-origin-search-results').hide();
$('.swh-search-pagination').hide();
$('#swh-no-result').text(data.reason);
$('#swh-no-result').show();
}
} else if (await isArchivedOrigin(searchQueryText)) {
// redirect to the browse origin
window.location.href =
`${Urls.browse_origin()}?origin_url=${encodeURIComponent(searchQueryText)}`;
} else {
// otherwise, proceed with origins search irrespective of the error
$('#swh-origin-search-results').show();
$('.swh-search-pagination').show();
searchOriginsFirst(searchQueryText, limit);
}
}
export function initOriginSearch() {
$(document).ready(() => {
$('#swh-search-origins').submit(event => {
event.preventDefault();
if (event.target.checkValidity()) {
$(event.target).removeClass('was-validated');
const searchQueryText = $('#swh-origins-url-patterns').val().trim();
const withVisit = $('#swh-search-origins-with-visit').prop('checked');
const withContent = $('#swh-filter-empty-visits').prop('checked');
const useSearchQL = $('#swh-search-use-ql').prop('checked');
const searchMetadata = $('#swh-search-origin-metadata').prop('checked');
const visitType = $('#swh-search-visit-type').val();
const queryParameters = new URLSearchParams();
queryParameters.append('q', searchQueryText);
if (withVisit) {
queryParameters.append('with_visit', withVisit);
}
if (withContent) {
queryParameters.append('with_content', withContent);
}
if (useSearchQL) {
queryParameters.append('use_ql', useSearchQL ?? false);
}
if (searchMetadata) {
queryParameters.append('search_metadata', searchMetadata);
}
if (visitType !== 'any') {
queryParameters.append('visit_type', visitType);
}
// Update the url, triggering page reload and effective search
window.location = `${Urls.browse_search()}?${queryParameters.toString()}`;
} else {
$(event.target).addClass('was-validated');
}
});
$('#origins-next-results-button').click(event => {
if ($('#origins-next-results-button').hasClass('disabled') || inSearch) {
return;
}
inSearch = true;
linksPrev.push(linkCurrent);
searchOrigins(linkNext);
event.preventDefault();
});
$('#origins-prev-results-button').click(event => {
if ($('#origins-prev-results-button').hasClass('disabled') || inSearch) {
return;
}
inSearch = true;
searchOrigins(linksPrev.pop());
event.preventDefault();
});
- const urlParams = new URLSearchParams(window.location.search);
- const query = urlParams.get('q');
- const withVisit = urlParams.has('with_visit');
- const useSearchQL = urlParams.has('use_ql');
- const withContent = urlParams.has('with_content');
- const searchMetadata = urlParams.has('search_metadata');
- const visitType = urlParams.get('visit_type');
- if (query) {
+ if (window.location.search) {
+ const urlParams = new URLSearchParams(window.location.search);
+ const query = urlParams.get('q');
+ const withVisit = urlParams.has('with_visit');
+ const useSearchQL = urlParams.has('use_ql');
+ const withContent = urlParams.has('with_content');
+ const searchMetadata = urlParams.has('search_metadata');
+ const visitType = urlParams.get('visit_type');
+
$('#swh-origins-url-patterns').val(query);
$('#swh-search-origins-with-visit').prop('checked', withVisit);
$('#swh-search-use-ql').prop('checked', useSearchQL ?? false);
$('#swh-filter-empty-visits').prop('checked', withContent);
$('#swh-search-origin-metadata').prop('checked', searchMetadata);
if (visitType) {
$('#swh-search-visit-type').val(visitType);
}
doSearch();
}
});
}
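
The pagination above is driven entirely by the HTTP Link header: searchOrigins records the current page URL, extracts the next one from the header, and the prev/next button handlers push and pop linksPrev. A minimal standalone sketch of that state machine follows (the Pager name and API are hypothetical illustrations, not part of the patch):

// Parse an RFC 5988 Link header value such as '<https://...>; rel="next"'.
function parseNextLink(header) {
  const match = header.match(/<(.+)>; rel="next"/);
  return match ? match[1] : null; // null when there is no next page
}

// Hypothetical pager mirroring linksPrev / linkCurrent / linkNext above:
// moving forward pushes the current page URL, moving backward pops it.
class Pager {
  constructor() {
    this.prev = [];      // stack of previously visited page URLs
    this.current = null; // URL of the page currently displayed
    this.next = null;    // URL taken from the Link header, if any
  }
  pageLoaded(url, linkHeader) {
    this.current = url;
    this.next = linkHeader ? parseNextLink(linkHeader) : null;
  }
  forward() {  // returns the URL to fetch for the next page
    this.prev.push(this.current);
    return this.next;
  }
  backward() { // returns the URL to fetch for the previous page
    return this.prev.pop();
  }
}

For instance, after pageLoaded(page1Url, '<page2Url>; rel="next"'), forward() returns page2Url and a later backward() returns page1Url, which matches the enabled/disabled state of the two pagination buttons.
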
diff --git a/cypress/integration/origin-search.spec.js b/cypress/integration/origin-search.spec.js
index 4589adbf..3e56ca82 100644
--- a/cypress/integration/origin-search.spec.js
+++ b/cypress/integration/origin-search.spec.js
@@ -1,608 +1,653 @@
/**
* Copyright (C) 2019-2021 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU Affero General Public License version 3, or any later version
* See top-level LICENSE file for more information
*/
const nonExistentText = 'NoMatchExists';
let origin;
let url;
function doSearch(searchText, searchInputElt = '#swh-origins-url-patterns') {
if (searchText.startsWith('swh:')) {
cy.intercept('**/api/1/resolve/**')
.as('swhidResolve');
}
cy.get(searchInputElt)
// to avoid sending too many SWHID validation requests,
// as cypress inserts characters one by one when using type
.invoke('val', searchText.slice(0, -1))
.type(searchText.slice(-1))
.get('.swh-search-icon')
.click({force: true});
if (searchText.startsWith('swh:')) {
cy.wait('@swhidResolve');
}
}
function searchShouldRedirect(searchText, redirectUrl) {
doSearch(searchText);
cy.location('pathname')
.should('equal', redirectUrl);
}
function searchShouldShowNotFound(searchText, msg) {
doSearch(searchText);
if (searchText.startsWith('swh:')) {
cy.get('.invalid-feedback')
.should('be.visible')
.and('contain', msg);
}
}
function stubOriginVisitLatestRequests(status = 200, response = {type: 'tar'}, aliasSuffix = '') {
cy.intercept({url: '**/visit/latest/**'}, {
body: response,
statusCode: status
}).as(`originVisitLatest${aliasSuffix}`);
}
describe('Test origin-search', function() {
before(function() {
origin = this.origin[0];
url = this.Urls.browse_search();
});
beforeEach(function() {
cy.visit(url);
});
it('should have focus on search form after page load', function() {
cy.get('#swh-origins-url-patterns')
.should('have.attr', 'autofocus');
// for some reason, autofocus is not honored when running cypress tests,
// while it works in non-controlled browsers
// .should('have.focus');
});
it('should redirect to browse when archived URL is searched', function() {
cy.get('#swh-origins-url-patterns')
.type(origin.url);
cy.get('.swh-search-icon')
.click();
cy.location('pathname')
.should('eq', this.Urls.browse_origin_directory());
cy.location('search')
.should('eq', `?origin_url=${origin.url}`);
});
it('should not redirect for non valid URL', function() {
cy.get('#swh-origins-url-patterns')
.type('www.example'); // Invalid URL
cy.get('.swh-search-icon')
.click();
cy.location('pathname')
.should('eq', this.Urls.browse_search()); // Stay in the current page
});
it('should not redirect for valid non archived URL', function() {
cy.get('#swh-origins-url-patterns')
.type('http://eaxmple.com/test/'); // Valid URL, but not archived
cy.get('.swh-search-icon')
.click();
cy.location('pathname')
.should('eq', this.Urls.browse_search()); // Stay in the current page
});
it('should remove origin URL with no archived content', function() {
stubOriginVisitLatestRequests(404);
// Use a non-full origin URL here,
// because T3354 redirects to the origin in case of a valid, archived URL
cy.get('#swh-origins-url-patterns')
.type(origin.url.slice(0, -1));
cy.get('.swh-search-icon')
.click();
cy.wait('@originVisitLatest');
cy.get('#origin-search-results')
.should('be.visible')
.find('tbody tr').should('have.length', 0);
stubOriginVisitLatestRequests(200, {}, '2');
cy.get('.swh-search-icon')
.click();
cy.wait('@originVisitLatest2');
cy.get('#origin-search-results')
.should('be.visible')
.find('tbody tr').should('have.length', 0);
});
it('should filter origins by visit type', function() {
cy.intercept('**/visit/latest/**').as('checkOriginVisits');
cy.get('#swh-origins-url-patterns')
.type('http');
for (const visitType of ['git', 'tar']) {
cy.get('#swh-search-visit-type')
.select(visitType);
cy.get('.swh-search-icon')
.click();
cy.wait('@checkOriginVisits');
cy.get('#origin-search-results')
.should('be.visible');
cy.get('tbody tr td.swh-origin-visit-type').then(elts => {
for (const elt of elts) {
cy.get(elt).should('have.text', visitType);
}
});
}
});
it('should show not found message when no repo matches', function() {
searchShouldShowNotFound(nonExistentText,
'No origins matching the search criteria were found.');
});
it('should add appropriate URL parameters', function() {
// Check all three checkboxes and check if
// correct url params are added
cy.get('#swh-search-origins-with-visit')
.check({force: true})
.get('#swh-filter-empty-visits')
.check({force: true})
.get('#swh-search-origin-metadata')
.check({force: true})
.then(() => {
const searchText = origin.url.slice(0, -1);
doSearch(searchText);
cy.location('search').then(locationSearch => {
const urlParams = new URLSearchParams(locationSearch);
const query = urlParams.get('q');
const withVisit = urlParams.has('with_visit');
const withContent = urlParams.has('with_content');
const searchMetadata = urlParams.has('search_metadata');
assert.strictEqual(query, searchText);
assert.strictEqual(withVisit, true);
assert.strictEqual(withContent, true);
assert.strictEqual(searchMetadata, true);
});
});
});
it('should search in origin intrinsic metadata', function() {
cy.intercept('GET', '**/origin/metadata-search/**').as(
'originMetadataSearch'
);
cy.get('#swh-search-origins-with-visit')
.check({force: true})
.get('#swh-filter-empty-visits')
.check({force: true})
.get('#swh-search-origin-metadata')
.check({force: true})
.then(() => {
const searchText = 'plugin';
doSearch(searchText);
cy.wait('@originMetadataSearch').then((req) => {
expect(req.response.body[0].metadata.metadata.description).to.equal(
'Line numbering plugin for Highlight.js'
// metadata is defined in _TEST_ORIGINS variable in swh/web/tests/data.py
);
});
});
});
it('should not send request to the resolve endpoint', function() {
cy.intercept(`${this.Urls.api_1_resolve_swhid('').slice(0, -1)}**`)
.as('resolveSWHID');
cy.intercept(`${this.Urls.api_1_origin_search(origin.url.slice(0, -1))}**`)
.as('searchOrigin');
cy.get('#swh-origins-url-patterns')
.type(origin.url.slice(0, -1), {delay: 0, force: true});
cy.get('.swh-search-icon')
.click();
cy.wait('@searchOrigin');
cy.xhrShouldBeCalled('resolveSWHID', 0);
cy.xhrShouldBeCalled('searchOrigin', 1);
});
it('should add query language support for staff users', function() {
cy.get('#swh-search-use-ql')
.should('not.exist');
cy.adminLogin();
cy.visit(url);
cy.get('#swh-search-use-ql')
.should('exist');
});
it('should show error messages when using the query language', function() {
cy.adminLogin();
cy.visit(url);
cy.intercept('GET', `${this.Urls.api_1_origin_search('**')}**`,
{
body: {
'exception': 'BadInputExc',
'reason': 'Syntax error in search query: Invalid query'
},
statusCode: 400
})
.as('searchOrigin');
cy.get('#swh-search-use-ql')
.should('exist')
.click({force: true}); // Covered by label
cy.get('#swh-origins-url-patterns')
.type('this is not a valid query')
.type('{enter}');
cy.wait('@searchOrigin').then((xhr) => {
cy.get('#swh-no-result')
.should('contain', 'Syntax error in search query');
});
});
+ function checkSearchHasResults() {
+ cy.get('.swh-search-icon')
+ .click();
+
+ cy.wait('@checkOriginVisits');
+
+ cy.get('#origin-search-results')
+ .should('be.visible');
+
+ cy.get('tbody tr td.swh-origin-visit-type')
+ .should('exist');
+ }
+
+ it('should search all origins when no pattern is provided', function() {
+ cy.intercept('**/visit/latest/**').as('checkOriginVisits');
+
+ // with default filters
+ checkSearchHasResults();
+
+ // remove filters
+ cy.get('#swh-search-origins-with-visit')
+ .uncheck({force: true})
+ .get('#swh-filter-empty-visits')
+ .uncheck({force: true});
+ checkSearchHasResults();
+
+ });
+
+ it('should search all origins for a visit type', function() {
+ cy.intercept('**/visit/latest/**').as('checkOriginVisits');
+
+ for (const visitType of ['git', 'tar']) {
+ cy.get('#swh-search-visit-type')
+ .select(visitType);
+
+ checkSearchHasResults();
+
+ cy.get('tbody tr td.swh-origin-visit-type').then(elts => {
+ for (const elt of elts) {
+ cy.get(elt).should('have.text', visitType);
+ }
+ });
+ }
+ });
+
context('Test pagination', function() {
it('should not paginate if there are not many results', function() {
// Setup search
cy.get('#swh-search-origins-with-visit')
.uncheck({force: true})
.get('#swh-filter-empty-visits')
.uncheck({force: true})
.then(() => {
const searchText = 'libtess';
// Get first page of results
doSearch(searchText);
cy.get('.swh-search-result-entry')
.should('have.length', 1);
cy.get('.swh-search-result-entry#origin-0 td a')
.should('have.text', 'https://github.com/memononen/libtess2');
cy.get('#origins-prev-results-button')
.should('have.class', 'disabled');
cy.get('#origins-next-results-button')
.should('have.class', 'disabled');
});
});
it('should paginate forward when there are many results', function() {
stubOriginVisitLatestRequests();
// Setup search
cy.get('#swh-search-origins-with-visit')
.uncheck({force: true})
.get('#swh-filter-empty-visits')
.uncheck({force: true})
.then(() => {
const searchText = 'many.origins';
// Get first page of results
doSearch(searchText);
cy.wait('@originVisitLatest');
cy.get('.swh-search-result-entry')
.should('have.length', 100);
cy.get('.swh-search-result-entry#origin-0 td a')
.should('have.text', 'https://many.origins/1');
cy.get('.swh-search-result-entry#origin-99 td a')
.should('have.text', 'https://many.origins/100');
cy.get('#origins-prev-results-button')
.should('have.class', 'disabled');
cy.get('#origins-next-results-button')
.should('not.have.class', 'disabled');
// Get second page of results
cy.get('#origins-next-results-button a')
.click();
cy.wait('@originVisitLatest');
cy.get('.swh-search-result-entry')
.should('have.length', 100);
cy.get('.swh-search-result-entry#origin-0 td a')
.should('have.text', 'https://many.origins/101');
cy.get('.swh-search-result-entry#origin-99 td a')
.should('have.text', 'https://many.origins/200');
cy.get('#origins-prev-results-button')
.should('not.have.class', 'disabled');
cy.get('#origins-next-results-button')
.should('not.have.class', 'disabled');
// Get third (and last) page of results
cy.get('#origins-next-results-button a')
.click();
cy.wait('@originVisitLatest');
cy.get('.swh-search-result-entry')
.should('have.length', 50);
cy.get('.swh-search-result-entry#origin-0 td a')
.should('have.text', 'https://many.origins/201');
cy.get('.swh-search-result-entry#origin-49 td a')
.should('have.text', 'https://many.origins/250');
cy.get('#origins-prev-results-button')
.should('not.have.class', 'disabled');
cy.get('#origins-next-results-button')
.should('have.class', 'disabled');
});
});
it('should paginate backward from a middle page', function() {
stubOriginVisitLatestRequests();
// Setup search
cy.get('#swh-search-origins-with-visit')
.uncheck({force: true})
.get('#swh-filter-empty-visits')
.uncheck({force: true})
.then(() => {
const searchText = 'many.origins';
// Get first page of results
doSearch(searchText);
cy.wait('@originVisitLatest');
cy.get('#origins-prev-results-button')
.should('have.class', 'disabled');
cy.get('#origins-next-results-button')
.should('not.have.class', 'disabled');
// Get second page of results
cy.get('#origins-next-results-button a')
.click();
cy.wait('@originVisitLatest');
cy.get('#origins-prev-results-button')
.should('not.have.class', 'disabled');
cy.get('#origins-next-results-button')
.should('not.have.class', 'disabled');
// Get first page of results again
cy.get('#origins-prev-results-button a')
.click();
cy.wait('@originVisitLatest');
cy.get('.swh-search-result-entry')
.should('have.length', 100);
cy.get('.swh-search-result-entry#origin-0 td a')
.should('have.text', 'https://many.origins/1');
cy.get('.swh-search-result-entry#origin-99 td a')
.should('have.text', 'https://many.origins/100');
cy.get('#origins-prev-results-button')
.should('have.class', 'disabled');
cy.get('#origins-next-results-button')
.should('not.have.class', 'disabled');
});
});
it('should paginate backward from the last page', function() {
stubOriginVisitLatestRequests();
// Setup search
cy.get('#swh-search-origins-with-visit')
.uncheck({force: true})
.get('#swh-filter-empty-visits')
.uncheck({force: true})
.then(() => {
const searchText = 'many.origins';
// Get first page of results
doSearch(searchText);
cy.wait('@originVisitLatest');
cy.get('#origins-prev-results-button')
.should('have.class', 'disabled');
cy.get('#origins-next-results-button')
.should('not.have.class', 'disabled');
// Get second page of results
cy.get('#origins-next-results-button a')
.click();
cy.wait('@originVisitLatest');
cy.get('#origins-prev-results-button')
.should('not.have.class', 'disabled');
cy.get('#origins-next-results-button')
.should('not.have.class', 'disabled');
// Get third (and last) page of results
cy.get('#origins-next-results-button a')
.click();
cy.get('#origins-prev-results-button')
.should('not.have.class', 'disabled');
cy.get('#origins-next-results-button')
.should('have.class', 'disabled');
// Get second page of results again
cy.get('#origins-prev-results-button a')
.click();
cy.wait('@originVisitLatest');
cy.get('.swh-search-result-entry')
.should('have.length', 100);
cy.get('.swh-search-result-entry#origin-0 td a')
.should('have.text', 'https://many.origins/101');
cy.get('.swh-search-result-entry#origin-99 td a')
.should('have.text', 'https://many.origins/200');
cy.get('#origins-prev-results-button')
.should('not.have.class', 'disabled');
cy.get('#origins-next-results-button')
.should('not.have.class', 'disabled');
// Get first page of results again
cy.get('#origins-prev-results-button a')
.click();
cy.wait('@originVisitLatest');
cy.get('.swh-search-result-entry')
.should('have.length', 100);
cy.get('.swh-search-result-entry#origin-0 td a')
.should('have.text', 'https://many.origins/1');
cy.get('.swh-search-result-entry#origin-99 td a')
.should('have.text', 'https://many.origins/100');
cy.get('#origins-prev-results-button')
.should('have.class', 'disabled');
cy.get('#origins-next-results-button')
.should('not.have.class', 'disabled');
});
});
});
context('Test valid SWHIDs', function() {
it('should resolve directory', function() {
const redirectUrl = this.Urls.browse_directory(origin.content[0].directory);
const swhid = `swh:1:dir:${origin.content[0].directory}`;
searchShouldRedirect(swhid, redirectUrl);
});
it('should resolve revision', function() {
const redirectUrl = this.Urls.browse_revision(origin.revisions[0]);
const swhid = `swh:1:rev:${origin.revisions[0]}`;
searchShouldRedirect(swhid, redirectUrl);
});
it('should resolve snapshot', function() {
const redirectUrl = this.Urls.browse_snapshot_directory(origin.snapshot);
const swhid = `swh:1:snp:${origin.snapshot}`;
searchShouldRedirect(swhid, redirectUrl);
});
it('should resolve content', function() {
const redirectUrl = this.Urls.browse_content(`sha1_git:${origin.content[0].sha1git}`);
const swhid = `swh:1:cnt:${origin.content[0].sha1git}`;
searchShouldRedirect(swhid, redirectUrl);
});
it('should not send request to the search endpoint', function() {
const swhid = `swh:1:rev:${origin.revisions[0]}`;
cy.intercept(this.Urls.api_1_resolve_swhid(swhid))
.as('resolveSWHID');
cy.intercept(`${this.Urls.api_1_origin_search('').slice(0, -1)}**`)
.as('searchOrigin');
cy.get('#swh-origins-url-patterns')
.type(swhid, {delay: 0, force: true});
cy.get('.swh-search-icon')
.click();
cy.wait('@resolveSWHID');
cy.xhrShouldBeCalled('resolveSWHID', 1);
cy.xhrShouldBeCalled('searchOrigin', 0);
});
});
context('Test invalid SWHIDs', function() {
it('should show not found for directory', function() {
const swhid = `swh:1:dir:${this.unarchivedRepo.rootDirectory}`;
const msg = `Directory with sha1_git ${this.unarchivedRepo.rootDirectory} not found`;
searchShouldShowNotFound(swhid, msg);
});
it('should show not found for snapshot', function() {
const swhid = `swh:1:snp:${this.unarchivedRepo.snapshot}`;
const msg = `Snapshot with id ${this.unarchivedRepo.snapshot} not found!`;
searchShouldShowNotFound(swhid, msg);
});
it('should show not found for revision', function() {
const swhid = `swh:1:rev:${this.unarchivedRepo.revision}`;
const msg = `Revision with sha1_git ${this.unarchivedRepo.revision} not found.`;
searchShouldShowNotFound(swhid, msg);
});
it('should show not found for content', function() {
const swhid = `swh:1:cnt:${this.unarchivedRepo.content[0].sha1git}`;
const msg = `Content with sha1_git checksum equals to ${this.unarchivedRepo.content[0].sha1git} not found!`;
searchShouldShowNotFound(swhid, msg);
});
function checkInvalidSWHIDReport(url, searchInputElt, swhidInput, validationMessagePattern = '') {
cy.visit(url);
doSearch(swhidInput, searchInputElt);
cy.get(searchInputElt)
.then($el => $el[0].checkValidity()).should('be.false');
cy.get(searchInputElt)
.invoke('prop', 'validationMessage')
.should('not.equal', '')
.should('contain', validationMessagePattern);
}
it('should report invalid SWHID in search page input', function() {
const swhidInput =
`swh:1:cnt:${this.unarchivedRepo.content[0].sha1git};lines=45-60/`;
checkInvalidSWHIDReport(this.Urls.browse_search(), '#swh-origins-url-patterns', swhidInput);
cy.get('.invalid-feedback')
.should('be.visible');
});
it('should report invalid SWHID in top right search input', function() {
const swhidInput =
`swh:1:cnt:${this.unarchivedRepo.content[0].sha1git};lines=45-60/`;
checkInvalidSWHIDReport(this.Urls.browse_help(), '#swh-origins-search-top-input', swhidInput);
});
it('should report SWHID with uppercase chars in search page input', function() {
const swhidInput =
`swh:1:cnt:${this.unarchivedRepo.content[0].sha1git}`.toUpperCase();
checkInvalidSWHIDReport(this.Urls.browse_search(), '#swh-origins-url-patterns', swhidInput, swhidInput.toLowerCase());
cy.get('.invalid-feedback')
.should('be.visible');
});
it('should report SWHID with uppercase chars in top right search input', function() {
let swhidInput =
`swh:1:cnt:${this.unarchivedRepo.content[0].sha1git}`.toUpperCase();
swhidInput += ';lines=45-60/';
checkInvalidSWHIDReport(this.Urls.browse_help(), '#swh-origins-search-top-input', swhidInput.toLowerCase());
});
});
});
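
The pagination specs above rely on the backend emitting a Link header with a rel="next" target on every non-final page. To exercise that logic without a populated backend, a stub along these lines could work (a sketch only; the stubSearchPage helper is hypothetical and not part of the patch):

// Stub one page of /api/1/origin/search/ results; when nextUrl is given,
// a Link header is attached so the UI enables the "next" button.
function stubSearchPage(urls, nextUrl, alias) {
  cy.intercept('**/api/1/origin/search/**', {
    body: urls.map(url => ({url: url})),
    headers: nextUrl ? {'Link': `<${nextUrl}>; rel="next"`} : {}
  }).as(alias);
}
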
diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py
index 0683fa02..84fba5d0 100644
--- a/swh/web/api/views/origin.py
+++ b/swh/web/api/views/origin.py
@@ -1,498 +1,498 @@
# Copyright (C) 2015-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from distutils.util import strtobool
from functools import partial
from swh.search.exc import SearchQuerySyntaxError
from swh.web.api.apidoc import api_doc, format_docstring
from swh.web.api.apiurls import api_route
from swh.web.api.utils import (
enrich_origin,
enrich_origin_search_result,
enrich_origin_visit,
)
from swh.web.api.views.utils import api_lookup
from swh.web.common import archive
from swh.web.common.exc import BadInputExc
from swh.web.common.origin_visits import get_origin_visits
from swh.web.common.utils import origin_visit_types, reverse
DOC_RETURN_ORIGIN = """
:>json string origin_visits_url: link to :http:get:`/api/1/origin/(origin_url)/visits/`
in order to get information about the visits for that origin
:>json string url: the origin canonical url
"""
DOC_RETURN_ORIGIN_ARRAY = DOC_RETURN_ORIGIN.replace(":>json", ":>jsonarr")
DOC_RETURN_ORIGIN_VISIT = """
:>json string date: ISO8601/RFC3339 representation of the visit date (in UTC)
:>json string origin: the origin canonical url
:>json string origin_url: link to get information about the origin
:>json string snapshot: the snapshot identifier of the visit
(may be null if status is not **full**).
:>json string snapshot_url: link to
:http:get:`/api/1/snapshot/(snapshot_id)/` in order to get
information about the snapshot of the visit
(may be null if status is not **full**).
:>json string status: status of the visit (either **full**,
**partial** or **ongoing**)
:>json number visit: the unique identifier of the visit
"""
DOC_RETURN_ORIGIN_VISIT_ARRAY = DOC_RETURN_ORIGIN_VISIT.replace(":>json", ":>jsonarr")
DOC_RETURN_ORIGIN_VISIT_ARRAY += """
:>jsonarr number id: the unique identifier of the origin
:>jsonarr string origin_visit_url: link to
:http:get:`/api/1/origin/(origin_url)/visit/(visit_id)/`
in order to get information about the visit
"""
@api_route(r"/origins/", "api-1-origins")
@api_doc("/origins/", noargs=True)
@format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY)
def api_origins(request):
"""
.. http:get:: /api/1/origins/
Get list of archived software origins.
.. warning::
This endpoint used to provide an ``origin_from`` query parameter,
and guarantee an order on results. This is no longer true,
and only the Link header should be used for paginating through
results.
:query int origin_count: the maximum number of origins to return
(defaults to 100, cannot exceed 10000)
{return_origin_array}
{common_headers}
{resheader_link}
:statuscode 200: no error
**Example:**
.. parsed-literal::
:swh_web_api:`origins?origin_count=500`
"""
old_param_origin_from = request.query_params.get("origin_from")
if old_param_origin_from:
raise BadInputExc("Please use the Link header to browse through result")
page_token = request.query_params.get("page_token", None)
limit = min(int(request.query_params.get("origin_count", "100")), 10000)
page_result = archive.lookup_origins(page_token, limit)
origins = [enrich_origin(o, request=request) for o in page_result.results]
next_page_token = page_result.next_page_token
response = {"results": origins, "headers": {}}
if next_page_token is not None:
response["headers"]["link-next"] = reverse(
"api-1-origins",
query_params={"page_token": next_page_token, "origin_count": limit},
request=request,
)
return response
@api_route(r"/origin/(?P.+)/get/", "api-1-origin")
@api_doc("/origin/")
@format_docstring(return_origin=DOC_RETURN_ORIGIN)
def api_origin(request, origin_url):
"""
.. http:get:: /api/1/origin/(origin_url)/get/
Get information about a software origin.
:param string origin_url: the origin url
{return_origin}
{common_headers}
:statuscode 200: no error
:statuscode 404: requested origin can not be found in the archive
**Example:**
.. parsed-literal::
:swh_web_api:`origin/https://github.com/python/cpython/get/`
"""
ori_dict = {"url": origin_url}
error_msg = "Origin with url %s not found." % ori_dict["url"]
return api_lookup(
archive.lookup_origin,
ori_dict,
notfound_msg=error_msg,
enrich_fn=enrich_origin,
request=request,
)
def _visit_types():
docstring = ""
# available visit types are queried using swh-search so we do it in a try
# block in case of failure (for instance in docker environment when
# elasticsearch service is not available)
try:
visit_types = [f"**{visit_type}**" for visit_type in origin_visit_types()]
docstring = ", ".join(visit_types[:-1]) + f", and {visit_types[-1]}"
except Exception:
docstring = "???"
return docstring
@api_route(
- r"/origin/search/(?P.+)/",
+ r"/origin/search/(?P.*)/",
"api-1-origin-search",
throttle_scope="swh_api_origin_search",
)
@api_doc("/origin/search/")
@format_docstring(
return_origin_array=DOC_RETURN_ORIGIN_ARRAY, visit_types=_visit_types()
)
def api_origin_search(request, url_pattern):
"""
.. http:get:: /api/1/origin/search/(url_pattern)/
Search for software origins whose urls contain a provided string
pattern or match a provided regular expression.
The search is performed in a case insensitive way.
.. warning::
This endpoint used to provide an ``offset`` query parameter,
and guarantee an order on results. This is no longer true,
and only the Link header should be used for paginating through
results.
:param string url_pattern: a string pattern
:query boolean use_ql: whether to use swh search query language or not
:query int limit: the maximum number of found origins to return
(bounded to 1000)
:query boolean with_visit: if true, only return origins with at least
one visit by Software Heritage
:query string visit_type: if provided, only return origins with that
specific visit type (currently the supported types are {visit_types})
{return_origin_array}
{common_headers}
{resheader_link}
:statuscode 200: no error
**Example:**
.. parsed-literal::
:swh_web_api:`origin/search/python/?limit=2`
"""
result = {}
limit = min(int(request.query_params.get("limit", "70")), 1000)
page_token = request.query_params.get("page_token")
use_ql = request.query_params.get("use_ql", "false")
with_visit = request.query_params.get("with_visit", "false")
visit_type = request.query_params.get("visit_type")
try:
(results, page_token) = api_lookup(
archive.search_origin,
url_pattern,
bool(strtobool(use_ql)),
limit,
bool(strtobool(with_visit)),
[visit_type] if visit_type else None,
page_token,
enrich_fn=enrich_origin_search_result,
request=request,
)
except SearchQuerySyntaxError as e:
raise BadInputExc(f"Syntax error in search query: {e.args[0]}")
if page_token is not None:
query_params = {k: v for (k, v) in request.GET.dict().items()}
query_params["page_token"] = page_token
result["headers"] = {
"link-next": reverse(
"api-1-origin-search",
url_args={"url_pattern": url_pattern},
query_params=query_params,
request=request,
)
}
result.update({"results": results})
return result
@api_route(r"/origin/metadata-search/", "api-1-origin-metadata-search")
@api_doc("/origin/metadata-search/", noargs=True)
@format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY)
def api_origin_metadata_search(request):
"""
.. http:get:: /api/1/origin/metadata-search/
Search for software origins whose metadata (expressed as a
JSON-LD/CodeMeta dictionary) match the provided criteria.
For now, only full-text search on this dictionary is supported.
:query str fulltext: a string that will be matched against origin
metadata; results are ranked and ordered starting with the best
ones.
:query int limit: the maximum number of found origins to return
(bounded to 100)
{return_origin_array}
{common_headers}
:statuscode 200: no error
**Example:**
.. parsed-literal::
:swh_web_api:`origin/metadata-search/?limit=2&fulltext=Jane%20Doe`
"""
fulltext = request.query_params.get("fulltext", None)
limit = min(int(request.query_params.get("limit", "70")), 100)
if not fulltext:
content = '"fulltext" must be provided and non-empty.'
raise BadInputExc(content)
results = api_lookup(
archive.search_origin_metadata, fulltext, limit, request=request
)
return {
"results": results,
}
@api_route(r"/origin/(?P.*)/visits/", "api-1-origin-visits")
@api_doc("/origin/visits/")
@format_docstring(return_origin_visit_array=DOC_RETURN_ORIGIN_VISIT_ARRAY)
def api_origin_visits(request, origin_url):
"""
.. http:get:: /api/1/origin/(origin_url)/visits/
Get information about all visits of a software origin.
Visits are returned sorted in descending order according
to their date.
:param str origin_url: a software origin URL
:query int per_page: specify the number of visits to list, for
pagination purposes
:query int last_visit: visit to start listing from, for pagination
purposes
{common_headers}
{resheader_link}
{return_origin_visit_array}
:statuscode 200: no error
:statuscode 404: requested origin can not be found in the archive
**Example:**
.. parsed-literal::
:swh_web_api:`origin/https://github.com/hylang/hy/visits/`
"""
result = {}
origin_query = {"url": origin_url}
notfound_msg = "No origin {} found".format(origin_url)
url_args_next = {"origin_url": origin_url}
per_page = int(request.query_params.get("per_page", "10"))
last_visit = request.query_params.get("last_visit")
if last_visit:
last_visit = int(last_visit)
def _lookup_origin_visits(origin_query, last_visit=last_visit, per_page=per_page):
all_visits = get_origin_visits(origin_query)
all_visits.reverse()
visits = []
if not last_visit:
visits = all_visits[:per_page]
else:
for i, v in enumerate(all_visits):
if v["visit"] == last_visit:
visits = all_visits[i + 1 : i + 1 + per_page]
break
for v in visits:
yield v
results = api_lookup(
_lookup_origin_visits,
origin_query,
notfound_msg=notfound_msg,
enrich_fn=partial(
enrich_origin_visit, with_origin_link=False, with_origin_visit_link=True
),
request=request,
)
if results:
nb_results = len(results)
if nb_results == per_page:
new_last_visit = results[-1]["visit"]
query_params = {}
query_params["last_visit"] = new_last_visit
if request.query_params.get("per_page"):
query_params["per_page"] = per_page
result["headers"] = {
"link-next": reverse(
"api-1-origin-visits",
url_args=url_args_next,
query_params=query_params,
request=request,
)
}
result.update({"results": results})
return result
@api_route(
r"/origin/(?P.*)/visit/latest/",
"api-1-origin-visit-latest",
throttle_scope="swh_api_origin_visit_latest",
)
@api_doc("/origin/visit/latest/")
@format_docstring(return_origin_visit=DOC_RETURN_ORIGIN_VISIT)
def api_origin_visit_latest(request, origin_url=None):
"""
.. http:get:: /api/1/origin/(origin_url)/visit/latest/
Get information about the latest visit of a software origin.
:param str origin_url: a software origin URL
:query boolean require_snapshot: if true, only return a visit
with a snapshot
{common_headers}
{return_origin_visit}
:statuscode 200: no error
:statuscode 404: requested origin or visit can not be found in the
archive
**Example:**
.. parsed-literal::
:swh_web_api:`origin/https://github.com/hylang/hy/visit/latest/`
"""
require_snapshot = request.query_params.get("require_snapshot", "false")
return api_lookup(
archive.lookup_origin_visit_latest,
origin_url,
bool(strtobool(require_snapshot)),
notfound_msg=("No visit for origin {} found".format(origin_url)),
enrich_fn=partial(
enrich_origin_visit, with_origin_link=True, with_origin_visit_link=False
),
request=request,
)
@api_route(
r"/origin/(?P.*)/visit/(?P[0-9]+)/", "api-1-origin-visit"
)
@api_doc("/origin/visit/")
@format_docstring(return_origin_visit=DOC_RETURN_ORIGIN_VISIT)
def api_origin_visit(request, visit_id, origin_url):
"""
.. http:get:: /api/1/origin/(origin_url)/visit/(visit_id)/
Get information about a specific visit of a software origin.
:param str origin_url: a software origin URL
:param int visit_id: a visit identifier
{common_headers}
{return_origin_visit}
:statuscode 200: no error
:statuscode 404: requested origin or visit can not be found in the
archive
**Example:**
.. parsed-literal::
:swh_web_api:`origin/https://github.com/hylang/hy/visit/1/`
"""
return api_lookup(
archive.lookup_origin_visit,
origin_url,
int(visit_id),
notfound_msg=("No visit {} for origin {} found".format(visit_id, origin_url)),
enrich_fn=partial(
enrich_origin_visit, with_origin_link=True, with_origin_visit_link=False
),
request=request,
)
@api_route(
r"/origin/(?P.+)/intrinsic-metadata/", "api-origin-intrinsic-metadata"
)
@api_doc("/origin/intrinsic-metadata/")
@format_docstring()
def api_origin_intrinsic_metadata(request, origin_url):
"""
.. http:get:: /api/1/origin/(origin_url)/intrinsic-metadata
Get intrinsic metadata of a software origin (as a JSON-LD/CodeMeta dictionary).
:param string origin_url: the origin url
:>json string ???: intrinsic metadata field of the origin
{common_headers}
:statuscode 200: no error
:statuscode 404: requested origin can not be found in the archive
**Example:**
.. parsed-literal::
:swh_web_api:`origin/https://github.com/python/cpython/intrinsic-metadata`
"""
return api_lookup(
archive.lookup_origin_intrinsic_metadata,
origin_url,
notfound_msg=f"Origin with url {origin_url} not found",
enrich_fn=enrich_origin,
request=request,
)
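
Relaxing the search route regex from (?P<url_pattern>.+) to (?P<url_pattern>.*) is what lets the UI submit an empty pattern and list every origin. A minimal client-side sketch of consuming the endpoint that way, following the documented Link-header pagination (the helper name and baseUrl are assumptions, not part of the patch):

// List origins matching `pattern` (possibly empty) page by page.
// With an empty pattern the path becomes /api/1/origin/search//,
// which the relaxed regex now accepts.
async function searchOrigins(baseUrl, pattern = '', visitType = null) {
  let url = `${baseUrl}/api/1/origin/search/${encodeURIComponent(pattern)}/?limit=100`;
  if (visitType) {
    url += `&visit_type=${encodeURIComponent(visitType)}`;
  }
  const origins = [];
  while (url) {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`origin search failed with HTTP ${response.status}`);
    }
    origins.push(...await response.json());
    const link = response.headers.get('Link');
    const match = link && link.match(/<(.+)>; rel="next"/);
    url = match ? match[1] : null; // follow pagination until exhausted
  }
  return origins;
}
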
diff --git a/swh/web/templates/includes/origin-search-form.html b/swh/web/templates/includes/origin-search-form.html
index 3356bba6..18dc1afa 100644
--- a/swh/web/templates/includes/origin-search-form.html
+++ b/swh/web/templates/includes/origin-search-form.html
@@ -1,68 +1,68 @@
{% comment %}
Copyright (C) 2020-2021 The Software Heritage developers
See the AUTHORS file at the top-level directory of this distribution
License: GNU Affero General Public License version 3, or any later version
See top-level LICENSE file for more information
{% endcomment %}
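
The form template only exposes the checkboxes and the visit-type select; the values are serialized into browse-search query parameters by initOriginSearch above. A small sketch of that mapping (the buildSearchQueryParams helper is hypothetical, not part of the patch):

// Mirror how initOriginSearch turns form state into query parameters:
// boolean filters are only appended when enabled, and the visit type
// only when it is not the default 'any'.
function buildSearchQueryParams({query, withVisit, withContent, useQL, searchMetadata, visitType}) {
  const params = new URLSearchParams();
  params.append('q', query);
  if (withVisit) params.append('with_visit', withVisit);
  if (withContent) params.append('with_content', withContent);
  if (useQL) params.append('use_ql', useQL);
  if (searchMetadata) params.append('search_metadata', searchMetadata);
  if (visitType && visitType !== 'any') params.append('visit_type', visitType);
  return params;
}

// e.g. buildSearchQueryParams({query: 'python', withVisit: true, visitType: 'git'})
//   .toString() === 'q=python&with_visit=true&visit_type=git'
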
diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py
index aec07e32..326ebc26 100644
--- a/swh/web/tests/api/views/test_origin.py
+++ b/swh/web/tests/api/views/test_origin.py
@@ -1,795 +1,838 @@
# Copyright (C) 2015-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import timedelta
import json
from hypothesis import given
import pytest
from swh.indexer.storage.model import OriginIntrinsicMetadataRow
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Origin, OriginVisit, OriginVisitStatus
from swh.search.exc import SearchQuerySyntaxError
from swh.search.interface import PagedResult
from swh.storage.exc import StorageAPIError, StorageDBError
from swh.storage.utils import now
from swh.web.api.utils import enrich_origin, enrich_origin_visit
from swh.web.common.exc import BadInputExc
from swh.web.common.origin_visits import get_origin_visits
from swh.web.common.utils import reverse
from swh.web.tests.api.views.utils import scroll_results
from swh.web.tests.data import (
INDEXER_TOOL,
ORIGIN_MASTER_REVISION,
ORIGIN_METADATA_KEY,
ORIGIN_METADATA_VALUE,
)
from swh.web.tests.strategies import new_origin, new_snapshots, visit_dates
from swh.web.tests.utils import check_api_get_responses
def test_api_lookup_origin_visits_raise_error(api_client, mocker):
mock_get_origin_visits = mocker.patch("swh.web.api.views.origin.get_origin_visits")
err_msg = "voluntary error to check the bad request middleware."
mock_get_origin_visits.side_effect = BadInputExc(err_msg)
url = reverse("api-1-origin-visits", url_args={"origin_url": "http://foo"})
rv = check_api_get_responses(api_client, url, status_code=400)
assert rv.data == {"exception": "BadInputExc", "reason": err_msg}
def test_api_lookup_origin_visits_raise_swh_storage_error_db(api_client, mocker):
mock_get_origin_visits = mocker.patch("swh.web.api.views.origin.get_origin_visits")
err_msg = "Storage exploded! Will be back online shortly!"
mock_get_origin_visits.side_effect = StorageDBError(err_msg)
url = reverse("api-1-origin-visits", url_args={"origin_url": "http://foo"})
rv = check_api_get_responses(api_client, url, status_code=503)
assert rv.data == {
"exception": "StorageDBError",
"reason": "An unexpected error occurred in the backend: %s" % err_msg,
}
def test_api_lookup_origin_visits_raise_swh_storage_error_api(api_client, mocker):
mock_get_origin_visits = mocker.patch("swh.web.api.views.origin.get_origin_visits")
err_msg = "Storage API dropped dead! Will resurrect asap!"
mock_get_origin_visits.side_effect = StorageAPIError(err_msg)
url = reverse("api-1-origin-visits", url_args={"origin_url": "http://foo"})
rv = check_api_get_responses(api_client, url, status_code=503)
assert rv.data == {
"exception": "StorageAPIError",
"reason": "An unexpected error occurred in the api backend: %s" % err_msg,
}
@given(new_origin(), visit_dates(3), new_snapshots(3))
def test_api_lookup_origin_visits(
api_client, subtest, new_origin, visit_dates, new_snapshots
):
# ensure archive_data fixture will be reset between each hypothesis
# example test run
@subtest
def test_inner(archive_data):
archive_data.origin_add([new_origin])
for i, visit_date in enumerate(visit_dates):
origin_visit = archive_data.origin_visit_add(
[
OriginVisit(
origin=new_origin.url,
date=visit_date,
type="git",
)
]
)[0]
archive_data.snapshot_add([new_snapshots[i]])
visit_status = OriginVisitStatus(
origin=new_origin.url,
visit=origin_visit.visit,
date=now(),
status="full",
snapshot=new_snapshots[i].id,
)
archive_data.origin_visit_status_add([visit_status])
all_visits = list(reversed(get_origin_visits(new_origin.to_dict())))
for last_visit, expected_visits in (
(None, all_visits[:2]),
(all_visits[1]["visit"], all_visits[2:]),
):
url = reverse(
"api-1-origin-visits",
url_args={"origin_url": new_origin.url},
query_params={"per_page": 2, "last_visit": last_visit},
)
rv = check_api_get_responses(api_client, url, status_code=200)
for i in range(len(expected_visits)):
expected_visits[i] = enrich_origin_visit(
expected_visits[i],
with_origin_link=False,
with_origin_visit_link=True,
request=rv.wsgi_request,
)
assert rv.data == expected_visits
@given(new_origin(), visit_dates(3), new_snapshots(3))
def test_api_lookup_origin_visits_by_id(
api_client, subtest, new_origin, visit_dates, new_snapshots
):
# ensure archive_data fixture will be reset between each hypothesis
# example test run
@subtest
def test_inner(archive_data):
archive_data.origin_add([new_origin])
for i, visit_date in enumerate(visit_dates):
origin_visit = archive_data.origin_visit_add(
[
OriginVisit(
origin=new_origin.url,
date=visit_date,
type="git",
)
]
)[0]
archive_data.snapshot_add([new_snapshots[i]])
visit_status = OriginVisitStatus(
origin=new_origin.url,
visit=origin_visit.visit,
date=now(),
status="full",
snapshot=new_snapshots[i].id,
)
archive_data.origin_visit_status_add([visit_status])
all_visits = list(reversed(get_origin_visits(new_origin.to_dict())))
for last_visit, expected_visits in (
(None, all_visits[:2]),
(all_visits[1]["visit"], all_visits[2:4]),
):
url = reverse(
"api-1-origin-visits",
url_args={"origin_url": new_origin.url},
query_params={"per_page": 2, "last_visit": last_visit},
)
rv = check_api_get_responses(api_client, url, status_code=200)
for i in range(len(expected_visits)):
expected_visits[i] = enrich_origin_visit(
expected_visits[i],
with_origin_link=False,
with_origin_visit_link=True,
request=rv.wsgi_request,
)
assert rv.data == expected_visits
@given(new_origin(), visit_dates(3), new_snapshots(3))
def test_api_lookup_origin_visit(
api_client, subtest, new_origin, visit_dates, new_snapshots
):
# ensure archive_data fixture will be reset between each hypothesis
# example test run
@subtest
def test_inner(archive_data):
archive_data.origin_add([new_origin])
for i, visit_date in enumerate(visit_dates):
origin_visit = archive_data.origin_visit_add(
[
OriginVisit(
origin=new_origin.url,
date=visit_date,
type="git",
)
]
)[0]
visit_id = origin_visit.visit
archive_data.snapshot_add([new_snapshots[i]])
visit_status = OriginVisitStatus(
origin=new_origin.url,
visit=origin_visit.visit,
date=visit_date + timedelta(minutes=5),
status="full",
snapshot=new_snapshots[i].id,
)
archive_data.origin_visit_status_add([visit_status])
url = reverse(
"api-1-origin-visit",
url_args={"origin_url": new_origin.url, "visit_id": visit_id},
)
rv = check_api_get_responses(api_client, url, status_code=200)
expected_visit = archive_data.origin_visit_get_by(new_origin.url, visit_id)
expected_visit = enrich_origin_visit(
expected_visit,
with_origin_link=True,
with_origin_visit_link=False,
request=rv.wsgi_request,
)
assert rv.data == expected_visit
@given(new_origin())
def test_api_lookup_origin_visit_latest_no_visit(api_client, archive_data, new_origin):
archive_data.origin_add([new_origin])
url = reverse("api-1-origin-visit-latest", url_args={"origin_url": new_origin.url})
rv = check_api_get_responses(api_client, url, status_code=404)
assert rv.data == {
"exception": "NotFoundExc",
"reason": "No visit for origin %s found" % new_origin.url,
}
@given(new_origin(), visit_dates(2), new_snapshots(1))
def test_api_lookup_origin_visit_latest(
api_client, subtest, new_origin, visit_dates, new_snapshots
):
# ensure archive_data fixture will be reset between each hypothesis
# example test run
@subtest
def test_inner(archive_data):
archive_data.origin_add([new_origin])
visit_dates.sort()
visit_ids = []
for i, visit_date in enumerate(visit_dates):
origin_visit = archive_data.origin_visit_add(
[
OriginVisit(
origin=new_origin.url,
date=visit_date,
type="git",
)
]
)[0]
visit_ids.append(origin_visit.visit)
archive_data.snapshot_add([new_snapshots[0]])
visit_status = OriginVisitStatus(
origin=new_origin.url,
visit=visit_ids[0],
date=now(),
status="full",
snapshot=new_snapshots[0].id,
)
archive_data.origin_visit_status_add([visit_status])
url = reverse(
"api-1-origin-visit-latest", url_args={"origin_url": new_origin.url}
)
rv = check_api_get_responses(api_client, url, status_code=200)
expected_visit = archive_data.origin_visit_status_get_latest(
new_origin.url, type="git"
)
expected_visit = enrich_origin_visit(
expected_visit,
with_origin_link=True,
with_origin_visit_link=False,
request=rv.wsgi_request,
)
assert rv.data == expected_visit
@given(new_origin(), visit_dates(2), new_snapshots(1))
def test_api_lookup_origin_visit_latest_with_snapshot(
api_client, subtest, new_origin, visit_dates, new_snapshots
):
# ensure archive_data fixture will be reset between each hypothesis
# example test run
@subtest
def test_inner(archive_data):
archive_data.origin_add([new_origin])
visit_dates.sort()
visit_ids = []
for i, visit_date in enumerate(visit_dates):
origin_visit = archive_data.origin_visit_add(
[
OriginVisit(
origin=new_origin.url,
date=visit_date,
type="git",
)
]
)[0]
visit_ids.append(origin_visit.visit)
archive_data.snapshot_add([new_snapshots[0]])
# Add snapshot to the latest visit
visit_id = visit_ids[-1]
visit_status = OriginVisitStatus(
origin=new_origin.url,
visit=visit_id,
date=now(),
status="full",
snapshot=new_snapshots[0].id,
)
archive_data.origin_visit_status_add([visit_status])
url = reverse(
"api-1-origin-visit-latest",
url_args={"origin_url": new_origin.url},
query_params={"require_snapshot": True},
)
rv = check_api_get_responses(api_client, url, status_code=200)
expected_visit = archive_data.origin_visit_status_get_latest(
new_origin.url, type="git", require_snapshot=True
)
expected_visit = enrich_origin_visit(
expected_visit,
with_origin_link=True,
with_origin_visit_link=False,
request=rv.wsgi_request,
)
assert rv.data == expected_visit
def test_api_lookup_origin_visit_not_found(api_client, origin):
all_visits = list(reversed(get_origin_visits(origin)))
max_visit_id = max([v["visit"] for v in all_visits])
url = reverse(
"api-1-origin-visit",
url_args={"origin_url": origin["url"], "visit_id": max_visit_id + 1},
)
rv = check_api_get_responses(api_client, url, status_code=404)
assert rv.data == {
"exception": "NotFoundExc",
"reason": "Origin %s or its visit with id %s not found!"
% (origin["url"], max_visit_id + 1),
}
def test_api_origins_wrong_input(api_client, archive_data):
"""Should fail with 400 if the input is deprecated."""
# fail if wrong input
url = reverse("api-1-origins", query_params={"origin_from": 1})
rv = check_api_get_responses(api_client, url, status_code=400)
assert rv.data == {
"exception": "BadInputExc",
"reason": "Please use the Link header to browse through result",
}
def test_api_origins(api_client, archive_data):
page_result = archive_data.origin_list(limit=10000)
origins = page_result.results
origin_urls = {origin.url for origin in origins}
# Get only one
url = reverse("api-1-origins", query_params={"origin_count": 1})
rv = check_api_get_responses(api_client, url, status_code=200)
assert len(rv.data) == 1
assert {origin["url"] for origin in rv.data} <= origin_urls
# Get all
url = reverse("api-1-origins", query_params={"origin_count": len(origins)})
rv = check_api_get_responses(api_client, url, status_code=200)
assert len(rv.data) == len(origins)
assert {origin["url"] for origin in rv.data} == origin_urls
# Get "all + 10"
url = reverse("api-1-origins", query_params={"origin_count": len(origins) + 10})
rv = check_api_get_responses(api_client, url, status_code=200)
assert len(rv.data) == len(origins)
assert {origin["url"] for origin in rv.data} == origin_urls
@pytest.mark.parametrize("origin_count", [1, 2, 10, 100])
def test_api_origins_scroll(api_client, archive_data, origin_count):
page_result = archive_data.origin_list(limit=10000)
origins = page_result.results
origin_urls = {origin.url for origin in origins}
url = reverse("api-1-origins", query_params={"origin_count": origin_count})
results = scroll_results(api_client, url)
assert len(results) == len(origins)
assert {origin["url"] for origin in results} == origin_urls
def test_api_origin_by_url(api_client, archive_data, origin):
origin_url = origin["url"]
url = reverse("api-1-origin", url_args={"origin_url": origin_url})
rv = check_api_get_responses(api_client, url, status_code=200)
expected_origin = archive_data.origin_get([origin_url])[0]
expected_origin = enrich_origin(expected_origin, rv.wsgi_request)
assert rv.data == expected_origin
@given(new_origin())
def test_api_origin_not_found(api_client, new_origin):
url = reverse("api-1-origin", url_args={"origin_url": new_origin.url})
rv = check_api_get_responses(api_client, url, status_code=404)
assert rv.data == {
"exception": "NotFoundExc",
"reason": "Origin with url %s not found!" % new_origin.url,
}
@pytest.mark.parametrize("backend", ["swh-search", "swh-storage"])
def test_api_origin_search(api_client, mocker, backend):
if backend != "swh-search":
# equivalent to not configuring search in the config
mocker.patch("swh.web.common.archive.search", None)
expected_origins = {
"https://github.com/wcoder/highlightjs-line-numbers.js",
"https://github.com/memononen/libtess2",
}
# Search for 'github.com', get only one
url = reverse(
"api-1-origin-search",
url_args={"url_pattern": "github.com"},
query_params={"limit": 1},
)
rv = check_api_get_responses(api_client, url, status_code=200)
assert len(rv.data) == 1
assert {origin["url"] for origin in rv.data} <= expected_origins
assert rv.data == [
enrich_origin({"url": origin["url"]}, request=rv.wsgi_request)
for origin in rv.data
]
# Search for 'github.com', get all
url = reverse(
"api-1-origin-search",
url_args={"url_pattern": "github.com"},
query_params={"limit": 2},
)
rv = check_api_get_responses(api_client, url, status_code=200)
assert {origin["url"] for origin in rv.data} == expected_origins
assert rv.data == [
enrich_origin({"url": origin["url"]}, request=rv.wsgi_request)
for origin in rv.data
]
# Search for 'github.com', get more than available
url = reverse(
"api-1-origin-search",
url_args={"url_pattern": "github.com"},
query_params={"limit": 10},
)
rv = check_api_get_responses(api_client, url, status_code=200)
assert {origin["url"] for origin in rv.data} == expected_origins
assert rv.data == [
enrich_origin({"url": origin["url"]}, request=rv.wsgi_request)
for origin in rv.data
]
@pytest.mark.parametrize("backend", ["swh-search", "swh-storage"])
def test_api_origin_search_words(api_client, mocker, backend):
if backend != "swh-search":
# equivalent to not configuring search in the config
mocker.patch("swh.web.common.archive.search", None)
expected_origins = {
"https://github.com/wcoder/highlightjs-line-numbers.js",
"https://github.com/memononen/libtess2",
}
url = reverse(
"api-1-origin-search",
url_args={"url_pattern": "github com"},
query_params={"limit": 2},
)
rv = check_api_get_responses(api_client, url, status_code=200)
assert {origin["url"] for origin in rv.data} == expected_origins
url = reverse(
"api-1-origin-search",
url_args={"url_pattern": "com github"},
query_params={"limit": 2},
)
rv = check_api_get_responses(api_client, url, status_code=200)
assert {origin["url"] for origin in rv.data} == expected_origins
url = reverse(
"api-1-origin-search",
url_args={"url_pattern": "memononen libtess2"},
query_params={"limit": 2},
)
rv = check_api_get_responses(api_client, url, status_code=200)
assert len(rv.data) == 1
assert {origin["url"] for origin in rv.data} == {
"https://github.com/memononen/libtess2"
}
url = reverse(
"api-1-origin-search",
url_args={"url_pattern": "libtess2 memononen"},
query_params={"limit": 2},
)
rv = check_api_get_responses(api_client, url, status_code=200)
assert len(rv.data) == 1
assert {origin["url"] for origin in rv.data} == {
"https://github.com/memononen/libtess2"
}


@pytest.mark.parametrize("backend", ["swh-search", "swh-storage"])
def test_api_origin_search_visit_type(api_client, mocker, backend):
    if backend != "swh-search":
        # equivalent to not configuring search in the config
        mocker.patch("swh.web.common.archive.search", None)

    expected_origins = {
        "https://github.com/wcoder/highlightjs-line-numbers.js",
        "https://github.com/memononen/libtess2",
    }

    url = reverse(
        "api-1-origin-search",
        url_args={
            "url_pattern": "github com",
        },
        query_params={"visit_type": "git"},
    )
    rv = check_api_get_responses(api_client, url, status_code=200)
    assert {origin["url"] for origin in rv.data} == expected_origins

    url = reverse(
        "api-1-origin-search",
        url_args={
            "url_pattern": "github com",
        },
        query_params={"visit_type": "foo"},
    )
    rv = check_api_get_responses(api_client, url, status_code=200)
    assert rv.data == []


def test_api_origin_search_use_ql(api_client, mocker):
    expected_origins = {
        "https://github.com/wcoder/highlightjs-line-numbers.js",
        "https://github.com/memononen/libtess2",
    }
    ORIGINS = [{"url": origin} for origin in expected_origins]

    mock_archive_search = mocker.patch("swh.web.common.archive.search")
    mock_archive_search.origin_search.return_value = PagedResult(
        results=ORIGINS,
        next_page_token=None,
    )

    query = "origin : 'github.com'"
    url = reverse(
        "api-1-origin-search",
        url_args={"url_pattern": query},
        query_params={"visit_type": "git", "use_ql": "true"},
    )
    rv = check_api_get_responses(api_client, url, status_code=200)
    assert {origin["url"] for origin in rv.data} == expected_origins

    # no explicit limit was given, so the default limit (70) is forwarded
    # to the search backend
    mock_archive_search.origin_search.assert_called_with(
        query=query, page_token=None, with_visit=False, visit_types=["git"], limit=70
    )


def test_api_origin_search_ql_syntax_error(api_client, mocker):
    mock_archive_search = mocker.patch("swh.web.common.archive.search")
    mock_archive_search.origin_search.side_effect = SearchQuerySyntaxError(
        "Invalid syntax"
    )

    query = "this is not a valid query"
    url = reverse(
        "api-1-origin-search",
        url_args={"url_pattern": query},
        query_params={"visit_type": "git", "use_ql": "true"},
    )
    rv = check_api_get_responses(api_client, url, status_code=400)
    assert rv.data == {
        "exception": "BadInputExc",
        "reason": "Syntax error in search query: Invalid syntax",
    }
    mock_archive_search.origin_search.assert_called_with(
        query=query, page_token=None, with_visit=False, visit_types=["git"], limit=70
    )
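
# The test above relies on the view translating backend syntax errors into
# HTTP 400 responses. A rough sketch of the assumed handling (illustrative
# only, not the actual swh-web implementation):
#
#     try:
#         results = archive.search_origin(query, ...)
#     except SearchQuerySyntaxError as e:
#         raise BadInputExc("Syntax error in search query: %s" % e.args[0])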


@pytest.mark.parametrize("backend", ["swh-search", "swh-storage"])
@pytest.mark.parametrize("limit", [1, 2, 3, 10])
def test_api_origin_search_scroll(api_client, archive_data, mocker, limit, backend):
    if backend != "swh-search":
        # equivalent to not configuring search in the config
        mocker.patch("swh.web.common.archive.search", None)

    expected_origins = {
        "https://github.com/wcoder/highlightjs-line-numbers.js",
        "https://github.com/memononen/libtess2",
    }

    url = reverse(
        "api-1-origin-search",
        url_args={"url_pattern": "github.com"},
        query_params={"limit": limit},
    )
    results = scroll_results(api_client, url)
    assert {origin["url"] for origin in results} == expected_origins
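
# ``scroll_results`` is a helper defined earlier in this module; a minimal
# sketch of the pagination loop it is assumed to implement (illustrative
# only -- the real helper may differ; assumes ``import re``):
#
#     def scroll_results(api_client, url):
#         """Fetch all pages of results by following "next" Link headers."""
#         results = []
#         while url is not None:
#             rv = check_api_get_responses(api_client, url, status_code=200)
#             results.extend(rv.data)
#             match = re.search(r'<(.+)>; rel="next"', rv.get("Link", ""))
#             url = match.group(1) if match else None
#         return results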


@pytest.mark.parametrize("backend", ["swh-search", "swh-storage"])
def test_api_origin_search_limit(api_client, archive_data, tests_data, mocker, backend):
    if backend == "swh-search":
        tests_data["search"].origin_update(
            [{"url": "http://foobar/{}".format(i)} for i in range(2000)]
        )
    else:
        # equivalent to not configuring search in the config
        mocker.patch("swh.web.common.archive.search", None)
        archive_data.origin_add(
            [Origin(url="http://foobar/{}".format(i)) for i in range(2000)]
        )

    url = reverse(
        "api-1-origin-search",
        url_args={"url_pattern": "foobar"},
        query_params={"limit": 1050},
    )
    rv = check_api_get_responses(api_client, url, status_code=200)
    # the requested limit (1050) exceeds the endpoint's maximum, so the
    # number of results is capped at 1000
    assert len(rv.data) == 1000


@pytest.mark.parametrize("backend", ["swh-search", "swh-indexer-storage"])
def test_api_origin_metadata_search(api_client, mocker, backend):
    mock_config = mocker.patch("swh.web.common.archive.config")
    mock_config.get_config.return_value = {
        "search_config": {"metadata_backend": backend}
    }

    url = reverse(
        "api-1-origin-metadata-search", query_params={"fulltext": ORIGIN_METADATA_VALUE}
    )
    rv = check_api_get_responses(api_client, url, status_code=200)

    rv.data = sorted(rv.data, key=lambda d: d["url"])
    expected_data = sorted(
        [
            {
                "url": origin_url,
                "metadata": {
                    "from_revision": ORIGIN_MASTER_REVISION[origin_url],
                    "tool": {
                        "name": INDEXER_TOOL["tool_name"],
                        "version": INDEXER_TOOL["tool_version"],
                        "configuration": INDEXER_TOOL["tool_configuration"],
                        "id": INDEXER_TOOL["id"],
                    },
                    "mappings": [],
                },
            }
            for origin_url in sorted(ORIGIN_MASTER_REVISION.keys())
        ],
        key=lambda d: d["url"],
    )

    for i in range(len(expected_data)):
        expected = expected_data[i]
        response = rv.data[i]
        metadata = response["metadata"].pop("metadata")
        assert any(
            ORIGIN_METADATA_VALUE in json.dumps(val) for val in metadata.values()
        )
        assert response == expected


def test_api_origin_metadata_search_limit(api_client, mocker):
    mock_idx_storage = mocker.patch("swh.web.common.archive.idx_storage")
    oimsft = mock_idx_storage.origin_intrinsic_metadata_search_fulltext

    oimsft.side_effect = lambda conjunction, limit: [
        OriginIntrinsicMetadataRow(
            id=origin_url,
            from_revision=hash_to_bytes(master_rev),
            indexer_configuration_id=INDEXER_TOOL["id"],
            metadata={ORIGIN_METADATA_KEY: ORIGIN_METADATA_VALUE},
            mappings=[],
        )
        for origin_url, master_rev in ORIGIN_MASTER_REVISION.items()
    ]

    # no explicit limit: the indexer storage is queried with the default (70)
    url = reverse(
        "api-1-origin-metadata-search", query_params={"fulltext": ORIGIN_METADATA_VALUE}
    )
    rv = check_api_get_responses(api_client, url, status_code=200)
    assert len(rv.data) == len(ORIGIN_MASTER_REVISION)
    oimsft.assert_called_with(conjunction=[ORIGIN_METADATA_VALUE], limit=70)

    # an explicit limit below the maximum is passed through unchanged
    url = reverse(
        "api-1-origin-metadata-search",
        query_params={"fulltext": ORIGIN_METADATA_VALUE, "limit": 10},
    )
    rv = check_api_get_responses(api_client, url, status_code=200)
    assert len(rv.data) == len(ORIGIN_MASTER_REVISION)
    oimsft.assert_called_with(conjunction=[ORIGIN_METADATA_VALUE], limit=10)

    # limits above the maximum are clamped (here to 100)
    url = reverse(
        "api-1-origin-metadata-search",
        query_params={"fulltext": ORIGIN_METADATA_VALUE, "limit": 987},
    )
    rv = check_api_get_responses(api_client, url, status_code=200)
    assert len(rv.data) == len(ORIGIN_MASTER_REVISION)
    oimsft.assert_called_with(conjunction=[ORIGIN_METADATA_VALUE], limit=100)


def test_api_origin_intrinsic_metadata(api_client, origin):
    url = reverse(
        "api-origin-intrinsic-metadata", url_args={"origin_url": origin["url"]}
    )
    rv = check_api_get_responses(api_client, url, status_code=200)
    assert ORIGIN_METADATA_KEY in rv.data
    assert rv.data[ORIGIN_METADATA_KEY] == ORIGIN_METADATA_VALUE


def test_api_origin_metadata_search_invalid(api_client, mocker):
    mock_idx_storage = mocker.patch("swh.web.common.archive.idx_storage")
    url = reverse("api-1-origin-metadata-search")
    check_api_get_responses(api_client, url, status_code=400)
    mock_idx_storage.assert_not_called()


@pytest.mark.parametrize("backend", ["swh-counters", "swh-storage"])
def test_api_stat_counters(api_client, mocker, backend):
    mock_config = mocker.patch("swh.web.common.archive.config")
    mock_config.get_config.return_value = {"counters_backend": backend}

    url = reverse("api-1-stat-counters")
    rv = check_api_get_responses(api_client, url, status_code=200)
    counts = json.loads(rv.content)
    for obj in ["content", "origin", "release", "directory", "revision"]:
        assert counts.get(obj, 0) > 0
+
+
+@pytest.fixture
+def archived_origins(archive_data):
+    """Return the origins present in the test archive, each enriched with
+    the type of its first visit."""
+    page_result = archive_data.origin_list(page_token=None, limit=10000)
+    origins = [origin.to_dict() for origin in page_result.results]
+    for origin in origins:
+        ovs = archive_data.origin_visit_get_with_statuses(origin["url"]).results
+        del origin["id"]
+        origin["type"] = ovs[0].visit.type
+
+    return origins
+
+
+def test_api_origin_search_empty_pattern(api_client, archived_origins):
+    """Searching with an empty URL pattern should return every archived
+    origin."""
+    url = reverse(
+        "api-1-origin-search",
+        url_args={"url_pattern": ""},
+        query_params={"limit": 10000},
+    )
+
+    rv = check_api_get_responses(api_client, url, status_code=200)
+
+    assert {o["url"] for o in rv.data} == {o["url"] for o in archived_origins}
+
+
+def test_api_origin_search_empty_pattern_and_visit_type(api_client, archived_origins):
+    """Searching with an empty URL pattern and a visit type should return
+    every archived origin with that visit type."""
+    visit_types = {o["type"] for o in archived_origins}
+
+    for visit_type in visit_types:
+        url = reverse(
+            "api-1-origin-search",
+            url_args={"url_pattern": ""},
+            query_params={"visit_type": visit_type, "limit": 10000},
+        )
+
+        rv = check_api_get_responses(api_client, url, status_code=200)
+
+        assert {o["url"] for o in rv.data} == {
+            o["url"] for o in archived_origins if o["type"] == visit_type
+        }
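+
+
+# To run only the tests added above (hypothetical invocation, assuming the
+# usual swh-web source layout and a configured test environment):
+#
+#     pytest swh/web/tests/api/views/test_origin.py -k empty_pattern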