diff --git a/cypress/integration/origin-search.spec.js b/cypress/integration/origin-search.spec.js
index 0771a4e1..74d09e32 100644
--- a/cypress/integration/origin-search.spec.js
+++ b/cypress/integration/origin-search.spec.js
@@ -1,152 +1,355 @@
/**
* Copyright (C) 2019 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU Affero General Public License version 3, or any later version
* See top-level LICENSE file for more information
*/
const nonExistentText = 'NoMatchExists';
let origin;
let url;
/**
 * Fills the origins search input with the given text and clicks the
 * search icon to launch a search.
 */
function doSearch(searchText) {
  cy.get('#origins-url-patterns').type(searchText);
  cy.get('.swh-search-icon').click();
}
/**
 * Searches for the given text and asserts the browser was redirected
 * to the expected path.
 */
function searchShouldRedirect(searchText, redirectUrl) {
  doSearch(searchText);
  cy.location('pathname').should('equal', redirectUrl);
}
/**
 * Searches for the given text and asserts the "no result" banner is
 * shown and contains the expected message.
 */
function searchShouldShowNotFound(searchText, msg) {
  doSearch(searchText);
  const noResult = cy.get('#swh-no-result');
  noResult
    .should('be.visible')
    .and('contain', msg);
}
describe('Test origin-search', function() {
before(function() {
origin = this.origin[0];
url = this.Urls.browse_search();
});
beforeEach(function() {
cy.visit(url);
});
it('should show in result when url is searched', function() {
cy.get('#origins-url-patterns')
.type(origin.url);
cy.get('.swh-search-icon')
.click();
cy.get('#origin-search-results')
.should('be.visible');
cy.contains('tr', origin.url)
.should('be.visible')
.find('.swh-visit-status')
.find('i')
.should('have.class', 'fa-check')
.and('have.attr', 'title',
'Origin has at least one full visit by Software Heritage');
});
it('should show not found message when no repo matches', function() {
searchShouldShowNotFound(nonExistentText,
'No origins matching the search criteria were found.');
});
it('should add appropriate URL parameters', function() {
// Check all three checkboxes and check if
// correct url params are added
cy.get('#swh-search-origins-with-visit')
.check()
.get('#swh-filter-empty-visits')
.check()
.get('#swh-search-origin-metadata')
.check()
.then(() => {
const searchText = origin.url;
doSearch(searchText);
cy.location('search').then(locationSearch => {
const urlParams = new URLSearchParams(locationSearch);
const query = urlParams.get('q');
const withVisit = urlParams.has('with_visit');
const withContent = urlParams.has('with_content');
const searchMetadata = urlParams.has('search_metadata');
assert.strictEqual(query, searchText);
assert.strictEqual(withVisit, true);
assert.strictEqual(withContent, true);
assert.strictEqual(searchMetadata, true);
});
});
});
+ context('Test pagination', function() {
+ it('should not paginate if there are not many results', function() {
+ // Setup search
+ cy.get('#swh-search-origins-with-visit')
+ .uncheck()
+ .get('#swh-filter-empty-visits')
+ .uncheck()
+ .then(() => {
+ const searchText = 'libtess';
+
+ // Get first page of results
+ doSearch(searchText);
+
+ cy.get('.swh-search-result-entry')
+ .should('have.length', 1);
+
+ cy.get('.swh-search-result-entry#origin-0 td a')
+ .should('have.text', 'https://github.com/memononen/libtess2');
+
+ cy.get('#origins-prev-results-button')
+ .should('have.class', 'disabled');
+ cy.get('#origins-next-results-button')
+ .should('have.class', 'disabled');
+ });
+ });
+
+ it('should paginate forward when there are many results', function() {
+ // Setup search
+ cy.get('#swh-search-origins-with-visit')
+ .uncheck()
+ .get('#swh-filter-empty-visits')
+ .uncheck()
+ .then(() => {
+ const searchText = 'many.origins';
+
+ // Get first page of results
+ doSearch(searchText);
+
+ cy.get('.swh-search-result-entry')
+ .should('have.length', 100);
+
+ cy.get('.swh-search-result-entry#origin-0 td a')
+ .should('have.text', 'https://many.origins/1');
+ cy.get('.swh-search-result-entry#origin-99 td a')
+ .should('have.text', 'https://many.origins/100');
+
+ cy.get('#origins-prev-results-button')
+ .should('have.class', 'disabled');
+ cy.get('#origins-next-results-button')
+ .should('not.have.class', 'disabled');
+
+ // Get second page of results
+ cy.get('#origins-next-results-button a')
+ .click();
+
+ cy.get('.swh-search-result-entry')
+ .should('have.length', 100);
+
+ cy.get('.swh-search-result-entry#origin-0 td a')
+ .should('have.text', 'https://many.origins/101');
+ cy.get('.swh-search-result-entry#origin-99 td a')
+ .should('have.text', 'https://many.origins/200');
+
+ cy.get('#origins-prev-results-button')
+ .should('not.have.class', 'disabled');
+ cy.get('#origins-next-results-button')
+ .should('not.have.class', 'disabled');
+
+ // Get third (and last) page of results
+ cy.get('#origins-next-results-button a')
+ .click();
+
+ cy.get('.swh-search-result-entry')
+ .should('have.length', 50);
+
+ cy.get('.swh-search-result-entry#origin-0 td a')
+ .should('have.text', 'https://many.origins/201');
+ cy.get('.swh-search-result-entry#origin-49 td a')
+ .should('have.text', 'https://many.origins/250');
+
+ cy.get('#origins-prev-results-button')
+ .should('not.have.class', 'disabled');
+ cy.get('#origins-next-results-button')
+ .should('have.class', 'disabled');
+ });
+ });
+
+ it('should paginate backward from a middle page', function() {
+ // Setup search
+ cy.get('#swh-search-origins-with-visit')
+ .uncheck()
+ .get('#swh-filter-empty-visits')
+ .uncheck()
+ .then(() => {
+ const searchText = 'many.origins';
+
+ // Get first page of results
+ doSearch(searchText);
+
+ cy.get('#origins-prev-results-button')
+ .should('have.class', 'disabled');
+ cy.get('#origins-next-results-button')
+ .should('not.have.class', 'disabled');
+
+ // Get second page of results
+ cy.get('#origins-next-results-button a')
+ .click();
+ cy.get('#origins-prev-results-button')
+ .should('not.have.class', 'disabled');
+ cy.get('#origins-next-results-button')
+ .should('not.have.class', 'disabled');
+
+ // Get first page of results again
+ cy.get('#origins-prev-results-button a')
+ .click();
+
+ cy.get('.swh-search-result-entry')
+ .should('have.length', 100);
+
+ cy.get('.swh-search-result-entry#origin-0 td a')
+ .should('have.text', 'https://many.origins/1');
+ cy.get('.swh-search-result-entry#origin-99 td a')
+ .should('have.text', 'https://many.origins/100');
+
+ cy.get('#origins-prev-results-button')
+ .should('have.class', 'disabled');
+ cy.get('#origins-next-results-button')
+ .should('not.have.class', 'disabled');
+ });
+ });
+
+ it('should paginate backward from the last page', function() {
+ // Setup search
+ cy.get('#swh-search-origins-with-visit')
+ .uncheck()
+ .get('#swh-filter-empty-visits')
+ .uncheck()
+ .then(() => {
+ const searchText = 'many.origins';
+
+ // Get first page of results
+ doSearch(searchText);
+
+ cy.get('#origins-prev-results-button')
+ .should('have.class', 'disabled');
+ cy.get('#origins-next-results-button')
+ .should('not.have.class', 'disabled');
+
+ // Get second page of results
+ cy.get('#origins-next-results-button a')
+ .click();
+
+ cy.get('#origins-prev-results-button')
+ .should('not.have.class', 'disabled');
+ cy.get('#origins-next-results-button')
+ .should('not.have.class', 'disabled');
+
+ // Get third (and last) page of results
+ cy.get('#origins-next-results-button a')
+ .click();
+
+ cy.get('#origins-prev-results-button')
+ .should('not.have.class', 'disabled');
+ cy.get('#origins-next-results-button')
+ .should('have.class', 'disabled');
+
+ // Get second page of results again
+ cy.get('#origins-prev-results-button a')
+ .click();
+
+ cy.get('.swh-search-result-entry')
+ .should('have.length', 100);
+
+ cy.get('.swh-search-result-entry#origin-0 td a')
+ .should('have.text', 'https://many.origins/101');
+ cy.get('.swh-search-result-entry#origin-99 td a')
+ .should('have.text', 'https://many.origins/200');
+
+ cy.get('#origins-prev-results-button')
+ .should('not.have.class', 'disabled');
+ cy.get('#origins-next-results-button')
+ .should('not.have.class', 'disabled');
+
+ // Get first page of results again
+ cy.get('#origins-prev-results-button a')
+ .click();
+
+ cy.get('.swh-search-result-entry')
+ .should('have.length', 100);
+
+ cy.get('.swh-search-result-entry#origin-0 td a')
+ .should('have.text', 'https://many.origins/1');
+ cy.get('.swh-search-result-entry#origin-99 td a')
+ .should('have.text', 'https://many.origins/100');
+
+ cy.get('#origins-prev-results-button')
+ .should('have.class', 'disabled');
+ cy.get('#origins-next-results-button')
+ .should('not.have.class', 'disabled');
+ });
+ });
+ });
+
context('Test valid persistent ids', function() {
it('should resolve directory', function() {
const redirectUrl = this.Urls.browse_directory(origin.content[0].directory);
const persistentId = `swh:1:dir:${origin.content[0].directory}`;
searchShouldRedirect(persistentId, redirectUrl);
});
it('should resolve revision', function() {
const redirectUrl = this.Urls.browse_revision(origin.revisions[0]);
const persistentId = `swh:1:rev:${origin.revisions[0]}`;
searchShouldRedirect(persistentId, redirectUrl);
});
it('should resolve snapshot', function() {
const redirectUrl = this.Urls.browse_snapshot_directory(origin.snapshot);
const persistentId = `swh:1:snp:${origin.snapshot}`;
searchShouldRedirect(persistentId, redirectUrl);
});
it('should resolve content', function() {
const redirectUrl = this.Urls.browse_content(`sha1_git:${origin.content[0].sha1git}`);
const persistentId = `swh:1:cnt:${origin.content[0].sha1git}`;
searchShouldRedirect(persistentId, redirectUrl);
});
});
context('Test invalid persistent ids', function() {
it('should show not found for directory', function() {
const persistentId = `swh:1:dir:${this.unarchivedRepo.rootDirectory}`;
const msg = `Directory with sha1_git ${this.unarchivedRepo.rootDirectory} not found`;
searchShouldShowNotFound(persistentId, msg);
});
it('should show not found for snapshot', function() {
const persistentId = `swh:1:snp:${this.unarchivedRepo.snapshot}`;
const msg = `Snapshot with id ${this.unarchivedRepo.snapshot} not found!`;
searchShouldShowNotFound(persistentId, msg);
});
it('should show not found for revision', function() {
const persistentId = `swh:1:rev:${this.unarchivedRepo.revision}`;
const msg = `Revision with sha1_git ${this.unarchivedRepo.revision} not found.`;
searchShouldShowNotFound(persistentId, msg);
});
it('should show not found for content', function() {
const persistentId = `swh:1:cnt:${this.unarchivedRepo.content[0].sha1git}`;
const msg = `Content with sha1_git checksum equals to ${this.unarchivedRepo.content[0].sha1git} not found!`;
searchShouldShowNotFound(persistentId, msg);
});
});
});
diff --git a/swh/web/assets/src/bundles/browse/origin-search.js b/swh/web/assets/src/bundles/browse/origin-search.js
index 1ad757b3..12861fb7 100644
--- a/swh/web/assets/src/bundles/browse/origin-search.js
+++ b/swh/web/assets/src/bundles/browse/origin-search.js
@@ -1,233 +1,244 @@
/**
* Copyright (C) 2018-2019 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU Affero General Public License version 3, or any later version
* See top-level LICENSE file for more information
*/
import {heapsPermute} from 'utils/heaps-permute';
import {handleFetchError} from 'utils/functions';
-let originPatterns;
-let perPage = 100;
-let limit = perPage * 2;
-let offset = 0;
-let currentData = null;
+const limit = 100;
+let linksPrev = [];
+let linkNext = null;
+let linkCurrent = null;
let inSearch = false;
/**
 * Extracts the URL of the 'next' relation from an HTTP Link header.
 *
 * @param {string} s - raw value of the Link response header
 * @returns {string|undefined} the next-page URL, or undefined when the
 *   header contains no rel="next" link.
 *
 * Fix: String.prototype.match returns null when the regexp does not
 * match, so the original `s.match(re)[1]` threw a TypeError on the last
 * results page (no rel="next"); the caller's `!== undefined` guard
 * could never trigger. Now we return undefined instead of throwing.
 */
function parseLinkHeader(s) {
  const match = s.match(/<(.+)>; rel="next"/);
  return match !== null ? match[1] : undefined;
}
+
/**
 * Removes inline style attributes from the result rows, deferred to the
 * next tick so the table has been rendered first.
 */
function fixTableRowsStyle() {
  const clearRowStyles = () => {
    $('#origin-search-results tbody tr').removeAttr('style');
  };
  setTimeout(clearRowStyles);
}
/**
 * Empties the origin search results table body.
 */
function clearOriginSearchResultsTable() {
  const resultRows = $('#origin-search-results tbody tr');
  resultRows.remove();
}
-function populateOriginSearchResultsTable(origins, offset) {
- let localOffset = offset % limit;
+function populateOriginSearchResultsTable(origins) {
if (origins.length > 0) {
$('#swh-origin-search-results').show();
$('#swh-no-result').hide();
clearOriginSearchResultsTable();
let table = $('#origin-search-results tbody');
- for (let i = localOffset; i < localOffset + perPage && i < origins.length; ++i) {
- let origin = origins[i];
+ for (let [i, origin] of origins.entries()) {
let browseUrl = Urls.browse_origin(origin.url);
let tableRow = `
`;
tableRow += `${encodeURI(origin.url)} | `;
tableRow += ` | `;
tableRow += ` | `;
tableRow += '
';
table.append(tableRow);
// get async latest visit snapshot and update visit status icon
let latestSnapshotUrl = Urls.api_1_origin_visit_latest(origin.url);
latestSnapshotUrl += '?require_snapshot=true';
fetch(latestSnapshotUrl)
.then(response => response.json())
.then(data => {
$(`#visit-type-origin-${i}`).text(data.type);
$(`#visit-status-origin-${i}`).children().remove();
if (data) {
$(`#visit-status-origin-${i}`).append('');
} else {
$(`#visit-status-origin-${i}`).append('');
if ($('#swh-filter-empty-visits').prop('checked')) {
$(`#origin-${i}`).remove();
}
}
});
}
fixTableRowsStyle();
} else {
$('#swh-origin-search-results').hide();
$('#swh-no-result').text('No origins matching the search criteria were found.');
$('#swh-no-result').show();
}
- if (origins.length - localOffset < perPage ||
- (origins.length < limit && (localOffset + perPage) === origins.length)) {
+
+ if (linkNext === null) {
$('#origins-next-results-button').addClass('disabled');
} else {
$('#origins-next-results-button').removeClass('disabled');
}
- if (offset > 0) {
- $('#origins-prev-results-button').removeClass('disabled');
- } else {
+
+ if (linksPrev.length === 0) {
$('#origins-prev-results-button').addClass('disabled');
+ } else {
+ $('#origins-prev-results-button').removeClass('disabled');
}
+
inSearch = false;
setTimeout(() => {
window.scrollTo(0, 0);
});
}
/**
 * Prefixes every regular-expression metacharacter in `str` with a
 * URL-encoded backslash ('%5C') so the text can be embedded verbatim
 * in a regexp sent as a URL query parameter.
 */
function escapeStringRegexp(str) {
  return str.replace(/[|\\{}()[\]^$+*?.]/g, '%5C$&');
}
-function searchOrigins(patterns, limit, searchOffset, offset) {
+function searchOriginsFirst(patterns, limit) {
let baseSearchUrl;
let searchMetadata = $('#swh-search-origin-metadata').prop('checked');
if (searchMetadata) {
baseSearchUrl = Urls.api_1_origin_metadata_search() + `?fulltext=${patterns}`;
} else {
- originPatterns = patterns;
let patternsArray = patterns.trim().replace(/\s+/g, ' ').split(' ');
for (let i = 0; i < patternsArray.length; ++i) {
patternsArray[i] = escapeStringRegexp(patternsArray[i]);
}
// url length must be less than 4096 for modern browsers
// assuming average word length, 6 is max patternArray.length
if (patternsArray.length < 7) {
let patternsPermut = [];
heapsPermute(patternsArray, p => patternsPermut.push(p.join('.*')));
let regex = patternsPermut.join('|');
baseSearchUrl = Urls.api_1_origin_search(regex) + `?regexp=true`;
} else {
baseSearchUrl = Urls.api_1_origin_search(patternsArray.join('.*')) + `?regexp=true`;
}
}
let withVisit = $('#swh-search-origins-with-visit').prop('checked');
- let searchUrl = baseSearchUrl + `&limit=${limit}&offset=${searchOffset}&with_visit=${withVisit}`;
+ let searchUrl = baseSearchUrl + `&limit=${limit}&with_visit=${withVisit}`;
+ searchOrigins(searchUrl);
+}
+function searchOrigins(searchUrl) {
clearOriginSearchResultsTable();
$('.swh-loading').addClass('show');
- fetch(searchUrl)
+ let response = fetch(searchUrl)
.then(handleFetchError)
- .then(response => response.json())
+ .then(resp => {
+ response = resp;
+ return response.json();
+ })
.then(data => {
- currentData = data;
+ // Save link to the current results page
+ linkCurrent = searchUrl;
+ // Save link to the next results page.
+ linkNext = null;
+ if (response.headers.has('Link')) {
+ let parsedLink = parseLinkHeader(response.headers.get('Link'));
+ if (parsedLink !== undefined) {
+ linkNext = parsedLink;
+ }
+ }
+ // prevLinks is updated by the caller, which is the one to know if
+ // we're going forward or backward in the pages.
+
$('.swh-loading').removeClass('show');
- populateOriginSearchResultsTable(data, offset);
+ populateOriginSearchResultsTable(data);
})
.catch(response => {
$('.swh-loading').removeClass('show');
inSearch = false;
$('#swh-origin-search-results').hide();
$('#swh-no-result').text(`Error ${response.status}: ${response.statusText}`);
$('#swh-no-result').show();
});
}
function doSearch() {
$('#swh-no-result').hide();
let patterns = $('#origins-url-patterns').val();
- offset = 0;
inSearch = true;
// first try to resolve a swh persistent identifier
let resolvePidUrl = Urls.api_1_resolve_swh_pid(patterns);
fetch(resolvePidUrl)
.then(handleFetchError)
.then(response => response.json())
.then(data => {
// pid has been successfully resolved,
// so redirect to browse page
window.location = data.browse_url;
})
.catch(response => {
// pid resolving failed
if (patterns.startsWith('swh:')) {
// display a useful error message if the input
// looks like a swh pid
response.json().then(data => {
$('#swh-origin-search-results').hide();
$('.swh-search-pagination').hide();
$('#swh-no-result').text(data.reason);
$('#swh-no-result').show();
});
} else {
// otherwise, proceed with origins search
$('#swh-origin-search-results').show();
$('.swh-search-pagination').show();
- searchOrigins(patterns, limit, offset, offset);
+ searchOriginsFirst(patterns, limit);
}
});
}
export function initOriginSearch() {
$(document).ready(() => {
$('#swh-search-origins').submit(event => {
event.preventDefault();
let patterns = $('#origins-url-patterns').val().trim();
let withVisit = $('#swh-search-origins-with-visit').prop('checked');
let withContent = $('#swh-filter-empty-visits').prop('checked');
let searchMetadata = $('#swh-search-origin-metadata').prop('checked');
let queryParameters = '?q=' + encodeURIComponent(patterns);
if (withVisit) {
queryParameters += '&with_visit';
}
if (withContent) {
queryParameters += '&with_content';
}
if (searchMetadata) {
queryParameters += '&search_metadata';
}
// Update the url, triggering page reload and effective search
window.location.search = queryParameters;
});
$('#origins-next-results-button').click(event => {
if ($('#origins-next-results-button').hasClass('disabled') || inSearch) {
return;
}
inSearch = true;
- offset += perPage;
- if (!currentData || (offset >= limit && offset % limit === 0)) {
- searchOrigins(originPatterns, limit, offset, offset);
- } else {
- populateOriginSearchResultsTable(currentData, offset);
- }
+ linksPrev.push(linkCurrent);
+ searchOrigins(linkNext);
event.preventDefault();
});
$('#origins-prev-results-button').click(event => {
if ($('#origins-prev-results-button').hasClass('disabled') || inSearch) {
return;
}
inSearch = true;
- offset -= perPage;
- if (!currentData || (offset > 0 && (offset + perPage) % limit === 0)) {
- searchOrigins(originPatterns, limit, (offset + perPage) - limit, offset);
- } else {
- populateOriginSearchResultsTable(currentData, offset);
- }
+ searchOrigins(linksPrev.pop());
event.preventDefault();
});
let urlParams = new URLSearchParams(window.location.search);
let query = urlParams.get('q');
let withVisit = urlParams.has('with_visit');
let withContent = urlParams.has('with_content');
let searchMetadata = urlParams.has('search_metadata');
if (query) {
$('#origins-url-patterns').val(query);
$('#swh-search-origins-with-visit').prop('checked', withVisit);
$('#swh-filter-empty-visits').prop('checked', withContent);
$('#swh-search-origin-metadata').prop('checked', searchMetadata);
doSearch();
}
});
}
diff --git a/swh/web/tests/data.py b/swh/web/tests/data.py
index f60a5f63..a4ffe7c7 100644
--- a/swh/web/tests/data.py
+++ b/swh/web/tests/data.py
@@ -1,465 +1,473 @@
# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import random
from copy import deepcopy
from typing import Dict
from rest_framework.decorators import api_view
from rest_framework.response import Response
from swh.indexer.fossology_license import FossologyLicenseIndexer
from swh.indexer.mimetype import MimetypeIndexer
from swh.indexer.ctags import CtagsIndexer
from swh.indexer.storage import get_indexer_storage
from swh.model.from_disk import Directory
from swh.model.hashutil import hash_to_hex, hash_to_bytes, DEFAULT_ALGORITHMS
from swh.model.identifiers import directory_identifier
from swh.loader.git.from_disk import GitLoaderFromArchive
from swh.storage.algos.dir_iterators import dir_iterator
from swh.web import config
from swh.web.browse.utils import (
get_mimetype_and_encoding_for_content, prepare_content_for_display
)
from swh.web.common import service
from swh.web.common.highlightjs import get_hljs_language_from_filename
# Module used to initialize data that will be provided as tests input
# Configuration for git loader
_TEST_LOADER_CONFIG = {
'storage': {
'cls': 'memory'
},
'send_contents': True,
'send_directories': True,
'send_revisions': True,
'send_releases': True,
'send_snapshot': True,
'content_size_limit': 100 * 1024 * 1024,
'content_packet_size': 10,
'content_packet_size_bytes': 100 * 1024 * 1024,
'directory_packet_size': 10,
'revision_packet_size': 10,
'release_packet_size': 10,
'save_data': False,
}
# Base content indexer configuration
_TEST_INDEXER_BASE_CONFIG = {
'storage': {
'cls': 'memory'
},
'objstorage': {
'cls': 'memory',
'args': {},
},
'indexer_storage': {
'cls': 'memory',
'args': {},
}
}
def random_sha1():
    # Hex digest of 20 random bytes: the width of a sha1 checksum.
    random_bytes = bytes(random.randint(0, 255) for _ in range(20))
    return hash_to_hex(random_bytes)
def random_sha256():
    # Hex digest of 32 random bytes: the width of a sha256 checksum.
    random_bytes = bytes(random.randint(0, 255) for _ in range(32))
    return hash_to_hex(random_bytes)
def random_blake2s256():
    # Hex digest of 32 random bytes: the width of a blake2s256 checksum.
    random_bytes = bytes(random.randint(0, 255) for _ in range(32))
    return hash_to_hex(random_bytes)
def random_content():
    # Checksum dict shaped like a storage content entry, with random
    # digests for every supported hash algorithm.
    content = {}
    content['sha1'] = random_sha1()
    content['sha1_git'] = random_sha1()
    content['sha256'] = random_sha256()
    content['blake2s256'] = random_blake2s256()
    return content
# MimetypeIndexer with custom configuration for tests
class _MimetypeIndexer(MimetypeIndexer):
    def parse_config_file(self, *args, **kwargs):
        # Bypass on-disk configuration files entirely: return a static
        # in-memory config so tests never depend on the host setup.
        return {
            **_TEST_INDEXER_BASE_CONFIG,
            'tools': {
                'name': 'file',
                'version': '1:5.30-1+deb9u1',
                'configuration': {
                    "type": "library",
                    "debian-package": "python3-magic"
                }
            }
        }
# FossologyLicenseIndexer with custom configuration for tests
class _FossologyLicenseIndexer(FossologyLicenseIndexer):
    def parse_config_file(self, *args, **kwargs):
        # Bypass on-disk configuration files entirely: return a static
        # in-memory config so tests never depend on the host setup.
        return {
            **_TEST_INDEXER_BASE_CONFIG,
            'workdir': '/tmp/swh/indexer.fossology.license',
            'tools': {
                'name': 'nomos',
                'version': '3.1.0rc2-31-ga2cbb8c',
                'configuration': {
                    'command_line': 'nomossa ',
                },
            }
        }
# CtagsIndexer with custom configuration for tests
class _CtagsIndexer(CtagsIndexer):
    def parse_config_file(self, *args, **kwargs):
        # Bypass on-disk configuration files entirely: return a static
        # in-memory config so tests never depend on the host setup.
        return {
            **_TEST_INDEXER_BASE_CONFIG,
            'workdir': '/tmp/swh/indexer.ctags',
            'languages': {'c': 'c'},
            'tools': {
                'name': 'universal-ctags',
                'version': '~git7859817b',
                'configuration': {
                    'command_line': '''ctags --fields=+lnz --sort=no --links=no ''' # noqa
                                    '''--output-format=json '''
                },
            }
        }
# Lightweight git repositories that will be loaded to generate
# input data for tests
_TEST_ORIGINS = [
{
'type': 'git',
'url': 'https://github.com/wcoder/highlightjs-line-numbers.js',
'archives': ['highlightjs-line-numbers.js.zip',
'highlightjs-line-numbers.js_visit2.zip'],
'visit_date': ['Dec 1 2018, 01:00 UTC',
'Jan 20 2019, 15:00 UTC']
},
{
'type': 'git',
'url': 'https://github.com/memononen/libtess2',
'archives': ['libtess2.zip'],
'visit_date': ['May 25 2018, 01:00 UTC']
},
{
'type': 'git',
'url': 'repo_with_submodules',
'archives': ['repo_with_submodules.tgz'],
'visit_date': ['Jan 1 2019, 01:00 UTC']
}
]
_contents = {}
# Tests data initialization
def _init_tests_data():
# Load git repositories from archives
loader = GitLoaderFromArchive(config=_TEST_LOADER_CONFIG)
# Get reference to the memory storage
storage = loader.storage
for origin in _TEST_ORIGINS:
for i, archive in enumerate(origin['archives']):
origin_repo_archive = \
os.path.join(os.path.dirname(__file__),
'resources/repos/%s' % archive)
loader.load(origin['url'], origin_repo_archive,
origin['visit_date'][i])
origin.update(storage.origin_get(origin)) # add an 'id' key if enabled
+ for i in range(250):
+ url = 'https://many.origins/%d' % (i+1)
+ storage.origin_add([{'url': url}])
+ visit = storage.origin_visit_add(url, '2019-12-03 13:55:05', 'tar')
+ storage.origin_visit_update(
+ url, visit['visit'],
+ snapshot='1a8893e6a86f444e8be8e7bda6cb34fb1735a00e')
+
contents = set()
directories = set()
revisions = set()
releases = set()
snapshots = set()
content_path = {}
# Get all objects loaded into the test archive
for origin in _TEST_ORIGINS:
snp = storage.snapshot_get_latest(origin['url'])
snapshots.add(hash_to_hex(snp['id']))
for branch_name, branch_data in snp['branches'].items():
if branch_data['target_type'] == 'revision':
revisions.add(branch_data['target'])
elif branch_data['target_type'] == 'release':
release = next(storage.release_get([branch_data['target']]))
revisions.add(release['target'])
releases.add(hash_to_hex(branch_data['target']))
for rev_log in storage.revision_shortlog(set(revisions)):
rev_id = rev_log[0]
revisions.add(rev_id)
for rev in storage.revision_get(revisions):
dir_id = rev['directory']
directories.add(hash_to_hex(dir_id))
for entry in dir_iterator(storage, dir_id):
content_path[entry['sha1']] = '/'.join(
[hash_to_hex(dir_id), entry['path'].decode('utf-8')])
if entry['type'] == 'file':
contents.add(entry['sha1'])
elif entry['type'] == 'dir':
directories.add(hash_to_hex(entry['target']))
# Get all checksums for each content
contents_metadata = storage.content_get_metadata(contents)
contents = []
for content_metadata in contents_metadata:
contents.append({
algo: hash_to_hex(content_metadata[algo])
for algo in DEFAULT_ALGORITHMS
})
path = content_path[content_metadata['sha1']]
cnt = next(storage.content_get([content_metadata['sha1']]))
mimetype, encoding = get_mimetype_and_encoding_for_content(cnt['data'])
content_display_data = prepare_content_for_display(
cnt['data'], mimetype, path)
contents[-1]['path'] = path
contents[-1]['mimetype'] = mimetype
contents[-1]['encoding'] = encoding
contents[-1]['hljs_language'] = content_display_data['language']
contents[-1]['data'] = content_display_data['content_data']
_contents[contents[-1]['sha1']] = contents[-1]
# Create indexer storage instance that will be shared by indexers
idx_storage = get_indexer_storage('memory', {})
# Add the empty directory to the test archive
empty_dir_id = directory_identifier({'entries': []})
empty_dir_id_bin = hash_to_bytes(empty_dir_id)
storage.directory_add([{'id': empty_dir_id_bin, 'entries': []}])
# Return tests data
return {
'storage': storage,
'idx_storage': idx_storage,
'origins': _TEST_ORIGINS,
'contents': contents,
'directories': list(directories),
'releases': list(releases),
'revisions': list(map(hash_to_hex, revisions)),
'snapshots': list(snapshots),
'generated_checksums': set(),
}
def _init_indexers(tests_data):
    """Instantiate the content indexers used in tests, wired to the
    in-memory storages found in ``tests_data``.

    Returns a dict mapping indexer name to the configured instance.
    """
    indexer_classes = {
        'mimetype_indexer': _MimetypeIndexer,
        'license_indexer': _FossologyLicenseIndexer,
        'ctags_indexer': _CtagsIndexer,
    }
    indexers = {}
    for idx_name, idx_class in indexer_classes.items():
        idx = idx_class()
        # Force each indexer onto the shared memory storages.
        idx.storage = tests_data['storage']
        idx.objstorage = tests_data['storage'].objstorage
        idx.idx_storage = tests_data['idx_storage']
        idx.register_tools(idx.config['tools'])
        indexers[idx_name] = idx
    return indexers
def get_content(content_sha1):
    """Return the test content whose sha1 checksum matches, or None
    when no such content was loaded."""
    if content_sha1 in _contents:
        return _contents[content_sha1]
    return None
_tests_data = None
_current_tests_data = None
_indexer_loggers = {}
def get_tests_data(reset=False):
    """
    Initialize tests data and return them in a dict.

    Args:
        reset (bool): when True, hand back a fresh deep copy of the
            initial data instead of the possibly-mutated current one.
    """
    global _tests_data, _current_tests_data
    if _tests_data is None:
        # One-time expensive initialization of the shared test data.
        _tests_data = _init_tests_data()
        indexers = _init_indexers(_tests_data)
        for (name, idx) in indexers.items():
            # pytest makes the loggers use a temporary file; and deepcopy
            # requires serializability. So we remove them, and add them
            # back after the copy.
            _indexer_loggers[name] = idx.log
            del idx.log
        _tests_data.update(indexers)
    if reset or _current_tests_data is None:
        # Deep-copy so mutations by one test do not leak into the next,
        # then re-attach the loggers stripped above.
        _current_tests_data = deepcopy(_tests_data)
        for (name, logger) in _indexer_loggers.items():
            _current_tests_data[name].log = logger
    return _current_tests_data
def override_storages(storage, idx_storage):
    """
    Helper function to replace the storages from which archive data
    are fetched, both in the web config and in the service module.
    """
    swh_config = config.get_config()
    swh_config['storage'] = storage
    swh_config['indexer_storage'] = idx_storage
    service.storage = storage
    service.idx_storage = idx_storage
# Implement some special endpoints used to provide input tests data
# when executing end to end tests with cypress
_content_code_data_exts = {} # type: Dict[str, Dict[str, str]]
_content_code_data_filenames = {} # type: Dict[str, Dict[str, str]]
_content_other_data_exts = {} # type: Dict[str, Dict[str, str]]
def _init_content_tests_data(data_path, data_dict, ext_key):
    """
    Helper function to read the content of a directory, store it
    into a test archive and add some files metadata (sha1 and/or
    expected programming language) in a dict.

    Args:
        data_path (str): path to a directory relative to the tests
            folder of swh-web
        data_dict (dict): the dict that will store files metadata
        ext_key (bool): whether to use file extensions or filenames
            as dict keys
    """
    test_contents_dir = os.path.join(
        os.path.dirname(__file__), data_path).encode('utf-8')
    directory = Directory.from_disk(path=test_contents_dir, data=True,
                                    save_path=True)

    objects = directory.collect()

    for c in objects['content'].values():
        c['status'] = 'visible'
        sha1 = hash_to_hex(c['sha1'])
        if ext_key:
            # key by file extension, e.g. 'py' for some 'foo.py'
            key = c['path'].decode('utf-8').split('.')[-1]
            filename = 'test.' + key
        else:
            # key by bare filename, e.g. 'Makefile'
            filename = c['path'].decode('utf-8').split('/')[-1]
            key = filename
        language = get_hljs_language_from_filename(filename)
        data_dict[key] = {'sha1': sha1,
                          'language': language}
        # drop filesystem-only attributes before archiving the content
        del c['path']
        del c['perms']
    storage = get_tests_data()['storage']
    storage.content_add(objects['content'].values())
def _init_content_code_data_exts():
    """
    Fill a global dictionary which maps source file extension to
    a code content example.
    """
    global _content_code_data_exts
    data_path = 'resources/contents/code/extensions'
    _init_content_tests_data(data_path, _content_code_data_exts, True)
def _init_content_other_data_exts():
    """
    Fill a global dictionary which maps a file extension to
    a content example.
    """
    global _content_other_data_exts
    data_path = 'resources/contents/other/extensions'
    _init_content_tests_data(data_path, _content_other_data_exts, True)
def _init_content_code_data_filenames():
    """
    Fill a global dictionary which maps a filename to
    a content example.
    """
    global _content_code_data_filenames
    data_path = 'resources/contents/code/filenames'
    _init_content_tests_data(data_path, _content_code_data_filenames, False)
# Populate the content test data only when the e2e tests mode is
# enabled in the web configuration.
if config.get_config()['e2e_tests_mode']:
    _init_content_code_data_exts()
    _init_content_other_data_exts()
    _init_content_code_data_filenames()
@api_view(['GET'])
def get_content_code_data_all_exts(request):
    """
    Endpoint implementation returning a list of all source file
    extensions to test for highlighting using cypress.
    """
    exts = sorted(_content_code_data_exts.keys())
    return Response(exts, status=200, content_type='application/json')
@api_view(['GET'])
def get_content_code_data_by_ext(request, ext):
    """
    Endpoint implementation returning metadata of a code content example
    based on the source file extension.

    Responds 404 with a null body when the extension is unknown.
    """
    data = _content_code_data_exts.get(ext)
    status = 200 if data is not None else 404
    return Response(data, status=status, content_type='application/json')
@api_view(['GET'])
def get_content_other_data_by_ext(request, ext):
    """
    Endpoint implementation returning metadata of a content example
    based on the file extension.

    Responds 404 with a null body when the extension is unknown.
    """
    # NOTE(review): unlike the sibling endpoints, this one re-runs the
    # data initialization on every request -- confirm this is intended.
    _init_content_other_data_exts()
    data = _content_other_data_exts.get(ext)
    status = 200 if data is not None else 404
    return Response(data, status=status, content_type='application/json')
@api_view(['GET'])
def get_content_code_data_all_filenames(request):
    """
    Endpoint implementation returning a list of all source filenames
    to test for highlighting using cypress.
    """
    filenames = sorted(_content_code_data_filenames.keys())
    return Response(filenames, status=200, content_type='application/json')
@api_view(['GET'])
def get_content_code_data_by_filename(request, filename):
    """
    Endpoint implementation returning metadata of a code content example
    based on the source filename.

    Responds 404 with a null body when the filename is unknown.
    """
    data = _content_code_data_filenames.get(filename)
    status = 200 if data is not None else 404
    return Response(data, status=status, content_type='application/json')