diff --git a/cypress/integration/origin-search.spec.js b/cypress/integration/origin-search.spec.js index 0771a4e1..74d09e32 100644 --- a/cypress/integration/origin-search.spec.js +++ b/cypress/integration/origin-search.spec.js @@ -1,152 +1,355 @@ /** * Copyright (C) 2019 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ const nonExistentText = 'NoMatchExists'; let origin; let url; function doSearch(searchText) { cy.get('#origins-url-patterns') .type(searchText) .get('.swh-search-icon') .click(); } function searchShouldRedirect(searchText, redirectUrl) { doSearch(searchText); cy.location('pathname') .should('equal', redirectUrl); } function searchShouldShowNotFound(searchText, msg) { doSearch(searchText); cy.get('#swh-no-result') .should('be.visible') .and('contain', msg); } describe('Test origin-search', function() { before(function() { origin = this.origin[0]; url = this.Urls.browse_search(); }); beforeEach(function() { cy.visit(url); }); it('should show in result when url is searched', function() { cy.get('#origins-url-patterns') .type(origin.url); cy.get('.swh-search-icon') .click(); cy.get('#origin-search-results') .should('be.visible'); cy.contains('tr', origin.url) .should('be.visible') .find('.swh-visit-status') .find('i') .should('have.class', 'fa-check') .and('have.attr', 'title', 'Origin has at least one full visit by Software Heritage'); }); it('should show not found message when no repo matches', function() { searchShouldShowNotFound(nonExistentText, 'No origins matching the search criteria were found.'); }); it('should add appropriate URL parameters', function() { // Check all three checkboxes and check if // correct url params are added cy.get('#swh-search-origins-with-visit') .check() .get('#swh-filter-empty-visits') .check() .get('#swh-search-origin-metadata') .check() 
.then(() => { const searchText = origin.url; doSearch(searchText); cy.location('search').then(locationSearch => { const urlParams = new URLSearchParams(locationSearch); const query = urlParams.get('q'); const withVisit = urlParams.has('with_visit'); const withContent = urlParams.has('with_content'); const searchMetadata = urlParams.has('search_metadata'); assert.strictEqual(query, searchText); assert.strictEqual(withVisit, true); assert.strictEqual(withContent, true); assert.strictEqual(searchMetadata, true); }); }); }); + context('Test pagination', function() { + it('should not paginate if there are not many results', function() { + // Setup search + cy.get('#swh-search-origins-with-visit') + .uncheck() + .get('#swh-filter-empty-visits') + .uncheck() + .then(() => { + const searchText = 'libtess'; + + // Get first page of results + doSearch(searchText); + + cy.get('.swh-search-result-entry') + .should('have.length', 1); + + cy.get('.swh-search-result-entry#origin-0 td a') + .should('have.text', 'https://github.com/memononen/libtess2'); + + cy.get('#origins-prev-results-button') + .should('have.class', 'disabled'); + cy.get('#origins-next-results-button') + .should('have.class', 'disabled'); + }); + }); + + it('should paginate forward when there are many results', function() { + // Setup search + cy.get('#swh-search-origins-with-visit') + .uncheck() + .get('#swh-filter-empty-visits') + .uncheck() + .then(() => { + const searchText = 'many.origins'; + + // Get first page of results + doSearch(searchText); + + cy.get('.swh-search-result-entry') + .should('have.length', 100); + + cy.get('.swh-search-result-entry#origin-0 td a') + .should('have.text', 'https://many.origins/1'); + cy.get('.swh-search-result-entry#origin-99 td a') + .should('have.text', 'https://many.origins/100'); + + cy.get('#origins-prev-results-button') + .should('have.class', 'disabled'); + cy.get('#origins-next-results-button') + .should('not.have.class', 'disabled'); + + // Get second page of 
results + cy.get('#origins-next-results-button a') + .click(); + + cy.get('.swh-search-result-entry') + .should('have.length', 100); + + cy.get('.swh-search-result-entry#origin-0 td a') + .should('have.text', 'https://many.origins/101'); + cy.get('.swh-search-result-entry#origin-99 td a') + .should('have.text', 'https://many.origins/200'); + + cy.get('#origins-prev-results-button') + .should('not.have.class', 'disabled'); + cy.get('#origins-next-results-button') + .should('not.have.class', 'disabled'); + + // Get third (and last) page of results + cy.get('#origins-next-results-button a') + .click(); + + cy.get('.swh-search-result-entry') + .should('have.length', 50); + + cy.get('.swh-search-result-entry#origin-0 td a') + .should('have.text', 'https://many.origins/201'); + cy.get('.swh-search-result-entry#origin-49 td a') + .should('have.text', 'https://many.origins/250'); + + cy.get('#origins-prev-results-button') + .should('not.have.class', 'disabled'); + cy.get('#origins-next-results-button') + .should('have.class', 'disabled'); + }); + }); + + it('should paginate backward from a middle page', function() { + // Setup search + cy.get('#swh-search-origins-with-visit') + .uncheck() + .get('#swh-filter-empty-visits') + .uncheck() + .then(() => { + const searchText = 'many.origins'; + + // Get first page of results + doSearch(searchText); + + cy.get('#origins-prev-results-button') + .should('have.class', 'disabled'); + cy.get('#origins-next-results-button') + .should('not.have.class', 'disabled'); + + // Get second page of results + cy.get('#origins-next-results-button a') + .click(); + cy.get('#origins-prev-results-button') + .should('not.have.class', 'disabled'); + cy.get('#origins-next-results-button') + .should('not.have.class', 'disabled'); + + // Get first page of results again + cy.get('#origins-prev-results-button a') + .click(); + + cy.get('.swh-search-result-entry') + .should('have.length', 100); + + cy.get('.swh-search-result-entry#origin-0 td a') + 
.should('have.text', 'https://many.origins/1'); + cy.get('.swh-search-result-entry#origin-99 td a') + .should('have.text', 'https://many.origins/100'); + + cy.get('#origins-prev-results-button') + .should('have.class', 'disabled'); + cy.get('#origins-next-results-button') + .should('not.have.class', 'disabled'); + }); + }); + + it('should paginate backward from the last page', function() { + // Setup search + cy.get('#swh-search-origins-with-visit') + .uncheck() + .get('#swh-filter-empty-visits') + .uncheck() + .then(() => { + const searchText = 'many.origins'; + + // Get first page of results + doSearch(searchText); + + cy.get('#origins-prev-results-button') + .should('have.class', 'disabled'); + cy.get('#origins-next-results-button') + .should('not.have.class', 'disabled'); + + // Get second page of results + cy.get('#origins-next-results-button a') + .click(); + + cy.get('#origins-prev-results-button') + .should('not.have.class', 'disabled'); + cy.get('#origins-next-results-button') + .should('not.have.class', 'disabled'); + + // Get third (and last) page of results + cy.get('#origins-next-results-button a') + .click(); + + cy.get('#origins-prev-results-button') + .should('not.have.class', 'disabled'); + cy.get('#origins-next-results-button') + .should('have.class', 'disabled'); + + // Get second page of results again + cy.get('#origins-prev-results-button a') + .click(); + + cy.get('.swh-search-result-entry') + .should('have.length', 100); + + cy.get('.swh-search-result-entry#origin-0 td a') + .should('have.text', 'https://many.origins/101'); + cy.get('.swh-search-result-entry#origin-99 td a') + .should('have.text', 'https://many.origins/200'); + + cy.get('#origins-prev-results-button') + .should('not.have.class', 'disabled'); + cy.get('#origins-next-results-button') + .should('not.have.class', 'disabled'); + + // Get first page of results again + cy.get('#origins-prev-results-button a') + .click(); + + cy.get('.swh-search-result-entry') + 
.should('have.length', 100); + + cy.get('.swh-search-result-entry#origin-0 td a') + .should('have.text', 'https://many.origins/1'); + cy.get('.swh-search-result-entry#origin-99 td a') + .should('have.text', 'https://many.origins/100'); + + cy.get('#origins-prev-results-button') + .should('have.class', 'disabled'); + cy.get('#origins-next-results-button') + .should('not.have.class', 'disabled'); + }); + }); + }); + context('Test valid persistent ids', function() { it('should resolve directory', function() { const redirectUrl = this.Urls.browse_directory(origin.content[0].directory); const persistentId = `swh:1:dir:${origin.content[0].directory}`; searchShouldRedirect(persistentId, redirectUrl); }); it('should resolve revision', function() { const redirectUrl = this.Urls.browse_revision(origin.revisions[0]); const persistentId = `swh:1:rev:${origin.revisions[0]}`; searchShouldRedirect(persistentId, redirectUrl); }); it('should resolve snapshot', function() { const redirectUrl = this.Urls.browse_snapshot_directory(origin.snapshot); const persistentId = `swh:1:snp:${origin.snapshot}`; searchShouldRedirect(persistentId, redirectUrl); }); it('should resolve content', function() { const redirectUrl = this.Urls.browse_content(`sha1_git:${origin.content[0].sha1git}`); const persistentId = `swh:1:cnt:${origin.content[0].sha1git}`; searchShouldRedirect(persistentId, redirectUrl); }); }); context('Test invalid persistent ids', function() { it('should show not found for directory', function() { const persistentId = `swh:1:dir:${this.unarchivedRepo.rootDirectory}`; const msg = `Directory with sha1_git ${this.unarchivedRepo.rootDirectory} not found`; searchShouldShowNotFound(persistentId, msg); }); it('should show not found for snapshot', function() { const persistentId = `swh:1:snp:${this.unarchivedRepo.snapshot}`; const msg = `Snapshot with id ${this.unarchivedRepo.snapshot} not found!`; searchShouldShowNotFound(persistentId, msg); }); it('should show not found for revision', 
function() { const persistentId = `swh:1:rev:${this.unarchivedRepo.revision}`; const msg = `Revision with sha1_git ${this.unarchivedRepo.revision} not found.`; searchShouldShowNotFound(persistentId, msg); }); it('should show not found for content', function() { const persistentId = `swh:1:cnt:${this.unarchivedRepo.content[0].sha1git}`; const msg = `Content with sha1_git checksum equals to ${this.unarchivedRepo.content[0].sha1git} not found!`; searchShouldShowNotFound(persistentId, msg); }); }); }); diff --git a/swh/web/assets/src/bundles/browse/origin-search.js b/swh/web/assets/src/bundles/browse/origin-search.js index 1ad757b3..12861fb7 100644 --- a/swh/web/assets/src/bundles/browse/origin-search.js +++ b/swh/web/assets/src/bundles/browse/origin-search.js @@ -1,233 +1,253 @@ /** * Copyright (C) 2018-2019 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ import {heapsPermute} from 'utils/heaps-permute'; import {handleFetchError} from 'utils/functions'; -let originPatterns; -let perPage = 100; -let limit = perPage * 2; -let offset = 0; -let currentData = null; +const limit = 100; +let linksPrev = []; +let linkNext = null; +let linkCurrent = null; let inSearch = false; +/** + * Extract the URL of the next results page from an HTTP Link header value. + * + * @param {string} s - raw Link header value + * @returns {string|undefined} target of the rel="next" entry, or undefined + * when the header has no such entry. [^>]+ keeps the capture inside one + * <...> entry when the header lists several links. + */ +function parseLinkHeader(s) { + const re = /<([^>]+)>;\s*rel="next"/; + const found = s.match(re); + return found === null ? undefined : found[1]; +} + function fixTableRowsStyle() { setTimeout(() => { $('#origin-search-results tbody tr').removeAttr('style'); }); } function clearOriginSearchResultsTable() { $('#origin-search-results tbody tr').remove(); } -function populateOriginSearchResultsTable(origins, offset) { - let localOffset = offset % limit; +function populateOriginSearchResultsTable(origins) { if (origins.length > 0) { $('#swh-origin-search-results').show(); $('#swh-no-result').hide(); clearOriginSearchResultsTable(); let table = $('#origin-search-results tbody'); - 
for (let i = localOffset; i < localOffset + perPage && i < origins.length; ++i) { - let origin = origins[i]; + for (let [i, origin] of origins.entries()) { let browseUrl = Urls.browse_origin(origin.url); let tableRow = ``; tableRow += `${encodeURI(origin.url)}`; tableRow += ``; tableRow += ``; tableRow += ''; table.append(tableRow); // get async latest visit snapshot and update visit status icon let latestSnapshotUrl = Urls.api_1_origin_visit_latest(origin.url); latestSnapshotUrl += '?require_snapshot=true'; fetch(latestSnapshotUrl) .then(response => response.json()) .then(data => { $(`#visit-type-origin-${i}`).text(data.type); $(`#visit-status-origin-${i}`).children().remove(); if (data) { $(`#visit-status-origin-${i}`).append(''); } else { $(`#visit-status-origin-${i}`).append(''); if ($('#swh-filter-empty-visits').prop('checked')) { $(`#origin-${i}`).remove(); } } }); } fixTableRowsStyle(); } else { $('#swh-origin-search-results').hide(); $('#swh-no-result').text('No origins matching the search criteria were found.'); $('#swh-no-result').show(); } - if (origins.length - localOffset < perPage || - (origins.length < limit && (localOffset + perPage) === origins.length)) { + + if (linkNext === null) { $('#origins-next-results-button').addClass('disabled'); } else { $('#origins-next-results-button').removeClass('disabled'); } - if (offset > 0) { - $('#origins-prev-results-button').removeClass('disabled'); - } else { + + if (linksPrev.length === 0) { $('#origins-prev-results-button').addClass('disabled'); + } else { + $('#origins-prev-results-button').removeClass('disabled'); } + inSearch = false; setTimeout(() => { window.scrollTo(0, 0); }); } function escapeStringRegexp(str) { let matchOperatorsRe = /[|\\{}()[\]^$+*?.]/g; return str.replace(matchOperatorsRe, '%5C$&'); } -function searchOrigins(patterns, limit, searchOffset, offset) { +function searchOriginsFirst(patterns, limit) { let baseSearchUrl; let searchMetadata = 
$('#swh-search-origin-metadata').prop('checked'); if (searchMetadata) { baseSearchUrl = Urls.api_1_origin_metadata_search() + `?fulltext=${patterns}`; } else { - originPatterns = patterns; let patternsArray = patterns.trim().replace(/\s+/g, ' ').split(' '); for (let i = 0; i < patternsArray.length; ++i) { patternsArray[i] = escapeStringRegexp(patternsArray[i]); } // url length must be less than 4096 for modern browsers // assuming average word length, 6 is max patternArray.length if (patternsArray.length < 7) { let patternsPermut = []; heapsPermute(patternsArray, p => patternsPermut.push(p.join('.*'))); let regex = patternsPermut.join('|'); baseSearchUrl = Urls.api_1_origin_search(regex) + `?regexp=true`; } else { baseSearchUrl = Urls.api_1_origin_search(patternsArray.join('.*')) + `?regexp=true`; } } let withVisit = $('#swh-search-origins-with-visit').prop('checked'); - let searchUrl = baseSearchUrl + `&limit=${limit}&offset=${searchOffset}&with_visit=${withVisit}`; + let searchUrl = baseSearchUrl + `&limit=${limit}&with_visit=${withVisit}`; + searchOrigins(searchUrl); +} +function searchOrigins(searchUrl) { clearOriginSearchResultsTable(); $('.swh-loading').addClass('show'); - fetch(searchUrl) + let response = fetch(searchUrl) .then(handleFetchError) - .then(response => response.json()) + .then(resp => { + response = resp; + return response.json(); + }) .then(data => { - currentData = data; + // Save link to the current results page + linkCurrent = searchUrl; + // Save link to the next results page. + linkNext = null; + if (response.headers.has('Link')) { + let parsedLink = parseLinkHeader(response.headers.get('Link')); + if (parsedLink !== undefined) { + linkNext = parsedLink; + } + } + // linksPrev is updated by the caller, which is the one to know if + // we're going forward or backward in the pages. 
+ $('.swh-loading').removeClass('show'); - populateOriginSearchResultsTable(data, offset); + populateOriginSearchResultsTable(data); }) .catch(response => { $('.swh-loading').removeClass('show'); inSearch = false; $('#swh-origin-search-results').hide(); $('#swh-no-result').text(`Error ${response.status}: ${response.statusText}`); $('#swh-no-result').show(); }); } function doSearch() { $('#swh-no-result').hide(); let patterns = $('#origins-url-patterns').val(); - offset = 0; inSearch = true; // first try to resolve a swh persistent identifier let resolvePidUrl = Urls.api_1_resolve_swh_pid(patterns); fetch(resolvePidUrl) .then(handleFetchError) .then(response => response.json()) .then(data => { // pid has been successfully resolved, // so redirect to browse page window.location = data.browse_url; }) .catch(response => { // pid resolving failed if (patterns.startsWith('swh:')) { // display a useful error message if the input // looks like a swh pid response.json().then(data => { $('#swh-origin-search-results').hide(); $('.swh-search-pagination').hide(); $('#swh-no-result').text(data.reason); $('#swh-no-result').show(); }); } else { // otherwise, proceed with origins search $('#swh-origin-search-results').show(); $('.swh-search-pagination').show(); - searchOrigins(patterns, limit, offset, offset); + searchOriginsFirst(patterns, limit); } }); } export function initOriginSearch() { $(document).ready(() => { $('#swh-search-origins').submit(event => { event.preventDefault(); let patterns = $('#origins-url-patterns').val().trim(); let withVisit = $('#swh-search-origins-with-visit').prop('checked'); let withContent = $('#swh-filter-empty-visits').prop('checked'); let searchMetadata = $('#swh-search-origin-metadata').prop('checked'); let queryParameters = '?q=' + encodeURIComponent(patterns); if (withVisit) { queryParameters += '&with_visit'; } if (withContent) { queryParameters += '&with_content'; } if (searchMetadata) { queryParameters += '&search_metadata'; } // Update the 
url, triggering page reload and effective search window.location.search = queryParameters; }); $('#origins-next-results-button').click(event => { if ($('#origins-next-results-button').hasClass('disabled') || inSearch) { return; } inSearch = true; - offset += perPage; - if (!currentData || (offset >= limit && offset % limit === 0)) { - searchOrigins(originPatterns, limit, offset, offset); - } else { - populateOriginSearchResultsTable(currentData, offset); - } + linksPrev.push(linkCurrent); + searchOrigins(linkNext); event.preventDefault(); }); $('#origins-prev-results-button').click(event => { if ($('#origins-prev-results-button').hasClass('disabled') || inSearch) { return; } inSearch = true; - offset -= perPage; - if (!currentData || (offset > 0 && (offset + perPage) % limit === 0)) { - searchOrigins(originPatterns, limit, (offset + perPage) - limit, offset); - } else { - populateOriginSearchResultsTable(currentData, offset); - } + searchOrigins(linksPrev.pop()); event.preventDefault(); }); let urlParams = new URLSearchParams(window.location.search); let query = urlParams.get('q'); let withVisit = urlParams.has('with_visit'); let withContent = urlParams.has('with_content'); let searchMetadata = urlParams.has('search_metadata'); if (query) { $('#origins-url-patterns').val(query); $('#swh-search-origins-with-visit').prop('checked', withVisit); $('#swh-filter-empty-visits').prop('checked', withContent); $('#swh-search-origin-metadata').prop('checked', searchMetadata); doSearch(); } }); } diff --git a/swh/web/tests/data.py b/swh/web/tests/data.py index f60a5f63..a4ffe7c7 100644 --- a/swh/web/tests/data.py +++ b/swh/web/tests/data.py @@ -1,465 +1,473 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import os import random from copy import deepcopy from typing 
import Dict from rest_framework.decorators import api_view from rest_framework.response import Response from swh.indexer.fossology_license import FossologyLicenseIndexer from swh.indexer.mimetype import MimetypeIndexer from swh.indexer.ctags import CtagsIndexer from swh.indexer.storage import get_indexer_storage from swh.model.from_disk import Directory from swh.model.hashutil import hash_to_hex, hash_to_bytes, DEFAULT_ALGORITHMS from swh.model.identifiers import directory_identifier from swh.loader.git.from_disk import GitLoaderFromArchive from swh.storage.algos.dir_iterators import dir_iterator from swh.web import config from swh.web.browse.utils import ( get_mimetype_and_encoding_for_content, prepare_content_for_display ) from swh.web.common import service from swh.web.common.highlightjs import get_hljs_language_from_filename # Module used to initialize data that will be provided as tests input # Configuration for git loader _TEST_LOADER_CONFIG = { 'storage': { 'cls': 'memory' }, 'send_contents': True, 'send_directories': True, 'send_revisions': True, 'send_releases': True, 'send_snapshot': True, 'content_size_limit': 100 * 1024 * 1024, 'content_packet_size': 10, 'content_packet_size_bytes': 100 * 1024 * 1024, 'directory_packet_size': 10, 'revision_packet_size': 10, 'release_packet_size': 10, 'save_data': False, } # Base content indexer configuration _TEST_INDEXER_BASE_CONFIG = { 'storage': { 'cls': 'memory' }, 'objstorage': { 'cls': 'memory', 'args': {}, }, 'indexer_storage': { 'cls': 'memory', 'args': {}, } } def random_sha1(): return hash_to_hex(bytes(random.randint(0, 255) for _ in range(20))) def random_sha256(): return hash_to_hex(bytes(random.randint(0, 255) for _ in range(32))) def random_blake2s256(): return hash_to_hex(bytes(random.randint(0, 255) for _ in range(32))) def random_content(): return { 'sha1': random_sha1(), 'sha1_git': random_sha1(), 'sha256': random_sha256(), 'blake2s256': random_blake2s256(), } # MimetypeIndexer with custom 
configuration for tests class _MimetypeIndexer(MimetypeIndexer): def parse_config_file(self, *args, **kwargs): return { **_TEST_INDEXER_BASE_CONFIG, 'tools': { 'name': 'file', 'version': '1:5.30-1+deb9u1', 'configuration': { "type": "library", "debian-package": "python3-magic" } } } # FossologyLicenseIndexer with custom configuration for tests class _FossologyLicenseIndexer(FossologyLicenseIndexer): def parse_config_file(self, *args, **kwargs): return { **_TEST_INDEXER_BASE_CONFIG, 'workdir': '/tmp/swh/indexer.fossology.license', 'tools': { 'name': 'nomos', 'version': '3.1.0rc2-31-ga2cbb8c', 'configuration': { 'command_line': 'nomossa ', }, } } # CtagsIndexer with custom configuration for tests class _CtagsIndexer(CtagsIndexer): def parse_config_file(self, *args, **kwargs): return { **_TEST_INDEXER_BASE_CONFIG, 'workdir': '/tmp/swh/indexer.ctags', 'languages': {'c': 'c'}, 'tools': { 'name': 'universal-ctags', 'version': '~git7859817b', 'configuration': { 'command_line': '''ctags --fields=+lnz --sort=no --links=no ''' # noqa '''--output-format=json ''' }, } } # Lightweight git repositories that will be loaded to generate # input data for tests _TEST_ORIGINS = [ { 'type': 'git', 'url': 'https://github.com/wcoder/highlightjs-line-numbers.js', 'archives': ['highlightjs-line-numbers.js.zip', 'highlightjs-line-numbers.js_visit2.zip'], 'visit_date': ['Dec 1 2018, 01:00 UTC', 'Jan 20 2019, 15:00 UTC'] }, { 'type': 'git', 'url': 'https://github.com/memononen/libtess2', 'archives': ['libtess2.zip'], 'visit_date': ['May 25 2018, 01:00 UTC'] }, { 'type': 'git', 'url': 'repo_with_submodules', 'archives': ['repo_with_submodules.tgz'], 'visit_date': ['Jan 1 2019, 01:00 UTC'] } ] _contents = {} # Tests data initialization def _init_tests_data(): # Load git repositories from archives loader = GitLoaderFromArchive(config=_TEST_LOADER_CONFIG) # Get reference to the memory storage storage = loader.storage for origin in _TEST_ORIGINS: for i, archive in enumerate(origin['archives']): 
origin_repo_archive = \ os.path.join(os.path.dirname(__file__), 'resources/repos/%s' % archive) loader.load(origin['url'], origin_repo_archive, origin['visit_date'][i]) origin.update(storage.origin_get(origin)) # add an 'id' key if enabled + for i in range(250): + url = 'https://many.origins/%d' % (i+1) + storage.origin_add([{'url': url}]) + visit = storage.origin_visit_add(url, '2019-12-03 13:55:05', 'tar') + storage.origin_visit_update( + url, visit['visit'], + snapshot='1a8893e6a86f444e8be8e7bda6cb34fb1735a00e') + contents = set() directories = set() revisions = set() releases = set() snapshots = set() content_path = {} # Get all objects loaded into the test archive for origin in _TEST_ORIGINS: snp = storage.snapshot_get_latest(origin['url']) snapshots.add(hash_to_hex(snp['id'])) for branch_name, branch_data in snp['branches'].items(): if branch_data['target_type'] == 'revision': revisions.add(branch_data['target']) elif branch_data['target_type'] == 'release': release = next(storage.release_get([branch_data['target']])) revisions.add(release['target']) releases.add(hash_to_hex(branch_data['target'])) for rev_log in storage.revision_shortlog(set(revisions)): rev_id = rev_log[0] revisions.add(rev_id) for rev in storage.revision_get(revisions): dir_id = rev['directory'] directories.add(hash_to_hex(dir_id)) for entry in dir_iterator(storage, dir_id): content_path[entry['sha1']] = '/'.join( [hash_to_hex(dir_id), entry['path'].decode('utf-8')]) if entry['type'] == 'file': contents.add(entry['sha1']) elif entry['type'] == 'dir': directories.add(hash_to_hex(entry['target'])) # Get all checksums for each content contents_metadata = storage.content_get_metadata(contents) contents = [] for content_metadata in contents_metadata: contents.append({ algo: hash_to_hex(content_metadata[algo]) for algo in DEFAULT_ALGORITHMS }) path = content_path[content_metadata['sha1']] cnt = next(storage.content_get([content_metadata['sha1']])) mimetype, encoding = 
get_mimetype_and_encoding_for_content(cnt['data']) content_display_data = prepare_content_for_display( cnt['data'], mimetype, path) contents[-1]['path'] = path contents[-1]['mimetype'] = mimetype contents[-1]['encoding'] = encoding contents[-1]['hljs_language'] = content_display_data['language'] contents[-1]['data'] = content_display_data['content_data'] _contents[contents[-1]['sha1']] = contents[-1] # Create indexer storage instance that will be shared by indexers idx_storage = get_indexer_storage('memory', {}) # Add the empty directory to the test archive empty_dir_id = directory_identifier({'entries': []}) empty_dir_id_bin = hash_to_bytes(empty_dir_id) storage.directory_add([{'id': empty_dir_id_bin, 'entries': []}]) # Return tests data return { 'storage': storage, 'idx_storage': idx_storage, 'origins': _TEST_ORIGINS, 'contents': contents, 'directories': list(directories), 'releases': list(releases), 'revisions': list(map(hash_to_hex, revisions)), 'snapshots': list(snapshots), 'generated_checksums': set(), } def _init_indexers(tests_data): # Instantiate content indexers that will be used in tests # and force them to use the memory storages indexers = {} for idx_name, idx_class in (('mimetype_indexer', _MimetypeIndexer), ('license_indexer', _FossologyLicenseIndexer), ('ctags_indexer', _CtagsIndexer)): idx = idx_class() idx.storage = tests_data['storage'] idx.objstorage = tests_data['storage'].objstorage idx.idx_storage = tests_data['idx_storage'] idx.register_tools(idx.config['tools']) indexers[idx_name] = idx return indexers def get_content(content_sha1): return _contents.get(content_sha1) _tests_data = None _current_tests_data = None _indexer_loggers = {} def get_tests_data(reset=False): """ Initialize tests data and return them in a dict. 
""" global _tests_data, _current_tests_data if _tests_data is None: _tests_data = _init_tests_data() indexers = _init_indexers(_tests_data) for (name, idx) in indexers.items(): # pytest makes the loggers use a temporary file; and deepcopy # requires serializability. So we remove them, and add them # back after the copy. _indexer_loggers[name] = idx.log del idx.log _tests_data.update(indexers) if reset or _current_tests_data is None: _current_tests_data = deepcopy(_tests_data) for (name, logger) in _indexer_loggers.items(): _current_tests_data[name].log = logger return _current_tests_data def override_storages(storage, idx_storage): """ Helper function to replace the storages from which archive data are fetched. """ swh_config = config.get_config() swh_config.update({'storage': storage}) service.storage = storage swh_config.update({'indexer_storage': idx_storage}) service.idx_storage = idx_storage # Implement some special endpoints used to provide input tests data # when executing end to end tests with cypress _content_code_data_exts = {} # type: Dict[str, Dict[str, str]] _content_code_data_filenames = {} # type: Dict[str, Dict[str, str]] _content_other_data_exts = {} # type: Dict[str, Dict[str, str]] def _init_content_tests_data(data_path, data_dict, ext_key): """ Helper function to read the content of a directory, store it into a test archive and add some files metadata (sha1 and/or expected programming language) in a dict. 
Args: data_path (str): path to a directory relative to the tests folder of swh-web data_dict (dict): the dict that will store files metadata ext_key (bool): whether to use file extensions or filenames as dict keys """ test_contents_dir = os.path.join( os.path.dirname(__file__), data_path).encode('utf-8') directory = Directory.from_disk(path=test_contents_dir, data=True, save_path=True) objects = directory.collect() for c in objects['content'].values(): c['status'] = 'visible' sha1 = hash_to_hex(c['sha1']) if ext_key: key = c['path'].decode('utf-8').split('.')[-1] filename = 'test.' + key else: filename = c['path'].decode('utf-8').split('/')[-1] key = filename language = get_hljs_language_from_filename(filename) data_dict[key] = {'sha1': sha1, 'language': language} del c['path'] del c['perms'] storage = get_tests_data()['storage'] storage.content_add(objects['content'].values()) def _init_content_code_data_exts(): """ Fill a global dictionary which maps source file extension to a code content example. """ global _content_code_data_exts _init_content_tests_data('resources/contents/code/extensions', _content_code_data_exts, True) def _init_content_other_data_exts(): """ Fill a global dictionary which maps a file extension to a content example. """ global _content_other_data_exts _init_content_tests_data('resources/contents/other/extensions', _content_other_data_exts, True) def _init_content_code_data_filenames(): """ Fill a global dictionary which maps a filename to a content example. """ global _content_code_data_filenames _init_content_tests_data('resources/contents/code/filenames', _content_code_data_filenames, False) if config.get_config()['e2e_tests_mode']: _init_content_code_data_exts() _init_content_other_data_exts() _init_content_code_data_filenames() @api_view(['GET']) def get_content_code_data_all_exts(request): """ Endpoint implementation returning a list of all source file extensions to test for highlighting using cypress. 
""" return Response(sorted(_content_code_data_exts.keys()), status=200, content_type='application/json') @api_view(['GET']) def get_content_code_data_by_ext(request, ext): """ Endpoint implementation returning metadata of a code content example based on the source file extension. """ data = None status = 404 if ext in _content_code_data_exts: data = _content_code_data_exts[ext] status = 200 return Response(data, status=status, content_type='application/json') @api_view(['GET']) def get_content_other_data_by_ext(request, ext): """ Endpoint implementation returning metadata of a content example based on the file extension. """ _init_content_other_data_exts() data = None status = 404 if ext in _content_other_data_exts: data = _content_other_data_exts[ext] status = 200 return Response(data, status=status, content_type='application/json') @api_view(['GET']) def get_content_code_data_all_filenames(request): """ Endpoint implementation returning a list of all source filenames to test for highlighting using cypress. """ return Response(sorted(_content_code_data_filenames.keys()), status=200, content_type='application/json') @api_view(['GET']) def get_content_code_data_by_filename(request, filename): """ Endpoint implementation returning metadata of a code content example based on the source filename. """ data = None status = 404 if filename in _content_code_data_filenames: data = _content_code_data_filenames[filename] status = 200 return Response(data, status=status, content_type='application/json')