Changeset View
Changeset View
Standalone View
Standalone View
swh/web/tests/data.py
# Copyright (C) 2018-2019 The Software Heritage developers | # Copyright (C) 2018-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from copy import deepcopy | |||||
import os | import os | ||||
import time | import time | ||||
from swh.indexer.language import LanguageIndexer | from swh.indexer.language import LanguageIndexer | ||||
from swh.indexer.fossology_license import FossologyLicenseIndexer | from swh.indexer.fossology_license import FossologyLicenseIndexer | ||||
from swh.indexer.mimetype import MimetypeIndexer | from swh.indexer.mimetype import MimetypeIndexer | ||||
from swh.indexer.ctags import CtagsIndexer | from swh.indexer.ctags import CtagsIndexer | ||||
from swh.indexer.storage import get_indexer_storage | from swh.indexer.storage import get_indexer_storage | ||||
▲ Show 20 Lines • Show All 139 Lines • ▼ Show 20 Lines | |||||
_contents = {} | _contents = {} | ||||
# Tests data initialization | # Tests data initialization | ||||
def _init_tests_data(): | def _init_tests_data(): | ||||
# Load git repositories from archives | # Load git repositories from archives | ||||
loader = GitLoaderFromArchive(config=_TEST_LOADER_CONFIG) | loader = GitLoaderFromArchive(config=_TEST_LOADER_CONFIG) | ||||
# Get reference to the memory storage | |||||
storage = loader.storage | |||||
for origin in _TEST_ORIGINS: | for origin in _TEST_ORIGINS: | ||||
nb_visits = len(origin['archives']) | nb_visits = len(origin['archives']) | ||||
for i, archive in enumerate(origin['archives']): | for i, archive in enumerate(origin['archives']): | ||||
origin_repo_archive = \ | origin_repo_archive = \ | ||||
os.path.join(os.path.dirname(__file__), | os.path.join(os.path.dirname(__file__), | ||||
'resources/repos/%s' % archive) | 'resources/repos/%s' % archive) | ||||
loader.load(origin['url'], origin_repo_archive, None) | loader.load(origin['url'], origin_repo_archive, None) | ||||
if nb_visits > 1 and i != nb_visits - 1: | if nb_visits > 1 and i != nb_visits - 1: | ||||
time.sleep(1) | time.sleep(1) | ||||
# Get reference to the memory storage | |||||
storage = loader.storage | |||||
contents = set() | contents = set() | ||||
directories = set() | directories = set() | ||||
revisions = set() | revisions = set() | ||||
releases = set() | releases = set() | ||||
snapshots = set() | snapshots = set() | ||||
persons = set() | persons = set() | ||||
content_path = {} | content_path = {} | ||||
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines | for content_metadata in contents_metadata: | ||||
contents[-1]['encoding'] = encoding | contents[-1]['encoding'] = encoding | ||||
contents[-1]['hljs_language'] = content_display_data['language'] | contents[-1]['hljs_language'] = content_display_data['language'] | ||||
contents[-1]['data'] = content_display_data['content_data'] | contents[-1]['data'] = content_display_data['content_data'] | ||||
_contents[contents[-1]['sha1']] = contents[-1] | _contents[contents[-1]['sha1']] = contents[-1] | ||||
# Create indexer storage instance that will be shared by indexers | # Create indexer storage instance that will be shared by indexers | ||||
idx_storage = get_indexer_storage('memory', {}) | idx_storage = get_indexer_storage('memory', {}) | ||||
# Instantiate content indexers that will be used in tests | |||||
# and force them to use the memory storages | |||||
indexers = {} | |||||
for idx_name, idx_class in (('mimetype_indexer', _MimetypeIndexer), | |||||
('language_indexer', _LanguageIndexer), | |||||
('license_indexer', _FossologyLicenseIndexer), | |||||
('ctags_indexer', _CtagsIndexer)): | |||||
idx = idx_class() | |||||
idx.storage = storage | |||||
idx.objstorage = storage.objstorage | |||||
idx.idx_storage = idx_storage | |||||
idx.register_tools(idx.config['tools']) | |||||
indexers[idx_name] = idx | |||||
# Add the empty directory to the test archive | # Add the empty directory to the test archive | ||||
empty_dir_id = directory_identifier({'entries': []}) | empty_dir_id = directory_identifier({'entries': []}) | ||||
empty_dir_id_bin = hash_to_bytes(empty_dir_id) | empty_dir_id_bin = hash_to_bytes(empty_dir_id) | ||||
storage.directory_add([{'id': empty_dir_id_bin, 'entries': []}]) | storage.directory_add([{'id': empty_dir_id_bin, 'entries': []}]) | ||||
# Return tests data | # Return tests data | ||||
return { | return { | ||||
'storage': storage, | 'storage': storage, | ||||
'idx_storage': idx_storage, | 'idx_storage': idx_storage, | ||||
**indexers, | |||||
'origins': _TEST_ORIGINS, | 'origins': _TEST_ORIGINS, | ||||
'contents': contents, | 'contents': contents, | ||||
'directories': list(directories), | 'directories': list(directories), | ||||
'persons': list(persons), | 'persons': list(persons), | ||||
'releases': list(releases), | 'releases': list(releases), | ||||
'revisions': list(map(hash_to_hex, revisions)), | 'revisions': list(map(hash_to_hex, revisions)), | ||||
'snapshots': list(snapshots) | 'snapshots': list(snapshots), | ||||
'generated_checksums': set(), | |||||
} | } | ||||
def _init_indexers(tests_data):
    """
    Build the content indexers that will be used in tests.

    Every indexer class is instantiated without arguments, then forced to
    use the memory storages carried by ``tests_data`` (keys ``'storage'``
    and ``'idx_storage'``) before the tools listed in its configuration
    are registered.

    Returns a dict mapping each indexer name (``'mimetype_indexer'``,
    ``'language_indexer'``, ``'license_indexer'``, ``'ctags_indexer'``)
    to the corresponding indexer instance.
    """
    indexer_classes = (
        ('mimetype_indexer', _MimetypeIndexer),
        ('language_indexer', _LanguageIndexer),
        ('license_indexer', _FossologyLicenseIndexer),
        ('ctags_indexer', _CtagsIndexer),
    )

    indexers_by_name = {}
    for indexer_name, indexer_class in indexer_classes:
        indexer = indexer_class()
        # Replace the backends with the ones from the tests data
        memory_storage = tests_data['storage']
        indexer.storage = memory_storage
        indexer.objstorage = memory_storage.objstorage
        indexer.idx_storage = tests_data['idx_storage']
        indexer.register_tools(indexer.config['tools'])
        indexers_by_name[indexer_name] = indexer
    return indexers_by_name
def get_content(content_sha1):
    """
    Look up a content in the module-level ``_contents`` mapping.

    Returns the value stored under ``content_sha1``, or ``None`` when the
    mapping has no such key.
    """
    content = _contents.get(content_sha1, None)
    return content
# Pristine tests data, built once by the first call to get_tests_data()
_tests_data = None
# Working copy of the tests data that is handed out to callers
_current_tests_data = None
# Indexer loggers, kept aside because they must not go through deepcopy
_indexer_loggers = {}


def get_tests_data(reset=False):
    """
    Initialize tests data and return them in a dict.

    The data are built only once and kept as a pristine reference. Callers
    receive a deep copy of that reference; the same copy is returned by
    every call until a reset is requested.

    Args:
        reset (bool): when True, drop the current working copy and return
            a fresh deep copy of the pristine tests data

    Returns:
        dict: the current working copy of the tests data, including the
        indexers returned by ``_init_indexers``
    """
    global _tests_data, _current_tests_data
    if _tests_data is None:
        # Build everything in locals and publish the module-level state
        # only at the end, so that a failure midway does not leave a
        # half-initialized cache (e.g. without indexers) for later calls.
        tests_data = _init_tests_data()
        indexers = _init_indexers(tests_data)
        loggers = {}
        for name, idx in indexers.items():
            # pytest makes the loggers use a temporary file; and deepcopy
            # requires serializability. So we remove them, and add them
            # back after the copy.
            loggers[name] = idx.log
            del idx.log
        tests_data.update(indexers)
        _indexer_loggers.update(loggers)
        _tests_data = tests_data
    if reset or _current_tests_data is None:
        current = deepcopy(_tests_data)
        # Give the copied indexers their original loggers back
        for name, logger in _indexer_loggers.items():
            current[name].log = logger
        _current_tests_data = current
    return _current_tests_data