Differential D1443 Diff 4710 swh/web/tests/data.py

Changeset View

Standalone View

swh/web/tests/data.py

# Copyright (C) 2018-2019 The Software Heritage developers		# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution		# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version		# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information		# See top-level LICENSE file for more information

		from copy import deepcopy
import os		import os
import time		import time

from swh.indexer.language import LanguageIndexer		from swh.indexer.language import LanguageIndexer
from swh.indexer.fossology_license import FossologyLicenseIndexer		from swh.indexer.fossology_license import FossologyLicenseIndexer
from swh.indexer.mimetype import MimetypeIndexer		from swh.indexer.mimetype import MimetypeIndexer
from swh.indexer.ctags import CtagsIndexer		from swh.indexer.ctags import CtagsIndexer
from swh.indexer.storage import get_indexer_storage		from swh.indexer.storage import get_indexer_storage
▲ Show 20 Lines • Show All 139 Lines • ▼ Show 20 Lines

_contents = {}		_contents = {}


# Tests data initialization		# Tests data initialization
def _init_tests_data():		def _init_tests_data():
# Load git repositories from archives		# Load git repositories from archives
loader = GitLoaderFromArchive(config=_TEST_LOADER_CONFIG)		loader = GitLoaderFromArchive(config=_TEST_LOADER_CONFIG)

		# Get reference to the memory storage
		storage = loader.storage

for origin in _TEST_ORIGINS:		for origin in _TEST_ORIGINS:
nb_visits = len(origin['archives'])		nb_visits = len(origin['archives'])
for i, archive in enumerate(origin['archives']):		for i, archive in enumerate(origin['archives']):
origin_repo_archive = \		origin_repo_archive = \
os.path.join(os.path.dirname(__file__),		os.path.join(os.path.dirname(__file__),
'resources/repos/%s' % archive)		'resources/repos/%s' % archive)
loader.load(origin['url'], origin_repo_archive, None)		loader.load(origin['url'], origin_repo_archive, None)
if nb_visits > 1 and i != nb_visits - 1:		if nb_visits > 1 and i != nb_visits - 1:
time.sleep(1)		time.sleep(1)

# Get reference to the memory storage
storage = loader.storage

contents = set()		contents = set()
directories = set()		directories = set()
revisions = set()		revisions = set()
releases = set()		releases = set()
snapshots = set()		snapshots = set()
persons = set()		persons = set()

content_path = {}		content_path = {}
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines	for content_metadata in contents_metadata:
contents[-1]['encoding'] = encoding		contents[-1]['encoding'] = encoding
contents[-1]['hljs_language'] = content_display_data['language']		contents[-1]['hljs_language'] = content_display_data['language']
contents[-1]['data'] = content_display_data['content_data']		contents[-1]['data'] = content_display_data['content_data']
_contents[contents[-1]['sha1']] = contents[-1]		_contents[contents[-1]['sha1']] = contents[-1]

# Create indexer storage instance that will be shared by indexers		# Create indexer storage instance that will be shared by indexers
idx_storage = get_indexer_storage('memory', {})		idx_storage = get_indexer_storage('memory', {})

# Instantiate content indexers that will be used in tests
# and force them to use the memory storages
indexers = {}
for idx_name, idx_class in (('mimetype_indexer', _MimetypeIndexer),
('language_indexer', _LanguageIndexer),
('license_indexer', _FossologyLicenseIndexer),
('ctags_indexer', _CtagsIndexer)):
idx = idx_class()
idx.storage = storage
idx.objstorage = storage.objstorage
idx.idx_storage = idx_storage
idx.register_tools(idx.config['tools'])
indexers[idx_name] = idx

# Add the empty directory to the test archive		# Add the empty directory to the test archive
empty_dir_id = directory_identifier({'entries': []})		empty_dir_id = directory_identifier({'entries': []})
empty_dir_id_bin = hash_to_bytes(empty_dir_id)		empty_dir_id_bin = hash_to_bytes(empty_dir_id)
storage.directory_add([{'id': empty_dir_id_bin, 'entries': []}])		storage.directory_add([{'id': empty_dir_id_bin, 'entries': []}])

# Return tests data		# Return tests data
return {		return {
'storage': storage,		'storage': storage,
'idx_storage': idx_storage,		'idx_storage': idx_storage,
**indexers,
'origins': _TEST_ORIGINS,		'origins': _TEST_ORIGINS,
'contents': contents,		'contents': contents,
'directories': list(directories),		'directories': list(directories),
'persons': list(persons),		'persons': list(persons),
'releases': list(releases),		'releases': list(releases),
'revisions': list(map(hash_to_hex, revisions)),		'revisions': list(map(hash_to_hex, revisions)),
'snapshots': list(snapshots)		'snapshots': list(snapshots),
		'generated_checksums': set(),
}		}


		def _init_indexers(tests_data):
		    """
		    Instantiate the content indexers that will be used in tests.

		    Every indexer is created with its default constructor, then forced to
		    use the storages found in ``tests_data`` (keys ``'storage'`` and
		    ``'idx_storage'``) before the tools listed in its configuration are
		    registered.

		    Returns a dict mapping each indexer name to its instance.
		    """
		    archive_storage = tests_data['storage']
		    indexer_storage = tests_data['idx_storage']
		    # A tuple of pairs keeps the instantiation order explicit.
		    named_classes = (
		        ('mimetype_indexer', _MimetypeIndexer),
		        ('language_indexer', _LanguageIndexer),
		        ('license_indexer', _FossologyLicenseIndexer),
		        ('ctags_indexer', _CtagsIndexer),
		    )
		    indexers = {}
		    for indexer_name, indexer_class in named_classes:
		        indexer = indexer_class()
		        indexer.storage = archive_storage
		        indexer.objstorage = archive_storage.objstorage
		        indexer.idx_storage = indexer_storage
		        indexer.register_tools(indexer.config['tools'])
		        indexers[indexer_name] = indexer
		    return indexers


def get_content(content_sha1):
    """
    Return the entry stored in the module-level ``_contents`` mapping
    under ``content_sha1``, or ``None`` when there is no such entry.
    """
    return _contents.get(content_sha1)


# Master copy of the tests data: built once by get_tests_data() and only
# ever deep-copied afterwards.
_tests_data = None
# Working copy of the tests data, i.e. what get_tests_data() returns.
_current_tests_data = None
# Loggers detached from the indexers before the deep copy, by indexer name.
_indexer_loggers = {}


def get_tests_data(reset=False):
    """
    Initialize tests data and return them in a dict.

    The data are built on the first call and kept as a master copy. Callers
    get a deep copy of it, which is reused by later calls until a reset is
    requested.

    Args:
        reset: when true, drop the current working copy and return a fresh
            deep copy of the master data.

    Returns:
        The working copy of the tests data, as a dict that also holds the
        indexers under their names.
    """
    global _tests_data, _current_tests_data
    if _tests_data is None:
        # Build everything in a local and publish it only once complete, so
        # that a failure while setting up the indexers does not leave a
        # half-initialized master copy behind for the next call.
        tests_data = _init_tests_data()
        indexers = _init_indexers(tests_data)
        for name, idx in indexers.items():
            # pytest makes the loggers use a temporary file, and deepcopy
            # requires serializability. So the loggers are removed here and
            # added back after each copy.
            _indexer_loggers[name] = idx.log
            del idx.log
        tests_data.update(indexers)
        _tests_data = tests_data
    if reset or _current_tests_data is None:
        _current_tests_data = deepcopy(_tests_data)
        # Re-attach the loggers that were set aside before copying.
        for name, logger in _indexer_loggers.items():
            _current_tests_data[name].log = logger
    return _current_tests_data