diff --git a/MANIFEST.in b/MANIFEST.in --- a/MANIFEST.in +++ b/MANIFEST.in @@ -10,3 +10,4 @@ recursive-include swh/web/templates * include swh/web/tests/browse/views/data/swh-logo.png include swh/web/tests/browse/views/data/iso-8859-1_encoded_content +recursive-include swh/web/tests/resources * \ No newline at end of file diff --git a/debian/control b/debian/control --- a/debian/control +++ b/debian/control @@ -13,6 +13,7 @@ python3-django-js-reverse, python3-docutils, python3-htmlmin, + python3-hypothesis (>= 3.11.0~), python3-magic (>= 0.3.0~), python3-lxml, python3-pytest, @@ -26,10 +27,11 @@ python3-yaml, python3-swh.core (>= 0.0.40~), python3-swh.model (>= 0.0.25~), - python3-swh.storage (>= 0.0.109~), - python3-swh.indexer.storage (>= 0.0.52~), + python3-swh.storage (>= 0.0.115~), + python3-swh.indexer (>= 0.0.120~), python3-swh.vault (>= 0.0.20~), - python3-swh.scheduler (>= 0.0.31~) + python3-swh.scheduler (>= 0.0.31~), + python3-swh.loader.git (>= 0.0.47~) Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/diffusion/DWUI/ @@ -37,8 +39,8 @@ Architecture: all Depends: python3-swh.core (>= 0.0.40~), python3-swh.model (>= 0.0.25~), - python3-swh.storage (>= 0.0.109~), - python3-swh.indexer.storage (>= 0.0.52~), + python3-swh.storage (>= 0.0.115~), + python3-swh.indexer.storage (>= 0.0.120~), python3-swh.vault (>= 0.0.20~), python3-swh.scheduler (>= 0.0.31~), ${misc:Depends}, diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,6 +1,6 @@ swh.core >= 0.0.40 swh.model >= 0.0.25 -swh.storage >= 0.0.109 +swh.storage >= 0.0.115 swh.vault >= 0.0.20 -swh.indexer >= 0.0.52 -swh.scheduler >= 0.0.31 +swh.indexer >= 0.0.120 +swh.scheduler >= 0.0.31 \ No newline at end of file diff --git a/requirements-test.txt b/requirements-test.txt --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,3 +1,4 @@ pytest pytest-django - +hypothesis +swh.loader.git >= 0.0.47 diff --git 
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

import os

from swh.indexer.language import LanguageIndexer
from swh.indexer.fossology_license import FossologyLicenseIndexer
from swh.indexer.mimetype import MimetypeIndexer
from swh.indexer.ctags import CtagsIndexer
from swh.indexer.storage import get_indexer_storage
from swh.model.hashutil import hash_to_hex, DEFAULT_ALGORITHMS
from swh.loader.git.from_disk import GitLoaderFromArchive
from swh.storage.algos.dir_iterators import dir_iterator

# Module used to initialize data that will be provided as tests input

# Configuration for git loader: everything is sent to a 'memory' storage
# and nothing is saved (save_data is False)
_TEST_LOADER_CONFIG = dict(
    storage=dict(cls='memory', args={}),
    send_contents=True,
    send_directories=True,
    send_revisions=True,
    send_releases=True,
    send_snapshot=True,
    content_size_limit=100 * 2 ** 20,
    content_packet_size=10,
    content_packet_size_bytes=100 * 2 ** 20,
    directory_packet_size=10,
    revision_packet_size=10,
    release_packet_size=10,
    save_data=False,
)

# Base content indexer configuration: each backend gets its own
# (distinct) 'memory' description dict
_TEST_INDEXER_BASE_CONFIG = {
    backend: dict(cls='memory', args={})
    for backend in ('storage', 'objstorage', 'indexer_storage')
}


# MimetypeIndexer with custom configuration for tests
class _MimetypeIndexer(MimetypeIndexer):
    """MimetypeIndexer whose configuration is hard-coded for the tests."""

    def parse_config_file(self, *args, **kwargs):
        """Return a fixed test configuration.

        All positional and keyword arguments are accepted and ignored.

        Returns:
            dict: a shallow copy of ``_TEST_INDEXER_BASE_CONFIG`` extended
            with a ``'tools'`` entry describing the ``file`` tool.
        """
        tool = dict(
            name='file',
            version='1:5.30-1+deb9u1',
            configuration={
                'type': 'library',
                'debian-package': 'python3-magic',
            },
        )
        config = dict(_TEST_INDEXER_BASE_CONFIG)
        config['tools'] = tool
        return config
# LanguageIndexer with custom configuration for tests
class _LanguageIndexer(LanguageIndexer):
    """LanguageIndexer whose configuration is hard-coded for the tests."""

    def parse_config_file(self, *args, **kwargs):
        """Return a fixed test configuration; arguments are ignored."""
        return {
            **_TEST_INDEXER_BASE_CONFIG,
            'tools': {
                'name': 'pygments',
                'version': '2.0.1+dfsg-1.1+deb8u1',
                'configuration': {
                    'type': 'library',
                    'debian-package': 'python3-pygments',
                    'max_content_size': 10240,
                }
            }
        }


# FossologyLicenseIndexer with custom configuration for tests
class _FossologyLicenseIndexer(FossologyLicenseIndexer):
    """FossologyLicenseIndexer with a hard-coded test configuration."""

    def parse_config_file(self, *args, **kwargs):
        """Return a fixed test configuration; arguments are ignored."""
        return {
            **_TEST_INDEXER_BASE_CONFIG,
            'workdir': '/tmp/swh/indexer.fossology.license',
            'tools': {
                'name': 'nomos',
                'version': '3.1.0rc2-31-ga2cbb8c',
                'configuration': {
                    # NOTE(review): no input-file placeholder is visible in
                    # this command line; confirm that this is intended.
                    'command_line': 'nomossa ',
                },
            }
        }


# CtagsIndexer with custom configuration for tests
class _CtagsIndexer(CtagsIndexer):
    """CtagsIndexer whose configuration is hard-coded for the tests."""

    def parse_config_file(self, *args, **kwargs):
        """Return a fixed test configuration; arguments are ignored."""
        return {
            **_TEST_INDEXER_BASE_CONFIG,
            'workdir': '/tmp/swh/indexer.ctags',
            'languages': {'c': 'c'},
            'tools': {
                'name': 'universal-ctags',
                'version': '~git7859817b',
                'configuration': {
                    # NOTE(review): no input-file placeholder is visible in
                    # this command line; confirm that this is intended.
                    'command_line': (
                        'ctags --fields=+lnz --sort=no --links=no '
                        '--output-format=json '
                    ),
                },
            }
        }


# Lightweight git repositories that will be loaded to generate
# input data for tests
_TEST_ORIGINS = [
    {
        'id': 1,
        'type': 'git',
        'url': 'https://github.com/wcoder/highlightjs-line-numbers.js',
        'archive': 'highlightjs-line-numbers.js.zip'
    },
    {
        'id': 2,
        'type': 'git',
        'url': 'https://github.com/memononen/libtess2',
        'archive': 'libtess2.zip'
    }
]


def _load_test_origins():
    """Load every archive of ``_TEST_ORIGINS`` and return the storage.

    Archives are looked up in the ``resources/repos`` directory located
    next to this module.
    """
    loader = GitLoaderFromArchive(config=_TEST_LOADER_CONFIG)
    repos_dir = os.path.join(os.path.dirname(__file__), 'resources', 'repos')
    for origin in _TEST_ORIGINS:
        origin_repo_archive = os.path.join(repos_dir, origin['archive'])
        loader.load(origin['url'], origin_repo_archive, None)
    # The loader was configured with a 'memory' storage: hand it back so
    # that the loaded objects can be queried
    return loader.storage


def _collect_archive_objects(storage):
    """Walk the latest snapshot of each test origin and list what it holds.

    Returns:
        dict: with keys ``contents`` (list of dicts mapping each algorithm
        of ``DEFAULT_ALGORITHMS`` to a hex checksum), ``directories``,
        ``releases``, ``revisions`` and ``snapshots`` (lists of hex ids).

    Raises:
        RuntimeError: if no snapshot can be found for one of the origins.
    """
    content_sha1s = set()
    directories = set()
    revisions = set()
    releases = set()
    snapshots = set()

    for origin in _TEST_ORIGINS:
        snp = storage.snapshot_get_latest(origin['id'])
        if snp is None:
            raise RuntimeError(
                'No snapshot found for test origin %s' % origin['url'])
        snapshots.add(hash_to_hex(snp['id']))
        for branch_data in snp['branches'].values():
            if branch_data is None:
                # Dangling branch: it has no target to follow
                continue
            target_type = branch_data['target_type']
            if target_type == 'revision':
                revisions.add(branch_data['target'])
            elif target_type == 'release':
                release = next(storage.release_get([branch_data['target']]))
                # A release may point to something else than a revision;
                # when no target type is given, keep the historical
                # behavior and treat the target as a revision
                if release.get('target_type', 'revision') == 'revision':
                    revisions.add(release['target'])
                releases.add(hash_to_hex(branch_data['target']))

        # A copy of the set is passed since it is extended while the
        # result is being consumed
        for rev_log in storage.revision_shortlog(set(revisions)):
            revisions.add(rev_log[0])

        for rev in storage.revision_get(revisions):
            dir_id = rev['directory']
            directories.add(hash_to_hex(dir_id))
            for entry in dir_iterator(storage, dir_id):
                if entry['type'] == 'file':
                    content_sha1s.add(entry['sha1'])
                elif entry['type'] == 'dir':
                    # Entries of any other type (e.g. 'rev') do not target
                    # a directory and are left out
                    directories.add(hash_to_hex(entry['target']))

    # Get all checksums for each content
    contents = [
        {
            algo: hash_to_hex(content_metadata[algo])
            for algo in DEFAULT_ALGORITHMS
        }
        for content_metadata in storage.content_get_metadata(content_sha1s)
    ]

    return {
        'contents': contents,
        'directories': list(directories),
        'releases': list(releases),
        'revisions': list(map(hash_to_hex, revisions)),
        'snapshots': list(snapshots),
    }


def _create_indexers(storage, idx_storage):
    """Instantiate the test indexers and plug them on the given storages.

    Returns:
        dict: mapping an indexer name (``mimetype_indexer``,
        ``language_indexer``, ``license_indexer``, ``ctags_indexer``) to
        its instance.
    """
    indexer_classes = (
        ('mimetype_indexer', _MimetypeIndexer),
        ('language_indexer', _LanguageIndexer),
        ('license_indexer', _FossologyLicenseIndexer),
        ('ctags_indexer', _CtagsIndexer),
    )
    indexers = {}
    for idx_name, idx_class in indexer_classes:
        idx = idx_class()
        # Force the indexer to use the shared memory storages
        idx.storage = storage
        idx.objstorage = storage.objstorage
        idx.idx_storage = idx_storage
        idx.register_tools(idx.config['tools'])
        indexers[idx_name] = idx
    return indexers


# Tests data initialization
def _init_tests_data():
    """Build the data set used as tests input.

    Git repositories are loaded from archives into a memory storage, the
    objects reachable from their latest snapshots are listed, and content
    indexers sharing a single indexer storage are instantiated.

    Returns:
        dict: with keys ``storage``, ``idx_storage``, one key per indexer
        (``mimetype_indexer``, ``language_indexer``, ``license_indexer``,
        ``ctags_indexer``), ``origins``, ``contents``, ``directories``,
        ``releases``, ``revisions`` and ``snapshots``.
    """
    storage = _load_test_origins()
    archive_objects = _collect_archive_objects(storage)

    # Create indexer storage instance that will be shared by indexers
    idx_storage = get_indexer_storage('memory', {})
    indexers = _create_indexers(storage, idx_storage)

    return {
        'storage': storage,
        'idx_storage': idx_storage,
        **indexers,
        'origins': _TEST_ORIGINS,
        **archive_objects,
    }


# Cache filled by the first call to get_tests_data
_tests_data = None


def get_tests_data():
    """
    Initialize tests data and return them in a dict.

    The data are built on the first call only and kept in the module-level
    ``_tests_data`` variable; later calls return that same dict object.
    """
    global _tests_data
    if _tests_data is None:
        _tests_data = _init_tests_data()
    return _tests_data