diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py
--- a/swh/web/tests/api/views/test_origin.py
+++ b/swh/web/tests/api/views/test_origin.py
@@ -222,107 +222,109 @@
             (new_origin['type'], new_origin['url'])
         })
 
-    @patch('swh.web.common.service.idx_storage')
     @given(origin())
-    def test_api_origin_metadata_search(self, mock_idx_storage, origin):
+    def test_api_origin_metadata_search(self, origin):
+        with patch('swh.web.common.service.idx_storage') as mock_idx_storage:
+            mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
+                .side_effect = lambda conjunction, limit: [{
+                    'from_revision': (
+                        b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed '
+                        b'\xf2U\xfa\x05B8'),
+                    'metadata': {'author': 'Jane Doe'},
+                    'id': origin['id'],
+                    'tool': {
+                        'configuration': {
+                            'context': ['NpmMapping', 'CodemetaMapping'],
+                            'type': 'local'
+                        },
+                        'id': 3,
+                        'name': 'swh-metadata-detector',
+                        'version': '0.0.1'
+                    }
+                }]
+
+            url = reverse('api-origin-metadata-search',
+                          query_params={'fulltext': 'Jane Doe'})
+            rv = self.client.get(url)
 
-        mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
-            .side_effect = lambda conjunction, limit: [{
-                'from_revision':
-                b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed \xf2U\xfa\x05B8',
-                'metadata': {'author': 'Jane Doe'},
+            self.assertEqual(rv.status_code, 200, rv.content)
+            self.assertEqual(rv['Content-Type'], 'application/json')
+            expected_data = [{
                 'id': origin['id'],
-                'tool': {
-                    'configuration': {
-                        'context': ['NpmMapping', 'CodemetaMapping'],
-                        'type': 'local'
-                    },
-                    'id': 3,
-                    'name': 'swh-metadata-detector',
-                    'version': '0.0.1'
+                'type': origin['type'],
+                'url': origin['url'],
+                'metadata': {
+                    'metadata': {'author': 'Jane Doe'},
+                    'from_revision': (
+                        '7026b7c1a2af56521e951c01ed20f255fa054238'),
+                    'tool': {
+                        'configuration': {
+                            'context': ['NpmMapping', 'CodemetaMapping'],
+                            'type': 'local'
+                        },
+                        'id': 3,
+                        'name': 'swh-metadata-detector',
+                        'version': '0.0.1',
+                    }
                 }
             }]
+            self.assertEqual(rv.data, expected_data)
+            mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
+                .assert_called_with(conjunction=['Jane Doe'], limit=70)
 
-        url = reverse('api-origin-metadata-search',
-                      query_params={'fulltext': 'Jane Doe'})
-        rv = self.client.get(url)
-
-        self.assertEqual(rv.status_code, 200, rv.content)
-        self.assertEqual(rv['Content-Type'], 'application/json')
-        expected_data = [{
-            'id': origin['id'],
-            'type': origin['type'],
-            'url': origin['url'],
-            'metadata': {
-                'metadata': {'author': 'Jane Doe'},
-                'from_revision': '7026b7c1a2af56521e951c01ed20f255fa054238',
-                'tool': {
-                    'configuration': {
-                        'context': ['NpmMapping', 'CodemetaMapping'],
-                        'type': 'local'
-                    },
-                    'id': 3,
-                    'name': 'swh-metadata-detector',
-                    'version': '0.0.1',
-                }
-            }
-        }]
-        self.assertEqual(rv.data, expected_data)
-        mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
-            .assert_called_with(conjunction=['Jane Doe'], limit=70)
-
-    @patch('swh.web.common.service.idx_storage')
     @given(origin())
-    def test_api_origin_metadata_search_limit(self, mock_idx_storage, origin):
-
-        mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
-            .side_effect = lambda conjunction, limit: [{
-                'from_revision':
-                b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed \xf2U\xfa\x05B8',
-                'metadata': {'author': 'Jane Doe'},
-                'id': origin['id'],
-                'tool': {
-                    'configuration': {
-                        'context': ['NpmMapping', 'CodemetaMapping'],
-                        'type': 'local'
-                    },
-                    'id': 3,
-                    'name': 'swh-metadata-detector',
-                    'version': '0.0.1'
-                }
-            }]
-
-        url = reverse('api-origin-metadata-search',
-                      query_params={'fulltext': 'Jane Doe'})
-        rv = self.client.get(url)
+    def test_api_origin_metadata_search_limit(self, origin):
+
+        with patch('swh.web.common.service.idx_storage') as mock_idx_storage:
+            mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
+                .side_effect = lambda conjunction, limit: [{
+                    'from_revision': (
+                        b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed '
+                        b'\xf2U\xfa\x05B8'),
+                    'metadata': {'author': 'Jane Doe'},
+                    'id': origin['id'],
+                    'tool': {
+                        'configuration': {
+                            'context': ['NpmMapping', 'CodemetaMapping'],
+                            'type': 'local'
+                        },
+                        'id': 3,
+                        'name': 'swh-metadata-detector',
+                        'version': '0.0.1'
+                    }
+                }]
+
+            url = reverse('api-origin-metadata-search',
+                          query_params={'fulltext': 'Jane Doe'})
+            rv = self.client.get(url)
 
-        self.assertEqual(rv.status_code, 200, rv.content)
-        self.assertEqual(rv['Content-Type'], 'application/json')
-        self.assertEqual(len(rv.data), 1)
-        mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
-            .assert_called_with(conjunction=['Jane Doe'], limit=70)
+            self.assertEqual(rv.status_code, 200, rv.content)
+            self.assertEqual(rv['Content-Type'], 'application/json')
+            self.assertEqual(len(rv.data), 1)
+            mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
+                .assert_called_with(conjunction=['Jane Doe'], limit=70)
 
-        url = reverse('api-origin-metadata-search',
-                      query_params={'fulltext': 'Jane Doe',
-                                    'limit': 10})
-        rv = self.client.get(url)
+            url = reverse('api-origin-metadata-search',
+                          query_params={'fulltext': 'Jane Doe',
+                                        'limit': 10})
+            rv = self.client.get(url)
 
-        self.assertEqual(rv.status_code, 200, rv.content)
-        self.assertEqual(rv['Content-Type'], 'application/json')
-        self.assertEqual(len(rv.data), 1)
-        mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
-            .assert_called_with(conjunction=['Jane Doe'], limit=10)
+            self.assertEqual(rv.status_code, 200, rv.content)
+            self.assertEqual(rv['Content-Type'], 'application/json')
+            self.assertEqual(len(rv.data), 1)
+            mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
+                .assert_called_with(conjunction=['Jane Doe'], limit=10)
 
-        url = reverse('api-origin-metadata-search',
-                      query_params={'fulltext': 'Jane Doe',
-                                    'limit': 987})
-        rv = self.client.get(url)
+            url = reverse('api-origin-metadata-search',
+                          query_params={'fulltext': 'Jane Doe',
+                                        'limit': 987})
+            rv = self.client.get(url)
 
-        self.assertEqual(rv.status_code, 200, rv.content)
-        self.assertEqual(rv['Content-Type'], 'application/json')
-        self.assertEqual(len(rv.data), 1)
-        mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
-            .assert_called_with(conjunction=['Jane Doe'], limit=100)
+            self.assertEqual(rv.status_code, 200, rv.content)
+            self.assertEqual(rv['Content-Type'], 'application/json')
+            self.assertEqual(len(rv.data), 1)
+            mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
+                .assert_called_with(conjunction=['Jane Doe'], limit=100)
 
     @patch('swh.web.common.service.idx_storage')
     def test_api_origin_metadata_search_invalid(self, mock_idx_storage):
diff --git a/swh/web/tests/data.py b/swh/web/tests/data.py
--- a/swh/web/tests/data.py
+++ b/swh/web/tests/data.py
@@ -3,6 +3,7 @@
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+from copy import deepcopy
 import os
 import time
 
@@ -158,6 +159,10 @@ def _init_tests_data():
 
     # Load git repositories from archives
     loader = GitLoaderFromArchive(config=_TEST_LOADER_CONFIG)
+
+    # Get reference to the memory storage
+    storage = loader.storage
+
     for origin in _TEST_ORIGINS:
         nb_visits = len(origin['archives'])
         for i, archive in enumerate(origin['archives']):
@@ -168,9 +173,6 @@
             if nb_visits > 1 and i != nb_visits - 1:
                 time.sleep(1)
 
-    # Get reference to the memory storage
-    storage = loader.storage
-
     contents = set()
     directories = set()
     revisions = set()
@@ -233,20 +235,6 @@
     # Create indexer storage instance that will be shared by indexers
     idx_storage = get_indexer_storage('memory', {})
 
-    # Instantiate content indexers that will be used in tests
-    # and force them to use the memory storages
-    indexers = {}
-    for idx_name, idx_class in (('mimetype_indexer', _MimetypeIndexer),
-                                ('language_indexer', _LanguageIndexer),
-                                ('license_indexer', _FossologyLicenseIndexer),
-                                ('ctags_indexer', _CtagsIndexer)):
-        idx = idx_class()
-        idx.storage = storage
-        idx.objstorage = storage.objstorage
-        idx.idx_storage = idx_storage
-        idx.register_tools(idx.config['tools'])
-        indexers[idx_name] = idx
-
     # Add the empty directory to the test archive
     empty_dir_id = directory_identifier({'entries': []})
     empty_dir_id_bin = hash_to_bytes(empty_dir_id)
@@ -256,29 +244,61 @@
     return {
         'storage': storage,
         'idx_storage': idx_storage,
-        **indexers,
         'origins': _TEST_ORIGINS,
         'contents': contents,
         'directories': list(directories),
         'persons': list(persons),
         'releases': list(releases),
         'revisions': list(map(hash_to_hex, revisions)),
-        'snapshots': list(snapshots)
+        'snapshots': list(snapshots),
+        'generated_checksums': set(),
     }
 
 
+def _init_indexers(tests_data):
+    # Instantiate content indexers that will be used in tests
+    # and force them to use the memory storages
+    indexers = {}
+    for idx_name, idx_class in (('mimetype_indexer', _MimetypeIndexer),
+                                ('language_indexer', _LanguageIndexer),
+                                ('license_indexer', _FossologyLicenseIndexer),
+                                ('ctags_indexer', _CtagsIndexer)):
+        idx = idx_class()
+        idx.storage = tests_data['storage']
+        idx.objstorage = tests_data['storage'].objstorage
+        idx.idx_storage = tests_data['idx_storage']
+        idx.register_tools(idx.config['tools'])
+        indexers[idx_name] = idx
+
+    return indexers
+
+
 def get_content(content_sha1):
     return _contents.get(content_sha1)
 
 
 _tests_data = None
+_current_tests_data = None
+_indexer_loggers = {}
 
 
-def get_tests_data():
+def get_tests_data(reset=False):
     """
     Initialize tests data and return them in a dict.
     """
-    global _tests_data
+    global _tests_data, _current_tests_data
     if _tests_data is None:
         _tests_data = _init_tests_data()
-    return _tests_data
+        indexers = _init_indexers(_tests_data)
+        for (name, idx) in indexers.items():
+            # pytest makes the loggers use a temporary file; and deepcopy
+            # requires serializability. So we remove them, and add them
+            # back after the copy.
+            _indexer_loggers[name] = idx.log
+            del idx.log
+        _tests_data.update(indexers)
+    if reset or _current_tests_data is None:
+        _current_tests_data = deepcopy(_tests_data)
+        for (name, logger) in _indexer_loggers.items():
+            _current_tests_data[name].log = logger
+    return _current_tests_data
diff --git a/swh/web/tests/strategies.py b/swh/web/tests/strategies.py
--- a/swh/web/tests/strategies.py
+++ b/swh/web/tests/strategies.py
@@ -32,26 +32,21 @@
 if repr(settings()) == repr(hypothesis_default_settings):
     settings.load_profile('swh-web')
 
-# Import tests data
-tests_data = get_tests_data()
-storage = tests_data['storage']
-
 # The following strategies exploit the hypothesis capabilities
 
-_generated_checksums = set()
-
 
 def _filter_checksum(cs):
+    generated_checksums = get_tests_data()['generated_checksums']
     if not int.from_bytes(cs, byteorder='little') or \
-            cs in _generated_checksums:
+            cs in generated_checksums:
         return False
-    _generated_checksums.add(cs)
+    generated_checksums.add(cs)
     return True
 
 
 def _known_swh_object(object_type):
-    return sampled_from(tests_data[object_type])
+    return sampled_from(get_tests_data()[object_type])
 
 
 def sha1():
@@ -152,8 +147,8 @@
     into the test archive.
     """
     return new_content().filter(
-        lambda c: next(storage.content_get(
-            [hash_to_bytes(c['sha1'])])) is None)
+        lambda c: next(get_tests_data()['storage'].content_get(
+            [hash_to_bytes(c['sha1'])])) is None)
 
 
 def unknown_contents():
@@ -177,6 +172,7 @@
     Hypothesis strategy returning a random directory containing
     sub directories ingested into the test archive.
     """
+    storage = get_tests_data()['storage']
     return directory().filter(
         lambda d: any([e['type'] == 'dir'
                        for e in list(storage.directory_ls(hash_to_bytes(d)))]))
@@ -195,6 +191,7 @@
     Hypothesis strategy returning a random directory not ingested
     into the test archive.
     """
+    storage = get_tests_data()['storage']
     return sha1().filter(
         lambda s: len(list(storage.directory_missing([hash_to_bytes(s)]))) > 0)
 
@@ -213,8 +210,9 @@
     into the test archive.
     """
     ret = []
+    tests_data = get_tests_data()
     for origin in tests_data['origins']:
-        visits = list(storage.origin_visit_get(origin['id']))
+        visits = list(tests_data['storage'].origin_visit_get(origin['id']))
         if len(visits) > 1:
             ret.append(origin)
     return sampled_from(ret)
@@ -226,8 +224,9 @@
     into the test archive.
     """
     ret = []
+    tests_data = get_tests_data()
     for origin in tests_data['origins']:
-        snapshot = storage.snapshot_get_latest(origin['id'])
+        snapshot = tests_data['storage'].snapshot_get_latest(origin['id'])
         if any([b['target_type'] == 'release'
                 for b in snapshot['branches'].values()]):
             ret.append(origin)
@@ -247,6 +246,7 @@
     Hypothesis strategy returning a random origin not ingested
     into the test archive.
     """
+    storage = get_tests_data()['storage']
     return new_origin_strategy().map(lambda origin: origin.to_dict()).filter(
         lambda origin: storage.origin_get([origin])[0] is None)
 
@@ -288,7 +288,7 @@
     into the test archive.
     """
     return sha1().filter(
-        lambda s: next(storage.release_get([s])) is None)
+        lambda s: next(get_tests_data()['storage'].release_get([s])) is None)
 
 
 def revision():
@@ -304,6 +304,7 @@
     Hypothesis strategy returning a random revision not ingested
     into the test archive.
    """
+    storage = get_tests_data()['storage']
     return sha1().filter(
         lambda s: next(storage.revision_get([hash_to_bytes(s)])) is None)
 
@@ -398,6 +399,7 @@
     Hypothesis strategy returning a random revision not ingested
     into the test archive.
     """
+    storage = get_tests_data()['storage']
     return sha1().filter(
         lambda s: storage.snapshot_get(hash_to_bytes(s)) is None)
 
@@ -419,6 +421,8 @@
 
 
 def _get_origin_dfs_revisions_walker():
+    tests_data = get_tests_data()
+    storage = tests_data['storage']
     origin = random.choice(tests_data['origins'][:-1])
     snapshot = storage.snapshot_get_latest(origin['id'])
     head = snapshot['branches'][b'HEAD']['target']
diff --git a/swh/web/tests/testcase.py b/swh/web/tests/testcase.py
--- a/swh/web/tests/testcase.py
+++ b/swh/web/tests/testcase.py
@@ -32,152 +32,130 @@
     a json serializable format in order to ease tests implementation.
     """
 
-    @classmethod
-    def setUpClass(cls):
-        super().setUpClass()
-        tests_data = get_tests_data()
-        cls.storage = tests_data['storage']
-        cls.idx_storage = tests_data['idx_storage']
-        cls.mimetype_indexer = tests_data['mimetype_indexer']
-        cls.language_indexer = tests_data['language_indexer']
-        cls.license_indexer = tests_data['license_indexer']
-        cls.ctags_indexer = tests_data['ctags_indexer']
+    def _pre_setup(self):
+        cache.clear()
+
+        tests_data = get_tests_data(reset=True)
+        self.storage = tests_data['storage']
+        self.idx_storage = tests_data['idx_storage']
+        self.mimetype_indexer = tests_data['mimetype_indexer']
+        self.language_indexer = tests_data['language_indexer']
+        self.license_indexer = tests_data['license_indexer']
+        self.ctags_indexer = tests_data['ctags_indexer']
 
         # Update swh-web configuration to use the in-memory storage
         # instantiated in the tests.data module
         swh_config = config.get_config()
-        swh_config.update({'storage': cls.storage})
-        service.storage = cls.storage
+        swh_config.update({'storage': self.storage})
+        service.storage = self.storage
 
         # Update swh-web configuration to use the in-memory indexer storage
         # instantiated in the tests.data modules
-        swh_config.update({'indexer_storage': cls.idx_storage})
-        service.idx_storage = cls.idx_storage
+        swh_config.update({'indexer_storage': self.idx_storage})
+        service.idx_storage = self.idx_storage
 
-    @classmethod
-    def content_add_mimetype(cls, cnt_id):
-        cls.mimetype_indexer.run([hash_to_bytes(cnt_id)],
-                                 'update-dups')
+        super()._pre_setup()
+
+    def content_add_mimetype(self, cnt_id):
+        self.mimetype_indexer.run([hash_to_bytes(cnt_id)],
+                                  'update-dups')
 
-    @classmethod
-    def content_get_mimetype(cls, cnt_id):
-        mimetype = next(cls.idx_storage.content_mimetype_get(
+    def content_get_mimetype(self, cnt_id):
+        mimetype = next(self.idx_storage.content_mimetype_get(
             [hash_to_bytes(cnt_id)]))
         return converters.from_filetype(mimetype)
 
-    @classmethod
-    def content_add_language(cls, cnt_id):
-        cls.language_indexer.run([hash_to_bytes(cnt_id)],
-                                 'update-dups')
+    def content_add_language(self, cnt_id):
+        self.language_indexer.run([hash_to_bytes(cnt_id)],
+                                  'update-dups')
 
-    @classmethod
-    def content_get_language(cls, cnt_id):
-        lang = next(cls.idx_storage.content_language_get(
+    def content_get_language(self, cnt_id):
+        lang = next(self.idx_storage.content_language_get(
            [hash_to_bytes(cnt_id)]))
         return converters.from_swh(lang, hashess={'id'})
 
-    @classmethod
-    def content_add_license(cls, cnt_id):
-        cls.license_indexer.run([hash_to_bytes(cnt_id)],
-                                'update-dups')
+    def content_add_license(self, cnt_id):
+        self.license_indexer.run([hash_to_bytes(cnt_id)],
+                                 'update-dups')
 
-    @classmethod
-    def content_get_license(cls, cnt_id):
+    def content_get_license(self, cnt_id):
         cnt_id_bytes = hash_to_bytes(cnt_id)
-        lic = next(cls.idx_storage.content_fossology_license_get(
+        lic = next(self.idx_storage.content_fossology_license_get(
             [cnt_id_bytes]))
         return converters.from_swh({'id': cnt_id_bytes,
                                     'facts': lic[cnt_id_bytes]},
                                    hashess={'id'})
 
-    @classmethod
-    def content_add_ctags(cls, cnt_id):
-        cls.ctags_indexer.run([hash_to_bytes(cnt_id)],
-                              'update-dups')
+    def content_add_ctags(self, cnt_id):
+        self.ctags_indexer.run([hash_to_bytes(cnt_id)],
+                               'update-dups')
 
-    @classmethod
-    def content_get_ctags(cls, cnt_id):
+    def content_get_ctags(self, cnt_id):
         cnt_id_bytes = hash_to_bytes(cnt_id)
-        ctags = cls.idx_storage.content_ctags_get([cnt_id_bytes])
+        ctags = self.idx_storage.content_ctags_get([cnt_id_bytes])
         for ctag in ctags:
             yield converters.from_swh(ctag, hashess={'id'})
 
-    @classmethod
-    def content_get_metadata(cls, cnt_id):
+    def content_get_metadata(self, cnt_id):
         cnt_id_bytes = hash_to_bytes(cnt_id)
-        metadata = next(cls.storage.content_get_metadata([cnt_id_bytes]))
+        metadata = next(self.storage.content_get_metadata([cnt_id_bytes]))
         return converters.from_swh(metadata,
                                    hashess={'sha1', 'sha1_git', 'sha256',
                                             'blake2s256'})
 
-    @classmethod
-    def content_get(cls, cnt_id):
+    def content_get(self, cnt_id):
         cnt_id_bytes = hash_to_bytes(cnt_id)
-        cnt = next(cls.storage.content_get([cnt_id_bytes]))
+        cnt = next(self.storage.content_get([cnt_id_bytes]))
         return converters.from_content(cnt)
 
-    @classmethod
-    def directory_ls(cls, dir_id):
+    def directory_ls(self, dir_id):
         cnt_id_bytes = hash_to_bytes(dir_id)
         dir_content = map(converters.from_directory_entry,
-                          cls.storage.directory_ls(cnt_id_bytes))
+                          self.storage.directory_ls(cnt_id_bytes))
         return list(dir_content)
 
-    @classmethod
-    def release_get(cls, rel_id):
+    def release_get(self, rel_id):
         rel_id_bytes = hash_to_bytes(rel_id)
-        rel_data = next(cls.storage.release_get([rel_id_bytes]))
+        rel_data = next(self.storage.release_get([rel_id_bytes]))
         return converters.from_release(rel_data)
 
-    @classmethod
-    def revision_get(cls, rev_id):
+    def revision_get(self, rev_id):
         rev_id_bytes = hash_to_bytes(rev_id)
-        rev_data = next(cls.storage.revision_get([rev_id_bytes]))
+        rev_data = next(self.storage.revision_get([rev_id_bytes]))
         return converters.from_revision(rev_data)
 
-    @classmethod
-    def revision_log(cls, rev_id, limit=None):
+    def revision_log(self, rev_id, limit=None):
         rev_id_bytes = hash_to_bytes(rev_id)
         return list(map(converters.from_revision,
-                        cls.storage.revision_log([rev_id_bytes], limit=limit)))
+                        self.storage.revision_log([rev_id_bytes], limit=limit)))
 
-    @classmethod
-    def snapshot_get_latest(cls, origin_id):
-        snp = cls.storage.snapshot_get_latest(origin_id)
+    def snapshot_get_latest(self, origin_id):
+        snp = self.storage.snapshot_get_latest(origin_id)
         return converters.from_snapshot(snp)
 
-    @classmethod
-    def origin_get(cls, origin_info):
-        origin = cls.storage.origin_get(origin_info)
+    def origin_get(self, origin_info):
+        origin = self.storage.origin_get(origin_info)
         return converters.from_origin(origin)
 
-    @classmethod
-    def origin_visit_get(cls, origin_id):
-        visits = cls.storage.origin_visit_get(origin_id)
+    def origin_visit_get(self, origin_id):
+        visits = self.storage.origin_visit_get(origin_id)
         return list(map(converters.from_origin_visit, visits))
 
-    @classmethod
-    def origin_visit_get_by(cls, origin_id, visit_id):
-        visit = cls.storage.origin_visit_get_by(origin_id, visit_id)
+    def origin_visit_get_by(self, origin_id, visit_id):
+        visit = self.storage.origin_visit_get_by(origin_id, visit_id)
        return converters.from_origin_visit(visit)
 
-    @classmethod
-    def snapshot_get(cls, snapshot_id):
-        snp = cls.storage.snapshot_get(hash_to_bytes(snapshot_id))
+    def snapshot_get(self, snapshot_id):
+        snp = self.storage.snapshot_get(hash_to_bytes(snapshot_id))
         return converters.from_snapshot(snp)
 
-    @classmethod
-    def snapshot_get_branches(cls, snapshot_id, branches_from='',
+    def snapshot_get_branches(self, snapshot_id, branches_from='',
                               branches_count=1000, target_types=None):
-        snp = cls.storage.snapshot_get_branches(hash_to_bytes(snapshot_id),
-                                                branches_from.encode(),
-                                                branches_count, target_types)
+        snp = self.storage.snapshot_get_branches(
+            hash_to_bytes(snapshot_id), branches_from.encode(),
+            branches_count, target_types)
         return converters.from_snapshot(snp)
 
-    @classmethod
-    def person_get(cls, person_id):
-        person = next(cls.storage.person_get([person_id]))
+    def person_get(self, person_id):
+        person = next(self.storage.person_get([person_id]))
        return converters.from_person(person)
-
-    def setUp(self):
-        cache.clear()
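
Note on the test_origin.py hunks: in the old code the @patch decorator sat above @given, and since decorators apply bottom-up, the patcher was entered once around the entire hypothesis run, so every generated example shared a single mock with accumulated call counts. Entering patch() as a context manager inside the test body instead gives each example a fresh mock. Below is a minimal, self-contained sketch of that idiom; it uses os.path.exists as a stand-in patch target (not swh-web's idx_storage) purely for illustration.

    from unittest.mock import patch

    from hypothesis import given
    from hypothesis.strategies import text


    @given(text(min_size=1))
    def test_fresh_mock_per_example(path):
        # The body runs once per generated example. Entering the patcher
        # here, rather than via a @patch decorator wrapped around @given,
        # gives every example its own mock, so return_value/side_effect
        # state and call counts cannot leak between examples.
        with patch('os.path.exists') as mock_exists:
            mock_exists.return_value = True
            import os.path
            assert os.path.exists(path)
            # Passes only because the mock is fresh for this example.
            mock_exists.assert_called_once_with(path)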
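
The data.py and testcase.py hunks together implement a reset-between-tests idiom: the expensive dataset is built once and kept pristine, each test's _pre_setup() takes a working deep copy via get_tests_data(reset=True), and strategies.py stops caching module-level references so every lookup goes through get_tests_data() and sees the current copy. A stripped-down sketch of the idiom with placeholder data, not the actual swh-web initialization:

    from copy import deepcopy

    _pristine = None  # expensive dataset, built once and never mutated
    _current = None   # working copy handed out to tests


    def _build_data():
        # Stand-in for the costly step (_init_tests_data() in the diff).
        return {'origins': [], 'generated_checksums': set()}


    def get_tests_data(reset=False):
        # Return the shared dataset; with reset=True (or on first use),
        # start from a fresh deep copy so mutations made by one test
        # never leak into the next.
        global _pristine, _current
        if _pristine is None:
            _pristine = _build_data()
        if reset or _current is None:
            _current = deepcopy(_pristine)
        return _current

As the in-diff comment explains, the indexers' loggers are detached before the copy and reattached afterwards, because pytest points them at temporary files whose handler state deepcopy cannot serialize.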