diff --git a/swh/indexer/tests/conftest.py b/swh/indexer/tests/conftest.py new file mode 100644 --- /dev/null +++ b/swh/indexer/tests/conftest.py @@ -0,0 +1,28 @@ +import pytest +from datetime import timedelta +from swh.scheduler.tests.conftest import * # noqa + + +TASK_NAMES = ['revision_metadata', 'origin_intrinsic_metadata'] + + +@pytest.fixture +def indexer_scheduler(swh_scheduler): + for taskname in TASK_NAMES: + swh_scheduler.create_task_type({ + 'type': taskname, + 'description': 'The {} indexer testing task'.format(taskname), + 'backend_name': 'swh.indexer.tests.tasks.{}'.format(taskname), + 'default_interval': timedelta(days=1), + 'min_interval': timedelta(hours=6), + 'max_interval': timedelta(days=12), + 'num_retries': 3, + }) + return swh_scheduler + + +@pytest.fixture(scope='session') +def celery_includes(): + return [ + 'swh.indexer.tests.tasks', + ] diff --git a/swh/indexer/tests/tasks.py b/swh/indexer/tests/tasks.py new file mode 100644 --- /dev/null +++ b/swh/indexer/tests/tasks.py @@ -0,0 +1,47 @@ +from swh.scheduler.celery_backend.config import app +from swh.indexer.metadata import ( + OriginMetadataIndexer, RevisionMetadataIndexer +) +from .test_metadata import ContentMetadataTestIndexer +from .test_utils import BASE_TEST_CONFIG + + +class RevisionMetadataTestIndexer(RevisionMetadataIndexer): + """Specific indexer whose configuration is enough to satisfy the + indexing tests. + """ + ContentMetadataIndexer = ContentMetadataTestIndexer + + def parse_config_file(self, *args, **kwargs): + return { + **BASE_TEST_CONFIG, + 'tools': { + 'name': 'swh-metadata-detector', + 'version': '0.0.2', + 'configuration': { + 'type': 'local', + 'context': 'NpmMapping' + } + } + } + + +class OriginMetadataTestIndexer(OriginMetadataIndexer): + def parse_config_file(self, *args, **kwargs): + return { + **BASE_TEST_CONFIG, + 'tools': [] + } + + +@app.task +def revision_metadata(*args, **kwargs): + indexer = RevisionMetadataTestIndexer() + indexer.run(*args, **kwargs) + print('REV RESULT=', indexer.results) + + +@app.task +def origin_intrinsic_metadata(*args, **kwargs): + indexer = OriginMetadataTestIndexer() + indexer.run(*args, **kwargs) diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py --- a/swh/indexer/tests/test_origin_metadata.py +++ b/swh/indexer/tests/test_origin_metadata.py @@ -3,171 +3,118 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import time -import unittest -import unittest.mock +from celery.result import AsyncResult +from unittest import mock -from celery import task from swh.model.hashutil import hash_to_bytes from swh.storage.in_memory import Storage -from swh.indexer.metadata import ( - OriginMetadataIndexer, RevisionMetadataIndexer -) from swh.indexer.storage.in_memory import IndexerStorage from swh.objstorage.objstorage_in_memory import InMemoryObjStorage -from swh.scheduler.tests.scheduler_testing import SchedulerTestFixture -from .test_utils import ( - BASE_TEST_CONFIG, fill_storage, fill_obj_storage -) -from .test_origin_head import OriginHeadTestIndexer -from .test_metadata import ContentMetadataTestIndexer - - -class RevisionMetadataTestIndexer(RevisionMetadataIndexer): - """Specific indexer whose configuration is enough to satisfy the - indexing tests. 
- """ - ContentMetadataIndexer = ContentMetadataTestIndexer - - def parse_config_file(self, *args, **kwargs): - return { - **BASE_TEST_CONFIG, - 'tools': { - 'name': 'swh-metadata-detector', - 'version': '0.0.2', - 'configuration': { - 'type': 'local', - 'context': 'NpmMapping' - } - } - } - - -@task -def revision_metadata_test_task(*args, **kwargs): - indexer = RevisionMetadataTestIndexer() - indexer.run(*args, **kwargs) - return indexer.results - - -class OriginMetadataTestIndexer(OriginMetadataIndexer): - def parse_config_file(self, *args, **kwargs): - return { - **BASE_TEST_CONFIG, - 'tools': [] - } +from swh.scheduler.celery_backend.runner import run_ready_tasks - -@task -def origin_intrinsic_metadata_test_task(*args, **kwargs): - indexer = OriginMetadataTestIndexer() - indexer.run(*args, **kwargs) - return indexer.results +from .test_utils import fill_storage, fill_obj_storage +from .test_origin_head import OriginHeadTestIndexer +from swh.indexer.tests.tasks import ( + RevisionMetadataTestIndexer, OriginMetadataTestIndexer) class OriginHeadTestIndexer(OriginHeadTestIndexer): def prepare(self): super().prepare() self.config['tasks'] = { - 'revision_metadata': 'revision_metadata_test_task', - 'origin_intrinsic_metadata': 'origin_intrinsic_metadata_test_task', + 'revision_metadata': 'revision_metadata', + 'origin_intrinsic_metadata': 'origin_intrinsic_metadata', } -class TestOriginMetadata(SchedulerTestFixture, unittest.TestCase): - def setUp(self): - super().setUp() - self.maxDiff = None - self.add_scheduler_task_type( - 'revision_metadata_test_task', - 'swh.indexer.tests.test_origin_metadata.' - 'revision_metadata_test_task') - self.add_scheduler_task_type( - 'origin_intrinsic_metadata_test_task', - 'swh.indexer.tests.test_origin_metadata.' - 'origin_intrinsic_metadata_test_task') - RevisionMetadataTestIndexer.scheduler = self.scheduler - - def tearDown(self): +@mock.patch('swh.indexer.storage.in_memory.IndexerStorage') +@mock.patch('swh.storage.in_memory.Storage') +def test_pipeline(storage_mock, idx_storage_mock, + swh_app, celery_session_worker, indexer_scheduler): + scheduler = indexer_scheduler + # Always returns the same instance of the idx storage, because + # this function is called by each of the three indexers. + objstorage = InMemoryObjStorage() + storage = Storage() + idx_storage = IndexerStorage() + + storage_mock.return_value = storage + idx_storage_mock.return_value = idx_storage + + fill_obj_storage(objstorage) + fill_storage(storage) + + # TODO: find a better way to share the ContentMetadataIndexer use + # the same objstorage instance. 
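+    # Temporarily register a factory returning this pre-filled instance as
+    # the 'memory' objstorage class, so the indexers run by the celery tasks
+    # all share it; the original class is restored in the finally block below.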
+ import swh.objstorage + old_inmem_objstorage = swh.objstorage._STORAGE_CLASSES['memory'] + swh.objstorage._STORAGE_CLASSES['memory'] = lambda: objstorage + try: + RevisionMetadataTestIndexer.scheduler = scheduler + OriginMetadataTestIndexer.scheduler = scheduler + indexer = OriginHeadTestIndexer() + indexer.scheduler = scheduler + indexer.run(["git+https://github.com/librariesio/yarn-parser"]) + + tasks = run_ready_tasks(scheduler, swh_app) # Run the first task + # Wait for the task to complete and schedule the 2nd one + assert len(tasks) == 1 + promise = AsyncResult(id=tasks[0]['backend_id']) + promise.wait() + + tasks = run_ready_tasks(scheduler, swh_app) # Run the second task + assert len(tasks) == 1 + promise = AsyncResult(id=tasks[0]['backend_id']) + promise.wait() + finally: + swh.objstorage._STORAGE_CLASSES['memory'] = old_inmem_objstorage del RevisionMetadataTestIndexer.scheduler - super().tearDown() - - @unittest.mock.patch('swh.indexer.storage.in_memory.IndexerStorage') - @unittest.mock.patch('swh.storage.in_memory.Storage') - def test_pipeline(self, storage_mock, idx_storage_mock): - # Always returns the same instance of the idx storage, because - # this function is called by each of the three indexers. - objstorage = InMemoryObjStorage() - storage = Storage() - idx_storage = IndexerStorage() - - storage_mock.return_value = storage - idx_storage_mock.return_value = idx_storage - - fill_obj_storage(objstorage) - fill_storage(storage) - - # TODO: find a better way to share the ContentMetadataIndexer use - # the same objstorage instance. - import swh.objstorage - old_inmem_objstorage = swh.objstorage._STORAGE_CLASSES['memory'] - swh.objstorage._STORAGE_CLASSES['memory'] = lambda: objstorage - try: - indexer = OriginHeadTestIndexer() - indexer.scheduler = self.scheduler - indexer.run(["git+https://github.com/librariesio/yarn-parser"]) - - self.run_ready_tasks() # Run the first task - # Give it time to complete and schedule the 2nd one - time.sleep(0.1) - self.run_ready_tasks() # Run the second task - finally: - swh.objstorage._STORAGE_CLASSES['memory'] = old_inmem_objstorage - - origin = storage.origin_get({ - 'type': 'git', - 'url': 'https://github.com/librariesio/yarn-parser'}) - rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f') - - metadata = { - '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', - 'url': - 'https://github.com/librariesio/yarn-parser#readme', - 'codeRepository': - 'git+git+https://github.com/librariesio/yarn-parser.git', - 'author': [{ - 'type': 'Person', - 'name': 'Andrew Nesbitt' - }], - 'license': 'https://spdx.org/licenses/AGPL-3.0', - 'version': '1.0.0', - 'description': - 'Tiny web service for parsing yarn.lock files', - 'issueTracker': - 'https://github.com/librariesio/yarn-parser/issues', - 'name': 'yarn-parser', - 'keywords': ['yarn', 'parse', 'lock', 'dependencies'], - } - rev_metadata = { - 'id': rev_id, - 'translated_metadata': metadata, - } - origin_metadata = { - 'origin_id': origin['id'], - 'from_revision': rev_id, - 'metadata': metadata, - } - - results = list(indexer.idx_storage.revision_metadata_get([rev_id])) - for result in results: - del result['tool'] - self.assertEqual(results, [rev_metadata]) - - results = list(indexer.idx_storage.origin_intrinsic_metadata_get([ - origin['id']])) - for result in results: - del result['tool'] - self.assertEqual(results, [origin_metadata]) + del OriginMetadataTestIndexer.scheduler + + origin = storage.origin_get({ + 'type': 'git', + 'url': 
'https://github.com/librariesio/yarn-parser'}) + rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f') + + metadata = { + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'url': + 'https://github.com/librariesio/yarn-parser#readme', + 'codeRepository': + 'git+git+https://github.com/librariesio/yarn-parser.git', + 'author': [{ + 'type': 'Person', + 'name': 'Andrew Nesbitt' + }], + 'license': 'https://spdx.org/licenses/AGPL-3.0', + 'version': '1.0.0', + 'description': + 'Tiny web service for parsing yarn.lock files', + 'issueTracker': + 'https://github.com/librariesio/yarn-parser/issues', + 'name': 'yarn-parser', + 'keywords': ['yarn', 'parse', 'lock', 'dependencies'], + } + rev_metadata = { + 'id': rev_id, + 'translated_metadata': metadata, + } + origin_metadata = { + 'origin_id': origin['id'], + 'from_revision': rev_id, + 'metadata': metadata, + } + + results = list(indexer.idx_storage.revision_metadata_get([rev_id])) + for result in results: + del result['tool'] + assert results == [rev_metadata] + + results = list(indexer.idx_storage.origin_intrinsic_metadata_get([ + origin['id']])) + for result in results: + del result['tool'] + assert results == [origin_metadata]
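
For reference, the two run-then-wait steps in test_pipeline above follow the same pattern. A hypothetical helper such as the sketch below (not part of this patch; it reuses only the run_ready_tasks and AsyncResult calls already used in the test) could factor that pattern out if more pipeline stages are added later.

from celery.result import AsyncResult

from swh.scheduler.celery_backend.runner import run_ready_tasks


def run_and_wait(scheduler, app, expected_tasks=1):
    """Run every ready scheduled task and block until each submitted
    celery task has completed, returning the scheduler task dicts."""
    tasks = run_ready_tasks(scheduler, app)
    assert len(tasks) == expected_tasks
    for task in tasks:
        # 'backend_id' holds the celery task id assigned at submission time
        AsyncResult(id=task['backend_id']).wait()
    return tasks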