Page Menu | Home | Software Heritage

D952.id.diff
No One | Temporary

D952.id.diff

diff --git a/swh/indexer/tests/conftest.py b/swh/indexer/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/tests/conftest.py
@@ -0,0 +1,28 @@
+import pytest
+from datetime import timedelta
+from swh.scheduler.tests.conftest import * # noqa
+
+
+TASK_NAMES = ['revision_metadata', 'origin_intrinsic_metadata']
+
+
+@pytest.fixture
+def indexer_scheduler(swh_scheduler):
+ for taskname in TASK_NAMES:
+ swh_scheduler.create_task_type({
+ 'type': taskname,
+ 'description': 'The {} indexer testing task'.format(taskname),
+ 'backend_name': 'swh.indexer.tests.tasks.{}'.format(taskname),
+ 'default_interval': timedelta(days=1),
+ 'min_interval': timedelta(hours=6),
+ 'max_interval': timedelta(days=12),
+ 'num_retries': 3,
+ })
+ return swh_scheduler
+
+
+@pytest.fixture(scope='session')
+def celery_includes():
+ return [
+ 'swh.indexer.tests.tasks',
+ ]
diff --git a/swh/indexer/tests/tasks.py b/swh/indexer/tests/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/tests/tasks.py
@@ -0,0 +1,47 @@
+from swh.scheduler.celery_backend.config import app
+from swh.indexer.metadata import (
+ OriginMetadataIndexer, RevisionMetadataIndexer
+)
+from .test_metadata import ContentMetadataTestIndexer
+from .test_utils import BASE_TEST_CONFIG
+
+
+class RevisionMetadataTestIndexer(RevisionMetadataIndexer):
+ """Specific indexer whose configuration is enough to satisfy the
+ indexing tests.
+ """
+ ContentMetadataIndexer = ContentMetadataTestIndexer
+
+ def parse_config_file(self, *args, **kwargs):
+ return {
+ **BASE_TEST_CONFIG,
+ 'tools': {
+ 'name': 'swh-metadata-detector',
+ 'version': '0.0.2',
+ 'configuration': {
+ 'type': 'local',
+ 'context': 'NpmMapping'
+ }
+ }
+ }
+
+
+class OriginMetadataTestIndexer(OriginMetadataIndexer):
+ def parse_config_file(self, *args, **kwargs):
+ return {
+ **BASE_TEST_CONFIG,
+ 'tools': []
+ }
+
+
+@app.task
+def revision_metadata(*args, **kwargs):
+ indexer = RevisionMetadataTestIndexer()
+ indexer.run(*args, **kwargs)
+ print('REV RESULT=', indexer.results)
+
+
+@app.task
+def origin_intrinsic_metadata(*args, **kwargs):
+ indexer = OriginMetadataTestIndexer()
+ indexer.run(*args, **kwargs)
diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py
--- a/swh/indexer/tests/test_origin_metadata.py
+++ b/swh/indexer/tests/test_origin_metadata.py
@@ -3,171 +3,118 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import time
-import unittest
-import unittest.mock
+from celery.result import AsyncResult
+from unittest import mock
-from celery import task
from swh.model.hashutil import hash_to_bytes
from swh.storage.in_memory import Storage
-from swh.indexer.metadata import (
- OriginMetadataIndexer, RevisionMetadataIndexer
-)
from swh.indexer.storage.in_memory import IndexerStorage
from swh.objstorage.objstorage_in_memory import InMemoryObjStorage
-from swh.scheduler.tests.scheduler_testing import SchedulerTestFixture
-from .test_utils import (
- BASE_TEST_CONFIG, fill_storage, fill_obj_storage
-)
-from .test_origin_head import OriginHeadTestIndexer
-from .test_metadata import ContentMetadataTestIndexer
-
-
-class RevisionMetadataTestIndexer(RevisionMetadataIndexer):
- """Specific indexer whose configuration is enough to satisfy the
- indexing tests.
- """
- ContentMetadataIndexer = ContentMetadataTestIndexer
-
- def parse_config_file(self, *args, **kwargs):
- return {
- **BASE_TEST_CONFIG,
- 'tools': {
- 'name': 'swh-metadata-detector',
- 'version': '0.0.2',
- 'configuration': {
- 'type': 'local',
- 'context': 'NpmMapping'
- }
- }
- }
-
-
-@task
-def revision_metadata_test_task(*args, **kwargs):
- indexer = RevisionMetadataTestIndexer()
- indexer.run(*args, **kwargs)
- return indexer.results
-
-
-class OriginMetadataTestIndexer(OriginMetadataIndexer):
- def parse_config_file(self, *args, **kwargs):
- return {
- **BASE_TEST_CONFIG,
- 'tools': []
- }
+from swh.scheduler.celery_backend.runner import run_ready_tasks
-
-@task
-def origin_intrinsic_metadata_test_task(*args, **kwargs):
- indexer = OriginMetadataTestIndexer()
- indexer.run(*args, **kwargs)
- return indexer.results
+from .test_utils import fill_storage, fill_obj_storage
+from .test_origin_head import OriginHeadTestIndexer
+from swh.indexer.tests.tasks import (
+ RevisionMetadataTestIndexer, OriginMetadataTestIndexer)
class OriginHeadTestIndexer(OriginHeadTestIndexer):
def prepare(self):
super().prepare()
self.config['tasks'] = {
- 'revision_metadata': 'revision_metadata_test_task',
- 'origin_intrinsic_metadata': 'origin_intrinsic_metadata_test_task',
+ 'revision_metadata': 'revision_metadata',
+ 'origin_intrinsic_metadata': 'origin_intrinsic_metadata',
}
-class TestOriginMetadata(SchedulerTestFixture, unittest.TestCase):
- def setUp(self):
- super().setUp()
- self.maxDiff = None
- self.add_scheduler_task_type(
- 'revision_metadata_test_task',
- 'swh.indexer.tests.test_origin_metadata.'
- 'revision_metadata_test_task')
- self.add_scheduler_task_type(
- 'origin_intrinsic_metadata_test_task',
- 'swh.indexer.tests.test_origin_metadata.'
- 'origin_intrinsic_metadata_test_task')
- RevisionMetadataTestIndexer.scheduler = self.scheduler
-
- def tearDown(self):
+@mock.patch('swh.indexer.storage.in_memory.IndexerStorage')
+@mock.patch('swh.storage.in_memory.Storage')
+def test_pipeline(storage_mock, idx_storage_mock,
+ swh_app, celery_session_worker, indexer_scheduler):
+ scheduler = indexer_scheduler
+ # Always return the same storage and idx storage instances, because
+ # the patched constructors are called by each of the three indexers.
+ objstorage = InMemoryObjStorage()
+ storage = Storage()
+ idx_storage = IndexerStorage()
+
+ storage_mock.return_value = storage
+ idx_storage_mock.return_value = idx_storage
+
+ fill_obj_storage(objstorage)
+ fill_storage(storage)
+
+ # TODO: find a better way to make the ContentMetadataIndexer use
+ # the same objstorage instance.
+ import swh.objstorage
+ old_inmem_objstorage = swh.objstorage._STORAGE_CLASSES['memory']
+ swh.objstorage._STORAGE_CLASSES['memory'] = lambda: objstorage
+ try:
+ RevisionMetadataTestIndexer.scheduler = scheduler
+ OriginMetadataTestIndexer.scheduler = scheduler
+ indexer = OriginHeadTestIndexer()
+ indexer.scheduler = scheduler
+ indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+
+ tasks = run_ready_tasks(scheduler, swh_app) # Run the first task
+ # Wait for the task to complete and schedule the 2nd one
+ assert len(tasks) == 1
+ promise = AsyncResult(id=tasks[0]['backend_id'])
+ promise.wait()
+
+ tasks = run_ready_tasks(scheduler, swh_app) # Run the second task
+ assert len(tasks) == 1
+ promise = AsyncResult(id=tasks[0]['backend_id'])
+ promise.wait()
+ finally:
+ swh.objstorage._STORAGE_CLASSES['memory'] = old_inmem_objstorage
del RevisionMetadataTestIndexer.scheduler
- super().tearDown()
-
- @unittest.mock.patch('swh.indexer.storage.in_memory.IndexerStorage')
- @unittest.mock.patch('swh.storage.in_memory.Storage')
- def test_pipeline(self, storage_mock, idx_storage_mock):
- # Always returns the same instance of the idx storage, because
- # this function is called by each of the three indexers.
- objstorage = InMemoryObjStorage()
- storage = Storage()
- idx_storage = IndexerStorage()
-
- storage_mock.return_value = storage
- idx_storage_mock.return_value = idx_storage
-
- fill_obj_storage(objstorage)
- fill_storage(storage)
-
- # TODO: find a better way to share the ContentMetadataIndexer use
- # the same objstorage instance.
- import swh.objstorage
- old_inmem_objstorage = swh.objstorage._STORAGE_CLASSES['memory']
- swh.objstorage._STORAGE_CLASSES['memory'] = lambda: objstorage
- try:
- indexer = OriginHeadTestIndexer()
- indexer.scheduler = self.scheduler
- indexer.run(["git+https://github.com/librariesio/yarn-parser"])
-
- self.run_ready_tasks() # Run the first task
- # Give it time to complete and schedule the 2nd one
- time.sleep(0.1)
- self.run_ready_tasks() # Run the second task
- finally:
- swh.objstorage._STORAGE_CLASSES['memory'] = old_inmem_objstorage
-
- origin = storage.origin_get({
- 'type': 'git',
- 'url': 'https://github.com/librariesio/yarn-parser'})
- rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
-
- metadata = {
- '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
- 'url':
- 'https://github.com/librariesio/yarn-parser#readme',
- 'codeRepository':
- 'git+git+https://github.com/librariesio/yarn-parser.git',
- 'author': [{
- 'type': 'Person',
- 'name': 'Andrew Nesbitt'
- }],
- 'license': 'https://spdx.org/licenses/AGPL-3.0',
- 'version': '1.0.0',
- 'description':
- 'Tiny web service for parsing yarn.lock files',
- 'issueTracker':
- 'https://github.com/librariesio/yarn-parser/issues',
- 'name': 'yarn-parser',
- 'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
- }
- rev_metadata = {
- 'id': rev_id,
- 'translated_metadata': metadata,
- }
- origin_metadata = {
- 'origin_id': origin['id'],
- 'from_revision': rev_id,
- 'metadata': metadata,
- }
-
- results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
- for result in results:
- del result['tool']
- self.assertEqual(results, [rev_metadata])
-
- results = list(indexer.idx_storage.origin_intrinsic_metadata_get([
- origin['id']]))
- for result in results:
- del result['tool']
- self.assertEqual(results, [origin_metadata])
+ del OriginMetadataTestIndexer.scheduler
+
+ origin = storage.origin_get({
+ 'type': 'git',
+ 'url': 'https://github.com/librariesio/yarn-parser'})
+ rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
+
+ metadata = {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'url':
+ 'https://github.com/librariesio/yarn-parser#readme',
+ 'codeRepository':
+ 'git+git+https://github.com/librariesio/yarn-parser.git',
+ 'author': [{
+ 'type': 'Person',
+ 'name': 'Andrew Nesbitt'
+ }],
+ 'license': 'https://spdx.org/licenses/AGPL-3.0',
+ 'version': '1.0.0',
+ 'description':
+ 'Tiny web service for parsing yarn.lock files',
+ 'issueTracker':
+ 'https://github.com/librariesio/yarn-parser/issues',
+ 'name': 'yarn-parser',
+ 'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
+ }
+ rev_metadata = {
+ 'id': rev_id,
+ 'translated_metadata': metadata,
+ }
+ origin_metadata = {
+ 'origin_id': origin['id'],
+ 'from_revision': rev_id,
+ 'metadata': metadata,
+ }
+
+ results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ for result in results:
+ del result['tool']
+ assert results == [rev_metadata]
+
+ results = list(indexer.idx_storage.origin_intrinsic_metadata_get([
+ origin['id']]))
+ for result in results:
+ del result['tool']
+ assert results == [origin_metadata]

File Metadata

Mime Type
text/plain
Expires
Mar 17 2025, 6:35 PM (7 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224510

Event Timeline