Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9696903
D742.id2321.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
D742.id2321.diff
View Options
diff --git a/swh/indexer/fossology_license.py b/swh/indexer/fossology_license.py
--- a/swh/indexer/fossology_license.py
+++ b/swh/indexer/fossology_license.py
@@ -160,7 +160,8 @@
It:
- filters out the non textual content
- - (optionally) filters out content already indexed (cf :callable:`range`)
+ - (optionally) filters out content already indexed (cf
+ :func:`indexed_contents_in_range`)
- reads content from objstorage per the content's id (sha1)
- computes {mimetype, encoding} from that content
- stores result in storage
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -5,17 +5,20 @@
import unittest
-from swh.indexer.metadata_dictionary import CROSSWALK_TABLE, MAPPINGS
-from swh.indexer.metadata_detector import detect_metadata
-from swh.indexer.metadata_detector import extract_minimal_metadata_dict
-from swh.indexer.metadata import ContentMetadataIndexer
-from swh.indexer.metadata import RevisionMetadataIndexer
-from swh.indexer.tests.test_utils import MockObjStorage, MockStorage
-from swh.indexer.tests.test_utils import MockIndexerStorage
-
from swh.model.hashutil import hash_to_bytes
-from .test_utils import BASE_TEST_CONFIG
+from swh.indexer.metadata_dictionary import CROSSWALK_TABLE, MAPPINGS
+from swh.indexer.metadata_detector import (
+ detect_metadata, extract_minimal_metadata_dict
+)
+from swh.indexer.metadata import (
+ ContentMetadataIndexer, RevisionMetadataIndexer
+)
+
+from .test_utils import (
+ MockObjStorage, MockStorage, MockIndexerStorage,
+ BASE_TEST_CONFIG
+)
class ContentMetadataTestIndexer(ContentMetadataIndexer):
diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py
--- a/swh/indexer/tests/test_origin_metadata.py
+++ b/swh/indexer/tests/test_origin_metadata.py
@@ -4,62 +4,33 @@
# See top-level LICENSE file for more information
import time
-import logging
import unittest
-from celery import task
-
-from swh.indexer.metadata import OriginMetadataIndexer, \
- RevisionMetadataIndexer, ContentMetadataIndexer
-from swh.indexer.tests.test_utils import MockObjStorage, MockStorage
-from swh.indexer.tests.test_utils import MockIndexerStorage
-from swh.indexer.tests.test_origin_head import OriginHeadTestIndexer
-
-from swh.scheduler.tests.scheduler_testing import SchedulerTestFixture
+from celery import task
from swh.model.hashutil import hash_to_bytes
-from .test_utils import BASE_TEST_CONFIG
-
-
-class OriginMetadataTestIndexer(OriginMetadataIndexer):
- def parse_config_file(self, *args, **kwargs):
- return {
- **BASE_TEST_CONFIG,
- 'tools': [],
- }
-
- def prepare(self):
- super().prepare()
- self.storage = MockStorage()
- self.idx_storage = MockIndexerStorage()
- self.objstorage = MockObjStorage()
-
+from swh.indexer.metadata import (
+ OriginMetadataIndexer, RevisionMetadataIndexer
+)
-class ContentMetadataTestIndexer(ContentMetadataIndexer):
- """Specific Metadata whose configuration is enough to satisfy the
- indexing tests.
- """
- def prepare(self):
- self.idx_storage = MockIndexerStorage()
- self.log = logging.getLogger('swh.indexer')
- self.objstorage = MockObjStorage()
- self.tools = self.register_tools(self.config['tools'])
- self.tool = self.tools[0]
- self.results = []
+from swh.scheduler.tests.scheduler_testing import SchedulerTestFixture
+from .test_utils import (
+ MockObjStorage, MockStorage, MockIndexerStorage,
+ BASE_TEST_CONFIG
+)
+from .test_origin_head import OriginHeadTestIndexer
+from .test_metadata import ContentMetadataTestIndexer
class RevisionMetadataTestIndexer(RevisionMetadataIndexer):
"""Specific indexer whose configuration is enough to satisfy the
indexing tests.
"""
-
ContentMetadataIndexer = ContentMetadataTestIndexer
- def prepare(self):
- self.config = {
- 'storage': {},
- 'objstorage': {},
- 'indexer_storage': {},
+ def parse_config_file(self, *args, **kwargs):
+ return {
+ **BASE_TEST_CONFIG,
'tools': {
'name': 'swh-metadata-detector',
'version': '0.0.2',
@@ -69,21 +40,38 @@
}
}
}
- self.storage = MockStorage()
+
+ def prepare(self):
+ super().prepare()
self.idx_storage = MockIndexerStorage()
- self.log = logging.getLogger('swh.indexer')
+ self.storage = MockStorage()
self.objstorage = MockObjStorage()
- self.tools = self.register_tools(self.config['tools'])
- self.tool = self.tools[0]
+
+
+revision_metadata_indexer = RevisionMetadataTestIndexer()
@task
def revision_metadata_test_task(*args, **kwargs):
- indexer = RevisionMetadataTestIndexer()
+ indexer = revision_metadata_indexer
indexer.run(*args, **kwargs)
return indexer.results
+class OriginMetadataTestIndexer(OriginMetadataIndexer):
+ def parse_config_file(self, *args, **kwargs):
+ return {
+ **BASE_TEST_CONFIG,
+ 'tools': []
+ }
+
+ def prepare(self):
+ super().prepare()
+ self.storage = MockStorage()
+ self.objstorage = MockObjStorage()
+ self.idx_storage = MockIndexerStorage()
+
+
@task
def origin_intrinsic_metadata_test_task(*args, **kwargs):
indexer = OriginMetadataTestIndexer()
@@ -149,18 +137,30 @@
rev_metadata = {
'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
'translated_metadata': metadata,
- 'indexer_configuration_id': 7,
}
origin_metadata = {
'origin_id': 54974445,
'from_revision': hash_to_bytes(
'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
'metadata': metadata,
- 'indexer_configuration_id': 7,
}
expected_results = [
- ('origin_intrinsic_metadata', True, [origin_metadata]),
- ('revision_metadata', True, [rev_metadata])]
+ ('revision_metadata', True, [rev_metadata]),
+ ('origin_intrinsic_metadata', True, [origin_metadata]),
+ ]
results = list(indexer.idx_storage.added_data)
+ for result in results:
+ metadata = result[2]
+ for item in metadata:
+ # cannot check those (generated ids)
+ del item['indexer_configuration_id']
+
+ from pprint import pprint
+ print('##### actual')
+ pprint(results)
+ # print()
+ # print('##### expected')
+ # pprint(expected_results)
+
self.assertCountEqual(expected_results, results)
diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py
--- a/swh/indexer/tests/test_utils.py
+++ b/swh/indexer/tests/test_utils.py
@@ -357,7 +357,15 @@
for item in self.revision_metadata.get(id_):
item = item.copy()
tool_id = item.pop('indexer_configuration_id')
- item['tool'] = self.tools[tool_id].copy()
+ if tool_id in self.tools:
+ item['tool'] = self.tools[tool_id].copy()
+ else: # HACK: this needs to be removed altogether
+ item['tool'] = {
+ 'id': tool_id,
+ 'name': tool_id[0],
+ 'version': tool_id[1],
+ 'configuration': tool_id[2],
+ }
yield item
def origin_intrinsic_metadata_add(self, metadata, conflict_update=None):
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Aug 17, 10:03 PM (3 h, 31 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225991
Attached To
D742: indexer: Simplify the init step for the last tests
Event Timeline
Log In to Comment