diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py --- a/swh/indexer/indexer.py +++ b/swh/indexer/indexer.py @@ -155,10 +155,13 @@ Without this step, the indexer cannot possibly run. """ - self.config = self.parse_config_file( - additional_configs=[self.ADDITIONAL_CONFIG]) - if self.config['storage']: - self.storage = get_storage(**self.config['storage']) + # HACK to deal with edge case (e.g revision metadata indexer) + if not hasattr(self, 'config'): + self.config = self.parse_config_file( + additional_configs=[self.ADDITIONAL_CONFIG]) + config_storage = self.config.get('storage') + if config_storage: + self.storage = get_storage(**config_storage) objstorage = self.config['objstorage'] self.objstorage = get_objstorage(objstorage['cls'], objstorage['args']) idx_storage = self.config[INDEXER_CFG_KEY] @@ -203,7 +206,6 @@ ValueError if not a list nor a dict. """ - tools = self.config['tools'] if isinstance(tools, list): tools = list(map(self._prepare_tool, tools)) elif isinstance(tools, dict): diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py --- a/swh/indexer/metadata.py +++ b/swh/indexer/metadata.py @@ -28,14 +28,19 @@ - store result in content_metadata table """ + # Note: This is used when the content metadata indexer is used alone + # (not the case, for example, with the RevisionMetadataIndexer) CONFIG_BASE_FILENAME = 'indexer/content_metadata' def __init__(self, tool, config): - # twisted way to use the exact same config of RevisionMetadataIndexer - # object that uses internally ContentMetadataIndexer + # FIXME: Simplify this twisted way to use the exact same + # config of RevisionMetadataIndexer object that uses + # internally ContentMetadataIndexer self.config = config self.config['tools'] = tool + self.results = [] super().__init__() + self.tool = self.tools[0] # Tool is now registered (cf. prepare call) def filter(self, ids): """Filter out known sha1s and return only missing ones. 
@@ -69,8 +74,6 @@ mapping_name = self.tool['tool_configuration']['context'] result['translated_metadata'] = MAPPINGS[mapping_name] \ .translate(data) - # a twisted way to keep result with indexer object for get_results - self.results.append(result) except Exception: self.log.exception( "Problem during tool retrieval of metadata translation") @@ -91,16 +94,6 @@ self.idx_storage.content_metadata_add( results, conflict_update=(policy_update == 'update-dups')) - def get_results(self): - """can be called only if run method was called before - - Returns: - list: list of content_metadata entries calculated by - current indexer - - """ - return self.results - class RevisionMetadataIndexer(RevisionIndexer): """Revision-level indexer @@ -227,8 +220,8 @@ # -> get raw_contents # -> translate each content config = { - INDEXER_CFG_KEY: self.idx_storage, - 'objstorage': self.objstorage + k: self.config[k] + for k in [INDEXER_CFG_KEY, 'objstorage', 'storage'] } for context in detected_files.keys(): tool['configuration']['context'] = context @@ -250,19 +243,18 @@ if item not in sha1s_in_storage] if sha1s_filtered: - # schedule indexation of content + # content indexing try: c_metadata_indexer.run(sha1s_filtered, policy_update='ignore-dups') # on the fly possibility: - results = c_metadata_indexer.get_results() - - for result in results: + for result in c_metadata_indexer.results: local_metadata = result['translated_metadata'] translated_metadata.append(local_metadata) - except Exception as e: - self.log.warning("""Exception while indexing content""", e) + except Exception: + self.log.exception( + "Exception while indexing metadata on contents") # transform translated_metadata into min set with swh-metadata-detector min_metadata = extract_minimal_metadata_dict(translated_metadata) diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py --- a/swh/indexer/tests/test_metadata.py +++ b/swh/indexer/tests/test_metadata.py @@ -39,12 +39,9 @@ def prepare(self): 
self.config = { - 'storage': { - 'cls': 'remote', - 'args': { - 'url': 'http://localhost:9999', - } - }, + 'storage': {}, + 'objstorage': {}, + 'indexer_storage': {}, 'tools': { 'name': 'swh-metadata-detector', 'version': '0.0.2', @@ -60,7 +57,6 @@ self.objstorage = MockObjStorage() self.tools = self.register_tools(self.config['tools']) self.tool = self.tools[0] - self.results = [] class Metadata(unittest.TestCase):