diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py --- a/swh/indexer/indexer.py +++ b/swh/indexer/indexer.py @@ -169,12 +169,12 @@ self.log = logging.getLogger('swh.indexer') self.tools = list(self.register_tools(self.config['tools'])) - def check(self): + def check(self, *, check_tools=True): """Check the indexer's configuration is ok before proceeding. If ok, does nothing. If not raise error. """ - if not self.tools: + if check_tools and not self.tools: raise ValueError('Tools %s is unknown, cannot continue' % self.tools) @@ -211,7 +211,8 @@ else: raise ValueError('Configuration tool(s) must be a dict or list!') - return self.idx_storage.indexer_configuration_add(tools) + if tools: + return self.idx_storage.indexer_configuration_add(tools) @abc.abstractmethod def index(self, id, data): diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py --- a/swh/indexer/metadata.py +++ b/swh/indexer/metadata.py @@ -163,13 +163,13 @@ - translated_metadata: dict of retrieved metadata """ - try: - result = { - 'id': rev['id'].decode(), - 'indexer_configuration_id': self.tool['id'], - 'translated_metadata': None - } + result = { + 'id': rev['id'].decode(), + 'indexer_configuration_id': self.tool['id'], + 'translated_metadata': None + } + try: root_dir = rev['directory'] dir_ls = self.storage.directory_ls(root_dir, recursive=False) files = [entry for entry in dir_ls if entry['type'] == 'file'] @@ -268,6 +268,14 @@ class OriginMetadataIndexer(OriginIndexer): + ADDITIONAL_CONFIG = { + 'tools': ('list', []) + } + + def check(self, **kwargs): + kwargs['check_tools'] = False + super().check(**kwargs) + def filter(self, ids): return ids diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py --- a/swh/indexer/storage/__init__.py +++ b/swh/indexer/storage/__init__.py @@ -603,6 +603,7 @@ - **id**: sha1_git of revision - **translated_metadata**: arbitrary dict + - **indexer_configuration_id**: tool used to compute metadata conflict_update: Flag to determine if we want to overwrite (true) or skip duplicates (false, the default) @@ -649,6 +650,7 @@ - **from_revision**: sha1 id of the revision used to generate these metadata. - **metadata**: arbitrary dict + - **indexer_configuration_id**: tool used to compute metadata conflict_update: Flag to determine if we want to overwrite (true) or skip duplicates (false, the default) diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py --- a/swh/indexer/tests/test_origin_metadata.py +++ b/swh/indexer/tests/test_origin_metadata.py @@ -26,18 +26,13 @@ 'url': 'http://localhost:9999', } }, - 'tools': { - 'name': 'origin-metadata', - 'version': '0.0.1', - 'configuration': {} - } + 'tools': [], } self.storage = MockStorage() self.idx_storage = MockIndexerStorage() self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() self.tools = self.register_tools(self.config['tools']) - self.tool = self.tools[0] self.results = [] diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py --- a/swh/indexer/tests/test_utils.py +++ b/swh/indexer/tests/test_utils.py @@ -271,9 +271,14 @@ added_data = [] def indexer_configuration_add(self, tools): - tool = tools[0] + results = [] + for tool in tools: + results.append(self._indexer_configuration_add_one(tool)) + return results + + def _indexer_configuration_add_one(self, tool): if tool['tool_name'] == 'swh-metadata-translator': - return [{ + return { 'id': 30, 'tool_name': 'swh-metadata-translator', 'tool_version': '0.0.1', @@ -281,9 +286,9 @@ 'type': 'local', 'context': 'NpmMapping' }, - }] + } elif tool['tool_name'] == 'swh-metadata-detector': - return [{ + return { 'id': 7, 'tool_name': 'swh-metadata-detector', 'tool_version': '0.0.1', @@ -291,14 +296,14 @@ 'type': 'local', 'context': 'NpmMapping' }, - }] + } elif tool['tool_name'] == 'origin-metadata': - return [{ + return { 'id': 8, 'tool_name': 'origin-metadata', 'tool_version': '0.0.1', 'tool_configuration': {}, - }] + } else: assert False, 'Unknown tool {tool_name}'.format(**tool)