diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py
--- a/swh/indexer/tests/test_utils.py
+++ b/swh/indexer/tests/test_utils.py
@@ -7,7 +7,6 @@
 import hashlib
 import random
 
-from swh.objstorage.exc import ObjNotFoundError
 from swh.model import hashutil
 from swh.model.hashutil import hash_to_bytes, hash_to_hex
 
@@ -405,117 +404,6 @@
         obj_storage.add(content, obj_id=hash_to_bytes(obj_id))
 
 
-class MockObjStorage:
-    """Mock an swh-objstorage objstorage with predefined contents.
-
-    """
-    data = {}
-
-    def __init__(self):
-        self.data = OBJ_STORAGE_DATA.copy()
-
-    def __iter__(self):
-        yield from self.data.keys()
-
-    def __contains__(self, sha1):
-        return self.data.get(sha1) is not None
-
-    def get(self, sha1):
-        raw_content = self.data.get(sha1)
-        if raw_content is None:
-            raise ObjNotFoundError(sha1)
-        return raw_content
-
-
-class MockIndexerStorage():
-    """Mock an swh-indexer storage.
-
-    """
-    added_data = []
-    revision_metadata = {}
-    tools = {}
-
-    def indexer_configuration_add(self, tools):
-        results = []
-        for tool in tools:
-            results.append(self._indexer_configuration_add_one(tool))
-        return results
-
-    def _indexer_configuration_add_one(self, tool):
-        if tool['tool_name'] == 'swh-metadata-translator':
-            tool2 = {
-                'id': 30,
-                'tool_name': 'swh-metadata-translator',
-                'tool_version': '0.0.1',
-                'tool_configuration': {
-                    'type': 'local',
-                    'context': 'NpmMapping'
-                },
-            }
-        elif tool['tool_name'] == 'swh-metadata-detector':
-            tool2 = {
-                'id': 7,
-                'tool_name': 'swh-metadata-detector',
-                'tool_version': '0.0.1',
-                'tool_configuration': {
-                    'type': 'local',
-                    'context': 'NpmMapping'
-                },
-            }
-        elif tool['tool_name'] == 'origin-metadata':
-            tool2 = {
-                'id': 8,
-                'tool_name': 'origin-metadata',
-                'tool_version': '0.0.1',
-                'tool_configuration': {},
-            }
-        else:
-            assert False, 'Unknown tool {tool_name}'.format(**tool)
-
-        self.tools[tool2['id']] = tool2
-        return tool2
-
-    def content_metadata_missing(self, sha1s):
-        yield from []
-
-    def content_metadata_add(self, metadata, conflict_update=None):
-        self.added_data.append(
-            ('content_metadata', conflict_update, metadata))
-
-    def revision_metadata_add(self, metadata, conflict_update=None):
-        assert conflict_update
-        self.added_data.append(
-            ('revision_metadata', conflict_update, metadata))
-        for item in metadata:
-            assert isinstance(item['id'], bytes)
-            self.revision_metadata.setdefault(item['id'], []).append(item)
-
-    def revision_metadata_get(self, ids):
-        for id_ in ids:
-            assert isinstance(id_, bytes)
-            for item in self.revision_metadata.get(id_):
-                item = item.copy()
-                tool_id = item.pop('indexer_configuration_id')
-                if tool_id in self.tools:
-                    item['tool'] = self.tools[tool_id].copy()
-                else:  # HACK: this needs to be removed altogether
-                    item['tool'] = {
-                        'id': tool_id,
-                        'name': tool_id[0],
-                        'version': tool_id[1],
-                        'configuration': tool_id[2],
-                    }
-                yield item
-
-    def origin_intrinsic_metadata_add(self, metadata, conflict_update=None):
-        self.added_data.append(
-            ('origin_intrinsic_metadata', conflict_update, metadata))
-
-    def content_metadata_get(self, sha1s):
-        assert sha1s == [b'cde']
-        return CONTENT_METADATA
-
-
 def fill_storage(storage):
     for origin in ORIGINS:
         origin = origin.copy()
@@ -559,147 +447,6 @@
         }])
 
 
-class MockStorage():
-    """Mock a real swh-storage storage to simplify reading indexers'
-    outputs.
-
-    """
-    def origin_get(self, id_):
-        for origin in ORIGINS:
-            for (k, v) in id_.items():
-                if origin[k] != v:
-                    break
-            else:
-                # This block is run iff we didn't break, ie. if all supplied
-                # parts of the id are set to the expected value.
-                return origin
-        assert False, id_
-
-    def snapshot_get_latest(self, origin_id):
-        if origin_id in SNAPSHOTS:
-            return SNAPSHOTS[origin_id]
-        else:
-            assert False, origin_id
-
-    def revision_get(self, revisions):
-        return REVISIONS.copy()
-
-    def directory_ls(self, directory, recursive=False, cur=None):
-        assert directory == DIRECTORY_ID
-        return DIRECTORY
-
-
-class BasicMockStorage():
-    """In memory implementation to fake the content_get_range api.
-
-    FIXME: To remove when the actual in-memory lands.
-
-    """
-    contents = []
-
-    def __init__(self, contents):
-        self.contents = contents
-
-    def content_get_range(self, start, end, limit=1000):
-        # to make input test data consilient with actual runtime the
-        # other way of doing properly things would be to rewrite all
-        # tests (that's another task entirely so not right now)
-        if isinstance(start, bytes):
-            start = hashutil.hash_to_hex(start)
-        if isinstance(end, bytes):
-            end = hashutil.hash_to_hex(end)
-        results = []
-        _next_id = None
-        counter = 0
-        for c in self.contents:
-            _id = c['sha1']
-            if start <= _id and _id <= end:
-                results.append(c)
-                if counter >= limit:
-                    break
-                counter += 1
-
-        return {
-            'contents': results,
-            'next': _next_id
-        }
-
-
-class BasicMockIndexerStorage():
-    """Mock Indexer storage to simplify reading indexers' outputs.
-
-    """
-    state = []
-
-    def _internal_add(self, data, conflict_update=None):
-        """All content indexer have the same structure. So reuse `data` as the
-        same data. It's either mimetype, language,
-        fossology_license, etc...
-
-        """
-        self.state = data
-        self.conflict_update = conflict_update
-
-    def content_mimetype_add(self, data, conflict_update=None):
-        self._internal_add(data, conflict_update=conflict_update)
-
-    def content_fossology_license_add(self, data, conflict_update=None):
-        self._internal_add(data, conflict_update=conflict_update)
-
-    def content_language_add(self, data, conflict_update=None):
-        self._internal_add(data, conflict_update=conflict_update)
-
-    def content_ctags_add(self, data, conflict_update=None):
-        self._internal_add(data, conflict_update=conflict_update)
-
-    def _internal_get_range(self, start, end,
-                            indexer_configuration_id, limit=1000):
-        """Same logic as _internal_add, we retrieve indexed data given an
-        identifier. So the code here does not change even though
-        the underlying data does.
-
-        """
-        # to make input test data consilient with actual runtime the
-        # other way of doing properly things would be to rewrite all
-        # tests (that's another task entirely so not right now)
-        if isinstance(start, bytes):
-            start = hashutil.hash_to_hex(start)
-        if isinstance(end, bytes):
-            end = hashutil.hash_to_hex(end)
-        results = []
-        _next = None
-        counter = 0
-        for m in self.state:
-            _id = m['id']
-            _tool_id = m['indexer_configuration_id']
-            if (start <= _id and _id <= end and
-                    _tool_id == indexer_configuration_id):
-                results.append(_id)
-                if counter >= limit:
-                    break
-                counter += 1
-
-        return {
-            'ids': results,
-            'next': _next
-        }
-
-    def content_mimetype_get_range(
-            self, start, end, indexer_configuration_id, limit=1000):
-        return self._internal_get_range(
-            start, end, indexer_configuration_id, limit=limit)
-
-    def content_fossology_license_get_range(
-            self, start, end, indexer_configuration_id, limit=1000):
-        return self._internal_get_range(
-            start, end, indexer_configuration_id, limit=limit)
-
-    def indexer_configuration_add(self, tools):
-        return [{
-            'id': 10,
-        }]
-
-
 class CommonIndexerNoTool:
     """Mixin to wronly initialize content indexer"""
    def prepare(self):