diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py
--- a/swh/indexer/storage/in_memory.py
+++ b/swh/indexer/storage/in_memory.py
@@ -170,6 +170,7 @@
     def __init__(self):
         self._tools = {}
         self._mimetypes = SubStorage(self._tools)
+        self._languages = SubStorage(self._tools)
         self._content_ctags = SubStorage(self._tools)
         self._licenses = SubStorage(self._tools)
         self._content_metadata = SubStorage(self._tools)
@@ -253,6 +254,55 @@
         """
         yield from self._mimetypes.get(ids)
 
+    def content_language_missing(self, languages):
+        """List languages missing from storage.
+
+        Args:
+            languages (iterable): dictionaries with keys:
+
+              - **id** (bytes): sha1 identifier
+              - **indexer_configuration_id** (int): tool used to compute
+                the results
+
+        Yields:
+            an iterable of missing id for the tuple (id,
+            indexer_configuration_id)
+
+        """
+        yield from self._languages.missing(languages)
+
+    def content_language_get(self, ids):
+        """Retrieve full content language per ids.
+
+        Args:
+            ids (iterable): sha1 identifier
+
+        Yields:
+            languages (iterable): dictionaries with keys:
+
+              - **id** (bytes): sha1 identifier
+              - **lang** (bytes): raw content's language
+              - **tool** (dict): Tool used to compute the language
+
+        """
+        yield from self._languages.get(ids)
+
+    def content_language_add(self, languages, conflict_update=False):
+        """Add languages not present in storage.
+
+        Args:
+            languages (iterable): dictionaries with keys:
+
+              - **id** (bytes): sha1
+              - **lang** (bytes): language detected
+
+            conflict_update (bool): Flag to determine if we want to
+                overwrite (true) or skip duplicates (false, the
+                default)
+
+        """
+        self._languages.add(languages, conflict_update)
+
     def content_ctags_missing(self, ctags):
         """List ctags missing from storage.
 
diff --git a/swh/indexer/tests/storage/test_in_memory.py b/swh/indexer/tests/storage/test_in_memory.py
--- a/swh/indexer/tests/storage/test_in_memory.py
+++ b/swh/indexer/tests/storage/test_in_memory.py
@@ -21,22 +21,6 @@
         pass
 
     @pytest.mark.xfail
-    def test_content_language_missing(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_language_get(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_language_add__drop_duplicate(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_language_add__update_in_place_duplicate(self):
-        pass
-
-    @pytest.mark.xfail
     def test_origin_intrinsic_metadata_get(self):
         pass
 
diff --git a/swh/indexer/tests/test_language.py b/swh/indexer/tests/test_language.py
--- a/swh/indexer/tests/test_language.py
+++ b/swh/indexer/tests/test_language.py
@@ -7,8 +7,8 @@
 from swh.indexer import language
 from swh.indexer.language import LanguageIndexer
 from swh.indexer.tests.test_utils import (
-    BasicMockIndexerStorage, MockObjStorage, CommonContentIndexerTest,
-    CommonIndexerWithErrorsTest, CommonIndexerNoTool, BASE_TEST_CONFIG
+    CommonContentIndexerTest, CommonIndexerWithErrorsTest,
+    CommonIndexerNoTool, BASE_TEST_CONFIG, fill_storage, fill_obj_storage
 )
 
 
@@ -30,12 +30,6 @@
             }
         }
 
-    def prepare(self):
-        super().prepare()
-        self.idx_storage = BasicMockIndexerStorage()
-        self.objstorage = MockObjStorage()
-        self.tool_config = self.config['tools']['configuration']
-
 
 class Language(unittest.TestCase):
     """Tests pygments tool for language detection
@@ -60,8 +54,14 @@
     - Unknown sha1 in the input list are not indexed
 
     """
+
+    def get_indexer_results(self, ids):
+        yield from self.indexer.idx_storage.content_language_get(ids)
+
     def setUp(self):
         self.indexer = LanguageTestIndexer()
+        fill_storage(self.indexer.storage)
+        fill_obj_storage(self.indexer.objstorage)
 
         self.id0 = '02fb2c89e14f7fab46701478c83779c7beb7b069'
         self.id1 = '103bc087db1d26afc3a0283f38663d081e9b01e6'