diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py --- a/swh/indexer/storage/in_memory.py +++ b/swh/indexer/storage/in_memory.py @@ -102,10 +102,62 @@ def __init__(self): self._tools = {} + self._mimetypes = SubStorage(self._tools) self._content_ctags = SubStorage(self._tools) self._content_metadata = SubStorage(self._tools) self._revision_metadata = SubStorage(self._tools) + def content_mimetype_missing(self, mimetypes): + """Generate mimetypes missing from storage. + + Args: + mimetypes (iterable): iterable of dict with keys: + + - **id** (bytes): sha1 identifier + - **indexer_configuration_id** (int): tool used to compute the + results + + Yields: + tuple (id, indexer_configuration_id): missing id + + """ + yield from self._mimetypes.missing(mimetypes) + + def content_mimetype_add(self, mimetypes, conflict_update=False): + """Add mimetypes not present in storage. + + Args: + mimetypes (iterable): dictionaries with keys: + + - **id** (bytes): sha1 identifier + - **mimetype** (bytes): raw content's mimetype + - **encoding** (bytes): raw content's encoding + - **indexer_configuration_id** (int): tool's id used to + compute the results + - **conflict_update** (bool): Flag to determine if we want to + overwrite (``True``) or skip duplicates (``False``, the + default) + + """ + self._mimetypes.add(mimetypes, conflict_update) + + def content_mimetype_get(self, ids, db=None, cur=None): + """Retrieve full content mimetype per ids. + + Args: + ids (iterable): sha1 identifier + + Yields: + mimetypes (iterable): dictionaries with keys: + + - **id** (bytes): sha1 identifier + - **mimetype** (bytes): raw content's mimetype + - **encoding** (bytes): raw content's encoding + - **tool** (dict): Tool used to compute the mimetype + + """ + yield from self._mimetypes.get(ids) + def content_ctags_missing(self, ctags): """List ctags missing from storage. 
diff --git a/swh/indexer/tests/storage/test_in_memory.py b/swh/indexer/tests/storage/test_in_memory.py --- a/swh/indexer/tests/storage/test_in_memory.py +++ b/swh/indexer/tests/storage/test_in_memory.py @@ -18,22 +18,6 @@ pass @pytest.mark.xfail - def test_content_mimetype_missing(self): - pass - - @pytest.mark.xfail - def test_content_mimetype_add__drop_duplicate(self): - pass - - @pytest.mark.xfail - def test_content_mimetype_add__update_in_place_duplicate(self): - pass - - @pytest.mark.xfail - def test_content_mimetype_get(self): - pass - - @pytest.mark.xfail def test_content_language_missing(self): pass diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py --- a/swh/indexer/tests/test_mimetype.py +++ b/swh/indexer/tests/test_mimetype.py @@ -4,7 +4,6 @@ # See top-level LICENSE file for more information import unittest -import logging from unittest.mock import patch @@ -61,12 +60,6 @@ }, } - def prepare(self): - super().prepare() - self.idx_storage = BasicMockIndexerStorage() - self.log = logging.getLogger('swh.indexer') - self.objstorage = MockObjStorage() - class TestMimetypeIndexer(CommonContentIndexerTest, unittest.TestCase): """Mimetype indexer test scenarios: @@ -75,8 +68,13 @@ - Unknown sha1 in the input list are not indexed """ + + def get_indexer_results(self, ids): + yield from self.idx_storage.content_mimetype_get(ids) + def setUp(self): self.indexer = MimetypeTestIndexer() + self.idx_storage = self.indexer.idx_storage self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5' self.id1 = '688a5ef812c53907562fe379d4b3851e69c7cb15'