Page Menu | Home | Software Heritage

D789.id.diff
No One | Temporary

D789.id.diff

diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py
--- a/swh/indexer/storage/in_memory.py
+++ b/swh/indexer/storage/in_memory.py
@@ -170,6 +170,7 @@
def __init__(self):
self._tools = {}
self._mimetypes = SubStorage(self._tools)
+ self._languages = SubStorage(self._tools)
self._content_ctags = SubStorage(self._tools)
self._licenses = SubStorage(self._tools)
self._content_metadata = SubStorage(self._tools)
@@ -253,6 +254,55 @@
"""
yield from self._mimetypes.get(ids)
+ def content_language_missing(self, languages):
+ """List languages missing from storage.
+
+ Args:
+ languages (iterable): dictionaries with keys:
+
+ - **id** (bytes): sha1 identifier
+ - **indexer_configuration_id** (int): tool used to compute
+ the results
+
+ Yields:
+ an iterable of missing id for the tuple (id,
+ indexer_configuration_id)
+
+ """
+ yield from self._languages.missing(languages)
+
+ def content_language_get(self, ids):
+ """Retrieve full content language per ids.
+
+ Args:
+ ids (iterable): sha1 identifier
+
+ Yields:
+ languages (iterable): dictionaries with keys:
+
+ - **id** (bytes): sha1 identifier
+ - **lang** (bytes): raw content's language
+ - **tool** (dict): Tool used to compute the language
+
+ """
+ yield from self._languages.get(ids)
+
+ def content_language_add(self, languages, conflict_update=False):
+ """Add languages not present in storage.
+
+ Args:
+ languages (iterable): dictionaries with keys:
+
+ - **id** (bytes): sha1
+ - **lang** (bytes): language detected
+
+ conflict_update (bool): Flag to determine if we want to
+ overwrite (true) or skip duplicates (false, the
+ default)
+
+ """
+ self._languages.add(languages, conflict_update)
+
def content_ctags_missing(self, ctags):
"""List ctags missing from storage.
diff --git a/swh/indexer/tests/storage/test_in_memory.py b/swh/indexer/tests/storage/test_in_memory.py
--- a/swh/indexer/tests/storage/test_in_memory.py
+++ b/swh/indexer/tests/storage/test_in_memory.py
@@ -21,22 +21,6 @@
pass
@pytest.mark.xfail
- def test_content_language_missing(self):
- pass
-
- @pytest.mark.xfail
- def test_content_language_get(self):
- pass
-
- @pytest.mark.xfail
- def test_content_language_add__drop_duplicate(self):
- pass
-
- @pytest.mark.xfail
- def test_content_language_add__update_in_place_duplicate(self):
- pass
-
- @pytest.mark.xfail
def test_origin_intrinsic_metadata_get(self):
pass
diff --git a/swh/indexer/tests/test_language.py b/swh/indexer/tests/test_language.py
--- a/swh/indexer/tests/test_language.py
+++ b/swh/indexer/tests/test_language.py
@@ -7,8 +7,8 @@
from swh.indexer import language
from swh.indexer.language import LanguageIndexer
from swh.indexer.tests.test_utils import (
- BasicMockIndexerStorage, MockObjStorage, CommonContentIndexerTest,
- CommonIndexerWithErrorsTest, CommonIndexerNoTool, BASE_TEST_CONFIG
+ CommonContentIndexerTest, CommonIndexerWithErrorsTest,
+ CommonIndexerNoTool, BASE_TEST_CONFIG, fill_storage, fill_obj_storage
)
@@ -30,12 +30,6 @@
}
}
- def prepare(self):
- super().prepare()
- self.idx_storage = BasicMockIndexerStorage()
- self.objstorage = MockObjStorage()
- self.tool_config = self.config['tools']['configuration']
-
class Language(unittest.TestCase):
"""Tests pygments tool for language detection
@@ -60,8 +54,14 @@
- Unknown sha1 in the input list are not indexed
"""
+
+ def get_indexer_results(self, ids):
+ yield from self.indexer.idx_storage.content_language_get(ids)
+
def setUp(self):
self.indexer = LanguageTestIndexer()
+ fill_storage(self.indexer.storage)
+ fill_obj_storage(self.indexer.objstorage)
self.id0 = '02fb2c89e14f7fab46701478c83779c7beb7b069'
self.id1 = '103bc087db1d26afc3a0283f38663d081e9b01e6'

File Metadata

Mime Type
text/plain
Expires
Mar 17 2025, 7:26 PM (7 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220857

Event Timeline