Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124871
D783.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
10 KB
Subscribers
None
D783.diff
View Options
diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -346,7 +346,7 @@
- **id** (bytes): content's identifier
- **name** (str): symbol's name
- **kind** (str): symbol's kind
- - **language** (str): language for that content
+ - **lang** (str): language for that content
- **tool** (dict): tool used to compute the ctags' info
@@ -365,7 +365,7 @@
- **id** (bytes): sha1
- **ctags** ([list): List of dictionary with keys: name, kind,
- line, language
+ line, lang
"""
def _convert_ctags(__ctags):
diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py
--- a/swh/indexer/storage/in_memory.py
+++ b/swh/indexer/storage/in_memory.py
@@ -6,28 +6,30 @@
from collections import defaultdict
import json
+SHA1_DIGEST_SIZE = 160
-class MetadataStorage:
- """Implements missing/get/add logic for both content_metadata and
- revision_metadata."""
+
+def _transform_tool(tool):
+ return {
+ 'id': tool['id'],
+ 'name': tool['tool_name'],
+ 'version': tool['tool_version'],
+ 'configuration': tool['tool_configuration'],
+ }
+
+
+class SubStorage:
+ """Implements common missing/get/add logic for each indexer type."""
def __init__(self, tools):
self._tools = tools
- self._metadata = {} # map (id_, tool_id) -> metadata_dict
+ self._data = {} # map (id_, tool_id) -> metadata_dict
self._tools_per_id = defaultdict(set) # map id_ -> Set[tool_id]
- def _transform_tool(self, tool):
- return {
- 'id': tool['id'],
- 'name': tool['tool_name'],
- 'version': tool['tool_version'],
- 'configuration': tool['tool_configuration'],
- }
-
def missing(self, ids):
- """List metadata missing from storage.
+ """List data missing from storage.
Args:
- metadata (iterable): dictionaries with keys:
+ data (iterable): dictionaries with keys:
- **id** (bytes): sha1 identifier
- **indexer_configuration_id** (int): tool used to compute
@@ -44,7 +46,7 @@
yield id_
def get(self, ids):
- """Retrieve metadata per id.
+ """Retrieve data per id.
Args:
ids (iterable): sha1 checksums
@@ -53,8 +55,8 @@
dict: dictionaries with the following keys:
- **id** (bytes)
- - **translated_metadata** (str): associated metadata
- **tool** (dict): tool used to compute metadata
+ - arbitrary data (as provided to `add`)
"""
for id_ in ids:
@@ -62,35 +64,36 @@
key = (id_, tool_id)
yield {
'id': id_,
- 'tool': self._transform_tool(self._tools[tool_id]),
- 'translated_metadata': self._metadata[key],
+ 'tool': _transform_tool(self._tools[tool_id]),
+ **self._data[key],
}
- def add(self, metadata, conflict_update):
- """Add metadata not present in storage.
+ def add(self, data, conflict_update):
+ """Add data not present in storage.
Args:
- metadata (iterable): dictionaries with keys:
+ data (iterable): dictionaries with keys:
- **id**: sha1
- - **translated_metadata**: arbitrary dict
- **indexer_configuration_id**: tool used to compute the
results
+ - arbitrary data
conflict_update (bool): Flag to determine if we want to overwrite
(true) or skip duplicates (false)
"""
- for item in metadata:
- tool_id = item['indexer_configuration_id']
- data = item['translated_metadata']
- id_ = item['id']
+ for item in data:
+ item = item.copy()
+ tool_id = item.pop('indexer_configuration_id')
+ id_ = item.pop('id')
+ data = item
if not conflict_update and \
tool_id in self._tools_per_id.get(id_, set()):
# Duplicate, should not be updated
continue
key = (id_, tool_id)
- self._metadata[key] = data
+ self._data[key] = data
self._tools_per_id[id_].add(tool_id)
@@ -99,8 +102,122 @@
def __init__(self):
self._tools = {}
- self._content_metadata = MetadataStorage(self._tools)
- self._revision_metadata = MetadataStorage(self._tools)
+ self._content_ctags = SubStorage(self._tools)
+ self._content_metadata = SubStorage(self._tools)
+ self._revision_metadata = SubStorage(self._tools)
+
+ def content_ctags_missing(self, ctags):
+ """List ctags missing from storage.
+
+ Args:
+ ctags (iterable): dicts with keys:
+
+ - **id** (bytes): sha1 identifier
+ - **indexer_configuration_id** (int): tool used to compute
+ the results
+
+ Yields:
+ an iterable of missing id for the tuple (id,
+ indexer_configuration_id)
+
+ """
+ yield from self._content_ctags.missing(ctags)
+
+ def content_ctags_get(self, ids):
+ """Retrieve ctags per id.
+
+ Args:
+ ids (iterable): sha1 checksums
+
+ Yields:
+ Dictionaries with keys:
+
+ - **id** (bytes): content's identifier
+ - **name** (str): symbol's name
+ - **kind** (str): symbol's kind
+ - **lang** (str): language for that content
+ - **tool** (dict): tool used to compute the ctags' info
+
+
+ """
+ for item in self._content_ctags.get(ids):
+ for item_ctags_item in item['ctags']:
+ yield {
+ 'id': item['id'],
+ 'tool': item['tool'],
+ **item_ctags_item
+ }
+
+ def content_ctags_add(self, ctags, conflict_update=False):
+ """Add ctags not present in storage
+
+ Args:
+ ctags (iterable): dictionaries with keys:
+
+ - **id** (bytes): sha1
+ - **ctags** ([list): List of dictionary with keys: name, kind,
+ line, lang
+ - **indexer_configuration_id**: tool used to compute the
+ results
+
+ """
+ for item in ctags:
+ tool_id = item['indexer_configuration_id']
+ if conflict_update:
+ item_ctags = []
+ else:
+ # TODO: this merges old ctags with new ctags. This is
+ # pointless, new ctags should replace the old ones.
+ existing = list(self._content_ctags.get([item['id']]))
+ item_ctags = [
+ {
+ key: ctags_item[key]
+ for key in ('name', 'kind', 'line', 'lang')
+ }
+ for existing_item in existing
+ if existing_item['tool']['id'] == tool_id
+ for ctags_item in existing_item['ctags']
+ ]
+ for new_item_ctags in item['ctags']:
+ if new_item_ctags not in item_ctags:
+ item_ctags.append(new_item_ctags)
+ self._content_ctags.add([
+ {
+ 'id': item['id'],
+ 'indexer_configuration_id': tool_id,
+ 'ctags': item_ctags,
+ }
+ ], conflict_update=True)
+
+ def content_ctags_search(self, expression,
+ limit=10, last_sha1=None, db=None, cur=None):
+ """Search through content's raw ctags symbols.
+
+ Args:
+ expression (str): Expression to search for
+ limit (int): Number of rows to return (default to 10).
+ last_sha1 (str): Offset from which retrieving data (default to '').
+
+ Yields:
+ rows of ctags including id, name, lang, kind, line, etc...
+
+ """
+ nb_matches = 0
+ for ((id_, tool_id), item) in \
+ sorted(self._content_ctags._data.items()):
+ if id_ <= (last_sha1 or bytes(0 for _ in range(SHA1_DIGEST_SIZE))):
+ continue
+ nb_matches += 1
+ for ctags_item in item['ctags']:
+ if ctags_item['name'] != expression:
+ continue
+ yield {
+ 'id': id_,
+ 'tool': _transform_tool(self._tools[tool_id]),
+ **ctags_item
+ }
+ if nb_matches >= limit:
+ return
def content_metadata_missing(self, metadata):
"""List metadata missing from storage.
diff --git a/swh/indexer/tests/storage/test_in_memory.py b/swh/indexer/tests/storage/test_in_memory.py
--- a/swh/indexer/tests/storage/test_in_memory.py
+++ b/swh/indexer/tests/storage/test_in_memory.py
@@ -50,30 +50,6 @@
pass
@pytest.mark.xfail
- def test_content_ctags_missing(self):
- pass
-
- @pytest.mark.xfail
- def test_content_ctags_get(self):
- pass
-
- @pytest.mark.xfail
- def test_content_ctags_search(self):
- pass
-
- @pytest.mark.xfail
- def test_content_ctags_search_no_result(self):
- pass
-
- @pytest.mark.xfail
- def test_content_ctags_add__add_new_ctags_added(self):
- pass
-
- @pytest.mark.xfail
- def test_content_ctags_add__update_in_place(self):
- pass
-
- @pytest.mark.xfail
def test_content_fossology_license_get(self):
pass
diff --git a/swh/indexer/tests/test_ctags.py b/swh/indexer/tests/test_ctags.py
--- a/swh/indexer/tests/test_ctags.py
+++ b/swh/indexer/tests/test_ctags.py
@@ -11,7 +11,7 @@
)
from swh.indexer.tests.test_utils import (
- BasicMockIndexerStorage, MockObjStorage, CommonContentIndexerTest,
+ CommonContentIndexerTest,
CommonIndexerWithErrorsTest, CommonIndexerNoTool,
SHA1_TO_CTAGS, NoDiskIndexer, BASE_TEST_CONFIG
)
@@ -99,12 +99,6 @@
'workdir': '/nowhere',
}
- def prepare(self):
- super().prepare()
- self.idx_storage = BasicMockIndexerStorage()
- self.objstorage = MockObjStorage()
- self.tool_config = self.config['tools']['configuration']
-
class TestCtagsIndexer(CommonContentIndexerTest, unittest.TestCase):
"""Ctags indexer test scenarios:
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 21 2024, 10:11 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218124
Attached To
D783: rm ctags mocks + add ctags to idx db + fix doc.
Event Timeline
Log In to Comment