Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/storage/in_memory.py
Show First 20 Lines • Show All 164 Lines • ▼ Show 20 Lines | |||||
class IndexerStorage: | class IndexerStorage: | ||||
"""In-memory SWH indexer storage.""" | """In-memory SWH indexer storage.""" | ||||
def __init__(self): | def __init__(self): | ||||
self._tools = {} | self._tools = {} | ||||
self._mimetypes = SubStorage(self._tools) | self._mimetypes = SubStorage(self._tools) | ||||
self._languages = SubStorage(self._tools) | |||||
self._content_ctags = SubStorage(self._tools) | self._content_ctags = SubStorage(self._tools) | ||||
self._licenses = SubStorage(self._tools) | self._licenses = SubStorage(self._tools) | ||||
self._content_metadata = SubStorage(self._tools) | self._content_metadata = SubStorage(self._tools) | ||||
self._revision_metadata = SubStorage(self._tools) | self._revision_metadata = SubStorage(self._tools) | ||||
def content_mimetype_missing(self, mimetypes): | def content_mimetype_missing(self, mimetypes): | ||||
"""Generate mimetypes missing from storage. | """Generate mimetypes missing from storage. | ||||
▲ Show 20 Lines • Show All 67 Lines • ▼ Show 20 Lines | def content_mimetype_get(self, ids, db=None, cur=None): | ||||
- **id** (bytes): sha1 identifier | - **id** (bytes): sha1 identifier | ||||
- **mimetype** (bytes): raw content's mimetype | - **mimetype** (bytes): raw content's mimetype | ||||
- **encoding** (bytes): raw content's encoding | - **encoding** (bytes): raw content's encoding | ||||
- **tool** (dict): Tool used to compute the language | - **tool** (dict): Tool used to compute the language | ||||
""" | """ | ||||
yield from self._mimetypes.get(ids) | yield from self._mimetypes.get(ids) | ||||
def content_language_missing(self, languages): | |||||
"""List languages missing from storage. | |||||
Args: | |||||
languages (iterable): dictionaries with keys: | |||||
- **id** (bytes): sha1 identifier | |||||
- **indexer_configuration_id** (int): tool used to compute | |||||
the results | |||||
Yields: | |||||
an iterable of missing id for the tuple (id, | |||||
indexer_configuration_id) | |||||
""" | |||||
yield from self._languages.missing(languages) | |||||
def content_language_get(self, ids): | |||||
"""Retrieve full content language per ids. | |||||
Args: | |||||
ids (iterable): sha1 identifier | |||||
Yields: | |||||
languages (iterable): dictionaries with keys: | |||||
- **id** (bytes): sha1 identifier | |||||
- **lang** (bytes): raw content's language | |||||
- **tool** (dict): Tool used to compute the language | |||||
""" | |||||
yield from self._languages.get(ids) | |||||
def content_language_add(self, languages, conflict_update=False): | |||||
"""Add languages not present in storage. | |||||
Args: | |||||
languages (iterable): dictionaries with keys: | |||||
- **id** (bytes): sha1 | |||||
- **lang** (bytes): language detected | |||||
conflict_update (bool): Flag to determine if we want to | |||||
overwrite (true) or skip duplicates (false, the | |||||
default) | |||||
""" | |||||
self._languages.add(languages, conflict_update) | |||||
def content_ctags_missing(self, ctags): | def content_ctags_missing(self, ctags): | ||||
"""List ctags missing from storage. | """List ctags missing from storage. | ||||
Args: | Args: | ||||
ctags (iterable): dicts with keys: | ctags (iterable): dicts with keys: | ||||
- **id** (bytes): sha1 identifier | - **id** (bytes): sha1 identifier | ||||
- **indexer_configuration_id** (int): tool used to compute | - **indexer_configuration_id** (int): tool used to compute | ||||
▲ Show 20 Lines • Show All 297 Lines • Show Last 20 Lines |