Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/storage/in_memory.py
Show First 20 Lines • Show All 189 Lines • ▼ Show 20 Lines | class IndexerStorage: | ||||
"""In-memory SWH indexer storage.""" | """In-memory SWH indexer storage.""" | ||||
def __init__(self):
    """Create the tool registry and one sub-storage per index type.

    Every ``SubStorage`` is constructed with the same ``_tools`` dict
    object that is kept on the instance.
    """
    tools = {}
    self._tools = tools
    # Build the sub-storages in a fixed order, all on the shared dict.
    for attribute in (
            '_mimetypes',
            '_languages',
            '_content_ctags',
            '_licenses',
            '_content_metadata',
            '_revision_intrinsic_metadata',
            '_origin_intrinsic_metadata',
    ):
        setattr(self, attribute, SubStorage(tools))
def content_mimetype_missing(self, mimetypes): | def content_mimetype_missing(self, mimetypes): | ||||
"""Generate mimetypes missing from storage. | """Generate mimetypes missing from storage. | ||||
Args: | Args: | ||||
mimetypes (iterable): iterable of dict with keys: | mimetypes (iterable): iterable of dict with keys: | ||||
▲ Show 20 Lines • Show All 275 Lines • ▼ Show 20 Lines | def content_fossology_license_get_range( | ||||
- **ids** [bytes]: iterable of content ids within the range. | - **ids** [bytes]: iterable of content ids within the range. | ||||
- **next** (Optional[bytes]): The next range of sha1 starts at | - **next** (Optional[bytes]): The next range of sha1 starts at | ||||
this sha1 if any | this sha1 if any | ||||
""" | """ | ||||
return self._licenses.get_range( | return self._licenses.get_range( | ||||
start, end, indexer_configuration_id, limit) | start, end, indexer_configuration_id, limit) | ||||
def content_metadata_missing(self, metadata):
    """Yield the content metadata entries absent from storage.

    Delegates to the ``missing`` method of the content-metadata
    sub-storage and re-yields whatever it produces.

    Args:
        metadata (iterable): dictionaries with keys:

          - **id** (bytes): sha1 identifier
          - **indexer_configuration_id** (int): tool used to compute
            the results

    Yields:
        missing sha1s

    """
    missing_ids = self._content_metadata.missing(metadata)
    yield from missing_ids
def content_metadata_get(self, ids):
    """Yield the stored content metadata for the given ids.

    Delegates to the ``get`` method of the content-metadata
    sub-storage and re-yields whatever it produces.

    Args:
        ids (iterable): sha1 checksums

    Yields:
        dictionaries with the following keys:

          - **id** (bytes)
          - **metadata**: associated metadata
          - **tool** (dict): tool used to compute metadata

    """
    found = self._content_metadata.get(ids)
    yield from found
def content_metadata_add(self, metadata, conflict_update=False):
    """Add metadata not present in storage.

    Args:
        metadata (iterable): dictionaries with keys:

          - **id**: sha1
          - **metadata**: arbitrary dict
          - **indexer_configuration_id**: tool used to compute the
            results

        conflict_update: Flag to determine if we want to overwrite (true)
          or skip duplicates (false, the default)

    Raises:
        TypeError: if any entry's ``id`` is not a ``bytes`` instance.

    """
    # Materialize first: the type check below iterates the input, so a
    # one-shot iterator (e.g. a generator) would otherwise be exhausted
    # before the sub-storage sees any entry.
    metadata = list(metadata)
    if not all(isinstance(entry['id'], bytes) for entry in metadata):
        raise TypeError('identifiers must be bytes.')
    self._content_metadata.add(metadata, conflict_update)
def revision_intrinsic_metadata_missing(self, metadata): | def revision_intrinsic_metadata_missing(self, metadata): | ||||
"""List metadata missing from storage. | """List metadata missing from storage. | ||||
Args: | Args: | ||||
metadata (iterable): dictionaries with keys: | metadata (iterable): dictionaries with keys: | ||||
- **id** (bytes): sha1_git revision identifier | - **id** (bytes): sha1_git revision identifier | ||||
- **indexer_configuration_id** (int): tool used to compute | - **indexer_configuration_id** (int): tool used to compute | ||||
▲ Show 20 Lines • Show All 288 Lines • Show Last 20 Lines |