Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/storage/in_memory.py
# Copyright (C) 2018 The Software Heritage developers | # Copyright (C) 2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import bisect | import bisect | ||||
from collections import defaultdict, Counter | from collections import defaultdict, Counter | ||||
import itertools | import itertools | ||||
import json | import json | ||||
import operator | import operator | ||||
import math | import math | ||||
import re | import re | ||||
from swh.storage.journal_writer import get_journal_writer | |||||
from . import MAPPING_NAMES | from . import MAPPING_NAMES | ||||
SHA1_DIGEST_SIZE = 160 | SHA1_DIGEST_SIZE = 160 | ||||
def _transform_tool(tool): | def _transform_tool(tool): | ||||
return { | return { | ||||
'id': tool['id'], | 'id': tool['id'], | ||||
▲ Show 20 Lines • Show All 162 Lines • ▼ Show 20 Lines | def delete(self, entries): | ||||
self._tools_per_id[id_].remove(tool_id) | self._tools_per_id[id_].remove(tool_id) | ||||
if key in self._data: | if key in self._data: | ||||
del self._data[key] | del self._data[key] | ||||
class IndexerStorage: | class IndexerStorage: | ||||
"""In-memory SWH indexer storage.""" | """In-memory SWH indexer storage.""" | ||||
def __init__(self, journal_writer=None):
    """Create empty in-memory stores and, optionally, a journal writer.

    Args:
        journal_writer (dict): keyword arguments forwarded to
            ``get_journal_writer``. When falsy (the default), no
            writer is created and ``self.journal_writer`` is None.

    """
    # One tool registry, shared by reference with every sub-storage.
    tools = {}
    self._tools = tools

    self._mimetypes = SubStorage(tools)
    self._languages = SubStorage(tools)
    self._content_ctags = SubStorage(tools)
    self._licenses = SubStorage(tools)
    self._content_metadata = SubStorage(tools)
    self._revision_intrinsic_metadata = SubStorage(tools)
    self._origin_intrinsic_metadata = SubStorage(tools)

    self.journal_writer = (
        get_journal_writer(**journal_writer) if journal_writer else None)
def content_mimetype_missing(self, mimetypes): | def content_mimetype_missing(self, mimetypes): | ||||
"""Generate mimetypes missing from storage. | """Generate mimetypes missing from storage. | ||||
Args: | Args: | ||||
mimetypes (iterable): iterable of dict with keys: | mimetypes (iterable): iterable of dict with keys: | ||||
- **id** (bytes): sha1 identifier | - **id** (bytes): sha1 identifier | ||||
- **indexer_configuration_id** (int): tool used to compute the | - **indexer_configuration_id** (int): tool used to compute the | ||||
▲ Show 20 Lines • Show All 428 Lines • ▼ Show 20 Lines | def origin_intrinsic_metadata_add(self, metadata, | ||||
- **indexer_configuration_id**: tool used to compute metadata | - **indexer_configuration_id**: tool used to compute metadata | ||||
- **mappings** (List[str]): list of mappings used to translate | - **mappings** (List[str]): list of mappings used to translate | ||||
these metadata | these metadata | ||||
conflict_update: Flag to determine if we want to overwrite (true) | conflict_update: Flag to determine if we want to overwrite (true) | ||||
or skip duplicates (false, the default) | or skip duplicates (false, the default) | ||||
""" | """ | ||||
if self.journal_writer: | |||||
journal_metadata = [] | |||||
for item in metadata: | |||||
item = item.copy() | |||||
item['tool'] = _transform_tool( | |||||
self._tools[item.pop('indexer_configuration_id')]) | |||||
journal_metadata.append(item) | |||||
self.journal_writer.write_additions( | |||||
'origin_intrinsic_metadata', journal_metadata) | |||||
self._origin_intrinsic_metadata.add(metadata, conflict_update) | self._origin_intrinsic_metadata.add(metadata, conflict_update) | ||||
def origin_intrinsic_metadata_delete(self, entries): | def origin_intrinsic_metadata_delete(self, entries): | ||||
"""Remove origin metadata from the storage. | """Remove origin metadata from the storage. | ||||
Args: | Args: | ||||
entries (dict): dictionaries with the following keys: | entries (dict): dictionaries with the following keys: | ||||
- **id** (int): origin identifier | - **id** (int): origin identifier | ||||
▲ Show 20 Lines • Show All 183 Lines • Show Last 20 Lines |