Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/storage/in_memory.py
# Copyright (C) 2018-2020 The Software Heritage developers | # Copyright (C) 2018-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from collections import Counter, defaultdict | from collections import Counter, defaultdict | ||||
import itertools | import itertools | ||||
import json | import json | ||||
import math | import math | ||||
Show All 10 Lines | from typing import ( | ||||
Tuple, | Tuple, | ||||
Type, | Type, | ||||
TypeVar, | TypeVar, | ||||
Union, | Union, | ||||
) | ) | ||||
from swh.core.collections import SortedList | from swh.core.collections import SortedList | ||||
from swh.model.hashutil import hash_to_bytes, hash_to_hex | from swh.model.hashutil import hash_to_bytes, hash_to_hex | ||||
from swh.model.model import SHA1_SIZE, Sha1Git | from swh.model.model import SHA1_SIZE | ||||
from swh.storage.utils import get_partition_bounds_bytes | from swh.storage.utils import get_partition_bounds_bytes | ||||
from . import MAPPING_NAMES, check_id_duplicates | from . import MAPPING_NAMES, check_id_duplicates | ||||
from .exc import IndexerStorageArgumentException | from .exc import IndexerStorageArgumentException | ||||
from .interface import PagedResult, Sha1 | from .interface import PagedResult, Sha1 | ||||
from .model import ( | from .model import ( | ||||
BaseRow, | BaseRow, | ||||
ContentCtagsRow, | |||||
ContentLanguageRow, | ContentLanguageRow, | ||||
ContentLicenseRow, | ContentLicenseRow, | ||||
ContentMetadataRow, | ContentMetadataRow, | ||||
ContentMimetypeRow, | ContentMimetypeRow, | ||||
DirectoryIntrinsicMetadataRow, | DirectoryIntrinsicMetadataRow, | ||||
OriginExtrinsicMetadataRow, | OriginExtrinsicMetadataRow, | ||||
OriginIntrinsicMetadataRow, | OriginIntrinsicMetadataRow, | ||||
) | ) | ||||
▲ Show 20 Lines • Show All 198 Lines • ▼ Show 20 Lines | def __init__(self, journal_writer=None): | ||||
"version": tool["tool_version"], | "version": tool["tool_version"], | ||||
"configuration": tool["tool_configuration"], | "configuration": tool["tool_configuration"], | ||||
} | } | ||||
self.journal_writer = JournalWriter(tool_getter, journal_writer) | self.journal_writer = JournalWriter(tool_getter, journal_writer) | ||||
args = (self._tools, self.journal_writer) | args = (self._tools, self.journal_writer) | ||||
self._mimetypes = SubStorage(ContentMimetypeRow, *args) | self._mimetypes = SubStorage(ContentMimetypeRow, *args) | ||||
self._languages = SubStorage(ContentLanguageRow, *args) | self._languages = SubStorage(ContentLanguageRow, *args) | ||||
self._content_ctags = SubStorage(ContentCtagsRow, *args) | |||||
self._licenses = SubStorage(ContentLicenseRow, *args) | self._licenses = SubStorage(ContentLicenseRow, *args) | ||||
self._content_metadata = SubStorage(ContentMetadataRow, *args) | self._content_metadata = SubStorage(ContentMetadataRow, *args) | ||||
self._directory_intrinsic_metadata = SubStorage( | self._directory_intrinsic_metadata = SubStorage( | ||||
DirectoryIntrinsicMetadataRow, *args | DirectoryIntrinsicMetadataRow, *args | ||||
) | ) | ||||
self._origin_intrinsic_metadata = SubStorage(OriginIntrinsicMetadataRow, *args) | self._origin_intrinsic_metadata = SubStorage(OriginIntrinsicMetadataRow, *args) | ||||
self._origin_extrinsic_metadata = SubStorage(OriginExtrinsicMetadataRow, *args) | self._origin_extrinsic_metadata = SubStorage(OriginExtrinsicMetadataRow, *args) | ||||
Show All 35 Lines | def content_language_get(self, ids: Iterable[Sha1]) -> List[ContentLanguageRow]: | ||||
return self._languages.get(ids) | return self._languages.get(ids) | ||||
def content_language_add( | def content_language_add( | ||||
self, languages: List[ContentLanguageRow] | self, languages: List[ContentLanguageRow] | ||||
) -> Dict[str, int]: | ) -> Dict[str, int]: | ||||
added = self._languages.add(languages) | added = self._languages.add(languages) | ||||
return {"content_language:add": added} | return {"content_language:add": added} | ||||
def content_ctags_missing(self, ctags: Iterable[Dict]) -> List[Tuple[Sha1, int]]: | |||||
return self._content_ctags.missing(ctags) | |||||
def content_ctags_get(self, ids: Iterable[Sha1]) -> List[ContentCtagsRow]: | |||||
return self._content_ctags.get(ids) | |||||
def content_ctags_add(self, ctags: List[ContentCtagsRow]) -> Dict[str, int]: | |||||
added = self._content_ctags.add(ctags) | |||||
return {"content_ctags:add": added} | |||||
def content_ctags_search( | |||||
self, expression: str, limit: int = 10, last_sha1: Optional[Sha1] = None | |||||
) -> List[ContentCtagsRow]: | |||||
nb_matches = 0 | |||||
items_per_id: Dict[Tuple[Sha1Git, ToolId], List[ContentCtagsRow]] = {} | |||||
for item in sorted(self._content_ctags.get_all()): | |||||
if item.id <= (last_sha1 or bytes(0 for _ in range(SHA1_DIGEST_SIZE))): | |||||
continue | |||||
items_per_id.setdefault( | |||||
(item.id, item.indexer_configuration_id), [] | |||||
).append(item) | |||||
results = [] | |||||
for items in items_per_id.values(): | |||||
for item in items: | |||||
if item.name != expression: | |||||
continue | |||||
nb_matches += 1 | |||||
if nb_matches > limit: | |||||
break | |||||
results.append(item) | |||||
return results | |||||
def content_fossology_license_get( | def content_fossology_license_get( | ||||
self, ids: Iterable[Sha1] | self, ids: Iterable[Sha1] | ||||
) -> List[ContentLicenseRow]: | ) -> List[ContentLicenseRow]: | ||||
return self._licenses.get(ids) | return self._licenses.get(ids) | ||||
def content_fossology_license_add( | def content_fossology_license_add( | ||||
self, licenses: List[ContentLicenseRow] | self, licenses: List[ContentLicenseRow] | ||||
) -> Dict[str, int]: | ) -> Dict[str, int]: | ||||
▲ Show 20 Lines • Show All 175 Lines • Show Last 20 Lines |