Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/storage/in_memory.py
Show First 20 Lines • Show All 187 Lines • ▼ Show 20 Lines | ) -> PagedResult[Sha1]: | ||||
if counter >= limit: | if counter >= limit: | ||||
next_page_token = hash_to_hex(sha1) | next_page_token = hash_to_hex(sha1) | ||||
break | break | ||||
ids.append(sha1) | ids.append(sha1) | ||||
assert len(ids) <= limit | assert len(ids) <= limit | ||||
return PagedResult(results=ids, next_page_token=next_page_token) | return PagedResult(results=ids, next_page_token=next_page_token) | ||||
def add(self, data: Iterable[TValue], conflict_update: bool) -> int: | def add(self, data: Iterable[TValue]) -> int: | ||||
"""Add data not present in storage. | """Add data not present in storage. | ||||
Args: | Args: | ||||
data (iterable): dictionaries with keys: | data (iterable): dictionaries with keys: | ||||
- **id**: sha1 | - **id**: sha1 | ||||
- **indexer_configuration_id**: tool used to compute the | - **indexer_configuration_id**: tool used to compute the | ||||
results | results | ||||
- arbitrary data | - arbitrary data | ||||
conflict_update (bool): Flag to determine if we want to overwrite | |||||
(true) or skip duplicates (false) | |||||
""" | """ | ||||
data = list(data) | data = list(data) | ||||
check_id_duplicates(data) | check_id_duplicates(data) | ||||
count = 0 | count = 0 | ||||
for obj in data: | for obj in data: | ||||
item = obj.to_dict() | item = obj.to_dict() | ||||
id_ = item.pop("id") | id_ = item.pop("id") | ||||
tool_id = item["indexer_configuration_id"] | tool_id = item["indexer_configuration_id"] | ||||
key = _key_from_dict(obj.unique_key()) | key = _key_from_dict(obj.unique_key()) | ||||
if not conflict_update and key in self._data[id_]: | |||||
# Duplicate, should not be updated | |||||
continue | |||||
self._data[id_][key] = item | self._data[id_][key] = item | ||||
self._tools_per_id[id_].add(tool_id) | self._tools_per_id[id_].add(tool_id) | ||||
count += 1 | count += 1 | ||||
if id_ not in self._sorted_ids: | if id_ not in self._sorted_ids: | ||||
self._sorted_ids.add(id_) | self._sorted_ids.add(id_) | ||||
return count | return count | ||||
Show All 30 Lines | def content_mimetype_get_partition( | ||||
page_token: Optional[str] = None, | page_token: Optional[str] = None, | ||||
limit: int = 1000, | limit: int = 1000, | ||||
) -> PagedResult[Sha1]: | ) -> PagedResult[Sha1]: | ||||
return self._mimetypes.get_partition( | return self._mimetypes.get_partition( | ||||
indexer_configuration_id, partition_id, nb_partitions, page_token, limit | indexer_configuration_id, partition_id, nb_partitions, page_token, limit | ||||
) | ) | ||||
def content_mimetype_add( | def content_mimetype_add( | ||||
self, mimetypes: List[ContentMimetypeRow], conflict_update: bool = False | self, mimetypes: List[ContentMimetypeRow] | ||||
) -> Dict[str, int]: | ) -> Dict[str, int]: | ||||
added = self._mimetypes.add(mimetypes, conflict_update) | added = self._mimetypes.add(mimetypes) | ||||
return {"content_mimetype:add": added} | return {"content_mimetype:add": added} | ||||
def content_mimetype_get(self, ids: Iterable[Sha1]) -> List[ContentMimetypeRow]: | def content_mimetype_get(self, ids: Iterable[Sha1]) -> List[ContentMimetypeRow]: | ||||
return self._mimetypes.get(ids) | return self._mimetypes.get(ids) | ||||
def content_language_missing( | def content_language_missing( | ||||
self, languages: Iterable[Dict] | self, languages: Iterable[Dict] | ||||
) -> List[Tuple[Sha1, int]]: | ) -> List[Tuple[Sha1, int]]: | ||||
return self._languages.missing(languages) | return self._languages.missing(languages) | ||||
def content_language_get(self, ids: Iterable[Sha1]) -> List[ContentLanguageRow]: | def content_language_get(self, ids: Iterable[Sha1]) -> List[ContentLanguageRow]: | ||||
return self._languages.get(ids) | return self._languages.get(ids) | ||||
def content_language_add( | def content_language_add( | ||||
self, languages: List[ContentLanguageRow], conflict_update: bool = False | self, languages: List[ContentLanguageRow] | ||||
) -> Dict[str, int]: | ) -> Dict[str, int]: | ||||
added = self._languages.add(languages, conflict_update) | added = self._languages.add(languages) | ||||
return {"content_language:add": added} | return {"content_language:add": added} | ||||
def content_ctags_missing(self, ctags: Iterable[Dict]) -> List[Tuple[Sha1, int]]: | def content_ctags_missing(self, ctags: Iterable[Dict]) -> List[Tuple[Sha1, int]]: | ||||
return self._content_ctags.missing(ctags) | return self._content_ctags.missing(ctags) | ||||
def content_ctags_get(self, ids: Iterable[Sha1]) -> List[ContentCtagsRow]: | def content_ctags_get(self, ids: Iterable[Sha1]) -> List[ContentCtagsRow]: | ||||
return self._content_ctags.get(ids) | return self._content_ctags.get(ids) | ||||
def content_ctags_add( | def content_ctags_add(self, ctags: List[ContentCtagsRow]) -> Dict[str, int]: | ||||
self, ctags: List[ContentCtagsRow], conflict_update: bool = False | added = self._content_ctags.add(ctags) | ||||
) -> Dict[str, int]: | |||||
added = self._content_ctags.add(ctags, conflict_update,) | |||||
return {"content_ctags:add": added} | return {"content_ctags:add": added} | ||||
def content_ctags_search( | def content_ctags_search( | ||||
self, expression: str, limit: int = 10, last_sha1: Optional[Sha1] = None | self, expression: str, limit: int = 10, last_sha1: Optional[Sha1] = None | ||||
) -> List[ContentCtagsRow]: | ) -> List[ContentCtagsRow]: | ||||
nb_matches = 0 | nb_matches = 0 | ||||
items_per_id: Dict[Tuple[Sha1Git, ToolId], List[ContentCtagsRow]] = {} | items_per_id: Dict[Tuple[Sha1Git, ToolId], List[ContentCtagsRow]] = {} | ||||
for item in sorted(self._content_ctags.get_all()): | for item in sorted(self._content_ctags.get_all()): | ||||
Show All 16 Lines | ) -> List[ContentCtagsRow]: | ||||
return results | return results | ||||
def content_fossology_license_get( | def content_fossology_license_get( | ||||
self, ids: Iterable[Sha1] | self, ids: Iterable[Sha1] | ||||
) -> List[ContentLicenseRow]: | ) -> List[ContentLicenseRow]: | ||||
return self._licenses.get(ids) | return self._licenses.get(ids) | ||||
def content_fossology_license_add( | def content_fossology_license_add( | ||||
self, licenses: List[ContentLicenseRow], conflict_update: bool = False | self, licenses: List[ContentLicenseRow] | ||||
) -> Dict[str, int]: | ) -> Dict[str, int]: | ||||
added = self._licenses.add(licenses, conflict_update) | added = self._licenses.add(licenses) | ||||
return {"content_fossology_license:add": added} | return {"content_fossology_license:add": added} | ||||
def content_fossology_license_get_partition( | def content_fossology_license_get_partition( | ||||
self, | self, | ||||
indexer_configuration_id: int, | indexer_configuration_id: int, | ||||
partition_id: int, | partition_id: int, | ||||
nb_partitions: int, | nb_partitions: int, | ||||
page_token: Optional[str] = None, | page_token: Optional[str] = None, | ||||
limit: int = 1000, | limit: int = 1000, | ||||
) -> PagedResult[Sha1]: | ) -> PagedResult[Sha1]: | ||||
return self._licenses.get_partition( | return self._licenses.get_partition( | ||||
indexer_configuration_id, partition_id, nb_partitions, page_token, limit | indexer_configuration_id, partition_id, nb_partitions, page_token, limit | ||||
) | ) | ||||
def content_metadata_missing( | def content_metadata_missing( | ||||
self, metadata: Iterable[Dict] | self, metadata: Iterable[Dict] | ||||
) -> List[Tuple[Sha1, int]]: | ) -> List[Tuple[Sha1, int]]: | ||||
return self._content_metadata.missing(metadata) | return self._content_metadata.missing(metadata) | ||||
def content_metadata_get(self, ids: Iterable[Sha1]) -> List[ContentMetadataRow]: | def content_metadata_get(self, ids: Iterable[Sha1]) -> List[ContentMetadataRow]: | ||||
return self._content_metadata.get(ids) | return self._content_metadata.get(ids) | ||||
def content_metadata_add( | def content_metadata_add( | ||||
self, metadata: List[ContentMetadataRow], conflict_update: bool = False | self, metadata: List[ContentMetadataRow] | ||||
) -> Dict[str, int]: | ) -> Dict[str, int]: | ||||
added = self._content_metadata.add(metadata, conflict_update) | added = self._content_metadata.add(metadata) | ||||
return {"content_metadata:add": added} | return {"content_metadata:add": added} | ||||
def revision_intrinsic_metadata_missing( | def revision_intrinsic_metadata_missing( | ||||
self, metadata: Iterable[Dict] | self, metadata: Iterable[Dict] | ||||
) -> List[Tuple[Sha1, int]]: | ) -> List[Tuple[Sha1, int]]: | ||||
return self._revision_intrinsic_metadata.missing(metadata) | return self._revision_intrinsic_metadata.missing(metadata) | ||||
def revision_intrinsic_metadata_get( | def revision_intrinsic_metadata_get( | ||||
self, ids: Iterable[Sha1] | self, ids: Iterable[Sha1] | ||||
) -> List[RevisionIntrinsicMetadataRow]: | ) -> List[RevisionIntrinsicMetadataRow]: | ||||
return self._revision_intrinsic_metadata.get(ids) | return self._revision_intrinsic_metadata.get(ids) | ||||
def revision_intrinsic_metadata_add( | def revision_intrinsic_metadata_add( | ||||
self, | self, metadata: List[RevisionIntrinsicMetadataRow] | ||||
metadata: List[RevisionIntrinsicMetadataRow], | |||||
conflict_update: bool = False, | |||||
) -> Dict[str, int]: | ) -> Dict[str, int]: | ||||
added = self._revision_intrinsic_metadata.add(metadata, conflict_update) | added = self._revision_intrinsic_metadata.add(metadata) | ||||
return {"revision_intrinsic_metadata:add": added} | return {"revision_intrinsic_metadata:add": added} | ||||
def origin_intrinsic_metadata_get( | def origin_intrinsic_metadata_get( | ||||
self, urls: Iterable[str] | self, urls: Iterable[str] | ||||
) -> List[OriginIntrinsicMetadataRow]: | ) -> List[OriginIntrinsicMetadataRow]: | ||||
return self._origin_intrinsic_metadata.get(urls) | return self._origin_intrinsic_metadata.get(urls) | ||||
def origin_intrinsic_metadata_add( | def origin_intrinsic_metadata_add( | ||||
self, metadata: List[OriginIntrinsicMetadataRow], conflict_update: bool = False | self, metadata: List[OriginIntrinsicMetadataRow] | ||||
) -> Dict[str, int]: | ) -> Dict[str, int]: | ||||
added = self._origin_intrinsic_metadata.add(metadata, conflict_update) | added = self._origin_intrinsic_metadata.add(metadata) | ||||
return {"origin_intrinsic_metadata:add": added} | return {"origin_intrinsic_metadata:add": added} | ||||
def origin_intrinsic_metadata_search_fulltext( | def origin_intrinsic_metadata_search_fulltext( | ||||
self, conjunction: List[str], limit: int = 100 | self, conjunction: List[str], limit: int = 100 | ||||
) -> List[OriginIntrinsicMetadataRow]: | ) -> List[OriginIntrinsicMetadataRow]: | ||||
# A very crude fulltext search implementation, but that's enough | # A very crude fulltext search implementation, but that's enough | ||||
# to work on English metadata | # to work on English metadata | ||||
tokens_re = re.compile("[a-zA-Z0-9]+") | tokens_re = re.compile("[a-zA-Z0-9]+") | ||||
▲ Show 20 Lines • Show All 99 Lines • Show Last 20 Lines |