Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/storage/interface.py
# Copyright (C) 2015-2020 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from typing import Dict, List | from typing import Dict, List, Optional, TypeVar | ||||
from swh.core.api import remote_api_endpoint | from swh.core.api import remote_api_endpoint | ||||
from swh.core.api.classes import PagedResult as CorePagedResult | |||||
TResult = TypeVar("TResult") | |||||
PagedResult = CorePagedResult[TResult, str] | |||||
Sha1 = bytes | |||||
class IndexerStorageInterface: | class IndexerStorageInterface: | ||||
@remote_api_endpoint("check_config") | @remote_api_endpoint("check_config") | ||||
def check_config(self, *, check_write): | def check_config(self, *, check_write): | ||||
"""Check that the storage is configured and ready to go.""" | """Check that the storage is configured and ready to go.""" | ||||
... | ... | ||||
Show All 9 Lines | def content_mimetype_missing(self, mimetypes): | ||||
results | results | ||||
Yields: | Yields: | ||||
tuple (id, indexer_configuration_id): missing id | tuple (id, indexer_configuration_id): missing id | ||||
""" | """ | ||||
... | ... | ||||
def _content_get_range( | @remote_api_endpoint("content_mimetype/range") | ||||
def content_mimetype_get_partition( | |||||
self, | self, | ||||
content_type, | indexer_configuration_id: int, | ||||
start, | partition_id: int, | ||||
end, | nb_partitions: int, | ||||
indexer_configuration_id, | page_token: Optional[str] = None, | ||||
limit=1000, | limit: int = 1000, | ||||
with_textual_data=False, | ) -> PagedResult[Sha1]: | ||||
): | """Retrieve mimetypes within partition partition_id bound by limit. | ||||
"""Retrieve ids of type content_type within range [start, end] bound | |||||
by limit. | |||||
Args: | Args: | ||||
**content_type** (str): content's type (mimetype, language, etc...) | **indexer_configuration_id**: The tool used to index data | ||||
**start** (bytes): Starting identifier range (expected smaller | **partition_id**: index of the partition to fetch | ||||
than end) | **nb_partitions**: total number of partitions to split into | ||||
**end** (bytes): Ending identifier range (expected larger | **page_token**: opaque token used for pagination | ||||
than start) | **limit**: Limit result (default to 1000) | ||||
**indexer_configuration_id** (int): The tool used to index data | |||||
**limit** (int): Limit result (default to 1000) | |||||
**with_textual_data** (bool): Deal with only textual | |||||
content (True) or all | |||||
content (all contents by | |||||
defaults, False) | |||||
Raises: | Raises: | ||||
ValueError for; | IndexerStorageArgumentException for; | ||||
- limit to None | - limit to None | ||||
- wrong content_type provided | - wrong indexer_type provided | ||||
ardumont: doc fix. | |||||
Returns: | |||||
a dict with keys: | |||||
- **ids** [bytes]: iterable of content ids within the range. | |||||
- **next** (Optional[bytes]): The next range of sha1 starts at | |||||
this sha1 if any | |||||
""" | |||||
... | |||||
@remote_api_endpoint("content_mimetype/range") | |||||
def content_mimetype_get_range( | |||||
self, start, end, indexer_configuration_id, limit=1000 | |||||
): | |||||
"""Retrieve mimetypes within range [start, end] bound by limit. | |||||
Args: | |||||
**start** (bytes): Starting identifier range (expected smaller | |||||
than end) | |||||
**end** (bytes): Ending identifier range (expected larger | |||||
than start) | |||||
**indexer_configuration_id** (int): The tool used to index data | |||||
**limit** (int): Limit result (default to 1000) | |||||
Raises: | |||||
ValueError for limit to None | |||||
Returns: | Returns: | ||||
a dict with keys: | PagedResult of Sha1. If next_page_token is None, there is no more data | ||||
- **ids** [bytes]: iterable of content ids within the range. | to fetch | ||||
- **next** (Optional[bytes]): The next range of sha1 starts at | |||||
this sha1 if any | |||||
""" | """ | ||||
... | ... | ||||
@remote_api_endpoint("content_mimetype/add") | @remote_api_endpoint("content_mimetype/add") | ||||
def content_mimetype_add( | def content_mimetype_add( | ||||
self, mimetypes: List[Dict], conflict_update: bool = False | self, mimetypes: List[Dict], conflict_update: bool = False | ||||
) -> Dict[str, int]: | ) -> Dict[str, int]: | ||||
▲ Show 20 Lines • Show All 199 Lines • ▼ Show 20 Lines | ) -> Dict[str, int]: | ||||
Returns: | Returns: | ||||
Dict summary of number of rows added | Dict summary of number of rows added | ||||
""" | """ | ||||
... | ... | ||||
@remote_api_endpoint("content/fossology_license/range") | @remote_api_endpoint("content/fossology_license/range") | ||||
def content_fossology_license_get_range( | def content_fossology_license_get_partition( | ||||
self, start, end, indexer_configuration_id, limit=1000 | self, | ||||
): | indexer_configuration_id: int, | ||||
"""Retrieve licenses within range [start, end] bound by limit. | partition_id: int, | ||||
nb_partitions: int, | |||||
page_token: Optional[str] = None, | |||||
limit: int = 1000, | |||||
) -> PagedResult[Sha1]: | |||||
"""Retrieve licenses within the partition partition_id bound by limit. | |||||
Done Inline Actions: doc fix. ardumont: doc fix. | |||||
Args: | Args: | ||||
**start** (bytes): Starting identifier range (expected smaller | **indexer_configuration_id**: The tool used to index data | ||||
than end) | **partition_id**: index of the partition to fetch | ||||
**end** (bytes): Ending identifier range (expected larger | **nb_partitions**: total number of partitions to split into | ||||
than start) | **page_token**: opaque token used for pagination | ||||
**indexer_configuration_id** (int): The tool used to index data | **limit**: Limit result (default to 1000) | ||||
**limit** (int): Limit result (default to 1000) | |||||
Raises: | Raises: | ||||
ValueError for limit to None | IndexerStorageArgumentException for; | ||||
- limit to None | |||||
- wrong indexer_type provided | |||||
Returns: | Returns: PagedResult of Sha1. If next_page_token is None, there is no more data | ||||
a dict with keys: | to fetch | ||||
Done Inline Actions: line. ardumont: line. | |||||
- **ids** [bytes]: iterable of content ids within the range. | |||||
- **next** (Optional[bytes]): The next range of sha1 starts at | |||||
this sha1 if any | |||||
""" | """ | ||||
... | ... | ||||
@remote_api_endpoint("content_metadata/missing") | @remote_api_endpoint("content_metadata/missing") | ||||
def content_metadata_missing(self, metadata): | def content_metadata_missing(self, metadata): | ||||
"""List metadata missing from storage. | """List metadata missing from storage. | ||||
▲ Show 20 Lines • Show All 303 Lines • Show Last 20 Lines |
doc fix.