Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/storage/in_memory.py
Show First 20 Lines • Show All 700 Lines • ▼ Show 20 Lines | def origin_intrinsic_metadata_search_fulltext( | ||||
for data in self._origin_intrinsic_metadata.get_all()] | for data in self._origin_intrinsic_metadata.get_all()] | ||||
results = [(rank_, data) for (rank_, data) in results if rank_ > 0] | results = [(rank_, data) for (rank_, data) in results if rank_ > 0] | ||||
results.sort(key=operator.itemgetter(0), # Don't try to order 'data' | results.sort(key=operator.itemgetter(0), # Don't try to order 'data' | ||||
reverse=True) | reverse=True) | ||||
for (rank_, result) in results[:limit]: | for (rank_, result) in results[:limit]: | ||||
yield result | yield result | ||||
def origin_intrinsic_metadata_search_by_producer(
        self, page_token='', limit=100, ids_only=False,
        mappings=None, tool_ids=None,
        db=None, cur=None):
    """Returns one page of origins that have intrinsic metadata,
    optionally filtered by the mappings and/or tools that produced them.

    Args:
        page_token (str): Opaque token used for pagination. Only entries
            whose id is strictly greater than this token are returned;
            the default `''` starts from the beginning.
        limit (int): The maximum number of results to return. A value
            of zero or less yields an empty page.
        ids_only (bool): Determines whether only origin ids are returned
            or the content as well
        mappings (List[str]): Returns origins whose intrinsic metadata
            were generated using at least one of these mappings.
        tool_ids (List[int]): Returns origins whose intrinsic metadata
            were generated by one of these tools (matched against
            `entry['tool']['id']`).
        db: unused by this implementation.
        cur: unused by this implementation.
            NOTE(review): `db`/`cur` are presumably kept for signature
            parity with another storage backend -- confirm.

    Returns:
        dict: dict with the following keys:

        - **next_page_token** (str, optional): opaque token to be used as
          `page_token` for retrieving the next page. Only present when
          more matching entries exist after this page.
        - **origins** (list): list of origin urls (str) if `ids_only=True`
          else dictionaries with the following keys:

          - **id** (str): origin url
          - **from_revision**: sha1 id of the revision used to generate
            these metadata.
          - **metadata** (str): associated metadata
          - **tool** (dict): tool used to compute metadata
          - **mappings** (List[str]): list of mappings used to translate
            these metadata

    """
    assert isinstance(page_token, str)
    if mappings is not None:
        mappings = frozenset(mappings)
    if tool_ids is not None:
        tool_ids = frozenset(tool_ids)

    # A non-positive limit can never produce a non-empty page; bail out
    # early instead of indexing into an empty list further down.
    if limit <= 0:
        return {'origins': []}

    origins = []
    # NOTE(review): pagination assumes get_all() yields entries in
    # ascending 'id' order -- confirm against the backing store.
    for entry in self._origin_intrinsic_metadata.get_all():
        if entry['id'] <= page_token:
            continue
        if mappings is not None and mappings.isdisjoint(entry['mappings']):
            continue
        if tool_ids is not None and entry['tool']['id'] not in tool_ids:
            continue
        origins.append(entry)
        # We collect limit+1 entries to check whether next_page_token
        # should be added to the response.
        if len(origins) > limit:
            break

    result = {}
    if len(origins) > limit:
        origins = origins[:limit]
        # The last entry actually returned is where the next page resumes.
        result['next_page_token'] = origins[-1]['id']
    if ids_only:
        origins = [origin['id'] for origin in origins]
    result['origins'] = origins
    return result
def origin_intrinsic_metadata_stats(self): | def origin_intrinsic_metadata_stats(self): | ||||
"""Returns statistics on stored intrinsic metadata. | """Returns statistics on stored intrinsic metadata. | ||||
Returns: | Returns: | ||||
dict: dictionary with keys: | dict: dictionary with keys: | ||||
- total (int): total number of origins that were indexed | - total (int): total number of origins that were indexed | ||||
(possibly yielding an empty metadata dictionary) | (possibly yielding an empty metadata dictionary) | ||||
▲ Show 20 Lines • Show All 69 Lines • Show Last 20 Lines |