Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/storage/__init__.py
Show First 20 Lines • Show All 775 Lines • ▼ Show 20 Lines | def origin_intrinsic_metadata_search_fulltext( | ||||
""" | """ | ||||
for c in db.origin_intrinsic_metadata_search_fulltext( | for c in db.origin_intrinsic_metadata_search_fulltext( | ||||
conjunction, limit=limit, cur=cur): | conjunction, limit=limit, cur=cur): | ||||
yield converters.db_to_metadata( | yield converters.db_to_metadata( | ||||
dict(zip(db.origin_intrinsic_metadata_cols, c))) | dict(zip(db.origin_intrinsic_metadata_cols, c))) | ||||
@remote_api_endpoint('origin_intrinsic_metadata/search/by_producer') | @remote_api_endpoint('origin_intrinsic_metadata/search/by_producer') | ||||
@db_transaction_generator() | @db_transaction() | ||||
def origin_intrinsic_metadata_search_by_producer( | def origin_intrinsic_metadata_search_by_producer( | ||||
self, start='', end=None, limit=100, ids_only=False, | self, page_token='', limit=100, ids_only=False, | ||||
mappings=None, tool_ids=None, | mappings=None, tool_ids=None, | ||||
db=None, cur=None): | db=None, cur=None): | ||||
"""Returns the list of origins whose metadata contain all the terms. | """Returns the list of origins whose metadata contain all the terms. | ||||
Args: | Args: | ||||
start (str): The minimum origin url to return | page_token (str): Opaque token used for pagination. | ||||
end (str): The maximum origin url to return | |||||
limit (int): The maximum number of results to return | limit (int): The maximum number of results to return | ||||
ids_only (bool): Determines whether only origin urls are | ids_only (bool): Determines whether only origin urls are | ||||
returned or the content as well | returned or the content as well | ||||
mappings (List[str]): Returns origins whose intrinsic metadata | mappings (List[str]): Returns origins whose intrinsic metadata | ||||
were generated using at least one of these mappings. | were generated using at least one of these mappings. | ||||
Yields: | Returns: | ||||
list: list of origin ids (int) if `ids_only=True`, else | dict: dict with the following keys: | ||||
dictionaries with the following keys: | - **next_page_token** (str, optional): opaque token to be used as | ||||
`page_token` for retrieving the next page. If absent, there are | |||||
no more pages to gather. | |||||
- **origins** (list): list of origin urls (str) if `ids_only=True` | |||||
else dictionaries with the following keys: | |||||
- **id** (str): origin urls | - **id** (str): origin urls | ||||
- **from_revision**: sha1 id of the revision used to generate | - **from_revision**: sha1 id of the revision used to generate | ||||
these metadata. | these metadata. | ||||
- **metadata** (str): associated metadata | - **metadata** (str): associated metadata | ||||
- **tool** (dict): tool used to compute metadata | - **tool** (dict): tool used to compute metadata | ||||
- **mappings** (List[str]): list of mappings used to translate | - **mappings** (List[str]): list of mappings used to translate | ||||
these metadata | these metadata | ||||
""" | """ | ||||
if page_token is None: | |||||
page_token = '' | |||||
assert isinstance(page_token, str) | |||||
res = db.origin_intrinsic_metadata_search_by_producer( | res = db.origin_intrinsic_metadata_search_by_producer( | ||||
start, end, limit, ids_only, mappings, tool_ids, cur) | page_token, limit, ids_only, mappings, tool_ids, cur) | ||||
result = {} | |||||
if ids_only: | if ids_only: | ||||
for (origin,) in res: | result['origins'] = [origin for (origin,) in res] | ||||
yield origin | if result['origins']: | ||||
result['next_page_token'] = result['origins'][-1] | |||||
else: | else: | ||||
for c in res: | result['origins'] = [converters.db_to_metadata( | ||||
yield converters.db_to_metadata( | dict(zip(db.origin_intrinsic_metadata_cols, c)))for c in res] | ||||
dict(zip(db.origin_intrinsic_metadata_cols, c))) | if result['origins']: | ||||
result['next_page_token'] = result['origins'][-1]['id'] | |||||
return result | |||||
@remote_api_endpoint('origin_intrinsic_metadata/stats') | @remote_api_endpoint('origin_intrinsic_metadata/stats') | ||||
@db_transaction() | @db_transaction() | ||||
def origin_intrinsic_metadata_stats( | def origin_intrinsic_metadata_stats( | ||||
self, db=None, cur=None): | self, db=None, cur=None): | ||||
"""Returns counts of indexed metadata per origins, broken down | """Returns counts of indexed metadata per origins, broken down | ||||
into metadata types. | into metadata types. | ||||
▲ Show 20 Lines • Show All 95 Lines • Show Last 20 Lines |