Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/archive.py
Show First 20 Lines • Show All 291 Lines • ▼ Show 20 Lines | ) -> PagedResult[OriginInfo]: | ||||
return PagedResult( | return PagedResult( | ||||
[converters.from_origin(o.to_dict()) for o in page.results], | [converters.from_origin(o.to_dict()) for o in page.results], | ||||
next_page_token=page.next_page_token, | next_page_token=page.next_page_token, | ||||
) | ) | ||||
def search_origin( | def search_origin( | ||||
url_pattern: str, | url_pattern: str, | ||||
use_ql: bool = False, | |||||
limit: int = 50, | limit: int = 50, | ||||
with_visit: bool = False, | with_visit: bool = False, | ||||
visit_types: Optional[List[str]] = None, | visit_types: Optional[List[str]] = None, | ||||
page_token: Optional[str] = None, | page_token: Optional[str] = None, | ||||
) -> Tuple[List[OriginInfo], Optional[str]]: | ) -> Tuple[List[OriginInfo], Optional[str]]: | ||||
"""Search for origins whose urls contain a provided string pattern | """Search for origins whose urls contain a provided string pattern | ||||
or match a provided regular expression. | or match a provided regular expression. | ||||
Args: | Args: | ||||
url_pattern: the string pattern to search for in origin urls | url_pattern: the string pattern to search for in origin urls | ||||
use_ql: whether to use swh search query language or not | |||||
limit: the maximum number of found origins to return | limit: the maximum number of found origins to return | ||||
with_visit: Whether origins with no visit are to be filtered out | with_visit: Whether origins with no visit are to be filtered out | ||||
visit_types: Only origins having any of the provided visit types | visit_types: Only origins having any of the provided visit types | ||||
(e.g. git, svn, pypi) will be returned | (e.g. git, svn, pypi) will be returned | ||||
page_token: opaque string used to get the next results of a search | page_token: opaque string used to get the next results of a search | ||||
Returns: | Returns: | ||||
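tuple made of the list of origin information (as dict) and an optional token to get the next page of results. | tuple made of the list of origin information (as dict) and an optional token to get the next page of results. | ||||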
""" | """ | ||||
if page_token: | if page_token: | ||||
assert isinstance(page_token, str) | assert isinstance(page_token, str) | ||||
if search: | if search: | ||||
if config.get_config()["search_config"].get("enable_ql") and use_ql: | |||||
page_result = search.origin_search( | |||||
query=url_pattern, | |||||
page_token=page_token, | |||||
with_visit=with_visit, | |||||
visit_types=visit_types, | |||||
limit=limit, | |||||
) | |||||
else: | |||||
page_result = search.origin_search( | page_result = search.origin_search( | ||||
url_pattern=url_pattern, | url_pattern=url_pattern, | ||||
page_token=page_token, | page_token=page_token, | ||||
with_visit=with_visit, | with_visit=with_visit, | ||||
visit_types=visit_types, | visit_types=visit_types, | ||||
limit=limit, | limit=limit, | ||||
) | ) | ||||
origins = [converters.from_origin(ori_dict) for ori_dict in page_result.results] | origins = [converters.from_origin(ori_dict) for ori_dict in page_result.results] | ||||
else: | else: | ||||
# Fallback to swh-storage if swh-search is not configured | # Fallback to swh-storage if swh-search is not configured | ||||
search_words = [re.escape(word) for word in url_pattern.split()] | search_words = [re.escape(word) for word in url_pattern.split()] | ||||
if len(search_words) >= 7: | if len(search_words) >= 7: | ||||
url_pattern = ".*".join(search_words) | url_pattern = ".*".join(search_words) | ||||
else: | else: | ||||
pattern_parts = [] | pattern_parts = [] | ||||
Show All 23 Lines | Args: | ||||
fulltext: the string pattern to search for in origin metadata | fulltext: the string pattern to search for in origin metadata | ||||
limit: the maximum number of found origins to return | limit: the maximum number of found origins to return | ||||
Returns: | Returns: | ||||
Iterable of origin metadata information for existing origins | Iterable of origin metadata information for existing origins | ||||
""" | """ | ||||
results = [] | results = [] | ||||
if search and config.get_config()["metadata_search_backend"] == "swh-search": | if search and config.get_config()["search_config"]["backend"] == "swh-search": | ||||
page_result = search.origin_search(metadata_pattern=fulltext, limit=limit,) | page_result = search.origin_search(metadata_pattern=fulltext, limit=limit,) | ||||
matches = idx_storage.origin_intrinsic_metadata_get( | matches = idx_storage.origin_intrinsic_metadata_get( | ||||
[r["url"] for r in page_result.results] | [r["url"] for r in page_result.results] | ||||
) | ) | ||||
else: | else: | ||||
matches = idx_storage.origin_intrinsic_metadata_search_fulltext( | matches = idx_storage.origin_intrinsic_metadata_search_fulltext( | ||||
conjunction=[fulltext], limit=limit | conjunction=[fulltext], limit=limit | ||||
) | ) | ||||
▲ Show 20 Lines • Show All 1,019 Lines • Show Last 20 Lines |