Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/archive.py
Show First 20 Lines • Show All 292 Lines • ▼ Show 20 Lines | return PagedResult( | ||||
next_page_token=page.next_page_token, | next_page_token=page.next_page_token, | ||||
) | ) | ||||
def search_origin(
    url_pattern: str,
    limit: int = 50,
    with_visit: bool = False,
    visit_types: Optional[List[str]] = None,
    page_token: Optional[str] = None,
) -> Tuple[List[OriginInfo], Optional[str]]:
    """Search for origins whose urls contain a provided string pattern
    or match a provided regular expression.

    The query is sent to swh-search when it is configured; otherwise it
    falls back to a regular expression search through swh-storage.

    Args:
        url_pattern: the string pattern to search for in origin urls
        limit: the maximum number of found origins to return
        with_visit: Whether origins with no visit are to be filtered out
        visit_types: Only origins having any of the provided visit types
            (e.g. git, svn, pypi) will be returned
        page_token: opaque string used to get the next results of a search

    Returns:
        A ``(origins, next_page_token)`` tuple: the list of found origins
        converted with ``converters.from_origin``, and the opaque token to
        pass as ``page_token`` to fetch the following results, as reported
        by the queried backend.

    """
    if page_token:
        assert isinstance(page_token, str)

    if search:
        page_result = search.origin_search(
            url_pattern=url_pattern,
            page_token=page_token,
            with_visit=with_visit,
            visit_types=visit_types,
            limit=limit,
        )
        origins = [converters.from_origin(ori_dict) for ori_dict in page_result.results]
    else:
        # Fallback to swh-storage if swh-search is not configured
        search_words = [re.escape(word) for word in url_pattern.split()]
        if len(search_words) >= 7:
            # The number of word orderings grows factorially, so from seven
            # words on only the order in which they were typed is matched.
            url_pattern = ".*".join(search_words)
        else:
            # Match the words in any order: one alternative per permutation.
            url_pattern = "|".join(
                ".*".join(permut) for permut in itertools.permutations(search_words)
            )

        page_result = storage.origin_search(
            url_pattern,
            page_token=page_token,
            with_visit=with_visit,
            limit=limit,
            visit_types=visit_types,
            regexp=True,
        )
        origins = [converters.from_origin(ori.to_dict()) for ori in page_result.results]

    return (origins, page_result.next_page_token)
def search_origin_metadata( | def search_origin_metadata( | ||||
▲ Show 20 Lines • Show All 1,039 Lines • Show Last 20 Lines |