Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/service.py
Show First 20 Lines • Show All 241 Lines • ▼ Show 20 Lines | def lookup_origins(origin_from=1, origin_count=100): | ||||
Yields: | Yields: | ||||
origins information as dicts | origins information as dicts | ||||
""" | """ | ||||
origins = storage.origin_get_range(origin_from, origin_count) | origins = storage.origin_get_range(origin_from, origin_count) | ||||
return map(converters.from_origin, origins) | return map(converters.from_origin, origins) | ||||
def search_origin(url_pattern, limit=50, with_visit=False, page_token=None):
    """Search for origins whose urls contain the words of a provided
    string pattern.

    The pattern is split on whitespace and every word is regex-escaped, so
    words are matched literally. The words are then combined into a single
    regular expression that is handed to ``storage.origin_search`` with
    regexp matching enabled.

    Args:
        url_pattern: the string pattern to search for in origin urls
        limit: the maximum number of found origins to return
        with_visit: flag forwarded unchanged to ``storage.origin_search``
            (presumably restricts results to origins having at least one
            visit -- confirm against the storage API)
        page_token: opaque string used to get the next results of a search;
            ``None`` or an empty string starts from the first result

    Returns:
        a tuple ``(origins, page_token)`` where ``origins`` is a list of
        origin information as dict and ``page_token`` is the token to pass
        to get the next page of results, or ``None`` when the returned
        page was empty or held fewer than ``limit`` origins.

    Raises:
        ValueError: if ``page_token`` is not the string form of an integer
    """
    # The token is simply the number of results already consumed.
    offset = int(page_token) if page_token else 0

    regexp = True
    search_words = [re.escape(word) for word in url_pattern.split()]
    if len(search_words) >= 7:
        # The number of permutations grows factorially (7 words would give
        # 5040 alternatives), so from 7 words on only the order in which
        # the words were typed is searched.
        url_pattern = '.*'.join(search_words)
    else:
        # Accept the words in any order: one alternative per permutation.
        url_pattern = '|'.join(
            '.*'.join(permut)
            for permut in itertools.permutations(search_words))

    origins = storage.origin_search(url_pattern, offset, limit, regexp,
                                    with_visit)
    origins = list(map(converters.from_origin, origins))

    # A full page suggests more results may follow. An empty page never
    # does: without the emptiness check, limit <= 0 would hand back the same
    # token forever and a caller following tokens would never terminate.
    if origins and len(origins) >= limit:
        page_token = str(offset + len(origins))
    else:
        page_token = None
    return (origins, page_token)
def search_origin_metadata(fulltext, limit=50): | def search_origin_metadata(fulltext, limit=50): | ||||
"""Search for origins whose metadata match a provided string pattern. | """Search for origins whose metadata match a provided string pattern. | ||||
Args: | Args: | ||||
fulltext: the string pattern to search for in origin metadata | fulltext: the string pattern to search for in origin metadata | ||||
offset: number of found origins to skip before returning results | offset: number of found origins to skip before returning results | ||||
▲ Show 20 Lines • Show All 875 Lines • Show Last 20 Lines |