Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/service.py
Show First 20 Lines • Show All 241 Lines • ▼ Show 20 Lines | def lookup_origins(origin_from=1, origin_count=100): | ||||
Yields: | Yields: | ||||
origins information as dicts | origins information as dicts | ||||
""" | """ | ||||
origins = storage.origin_get_range(origin_from, origin_count) | origins = storage.origin_get_range(origin_from, origin_count) | ||||
return map(converters.from_origin, origins) | return map(converters.from_origin, origins) | ||||
def search_origin(url_pattern, offset=0, limit=50, regexp=False, | def search_origin(url_pattern, offset=0, limit=50, with_visit=False): | ||||
with_visit=False): | |||||
"""Search for origins whose urls contain a provided string pattern | """Search for origins whose urls contain a provided string pattern | ||||
or match a provided regular expression. | or match a provided regular expression. | ||||
Args: | Args: | ||||
url_pattern: the string pattern to search for in origin urls | url_pattern: the string pattern to search for in origin urls | ||||
offset: number of found origins to skip before returning results | offset: number of found origins to skip before returning results | ||||
limit: the maximum number of found origins to return | limit: the maximum number of found origins to return | ||||
Returns: | Returns: | ||||
list of origin information as dict. | list of origin information as dict. | ||||
""" | """ | ||||
if not regexp: | |||||
# If the query is not a regexp, rewrite it as a regexp. | |||||
regexp = True | regexp = True | ||||
search_words = [re.escape(word) for word in url_pattern.split()] | search_words = [re.escape(word) for word in url_pattern.split()] | ||||
if len(search_words) >= 7: | if len(search_words) >= 7: | ||||
url_pattern = '.*'.join(search_words) | url_pattern = '.*'.join(search_words) | ||||
else: | else: | ||||
pattern_parts = [] | pattern_parts = [] | ||||
for permut in itertools.permutations(search_words): | for permut in itertools.permutations(search_words): | ||||
pattern_parts.append('.*'.join(permut)) | pattern_parts.append('.*'.join(permut)) | ||||
url_pattern = '|'.join(pattern_parts) | url_pattern = '|'.join(pattern_parts) | ||||
origins = storage.origin_search(url_pattern, offset, limit, regexp, | origins = storage.origin_search(url_pattern, offset, limit, regexp, | ||||
with_visit) | with_visit) | ||||
return map(converters.from_origin, origins) | return map(converters.from_origin, origins) | ||||
def search_origin_metadata(fulltext, limit=50): | def search_origin_metadata(fulltext, limit=50): | ||||
"""Search for origins whose metadata match a provided string pattern. | """Search for origins whose metadata match a provided string pattern. | ||||
▲ Show 20 Lines • Show All 879 Lines • Show Last 20 Lines |