Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/service.py
Show All 17 Lines | from swh.model.identifiers import ( | ||||
CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT | CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT | ||||
) | ) | ||||
from swh.web.common import converters | from swh.web.common import converters | ||||
from swh.web.common import query | from swh.web.common import query | ||||
from swh.web.common.exc import BadInputExc, NotFoundExc | from swh.web.common.exc import BadInputExc, NotFoundExc | ||||
from swh.web.common.origin_visits import get_origin_visit | from swh.web.common.origin_visits import get_origin_visit | ||||
from swh.web import config | from swh.web import config | ||||
search = config.search() | |||||
storage = config.storage() | storage = config.storage() | ||||
vault = config.vault() | vault = config.vault() | ||||
idx_storage = config.indexer_storage() | idx_storage = config.indexer_storage() | ||||
MAX_LIMIT = 50 # Top limit the users can ask for | MAX_LIMIT = 50 # Top limit the users can ask for | ||||
▲ Show 20 Lines • Show All 221 Lines • ▼ Show 20 Lines | Args: | ||||
url_pattern: the string pattern to search for in origin urls | url_pattern: the string pattern to search for in origin urls | ||||
limit: the maximum number of found origins to return | limit: the maximum number of found origins to return | ||||
page_token: opaque string used to get the next results of a search | page_token: opaque string used to get the next results of a search | ||||
Returns: | Returns: | ||||
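a tuple (origins, page_token): the list of origin information as dict and the token to use to get the next page of results, if any. | a tuple (origins, page_token): the list of origin information as dict and the token to use to get the next page of results, if any. | ||||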
""" | """ | ||||
if search: | |||||
results = search.origin_search(url_pattern=url_pattern, count=limit, | |||||
page_token=page_token, | |||||
with_visit=with_visit) | |||||
origins = list(map(converters.from_origin, results['results'])) | |||||
return (origins, results['next_page_token']) | |||||
else: | |||||
# Fallback to swh-storage if swh-search is not configured | |||||
offset = int(page_token) if page_token else 0 | offset = int(page_token) if page_token else 0 | ||||
regexp = True | regexp = True | ||||
search_words = [re.escape(word) for word in url_pattern.split()] | search_words = [re.escape(word) for word in url_pattern.split()] | ||||
if len(search_words) >= 7: | if len(search_words) >= 7: | ||||
url_pattern = '.*'.join(search_words) | url_pattern = '.*'.join(search_words) | ||||
else: | else: | ||||
pattern_parts = [] | pattern_parts = [] | ||||
for permut in itertools.permutations(search_words): | for permut in itertools.permutations(search_words): | ||||
pattern_parts.append('.*'.join(permut)) | pattern_parts.append('.*'.join(permut)) | ||||
url_pattern = '|'.join(pattern_parts) | url_pattern = '|'.join(pattern_parts) | ||||
origins = storage.origin_search(url_pattern, offset, limit, regexp, | origins = storage.origin_search(url_pattern, offset, limit, regexp, | ||||
with_visit) | with_visit) | ||||
origins = list(map(converters.from_origin, origins)) | origins = list(map(converters.from_origin, origins)) | ||||
if len(origins) >= limit: | if len(origins) >= limit: | ||||
page_token = str(offset + len(origins)) | page_token = str(offset + len(origins)) | ||||
else: | else: | ||||
page_token = None | page_token = None | ||||
return (origins, page_token) | return (origins, page_token) | ||||
def search_origin_metadata(fulltext, limit=50): | def search_origin_metadata(fulltext, limit=50): | ||||
"""Search for origins whose metadata match a provided string pattern. | """Search for origins whose metadata match a provided string pattern. | ||||
Args: | Args: | ||||
fulltext: the string pattern to search for in origin metadata | fulltext: the string pattern to search for in origin metadata | ||||
offset: number of found origins to skip before returning results | offset: number of found origins to skip before returning results | ||||
▲ Show 20 Lines • Show All 875 Lines • Show Last 20 Lines |