Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/service.py
# Copyright (C) 2015-2019 The Software Heritage developers | # Copyright (C) 2015-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import itertools | |||||
import os | import os | ||||
import re | |||||
from collections import defaultdict | from collections import defaultdict | ||||
from typing import Any, Dict | from typing import Any, Dict | ||||
from swh.model import hashutil | from swh.model import hashutil | ||||
from swh.storage.algos import diff, revisions_walker | from swh.storage.algos import diff, revisions_walker | ||||
▲ Show 20 Lines • Show All 239 Lines • ▼ Show 20 Lines | Args: | ||||
url_pattern: the string pattern to search for in origin urls | url_pattern: the string pattern to search for in origin urls | ||||
offset: number of found origins to skip before returning results | offset: number of found origins to skip before returning results | ||||
limit: the maximum number of found origins to return | limit: the maximum number of found origins to return | ||||
Returns: | Returns: | ||||
list of origin information as dict. | list of origin information as dict. | ||||
""" | """ | ||||
if not regexp: | |||||
# If the query is not a regexp, rewrite it as a regexp. | |||||
regexp = True | |||||
search_words = [re.escape(word) for word in url_pattern.split()] | |||||
if len(search_words) >= 7: | |||||
url_pattern = '.*'.join(search_words) | |||||
else: | |||||
pattern_parts = [] | |||||
for permut in itertools.permutations(search_words): | |||||
pattern_parts.append('.*'.join(permut)) | |||||
url_pattern = '|'.join(pattern_parts) | |||||
origins = storage.origin_search(url_pattern, offset, limit, regexp, | origins = storage.origin_search(url_pattern, offset, limit, regexp, | ||||
with_visit) | with_visit) | ||||
return map(converters.from_origin, origins) | return map(converters.from_origin, origins) | ||||
def search_origin_metadata(fulltext, limit=50): | def search_origin_metadata(fulltext, limit=50): | ||||
"""Search for origins whose metadata match a provided string pattern. | """Search for origins whose metadata match a provided string pattern. | ||||
▲ Show 20 Lines • Show All 878 Lines • Show Last 20 Lines |