Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/service.py
# Copyright (C) 2015-2020 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import itertools | import itertools | ||||
import os | import os | ||||
import re | import re | ||||
from collections import defaultdict | from collections import defaultdict | ||||
from typing import Any, Dict, List, Set | from typing import Any, Dict, List, Set, Iterator, Optional, Tuple | ||||
from swh.model import hashutil | from swh.model import hashutil | ||||
from swh.storage.algos import diff, revisions_walker | from swh.storage.algos import diff, revisions_walker | ||||
from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT | from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT | ||||
from swh.web import config | |||||
from swh.web.common import converters | from swh.web.common import converters | ||||
from swh.web.common import query | from swh.web.common import query | ||||
from swh.web.common.exc import BadInputExc, NotFoundExc | from swh.web.common.exc import BadInputExc, NotFoundExc | ||||
from swh.web.common.origin_visits import get_origin_visit | from swh.web.common.origin_visits import get_origin_visit | ||||
from swh.web import config | from swh.web.common.typing import OriginInfo, OriginVisitInfo | ||||
# Module-level handles, obtained from the web application configuration when
# this module is imported and shared by every lookup function below.
# NOTE(review): presumably these are clients for the search, storage, vault and
# indexer-storage services -- confirm against swh.web.config.
search = config.search()
storage = config.storage()
vault = config.vault()
idx_storage = config.indexer_storage()
# Upper bound applied to caller-supplied limits (see _lookup_origin_visits).
MAX_LIMIT = 50 # Top limit the users can ask for
▲ Show 20 Lines • Show All 175 Lines • ▼ Show 20 Lines | if not sha1: | ||||
return None | return None | ||||
lic = _first_element(idx_storage.content_fossology_license_get([sha1])) | lic = _first_element(idx_storage.content_fossology_license_get([sha1])) | ||||
if not lic: | if not lic: | ||||
return None | return None | ||||
return converters.from_swh({"id": sha1, "facts": lic[sha1]}, hashess={"id"}) | return converters.from_swh({"id": sha1, "facts": lic[sha1]}, hashess={"id"}) | ||||
def lookup_origin(origin: Dict[str, str]) -> Dict[str, str]: | def lookup_origin(origin: OriginInfo) -> OriginInfo: | ||||
"""Return information about the origin matching dict origin. | """Return information about the origin matching dict origin. | ||||
Args: | Args: | ||||
origin: origin's dict with 'url' key | origin: origin's dict with 'url' key | ||||
Returns: | Returns: | ||||
origin information as dict. | origin information as dict. | ||||
Show All 14 Lines | for orig in origins: | ||||
if origin_info: | if origin_info: | ||||
break | break | ||||
if not origin_info: | if not origin_info: | ||||
msg = "Origin with url %s not found!" % origin["url"] | msg = "Origin with url %s not found!" % origin["url"] | ||||
raise NotFoundExc(msg) | raise NotFoundExc(msg) | ||||
return converters.from_origin(origin_info) | return converters.from_origin(origin_info) | ||||
def lookup_origins(
    origin_from: int = 1, origin_count: int = 100
) -> Iterator[OriginInfo]:
    """List archived software origins, one page at a time.

    The page is fetched with ``storage.origin_get_range`` and every entry is
    converted lazily with ``converters.from_origin``.

    Args:
        origin_from: the minimum id of the origins to return
        origin_count: the maximum number of origins to return

    Returns:
        A lazy iterator over origins information as dicts, in the order the
        storage returned them (presumably sorted by id -- to be confirmed
        against the storage API).

    """
    page = storage.origin_get_range(origin_from, origin_count)
    return (converters.from_origin(entry) for entry in page)
def search_origin(url_pattern, limit=50, with_visit=False, page_token=None): | def search_origin( | ||||
url_pattern: str, limit: int = 50, with_visit: bool = False, page_token: Any = None | |||||
) -> Tuple[List[OriginInfo], Any]: | |||||
"""Search for origins whose urls contain a provided string pattern | """Search for origins whose urls contain a provided string pattern | ||||
or match a provided regular expression. | or match a provided regular expression. | ||||
Args: | Args: | ||||
url_pattern: the string pattern to search for in origin urls | url_pattern: the string pattern to search for in origin urls | ||||
limit: the maximum number of found origins to return | limit: the maximum number of found origins to return | ||||
page_token: opaque string used to get the next results of a search | page_token: opaque string used to get the next results of a search | ||||
Show All 18 Lines | else: | ||||
if len(search_words) >= 7: | if len(search_words) >= 7: | ||||
url_pattern = ".*".join(search_words) | url_pattern = ".*".join(search_words) | ||||
else: | else: | ||||
pattern_parts = [] | pattern_parts = [] | ||||
for permut in itertools.permutations(search_words): | for permut in itertools.permutations(search_words): | ||||
pattern_parts.append(".*".join(permut)) | pattern_parts.append(".*".join(permut)) | ||||
url_pattern = "|".join(pattern_parts) | url_pattern = "|".join(pattern_parts) | ||||
origins = storage.origin_search(url_pattern, offset, limit, regexp, with_visit) | origins_raw = storage.origin_search( | ||||
origins = list(map(converters.from_origin, origins)) | url_pattern, offset, limit, regexp, with_visit | ||||
) | |||||
origins = list(map(converters.from_origin, origins_raw)) | |||||
if len(origins) >= limit: | if len(origins) >= limit: | ||||
page_token = str(offset + len(origins)) | page_token = str(offset + len(origins)) | ||||
else: | else: | ||||
page_token = None | page_token = None | ||||
return (origins, page_token) | return (origins, page_token) | ||||
def search_origin_metadata(fulltext, limit=50): | def search_origin_metadata(fulltext, limit=50): | ||||
▲ Show 20 Lines • Show All 179 Lines • ▼ Show 20 Lines | |||||
def lookup_revision_multiple(sha1_git_list):
    """Look up several revisions from their sha1_git identifiers.

    Args:
        sha1_git_list: an iterable of revision sha1_git identifiers

    Returns:
        Iterator of revisions information as dict, one per entry returned by
        ``storage.revision_get`` (empty when the storage returns a falsy
        result).

    Raises:
        Whatever ``_to_sha1_bin`` raises for an identifier that is not of
        sha1 nature (previously documented as ValueError -- to be confirmed).

    """
    # The identifiers are converted lazily: the storage call consumes this
    # generator, so a conversion error surfaces while the storage iterates.
    binary_ids = (_to_sha1_bin(identifier) for identifier in sha1_git_list)
    found = storage.revision_get(binary_ids)
    if not found:
        found = []
    return map(converters.from_revision, found)
▲ Show 20 Lines • Show All 347 Lines • ▼ Show 20 Lines | def stat_counters(): | ||||
"""Return the stat counters for Software Heritage | """Return the stat counters for Software Heritage | ||||
Returns: | Returns: | ||||
A dict mapping textual labels to integer values. | A dict mapping textual labels to integer values. | ||||
""" | """ | ||||
return storage.stat_counters() | return storage.stat_counters() | ||||
def _lookup_origin_visits(
    origin_url: str, last_visit: Optional[int] = None, limit: int = 10
) -> Iterator[Dict[str, Any]]:
    """Yield the visits of an origin as returned by the storage.

    Args:
        origin_url: origin to list visits for
        last_visit: last visit to lookup from
        limit: maximum number of visits to request; values above
            ``MAX_LIMIT`` are lowered to ``MAX_LIMIT``

    Yields:
        Dictionaries of origin_visit for that origin, each one updated in
        place so that its ``"origin"`` key holds ``origin_url``.

    """
    capped_limit = min(limit, MAX_LIMIT)
    raw_visits = storage.origin_visit_get(
        origin_url, last_visit=last_visit, limit=capped_limit
    )
    for entry in raw_visits:
        # Overwrite whatever the storage put under "origin" with the url.
        entry["origin"] = origin_url
        yield entry
def lookup_origin_visits(
    origin: str, last_visit: Optional[int] = None, per_page: int = 10
) -> Iterator[OriginVisitInfo]:
    """Yield the converted visits of an origin.

    Args:
        origin: origin to list visits for
        last_visit: last visit to lookup from, forwarded unchanged to
            ``_lookup_origin_visits``
        per_page: maximum number of visits to fetch, forwarded as the
            ``limit`` of ``_lookup_origin_visits``

    Yields:
        Dictionaries of origin_visit for that origin, each one passed through
        ``converters.from_origin_visit``.

    """
    raw_visits = _lookup_origin_visits(origin, last_visit=last_visit, limit=per_page)
    # map() is lazy, so each visit is converted only when it is requested.
    yield from map(converters.from_origin_visit, raw_visits)
def lookup_origin_visit_latest(
    origin_url: str, require_snapshot: bool
) -> OriginVisitInfo:
    """Return the latest visit of an origin.

    Args:
        origin_url: origin to get the latest visit for
        require_snapshot: forwarded to ``storage.origin_visit_get_latest``
            (presumably restricts the lookup to visits that have a
            snapshot -- to be confirmed against the storage API)

    Returns:
        The result of ``storage.origin_visit_get_latest`` passed through
        ``converters.from_origin_visit``.
        NOTE(review): what is returned when the storage finds no visit is not
        visible from here -- confirm before relying on a non-None result.

    """
    return converters.from_origin_visit(
        storage.origin_visit_get_latest(
            origin_url, require_snapshot=require_snapshot
        )
    )
def lookup_origin_visit(origin_url, visit_id): | def lookup_origin_visit(origin_url: str, visit_id: int) -> OriginVisitInfo: | ||||
"""Return information about visit visit_id with origin origin. | """Return information about visit visit_id with origin origin. | ||||
Args: | Args: | ||||
origin (str): origin concerned by the visit | origin (str): origin concerned by the visit | ||||
visit_id: the visit identifier to lookup | visit_id: the visit identifier to lookup | ||||
Yields: | Yields: | ||||
The dict origin_visit concerned | The dict origin_visit concerned | ||||
▲ Show 20 Lines • Show All 311 Lines • Show Last 20 Lines |