Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/utils.py
Show All 28 Lines | |||||
# Hash names (plus "length") associated with downloads. Written as a set
# literal so no intermediate list is built.
DOWNLOAD_HASHES = {"sha1", "sha256", "length"}
# Person built from an empty full name. NOTE(review): presumably the
# placeholder used when a package has no author information -- confirm
# against callers.
EMPTY_AUTHOR = Person.from_fullname(b"")
def api_info(url: str, **extra_params) -> bytes:
    """Fetch the raw body served at an API url (e.g. PyPI, npm, ...).

    Args:
        url: The api url to query
        extra_params: Extra keyword arguments for ``requests.get``; they
            take precedence over ``DEFAULT_PARAMS`` on key collision

    Raises:
        NotFound: whenever the response status code is anything but 200

    Returns:
        The response body, as bytes

    """
    # Later keys win, so caller-supplied parameters override the defaults.
    request_kwargs = {**DEFAULT_PARAMS, **extra_params}
    response = requests.get(url, **request_kwargs)
    if response.status_code == 200:
        return response.content
    raise NotFound(f"Fail to query '{url}'. Reason: {response.status_code}")
def _content_disposition_filename(header: str) -> Optional[str]: | def _content_disposition_filename(header: str) -> Optional[str]: | ||||
fname = None | fname = None | ||||
fnames = re.findall(r"filename[\*]?=([^;]+)", header) | fnames = re.findall(r"filename[\*]?=([^;]+)", header) | ||||
if fnames and "utf-8''" in fnames[0].lower(): | if fnames and "utf-8''" in fnames[0].lower(): | ||||
# RFC 5987 | # RFC 5987 | ||||
fname = re.sub("utf-8''", "", fnames[0], flags=re.IGNORECASE) | fname = re.sub("utf-8''", "", fnames[0], flags=re.IGNORECASE) | ||||
fname = unquote(fname) | fname = unquote(fname) | ||||
elif fnames: | elif fnames: | ||||
Show All 11 Lines | def _retry_if_throttling(retry_state) -> bool: | ||||
if attempt.failed: | if attempt.failed: | ||||
exception = attempt.exception() | exception = attempt.exception() | ||||
return ( | return ( | ||||
isinstance(exception, HTTPError) and exception.response.status_code == 429 | isinstance(exception, HTTPError) and exception.response.status_code == 429 | ||||
) | ) | ||||
return False | return False | ||||
# Reusable retry decorator for HTTP helpers. The retry predicate is
# _retry_if_throttling (true only for an HTTPError whose response status is
# 429). Waits between attempts grow exponentially (exp_base=10), at most 5
# attempts are made, a WARNING is logged before each sleep, and the original
# exception is re-raised to the caller once attempts are exhausted
# (reraise=True).
throttling_retry = retry(
    retry=_retry_if_throttling,
    wait=wait_exponential(exp_base=10),
    stop=stop_after_attempt(max_attempt_number=5),
    before_sleep=before_sleep_log(logger, logging.WARNING),
    reraise=True,
)
@throttling_retry | |||||
def download( | def download( | ||||
url: str, | url: str, | ||||
dest: str, | dest: str, | ||||
hashes: Dict = {}, | hashes: Dict = {}, | ||||
filename: Optional[str] = None, | filename: Optional[str] = None, | ||||
auth: Optional[Tuple[str, str]] = None, | auth: Optional[Tuple[str, str]] = None, | ||||
extra_request_headers: Optional[Dict[str, str]] = None, | extra_request_headers: Optional[Dict[str, str]] = None, | ||||
) -> Tuple[str, Dict]: | ) -> Tuple[str, Dict]: | ||||
▲ Show 20 Lines • Show All 77 Lines • ▼ Show 20 Lines | extrinsic_metadata = { | ||||
"url": url, | "url": url, | ||||
} | } | ||||
logger.debug("extrinsic_metadata", extrinsic_metadata) | logger.debug("extrinsic_metadata", extrinsic_metadata) | ||||
return filepath, extrinsic_metadata | return filepath, extrinsic_metadata | ||||
@throttling_retry
def get_url_body(url: str, **extra_params) -> bytes:
    """Fetch the raw body served at a url, typically the JSON metadata of a
    software package.

    The call is wrapped by ``throttling_retry``, so a response with status
    429 (surfaced as an ``HTTPError``) is retried before being re-raised.

    Args:
        url: The api url (e.g. PyPI, npm, etc...)
        extra_params: Extra keyword arguments for ``requests.get``; they
            take precedence over ``DEFAULT_PARAMS`` on key collision

    Raises:
        NotFound: when the server answers with a 404
        HTTPError: for any other error status, via
            ``response.raise_for_status()``

    Returns:
        The response body, as bytes

    """
    logger.debug("Fetching %s", url)
    # Later keys win, so caller-supplied parameters override the defaults.
    request_kwargs = {**DEFAULT_PARAMS, **extra_params}
    response = requests.get(url, **request_kwargs)
    # 404 gets the loader-specific exception; every other error status is
    # left to raise_for_status() so that the retry predicate can see it.
    if response.status_code == 404:
        raise NotFound(f"Fail to query '{url}'. Reason: {response.status_code}")
    response.raise_for_status()
    return response.content
def release_name(version: str, filename: Optional[str] = None) -> str:
    """Build the release name for a version and, optionally, a filename.

    Args:
        version: The version string
        filename: Optional file name; ignored when None or empty

    Returns:
        ``releases/<version>/<filename>`` when a filename is given,
        ``releases/<version>`` otherwise

    """
    # f-strings match the rest of the module and, unlike ``"..." % version``,
    # cannot misinterpret a tuple argument as several format arguments.
    if filename:
        return f"releases/{version}/{filename}"
    return f"releases/{version}"
TReturn = TypeVar("TReturn") | TReturn = TypeVar("TReturn") | ||||
TSelf = TypeVar("TSelf") | TSelf = TypeVar("TSelf") | ||||
Show All 16 Lines |