Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/utils.py
Show First 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | |||||
def download( | def download( | ||||
url: str, | url: str, | ||||
dest: str, | dest: str, | ||||
hashes: Dict = {}, | hashes: Dict = {}, | ||||
filename: Optional[str] = None, | filename: Optional[str] = None, | ||||
auth: Optional[Tuple[str, str]] = None, | auth: Optional[Tuple[str, str]] = None, | ||||
extra_request_headers: Optional[Dict[str, str]] = None, | |||||
) -> Tuple[str, Dict]: | ) -> Tuple[str, Dict]: | ||||
"""Download a remote tarball from url, uncompress it and compute swh hashes | """Download a remote tarball from url, uncompress it and compute swh hashes | ||||
on it. | on it. | ||||
Args: | Args: | ||||
url: Artifact uri to fetch, uncompress and hash | url: Artifact uri to fetch, uncompress and hash | ||||
dest: Directory to write the archive to | dest: Directory to write the archive to | ||||
hashes: Dict of expected hashes (key is the hash algo) for the artifact | hashes: Dict of expected hashes (key is the hash algo) for the artifact | ||||
to download (those hashes are expected to be hex string) | to download (those hashes are expected to be hex string) | ||||
auth: Optional tuple of login/password (for http authentication | auth: Optional tuple of login/password (for http authentication | ||||
service, e.g. deposit) | service, e.g. deposit) | ||||
Raises: | Raises: | ||||
ValueError in case of any error when fetching/computing (length, | ValueError in case of any error when fetching/computing (length, | ||||
checksums mismatched...) | checksums mismatched...) | ||||
Returns: | Returns: | ||||
Tuple of local (filepath, hashes of filepath) | Tuple of local (filepath, hashes of filepath) | ||||
""" | """ | ||||
params = copy.deepcopy(DEFAULT_PARAMS) | params = copy.deepcopy(DEFAULT_PARAMS) | ||||
if auth is not None: | if auth is not None: | ||||
params["auth"] = auth | params["auth"] = auth | ||||
if extra_request_headers is not None: | |||||
params["headers"].update(extra_request_headers) | |||||
# so the connection does not hang indefinitely (read/connection timeout) | # so the connection does not hang indefinitely (read/connection timeout) | ||||
timeout = params.get("timeout", 60) | timeout = params.get("timeout", 60) | ||||
response = requests.get(url, **params, timeout=timeout, stream=True) | response = requests.get(url, **params, timeout=timeout, stream=True) | ||||
if response.status_code != 200: | if response.status_code != 200: | ||||
raise ValueError("Fail to query '%s'. Reason: %s" % (url, response.status_code)) | raise ValueError("Fail to query '%s'. Reason: %s" % (url, response.status_code)) | ||||
filename = filename if filename else os.path.basename(url) | filename = filename if filename else os.path.basename(url) | ||||
logger.debug("filename: %s", filename) | logger.debug("filename: %s", filename) | ||||
▲ Show 20 Lines • Show All 60 Lines • Show Last 20 Lines |