Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/utils.py
# Copyright (C) 2019-2021 The Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import copy | import copy | ||||
import functools | import functools | ||||
import logging | import logging | ||||
import os | import os | ||||
from typing import Callable, Dict, Optional, Tuple, TypeVar | from typing import Callable, Dict, Optional, Tuple, TypeVar | ||||
from urllib.parse import urlparse | |||||
import requests | import requests | ||||
from swh.loader.exception import NotFound | from swh.loader.exception import NotFound | ||||
from swh.loader.package import DEFAULT_PARAMS | from swh.loader.package import DEFAULT_PARAMS | ||||
from swh.model.hashutil import HASH_BLOCK_SIZE, MultiHash | from swh.model.hashutil import HASH_BLOCK_SIZE, MultiHash | ||||
from swh.model.model import Person | from swh.model.model import Person | ||||
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines | ) -> Tuple[str, Dict]: | ||||
""" | """ | ||||
params = copy.deepcopy(DEFAULT_PARAMS) | params = copy.deepcopy(DEFAULT_PARAMS) | ||||
if auth is not None: | if auth is not None: | ||||
params["auth"] = auth | params["auth"] = auth | ||||
if extra_request_headers is not None: | if extra_request_headers is not None: | ||||
params["headers"].update(extra_request_headers) | params["headers"].update(extra_request_headers) | ||||
# so the connection does not hang indefinitely (read/connection timeout) | # so the connection does not hang indefinitely (read/connection timeout) | ||||
timeout = params.get("timeout", 60) | timeout = params.get("timeout", 60) | ||||
if not urlparse(url).scheme: | |||||
url = "http://" + url | |||||
ardumont: What about https? Prepending "http://" to a scheme-less URL forces plain HTTP even when the server supports HTTPS. | |||||
response = requests.get(url, **params, timeout=timeout, stream=True) | response = requests.get(url, **params, timeout=timeout, stream=True) | ||||
if response.status_code != 200: | if response.status_code != 200: | ||||
raise ValueError("Fail to query '%s'. Reason: %s" % (url, response.status_code)) | raise ValueError("Fail to query '%s'. Reason: %s" % (url, response.status_code)) | ||||
filename = filename if filename else os.path.basename(url) | filename = filename if filename else os.path.basename(url) | ||||
logger.debug("filename: %s", filename) | logger.debug("filename: %s", filename) | ||||
filepath = os.path.join(dest, filename) | filepath = os.path.join(dest, filename) | ||||
logger.debug("filepath: %s", filepath) | logger.debug("filepath: %s", filepath) | ||||
▲ Show 20 Lines • Show All 58 Lines • Show Last 20 Lines |
What about https? Prepending "http://" to a scheme-less URL forces plain HTTP even when the server supports HTTPS.