Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/utils.py
# Copyright (C) 2019-2021 The Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import copy | import copy | ||||
import functools | import functools | ||||
import itertools | import itertools | ||||
import logging | import logging | ||||
import os | import os | ||||
import re | import re | ||||
from typing import Callable, Dict, Optional, Tuple, TypeVar | from typing import Callable, Dict, Optional, Tuple, TypeVar | ||||
from urllib.parse import unquote | from urllib.parse import unquote, urlsplit | ||||
from urllib.request import urlopen | from urllib.request import urlopen | ||||
import requests | import requests | ||||
from swh.loader.exception import NotFound | from swh.loader.exception import NotFound | ||||
from swh.loader.package import DEFAULT_PARAMS | from swh.loader.package import DEFAULT_PARAMS | ||||
from swh.model.hashutil import HASH_BLOCK_SIZE, MultiHash | from swh.model.hashutil import HASH_BLOCK_SIZE, MultiHash | ||||
from swh.model.model import Person | from swh.model.model import Person | ||||
▲ Show 20 Lines • Show All 91 Lines • ▼ Show 20 Lines | else: | ||||
url = response.url | url = response.url | ||||
# try to extract filename from content-disposition header if available | # try to extract filename from content-disposition header if available | ||||
if filename is None and "content-disposition" in response.headers: | if filename is None and "content-disposition" in response.headers: | ||||
filename = _content_disposition_filename( | filename = _content_disposition_filename( | ||||
response.headers["content-disposition"] | response.headers["content-disposition"] | ||||
) | ) | ||||
response_data = response.iter_content(chunk_size=HASH_BLOCK_SIZE) | response_data = response.iter_content(chunk_size=HASH_BLOCK_SIZE) | ||||
filename = filename if filename else os.path.basename(url) | filename = filename if filename else os.path.basename(urlsplit(url).path) | ||||
logger.debug("filename: %s", filename) | logger.debug("filename: %s", filename) | ||||
filepath = os.path.join(dest, filename) | filepath = os.path.join(dest, filename) | ||||
logger.debug("filepath: %s", filepath) | logger.debug("filepath: %s", filepath) | ||||
h = MultiHash(hash_names=DOWNLOAD_HASHES) | h = MultiHash(hash_names=DOWNLOAD_HASHES) | ||||
with open(filepath, "wb") as f: | with open(filepath, "wb") as f: | ||||
for chunk in response_data: | for chunk in response_data: | ||||
h.update(chunk) | h.update(chunk) | ||||
▲ Show 20 Lines • Show All 55 Lines • Show Last 20 Lines |