D6239.diff

diff --git a/swh/loader/package/tests/test_utils.py b/swh/loader/package/tests/test_utils.py
--- a/swh/loader/package/tests/test_utils.py
+++ b/swh/loader/package/tests/test_utils.py
@@ -6,6 +6,8 @@
import json
import os
+from unittest.mock import MagicMock
+from urllib.error import URLError
import pytest
@@ -130,6 +132,45 @@
download(url, dest=str(tmp_path), hashes=expected_hashes)
+@pytest.mark.fs
+def test_ftp_download_ok(tmp_path, mocker):
+    """Download without issue should provide filename and hashes"""
+    filename = "requests-0.0.1.tar.gz"
+    url = "ftp://pypi.org/pypi/requests/%s" % filename
+    data = b"this is something"
+
+    cm = MagicMock()
+    cm.getcode.return_value = 200
+    cm.read.side_effect = [data, b""]
+    cm.__enter__.return_value = cm
+    mocker.patch("swh.loader.package.utils.urlopen").return_value = cm
+
+    actual_filepath, actual_hashes = download(url, dest=str(tmp_path))
+
+    actual_filename = os.path.basename(actual_filepath)
+    assert actual_filename == filename
+    assert actual_hashes["length"] == len(data)
+    assert (
+        actual_hashes["checksums"]["sha1"] == "fdd1ce606a904b08c816ba84f3125f2af44d92b2"
+    )  # noqa
+    assert (
+        actual_hashes["checksums"]["sha256"]
+        == "1d9224378d77925d612c9f926eb9fb92850e6551def8328011b6a972323298d5"
+    )
+
+
+@pytest.mark.fs
+def test_ftp_download_ko(tmp_path, mocker):
+ """Download without issue should provide filename and hashes"""
+    filename = "requests-0.0.1.tar.gz"
+    url = "ftp://pypi.org/pypi/requests/%s" % filename
+
+    mocker.patch("swh.loader.package.utils.urlopen").side_effect = URLError("FTP error")
+
+    with pytest.raises(URLError):
+        download(url, dest=str(tmp_path))
+
+
def test_api_info_failure(requests_mock):
"""Failure to fetch info/release information should raise"""
url = "https://pypi.org/pypi/requests/json"
diff --git a/swh/loader/package/utils.py b/swh/loader/package/utils.py
--- a/swh/loader/package/utils.py
+++ b/swh/loader/package/utils.py
@@ -5,9 +5,11 @@
import copy
import functools
+import itertools
import logging
import os
from typing import Callable, Dict, Optional, Tuple, TypeVar
+from urllib.request import urlopen
import requests
@@ -79,9 +81,18 @@
params["headers"].update(extra_request_headers)
# so the connection does not hang indefinitely (read/connection timeout)
timeout = params.get("timeout", 60)
-    response = requests.get(url, **params, timeout=timeout, stream=True)
-    if response.status_code != 200:
-        raise ValueError("Fail to query '%s'. Reason: %s" % (url, response.status_code))
+
+    if url.startswith("ftp://"):
+        response = urlopen(url, timeout=timeout)
+        chunks = (response.read(HASH_BLOCK_SIZE) for _ in itertools.count())
+        response_data = itertools.takewhile(bool, chunks)
+    else:
+        response = requests.get(url, **params, timeout=timeout, stream=True)
+        if response.status_code != 200:
+            raise ValueError(
+                "Fail to query '%s'. Reason: %s" % (url, response.status_code)
+            )
+        response_data = response.iter_content(chunk_size=HASH_BLOCK_SIZE)
filename = filename if filename else os.path.basename(url)
logger.debug("filename: %s", filename)
@@ -90,10 +101,12 @@
h = MultiHash(hash_names=DOWNLOAD_HASHES)
with open(filepath, "wb") as f:
-        for chunk in response.iter_content(chunk_size=HASH_BLOCK_SIZE):
+        for chunk in response_data:
h.update(chunk)
f.write(chunk)
+    response.close()
+
# Also check the expected hashes if provided
if hashes:
actual_hashes = h.hexdigest()
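
For context, a self-contained sketch of the chunked download-and-hash pattern this hunk introduces for ftp:// URLs. The block size and the sha256-only digest are illustrative assumptions; the actual code uses swh's HASH_BLOCK_SIZE and a MultiHash over several checksum names:

import hashlib
import itertools
from urllib.request import urlopen

HASH_BLOCK_SIZE = 32768  # assumed value, for illustration only

def fetch_and_hash(url, filepath, timeout=60):
    """Stream url to filepath in fixed-size chunks; return the sha256 hexdigest."""
    h = hashlib.sha256()
    response = urlopen(url, timeout=timeout)
    # read() returns b"" at EOF, so takewhile(bool, ...) ends the loop there
    chunks = (response.read(HASH_BLOCK_SIZE) for _ in itertools.count())
    with open(filepath, "wb") as f:
        for chunk in itertools.takewhile(bool, chunks):
            h.update(chunk)
            f.write(chunk)
    response.close()
    return h.hexdigest()

The itertools.count()/takewhile(bool, ...) pairing behaves like the two-argument iter(lambda: response.read(HASH_BLOCK_SIZE), b""), and it lets the write loop stay identical across the requests and urlopen code paths.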
