# Patch summary: let download() accept URLs that carry no scheme.
#
# swh/loader/package/utils.py
#   - imports urlparse from urllib.parse;
#   - just before the requests.get() call, when urlparse(url).scheme is
#     empty, rebinds url to "http://" + url. Because url is rebound, the
#     ValueError raised on a non-200 status reports the prefixed URL, not
#     the string the caller passed in.
#
# swh/loader/package/tests/test_utils.py
#   - adds test_download_no_url_schema: registers a mocked GET on
#     "http://" + url answering 200, then calls
#     download(url, dest=str(tmp_path)) with the scheme-less url. The test
#     contains no assert statement; it only exercises the call.
#
# NOTE(review): this patch text is collapsed onto a single line; the line
#   breaks and the indentation of the context lines presumably have to be
#   restored from the original commit before it can be applied.
# NOTE(review): urlparse() may report a non-empty scheme for host:port
#   inputs such as "localhost:8080/x" on some Python versions, in which
#   case no "http://" prefix would be added -- confirm against the
#   supported interpreters.
diff --git a/swh/loader/package/tests/test_utils.py b/swh/loader/package/tests/test_utils.py --- a/swh/loader/package/tests/test_utils.py +++ b/swh/loader/package/tests/test_utils.py @@ -54,6 +54,14 @@ ) +@pytest.mark.fs +def test_download_no_url_schema(tmp_path, requests_mock): + url = "www.roudoudou.com/export/cpc/rasm/rasm_v0117_src.zip" + requests_mock.get(f"http://{url}", status_code=200) + + download(url, dest=str(tmp_path)) + + @pytest.mark.fs def test_download_ok_no_header(tmp_path, requests_mock): """Download without issue should provide filename and hashes""" diff --git a/swh/loader/package/utils.py b/swh/loader/package/utils.py --- a/swh/loader/package/utils.py +++ b/swh/loader/package/utils.py @@ -8,6 +8,7 @@ import logging import os from typing import Callable, Dict, Optional, Tuple, TypeVar +from urllib.parse import urlparse import requests @@ -79,6 +80,8 @@ params["headers"].update(extra_request_headers) # so the connection does not hang indefinitely (read/connection timeout) timeout = params.get("timeout", 60) + if not urlparse(url).scheme: + url = "http://" + url response = requests.get(url, **params, timeout=timeout, stream=True) if response.status_code != 200: raise ValueError("Fail to query '%s'. Reason: %s" % (url, response.status_code))