diff --git a/swh/lister/nixguix/lister.py b/swh/lister/nixguix/lister.py --- a/swh/lister/nixguix/lister.py +++ b/swh/lister/nixguix/lister.py @@ -129,8 +129,7 @@ url = urls[index] try: - is_tar = _is_tarball(url) - return is_tar, urls[0] + return _is_tarball(url), urls[0] except IndexError: if request is None: raise ArtifactNatureUndetected( @@ -285,15 +284,25 @@ ) elif artifact_type == "url": # It's either a tarball or a file - urls = artifact.get("urls") - if not urls: + origin_urls = artifact.get("urls") + if not origin_urls: # Nothing to fetch logger.warning("Skipping url <%s>: empty artifact", artifact) continue - assert urls is not None + assert origin_urls is not None + + # Deal with urls with empty scheme (basic fallback to http) + urls = [] + for url in origin_urls: + urlparsed = urlparse(url) + if urlparsed.scheme == "": + logger.warning("Missing scheme for <%s>, fallback to http", url) + fixed_url = f"http://{url}" + else: + fixed_url = url + urls.append(fixed_url) - # FIXME: T3294: Fix missing scheme in urls origin, *fallback_urls = urls integrity = artifact.get("integrity") diff --git a/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json b/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json --- a/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json +++ b/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json @@ -29,6 +29,13 @@ ], "integrity": "sha256-bss09x9yOnuW+Q5BHHjf8nNcCNxCKMdl9/2/jKSFcrQ=" }, + { + "type": "url", + "urls": [ + "www.roudoudou.com/export/cpc/rasm/rasm_v0117_src.zip" + ], + "integrity": "sha256-wAEswtkl3ulAw3zq4perrGS6Wlww5XXnQYsEAoYT9fI=" + }, { "type": "url", "urls": [ diff --git a/swh/lister/nixguix/tests/test_lister.py b/swh/lister/nixguix/tests/test_lister.py --- a/swh/lister/nixguix/tests/test_lister.py +++ b/swh/lister/nixguix/tests/test_lister.py @@ -146,7 +146,7 @@ assert origin == url -def test_lister_nixguix(datadir, swh_scheduler, requests_mock): +def test_lister_nixguix_ok(datadir, swh_scheduler, requests_mock): """NixGuixLister should list all origins per visit type""" url = "https://nix-community.github.io/nixpkgs-swh/sources-unstable.json" origin_upstream = "https://github.com/NixOS/nixpkgs"