diff --git a/swh/lister/nixguix/lister.py b/swh/lister/nixguix/lister.py
--- a/swh/lister/nixguix/lister.py
+++ b/swh/lister/nixguix/lister.py
@@ -22,7 +22,7 @@
 from pathlib import Path
 import random
 from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
-from urllib.parse import urlparse
+from urllib.parse import parse_qsl, urlparse
 
 import requests
 from requests.exceptions import ConnectionError, InvalidSchema, SSLError
@@ -146,7 +146,24 @@
         urlparsed = urlparse(url)
         if urlparsed.scheme not in ("http", "https", "ftp"):
             raise ArtifactNatureMistyped(f"Mistyped artifact '{url}'")
-        return Path(urlparsed.path).suffixes[-1].lstrip(".") in TARBALL_EXTENSIONS
+
+        # Some download endpoints carry the real filename in a query parameter
+        # (e.g. "download.php?file=one.tar.gz"), so those values are inspected
+        # before falling back to the URL path itself.
+        query_params = dict(parse_qsl(urlparsed.query))
+        candidates = [query_params.get(key) for key in ("f", "file", "url", "name")]
+        candidates.append(urlparsed.path)
+        for path in candidates:
+            if not path:
+                continue
+            suffixes = Path(path).suffixes
+            if suffixes:
+                # First candidate with an extension decides the outcome
+                return suffixes[-1].lstrip(".") in TARBALL_EXTENSIONS
+
+        # No candidate has an extension: keep signalling it with IndexError, as
+        # the previous ``suffixes[-1]`` lookup did.
+        raise IndexError(f"No extension found in '{url}'")
 
     index = random.randrange(len(urls))
     url = urls[index]
diff --git a/swh/lister/nixguix/tests/test_lister.py b/swh/lister/nixguix/tests/test_lister.py
--- a/swh/lister/nixguix/tests/test_lister.py
+++ b/swh/lister/nixguix/tests/test_lister.py
@@ -38,13 +38,33 @@
     + [[f"one.{ext}?foo=bar"] for ext in TARBALL_EXTENSIONS],
 )
 def test_is_tarball_simple(tarballs):
-    """Simple check on tarball should discriminate betwenn tarball and file"""
+    """Simple check on tarball should discriminate between tarball and file"""
     urls = [f"https://example.org/{tarball}" for tarball in tarballs]
     is_tar, origin = is_tarball(urls)
     assert is_tar is True
     assert origin == urls[0]
 
 
+@pytest.mark.parametrize(
+    "query_param",
+    ["file", "f", "url", "name"],
+)
+def test_is_tarball_not_so_simple(query_param):
+    """More involved check on tarball should discriminate between tarball and file"""
+    url = f"https://example.org/download.php?foo=bar&{query_param}=one.tar.gz"
+    is_tar, origin = is_tarball([url])
+    assert is_tar is True
+    assert origin == url
+
+
+def test_is_tarball_query_param_without_extension():
+    """A query parameter lacking an extension should not mask the path's one"""
+    url = "https://example.org/one.tar.gz?name=one"
+    is_tar, origin = is_tarball([url])
+    assert is_tar is True
+    assert origin == url
+
+
 @pytest.mark.parametrize(
     "files",
     [