urls = ['https://example.org/a', 'https://example.org/r', 'https://example.org/t', 'https://example.org/i', 'https://example.org/f', 'https://example.org/a', ...]
request = None
def is_tarball(urls: List[str], request: Optional[Any] = None) -> Tuple[bool, str]:
"""Determine whether a list of files actually are tarballs or simple files.
When this cannot be answered simply out of the url, when request is provided, this
executes a HTTP `HEAD` query on the url to determine the information. If request is
not provided, this raises an ArtifactNatureUndetected exception.
Args:
urls: name of the remote files for which the extension needs to be checked.
Raises:
ArtifactNatureUndetected when the artifact's nature cannot be detected out
of its url
ArtifactNatureMistyped when the artifact is not a tarball nor a file. It's up to
the caller to do what's right with it.
Returns: A tuple (bool, url). The boolean represents whether the url is an archive
or not. The second parameter is the actual url once the head request is issued
as a fallback of not finding out whether the urls are tarballs or not.
"""
def _is_tarball(url):
"""Determine out of an extension whether url is a tarball.
Raises:
IndexError in case no extension is available
"""
urlparsed = urlparse(url)
if urlparsed.scheme not in ("http", "https", "ftp"):
raise ArtifactNatureMistyped(f"Mistyped artifact '{url}'")
errors = []
query_params = dict(parse_qsl(urlparsed.query))
for path in [query_params.get(key) for key in ["f", "file", "url"]] + [
urlparsed.path
]:
if not path:
continue
try:
file_ = Path(path).suffixes[-1]
break
except IndexError as e:
errors.append(e)
if errors:
raise errors[-1]
return file_.lstrip(".") in TARBALL_EXTENSIONS
index = random.randrange(len(urls))
url = urls[index]
try:
> return _is_tarball(url), urls[0]
.tox/py3/lib/python3.7/site-packages/swh/lister/nixguix/lister.py:171:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
url = 'https://example.org/o'
def _is_tarball(url):
"""Determine out of an extension whether url is a tarball.
Raises:
IndexError in case no extension is available
"""
urlparsed = urlparse(url)
if urlparsed.scheme not in ("http", "https", "ftp"):
raise ArtifactNatureMistyped(f"Mistyped artifact '{url}'")
errors = []
query_params = dict(parse_qsl(urlparsed.query))
for path in [query_params.get(key) for key in ["f", "file", "url"]] + [
urlparsed.path
]:
if not path:
continue
try:
file_ = Path(path).suffixes[-1]
break
except IndexError as e:
errors.append(e)
if errors:
> raise errors[-1]
.tox/py3/lib/python3.7/site-packages/swh/lister/nixguix/lister.py:164:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
url = 'https://example.org/o'
def _is_tarball(url):
"""Determine out of an extension whether url is a tarball.
Raises:
IndexError in case no extension is available
"""
urlparsed = urlparse(url)
if urlparsed.scheme not in ("http", "https", "ftp"):
raise ArtifactNatureMistyped(f"Mistyped artifact '{url}'")
errors = []
query_params = dict(parse_qsl(urlparsed.query))
for path in [query_params.get(key) for key in ["f", "file", "url"]] + [
urlparsed.path
]:
if not path:
continue
try:
> file_ = Path(path).suffixes[-1]
E IndexError: list index out of range
.tox/py3/lib/python3.7/site-packages/swh/lister/nixguix/lister.py:158: IndexError
During handling of the above exception, another exception occurred:
tarballs = 'artifact.php?url=one.zip&foo=bar'
@pytest.mark.parametrize(
"tarballs",
[
"download.php?file=one.tar.gz&foo=bar",
"count.php?f=one.gzip&foo=bar",
"artifact.php?url=one.zip&foo=bar",
"files?name=one.tbz&foo=bar",
],
)
def test_is_tarball_not_so_simple(tarballs):
"""More involved check on tarball should discriminate between tarball and file"""
urls = [f"https://example.org/{tarball}" for tarball in tarballs]
> is_tar, origin = is_tarball(urls)
.tox/py3/lib/python3.7/site-packages/swh/lister/nixguix/tests/test_lister.py:60:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
urls = ['https://example.org/a', 'https://example.org/r', 'https://example.org/t', 'https://example.org/i', 'https://example.org/f', 'https://example.org/a', ...]
request = None
def is_tarball(urls: List[str], request: Optional[Any] = None) -> Tuple[bool, str]:
"""Determine whether a list of files actually are tarballs or simple files.
When this cannot be answered simply out of the url, when request is provided, this
executes a HTTP `HEAD` query on the url to determine the information. If request is
not provided, this raises an ArtifactNatureUndetected exception.
Args:
urls: name of the remote files for which the extension needs to be checked.
Raises:
ArtifactNatureUndetected when the artifact's nature cannot be detected out
of its url
ArtifactNatureMistyped when the artifact is not a tarball nor a file. It's up to
the caller to do what's right with it.
Returns: A tuple (bool, url). The boolean represents whether the url is an archive
or not. The second parameter is the actual url once the head request is issued
as a fallback of not finding out whether the urls are tarballs or not.
"""
def _is_tarball(url):
"""Determine out of an extension whether url is a tarball.
Raises:
IndexError in case no extension is available
"""
urlparsed = urlparse(url)
if urlparsed.scheme not in ("http", "https", "ftp"):
raise ArtifactNatureMistyped(f"Mistyped artifact '{url}'")
errors = []
query_params = dict(parse_qsl(urlparsed.query))
for path in [query_params.get(key) for key in ["f", "file", "url"]] + [
urlparsed.path
]:
if not path:
continue
try:
file_ = Path(path).suffixes[-1]
break
except IndexError as e:
errors.append(e)
if errors:
raise errors[-1]
return file_.lstrip(".") in TARBALL_EXTENSIONS
index = random.randrange(len(urls))
url = urls[index]
try:
return _is_tarball(url), urls[0]
except IndexError:
if request is None:
raise ArtifactNatureUndetected(
> f"Cannot determine artifact type from url <{url}>"
)
E swh.lister.nixguix.lister.ArtifactNatureUndetected: Cannot determine artifact type from url <https://example.org/o>
.tox/py3/lib/python3.7/site-packages/swh/lister/nixguix/lister.py:175: ArtifactNatureUndetected
TEST RESULT
TEST RESULT
- Run At
- Oct 4 2022, 8:50 PM