Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/nixguix/lister.py
Show First 20 Lines • Show All 123 Lines • ▼ Show 20 Lines | def _is_tarball(url): | ||||
if urlparsed.scheme not in ("http", "https", "ftp"): | if urlparsed.scheme not in ("http", "https", "ftp"): | ||||
raise ArtifactNatureMistyped(f"Mistyped artifact '{url}'") | raise ArtifactNatureMistyped(f"Mistyped artifact '{url}'") | ||||
return Path(urlparsed.path).suffixes[-1].lstrip(".") in TARBALL_EXTENSIONS | return Path(urlparsed.path).suffixes[-1].lstrip(".") in TARBALL_EXTENSIONS | ||||
index = random.randrange(len(urls)) | index = random.randrange(len(urls)) | ||||
url = urls[index] | url = urls[index] | ||||
try: | try: | ||||
is_tar = _is_tarball(url) | return _is_tarball(url), urls[0] | ||||
return is_tar, urls[0] | |||||
except IndexError: | except IndexError: | ||||
if request is None: | if request is None: | ||||
raise ArtifactNatureUndetected( | raise ArtifactNatureUndetected( | ||||
f"Cannot determine artifact type from url <{url}>" | f"Cannot determine artifact type from url <{url}>" | ||||
) | ) | ||||
logger.warning( | logger.warning( | ||||
"Cannot detect extension for <%s>. Fallback to http head query", | "Cannot detect extension for <%s>. Fallback to http head query", | ||||
url, | url, | ||||
▲ Show 20 Lines • Show All 138 Lines • ▼ Show 20 Lines | def get_pages(self) -> Iterator[PageResult]: | ||||
) | ) | ||||
if not artifact_url: | if not artifact_url: | ||||
continue | continue | ||||
yield ArtifactType.VCS, VCS( | yield ArtifactType.VCS, VCS( | ||||
origin=artifact_url, type=artifact_type, ref=plain_ref | origin=artifact_url, type=artifact_type, ref=plain_ref | ||||
) | ) | ||||
elif artifact_type == "url": | elif artifact_type == "url": | ||||
# It's either a tarball or a file | # It's either a tarball or a file | ||||
urls = artifact.get("urls") | origin_urls = artifact.get("urls") | ||||
if not urls: | if not origin_urls: | ||||
# Nothing to fetch | # Nothing to fetch | ||||
logger.warning("Skipping url <%s>: empty artifact", artifact) | logger.warning("Skipping url <%s>: empty artifact", artifact) | ||||
continue | continue | ||||
assert urls is not None | assert origin_urls is not None | ||||
# Deal with urls with empty scheme (basic fallback to http) | |||||
urls = [] | |||||
for url in origin_urls: | |||||
urlparsed = urlparse(url) | |||||
if urlparsed.scheme == "": | |||||
logger.warning("Missing scheme for <%s>, fallback to http", url) | |||||
fixed_url = f"http://{url}" | |||||
else: | |||||
fixed_url = url | |||||
urls.append(fixed_url) | |||||
# FIXME: T3294: Fix missing scheme in urls | |||||
origin, *fallback_urls = urls | origin, *fallback_urls = urls | ||||
integrity = artifact.get("integrity") | integrity = artifact.get("integrity") | ||||
if integrity is None: | if integrity is None: | ||||
logger.warning("Skipping url <%s>: missing integrity field", origin) | logger.warning("Skipping url <%s>: missing integrity field", origin) | ||||
continue | continue | ||||
try: | try: | ||||
▲ Show 20 Lines • Show All 82 Lines • Show Last 20 Lines |