Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/nixguix/lister.py
Show All 16 Lines | |||||
import base64 | import base64 | ||||
import binascii | import binascii | ||||
from dataclasses import dataclass | from dataclasses import dataclass | ||||
from enum import Enum | from enum import Enum | ||||
import logging | import logging | ||||
from pathlib import Path | from pathlib import Path | ||||
import random | import random | ||||
import re | |||||
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union | from typing import Any, Dict, Iterator, List, Optional, Tuple, Union | ||||
from urllib.parse import parse_qsl, urlparse | from urllib.parse import parse_qsl, urlparse | ||||
import requests | import requests | ||||
from requests.exceptions import ConnectionError, InvalidSchema, SSLError | from requests.exceptions import ConnectionError, InvalidSchema, SSLError | ||||
from swh.core.github.utils import GitHubSession | from swh.core.github.utils import GitHubSession | ||||
from swh.core.tarball import MIMETYPE_TO_ARCHIVE_FORMAT | from swh.core.tarball import MIMETYPE_TO_ARCHIVE_FORMAT | ||||
▲ Show 20 Lines • Show All 97 Lines • ▼ Show 20 Lines | |||||
PageResult = Tuple[ArtifactType, Union[Artifact, VCS]] | PageResult = Tuple[ArtifactType, Union[Artifact, VCS]] | ||||
VCS_SUPPORTED = ("git", "svn", "hg") | VCS_SUPPORTED = ("git", "svn", "hg") | ||||
# Rough approximation of what we can find of mimetypes for tarballs "out there" | # Rough approximation of what we can find of mimetypes for tarballs "out there" | ||||
POSSIBLE_TARBALL_MIMETYPES = tuple(MIMETYPE_TO_ARCHIVE_FORMAT.keys()) | POSSIBLE_TARBALL_MIMETYPES = tuple(MIMETYPE_TO_ARCHIVE_FORMAT.keys()) | ||||
ardumont: probably too open a regexp... | |||||
Done Inline Actionsopened* ardumont: opened* | |||||
PATTERN_VERSION = re.compile(r"(v*[0-9]+[.])([0-9]+[.]*)+") | |||||
def url_endswith( | def url_endswith( | ||||
urlparsed, extensions: List[str], raise_when_no_extension: bool = True | urlparsed, extensions: List[str], raise_when_no_extension: bool = True | ||||
) -> bool: | ) -> bool: | ||||
"""Determine whether urlparsed ends with one of the extensions. | """Determine whether urlparsed ends with one of the extensions passed as parameter. | ||||
This also accounts for the edge case of a filename with only a version as name (so no | |||||
extension at the end). | |||||
Raises: | Raises: | ||||
ArtifactWithoutExtension in case no extension is available and raise_when_no_extension | ArtifactWithoutExtension in case no extension is available and | ||||
is True (the default) | raise_when_no_extension is True (the default) | ||||
""" | """ | ||||
paths = [Path(p) for (_, p) in [("_", urlparsed.path)] + parse_qsl(urlparsed.query)] | paths = [Path(p) for (_, p) in [("_", urlparsed.path)] + parse_qsl(urlparsed.query)] | ||||
if raise_when_no_extension and not any(path.suffix != "" for path in paths): | if raise_when_no_extension and not any(path.suffix != "" for path in paths): | ||||
raise ArtifactWithoutExtension | raise ArtifactWithoutExtension | ||||
return any(path.suffix.endswith(tuple(extensions)) for path in paths) | match = any(path.suffix.endswith(tuple(extensions)) for path in paths) | ||||
if match: | |||||
return match | |||||
# Some false negatives can happen (e.g. https://<netloc>/path/0.1.5), so make sure | |||||
# to catch those | |||||
name = Path(urlparsed.path).name | |||||
if not PATTERN_VERSION.match(name): | |||||
return match | |||||
if raise_when_no_extension: | |||||
raise ArtifactWithoutExtension | |||||
return False | |||||
def is_tarball(urls: List[str], request: Optional[Any] = None) -> Tuple[bool, str]: | def is_tarball(urls: List[str], request: Optional[Any] = None) -> Tuple[bool, str]: | ||||
"""Determine whether a list of files actually are tarballs or simple files. | """Determine whether a list of files actually are tarballs or simple files. | ||||
When this cannot be answered simply out of the url, when request is provided, this | When this cannot be answered simply out of the url, when request is provided, this | ||||
executes a HTTP `HEAD` query on the url to determine the information. If request is | executes a HTTP `HEAD` query on the url to determine the information. If request is | ||||
not provided, this raises an ArtifactNatureUndetected exception. | not provided, this raises an ArtifactNatureUndetected exception. | ||||
▲ Show 20 Lines • Show All 378 Lines • Show Last 20 Lines |
probably too open a regexp...