Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/gnu/tree.py
# Copyright (C) 2019-2021 The Software Heritage developers | # Copyright (C) 2019-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from datetime import datetime, timezone | from datetime import datetime, timezone | ||||
import gzip | import gzip | ||||
import json | import json | ||||
import logging | import logging | ||||
from os import path | from os import path | ||||
from pathlib import Path | from pathlib import Path | ||||
import re | import re | ||||
from typing import Any, List, Mapping, Sequence, Tuple | from typing import Any, List, Mapping, Sequence, Tuple | ||||
from urllib.parse import urlparse | from urllib.parse import urlparse | ||||
import requests | import requests | ||||
from swh.lister import TARBALL_EXTENSIONS | |||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
class GNUTree: | class GNUTree: | ||||
"""Gnu Tree's representation""" | """Gnu Tree's representation""" | ||||
def __init__(self, url: str): | def __init__(self, url: str): | ||||
self.url = url # filepath or uri | self.url = url # filepath or uri | ||||
▲ Show 20 Lines • Show All 155 Lines • ▼ Show 20 Lines | def check_filename_is_archive(filename: str) -> bool: | ||||
if len(file_suffixes) == 1 and file_suffixes[-1] in (".zip", ".tar"): | if len(file_suffixes) == 1 and file_suffixes[-1] in (".zip", ".tar"): | ||||
return True | return True | ||||
elif len(file_suffixes) > 1: | elif len(file_suffixes) > 1: | ||||
if file_suffixes[-1] == ".zip" or file_suffixes[-2] == ".tar": | if file_suffixes[-1] == ".zip" or file_suffixes[-2] == ".tar": | ||||
return True | return True | ||||
return False | return False | ||||
# to recognize existing naming pattern | |||||
EXTENSIONS = [ | |||||
"zip", | |||||
"tar", | |||||
"gz", | |||||
"tgz", | |||||
"bz2", | |||||
"bzip2", | |||||
"lzma", | |||||
"lz", | |||||
"xz", | |||||
"Z", | |||||
"7z", | |||||
] | |||||
VERSION_KEYWORDS = [ | VERSION_KEYWORDS = [ | ||||
"cygwin_me", | "cygwin_me", | ||||
"w32", | "w32", | ||||
"win32", | "win32", | ||||
"nt", | "nt", | ||||
"cygwin", | "cygwin", | ||||
"mingw", | "mingw", | ||||
"latest", | "latest", | ||||
▲ Show 20 Lines • Show All 52 Lines • ▼ Show 20 Lines | |||||
| | | | ||||
# We couldn't match a release number, put everything in the | # We couldn't match a release number, put everything in the | ||||
# software name. | # software name. | ||||
(?P<software_name2>.+?) | (?P<software_name2>.+?) | ||||
) | ) | ||||
(?P<extension>(?:\.(?:{extensions}))+) | (?P<extension>(?:\.(?:{extensions}))+) | ||||
$ | $ | ||||
""".format( | """.format( | ||||
extensions="|".join(EXTENSIONS), | extensions="|".join(TARBALL_EXTENSIONS), | ||||
vkeywords="|".join("%s[-]?" % k for k in VERSION_KEYWORDS), | vkeywords="|".join("%s[-]?" % k for k in VERSION_KEYWORDS), | ||||
) | ) | ||||
def get_version(uri: str) -> str: | def get_version(uri: str) -> str: | ||||
"""Extract branch name from tarball uri | """Extract branch name from tarball uri | ||||
Args: | Args: | ||||
▲ Show 20 Lines • Show All 52 Lines • Show Last 20 Lines |