Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/migrate_extrinsic_metadata.py
Show First 20 Lines • Show All 109 Lines • ▼ Show 20 Lines | |||||
deposit_revision_message_re = re.compile( | deposit_revision_message_re = re.compile( | ||||
b"(?P<client>[a-z-]*): " | b"(?P<client>[a-z-]*): " | ||||
b"Deposit (?P<deposit_id>[0-9]+) in collection (?P<collection>[a-z-]+).*" | b"Deposit (?P<deposit_id>[0-9]+) in collection (?P<collection>[a-z-]+).*" | ||||
) | ) | ||||
# not reliable, because PyPI allows arbitrary names | # not reliable, because PyPI allows arbitrary names | ||||
def pypi_project_from_filename(filename): | def pypi_project_from_filename(filename): | ||||
if filename == "mongomotor-0.13.0.n.tar.gz": | if filename.endswith(".egg"): | ||||
return None | |||||
elif filename == "mongomotor-0.13.0.n.tar.gz": | |||||
return "mongomotor" | return "mongomotor" | ||||
elif re.match(r"datahaven-rev[0-9]+\.tar\.gz", filename): | elif re.match(r"datahaven-rev[0-9]+\.tar\.gz", filename): | ||||
return "datahaven" | return "datahaven" | ||||
elif re.match(r"Dtls-[0-9]\.[0-9]\.[0-9]\.sdist_with_openssl\..*", filename): | elif re.match(r"Dtls-[0-9]\.[0-9]\.[0-9]\.sdist_with_openssl\..*", filename): | ||||
return "Dtls" | return "Dtls" | ||||
elif re.match("pytz-20[0-9][0-9][a-z].tar.gz", filename): | elif re.match(r"(gae)?pytz-20[0-9][0-9][a-z]\.(tar\.gz|zip)", filename): | ||||
return "pytz" | return filename.split("-", 1)[0] | ||||
elif filename.startswith(("powny-", "obedient.powny-",)): | elif filename.startswith(("powny-", "obedient.powny-",)): | ||||
return filename.split("-")[0] | return filename.split("-")[0] | ||||
elif filename.startswith("devpi-theme-16-"): | elif filename.startswith("devpi-theme-16-"): | ||||
return "devpi-theme-16" | return "devpi-theme-16" | ||||
elif re.match("[^-]+-[0-9]+.tar.gz", filename): | elif re.match("[^-]+-[0-9]+.tar.gz", filename): | ||||
return filename.split("-")[0] | return filename.split("-")[0] | ||||
elif filename == "ohai-1!0.tar.gz": | elif filename == "ohai-1!0.tar.gz": | ||||
return "ohai" | return "ohai" | ||||
Show All 11 Lines | elif filename.startswith("youtube_dl_server-alpha."): | ||||
return "youtube_dl_server" | return "youtube_dl_server" | ||||
elif filename == "json-extensions-b76bc7d.tar.gz": | elif filename == "json-extensions-b76bc7d.tar.gz": | ||||
return "json-extensions" | return "json-extensions" | ||||
elif filename == "LitReview-0.6989ev.tar.gz": | elif filename == "LitReview-0.6989ev.tar.gz": | ||||
# typo of "dev" | # typo of "dev" | ||||
return "LitReview" | return "LitReview" | ||||
elif filename.startswith("django_options-r"): | elif filename.startswith("django_options-r"): | ||||
return "django_options" | return "django_options" | ||||
elif filename == "Greater than, equal, or less Library-0.1.tar.gz": | |||||
return "Greater-than-equal-or-less-Library" | |||||
elif filename.startswith("upstart--main-"): | |||||
return "upstart" | |||||
filename = filename.replace(" ", "-") | filename = filename.replace(" ", "-") | ||||
match = re.match( | match = re.match( | ||||
r"^(?P<project_name>[a-z_.-]+)" # project name | r"^(?P<project_name>[a-z_.-]+)" # project name | ||||
r"\.(tar\.gz|tar\.bz2|tgz|zip)$", # extension | r"\.(tar\.gz|tar\.bz2|tgz|zip)$", # extension | ||||
filename, | filename, | ||||
re.I, | re.I, | ||||
) | ) | ||||
if match: | if match: | ||||
return match.group("project_name") | return match.group("project_name") | ||||
# First try with a rather strict format, but that allows accidentally | # First try with a rather strict format, but that allows accidentally | ||||
# matching the version as part of the package name | # matching the version as part of the package name | ||||
match = re.match( | match = re.match( | ||||
r"^(?P<project_name>[a-z0-9_.]+?([-_][a-z][a-z0-9.]+?)*?)" # project name | r"^(?P<project_name>[a-z0-9_.]+?([-_][a-z][a-z0-9.]+?)*?)" # project name | ||||
r"-v?" | r"-v?" | ||||
r"([0-9]+!)?" # epoch | r"([0-9]+!)?" # epoch | ||||
r"[0-9_.]+([a-z]+[0-9]+)?" # "main" version | r"[0-9_.]+([a-z]+[0-9]+)?" # "main" version | ||||
r"([.-]?(alpha|beta|dev|post|pre|rc)(\.?[0-9]+)?)*" # development status | r"([.-]?(alpha|beta|dev|post|pre|rc)(\.?[0-9]+)?)*" # development status | ||||
r"([.-]?20[012][0-9]{5,9})?" # date | r"([.-]?20[012][0-9]{5,9})?" # date | ||||
r"([.-]g?[0-9a-f]+)?" # git commit | r"([.-]g?[0-9a-f]+)?" # git commit | ||||
r"(-py(thon)?[23](\.?[0-9]{1,2})?)?" # python version | r"([-+]py(thon)?(3k|[23](\.?[0-9]{1,2})?))?" # python version | ||||
r"\.(tar\.gz|tar\.bz2|tgz|zip)$", # extension | r"\.(tar\.gz|tar\.bz2|tgz|zip)$", # extension | ||||
filename, | filename, | ||||
re.I, | re.I, | ||||
) | ) | ||||
if match: | if match: | ||||
return match.group("project_name") | return match.group("project_name") | ||||
# If that doesn't work, give up on trying to parse version suffixes, | # If that doesn't work, give up on trying to parse version suffixes, | ||||
Show All 9 Lines | match = re.match( | ||||
r"|20[012][0-9]-[01][0-9]-[0-3][0-9]" # date as ISO 8601 | r"|20[012][0-9]-[01][0-9]-[0-3][0-9]" # date as ISO 8601 | ||||
r")" # end of "main" version | r")" # end of "main" version | ||||
r"[a-z]?(dev|pre)?" # direct version suffix | r"[a-z]?(dev|pre)?" # direct version suffix | ||||
r"([._-].*)?" # extra suffixes | r"([._-].*)?" # extra suffixes | ||||
r"\.(tar\.gz|tar\.bz2|tgz|zip)$", # extension | r"\.(tar\.gz|tar\.bz2|tgz|zip)$", # extension | ||||
filename, | filename, | ||||
re.I, | re.I, | ||||
) | ) | ||||
if match: | |||||
return match.group("project_name") | |||||
# If that still doesn't work, give one last chance if there's only one | |||||
# dash or underscore in the name | |||||
match = re.match( | |||||
r"^(?P<project_name>[^_-]+)" # project name | |||||
r"[_-][^_-]+" # version | |||||
r"\.(tar\.gz|tar\.bz2|tgz|zip)$", # extension | |||||
filename, | |||||
) | |||||
assert match, filename | assert match, filename | ||||
return match.group("project_name") | return match.group("project_name") | ||||
def cran_package_from_url(filename): | def cran_package_from_url(filename): | ||||
match = re.match( | match = re.match( | ||||
r"^https://cran\.r-project\.org/src/contrib/" | r"^https://cran\.r-project\.org/src/contrib/" | ||||
r"(?P<package_name>[a-zA-Z0-9.]+)_[0-9.-]+(\.tar\.gz)?$", | r"(?P<package_name>[a-zA-Z0-9.]+)_[0-9.-]+(\.tar\.gz)?$", | ||||
▲ Show 20 Lines • Show All 888 Lines • Show Last 20 Lines |