diff --git a/swh/storage/migrate_extrinsic_metadata.py b/swh/storage/migrate_extrinsic_metadata.py --- a/swh/storage/migrate_extrinsic_metadata.py +++ b/swh/storage/migrate_extrinsic_metadata.py @@ -115,14 +115,16 @@ # not reliable, because PyPI allows arbitrary names def pypi_project_from_filename(filename): - if filename == "mongomotor-0.13.0.n.tar.gz": + if filename.endswith(".egg"): + return None + elif filename == "mongomotor-0.13.0.n.tar.gz": return "mongomotor" elif re.match(r"datahaven-rev[0-9]+\.tar\.gz", filename): return "datahaven" elif re.match(r"Dtls-[0-9]\.[0-9]\.[0-9]\.sdist_with_openssl\..*", filename): return "Dtls" - elif re.match("pytz-20[0-9][0-9][a-z].tar.gz", filename): - return "pytz" + elif re.match(r"(gae)?pytz-20[0-9][0-9][a-z]\.(tar\.gz|zip)", filename): + return filename.split("-", 1)[0] elif filename.startswith(("powny-", "obedient.powny-",)): return filename.split("-")[0] elif filename.startswith("devpi-theme-16-"): @@ -150,6 +152,10 @@ return "LitReview" elif filename.startswith("django_options-r"): return "django_options" + elif filename == "Greater than, equal, or less Library-0.1.tar.gz": + return "Greater-than-equal-or-less-Library" + elif filename.startswith("upstart--main-"): + return "upstart" filename = filename.replace(" ", "-") match = re.match( @@ -171,7 +177,7 @@ r"([.-]?(alpha|beta|dev|post|pre|rc)(\.?[0-9]+)?)*" # development status r"([.-]?20[012][0-9]{5,9})?" # date r"([.-]g?[0-9a-f]+)?" # git commit - r"(-py(thon)?[23](\.?[0-9]{1,2})?)?" # python version + r"([-+]py(thon)?(3k|[23](\.?[0-9]{1,2})?))?" 
# python version r"\.(tar\.gz|tar\.bz2|tgz|zip)$", # extension filename, re.I, @@ -197,6 +203,18 @@ filename, re.I, ) + if match: + return match.group("project_name") + + # If that still doesn't work, give one last chance if there's only one + # dash or underscore in the name + + match = re.match( + r"^(?P<project_name>[^_-]+)" # project name + r"[_-][^_-]+" # version + r"\.(tar\.gz|tar\.bz2|tgz|zip)$", # extension + filename, + ) assert match, filename return match.group("project_name") diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py b/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py @@ -114,6 +114,15 @@ ("LitReview-0.6989ev.tar.gz", "LitReview"), ("django_options-r5.tar.gz", "django_options"), ("ddlib-2013-11-07.tar.gz", "ddlib"), + ("python-morfeusz-0.3000+py3k.tar.gz", "python-morfeusz"), + ("gaepytz-2011h.zip", "gaepytz"), + ("ftldat-r3.tar.gz", "ftldat"), + ("tigretoolbox-0.0.0-py2.7-linux-x86_64.egg", None), + ( + "Greater than, equal, or less Library-0.1.tar.gz", + "Greater-than-equal-or-less-Library", + ), + ("upstart--main-.-VLazy.object.at.0x104ba8b50-.tar.gz", "upstart"), ] for (filename, project) in files: