diff --git a/swh/storage/migrate_extrinsic_metadata.py b/swh/storage/migrate_extrinsic_metadata.py --- a/swh/storage/migrate_extrinsic_metadata.py +++ b/swh/storage/migrate_extrinsic_metadata.py @@ -115,6 +115,7 @@ # not reliable, because PyPI allows arbitrary names def pypi_project_from_filename(filename): + original_filename = filename if filename.endswith(".egg"): return None elif filename == "mongomotor-0.13.0.n.tar.gz": @@ -156,6 +157,20 @@ return "Greater-than-equal-or-less-Library" elif filename.startswith("upstart--main-"): return "upstart" + elif filename == "duckduckpy0.1.tar.gz": + return "duckduckpy" + elif filename == "QUI for MPlayer snapshot_9-14-2011.zip": + return "QUI-for-MPlayer" + elif filename == "Eddy's Memory Game-1.0.zip": + return "Eddy-s-Memory-Game" + elif filename == "jekyll2nikola-0-0-1.tar.gz": + return "jekyll2nikola" + elif filename.startswith("ore.workflowed"): + return "ore.workflowed" + elif re.match("instancemanager-[0-9]*", filename): + return "instancemanager" + elif filename == "OrzMC_W&L-1.0.0.tar.gz": + return "OrzMC-W-L" filename = filename.replace(" ", "-") match = re.match( @@ -215,7 +230,7 @@ r"\.(tar\.gz|tar\.bz2|tgz|zip)$", # extension filename, ) - assert match, filename + assert match, original_filename return match.group("project_name") diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py b/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py @@ -123,6 +123,13 @@ "Greater-than-equal-or-less-Library", ), ("upstart--main-.-VLazy.object.at.0x104ba8b50-.tar.gz", "upstart"), + ("duckduckpy0.1.tar.gz", "duckduckpy"), + ("QUI for MPlayer snapshot_9-14-2011.zip", "QUI-for-MPlayer"), + ("Eddy's Memory Game-1.0.zip", "Eddy-s-Memory-Game"), + ("jekyll2nikola-0-0-1.tar.gz", "jekyll2nikola"), + ("ore.workflowed-0-6-2.tar.gz", "ore.workflowed"), + ("instancemanager-1.0rc-r34317.tar.gz", "instancemanager"), + ("OrzMC_W&L-1.0.0.tar.gz", "OrzMC-W-L"), ] for (filename, project) in files: