Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9749881
D4334.id15359.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
3 KB
Subscribers
None
D4334.id15359.diff
View Options
diff --git a/swh/storage/migrate_extrinsic_metadata.py b/swh/storage/migrate_extrinsic_metadata.py
--- a/swh/storage/migrate_extrinsic_metadata.py
+++ b/swh/storage/migrate_extrinsic_metadata.py
@@ -234,6 +234,30 @@
return match.group("project_name")
+def pypi_origin_from_filename(storage, rev_id: bytes, filename: str) -> Optional[str]:
+ project_name = pypi_project_from_filename(filename)
+ origin = f"https://pypi.org/project/{project_name}/"
+ # But unfortunately, the filename is user-provided, and doesn't
+ # necessarily match the package name on pypi. Therefore, we need
+ # to check it.
+ if not _check_revision_in_origin(storage, origin, rev_id):
+ origin_with_dashes = origin.replace("_", "-")
+ # if the file name contains underscores but we can't find
+ # a matching origin, also try with dashes. It's common for package
+ # names containing underscores to use dashes on pypi.
+ if (
+ "_" in origin
+ and check_origin_exists(storage, origin_with_dashes)
+ and _check_revision_in_origin(storage, origin_with_dashes, rev_id)
+ ):
+ origin = origin_with_dashes
+ else:
+ print(f"revision {rev_id.hex()} false positive of origin {origin}.")
+ return None
+
+ return origin
+
+
def cran_package_from_url(filename):
match = re.match(
r"^https://cran\.r-project\.org/src/contrib/"
@@ -907,39 +931,12 @@
assert len(metadata["original_artifact"]) == 1
- project_name = pypi_project_from_filename(
- metadata["original_artifact"][0]["filename"]
+ origin = pypi_origin_from_filename(
+ storage, row["id"], metadata["original_artifact"][0]["filename"]
)
- origin = f"https://pypi.org/project/{project_name}/"
- # But unfortunately, the filename is user-provided, and doesn't
- # necessarily match the package name on pypi. Therefore, we need
- # to check it.
- if not _check_revision_in_origin(storage, origin, row["id"]):
- origin_with_dashes = origin.replace("_", "-")
- # if the file name contains underscores but we can't find
- # a matching origin, also try with dashes. It's common for package
- # names containing underscores to use dashes on pypi.
- if (
- "_" in origin
- and check_origin_exists(storage, origin_with_dashes)
- and _check_revision_in_origin(
- storage, origin_with_dashes, row["id"]
- )
- ):
- origin = origin_with_dashes
- else:
- print(
- f"revision {row['id'].hex()} false positive of origin {origin}."
- )
- origin = None
if "project" in metadata:
# pypi loader format 2
-
- # same reason as above, we can't do this:
- # if metadata["project"]:
- # assert metadata["project"]["name"] == project_name
-
load_metadata(
storage,
row["id"],
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Aug 24, 6:23 PM (8 h, 10 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223649
Attached To
D4334: migrate_extrinsic_metadata: move pypi_origin_from_filename to its own function.
Event Timeline
Log In to Comment