Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9344378
D6270.id22700.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
2 KB
Subscribers
None
D6270.id22700.diff
View Options
diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py
--- a/swh/loader/package/pypi/loader.py
+++ b/swh/loader/package/pypi/loader.py
@@ -109,8 +109,15 @@
def get_package_info(self, version: str) -> Iterator[Tuple[str, PyPIPackageInfo]]:
res = []
for meta in self.info()["releases"][version]:
- if meta["packagetype"] != "sdist":
+ # process only standard sdist archives
+ if meta["packagetype"] != "sdist" or any(
+ [
+ meta["filename"].lower().endswith(ext)
+ for ext in (".deb", ".egg", ".rpm", ".whl")
+ ]
+ ):
continue
+
p_info = PyPIPackageInfo.from_metadata(meta)
res.append((version, p_info))
diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py
--- a/swh/loader/package/pypi/tests/test_pypi.py
+++ b/swh/loader/package/pypi/tests/test_pypi.py
@@ -848,3 +848,55 @@
# without comment_text and version in PKG-INFO, message should be empty
assert revision.message == b""
+
+
+def test_filter_out_invalid_sdists(swh_storage, requests_mock):
+ project_name = "swh-test-sdist-filtering"
+ version = "1.0.0"
+ url = f"https://pypi.org/project/{project_name}"
+ json_url = f"https://pypi.org/pypi/{project_name}/json"
+
+ common_sdist_entries = {
+ "url": "",
+ "comment_text": "",
+ "digests": {"sha256": ""},
+ "upload_time": "",
+ "packagetype": "sdist",
+ }
+
+ requests_mock.get(
+ json_url,
+ json={
+ "releases": {
+ version: [
+ {
+ **common_sdist_entries,
+ "filename": f"{project_name}-{version}.tar.gz",
+ },
+ {
+ **common_sdist_entries,
+ "filename": f"{project_name}-{version}.egg",
+ },
+ {
+ **common_sdist_entries,
+ "filename": f"{project_name}-{version}.deb",
+ },
+ {
+ **common_sdist_entries,
+ "filename": f"{project_name}-{version}.rpm",
+ },
+ {
+ **common_sdist_entries,
+ "filename": f"{project_name}-{version}.whl",
+ },
+ ]
+ },
+ },
+ )
+
+ loader = PyPILoader(swh_storage, url)
+
+ packages = list(loader.get_package_info(version=version))
+
+ assert len(packages) == 1
+ assert packages[0][1].filename.endswith(".tar.gz")
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 2:23 PM (3 d, 9 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3230789
Attached To
D6270: pypi/loader: Filter out sdist archives not of interest
Event Timeline
Log In to Comment