Page Menu | Home | Software Heritage

D6270.id22700.diff
No One | Temporary

D6270.id22700.diff

diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py
--- a/swh/loader/package/pypi/loader.py
+++ b/swh/loader/package/pypi/loader.py
@@ -109,8 +109,15 @@
def get_package_info(self, version: str) -> Iterator[Tuple[str, PyPIPackageInfo]]:
res = []
for meta in self.info()["releases"][version]:
- if meta["packagetype"] != "sdist":
+ # process only standard sdist archives
+ if meta["packagetype"] != "sdist" or any(
+ [
+ meta["filename"].lower().endswith(ext)
+ for ext in (".deb", ".egg", ".rpm", ".whl")
+ ]
+ ):
continue
+
p_info = PyPIPackageInfo.from_metadata(meta)
res.append((version, p_info))
diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py
--- a/swh/loader/package/pypi/tests/test_pypi.py
+++ b/swh/loader/package/pypi/tests/test_pypi.py
@@ -848,3 +848,55 @@
# without comment_text and version in PKG-INFO, message should be empty
assert revision.message == b""
+
+
+def test_filter_out_invalid_sdists(swh_storage, requests_mock):
+ project_name = "swh-test-sdist-filtering"
+ version = "1.0.0"
+ url = f"https://pypi.org/project/{project_name}"
+ json_url = f"https://pypi.org/pypi/{project_name}/json"
+
+ common_sdist_entries = {
+ "url": "",
+ "comment_text": "",
+ "digests": {"sha256": ""},
+ "upload_time": "",
+ "packagetype": "sdist",
+ }
+
+ requests_mock.get(
+ json_url,
+ json={
+ "releases": {
+ version: [
+ {
+ **common_sdist_entries,
+ "filename": f"{project_name}-{version}.tar.gz",
+ },
+ {
+ **common_sdist_entries,
+ "filename": f"{project_name}-{version}.egg",
+ },
+ {
+ **common_sdist_entries,
+ "filename": f"{project_name}-{version}.deb",
+ },
+ {
+ **common_sdist_entries,
+ "filename": f"{project_name}-{version}.rpm",
+ },
+ {
+ **common_sdist_entries,
+ "filename": f"{project_name}-{version}.whl",
+ },
+ ]
+ },
+ },
+ )
+
+ loader = PyPILoader(swh_storage, url)
+
+ packages = list(loader.get_package_info(version=version))
+
+ assert len(packages) == 1
+ assert packages[0][1].filename.endswith(".tar.gz")

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 2:23 PM (3 d, 9 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3230789

Event Timeline