Page Menu | Home | Software Heritage

D5027.diff
No One | Temporary

D5027.diff

diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -36,6 +36,3 @@
[mypy-urllib3.util.*]
ignore_missing_imports = True
-
-[mypy-xmltodict.*]
-ignore_missing_imports = True
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,6 @@
python_debian
requests
setuptools
-xmltodict
iso8601
beautifulsoup4
launchpadlib
diff --git a/swh/lister/pypi/lister.py b/swh/lister/pypi/lister.py
--- a/swh/lister/pypi/lister.py
+++ b/swh/lister/pypi/lister.py
@@ -6,8 +6,8 @@
import logging
from typing import Iterator, List, Optional
+from bs4 import BeautifulSoup
import requests
-import xmltodict
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@@ -54,8 +54,9 @@
response.raise_for_status()
- page_xmldict = xmltodict.parse(response.content)
- page_results = [p["#text"] for p in page_xmldict["html"]["body"]["a"]]
+ page = BeautifulSoup(response.content, features="html.parser")
+
+ page_results = [p.text for p in page.find_all("a")]
yield page_results

File Metadata

Mime Type
text/plain
Expires
Dec 21 2024, 6:05 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220636

Event Timeline