diff --git a/mypy.ini b/mypy.ini --- a/mypy.ini +++ b/mypy.ini @@ -36,6 +36,3 @@ [mypy-urllib3.util.*] ignore_missing_imports = True - -[mypy-xmltodict.*] -ignore_missing_imports = True diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,6 @@ python_debian requests setuptools -xmltodict iso8601 beautifulsoup4 launchpadlib diff --git a/swh/lister/pypi/lister.py b/swh/lister/pypi/lister.py --- a/swh/lister/pypi/lister.py +++ b/swh/lister/pypi/lister.py @@ -6,8 +6,8 @@ import logging from typing import Iterator, List, Optional +from bs4 import BeautifulSoup import requests -import xmltodict from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin @@ -54,8 +54,9 @@ response.raise_for_status() - page_xmldict = xmltodict.parse(response.content) - page_results = [p["#text"] for p in page_xmldict["html"]["body"]["a"]] + page = BeautifulSoup(response.content, features="html.parser") + + page_results = [p.text for p in page.find_all("a")] yield page_results