diff --git a/swh/lister/cgit/lister.py b/swh/lister/cgit/lister.py
--- a/swh/lister/cgit/lister.py
+++ b/swh/lister/cgit/lister.py
@@ -69,7 +69,11 @@
-    def _get_and_parse(self, url: str) -> BeautifulSoup:
-        """Get the given url and parse the retrieved HTML using BeautifulSoup"""
+    def _get_and_parse(self, url: str) -> Optional[BeautifulSoup]:
+        """Get the given url and parse the retrieved HTML using BeautifulSoup.
+
+        Returns None when the response status is an error (4xx or 5xx).
+        """
         response = self.session.get(url)
-        response.raise_for_status()
+        if not response.ok:
+            return None
         return BeautifulSoup(response.text, features="html.parser")
 
     def get_pages(self) -> Iterator[Repositories]:
@@ -80,6 +84,11 @@
         next_page: Optional[str] = self.url
         while next_page:
             bs_idx = self._get_and_parse(next_page)
+            if bs_idx is None:
+                # An index page is required to list anything: fail explicitly
+                # instead of using an assert, which `python -O` strips.
+                raise OSError(f"Unable to fetch cgit index page {next_page}")
+
             page_results = []
 
             for tr in bs_idx.find("div", {"class": "content"}).find_all(
@@ -130,6 +139,9 @@
     def _get_origin_from_repository_url(self, repository_url: str) -> Optional[str]:
         """Extract the git url from the repository page"""
         bs = self._get_and_parse(repository_url)
+        if bs is None:
+            # repository page could not be retrieved: no origin url to extract
+            return None
 
         # origin urls are listed on the repository page
         # TODO check if forcing https is better or not ?
diff --git a/swh/lister/cgit/tests/test_lister.py b/swh/lister/cgit/tests/test_lister.py
--- a/swh/lister/cgit/tests/test_lister.py
+++ b/swh/lister/cgit/tests/test_lister.py
@@ -7,6 +7,7 @@
 
 import pytest
 
+from swh.core.pytest_plugin import requests_mock_datadir_factory
 from swh.lister import __version__
 from swh.lister.cgit.lister import CGitLister, _parse_last_updated_date
 from swh.lister.pattern import ListerStats
@@ -142,3 +143,21 @@
     repository = {"url": "url", "last_updated_date": date_str}
 
     assert _parse_last_updated_date(repository) == expected_date
+
+
+# Datadir-backed requests mock that is told to ignore one repository page
+# (presumably making requests to that URL fail -- confirm against swh.core).
+requests_mock_datadir_missing_url = requests_mock_datadir_factory(
+    ignore_urls=[
+        "https://git.tizen/cgit/adaptation/ap_samsung/audio-hal-e4x12/",
+    ]
+)
+
+
+def test_lister_cgit_get_origin_from_repo_failing(
+    requests_mock_datadir_missing_url, swh_scheduler
+):
+    """Run the lister with one repository page ignored by the mock."""
+    cgit_lister = CGitLister(swh_scheduler, url="https://git.tizen/cgit/")
+
+    assert cgit_lister.run() == ListerStats(pages=3, origins=15)