diff --git a/swh/lister/launchpad/lister.py b/swh/lister/launchpad/lister.py
--- a/swh/lister/launchpad/lister.py
+++ b/swh/lister/launchpad/lister.py
@@ -93,12 +93,15 @@
         """
         assert self.lister_obj.id is not None
 
+        prev_origin_url = None
+
         for repo in page:
 
             origin_url = repo.git_https_url
 
-            # filter out origins with invalid URL
-            if not origin_url.startswith("https://"):
+            # filter out origins with invalid URL or origin previously listed
+            # (last modified repository will be listed twice by launchpadlib)
+            if not origin_url.startswith("https://") or origin_url == prev_origin_url:
                 continue
 
             last_update = repo.date_last_modified
@@ -107,6 +110,8 @@
 
             logger.debug("Found origin %s last updated on %s", origin_url, last_update)
 
+            prev_origin_url = origin_url
+
             yield ListedOrigin(
                 lister_id=self.lister_obj.id,
                 visit_type="git",
diff --git a/swh/lister/launchpad/tests/test_lister.py b/swh/lister/launchpad/tests/test_lister.py
--- a/swh/lister/launchpad/tests/test_lister.py
+++ b/swh/lister/launchpad/tests/test_lister.py
@@ -154,3 +154,22 @@
     assert not lister.updated
     assert stats.pages == 1
     assert stats.origins == 0
+
+
+def test_launchpad_lister_duplicated_origin(
+    swh_scheduler, mocker,
+):
+    origin = _Repo(
+        {
+            "git_https_url": "https://git.launchpad.net/test",
+            "date_last_modified": "2021-01-14 21:05:31.231406+00:00",
+        }
+    )
+    origins = [origin, origin]
+    _mock_getRepositories(mocker, origins)
+    lister = LaunchpadLister(scheduler=swh_scheduler)
+    stats = lister.run()
+
+    assert lister.updated
+    assert stats.pages == 1
+    assert stats.origins == 1