diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py --- a/swh/lister/github/lister.py +++ b/swh/lister/github/lister.py @@ -8,7 +8,7 @@ import logging import random import time -from typing import Any, Dict, Iterator, List, Optional +from typing import Any, Dict, Iterator, List, Optional, Set from urllib.parse import parse_qs, urlparse import iso8601 @@ -305,11 +305,17 @@ """ assert self.lister_obj.id is not None + seen_in_page: Set[str] = set() + for repo in page: if not repo: # null repositories in listings happen sometimes... continue + if repo["html_url"] in seen_in_page: + continue + seen_in_page.add(repo["html_url"]) + pushed_at_str = repo.get("pushed_at") pushed_at: Optional[datetime.datetime] = None if pushed_at_str: