diff --git a/requirements-http.txt b/requirements-http.txt
--- a/requirements-http.txt
+++ b/requirements-http.txt
@@ -6,3 +6,4 @@
 iso8601
 msgpack >= 1.0.0
 requests
+tenacity
diff --git a/swh/core/github/utils.py b/swh/core/github/utils.py
--- a/swh/core/github/utils.py
+++ b/swh/core/github/utils.py
@@ -1,17 +1,30 @@
-# Copyright (C) 2022  The Software Heritage developers
+# Copyright (C) 2020-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 
+import logging
+import random
 import re
-from typing import Optional
+import time
+from typing import Dict, List, Optional
 
 import requests
+from tenacity import (
+    retry,
+    retry_any,
+    retry_if_exception_type,
+    retry_if_result,
+    wait_exponential,
+)
 
 GITHUB_PATTERN = re.compile(r"https?://github.com/(?P<user_repo>.*)")
 
 
+logger = logging.getLogger(__name__)
+
+
 def _url_github_html(user_repo: str) -> str:
     """Given the user repo, returns the expected github html url."""
     return f"https://github.com/{user_repo}"
@@ -46,3 +59,156 @@
         return None
     data = response.json()
     return data["html_url"]
+
+
+class RateLimited(Exception):
+    def __init__(self, response):
+        self.reset_time: Optional[int]
+
+        # Figure out how long we need to sleep because of that rate limit
+        ratelimit_reset = response.headers.get("X-Ratelimit-Reset")
+        retry_after = response.headers.get("Retry-After")
+        if ratelimit_reset is not None:
+            self.reset_time = int(ratelimit_reset)
+        elif retry_after is not None:
+            self.reset_time = int(time.time()) + int(retry_after) + 1
+        else:
+            logger.warning(
+                "Received a rate-limit-like status code %s, but no rate-limit "
+                "headers set. Response content: %s",
+                response.status_code,
+                response.content,
+            )
+            self.reset_time = None
+        self.response = response
+
+
+class MissingRateLimitReset(Exception):
+    pass
+
+
+class GitHubSession:
+    """Manages a :class:`requests.Session` with (optionally) multiple credentials,
+    and cycles through them when reaching rate-limits."""
+
+    credentials: Optional[List[Dict[str, str]]] = None
+
+    def __init__(
+        self, user_agent: str, credentials: Optional[List[Dict[str, str]]] = None
+    ) -> None:
+        """Initialize a requests session with the proper headers for requests to
+        GitHub."""
+        if credentials:
+            creds = credentials.copy()
+            random.shuffle(creds)
+            self.credentials = creds
+
+        self.session = requests.Session()
+
+        self.session.headers.update(
+            {"Accept": "application/vnd.github.v3+json", "User-Agent": user_agent}
+        )
+
+        self.anonymous = not self.credentials
+
+        if self.anonymous:
+            logger.warning("No tokens set in configuration, using anonymous mode")
+
+        self.token_index = -1
+        self.current_user: Optional[str] = None
+
+        if not self.anonymous:
+            # Initialize the first token value in the session headers
+            self.set_next_session_token()
+
+    def set_next_session_token(self) -> None:
+        """Update the current authentication token with the next one in line."""
+
+        assert self.credentials
+
+        self.token_index = (self.token_index + 1) % len(self.credentials)
+
+        auth = self.credentials[self.token_index]
+
+        self.current_user = auth["username"]
+        logger.debug("Using authentication token for user %s", self.current_user)
+
+        if "password" in auth:
+            token = auth["password"]
+        else:
+            token = auth["token"]
+
+        self.session.headers.update({"Authorization": f"token {token}"})
+
+    @retry(
+        wait=wait_exponential(multiplier=1, min=4, max=10),
+        retry=retry_any(
+            # ChunkedEncodingErrors happen when the TLS connection gets reset, e.g.
+            # when running the lister on a connection with high latency
+            retry_if_exception_type(requests.exceptions.ChunkedEncodingError),
+            # 502 status codes happen for a Server Error, sometimes
+            retry_if_result(lambda r: r.status_code == 502),
+        ),
+    )
+    def _request(self, url: str) -> requests.Response:
+        response = self.session.get(url)
+
+        if (
+            # GitHub returns inconsistent status codes between unauthenticated
+            # rate limit and authenticated rate limits. Handle both.
+            response.status_code == 429
+            or (self.anonymous and response.status_code == 403)
+        ):
+            raise RateLimited(response)
+
+        return response
+
+    def request(self, url) -> requests.Response:
+        """Repeatedly requests the given URL, cycling through credentials and sleeping
+        if necessary; until either a successful response or :exc:`MissingRateLimitReset`
+        """
+        # The following for/else loop handles rate limiting; if successful,
+        # it provides the rest of the function with a `response` object.
+        #
+        # If all tokens are rate-limited, we sleep until the reset time,
+        # then `continue` into another iteration of the outer while loop,
+        # attempting to get data from the same URL again.
+
+        while True:
+            max_attempts = len(self.credentials) if self.credentials else 1
+            reset_times: Dict[int, int] = {}  # token index -> time
+            for attempt in range(max_attempts):
+                try:
+                    return self._request(url)
+                except RateLimited as e:
+                    reset_info = "(unknown reset)"
+                    if e.reset_time is not None:
+                        reset_times[self.token_index] = e.reset_time
+                        reset_info = "(resetting in %ss)" % (e.reset_time - time.time())
+
+                    if not self.anonymous:
+                        logger.info(
+                            "Rate limit exhausted for current user %s %s",
+                            self.current_user,
+                            reset_info,
+                        )
+                        # Use next token in line
+                        self.set_next_session_token()
+                        # Wait one second to avoid triggering GitHub's abuse rate limits
+                        time.sleep(1)
+
+            # All tokens have been rate-limited. What do we do?
+
+            if not reset_times:
+                logger.warning(
+                    "No X-Ratelimit-Reset value found in responses for any token; "
+                    "Giving up."
+                )
+                raise MissingRateLimitReset()
+
+            sleep_time = max(reset_times.values()) - time.time() + 1
+            logger.info(
+                "Rate limits exhausted for all tokens. Sleeping for %f seconds.",
+                sleep_time,
+            )
+            time.sleep(sleep_time)
diff --git a/swh/core/tests/test_github_utils.py b/swh/core/tests/test_github_utils.py
--- a/swh/core/tests/test_github_utils.py
+++ b/swh/core/tests/test_github_utils.py
@@ -3,9 +3,15 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import logging
+import time
+from typing import Dict, Iterator, List, Optional, Union
+
 import pytest
+import requests_mock
 
 from swh.core.github.utils import (
+    GitHubSession,
     _sanitize_github_url,
     _url_github_api,
     _url_github_html,
@@ -49,3 +55,283 @@
     """It should return the input url when that origin is not a github one"""
     url = "https://example.org"
     assert get_canonical_github_origin_url(url) == url
+
+
+def fake_time_sleep(duration: float, sleep_calls: Optional[List[float]] = None):
+    """Record calls to time.sleep in the sleep_calls list"""
+    if duration < 0:
+        raise ValueError("Can't sleep for a negative amount of time!")
+    if sleep_calls is not None:
+        sleep_calls.append(duration)
+
+
+def fake_time_time():
+    """Return 0 when running time.time()"""
+    return 0
+
+
+@pytest.fixture
+def monkeypatch_sleep_calls(monkeypatch) -> Iterator[List[float]]:
+    """Monkeypatch `time.time` and `time.sleep`. Returns a list cumulating the arguments
+    passed to time.sleep()."""
+    sleeps: List[float] = []
+    monkeypatch.setattr(time, "sleep", lambda d: fake_time_sleep(d, sleeps))
+    monkeypatch.setattr(time, "time", fake_time_time)
+    yield sleeps
+
+
+@pytest.fixture()
+def num_before_ratelimit() -> int:
+    """Number of successful requests before the ratelimit hits"""
+    return 0
+
+
+@pytest.fixture()
+def num_ratelimit() -> Optional[int]:
+    """Number of rate-limited requests; None means infinity"""
+    return None
+
+
+@pytest.fixture()
+def ratelimit_reset() -> Optional[int]:
+    """Value of the X-Ratelimit-Reset header on ratelimited responses"""
+    return None
+
+
+def github_ratelimit_callback(
+    request: requests_mock.request._RequestObjectProxy,
+    context: requests_mock.response._Context,
+    ratelimit_reset: Optional[int],
+) -> Dict[str, str]:
+    """Return a rate-limited GitHub API response."""
+    # Check request headers
+    assert request.headers["Accept"] == "application/vnd.github.v3+json"
+    assert request.headers["User-Agent"] is not None
+    if "Authorization" in request.headers:
+        context.status_code = 429
+    else:
+        context.status_code = 403
+
+    if ratelimit_reset is not None:
+        context.headers["X-Ratelimit-Reset"] = str(ratelimit_reset)
+
+    return {
+        "message": "API rate limit exceeded for <IP>.",
+        "documentation_url": "https://developer.github.com/v3/#rate-limiting",
+    }
+
+
+def github_repo(i: int) -> Dict[str, Union[int, str]]:
+    """Basic repository information returned by the GitHub API"""
+
+    repo: Dict[str, Union[int, str]] = {
+        "id": i,
+        "html_url": f"https://github.com/origin/{i}",
+    }
+
+    # Set the pushed_at date on one of the origins
+    if i == 4321:
+        repo["pushed_at"] = "2018-11-08T13:16:24Z"
+
+    return repo
+
+
+HTTP_GH_API_URL = "https://api.github.com/repositories"
+
+
+def github_response_callback(
+    request: requests_mock.request._RequestObjectProxy,
+    context: requests_mock.response._Context,
+    page_size: int = 1000,
+    origin_count: int = 10000,
+) -> List[Dict[str, Union[str, int]]]:
+    """Return minimal GitHub API responses for the common case where the loader
+    hasn't been rate-limited"""
+    # Check request headers
+    assert request.headers["Accept"] == "application/vnd.github.v3+json"
+    assert request.headers["User-Agent"] is not None
+
+    # Check request parameters: per_page == 1000, since = last_repo_id
+    assert "per_page" in request.qs
+    assert request.qs["per_page"] == [str(page_size)]
+    assert "since" in request.qs
+
+    since = int(request.qs["since"][0])
+
+    next_page = since + page_size
+    if next_page < origin_count:
+        # the first id for the next page is within our origin count; add a Link
+        # header to the response
+        next_url = f"{HTTP_GH_API_URL}?per_page={page_size}&since={next_page}"
+        context.headers["Link"] = f"<{next_url}>; rel=next"
+
+    return [github_repo(i) for i in range(since + 1, min(next_page, origin_count) + 1)]
+
+
+@pytest.fixture()
+def requests_ratelimited(
+    num_before_ratelimit: int,
+    num_ratelimit: Optional[int],
+    ratelimit_reset: Optional[int],
+) -> Iterator[requests_mock.Mocker]:
+    """Mock requests to the GitHub API, returning a rate-limiting status code
+    after `num_before_ratelimit` requests.
+
+    GitHub does inconsistent rate-limiting:
+      - Anonymous requests return a 403 status code
+      - Authenticated requests return a 429 status code, with an
+        X-Ratelimit-Reset header.
+
+    This fixture takes multiple arguments (which can be overridden with a
+    :func:`pytest.mark.parametrize` parameter):
+     - num_before_ratelimit: the global number of requests until the
+       ratelimit triggers
+     - num_ratelimit: the number of requests that return a
+       rate-limited response.
+     - ratelimit_reset: the timestamp returned in X-Ratelimit-Reset if the
+       request is authenticated.
+
+    The default values set in the previous fixtures make all requests return a rate
+    limit response.
+    """
+    current_request = 0
+
+    def response_callback(request, context):
+        nonlocal current_request
+        current_request += 1
+        if num_before_ratelimit < current_request and (
+            num_ratelimit is None
+            or current_request < num_before_ratelimit + num_ratelimit + 1
+        ):
+            return github_ratelimit_callback(request, context, ratelimit_reset)
+        else:
+            return github_response_callback(request, context)
+
+    with requests_mock.Mocker() as mock:
+        mock.get(HTTP_GH_API_URL, json=response_callback)
+        yield mock
+
+
+@pytest.fixture
+def github_credentials() -> List[Dict[str, str]]:
+    """Return a static list of GitHub credentials"""
+    return sorted(
+        [{"username": f"swh{i:d}", "token": f"token-{i:d}"} for i in range(3)]
+        + [
+            {"username": f"swh-legacy{i:d}", "password": f"token-legacy-{i:d}"}
+            for i in range(3)
+        ],
+        key=lambda c: c["username"],
+    )
+
+
+@pytest.fixture
+def all_tokens(github_credentials) -> List[str]:
+    """Return the list of tokens matching the static credential"""
+
+    return [t.get("token", t.get("password")) for t in github_credentials]
+
+
+def test_github_session_anonymous_session():
+    user_agent = ("GitHub Session Test",)
+    github_session = GitHubSession(
+        user_agent=user_agent,
+    )
+    assert github_session.anonymous is True
+
+    actual_headers = github_session.session.headers
+    assert actual_headers["Accept"] == "application/vnd.github.v3+json"
+    assert actual_headers["User-Agent"] == user_agent
+
+
+@pytest.mark.parametrize(
+    "num_ratelimit", [1]  # return a single rate-limit response, then continue
+)
+def test_github_session_ratelimit_once_recovery(
+    caplog,
+    requests_ratelimited,
+    num_ratelimit,
+    monkeypatch_sleep_calls,
+    github_credentials,
+):
+    """GitHubSession should recover from hitting the rate-limit once"""
+    caplog.set_level(logging.DEBUG, "swh.core.github.utils")
+
+    github_session = GitHubSession(
+        user_agent="GitHub Session Test", credentials=github_credentials
+    )
+
+    res = github_session.request(f"{HTTP_GH_API_URL}?per_page=1000&since=10")
+    assert res.status_code == 200
+
+    token_users = []
+    for record in caplog.records:
+        if "Using authentication token" in record.message:
+            token_users.append(record.args[0])
+
+    # check that we used one more token than we saw rate limited requests
+    assert len(token_users) == 1 + num_ratelimit
+
+    # check that we slept for one second between our token uses
+    assert monkeypatch_sleep_calls == [1]
+
+
+def test_github_session_authenticated_credentials(
+    caplog, github_credentials, all_tokens
+):
+    """GitHubSession should have Authorization headers set in authenticated mode"""
+    caplog.set_level(logging.DEBUG, "swh.core.github.utils")
+
+    github_session = GitHubSession(
+        "GitHub Session Test", credentials=github_credentials
+    )
+
+    assert github_session.anonymous is False
+    assert github_session.token_index == 0
+    assert (
+        sorted(github_session.credentials, key=lambda t: t["username"])
+        == github_credentials
+    )
+    assert github_session.session.headers["Authorization"] in [
+        f"token {t}" for t in all_tokens
+    ]
+
+
+@pytest.mark.parametrize(
+    # Do 5 successful requests, return 6 ratelimits (to exhaust the credentials) with a
+    # set value for X-Ratelimit-Reset, then resume listing successfully.
+    "num_before_ratelimit, num_ratelimit, ratelimit_reset",
+    [(5, 6, 123456)],
+)
+def test_github_session_ratelimit_reset_sleep(
+    caplog,
+    requests_ratelimited,
+    monkeypatch_sleep_calls,
+    num_before_ratelimit,
+    num_ratelimit,
+    ratelimit_reset,
+    github_credentials,
+):
+    """GitHubSession should handle rate-limit with authentication tokens."""
+    caplog.set_level(logging.DEBUG, "swh.core.github.utils")
+
+    github_session = GitHubSession(
+        user_agent="GitHub Session Test", credentials=github_credentials
+    )
+
+    for _ in range(num_ratelimit):
+        github_session.request(f"{HTTP_GH_API_URL}?per_page=1000&since=10")
+
+    # We sleep 1 second every time we change credentials, then we sleep until
+    # ratelimit_reset + 1
+    expected_sleep_calls = len(github_credentials) * [1] + [ratelimit_reset + 1]
+    assert monkeypatch_sleep_calls == expected_sleep_calls
+
+    found_exhaustion_message = False
+    for record in caplog.records:
+        if record.levelname == "INFO":
+            if "Rate limits exhausted for all tokens" in record.message:
+                found_exhaustion_message = True
+                break
+
+    assert found_exhaustion_message is True