diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py
index ae10d71..5728727 100644
--- a/swh/lister/github/lister.py
+++ b/swh/lister/github/lister.py
@@ -1,208 +1,205 @@
 # Copyright (C) 2020-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from dataclasses import asdict, dataclass
 import datetime
 import logging
 from typing import Any, Dict, Iterator, List, Optional, Set
 from urllib.parse import parse_qs, urlparse
 
 import iso8601
 
-from swh.core.github.utils import GitHubSession, MissingRateLimitReset
+from swh.core.github.utils import MissingRateLimitReset
 from swh.scheduler.interface import SchedulerInterface
 from swh.scheduler.model import ListedOrigin
 
 from ..pattern import CredentialsType, Lister
 
 logger = logging.getLogger(__name__)
 
 
 @dataclass
 class GitHubListerState:
     """State of the GitHub lister"""
 
     last_seen_id: int = 0
     """Numeric id of the last repository listed on an incremental pass"""
 
 
 class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]):
     """List origins from GitHub.
 
     By default, the lister runs in incremental mode: it lists all repositories,
     starting with the `last_seen_id` stored in the scheduler backend.
 
     Providing the `first_id` and `last_id` arguments enables the "relisting" mode: in
     that mode, the lister finds the origins present in the range **excluding**
     `first_id` and **including** `last_id`. In this mode, the lister can overrun the
     `last_id`: it will always record all the origins seen in a given page. As the lister
     is fully idempotent, this is not a practical problem. Once relisting completes, the
     lister state in the scheduler backend is not updated.
 
     When the config contains a set of credentials, we shuffle this list at the beginning
     of the listing. To follow GitHub's `abuse rate limit policy`_, we keep using the
     same token over and over again, until its rate limit runs out. Once that happens, we
     switch to the next token over in our shuffled list.
 
     When a request fails with a rate limit exception for all tokens, we pause the
     listing until the largest value for X-Ratelimit-Reset over all tokens.
 
     When the credentials aren't set in the lister config, the lister can run in
     anonymous mode too (e.g. for testing purposes).
 
     .. _abuse rate limit policy: https://developer.github.com/v3/guides/best-practices-for-integrators/#dealing-with-abuse-rate-limits
 
 
     Args:
       first_id: the id of the first repo to list
       last_id: stop listing after seeing a repo with an id higher than this value.
 
     """  # noqa: B950
 
     LISTER_NAME = "github"
 
     API_URL = "https://api.github.com/repositories"
     PAGE_SIZE = 1000
 
     def __init__(
         self,
         scheduler: SchedulerInterface,
         credentials: CredentialsType = None,
         first_id: Optional[int] = None,
         last_id: Optional[int] = None,
     ):
         super().__init__(
             scheduler=scheduler,
             credentials=credentials,
             url=self.API_URL,
             instance="github",
+            with_github_session=True,
         )
 
         self.first_id = first_id
         self.last_id = last_id
 
         self.relisting = self.first_id is not None or self.last_id is not None
 
-        self.github_session = GitHubSession(
-            credentials=self.credentials,
-            user_agent=str(self.session.headers["User-Agent"]),
-        )
-
     def state_from_dict(self, d: Dict[str, Any]) -> GitHubListerState:
         return GitHubListerState(**d)
 
     def state_to_dict(self, state: GitHubListerState) -> Dict[str, Any]:
         return asdict(state)
 
     def get_pages(self) -> Iterator[List[Dict[str, Any]]]:
         current_id = 0
         if self.first_id is not None:
             current_id = self.first_id
         elif self.state is not None:
             current_id = self.state.last_seen_id
 
         current_url = f"{self.API_URL}?since={current_id}&per_page={self.PAGE_SIZE}"
 
         while self.last_id is None or current_id < self.last_id:
             logger.debug("Getting page %s", current_url)
 
             try:
+                assert self.github_session is not None
                 response = self.github_session.request(current_url)
             except MissingRateLimitReset:
                 # Give up
                 break
 
             # We've successfully retrieved a (non-ratelimited) `response`. We
             # still need to check it for validity.
 
             if response.status_code != 200:
                 logger.warning(
                     "Got unexpected status_code %s: %s",
                     response.status_code,
                     response.content,
                 )
                 break
 
             yield response.json()
 
             if "next" not in response.links:
                 # No `next` link, we've reached the end of the world
                 logger.debug(
                     "No next link found in the response headers, all caught up"
                 )
                 break
 
             # GitHub strongly advises to use the next link directly. We still
             # parse it to get the id of the last repository we've reached so
             # far.
             next_url = response.links["next"]["url"]
             parsed_url = urlparse(next_url)
             if not parsed_url.query:
                 logger.warning("Failed to parse url %s", next_url)
                 break
 
             parsed_query = parse_qs(parsed_url.query)
             current_id = int(parsed_query["since"][0])
             current_url = next_url
 
     def get_origins_from_page(
         self, page: List[Dict[str, Any]]
     ) -> Iterator[ListedOrigin]:
         """Convert a page of GitHub repositories into a list of ListedOrigins.
 
         This records the html_url, as well as the pushed_at value if it exists.
         """
         assert self.lister_obj.id is not None
 
         seen_in_page: Set[str] = set()
 
         for repo in page:
             if not repo:
                 # null repositories in listings happen sometimes...
                 continue
 
             if repo["html_url"] in seen_in_page:
                 continue
             seen_in_page.add(repo["html_url"])
 
             pushed_at_str = repo.get("pushed_at")
             pushed_at: Optional[datetime.datetime] = None
             if pushed_at_str:
                 pushed_at = iso8601.parse_date(pushed_at_str)
 
             yield ListedOrigin(
                 lister_id=self.lister_obj.id,
                 url=repo["html_url"],
                 visit_type="git",
                 last_update=pushed_at,
             )
 
     def commit_page(self, page: List[Dict[str, Any]]):
         """Update the currently stored state using the latest listed page"""
         if self.relisting:
             # Don't update internal state when relisting
             return
 
         if not page:
             # Sometimes, when you reach the end of the world, GitHub returns an empty
             # page of repositories
             return
 
         last_id = page[-1]["id"]
 
         if last_id > self.state.last_seen_id:
             self.state.last_seen_id = last_id
 
     def finalize(self):
         if self.relisting:
             return
 
         # Pull fresh lister state from the scheduler backend
         scheduler_state = self.get_state_from_scheduler()
 
         # Update the lister state in the backend only if the last seen id of
         # the current run is higher than that stored in the database.
         if self.state.last_seen_id > scheduler_state.last_seen_id:
             self.updated = True
diff --git a/swh/lister/github/tests/test_lister.py b/swh/lister/github/tests/test_lister.py
index 88c5bf4..a09d606 100644
--- a/swh/lister/github/tests/test_lister.py
+++ b/swh/lister/github/tests/test_lister.py
@@ -1,245 +1,245 @@
 # Copyright (C) 2020-2022 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import datetime
 import logging
 from typing import Any, Dict, Iterator, List
 
 import pytest
 import requests_mock
 
 from swh.core.github.pytest_plugin import github_response_callback
 from swh.lister.github.lister import GitHubLister
 from swh.lister.pattern import CredentialsType, ListerStats
 from swh.scheduler.interface import SchedulerInterface
 from swh.scheduler.model import Lister
 
 NUM_PAGES = 10
 ORIGIN_COUNT = GitHubLister.PAGE_SIZE * NUM_PAGES
 
 
 @pytest.fixture()
 def requests_mocker() -> Iterator[requests_mock.Mocker]:
     with requests_mock.Mocker() as mock:
         mock.get(GitHubLister.API_URL, json=github_response_callback)
         yield mock
 
 
 def get_lister_data(swh_scheduler: SchedulerInterface) -> Lister:
     """Retrieve the data for the GitHub Lister"""
     return swh_scheduler.get_or_create_lister(name="github", instance_name="github")
 
 
 def set_lister_state(swh_scheduler: SchedulerInterface, state: Dict[str, Any]) -> None:
     """Set the state of the lister in database"""
     lister = swh_scheduler.get_or_create_lister(name="github", instance_name="github")
     lister.current_state = state
     swh_scheduler.update_lister(lister)
 
 
 def check_origin_4321(swh_scheduler: SchedulerInterface, lister: Lister) -> None:
     """Check that origin 4321 exists and has the proper last_update timestamp"""
     origin_4321_req = swh_scheduler.get_listed_origins(
         url="https://github.com/origin/4321"
     )
     assert len(origin_4321_req.results) == 1
     origin_4321 = origin_4321_req.results[0]
     assert origin_4321.lister_id == lister.id
     assert origin_4321.visit_type == "git"
     assert origin_4321.last_update == datetime.datetime(
         2018, 11, 8, 13, 16, 24, tzinfo=datetime.timezone.utc
     )
 
 
 def check_origin_5555(swh_scheduler: SchedulerInterface, lister: Lister) -> None:
     """Check that origin 5555 exists and has no last_update timestamp"""
     origin_5555_req = swh_scheduler.get_listed_origins(
         url="https://github.com/origin/5555"
     )
     assert len(origin_5555_req.results) == 1
     origin_5555 = origin_5555_req.results[0]
     assert origin_5555.lister_id == lister.id
     assert origin_5555.visit_type == "git"
     assert origin_5555.last_update is None
 
 
 def test_from_empty_state(
     swh_scheduler, caplog, requests_mocker: requests_mock.Mocker
 ) -> None:
     caplog.set_level(logging.DEBUG, "swh.lister.github.lister")
 
     # Run the lister in incremental mode
     lister = GitHubLister(scheduler=swh_scheduler)
     res = lister.run()
 
     assert res == ListerStats(pages=NUM_PAGES, origins=ORIGIN_COUNT)
 
     listed_origins = swh_scheduler.get_listed_origins(limit=ORIGIN_COUNT + 1)
     assert len(listed_origins.results) == ORIGIN_COUNT
     assert listed_origins.next_page_token is None
 
     lister_data = get_lister_data(swh_scheduler)
     assert lister_data.current_state == {"last_seen_id": ORIGIN_COUNT}
 
     check_origin_4321(swh_scheduler, lister_data)
     check_origin_5555(swh_scheduler, lister_data)
 
 
 def test_incremental(swh_scheduler, caplog, requests_mocker) -> None:
     caplog.set_level(logging.DEBUG, "swh.lister.github.lister")
 
     # Number of origins to skip
     skip_origins = 2000
     expected_origins = ORIGIN_COUNT - skip_origins
 
     # Bump the last_seen_id in the scheduler backend
     set_lister_state(swh_scheduler, {"last_seen_id": skip_origins})
 
     # Run the lister in incremental mode
     lister = GitHubLister(scheduler=swh_scheduler)
     res = lister.run()
 
     # add 1 page to the number of full_pages if partial_page_len is not 0
     full_pages, partial_page_len = divmod(expected_origins, GitHubLister.PAGE_SIZE)
     expected_pages = full_pages + bool(partial_page_len)
 
     assert res == ListerStats(pages=expected_pages, origins=expected_origins)
 
     listed_origins = swh_scheduler.get_listed_origins(limit=expected_origins + 1)
     assert len(listed_origins.results) == expected_origins
     assert listed_origins.next_page_token is None
 
     lister_data = get_lister_data(swh_scheduler)
     assert lister_data.current_state == {"last_seen_id": ORIGIN_COUNT}
 
     check_origin_4321(swh_scheduler, lister_data)
     check_origin_5555(swh_scheduler, lister_data)
 
 
 def test_relister(swh_scheduler, caplog, requests_mocker) -> None:
     caplog.set_level(logging.DEBUG, "swh.lister.github.lister")
 
     # Only set this state as a canary: in the currently tested mode, the lister
     # should not be touching it.
     set_lister_state(swh_scheduler, {"last_seen_id": 123})
 
     # Use "relisting" mode to list origins between id 10 and 1011
     lister = GitHubLister(scheduler=swh_scheduler, first_id=10, last_id=1011)
     res = lister.run()
 
     # Make sure we got two full pages of results
     assert res == ListerStats(pages=2, origins=2000)
 
     # Check that the relisting mode hasn't touched the stored state.
     lister_data = get_lister_data(swh_scheduler)
     assert lister_data.current_state == {"last_seen_id": 123}
 
 
 def test_anonymous_ratelimit(swh_scheduler, caplog, requests_ratelimited) -> None:
     caplog.set_level(logging.DEBUG, "swh.core.github.utils")
 
     lister = GitHubLister(scheduler=swh_scheduler)
-    assert lister.github_session.anonymous
+    assert lister.github_session is not None and lister.github_session.anonymous
     assert "using anonymous mode" in caplog.records[-1].message
     caplog.clear()
 
     res = lister.run()
     assert res == ListerStats(pages=0, origins=0)
 
     last_log = caplog.records[-1]
     assert last_log.levelname == "WARNING"
     assert "No X-Ratelimit-Reset value found in responses" in last_log.message
 
 
 @pytest.fixture
 def lister_credentials(github_credentials: List[Dict[str, str]]) -> CredentialsType:
     """Return the credentials formatted for use by the lister"""
     return {"github": {"github": github_credentials}}
 
 
 def test_authenticated_credentials(
     swh_scheduler, caplog, github_credentials, lister_credentials, all_tokens
 ):
     """Test credentials management when the lister is authenticated"""
     caplog.set_level(logging.DEBUG, "swh.lister.github.lister")
 
     lister = GitHubLister(scheduler=swh_scheduler, credentials=lister_credentials)
     assert lister.github_session.token_index == 0
     assert sorted(lister.credentials, key=lambda t: t["username"]) == github_credentials
     assert lister.github_session.session.headers["Authorization"] in [
         "token %s" % t for t in all_tokens
     ]
 
 
 @pytest.mark.parametrize(
     "num_ratelimit", [1]
 )  # return a single rate-limit response, then continue
 def test_ratelimit_once_recovery(
     swh_scheduler,
     caplog,
     requests_ratelimited,
     num_ratelimit,
     monkeypatch_sleep_calls,
     lister_credentials,
 ):
     """Check that the lister recovers from hitting the rate-limit once"""
     caplog.set_level(logging.DEBUG, "swh.core.github.utils")
 
     lister = GitHubLister(scheduler=swh_scheduler, credentials=lister_credentials)
 
     res = lister.run()
     # check that we used all the pages
     assert res == ListerStats(pages=NUM_PAGES, origins=ORIGIN_COUNT)
 
     token_users = []
     for record in caplog.records:
         if "Using authentication token" in record.message:
             token_users.append(record.args[0])
 
     # check that we used one more token than we saw rate limited requests
     assert len(token_users) == 1 + num_ratelimit
 
     # check that we slept for one second between our token uses
     assert monkeypatch_sleep_calls == [1]
 
 
 @pytest.mark.parametrize(
     # Do 5 successful requests, return 6 ratelimits (to exhaust the credentials) with a
     # set value for X-Ratelimit-Reset, then resume listing successfully.
     "num_before_ratelimit, num_ratelimit, ratelimit_reset",
     [(5, 6, 123456)],
 )
 def test_ratelimit_reset_sleep(
     swh_scheduler,
     caplog,
     requests_ratelimited,
     monkeypatch_sleep_calls,
     num_before_ratelimit,
     ratelimit_reset,
     github_credentials,
     lister_credentials,
 ):
     """Check that the lister properly handles rate-limiting when providing it with
     authentication tokens"""
     caplog.set_level(logging.DEBUG, "swh.core.github.utils")
 
     lister = GitHubLister(scheduler=swh_scheduler, credentials=lister_credentials)
 
     res = lister.run()
     assert res == ListerStats(pages=NUM_PAGES, origins=ORIGIN_COUNT)
 
     # We sleep 1 second every time we change credentials, then we sleep until
     # ratelimit_reset + 1
     expected_sleep_calls = len(github_credentials) * [1] + [ratelimit_reset + 1]
     assert monkeypatch_sleep_calls == expected_sleep_calls
 
     found_exhaustion_message = False
     for record in caplog.records:
         if record.levelname == "INFO":
             if "Rate limits exhausted for all tokens" in record.message:
                 found_exhaustion_message = True
                 break
 
     assert found_exhaustion_message
diff --git a/swh/lister/maven/lister.py b/swh/lister/maven/lister.py
index 2055b91..195a8a3 100644
--- a/swh/lister/maven/lister.py
+++ b/swh/lister/maven/lister.py
@@ -1,406 +1,403 @@
 # Copyright (C) 2021-2022 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from dataclasses import asdict, dataclass
 from datetime import datetime, timezone
 import logging
 import re
 from typing import Any, Dict, Iterator, Optional
 from urllib.parse import urljoin
 
 from bs4 import BeautifulSoup
 import lxml
 import requests
 
-from swh.core.github.utils import GitHubSession
 from swh.scheduler.interface import SchedulerInterface
 from swh.scheduler.model import ListedOrigin
 
 from ..pattern import CredentialsType, Lister
 
 logger = logging.getLogger(__name__)
 
 RepoPage = Dict[str, Any]
 
 SUPPORTED_SCM_TYPES = ("git", "svn", "hg", "cvs", "bzr")
 
 
 @dataclass
 class MavenListerState:
     """State of the MavenLister"""
 
     last_seen_doc: int = -1
     """Last doc ID ingested during an incremental pass
 
     """
 
     last_seen_pom: int = -1
     """Last doc ID related to a pom and ingested during
        an incremental pass
 
     """
 
 
 class MavenLister(Lister[MavenListerState, RepoPage]):
     """List origins from a Maven repository.
 
     Maven Central provides artifacts for Java builds.
     It includes POM files and source archives, which we download to get
     the source code of artifacts and links to their scm repository.
 
     This lister yields origins of types: git/svn/hg or whatever the Artifacts
     use as repository type, plus maven types for the maven loader (tgz, jar)."""
 
     LISTER_NAME = "maven"
 
     def __init__(
         self,
         scheduler: SchedulerInterface,
         url: str,
         index_url: str = None,
         instance: Optional[str] = None,
         credentials: CredentialsType = None,
         incremental: bool = True,
     ):
         """Lister class for Maven repositories.
 
         Args:
             url: main URL of the Maven repository, i.e. url of the base index
                 used to fetch maven artifacts. For Maven central use
                 https://repo1.maven.org/maven2/
             index_url: the URL to download the exported text indexes from.
                 Would typically be a local host running the export docker image.
                 See README.md in this directory for more information.
             instance: Name of maven instance. Defaults to url's network location
                 if unset.
             incremental: bool, defaults to True. Defines if incremental listing
                 is activated or not.
 
         """
         self.BASE_URL = url
         self.INDEX_URL = index_url
         self.incremental = incremental
 
         super().__init__(
             scheduler=scheduler,
             credentials=credentials,
             url=url,
             instance=instance,
+            with_github_session=True,
         )
 
         self.session.headers.update({"Accept": "application/json"})
 
         self.jar_origins: Dict[str, ListedOrigin] = {}
-        self.github_session = GitHubSession(
-            credentials=self.credentials,
-            user_agent=str(self.session.headers["User-Agent"]),
-        )
 
     def state_from_dict(self, d: Dict[str, Any]) -> MavenListerState:
         return MavenListerState(**d)
 
     def state_to_dict(self, state: MavenListerState) -> Dict[str, Any]:
         return asdict(state)
 
     def get_pages(self) -> Iterator[RepoPage]:
         """Retrieve and parse exported maven indexes to
         identify all pom files and src archives.
         """
 
         # Example of returned RepoPage's:
         # [
         #   {
         #     "type": "maven",
         #     "url": "https://maven.xwiki.org/..-5.4.2-sources.jar",
         #     "time": 1626109619335,
         #     "gid": "org.xwiki.platform",
         #     "aid": "xwiki-platform-wikistream-events-xwiki",
         #     "version": "5.4.2"
         #   },
         #   {
         #     "type": "scm",
         #     "url": "scm:git:git://github.com/openengsb/openengsb-framework.git",
         #     "project": "openengsb-framework",
         #   },
         #   ...
         # ]
 
         # Download the main text index file.
         logger.info("Downloading computed index from %s.", self.INDEX_URL)
         assert self.INDEX_URL is not None
         try:
             response = self.http_request(self.INDEX_URL, stream=True)
         except requests.HTTPError:
             logger.error("Index %s not found, stopping", self.INDEX_URL)
             raise
 
         # Prepare regexes to parse index exports.
 
         # Parse doc id.
         # Example line: "doc 13"
         re_doc = re.compile(r"^doc (?P<doc>\d+)$")
 
         # Parse gid, aid, version, classifier, extension.
         # Example line: "    value al.aldi|sprova4j|0.1.0|sources|jar"
         re_val = re.compile(
             r"^\s{4}value (?P<gid>[^|]+)\|(?P<aid>[^|]+)\|(?P<version>[^|]+)\|"
             + r"(?P<classifier>[^|]+)\|(?P<ext>[^|]+)$"
         )
 
         # Parse last modification time.
         # Example line: "    value jar|1626109619335|14316|2|2|0|jar"
         re_time = re.compile(
             r"^\s{4}value ([^|]+)\|(?P<mtime>[^|]+)\|([^|]+)\|([^|]+)\|([^|]+)"
             + r"\|([^|]+)\|([^|]+)$"
         )
 
         # Read file line by line and process it
         out_pom: Dict = {}
         jar_src: Dict = {}
         doc_id: int = 0
         jar_src["doc"] = None
         url_src = None
 
         iterator = response.iter_lines(chunk_size=1024)
         for line_bytes in iterator:
             # Read the index text export and get URLs and SCMs.
             line = line_bytes.decode(errors="ignore")
             m_doc = re_doc.match(line)
             if m_doc is not None:
                 doc_id = int(m_doc.group("doc"))
                 # jar_src["doc"] contains the id of the current document, whatever
                 # its type (scm or jar).
                 jar_src["doc"] = doc_id
             else:
                 m_val = re_val.match(line)
                 if m_val is not None:
                     (gid, aid, version, classifier, ext) = m_val.groups()
                     ext = ext.strip()
                     path = "/".join(gid.split("."))
                     if classifier == "NA" and ext.lower() == "pom":
                         # If incremental mode, we don't record any line that is
                         # before our last recorded doc id.
                         if (
                             self.incremental
                             and self.state
                             and self.state.last_seen_pom
                             and self.state.last_seen_pom >= doc_id
                         ):
                             continue
                         url_path = f"{path}/{aid}/{version}/{aid}-{version}.{ext}"
                         url_pom = urljoin(
                             self.BASE_URL,
                             url_path,
                         )
                         out_pom[url_pom] = doc_id
                     elif (
                         classifier.lower() == "sources" or ("src" in classifier)
                     ) and ext.lower() in ("zip", "jar"):
                         url_path = (
                             f"{path}/{aid}/{version}/{aid}-{version}-{classifier}.{ext}"
                         )
                         url_src = urljoin(self.BASE_URL, url_path)
                         jar_src["gid"] = gid
                         jar_src["aid"] = aid
                         jar_src["version"] = version
                 else:
                     m_time = re_time.match(line)
                     if m_time is not None and url_src is not None:
                         time = m_time.group("mtime")
                         jar_src["time"] = int(time)
                         artifact_metadata_d = {
                             "type": "maven",
                             "url": url_src,
                             **jar_src,
                         }
                         logger.debug(
                             "* Yielding jar %s: %s", url_src, artifact_metadata_d
                         )
                         yield artifact_metadata_d
                         url_src = None
 
         logger.info("Found %s poms.", len(out_pom))
 
         # Now fetch pom files and scan them for scm info.
 
         logger.info("Fetching poms..")
         for pom_url in out_pom:
             try:
                 response = self.http_request(pom_url)
                 parsed_pom = BeautifulSoup(response.content, "xml")
                 project = parsed_pom.find("project")
                 if project is None:
                     continue
                 scm = project.find("scm")
                 if scm is not None:
                     connection = scm.find("connection")
                     if connection is not None:
                         artifact_metadata_d = {
                             "type": "scm",
                             "doc": out_pom[pom_url],
                             "url": connection.text,
                         }
                         logger.debug(
                             "* Yielding pom %s: %s", pom_url, artifact_metadata_d
                         )
                         yield artifact_metadata_d
                     else:
                         logger.debug("No scm.connection in pom %s", pom_url)
                 else:
                     logger.debug("No scm in pom %s", pom_url)
             except requests.HTTPError:
                 logger.warning(
                     "POM info page could not be fetched, skipping project '%s'",
                     pom_url,
                 )
             except lxml.etree.Error as error:
                 logger.info("Could not parse POM %s XML: %s.", pom_url, error)
 
     def get_scm(self, page: RepoPage) -> Optional[ListedOrigin]:
         """Retrieve scm origin out of the page information. Only called when type of the
         page is scm.
 
         Try and detect an scm/vcs repository. Note that official format is in the form:
         scm:{type}:git://example.org/{user}/{repo}.git but some projects directly put
         the repo url (without the "scm:type"), so we have to check against the content
         to extract the type and url properly.
 
         Raises
             AssertionError when the type of the page is not 'scm'
 
         Returns
             ListedOrigin with proper canonical scm url (for github) if any is found,
             None otherwise.
 
         """
 
         assert page["type"] == "scm"
         visit_type: Optional[str] = None
         url: Optional[str] = None
         m_scm = re.match(r"^scm:(?P<type>[^:]+):(?P<url>.*)$", page["url"])
         if m_scm is None:
             return None
 
         scm_type = m_scm.group("type")
         if scm_type and scm_type in SUPPORTED_SCM_TYPES:
             url = m_scm.group("url")
             visit_type = scm_type
         elif page["url"].endswith(".git"):
             url = page["url"].lstrip("scm:")
             visit_type = "git"
         else:
             return None
 
         if url and visit_type == "git":
+            assert self.github_session is not None
             # Non-github urls will be returned as is, github ones will be canonical ones
             url = self.github_session.get_canonical_url(url)
 
         if not url:
             return None
 
         assert visit_type is not None
         assert self.lister_obj.id is not None
         return ListedOrigin(
             lister_id=self.lister_obj.id,
             url=url,
             visit_type=visit_type,
         )
 
     def get_origins_from_page(self, page: RepoPage) -> Iterator[ListedOrigin]:
 
         """Convert a page of Maven repositories into a list of ListedOrigins."""
         if page["type"] == "scm":
             listed_origin = self.get_scm(page)
             if listed_origin:
                 yield listed_origin
         else:
             # Origin is gathering source archives:
             last_update_dt = None
             last_update_iso = ""
             try:
                 last_update_seconds = str(page["time"])[:-3]
                 last_update_dt = datetime.fromtimestamp(int(last_update_seconds))
                 last_update_dt = last_update_dt.astimezone(timezone.utc)
             except (OverflowError, ValueError):
                 logger.warning("- Failed to convert datetime %s.", last_update_seconds)
             if last_update_dt:
                 last_update_iso = last_update_dt.isoformat()
 
             # Origin URL will target page holding sources for all versions of
             # an artifactId (package name) inside a groupId (namespace)
             path = "/".join(page["gid"].split("."))
             origin_url = urljoin(self.BASE_URL, f"{path}/{page['aid']}")
 
             artifact = {
                 **{k: v for k, v in page.items() if k != "doc"},
                 "time": last_update_iso,
                 "base_url": self.BASE_URL,
             }
 
             if origin_url not in self.jar_origins:
                 # Create ListedOrigin instance if we did not see that origin yet
                 assert self.lister_obj.id is not None
                 jar_origin = ListedOrigin(
                     lister_id=self.lister_obj.id,
                     url=origin_url,
                     visit_type=page["type"],
                     last_update=last_update_dt,
                     extra_loader_arguments={"artifacts": [artifact]},
                 )
                 self.jar_origins[origin_url] = jar_origin
             else:
                 # Update list of source artifacts for that origin otherwise
                 jar_origin = self.jar_origins[origin_url]
                 artifacts = jar_origin.extra_loader_arguments["artifacts"]
                 if artifact not in artifacts:
                     artifacts.append(artifact)
 
             if (
                 jar_origin.last_update
                 and last_update_dt
                 and last_update_dt > jar_origin.last_update
             ):
                 jar_origin.last_update = last_update_dt
 
             if not self.incremental or (
                 self.state and page["doc"] > self.state.last_seen_doc
             ):
                 # Yield origin with updated source artifacts, multiple instances of
                 # ListedOrigin for the same origin URL but with different artifacts
                 # list will be sent to the scheduler but it will deduplicate them and
                 # take the latest one to upsert in database
                 yield jar_origin
 
     def commit_page(self, page: RepoPage) -> None:
         """Update currently stored state using the latest listed doc.
 
         Note: this is a noop for full listing mode
 
         """
         if self.incremental and self.state:
             # We need to differentiate the two state counters according
             # to the type of origin.
             if page["type"] == "maven" and page["doc"] > self.state.last_seen_doc:
                 self.state.last_seen_doc = page["doc"]
             elif page["type"] == "scm" and page["doc"] > self.state.last_seen_pom:
                 self.state.last_seen_doc = page["doc"]
                 self.state.last_seen_pom = page["doc"]
 
     def finalize(self) -> None:
         """Finalize the lister state, set update if any progress has been made.
 
         Note: this is a noop for full listing mode
 
         """
         if self.incremental and self.state:
             last_seen_doc = self.state.last_seen_doc
             last_seen_pom = self.state.last_seen_pom
 
             scheduler_state = self.get_state_from_scheduler()
             if last_seen_doc and last_seen_pom:
                 if (scheduler_state.last_seen_doc < last_seen_doc) or (
                     scheduler_state.last_seen_pom < last_seen_pom
                 ):
                     self.updated = True
diff --git a/swh/lister/nixguix/lister.py b/swh/lister/nixguix/lister.py
index 0b8e8be..9ebe82e 100644
--- a/swh/lister/nixguix/lister.py
+++ b/swh/lister/nixguix/lister.py
@@ -1,576 +1,566 @@
 # Copyright (C) 2020-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 """NixGuix lister definition.
 
 This lists artifacts out of manifest for Guix or Nixpkgs manifests.
 
 Artifacts can be of types:
 - upstream git repository (NixOS/nixpkgs, Guix)
 - VCS repositories (svn, git, hg, ...)
 - unique file
 - unique tarball
 
 """
 
 import base64
 import binascii
 from dataclasses import dataclass
 from enum import Enum
 import logging
 from pathlib import Path
 import random
 import re
 from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
 from urllib.parse import parse_qsl, urlparse
 
 import requests
 from requests.exceptions import ConnectionError, InvalidSchema, SSLError
 
-from swh.core.github.utils import GitHubSession
 from swh.core.tarball import MIMETYPE_TO_ARCHIVE_FORMAT
 from swh.lister import TARBALL_EXTENSIONS
 from swh.lister.pattern import CredentialsType, StatelessLister
 from swh.scheduler.model import ListedOrigin
 
 logger = logging.getLogger(__name__)
 
 
 # By default, ignore binary files and archives containing binaries
 DEFAULT_EXTENSIONS_TO_IGNORE = [
     "AppImage",
     "bin",
     "exe",
     "iso",
     "linux64",
     "msi",
     "png",
     "dic",
     "deb",
     "rpm",
 ]
 
 
 class ArtifactNatureUndetected(ValueError):
     """Raised when a remote artifact's nature (tarball, file) cannot be detected."""
 
     pass
 
 
 class ArtifactNatureMistyped(ValueError):
     """Raised when a remote artifact is neither a tarball nor a file.
 
     Error of this type are' probably a misconfiguration in the manifest generation that
     badly typed a vcs repository.
 
     """
 
     pass
 
 
 class ArtifactWithoutExtension(ValueError):
     """Raised when an artifact nature cannot be determined by its name."""
 
     pass
 
 
 class ChecksumsComputation(Enum):
     """The possible artifact types listed out of the manifest."""
 
     STANDARD = "standard"
     """Standard checksums (e.g. sha1, sha256, ...) on the tarball or file."""
     NAR = "nar"
     """The hash is computed over the NAR archive dump of the output (e.g. uncompressed
     directory.)"""
 
 
 MAPPING_CHECKSUMS_COMPUTATION = {
     "flat": ChecksumsComputation.STANDARD,
     "recursive": ChecksumsComputation.NAR,
 }
 """Mapping between the outputHashMode from the manifest and how to compute checksums."""
 
 
 @dataclass
 class Artifact:
     """Metadata information on Remote Artifact with url (tarball or file)."""
 
     origin: str
     """Canonical url retrieve the tarball artifact."""
     visit_type: str
     """Either 'tar' or 'file' """
     fallback_urls: List[str]
     """List of urls to retrieve tarball artifact if canonical url no longer works."""
     checksums: Dict[str, str]
     """Integrity hash converted into a checksum dict."""
     checksums_computation: ChecksumsComputation
     """Checksums computation mode to provide to loaders (e.g. nar, standard, ...)"""
 
 
 @dataclass
 class VCS:
     """Metadata information on VCS."""
 
     origin: str
     """Origin url of the vcs"""
     type: str
     """Type of (d)vcs, e.g. svn, git, hg, ..."""
     ref: Optional[str] = None
     """Reference either a svn commit id, a git commit, ..."""
 
 
 class ArtifactType(Enum):
     """The possible artifact types listed out of the manifest."""
 
     ARTIFACT = "artifact"
     VCS = "vcs"
 
 
 PageResult = Tuple[ArtifactType, Union[Artifact, VCS]]
 
 
 VCS_SUPPORTED = ("git", "svn", "hg")
 
 # Rough approximation of what we can find of mimetypes for tarballs "out there"
 POSSIBLE_TARBALL_MIMETYPES = tuple(MIMETYPE_TO_ARCHIVE_FORMAT.keys())
 
 
 PATTERN_VERSION = re.compile(r"(v*[0-9]+[.])([0-9]+[.]*)+")
 
 
 def url_endswith(
     urlparsed, extensions: List[str], raise_when_no_extension: bool = True
 ) -> bool:
     """Determine whether urlparsed ends with one of the extensions passed as parameter.
 
     This also account for the edge case of a filename with only a version as name (so no
     extension in the end.)
 
     Raises:
         ArtifactWithoutExtension in case no extension is available and
         raise_when_no_extension is True (the default)
 
     """
     paths = [Path(p) for (_, p) in [("_", urlparsed.path)] + parse_qsl(urlparsed.query)]
     if raise_when_no_extension and not any(path.suffix != "" for path in paths):
         raise ArtifactWithoutExtension
     match = any(path.suffix.endswith(tuple(extensions)) for path in paths)
     if match:
         return match
     # Some false negative can happen (e.g. https://<netloc>/path/0.1.5)), so make sure
     # to catch those
     name = Path(urlparsed.path).name
     if not PATTERN_VERSION.match(name):
         return match
     if raise_when_no_extension:
         raise ArtifactWithoutExtension
     return False
 
 
 def is_tarball(urls: List[str], request: Optional[Any] = None) -> Tuple[bool, str]:
     """Determine whether a list of files actually are tarballs or simple files.
 
     When this cannot be answered simply out of the url, when request is provided, this
     executes a HTTP `HEAD` query on the url to determine the information. If request is
     not provided, this raises an ArtifactNatureUndetected exception.
 
     Args:
         urls: name of the remote files for which the extension needs to be checked.
 
     Raises:
         ArtifactNatureUndetected when the artifact's nature cannot be detected out
             of its url
         ArtifactNatureMistyped when the artifact is not a tarball nor a file. It's up to
             the caller to do what's right with it.
 
     Returns: A tuple (bool, url). The boolean represents whether the url is an archive
         or not. The second parameter is the actual url once the head request is issued
         as a fallback of not finding out whether the urls are tarballs or not.
 
     """
 
     def _is_tarball(url):
         """Determine out of an extension whether url is a tarball.
 
         Raises:
             ArtifactWithoutExtension in case no extension is available
 
         """
         urlparsed = urlparse(url)
         if urlparsed.scheme not in ("http", "https", "ftp"):
             raise ArtifactNatureMistyped(f"Mistyped artifact '{url}'")
         return url_endswith(urlparsed, TARBALL_EXTENSIONS)
 
     index = random.randrange(len(urls))
     url = urls[index]
 
     try:
         return _is_tarball(url), urls[0]
     except ArtifactWithoutExtension:
         if request is None:
             raise ArtifactNatureUndetected(
                 f"Cannot determine artifact type from url <{url}>"
             )
         logger.warning(
             "Cannot detect extension for <%s>. Fallback to http head query",
             url,
         )
 
         try:
             response = request.head(url)
         except (InvalidSchema, SSLError, ConnectionError):
             raise ArtifactNatureUndetected(
                 f"Cannot determine artifact type from url <{url}>"
             )
 
         if not response.ok or response.status_code == 404:
             raise ArtifactNatureUndetected(
                 f"Cannot determine artifact type from url <{url}>"
             )
         location = response.headers.get("Location")
         if location:  # It's not always present
             logger.debug("Location: %s", location)
             try:
                 # FIXME: location is also returned as it's considered the true origin,
                 # true enough?
                 return _is_tarball(location), location
             except ArtifactWithoutExtension:
                 logger.warning(
                     "Still cannot detect extension through location <%s>...",
                     url,
                 )
 
         origin = urls[0]
 
         content_type = response.headers.get("Content-Type")
         if content_type:
             logger.debug("Content-Type: %s", content_type)
             if content_type == "application/json":
                 return False, origin
             return content_type.startswith(POSSIBLE_TARBALL_MIMETYPES), origin
 
         content_disposition = response.headers.get("Content-Disposition")
         if content_disposition:
             logger.debug("Content-Disposition: %s", content_disposition)
             if "filename=" in content_disposition:
                 fields = content_disposition.split("; ")
                 for field in fields:
                     if "filename=" in field:
                         _, filename = field.split("filename=")
                         break
 
                 return (
                     url_endswith(
                         urlparse(filename),
                         TARBALL_EXTENSIONS,
                         raise_when_no_extension=False,
                     ),
                     origin,
                 )
 
         raise ArtifactNatureUndetected(
             f"Cannot determine artifact type from url <{url}>"
         )
 
 
 VCS_KEYS_MAPPING = {
     "git": {
         "ref": "git_ref",
         "url": "git_url",
     },
     "svn": {
         "ref": "svn_revision",
         "url": "svn_url",
     },
     "hg": {
         "ref": "hg_changeset",
         "url": "hg_url",
     },
 }
 
 
 class NixGuixLister(StatelessLister[PageResult]):
     """List Guix or Nix sources out of a public json manifest.
 
     This lister can output:
     - unique tarball (.tar.gz, .tbz2, ...)
     - vcs repositories (e.g. git, hg, svn)
     - unique file (.lisp, .py, ...)
 
     Note that no `last_update` is available in either manifest.
 
     For `url` types artifacts, this tries to determine the artifact's nature, tarball or
     file. It first tries to compute out of the "url" extension. In case of no extension,
     it fallbacks to query (HEAD) the url to retrieve the origin out of the `Location`
     response header, and then checks the extension again.
 
     Optionally, when the `extension_to_ignore` parameter is provided, it extends the
     default extensions to ignore (`DEFAULT_EXTENSIONS_TO_IGNORE`) with those passed.
     This can be used to drop further binary files detected in the wild.
 
     """
 
     LISTER_NAME = "nixguix"
 
     def __init__(
         self,
         scheduler,
         url: str,
         origin_upstream: str,
         instance: Optional[str] = None,
         credentials: Optional[CredentialsType] = None,
         # canonicalize urls, can be turned off during docker runs
         canonicalize: bool = True,
         extensions_to_ignore: List[str] = [],
         **kwargs: Any,
     ):
         super().__init__(
             scheduler=scheduler,
             url=url.rstrip("/"),
             instance=instance,
             credentials=credentials,
+            with_github_session=canonicalize,
         )
         # either full fqdn NixOS/nixpkgs or guix repository urls
         # maybe add an assert on those specific urls?
         self.origin_upstream = origin_upstream
         self.extensions_to_ignore = DEFAULT_EXTENSIONS_TO_IGNORE + extensions_to_ignore
 
         self.session = requests.Session()
-        # for testing purposes, we may want to skip this step (e.g. docker run and rate
-        # limit)
-        self.github_session = (
-            GitHubSession(
-                credentials=self.credentials,
-                user_agent=str(self.session.headers["User-Agent"]),
-            )
-            if canonicalize
-            else None
-        )
 
     def build_artifact(
         self, artifact_url: str, artifact_type: str, artifact_ref: Optional[str] = None
     ) -> Optional[Tuple[ArtifactType, VCS]]:
         """Build a canonicalized vcs artifact when possible."""
         origin = (
             self.github_session.get_canonical_url(artifact_url)
             if self.github_session
             else artifact_url
         )
         if not origin:
             return None
         return ArtifactType.VCS, VCS(
             origin=origin, type=artifact_type, ref=artifact_ref
         )
 
     def get_pages(self) -> Iterator[PageResult]:
         """Yield one page per "typed" origin referenced in manifest."""
         # fetch and parse the manifest...
         response = self.http_request(self.url)
 
         # ... if any
         raw_data = response.json()
         yield ArtifactType.VCS, VCS(origin=self.origin_upstream, type="git")
 
         # grep '"type"' guix-sources.json | sort | uniq
         #       "type": false                             <<<<<<<<< noise
         #       "type": "git",
         #       "type": "hg",
         #       "type": "no-origin",                      <<<<<<<<< noise
         #       "type": "svn",
         #       "type": "url",
 
         # grep '"type"' nixpkgs-sources-unstable.json | sort | uniq
         #  "type": "url",
 
         sources = raw_data["sources"]
         random.shuffle(sources)
 
         for artifact in sources:
             artifact_type = artifact["type"]
             if artifact_type in VCS_SUPPORTED:
                 plain_url = artifact[VCS_KEYS_MAPPING[artifact_type]["url"]]
                 plain_ref = artifact[VCS_KEYS_MAPPING[artifact_type]["ref"]]
                 built_artifact = self.build_artifact(
                     plain_url, artifact_type, plain_ref
                 )
                 if not built_artifact:
                     continue
                 yield built_artifact
             elif artifact_type == "url":
                 # It's either a tarball or a file
                 origin_urls = artifact.get("urls")
                 if not origin_urls:
                     # Nothing to fetch
                     logger.warning("Skipping url <%s>: empty artifact", artifact)
                     continue
 
                 assert origin_urls is not None
 
                 # Deal with urls with empty scheme (basic fallback to http)
                 urls = []
                 for url in origin_urls:
                     urlparsed = urlparse(url)
                     if urlparsed.scheme == "":
                         logger.warning("Missing scheme for <%s>: fallback to http", url)
                         fixed_url = f"http://{url}"
                     else:
                         fixed_url = url
                     urls.append(fixed_url)
 
                 origin, *fallback_urls = urls
 
                 if origin.endswith(".git"):
                     built_artifact = self.build_artifact(origin, "git")
                     if not built_artifact:
                         continue
                     yield built_artifact
                     continue
 
                 outputHash = artifact.get("outputHash")
                 integrity = artifact.get("integrity")
                 if integrity is None and outputHash is None:
                     logger.warning(
                         "Skipping url <%s>: missing integrity and outputHash field",
                         origin,
                     )
                     continue
 
                 # Falls back to outputHash field if integrity is missing
                 if integrity is None and outputHash:
                     # We'll deal with outputHash as integrity field
                     integrity = outputHash
 
                 try:
                     is_tar, origin = is_tarball(urls, self.session)
                 except ArtifactNatureMistyped:
                     logger.warning(
                         "Mistyped url <%s>: trying to deal with it properly", origin
                     )
                     urlparsed = urlparse(origin)
                     artifact_type = urlparsed.scheme
 
                     if artifact_type in VCS_SUPPORTED:
                         built_artifact = self.build_artifact(origin, artifact_type)
                         if not built_artifact:
                             continue
                         yield built_artifact
                     else:
                         logger.warning(
                             "Skipping url <%s>: undetected remote artifact type", origin
                         )
                     continue
                 except ArtifactNatureUndetected:
                     logger.warning(
                         "Skipping url <%s>: undetected remote artifact type", origin
                     )
                     continue
 
                 # Determine the content checksum stored in the integrity field and
                 # convert into a dict of checksums. This only parses the
                 # `hash-expression` (hash-<b64-encoded-checksum>) as defined in
                 # https://w3c.github.io/webappsec-subresource-integrity/#the-integrity-attribute
                 try:
                     chksum_algo, chksum_b64 = integrity.split("-")
                     checksums: Dict[str, str] = {
                         chksum_algo: base64.decodebytes(chksum_b64.encode()).hex()
                     }
                 except binascii.Error:
                     logger.exception(
                         "Skipping url: <%s>: integrity computation failure for <%s>",
                         url,
                         artifact,
                     )
                     continue
 
                 # The 'outputHashMode' attribute determines how the hash is computed. It
                 # must be one of the following two values:
                 # - "flat": (default) The output must be a non-executable regular file.
                 #     If it isn’t, the build fails. The hash is simply computed over the
                 #     contents of that file (so it’s equal to what Unix commands like
                 #     `sha256sum` or `sha1sum` produce).
                 # - "recursive": The hash is computed over the NAR archive dump of the
                 #       output (i.e., the result of `nix-store --dump`). In this case,
                 #       the output can be anything, including a directory tree.
                 outputHashMode = artifact.get("outputHashMode", "flat")
 
                 if not is_tar and outputHashMode == "recursive":
                     # T4608: Cannot deal with those properly yet as some can be missing
                     # 'critical' information about how to recompute the hash (e.g. fs
                     # layout, executable bit, ...)
                     logger.warning(
                         "Skipping artifact <%s>: 'file' artifact of type <%s> is"
                         " missing information to properly check its integrity",
                         artifact,
                         artifact_type,
                     )
                     continue
 
                 # At this point plenty of heuristics happened and we should have found
                 # the right origin and its nature.
 
                 # Let's check and filter it out if it is to be ignored (if possible).
                 # Some origin urls may not have extension at this point (e.g
                 # http://git.marmaro.de/?p=mmh;a=snp;h=<id>;sf=tgz), let them through.
                 if url_endswith(
                     urlparse(origin),
                     self.extensions_to_ignore,
                     raise_when_no_extension=False,
                 ):
                     logger.warning(
                         "Skipping artifact <%s>: 'file' artifact of type <%s> is"
                         " ignored due to lister configuration. It should ignore"
                         " origins with extension [%s]",
                         origin,
                         artifact_type,
                         ",".join(self.extensions_to_ignore),
                     )
                     continue
 
                 logger.debug("%s: %s", "dir" if is_tar else "cnt", origin)
                 yield ArtifactType.ARTIFACT, Artifact(
                     origin=origin,
                     fallback_urls=fallback_urls,
                     checksums=checksums,
                     checksums_computation=MAPPING_CHECKSUMS_COMPUTATION[outputHashMode],
                     visit_type="directory" if is_tar else "content",
                 )
             else:
                 logger.warning(
                     "Skipping artifact <%s>: unsupported type %s",
                     artifact,
                     artifact_type,
                 )
 
     def vcs_to_listed_origin(self, artifact: VCS) -> Iterator[ListedOrigin]:
         """Given a vcs repository, yield a ListedOrigin."""
         assert self.lister_obj.id is not None
         # FIXME: What to do with the "ref" (e.g. git/hg/svn commit, ...)
         yield ListedOrigin(
             lister_id=self.lister_obj.id,
             url=artifact.origin,
             visit_type=artifact.type,
         )
 
     def artifact_to_listed_origin(self, artifact: Artifact) -> Iterator[ListedOrigin]:
         """Given an artifact (tarball, file), yield one ListedOrigin."""
         assert self.lister_obj.id is not None
         yield ListedOrigin(
             lister_id=self.lister_obj.id,
             url=artifact.origin,
             visit_type=artifact.visit_type,
             extra_loader_arguments={
                 "checksums": artifact.checksums,
                 "checksums_computation": artifact.checksums_computation.value,
                 "fallback_urls": artifact.fallback_urls,
             },
         )
 
     def get_origins_from_page(
         self, artifact_tuple: PageResult
     ) -> Iterator[ListedOrigin]:
         """Given an artifact tuple (type, artifact), yield a ListedOrigin."""
         artifact_type, artifact = artifact_tuple
         mapping_type_fn = getattr(self, f"{artifact_type.value}_to_listed_origin")
         yield from mapping_type_fn(artifact)
diff --git a/swh/lister/packagist/lister.py b/swh/lister/packagist/lister.py
index 251c25a..e9fa296 100644
--- a/swh/lister/packagist/lister.py
+++ b/swh/lister/packagist/lister.py
@@ -1,179 +1,176 @@
 # Copyright (C) 2019-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from dataclasses import dataclass
 from datetime import datetime, timezone
 import logging
 from typing import Any, Dict, Iterator, List, Optional
 
 import iso8601
 import requests
 
-from swh.core.github.utils import GitHubSession
 from swh.scheduler.interface import SchedulerInterface
 from swh.scheduler.model import ListedOrigin
 
 from ..pattern import CredentialsType, Lister
 
 logger = logging.getLogger(__name__)
 
 PackagistPageType = List[str]
 
 
 @dataclass
 class PackagistListerState:
     """State of Packagist lister"""
 
     last_listing_date: Optional[datetime] = None
     """Last date when packagist lister was executed"""
 
 
 class PackagistLister(Lister[PackagistListerState, PackagistPageType]):
     """
     List all Packagist projects and send associated origins to scheduler.
 
     The lister queries the Packagist API, whose documentation can be found at
     https://packagist.org/apidoc.
 
     For each package, its metadata are retrieved using Packagist API endpoints
     whose responses are served from static files, which are guaranteed to be
     efficient on the Packagist side (no dymamic queries).
     Furthermore, subsequent listing will send the "If-Modified-Since" HTTP
     header to only retrieve packages metadata updated since the previous listing
     operation in order to save bandwidth and return only origins which might have
     new released versions.
     """
 
     LISTER_NAME = "Packagist"
     PACKAGIST_PACKAGES_LIST_URL = "https://packagist.org/packages/list.json"
     PACKAGIST_REPO_BASE_URL = "https://repo.packagist.org/p"
 
     def __init__(
         self,
         scheduler: SchedulerInterface,
         credentials: CredentialsType = None,
     ):
         super().__init__(
             scheduler=scheduler,
             url=self.PACKAGIST_PACKAGES_LIST_URL,
             instance="packagist",
             credentials=credentials,
+            with_github_session=True,
         )
 
         self.session.headers.update({"Accept": "application/json"})
         self.listing_date = datetime.now().astimezone(tz=timezone.utc)
-        self.github_session = GitHubSession(
-            credentials=self.credentials,
-            user_agent=str(self.session.headers["User-Agent"]),
-        )
 
     def state_from_dict(self, d: Dict[str, Any]) -> PackagistListerState:
         last_listing_date = d.get("last_listing_date")
         if last_listing_date is not None:
             d["last_listing_date"] = iso8601.parse_date(last_listing_date)
         return PackagistListerState(**d)
 
     def state_to_dict(self, state: PackagistListerState) -> Dict[str, Any]:
         d: Dict[str, Optional[str]] = {"last_listing_date": None}
         last_listing_date = state.last_listing_date
         if last_listing_date is not None:
             d["last_listing_date"] = last_listing_date.isoformat()
         return d
 
     def api_request(self, url: str) -> Any:
         response = self.http_request(url)
         # response is empty when status code is 304
         return response.json() if response.status_code == 200 else {}
 
     def get_pages(self) -> Iterator[PackagistPageType]:
         """
         Yield a single page listing all Packagist projects.
         """
         yield self.api_request(self.PACKAGIST_PACKAGES_LIST_URL)["packageNames"]
 
     def get_origins_from_page(self, page: PackagistPageType) -> Iterator[ListedOrigin]:
         """
         Iterate on all Packagist projects and yield ListedOrigin instances.
         """
         assert self.lister_obj.id is not None
 
         # save some bandwidth by only getting packages metadata updated since
         # last listing
         if self.state.last_listing_date is not None:
             if_modified_since = self.state.last_listing_date.strftime(
                 "%a, %d %b %Y %H:%M:%S GMT"
             )
             self.session.headers["If-Modified-Since"] = if_modified_since
 
         # to ensure origins will not be listed multiple times
         origin_urls = set()
 
         for package_name in page:
             try:
                 metadata = self.api_request(
                     f"{self.PACKAGIST_REPO_BASE_URL}/{package_name}.json"
                 )
                 if not metadata.get("packages", {}):
                     # package metadata not updated since last listing
                     continue
                 if package_name not in metadata["packages"]:
                     # missing package metadata in response
                     continue
                 versions_info = metadata["packages"][package_name].values()
             except requests.HTTPError:
                 # error when getting package metadata (usually 404 when a
                 # package has been removed), skip it and process next package
                 continue
 
             origin_url = None
             visit_type = None
             last_update = None
 
             # extract origin url for package, vcs type and latest release date
             for version_info in versions_info:
                 origin_url = version_info.get("source", {}).get("url", "")
                 if not origin_url:
                     continue
                 # can be git, hg or svn
                 visit_type = version_info.get("source", {}).get("type", "")
                 dist_time_str = version_info.get("time", "")
                 if not dist_time_str:
                     continue
                 dist_time = iso8601.parse_date(dist_time_str)
                 if last_update is None or dist_time > last_update:
                     last_update = dist_time
 
             # skip package with already seen origin url or with missing required info
             if visit_type is None or origin_url is None or origin_url in origin_urls:
                 continue
 
             if visit_type == "git":
                 # Non-github urls will be returned as is, github ones will be canonical
                 # ones
+                assert self.github_session is not None
                 origin_url = (
                     self.github_session.get_canonical_url(origin_url) or origin_url
                 )
 
             # bitbucket closed its mercurial hosting service, those origins can not be
             # loaded into the archive anymore
             if visit_type == "hg" and origin_url.startswith("https://bitbucket.org/"):
                 continue
 
             origin_urls.add(origin_url)
 
             logger.debug(
                 "Found package %s last updated on %s", package_name, last_update
             )
 
             yield ListedOrigin(
                 lister_id=self.lister_obj.id,
                 url=origin_url,
                 visit_type=visit_type,
                 last_update=last_update,
             )
 
     def finalize(self) -> None:
         self.state.last_listing_date = self.listing_date
         self.updated = True
diff --git a/swh/lister/pattern.py b/swh/lister/pattern.py
index 7492683..5b3a33d 100644
--- a/swh/lister/pattern.py
+++ b/swh/lister/pattern.py
@@ -1,321 +1,332 @@
 # Copyright (C) 2020-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from __future__ import annotations
 
 from dataclasses import dataclass
 import logging
 from typing import Any, Dict, Generic, Iterable, Iterator, List, Optional, Set, TypeVar
 from urllib.parse import urlparse
 
 import requests
 from tenacity.before_sleep import before_sleep_log
 
 from swh.core.config import load_from_envvar
+from swh.core.github.utils import GitHubSession
 from swh.core.utils import grouper
 from swh.scheduler import get_scheduler, model
 from swh.scheduler.interface import SchedulerInterface
 
 from . import USER_AGENT_TEMPLATE
 from .utils import http_retry
 
 logger = logging.getLogger(__name__)
 
 
 @dataclass
 class ListerStats:
     pages: int = 0
     origins: int = 0
 
     def __add__(self, other: ListerStats) -> ListerStats:
         return self.__class__(self.pages + other.pages, self.origins + other.origins)
 
     def __iadd__(self, other: ListerStats):
         self.pages += other.pages
         self.origins += other.origins
 
     def dict(self) -> Dict[str, int]:
         return {"pages": self.pages, "origins": self.origins}
 
 
 StateType = TypeVar("StateType")
 PageType = TypeVar("PageType")
 
 BackendStateType = Dict[str, Any]
 CredentialsType = Optional[Dict[str, Dict[str, List[Dict[str, str]]]]]
 
 
 class Lister(Generic[StateType, PageType]):
     """The base class for a Software Heritage lister.
 
     A lister scrapes a page by page list of origins from an upstream (a forge, the API
     of a package manager, ...), and massages the results of that scrape into a list of
     origins that are recorded by the scheduler backend.
 
     The main loop of the lister, :meth:`run`, basically revolves around the
     :meth:`get_pages` iterator, which sets up the lister state, then yields the scrape
     results page by page. The :meth:`get_origins_from_page` method converts the pages
     into a list of :class:`model.ListedOrigin`, sent to the scheduler at every page. The
     :meth:`commit_page` method can be used to update the lister state after a page of
     origins has been recorded in the scheduler backend.
 
     The :func:`finalize` method is called at lister teardown (whether the run has
     been successful or not) to update the local :attr:`state` object before it's sent to
     the database. This method must set the :attr:`updated` attribute if an updated
     state needs to be sent to the scheduler backend. This method can call
     :func:`get_state_from_scheduler` to refresh and merge the lister state from the
     scheduler before it's finalized (and potentially minimize the risk of race
     conditions between concurrent runs of the lister).
 
     The state of the lister is serialized and deserialized from the dict stored in the
     scheduler backend, using the :meth:`state_from_dict` and :meth:`state_to_dict`
     methods.
 
     Args:
       scheduler: the instance of the Scheduler being used to register the
         origins listed by this lister
       url: a URL representing this lister, e.g. the API's base URL
       instance: the instance name, to uniquely identify this lister instance,
         if not provided the URL network location will be used
       credentials: dictionary of credentials for all listers. The first level
         identifies the :attr:`LISTER_NAME`, the second level the lister
         :attr:`instance`. The final level is a list of dicts containing the
         expected credentials for the given instance of that lister.
 
     Generic types:
       - *StateType*: concrete lister type; should usually be a :class:`dataclass` for
         stricter typing
       - *PageType*: type of scrape results; can usually be a :class:`requests.Response`,
         or a :class:`dict`
 
     """
 
     LISTER_NAME: str = ""
+    github_session: Optional[GitHubSession] = None
 
     def __init__(
         self,
         scheduler: SchedulerInterface,
         url: str,
         instance: Optional[str] = None,
         credentials: CredentialsType = None,
+        with_github_session: bool = False,
     ):
         if not self.LISTER_NAME:
             raise ValueError("Must set the LISTER_NAME attribute on Lister classes")
 
         self.url = url
         if instance is not None:
             self.instance = instance
         else:
             self.instance = urlparse(url).netloc
 
         self.scheduler = scheduler
 
         if not credentials:
             credentials = {}
         self.credentials = list(
             credentials.get(self.LISTER_NAME, {}).get(self.instance, [])
         )
 
         # store the initial state of the lister
         self.state = self.get_state_from_scheduler()
         self.updated = False
 
         self.session = requests.Session()
         # Declare the USER_AGENT is more sysadm-friendly for the forge we list
         self.session.headers.update(
             {"User-Agent": USER_AGENT_TEMPLATE % self.LISTER_NAME}
         )
+        self.github_session: Optional[GitHubSession] = (
+            GitHubSession(
+                credentials=credentials.get("github", {}).get("github", []),
+                user_agent=str(self.session.headers["User-Agent"]),
+            )
+            if with_github_session
+            else None
+        )
 
         self.recorded_origins: Set[str] = set()
 
     @http_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
     def http_request(self, url: str, method="GET", **kwargs) -> requests.Response:
 
         logger.debug("Fetching URL %s with params %s", url, kwargs.get("params"))
 
         response = self.session.request(method, url, **kwargs)
         if response.status_code not in (200, 304):
             logger.warning(
                 "Unexpected HTTP status code %s on %s: %s",
                 response.status_code,
                 response.url,
                 response.content,
             )
         response.raise_for_status()
 
         return response
 
     def run(self) -> ListerStats:
         """Run the lister.
 
         Returns:
           A counter with the number of pages and origins seen for this run
           of the lister.
 
         """
         full_stats = ListerStats()
         self.recorded_origins = set()
 
         try:
             for page in self.get_pages():
                 full_stats.pages += 1
                 origins = self.get_origins_from_page(page)
                 sent_origins = self.send_origins(origins)
                 self.recorded_origins.update(sent_origins)
                 full_stats.origins = len(self.recorded_origins)
                 self.commit_page(page)
         finally:
             self.finalize()
             if self.updated:
                 self.set_state_in_scheduler()
 
         return full_stats
 
     def get_state_from_scheduler(self) -> StateType:
         """Update the state in the current instance from the state in the scheduler backend.
 
         This updates :attr:`lister_obj`, and returns its (deserialized) current state,
         to allow for comparison with the local state.
 
         Returns:
           the state retrieved from the scheduler backend
         """
         self.lister_obj = self.scheduler.get_or_create_lister(
             name=self.LISTER_NAME, instance_name=self.instance
         )
         return self.state_from_dict(self.lister_obj.current_state)
 
     def set_state_in_scheduler(self) -> None:
         """Update the state in the scheduler backend from the state of the current
         instance.
 
         Raises:
           swh.scheduler.exc.StaleData: in case of a race condition between
             concurrent listers (from :meth:`swh.scheduler.Scheduler.update_lister`).
         """
         self.lister_obj.current_state = self.state_to_dict(self.state)
         self.lister_obj = self.scheduler.update_lister(self.lister_obj)
 
     # State management to/from the scheduler
 
     def state_from_dict(self, d: BackendStateType) -> StateType:
         """Convert the state stored in the scheduler backend (as a dict),
         to the concrete StateType for this lister."""
         raise NotImplementedError
 
     def state_to_dict(self, state: StateType) -> BackendStateType:
         """Convert the StateType for this lister to its serialization as dict for
         storage in the scheduler.
 
         Values must be JSON-compatible as that's what the backend database expects.
         """
         raise NotImplementedError
 
     def finalize(self) -> None:
         """Custom hook to finalize the lister state before returning from the main loop.
 
         This method must set :attr:`updated` if the lister has done some work.
 
         If relevant, this method can use :meth`get_state_from_scheduler` to merge the
         current lister state with the one from the scheduler backend, reducing the risk
         of race conditions if we're running concurrent listings.
 
         This method is called in a `finally` block, which means it will also run when
         the lister fails.
 
         """
         pass
 
     # Actual listing logic
 
     def get_pages(self) -> Iterator[PageType]:
         """Retrieve a list of pages of listed results. This is the main loop of the lister.
 
         Returns:
           an iterator of raw pages fetched from the platform currently being listed.
         """
         raise NotImplementedError
 
     def get_origins_from_page(self, page: PageType) -> Iterator[model.ListedOrigin]:
         """Extract a list of :class:`model.ListedOrigin` from a raw page of results.
 
         Args:
           page: a single page of results
         Returns:
           an iterator for the origins present on the given page of results
         """
         raise NotImplementedError
 
     def commit_page(self, page: PageType) -> None:
         """Custom hook called after the current page has been committed in the scheduler
         backend.
 
         This method can be used to update the state after a page of origins has been
         successfully recorded in the scheduler backend. If the new state should be
         recorded at the point the lister completes, the :attr:`updated` attribute must
         be set.
 
         """
         pass
 
     def send_origins(self, origins: Iterable[model.ListedOrigin]) -> List[str]:
         """Record a list of :class:`model.ListedOrigin` in the scheduler.
 
         Returns:
           the list of origin URLs recorded in scheduler database
         """
         recorded_origins = []
         for batch_origins in grouper(origins, n=1000):
             ret = self.scheduler.record_listed_origins(batch_origins)
             recorded_origins += [origin.url for origin in ret]
 
         return recorded_origins
 
     @classmethod
     def from_config(cls, scheduler: Dict[str, Any], **config: Any):
         """Instantiate a lister from a configuration dict.
 
         This is basically a backwards-compatibility shim for the CLI.
 
         Args:
           scheduler: instantiation config for the scheduler
           config: the configuration dict for the lister, with the following keys:
             - credentials (optional): credentials list for the scheduler
             - any other kwargs passed to the lister.
 
         Returns:
           the instantiated lister
         """
         # Drop the legacy config keys which aren't used for this generation of listers.
         for legacy_key in ("storage", "lister", "celery"):
             config.pop(legacy_key, None)
 
         # Instantiate the scheduler
         scheduler_instance = get_scheduler(**scheduler)
 
         return cls(scheduler=scheduler_instance, **config)
 
     @classmethod
     def from_configfile(cls, **kwargs: Any):
         """Instantiate a lister from the configuration loaded from the
         SWH_CONFIG_FILENAME envvar, with potential extra keyword arguments
         if their value is not None.
 
         Args:
             kwargs: kwargs passed to the lister instantiation
         """
         config = dict(load_from_envvar())
         config.update({k: v for k, v in kwargs.items() if v is not None})
         return cls.from_config(**config)
 
 
 class StatelessLister(Lister[None, PageType], Generic[PageType]):
     def state_from_dict(self, d: BackendStateType) -> None:
         """Always return empty state"""
         return None
 
     def state_to_dict(self, state: None) -> BackendStateType:
         """Always set empty state"""
         return {}