diff --git a/swh/lister/__init__.py b/swh/lister/__init__.py index f4448d8..be53d8b 100644 --- a/swh/lister/__init__.py +++ b/swh/lister/__init__.py @@ -1,61 +1,62 @@ # Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import pkg_resources logger = logging.getLogger(__name__) try: __version__ = pkg_resources.get_distribution("swh.lister").version except pkg_resources.DistributionNotFound: __version__ = "devel" -USER_AGENT_TEMPLATE = "Software Heritage Lister (%s)" -USER_AGENT = USER_AGENT_TEMPLATE % __version__ - +USER_AGENT_TEMPLATE = ( + f"Software Heritage %s lister v{__version__}" + " (+https://www.softwareheritage.org/contact)" +) LISTERS = { entry_point.name.split(".", 1)[1]: entry_point for entry_point in pkg_resources.iter_entry_points("swh.workers") if entry_point.name.split(".", 1)[0] == "lister" } SUPPORTED_LISTERS = list(LISTERS) def get_lister(lister_name, db_url=None, **conf): """Instantiate a lister given its name. Args: lister_name (str): Lister's name conf (dict): Configuration dict (lister db cnx, policy, priority...) 
Returns: Tuple (instantiated lister, drop_tables function, init schema function, insert minimum data function) """ if lister_name not in LISTERS: raise ValueError( "Invalid lister %s: only supported listers are %s" % (lister_name, SUPPORTED_LISTERS) ) if db_url: conf["lister"] = {"cls": "local", "args": {"db": db_url}} registry_entry = LISTERS[lister_name].load()() lister_cls = registry_entry["lister"] from swh.lister import pattern if issubclass(lister_cls, pattern.Lister): return lister_cls.from_config(**conf) else: # Old-style lister return lister_cls(override_config=conf) diff --git a/swh/lister/cgit/tests/test_lister.py b/swh/lister/cgit/tests/test_lister.py index 9d0f123..c6ffcf2 100644 --- a/swh/lister/cgit/tests/test_lister.py +++ b/swh/lister/cgit/tests/test_lister.py @@ -1,280 +1,280 @@ # Copyright (C) 2019-2022 The Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime, timedelta, timezone import os from typing import List import pytest from swh.core.pytest_plugin import requests_mock_datadir_factory from swh.lister import __version__ from swh.lister.cgit.lister import CGitLister, _parse_last_updated_date from swh.lister.pattern import ListerStats def test_lister_cgit_get_pages_one_page(requests_mock_datadir, swh_scheduler): url = "https://git.savannah.gnu.org/cgit/" lister_cgit = CGitLister(swh_scheduler, url=url) repos: List[List[str]] = list(lister_cgit.get_pages()) flattened_repos = sum(repos, []) assert len(flattened_repos) == 977 assert flattened_repos[0]["url"] == "https://git.savannah.gnu.org/cgit/elisp-es.git" # note the url below is NOT a subpath of /cgit/ assert ( flattened_repos[-1]["url"] == "https://git.savannah.gnu.org/path/to/yetris.git" ) # noqa # note the url below is NOT on the same server assert flattened_repos[-2]["url"] == "http://example.org/cgit/xstarcastle.git" def 
test_lister_cgit_get_pages_with_pages(requests_mock_datadir, swh_scheduler): url = "https://git.tizen/cgit/" lister_cgit = CGitLister(swh_scheduler, url=url) repos: List[List[str]] = list(lister_cgit.get_pages()) flattened_repos = sum(repos, []) # we should have 16 repos (listed on 3 pages) assert len(repos) == 3 assert len(flattened_repos) == 16 def test_lister_cgit_run_with_page(requests_mock_datadir, swh_scheduler): """cgit lister supports pagination""" url = "https://git.tizen/cgit/" lister_cgit = CGitLister(swh_scheduler, url=url) stats = lister_cgit.run() expected_nb_origins = 16 assert stats == ListerStats(pages=3, origins=expected_nb_origins) # test page parsing scheduler_origins = swh_scheduler.get_listed_origins( lister_cgit.lister_obj.id ).results assert len(scheduler_origins) == expected_nb_origins # test listed repositories for listed_origin in scheduler_origins: assert listed_origin.visit_type == "git" assert listed_origin.url.startswith("https://git.tizen") # test user agent content assert len(requests_mock_datadir.request_history) != 0 for request in requests_mock_datadir.request_history: assert "User-Agent" in request.headers user_agent = request.headers["User-Agent"] - assert "Software Heritage Lister" in user_agent + assert "Software Heritage cgit lister" in user_agent assert __version__ in user_agent def test_lister_cgit_run_populates_last_update(requests_mock_datadir, swh_scheduler): """cgit lister returns last updated date""" url = "https://git.tizen/cgit" urls_without_date = [ f"https://git.tizen.org/cgit/{suffix_url}" for suffix_url in [ "All-Projects", "All-Users", "Lock-Projects", ] ] lister_cgit = CGitLister(swh_scheduler, url=url) stats = lister_cgit.run() expected_nb_origins = 16 assert stats == ListerStats(pages=3, origins=expected_nb_origins) # test page parsing scheduler_origins = swh_scheduler.get_listed_origins( lister_cgit.lister_obj.id ).results assert len(scheduler_origins) == expected_nb_origins # test listed repositories for 
listed_origin in scheduler_origins: if listed_origin.url in urls_without_date: assert listed_origin.last_update is None else: assert listed_origin.last_update is not None @pytest.mark.parametrize( "date_str,expected_date", [ ({}, None), ("unexpected date", None), ("2020-0140-10 10:10:10 (GMT)", None), ( "2020-01-10 10:10:10 (GMT)", datetime( year=2020, month=1, day=10, hour=10, minute=10, second=10, tzinfo=timezone.utc, ), ), ( "2019-08-04 05:10:41 +0100", datetime( year=2019, month=8, day=4, hour=5, minute=10, second=41, tzinfo=timezone(timedelta(hours=1)), ), ), ], ) def test_lister_cgit_date_parsing(date_str, expected_date): """test cgit lister date parsing""" repository = {"url": "url", "last_updated_date": date_str} assert _parse_last_updated_date(repository) == expected_date requests_mock_datadir_missing_url = requests_mock_datadir_factory( ignore_urls=[ "https://git.tizen/cgit/adaptation/ap_samsung/audio-hal-e4x12", ] ) def test_lister_cgit_get_origin_from_repo_failing( requests_mock_datadir_missing_url, swh_scheduler ): url = "https://git.tizen/cgit/" lister_cgit = CGitLister(swh_scheduler, url=url) stats = lister_cgit.run() expected_nb_origins = 15 assert stats == ListerStats(pages=3, origins=expected_nb_origins) @pytest.mark.parametrize( "credentials, expected_credentials", [ (None, []), ({"key": "value"}, []), ( {"cgit": {"tizen": [{"username": "user", "password": "pass"}]}}, [{"username": "user", "password": "pass"}], ), ], ) def test_lister_cgit_instantiation_with_credentials( credentials, expected_credentials, swh_scheduler ): url = "https://git.tizen/cgit/" lister = CGitLister( swh_scheduler, url=url, instance="tizen", credentials=credentials ) # Credentials are allowed in constructor assert lister.credentials == expected_credentials def test_lister_cgit_from_configfile(swh_scheduler_config, mocker): load_from_envvar = mocker.patch("swh.lister.pattern.load_from_envvar") load_from_envvar.return_value = { "scheduler": {"cls": "local", 
**swh_scheduler_config}, "url": "https://git.tizen/cgit/", "instance": "tizen", "credentials": {}, } lister = CGitLister.from_configfile() assert lister.scheduler is not None assert lister.credentials is not None @pytest.mark.parametrize( "url,base_git_url,expected_nb_origins", [ ("https://git.eclipse.org/c", "https://eclipse.org/r", 5), ("https://git.baserock.org/cgit/", "https://git.baserock.org/git/", 3), ("https://jff.email/cgit/", "git://jff.email/opt/git/", 6), ], ) def test_lister_cgit_with_base_git_url( url, base_git_url, expected_nb_origins, requests_mock_datadir, swh_scheduler ): """With base git url provided, listed urls should be the computed origin urls""" lister_cgit = CGitLister( swh_scheduler, url=url, base_git_url=base_git_url, ) stats = lister_cgit.run() assert stats == ListerStats(pages=1, origins=expected_nb_origins) # test page parsing scheduler_origins = swh_scheduler.get_listed_origins( lister_cgit.lister_obj.id ).results assert len(scheduler_origins) == expected_nb_origins # test listed repositories for listed_origin in scheduler_origins: assert listed_origin.visit_type == "git" assert listed_origin.url.startswith(base_git_url) assert ( listed_origin.url.startswith(url) is False ), f"url should be mapped to {base_git_url}" def test_lister_cgit_get_pages_with_pages_and_retry( requests_mock_datadir, requests_mock, datadir, mocker, swh_scheduler ): url = "https://git.tizen/cgit/" with open(os.path.join(datadir, "https_git.tizen/cgit,ofs=50"), "rb") as page: requests_mock.get( f"{url}?ofs=50", [ {"content": None, "status_code": 429}, {"content": None, "status_code": 429}, {"content": page.read(), "status_code": 200}, ], ) lister_cgit = CGitLister(swh_scheduler, url=url) mocker.patch.object(lister_cgit.http_request.retry, "sleep") repos: List[List[str]] = list(lister_cgit.get_pages()) flattened_repos = sum(repos, []) # we should have 16 repos (listed on 3 pages) assert len(repos) == 3 assert len(flattened_repos) == 16 def 
test_lister_cgit_summary_not_default(requests_mock_datadir, swh_scheduler): """cgit lister returns git url when the default repository tab is not the summary""" url = "https://git.acdw.net/cgit" lister_cgit = CGitLister(swh_scheduler, url=url) stats = lister_cgit.run() expected_nb_origins = 1 assert stats == ListerStats(pages=1, origins=expected_nb_origins) diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py index acef224..ae10d71 100644 --- a/swh/lister/github/lister.py +++ b/swh/lister/github/lister.py @@ -1,208 +1,208 @@ # Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from dataclasses import asdict, dataclass import datetime import logging from typing import Any, Dict, Iterator, List, Optional, Set from urllib.parse import parse_qs, urlparse import iso8601 from swh.core.github.utils import GitHubSession, MissingRateLimitReset from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin -from .. import USER_AGENT from ..pattern import CredentialsType, Lister logger = logging.getLogger(__name__) @dataclass class GitHubListerState: """State of the GitHub lister""" last_seen_id: int = 0 """Numeric id of the last repository listed on an incremental pass""" class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]): """List origins from GitHub. By default, the lister runs in incremental mode: it lists all repositories, starting with the `last_seen_id` stored in the scheduler backend. Providing the `first_id` and `last_id` arguments enables the "relisting" mode: in that mode, the lister finds the origins present in the range **excluding** `first_id` and **including** `last_id`. In this mode, the lister can overrun the `last_id`: it will always record all the origins seen in a given page. 
As the lister is fully idempotent, this is not a practical problem. Once relisting completes, the lister state in the scheduler backend is not updated. When the config contains a set of credentials, we shuffle this list at the beginning of the listing. To follow GitHub's `abuse rate limit policy`_, we keep using the same token over and over again, until its rate limit runs out. Once that happens, we switch to the next token over in our shuffled list. When a request fails with a rate limit exception for all tokens, we pause the listing until the largest value for X-Ratelimit-Reset over all tokens. When the credentials aren't set in the lister config, the lister can run in anonymous mode too (e.g. for testing purposes). .. _abuse rate limit policy: https://developer.github.com/v3/guides/best-practices-for-integrators/#dealing-with-abuse-rate-limits Args: first_id: the id of the first repo to list last_id: stop listing after seeing a repo with an id higher than this value. """ # noqa: B950 LISTER_NAME = "github" API_URL = "https://api.github.com/repositories" PAGE_SIZE = 1000 def __init__( self, scheduler: SchedulerInterface, credentials: CredentialsType = None, first_id: Optional[int] = None, last_id: Optional[int] = None, ): super().__init__( scheduler=scheduler, credentials=credentials, url=self.API_URL, instance="github", ) self.first_id = first_id self.last_id = last_id self.relisting = self.first_id is not None or self.last_id is not None self.github_session = GitHubSession( - credentials=self.credentials, user_agent=USER_AGENT + credentials=self.credentials, + user_agent=str(self.session.headers["User-Agent"]), ) def state_from_dict(self, d: Dict[str, Any]) -> GitHubListerState: return GitHubListerState(**d) def state_to_dict(self, state: GitHubListerState) -> Dict[str, Any]: return asdict(state) def get_pages(self) -> Iterator[List[Dict[str, Any]]]: current_id = 0 if self.first_id is not None: current_id = self.first_id elif self.state is not None: current_id 
= self.state.last_seen_id current_url = f"{self.API_URL}?since={current_id}&per_page={self.PAGE_SIZE}" while self.last_id is None or current_id < self.last_id: logger.debug("Getting page %s", current_url) try: response = self.github_session.request(current_url) except MissingRateLimitReset: # Give up break # We've successfully retrieved a (non-ratelimited) `response`. We # still need to check it for validity. if response.status_code != 200: logger.warning( "Got unexpected status_code %s: %s", response.status_code, response.content, ) break yield response.json() if "next" not in response.links: # No `next` link, we've reached the end of the world logger.debug( "No next link found in the response headers, all caught up" ) break # GitHub strongly advises to use the next link directly. We still # parse it to get the id of the last repository we've reached so # far. next_url = response.links["next"]["url"] parsed_url = urlparse(next_url) if not parsed_url.query: logger.warning("Failed to parse url %s", next_url) break parsed_query = parse_qs(parsed_url.query) current_id = int(parsed_query["since"][0]) current_url = next_url def get_origins_from_page( self, page: List[Dict[str, Any]] ) -> Iterator[ListedOrigin]: """Convert a page of GitHub repositories into a list of ListedOrigins. This records the html_url, as well as the pushed_at value if it exists. """ assert self.lister_obj.id is not None seen_in_page: Set[str] = set() for repo in page: if not repo: # null repositories in listings happen sometimes... 
continue if repo["html_url"] in seen_in_page: continue seen_in_page.add(repo["html_url"]) pushed_at_str = repo.get("pushed_at") pushed_at: Optional[datetime.datetime] = None if pushed_at_str: pushed_at = iso8601.parse_date(pushed_at_str) yield ListedOrigin( lister_id=self.lister_obj.id, url=repo["html_url"], visit_type="git", last_update=pushed_at, ) def commit_page(self, page: List[Dict[str, Any]]): """Update the currently stored state using the latest listed page""" if self.relisting: # Don't update internal state when relisting return if not page: # Sometimes, when you reach the end of the world, GitHub returns an empty # page of repositories return last_id = page[-1]["id"] if last_id > self.state.last_seen_id: self.state.last_seen_id = last_id def finalize(self): if self.relisting: return # Pull fresh lister state from the scheduler backend scheduler_state = self.get_state_from_scheduler() # Update the lister state in the backend only if the last seen id of # the current run is higher than that stored in the database. 
if self.state.last_seen_id > scheduler_state.last_seen_id: self.updated = True diff --git a/swh/lister/gitlab/tests/test_lister.py b/swh/lister/gitlab/tests/test_lister.py index 80650b8..6bbffcd 100644 --- a/swh/lister/gitlab/tests/test_lister.py +++ b/swh/lister/gitlab/tests/test_lister.py @@ -1,357 +1,358 @@ -# Copyright (C) 2017-2021 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import functools import json import logging from pathlib import Path from typing import Dict, List import pytest from requests.status_codes import codes -from swh.lister import USER_AGENT +from swh.lister import USER_AGENT_TEMPLATE from swh.lister.gitlab.lister import GitLabLister, _parse_id_after from swh.lister.pattern import ListerStats from swh.lister.tests.test_utils import assert_sleep_calls from swh.lister.utils import WAIT_EXP_BASE logger = logging.getLogger(__name__) def api_url(instance: str) -> str: return f"https://{instance}/api/v4/" -def _match_request(request): - return request.headers.get("User-Agent") == USER_AGENT +def _match_request(request, lister_name="gitlab"): + return request.headers.get("User-Agent") == USER_AGENT_TEMPLATE % lister_name def test_lister_gitlab(datadir, swh_scheduler, requests_mock): """Gitlab lister supports full listing""" instance = "gitlab.com" lister = GitLabLister(swh_scheduler, url=api_url(instance), instance=instance) response = gitlab_page_response(datadir, instance, 1) requests_mock.get( lister.page_url(), [{"json": response}], additional_matcher=_match_request, ) listed_result = lister.run() expected_nb_origins = len(response) assert listed_result == ListerStats(pages=1, origins=expected_nb_origins) scheduler_origins = lister.scheduler.get_listed_origins( lister.lister_obj.id ).results assert 
len(scheduler_origins) == expected_nb_origins for listed_origin in scheduler_origins: assert listed_origin.visit_type == "git" assert listed_origin.url.startswith(f"https://{instance}") assert listed_origin.last_update is not None def test_lister_gitlab_heptapod(datadir, swh_scheduler, requests_mock): """Heptapod lister happily lists hg, hg_git as hg and git origins""" name = "heptapod" instance = "foss.heptapod.net" lister = GitLabLister( swh_scheduler, url=api_url(instance), name=name, instance=instance ) assert lister.LISTER_NAME == name response = gitlab_page_response(datadir, instance, 1) requests_mock.get( lister.page_url(), [{"json": response}], - additional_matcher=_match_request, + additional_matcher=functools.partial(_match_request, lister_name="heptapod"), ) listed_result = lister.run() expected_nb_origins = len(response) for entry in response: assert entry["vcs_type"] in ("hg", "hg_git") assert listed_result == ListerStats(pages=1, origins=expected_nb_origins) scheduler_origins = lister.scheduler.get_listed_origins( lister.lister_obj.id ).results assert len(scheduler_origins) == expected_nb_origins for listed_origin in scheduler_origins: assert listed_origin.visit_type == "hg" assert listed_origin.url.startswith(f"https://{instance}") assert listed_origin.last_update is not None def gitlab_page_response(datadir, instance: str, id_after: int) -> List[Dict]: """Return list of repositories (out of test dataset)""" datapath = Path(datadir, f"https_{instance}", f"api_response_page{id_after}.json") return json.loads(datapath.read_text()) if datapath.exists else [] def test_lister_gitlab_with_pages(swh_scheduler, requests_mock, datadir): """Gitlab lister supports pagination""" instance = "gite.lirmm.fr" lister = GitLabLister(swh_scheduler, url=api_url(instance)) response1 = gitlab_page_response(datadir, instance, 1) response2 = gitlab_page_response(datadir, instance, 2) requests_mock.get( lister.page_url(), [{"json": response1, "headers": {"Link": 
f"<{lister.page_url(2)}>; rel=next"}}], additional_matcher=_match_request, ) requests_mock.get( lister.page_url(2), [{"json": response2}], additional_matcher=_match_request, ) listed_result = lister.run() expected_nb_origins = len(response1) + len(response2) assert listed_result == ListerStats(pages=2, origins=expected_nb_origins) scheduler_origins = lister.scheduler.get_listed_origins( lister.lister_obj.id ).results assert len(scheduler_origins) == expected_nb_origins for listed_origin in scheduler_origins: assert listed_origin.visit_type == "git" assert listed_origin.url.startswith(f"https://{instance}") assert listed_origin.last_update is not None def test_lister_gitlab_incremental(swh_scheduler, requests_mock, datadir): """Gitlab lister supports incremental visits""" instance = "gite.lirmm.fr" url = api_url(instance) lister = GitLabLister(swh_scheduler, url=url, instance=instance, incremental=True) url_page1 = lister.page_url() response1 = gitlab_page_response(datadir, instance, 1) url_page2 = lister.page_url(2) response2 = gitlab_page_response(datadir, instance, 2) url_page3 = lister.page_url(3) response3 = gitlab_page_response(datadir, instance, 3) requests_mock.get( url_page1, [{"json": response1, "headers": {"Link": f"<{url_page2}>; rel=next"}}], additional_matcher=_match_request, ) requests_mock.get( url_page2, [{"json": response2}], additional_matcher=_match_request, ) listed_result = lister.run() expected_nb_origins = len(response1) + len(response2) assert listed_result == ListerStats(pages=2, origins=expected_nb_origins) assert lister.state.last_seen_next_link == url_page2 lister2 = GitLabLister(swh_scheduler, url=url, instance=instance, incremental=True) # Lister will start back at the last stop requests_mock.get( url_page2, [{"json": response2, "headers": {"Link": f"<{url_page3}>; rel=next"}}], additional_matcher=_match_request, ) requests_mock.get( url_page3, [{"json": response3}], additional_matcher=_match_request, ) listed_result2 = lister2.run() 
assert listed_result2 == ListerStats( pages=2, origins=len(response2) + len(response3) ) assert lister2.state.last_seen_next_link == url_page3 assert lister.lister_obj.id == lister2.lister_obj.id scheduler_origins = lister2.scheduler.get_listed_origins( lister2.lister_obj.id ).results assert len(scheduler_origins) == len(response1) + len(response2) + len(response3) for listed_origin in scheduler_origins: assert listed_origin.visit_type == "git" assert listed_origin.url.startswith(f"https://{instance}") assert listed_origin.last_update is not None def test_lister_gitlab_rate_limit(swh_scheduler, requests_mock, datadir, mocker): """Gitlab lister supports rate-limit""" instance = "gite.lirmm.fr" url = api_url(instance) lister = GitLabLister(swh_scheduler, url=url, instance=instance) url_page1 = lister.page_url() response1 = gitlab_page_response(datadir, instance, 1) url_page2 = lister.page_url(2) response2 = gitlab_page_response(datadir, instance, 2) requests_mock.get( url_page1, [{"json": response1, "headers": {"Link": f"<{url_page2}>; rel=next"}}], additional_matcher=_match_request, ) requests_mock.get( url_page2, [ # rate limited twice {"status_code": codes.forbidden, "headers": {"RateLimit-Remaining": "0"}}, {"status_code": codes.forbidden, "headers": {"RateLimit-Remaining": "0"}}, # ok {"json": response2}, ], additional_matcher=_match_request, ) # To avoid this test being too slow, we mock sleep within the retry behavior mock_sleep = mocker.patch.object(lister.get_page_result.retry, "sleep") listed_result = lister.run() expected_nb_origins = len(response1) + len(response2) assert listed_result == ListerStats(pages=2, origins=expected_nb_origins) assert_sleep_calls(mocker, mock_sleep, [1, WAIT_EXP_BASE]) @pytest.mark.parametrize("status_code", [502, 503, 520]) def test_lister_gitlab_http_errors( swh_scheduler, requests_mock, datadir, mocker, status_code ): """Gitlab lister should retry requests when encountering HTTP 50x errors""" instance = "gite.lirmm.fr" url = 
api_url(instance) lister = GitLabLister(swh_scheduler, url=url, instance=instance) url_page1 = lister.page_url() response1 = gitlab_page_response(datadir, instance, 1) url_page2 = lister.page_url(2) response2 = gitlab_page_response(datadir, instance, 2) requests_mock.get( url_page1, [{"json": response1, "headers": {"Link": f"<{url_page2}>; rel=next"}}], additional_matcher=_match_request, ) requests_mock.get( url_page2, [ # first request ends up with error {"status_code": status_code}, # second request is ok {"json": response2}, ], additional_matcher=_match_request, ) # To avoid this test being too slow, we mock sleep within the retry behavior mock_sleep = mocker.patch.object(lister.get_page_result.retry, "sleep") listed_result = lister.run() expected_nb_origins = len(response1) + len(response2) assert listed_result == ListerStats(pages=2, origins=expected_nb_origins) assert_sleep_calls(mocker, mock_sleep, [1]) def test_lister_gitlab_http_error_500(swh_scheduler, requests_mock, datadir): """Gitlab lister should skip buggy URL and move to next page.""" instance = "gite.lirmm.fr" url = api_url(instance) lister = GitLabLister(swh_scheduler, url=url, instance=instance) url_page1 = lister.page_url() response1 = gitlab_page_response(datadir, instance, 1) url_page2 = lister.page_url(lister.per_page) url_page3 = lister.page_url(2 * lister.per_page) response3 = gitlab_page_response(datadir, instance, 3) requests_mock.get( url_page1, [{"json": response1, "headers": {"Link": f"<{url_page2}>; rel=next"}}], additional_matcher=_match_request, ) requests_mock.get( url_page2, [ {"status_code": 500}, ], additional_matcher=_match_request, ) requests_mock.get( url_page3, [{"json": response3}], additional_matcher=_match_request, ) listed_result = lister.run() expected_nb_origins = len(response1) + len(response3) assert listed_result == ListerStats(pages=2, origins=expected_nb_origins) def test_lister_gitlab_credentials(swh_scheduler): """Gitlab lister supports credentials 
configuration""" instance = "gitlab" credentials = { "gitlab": {instance: [{"username": "user", "password": "api-token"}]} } url = api_url(instance) lister = GitLabLister( scheduler=swh_scheduler, url=url, instance=instance, credentials=credentials ) assert lister.session.headers["Authorization"] == "Bearer api-token" @pytest.mark.parametrize( "url", [ api_url("gitlab").rstrip("/"), api_url("gitlab"), ], ) def test_lister_gitlab_url_computation(url, swh_scheduler): lister = GitLabLister(scheduler=swh_scheduler, url=url) assert not lister.url.endswith("/") page_url = lister.page_url() # ensure the generated url contains the separated / assert page_url.startswith(f"{lister.url}/projects") @pytest.mark.parametrize( "url,expected_result", [ (None, None), ("http://dummy/?query=1", None), ("http://dummy/?foo=bar&id_after=1&some=result", 1), ("http://dummy/?foo=bar&id_after=&some=result", None), ], ) def test__parse_id_after(url, expected_result): assert _parse_id_after(url) == expected_result diff --git a/swh/lister/maven/lister.py b/swh/lister/maven/lister.py index b230552..2055b91 100644 --- a/swh/lister/maven/lister.py +++ b/swh/lister/maven/lister.py @@ -1,406 +1,406 @@ # Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from dataclasses import asdict, dataclass from datetime import datetime, timezone import logging import re from typing import Any, Dict, Iterator, Optional from urllib.parse import urljoin from bs4 import BeautifulSoup import lxml import requests from swh.core.github.utils import GitHubSession from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin -from .. 
import USER_AGENT from ..pattern import CredentialsType, Lister logger = logging.getLogger(__name__) RepoPage = Dict[str, Any] SUPPORTED_SCM_TYPES = ("git", "svn", "hg", "cvs", "bzr") @dataclass class MavenListerState: """State of the MavenLister""" last_seen_doc: int = -1 """Last doc ID ingested during an incremental pass """ last_seen_pom: int = -1 """Last doc ID related to a pom and ingested during an incremental pass """ class MavenLister(Lister[MavenListerState, RepoPage]): """List origins from a Maven repository. Maven Central provides artifacts for Java builds. It includes POM files and source archives, which we download to get the source code of artifacts and links to their scm repository. This lister yields origins of types: git/svn/hg or whatever the Artifacts use as repository type, plus maven types for the maven loader (tgz, jar).""" LISTER_NAME = "maven" def __init__( self, scheduler: SchedulerInterface, url: str, index_url: str = None, instance: Optional[str] = None, credentials: CredentialsType = None, incremental: bool = True, ): """Lister class for Maven repositories. Args: url: main URL of the Maven repository, i.e. url of the base index used to fetch maven artifacts. For Maven central use https://repo1.maven.org/maven2/ index_url: the URL to download the exported text indexes from. Would typically be a local host running the export docker image. See README.md in this directory for more information. instance: Name of maven instance. Defaults to url's network location if unset. incremental: bool, defaults to True. Defines if incremental listing is activated or not. 
""" self.BASE_URL = url self.INDEX_URL = index_url self.incremental = incremental super().__init__( scheduler=scheduler, credentials=credentials, url=url, instance=instance, ) self.session.headers.update({"Accept": "application/json"}) self.jar_origins: Dict[str, ListedOrigin] = {} self.github_session = GitHubSession( - credentials=self.credentials, user_agent=USER_AGENT + credentials=self.credentials, + user_agent=str(self.session.headers["User-Agent"]), ) def state_from_dict(self, d: Dict[str, Any]) -> MavenListerState: return MavenListerState(**d) def state_to_dict(self, state: MavenListerState) -> Dict[str, Any]: return asdict(state) def get_pages(self) -> Iterator[RepoPage]: """Retrieve and parse exported maven indexes to identify all pom files and src archives. """ # Example of returned RepoPage's: # [ # { # "type": "maven", # "url": "https://maven.xwiki.org/..-5.4.2-sources.jar", # "time": 1626109619335, # "gid": "org.xwiki.platform", # "aid": "xwiki-platform-wikistream-events-xwiki", # "version": "5.4.2" # }, # { # "type": "scm", # "url": "scm:git:git://github.com/openengsb/openengsb-framework.git", # "project": "openengsb-framework", # }, # ... # ] # Download the main text index file. logger.info("Downloading computed index from %s.", self.INDEX_URL) assert self.INDEX_URL is not None try: response = self.http_request(self.INDEX_URL, stream=True) except requests.HTTPError: logger.error("Index %s not found, stopping", self.INDEX_URL) raise # Prepare regexes to parse index exports. # Parse doc id. # Example line: "doc 13" re_doc = re.compile(r"^doc (?P\d+)$") # Parse gid, aid, version, classifier, extension. # Example line: " value al.aldi|sprova4j|0.1.0|sources|jar" re_val = re.compile( r"^\s{4}value (?P[^|]+)\|(?P[^|]+)\|(?P[^|]+)\|" + r"(?P[^|]+)\|(?P[^|]+)$" ) # Parse last modification time. 
# Example line: " value jar|1626109619335|14316|2|2|0|jar" re_time = re.compile( r"^\s{4}value ([^|]+)\|(?P[^|]+)\|([^|]+)\|([^|]+)\|([^|]+)" + r"\|([^|]+)\|([^|]+)$" ) # Read file line by line and process it out_pom: Dict = {} jar_src: Dict = {} doc_id: int = 0 jar_src["doc"] = None url_src = None iterator = response.iter_lines(chunk_size=1024) for line_bytes in iterator: # Read the index text export and get URLs and SCMs. line = line_bytes.decode(errors="ignore") m_doc = re_doc.match(line) if m_doc is not None: doc_id = int(m_doc.group("doc")) # jar_src["doc"] contains the id of the current document, whatever # its type (scm or jar). jar_src["doc"] = doc_id else: m_val = re_val.match(line) if m_val is not None: (gid, aid, version, classifier, ext) = m_val.groups() ext = ext.strip() path = "/".join(gid.split(".")) if classifier == "NA" and ext.lower() == "pom": # If incremental mode, we don't record any line that is # before our last recorded doc id. if ( self.incremental and self.state and self.state.last_seen_pom and self.state.last_seen_pom >= doc_id ): continue url_path = f"{path}/{aid}/{version}/{aid}-{version}.{ext}" url_pom = urljoin( self.BASE_URL, url_path, ) out_pom[url_pom] = doc_id elif ( classifier.lower() == "sources" or ("src" in classifier) ) and ext.lower() in ("zip", "jar"): url_path = ( f"{path}/{aid}/{version}/{aid}-{version}-{classifier}.{ext}" ) url_src = urljoin(self.BASE_URL, url_path) jar_src["gid"] = gid jar_src["aid"] = aid jar_src["version"] = version else: m_time = re_time.match(line) if m_time is not None and url_src is not None: time = m_time.group("mtime") jar_src["time"] = int(time) artifact_metadata_d = { "type": "maven", "url": url_src, **jar_src, } logger.debug( "* Yielding jar %s: %s", url_src, artifact_metadata_d ) yield artifact_metadata_d url_src = None logger.info("Found %s poms.", len(out_pom)) # Now fetch pom files and scan them for scm info. 
logger.info("Fetching poms..") for pom_url in out_pom: try: response = self.http_request(pom_url) parsed_pom = BeautifulSoup(response.content, "xml") project = parsed_pom.find("project") if project is None: continue scm = project.find("scm") if scm is not None: connection = scm.find("connection") if connection is not None: artifact_metadata_d = { "type": "scm", "doc": out_pom[pom_url], "url": connection.text, } logger.debug( "* Yielding pom %s: %s", pom_url, artifact_metadata_d ) yield artifact_metadata_d else: logger.debug("No scm.connection in pom %s", pom_url) else: logger.debug("No scm in pom %s", pom_url) except requests.HTTPError: logger.warning( "POM info page could not be fetched, skipping project '%s'", pom_url, ) except lxml.etree.Error as error: logger.info("Could not parse POM %s XML: %s.", pom_url, error) def get_scm(self, page: RepoPage) -> Optional[ListedOrigin]: """Retrieve scm origin out of the page information. Only called when type of the page is scm. Try and detect an scm/vcs repository. Note that official format is in the form: scm:{type}:git://example.org/{user}/{repo}.git but some projects directly put the repo url (without the "scm:type"), so we have to check against the content to extract the type and url properly. Raises AssertionError when the type of the page is not 'scm' Returns ListedOrigin with proper canonical scm url (for github) if any is found, None otherwise. 
""" assert page["type"] == "scm" visit_type: Optional[str] = None url: Optional[str] = None m_scm = re.match(r"^scm:(?P[^:]+):(?P.*)$", page["url"]) if m_scm is None: return None scm_type = m_scm.group("type") if scm_type and scm_type in SUPPORTED_SCM_TYPES: url = m_scm.group("url") visit_type = scm_type elif page["url"].endswith(".git"): url = page["url"].lstrip("scm:") visit_type = "git" else: return None if url and visit_type == "git": # Non-github urls will be returned as is, github ones will be canonical ones url = self.github_session.get_canonical_url(url) if not url: return None assert visit_type is not None assert self.lister_obj.id is not None return ListedOrigin( lister_id=self.lister_obj.id, url=url, visit_type=visit_type, ) def get_origins_from_page(self, page: RepoPage) -> Iterator[ListedOrigin]: """Convert a page of Maven repositories into a list of ListedOrigins.""" if page["type"] == "scm": listed_origin = self.get_scm(page) if listed_origin: yield listed_origin else: # Origin is gathering source archives: last_update_dt = None last_update_iso = "" try: last_update_seconds = str(page["time"])[:-3] last_update_dt = datetime.fromtimestamp(int(last_update_seconds)) last_update_dt = last_update_dt.astimezone(timezone.utc) except (OverflowError, ValueError): logger.warning("- Failed to convert datetime %s.", last_update_seconds) if last_update_dt: last_update_iso = last_update_dt.isoformat() # Origin URL will target page holding sources for all versions of # an artifactId (package name) inside a groupId (namespace) path = "/".join(page["gid"].split(".")) origin_url = urljoin(self.BASE_URL, f"{path}/{page['aid']}") artifact = { **{k: v for k, v in page.items() if k != "doc"}, "time": last_update_iso, "base_url": self.BASE_URL, } if origin_url not in self.jar_origins: # Create ListedOrigin instance if we did not see that origin yet assert self.lister_obj.id is not None jar_origin = ListedOrigin( lister_id=self.lister_obj.id, url=origin_url, 
visit_type=page["type"], last_update=last_update_dt, extra_loader_arguments={"artifacts": [artifact]}, ) self.jar_origins[origin_url] = jar_origin else: # Update list of source artifacts for that origin otherwise jar_origin = self.jar_origins[origin_url] artifacts = jar_origin.extra_loader_arguments["artifacts"] if artifact not in artifacts: artifacts.append(artifact) if ( jar_origin.last_update and last_update_dt and last_update_dt > jar_origin.last_update ): jar_origin.last_update = last_update_dt if not self.incremental or ( self.state and page["doc"] > self.state.last_seen_doc ): # Yield origin with updated source artifacts, multiple instances of # ListedOrigin for the same origin URL but with different artifacts # list will be sent to the scheduler but it will deduplicate them and # take the latest one to upsert in database yield jar_origin def commit_page(self, page: RepoPage) -> None: """Update currently stored state using the latest listed doc. Note: this is a noop for full listing mode """ if self.incremental and self.state: # We need to differentiate the two state counters according # to the type of origin. if page["type"] == "maven" and page["doc"] > self.state.last_seen_doc: self.state.last_seen_doc = page["doc"] elif page["type"] == "scm" and page["doc"] > self.state.last_seen_pom: self.state.last_seen_doc = page["doc"] self.state.last_seen_pom = page["doc"] def finalize(self) -> None: """Finalize the lister state, set update if any progress has been made. 
Note: this is a noop for full listing mode """ if self.incremental and self.state: last_seen_doc = self.state.last_seen_doc last_seen_pom = self.state.last_seen_pom scheduler_state = self.get_state_from_scheduler() if last_seen_doc and last_seen_pom: if (scheduler_state.last_seen_doc < last_seen_doc) or ( scheduler_state.last_seen_pom < last_seen_pom ): self.updated = True diff --git a/swh/lister/npm/tests/test_lister.py b/swh/lister/npm/tests/test_lister.py index e8f8fa8..7c4fa93 100644 --- a/swh/lister/npm/tests/test_lister.py +++ b/swh/lister/npm/tests/test_lister.py @@ -1,218 +1,220 @@ # Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from itertools import chain import json from pathlib import Path import iso8601 import pytest from requests.exceptions import HTTPError -from swh.lister import USER_AGENT +from swh.lister import USER_AGENT_TEMPLATE from swh.lister.npm.lister import NpmLister, NpmListerState @pytest.fixture def npm_full_listing_page1(datadir): return json.loads(Path(datadir, "npm_full_page1.json").read_text()) @pytest.fixture def npm_full_listing_page2(datadir): return json.loads(Path(datadir, "npm_full_page2.json").read_text()) @pytest.fixture def npm_incremental_listing_page1(datadir): return json.loads(Path(datadir, "npm_incremental_page1.json").read_text()) @pytest.fixture def npm_incremental_listing_page2(datadir): return json.loads(Path(datadir, "npm_incremental_page2.json").read_text()) @pytest.fixture(autouse=True) def retry_sleep_mock(mocker): mocker.patch.object(NpmLister.http_request.retry, "sleep") def _check_listed_npm_packages(lister, packages, scheduler_origins): for package in packages: package_name = package["doc"]["name"] latest_version = package["doc"]["dist-tags"]["latest"] package_last_update = 
iso8601.parse_date(package["doc"]["time"][latest_version]) origin_url = lister.PACKAGE_URL_TEMPLATE.format(package_name=package_name) scheduler_origin = [o for o in scheduler_origins if o.url == origin_url] assert scheduler_origin assert scheduler_origin[0].last_update == package_last_update def _match_request(request): - return request.headers.get("User-Agent") == USER_AGENT + return ( + request.headers.get("User-Agent") == USER_AGENT_TEMPLATE % NpmLister.LISTER_NAME + ) def _url_params(page_size, **kwargs): params = {"limit": page_size, "include_docs": "true"} params.update(**kwargs) return params def test_npm_lister_full( swh_scheduler, requests_mock, mocker, npm_full_listing_page1, npm_full_listing_page2 ): """Simulate a full listing of four npm packages in two pages""" page_size = 2 lister = NpmLister(scheduler=swh_scheduler, page_size=page_size, incremental=False) requests_mock.get( lister.API_FULL_LISTING_URL, [ {"json": npm_full_listing_page1}, {"json": npm_full_listing_page2}, ], additional_matcher=_match_request, ) spy_request = mocker.spy(lister.session, "request") stats = lister.run() assert stats.pages == 2 assert stats.origins == page_size * stats.pages spy_request.assert_has_calls( [ mocker.call( "GET", lister.API_FULL_LISTING_URL, params=_url_params(page_size + 1, startkey='""'), ), mocker.call( "GET", lister.API_FULL_LISTING_URL, params=_url_params( page_size + 1, startkey=f'"{npm_full_listing_page1["rows"][-1]["id"]}"', ), ), ] ) scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results _check_listed_npm_packages( lister, chain(npm_full_listing_page1["rows"][:-1], npm_full_listing_page2["rows"]), scheduler_origins, ) assert lister.get_state_from_scheduler() == NpmListerState() def test_npm_lister_incremental( swh_scheduler, requests_mock, mocker, npm_incremental_listing_page1, npm_incremental_listing_page2, ): """Simulate an incremental listing of four npm packages in two pages""" page_size = 2 lister = 
NpmLister(scheduler=swh_scheduler, page_size=page_size, incremental=True) requests_mock.get( lister.API_INCREMENTAL_LISTING_URL, [ {"json": npm_incremental_listing_page1}, {"json": npm_incremental_listing_page2}, {"json": {"results": []}}, ], additional_matcher=_match_request, ) spy_request = mocker.spy(lister.session, "request") assert lister.get_state_from_scheduler() == NpmListerState() stats = lister.run() assert stats.pages == 2 assert stats.origins == page_size * stats.pages last_seq = npm_incremental_listing_page2["results"][-1]["seq"] spy_request.assert_has_calls( [ mocker.call( "GET", lister.API_INCREMENTAL_LISTING_URL, params=_url_params(page_size, since="0"), ), mocker.call( "GET", lister.API_INCREMENTAL_LISTING_URL, params=_url_params( page_size, since=str(npm_incremental_listing_page1["results"][-1]["seq"]), ), ), mocker.call( "GET", lister.API_INCREMENTAL_LISTING_URL, params=_url_params(page_size, since=str(last_seq)), ), ] ) scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results _check_listed_npm_packages( lister, chain( npm_incremental_listing_page1["results"], npm_incremental_listing_page2["results"], ), scheduler_origins, ) assert lister.get_state_from_scheduler() == NpmListerState(last_seq=last_seq) def test_npm_lister_incremental_restart( swh_scheduler, requests_mock, mocker, ): """Check incremental npm listing will restart from saved state""" page_size = 2 last_seq = 67 lister = NpmLister(scheduler=swh_scheduler, page_size=page_size, incremental=True) lister.state = NpmListerState(last_seq=last_seq) requests_mock.get(lister.API_INCREMENTAL_LISTING_URL, json={"results": []}) spy_request = mocker.spy(lister.session, "request") lister.run() spy_request.assert_called_with( "GET", lister.API_INCREMENTAL_LISTING_URL, params=_url_params(page_size, since=str(last_seq)), ) def test_npm_lister_http_error( swh_scheduler, requests_mock, mocker, ): lister = NpmLister(scheduler=swh_scheduler) 
requests_mock.get(lister.API_FULL_LISTING_URL, status_code=500) with pytest.raises(HTTPError): lister.run() diff --git a/swh/lister/pattern.py b/swh/lister/pattern.py index 5b327e1..d188896 100644 --- a/swh/lister/pattern.py +++ b/swh/lister/pattern.py @@ -1,314 +1,316 @@ # Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import annotations from dataclasses import dataclass import logging from typing import Any, Dict, Generic, Iterable, Iterator, List, Optional, TypeVar from urllib.parse import urlparse import requests from tenacity.before_sleep import before_sleep_log from swh.core.config import load_from_envvar from swh.core.utils import grouper from swh.scheduler import get_scheduler, model from swh.scheduler.interface import SchedulerInterface -from . import USER_AGENT +from . import USER_AGENT_TEMPLATE from .utils import http_retry logger = logging.getLogger(__name__) @dataclass class ListerStats: pages: int = 0 origins: int = 0 def __add__(self, other: ListerStats) -> ListerStats: return self.__class__(self.pages + other.pages, self.origins + other.origins) def __iadd__(self, other: ListerStats): self.pages += other.pages self.origins += other.origins def dict(self) -> Dict[str, int]: return {"pages": self.pages, "origins": self.origins} StateType = TypeVar("StateType") PageType = TypeVar("PageType") BackendStateType = Dict[str, Any] CredentialsType = Optional[Dict[str, Dict[str, List[Dict[str, str]]]]] class Lister(Generic[StateType, PageType]): """The base class for a Software Heritage lister. A lister scrapes a page by page list of origins from an upstream (a forge, the API of a package manager, ...), and massages the results of that scrape into a list of origins that are recorded by the scheduler backend. 
The main loop of the lister, :meth:`run`, basically revolves around the :meth:`get_pages` iterator, which sets up the lister state, then yields the scrape results page by page. The :meth:`get_origins_from_page` method converts the pages into a list of :class:`model.ListedOrigin`, sent to the scheduler at every page. The :meth:`commit_page` method can be used to update the lister state after a page of origins has been recorded in the scheduler backend. The :func:`finalize` method is called at lister teardown (whether the run has been successful or not) to update the local :attr:`state` object before it's sent to the database. This method must set the :attr:`updated` attribute if an updated state needs to be sent to the scheduler backend. This method can call :func:`get_state_from_scheduler` to refresh and merge the lister state from the scheduler before it's finalized (and potentially minimize the risk of race conditions between concurrent runs of the lister). The state of the lister is serialized and deserialized from the dict stored in the scheduler backend, using the :meth:`state_from_dict` and :meth:`state_to_dict` methods. Args: scheduler: the instance of the Scheduler being used to register the origins listed by this lister url: a URL representing this lister, e.g. the API's base URL instance: the instance name, to uniquely identify this lister instance, if not provided the URL network location will be used credentials: dictionary of credentials for all listers. The first level identifies the :attr:`LISTER_NAME`, the second level the lister :attr:`instance`. The final level is a list of dicts containing the expected credentials for the given instance of that lister. 
Generic types: - *StateType*: concrete lister type; should usually be a :class:`dataclass` for stricter typing - *PageType*: type of scrape results; can usually be a :class:`requests.Response`, or a :class:`dict` """ LISTER_NAME: str = "" def __init__( self, scheduler: SchedulerInterface, url: str, instance: Optional[str] = None, credentials: CredentialsType = None, ): if not self.LISTER_NAME: raise ValueError("Must set the LISTER_NAME attribute on Lister classes") self.url = url if instance is not None: self.instance = instance else: self.instance = urlparse(url).netloc self.scheduler = scheduler if not credentials: credentials = {} self.credentials = list( credentials.get(self.LISTER_NAME, {}).get(self.instance, []) ) # store the initial state of the lister self.state = self.get_state_from_scheduler() self.updated = False self.session = requests.Session() # Declare the USER_AGENT is more sysadm-friendly for the forge we list - self.session.headers.update({"User-Agent": USER_AGENT}) + self.session.headers.update( + {"User-Agent": USER_AGENT_TEMPLATE % self.LISTER_NAME} + ) @http_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) def http_request(self, url: str, method="GET", **kwargs) -> requests.Response: logger.debug("Fetching URL %s with params %s", url, kwargs.get("params")) response = self.session.request(method, url, **kwargs) if response.status_code not in (200, 304): logger.warning( "Unexpected HTTP status code %s on %s: %s", response.status_code, response.url, response.content, ) response.raise_for_status() return response def run(self) -> ListerStats: """Run the lister. Returns: A counter with the number of pages and origins seen for this run of the lister. 
""" full_stats = ListerStats() try: for page in self.get_pages(): full_stats.pages += 1 origins = self.get_origins_from_page(page) full_stats.origins += self.send_origins(origins) self.commit_page(page) finally: self.finalize() if self.updated: self.set_state_in_scheduler() return full_stats def get_state_from_scheduler(self) -> StateType: """Update the state in the current instance from the state in the scheduler backend. This updates :attr:`lister_obj`, and returns its (deserialized) current state, to allow for comparison with the local state. Returns: the state retrieved from the scheduler backend """ self.lister_obj = self.scheduler.get_or_create_lister( name=self.LISTER_NAME, instance_name=self.instance ) return self.state_from_dict(self.lister_obj.current_state) def set_state_in_scheduler(self) -> None: """Update the state in the scheduler backend from the state of the current instance. Raises: swh.scheduler.exc.StaleData: in case of a race condition between concurrent listers (from :meth:`swh.scheduler.Scheduler.update_lister`). """ self.lister_obj.current_state = self.state_to_dict(self.state) self.lister_obj = self.scheduler.update_lister(self.lister_obj) # State management to/from the scheduler def state_from_dict(self, d: BackendStateType) -> StateType: """Convert the state stored in the scheduler backend (as a dict), to the concrete StateType for this lister.""" raise NotImplementedError def state_to_dict(self, state: StateType) -> BackendStateType: """Convert the StateType for this lister to its serialization as dict for storage in the scheduler. Values must be JSON-compatible as that's what the backend database expects. """ raise NotImplementedError def finalize(self) -> None: """Custom hook to finalize the lister state before returning from the main loop. This method must set :attr:`updated` if the lister has done some work. 
If relevant, this method can use :meth`get_state_from_scheduler` to merge the current lister state with the one from the scheduler backend, reducing the risk of race conditions if we're running concurrent listings. This method is called in a `finally` block, which means it will also run when the lister fails. """ pass # Actual listing logic def get_pages(self) -> Iterator[PageType]: """Retrieve a list of pages of listed results. This is the main loop of the lister. Returns: an iterator of raw pages fetched from the platform currently being listed. """ raise NotImplementedError def get_origins_from_page(self, page: PageType) -> Iterator[model.ListedOrigin]: """Extract a list of :class:`model.ListedOrigin` from a raw page of results. Args: page: a single page of results Returns: an iterator for the origins present on the given page of results """ raise NotImplementedError def commit_page(self, page: PageType) -> None: """Custom hook called after the current page has been committed in the scheduler backend. This method can be used to update the state after a page of origins has been successfully recorded in the scheduler backend. If the new state should be recorded at the point the lister completes, the :attr:`updated` attribute must be set. """ pass def send_origins(self, origins: Iterable[model.ListedOrigin]) -> int: """Record a list of :class:`model.ListedOrigin` in the scheduler. Returns: the number of listed origins recorded in the scheduler """ count = 0 for batch_origins in grouper(origins, n=1000): ret = self.scheduler.record_listed_origins(batch_origins) count += len(ret) return count @classmethod def from_config(cls, scheduler: Dict[str, Any], **config: Any): """Instantiate a lister from a configuration dict. This is basically a backwards-compatibility shim for the CLI. 
Args: scheduler: instantiation config for the scheduler config: the configuration dict for the lister, with the following keys: - credentials (optional): credentials list for the scheduler - any other kwargs passed to the lister. Returns: the instantiated lister """ # Drop the legacy config keys which aren't used for this generation of listers. for legacy_key in ("storage", "lister", "celery"): config.pop(legacy_key, None) # Instantiate the scheduler scheduler_instance = get_scheduler(**scheduler) return cls(scheduler=scheduler_instance, **config) @classmethod def from_configfile(cls, **kwargs: Any): """Instantiate a lister from the configuration loaded from the SWH_CONFIG_FILENAME envvar, with potential extra keyword arguments if their value is not None. Args: kwargs: kwargs passed to the lister instantiation """ config = dict(load_from_envvar()) config.update({k: v for k, v in kwargs.items() if v is not None}) return cls.from_config(**config) class StatelessLister(Lister[None, PageType], Generic[PageType]): def state_from_dict(self, d: BackendStateType) -> None: """Always return empty state""" return None def state_to_dict(self, state: None) -> BackendStateType: """Always set empty state""" return {} diff --git a/swh/lister/phabricator/tests/test_lister.py b/swh/lister/phabricator/tests/test_lister.py index ed35435..c6e7043 100644 --- a/swh/lister/phabricator/tests/test_lister.py +++ b/swh/lister/phabricator/tests/test_lister.py @@ -1,142 +1,143 @@ # Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from pathlib import Path import pytest from requests.exceptions import HTTPError -from swh.lister import USER_AGENT +from swh.lister import USER_AGENT_TEMPLATE from swh.lister.phabricator.lister import PhabricatorLister, get_repo_url @pytest.fixture def 
phabricator_repositories_page1(datadir): return json.loads( Path(datadir, "phabricator_api_repositories_page1.json").read_text() ) @pytest.fixture def phabricator_repositories_page2(datadir): return json.loads( Path(datadir, "phabricator_api_repositories_page2.json").read_text() ) @pytest.fixture(autouse=True) def retry_sleep_mock(mocker): mocker.patch.object(PhabricatorLister.http_request.retry, "sleep") def test_get_repo_url(phabricator_repositories_page1): repos = phabricator_repositories_page1["result"]["data"] for repo in repos: expected_name = "https://forge.softwareheritage.org/source/%s.git" % ( repo["fields"]["shortName"] ) assert get_repo_url(repo["attachments"]["uris"]["uris"]) == expected_name def test_get_repo_url_undefined_protocol(): undefined_protocol_uris = [ { "fields": { "uri": { "raw": "https://svn.blender.org/svnroot/bf-blender/", "display": "https://svn.blender.org/svnroot/bf-blender/", "effective": "https://svn.blender.org/svnroot/bf-blender/", "normalized": "svn.blender.org/svnroot/bf-blender", }, "builtin": {"protocol": None, "identifier": None}, }, } ] expected_name = "https://svn.blender.org/svnroot/bf-blender/" assert get_repo_url(undefined_protocol_uris) == expected_name def test_lister_url_param(swh_scheduler): FORGE_BASE_URL = "https://forge.softwareheritage.org" API_REPOSITORY_PATH = "/api/diffusion.repository.search" for url in ( FORGE_BASE_URL, f"{FORGE_BASE_URL}/", f"{FORGE_BASE_URL}/{API_REPOSITORY_PATH}", f"{FORGE_BASE_URL}/{API_REPOSITORY_PATH}/", ): lister = PhabricatorLister( scheduler=swh_scheduler, url=FORGE_BASE_URL, instance="swh", api_token="foo" ) expected_url = f"{FORGE_BASE_URL}{API_REPOSITORY_PATH}" assert lister.url == expected_url def test_lister( swh_scheduler, requests_mock, phabricator_repositories_page1, phabricator_repositories_page2, ): FORGE_BASE_URL = "https://forge.softwareheritage.org" API_TOKEN = "foo" lister = PhabricatorLister( scheduler=swh_scheduler, url=FORGE_BASE_URL, instance="swh", 
api_token=API_TOKEN ) def match_request(request): return ( - request.headers.get("User-Agent") == USER_AGENT + request.headers.get("User-Agent") + == USER_AGENT_TEMPLATE % PhabricatorLister.LISTER_NAME and f"api.token={API_TOKEN}" in request.body ) requests_mock.post( f"{FORGE_BASE_URL}{lister.API_REPOSITORY_PATH}", [ {"json": phabricator_repositories_page1}, {"json": phabricator_repositories_page2}, ], additional_matcher=match_request, ) stats = lister.run() expected_nb_origins = len(phabricator_repositories_page1["result"]["data"]) * 2 assert stats.pages == 2 assert stats.origins == expected_nb_origins scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert len(scheduler_origins) == expected_nb_origins def test_lister_request_error( swh_scheduler, requests_mock, phabricator_repositories_page1, ): FORGE_BASE_URL = "https://forge.softwareheritage.org" lister = PhabricatorLister( scheduler=swh_scheduler, url=FORGE_BASE_URL, instance="swh", api_token="foo" ) requests_mock.post( f"{FORGE_BASE_URL}{lister.API_REPOSITORY_PATH}", [ {"status_code": 200, "json": phabricator_repositories_page1}, {"status_code": 500, "reason": "Internal Server Error"}, ], ) with pytest.raises(HTTPError): lister.run() diff --git a/swh/lister/pubdev/lister.py b/swh/lister/pubdev/lister.py index 8910f39..fd1dc45 100644 --- a/swh/lister/pubdev/lister.py +++ b/swh/lister/pubdev/lister.py @@ -1,106 +1,94 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging from typing import Iterator, List, Optional import iso8601 from requests.exceptions import HTTPError from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin -from .. 
from ..pattern import CredentialsType, StatelessLister

logger = logging.getLogger(__name__)

# Aliasing the page results returned by `get_pages` method from the lister.
PubDevListerPage = List[str]


class PubDevLister(StatelessLister[PubDevListerPage]):
    """List pub.dev (Dart, Flutter) origins."""

    LISTER_NAME = "pubdev"
    VISIT_TYPE = "pubdev"
    INSTANCE = "pubdev"

    BASE_URL = "https://pub.dev/"
    PACKAGE_NAMES_URL_PATTERN = "{base_url}api/package-names"
    PACKAGE_INFO_URL_PATTERN = "{base_url}api/packages/{pkgname}"
    ORIGIN_URL_PATTERN = "{base_url}packages/{pkgname}"

    def __init__(
        self,
        scheduler: SchedulerInterface,
        credentials: Optional[CredentialsType] = None,
    ):
        super().__init__(
            scheduler=scheduler,
            credentials=credentials,
            instance=self.INSTANCE,
            url=self.BASE_URL,
        )
        # The pub.dev API serves JSON; the User-Agent header is set by the
        # base lister session.
        self.session.headers.update({"Accept": "application/json"})

    def get_pages(self) -> Iterator[PubDevListerPage]:
        """Yield the single page of package names.

        Uses the API provided by https://pub.dev/api/ to find Dart and
        Flutter package origins: "{base_url}api/package-names" returns a
        sorted list of all package names, so there is only one page; origin
        URLs are then derived as "{base_url}packages/{pkgname}".
        """
        response = self.http_request(
            url=self.PACKAGE_NAMES_URL_PATTERN.format(base_url=self.url)
        )
        yield response.json()["packages"]

    def get_origins_from_page(self, page: PubDevListerPage) -> Iterator[ListedOrigin]:
        """Iterate on all pages and yield ListedOrigin instances.

        Packages whose metadata endpoint errors out are skipped with a
        warning rather than aborting the whole listing.
        """
        assert self.lister_obj.id is not None

        for pkgname in page:
            package_info_url = self.PACKAGE_INFO_URL_PATTERN.format(
                base_url=self.url, pkgname=pkgname
            )
            try:
                response = self.http_request(url=package_info_url)
            except HTTPError:
                logger.warning(
                    "Failed to fetch metadata for package %s, skipping it from listing.",
                    pkgname,
                )
                continue

            package_metadata = response.json()
            package_versions = package_metadata["versions"]
            # Fix: parse each timestamp before taking the max. Comparing raw
            # ISO-8601 strings lexicographically is only correct when every
            # timestamp uses an identical representation (same offset form,
            # same fraction width); aware datetimes always compare correctly.
            last_published = max(
                iso8601.parse_date(package_version["published"])
                for package_version in package_versions
            )

            origin_url = self.ORIGIN_URL_PATTERN.format(
                base_url=self.url, pkgname=pkgname
            )
            yield ListedOrigin(
                lister_id=self.lister_obj.id,
                visit_type=self.VISIT_TYPE,
                url=origin_url,
                last_update=last_published,
            )
expected_origins = {
    "https://pub.dev/packages/Autolinker",
    "https://pub.dev/packages/Babylon",
}


def test_pubdev_lister(datadir, requests_mock_datadir, swh_scheduler):
    """Nominal run: both packages in the recorded data are listed."""
    lister = PubDevLister(scheduler=swh_scheduler)
    stats = lister.run()

    assert stats.pages == 1
    assert stats.origins == 2

    listed = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    assert len(listed) == len(expected_origins)

    for listed_origin in listed:
        assert listed_origin.visit_type == "pubdev"
        assert listed_origin.url in expected_origins
        assert listed_origin.last_update is not None


def _match_request(request):
    """Match only requests carrying the pubdev lister User-Agent."""
    expected_ua = USER_AGENT_TEMPLATE % PubDevLister.LISTER_NAME
    return request.headers.get("User-Agent") == expected_ua


def test_pubdev_lister_skip_package(
    datadir, requests_mock_datadir, swh_scheduler, requests_mock
):
    """A package whose metadata endpoint fails is skipped, not fatal."""
    requests_mock.get(
        "https://pub.dev/api/packages/Autolinker",
        status_code=404,
        additional_matcher=_match_request,
    )

    lister = PubDevLister(scheduler=swh_scheduler)
    stats = lister.run()

    assert stats.pages == 1
    assert stats.origins == 1


# --- swh/lister/sourceforge/tests/test_lister.py ---------------------------
# Copyright (C) 2021-2022  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import datetime
import functools
import json
from pathlib import Path
import re

from iso8601 import iso8601
import pytest
from requests.exceptions import HTTPError

from swh.lister import USER_AGENT_TEMPLATE
from swh.lister.sourceforge.lister import (
    MAIN_SITEMAP_URL,
    PROJECT_API_URL_FORMAT,
    SourceForgeLister,
    SourceForgeListerState,
)
from swh.lister.tests.test_utils import assert_sleep_calls
from swh.lister.utils import WAIT_EXP_BASE
from swh.scheduler.model import ListedOrigin

# Mapping of project name to namespace
TEST_PROJECTS = {
    "aaron": "p",
    "adobexmp": "adobe",
    "backapps": "p",
    "backapps/website": "p",
    "bzr-repo": "p",
    "mojunk": "p",
    "mramm": "p",
    "os3dmodels": "p",
    "random-mercurial": "p",
    "t12eksandbox": "p",
    "ocaml-lpd": "p",
}

# Reverse map: project REST API URL -> project name.
URLS_MATCHER = {
    PROJECT_API_URL_FORMAT.format(namespace=namespace, project=project): project
    for project, namespace in TEST_PROJECTS.items()
}


def get_main_sitemap(datadir):
    """Raw XML of the main sitemap fixture."""
    return (Path(datadir) / "main-sitemap.xml").read_text()


def get_subsitemap_0(datadir):
    """Raw XML of the first sub-sitemap fixture."""
    return (Path(datadir) / "subsitemap-0.xml").read_text()


def get_subsitemap_1(datadir):
    """Raw XML of the second sub-sitemap fixture."""
    return (Path(datadir) / "subsitemap-1.xml").read_text()


def get_project_json(datadir, request, context):
    """JSON payload of the project REST endpoint matching *request*."""
    project = URLS_MATCHER.get(request.url)
    assert project is not None, f"Url '{request.url}' could not be matched"
    filename = project.replace("/", "-") + ".json"
    return json.loads((Path(datadir) / filename).read_text())


def get_cvs_info_page(datadir):
    """HTML of the CVS info page fixture for the `aaron` project."""
    return (Path(datadir) / "aaron.html").read_text()


def get_bzr_repo_page(datadir, repo_name):
    """HTML of the Bazaar repository page fixture for *repo_name*."""
    return (Path(datadir) / f"{repo_name}.html").read_text()


def _check_request_headers(request):
    """Match only requests carrying the SourceForge lister User-Agent."""
    expected_ua = USER_AGENT_TEMPLATE % SourceForgeLister.LISTER_NAME
    return request.headers.get("User-Agent") == expected_ua


def _check_listed_origins(lister, swh_scheduler):
    """Assert the scheduler holds exactly the expected full-listing dataset."""
    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    res = {
        o.url: (o.visit_type, str(o.last_update.date())) for o in scheduler_origins
    }
    assert res == {
        "https://svn.code.sf.net/p/backapps/website/code": ("svn", "2021-02-11"),
        "https://git.code.sf.net/p/os3dmodels/git": ("git", "2017-03-31"),
        "https://svn.code.sf.net/p/os3dmodels/svn": ("svn", "2017-03-31"),
        "https://git.code.sf.net/p/mramm/files": ("git", "2019-04-04"),
        "https://git.code.sf.net/p/mramm/git": ("git", "2019-04-04"),
        "https://svn.code.sf.net/p/mramm/svn": ("svn", "2019-04-04"),
        "https://git.code.sf.net/p/mojunk/git": ("git", "2017-12-31"),
        "https://git.code.sf.net/p/mojunk/git2": ("git", "2017-12-31"),
        "https://svn.code.sf.net/p/mojunk/svn": ("svn", "2017-12-31"),
        "http://hg.code.sf.net/p/random-mercurial/hg": ("hg", "2019-05-02"),
        "http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox": (
            "bzr",
            "2011-02-09",
        ),
        "http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd/trunk": (
            "bzr",
            "2011-04-17",
        ),
        "rsync://a.cvs.sourceforge.net/cvsroot/aaron/aaron": ("cvs", "2013-03-07"),
        "rsync://a.cvs.sourceforge.net/cvsroot/aaron/www": ("cvs", "2013-03-07"),
    }
def test_sourceforge_lister_full(swh_scheduler, requests_mock, datadir):
    """
    Simulate a full listing of an artificially restricted sourceforge.

    There are 5 different projects, spread over two sub-sitemaps, a few of which
    have multiple VCS listed, one has none, one is outside of the standard `/p/`
    namespace, some with custom mount points.

    All non-interesting but related entries have been kept.
    """
    lister = SourceForgeLister(scheduler=swh_scheduler)

    # Register canned responses for the whole crawl: main sitemap, the two
    # sub-sitemaps, every project REST endpoint, plus the CVS info and
    # Bazaar repository pages. Each mock also checks the User-Agent header.
    requests_mock.get(
        MAIN_SITEMAP_URL,
        text=get_main_sitemap(datadir),
        additional_matcher=_check_request_headers,
    )

    requests_mock.get(
        "https://sourceforge.net/allura_sitemap/sitemap-0.xml",
        text=get_subsitemap_0(datadir),
        additional_matcher=_check_request_headers,
    )

    requests_mock.get(
        "https://sourceforge.net/allura_sitemap/sitemap-1.xml",
        text=get_subsitemap_1(datadir),
        additional_matcher=_check_request_headers,
    )

    requests_mock.get(
        re.compile("https://sourceforge.net/rest/.*"),
        json=functools.partial(get_project_json, datadir),
        additional_matcher=_check_request_headers,
    )

    requests_mock.get(
        re.compile("http://aaron.cvs.sourceforge.net/"),
        text=get_cvs_info_page(datadir),
        additional_matcher=_check_request_headers,
    )

    requests_mock.get(
        re.compile("http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox"),
        text=get_bzr_repo_page(datadir, "t12eksandbox"),
        additional_matcher=_check_request_headers,
    )

    requests_mock.get(
        re.compile("http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd"),
        text=get_bzr_repo_page(datadir, "ocaml-lpd"),
        additional_matcher=_check_request_headers,
    )

    stats = lister.run()

    # - os3dmodels (2 repos),
    # - mramm (3 repos),
    # - mojunk (3 repos),
    # - backapps/website (1 repo),
    # - random-mercurial (1 repo).
    # - t12eksandbox (1 repo).
    # - ocaml-lpd (1 repo).
    # adobe and backapps itself have no repos.
    assert stats.pages == 8
    assert stats.origins == 14

    # Incremental state after a full run: last-modified dates of the
    # sub-sitemaps, plus the projects known to be empty.
    expected_state = {
        "subsitemap_last_modified": {
            "https://sourceforge.net/allura_sitemap/sitemap-0.xml": "2021-03-18",
            "https://sourceforge.net/allura_sitemap/sitemap-1.xml": "2021-03-18",
        },
        "empty_projects": {
            "https://sourceforge.net/rest/p/backapps": "2021-02-11",
            "https://sourceforge.net/rest/adobe/adobexmp": "2017-10-17",
        },
    }
    assert lister.state_to_dict(lister.state) == expected_state

    _check_listed_origins(lister, swh_scheduler)


def test_sourceforge_lister_incremental(swh_scheduler, requests_mock, datadir, mocker):
    """
    Simulate an incremental listing of an artificially restricted sourceforge.
    Same dataset as the full run, because it's enough to validate the different
    cases.
    """
    lister = SourceForgeLister(scheduler=swh_scheduler, incremental=True)

    requests_mock.get(
        MAIN_SITEMAP_URL,
        text=get_main_sitemap(datadir),
        additional_matcher=_check_request_headers,
    )

    # Callback used as a mock body: fails the test if the URL is fetched at all.
    def not_called(request, *args, **kwargs):
        raise AssertionError(f"Should not have been called: '{request.url}'")

    requests_mock.get(
        "https://sourceforge.net/allura_sitemap/sitemap-0.xml",
        text=get_subsitemap_0(datadir),
        additional_matcher=_check_request_headers,
    )

    # sitemap-1 is unchanged in the faked state below, so an incremental run
    # must not fetch it again.
    requests_mock.get(
        "https://sourceforge.net/allura_sitemap/sitemap-1.xml",
        text=not_called,
        additional_matcher=_check_request_headers,
    )

    def filtered_get_project_json(request, context):
        # These projects should not be requested again
        assert URLS_MATCHER[request.url] not in {"adobe", "mojunk"}
        return get_project_json(datadir, request, context)

    requests_mock.get(
        re.compile("https://sourceforge.net/rest/.*"),
        json=filtered_get_project_json,
        additional_matcher=_check_request_headers,
    )

    requests_mock.get(
        re.compile("http://aaron.cvs.sourceforge.net/"),
        text=get_cvs_info_page(datadir),
        additional_matcher=_check_request_headers,
    )

    requests_mock.get(
        re.compile("http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox"),
        text=get_bzr_repo_page(datadir, "t12eksandbox"),
        additional_matcher=_check_request_headers,
    )

    requests_mock.get(
        re.compile("http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd"),
        text=get_bzr_repo_page(datadir, "ocaml-lpd"),
        additional_matcher=_check_request_headers,
    )

    # Pre-populate the scheduler as if a previous full run had happened.
    faked_listed_origins = [
        # mramm: changed
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="git",
            url="https://git.code.sf.net/p/mramm/files",
            last_update=iso8601.parse_date("2019-01-01"),
        ),
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="git",
            url="https://git.code.sf.net/p/mramm/git",
            last_update=iso8601.parse_date("2019-01-01"),
        ),
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="svn",
            url="https://svn.code.sf.net/p/mramm/svn",
            last_update=iso8601.parse_date("2019-01-01"),
        ),
        # stayed the same, even though its subsitemap has changed
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="git",
            url="https://git.code.sf.net/p/os3dmodels/git",
            last_update=iso8601.parse_date("2017-03-31"),
        ),
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="svn",
            url="https://svn.code.sf.net/p/os3dmodels/svn",
            last_update=iso8601.parse_date("2017-03-31"),
        ),
        # others: stayed the same, should be skipped
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="git",
            url="https://git.code.sf.net/p/mojunk/git",
            last_update=iso8601.parse_date("2017-12-31"),
        ),
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="git",
            url="https://git.code.sf.net/p/mojunk/git2",
            last_update=iso8601.parse_date("2017-12-31"),
        ),
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="svn",
            url="https://svn.code.sf.net/p/mojunk/svn",
            last_update=iso8601.parse_date("2017-12-31"),
        ),
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="svn",
            url="https://svn.code.sf.net/p/backapps/website/code",
            last_update=iso8601.parse_date("2021-02-11"),
        ),
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="hg",
            url="http://hg.code.sf.net/p/random-mercurial/hg",
            last_update=iso8601.parse_date("2019-05-02"),
        ),
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="bzr",
            url="http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox",
            last_update=iso8601.parse_date("2011-02-09"),
        ),
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="bzr",
            url="http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd/trunk",
            last_update=iso8601.parse_date("2011-04-17"),
        ),
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="cvs",
            url="rsync://a.cvs.sourceforge.net/cvsroot/aaron/aaron",
            last_update=iso8601.parse_date("2013-03-07"),
        ),
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="cvs",
            url="rsync://a.cvs.sourceforge.net/cvsroot/aaron/www",
            last_update=iso8601.parse_date("2013-03-07"),
        ),
    ]
    swh_scheduler.record_listed_origins(faked_listed_origins)

    to_date = datetime.date.fromisoformat
    # Fake the lister state from the previous run: sitemap-0 is older than
    # what the server now reports (changed), sitemap-1 is up to date.
    faked_state = SourceForgeListerState(
        subsitemap_last_modified={
            # changed
            "https://sourceforge.net/allura_sitemap/sitemap-0.xml": to_date(
                "2021-02-18"
            ),
            # stayed the same
            "https://sourceforge.net/allura_sitemap/sitemap-1.xml": to_date(
                "2021-03-18"
            ),
        },
        empty_projects={
            "https://sourceforge.net/rest/p/backapps": to_date("2020-02-11"),
            "https://sourceforge.net/rest/adobe/adobexmp": to_date("2017-10-17"),
        },
    )
    lister.state = faked_state

    stats = lister.run()
    # - mramm (3 repos),  # changed
    assert stats.pages == 1
    assert stats.origins == 3

    expected_state = {
        "subsitemap_last_modified": {
            "https://sourceforge.net/allura_sitemap/sitemap-0.xml": "2021-03-18",
            "https://sourceforge.net/allura_sitemap/sitemap-1.xml": "2021-03-18",
        },
        "empty_projects": {
            "https://sourceforge.net/rest/p/backapps": "2021-02-11",  # changed
            "https://sourceforge.net/rest/adobe/adobexmp": "2017-10-17",
        },
    }
    assert lister.state_to_dict(lister.state) == expected_state

    # origins have been updated
    _check_listed_origins(lister, swh_scheduler)
def test_sourceforge_lister_retry(swh_scheduler, requests_mock, mocker, datadir):
    """429 (rate-limit) responses are retried until a successful one arrives."""
    lister = SourceForgeLister(scheduler=swh_scheduler)

    # Exponential retries take a long time, so stub time.sleep
    mocked_sleep = mocker.patch.object(lister.http_request.retry, "sleep")

    # Main sitemap rate-limits twice before succeeding.
    requests_mock.get(
        MAIN_SITEMAP_URL,
        [
            {"status_code": 429},
            {"status_code": 429},
            {"text": get_main_sitemap(datadir)},
        ],
        additional_matcher=_check_request_headers,
    )

    requests_mock.get(
        "https://sourceforge.net/allura_sitemap/sitemap-0.xml",
        [{"status_code": 429}, {"text": get_subsitemap_0(datadir), "status_code": 301}],
        additional_matcher=_check_request_headers,
    )

    requests_mock.get(
        "https://sourceforge.net/allura_sitemap/sitemap-1.xml",
        [{"status_code": 429}, {"text": get_subsitemap_1(datadir)}],
        additional_matcher=_check_request_headers,
    )

    requests_mock.get(
        re.compile("https://sourceforge.net/rest/.*"),
        [{"status_code": 429}, {"json": functools.partial(get_project_json, datadir)}],
        additional_matcher=_check_request_headers,
    )

    requests_mock.get(
        re.compile("http://aaron.cvs.sourceforge.net/"),
        text=get_cvs_info_page(datadir),
        additional_matcher=_check_request_headers,
    )

    requests_mock.get(
        re.compile("http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox"),
        text=get_bzr_repo_page(datadir, "t12eksandbox"),
        additional_matcher=_check_request_headers,
    )

    requests_mock.get(
        re.compile("http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd"),
        text=get_bzr_repo_page(datadir, "ocaml-lpd"),
        additional_matcher=_check_request_headers,
    )

    stats = lister.run()

    # Despite the retries, the listing is complete:
    # - os3dmodels (2 repos),
    # - mramm (3 repos),
    # - mojunk (3 repos),
    # - backapps/website (1 repo),
    # - random-mercurial (1 repo).
    # - t12eksandbox (1 repo).
    # - ocaml-lpd (1 repo).
    # adobe and backapps itself have no repos.
    assert stats.pages == 8
    assert stats.origins == 14

    _check_listed_origins(lister, swh_scheduler)

    # Test `time.sleep` is called with exponential retries
    assert_sleep_calls(mocker, mocked_sleep, [1, WAIT_EXP_BASE, 1, 1])


@pytest.mark.parametrize("status_code", [500, 503, 504, 403, 404])
def test_sourceforge_lister_http_error(
    swh_scheduler, requests_mock, status_code, mocker
):
    """A persistent HTTP error on the main sitemap aborts the listing.

    5xx codes are retried (with exponential back-off) before giving up;
    4xx codes fail immediately without retrying.
    """
    lister = SourceForgeLister(scheduler=swh_scheduler)

    # Exponential retries take a long time, so stub time.sleep
    mocked_sleep = mocker.patch.object(lister.http_request.retry, "sleep")

    requests_mock.get(MAIN_SITEMAP_URL, status_code=status_code)

    with pytest.raises(HTTPError):
        lister.run()

    exp_retries = []
    if status_code >= 500:
        exp_retries = [1.0, 10.0, 100.0, 1000.0]

    assert_sleep_calls(mocker, mocked_sleep, exp_retries)


@pytest.mark.parametrize("status_code", [500, 503, 504, 403, 404])
def test_sourceforge_lister_project_error(
    datadir, swh_scheduler, requests_mock, status_code, mocker
):
    """Errors on individual projects are skipped; the rest is still listed."""
    lister = SourceForgeLister(scheduler=swh_scheduler)

    # Exponential retries take a long time, so stub time.sleep
    mocker.patch.object(lister.http_request.retry, "sleep")

    requests_mock.get(
        MAIN_SITEMAP_URL,
        text=get_main_sitemap(datadir),
        additional_matcher=_check_request_headers,
    )
    requests_mock.get(
        "https://sourceforge.net/allura_sitemap/sitemap-0.xml",
        text=get_subsitemap_0(datadir),
        additional_matcher=_check_request_headers,
    )
    requests_mock.get(
        "https://sourceforge.net/allura_sitemap/sitemap-1.xml",
        text=get_subsitemap_1(datadir),
        additional_matcher=_check_request_headers,
    )
    # Request mocks precedence is LIFO
    requests_mock.get(
        re.compile("https://sourceforge.net/rest/.*"),
        json=functools.partial(get_project_json, datadir),
        additional_matcher=_check_request_headers,
    )
    requests_mock.get(
        re.compile("http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox"),
        text=get_bzr_repo_page(datadir, "t12eksandbox"),
        additional_matcher=_check_request_headers,
    )
    requests_mock.get(
        re.compile("http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd"),
        text=get_bzr_repo_page(datadir, "ocaml-lpd"),
        additional_matcher=_check_request_headers,
    )
    # Make all `mramm` requests fail
    # `mramm` is in subsitemap 0, which ensures we keep listing after an error.
    requests_mock.get(
        re.compile("https://sourceforge.net/rest/p/mramm"), status_code=status_code
    )
    # Make request to CVS info page fail
    requests_mock.get(
        re.compile("http://aaron.cvs.sourceforge.net/"), status_code=status_code
    )

    stats = lister.run()

    # - os3dmodels (2 repos),
    # - mojunk (3 repos),
    # - backapps/website (1 repo),
    # - random-mercurial (1 repo).
    # - t12eksandbox (1 repo).
    # - ocaml-lpd (1 repo).
    # adobe and backapps itself have no repos.
    # Did *not* list mramm
    assert stats.pages == 6
    assert stats.origins == 9

    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    res = {o.url: (o.visit_type, str(o.last_update.date())) for o in scheduler_origins}
    # Ensure no `mramm` origins are listed, but all others are.
    assert res == {
        "https://svn.code.sf.net/p/backapps/website/code": ("svn", "2021-02-11"),
        "https://git.code.sf.net/p/os3dmodels/git": ("git", "2017-03-31"),
        "https://svn.code.sf.net/p/os3dmodels/svn": ("svn", "2017-03-31"),
        "https://git.code.sf.net/p/mojunk/git": ("git", "2017-12-31"),
        "https://git.code.sf.net/p/mojunk/git2": ("git", "2017-12-31"),
        "https://svn.code.sf.net/p/mojunk/svn": ("svn", "2017-12-31"),
        "http://hg.code.sf.net/p/random-mercurial/hg": ("hg", "2019-05-02"),
        "http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox": (
            "bzr",
            "2011-02-09",
        ),
        "http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd/trunk": (
            "bzr",
            "2011-04-17",
        ),
    }