diff --git a/PKG-INFO b/PKG-INFO
index 8d49371..22218bf 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,125 +1,125 @@
Metadata-Version: 2.1
Name: swh.lister
-Version: 3.0.2
+Version: 4.0.0
Summary: Software Heritage lister
Home-page: https://forge.softwareheritage.org/diffusion/DLSGH/
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
Project-URL: Funding, https://www.softwareheritage.org/donate
Project-URL: Source, https://forge.softwareheritage.org/source/swh-lister
Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-lister/
Classifier: Programming Language :: Python :: 3
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Classifier: Operating System :: OS Independent
Classifier: Development Status :: 5 - Production/Stable
Requires-Python: >=3.7
Description-Content-Type: text/markdown
Provides-Extra: testing
License-File: LICENSE

swh-lister
==========

This component from the Software Heritage stack aims to produce listings of software origins and their urls hosted on various public developer platforms or package managers. As these operations are quite similar, it provides a set of Python modules abstracting common software origins listing behaviors.

It also provides several lister implementations, contained in the following Python modules:

- `swh.lister.bitbucket`
- `swh.lister.cgit`
- `swh.lister.cran`
- `swh.lister.debian`
- `swh.lister.gitea`
- `swh.lister.github`
- `swh.lister.gitlab`
- `swh.lister.gnu`
- `swh.lister.golang`
- `swh.lister.launchpad`
- `swh.lister.maven`
- `swh.lister.npm`
- `swh.lister.packagist`
- `swh.lister.phabricator`
- `swh.lister.pypi`
- `swh.lister.tuleap`
- `swh.lister.gogs`

Dependencies
------------

All required dependencies can be found in the `requirements*.txt` files located at the root of the repository.

Local deployment
----------------

## lister configuration

Each lister implemented so far by Software Heritage (`bitbucket`, `cgit`, `cran`, `debian`, `gitea`, `github`, `gitlab`, `gnu`, `golang`, `launchpad`, `npm`, `packagist`, `phabricator`, `pypi`, `tuleap`, `maven`) must be configured by following the instructions below (please note that you have to replace `<lister_name>` by one of the lister names introduced above).

### Preparation steps

1. `mkdir ~/.config/swh/`
2.
create configuration file `~/.config/swh/listers.yml`

### Configuration file sample

Minimal configuration, shared by all listers, to add in the file `~/.config/swh/listers.yml`:

```lang=yml
scheduler:
  cls: 'remote'
  args:
    url: 'http://localhost:5008/'

credentials: {}
```

Note: This expects the scheduler service (port 5008) to run locally

## Executing a lister

Once configured, a lister can be executed by using the `swh` CLI tool with the following options and commands:

```
$ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister <lister_name> [lister_parameters]
```

Examples:

```
$ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister bitbucket

$ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister cran

$ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister gitea url=https://codeberg.org/api/v1/

$ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister gitlab url=https://salsa.debian.org/api/v4/

$ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister npm

$ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister pypi
```

Licensing
---------

This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

See top-level LICENSE file for the full text of the GNU General Public License along with this program.

diff --git a/docs/new_lister_template.py b/docs/new_lister_template.py
index 20e3e90..a13a83f 100644
--- a/docs/new_lister_template.py
+++ b/docs/new_lister_template.py
@@ -1,165 +1,130 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from dataclasses import asdict, dataclass
import logging
from typing import Any, Dict, Iterator, List
from urllib.parse import urljoin

-import requests
-from tenacity.before_sleep import before_sleep_log
-
-from swh.lister.utils import throttling_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin

-from .. import USER_AGENT
from ..pattern import CredentialsType, Lister

logger = logging.getLogger(__name__)

# Aliasing the page results returned by the `get_pages` method of the lister.
NewForgeListerPage = List[Dict[str, Any]]


@dataclass
class NewForgeListerState:
    """The NewForgeLister instance state.
This is used for incremental listing."""

    current: str = ""
    """Id of the last origin listed on an incremental pass"""


# If there is no need to keep state, subclass StatelessLister[NewForgeListerPage]
class NewForgeLister(Lister[NewForgeListerState, NewForgeListerPage]):
    """List origins from the "NewForge" forge."""

    # Part of the lister API, identifying this lister
    LISTER_NAME = ""
    # (Optional) visit type of the origins listed by this lister, if constant
    VISIT_TYPE = ""

    # Instance URLs include the hostname and the common path prefix of processed URLs
    EXAMPLE_BASE_URL = "https://netloc/api/v1/"
    # Path of a specific resource to process, to join the base URL with
    EXAMPLE_PATH = "origins/list"

    def __init__(
        self,
        # Required
        scheduler: SchedulerInterface,
        # Instance URL, required for multi-instance listers (e.g. gitlab, ...)
        url: str,
        # Instance name (free form) required for multi-instance listers,
        # or computed from `url`
        instance: str,
        # Required, whether the lister supports authentication or not
        credentials: CredentialsType = None,
    ):
        super().__init__(
            scheduler=scheduler,
            credentials=credentials,
            url=url,
            instance=instance,
        )
-        self.session = requests.Session()
-        # Declare the USER_AGENT is more sysadm-friendly for the forge we list
-        self.session.headers.update(
-            {"Accept": "application/json", "User-Agent": USER_AGENT}
-        )
+        self.session.headers.update({"Accept": "application/json"})

    def state_from_dict(self, d: Dict[str, Any]) -> NewForgeListerState:
        return NewForgeListerState(**d)

    def state_to_dict(self, state: NewForgeListerState) -> Dict[str, Any]:
        return asdict(state)

-    @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
-    def page_request(self, url, params) -> requests.Response:
-        # Do the network resource request under a retrying decorator
-        # to handle rate limiting and transient errors up to a limit.
-        # `throttling_retry` by default use the `requests` library to check
-        # only for rate-limit and a base-10 exponential waiting strategy.
-        # This can be customized by passed waiting, retrying and logging strategies
-        # as functions. See the `tenacity` library documentation.
-
-        # Log listed URL to ease debugging
-        logger.debug("Fetching URL %s with params %s", url, params)
-        response = self.session.get(url, params=params)
-
-        if response.status_code != 200:
-            # Log response content to ease debugging
-            logger.warning(
-                "Unexpected HTTP status code %s on %s: %s",
-                response.status_code,
-                response.url,
-                response.content,
-            )
-            # The lister must fail on blocking errors
-            response.raise_for_status()
-
-        return response
-
    def get_pages(self) -> Iterator[NewForgeListerPage]:
        # The algorithm depends on the service, but should request data reliably,
        # following pagination if relevant and yielding pages in a streaming fashion.
        # If incremental listing is supported, initialize from saved lister state.
        # Make use of any next page URL provided.
        # Simplify the results early to ease testing and debugging.
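        # Note: the retrying `page_request` helper removed above is replaced
        # by `self.http_request`, which (together with `self.session`) appears
        # to be provided by the base Lister class in this version; it wraps
        # `requests` with the `http_retry` decorator from `swh.lister.utils`
        # to handle rate limiting and transient errors.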
        # Initialize from the lister saved state
        current = ""
        if self.state.current is not None:
            current = self.state.current

        # Construct the URL of a service endpoint; the lister can have others to fetch
        url = urljoin(self.url, self.EXAMPLE_PATH)

        while current is not None:
            # Parametrize the request for incremental listing
-            body = self.page_request(url, {"current": current}).json()
+            body = self.http_request(url, params={"current": current}).json()

            # Simplify the page if possible to only the necessary elements
            # and yield it
            yield body

            # Get the next page parameter or end the loop when there is none
            current = body.get("next")

    def get_origins_from_page(self, page: NewForgeListerPage) -> Iterator[ListedOrigin]:
        """Convert a page of NewForgeLister repositories into a list of ListedOrigins"""
        assert self.lister_obj.id is not None

        for element in page:
            yield ListedOrigin(
                # Required. Should use this value.
                lister_id=self.lister_obj.id,
                # Required. Visit type of the currently processed origin
                visit_type=self.VISIT_TYPE,
                # Required. URL corresponding to the origin for loaders to ingest
                url=...,
                # Should get it if the service provides it and if it induces no
                # substantial additional processing cost
                last_update=...,
            )

    def commit_page(self, page: NewForgeListerPage) -> None:
        # Update the lister state to the latest `current`
        current = page[-1]["current"]
        if current > self.state.current:
            self.state.current = current

    def finalize(self) -> None:
        # Pull fresh lister state from the scheduler backend, in case multiple
        # listers run concurrently
        scheduler_state = self.get_state_from_scheduler()

        # Update the lister state in the backend only if `current` is fresher than
        # the one stored in the database.
        if self.state.current > scheduler_state.current:
            self.updated = True

diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index d4ae380..c01195e 100644
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -1,380 +1,380 @@
.. _lister-tutorial:

Tutorial: list the content of your favorite forge in just a few steps
=====================================================================

Overview
--------

The three major phases of work in Software Heritage's preservation process, on the technical side, are *listing software sources*, *scheduling updates* and *loading the software artifacts into the archive*.

A previous effort in 2017 consisted in designing the framework to make lister development a straightforward "fill in the blanks" process, based on the experience gained with the diversity found in the listed services. This is the second iteration on the lister framework design, comprising a library and an API which is easier to work with and less "magic" (read implicit). This new design is part of a larger effort in redesigning the scheduling system for the recurring tasks updating the content of the archive.

.. _fundamentals:

Fundamentals
------------

Fundamentally, a basic lister must follow these steps:

1. Issue a network request for a service endpoint.
2. Convert the response data into a model object.
3. Send the model object to the scheduler.

Steps 1 and 3 are generic problems that are often already solved by helpers or in other listers. That leaves us mainly to implement step 2, which is simple when the remote service provides an API.

.. _prerequisites:

Prerequisites
-------------

Skills:

* object-oriented Python
* requesting remote services through HTTP
* scraping if no API is offered

Analysis of the target service.
Prepare the following elements to write the lister:

* instance names and URLs
* requesting scheme: base URL, path, query_string, POST data, headers
* authentication types and which one to support, if any
* rate-limiting: HTTP codes and headers used
* data format: JSON/XML/HTML/...?
* mapping between remote data and needed data (ListedOrigin model, internal state)

We will now walk through the steps to build a new lister. Please use this template to start with: :download:`new_lister_template.py`

.. _lister-declaration:

Lister declaration
------------------

In order to write a lister, two basic elements are required. These are the :py:class:`Lister` base class and the :py:class:`ListedOrigin` scheduler model class. Optionally, for listers that need to keep a state and support incremental listing, an additional object :py:class:`ListerState` will come into play.

Each lister must subclass :py:class:`Lister ` either directly or through a subclass such as :py:class:`StatelessLister ` for stateless ones.

We extensively type-annotate our listers, as we do for any new code; this makes it prominent that those lister classes are generic and take the following parameters:

* :py:class:`Lister`: the lister state type, the page type
* :py:class:`StatelessLister`: only the page type

You can start by declaring a stateless lister and leave the implementation of state for later if the listing needs it. We will see how in :ref:`handling-lister-state`.

Both the lister state type and the page type are user-defined types. However, while the page type may only exist as a type annotation, the state type for a stateful lister must be associated with a concrete object. The state type is commonly defined as a dataclass whereas the page type is often a mere annotation, potentially given a nice alias.

Example lister declaration::

    NewForgePage = List[Dict[str, Any]]

    @dataclass
    class NewForgeListerState:
        ...

    class NewForgeLister(Lister[NewForgeListerState, NewForgePage]):
        LISTER_NAME = "My"
        ...

The new lister must declare a name through the :py:attr:`LISTER_NAME` class attribute.

.. _lister-construction:

Lister construction
-------------------

The lister constructor is only required to ask for a :py:class:`SchedulerInterface` object to pass to the base class. But that does not mean it is all that's needed for it to be useful. A lister needs information on which remote service to talk to. It needs a URL.

Some services are centralized and offered by a single organization. Think of GitHub. Others are offered by many people across the Internet, each using a different hosting, each providing specific data. Think of the many GitLab instances. We need a name to identify each instance, and even if there is only one, we need its URL to access it concretely.

Now, you may think of any strategy to infer the information or hardcode it, but the base class needs a URL and an instance name. In any case, for a multi-instance service, you had better be explicit and require the URL as a constructor argument.

We recommend the URL to be some form of a base URL, to be concatenated with any variable part appearing either because there exist multiple instances or because the URL needs recomputation in the listing process.

If you need any credentials to access a remote service (and want to do so in our polite but persistent fashion, since we want fresh information), you are encouraged to provide support for authenticated access. The base class supports handling credentials as a set of identifier/secret pairs. It knows how to load from a secrets store the right ones for the current ("lister name", "instance name") setting, if none were originally provided through the task parameters. You can ask for other types of access tokens in a separate parameter, but then you lose this advantage.
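For illustration, here is a sketch of the credentials structure, mapping lister name and instance name to a list of identifier/secret pairs (the lister and instance names are hypothetical, and the exact field names should be checked against :py:class:`CredentialsType` in :py:mod:`swh.lister.pattern`)::

    credentials = {
        "newforge": {  # LISTER_NAME
            "main": [  # instance name
                {"username": "jdoe", "password": "s3cr3t-api-token"},
            ],
        },
    }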
Example of a typical lister constructor::

    def __init__(
        self,
        scheduler: SchedulerInterface,
        url: str,
        instance: str,
        credentials: CredentialsType = None,
    ):
        super().__init__(
            scheduler=scheduler,
            url=url,
            instance=instance,
            credentials=credentials,
        )
        ...

.. _core-lister-functionality:

Core lister functionality
-------------------------

For the lister to contribute data to the archive, you now have to write the logic to fetch data from the remote service, and format it in the canonical form the scheduler expects, as outlined in :ref:`fundamentals`.

To this purpose, the two methods to implement are::

    def get_pages(self) -> Iterator[NewForgePage]:
        ...

    def get_origins_from_page(self, page: NewForgePage) -> Iterator[ListedOrigin]:
        ...

Those two core functions are called by the principal lister method, :py:meth:`Lister.run`, found in the base class.

:py:meth:`get_pages` is the guts of the lister. It takes no arguments and must produce data pages. An iterator is fine here, as the :py:meth:`Lister.run` method only means to iterate over it in a single pass.

This method gets its input from a network request to a remote service's endpoint to retrieve the data we long for. Getting the data adequately structured for our purpose can be tricky, depending on the service. Here you may have to show off your data scraping skills, or just consume a well-designed API. Those aspects are discussed more specifically in the section :ref:`handling-specific-topics`.

In any case, we want the data we return to be usefully filtered and structured. The easiest way to create an iterator is to use the ``yield`` keyword. Yield each data page you have structured in accordance with the page type you have declared. The page type exists only for static type checking of data passed from :py:meth:`get_pages` to :py:meth:`get_origins_from_page`; you can choose whatever fits the bill.

:py:meth:`get_origins_from_page` is simpler. For each individual software origin you have received in the page, you convert and yield a :py:class:`ListedOrigin` model object. This datatype has the following mandatory fields:

* lister id: you generally fill this with the value of :py:attr:`self.lister_obj.id`

* visit type: the type of software distribution format the service provides. For use by a corresponding loader. It is an identifier, so you have to either use an existing value or craft a new one if you get off the beaten track and tackle a new software source. But then you will have to discuss the name with the core developers.

  Example: Phabricator is a forge that can handle Git or SVN repositories. The visit type would be "git" when listing such a repo that provides a Git URL that we can load.

* origin URL: a URL that, combined with the visit type, will serve as the input of a loader.

This datatype can also further be detailed with the optional fields:

* last update date: freshness information on this origin, which is useful to the scheduler for optimizing its scheduling decisions. Fill it if provided by the service, at no substantial additional runtime cost, e.g. in the same request.

* extra loader arguments: extra parameters to be passed to the loader for it to be able to load the origin. It is needed for example when additional context is needed along with the URL to effectively load from the origin. See the definition of :swh_web:`ListedOrigin `.
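Putting these fields together, a :py:meth:`get_origins_from_page` implementation typically looks like this sketch (the ``clone_url`` and ``updated_at`` field names are hypothetical stand-ins for whatever the remote service actually returns)::

    def get_origins_from_page(self, page: NewForgePage) -> Iterator[ListedOrigin]:
        assert self.lister_obj.id is not None
        for repo in page:
            yield ListedOrigin(
                lister_id=self.lister_obj.id,
                visit_type="git",  # assuming a Git-serving forge
                url=repo["clone_url"],  # hypothetical field name
                last_update=iso8601.parse_date(repo["updated_at"]),  # hypothetical
            )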
Now that we have shown how those two methods operate, let's put it together by showing how they fit in the principal :py:meth:`Lister.run` method::

    def run(self) -> ListerStats:

        full_stats = ListerStats()

        try:
            for page in self.get_pages():
                full_stats.pages += 1
                origins = self.get_origins_from_page(page)
                full_stats.origins += self.send_origins(origins)
                self.commit_page(page)
        finally:
            self.finalize()
            if self.updated:
                self.set_state_in_scheduler()

        return full_stats

:py:meth:`Lister.send_origins` is the method that sends listed origins to the scheduler.

The :py:class:`ListerStats` datastructure, defined alongside the base lister class, is used to compute the number of listed pages and origins in a single lister run. It is useful both for the scheduler that automatically collects this information and to test the lister.

You see that the bulk of a lister run consists in streaming data gathered from the remote service to the scheduler. And this is done under a ``try...finally`` construct to have the lister state reliably recorded in case of unhandled error. We will explain the role of the remaining methods and attributes appearing here in the next section as it is related to the lister state.

.. _handling-lister-state:

Handling lister state
---------------------

With what we have covered until now you can write a stateless lister. Unfortunately, some services provide too much data to efficiently deal with it in a one-shot fashion. Listing a given software source can take several hours or days to process. Our listers can also give valid output, but fail on an unexpected condition and would have to start over. As we want to be able to resume the listing process from a given element, provided by the remote service and guaranteed to be ordered, such as a date or a numeric identifier, we need to deal with state.

The remaining part of the lister API is reserved for dealing with lister state. If the service to list has no pagination, then the data set to handle is small enough to not require keeping lister state. In the opposite case, you will have to determine which piece of information should be recorded in the lister state. As said earlier, we recommend declaring a dataclass for the lister state::

    @dataclass
    class NewForgeListerState:
        current: str = ""

    class NewForgeLister(Lister[NewForgeListerState, NewForgePage]):
        ...

A pair of methods, :py:meth:`state_from_dict` and :py:meth:`state_to_dict`, are used respectively to import lister state from the scheduler and export lister state to the scheduler. Some fields may need help to be serialized to the scheduler, such as dates, so this needs to be handled there (see the sketch at the end of this section).

Where is the state used? Taking the general case of a paginating service, the lister state is used at the beginning of the :py:meth:`get_pages` method to initialize the variables associated with the last listing progress. That way we can start from an arbitrary element, or just the first one if there is no last lister state.

The :py:meth:`commit_page` method is called on successful page processing, after the new origins are sent to the scheduler. Here you should mainly update the lister state by taking into account the new page processed, e.g. advance a date or serial field.

Finally, upon either completion or error, the :py:meth:`finalize` method is called. There you must set the attribute :py:attr:`updated` to True if you were successful in advancing the listing process. To do this you will commonly retrieve the latest saved lister state from the scheduler and compare it with your current lister state. If the lister state was updated, ultimately the current lister state will be recorded in the scheduler.
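As an illustration of the serialization point above, here is a sketch of the two methods for a state carrying a date (the ``last_seen`` field name is illustrative; ``iso8601`` is already among the lister dependencies)::

    from dataclasses import asdict, dataclass
    from datetime import datetime
    from typing import Any, Dict, Optional

    import iso8601

    @dataclass
    class NewForgeListerState:
        last_seen: Optional[datetime] = None

    class NewForgeLister(Lister[NewForgeListerState, NewForgePage]):
        ...

        def state_from_dict(self, d: Dict[str, Any]) -> NewForgeListerState:
            if d.get("last_seen") is not None:
                d["last_seen"] = iso8601.parse_date(d["last_seen"])
            return NewForgeListerState(**d)

        def state_to_dict(self, state: NewForgeListerState) -> Dict[str, Any]:
            d = asdict(state)
            if d["last_seen"] is not None:
                d["last_seen"] = d["last_seen"].isoformat()
            return d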
We have now seen the stateful lister API. Note that some listers may implement more flexibility in the use of lister state. Some allow an `incremental` parameter that governs whether we will do a stateful listing or not. It is up to you to support additional functionality if it seems relevant.

.. _handling-specific-topics:

Handling specific topics
------------------------

Here is a quick coverage of common topics left out from lister construction and :py:meth:`get_pages` descriptions.

Sessions
^^^^^^^^

When requesting a web service repeatedly, most parameters including headers do not change and could be set up once initially. We recommend setting up, e.g., an HTTP session as an instance attribute so that further requesting code can focus on what really changes. Some ubiquitous HTTP headers include "Accept", to be set to the service response format, and "User-Agent", for which we provide a recommended value :py:const:`USER_AGENT` to be imported from :py:mod:`swh.lister`. Authentication is also commonly provided through headers, so you can also set it up in the session.

Transport error handling
^^^^^^^^^^^^^^^^^^^^^^^^

We generally recommend logging every unhandleable error with the response content and then immediately stopping the listing by doing the equivalent of :py:meth:`Response.raise_for_status` from the ``requests`` library. As for rate-limiting errors, we have a strategy of using a flexible decorator to handle the retrying for us.
-It is based on the ``tenacity`` library and accessible as :py:func:`throttling_retry` from
+It is based on the ``tenacity`` library and accessible as :py:func:`http_retry` from
:py:mod:`swh.lister.utils`.

Pagination
^^^^^^^^^^

This one is a moving target. You have to understand how the pagination mechanics of the particular service work. Some guidelines though. The identifier may be minimal (an id to pass as query parameter), compound (a set of such parameters) or complete (a whole URL). If the service provides the next URL, use it. The piece of information may be found either in the response body, or in a header. Once identified, you still have to implement the logic of requesting and extracting it in a loop and quitting the loop when there is no more data to fetch.

Page results
^^^^^^^^^^^^

First, when retrieving page results, which involves some protocols and parsing logic, please make sure that any deviation from what was expected will result in an informational error. You also have to simplify the results, both by filtering request parameters if the service supports it, and by extracting from the response only the information needed into a structured page. This all makes for easier debugging.

Misc files
^^^^^^^^^^

There are also a few files that need to be modified outside of the lister directory, namely:

* :file:`/setup.py` to add your lister to the end of the list in the *setup* section (a sketch of the ``register`` function these entry points target follows this list)::

    entry_points="""
        [swh.cli.subcommands]
        lister=swh.lister.cli
        [swh.workers]
        lister.bitbucket=swh.lister.bitbucket:register
        lister.cgit=swh.lister.cgit:register
        ..."""

* :file:`/swh/lister/tests/test_cli.py` to get a default set of parameters in scheduler-related tests.
* :file:`/README.md` to reference the new lister.
* :file:`/CONTRIBUTORS` to add your name.
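Each ``lister.<name>=swh.lister.<name>:register`` entry point resolves to a ``register`` function in the lister package's :file:`__init__.py`. Following the convention of the existing listers, it looks roughly like the sketch below (the package name is hypothetical, and the exact returned keys should be checked against an existing lister such as :file:`swh/lister/gogs/__init__.py`)::

    # hypothetical swh/lister/newforge/__init__.py
    def register() -> dict:
        from .lister import NewForgeLister

        return {
            "lister": NewForgeLister,
            "task_modules": [f"{__name__}.tasks"],
        }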
Testing your lister
-------------------

When developing a new lister, it's important to test it. For this, add the tests (check :file:`swh/lister/*/tests/`) and register the celery tasks in the main conftest.py (:file:`swh/lister/core/tests/conftest.py`).

Another important step is to actually run it within the docker-dev (:ref:`run-lister-tutorial`).

More about listers
------------------

See currently implemented listers as examples (GitHub_, Bitbucket_, CGit_, GitLab_).

.. _GitHub: https://forge.softwareheritage.org/source/swh-lister/browse/master/swh/lister/github/lister.py
.. _Bitbucket: https://forge.softwareheritage.org/source/swh-lister/browse/master/swh/lister/bitbucket/lister.py
.. _CGit: https://forge.softwareheritage.org/source/swh-lister/browse/master/swh/lister/cgit/lister.py
.. _GitLab: https://forge.softwareheritage.org/source/swh-lister/browse/master/swh/lister/gitlab/lister.py

diff --git a/mypy.ini b/mypy.ini
index 42c58d8..286fec0 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,44 +1,50 @@
[mypy]
namespace_packages = True
warn_unused_ignores = True

# 3rd party libraries without stubs (yet)

[mypy-bs4.*]
ignore_missing_imports = True

[mypy-celery.*]
ignore_missing_imports = True

[mypy-debian.*]
ignore_missing_imports = True

[mypy-iso8601.*]
ignore_missing_imports = True

[mypy-launchpadlib.*]
ignore_missing_imports = True

[mypy-lazr.*]
ignore_missing_imports = True

[mypy-lxml.*]
ignore_missing_imports = True

[mypy-pkg_resources.*]
ignore_missing_imports = True

[mypy-pytest.*]
ignore_missing_imports = True

[mypy-pytest_postgresql.*]
ignore_missing_imports = True

[mypy-requests_mock.*]
ignore_missing_imports = True

[mypy-urllib3.util.*]
ignore_missing_imports = True

[mypy-dulwich.*]
ignore_missing_imports = True
+
+[mypy-testing.postgresql.*]
+ignore_missing_imports = True
+
+[mypy-psycopg2.*]
+ignore_missing_imports = True

diff --git a/requirements-swh.txt b/requirements-swh.txt
index 3281b3e..7c34143 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,2 +1,2 @@
-swh.core[db,github] >= 2.8
+swh.core[db,github] >= 2.16.1
swh.scheduler >= 0.8

diff --git a/requirements.txt b/requirements.txt
index a909c6d..17a1e8f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,11 @@
python_debian
requests
setuptools
iso8601
beautifulsoup4
launchpadlib
tenacity >= 6.2
lxml
dulwich
+testing.postgresql
+psycopg2

diff --git a/setup.py b/setup.py
index 8d3d7dd..7c55f6c 100755
--- a/setup.py
+++ b/setup.py
@@ -1,96 +1,103 @@
#!/usr/bin/env python3
# Copyright (C) 2015-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from io import open
from os import path

from setuptools import find_packages, setup

here = path.abspath(path.dirname(__file__))

# Get the long description from the README file
with open(path.join(here, "README.md"), encoding="utf-8") as f:
    long_description = f.read()


def parse_requirements(name=None):
    if name:
        reqf = "requirements-%s.txt" % name
    else:
        reqf = "requirements.txt"

    requirements = []
    if not path.exists(reqf):
        return requirements

    with open(reqf) as f:
        for line in f.readlines():
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            requirements.append(line)
    return requirements


setup(
    name="swh.lister",
    description="Software Heritage lister",
    long_description=long_description,
    long_description_content_type="text/markdown",
python_requires=">=3.7", author="Software Heritage developers", author_email="swh-devel@inria.fr", url="https://forge.softwareheritage.org/diffusion/DLSGH/", packages=find_packages(), install_requires=parse_requirements() + parse_requirements("swh"), tests_require=parse_requirements("test"), setup_requires=["setuptools-scm"], extras_require={"testing": parse_requirements("test")}, use_scm_version=True, include_package_data=True, entry_points=""" [swh.cli.subcommands] lister=swh.lister.cli [swh.workers] lister.arch=swh.lister.arch:register lister.aur=swh.lister.aur:register lister.bitbucket=swh.lister.bitbucket:register lister.bower=swh.lister.bower:register lister.cgit=swh.lister.cgit:register + lister.conda=swh.lister.conda:register + lister.cpan=swh.lister.cpan:register lister.cran=swh.lister.cran:register lister.crates=swh.lister.crates:register lister.debian=swh.lister.debian:register lister.gitea=swh.lister.gitea:register lister.github=swh.lister.github:register lister.gitlab=swh.lister.gitlab:register lister.gnu=swh.lister.gnu:register lister.golang=swh.lister.golang:register + lister.hackage=swh.lister.hackage:register lister.launchpad=swh.lister.launchpad:register + lister.nixguix=swh.lister.nixguix:register lister.npm=swh.lister.npm:register + lister.nuget=swh.lister.nuget:register lister.opam=swh.lister.opam:register lister.packagist=swh.lister.packagist:register lister.phabricator=swh.lister.phabricator:register lister.pubdev=swh.lister.pubdev:register + lister.puppet=swh.lister.puppet:register lister.pypi=swh.lister.pypi:register + lister.rubygems=swh.lister.rubygems:register lister.sourceforge=swh.lister.sourceforge:register lister.tuleap=swh.lister.tuleap:register lister.maven=swh.lister.maven:register lister.gogs=swh.lister.gogs:register """, classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Development Status :: 5 - Production/Stable", ], project_urls={ "Bug Reports": "https://forge.softwareheritage.org/maniphest", "Funding": "https://www.softwareheritage.org/donate", "Source": "https://forge.softwareheritage.org/source/swh-lister", "Documentation": "https://docs.softwareheritage.org/devel/swh-lister/", }, ) diff --git a/swh.lister.egg-info/PKG-INFO b/swh.lister.egg-info/PKG-INFO index 8d49371..22218bf 100644 --- a/swh.lister.egg-info/PKG-INFO +++ b/swh.lister.egg-info/PKG-INFO @@ -1,125 +1,125 @@ Metadata-Version: 2.1 Name: swh.lister -Version: 3.0.2 +Version: 4.0.0 Summary: Software Heritage lister Home-page: https://forge.softwareheritage.org/diffusion/DLSGH/ Author: Software Heritage developers Author-email: swh-devel@inria.fr Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-lister Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-lister/ Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing License-File: LICENSE swh-lister ========== This component from the Software Heritage stack aims to produce listings of software origins and their urls hosted on various 
public developer platforms or package managers. As these operations are quite similar, it provides a set of Python modules abstracting common software origins listing behaviors.

It also provides several lister implementations, contained in the following Python modules:

- `swh.lister.bitbucket`
- `swh.lister.cgit`
- `swh.lister.cran`
- `swh.lister.debian`
- `swh.lister.gitea`
- `swh.lister.github`
- `swh.lister.gitlab`
- `swh.lister.gnu`
- `swh.lister.golang`
- `swh.lister.launchpad`
- `swh.lister.maven`
- `swh.lister.npm`
- `swh.lister.packagist`
- `swh.lister.phabricator`
- `swh.lister.pypi`
- `swh.lister.tuleap`
- `swh.lister.gogs`

Dependencies
------------

All required dependencies can be found in the `requirements*.txt` files located at the root of the repository.

Local deployment
----------------

## lister configuration

Each lister implemented so far by Software Heritage (`bitbucket`, `cgit`, `cran`, `debian`, `gitea`, `github`, `gitlab`, `gnu`, `golang`, `launchpad`, `npm`, `packagist`, `phabricator`, `pypi`, `tuleap`, `maven`) must be configured by following the instructions below (please note that you have to replace `<lister_name>` by one of the lister names introduced above).

### Preparation steps

1. `mkdir ~/.config/swh/`
2. create configuration file `~/.config/swh/listers.yml`

### Configuration file sample

Minimal configuration, shared by all listers, to add in the file `~/.config/swh/listers.yml`:

```lang=yml
scheduler:
  cls: 'remote'
  args:
    url: 'http://localhost:5008/'

credentials: {}
```

Note: This expects the scheduler service (port 5008) to run locally

## Executing a lister

Once configured, a lister can be executed by using the `swh` CLI tool with the following options and commands:

```
$ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister <lister_name> [lister_parameters]
```

Examples:

```
$ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister bitbucket

$ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister cran

$ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister gitea url=https://codeberg.org/api/v1/

$ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister gitlab url=https://salsa.debian.org/api/v4/

$ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister npm

$ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister pypi
```

Licensing
---------

This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

See top-level LICENSE file for the full text of the GNU General Public License along with this program.
diff --git a/swh.lister.egg-info/SOURCES.txt b/swh.lister.egg-info/SOURCES.txt index 550a6ba..4414e93 100644 --- a/swh.lister.egg-info/SOURCES.txt +++ b/swh.lister.egg-info/SOURCES.txt @@ -1,337 +1,419 @@ .git-blame-ignore-revs .gitignore .pre-commit-config.yaml ACKNOWLEDGEMENTS CODE_OF_CONDUCT.md CONTRIBUTORS LICENSE MANIFEST.in Makefile README.md conftest.py mypy.ini pyproject.toml pytest.ini requirements-swh.txt requirements-test.txt requirements.txt setup.cfg setup.py tox.ini docs/.gitignore docs/Makefile docs/cli.rst docs/conf.py docs/index.rst docs/new_lister_template.py docs/run_a_new_lister.rst docs/save_forge.rst docs/tutorial.rst docs/_static/.placeholder docs/_templates/.placeholder docs/images/new_base.png docs/images/new_bitbucket_lister.png docs/images/new_github_lister.png docs/images/old_github_lister.png sql/crawler.sql sql/pimp_db.sql swh/__init__.py swh.lister.egg-info/PKG-INFO swh.lister.egg-info/SOURCES.txt swh.lister.egg-info/dependency_links.txt swh.lister.egg-info/entry_points.txt swh.lister.egg-info/requires.txt swh.lister.egg-info/top_level.txt swh/lister/__init__.py swh/lister/cli.py swh/lister/pattern.py swh/lister/py.typed swh/lister/utils.py swh/lister/arch/__init__.py swh/lister/arch/lister.py swh/lister/arch/tasks.py swh/lister/arch/tests/__init__.py swh/lister/arch/tests/test_lister.py swh/lister/arch/tests/test_tasks.py swh/lister/arch/tests/data/fake_archlinux_archives_init.sh swh/lister/arch/tests/data/https_archive.archlinux.org/packages_d_dialog swh/lister/arch/tests/data/https_archive.archlinux.org/packages_g_gnome-code-assistance swh/lister/arch/tests/data/https_archive.archlinux.org/packages_g_gzip swh/lister/arch/tests/data/https_archive.archlinux.org/packages_l_libasyncns swh/lister/arch/tests/data/https_archive.archlinux.org/packages_m_mercurial swh/lister/arch/tests/data/https_archive.archlinux.org/packages_p_python-hglib swh/lister/arch/tests/data/https_archive.archlinux.org/repos_last_community_os_x86_64_community.files.tar.gz swh/lister/arch/tests/data/https_archive.archlinux.org/repos_last_core_os_x86_64_core.files.tar.gz swh/lister/arch/tests/data/https_archive.archlinux.org/repos_last_extra_os_x86_64_extra.files.tar.gz swh/lister/arch/tests/data/https_uk.mirror.archlinuxarm.org/aarch64_community_community.files.tar.gz swh/lister/arch/tests/data/https_uk.mirror.archlinuxarm.org/aarch64_core_core.files.tar.gz swh/lister/arch/tests/data/https_uk.mirror.archlinuxarm.org/aarch64_extra_extra.files.tar.gz swh/lister/arch/tests/data/https_uk.mirror.archlinuxarm.org/armv7h_community_community.files.tar.gz swh/lister/arch/tests/data/https_uk.mirror.archlinuxarm.org/armv7h_core_core.files.tar.gz swh/lister/arch/tests/data/https_uk.mirror.archlinuxarm.org/armv7h_extra_extra.files.tar.gz swh/lister/aur/__init__.py swh/lister/aur/lister.py swh/lister/aur/tasks.py swh/lister/aur/tests/__init__.py swh/lister/aur/tests/test_lister.py swh/lister/aur/tests/test_tasks.py swh/lister/aur/tests/data/fake_aur_packages.sh swh/lister/aur/tests/data/packages-meta-v1.json.gz swh/lister/bitbucket/__init__.py swh/lister/bitbucket/lister.py swh/lister/bitbucket/tasks.py swh/lister/bitbucket/tests/__init__.py swh/lister/bitbucket/tests/test_lister.py swh/lister/bitbucket/tests/test_tasks.py swh/lister/bitbucket/tests/data/bb_api_repositories_page1.json swh/lister/bitbucket/tests/data/bb_api_repositories_page2.json swh/lister/bower/__init__.py swh/lister/bower/lister.py swh/lister/bower/tasks.py swh/lister/bower/tests/__init__.py swh/lister/bower/tests/test_lister.py 
swh/lister/bower/tests/test_tasks.py swh/lister/bower/tests/data/https_registry.bower.io/packages swh/lister/cgit/__init__.py swh/lister/cgit/lister.py swh/lister/cgit/tasks.py swh/lister/cgit/tests/__init__.py swh/lister/cgit/tests/repo_list.txt swh/lister/cgit/tests/test_lister.py swh/lister/cgit/tests/test_tasks.py -swh/lister/cgit/tests/data/https_git.acdw.net/Readme.md +swh/lister/cgit/tests/data/https_git.acdw.net/README swh/lister/cgit/tests/data/https_git.acdw.net/cgit swh/lister/cgit/tests/data/https_git.acdw.net/foo swh/lister/cgit/tests/data/https_git.acdw.net/foo_summary swh/lister/cgit/tests/data/https_git.acdw.net/sfeed swh/lister/cgit/tests/data/https_git.acdw.net/sfeed_summary swh/lister/cgit/tests/data/https_git.baserock.org/cgit swh/lister/cgit/tests/data/https_git.eclipse.org/c swh/lister/cgit/tests/data/https_git.savannah.gnu.org/README swh/lister/cgit/tests/data/https_git.savannah.gnu.org/cgit swh/lister/cgit/tests/data/https_git.savannah.gnu.org/cgit_elisp-es.git swh/lister/cgit/tests/data/https_git.tizen/README swh/lister/cgit/tests/data/https_git.tizen/cgit swh/lister/cgit/tests/data/https_git.tizen/cgit,ofs=100 swh/lister/cgit/tests/data/https_git.tizen/cgit,ofs=50 swh/lister/cgit/tests/data/https_git.tizen/cgit_All-Projects swh/lister/cgit/tests/data/https_git.tizen/cgit_All-Users swh/lister/cgit/tests/data/https_git.tizen/cgit_Lock-Projects swh/lister/cgit/tests/data/https_git.tizen/cgit_adaptation_alsa-scenario-scn-data-0-base swh/lister/cgit/tests/data/https_git.tizen/cgit_adaptation_alsa-scenario-scn-data-0-mc1n2 swh/lister/cgit/tests/data/https_git.tizen/cgit_adaptation_ap_samsung_audio-hal-e3250 swh/lister/cgit/tests/data/https_git.tizen/cgit_adaptation_ap_samsung_audio-hal-e4x12 swh/lister/cgit/tests/data/https_git.tizen/cgit_adaptation_devices_nfc-plugin-nxp swh/lister/cgit/tests/data/https_git.tizen/cgit_adaptation_intel_mfld_bootstub-mfld-blackbay swh/lister/cgit/tests/data/https_git.tizen/cgit_adaptation_mtdev swh/lister/cgit/tests/data/https_git.tizen/cgit_adaptation_opengl-es-virtual-drv swh/lister/cgit/tests/data/https_git.tizen/cgit_adaptation_panda_libdrm swh/lister/cgit/tests/data/https_git.tizen/cgit_adaptation_panda_libnl swh/lister/cgit/tests/data/https_git.tizen/cgit_adaptation_xorg_driver_xserver-xorg-misc swh/lister/cgit/tests/data/https_git.tizen/cgit_apps_core_preloaded_ug-setting-gallery-efl swh/lister/cgit/tests/data/https_git.tizen/cgit_apps_core_preloaded_ug-setting-homescreen-efl swh/lister/cgit/tests/data/https_jff.email/cgit +swh/lister/conda/__init__.py +swh/lister/conda/lister.py +swh/lister/conda/tasks.py +swh/lister/conda/tests/__init__.py +swh/lister/conda/tests/test_lister.py +swh/lister/conda/tests/test_tasks.py +swh/lister/conda/tests/data/https_conda.anaconda.org/conda-forge_linux-64_repodata.json.bz2 +swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_free_linux-64_repodata.json.bz2 +swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_free_osx-64_repodata.json.bz2 +swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_free_win-64_repodata.json.bz2 +swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_main_linux-64_repodata.json.bz2 +swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_pro_linux-64_repodata.json.bz2 +swh/lister/cpan/__init__.py +swh/lister/cpan/lister.py +swh/lister/cpan/tasks.py +swh/lister/cpan/tests/__init__.py +swh/lister/cpan/tests/test_lister.py +swh/lister/cpan/tests/test_tasks.py +swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page1 
+swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page2 +swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page3 +swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page4 +swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1_release__search swh/lister/cran/__init__.py swh/lister/cran/list_all_packages.R swh/lister/cran/lister.py swh/lister/cran/tasks.py swh/lister/cran/tests/__init__.py swh/lister/cran/tests/test_lister.py swh/lister/cran/tests/test_tasks.py swh/lister/cran/tests/data/list-r-packages.json swh/lister/crates/__init__.py swh/lister/crates/lister.py swh/lister/crates/tasks.py swh/lister/crates/tests/__init__.py swh/lister/crates/tests/test_lister.py swh/lister/crates/tests/test_tasks.py -swh/lister/crates/tests/data/fake-crates-repository.tar.gz swh/lister/crates/tests/data/fake_crates_repository_init.sh +swh/lister/crates/tests/data/https_static.crates.io/db-dump.tar.gz +swh/lister/crates/tests/data/https_static.crates.io/db-dump.tar.gz_visit1 swh/lister/debian/__init__.py swh/lister/debian/lister.py swh/lister/debian/tasks.py swh/lister/debian/tests/__init__.py swh/lister/debian/tests/test_lister.py swh/lister/debian/tests/test_tasks.py swh/lister/debian/tests/data/Sources_bullseye swh/lister/debian/tests/data/Sources_buster swh/lister/debian/tests/data/Sources_stretch swh/lister/gitea/__init__.py swh/lister/gitea/lister.py swh/lister/gitea/tasks.py swh/lister/gitea/tests/__init__.py swh/lister/gitea/tests/test_lister.py swh/lister/gitea/tests/test_tasks.py swh/lister/gitea/tests/data/https_try.gitea.io/repos_page1 swh/lister/gitea/tests/data/https_try.gitea.io/repos_page2 swh/lister/github/__init__.py swh/lister/github/lister.py swh/lister/github/tasks.py swh/lister/github/utils.py swh/lister/github/tests/__init__.py swh/lister/github/tests/test_lister.py swh/lister/github/tests/test_tasks.py swh/lister/gitlab/__init__.py swh/lister/gitlab/lister.py swh/lister/gitlab/tasks.py swh/lister/gitlab/tests/__init__.py swh/lister/gitlab/tests/test_lister.py swh/lister/gitlab/tests/test_tasks.py swh/lister/gitlab/tests/data/https_foss.heptapod.net/api_response_page1.json swh/lister/gitlab/tests/data/https_gite.lirmm.fr/api_response_page1.json swh/lister/gitlab/tests/data/https_gite.lirmm.fr/api_response_page2.json swh/lister/gitlab/tests/data/https_gite.lirmm.fr/api_response_page3.json swh/lister/gitlab/tests/data/https_gitlab.com/api_response_page1.json swh/lister/gnu/__init__.py swh/lister/gnu/lister.py swh/lister/gnu/tasks.py swh/lister/gnu/tree.py swh/lister/gnu/tests/__init__.py swh/lister/gnu/tests/test_lister.py swh/lister/gnu/tests/test_tasks.py swh/lister/gnu/tests/test_tree.py swh/lister/gnu/tests/data/tree.json swh/lister/gnu/tests/data/tree.min.json swh/lister/gnu/tests/data/https_ftp.gnu.org/tree.json.gz swh/lister/gogs/__init__.py swh/lister/gogs/lister.py swh/lister/gogs/tasks.py swh/lister/gogs/tests/__init__.py swh/lister/gogs/tests/test_lister.py swh/lister/gogs/tests/test_tasks.py swh/lister/gogs/tests/data/https_try.gogs.io/repos_page1 swh/lister/gogs/tests/data/https_try.gogs.io/repos_page2 swh/lister/gogs/tests/data/https_try.gogs.io/repos_page3 swh/lister/gogs/tests/data/https_try.gogs.io/repos_page4 swh/lister/golang/__init__.py swh/lister/golang/lister.py swh/lister/golang/tasks.py swh/lister/golang/tests/__init__.py swh/lister/golang/tests/test_lister.py swh/lister/golang/tests/test_tasks.py swh/lister/golang/tests/data/page-1.txt swh/lister/golang/tests/data/page-2.txt 
swh/lister/golang/tests/data/page-3.txt +swh/lister/hackage/__init__.py +swh/lister/hackage/lister.py +swh/lister/hackage/tasks.py +swh/lister/hackage/tests/__init__.py +swh/lister/hackage/tests/test_lister.py +swh/lister/hackage/tests/test_tasks.py +swh/lister/hackage/tests/data/https_fake49.haskell.org/packages_search_0 +swh/lister/hackage/tests/data/https_fake51.haskell.org/packages_search_0 +swh/lister/hackage/tests/data/https_fake51.haskell.org/packages_search_1 +swh/lister/hackage/tests/data/https_hackage.haskell.org/packages_search_0 +swh/lister/hackage/tests/data/https_hackage.haskell.org/packages_search_1 +swh/lister/hackage/tests/data/https_hackage.haskell.org/packages_search_2 swh/lister/launchpad/__init__.py swh/lister/launchpad/lister.py swh/lister/launchpad/tasks.py swh/lister/launchpad/tests/__init__.py swh/lister/launchpad/tests/conftest.py swh/lister/launchpad/tests/test_lister.py swh/lister/launchpad/tests/test_tasks.py swh/lister/launchpad/tests/data/launchpad_bzr_response.json swh/lister/launchpad/tests/data/launchpad_response1.json swh/lister/launchpad/tests/data/launchpad_response2.json swh/lister/maven/README.md swh/lister/maven/__init__.py swh/lister/maven/lister.py swh/lister/maven/tasks.py swh/lister/maven/tests/__init__.py swh/lister/maven/tests/test_lister.py swh/lister/maven/tests/test_tasks.py +swh/lister/maven/tests/data/citrus-parent-3.0.7.pom +swh/lister/maven/tests/data/sprova4j-0.1.0.malformed.pom swh/lister/maven/tests/data/http_indexes/export_full.fld swh/lister/maven/tests/data/http_indexes/export_incr_first.fld swh/lister/maven/tests/data/http_indexes/export_null_mtime.fld -swh/lister/maven/tests/data/https_maven.org/arangodb-graphql-1.2.pom -swh/lister/maven/tests/data/https_maven.org/citrus-parent-3.0.7.pom -swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.malformed.pom -swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.pom -swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.1.pom +swh/lister/maven/tests/data/https_api.github.com/repos_aldialimucaj_sprova4j +swh/lister/maven/tests/data/https_api.github.com/repos_arangodb-community_arangodb-graphql-java +swh/lister/maven/tests/data/https_api.github.com/repos_webx_citrus +swh/lister/maven/tests/data/https_repo1.maven.org/maven2_al_aldi_sprova4j_0.1.0_sprova4j-0.1.0.pom +swh/lister/maven/tests/data/https_repo1.maven.org/maven2_al_aldi_sprova4j_0.1.1_sprova4j-0.1.1.pom +swh/lister/maven/tests/data/https_repo1.maven.org/maven2_com_arangodb_arangodb-graphql_1.2_arangodb-graphql-1.2.pom +swh/lister/nixguix/__init__.py +swh/lister/nixguix/lister.py +swh/lister/nixguix/tasks.py +swh/lister/nixguix/tests/__init__.py +swh/lister/nixguix/tests/test_lister.py +swh/lister/nixguix/tests/test_tasks.py +swh/lister/nixguix/tests/data/sources-failure.json +swh/lister/nixguix/tests/data/sources-success.json swh/lister/npm/__init__.py swh/lister/npm/lister.py swh/lister/npm/tasks.py swh/lister/npm/tests/test_lister.py swh/lister/npm/tests/test_tasks.py swh/lister/npm/tests/data/npm_full_page1.json swh/lister/npm/tests/data/npm_full_page2.json swh/lister/npm/tests/data/npm_incremental_page1.json swh/lister/npm/tests/data/npm_incremental_page2.json +swh/lister/nuget/__init__.py +swh/lister/nuget/lister.py +swh/lister/nuget/tasks.py +swh/lister/nuget/tests/__init__.py +swh/lister/nuget/tests/test_lister.py +swh/lister/nuget/tests/test_tasks.py 
+swh/lister/nuget/tests/data/https_api.nuget.org/v3-flatcontainer_intersoft.crosslight.logging.entityframework_5.0.5000.1235-experimental_intersoft.crosslight.logging.entityframework.nuspec +swh/lister/nuget/tests/data/https_api.nuget.org/v3-flatcontainer_sil.core.desktop_10.0.1-beta0012_sil.core.desktop.nuspec +swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.08.07.54_sil.core.desktop.10.0.1-beta0012.json +swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.09.10.26_intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json +swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_index.json +swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page11702.json +swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page16958.json swh/lister/opam/__init__.py swh/lister/opam/lister.py swh/lister/opam/tasks.py swh/lister/opam/tests/__init__.py swh/lister/opam/tests/test_lister.py swh/lister/opam/tests/test_tasks.py swh/lister/opam/tests/data/fake_opam_repo/repo swh/lister/opam/tests/data/fake_opam_repo/version swh/lister/opam/tests/data/fake_opam_repo/packages/agrid/agrid.0.1/opam swh/lister/opam/tests/data/fake_opam_repo/packages/calculon/calculon.0.1/opam swh/lister/opam/tests/data/fake_opam_repo/packages/calculon/calculon.0.2/opam swh/lister/opam/tests/data/fake_opam_repo/packages/calculon/calculon.0.3/opam swh/lister/opam/tests/data/fake_opam_repo/packages/calculon/calculon.0.4/opam swh/lister/opam/tests/data/fake_opam_repo/packages/calculon/calculon.0.5/opam swh/lister/opam/tests/data/fake_opam_repo/packages/calculon/calculon.0.6/opam swh/lister/opam/tests/data/fake_opam_repo/packages/directories/directories.0.1/opam swh/lister/opam/tests/data/fake_opam_repo/packages/directories/directories.0.2/opam swh/lister/opam/tests/data/fake_opam_repo/packages/directories/directories.0.3/opam swh/lister/opam/tests/data/fake_opam_repo/packages/ocb/ocb.0.1/opam swh/lister/packagist/__init__.py swh/lister/packagist/lister.py swh/lister/packagist/tasks.py swh/lister/packagist/tests/__init__.py swh/lister/packagist/tests/test_lister.py swh/lister/packagist/tests/test_tasks.py swh/lister/packagist/tests/data/den1n_contextmenu.json +swh/lister/packagist/tests/data/idevlab_essential.json swh/lister/packagist/tests/data/ljjackson_linnworks.json swh/lister/packagist/tests/data/lky_wx_article.json swh/lister/packagist/tests/data/spryker-eco_computop-api.json +swh/lister/packagist/tests/data/ycms_module-main.json +swh/lister/packagist/tests/data/https_api.github.com/repos_gitlky_wx_article +swh/lister/packagist/tests/data/https_api.github.com/repos_spryker-eco_computop-api +swh/lister/packagist/tests/data/https_api.github.com/repos_ycms_module-main swh/lister/phabricator/__init__.py swh/lister/phabricator/lister.py swh/lister/phabricator/tasks.py swh/lister/phabricator/tests/__init__.py swh/lister/phabricator/tests/test_lister.py swh/lister/phabricator/tests/test_tasks.py swh/lister/phabricator/tests/data/__init__.py swh/lister/phabricator/tests/data/phabricator_api_repositories_page1.json swh/lister/phabricator/tests/data/phabricator_api_repositories_page2.json swh/lister/pubdev/__init__.py swh/lister/pubdev/lister.py swh/lister/pubdev/tasks.py swh/lister/pubdev/tests/__init__.py swh/lister/pubdev/tests/test_lister.py swh/lister/pubdev/tests/test_tasks.py swh/lister/pubdev/tests/data/https_pub.dev/api_package-names swh/lister/pubdev/tests/data/https_pub.dev/api_packages_Autolinker 
swh/lister/pubdev/tests/data/https_pub.dev/api_packages_Babylon +swh/lister/puppet/__init__.py +swh/lister/puppet/lister.py +swh/lister/puppet/tasks.py +swh/lister/puppet/tests/__init__.py +swh/lister/puppet/tests/test_lister.py +swh/lister/puppet/tests/test_tasks.py +swh/lister/puppet/tests/data/https_forgeapi.puppet.com/v3_modules,limit=100 +swh/lister/puppet/tests/data/https_forgeapi.puppet.com/v3_modules,limit=100,offset=100 swh/lister/pypi/__init__.py swh/lister/pypi/lister.py swh/lister/pypi/tasks.py swh/lister/pypi/tests/__init__.py swh/lister/pypi/tests/test_lister.py swh/lister/pypi/tests/test_tasks.py +swh/lister/rubygems/__init__.py +swh/lister/rubygems/lister.py +swh/lister/rubygems/tasks.py +swh/lister/rubygems/tests/__init__.py +swh/lister/rubygems/tests/test_lister.py +swh/lister/rubygems/tests/test_tasks.py +swh/lister/rubygems/tests/data/rubygems_dumps.xml +swh/lister/rubygems/tests/data/rubygems_pgsql_dump.tar +swh/lister/rubygems/tests/data/small_rubygems_dump.sh swh/lister/sourceforge/__init__.py swh/lister/sourceforge/lister.py swh/lister/sourceforge/tasks.py swh/lister/sourceforge/tests/__init__.py swh/lister/sourceforge/tests/test_lister.py swh/lister/sourceforge/tests/test_tasks.py swh/lister/sourceforge/tests/data/aaron.html swh/lister/sourceforge/tests/data/aaron.json swh/lister/sourceforge/tests/data/adobexmp.json swh/lister/sourceforge/tests/data/backapps-website.json swh/lister/sourceforge/tests/data/backapps.json swh/lister/sourceforge/tests/data/main-sitemap.xml swh/lister/sourceforge/tests/data/mojunk.json swh/lister/sourceforge/tests/data/mramm.json swh/lister/sourceforge/tests/data/ocaml-lpd.html swh/lister/sourceforge/tests/data/ocaml-lpd.json swh/lister/sourceforge/tests/data/os3dmodels.json swh/lister/sourceforge/tests/data/random-mercurial.json swh/lister/sourceforge/tests/data/subsitemap-0.xml swh/lister/sourceforge/tests/data/subsitemap-1.xml swh/lister/sourceforge/tests/data/t12eksandbox.html swh/lister/sourceforge/tests/data/t12eksandbox.json swh/lister/tests/__init__.py swh/lister/tests/test_cli.py swh/lister/tests/test_pattern.py swh/lister/tests/test_utils.py swh/lister/tuleap/__init__.py swh/lister/tuleap/lister.py swh/lister/tuleap/tasks.py swh/lister/tuleap/tests/__init__.py swh/lister/tuleap/tests/test_lister.py swh/lister/tuleap/tests/test_tasks.py swh/lister/tuleap/tests/data/https_tuleap.net/projects swh/lister/tuleap/tests/data/https_tuleap.net/repo_1 swh/lister/tuleap/tests/data/https_tuleap.net/repo_2 swh/lister/tuleap/tests/data/https_tuleap.net/repo_3 \ No newline at end of file diff --git a/swh.lister.egg-info/entry_points.txt b/swh.lister.egg-info/entry_points.txt index 38fe44f..a31a0c1 100644 --- a/swh.lister.egg-info/entry_points.txt +++ b/swh.lister.egg-info/entry_points.txt @@ -1,28 +1,35 @@ [swh.cli.subcommands] lister = swh.lister.cli [swh.workers] lister.arch = swh.lister.arch:register lister.aur = swh.lister.aur:register lister.bitbucket = swh.lister.bitbucket:register lister.bower = swh.lister.bower:register lister.cgit = swh.lister.cgit:register +lister.conda = swh.lister.conda:register +lister.cpan = swh.lister.cpan:register lister.cran = swh.lister.cran:register lister.crates = swh.lister.crates:register lister.debian = swh.lister.debian:register lister.gitea = swh.lister.gitea:register lister.github = swh.lister.github:register lister.gitlab = swh.lister.gitlab:register lister.gnu = swh.lister.gnu:register lister.gogs = swh.lister.gogs:register lister.golang = swh.lister.golang:register +lister.hackage = 
swh.lister.hackage:register lister.launchpad = swh.lister.launchpad:register lister.maven = swh.lister.maven:register +lister.nixguix = swh.lister.nixguix:register lister.npm = swh.lister.npm:register +lister.nuget = swh.lister.nuget:register lister.opam = swh.lister.opam:register lister.packagist = swh.lister.packagist:register lister.phabricator = swh.lister.phabricator:register lister.pubdev = swh.lister.pubdev:register +lister.puppet = swh.lister.puppet:register lister.pypi = swh.lister.pypi:register +lister.rubygems = swh.lister.rubygems:register lister.sourceforge = swh.lister.sourceforge:register lister.tuleap = swh.lister.tuleap:register diff --git a/swh.lister.egg-info/requires.txt b/swh.lister.egg-info/requires.txt index 5c598f4..6aaf3f2 100644 --- a/swh.lister.egg-info/requires.txt +++ b/swh.lister.egg-info/requires.txt @@ -1,19 +1,21 @@ python_debian requests setuptools iso8601 beautifulsoup4 launchpadlib tenacity>=6.2 lxml dulwich -swh.core[db,github]>=2.8 +testing.postgresql +psycopg2 +swh.core[db,github]>=2.16.1 swh.scheduler>=0.8 [testing] pytest pytest-mock requests_mock types-click types-pyyaml types-requests diff --git a/swh/lister/__init__.py b/swh/lister/__init__.py index 6a9b02b..eaa5efd 100644 --- a/swh/lister/__init__.py +++ b/swh/lister/__init__.py @@ -1,60 +1,84 @@ -# Copyright (C) 2018-2019 The Software Heritage developers +# Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import pkg_resources -from swh.lister import pattern - logger = logging.getLogger(__name__) try: __version__ = pkg_resources.get_distribution("swh.lister").version except pkg_resources.DistributionNotFound: __version__ = "devel" -USER_AGENT_TEMPLATE = "Software Heritage Lister (%s)" -USER_AGENT = USER_AGENT_TEMPLATE % __version__ - +USER_AGENT_TEMPLATE = ( + f"Software Heritage %s lister v{__version__}" + " (+https://www.softwareheritage.org/contact)" +) LISTERS = { entry_point.name.split(".", 1)[1]: entry_point for entry_point in pkg_resources.iter_entry_points("swh.workers") if entry_point.name.split(".", 1)[0] == "lister" } SUPPORTED_LISTERS = list(LISTERS) +TARBALL_EXTENSIONS = [ + "crate", + "gem", + "jar", + "zip", + "tar", + "gz", + "tgz", + "tbz", + "bz2", + "bzip2", + "lzma", + "lz", + "txz", + "xz", + "z", + "Z", + "7z", + "zst", +] +"""Tarball recognition pattern""" + def get_lister(lister_name, db_url=None, **conf): """Instantiate a lister given its name. Args: lister_name (str): Lister's name conf (dict): Configuration dict (lister db cnx, policy, priority...) 
Returns: Tuple (instantiated lister, drop_tables function, init schema function, insert minimum data function) """ if lister_name not in LISTERS: raise ValueError( "Invalid lister %s: only supported listers are %s" % (lister_name, SUPPORTED_LISTERS) ) if db_url: conf["lister"] = {"cls": "local", "args": {"db": db_url}} registry_entry = LISTERS[lister_name].load()() lister_cls = registry_entry["lister"] + + from swh.lister import pattern + if issubclass(lister_cls, pattern.Lister): return lister_cls.from_config(**conf) else: # Old-style lister return lister_cls(override_config=conf) diff --git a/swh/lister/arch/__init__.py b/swh/lister/arch/__init__.py index 276e4d2..30d7ae3 100644 --- a/swh/lister/arch/__init__.py +++ b/swh/lister/arch/__init__.py @@ -1,226 +1,226 @@ # Copyright (C) 2022 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """ Arch Linux lister ================= The Arch lister lists origins from `archlinux.org`_, the official Arch Linux packages, and from `archlinuxarm.org`_, the Arch Linux ARM packages, an unofficial port for ARM. Packages are put in three different repositories, `core`, `extra` and `community`. To manage listing those origins, this lister must be instantiated with a `flavours` dict. `flavours` default values:: "official": { "archs": ["x86_64"], "repos": ["core", "extra", "community"], "base_info_url": "https://archlinux.org", "base_archive_url": "https://archive.archlinux.org", "base_mirror_url": "", "base_api_url": "https://archlinux.org", }, "arm": { "archs": ["armv7h", "aarch64"], "repos": ["core", "extra", "community"], "base_info_url": "https://archlinuxarm.org", "base_archive_url": "", "base_mirror_url": "https://uk.mirror.archlinuxarm.org", "base_api_url": "", } From the official Arch Linux repositories we can list all packages and all released versions, as they provide an API and archives. From the Arch Linux ARM repositories we can only list packages at their latest version, as they provide neither an API nor archives. As of August 2022 `archlinux.org`_ lists 12592 packages and `archlinuxarm.org` 24044 packages. Please note that those amounts are the total of `regular`_ and `split`_ packages. Origins retrieving strategy --------------------------- Repository archives are downloaded as tar.gz files from https://archive.archlinux.org/repos/last/, extracted to a temporary directory, and each 'desc' file is then walked through. See the repository archive index url examples for the Arch Linux `core repository`_ and the Arch Linux ARM `extra repository`_. Each 'desc' file describes the latest released version of a package and helps build an origin url and a `package versions url`_ from which to scrape artifact metadata and get a list of versions. Arch Linux ARM follows the same discovery process of parsing 'desc' files. The main difference is that we can't get existing versions of an ARM package because https://archlinuxarm.org does not have an 'archive' website or API. Page listing ------------ Each page is a list of packages belonging to a flavour ('official', 'arm') and a repo ('core', 'extra', 'community'). Each line of a page represents an origin url for a package name, with related metadata and versions.
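As a rough standalone sketch of the retrieval strategy above (the index url is the real `core repository`_ one; paths and variable names are illustrative, not the lister's API)::

    import tarfile
    import tempfile
    from pathlib import Path

    import requests

    url = "https://archive.archlinux.org/repos/last/core/os/x86_64/core.files.tar.gz"
    with tempfile.TemporaryDirectory() as tmpdir:
        # download the repository index archive and extract it locally
        archive = Path(tmpdir, "core.files.tar.gz")
        archive.write_bytes(requests.get(url).content)
        with tarfile.open(archive) as tar:
            tar.extractall(path=tmpdir)
        # one 'desc' file per package, in a "<name>-<version>" directory
        for desc in sorted(Path(tmpdir).glob("**/desc")):
            print(desc.parent.name)  # e.g. "gzip-1.12-1"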
Origin url examples: * **Arch Linux**: https://archlinux.org/packages/extra/x86_64/mercurial * **Arch Linux ARM**: https://archlinuxarm.org/packages/armv7h/mercurial The data schema for each line is: * **name**: Package name * **version**: Last released package version * **last_modified**: ISO8601 last modified date from timestamp * **url**: Origin url * **data**: Package metadata dict * **versions**: A list of dicts with artifact metadata for each version The data schema for `versions` within a line: * **name**: Package name * **version**: Package version * **repo**: One of core, extra, community * **arch**: Processor architecture targeted * **filename**: Filename of the archive to download * **url**: Package download url * **last_modified**: ISO8601 last modified date from timestamp, used as publication date for this version * **length**: Length of the archive to download Origins from page ----------------- The origin url corresponds to: * **Arch Linux**: https://archlinux.org/packages/{repo}/{arch}/{name} * **Arch Linux ARM**: https://archlinuxarm.org/packages/{arch}/{name} Additionally, we add some data in "extra_loader_arguments": * **artifacts**: Represents data about the Arch Linux package archive to download, following the :ref:`original-artifacts-json specification <original-artifacts-json>` * **arch_metadata**: Stores all other interesting attributes that do not belong to artifacts. Origin data example for official Arch Linux:: { "url": "https://archlinux.org/packages/extra/x86_64/mercurial", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-4.8.2-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "4.8.2-1", "length": 4000000, "filename": "mercurial-4.8.2-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-4.9-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "4.9-1", "length": 4000000, "filename": "mercurial-4.9-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-4.9.1-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "4.9.1-1", "length": 4000000, "filename": "mercurial-4.9.1-1-x86_64.pkg.tar.xz", }, ...
], "arch_metadata": [ { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "4.8.2-1", "last_modified": "2019-01-15T20:31:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "4.9-1", "last_modified": "2019-02-12T06:15:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "4.9.1-1", "last_modified": "2019-03-30T17:40:00", }, ], }, }, Origin data example Arch Linux ARM:: { "url": "https://archlinuxarm.org/packages/armv7h/mercurial", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://uk.mirror.archlinuxarm.org/armv7h/extra/mercurial-6.1.3-1-armv7h.pkg.tar.xz", # noqa: B950 "length": 4897816, "version": "6.1.3-1", "filename": "mercurial-6.1.3-1-armv7h.pkg.tar.xz", } ], "arch_metadata": [ { "arch": "armv7h", "name": "mercurial", "repo": "extra", "version": "6.1.3-1", "last_modified": "2022-06-02T22:13:08", } ], }, }, Running tests ------------- Activate the virtualenv and run from within swh-lister directory:: pytest -s -vv --log-cli-level=DEBUG swh/lister/arch/tests Testing with Docker ------------------- Change directory to swh/docker then launch the docker environment:: - docker-compose up -d + docker compose up -d -Then connect to the lister:: +Then schedule an arch listing task:: - docker exec -it docker_swh-lister_1 bash + docker compose exec swh-scheduler swh scheduler task add -p oneshot list-arch -And run the lister (The output of this listing results in “oneshot” tasks in the scheduler):: +You can follow lister execution by displaying logs of swh-lister service:: - swh lister run -l arch + docker compose logs -f swh-lister .. _archlinux.org: https://archlinux.org/packages/ .. _archlinuxarm.org: https://archlinuxarm.org/packages/ .. _core repository: https://archive.archlinux.org/repos/last/core/os/x86_64/core.files.tar.gz .. _extra repository: https://uk.mirror.archlinuxarm.org/armv7h/extra/extra.files.tar.gz .. _package versions url: https://archive.archlinux.org/packages/m/mercurial/ .. _regular: https://wiki.archlinux.org/title/PKGBUILD#Package_name .. _split: https://man.archlinux.org/man/PKGBUILD.5#PACKAGE_SPLITTING """ def register(): from .lister import ArchLister return { "lister": ArchLister, "task_modules": ["%s.tasks" % __name__], } diff --git a/swh/lister/arch/lister.py b/swh/lister/arch/lister.py index af3a3d8..563fa18 100644 --- a/swh/lister/arch/lister.py +++ b/swh/lister/arch/lister.py @@ -1,501 +1,482 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import logging from pathlib import Path import re import tarfile import tempfile from typing import Any, Dict, Iterator, List, Optional from urllib.parse import unquote, urljoin from bs4 import BeautifulSoup -import requests -from tenacity.before_sleep import before_sleep_log -from swh.lister.utils import throttling_retry from swh.model.hashutil import hash_to_hex from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin -from .. import USER_AGENT from ..pattern import CredentialsType, StatelessLister logger = logging.getLogger(__name__) # Aliasing the page results returned by `get_pages` method from the lister. ArchListerPage = List[Dict[str, Any]] def size_to_bytes(size: str) -> int: """Convert human readable file size to bytes. 
Resulting value is an approximation, as the input value is in most cases rounded. Args: size: A string representing a human readable file size (e.g. '500K') Returns: A decimal representation of file size Examples:: >>> size_to_bytes("500") 500 >>> size_to_bytes("1K") 1000 """ units = { "K": 1000, "M": 1000**2, "G": 1000**3, "T": 1000**4, "P": 1000**5, "E": 1000**6, "Z": 1000**7, "Y": 1000**8, } if size.endswith(tuple(units)): v, u = (size[:-1], size[-1]) return int(v) * units[u] else: return int(size) class ArchLister(StatelessLister[ArchListerPage]): """List Arch Linux origins from the 'core', 'extra', and 'community' repositories. For 'official' Arch Linux it downloads core.files.tar.gz, extra.files.tar.gz and community.files.tar.gz from https://archive.archlinux.org/repos/last/, extracts them to a temporary directory and then walks through each 'desc' file. Each 'desc' file describes the latest released version of a package and helps build an origin url from which artifact metadata is scraped. For 'arm' Arch Linux it follows the same discovery process of parsing 'desc' files. The main difference is that we can't get existing versions of an ARM package because https://archlinuxarm.org does not have an 'archive' website or API. """ LISTER_NAME = "arch" VISIT_TYPE = "arch" INSTANCE = "arch" ARCH_PACKAGE_URL_PATTERN = "{base_url}/packages/{repo}/{arch}/{pkgname}" ARCH_PACKAGE_VERSIONS_URL_PATTERN = "{base_url}/packages/{pkgname[0]}/{pkgname}" ARCH_PACKAGE_DOWNLOAD_URL_PATTERN = ( "{base_url}/packages/{pkgname[0]}/{pkgname}/{filename}" ) ARCH_API_URL_PATTERN = "{base_url}/packages/{repo}/{arch}/{pkgname}/json" ARM_PACKAGE_URL_PATTERN = "{base_url}/packages/{arch}/{pkgname}" ARM_PACKAGE_DOWNLOAD_URL_PATTERN = "{base_url}/{arch}/{repo}/{filename}" def __init__( self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, flavours: Dict[str, Any] = { "official": { "archs": ["x86_64"], "repos": ["core", "extra", "community"], "base_info_url": "https://archlinux.org", "base_archive_url": "https://archive.archlinux.org", "base_mirror_url": "", "base_api_url": "https://archlinux.org", }, "arm": { "archs": ["armv7h", "aarch64"], "repos": ["core", "extra", "community"], "base_info_url": "https://archlinuxarm.org", "base_archive_url": "", "base_mirror_url": "https://uk.mirror.archlinuxarm.org", "base_api_url": "", }, }, ): super().__init__( scheduler=scheduler, credentials=credentials, url=flavours["official"]["base_info_url"], instance=self.INSTANCE, ) self.flavours = flavours - self.session = requests.Session() - self.session.headers.update( - { - "User-Agent": USER_AGENT, - } - ) - - @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) - def request_get(self, url: str, params: Dict[str, Any]) -> requests.Response: - - logger.debug("Fetching URL %s with params %s", url, params) - - response = self.session.get(url, params=params) - if response.status_code != 200: - logger.warning( - "Unexpected HTTP status code %s on %s: %s", - response.status_code, - response.url, - response.content, - ) - response.raise_for_status() - - return response def scrap_package_versions( self, name: str, repo: str, base_url: str ) -> List[Dict[str, Any]]: """Given a package 'name' and 'repo', make an http call to the origin url and parse its content to get package version artifact data. This method is suitable only for 'official' Arch Linux, not 'arm'.
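As a rough standalone illustration of that scraping step (assuming the archive index simply lists one link per artifact; the names below are illustrative, not the lister's API, and the regex mirrors the one in the implementation)::

    import re

    import requests
    from bs4 import BeautifulSoup

    name = "gzip"
    # index layout from ARCH_PACKAGE_VERSIONS_URL_PATTERN above
    index_url = f"https://archive.archlinux.org/packages/{name[0]}/{name}/"
    soup = BeautifulSoup(requests.get(index_url).text, "html.parser")
    for link in soup.find_all("a", href=True):
        # one matching link per released artifact, e.g. gzip-1.12-1-x86_64.pkg.tar.zst
        m = re.match(
            rf"^{re.escape(name)}-(?P<version>.*)-(?P<arch>any|i686|x86_64)"
            r"\.pkg\.tar\.(?:zst|xz)$",
            link["href"],
        )
        if m:
            print(m.group("version"), m.group("arch"))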
Args: name: Package name repo: The repository the package belongs to (one of self.repos) Returns: A list of version dicts Example:: [ {"url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190211-1-x86_64.pkg.tar.xz", # noqa: B950 "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20190211-1", "length": 180000, "filename": "dialog-1:1.3_20190211-1-x86_64.pkg.tar.xz", "last_modified": "2019-02-13T08:36:00"}, ] """ url = self.ARCH_PACKAGE_VERSIONS_URL_PATTERN.format( pkgname=name, base_url=base_url ) - response = self.request_get(url=url, params={}) + response = self.http_request(url) soup = BeautifulSoup(response.text, "html.parser") links = soup.find_all("a", href=True) # drop the first link (used to go up one directory) if links[0].attrs["href"] == "../": links.pop(0) versions = [] for link in links: # the displayed filename can be cropped if the name is too long, get it from href instead filename = unquote(link.attrs["href"]) if filename.endswith((".tar.xz", ".tar.zst")): # Extract arch from filename arch_rex = re.compile( rf"^{re.escape(name)}-(?P<version>.*)-(?P<arch>any|i686|x86_64)" rf"(.pkg.tar.(?:zst|xz))$" ) m = arch_rex.match(filename) if m is None: logger.error( "Can not find a match for architecture in %(filename)s", dict(filename=filename), ) else: arch = m.group("arch") version = m.group("version") # Extract last_modified and an approximate file size raw_text = link.next_sibling raw_text_rex = re.compile( r"^(?P<last_modified>\d+-\w+-\d+ \d\d:\d\d)\s+(?P<size>\w+)$" ) s = raw_text_rex.search(raw_text.strip()) if s is None: logger.error( "Can not find a match for 'last_modified' and/or " "'size' in '%(raw_text)s'", dict(raw_text=raw_text), ) else: assert s.groups() assert len(s.groups()) == 2 last_modified_str, size = s.groups() # format as expected last_modified = datetime.datetime.strptime( last_modified_str, "%d-%b-%Y %H:%M" ).isoformat() length = size_to_bytes(size) # we want bytes # link url is relative, format a canonical one url = self.ARCH_PACKAGE_DOWNLOAD_URL_PATTERN.format( base_url=base_url, pkgname=name, filename=filename ) versions.append( dict( name=name, version=version, repo=repo, arch=arch, filename=filename, url=url, last_modified=last_modified, length=length, ) ) return versions def get_repo_archive(self, url: str, destination_path: Path) -> Path: """Given a url and a destination path, retrieve and extract a .tar.gz archive which contains a 'desc' file for each package. Each .tar.gz archive corresponds to an Arch Linux repo ('core', 'extra', 'community'). Args: url: url of the .tar.gz archive to download destination_path: the path on disk where to extract the archive Returns: a directory Path where the archive has been extracted to. """ - res = self.request_get(url=url, params={}) + res = self.http_request(url) destination_path.parent.mkdir(parents=True, exist_ok=True) destination_path.write_bytes(res.content) extract_to = Path(str(destination_path).split(".tar.gz")[0]) tar = tarfile.open(destination_path) tar.extractall(path=extract_to) tar.close() return extract_to def parse_desc_file( self, path: Path, repo: str, base_url: str, dl_url_fmt: str, ) -> Dict[str, Any]: """Extract package information from a 'desc' file.
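The 'desc' layout is a sequence of %FIELD% headers, each followed by a value line. A tiny illustration of the parsing idea (the regex matches the implementation below; the sample content is made up)::

    import re

    # %FIELD%\nvalue layout used by pacman 'desc' files
    desc = "%NAME%\ngzip\n\n%VERSION%\n1.12-1\n\n%ARCH%\nx86_64\n"
    rex = re.compile(r"^\%(?P<k>\w+)\%\n(?P<v>.*)\n$", re.M)
    print({k.lower(): v for k, v in rex.findall(desc)})
    # {'name': 'gzip', 'version': '1.12-1', 'arch': 'x86_64'}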
There are subtle differences between parsing 'official' and 'arm' 'desc' files. Args: path: A path to a 'desc' file on disk repo: The repo the package belongs to Returns: A dict of metadata Example:: {'api_url': 'https://archlinux.org/packages/core/x86_64/dialog/json', 'arch': 'x86_64', 'base': 'dialog', 'builddate': '1650081535', 'csize': '203028', 'desc': 'A tool to display dialog boxes from shell scripts', 'filename': 'dialog-1:1.3_20220414-1-x86_64.pkg.tar.zst', 'isize': '483988', 'license': 'LGPL2.1', 'md5sum': '06407c0cb11c50d7bf83d600f2e8107c', 'name': 'dialog', 'packager': 'Evangelos Foutras ', 'pgpsig': 'pgpsig content xxx', 'project_url': 'https://invisible-island.net/dialog/', 'provides': 'libdialog.so=15-64', 'repo': 'core', 'sha256sum': 'ef8c8971f591de7db0f455970ef5d81d5aced1ddf139f963f16f6730b1851fa7', 'url': 'https://archive.archlinux.org/packages/.all/dialog-1:1.3_20220414-1-x86_64.pkg.tar.zst', # noqa: B950 'version': '1:1.3_20220414-1'} """ rex = re.compile(r"^\%(?P<k>\w+)\%\n(?P<v>.*)\n$", re.M) with path.open("rb") as content: parsed = rex.findall(content.read().decode()) data = {entry[0].lower(): entry[1] for entry in parsed} if "url" in data.keys(): data["project_url"] = data["url"] assert data["name"] assert data["filename"] assert data["arch"] data["repo"] = repo data["url"] = urljoin( base_url, dl_url_fmt.format( base_url=base_url, pkgname=data["name"], filename=data["filename"], arch=data["arch"], repo=repo, ), ) assert data["md5sum"] assert data["sha256sum"] data["checksums"] = { "md5sum": hash_to_hex(data["md5sum"]), "sha256sum": hash_to_hex(data["sha256sum"]), } return data def get_pages(self) -> Iterator[ArchListerPage]: """Yield pages, sorted by name in ascending order. Each page is a list of packages belonging to a flavour ('official', 'arm') and a repo ('core', 'extra', 'community') """ for name, flavour in self.flavours.items(): for arch in flavour["archs"]: for repo in flavour["repos"]: yield self._get_repo_page(name, flavour, arch, repo) def _get_repo_page( self, name: str, flavour: Dict[str, Any], arch: str, repo: str ) -> ArchListerPage: with tempfile.TemporaryDirectory() as tmpdir: page = [] if name == "official": prefix = urljoin(flavour["base_archive_url"], "/repos/last/") filename = f"{repo}.files.tar.gz" archive_url = urljoin(prefix, f"{repo}/os/{arch}/{filename}") destination_path = Path(tmpdir, arch, filename) base_url = flavour["base_archive_url"] dl_url_fmt = self.ARCH_PACKAGE_DOWNLOAD_URL_PATTERN base_info_url = flavour["base_info_url"] info_url_fmt = self.ARCH_PACKAGE_URL_PATTERN elif name == "arm": filename = f"{repo}.files.tar.gz" archive_url = urljoin( flavour["base_mirror_url"], f"{arch}/{repo}/{filename}" ) destination_path = Path(tmpdir, arch, filename) base_url = flavour["base_mirror_url"] dl_url_fmt = self.ARM_PACKAGE_DOWNLOAD_URL_PATTERN base_info_url = flavour["base_info_url"] info_url_fmt = self.ARM_PACKAGE_URL_PATTERN archive = self.get_repo_archive( url=archive_url, destination_path=destination_path ) assert archive packages_desc = list(archive.glob("**/desc")) logger.debug( "Processing %(instance)s source packages info from " "%(flavour)s %(arch)s %(repo)s repository, " "(%(qty)s packages).", dict( instance=self.instance, flavour=name, arch=arch, repo=repo, qty=len(packages_desc), ), ) for package_desc in packages_desc: data = self.parse_desc_file( path=package_desc, repo=repo, base_url=base_url, dl_url_fmt=dl_url_fmt, ) assert data["builddate"] last_modified = datetime.datetime.fromtimestamp( float(data["builddate"]),
tz=datetime.timezone.utc ) assert data["name"] assert data["filename"] assert data["arch"] url = info_url_fmt.format( base_url=base_info_url, pkgname=data["name"], filename=data["filename"], repo=repo, arch=data["arch"], ) assert data["version"] if name == "official": # find all versions of a package by scraping the archive versions = self.scrap_package_versions( name=data["name"], repo=repo, base_url=base_url ) elif name == "arm": # There is no way to get related versions of a package, # but 'data' represents the latest released version, # use it in this case assert data["builddate"] assert data["csize"] assert data["url"] versions = [ dict( name=data["name"], version=data["version"], repo=repo, arch=data["arch"], filename=data["filename"], url=data["url"], last_modified=last_modified.replace(tzinfo=None).isoformat( timespec="seconds" ), length=int(data["csize"]), ) ] package = { "name": data["name"], "version": data["version"], "last_modified": last_modified, "url": url, "versions": versions, "data": data, } page.append(package) return page def get_origins_from_page(self, page: ArchListerPage) -> Iterator[ListedOrigin]: """Iterate over all arch pages and yield ListedOrigin instances.""" assert self.lister_obj.id is not None for origin in page: artifacts = [] arch_metadata = [] for version in origin["versions"]: artifacts.append( { "version": version["version"], "filename": version["filename"], "url": version["url"], "length": version["length"], } ) + if version["version"] == origin["version"]: + artifacts[-1]["checksums"] = { + "md5": origin["data"]["md5sum"], + "sha256": origin["data"]["sha256sum"], + } + else: + artifacts[-1]["checksums"] = {"length": version["length"]} + arch_metadata.append( { "version": version["version"], "name": version["name"], "arch": version["arch"], "repo": version["repo"], "last_modified": version["last_modified"], } ) yield ListedOrigin( lister_id=self.lister_obj.id, visit_type=self.VISIT_TYPE, url=origin["url"], last_update=origin["last_modified"], extra_loader_arguments={ "artifacts": artifacts, "arch_metadata": arch_metadata, }, ) diff --git a/swh/lister/arch/tests/test_lister.py b/swh/lister/arch/tests/test_lister.py index daa8712..3167c4d 100644 --- a/swh/lister/arch/tests/test_lister.py +++ b/swh/lister/arch/tests/test_lister.py @@ -1,1394 +1,1696 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information + +# flake8: noqa: B950 + from swh.lister.arch.lister import ArchLister expected_origins = [ { "url": "https://archlinux.org/packages/core/x86_64/dialog", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190211-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190211-1-x86_64.pkg.tar.xz", "version": "1:1.3_20190211-1", "length": 180000, "filename": "dialog-1:1.3_20190211-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 180000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190724-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190724-1-x86_64.pkg.tar.xz", "version": "1:1.3_20190724-1", "length": 180000, "filename": "dialog-1:1.3_20190724-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 180000, + }, }, { - "url":
"https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190728-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190728-1-x86_64.pkg.tar.xz", "version": "1:1.3_20190728-1", "length": 180000, "filename": "dialog-1:1.3_20190728-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 180000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190806-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190806-1-x86_64.pkg.tar.xz", "version": "1:1.3_20190806-1", "length": 182000, "filename": "dialog-1:1.3_20190806-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 182000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190808-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190808-1-x86_64.pkg.tar.xz", "version": "1:1.3_20190808-1", "length": 182000, "filename": "dialog-1:1.3_20190808-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 182000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20191110-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20191110-1-x86_64.pkg.tar.xz", "version": "1:1.3_20191110-1", "length": 183000, "filename": "dialog-1:1.3_20191110-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 183000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20191110-2-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20191110-2-x86_64.pkg.tar.xz", "version": "1:1.3_20191110-2", "length": 183000, "filename": "dialog-1:1.3_20191110-2-x86_64.pkg.tar.xz", + "checksums": { + "length": 183000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20191209-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20191209-1-x86_64.pkg.tar.xz", "version": "1:1.3_20191209-1", "length": 183000, "filename": "dialog-1:1.3_20191209-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 183000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20191210-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20191210-1-x86_64.pkg.tar.xz", "version": "1:1.3_20191210-1", "length": 184000, "filename": "dialog-1:1.3_20191210-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 184000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20200228-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20200228-1-x86_64.pkg.tar.zst", "version": "1:1.3_20200228-1", "length": 196000, "filename": "dialog-1:1.3_20200228-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 196000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20200327-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20200327-1-x86_64.pkg.tar.zst", "version": "1:1.3_20200327-1", "length": 196000, "filename": "dialog-1:1.3_20200327-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 196000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20201126-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20201126-1-x86_64.pkg.tar.zst", 
"version": "1:1.3_20201126-1", "length": 199000, "filename": "dialog-1:1.3_20201126-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 199000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210117-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210117-1-x86_64.pkg.tar.zst", "version": "1:1.3_20210117-1", "length": 200000, "filename": "dialog-1:1.3_20210117-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 200000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210306-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210306-1-x86_64.pkg.tar.zst", "version": "1:1.3_20210306-1", "length": 201000, "filename": "dialog-1:1.3_20210306-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 201000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210319-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210319-1-x86_64.pkg.tar.zst", "version": "1:1.3_20210319-1", "length": 201000, "filename": "dialog-1:1.3_20210319-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 201000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210324-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210324-1-x86_64.pkg.tar.zst", "version": "1:1.3_20210324-1", "length": 201000, "filename": "dialog-1:1.3_20210324-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 201000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210509-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210509-1-x86_64.pkg.tar.zst", "version": "1:1.3_20210509-1", "length": 198000, "filename": "dialog-1:1.3_20210509-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 198000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210530-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210530-1-x86_64.pkg.tar.zst", "version": "1:1.3_20210530-1", "length": 198000, "filename": "dialog-1:1.3_20210530-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 198000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210621-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210621-1-x86_64.pkg.tar.zst", "version": "1:1.3_20210621-1", "length": 199000, "filename": "dialog-1:1.3_20210621-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 199000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20211107-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20211107-1-x86_64.pkg.tar.zst", "version": "1:1.3_20211107-1", "length": 197000, "filename": "dialog-1:1.3_20211107-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 197000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20211214-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20211214-1-x86_64.pkg.tar.zst", "version": "1:1.3_20211214-1", "length": 197000, "filename": "dialog-1:1.3_20211214-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 197000, + }, }, { - "url": 
"https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20220117-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20220117-1-x86_64.pkg.tar.zst", "version": "1:1.3_20220117-1", "length": 199000, "filename": "dialog-1:1.3_20220117-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 199000, + }, }, { - "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20220414-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20220414-1-x86_64.pkg.tar.zst", "version": "1:1.3_20220414-1", "length": 198000, "filename": "dialog-1:1.3_20220414-1-x86_64.pkg.tar.zst", + "checksums": { + "md5": "06407c0cb11c50d7bf83d600f2e8107c", + "sha256": "ef8c8971f591de7db0f455970ef5d81d5aced1ddf139f963f16f6730b1851fa7", + }, }, ], "arch_metadata": [ { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20190211-1", "last_modified": "2019-02-13T08:36:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20190724-1", "last_modified": "2019-07-26T21:39:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20190728-1", "last_modified": "2019-07-29T12:10:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20190806-1", "last_modified": "2019-08-07T04:19:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20190808-1", "last_modified": "2019-08-09T22:49:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20191110-1", "last_modified": "2019-11-11T11:15:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20191110-2", "last_modified": "2019-11-13T17:40:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20191209-1", "last_modified": "2019-12-10T09:56:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20191210-1", "last_modified": "2019-12-12T15:55:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20200228-1", "last_modified": "2020-03-06T02:21:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20200327-1", "last_modified": "2020-03-29T17:08:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20201126-1", "last_modified": "2020-11-27T12:19:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210117-1", "last_modified": "2021-01-18T18:05:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210306-1", "last_modified": "2021-03-07T11:40:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210319-1", "last_modified": "2021-03-20T00:12:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210324-1", "last_modified": "2021-03-26T17:53:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210509-1", "last_modified": "2021-05-16T02:04:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210530-1", "last_modified": "2021-05-31T14:59:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210621-1", "last_modified": "2021-06-23T02:59:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20211107-1", "last_modified": "2021-11-09T14:06:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20211214-1", "last_modified": "2021-12-14T09:26:00", }, { 
"arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20220117-1", "last_modified": "2022-01-19T09:56:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20220414-1", "last_modified": "2022-04-16T03:59:00", }, ], }, }, { "url": "https://archlinux.org/packages/community/x86_64/gnome-code-assistance", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { - "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-1:3.16.1+15+g0fd8b5f-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-1:3.16.1+15+g0fd8b5f-1-x86_64.pkg.tar.xz", "version": "1:3.16.1+15+g0fd8b5f-1", "length": 2000000, - "filename": "gnome-code-assistance-1:3.16.1+15+g0fd8b5f-1-x86_64.pkg.tar.xz", # noqa: B950 + "filename": "gnome-code-assistance-1:3.16.1+15+g0fd8b5f-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 2000000, + }, }, { - "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-1:3.16.1+15+g0fd8b5f-2-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-1:3.16.1+15+g0fd8b5f-2-x86_64.pkg.tar.zst", "version": "1:3.16.1+15+g0fd8b5f-2", "length": 2000000, - "filename": "gnome-code-assistance-1:3.16.1+15+g0fd8b5f-2-x86_64.pkg.tar.zst", # noqa: B950 + "filename": "gnome-code-assistance-1:3.16.1+15+g0fd8b5f-2-x86_64.pkg.tar.zst", + "checksums": { + "length": 2000000, + }, }, { - "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-1:3.16.1+15+g0fd8b5f-3-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-1:3.16.1+15+g0fd8b5f-3-x86_64.pkg.tar.zst", "version": "1:3.16.1+15+g0fd8b5f-3", "length": 2000000, - "filename": "gnome-code-assistance-1:3.16.1+15+g0fd8b5f-3-x86_64.pkg.tar.zst", # noqa: B950 + "filename": "gnome-code-assistance-1:3.16.1+15+g0fd8b5f-3-x86_64.pkg.tar.zst", + "checksums": { + "length": 2000000, + }, }, { - "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-1:3.16.1+15+g0fd8b5f-4-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-1:3.16.1+15+g0fd8b5f-4-x86_64.pkg.tar.zst", "version": "1:3.16.1+15+g0fd8b5f-4", "length": 2000000, - "filename": "gnome-code-assistance-1:3.16.1+15+g0fd8b5f-4-x86_64.pkg.tar.zst", # noqa: B950 + "filename": "gnome-code-assistance-1:3.16.1+15+g0fd8b5f-4-x86_64.pkg.tar.zst", + "checksums": { + "length": 2000000, + }, }, { - "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-2:3.16.1+14+gaad6437-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-2:3.16.1+14+gaad6437-1-x86_64.pkg.tar.zst", "version": "2:3.16.1+14+gaad6437-1", "length": 2000000, - "filename": "gnome-code-assistance-2:3.16.1+14+gaad6437-1-x86_64.pkg.tar.zst", # noqa: B950 + "filename": "gnome-code-assistance-2:3.16.1+14+gaad6437-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 2000000, + }, }, { - "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-2:3.16.1+14+gaad6437-2-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-2:3.16.1+14+gaad6437-2-x86_64.pkg.tar.zst", 
"version": "2:3.16.1+14+gaad6437-2", "length": 2000000, - "filename": "gnome-code-assistance-2:3.16.1+14+gaad6437-2-x86_64.pkg.tar.zst", # noqa: B950 + "filename": "gnome-code-assistance-2:3.16.1+14+gaad6437-2-x86_64.pkg.tar.zst", + "checksums": { + "md5": "eadcf1a6bb70a3e564f260b7fc58135a", + "sha256": "6fd0c80b63d205a1edf5c39c7a62d16499e802566f2451c2b85cd28c9bc30ec7", + }, }, { - "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-3.16.1+14+gaad6437-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-3.16.1+14+gaad6437-1-x86_64.pkg.tar.xz", "version": "3.16.1+14+gaad6437-1", "length": 2000000, - "filename": "gnome-code-assistance-3.16.1+14+gaad6437-1-x86_64.pkg.tar.xz", # noqa: B950 + "filename": "gnome-code-assistance-3.16.1+14+gaad6437-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 2000000, + }, }, { - "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-3.16.1+14+gaad6437-2-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-3.16.1+14+gaad6437-2-x86_64.pkg.tar.xz", "version": "3.16.1+14+gaad6437-2", "length": 2000000, - "filename": "gnome-code-assistance-3.16.1+14+gaad6437-2-x86_64.pkg.tar.xz", # noqa: B950 + "filename": "gnome-code-assistance-3.16.1+14+gaad6437-2-x86_64.pkg.tar.xz", + "checksums": { + "length": 2000000, + }, }, { - "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-3.16.1+15+gb9ffc4d-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-3.16.1+15+gb9ffc4d-1-x86_64.pkg.tar.xz", "version": "3.16.1+15+gb9ffc4d-1", "length": 2000000, - "filename": "gnome-code-assistance-3.16.1+15+gb9ffc4d-1-x86_64.pkg.tar.xz", # noqa: B950 + "filename": "gnome-code-assistance-3.16.1+15+gb9ffc4d-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 2000000, + }, }, { - "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-3:3.16.1+r14+gaad6437-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-3:3.16.1+r14+gaad6437-1-x86_64.pkg.tar.zst", "version": "3:3.16.1+r14+gaad6437-1", "length": 2000000, - "filename": "gnome-code-assistance-3:3.16.1+r14+gaad6437-1-x86_64.pkg.tar.zst", # noqa: B950 + "filename": "gnome-code-assistance-3:3.16.1+r14+gaad6437-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 2000000, + }, }, ], "arch_metadata": [ { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "1:3.16.1+15+g0fd8b5f-1", "last_modified": "2019-11-10T20:55:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "1:3.16.1+15+g0fd8b5f-2", "last_modified": "2020-03-28T15:58:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "1:3.16.1+15+g0fd8b5f-3", "last_modified": "2020-07-05T15:28:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "1:3.16.1+15+g0fd8b5f-4", "last_modified": "2020-11-12T17:28:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "2:3.16.1+14+gaad6437-1", "last_modified": "2021-02-24T16:30:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "2:3.16.1+14+gaad6437-2", "last_modified": 
"2021-12-02T23:36:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "3.16.1+14+gaad6437-1", "last_modified": "2019-03-15T19:23:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "3.16.1+14+gaad6437-2", "last_modified": "2019-08-24T20:05:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "3.16.1+15+gb9ffc4d-1", "last_modified": "2019-08-25T20:55:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "3:3.16.1+r14+gaad6437-1", "last_modified": "2022-05-18T17:23:00", }, ], }, }, { "url": "https://archlinux.org/packages/core/x86_64/gzip", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { - "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.10-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.10-1-x86_64.pkg.tar.xz", "version": "1.10-1", "length": 78000, "filename": "gzip-1.10-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 78000, + }, }, { - "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.10-2-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.10-2-x86_64.pkg.tar.xz", "version": "1.10-2", "length": 78000, "filename": "gzip-1.10-2-x86_64.pkg.tar.xz", + "checksums": { + "length": 78000, + }, }, { - "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.10-3-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.10-3-x86_64.pkg.tar.xz", "version": "1.10-3", "length": 78000, "filename": "gzip-1.10-3-x86_64.pkg.tar.xz", + "checksums": { + "length": 78000, + }, }, { - "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.11-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.11-1-x86_64.pkg.tar.zst", "version": "1.11-1", "length": 82000, "filename": "gzip-1.11-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 82000, + }, }, { - "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.12-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.12-1-x86_64.pkg.tar.zst", "version": "1.12-1", "length": 80000, "filename": "gzip-1.12-1-x86_64.pkg.tar.zst", + "checksums": { + "md5": "3e72c94305917d00d9e361a687cf0a3e", + "sha256": "0ee561edfbc1c7c6a204f7cfa43437c3362311b4fd09ea0541134aaea3a8cc07", + }, }, ], "arch_metadata": [ { "arch": "x86_64", "repo": "core", "name": "gzip", "version": "1.10-1", "last_modified": "2018-12-30T18:38:00", }, { "arch": "x86_64", "repo": "core", "name": "gzip", "version": "1.10-2", "last_modified": "2019-10-06T16:02:00", }, { "arch": "x86_64", "repo": "core", "name": "gzip", "version": "1.10-3", "last_modified": "2019-11-13T15:55:00", }, { "arch": "x86_64", "repo": "core", "name": "gzip", "version": "1.11-1", "last_modified": "2021-09-04T02:02:00", }, { "arch": "x86_64", "repo": "core", "name": "gzip", "version": "1.12-1", "last_modified": "2022-04-07T17:35:00", }, ], }, }, { "url": "https://archlinux.org/packages/extra/x86_64/libasyncns", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { - "url": "https://archive.archlinux.org/packages/l/libasyncns/libasyncns-0.8+3+g68cd5af-2-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/l/libasyncns/libasyncns-0.8+3+g68cd5af-2-x86_64.pkg.tar.xz", "version": "0.8+3+g68cd5af-2", "length": 16000, "filename": 
"libasyncns-0.8+3+g68cd5af-2-x86_64.pkg.tar.xz", + "checksums": { + "length": 16000, + }, }, { - "url": "https://archive.archlinux.org/packages/l/libasyncns/libasyncns-0.8+3+g68cd5af-3-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/l/libasyncns/libasyncns-0.8+3+g68cd5af-3-x86_64.pkg.tar.zst", "version": "0.8+3+g68cd5af-3", "length": 17000, "filename": "libasyncns-0.8+3+g68cd5af-3-x86_64.pkg.tar.zst", + "checksums": { + "md5": "0aad62f00eab3d0ec7798cb5b4a6eddd", + "sha256": "a0262e191dd3b00343e79e3521159c963e26b7a438d4cc44137c64cf0da90516", + }, }, { - "url": "https://archive.archlinux.org/packages/l/libasyncns/libasyncns-1:0.8+r3+g68cd5af-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/l/libasyncns/libasyncns-1:0.8+r3+g68cd5af-1-x86_64.pkg.tar.zst", "version": "1:0.8+r3+g68cd5af-1", "length": 17000, - "filename": "libasyncns-1:0.8+r3+g68cd5af-1-x86_64.pkg.tar.zst", # noqa: B950 + "filename": "libasyncns-1:0.8+r3+g68cd5af-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 17000, + }, }, ], "arch_metadata": [ { "arch": "x86_64", "repo": "extra", "name": "libasyncns", "version": "0.8+3+g68cd5af-2", "last_modified": "2018-11-09T23:39:00", }, { "arch": "x86_64", "repo": "extra", "name": "libasyncns", "version": "0.8+3+g68cd5af-3", "last_modified": "2020-05-19T08:28:00", }, { "arch": "x86_64", "repo": "extra", "name": "libasyncns", "version": "1:0.8+r3+g68cd5af-1", "last_modified": "2022-05-18T17:23:00", }, ], }, }, { "url": "https://archlinux.org/packages/extra/x86_64/mercurial", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-4.8.2-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-4.8.2-1-x86_64.pkg.tar.xz", "version": "4.8.2-1", "length": 4000000, "filename": "mercurial-4.8.2-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 4000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-4.9-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-4.9-1-x86_64.pkg.tar.xz", "version": "4.9-1", "length": 4000000, "filename": "mercurial-4.9-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 4000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-4.9.1-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-4.9.1-1-x86_64.pkg.tar.xz", "version": "4.9.1-1", "length": 4000000, "filename": "mercurial-4.9.1-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 4000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.0-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.0-1-x86_64.pkg.tar.xz", "version": "5.0-1", "length": 4000000, "filename": "mercurial-5.0-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 4000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.0.1-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.0.1-1-x86_64.pkg.tar.xz", "version": "5.0.1-1", "length": 4000000, "filename": "mercurial-5.0.1-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 4000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.0.2-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": 
"https://archive.archlinux.org/packages/m/mercurial/mercurial-5.0.2-1-x86_64.pkg.tar.xz", "version": "5.0.2-1", "length": 4000000, "filename": "mercurial-5.0.2-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 4000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.1-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.1-1-x86_64.pkg.tar.xz", "version": "5.1-1", "length": 4000000, "filename": "mercurial-5.1-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 4000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.1.2-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.1.2-1-x86_64.pkg.tar.xz", "version": "5.1.2-1", "length": 4000000, "filename": "mercurial-5.1.2-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 4000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.2-1-x86_64.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.2-1-x86_64.pkg.tar.xz", "version": "5.2-1", "length": 4000000, "filename": "mercurial-5.2-1-x86_64.pkg.tar.xz", + "checksums": { + "length": 4000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.2.1-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.2.1-1-x86_64.pkg.tar.zst", "version": "5.2.1-1", "length": 4000000, "filename": "mercurial-5.2.1-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 4000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.2.2-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.2.2-1-x86_64.pkg.tar.zst", "version": "5.2.2-1", "length": 5000000, "filename": "mercurial-5.2.2-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.2.2-2-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.2.2-2-x86_64.pkg.tar.zst", "version": "5.2.2-2", "length": 4000000, "filename": "mercurial-5.2.2-2-x86_64.pkg.tar.zst", + "checksums": { + "length": 4000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.3-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.3-1-x86_64.pkg.tar.zst", "version": "5.3-1", "length": 5000000, "filename": "mercurial-5.3-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.3.1-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.3.1-1-x86_64.pkg.tar.zst", "version": "5.3.1-1", "length": 4000000, "filename": "mercurial-5.3.1-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 4000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.3.2-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.3.2-1-x86_64.pkg.tar.zst", "version": "5.3.2-1", "length": 4000000, "filename": "mercurial-5.3.2-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 4000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.4-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": 
"https://archive.archlinux.org/packages/m/mercurial/mercurial-5.4-1-x86_64.pkg.tar.zst", "version": "5.4-1", "length": 5000000, "filename": "mercurial-5.4-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.4-2-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.4-2-x86_64.pkg.tar.zst", "version": "5.4-2", "length": 5000000, "filename": "mercurial-5.4-2-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.4.1-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.4.1-1-x86_64.pkg.tar.zst", "version": "5.4.1-1", "length": 5000000, "filename": "mercurial-5.4.1-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.4.2-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.4.2-1-x86_64.pkg.tar.zst", "version": "5.4.2-1", "length": 5000000, "filename": "mercurial-5.4.2-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.5-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.5-1-x86_64.pkg.tar.zst", "version": "5.5-1", "length": 5000000, "filename": "mercurial-5.5-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.5.1-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.5.1-1-x86_64.pkg.tar.zst", "version": "5.5.1-1", "length": 5000000, "filename": "mercurial-5.5.1-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.5.2-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.5.2-1-x86_64.pkg.tar.zst", "version": "5.5.2-1", "length": 5000000, "filename": "mercurial-5.5.2-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.6-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.6-1-x86_64.pkg.tar.zst", "version": "5.6-1", "length": 5000000, "filename": "mercurial-5.6-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.6-2-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.6-2-x86_64.pkg.tar.zst", "version": "5.6-2", "length": 5000000, "filename": "mercurial-5.6-2-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.6-3-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.6-3-x86_64.pkg.tar.zst", "version": "5.6-3", "length": 5000000, "filename": "mercurial-5.6-3-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.6.1-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": 
"https://archive.archlinux.org/packages/m/mercurial/mercurial-5.6.1-1-x86_64.pkg.tar.zst", "version": "5.6.1-1", "length": 5000000, "filename": "mercurial-5.6.1-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.7-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.7-1-x86_64.pkg.tar.zst", "version": "5.7-1", "length": 5000000, "filename": "mercurial-5.7-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.7.1-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.7.1-1-x86_64.pkg.tar.zst", "version": "5.7.1-1", "length": 5000000, "filename": "mercurial-5.7.1-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.8-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.8-1-x86_64.pkg.tar.zst", "version": "5.8-1", "length": 5000000, "filename": "mercurial-5.8-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.8-2-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.8-2-x86_64.pkg.tar.zst", "version": "5.8-2", "length": 5000000, "filename": "mercurial-5.8-2-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.8.1-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.8.1-1-x86_64.pkg.tar.zst", "version": "5.8.1-1", "length": 5000000, "filename": "mercurial-5.8.1-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.9.1-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.9.1-1-x86_64.pkg.tar.zst", "version": "5.9.1-1", "length": 5000000, "filename": "mercurial-5.9.1-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.9.1-2-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.9.1-2-x86_64.pkg.tar.zst", "version": "5.9.1-2", "length": 5000000, "filename": "mercurial-5.9.1-2-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.9.2-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.9.2-1-x86_64.pkg.tar.zst", "version": "5.9.2-1", "length": 5000000, "filename": "mercurial-5.9.2-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.9.3-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.9.3-1-x86_64.pkg.tar.zst", "version": "5.9.3-1", "length": 5000000, "filename": "mercurial-5.9.3-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": 
"https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0-1-x86_64.pkg.tar.zst", "version": "6.0-1", "length": 5000000, "filename": "mercurial-6.0-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0-2-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0-2-x86_64.pkg.tar.zst", "version": "6.0-2", "length": 5000000, "filename": "mercurial-6.0-2-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0-3-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0-3-x86_64.pkg.tar.zst", "version": "6.0-3", "length": 5000000, "filename": "mercurial-6.0-3-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0.1-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0.1-1-x86_64.pkg.tar.zst", "version": "6.0.1-1", "length": 5000000, "filename": "mercurial-6.0.1-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0.2-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0.2-1-x86_64.pkg.tar.zst", "version": "6.0.2-1", "length": 5000000, "filename": "mercurial-6.0.2-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0.3-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0.3-1-x86_64.pkg.tar.zst", "version": "6.0.3-1", "length": 5000000, "filename": "mercurial-6.0.3-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.1-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.1-1-x86_64.pkg.tar.zst", "version": "6.1-1", "length": 5000000, "filename": "mercurial-6.1-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.1-2-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.1-2-x86_64.pkg.tar.zst", "version": "6.1-2", "length": 5000000, "filename": "mercurial-6.1-2-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.1.1-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.1.1-1-x86_64.pkg.tar.zst", "version": "6.1.1-1", "length": 5000000, "filename": "mercurial-6.1.1-1-x86_64.pkg.tar.zst", + "checksums": { + "length": 5000000, + }, }, { - "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.1.2-1-x86_64.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.1.2-1-x86_64.pkg.tar.zst", "version": "6.1.2-1", "length": 5000000, "filename": "mercurial-6.1.2-1-x86_64.pkg.tar.zst", + "checksums": { + "md5": "037ff48bf6127e9d37ad7da7026a6dc0", + "sha256": "be33e7bf800d1e84714cd40029d103873e65f5a72dea19d6ad935f3439512cf8", + }, }, ], "arch_metadata": [ { "arch": "x86_64", "repo": "extra", "name": 
"mercurial", "version": "4.8.2-1", "last_modified": "2019-01-15T20:31:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "4.9-1", "last_modified": "2019-02-12T06:15:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "4.9.1-1", "last_modified": "2019-03-30T17:40:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.0-1", "last_modified": "2019-05-10T08:44:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.0.1-1", "last_modified": "2019-06-10T18:05:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.0.2-1", "last_modified": "2019-07-10T04:58:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.1-1", "last_modified": "2019-08-17T19:58:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.1.2-1", "last_modified": "2019-10-08T08:38:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.2-1", "last_modified": "2019-11-28T06:41:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.2.1-1", "last_modified": "2020-01-06T12:35:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.2.2-1", "last_modified": "2020-01-15T14:07:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.2.2-2", "last_modified": "2020-01-30T20:05:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.3-1", "last_modified": "2020-02-13T21:40:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.3.1-1", "last_modified": "2020-03-07T23:58:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.3.2-1", "last_modified": "2020-04-05T17:48:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.4-1", "last_modified": "2020-05-10T17:19:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.4-2", "last_modified": "2020-06-04T13:38:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.4.1-1", "last_modified": "2020-06-06T12:28:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.4.2-1", "last_modified": "2020-07-02T21:35:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.5-1", "last_modified": "2020-08-05T10:39:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.5.1-1", "last_modified": "2020-09-03T19:05:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.5.2-1", "last_modified": "2020-10-07T20:05:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.6-1", "last_modified": "2020-11-03T17:26:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.6-2", "last_modified": "2020-11-09T16:54:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.6-3", "last_modified": "2020-11-11T15:20:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.6.1-1", "last_modified": "2020-12-05T12:29:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.7-1", "last_modified": "2021-02-04T08:41:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.7.1-1", "last_modified": "2021-03-11T07:51:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.8-1", "last_modified": "2021-05-04T17:55:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", 
"version": "5.8-2", "last_modified": "2021-05-08T22:08:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.8.1-1", "last_modified": "2021-07-13T07:04:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.9.1-1", "last_modified": "2021-09-01T12:48:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.9.1-2", "last_modified": "2021-09-24T17:39:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.9.2-1", "last_modified": "2021-10-07T21:52:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.9.3-1", "last_modified": "2021-10-27T07:20:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.0-1", "last_modified": "2021-11-25T17:10:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.0-2", "last_modified": "2021-11-30T20:53:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.0-3", "last_modified": "2021-12-02T12:06:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.0.1-1", "last_modified": "2022-01-08T10:07:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.0.2-1", "last_modified": "2022-02-03T13:28:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.0.3-1", "last_modified": "2022-02-23T20:50:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.1-1", "last_modified": "2022-03-03T18:06:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.1-2", "last_modified": "2022-03-04T08:37:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.1.1-1", "last_modified": "2022-04-07T18:26:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.1.2-1", "last_modified": "2022-05-07T11:03:00", }, ], }, }, { "url": "https://archlinux.org/packages/community/any/python-hglib", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { - "url": "https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.1-3-any.pkg.tar.xz", # noqa: B950 + "url": "https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.1-3-any.pkg.tar.xz", "version": "2.6.1-3", "length": 40000, "filename": "python-hglib-2.6.1-3-any.pkg.tar.xz", + "checksums": { + "length": 40000, + }, }, { - "url": "https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.2-1-any.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.2-1-any.pkg.tar.zst", "version": "2.6.2-1", "length": 43000, "filename": "python-hglib-2.6.2-1-any.pkg.tar.zst", + "checksums": { + "length": 43000, + }, }, { - "url": "https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.2-2-any.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.2-2-any.pkg.tar.zst", "version": "2.6.2-2", "length": 43000, "filename": "python-hglib-2.6.2-2-any.pkg.tar.zst", + "checksums": { + "length": 43000, + }, }, { - "url": "https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.2-3-any.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.2-3-any.pkg.tar.zst", "version": "2.6.2-3", "length": 43000, "filename": "python-hglib-2.6.2-3-any.pkg.tar.zst", + "checksums": { + "length": 43000, + }, }, { - "url": 
"https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.2-4-any.pkg.tar.zst", # noqa: B950 + "url": "https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.2-4-any.pkg.tar.zst", "version": "2.6.2-4", "length": 43000, "filename": "python-hglib-2.6.2-4-any.pkg.tar.zst", + "checksums": { + "md5": "ecc6598834dc216efd938466a2425eae", + "sha256": "fd273811023e8c58090d65118d27f5c10ad10ea5d1fbdbcf88c730327cea0952", + }, }, ], "arch_metadata": [ { "arch": "any", "repo": "community", "name": "python-hglib", "version": "2.6.1-3", "last_modified": "2019-11-06T14:08:00", }, { "arch": "any", "repo": "community", "name": "python-hglib", "version": "2.6.2-1", "last_modified": "2020-11-19T22:29:00", }, { "arch": "any", "repo": "community", "name": "python-hglib", "version": "2.6.2-2", "last_modified": "2020-11-19T22:31:00", }, { "arch": "any", "repo": "community", "name": "python-hglib", "version": "2.6.2-3", "last_modified": "2020-11-19T22:35:00", }, { "arch": "any", "repo": "community", "name": "python-hglib", "version": "2.6.2-4", "last_modified": "2021-12-03T00:44:00", }, ], }, }, { "url": "https://archlinuxarm.org/packages/aarch64/gzip", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { - "url": "https://uk.mirror.archlinuxarm.org/aarch64/core/gzip-1.12-1-aarch64.pkg.tar.xz", # noqa: B950 + "url": "https://uk.mirror.archlinuxarm.org/aarch64/core/gzip-1.12-1-aarch64.pkg.tar.xz", "length": 79640, "version": "1.12-1", "filename": "gzip-1.12-1-aarch64.pkg.tar.xz", + "checksums": { + "md5": "97d1e76302213f0499f45aa4a4d329cc", + "sha256": "9065fdaf21dfcac231b0e5977599b37596a0d964f48ec0a6bff628084d636d4c", + }, } ], "arch_metadata": [ { "arch": "aarch64", "name": "gzip", "repo": "core", "version": "1.12-1", "last_modified": "2022-04-07T21:08:14", } ], }, }, { "url": "https://archlinuxarm.org/packages/aarch64/mercurial", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { - "url": "https://uk.mirror.archlinuxarm.org/aarch64/extra/mercurial-6.1.3-1-aarch64.pkg.tar.xz", # noqa: B950 + "url": "https://uk.mirror.archlinuxarm.org/aarch64/extra/mercurial-6.1.3-1-aarch64.pkg.tar.xz", "length": 4931228, "version": "6.1.3-1", "filename": "mercurial-6.1.3-1-aarch64.pkg.tar.xz", + "checksums": { + "md5": "0464390744f42faba80c323ee7c72406", + "sha256": "635edb47117e7bda0b821d86e61906c802bd880d4a30a64185d9feec1bd25db6", + }, } ], "arch_metadata": [ { "arch": "aarch64", "name": "mercurial", "repo": "extra", "version": "6.1.3-1", "last_modified": "2022-06-02T22:15:18", } ], }, }, { "url": "https://archlinuxarm.org/packages/any/python-hglib", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { - "url": "https://uk.mirror.archlinuxarm.org/any/community/python-hglib-2.6.2-4-any.pkg.tar.xz", # noqa: B950 + "url": "https://uk.mirror.archlinuxarm.org/any/community/python-hglib-2.6.2-4-any.pkg.tar.xz", "length": 41432, "version": "2.6.2-4", "filename": "python-hglib-2.6.2-4-any.pkg.tar.xz", + "checksums": { + "md5": "0f763d5e85c4ffe728153f2836838674", + "sha256": "7a873e20d1822403c8ecf0c790de02439368000e9b1b74881788a9faea8c81b6", + }, } ], "arch_metadata": [ { "arch": "any", "name": "python-hglib", "repo": "community", "version": "2.6.2-4", "last_modified": "2021-12-14T16:22:20", } ], }, }, { "url": "https://archlinuxarm.org/packages/armv7h/gzip", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { - "url": "https://uk.mirror.archlinuxarm.org/armv7h/core/gzip-1.12-1-armv7h.pkg.tar.xz", # noqa: B950 + "url": 
"https://uk.mirror.archlinuxarm.org/armv7h/core/gzip-1.12-1-armv7h.pkg.tar.xz", "length": 78468, "version": "1.12-1", "filename": "gzip-1.12-1-armv7h.pkg.tar.xz", + "checksums": { + "md5": "490c9e28db91740f1adcea64cb6ec1aa", + "sha256": "4ffc8bbede3bbdd9dd6ad6f85bb689b3f4b985655e56285691db2a1346eaf0e7", + }, } ], "arch_metadata": [ { "arch": "armv7h", "name": "gzip", "repo": "core", "version": "1.12-1", "last_modified": "2022-04-07T21:08:35", } ], }, }, { "url": "https://archlinuxarm.org/packages/armv7h/mercurial", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { - "url": "https://uk.mirror.archlinuxarm.org/armv7h/extra/mercurial-6.1.3-1-armv7h.pkg.tar.xz", # noqa: B950 + "url": "https://uk.mirror.archlinuxarm.org/armv7h/extra/mercurial-6.1.3-1-armv7h.pkg.tar.xz", "length": 4897816, "version": "6.1.3-1", "filename": "mercurial-6.1.3-1-armv7h.pkg.tar.xz", + "checksums": { + "md5": "453effa55e32be3ef9de5a58f322b9c4", + "sha256": "c1321de5890a6f53d41c1a5e339733be145221828703f13bccf3e7fc22612396", + }, } ], "arch_metadata": [ { "arch": "armv7h", "name": "mercurial", "repo": "extra", "version": "6.1.3-1", "last_modified": "2022-06-02T22:13:08", } ], }, }, ] def test_arch_lister(datadir, requests_mock_datadir, swh_scheduler): lister = ArchLister(scheduler=swh_scheduler) res = lister.run() assert res.pages == 9 - assert res.origins == 12 + assert res.origins == 11 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert [ ( scheduled.visit_type, scheduled.url, scheduled.extra_loader_arguments["artifacts"], scheduled.extra_loader_arguments["arch_metadata"], ) for scheduled in sorted(scheduler_origins, key=lambda scheduled: scheduled.url) ] == [ ( "arch", expected["url"], expected["extra_loader_arguments"]["artifacts"], expected["extra_loader_arguments"]["arch_metadata"], ) for expected in sorted(expected_origins, key=lambda expected: expected["url"]) ] diff --git a/swh/lister/aur/__init__.py b/swh/lister/aur/__init__.py index 833c72b..b4ded88 100644 --- a/swh/lister/aur/__init__.py +++ b/swh/lister/aur/__init__.py @@ -1,135 +1,135 @@ # Copyright (C) 2022 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """ AUR (Arch User Repository) lister ================================= The AUR lister list origins from `aur.archlinux.org`_, the Arch User Repository. For each package, there is a git repository, we use the git url as origin and the snapshot url as the artifact for the loader to download. Each git repository consist of a directory (for which name corresponds to the package name), and at least two files, .SRCINFO and PKGBUILD which are recipes for building the package. Each package has a version, the latest one. There isn't any archives of previous versions, so the lister will always list one version per package. As of August 2022 `aur.archlinux.org`_ list 84438 packages. Please note that this amount is the total of `regular`_ and `split`_ packages. We will archive `regular` and `split` packages but only their `pkgbase` because that is the only one that actually has source code. The packages amount is 78554 after removing the split ones. Origins retrieving strategy --------------------------- An rpc api exists but it is recommended to save bandwidth so it's not used. See `New AUR Metadata Archives`_ for more on this topic. 
To get an index of all AUR existing packages we download a `packages-meta-v1.json.gz`_ archive which contains a json file listing all existing package definitions. Each entry describes the latest released version of a package. The origin url for a package is built using `pkgbase` and corresponds to a git repository. Note that we list only standard packages (when pkgbase equals pkgname), not the ones belonging to split packages. It takes only a couple of minutes to download the 7 MB index archive and parse its content. Page listing ------------ Each page is related to one package. As it is not possible to get previous versions, each page always returns one line. Each page corresponds to a package with a `version`, a `url` for a Git repository, a `project_url` which represents the upstream project url and a canonical `snapshot_url` from which a tar.gz archive of the package can be downloaded. The data schema for each line is: * **pkgname**: Package name * **version**: Package version * **url**: Git repository url for a package * **snapshot_url**: Package download url * **project_url**: Upstream project url if any * **last_modified**: ISO8601 last update date Origins from page ----------------- The lister yields one origin per page. The origin url corresponds to the git url of a package, for example ``https://aur.archlinux.org/{package}.git``. Additionally we add some data in "extra_loader_arguments": * **artifacts**: Represents data about the AUR package snapshot to download, following :ref:`original-artifacts-json specification ` * **aur_metadata**: Stores all other interesting attributes that do not belong to artifacts. Origin data example:: { "visit_type": "aur", "url": "https://aur.archlinux.org/hg-evolve.git", "extra_loader_arguments": { "artifacts": [ { "filename": "hg-evolve.tar.gz", "url": "https://aur.archlinux.org/cgit/aur.git/snapshot/hg-evolve.tar.gz", # noqa: B950 "version": "10.5.1-1", } ], "aur_metadata": [ { "version": "10.5.1-1", "project_url": "https://www.mercurial-scm.org/doc/evolution/", "last_update": "2022-04-27T20:02:56+00:00", "pkgname": "hg-evolve", } ], }, Running tests ------------- Activate the virtualenv and run from within swh-lister directory:: pytest -s -vv --log-cli-level=DEBUG swh/lister/aur/tests Testing with Docker ------------------- Change directory to swh/docker then launch the docker environment:: - docker-compose up -d + docker compose up -d -Then connect to the lister:: +Then schedule an aur listing task:: - docker exec -it docker_swh-lister_1 bash + docker compose exec swh-scheduler swh scheduler task add -p oneshot list-aur -And run the lister (The output of this listing results in “oneshot” tasks in the scheduler):: +You can follow lister execution by displaying logs of swh-lister service:: - swh lister run -l aur + docker compose logs -f swh-lister .. _aur.archlinux.org: https://aur.archlinux.org .. _New AUR Metadata Archives: https://lists.archlinux.org/pipermail/aur-general/2021-November/036659.html .. _packages-meta-v1.json.gz: https://aur.archlinux.org/packages-meta-v1.json.gz .. _regular: https://wiki.archlinux.org/title/PKGBUILD#Package_name ..
_split: https://man.archlinux.org/man/PKGBUILD.5#PACKAGE_SPLITTING """ def register(): from .lister import AurLister return { "lister": AurLister, "task_modules": ["%s.tasks" % __name__], } diff --git a/swh/lister/aur/lister.py b/swh/lister/aur/lister.py index 778a848..9bbdf37 100644 --- a/swh/lister/aur/lister.py +++ b/swh/lister/aur/lister.py @@ -1,154 +1,152 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import logging from typing import Any, Dict, Iterator, List, Optional -import requests - from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin from ..pattern import CredentialsType, StatelessLister logger = logging.getLogger(__name__) # Aliasing the page results returned by `get_pages` method from the lister. AurListerPage = Dict[str, Any] class AurLister(StatelessLister[AurListerPage]): """List Arch User Repository (AUR) origins. Given a url (used as a base url, default is 'https://aur.archlinux.org'), download a 'packages-meta-v1.json.gz' archive which contains a json file listing all existing package definitions. Each entry describes the latest released version of a package. The origin url for a package is built using 'pkgname' and corresponds to a git repository. An RPC API exists, but to save bandwidth it is not used. See https://lists.archlinux.org/pipermail/aur-general/2021-November/036659.html for more on this. """ LISTER_NAME = "aur" VISIT_TYPE = "aur" INSTANCE = "aur" BASE_URL = "https://aur.archlinux.org" DEFAULT_PACKAGES_INDEX_URL = "{base_url}/packages-meta-v1.json.gz" PACKAGE_VCS_URL_PATTERN = "{base_url}/{pkgname}.git" PACKAGE_SNAPSHOT_URL_PATTERN = "{base_url}/cgit/aur.git/snapshot/{pkgname}.tar.gz" ORIGIN_URL_PATTERN = "{base_url}/packages/{pkgname}" def __init__( self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, ): super().__init__( scheduler=scheduler, credentials=credentials, instance=self.INSTANCE, url=self.BASE_URL, ) def download_packages_index(self) -> List[Dict[str, Any]]: """Build a url from the self.DEFAULT_PACKAGES_INDEX_URL format string and download the packages index. Returns: the list of package definitions parsed from the downloaded json document. """ url = self.DEFAULT_PACKAGES_INDEX_URL.format(base_url=self.url) - return requests.get(url).json() + return self.http_request(url).json() def get_pages(self) -> Iterator[AurListerPage]: """Yield an iterator which returns 'page' Each page corresponds to a package with a 'version', a 'url' for a Git repository, a 'project_url' which represents the upstream project url and a canonical 'snapshot_url' from which a tar.gz archive of the package can be downloaded.
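For example, a yielded page for the 'hg-evolve' package could look like this (an illustrative sketch assembled from the url patterns above and the sample values used in the module documentation, not actual lister output)::

    {
        "pkgname": "hg-evolve",
        "version": "10.5.1-1",
        "url": "https://aur.archlinux.org/packages/hg-evolve",
        "git_url": "https://aur.archlinux.org/hg-evolve.git",
        "snapshot_url": "https://aur.archlinux.org/cgit/aur.git/snapshot/hg-evolve.tar.gz",
        "project_url": "https://www.mercurial-scm.org/doc/evolution/",
        "last_modified": "2022-04-27T20:02:56+00:00",
    }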
""" packages = self.download_packages_index() logger.debug("Found %s AUR packages in aur_index", len(packages)) for package in packages: # Exclude lines where Name differs from PackageBase as they represents # split package and they don't have resolvable snapshots url if package["Name"] == package["PackageBase"]: logger.debug("Processing AUR package %s", package["Name"]) pkgname = package["PackageBase"] version = package["Version"] project_url = package["URL"] last_modified = datetime.datetime.fromtimestamp( float(package["LastModified"]), tz=datetime.timezone.utc ).isoformat() yield { "pkgname": pkgname, "version": version, "url": self.ORIGIN_URL_PATTERN.format( base_url=self.BASE_URL, pkgname=pkgname ), "git_url": self.PACKAGE_VCS_URL_PATTERN.format( base_url=self.BASE_URL, pkgname=pkgname ), "snapshot_url": self.PACKAGE_SNAPSHOT_URL_PATTERN.format( base_url=self.BASE_URL, pkgname=pkgname ), "project_url": project_url, "last_modified": last_modified, } def get_origins_from_page(self, origin: AurListerPage) -> Iterator[ListedOrigin]: """Iterate on all pages and yield ListedOrigin instances. It uses the vcs (Git) url as an origin and adds `artifacts` and `aur_metadata` entries to 'extra_loader_arguments'. `artifacts` describe the file to download and `aur_metadata` store some metadata that can be useful for the loader. """ assert self.lister_obj.id is not None last_update = datetime.datetime.fromisoformat(origin["last_modified"]) filename = origin["snapshot_url"].split("/")[-1] artifacts = [ { "filename": filename, "url": origin["snapshot_url"], "version": origin["version"], } ] aur_metadata = [ { "version": origin["version"], "project_url": origin["project_url"], "last_update": origin["last_modified"], "pkgname": origin["pkgname"], } ] yield ListedOrigin( lister_id=self.lister_obj.id, visit_type=self.VISIT_TYPE, url=origin["url"], last_update=last_update, extra_loader_arguments={ "artifacts": artifacts, "aur_metadata": aur_metadata, }, ) yield ListedOrigin( lister_id=self.lister_obj.id, visit_type="git", url=origin["git_url"], last_update=last_update, ) diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py index 6a99699..7bcec03 100644 --- a/swh/lister/bitbucket/lister.py +++ b/swh/lister/bitbucket/lister.py @@ -1,198 +1,173 @@ -# Copyright (C) 2017-2021 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from dataclasses import asdict, dataclass from datetime import datetime import logging import random from typing import Any, Dict, Iterator, List, Optional from urllib import parse import iso8601 -import requests -from tenacity.before_sleep import before_sleep_log -from swh.lister.utils import throttling_retry from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin -from .. import USER_AGENT from ..pattern import CredentialsType, Lister logger = logging.getLogger(__name__) @dataclass class BitbucketListerState: """State of Bitbucket lister""" last_repo_cdate: Optional[datetime] = None """Creation date and time of the last listed repository during an incremental pass""" class BitbucketLister(Lister[BitbucketListerState, List[Dict[str, Any]]]): """List origins from Bitbucket using its API. 
Bitbucket API has the following rate-limit configuration: * 60 requests per hour for anonymous users * 1000 requests per hour for authenticated users The lister is working in anonymous mode by default but Bitbucket account credentials can be provided to perform authenticated requests. """ LISTER_NAME = "bitbucket" INSTANCE = "bitbucket" API_URL = "https://api.bitbucket.org/2.0/repositories" def __init__( self, scheduler: SchedulerInterface, page_size: int = 1000, incremental: bool = True, credentials: CredentialsType = None, ): super().__init__( scheduler=scheduler, credentials=credentials, url=self.API_URL, instance=self.INSTANCE, ) self.incremental = incremental self.url_params: Dict[str, Any] = { "pagelen": page_size, # only return needed JSON fields in bitbucket API responses # (also prevent errors 500 when listing) "fields": ( "next,values.links.clone.href,values.scm,values.updated_on," "values.created_on" ), } - self.session = requests.Session() - self.session.headers.update( - {"Accept": "application/json", "User-Agent": USER_AGENT} - ) + self.session.headers.update({"Accept": "application/json"}) if len(self.credentials) > 0: cred = random.choice(self.credentials) logger.warning("Using Bitbucket credentials from user %s", cred["username"]) self.set_credentials(cred["username"], cred["password"]) else: logger.warning("No credentials set in configuration, using anonymous mode") def state_from_dict(self, d: Dict[str, Any]) -> BitbucketListerState: last_repo_cdate = d.get("last_repo_cdate") if last_repo_cdate is not None: d["last_repo_cdate"] = iso8601.parse_date(last_repo_cdate) return BitbucketListerState(**d) def state_to_dict(self, state: BitbucketListerState) -> Dict[str, Any]: d = asdict(state) last_repo_cdate = d.get("last_repo_cdate") if last_repo_cdate is not None: d["last_repo_cdate"] = last_repo_cdate.isoformat() return d def set_credentials(self, username: Optional[str], password: Optional[str]) -> None: """Set basic authentication headers with given credentials.""" if username is not None and password is not None: self.session.auth = (username, password) - @throttling_retry(before_sleep=before_sleep_log(logger, logging.DEBUG)) - def page_request(self, last_repo_cdate: str) -> requests.Response: - - self.url_params["after"] = last_repo_cdate - logger.debug("Fetching URL %s with params %s", self.url, self.url_params) - - response = self.session.get(self.url, params=self.url_params) - - if response.status_code != 200: - logger.warning( - "Unexpected HTTP status code %s on %s: %s", - response.status_code, - response.url, - response.content, - ) - response.raise_for_status() - - return response - def get_pages(self) -> Iterator[List[Dict[str, Any]]]: last_repo_cdate: str = "1970-01-01" if ( self.incremental and self.state is not None and self.state.last_repo_cdate is not None ): last_repo_cdate = self.state.last_repo_cdate.isoformat() while True: - body = self.page_request(last_repo_cdate).json() + self.url_params["after"] = last_repo_cdate + body = self.http_request(self.url, params=self.url_params).json() yield body["values"] next_page_url = body.get("next") if next_page_url is not None: next_page_url = parse.urlparse(next_page_url) if not next_page_url.query: logger.warning("Failed to parse url %s", next_page_url) break last_repo_cdate = parse.parse_qs(next_page_url.query)["after"][0] else: # last page break def get_origins_from_page( self, page: List[Dict[str, Any]] ) -> Iterator[ListedOrigin]: """Convert a page of Bitbucket repositories into a list of ListedOrigins.""" 
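# Illustrative shape of one entry of `page`, inferred from the `fields`
# filter set in __init__ and the accesses below (placeholder values, not
# verbatim Bitbucket API output):
#   {
#       "scm": "git",
#       "created_on": "2022-01-01T00:00:00+00:00",
#       "updated_on": "2022-01-02T00:00:00+00:00",
#       "links": {"clone": [{"href": "https://bitbucket.org/user/repo"}]},
#   }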
assert self.lister_obj.id is not None for repo in page: last_update = iso8601.parse_date(repo["updated_on"]) origin_url = repo["links"]["clone"][0]["href"] origin_type = repo["scm"] yield ListedOrigin( lister_id=self.lister_obj.id, url=origin_url, visit_type=origin_type, last_update=last_update, ) def commit_page(self, page: List[Dict[str, Any]]) -> None: """Update the currently stored state using the latest listed page.""" if self.incremental: last_repo = page[-1] last_repo_cdate = iso8601.parse_date(last_repo["created_on"]) if ( self.state.last_repo_cdate is None or last_repo_cdate > self.state.last_repo_cdate ): self.state.last_repo_cdate = last_repo_cdate def finalize(self) -> None: if self.incremental: scheduler_state = self.get_state_from_scheduler() if self.state.last_repo_cdate is None: return # Update the lister state in the backend only if the last seen id of # the current run is higher than that stored in the database. if ( scheduler_state.last_repo_cdate is None or self.state.last_repo_cdate > scheduler_state.last_repo_cdate ): self.updated = True diff --git a/swh/lister/bitbucket/tests/test_lister.py b/swh/lister/bitbucket/tests/test_lister.py index e624e8e..04df324 100644 --- a/swh/lister/bitbucket/tests/test_lister.py +++ b/swh/lister/bitbucket/tests/test_lister.py @@ -1,178 +1,180 @@ -# Copyright (C) 2017-2021 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime import json import os import pytest from swh.lister.bitbucket.lister import BitbucketLister @pytest.fixture def bb_api_repositories_page1(datadir): data_file_path = os.path.join(datadir, "bb_api_repositories_page1.json") with open(data_file_path, "r") as data_file: return json.load(data_file) @pytest.fixture def bb_api_repositories_page2(datadir): data_file_path = os.path.join(datadir, "bb_api_repositories_page2.json") with open(data_file_path, "r") as data_file: return json.load(data_file) def _check_listed_origins(lister_origins, scheduler_origins): """Asserts that the two collections have the same origins from the point of view of the lister""" assert {(lo.url, lo.last_update) for lo in lister_origins} == { (so.url, so.last_update) for so in scheduler_origins } def test_bitbucket_incremental_lister( swh_scheduler, requests_mock, mocker, bb_api_repositories_page1, bb_api_repositories_page2, ): """Simple Bitbucket listing with two pages containing 10 origins""" requests_mock.get( BitbucketLister.API_URL, [ {"json": bb_api_repositories_page1}, {"json": bb_api_repositories_page2}, ], ) lister = BitbucketLister(scheduler=swh_scheduler, page_size=10) # First listing stats = lister.run() scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert stats.pages == 2 assert stats.origins == 20 assert len(scheduler_origins) == 20 assert lister.updated lister_state = lister.get_state_from_scheduler() last_repo_cdate = lister_state.last_repo_cdate.isoformat() assert hasattr(lister_state, "last_repo_cdate") assert last_repo_cdate == bb_api_repositories_page2["values"][-1]["created_on"] # Second listing, restarting from last state - lister.session.get = mocker.spy(lister.session, "get") + lister.session.request = mocker.spy(lister.session, "request") lister.run() url_params = lister.url_params url_params["after"] = last_repo_cdate - 
lister.session.get.assert_called_once_with(lister.API_URL, params=url_params) + lister.session.request.assert_called_once_with( + "GET", lister.API_URL, params=url_params + ) all_origins = ( bb_api_repositories_page1["values"] + bb_api_repositories_page2["values"] ) _check_listed_origins(lister.get_origins_from_page(all_origins), scheduler_origins) def test_bitbucket_lister_rate_limit_hit( swh_scheduler, requests_mock, mocker, bb_api_repositories_page1, bb_api_repositories_page2, ): """Simple Bitbucket listing with two pages containing 10 origins""" requests_mock.get( BitbucketLister.API_URL, [ {"json": bb_api_repositories_page1, "status_code": 200}, {"json": None, "status_code": 429}, {"json": None, "status_code": 429}, {"json": bb_api_repositories_page2, "status_code": 200}, ], ) lister = BitbucketLister(scheduler=swh_scheduler, page_size=10) - mocker.patch.object(lister.page_request.retry, "sleep") + mocker.patch.object(lister.http_request.retry, "sleep") stats = lister.run() assert stats.pages == 2 assert stats.origins == 20 assert len(swh_scheduler.get_listed_origins(lister.lister_obj.id).results) == 20 def test_bitbucket_full_lister( swh_scheduler, requests_mock, mocker, bb_api_repositories_page1, bb_api_repositories_page2, ): """Simple Bitbucket listing with two pages containing 10 origins""" requests_mock.get( BitbucketLister.API_URL, [ {"json": bb_api_repositories_page1}, {"json": bb_api_repositories_page2}, {"json": bb_api_repositories_page1}, {"json": bb_api_repositories_page2}, ], ) credentials = {"bitbucket": {"bitbucket": [{"username": "u", "password": "p"}]}} lister = BitbucketLister( scheduler=swh_scheduler, page_size=10, incremental=True, credentials=credentials ) assert lister.session.auth is not None # First do an incremental run to have an initial lister state stats = lister.run() last_lister_state = lister.get_state_from_scheduler() assert stats.origins == 20 # Then do the full run and verify lister state did not change # Modify last listed repo modification date to check it will not be saved # to lister state after its execution last_page2_repo = bb_api_repositories_page2["values"][-1] last_page2_repo["created_on"] = datetime.now().isoformat() last_page2_repo["updated_on"] = datetime.now().isoformat() lister = BitbucketLister(scheduler=swh_scheduler, page_size=10, incremental=False) assert lister.session.auth is None stats = lister.run() assert stats.pages == 2 assert stats.origins == 20 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results # 20 because scheduler upserts based on (id, type, url) assert len(scheduler_origins) == 20 # Modification on created_on SHOULD NOT impact lister state assert lister.get_state_from_scheduler() == last_lister_state # Modification on updated_on SHOULD impact lister state all_origins = ( bb_api_repositories_page1["values"] + bb_api_repositories_page2["values"] ) _check_listed_origins(lister.get_origins_from_page(all_origins), scheduler_origins) diff --git a/swh/lister/bower/__init__.py b/swh/lister/bower/__init__.py index 1f1c017..cdf11f2 100644 --- a/swh/lister/bower/__init__.py +++ b/swh/lister/bower/__init__.py @@ -1,76 +1,76 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """ Bower lister ============ The `Bower`_ lister lists origins from its package registry `registry.bower.io`_.
Bower is a tool to manage Javascript packages. The registry provides an `http api`_ from which the lister retrieves package names and urls. As of August 2022 `registry.bower.io`_ lists 71028 package names. Note that even if the project is still maintained (security fixes, no new features), it has been recommended since 2018 not to use it anymore and to prefer Yarn as a replacement. Origins retrieving strategy --------------------------- To get a list of all package names we call the `https://registry.bower.io/packages` endpoint. There is no other way for discovery (no archive index, no database dump, no dvcs repository). Page listing ------------ There is only one page, which lists all origin urls. Origins from page ----------------- The lister yields all origin urls from that single page. It is a list of package names and urls. Origin urls correspond to Git repository urls. Bower is supposed to support Svn repositories too, but out of roughly 71000 urls I have found only 35 that may not be Git repositories. Running tests ------------- Activate the virtualenv and run from within swh-lister directory:: pytest -s -vv --log-cli-level=DEBUG swh/lister/bower/tests Testing with Docker ------------------- Change directory to swh/docker then launch the docker environment:: - docker-compose up -d + docker compose up -d -Then connect to the lister:: +Then schedule a bower listing task:: - docker exec -it docker_swh-lister_1 bash + docker compose exec swh-scheduler swh scheduler task add -p oneshot list-bower -And run the lister (The output of this listing results in “oneshot” tasks in the scheduler):: +You can follow lister execution by displaying logs of swh-lister service:: - swh lister run -l bower + docker compose logs -f swh-lister .. _Bower: https://bower.io .. _registry.bower.io: https://registry.bower.io .. _http api: https://registry.bower.io/packages """ def register(): from .lister import BowerLister return { "lister": BowerLister, "task_modules": ["%s.tasks" % __name__], } diff --git a/swh/lister/bower/lister.py b/swh/lister/bower/lister.py index f516b2b..5b488e4 100644 --- a/swh/lister/bower/lister.py +++ b/swh/lister/bower/lister.py @@ -1,91 +1,64 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import logging -from typing import Any, Dict, Iterator, List, Optional -import requests -from tenacity.before_sleep import before_sleep_log +import logging +from typing import Dict, Iterator, List, Optional -from swh.lister.utils import throttling_retry from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin -from .. import USER_AGENT from ..pattern import CredentialsType, StatelessLister logger = logging.getLogger(__name__) # Aliasing the page results returned by `get_pages` method from the lister.
BowerListerPage = List[Dict[str, str]] class BowerLister(StatelessLister[BowerListerPage]): """List Bower (Javascript package manager) origins.""" LISTER_NAME = "bower" VISIT_TYPE = "git" # Bower origin urls are Git repositories INSTANCE = "bower" API_URL = "https://registry.bower.io/packages" def __init__( self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, ): super().__init__( scheduler=scheduler, credentials=credentials, instance=self.INSTANCE, url=self.API_URL, ) - self.session = requests.Session() - self.session.headers.update( - { - "Accept": "application/json", - "User-Agent": USER_AGENT, - } - ) - - @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) - def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response: - - logger.info("Fetching URL %s with params %s", url, params) - - response = self.session.get(url, params=params) - if response.status_code != 200: - logger.warning( - "Unexpected HTTP status code %s on %s: %s", - response.status_code, - response.url, - response.content, - ) - response.raise_for_status() - - return response + self.session.headers.update({"Accept": "application/json"}) def get_pages(self) -> Iterator[BowerListerPage]: """Yield an iterator which returns 'page' It uses the api endpoint provided by `https://registry.bower.io/packages` to get a list of package names with an origin url that corresponds to a Git repository. There is only one page, which lists all origin urls. """ - response = self.page_request(url=self.url, params={}) + response = self.http_request(self.url) yield response.json() def get_origins_from_page(self, page: BowerListerPage) -> Iterator[ListedOrigin]: """Iterate on all pages and yield ListedOrigin instances.""" assert self.lister_obj.id is not None for entry in page: yield ListedOrigin( lister_id=self.lister_obj.id, visit_type=self.VISIT_TYPE, url=entry["url"], last_update=None, ) diff --git a/swh/lister/cgit/lister.py b/swh/lister/cgit/lister.py index 5ca9445..49458d0 100644 --- a/swh/lister/cgit/lister.py +++ b/swh/lister/cgit/lister.py @@ -1,234 +1,225 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime, timezone import logging import re from typing import Any, Dict, Iterator, List, Optional from urllib.parse import urljoin, urlparse from bs4 import BeautifulSoup -import requests from requests.exceptions import HTTPError -from tenacity.before_sleep import before_sleep_log -from swh.lister import USER_AGENT from swh.lister.pattern import CredentialsType, StatelessLister -from swh.lister.utils import throttling_retry from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin logger = logging.getLogger(__name__) Repositories = List[Dict[str, Any]] class CGitLister(StatelessLister[Repositories]): """Lister class for CGit repositories. This lister will retrieve the list of published git repositories by parsing the HTML page(s) of the index retrieved at `url`. The lister currently defines 2 listing behaviors: - If the `base_git_url` is provided, the listed origin urls are computed out of the base git url link and the one listed in the main listing page (resulting in fewer HTTP queries than the 2nd behavior below). This is expected to be the main deployed behavior.
- Otherwise (with no `base_git_url`), for each git repository listed, one extra HTTP query is made at the url found in the main listing page to gather the published "Clone" URLs to be used as origin URL for that git repo. If several "Clone" urls are provided, prefer the http/https one, if any; otherwise fall back to the first one. """ LISTER_NAME = "cgit" def __init__( self, scheduler: SchedulerInterface, url: str, instance: Optional[str] = None, credentials: Optional[CredentialsType] = None, base_git_url: Optional[str] = None, ): """Lister class for CGit repositories. Args: url: main URL of the CGit instance, i.e. url of the index of published git repositories on this instance. instance: Name of cgit instance. Defaults to url's network location if unset. base_git_url: Optional base git url which allows the origin url computations. """ super().__init__( scheduler=scheduler, url=url, instance=instance, credentials=credentials, ) - self.session = requests.Session() - self.session.headers.update( - {"Accept": "application/html", "User-Agent": USER_AGENT} - ) + self.session.headers.update({"Accept": "application/html"}) self.base_git_url = base_git_url - @throttling_retry(before_sleep=before_sleep_log(logger, logging.DEBUG)) def _get_and_parse(self, url: str) -> BeautifulSoup: """Get the given url and parse the retrieved HTML using BeautifulSoup""" - response = self.session.get(url) - response.raise_for_status() + response = self.http_request(url) return BeautifulSoup(response.text, features="html.parser") def get_pages(self) -> Iterator[Repositories]: """Generate git 'project' URLs found on the current CGit server The last_update date is retrieved from the repository list page, to avoid computing it from the repository details which only give a date per branch """ next_page: Optional[str] = self.url while next_page: bs_idx = self._get_and_parse(next_page) page_results = [] for tr in bs_idx.find("div", {"class": "content"}).find_all( "tr", {"class": ""} ): repository_link = tr.find("a")["href"] repo_url = None git_url = None base_url = urljoin(self.url, repository_link).strip("/") if self.base_git_url: # mapping provided # computing git url git_url = base_url.replace(self.url, self.base_git_url) else: # we compute the git detailed page url from which we will retrieve # the git url (cf.
self.get_origins_from_page) repo_url = base_url span = tr.find("span", {"class": re.compile("age-")}) last_updated_date = span.get("title") if span else None page_results.append( { "url": repo_url, "git_url": git_url, "last_updated_date": last_updated_date, } ) yield page_results try: pager = bs_idx.find("ul", {"class": "pager"}) current_page = pager.find("a", {"class": "current"}) if current_page: next_page = current_page.parent.next_sibling.a["href"] next_page = urljoin(self.url, next_page) except (AttributeError, KeyError): # no pager, or no next page next_page = None def get_origins_from_page( self, repositories: Repositories ) -> Iterator[ListedOrigin]: """Convert a page of cgit repositories into a list of ListedOrigins.""" assert self.lister_obj.id is not None for repo in repositories: origin_url = repo["git_url"] or self._get_origin_from_repository_url( repo["url"] ) if origin_url is None: continue yield ListedOrigin( lister_id=self.lister_obj.id, url=origin_url, visit_type="git", last_update=_parse_last_updated_date(repo), ) def _get_origin_from_repository_url(self, repository_url: str) -> Optional[str]: """Extract the git url from the repository page""" try: bs = self._get_and_parse(repository_url) except HTTPError as e: logger.warning( "Unexpected HTTP status code %s on %s", e.response.status_code, e.response.url, ) return None # check if we are on the summary tab, if not, go to this tab tab = bs.find("table", {"class": "tabs"}) if tab: summary_a = tab.find("a", string="summary") if summary_a: summary_url = urljoin(repository_url, summary_a["href"]).strip("/") if summary_url != repository_url: logger.debug( "%s : Active tab is not the summary, trying to load the summary page", repository_url, ) return self._get_origin_from_repository_url(summary_url) else: logger.debug("No summary tab found on %s", repository_url) # origin urls are listed on the repository page # TODO check if forcing https is better or not ? 
# <a rel='vcs-git' href='git://...'/> # <a rel='vcs-git' href='http://...'/> # <a rel='vcs-git' href='https://...'/> urls = [x["href"] for x in bs.find_all("a", {"rel": "vcs-git"})] if not urls: logger.debug("No git urls found on %s", repository_url) return None # look for the http/https url, if any, and use it as origin_url for url in urls: if urlparse(url).scheme in ("http", "https"): origin_url = url break else: # otherwise, choose the first one origin_url = urls[0] return origin_url def _parse_last_updated_date(repository: Dict[str, Any]) -> Optional[datetime]: """Parse the last updated date""" date = repository.get("last_updated_date") if not date: return None parsed_date = None for date_format in ("%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S (%Z)"): try: parsed_date = datetime.strptime(date, date_format) # force UTC to avoid naive datetime if not parsed_date.tzinfo: parsed_date = parsed_date.replace(tzinfo=timezone.utc) break except Exception: pass if not parsed_date: logger.warning( "Could not parse %s last_updated date: %s", repository["url"], date, ) return parsed_date diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/Readme.md b/swh/lister/cgit/tests/data/https_git.acdw.net/README similarity index 100% rename from swh/lister/cgit/tests/data/https_git.acdw.net/Readme.md rename to swh/lister/cgit/tests/data/https_git.acdw.net/README diff --git a/swh/lister/cgit/tests/test_lister.py b/swh/lister/cgit/tests/test_lister.py index 9b5c0c3..c6ffcf2 100644 --- a/swh/lister/cgit/tests/test_lister.py +++ b/swh/lister/cgit/tests/test_lister.py @@ -1,280 +1,280 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime, timedelta, timezone import os from typing import List import pytest from swh.core.pytest_plugin import requests_mock_datadir_factory from swh.lister import __version__ from swh.lister.cgit.lister import CGitLister, _parse_last_updated_date from swh.lister.pattern import ListerStats def test_lister_cgit_get_pages_one_page(requests_mock_datadir, swh_scheduler): url = "https://git.savannah.gnu.org/cgit/" lister_cgit = CGitLister(swh_scheduler, url=url) repos: List[List[str]] = list(lister_cgit.get_pages()) flattened_repos = sum(repos, []) assert len(flattened_repos) == 977 assert flattened_repos[0]["url"] == "https://git.savannah.gnu.org/cgit/elisp-es.git" # note the url below is NOT a subpath of /cgit/ assert ( flattened_repos[-1]["url"] == "https://git.savannah.gnu.org/path/to/yetris.git" ) # noqa # note the url below is NOT on the same server assert flattened_repos[-2]["url"] == "http://example.org/cgit/xstarcastle.git" def test_lister_cgit_get_pages_with_pages(requests_mock_datadir, swh_scheduler): url = "https://git.tizen/cgit/" lister_cgit = CGitLister(swh_scheduler, url=url) repos: List[List[str]] = list(lister_cgit.get_pages()) flattened_repos = sum(repos, []) # we should have 16 repos (listed on 3 pages) assert len(repos) == 3 assert len(flattened_repos) == 16 def test_lister_cgit_run_with_page(requests_mock_datadir, swh_scheduler): """cgit lister supports pagination""" url = "https://git.tizen/cgit/" lister_cgit = CGitLister(swh_scheduler, url=url) stats = lister_cgit.run() expected_nb_origins = 16 assert stats == ListerStats(pages=3, origins=expected_nb_origins) # test page parsing scheduler_origins = swh_scheduler.get_listed_origins( lister_cgit.lister_obj.id ).results assert len(scheduler_origins) == expected_nb_origins # test listed repositories for
listed_origin in scheduler_origins: assert listed_origin.visit_type == "git" assert listed_origin.url.startswith("https://git.tizen") # test user agent content assert len(requests_mock_datadir.request_history) != 0 for request in requests_mock_datadir.request_history: assert "User-Agent" in request.headers user_agent = request.headers["User-Agent"] - assert "Software Heritage Lister" in user_agent + assert "Software Heritage cgit lister" in user_agent assert __version__ in user_agent def test_lister_cgit_run_populates_last_update(requests_mock_datadir, swh_scheduler): """cgit lister returns last updated date""" url = "https://git.tizen/cgit" urls_without_date = [ f"https://git.tizen.org/cgit/{suffix_url}" for suffix_url in [ "All-Projects", "All-Users", "Lock-Projects", ] ] lister_cgit = CGitLister(swh_scheduler, url=url) stats = lister_cgit.run() expected_nb_origins = 16 assert stats == ListerStats(pages=3, origins=expected_nb_origins) # test page parsing scheduler_origins = swh_scheduler.get_listed_origins( lister_cgit.lister_obj.id ).results assert len(scheduler_origins) == expected_nb_origins # test listed repositories for listed_origin in scheduler_origins: if listed_origin.url in urls_without_date: assert listed_origin.last_update is None else: assert listed_origin.last_update is not None @pytest.mark.parametrize( "date_str,expected_date", [ ({}, None), ("unexpected date", None), ("2020-0140-10 10:10:10 (GMT)", None), ( "2020-01-10 10:10:10 (GMT)", datetime( year=2020, month=1, day=10, hour=10, minute=10, second=10, tzinfo=timezone.utc, ), ), ( "2019-08-04 05:10:41 +0100", datetime( year=2019, month=8, day=4, hour=5, minute=10, second=41, tzinfo=timezone(timedelta(hours=1)), ), ), ], ) def test_lister_cgit_date_parsing(date_str, expected_date): """test cgit lister date parsing""" repository = {"url": "url", "last_updated_date": date_str} assert _parse_last_updated_date(repository) == expected_date requests_mock_datadir_missing_url = requests_mock_datadir_factory( ignore_urls=[ "https://git.tizen/cgit/adaptation/ap_samsung/audio-hal-e4x12", ] ) def test_lister_cgit_get_origin_from_repo_failing( requests_mock_datadir_missing_url, swh_scheduler ): url = "https://git.tizen/cgit/" lister_cgit = CGitLister(swh_scheduler, url=url) stats = lister_cgit.run() expected_nb_origins = 15 assert stats == ListerStats(pages=3, origins=expected_nb_origins) @pytest.mark.parametrize( "credentials, expected_credentials", [ (None, []), ({"key": "value"}, []), ( {"cgit": {"tizen": [{"username": "user", "password": "pass"}]}}, [{"username": "user", "password": "pass"}], ), ], ) def test_lister_cgit_instantiation_with_credentials( credentials, expected_credentials, swh_scheduler ): url = "https://git.tizen/cgit/" lister = CGitLister( swh_scheduler, url=url, instance="tizen", credentials=credentials ) # Credentials are allowed in constructor assert lister.credentials == expected_credentials def test_lister_cgit_from_configfile(swh_scheduler_config, mocker): load_from_envvar = mocker.patch("swh.lister.pattern.load_from_envvar") load_from_envvar.return_value = { "scheduler": {"cls": "local", **swh_scheduler_config}, "url": "https://git.tizen/cgit/", "instance": "tizen", "credentials": {}, } lister = CGitLister.from_configfile() assert lister.scheduler is not None assert lister.credentials is not None @pytest.mark.parametrize( "url,base_git_url,expected_nb_origins", [ ("https://git.eclipse.org/c", "https://eclipse.org/r", 5), ("https://git.baserock.org/cgit/", "https://git.baserock.org/git/", 3), 
("https://jff.email/cgit/", "git://jff.email/opt/git/", 6), ], ) def test_lister_cgit_with_base_git_url( url, base_git_url, expected_nb_origins, requests_mock_datadir, swh_scheduler ): """With base git url provided, listed urls should be the computed origin urls""" lister_cgit = CGitLister( swh_scheduler, url=url, base_git_url=base_git_url, ) stats = lister_cgit.run() assert stats == ListerStats(pages=1, origins=expected_nb_origins) # test page parsing scheduler_origins = swh_scheduler.get_listed_origins( lister_cgit.lister_obj.id ).results assert len(scheduler_origins) == expected_nb_origins # test listed repositories for listed_origin in scheduler_origins: assert listed_origin.visit_type == "git" assert listed_origin.url.startswith(base_git_url) assert ( listed_origin.url.startswith(url) is False ), f"url should be mapped to {base_git_url}" def test_lister_cgit_get_pages_with_pages_and_retry( requests_mock_datadir, requests_mock, datadir, mocker, swh_scheduler ): url = "https://git.tizen/cgit/" with open(os.path.join(datadir, "https_git.tizen/cgit,ofs=50"), "rb") as page: requests_mock.get( f"{url}?ofs=50", [ {"content": None, "status_code": 429}, {"content": None, "status_code": 429}, {"content": page.read(), "status_code": 200}, ], ) lister_cgit = CGitLister(swh_scheduler, url=url) - mocker.patch.object(lister_cgit._get_and_parse.retry, "sleep") + mocker.patch.object(lister_cgit.http_request.retry, "sleep") repos: List[List[str]] = list(lister_cgit.get_pages()) flattened_repos = sum(repos, []) # we should have 16 repos (listed on 3 pages) assert len(repos) == 3 assert len(flattened_repos) == 16 def test_lister_cgit_summary_not_default(requests_mock_datadir, swh_scheduler): """cgit lister returns git url when the default repository tab is not the summary""" url = "https://git.acdw.net/cgit" lister_cgit = CGitLister(swh_scheduler, url=url) stats = lister_cgit.run() expected_nb_origins = 1 assert stats == ListerStats(pages=1, origins=expected_nb_origins) diff --git a/swh/lister/conda/__init__.py b/swh/lister/conda/__init__.py new file mode 100644 index 0000000..3cc6dd0 --- /dev/null +++ b/swh/lister/conda/__init__.py @@ -0,0 +1,124 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +""" +Conda lister +============ + +Anaconda is a package manager that provides tooling for datascience. + +The Conda lister list `packages`_ from Anaconda `repositories`_. +Those repositories host packages for several languages (Python, R) operating systems +and architecture. +Packages are grouped within free or commercial `channels`_. + +To instantiate a conda lister we need to give some `channel`and `arch` arguments:: + + lister = CondaLister( + scheduler=swh_scheduler, channel="free", archs=["linux-64", "osx-64", "win-64"] + ) + +The default `url` value of lister is `https://repo.anaconda.com/pkgs`. One can set another +repository url, for example:: + + lister = CondaLister( + scheduler=swh_scheduler, + url="https://conda.anaconda.org", + channel="conda-forge", + archs=["linux-64"], + ) + +Origins retrieving strategy +--------------------------- + +Each channel provides several `repodata.json`_ files that list available packages +and related versions. + +Given a channel and a list of system and architecture the lister download and parse +corresponding repodata.json. 
+ +Page listing +------------ + +The lister returns one page per channel / architecture that lists all available package +versions. + +Origins from page +----------------- + +Origin urls are built following the pattern `https://anaconda.org/{channel}/{pkgname}`. +Each origin is yielded with an `artifacts` entry in `extra_loader_arguments` that lists +artifact metadata for each archived package version. + +Origin data example for one origin with two related versions:: + + { + "url": "https://anaconda.org/conda-forge/lifetimes", + "artifacts": { + "linux-64/0.11.1-py36h9f0ad1d_1": { + "url": "https://conda.anaconda.org/conda-forge/linux-64/lifetimes-0.11.1-py36h9f0ad1d_1.tar.bz2", # noqa: B950 + "date": "2020-07-06T12:19:36.425000+00:00", + "version": "0.11.1", + "filename": "lifetimes-0.11.1-py36h9f0ad1d_1.tar.bz2", + "checksums": { + "md5": "faa398f7ba0d60ce44aa6eeded490cee", + "sha256": "f82a352dfae8abceeeaa538b220fd9c5e4aa4e59092a6a6cea70b9ec0581ea03", # noqa: B950 + }, + }, + "linux-64/0.11.1-py36hc560c46_1": { + "url": "https://conda.anaconda.org/conda-forge/linux-64/lifetimes-0.11.1-py36hc560c46_1.tar.bz2", # noqa: B950 + "date": "2020-07-06T12:19:37.032000+00:00", + "version": "0.11.1", + "filename": "lifetimes-0.11.1-py36hc560c46_1.tar.bz2", + "checksums": { + "md5": "c53a689a4c5948e84211bdfc23e3fe68", + "sha256": "76146c2ebd6e3b65928bde53a2585287759d77beba785c0eeb889ee565c0035d", # noqa: B950 + }, + }, + }, + } + +Running tests +------------- + +Activate the virtualenv and run from within the swh-lister directory:: + + pytest -s -vv --log-cli-level=DEBUG swh/lister/conda/tests + +Testing with Docker +------------------- + +Change directory to swh/docker then launch the docker environment:: + + docker compose up -d + +Then schedule a conda listing task:: + + docker compose exec swh-scheduler swh scheduler task add -p oneshot list-conda channel="free" archs="[linux-64, osx-64, win-64]" # noqa: B950 + +You can follow the lister execution by displaying the logs of the swh-lister service:: + + docker compose logs -f swh-lister + +.. _packages: https://docs.anaconda.com/anaconda/packages/pkg-docs/ +.. _Anaconda: https://anaconda.com/ +.. _repositories: https://repo.anaconda.com/pkgs/ +.. _channels: https://docs.anaconda.com/anaconda/user-guide/tasks/using-repositories/ +.. _main/linux-64: https://repo.anaconda.com/pkgs/main/linux-64/ +.. 
_repodata.json: https://repo.anaconda.com/pkgs/free/linux-64/repodata.json +""" + + +def register(): + from .lister import CondaLister + + return { + "lister": CondaLister, + "task_modules": ["%s.tasks" % __name__], + } diff --git a/swh/lister/conda/lister.py b/swh/lister/conda/lister.py new file mode 100644 index 0000000..eddc15d --- /dev/null +++ b/swh/lister/conda/lister.py @@ -0,0 +1,123 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import bz2 +from collections import defaultdict +import datetime +import json +import logging +from typing import Any, Dict, Iterator, List, Optional, Tuple + +import iso8601 + +from swh.scheduler.interface import SchedulerInterface +from swh.scheduler.model import ListedOrigin + +from ..pattern import CredentialsType, StatelessLister + +logger = logging.getLogger(__name__) + +# Aliasing the page results returned by `get_pages` method from the lister. +CondaListerPage = Tuple[str, Dict[str, Dict[str, Any]]] + + +class CondaLister(StatelessLister[CondaListerPage]): + """List Conda (anaconda.com) origins.""" + + LISTER_NAME = "conda" + VISIT_TYPE = "conda" + INSTANCE = "conda" + BASE_REPO_URL = "https://repo.anaconda.com/pkgs" + REPO_URL_PATTERN = "{url}/{channel}/{arch}/repodata.json.bz2" + ORIGIN_URL_PATTERN = "https://anaconda.org/{channel}/{pkgname}" + ARCHIVE_URL_PATTERN = "{url}/{channel}/{arch}/{filename}" + + def __init__( + self, + scheduler: SchedulerInterface, + credentials: Optional[CredentialsType] = None, + url: str = BASE_REPO_URL, + channel: str = "", + archs: List = [], + ): + super().__init__( + scheduler=scheduler, + credentials=credentials, + instance=self.INSTANCE, + url=url, + ) + self.channel: str = channel + self.archs: List[str] = archs + self.packages: Dict[str, Any] = defaultdict(dict) + self.package_dates: Dict[str, Any] = defaultdict(list) + + def get_pages(self) -> Iterator[CondaListerPage]: + """Yield an iterator which returns 'page'""" + + for arch in self.archs: + repodata_url = self.REPO_URL_PATTERN.format( + url=self.url, channel=self.channel, arch=arch + ) + response = self.http_request(url=repodata_url) + packages: Dict[str, Any] = json.loads(bz2.decompress(response.content))[ + "packages" + ] + yield (arch, packages) + + def get_origins_from_page(self, page: CondaListerPage) -> Iterator[ListedOrigin]: + """Iterate on all pages and yield ListedOrigin instances.""" + assert self.lister_obj.id is not None + arch, packages = page + + for filename, package_metadata in packages.items(): + version_key = ( + f"{arch}/{package_metadata['version']}-{package_metadata['build']}" + ) + + artifact: Dict[str, Any] = { + "filename": filename, + "url": self.ARCHIVE_URL_PATTERN.format( + url=self.url, + channel=self.channel, + filename=filename, + arch=arch, + ), + "version": version_key, + "checksums": {}, + } + + for checksum in ("md5", "sha256"): + if checksum in package_metadata: + artifact["checksums"][checksum] = package_metadata[checksum] + + self.packages[package_metadata["name"]][version_key] = artifact + + package_date = None + if "timestamp" in package_metadata: + package_date = datetime.datetime.fromtimestamp( + package_metadata["timestamp"] / 1e3, datetime.timezone.utc + ) + elif "date" in package_metadata: + package_date = iso8601.parse_date(package_metadata["date"]) + + last_update = None + if package_date: + 
artifact["date"] = package_date.isoformat() + self.package_dates[package_metadata["name"]].append(package_date) + last_update = max(self.package_dates[package_metadata["name"]]) + + yield ListedOrigin( + lister_id=self.lister_obj.id, + visit_type=self.VISIT_TYPE, + url=self.ORIGIN_URL_PATTERN.format( + channel=self.channel, pkgname=package_metadata["name"] + ), + last_update=last_update, + extra_loader_arguments={ + "artifacts": [ + v for k, v in self.packages[package_metadata["name"]].items() + ], + }, + ) diff --git a/swh/lister/conda/tasks.py b/swh/lister/conda/tasks.py new file mode 100644 index 0000000..667a998 --- /dev/null +++ b/swh/lister/conda/tasks.py @@ -0,0 +1,19 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from celery import shared_task + +from swh.lister.conda.lister import CondaLister + + +@shared_task(name=__name__ + ".CondaListerTask") +def list_conda(**lister_args): + """Lister task for Anaconda registry""" + return CondaLister.from_configfile(**lister_args).run().dict() + + +@shared_task(name=__name__ + ".ping") +def _ping(): + return "OK" diff --git a/swh/lister/conda/tests/__init__.py b/swh/lister/conda/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/swh/lister/conda/tests/data/https_conda.anaconda.org/conda-forge_linux-64_repodata.json.bz2 b/swh/lister/conda/tests/data/https_conda.anaconda.org/conda-forge_linux-64_repodata.json.bz2 new file mode 100644 index 0000000..253d200 Binary files /dev/null and b/swh/lister/conda/tests/data/https_conda.anaconda.org/conda-forge_linux-64_repodata.json.bz2 differ diff --git a/swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_free_linux-64_repodata.json.bz2 b/swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_free_linux-64_repodata.json.bz2 new file mode 100644 index 0000000..ecd16b0 Binary files /dev/null and b/swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_free_linux-64_repodata.json.bz2 differ diff --git a/swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_free_osx-64_repodata.json.bz2 b/swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_free_osx-64_repodata.json.bz2 new file mode 100644 index 0000000..e096fce Binary files /dev/null and b/swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_free_osx-64_repodata.json.bz2 differ diff --git a/swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_free_win-64_repodata.json.bz2 b/swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_free_win-64_repodata.json.bz2 new file mode 100644 index 0000000..868512b Binary files /dev/null and b/swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_free_win-64_repodata.json.bz2 differ diff --git a/swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_main_linux-64_repodata.json.bz2 b/swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_main_linux-64_repodata.json.bz2 new file mode 100644 index 0000000..42cb71a Binary files /dev/null and b/swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_main_linux-64_repodata.json.bz2 differ diff --git a/swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_pro_linux-64_repodata.json.bz2 b/swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_pro_linux-64_repodata.json.bz2 new file mode 100644 index 0000000..94bc540 Binary files /dev/null and 
b/swh/lister/conda/tests/data/https_repo.anaconda.com/pkgs_pro_linux-64_repodata.json.bz2 differ diff --git a/swh/lister/conda/tests/test_lister.py b/swh/lister/conda/tests/test_lister.py new file mode 100644 index 0000000..dd01064 --- /dev/null +++ b/swh/lister/conda/tests/test_lister.py @@ -0,0 +1,94 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.lister.conda.lister import CondaLister + + +def test_conda_lister_free_channel(datadir, requests_mock_datadir, swh_scheduler): + lister = CondaLister( + scheduler=swh_scheduler, channel="free", archs=["linux-64", "osx-64", "win-64"] + ) + res = lister.run() + + assert res.pages == 3 + assert res.origins == 11 + + +def test_conda_lister_conda_forge_channel( + datadir, requests_mock_datadir, swh_scheduler +): + lister = CondaLister( + scheduler=swh_scheduler, + url="https://conda.anaconda.org", + channel="conda-forge", + archs=["linux-64"], + ) + res = lister.run() + + assert res.pages == 1 + assert res.origins == 2 + + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results + + expected_origins = [ + { + "url": "https://anaconda.org/conda-forge/21cmfast", + "artifacts": [ + { + "url": "https://conda.anaconda.org/conda-forge/linux-64/21cmfast-3.0.2-py36h1af98f8_1.tar.bz2", # noqa: B950 + "date": "2020-11-11T16:04:49.658000+00:00", + "version": "linux-64/3.0.2-py36h1af98f8_1", + "filename": "21cmfast-3.0.2-py36h1af98f8_1.tar.bz2", + "checksums": { + "md5": "d65ab674acf3b7294ebacaec05fc5b54", + "sha256": "1154fceeb5c4ee9bb97d245713ac21eb1910237c724d2b7103747215663273c2", # noqa: B950 + }, + } + ], + }, + { + "url": "https://anaconda.org/conda-forge/lifetimes", + "artifacts": [ + { + "url": "https://conda.anaconda.org/conda-forge/linux-64/lifetimes-0.11.1-py36h9f0ad1d_1.tar.bz2", # noqa: B950 + "date": "2020-07-06T12:19:36.425000+00:00", + "version": "linux-64/0.11.1-py36h9f0ad1d_1", + "filename": "lifetimes-0.11.1-py36h9f0ad1d_1.tar.bz2", + "checksums": { + "md5": "faa398f7ba0d60ce44aa6eeded490cee", + "sha256": "f82a352dfae8abceeeaa538b220fd9c5e4aa4e59092a6a6cea70b9ec0581ea03", # noqa: B950 + }, + }, + { + "url": "https://conda.anaconda.org/conda-forge/linux-64/lifetimes-0.11.1-py36hc560c46_1.tar.bz2", # noqa: B950 + "date": "2020-07-06T12:19:37.032000+00:00", + "version": "linux-64/0.11.1-py36hc560c46_1", + "filename": "lifetimes-0.11.1-py36hc560c46_1.tar.bz2", + "checksums": { + "md5": "c53a689a4c5948e84211bdfc23e3fe68", + "sha256": "76146c2ebd6e3b65928bde53a2585287759d77beba785c0eeb889ee565c0035d", # noqa: B950 + }, + }, + ], + }, + ] + + assert len(scheduler_origins) == len(expected_origins) + + assert [ + ( + scheduled.visit_type, + scheduled.url, + scheduled.extra_loader_arguments["artifacts"], + ) + for scheduled in sorted(scheduler_origins, key=lambda scheduled: scheduled.url) + ] == [ + ( + "conda", + expected["url"], + expected["artifacts"], + ) + for expected in sorted(expected_origins, key=lambda expected: expected["url"]) + ] diff --git a/swh/lister/conda/tests/test_tasks.py b/swh/lister/conda/tests/test_tasks.py new file mode 100644 index 0000000..b9c0f6e --- /dev/null +++ b/swh/lister/conda/tests/test_tasks.py @@ -0,0 +1,31 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License 
version 3, or any later version +# See top-level LICENSE file for more information + +from swh.lister.pattern import ListerStats + + +def test_conda_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker): + res = swh_scheduler_celery_app.send_task("swh.lister.conda.tasks.ping") + assert res + res.wait() + assert res.successful() + assert res.result == "OK" + + +def test_conda_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker): + # setup the mocked CondaLister + lister = mocker.patch("swh.lister.conda.tasks.CondaLister") + lister.from_configfile.return_value = lister + stats = ListerStats(pages=42, origins=42) + lister.run.return_value = stats + + res = swh_scheduler_celery_app.send_task("swh.lister.conda.tasks.CondaListerTask") + assert res + res.wait() + assert res.successful() + assert res.result == stats.dict() + + lister.from_configfile.assert_called_once_with() + lister.run.assert_called_once_with() diff --git a/swh/lister/cpan/__init__.py b/swh/lister/cpan/__init__.py new file mode 100644 index 0000000..2624112 --- /dev/null +++ b/swh/lister/cpan/__init__.py @@ -0,0 +1,73 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +""" +Cpan lister +============= + +The Cpan lister lists origins from `cpan.org`_, the Comprehensive Perl Archive +Network. It provides search features via `metacpan.org`_. + +As of September 2022 `cpan.org`_ lists 43675 package names. + +Origins retrieving strategy +--------------------------- + +To get a list of all package names and their associated release artifacts we first call +an `http api endpoint`_ that retrieves results and a ``_scroll_id`` that will +be used to scroll pages through the `search`_ endpoint. + +Page listing +------------ + +Each page returns a list of ``results`` which are raw data from the api response. + +Origins from page +----------------- + +The origin url is the html page corresponding to a package name on `metacpan.org`_, following +this pattern:: + + "https://metacpan.org/dist/{pkgname}" + +Running tests +------------- + +Activate the virtualenv and run from within the swh-lister directory:: + + pytest -s -vv --log-cli-level=DEBUG swh/lister/cpan/tests + +Testing with Docker +------------------- + +Change directory to swh/docker then launch the docker environment:: + + docker compose up -d + +Then schedule a Cpan listing task:: + + docker compose exec swh-scheduler swh scheduler task add -p oneshot list-cpan + +You can follow the lister execution by displaying the logs of the swh-lister service:: + + docker compose logs -f swh-lister + +.. _cpan.org: https://cpan.org/ +.. _metacpan.org: https://metacpan.org/ +.. _http api endpoint: https://explorer.metacpan.org/?url=/release/ +.. 
_search: https://github.com/metacpan/metacpan-api/blob/master/docs/API-docs.md#search-without-constraints # noqa: B950 + + +""" + + +def register(): + from .lister import CpanLister + + return { + "lister": CpanLister, + "task_modules": ["%s.tasks" % __name__], + } diff --git a/swh/lister/cpan/lister.py b/swh/lister/cpan/lister.py new file mode 100644 index 0000000..32f7479 --- /dev/null +++ b/swh/lister/cpan/lister.py @@ -0,0 +1,199 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from collections import defaultdict +from datetime import datetime +import logging +from typing import Any, Dict, Iterator, List, Optional, Set, Union + +import iso8601 + +from swh.scheduler.interface import SchedulerInterface +from swh.scheduler.model import ListedOrigin + +from ..pattern import CredentialsType, StatelessLister + +logger = logging.getLogger(__name__) + +# Aliasing the page results returned by `get_pages` method from the lister. +CpanListerPage = Set[str] + + +def get_field_value(entry, field_name): + """ + Splits ``field_name`` on ``.``, and use it as path in the nested ``entry`` + dictionary. If a value does not exist, returns None. + + >>> entry = {"_source": {"foo": 1, "bar": {"baz": 2, "qux": [3]}}} + >>> get_field_value(entry, "foo") + 1 + >>> get_field_value(entry, "bar") + {'baz': 2, 'qux': [3]} + >>> get_field_value(entry, "bar.baz") + 2 + >>> get_field_value(entry, "bar.qux") + 3 + """ + fields = field_name.split(".") + field_value = entry["_source"] + for field in fields[:-1]: + field_value = field_value.get(field, {}) + field_value = field_value.get(fields[-1]) + # scrolled results might have field value in a list + if isinstance(field_value, list): + field_value = field_value[0] + return field_value + + +def get_module_version( + module_name: str, module_version: Union[str, float, int], release_name: str +) -> str: + # some old versions fail to be parsed and cpan api set version to 0 + if module_version == 0: + prefix = f"{module_name}-" + if release_name.startswith(prefix): + # extract version from release name + module_version = release_name.replace(prefix, "", 1) + return str(module_version) + + +class CpanLister(StatelessLister[CpanListerPage]): + """The Cpan lister list origins from 'Cpan', the Comprehensive Perl Archive + Network.""" + + LISTER_NAME = "cpan" + VISIT_TYPE = "cpan" + INSTANCE = "cpan" + + API_BASE_URL = "https://fastapi.metacpan.org/v1" + REQUIRED_DOC_FIELDS = [ + "download_url", + "checksum_sha256", + "distribution", + "version", + ] + OPTIONAL_DOC_FIELDS = ["date", "author", "stat.size", "name", "metadata.author"] + ORIGIN_URL_PATTERN = "https://metacpan.org/dist/{module_name}" + + def __init__( + self, + scheduler: SchedulerInterface, + credentials: Optional[CredentialsType] = None, + ): + super().__init__( + scheduler=scheduler, + credentials=credentials, + instance=self.INSTANCE, + url=self.API_BASE_URL, + ) + + self.artifacts: Dict[str, List[Dict[str, Any]]] = defaultdict(list) + self.module_metadata: Dict[str, List[Dict[str, Any]]] = defaultdict(list) + self.release_dates: Dict[str, List[datetime]] = defaultdict(list) + self.module_names: Set[str] = set() + + def process_release_page(self, page: List[Dict[str, Any]]): + for entry in page: + + if "_source" not in entry or not all( + k in entry["_source"].keys() for k in self.REQUIRED_DOC_FIELDS + ): + 
logger.warning( + "Skipping release entry %s as some required fields are missing", + entry.get("_source"), + ) + continue + + module_name = get_field_value(entry, "distribution") + module_version = get_field_value(entry, "version") + module_download_url = get_field_value(entry, "download_url") + module_sha256_checksum = get_field_value(entry, "checksum_sha256") + module_date = get_field_value(entry, "date") + module_size = get_field_value(entry, "stat.size") + module_author = get_field_value(entry, "author") + module_author_fullname = get_field_value(entry, "metadata.author") + release_name = get_field_value(entry, "name") + + module_version = get_module_version( + module_name, module_version, release_name + ) + + self.artifacts[module_name].append( + { + "url": module_download_url, + "filename": module_download_url.split("/")[-1], + "checksums": {"sha256": module_sha256_checksum}, + "version": module_version, + "length": module_size, + } + ) + + self.module_metadata[module_name].append( + { + "name": module_name, + "version": module_version, + "cpan_author": module_author, + "author": ( + module_author_fullname + if module_author_fullname not in (None, "", "unknown") + else module_author + ), + "date": module_date, + "release_name": release_name, + } + ) + + self.release_dates[module_name].append(iso8601.parse_date(module_date)) + + self.module_names.add(module_name) + + def get_pages(self) -> Iterator[CpanListerPage]: + """Yield an iterator which returns 'page'""" + + endpoint = f"{self.API_BASE_URL}/release/_search" + scrollendpoint = f"{self.API_BASE_URL}/_search/scroll" + size = 1000 + + res = self.http_request( + endpoint, + params={ + "_source": self.REQUIRED_DOC_FIELDS + self.OPTIONAL_DOC_FIELDS, + "size": size, + "scroll": "1m", + }, + ) + data = res.json()["hits"]["hits"] + self.process_release_page(data) + + _scroll_id = res.json()["_scroll_id"] + + while data: + scroll_res = self.http_request( + scrollendpoint, params={"scroll": "1m", "scroll_id": _scroll_id} + ) + data = scroll_res.json()["hits"]["hits"] + _scroll_id = scroll_res.json()["_scroll_id"] + self.process_release_page(data) + + yield self.module_names + + def get_origins_from_page( + self, module_names: CpanListerPage + ) -> Iterator[ListedOrigin]: + """Iterate on all pages and yield ListedOrigin instances.""" + assert self.lister_obj.id is not None + + for module_name in module_names: + yield ListedOrigin( + lister_id=self.lister_obj.id, + visit_type=self.VISIT_TYPE, + url=self.ORIGIN_URL_PATTERN.format(module_name=module_name), + last_update=max(self.release_dates[module_name]), + extra_loader_arguments={ + "api_base_url": self.API_BASE_URL, + "artifacts": self.artifacts[module_name], + "module_metadata": self.module_metadata[module_name], + }, + ) diff --git a/swh/lister/cpan/tasks.py b/swh/lister/cpan/tasks.py new file mode 100644 index 0000000..80259cc --- /dev/null +++ b/swh/lister/cpan/tasks.py @@ -0,0 +1,19 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from celery import shared_task + +from swh.lister.cpan.lister import CpanLister + + +@shared_task(name=__name__ + ".CpanListerTask") +def list_cpan(**lister_args): + """Lister task for Cpan""" + return CpanLister.from_configfile(**lister_args).run().dict() + + +@shared_task(name=__name__ + ".ping") +def _ping(): + return "OK" diff --git 
a/swh/lister/cpan/tests/__init__.py b/swh/lister/cpan/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page1 b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page1 new file mode 100644 index 0000000..2eac909 --- /dev/null +++ b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page1 @@ -0,0 +1,247 @@ +{ + "_shards": { + "failed": 0, + "total": 3, + "successful": 3 + }, + "_scroll_id": "cXVlcnlUaGVuRmV0Y2g7Mzs5OTQ2NzY3ODU6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTQ2NzY3ODQ6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTUyMzQzMTA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw==", + "terminated_early": true, + "took": 3, + "hits": { + "max_score": 1.0, + "hits": [ + { + "_score": 1.0, + "_source": { + "author": "KIMOTO", + "name": "Validator-Custom-0.1210", + "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/Validator-Custom-0.1210.tar.gz", + "version": "0.1210", + "distribution": "Validator-Custom", + "date": "2010-08-14T01:41:56", + "stat": { + "size": 17608 + }, + "checksum_sha256": "f7240f7793ced2952701f0ed28ecf43c07cc2fa4549cc505831eceb8424cba4a", + "metadata": { + "author": [ + "Yuki Kimoto " + ] + } + }, + "_type": "release", + "_index": "cpan_v1_01", + "_id": "VGApYqMT4TCxUzHcITn8ZhGHlxE" + }, + { + "_type": "release", + "_id": "ilQN4bpIIdRl6DoiB3y47fgNIk8", + "_index": "cpan_v1_01", + "_score": 1.0, + "_source": { + "author": "KIMOTO", + "name": "Validator-Custom-0.1208", + "date": "2010-07-28T23:00:52", + "distribution": "Validator-Custom", + "version": "0.1208", + "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/Validator-Custom-0.1208.tar.gz", + "metadata": { + "author": [ + "Yuki Kimoto " + ] + }, + "checksum_sha256": "e33a860b026cad852eb919da4a3645007b47e5f414eb7272534b10cee279b52b", + "stat": { + "size": 17489 + } + } + }, + { + "_source": { + "author": "KIMOTO", + "name": "DBIx-Custom-0.1619", + "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/DBIx-Custom-0.1619.tar.gz", + "version": "0.1619", + "distribution": "DBIx-Custom", + "date": "2010-10-20T15:01:35", + "stat": { + "size": 27195 + }, + "checksum_sha256": "83c295343f48ebc03029139082345c93527ffe5831820f99e4a72ee67ef186a5", + "metadata": { + "author": [ + "unknown" + ] + } + }, + "_score": 1.0, + "_id": "g7562_4h9d693lxvc_cgEOTJAZk", + "_index": "cpan_v1_01", + "_type": "release" + }, + { + "_score": 1.0, + "_source": { + "author": "KIMOTO", + "name": "DBIx-Custom-0.1401", + "version": "0.1401", + "distribution": "DBIx-Custom", + "date": "2010-05-01T23:29:22", + "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/DBIx-Custom-0.1401.tar.gz", + "checksum_sha256": "004be1d48b6819941b3cb3c53bf457799d811348e0bb15e7cf18211505637aba", + "metadata": { + "author": [ + "Yuki Kimoto " + ] + }, + "stat": { + "size": 22711 + } + }, + "_type": "release", + "_id": "bLRsOH2sevNQ6Q93exgkvCZONo0", + "_index": "cpan_v1_01" + }, + { + "_type": "release", + "_index": "cpan_v1_01", + "_id": "D8L3qWKznn0IQZrZEeDi9uyXbJY", + "_score": 1.0, + "_source": { + "author": "KIMOTO", + "name": "DBIx-Custom-0.1301", + "stat": { + "size": 22655 + }, + "metadata": { + "author": [ + "Yuki Kimoto " + ] + }, + "checksum_sha256": "6b39e3ad2bc98f06af3a75c96cd8c056a25f7501ed216a375472c8fe7bbb72be", + "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/DBIx-Custom-0.1301.tar.gz", + "distribution": "DBIx-Custom", + "version": "0.1301", + "date": "2010-05-01T13:02:19" + } + }, + 
{ + "_score": 1.0, + "_source": { + "author": "KIMOTO", + "name": "DBIx-Custom-0.1602", + "metadata": { + "author": [ + "Yuki Kimoto " + ] + }, + "checksum_sha256": "7a7e18514e171a6c55ef4c8aef92bd548b15ffd7dec4c1fdc83c276a032f6b8a", + "stat": { + "size": 18999 + }, + "date": "2010-06-25T12:11:33", + "distribution": "DBIx-Custom", + "version": "0.1602", + "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/DBIx-Custom-0.1602.tar.gz" + }, + "_type": "release", + "_index": "cpan_v1_01", + "_id": "kmzgsMLGdsuiHjrSW55lLwMRO4o" + }, + { + "_source": { + "author": "KIMOTO", + "name": "Validator-Custom-0.1204", + "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/Validator-Custom-0.1204.tar.gz", + "version": "0.1204", + "distribution": "Validator-Custom", + "date": "2010-07-08T13:14:23", + "stat": { + "size": 13256 + }, + "checksum_sha256": "40800b3d92cebc09967b61725cecdd05de2b04649f095e3034c5dd82f3d4ad89", + "metadata": { + "author": [ + "Yuki Kimoto " + ] + } + }, + "_score": 1.0, + "_index": "cpan_v1_01", + "_id": "M_lLALu56mb_cDK_jAXwUB2PUlw", + "_type": "release" + }, + { + "_id": "EVuvfiFcvtEr9Ne5Q4QoMAaxe7E", + "_index": "cpan_v1_01", + "_type": "release", + "_source": { + "author": "KIMOTO", + "name": "Validator-Custom-0.1203", + "stat": { + "size": 12572 + }, + "metadata": { + "author": [ + "Yuki Kimoto " + ] + }, + "checksum_sha256": "028a0b41c152c585143167464bed2ac6b6680c8006aa80867f9a8faa4ca5efe7", + "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/Validator-Custom-0.1203.tar.gz", + "date": "2010-07-07T13:29:41", + "distribution": "Validator-Custom", + "version": "0.1203" + }, + "_score": 1.0 + }, + { + "_index": "cpan_v1_01", + "_id": "ZaT8bwXejVTHmrzZCqNJPRFImBY", + "_type": "release", + "_source": { + "author": "KIMOTO", + "name": "DBIx-Custom-0.1641", + "metadata": { + "author": [ + "Yuki Kimoto " + ] + }, + "checksum_sha256": "940412af9b7faf4c946a5e4d57ca52e5b704e49c4d7d0aa5ecb6d2286477ebc6", + "stat": { + "size": 40480 + }, + "distribution": "DBIx-Custom", + "version": "0.1641", + "date": "2011-01-27T05:19:14", + "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/DBIx-Custom-0.1641.tar.gz" + }, + "_score": 1.0 + }, + { + "_source": { + "author": "KIMOTO", + "name": "DBIx-Custom-0.1646", + "version": "0.1646", + "distribution": "DBIx-Custom", + "date": "2011-02-18T17:48:52", + "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/DBIx-Custom-0.1646.tar.gz", + "metadata": { + "author": [ + "Yuki Kimoto " + ] + }, + "checksum_sha256": "7f729311e3e22d36b158e62b42ab2fbd29f08eabd57206e235db939d1ae57d24", + "stat": { + "size": 46577 + } + }, + "_score": 1.0, + "_index": "cpan_v1_01", + "_id": "j21QIzHRYZKz1vobyGAPa2BuO50", + "_type": "release" + } + ], + "total": 359941 + }, + "timed_out": false +} \ No newline at end of file diff --git a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page2 b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page2 new file mode 100644 index 0000000..c98d4b1 --- /dev/null +++ b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page2 @@ -0,0 +1,39 @@ +{ + "_shards": { + "successful": 3, + "failed": 0, + "total": 3 + }, + "hits": { + "max_score": 16.105877, + "hits": [ + { + "_id": "FM3U2W_LR4pgKJepBaDKUb4WEy0", + "_index": "cpan_v1_01", + "_type": "release", + "_source": { + "distribution": "UDPServersAndClients", + "date": "2006-04-20T00:03:25", + "checksum_sha256": 
"763da87c32e65cc7ff72d70a503b4e9497f6b506c174b82c97671af8667c1922", + "stat": { + "size": 5576 + }, + "author": "ROBINBANK", + "version": 0, + "download_url": "https://cpan.metacpan.org/authors/id/R/RO/ROBINBANK/UDPServersAndClients.zip", + "metadata": { + "author": [ + "unknown" + ] + }, + "name": "UDPServersAndClients" + }, + "_score": 16.105877 + } + ], + "total": 1 + }, + "took": 2, + "timed_out": false, + "_scroll_id": "cXVlcnlUaGVuRmV0Y2g7Mzs5OTQ2NzY3ODU6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTQ2NzY3ODQ6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTUyMzQzMTA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw==" +} \ No newline at end of file diff --git a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page3 b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page3 new file mode 100644 index 0000000..59011f9 --- /dev/null +++ b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page3 @@ -0,0 +1,85 @@ +{ + "took": 3, + "_shards": { + "successful": 3, + "failed": 0, + "total": 3 + }, + "timed_out": false, + "hits": { + "max_score": 13.962857, + "hits": [ + { + "_score": 13.962857, + "_type": "release", + "_source": { + "version": 0, + "checksum_sha256": "a19fa7e735ea3406dfeb9c72f35fb2b64fda1e8035ce6ba0fabc15ce1c1e2f41", + "metadata": { + "author": [ + "unknown" + ] + }, + "author": "MICB", + "name": "Compiler-a3", + "download_url": "https://cpan.metacpan.org/authors/id/M/MI/MICB/Compiler-a3.tar.gz", + "date": "1996-09-02T14:04:00", + "stat": { + "size": 89134 + }, + "distribution": "Compiler" + }, + "_id": "aBI9p6X_yq6r9e8pk7U17pbZMPM", + "_index": "cpan_v1_01" + }, + { + "_score": 13.707853, + "_source": { + "checksum_sha256": "def01b544d23c76ec19cc2288a3295b39abcdbdea6dbded5b7fe6d17cd4525de", + "version": 0, + "name": "Compiler-a2", + "author": "MICB", + "metadata": { + "author": [ + "unknown" + ] + }, + "date": "1996-08-22T14:30:00", + "download_url": "https://cpan.metacpan.org/authors/id/M/MI/MICB/Compiler-a2.tar.gz", + "distribution": "Compiler", + "stat": { + "size": 85123 + } + }, + "_type": "release", + "_id": "fG9UelWPReQei13FQ4EAHytuZCo", + "_index": "cpan_v1_01" + }, + { + "_source": { + "checksum_sha256": "b1f7afd4fa8825adf2c17a0cbd8706484e6d2da5294786a5e6e49c205708ee41", + "version": 0, + "name": "Compiler-a1", + "metadata": { + "author": [ + "unknown" + ] + }, + "author": "MICB", + "date": "1996-05-13T11:39:00", + "download_url": "https://cpan.metacpan.org/authors/id/M/MI/MICB/Compiler-a1.tar.gz", + "stat": { + "size": 61093 + }, + "distribution": "Compiler" + }, + "_type": "release", + "_id": "8H7BRLllDoyILyqsjjV8sqkBpQY", + "_index": "cpan_v1_01", + "_score": 13.572314 + } + ], + "total": 3 + }, + "_scroll_id": "cXVlcnlUaGVuRmV0Y2g7Mzs5OTQ2NzY3ODU6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTQ2NzY3ODQ6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTUyMzQzMTA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw==" +} \ No newline at end of file diff --git a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page4 b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page4 new file mode 100644 index 0000000..5d6b861 --- /dev/null +++ b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page4 @@ -0,0 +1,131 @@ +{ + "timed_out": false, + "_shards": { + "failed": 0, + "total": 3, + "successful": 3 + }, + "took": 14, + "hits": { + "total": 5, + "hits": [ + { + "_score": 14.460719, + "_type": "release", + "_source": { + "stat": { + "size": 10738 + }, + "download_url": 
"https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.03-TRIAL1.tar.gz", + "distribution": "Call-Context", + "author": "FELIPE", + "version": "0.03-TRIAL1", + "checksum_sha256": "82aa854d6ae68342b58361b089c7f480b5b75e94f0c85c1d311f8cace1bfadea", + "metadata": { + "author": [ + "Felipe Gasper (FELIPE)" + ] + }, + "name": "Call-Context-0.03-TRIAL1", + "date": "2018-10-25T03:47:31" + }, + "_index": "cpan_v1_01", + "_id": "Cjw1voci7z74uflSPriBTT_A_5c" + }, + { + "_id": "VdVDByg5PHxbDh9HnvKAzf8QOws", + "_index": "cpan_v1_01", + "_source": { + "download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.01.tar.gz", + "stat": { + "size": 10019 + }, + "author": "FELIPE", + "distribution": "Call-Context", + "version": 0.01, + "date": "2016-11-12T23:12:54", + "checksum_sha256": "21bf762ef5b3cbf1047192c2a3c499e9bd315b11e5530bd133856cdf87187b24", + "name": "Call-Context-0.01", + "metadata": { + "author": [ + "Felipe Gasper (FELIPE)" + ] + } + }, + "_type": "release", + "_score": 14.460719 + }, + { + "_score": 14.314282, + "_id": "_MA6FD8SOhOmTG8JUhvl3CN186I", + "_type": "release", + "_source": { + "stat": { + "size": 10046 + }, + "download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.02.tar.gz", + "distribution": "Call-Context", + "author": "FELIPE", + "version": 0.02, + "metadata": { + "author": [ + "Felipe Gasper (FELIPE)" + ] + }, + "checksum_sha256": "b80d977f1df0e08bda2808124cd7218ad83f802e1a54aa258e17748ff5c02a0a", + "name": "Call-Context-0.02", + "date": "2016-11-13T01:07:43" + }, + "_index": "cpan_v1_01" + }, + { + "_id": "veMmCu9wirwpTX7czbuQq6SnKQQ", + "_type": "release", + "_source": { + "stat": { + "size": 10741 + }, + "download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.03-TRIAL2.tar.gz", + "distribution": "Call-Context", + "author": "FELIPE", + "version": "0.03-TRIAL2", + "name": "Call-Context-0.03-TRIAL2", + "metadata": { + "author": [ + "Felipe Gasper (FELIPE)" + ] + }, + "checksum_sha256": "4ca799d81fc96a774f4f315c38eb3e53616322c332d47f1e3f756814b5bf4b5e", + "date": "2018-10-26T13:56:41" + }, + "_index": "cpan_v1_01", + "_score": 14.291793 + }, + { + "_type": "release", + "_source": { + "version": "0.03", + "date": "2018-10-27T00:20:13", + "checksum_sha256": "0ee6bf46bc72755adb7a6b08e79d12e207de5f7809707b3c353b58cb2f0b5a26", + "metadata": { + "author": [ + "Felipe Gasper (FELIPE)" + ] + }, + "name": "Call-Context-0.03", + "download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.03.tar.gz", + "stat": { + "size": 10730 + }, + "author": "FELIPE", + "distribution": "Call-Context" + }, + "_index": "cpan_v1_01", + "_id": "CAAVfGh_7XpKnzpnLVaBKg8IPMM", + "_score": 14.291793 + } + ], + "max_score": 14.460719 + }, + "_scroll_id": "cXVlcnlUaGVuRmV0Y2g7Mzs5OTQ2NzY3ODU6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTQ2NzY3ODQ6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTUyMzQzMTA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw==" +} \ No newline at end of file diff --git a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1_release__search b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1_release__search new file mode 100644 index 0000000..cb3dabf --- /dev/null +++ b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1_release__search @@ -0,0 +1,246 @@ +{ + "timed_out": false, + "_shards": { + "total": 3, + "failed": 0, + "successful": 3 + }, + "hits": { + "hits": [ + { + "_index": "cpan_v1_01", + "_id": "40MmOvf_SQx_mr8Kj9Eush14a3E", + "_source": { + "author": "KRYDE", + "name": "math-image-46", + "date": 
"2011-03-02T00:46:14", + "download_url": "https://cpan.metacpan.org/authors/id/K/KR/KRYDE/math-image-46.tar.gz", + "checksum_sha256": "6bd988e3959feb1071d3b9953d16e723af66bdb7b5440ea17add8709d95f20fa", + "version": "46", + "stat": { + "size": 533502 + }, + "distribution": "math-image", + "metadata": { + "author": [ + "Kevin Ryde " + ] + } + }, + "_type": "release", + "_score": 1.0 + }, + { + "_index": "cpan_v1_01", + "_source": { + "author": "MITHALDU", + "name": "Dist-Zilla-Plugin-ProgCriticTests-1.101580-TRIAL", + "distribution": "Dist-Zilla-Plugin-ProgCriticTests", + "metadata": { + "author": [ + "Christian Walde " + ] + }, + "stat": { + "size": 16918 + }, + "checksum_sha256": "ef8c92d0fc55551392a6daeee20a1c13a3ee1bcd0fcacf611cbc2a6cc503f401", + "download_url": "https://cpan.metacpan.org/authors/id/M/MI/MITHALDU/Dist-Zilla-Plugin-ProgCriticTests-1.101580-TRIAL.tar.gz", + "date": "2010-06-07T14:43:36", + "version": "1.101580" + }, + "_id": "6df77_MLO_BG8YC_vQKsay7OFYM", + "_type": "release", + "_score": 1.0 + }, + { + "_index": "cpan_v1_01", + "_type": "release", + "_source": { + "author": "MITHUN", + "name": "Net-Rapidshare-v0.04", + "download_url": "https://cpan.metacpan.org/authors/id/M/MI/MITHUN/Net-Rapidshare-v0.04.tar.gz", + "date": "2009-07-28T05:57:26", + "checksum_sha256": "f01456a8f8c2b6806a8dd041cf848f330884573d363b28c8b3ff12e837fa8f4f", + "version": "v0.04", + "distribution": "Net-Rapidshare", + "metadata": { + "author": [ + "unknown" + ] + }, + "stat": { + "size": 15068 + } + }, + "_id": "jCs3ZLWuoetrkMLOFKV3YTSr_fM", + "_score": 1.0 + }, + { + "_index": "cpan_v1_01", + "_source": { + "author": "MITHUN", + "name": "Net-Rapidshare-v0.05", + "version": "v0.05", + "download_url": "https://cpan.metacpan.org/authors/id/M/MI/MITHUN/Net-Rapidshare-v0.05.tgz", + "date": "2009-12-21T00:29:48", + "checksum_sha256": "e1128d3b35616530d9722d0fe3f5f0e343fd914bc8f9c0df55c1a9ad6c7402fe", + "metadata": { + "author": [ + "unknown" + ] + }, + "distribution": "Net-Rapidshare", + "stat": { + "size": 15971 + } + }, + "_id": "pExMIwabhz_0S1rX7xAY_lq0GTY", + "_type": "release", + "_score": 1.0 + }, + { + "_type": "release", + "_source": { + "author": "MITHUN", + "name": "Net-Rapidshare-v0.0.1", + "version": "v0.0.1", + "download_url": "https://cpan.metacpan.org/authors/id/M/MI/MITHUN/Net-Rapidshare-v0.0.1.tar.gz", + "checksum_sha256": "990de0a72753fa182e7a5867e55fd6755375b71280bb7e5b3a5f07c4de8af905", + "date": "2009-07-18T22:56:38", + "stat": { + "size": 15161 + }, + "metadata": { + "author": [ + "unknown" + ] + }, + "distribution": "Net-Rapidshare" + }, + "_id": "eqkhDnj0efXHisWRrMZZ1EHFgug", + "_index": "cpan_v1_01", + "_score": 1.0 + }, + { + "_score": 1.0, + "_index": "cpan_v1_01", + "_type": "release", + "_source": { + "author": "KIMOTO", + "name": "DBIx-Custom-Basic-0.0101", + "stat": { + "size": 3409 + }, + "distribution": "DBIx-Custom-Basic", + "metadata": { + "author": [ + "Yuki Kimoto " + ] + }, + "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/DBIx-Custom-Basic-0.0101.tar.gz", + "date": "2009-11-08T04:18:30", + "checksum_sha256": "86f68b2d0789934aa6b0202345e9807c5b650f8030b55d0d669ef25293fa3f1f", + "version": "0.0101" + }, + "_id": "oKf3t0pXHXa6mZ_4sUZSaSMKuXg" + }, + { + "_score": 1.0, + "_index": "cpan_v1_01", + "_source": { + "author": "KIMOTO", + "name": "DBIx-Custom-SQLite-0.0101", + "version": "0.0101", + "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/DBIx-Custom-SQLite-0.0101.tar.gz", + "date": "2009-11-08T04:20:31", + "checksum_sha256": 
"0af123551dff95f9654f4fbc24e945c5d6481b92e67b8e03ca91ef4c83088cc7", + "metadata": { + "author": [ + "Yuki Kimoto " + ] + }, + "distribution": "DBIx-Custom-SQLite", + "stat": { + "size": 3927 + } + }, + "_type": "release", + "_id": "zpVA3zMoUhx0mj8Cn4YC9CuFyA8" + }, + { + "_index": "cpan_v1_01", + "_source": { + "author": "KIMOTO", + "name": "Validator-Custom-Ext-Mojolicious-0.0103", + "checksum_sha256": "0911fe6ae65f9173c6eb68b6116600552b088939b94881be3c7275344b1cbdce", + "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/Validator-Custom-Ext-Mojolicious-0.0103.tar.gz", + "date": "2010-01-16T14:51:11", + "version": "0.0103", + "stat": { + "size": 4190 + }, + "distribution": "Validator-Custom-Ext-Mojolicious", + "metadata": { + "author": [ + "Yuki Kimoto " + ] + } + }, + "_id": "mY_jP2O7NnTtr3utv_xZQNu10Ic", + "_type": "release", + "_score": 1.0 + }, + { + "_source": { + "author": "KIMOTO", + "name": "Validator-Custom-Ext-Mojolicious-0.0102", + "stat": { + "size": 4257 + }, + "distribution": "Validator-Custom-Ext-Mojolicious", + "metadata": { + "author": [ + "Yuki Kimoto " + ] + }, + "date": "2010-01-15T14:07:24", + "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/Validator-Custom-Ext-Mojolicious-0.0102.tar.gz", + "checksum_sha256": "a88d01504353223f7a3cb0d6a240debb9c6d6155858f1048a19007c3b366beed", + "version": "0.0102" + }, + "_id": "WZm6hQ6mBfOqgVE6dPQOE0L8hg0", + "_type": "release", + "_index": "cpan_v1_01", + "_score": 1.0 + }, + { + "_index": "cpan_v1_01", + "_type": "release", + "_source": { + "author": "KIMOTO", + "name": "Validator-Custom-0.1207", + "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/Validator-Custom-0.1207.tar.gz", + "date": "2010-07-28T13:42:23", + "checksum_sha256": "f599da2ecc17ac74443628eb84233ee6b25b204511f83ea778dad9efd0f558e0", + "version": "0.1207", + "stat": { + "size": 16985 + }, + "distribution": "Validator-Custom", + "metadata": { + "author": [ + "Yuki Kimoto " + ] + } + }, + "_id": "NWJOqmjEinjfJqawfpkEpEhu4d0", + "_score": 1.0 + } + ], + "total": 359941, + "max_score": 1.0 + }, + "took": 14, + "_scroll_id": "cXVlcnlUaGVuRmV0Y2g7Mzs5OTQ2NzY3ODU6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTQ2NzY3ODQ6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTUyMzQzMTA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw==" +} \ No newline at end of file diff --git a/swh/lister/cpan/tests/test_lister.py b/swh/lister/cpan/tests/test_lister.py new file mode 100644 index 0000000..46453c0 --- /dev/null +++ b/swh/lister/cpan/tests/test_lister.py @@ -0,0 +1,171 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from collections import defaultdict +from itertools import chain +import json +from pathlib import Path + +import pytest + +from swh.lister.cpan.lister import CpanLister, get_module_version + + +@pytest.fixture +def release_search_response(datadir): + return json.loads( + Path(datadir, "https_fastapi.metacpan.org", "v1_release__search").read_bytes() + ) + + +def release_scroll_response(datadir, page): + return json.loads( + Path( + datadir, "https_fastapi.metacpan.org", f"v1__search_scroll_page{page}" + ).read_bytes() + ) + + +@pytest.fixture +def release_scroll_first_response(datadir): + return release_scroll_response(datadir, page=1) + + +@pytest.fixture +def release_scroll_second_response(datadir): + return release_scroll_response(datadir, page=2) + + 
+@pytest.fixture +def release_scroll_third_response(datadir): + return release_scroll_response(datadir, page=3) + + +@pytest.fixture +def release_scroll_fourth_response(datadir): + return release_scroll_response(datadir, page=4) + + +@pytest.fixture(autouse=True) +def mock_network_requests( + requests_mock, + release_search_response, + release_scroll_first_response, + release_scroll_second_response, + release_scroll_third_response, + release_scroll_fourth_response, +): + requests_mock.get( + "https://fastapi.metacpan.org/v1/release/_search", + json=release_search_response, + ) + requests_mock.get( + "https://fastapi.metacpan.org/v1/_search/scroll", + [ + { + "json": release_scroll_first_response, + }, + { + "json": release_scroll_second_response, + }, + { + "json": release_scroll_third_response, + }, + { + "json": release_scroll_fourth_response, + }, + {"json": {"hits": {"hits": []}, "_scroll_id": ""}}, + ], + ) + + +@pytest.mark.parametrize( + "module_name,module_version,release_name,expected_version", + [ + ("Validator-Custom", "0.1207", "Validator-Custom-0.1207", "0.1207"), + ("UDPServersAndClients", 0, "UDPServersAndClients", "0"), + ("Compiler", 0, "Compiler-a1", "a1"), + ("Call-Context", 0.01, "Call-Context-0.01", "0.01"), + ], +) +def test_get_module_version( + module_name, module_version, release_name, expected_version +): + assert ( + get_module_version(module_name, module_version, release_name) + == expected_version + ) + + +def test_cpan_lister( + swh_scheduler, + release_search_response, + release_scroll_first_response, + release_scroll_second_response, + release_scroll_third_response, + release_scroll_fourth_response, +): + lister = CpanLister(scheduler=swh_scheduler) + res = lister.run() + + expected_origins = set() + expected_artifacts = defaultdict(list) + expected_module_metadata = defaultdict(list) + for release in chain( + release_search_response["hits"]["hits"], + release_scroll_first_response["hits"]["hits"], + release_scroll_second_response["hits"]["hits"], + release_scroll_third_response["hits"]["hits"], + release_scroll_fourth_response["hits"]["hits"], + ): + distribution = release["_source"]["distribution"] + release_name = release["_source"]["name"] + checksum_sha256 = release["_source"]["checksum_sha256"] + download_url = release["_source"]["download_url"] + version = release["_source"]["version"] + size = release["_source"]["stat"]["size"] + author = release["_source"]["author"] + author_fullname = release["_source"]["metadata"]["author"][0] + date = release["_source"]["date"] + origin_url = f"https://metacpan.org/dist/{distribution}" + + version = get_module_version(distribution, version, release_name) + + expected_origins.add(origin_url) + expected_artifacts[origin_url].append( + { + "url": download_url, + "filename": download_url.split("/")[-1], + "version": version, + "length": size, + "checksums": {"sha256": checksum_sha256}, + } + ) + expected_module_metadata[origin_url].append( + { + "name": distribution, + "version": version, + "cpan_author": author, + "author": author_fullname if author_fullname != "unknown" else author, + "date": date, + "release_name": release_name, + } + ) + + assert res.pages == 1 + assert res.origins == len(expected_origins) + + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results + + assert len(scheduler_origins) == len(expected_origins) + + for origin in scheduler_origins: + assert origin.visit_type == "cpan" + assert origin.url in expected_origins + assert origin.extra_loader_arguments == { + 
"api_base_url": "https://fastapi.metacpan.org/v1", + "artifacts": expected_artifacts[origin.url], + "module_metadata": expected_module_metadata[origin.url], + } diff --git a/swh/lister/cpan/tests/test_tasks.py b/swh/lister/cpan/tests/test_tasks.py new file mode 100644 index 0000000..b3b8fa2 --- /dev/null +++ b/swh/lister/cpan/tests/test_tasks.py @@ -0,0 +1,31 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.lister.pattern import ListerStats + + +def test_cpan_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker): + res = swh_scheduler_celery_app.send_task("swh.lister.cpan.tasks.ping") + assert res + res.wait() + assert res.successful() + assert res.result == "OK" + + +def test_cpan_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker): + # setup the mocked CpanLister + lister = mocker.patch("swh.lister.cpan.tasks.CpanLister") + lister.from_configfile.return_value = lister + stats = ListerStats(pages=42, origins=42) + lister.run.return_value = stats + + res = swh_scheduler_celery_app.send_task("swh.lister.cpan.tasks.CpanListerTask") + assert res + res.wait() + assert res.successful() + assert res.result == stats.dict() + + lister.from_configfile.assert_called_once_with() + lister.run.assert_called_once_with() diff --git a/swh/lister/cran/list_all_packages.R b/swh/lister/cran/list_all_packages.R index 5747bb4..67d9c6d 100755 --- a/swh/lister/cran/list_all_packages.R +++ b/swh/lister/cran/list_all_packages.R @@ -1,9 +1,9 @@ #!/usr/bin/Rscript # This R script calls the buildin API to get list of # all the packages of R and their description, then convert the API # response to JSON string and print it -db <- tools::CRAN_package_db()[, c("Package", "Version", "Packaged")] +db <- tools::CRAN_package_db()[, c("Package", "Version", "Packaged", "MD5sum")] dbjson <- jsonlite::toJSON(db) print(dbjson) \ No newline at end of file diff --git a/swh/lister/cran/lister.py b/swh/lister/cran/lister.py index e9f937a..35e3d2b 100644 --- a/swh/lister/cran/lister.py +++ b/swh/lister/cran/lister.py @@ -1,149 +1,150 @@ # Copyright (C) 2019-2021 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime, timezone import json import logging import subprocess from typing import Dict, Iterator, List, Optional, Tuple import pkg_resources from swh.lister.pattern import CredentialsType, StatelessLister from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin logger = logging.getLogger(__name__) CRAN_MIRROR = "https://cran.r-project.org" PageType = List[Dict[str, str]] class CRANLister(StatelessLister[PageType]): """ List all packages hosted on The Comprehensive R Archive Network. """ LISTER_NAME = "CRAN" def __init__( self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, ): super().__init__( scheduler, url=CRAN_MIRROR, instance="cran", credentials=credentials ) def get_pages(self) -> Iterator[PageType]: """ Yields a single page containing all CRAN packages info. 
""" yield read_cran_data() def get_origins_from_page(self, page: PageType) -> Iterator[ListedOrigin]: assert self.lister_obj.id is not None seen_urls = set() for package_info in page: origin_url, artifact_url = compute_origin_urls(package_info) if origin_url in seen_urls: # prevent multiple listing of an origin, # most recent version will be listed first continue seen_urls.add(origin_url) yield ListedOrigin( lister_id=self.lister_obj.id, url=origin_url, visit_type="cran", last_update=parse_packaged_date(package_info), extra_loader_arguments={ "artifacts": [ { "url": artifact_url, "version": package_info["Version"], "package": package_info["Package"], + "checksums": {"md5": package_info["MD5sum"]}, } ] }, ) def read_cran_data() -> List[Dict[str, str]]: """ Runs R script which uses inbuilt API to return a json response containing data about the R packages. Returns: List of Dict about R packages. For example:: [ { 'Package': 'A3', 'Version': '1.0.0' }, { 'Package': 'abbyyR', 'Version': '0.5.4' }, ... ] """ filepath = pkg_resources.resource_filename("swh.lister.cran", "list_all_packages.R") logger.debug("Executing R script %s", filepath) response = subprocess.run(filepath, stdout=subprocess.PIPE, shell=False) return json.loads(response.stdout.decode("utf-8")) def compute_origin_urls(package_info: Dict[str, str]) -> Tuple[str, str]: """Compute the package url from the repo dict. Args: repo: dict with key 'Package', 'Version' Returns: the tuple project url, artifact url """ package = package_info["Package"] version = package_info["Version"] origin_url = f"{CRAN_MIRROR}/package={package}" artifact_url = f"{CRAN_MIRROR}/src/contrib/{package}_{version}.tar.gz" return origin_url, artifact_url def parse_packaged_date(package_info: Dict[str, str]) -> Optional[datetime]: packaged_at_str = package_info.get("Packaged", "") packaged_at = None if packaged_at_str: packaged_at_str = packaged_at_str.replace(" UTC", "") # Packaged field possible formats: # - "%Y-%m-%d %H:%M:%S[.%f] UTC; ", # - "%a %b %d %H:%M:%S %Y; " for date_format in ( "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S.%f", "%a %b %d %H:%M:%S %Y", ): try: packaged_at = datetime.strptime( packaged_at_str.split(";")[0], date_format, ).replace(tzinfo=timezone.utc) break except Exception: continue if packaged_at is None: logger.debug( "Could not parse %s package release date: %s", package_info["Package"], packaged_at_str, ) return packaged_at diff --git a/swh/lister/cran/tests/data/list-r-packages.json b/swh/lister/cran/tests/data/list-r-packages.json index 70ef69c..7357cd5 100644 --- a/swh/lister/cran/tests/data/list-r-packages.json +++ b/swh/lister/cran/tests/data/list-r-packages.json @@ -1,40 +1,32 @@ [ - { - "Package": "SeleMix", - "Version": "1.0.2", - "Packaged": "2020-11-28 22:16:43 UTC; Teresa" + "Package": "cNORM", + "Version": "3.0.2", + "Packaged": "2022-06-12 08:46:39 UTC; gbpa005", + "MD5sum": "d878686afc17b990e500dc88afb3a990" }, { - "Package": "plink", - "Version": "1.5-1", - "Packaged": "2017-04-26 11:36:15 UTC; Jonathan" + "Package": "CNprep", + "Version": "2.2", + "Packaged": "2022-05-23 23:58:37 UTC; Astrid", + "MD5sum": "4b6ddc37df607c79b7fb50a96a57197f" }, { - "Package": "jsonlite", - "Version": "1.7.2", - "Packaged": "2020-12-09 13:54:18 UTC; jeroen" - + "Package": "CNPS", + "Version": "1.0.0", + "Packaged": "2021-05-21 16:55:04 UTC; Surface", + "MD5sum": "deac071a9387e3a296481d041e6d09ee" }, { - "Package": "Records", - "Version": "1.0", - "Packaged": "2012-10-29 08:57:37 UTC; ripley" + "Package": "cns", + "Version": "0.1.0", + 
"Packaged": "2021-07-16 19:30:51 UTC; nfultz", + "MD5sum": "3ad5a474260dbacb889be461b826a73b" }, { - "Package": "scRNAtools", - "Version": "1.0", - "Packaged": "2018-07-04 00:49:45 UTC; dell" - }, - { - "Package": "Deriv", - "Version": "4.1.2", - "Packaged": "2020-12-10 11:12:28 UTC; sokol" - }, - { - "Package": "BayesValidate", - "Version": "0.0", - "Packaged": "Thu Mar 30 10:48:35 2006; hornik" + "Package": "cnum", + "Version": "0.1.3", + "Packaged": "2021-01-11 13:24:52 UTC; Elgar", + "MD5sum": "3cb5ab3fdaf4277d1ebfbe147e8990e1" } - ] \ No newline at end of file diff --git a/swh/lister/cran/tests/test_lister.py b/swh/lister/cran/tests/test_lister.py index a0bebfc..6501a77 100644 --- a/swh/lister/cran/tests/test_lister.py +++ b/swh/lister/cran/tests/test_lister.py @@ -1,163 +1,164 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime, timezone import json from os import path import pytest from swh.lister.cran.lister import ( CRAN_MIRROR, CRANLister, compute_origin_urls, parse_packaged_date, ) def test_cran_compute_origin_urls(): pack = "something" vers = "0.0.1" origin_url, artifact_url = compute_origin_urls( { "Package": pack, "Version": vers, } ) assert origin_url == f"{CRAN_MIRROR}/package={pack}" assert artifact_url == f"{CRAN_MIRROR}/src/contrib/{pack}_{vers}.tar.gz" def test_cran_compute_origin_urls_failure(): for incomplete_repo in [{"Version": "0.0.1"}, {"Package": "package"}, {}]: with pytest.raises(KeyError): compute_origin_urls(incomplete_repo) def test_parse_packaged_date(): common_date_format = { "Package": "test", "Packaged": "2017-04-26 11:36:15 UTC; Jonathan", } assert parse_packaged_date(common_date_format) == datetime( year=2017, month=4, day=26, hour=11, minute=36, second=15, tzinfo=timezone.utc ) common_date_format = { "Package": "test", "Packaged": "2017-04-26 11:36:15.123456 UTC; Jonathan", } assert parse_packaged_date(common_date_format) == datetime( year=2017, month=4, day=26, hour=11, minute=36, second=15, microsecond=123456, tzinfo=timezone.utc, ) old_date_format = { "Package": "test", "Packaged": "Thu Mar 30 10:48:35 2006; hornik", } assert parse_packaged_date(old_date_format) == datetime( year=2006, month=3, day=30, hour=10, minute=48, second=35, tzinfo=timezone.utc ) invalid_date_format = { "Package": "test", "Packaged": "foo", } assert parse_packaged_date(invalid_date_format) is None missing_date = { "Package": "test", } assert parse_packaged_date(missing_date) is None def test_cran_lister_cran(datadir, swh_scheduler, mocker): with open(path.join(datadir, "list-r-packages.json")) as f: cran_data = json.loads(f.read()) lister = CRANLister(swh_scheduler) mock_cran = mocker.patch("swh.lister.cran.lister.read_cran_data") mock_cran.return_value = cran_data stats = lister.run() assert stats.pages == 1 assert stats.origins == len(cran_data) scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert len(scheduler_origins) == len(cran_data) for package_info in cran_data: origin_url, artifact_url = compute_origin_urls(package_info) filtered_origins = [o for o in scheduler_origins if o.url == origin_url] assert len(filtered_origins) == 1 assert filtered_origins[0].extra_loader_arguments == { "artifacts": [ { "url": artifact_url, "version": package_info["Version"], "package": package_info["Package"], + "checksums": 
{"md5": package_info["MD5sum"]}, } ] } filtered_origins[0].last_update == parse_packaged_date(package_info) def test_cran_lister_duplicated_origins(datadir, swh_scheduler, mocker): with open(path.join(datadir, "list-r-packages.json")) as f: cran_data = json.loads(f.read()) lister = CRANLister(swh_scheduler) mock_cran = mocker.patch("swh.lister.cran.lister.read_cran_data") mock_cran.return_value = cran_data + cran_data stats = lister.run() assert stats.pages == 1 assert stats.origins == len(cran_data) @pytest.mark.parametrize( "credentials, expected_credentials", [ (None, []), ({"key": "value"}, []), ( {"CRAN": {"cran": [{"username": "user", "password": "pass"}]}}, [{"username": "user", "password": "pass"}], ), ], ) def test_lister_cran_instantiation_with_credentials( credentials, expected_credentials, swh_scheduler ): lister = CRANLister(swh_scheduler, credentials=credentials) # Credentials are allowed in constructor assert lister.credentials == expected_credentials def test_lister_cran_from_configfile(swh_scheduler_config, mocker): load_from_envvar = mocker.patch("swh.lister.pattern.load_from_envvar") load_from_envvar.return_value = { "scheduler": {"cls": "local", **swh_scheduler_config}, "credentials": {}, } lister = CRANLister.from_configfile() assert lister.scheduler is not None assert lister.credentials is not None diff --git a/swh/lister/crates/__init__.py b/swh/lister/crates/__init__.py index c4ca72c..31bd3e9 100644 --- a/swh/lister/crates/__init__.py +++ b/swh/lister/crates/__init__.py @@ -1,142 +1,134 @@ # Copyright (C) 2022 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information - """ Crates lister ============= The Crates lister list origins from `Crates.io`_, the Rust community’s crate registry. Origins are `packages`_ for the `Rust language`_ ecosystem. Package follow a `layout specifications`_ to be usable with the `Cargo`_ package manager and have a `Cargo.toml`_ file manifest which consists in metadata to describe and build a specific package version. As of August 2022 `Crates.io`_ list 89013 packages name for a total of 588215 released versions. Origins retrieving strategy --------------------------- -A json http api to list packages from crates.io but we choose a `different strategy`_ -in order to reduce to its bare minimum the amount of http call and bandwidth. -We clone a git repository which contains a tree of directories whose last child folder -name corresponds to the package name and contains a Cargo.toml file with some json data -to describe all existing versions of the package. -It takes a few seconds to clone the repository and browse it to build a full index of -existing package and related versions. -The lister is incremental, so the first time it clones and browses the repository as -previously described then stores the last seen commit id. -Next time, it retrieves the list of new and changed files since last commit id and -returns new or changed package with all of their related versions. - -Note that all Git related operations are done with `Dulwich`_, a Python -implementation of the Git file formats and protocols. +A json http api to list packages from crates.io exists but we choose a +`different strategy`_ in order to reduce to its bare minimum the amount +of http call and bandwidth. + +We download a `db-dump.tar.gz`_ archives which contains csv files as an export of +the crates.io database. 
Crates.csv lists package names, versions.csv lists versions +related to package names. +It takes a few seconds to download the archive and parse both csv files to build a +full index of existing packages and related versions. + +The archive also contains a metadata.json file with a timestamp corresponding to +the date the database dump started. The database dump is automatically generated +every 24 hours, around 02:00:00 UTC. + +The lister is incremental: the first time it runs, it downloads the db-dump.tar.gz +archive as previously described and stores the last seen database dump timestamp. +Next time, it downloads the db-dump.tar.gz again but retrieves only the list of new and +changed packages since the last seen timestamp, with all of their related versions. Page listing ------------ Each page is related to one package. Each line of a page corresponds to a different version of this package. The data schema for each line is: * **name**: Package name * **version**: Package version * **crate_file**: Package download url * **checksum**: Package download checksum * **yanked**: Whether the package is yanked or not -* **last_update**: Iso8601 last update date computed upon git commit date of the - related Cargo.toml file +* **last_update**: Iso8601 last update Origins from page ----------------- The lister yields one origin per page. The origin url corresponds to the crates.io web page url for a package, for example -"https://crates.io/api/v1/crates/{package}". +"https://crates.io/crates/{crate}". -Additionally we add some data set to "extra_loader_arguments": +Additionally we add some data for each version, stored in "extra_loader_arguments": * **artifacts**: Represents data about the crate files to download, following :ref:`original-artifacts-json specification <original-artifacts-json>` * **crates_metadata**: To store all other interesting attributes that do not belong - to artifacts. For now it mainly indicate when a version is `yanked`_. + to artifacts. For now it mainly indicates when a version is `yanked`_, and the version's + last_update timestamp.
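+
+As an illustration only (``split_entry`` below is a made-up helper, not part of
+the lister code), one page entry is split between those two lists roughly as
+follows::
+
+    def split_entry(entry: dict) -> tuple:
+        artifact = {
+            "version": entry["version"],
+            "filename": entry["filename"],
+            "url": entry["crate_file"],
+            "checksums": {"sha256": entry["checksum"]},
+        }
+        metadata = {
+            "version": entry["version"],
+            "yanked": entry["yanked"],
+            "last_update": entry["last_update"],
+        }
+        return artifact, metadata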
Origin data example:: { - "url": "https://crates.io/api/v1/crates/rand", + "url": "https://crates.io/crates/regex-syntax", "artifacts": [ { + "version": "0.1.0", "checksums": { - "sha256": "48a45b46c2a8c38348adb1205b13c3c5eb0174e0c0fec52cc88e9fb1de14c54d", # noqa: B950 - }, - "filename": "rand-0.1.1.crate", - "url": "https://static.crates.io/crates/rand/rand-0.1.1.crate", - "version": "0.1.1", - }, - { - "checksums": { - "sha256": "6e229ed392842fa93c1d76018d197b7e1b74250532bafb37b0e1d121a92d4cf7", # noqa: B950 + "sha256": "398952a2f6cd1d22bc1774fd663808e32cf36add0280dee5cdd84a8fff2db944", # noqa: B950 }, - "filename": "rand-0.1.2.crate", - "url": "https://static.crates.io/crates/rand/rand-0.1.2.crate", - "version": "0.1.2", + "filename": "regex-syntax-0.1.0.crate", + "url": "https://static.crates.io/crates/regex-syntax/regex-syntax-0.1.0.crate", # noqa: B950 }, ], "crates_metadata": [ { - "version": "0.1.1", - "yanked": False, - }, - { - "version": "0.1.2", + "version": "0.1.0", + "last_update": "2017-11-30 03:37:17.449539", "yanked": False, }, ], - } + }, Running tests ------------- Activate the virtualenv and run from within the swh-lister directory: pytest -s -vv --log-cli-level=DEBUG swh/lister/crates/tests Testing with Docker ------------------- Change directory to swh/docker, then launch the docker environment: - docker-compose up -d + docker compose up -d -Then connect to the lister: +Then schedule a crates listing task:: - docker exec -it docker_swh-lister_1 bash + docker compose exec swh-scheduler swh scheduler task add -p oneshot list-crates -And run the lister (The output of this listing results in “oneshot” tasks in the scheduler): +You can follow the lister execution by displaying the logs of the swh-lister service:: - swh lister run -l crates + docker compose logs -f swh-lister .. _Crates.io: https://crates.io .. _packages: https://doc.rust-lang.org/book/ch07-01-packages-and-crates.html .. _Rust language: https://www.rust-lang.org/ .. _layout specifications: https://doc.rust-lang.org/cargo/guide/project-layout.html .. _Cargo: https://doc.rust-lang.org/cargo/guide/why-cargo-exists.html#enter-cargo .. _Cargo.toml: https://doc.rust-lang.org/cargo/reference/manifest.html .. _different strategy: https://crates.io/data-access -.. _Dulwich: https://www.dulwich.io/ .. _yanked: https://doc.rust-lang.org/cargo/reference/publishing.html#cargo-yank +..
_db-dump.tar.gz: https://static.crates.io/db-dump.tar.gz """ def register(): from .lister import CratesLister return { "lister": CratesLister, "task_modules": ["%s.tasks" % __name__], } diff --git a/swh/lister/crates/lister.py b/swh/lister/crates/lister.py index fbe3003..eca9f10 100644 --- a/swh/lister/crates/lister.py +++ b/swh/lister/crates/lister.py @@ -1,259 +1,250 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from dataclasses import asdict, dataclass -import datetime -import io + +import csv +from dataclasses import dataclass +from datetime import datetime import json import logging from pathlib import Path -import shutil +import tarfile +import tempfile from typing import Any, Dict, Iterator, List, Optional from urllib.parse import urlparse -from dulwich import porcelain -from dulwich.patch import write_tree_diff -from dulwich.repo import Repo +import iso8601 +from packaging.version import parse as parse_version from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin from ..pattern import CredentialsType, Lister logger = logging.getLogger(__name__) # Aliasing the page results returned by `get_pages` method from the lister. CratesListerPage = List[Dict[str, Any]] @dataclass class CratesListerState: """Store lister state for incremental mode operations. - 'last_commit' represents a git commit hash + 'index_last_update' represents the UTC time the crates.io database dump was + started """ - last_commit: str = "" + index_last_update: Optional[datetime] = None class CratesLister(Lister[CratesListerState, CratesListerPage]): """List origins from the "crates.io" forge. - It basically fetches https://github.com/rust-lang/crates.io-index.git to a - temp directory and then walks through each file to get the crate's info on - the first run. + It downloads a tar.gz archive, automatically generated every 24 hours, which + contains the crates.io database table content as csv files. + By parsing two of these csv files we can list all Crates.io package names and + their related versions. - In incremental mode, it relies on the same Git repository but instead of reading - each file of the repo, it get the differences through ``git log last_commit..HEAD``. - Resulting output string is parsed to build page entries.
+ In incremental mode, it checks each entry, comparing its 'last_update' value + with self.state.index_last_update. """ - # Part of the lister API, that identifies this lister LISTER_NAME = "crates" - # (Optional) CVS type of the origins listed by this lister, if constant VISIT_TYPE = "crates" - INSTANCE = "crates" - INDEX_REPOSITORY_URL = "https://github.com/rust-lang/crates.io-index.git" - DESTINATION_PATH = Path("/tmp/crates.io-index") + + BASE_URL = "https://crates.io" + DB_DUMP_URL = "https://static.crates.io/db-dump.tar.gz" + CRATE_FILE_URL_PATTERN = ( "https://static.crates.io/crates/{crate}/{crate}-{version}.crate" ) - CRATE_API_URL_PATTERN = "https://crates.io/api/v1/crates/{crate}" + CRATE_URL_PATTERN = "https://crates.io/crates/{crate}" def __init__( self, scheduler: SchedulerInterface, credentials: CredentialsType = None, ): super().__init__( scheduler=scheduler, credentials=credentials, - url=self.INDEX_REPOSITORY_URL, + url=self.BASE_URL, instance=self.INSTANCE, ) + self.index_metadata: Dict[str, str] = {} def state_from_dict(self, d: Dict[str, Any]) -> CratesListerState: - if "last_commit" not in d: - d["last_commit"] = "" + index_last_update = d.get("index_last_update") + if index_last_update is not None: + d["index_last_update"] = iso8601.parse_date(index_last_update) return CratesListerState(**d) def state_to_dict(self, state: CratesListerState) -> Dict[str, Any]: - return asdict(state) - - def get_index_repository(self) -> None: - """Get crates.io-index repository up to date running git command.""" - if self.DESTINATION_PATH.exists(): - porcelain.pull( - self.DESTINATION_PATH, remote_location=self.INDEX_REPOSITORY_URL - ) - else: - porcelain.clone( - source=self.INDEX_REPOSITORY_URL, target=self.DESTINATION_PATH - ) - - def get_crates_index(self) -> List[Path]: - """Build a sorted list of file paths excluding dotted directories and - dotted files. - - Each file path corresponds to a crate that lists all available - versions. + d: Dict[str, Optional[str]] = {"index_last_update": None} + index_last_update = state.index_last_update + if index_last_update is not None: + d["index_last_update"] = index_last_update.isoformat() + return d + + def is_new(self, dt_str: str) -> bool: + """Returns True when dt_str represents a date more recent than + self.state.index_last_update """ - crates_index = sorted( - path - for path in self.DESTINATION_PATH.rglob("*") - if not any(part.startswith(".") for part in path.parts) - and path.is_file() - and path != self.DESTINATION_PATH / "config.json" - ) - - return crates_index - - def get_last_commit_hash(self, repository_path: Path) -> str: - """Returns the last commit hash of a git repository""" - assert repository_path.exists() + dt = iso8601.parse_date(dt_str) + last = self.state.index_last_update + return last is None or last < dt - repo = Repo(str(repository_path)) - head = repo.head() - last_commit = repo[head] + def get_and_parse_db_dump(self) -> Dict[str, Any]: + """Download the db dump archive and parse its csv files. - return last_commit.id.decode() - - def get_last_update_by_file(self, filepath: Path) -> Optional[datetime.datetime]: - """Given a file path within a Git repository, returns its last commit - date as iso8601 + Returns a dict where each entry corresponds to a package name with its related versions.
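+
+        A sketch of the returned structure, based on the parsing below (values
+        trimmed, each version entry being the raw csv row)::
+
+            {
+                "regex-syntax": {
+                    "name": "regex-syntax",
+                    "updated_at": "2022-07-05 17:59:37.238137",
+                    "versions": {"0.1.0": {"num": "0.1.0", ...}},
+                },
+            }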
""" - repo = Repo(str(self.DESTINATION_PATH)) - # compute relative path otherwise it fails - relative_path = filepath.relative_to(self.DESTINATION_PATH) - walker = repo.get_walker(paths=[bytes(relative_path)], max_entries=1) - try: - commit = next(iter(walker)).commit - except StopIteration: - logger.error( - "Can not find %s related commits in repository %s", relative_path, repo - ) - return None - else: - last_update = datetime.datetime.fromtimestamp( - commit.author_time, datetime.timezone.utc - ) - return last_update + + with tempfile.TemporaryDirectory() as tmpdir: + + file_name = self.DB_DUMP_URL.split("/")[-1] + archive_path = Path(tmpdir) / file_name + + # Download the Db dump + with self.http_request(self.DB_DUMP_URL, stream=True) as res: + with open(archive_path, "wb") as out_file: + for chunk in res.iter_content(chunk_size=1024): + out_file.write(chunk) + + # Extract the Db dump + db_dump_path = Path(str(archive_path).split(".tar.gz")[0]) + tar = tarfile.open(archive_path) + tar.extractall(path=db_dump_path) + tar.close() + + csv.field_size_limit(1000000) + + (crates_csv_path,) = list(db_dump_path.glob("*/data/crates.csv")) + (versions_csv_path,) = list(db_dump_path.glob("*/data/versions.csv")) + (index_metadata_json_path,) = list(db_dump_path.rglob("*metadata.json")) + + with index_metadata_json_path.open("rb") as index_metadata_json: + self.index_metadata = json.load(index_metadata_json) + + crates: Dict[str, Any] = {} + with crates_csv_path.open() as crates_fd: + crates_csv = csv.DictReader(crates_fd) + for item in crates_csv: + if self.is_new(item["updated_at"]): + # crate 'id' as key + crates[item["id"]] = { + "name": item["name"], + "updated_at": item["updated_at"], + "versions": {}, + } + + data: Dict[str, Any] = {} + with versions_csv_path.open() as versions_fd: + versions_csv = csv.DictReader(versions_fd) + for version in versions_csv: + if version["crate_id"] in crates.keys(): + crate: Dict[str, Any] = crates[version["crate_id"]] + crate["versions"][version["num"]] = version + # crate 'name' as key + data[crate["name"]] = crate + return data def page_entry_dict(self, entry: Dict[str, Any]) -> Dict[str, Any]: """Transform package version definition dict to a suitable page entry dict """ + crate_file = self.CRATE_FILE_URL_PATTERN.format( + crate=entry["name"], version=entry["version"] + ) + filename = urlparse(crate_file).path.split("/")[-1] return dict( name=entry["name"], - version=entry["vers"], - checksum=entry["cksum"], - yanked=entry["yanked"], - crate_file=self.CRATE_FILE_URL_PATTERN.format( - crate=entry["name"], version=entry["vers"] - ), + version=entry["version"], + checksum=entry["checksum"], + yanked=True if entry["yanked"] == "t" else False, + crate_file=crate_file, + filename=filename, + last_update=entry["updated_at"], ) def get_pages(self) -> Iterator[CratesListerPage]: - """Yield an iterator sorted by name in ascending order of pages. 
- - Each page is a list of crate versions with: - - name: Name of the crate - - version: Version - - checksum: Checksum - - crate_file: Url of the crate file - - last_update: Date of the last commit of the corresponding index - file + """Each page is a list of crate versions with: + - name: Name of the crate + - version: Version + - checksum: Checksum + - yanked: Whether the package is yanked or not + - crate_file: Url of the crate file + - filename: File name of the crate file + - last_update: Last update for that version """ - # Fetch crates.io index repository - self.get_index_repository() - if not self.state.last_commit: - # First discovery - # List all crates files from the index repository - crates_index = self.get_crates_index() - else: - # Incremental case - # Get new package version by parsing a range of commits from index repository - repo = Repo(str(self.DESTINATION_PATH)) - head = repo[repo.head()] - last = repo[self.state.last_commit.encode()] - - outstream = io.BytesIO() - write_tree_diff(outstream, repo.object_store, last.tree, head.tree) - raw_diff = outstream.getvalue() - crates_index = [] - for line in raw_diff.splitlines(): - if line.startswith(b"+++ b/"): - filepath = line.split(b"+++ b/", 1)[1] - crates_index.append(self.DESTINATION_PATH / filepath.decode()) - crates_index = sorted(crates_index) - - logger.debug("Found %s crates in crates_index", len(crates_index)) - - # Each line of a crate file is a json entry describing released versions - # for a package - for crate in crates_index: + + # Fetch the crates.io db dump, then parse the data. + dataset = self.get_and_parse_db_dump() + + logger.debug("Found %s crates in the db dump", len(dataset)) + + # Each entry from dataset will correspond to a page + for name, item in dataset.items(): page = [] - last_update = self.get_last_update_by_file(crate) - - with crate.open("rb") as current_file: - for line in current_file: - data = json.loads(line) - entry = self.page_entry_dict(data) - entry["last_update"] = last_update - page.append(entry) + # sort crate versions + versions: list = sorted(item["versions"].keys(), key=parse_version) + + for version in versions: + v = item["versions"][version] + v["name"] = name + v["version"] = version + page.append(self.page_entry_dict(v)) + yield page def get_origins_from_page(self, page: CratesListerPage) -> Iterator[ListedOrigin]: """Iterate on all crate pages and yield ListedOrigin instances.""" - assert self.lister_obj.id is not None - url = self.CRATE_API_URL_PATTERN.format(crate=page[0]["name"]) + url = self.CRATE_URL_PATTERN.format(crate=page[0]["name"]) last_update = page[0]["last_update"] + artifacts = [] crates_metadata = [] - for version in page: - filename = urlparse(version["crate_file"]).path.split("/")[-1] + for entry in page: # Build an artifact entry following original-artifacts-json specification # https://docs.softwareheritage.org/devel/swh-storage/extrinsic-metadata-specification.html#original-artifacts-json # noqa: B950 - artifact = { - "filename": f"{filename}", - "checksums": { - "sha256": f"{version['checksum']}", - }, - "url": version["crate_file"], - "version": version["version"], - } - artifacts.append(artifact) - data = {f"{version['version']}": {"yanked": f"{version['yanked']}"}} - crates_metadata.append(data) + artifacts.append( + { + "version": entry["version"], + "filename": entry["filename"], + "url": entry["crate_file"], + "checksums": { + "sha256": entry["checksum"], + }, + } + ) + + crates_metadata.append( + { + "version": entry["version"], + "yanked":
entry["yanked"], + "last_update": entry["last_update"], + } + ) yield ListedOrigin( lister_id=self.lister_obj.id, visit_type=self.VISIT_TYPE, url=url, - last_update=last_update, + last_update=iso8601.parse_date(last_update), extra_loader_arguments={ "artifacts": artifacts, "crates_metadata": crates_metadata, }, ) def finalize(self) -> None: - last = self.get_last_commit_hash(repository_path=self.DESTINATION_PATH) - if self.state.last_commit == last: - self.updated = False - else: - self.state.last_commit = last - self.updated = True + last: datetime = iso8601.parse_date(self.index_metadata["timestamp"]) - logger.debug("Listing crates origin completed with last commit id %s", last) - - # Cleanup by removing the repository directory - if self.DESTINATION_PATH.exists(): - shutil.rmtree(self.DESTINATION_PATH) - logger.debug( - "Successfully removed %s directory", str(self.DESTINATION_PATH) - ) + if not self.state.index_last_update: + self.state.index_last_update = last + self.updated = True diff --git a/swh/lister/crates/tests/__init__.py b/swh/lister/crates/tests/__init__.py index 8b98baa..68748ea 100644 --- a/swh/lister/crates/tests/__init__.py +++ b/swh/lister/crates/tests/__init__.py @@ -1,29 +1,3 @@ # Copyright (C) 2022 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information - -import os -from pathlib import PosixPath -import subprocess -from typing import Optional, Union - - -def prepare_repository_from_archive( - archive_path: str, - filename: Optional[str] = None, - tmp_path: Union[PosixPath, str] = "/tmp", -) -> str: - """Given an existing archive_path, uncompress it. - Returns a file repo url which can be used as origin url. - - This does not deal with the case where the archive passed along does not exist. 
- - """ - if not isinstance(tmp_path, str): - tmp_path = str(tmp_path) - # uncompress folder/repositories/dump for the loader to ingest - subprocess.check_output(["tar", "xf", archive_path, "-C", tmp_path]) - # build the origin url (or some derivative form) - _fname = filename if filename else os.path.basename(archive_path) - repo_url = f"file://{tmp_path}/{_fname}" - return repo_url diff --git a/swh/lister/crates/tests/data/fake-crates-repository.tar.gz b/swh/lister/crates/tests/data/fake-crates-repository.tar.gz deleted file mode 100644 index 498b105..0000000 Binary files a/swh/lister/crates/tests/data/fake-crates-repository.tar.gz and /dev/null differ diff --git a/swh/lister/crates/tests/data/fake_crates_repository_init.sh b/swh/lister/crates/tests/data/fake_crates_repository_init.sh index 6368601..b58d195 100755 --- a/swh/lister/crates/tests/data/fake_crates_repository_init.sh +++ b/swh/lister/crates/tests/data/fake_crates_repository_init.sh @@ -1,64 +1,67 @@ #!/usr/bin/env bash -# Script to generate fake-crates-repository.tar.gz -# Creates a git repository like https://github.com/rust-lang/crates.io-index -# for tests purposes - +# Script to generate db-dump.tar.gz like https://static.crates.io/db-dump.tar.gz +# Creates csv and json files then build the archive for tests purposes set -euo pipefail -# files and directories -mkdir -p tmp_dir/crates.io-index/ -cd tmp_dir/crates.io-index/ +# files and directories for first db dump +mkdir -p https_static.crates.io +mkdir -p tmp_dir/crates.io-db-dump/2022-08-08-020027/ +cd tmp_dir/crates.io-db-dump/2022-08-08-020027/ +mkdir data -mkdir -p .dot-dir -touch .dot-dir/empty -mkdir -p ra/nd -mkdir -p re/ge +echo -e '''created_at,description,documentation,downloads,homepage,id,max_upload_size,name,readme,repository,updated_at +2015-02-03 06:17:14.147783,"Random number generators and other randomness functionality.",https://docs.rs/rand,139933633,https://rust-random.github.io/book,1339,,rand,,https://github.com/rust-random/rand,2022-02-14 08:37:47.035988 +2014-12-13 22:10:11.303311,"An implementation of regular expressions for Rust. 
This implementation uses finite automata and guarantees linear time matching on all inputs.",https://docs.rs/regex,85620996,https://github.com/rust-lang/regex,545,,regex,,https://github.com/rust-lang/regex,2022-07-05 18:00:33.712885 +2015-05-27 23:19:16.839117,"A regular expression parser.",https://docs.rs/regex-syntax,84299774,https://github.com/rust-lang/regex,2233,,regex-syntax,,https://github.com/rust-lang/regex,2022-07-05 17:59:37.238137 +''' > data/crates.csv -touch .dot-file -touch config.json +echo -e '''checksum,crate_id,crate_size,created_at,downloads,features,id,license,links,num,published_by,updated_at,yanked +398952a2f6cd1d22bc1774fd663808e32cf36add0280dee5cdd84a8fff2db944,2233,,2015-05-27 23:19:16.848643,1961,{},10855,MIT/Apache-2.0,,0.1.0,,2017-11-30 03:37:17.449539,f +343bd0171ee23346506db6f4c64525de6d72f0e8cc533f83aea97f3e7488cbf9,545,,2014-12-18 06:56:46.88489,845,{},1321,MIT/Apache-2.0,,0.1.2,,2017-11-30 02:29:20.01125,f +6e229ed392842fa93c1d76018d197b7e1b74250532bafb37b0e1d121a92d4cf7,1339,,2015-02-03 11:15:19.001762,8211,{},4371,MIT/Apache-2.0,,0.1.2,,2017-11-30 03:14:27.545115,f +defb220c4054ca1b95fe8b0c9a6e782dda684c1bdf8694df291733ae8a3748e3,545,,2014-12-19 16:16:41.73772,1498,{},1363,MIT/Apache-2.0,,0.1.3,,2017-11-30 02:26:59.236947,f +48a45b46c2a8c38348adb1205b13c3c5eb0174e0c0fec52cc88e9fb1de14c54d,1339,,2015-02-03 06:17:14.169972,7963,{},4362,MIT/Apache-2.0,,0.1.1,,2017-11-30 03:33:14.186028,f +f0ff1ca641d3c9a2c30464dac30183a8b91cdcc959d616961be020cdea6255c5,545,,2014-12-13 22:10:11.329494,3204,{},1100,MIT/Apache-2.0,,0.1.0,,2017-11-30 02:51:27.240551,f +a07bef996bd38a73c21a8e345d2c16848b41aa7ec949e2fedffe9edf74cdfb36,545,,2014-12-15 20:31:48.571836,889,{},1178,MIT/Apache-2.0,,0.1.1,,2017-11-30 03:03:20.143103,f +''' > data/versions.csv -# Init as a git repository -git init -git add . -git commit -m "Init fake crates.io-index repository for tests purpose" +echo -e '''{ + "timestamp": "2022-08-08T02:00:27.645191645Z", + "crates_io_commit": "3e5f0b4d2a382ac0951898fd257f693734eadee2" +} +''' > metadata.json -echo '{"name":"rand","vers":"0.1.1","deps":[],"cksum":"48a45b46c2a8c38348adb1205b13c3c5eb0174e0c0fec52cc88e9fb1de14c54d","features":{},"yanked":false}' > ra/nd/rand -git add . -git commit -m " Updating crate rand#0.1.1" +cd ../../ +tar -czf db-dump.tar.gz -C crates.io-db-dump . -echo '{"name":"rand","vers":"0.1.2","deps":[{"name":"libc","req":"^0.1.1","features":[""],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"log","req":"^0.2.1","features":[""],"optional":false,"default_features":true,"target":null,"kind":"normal"}],"cksum":"6e229ed392842fa93c1d76018d197b7e1b74250532bafb37b0e1d121a92d4cf7","features":{},"yanked":false}' >> ra/nd/rand -git add . -git commit -m " Updating crate rand#0.1.2" +# A second db dump with a new entry and a different timestamp -echo '{"name":"regex","vers":"0.1.0","deps":[],"cksum":"f0ff1ca641d3c9a2c30464dac30183a8b91cdcc959d616961be020cdea6255c5","features":{},"yanked":false}' > re/ge/regex -git add . -git commit -m " Updating crate regex#0.1.0" +mkdir -p crates.io-db-dump_visit1 +cp -rf crates.io-db-dump/2022-08-08-020027 crates.io-db-dump_visit1/2022-09-05-020027 -echo '{"name":"regex","vers":"0.1.1","deps":[{"name":"regex_macros","req":"^0.1.0","features":[""],"optional":false,"default_features":true,"target":null,"kind":"dev"}],"cksum":"a07bef996bd38a73c21a8e345d2c16848b41aa7ec949e2fedffe9edf74cdfb36","features":{},"yanked":false}' >> re/ge/regex -git add . 
-git commit -m " Updating crate regex#0.1.1" +cd crates.io-db-dump_visit1/2022-09-05-020027/ -echo '{"name":"regex","vers":"0.1.2","deps":[{"name":"regex_macros","req":"^0.1.0","features":[""],"optional":false,"default_features":true,"target":null,"kind":"dev"}],"cksum":"343bd0171ee23346506db6f4c64525de6d72f0e8cc533f83aea97f3e7488cbf9","features":{},"yanked":false}' >> re/ge/regex -git add . -git commit -m " Updating crate regex#0.1.2" +echo -e '''{ + "timestamp": "2022-09-05T02:00:27.687167108Z", + "crates_io_commit": "d3652ad81bd8bd837f2d2442ee08484ee5d4bac3" +} +''' > metadata.json -echo '{"name":"regex","vers":"0.1.3","deps":[{"name":"regex_macros","req":"^0.1.0","features":[""],"optional":false,"default_features":true,"target":null,"kind":"dev"}],"cksum":"defb220c4054ca1b95fe8b0c9a6e782dda684c1bdf8694df291733ae8a3748e3","features":{},"yanked":false}' >> re/ge/regex -git add . -git commit -m " Updating crate regex#0.1.3" +echo -e '''2019-01-08 15:11:01.560092,"A crate for safe and ergonomic pin-projection.",,48353738,,107436,,pin-project,,https://github.com/taiki-e/pin-project,2022-08-15 13:52:11.642129 +''' >> data/crates.csv -echo '{"name":"regex-syntax","vers":"0.1.0","deps":[{"name":"rand","req":"^0.3","features":[""],"optional":false,"default_features":true,"target":null,"kind":"dev"},{"name":"quickcheck","req":"^0.2","features":[""],"optional":false,"default_features":true,"target":null,"kind":"dev"}],"cksum":"398952a2f6cd1d22bc1774fd663808e32cf36add0280dee5cdd84a8fff2db944","features":{},"yanked":false}' > re/ge/regex-syntax -git add . -git commit -m " Updating crate regex-syntax#0.1.0" +echo -e '''ad29a609b6bcd67fee905812e544992d216af9d755757c05ed2d0e15a74c6ecc,107436,56972,2022-08-15 13:52:11.642129,580330,{},602929,Apache-2.0 OR MIT,,1.0.12,33035,2022-08-15 13:52:11.642129,f +''' >> data/versions.csv -# Save some space -rm .git/hooks/*.sample +cd ../../ -# Compress git directory as a tar.gz archive -cd ../ -tar -cvzf fake-crates-repository.tar.gz crates.io-index -mv fake-crates-repository.tar.gz ../ +tar -czf db-dump.tar.gz_visit1 -C crates.io-db-dump_visit1 . 
+ +# Move the generated tar.gz archives to a servable directory +mv db-dump.tar.gz ../https_static.crates.io/ +mv db-dump.tar.gz_visit1 ../https_static.crates.io/ # Clean up tmp_dir cd ../ rm -rf tmp_dir diff --git a/swh/lister/crates/tests/data/https_static.crates.io/db-dump.tar.gz b/swh/lister/crates/tests/data/https_static.crates.io/db-dump.tar.gz new file mode 100644 index 0000000..bd74c75 Binary files /dev/null and b/swh/lister/crates/tests/data/https_static.crates.io/db-dump.tar.gz differ diff --git a/swh/lister/crates/tests/data/https_static.crates.io/db-dump.tar.gz_visit1 b/swh/lister/crates/tests/data/https_static.crates.io/db-dump.tar.gz_visit1 new file mode 100644 index 0000000..0b7dd38 Binary files /dev/null and b/swh/lister/crates/tests/data/https_static.crates.io/db-dump.tar.gz_visit1 differ diff --git a/swh/lister/crates/tests/test_lister.py b/swh/lister/crates/tests/test_lister.py index 8b26379..387ca26 100644 --- a/swh/lister/crates/tests/test_lister.py +++ b/swh/lister/crates/tests/test_lister.py @@ -1,238 +1,239 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information - -from pathlib import Path - -from dulwich.repo import Repo +import iso8601 from swh.lister.crates.lister import CratesLister, CratesListerState -from swh.lister.crates.tests import prepare_repository_from_archive expected_origins = [ { - "url": "https://crates.io/api/v1/crates/rand", + "url": "https://crates.io/crates/rand", "artifacts": [ { + "version": "0.1.1", "checksums": { "sha256": "48a45b46c2a8c38348adb1205b13c3c5eb0174e0c0fec52cc88e9fb1de14c54d", # noqa: B950 }, "filename": "rand-0.1.1.crate", "url": "https://static.crates.io/crates/rand/rand-0.1.1.crate", - "version": "0.1.1", }, { + "version": "0.1.2", "checksums": { "sha256": "6e229ed392842fa93c1d76018d197b7e1b74250532bafb37b0e1d121a92d4cf7", # noqa: B950 }, "filename": "rand-0.1.2.crate", "url": "https://static.crates.io/crates/rand/rand-0.1.2.crate", - "version": "0.1.2", }, ], - "metadata": [ + "crates_metadata": [ { "version": "0.1.1", + "last_update": "2017-11-30 03:33:14.186028", "yanked": False, }, { "version": "0.1.2", + "last_update": "2017-11-30 03:14:27.545115", "yanked": False, }, ], }, { - "url": "https://crates.io/api/v1/crates/regex", + "url": "https://crates.io/crates/regex", "artifacts": [ { + "version": "0.1.0", "checksums": { "sha256": "f0ff1ca641d3c9a2c30464dac30183a8b91cdcc959d616961be020cdea6255c5", # noqa: B950 }, "filename": "regex-0.1.0.crate", "url": "https://static.crates.io/crates/regex/regex-0.1.0.crate", - "version": "0.1.0", }, { + "version": "0.1.1", "checksums": { "sha256": "a07bef996bd38a73c21a8e345d2c16848b41aa7ec949e2fedffe9edf74cdfb36", # noqa: B950 }, "filename": "regex-0.1.1.crate", "url": "https://static.crates.io/crates/regex/regex-0.1.1.crate", - "version": "0.1.1", }, { + "version": "0.1.2", "checksums": { "sha256": "343bd0171ee23346506db6f4c64525de6d72f0e8cc533f83aea97f3e7488cbf9", # noqa: B950 }, "filename": "regex-0.1.2.crate", "url": "https://static.crates.io/crates/regex/regex-0.1.2.crate", - "version": "0.1.2", }, { + "version": "0.1.3", "checksums": { "sha256": "defb220c4054ca1b95fe8b0c9a6e782dda684c1bdf8694df291733ae8a3748e3", # noqa: B950 }, "filename": "regex-0.1.3.crate", "url": "https://static.crates.io/crates/regex/regex-0.1.3.crate", - "version": "0.1.3", }, ], - "metadata": [ + "crates_metadata": [ { 
"version": "0.1.0", + "last_update": "2017-11-30 02:51:27.240551", "yanked": False, }, { "version": "0.1.1", + "last_update": "2017-11-30 03:03:20.143103", "yanked": False, }, { "version": "0.1.2", + "last_update": "2017-11-30 02:29:20.01125", "yanked": False, }, { "version": "0.1.3", + "last_update": "2017-11-30 02:26:59.236947", "yanked": False, }, ], }, { - "url": "https://crates.io/api/v1/crates/regex-syntax", + "url": "https://crates.io/crates/regex-syntax", "artifacts": [ { + "version": "0.1.0", "checksums": { "sha256": "398952a2f6cd1d22bc1774fd663808e32cf36add0280dee5cdd84a8fff2db944", # noqa: B950 }, "filename": "regex-syntax-0.1.0.crate", - "url": "https://static.crates.io/crates/regex-syntax/regex-syntax-0.1.0.crate", - "version": "0.1.0", + "url": "https://static.crates.io/crates/regex-syntax/regex-syntax-0.1.0.crate", # noqa: B950 }, ], - "metadata": [ + "crates_metadata": [ { "version": "0.1.0", + "last_update": "2017-11-30 03:37:17.449539", "yanked": False, }, ], }, ] +expected_origins_incremental = { + "url": "https://crates.io/crates/pin-project", + "artifacts": [ + { + "version": "1.0.12", + "url": "https://static.crates.io/crates/pin-project/pin-project-1.0.12.crate", + "filename": "pin-project-1.0.12.crate", + "checksums": { + "sha256": "ad29a609b6bcd67fee905812e544992d216af9d755757c05ed2d0e15a74c6ecc" + }, + } + ], + "crates_metadata": [ + { + "version": "1.0.12", + "yanked": False, + "last_update": "2022-08-15 13:52:11.642129", + } + ], +} + + +def test_crates_lister_is_new(swh_scheduler): + lister = CratesLister(scheduler=swh_scheduler) -expected_origins_incremental = [expected_origins[1], expected_origins[2]] + index_last_update_state = CratesListerState( + index_last_update=iso8601.parse_date("2022-08-15 13:52:11.642129") + ) + lister.state = index_last_update_state + assert lister.is_new("2022-07-15 13:52:11.642129") is False # earlier + assert lister.is_new("2022-08-15 13:52:11.642129") is False # exactly equal + assert lister.is_new("2022-09-15 13:52:11.642129") is True # after -def test_crates_lister(datadir, tmp_path, swh_scheduler): - archive_path = Path(datadir, "fake-crates-repository.tar.gz") - repo_url = prepare_repository_from_archive( - archive_path, "crates.io-index", tmp_path - ) +def test_crates_lister(datadir, tmp_path, swh_scheduler, requests_mock_datadir): lister = CratesLister(scheduler=swh_scheduler) - lister.INDEX_REPOSITORY_URL = repo_url - lister.DESTINATION_PATH = tmp_path.parent / "crates.io-index-tests" - res = lister.run() assert res.pages == 3 assert res.origins == 3 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results + assert [ ( scheduled.visit_type, scheduled.url, scheduled.extra_loader_arguments["artifacts"], + scheduled.extra_loader_arguments["crates_metadata"], ) for scheduled in sorted(scheduler_origins, key=lambda scheduled: scheduled.url) ] == [ ( "crates", expected["url"], expected["artifacts"], + expected["crates_metadata"], ) for expected in sorted(expected_origins, key=lambda expected: expected["url"]) ] -def test_crates_lister_incremental(datadir, tmp_path, swh_scheduler): - archive_path = Path(datadir, "fake-crates-repository.tar.gz") - repo_url = prepare_repository_from_archive( - archive_path, "crates.io-index", tmp_path - ) - +def test_crates_lister_incremental( + datadir, tmp_path, swh_scheduler, requests_mock_datadir_visits +): lister = CratesLister(scheduler=swh_scheduler) - lister.INDEX_REPOSITORY_URL = repo_url - lister.DESTINATION_PATH = tmp_path.parent / "crates.io-index-tests" - # 
The lister has not run yet, get the index repository - lister.get_index_repository() - # Set a CratesListerState with a last commit value to force incremental case - repo = Repo(lister.DESTINATION_PATH) - # Lets set this last commit to third one from head - step = list(repo.get_walker(max_entries=3))[-1] - last_commit_state = CratesListerState(last_commit=step.commit.id.decode()) - lister.state = last_commit_state + first = lister.run() - res = lister.run() + assert first.pages == 3 + assert first.origins == 3 - assert res.pages == 2 - assert res.origins == 2 + second = lister.run() + + assert second.pages == 1 + assert second.origins == 1 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results + + expected_origins.append(expected_origins_incremental) + assert [ ( scheduled.visit_type, scheduled.url, scheduled.extra_loader_arguments["artifacts"], ) for scheduled in sorted(scheduler_origins, key=lambda scheduled: scheduled.url) ] == [ ( "crates", expected["url"], expected["artifacts"], ) - for expected in sorted( - expected_origins_incremental, key=lambda expected: expected["url"] - ) + for expected in sorted(expected_origins, key=lambda expected: expected["url"]) ] -def test_crates_lister_incremental_nothing_new(datadir, tmp_path, swh_scheduler): +def test_crates_lister_incremental_nothing_new( + datadir, tmp_path, swh_scheduler, requests_mock_datadir +): """Ensure incremental mode runs fine when the db dump timestamp is the same - than lister.state.las-_commit""" - archive_path = Path(datadir, "fake-crates-repository.tar.gz") - repo_url = prepare_repository_from_archive( - archive_path, "crates.io-index", tmp_path - ) - + as lister.state.index_last_update""" lister = CratesLister(scheduler=swh_scheduler) - lister.INDEX_REPOSITORY_URL = repo_url - lister.DESTINATION_PATH = tmp_path.parent / "crates.io-index-tests" - lister.get_index_repository() - - repo = Repo(lister.DESTINATION_PATH) + lister.get_and_parse_db_dump() # Set a CratesListerState with the db dump timestamp to force the incremental case - last_commit_state = CratesListerState(last_commit=repo.head().decode()) - lister.state = last_commit_state + index_last_update_state = CratesListerState( + index_last_update=iso8601.parse_date(lister.index_metadata["timestamp"]) + ) + lister.state = index_last_update_state res = lister.run() assert res.pages == 0 assert res.origins == 0 - - -def test_crates_lister_repository_cleanup(datadir, tmp_path, swh_scheduler): - archive_path = Path(datadir, "fake-crates-repository.tar.gz") - repo_url = prepare_repository_from_archive( - archive_path, "crates.io-index", tmp_path - ) - - lister = CratesLister(scheduler=swh_scheduler) - lister.INDEX_REPOSITORY_URL = repo_url - lister.DESTINATION_PATH = tmp_path.parent / "crates.io-index-tests" - - lister.run() - # Repository directory should not exists after the lister runs - assert not lister.DESTINATION_PATH.exists() diff --git a/swh/lister/debian/lister.py b/swh/lister/debian/lister.py index d31a33d..940e453 100644 --- a/swh/lister/debian/lister.py +++ b/swh/lister/debian/lister.py @@ -1,311 +1,276 @@ -# Copyright (C) 2017-2021 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information - import bz2 from collections import defaultdict from dataclasses import dataclass, field from email.utils import
parsedate_to_datetime import gzip from itertools import product import logging import lzma import os from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple from urllib.parse import urljoin from debian.deb822 import Sources -import requests +from requests.exceptions import HTTPError from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin -from .. import USER_AGENT from ..pattern import CredentialsType, Lister logger = logging.getLogger(__name__) decompressors: Dict[str, Callable[[Any], Any]] = { "gz": lambda f: gzip.GzipFile(fileobj=f), "bz2": bz2.BZ2File, "xz": lzma.LZMAFile, } Suite = str Component = str PkgName = str PkgVersion = str DebianOrigin = str DebianPageType = Iterator[Sources] @dataclass class DebianListerState: """State of debian lister""" package_versions: Dict[PkgName, Set[PkgVersion]] = field(default_factory=dict) """Dictionary mapping a package name to all the versions found during the last listing""" class DebianLister(Lister[DebianListerState, DebianPageType]): """ List source packages for a given debian or derivative distribution. The lister will create a snapshot for each package name from all its available versions. If a package snapshot is different from the last listing operation, it will be sent to the scheduler, which will create a loading task to archive the newly found source code. Args: scheduler: instance of SchedulerInterface distribution: identifier of listed distribution (e.g. Debian, Ubuntu) mirror_url: debian package archives mirror URL suites: list of distribution suites to process components: list of package components to process """ LISTER_NAME = "debian" def __init__( self, scheduler: SchedulerInterface, distribution: str = "Debian", mirror_url: str = "http://deb.debian.org/debian/", suites: List[Suite] = ["stretch", "buster", "bullseye"], components: List[Component] = ["main", "contrib", "non-free"], credentials: Optional[CredentialsType] = None, ): super().__init__( scheduler=scheduler, url=mirror_url, instance=distribution, credentials=credentials, ) # to ensure urljoin will produce valid Sources URL if not self.url.endswith("/"): self.url += "/" self.distribution = distribution self.suites = suites self.components = components - self.session = requests.Session() - self.session.headers.update({"User-Agent": USER_AGENT}) - # will hold all listed origins info self.listed_origins: Dict[DebianOrigin, ListedOrigin] = {} - # will contain origin urls that have already been listed - # in a previous page - self.sent_origins: Set[DebianOrigin] = set() - # will contain already listed package info that need to be sent - # to the scheduler for update in the commit_page method - self.origins_to_update: Dict[DebianOrigin, ListedOrigin] = {} + # will contain the lister state after a call to run self.package_versions: Dict[PkgName, Set[PkgVersion]] = {} def state_from_dict(self, d: Dict[str, Any]) -> DebianListerState: return DebianListerState(package_versions={k: set(v) for k, v in d.items()}) def state_to_dict(self, state: DebianListerState) -> Dict[str, Any]: return {k: list(v) for k, v in state.package_versions.items()} def debian_index_urls( self, suite: Suite, component: Component ) -> Iterator[Tuple[str, str]]: """Return an iterator on possible Sources file URLs as multiple compression formats can be used.""" compression_exts = ("xz", "bz2", "gz") base_urls = [ urljoin(self.url, f"dists/{suite}/{component}/source/Sources"), urljoin(self.url, f"dists/{suite}/updates/{component}/source/Sources"), ] for
base_url, ext in product(base_urls, compression_exts): yield (f"{base_url}.{ext}", ext) yield (base_url, "") def page_request(self, suite: Suite, component: Component) -> DebianPageType: """Return parsed package Sources file for a given debian suite and component.""" for url, compression in self.debian_index_urls(suite, component): - response = requests.get(url, stream=True) - logging.debug("Fetched URL: %s, status code: %s", url, response.status_code) - if response.status_code == 200: + try: + response = self.http_request(url, stream=True) + except HTTPError: + pass + else: last_modified = response.headers.get("Last-Modified") self.last_sources_update = ( parsedate_to_datetime(last_modified) if last_modified else None ) decompressor = decompressors.get(compression) if decompressor: data = decompressor(response.raw).readlines() else: data = response.raw.readlines() break else: data = "" logger.debug("Could not retrieve sources index for %s/%s", suite, component) return Sources.iter_paragraphs(data) def get_pages(self) -> Iterator[DebianPageType]: """Return an iterator on parsed debian package Sources files, one per combination of debian suite and component.""" for suite, component in product(self.suites, self.components): logger.debug( "Processing %s %s source packages info for %s component.", self.instance, suite, component, ) self.current_suite = suite self.current_component = component yield self.page_request(suite, component) def origin_url_for_package(self, package_name: PkgName) -> DebianOrigin: """Return the origin url for the given package""" return f"deb://{self.instance}/packages/{package_name}" def get_origins_from_page(self, page: DebianPageType) -> Iterator[ListedOrigin]: """Convert a page of debian package sources into an iterator of ListedOrigin. Please note that the yielded origins are filtered against the lister state: a package whose versions are all already known from a previous listing is not sent again to the scheduler. Indeed as multiple debian suites can be processed, a similar set of package names can be listed for two different package source pages, only their versions will differ; in that case the cached ListedOrigin object is updated with the new package versions and yielded again.
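
        For illustration, the ``packages`` mapping sent to the debian loader
        through ``extra_loader_arguments`` is keyed by
        ``{suite}/{component}/{version}``; a sketch with a made-up package,
        based on the code below::

            {
                "bullseye/main/1.0-1": {
                    "name": "foo",
                    "version": "1.0-1",
                    "files": {
                        "foo_1.0-1.dsc": {
                            "name": "foo_1.0-1.dsc",
                            "size": 1234,
                            "md5sum": "...",
                            "uri": "http://deb.debian.org/debian/pool/main/f/foo/foo_1.0-1.dsc",
                        },
                    },
                },
            }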
""" assert self.lister_obj.id is not None origins_to_send = {} - self.origins_to_update = {} # iterate on each package source info for src_pkg in page: # gather package files info that will be used by the debian loader files: Dict[str, Dict[str, Any]] = defaultdict(dict) for field_ in src_pkg._multivalued_fields: if field_.startswith("checksums-"): sum_name = field_[len("checksums-") :] else: sum_name = "md5sum" if field_ in src_pkg: for entry in src_pkg[field_]: name = entry["name"] files[name]["name"] = name files[name]["size"] = int(entry["size"], 10) files[name][sum_name] = entry[sum_name] files[name]["uri"] = os.path.join( self.url, src_pkg["Directory"], name ) # extract package name and version package_name = src_pkg["Package"] package_version = src_pkg["Version"] # build origin url origin_url = self.origin_url_for_package(package_name) # create package version key as expected by the debian loader package_version_key = ( f"{self.current_suite}/{self.current_component}/{package_version}" ) # this is the first time a package is listed if origin_url not in self.listed_origins: # create a ListedOrigin object for it that can be later # updated with new package versions info self.listed_origins[origin_url] = ListedOrigin( lister_id=self.lister_obj.id, url=origin_url, visit_type="deb", extra_loader_arguments={"packages": {}}, last_update=self.last_sources_update, ) - # origin will be yielded at the end of that method - origins_to_send[origin_url] = self.listed_origins[origin_url] # init set that will contain all listed package versions self.package_versions[package_name] = set() - # package has already been listed in a previous page or current page - elif origin_url not in origins_to_send: - # if package has been listed in a previous page, its new versions - # will be added to its ListedOrigin object but the update will - # be sent to the scheduler in the commit_page method - self.origins_to_update[origin_url] = self.listed_origins[origin_url] + # origin will be yielded at the end of that method + origins_to_send[origin_url] = self.listed_origins[origin_url] # update package versions data in parameter that will be provided # to the debian loader self.listed_origins[origin_url].extra_loader_arguments["packages"].update( { package_version_key: { "name": package_name, "version": package_version, "files": files, } } ) if self.listed_origins[origin_url].last_update is None or ( self.last_sources_update is not None and self.last_sources_update # type: ignore > self.listed_origins[origin_url].last_update ): # update debian package last update if current processed sources index # has a greater modification date self.listed_origins[origin_url].last_update = self.last_sources_update # add package version key to the set of found versions self.package_versions[package_name].add(package_version_key) # package has already been listed during a previous listing process if package_name in self.state.package_versions: new_versions = ( self.package_versions[package_name] - self.state.package_versions[package_name] ) # no new versions so far, no need to send the origin to the scheduler if not new_versions: origins_to_send.pop(origin_url, None) - self.origins_to_update.pop(origin_url, None) - # new versions found, ensure the origin will be sent to the scheduler - elif origin_url not in self.sent_origins: - self.origins_to_update.pop(origin_url, None) - origins_to_send[origin_url] = self.listed_origins[origin_url] - - # update already counted origins with changes since last page - 
self.sent_origins.update(origins_to_send.keys()) - logger.debug( - "Found %s new packages, %s packages with new versions.", - len(origins_to_send), - len(self.origins_to_update), - ) + logger.debug("Found %s new packages.", len(origins_to_send)) logger.debug( "Current total number of listed packages is equal to %s.", len(self.listed_origins), ) yield from origins_to_send.values() - def get_origins_to_update(self) -> Iterator[ListedOrigin]: - yield from self.origins_to_update.values() - - def commit_page(self, page: DebianPageType): - """Send to scheduler already listed origins where new versions have been found - in current page.""" - self.send_origins(self.get_origins_to_update()) - def finalize(self): # set mapping between listed package names and versions as lister state self.state.package_versions = self.package_versions - self.updated = len(self.sent_origins) > 0 + self.updated = len(self.listed_origins) > 0 diff --git a/swh/lister/gitea/tests/test_lister.py b/swh/lister/gitea/tests/test_lister.py index 8e3242b..0cf59ad 100644 --- a/swh/lister/gitea/tests/test_lister.py +++ b/swh/lister/gitea/tests/test_lister.py @@ -1,176 +1,176 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from pathlib import Path from typing import Dict, List, Tuple import pytest import requests from requests import HTTPError from swh.lister.gitea.lister import GiteaLister from swh.lister.gogs.lister import GogsListerPage from swh.scheduler.model import ListedOrigin TRYGITEA_URL = "https://try.gitea.io/api/v1/" TRYGITEA_P1_URL = TRYGITEA_URL + "repos/search?limit=3&page=1" TRYGITEA_P2_URL = TRYGITEA_URL + "repos/search?limit=3&page=2" @pytest.fixture def trygitea_p1(datadir) -> Tuple[str, Dict[str, str], GogsListerPage, List[str]]: text = Path(datadir, "https_try.gitea.io", "repos_page1").read_text() headers = { "Link": '<{p2}>; rel="next",<{p2}>; rel="last"'.format(p2=TRYGITEA_P2_URL) } page_data = json.loads(text) page_result = GogsListerPage( repos=GiteaLister.extract_repos(page_data), next_link=TRYGITEA_P2_URL ) origin_urls = [r["clone_url"] for r in page_data["data"]] return text, headers, page_result, origin_urls @pytest.fixture def trygitea_p2(datadir) -> Tuple[str, Dict[str, str], GogsListerPage, List[str]]: text = Path(datadir, "https_try.gitea.io", "repos_page2").read_text() headers = { "Link": '<{p1}>; rel="prev",<{p1}>; rel="first"'.format(p1=TRYGITEA_P1_URL) } page_data = json.loads(text) page_result = GogsListerPage( repos=GiteaLister.extract_repos(page_data), next_link=None ) origin_urls = [r["clone_url"] for r in page_data["data"]] return text, headers, page_result, origin_urls def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedOrigin]): """Asserts that the two collections have the same origin URLs. 
Does not test last_update.""" assert set(lister_urls) == {origin.url for origin in scheduler_origins} def test_gitea_full_listing( swh_scheduler, requests_mock, mocker, trygitea_p1, trygitea_p2 ): """Covers full listing of multiple pages, rate-limit, page size (required for test), checking page results and listed origins, statelessness.""" kwargs = dict(url=TRYGITEA_URL, instance="try_gitea", page_size=3) lister = GiteaLister(scheduler=swh_scheduler, **kwargs) lister.get_origins_from_page = mocker.spy(lister, "get_origins_from_page") p1_text, p1_headers, p1_result, p1_origin_urls = trygitea_p1 p2_text, p2_headers, p2_result, p2_origin_urls = trygitea_p2 requests_mock.get(TRYGITEA_P1_URL, text=p1_text, headers=p1_headers) requests_mock.get( TRYGITEA_P2_URL, [ {"status_code": requests.codes.too_many_requests}, {"text": p2_text, "headers": p2_headers}, ], ) # end test setup stats = lister.run() # start test checks assert stats.pages == 2 assert stats.origins == 6 calls = [mocker.call(p1_result), mocker.call(p2_result)] lister.get_origins_from_page.assert_has_calls(calls) scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results check_listed_origins(p1_origin_urls + p2_origin_urls, scheduler_origins) lister_state = lister.get_state_from_scheduler() assert lister_state.last_seen_next_link == TRYGITEA_P2_URL assert lister_state.last_seen_repo_id == p2_result.repos[-1]["id"] def test_gitea_auth_instance(swh_scheduler, requests_mock, trygitea_p1): """Covers token authentication, token from credentials, instance inference from URL.""" api_token = "teapot" instance = "try.gitea.io" creds = {"gitea": {instance: [{"username": "u", "password": api_token}]}} kwargs1 = dict(url=TRYGITEA_URL, api_token=api_token) lister = GiteaLister(scheduler=swh_scheduler, **kwargs1) # test API token assert "Authorization" in lister.session.headers assert lister.session.headers["Authorization"].lower() == "token %s" % api_token kwargs2 = dict(url=TRYGITEA_URL, credentials=creds) lister = GiteaLister(scheduler=swh_scheduler, **kwargs2) # test API token from credentials assert "Authorization" in lister.session.headers assert lister.session.headers["Authorization"].lower() == "token %s" % api_token # test instance inference from URL assert lister.instance assert "gitea" in lister.instance # infer something related to that # setup requests mocking p1_text, p1_headers, _, _ = trygitea_p1 p1_headers["Link"] = p1_headers["Link"].replace("next", "") # only 1 page base_url = TRYGITEA_URL + lister.REPO_LIST_PATH requests_mock.get(base_url, text=p1_text, headers=p1_headers) # now check the lister runs without error stats = lister.run() assert stats.pages == 1 -@pytest.mark.parametrize("http_code", [400, 500, 502]) +@pytest.mark.parametrize("http_code", [400, 500]) def test_gitea_list_http_error( swh_scheduler, requests_mock, http_code, trygitea_p1, trygitea_p2 ): """Test handling of some HTTP errors commonly encountered""" lister = GiteaLister(scheduler=swh_scheduler, url=TRYGITEA_URL, page_size=3) p1_text, p1_headers, _, p1_origin_urls = trygitea_p1 p3_text, p3_headers, _, p3_origin_urls = trygitea_p2 base_url = TRYGITEA_URL + lister.REPO_LIST_PATH requests_mock.get( base_url, [ {"text": p1_text, "headers": p1_headers, "status_code": 200}, {"status_code": http_code}, {"text": p3_text, "headers": p3_headers, "status_code": 200}, ], ) # pages with fatal repositories should be skipped (no error raised) # See T4423 for more details if http_code == 500: lister.run() else: with pytest.raises(HTTPError): 
lister.run() # Both P1 and P3 origins should be listed in case of 500 error # While in other cases, only P1 origins should be listed scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results check_listed_origins( (p1_origin_urls + p3_origin_urls) if http_code == 500 else p1_origin_urls, scheduler_origins, ) diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py index acef224..ae10d71 100644 --- a/swh/lister/github/lister.py +++ b/swh/lister/github/lister.py @@ -1,208 +1,208 @@ # Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from dataclasses import asdict, dataclass import datetime import logging from typing import Any, Dict, Iterator, List, Optional, Set from urllib.parse import parse_qs, urlparse import iso8601 from swh.core.github.utils import GitHubSession, MissingRateLimitReset from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin -from .. import USER_AGENT from ..pattern import CredentialsType, Lister logger = logging.getLogger(__name__) @dataclass class GitHubListerState: """State of the GitHub lister""" last_seen_id: int = 0 """Numeric id of the last repository listed on an incremental pass""" class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]): """List origins from GitHub. By default, the lister runs in incremental mode: it lists all repositories, starting with the `last_seen_id` stored in the scheduler backend. Providing the `first_id` and `last_id` arguments enables the "relisting" mode: in that mode, the lister finds the origins present in the range **excluding** `first_id` and **including** `last_id`. In this mode, the lister can overrun the `last_id`: it will always record all the origins seen in a given page. As the lister is fully idempotent, this is not a practical problem. Once relisting completes, the lister state in the scheduler backend is not updated. When the config contains a set of credentials, we shuffle this list at the beginning of the listing. To follow GitHub's `abuse rate limit policy`_, we keep using the same token over and over again, until its rate limit runs out. Once that happens, we switch to the next token over in our shuffled list. When a request fails with a rate limit exception for all tokens, we pause the listing until the largest value for X-Ratelimit-Reset over all tokens. When the credentials aren't set in the lister config, the lister can run in anonymous mode too (e.g. for testing purposes). .. _abuse rate limit policy: https://developer.github.com/v3/guides/best-practices-for-integrators/#dealing-with-abuse-rate-limits Args: first_id: the id of the first repo to list last_id: stop listing after seeing a repo with an id higher than this value. 
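
    A minimal usage sketch (``scheduler`` is assumed to be an existing
    SchedulerInterface instance)::

        # incremental mode, resuming after the stored last_seen_id
        lister = GitHubLister(scheduler=scheduler)
        lister.run()

        # relisting mode, covering ids 1000 (excluded) to 2000 (included)
        relister = GitHubLister(scheduler=scheduler, first_id=1000, last_id=2000)
        relister.run()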
""" # noqa: B950 LISTER_NAME = "github" API_URL = "https://api.github.com/repositories" PAGE_SIZE = 1000 def __init__( self, scheduler: SchedulerInterface, credentials: CredentialsType = None, first_id: Optional[int] = None, last_id: Optional[int] = None, ): super().__init__( scheduler=scheduler, credentials=credentials, url=self.API_URL, instance="github", ) self.first_id = first_id self.last_id = last_id self.relisting = self.first_id is not None or self.last_id is not None self.github_session = GitHubSession( - credentials=self.credentials, user_agent=USER_AGENT + credentials=self.credentials, + user_agent=str(self.session.headers["User-Agent"]), ) def state_from_dict(self, d: Dict[str, Any]) -> GitHubListerState: return GitHubListerState(**d) def state_to_dict(self, state: GitHubListerState) -> Dict[str, Any]: return asdict(state) def get_pages(self) -> Iterator[List[Dict[str, Any]]]: current_id = 0 if self.first_id is not None: current_id = self.first_id elif self.state is not None: current_id = self.state.last_seen_id current_url = f"{self.API_URL}?since={current_id}&per_page={self.PAGE_SIZE}" while self.last_id is None or current_id < self.last_id: logger.debug("Getting page %s", current_url) try: response = self.github_session.request(current_url) except MissingRateLimitReset: # Give up break # We've successfully retrieved a (non-ratelimited) `response`. We # still need to check it for validity. if response.status_code != 200: logger.warning( "Got unexpected status_code %s: %s", response.status_code, response.content, ) break yield response.json() if "next" not in response.links: # No `next` link, we've reached the end of the world logger.debug( "No next link found in the response headers, all caught up" ) break # GitHub strongly advises to use the next link directly. We still # parse it to get the id of the last repository we've reached so # far. next_url = response.links["next"]["url"] parsed_url = urlparse(next_url) if not parsed_url.query: logger.warning("Failed to parse url %s", next_url) break parsed_query = parse_qs(parsed_url.query) current_id = int(parsed_query["since"][0]) current_url = next_url def get_origins_from_page( self, page: List[Dict[str, Any]] ) -> Iterator[ListedOrigin]: """Convert a page of GitHub repositories into a list of ListedOrigins. This records the html_url, as well as the pushed_at value if it exists. """ assert self.lister_obj.id is not None seen_in_page: Set[str] = set() for repo in page: if not repo: # null repositories in listings happen sometimes... 
continue if repo["html_url"] in seen_in_page: continue seen_in_page.add(repo["html_url"]) pushed_at_str = repo.get("pushed_at") pushed_at: Optional[datetime.datetime] = None if pushed_at_str: pushed_at = iso8601.parse_date(pushed_at_str) yield ListedOrigin( lister_id=self.lister_obj.id, url=repo["html_url"], visit_type="git", last_update=pushed_at, ) def commit_page(self, page: List[Dict[str, Any]]): """Update the currently stored state using the latest listed page""" if self.relisting: # Don't update internal state when relisting return if not page: # Sometimes, when you reach the end of the world, GitHub returns an empty # page of repositories return last_id = page[-1]["id"] if last_id > self.state.last_seen_id: self.state.last_seen_id = last_id def finalize(self): if self.relisting: return # Pull fresh lister state from the scheduler backend scheduler_state = self.get_state_from_scheduler() # Update the lister state in the backend only if the last seen id of # the current run is higher than that stored in the database. if self.state.last_seen_id > scheduler_state.last_seen_id: self.updated = True diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py index 61006b0..f57b7e2 100644 --- a/swh/lister/gitlab/lister.py +++ b/swh/lister/gitlab/lister.py @@ -1,265 +1,260 @@ -# Copyright (C) 2018-2021 The Software Heritage developers +# Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from dataclasses import asdict, dataclass import logging import random from typing import Any, Dict, Iterator, Optional, Tuple from urllib.parse import parse_qs, urlencode, urlparse import iso8601 -import requests from requests.exceptions import HTTPError from requests.status_codes import codes from tenacity.before_sleep import before_sleep_log -from swh.lister import USER_AGENT from swh.lister.pattern import CredentialsType, Lister -from swh.lister.utils import is_retryable_exception, throttling_retry +from swh.lister.utils import http_retry, is_retryable_exception from swh.scheduler.model import ListedOrigin logger = logging.getLogger(__name__) # Some instances provide the hg_git type, which can be ingested as hg origins VCS_MAPPING = {"hg_git": "hg"} @dataclass class GitLabListerState: """State of the GitLabLister""" last_seen_next_link: Optional[str] = None """Last link header (not visited yet) during an incremental pass """ Repository = Dict[str, Any] @dataclass class PageResult: """Result from a query to a gitlab project api page.""" repositories: Optional[Tuple[Repository, ...]] = None next_page: Optional[str] = None def _if_rate_limited(retry_state) -> bool: """Custom tenacity retry predicate for handling HTTP responses with status code 403 and a specific rate-limit header. """ attempt = retry_state.outcome if attempt.failed: exc = attempt.exception() return ( isinstance(exc, HTTPError) and exc.response.status_code == codes.forbidden and int(exc.response.headers.get("RateLimit-Remaining", "0")) == 0 ) or is_retryable_exception(exc) return False def _parse_id_after(url: Optional[str]) -> Optional[int]: """Given a url, extract and return the value of the 'id_after' query parameter, or None. This is the repository id used for pagination purposes. """ if not url: return None # link: https://${project-api}/?...&id_after=2x...
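# For example, with the illustrative url
# "https://gitlab.example.org/api/v4/projects?id_after=42&per_page=100",
# parse_qs(urlparse(url).query) below yields
# {"id_after": ["42"], "per_page": ["100"]}, hence 42 is returned.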
query_data = parse_qs(urlparse(url).query) page = query_data.get("id_after") if page and len(page) > 0: return int(page[0]) return None class GitLabLister(Lister[GitLabListerState, PageResult]): """List origins for a gitlab instance. By default, the lister runs in incremental mode: it lists all repositories, starting with the `last_seen_next_link` stored in the scheduler backend. Args: scheduler: a scheduler instance url: the api v4 url of the gitlab instance to visit (e.g. https://gitlab.com/api/v4/) instance: a specific instance name (e.g. gitlab, tor, git-kernel, ...), url network location will be used if not provided incremental: defines if incremental listing is activated or not """ def __init__( self, scheduler, url: str, name: Optional[str] = "gitlab", instance: Optional[str] = None, credentials: Optional[CredentialsType] = None, incremental: bool = False, ): if name is not None: self.LISTER_NAME = name super().__init__( scheduler=scheduler, url=url.rstrip("/"), instance=instance, credentials=credentials, ) self.incremental = incremental self.last_page: Optional[str] = None self.per_page = 100 - self.session = requests.Session() - self.session.headers.update( - {"Accept": "application/json", "User-Agent": USER_AGENT} - ) + self.session.headers.update({"Accept": "application/json"}) if len(self.credentials) > 0: cred = random.choice(self.credentials) logger.info( "Using %s credentials from user %s", self.instance, cred["username"] ) api_token = cred["password"] if api_token: self.session.headers["Authorization"] = f"Bearer {api_token}" def state_from_dict(self, d: Dict[str, Any]) -> GitLabListerState: return GitLabListerState(**d) def state_to_dict(self, state: GitLabListerState) -> Dict[str, Any]: return asdict(state) - @throttling_retry( + @http_retry( retry=_if_rate_limited, before_sleep=before_sleep_log(logger, logging.WARNING) ) def get_page_result(self, url: str) -> PageResult: logger.debug("Fetching URL %s", url) response = self.session.get(url) if response.status_code != 200: logger.warning( "Unexpected HTTP status code %s on %s: %s", response.status_code, response.url, response.content, ) # GitLab API can return errors 500 when listing projects. # https://gitlab.com/gitlab-org/gitlab/-/issues/262629 # To avoid ending the listing prematurely, skip buggy URLs and move # to next pages. if response.status_code == 500: id_after = _parse_id_after(url) assert id_after is not None while True: next_id_after = id_after + self.per_page url = url.replace(f"id_after={id_after}", f"id_after={next_id_after}") response = self.session.get(url) if response.status_code == 200: break else: id_after = next_id_after else: response.raise_for_status() repositories: Tuple[Repository, ...] 
= tuple(response.json()) if hasattr(response, "links") and response.links.get("next"): next_page = response.links["next"]["url"] else: next_page = None return PageResult(repositories, next_page) def page_url(self, id_after: Optional[int] = None) -> str: parameters = { "pagination": "keyset", "order_by": "id", "sort": "asc", "simple": "true", "per_page": f"{self.per_page}", } if id_after is not None: parameters["id_after"] = str(id_after) return f"{self.url}/projects?{urlencode(parameters)}" def get_pages(self) -> Iterator[PageResult]: next_page: Optional[str] if self.incremental and self.state and self.state.last_seen_next_link: next_page = self.state.last_seen_next_link else: next_page = self.page_url() while next_page: self.last_page = next_page page_result = self.get_page_result(next_page) yield page_result next_page = page_result.next_page def get_origins_from_page(self, page_result: PageResult) -> Iterator[ListedOrigin]: assert self.lister_obj.id is not None repositories = page_result.repositories if page_result.repositories else [] for repo in repositories: visit_type = repo.get("vcs_type", "git") visit_type = VCS_MAPPING.get(visit_type, visit_type) yield ListedOrigin( lister_id=self.lister_obj.id, url=repo["http_url_to_repo"], visit_type=visit_type, last_update=iso8601.parse_date(repo["last_activity_at"]), ) def commit_page(self, page_result: PageResult) -> None: """Update currently stored state using the latest listed "next" page if relevant. Relevance is determined by the next_page link, whose 'id_after' value must be strictly greater than the currently stored one. Note: this is a noop for full listing mode """ if self.incremental: # link: https://${project-api}/?...&id_after=2x... next_page = page_result.next_page if not next_page and self.last_page: next_page = self.last_page if next_page: id_after = _parse_id_after(next_page) previous_next_page = self.state.last_seen_next_link previous_id_after = _parse_id_after(previous_next_page) if previous_next_page is None or ( previous_id_after and id_after and previous_id_after < id_after ): self.state.last_seen_next_link = next_page def finalize(self) -> None: """finalize the lister state when relevant (see `fn:commit_page` for details) Note: this is a noop for full listing mode """ next_page = self.state.last_seen_next_link if self.incremental and next_page: # link: https://${project-api}/?...&id_after=2x...
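# Only flag the state as updated when this run advanced the keyset cursor
# (the id_after value) strictly past what the scheduler backend last
# recorded, so an older or concurrent run cannot regress the stored state.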
next_id_after = _parse_id_after(next_page) scheduler_state = self.get_state_from_scheduler() previous_next_id_after = _parse_id_after( scheduler_state.last_seen_next_link ) if (not previous_next_id_after and next_id_after) or ( previous_next_id_after and next_id_after and previous_next_id_after < next_id_after ): self.updated = True diff --git a/swh/lister/gitlab/tests/test_lister.py b/swh/lister/gitlab/tests/test_lister.py index 80650b8..6bbffcd 100644 --- a/swh/lister/gitlab/tests/test_lister.py +++ b/swh/lister/gitlab/tests/test_lister.py @@ -1,357 +1,358 @@ -# Copyright (C) 2017-2021 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import functools import json import logging from pathlib import Path from typing import Dict, List import pytest from requests.status_codes import codes -from swh.lister import USER_AGENT +from swh.lister import USER_AGENT_TEMPLATE from swh.lister.gitlab.lister import GitLabLister, _parse_id_after from swh.lister.pattern import ListerStats from swh.lister.tests.test_utils import assert_sleep_calls from swh.lister.utils import WAIT_EXP_BASE logger = logging.getLogger(__name__) def api_url(instance: str) -> str: return f"https://{instance}/api/v4/" -def _match_request(request): - return request.headers.get("User-Agent") == USER_AGENT +def _match_request(request, lister_name="gitlab"): + return request.headers.get("User-Agent") == USER_AGENT_TEMPLATE % lister_name def test_lister_gitlab(datadir, swh_scheduler, requests_mock): """Gitlab lister supports full listing""" instance = "gitlab.com" lister = GitLabLister(swh_scheduler, url=api_url(instance), instance=instance) response = gitlab_page_response(datadir, instance, 1) requests_mock.get( lister.page_url(), [{"json": response}], additional_matcher=_match_request, ) listed_result = lister.run() expected_nb_origins = len(response) assert listed_result == ListerStats(pages=1, origins=expected_nb_origins) scheduler_origins = lister.scheduler.get_listed_origins( lister.lister_obj.id ).results assert len(scheduler_origins) == expected_nb_origins for listed_origin in scheduler_origins: assert listed_origin.visit_type == "git" assert listed_origin.url.startswith(f"https://{instance}") assert listed_origin.last_update is not None def test_lister_gitlab_heptapod(datadir, swh_scheduler, requests_mock): """Heptapod lister happily lists hg, hg_git as hg and git origins""" name = "heptapod" instance = "foss.heptapod.net" lister = GitLabLister( swh_scheduler, url=api_url(instance), name=name, instance=instance ) assert lister.LISTER_NAME == name response = gitlab_page_response(datadir, instance, 1) requests_mock.get( lister.page_url(), [{"json": response}], - additional_matcher=_match_request, + additional_matcher=functools.partial(_match_request, lister_name="heptapod"), ) listed_result = lister.run() expected_nb_origins = len(response) for entry in response: assert entry["vcs_type"] in ("hg", "hg_git") assert listed_result == ListerStats(pages=1, origins=expected_nb_origins) scheduler_origins = lister.scheduler.get_listed_origins( lister.lister_obj.id ).results assert len(scheduler_origins) == expected_nb_origins for listed_origin in scheduler_origins: assert listed_origin.visit_type == "hg" assert listed_origin.url.startswith(f"https://{instance}") assert listed_origin.last_update is not 
None def gitlab_page_response(datadir, instance: str, id_after: int) -> List[Dict]: """Return list of repositories (out of test dataset)""" datapath = Path(datadir, f"https_{instance}", f"api_response_page{id_after}.json") return json.loads(datapath.read_text()) if datapath.exists() else [] def test_lister_gitlab_with_pages(swh_scheduler, requests_mock, datadir): """Gitlab lister supports pagination""" instance = "gite.lirmm.fr" lister = GitLabLister(swh_scheduler, url=api_url(instance)) response1 = gitlab_page_response(datadir, instance, 1) response2 = gitlab_page_response(datadir, instance, 2) requests_mock.get( lister.page_url(), [{"json": response1, "headers": {"Link": f"<{lister.page_url(2)}>; rel=next"}}], additional_matcher=_match_request, ) requests_mock.get( lister.page_url(2), [{"json": response2}], additional_matcher=_match_request, ) listed_result = lister.run() expected_nb_origins = len(response1) + len(response2) assert listed_result == ListerStats(pages=2, origins=expected_nb_origins) scheduler_origins = lister.scheduler.get_listed_origins( lister.lister_obj.id ).results assert len(scheduler_origins) == expected_nb_origins for listed_origin in scheduler_origins: assert listed_origin.visit_type == "git" assert listed_origin.url.startswith(f"https://{instance}") assert listed_origin.last_update is not None def test_lister_gitlab_incremental(swh_scheduler, requests_mock, datadir): """Gitlab lister supports incremental visits""" instance = "gite.lirmm.fr" url = api_url(instance) lister = GitLabLister(swh_scheduler, url=url, instance=instance, incremental=True) url_page1 = lister.page_url() response1 = gitlab_page_response(datadir, instance, 1) url_page2 = lister.page_url(2) response2 = gitlab_page_response(datadir, instance, 2) url_page3 = lister.page_url(3) response3 = gitlab_page_response(datadir, instance, 3) requests_mock.get( url_page1, [{"json": response1, "headers": {"Link": f"<{url_page2}>; rel=next"}}], additional_matcher=_match_request, ) requests_mock.get( url_page2, [{"json": response2}], additional_matcher=_match_request, ) listed_result = lister.run() expected_nb_origins = len(response1) + len(response2) assert listed_result == ListerStats(pages=2, origins=expected_nb_origins) assert lister.state.last_seen_next_link == url_page2 lister2 = GitLabLister(swh_scheduler, url=url, instance=instance, incremental=True) # Lister will resume from the last stored state requests_mock.get( url_page2, [{"json": response2, "headers": {"Link": f"<{url_page3}>; rel=next"}}], additional_matcher=_match_request, ) requests_mock.get( url_page3, [{"json": response3}], additional_matcher=_match_request, ) listed_result2 = lister2.run() assert listed_result2 == ListerStats( pages=2, origins=len(response2) + len(response3) ) assert lister2.state.last_seen_next_link == url_page3 assert lister.lister_obj.id == lister2.lister_obj.id scheduler_origins = lister2.scheduler.get_listed_origins( lister2.lister_obj.id ).results assert len(scheduler_origins) == len(response1) + len(response2) + len(response3) for listed_origin in scheduler_origins: assert listed_origin.visit_type == "git" assert listed_origin.url.startswith(f"https://{instance}") assert listed_origin.last_update is not None def test_lister_gitlab_rate_limit(swh_scheduler, requests_mock, datadir, mocker): """Gitlab lister supports rate-limit""" instance = "gite.lirmm.fr" url = api_url(instance) lister = GitLabLister(swh_scheduler, url=url, instance=instance) url_page1 = lister.page_url() response1 = gitlab_page_response(datadir, instance, 1)
url_page2 = lister.page_url(2) response2 = gitlab_page_response(datadir, instance, 2) requests_mock.get( url_page1, [{"json": response1, "headers": {"Link": f"<{url_page2}>; rel=next"}}], additional_matcher=_match_request, ) requests_mock.get( url_page2, [ # rate limited twice {"status_code": codes.forbidden, "headers": {"RateLimit-Remaining": "0"}}, {"status_code": codes.forbidden, "headers": {"RateLimit-Remaining": "0"}}, # ok {"json": response2}, ], additional_matcher=_match_request, ) # To avoid this test being too slow, we mock sleep within the retry behavior mock_sleep = mocker.patch.object(lister.get_page_result.retry, "sleep") listed_result = lister.run() expected_nb_origins = len(response1) + len(response2) assert listed_result == ListerStats(pages=2, origins=expected_nb_origins) assert_sleep_calls(mocker, mock_sleep, [1, WAIT_EXP_BASE]) @pytest.mark.parametrize("status_code", [502, 503, 520]) def test_lister_gitlab_http_errors( swh_scheduler, requests_mock, datadir, mocker, status_code ): """Gitlab lister should retry requests when encountering HTTP 50x errors""" instance = "gite.lirmm.fr" url = api_url(instance) lister = GitLabLister(swh_scheduler, url=url, instance=instance) url_page1 = lister.page_url() response1 = gitlab_page_response(datadir, instance, 1) url_page2 = lister.page_url(2) response2 = gitlab_page_response(datadir, instance, 2) requests_mock.get( url_page1, [{"json": response1, "headers": {"Link": f"<{url_page2}>; rel=next"}}], additional_matcher=_match_request, ) requests_mock.get( url_page2, [ # first request ends up with error {"status_code": status_code}, # second request is ok {"json": response2}, ], additional_matcher=_match_request, ) # To avoid this test being too slow, we mock sleep within the retry behavior mock_sleep = mocker.patch.object(lister.get_page_result.retry, "sleep") listed_result = lister.run() expected_nb_origins = len(response1) + len(response2) assert listed_result == ListerStats(pages=2, origins=expected_nb_origins) assert_sleep_calls(mocker, mock_sleep, [1]) def test_lister_gitlab_http_error_500(swh_scheduler, requests_mock, datadir): """Gitlab lister should skip buggy URL and move to next page.""" instance = "gite.lirmm.fr" url = api_url(instance) lister = GitLabLister(swh_scheduler, url=url, instance=instance) url_page1 = lister.page_url() response1 = gitlab_page_response(datadir, instance, 1) url_page2 = lister.page_url(lister.per_page) url_page3 = lister.page_url(2 * lister.per_page) response3 = gitlab_page_response(datadir, instance, 3) requests_mock.get( url_page1, [{"json": response1, "headers": {"Link": f"<{url_page2}>; rel=next"}}], additional_matcher=_match_request, ) requests_mock.get( url_page2, [ {"status_code": 500}, ], additional_matcher=_match_request, ) requests_mock.get( url_page3, [{"json": response3}], additional_matcher=_match_request, ) listed_result = lister.run() expected_nb_origins = len(response1) + len(response3) assert listed_result == ListerStats(pages=2, origins=expected_nb_origins) def test_lister_gitlab_credentials(swh_scheduler): """Gitlab lister supports credentials configuration""" instance = "gitlab" credentials = { "gitlab": {instance: [{"username": "user", "password": "api-token"}]} } url = api_url(instance) lister = GitLabLister( scheduler=swh_scheduler, url=url, instance=instance, credentials=credentials ) assert lister.session.headers["Authorization"] == "Bearer api-token" @pytest.mark.parametrize( "url", [ api_url("gitlab").rstrip("/"), api_url("gitlab"), ], ) def 
test_lister_gitlab_url_computation(url, swh_scheduler): lister = GitLabLister(scheduler=swh_scheduler, url=url) assert not lister.url.endswith("/") page_url = lister.page_url() # ensure the generated url contains the separating / assert page_url.startswith(f"{lister.url}/projects") @pytest.mark.parametrize( "url,expected_result", [ (None, None), ("http://dummy/?query=1", None), ("http://dummy/?foo=bar&id_after=1&some=result", 1), ("http://dummy/?foo=bar&id_after=&some=result", None), ], ) def test__parse_id_after(url, expected_result): assert _parse_id_after(url) == expected_result diff --git a/swh/lister/gnu/tree.py b/swh/lister/gnu/tree.py index f414ef3..ec48cf0 100644 --- a/swh/lister/gnu/tree.py +++ b/swh/lister/gnu/tree.py @@ -1,332 +1,319 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime, timezone import gzip import json import logging from os import path from pathlib import Path import re from typing import Any, List, Mapping, Sequence, Tuple from urllib.parse import urlparse import requests +from swh.lister import TARBALL_EXTENSIONS + logger = logging.getLogger(__name__) class GNUTree: """Gnu Tree's representation""" def __init__(self, url: str): self.url = url # filepath or uri u = urlparse(url) self.base_url = "%s://%s" % (u.scheme, u.netloc) # Interesting top level directories self.top_level_directories = ["gnu", "old-gnu"] # internal state self._artifacts = {} # type: Mapping[str, Any] self._projects = {} # type: Mapping[str, Any] @property def projects(self) -> Mapping[str, Any]: if not self._projects: self._projects, self._artifacts = self._load() return self._projects @property def artifacts(self) -> Mapping[str, Any]: if not self._artifacts: self._projects, self._artifacts = self._load() return self._artifacts def _load(self) -> Tuple[Mapping[str, Any], Mapping[str, Any]]: """Compute projects and artifacts per project Returns: Tuple of dict projects (key project url, value the associated information) and a dict artifacts (key project url, value the info_file list) """ projects = {} artifacts = {} raw_data = load_raw_data(self.url)[0] for directory in raw_data["contents"]: if directory["name"] not in self.top_level_directories: continue infos = directory["contents"] for info in infos: if info["type"] == "directory": package_url = "%s/%s/%s/" % ( self.base_url, directory["name"], info["name"], ) package_artifacts = find_artifacts(info["contents"], package_url) if package_artifacts != []: repo_details = { "name": info["name"], "url": package_url, "time_modified": format_date(info["time"]), } artifacts[package_url] = package_artifacts projects[package_url] = repo_details return projects, artifacts def find_artifacts( filesystem: List[Mapping[str, Any]], url: str ) -> List[Mapping[str, Any]]: """Recursively list artifacts present in the folder and subfolders for a particular package url. Args: filesystem: File structure of the package root directory. This is a list of dicts, each representing file or directory information (keys: name, size, time, type). url: URL of the corresponding package Returns List of tarball urls and their associated metadata (time, length, etc...). For example: ..
code-block:: python [ { 'url': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.3.tar.gz', 'time': 1071002600, 'filename': '3DLDF-1.1.3.tar.gz', 'version': '1.1.3', 'length': 543 }, { 'url': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.4.tar.gz', 'time': 1071078759, 'filename': '3DLDF-1.1.4.tar.gz', 'version': '1.1.4', 'length': 456 }, { 'url': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.5.tar.gz', 'time': 1074278633, 'filename': '3DLDF-1.1.5.tar.gz', 'version': '1.1.5', 'length': 251 }, ... ] """ artifacts = [] # type: List[Mapping[str, Any]] for info_file in filesystem: filetype = info_file["type"] filename = info_file["name"] if filetype == "file": if check_filename_is_archive(filename): uri = url + filename artifacts.append( { "url": uri, "filename": filename, "time": format_date(info_file["time"]), "length": int(info_file["size"]), "version": get_version(filename), } ) # It will recursively check for artifacts in all sub-folders elif filetype == "directory": tarballs_in_dir = find_artifacts( info_file["contents"], url + filename + "/" ) artifacts.extend(tarballs_in_dir) return artifacts def check_filename_is_archive(filename: str) -> bool: """ Check the extension of the file: if the file is of zip format or .tar.x format, where x could be anything, then return True. Args: filename: name of the file whose extension needs to be checked. Returns: Whether filename is an archive or not Example: >>> check_filename_is_archive('abc.zip') True >>> check_filename_is_archive('abc.tar.gz') True >>> check_filename_is_archive('bac.tar') True >>> check_filename_is_archive('abc.tar.gz.sig') False >>> check_filename_is_archive('foobar.tar.') False """ file_suffixes = Path(filename).suffixes if len(file_suffixes) == 1 and file_suffixes[-1] in (".zip", ".tar"): return True elif len(file_suffixes) > 1: if file_suffixes[-1] == ".zip" or file_suffixes[-2] == ".tar": return True return False -# to recognize existing naming pattern -EXTENSIONS = [ - "zip", - "tar", - "gz", - "tgz", - "bz2", - "bzip2", - "lzma", - "lz", - "xz", - "Z", - "7z", -] VERSION_KEYWORDS = [ "cygwin_me", "w32", "win32", "nt", "cygwin", "mingw", "latest", "alpha", "beta", "release", "stable", "hppa", "solaris", "sunos", "sun4u", "sparc", "sun", "aix", "ibm", "rs6000", "i386", "i686", "linux", "redhat", "linuxlibc", "mips", "powerpc", "macos", "apple", "darwin", "macosx", "powermacintosh", "unknown", "netbsd", "freebsd", "sgi", "irix", ] # Match a filename into components. # # We use Debian's release number heuristic: A release number starts # with a digit, and is followed by alphanumeric characters or any of # ., +, :, ~ and - # # We hardcode a list of possible extensions, as this release number # scheme would match them too... We match on any combination of those. # # Greedy matching is done right to left (we only match the extension # greedily with +, software_name and release_number are matched lazily # with +? and *?). PATTERN = r""" ^ (?: # We have a software name and a release number, separated with a # -, _ or dot. (?P<software_name1>.+?[-_.]) (?P<release_number>({vkeywords}|[0-9][0-9a-zA-Z_.+:~-]*?)+) | # We couldn't match a release number, put everything in the # software name. (?P<software_name2>.+?) ) (?P<extension>(?:\.(?:{extensions}))+) $ """.format( - extensions="|".join(EXTENSIONS), + extensions="|".join(TARBALL_EXTENSIONS), vkeywords="|".join("%s[-]?"
% k for k in VERSION_KEYWORDS), ) def get_version(uri: str) -> str: """Extract branch name from tarball uri Args: uri (str): Tarball URI Returns: Version detected Example: >>> uri = 'https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz' >>> get_version(uri) '0.2.0' >>> uri = '8sync-0.3.0.tar.gz' >>> get_version(uri) '0.3.0' """ filename = path.split(uri)[-1] m = re.match(PATTERN, filename, flags=re.VERBOSE | re.IGNORECASE) if m: d = m.groupdict() if d["software_name1"] and d["release_number"]: return d["release_number"] if d["software_name2"]: return d["software_name2"] return "" def load_raw_data(url: str) -> Sequence[Mapping]: """Load the raw json from the tree.json.gz Args: url: Tree.json.gz url or path Returns: The raw json list """ if url.startswith("http://") or url.startswith("https://"): response = requests.get(url, allow_redirects=True) if not response.ok: raise ValueError("Error during query to %s" % url) raw = gzip.decompress(response.content) else: with gzip.open(url, "r") as f: raw = f.read() raw_data = json.loads(raw.decode("utf-8")) return raw_data def format_date(timestamp: str) -> str: """Format a string timestamp to an isoformat string""" return datetime.fromtimestamp(int(timestamp), tz=timezone.utc).isoformat() diff --git a/swh/lister/gogs/lister.py b/swh/lister/gogs/lister.py index 16d9626..f87100d 100644 --- a/swh/lister/gogs/lister.py +++ b/swh/lister/gogs/lister.py @@ -1,220 +1,206 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information + from dataclasses import asdict, dataclass import logging import random from typing import Any, Dict, Iterator, List, Optional, Tuple from urllib.parse import parse_qs, parse_qsl, urlencode, urljoin, urlparse import iso8601 -import requests -from tenacity.before_sleep import before_sleep_log +from requests.exceptions import HTTPError -from swh.lister.utils import throttling_retry from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin -from .. import USER_AGENT from ..pattern import CredentialsType, Lister logger = logging.getLogger(__name__) Repo = Dict[str, Any] @dataclass class GogsListerPage: repos: Optional[List[Repo]] = None next_link: Optional[str] = None @dataclass class GogsListerState: last_seen_next_link: Optional[str] = None """Last link header (could be already visited) during an incremental pass.""" last_seen_repo_id: Optional[int] = None """Last repo id seen during an incremental pass.""" def _parse_page_id(url: Optional[str]) -> int: """Parse the page id from a Gogs page url.""" if url is None: return 0 return int(parse_qs(urlparse(url).query)["page"][0]) class GogsLister(Lister[GogsListerState, GogsListerPage]): """List origins from the Gogs Gogs API documentation: https://github.com/gogs/docs-api The API is protected behind authentication so credentials/API tokens are mandatory. It supports pagination and provides next page URL through the 'next' value of the 'Link' header. The default value for page size ('limit') is 10 but the maximum allowed value is 50. 
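A minimal usage sketch (a hedged illustration: the scheduler instance and
    the token value are placeholders, not part of this module)::

        lister = GogsLister(
            scheduler=scheduler,
            url="https://try.gogs.io/api/v1/",
            instance="try_gogs",
            api_token="<token>",
            page_size=50,
        )
        stats = lister.run()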
""" LISTER_NAME = "gogs" VISIT_TYPE = "git" REPO_LIST_PATH = "repos/search" def __init__( self, scheduler: SchedulerInterface, url: str, instance: Optional[str] = None, api_token: Optional[str] = None, page_size: int = 50, credentials: CredentialsType = None, ): super().__init__( scheduler=scheduler, credentials=credentials, url=url, instance=instance, ) self.query_params = { "limit": page_size, } self.api_token = api_token if self.api_token is None: if len(self.credentials) > 0: cred = random.choice(self.credentials) username = cred.get("username") self.api_token = cred["password"] logger.info("Using authentication credentials from user %s", username) else: # Raises an error on Gogs, or a warning on Gitea self.on_anonymous_mode() - self.session = requests.Session() - self.session.headers.update( - { - "Accept": "application/json", - "User-Agent": USER_AGENT, - } - ) + self.session.headers.update({"Accept": "application/json"}) if self.api_token: self.session.headers["Authorization"] = f"token {self.api_token}" def on_anonymous_mode(self): raise ValueError("No credentials or API token provided") def state_from_dict(self, d: Dict[str, Any]) -> GogsListerState: return GogsListerState(**d) def state_to_dict(self, state: GogsListerState) -> Dict[str, Any]: return asdict(state) - @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) def page_request( self, url: str, params: Dict[str, Any] ) -> Tuple[Dict[str, Any], Dict[str, Any]]: logger.debug("Fetching URL %s with params %s", url, params) - response = self.session.get(url, params=params) - - if response.status_code != 200: - logger.warning( - "Unexpected HTTP status code %s on %s: %s", - response.status_code, - response.url, - response.content, - ) - if ( - response.status_code == 500 - ): # Temporary hack for skipping fatal repos (T4423) - url_parts = urlparse(url) - query: Dict[str, Any] = dict(parse_qsl(url_parts.query)) - query.update({"page": _parse_page_id(url) + 1}) - next_page_link = url_parts._replace(query=urlencode(query)).geturl() - body: Dict[str, Any] = {"data": []} - links = {"next": {"url": next_page_link}} - return body, links - else: - response.raise_for_status() + try: + response = self.http_request(url, params=params) + except HTTPError as http_error: + if ( + http_error.response.status_code == 500 + ): # Temporary hack for skipping fatal repos (T4423) + url_parts = urlparse(url) + query: Dict[str, Any] = dict(parse_qsl(url_parts.query)) + query.update({"page": _parse_page_id(url) + 1}) + next_page_link = url_parts._replace(query=urlencode(query)).geturl() + body: Dict[str, Any] = {"data": []} + links = {"next": {"url": next_page_link}} + return body, links + else: + raise return response.json(), response.links @classmethod def extract_repos(cls, body: Dict[str, Any]) -> List[Repo]: fields_filter = ["id", "clone_url", "updated_at"] return [{k: r[k] for k in fields_filter} for r in body["data"]] def get_pages(self) -> Iterator[GogsListerPage]: page_id = 1 if self.state.last_seen_next_link is not None: page_id = _parse_page_id(self.state.last_seen_next_link) # base with trailing slash, path without leading slash for urljoin next_link: Optional[str] = urljoin(self.url, self.REPO_LIST_PATH) + assert next_link is not None body, links = self.page_request( next_link, {**self.query_params, "page": page_id} ) while next_link is not None: repos = self.extract_repos(body) assert len(links) > 0, "API changed: no Link header found" if "next" in links: next_link = links["next"]["url"] else: next_link = None # Happens 
for the last page yield GogsListerPage(repos=repos, next_link=next_link) if next_link is not None: body, links = self.page_request(next_link, {}) def get_origins_from_page(self, page: GogsListerPage) -> Iterator[ListedOrigin]: """Convert a page of Gogs repositories into a list of ListedOrigins""" assert self.lister_obj.id is not None assert page.repos is not None for r in page.repos: last_update = iso8601.parse_date(r["updated_at"]) yield ListedOrigin( lister_id=self.lister_obj.id, visit_type=self.VISIT_TYPE, url=r["clone_url"], last_update=last_update, ) def commit_page(self, page: GogsListerPage) -> None: last_seen_next_link = page.next_link page_id = _parse_page_id(last_seen_next_link) state_page_id = _parse_page_id(self.state.last_seen_next_link) if page_id > state_page_id: self.state.last_seen_next_link = last_seen_next_link if (page.repos is not None) and len(page.repos) > 0: self.state.last_seen_repo_id = page.repos[-1]["id"] def finalize(self) -> None: scheduler_state = self.get_state_from_scheduler() state_page_id = _parse_page_id(self.state.last_seen_next_link) scheduler_page_id = _parse_page_id(scheduler_state.last_seen_next_link) state_last_repo_id = self.state.last_seen_repo_id or 0 scheduler_last_repo_id = scheduler_state.last_seen_repo_id or 0 if (state_page_id >= scheduler_page_id) and ( state_last_repo_id > scheduler_last_repo_id ): self.updated = True # Marked updated only if it finds new repos diff --git a/swh/lister/gogs/tests/test_lister.py b/swh/lister/gogs/tests/test_lister.py index bcac533..4f9e370 100644 --- a/swh/lister/gogs/tests/test_lister.py +++ b/swh/lister/gogs/tests/test_lister.py @@ -1,330 +1,330 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from pathlib import Path from typing import List from unittest.mock import Mock import pytest from requests import HTTPError from swh.lister.gogs.lister import GogsLister, GogsListerPage, _parse_page_id from swh.scheduler.model import ListedOrigin TRY_GOGS_URL = "https://try.gogs.io/api/v1/" def try_gogs_page(n: int): return TRY_GOGS_URL + GogsLister.REPO_LIST_PATH + f"?page={n}&limit=3" P1 = try_gogs_page(1) P2 = try_gogs_page(2) P3 = try_gogs_page(3) P4 = try_gogs_page(4) @pytest.fixture def trygogs_p1(datadir): text = Path(datadir, "https_try.gogs.io", "repos_page1").read_text() headers = {"Link": f'<{P2}>; rel="next"'} page_result = GogsListerPage( repos=GogsLister.extract_repos(json.loads(text)), next_link=P2 ) origin_urls = [r["clone_url"] for r in page_result.repos] return text, headers, page_result, origin_urls @pytest.fixture def trygogs_p2(datadir): text = Path(datadir, "https_try.gogs.io", "repos_page2").read_text() headers = {"Link": f'<{P3}>; rel="next",<{P1}>; rel="prev"'} page_result = GogsListerPage( repos=GogsLister.extract_repos(json.loads(text)), next_link=P3 ) origin_urls = [r["clone_url"] for r in page_result.repos] return text, headers, page_result, origin_urls @pytest.fixture def trygogs_p3(datadir): text = Path(datadir, "https_try.gogs.io", "repos_page3").read_text() headers = {"Link": f'<{P4}>; rel="next",<{P2}>; rel="prev"'} page_result = GogsListerPage( repos=GogsLister.extract_repos(json.loads(text)), next_link=P3 ) origin_urls = [r["clone_url"] for r in page_result.repos] return text, headers, page_result, origin_urls @pytest.fixture def trygogs_p4(datadir): text = Path(datadir, 
"https_try.gogs.io", "repos_page4").read_text() headers = {"Link": f'<{P3}>; rel="prev"'} page_result = GogsListerPage( repos=GogsLister.extract_repos(json.loads(text)), next_link=P3 ) origin_urls = [r["clone_url"] for r in page_result.repos] return text, headers, page_result, origin_urls @pytest.fixture def trygogs_p3_last(datadir): text = Path(datadir, "https_try.gogs.io", "repos_page3").read_text() headers = {"Link": f'<{P2}>; rel="prev",<{P1}>; rel="first"'} page_result = GogsListerPage( repos=GogsLister.extract_repos(json.loads(text)), next_link=None ) origin_urls = [r["clone_url"] for r in page_result.repos] return text, headers, page_result, origin_urls @pytest.fixture def trygogs_p3_empty(): origins_urls = [] body = {"data": [], "ok": True} headers = {"Link": f'<{P2}>; rel="prev",<{P1}>; rel="first"'} page_result = GogsListerPage(repos=GogsLister.extract_repos(body), next_link=None) text = json.dumps(body) return text, headers, page_result, origins_urls def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedOrigin]): """Asserts that the two collections have the same origin URLs. Does not test last_update.""" assert set(lister_urls) == {origin.url for origin in scheduler_origins} def test_gogs_full_listing( swh_scheduler, requests_mock, mocker, trygogs_p1, trygogs_p2, trygogs_p3_last ): kwargs = dict( url=TRY_GOGS_URL, instance="try_gogs", page_size=3, api_token="secret" ) lister = GogsLister(scheduler=swh_scheduler, **kwargs) lister.get_origins_from_page: Mock = mocker.spy(lister, "get_origins_from_page") p1_text, p1_headers, p1_result, p1_origin_urls = trygogs_p1 p2_text, p2_headers, p2_result, p2_origin_urls = trygogs_p2 p3_text, p3_headers, p3_result, p3_origin_urls = trygogs_p3_last requests_mock.get(P1, text=p1_text, headers=p1_headers) requests_mock.get(P2, text=p2_text, headers=p2_headers) requests_mock.get(P3, text=p3_text, headers=p3_headers) stats = lister.run() assert stats.pages == 3 assert stats.origins == 9 calls = map(mocker.call, [p1_result, p2_result, p3_result]) lister.get_origins_from_page.assert_has_calls(list(calls)) scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results check_listed_origins( p1_origin_urls + p2_origin_urls + p3_origin_urls, scheduler_origins ) assert ( lister.get_state_from_scheduler().last_seen_next_link == P3 ) # P3 didn't provide any next link so it remains the last_seen_next_link def test_gogs_auth_instance( swh_scheduler, requests_mock, trygogs_p1, trygogs_p2, trygogs_p3_empty ): """Covers token authentication, token from credentials, instance inference from URL.""" api_token = "secret" instance = "try_gogs" # Test lister initialization without api_token or credentials: with pytest.raises(ValueError, match="No credentials or API token provided"): kwargs1 = dict(url=TRY_GOGS_URL, instance=instance) GogsLister(scheduler=swh_scheduler, **kwargs1) # Test lister initialization using api_token: kwargs2 = dict(url=TRY_GOGS_URL, api_token=api_token, instance=instance) lister = GogsLister(scheduler=swh_scheduler, **kwargs2) assert lister.session.headers["Authorization"].lower() == "token %s" % api_token # Test lister initialization with credentials and run it: creds = {"gogs": {instance: [{"username": "u", "password": api_token}]}} kwargs3 = dict(url=TRY_GOGS_URL, credentials=creds, instance=instance, page_size=3) lister = GogsLister(scheduler=swh_scheduler, **kwargs3) assert lister.session.headers["Authorization"].lower() == "token %s" % api_token assert lister.instance == "try_gogs" # setup 
requests mocking p1_text, p1_headers, _, _ = trygogs_p1 p2_text, p2_headers, _, _ = trygogs_p2 p3_text, p3_headers, _, _ = trygogs_p3_empty requests_mock.get(P1, text=p1_text, headers=p1_headers) requests_mock.get(P2, text=p2_text, headers=p2_headers) requests_mock.get(P3, text=p3_text, headers=p3_headers) # lister should run without any error and extract the origins stats = lister.run() assert stats.pages == 3 assert stats.origins == 6 -@pytest.mark.parametrize("http_code", [400, 500, 502]) +@pytest.mark.parametrize("http_code", [400, 500]) def test_gogs_list_http_error( swh_scheduler, requests_mock, http_code, trygogs_p1, trygogs_p3_last ): """Test handling of some HTTP errors commonly encountered""" lister = GogsLister(scheduler=swh_scheduler, url=TRY_GOGS_URL, api_token="secret") p1_text, p1_headers, _, p1_origin_urls = trygogs_p1 p3_text, p3_headers, _, p3_origin_urls = trygogs_p3_last base_url = TRY_GOGS_URL + lister.REPO_LIST_PATH requests_mock.get( base_url, [ {"text": p1_text, "headers": p1_headers, "status_code": 200}, {"status_code": http_code}, {"text": p3_text, "headers": p3_headers, "status_code": 200}, ], ) # pages with fatal repositories should be skipped (no error raised) # See T4423 for more details if http_code == 500: lister.run() else: with pytest.raises(HTTPError): lister.run() # Both P1 and P3 origins should be listed in case of 500 error # While in other cases, only P1 origins should be listed scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results check_listed_origins( (p1_origin_urls + p3_origin_urls) if http_code == 500 else p1_origin_urls, scheduler_origins, ) def test_gogs_incremental_lister( swh_scheduler, requests_mock, mocker, trygogs_p1, trygogs_p2, trygogs_p3, trygogs_p3_last, trygogs_p3_empty, trygogs_p4, ): kwargs = dict( url=TRY_GOGS_URL, instance="try_gogs", page_size=3, api_token="secret" ) lister = GogsLister(scheduler=swh_scheduler, **kwargs) lister.get_origins_from_page: Mock = mocker.spy(lister, "get_origins_from_page") # First listing attempt: P1 and P2 return 3 origins each # while P3 (current last page) is empty. p1_text, p1_headers, p1_result, p1_origin_urls = trygogs_p1 p2_text, p2_headers, p2_result, p2_origin_urls = trygogs_p2 p3_text, p3_headers, p3_result, p3_origin_urls = trygogs_p3_empty requests_mock.get(P1, text=p1_text, headers=p1_headers) requests_mock.get(P2, text=p2_text, headers=p2_headers) requests_mock.get(P3, text=p3_text, headers=p3_headers) attempt1_stats = lister.run() assert attempt1_stats.pages == 3 assert attempt1_stats.origins == 6 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results lister_state = lister.get_state_from_scheduler() assert lister_state.last_seen_next_link == P3 assert lister_state.last_seen_repo_id == p2_result.repos[-1]["id"] assert lister.updated check_listed_origins(p1_origin_urls + p2_origin_urls, scheduler_origins) lister.updated = False # Reset the flag # Second listing attempt: P3 isn't empty anymore. # The lister should restart from last state and hence revisit P3. 
p3_text, p3_headers, p3_result, p3_origin_urls = trygogs_p3_last requests_mock.get(P3, text=p3_text, headers=p3_headers) - lister.session.get = mocker.spy(lister.session, "get") + lister.session.request = mocker.spy(lister.session, "request") attempt2_stats = lister.run() assert attempt2_stats.pages == 1 assert attempt2_stats.origins == 3 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results page_id = _parse_page_id(lister_state.last_seen_next_link) query_params = lister.query_params query_params["page"] = page_id - lister.session.get.assert_called_once_with( - TRY_GOGS_URL + lister.REPO_LIST_PATH, params=query_params + lister.session.request.assert_called_once_with( + "GET", TRY_GOGS_URL + lister.REPO_LIST_PATH, params=query_params ) # All the 9 origins (3 pages) should be passed on to the scheduler: check_listed_origins( p1_origin_urls + p2_origin_urls + p3_origin_urls, scheduler_origins ) lister_state = lister.get_state_from_scheduler() assert lister_state.last_seen_next_link == P3 assert lister_state.last_seen_repo_id == p3_result.repos[-1]["id"] assert lister.updated lister.updated = False # Reset the flag # Third listing attempt: No new origins # The lister should revisit last seen page (P3) attempt3_stats = lister.run() assert attempt3_stats.pages == 1 assert attempt3_stats.origins == 3 lister_state = lister.get_state_from_scheduler() assert lister_state.last_seen_next_link == P3 assert lister_state.last_seen_repo_id == p3_result.repos[-1]["id"] assert lister.updated is False # No new origins so state isn't updated. # Fourth listing attempt: Page 4 is introduced and returns 3 new origins # The lister should revisit last seen page (P3) as well as P4. p3_text, p3_headers, p3_result, p3_origin_urls = trygogs_p3 # new P3 points to P4 p4_text, p4_headers, p4_result, p4_origin_urls = trygogs_p4 requests_mock.get(P3, text=p3_text, headers=p3_headers) requests_mock.get(P4, text=p4_text, headers=p4_headers) attempt4_stats = lister.run() assert attempt4_stats.pages == 2 assert attempt4_stats.origins == 6 lister_state = lister.get_state_from_scheduler() assert lister_state.last_seen_next_link == P4 assert lister_state.last_seen_repo_id == p4_result.repos[-1]["id"] assert lister.updated # All the 12 origins (4 pages) should be passed on to the scheduler: scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results check_listed_origins( p1_origin_urls + p2_origin_urls + p3_origin_urls + p4_origin_urls, scheduler_origins, ) diff --git a/swh/lister/golang/lister.py b/swh/lister/golang/lister.py index 0a2f141..10e5935 100644 --- a/swh/lister/golang/lister.py +++ b/swh/lister/golang/lister.py @@ -1,188 +1,164 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from dataclasses import dataclass from datetime import datetime import json import logging from typing import Any, Dict, Iterator, List, Optional, Tuple import iso8601 -import requests -from tenacity import before_sleep_log -from swh.lister.utils import retry_policy_generic, throttling_retry from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin -from .. 
import USER_AGENT from ..pattern import CredentialsType, Lister logger = logging.getLogger(__name__) @dataclass class GolangStateType: last_seen: Optional[datetime] = None """Last timestamp of a package version we have saved. Used as a starting point for an incremental listing.""" GolangPageType = List[Dict[str, Any]] class GolangLister(Lister[GolangStateType, GolangPageType]): """ List all Golang modules and send associated origins to scheduler. The lister queries the Golang module index, whose documentation can be found at https://index.golang.org """ GOLANG_MODULES_INDEX_URL = "https://index.golang.org/index" # `limit` seems to be... limited to 2000. GOLANG_MODULES_INDEX_LIMIT = 2000 LISTER_NAME = "golang" def __init__( self, scheduler: SchedulerInterface, incremental: bool = False, credentials: CredentialsType = None, ): super().__init__( scheduler=scheduler, url=self.GOLANG_MODULES_INDEX_URL, instance=self.LISTER_NAME, credentials=credentials, ) - self.session = requests.Session() - self.session.headers.update( - {"Accept": "application/json", "User-Agent": USER_AGENT} - ) + self.session.headers.update({"Accept": "application/json"}) self.incremental = incremental def state_from_dict(self, d: Dict[str, Any]) -> GolangStateType: as_string = d.get("last_seen") last_seen = iso8601.parse_date(as_string) if as_string is not None else None return GolangStateType(last_seen=last_seen) def state_to_dict(self, state: GolangStateType) -> Dict[str, Any]: return { "last_seen": state.last_seen.isoformat() if state.last_seen is not None else None } def finalize(self): if self.incremental and self.state.last_seen is not None: scheduler_state = self.get_state_from_scheduler() if ( scheduler_state.last_seen is None or self.state.last_seen > scheduler_state.last_seen ): self.updated = True - @throttling_retry( - retry=retry_policy_generic, - before_sleep=before_sleep_log(logger, logging.WARNING), - ) def api_request(self, url: str) -> List[str]: - logger.debug("Fetching URL %s", url) - - response = self.session.get(url) - - if response.status_code not in (200, 304): - # Log response content to ease debugging - logger.warning( - "Unexpected HTTP status code %s for URL %s", - response.status_code, - response.url, - ) - - response.raise_for_status() - + response = self.http_request(url) return response.text.split() def get_single_page( self, since: Optional[datetime] = None ) -> Tuple[GolangPageType, Optional[datetime]]: """Return a page from the API and the timestamp of its last entry. Since all entries are sorted by chronological order, the timestamp is useful both for pagination and later for incremental runs.""" url = f"{self.url}?limit={self.GOLANG_MODULES_INDEX_LIMIT}" if since is not None: # The Golang index does not understand `+00:00` for some reason # and expects the "timezone zero" notation instead. This works # because all times are UTC. 
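# For example (an illustrative value): with
# since = datetime(2019, 4, 11, tzinfo=timezone.utc), since.isoformat()
# is "2019-04-11T00:00:00+00:00", which the replace() below rewrites to
# "2019-04-11T00:00:00Z".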
utc_offset = since.utcoffset() assert ( utc_offset is not None and utc_offset.total_seconds() == 0 ), "Non-UTC datetime" as_date = since.isoformat().replace("+00:00", "Z") url = f"{url}&since={as_date}" entries = self.api_request(url) page: GolangPageType = [] if not entries: return page, since for as_json in entries: entry = json.loads(as_json) timestamp = iso8601.parse_date(entry["Timestamp"]) # We've already parsed it and we'll need the datetime later, save it entry["Timestamp"] = timestamp page.append(entry) # The index is guaranteed to be sorted in chronological order since = timestamp return page, since def get_pages(self) -> Iterator[GolangPageType]: since = None if self.incremental: since = self.state.last_seen page, since = self.get_single_page(since=since) if since == self.state.last_seen: # The index returns packages whose timestamps are greater than or # equal to the date provided as a parameter, which would create # an infinite loop if not stopped here. return [], since if since is not None: self.state.last_seen = since while page: yield page page, since = self.get_single_page(since=since) if since == self.state.last_seen: return [], since if since is not None: self.state.last_seen = since def get_origins_from_page(self, page: GolangPageType) -> Iterator[ListedOrigin]: """ Iterate on all Golang projects and yield ListedOrigin instances. """ assert self.lister_obj.id is not None for module in page: path = module["Path"] # The loader will be expected to use the golang proxy to do the # actual downloading. We're using `pkg.go.dev` so that the URL points # to somewhere useful for a human instead of an (incomplete) API path. origin_url = f"https://pkg.go.dev/{path}" # Since the Go index lists versions and not just packages, there will # be duplicates. Fortunately, `ListedOrigins` are "upserted" server-side, # so only the last timestamp will be used, with no duplicates. # Performance should not be an issue as they are sent to the db in bulk.
yield ListedOrigin( lister_id=self.lister_obj.id, url=origin_url, visit_type="golang", last_update=module["Timestamp"], ) diff --git a/swh/lister/golang/tests/test_lister.py b/swh/lister/golang/tests/test_lister.py index 3cc9c64..e1abe90 100644 --- a/swh/lister/golang/tests/test_lister.py +++ b/swh/lister/golang/tests/test_lister.py @@ -1,241 +1,242 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime from pathlib import Path import iso8601 from swh.lister.golang.lister import GolangLister, GolangStateType from swh.lister.tests.test_utils import assert_sleep_calls from swh.lister.utils import WAIT_EXP_BASE # https://pkg.go.dev prefix omitted expected_listed = [ ("collectd.org", "2019-04-11T18:47:25.450546+00:00"), ( "github.com/blang/semver", "2019-04-15T13:54:39.107258+00:00", ), ( "github.com/bmizerany/pat", "2019-04-11T18:47:29.390564+00:00", ), ( "github.com/djherbis/buffer", "2019-04-11T18:47:29.974874+00:00", ), ( "github.com/djherbis/nio", "2019-04-11T18:47:32.283312+00:00", ), ( "github.com/gobuffalo/buffalo-plugins", "2019-04-15T13:54:34.222985+00:00", ), ( "github.com/gobuffalo/buffalo-pop", "2019-04-15T13:54:39.135792+00:00", ), ( "github.com/gobuffalo/clara", "2019-04-15T13:54:40.651916+00:00", ), ( "github.com/gobuffalo/genny", "2019-04-15T13:54:37.841547+00:00", ), ( "github.com/gobuffalo/packr", "2019-04-15T13:54:35.688900+00:00", ), ( "github.com/markbates/refresh", "2019-04-15T13:54:35.250835+00:00", ), ( "github.com/mitchellh/go-homedir", "2019-04-15T13:54:35.678214+00:00", ), ( "github.com/nats-io/nuid", "2019-04-11T18:47:28.102348+00:00", ), ( "github.com/oklog/ulid", "2019-04-11T18:47:23.234198+00:00", ), ( "github.com/pkg/errors", "2019-04-18T02:07:41.336899+00:00", ), ( "golang.org/x/sys", "2019-04-15T13:54:37.555525+00:00", ), ("golang.org/x/text", "2019-04-10T19:08:52.997264+00:00"), # only one x/tools listed even though there are two versions, and only the # latest one's timestamp is used.
( "golang.org/x/tools", "2019-04-15T13:54:41.905064+00:00", ), ] def _generate_responses(datadir, requests_mock): responses = [] for file in Path(datadir).glob("page-*.txt"): # Test that throttling and server errors are retries responses.append({"text": "", "status_code": 429}) responses.append({"text": "", "status_code": 500}) # Also test that the lister appropriately gets out of the infinite loop responses.append({"text": file.read_text(), "status_code": 200}) requests_mock.get(GolangLister.GOLANG_MODULES_INDEX_URL, responses) def test_golang_lister(swh_scheduler, mocker, requests_mock, datadir): - # first listing, should return one origin per package - lister = GolangLister(scheduler=swh_scheduler) # Exponential retries take a long time, so stub time.sleep - mocked_sleep = mocker.patch.object(lister.api_request.retry, "sleep") + mocked_sleep = mocker.patch.object(GolangLister.http_request.retry, "sleep") + + # first listing, should return one origin per package + lister = GolangLister(scheduler=swh_scheduler) _generate_responses(datadir, requests_mock) stats = lister.run() assert stats.pages == 3 # The two `golang.org/x/tools` versions are *not* listed as separate origins assert stats.origins == 18 scheduler_origins = sorted( swh_scheduler.get_listed_origins(lister.lister_obj.id).results, key=lambda x: x.url, ) for scheduled, (url, timestamp) in zip(scheduler_origins, expected_listed): assert scheduled.url == f"https://pkg.go.dev/{url}" assert scheduled.last_update == iso8601.parse_date(timestamp) assert scheduled.visit_type == "golang" assert len(scheduler_origins) == len(expected_listed) # Test `time.sleep` is called with exponential retries assert_sleep_calls( mocker, mocked_sleep, [1, WAIT_EXP_BASE, 1, WAIT_EXP_BASE, 1, WAIT_EXP_BASE] ) # doing it all again (without incremental) should give us the same result lister = GolangLister(scheduler=swh_scheduler) - mocked_sleep = mocker.patch.object(lister.api_request.retry, "sleep") + _generate_responses(datadir, requests_mock) stats = lister.run() assert stats.pages == 3 assert stats.origins == 18 def test_golang_lister_incremental(swh_scheduler, requests_mock, datadir, mocker): # first listing, should return one origin per package lister = GolangLister(scheduler=swh_scheduler, incremental=True) mock = mocker.spy(lister, "get_single_page") responses = [ {"text": Path(datadir, "page-1.txt").read_text(), "status_code": 200}, ] requests_mock.get(GolangLister.GOLANG_MODULES_INDEX_URL, responses) stats = lister.run() page1_last_timestamp = datetime.datetime( 2019, 4, 11, 18, 47, 29, 390564, tzinfo=datetime.timezone.utc ) page2_last_timestamp = datetime.datetime( 2019, 4, 15, 13, 54, 35, 250835, tzinfo=datetime.timezone.utc ) page3_last_timestamp = datetime.datetime( 2019, 4, 18, 2, 7, 41, 336899, tzinfo=datetime.timezone.utc ) mock.assert_has_calls( [ # First call has no state mocker.call(since=None), # Second call is the last timestamp in the listed page mocker.call(since=page1_last_timestamp), ] ) assert lister.get_state_from_scheduler() == GolangStateType( last_seen=page1_last_timestamp ) assert stats.pages == 1 assert stats.origins == 5 # Incremental should list nothing lister = GolangLister(scheduler=swh_scheduler, incremental=True) mock = mocker.spy(lister, "get_single_page") stats = lister.run() mock.assert_has_calls([mocker.call(since=page1_last_timestamp)]) assert stats.pages == 0 assert stats.origins == 0 # Add more responses responses = [ {"text": Path(datadir, "page-2.txt").read_text(), "status_code": 200}, ] 
requests_mock.get(GolangLister.GOLANG_MODULES_INDEX_URL, responses) # Incremental should list new page lister = GolangLister(scheduler=swh_scheduler, incremental=True) mock = mocker.spy(lister, "get_single_page") stats = lister.run() mock.assert_has_calls( [ mocker.call(since=page1_last_timestamp), mocker.call(since=page2_last_timestamp), ] ) assert stats.pages == 1 assert stats.origins == 4 # Incremental should list nothing again lister = GolangLister(scheduler=swh_scheduler, incremental=True) mock = mocker.spy(lister, "get_single_page") stats = lister.run() assert stats.pages == 0 assert stats.origins == 0 mock.assert_has_calls([mocker.call(since=page2_last_timestamp)]) # Add yet more responses responses = [ {"text": Path(datadir, "page-3.txt").read_text(), "status_code": 200}, ] requests_mock.get(GolangLister.GOLANG_MODULES_INDEX_URL, responses) # Incremental should list new page again lister = GolangLister(scheduler=swh_scheduler, incremental=True) mock = mocker.spy(lister, "get_single_page") stats = lister.run() assert stats.pages == 1 assert stats.origins == 9 mock.assert_has_calls( [ mocker.call(since=page2_last_timestamp), mocker.call(since=page3_last_timestamp), ] ) # Incremental should list nothing one last time lister = GolangLister(scheduler=swh_scheduler, incremental=True) mock = mocker.spy(lister, "get_single_page") stats = lister.run() assert stats.pages == 0 assert stats.origins == 0 mock.assert_has_calls([mocker.call(since=page3_last_timestamp)]) diff --git a/swh/lister/hackage/__init__.py b/swh/lister/hackage/__init__.py new file mode 100644 index 0000000..906a878 --- /dev/null +++ b/swh/lister/hackage/__init__.py @@ -0,0 +1,99 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +""" +Hackage lister +============== + +The Hackage lister lists origins from `hackage.haskell.org`_, the `Haskell`_ Package +Repository. + +The registry provides an `http api`_ from which the lister retrieves package names +and builds origin urls. + +As of August 2022 `hackage.haskell.org`_ lists 15536 package names. + +Origins retrieving strategy +--------------------------- + +To get a list of all package names we make a POST call to the +`https://hackage.haskell.org/packages/search` endpoint with some params given as +json data. + +Default params:: + + { + "page": 0, + "sortColumn": "default", + "sortDirection": "ascending", + "searchQuery": "(deprecated:any)", + } + +The page size is 50. The lister will make as many http api calls as needed to get +all results. + +Page listing +------------ + +The result is paginated; each page is 50 records long. + +Entry data set example:: + + { + "description": "3D model parsers", + "downloads": 6, + "lastUpload": "2014-11-08T03:55:23.879047Z", + "maintainers": [{"display": "capsjac", "uri": "/user/capsjac"}], + "name": {"display": "3dmodels", "uri": "/package/3dmodels"}, + "tags": [ + {"display": "graphics", "uri": "/packages/tag/graphics"}, + {"display": "lgpl", "uri": "/packages/tag/lgpl"}, + {"display": "library", "uri": "/packages/tag/library"}, + ], + "votes": 1.5, + } + +Origins from page +----------------- + +The lister yields 50 origin urls per page. +Each ListedOrigin has a `last_update` date set.
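+ +For example, following the `PACKAGE_INFO_URL_PATTERN` defined in `swh.lister.hackage.lister`, the ``3dmodels`` entry shown above would yield the origin url:: + + https://hackage.haskell.org/package/3dmodels + +with its `last_update` parsed from the `lastUpload` date.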
+ +Running tests +------------- + +Activate the virtualenv and run from within the swh-lister directory:: + + pytest -s -vv --log-cli-level=DEBUG swh/lister/hackage/tests + +Testing with Docker +------------------- + +Change directory to swh/docker, then launch the docker environment:: + + docker compose up -d + +Then schedule a Hackage listing task:: + + docker compose exec swh-scheduler swh scheduler task add -p oneshot list-hackage + +You can follow the lister execution by displaying the logs of the swh-lister service:: + + docker compose logs -f swh-lister + +.. _hackage.haskell.org: https://hackage.haskell.org/ +.. _Haskell: https://haskell.org/ +.. _http api: https://hackage.haskell.org/api +""" + + +def register(): + from .lister import HackageLister + + return { + "lister": HackageLister, + "task_modules": ["%s.tasks" % __name__], + } diff --git a/swh/lister/hackage/lister.py b/swh/lister/hackage/lister.py new file mode 100644 index 0000000..ffe72cc --- /dev/null +++ b/swh/lister/hackage/lister.py @@ -0,0 +1,100 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import logging +from typing import Any, Dict, Iterator, List, Optional + +import iso8601 + +from swh.scheduler.interface import SchedulerInterface +from swh.scheduler.model import ListedOrigin + +from ..pattern import CredentialsType, StatelessLister + +logger = logging.getLogger(__name__) + +# Aliasing the page results returned by `get_pages` method from the lister. +HackageListerPage = List[Dict[str, Any]] + + +class HackageLister(StatelessLister[HackageListerPage]): + """List Hackage (The Haskell Package Repository) origins.""" + + LISTER_NAME = "hackage" + VISIT_TYPE = "hackage" + INSTANCE = "hackage" + + BASE_URL = "https://hackage.haskell.org/" + PACKAGE_NAMES_URL_PATTERN = "{base_url}packages/search" + PACKAGE_INFO_URL_PATTERN = "{base_url}package/{pkgname}" + + def __init__( + self, + scheduler: SchedulerInterface, + credentials: Optional[CredentialsType] = None, + url: Optional[str] = None, + ): + super().__init__( + scheduler=scheduler, + credentials=credentials, + instance=self.INSTANCE, + url=url if url else self.BASE_URL, + ) + # Ensure to set this to the same value as the http api search endpoint uses + # (50 as of August 2022) + self.page_size: int = 50 + + def get_pages(self) -> Iterator[HackageListerPage]: + """Yield pages of package names. + + It uses the http api endpoint `https://hackage.haskell.org/packages/search` + to get a list of package names from which we build origin urls. + + Results are paginated.
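+ + The total number of pages is derived from the ``numberOfResults`` field of + the first response and the page size; the remaining pages are then fetched + by incrementing the ``page`` param.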
+ """ + params = { + "page": 0, + "sortColumn": "default", + "sortDirection": "ascending", + "searchQuery": "(deprecated:any)", + } + + data = self.http_request( + url=self.PACKAGE_NAMES_URL_PATTERN.format(base_url=self.url), + method="POST", + json=params, + ).json() + + nb_entries: int = data["numberOfResults"] + (nb_pages, remainder) = divmod(nb_entries, self.page_size) + if remainder: + nb_pages += 1 + yield data["pageContents"] + + for page in range(1, nb_pages): + params["page"] = page + data = self.http_request( + url=self.PACKAGE_NAMES_URL_PATTERN.format(base_url=self.url), + method="POST", + json=params, + ).json() + yield data["pageContents"] + + def get_origins_from_page(self, page: HackageListerPage) -> Iterator[ListedOrigin]: + """Iterate on all pages and yield ListedOrigin instances.""" + assert self.lister_obj.id is not None + + for entry in page: + pkgname = entry["name"]["display"] + last_update = iso8601.parse_date(entry["lastUpload"]) + url = self.PACKAGE_INFO_URL_PATTERN.format( + base_url=self.url, pkgname=pkgname + ) + yield ListedOrigin( + lister_id=self.lister_obj.id, + visit_type=self.VISIT_TYPE, + url=url, + last_update=last_update, + ) diff --git a/swh/lister/hackage/tasks.py b/swh/lister/hackage/tasks.py new file mode 100644 index 0000000..2d4cfb6 --- /dev/null +++ b/swh/lister/hackage/tasks.py @@ -0,0 +1,19 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from celery import shared_task + +from swh.lister.hackage.lister import HackageLister + + +@shared_task(name=__name__ + ".HackageListerTask") +def list_hackage(**lister_args): + """Lister task for Hackage, the Haskell Package Repository""" + return HackageLister.from_configfile(**lister_args).run().dict() + + +@shared_task(name=__name__ + ".ping") +def _ping(): + return "OK" diff --git a/swh/lister/hackage/tests/__init__.py b/swh/lister/hackage/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/swh/lister/hackage/tests/data/https_fake49.haskell.org/packages_search_0 b/swh/lister/hackage/tests/data/https_fake49.haskell.org/packages_search_0 new file mode 100644 index 0000000..bebf953 --- /dev/null +++ b/swh/lister/hackage/tests/data/https_fake49.haskell.org/packages_search_0 @@ -0,0 +1 @@ +{"numberOfResults":49,"pageContents":[{"description":"Haskell package for easy integration with the 2captcha API.","downloads":1,"lastUpload":"2021-09-09T05:13:30.343509948Z","maintainers":[{"display":"qwbarch","uri":"/user/qwbarch"}],"name":{"display":"2captcha","uri":"/package/2captcha"},"tags":[{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"mit","uri":"/packages/tag/mit"},{"display":"network","uri":"/packages/tag/network"}],"votes":1.5},{"description":"Examples of 3D graphics programming with OpenGL","downloads":8,"lastUpload":"2016-07-22T14:26:23.038905Z","maintainers":[{"display":"WolfgangJeltsch","uri":"/user/WolfgangJeltsch"}],"name":{"display":"3d-graphics-examples","uri":"/package/3d-graphics-examples"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"fractals","uri":"/packages/tag/fractals"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"program","uri":"/packages/tag/program"}],"votes":1.75},{"description":"3D model 
parsers","downloads":6,"lastUpload":"2014-11-08T03:55:23.879047Z","maintainers":[{"display":"capsjac","uri":"/user/capsjac"}],"name":{"display":"3dmodels","uri":"/package/3dmodels"},"tags":[{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"lgpl","uri":"/packages/tag/lgpl"},{"display":"library","uri":"/packages/tag/library"}],"votes":1.5},{"description":"A tetris-like game (works with GHC 6.8.3 and Gtk2hs 0.9.13)","downloads":8,"lastUpload":"2010-05-07T18:55:25Z","maintainers":[{"display":"AndrewCalleja","uri":"/user/AndrewCalleja"}],"name":{"display":"4Blocks","uri":"/package/4Blocks"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"game","uri":"/packages/tag/game"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Abstract Application Interface.","downloads":8,"lastUpload":"2015-08-03T23:13:02.007983Z","maintainers":[{"display":"bash0r","uri":"/user/bash0r"}],"name":{"display":"AAI","uri":"/package/AAI"},"tags":[{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"},{"display":"mit","uri":"/packages/tag/mit"}],"votes":0},{"description":"An alternating list of two types","downloads":7,"lastUpload":"2014-06-23T21:53:36.114282Z","maintainers":[{"display":"DylanJust","uri":"/user/DylanJust"}],"name":{"display":"ABList","uri":"/package/ABList"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Angles in degrees and radians.","downloads":10,"lastUpload":"2010-07-23T14:10:27Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Angle","uri":"/package/AC-Angle"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"numerical","uri":"/packages/tag/numerical"}],"votes":0},{"description":"Handle Boolean values generatically.","downloads":4,"lastUpload":"2010-11-09T11:39:08Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Boolean","uri":"/package/AC-Boolean"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"},{"display":"logic","uri":"/packages/tag/logic"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"Detect which OS you're running on.","downloads":5,"lastUpload":"2011-06-08T10:12:03Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-BuildPlatform","uri":"/package/AC-BuildPlatform"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"system","uri":"/packages/tag/system"}],"votes":0},{"description":"Efficient RGB colour types.","downloads":12,"lastUpload":"2014-01-12T16:33:02.796921Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Colour","uri":"/package/AC-Colour"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"numerical","uri":"/packages/tag/numerical"}],"votes":0},{"description":"GTK+ pixel 
plotting.","downloads":5,"lastUpload":"2010-10-28T10:24:31Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-EasyRaster-GTK","uri":"/package/AC-EasyRaster-GTK"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Efficient half-integer type.","downloads":6,"lastUpload":"2009-08-12T19:15:37Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-HalfInteger","uri":"/package/AC-HalfInteger"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"numerical","uri":"/packages/tag/numerical"}],"votes":0},{"description":"A simple test framework.","downloads":7,"lastUpload":"2012-02-24T13:54:02Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-MiniTest","uri":"/package/AC-MiniTest"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"testing","uri":"/packages/tag/testing"}],"votes":0},{"description":"Trivial package for writing PPM images.","downloads":3,"lastUpload":"2010-01-18T22:26:54Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-PPM","uri":"/package/AC-PPM"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"codec","uri":"/packages/tag/codec"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"A pure Haskell PRNG.","downloads":1,"lastUpload":"2011-08-25T09:34:35Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Random","uri":"/package/AC-Random"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"random","uri":"/packages/tag/random"}],"votes":0},{"description":"Trivial wrapper over ansi-terminal.","downloads":2,"lastUpload":"2010-10-28T12:38:15Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Terminal","uri":"/package/AC-Terminal"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"numerical","uri":"/packages/tag/numerical"}],"votes":0},{"description":"Immutable arrays with plain integer indicies.","downloads":5,"lastUpload":"2010-01-17T13:34:07Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-VanillaArray","uri":"/package/AC-VanillaArray"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Efficient geometric vectors and 
transformations.","downloads":25,"lastUpload":"2011-08-12T12:33:08Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Vector","uri":"/package/AC-Vector"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"numerical","uri":"/packages/tag/numerical"}],"votes":0},{"description":"Fancy type-system stuff for AC-Vector","downloads":13,"lastUpload":"2010-08-15T15:34:26Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Vector-Fancy","uri":"/package/AC-Vector-Fancy"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"numerical","uri":"/packages/tag/numerical"}],"votes":0},{"description":"Essential features","downloads":4,"lastUpload":"2015-01-02T21:48:24.005724Z","maintainers":[{"display":"JamesCandy","uri":"/user/JamesCandy"}],"name":{"display":"ACME","uri":"/package/ACME"},"tags":[{"display":"acme","uri":"/packages/tag/acme"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"}],"votes":2.25},{"description":"Efficient, high-level dynamic programming.","downloads":41,"lastUpload":"2019-10-01T18:22:22.276688014Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"ADPfusion","uri":"/package/ADPfusion"},"tags":[{"display":"algorithms","uri":"/packages/tag/algorithms"},{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data-structures","uri":"/packages/tag/data-structures"},{"display":"formal-languages","uri":"/packages/tag/formal-languages"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Dynamic programming on tree and forest structures","downloads":2,"lastUpload":"2017-11-23T22:15:26.956207149Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"ADPfusionForest","uri":"/package/ADPfusionForest"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"formal-languages","uri":"/packages/tag/formal-languages"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Dynamic programming for Set data structures.","downloads":8,"lastUpload":"2017-10-19T14:36:27.804439921Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"ADPfusionSet","uri":"/package/ADPfusionSet"},"tags":[{"display":"algorithms","uri":"/packages/tag/algorithms"},{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data-structures","uri":"/packages/tag/data-structures"},{"display":"formal-languages","uri":"/packages/tag/formal-languages"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"foundational type classes for approximating exact real 
numbers","downloads":8,"lastUpload":"2011-05-11T11:25:48Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-Basics","uri":"/package/AERN-Basics"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Compositional lazy dataflow networks for exact real number computation","downloads":11,"lastUpload":"2009-07-29T10:06:08Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-Net","uri":"/package/AERN-Net"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"distributed-computing","uri":"/packages/tag/distributed-computing"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"arbitrary precision real interval arithmetic","downloads":39,"lastUpload":"2011-05-11T11:31:11Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-Real","uri":"/package/AERN-Real"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"arbitrary precision real interval arithmetic","downloads":10,"lastUpload":"2011-05-11T13:37:45Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-Real-Double","uri":"/package/AERN-Real-Double"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"arbitrary precision real interval arithmetic","downloads":11,"lastUpload":"2011-05-11T11:31:35Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-Real-Interval","uri":"/package/AERN-Real-Interval"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"polynomial function enclosures (PFEs) approximating exact real functions","downloads":26,"lastUpload":"2009-07-29T10:05:31Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-RnToRm","uri":"/package/AERN-RnToRm"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"GL plotting of polynomial function enclosures 
(PFEs)","downloads":15,"lastUpload":"2009-08-01T16:05:31Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-RnToRm-Plot","uri":"/package/AERN-RnToRm-Plot"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"Fast AES encryption/decryption for bytestrings","downloads":40,"lastUpload":"2014-05-07T21:04:03.888615Z","maintainers":[{"display":"SveinOveAas","uri":"/user/SveinOveAas"}],"name":{"display":"AES","uri":"/package/AES"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"cryptography","uri":"/packages/tag/cryptography"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Arrowized functional state machines","downloads":21,"lastUpload":"2016-04-21T22:13:19.148646Z","maintainers":[{"display":"hanzhxu","uri":"/user/hanzhxu"}],"name":{"display":"AFSM","uri":"/package/AFSM"},"tags":[{"display":"frp","uri":"/packages/tag/frp"},{"display":"library","uri":"/packages/tag/library"},{"display":"mit","uri":"/packages/tag/mit"}],"votes":2},{"description":"A library for writing AGI scripts for Asterisk","downloads":14,"lastUpload":"2009-12-07T20:12:57Z","maintainers":[{"display":"JeremyShaw","uri":"/user/JeremyShaw"}],"name":{"display":"AGI","uri":"/package/AGI"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"network","uri":"/packages/tag/network"}],"votes":0},{"description":"A binding for the OpenAL Utility Toolkit","downloads":38,"lastUpload":"2019-06-10T16:09:36.285351988Z","maintainers":[{"display":"StephenBlackheath","uri":"/user/StephenBlackheath"},{"display":"SvenPanne","uri":"/user/SvenPanne"}],"name":{"display":"ALUT","uri":"/package/ALUT"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"sound","uri":"/packages/tag/sound"}],"votes":0},{"description":"Low-level bindings for Asterisk Manager Interface (AMI).","downloads":2,"lastUpload":"2012-01-16T06:02:43Z","maintainers":[{"display":"IlyaPortnov","uri":"/user/IlyaPortnov"}],"name":{"display":"AMI","uri":"/package/AMI"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"network","uri":"/packages/tag/network"}],"votes":0},{"description":"Num instance for Applicatives provided via the ANum newtype","downloads":14,"lastUpload":"2018-02-13T00:06:58.303270679Z","maintainers":[{"display":"DanBurton","uri":"/user/DanBurton"}],"name":{"display":"ANum","uri":"/package/ANum"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"ASN.1 support for Haskell","downloads":4,"lastUpload":"2008-06-22T10:41:52Z","maintainers":[{"display":"HerbertValerioRiedel","uri":"/user/HerbertValerioRiedel"}],"name":{"display":"ASN1","uri":"/package/ASN1"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"language","uri":"/packages/tag/language"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Mutable variables with Exception handling and concurrency 
support.","downloads":11,"lastUpload":"2009-12-08T15:41:14Z","maintainers":[{"display":"AlexMason","uri":"/user/AlexMason"}],"name":{"display":"AVar","uri":"/package/AVar"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"concurrency","uri":"/packages/tag/concurrency"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"A binding to a part of the ANSI escape code for the console","downloads":2,"lastUpload":"2010-01-24T23:44:07Z","maintainers":[{"display":"HaraldWolfsgruber","uri":"/user/HaraldWolfsgruber"}],"name":{"display":"AWin32Console","uri":"/package/AWin32Console"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"system","uri":"/packages/tag/system"}],"votes":0},{"description":"Monads-tf instances for the AbortT monad transformer.","downloads":2,"lastUpload":"2012-12-07T13:58:51Z","maintainers":[{"display":"GregoryCrosswhite","uri":"/user/GregoryCrosswhite"}],"name":{"display":"AbortT-monadstf","uri":"/package/AbortT-monadstf"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"mtl instances for the AbortT monad transformer","downloads":8,"lastUpload":"2016-02-06T22:37:44.304337Z","maintainers":[{"display":"GregoryCrosswhite","uri":"/user/GregoryCrosswhite"}],"name":{"display":"AbortT-mtl","uri":"/package/AbortT-mtl"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"A monad and monadic transformer providing \"abort\" functionality","downloads":16,"lastUpload":"2019-07-19T14:08:41.889681784Z","maintainers":[{"display":"GregoryCrosswhite","uri":"/user/GregoryCrosswhite"}],"name":{"display":"AbortT-transformers","uri":"/package/AbortT-transformers"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"An easy-to-use video game framework for Haskell.","downloads":5,"lastUpload":"2014-11-11T00:20:57.714901Z","maintainers":[{"display":"AdityaBhargava","uri":"/user/AdityaBhargava"}],"name":{"display":"ActionKid","uri":"/package/ActionKid"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"game-engine","uri":"/packages/tag/game-engine"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Library for incremental computing.","downloads":11,"lastUpload":"2013-01-28T21:50:50Z","maintainers":[{"display":"DustinDeWeese","uri":"/user/DustinDeWeese"},{"display":"MagnusCarlsson","uri":"/user/MagnusCarlsson"},{"display":"PeterJonsson","uri":"/user/PeterJonsson"}],"name":{"display":"Adaptive","uri":"/package/Adaptive"},"tags":[{"display":"algorithms","uri":"/packages/tag/algorithms"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Library for incremental 
computing.","downloads":8,"lastUpload":"2013-01-26T09:17:43Z","maintainers":[{"display":"PaoloGiarrusso","uri":"/user/PaoloGiarrusso"}],"name":{"display":"Adaptive-Blaisorblade","uri":"/package/Adaptive-Blaisorblade"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Lisperati's adventure game in Lisp translated to Haskell","downloads":8,"lastUpload":"2010-06-11T00:01:05Z","maintainers":[{"display":"TimWawrzynczak","uri":"/user/TimWawrzynczak"}],"name":{"display":"Advgame","uri":"/package/Advgame"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"game","uri":"/packages/tag/game"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Assessment services for the Advise-Me project","downloads":7,"lastUpload":"2019-10-30T07:45:20.345073378Z","maintainers":[{"display":"BastiaanHeeren","uri":"/user/BastiaanHeeren"}],"name":{"display":"Advise-me","uri":"/package/Advise-me"},"tags":[{"display":"apache","uri":"/packages/tag/apache"},{"display":"education","uri":"/packages/tag/education"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Mapping between Aeson's JSON and Bson objects.","downloads":24,"lastUpload":"2022-05-06T10:41:14.015254306Z","maintainers":[{"display":"AndrasSlemmer","uri":"/user/AndrasSlemmer"},{"display":"NiklasHambuechen","uri":"/user/NiklasHambuechen"}],"name":{"display":"AesonBson","uri":"/package/AesonBson"},"tags":[{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Generator-generator for QuickCheck","downloads":10,"lastUpload":"2012-08-01T11:36:04Z","maintainers":[{"display":"JonasDuregard","uri":"/user/JonasDuregard"}],"name":{"display":"Agata","uri":"/package/Agata"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"deprecated","uri":"/packages/tag/deprecated"}],"votes":2.75}]} diff --git a/swh/lister/hackage/tests/data/https_fake51.haskell.org/packages_search_0 b/swh/lister/hackage/tests/data/https_fake51.haskell.org/packages_search_0 new file mode 100644 index 0000000..5767edc --- /dev/null +++ b/swh/lister/hackage/tests/data/https_fake51.haskell.org/packages_search_0 @@ -0,0 +1 @@ +{"numberOfResults":51,"pageContents":[{"description":"Haskell package for easy integration with the 2captcha API.","downloads":1,"lastUpload":"2021-09-09T05:13:30.343509948Z","maintainers":[{"display":"qwbarch","uri":"/user/qwbarch"}],"name":{"display":"2captcha","uri":"/package/2captcha"},"tags":[{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"mit","uri":"/packages/tag/mit"},{"display":"network","uri":"/packages/tag/network"}],"votes":1.5},{"description":"Examples of 3D graphics programming with OpenGL","downloads":8,"lastUpload":"2016-07-22T14:26:23.038905Z","maintainers":[{"display":"WolfgangJeltsch","uri":"/user/WolfgangJeltsch"}],"name":{"display":"3d-graphics-examples","uri":"/package/3d-graphics-examples"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"fractals","uri":"/packages/tag/fractals"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"program","uri":"/packages/tag/program"}],"votes":1.75},{"description":"3D model 
parsers","downloads":6,"lastUpload":"2014-11-08T03:55:23.879047Z","maintainers":[{"display":"capsjac","uri":"/user/capsjac"}],"name":{"display":"3dmodels","uri":"/package/3dmodels"},"tags":[{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"lgpl","uri":"/packages/tag/lgpl"},{"display":"library","uri":"/packages/tag/library"}],"votes":1.5},{"description":"A tetris-like game (works with GHC 6.8.3 and Gtk2hs 0.9.13)","downloads":8,"lastUpload":"2010-05-07T18:55:25Z","maintainers":[{"display":"AndrewCalleja","uri":"/user/AndrewCalleja"}],"name":{"display":"4Blocks","uri":"/package/4Blocks"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"game","uri":"/packages/tag/game"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Abstract Application Interface.","downloads":8,"lastUpload":"2015-08-03T23:13:02.007983Z","maintainers":[{"display":"bash0r","uri":"/user/bash0r"}],"name":{"display":"AAI","uri":"/package/AAI"},"tags":[{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"},{"display":"mit","uri":"/packages/tag/mit"}],"votes":0},{"description":"An alternating list of two types","downloads":7,"lastUpload":"2014-06-23T21:53:36.114282Z","maintainers":[{"display":"DylanJust","uri":"/user/DylanJust"}],"name":{"display":"ABList","uri":"/package/ABList"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Angles in degrees and radians.","downloads":10,"lastUpload":"2010-07-23T14:10:27Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Angle","uri":"/package/AC-Angle"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"numerical","uri":"/packages/tag/numerical"}],"votes":0},{"description":"Handle Boolean values generatically.","downloads":4,"lastUpload":"2010-11-09T11:39:08Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Boolean","uri":"/package/AC-Boolean"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"},{"display":"logic","uri":"/packages/tag/logic"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"Detect which OS you're running on.","downloads":5,"lastUpload":"2011-06-08T10:12:03Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-BuildPlatform","uri":"/package/AC-BuildPlatform"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"system","uri":"/packages/tag/system"}],"votes":0},{"description":"Efficient RGB colour types.","downloads":12,"lastUpload":"2014-01-12T16:33:02.796921Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Colour","uri":"/package/AC-Colour"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"numerical","uri":"/packages/tag/numerical"}],"votes":0},{"description":"GTK+ pixel 
plotting.","downloads":5,"lastUpload":"2010-10-28T10:24:31Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-EasyRaster-GTK","uri":"/package/AC-EasyRaster-GTK"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Efficient half-integer type.","downloads":6,"lastUpload":"2009-08-12T19:15:37Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-HalfInteger","uri":"/package/AC-HalfInteger"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"numerical","uri":"/packages/tag/numerical"}],"votes":0},{"description":"A simple test framework.","downloads":7,"lastUpload":"2012-02-24T13:54:02Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-MiniTest","uri":"/package/AC-MiniTest"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"testing","uri":"/packages/tag/testing"}],"votes":0},{"description":"Trivial package for writing PPM images.","downloads":3,"lastUpload":"2010-01-18T22:26:54Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-PPM","uri":"/package/AC-PPM"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"codec","uri":"/packages/tag/codec"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"A pure Haskell PRNG.","downloads":1,"lastUpload":"2011-08-25T09:34:35Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Random","uri":"/package/AC-Random"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"random","uri":"/packages/tag/random"}],"votes":0},{"description":"Trivial wrapper over ansi-terminal.","downloads":2,"lastUpload":"2010-10-28T12:38:15Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Terminal","uri":"/package/AC-Terminal"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"numerical","uri":"/packages/tag/numerical"}],"votes":0},{"description":"Immutable arrays with plain integer indicies.","downloads":5,"lastUpload":"2010-01-17T13:34:07Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-VanillaArray","uri":"/package/AC-VanillaArray"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Efficient geometric vectors and 
transformations.","downloads":25,"lastUpload":"2011-08-12T12:33:08Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Vector","uri":"/package/AC-Vector"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"numerical","uri":"/packages/tag/numerical"}],"votes":0},{"description":"Fancy type-system stuff for AC-Vector","downloads":13,"lastUpload":"2010-08-15T15:34:26Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Vector-Fancy","uri":"/package/AC-Vector-Fancy"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"numerical","uri":"/packages/tag/numerical"}],"votes":0},{"description":"Essential features","downloads":4,"lastUpload":"2015-01-02T21:48:24.005724Z","maintainers":[{"display":"JamesCandy","uri":"/user/JamesCandy"}],"name":{"display":"ACME","uri":"/package/ACME"},"tags":[{"display":"acme","uri":"/packages/tag/acme"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"}],"votes":2.25},{"description":"Efficient, high-level dynamic programming.","downloads":41,"lastUpload":"2019-10-01T18:22:22.276688014Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"ADPfusion","uri":"/package/ADPfusion"},"tags":[{"display":"algorithms","uri":"/packages/tag/algorithms"},{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data-structures","uri":"/packages/tag/data-structures"},{"display":"formal-languages","uri":"/packages/tag/formal-languages"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Dynamic programming on tree and forest structures","downloads":2,"lastUpload":"2017-11-23T22:15:26.956207149Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"ADPfusionForest","uri":"/package/ADPfusionForest"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"formal-languages","uri":"/packages/tag/formal-languages"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Dynamic programming for Set data structures.","downloads":8,"lastUpload":"2017-10-19T14:36:27.804439921Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"ADPfusionSet","uri":"/package/ADPfusionSet"},"tags":[{"display":"algorithms","uri":"/packages/tag/algorithms"},{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data-structures","uri":"/packages/tag/data-structures"},{"display":"formal-languages","uri":"/packages/tag/formal-languages"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"foundational type classes for approximating exact real 
numbers","downloads":8,"lastUpload":"2011-05-11T11:25:48Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-Basics","uri":"/package/AERN-Basics"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Compositional lazy dataflow networks for exact real number computation","downloads":11,"lastUpload":"2009-07-29T10:06:08Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-Net","uri":"/package/AERN-Net"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"distributed-computing","uri":"/packages/tag/distributed-computing"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"arbitrary precision real interval arithmetic","downloads":39,"lastUpload":"2011-05-11T11:31:11Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-Real","uri":"/package/AERN-Real"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"arbitrary precision real interval arithmetic","downloads":10,"lastUpload":"2011-05-11T13:37:45Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-Real-Double","uri":"/package/AERN-Real-Double"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"arbitrary precision real interval arithmetic","downloads":11,"lastUpload":"2011-05-11T11:31:35Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-Real-Interval","uri":"/package/AERN-Real-Interval"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"polynomial function enclosures (PFEs) approximating exact real functions","downloads":26,"lastUpload":"2009-07-29T10:05:31Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-RnToRm","uri":"/package/AERN-RnToRm"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"GL plotting of polynomial function enclosures 
(PFEs)","downloads":15,"lastUpload":"2009-08-01T16:05:31Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-RnToRm-Plot","uri":"/package/AERN-RnToRm-Plot"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"Fast AES encryption/decryption for bytestrings","downloads":40,"lastUpload":"2014-05-07T21:04:03.888615Z","maintainers":[{"display":"SveinOveAas","uri":"/user/SveinOveAas"}],"name":{"display":"AES","uri":"/package/AES"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"cryptography","uri":"/packages/tag/cryptography"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Arrowized functional state machines","downloads":21,"lastUpload":"2016-04-21T22:13:19.148646Z","maintainers":[{"display":"hanzhxu","uri":"/user/hanzhxu"}],"name":{"display":"AFSM","uri":"/package/AFSM"},"tags":[{"display":"frp","uri":"/packages/tag/frp"},{"display":"library","uri":"/packages/tag/library"},{"display":"mit","uri":"/packages/tag/mit"}],"votes":2},{"description":"A library for writing AGI scripts for Asterisk","downloads":14,"lastUpload":"2009-12-07T20:12:57Z","maintainers":[{"display":"JeremyShaw","uri":"/user/JeremyShaw"}],"name":{"display":"AGI","uri":"/package/AGI"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"network","uri":"/packages/tag/network"}],"votes":0},{"description":"A binding for the OpenAL Utility Toolkit","downloads":38,"lastUpload":"2019-06-10T16:09:36.285351988Z","maintainers":[{"display":"StephenBlackheath","uri":"/user/StephenBlackheath"},{"display":"SvenPanne","uri":"/user/SvenPanne"}],"name":{"display":"ALUT","uri":"/package/ALUT"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"sound","uri":"/packages/tag/sound"}],"votes":0},{"description":"Low-level bindings for Asterisk Manager Interface (AMI).","downloads":2,"lastUpload":"2012-01-16T06:02:43Z","maintainers":[{"display":"IlyaPortnov","uri":"/user/IlyaPortnov"}],"name":{"display":"AMI","uri":"/package/AMI"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"network","uri":"/packages/tag/network"}],"votes":0},{"description":"Num instance for Applicatives provided via the ANum newtype","downloads":14,"lastUpload":"2018-02-13T00:06:58.303270679Z","maintainers":[{"display":"DanBurton","uri":"/user/DanBurton"}],"name":{"display":"ANum","uri":"/package/ANum"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"ASN.1 support for Haskell","downloads":4,"lastUpload":"2008-06-22T10:41:52Z","maintainers":[{"display":"HerbertValerioRiedel","uri":"/user/HerbertValerioRiedel"}],"name":{"display":"ASN1","uri":"/package/ASN1"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"language","uri":"/packages/tag/language"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Mutable variables with Exception handling and concurrency 
support.","downloads":11,"lastUpload":"2009-12-08T15:41:14Z","maintainers":[{"display":"AlexMason","uri":"/user/AlexMason"}],"name":{"display":"AVar","uri":"/package/AVar"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"concurrency","uri":"/packages/tag/concurrency"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"A binding to a part of the ANSI escape code for the console","downloads":2,"lastUpload":"2010-01-24T23:44:07Z","maintainers":[{"display":"HaraldWolfsgruber","uri":"/user/HaraldWolfsgruber"}],"name":{"display":"AWin32Console","uri":"/package/AWin32Console"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"system","uri":"/packages/tag/system"}],"votes":0},{"description":"Monads-tf instances for the AbortT monad transformer.","downloads":2,"lastUpload":"2012-12-07T13:58:51Z","maintainers":[{"display":"GregoryCrosswhite","uri":"/user/GregoryCrosswhite"}],"name":{"display":"AbortT-monadstf","uri":"/package/AbortT-monadstf"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"mtl instances for the AbortT monad transformer","downloads":8,"lastUpload":"2016-02-06T22:37:44.304337Z","maintainers":[{"display":"GregoryCrosswhite","uri":"/user/GregoryCrosswhite"}],"name":{"display":"AbortT-mtl","uri":"/package/AbortT-mtl"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"A monad and monadic transformer providing \"abort\" functionality","downloads":16,"lastUpload":"2019-07-19T14:08:41.889681784Z","maintainers":[{"display":"GregoryCrosswhite","uri":"/user/GregoryCrosswhite"}],"name":{"display":"AbortT-transformers","uri":"/package/AbortT-transformers"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"An easy-to-use video game framework for Haskell.","downloads":5,"lastUpload":"2014-11-11T00:20:57.714901Z","maintainers":[{"display":"AdityaBhargava","uri":"/user/AdityaBhargava"}],"name":{"display":"ActionKid","uri":"/package/ActionKid"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"game-engine","uri":"/packages/tag/game-engine"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Library for incremental computing.","downloads":11,"lastUpload":"2013-01-28T21:50:50Z","maintainers":[{"display":"DustinDeWeese","uri":"/user/DustinDeWeese"},{"display":"MagnusCarlsson","uri":"/user/MagnusCarlsson"},{"display":"PeterJonsson","uri":"/user/PeterJonsson"}],"name":{"display":"Adaptive","uri":"/package/Adaptive"},"tags":[{"display":"algorithms","uri":"/packages/tag/algorithms"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Library for incremental 
computing.","downloads":8,"lastUpload":"2013-01-26T09:17:43Z","maintainers":[{"display":"PaoloGiarrusso","uri":"/user/PaoloGiarrusso"}],"name":{"display":"Adaptive-Blaisorblade","uri":"/package/Adaptive-Blaisorblade"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Lisperati's adventure game in Lisp translated to Haskell","downloads":8,"lastUpload":"2010-06-11T00:01:05Z","maintainers":[{"display":"TimWawrzynczak","uri":"/user/TimWawrzynczak"}],"name":{"display":"Advgame","uri":"/package/Advgame"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"game","uri":"/packages/tag/game"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Assessment services for the Advise-Me project","downloads":7,"lastUpload":"2019-10-30T07:45:20.345073378Z","maintainers":[{"display":"BastiaanHeeren","uri":"/user/BastiaanHeeren"}],"name":{"display":"Advise-me","uri":"/package/Advise-me"},"tags":[{"display":"apache","uri":"/packages/tag/apache"},{"display":"education","uri":"/packages/tag/education"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Mapping between Aeson's JSON and Bson objects.","downloads":24,"lastUpload":"2022-05-06T10:41:14.015254306Z","maintainers":[{"display":"AndrasSlemmer","uri":"/user/AndrasSlemmer"},{"display":"NiklasHambuechen","uri":"/user/NiklasHambuechen"}],"name":{"display":"AesonBson","uri":"/package/AesonBson"},"tags":[{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Generator-generator for QuickCheck","downloads":10,"lastUpload":"2012-08-01T11:36:04Z","maintainers":[{"display":"JonasDuregard","uri":"/user/JonasDuregard"}],"name":{"display":"Agata","uri":"/package/Agata"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"testing","uri":"/packages/tag/testing"}],"votes":0},{"description":"A dependently typed functional programming language and proof assistant","downloads":306,"lastUpload":"2022-04-02T18:00:16.805589944Z","maintainers":[{"display":"AndreasAbel","uri":"/user/AndreasAbel"},{"display":"AndresSicardRamirez","uri":"/user/AndresSicardRamirez"},{"display":"NilsAndersDanielsson","uri":"/user/NilsAndersDanielsson"},{"display":"UlfNorell","uri":"/user/UlfNorell"}],"name":{"display":"Agda","uri":"/package/Agda"},"tags":[{"display":"dependent-types","uri":"/packages/tag/dependent-types"},{"display":"program","uri":"/packages/tag/program"}],"votes":2.75}]} diff --git a/swh/lister/hackage/tests/data/https_fake51.haskell.org/packages_search_1 b/swh/lister/hackage/tests/data/https_fake51.haskell.org/packages_search_1 new file mode 100644 index 0000000..8b2e057 --- /dev/null +++ b/swh/lister/hackage/tests/data/https_fake51.haskell.org/packages_search_1 @@ -0,0 +1 @@ +{"numberOfResults":51,"pageContents":[{"description":"Command-line program for type-checking and compiling Agda 
programs","downloads":20,"lastUpload":"2012-03-12T11:01:45Z","maintainers":[{"display":"NilsAndersDanielsson","uri":"/user/NilsAndersDanielsson"},{"display":"UlfNorell","uri":"/user/UlfNorell"}],"name":{"display":"Agda-executable","uri":"/package/Agda-executable"},"tags":[{"display":"dependent-types","uri":"/packages/tag/dependent-types"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"program","uri":"/packages/tag/program"}],"votes":0}]} diff --git a/swh/lister/hackage/tests/data/https_hackage.haskell.org/packages_search_0 b/swh/lister/hackage/tests/data/https_hackage.haskell.org/packages_search_0 new file mode 100644 index 0000000..ad095f3 --- /dev/null +++ b/swh/lister/hackage/tests/data/https_hackage.haskell.org/packages_search_0 @@ -0,0 +1 @@ +{"numberOfResults":150,"pageContents":[{"description":"Haskell package for easy integration with the 2captcha API.","downloads":1,"lastUpload":"2021-09-09T05:13:30.343509948Z","maintainers":[{"display":"qwbarch","uri":"/user/qwbarch"}],"name":{"display":"2captcha","uri":"/package/2captcha"},"tags":[{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"mit","uri":"/packages/tag/mit"},{"display":"network","uri":"/packages/tag/network"}],"votes":1.5},{"description":"Examples of 3D graphics programming with OpenGL","downloads":8,"lastUpload":"2016-07-22T14:26:23.038905Z","maintainers":[{"display":"WolfgangJeltsch","uri":"/user/WolfgangJeltsch"}],"name":{"display":"3d-graphics-examples","uri":"/package/3d-graphics-examples"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"fractals","uri":"/packages/tag/fractals"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"program","uri":"/packages/tag/program"}],"votes":1.75},{"description":"3D model parsers","downloads":6,"lastUpload":"2014-11-08T03:55:23.879047Z","maintainers":[{"display":"capsjac","uri":"/user/capsjac"}],"name":{"display":"3dmodels","uri":"/package/3dmodels"},"tags":[{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"lgpl","uri":"/packages/tag/lgpl"},{"display":"library","uri":"/packages/tag/library"}],"votes":1.5},{"description":"A tetris-like game (works with GHC 6.8.3 and Gtk2hs 0.9.13)","downloads":8,"lastUpload":"2010-05-07T18:55:25Z","maintainers":[{"display":"AndrewCalleja","uri":"/user/AndrewCalleja"}],"name":{"display":"4Blocks","uri":"/package/4Blocks"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"game","uri":"/packages/tag/game"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Abstract Application Interface.","downloads":8,"lastUpload":"2015-08-03T23:13:02.007983Z","maintainers":[{"display":"bash0r","uri":"/user/bash0r"}],"name":{"display":"AAI","uri":"/package/AAI"},"tags":[{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"},{"display":"mit","uri":"/packages/tag/mit"}],"votes":0},{"description":"An alternating list of two types","downloads":7,"lastUpload":"2014-06-23T21:53:36.114282Z","maintainers":[{"display":"DylanJust","uri":"/user/DylanJust"}],"name":{"display":"ABList","uri":"/package/ABList"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Angles in degrees and 
radians.","downloads":10,"lastUpload":"2010-07-23T14:10:27Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Angle","uri":"/package/AC-Angle"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"numerical","uri":"/packages/tag/numerical"}],"votes":0},{"description":"Handle Boolean values generatically.","downloads":4,"lastUpload":"2010-11-09T11:39:08Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Boolean","uri":"/package/AC-Boolean"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"},{"display":"logic","uri":"/packages/tag/logic"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"Detect which OS you're running on.","downloads":5,"lastUpload":"2011-06-08T10:12:03Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-BuildPlatform","uri":"/package/AC-BuildPlatform"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"system","uri":"/packages/tag/system"}],"votes":0},{"description":"Efficient RGB colour types.","downloads":12,"lastUpload":"2014-01-12T16:33:02.796921Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Colour","uri":"/package/AC-Colour"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"numerical","uri":"/packages/tag/numerical"}],"votes":0},{"description":"GTK+ pixel plotting.","downloads":5,"lastUpload":"2010-10-28T10:24:31Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-EasyRaster-GTK","uri":"/package/AC-EasyRaster-GTK"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Efficient half-integer type.","downloads":6,"lastUpload":"2009-08-12T19:15:37Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-HalfInteger","uri":"/package/AC-HalfInteger"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"numerical","uri":"/packages/tag/numerical"}],"votes":0},{"description":"A simple test framework.","downloads":7,"lastUpload":"2012-02-24T13:54:02Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-MiniTest","uri":"/package/AC-MiniTest"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"testing","uri":"/packages/tag/testing"}],"votes":0},{"description":"Trivial package for writing PPM 
images.","downloads":3,"lastUpload":"2010-01-18T22:26:54Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-PPM","uri":"/package/AC-PPM"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"codec","uri":"/packages/tag/codec"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"A pure Haskell PRNG.","downloads":1,"lastUpload":"2011-08-25T09:34:35Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Random","uri":"/package/AC-Random"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"random","uri":"/packages/tag/random"}],"votes":0},{"description":"Trivial wrapper over ansi-terminal.","downloads":2,"lastUpload":"2010-10-28T12:38:15Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Terminal","uri":"/package/AC-Terminal"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"numerical","uri":"/packages/tag/numerical"}],"votes":0},{"description":"Immutable arrays with plain integer indicies.","downloads":5,"lastUpload":"2010-01-17T13:34:07Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-VanillaArray","uri":"/package/AC-VanillaArray"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Efficient geometric vectors and transformations.","downloads":25,"lastUpload":"2011-08-12T12:33:08Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Vector","uri":"/package/AC-Vector"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"numerical","uri":"/packages/tag/numerical"}],"votes":0},{"description":"Fancy type-system stuff for AC-Vector","downloads":13,"lastUpload":"2010-08-15T15:34:26Z","maintainers":[{"display":"AndrewCoppin","uri":"/user/AndrewCoppin"}],"name":{"display":"AC-Vector-Fancy","uri":"/package/AC-Vector-Fancy"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"numerical","uri":"/packages/tag/numerical"}],"votes":0},{"description":"Essential features","downloads":4,"lastUpload":"2015-01-02T21:48:24.005724Z","maintainers":[{"display":"JamesCandy","uri":"/user/JamesCandy"}],"name":{"display":"ACME","uri":"/package/ACME"},"tags":[{"display":"acme","uri":"/packages/tag/acme"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"}],"votes":2.25},{"description":"Efficient, high-level dynamic 
programming.","downloads":41,"lastUpload":"2019-10-01T18:22:22.276688014Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"ADPfusion","uri":"/package/ADPfusion"},"tags":[{"display":"algorithms","uri":"/packages/tag/algorithms"},{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data-structures","uri":"/packages/tag/data-structures"},{"display":"formal-languages","uri":"/packages/tag/formal-languages"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Dynamic programming on tree and forest structures","downloads":2,"lastUpload":"2017-11-23T22:15:26.956207149Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"ADPfusionForest","uri":"/package/ADPfusionForest"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"formal-languages","uri":"/packages/tag/formal-languages"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Dynamic programming for Set data structures.","downloads":8,"lastUpload":"2017-10-19T14:36:27.804439921Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"ADPfusionSet","uri":"/package/ADPfusionSet"},"tags":[{"display":"algorithms","uri":"/packages/tag/algorithms"},{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data-structures","uri":"/packages/tag/data-structures"},{"display":"formal-languages","uri":"/packages/tag/formal-languages"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"foundational type classes for approximating exact real numbers","downloads":8,"lastUpload":"2011-05-11T11:25:48Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-Basics","uri":"/package/AERN-Basics"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Compositional lazy dataflow networks for exact real number computation","downloads":11,"lastUpload":"2009-07-29T10:06:08Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-Net","uri":"/package/AERN-Net"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"distributed-computing","uri":"/packages/tag/distributed-computing"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"arbitrary precision real interval arithmetic","downloads":39,"lastUpload":"2011-05-11T11:31:11Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-Real","uri":"/package/AERN-Real"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"arbitrary precision real interval 
arithmetic","downloads":10,"lastUpload":"2011-05-11T13:37:45Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-Real-Double","uri":"/package/AERN-Real-Double"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"arbitrary precision real interval arithmetic","downloads":11,"lastUpload":"2011-05-11T11:31:35Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-Real-Interval","uri":"/package/AERN-Real-Interval"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"polynomial function enclosures (PFEs) approximating exact real functions","downloads":26,"lastUpload":"2009-07-29T10:05:31Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-RnToRm","uri":"/package/AERN-RnToRm"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"GL plotting of polynomial function enclosures (PFEs)","downloads":15,"lastUpload":"2009-08-01T16:05:31Z","maintainers":[{"display":"MichalKonecny","uri":"/user/MichalKonecny"}],"name":{"display":"AERN-RnToRm-Plot","uri":"/package/AERN-RnToRm-Plot"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"Fast AES encryption/decryption for bytestrings","downloads":40,"lastUpload":"2014-05-07T21:04:03.888615Z","maintainers":[{"display":"SveinOveAas","uri":"/user/SveinOveAas"}],"name":{"display":"AES","uri":"/package/AES"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"cryptography","uri":"/packages/tag/cryptography"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Arrowized functional state machines","downloads":21,"lastUpload":"2016-04-21T22:13:19.148646Z","maintainers":[{"display":"hanzhxu","uri":"/user/hanzhxu"}],"name":{"display":"AFSM","uri":"/package/AFSM"},"tags":[{"display":"frp","uri":"/packages/tag/frp"},{"display":"library","uri":"/packages/tag/library"},{"display":"mit","uri":"/packages/tag/mit"}],"votes":2},{"description":"A library for writing AGI scripts for Asterisk","downloads":14,"lastUpload":"2009-12-07T20:12:57Z","maintainers":[{"display":"JeremyShaw","uri":"/user/JeremyShaw"}],"name":{"display":"AGI","uri":"/package/AGI"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"network","uri":"/packages/tag/network"}],"votes":0},{"description":"A binding for the OpenAL Utility 
Toolkit","downloads":38,"lastUpload":"2019-06-10T16:09:36.285351988Z","maintainers":[{"display":"StephenBlackheath","uri":"/user/StephenBlackheath"},{"display":"SvenPanne","uri":"/user/SvenPanne"}],"name":{"display":"ALUT","uri":"/package/ALUT"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"sound","uri":"/packages/tag/sound"}],"votes":0},{"description":"Low-level bindings for Asterisk Manager Interface (AMI).","downloads":2,"lastUpload":"2012-01-16T06:02:43Z","maintainers":[{"display":"IlyaPortnov","uri":"/user/IlyaPortnov"}],"name":{"display":"AMI","uri":"/package/AMI"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"network","uri":"/packages/tag/network"}],"votes":0},{"description":"Num instance for Applicatives provided via the ANum newtype","downloads":14,"lastUpload":"2018-02-13T00:06:58.303270679Z","maintainers":[{"display":"DanBurton","uri":"/user/DanBurton"}],"name":{"display":"ANum","uri":"/package/ANum"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"ASN.1 support for Haskell","downloads":4,"lastUpload":"2008-06-22T10:41:52Z","maintainers":[{"display":"HerbertValerioRiedel","uri":"/user/HerbertValerioRiedel"}],"name":{"display":"ASN1","uri":"/package/ASN1"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"language","uri":"/packages/tag/language"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Mutable variables with Exception handling and concurrency support.","downloads":11,"lastUpload":"2009-12-08T15:41:14Z","maintainers":[{"display":"AlexMason","uri":"/user/AlexMason"}],"name":{"display":"AVar","uri":"/package/AVar"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"concurrency","uri":"/packages/tag/concurrency"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"A binding to a part of the ANSI escape code for the console","downloads":2,"lastUpload":"2010-01-24T23:44:07Z","maintainers":[{"display":"HaraldWolfsgruber","uri":"/user/HaraldWolfsgruber"}],"name":{"display":"AWin32Console","uri":"/package/AWin32Console"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"system","uri":"/packages/tag/system"}],"votes":0},{"description":"Monads-tf instances for the AbortT monad transformer.","downloads":2,"lastUpload":"2012-12-07T13:58:51Z","maintainers":[{"display":"GregoryCrosswhite","uri":"/user/GregoryCrosswhite"}],"name":{"display":"AbortT-monadstf","uri":"/package/AbortT-monadstf"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"mtl instances for the AbortT monad transformer","downloads":8,"lastUpload":"2016-02-06T22:37:44.304337Z","maintainers":[{"display":"GregoryCrosswhite","uri":"/user/GregoryCrosswhite"}],"name":{"display":"AbortT-mtl","uri":"/package/AbortT-mtl"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"A monad and monadic transformer providing \"abort\" 
functionality","downloads":16,"lastUpload":"2019-07-19T14:08:41.889681784Z","maintainers":[{"display":"GregoryCrosswhite","uri":"/user/GregoryCrosswhite"}],"name":{"display":"AbortT-transformers","uri":"/package/AbortT-transformers"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"An easy-to-use video game framework for Haskell.","downloads":5,"lastUpload":"2014-11-11T00:20:57.714901Z","maintainers":[{"display":"AdityaBhargava","uri":"/user/AdityaBhargava"}],"name":{"display":"ActionKid","uri":"/package/ActionKid"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"game-engine","uri":"/packages/tag/game-engine"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Library for incremental computing.","downloads":11,"lastUpload":"2013-01-28T21:50:50Z","maintainers":[{"display":"DustinDeWeese","uri":"/user/DustinDeWeese"},{"display":"MagnusCarlsson","uri":"/user/MagnusCarlsson"},{"display":"PeterJonsson","uri":"/user/PeterJonsson"}],"name":{"display":"Adaptive","uri":"/package/Adaptive"},"tags":[{"display":"algorithms","uri":"/packages/tag/algorithms"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Library for incremental computing.","downloads":8,"lastUpload":"2013-01-26T09:17:43Z","maintainers":[{"display":"PaoloGiarrusso","uri":"/user/PaoloGiarrusso"}],"name":{"display":"Adaptive-Blaisorblade","uri":"/package/Adaptive-Blaisorblade"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Lisperati's adventure game in Lisp translated to Haskell","downloads":8,"lastUpload":"2010-06-11T00:01:05Z","maintainers":[{"display":"TimWawrzynczak","uri":"/user/TimWawrzynczak"}],"name":{"display":"Advgame","uri":"/package/Advgame"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"game","uri":"/packages/tag/game"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Assessment services for the Advise-Me project","downloads":7,"lastUpload":"2019-10-30T07:45:20.345073378Z","maintainers":[{"display":"BastiaanHeeren","uri":"/user/BastiaanHeeren"}],"name":{"display":"Advise-me","uri":"/package/Advise-me"},"tags":[{"display":"apache","uri":"/packages/tag/apache"},{"display":"education","uri":"/packages/tag/education"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Mapping between Aeson's JSON and Bson objects.","downloads":24,"lastUpload":"2022-05-06T10:41:14.015254306Z","maintainers":[{"display":"AndrasSlemmer","uri":"/user/AndrasSlemmer"},{"display":"NiklasHambuechen","uri":"/user/NiklasHambuechen"}],"name":{"display":"AesonBson","uri":"/package/AesonBson"},"tags":[{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Generator-generator for 
QuickCheck","downloads":10,"lastUpload":"2012-08-01T11:36:04Z","maintainers":[{"display":"JonasDuregard","uri":"/user/JonasDuregard"}],"name":{"display":"Agata","uri":"/package/Agata"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"testing","uri":"/packages/tag/testing"}],"votes":0},{"description":"A dependently typed functional programming language and proof assistant","downloads":306,"lastUpload":"2022-04-02T18:00:16.805589944Z","maintainers":[{"display":"AndreasAbel","uri":"/user/AndreasAbel"},{"display":"AndresSicardRamirez","uri":"/user/AndresSicardRamirez"},{"display":"NilsAndersDanielsson","uri":"/user/NilsAndersDanielsson"},{"display":"UlfNorell","uri":"/user/UlfNorell"}],"name":{"display":"Agda","uri":"/package/Agda"},"tags":[{"display":"dependent-types","uri":"/packages/tag/dependent-types"},{"display":"program","uri":"/packages/tag/program"}],"votes":2.75}]} diff --git a/swh/lister/hackage/tests/data/https_hackage.haskell.org/packages_search_1 b/swh/lister/hackage/tests/data/https_hackage.haskell.org/packages_search_1 new file mode 100644 index 0000000..c44aa01 --- /dev/null +++ b/swh/lister/hackage/tests/data/https_hackage.haskell.org/packages_search_1 @@ -0,0 +1 @@ +{"numberOfResults":150,"pageContents":[{"description":"Command-line program for type-checking and compiling Agda programs","downloads":20,"lastUpload":"2012-03-12T11:01:45Z","maintainers":[{"display":"NilsAndersDanielsson","uri":"/user/NilsAndersDanielsson"},{"display":"UlfNorell","uri":"/user/UlfNorell"}],"name":{"display":"Agda-executable","uri":"/package/Agda-executable"},"tags":[{"display":"dependent-types","uri":"/packages/tag/dependent-types"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Aho-Corasick string matching algorithm","downloads":7,"lastUpload":"2012-11-09T05:36:49Z","maintainers":[{"display":"SergeyLymar","uri":"/user/SergeyLymar"}],"name":{"display":"AhoCorasick","uri":"/package/AhoCorasick"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"text","uri":"/packages/tag/text"}],"votes":0},{"description":"Find the minimal subset/submap satisfying some property.","downloads":7,"lastUpload":"2016-08-24T06:15:53.900038Z","maintainers":[{"display":"EchoNolan","uri":"/user/EchoNolan"}],"name":{"display":"AlanDeniseEricLauren","uri":"/package/AlanDeniseEricLauren"},"tags":[{"display":"algorithms","uri":"/packages/tag/algorithms"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Algorithmic music composition","downloads":6,"lastUpload":"2018-04-06T11:21:37Z","maintainers":[{"display":"omelkonian","uri":"/user/omelkonian"}],"name":{"display":"AlgoRhythm","uri":"/package/AlgoRhythm"},"tags":[{"display":"algorithmic-music-composition","uri":"/packages/tag/algorithmic-music-composition"},{"display":"automatic-music-generation","uri":"/packages/tag/automatic-music-generation"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"chaos-music","uri":"/packages/tag/chaos-music"},{"display":"generative-music-grammars","uri":"/packages/tag/generative-music-grammars"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Example implementation of Algorithm W for 
Hindley-Milner\ntype inference.","downloads":10,"lastUpload":"2015-05-27T07:33:48.676828Z","maintainers":[{"display":"MartinGrabmueller","uri":"/user/MartinGrabmueller"}],"name":{"display":"AlgorithmW","uri":"/package/AlgorithmW"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"development","uri":"/packages/tag/development"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Collection of alignment algorithms","downloads":11,"lastUpload":"2017-03-14T14:41:48.734043445Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"AlignmentAlgorithms","uri":"/package/AlignmentAlgorithms"},"tags":[{"display":"algorithms","uri":"/packages/tag/algorithms"},{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"data-structures","uri":"/packages/tag/data-structures"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"},{"display":"linguistics","uri":"/packages/tag/linguistics"}],"votes":0},{"description":"Near-future Sci-Fi roguelike and tactical squad combat game","downloads":105,"lastUpload":"2021-12-17T17:46:58.38087348Z","maintainers":[{"display":"MikolajKonarski","uri":"/user/MikolajKonarski"}],"name":{"display":"Allure","uri":"/package/Allure"},"tags":[{"display":"agpl","uri":"/packages/tag/agpl"},{"display":"game","uri":"/packages/tag/game"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"},{"display":"roguelike","uri":"/packages/tag/roguelike"}],"votes":2},{"description":"Android view hierarchy importer","downloads":4,"lastUpload":"2012-12-19T12:15:44Z","maintainers":[{"display":"alpheccar","uri":"/user/alpheccar"}],"name":{"display":"AndroidViewHierarchyImporter","uri":"/package/AndroidViewHierarchyImporter"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"development","uri":"/packages/tag/development"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Updated version of Yampa: a library for programming hybrid systems.","downloads":4,"lastUpload":"2011-03-27T02:54:06Z","maintainers":[{"display":"EdwardAmsden","uri":"/user/EdwardAmsden"}],"name":{"display":"Animas","uri":"/package/Animas"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"frp","uri":"/packages/tag/frp"},{"display":"library","uri":"/packages/tag/library"},{"display":"reactivity","uri":"/packages/tag/reactivity"}],"votes":0},{"description":"Constructing, analyzing and destructing annotated trees","downloads":18,"lastUpload":"2016-12-05T10:03:17.260388Z","maintainers":[{"display":"MartijnVanSteenbergen","uri":"/user/MartijnVanSteenbergen"}],"name":{"display":"Annotations","uri":"/package/Annotations"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"generics","uri":"/packages/tag/generics"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Convert ANSI Terminal Sequences to nice HTML markup","downloads":5,"lastUpload":"2011-09-10T19:20:01Z","maintainers":[{"display":"JensStimpfle","uri":"/user/JensStimpfle"}],"name":{"display":"Ansi2Html","uri":"/package/Ansi2Html"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"program","uri":"/packages/tag/program"},{"display":"web","uri":"/packages/tag/web"}],"votes":0},{"description":"A simple music library with the capability of generating .ly and .mid 
files.","downloads":10,"lastUpload":"2020-07-07T13:19:57.888821262Z","maintainers":[{"display":"Liisi_Kerik","uri":"/user/Liisi_Kerik"}],"name":{"display":"Aoide","uri":"/package/Aoide"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"composition","uri":"/packages/tag/composition"},{"display":"library","uri":"/packages/tag/library"},{"display":"music","uri":"/packages/tag/music"}],"votes":0},{"description":"Library for Apple Push Notification Service","downloads":6,"lastUpload":"2009-07-13T19:38:44Z","maintainers":[{"display":"ChrisMoos","uri":"/user/ChrisMoos"}],"name":{"display":"ApplePush","uri":"/package/ApplePush"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"network","uri":"/packages/tag/network"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Call AppleScript from Haskell, and then call back into Haskell.","downloads":20,"lastUpload":"2012-02-15T11:12:39Z","maintainers":[{"display":"ReinerPope","uri":"/user/ReinerPope"},{"display":"WouterSwierstra","uri":"/user/WouterSwierstra"}],"name":{"display":"AppleScript","uri":"/package/AppleScript"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"foreign","uri":"/packages/tag/foreign"}],"votes":0},{"description":"Function approximation","downloads":2,"lastUpload":"2015-04-26T10:09:29.167094Z","maintainers":[{"display":"DominicSteinitz","uri":"/user/DominicSteinitz"}],"name":{"display":"ApproxFun-hs","uri":"/package/ApproxFun-hs"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"Unboxed references, dynamic arrays and more","downloads":22,"lastUpload":"2009-06-26T19:04:43Z","maintainers":[{"display":"GwernBranwen","uri":"/user/GwernBranwen"}],"name":{"display":"ArrayRef","uri":"/package/ArrayRef"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"A library to generate Netlist code from Arrow descriptions.","downloads":3,"lastUpload":"2015-02-21T13:23:05.402924Z","maintainers":[{"display":"frosch03","uri":"/user/frosch03"},{"display":"brettschneider","uri":"/user/brettschneider"}],"name":{"display":"ArrowVHDL","uri":"/package/ArrowVHDL"},"tags":[{"display":"library","uri":"/packages/tag/library"},{"display":"public-domain","uri":"/packages/tag/public-domain"},{"display":"testing","uri":"/packages/tag/testing"}],"votes":0},{"description":"Strongly typed Attribute Grammars implemented using type-level programming.","downloads":52,"lastUpload":"2022-05-26T18:50:24.876992743Z","maintainers":[{"display":"MarcosViera","uri":"/user/MarcosViera"},{"display":"jpgarcia","uri":"/user/jpgarcia"}],"name":{"display":"AspectAG","uri":"/package/AspectAG"},"tags":[{"display":"aspect-oriented-programming","uri":"/packages/tag/aspect-oriented-programming"},{"display":"development-","uri":"/packages/tag/development-"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"language","uri":"/packages/tag/language"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Fast Bencode encoding and parsing 
library","downloads":14,"lastUpload":"2014-06-24T23:33:17.506075Z","maintainers":[{"display":"FlorianHartwig","uri":"/user/FlorianHartwig"}],"name":{"display":"AttoBencode","uri":"/package/AttoBencode"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Simple lightweight JSON parser, generator & manipulator based on ByteString","downloads":22,"lastUpload":"2011-02-03T05:13:20Z","maintainers":[{"display":"HiromiIshii","uri":"/user/HiromiIshii"}],"name":{"display":"AttoJson","uri":"/package/AttoJson"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"text","uri":"/packages/tag/text"}],"votes":0},{"description":"Visualisation of Strange Attractors in 3-Dimensions","downloads":11,"lastUpload":"2010-03-15T10:01:26Z","maintainers":[{"display":"RubenZilibowitz","uri":"/user/RubenZilibowitz"}],"name":{"display":"Attrac","uri":"/package/Attrac"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Yet another parser generator for C/C++","downloads":2,"lastUpload":"2011-11-08T06:36:20Z","maintainers":[{"display":"XinyuJiang","uri":"/user/XinyuJiang"}],"name":{"display":"Aurochs","uri":"/package/Aurochs"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"development","uri":"/packages/tag/development"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"GUI library based upon generic programming (SYB3)","downloads":9,"lastUpload":"2008-09-17T10:58:40Z","maintainers":[{"display":"MadsLindstroem","uri":"/user/MadsLindstroem"}],"name":{"display":"AutoForms","uri":"/package/AutoForms"},"tags":[{"display":"gui","uri":"/packages/tag/gui"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Balanced binary trees using the AVL algorithm.","downloads":18,"lastUpload":"2008-08-29T05:59:00Z","maintainers":[{"display":"AdrianHey","uri":"/user/AdrianHey"}],"name":{"display":"AvlTree","uri":"/package/AvlTree"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data-structures","uri":"/packages/tag/data-structures"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Embedded BASIC","downloads":23,"lastUpload":"2009-02-09T09:36:35Z","maintainers":[{"display":"LennartAugustsson","uri":"/user/LennartAugustsson"}],"name":{"display":"BASIC","uri":"/package/BASIC"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"language","uri":"/packages/tag/language"},{"display":"library","uri":"/packages/tag/library"}],"votes":2},{"description":"Big Contact Map Tools","downloads":7,"lastUpload":"2015-05-05T17:37:55.917192Z","maintainers":[{"display":"kaizhang","uri":"/user/kaizhang"}],"name":{"display":"BCMtools","uri":"/package/BCMtools"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"library","uri":"/packages/tag/library"},{"display":"mit","uri":"/packages/tag/mit"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"A compiler front-end 
generator.","downloads":93,"lastUpload":"2022-02-17T17:38:38.309476485Z","maintainers":[{"display":"AndreasAbel","uri":"/user/AndreasAbel"},{"display":"GregoireDetrez","uri":"/user/GregoireDetrez"},{"display":"MarkusForsberg","uri":"/user/MarkusForsberg"},{"display":"ThomasHallgren","uri":"/user/ThomasHallgren"}],"name":{"display":"BNFC","uri":"/package/BNFC"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"parsing","uri":"/packages/tag/parsing"},{"display":"program","uri":"/packages/tag/program"}],"votes":2.25},{"description":"Deriving Parsers and Quasi-Quoters from BNF Grammars","downloads":51,"lastUpload":"2020-02-09T23:44:05.066878431Z","maintainers":[{"display":"JeanPhilippeBernardy","uri":"/user/JeanPhilippeBernardy"},{"display":"JonasDuregard","uri":"/user/JonasDuregard"},{"display":"ArtemPelenitsyn","uri":"/user/ArtemPelenitsyn"}],"name":{"display":"BNFC-meta","uri":"/package/BNFC-meta"},"tags":[{"display":"development","uri":"/packages/tag/development"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"language","uri":"/packages/tag/language"},{"display":"library","uri":"/packages/tag/library"},{"display":"parsing","uri":"/packages/tag/parsing"},{"display":"text","uri":"/packages/tag/text"}],"votes":0},{"description":"Translations of classic Truth Maintenance Systems","downloads":3,"lastUpload":"2022-04-22T00:24:50.851029935Z","maintainers":[{"display":"jpmrst","uri":"/user/jpmrst"}],"name":{"display":"BPS","uri":"/package/BPS"},"tags":[{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"},{"display":"truth-maintenance","uri":"/packages/tag/truth-maintenance"}],"votes":0},{"description":"Tools for self-assembly","downloads":2,"lastUpload":"2014-06-12T19:26:55.982603Z","maintainers":[{"display":"pmeunier","uri":"/user/pmeunier"}],"name":{"display":"Baggins","uri":"/package/Baggins"},"tags":[{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"A Drum Machine DSL for Haskell","downloads":17,"lastUpload":"2015-10-26T16:12:59.52351Z","maintainers":[{"display":"5outh","uri":"/user/5outh"}],"name":{"display":"Bang","uri":"/package/Bang"},"tags":[{"display":"library","uri":"/packages/tag/library"},{"display":"mit","uri":"/packages/tag/mit"},{"display":"sound","uri":"/packages/tag/sound"}],"votes":0},{"description":"An ad-hoc P2P chat program","downloads":8,"lastUpload":"2008-04-05T02:14:35Z","maintainers":[{"display":"GwernBranwen","uri":"/user/GwernBranwen"}],"name":{"display":"Barracuda","uri":"/package/Barracuda"},"tags":[{"display":"library","uri":"/packages/tag/library"},{"display":"network","uri":"/packages/tag/network"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"An interpreter for the Befunge-93 Programming Language","downloads":3,"lastUpload":"2010-05-20T18:25:24Z","maintainers":[{"display":"BrandonSimmons","uri":"/user/BrandonSimmons"}],"name":{"display":"Befunge93","uri":"/package/Befunge93"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"compilers-interpreters","uri":"/packages/tag/compilers-interpreters"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Benchmark functions with 
history","downloads":5,"lastUpload":"2015-11-19T14:22:31.9303Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"BenchmarkHistory","uri":"/package/BenchmarkHistory"},"tags":[{"display":"benchmarking","uri":"/packages/tag/benchmarking"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Berkeley DB binding","downloads":30,"lastUpload":"2013-03-13T19:54:56Z","maintainers":[{"display":"JohnMcCall","uri":"/user/JohnMcCall"},{"display":"StephenBlackheath","uri":"/user/StephenBlackheath"}],"name":{"display":"BerkeleyDB","uri":"/package/BerkeleyDB"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"database","uri":"/packages/tag/database"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Berkeley DB XML binding","downloads":25,"lastUpload":"2011-09-06T20:51:27Z","maintainers":[{"display":"StephenBlackheath","uri":"/user/StephenBlackheath"}],"name":{"display":"BerkeleyDBXML","uri":"/package/BerkeleyDBXML"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"database","uri":"/packages/tag/database"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Factorization of polynomials over finite field","downloads":2,"lastUpload":"2013-06-10T11:40:22Z","maintainers":[{"display":"AbdelwahebMiled","uri":"/user/AbdelwahebMiled"}],"name":{"display":"BerlekampAlgorithm","uri":"/package/BerlekampAlgorithm"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"math","uri":"/packages/tag/math"}],"votes":0},{"description":"The Bidirectional Generic Update Language","downloads":11,"lastUpload":"2016-08-30T09:49:01.746971Z","maintainers":[{"display":"joshko","uri":"/user/joshko"},{"display":"Zirun","uri":"/user/Zirun"}],"name":{"display":"BiGUL","uri":"/package/BiGUL"},"tags":[{"display":"generics","uri":"/packages/tag/generics"},{"display":"language","uri":"/packages/tag/language"},{"display":"lenses","uri":"/packages/tag/lenses"},{"display":"library","uri":"/packages/tag/library"},{"display":"public-domain","uri":"/packages/tag/public-domain"}],"votes":2.25},{"description":"Image editor for pixel art","downloads":10,"lastUpload":"2014-01-24T03:18:08.778401Z","maintainers":[{"display":"ManuelChakravarty","uri":"/user/ManuelChakravarty"}],"name":{"display":"BigPixel","uri":"/package/BigPixel"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"development","uri":"/packages/tag/development"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Source-to-source plugin for enhancing EDSLs with static annotations","downloads":5,"lastUpload":"2020-03-19T17:59:05.904872539Z","maintainers":[{"display":"agustinmista","uri":"/user/agustinmista"}],"name":{"display":"BinderAnn","uri":"/package/BinderAnn"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"compiler-plugin","uri":"/packages/tag/compiler-plugin"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Common bin-packing 
heuristics.","downloads":14,"lastUpload":"2014-02-07T13:32:35.055121Z","maintainers":[{"display":"BjoernBrandenburg","uri":"/user/BjoernBrandenburg"},{"display":"DavidFeng","uri":"/user/DavidFeng"}],"name":{"display":"Binpack","uri":"/package/Binpack"},"tags":[{"display":"algorithms","uri":"/packages/tag/algorithms"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"heuristics","uri":"/packages/tag/heuristics"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Libary for Hidden Markov Models in HMMER3 format. ","downloads":46,"lastUpload":"2017-06-09T15:26:05.435990496Z","maintainers":[{"display":"FlorianEggenhofer","uri":"/user/FlorianEggenhofer"}],"name":{"display":"BioHMM","uri":"/package/BioHMM"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Base library for bioinformatics","downloads":12,"lastUpload":"2011-04-08T12:46:37Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"Biobase","uri":"/package/Biobase"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"BLAST-related tools","downloads":16,"lastUpload":"2021-06-05T21:06:10.915506997Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"},{"display":"FlorianEggenhofer","uri":"/user/FlorianEggenhofer"}],"name":{"display":"BiobaseBlast","uri":"/package/BiobaseBlast"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Vienna / DotBracket / ExtSS parsers","downloads":3,"lastUpload":"2011-08-22T15:27:13Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"BiobaseDotP","uri":"/package/BiobaseDotP"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"European Nucleotide Archive data","downloads":7,"lastUpload":"2021-06-04T12:37:25.444518495Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"BiobaseENA","uri":"/package/BiobaseENA"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Ensembl related datastructures and functions","downloads":8,"lastUpload":"2020-01-09T19:39:11.497450015Z","maintainers":[{"display":"FlorianEggenhofer","uri":"/user/FlorianEggenhofer"}],"name":{"display":"BiobaseEnsembl","uri":"/package/BiobaseEnsembl"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Importer for FR3D resources","downloads":10,"lastUpload":"2012-02-16T14:20:59Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"BiobaseFR3D","uri":"/package/BiobaseFR3D"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"streaming 
FASTA parser","downloads":16,"lastUpload":"2021-06-04T13:59:41.393107101Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"BiobaseFasta","uri":"/package/BiobaseFasta"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Libary to interface with the Bioinformatics HTTP services - Entrez Ensembl","downloads":4,"lastUpload":"2019-11-20T21:19:53.584971279Z","maintainers":[{"display":"FlorianEggenhofer","uri":"/user/FlorianEggenhofer"}],"name":{"display":"BiobaseHTTP","uri":"/package/BiobaseHTTP"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"}],"votes":0}]} diff --git a/swh/lister/hackage/tests/data/https_hackage.haskell.org/packages_search_2 b/swh/lister/hackage/tests/data/https_hackage.haskell.org/packages_search_2 new file mode 100644 index 0000000..72c031f --- /dev/null +++ b/swh/lister/hackage/tests/data/https_hackage.haskell.org/packages_search_2 @@ -0,0 +1 @@ +{"numberOfResults":150,"pageContents":[{"description":"Tools to query Bioinformatics HTTP services e.g. Entrez, Ensembl.","downloads":8,"lastUpload":"2018-12-13T19:11:11.267301285Z","maintainers":[{"display":"FlorianEggenhofer","uri":"/user/FlorianEggenhofer"}],"name":{"display":"BiobaseHTTPTools","uri":"/package/BiobaseHTTPTools"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Infernal data structures and tools","downloads":36,"lastUpload":"2017-03-14T13:57:23.042537328Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"BiobaseInfernal","uri":"/package/BiobaseInfernal"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Multiple Alignment Format","downloads":2,"lastUpload":"2011-07-29T12:30:35Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"BiobaseMAF","uri":"/package/BiobaseMAF"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Newick file format parser.","downloads":4,"lastUpload":"2017-07-07T17:56:54.930748267Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"BiobaseNewick","uri":"/package/BiobaseNewick"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"RNA folding training 
data","downloads":10,"lastUpload":"2011-09-29T13:24:59Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"BiobaseTrainingData","uri":"/package/BiobaseTrainingData"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Import Turner RNA parameters","downloads":31,"lastUpload":"2013-04-25T00:42:45Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"BiobaseTurner","uri":"/package/BiobaseTurner"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Collection of types for bioinformatics","downloads":25,"lastUpload":"2021-06-04T12:19:46.84432619Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"BiobaseTypes","uri":"/package/BiobaseTypes"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data-structures","uri":"/packages/tag/data-structures"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Import Vienna energy parameters","downloads":24,"lastUpload":"2013-04-22T01:29:13Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"BiobaseVienna","uri":"/package/BiobaseVienna"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Efficient RNA/DNA/Protein Primary/Secondary Structure","downloads":73,"lastUpload":"2021-06-04T20:20:23.163421103Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"BiobaseXNA","uri":"/package/BiobaseXNA"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"A preprocessor for Bird-style Literate Haskell comments with Haddock markup.","downloads":4,"lastUpload":"2012-08-02T16:36:38Z","maintainers":[{"display":"SeanMcLaughlin","uri":"/user/SeanMcLaughlin"}],"name":{"display":"BirdPP","uri":"/package/BirdPP"},"tags":[{"display":"development","uri":"/packages/tag/development"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"","downloads":18,"lastUpload":"2018-03-03T19:01:52.086048792Z","maintainers":[{"display":"Ofenhed","uri":"/user/Ofenhed"}],"name":{"display":"BitStringRandomMonad","uri":"/package/BitStringRandomMonad"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"crypto","uri":"/packages/tag/crypto"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"A module to aid in the (de)serialisation of binary 
data","downloads":10,"lastUpload":"2021-01-17T10:52:15.284789745Z","maintainers":[{"display":"AdamLangley","uri":"/user/AdamLangley"},{"display":"joecrayne","uri":"/user/joecrayne"}],"name":{"display":"BitSyntax","uri":"/package/BitSyntax"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"serialization","uri":"/packages/tag/serialization"}],"votes":0},{"description":"A library to access bit.ly URL shortener.","downloads":18,"lastUpload":"2012-02-02T17:42:46Z","maintainers":[{"display":"SergeyAstanin","uri":"/user/SergeyAstanin"}],"name":{"display":"Bitly","uri":"/package/Bitly"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"web","uri":"/packages/tag/web"}],"votes":0},{"description":"Batteries-included Structured Logging library","downloads":38,"lastUpload":"2022-07-20T15:13:36.539640239Z","maintainers":[{"display":"PatrickBrisbin","uri":"/user/PatrickBrisbin"},{"display":"dukerutledge","uri":"/user/dukerutledge"},{"display":"mjgpy3","uri":"/user/mjgpy3"},{"display":"FreckleEngineering","uri":"/user/FreckleEngineering"}],"name":{"display":"Blammo","uri":"/package/Blammo"},"tags":[{"display":"library","uri":"/packages/tag/library"},{"display":"mit","uri":"/packages/tag/mit"},{"display":"utils","uri":"/packages/tag/utils"}],"votes":0},{"description":"Libary to interface with the NCBI blast REST interface","downloads":25,"lastUpload":"2021-06-05T21:19:54.156671184Z","maintainers":[{"display":"FlorianEggenhofer","uri":"/user/FlorianEggenhofer"}],"name":{"display":"BlastHTTP","uri":"/package/BlastHTTP"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Diagram editor","downloads":6,"lastUpload":"2012-06-19T21:00:19Z","maintainers":[{"display":"AlanZimmerman","uri":"/user/AlanZimmerman"}],"name":{"display":"Blobs","uri":"/package/Blobs"},"tags":[{"display":"graphics","uri":"/packages/tag/graphics"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"A tool for posting Haskelly articles to blogs","downloads":143,"lastUpload":"2022-08-26T02:27:45.073759633Z","maintainers":[{"display":"BrentYorgey","uri":"/user/BrentYorgey"},{"display":"RobertGreayer","uri":"/user/RobertGreayer"}],"name":{"display":"BlogLiterately","uri":"/package/BlogLiterately"},"tags":[{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"},{"display":"web","uri":"/packages/tag/web"}],"votes":0},{"description":"Include images in blog posts with inline diagrams code","downloads":51,"lastUpload":"2022-05-24T16:27:56.206098429Z","maintainers":[{"display":"BrentYorgey","uri":"/user/BrentYorgey"}],"name":{"display":"BlogLiterately-diagrams","uri":"/package/BlogLiterately-diagrams"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"},{"display":"web","uri":"/packages/tag/web"}],"votes":0},{"description":"A markdown-like markup language designed for blog 
posts","downloads":13,"lastUpload":"2018-02-05T06:23:31.042191764Z","maintainers":[{"display":"alexbecker","uri":"/user/alexbecker"}],"name":{"display":"Blogdown","uri":"/package/Blogdown"},"tags":[{"display":"agpl","uri":"/packages/tag/agpl"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"},{"display":"web","uri":"/packages/tag/web"}],"votes":1.5},{"description":"Html document layout library.","downloads":7,"lastUpload":"2010-08-03T08:19:51Z","maintainers":[{"display":"SergeyMironov","uri":"/user/SergeyMironov"}],"name":{"display":"BluePrintCSS","uri":"/package/BluePrintCSS"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"deprecated","uri":"/packages/tag/deprecated"},{"display":"library","uri":"/packages/tag/library"},{"display":"text","uri":"/packages/tag/text"},{"display":"web","uri":"/packages/tag/web"}],"votes":0},{"description":"Preview of a new build system.","downloads":2,"lastUpload":"2009-11-30T14:22:55Z","maintainers":[{"display":"GregoryCrosswhite","uri":"/user/GregoryCrosswhite"}],"name":{"display":"Blueprint","uri":"/package/Blueprint"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"distribution","uri":"/packages/tag/distribution"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"A simple document organizer with some wiki functionality","downloads":26,"lastUpload":"2016-01-11T12:47:27.621179Z","maintainers":[{"display":"EmilAxelsson","uri":"/user/EmilAxelsson"}],"name":{"display":"Bookshelf","uri":"/package/Bookshelf"},"tags":[{"display":"program","uri":"/packages/tag/program"},{"display":"text","uri":"/packages/tag/text"}],"votes":0},{"description":"Generalized booleans and numbers","downloads":125,"lastUpload":"2017-02-19T22:07:32.056861Z","maintainers":[{"display":"ConalElliott","uri":"/user/ConalElliott"}],"name":{"display":"Boolean","uri":"/package/Boolean"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Implementation of bounded channels.","downloads":26,"lastUpload":"2014-05-14T23:30:48.662935Z","maintainers":[{"display":"AdamWick","uri":"/user/AdamWick"}],"name":{"display":"BoundedChan","uri":"/package/BoundedChan"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"concurrency","uri":"/packages/tag/concurrency"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Static text template generation library","downloads":4,"lastUpload":"2010-03-24T01:43:29Z","maintainers":[{"display":"MatthiasReisner","uri":"/user/MatthiasReisner"}],"name":{"display":"Bravo","uri":"/package/Bravo"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"text","uri":"/packages/tag/text"}],"votes":0},{"description":"A socker wrapper that makes the IO of sockets much cleaner","downloads":7,"lastUpload":"2015-08-12T12:31:27.028316Z","maintainers":[{"display":"tmore","uri":"/user/tmore"}],"name":{"display":"BufferedSocket","uri":"/package/BufferedSocket"},"tags":[{"display":"library","uri":"/packages/tag/library"},{"display":"mit","uri":"/packages/tag/mit"},{"display":"network","uri":"/packages/tag/network"}],"votes":0},{"description":"Hits a set of urls periodically to bust 
caches","downloads":4,"lastUpload":"2013-05-22T06:11:22Z","maintainers":[{"display":"MichaelXavier","uri":"/user/MichaelXavier"}],"name":{"display":"Buster","uri":"/package/Buster"},"tags":[{"display":"mit","uri":"/packages/tag/mit"},{"display":"program","uri":"/packages/tag/program"},{"display":"web","uri":"/packages/tag/web"}],"votes":0},{"description":"C-Structs implementation for Haskell","downloads":7,"lastUpload":"2021-03-30T13:36:33.593597406Z","maintainers":[{"display":"SimonPlakolb","uri":"/user/SimonPlakolb"}],"name":{"display":"C-structs","uri":"/package/C-structs"},"tags":[{"display":"c","uri":"/packages/tag/c"},{"display":"data","uri":"/packages/tag/data"},{"display":"foreign","uri":"/packages/tag/foreign"},{"display":"library","uri":"/packages/tag/library"},{"display":"mit","uri":"/packages/tag/mit"},{"display":"structures","uri":"/packages/tag/structures"}],"votes":2},{"description":"Encode/Decode values to/from CBOR","downloads":10,"lastUpload":"2014-07-24T04:38:49.281736Z","maintainers":[{"display":"KyleMurphy","uri":"/user/KyleMurphy"}],"name":{"display":"CBOR","uri":"/package/CBOR"},"tags":[{"display":"data","uri":"/packages/tag/data"},{"display":"lgpl","uri":"/packages/tag/lgpl"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Delimited continuations and dynamically scoped variables","downloads":16,"lastUpload":"2016-05-11T02:22:41.377133Z","maintainers":[{"display":"DanDoel","uri":"/user/DanDoel"}],"name":{"display":"CC-delcont","uri":"/package/CC-delcont"},"tags":[{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Three new monad transformers for multi-prompt delimited control","downloads":9,"lastUpload":"2012-10-23T14:23:53Z","maintainers":[{"display":"KidoTakahiro","uri":"/user/KidoTakahiro"}],"name":{"display":"CC-delcont-alt","uri":"/package/CC-delcont-alt"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"A monad transformers for multi-prompt delimited control","downloads":6,"lastUpload":"2011-10-12T16:56:51Z","maintainers":[{"display":"KidoTakahiro","uri":"/user/KidoTakahiro"}],"name":{"display":"CC-delcont-cxe","uri":"/package/CC-delcont-cxe"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"A monad transformers for multi-prompt delimited control","downloads":5,"lastUpload":"2011-10-12T16:34:48Z","maintainers":[{"display":"KidoTakahiro","uri":"/user/KidoTakahiro"}],"name":{"display":"CC-delcont-exc","uri":"/package/CC-delcont-exc"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"A monad transformers for multi-prompt delimited control using refercence cells","downloads":2,"lastUpload":"2011-10-12T16:34:17Z","maintainers":[{"display":"KidoTakahiro","uri":"/user/KidoTakahiro"}],"name":{"display":"CC-delcont-ref","uri":"/package/CC-delcont-ref"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"A monad transformers for multi-prompt delimited control using refercence 
cells","downloads":10,"lastUpload":"2011-10-18T13:32:20Z","maintainers":[{"display":"KidoTakahiro","uri":"/user/KidoTakahiro"}],"name":{"display":"CC-delcont-ref-tf","uri":"/package/CC-delcont-ref-tf"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"control","uri":"/packages/tag/control"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"preprocessor and library for Causal Commutative Arrows (CCA)","downloads":21,"lastUpload":"2015-05-08T03:06:09.667005Z","maintainers":[{"display":"PaulLiu","uri":"/user/PaulLiu"}],"name":{"display":"CCA","uri":"/package/CCA"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"development","uri":"/packages/tag/development"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"A W3C compliant (X)HTML generating library ","downloads":12,"lastUpload":"2011-07-26T02:29:50Z","maintainers":[{"display":"PaulTalaga","uri":"/user/PaulTalaga"}],"name":{"display":"CHXHtml","uri":"/package/CHXHtml"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"web","uri":"/packages/tag/web"}],"votes":0},{"description":"Cursor Library for A Structured Editor","downloads":11,"lastUpload":"2009-02-11T09:56:25Z","maintainers":[{"display":"TristanAllwood","uri":"/user/TristanAllwood"}],"name":{"display":"CLASE","uri":"/package/CLASE"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"CLI tools","downloads":6,"lastUpload":"2015-03-10T04:25:07.585209Z","maintainers":[{"display":"g960059","uri":"/user/g960059"}],"name":{"display":"CLI","uri":"/package/CLI"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Infernal covariance model comparison","downloads":8,"lastUpload":"2012-11-22T14:57:12Z","maintainers":[{"display":"ChristianHoener","uri":"/user/ChristianHoener"}],"name":{"display":"CMCompare","uri":"/package/CMCompare"},"tags":[{"display":"bioinformatics","uri":"/packages/tag/bioinformatics"},{"display":"gpl","uri":"/packages/tag/gpl"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"cwmwl udp message queue","downloads":2,"lastUpload":"2012-07-02T13:30:46Z","maintainers":[{"display":"JoergFritsch","uri":"/user/JoergFritsch"}],"name":{"display":"CMQ","uri":"/package/CMQ"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"library","uri":"/packages/tag/library"},{"display":"system","uri":"/packages/tag/system"}],"votes":0},{"description":"An algebraic data type similar to Prelude Ordering.","downloads":8,"lastUpload":"2008-07-23T19:06:06Z","maintainers":[{"display":"AdrianHey","uri":"/user/AdrianHey"}],"name":{"display":"COrdering","uri":"/package/COrdering"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"data","uri":"/packages/tag/data"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"A simple Brainfuck 
interpretter.","downloads":9,"lastUpload":"2008-11-06T21:21:39Z","maintainers":[{"display":"ThomasDavie","uri":"/user/ThomasDavie"}],"name":{"display":"CPBrainfuck","uri":"/package/CPBrainfuck"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"compilers-interpreters","uri":"/packages/tag/compilers-interpreters"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"An interpreter of Hagino's Categorical Programming Language (CPL).","downloads":11,"lastUpload":"2018-02-16T04:01:31.731523911Z","maintainers":[{"display":"MasahiroSakai","uri":"/user/MasahiroSakai"}],"name":{"display":"CPL","uri":"/package/CPL"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"compilers-interpreters","uri":"/packages/tag/compilers-interpreters"},{"display":"program","uri":"/packages/tag/program"}],"votes":0},{"description":"Definition of a CSP core-language. ","downloads":21,"lastUpload":"2017-10-26T16:01:57.597214822Z","maintainers":[{"display":"MarcFontaine","uri":"/user/MarcFontaine"}],"name":{"display":"CSPM-CoreLanguage","uri":"/package/CSPM-CoreLanguage"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"concurrency","uri":"/packages/tag/concurrency"},{"display":"formal-methods","uri":"/packages/tag/formal-methods"},{"display":"language","uri":"/packages/tag/language"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"Firing rules semantic of CSPM","downloads":23,"lastUpload":"2017-10-26T16:04:59.008462452Z","maintainers":[{"display":"MarcFontaine","uri":"/user/MarcFontaine"}],"name":{"display":"CSPM-FiringRules","uri":"/package/CSPM-FiringRules"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"concurrency","uri":"/packages/tag/concurrency"},{"display":"formal-methods","uri":"/packages/tag/formal-methods"},{"display":"language","uri":"/packages/tag/language"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"A CSP-M parser compatible with FDR-2.91","downloads":27,"lastUpload":"2017-10-26T16:00:53.247544871Z","maintainers":[{"display":"MarcFontaine","uri":"/user/MarcFontaine"}],"name":{"display":"CSPM-Frontend","uri":"/package/CSPM-Frontend"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"concurrency","uri":"/packages/tag/concurrency"},{"display":"formal-methods","uri":"/packages/tag/formal-methods"},{"display":"language","uri":"/packages/tag/language"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"An interpreter for CSPM","downloads":31,"lastUpload":"2017-10-26T16:06:04.796021721Z","maintainers":[{"display":"MarcFontaine","uri":"/user/MarcFontaine"}],"name":{"display":"CSPM-Interpreter","uri":"/package/CSPM-Interpreter"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"concurrency","uri":"/packages/tag/concurrency"},{"display":"formal-methods","uri":"/packages/tag/formal-methods"},{"display":"language","uri":"/packages/tag/language"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"some modules specific for the ProB tool","downloads":11,"lastUpload":"2017-10-26T16:07:34.663251395Z","maintainers":[{"display":"MarcFontaine","uri":"/user/MarcFontaine"}],"name":{"display":"CSPM-ToProlog","uri":"/package/CSPM-ToProlog"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"formal-methods","uri":"/packages/tag/formal-methods"},{"display":"library","uri":"/packages/tag/library"}],"votes":0},{"description":"cspm command line tool for analyzing CSPM 
specifications.","downloads":26,"lastUpload":"2017-10-26T16:08:56.855821544Z","maintainers":[{"display":"MarcFontaine","uri":"/user/MarcFontaine"}],"name":{"display":"CSPM-cspm","uri":"/package/CSPM-cspm"},"tags":[{"display":"bsd3","uri":"/packages/tag/bsd3"},{"display":"concurrency","uri":"/packages/tag/concurrency"},{"display":"formal-methods","uri":"/packages/tag/formal-methods"},{"display":"language","uri":"/packages/tag/language"},{"display":"library","uri":"/packages/tag/library"},{"display":"program","uri":"/packages/tag/program"}],"votes":0}]} diff --git a/swh/lister/hackage/tests/test_lister.py b/swh/lister/hackage/tests/test_lister.py new file mode 100644 index 0000000..93bb6f4 --- /dev/null +++ b/swh/lister/hackage/tests/test_lister.py @@ -0,0 +1,100 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import functools +import json +from pathlib import Path +from urllib.parse import unquote, urlparse + +from swh.lister.hackage.lister import HackageLister + + +def json_callback(request, context, datadir): + """Callback for requests_mock that load a json file regarding a page number""" + page = request.json()["page"] + + unquoted_url = unquote(request.url) + url = urlparse(unquoted_url) + dirname = "%s_%s" % (url.scheme, url.hostname) + filename = url.path[1:] + if filename.endswith("/"): + filename = filename[:-1] + filename = filename.replace("/", "_") + + return json.loads(Path(datadir, dirname, f"{filename}_{page}").read_text()) + + +def test_hackage_lister(swh_scheduler, requests_mock, datadir): + + requests_mock.post( + url="https://hackage.haskell.org/packages/search", + status_code=200, + json=functools.partial(json_callback, datadir=datadir), + ) + + expected_origins = [] + + for page in [0, 1, 2]: + data = json.loads( + Path( + datadir, "https_hackage.haskell.org", f"packages_search_{page}" + ).read_text() + ) + for entry in data["pageContents"]: + pkgname = entry["name"]["display"] + expected_origins.append( + {"url": f"https://hackage.haskell.org/package/{pkgname}"} + ) + + lister = HackageLister(scheduler=swh_scheduler) + res = lister.run() + + assert res.pages == 3 + assert res.origins == res.pages * 50 + + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results + + assert len(scheduler_origins) == len(expected_origins) + + assert { + ( + scheduled.visit_type, + scheduled.url, + ) + for scheduled in scheduler_origins + } == { + ( + "hackage", + expected["url"], + ) + for expected in expected_origins + } + + +def test_hackage_lister_pagination_49(swh_scheduler, requests_mock, datadir): + requests_mock.post( + url="https://fake49.haskell.org/packages/search", + status_code=200, + json=functools.partial(json_callback, datadir=datadir), + ) + lister = HackageLister(scheduler=swh_scheduler, url="https://fake49.haskell.org/") + pages = list(lister.get_pages()) + # there should be 1 page with 49 entries + assert len(pages) == 1 + assert len(pages[0]) == 49 + + +def test_hackage_lister_pagination_51(swh_scheduler, requests_mock, datadir): + requests_mock.post( + url="https://fake51.haskell.org/packages/search", + status_code=200, + json=functools.partial(json_callback, datadir=datadir), + ) + lister = HackageLister(scheduler=swh_scheduler, url="https://fake51.haskell.org/") + pages = list(lister.get_pages()) + # there should be 2 pages with 50 + 1 
entries + assert len(pages) == 2 + assert len(pages[0]) == 50 + assert len(pages[1]) == 1 diff --git a/swh/lister/hackage/tests/test_tasks.py b/swh/lister/hackage/tests/test_tasks.py new file mode 100644 index 0000000..d10bc8b --- /dev/null +++ b/swh/lister/hackage/tests/test_tasks.py @@ -0,0 +1,33 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.lister.pattern import ListerStats + + +def test_hackage_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker): + res = swh_scheduler_celery_app.send_task("swh.lister.hackage.tasks.ping") + assert res + res.wait() + assert res.successful() + assert res.result == "OK" + + +def test_hackage_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker): + # setup the mocked HackageLister + lister = mocker.patch("swh.lister.hackage.tasks.HackageLister") + lister.from_configfile.return_value = lister + stats = ListerStats(pages=42, origins=42) + lister.run.return_value = stats + + res = swh_scheduler_celery_app.send_task( + "swh.lister.hackage.tasks.HackageListerTask" + ) + assert res + res.wait() + assert res.successful() + assert res.result == stats.dict() + + lister.from_configfile.assert_called_once_with() + lister.run.assert_called_once_with() diff --git a/swh/lister/launchpad/lister.py b/swh/lister/launchpad/lister.py index b134303..e9c36fa 100644 --- a/swh/lister/launchpad/lister.py +++ b/swh/lister/launchpad/lister.py @@ -1,209 +1,209 @@ # Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from dataclasses import dataclass from datetime import datetime import logging from typing import Any, Dict, Iterator, Optional, Tuple import iso8601 from launchpadlib.launchpad import Launchpad from lazr.restfulclient.errors import RestfulError from lazr.restfulclient.resource import Collection from tenacity.before_sleep import before_sleep_log -from swh.lister.utils import retry_if_exception, throttling_retry +from swh.lister.utils import http_retry, retry_if_exception from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin from ..pattern import CredentialsType, Lister logger = logging.getLogger(__name__) VcsType = str LaunchpadPageType = Tuple[VcsType, Collection] SUPPORTED_VCS_TYPES = ("git", "bzr") @dataclass class LaunchpadListerState: """State of Launchpad lister""" git_date_last_modified: Optional[datetime] = None """modification date of last updated git repository since last listing""" bzr_date_last_modified: Optional[datetime] = None """modification date of last updated bzr repository since last listing""" def origin(vcs_type: str, repo: Any) -> str: """Determine the origin url out of a repository with a given vcs_type""" return repo.git_https_url if vcs_type == "git" else repo.web_link def retry_if_restful_error(retry_state): return retry_if_exception(retry_state, lambda e: isinstance(e, RestfulError)) class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]): """ List repositories from Launchpad (git or bzr). 
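Example (an illustrative sketch, assuming a scheduler instance is available, e.g. obtained through swh.scheduler.get_scheduler)::

    lister = LaunchpadLister(scheduler=scheduler, incremental=True)
    stats = lister.run()  # ListerStats(pages=..., origins=...)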
Args: scheduler: instance of SchedulerInterface incremental: defines whether incremental listing should be used; if so, only repositories created or modified since the last incremental listing run will be returned """ LISTER_NAME = "launchpad" def __init__( self, scheduler: SchedulerInterface, incremental: bool = False, credentials: CredentialsType = None, ): super().__init__( scheduler=scheduler, url="https://launchpad.net/", instance="launchpad", credentials=credentials, ) self.incremental = incremental self.date_last_modified: Dict[str, Optional[datetime]] = { "git": None, "bzr": None, } def state_from_dict(self, d: Dict[str, Any]) -> LaunchpadListerState: for vcs_type in SUPPORTED_VCS_TYPES: key = f"{vcs_type}_date_last_modified" date_last_modified = d.get(key) if date_last_modified is not None: d[key] = iso8601.parse_date(date_last_modified) return LaunchpadListerState(**d) def state_to_dict(self, state: LaunchpadListerState) -> Dict[str, Any]: d: Dict[str, Optional[str]] = {} for vcs_type in SUPPORTED_VCS_TYPES: attribute_name = f"{vcs_type}_date_last_modified" d[attribute_name] = None if hasattr(state, attribute_name): date_last_modified = getattr(state, attribute_name) if date_last_modified is not None: d[attribute_name] = date_last_modified.isoformat() return d - @throttling_retry( + @http_retry( retry=retry_if_restful_error, before_sleep=before_sleep_log(logger, logging.WARNING), ) def _page_request( self, launchpad, vcs_type: str, date_last_modified: Optional[datetime] ) -> Optional[Collection]: """Query the page of results for a given vcs_type modified since date_last_modified. If an issue occurs, the retry policy defined above will handle it. """ get_vcs_fns = { "git": launchpad.git_repositories.getRepositories, "bzr": launchpad.branches.getBranches, } return get_vcs_fns[vcs_type]( order_by="most neglected first", modified_since_date=date_last_modified, ) def get_pages(self) -> Iterator[LaunchpadPageType]: """ Yields pages of git/bzr repositories hosted on Launchpad, sorted by last modification date in ascending order. """ launchpad = Launchpad.login_anonymously( "softwareheritage", "production", version="devel" ) if self.incremental: self.date_last_modified = { "git": self.state.git_date_last_modified, "bzr": self.state.bzr_date_last_modified, } for vcs_type in SUPPORTED_VCS_TYPES: try: result = self._page_request( launchpad, vcs_type, self.date_last_modified[vcs_type] ) except RestfulError as e: logger.warning("Listing %s origins raised %s", vcs_type, e) result = None if not result: continue yield vcs_type, result def get_origins_from_page(self, page: LaunchpadPageType) -> Iterator[ListedOrigin]: """ Iterate on all git/bzr repositories of a page and yield ListedOrigin instances.
""" assert self.lister_obj.id is not None vcs_type, repos = page try: for repo in repos: origin_url = origin(vcs_type, repo) # filter out origins with invalid URL if not origin_url.startswith("https://"): continue last_update = repo.date_last_modified self.date_last_modified[vcs_type] = last_update logger.debug( "Found origin %s with type %s last updated on %s", origin_url, vcs_type, last_update, ) yield ListedOrigin( lister_id=self.lister_obj.id, visit_type=vcs_type, url=origin_url, last_update=last_update, ) except RestfulError as e: logger.warning("Listing %s origins raised %s", vcs_type, e) def finalize(self) -> None: git_date_last_modified = self.date_last_modified["git"] bzr_date_last_modified = self.date_last_modified["bzr"] if git_date_last_modified is None and bzr_date_last_modified is None: return if self.incremental and ( self.state.git_date_last_modified is None or ( git_date_last_modified is not None and git_date_last_modified > self.state.git_date_last_modified ) ): self.state.git_date_last_modified = git_date_last_modified if self.incremental and ( self.state.bzr_date_last_modified is None or ( bzr_date_last_modified is not None and bzr_date_last_modified > self.state.bzr_date_last_modified ) ): self.state.bzr_date_last_modified = self.date_last_modified["bzr"] self.updated = True diff --git a/swh/lister/maven/lister.py b/swh/lister/maven/lister.py index 2560feb..2055b91 100644 --- a/swh/lister/maven/lister.py +++ b/swh/lister/maven/lister.py @@ -1,428 +1,406 @@ # Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from dataclasses import asdict, dataclass from datetime import datetime, timezone import logging import re from typing import Any, Dict, Iterator, Optional from urllib.parse import urljoin from bs4 import BeautifulSoup import lxml import requests -from tenacity.before_sleep import before_sleep_log from swh.core.github.utils import GitHubSession -from swh.lister.utils import throttling_retry from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin -from .. import USER_AGENT from ..pattern import CredentialsType, Lister logger = logging.getLogger(__name__) RepoPage = Dict[str, Any] SUPPORTED_SCM_TYPES = ("git", "svn", "hg", "cvs", "bzr") @dataclass class MavenListerState: """State of the MavenLister""" last_seen_doc: int = -1 """Last doc ID ingested during an incremental pass """ last_seen_pom: int = -1 """Last doc ID related to a pom and ingested during an incremental pass """ class MavenLister(Lister[MavenListerState, RepoPage]): """List origins from a Maven repository. Maven Central provides artifacts for Java builds. It includes POM files and source archives, which we download to get the source code of artifacts and links to their scm repository. This lister yields origins of types: git/svn/hg or whatever the Artifacts use as repository type, plus maven types for the maven loader (tgz, jar).""" LISTER_NAME = "maven" def __init__( self, scheduler: SchedulerInterface, url: str, index_url: str = None, instance: Optional[str] = None, credentials: CredentialsType = None, incremental: bool = True, ): """Lister class for Maven repositories. Args: url: main URL of the Maven repository, i.e. url of the base index used to fetch maven artifacts. 
For Maven central use https://repo1.maven.org/maven2/ index_url: the URL to download the exported text indexes from. Would typically be a local host running the export docker image. See README.md in this directory for more information. instance: Name of maven instance. Defaults to url's network location if unset. incremental: bool, defaults to True. Defines if incremental listing is activated or not. """ self.BASE_URL = url self.INDEX_URL = index_url self.incremental = incremental super().__init__( scheduler=scheduler, credentials=credentials, url=url, instance=instance, ) - self.session = requests.Session() - self.session.headers.update( - { - "Accept": "application/json", - "User-Agent": USER_AGENT, - } - ) + self.session.headers.update({"Accept": "application/json"}) self.jar_origins: Dict[str, ListedOrigin] = {} self.github_session = GitHubSession( - credentials=self.credentials, user_agent=USER_AGENT + credentials=self.credentials, + user_agent=str(self.session.headers["User-Agent"]), ) def state_from_dict(self, d: Dict[str, Any]) -> MavenListerState: return MavenListerState(**d) def state_to_dict(self, state: MavenListerState) -> Dict[str, Any]: return asdict(state) - @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) - def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response: - - logger.info("Fetching URL %s with params %s", url, params) - - response = self.session.get(url, params=params) - if response.status_code != 200: - logger.warning( - "Unexpected HTTP status code %s on %s: %s", - response.status_code, - response.url, - response.content, - ) - response.raise_for_status() - - return response - def get_pages(self) -> Iterator[RepoPage]: """Retrieve and parse exported maven indexes to identify all pom files and src archives. """ # Example of returned RepoPage's: # [ # { # "type": "maven", # "url": "https://maven.xwiki.org/..-5.4.2-sources.jar", # "time": 1626109619335, # "gid": "org.xwiki.platform", # "aid": "xwiki-platform-wikistream-events-xwiki", # "version": "5.4.2" # }, # { # "type": "scm", # "url": "scm:git:git://github.com/openengsb/openengsb-framework.git", # "project": "openengsb-framework", # }, # ... # ] # Download the main text index file. logger.info("Downloading computed index from %s.", self.INDEX_URL) assert self.INDEX_URL is not None - response = requests.get(self.INDEX_URL, stream=True) - if response.status_code != 200: + try: + response = self.http_request(self.INDEX_URL, stream=True) + except requests.HTTPError: logger.error("Index %s not found, stopping", self.INDEX_URL) - response.raise_for_status() + raise # Prepare regexes to parse index exports. # Parse doc id. # Example line: "doc 13" re_doc = re.compile(r"^doc (?P<doc>\d+)$") # Parse gid, aid, version, classifier, extension. # Example line: " value al.aldi|sprova4j|0.1.0|sources|jar" re_val = re.compile( r"^\s{4}value (?P<gid>[^|]+)\|(?P<aid>[^|]+)\|(?P<version>[^|]+)\|" + r"(?P<classifier>[^|]+)\|(?P<ext>[^|]+)$" ) # Parse last modification time. # Example line: " value jar|1626109619335|14316|2|2|0|jar" re_time = re.compile( r"^\s{4}value ([^|]+)\|(?P<mtime>[^|]+)\|([^|]+)\|([^|]+)\|([^|]+)" + r"\|([^|]+)\|([^|]+)$" ) # Read file line by line and process it out_pom: Dict = {} jar_src: Dict = {} doc_id: int = 0 jar_src["doc"] = None url_src = None iterator = response.iter_lines(chunk_size=1024) for line_bytes in iterator: # Read the index text export and get URLs and SCMs.
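# Illustrative walk-through, using the example lines quoted above: for a
# doc whose value line is
#   " value al.aldi|sprova4j|0.1.0|sources|jar"
# re_val captures gid="al.aldi", aid="sprova4j", version="0.1.0",
# classifier="sources" and ext="jar"; the gid is mapped to the path
# "al/aldi", so the source archive URL yielded below is
# BASE_URL + "al/aldi/sprova4j/0.1.0/sprova4j-0.1.0-sources.jar".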
line = line_bytes.decode(errors="ignore") m_doc = re_doc.match(line) if m_doc is not None: doc_id = int(m_doc.group("doc")) # jar_src["doc"] contains the id of the current document, whatever # its type (scm or jar). jar_src["doc"] = doc_id else: m_val = re_val.match(line) if m_val is not None: (gid, aid, version, classifier, ext) = m_val.groups() ext = ext.strip() path = "/".join(gid.split(".")) if classifier == "NA" and ext.lower() == "pom": # If incremental mode, we don't record any line that is # before our last recorded doc id. if ( self.incremental and self.state and self.state.last_seen_pom and self.state.last_seen_pom >= doc_id ): continue url_path = f"{path}/{aid}/{version}/{aid}-{version}.{ext}" url_pom = urljoin( self.BASE_URL, url_path, ) out_pom[url_pom] = doc_id elif ( classifier.lower() == "sources" or ("src" in classifier) ) and ext.lower() in ("zip", "jar"): url_path = ( f"{path}/{aid}/{version}/{aid}-{version}-{classifier}.{ext}" ) url_src = urljoin(self.BASE_URL, url_path) jar_src["gid"] = gid jar_src["aid"] = aid jar_src["version"] = version else: m_time = re_time.match(line) if m_time is not None and url_src is not None: time = m_time.group("mtime") jar_src["time"] = int(time) artifact_metadata_d = { "type": "maven", "url": url_src, **jar_src, } logger.debug( "* Yielding jar %s: %s", url_src, artifact_metadata_d ) yield artifact_metadata_d url_src = None logger.info("Found %s poms.", len(out_pom)) # Now fetch pom files and scan them for scm info. logger.info("Fetching poms..") - for pom in out_pom: + for pom_url in out_pom: try: - response = self.page_request(pom, {}) + response = self.http_request(pom_url) parsed_pom = BeautifulSoup(response.content, "xml") project = parsed_pom.find("project") if project is None: continue scm = project.find("scm") if scm is not None: connection = scm.find("connection") if connection is not None: artifact_metadata_d = { "type": "scm", - "doc": out_pom[pom], + "doc": out_pom[pom_url], "url": connection.text, } - logger.debug("* Yielding pom %s: %s", pom, artifact_metadata_d) + logger.debug( + "* Yielding pom %s: %s", pom_url, artifact_metadata_d + ) yield artifact_metadata_d else: - logger.debug("No scm.connection in pom %s", pom) + logger.debug("No scm.connection in pom %s", pom_url) else: - logger.debug("No scm in pom %s", pom) + logger.debug("No scm in pom %s", pom_url) except requests.HTTPError: logger.warning( "POM info page could not be fetched, skipping project '%s'", - pom, + pom_url, ) except lxml.etree.Error as error: - logger.info("Could not parse POM %s XML: %s.", pom, error) + logger.info("Could not parse POM %s XML: %s.", pom_url, error) def get_scm(self, page: RepoPage) -> Optional[ListedOrigin]: """Retrieve scm origin out of the page information. Only called when type of the page is scm. Try and detect an scm/vcs repository. Note that official format is in the form: scm:{type}:git://example.org/{user}/{repo}.git but some projects directly put the repo url (without the "scm:type"), so we have to check against the content to extract the type and url properly. Raises AssertionError when the type of the page is not 'scm' Returns ListedOrigin with proper canonical scm url (for github) if any is found, None otherwise. 
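For example, a connection value such as
"scm:git:git://github.com/aldialimucaj/sprova4j.git" (an illustrative
value mirroring the test fixtures) is parsed into visit_type "git" and,
after canonicalization through the GitHub API, yields the origin url
https://github.com/aldialimucaj/sprova4j.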
""" assert page["type"] == "scm" visit_type: Optional[str] = None url: Optional[str] = None m_scm = re.match(r"^scm:(?P[^:]+):(?P.*)$", page["url"]) if m_scm is None: return None scm_type = m_scm.group("type") if scm_type and scm_type in SUPPORTED_SCM_TYPES: url = m_scm.group("url") visit_type = scm_type elif page["url"].endswith(".git"): url = page["url"].lstrip("scm:") visit_type = "git" else: return None if url and visit_type == "git": # Non-github urls will be returned as is, github ones will be canonical ones url = self.github_session.get_canonical_url(url) if not url: return None assert visit_type is not None assert self.lister_obj.id is not None return ListedOrigin( lister_id=self.lister_obj.id, url=url, visit_type=visit_type, ) def get_origins_from_page(self, page: RepoPage) -> Iterator[ListedOrigin]: """Convert a page of Maven repositories into a list of ListedOrigins.""" if page["type"] == "scm": listed_origin = self.get_scm(page) if listed_origin: yield listed_origin else: # Origin is gathering source archives: last_update_dt = None last_update_iso = "" try: last_update_seconds = str(page["time"])[:-3] last_update_dt = datetime.fromtimestamp(int(last_update_seconds)) last_update_dt = last_update_dt.astimezone(timezone.utc) except (OverflowError, ValueError): logger.warning("- Failed to convert datetime %s.", last_update_seconds) if last_update_dt: last_update_iso = last_update_dt.isoformat() # Origin URL will target page holding sources for all versions of # an artifactId (package name) inside a groupId (namespace) path = "/".join(page["gid"].split(".")) origin_url = urljoin(self.BASE_URL, f"{path}/{page['aid']}") artifact = { **{k: v for k, v in page.items() if k != "doc"}, "time": last_update_iso, "base_url": self.BASE_URL, } if origin_url not in self.jar_origins: # Create ListedOrigin instance if we did not see that origin yet assert self.lister_obj.id is not None jar_origin = ListedOrigin( lister_id=self.lister_obj.id, url=origin_url, visit_type=page["type"], last_update=last_update_dt, extra_loader_arguments={"artifacts": [artifact]}, ) self.jar_origins[origin_url] = jar_origin else: # Update list of source artifacts for that origin otherwise jar_origin = self.jar_origins[origin_url] artifacts = jar_origin.extra_loader_arguments["artifacts"] if artifact not in artifacts: artifacts.append(artifact) if ( jar_origin.last_update and last_update_dt and last_update_dt > jar_origin.last_update ): jar_origin.last_update = last_update_dt if not self.incremental or ( self.state and page["doc"] > self.state.last_seen_doc ): # Yield origin with updated source artifacts, multiple instances of # ListedOrigin for the same origin URL but with different artifacts # list will be sent to the scheduler but it will deduplicate them and # take the latest one to upsert in database yield jar_origin def commit_page(self, page: RepoPage) -> None: """Update currently stored state using the latest listed doc. Note: this is a noop for full listing mode """ if self.incremental and self.state: # We need to differentiate the two state counters according # to the type of origin. if page["type"] == "maven" and page["doc"] > self.state.last_seen_doc: self.state.last_seen_doc = page["doc"] elif page["type"] == "scm" and page["doc"] > self.state.last_seen_pom: self.state.last_seen_doc = page["doc"] self.state.last_seen_pom = page["doc"] def finalize(self) -> None: """Finalize the lister state, set update if any progress has been made. 
Note: this is a noop for full listing mode """ if self.incremental and self.state: last_seen_doc = self.state.last_seen_doc last_seen_pom = self.state.last_seen_pom scheduler_state = self.get_state_from_scheduler() if last_seen_doc and last_seen_pom: if (scheduler_state.last_seen_doc < last_seen_doc) or ( scheduler_state.last_seen_pom < last_seen_pom ): self.updated = True diff --git a/swh/lister/maven/tests/data/https_maven.org/citrus-parent-3.0.7.pom b/swh/lister/maven/tests/data/citrus-parent-3.0.7.pom similarity index 100% rename from swh/lister/maven/tests/data/https_maven.org/citrus-parent-3.0.7.pom rename to swh/lister/maven/tests/data/citrus-parent-3.0.7.pom diff --git a/swh/lister/maven/tests/data/https_api.github.com/repos_aldialimucaj_sprova4j b/swh/lister/maven/tests/data/https_api.github.com/repos_aldialimucaj_sprova4j new file mode 100644 index 0000000..8f5deb4 --- /dev/null +++ b/swh/lister/maven/tests/data/https_api.github.com/repos_aldialimucaj_sprova4j @@ -0,0 +1,110 @@ +{ + "id": 133475101, + "node_id": "MDEwOlJlcG9zaXRvcnkxMzM0NzUxMDE=", + "name": "sprova4j", + "full_name": "aldialimucaj/sprova4j", + "private": false, + "owner": { + "login": "aldialimucaj", + "id": 782755, + "node_id": "MDQ6VXNlcjc4Mjc1NQ==", + "avatar_url": "https://avatars.githubusercontent.com/u/782755?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/aldialimucaj", + "html_url": "https://github.com/aldialimucaj", + "followers_url": "https://api.github.com/users/aldialimucaj/followers", + "following_url": "https://api.github.com/users/aldialimucaj/following{/other_user}", + "gists_url": "https://api.github.com/users/aldialimucaj/gists{/gist_id}", + "starred_url": "https://api.github.com/users/aldialimucaj/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/aldialimucaj/subscriptions", + "organizations_url": "https://api.github.com/users/aldialimucaj/orgs", + "repos_url": "https://api.github.com/users/aldialimucaj/repos", + "events_url": "https://api.github.com/users/aldialimucaj/events{/privacy}", + "received_events_url": "https://api.github.com/users/aldialimucaj/received_events", + "type": "User", + "site_admin": false + }, + "html_url": "https://github.com/aldialimucaj/sprova4j", + "description": "Java client for Sprova Test Framework", + "fork": false, + "url": "https://api.github.com/repos/aldialimucaj/sprova4j", + "forks_url": "https://api.github.com/repos/aldialimucaj/sprova4j/forks", + "keys_url": "https://api.github.com/repos/aldialimucaj/sprova4j/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/aldialimucaj/sprova4j/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/aldialimucaj/sprova4j/teams", + "hooks_url": "https://api.github.com/repos/aldialimucaj/sprova4j/hooks", + "issue_events_url": "https://api.github.com/repos/aldialimucaj/sprova4j/issues/events{/number}", + "events_url": "https://api.github.com/repos/aldialimucaj/sprova4j/events", + "assignees_url": "https://api.github.com/repos/aldialimucaj/sprova4j/assignees{/user}", + "branches_url": "https://api.github.com/repos/aldialimucaj/sprova4j/branches{/branch}", + "tags_url": "https://api.github.com/repos/aldialimucaj/sprova4j/tags", + "blobs_url": "https://api.github.com/repos/aldialimucaj/sprova4j/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/aldialimucaj/sprova4j/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/aldialimucaj/sprova4j/git/refs{/sha}", + "trees_url": 
"https://api.github.com/repos/aldialimucaj/sprova4j/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/aldialimucaj/sprova4j/statuses/{sha}", + "languages_url": "https://api.github.com/repos/aldialimucaj/sprova4j/languages", + "stargazers_url": "https://api.github.com/repos/aldialimucaj/sprova4j/stargazers", + "contributors_url": "https://api.github.com/repos/aldialimucaj/sprova4j/contributors", + "subscribers_url": "https://api.github.com/repos/aldialimucaj/sprova4j/subscribers", + "subscription_url": "https://api.github.com/repos/aldialimucaj/sprova4j/subscription", + "commits_url": "https://api.github.com/repos/aldialimucaj/sprova4j/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/aldialimucaj/sprova4j/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/aldialimucaj/sprova4j/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/aldialimucaj/sprova4j/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/aldialimucaj/sprova4j/contents/{+path}", + "compare_url": "https://api.github.com/repos/aldialimucaj/sprova4j/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/aldialimucaj/sprova4j/merges", + "archive_url": "https://api.github.com/repos/aldialimucaj/sprova4j/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/aldialimucaj/sprova4j/downloads", + "issues_url": "https://api.github.com/repos/aldialimucaj/sprova4j/issues{/number}", + "pulls_url": "https://api.github.com/repos/aldialimucaj/sprova4j/pulls{/number}", + "milestones_url": "https://api.github.com/repos/aldialimucaj/sprova4j/milestones{/number}", + "notifications_url": "https://api.github.com/repos/aldialimucaj/sprova4j/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/aldialimucaj/sprova4j/labels{/name}", + "releases_url": "https://api.github.com/repos/aldialimucaj/sprova4j/releases{/id}", + "deployments_url": "https://api.github.com/repos/aldialimucaj/sprova4j/deployments", + "created_at": "2018-05-15T07:15:09Z", + "updated_at": "2019-08-13T11:29:54Z", + "pushed_at": "2018-11-08T20:43:59Z", + "git_url": "git://github.com/aldialimucaj/sprova4j.git", + "ssh_url": "git@github.com:aldialimucaj/sprova4j.git", + "clone_url": "https://github.com/aldialimucaj/sprova4j.git", + "svn_url": "https://github.com/aldialimucaj/sprova4j", + "homepage": null, + "size": 87, + "stargazers_count": 2, + "watchers_count": 2, + "language": "Java", + "has_issues": true, + "has_projects": true, + "has_downloads": true, + "has_wiki": true, + "has_pages": false, + "forks_count": 0, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 0, + "license": { + "key": "apache-2.0", + "name": "Apache License 2.0", + "spdx_id": "Apache-2.0", + "url": "https://api.github.com/licenses/apache-2.0", + "node_id": "MDc6TGljZW5zZTI=" + }, + "allow_forking": true, + "is_template": false, + "web_commit_signoff_required": false, + "topics": [ + + ], + "visibility": "public", + "forks": 0, + "open_issues": 0, + "watchers": 2, + "default_branch": "master", + "temp_clone_token": null, + "network_count": 0, + "subscribers_count": 1 +} diff --git a/swh/lister/maven/tests/data/https_api.github.com/repos_arangodb-community_arangodb-graphql-java b/swh/lister/maven/tests/data/https_api.github.com/repos_arangodb-community_arangodb-graphql-java new file mode 100644 index 0000000..3342375 --- /dev/null +++ 
b/swh/lister/maven/tests/data/https_api.github.com/repos_arangodb-community_arangodb-graphql-java @@ -0,0 +1,130 @@ +{ + "id": 203772666, + "node_id": "MDEwOlJlcG9zaXRvcnkyMDM3NzI2NjY=", + "name": "arangodb-graphql-java", + "full_name": "ArangoDB-Community/arangodb-graphql-java", + "private": false, + "owner": { + "login": "ArangoDB-Community", + "id": 37540306, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjM3NTQwMzA2", + "avatar_url": "https://avatars.githubusercontent.com/u/37540306?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/ArangoDB-Community", + "html_url": "https://github.com/ArangoDB-Community", + "followers_url": "https://api.github.com/users/ArangoDB-Community/followers", + "following_url": "https://api.github.com/users/ArangoDB-Community/following{/other_user}", + "gists_url": "https://api.github.com/users/ArangoDB-Community/gists{/gist_id}", + "starred_url": "https://api.github.com/users/ArangoDB-Community/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ArangoDB-Community/subscriptions", + "organizations_url": "https://api.github.com/users/ArangoDB-Community/orgs", + "repos_url": "https://api.github.com/users/ArangoDB-Community/repos", + "events_url": "https://api.github.com/users/ArangoDB-Community/events{/privacy}", + "received_events_url": "https://api.github.com/users/ArangoDB-Community/received_events", + "type": "Organization", + "site_admin": false + }, + "html_url": "https://github.com/ArangoDB-Community/arangodb-graphql-java", + "description": null, + "fork": false, + "url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java", + "forks_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/forks", + "keys_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/teams", + "hooks_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/hooks", + "issue_events_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/issues/events{/number}", + "events_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/events", + "assignees_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/assignees{/user}", + "branches_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/branches{/branch}", + "tags_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/tags", + "blobs_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/statuses/{sha}", + "languages_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/languages", + "stargazers_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/stargazers", + "contributors_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/contributors", + "subscribers_url": 
"https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/subscribers", + "subscription_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/subscription", + "commits_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/contents/{+path}", + "compare_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/merges", + "archive_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/downloads", + "issues_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/issues{/number}", + "pulls_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/pulls{/number}", + "milestones_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/milestones{/number}", + "notifications_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/labels{/name}", + "releases_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/releases{/id}", + "deployments_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/deployments", + "created_at": "2019-08-22T10:38:53Z", + "updated_at": "2022-05-03T21:29:48Z", + "pushed_at": "2022-09-16T21:06:34Z", + "git_url": "git://github.com/ArangoDB-Community/arangodb-graphql-java.git", + "ssh_url": "git@github.com:ArangoDB-Community/arangodb-graphql-java.git", + "clone_url": "https://github.com/ArangoDB-Community/arangodb-graphql-java.git", + "svn_url": "https://github.com/ArangoDB-Community/arangodb-graphql-java", + "homepage": null, + "size": 91, + "stargazers_count": 5, + "watchers_count": 5, + "language": "Java", + "has_issues": true, + "has_projects": false, + "has_downloads": true, + "has_wiki": false, + "has_pages": false, + "forks_count": 1, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 1, + "license": { + "key": "apache-2.0", + "name": "Apache License 2.0", + "spdx_id": "Apache-2.0", + "url": "https://api.github.com/licenses/apache-2.0", + "node_id": "MDc6TGljZW5zZTI=" + }, + "allow_forking": true, + "is_template": false, + "web_commit_signoff_required": false, + "topics": [ + + ], + "visibility": "public", + "forks": 1, + "open_issues": 1, + "watchers": 5, + "default_branch": "master", + "temp_clone_token": null, + "organization": { + "login": "ArangoDB-Community", + "id": 37540306, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjM3NTQwMzA2", + "avatar_url": "https://avatars.githubusercontent.com/u/37540306?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/ArangoDB-Community", + "html_url": "https://github.com/ArangoDB-Community", + "followers_url": "https://api.github.com/users/ArangoDB-Community/followers", + 
"following_url": "https://api.github.com/users/ArangoDB-Community/following{/other_user}", + "gists_url": "https://api.github.com/users/ArangoDB-Community/gists{/gist_id}", + "starred_url": "https://api.github.com/users/ArangoDB-Community/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ArangoDB-Community/subscriptions", + "organizations_url": "https://api.github.com/users/ArangoDB-Community/orgs", + "repos_url": "https://api.github.com/users/ArangoDB-Community/repos", + "events_url": "https://api.github.com/users/ArangoDB-Community/events{/privacy}", + "received_events_url": "https://api.github.com/users/ArangoDB-Community/received_events", + "type": "Organization", + "site_admin": false + }, + "network_count": 1, + "subscribers_count": 17 +} diff --git a/swh/lister/maven/tests/data/https_api.github.com/repos_webx_citrus b/swh/lister/maven/tests/data/https_api.github.com/repos_webx_citrus new file mode 100644 index 0000000..c731849 --- /dev/null +++ b/swh/lister/maven/tests/data/https_api.github.com/repos_webx_citrus @@ -0,0 +1,124 @@ +{ + "id": 2618757, + "node_id": "MDEwOlJlcG9zaXRvcnkyNjE4NzU3", + "name": "citrus", + "full_name": "webx/citrus", + "private": false, + "owner": { + "login": "webx", + "id": 1142574, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjExNDI1NzQ=", + "avatar_url": "https://avatars.githubusercontent.com/u/1142574?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/webx", + "html_url": "https://github.com/webx", + "followers_url": "https://api.github.com/users/webx/followers", + "following_url": "https://api.github.com/users/webx/following{/other_user}", + "gists_url": "https://api.github.com/users/webx/gists{/gist_id}", + "starred_url": "https://api.github.com/users/webx/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/webx/subscriptions", + "organizations_url": "https://api.github.com/users/webx/orgs", + "repos_url": "https://api.github.com/users/webx/repos", + "events_url": "https://api.github.com/users/webx/events{/privacy}", + "received_events_url": "https://api.github.com/users/webx/received_events", + "type": "Organization", + "site_admin": false + }, + "html_url": "https://github.com/webx/citrus", + "description": "Java-based Web Framework for Alibaba Group", + "fork": false, + "url": "https://api.github.com/repos/webx/citrus", + "forks_url": "https://api.github.com/repos/webx/citrus/forks", + "keys_url": "https://api.github.com/repos/webx/citrus/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/webx/citrus/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/webx/citrus/teams", + "hooks_url": "https://api.github.com/repos/webx/citrus/hooks", + "issue_events_url": "https://api.github.com/repos/webx/citrus/issues/events{/number}", + "events_url": "https://api.github.com/repos/webx/citrus/events", + "assignees_url": "https://api.github.com/repos/webx/citrus/assignees{/user}", + "branches_url": "https://api.github.com/repos/webx/citrus/branches{/branch}", + "tags_url": "https://api.github.com/repos/webx/citrus/tags", + "blobs_url": "https://api.github.com/repos/webx/citrus/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/webx/citrus/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/webx/citrus/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/webx/citrus/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/webx/citrus/statuses/{sha}", + "languages_url": 
"https://api.github.com/repos/webx/citrus/languages", + "stargazers_url": "https://api.github.com/repos/webx/citrus/stargazers", + "contributors_url": "https://api.github.com/repos/webx/citrus/contributors", + "subscribers_url": "https://api.github.com/repos/webx/citrus/subscribers", + "subscription_url": "https://api.github.com/repos/webx/citrus/subscription", + "commits_url": "https://api.github.com/repos/webx/citrus/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/webx/citrus/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/webx/citrus/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/webx/citrus/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/webx/citrus/contents/{+path}", + "compare_url": "https://api.github.com/repos/webx/citrus/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/webx/citrus/merges", + "archive_url": "https://api.github.com/repos/webx/citrus/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/webx/citrus/downloads", + "issues_url": "https://api.github.com/repos/webx/citrus/issues{/number}", + "pulls_url": "https://api.github.com/repos/webx/citrus/pulls{/number}", + "milestones_url": "https://api.github.com/repos/webx/citrus/milestones{/number}", + "notifications_url": "https://api.github.com/repos/webx/citrus/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/webx/citrus/labels{/name}", + "releases_url": "https://api.github.com/repos/webx/citrus/releases{/id}", + "deployments_url": "https://api.github.com/repos/webx/citrus/deployments", + "created_at": "2011-10-21T07:34:55Z", + "updated_at": "2022-10-12T13:24:54Z", + "pushed_at": "2022-10-10T02:41:17Z", + "git_url": "git://github.com/webx/citrus.git", + "ssh_url": "git@github.com:webx/citrus.git", + "clone_url": "https://github.com/webx/citrus.git", + "svn_url": "https://github.com/webx/citrus", + "homepage": "http://www.openwebx.org/", + "size": 7512, + "stargazers_count": 756, + "watchers_count": 756, + "language": "Java", + "has_issues": true, + "has_projects": true, + "has_downloads": true, + "has_wiki": true, + "has_pages": false, + "forks_count": 376, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 32, + "license": null, + "allow_forking": true, + "is_template": false, + "web_commit_signoff_required": false, + "topics": [ + + ], + "visibility": "public", + "forks": 376, + "open_issues": 32, + "watchers": 756, + "default_branch": "master", + "temp_clone_token": null, + "organization": { + "login": "webx", + "id": 1142574, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjExNDI1NzQ=", + "avatar_url": "https://avatars.githubusercontent.com/u/1142574?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/webx", + "html_url": "https://github.com/webx", + "followers_url": "https://api.github.com/users/webx/followers", + "following_url": "https://api.github.com/users/webx/following{/other_user}", + "gists_url": "https://api.github.com/users/webx/gists{/gist_id}", + "starred_url": "https://api.github.com/users/webx/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/webx/subscriptions", + "organizations_url": "https://api.github.com/users/webx/orgs", + "repos_url": "https://api.github.com/users/webx/repos", + "events_url": "https://api.github.com/users/webx/events{/privacy}", + "received_events_url": "https://api.github.com/users/webx/received_events", + "type": "Organization", + "site_admin": 
false + }, + "network_count": 376, + "subscribers_count": 136 +} diff --git a/swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.pom b/swh/lister/maven/tests/data/https_repo1.maven.org/maven2_al_aldi_sprova4j_0.1.0_sprova4j-0.1.0.pom similarity index 100% rename from swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.pom rename to swh/lister/maven/tests/data/https_repo1.maven.org/maven2_al_aldi_sprova4j_0.1.0_sprova4j-0.1.0.pom diff --git a/swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.1.pom b/swh/lister/maven/tests/data/https_repo1.maven.org/maven2_al_aldi_sprova4j_0.1.1_sprova4j-0.1.1.pom similarity index 100% rename from swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.1.pom rename to swh/lister/maven/tests/data/https_repo1.maven.org/maven2_al_aldi_sprova4j_0.1.1_sprova4j-0.1.1.pom diff --git a/swh/lister/maven/tests/data/https_maven.org/arangodb-graphql-1.2.pom b/swh/lister/maven/tests/data/https_repo1.maven.org/maven2_com_arangodb_arangodb-graphql_1.2_arangodb-graphql-1.2.pom similarity index 100% rename from swh/lister/maven/tests/data/https_maven.org/arangodb-graphql-1.2.pom rename to swh/lister/maven/tests/data/https_repo1.maven.org/maven2_com_arangodb_arangodb-graphql_1.2_arangodb-graphql-1.2.pom diff --git a/swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.malformed.pom b/swh/lister/maven/tests/data/sprova4j-0.1.0.malformed.pom similarity index 100% rename from swh/lister/maven/tests/data/https_maven.org/sprova4j-0.1.0.malformed.pom rename to swh/lister/maven/tests/data/sprova4j-0.1.0.malformed.pom diff --git a/swh/lister/maven/tests/test_lister.py b/swh/lister/maven/tests/test_lister.py index 6a75a99..9bacd4e 100644 --- a/swh/lister/maven/tests/test_lister.py +++ b/swh/lister/maven/tests/test_lister.py @@ -1,379 +1,348 @@ # Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from pathlib import Path import iso8601 import pytest import requests from swh.lister.maven.lister import MavenLister MVN_URL = "https://repo1.maven.org/maven2/" # main maven repo url INDEX_URL = "http://indexes/export.fld" # index directory url URL_POM_1 = MVN_URL + "al/aldi/sprova4j/0.1.0/sprova4j-0.1.0.pom" URL_POM_2 = MVN_URL + "al/aldi/sprova4j/0.1.1/sprova4j-0.1.1.pom" URL_POM_3 = MVN_URL + "com/arangodb/arangodb-graphql/1.2/arangodb-graphql-1.2.pom" USER_REPO0 = "aldialimucaj/sprova4j" GIT_REPO_URL0_HTTPS = f"https://github.com/{USER_REPO0}" GIT_REPO_URL0_API = f"https://api.github.com/repos/{USER_REPO0}" -LIST_GIT = (GIT_REPO_URL0_HTTPS,) +ORIGIN_GIT = GIT_REPO_URL0_HTTPS USER_REPO1 = "ArangoDB-Community/arangodb-graphql-java" GIT_REPO_URL1_HTTPS = f"https://github.com/{USER_REPO1}" GIT_REPO_URL1_GIT = f"git://github.com/{USER_REPO1}.git" GIT_REPO_URL1_API = f"https://api.github.com/repos/{USER_REPO1}" -LIST_GIT_INCR = (GIT_REPO_URL1_HTTPS,) +ORIGIN_GIT_INCR = GIT_REPO_URL1_HTTPS USER_REPO2 = "webx/citrus" GIT_REPO_URL2_HTTPS = f"https://github.com/{USER_REPO2}" GIT_REPO_URL2_API = f"https://api.github.com/repos/{USER_REPO2}" -LIST_SRC = (MVN_URL + "al/aldi/sprova4j",) +ORIGIN_SRC = MVN_URL + "al/aldi/sprova4j" LIST_SRC_DATA = ( { "type": "maven", "url": "https://repo1.maven.org/maven2/al/aldi/sprova4j" + "/0.1.0/sprova4j-0.1.0-sources.jar", "time": "2021-07-12T17:06:59+00:00", "gid": "al.aldi", "aid": "sprova4j", "version": "0.1.0", "base_url": MVN_URL, }, { 
"type": "maven", "url": "https://repo1.maven.org/maven2/al/aldi/sprova4j" + "/0.1.1/sprova4j-0.1.1-sources.jar", "time": "2021-07-12T17:37:05+00:00", "gid": "al.aldi", "aid": "sprova4j", "version": "0.1.1", "base_url": MVN_URL, }, ) @pytest.fixture def maven_index_full(datadir) -> bytes: return Path(datadir, "http_indexes", "export_full.fld").read_bytes() @pytest.fixture def maven_index_incr_first(datadir) -> bytes: return Path(datadir, "http_indexes", "export_incr_first.fld").read_bytes() -@pytest.fixture -def maven_pom_1(datadir) -> bytes: - return Path(datadir, "https_maven.org", "sprova4j-0.1.0.pom").read_bytes() - - @pytest.fixture def maven_index_null_mtime(datadir) -> bytes: return Path(datadir, "http_indexes", "export_null_mtime.fld").read_bytes() -@pytest.fixture -def maven_pom_1_malformed(datadir) -> bytes: - return Path(datadir, "https_maven.org", "sprova4j-0.1.0.malformed.pom").read_bytes() - - -@pytest.fixture -def maven_pom_2(datadir) -> bytes: - return Path(datadir, "https_maven.org", "sprova4j-0.1.1.pom").read_bytes() - - -@pytest.fixture -def maven_pom_3(datadir) -> bytes: - return Path(datadir, "https_maven.org", "arangodb-graphql-1.2.pom").read_bytes() - - -@pytest.fixture -def maven_pom_multi_byte_encoding(datadir) -> bytes: - return Path(datadir, "https_maven.org", "citrus-parent-3.0.7.pom").read_bytes() - - -@pytest.fixture -def requests_mock(requests_mock): - """If github api calls for the configured scm repository, returns its canonical url.""" - for url_api, url_html in [ - (GIT_REPO_URL0_API, GIT_REPO_URL0_HTTPS), - (GIT_REPO_URL1_API, GIT_REPO_URL1_HTTPS), - (GIT_REPO_URL2_API, GIT_REPO_URL2_HTTPS), - ]: - requests_mock.get( - url_api, - json={"html_url": url_html}, - ) - yield requests_mock +@pytest.fixture(autouse=True) +def network_requests_mock(requests_mock, requests_mock_datadir, maven_index_full): + requests_mock.get(INDEX_URL, content=maven_index_full) @pytest.fixture(autouse=True) -def network_requests_mock( - requests_mock, maven_index_full, maven_pom_1, maven_pom_2, maven_pom_3 -): - requests_mock.get(INDEX_URL, content=maven_index_full) - requests_mock.get(URL_POM_1, content=maven_pom_1) - requests_mock.get(URL_POM_2, content=maven_pom_2) - requests_mock.get(URL_POM_3, content=maven_pom_3) +def retry_sleep_mock(mocker): + mocker.patch.object(MavenLister.http_request.retry, "sleep") def test_maven_full_listing(swh_scheduler): """Covers full listing of multiple pages, checking page results and listed origins, statelessness.""" # Run the lister. lister = MavenLister( scheduler=swh_scheduler, url=MVN_URL, instance="maven.org", index_url=INDEX_URL, incremental=False, ) stats = lister.run() # Start test checks. 
assert stats.pages == 5 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results origin_urls = [origin.url for origin in scheduler_origins] # 2 git origins + 1 maven origin with 2 releases (one per jar) - assert len(origin_urls) == 3 - assert sorted(origin_urls) == sorted(LIST_GIT + LIST_GIT_INCR + LIST_SRC) + assert set(origin_urls) == {ORIGIN_GIT, ORIGIN_GIT_INCR, ORIGIN_SRC} + assert len(set(origin_urls)) == len(origin_urls) for origin in scheduler_origins: if origin.visit_type == "maven": for src in LIST_SRC_DATA: last_update_src = iso8601.parse_date(src["time"]) assert last_update_src <= origin.last_update assert origin.extra_loader_arguments["artifacts"] == list(LIST_SRC_DATA) scheduler_state = lister.get_state_from_scheduler() assert scheduler_state is not None assert scheduler_state.last_seen_doc == -1 assert scheduler_state.last_seen_pom == -1 def test_maven_full_listing_malformed( swh_scheduler, requests_mock, - maven_pom_1_malformed, + datadir, ): """Covers full listing of multiple pages, checking page results with a malformed scm entry in pom.""" lister = MavenLister( scheduler=swh_scheduler, url=MVN_URL, instance="maven.org", index_url=INDEX_URL, incremental=False, ) # Set up test. - requests_mock.get(URL_POM_1, content=maven_pom_1_malformed) + requests_mock.get( + URL_POM_1, content=Path(datadir, "sprova4j-0.1.0.malformed.pom").read_bytes() + ) # Then run the lister. stats = lister.run() # Start test checks. assert stats.pages == 5 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results origin_urls = [origin.url for origin in scheduler_origins] # 2 git origins + 1 maven origin with 2 releases (one per jar) - assert len(origin_urls) == 3 - assert sorted(origin_urls) == sorted(LIST_GIT + LIST_GIT_INCR + LIST_SRC) + assert set(origin_urls) == {ORIGIN_GIT, ORIGIN_GIT_INCR, ORIGIN_SRC} + assert len(origin_urls) == len(set(origin_urls)) for origin in scheduler_origins: if origin.visit_type == "maven": for src in LIST_SRC_DATA: last_update_src = iso8601.parse_date(src["time"]) assert last_update_src <= origin.last_update assert origin.extra_loader_arguments["artifacts"] == list(LIST_SRC_DATA) scheduler_state = lister.get_state_from_scheduler() assert scheduler_state is not None assert scheduler_state.last_seen_doc == -1 assert scheduler_state.last_seen_pom == -1 def test_maven_incremental_listing( swh_scheduler, requests_mock, maven_index_full, maven_index_incr_first, ): """Covers incremental listing of multiple pages, checking page results and listed origins, with a second updated run for statefulness.""" lister = MavenLister( scheduler=swh_scheduler, url=MVN_URL, instance="maven.org", index_url=INDEX_URL, incremental=True, ) # Set up test. requests_mock.get(INDEX_URL, content=maven_index_incr_first) # Then run the lister. stats = lister.run() # Start test checks.
assert lister.incremental assert lister.updated assert stats.pages == 2 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results origin_urls = [origin.url for origin in scheduler_origins] # 1 git origin + 1 maven origin with 1 release (one per jar) - assert len(origin_urls) == 2 - assert sorted(origin_urls) == sorted(LIST_GIT + LIST_SRC) + assert set(origin_urls) == {ORIGIN_GIT, ORIGIN_SRC} + assert len(origin_urls) == len(set(origin_urls)) for origin in scheduler_origins: if origin.visit_type == "maven": last_update_src = iso8601.parse_date(LIST_SRC_DATA[0]["time"]) assert last_update_src == origin.last_update assert origin.extra_loader_arguments["artifacts"] == [LIST_SRC_DATA[0]] # Second execution of the lister, incremental mode lister = MavenLister( scheduler=swh_scheduler, url=MVN_URL, instance="maven.org", index_url=INDEX_URL, incremental=True, ) scheduler_state = lister.get_state_from_scheduler() assert scheduler_state is not None assert scheduler_state.last_seen_doc == 1 assert scheduler_state.last_seen_pom == 1 # Set up test. requests_mock.get(INDEX_URL, content=maven_index_full) # Then run the lister. stats = lister.run() # Start test checks. assert lister.incremental assert lister.updated assert stats.pages == 4 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results origin_urls = [origin.url for origin in scheduler_origins] - assert sorted(origin_urls) == sorted(LIST_SRC + LIST_GIT + LIST_GIT_INCR) + assert set(origin_urls) == {ORIGIN_SRC, ORIGIN_GIT, ORIGIN_GIT_INCR} + assert len(origin_urls) == len(set(origin_urls)) for origin in scheduler_origins: if origin.visit_type == "maven": for src in LIST_SRC_DATA: last_update_src = iso8601.parse_date(src["time"]) assert last_update_src <= origin.last_update assert origin.extra_loader_arguments["artifacts"] == list(LIST_SRC_DATA) scheduler_state = lister.get_state_from_scheduler() assert scheduler_state is not None assert scheduler_state.last_seen_doc == 4 assert scheduler_state.last_seen_pom == 4 @pytest.mark.parametrize("http_code", [400, 404, 500, 502]) def test_maven_list_http_error_on_index_read(swh_scheduler, requests_mock, http_code): """should stop listing if the lister fails to retrieve the main index url.""" lister = MavenLister(scheduler=swh_scheduler, url=MVN_URL, index_url=INDEX_URL) requests_mock.get(INDEX_URL, status_code=http_code) with pytest.raises(requests.HTTPError): # listing cannot continue so stop lister.run() scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert len(scheduler_origins) == 0 @pytest.mark.parametrize("http_code", [400, 404, 500, 502]) def test_maven_list_http_error_artifacts( swh_scheduler, requests_mock, http_code, ): """should continue listing when failing to retrieve artifacts.""" # Test failure of artifacts retrieval. requests_mock.get(URL_POM_1, status_code=http_code) lister = MavenLister(scheduler=swh_scheduler, url=MVN_URL, index_url=INDEX_URL) # on artifacts, the error is raised but listing continues lister.run() # If the maven_index_full step succeeded but not the get_pom step, # then we get only one maven-jar origin and one git origin.
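# (the git origin that would have been derived from the failing pom is therefore missing)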
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results - assert len(scheduler_origins) == 2 + origin_urls = [origin.url for origin in scheduler_origins] + + assert set(origin_urls) == {ORIGIN_SRC, ORIGIN_GIT_INCR} + assert len(origin_urls) == len(set(origin_urls)) def test_maven_lister_null_mtime(swh_scheduler, requests_mock, maven_index_null_mtime): requests_mock.get(INDEX_URL, content=maven_index_null_mtime) # Run the lister. lister = MavenLister( scheduler=swh_scheduler, url=MVN_URL, instance="maven.org", index_url=INDEX_URL, incremental=False, ) stats = lister.run() # Start test checks. assert stats.pages == 1 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert len(scheduler_origins) == 1 assert scheduler_origins[0].last_update is None -def test_maven_list_pom_bad_encoding(swh_scheduler, requests_mock, maven_pom_1): +def test_maven_list_pom_bad_encoding(swh_scheduler, requests_mock): """should continue listing when failing to decode pom file.""" # Test failure of pom parsing by reencoding a UTF-8 pom file to an unexpected encoding - requests_mock.get(URL_POM_1, content=maven_pom_1.decode("utf-8").encode("utf-32")) + requests_mock.get( + URL_POM_1, + content=requests.get(URL_POM_1).content.decode("utf-8").encode("utf-32"), + ) lister = MavenLister(scheduler=swh_scheduler, url=MVN_URL, index_url=INDEX_URL) lister.run() # If the maven_index_full step succeeded but not the pom parsing step, # then we get only one maven-jar origin and one git origin. scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert len(scheduler_origins) == 2 -def test_maven_list_pom_multi_byte_encoding( - swh_scheduler, requests_mock, maven_pom_multi_byte_encoding -): +def test_maven_list_pom_multi_byte_encoding(swh_scheduler, requests_mock, datadir): """should parse POM file with multi-byte encoding.""" # replace pom file with a multi-byte encoding one - requests_mock.get(URL_POM_1, content=maven_pom_multi_byte_encoding) + requests_mock.get( + URL_POM_1, content=Path(datadir, "citrus-parent-3.0.7.pom").read_bytes() + ) lister = MavenLister(scheduler=swh_scheduler, url=MVN_URL, index_url=INDEX_URL) lister.run() scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert len(scheduler_origins) == 3 diff --git a/swh/lister/nixguix/__init__.py b/swh/lister/nixguix/__init__.py new file mode 100644 index 0000000..a35c3e7 --- /dev/null +++ b/swh/lister/nixguix/__init__.py @@ -0,0 +1,38 @@ +# Copyright (C) 2022 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +""" +NixGuix lister +============== + +Nix and Guix are, among other things, (lazy) functional package managers. +We cannot easily parse their source declarations as it would require some involved +computations. + +After some discussion and work with both communities, they now expose public manifests, +be it the `Guix manifest`_ or the `Nixpkgs manifests`_, that the lister consumes to +extract origins. + +Four kinds of origins are listed: + +- the main `Guix repository`_ or `Nixpkgs repository`_, which are 'git' repositories +- VCS origins ('git', 'svn', 'hg') +- unique file ('content') +- unique tarball ('directory') + +.. _Guix repository: https://git.savannah.gnu.org/cgit/guix.git/ +.. _Nixpkgs repository: https://github.com/NixOS/nixpkgs +.. _Guix manifest: https://guix.gnu.org/sources.json +..
_Nixpkgs manifests: https://nix-community.github.io/nixpkgs-swh/sources-unstable-full.json + +""" + + +def register(): + from .lister import NixGuixLister + + return { + "lister": NixGuixLister, + "task_modules": [f"{__name__}.tasks"], + } diff --git a/swh/lister/nixguix/lister.py b/swh/lister/nixguix/lister.py new file mode 100644 index 0000000..1dbd4de --- /dev/null +++ b/swh/lister/nixguix/lister.py @@ -0,0 +1,490 @@ +# Copyright (C) 2020-2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +"""NixGuix lister definition. + +This lists artifacts out of the Guix or Nixpkgs manifests. + +Artifacts can be of types: +- upstream git repository (NixOS/nixpkgs, Guix) +- VCS repositories (svn, git, hg, ...) +- unique file +- unique tarball + +""" + +import base64 +import binascii +from dataclasses import dataclass +from enum import Enum +import logging +from pathlib import Path +import random +from typing import Any, Dict, Iterator, List, Optional, Tuple, Union +from urllib.parse import parse_qsl, urlparse + +import requests +from requests.exceptions import ConnectionError, InvalidSchema, SSLError + +from swh.core.github.utils import GitHubSession +from swh.core.tarball import MIMETYPE_TO_ARCHIVE_FORMAT +from swh.lister import TARBALL_EXTENSIONS +from swh.lister.pattern import CredentialsType, StatelessLister +from swh.scheduler.model import ListedOrigin + +logger = logging.getLogger(__name__) + + +class ArtifactNatureUndetected(ValueError): + """Raised when a remote artifact's nature (tarball, file) cannot be detected.""" + + pass + + +class ArtifactNatureMistyped(ValueError): + """Raised when a remote artifact is neither a tarball nor a file. + + Errors of this type are probably a misconfiguration in the manifest generation that + badly typed a vcs repository. + + """ + + pass + + +class ArtifactWithoutExtension(ValueError): + """Raised when an artifact nature cannot be determined by its name. + + This exception is solely for internal use of the :meth:`is_tarball` method. + + """ + + pass + + +class ChecksumsComputation(Enum): + """The possible checksum computation modes for artifacts listed out of the manifest.""" + + STANDARD = "standard" + """Standard checksums (e.g. sha1, sha256, ...) on the tarball or file.""" + NAR = "nar" + """The hash is computed over the NAR archive dump of the output (e.g. uncompressed + directory).""" + + +MAPPING_CHECKSUMS_COMPUTATION = { + "flat": ChecksumsComputation.STANDARD, + "recursive": ChecksumsComputation.NAR, +} +"""Mapping between the outputHashMode from the manifest and how to compute checksums.""" + + +@dataclass +class Artifact: + """Metadata information on Remote Artifact with url (tarball or file).""" + + origin: str + """Canonical url to retrieve the tarball artifact.""" + visit_type: str + """Either 'tar' or 'file' """ + fallback_urls: List[str] + """List of urls to retrieve tarball artifact if canonical url no longer works.""" + checksums: Dict[str, str] + """Integrity hash converted into a checksum dict.""" + checksums_computation: ChecksumsComputation + """Checksums computation mode to provide to loaders (e.g. nar, standard, ...)""" + + +@dataclass +class VCS: + """Metadata information on VCS.""" + + origin: str + """Origin url of the vcs""" + type: str + """Type of (d)vcs, e.g.
svn, git, hg, ...""" + ref: Optional[str] = None + """Reference: either a svn commit id, a git commit, ...""" + + +class ArtifactType(Enum): + """The possible artifact types listed out of the manifest.""" + + ARTIFACT = "artifact" + VCS = "vcs" + + +PageResult = Tuple[ArtifactType, Union[Artifact, VCS]] + + +VCS_SUPPORTED = ("git", "svn", "hg") + +# Rough approximation of the mimetypes we may find for tarballs "out there" +POSSIBLE_TARBALL_MIMETYPES = tuple(MIMETYPE_TO_ARCHIVE_FORMAT.keys()) + + +def is_tarball(urls: List[str], request: Optional[Any] = None) -> Tuple[bool, str]: + """Determine whether a list of file urls actually points at tarballs or simple files. + + When this cannot be answered from the url alone and `request` is provided, this + executes an HTTP `HEAD` query on the url to determine it. If `request` is not + provided, this raises an ArtifactNatureUndetected exception. + + Args: + urls: names of the remote files for which the extension needs to be checked. + + Raises: + ArtifactNatureUndetected when the artifact's nature cannot be detected out + of its url + ArtifactNatureMistyped when the artifact is neither a tarball nor a file. It's + up to the caller to do what's right with it. + + Returns: A tuple (bool, url). The boolean states whether the url points at an + archive. The second element is the url to use as origin: the first input url, + or the redirect location when the HEAD request fallback was needed. + + """ + + def _is_tarball(url): + """Determine from its extension whether url is a tarball. + + Raises: + ArtifactWithoutExtension in case no extension is available + + """ + urlparsed = urlparse(url) + if urlparsed.scheme not in ("http", "https", "ftp"): + raise ArtifactNatureMistyped(f"Mistyped artifact '{url}'") + + paths = [ + Path(p) for (_, p) in [("_", urlparsed.path)] + parse_qsl(urlparsed.query) + ] + if not any(path.suffix != "" for path in paths): + raise ArtifactWithoutExtension + return any(path.suffix.endswith(tuple(TARBALL_EXTENSIONS)) for path in paths) + + index = random.randrange(len(urls)) + url = urls[index] + + try: + return _is_tarball(url), urls[0] + except ArtifactWithoutExtension: + if request is None: + raise ArtifactNatureUndetected( + f"Cannot determine artifact type from url <{url}>" + ) + logger.warning( + "Cannot detect extension for <%s>. Fallback to http head query", + url, + ) + + try: + response = request.head(url) + except (InvalidSchema, SSLError, ConnectionError): + raise ArtifactNatureUndetected( + f"Cannot determine artifact type from url <{url}>" + ) + + if not response.ok or response.status_code == 404: + raise ArtifactNatureUndetected( + f"Cannot determine artifact type from url <{url}>" + ) + location = response.headers.get("Location") + if location: # It's not always present + logger.debug("Location: %s", location) + try: + # FIXME: location is also returned as it's considered the true origin, + # true enough?
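+ # e.g. (hypothetical) a HEAD on <https://example.org/pkg/download> that + # redirects to <https://example.org/pkg-1.0.tar.gz> classifies the + # artifact as a tarball and reports the redirect target as its origin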
+ return _is_tarball(location), location + except ArtifactWithoutExtension: + logger.warning( + "Still cannot detect extension through location <%s>...", + url, + ) + + content_type = response.headers.get("Content-Type") + if content_type: + logger.debug("Content-Type: %s", content_type) + if content_type == "application/json": + return False, urls[0] + return content_type.startswith(POSSIBLE_TARBALL_MIMETYPES), urls[0] + + raise ArtifactNatureUndetected( + f"Cannot determine artifact type from url <{url}>" + ) + + +VCS_KEYS_MAPPING = { + "git": { + "ref": "git_ref", + "url": "git_url", + }, + "svn": { + "ref": "svn_revision", + "url": "svn_url", + }, + "hg": { + "ref": "hg_changeset", + "url": "hg_url", + }, +} + + +class NixGuixLister(StatelessLister[PageResult]): + """List Guix or Nix sources out of a public json manifest. + + This lister can output: + - unique tarball (.tar.gz, .tbz2, ...) + - vcs repositories (e.g. git, hg, svn) + - unique file (.lisp, .py, ...) + + Note that no `last_update` is available in either manifest. + + For `url`-typed artifacts, this tries to determine the artifact's nature, tarball or + file. It first tries to determine it from the url extension. In case of no extension, + it falls back to querying (HEAD) the url to retrieve the origin from the `Location` + response header, and then checks the extension again. + + """ + + LISTER_NAME = "nixguix" + + def __init__( + self, + scheduler, + url: str, + origin_upstream: str, + instance: Optional[str] = None, + credentials: Optional[CredentialsType] = None, + # canonicalize urls, can be turned off during docker runs + canonicalize: bool = True, + **kwargs: Any, + ): + super().__init__( + scheduler=scheduler, + url=url.rstrip("/"), + instance=instance, + credentials=credentials, + ) + # either full fqdn NixOS/nixpkgs or guix repository urls + # maybe add an assert on those specific urls? + self.origin_upstream = origin_upstream + + self.session = requests.Session() + # for testing purposes, we may want to skip this step (e.g. docker run and rate + # limit) + self.github_session = ( + GitHubSession( + credentials=self.credentials, + user_agent=str(self.session.headers["User-Agent"]), + ) + if canonicalize + else None + ) + + def build_artifact( + self, artifact_url: str, artifact_type: str, artifact_ref: Optional[str] = None + ) -> Optional[Tuple[ArtifactType, VCS]]: + """Build a canonicalized vcs artifact when possible.""" + origin = ( + self.github_session.get_canonical_url(artifact_url) + if self.github_session + else artifact_url + ) + if not origin: + return None + return ArtifactType.VCS, VCS( + origin=origin, type=artifact_type, ref=artifact_ref + ) + + def get_pages(self) -> Iterator[PageResult]: + """Yield one page per "typed" origin referenced in manifest.""" + # fetch and parse the manifest... + response = self.http_request(self.url) + + # ...
if any + raw_data = response.json() + yield ArtifactType.VCS, VCS(origin=self.origin_upstream, type="git") + + # grep '"type"' guix-sources.json | sort | uniq + # "type": false <<<<<<<<< noise + # "type": "git", + # "type": "hg", + # "type": "no-origin", <<<<<<<<< noise + # "type": "svn", + # "type": "url", + + # grep '"type"' nixpkgs-sources-unstable.json | sort | uniq + # "type": "url", + + sources = raw_data["sources"] + random.shuffle(sources) + + for artifact in sources: + artifact_type = artifact["type"] + if artifact_type in VCS_SUPPORTED: + plain_url = artifact[VCS_KEYS_MAPPING[artifact_type]["url"]] + plain_ref = artifact[VCS_KEYS_MAPPING[artifact_type]["ref"]] + built_artifact = self.build_artifact( + plain_url, artifact_type, plain_ref + ) + if not built_artifact: + continue + yield built_artifact + elif artifact_type == "url": + # It's either a tarball or a file + origin_urls = artifact.get("urls") + if not origin_urls: + # Nothing to fetch + logger.warning("Skipping url <%s>: empty artifact", artifact) + continue + + assert origin_urls is not None + + # Deal with urls with empty scheme (basic fallback to http) + urls = [] + for url in origin_urls: + urlparsed = urlparse(url) + if urlparsed.scheme == "": + logger.warning("Missing scheme for <%s>: fallback to http", url) + fixed_url = f"http://{url}" + else: + fixed_url = url + urls.append(fixed_url) + + origin, *fallback_urls = urls + + if origin.endswith(".git"): + built_artifact = self.build_artifact(origin, "git") + if not built_artifact: + continue + yield built_artifact + continue + + outputHash = artifact.get("outputHash") + integrity = artifact.get("integrity") + if integrity is None and outputHash is None: + logger.warning( + "Skipping url <%s>: missing integrity and outputHash field", + origin, + ) + continue + + # Falls back to outputHash field if integrity is missing + if integrity is None and outputHash: + # We'll deal with outputHash as integrity field + integrity = outputHash + + try: + is_tar, origin = is_tarball(urls, self.session) + except ArtifactNatureMistyped: + logger.warning( + "Mistyped url <%s>: trying to deal with it properly", origin + ) + urlparsed = urlparse(origin) + artifact_type = urlparsed.scheme + + if artifact_type in VCS_SUPPORTED: + built_artifact = self.build_artifact(origin, artifact_type) + if not built_artifact: + continue + yield built_artifact + else: + logger.warning( + "Skipping url <%s>: undetected remote artifact type", origin + ) + continue + except ArtifactNatureUndetected: + logger.warning( + "Skipping url <%s>: undetected remote artifact type", origin + ) + continue + + # Determine the content checksum stored in the integrity field and + # convert into a dict of checksums. This only parses the + # `hash-expression` (hash-) as defined in + # https://w3c.github.io/webappsec-subresource-integrity/#the-integrity-attribute + try: + chksum_algo, chksum_b64 = integrity.split("-") + checksums: Dict[str, str] = { + chksum_algo: base64.decodebytes(chksum_b64.encode()).hex() + } + except binascii.Error: + logger.exception( + "Skipping url: <%s>: integrity computation failure for <%s>", + url, + artifact, + ) + continue + + # The 'outputHashMode' attribute determines how the hash is computed. It + # must be one of the following two values: + # - "flat": (default) The output must be a non-executable regular file. + # If it isn’t, the build fails. The hash is simply computed over the + # contents of that file (so it’s equal to what Unix commands like + # `sha256sum` or `sha1sum` produce). 
+ # - "recursive": The hash is computed over the NAR archive dump of the + # output (i.e., the result of `nix-store --dump`). In this case, + # the output can be anything, including a directory tree. + outputHashMode = artifact.get("outputHashMode", "flat") + + if not is_tar and outputHashMode == "recursive": + # T4608: Cannot deal with those properly yet as some can be missing + # 'critical' information about how to recompute the hash (e.g. fs + # layout, executable bit, ...) + logger.warning( + "Skipping artifact <%s>: 'file' artifact of type <%s> is " + " missing information to properly check its integrity", + artifact, + artifact_type, + ) + continue + + logger.debug("%s: %s", "dir" if is_tar else "cnt", origin) + yield ArtifactType.ARTIFACT, Artifact( + origin=origin, + fallback_urls=fallback_urls, + checksums=checksums, + checksums_computation=MAPPING_CHECKSUMS_COMPUTATION[outputHashMode], + visit_type="directory" if is_tar else "content", + ) + else: + logger.warning( + "Skipping artifact <%s>: unsupported type %s", + artifact, + artifact_type, + ) + + def vcs_to_listed_origin(self, artifact: VCS) -> Iterator[ListedOrigin]: + """Given a vcs repository, yield a ListedOrigin.""" + assert self.lister_obj.id is not None + # FIXME: What to do with the "ref" (e.g. git/hg/svn commit, ...) + yield ListedOrigin( + lister_id=self.lister_obj.id, + url=artifact.origin, + visit_type=artifact.type, + ) + + def artifact_to_listed_origin(self, artifact: Artifact) -> Iterator[ListedOrigin]: + """Given an artifact (tarball, file), yield one ListedOrigin.""" + assert self.lister_obj.id is not None + yield ListedOrigin( + lister_id=self.lister_obj.id, + url=artifact.origin, + visit_type=artifact.visit_type, + extra_loader_arguments={ + "checksums": artifact.checksums, + "checksums_computation": artifact.checksums_computation.value, + "fallback_urls": artifact.fallback_urls, + }, + ) + + def get_origins_from_page( + self, artifact_tuple: PageResult + ) -> Iterator[ListedOrigin]: + """Given an artifact tuple (type, artifact), yield a ListedOrigin.""" + artifact_type, artifact = artifact_tuple + mapping_type_fn = getattr(self, f"{artifact_type.value}_to_listed_origin") + yield from mapping_type_fn(artifact) diff --git a/swh/lister/nixguix/tasks.py b/swh/lister/nixguix/tasks.py new file mode 100644 index 0000000..23d9fd6 --- /dev/null +++ b/swh/lister/nixguix/tasks.py @@ -0,0 +1,14 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from celery import shared_task + + +@shared_task(name=__name__ + ".NixGuixListerTask") +def list_nixguix(**lister_args): + """Lister task for Arch Linux""" + from swh.lister.nixguix.lister import NixGuixLister + + return NixGuixLister.from_configfile(**lister_args).run().dict() diff --git a/swh/lister/nixguix/tests/__init__.py b/swh/lister/nixguix/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/swh/lister/nixguix/tests/data/sources-failure.json b/swh/lister/nixguix/tests/data/sources-failure.json new file mode 100644 index 0000000..e0844af --- /dev/null +++ b/swh/lister/nixguix/tests/data/sources-failure.json @@ -0,0 +1,64 @@ +{ + "sources": [ + {"type": "git", "git_url": "", "git_ref": ""}, + {"type": false}, + {"type": "no-origin"}, + {"type": "url", "urls": []}, + { + "type": "url", + "urls": 
["https://crates.io/api/v1/0.1.5/no-extension-and-head-404-so-skipped"], + "integrity": "sha256-HW6jxFlbljY8E5Q0l9s0r0Rg+0dKlcQ/REatNBuMl4U=" + }, + { + "type": "url", + "urls": [ "https://example.org/another-file-no-integrity-so-skipped.txt" ] + }, + { + "type": "url", + "urls": [ + "ftp://ftp.ourproject.org/file-with-no-extension" + ], + "integrity": "sha256-bss09x9yOnuW+Q5BHHjf8nNcCNxCKMdl9/2/jKSFcrQ=" + }, + { + "type": "url", + "urls": [ + "https://git-tails.immerda.ch/onioncircuits" + ], + "integrity": "sha256-lV3xiWUZmSnt4LW0ni/sUyC/bbtaxkTzvFLFtJKLuI4=" + }, + { + "outputHash": "sha256-9uF0fYl4Zz/Ia2UKx7CBi8ZU8jfWoBfy2QSgTSwXo5A", + "outputHashAlgo": null, + "outputHashMode": "recursive", + "type": "url", + "urls": [ + "https://github.com/figiel/hosts/archive/v1.0.0.tar.gz" + ], + "inferredFetcher": "fetchzip" + }, + { + "outputHash": "0s2mvy1nr2v1x0rr1fxlsv8ly1vyf9978rb4hwry5vnr678ls522", + "outputHashAlgo": "sha256", + "outputHashMode": "recursive", + "type": "url", + "urls": [ + "https://www.unicode.org/Public/emoji/12.1/emoji-zwj-sequences.txt" + ], + "integrity": "sha256-QhRN0THZ7uIzh2RldFJyfgdP0da0u5Az6GGLbIPfVWg=", + "inferredFetcher": "unclassified" + }, + { + "type": "url", + "urls": [ "unknown://example.org/wrong-scheme-so-skipped.txt" ], + "integrity": "sha256-wAEswtkl3ulAw3zq4perrGS6Wlww5XXnQYsEAoYT9fI=" + }, + { + "type": "url", + "urls": [ "https://code.9front.org/hg/plan9front" ], + "integrity": "sha256-wAEswtkl3ulAw3zq4perrGS6Wlww5XXnQYsEAoYT9fI=" + } + ], + "version":"1", + "revision":"ab59155c5a38dda7efaceb47c7528578fcf0def4" +} diff --git a/swh/lister/nixguix/tests/data/sources-success.json b/swh/lister/nixguix/tests/data/sources-success.json new file mode 100644 index 0000000..bb1943c --- /dev/null +++ b/swh/lister/nixguix/tests/data/sources-success.json @@ -0,0 +1,107 @@ +{ + "sources": [ + { + "type": "url", + "urls": [ "https://github.com/owner-1/repository-1/revision-1.tgz" ], + "integrity": "sha256-3vm2Nt+O4zHf3Ovd/qsv1gKTEUwodX9FLxlrQdry0zs=" + }, + { + "type": "url", + "urls": [ "https://github.com/owner-3/repository-1/revision-1.tgz" ], + "integrity": "sha256-3vm2Nt+O4zHf3Ovd/qsv1gKTEUwodX9FLxlrQdry0zs=" + }, + { + "type": "url", + "urls": [ "https://example.com/file.txt" ], + "integrity": "sha256-Q0copBCnj1b8G1iZw1k0NuYasMcx6QctleltspAgXlM=" + }, + { + "type": "url", + "urls": [ + "https://releases.wildfiregames.com/0ad-0.0.25b-alpha-unix-build.tar.xz" + ], + "integrity": "sha256-1w3NdfRzp9XIFDLD2SYJJr+Nnf9c1UF5YWlJfRxSLt0=" + }, + { + "type": "url", + "urls": [ + "ftp://ftp.ourproject.org/pub/ytalk/ytalk-3.3.0.tar.gz" + ], + "integrity": "sha256-bss09x9yOnuW+Q5BHHjf8nNcCNxCKMdl9/2/jKSFcrQ=" + }, + { + "type": "url", + "urls": [ + "www.roudoudou.com/export/cpc/rasm/rasm_v0117_src.zip" + ], + "integrity": "sha256-wAEswtkl3ulAw3zq4perrGS6Wlww5XXnQYsEAoYT9fI=" + }, + { + "type": "url", + "outputHashMode": "flat", + "urls": [ + "http://downloads.sourceforge.net/project/nmon/lmon16n.c", + "http://ufpr.dl.sourceforge.net/project/nmon/lmon16n.c", + "http://netassist.dl.sourceforge.net/project/nmon/lmon16n.c" + ], + "integrity": "sha256-wAEswtkl3ulAw3zq4perrGS6Wlww5XXnQYsEAoYT9fI=" + }, + { + "outputHash": "0s7p9swjqjsqddylmgid6cv263ggq7pmb734z4k84yfcrgb6kg4g", + "outputHashAlgo": "sha256", + "outputHashMode": "recursive", + "type": "url", + "urls": [ + "https://github.com/kandu/trie/archive/1.0.0.tar.gz" + ], + "integrity": "sha256-j7xp1svMeYIm+WScVe/B7w0jNjMtvkp9a1hLLLlO92g=", + "inferredFetcher": "fetchzip" + }, + { + "type": "url", + "urls": [ + 
"https://github.com/trie/trie.git" + ], + "integrity": "sha256-j7xp1svMeYIm+WScVe/B7w0jNjMtvkp9a1hLLLlO92g=" + }, + { + "type": "git", + "git_url": "https://example.org/pali/0xffff", + "git_ref": "0.9" + }, + { + "type": "hg", + "hg_url": "https://example.org/vityok/cl-string-match", + "hg_changeset": "5048480a61243e6f1b02884012c8f25cdbee6d97" + }, + { + "type": "svn", + "svn_url": "https://code.call-cc.org/svn/chicken-eggs/release/5/iset/tags/2.2", + "svn_revision": 39057 + }, + { + "outputHash": "sha256-LxVcYj2WKHbhNu5x/DFkxQPOYrVkNvwiE/qcODq52Lc=", + "outputHashAlgo": null, + "outputHashMode": "recursive", + "type": "url", + "urls": [ + "https://github.com/julian-klode/triehash/archive/debian/0.3-3.tar.gz" + ], + "inferredFetcher": "fetchzip" + }, + { + "type": "url", + "urls": [ + "http://git.marmaro.de/?p=mmh;a=snapshot;h=431604647f89d5aac7b199a7883e98e56e4ccf9e;sf=tgz" + ], + "integrity": "sha256-G/7oY5qdCSJ59VlwHtIbvMdT6+mriXhMqQIHNx65J+E=" + }, + { + "type": "url", + "urls": ["svn://svn.code.sf.net/p/acme-crossass/code-0/trunk"], + "integrity": "sha256-VifIQ+UEVMKJ+cNS+Xxusazinr5Cgu1lmGuhqj/5Mpk=" + } + ], + "version": "1", + "revision": "cc4e04c26672dd74e5fd0fecb78b435fb55368f7" +} diff --git a/swh/lister/nixguix/tests/test_lister.py b/swh/lister/nixguix/tests/test_lister.py new file mode 100644 index 0000000..7ff0010 --- /dev/null +++ b/swh/lister/nixguix/tests/test_lister.py @@ -0,0 +1,309 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from collections import defaultdict +import json +import logging +from pathlib import Path +from typing import Dict, List + +import pytest +import requests +from requests.exceptions import ConnectionError, InvalidSchema, SSLError + +from swh.lister import TARBALL_EXTENSIONS +from swh.lister.nixguix.lister import ( + POSSIBLE_TARBALL_MIMETYPES, + ArtifactNatureMistyped, + ArtifactNatureUndetected, + NixGuixLister, + is_tarball, +) +from swh.lister.pattern import ListerStats + +logger = logging.getLogger(__name__) + +SOURCES = { + "guix": { + "repo": "https://git.savannah.gnu.org/cgit/guix.git/", + "manifest": "https://guix.gnu.org/sources.json", + }, + "nixpkgs": { + "repo": "https://github.com/NixOS/nixpkgs", + "manifest": "https://nix-community.github.io/nixpkgs-swh/sources-unstable.json", + }, +} + + +def page_response(datadir, instance: str = "success") -> List[Dict]: + """Return list of repositories (out of test dataset)""" + datapath = Path(datadir, f"sources-{instance}.json") + return json.loads(datapath.read_text()) if datapath.exists else [] + + +@pytest.mark.parametrize( + "tarballs", + [[f"one.{ext}", f"two.{ext}"] for ext in TARBALL_EXTENSIONS] + + [[f"one.{ext}?foo=bar"] for ext in TARBALL_EXTENSIONS], +) +def test_is_tarball_simple(tarballs): + """Simple check on tarball should discriminate between tarball and file""" + urls = [f"https://example.org/{tarball}" for tarball in tarballs] + is_tar, origin = is_tarball(urls) + assert is_tar is True + assert origin == urls[0] + + +@pytest.mark.parametrize( + "query_param", + ["file", "f", "url", "name", "anykeyreally"], +) +def test_is_tarball_not_so_simple(query_param): + """More involved check on tarball should discriminate between tarball and file""" + url = f"https://example.org/download.php?foo=bar&{query_param}=one.tar.gz" + is_tar, origin = is_tarball([url]) + assert is_tar is 
True + assert origin == url + + +@pytest.mark.parametrize( + "files", + [ + ["abc.lisp"], + ["one.abc", "two.bcd"], + ["abc.c", "other.c"], + ["one.scm?foo=bar", "two.scm?foo=bar"], + ["config.nix", "flakes.nix"], + ], +) +def test_is_tarball_simple_not_tarball(files): + """Simple check on tarball should discriminate between tarball and file""" + urls = [f"http://example.org/{file}" for file in files] + is_tar, origin = is_tarball(urls) + assert is_tar is False + assert origin == urls[0] + + +def test_is_tarball_complex_with_no_result(requests_mock): + """Complex tarball detection without proper information should fail.""" + # No extension, this won't detect immediately the nature of the url + url = "https://example.org/crates/package/download" + urls = [url] + with pytest.raises(ArtifactNatureUndetected): + is_tarball(urls) # no request parameter, this cannot fallback, raises + + with pytest.raises(ArtifactNatureUndetected): + requests_mock.head( + url, + status_code=404, # not found so cannot detect anything + ) + is_tarball(urls, requests) + + with pytest.raises(ArtifactNatureUndetected): + requests_mock.head( + url, headers={} + ) # response ok without headers, cannot detect anything + is_tarball(urls, requests) + + with pytest.raises(ArtifactNatureUndetected): + fallback_url = "https://example.org/mirror/crates/package/download" + requests_mock.head( + url, headers={"location": fallback_url} # still no extension, cannot detect + ) + is_tarball(urls, requests) + + with pytest.raises(ArtifactNatureMistyped): + is_tarball(["foo://example.org/unsupported-scheme"]) + + with pytest.raises(ArtifactNatureMistyped): + fallback_url = "foo://example.org/unsupported-scheme" + requests_mock.head( + url, headers={"location": fallback_url} # still no extension, cannot detect + ) + is_tarball(urls, requests) + + +@pytest.mark.parametrize( + "fallback_url, expected_result", + [ + ("https://example.org/mirror/crates/package/download.tar.gz", True), + ("https://example.org/mirror/package/download.lisp", False), + ], +) +def test_is_tarball_complex_with_location_result( + requests_mock, fallback_url, expected_result +): + """Complex tarball detection should detect artifact nature from the Location header""" + # No extension, this won't detect immediately the nature of the url + url = "https://example.org/crates/package/download" + urls = [url] + + # Scenario where the url redirects to a location with a proper extension + requests_mock.head(url, headers={"location": fallback_url}) + is_tar, origin = is_tarball(urls, requests) + assert is_tar == expected_result + if is_tar: + assert origin == fallback_url + + +@pytest.mark.parametrize( + "content_type, expected_result", + [("application/json", False), ("application/something", False)] + + [(ext, True) for ext in POSSIBLE_TARBALL_MIMETYPES], +) +def test_is_tarball_complex_with_content_type_result( + requests_mock, content_type, expected_result +): + """Complex tarball detection should detect artifact nature from the Content-Type header""" + # No extension, this won't detect immediately the nature of the url + url = "https://example.org/crates/package/download" + urls = [url] + + # Scenario where the response advertises a Content-Type header + requests_mock.head(url, headers={"Content-Type": content_type}) + is_tar, origin = is_tarball(urls, requests) + assert is_tar == expected_result + if is_tar: + assert origin == url + + +def test_lister_nixguix_ok(datadir, swh_scheduler, requests_mock): + """NixGuixLister should list all origins per visit type""" + url =
SOURCES["guix"]["manifest"] + origin_upstream = SOURCES["guix"]["repo"] + lister = NixGuixLister(swh_scheduler, url=url, origin_upstream=origin_upstream) + + response = page_response(datadir, "success") + requests_mock.get( + url, + [{"json": response}], + ) + requests_mock.get( + "https://api.github.com/repos/trie/trie", + [{"json": {"html_url": "https://github.com/trie/trie.git"}}], + ) + requests_mock.head( + "http://git.marmaro.de/?p=mmh;a=snapshot;h=431604647f89d5aac7b199a7883e98e56e4ccf9e;sf=tgz", + headers={"Content-Type": "application/gzip; charset=ISO-8859-1"}, + ) + + expected_visit_types = defaultdict(int) + # origin upstream is added as origin + expected_nb_origins = 1 + expected_visit_types["git"] += 1 + for artifact in response["sources"]: + # Each artifact is considered an origin (even "url" artifacts with mirror urls) + expected_nb_origins += 1 + artifact_type = artifact["type"] + if artifact_type in [ + "git", + "svn", + "hg", + ]: + expected_visit_types[artifact_type] += 1 + elif artifact_type == "url": + url = artifact["urls"][0] + if url.endswith(".git"): + expected_visit_types["git"] += 1 + elif url.endswith(".c") or url.endswith(".txt"): + expected_visit_types["content"] += 1 + elif url.startswith("svn"): # mistyped artifact rendered as vcs nonetheless + expected_visit_types["svn"] += 1 + else: + expected_visit_types["directory"] += 1 + + assert set(expected_visit_types.keys()) == { + "content", + "git", + "svn", + "hg", + "directory", + } + + listed_result = lister.run() + + # 1 page read is 1 origin + nb_pages = expected_nb_origins + assert listed_result == ListerStats(pages=nb_pages, origins=expected_nb_origins) + + scheduler_origins = lister.scheduler.get_listed_origins( + lister.lister_obj.id + ).results + assert len(scheduler_origins) == expected_nb_origins + + mapping_visit_types = defaultdict(int) + + for listed_origin in scheduler_origins: + assert listed_origin.visit_type in expected_visit_types + # no last update is listed on those manifests + assert listed_origin.last_update is None + + mapping_visit_types[listed_origin.visit_type] += 1 + + assert dict(mapping_visit_types) == expected_visit_types + + +def test_lister_nixguix_mostly_noop(datadir, swh_scheduler, requests_mock): + """NixGuixLister should ignore unsupported or incomplete origins""" + url = SOURCES["nixpkgs"]["manifest"] + origin_upstream = SOURCES["nixpkgs"]["repo"] + lister = NixGuixLister(swh_scheduler, url=url, origin_upstream=origin_upstream) + + response = page_response(datadir, "failure") + + requests_mock.get( + url, + [{"json": response}], + ) + # Amongst artifacts, this url does not allow to determine its nature (tarball, file) + # It's ending up doing a http head query which ends up being 404, so it's skipped. 
+ requests_mock.head( + "https://crates.io/api/v1/0.1.5/no-extension-and-head-404-so-skipped", + status_code=404, + ) + # Invalid schema for that origin (and no extension), so skip origin + # from its name + requests_mock.head( + "ftp://ftp.ourproject.org/file-with-no-extension", + exc=InvalidSchema, + ) + # Cannot communicate with an expired cert, so skip origin + requests_mock.head( + "https://code.9front.org/hg/plan9front", + exc=SSLError, + ) + # Cannot connect to the site, so skip origin + requests_mock.head( + "https://git-tails.immerda.ch/onioncircuits", + exc=ConnectionError, + ) + + listed_result = lister.run() + # only the upstream origin is listed; all other entries are unsupported or incomplete + assert listed_result == ListerStats(pages=1, origins=1) + + scheduler_origins = lister.scheduler.get_listed_origins( + lister.lister_obj.id + ).results + assert len(scheduler_origins) == 1 + + assert scheduler_origins[0].visit_type == "git" + + +def test_lister_nixguix_fail(datadir, swh_scheduler, requests_mock): + url = SOURCES["nixpkgs"]["manifest"] + origin_upstream = SOURCES["nixpkgs"]["repo"] + lister = NixGuixLister(swh_scheduler, url=url, origin_upstream=origin_upstream) + + requests_mock.get( + url, + status_code=404, + ) + + with pytest.raises(requests.HTTPError): # listing cannot continue so stop + lister.run() + + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results + assert len(scheduler_origins) == 0 diff --git a/swh/lister/nixguix/tests/test_tasks.py b/swh/lister/nixguix/tests/test_tasks.py new file mode 100644 index 0000000..8631046 --- /dev/null +++ b/swh/lister/nixguix/tests/test_tasks.py @@ -0,0 +1,27 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.lister.pattern import ListerStats + +NAMESPACE = "swh.lister.nixguix" + + +def test_nixguix_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker): + # setup the mocked NixGuixLister + lister = mocker.patch(f"{NAMESPACE}.lister.NixGuixLister") + lister.from_configfile.return_value = lister + stats = ListerStats(pages=1, origins=42) + lister.run.return_value = stats + + res = swh_scheduler_celery_app.send_task( + f"{NAMESPACE}.tasks.NixGuixListerTask", + ) + assert res + res.wait() + assert res.successful() + assert res.result == stats.dict() + + lister.from_configfile.assert_called_once_with() + lister.run.assert_called_once_with() diff --git a/swh/lister/npm/lister.py b/swh/lister/npm/lister.py index dfc6561..b940699 100644 --- a/swh/lister/npm/lister.py +++ b/swh/lister/npm/lister.py @@ -1,190 +1,170 @@ -# Copyright (C) 2018-2021 the Software Heritage developers +# Copyright (C) 2018-2022 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from dataclasses import asdict, dataclass import logging from typing import Any, Dict, Iterator, List, Optional import iso8601 -import requests -from tenacity.before_sleep import before_sleep_log -from swh.lister import USER_AGENT from swh.lister.pattern import CredentialsType, Lister -from swh.lister.utils import throttling_retry from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin logger = logging.getLogger(__name__) @dataclass class NpmListerState: """State of npm lister"""
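# CouchDB changes feed sequence number of the last processed page; it is reused as the # `since` query parameter on the next incremental run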
last_seq: Optional[int] = None class NpmLister(Lister[NpmListerState, List[Dict[str, Any]]]): """ List all packages hosted on the npm registry. The lister is based on the npm replication API powered by a CouchDB database (https://docs.couchdb.org/en/stable/api/database/). Args: scheduler: a scheduler instance page_size: number of package entries to return per page when querying the npm API incremental: defines if incremental listing should be used; in that case, only packages modified or added since the last incremental listing operation will be returned, otherwise all packages will be listed in lexicographical order """ LISTER_NAME = "npm" INSTANCE = "npm" API_BASE_URL = "https://replicate.npmjs.com" API_INCREMENTAL_LISTING_URL = f"{API_BASE_URL}/_changes" API_FULL_LISTING_URL = f"{API_BASE_URL}/_all_docs" PACKAGE_URL_TEMPLATE = "https://www.npmjs.com/package/{package_name}" def __init__( self, scheduler: SchedulerInterface, page_size: int = 1000, incremental: bool = False, credentials: CredentialsType = None, ): super().__init__( scheduler=scheduler, credentials=credentials, url=self.API_INCREMENTAL_LISTING_URL if incremental else self.API_FULL_LISTING_URL, instance=self.INSTANCE, ) self.page_size = page_size if not incremental: # in full listing mode, first package in each page corresponds to the one # provided as the startkey query parameter value, so we increment the page # size by one to avoid double package processing self.page_size += 1 self.incremental = incremental - self.session = requests.Session() - self.session.headers.update( - {"Accept": "application/json", "User-Agent": USER_AGENT} - ) + self.session.headers.update({"Accept": "application/json"}) def state_from_dict(self, d: Dict[str, Any]) -> NpmListerState: return NpmListerState(**d) def state_to_dict(self, state: NpmListerState) -> Dict[str, Any]: return asdict(state) def request_params(self, last_package_id: str) -> Dict[str, Any]: # include package JSON document to get its last update date params = {"limit": self.page_size, "include_docs": "true"} if self.incremental: params["since"] = last_package_id else: params["startkey"] = last_package_id return params - @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) - def page_request(self, last_package_id: str) -> requests.Response: - params = self.request_params(last_package_id) - logger.debug("Fetching URL %s with params %s", self.url, params) - response = self.session.get(self.url, params=params) - if response.status_code != 200: - logger.warning( - "Unexpected HTTP status code %s on %s: %s", - response.status_code, - response.url, - response.content, - ) - response.raise_for_status() - return response - def get_pages(self) -> Iterator[List[Dict[str, Any]]]: last_package_id: str = "0" if self.incremental else '""' if ( self.incremental and self.state is not None and self.state.last_seq is not None ): last_package_id = str(self.state.last_seq) while True: - response = self.page_request(last_package_id) + response = self.http_request( + self.url, params=self.request_params(last_package_id) + ) data = response.json() page = data["results"] if self.incremental else data["rows"] if not page: break if self.incremental or len(page) < self.page_size: yield page else: yield page[:-1] if len(page) < self.page_size: break last_package_id = ( str(page[-1]["seq"]) if self.incremental else f'"{page[-1]["id"]}"' ) def get_origins_from_page( self, page: List[Dict[str, Any]] ) -> Iterator[ListedOrigin]: """Convert a page of Npm repositories into a list of ListedOrigin."""
assert self.lister_obj.id is not None for package in page: # no source code to archive here if not package["doc"].get("versions", {}): continue package_name = package["doc"]["name"] package_latest_version = ( package["doc"].get("dist-tags", {}).get("latest", "") ) last_update = None if package_latest_version in package["doc"].get("time", {}): last_update = iso8601.parse_date( package["doc"]["time"][package_latest_version] ) yield ListedOrigin( lister_id=self.lister_obj.id, url=self.PACKAGE_URL_TEMPLATE.format(package_name=package_name), visit_type="npm", last_update=last_update, ) def commit_page(self, page: List[Dict[str, Any]]): """Update the currently stored state using the latest listed page.""" if self.incremental: last_package = page[-1] last_seq = last_package["seq"] if self.state.last_seq is None or last_seq > self.state.last_seq: self.state.last_seq = last_seq def finalize(self): if self.incremental and self.state.last_seq is not None: scheduler_state = self.get_state_from_scheduler() if ( scheduler_state.last_seq is None or self.state.last_seq > scheduler_state.last_seq ): self.updated = True diff --git a/swh/lister/npm/tests/test_lister.py b/swh/lister/npm/tests/test_lister.py index 1c20b33..7c4fa93 100644 --- a/swh/lister/npm/tests/test_lister.py +++ b/swh/lister/npm/tests/test_lister.py @@ -1,207 +1,220 @@ -# Copyright (C) 2018-2021 The Software Heritage developers +# Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from itertools import chain import json from pathlib import Path import iso8601 import pytest from requests.exceptions import HTTPError -from swh.lister import USER_AGENT +from swh.lister import USER_AGENT_TEMPLATE from swh.lister.npm.lister import NpmLister, NpmListerState @pytest.fixture def npm_full_listing_page1(datadir): return json.loads(Path(datadir, "npm_full_page1.json").read_text()) @pytest.fixture def npm_full_listing_page2(datadir): return json.loads(Path(datadir, "npm_full_page2.json").read_text()) @pytest.fixture def npm_incremental_listing_page1(datadir): return json.loads(Path(datadir, "npm_incremental_page1.json").read_text()) @pytest.fixture def npm_incremental_listing_page2(datadir): return json.loads(Path(datadir, "npm_incremental_page2.json").read_text()) +@pytest.fixture(autouse=True) +def retry_sleep_mock(mocker): + mocker.patch.object(NpmLister.http_request.retry, "sleep") + + def _check_listed_npm_packages(lister, packages, scheduler_origins): for package in packages: package_name = package["doc"]["name"] latest_version = package["doc"]["dist-tags"]["latest"] package_last_update = iso8601.parse_date(package["doc"]["time"][latest_version]) origin_url = lister.PACKAGE_URL_TEMPLATE.format(package_name=package_name) scheduler_origin = [o for o in scheduler_origins if o.url == origin_url] assert scheduler_origin assert scheduler_origin[0].last_update == package_last_update def _match_request(request): - return request.headers.get("User-Agent") == USER_AGENT + return ( + request.headers.get("User-Agent") == USER_AGENT_TEMPLATE % NpmLister.LISTER_NAME + ) def _url_params(page_size, **kwargs): params = {"limit": page_size, "include_docs": "true"} params.update(**kwargs) return params def test_npm_lister_full( swh_scheduler, requests_mock, mocker, npm_full_listing_page1, npm_full_listing_page2 ): """Simulate a full listing of four npm packages in two 
pages""" page_size = 2 lister = NpmLister(scheduler=swh_scheduler, page_size=page_size, incremental=False) requests_mock.get( lister.API_FULL_LISTING_URL, [ {"json": npm_full_listing_page1}, {"json": npm_full_listing_page2}, ], additional_matcher=_match_request, ) - spy_get = mocker.spy(lister.session, "get") + spy_request = mocker.spy(lister.session, "request") stats = lister.run() assert stats.pages == 2 assert stats.origins == page_size * stats.pages - spy_get.assert_has_calls( + spy_request.assert_has_calls( [ mocker.call( + "GET", lister.API_FULL_LISTING_URL, params=_url_params(page_size + 1, startkey='""'), ), mocker.call( + "GET", lister.API_FULL_LISTING_URL, params=_url_params( page_size + 1, startkey=f'"{npm_full_listing_page1["rows"][-1]["id"]}"', ), ), ] ) scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results _check_listed_npm_packages( lister, chain(npm_full_listing_page1["rows"][:-1], npm_full_listing_page2["rows"]), scheduler_origins, ) assert lister.get_state_from_scheduler() == NpmListerState() def test_npm_lister_incremental( swh_scheduler, requests_mock, mocker, npm_incremental_listing_page1, npm_incremental_listing_page2, ): """Simulate an incremental listing of four npm packages in two pages""" page_size = 2 lister = NpmLister(scheduler=swh_scheduler, page_size=page_size, incremental=True) requests_mock.get( lister.API_INCREMENTAL_LISTING_URL, [ {"json": npm_incremental_listing_page1}, {"json": npm_incremental_listing_page2}, {"json": {"results": []}}, ], additional_matcher=_match_request, ) - spy_get = mocker.spy(lister.session, "get") + spy_request = mocker.spy(lister.session, "request") assert lister.get_state_from_scheduler() == NpmListerState() stats = lister.run() assert stats.pages == 2 assert stats.origins == page_size * stats.pages last_seq = npm_incremental_listing_page2["results"][-1]["seq"] - spy_get.assert_has_calls( + spy_request.assert_has_calls( [ mocker.call( + "GET", lister.API_INCREMENTAL_LISTING_URL, params=_url_params(page_size, since="0"), ), mocker.call( + "GET", lister.API_INCREMENTAL_LISTING_URL, params=_url_params( page_size, since=str(npm_incremental_listing_page1["results"][-1]["seq"]), ), ), mocker.call( + "GET", lister.API_INCREMENTAL_LISTING_URL, params=_url_params(page_size, since=str(last_seq)), ), ] ) scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results _check_listed_npm_packages( lister, chain( npm_incremental_listing_page1["results"], npm_incremental_listing_page2["results"], ), scheduler_origins, ) assert lister.get_state_from_scheduler() == NpmListerState(last_seq=last_seq) def test_npm_lister_incremental_restart( swh_scheduler, requests_mock, mocker, ): """Check incremental npm listing will restart from saved state""" page_size = 2 last_seq = 67 lister = NpmLister(scheduler=swh_scheduler, page_size=page_size, incremental=True) lister.state = NpmListerState(last_seq=last_seq) requests_mock.get(lister.API_INCREMENTAL_LISTING_URL, json={"results": []}) - spy_get = mocker.spy(lister.session, "get") + spy_request = mocker.spy(lister.session, "request") lister.run() - spy_get.assert_called_with( + spy_request.assert_called_with( + "GET", lister.API_INCREMENTAL_LISTING_URL, params=_url_params(page_size, since=str(last_seq)), ) def test_npm_lister_http_error( swh_scheduler, requests_mock, mocker, ): lister = NpmLister(scheduler=swh_scheduler) requests_mock.get(lister.API_FULL_LISTING_URL, status_code=500) with pytest.raises(HTTPError): lister.run() diff --git 
a/swh/lister/nuget/__init__.py b/swh/lister/nuget/__init__.py new file mode 100644 index 0000000..73aaafa --- /dev/null +++ b/swh/lister/nuget/__init__.py @@ -0,0 +1,79 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +""" +NuGet lister +============ + +The NuGet lister discovers origins from `nuget.org`_; NuGet is the package manager for .NET. +As .NET packages mostly contain binaries, we only keep track of packages that have +a DVCS repository (Git, SVN, Mercurial...) url usable as an origin. + +The `nuget.org/packages`_ page lists 301,206 packages as of September 2022. + +Origins retrieving strategy +--------------------------- + +Nuget.org provides an `http api`_ with several endpoints to discover and list packages +and versions. + +The recommended way to retrieve all packages is to use the `catalog`_ api endpoint. +It provides a first endpoint that lists all available pages. We then iterate to get the +content of the related pages. + +Page listing +------------ + +Each page returns a list of packages as the data of the response. + +Origins from page +----------------- + +For each entry in a page listing, we get related metadata through its `package metadata`_ +http api endpoint. It returns uris for linked archives that contain binaries, not the +original source code. Our strategy is then to get a related Git repository. + +We use another endpoint for each package to get its `package manifest`_, a .nuspec file (xml + data) which may contain a Git repository url. If we find one, it is used as the origin. + +Running tests +------------- + +Activate the virtualenv and run from within swh-lister directory:: + + pytest -s -vv --log-cli-level=DEBUG swh/lister/nuget/tests + +Testing with Docker +------------------- + +Change directory to swh/docker then launch the docker environment:: + + docker compose up -d + +Then schedule a nuget listing task:: + + docker compose exec swh-scheduler swh scheduler task add -p oneshot list-nuget + +You can follow lister execution by displaying logs of the swh-lister service:: + + docker compose logs -f swh-lister + +.. _nuget.org: https://nuget.org +.. _nuget.org/packages: https://www.nuget.org/packages +.. _http api: https://api.nuget.org/v3/index.json +.. _catalog: https://learn.microsoft.com/en-us/nuget/api/catalog-resource +.. _package metadata: https://learn.microsoft.com/en-us/nuget/api/registration-base-url-resource +..
_package manifest: https://learn.microsoft.com/en-us/nuget/api/package-base-address-resource#download-package-manifest-nuspec # noqa: B950 +""" + + +def register(): + from .lister import NugetLister + + return { + "lister": NugetLister, + "task_modules": ["%s.tasks" % __name__], + } diff --git a/swh/lister/nuget/lister.py b/swh/lister/nuget/lister.py new file mode 100644 index 0000000..51652ec --- /dev/null +++ b/swh/lister/nuget/lister.py @@ -0,0 +1,114 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import logging +from typing import Dict, Iterator, List, Optional + +from bs4 import BeautifulSoup +from requests.exceptions import HTTPError + +from swh.scheduler.interface import SchedulerInterface +from swh.scheduler.model import ListedOrigin + +from ..pattern import CredentialsType, StatelessLister + +logger = logging.getLogger(__name__) + +# Aliasing the page results returned by `get_pages` method from the lister. +NugetListerPage = List[Dict[str, str]] + + +class NugetLister(StatelessLister[NugetListerPage]): + """List NuGet (package manager for .NET) origins.""" + + LISTER_NAME = "nuget" + INSTANCE = "nuget" + + API_INDEX_URL = "https://api.nuget.org/v3/catalog0/index.json" + + def __init__( + self, + scheduler: SchedulerInterface, + credentials: Optional[CredentialsType] = None, + ): + super().__init__( + scheduler=scheduler, + credentials=credentials, + instance=self.INSTANCE, + url=self.API_INDEX_URL, + ) + + def get_pages(self) -> Iterator[NugetListerPage]: + """Yield the pages of the NuGet catalog, one at a time. + + It uses the catalog index endpoint `https://api.nuget.org/v3/catalog0/index.json` + to get the list of page endpoints to iterate on. + """ + index_response = self.http_request(url=self.url) + index = index_response.json() + assert "items" in index + + for page in index["items"]: + assert page["@id"] + try: + page_response = self.http_request(url=page["@id"]) + page_data = page_response.json() + assert "items" in page_data + yield page_data["items"] + except HTTPError: + logger.warning( + "Failed to fetch page %s, skipping it from listing.", + page["@id"], + ) + continue + + def get_origins_from_page(self, page: NugetListerPage) -> Iterator[ListedOrigin]: + """Iterate on all pages and yield ListedOrigin instances. + .NET packages are mostly binaries (dll, etc.), so we only retrieve packages for + which we can find a VCS repository. + + To check whether a VCS repository exists, we retrieve for each entry in a page + its .nuspec file, an XML package metadata file, and search it for a `repository` + value.
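+ + For illustration, an assumed (made-up) example of the nuspec `repository` + entry this method searches for:: + + <repository type="git" url="https://github.com/example/project" />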
+ """ + assert self.lister_obj.id is not None + + for elt in page: + try: + res = self.http_request(url=elt["@id"]) + except HTTPError: + logger.warning( + "Failed to fetch page %s, skipping it from listing.", + elt["@id"], + ) + continue + + data = res.json() + pkgname = data["id"] + nuspec_url = ( + f"https://api.nuget.org/v3-flatcontainer/{pkgname.lower()}/" + f"{data['version'].lower()}/{pkgname.lower()}.nuspec" + ) + try: + res_metadata = self.http_request(url=nuspec_url) + except HTTPError: + logger.warning( + "Failed to fetch nuspec file %s, skipping it from listing.", + nuspec_url, + ) + continue + xml = BeautifulSoup(res_metadata.content, "xml") + repo = xml.find("repository") + if repo and "url" in repo.attrs and "type" in repo.attrs: + vcs_url = repo.attrs["url"] + vcs_type = repo.attrs["type"] + yield ListedOrigin( + lister_id=self.lister_obj.id, + visit_type=vcs_type, + url=vcs_url, + last_update=None, + ) + else: + continue diff --git a/swh/lister/nuget/tasks.py b/swh/lister/nuget/tasks.py new file mode 100644 index 0000000..d766cfb --- /dev/null +++ b/swh/lister/nuget/tasks.py @@ -0,0 +1,19 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from celery import shared_task + +from swh.lister.nuget.lister import NugetLister + + +@shared_task(name=__name__ + ".NugetListerTask") +def list_nuget(**lister_args): + """Lister task for Nuget (Javascript package manager) registry""" + return NugetLister.from_configfile(**lister_args).run().dict() + + +@shared_task(name=__name__ + ".ping") +def _ping(): + return "OK" diff --git a/swh/lister/nuget/tests/__init__.py b/swh/lister/nuget/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3-flatcontainer_intersoft.crosslight.logging.entityframework_5.0.5000.1235-experimental_intersoft.crosslight.logging.entityframework.nuspec b/swh/lister/nuget/tests/data/https_api.nuget.org/v3-flatcontainer_intersoft.crosslight.logging.entityframework_5.0.5000.1235-experimental_intersoft.crosslight.logging.entityframework.nuspec new file mode 100644 index 0000000..52d1360 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3-flatcontainer_intersoft.crosslight.logging.entityframework_5.0.5000.1235-experimental_intersoft.crosslight.logging.entityframework.nuspec @@ -0,0 +1,23 @@ + + + + Intersoft.Crosslight.Logging.EntityFramework + 5.0.5000.1235-experimental + Intersoft Crosslight Logging EntityFramework + Intersoft Solutions + Intersoft Solutions + false + http://www.intersoftsolutions.com/Products/Licensing + http://www.intersoftsolutions.com/Crosslight + http://www.intersoftsolutions.com/assets/images/default/logo-crosslight-medium.png + Provides server logging functionality that use Entity Framework for the storage. 
+ + Copyright © 2016 Intersoft Solutions + Intersoft Crosslight Logging Entity Framework EF PCL Portable Class Library Log + + + + + + + \ No newline at end of file diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3-flatcontainer_sil.core.desktop_10.0.1-beta0012_sil.core.desktop.nuspec b/swh/lister/nuget/tests/data/https_api.nuget.org/v3-flatcontainer_sil.core.desktop_10.0.1-beta0012_sil.core.desktop.nuspec new file mode 100644 index 0000000..e28954d --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3-flatcontainer_sil.core.desktop_10.0.1-beta0012_sil.core.desktop.nuspec @@ -0,0 +1,46 @@ + + + + SIL.Core.Desktop + 10.0.1-beta0012 + SIL International + MIT + https://licenses.nuget.org/MIT + https://github.com/sillsdev/libpalaso + SIL.Core.Desktop provides general UI related utilities for language software. + Changes since version 10.0.0 + +Added: +- [SIL.Core] Added SIL.PlatformUtilities.Platform.IsFlatpak property. +- [SIL.Core.Desktop] Added Testing channel to UpdateSettings. + +Fixed: +- [SIL.Core] Fixed SIL.IO.PathUtilities.DeleteToRecycleBin and .GetDefaultFileManager to work in a flatpak environment. +- [SIL.Windows.Forms] Fixed ImageToolbox.ImageGallery.ImageCollectionManager.FromStandardLocations to work in a flatpak environment. +- [SIL.WritingSystems] Fixed SLDR initialization for users with European number formats. + +See full changelog at https://github.com/sillsdev/libpalaso/blob/master/CHANGELOG.md + Copyright © 2010-2021 SIL International + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.08.07.54_sil.core.desktop.10.0.1-beta0012.json b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.08.07.54_sil.core.desktop.10.0.1-beta0012.json new file mode 100644 index 0000000..224dd28 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.08.07.54_sil.core.desktop.10.0.1-beta0012.json @@ -0,0 +1,189 @@ +{ + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json", + "@type": [ + "PackageDetails", + "catalog:Permalink" + ], + "authors": "SIL International", + "catalog:commitId": "f97b1cb0-9a7f-4bfd-a9ee-d61a3cd59c6d", + "catalog:commitTimeStamp": "2022-09-23T08:07:54.051884Z", + "copyright": "Copyright © 2010-2021 SIL International", + "created": "2022-08-25T21:03:44.337Z", + "description": "SIL.Core.Desktop provides general UI related utilities for language software.", + "frameworkAssemblyGroup": { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#frameworkassemblygroup/.netframework4.6.1", + "assembly": [ + "System.Configuration", + "System.Management", + "System.Security" + ], + "targetFramework": ".NETFramework4.6.1" + }, + "id": "SIL.Core.Desktop", + "isPrerelease": true, + "lastEdited": "2022-09-23T08:07:21.247Z", + "licenseExpression": "MIT", + "licenseUrl": "https://licenses.nuget.org/MIT", + "listed": false, + "packageHash": "VeWqaDhGa/Y2dPKmeujabcBtTV2itT0sicfQnt5ZMESHddQg1S0T3Rm+8QDsGx7H7hGVHM/lTEbblOJ3LwyBBQ==", + "packageHashAlgorithm": "SHA512", + "packageSize": 68053, + "projectUrl": "https://github.com/sillsdev/libpalaso", + "published": "1900-01-01T00:00:00Z", + "releaseNotes": "Changes since version 10.0.0\n\nAdded:\n- [SIL.Core] Added SIL.PlatformUtilities.Platform.IsFlatpak property.\n- [SIL.Core.Desktop] Added Testing channel to 
UpdateSettings.\n\nFixed:\n- [SIL.Core] Fixed SIL.IO.PathUtilities.DeleteToRecycleBin and .GetDefaultFileManager to work in a flatpak environment.\n- [SIL.Windows.Forms] Fixed ImageToolbox.ImageGallery.ImageCollectionManager.FromStandardLocations to work in a flatpak environment.\n- [SIL.WritingSystems] Fixed SLDR initialization for users with European number formats.\n\nSee full changelog at https://github.com/sillsdev/libpalaso/blob/master/CHANGELOG.md", + "repository": "", + "verbatimVersion": "10.0.1-beta0012", + "version": "10.0.1-beta0012", + "dependencyGroups": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netframework4.6.1", + "@type": "PackageDependencyGroup", + "dependencies": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netframework4.6.1/sil.core", + "@type": "PackageDependency", + "id": "SIL.Core", + "range": "[10.0.1-beta0012, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netframework4.6.1/ndesk.dbus", + "@type": "PackageDependency", + "id": "NDesk.DBus", + "range": "[0.15.0, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netframework4.6.1/system.configuration.configurationmanager", + "@type": "PackageDependency", + "id": "System.Configuration.ConfigurationManager", + "range": "[6.0.0, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netframework4.6.1/system.drawing.common", + "@type": "PackageDependency", + "id": "System.Drawing.Common", + "range": "[6.0.0, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netframework4.6.1/system.io.filesystem.accesscontrol", + "@type": "PackageDependency", + "id": "System.IO.FileSystem.AccessControl", + "range": "[5.0.0, )" + } + ], + "targetFramework": ".NETFramework4.6.1" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netstandard2.0", + "@type": "PackageDependencyGroup", + "dependencies": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netstandard2.0/sil.core", + "@type": "PackageDependency", + "id": "SIL.Core", + "range": "[10.0.1-beta0012, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netstandard2.0/system.configuration.configurationmanager", + "@type": "PackageDependency", + "id": "System.Configuration.ConfigurationManager", + "range": "[6.0.0, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netstandard2.0/system.drawing.common", + "@type": "PackageDependency", + "id": "System.Drawing.Common", + "range": "[6.0.0, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netstandard2.0/system.io.filesystem.accesscontrol", + "@type": "PackageDependency", + "id": "System.IO.FileSystem.AccessControl", + "range": "[5.0.0, )" + } + ], + "targetFramework": ".NETStandard2.0" + } + ], + "packageEntries": [ + { + 
"@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#SIL.Core.Desktop.nuspec", + "@type": "PackageEntry", + "compressedLength": 984, + "fullName": "SIL.Core.Desktop.nuspec", + "length": 2835, + "name": "SIL.Core.Desktop.nuspec" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#lib/net461/SIL.Core.Desktop.dll", + "@type": "PackageEntry", + "compressedLength": 31890, + "fullName": "lib/net461/SIL.Core.Desktop.dll", + "length": 75776, + "name": "SIL.Core.Desktop.dll" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#lib/netstandard2.0/SIL.Core.Desktop.dll", + "@type": "PackageEntry", + "compressedLength": 23793, + "fullName": "lib/netstandard2.0/SIL.Core.Desktop.dll", + "length": 55296, + "name": "SIL.Core.Desktop.dll" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#.signature.p7s", + "@type": "PackageEntry", + "compressedLength": 9465, + "fullName": ".signature.p7s", + "length": 9465, + "name": ".signature.p7s" + } + ], + "@context": { + "@vocab": "http://schema.nuget.org/schema#", + "catalog": "http://schema.nuget.org/catalog#", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "dependencies": { + "@id": "dependency", + "@container": "@set" + }, + "dependencyGroups": { + "@id": "dependencyGroup", + "@container": "@set" + }, + "packageEntries": { + "@id": "packageEntry", + "@container": "@set" + }, + "packageTypes": { + "@id": "packageType", + "@container": "@set" + }, + "supportedFrameworks": { + "@id": "supportedFramework", + "@container": "@set" + }, + "tags": { + "@id": "tag", + "@container": "@set" + }, + "vulnerabilities": { + "@id": "vulnerability", + "@container": "@set" + }, + "published": { + "@type": "xsd:dateTime" + }, + "created": { + "@type": "xsd:dateTime" + }, + "lastEdited": { + "@type": "xsd:dateTime" + }, + "catalog:commitTimeStamp": { + "@type": "xsd:dateTime" + }, + "reasons": { + "@container": "@set" + } + } +} \ No newline at end of file diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.09.10.26_intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.09.10.26_intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json new file mode 100644 index 0000000..e35c9c3 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.09.10.26_intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json @@ -0,0 +1,142 @@ +{ + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json", + "@type": [ + "PackageDetails", + "catalog:Permalink" + ], + "authors": "Intersoft Solutions", + "catalog:commitId": "8b51bc91-722d-48fb-a4e4-e6167c9a459b", + "catalog:commitTimeStamp": "2022-09-23T09:10:26.5844749Z", + "copyright": "Copyright © 2016 Intersoft Solutions", + "created": "2022-09-23T09:08:08.377Z", + "description": "Provides server logging functionality that use Entity Framework for the storage.", + "iconUrl": "http://www.intersoftsolutions.com/assets/images/default/logo-crosslight-medium.png", + "id": "Intersoft.Crosslight.Logging.EntityFramework", + "isPrerelease": true, + "lastEdited": "2022-09-23T09:10:09.59Z", + "licenseUrl": 
"http://www.intersoftsolutions.com/Products/Licensing", + "listed": true, + "packageHash": "8la7uRv/KjXFBRiSgbvODemvNmjDCYbZ0/15WfNJCBz4gq3mFoKcuOuq1xwMUwi88DqHGI9Ov//6k7bocsaT1w==", + "packageHashAlgorithm": "SHA512", + "packageSize": 16908, + "projectUrl": "http://www.intersoftsolutions.com/Crosslight", + "published": "2022-09-23T09:08:08.377Z", + "releaseNotes": "", + "requireLicenseAcceptance": false, + "title": "Intersoft Crosslight Logging EntityFramework", + "verbatimVersion": "5.0.5000.1235-experimental", + "version": "5.0.5000.1235-experimental", + "dependencyGroups": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#dependencygroup", + "@type": "PackageDependencyGroup", + "dependencies": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#dependencygroup/intersoft.crosslight", + "@type": "PackageDependency", + "id": "Intersoft.Crosslight", + "range": "[5.0.5000.1235-experimental, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#dependencygroup/intersoft.crosslight.logging", + "@type": "PackageDependency", + "id": "Intersoft.Crosslight.Logging", + "range": "[5.0.5000.1235-experimental, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#dependencygroup/entityframework", + "@type": "PackageDependency", + "id": "EntityFramework", + "range": "[6.0.2, )" + } + ] + } + ], + "packageEntries": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#Intersoft.Crosslight.Logging.EntityFramework.nuspec", + "@type": "PackageEntry", + "compressedLength": 541, + "fullName": "Intersoft.Crosslight.Logging.EntityFramework.nuspec", + "length": 1339, + "name": "Intersoft.Crosslight.Logging.EntityFramework.nuspec" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#lib/net45/Intersoft.Crosslight.Logging.EntityFramework.Server.dll", + "@type": "PackageEntry", + "compressedLength": 4952, + "fullName": "lib/net45/Intersoft.Crosslight.Logging.EntityFramework.Server.dll", + "length": 11264, + "name": "Intersoft.Crosslight.Logging.EntityFramework.Server.dll" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#.signature.p7s", + "@type": "PackageEntry", + "compressedLength": 9474, + "fullName": ".signature.p7s", + "length": 9474, + "name": ".signature.p7s" + } + ], + "tags": [ + "Intersoft", + "Crosslight", + "Logging", + "Entity", + "Framework", + "EF", + "PCL", + "Portable", + "Class", + "Library", + "Log" + ], + "@context": { + "@vocab": "http://schema.nuget.org/schema#", + "catalog": "http://schema.nuget.org/catalog#", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "dependencies": { + "@id": "dependency", + "@container": "@set" + }, + "dependencyGroups": { + "@id": "dependencyGroup", + "@container": "@set" + }, + "packageEntries": { + "@id": "packageEntry", + "@container": "@set" + }, + "packageTypes": { + "@id": "packageType", + "@container": "@set" + }, + "supportedFrameworks": { + "@id": 
"supportedFramework", + "@container": "@set" + }, + "tags": { + "@id": "tag", + "@container": "@set" + }, + "vulnerabilities": { + "@id": "vulnerability", + "@container": "@set" + }, + "published": { + "@type": "xsd:dateTime" + }, + "created": { + "@type": "xsd:dateTime" + }, + "lastEdited": { + "@type": "xsd:dateTime" + }, + "catalog:commitTimeStamp": { + "@type": "xsd:dateTime" + }, + "reasons": { + "@container": "@set" + } + } +} \ No newline at end of file diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_index.json b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_index.json new file mode 100644 index 0000000..5c2a5a3 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_index.json @@ -0,0 +1,53 @@ +{ + "@id": "https://api.nuget.org/v3/catalog0/index.json", + "@type": [ + "CatalogRoot", + "AppendOnlyCatalog", + "Permalink" + ], + "commitId": "94389591-14c5-4802-8cdf-0c4e46dbaaea", + "commitTimeStamp": "2022-09-23T08:18:58.8986871Z", + "count": 16959, + "nuget:lastCreated": "2022-09-23T08:18:53.69Z", + "nuget:lastDeleted": "2022-09-21T22:41:42.7622396Z", + "nuget:lastEdited": "2022-09-23T08:18:53.69Z", + "items": [ + { + "@id": "https://api.nuget.org/v3/catalog0/page11702.json", + "@type": "CatalogPage", + "commitId": "6df640f0-8681-460e-adb3-8ea5de6f53cc", + "commitTimeStamp": "2021-01-11T08:39:31.3161021Z", + "count": 550 + }, + { + "@id": "https://api.nuget.org/v3/catalog0/page16958.json", + "@type": "CatalogPage", + "commitId": "94389591-14c5-4802-8cdf-0c4e46dbaaea", + "commitTimeStamp": "2022-09-23T08:18:58.8986871Z", + "count": 240 + } + ], + "@context": { + "@vocab": "http://schema.nuget.org/catalog#", + "nuget": "http://schema.nuget.org/schema#", + "items": { + "@id": "item", + "@container": "@set" + }, + "parent": { + "@type": "@id" + }, + "commitTimeStamp": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + }, + "nuget:lastCreated": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + }, + "nuget:lastEdited": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + }, + "nuget:lastDeleted": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + } + } +} diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page11702.json b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page11702.json new file mode 100644 index 0000000..a5c30ad --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page11702.json @@ -0,0 +1,52 @@ +{ + "@id": "https://api.nuget.org/v3/catalog0/page11702.json", + "@type": "CatalogPage", + "commitId": "6df640f0-8681-460e-adb3-8ea5de6f53cc", + "commitTimeStamp": "2021-01-11T08:39:31.3161021Z", + "count": 550, + "parent": "https://api.nuget.org/v3/catalog0/index.json", + "items": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2021.01.11.07.51.35/hanatech.application.3.1.3.6-preview9.83.0.json", + "@type": "nuget:PackageDetails", + "commitId": "7d6ac844-6613-41dd-bbb3-998cec87f79d", + "commitTimeStamp": "2021-01-11T07:51:35.5330856Z", + "nuget:id": "HanaTech.Application", + "nuget:version": "3.1.3.6-preview9.83.0" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.20.14.22.12/glader.essentials.gameframework.2.3.117.json", + "@type": "nuget:PackageDetails", + "commitId": "2340f078-1087-47a6-969d-be0f1fac4249", + "commitTimeStamp": "2022-09-20T14:22:12.7481401Z", + "nuget:id": "Glader.Essentials.GameFramework", + "nuget:version": "2.3.117" + }, + { + "@id": 
"https://api.nuget.org/v3/catalog0/data/2021.01.11.07.28.39/hanatech.framework.workflow.3.1.3.6-preview9.33.0.json", + "@type": "nuget:PackageDetails", + "commitId": "be968d19-2aee-434f-9013-9a0bcbe1e2c8", + "commitTimeStamp": "2021-01-11T07:28:39.9688859Z", + "nuget:id": "HanaTech.Framework.WorkFlow", + "nuget:version": "3.1.3.6-preview9.33.0" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2021.01.11.07.28.39/hanatech.framework.workflow.3.1.3.6-preview9.74.0.json", + "@type": "nuget:PackageDetails", + "commitId": "be968d19-2aee-434f-9013-9a0bcbe1e2c8", + "commitTimeStamp": "2021-01-11T07:28:39.9688859Z", + "nuget:id": "HanaTech.Framework.WorkFlow", + "nuget:version": "3.1.3.6-preview9.74.0" + } + ], + "@context": { + "@vocab": "http://schema.nuget.org/catalog#", + "nuget": "http://schema.nuget.org/schema#", + "items": {"@id": "item", "@container": "@set"}, + "parent": {"@type": "@id"}, + "commitTimeStamp": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"}, + "nuget:lastCreated": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"}, + "nuget:lastEdited": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"}, + "nuget:lastDeleted": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"} + } +} diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page16958.json b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page16958.json new file mode 100644 index 0000000..eb6c0d5 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page16958.json @@ -0,0 +1,44 @@ +{ + "@id": "https://api.nuget.org/v3/catalog0/page16958.json", + "@type": "CatalogPage", + "commitId": "8b51bc91-722d-48fb-a4e4-e6167c9a459b", + "commitTimeStamp": "2022-09-23T09:10:26.5844749Z", + "count": 546, + "parent": "https://api.nuget.org/v3/catalog0/index.json", + "items": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json", + "@type": "nuget:PackageDetails", + "commitId": "8b51bc91-722d-48fb-a4e4-e6167c9a459b", + "commitTimeStamp": "2022-09-23T09:10:26.5844749Z", + "nuget:id": "Intersoft.Crosslight.Logging.EntityFramework", + "nuget:version": "5.0.5000.1235-experimental" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.tests.10.0.1-beta0005.json", + "@type": "nuget:PackageDetails", + "commitId": "f97b1cb0-9a7f-4bfd-a9ee-d61a3cd59c6d", + "commitTimeStamp": "2022-09-23T08:07:54.051884Z", + "nuget:id": "SIL.Core.Tests", + "nuget:version": "10.0.1-beta0005" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json", + "@type": "nuget:PackageDetails", + "commitId": "f97b1cb0-9a7f-4bfd-a9ee-d61a3cd59c6d", + "commitTimeStamp": "2022-09-23T08:07:54.051884Z", + "nuget:id": "SIL.Core.Desktop", + "nuget:version": "10.0.1-beta0012" + } + ], + "@context": { + "@vocab": "http://schema.nuget.org/catalog#", + "nuget": "http://schema.nuget.org/schema#", + "items": {"@id": "item", "@container": "@set"}, + "parent": {"@type": "@id"}, + "commitTimeStamp": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"}, + "nuget:lastCreated": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"}, + "nuget:lastEdited": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"}, + "nuget:lastDeleted": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"} + } +} diff --git a/swh/lister/nuget/tests/test_lister.py b/swh/lister/nuget/tests/test_lister.py new file mode 100644 index 0000000..8c94c8e --- 
/dev/null +++ b/swh/lister/nuget/tests/test_lister.py @@ -0,0 +1,34 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.lister.nuget.lister import NugetLister + +expected_origins = ["https://github.com/sillsdev/libpalaso.git"] + + +def test_nuget_lister(datadir, requests_mock_datadir, swh_scheduler): + lister = NugetLister(scheduler=swh_scheduler) + res = lister.run() + + assert res.pages == 2 + assert res.origins == 1 + + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results + + assert len(scheduler_origins) == len(expected_origins) + + assert [ + ( + scheduled.visit_type, + scheduled.url, + ) + for scheduled in sorted(scheduler_origins, key=lambda scheduled: scheduled.url) + ] == [ + ( + "git", + url, + ) + for url in expected_origins + ] diff --git a/swh/lister/nuget/tests/test_tasks.py b/swh/lister/nuget/tests/test_tasks.py new file mode 100644 index 0000000..aa8fc4c --- /dev/null +++ b/swh/lister/nuget/tests/test_tasks.py @@ -0,0 +1,31 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.lister.pattern import ListerStats + + +def test_nuget_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker): + res = swh_scheduler_celery_app.send_task("swh.lister.nuget.tasks.ping") + assert res + res.wait() + assert res.successful() + assert res.result == "OK" + + +def test_nuget_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker): + # setup the mocked NugetLister + lister = mocker.patch("swh.lister.nuget.tasks.NugetLister") + lister.from_configfile.return_value = lister + stats = ListerStats(pages=42, origins=42) + lister.run.return_value = stats + + res = swh_scheduler_celery_app.send_task("swh.lister.nuget.tasks.NugetListerTask") + assert res + res.wait() + assert res.successful() + assert res.result == stats.dict() + + lister.from_configfile.assert_called_once_with() + lister.run.assert_called_once_with() diff --git a/swh/lister/packagist/lister.py b/swh/lister/packagist/lister.py index 19b4721..251c25a 100644 --- a/swh/lister/packagist/lister.py +++ b/swh/lister/packagist/lister.py @@ -1,184 +1,179 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from dataclasses import dataclass from datetime import datetime, timezone import logging from typing import Any, Dict, Iterator, List, Optional import iso8601 import requests +from swh.core.github.utils import GitHubSession from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin -from .. 
import USER_AGENT from ..pattern import CredentialsType, Lister logger = logging.getLogger(__name__) PackagistPageType = List[str] @dataclass class PackagistListerState: """State of Packagist lister""" last_listing_date: Optional[datetime] = None """Last date when packagist lister was executed""" class PackagistLister(Lister[PackagistListerState, PackagistPageType]): """ List all Packagist projects and send associated origins to the scheduler. The lister queries the Packagist API, whose documentation can be found at https://packagist.org/apidoc. For each package, its metadata are retrieved using Packagist API endpoints whose responses are served from static files, which are guaranteed to be efficient on the Packagist side (no dynamic queries). Furthermore, subsequent listings will send the "If-Modified-Since" HTTP header to only retrieve package metadata updated since the previous listing operation, in order to save bandwidth and return only origins which might have newly released versions. """ LISTER_NAME = "Packagist" PACKAGIST_PACKAGES_LIST_URL = "https://packagist.org/packages/list.json" PACKAGIST_REPO_BASE_URL = "https://repo.packagist.org/p" def __init__( self, scheduler: SchedulerInterface, credentials: CredentialsType = None, ): super().__init__( scheduler=scheduler, url=self.PACKAGIST_PACKAGES_LIST_URL, instance="packagist", credentials=credentials, ) - self.session = requests.Session() - self.session.headers.update( - {"Accept": "application/json", "User-Agent": USER_AGENT} - ) + self.session.headers.update({"Accept": "application/json"}) self.listing_date = datetime.now().astimezone(tz=timezone.utc) + self.github_session = GitHubSession( + credentials=self.credentials, + user_agent=str(self.session.headers["User-Agent"]), + ) def state_from_dict(self, d: Dict[str, Any]) -> PackagistListerState: last_listing_date = d.get("last_listing_date") if last_listing_date is not None: d["last_listing_date"] = iso8601.parse_date(last_listing_date) return PackagistListerState(**d) def state_to_dict(self, state: PackagistListerState) -> Dict[str, Any]: d: Dict[str, Optional[str]] = {"last_listing_date": None} last_listing_date = state.last_listing_date if last_listing_date is not None: d["last_listing_date"] = last_listing_date.isoformat() return d def api_request(self, url: str) -> Any: - logger.debug("Fetching URL %s", url) - - response = self.session.get(url) - - if response.status_code not in (200, 304): - logger.warning( - "Unexpected HTTP status code %s on %s: %s", - response.status_code, - response.url, - response.content, - ) - - response.raise_for_status() - + response = self.http_request(url) # response is empty when status code is 304 return response.json() if response.status_code == 200 else {} def get_pages(self) -> Iterator[PackagistPageType]: """ Yield a single page listing all Packagist projects. """ yield self.api_request(self.PACKAGIST_PACKAGES_LIST_URL)["packageNames"] def get_origins_from_page(self, page: PackagistPageType) -> Iterator[ListedOrigin]: """ Iterate on all Packagist projects and yield ListedOrigin instances.
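Each version entry in a package's metadata provides a "source" mapping from which the origin URL and visit type are extracted, for instance (illustrative excerpt, matching the idevlab/essential test fixture below):: "source": {"url": "git@gitlab.com:idevlab/Essential.git", "type": "git"}, "time": "2022-10-12T10:34:29+00:00"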
""" assert self.lister_obj.id is not None # save some bandwidth by only getting packages metadata updated since # last listing if self.state.last_listing_date is not None: if_modified_since = self.state.last_listing_date.strftime( "%a, %d %b %Y %H:%M:%S GMT" ) self.session.headers["If-Modified-Since"] = if_modified_since # to ensure origins will not be listed multiple times origin_urls = set() for package_name in page: try: metadata = self.api_request( f"{self.PACKAGIST_REPO_BASE_URL}/{package_name}.json" ) if not metadata.get("packages", {}): # package metadata not updated since last listing continue if package_name not in metadata["packages"]: # missing package metadata in response continue versions_info = metadata["packages"][package_name].values() - except requests.exceptions.HTTPError: + except requests.HTTPError: # error when getting package metadata (usually 404 when a # package has been removed), skip it and process next package continue origin_url = None visit_type = None last_update = None # extract origin url for package, vcs type and latest release date for version_info in versions_info: origin_url = version_info.get("source", {}).get("url", "") if not origin_url: continue # can be git, hg or svn visit_type = version_info.get("source", {}).get("type", "") dist_time_str = version_info.get("time", "") if not dist_time_str: continue dist_time = iso8601.parse_date(dist_time_str) if last_update is None or dist_time > last_update: last_update = dist_time # skip package with already seen origin url or with missing required info if visit_type is None or origin_url is None or origin_url in origin_urls: continue + if visit_type == "git": + # Non-github urls will be returned as is, github ones will be canonical + # ones + origin_url = ( + self.github_session.get_canonical_url(origin_url) or origin_url + ) + # bitbucket closed its mercurial hosting service, those origins can not be # loaded into the archive anymore if visit_type == "hg" and origin_url.startswith("https://bitbucket.org/"): continue origin_urls.add(origin_url) logger.debug( "Found package %s last updated on %s", package_name, last_update ) yield ListedOrigin( lister_id=self.lister_obj.id, url=origin_url, visit_type=visit_type, last_update=last_update, ) def finalize(self) -> None: self.state.last_listing_date = self.listing_date self.updated = True diff --git a/swh/lister/packagist/tests/data/https_api.github.com/repos_gitlky_wx_article b/swh/lister/packagist/tests/data/https_api.github.com/repos_gitlky_wx_article new file mode 100644 index 0000000..3892be4 --- /dev/null +++ b/swh/lister/packagist/tests/data/https_api.github.com/repos_gitlky_wx_article @@ -0,0 +1 @@ +{"html_url": "https://github.com/gitlky/wx_article"} diff --git a/swh/lister/packagist/tests/data/https_api.github.com/repos_spryker-eco_computop-api b/swh/lister/packagist/tests/data/https_api.github.com/repos_spryker-eco_computop-api new file mode 100644 index 0000000..0e84d32 --- /dev/null +++ b/swh/lister/packagist/tests/data/https_api.github.com/repos_spryker-eco_computop-api @@ -0,0 +1,130 @@ +{ + "id": 133818271, + "node_id": "MDEwOlJlcG9zaXRvcnkxMzM4MTgyNzE=", + "name": "computop-api", + "full_name": "spryker-eco/computop-api", + "private": false, + "owner": { + "login": "spryker-eco", + "id": 25103059, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjI1MTAzMDU5", + "avatar_url": "https://avatars.githubusercontent.com/u/25103059?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/spryker-eco", + "html_url": "https://github.com/spryker-eco", + 
"followers_url": "https://api.github.com/users/spryker-eco/followers", + "following_url": "https://api.github.com/users/spryker-eco/following{/other_user}", + "gists_url": "https://api.github.com/users/spryker-eco/gists{/gist_id}", + "starred_url": "https://api.github.com/users/spryker-eco/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/spryker-eco/subscriptions", + "organizations_url": "https://api.github.com/users/spryker-eco/orgs", + "repos_url": "https://api.github.com/users/spryker-eco/repos", + "events_url": "https://api.github.com/users/spryker-eco/events{/privacy}", + "received_events_url": "https://api.github.com/users/spryker-eco/received_events", + "type": "Organization", + "site_admin": false + }, + "html_url": "https://github.com/spryker-eco/computop-api", + "description": "Spryker Eco ComputopApi module", + "fork": false, + "url": "https://api.github.com/repos/spryker-eco/computop-api", + "forks_url": "https://api.github.com/repos/spryker-eco/computop-api/forks", + "keys_url": "https://api.github.com/repos/spryker-eco/computop-api/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/spryker-eco/computop-api/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/spryker-eco/computop-api/teams", + "hooks_url": "https://api.github.com/repos/spryker-eco/computop-api/hooks", + "issue_events_url": "https://api.github.com/repos/spryker-eco/computop-api/issues/events{/number}", + "events_url": "https://api.github.com/repos/spryker-eco/computop-api/events", + "assignees_url": "https://api.github.com/repos/spryker-eco/computop-api/assignees{/user}", + "branches_url": "https://api.github.com/repos/spryker-eco/computop-api/branches{/branch}", + "tags_url": "https://api.github.com/repos/spryker-eco/computop-api/tags", + "blobs_url": "https://api.github.com/repos/spryker-eco/computop-api/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/spryker-eco/computop-api/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/spryker-eco/computop-api/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/spryker-eco/computop-api/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/spryker-eco/computop-api/statuses/{sha}", + "languages_url": "https://api.github.com/repos/spryker-eco/computop-api/languages", + "stargazers_url": "https://api.github.com/repos/spryker-eco/computop-api/stargazers", + "contributors_url": "https://api.github.com/repos/spryker-eco/computop-api/contributors", + "subscribers_url": "https://api.github.com/repos/spryker-eco/computop-api/subscribers", + "subscription_url": "https://api.github.com/repos/spryker-eco/computop-api/subscription", + "commits_url": "https://api.github.com/repos/spryker-eco/computop-api/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/spryker-eco/computop-api/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/spryker-eco/computop-api/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/spryker-eco/computop-api/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/spryker-eco/computop-api/contents/{+path}", + "compare_url": "https://api.github.com/repos/spryker-eco/computop-api/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/spryker-eco/computop-api/merges", + "archive_url": "https://api.github.com/repos/spryker-eco/computop-api/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/spryker-eco/computop-api/downloads", + 
"issues_url": "https://api.github.com/repos/spryker-eco/computop-api/issues{/number}", + "pulls_url": "https://api.github.com/repos/spryker-eco/computop-api/pulls{/number}", + "milestones_url": "https://api.github.com/repos/spryker-eco/computop-api/milestones{/number}", + "notifications_url": "https://api.github.com/repos/spryker-eco/computop-api/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/spryker-eco/computop-api/labels{/name}", + "releases_url": "https://api.github.com/repos/spryker-eco/computop-api/releases{/id}", + "deployments_url": "https://api.github.com/repos/spryker-eco/computop-api/deployments", + "created_at": "2018-05-17T13:34:07Z", + "updated_at": "2021-12-28T13:55:55Z", + "pushed_at": "2022-03-18T14:05:09Z", + "git_url": "git://github.com/spryker-eco/computop-api.git", + "ssh_url": "git@github.com:spryker-eco/computop-api.git", + "clone_url": "https://github.com/spryker-eco/computop-api.git", + "svn_url": "https://github.com/spryker-eco/computop-api", + "homepage": "https://spryker.com", + "size": 198, + "stargazers_count": 0, + "watchers_count": 0, + "language": "PHP", + "has_issues": true, + "has_projects": false, + "has_downloads": true, + "has_wiki": true, + "has_pages": false, + "forks_count": 0, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 0, + "license": { + "key": "mit", + "name": "MIT License", + "spdx_id": "MIT", + "url": "https://api.github.com/licenses/mit", + "node_id": "MDc6TGljZW5zZTEz" + }, + "allow_forking": true, + "is_template": false, + "web_commit_signoff_required": false, + "topics": [ + + ], + "visibility": "public", + "forks": 0, + "open_issues": 0, + "watchers": 0, + "default_branch": "master", + "temp_clone_token": null, + "organization": { + "login": "spryker-eco", + "id": 25103059, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjI1MTAzMDU5", + "avatar_url": "https://avatars.githubusercontent.com/u/25103059?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/spryker-eco", + "html_url": "https://github.com/spryker-eco", + "followers_url": "https://api.github.com/users/spryker-eco/followers", + "following_url": "https://api.github.com/users/spryker-eco/following{/other_user}", + "gists_url": "https://api.github.com/users/spryker-eco/gists{/gist_id}", + "starred_url": "https://api.github.com/users/spryker-eco/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/spryker-eco/subscriptions", + "organizations_url": "https://api.github.com/users/spryker-eco/orgs", + "repos_url": "https://api.github.com/users/spryker-eco/repos", + "events_url": "https://api.github.com/users/spryker-eco/events{/privacy}", + "received_events_url": "https://api.github.com/users/spryker-eco/received_events", + "type": "Organization", + "site_admin": false + }, + "network_count": 0, + "subscribers_count": 33 +} diff --git a/swh/lister/packagist/tests/data/https_api.github.com/repos_ycms_module-main b/swh/lister/packagist/tests/data/https_api.github.com/repos_ycms_module-main new file mode 100644 index 0000000..e1b4664 --- /dev/null +++ b/swh/lister/packagist/tests/data/https_api.github.com/repos_ycms_module-main @@ -0,0 +1,104 @@ +{ + "id": 38592537, + "node_id": "MDEwOlJlcG9zaXRvcnkzODU5MjUzNw==", + "name": "module-main", + "full_name": "GameCHN/module-main", + "private": false, + "owner": { + "login": "GameCHN", + "id": 13175811, + "node_id": "MDQ6VXNlcjEzMTc1ODEx", + "avatar_url": "https://avatars.githubusercontent.com/u/13175811?v=4", + "gravatar_id": "", + "url": 
"https://api.github.com/users/GameCHN", + "html_url": "https://github.com/GameCHN", + "followers_url": "https://api.github.com/users/GameCHN/followers", + "following_url": "https://api.github.com/users/GameCHN/following{/other_user}", + "gists_url": "https://api.github.com/users/GameCHN/gists{/gist_id}", + "starred_url": "https://api.github.com/users/GameCHN/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/GameCHN/subscriptions", + "organizations_url": "https://api.github.com/users/GameCHN/orgs", + "repos_url": "https://api.github.com/users/GameCHN/repos", + "events_url": "https://api.github.com/users/GameCHN/events{/privacy}", + "received_events_url": "https://api.github.com/users/GameCHN/received_events", + "type": "User", + "site_admin": false + }, + "html_url": "https://github.com/GameCHN/module-main", + "description": null, + "fork": false, + "url": "https://api.github.com/repos/GameCHN/module-main", + "forks_url": "https://api.github.com/repos/GameCHN/module-main/forks", + "keys_url": "https://api.github.com/repos/GameCHN/module-main/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/GameCHN/module-main/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/GameCHN/module-main/teams", + "hooks_url": "https://api.github.com/repos/GameCHN/module-main/hooks", + "issue_events_url": "https://api.github.com/repos/GameCHN/module-main/issues/events{/number}", + "events_url": "https://api.github.com/repos/GameCHN/module-main/events", + "assignees_url": "https://api.github.com/repos/GameCHN/module-main/assignees{/user}", + "branches_url": "https://api.github.com/repos/GameCHN/module-main/branches{/branch}", + "tags_url": "https://api.github.com/repos/GameCHN/module-main/tags", + "blobs_url": "https://api.github.com/repos/GameCHN/module-main/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/GameCHN/module-main/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/GameCHN/module-main/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/GameCHN/module-main/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/GameCHN/module-main/statuses/{sha}", + "languages_url": "https://api.github.com/repos/GameCHN/module-main/languages", + "stargazers_url": "https://api.github.com/repos/GameCHN/module-main/stargazers", + "contributors_url": "https://api.github.com/repos/GameCHN/module-main/contributors", + "subscribers_url": "https://api.github.com/repos/GameCHN/module-main/subscribers", + "subscription_url": "https://api.github.com/repos/GameCHN/module-main/subscription", + "commits_url": "https://api.github.com/repos/GameCHN/module-main/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/GameCHN/module-main/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/GameCHN/module-main/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/GameCHN/module-main/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/GameCHN/module-main/contents/{+path}", + "compare_url": "https://api.github.com/repos/GameCHN/module-main/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/GameCHN/module-main/merges", + "archive_url": "https://api.github.com/repos/GameCHN/module-main/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/GameCHN/module-main/downloads", + "issues_url": "https://api.github.com/repos/GameCHN/module-main/issues{/number}", + "pulls_url": 
"https://api.github.com/repos/GameCHN/module-main/pulls{/number}", + "milestones_url": "https://api.github.com/repos/GameCHN/module-main/milestones{/number}", + "notifications_url": "https://api.github.com/repos/GameCHN/module-main/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/GameCHN/module-main/labels{/name}", + "releases_url": "https://api.github.com/repos/GameCHN/module-main/releases{/id}", + "deployments_url": "https://api.github.com/repos/GameCHN/module-main/deployments", + "created_at": "2015-07-06T02:08:07Z", + "updated_at": "2015-07-06T02:09:45Z", + "pushed_at": "2015-08-23T04:42:42Z", + "git_url": "git://github.com/GameCHN/module-main.git", + "ssh_url": "git@github.com:GameCHN/module-main.git", + "clone_url": "https://github.com/GameCHN/module-main.git", + "svn_url": "https://github.com/GameCHN/module-main", + "homepage": null, + "size": 172, + "stargazers_count": 0, + "watchers_count": 0, + "language": "PHP", + "has_issues": true, + "has_projects": true, + "has_downloads": true, + "has_wiki": true, + "has_pages": false, + "forks_count": 0, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 0, + "license": null, + "allow_forking": true, + "is_template": false, + "web_commit_signoff_required": false, + "topics": [ + + ], + "visibility": "public", + "forks": 0, + "open_issues": 0, + "watchers": 0, + "default_branch": "master", + "temp_clone_token": null, + "network_count": 0, + "subscribers_count": 2 +} diff --git a/swh/lister/packagist/tests/data/idevlab_essential.json b/swh/lister/packagist/tests/data/idevlab_essential.json new file mode 100644 index 0000000..9891d8c --- /dev/null +++ b/swh/lister/packagist/tests/data/idevlab_essential.json @@ -0,0 +1,309 @@ +{ + "packages": { + "idevlab/essential": { + "1.0.1": { + "name": "idevlab/essential", + "description": "All the methods and tools useful for the development of the various idevlab projects.", + "keywords": [], + "homepage": "", + "version": "1.0.1", + "version_normalized": "1.0.1.0", + "license": [], + "authors": [ + { + "name": "Florian Sinama", + "email": "f.sinama@gmail.com" + } + ], + "source": { + "url": "git@gitlab.com:idevlab/Essential.git", + "type": "git", + "reference": "6ff62de2e789aae308f3ff6fb11ea5955c806e19" + }, + "dist": { + "url": "https://gitlab.com/api/v4/projects/idevlab%2FEssential/repository/archive.zip?sha=6ff62de2e789aae308f3ff6fb11ea5955c806e19", + "type": "zip", + "shasum": "", + "reference": "6ff62de2e789aae308f3ff6fb11ea5955c806e19" + }, + "type": "library", + "time": "2022-08-17T21:57:20+00:00", + "autoload": { + "psr-4": { + "Idevlab\\Essential\\": "src/" + } + }, + "require": { + "php": "^8.1" + }, + "uid": 6622550 + }, + "1.1.0": { + "name": "idevlab/essential", + "description": "All the methods and tools useful for the development of the various idevlab projects.", + "keywords": [], + "homepage": "", + "version": "1.1.0", + "version_normalized": "1.1.0.0", + "license": [], + "authors": [ + { + "name": "Florian Sinama", + "email": "f.sinama@gmail.com" + } + ], + "source": { + "url": "git@gitlab.com:idevlab/Essential.git", + "type": "git", + "reference": "e69bdd42f03d7c453be072eef0c62f7cfeae2af8" + }, + "dist": { + "url": "https://gitlab.com/api/v4/projects/idevlab%2FEssential/repository/archive.zip?sha=e69bdd42f03d7c453be072eef0c62f7cfeae2af8", + "type": "zip", + "shasum": "", + "reference": "e69bdd42f03d7c453be072eef0c62f7cfeae2af8" + }, + "type": "library", + "time": "2022-08-17T22:11:20+00:00", + "autoload": { + 
"psr-4": { + "Idevlab\\Essential\\": "src/" + } + }, + "require": { + "php": "^8.1", + "ext-mbstring": "*" + }, + "uid": 6622551 + }, + "1.1.1": { + "name": "idevlab/essential", + "description": "All the methods and tools useful for the development of the various idevlab projects.", + "keywords": [], + "homepage": "", + "version": "1.1.1", + "version_normalized": "1.1.1.0", + "license": [], + "authors": [ + { + "name": "Florian Sinama", + "email": "f.sinama@gmail.com" + } + ], + "source": { + "url": "git@gitlab.com:idevlab/Essential.git", + "type": "git", + "reference": "c6f5113ffad27402a9cafe593efd518517bc321c" + }, + "dist": { + "url": "https://gitlab.com/api/v4/projects/idevlab%2FEssential/repository/archive.zip?sha=c6f5113ffad27402a9cafe593efd518517bc321c", + "type": "zip", + "shasum": "", + "reference": "c6f5113ffad27402a9cafe593efd518517bc321c" + }, + "type": "library", + "time": "2022-08-17T22:31:14+00:00", + "autoload": { + "psr-4": { + "Idevlab\\Essential\\": "src/" + } + }, + "require": { + "php": "^8.1", + "ext-mbstring": "*" + }, + "uid": 6622552 + }, + "1.2.0": { + "name": "idevlab/essential", + "description": "All the methods and tools useful for the development of the various idevlab projects.", + "keywords": [], + "homepage": "", + "version": "1.2.0", + "version_normalized": "1.2.0.0", + "license": [], + "authors": [ + { + "name": "Florian Sinama", + "email": "f.sinama@gmail.com" + } + ], + "source": { + "url": "git@gitlab.com:idevlab/Essential.git", + "type": "git", + "reference": "263fd95fcadfbfd4af6108749be0699d69d4df90" + }, + "dist": { + "url": "https://gitlab.com/api/v4/projects/idevlab%2FEssential/repository/archive.zip?sha=263fd95fcadfbfd4af6108749be0699d69d4df90", + "type": "zip", + "shasum": "", + "reference": "263fd95fcadfbfd4af6108749be0699d69d4df90" + }, + "type": "library", + "time": "2022-10-12T10:34:29+00:00", + "autoload": { + "psr-4": { + "Idevlab\\Essential\\": "src/" + } + }, + "require": { + "php": "^8.1", + "ext-mbstring": "*" + }, + "uid": 6624846 + }, + "dev-develop": { + "name": "idevlab/essential", + "description": "All the methods and tools useful for the development of the various idevlab projects.", + "keywords": [], + "homepage": "", + "version": "dev-develop", + "version_normalized": "dev-develop", + "license": [], + "authors": [ + { + "name": "Florian Sinama", + "email": "f.sinama@gmail.com" + } + ], + "source": { + "url": "git@gitlab.com:idevlab/Essential.git", + "type": "git", + "reference": "8125bcc747e1bf5086a3195f74a682b7be0aea6a" + }, + "dist": { + "url": "https://gitlab.com/api/v4/projects/idevlab%2FEssential/repository/archive.zip?sha=8125bcc747e1bf5086a3195f74a682b7be0aea6a", + "type": "zip", + "shasum": "", + "reference": "8125bcc747e1bf5086a3195f74a682b7be0aea6a" + }, + "type": "library", + "time": "2022-10-12T10:34:29+00:00", + "autoload": { + "psr-4": { + "Idevlab\\Essential\\": "src/" + } + }, + "require": { + "php": "^8.1", + "ext-mbstring": "*" + }, + "uid": 6622554 + }, + "dev-main": { + "name": "idevlab/essential", + "description": "All the methods and tools useful for the development of the various idevlab projects.", + "keywords": [], + "homepage": "", + "version": "dev-main", + "version_normalized": "dev-main", + "license": [], + "authors": [ + { + "name": "Florian Sinama", + "email": "f.sinama@gmail.com" + } + ], + "source": { + "url": "git@gitlab.com:idevlab/Essential.git", + "type": "git", + "reference": "263fd95fcadfbfd4af6108749be0699d69d4df90" + }, + "dist": { + "url": 
"https://gitlab.com/api/v4/projects/idevlab%2FEssential/repository/archive.zip?sha=263fd95fcadfbfd4af6108749be0699d69d4df90", + "type": "zip", + "shasum": "", + "reference": "263fd95fcadfbfd4af6108749be0699d69d4df90" + }, + "type": "library", + "time": "2022-10-12T10:34:29+00:00", + "autoload": { + "psr-4": { + "Idevlab\\Essential\\": "src/" + } + }, + "default-branch": true, + "require": { + "php": "^8.1", + "ext-mbstring": "*" + }, + "uid": 6622553 + }, + "dev-release/1.2.0": { + "name": "idevlab/essential", + "description": "All the methods and tools useful for the development of the various idevlab projects.", + "keywords": [], + "homepage": "", + "version": "dev-release/1.2.0", + "version_normalized": "dev-release/1.2.0", + "license": [], + "authors": [ + { + "name": "Florian Sinama", + "email": "f.sinama@gmail.com" + } + ], + "source": { + "url": "git@gitlab.com:idevlab/Essential.git", + "type": "git", + "reference": "1688c6cb5f3b0e9f21a5bbcb9f9951138ab51a66" + }, + "dist": { + "url": "https://gitlab.com/api/v4/projects/idevlab%2FEssential/repository/archive.zip?sha=1688c6cb5f3b0e9f21a5bbcb9f9951138ab51a66", + "type": "zip", + "shasum": "", + "reference": "1688c6cb5f3b0e9f21a5bbcb9f9951138ab51a66" + }, + "type": "library", + "time": "2022-10-12T10:34:14+00:00", + "autoload": { + "psr-4": { + "Idevlab\\Essential\\": "src/" + } + }, + "require": { + "php": "^8.1", + "ext-mbstring": "*" + }, + "uid": 6624845 + }, + "v1.0.0": { + "name": "idevlab/essential", + "description": "All the methods and tools useful for the development of the various idevlab projects.", + "keywords": [], + "homepage": "", + "version": "v1.0.0", + "version_normalized": "1.0.0.0", + "license": [], + "authors": [ + { + "name": "Florian Sinama", + "email": "f.sinama@gmail.com" + } + ], + "source": { + "url": "git@gitlab.com:idevlab/Essential.git", + "type": "git", + "reference": "b86767e1baf9ed9c218abcba963361876d55138a" + }, + "dist": { + "url": "https://gitlab.com/api/v4/projects/idevlab%2FEssential/repository/archive.zip?sha=b86767e1baf9ed9c218abcba963361876d55138a", + "type": "zip", + "shasum": "", + "reference": "b86767e1baf9ed9c218abcba963361876d55138a" + }, + "type": "library", + "time": "2022-08-17T21:34:05+00:00", + "autoload": { + "psr-4": { + "Idevlab\\Essential\\": "src/" + } + }, + "require": { + "php": "^8.1" + }, + "uid": 6622549 + } + } + } +} diff --git a/swh/lister/packagist/tests/data/ycms_module-main.json b/swh/lister/packagist/tests/data/ycms_module-main.json new file mode 100644 index 0000000..ae22e39 --- /dev/null +++ b/swh/lister/packagist/tests/data/ycms_module-main.json @@ -0,0 +1,41 @@ +{ + "packages": { + "ycms/module-main": { + "dev-master": { + "name": "ycms/module-main", + "description": "", + "keywords": [], + "homepage": "", + "version": "dev-master", + "version_normalized": "9999999-dev", + "license": [], + "authors": [ + { + "name": "YCMS Labs", + "email": "ycms.net@gmail.com" + } + ], + "source": { + "type": "git", + "url": "git@github.com:ycms/module-main.git", + "reference": "1173796881fbd7202009a68a2a59a5150bf2dbc6" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/ycms/module-main/zipball/1173796881fbd7202009a68a2a59a5150bf2dbc6", + "reference": "1173796881fbd7202009a68a2a59a5150bf2dbc6", + "shasum": "" + }, + "type": "wordpress-plugin", + "time": "2015-08-23T04:42:33+00:00", + "autoload": { + "psr-4": { + "YC\\Main\\": "" + } + }, + "default-branch": true, + "uid": 4064797 + } + } + } +} diff --git a/swh/lister/packagist/tests/test_lister.py 
b/swh/lister/packagist/tests/test_lister.py index 64b4439..e2782ee 100644 --- a/swh/lister/packagist/tests/test_lister.py +++ b/swh/lister/packagist/tests/test_lister.py @@ -1,159 +1,199 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import datetime import json from pathlib import Path -import iso8601 - from swh.lister.packagist.lister import PackagistLister _packages_list = { "packageNames": [ "ljjackson/linnworks", "lky/wx_article", "spryker-eco/computop-api", + "idevlab/essential", ] } def _package_metadata(datadir, package_name): return json.loads( Path(datadir, f"{package_name.replace('/', '_')}.json").read_text() ) -def _package_origin_info(package_name, package_metadata): - origin_url = None - visit_type = None - last_update = None - for version_info in package_metadata["packages"][package_name].values(): - origin_url = version_info["source"].get("url") - visit_type = version_info["source"].get("type") - if "time" in version_info: - version_date = iso8601.parse_date(version_info["time"]) - if last_update is None or version_date > last_update: - last_update = version_date - return origin_url, visit_type, last_update - - def _request_without_if_modified_since(request): return request.headers.get("If-Modified-Since") is None def _request_with_if_modified_since(request): return request.headers.get("If-Modified-Since") is not None -def test_packagist_lister(swh_scheduler, requests_mock, datadir): +def test_packagist_lister(swh_scheduler, requests_mock, datadir, requests_mock_datadir): # first listing, should return one origin per package lister = PackagistLister(scheduler=swh_scheduler) requests_mock.get(lister.PACKAGIST_PACKAGES_LIST_URL, json=_packages_list) packages_metadata = {} for package_name in _packages_list["packageNames"]: metadata = _package_metadata(datadir, package_name) packages_metadata[package_name] = metadata requests_mock.get( f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json", json=metadata, additional_matcher=_request_without_if_modified_since, ) stats = lister.run() assert stats.pages == 1 assert stats.origins == len(_packages_list["packageNames"]) assert lister.updated - scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results + expected_origins = { + ( + "https://github.com/gitlky/wx_article", # standard case + "git", + datetime.datetime.fromisoformat("2018-08-30T07:37:09+00:00"), + ), + ( + "https://github.com/ljjackson/linnworks.git", # API goes 404 + "git", + datetime.datetime.fromisoformat("2018-11-01T21:45:50+00:00"), + ), + ( + "https://github.com/spryker-eco/computop-api", # SSH URL in manifest + "git", + datetime.datetime.fromisoformat("2020-06-22T15:50:29+00:00"), + ), + ( + "git@gitlab.com:idevlab/Essential.git", # not GitHub + "git", + datetime.datetime.fromisoformat("2022-10-12T10:34:29+00:00"), + ), + } - for package_name, package_metadata in packages_metadata.items(): - origin_url, visit_type, last_update = _package_origin_info( - package_name, package_metadata - ) - filtered_origins = [o for o in scheduler_origins if o.url == origin_url] - assert filtered_origins - assert filtered_origins[0].visit_type == visit_type - assert filtered_origins[0].last_update == last_update + assert expected_origins == { + (o.url, o.visit_type, o.last_update) + for o in 
swh_scheduler.get_listed_origins(lister.lister_obj.id).results + } # second listing, should return 0 origins as no package metadata # has been updated since first listing lister = PackagistLister(scheduler=swh_scheduler) for package_name in _packages_list["packageNames"]: requests_mock.get( f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json", additional_matcher=_request_with_if_modified_since, status_code=304, ) assert lister.get_state_from_scheduler().last_listing_date is not None stats = lister.run() assert stats.pages == 1 assert stats.origins == 0 assert lister.updated + assert expected_origins == { + (o.url, o.visit_type, o.last_update) + for o in swh_scheduler.get_listed_origins(lister.lister_obj.id).results + } + def test_packagist_lister_missing_metadata(swh_scheduler, requests_mock, datadir): lister = PackagistLister(scheduler=swh_scheduler) requests_mock.get(lister.PACKAGIST_PACKAGES_LIST_URL, json=_packages_list) for package_name in _packages_list["packageNames"]: requests_mock.get( f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json", additional_matcher=_request_without_if_modified_since, status_code=404, ) stats = lister.run() assert stats.pages == 1 assert stats.origins == 0 def test_packagist_lister_empty_metadata(swh_scheduler, requests_mock, datadir): lister = PackagistLister(scheduler=swh_scheduler) requests_mock.get(lister.PACKAGIST_PACKAGES_LIST_URL, json=_packages_list) for package_name in _packages_list["packageNames"]: requests_mock.get( f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json", additional_matcher=_request_without_if_modified_since, json={"packages": {}}, ) stats = lister.run() assert stats.pages == 1 assert stats.origins == 0 def test_packagist_lister_package_with_bitbucket_hg_origin( swh_scheduler, requests_mock, datadir ): package_name = "den1n/contextmenu" lister = PackagistLister(scheduler=swh_scheduler) requests_mock.get( lister.PACKAGIST_PACKAGES_LIST_URL, json={"packageNames": [package_name]} ) requests_mock.get( f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json", additional_matcher=_request_without_if_modified_since, json=_package_metadata(datadir, package_name), ) stats = lister.run() assert stats.pages == 1 assert stats.origins == 0 +def test_packagist_lister_package_normalize_github_origin( + swh_scheduler, requests_mock, datadir, requests_mock_datadir +): + package_name = "ycms/module-main" + lister = PackagistLister(scheduler=swh_scheduler) + requests_mock.get( + lister.PACKAGIST_PACKAGES_LIST_URL, json={"packageNames": [package_name]} + ) + requests_mock.get( + f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json", + additional_matcher=_request_without_if_modified_since, + json=_package_metadata(datadir, package_name), + ) + + stats = lister.run() + + assert stats.pages == 1 + assert stats.origins == 1 + + expected_origins = { + ( + "https://github.com/GameCHN/module-main", + "git", + datetime.datetime.fromisoformat("2015-08-23T04:42:33+00:00"), + ), + } + assert expected_origins == { + (o.url, o.visit_type, o.last_update) + for o in swh_scheduler.get_listed_origins(lister.lister_obj.id).results + } + + def test_lister_from_configfile(swh_scheduler_config, mocker): load_from_envvar = mocker.patch("swh.lister.pattern.load_from_envvar") load_from_envvar.return_value = { "scheduler": {"cls": "local", **swh_scheduler_config}, "credentials": {}, } lister = PackagistLister.from_configfile() assert lister.scheduler is not None assert lister.credentials is not None diff --git a/swh/lister/pattern.py b/swh/lister/pattern.py index 
63a2fff..7492683 100644 --- a/swh/lister/pattern.py +++ b/swh/lister/pattern.py @@ -1,284 +1,321 @@ -# Copyright (C) 2020-2021 The Software Heritage developers +# Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import annotations from dataclasses import dataclass -from typing import Any, Dict, Generic, Iterable, Iterator, List, Optional, TypeVar +import logging +from typing import Any, Dict, Generic, Iterable, Iterator, List, Optional, Set, TypeVar from urllib.parse import urlparse +import requests +from tenacity.before_sleep import before_sleep_log + from swh.core.config import load_from_envvar from swh.core.utils import grouper from swh.scheduler import get_scheduler, model from swh.scheduler.interface import SchedulerInterface +from . import USER_AGENT_TEMPLATE +from .utils import http_retry + +logger = logging.getLogger(__name__) + @dataclass class ListerStats: pages: int = 0 origins: int = 0 def __add__(self, other: ListerStats) -> ListerStats: return self.__class__(self.pages + other.pages, self.origins + other.origins) def __iadd__(self, other: ListerStats): self.pages += other.pages self.origins += other.origins def dict(self) -> Dict[str, int]: return {"pages": self.pages, "origins": self.origins} StateType = TypeVar("StateType") PageType = TypeVar("PageType") BackendStateType = Dict[str, Any] CredentialsType = Optional[Dict[str, Dict[str, List[Dict[str, str]]]]] class Lister(Generic[StateType, PageType]): """The base class for a Software Heritage lister. A lister scrapes a page by page list of origins from an upstream (a forge, the API of a package manager, ...), and massages the results of that scrape into a list of origins that are recorded by the scheduler backend. The main loop of the lister, :meth:`run`, basically revolves around the :meth:`get_pages` iterator, which sets up the lister state, then yields the scrape results page by page. The :meth:`get_origins_from_page` method converts the pages into a list of :class:`model.ListedOrigin`, sent to the scheduler at every page. The :meth:`commit_page` method can be used to update the lister state after a page of origins has been recorded in the scheduler backend. The :func:`finalize` method is called at lister teardown (whether the run has been successful or not) to update the local :attr:`state` object before it's sent to the database. This method must set the :attr:`updated` attribute if an updated state needs to be sent to the scheduler backend. This method can call :func:`get_state_from_scheduler` to refresh and merge the lister state from the scheduler before it's finalized (and potentially minimize the risk of race conditions between concurrent runs of the lister). The state of the lister is serialized and deserialized from the dict stored in the scheduler backend, using the :meth:`state_from_dict` and :meth:`state_to_dict` methods. Args: scheduler: the instance of the Scheduler being used to register the origins listed by this lister url: a URL representing this lister, e.g. the API's base URL instance: the instance name, to uniquely identify this lister instance, if not provided the URL network location will be used credentials: dictionary of credentials for all listers. The first level identifies the :attr:`LISTER_NAME`, the second level the lister :attr:`instance`. 
The final level is a list of dicts containing the expected credentials for the given instance of that lister. Generic types: - *StateType*: concrete lister type; should usually be a :class:`dataclass` for stricter typing - *PageType*: type of scrape results; can usually be a :class:`requests.Response`, or a :class:`dict` (a minimal concrete example is sketched below) """ LISTER_NAME: str = "" def __init__( self, scheduler: SchedulerInterface, url: str, instance: Optional[str] = None, credentials: CredentialsType = None, ): if not self.LISTER_NAME: raise ValueError("Must set the LISTER_NAME attribute on Lister classes") self.url = url if instance is not None: self.instance = instance else: self.instance = urlparse(url).netloc self.scheduler = scheduler if not credentials: credentials = {} self.credentials = list( credentials.get(self.LISTER_NAME, {}).get(self.instance, []) ) # store the initial state of the lister self.state = self.get_state_from_scheduler() self.updated = False + self.session = requests.Session() + # Declaring a descriptive User-Agent is friendlier for the sysadmins of the forges we list + self.session.headers.update( + {"User-Agent": USER_AGENT_TEMPLATE % self.LISTER_NAME} + ) + + self.recorded_origins: Set[str] = set() + + @http_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) + def http_request(self, url: str, method="GET", **kwargs) -> requests.Response: + + logger.debug("Fetching URL %s with params %s", url, kwargs.get("params")) + + response = self.session.request(method, url, **kwargs) + if response.status_code not in (200, 304): + logger.warning( + "Unexpected HTTP status code %s on %s: %s", + response.status_code, + response.url, + response.content, + ) + response.raise_for_status() + + return response + def run(self) -> ListerStats: """Run the lister. Returns: A counter with the number of pages and origins seen for this run of the lister. """ full_stats = ListerStats() + self.recorded_origins = set() try: for page in self.get_pages(): full_stats.pages += 1 origins = self.get_origins_from_page(page) - full_stats.origins += self.send_origins(origins) + sent_origins = self.send_origins(origins) + self.recorded_origins.update(sent_origins) + full_stats.origins = len(self.recorded_origins) self.commit_page(page) finally: self.finalize() if self.updated: self.set_state_in_scheduler() return full_stats def get_state_from_scheduler(self) -> StateType: """Update the state in the current instance from the state in the scheduler backend. This updates :attr:`lister_obj`, and returns its (deserialized) current state, to allow for comparison with the local state. Returns: the state retrieved from the scheduler backend """ self.lister_obj = self.scheduler.get_or_create_lister( name=self.LISTER_NAME, instance_name=self.instance ) return self.state_from_dict(self.lister_obj.current_state) def set_state_in_scheduler(self) -> None: """Update the state in the scheduler backend from the state of the current instance. Raises: swh.scheduler.exc.StaleData: in case of a race condition between concurrent listers (from :meth:`swh.scheduler.Scheduler.update_lister`).
""" self.lister_obj.current_state = self.state_to_dict(self.state) self.lister_obj = self.scheduler.update_lister(self.lister_obj) # State management to/from the scheduler def state_from_dict(self, d: BackendStateType) -> StateType: """Convert the state stored in the scheduler backend (as a dict), to the concrete StateType for this lister.""" raise NotImplementedError def state_to_dict(self, state: StateType) -> BackendStateType: """Convert the StateType for this lister to its serialization as dict for storage in the scheduler. Values must be JSON-compatible as that's what the backend database expects. """ raise NotImplementedError def finalize(self) -> None: """Custom hook to finalize the lister state before returning from the main loop. This method must set :attr:`updated` if the lister has done some work. If relevant, this method can use :meth`get_state_from_scheduler` to merge the current lister state with the one from the scheduler backend, reducing the risk of race conditions if we're running concurrent listings. This method is called in a `finally` block, which means it will also run when the lister fails. """ pass # Actual listing logic def get_pages(self) -> Iterator[PageType]: """Retrieve a list of pages of listed results. This is the main loop of the lister. Returns: an iterator of raw pages fetched from the platform currently being listed. """ raise NotImplementedError def get_origins_from_page(self, page: PageType) -> Iterator[model.ListedOrigin]: """Extract a list of :class:`model.ListedOrigin` from a raw page of results. Args: page: a single page of results Returns: an iterator for the origins present on the given page of results """ raise NotImplementedError def commit_page(self, page: PageType) -> None: """Custom hook called after the current page has been committed in the scheduler backend. This method can be used to update the state after a page of origins has been successfully recorded in the scheduler backend. If the new state should be recorded at the point the lister completes, the :attr:`updated` attribute must be set. """ pass - def send_origins(self, origins: Iterable[model.ListedOrigin]) -> int: + def send_origins(self, origins: Iterable[model.ListedOrigin]) -> List[str]: """Record a list of :class:`model.ListedOrigin` in the scheduler. Returns: - the number of listed origins recorded in the scheduler + the list of origin URLs recorded in scheduler database """ - count = 0 + recorded_origins = [] for batch_origins in grouper(origins, n=1000): ret = self.scheduler.record_listed_origins(batch_origins) - count += len(ret) + recorded_origins += [origin.url for origin in ret] - return count + return recorded_origins @classmethod def from_config(cls, scheduler: Dict[str, Any], **config: Any): """Instantiate a lister from a configuration dict. This is basically a backwards-compatibility shim for the CLI. Args: scheduler: instantiation config for the scheduler config: the configuration dict for the lister, with the following keys: - credentials (optional): credentials list for the scheduler - any other kwargs passed to the lister. Returns: the instantiated lister """ # Drop the legacy config keys which aren't used for this generation of listers. 
for legacy_key in ("storage", "lister", "celery"): config.pop(legacy_key, None) # Instantiate the scheduler scheduler_instance = get_scheduler(**scheduler) return cls(scheduler=scheduler_instance, **config) @classmethod def from_configfile(cls, **kwargs: Any): """Instantiate a lister from the configuration loaded from the SWH_CONFIG_FILENAME envvar, with potential extra keyword arguments if their value is not None. Args: kwargs: kwargs passed to the lister instantiation """ config = dict(load_from_envvar()) config.update({k: v for k, v in kwargs.items() if v is not None}) return cls.from_config(**config) class StatelessLister(Lister[None, PageType], Generic[PageType]): def state_from_dict(self, d: BackendStateType) -> None: """Always return empty state""" return None def state_to_dict(self, state: None) -> BackendStateType: """Always set empty state""" return {} diff --git a/swh/lister/phabricator/lister.py b/swh/lister/phabricator/lister.py index 83ddc31..4556178 100644 --- a/swh/lister/phabricator/lister.py +++ b/swh/lister/phabricator/lister.py @@ -1,185 +1,165 @@ -# Copyright (C) 2019-2021 the Software Heritage developers +# Copyright (C) 2019-2022 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information + from collections import defaultdict import logging import random from typing import Any, Dict, Iterator, List, Optional from urllib.parse import urljoin -import requests - -from swh.lister import USER_AGENT from swh.lister.pattern import CredentialsType, StatelessLister from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin logger = logging.getLogger(__name__) PageType = List[Dict[str, Any]] class PhabricatorLister(StatelessLister[PageType]): """ List all repositories hosted on a Phabricator instance. Args: url: base URL of a phabricator forge (for instance https://forge.softwareheritage.org) instance: string identifier for the listed forge, URL network location will be used if not provided api_token: authentication token for Conduit API """ LISTER_NAME = "phabricator" API_REPOSITORY_PATH = "/api/diffusion.repository.search" def __init__( self, scheduler: SchedulerInterface, url: str, instance: Optional[str] = None, api_token: Optional[str] = None, credentials: CredentialsType = None, ): super().__init__( scheduler, urljoin(url, self.API_REPOSITORY_PATH), instance, credentials ) - self.session = requests.Session() - self.session.headers.update( - {"Accept": "application/json", "User-Agent": USER_AGENT} - ) + self.session.headers.update({"Accept": "application/json"}) if api_token is not None: self.api_token = api_token else: if not self.credentials: raise ValueError( f"No credentials found for phabricator instance {self.instance};" " Please set them in the lister configuration file." 
) self.api_token = random.choice(self.credentials)["password"] def get_request_params(self, after: Optional[str]) -> Dict[str, str]: """Get the query parameters for the request.""" base_params = { # Stable order "order": "oldest", # Add all URIs to the response "attachments[uris]": "1", # API token from stored credentials "api.token": self.api_token, } if after is not None: base_params["after"] = after return base_params @staticmethod def filter_params(params: Dict[str, str]) -> Dict[str, str]: """Filter the parameters for debug purposes""" return { k: (v if k != "api.token" else "**redacted**") for k, v in params.items() } def get_pages(self) -> Iterator[PageType]: after: Optional[str] = None while True: params = self.get_request_params(after) - logger.debug( - "Retrieving results on URI %s with parameters %s", - self.url, - self.filter_params(params), - ) - response = self.session.post(self.url, data=params) - - if response.status_code != 200: - logger.warning( - "Unexpected HTTP status code %s on %s: %s", - response.status_code, - response.url, - response.content, - ) - - response.raise_for_status() + response = self.http_request(self.url, method="POST", data=params) response_data = response.json() if response_data.get("result") is None: logger.warning( "Got unexpected response on %s: %s", response.url, response_data, ) break result = response_data["result"] yield result["data"] after = None if "cursor" in result and "after" in result["cursor"]: after = result["cursor"]["after"] if not after: logger.debug("Empty `after` cursor. All done") break def get_origins_from_page(self, page: PageType) -> Iterator[ListedOrigin]: assert self.lister_obj.id is not None for repo in page: url = get_repo_url(repo["attachments"]["uris"]["uris"]) if url is None: short_name: Optional[str] = None for field in "shortName", "name", "callsign": short_name = repo["fields"].get(field) if short_name: break logger.warning( "No valid url for repository [%s] (phid=%s)", short_name or repo["phid"], repo["phid"], ) continue yield ListedOrigin( lister_id=self.lister_obj.id, url=url, visit_type=repo["fields"]["vcs"], # The "dateUpdated" field returned by the Phabricator API only refers to # the repository metadata; we can't use it for our purposes.
last_update=None, ) def get_repo_url(attachments: List[Dict[str, Any]]) -> Optional[str]: """ Return url for a hosted repository from its uris attachments according to the following priority lists: * protocol: https > http * identifier: shortname > callsign > id """ processed_urls = defaultdict(dict) # type: Dict[str, Any] for uri in attachments: protocol = uri["fields"]["builtin"]["protocol"] url = uri["fields"]["uri"]["effective"] identifier = uri["fields"]["builtin"]["identifier"] if protocol in ("http", "https"): processed_urls[protocol][identifier] = url elif protocol is None: for protocol in ("https", "http"): if url.startswith(protocol): processed_urls[protocol]["undefined"] = url break for protocol in ["https", "http"]: for identifier in ["shortname", "callsign", "id", "undefined"]: if protocol in processed_urls and identifier in processed_urls[protocol]: return processed_urls[protocol][identifier] return None diff --git a/swh/lister/phabricator/tests/test_lister.py b/swh/lister/phabricator/tests/test_lister.py index a638c40..c6e7043 100644 --- a/swh/lister/phabricator/tests/test_lister.py +++ b/swh/lister/phabricator/tests/test_lister.py @@ -1,137 +1,143 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from pathlib import Path import pytest from requests.exceptions import HTTPError -from swh.lister import USER_AGENT +from swh.lister import USER_AGENT_TEMPLATE from swh.lister.phabricator.lister import PhabricatorLister, get_repo_url @pytest.fixture def phabricator_repositories_page1(datadir): return json.loads( Path(datadir, "phabricator_api_repositories_page1.json").read_text() ) @pytest.fixture def phabricator_repositories_page2(datadir): return json.loads( Path(datadir, "phabricator_api_repositories_page2.json").read_text() ) +@pytest.fixture(autouse=True) +def retry_sleep_mock(mocker): + mocker.patch.object(PhabricatorLister.http_request.retry, "sleep") + + def test_get_repo_url(phabricator_repositories_page1): repos = phabricator_repositories_page1["result"]["data"] for repo in repos: expected_name = "https://forge.softwareheritage.org/source/%s.git" % ( repo["fields"]["shortName"] ) assert get_repo_url(repo["attachments"]["uris"]["uris"]) == expected_name def test_get_repo_url_undefined_protocol(): undefined_protocol_uris = [ { "fields": { "uri": { "raw": "https://svn.blender.org/svnroot/bf-blender/", "display": "https://svn.blender.org/svnroot/bf-blender/", "effective": "https://svn.blender.org/svnroot/bf-blender/", "normalized": "svn.blender.org/svnroot/bf-blender", }, "builtin": {"protocol": None, "identifier": None}, }, } ] expected_name = "https://svn.blender.org/svnroot/bf-blender/" assert get_repo_url(undefined_protocol_uris) == expected_name def test_lister_url_param(swh_scheduler): FORGE_BASE_URL = "https://forge.softwareheritage.org" API_REPOSITORY_PATH = "/api/diffusion.repository.search" for url in ( FORGE_BASE_URL, f"{FORGE_BASE_URL}/", f"{FORGE_BASE_URL}/{API_REPOSITORY_PATH}", f"{FORGE_BASE_URL}/{API_REPOSITORY_PATH}/", ): lister = PhabricatorLister( scheduler=swh_scheduler, url=FORGE_BASE_URL, instance="swh", api_token="foo" ) expected_url = f"{FORGE_BASE_URL}{API_REPOSITORY_PATH}" assert lister.url == expected_url def test_lister( swh_scheduler, requests_mock, 
phabricator_repositories_page1, phabricator_repositories_page2, ): FORGE_BASE_URL = "https://forge.softwareheritage.org" API_TOKEN = "foo" lister = PhabricatorLister( scheduler=swh_scheduler, url=FORGE_BASE_URL, instance="swh", api_token=API_TOKEN ) def match_request(request): return ( - request.headers.get("User-Agent") == USER_AGENT + request.headers.get("User-Agent") + == USER_AGENT_TEMPLATE % PhabricatorLister.LISTER_NAME and f"api.token={API_TOKEN}" in request.body ) requests_mock.post( f"{FORGE_BASE_URL}{lister.API_REPOSITORY_PATH}", [ {"json": phabricator_repositories_page1}, {"json": phabricator_repositories_page2}, ], additional_matcher=match_request, ) stats = lister.run() expected_nb_origins = len(phabricator_repositories_page1["result"]["data"]) * 2 assert stats.pages == 2 assert stats.origins == expected_nb_origins scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert len(scheduler_origins) == expected_nb_origins def test_lister_request_error( swh_scheduler, requests_mock, phabricator_repositories_page1, ): FORGE_BASE_URL = "https://forge.softwareheritage.org" lister = PhabricatorLister( scheduler=swh_scheduler, url=FORGE_BASE_URL, instance="swh", api_token="foo" ) requests_mock.post( f"{FORGE_BASE_URL}{lister.API_REPOSITORY_PATH}", [ {"status_code": 200, "json": phabricator_repositories_page1}, {"status_code": 500, "reason": "Internal Server Error"}, ], ) with pytest.raises(HTTPError): lister.run() diff --git a/swh/lister/pubdev/__init__.py b/swh/lister/pubdev/__init__.py index 63bde65..310595f 100644 --- a/swh/lister/pubdev/__init__.py +++ b/swh/lister/pubdev/__init__.py @@ -1,71 +1,71 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """ Pub.dev lister ============== The Pubdev lister lists origins from `pub.dev`_, the `Dart`_ and `Flutter`_ packages registry. The registry provides an `http api`_ from which the lister retrieves package names. As of August 2022, `pub.dev`_ lists 33535 package names. Origins retrieving strategy --------------------------- To get a list of all package names we call the `https://pub.dev/api/package-names` endpoint. There is no other way for discovery (no archive index, no database dump, no dvcs repository). Page listing ------------ There is only one page, which lists all origin urls based on `https://pub.dev/api/packages/{pkgname}`. The origin url corresponds to the http api endpoint that returns complete information about the package versions (name, version, author, description, release date). Origins from page ----------------- The lister yields all origin urls from that single page. Running tests ------------- Activate the virtualenv and run from within the swh-lister directory:: pytest -s -vv --log-cli-level=DEBUG swh/lister/pubdev/tests Testing with Docker ------------------- Change directory to swh/docker then launch the docker environment:: docker-compose up -d -Then connect to the lister:: +Then schedule a pubdev listing task:: - docker exec -it docker_swh-lister_1 bash + docker compose exec swh-scheduler swh scheduler task add -p oneshot list-pubdev -And run the lister (The output of this listing results in “oneshot” tasks in the scheduler):: +You can follow the lister execution by displaying the logs of the swh-lister service:: - swh lister run -l pubdev + docker compose logs -f swh-lister .. _pub.dev: https://pub.dev ..
_Dart: https://dart.dev .. _Flutter: https://flutter.dev .. _http api: https://pub.dev/help/api """ def register(): from .lister import PubDevLister return { "lister": PubDevLister, "task_modules": ["%s.tasks" % __name__], } diff --git a/swh/lister/pubdev/lister.py b/swh/lister/pubdev/lister.py index a17ad0e..fd1dc45 100644 --- a/swh/lister/pubdev/lister.py +++ b/swh/lister/pubdev/lister.py @@ -1,125 +1,94 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information + import logging -from typing import Any, Dict, Iterator, List, Optional +from typing import Iterator, List, Optional import iso8601 -import requests from requests.exceptions import HTTPError -from tenacity.before_sleep import before_sleep_log -from swh.lister.utils import throttling_retry from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin -from .. import __version__ from ..pattern import CredentialsType, StatelessLister -# https://github.com/dart-lang/pub/blob/master/doc/repository-spec-v2.md#metadata-headers -USER_AGENT = ( - f"Software Heritage PubDev Lister v{__version__} " - "(+https://www.softwareheritage.org/contact)" -) logger = logging.getLogger(__name__) # Aliasing the page results returned by the `get_pages` method from the lister. PubDevListerPage = List[str] class PubDevLister(StatelessLister[PubDevListerPage]): """List pub.dev (Dart, Flutter) origins.""" LISTER_NAME = "pubdev" VISIT_TYPE = "pubdev" INSTANCE = "pubdev" BASE_URL = "https://pub.dev/" PACKAGE_NAMES_URL_PATTERN = "{base_url}api/package-names" PACKAGE_INFO_URL_PATTERN = "{base_url}api/packages/{pkgname}" ORIGIN_URL_PATTERN = "{base_url}packages/{pkgname}" def __init__( self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, ): super().__init__( scheduler=scheduler, credentials=credentials, instance=self.INSTANCE, url=self.BASE_URL, ) - self.session = requests.Session() - self.session.headers.update( - { - "Accept": "application/json", - "User-Agent": USER_AGENT, - } - ) - - @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) - def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response: - - logger.debug("Fetching URL %s with params %s", url, params) - - response = self.session.get(url, params=params) - if response.status_code != 200: - logger.warning( - "Unexpected HTTP status code %s on %s: %s", - response.status_code, - response.url, - response.content, - ) - response.raise_for_status() - return response + self.session.headers.update({"Accept": "application/json"}) def get_pages(self) -> Iterator[PubDevListerPage]: """Yield an iterator which returns 'page' It uses the api provided by https://pub.dev/api/ to find Dart and Flutter package origins. The http api call to "{base_url}api/package-names" retrieves a sorted list of all package names.
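For context, here is a minimal standalone sketch of the single-page flow this docstring describes; the endpoint layout follows `PACKAGE_NAMES_URL_PATTERN` and the response shape (`{"packages": [...]}`) matches what `get_pages` consumes below:

```python
# Hedged sketch, outside the lister: fetch the sorted package-name list and
# derive the one origin url per package that the lister yields.
import requests

base_url = "https://pub.dev/"
response = requests.get(
    f"{base_url}api/package-names", headers={"Accept": "application/json"}
)
response.raise_for_status()
package_names = response.json()["packages"]  # e.g. ["Autolinker", "Babylon", ...]

origin_urls = [f"{base_url}packages/{pkgname}" for pkgname in package_names]
```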
There is only one page, which lists all origin urls based on "{base_url}packages/{pkgname}" """ response = self.http_request( url=self.PACKAGE_NAMES_URL_PATTERN.format(base_url=self.url) ) yield response.json()["packages"] def get_origins_from_page(self, page: PubDevListerPage) -> Iterator[ListedOrigin]: """Iterate on all pages and yield ListedOrigin instances.""" assert self.lister_obj.id is not None for pkgname in page: package_info_url = self.PACKAGE_INFO_URL_PATTERN.format( base_url=self.url, pkgname=pkgname ) try: - response = self.page_request(url=package_info_url, params={}) + response = self.http_request(url=package_info_url) except HTTPError: logger.warning( "Failed to fetch metadata for package %s, skipping it from listing.", pkgname, ) continue package_metadata = response.json() package_versions = package_metadata["versions"] last_published = max( package_version["published"] for package_version in package_versions ) origin_url = self.ORIGIN_URL_PATTERN.format( base_url=self.url, pkgname=pkgname ) yield ListedOrigin( lister_id=self.lister_obj.id, visit_type=self.VISIT_TYPE, url=origin_url, last_update=iso8601.parse_date(last_published), ) diff --git a/swh/lister/pubdev/tests/test_lister.py b/swh/lister/pubdev/tests/test_lister.py index ac2be14..5113249 100644 --- a/swh/lister/pubdev/tests/test_lister.py +++ b/swh/lister/pubdev/tests/test_lister.py @@ -1,49 +1,53 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from swh.lister.pubdev.lister import USER_AGENT, PubDevLister +from swh.lister import USER_AGENT_TEMPLATE +from swh.lister.pubdev.lister import PubDevLister expected_origins = { "https://pub.dev/packages/Autolinker", "https://pub.dev/packages/Babylon", } def test_pubdev_lister(datadir, requests_mock_datadir, swh_scheduler): lister = PubDevLister(scheduler=swh_scheduler) res = lister.run() assert res.pages == 1 assert res.origins == 2 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert len(scheduler_origins) == len(expected_origins) for origin in scheduler_origins: assert origin.visit_type == "pubdev" assert origin.url in expected_origins assert origin.last_update is not None def _match_request(request): - return request.headers.get("User-Agent") == USER_AGENT + return ( + request.headers.get("User-Agent") + == USER_AGENT_TEMPLATE % PubDevLister.LISTER_NAME + ) def test_pubdev_lister_skip_package( datadir, requests_mock_datadir, swh_scheduler, requests_mock ): requests_mock.get( "https://pub.dev/api/packages/Autolinker", status_code=404, additional_matcher=_match_request, ) lister = PubDevLister(scheduler=swh_scheduler) res = lister.run() assert res.pages == 1 assert res.origins == 1 diff --git a/swh/lister/puppet/__init__.py b/swh/lister/puppet/__init__.py new file mode 100644 index 0000000..e56cee6 --- /dev/null +++ b/swh/lister/puppet/__init__.py @@ -0,0 +1,101 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +""" +Puppet lister +============= + +The Puppet lister lists origins from `Puppet Forge`_.
+Puppet Forge is a package manager for Puppet modules. + +As of September 2022, `Puppet Forge`_ lists 6917 package names. + +Origins retrieving strategy +--------------------------- + +To get a list of all package names we call an `http api endpoint`_ which has a +`getModules`_ operation. +It returns a paginated list of results and a `next` url. + +The api follows the `OpenApi 3.0 specification`. + +Page listing +------------ + +Each page returns a list of ``results`` which are raw data from the api response. +The page size is 100, the maximum limit allowed by the api. + +Origins from page +----------------- + +The lister yields one hundred origin urls per page. + +The origin url is the html page corresponding to a package name on the forge, following +this pattern:: + + "https://forge.puppet.com/modules/{owner}/{pkgname}" + +For each origin, `last_update` is set via the module "updated_at" value. +As the api also returns all existing versions for a package, we build an `artifacts` +dict in `extra_loader_arguments` with the archive tarball corresponding to each +existing version. + +Example for ``file_concat`` module located at +https://forge.puppet.com/modules/electrical/file_concat:: + + { + "artifacts": { + "1.0.0": { + "url": "https://forgeapi.puppet.com/v3/files/electrical-file_concat-1.0.0.tar.gz", # noqa: B950 + "version": "1.0.0", + "filename": "electrical-file_concat-1.0.0.tar.gz", + "last_update": "2015-04-09T12:03:13-07:00", + }, + "1.0.1": { + "url": "https://forgeapi.puppet.com/v3/files/electrical-file_concat-1.0.1.tar.gz", # noqa: B950 + "version": "1.0.1", + "filename": "electrical-file_concat-1.0.1.tar.gz", + "last_update": "2015-04-17T01:03:46-07:00", + }, + } + } + +Running tests +------------- + +Activate the virtualenv and run from within the swh-lister directory:: + + pytest -s -vv --log-cli-level=DEBUG swh/lister/puppet/tests + +Testing with Docker +------------------- + +Change directory to swh/docker then launch the docker environment:: + + docker compose up -d + +Then schedule a Puppet listing task:: + + docker compose exec swh-scheduler swh scheduler task add -p oneshot list-puppet + +You can follow the lister execution by displaying the logs of the swh-lister service:: + + docker compose logs -f swh-lister + +.. _Puppet Forge: https://forge.puppet.com/ +.. _http api endpoint: https://forgeapi.puppet.com/ +.. _getModules: https://forgeapi.puppet.com/#tag/Module-Operations/operation/getModules + +""" + + +def register(): + from .lister import PuppetLister + + return { + "lister": PuppetLister, + "task_modules": ["%s.tasks" % __name__], + } diff --git a/swh/lister/puppet/lister.py b/swh/lister/puppet/lister.py new file mode 100644 index 0000000..4982e92 --- /dev/null +++ b/swh/lister/puppet/lister.py @@ -0,0 +1,111 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from datetime import datetime +import logging +from typing import Any, Dict, Iterator, List, Optional +from urllib.parse import urljoin + +from swh.scheduler.interface import SchedulerInterface +from swh.scheduler.model import ListedOrigin + +from ..pattern import CredentialsType, StatelessLister + +logger = logging.getLogger(__name__) + +# Aliasing the page results returned by the `get_pages` method from the lister.
+PuppetListerPage = List[Dict[str, Any]] + + +class PuppetLister(StatelessLister[PuppetListerPage]): + """The Puppet lister lists origins from 'Puppet Forge'""" + + LISTER_NAME = "puppet" + VISIT_TYPE = "puppet" + INSTANCE = "puppet" + + BASE_URL = "https://forgeapi.puppet.com/" + + def __init__( + self, + scheduler: SchedulerInterface, + credentials: Optional[CredentialsType] = None, + ): + super().__init__( + scheduler=scheduler, + credentials=credentials, + instance=self.INSTANCE, + url=self.BASE_URL, + ) + + def get_pages(self) -> Iterator[PuppetListerPage]: + """Yield an iterator which returns 'page' + + It requests the http api endpoint to get paginated results of modules, + and retrieves a `next` url. It ends when the `next` json value is `null`. + + Open Api specification for getModules endpoint: + https://forgeapi.puppet.com/#tag/Module-Operations/operation/getModules + + """ + # limit = 100 is the max value for pagination + limit: int = 100 + response = self.http_request( + f"{self.BASE_URL}v3/modules", params={"limit": limit} + ) + data: Dict[str, Any] = response.json() + yield data["results"] + + while data["pagination"]["next"]: + response = self.http_request( + urljoin(self.BASE_URL, data["pagination"]["next"]) + ) + data = response.json() + yield data["results"] + + def get_origins_from_page(self, page: PuppetListerPage) -> Iterator[ListedOrigin]: + """Iterate on all pages and yield ListedOrigin instances.""" + assert self.lister_obj.id is not None + + dt_parse_pattern = "%Y-%m-%d %H:%M:%S %z" + + for entry in page: + last_update = datetime.strptime(entry["updated_at"], dt_parse_pattern) + pkgname = entry["name"] + owner = entry["owner"]["slug"] + url = f"https://forge.puppet.com/modules/{owner}/{pkgname}" + artifacts = {} + for release in entry["releases"]: + # Build an artifact entry following original-artifacts-json specification + # https://docs.softwareheritage.org/devel/swh-storage/extrinsic-metadata-specification.html#original-artifacts-json # noqa: B950 + checksums = {} + + if release["version"] == entry["current_release"]["version"]: + # checksums are only available for current release + for checksum in ("md5", "sha256"): + checksums[checksum] = entry["current_release"][ + f"file_{checksum}" + ] + else: + # use file length as basic content check instead + checksums["length"] = release["file_size"] + + artifacts[release["version"]] = { + "filename": release["file_uri"].split("/")[-1], + "url": urljoin(self.BASE_URL, release["file_uri"]), + "version": release["version"], + "last_update": datetime.strptime( + release["created_at"], dt_parse_pattern + ).isoformat(), + "checksums": checksums, + } + + yield ListedOrigin( + lister_id=self.lister_obj.id, + visit_type=self.VISIT_TYPE, + url=url, + last_update=last_update, + extra_loader_arguments={"artifacts": artifacts}, + ) diff --git a/swh/lister/puppet/tasks.py b/swh/lister/puppet/tasks.py new file mode 100644 index 0000000..ed44627 --- /dev/null +++ b/swh/lister/puppet/tasks.py @@ -0,0 +1,19 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from celery import shared_task + +from swh.lister.puppet.lister import PuppetLister + + +@shared_task(name=__name__ + ".PuppetListerTask") +def list_puppet(**lister_args): + """Lister task for Puppet""" + return PuppetLister.from_configfile(**lister_args).run().dict() + +
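To make the checksum policy in `get_origins_from_page` above concrete, here it is isolated as a small helper; this is a hedged sketch, with `entry` and `release` dicts shaped like the Forge API results the lister iterates over, and `release_checksums` is not a function from the diff:

```python
from typing import Any, Dict


def release_checksums(
    entry: Dict[str, Any], release: Dict[str, Any]
) -> Dict[str, Any]:
    """Pick integrity data for one release of a Forge module.

    Mirrors the policy above: the Forge API only exposes md5/sha256 digests
    for the current release, so older releases fall back to the archive
    length as a weaker integrity check.
    """
    if release["version"] == entry["current_release"]["version"]:
        return {
            "md5": entry["current_release"]["file_md5"],
            "sha256": entry["current_release"]["file_sha256"],
        }
    return {"length": release["file_size"]}
```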
+@shared_task(name=__name__ + ".ping") +def _ping(): + return "OK" diff --git a/swh/lister/puppet/tests/__init__.py b/swh/lister/puppet/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/swh/lister/puppet/tests/data/https_forgeapi.puppet.com/v3_modules,limit=100 b/swh/lister/puppet/tests/data/https_forgeapi.puppet.com/v3_modules,limit=100 new file mode 100644 index 0000000..34223ec --- /dev/null +++ b/swh/lister/puppet/tests/data/https_forgeapi.puppet.com/v3_modules,limit=100 @@ -0,0 +1,391 @@ +{ + "pagination": { + "limit": 100, + "offset": 0, + "first": "/v3/modules?limit=100&offset=0", + "previous": null, + "current": "/v3/modules?limit=100&offset=0", + "next": "/v3/modules?limit=100&offset=100", + "total": 7301 + }, + "results": [ + { + "uri": "/v3/modules/puppetlabs-puppetdb", + "slug": "puppetlabs-puppetdb", + "name": "puppetdb", + "downloads": 313089590, + "created_at": "2012-09-19 16:49:18 -0700", + "updated_at": "2021-12-16 14:57:46 -0800", + "deprecated_at": null, + "deprecated_for": null, + "superseded_by": null, + "supported": false, + "endorsement": null, + "module_group": "base", + "owner": { + "uri": "/v3/users/puppetlabs", + "slug": "puppetlabs", + "username": "puppetlabs", + "gravatar_id": "fdd009b7c1ec96e088b389f773e87aec" + }, + "premium": false, + "current_release": { + "uri": "/v3/releases/puppetlabs-puppetdb-7.10.0", + "slug": "puppetlabs-puppetdb-7.10.0", + "module": { + "uri": "/v3/modules/puppetlabs-puppetdb", + "slug": "puppetlabs-puppetdb", + "name": "puppetdb", + "deprecated_at": null, + "owner": { + "uri": "/v3/users/puppetlabs", + "slug": "puppetlabs", + "username": "puppetlabs", + "gravatar_id": "fdd009b7c1ec96e088b389f773e87aec" + } + }, + "version": "7.10.0", + "metadata": { + "name": "puppetlabs-puppetdb", + "version": "7.10.0", + "author": "puppetlabs", + "summary": "Installs PostgreSQL and PuppetDB, sets up the connection to Puppet master.", + "license": "Apache-2.0", + "source": "git://github.com/puppetlabs/puppetlabs-puppetdb.git", + "project_page": "http://github.com/puppetlabs/puppetlabs-puppetdb", + "issues_url": "https://tickets.puppetlabs.com/browse/PDB", + "dependencies": [ + { + "name": "puppetlabs/inifile", + "version_requirement": ">= 1.1.3 < 6.0.0" + }, + { + "name": "puppetlabs/postgresql", + "version_requirement": ">= 6.5.0 < 8.0.0" + }, + { + "name": "puppetlabs/firewall", + "version_requirement": ">= 1.1.3 < 4.0.0" + }, + { + "name": "puppetlabs/stdlib", + "version_requirement": ">= 4.13.1 < 9.0.0" + } + ], + "operatingsystem_support": [ + { + "operatingsystem": "RedHat", + "operatingsystemrelease": [ + "6", + "7", + "8" + ] + }, + { + "operatingsystem": "CentOS", + "operatingsystemrelease": [ + "6", + "7", + "8" + ] + }, + { + "operatingsystem": "OracleLinux", + "operatingsystemrelease": [ + "6", + "7", + "8" + ] + }, + { + "operatingsystem": "Scientific", + "operatingsystemrelease": [ + "6", + "7" + ] + }, + { + "operatingsystem": "SLES", + "operatingsystemrelease": [ + "12 SP3" + ] + }, + { + "operatingsystem": "Debian", + "operatingsystemrelease": [ + "8", + "9", + "10", + "11" + ] + }, + { + "operatingsystem": "Ubuntu", + "operatingsystemrelease": [ + "16.04", + "18.04", + "20.04" + ] + } + ], + "requirements": [ + { + "name": "puppet", + "version_requirement": ">= 4.10.0 < 8.0.0" + } + ], + "description": "Module for installing/configuring PuppetDB", + "pdk-version": "1.18.0", + "template-url": "https://github.com/puppetlabs/pdk-templates#1.18.0", + "template-ref": "tags/1.18.0-0-g095317c" + }, + "tags": [ + 
"puppet", + "puppetdb", + "storeconfig" + ], + "supported": false, + "pdk": true, + "validation_score": 100, + "file_uri": "/v3/files/puppetlabs-puppetdb-7.10.0.tar.gz", + "file_size": 42806, + "file_md5": "e91a2074ca8d94a8b3ff7f6c8bbf12bc", + "file_sha256": "49b1a542fbd2a1378c16cb04809e0f88bf4f3e45979532294fb1f03f56c97fbb", + "downloads": 36371, + "readme": "puppetdb\n=========\n\n#### Table of Contents\n\n1. [Overview - What is the PuppetDB module?](#overview)\n2. [Module Description - What does the module do?](#module-description)\n3. [Setup - The basics of getting started with PuppetDB module](#setup)\n4. [Upgrading - Guide for upgrading from older revisions of this module](#upgrading)\n4. [Usage - The classes and parameters available for configuration](#usage)\n5. [Implementation - An under-the-hood peek at what the module is doing](#implementation)\n6. [Limitations - OS compatibility, etc.](#limitations)\n7. [Development - Guide for contributing to the module](#development)\n8. [Release Notes - Notes on the most recent updates to the module](#release-notes)\n\nOverview\n--------\n\nBy guiding puppetdb setup and configuration with a Puppet master, the PuppetDB\nmodule provides fast, streamlined access to data on puppetized infrastructure.\n\nModule Description\n-------------------\n\nThe PuppetDB module provides a quick way to get started using PuppetDB, an open\nsource inventory resource service that manages storage and retrieval of\nplatform-generated data. The module will install PostgreSQL and PuppetDB if you\ndon't have them, as well as set up the connection to Puppet master. The module\nwill also provide a dashboard you can use to view the current state of your\nsystem.\n\nFor more information about PuppetDB\n[please see the official PuppetDB documentation.](https://puppet.com/docs/puppetdb/latest/)\n\n\nSetup\n-----\n\n**What PuppetDB affects:**\n\n* package/service/configuration files for PuppetDB\n* package/service/configuration files for PostgreSQL (optional, but set as default)\n* Puppet master's runtime (via plugins)\n* Puppet master's configuration\n * **note**: Using the `puppetdb::master::config` class will cause your\n routes.yaml file to be overwritten entirely (see **Usage** below for options\n and more information )\n* system firewall (optional)\n* listened-to ports\n\n**Introductory Questions**\n\nTo begin using PuppetDB, you’ll have to make a few decisions:\n\n* Which database back-end should I use?\n * PostgreSQL (default) or our embedded database\n * Embedded database\n * **note:** As of PuppetDB 4.0, the embedded database is no longer supported as\n an option. When running PuppetDB 3.x, we suggest using the embedded database\n only for experimental environments rather than production, as it does not scale\n well and can cause difficulty in migrating to PostgreSQL.\n* Should I run the database on the same node that I run PuppetDB on?\n* Should I run PuppetDB on the same node that I run my master on?\n\nThe answers to those questions will be largely dependent on your answers to\nquestions about your Puppet environment:\n\n* How many nodes are you managing?\n* What kind of hardware are you running on?\n* Is your current load approaching the limits of your hardware?\n\nDepending on your answers to all of the questions above, you will likely fall\nunder one of these set-up options:\n\n1. [Single Node (Testing and Development)](#single-node-setup)\n2. 
[Multiple Node (Recommended)](#multiple-node-setup)\n\n### Single Node Setup\n\nThis approach assumes you will use our default database (PostgreSQL) and run\neverything (PostgreSQL, PuppetDB, Puppet master) all on the same node. This\nsetup will be great for a testing or experimental environment. In this case,\nyour manifest will look like:\n\n node {\n # Configure puppetdb and its underlying database\n class { 'puppetdb': }\n \n # Configure the Puppet master to use puppetdb\n class { 'puppetdb::master::config': }\n }\n\nYou can provide some parameters for these classes if you’d like more control,\nbut that is literally all that it will take to get you up and running with the\ndefault configuration.\n\n### Multiple Node Setup\n\nThis approach is for those who prefer not to install PuppetDB on the same node\nas the Puppet master. Your environment will be easier to scale if you are able\nto dedicate hardware to the individual system components. You may even choose to\nrun the puppetdb server on a different node from the PostgreSQL database that it\nuses to store its data. So let’s have a look at what a manifest for that\nscenario might look like:\n\n**This is an example of a very basic 3-node setup for PuppetDB.**\n\n $puppetdb_host = 'puppetdb.example.lan'\n $postgres_host = 'postgres.example.lan'\n node 'master.example.lan' {\n # Here we configure the Puppet master to use PuppetDB,\n # telling it the hostname of the PuppetDB node\n class { 'puppetdb::master::config':\n puppetdb_server => $puppetdb_host,\n }\n }\n node 'postgres.example.lan' {\n # Here we install and configure PostgreSQL and the PuppetDB\n # database instance, and tell PostgreSQL that it should\n # listen for connections to the `$postgres_host`\n class { 'puppetdb::database::postgresql':\n listen_addresses => $postgres_host,\n }\n }\n node 'puppetdb.example.lan' {\n # Here we install and configure PuppetDB, and tell it where to\n # find the PostgreSQL database.\n class { 'puppetdb::server':\n database_host => $postgres_host,\n }\n }\n\nThis should be all it takes to get a 3-node, distributed installation of\nPuppetDB up and running. Note that, if you prefer, you could easily move two of\nthese classes to a single node and end up with a 2-node setup instead.\n\n### Enable SSL connections\n\nTo use SSL connections for the single node setup, use the following manifest:\n\n node {\n # Here we configure puppetdb and PostgreSQL to use ssl connections\n class { 'puppetdb':\n postgresql_ssl_on => true,\n database_host => '',\n database_listen_address => '0.0.0.0'\n }\n \n # Configure the Puppet master to use puppetdb\n class { 'puppetdb::master::config': }\n\nTo use SSL connections for the multiple nodes setup, use the following manifest:\n\n $puppetdb_host = 'puppetdb.example.lan'\n $postgres_host = 'postgres.example.lan'\n\n node 'master.example.lan' {\n # Here we configure the Puppet master to use PuppetDB,\n # telling it the hostname of the PuppetDB node.\n class { 'puppetdb::master::config':\n puppetdb_server => $puppetdb_host,\n }\n }\n\n node 'postgres.example.lan' {\n # Here we install and configure PostgreSQL and the PuppetDB\n # database instance, and tell PostgreSQL that it should\n # listen for connections to the `$postgres_host`. 
\n # We also enable SSL connections.\n class { 'puppetdb::database::postgresql':\n listen_addresses => $postgres_host,\n postgresql_ssl_on => true,\n puppetdb_server => $puppetdb_host\n }\n }\n\n node 'puppetdb.example.lan' {\n # Here we install and configure PuppetDB, and tell it where to\n # find the PostgreSQL database. We also enable SSL connections.\n class { 'puppetdb::server':\n database_host => $postgres_host,\n postgresql_ssl_on => true\n }\n }\n\n### Beginning with PuppetDB\n\nWhether you choose a single node development setup or a multi-node setup, a\nbasic setup of PuppetDB will cause: PostgreSQL to install on the node if it’s\nnot already there; PuppetDB postgres database instance and user account to be\ncreated; the postgres connection to be validated and, if successful, PuppetDB to\nbe installed and configured; PuppetDB connection to be validated and, if\nsuccessful, the Puppet master config files to be modified to use PuppetDB; and\nthe Puppet master to be restarted so that it will pick up the config changes.\n\nIf your logging level is set to INFO or finer, you should start seeing\nPuppetDB-related log messages appear in both your Puppet master log and your\npuppetdb log as subsequent agent runs occur.\n\n### Cross-node Dependencies\n\nIt is worth noting that there are some cross-node dependencies, which means that\nthe first time you add the module's configurations to your manifests, you may\nsee a few failed puppet runs on the affected nodes.\n\nPuppetDB handles cross-node dependencies by taking a sort of \"eventual\nconsistency\" approach. There’s nothing that the module can do to control the\norder in which your nodes check in, but the module can check to verify that the\nservices it depends on are up and running before it makes configuration\nchanges--so that’s what it does.\n\nWhen your Puppet master node checks in, it will validate the connectivity to the\npuppetdb server before it applies its changes to the Puppet master config files.\nIf it can’t connect to puppetdb, then the puppet run will fail and the previous\nconfig files will be left intact. This prevents your master from getting into a\nbroken state where all incoming puppet runs fail because the master is\nconfigured to use a puppetdb server that doesn’t exist yet. The same strategy is\nused to handle the dependency between the puppetdb server and the postgres\nserver.\n\nHence the failed puppet runs. These failures should be limited to 1 failed run\non the puppetdb node, and up to 2 failed runs on the Puppet master node. After\nthat, all of the dependencies should be satisfied and your puppet runs should\nstart to succeed again.\n\nYou can also manually trigger puppet runs on the nodes in the correct order\n(Postgres, PuppetDB, Puppet master), which will avoid any failed runs.\n\nUpgrading\n---------\n\n### Upgrading from 4.x to 5.x\n\nSignificant parameter changes are listed below:\n\n* The PuppetDB module defaults to Puppet 4 pathing and assumes `puppetserver`\n is the master service by default\n* The PuppetDB module manages Postgres repos by default. To turn this behavior\n off, set `manage_package_repo` to `false`.\n* To specify a specific version of PuppetDB to manage, you'll need to use the\n `puppetdb::globals` class to set the version of PuppetDB you're using\n explicitly. 
The ability to configure the version in the `puppetdb::server` and\n `puppetdb` class have been removed.\n\nFor example if your config looked like this before:\n\n class {'puppetdb':\n puppetdb_version => '3.2.4-1.el7',\n }\n class { 'puppetdb::master::config': }\n\nand you'd still like to use the module with PuppetDB 3.2.4, all you'd have to\nchange would be:\n\n class { 'puppetdb::globals':\n version => '3.2.4-1.el7',\n }\n class { 'puppetdb' : }\n class { 'puppetdb::master::config' : }\n\nThe `globals` class above takes into account the following PuppetDB 3 and Puppet\n4 related changes:\n * The `puppetdb::master:puppetdb_conf` class has added a `$legacy_terminus`\n to support the PuppetDB 2.x terminus configuration.\n * The default `test_url` for the `PuppetDBConnValidator` has also been\n changed to `/pdb/meta/v1/version` but will default to `/v3/version` when\n using a PuppetDB 2.x version.\n * The configuration pathing for Puppet and PuppetDB has changed with Puppet\n 4 and PuppetDB 3, using PuppetDB 2.x or older assumes the old\n configuration pathing.\n\nSee the CHANGELOG file for more detailed information on changes for each release.\n\n### Upgrading from 3.x to 4.x\n\nFor this release, all dependency versions have been bumped to their latest.\nSignificant parameter changes are listed below:\n\n* The PuppetDB module now only supports Puppet 3.7.1 or later\n* `puppetlabs/postgresql` 4.0.0 or later is now required\n* `puppetlabs/inifile` 1.1.3 or later is now required\n* `puppetlabs/firewall` 1.1.3 or later is now required\n* `puppetlabs/stdlib` 4.2.2 or later is now required\n* The parameter `manage_firewall` for the class `puppetdb::database::postgresql`\n has now been removed, since the PostgreSQL module no longer supports this.\n* The parameter `open_postgres_port` for the class `puppetdb` has also been\n removed, due to PostgreSQL changes.\n\nSee the CHANGELOG file for more detailed information on changes for each release.\n\n### Upgrading from 2.x to 3.x\n\nFor this release a major dependency has changed. The module\n`pupppetlabs/postgresql` must now be version 3.x. Upgrading the module should\nupgrade the `puppetlabs/postgresql` module for you, but if another module has a\nfixed dependency that module will have to be fixed before you can continue.\n\nSome other changes include:\n\n* The parameter `manage_redhat_firewall` for the class `puppetdb` has now been\n removed completely in favor of `open_postgres_port` and\n `open_ssl_listen_port`.\n* The parameter `manage_redhat_firewall` for the class\n `puppetdb::database::postgresql`, has now been renamed to `manage_firewall`.\n* The parameter `manage_redhat_firewall` for the class `puppetdb::server` has\n now been removed completely in favor of `open_listen_port` and\n `open_ssl_listen_port`.\n* The internal class: `puppetdb::database::postgresql_db` has been removed. If\n you were using this, it is now defunct.\n* The class `puppetdb::server::firewall` has been marked as private, do not use\n it directly.\n* The class `puppetdb::server::jetty_ini` and `puppetdb::server::database_ini`\n have been marked as private, do not use it directly.\n\n### Upgrading from 1.x to 2.x\n\nA major dependency has been changed, so now when you upgrade to 2.0 the\ndependency `cprice404/inifile` has been replaced with `puppetlabs/inifile`. This\nmay interfere with other modules as they may depend on the old\n`cprice404/inifile` instead, so upgrading should be done with caution. 
Check\nthat your other modules use the newer `puppetlabs/inifile` module, as\ninteroperation with the old `cprice404/inifile` module will no longer be\nsupported by this module.\n\nDepending on how you install your modules, changing the dependency may require\nmanual intervention. Double-check that your modules contain the newer\n`puppetlabs/inifile` after installing this latest module.\n\nOtherwise, all existing parameters from 1.x should still work correctly.\n\nUsage\n------\n\nPuppetDB supports a large number of configuration options for both configuring\nthe puppetdb service and connecting that service to the Puppet master.\n\n### puppetdb::globals\n\nThe `puppetdb::globals` class is intended to provide similar functionality to\nthe `postgresql::globals` class in the `puppetlabs-postgresql` module by\nexposing a top-level entry-point into the module so that we can properly set\ndefaults for the `puppetdb::params` class based on the version of `puppetdb` you\nare using. Its `version` setting defaults to `present`.\n\nYou must declare the class to use it:\n\n class { 'puppetdb::globals': }\n\n**Parameters within `puppetdb::globals`:**\n\n#### `version`\n\nThe version of the `puppetdb` package that should be installed. You may specify\nan explicit version number, 'present', or 'latest' (defaults to 'present').\n\n### puppetdb\n\nThe `puppetdb` class is intended as a high-level abstraction (sort of an\n'all-in-one' class) to help simplify the process of getting your puppetdb server\nup and running. It wraps the slightly-lower-level classes `puppetdb::server` and\n`puppetdb::database::*`, and it'll get you up and running with everything you\nneed (including database setup and management) on the server side. For maximum\nconfigurability, you may choose not to use this class. You may prefer to use the\n`puppetdb::server` class directly, or manage your puppetdb setup on your own.\n\nYou must declare the class to use it:\n\n class { 'puppetdb': }\n\n**Parameters within `puppetdb`:**\n\n#### `listen_address`\n\nThe address that the web server should bind to for HTTP requests. Defaults to\n`localhost`. Set to `0.0.0.0` to listen on all addresses.\n\n#### `listen_port`\n\nThe port on which the puppetdb web server should accept HTTP requests. Defaults\nto `8080`.\n\n#### `disable_cleartext`\n\nIf `true`, the puppetdb web server will only serve HTTPS and not HTTP requests (defaults to `false`).\n\n#### `open_listen_port`\n\nIf `true`, open the `http_listen_port` on the firewall. Defaults to `false`.\n\n#### `ssl_listen_address`\n\nThe address that the web server should bind to for HTTPS requests. Defaults to\n`0.0.0.0` to listen on all addresses.\n\n#### `ssl_listen_port`\n\nThe port on which the puppetdb web server should accept HTTPS requests. Defaults\nto `8081`.\n\n#### `disable_ssl`\n\nIf `true`, the puppetdb web server will only serve HTTP and not HTTPS requests.\nDefaults to `false`.\n\n#### `open_ssl_listen_port`\n\nIf `true`, open the `ssl_listen_port` on the firewall. Defaults to `undef`.\n\n#### `ssl_protocols`\n\nSpecify the supported SSL protocols for PuppetDB (e.g. TLSv1, TLSv1.1, TLSv1.2).\n\n#### `postgresql_ssl_on`\n\nIf `true`, configures SSL connections between PuppetDB and the PostgreSQL database.\nDefaults to `false`.\n\n#### `cipher_suites`\n\nConfigure jetty's supported `cipher-suites` (e.g. `SSL_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384`).\nDefaults to `undef`.
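\n\nAs a concrete illustration of the connection settings above, here is a minimal\nsketch of a puppetdb instance that serves HTTPS only and talks to PostgreSQL\nover SSL (the values are illustrative placeholders, not defaults):\n\n class { 'puppetdb':\n listen_address => 'localhost',\n disable_cleartext => true,\n ssl_listen_address => '0.0.0.0',\n ssl_listen_port => 8081,\n ssl_protocols => 'TLSv1.2',\n postgresql_ssl_on => true,\n }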
\n\n#### `migrate`\n\nIf `true`, puppetdb will automatically migrate to the latest database format at startup. If `false` and the database format supplied by this version of PuppetDB doesn't match the expected version (whether newer or older), PuppetDB will exit with an error status. Defaults to `true`.\n\n#### `manage_dbserver`\n\nIf `true`, the PostgreSQL server will be managed by this module. Defaults to `true`.\n\n#### `manage_database`\n\nIf `true`, the PostgreSQL database will be managed by this module. Defaults to `true`.\n\n#### `database`\n\nWhich database backend to use; legal values are `postgres` (default)\nor `embedded`. The `embedded` option is not supported on PuppetDB\n4.0.0 or later. `embedded` can be used for very small installations or\nfor testing, but is not recommended for use in production\nenvironments. For more info, see the [puppetdb\ndocs](https://puppet.com/docs/puppetdb/latest/).\n\n#### `database_host`\n\nHostname to use for the database connection. For single-node installations this\nshould be left as the default. Defaults to `localhost`, ignored for `embedded`\ndatabase.\n\n#### `database_port`\n\nThe port that the database server listens on. Defaults to `5432`, ignored for\n`embedded` database.\n\n#### `database_username`\n\nThe name of the database user to connect as. Defaults to `puppetdb`, ignored for\n`embedded` database.\n\n#### `database_password`\n\nThe password for the database user. Defaults to `puppetdb`, ignored for\n`embedded` database.\n\n#### `manage_db_password`\n\nWhether or not the database password in database.ini will be managed by this module.\nSet this to `false` if you want to set the password some other way.\nDefaults to `true`.\n\n#### `database_name`\n\nThe name of the database instance to connect to. Defaults to `puppetdb`, ignored\nfor `embedded` database.\n\n#### `jdbc_ssl_properties`\n\nThe text to append to the JDBC connection URI. This should begin with a '?'\ncharacter. For example, to use SSL for the PostgreSQL connection, set this\nparameter's value to `?ssl=true`.\n\nThis setting is only available when using PostgreSQL; when using HyperSQL (the\n`embedded` database), it does nothing.\n\n#### `database_validate`\n\nIf `true`, the module will attempt to connect to the database using the specified\nsettings and fail if it is not able to do so. Defaults to `true`.\n\n#### `database_embedded_path`\n\n*Embedded Database Only* Changes the path location for the HSQLDB database. Does\n not provide migration for old data, so if you change this value and you have an\n existing database, you will need to manually move the content as well (defaults to\n the package default for the 2.x release).\n\n#### `node_ttl`\n\nThe length of time a node can go without receiving any new data before it's\nautomatically deactivated. (defaults to '7d', which is a 7-day period. Set to\n'0d' to disable auto-deactivation). This option is supported in PuppetDB >=\n1.1.0.\n\n#### `node_purge_ttl`\n\nThe length of time a node can be deactivated before it's deleted from the\ndatabase. (defaults to '14d', which is a 14-day period. Set to '0d' to disable\npurging). This option is supported in PuppetDB >= 1.2.0.\n\n#### `report_ttl`\n\nThe length of time reports should be stored before being deleted. (defaults to\n`14d`, which is a 14-day period). This option is supported in PuppetDB >= 1.1.0.\n\n#### `gc_interval`\n\nThis controls how often (in minutes) to compact the database. The compaction\nprocess reclaims space and deletes unnecessary rows. If not supplied, the\ndefault is every 60 minutes. 
This option is supported in PuppetDB >= 0.9.\n\n#### `log_slow_statements`\n\nThis sets the number of seconds before an SQL query is considered \"slow.\" Slow\nSQL queries are logged as warnings, to assist in debugging and tuning. Note\nPuppetDB does not interrupt slow queries; it simply reports them after they\ncomplete.\n\nThe default value is `10` seconds. A value of 0 will disable logging of slow\nqueries. This option is supported in PuppetDB >= 1.1.\n\n#### `conn_max_age`\n\nThe maximum time (in minutes) for a pooled connection to remain unused before\nit is closed off.\n\nIf not supplied, we default to `60` minutes. This option is supported in PuppetDB >= 1.1.\n\n#### `conn_keep_alive`\n\nThis sets the time (in minutes) for a connection to remain idle before sending\na test query to the DB. This is useful to prevent a DB from timing out\nconnections on its end.\n\nIf not supplied, we default to 45 minutes. This option is supported in PuppetDB >= 1.1.\n\n#### `conn_lifetime`\n\nThe maximum time (in minutes) a pooled connection should remain open. Any\nconnections older than this setting will be closed off. Connections currently in\nuse will not be affected until they are returned to the pool.\n\nIf not supplied, we won't terminate connections based on their age alone. This\noption is supported in PuppetDB >= 1.4.\n\n#### `puppetdb_package`\n\nThe PuppetDB package name in the package manager. Defaults to `present`.\n\n#### `puppetdb_service`\n\nThe name of the PuppetDB service. Defaults to `puppetdb`.\n\n#### `puppetdb_service_status`\n\nSets whether the service should be `running ` or `stopped`. When set to `stopped` the\nservice doesn't start on boot either. Valid values are `true`, `running`,\n`false`, and `stopped`.\n\n#### `confdir`\n\nThe PuppetDB configuration directory. Defaults to `/etc/puppetdb/conf.d`.\n\n#### `vardir`\n\nThe parent directory for the MQ's data directory.\n\n#### `java_args`\n\nJava VM options used for overriding default Java VM options specified in\nPuppetDB package. Defaults to `{}`. See\n[PuppetDB Configuration](https://puppet.com/docs/puppetdb/latest/configure.html)\nto get more details about the current defaults.\n\nFor example, to set `-Xmx512m -Xms256m` options use:\n\n {\n '-Xmx' => '512m',\n '-Xms' => '256m',\n }\n\n#### `merge_default_java_args`\n\nSets whether the provided java args should be merged with the defaults, or\nshould override the defaults. This setting is necessary if any of the defaults\nare to be removed. Defaults to true. If `false`, the `java_args` in the PuppetDB\ninit config file will reflect only what is passed via the `java_args` param.\n\n#### `max_threads`\n\nJetty option to explicitly set `max-threads`. Defaults to `undef`, so the\nPuppetDB-Jetty default is used.\n\n#### `read_database`\n\nWhich database backend to use for the read database. Only supports\n`postgres` (default). This option is supported in PuppetDB >= 1.6.\n\n#### `read_database_host`\n*This parameter must be set to use another PuppetDB instance for queries.*\n\nThe hostname or IP address of the read database server. If set to `undef`, and \n`manage_database` is set to `true`, it will use the value of the `database_host` \nparameter. This option is supported in PuppetDB >= 1.6.\n\n#### `read_database_port`\n\nThe port that the read database server listens on. If `read_database_host`\nis set to `undef`, and `manage_database` is set to `true`, it will use the value of \nthe `database_port` parameter. 
This option is supported in PuppetDB >= 1.6.\n\n#### `read_database_username`\n\nThe name of the read database user to connect as. Defaults to `puppetdb-read`. This\noption is supported in PuppetDB >= 1.6.\n\n#### `read_database_password`\n\nThe password for the read database user. Defaults to `puppetdb-read`. This option is\nsupported in PuppetDB >= 1.6.\n\n#### `manage_read_db_password`\n\nWhether or not the database password in read-database.ini will be managed by this module.\nSet this to `false` if you want to set the password some other way.\nDefaults to `true`\n\n#### `read_database_name`\n\nThe name of the read database instance to connect to. If `read_database_host`\nis set to `undef`, and `manage_database` is set to `true`, it will use the value of\nthe `database_name` parameter. This option is supported in PuppetDB >= 1.6.\n\n#### `read_log_slow_statements`\n\nThis sets the number of seconds before an SQL query to the read database is\nconsidered \"slow.\" Slow SQL queries are logged as warnings, to assist in\ndebugging and tuning. Note PuppetDB does not interrupt slow queries; it simply\nreports them after they complete.\n\nThe default value is 10 seconds. A value of 0 will disable logging of slow\nqueries. This option is supported in PuppetDB >= 1.6.\n\n#### `read_conn_max_age`\n\nThe maximum time (in minutes) for a pooled read database connection to remain\nunused before it is closed off.\n\nIf not supplied, we default to 60 minutes. This option is supported in PuppetDB >= 1.6.\n\n#### `read_conn_keep_alive`\n\nThis sets the time (in minutes) for a read database connection to remain idle\nbefore sending a test query to the DB. This is useful to prevent a DB from\ntiming out connections on its end.\n\nIf not supplied, we default to 45 minutes. This option is supported in PuppetDB >= 1.6.\n\n#### `read_conn_lifetime`\n\nThe maximum time (in minutes) a pooled read database connection should remain\nopen. Any connections older than this setting will be closed off. Connections\ncurrently in use will not be affected until they are returned to the pool.\n\nIf not supplied, we won't terminate connections based on their age alone. This\noption is supported in PuppetDB >= 1.6.\n\n#### `ssl_dir`\n\nBase directory for PuppetDB SSL configuration. Defaults to `/etc/puppetdb/ssl`\nor `/etc/puppetlabs/puppetdb/ssl` for FOSS and PE respectively.\n\n#### `ssl_set_cert_paths`\n\nA switch to enable or disable the management of SSL certificates in your\n`jetty.ini` configuration file.\n\n#### `ssl_cert_path`\n\nPath to your SSL certificate for populating `jetty.ini`.\n\n#### `ssl_key_path`\n\nPath to your SSL key for populating `jetty.ini`.\n\n#### `ssl_ca_cert_path`\n\nPath to your SSL CA for populating `jetty.ini`.\n\n#### `ssl_deploy_certs`\n\nA boolean switch to enable or disable the management of SSL keys in your\n`ssl_dir`. Default is `false`.\n\n#### `ssl_key`\n\nContents of your SSL key, as a string.\n\n#### `ssl_cert`\n\nContents of your SSL certificate, as a string.\n\n#### `ssl_ca_cert`\n\nContents of your SSL CA certificate, as a string.\n\n#### `manage_firewall`\n\nIf `true`, puppet will manage your iptables rules for PuppetDB via the\n[puppetlabs-firewall](https://forge.puppetlabs.com/puppetlabs/firewall) class.\n\n#### `command_threads`\n\nThe number of command processing threads to use. Defaults to `undef`, using the\nPuppetDB built-in default.\n\n#### `concurrent_writes`\n\nThe number of threads allowed to write to disk at any one time. 
Defaults to\n`undef`, which uses the PuppetDB built-in default.\n\n#### `store_usage`\n\nThe amount of disk space (in MB) to allow for persistent message storage.\nDefaults to `undef`, using the PuppetDB built-in default.\n\n#### `temp_usage`\n\nThe amount of disk space (in MB) to allow for temporary message storage.\nDefaults to `undef`, using the PuppetDB built-in default.\n\n#### `disable_update_checking`\n\nSetting this to `true` disables checking for updated versions of PuppetDB and sending basic analytics data to Puppet.\nDefaults to `undef`, using the PuppetDB built-in default.\n\n#### `certificate_whitelist_file`\n\nThe name of the certificate whitelist file to set up and configure in PuppetDB. Defaults to `/etc/puppetdb/certificate-whitelist` or `/etc/puppetlabs/puppetdb/certificate-whitelist` for FOSS and PE respectively.\n\n#### `certificate_whitelist`\n\nArray of the X.509 certificate Common Names of clients allowed to connect to PuppetDB. Defaults to empty. Be aware that this permits full access for all Puppet clients to download anything contained in PuppetDB, including the full catalogs of all nodes, which possibly contain sensitive information. Set to `[ $::servername ]` to allow access only from your (single) Puppet master, which is enough for normal operation. Set to a list of Puppet masters if you have multiple.\n\n#### `automatic_dlo_cleanup`\n\nPuppetDB creates a [Dead Letter Office](https://puppet.com/docs/puppetdb/5.2/maintain_and_tune.html#clean-up-the-dead-letter-office)\ncontaining reports of failed requests, which fill up the disk. This parameter is\na boolean and defaults to `false`. You can enable automatic cleanup of DLO\nreports by setting this to `true`; a combined example follows the `dlo_max_age`\nparameter below.\n\n#### `cleanup_timer_interval`\n\nThe DLO cleanup is a systemd timer if systemd is available, otherwise a\ncronjob. The variable configures the systemd.timer option [OnCalendar](https://www.freedesktop.org/software/systemd/man/systemd.timer.html#OnCalendar=).\nIt defaults to `*-*-* ${fqdn_rand(24)}:${fqdn_rand(60)}:00`. This will start\nthe cleanup service on a daily basis. The exact minute and hour are random\nper node, based on the [fqdn_rand](https://puppet.com/docs/puppet/5.5/function.html#fqdnrand)\nmethod. On non-systemd systems, the cron runs daily and the `$puppetdb_user` needs\nto be able to run cron jobs. On systemd systems you need the [camptocamp/systemd](https://forge.puppet.com/camptocamp/systemd)\nmodule, which is an optional dependency and not automatically installed!\n\n#### `dlo_max_age`\n\nThis is a positive integer. It is the number of days you want to keep\nthe DLO reports. The default value is 90 days.
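\n\nPutting the three DLO parameters together, a minimal sketch (the start time is\na hypothetical fixed value instead of the random per-node default):\n\n class { 'puppetdb':\n automatic_dlo_cleanup => true,\n cleanup_timer_interval => '*-*-* 04:00:00',\n dlo_max_age => 30,\n }\n\nRemember that on systemd systems this additionally requires the optional\ncamptocamp/systemd module mentioned above.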
\n\n### puppetdb::server\n\nThe `puppetdb::server` class manages the PuppetDB server independently of the\nunderlying database that it depends on. It will manage the PuppetDB package,\nservice, config files, etc., but will still allow you to manage the database\n(e.g. PostgreSQL) however you see fit.\n\n class { 'puppetdb::server':\n database_host => 'pg1.mydomain.com',\n }\n\n### puppetdb::master::config\n\nThe `puppetdb::master::config` class directs your Puppet master to use PuppetDB,\nwhich means that this class should be used on your Puppet master node. It’ll\nverify that it can successfully communicate with your PuppetDB server, and then\nconfigure your master to use PuppetDB.\n\nUsing this class allows the module to manipulate the puppet configuration files\npuppet.conf and routes.yaml. The puppet.conf changes are supplemental and should\nnot affect any of your existing settings, but the routes.yaml file will be\noverwritten entirely. If you have an existing routes.yaml file, you will want to\ntake care to use the `manage_routes` parameter of this class to prevent the module\nfrom managing that file, and you’ll need to manage it yourself.\n\n class { 'puppetdb::master::config':\n puppetdb_server => 'my.host.name',\n puppetdb_port => 8081,\n }\n\n**Parameters within `puppetdb::master::config`:**\n\n#### `puppetdb_server`\n\nThe DNS name or IP of the PuppetDB server. Defaults to the hostname of the\ncurrent node, i.e. `$::fqdn`.\n\n#### `puppetdb_port`\n\nThe port that the PuppetDB server is running on. Defaults to `8081`.\n\n#### `puppetdb_disable_ssl`\n\nIf `true`, use plain HTTP to talk to PuppetDB. Defaults to the value of\n`disable_ssl` if PuppetDB is on the same server as the Puppet master, or else\n`false`. If you set this, you probably need to set `puppetdb_port` to match the HTTP\nport of PuppetDB.\n\n#### `puppetdb_soft_write_failure`\n\nBoolean to fail in a soft manner if PuppetDB is not accessible for command\nsubmission. Defaults to `false`.\n\n#### `manage_routes`\n\nIf `true`, the module will overwrite the Puppet master's routes file to\nconfigure it to use PuppetDB. Defaults to `true`.\n\n#### `manage_storeconfigs`\n\nIf `true`, the module will manage the Puppet master's storeconfig settings.\nDefaults to `true`.\n\n#### `manage_report_processor`\n\nIf `true`, the module will manage the 'reports' field in the puppet.conf file to\nenable or disable the PuppetDB report processor. Defaults to `false`.\n\n#### `manage_config`\n\nIf `true`, the module will store values from the `puppetdb_server` and `puppetdb_port`\nparameters in the PuppetDB configuration file. If `false`, an existing PuppetDB\nconfiguration file will be used to retrieve server and port values.\n\n#### `create_puppet_service_resource`\n\nIf `true`, AND if `restart_puppet` is `true`, then the module will create a service\nresource for `puppet_service_name` if it has not been defined. Defaults to `true`.\nIf you are already declaring the `puppet_service_name` service resource in another\npart of your code, setting this to `false` will avoid creation of that service\nresource by this module, avoiding potential duplicate resource errors.\n\n#### `strict_validation`\n\nIf `true`, the module will fail if PuppetDB is not reachable; otherwise it will\npreconfigure PuppetDB without checking.\n\n#### `enable_reports`\n\nIgnored unless `manage_report_processor` is `true`, in which case this setting\nwill determine whether the PuppetDB report processor is enabled (`true`)\nor disabled (`false`) in the puppet.conf file.\n\n#### `enable_storeconfigs`\n\nIgnored unless `manage_storeconfigs` is `true`, in which case this setting\nwill determine whether client configuration storage is enabled (`true`)\nor disabled (`false`) in the puppet.conf file.
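\n\nFor example, to let the module manage both the report processor and\nstoreconfigs on the master, a sketch along the lines of the report processor\nexample in this module's changelog:\n\n class { 'puppetdb::master::config':\n manage_report_processor => true,\n enable_reports => true,\n manage_storeconfigs => true,\n enable_storeconfigs => true,\n }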
\n\n#### `puppet_confdir`\n\nPuppet's config directory. Defaults to `/etc/puppet`.\n\n#### `puppet_conf`\n\nPuppet's config file. Defaults to `/etc/puppet/puppet.conf`.\n\n#### `masterless`\n\nA boolean switch to enable or disable the masterless setup of PuppetDB. Defaults\nto `false`.\n\n#### `terminus_package`\n\nName of the package to use that represents the PuppetDB terminus code. Defaults\nto `puppetdb-termini`; when `puppetdb_version` is set to `<= 2.3.x`, the default\nchanges to `puppetdb-terminus`.\n\n#### `puppet_service_name`\n\nName of the service that represents Puppet. You can change this to `apache2` or\n`httpd` depending on your operating system, if you plan on having Puppet run\nusing Apache/Passenger for example.\n\n#### `puppetdb_startup_timeout`\n\nThe maximum amount of time that the module should wait for PuppetDB to start up.\nThis is most important during the initial install of PuppetDB (defaults to 15\nseconds).\n\n#### `restart_puppet`\n\nIf `true`, the module will restart the Puppet master when PuppetDB configuration\nfiles are changed by the module. Defaults to `true`. If set to `false`, you\nmust restart the service manually in order to pick up changes to the config\nfiles (other than `puppet.conf`).\n\n### puppetdb::database::postgresql\n\nThe `puppetdb::database::postgresql` class manages a PostgreSQL server for use\nby PuppetDB. It can manage the PostgreSQL packages and service, as well as\ncreate and manage the PuppetDB database and database user accounts.\n\n class { 'puppetdb::database::postgresql':\n listen_addresses => 'my.postgres.host.name',\n }\n\n#### `listen_addresses`\n\nA comma-separated list of hostnames or IP addresses on\nwhich the postgres server should listen for incoming connections. This defaults\nto `localhost`. This parameter maps directly to PostgreSQL's `listen_addresses`\nconfig option. Use a `*` to allow connections on any accessible address.\n\n#### `database_name`\n\nSets the name of the database. Defaults to `puppetdb`.\n\n#### `database_username`\n\nCreates a user for accessing the database. Defaults to `puppetdb`.\n\n#### `database_password`\n\nSets the password for the database user above. Defaults to `puppetdb`.\n\n#### `manage_server`\n\nConditionally manages the PostgreSQL server via `postgresql::server`. Defaults\nto `true`. If set to `false`, this class will create the database and user via\n`postgresql::server::db` but not attempt to install or manage the server itself.\n\n#### `test_url`\n\nThe URL to use for testing if the PuppetDB instance is running. Defaults to\n`/pdb/meta/v1/version`.\n\n#### `manage_package_repo`\n\nIf `true`, the official postgresql.org repo will be added and postgres won't\nbe installed from the regular repository. Defaults to `true`.\n\n#### `postgres_version`\n\nIf the postgresql.org repo is installed, you can install several versions of\npostgres. Defaults to `9.6` in module version 6.0+ and `9.4` in older versions.
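\n\nFor instance, a sketch that pins the PostgreSQL version installed from the\npostgresql.org repo (parameter placement follows the documentation above; the\nhostname is a placeholder):\n\n class { 'puppetdb::database::postgresql':\n listen_addresses => 'my.postgres.host.name',\n manage_package_repo => true,\n postgres_version => '9.6',\n }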
\n\nImplementation\n---------------\n\n### Resource overview\n\nIn addition to the classes and variables mentioned above, PuppetDB includes:\n\n**puppetdb::master::routes**\n\nConfigures the Puppet master to use PuppetDB as the facts terminus. *WARNING*:\nthe current implementation simply overwrites your routes.yaml file; if you have\nan existing routes.yaml file that you are using for other purposes, you should\n*not* use this.\n\n class { 'puppetdb::master::routes':\n puppet_confdir => '/etc/puppet'\n }\n\nThe optional parameter `routes` can be used to specify a custom route\nconfiguration. For example, to configure routes for masterless Puppet:\n\n class { 'puppetdb::master::routes':\n routes => {\n 'apply' => {\n 'facts' => {\n 'terminus' => 'facter',\n 'cache' => 'puppetdb_apply',\n }\n }\n }\n }\n\n**puppetdb::master::storeconfigs**\n\nConfigures the Puppet master to enable storeconfigs and to use PuppetDB as the\nstoreconfigs backend.\n\n class { 'puppetdb::master::storeconfigs':\n puppet_conf => '/etc/puppet/puppet.conf'\n }\n\n**puppetdb::server::validate_db**\n\nValidates that a successful database connection can be established between the\nnode on which this resource is run and the specified PuppetDB database instance\n(host/port/user/password/database name).\n\n puppetdb::server::validate_db { 'validate my puppetdb database connection':\n database_host => 'my.postgres.host',\n database_username => 'mydbuser',\n database_password => 'mydbpassword',\n database_name => 'mydbname',\n }\n\n### Custom Types\n\n**puppetdb_conn_validator**\n\nVerifies that a connection can be successfully established between a node and\nthe PuppetDB server. Its primary use is as a precondition to prevent\nconfiguration changes from being applied if the PuppetDB server cannot be\nreached, but it could potentially be used for other purposes such as monitoring.\n\nLimitations\n------------\n\nCurrently, PuppetDB is compatible with:\n\n Puppet Version: 4.10+\n\nPlatforms:\n* EL 5, 6, 7\n* Debian 6, 7\n* Ubuntu 10.04, 12.04, 14.04\n\nCommunity Maintained Platforms:\n* Archlinux\n* OpenBSD 5.6-current and newer\n* SLES 11 SP1\n\nDevelopment\n------------\n\nPuppet Labs modules on the Puppet Forge are open projects, and community\ncontributions are essential for keeping them great. We can’t access the huge\nnumber of platforms and myriad of hardware, software, and deployment\nconfigurations that Puppet is intended to serve.\n\nWe want to keep it as easy as possible to contribute changes so that our modules\nwork in your environment. There are a few guidelines that we need contributors\nto follow so that we can have a chance of keeping on top of things.\n\nYou can read the complete [contribution guide](https://github.com/puppetlabs/.github/blob/master/CONTRIBUTING.md).\n", + "changelog": "## puppetlabs-puppetdb changelog\n\nRelease notes for the puppetlabs-puppetdb module.\n\n#### 7.10.0 - 2021/12/16\n* Add support for Debian 11\n* Allow puppetlabs/stdlib 8.0.0\n* Default to PostgreSQL 11 when using PuppetDB 7.0.0 or later\n* Update minimum version of puppetlabs/postgresql module to 6.5.0\n\n#### 7.9.0 - 2021/06/23\n\n* When `manage_database` is true, it will create a read-only user in postgres\n and configure PuppetDB to use that user for its read-database connection\n pool\n* Update module dependencies for inifile, firewall, and stdlib\n\n#### 7.8.0 - 2021/03/25\n\n* Added an option `postgresql_ssl_on` to enable an SSL connection between\n PostgreSQL and PuppetDB using Puppet agent certificates to verify the\n connection and authorize PuppetDB to access the puppetdb database.\n* Update our metadata to allow puppetlabs-postgresql 7 (this fixes an issue on el8)\n\n#### 7.7.1 - 2020/12/15\n\n* When using Puppet 7 or newer, the connection validator will use the new HTTP\n client. This removes a deprecation warning in the agent output.\n\n#### 7.7.0 - 2020/11/05\n\n* When applied to a node running puppet `7.0.0` or newer, the\n `puppetdb::master::config` class will default to the `json` fact cache. 
See\n [PUP-10656](https://tickets.puppetlabs.com/browse/PUP-10656) for more\n information.\n\n#### 7.6.0 - 2020/09/02\n\n* Added `migrate` parameter to manage the database.ini config option\n* Added Ubuntu 20.04 LTS as a supported platform\n\n#### 7.5.0 - 2020/06/10\n\n* Added `java_bin` parameter to set the full path to the java bin\n* Added `node_purge_gc_batch_limit` parameter\n* Added `facts_blacklist` parameter to manage the database.ini config option\n* Added `manage_db_password` and `manage_read_db_password` parameters\n* Updated functions to use puppet4 functions\n* Added `enable_storeconfigs` parameter, which specifies whether or not to manage the master's storeconfigs (default: `true`)\n\n#### 7.4.0 - 2019/06/14\n\nThis is a minor feature release.\n\nDetailed changes:\n* Add the `manage_database` parameter to skip database and extension creation\n\n#### 7.3.0 - 2019/06/14\n\nThis is a minor feature release.\n\nDetailed changes:\n* Update module dependencies for firewall and stdlib\n\n#### 7.2.0 - 2019/05/17\n\nThis is a minor feature release.\n\nDetailed changes:\n* Update module dependencies for inifile and PostgreSQL\n\n#### 7.1.0 - 2018/10/02\n\nThis is a minor feature release.\n\nDetailed changes:\n* Fix issue with DLO path default being hardcoded\n* Update module dependencies to allow compatibility with Puppet 6\n\n------------------------------------------\n\n#### 7.0.1 - 2018/07/30\n\nThis is a minor bugfix release.\n\nDetailed changes:\n* Update the upper bound of required puppet version in metadata.json (Thanks @ekohl!)\n\n------------------------------------------\n\n#### 7.0.0 - 2018/06/27\n\nThis is a major release that replaces validate_* methods with data types.\nThe minimum required version of puppetlabs/stdlib has been bumped to 4.13.1\nin order to get the new data types. Thanks very much to @bastelfreak for your\nsubmissions!\n\nDetailed changes:\n* Require puppetlabs/stdlib >= 4.13.1\n* Bump puppet-lint to version 2\n* Bump minimal recommended puppet4 version to 4.7.1\n* Replace uses of validate_* methods in favor of data types (Thanks @bastelfreak!)\n* Add data type for ttl (Thanks @bastelfreak!)\n* Update list of supported platforms\n* Retire the previously deprecated `database_ssl` and `read_database_ssl` params in favor of `jdbc_ssl_properties` and `read_database_jdbc_ssl_properties`\n\n------------------------------------------\n\n#### 6.0.2 - 2017/11/06\n\nThis is a minor bugfix release.\n\nDetailed changes:\n\n * Update the upper bound of the puppetlabs inifile dependency\n * Explicitly add database dependency in the PostgreSQL manifest (Thanks @pgassmann!)\n\n------------------------------------------\n\n#### 6.0.1 - 2017/06/05\n\nThis is a minor bugfix release.\n\nDetailed changes:\n\n* Update the required puppet version in metadata.json\n\n------------------------------------------\n\n#### 6.0.0 - 2017/06/05\n\nThis is a major release to support PuppetDB 5.0. Note that the default\nPostgreSQL version is now 9.6, the minimum required by PuppetDB 5.0. If you're\nrunning an older version, be sure to explicitly specify it when upgrading the\nmodule so it doesn't get upgraded out from under you.\n\nDetailed changes:\n\n* Require Puppet >= 4.7\n* If unspecified, install PostgreSQL version 9.6\n* Default node-ttl and node-purge-ttl to 7 days and 14 days, respectively.\n* Support puppetlabs-postgresql version 5.x (Thanks @dhollinger!)\n* Add create_service_resource param to avoid duplicate resource\n errors in some situations. 
(Thanks @kpaulisse!)\n* Configure the master service as 'enabled' when it is automatically added\n (Thanks @tampakrap!)\n* Add concurrent_writes parameter (Thanks @aperiodic!)\n* Add cipher_suites option to configure jetty's SSL support (Thanks @selyx!)\n* Add support for Ruby 2.3.1 (Thanks @ghoneycutt!)\n* Specify mode of routes.yaml (Thanks @tampakrap!)\n* Add [read_]database_max_pool_size parameter (Thanks @kpaulisse and @vine77!)\n* Fix user/group names on OpenBSD (Thanks @buzzdeee!)\n* Enforce permissions of managed ini files (Thanks @kbarber!)\n* Manage the pg_trgm database extension (Thanks @PascalBourdier!)\n* Default open_ssl_listen_port to undef instead of true (Thanks @mmckinst!)\n\n\n------------------------------------------\n\n#### 5.1.2 - 2016/03/14\n\nThis is a minor bugfix release.\n\nDetailed changes:\n\n* Support RHEL upgrades from the `puppetdb-terminus` (<= PuppetDB 2) to the\n`puppetdb-termini` (>= PuppetDB 3).\n\n------------------------------------------\n\n#### 5.1.1 - 2016/02/09\n\nThis is a minor bugfix release.\n\nDetailed changes:\n\n* Revert a change to 'puppetdb-terminus' installation process that occurred in\nthe last release.\n\n------------------------------------------\n\n#### 5.1.0 - 2016/02/09\n\nThis is a minor feature release.\n\nDetailed changes:\n\n* Use 'puppetdb-terminus' as the terminus package on RHEL, to avoid packaging\n conflicts that could occur on upgrades from 2.x to 3.x. The\n 'puppetdb-terminus' version 3.x package on RHEL will install\n 'puppetdb-termini' as a dependency.\n* Add jdbc_ssl_properties parameter.\n* Pass 'dport' parameter to puppetlabs/firewall instead of the deprecated 'port'.\n* Pass database_port parameter to the postgresql class.\n* Manage the puppetdb vardir.\n* Allow default java_args to be overridden.\n* Linting fixes.\n\n------------------------------------------\n\n#### 5.0.0 - 2015/07/08\n\nThis is a major release to provide default support for PuppetDB 3.0.0, so\nlots of changes have been introduced. Ensure you read the upgrade guide\nprovided in the README before upgrading to this release.\n\nDetailed changes:\n\n* Packaging paths by default favour the PDB 3.0.0 AIO paths now.\n* Added legacy handling for old terminus & service versions (see upgrade guide\n in README for details)\n* PDB 3.0.0 introduces new pathing for the API requests, so all the defaults\n for this module are switched to use that now.\n* Support for Puppet 4 added.\n* manage_pg_repo is now on by default when using the puppetlabs/postgresql module,\n as PDB 3.0.0 supports only 9.4. This enables the use of the upstream PGDG\n PostgreSQL repos for all distros to obtain a working version of 9.4. 
The\n option can be disabled if required.\n* Default ssl-host is now 0.0.0.0\n\n------------------------------------------\n\n#### 4.3.0 - 2015/06/10\n\nThis is a minor feature release.\n\nDetailed changes:\n\n* Feature: Provide `database_embedded_path` option for overriding HSQLDB file path.\n* Feature: Add ability to manage `command_threads`, `store_usage` and `temp_usage`.\n* Bug: allow database_validation to be false\n* Bug: Fix ordering issues with read_database_ini\n* Testing: Fix file_concat dependency and fix rspec warnings\n\n------------------------------------------\n\n#### 4.2.1 - 2015/04/07\n\nThis is a minor bugfix release.\n\nDetailed Changes:\n\n* Ignore `._foo` files when building the `.tar.gz` of the module.\n\n------------------------------------------\n\n#### 4.2.0 - 2015/04/02\n\nThis is a minor feature release.\n\nDetailed Changes:\n\n* Added Puppet 4 compatibility by introspecting the value for `$puppet_confdir`.\n* Added `masterless` param switch to enable or disable the masterless setup of PuppetDB.\n* Added `manage_package_repo` param which will setup the official PostgreSQL repositories on your host.\n* Added FreeBSD support.\n* The puppetdb service now restarts if the certificates change.\n* `manage_firewall` and `ssl_protocols` are now configurable through the top-level puppetdb class.\n* Show the puppetdb server URI scheme in connection errors.\n* `test_url` param is now properly passed from the resource to the provider.\n* Removed dead PE code and unused variables from the module.\n* New parameter `puppetdb_disable_ssl` to enable validation to use cleartext.\n* Database validation is now optional via the `database_validate` and `read_database_validate` params.\n* Documentation updates to the README and metadata.json.\n\n------------------------------------------\n\n#### 4.1.0 - 2014/11/13\n\nThis is a minor feature release.\n\nDetailed Changes:\n\n* New capabilities added for installing SSL certificates and keys.\n* New parameter `puppetdb_disable_ssl` to enable validation to use cleartext.\n* `ssl_protocols` now provided to allow users to fine tune what protocols they want to support for PuppetDB.\n* Lots of documentation and parameter cleanups, to ensure consistency.\n* test_url is now supported for `puppetdb::master::config` to allow the URL one uses to be overridden.\n* Corrected PE detection support.\n* Correct the path for HSQLDB to use /var/lib/puppetdb/db instead of /usr/share/puppetdb/db as is standard in PuppetDB core.\n\n------------------------------------------\n\n#### 4.0.0 - 2014/09/16\n\nFor this release, all dependency versions have been bumped to their latest.\n\nDetailed Changes:\n\n* The PuppetDB module now only supports Puppet 3.7.1 or later\n* 'puppetlabs/postgresql' 4.0.0 or later is now required\n* 'puppetlabs/inifile' 1.1.3 or later is now required\n* 'puppetlabs/firewall' 1.1.3 or later is now required\n* 'puppetlabs/stdlib' 4.2.2 or later is now required\n* The parameter `manage_firewall` for the class `puppetdb::database::postgresql` has now been removed, since the postgresql module no longer supports this.\n* The parameter `open_postgres_port` for the class `puppetdb` has also been removed, due to postgresql changes.\n\n------------------------------------------\n\n#### 3.0.1 - 2014/02/11\n\nThis release contains only minor bug fixes.\n\nDetailed Changes:\n\n* Add missing PUBLISHER_LOGIN variable for auto-publish. 
(Ashley Penney)\n* fix validation regular expressions for time configs (Scott Duckworth)\n* update ripienaar/concat -> puppetlabs/concat (Joshua Hoblitt)\n* Fix issue with validator when disable_ssl = true (Elliott Barrere)\n* Enable soft_write_failure setting when $puppetdb::disablessl = true (Elliott Barrere)\n* Support rspec-puppet v1.0.0 (Garrett Honeycutt)\n* Pin rspec-puppet to 1.x releases (Ken Barber)\n* Define parameter in puppetdb class to define postgres listen address (Adrian Lopez)\n* Enable fast finish in Travis (Garrett Honeycutt)\n* Convert tests to beaker (Ashley Penney)\n* Use the /v2 metrics endpoint instead of /metrics (Ken Barber)\n\n------------------------------------------\n\n#### 3.0.0 - 2013/10/27\n\nThis major release changes the main dependency for the postgresql module from\nversion 2.5.x to 3.x. Since the postgresql module is not backwards compatible,\nthis release is also not backwards compatible. As a consequence we have taken\nsome steps to deprecate some of the older functionality:\n\n* The parameter manage_redhat_firewall for the class puppetdb has now been removed completely in favor of open_postgres_port and open_ssl_listen_port.\n* The parameter manage_redhat_firewall for the class puppetdb::database::postgresql, has now been renamed to manage_firewall.\n* The parameter manage_redhat_firewall for the class puppetdb::server has now been removed completely in favor of open_listen_port and open_ssl_listen_port.\n* The internal class: puppetdb::database::postgresql_db has been removed. If you were using this, it is now defunct.\n* The class puppetdb::server::firewall has been marked as private, do not use it directly.\n* The class puppetdb::server::jetty_ini and puppetdb::server::database_ini have been marked as private, do not use it directly.\n\nAll of this is documented in the upgrade portion of the README.\n\nAdditionally some features have been included in this release as well:\n\n* soft_write_failure can now be enabled in your puppetdb.conf with this\n module to handle failing silently when your PuppetDB is not available\n during writes.\n* There is a new switch to enable SSL connectivity to PostgreSQL. While this\n functionality is only in its infancy this is a good start.\n\nDetailed Changes:\n\n* FM-103: Add metadata.json to all modules. 
(Ashley Penney)\n* Add soft_write_failure to puppetdb.conf (Garrett Honeycutt)\n* Add switch to configure database SSL connection (Stefan Dietrich)\n* (GH-91) Update to use rspec-system-puppet 2.x (Ken Barber)\n* (GH-93) Switch to using puppetlabs-postgresql 3.x (Ken Barber)\n* Fix copyright and project notice (Ken Barber)\n* Adjust memory for PuppetDB tests to avoid OOM killer (Ken Barber)\n* Ensure ntpdate executes early during testing (Ken Barber)\n\n------------------------------------------\n\n#### 2.0.0 - 2013/10/04\n\nThis major release changes the main dependency for the inifile module from\nthe deprecated `cprice404/inifile` to `puppetlabs/inifile` to remove\ndeprecation warnings and to move on to the latest and greatest implementation\nof that code.\n\nIt's a major release, because it may affect other dependencies since modules\ncannot have overlapping second part dependencies (that is, inifile cannot be from\ntwo different locations).\n\nIt also adds the parameter `puppetdb_service_status` to the class `puppetdb` to\nallow users to specify whether the module manages the puppetdb service for you.\n\nThe `database_password` parameter is now optional, and initial Arch Linux\nsupport has been added.\n\nDetailed Changes:\n\n* (GH-73) Switch to puppetlabs/inifile from cprice/inifile (Ken Barber)\n* Make database_password an optional parameter (Nick Lewis)\n* add archlinux support (Niels Abspoel)\n* Added puppetdb service control (Akos Hencz)\n\n------------------------------------------\n\n#### 1.6.0 - 2013/08/07\n\nThis minor feature release provides extra parameters for new configuration\nitems available in PuppetDB 1.4, and also provides some older parameters\nthat were missed previously:\n\n* gc_interval\n* log_slow_statements\n* conn_max_age\n* conn_keep_alive\n* conn_lifetime\n\nConsult the README.md file, or the PuppetDB documentation for more details.\n\n------------------------------------------\n\n#### 1.5.0 - 2013/07/18\n\nThis minor feature release provides the following new functionality:\n\n* The module is now capable of managing PuppetDB on SUSE systems\n for which PuppetDB packages are available\n* The ruby code for validating the PuppetDB connection now\n supports validating on a non-SSL HTTP port.\n\n------------------------------------------\n\n#### 1.4.0 - 2013/05/13\n\nThis feature release provides support for managing the puppetdb report\nprocessor on your master.\n\nTo enable the report processor, you can do something like this:\n\n class { 'puppetdb::master::config':\n manage_report_processor => true,\n enable_reports => true\n }\n\nThis will add the 'puppetdb' report processor to the list of `reports`\ninside your master's `puppet.conf` file.\n\n------------------------------------------\n\n#### 1.3.0 - 2013/05/13\n\nThis feature release provides us with a few new features for the PuppetDB\nmodule.\n\nYou can now disable SSL when using the `puppetdb` class by using the new\nparameter `disable_ssl`:\n\n class { 'puppetdb':\n disable_ssl => true,\n }\n\nThis will remove the SSL settings from your `jetty.ini` configuration file,\ndisabling any SSL communication. This is useful when you want to offload SSL\nto another web server, such as Apache or Nginx.\n\nWe have now added an option `java_args` for passing in Java options to\nPuppetDB. 
The format is a hash that is passed in when declaring the use of the\n`puppetdb` class:\n\n class { 'puppetdb':\n java_args => {\n '-Xmx' => '512m',\n '-Xms' => '256m',\n }\n }\n\nAlso, the default `report-ttl` was set to `14d` in PuppetDB to align it with an\nupcoming PE release, so we've also reflected that default here now.\n\nAnd finally, we've fixed the issue whereby the options `report_ttl`, `node_ttl`,\n`node_purge_ttl` and `gc_interval` were not making the correct changes. On top\nof that, you can now set these values to zero in the module, and the correct\ntime modifier (`s`, `m`, `h` etc.) will automatically get applied for you.\n\nBehind the scenes we've also added system and unit testing, which was\npreviously non-existent. This should help us reduce regression going forward.\n\nThanks to all the contributing developers in the list below that made this\nrelease possible :-).\n\n#### Changes\n\n* Allows for 0 _ttl's without time signifier and enables tests (Garrett Honeycutt)\n* Add option to disable SSL in Jetty, including tests and documentation (Christian Berg)\n* Cleaned up ghoneycutt's code a tad (Ken Barber)\n* the new settings report_ttl, node_ttl and node_purge_ttl were added but they are not working, this fixes it (fsalum)\n* Also fix gc_interval (Ken Barber)\n* Support for remote puppetdb (Filip Hrbek)\n* Added support for Java VM options (Karel Brezina)\n* Add initial rspec-system tests and scaffolding (Ken Barber)\n\n------------------------------------------\n\n#### 1.2.1 - 2013/04/08\n\nThis is a minor bugfix that solves the PuppetDB startup exception:\n\n java.lang.AssertionError: Assert failed: (string? s)\n\nThis was due to the default `node-ttl` and `node-purge-ttl` settings not having a time suffix. These settings required 's', 'm', 'd' etc. to be suffixed, even if they are zero.\n\n#### Changes\n\n* (Ken Barber) Add 's' suffix to period settings to avoid exceptions in PuppetDB\n\n------------------------------------------\n\n#### 1.2.0 - 2013/04/05\n\nThis release is primarily about providing full configuration file support in the module for PuppetDB 1.2.0. (The alignment of version is a coincidence, I assure you :-).\n\nThis feature release adds the following new configuration parameters to the main `puppetdb` class:\n\n* node_ttl\n* node_purge_ttl (available in >=1.2.0)\n* report_ttl\n\nConsult the README for further details about these new configurable items.\n\n##### Changes\n\n* (Nick Lewis) Add params and ini settings for node/purge/report ttls and document them\n\n------------------------------------------\n\n1.1.5\n=====\n\n2013-02-13 - Karel Brezina\n * Fix database creation so database_username, database_password and\n database_name are correctly passed during database creation.\n\n2013-01-29 - Lauren Rother\n * Change README to conform to new style and various other README improvements\n\n2013-01-17 - Chris Price\n * Improve documentation in init.pp\n\n------------------------------------------\n\n1.1.4\n=====\n\nThis is a bugfix release, mostly around fixing backward-compatibility for the\ndeprecated `manage_redhat_firewall` parameter. 
It wasn't actually entirely\nbackwards-compatible in the 1.1.3 release.\n\n2013-01-17 - Chris Price \n * Fix backward compatibility of `manage_redhat_firewall` parameter (de20b44)\n\n2013-01-16 - Chris Price \n * Fix deprecation warnings around manage_redhat_firewall (448f8bc)\n\n------------------------------------------\n\n1.1.3\n=====\n\nThis is mostly a maintenance release, to update the module dependencies to newer\nversions in preparation for some new features. This release does include some nice\nadditions around the ability to set the listen address for the HTTP port on Jetty\nand manage the firewall for that port. Thanks very much to Drew Blessing for those\nsubmissions!\n\n2013-01-15 - Chris Price \n * Update Modulefile for 1.1.3 release (updates dependencies\n on postgres and inifile modules to the latest versions) (76bfd9e)\n\n2012-12-19 - Garrett Honeycutt \n * (#18228) updates README for style (fd2e990)\n\n2012-11-29 - Drew Blessing \n * 17594 - Fixes suggested by cprice-puppet (0cf9632)\n\n2012-11-14 - Drew Blessing \n * Adjust examples in tests to include new port params (0afc276)\n\n2012-11-13 - Drew Blessing \n * 17594 - PuppetDB - Add ability to set standard host listen address and open firewall\n\n------------------------------------------\n\n1.1.2\n=====\n\n2012-10-26 - Chris Price (1.1.2)\n * 1.1.2 release\n\n2012-10-26 - Chris Price \n * Add some more missing `inherit`s for `puppetdb::params` (a72cc7c)\n\n2012-10-26 - Chris Price (1.1.2)\n * 1.1.1 release\n\n2012-10-26 - Chris Price (1.1.1)\n * Add missing `inherit` for `puppetdb::params` (ea9b379)\n\n2012-10-24 - Chris Price \n * 1.1.0 release\n\n2012-10-24 - Chris Price (1.1.0)\n * Update postgres dependency to puppetlabs/postgresql (bea79b4)\n\n2012-10-17 - Reid Vandewiele (1.1.0)\n * Fix embedded db setup in Puppet Enterprise (bf0ab45)\n\n2012-10-17 - Chris Price (1.1.0)\n * Update manifests/master/config.pp (b119a30)\n\n2012-10-16 - Chris Price (1.1.0)\n * Make puppetdb startup timeout configurable (783b595)\n\n2012-10-01 - Hunter Haugen (1.1.0)\n * Add condition to detect PE installations and provide different parameters (63f1c52)\n\n2012-10-01 - Hunter Haugen (1.1.0)\n * Add example manifest code for pe puppet master (a598edc)\n\n2012-10-01 - Chris Price (1.1.0)\n * Update comments and docs w/rt PE params (b5df5d9)\n\n2012-10-01 - Hunter Haugen (1.1.0)\n * Adding pe_puppetdb tests class (850e039)\n\n2012-09-28 - Hunter Haugen (1.1.0)\n * Add parameters to enable usage of enterprise versions of PuppetDB (df6f7cc)\n\n2012-09-23 - Chris Price \n * 1.0.3 release\n\n2012-09-23 - Chris Price \n * Add a parameter for restarting puppet master (179b337)\n\n2012-09-21 - Chris Price \n * 1.0.2 release\n\n2012-09-21 - Chris Price \n * Pass 'manage_redhat_firewall' param through to postgres (f21740b)\n\n2012-09-20 - Chris Price \n * 1.0.1 release\n\n2012-09-20 - Garrett Honeycutt \n * complies with style guide (1aab5d9)\n\n2012-09-19 - Chris Price \n * Fix invalid subname in database.ini (be683b7)\n\n2011-09-18 Chris Price - 1.0.0\n* Initial 1.0.0 release\n", + "license": "\n Apache License\n Version 2.0, January 2004\n http://www.apache.org/licenses/\n\n TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n 1. 
Definitions.\n\n \"License\" shall mean the terms and conditions for use, reproduction,\n and distribution as defined by Sections 1 through 9 of this document.\n\n \"Licensor\" shall mean the copyright owner or entity authorized by\n the copyright owner that is granting the License.\n\n \"Legal Entity\" shall mean the union of the acting entity and all\n other entities that control, are controlled by, or are under common\n control with that entity. For the purposes of this definition,\n \"control\" means (i) the power, direct or indirect, to cause the\n direction or management of such entity, whether by contract or\n otherwise, or (ii) ownership of fifty percent (50%) or more of the\n outstanding shares, or (iii) beneficial ownership of such entity.\n\n \"You\" (or \"Your\") shall mean an individual or Legal Entity\n exercising permissions granted by this License.\n\n \"Source\" form shall mean the preferred form for making modifications,\n including but not limited to software source code, documentation\n source, and configuration files.\n\n \"Object\" form shall mean any form resulting from mechanical\n transformation or translation of a Source form, including but\n not limited to compiled object code, generated documentation,\n and conversions to other media types.\n\n \"Work\" shall mean the work of authorship, whether in Source or\n Object form, made available under the License, as indicated by a\n copyright notice that is included in or attached to the work\n (an example is provided in the Appendix below).\n\n \"Derivative Works\" shall mean any work, whether in Source or Object\n form, that is based on (or derived from) the Work and for which the\n editorial revisions, annotations, elaborations, or other modifications\n represent, as a whole, an original work of authorship. For the purposes\n of this License, Derivative Works shall not include works that remain\n separable from, or merely link (or bind by name) to the interfaces of,\n the Work and Derivative Works thereof.\n\n \"Contribution\" shall mean any work of authorship, including\n the original version of the Work and any modifications or additions\n to that Work or Derivative Works thereof, that is intentionally\n submitted to Licensor for inclusion in the Work by the copyright owner\n or by an individual or Legal Entity authorized to submit on behalf of\n the copyright owner. For the purposes of this definition, \"submitted\"\n means any form of electronic, verbal, or written communication sent\n to the Licensor or its representatives, including but not limited to\n communication on electronic mailing lists, source code control systems,\n and issue tracking systems that are managed by, or on behalf of, the\n Licensor for the purpose of discussing and improving the Work, but\n excluding communication that is conspicuously marked or otherwise\n designated in writing by the copyright owner as \"Not a Contribution.\"\n\n \"Contributor\" shall mean Licensor and any individual or Legal Entity\n on behalf of whom a Contribution has been received by Licensor and\n subsequently incorporated within the Work.\n\n 2. Grant of Copyright License. Subject to the terms and conditions of\n this License, each Contributor hereby grants to You a perpetual,\n worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n copyright license to reproduce, prepare Derivative Works of,\n publicly display, publicly perform, sublicense, and distribute the\n Work and such Derivative Works in Source or Object form.\n\n 3. Grant of Patent License. 
Subject to the terms and conditions of\n this License, each Contributor hereby grants to You a perpetual,\n worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n (except as stated in this section) patent license to make, have made,\n use, offer to sell, sell, import, and otherwise transfer the Work,\n where such license applies only to those patent claims licensable\n by such Contributor that are necessarily infringed by their\n Contribution(s) alone or by combination of their Contribution(s)\n with the Work to which such Contribution(s) was submitted. If You\n institute patent litigation against any entity (including a\n cross-claim or counterclaim in a lawsuit) alleging that the Work\n or a Contribution incorporated within the Work constitutes direct\n or contributory patent infringement, then any patent licenses\n granted to You under this License for that Work shall terminate\n as of the date such litigation is filed.\n\n 4. Redistribution. You may reproduce and distribute copies of the\n Work or Derivative Works thereof in any medium, with or without\n modifications, and in Source or Object form, provided that You\n meet the following conditions:\n\n (a) You must give any other recipients of the Work or\n Derivative Works a copy of this License; and\n\n (b) You must cause any modified files to carry prominent notices\n stating that You changed the files; and\n\n (c) You must retain, in the Source form of any Derivative Works\n that You distribute, all copyright, patent, trademark, and\n attribution notices from the Source form of the Work,\n excluding those notices that do not pertain to any part of\n the Derivative Works; and\n\n (d) If the Work includes a \"NOTICE\" text file as part of its\n distribution, then any Derivative Works that You distribute must\n include a readable copy of the attribution notices contained\n within such NOTICE file, excluding those notices that do not\n pertain to any part of the Derivative Works, in at least one\n of the following places: within a NOTICE text file distributed\n as part of the Derivative Works; within the Source form or\n documentation, if provided along with the Derivative Works; or,\n within a display generated by the Derivative Works, if and\n wherever such third-party notices normally appear. The contents\n of the NOTICE file are for informational purposes only and\n do not modify the License. You may add Your own attribution\n notices within Derivative Works that You distribute, alongside\n or as an addendum to the NOTICE text from the Work, provided\n that such additional attribution notices cannot be construed\n as modifying the License.\n\n You may add Your own copyright statement to Your modifications and\n may provide additional or different license terms and conditions\n for use, reproduction, or distribution of Your modifications, or\n for any such Derivative Works as a whole, provided Your use,\n reproduction, and distribution of the Work otherwise complies with\n the conditions stated in this License.\n\n 5. Submission of Contributions. Unless You explicitly state otherwise,\n any Contribution intentionally submitted for inclusion in the Work\n by You to the Licensor shall be under the terms and conditions of\n this License, without any additional terms or conditions.\n Notwithstanding the above, nothing herein shall supersede or modify\n the terms of any separate license agreement you may have executed\n with Licensor regarding such Contributions.\n\n 6. Trademarks. 
This License does not grant permission to use the trade\n names, trademarks, service marks, or product names of the Licensor,\n except as required for reasonable and customary use in describing the\n origin of the Work and reproducing the content of the NOTICE file.\n\n 7. Disclaimer of Warranty. Unless required by applicable law or\n agreed to in writing, Licensor provides the Work (and each\n Contributor provides its Contributions) on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n implied, including, without limitation, any warranties or conditions\n of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n PARTICULAR PURPOSE. You are solely responsible for determining the\n appropriateness of using or redistributing the Work and assume any\n risks associated with Your exercise of permissions under this License.\n\n 8. Limitation of Liability. In no event and under no legal theory,\n whether in tort (including negligence), contract, or otherwise,\n unless required by applicable law (such as deliberate and grossly\n negligent acts) or agreed to in writing, shall any Contributor be\n liable to You for damages, including any direct, indirect, special,\n incidental, or consequential damages of any character arising as a\n result of this License or out of the use or inability to use the\n Work (including but not limited to damages for loss of goodwill,\n work stoppage, computer failure or malfunction, or any and all\n other commercial damages or losses), even if such Contributor\n has been advised of the possibility of such damages.\n\n 9. Accepting Warranty or Additional Liability. While redistributing\n the Work or Derivative Works thereof, You may choose to offer,\n and charge a fee for, acceptance of support, warranty, indemnity,\n or other liability obligations and/or rights consistent with this\n License. However, in accepting such obligations, You may act only\n on Your own behalf and on Your sole responsibility, not on behalf\n of any other Contributor, and only if You agree to indemnify,\n defend, and hold each Contributor harmless for any liability\n incurred by, or claims asserted against, such Contributor by reason\n of your accepting any such warranty or additional liability.\n\n END OF TERMS AND CONDITIONS\n\n APPENDIX: How to apply the Apache License to your work.\n\n To apply the Apache License to your work, attach the following\n boilerplate notice, with the fields enclosed by brackets \"[]\"\n replaced with your own identifying information. (Don't include\n the brackets!) The text should be enclosed in the appropriate\n comment syntax for the file format. 
We also recommend that a\n file or class name and description of purpose be included on the\n same \"printed page\" as the copyright notice for easier\n identification within third-party archives.\n\n Copyright [yyyy] [name of copyright owner]\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n", + "reference": null, + "malware_scan": null, + "tasks": [ + + ], + "plans": [ + + ], + "created_at": "2021-12-16 14:57:46 -0800", + "updated_at": "2021-12-16 14:58:44 -0800", + "deleted_at": null, + "deleted_for": null + }, + "releases": [ + { + "uri": "/v3/releases/puppetlabs-puppetdb-7.10.0", + "slug": "puppetlabs-puppetdb-7.10.0", + "version": "7.10.0", + "supported": false, + "created_at": "2021-12-16 14:57:46 -0800", + "deleted_at": null, + "file_uri": "/v3/files/puppetlabs-puppetdb-7.10.0.tar.gz", + "file_size": 42806 + }, + { + "uri": "/v3/releases/puppetlabs-puppetdb-7.9.0", + "slug": "puppetlabs-puppetdb-7.9.0", + "version": "7.9.0", + "supported": false, + "created_at": "2021-06-24 07:48:54 -0700", + "deleted_at": null, + "file_uri": "/v3/files/puppetlabs-puppetdb-7.9.0.tar.gz", + "file_size": 42773 + }, + { + "uri": "/v3/releases/puppetlabs-puppetdb-1.0.0", + "slug": "puppetlabs-puppetdb-1.0.0", + "version": "1.0.0", + "supported": false, + "created_at": "2012-09-19 16:51:22 -0700", + "deleted_at": null, + "file_uri": "/v3/files/puppetlabs-puppetdb-1.0.0.tar.gz", + "file_size": 16336 + } + ], + "feedback_score": 74, + "homepage_url": "http://github.com/puppetlabs/puppetlabs-puppetdb", + "issues_url": "https://tickets.puppetlabs.com/browse/PDB" + }, + { + "uri": "/v3/modules/saz-memcached", + "slug": "saz-memcached", + "name": "memcached", + "downloads": 2647652, + "created_at": "2011-11-20 13:40:08 -0800", + "updated_at": "2022-07-11 03:34:55 -0700", + "deprecated_at": null, + "deprecated_for": null, + "superseded_by": null, + "supported": false, + "endorsement": null, + "module_group": "base", + "owner": { + "uri": "/v3/users/saz", + "slug": "saz", + "username": "saz", + "gravatar_id": "d24714d241768d79a194d73fc1bdf1ef" + }, + "premium": false, + "current_release": { + "uri": "/v3/releases/saz-memcached-8.1.0", + "slug": "saz-memcached-8.1.0", + "module": { + "uri": "/v3/modules/saz-memcached", + "slug": "saz-memcached", + "name": "memcached", + "deprecated_at": null, + "owner": { + "uri": "/v3/users/saz", + "slug": "saz", + "username": "saz", + "gravatar_id": "d24714d241768d79a194d73fc1bdf1ef" + } + }, + "version": "8.1.0", + "metadata": { + "name": "saz-memcached", + "version": "8.1.0", + "author": "saz", + "summary": "Manage memcached via Puppet", + "license": "Apache-2.0", + "source": "git://github.com/saz/puppet-memcached.git", + "project_page": "https://github.com/saz/puppet-memcached", + "issues_url": "https://github.com/saz/puppet-memcached/issues", + "description": "Manage memcached via Puppet", + "requirements": [ + { + "name": "puppet", + "version_requirement": ">= 6.1.0 < 8.0.0" + } + ], + "dependencies": [ + { + "name": "puppetlabs/stdlib", + "version_requirement": ">= 4.13.1 < 9.0.0" + 
}, + { + "name": "puppetlabs/firewall", + "version_requirement": ">= 0.1.0 < 4.0.0" + }, + { + "name": "puppet/systemd", + "version_requirement": ">= 2.10.0 < 4.0.0" + }, + { + "name": "puppet/selinux", + "version_requirement": ">= 3.2.0 < 4.0.0" + } + ], + "operatingsystem_support": [ + { + "operatingsystem": "RedHat", + "operatingsystemrelease": [ + "7", + "8", + "9" + ] + }, + { + "operatingsystem": "CentOS", + "operatingsystemrelease": [ + "7", + "8", + "9" + ] + }, + { + "operatingsystem": "OracleLinux", + "operatingsystemrelease": [ + "7" + ] + }, + { + "operatingsystem": "Scientific", + "operatingsystemrelease": [ + "7" + ] + }, + { + "operatingsystem": "Debian", + "operatingsystemrelease": [ + "9", + "10", + "11" + ] + }, + { + "operatingsystem": "Ubuntu", + "operatingsystemrelease": [ + "18.04", + "20.04", + "22.04" + ] + }, + { + "operatingsystem": "Windows" + }, + { + "operatingsystem": "FreeBSD" + } + ] + }, + "tags": [ + "debian", + "ubuntu", + "redhat", + "amazon", + "fedora", + "memcached", + "linux", + "centos" + ], + "supported": false, + "pdk": false, + "validation_score": 100, + "file_uri": "/v3/files/saz-memcached-8.1.0.tar.gz", + "file_size": 10996, + "file_md5": "aadf80fba5848909429eb002ee1927ea", + "file_sha256": "883d6186e91c2c3fed13ae2009c3aa596657f6707b76f1f7efc6203c6e4ae986", + "downloads": 841, + "readme": "# memached module for Puppet\n\n[![Build Status](https://github.com/saz/puppet-memcached/workflows/CI/badge.svg)](https://github.com/saz/puppet-memcached/actions?query=workflow%3ACI)\n\nManage memcached via Puppet\n\n## Show some love\nIf you find this module useful, send some bitcoins to 1Na3YFUmdxKxJLiuRXQYJU2kiNqA3KY2j9\n\n### Supported Puppet versions\n* Puppet >= 5\n* Last version supporting Puppet 3: v3.0.2\n\n## How to use\n\n```\nStarting with version 3.0.0, memcached will be listening on 127.0.0.1 only.\nThis should make setups more secure (e.g. if there are no firewall rules in place).\n\nTo change this behavior, you need to set listen_ip to '0.0.0.0'.\n```\n\n### Use roughly 90% of memory\n\n```ruby\n class { 'memcached': }\n```\n\n### Set a fixed memory limit in MB\n\n```ruby\n class { 'memcached':\n max_memory => 2048\n }\n```\n\n### Use 12% of available memory\n\n```ruby\n class { 'memcached':\n max_memory => '12%'\n }\n```\n\n### Install multiple memcached instances\n\nthe multiinstance support uses a systemd instance unit file. This will be placed\nat `/etc/systemd/system/memcached@.service`. It allows us to manage multiple\ninstances via the same unit file. To start a simple instance, you only need to\nknow the desired TCP port:\n\n```puppet\nmemcached::instance{'11222':}\n```\n\nthat's it! It will bind to localhost and listen to TCP port 11222. You might\nwant to tune the systemd limits, for example the number of file descriptors\n(LimitNOFILE) or the number of processes (LimitNPROC):\n\n```puppet\nmemcached::instance{'11222':\n limits => {\n 'LimitNOFILE' => 8192,\n 'LimitNPROC' => 16384,\n }\n}\n```\n\nAll systemd limits are documented in the [systemd documentation](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Process%20Properties).\n\nAnother usecase. 
Let's assume your name is Eric and you want to change the\nactual memcached parameters, for example to bind it to every interface:\n\n```puppet\nmemcached::instance{'11222':\n override_content => \"[Service]\\nEnvironment='LISTEN=-l 0.0.0.0'\",\n}\n```\n\nMaybe Eric also wants to override the cache size (the unit is MB):\n\n```puppet\nmemcached::instance{'11222':\n override_content => \"[Service]\\nEnvironment=CACHESIZE=4096\\n\",\n}\n```\n\nlast but not least, Eric might also want to override the maximum amount\nof connections (the default is 1024):\n\n```puppet\nmemcached::instance{'11222':\n override_content => \"[Service]\\nEnvironment=MAXCONN=4096\\n\",\n}\n```\n\nNow Eric wants to use all those three settings at the same time:\n\n```puppet\nmemcached::instance{'11222':\n override_content => \"[Service]\\nEnvironment=MAXCONN=4096\\nEnvironment=CACHESIZE=4096\\nEnvironment='LISTEN=-l 0.0.0.0'\\n\",\n}\n```\n\nInstead of passing a long string with multiple `\\n`, Eric can also put the\ncontent in a file and provide that:\n\n```puppet\nmemcached::instance{'11222':\n override_source => \"${module_name}/memcached_11222_override.conf\\n\",\n}\n```\n\n### Other class parameters\n\n* $package_ensure = 'present'\n* $logfile = '/var/log/memcached.log'\n* $logstdout = false (Set this to true to disable logging to a file/syslog entirely, useful when memcached runs in containers)\n* $pidfile = '/var/run/memcached.pid' (Debian family only, set to false to disable pidfile)\n* $max_memory = false\n* $max_item_size = false\n* $min_item_size = false\n* $factor = false\n* $lock_memory = false (WARNING: good if used intelligently, google for -k key)\n* $listen = '127.0.0.1' (if TLS/SSL is enabled, 'notls' prefix can be used to disable for specific listeners \"notls::\")\n* $listen_ip = '127.0.0.1' (deprecated, listen will take precedence over this)\n* $tcp_port = 11211\n* $udp_port = 0\n* $manage_firewall = false\n* $user = '' (OS specific setting, see params.pp)\n* $max_connections = 8192\n* $verbosity = undef\n* $unix_socket = undef\n* $install_dev = false (TRUE if 'libmemcached-dev' package should be installed)\n* $processorcount = $::processorcount\n* $service_restart = true (restart service after configuration changes, false to prevent restarts)\n* $use_sasl = false (start memcached with SASL support)\n* $use_tls = false (start memcached with TLS support)\n* $tls_cert_chain = undef\n* $tls_key = undef\n* $tls_ca_cert = undef\n* $tls_verify_mode = 1 (0: None, 1: Request, 2: Require, 3: Once)\n* $large_mem_pages = false (try to use large memory pages)\n", + "changelog": "# Changelog\nAll notable changes to this project will be documented in this file.\n\nThe format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),\nand this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\n\n## [8.1.0]\n### Added\n- Support for RedHat 9 and CentOS 9\n\n## [8.0.0]\n### Changed\n- BREAKING CHANGE: switch from camptocamp/systemd to puppet/systemd\n- Improved tests\n- Load modern facts first (#138)\n- Make sure memcached logfile exists (#140)\n- Allow stdlib < 9.0.0\n### Fixed\n- Fix duplicate systemd daemon-reload execs (#137)\n### Added\n- Added support for Debian 11 and Ubuntu 22.04\n\n## [7.0.0]\n### Changed\n- BREAKING CHANGE: Testing for Puppet 5 has been dropped\n- Switched from Travis to Github Actions\n- Dependencies updated to support the newest releases\n\n## [6.0.0]\n### Added\n- Add listen parameter as successor for listen_ip (#127)\n### Deprecated\n- 
listen_ip parameter is deprecated in favor of new listen parameter (#127)\n### Changed\n- Use camptocamp/systemd v2.12.0 for tests, as newer versions might drop support for puppet 5\n### Removed\n- Dropped notls_listener_addr and notls_listener_port parameter in favor of listen_ip (#128)\n\n## [5.0.0]\n### Added\n- Add sasl support on RHEL derivatives (#122)\n- Add notls_listener_addr and notls_listener_port parameters (#124)\n### Changed\n- BREAKING CHANGE: Disable UDP by default (#125)\n If you need UDP enabled, set `memcached::udp_port` to a non-zero value, e.g. 11211\n\n## [4.0.0]\n### Added\n- Support management of multiple memcached instances (systemd required!) #120\n- Add FreeBSD to list of supported operatingsystems\n### Removed\n- Drop support for Puppet 4 (EOL) #116\n\n## [3.7.0]\n### Added\n- Add support to set TLS parameters in /etc/sysconfig/memcached (#113)\n### Fixed\n- Make ssl_ca_cert optional (#112)\n\n## [3.6.0]\n### Added\n- Add TLS support (#109)\n\n## [3.5.0]\n### Fixed\n- allow FreeBSD to set max memory (#104)\n### Changed\n- Dependencies updated (#107)\n- Better FreeBSD tests\n\n## [3.4.0]\n### Fixed\n- factor should be a string or number, not boolean\n### Added\n- Add Puppet 6 to travis checks\n### Changed\n- Update Puppet version requirement to include version 6 (< 7.0.0)\n- Unpin firewall module in fixtures\n- Require puppetlabs_spec_helper >= 2.11.0\n### Removed\n- Drop Ruby 2.1 from travis checks\n", + "license": " Copyright 2011-2016 Steffen Zieger\n Copyright 2014-2016 Garrett Honeycutt \n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n", + "reference": null, + "malware_scan": null, + "tasks": [ + + ], + "plans": [ + + ], + "created_at": "2022-07-11 03:34:55 -0700", + "updated_at": "2022-07-11 03:35:51 -0700", + "deleted_at": null, + "deleted_for": null + }, + "releases": [ + { + "uri": "/v3/releases/saz-memcached-8.1.0", + "slug": "saz-memcached-8.1.0", + "version": "8.1.0", + "supported": false, + "created_at": "2022-07-11 03:34:55 -0700", + "deleted_at": null, + "file_uri": "/v3/files/saz-memcached-8.1.0.tar.gz", + "file_size": 10996 + }, + { + "uri": "/v3/releases/saz-memcached-1.0.0", + "slug": "saz-memcached-1.0.0", + "version": "1.0.0", + "supported": false, + "created_at": "2011-11-20 13:40:30 -0800", + "deleted_at": null, + "file_uri": "/v3/files/saz-memcached-1.0.0.tar.gz", + "file_size": 2472 + } + ], + "feedback_score": 100, + "homepage_url": "https://github.com/saz/puppet-memcached", + "issues_url": "https://github.com/saz/puppet-memcached/issues" + } + ] +} diff --git a/swh/lister/puppet/tests/data/https_forgeapi.puppet.com/v3_modules,limit=100,offset=100 b/swh/lister/puppet/tests/data/https_forgeapi.puppet.com/v3_modules,limit=100,offset=100 new file mode 100644 index 0000000..e5b6a14 --- /dev/null +++ b/swh/lister/puppet/tests/data/https_forgeapi.puppet.com/v3_modules,limit=100,offset=100 @@ -0,0 +1,190 @@ +{ + "pagination": { + "limit": 100, + "offset": 100, + "first": "/v3/modules?limit=100&offset=0", + "previous": "/v3/modules?limit=100&offset=0", + 
"current": "/v3/modules?limit=100&offset=100", + "next": null, + "total": 7301 + }, + "results": [ + { + "uri": "/v3/modules/electrical-file_concat", + "slug": "electrical-file_concat", + "name": "file_concat", + "downloads": 2293802, + "created_at": "2015-04-09 12:03:13 -0700", + "updated_at": "2016-12-28 20:00:02 -0800", + "deprecated_at": null, + "deprecated_for": null, + "superseded_by": null, + "supported": false, + "endorsement": null, + "module_group": "base", + "owner": { + "uri": "/v3/users/electrical", + "slug": "electrical", + "username": "electrical", + "gravatar_id": "46dbd1ee4484b8e993466bd2209858cf" + }, + "premium": false, + "current_release": { + "uri": "/v3/releases/electrical-file_concat-1.0.1", + "slug": "electrical-file_concat-1.0.1", + "module": { + "uri": "/v3/modules/electrical-file_concat", + "slug": "electrical-file_concat", + "name": "file_concat", + "deprecated_at": null, + "owner": { + "uri": "/v3/users/electrical", + "slug": "electrical", + "username": "electrical", + "gravatar_id": "46dbd1ee4484b8e993466bd2209858cf" + } + }, + "version": "1.0.1", + "metadata": { + "name": "electrical-file_concat", + "version": "1.0.1", + "author": "electrical", + "summary": "Library for concatenating different files into 1", + "license": "Apache License, Version 2.0", + "source": "https://github.com/electrical/puppet-lib-file_concat", + "project_page": "https://github.com/electrical/puppet-lib-file_concat", + "issues_url": "https://github.com/electrical/puppet-lib-file_concat/issues", + "operatingsystem_support": [ + { + "operatingsystem": "RedHat", + "operatingsystemrelease": [ + "5", + "6", + "7" + ] + }, + { + "operatingsystem": "CentOS", + "operatingsystemrelease": [ + "5", + "6", + "7" + ] + }, + { + "operatingsystem": "OracleLinux", + "operatingsystemrelease": [ + "5", + "6", + "7" + ] + }, + { + "operatingsystem": "Scientific", + "operatingsystemrelease": [ + "5", + "6", + "7" + ] + }, + { + "operatingsystem": "SLES", + "operatingsystemrelease": [ + "10 SP4", + "11 SP1", + "12" + ] + }, + { + "operatingsystem": "Debian", + "operatingsystemrelease": [ + "6", + "7" + ] + }, + { + "operatingsystem": "Ubuntu", + "operatingsystemrelease": [ + "10.04", + "12.04", + "14.04" + ] + }, + { + "operatingsystem": "Solaris", + "operatingsystemrelease": [ + "10", + "11" + ] + } + ], + "requirements": [ + { + "name": "pe", + "version_requirement": "3.x" + }, + { + "name": "puppet", + "version_requirement": "3.x" + } + ], + "description": "Library for concatenating different files into 1", + "dependencies": [ + + ] + }, + "tags": [ + + ], + "supported": false, + "pdk": false, + "validation_score": 62, + "file_uri": "/v3/files/electrical-file_concat-1.0.1.tar.gz", + "file_size": 13335, + "file_md5": "74901a89544134478c2dfde5efbb7f14", + "file_sha256": "15e973613ea038d8a4f60bafe2d678f88f53f3624c02df3157c0043f4a400de6", + "downloads": 2291838, + "readme": "# puppet-lib-file_concat\n\n#### Table of Contents\n\n1. [Overview](#overview)\n2. [Usage - Configuration options and additional functionality](#usage)\n3. [Limitations - OS compatibility, etc.](#limitations)\n4. [Development - Guide for contributing to the module](#development)\n\n## Overview\n\nLibrary for concatenating multiple files into 1.\n\n## Usage\n\n### Creating a file fragment\n\nCreates a file fragment to be collected by file_concat based on the tag.\n\nExample with exported resource:\n\n @@file_fragment { \"uniqe_name_${::fqdn}\":\n tag => 'unique_tag', # Mandatory.\n order => 10, # Optional. 
Defaults to 10.\n content => 'some content' # OR\n content => template('template.erb') # OR\n source => 'puppet:///path/to/file'\n }\n\n### Concatenating file fragments into one file\n\nGets all the file fragments and puts these into the target file.\nThis will mostly be used with exported resources.\n\nexample:\n \n File_fragment <<| tag == 'unique_tag' |>>\n\n file_concat { '/tmp/file':\n tag => 'unique_tag', # Mandatory\n path => '/tmp/file', # Optional. If given it overrides the resource name.\n owner => 'root', # Optional. Defaults to undef.\n group => 'root', # Optional. Defaults to undef.\n mode => '0644' # Optional. Defaults to undef.\n order => 'numeric' # Optional. Set to 'numeric' or 'alpha'. Defaults to numeric.\n replace => true # Optional. Boolean Value. Defaults to true.\n backup => false # Optional. true, false, 'puppet', or a string. Defaults to 'puppet' for Filebucketing.\n }\n\n## Limitations\n\n## Development\n\n", + "changelog": "##1.0.1 ( Apr 17, 2015 )\n\n###Summary\nBugfix release\n\n####Features\n\n####Bugfixes\n* Fix windows support by not defaulting owner,group and mode values\n\n####Changes\n\n####Testing changes\n\n####Known bugs\n\n\n##1.0.0 ( Apr 09, 2015 )\n\n###Summary\nMajor release.\nThe module has been moved from the ispavailability account on Forge to elecrical.\n\n####Features\n* Major refactoring to enhance functionality\n* Re-use existing file resource to avoid code duplication\n* Make the module more compatiable with puppetlabs-concat\n* Support array of sources\n\n####Bugfixes\n\n####Changes\n\n####Testing changes\n* Add centos 7 acceptance testing\n* Add tests for user/group/mode options\n\n####Known bugs\n\n##0.3.0 ( Mar 26, 2015 )\n\n###Summary\nThis release adds windows support to the library.\n\n####Features\n* Added windows support to the library.\n\n####Bugfixes\n\n####Changes\n\n####Testing changes\n\n####Known bugs\n\n##0.2.1 ( Mar 25, 2015 )\n\n###Summary\nBugfix release\n\n####Features\n\n####Bugfixes\n* Fix a bug caused by some refactoring\n\n####Changes\n\n####Testing changes\n\n####Known bugs\n* Windows is not supported yet\n\n##0.2.0 ( Mar 25, 2015 )\n\n###Summary\nWith this release Ive done several code cleanups and added some basic tests.\nAlso support for puppet-server has been fixed\n\n####Features\n\n####Bugfixes\n* Remove unnecessary require which fixed support for puppet-server\n\n####Changes\n* Added some basic files\n* Implemented rubocop for style checking\n\n####Testing changes\n* Implemented basic acceptance tests\n\n####Known bugs\n* Windows is not supported yet\n\n##0.1.0 ( Jan 21, 2014 )\n Rewrite of the fragment ordering part.\n Fragments are now first ordered based on the order number and then on the resource name.\n Convert `order` parameter to string to support integer values when using Hiera/YAML ( PR#3 by Michael G. 
Noll )\n\n##0.0.2 ( Mar 03, 2013 )\n Adding source variable option to file_fragment type\n\n##0.0.1 ( Jan 13, 2013 )\n Initial release of the module\n", + "license": "Copyright (c) 2013-2015 Richard Pijnenbug \nCopyright (c) 2012 Simon Effenberg \nCopyright (c) 2012 Uwe Stuehler \n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n", + "reference": null, + "malware_scan": null, + "tasks": [ + + ], + "plans": [ + + ], + "created_at": "2015-04-17 01:03:46 -0700", + "updated_at": "2019-07-03 15:45:50 -0700", + "deleted_at": null, + "deleted_for": null + }, + "releases": [ + { + "uri": "/v3/releases/electrical-file_concat-1.0.1", + "slug": "electrical-file_concat-1.0.1", + "version": "1.0.1", + "supported": false, + "created_at": "2015-04-17 01:03:46 -0700", + "deleted_at": null, + "file_uri": "/v3/files/electrical-file_concat-1.0.1.tar.gz", + "file_size": 13335 + }, + { + "uri": "/v3/releases/electrical-file_concat-1.0.0", + "slug": "electrical-file_concat-1.0.0", + "version": "1.0.0", + "supported": false, + "created_at": "2015-04-09 12:03:13 -0700", + "deleted_at": null, + "file_uri": "/v3/files/electrical-file_concat-1.0.0.tar.gz", + "file_size": 13289 + } + ], + "feedback_score": null, + "homepage_url": "https://github.com/electrical/puppet-lib-file_concat", + "issues_url": "https://github.com/electrical/puppet-lib-file_concat/issues" + } + ] +} diff --git a/swh/lister/puppet/tests/test_lister.py b/swh/lister/puppet/tests/test_lister.py new file mode 100644 index 0000000..5dbfd89 --- /dev/null +++ b/swh/lister/puppet/tests/test_lister.py @@ -0,0 +1,106 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.lister.puppet.lister import PuppetLister + +# flake8: noqa: B950 + +expected_origins = { + "https://forge.puppet.com/modules/electrical/file_concat": { + "artifacts": { + "1.0.0": { + "url": "https://forgeapi.puppet.com/v3/files/electrical-file_concat-1.0.0.tar.gz", + "version": "1.0.0", + "filename": "electrical-file_concat-1.0.0.tar.gz", + "last_update": "2015-04-09T12:03:13-07:00", + "checksums": { + "length": 13289, + }, + }, + "1.0.1": { + "url": "https://forgeapi.puppet.com/v3/files/electrical-file_concat-1.0.1.tar.gz", + "version": "1.0.1", + "filename": "electrical-file_concat-1.0.1.tar.gz", + "last_update": "2015-04-17T01:03:46-07:00", + "checksums": { + "md5": "74901a89544134478c2dfde5efbb7f14", + "sha256": "15e973613ea038d8a4f60bafe2d678f88f53f3624c02df3157c0043f4a400de6", + }, + }, + } + }, + "https://forge.puppet.com/modules/puppetlabs/puppetdb": { + "artifacts": { + "1.0.0": { + "url": "https://forgeapi.puppet.com/v3/files/puppetlabs-puppetdb-1.0.0.tar.gz", + "version": "1.0.0", + "filename": "puppetlabs-puppetdb-1.0.0.tar.gz", + "last_update": "2012-09-19T16:51:22-07:00", + "checksums": { + "length": 16336, + }, + }, + "7.9.0": { + "url": 
"https://forgeapi.puppet.com/v3/files/puppetlabs-puppetdb-7.9.0.tar.gz", + "version": "7.9.0", + "filename": "puppetlabs-puppetdb-7.9.0.tar.gz", + "last_update": "2021-06-24T07:48:54-07:00", + "checksums": { + "length": 42773, + }, + }, + "7.10.0": { + "url": "https://forgeapi.puppet.com/v3/files/puppetlabs-puppetdb-7.10.0.tar.gz", + "version": "7.10.0", + "filename": "puppetlabs-puppetdb-7.10.0.tar.gz", + "last_update": "2021-12-16T14:57:46-08:00", + "checksums": { + "md5": "e91a2074ca8d94a8b3ff7f6c8bbf12bc", + "sha256": "49b1a542fbd2a1378c16cb04809e0f88bf4f3e45979532294fb1f03f56c97fbb", + }, + }, + } + }, + "https://forge.puppet.com/modules/saz/memcached": { + "artifacts": { + "1.0.0": { + "url": "https://forgeapi.puppet.com/v3/files/saz-memcached-1.0.0.tar.gz", + "version": "1.0.0", + "filename": "saz-memcached-1.0.0.tar.gz", + "last_update": "2011-11-20T13:40:30-08:00", + "checksums": { + "length": 2472, + }, + }, + "8.1.0": { + "url": "https://forgeapi.puppet.com/v3/files/saz-memcached-8.1.0.tar.gz", + "version": "8.1.0", + "filename": "saz-memcached-8.1.0.tar.gz", + "last_update": "2022-07-11T03:34:55-07:00", + "checksums": { + "md5": "aadf80fba5848909429eb002ee1927ea", + "sha256": "883d6186e91c2c3fed13ae2009c3aa596657f6707b76f1f7efc6203c6e4ae986", + }, + }, + } + }, +} + + +def test_puppet_lister(datadir, requests_mock_datadir, swh_scheduler): + lister = PuppetLister(scheduler=swh_scheduler) + res = lister.run() + + assert res.pages == 2 + assert res.origins == 1 + 1 + 1 + + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results + + assert len(scheduler_origins) == len(expected_origins) + + for origin in scheduler_origins: + assert origin.visit_type == "puppet" + assert origin.url in expected_origins + assert origin.extra_loader_arguments == expected_origins[origin.url] diff --git a/swh/lister/puppet/tests/test_tasks.py b/swh/lister/puppet/tests/test_tasks.py new file mode 100644 index 0000000..c83cdbd --- /dev/null +++ b/swh/lister/puppet/tests/test_tasks.py @@ -0,0 +1,31 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.lister.pattern import ListerStats + + +def test_puppet_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker): + res = swh_scheduler_celery_app.send_task("swh.lister.puppet.tasks.ping") + assert res + res.wait() + assert res.successful() + assert res.result == "OK" + + +def test_puppet_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker): + # setup the mocked PuppetLister + lister = mocker.patch("swh.lister.puppet.tasks.PuppetLister") + lister.from_configfile.return_value = lister + stats = ListerStats(pages=42, origins=42) + lister.run.return_value = stats + + res = swh_scheduler_celery_app.send_task("swh.lister.puppet.tasks.PuppetListerTask") + assert res + res.wait() + assert res.successful() + assert res.result == stats.dict() + + lister.from_configfile.assert_called_once_with() + lister.run.assert_called_once_with() diff --git a/swh/lister/pypi/lister.py b/swh/lister/pypi/lister.py index eefd797..443c21d 100644 --- a/swh/lister/pypi/lister.py +++ b/swh/lister/pypi/lister.py @@ -1,177 +1,177 @@ # Copyright (C) 2018-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See 
top-level LICENSE file for more information from collections import defaultdict from dataclasses import asdict, dataclass from datetime import datetime, timezone import logging from time import sleep from typing import Any, Dict, Iterator, List, Optional, Tuple from xmlrpc.client import Fault, ServerProxy from tenacity.before_sleep import before_sleep_log -from swh.lister.utils import throttling_retry +from swh.lister.utils import http_retry from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin from ..pattern import CredentialsType, Lister logger = logging.getLogger(__name__) # Type returned by the XML-RPC changelog call: # package, version, release timestamp, description, serial ChangelogEntry = Tuple[str, str, int, str, int] # Package update type, a subset of the information # in the ChangelogEntry type: package, max release date PackageUpdate = Tuple[str, datetime] # Type returned by listing a page of results PackageListPage = List[PackageUpdate] @dataclass class PyPIListerState: """State of the PyPI lister""" last_serial: Optional[int] = None """Last seen serial when visiting the pypi instance""" def _if_rate_limited(retry_state) -> bool: """Custom tenacity retry predicate to handle xmlrpc client error: .. code:: xmlrpc.client.Fault: """ attempt = retry_state.outcome return attempt.failed and isinstance(attempt.exception(), Fault) def pypi_url(package_name: str) -> str: """Build a pypi url out of a package name.""" return PyPILister.PACKAGE_URL.format(package_name=package_name) class PyPILister(Lister[PyPIListerState, PackageListPage]): """List origins from PyPI.""" LISTER_NAME = "pypi" INSTANCE = "pypi" # As of today only the main pypi.org is used PACKAGE_LIST_URL = "https://pypi.org/pypi" # XML-RPC url PACKAGE_URL = "https://pypi.org/project/{package_name}/" def __init__( self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, ): super().__init__( scheduler=scheduler, url=self.PACKAGE_LIST_URL, instance=self.INSTANCE, credentials=credentials, ) # used as termination condition and, if useful, becomes the new state when the # visit is done self.last_processed_serial: Optional[int] = None def state_from_dict(self, d: Dict[str, Any]) -> PyPIListerState: return PyPIListerState(last_serial=d.get("last_serial")) def state_to_dict(self, state: PyPIListerState) -> Dict[str, Any]: return asdict(state) - @throttling_retry( + @http_retry( retry=_if_rate_limited, before_sleep=before_sleep_log(logger, logging.WARNING) ) def _changelog_last_serial(self, client: ServerProxy) -> int: """Internal detail to allow throttling when calling the changelog last entry""" serial = client.changelog_last_serial() assert isinstance(serial, int) return serial - @throttling_retry( + @http_retry( retry=_if_rate_limited, before_sleep=before_sleep_log(logger, logging.WARNING) ) def _changelog_since_serial( self, client: ServerProxy, serial: int ) -> List[ChangelogEntry]: """Internal detail to allow throttling when calling the changelog listing""" sleep(1) # to avoid the initial warning about throttling return client.changelog_since_serial(serial) # type: ignore def get_pages(self) -> Iterator[PackageListPage]: """Iterate over changelog events per package, determine the max release date for that package and use that max release date as last_update. When the execution is done, this will also set the self.last_processed_serial attribute so we can finalize the state of the lister for the next visit. Yields: List of Tuple of (package-name, max release-date) """ client = ServerProxy(self.url) last_processed_serial = -1 if self.state.last_serial is not None: last_processed_serial = self.state.last_serial upstream_last_serial = self._changelog_last_serial(client) # Paginate through the results of pypi until we have read everything while last_processed_serial < upstream_last_serial: updated_packages = defaultdict(list) for package, _, release_date, _, serial in self._changelog_since_serial( client, last_processed_serial ): updated_packages[package].append(release_date) # Compute the max serial so we can stop when done last_processed_serial = max(last_processed_serial, serial) # Return pages of results so they are flushed regularly yield [ ( pypi_url(package), datetime.fromtimestamp(max(release_dates)).replace( tzinfo=timezone.utc ), ) for package, release_dates in updated_packages.items() ] self.last_processed_serial = upstream_last_serial def get_origins_from_page( self, packages: PackageListPage ) -> Iterator[ListedOrigin]: """Convert a page of PyPI repositories into a list of ListedOrigins.""" assert self.lister_obj.id is not None for origin, last_update in packages: yield ListedOrigin( lister_id=self.lister_obj.id, url=origin, visit_type="pypi", last_update=last_update, ) def finalize(self): """Finalize the visit state by updating it with the new last_serial if updates actually happened. """ self.updated = ( self.state and self.state.last_serial and self.last_processed_serial and self.state.last_serial < self.last_processed_serial ) or (not self.state.last_serial and self.last_processed_serial) if self.updated: self.state.last_serial = self.last_processed_serial
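The changelog pagination that `get_pages` relies on can be exercised on its own. A minimal sketch, assuming the PyPI XML-RPC endpoint is reachable and still exposes the `changelog_last_serial` and `changelog_since_serial` methods used by the lister above:

```
from xmlrpc.client import ServerProxy

# Same XML-RPC endpoint as PyPILister.PACKAGE_LIST_URL.
client = ServerProxy("https://pypi.org/pypi")

# Highest serial on the upstream side; the lister paginates until it reaches it.
last_serial = client.changelog_last_serial()

# Each entry is (package, version, release timestamp, description, serial);
# passing a serial returns only the events that happened after it.
for package, _, timestamp, _, serial in client.changelog_since_serial(last_serial - 10):
    print(package, serial)
```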
diff --git a/swh/lister/rubygems/__init__.py b/swh/lister/rubygems/__init__.py new file mode 100644 index 0000000..3435e18 --- /dev/null +++ b/swh/lister/rubygems/__init__.py @@ -0,0 +1,66 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +""" +RubyGems lister +=============== + +The RubyGems lister lists origins from `RubyGems.org`_, the Ruby community’s gem hosting service. + +As of September 2022 `RubyGems.org`_ lists 173384 package names. + +Origins retrieving strategy +--------------------------- + +To get a list of all package names, we call an `http endpoint`_ which returns a list of gems +as text. + +Page listing +------------ + +Each page returns an origin url based on the following pattern:: + + https://rubygems.org/gems/{pkgname} + +Origins from page +----------------- + +The lister yields one origin url per page. + +Running tests +------------- + +Activate the virtualenv and run from within the swh-lister directory:: + + pytest -s -vv --log-cli-level=DEBUG swh/lister/rubygems/tests + +Testing with Docker +------------------- + +Change directory to swh/docker then launch the docker environment:: + + docker compose up -d + +Then schedule a RubyGems listing task:: + + docker compose exec swh-scheduler swh scheduler task add -p oneshot list-rubygems + +You can follow the lister execution by displaying logs of the swh-lister service:: + + docker compose logs -f swh-lister + +.. _RubyGems.org: https://rubygems.org/ +.. _http endpoint: https://rubygems.org/versions +""" + + +def register(): + from .lister import RubyGemsLister + + return { + "lister": RubyGemsLister, + "task_modules": ["%s.tasks" % __name__], + }
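The listing strategy documented above can be sketched independently of the lister machinery. A minimal sketch, assuming the `/versions` endpoint keeps a plain-text layout with a header terminated by a `---` line, then one line per release with the gem name as the first whitespace-separated field (the layout details are an assumption here, not taken from the diff):

```
import requests

response = requests.get("https://rubygems.org/versions")
response.raise_for_status()

lines = iter(response.text.splitlines())
# Skip everything up to and including the "---" header separator (assumed layout).
for line in lines:
    if line.strip() == "---":
        break

origins = set()
for line in lines:
    pkgname = line.split(" ", 1)[0]
    if pkgname:
        # One origin URL per gem, following the pattern documented above.
        origins.add(f"https://rubygems.org/gems/{pkgname}")
```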
diff --git a/swh/lister/rubygems/lister.py b/swh/lister/rubygems/lister.py new file mode 100644 index 0000000..6898b8b --- /dev/null +++ b/swh/lister/rubygems/lister.py @@ -0,0 +1,214 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import base64 +from datetime import timezone +import gzip +import logging +import os +import shutil +import subprocess +import tarfile +import tempfile +from typing import Any, Dict, Iterator, Optional, Tuple + +from bs4 import BeautifulSoup +import psycopg2 +from testing.postgresql import Postgresql + +from swh.scheduler.interface import SchedulerInterface +from swh.scheduler.model import ListedOrigin + +from ..pattern import CredentialsType, StatelessLister + +logger = logging.getLogger(__name__) + +RubyGemsListerPage = Dict[str, Any] + + +class RubyGemsLister(StatelessLister[RubyGemsListerPage]): + """Lister for RubyGems.org, the Ruby community's gem hosting service. + + Instead of querying the rubygems.org Web API, it uses gems data from the + daily PostgreSQL database dump of rubygems. This makes it possible to gather + all interesting info about a gem and its release artifacts (version number, + download URL, checksums, release date) in an efficient way and without + flooding the rubygems Web API with numerous HTTP requests (there were more + than 187000 gems available on 2022-10-07). + """ + + LISTER_NAME = "rubygems" + VISIT_TYPE = "rubygems" + INSTANCE = "rubygems" + + RUBY_GEMS_POSTGRES_DUMP_BASE_URL = ( + "https://s3-us-west-2.amazonaws.com/rubygems-dumps" + ) + RUBY_GEMS_POSTGRES_DUMP_LIST_URL = ( + f"{RUBY_GEMS_POSTGRES_DUMP_BASE_URL}?prefix=production/public_postgresql" + ) + + RUBY_GEM_DOWNLOAD_URL_PATTERN = "https://rubygems.org/downloads/{gem}-{version}.gem" + RUBY_GEM_ORIGIN_URL_PATTERN = "https://rubygems.org/gems/{gem}" + RUBY_GEM_EXTRINSIC_METADATA_URL_PATTERN = ( + "https://rubygems.org/api/v2/rubygems/{gem}/versions/{version}.json" + ) + + DB_NAME = "rubygems" + DUMP_SQL_PATH = "public_postgresql/databases/PostgreSQL.sql.gz" + + def __init__( + self, + scheduler: SchedulerInterface, + credentials: Optional[CredentialsType] = None, + ): + super().__init__( + scheduler=scheduler, + credentials=credentials, + instance=self.INSTANCE, + url=self.RUBY_GEMS_POSTGRES_DUMP_BASE_URL, + ) + + def get_latest_dump_file(self) -> str: + response = self.http_request(self.RUBY_GEMS_POSTGRES_DUMP_LIST_URL) + xml = BeautifulSoup(response.content, "xml") + contents = xml.find_all("Contents") + return contents[-1].find("Key").text
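+ # Note: the bucket listing above is assumed to be ordered lexicographically + # by key; since the dump keys embed a date (e.g. + # production/public_postgresql/2022.10.06.06.10.05/...), the last Contents + # entry is taken to be the most recent dump.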
EXISTS hstore") + + return db_url, db + + def populate_rubygems_db(self, db_url: str): + dump_file = self.get_latest_dump_file() + dump_id = dump_file.split("/")[2] + + response = self.http_request(f"{self.url}/{dump_file}", stream=True) + + with tempfile.TemporaryDirectory() as temp_dir: + logger.debug( + "Downloading latest rubygems database dump: %s (%s bytes)", + dump_id, + response.headers["content-length"], + ) + dump_file = os.path.join(temp_dir, "rubygems_dump.tar") + with open(dump_file, "wb") as dump: + for chunk in response.iter_content(chunk_size=1024): + dump.write(chunk) + + with tarfile.open(dump_file) as dump_tar: + dump_tar.extractall(temp_dir) + + logger.debug("Populating rubygems database with dump %s", dump_id) + psql = subprocess.Popen( + ["psql", "-q", db_url], + stdin=subprocess.PIPE, + ) + + # passing value of gzip.open as stdin of subprocess.run makes the process + # read raw data instead of decompressed data so we have to use a pipe + with gzip.open(os.path.join(temp_dir, self.DUMP_SQL_PATH), "rb") as sql: + shutil.copyfileobj(sql, psql.stdin) # type: ignore + + # denote end of read file + psql.stdin.close() # type: ignore + psql.wait() + + def get_pages(self) -> Iterator[RubyGemsListerPage]: + # spawn a temporary postgres instance (require initdb executable in environment) + with Postgresql() as postgresql: + db_url, db = self.create_rubygems_db(postgresql) + self.populate_rubygems_db(db_url) + + with db.cursor() as cursor: + cursor.execute("SELECT id, name from rubygems") + for gem_id, gem_name in cursor.fetchall(): + logger.debug("Processing gem named %s", gem_name) + with db.cursor() as cursor_v: + cursor_v.execute( + "SELECT authors, built_at, number, sha256, size from versions " + "where rubygem_id = %s", + (gem_id,), + ) + versions = [ + { + "number": number, + "url": self.RUBY_GEM_DOWNLOAD_URL_PATTERN.format( + gem=gem_name, version=number + ), + "date": built_at.replace(tzinfo=timezone.utc), + "authors": authors, + "sha256": ( + base64.decodebytes(sha256.encode()).hex() + if sha256 + else None + ), + "size": size, + } + for authors, built_at, number, sha256, size in cursor_v.fetchall() + ] + if versions: + yield { + "name": gem_name, + "versions": versions, + } + + def get_origins_from_page(self, page: RubyGemsListerPage) -> Iterator[ListedOrigin]: + assert self.lister_obj.id is not None + + artifacts = [] + rubygem_metadata = [] + for version in page["versions"]: + artifacts.append( + { + "version": version["number"], + "filename": version["url"].split("/")[-1], + "url": version["url"], + "checksums": ( + {"sha256": version["sha256"]} if version["sha256"] else {} + ), + "length": version["size"], + } + ) + rubygem_metadata.append( + { + "version": version["number"], + "date": version["date"].isoformat(), + "authors": version["authors"], + "extrinsic_metadata_url": ( + self.RUBY_GEM_EXTRINSIC_METADATA_URL_PATTERN.format( + gem=page["name"], version=version["number"] + ) + ), + } + ) + + yield ListedOrigin( + lister_id=self.lister_obj.id, + visit_type=self.VISIT_TYPE, + url=self.RUBY_GEM_ORIGIN_URL_PATTERN.format(gem=page["name"]), + last_update=max(version["date"] for version in page["versions"]), + extra_loader_arguments={ + "artifacts": artifacts, + "rubygem_metadata": rubygem_metadata, + }, + ) diff --git a/swh/lister/rubygems/tasks.py b/swh/lister/rubygems/tasks.py new file mode 100644 index 0000000..a1395d3 --- /dev/null +++ b/swh/lister/rubygems/tasks.py @@ -0,0 +1,19 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file 
diff --git a/swh/lister/rubygems/tasks.py b/swh/lister/rubygems/tasks.py new file mode 100644 index 0000000..a1395d3 --- /dev/null +++ b/swh/lister/rubygems/tasks.py @@ -0,0 +1,19 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from celery import shared_task + +from swh.lister.rubygems.lister import RubyGemsLister + + +@shared_task(name=__name__ + ".RubyGemsListerTask") +def list_rubygems(**lister_args): + """Lister task for RubyGems""" + return RubyGemsLister.from_configfile(**lister_args).run().dict() + + +@shared_task(name=__name__ + ".ping") +def _ping(): + return "OK" diff --git a/swh/lister/rubygems/tests/__init__.py b/swh/lister/rubygems/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/swh/lister/rubygems/tests/data/rubygems_dumps.xml b/swh/lister/rubygems/tests/data/rubygems_dumps.xml new file mode 100644 index 0000000..5506050 --- /dev/null +++ b/swh/lister/rubygems/tests/data/rubygems_dumps.xml @@ -0,0 +1,22 @@ +<?xml version="1.0" encoding="UTF-8"?> +<ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/"> +<Name>rubygems-dumps</Name> +<Prefix>production/public_postgresql</Prefix> +<Marker></Marker> +<MaxKeys>1000</MaxKeys> +<IsTruncated>false</IsTruncated> +<Contents> +<Key>production/public_postgresql/2022.10.05.06.10.11/public_postgresql.tar</Key> +<LastModified>2022-10-05T06:11:15.000Z</LastModified> +<ETag>"d1c447a2a490225c2d59061e60ed86e9-75"</ETag> +<Size>391653888</Size> +<StorageClass>STANDARD</StorageClass> +</Contents> +<Contents> +<Key>production/public_postgresql/2022.10.06.06.10.05/public_postgresql.tar</Key> +<LastModified>2022-10-06T06:11:11.000Z</LastModified> +<ETag>"2ccd9340e4f802ec982e4cd00db2d168-75"</ETag> +<Size>390047744</Size> +<StorageClass>STANDARD</StorageClass> +</Contents> +</ListBucketResult> \ No newline at end of file diff --git a/swh/lister/rubygems/tests/data/rubygems_pgsql_dump.tar b/swh/lister/rubygems/tests/data/rubygems_pgsql_dump.tar new file mode 100644 index 0000000..971cdf9 Binary files /dev/null and b/swh/lister/rubygems/tests/data/rubygems_pgsql_dump.tar differ diff --git a/swh/lister/rubygems/tests/data/small_rubygems_dump.sh b/swh/lister/rubygems/tests/data/small_rubygems_dump.sh new file mode 100644 index 0000000..7d65158 --- /dev/null +++ b/swh/lister/rubygems/tests/data/small_rubygems_dump.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# this script requires a PostgreSQL server running on the host; +# it generates the rubygems_pgsql_dump.tar file used in tests data, +# which contains a very small subset of gems for testing purposes + +cd /tmp + +# download rubygems load-pg-dump utility script +curl -O https://raw.githubusercontent.com/rubygems/rubygems.org/1c8cf7e079e56f709e7fc8f4b2398637e41815f2/script/load-pg-dump + +# download latest rubygems pgsql dump and load rubygems db in local pgsql server +./load-pg-dump -c rubygems_dump.tar + +# remove all rows in the rubygems db not related to gem haar_joke or l33tify +# those gems have few releases, which is why they have been picked +# also drop tables not needed by the rubygems lister +cleanup_script=$(cat <<- EOF +with t as ( + select id from rubygems where name = 'haar_joke' +), +t2 as ( + select id from rubygems where name = 'l33tify' +) delete from versions where rubygem_id != (select id from t) and rubygem_id != (select id from t2); + +delete from rubygems where name != 'haar_joke' and name != 'l33tify'; + +drop table dependencies; +drop table gem_downloads; +drop table linksets; EOF +) +echo $cleanup_script | psql rubygems + +# create the rubygems_pgsql_dump.tar file +mkdir -p public_postgresql/databases +pg_dump rubygems | gzip -c > public_postgresql/databases/PostgreSQL.sql.gz +tar -cvf rubygems_pgsql_dump.tar public_postgresql diff --git a/swh/lister/rubygems/tests/test_lister.py b/swh/lister/rubygems/tests/test_lister.py new file mode 100644 index 0000000..122c8c7 --- /dev/null +++ b/swh/lister/rubygems/tests/test_lister.py @@ -0,0 +1,154 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file
at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +# flake8: noqa: B950 + +from pathlib import Path + +import iso8601 +import pytest + +from swh.lister.rubygems.lister import RubyGemsLister +from swh.scheduler.model import ListedOrigin + +DUMP_FILEPATH = "production/public_postgresql/2022.10.06.06.10.05/public_postgresql.tar" + + +@pytest.fixture +def expected_listed_origins(): + return [ + { + "url": "https://rubygems.org/gems/haar_joke", + "visit_type": "rubygems", + "last_update": iso8601.parse_date("2016-11-05T00:00:00+00:00"), + "extra_loader_arguments": { + "artifacts": [ + { + "url": "https://rubygems.org/downloads/haar_joke-0.0.2.gem", + "length": 8704, + "version": "0.0.2", + "filename": "haar_joke-0.0.2.gem", + "checksums": { + "sha256": "85a8cf5f41890e9605265eeebfe9e99aa0350a01a3c799f9f55a0615a31a2f5f" + }, + }, + { + "url": "https://rubygems.org/downloads/haar_joke-0.0.1.gem", + "length": 8704, + "version": "0.0.1", + "filename": "haar_joke-0.0.1.gem", + "checksums": { + "sha256": "a2ee7052fb8ffcfc4ec0fdb77fae9a36e473f859af196a36870a0f386b5ab55e" + }, + }, + ], + "rubygem_metadata": [ + { + "date": "2016-11-05T00:00:00+00:00", + "authors": "Gemma Gotch", + "version": "0.0.2", + "extrinsic_metadata_url": "https://rubygems.org/api/v2/rubygems/haar_joke/versions/0.0.2.json", + }, + { + "date": "2016-07-23T00:00:00+00:00", + "authors": "Gemma Gotch", + "version": "0.0.1", + "extrinsic_metadata_url": "https://rubygems.org/api/v2/rubygems/haar_joke/versions/0.0.1.json", + }, + ], + }, + }, + { + "url": "https://rubygems.org/gems/l33tify", + "visit_type": "rubygems", + "last_update": iso8601.parse_date("2014-11-14T00:00:00+00:00"), + "extra_loader_arguments": { + "artifacts": [ + { + "url": "https://rubygems.org/downloads/l33tify-0.0.2.gem", + "length": 6144, + "version": "0.0.2", + "filename": "l33tify-0.0.2.gem", + "checksums": { + "sha256": "0087a21fb6161bba8892df40de3b5e27404f941658084413b8fde49db2bc7c9f" + }, + }, + { + "url": "https://rubygems.org/downloads/l33tify-0.0.3.gem", + "length": 6144, + "version": "0.0.3", + "filename": "l33tify-0.0.3.gem", + "checksums": { + "sha256": "4502097ddf2657d561ce0f527ef1f49f1658c8a0968ab8cc853273138f8382a2" + }, + }, + { + "url": "https://rubygems.org/downloads/l33tify-0.0.1.gem", + "length": 6144, + "version": "0.0.1", + "filename": "l33tify-0.0.1.gem", + "checksums": { + "sha256": "5abfb737ce5cf561726f2f7cc1ba0f0e4f865f8b7283192e05eb3f246d3dbbca" + }, + }, + ], + "rubygem_metadata": [ + { + "date": "2014-11-14T00:00:00+00:00", + "authors": "E Alexander Liedtke", + "version": "0.0.2", + "extrinsic_metadata_url": "https://rubygems.org/api/v2/rubygems/l33tify/versions/0.0.2.json", + }, + { + "date": "2014-11-14T00:00:00+00:00", + "authors": "E Alexander Liedtke", + "version": "0.0.3", + "extrinsic_metadata_url": "https://rubygems.org/api/v2/rubygems/l33tify/versions/0.0.3.json", + }, + { + "date": "2014-11-14T00:00:00+00:00", + "authors": "E Alexander Liedtke", + "version": "0.0.1", + "extrinsic_metadata_url": "https://rubygems.org/api/v2/rubygems/l33tify/versions/0.0.1.json", + }, + ], + }, + }, + ] + + +@pytest.fixture(autouse=True) +def network_requests_mock(datadir, requests_mock): + requests_mock.get( + RubyGemsLister.RUBY_GEMS_POSTGRES_DUMP_LIST_URL, + content=Path(datadir, "rubygems_dumps.xml").read_bytes(), + ) + content = Path(datadir, "rubygems_pgsql_dump.tar").read_bytes() + requests_mock.get( + 
f"{RubyGemsLister.RUBY_GEMS_POSTGRES_DUMP_BASE_URL}/{DUMP_FILEPATH}", + content=content, + headers={"content-length": str(len(content))}, + ) + + +@pytest.mark.db +def test_rubygems_lister(swh_scheduler, expected_listed_origins): + lister = RubyGemsLister(scheduler=swh_scheduler) + res = lister.run() + + assert res.pages == 2 + assert res.origins == 2 + + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results + + assert [ + { + "url": origin.url, + "visit_type": origin.visit_type, + "last_update": origin.last_update, + "extra_loader_arguments": origin.extra_loader_arguments, + } + for origin in scheduler_origins + ] == expected_listed_origins diff --git a/swh/lister/rubygems/tests/test_tasks.py b/swh/lister/rubygems/tests/test_tasks.py new file mode 100644 index 0000000..0267dcd --- /dev/null +++ b/swh/lister/rubygems/tests/test_tasks.py @@ -0,0 +1,33 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.lister.pattern import ListerStats + + +def test_rubygems_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker): + res = swh_scheduler_celery_app.send_task("swh.lister.rubygems.tasks.ping") + assert res + res.wait() + assert res.successful() + assert res.result == "OK" + + +def test_rubygems_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker): + # setup the mocked RubyGemsLister + lister = mocker.patch("swh.lister.rubygems.tasks.RubyGemsLister") + lister.from_configfile.return_value = lister + stats = ListerStats(pages=42, origins=42) + lister.run.return_value = stats + + res = swh_scheduler_celery_app.send_task( + "swh.lister.rubygems.tasks.RubyGemsListerTask" + ) + assert res + res.wait() + assert res.successful() + assert res.result == stats.dict() + + lister.from_configfile.assert_called_once_with() + lister.run.assert_called_once_with() diff --git a/swh/lister/sourceforge/lister.py b/swh/lister/sourceforge/lister.py index dcc30c3..ba8c412 100644 --- a/swh/lister/sourceforge/lister.py +++ b/swh/lister/sourceforge/lister.py @@ -1,457 +1,430 @@ # Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from dataclasses import dataclass, field import datetime from enum import Enum import logging import re from typing import Any, Dict, Iterator, List, Optional, Set, Tuple from xml.etree import ElementTree from bs4 import BeautifulSoup import iso8601 import lxml import requests -from tenacity.before_sleep import before_sleep_log from swh.core.api.classes import stream_results -from swh.lister.utils import retry_policy_generic, throttling_retry from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin -from .. 
import USER_AGENT from ..pattern import CredentialsType, Lister logger = logging.getLogger(__name__) class VcsNames(Enum): """Used to filter SourceForge tool names for valid VCS types""" # CVS projects are read-only CVS = "cvs" GIT = "git" SUBVERSION = "svn" MERCURIAL = "hg" BAZAAR = "bzr" VCS_NAMES = set(v.value for v in VcsNames.__members__.values()) @dataclass class SourceForgeListerEntry: vcs: VcsNames url: str last_modified: datetime.date SubSitemapNameT = str ProjectNameT = str # SourceForge only offers day-level granularity, which is good enough for our purposes LastModifiedT = datetime.date @dataclass class SourceForgeListerState: """Current state of the SourceForge lister in incremental runs""" """If the subsitemap does not exist, we assume a full run of this subsitemap is needed. If the date is the same, we skip the subsitemap, otherwise we request the subsitemap and look up every project's "last modified" date to compare against `ListedOrigins` from the database.""" subsitemap_last_modified: Dict[SubSitemapNameT, LastModifiedT] = field( default_factory=dict ) """Some projects (not the majority, but still a meaningful number) have no VCS for us to archive. We need to remember a mapping of their API URL to their "last modified" date so we don't keep querying them needlessly every time.""" empty_projects: Dict[str, LastModifiedT] = field(default_factory=dict) SourceForgeListerPage = List[SourceForgeListerEntry] MAIN_SITEMAP_URL = "https://sourceforge.net/allura_sitemap/sitemap.xml" SITEMAP_XML_NAMESPACE = "{http://www.sitemaps.org/schemas/sitemap/0.9}" # API resource endpoint for information about the given project. # # `namespace`: Project namespace. Very often `p`, but can be something else like # `adobe`. # `project`: Project name, e.g. `seedai`. Can be a subproject, e.g. `backapps/website`. PROJECT_API_URL_FORMAT = "https://sourceforge.net/rest/{namespace}/{project}" # Predictable URL for cloning (in the broad sense) a VCS registered for the project. # # Warning: does not apply to bzr repos, and Mercurial repos are http only, see use of this # constant below. # # `vcs`: VCS type, one of `VCS_NAMES` # `namespace`: Project namespace. Very often `p`, but can be something else like # `adobe`. # `project`: Project name, e.g. `seedai`. Can be a subproject, e.g. `backapps/website`. # `mount_point`: URL path used by the repo. For example, the Code::Blocks project uses # `git` (https://git.code.sf.net/p/codeblocks/git). CLONE_URL_FORMAT = "https://{vcs}.code.sf.net/{namespace}/{project}/{mount_point}" PROJ_URL_RE = re.compile( r"^https://sourceforge.net/(?P<namespace>[^/]+)/(?P<project>[^/]+)/(?P<rest>.*)?" ) # Mapping of `(namespace, project name)` to `last modified` date. ProjectsLastModifiedCache = Dict[Tuple[str, str], LastModifiedT] class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]): """List origins from the "SourceForge" forge.""" # Part of the lister API; identifies this lister LISTER_NAME = "sourceforge" def __init__( self, scheduler: SchedulerInterface, incremental: bool = False, credentials: Optional[CredentialsType] = None, ): super().__init__( scheduler=scheduler, url="https://sourceforge.net", instance="main", credentials=credentials, ) # Will hold the currently saved "last modified" dates to compare against our # requests.
self._project_last_modified: Optional[ProjectsLastModifiedCache] = None - self.session = requests.Session() - # Declare the USER_AGENT is more sysadm-friendly for the forge we list - self.session.headers.update( - {"Accept": "application/json", "User-Agent": USER_AGENT} - ) + + self.session.headers.update({"Accept": "application/json"}) self.incremental = incremental def state_from_dict(self, d: Dict[str, Dict[str, Any]]) -> SourceForgeListerState: subsitemaps = { k: datetime.date.fromisoformat(v) for k, v in d.get("subsitemap_last_modified", {}).items() } empty_projects = { k: datetime.date.fromisoformat(v) for k, v in d.get("empty_projects", {}).items() } return SourceForgeListerState( subsitemap_last_modified=subsitemaps, empty_projects=empty_projects ) def state_to_dict(self, state: SourceForgeListerState) -> Dict[str, Any]: return { "subsitemap_last_modified": { k: v.isoformat() for k, v in state.subsitemap_last_modified.items() }, "empty_projects": { k: v.isoformat() for k, v in state.empty_projects.items() }, } def projects_last_modified(self) -> ProjectsLastModifiedCache: if not self.incremental: # No point in loading the previous results if we're doing a full run return {} if self._project_last_modified is not None: return self._project_last_modified # We know there will be at least that many origins stream = stream_results( self.scheduler.get_listed_origins, self.lister_obj.id, limit=300_000 ) listed_origins = dict() # Projects can have slashes in them if they're subprojects, but the # mount point (last component) cannot. url_match = re.compile( r".*\.code\.sf\.net/(?P<namespace>[^/]+)/(?P<project>.+)/.*" ) bzr_url_match = re.compile( r"http://(?P<project>[^/]+).bzr.sourceforge.net/bzr/([^/]+)" ) cvs_url_match = re.compile( r"rsync://a.cvs.sourceforge.net/cvsroot/(?P<project>.+)/([^/]+)" ) for origin in stream: url = origin.url match = url_match.match(url) if match is None: # Could be a bzr or cvs special endpoint bzr_match = bzr_url_match.match(url) cvs_match = cvs_url_match.match(url) matches = None if bzr_match is not None: matches = bzr_match.groupdict() elif cvs_match is not None: matches = cvs_match.groupdict() assert matches project = matches["project"] namespace = "p" # no special namespacing for bzr and cvs projects else: matches = match.groupdict() namespace = matches["namespace"] project = matches["project"] # "Last modified" dates are the same across all VCS (tools, even) # within a project or subproject. An assertion here would be overkill. last_modified = origin.last_update assert last_modified is not None listed_origins[(namespace, project)] = last_modified.date() self._project_last_modified = listed_origins return listed_origins - @throttling_retry( - retry=retry_policy_generic, - before_sleep=before_sleep_log(logger, logging.WARNING), - ) - def page_request(self, url, params) -> requests.Response: - # Log listed URL to ease debugging - logger.debug("Fetching URL %s with params %s", url, params) - response = self.session.get(url, params=params) - - if response.status_code != 200: - # Log response content to ease debugging - logger.warning( - "Unexpected HTTP status code %s for URL %s", - response.status_code, - response.url, - ) - # The lister must fail on blocking errors - response.raise_for_status() - - return response - def get_pages(self) -> Iterator[SourceForgeListerPage]: """ SourceForge has a main XML sitemap that lists its sharded sitemaps for all projects. Each XML sub-sitemap lists project pages, which are not unique per project: a project can have a wiki, a home, a git, an svn, etc.
For each unique project, we query an API endpoint that lists (among other things) the tools associated with said project, some of which are the VCS used. Subprojects are considered separate projects. Lastly, we use the information about which VCSes are used to build the predictable clone URL for any given VCS. """ - sitemap_contents = self.page_request(MAIN_SITEMAP_URL, {}).text + sitemap_contents = self.http_request(MAIN_SITEMAP_URL).text tree = ElementTree.fromstring(sitemap_contents) for subsitemap in tree.iterfind(f"{SITEMAP_XML_NAMESPACE}sitemap"): last_modified_el = subsitemap.find(f"{SITEMAP_XML_NAMESPACE}lastmod") assert last_modified_el is not None and last_modified_el.text is not None last_modified = datetime.date.fromisoformat(last_modified_el.text) location = subsitemap.find(f"{SITEMAP_XML_NAMESPACE}loc") assert location is not None and location.text is not None sub_url = location.text if self.incremental: recorded_last_mod = self.state.subsitemap_last_modified.get(sub_url) if recorded_last_mod == last_modified: # The entire subsitemap hasn't changed, so none of its projects # have either, skip it. continue self.state.subsitemap_last_modified[sub_url] = last_modified - subsitemap_contents = self.page_request(sub_url, {}).text + subsitemap_contents = self.http_request(sub_url).text subtree = ElementTree.fromstring(subsitemap_contents) yield from self._get_pages_from_subsitemap(subtree) def get_origins_from_page( self, page: SourceForgeListerPage ) -> Iterator[ListedOrigin]: assert self.lister_obj.id is not None for hit in page: last_modified: str = str(hit.last_modified) last_update: datetime.datetime = iso8601.parse_date(last_modified) yield ListedOrigin( lister_id=self.lister_obj.id, visit_type=hit.vcs.value, url=hit.url, last_update=last_update, ) def _get_pages_from_subsitemap( self, subtree: ElementTree.Element ) -> Iterator[SourceForgeListerPage]: projects: Set[ProjectNameT] = set() for project_block in subtree.iterfind(f"{SITEMAP_XML_NAMESPACE}url"): last_modified_block = project_block.find(f"{SITEMAP_XML_NAMESPACE}lastmod") assert last_modified_block is not None last_modified = last_modified_block.text location = project_block.find(f"{SITEMAP_XML_NAMESPACE}loc") assert location is not None project_url = location.text assert project_url is not None match = PROJ_URL_RE.match(project_url) if match: matches = match.groupdict() namespace = matches["namespace"] if namespace == "projects": # These have a `p`-namespaced counterpart, use that instead continue project = matches["project"] rest = matches["rest"] if rest.count("/") > 1: # This is a subproject. There are no sub-subprojects. subproject_name = rest.rsplit("/", 2)[0] project = f"{project}/{subproject_name}" prev_len = len(projects) projects.add(project) if prev_len == len(projects): # Already seen continue pages = self._get_pages_for_project(namespace, project, last_modified) if pages: yield pages else: logger.debug("Project '%s' does not have any VCS", project) else: # Should almost always match, let's log it # The only ones that don't match are mostly specialized one-off URLs.
msg = "Project URL '%s' does not match expected pattern" logger.warning(msg, project_url) def _get_pages_for_project( self, namespace, project, last_modified ) -> SourceForgeListerPage: endpoint = PROJECT_API_URL_FORMAT.format(namespace=namespace, project=project) empty_project_last_modified = self.state.empty_projects.get(endpoint) if empty_project_last_modified is not None: if last_modified == empty_project_last_modified.isoformat(): # Project has not changed, so is still empty, meaning it has # no VCS attached that we can archive. logger.debug(f"Project {namespace}/{project} is still empty") return [] if self.incremental: expected = self.projects_last_modified().get((namespace, project)) if expected is not None: if expected.isoformat() == last_modified: # Project has not changed logger.debug(f"Project {namespace}/{project} has not changed") return [] else: logger.debug(f"Project {namespace}/{project} was updated") else: msg = "New project during an incremental run: %s/%s" logger.debug(msg, namespace, project) try: - res = self.page_request(endpoint, {}).json() + res = self.http_request(endpoint).json() except requests.HTTPError: - # We've already logged in `page_request` + # We've already logged in `http_request` return [] tools = res.get("tools") if tools is None: # This rarely happens, on very old URLs logger.warning("Project '%s' does not have any tools", endpoint) return [] hits = [] for tool in tools: tool_name = tool["name"] if tool_name not in VCS_NAMES: continue if tool_name == VcsNames.CVS.value: # CVS projects are different from other VCS ones, they use the rsync # protocol, a list of modules needs to be fetched from an info page # and multiple origin URLs can be produced for a same project. cvs_info_url = f"http://{project}.cvs.sourceforge.net" try: - response = self.page_request(cvs_info_url, params={}) + response = self.http_request(cvs_info_url) except requests.HTTPError: logger.warning( "CVS info page could not be fetched, skipping project '%s'", project, ) continue else: bs = BeautifulSoup(response.text, features="html.parser") cvs_base_url = "rsync://a.cvs.sourceforge.net/cvsroot" for text in [b.text for b in bs.find_all("b")]: match = re.search(rf".*/cvsroot/{project} co -P (.+)", text) if match is not None: module = match.group(1) if module != "Attic": url = f"{cvs_base_url}/{project}/{module}" hits.append( SourceForgeListerEntry( vcs=VcsNames(tool_name), url=url, last_modified=last_modified, ) ) continue url = CLONE_URL_FORMAT.format( vcs=tool_name, namespace=namespace, project=project, mount_point=tool["mount_point"], ) if tool_name == VcsNames.MERCURIAL.value: # SourceForge does not yet support anonymous HTTPS cloning for Mercurial # See https://sourceforge.net/p/forge/feature-requests/727/ url = url.replace("https://", "http://") if tool_name == VcsNames.BAZAAR.value: # SourceForge has removed support for bzr and only keeps legacy projects # around at a separate (also not https) URL. Bzr projects are very rare # and a lot of them are 404 now. 
url = f"http://{project}.bzr.sourceforge.net/bzr/{project}" try: - response = self.page_request(url, params={}) + response = self.http_request(url) if "To get this branch, use:" not in response.text: # If a bzr project has multiple branches, we need to extract their # names from the repository landing page and create one listed origin # per branch parser = lxml.etree.HTMLParser() tree = lxml.etree.fromstring(response.text, parser) # Get all tds with class 'autcell' tds = tree.xpath(".//td[contains(@class, 'autcell')]") for td in tds: branch = td.findtext("a") # If the td's parent contains Branch and # it has non-empty text: if td.xpath("..//img[@alt='Branch']") and branch: hits.append( SourceForgeListerEntry( vcs=VcsNames(tool_name), url=f"{url}/{branch}", last_modified=last_modified, ) ) continue except requests.HTTPError: logger.warning( "Bazaar repository page could not be fetched, skipping project '%s'", project, ) continue entry = SourceForgeListerEntry( vcs=VcsNames(tool_name), url=url, last_modified=last_modified ) hits.append(entry) if not hits: date = datetime.date.fromisoformat(last_modified) self.state.empty_projects[endpoint] = date else: self.state.empty_projects.pop(endpoint, None) return hits diff --git a/swh/lister/sourceforge/tests/test_lister.py b/swh/lister/sourceforge/tests/test_lister.py index 516f562..1a97bf3 100644 --- a/swh/lister/sourceforge/tests/test_lister.py +++ b/swh/lister/sourceforge/tests/test_lister.py @@ -1,536 +1,540 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information + import datetime import functools import json from pathlib import Path import re from iso8601 import iso8601 import pytest from requests.exceptions import HTTPError -from swh.lister import USER_AGENT +from swh.lister import USER_AGENT_TEMPLATE from swh.lister.sourceforge.lister import ( MAIN_SITEMAP_URL, PROJECT_API_URL_FORMAT, SourceForgeLister, SourceForgeListerState, ) from swh.lister.tests.test_utils import assert_sleep_calls from swh.lister.utils import WAIT_EXP_BASE # Mapping of project name to namespace from swh.scheduler.model import ListedOrigin TEST_PROJECTS = { "aaron": "p", "adobexmp": "adobe", "backapps": "p", "backapps/website": "p", "bzr-repo": "p", "mojunk": "p", "mramm": "p", "os3dmodels": "p", "random-mercurial": "p", "t12eksandbox": "p", "ocaml-lpd": "p", } URLS_MATCHER = { PROJECT_API_URL_FORMAT.format(namespace=namespace, project=project): project for project, namespace in TEST_PROJECTS.items() } def get_main_sitemap(datadir): return Path(datadir, "main-sitemap.xml").read_text() def get_subsitemap_0(datadir): return Path(datadir, "subsitemap-0.xml").read_text() def get_subsitemap_1(datadir): return Path(datadir, "subsitemap-1.xml").read_text() def get_project_json(datadir, request, context): url = request.url project = URLS_MATCHER.get(url) assert project is not None, f"Url '{url}' could not be matched" project = project.replace("/", "-") return json.loads(Path(datadir, f"{project}.json").read_text()) def get_cvs_info_page(datadir): return Path(datadir, "aaron.html").read_text() def get_bzr_repo_page(datadir, repo_name): return Path(datadir, f"{repo_name}.html").read_text() def _check_request_headers(request): - return request.headers.get("User-Agent") == USER_AGENT + return ( + request.headers.get("User-Agent") + == 
USER_AGENT_TEMPLATE % SourceForgeLister.LISTER_NAME + ) def _check_listed_origins(lister, swh_scheduler): scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results res = {o.url: (o.visit_type, str(o.last_update.date())) for o in scheduler_origins} assert res == { "https://svn.code.sf.net/p/backapps/website/code": ("svn", "2021-02-11"), "https://git.code.sf.net/p/os3dmodels/git": ("git", "2017-03-31"), "https://svn.code.sf.net/p/os3dmodels/svn": ("svn", "2017-03-31"), "https://git.code.sf.net/p/mramm/files": ("git", "2019-04-04"), "https://git.code.sf.net/p/mramm/git": ("git", "2019-04-04"), "https://svn.code.sf.net/p/mramm/svn": ("svn", "2019-04-04"), "https://git.code.sf.net/p/mojunk/git": ("git", "2017-12-31"), "https://git.code.sf.net/p/mojunk/git2": ("git", "2017-12-31"), "https://svn.code.sf.net/p/mojunk/svn": ("svn", "2017-12-31"), "http://hg.code.sf.net/p/random-mercurial/hg": ("hg", "2019-05-02"), "http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox": ( "bzr", "2011-02-09", ), "http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd/trunk": ( "bzr", "2011-04-17", ), "rsync://a.cvs.sourceforge.net/cvsroot/aaron/aaron": ("cvs", "2013-03-07"), "rsync://a.cvs.sourceforge.net/cvsroot/aaron/www": ("cvs", "2013-03-07"), } def test_sourceforge_lister_full(swh_scheduler, requests_mock, datadir): """ Simulate a full listing of an artificially restricted sourceforge. There are 5 different projects, spread over two sub-sitemaps, a few of which have multiple VCS listed, one has none, one is outside of the standard `/p/` namespace, some with custom mount points. All non-interesting but related entries have been kept. """ lister = SourceForgeLister(scheduler=swh_scheduler) requests_mock.get( MAIN_SITEMAP_URL, text=get_main_sitemap(datadir), additional_matcher=_check_request_headers, ) requests_mock.get( "https://sourceforge.net/allura_sitemap/sitemap-0.xml", text=get_subsitemap_0(datadir), additional_matcher=_check_request_headers, ) requests_mock.get( "https://sourceforge.net/allura_sitemap/sitemap-1.xml", text=get_subsitemap_1(datadir), additional_matcher=_check_request_headers, ) requests_mock.get( re.compile("https://sourceforge.net/rest/.*"), json=functools.partial(get_project_json, datadir), additional_matcher=_check_request_headers, ) requests_mock.get( re.compile("http://aaron.cvs.sourceforge.net/"), text=get_cvs_info_page(datadir), additional_matcher=_check_request_headers, ) requests_mock.get( re.compile("http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox"), text=get_bzr_repo_page(datadir, "t12eksandbox"), additional_matcher=_check_request_headers, ) requests_mock.get( re.compile("http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd"), text=get_bzr_repo_page(datadir, "ocaml-lpd"), additional_matcher=_check_request_headers, ) stats = lister.run() # - os3dmodels (2 repos), # - mramm (3 repos), # - mojunk (3 repos), # - backapps/website (1 repo), # - random-mercurial (1 repo). # - t12eksandbox (1 repo). # - ocaml-lpd (1 repo). # adobe and backapps itself have no repos. 
assert stats.pages == 8 assert stats.origins == 14 expected_state = { "subsitemap_last_modified": { "https://sourceforge.net/allura_sitemap/sitemap-0.xml": "2021-03-18", "https://sourceforge.net/allura_sitemap/sitemap-1.xml": "2021-03-18", }, "empty_projects": { "https://sourceforge.net/rest/p/backapps": "2021-02-11", "https://sourceforge.net/rest/adobe/adobexmp": "2017-10-17", }, } assert lister.state_to_dict(lister.state) == expected_state _check_listed_origins(lister, swh_scheduler) def test_sourceforge_lister_incremental(swh_scheduler, requests_mock, datadir, mocker): """ Simulate an incremental listing of an artificially restricted sourceforge. Same dataset as the full run, because it's enough to validate the different cases. """ lister = SourceForgeLister(scheduler=swh_scheduler, incremental=True) requests_mock.get( MAIN_SITEMAP_URL, text=get_main_sitemap(datadir), additional_matcher=_check_request_headers, ) def not_called(request, *args, **kwargs): raise AssertionError(f"Should not have been called: '{request.url}'") requests_mock.get( "https://sourceforge.net/allura_sitemap/sitemap-0.xml", text=get_subsitemap_0(datadir), additional_matcher=_check_request_headers, ) requests_mock.get( "https://sourceforge.net/allura_sitemap/sitemap-1.xml", text=not_called, additional_matcher=_check_request_headers, ) def filtered_get_project_json(request, context): # These projects should not be requested again assert URLS_MATCHER[request.url] not in {"adobe", "mojunk"} return get_project_json(datadir, request, context) requests_mock.get( re.compile("https://sourceforge.net/rest/.*"), json=filtered_get_project_json, additional_matcher=_check_request_headers, ) requests_mock.get( re.compile("http://aaron.cvs.sourceforge.net/"), text=get_cvs_info_page(datadir), additional_matcher=_check_request_headers, ) requests_mock.get( re.compile("http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox"), text=get_bzr_repo_page(datadir, "t12eksandbox"), additional_matcher=_check_request_headers, ) requests_mock.get( re.compile("http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd"), text=get_bzr_repo_page(datadir, "ocaml-lpd"), additional_matcher=_check_request_headers, ) faked_listed_origins = [ # mramm: changed ListedOrigin( lister_id=lister.lister_obj.id, visit_type="git", url="https://git.code.sf.net/p/mramm/files", last_update=iso8601.parse_date("2019-01-01"), ), ListedOrigin( lister_id=lister.lister_obj.id, visit_type="git", url="https://git.code.sf.net/p/mramm/git", last_update=iso8601.parse_date("2019-01-01"), ), ListedOrigin( lister_id=lister.lister_obj.id, visit_type="svn", url="https://svn.code.sf.net/p/mramm/svn", last_update=iso8601.parse_date("2019-01-01"), ), # stayed the same, even though its subsitemap has changed ListedOrigin( lister_id=lister.lister_obj.id, visit_type="git", url="https://git.code.sf.net/p/os3dmodels/git", last_update=iso8601.parse_date("2017-03-31"), ), ListedOrigin( lister_id=lister.lister_obj.id, visit_type="svn", url="https://svn.code.sf.net/p/os3dmodels/svn", last_update=iso8601.parse_date("2017-03-31"), ), # others: stayed the same, should be skipped ListedOrigin( lister_id=lister.lister_obj.id, visit_type="git", url="https://git.code.sf.net/p/mojunk/git", last_update=iso8601.parse_date("2017-12-31"), ), ListedOrigin( lister_id=lister.lister_obj.id, visit_type="git", url="https://git.code.sf.net/p/mojunk/git2", last_update=iso8601.parse_date("2017-12-31"), ), ListedOrigin( lister_id=lister.lister_obj.id, visit_type="svn", url="https://svn.code.sf.net/p/mojunk/svn", 
last_update=iso8601.parse_date("2017-12-31"), ), ListedOrigin( lister_id=lister.lister_obj.id, visit_type="svn", url="https://svn.code.sf.net/p/backapps/website/code", last_update=iso8601.parse_date("2021-02-11"), ), ListedOrigin( lister_id=lister.lister_obj.id, visit_type="hg", url="http://hg.code.sf.net/p/random-mercurial/hg", last_update=iso8601.parse_date("2019-05-02"), ), ListedOrigin( lister_id=lister.lister_obj.id, visit_type="bzr", url="http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox", last_update=iso8601.parse_date("2011-02-09"), ), ListedOrigin( lister_id=lister.lister_obj.id, visit_type="bzr", url="http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd/trunk", last_update=iso8601.parse_date("2011-04-17"), ), ListedOrigin( lister_id=lister.lister_obj.id, visit_type="cvs", url="rsync://a.cvs.sourceforge.net/cvsroot/aaron/aaron", last_update=iso8601.parse_date("2013-03-07"), ), ListedOrigin( lister_id=lister.lister_obj.id, visit_type="cvs", url="rsync://a.cvs.sourceforge.net/cvsroot/aaron/www", last_update=iso8601.parse_date("2013-03-07"), ), ] swh_scheduler.record_listed_origins(faked_listed_origins) to_date = datetime.date.fromisoformat faked_state = SourceForgeListerState( subsitemap_last_modified={ # changed "https://sourceforge.net/allura_sitemap/sitemap-0.xml": to_date( "2021-02-18" ), # stayed the same "https://sourceforge.net/allura_sitemap/sitemap-1.xml": to_date( "2021-03-18" ), }, empty_projects={ "https://sourceforge.net/rest/p/backapps": to_date("2020-02-11"), "https://sourceforge.net/rest/adobe/adobexmp": to_date("2017-10-17"), }, ) lister.state = faked_state stats = lister.run() # - mramm (3 repos), # changed assert stats.pages == 1 assert stats.origins == 3 expected_state = { "subsitemap_last_modified": { "https://sourceforge.net/allura_sitemap/sitemap-0.xml": "2021-03-18", "https://sourceforge.net/allura_sitemap/sitemap-1.xml": "2021-03-18", }, "empty_projects": { "https://sourceforge.net/rest/p/backapps": "2021-02-11", # changed "https://sourceforge.net/rest/adobe/adobexmp": "2017-10-17", }, } assert lister.state_to_dict(lister.state) == expected_state # origins have been updated _check_listed_origins(lister, swh_scheduler) def test_sourceforge_lister_retry(swh_scheduler, requests_mock, mocker, datadir): lister = SourceForgeLister(scheduler=swh_scheduler) # Exponential retries take a long time, so stub time.sleep - mocked_sleep = mocker.patch.object(lister.page_request.retry, "sleep") + mocked_sleep = mocker.patch.object(lister.http_request.retry, "sleep") requests_mock.get( MAIN_SITEMAP_URL, [ {"status_code": 429}, {"status_code": 429}, {"text": get_main_sitemap(datadir)}, ], additional_matcher=_check_request_headers, ) requests_mock.get( "https://sourceforge.net/allura_sitemap/sitemap-0.xml", [{"status_code": 429}, {"text": get_subsitemap_0(datadir), "status_code": 301}], additional_matcher=_check_request_headers, ) requests_mock.get( "https://sourceforge.net/allura_sitemap/sitemap-1.xml", [{"status_code": 429}, {"text": get_subsitemap_1(datadir)}], additional_matcher=_check_request_headers, ) requests_mock.get( re.compile("https://sourceforge.net/rest/.*"), [{"status_code": 429}, {"json": functools.partial(get_project_json, datadir)}], additional_matcher=_check_request_headers, ) requests_mock.get( re.compile("http://aaron.cvs.sourceforge.net/"), text=get_cvs_info_page(datadir), additional_matcher=_check_request_headers, ) requests_mock.get( re.compile("http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox"), text=get_bzr_repo_page(datadir, 
"t12eksandbox"), additional_matcher=_check_request_headers, ) requests_mock.get( re.compile("http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd"), text=get_bzr_repo_page(datadir, "ocaml-lpd"), additional_matcher=_check_request_headers, ) stats = lister.run() # - os3dmodels (2 repos), # - mramm (3 repos), # - mojunk (3 repos), # - backapps/website (1 repo), # - random-mercurial (1 repo). # - t12eksandbox (1 repo). # - ocaml-lpd (1 repo). # adobe and backapps itself have no repos. assert stats.pages == 8 assert stats.origins == 14 _check_listed_origins(lister, swh_scheduler) # Test `time.sleep` is called with exponential retries assert_sleep_calls(mocker, mocked_sleep, [1, WAIT_EXP_BASE, 1, 1]) @pytest.mark.parametrize("status_code", [500, 503, 504, 403, 404]) def test_sourceforge_lister_http_error( swh_scheduler, requests_mock, status_code, mocker ): lister = SourceForgeLister(scheduler=swh_scheduler) # Exponential retries take a long time, so stub time.sleep - mocked_sleep = mocker.patch.object(lister.page_request.retry, "sleep") + mocked_sleep = mocker.patch.object(lister.http_request.retry, "sleep") requests_mock.get(MAIN_SITEMAP_URL, status_code=status_code) with pytest.raises(HTTPError): lister.run() exp_retries = [] if status_code >= 500: exp_retries = [1.0, 10.0, 100.0, 1000.0] assert_sleep_calls(mocker, mocked_sleep, exp_retries) @pytest.mark.parametrize("status_code", [500, 503, 504, 403, 404]) def test_sourceforge_lister_project_error( datadir, swh_scheduler, requests_mock, status_code, mocker ): lister = SourceForgeLister(scheduler=swh_scheduler) # Exponential retries take a long time, so stub time.sleep - mocker.patch.object(lister.page_request.retry, "sleep") + mocker.patch.object(lister.http_request.retry, "sleep") requests_mock.get( MAIN_SITEMAP_URL, text=get_main_sitemap(datadir), additional_matcher=_check_request_headers, ) requests_mock.get( "https://sourceforge.net/allura_sitemap/sitemap-0.xml", text=get_subsitemap_0(datadir), additional_matcher=_check_request_headers, ) requests_mock.get( "https://sourceforge.net/allura_sitemap/sitemap-1.xml", text=get_subsitemap_1(datadir), additional_matcher=_check_request_headers, ) # Request mocks precedence is LIFO requests_mock.get( re.compile("https://sourceforge.net/rest/.*"), json=functools.partial(get_project_json, datadir), additional_matcher=_check_request_headers, ) requests_mock.get( re.compile("http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox"), text=get_bzr_repo_page(datadir, "t12eksandbox"), additional_matcher=_check_request_headers, ) requests_mock.get( re.compile("http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd"), text=get_bzr_repo_page(datadir, "ocaml-lpd"), additional_matcher=_check_request_headers, ) # Make all `mramm` requests fail # `mramm` is in subsitemap 0, which ensures we keep listing after an error. requests_mock.get( re.compile("https://sourceforge.net/rest/p/mramm"), status_code=status_code ) # Make request to CVS info page fail requests_mock.get( re.compile("http://aaron.cvs.sourceforge.net/"), status_code=status_code ) stats = lister.run() # - os3dmodels (2 repos), # - mojunk (3 repos), # - backapps/website (1 repo), # - random-mercurial (1 repo). # - t12eksandbox (1 repo). # - ocaml-lpd (1 repo). # adobe and backapps itself have no repos. 
# Did *not* list mramm assert stats.pages == 6 assert stats.origins == 9 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results res = {o.url: (o.visit_type, str(o.last_update.date())) for o in scheduler_origins} # Ensure no `mramm` origins are listed, but all others are. assert res == { "https://svn.code.sf.net/p/backapps/website/code": ("svn", "2021-02-11"), "https://git.code.sf.net/p/os3dmodels/git": ("git", "2017-03-31"), "https://svn.code.sf.net/p/os3dmodels/svn": ("svn", "2017-03-31"), "https://git.code.sf.net/p/mojunk/git": ("git", "2017-12-31"), "https://git.code.sf.net/p/mojunk/git2": ("git", "2017-12-31"), "https://svn.code.sf.net/p/mojunk/svn": ("svn", "2017-12-31"), "http://hg.code.sf.net/p/random-mercurial/hg": ("hg", "2019-05-02"), "http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox": ( "bzr", "2011-02-09", ), "http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd/trunk": ( "bzr", "2011-04-17", ), } diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py index ed0f34e..a69ec1c 100644 --- a/swh/lister/tests/test_cli.py +++ b/swh/lister/tests/test_cli.py @@ -1,59 +1,63 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from swh.lister.cli import SUPPORTED_LISTERS, get_lister lister_args = { "cgit": { "url": "https://git.eclipse.org/c/", }, "phabricator": { "instance": "softwareheritage", "url": "https://forge.softwareheritage.org/api/diffusion.repository.search", "api_token": "bogus", }, "gitea": { "url": "https://try.gitea.io/api/v1/", }, "tuleap": { "url": "https://tuleap.net", }, "gitlab": { "url": "https://gitlab.ow2.org/api/v4", "instance": "ow2", }, "opam": {"url": "https://opam.ocaml.org", "instance": "opam"}, "maven": { "url": "https://repo1.maven.org/maven2/", "index_url": "http://indexes/export.fld", }, "gogs": { "url": "https://try.gogs.io/", "api_token": "secret", }, + "nixguix": { + "url": "https://guix.gnu.org/sources.json", + "origin_upstream": "https://git.savannah.gnu.org/cgit/guix.git/", + }, } def test_get_lister_wrong_input(): """Unsupported lister should raise""" with pytest.raises(ValueError) as e: get_lister("unknown", "db-url") assert "Invalid lister" in str(e.value) def test_get_lister(swh_scheduler_config): """Instantiating a supported lister should be ok""" # Drop the launchpad lister from the listers to check; its test setup is more involved # than the other listers' and it's not currently done here for lister_name in SUPPORTED_LISTERS: lst = get_lister( lister_name, scheduler={"cls": "local", **swh_scheduler_config}, **lister_args.get(lister_name, {}), ) assert hasattr(lst, "run") diff --git a/swh/lister/tests/test_pattern.py b/swh/lister/tests/test_pattern.py index 192f8f7..554a8d1 100644 --- a/swh/lister/tests/test_pattern.py +++ b/swh/lister/tests/test_pattern.py @@ -1,200 +1,217 @@ # Copyright (C) 2020-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import TYPE_CHECKING, Any, Dict, Iterator, List import pytest from swh.lister import pattern from swh.scheduler.model import ListedOrigin StateType = Dict[str, str] OriginType = Dict[str, str] PageType = List[OriginType] class
InstantiableLister(pattern.Lister[StateType, PageType]): """A lister that can only be instantiated, not run.""" LISTER_NAME = "test-pattern-lister" def state_from_dict(self, d: Dict[str, str]) -> StateType: return d def test_instantiation(swh_scheduler): lister = InstantiableLister( scheduler=swh_scheduler, url="https://example.com", instance="example.com" ) # check the lister was registered in the scheduler backend stored_lister = swh_scheduler.get_or_create_lister( name="test-pattern-lister", instance_name="example.com" ) assert stored_lister == lister.lister_obj with pytest.raises(NotImplementedError): lister.run() def test_lister_instance_name(swh_scheduler): lister = InstantiableLister( scheduler=swh_scheduler, url="https://example.org", instance="example" ) assert lister.instance == "example" lister = InstantiableLister(scheduler=swh_scheduler, url="https://example.org") assert lister.instance == "example.org" def test_instantiation_from_configfile(swh_scheduler, mocker): mock_load_from_envvar = mocker.patch("swh.lister.pattern.load_from_envvar") mock_get_scheduler = mocker.patch("swh.lister.pattern.get_scheduler") mock_load_from_envvar.return_value = { "scheduler": {}, "url": "foo", "instance": "bar", } mock_get_scheduler.return_value = swh_scheduler lister = InstantiableLister.from_configfile() assert lister.url == "foo" assert lister.instance == "bar" lister = InstantiableLister.from_configfile(url="bar", instance="foo") assert lister.url == "bar" assert lister.instance == "foo" lister = InstantiableLister.from_configfile(url=None, instance="foo") assert lister.url == "foo" assert lister.instance == "foo" if TYPE_CHECKING: _Base = pattern.Lister[Any, PageType] else: _Base = object class ListerMixin(_Base): def get_pages(self) -> Iterator[PageType]: for pageno in range(2): yield [ {"url": f"https://example.com/{pageno:02d}{i:03d}"} for i in range(10) ] def get_origins_from_page(self, page: PageType) -> Iterator[ListedOrigin]: assert self.lister_obj.id is not None for origin in page: yield ListedOrigin( lister_id=self.lister_obj.id, url=origin["url"], visit_type="git" ) def check_listed_origins(swh_scheduler, lister, stored_lister): """Check that the listed origins match the ones in the lister""" # Gather the origins that are supposed to be listed lister_urls = sorted( sum([[o["url"] for o in page] for page in lister.get_pages()], []) ) # And check the state of origins in the scheduler ret = swh_scheduler.get_listed_origins() assert ret.next_page_token is None assert len(ret.results) == len(lister_urls) for origin, expected_url in zip(ret.results, lister_urls): assert origin.url == expected_url assert origin.lister_id == stored_lister.id class RunnableLister(ListerMixin, InstantiableLister): """A lister that can be run.""" def state_to_dict(self, state: StateType) -> Dict[str, str]: return state def finalize(self) -> None: self.state["updated"] = "yes" self.updated = True def test_run(swh_scheduler): lister = RunnableLister( scheduler=swh_scheduler, url="https://example.com", instance="example.com" ) assert "updated" not in lister.state update_date = lister.lister_obj.updated run_result = lister.run() assert run_result.pages == 2 assert run_result.origins == 20 stored_lister = swh_scheduler.get_or_create_lister( name="test-pattern-lister", instance_name="example.com" ) # Check that the finalize operation happened assert stored_lister.updated > update_date assert stored_lister.current_state["updated"] == "yes" check_listed_origins(swh_scheduler, lister, stored_lister) class 
InstantiableStatelessLister(pattern.StatelessLister[PageType]): LISTER_NAME = "test-stateless-lister" def test_stateless_instantiation(swh_scheduler): lister = InstantiableStatelessLister( scheduler=swh_scheduler, url="https://example.com", instance="example.com", ) # check the lister was registered in the scheduler backend stored_lister = swh_scheduler.get_or_create_lister( name="test-stateless-lister", instance_name="example.com" ) assert stored_lister == lister.lister_obj assert stored_lister.current_state == {} assert lister.state is None with pytest.raises(NotImplementedError): lister.run() class RunnableStatelessLister(ListerMixin, InstantiableStatelessLister): def finalize(self): self.updated = True def test_stateless_run(swh_scheduler): lister = RunnableStatelessLister( scheduler=swh_scheduler, url="https://example.com", instance="example.com" ) update_date = lister.lister_obj.updated run_result = lister.run() assert run_result.pages == 2 assert run_result.origins == 20 stored_lister = swh_scheduler.get_or_create_lister( name="test-stateless-lister", instance_name="example.com" ) # Check that the finalize operation happened assert stored_lister.updated > update_date assert stored_lister.current_state == {} # And that all origins are stored check_listed_origins(swh_scheduler, lister, stored_lister) + + +class ListerWithSameOriginInMultiplePages(RunnableStatelessLister): + def get_pages(self) -> Iterator[PageType]: + for _ in range(2): + yield [{"url": "https://example.org/user/project"}] + + +def test_listed_origins_count(swh_scheduler): + lister = ListerWithSameOriginInMultiplePages( + scheduler=swh_scheduler, url="https://example.org", instance="example.org" + ) + + run_result = lister.run() + + assert run_result.pages == 2 + assert run_result.origins == 1 diff --git a/swh/lister/tests/test_utils.py b/swh/lister/tests/test_utils.py index 6d9b50d..98b376f 100644 --- a/swh/lister/tests/test_utils.py +++ b/swh/lister/tests/test_utils.py @@ -1,126 +1,130 @@ -# Copyright (C) 2018-2021 the Software Heritage developers +# Copyright (C) 2018-2022 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest import requests from requests.status_codes import codes from tenacity.wait import wait_fixed -from swh.lister.utils import ( - MAX_NUMBER_ATTEMPTS, - WAIT_EXP_BASE, - split_range, - throttling_retry, -) +from swh.lister.utils import MAX_NUMBER_ATTEMPTS, WAIT_EXP_BASE, http_retry, split_range @pytest.mark.parametrize( "total_pages,nb_pages,expected_ranges", [ (14, 5, [(0, 4), (5, 9), (10, 14)]), (19, 10, [(0, 9), (10, 19)]), (20, 3, [(0, 2), (3, 5), (6, 8), (9, 11), (12, 14), (15, 17), (18, 20)]), ( 21, 3, [ (0, 2), (3, 5), (6, 8), (9, 11), (12, 14), (15, 17), (18, 21), ], ), ], ) def test_split_range(total_pages, nb_pages, expected_ranges): actual_ranges = list(split_range(total_pages, nb_pages)) assert actual_ranges == expected_ranges @pytest.mark.parametrize("total_pages,nb_pages", [(None, 1), (100, None)]) def test_split_range_errors(total_pages, nb_pages): for total_pages, nb_pages in [(None, 1), (100, None)]: with pytest.raises(TypeError): next(split_range(total_pages, nb_pages)) TEST_URL = "https://example.og/api/repositories" -@throttling_retry() +@http_retry() def make_request(): response = requests.get(TEST_URL) response.raise_for_status() return response def assert_sleep_calls(mocker, mock_sleep, sleep_params): mock_sleep.assert_has_calls([mocker.call(param) for param in 
sleep_params]) -def test_throttling_retry(requests_mock, mocker): +@pytest.mark.parametrize( + "status_code", + [ + codes.too_many_requests, + codes.internal_server_error, + codes.bad_gateway, + codes.service_unavailable, + ], +) +def test_http_retry(requests_mock, mocker, status_code): data = {"result": {}} requests_mock.get( TEST_URL, [ - {"status_code": codes.too_many_requests}, - {"status_code": codes.too_many_requests}, + {"status_code": status_code}, + {"status_code": status_code}, {"status_code": codes.ok, "json": data}, ], ) mock_sleep = mocker.patch.object(make_request.retry, "sleep") response = make_request() assert_sleep_calls(mocker, mock_sleep, [1, WAIT_EXP_BASE]) assert response.json() == data -def test_throttling_retry_max_attemps(requests_mock, mocker): +def test_http_retry_max_attempts(requests_mock, mocker): requests_mock.get( TEST_URL, [{"status_code": codes.too_many_requests}] * (MAX_NUMBER_ATTEMPTS), ) mock_sleep = mocker.patch.object(make_request.retry, "sleep") with pytest.raises(requests.exceptions.HTTPError) as e: make_request() assert e.value.response.status_code == codes.too_many_requests assert_sleep_calls( mocker, mock_sleep, [float(WAIT_EXP_BASE**i) for i in range(MAX_NUMBER_ATTEMPTS - 1)], ) -@throttling_retry(wait=wait_fixed(WAIT_EXP_BASE)) +@http_retry(wait=wait_fixed(WAIT_EXP_BASE)) def make_request_wait_fixed(): response = requests.get(TEST_URL) response.raise_for_status() return response -def test_throttling_retry_wait_fixed(requests_mock, mocker): +def test_http_retry_wait_fixed(requests_mock, mocker): requests_mock.get( TEST_URL, [ {"status_code": codes.too_many_requests}, {"status_code": codes.too_many_requests}, {"status_code": codes.ok}, ], ) mock_sleep = mocker.patch.object(make_request_wait_fixed.retry, "sleep") make_request_wait_fixed() assert_sleep_calls(mocker, mock_sleep, [WAIT_EXP_BASE] * 2) diff --git a/swh/lister/tuleap/lister.py b/swh/lister/tuleap/lister.py index 179329a..4a55499 100644 --- a/swh/lister/tuleap/lister.py +++ b/swh/lister/tuleap/lister.py @@ -1,150 +1,123 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging from typing import Any, Dict, Iterator, List, Optional from urllib.parse import urljoin import iso8601 -import requests -from tenacity.before_sleep import before_sleep_log -from swh.lister.utils import throttling_retry from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin -from .. import USER_AGENT from ..pattern import CredentialsType, StatelessLister logger = logging.getLogger(__name__) RepoPage = Dict[str, Any] class TuleapLister(StatelessLister[RepoPage]): """List origins from Tuleap. Tuleap provides SVN and Git repository hosting. Tuleap API getting started: https://tuleap.net/doc/en/user-guide/integration/rest.html Tuleap API reference: https://tuleap.net/api/explorer/ Using the API we first request a list of projects, and from there request their associated repositories individually.
Everything is paginated; the code throttles at the level of individual GET calls.""" LISTER_NAME = "tuleap" REPO_LIST_PATH = "/api" REPO_GIT_PATH = "plugins/git/" REPO_SVN_PATH = "plugins/svn/" def __init__( self, scheduler: SchedulerInterface, url: str, instance: Optional[str] = None, credentials: CredentialsType = None, ): super().__init__( scheduler=scheduler, credentials=credentials, url=url, instance=instance, ) - self.session = requests.Session() - self.session.headers.update( - { - "Accept": "application/json", - "User-Agent": USER_AGENT, - } - ) - - @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) - def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response: - - logger.info("Fetching URL %s with params %s", url, params) - - response = self.session.get(url, params=params) - if response.status_code != 200: - logger.warning( - "Unexpected HTTP status code %s on %s: %s", - response.status_code, - response.url, - response.content, - ) - response.raise_for_status() - - return response + self.session.headers.update({"Accept": "application/json"}) @classmethod def results_simplified(cls, url: str, repo_type: str, repo: RepoPage) -> RepoPage: if repo_type == "git": prefix_url = TuleapLister.REPO_GIT_PATH else: prefix_url = TuleapLister.REPO_SVN_PATH rep = { "project": repo["name"], "type": repo_type, "uri": urljoin(url, f"{prefix_url}{repo['path']}"), "last_update_date": repo["last_update_date"], } return rep def _get_repositories(self, url_repo) -> List[Dict[str, Any]]: - ret = self.page_request(url_repo, {}) + ret = self.http_request(url_repo) reps_list = ret.json()["repositories"] limit = int(ret.headers["X-PAGINATION-LIMIT-MAX"]) offset = int(ret.headers["X-PAGINATION-LIMIT"]) size = int(ret.headers["X-PAGINATION-SIZE"]) while offset < size: url_offset = url_repo + "?offset=" + str(offset) + "&limit=" + str(limit) - ret = self.page_request(url_offset, {}).json() + ret = self.http_request(url_offset).json() reps_list = reps_list + ret["repositories"] offset += limit return reps_list def get_pages(self) -> Iterator[RepoPage]: # base with trailing slash, path without leading slash for urljoin url_api: str = urljoin(self.url, self.REPO_LIST_PATH) url_projects = url_api + "/projects/" # Get the list of projects. - response = self.page_request(url_projects, {}) + response = self.http_request(url_projects) projects_list = response.json() limit = int(response.headers["X-PAGINATION-LIMIT-MAX"]) offset = int(response.headers["X-PAGINATION-LIMIT"]) size = int(response.headers["X-PAGINATION-SIZE"]) while offset < size: url_offset = ( url_projects + "?offset=" + str(offset) + "&limit=" + str(limit) ) - ret = self.page_request(url_offset, {}).json() + ret = self.http_request(url_offset).json() projects_list = projects_list + ret offset += limit # Get list of repositories for each project.
for p in projects_list: p_id = p["id"] # Fetch Git repositories for project url_git = url_projects + str(p_id) + "/git" repos = self._get_repositories(url_git) for repo in repos: yield self.results_simplified(url_api, "git", repo) def get_origins_from_page(self, page: RepoPage) -> Iterator[ListedOrigin]: """Convert a page of Tuleap repositories into a list of ListedOrigins.""" assert self.lister_obj.id is not None yield ListedOrigin( lister_id=self.lister_obj.id, url=page["uri"], visit_type=page["type"], last_update=iso8601.parse_date(page["last_update_date"]), ) diff --git a/swh/lister/tuleap/tests/test_lister.py b/swh/lister/tuleap/tests/test_lister.py index 16d0c7a..722e708 100644 --- a/swh/lister/tuleap/tests/test_lister.py +++ b/swh/lister/tuleap/tests/test_lister.py @@ -1,165 +1,170 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from pathlib import Path from typing import Dict, List, Tuple import pytest import requests from swh.lister.tuleap.lister import RepoPage, TuleapLister from swh.scheduler.model import ListedOrigin TULEAP_URL = "https://tuleap.net/" TULEAP_PROJECTS_URL = TULEAP_URL + "api/projects/" TULEAP_REPO_1_URL = TULEAP_URL + "api/projects/685/git" # manjaromemodoc TULEAP_REPO_2_URL = TULEAP_URL + "api/projects/309/git" # myaurora TULEAP_REPO_3_URL = TULEAP_URL + "api/projects/1080/git" # tuleap cleanup module GIT_REPOS = ( "https://tuleap.net/plugins/git/manjaromemodoc/manjaro-memo-documentation.git", "https://tuleap.net/plugins/git/myaurora/myaurora.git", ) @pytest.fixture def tuleap_projects(datadir) -> Tuple[str, Dict[str, str], List[str]]: text = Path(datadir, "https_tuleap.net", "projects").read_text() headers = { "X-PAGINATION-LIMIT-MAX": "50", "X-PAGINATION-LIMIT": "10", "X-PAGINATION-SIZE": "2", } repo_json = json.loads(text) projects = [p["shortname"] for p in repo_json] return text, headers, projects @pytest.fixture def tuleap_repo_1(datadir) -> Tuple[str, Dict[str, str], List[RepoPage], List[str]]: text = Path(datadir, "https_tuleap.net", "repo_1").read_text() headers = { "X-PAGINATION-LIMIT-MAX": "50", "X-PAGINATION-LIMIT": "10", "X-PAGINATION-SIZE": "1", } reps = json.loads(text) page_results = [] for r in reps["repositories"]: page_results.append( TuleapLister.results_simplified(url=TULEAP_URL, repo_type="git", repo=r) ) origin_urls = [r["uri"] for r in page_results] return text, headers, page_results, origin_urls @pytest.fixture def tuleap_repo_2(datadir) -> Tuple[str, Dict[str, str], List[RepoPage], List[str]]: text = Path(datadir, "https_tuleap.net", "repo_2").read_text() headers = { "X-PAGINATION-LIMIT-MAX": "50", "X-PAGINATION-LIMIT": "10", "X-PAGINATION-SIZE": "1", } reps = json.loads(text) page_results = [] for r in reps["repositories"]: page_results.append( TuleapLister.results_simplified(url=TULEAP_URL, repo_type="git", repo=r) ) origin_urls = [r["uri"] for r in page_results] return text, headers, page_results, origin_urls @pytest.fixture def tuleap_repo_3(datadir) -> Tuple[str, Dict[str, str], List[RepoPage], List[str]]: text = Path(datadir, "https_tuleap.net", "repo_3").read_text() headers = { "X-PAGINATION-LIMIT-MAX": "50", "X-PAGINATION-LIMIT": "10", "X-PAGINATION-SIZE": "0", } reps = json.loads(text) page_results = [] for r in reps["repositories"]: page_results.append( 
TuleapLister.results_simplified(url=TULEAP_URL, repo_type="git", repo=r) ) origin_urls = [r["uri"] for r in page_results] return text, headers, page_results, origin_urls +@pytest.fixture(autouse=True) +def retry_sleep_mock(mocker): + mocker.patch.object(TuleapLister.http_request.retry, "sleep") + + def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedOrigin]): """Asserts that the two collections have the same origin URLs. Does not test last_update.""" assert set(lister_urls) == {origin.url for origin in scheduler_origins} def test_tuleap_full_listing( swh_scheduler, requests_mock, mocker, tuleap_projects, tuleap_repo_1, tuleap_repo_2, tuleap_repo_3, ): """Covers full listing of multiple pages, rate-limit, page size (required for test), checking page results and listed origins, statelessness.""" lister = TuleapLister( scheduler=swh_scheduler, url=TULEAP_URL, instance="tuleap.net" ) p_text, p_headers, p_projects = tuleap_projects r1_text, r1_headers, r1_result, r1_origin_urls = tuleap_repo_1 r2_text, r2_headers, r2_result, r2_origin_urls = tuleap_repo_2 r3_text, r3_headers, r3_result, r3_origin_urls = tuleap_repo_3 requests_mock.get(TULEAP_PROJECTS_URL, text=p_text, headers=p_headers) requests_mock.get(TULEAP_REPO_1_URL, text=r1_text, headers=r1_headers) requests_mock.get( TULEAP_REPO_2_URL, [ {"status_code": requests.codes.too_many_requests}, {"text": r2_text, "headers": r2_headers}, ], ) requests_mock.get(TULEAP_REPO_3_URL, text=r3_text, headers=r3_headers) # end test setup stats = lister.run() # start test checks assert stats.pages == 2 assert stats.origins == 2 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results check_listed_origins( r1_origin_urls + r2_origin_urls + r3_origin_urls, scheduler_origins ) check_listed_origins(GIT_REPOS, scheduler_origins) assert lister.get_state_from_scheduler() is None @pytest.mark.parametrize("http_code", [400, 500, 502]) def test_tuleap_list_http_error(swh_scheduler, requests_mock, http_code): """Test handling of some HTTP errors commonly encountered""" lister = TuleapLister(scheduler=swh_scheduler, url=TULEAP_URL) requests_mock.get(TULEAP_PROJECTS_URL, status_code=http_code) with pytest.raises(requests.HTTPError): lister.run() scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert len(scheduler_origins) == 0 diff --git a/swh/lister/utils.py b/swh/lister/utils.py index ea4a989..125b31b 100644 --- a/swh/lister/utils.py +++ b/swh/lister/utils.py @@ -1,121 +1,113 @@ -# Copyright (C) 2018-2021 the Software Heritage developers +# Copyright (C) 2018-2022 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Callable, Iterator, Tuple from requests.exceptions import ConnectionError, HTTPError from requests.status_codes import codes from tenacity import retry as tenacity_retry from tenacity.stop import stop_after_attempt from tenacity.wait import wait_exponential def split_range(total_pages: int, nb_pages: int) -> Iterator[Tuple[int, int]]: """Split `total_pages` into mostly `nb_pages` ranges. In some cases, the last range can have one more element. 
>>> list(split_range(19, 10)) [(0, 9), (10, 19)] >>> list(split_range(20, 3)) [(0, 2), (3, 5), (6, 8), (9, 11), (12, 14), (15, 17), (18, 20)] >>> list(split_range(21, 3)) [(0, 2), (3, 5), (6, 8), (9, 11), (12, 14), (15, 17), (18, 21)] """ prev_index = None for index in range(0, total_pages, nb_pages): if index is not None and prev_index is not None: yield prev_index, index - 1 prev_index = index if index != total_pages: yield index, total_pages def is_throttling_exception(e: Exception) -> bool: """ Checks if an exception is a requests.exception.HTTPError for a response with status code 429 (too many requests). """ return ( isinstance(e, HTTPError) and e.response.status_code == codes.too_many_requests ) def is_retryable_exception(e: Exception) -> bool: """ Checks if an exception is worth retrying (connection, throttling or a server error). """ is_connection_error = isinstance(e, ConnectionError) is_500_error = isinstance(e, HTTPError) and e.response.status_code >= 500 return is_connection_error or is_throttling_exception(e) or is_500_error def retry_if_exception(retry_state, predicate: Callable[[Exception], bool]) -> bool: """ Custom tenacity retry predicate for handling exceptions with the given predicate. """ attempt = retry_state.outcome if attempt.failed: exception = attempt.exception() return predicate(exception) return False -def retry_if_throttling(retry_state) -> bool: - """ - Custom tenacity retry predicate for handling HTTP responses with - status code 429 (too many requests). - """ - return retry_if_exception(retry_state, is_throttling_exception) - - def retry_policy_generic(retry_state) -> bool: """ Custom tenacity retry predicate for handling failed requests: - ConnectionError - Server errors (status >= 500) - Throttling errors (status == 429) This does not handle 404, 403 or other status codes. """ return retry_if_exception(retry_state, is_retryable_exception) WAIT_EXP_BASE = 10 MAX_NUMBER_ATTEMPTS = 5 -def throttling_retry( - retry=retry_if_throttling, +def http_retry( + retry=retry_policy_generic, wait=wait_exponential(exp_base=WAIT_EXP_BASE), stop=stop_after_attempt(max_attempt_number=MAX_NUMBER_ATTEMPTS), **retry_args, ): """ Decorator based on `tenacity` for retrying a function that performs HTTP requests, when the request fails with a retryable error: connection error, server error (status >= 500) or throttling (status 429). It provides a default configuration that should work properly in most cases but all `tenacity.retry` parameters can also be overridden in client code. When the maximum number of attempts is reached, the last exception is reraised. Args: retry: function defining the retry condition (defaults to `retry_policy_generic`: connection errors, 5xx server errors and 429) https://tenacity.readthedocs.io/en/latest/#whether-to-retry wait: function defining the wait strategy before retrying (defaults to exponential backoff) https://tenacity.readthedocs.io/en/latest/#waiting-before-retrying stop: function defining when to stop retrying (defaults to stopping after 5 attempts) https://tenacity.readthedocs.io/en/latest/#stopping """ return tenacity_retry(retry=retry, wait=wait, stop=stop, reraise=True, **retry_args)
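
For reference, `http_retry` decorates any function that performs an HTTP request and raises on failure. A minimal usage sketch, mirroring the `make_request` helper from the tests above; the `fetch` name and the URL passed to it are illustrative, not part of the patch:

```
import requests

from swh.lister.utils import http_retry


@http_retry()
def fetch(url: str) -> requests.Response:
    response = requests.get(url)
    # raise_for_status() turns 429/5xx responses into HTTPError, which
    # retry_policy_generic then inspects to decide whether to retry
    response.raise_for_status()
    return response
```

With the defaults (`wait_exponential(exp_base=WAIT_EXP_BASE)` and `MAX_NUMBER_ATTEMPTS = 5`), a persistently failing call sleeps 1, 10, 100 and 1000 seconds between attempts before the exception is reraised, which is what `test_sourceforge_lister_http_error` asserts for 5xx responses.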
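
The Tuleap lister drives its pagination entirely from the `X-PAGINATION-*` response headers. A self-contained sketch of that loop, assuming a Tuleap-style endpoint whose JSON body carries a `repositories` key; the `fetch_all` helper is hypothetical:

```
from typing import Any, Dict, List

import requests


def fetch_all(url: str) -> List[Dict[str, Any]]:
    # The first response carries the page size cap and the total size
    response = requests.get(url)
    response.raise_for_status()
    results = response.json()["repositories"]
    limit = int(response.headers["X-PAGINATION-LIMIT-MAX"])
    offset = int(response.headers["X-PAGINATION-LIMIT"])
    size = int(response.headers["X-PAGINATION-SIZE"])
    while offset < size:
        # Subsequent pages are addressed by offset, capped at the server limit
        page = requests.get(url, params={"offset": offset, "limit": limit})
        page.raise_for_status()
        results += page.json()["repositories"]
        offset += limit
    return results
```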
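
Likewise, the `PROJ_URL_RE` and `CLONE_URL_FORMAT` constants shown earlier are enough to go from a sitemap project URL to a clone URL. A small sketch using the Code::Blocks example from the comments; the input URL follows the documented pattern but is only illustrative:

```
import re

# Same patterns as in swh/lister/sourceforge/lister.py above
PROJ_URL_RE = re.compile(
    r"^https://sourceforge.net/(?P<namespace>[^/]+)/(?P<project>[^/]+)/(?P<rest>.*)?"
)
CLONE_URL_FORMAT = "https://{vcs}.code.sf.net/{namespace}/{project}/{mount_point}"

match = PROJ_URL_RE.match("https://sourceforge.net/p/codeblocks/git/")
assert match is not None
clone_url = CLONE_URL_FORMAT.format(
    vcs="git",
    mount_point="git",
    namespace=match.group("namespace"),
    project=match.group("project"),
)
print(clone_url)  # https://git.code.sf.net/p/codeblocks/git
```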