diff --git a/PKG-INFO b/PKG-INFO index 35d33b3..6bbaf3d 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,125 +1,125 @@ Metadata-Version: 2.1 Name: swh.lister -Version: 3.0.0 +Version: 3.0.1 Summary: Software Heritage lister Home-page: https://forge.softwareheritage.org/diffusion/DLSGH/ Author: Software Heritage developers Author-email: swh-devel@inria.fr Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-lister Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-lister/ Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing License-File: LICENSE swh-lister ========== This component from the Software Heritage stack aims to produce listings of software origins and their URLs hosted on various public developer platforms or package managers. As these operations are quite similar, it provides a set of Python modules abstracting common software origins listing behaviors. It also provides several lister implementations, contained in the following Python modules: - `swh.lister.bitbucket` - `swh.lister.cgit` - `swh.lister.cran` - `swh.lister.debian` - `swh.lister.gitea` - `swh.lister.github` - `swh.lister.gitlab` - `swh.lister.gnu` - `swh.lister.golang` - `swh.lister.launchpad` - `swh.lister.maven` - `swh.lister.npm` - `swh.lister.packagist` - `swh.lister.phabricator` - `swh.lister.pypi` - `swh.lister.tuleap` - `swh.lister.gogs` Dependencies ------------ All required dependencies can be found in the `requirements*.txt` files located at the root of the repository. Local deployment ---------------- ## lister configuration Each lister implemented so far by Software Heritage (`bitbucket`, `cgit`, `cran`, `debian`, `gitea`, `github`, `gitlab`, `gnu`, `golang`, `launchpad`, `npm`, `packagist`, `phabricator`, `pypi`, `tuleap`, `maven`) must be configured by following the instructions below (please note that you have to replace `<lister_name>` by one of the lister names introduced above). ### Preparation steps 1. `mkdir ~/.config/swh/` 2. 
create configuration file `~/.config/swh/listers.yml` ### Configuration file sample Minimal configuration, shared by all listers, to add to the file `~/.config/swh/listers.yml`: ```lang=yml scheduler: cls: 'remote' args: url: 'http://localhost:5008/' credentials: {} ``` Note: This expects the scheduler service (port 5008) to run locally ## Executing a lister Once configured, a lister can be executed by using the `swh` CLI tool with the following options and commands: ``` $ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister <lister_name> [lister_parameters] ``` Examples: ``` $ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister bitbucket $ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister cran $ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister gitea url=https://codeberg.org/api/v1/ $ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister gitlab url=https://salsa.debian.org/api/v4/ $ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister npm $ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister pypi ``` Licensing --------- This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. See top-level LICENSE file for the full text of the GNU General Public License along with this program. diff --git a/swh.lister.egg-info/PKG-INFO b/swh.lister.egg-info/PKG-INFO index 35d33b3..6bbaf3d 100644 --- a/swh.lister.egg-info/PKG-INFO +++ b/swh.lister.egg-info/PKG-INFO @@ -1,125 +1,125 @@ Metadata-Version: 2.1 Name: swh.lister -Version: 3.0.0 +Version: 3.0.1 Summary: Software Heritage lister Home-page: https://forge.softwareheritage.org/diffusion/DLSGH/ Author: Software Heritage developers Author-email: swh-devel@inria.fr Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-lister Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-lister/ Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing License-File: LICENSE swh-lister ========== This component from the Software Heritage stack aims to produce listings of software origins and their URLs hosted on various public developer platforms or package managers. As these operations are quite similar, it provides a set of Python modules abstracting common software origins listing behaviors. 
It also provides several lister implementations, contained in the following Python modules: - `swh.lister.bitbucket` - `swh.lister.cgit` - `swh.lister.cran` - `swh.lister.debian` - `swh.lister.gitea` - `swh.lister.github` - `swh.lister.gitlab` - `swh.lister.gnu` - `swh.lister.golang` - `swh.lister.launchpad` - `swh.lister.maven` - `swh.lister.npm` - `swh.lister.packagist` - `swh.lister.phabricator` - `swh.lister.pypi` - `swh.lister.tuleap` - `swh.lister.gogs` Dependencies ------------ All required dependencies can be found in the `requirements*.txt` files located at the root of the repository. Local deployment ---------------- ## lister configuration Each lister implemented so far by Software Heritage (`bitbucket`, `cgit`, `cran`, `debian`, `gitea`, `github`, `gitlab`, `gnu`, `golang`, `launchpad`, `npm`, `packagist`, `phabricator`, `pypi`, `tuleap`, `maven`) must be configured by following the instructions below (please note that you have to replace `<lister_name>` by one of the lister names introduced above). ### Preparation steps 1. `mkdir ~/.config/swh/` 2. create configuration file `~/.config/swh/listers.yml` ### Configuration file sample Minimal configuration, shared by all listers, to add to the file `~/.config/swh/listers.yml`: ```lang=yml scheduler: cls: 'remote' args: url: 'http://localhost:5008/' credentials: {} ``` Note: This expects the scheduler service (port 5008) to run locally ## Executing a lister Once configured, a lister can be executed by using the `swh` CLI tool with the following options and commands: ``` $ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister <lister_name> [lister_parameters] ``` Examples: ``` $ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister bitbucket $ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister cran $ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister gitea url=https://codeberg.org/api/v1/ $ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister gitlab url=https://salsa.debian.org/api/v4/ $ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister npm $ swh --log-level DEBUG lister -C ~/.config/swh/listers.yml run --lister pypi ``` Licensing --------- This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. See top-level LICENSE file for the full text of the GNU General Public License along with this program. 
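The arch lister change below replaces the fixed `/tmp/archlinux_archive` destination with a per-page `tempfile.TemporaryDirectory()`. A minimal sketch of that pattern, with a hypothetical `fetch_and_extract` helper standing in for the lister's download-and-extract step (not part of the patch): a managed temporary directory avoids collisions between concurrent lister runs and guarantees cleanup even on error.

```python
import tarfile
import tempfile
from pathlib import Path


def fetch_and_extract(content: bytes, filename: str) -> list:
    # Everything written under tmpdir is deleted automatically when the
    # block exits, even on error, and concurrent runs get distinct
    # directories instead of sharing a fixed /tmp path.
    with tempfile.TemporaryDirectory() as tmpdir:
        destination = Path(tmpdir, filename)
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_bytes(content)
        extract_to = Path(tmpdir, "extracted")
        with tarfile.open(destination) as tar:
            tar.extractall(path=extract_to)
        # Process the 'desc' files while the directory still exists.
        return [p.read_text() for p in extract_to.glob("**/desc")]
```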
diff --git a/swh/lister/arch/lister.py b/swh/lister/arch/lister.py index 58e5371..af3a3d8 100644 --- a/swh/lister/arch/lister.py +++ b/swh/lister/arch/lister.py @@ -1,500 +1,501 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information + import datetime import logging from pathlib import Path import re import tarfile +import tempfile from typing import Any, Dict, Iterator, List, Optional from urllib.parse import unquote, urljoin from bs4 import BeautifulSoup import requests from tenacity.before_sleep import before_sleep_log from swh.lister.utils import throttling_retry from swh.model.hashutil import hash_to_hex from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin from .. import USER_AGENT from ..pattern import CredentialsType, StatelessLister logger = logging.getLogger(__name__) # Aliasing the page results returned by `get_pages` method from the lister. ArchListerPage = List[Dict[str, Any]] def size_to_bytes(size: str) -> int: """Convert a human readable file size to bytes. The resulting value is an approximation, as the input value is in most cases rounded. Args: size: A string representing a human readable file size (e.g. '500K') Returns: A decimal representation of file size Examples:: >>> size_to_bytes("500") 500 >>> size_to_bytes("1K") 1000 """ units = { "K": 1000, "M": 1000**2, "G": 1000**3, "T": 1000**4, "P": 1000**5, "E": 1000**6, "Z": 1000**7, "Y": 1000**8, } if size.endswith(tuple(units)): v, u = (size[:-1], size[-1]) return int(v) * units[u] else: return int(size) class ArchLister(StatelessLister[ArchListerPage]): """List Arch Linux origins from the 'core', 'extra', and 'community' repositories For 'official' Arch Linux it downloads core.tar.gz, extra.tar.gz and community.tar.gz from https://archive.archlinux.org/repos/last/, extracts them to a temporary directory and then walks through each 'desc' file. Each 'desc' file describes the latest released version of a package and helps build an origin url from which to scrape artifact metadata. For 'arm' Arch Linux it follows the same discovery process, parsing 'desc' files. The main difference is that we can't get existing versions of an arm package because https://archlinuxarm.org does not have an 'archive' website or API. 
""" LISTER_NAME = "arch" VISIT_TYPE = "arch" INSTANCE = "arch" - DESTINATION_PATH = Path("/tmp/archlinux_archive") - ARCH_PACKAGE_URL_PATTERN = "{base_url}/packages/{repo}/{arch}/{pkgname}" ARCH_PACKAGE_VERSIONS_URL_PATTERN = "{base_url}/packages/{pkgname[0]}/{pkgname}" ARCH_PACKAGE_DOWNLOAD_URL_PATTERN = ( "{base_url}/packages/{pkgname[0]}/{pkgname}/{filename}" ) ARCH_API_URL_PATTERN = "{base_url}/packages/{repo}/{arch}/{pkgname}/json" ARM_PACKAGE_URL_PATTERN = "{base_url}/packages/{arch}/{pkgname}" ARM_PACKAGE_DOWNLOAD_URL_PATTERN = "{base_url}/{arch}/{repo}/{filename}" def __init__( self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, flavours: Dict[str, Any] = { "official": { "archs": ["x86_64"], "repos": ["core", "extra", "community"], "base_info_url": "https://archlinux.org", "base_archive_url": "https://archive.archlinux.org", "base_mirror_url": "", "base_api_url": "https://archlinux.org", }, "arm": { "archs": ["armv7h", "aarch64"], "repos": ["core", "extra", "community"], "base_info_url": "https://archlinuxarm.org", "base_archive_url": "", "base_mirror_url": "https://uk.mirror.archlinuxarm.org", "base_api_url": "", }, }, ): super().__init__( scheduler=scheduler, credentials=credentials, url=flavours["official"]["base_info_url"], instance=self.INSTANCE, ) self.flavours = flavours self.session = requests.Session() self.session.headers.update( { "User-Agent": USER_AGENT, } ) @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) def request_get(self, url: str, params: Dict[str, Any]) -> requests.Response: - logger.info("Fetching URL %s with params %s", url, params) + logger.debug("Fetching URL %s with params %s", url, params) response = self.session.get(url, params=params) if response.status_code != 200: logger.warning( "Unexpected HTTP status code %s on %s: %s", response.status_code, response.url, response.content, ) response.raise_for_status() return response def scrap_package_versions( self, name: str, repo: str, base_url: str ) -> List[Dict[str, Any]]: """Given a package 'name' and 'repo', make an http call to origin url and parse its content to get package versions artifacts data. That method is suitable only for 'official' Arch Linux, not 'arm'. 
Args: name: Package name repo: The repository the package belongs to (one of self.repos) Returns: A list of version dicts Example:: [ {"url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190211-1-x86_64.pkg.tar.xz", # noqa: B950 "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20190211-1", "length": 180000, "filename": "dialog-1:1.3_20190211-1-x86_64.pkg.tar.xz", "last_modified": "2019-02-13T08:36:00"}, ] """ url = self.ARCH_PACKAGE_VERSIONS_URL_PATTERN.format( pkgname=name, base_url=base_url ) response = self.request_get(url=url, params={}) soup = BeautifulSoup(response.text, "html.parser") links = soup.find_all("a", href=True) # drop the first link (used to go up one directory) if links[0].attrs["href"] == "../": links.pop(0) versions = [] for link in links: # filename displayed can be cropped if name is too long, get it from href instead filename = unquote(link.attrs["href"]) if filename.endswith((".tar.xz", ".tar.zst")): # Extract arch from filename arch_rex = re.compile( rf"^{re.escape(name)}-(?P<version>.*)-(?P<arch>any|i686|x86_64)" rf"(.pkg.tar.(?:zst|xz))$" ) m = arch_rex.match(filename) if m is None: logger.error( "Can not find a match for architecture in %(filename)s", dict(filename=filename), ) else: arch = m.group("arch") version = m.group("version") # Extract last_modified and an approximate file size raw_text = link.next_sibling raw_text_rex = re.compile( r"^(?P<last_modified>\d+-\w+-\d+ \d\d:\d\d)\s+(?P<size>\w+)$" ) s = raw_text_rex.search(raw_text.strip()) if s is None: logger.error( "Can not find a match for 'last_modified' and/or " "'size' in '%(raw_text)s'", dict(raw_text=raw_text), ) else: assert s.groups() assert len(s.groups()) == 2 last_modified_str, size = s.groups() # format as expected last_modified = datetime.datetime.strptime( last_modified_str, "%d-%b-%Y %H:%M" ).isoformat() length = size_to_bytes(size) # we want bytes # link url is relative, format a canonical one url = self.ARCH_PACKAGE_DOWNLOAD_URL_PATTERN.format( base_url=base_url, pkgname=name, filename=filename ) versions.append( dict( name=name, version=version, repo=repo, arch=arch, filename=filename, url=url, last_modified=last_modified, length=length, ) ) return versions def get_repo_archive(self, url: str, destination_path: Path) -> Path: """Given a url and a destination path, retrieve and extract a .tar.gz archive which contains a 'desc' file for each package. Each .tar.gz archive corresponds to an Arch Linux repo ('core', 'extra', 'community'). Args: url: url of the .tar.gz archive to download destination_path: the path on disk where to extract the archive Returns: a directory Path where the archive has been extracted to. """ res = self.request_get(url=url, params={}) destination_path.parent.mkdir(parents=True, exist_ok=True) destination_path.write_bytes(res.content) extract_to = Path(str(destination_path).split(".tar.gz")[0]) tar = tarfile.open(destination_path) tar.extractall(path=extract_to) tar.close() return extract_to def parse_desc_file( self, path: Path, repo: str, base_url: str, dl_url_fmt: str, ) -> Dict[str, Any]: """Extract package information from a 'desc' file. 
There are subtle differences between parsing 'official' and 'arm' 'desc' files. Args: path: A path to a 'desc' file on disk repo: The repo the package belongs to Returns: A dict of metadata Example:: {'api_url': 'https://archlinux.org/packages/core/x86_64/dialog/json', 'arch': 'x86_64', 'base': 'dialog', 'builddate': '1650081535', 'csize': '203028', 'desc': 'A tool to display dialog boxes from shell scripts', 'filename': 'dialog-1:1.3_20220414-1-x86_64.pkg.tar.zst', 'isize': '483988', 'license': 'LGPL2.1', 'md5sum': '06407c0cb11c50d7bf83d600f2e8107c', 'name': 'dialog', 'packager': 'Evangelos Foutras ', 'pgpsig': 'pgpsig content xxx', 'project_url': 'https://invisible-island.net/dialog/', 'provides': 'libdialog.so=15-64', 'repo': 'core', 'sha256sum': 'ef8c8971f591de7db0f455970ef5d81d5aced1ddf139f963f16f6730b1851fa7', 'url': 'https://archive.archlinux.org/packages/.all/dialog-1:1.3_20220414-1-x86_64.pkg.tar.zst', # noqa: B950 'version': '1:1.3_20220414-1'} """ rex = re.compile(r"^\%(?P<k>\w+)\%\n(?P<v>.*)\n$", re.M) with path.open("rb") as content: parsed = rex.findall(content.read().decode()) data = {entry[0].lower(): entry[1] for entry in parsed} if "url" in data.keys(): data["project_url"] = data["url"] assert data["name"] assert data["filename"] assert data["arch"] data["repo"] = repo data["url"] = urljoin( base_url, dl_url_fmt.format( base_url=base_url, pkgname=data["name"], filename=data["filename"], arch=data["arch"], repo=repo, ), ) assert data["md5sum"] assert data["sha256sum"] data["checksums"] = { "md5sum": hash_to_hex(data["md5sum"]), "sha256sum": hash_to_hex(data["sha256sum"]), } return data def get_pages(self) -> Iterator[ArchListerPage]: """Yield pages of packages, sorted by name in ascending order. Each page is a list of packages belonging to a flavour ('official', 'arm') and a repo ('core', 'extra', 'community') """ for name, flavour in self.flavours.items(): for arch in flavour["archs"]: for repo in flavour["repos"]: yield self._get_repo_page(name, flavour, arch, repo) def _get_repo_page( self, name: str, flavour: Dict[str, Any], arch: str, repo: str ) -> ArchListerPage: - page = [] - if name == "official": - prefix = urljoin(flavour["base_archive_url"], "/repos/last/") - filename = f"{repo}.files.tar.gz" - archive_url = urljoin(prefix, f"{repo}/os/{arch}/{filename}") - destination_path = Path(self.DESTINATION_PATH, arch, filename) - base_url = flavour["base_archive_url"] - dl_url_fmt = self.ARCH_PACKAGE_DOWNLOAD_URL_PATTERN - base_info_url = flavour["base_info_url"] - info_url_fmt = self.ARCH_PACKAGE_URL_PATTERN - elif name == "arm": - filename = f"{repo}.files.tar.gz" - archive_url = urljoin( - flavour["base_mirror_url"], f"{arch}/{repo}/{filename}" + with tempfile.TemporaryDirectory() as tmpdir: + page = [] + if name == "official": + prefix = urljoin(flavour["base_archive_url"], "/repos/last/") + filename = f"{repo}.files.tar.gz" + archive_url = urljoin(prefix, f"{repo}/os/{arch}/{filename}") + destination_path = Path(tmpdir, arch, filename) + base_url = flavour["base_archive_url"] + dl_url_fmt = self.ARCH_PACKAGE_DOWNLOAD_URL_PATTERN + base_info_url = flavour["base_info_url"] + info_url_fmt = self.ARCH_PACKAGE_URL_PATTERN + elif name == "arm": + filename = f"{repo}.files.tar.gz" + archive_url = urljoin( + flavour["base_mirror_url"], f"{arch}/{repo}/{filename}" + ) + destination_path = Path(tmpdir, arch, filename) + base_url = flavour["base_mirror_url"] + dl_url_fmt = self.ARM_PACKAGE_DOWNLOAD_URL_PATTERN + base_info_url = flavour["base_info_url"] + info_url_fmt = 
self.ARM_PACKAGE_URL_PATTERN + + archive = self.get_repo_archive( + url=archive_url, destination_path=destination_path ) - destination_path = Path(self.DESTINATION_PATH, arch, filename) - base_url = flavour["base_mirror_url"] - dl_url_fmt = self.ARM_PACKAGE_DOWNLOAD_URL_PATTERN - base_info_url = flavour["base_info_url"] - info_url_fmt = self.ARM_PACKAGE_URL_PATTERN - - archive = self.get_repo_archive( - url=archive_url, destination_path=destination_path - ) - - assert archive - - packages_desc = list(archive.glob("**/desc")) - logger.debug( - "Processing %(instance)s source packages info from " - "%(flavour)s %(arch)s %(repo)s repository, " - "(%(qty)s packages).", - dict( - instance=self.instance, - flavour=name, - arch=arch, - repo=repo, - qty=len(packages_desc), - ), - ) - for package_desc in packages_desc: - data = self.parse_desc_file( - path=package_desc, - repo=repo, - base_url=base_url, - dl_url_fmt=dl_url_fmt, + assert archive + + packages_desc = list(archive.glob("**/desc")) + logger.debug( + "Processing %(instance)s source packages info from " + "%(flavour)s %(arch)s %(repo)s repository, " + "(%(qty)s packages).", + dict( + instance=self.instance, + flavour=name, + arch=arch, + repo=repo, + qty=len(packages_desc), + ), ) - assert data["builddate"] - last_modified = datetime.datetime.fromtimestamp( - float(data["builddate"]), tz=datetime.timezone.utc - ) + for package_desc in packages_desc: + data = self.parse_desc_file( + path=package_desc, + repo=repo, + base_url=base_url, + dl_url_fmt=dl_url_fmt, + ) - assert data["name"] - assert data["filename"] - assert data["arch"] - url = info_url_fmt.format( - base_url=base_info_url, - pkgname=data["name"], - filename=data["filename"], - repo=repo, - arch=data["arch"], - ) + assert data["builddate"] + last_modified = datetime.datetime.fromtimestamp( + float(data["builddate"]), tz=datetime.timezone.utc + ) - assert data["version"] - if name == "official": - # find all versions of a package scrapping archive - versions = self.scrap_package_versions( - name=data["name"], repo=repo, base_url=base_url + assert data["name"] + assert data["filename"] + assert data["arch"] + url = info_url_fmt.format( + base_url=base_info_url, + pkgname=data["name"], + filename=data["filename"], + repo=repo, + arch=data["arch"], ) - elif name == "arm": - # There is no way to get related versions of a package, - # but 'data' represents the latest released version, - # use it in this case - assert data["builddate"] - assert data["csize"] - assert data["url"] - versions = [ - dict( - name=data["name"], - version=data["version"], - repo=repo, - arch=data["arch"], - filename=data["filename"], - url=data["url"], - last_modified=last_modified.replace(tzinfo=None).isoformat( - timespec="seconds" - ), - length=int(data["csize"]), - ) - ] - package = { - "name": data["name"], - "version": data["version"], - "last_modified": last_modified, - "url": url, - "versions": versions, - "data": data, - } - page.append(package) - return page + assert data["version"] + if name == "official": + # find all versions of a package scraping the archive + versions = self.scrap_package_versions( + name=data["name"], repo=repo, base_url=base_url + ) + elif name == "arm": + # There is no way to get related versions of a package, + # but 'data' represents the latest released version, + # use it in this case + assert data["builddate"] + assert data["csize"] + assert data["url"] + versions = [ + dict( + name=data["name"], + version=data["version"], + repo=repo, + arch=data["arch"], + 
filename=data["filename"], + url=data["url"], + last_modified=last_modified.replace(tzinfo=None).isoformat( + timespec="seconds" + ), + length=int(data["csize"]), + ) + ] + + package = { + "name": data["name"], + "version": data["version"], + "last_modified": last_modified, + "url": url, + "versions": versions, + "data": data, + } + page.append(package) + return page def get_origins_from_page(self, page: ArchListerPage) -> Iterator[ListedOrigin]: """Iterate on all arch pages and yield ListedOrigin instances.""" assert self.lister_obj.id is not None for origin in page: artifacts = [] arch_metadata = [] for version in origin["versions"]: artifacts.append( { "version": version["version"], "filename": version["filename"], "url": version["url"], "length": version["length"], } ) arch_metadata.append( { "version": version["version"], "name": version["name"], "arch": version["arch"], "repo": version["repo"], "last_modified": version["last_modified"], } ) yield ListedOrigin( lister_id=self.lister_obj.id, visit_type=self.VISIT_TYPE, url=origin["url"], last_update=origin["last_modified"], extra_loader_arguments={ "artifacts": artifacts, "arch_metadata": arch_metadata, }, ) diff --git a/swh/lister/golang/lister.py b/swh/lister/golang/lister.py index 0d6b2b9..0a2f141 100644 --- a/swh/lister/golang/lister.py +++ b/swh/lister/golang/lister.py @@ -1,188 +1,188 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from dataclasses import dataclass from datetime import datetime import json import logging from typing import Any, Dict, Iterator, List, Optional, Tuple import iso8601 import requests from tenacity import before_sleep_log from swh.lister.utils import retry_policy_generic, throttling_retry from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin from .. import USER_AGENT from ..pattern import CredentialsType, Lister logger = logging.getLogger(__name__) @dataclass class GolangStateType: last_seen: Optional[datetime] = None """Last timestamp of a package version we have saved. Used as a starting point for an incremental listing.""" GolangPageType = List[Dict[str, Any]] class GolangLister(Lister[GolangStateType, GolangPageType]): """ List all Golang modules and send associated origins to the scheduler. The lister queries the Golang module index, whose documentation can be found at https://index.golang.org """ GOLANG_MODULES_INDEX_URL = "https://index.golang.org/index" # `limit` seems to be... limited to 2000. 
GOLANG_MODULES_INDEX_LIMIT = 2000 - LISTER_NAME = "Golang" + LISTER_NAME = "golang" def __init__( self, scheduler: SchedulerInterface, incremental: bool = False, credentials: CredentialsType = None, ): super().__init__( scheduler=scheduler, url=self.GOLANG_MODULES_INDEX_URL, - instance="Golang", + instance=self.LISTER_NAME, credentials=credentials, ) self.session = requests.Session() self.session.headers.update( {"Accept": "application/json", "User-Agent": USER_AGENT} ) self.incremental = incremental def state_from_dict(self, d: Dict[str, Any]) -> GolangStateType: as_string = d.get("last_seen") last_seen = iso8601.parse_date(as_string) if as_string is not None else None return GolangStateType(last_seen=last_seen) def state_to_dict(self, state: GolangStateType) -> Dict[str, Any]: return { "last_seen": state.last_seen.isoformat() if state.last_seen is not None else None } def finalize(self): if self.incremental and self.state.last_seen is not None: scheduler_state = self.get_state_from_scheduler() if ( scheduler_state.last_seen is None or self.state.last_seen > scheduler_state.last_seen ): self.updated = True @throttling_retry( retry=retry_policy_generic, before_sleep=before_sleep_log(logger, logging.WARNING), ) def api_request(self, url: str) -> List[str]: logger.debug("Fetching URL %s", url) response = self.session.get(url) if response.status_code not in (200, 304): # Log response content to ease debugging logger.warning( "Unexpected HTTP status code %s for URL %s", response.status_code, response.url, ) response.raise_for_status() return response.text.split() def get_single_page( self, since: Optional[datetime] = None ) -> Tuple[GolangPageType, Optional[datetime]]: """Return a page from the API and the timestamp of its last entry. Since all entries are sorted in chronological order, the timestamp is useful both for pagination and later for incremental runs.""" url = f"{self.url}?limit={self.GOLANG_MODULES_INDEX_LIMIT}" if since is not None: # The Golang index does not understand `+00:00` for some reason # and expects the "timezone zero" notation instead. This works # because all times are UTC. utc_offset = since.utcoffset() assert ( utc_offset is not None and utc_offset.total_seconds() == 0 ), "Non-UTC datetime" as_date = since.isoformat().replace("+00:00", "Z") url = f"{url}&since={as_date}" entries = self.api_request(url) page: GolangPageType = [] if not entries: return page, since for as_json in entries: entry = json.loads(as_json) timestamp = iso8601.parse_date(entry["Timestamp"]) # We've already parsed it and we'll need the datetime later, save it entry["Timestamp"] = timestamp page.append(entry) # The index is guaranteed to be sorted in chronological order since = timestamp return page, since def get_pages(self) -> Iterator[GolangPageType]: since = None if self.incremental: since = self.state.last_seen page, since = self.get_single_page(since=since) if since == self.state.last_seen: # The index returns packages whose timestamps are greater than or # equal to the date provided as a parameter, which will create # an infinite loop if not stopped here. return [], since if since is not None: self.state.last_seen = since while page: yield page page, since = self.get_single_page(since=since) if since == self.state.last_seen: return [], since if since is not None: self.state.last_seen = since def get_origins_from_page(self, page: GolangPageType) -> Iterator[ListedOrigin]: """ Iterate on all Golang projects and yield ListedOrigin instances. 
""" assert self.lister_obj.id is not None for module in page: path = module["Path"] # The loader will be expected to use the golang proxy to do the # actual downloading. We're using `pkg.go.dev` so that the URL points # to somewhere useful for a human instead of an (incomplete) API path. origin_url = f"https://pkg.go.dev/{path}" # Since the Go index lists versions and not just packages, there will # be duplicates. Fortunately, `ListedOrigins` are "upserted" server-side, # so only the last timestamp will be used, with no duplicates. # Performance should not be an issue as they are sent to the db in bulk. yield ListedOrigin( lister_id=self.lister_obj.id, url=origin_url, visit_type="golang", last_update=module["Timestamp"], ) diff --git a/swh/lister/pubdev/lister.py b/swh/lister/pubdev/lister.py index 8abb582..a17ad0e 100644 --- a/swh/lister/pubdev/lister.py +++ b/swh/lister/pubdev/lister.py @@ -1,125 +1,125 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging from typing import Any, Dict, Iterator, List, Optional import iso8601 import requests from requests.exceptions import HTTPError from tenacity.before_sleep import before_sleep_log from swh.lister.utils import throttling_retry from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin from .. import __version__ from ..pattern import CredentialsType, StatelessLister # https://github.com/dart-lang/pub/blob/master/doc/repository-spec-v2.md#metadata-headers USER_AGENT = ( f"Software Heritage PubDev Lister v{__version__} " "(+https://www.softwareheritage.org/contact)" ) logger = logging.getLogger(__name__) # Aliasing the page results returned by `get_pages` method from the lister. PubDevListerPage = List[str] class PubDevLister(StatelessLister[PubDevListerPage]): """List pub.dev (Dart, Flutter) origins.""" LISTER_NAME = "pubdev" VISIT_TYPE = "pubdev" INSTANCE = "pubdev" BASE_URL = "https://pub.dev/" PACKAGE_NAMES_URL_PATTERN = "{base_url}api/package-names" PACKAGE_INFO_URL_PATTERN = "{base_url}api/packages/{pkgname}" ORIGIN_URL_PATTERN = "{base_url}packages/{pkgname}" def __init__( self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, ): super().__init__( scheduler=scheduler, credentials=credentials, instance=self.INSTANCE, url=self.BASE_URL, ) self.session = requests.Session() self.session.headers.update( { "Accept": "application/json", "User-Agent": USER_AGENT, } ) @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response: - logger.info("Fetching URL %s with params %s", url, params) + logger.debug("Fetching URL %s with params %s", url, params) response = self.session.get(url, params=params) if response.status_code != 200: logger.warning( "Unexpected HTTP status code %s on %s: %s", response.status_code, response.url, response.content, ) response.raise_for_status() return response def get_pages(self) -> Iterator[PubDevListerPage]: """Yield an iterator which returns 'page' It uses the api provided by https://pub.dev/api/ to find Dart and Flutter package origins. The http api call get "{base_url}package-names" to retrieve a sorted list of all package names. 
There is only one page, which lists all origin URLs based on "{base_url}packages/{pkgname}" """ response = self.page_request( url=self.PACKAGE_NAMES_URL_PATTERN.format(base_url=self.url), params={} ) yield response.json()["packages"] def get_origins_from_page(self, page: PubDevListerPage) -> Iterator[ListedOrigin]: """Iterate on all pages and yield ListedOrigin instances.""" assert self.lister_obj.id is not None for pkgname in page: package_info_url = self.PACKAGE_INFO_URL_PATTERN.format( base_url=self.url, pkgname=pkgname ) try: response = self.page_request(url=package_info_url, params={}) except HTTPError: logger.warning( "Failed to fetch metadata for package %s, skipping it from listing.", pkgname, ) continue package_metadata = response.json() package_versions = package_metadata["versions"] last_published = max( package_version["published"] for package_version in package_versions ) origin_url = self.ORIGIN_URL_PATTERN.format( base_url=self.url, pkgname=pkgname ) yield ListedOrigin( lister_id=self.lister_obj.id, visit_type=self.VISIT_TYPE, url=origin_url, last_update=iso8601.parse_date(last_published), )
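For reference, a short sketch of how the pubdev lister's `last_update` is derived above (the sample metadata below is made up): ISO-8601 UTC timestamps in a uniform format compare chronologically as plain strings, so `max()` over the raw `published` values selects the latest release before any parsing happens.

```python
import iso8601

# Made-up excerpt of a pub.dev package metadata response.
package_metadata = {
    "versions": [
        {"version": "1.0.0", "published": "2021-03-01T10:00:00.000000Z"},
        {"version": "1.1.0", "published": "2022-05-17T08:30:00.000000Z"},
    ]
}

# Lexicographic max equals chronological max for uniformly formatted
# UTC timestamps, so parsing is only needed once, at the very end.
last_published = max(
    version["published"] for version in package_metadata["versions"]
)
print(iso8601.parse_date(last_published))  # 2022-05-17 08:30:00+00:00
```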