diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -25,6 +25,7 @@
- `swh.lister.pypi`
- `swh.lister.tuleap`
- `swh.lister.gogs`
+- `swh.lister.fedora`
Dependencies
------------
diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -42,3 +42,9 @@
[mypy-dulwich.*]
ignore_missing_imports = True
+
+[mypy-repomd.*]
+ignore_missing_imports = True
+
+[mypy-defusedxml.*]
+ignore_missing_imports = True
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@
tenacity >= 6.2
lxml
dulwich
+repomd
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -75,6 +75,7 @@
lister.tuleap=swh.lister.tuleap:register
lister.maven=swh.lister.maven:register
lister.gogs=swh.lister.gogs:register
+ lister.fedora=swh.lister.fedora:register
""",
classifiers=[
"Programming Language :: Python :: 3",
diff --git a/swh/lister/fedora/__init__.py b/swh/lister/fedora/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def register():
+    """Return the lister class and task modules exposed by this package."""
+    # local import: .lister is only loaded once register() is called
+    from .lister import FedoraLister
+
+    task_modules = [f"{__name__}.tasks"]
+    return {"lister": FedoraLister, "task_modules": task_modules}
diff --git a/swh/lister/fedora/lister.py b/swh/lister/fedora/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/lister.py
@@ -0,0 +1,224 @@
+# Copyright (C) 2017-2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+from dataclasses import dataclass, field
+import logging
+from typing import Any, Dict, Iterator, List, Optional, Set
+from urllib.parse import urljoin
+
+import repomd
+
+from swh.scheduler.interface import SchedulerInterface
+from swh.scheduler.model import ListedOrigin
+
+from ..pattern import Lister
+
+logger = logging.getLogger(__name__)
+
+
+Release = int  # fedora release number, e.g. 36
+Edition = str  # edition name as returned by get_editions, e.g. "Everything"
+PkgName = str  # source package name
+PkgVersion = str  # stored as "<release>/<edition>/<version>" keys by the lister
+FedoraOrigin = str  # origin url, as built by origin_url_for_package
+FedoraPageType = Iterator[repomd.Package]
+"""Each page is a list of packages from a given Fedora release"""
+
+
+def get_editions(release: Release) -> List[Edition]:
+    """Return the edition names to list for a given release number."""
+    if release < 20:
+        return ["Everything", "Fedora"]
+    editions = ["Everything", "Server", "Workstation"]
+    if release >= 28:
+        editions.append("Modular")
+    return editions
+
+
+@dataclass
+class FedoraListerState:
+    """State of the Fedora lister: what FedoraLister.finalize stores after a run"""
+
+    package_versions: Dict[PkgName, Set[PkgVersion]] = field(default_factory=dict)
+    """Dictionary mapping a package name to all the versions found during
+    last listing"""
+
+
+class FedoraLister(Lister[FedoraListerState, FedoraPageType]):
+    """
+    List source packages for given Fedora releases.
+
+    One origin is created per package name; every version found in the
+    listed releases and editions is added to its loader arguments.
+
+    An origin is yielded the first time its package is seen. When a later
+    page adds versions to an already yielded origin, that origin is sent
+    to the scheduler again when the page is committed.
+
+    Args:
+        scheduler: instance of SchedulerInterface
+        instance: name of the lister instance, also used in origin urls
+        url: fedora package archives mirror URL
+        releases: fedora releases to process, [34, 35, 36] when omitted
+    """
+
+    # in the archives, old versions of fedora do not contain repomd.xml
+
+    LISTER_NAME = "fedora"
+
+    def __init__(
+        self,
+        scheduler: SchedulerInterface,
+        instance: str = "fedora",
+        url: str = "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/",
+        releases: Optional[List[Release]] = None,
+    ):
+        super().__init__(
+            scheduler=scheduler,
+            url=url,
+            instance=instance,
+            credentials={},
+        )
+
+        # copy, so that no list is shared with the caller or between instances
+        self.releases = [34, 35, 36] if releases is None else list(releases)
+
+        # all listed origins info, keyed by origin url
+        self.listed_origins: Dict[FedoraOrigin, ListedOrigin] = {}
+        # origin urls that have already been yielded for a previous page
+        self.sent_origins: Set[FedoraOrigin] = set()
+        # already listed origins that got new versions in the current page;
+        # they are sent to the scheduler for update in the commit_page method
+        self.origins_to_update: Dict[FedoraOrigin, ListedOrigin] = {}
+        # versions found per package name, set as lister state in finalize
+        self.package_versions: Dict[PkgName, Set[PkgVersion]] = {}
+
+    def state_from_dict(self, d: Dict[str, Any]) -> FedoraListerState:
+        """Build the lister state from its dict form (version lists to sets)."""
+        return FedoraListerState(package_versions={k: set(v) for k, v in d.items()})
+
+    def state_to_dict(self, state: FedoraListerState) -> Dict[str, Any]:
+        """Convert the lister state to its dict form (version sets to lists)."""
+        return {k: list(v) for k, v in state.package_versions.items()}
+
+    def page_request(self, release: Release, edition: Edition) -> FedoraPageType:
+        """Yield the parsed source packages of a given fedora release and edition."""
+        index_url = urljoin(
+            self.url,
+            f"{release}/{edition}/source/SRPMS/"
+            if release < 24
+            else f"{release}/{edition}/source/tree/",
+        )
+
+        repo = repomd.load(index_url)  # throws error if no repomd.xml is found
+        logger.debug(
+            "Fetched metadata from url: %s, found %d packages", index_url, len(repo)
+        )
+        # TODO: Extract more fields like "provides" and "requires" from *primary.xml
+        # as extrinsic metadata using the pkg._element.findtext method
+        yield from repo
+
+    def get_pages(self) -> Iterator[FedoraPageType]:
+        """Yield one page of parsed fedora packages per (release, edition) pair."""
+        # Dirs that don't contain .rpm files:
+        # Docker,CloudImages,Atomic*,Spins,Live,Cloud_Atomic,Silverblue
+        for release in self.releases:
+            for edition in get_editions(release):
+                logger.debug("Listing fedora release %s edition %s", release, edition)
+                # read back by get_origins_from_page to build the version keys
+                self.current_release = release
+                self.current_edition = edition
+                yield self.page_request(release, edition)
+
+    def origin_url_for_package(self, package_name: PkgName) -> FedoraOrigin:
+        """Return the origin url for the given package"""
+        return f"rpm://{self.instance}/packages/{package_name}"
+
+    def get_origins_from_page(self, page: FedoraPageType) -> Iterator[ListedOrigin]:
+        """Convert a page of fedora package sources into an iterator of ListedOrigin."""
+        assert self.lister_obj.id is not None
+
+        origins_to_send = {}
+        self.origins_to_update = {}
+
+        for pkg_metadata in page:
+            package_name = pkg_metadata.name
+            package_version = pkg_metadata.version
+            package_last_updated = pkg_metadata.build_time
+            package_download_path = pkg_metadata.location
+
+            origin_url = self.origin_url_for_package(package_name)
+            # create package version key as expected by the fedora (rpm) loader
+            # TODO: could use pkg.release instead of self.current_release
+            package_version_key = (
+                f"{self.current_release}/{self.current_edition}/{package_version}"
+            )
+
+            # this is the first time a package is listed
+            if origin_url not in self.listed_origins:
+                # create a ListedOrigin object for it that can be later
+                # updated with new package versions info
+                self.listed_origins[origin_url] = ListedOrigin(
+                    lister_id=self.lister_obj.id,
+                    url=origin_url,
+                    visit_type="rpm",
+                    extra_loader_arguments={"packages": {}},
+                    last_update=package_last_updated,
+                )
+
+                # origin will be yielded at the end of that method
+                origins_to_send[origin_url] = self.listed_origins[origin_url]
+                # init set that will contain all listed package versions
+                self.package_versions[package_name] = set()
+
+            # package was first listed in a previous page
+            elif origin_url not in origins_to_send:
+                # if package has been listed previously, its new versions
+                # will be added to its ListedOrigin object but the update will
+                # be sent to the scheduler in the commit_page method
+                self.origins_to_update[origin_url] = self.listed_origins[origin_url]
+
+            # update package versions data in parameter that will be provided
+            # to the rpm loader
+            self.listed_origins[origin_url].extra_loader_arguments["packages"][
+                package_version_key
+            ] = {
+                "name": package_name,
+                "version": package_version,
+                "path": package_download_path,
+            }
+
+            # add package version key to the set of found versions
+            self.package_versions[package_name].add(package_version_key)
+
+        # update already counted origins with changes since last page
+        self.sent_origins.update(origins_to_send)
+
+        logger.debug(
+            "Found %s new packages, %s packages with new versions.",
+            len(origins_to_send),
+            len(self.origins_to_update),
+        )
+        logger.debug(
+            "Current total number of listed packages is equal to %s.",
+            len(self.listed_origins),
+        )
+
+        yield from origins_to_send.values()
+
+    def get_origins_to_update(self) -> Iterator[ListedOrigin]:
+        """Yield already listed origins that got new versions in the current page."""
+        yield from self.origins_to_update.values()
+
+    def commit_page(self, page: FedoraPageType):
+        """Send to scheduler already listed origins where new versions have been found
+        in current page."""
+        self.send_origins(self.get_origins_to_update())
+
+    def finalize(self):
+        """Save listed package versions as state, updated if any origin was sent."""
+        self.state.package_versions = self.package_versions
+        self.updated = len(self.sent_origins) > 0
diff --git a/swh/lister/fedora/tasks.py b/swh/lister/fedora/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/tasks.py
@@ -0,0 +1,29 @@
+# Copyright (C) 2022 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from typing import Dict, List, Optional
+
+from celery import shared_task
+
+from .lister import FedoraLister
+
+
+@shared_task(name=__name__ + ".FullFedoraRelister")
+def list_fedora_full(
+    url: str,
+    instance: Optional[str] = None,
+    releases: Optional[List[int]] = None,
+) -> Dict[str, int]:
+    """Full update of a Fedora instance; returns the lister run result as a dict.
+
+    `releases` is only forwarded when given, so the lister default applies."""
+    lister_args: Dict[str, object] = {"url": url, "instance": instance}
+    if releases is not None:
+        lister_args["releases"] = releases
+    return FedoraLister.from_configfile(**lister_args).run().dict()
+
+
+@shared_task(name=__name__ + ".ping")
+def _ping() -> str:
+    return "OK"  # constant reply; presumably a worker liveness check
diff --git a/swh/lister/fedora/tests/__init__.py b/swh/lister/fedora/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary26.xml b/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary26.xml
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary26.xml
@@ -0,0 +1,1162 @@
+
+
+
+ 0ad
+ src
+
+ 89c05dfbe8c40a66fed5f4c7b87d172b6ab3d898307edbae351528214a39f8e8
+ Cross-Platform RTS Game of Ancient Warfare
+ 0 A.D. (pronounced "zero ey-dee") is a free, open-source, cross-platform
+real-time strategy (RTS) game of ancient warfare. In short, it is a
+historically-based war/economy game that allows players to relive or rewrite
+the history of Western civilizations, focusing on the years between 500 B.C.
+and 500 A.D. The project is highly ambitious, involving state-of-the-art 3D
+graphics, detailed artwork, sound, and a flexible and powerful custom-built
+game engine.
+
+The game has been in development by Wildfire Games (WFG), a group of volunteer,
+hobbyist game developers, since 2001.
+ Fedora Project
+ http://play0ad.com
+
+
+
+
+ GPLv2+ and BSD and MIT and IBM
+ Fedora Project
+ Amusements/Games
+ buildvm-aarch64-10.arm.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 0ad-data
+ src
+
+ 339932264e5ad849b8c97a2aeff42658d0983570f5dc8a40a89985883f8c1f5a
+ The Data Files for 0 AD
+ 0 A.D. (pronounced "zero ey-dee") is a free, open-source, cross-platform
+real-time strategy (RTS) game of ancient warfare. In short, it is a
+historically-based war/economy game that allows players to relive or rewrite
+the history of Western civilizations, focusing on the years between 500 B.C.
+and 500 A.D. The project is highly ambitious, involving state-of-the-art 3D
+graphics, detailed artwork, sound, and a flexible and powerful custom-built
+game engine.
+
+This package contains the 0ad data files.
+ Fedora Project
+ http://play0ad.com
+
+
+
+
+ CC-BY-SA
+ Fedora Project
+ Amusements/Games
+ buildvm-07.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+ 0install
+ src
+
+ 79ace4e9dc84500aea0e3e5ea878dc3fae17a0fd3a07daf3e109066d514be68e
+ A decentralized cross-distribution software installation system
+ Zero Install is a decentralized cross-distribution software
+installation system available under the LGPL. It allows software
+developers to publish programs directly from their own web-sites,
+while supporting features familiar from centralized distribution
+repositories such as shared libraries, automatic updates and digital
+signatures. It is intended to complement, rather than replace, the
+operating system's package management. 0install packages never
+interfere with those provided by the distribution.
+
+0install does not define a new packaging format; unmodified tarballs
+or zip archives can be used. Instead, it defines an XML metadata
+format to describe these packages and the dependencies between them. A
+single metadata file can be used on multiple platforms (e.g. Ubuntu,
+Debian, Fedora, openSUSE, Mac OS X and Windows), assuming binary or
+source archives are available that work on those systems.
+
+0install also has some interesting features not often found in
+traditional package managers. For example, while it will share
+libraries whenever possible, it can always install multiple versions
+of a package in parallel when there are conflicting
+requirements. Installation is always side-effect-free (each package is
+unpacked to its own directory and will not touch shared directories
+such as /usr/bin), making it ideal for use with sandboxing
+technologies and virtualization.
+
+The XML file describing the program's requirements can also be
+included in a source-code repository, allowing full dependency
+handling for unreleased developer versions. For example, a user can
+clone a Git repository and build and test the program, automatically
+downloading newer versions of libraries where necessary, without
+interfering with the versions of those libraries installed by their
+distribution, which continue to be used for other software.
+ Fedora Project
+ http://0install.net
+
+
+
+
+ LGPLv2+
+ Fedora Project
+ Unspecified
+ buildvm-aarch64-19.arm.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 0xFFFF
+ src
+
+ 96f9c163c0402d2b30e5343c8397a6d50e146c85a446804396b119ef9698231f
+ The Open Free Fiasco Firmware Flasher
+ The 'Open Free Fiasco Firmware Flasher' aka 0xFFFF utility implements
+a free (GPL3) userspace handler for the NOLO bootloader and related
+utilities for the Nokia Internet Tablets like flashing setting device
+options, packing/unpacking FIASCO firmware format and more.
+ Fedora Project
+ http://www.nopcode.org/0xFFFF/
+
+
+
+
+ GPLv3
+ Fedora Project
+ Applications/System
+ buildvm-05.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+ 2048-cli
+ src
+
+ 8a2d21bc6f88f6a1906d4760255fa383a70e595317883cb8847c81e99ec9e761
+ The game 2048 for your Linux terminal
+ A cli version of the game 2048 for your Linux terminal.
+ Fedora Project
+ https://github.com/Tiehuis/2048-cli
+
+
+
+
+ MIT
+ Fedora Project
+ Unspecified
+ buildvm-28.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+ 2ping
+ src
+
+ fe684c7a07c1d4dba5b493805af17c630a23bb230c2666c485cc08bc3321318b
+ Bi-directional ping utility
+ 2ping is a bi-directional ping utility. It uses 3-way pings (akin to TCP SYN,
+SYN/ACK, ACK) and after-the-fact state comparison between a 2ping listener and
+a 2ping client to determine which direction packet loss occurs.
+ Fedora Project
+ http://www.finnie.org/software/2ping
+
+
+
+
+ GPLv2+
+ Fedora Project
+ Unspecified
+ buildvm-30.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+ 389-admin
+ src
+
+ aa849057e2deedbbfb050e8572ee8c43bc43f222c3aa01a7ef141e3fe60a1db8
+ 389 Administration Server (admin)
+ 389 Administration Server is an HTTP agent that provides management features
+for 389 Directory Server. It provides some management web apps that can
+be used through a web browser. It provides the authentication, access control,
+and CGI utilities used by the console.
+ Fedora Project
+ http://www.port389.org/
+
+
+
+
+ GPLv2 and ASL 2.0
+ Fedora Project
+ System Environment/Daemons
+ buildhw-03.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 389-admin-console
+ src
+
+ 3d6de5f3bda84055e4fdad7c15c1a384a346b8c810ca7302c59104c14e196fd0
+ 389 Admin Server Management Console
+ A Java based remote management console used for Managing 389
+Admin Server. Requires the 389 Console to load and run the
+jar files.
+ Fedora Project
+ http://www.port389.org
+
+
+
+
+ GPLv3
+ Fedora Project
+ Applications/System
+ buildvm-16.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+ 389-adminutil
+ src
+
+ e89dd564baa83a290ff4df61807bda030705c77a80d36816ff236ae47a4f3bf8
+ Utility library for 389 administration
+ 389-adminutil is libraries of functions used to administer directory
+servers, usually in conjunction with the admin server. 389-adminutil is
+broken into two libraries - libadminutil contains the basic
+functionality, and libadmsslutil contains SSL versions and wrappers
+around the basic functions. The PSET functions allow applications to
+store their preferences and configuration parameters in LDAP, without
+having to know anything about LDAP. The configuration is cached in a
+local file, allowing applications to function even if the LDAP server
+is down. The other code is typically used by CGI programs used for
+directory server management, containing GET/POST processing code as
+well as resource handling (ICU ures API).
+ Fedora Project
+ http://port389.org/wiki/AdminUtil
+
+
+
+
+ GPLv3
+ Fedora Project
+ Development/Libraries
+ buildvm-02.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+ 389-console
+ src
+
+ 6482088712ef3b055d98303fe37e770f26766c66f83d6d60d19c907b29aff646
+ 389 Management Console
+ A Java based remote management console used for managing 389
+Administration Server and 389 Directory Server.
+ Fedora Project
+ http://port389.org
+
+
+
+
+ LGPLv2
+ Fedora Project
+ Applications/System
+ buildvm-aarch64-17.arm.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+ 389-ds
+ src
+
+ da7d9a853a49a1d45d073443c99b18864ac404325cd8c447894898132efd763a
+ 389 Directory, Administration, and Console Suite
+ The 389 Directory Server, Administration Server, and Console Suite provide
+the LDAPv3 server, the httpd daemon used to administer the server, and the
+console GUI application used for server and user/group administration.
+ Fedora Project
+ http://port389.org/
+
+
+
+
+ GPLv2
+ Fedora Project
+ System Environment/Daemons
+ buildvm-14.phx2.fedoraproject.org
+
+
+
+
+
+ 389-ds-base
+ src
+
+ b1a0fa067b1938e1ca8d02c272379bbb42824a3817ea85cc031806dd1f315087
+ 389 Directory Server (base)
+ 389 Directory Server is an LDAPv3 compliant server. The base package includes
+the LDAP server and command line utilities for server administration.
+ Fedora Project
+ http://www.port389.org
+
+
+
+
+ GPLv3+
+ Fedora Project
+ System Environment/Daemons
+ buildhw-08.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 389-ds-console
+ src
+
+ e074df08c0c1a605650ffef4b4372e6e77adf671b68e7d2ae92e1421a74575e2
+ 389 Directory Server Management Console
+ A Java based remote management console used for managing 389
+Directory Server. The 389 Console is required to load and
+run these jar files.
+ Fedora Project
+ http://www.port389.org
+
+
+
+
+ GPLv3
+ Fedora Project
+ Applications/System
+ buildhw-04.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+ 389-dsgw
+ src
+
+ 4d941a1a1f75408d76eaf68cc67e335d53edb0822a2295df86deb80a446122dc
+ 389 Directory Server Gateway (dsgw)
+ 389 Directory Server Gateway is a collection of 3 web applications
+that run on top of the Administration Server used by the Directory
+Server. These 3 applications are:
+* phonebook - a simple phonebook application geared towards end users,
+with simple search screens and simple self-service management
+* orgchart - an organization chart viewer
+* gateway - a more advanced search interface that allows admins to
+create and edit user entries, and allows creation of templates for
+different types of user and group entries
+ Fedora Project
+ http://port389.org/
+
+
+
+
+ GPLv2
+ Fedora Project
+ System Environment/Daemons
+ buildvm-08.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 3Depict
+ src
+
+ 4e6df7205db51e6cdc7b413517b01380bdb3262731b9f198d8222cf4dcfabfbd
+ Valued 3D point cloud visualization and analysis
+ This software is designed to help users visualize and analyze 3D point clouds
+with an associated real value, in a fast and flexible fashion. It is
+specifically targeted to atom probe tomography applications, but may be
+useful for general scalar valued point data purposes.
+ Fedora Project
+ http://threedepict.sourceforge.net
+
+
+
+
+ GPLv3+
+ Fedora Project
+ Applications/Engineering
+ buildvm-03.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 3dprinter-udev-rules
+ src
+
+ 4254bf954423c860464e1c21bab1797720664a8f26b74c9fc205b00ce96a6768
+ Rules for udev to give regular users access to operate 3D printers
+ Normally, when you connect a RepRap like 3D printer to a Linux machine by an
+USB cable, you need to be in dialout or similar group to be able to control
+it via OctoPrint, Printrun, Cura or any other control software. Not any more.
+
+Install this rule to grant all users read and write access to collected
+devices based on the VID and PID.
+
+Disclaimer: Such device might not be a 3D printer, it my be an Arduino, it
+might be a modem and it might even be a blender. But normally you would
+add your user to dialout and get access to all of those and more anyway.
+So I guess be careful when some of the users should not get access to
+your blenders.
+ Fedora Project
+ https://github.com/hroncok/3dprinter-udev-rules
+
+
+
+
+ CC0
+ Fedora Project
+ Unspecified
+ arm02-builder04.arm.fedoraproject.org
+
+
+
+
+
+
+
+
+ 3proxy
+ src
+
+ 12927f379da4871e0939368407ecc18f5a5ca74fbde33349619448afab51ca27
+ Tiny but very powerful proxy
+ 3proxy -- light proxy server.
+Universal proxy server with HTTP, HTTPS, SOCKS v4, SOCKS v4a, SOCKS v5, FTP,
+POP3, UDP and TCP portmapping, access control, bandwith control, traffic
+limitation and accounting based on username, client IP, target IP, day time,
+day of week, etc.
+ Fedora Project
+ http://3proxy.ru/?l=EN
+
+
+
+
+ BSD or ASL 2.0 or GPLv2+ or LGPLv2+
+ Fedora Project
+ System Environment/Daemons
+ buildvm-24.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+ 4Pane
+ src
+
+ daf1615ea9d4bc36e0fac2a13ac37ae5e169893e9c97f241a91d1da0afd2ea0c
+ Multi-pane, detailed-list file manager
+ 4Pane is a multi-pane, detailed-list file manager. It is designed
+to be fully-featured without bloat, and aims for speed rather than
+visual effects.
+In addition to standard file manager things, it offers multiple
+undo and redo of most operations (including deletions), archive
+management including 'virtual browsing' inside archives, multiple
+renaming/duplication of files, a terminal emulator and user-defined
+tools.
+ Fedora Project
+ http://www.4pane.co.uk/
+
+
+
+
+ GPLv3
+ Fedora Project
+ Unspecified
+ buildhw-05.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 64tass
+ src
+
+ cda7d777f556e9007bed98d64814067ad6c48287713fb730e2a3ef886558134a
+ 6502 assembler
+ 64tass is a multi-pass optimizing macro assembler for the 65xx series of
+processors. It supports the 6502, 65C02, R65C02, W65C02, 65CE02, 65816,
+DTV, and 65EL02, using a syntax similar to that of Omicron TASS and TASM.
+ Fedora Project
+ http://tass64.sourceforge.net/
+
+
+
+
+ GPLv2+
+ Fedora Project
+ Development/Languages
+ buildvm-26.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+ 7kaa
+ src
+
+ daa291c15876c820858c28225c150e076ab878d48c76cddd3e8287fc67141a0a
+ Seven Kingdoms: Ancient Adversaries
+ Seven Kingdoms is a real-time strategy (RTS) computer game developed
+by Trevor Chan of Enlight Software. The game enables players to
+compete against up to six other kingdoms allowing players to conquer
+opponents by defeating them in war (with troops or machines),
+capturing their buildings with spies, or offering opponents money
+for their kingdom.
+
+Seven Kingdoms: Ancient Adversaries is a free patch provided by
+Interactive Magic and added three new cultures, the Egyptians, the
+Mughals and the Zulus, and a new war machine, Unicorn.
+ Fedora Project
+ http://7kfans.com/
+
+
+
+
+ GPLv3+ and GPLv2+
+ Fedora Project
+ Unspecified
+ buildvm-31.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 99soft-oss-parent
+ src
+
+ 25a24a764f9a185a63a0df0a148403b82503395bee0eaf7aa972a83aed259601
+ 99 Software Foundation parent pom
+ The 99 Software Foundation parent pom.
+ Fedora Project
+ https://github.com/99soft/infra
+
+
+
+
+ ASL 2.0
+ Fedora Project
+ Unspecified
+ buildvm-aarch64-16.arm.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+ 9wm
+ src
+
+ 9e339c9a698a0d9472cee50697dfd964338b0b4a61d66611b826ccecdbfc5c1b
+ Emulation of the Plan 9 window manager 8 1/2
+ 9wm is an X window manager which attempts to emulate the Plan 9 window
+manager 8-1/2 as far as possible within the constraints imposed by X.
+It provides a simple yet comfortable user interface, without garish
+decorations or title-bars. Or icons. And it's click-to-type.
+ Fedora Project
+ https://woozle.org/neale/src/9wm/
+
+
+
+
+ MIT
+ Fedora Project
+ User Interface/Desktops
+ buildvm-18.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+ AGReader
+ src
+
+ 546ac7b23a8cb8a0149578fe1bce4a60b614e091d20e5b0235136916da896775
+ Console reader for viewing AmigaGuide files
+ A viewer for the UNIX console which can read and display AmigaGuide files. It
+supports all of the v39 AmigaGuide specification possible and supports a large
+subset of the v40 specifications.
+ Fedora Project
+ http://main.aminet.net/misc/unix/
+
+
+
+
+ GPL+
+ Fedora Project
+ Applications/Text
+ buildvm-21.phx2.fedoraproject.org
+
+
+
+
+
+ APLpy
+ src
+
+ dc989d44cf1d3514a57c5b5e6717bd7085e959f5839e517c632c00ab65a10c85
+ The Astronomical Plotting Library in Python
+ APLpy (the Astronomical Plotting Library in Python) is a Python module aimed at
+producing publication-quality plots of astronomical imaging data in FITS format.
+The module uses Matplotlib, a powerful and interactive plotting package. It is
+capable of creating output files in several graphical formats, including EPS,
+PDF, PS, PNG, and SVG.
+ Fedora Project
+ http://aplpy.github.com
+
+
+
+
+ MIT
+ Fedora Project
+ Development/Languages
+ buildhw-05.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ATpy
+ src
+
+ ee426f715d00c91412f04acd34c4c02df9a1dfeb6c3265b9c93f9d6dfa3c433a
+ Astronomical Tables in Python
+ ATpy is a high-level Python package providing a way to manipulate tables of
+astronomical data in a uniform way. It provides built-in support for NumPy
+recarrays and common astronomical file/database formats (FITS, VO, HDF5,
+and ASCII tables) with a very simple API.
+ Fedora Project
+ http://atpy.readthedocs.org
+
+
+
+
+ MIT
+ Fedora Project
+ Development/Libraries
+ buildhw-02.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+ AcetoneISO
+ src
+
+ e76553136b66a5f5854b8b5daaded10ed2e77444b0284c3533a90c9dcfd37688
+ CD/DVD Image Manipulator
+ AcetoneISO: The CD/DVD image manipulator for Linux, it can do the following:
+- Mount and Unmount ISO, MDF, NRG (if iso-9660 standard)
+- Convert / Extract / Browse to ISO : *.bin *.mdf *.nrg *.img *.daa *.cdi
+ *.xbx *.b5i *.bwi *.pdi
+- Play a DVD Movie ISO with most used media players
+- Generate an ISO from a Folder or CD/DVD
+- Generate MD5 file of an image
+- Encrypt an image
+- Split image into X megabyte chunks
+- Highly compress an image
+- Rip a PSX cd to *.bin to make it work with epsxe/psx emulators
+- Service-Menu support for Konqueror
+- Restore a lost CUE file of *.bin *.img
+ Fedora Project
+ http://www.acetoneteam.org/
+
+
+
+
+ GPLv2+
+ Fedora Project
+ Applications/Archiving
+ buildvm-aarch64-15.arm.fedoraproject.org
+
+
+
+
+
+
+
+
+
+ Add64
+ src
+
+ b019cd07bca2e5ba133dd5c167060d17ee585f63104bddd0b56f21bfd922dd99
+ An additive synthesizer using JACK
+ Add64 is an additive synthesizer using Qt and the JACK audio connection kit
+ Fedora Project
+ http://sourceforge.net/projects/add64
+
+
+
+
+ GPLv3
+ Fedora Project
+ Applications/Multimedia
+ buildvm-22.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+ Agda
+ src
+
+ 31710476e81a755e9f5c9969de0a57a5356a85703cc261336b4c8ddb655d060c
+ A dependently typed functional programming language and proof assistant
+ Agda is a dependently typed functional programming language: it has
+inductive families, which are similar to Haskell's GADTs, but they can
+be indexed by values and not just types. It also has parameterized
+modules, mixfix operators, Unicode characters, and an interactive
+Emacs interface (the type checker can assist in the development of your code).
+
+Agda is also a proof assistant: It is an interactive system for writing and
+checking proofs. Agda is based on intuitionistic type theory,
+a foundational system for constructive mathematics developed by
+the Swedish logician Per Martin-Löf. It has many similarities with other
+proof assistants based on dependent types, such as Coq, Epigram and NuPRL.
+ Fedora Project
+ https://hackage.haskell.org/package/Agda
+
+
+
+
+ MIT and BSD
+ Fedora Project
+ Unspecified
+ buildvm-aarch64-15.arm.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Agda-stdlib
+ src
+
+ 1bb7c2150f46fc657b2ac1fc5e79f640de523d969bfb7fe142fa82c9eecb531f
+ Agda standard libraries
+ Agda standard libraries
+ Fedora Project
+ http://wiki.portal.chalmers.se/agda/agda.php?n=Libraries.StandardLibrary
+
+
+
+
+ MIT
+ Fedora Project
+ Development/Libraries
+ buildvm-aarch64-18.arm.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+ zynjacku
+ src
+
+ 309dc2ebd94d40933558260724fd7d15d9d84d0fd9541d9dbb27a54760b875d2
+ LV2 synths and plugins host
+ zynjacku is JACK based, GTK (2.x) host for LV2 synths. It has one JACK MIDI
+input port (routed to all hosted synths) and one (two for stereo synths) JACK
+audio output port per plugin. Such design provides multi-timbral sound by
+running several synth plugins.
+
+zynjacku is a nunchaku weapon for JACK audio synthesis. You have solid parts
+for synthesis itself and you have flexible part that allows synthesis to suit
+your needs.
+
+lv2rack is a host for LV2 effect plugins.
+ Fedora Project
+ http://home.gna.org/zynjacku/
+
+
+
+
+ GPLv2
+ Fedora Project
+ Applications/Multimedia
+ buildvm-08.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+ zypper
+ src
+
+ 8984b02bf1f977a9fcaf9d514016e371206bf3697148adb56ec80f70e57a7121
+ Command line package manager using libzypp
+ Zypper is a command line package manager tool using libzypp,
+which can be used to manage software for RPM based systems.
+ Fedora Project
+ http://en.opensuse.org/Portal:Zypper
+
+
+
+
+ GPLv2+
+ Fedora Project
+ Unspecified
+ buildvm-25.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ zziplib
+ src
+
+ 8db1b95ac56b94c563638deea593beb4143d5d80dc436debf6e287d3f518582c
+ Lightweight library to easily extract data from zip files
+ The zziplib library is intentionally lightweight, it offers the ability to
+easily extract data from files archived in a single zip file. Applications
+can bundle files into a single zip archive and access them. The implementation
+is based only on the (free) subset of compression with the zlib algorithm
+which is actually used by the zip/unzip tools.
+ Fedora Project
+ http://zziplib.sourceforge.net/
+
+
+
+
+ LGPLv2+ or MPLv1.1
+ Fedora Project
+ Applications/Archiving
+ buildvm-27.phx2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ zzuf
+ src
+
+ 3d7a167cc518dbc9ec3294279751786baf4264907d870ff4b76c571ce03df094
+ Transparent application input fuzzer
+ zzuf is a transparent application input fuzzer. It works by
+intercepting file operations and changing random bits in the program's
+input. zzuf's behaviour is deterministic, making it easy to reproduce
+bugs.
+ Fedora Project
+ http://sam.zoy.org/zzuf/
+
+
+
+
+ WTFPL
+ Fedora Project
+ Development/Tools
+ buildvm-11.phx2.fedoraproject.org
+
+
+
+
+
\ No newline at end of file
diff --git a/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary36.xml b/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary36.xml
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary36.xml
@@ -0,0 +1,596 @@
+
+
+
+ 0ad
+ src
+
+ 85fca6fd7a1073da430439b850cb4015eea97cea396ebba163bde4d2f1b72e2e
+ Cross-Platform RTS Game of Ancient Warfare
+ 0 A.D. (pronounced "zero ey-dee") is a free, open-source, cross-platform
+real-time strategy (RTS) game of ancient warfare. In short, it is a
+historically-based war/economy game that allows players to relive or rewrite
+the history of Western civilizations, focusing on the years between 500 B.C.
+and 500 A.D. The project is highly ambitious, involving state-of-the-art 3D
+graphics, detailed artwork, sound, and a flexible and powerful custom-built
+game engine.
+
+The game has been in development by Wildfire Games (WFG), a group of volunteer,
+hobbyist game developers, since 2001.
+ Fedora Project
+ http://play0ad.com
+
+
+
+
+ GPLv2+ and BSD and MIT and IBM and MPLv2.0
+ Fedora Project
+ Unspecified
+ buildvm-a32-13.iad2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 0ad-data
+ src
+
+ db142de59b5b2bf33abc6025d4c6be56851ebcbcde7b39c3dc461e2b8992e8c5
+ The Data Files for 0 AD
+ 0 A.D. (pronounced "zero ey-dee") is a free, open-source, cross-platform
+real-time strategy (RTS) game of ancient warfare. In short, it is a
+historically-based war/economy game that allows players to relive or rewrite
+the history of Western civilizations, focusing on the years between 500 B.C.
+and 500 A.D. The project is highly ambitious, involving state-of-the-art 3D
+graphics, detailed artwork, sound, and a flexible and powerful custom-built
+game engine.
+
+This package contains the 0ad data files.
+ Fedora Project
+ http://play0ad.com
+
+
+
+
+ CC-BY-SA
+ Fedora Project
+ Unspecified
+ buildhw-a64-22.iad2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+ 0xFFFF
+ src
+
+ 45eee8d990d502324ae665233c320b8a5469c25d735f1862e094c1878d6ff2cd
+ The Open Free Fiasco Firmware Flasher
+ The 'Open Free Fiasco Firmware Flasher' aka 0xFFFF utility implements
+a free (GPL3) userspace handler for the NOLO bootloader and related
+utilities for the Nokia Internet Tablets like flashing setting device
+options, packing/unpacking FIASCO firmware format and more.
+ Fedora Project
+ https://talk.maemo.org/showthread.php?t=87996
+
+
+
+
+ GPLv3
+ Fedora Project
+ Unspecified
+ buildvm-a32-17.iad2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 2048-cli
+ src
+
+ 5f77b054ad11aff03bc145616b88c977e67a11333902de776e8ce8769caef7b5
+ The game 2048 for your Linux terminal
+ A cli version of the game 2048 for your Linux terminal.
+ Fedora Project
+ https://github.com/Tiehuis/2048-cli
+
+
+
+
+ MIT
+ Fedora Project
+ Unspecified
+ buildvm-a32-19.iad2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 2ping
+ src
+
+ 2ce028d944ebea1cab8c6203c9fed882792478b42fc34682b886a9db16e9de28
+ Bi-directional ping utility
+ 2ping is a bi-directional ping utility. It uses 3-way pings (akin to TCP SYN,
+SYN/ACK, ACK) and after-the-fact state comparison between a 2ping listener and
+a 2ping client to determine which direction packet loss occurs.
+ Fedora Project
+ https://www.finnie.org/software/2ping
+
+
+
+
+ GPLv2+
+ Fedora Project
+ Unspecified
+ buildvm-x86-32.iad2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 389-ds-base
+ src
+
+ 00ccfe16be5767bd043a878f0588b19445b8be8d15c477a6ecae331f9485c55e
+ 389 Directory Server (base)
+ 389 Directory Server is an LDAPv3 compliant server. The base package includes
+the LDAP server and command line utilities for server administration.
+ Fedora Project
+ https://www.port389.org
+
+
+
+
+ GPLv3+
+ Fedora Project
+ Unspecified
+ buildvm-a32-12.iad2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 3Depict
+ src
+
+ 78a0521a39bf634b7d21edccaf26ff5c0ea318b68b620e4a337e1e965b4b2a68
+ Valued 3D point cloud visualization and analysis
+ This software is designed to help users visualize and analyze 3D point clouds
+with an associated real value, in a fast and flexible fashion. It is
+specifically targeted to atom probe tomography applications, but may be
+useful for general scalar valued point data purposes.
+ Fedora Project
+ http://threedepict.sourceforge.net
+
+
+
+
+ GPLv3+
+ Fedora Project
+ Unspecified
+ buildvm-a32-11.iad2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 3dprinter-udev-rules
+ src
+
+ 9deaa03d0744a684eaaa7b3d2cf12b100e53f86b402c7028e68b3378189259fa
+ Rules for udev to give regular users access to operate 3D printers
+ Normally, when you connect a RepRap like 3D printer to a Linux machine by an
+USB cable, you need to be in dialout or similar group to be able to control
+it via OctoPrint, Printrun, Cura or any other control software. Not any more.
+
+Install this rule to grant all users read and write access to collected
+devices based on the VID and PID.
+
+Disclaimer: Such device might not be a 3D printer, it my be an Arduino, it
+might be a modem and it might even be a blender. But normally you would
+add your user to dialout and get access to all of those and more anyway.
+So I guess be careful when some of the users should not get access to
+your blenders.
+ Fedora Project
+ https://github.com/hroncok/3dprinter-udev-rules
+
+
+
+
+ CC0
+ Fedora Project
+ Unspecified
+ buildvm-ppc64le-04.iad2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+ 3mux
+ src
+
+ 10ed40714c2f4ad243302aa59ea4a40c3bb8a68b9ab78affdac6747a668542b7
+ Terminal multiplexer inspired by i3
+
+Terminal multiplexer inspired by i3.
+ Fedora Project
+ https://github.com/aaronjanse/3mux
+
+
+
+
+ MIT
+ Fedora Project
+ Unspecified
+ buildvm-a32-33.iad2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 3proxy
+ src
+
+ 9b4a2b6ca029cb43912232f71656af97eee5d173849157d9004c5bbf1b356ebb
+ Tiny but very powerful proxy
+ 3proxy -- light proxy server.
+Universal proxy server with HTTP, HTTPS, SOCKS v4, SOCKS v4a, SOCKS v5, FTP,
+POP3, UDP and TCP portmapping, access control, bandwith control, traffic
+limitation and accounting based on username, client IP, target IP, day time,
+day of week, etc.
+ Fedora Project
+ http://3proxy.ru/?l=EN
+
+
+
+
+ BSD or ASL 2.0 or GPLv2+ or LGPLv2+
+ Fedora Project
+ Unspecified
+ buildvm-a32-06.iad2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 4Pane
+ src
+
+ 3b56a7560662d182a66495160389ff06045ee67ebc7383010695f6c4f6b4e8c7
+ Multi-pane, detailed-list file manager
+ 4Pane is a multi-pane, detailed-list file manager. It is designed
+to be fully-featured without bloat, and aims for speed rather than
+visual effects.
+In addition to standard file manager things, it offers multiple
+undo and redo of most operations (including deletions), archive
+management including 'virtual browsing' inside archives, multiple
+renaming/duplication of files, a terminal emulator and user-defined
+tools.
+ Fedora Project
+ http://www.4pane.co.uk/
+
+
+
+
+ GPLv3
+ Fedora Project
+ Unspecified
+ buildvm-a32-18.iad2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 4diac-forte
+ src
+
+ 9e4286400a4dcda294fc807202195bc8d4fdaf324d523d8f2c86d2b399528a8f
+ IEC 61499 runtime environment
+ The 4DIAC runtime environment (4DIAC-RTE, FORTE) is a small portable
+implementation of an IEC 61499 runtime environment targeting small
+embedded control devices (16/32 Bit), implemented in C++. It supports
+online-reconfiguration of its applications and the real-time capable
+execution of all function block types provided by the IEC 61499 standard.
+ Fedora Project
+ http://eclipse.org/4diac
+
+
+
+
+ EPL
+ Fedora Project
+ Unspecified
+ buildvm-a32-28.iad2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 4th
+ src
+
+ 52a78198b2ee889130f79c87f300e2ab64fd4a925fa81e6c55d24c9e0e76e0a8
+ A Forth compiler
+ 4tH is basic framework for creating application specific scripting
+languages. It is a library of functions centered around a virtual
+machine, which guarantees high performance, ease of use and low overhead.
+ Fedora Project
+ https://thebeez.home.xs4all.nl/4tH/
+
+
+
+
+ GPLv3+
+ Fedora Project
+ Unspecified
+ buildvm-a32-09.iad2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ zzuf
+ src
+
+ d4aa530ea79c034a36aed1a0ef4859f14e9dadbc23e6086018287b9d1236c6fe
+ Transparent application input fuzzer
+ zzuf is a transparent application input fuzzer. It works by
+intercepting file operations and changing random bits in the program's
+input. zzuf's behaviour is deterministic, making it easy to reproduce
+bugs.
+ Fedora Project
+ http://sam.zoy.org/zzuf/
+
+
+
+
+ WTFPL
+ Fedora Project
+ Unspecified
+ buildvm-a32-18.iad2.fedoraproject.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/swh/lister/fedora/tests/test_lister.py b/swh/lister/fedora/tests/test_lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/tests/test_lister.py
@@ -0,0 +1,168 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+from pathlib import Path
+from typing import Dict, List, Set, Tuple
+from unittest.mock import MagicMock
+
+import defusedxml
+import pytest
+from repomd import Repo
+
+from swh.lister.fedora.lister import FedoraLister, get_editions
+from swh.scheduler.interface import SchedulerInterface
+from swh.scheduler.model import ListedOrigin
+
+
+def get_repo(datadir: Path, release: int) -> Repo:
+    """Build a repomd ``Repo`` from the ``primary{release}.xml`` fixture in datadir."""
+    xml_file = Path(datadir) / "archives.fedoraproject.org" / f"primary{release}.xml"
+    tree = defusedxml.lxml.fromstring(xml_file.read_bytes())
+    return Repo(f"mocked.mirror.url/releases/{release}", tree)
+
+
+# Fixture value: ((fedora26, fedora36), versions); the repos come from get_repo().
+RepomdMock = Tuple[Tuple[MagicMock, MagicMock], Dict[str, Set[str]]]
+
+
+@pytest.fixture
+def repomd_mock(datadir, mocker) -> RepomdMock:
+    """Patch ``repomd`` in the lister module so ``load()`` serves the XML fixtures.
+
+    Also computes, per package name, the ``release/edition/version`` strings.
+    """
+    repos = {26: get_repo(datadir, 26), 36: get_repo(datadir, 36)}
+
+    def fake_load(url):
+        for marker in ("36", "26"):  # substring match, "36" is tried first
+            if marker in url:
+                return repos[int(marker)]
+        raise Exception(f"Unexpected url: {url}")
+
+    mocker.patch("swh.lister.fedora.lister.repomd").load.side_effect = fake_load
+
+    pkg_versions: Dict[str, Set[str]] = {}
+    for release, repo in repos.items():
+        for pkg in repo:
+            versions = pkg_versions.setdefault(pkg.name, set())
+            for edition in get_editions(release):
+                versions.add(f"{release}/{edition}/{pkg.version}")
+
+    return (repos[26], repos[36]), pkg_versions
+
+
+def check_listed_origins(pkg_versions: dict, scheduler_origins: List[ListedOrigin]):
+    """Assert that there is exactly one listed origin per package name.
+
+    Packages are ordered by name and origins by URL, then compared pairwise: each
+    origin URL must end with ``/packages/<name>``. ``last_update`` is not checked.
+    """
+    sorted_pkgs = sorted(pkg_versions)
+    # Explicit key: do not depend on ListedOrigin defining an ordering of its own.
+    sorted_origins = sorted(scheduler_origins, key=lambda origin: origin.url)
+
+    assert len(sorted_pkgs) == len(sorted_origins)
+    for pkg, origin in zip(sorted_pkgs, sorted_origins):
+        assert origin.url.endswith(f"/packages/{pkg}"), (pkg, origin.url)
+
+
+def test_full_lister_fedora(
+    swh_scheduler: SchedulerInterface,
+    repomd_mock: RepomdMock,
+):
+    """Run a full listing of two Fedora releases twice and check both outcomes.
+
+    The first run must report one origin per package. The second run, over the
+    same mocked metadata, must visit as many pages without reporting any origin.
+    After each run the state read back from the scheduler must match the fixture.
+    """
+    _, pkg_versions = repomd_mock
+    releases = [26, 36]
+    lister = FedoraLister(scheduler=swh_scheduler, releases=releases)
+    # One page is expected per (release, edition) pair.
+    page_count = sum(1 for release in releases for _ in get_editions(release))
+
+    def listed_origins():
+        return swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+
+    def stored_versions():
+        return lister.get_state_from_scheduler().package_versions
+
+    # First run: every package is reported as a new origin.
+    first_stats = lister.run()
+    first_origins = listed_origins()
+
+    assert first_stats.pages == page_count
+    assert first_stats.origins == len(pkg_versions)
+
+    check_listed_origins(pkg_versions, first_origins)
+    assert stored_versions() == pkg_versions
+    assert lister.updated
+
+    # Second run over unchanged metadata: same pages, but no new origin.
+    second_stats = lister.run()
+    second_origins = listed_origins()
+
+    assert second_stats.pages == page_count
+    assert second_stats.origins == 0
+
+    check_listed_origins(pkg_versions, second_origins)
+    assert stored_versions() == pkg_versions
+    assert lister.updated
+
+
+def test_incremental_lister(swh_scheduler: SchedulerInterface, repomd_mock: RepomdMock):
+    """List twice without changes, then bump some Fedora 26 versions and list again.
+
+    No run after the first one may report new origins, and the lister state must
+    track every package version seen, including the ones added before run three.
+    """
+    (fedora26, _), pkg_versions = repomd_mock
+    releases = [26, 36]
+    lister = FedoraLister(scheduler=swh_scheduler, releases=releases)
+    # One page is expected per (release, edition) pair.
+    page_count = sum(1 for release in releases for _ in get_editions(release))
+
+    def listed_origins():
+        return swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+
+    # Run 1: every package is reported as a new origin.
+    first_stats = lister.run()
+    first_origins = listed_origins()
+
+    assert first_stats.pages == page_count
+    assert first_stats.origins == len(pkg_versions)
+    assert lister.state.package_versions == pkg_versions
+    check_listed_origins(pkg_versions, first_origins)
+    assert lister.updated
+
+    # Run 2: nothing changed in the mocked metadata, so no origin is new.
+    second_stats = lister.run()
+    second_origins = listed_origins()
+
+    assert second_stats.pages == page_count
+    assert second_stats.origins == 0
+    assert lister.state.package_versions == pkg_versions
+    check_listed_origins(pkg_versions, second_origins)
+    assert lister.updated
+
+    # Rewrite the version of every other Fedora 26 package directly in the mocked
+    # metadata, and extend the expected versions accordingly.
+    for index, pkg in enumerate(fedora26):
+        if index % 2:
+            continue
+        pkg._version_info.set("ver", "1.0.0")
+        pkg_versions[pkg.name].update(
+            f"26/{edition}/1.0.0" for edition in get_editions(26)
+        )
+
+    # Run 3: known packages with additional versions, still no new origin.
+    third_stats = lister.run()
+    third_origins = listed_origins()
+
+    assert third_stats.pages == page_count
+    assert third_stats.origins == 0
+    check_listed_origins(pkg_versions, third_origins)
+    assert lister.state.package_versions == pkg_versions
+    assert lister.updated
diff --git a/swh/lister/fedora/tests/test_tasks.py b/swh/lister/fedora/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/tests/test_tasks.py
@@ -0,0 +1,60 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from unittest.mock import patch
+
+from swh.lister.pattern import ListerStats
+
+
+def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
+    """The ping task must run to completion and answer "OK"."""
+    task = swh_scheduler_celery_app.send_task("swh.lister.fedora.tasks.ping")
+    assert task
+    task.wait()
+    assert task.successful() and task.result == "OK"
+
+
+@patch("swh.lister.fedora.tasks.FedoraLister")
+def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
+    """Run the listing task with only ``url``; the other arguments default to None."""
+    lister.from_configfile.return_value = lister
+    lister.run.return_value = ListerStats(pages=10, origins=500)
+    task_kwargs = {"url": "https://eu.edge.kernel.org/fedora/releases/"}
+    task = swh_scheduler_celery_app.send_task(
+        "swh.lister.fedora.tasks.FullFedoraRelister",
+        kwargs=task_kwargs,
+    )
+    assert task
+    task.wait()
+    assert task.successful()
+
+    # The omitted task arguments are expected to be forwarded as None.
+    expected = {**task_kwargs, "instance": None, "releases": None}
+    lister.from_configfile.assert_called_once_with(**expected)
+    lister.run.assert_called_once_with()
+
+
+@patch("swh.lister.fedora.tasks.FedoraLister")
+def test_full_listing_params(
+    lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
+):
+    """Every argument given to the listing task must reach the lister unchanged."""
+    lister.from_configfile.return_value = lister
+    lister.run.return_value = ListerStats(pages=10, origins=500)
+    kwargs = dict(
+        url="https://eu.edge.kernel.org/fedora/releases/",
+        instance="eu.edge.kernel",
+        releases=[36],  # integers, like the releases used by the lister tests
+    )
+    res = swh_scheduler_celery_app.send_task(
+        "swh.lister.fedora.tasks.FullFedoraRelister",
+        kwargs=kwargs,
+    )
+    assert res
+    res.wait()
+    assert res.successful()
+
+    lister.from_configfile.assert_called_once_with(**kwargs)
+    lister.run.assert_called_once_with()
diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py
--- a/swh/lister/tests/test_cli.py
+++ b/swh/lister/tests/test_cli.py
@@ -35,6 +35,9 @@
"url": "https://try.gogs.io/",
"api_token": "secret",
},
+ "fedora": {
+ "url": "https://eu.edge.kernel.org/fedora/releases/",
+ },
}