diff --git a/README.md b/README.md --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ - `swh.lister.pypi` - `swh.lister.tuleap` - `swh.lister.gogs` +- `swh.lister.fedora` Dependencies ------------ diff --git a/mypy.ini b/mypy.ini --- a/mypy.ini +++ b/mypy.ini @@ -42,3 +42,9 @@ [mypy-dulwich.*] ignore_missing_imports = True + +[mypy-repomd.*] +ignore_missing_imports = True + +[mypy-defusedxml.*] +ignore_missing_imports = True diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ tenacity >= 6.2 lxml dulwich +repomd diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -75,6 +75,7 @@ lister.tuleap=swh.lister.tuleap:register lister.maven=swh.lister.maven:register lister.gogs=swh.lister.gogs:register + lister.fedora=swh.lister.fedora:register """, classifiers=[ "Programming Language :: Python :: 3", diff --git a/swh/lister/fedora/__init__.py b/swh/lister/fedora/__init__.py new file mode 100644 --- /dev/null +++ b/swh/lister/fedora/__init__.py @@ -0,0 +1,13 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +def register(): + from .lister import FedoraLister + + return { + "lister": FedoraLister, + "task_modules": [f"{__name__}.tasks"], + } diff --git a/swh/lister/fedora/lister.py b/swh/lister/fedora/lister.py new file mode 100644 --- /dev/null +++ b/swh/lister/fedora/lister.py @@ -0,0 +1,224 @@ +# Copyright (C) 2017-2021 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +from dataclasses import dataclass, field +import logging +from typing import Any, Dict, Iterator, List, Set +from urllib.parse import urljoin + +import 
from typing import Optional

import repomd

from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin

from ..pattern import Lister

logger = logging.getLogger(__name__)


Release = int
Edition = str
PkgName = str
PkgVersion = str
FedoraOrigin = str
FedoraPageType = Iterator[repomd.Package]
"""Each page is a list of packages from a given Fedora release"""

# Releases listed when the caller does not provide any. Kept as an immutable
# tuple so it can never be mutated through a lister instance.
_DEFAULT_RELEASES = (34, 35, 36)


def get_editions(release: Release) -> List[Edition]:
    """Get list of editions for a given release.

    Args:
        release: Fedora release number.

    Returns:
        The edition names associated with that release; the set of names
        changes at releases 20 and 28.
    """
    if release < 20:
        return ["Everything", "Fedora"]
    elif release < 28:
        return ["Everything", "Server", "Workstation"]
    else:
        return ["Everything", "Server", "Workstation", "Modular"]


@dataclass
class FedoraListerState:
    """State of Fedora lister"""

    package_versions: Dict[PkgName, Set[PkgVersion]] = field(default_factory=dict)
    """Dictionary mapping a package name to all the versions found during
    last listing"""


class FedoraLister(Lister[FedoraListerState, FedoraPageType]):
    """
    List source packages for given Fedora releases.

    The lister will create a snapshot for each package name from all its
    available versions.

    If a package snapshot is different from the last listing operation,
    it will be sent to the scheduler that will create a loading task
    to archive newly found source code.

    Args:
        scheduler: instance of SchedulerInterface
        instance: name of the lister instance
        url: fedora package archives mirror URL
        releases: list of fedora releases to process; when omitted (or
            ``None``), releases 34, 35 and 36 are processed
    """

    # NOTE: in the archives, old versions of Fedora do not contain repomd.xml

    LISTER_NAME = "fedora"

    def __init__(
        self,
        scheduler: SchedulerInterface,
        instance: str = "fedora",
        url: str = "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/",
        releases: Optional[List[Release]] = None,
    ):
        super().__init__(
            scheduler=scheduler,
            url=url,
            instance=instance,
            credentials={},
        )

        # Always store a private copy: the former ``[34, 35, 36]`` default was a
        # single list object shared by every instance, and a caller-provided
        # list would otherwise be aliased by the lister.
        self.releases: List[Release] = list(
            _DEFAULT_RELEASES if releases is None else releases
        )

        # will hold all listed origins info
        self.listed_origins: Dict[FedoraOrigin, ListedOrigin] = {}
        # will contain origin urls that have already been listed
        # in a previous page (fedora release)
        self.sent_origins: Set[FedoraOrigin] = set()
        # will contain already listed package info that need to be
        # sent to the scheduler for update in the commit_page method
        self.origins_to_update: Dict[FedoraOrigin, ListedOrigin] = {}
        # will contain the lister state after a call to run
        self.package_versions: Dict[PkgName, Set[PkgVersion]] = {}

    def state_from_dict(self, d: Dict[str, Any]) -> FedoraListerState:
        """Rebuild the lister state from its serialized form.

        Args:
            d: mapping of package name to an iterable of version keys, as
                produced by :meth:`state_to_dict`.
        """
        return FedoraListerState(package_versions={k: set(v) for k, v in d.items()})

    def state_to_dict(self, state: FedoraListerState) -> Dict[str, Any]:
        """Serialize the lister state, turning each set of versions into a list."""
        return {k: list(v) for k, v in state.package_versions.items()}

    def page_request(self, release: Release, edition: Edition) -> FedoraPageType:
        """Return parsed packages for a given fedora release.

        This is a generator: the repository metadata is only fetched once the
        returned iterator starts being consumed.

        Args:
            release: Fedora release number.
            edition: edition name; only used to build the URL of releases
                older than 24.

        Yields:
            The packages found in the repository metadata.
        """
        # NOTE(review): from release 24 onwards the "Everything" tree is always
        # queried, whatever ``edition`` is, so every edition of such a release
        # lists the same repository -- confirm this is intended.
        index_url = urljoin(
            self.url,
            f"{release}/{edition}/source/SRPMS/"
            if release < 24
            else f"{release}/Everything/source/tree/",
        )

        repo = repomd.load(index_url)  # raises if repomd.xml cannot be found
        # Use the module logger (not the root logger) so that the record is
        # attributed to this module like every other message emitted here.
        logger.debug(
            "Fetched metadata from url: %s, found %d packages", index_url, len(repo)
        )
        # TODO: Extract more fields like "provides" and "requires" from *primary.xml
        # as extrinsic metadata using the pkg._element.findtext method
        yield from repo

    def get_pages(self) -> Iterator[FedoraPageType]:
        """Return an iterator on parsed fedora packages, one page per fedora release
        and edition."""
        # Dirs that don't contain .rpm files:
        # Docker,CloudImages,Atomic*,Spins,Live,Cloud_Atomic,Silverblue

        for release in self.releases:
            for edition in get_editions(release):
                logger.debug("Listing fedora release %s edition %s", release, edition)
                # Remembered for get_origins_from_page, which builds the package
                # version keys from them while consuming the page.
                self.current_release = release
                self.current_edition = edition
                yield self.page_request(release, edition)

    def origin_url_for_package(self, package_name: PkgName) -> FedoraOrigin:
        """Return the origin url for the given package"""
        return f"rpm://{self.instance}/packages/{package_name}"

    def get_origins_from_page(self, page: FedoraPageType) -> Iterator[ListedOrigin]:
        """Convert a page of fedora package sources into an iterator of ListedOrigin.

        Only origins seen for the first time during this run are yielded.
        Origins already listed from a previous page are recorded in
        ``self.origins_to_update`` instead, for ``commit_page`` to send.

        NOTE(review): relies on ``self.current_release`` and
        ``self.current_edition``, which are only set by ``get_pages``.
        """
        assert self.lister_obj.id is not None

        origins_to_send: Dict[FedoraOrigin, ListedOrigin] = {}
        self.origins_to_update = {}

        # iterate on each package's metadata
        for pkg_metadata in page:
            # extract package metadata
            package_name = pkg_metadata.name
            package_version = pkg_metadata.version
            package_last_updated = pkg_metadata.build_time
            package_download_path = pkg_metadata.location

            # build origin url
            origin_url = self.origin_url_for_package(package_name)
            # create package version key as expected by the fedora (rpm) loader
            # TODO: could use pkg.release instead of self.current_release
            package_version_key = (
                f"{self.current_release}/{self.current_edition}/{package_version}"
            )

            # this is the first time a package is listed
            if origin_url not in self.listed_origins:
                # create a ListedOrigin object for it that can be later
                # updated with new package versions info
                self.listed_origins[origin_url] = ListedOrigin(
                    lister_id=self.lister_obj.id,
                    url=origin_url,
                    visit_type="rpm",
                    extra_loader_arguments={"packages": {}},
                    last_update=package_last_updated,
                )

                # origin will be yielded at the end of that method
                origins_to_send[origin_url] = self.listed_origins[origin_url]
                # init set that will contain all listed package versions
                self.package_versions[package_name] = set()

            # package has already been listed in a previous or current page (release)
            elif origin_url not in origins_to_send:
                # if the package has been listed previously, its new versions
                # will be added to its ListedOrigin object but the update will
                # be sent to the scheduler in the commit_page method
                self.origins_to_update[origin_url] = self.listed_origins[origin_url]

            # update package versions data in parameter that will be provided
            # to the rpm loader
            self.listed_origins[origin_url].extra_loader_arguments["packages"][
                package_version_key
            ] = {
                "name": package_name,
                "version": package_version,
                "path": package_download_path,
            }

            # add package version key to the set of found versions
            self.package_versions[package_name].add(package_version_key)

        # update already counted origins with changes since last page
        self.sent_origins.update(origins_to_send.keys())

        logger.debug(
            "Found %s new packages, %s packages with new versions.",
            len(origins_to_send),
            len(self.origins_to_update),
        )
        logger.debug(
            "Current total number of listed packages is equal to %s.",
            len(self.listed_origins),
        )

        yield from origins_to_send.values()

    def get_origins_to_update(self) -> Iterator[ListedOrigin]:
        """Yield the already listed origins that gained versions in the current page."""
        yield from self.origins_to_update.values()

    def commit_page(self, page: FedoraPageType):
        """Send to scheduler already listed origins where new versions have been found
        in current page."""
        self.send_origins(self.get_origins_to_update())

    def finalize(self):
        """Store the listed package versions as lister state and flag whether
        any origin was sent during this run."""
        # set mapping between listed package names and versions as lister state
        self.state.package_versions = self.package_versions
        self.updated = len(self.sent_origins) > 0
# Copyright (C) 2022 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from typing import Dict, List, Optional

from celery import shared_task

from .lister import FedoraLister


@shared_task(name=__name__ + ".FullFedoraRelister")
def list_fedora_full(
    url: str,
    instance: Optional[str] = None,
    releases: Optional[List[int]] = None,
) -> Dict[str, int]:
    """Full update of a Fedora instance

    Args:
        url: fedora package archives mirror URL
        instance: name of the lister instance
        releases: fedora release numbers to list. Values are normalized with
            ``int()`` because the lister compares releases numerically, so
            numeric strings are accepted as well.

    Returns:
        The listing statistics, as returned by ``lister.run().dict()``.
    """
    if releases is not None:
        # The parameter used to be annotated as a list of strings, while the
        # lister needs integers (e.g. ``release < 20``).
        releases = [int(release) for release in releases]

    # NOTE(review): ``None`` values are forwarded as-is; presumably
    # ``from_configfile`` or the lister falls back to defaults -- confirm.
    lister = FedoraLister.from_configfile(
        url=url,
        instance=instance,
        releases=releases,
    )
    return lister.run().dict()


@shared_task(name=__name__ + ".ping")
def _ping() -> str:
    """Trivial task that only returns ``"OK"``."""
    return "OK"
+ Fedora Project + http://play0ad.com + + + 0ad-data + src + + 339932264e5ad849b8c97a2aeff42658d0983570f5dc8a40a89985883f8c1f5a + The Data Files for 0 AD + 0 A.D. (pronounced "zero ey-dee") is a free, open-source, cross-platform +real-time strategy (RTS) game of ancient warfare. In short, it is a +historically-based war/economy game that allows players to relive or rewrite +the history of Western civilizations, focusing on the years between 500 B.C. +and 500 A.D. The project is highly ambitious, involving state-of-the-art 3D +graphics, detailed artwork, sound, and a flexible and powerful custom-built +game engine. + +This package contains the 0ad data files. + Fedora Project + http://play0ad.com + + + 0install + src + + 79ace4e9dc84500aea0e3e5ea878dc3fae17a0fd3a07daf3e109066d514be68e + A decentralized cross-distribution software installation system + Zero Install is a decentralized cross-distribution software +installation system available under the LGPL. It allows software +developers to publish programs directly from their own web-sites, +while supporting features familiar from centralized distribution +repositories such as shared libraries, automatic updates and digital +signatures. It is intended to complement, rather than replace, the +operating system's package management. 0install packages never +interfere with those provided by the distribution. + +0install does not define a new packaging format; unmodified tarballs +or zip archives can be used. Instead, it defines an XML metadata +format to describe these packages and the dependencies between them. A +single metadata file can be used on multiple platforms (e.g. Ubuntu, +Debian, Fedora, openSUSE, Mac OS X and Windows), assuming binary or +source archives are available that work on those systems. + +0install also has some interesting features not often found in +traditional package managers. 
For example, while it will share +libraries whenever possible, it can always install multiple versions +of a package in parallel when there are conflicting +requirements. Installation is always side-effect-free (each package is +unpacked to its own directory and will not touch shared directories +such as /usr/bin), making it ideal for use with sandboxing +technologies and virtualization. + +The XML file describing the program's requirements can also be +included in a source-code repository, allowing full dependency +handling for unreleased developer versions. For example, a user can +clone a Git repository and build and test the program, automatically +downloading newer versions of libraries where necessary, without +interfering with the versions of those libraries installed by their +distribution, which continue to be used for other software. + Fedora Project + http://0install.net + + + 0xFFFF + src + + 96f9c163c0402d2b30e5343c8397a6d50e146c85a446804396b119ef9698231f + The Open Free Fiasco Firmware Flasher + The 'Open Free Fiasco Firmware Flasher' aka 0xFFFF utility implements +a free (GPL3) userspace handler for the NOLO bootloader and related +utilities for the Nokia Internet Tablets like flashing setting device +options, packing/unpacking FIASCO firmware format and more. + Fedora Project + http://www.nopcode.org/0xFFFF/ + + + 2048-cli + src + + 8a2d21bc6f88f6a1906d4760255fa383a70e595317883cb8847c81e99ec9e761 + The game 2048 for your Linux terminal + A cli version of the game 2048 for your Linux terminal. + Fedora Project + https://github.com/Tiehuis/2048-cli + + + 2ping + src + + fe684c7a07c1d4dba5b493805af17c630a23bb230c2666c485cc08bc3321318b + Bi-directional ping utility + 2ping is a bi-directional ping utility. It uses 3-way pings (akin to TCP SYN, +SYN/ACK, ACK) and after-the-fact state comparison between a 2ping listener and +a 2ping client to determine which direction packet loss occurs. 
+ Fedora Project + http://www.finnie.org/software/2ping + + + 389-admin + src + + aa849057e2deedbbfb050e8572ee8c43bc43f222c3aa01a7ef141e3fe60a1db8 + 389 Administration Server (admin) + 389 Administration Server is an HTTP agent that provides management features +for 389 Directory Server. It provides some management web apps that can +be used through a web browser. It provides the authentication, access control, +and CGI utilities used by the console. + Fedora Project + http://www.port389.org/ + + + 389-admin-console + src + + 3d6de5f3bda84055e4fdad7c15c1a384a346b8c810ca7302c59104c14e196fd0 + 389 Admin Server Management Console + A Java based remote management console used for Managing 389 +Admin Server. Requires the 389 Console to load and run the +jar files. + Fedora Project + http://www.port389.org + + + 389-adminutil + src + + e89dd564baa83a290ff4df61807bda030705c77a80d36816ff236ae47a4f3bf8 + Utility library for 389 administration + 389-adminutil is libraries of functions used to administer directory +servers, usually in conjunction with the admin server. 389-adminutil is +broken into two libraries - libadminutil contains the basic +functionality, and libadmsslutil contains SSL versions and wrappers +around the basic functions. The PSET functions allow applications to +store their preferences and configuration parameters in LDAP, without +having to know anything about LDAP. The configuration is cached in a +local file, allowing applications to function even if the LDAP server +is down. The other code is typically used by CGI programs used for +directory server management, containing GET/POST processing code as +well as resource handling (ICU ures API). + Fedora Project + http://port389.org/wiki/AdminUtil + + + 389-console + src + + 6482088712ef3b055d98303fe37e770f26766c66f83d6d60d19c907b29aff646 + 389 Management Console + A Java based remote management console used for managing 389 +Administration Server and 389 Directory Server. 
+ Fedora Project + http://port389.org + + + 389-ds + src + + da7d9a853a49a1d45d073443c99b18864ac404325cd8c447894898132efd763a + 389 Directory, Administration, and Console Suite + The 389 Directory Server, Administration Server, and Console Suite provide +the LDAPv3 server, the httpd daemon used to administer the server, and the +console GUI application used for server and user/group administration. + Fedora Project + http://port389.org/ + + + 389-ds-base + src + + b1a0fa067b1938e1ca8d02c272379bbb42824a3817ea85cc031806dd1f315087 + 389 Directory Server (base) + 389 Directory Server is an LDAPv3 compliant server. The base package includes +the LDAP server and command line utilities for server administration. + Fedora Project + http://www.port389.org + + + 389-ds-console + src + + e074df08c0c1a605650ffef4b4372e6e77adf671b68e7d2ae92e1421a74575e2 + 389 Directory Server Management Console + A Java based remote management console used for managing 389 +Directory Server. The 389 Console is required to load and +run these jar files. + Fedora Project + http://www.port389.org + + + 389-dsgw + src + + 4d941a1a1f75408d76eaf68cc67e335d53edb0822a2295df86deb80a446122dc + 389 Directory Server Gateway (dsgw) + 389 Directory Server Gateway is a collection of 3 web applications +that run on top of the Administration Server used by the Directory +Server. 
These 3 applications are: +* phonebook - a simple phonebook application geared towards end users, +with simple search screens and simple self-service management +* orgchart - an organization chart viewer +* gateway - a more advanced search interface that allows admins to +create and edit user entries, and allows creation of templates for +different types of user and group entries + Fedora Project + http://port389.org/ + + + 3Depict + src + + 4e6df7205db51e6cdc7b413517b01380bdb3262731b9f198d8222cf4dcfabfbd + Valued 3D point cloud visualization and analysis + This software is designed to help users visualize and analyze 3D point clouds +with an associated real value, in a fast and flexible fashion. It is +specifically targeted to atom probe tomography applications, but may be +useful for general scalar valued point data purposes. + Fedora Project + http://threedepict.sourceforge.net + + + 3dprinter-udev-rules + src + + 4254bf954423c860464e1c21bab1797720664a8f26b74c9fc205b00ce96a6768 + Rules for udev to give regular users access to operate 3D printers + Normally, when you connect a RepRap like 3D printer to a Linux machine by an +USB cable, you need to be in dialout or similar group to be able to control +it via OctoPrint, Printrun, Cura or any other control software. Not any more. + +Install this rule to grant all users read and write access to collected +devices based on the VID and PID. + +Disclaimer: Such device might not be a 3D printer, it my be an Arduino, it +might be a modem and it might even be a blender. But normally you would +add your user to dialout and get access to all of those and more anyway. +So I guess be careful when some of the users should not get access to +your blenders. + Fedora Project + https://github.com/hroncok/3dprinter-udev-rules + + + 3proxy + src + + 12927f379da4871e0939368407ecc18f5a5ca74fbde33349619448afab51ca27 + Tiny but very powerful proxy + 3proxy -- light proxy server. 
+Universal proxy server with HTTP, HTTPS, SOCKS v4, SOCKS v4a, SOCKS v5, FTP, +POP3, UDP and TCP portmapping, access control, bandwith control, traffic +limitation and accounting based on username, client IP, target IP, day time, +day of week, etc. + Fedora Project + http://3proxy.ru/?l=EN + + + 4Pane + src + + daf1615ea9d4bc36e0fac2a13ac37ae5e169893e9c97f241a91d1da0afd2ea0c + Multi-pane, detailed-list file manager + 4Pane is a multi-pane, detailed-list file manager. It is designed +to be fully-featured without bloat, and aims for speed rather than +visual effects. +In addition to standard file manager things, it offers multiple +undo and redo of most operations (including deletions), archive +management including 'virtual browsing' inside archives, multiple +renaming/duplication of files, a terminal emulator and user-defined +tools. + Fedora Project + http://www.4pane.co.uk/ + + + 64tass + src + + cda7d777f556e9007bed98d64814067ad6c48287713fb730e2a3ef886558134a + 6502 assembler + 64tass is a multi-pass optimizing macro assembler for the 65xx series of +processors. It supports the 6502, 65C02, R65C02, W65C02, 65CE02, 65816, +DTV, and 65EL02, using a syntax similar to that of Omicron TASS and TASM. + Fedora Project + http://tass64.sourceforge.net/ + + + 7kaa + src + + daa291c15876c820858c28225c150e076ab878d48c76cddd3e8287fc67141a0a + Seven Kingdoms: Ancient Adversaries + Seven Kingdoms is a real-time strategy (RTS) computer game developed +by Trevor Chan of Enlight Software. The game enables players to +compete against up to six other kingdoms allowing players to conquer +opponents by defeating them in war (with troops or machines), +capturing their buildings with spies, or offering opponents money +for their kingdom. + +Seven Kingdoms: Ancient Adversaries is a free patch provided by +Interactive Magic and added three new cultures, the Egyptians, the +Mughals and the Zulus, and a new war machine, Unicorn. 
+ Fedora Project + http://7kfans.com/ + + + 99soft-oss-parent + src + + 25a24a764f9a185a63a0df0a148403b82503395bee0eaf7aa972a83aed259601 + 99 Software Foundation parent pom + The 99 Software Foundation parent pom. + Fedora Project + https://github.com/99soft/infra + + + 9wm + src + + 9e339c9a698a0d9472cee50697dfd964338b0b4a61d66611b826ccecdbfc5c1b + Emulation of the Plan 9 window manager 8 1/2 + 9wm is an X window manager which attempts to emulate the Plan 9 window +manager 8-1/2 as far as possible within the constraints imposed by X. +It provides a simple yet comfortable user interface, without garish +decorations or title-bars. Or icons. And it's click-to-type. + Fedora Project + https://woozle.org/neale/src/9wm/ + + + AGReader + src + + 546ac7b23a8cb8a0149578fe1bce4a60b614e091d20e5b0235136916da896775 + Console reader for viewing AmigaGuide files + A viewer for the UNIX console which can read and display AmigaGuide files. It +supports all of the v39 AmigaGuide specification possible and supports a large +subset of the v40 specifications. + Fedora Project + http://main.aminet.net/misc/unix/ + + + APLpy + src + + dc989d44cf1d3514a57c5b5e6717bd7085e959f5839e517c632c00ab65a10c85 + The Astronomical Plotting Library in Python + APLpy (the Astronomical Plotting Library in Python) is a Python module aimed at +producing publication-quality plots of astronomical imaging data in FITS format. +The module uses Matplotlib, a powerful and interactive plotting package. It is +capable of creating output files in several graphical formats, including EPS, +PDF, PS, PNG, and SVG. + Fedora Project + http://aplpy.github.com + + + ATpy + src + + ee426f715d00c91412f04acd34c4c02df9a1dfeb6c3265b9c93f9d6dfa3c433a + Astronomical Tables in Python + ATpy is a high-level Python package providing a way to manipulate tables of +astronomical data in a uniform way. 
It provides built-in support for NumPy +recarrays and common astronomical file/database formats (FITS, VO, HDF5, +and ASCII tables) with a very simple API. + Fedora Project + http://atpy.readthedocs.org + + + AcetoneISO + src + + e76553136b66a5f5854b8b5daaded10ed2e77444b0284c3533a90c9dcfd37688 + CD/DVD Image Manipulator + AcetoneISO: The CD/DVD image manipulator for Linux, it can do the following: +- Mount and Unmount ISO, MDF, NRG (if iso-9660 standard) +- Convert / Extract / Browse to ISO : *.bin *.mdf *.nrg *.img *.daa *.cdi + *.xbx *.b5i *.bwi *.pdi +- Play a DVD Movie ISO with most used media players +- Generate an ISO from a Folder or CD/DVD +- Generate MD5 file of an image +- Encrypt an image +- Split image into X megabyte chunks +- Highly compress an image +- Rip a PSX cd to *.bin to make it work with epsxe/psx emulators +- Service-Menu support for Konqueror +- Restore a lost CUE file of *.bin *.img + Fedora Project + http://www.acetoneteam.org/ + + + Add64 + src + + b019cd07bca2e5ba133dd5c167060d17ee585f63104bddd0b56f21bfd922dd99 + An additive synthesizer using JACK + Add64 is an additive synthesizer using Qt and the JACK audio connection kit + Fedora Project + http://sourceforge.net/projects/add64 + + + Agda + src + + 31710476e81a755e9f5c9969de0a57a5356a85703cc261336b4c8ddb655d060c + A dependently typed functional programming language and proof assistant + Agda is a dependently typed functional programming language: it has +inductive families, which are similar to Haskell's GADTs, but they can +be indexed by values and not just types. It also has parameterized +modules, mixfix operators, Unicode characters, and an interactive +Emacs interface (the type checker can assist in the development of your code). + +Agda is also a proof assistant: It is an interactive system for writing and +checking proofs. Agda is based on intuitionistic type theory, +a foundational system for constructive mathematics developed by +the Swedish logician Per Martin-Löf. 
It has many similarities with other +proof assistants based on dependent types, such as Coq, Epigram and NuPRL. + Fedora Project + https://hackage.haskell.org/package/Agda + + + Agda-stdlib + src + + 1bb7c2150f46fc657b2ac1fc5e79f640de523d969bfb7fe142fa82c9eecb531f + Agda standard libraries + Agda standard libraries + Fedora Project + http://wiki.portal.chalmers.se/agda/agda.php?n=Libraries.StandardLibrary + + + zynjacku + src + + 309dc2ebd94d40933558260724fd7d15d9d84d0fd9541d9dbb27a54760b875d2 + LV2 synths and plugins host + zynjacku is JACK based, GTK (2.x) host for LV2 synths. It has one JACK MIDI +input port (routed to all hosted synths) and one (two for stereo synths) JACK +audio output port per plugin. Such design provides multi-timbral sound by +running several synth plugins. + +zynjacku is a nunchaku weapon for JACK audio synthesis. You have solid parts +for synthesis itself and you have flexible part that allows synthesis to suit +your needs. + +lv2rack is a host for LV2 effect plugins. + Fedora Project + http://home.gna.org/zynjacku/ + + + zypper + src + + 8984b02bf1f977a9fcaf9d514016e371206bf3697148adb56ec80f70e57a7121 + Command line package manager using libzypp + Zypper is a command line package manager tool using libzypp, +which can be used to manage software for RPM based systems. + Fedora Project + http://en.opensuse.org/Portal:Zypper + + + zziplib + src + + 8db1b95ac56b94c563638deea593beb4143d5d80dc436debf6e287d3f518582c + Lightweight library to easily extract data from zip files + The zziplib library is intentionally lightweight, it offers the ability to +easily extract data from files archived in a single zip file. Applications +can bundle files into a single zip archive and access them. The implementation +is based only on the (free) subset of compression with the zlib algorithm +which is actually used by the zip/unzip tools. 
+ Fedora Project + http://zziplib.sourceforge.net/ + + + zzuf + src + + 3d7a167cc518dbc9ec3294279751786baf4264907d870ff4b76c571ce03df094 + Transparent application input fuzzer + zzuf is a transparent application input fuzzer. It works by +intercepting file operations and changing random bits in the program's +input. zzuf's behaviour is deterministic, making it easy to reproduce +bugs. + Fedora Project + http://sam.zoy.org/zzuf/ + + \ No newline at end of file diff --git a/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary36.xml b/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary36.xml new file mode 100644 --- /dev/null +++ b/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary36.xml @@ -0,0 +1,596 @@ + + + + 0ad + src + + 85fca6fd7a1073da430439b850cb4015eea97cea396ebba163bde4d2f1b72e2e + Cross-Platform RTS Game of Ancient Warfare + 0 A.D. (pronounced "zero ey-dee") is a free, open-source, cross-platform +real-time strategy (RTS) game of ancient warfare. In short, it is a +historically-based war/economy game that allows players to relive or rewrite +the history of Western civilizations, focusing on the years between 500 B.C. +and 500 A.D. The project is highly ambitious, involving state-of-the-art 3D +graphics, detailed artwork, sound, and a flexible and powerful custom-built +game engine. + +The game has been in development by Wildfire Games (WFG), a group of volunteer, +hobbyist game developers, since 2001. + Fedora Project + http://play0ad.com + + + 0ad-data + src + + db142de59b5b2bf33abc6025d4c6be56851ebcbcde7b39c3dc461e2b8992e8c5 + The Data Files for 0 AD + 0 A.D. (pronounced "zero ey-dee") is a free, open-source, cross-platform +real-time strategy (RTS) game of ancient warfare. In short, it is a +historically-based war/economy game that allows players to relive or rewrite +the history of Western civilizations, focusing on the years between 500 B.C. +and 500 A.D. 
The project is highly ambitious, involving state-of-the-art 3D +graphics, detailed artwork, sound, and a flexible and powerful custom-built +game engine. + +This package contains the 0ad data files. + Fedora Project + http://play0ad.com + + + 0xFFFF + src + + 45eee8d990d502324ae665233c320b8a5469c25d735f1862e094c1878d6ff2cd + The Open Free Fiasco Firmware Flasher + The 'Open Free Fiasco Firmware Flasher' aka 0xFFFF utility implements +a free (GPL3) userspace handler for the NOLO bootloader and related +utilities for the Nokia Internet Tablets like flashing setting device +options, packing/unpacking FIASCO firmware format and more. + Fedora Project + https://talk.maemo.org/showthread.php?t=87996 + + + 2048-cli + src + + 5f77b054ad11aff03bc145616b88c977e67a11333902de776e8ce8769caef7b5 + The game 2048 for your Linux terminal + A cli version of the game 2048 for your Linux terminal. + Fedora Project + https://github.com/Tiehuis/2048-cli + + + 2ping + src + + 2ce028d944ebea1cab8c6203c9fed882792478b42fc34682b886a9db16e9de28 + Bi-directional ping utility + 2ping is a bi-directional ping utility. It uses 3-way pings (akin to TCP SYN, +SYN/ACK, ACK) and after-the-fact state comparison between a 2ping listener and +a 2ping client to determine which direction packet loss occurs. + Fedora Project + https://www.finnie.org/software/2ping + + + 389-ds-base + src + + 00ccfe16be5767bd043a878f0588b19445b8be8d15c477a6ecae331f9485c55e + 389 Directory Server (base) + 389 Directory Server is an LDAPv3 compliant server. The base package includes +the LDAP server and command line utilities for server administration. + Fedora Project + https://www.port389.org + + + 3Depict + src + + 78a0521a39bf634b7d21edccaf26ff5c0ea318b68b620e4a337e1e965b4b2a68 + Valued 3D point cloud visualization and analysis + This software is designed to help users visualize and analyze 3D point clouds +with an associated real value, in a fast and flexible fashion. 
It is +specifically targeted to atom probe tomography applications, but may be +useful for general scalar valued point data purposes. + Fedora Project + http://threedepict.sourceforge.net + + + 3dprinter-udev-rules + src + + 9deaa03d0744a684eaaa7b3d2cf12b100e53f86b402c7028e68b3378189259fa + Rules for udev to give regular users access to operate 3D printers + Normally, when you connect a RepRap like 3D printer to a Linux machine by an +USB cable, you need to be in dialout or similar group to be able to control +it via OctoPrint, Printrun, Cura or any other control software. Not any more. + +Install this rule to grant all users read and write access to collected +devices based on the VID and PID. + +Disclaimer: Such device might not be a 3D printer, it my be an Arduino, it +might be a modem and it might even be a blender. But normally you would +add your user to dialout and get access to all of those and more anyway. +So I guess be careful when some of the users should not get access to +your blenders. + Fedora Project + https://github.com/hroncok/3dprinter-udev-rules + + + 3mux + src + + 10ed40714c2f4ad243302aa59ea4a40c3bb8a68b9ab78affdac6747a668542b7 + Terminal multiplexer inspired by i3 + +Terminal multiplexer inspired by i3. + Fedora Project + https://github.com/aaronjanse/3mux + + + 3proxy + src + + 9b4a2b6ca029cb43912232f71656af97eee5d173849157d9004c5bbf1b356ebb + Tiny but very powerful proxy + 3proxy -- light proxy server. +Universal proxy server with HTTP, HTTPS, SOCKS v4, SOCKS v4a, SOCKS v5, FTP, +POP3, UDP and TCP portmapping, access control, bandwith control, traffic +limitation and accounting based on username, client IP, target IP, day time, +day of week, etc. + Fedora Project + http://3proxy.ru/?l=EN + + + 4Pane + src + + 3b56a7560662d182a66495160389ff06045ee67ebc7383010695f6c4f6b4e8c7 + Multi-pane, detailed-list file manager + 4Pane is a multi-pane, detailed-list file manager. 
It is designed +to be fully-featured without bloat, and aims for speed rather than +visual effects. +In addition to standard file manager things, it offers multiple +undo and redo of most operations (including deletions), archive +management including 'virtual browsing' inside archives, multiple +renaming/duplication of files, a terminal emulator and user-defined +tools. + Fedora Project + http://www.4pane.co.uk/ + + + 4diac-forte + src + + 9e4286400a4dcda294fc807202195bc8d4fdaf324d523d8f2c86d2b399528a8f + IEC 61499 runtime environment + The 4DIAC runtime environment (4DIAC-RTE, FORTE) is a small portable +implementation of an IEC 61499 runtime environment targeting small +embedded control devices (16/32 Bit), implemented in C++. It supports +online-reconfiguration of its applications and the real-time capable +execution of all function block types provided by the IEC 61499 standard. + Fedora Project + http://eclipse.org/4diac + + + 4th + src + + 52a78198b2ee889130f79c87f300e2ab64fd4a925fa81e6c55d24c9e0e76e0a8 + A Forth compiler + 4tH is basic framework for creating application specific scripting +languages. It is a library of functions centered around a virtual +machine, which guarantees high performance, ease of use and low overhead. + Fedora Project + https://thebeez.home.xs4all.nl/4tH/ + + + zzuf + src + + d4aa530ea79c034a36aed1a0ef4859f14e9dadbc23e6086018287b9d1236c6fe + Transparent application input fuzzer + zzuf is a transparent application input fuzzer. It works by +intercepting file operations and changing random bits in the program's +input. zzuf's behaviour is deterministic, making it easy to reproduce +bugs. 
+ Fedora Project
+ http://sam.zoy.org/zzuf/
+
+
diff --git a/swh/lister/fedora/tests/test_lister.py b/swh/lister/fedora/tests/test_lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/tests/test_lister.py
@@ -0,0 +1,186 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+from pathlib import Path
+from typing import Dict, Iterable, List, Set, Tuple
+
+import defusedxml.lxml
+import pytest
+from repomd import Repo
+
+from swh.lister.fedora.lister import FedoraLister, get_editions
+from swh.scheduler.interface import SchedulerInterface
+from swh.scheduler.model import ListedOrigin
+
+
+def get_repo(datadir: Path, release: int) -> Repo:
+    """Build a repomd ``Repo`` from the ``primary{release}.xml`` test fixture.
+
+    The fixture is read from the ``archives.fedoraproject.org`` directory of
+    ``datadir`` and the repository is given a mocked mirror URL.
+    """
+    primary_xml = Path(datadir, "archives.fedoraproject.org", f"primary{release}.xml")
+    metadata = defusedxml.lxml.fromstring(primary_xml.read_bytes())
+    return Repo(f"mocked.mirror.url/releases/{release}", metadata)
+
+
+RepomdMock = Tuple[Tuple[Repo, Repo], Dict[str, Set[str]]]
+"""Value of the ``repomd_mock`` fixture: the (Fedora 26, Fedora 36) repositories
+and the package versions expected in the lister state after a listing"""
+
+
+def expected_page_count(releases: Iterable[int]) -> int:
+    """Return the number of pages expected from a listing of ``releases``.
+
+    The tests expect one page per (release, edition) pair.
+    """
+    return sum(len(get_editions(release)) for release in releases)
+
+
+@pytest.fixture
+def repomd_mock(datadir, mocker) -> RepomdMock:
+    """Patch ``repomd`` in the lister module so ``repomd.load`` serves fixtures.
+
+    Returns the two fixture repositories together with the expected mapping
+    from package name to its ``release/edition/version`` strings.
+    """
+    fedora26 = get_repo(datadir, 26)
+    fedora36 = get_repo(datadir, 36)
+
+    repomd_mock = mocker.patch("swh.lister.fedora.lister.repomd")
+
+    def side_effect(url):
+        # The release is recognized by a plain substring match on the URL.
+        if "36" in url:
+            return fedora36
+        elif "26" in url:
+            return fedora26
+        else:
+            raise Exception(f"Unexpected url: {url}")
+
+    repomd_mock.load.side_effect = side_effect
+    pkg_versions: Dict[str, Set[str]] = {}
+    for release, repo in [(26, fedora26), (36, fedora36)]:
+        for pkg in repo:
+            versions = pkg_versions.setdefault(pkg.name, set())
+            # The same package is expected once per edition of the release.
+            for edition in get_editions(release):
+                versions.add(f"{release}/{edition}/{pkg.version}")
+
+    return (fedora26, fedora36), pkg_versions
+
+
+def check_listed_origins(
+    pkg_versions: Dict[str, Set[str]], scheduler_origins: List[ListedOrigin]
+) -> None:
+    """Asserts that the two collections have the same origin URLs.
+
+    Origins are ordered by URL only, so the check does not depend on the other
+    ``ListedOrigin`` fields being comparable. Does not test last_update."""
+    sorted_pkgs = sorted(pkg_versions)
+    sorted_scheduler_origins = sorted(scheduler_origins, key=lambda o: o.url)
+
+    assert len(sorted_pkgs) == len(sorted_scheduler_origins)
+
+    for pkg, s_origin in zip(sorted_pkgs, sorted_scheduler_origins):
+        assert s_origin.url.endswith(f"/packages/{pkg}")
+
+
+def test_full_lister_fedora(
+    swh_scheduler: SchedulerInterface,
+    repomd_mock: RepomdMock,
+):
+    """
+    Simulate a full listing of packages for fedora releases.
+
+    The listing is run twice; the second run must yield the same pages and
+    state but no new origin.
+    """
+    releases = [26, 36]
+
+    lister = FedoraLister(
+        scheduler=swh_scheduler,
+        releases=releases,
+    )
+
+    page_count = expected_page_count(releases)
+
+    attempt1 = lister.run()
+    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+    _, pkg_versions = repomd_mock
+
+    assert attempt1.pages == page_count
+    assert attempt1.origins == len(pkg_versions)
+
+    check_listed_origins(pkg_versions, scheduler_origins)
+
+    lister_state = lister.get_state_from_scheduler()
+    assert lister_state.package_versions == pkg_versions
+    assert lister.updated
+
+    attempt2 = lister.run()
+    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+
+    assert attempt2.pages == page_count
+    assert attempt2.origins == 0  # No new pkgs discovered
+
+    check_listed_origins(pkg_versions, scheduler_origins)
+
+    lister_state = lister.get_state_from_scheduler()
+    assert lister_state.package_versions == pkg_versions
+    assert lister.updated
+
+
+def test_incremental_lister(swh_scheduler: SchedulerInterface, repomd_mock: RepomdMock):
+    """
+    Simulate an incremental listing of packages for fedora releases.
+    """
+    releases = [26, 36]
+    lister = FedoraLister(
+        scheduler=swh_scheduler,
+        releases=releases,
+    )
+    page_count = expected_page_count(releases)
+
+    # First run
+    attempt1 = lister.run()
+    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+    (fedora26, _), pkg_versions = repomd_mock
+
+    assert attempt1.pages == page_count
+    assert attempt1.origins == len(pkg_versions)
+    assert lister.state.package_versions == pkg_versions
+
+    check_listed_origins(pkg_versions, scheduler_origins)
+    assert lister.updated
+
+    # Second run (no updates)
+    attempt2 = lister.run()
+    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+
+    assert attempt2.pages == page_count
+    assert attempt2.origins == 0  # No new pkgs were discovered
+    assert lister.state.package_versions == pkg_versions
+
+    check_listed_origins(pkg_versions, scheduler_origins)
+    assert lister.updated
+
+    # Update versions for half of the packages in Fedora 26; the expected state
+    # keeps the previous versions and gains the new one for every edition.
+    for i, pkg in enumerate(fedora26):
+        if i % 2 == 0:
+            pkg._version_info.set("ver", "1.0.0")
+            for edition in get_editions(26):
+                pkg_versions[pkg.name].add(f"26/{edition}/1.0.0")
+
+    # Third run (half of pkgs in fedora26 got updated)
+    attempt3 = lister.run()
+    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+
+    assert attempt3.pages == page_count
+    assert attempt3.origins == 0  # Again, no new pkgs were discovered
+    check_listed_origins(pkg_versions, scheduler_origins)
+
+    assert lister.state.package_versions == pkg_versions
+    assert lister.updated
diff --git a/swh/lister/fedora/tests/test_tasks.py b/swh/lister/fedora/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/tests/test_tasks.py
@@ -0,0 +1,63 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from unittest.mock import patch
+
+from swh.lister.pattern import ListerStats
+
+
+def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
+    """The ``ping`` task of the Fedora lister completes and returns "OK"."""
+    res = swh_scheduler_celery_app.send_task("swh.lister.fedora.tasks.ping")
+    assert res
+    res.wait()
+    assert res.successful()
+    assert res.result == "OK"
+
+
+@patch("swh.lister.fedora.tasks.FedoraLister")
+def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
+    """With only ``url`` given, ``instance`` and ``releases`` are passed as None."""
+    lister.from_configfile.return_value = lister
+    lister.run.return_value = ListerStats(pages=10, origins=500)
+
+    kwargs = dict(url="https://eu.edge.kernel.org/fedora/releases/")
+    res = swh_scheduler_celery_app.send_task(
+        "swh.lister.fedora.tasks.FullFedoraRelister",
+        kwargs=kwargs,
+    )
+    assert res
+    res.wait()
+    assert res.successful()
+
+    actual_kwargs = dict(**kwargs, instance=None, releases=None)
+
+    lister.from_configfile.assert_called_once_with(**actual_kwargs)
+    lister.run.assert_called_once_with()
+
+
+@patch("swh.lister.fedora.tasks.FedoraLister")
+def test_full_listing_params(
+    lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
+):
+    """All task keyword arguments are forwarded to ``from_configfile`` as is."""
+    lister.from_configfile.return_value = lister
+    lister.run.return_value = ListerStats(pages=10, origins=500)
+
+    kwargs = dict(
+        url="https://eu.edge.kernel.org/fedora/releases/",
+        instance="eu.edge.kernel",
+        releases=["36"],
+    )
+    res = swh_scheduler_celery_app.send_task(
+        "swh.lister.fedora.tasks.FullFedoraRelister",
+        kwargs=kwargs,
+    )
+    assert res
+    res.wait()
+    assert res.successful()
+
+    lister.from_configfile.assert_called_once_with(**kwargs)
+    lister.run.assert_called_once_with()
diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py
--- a/swh/lister/tests/test_cli.py
+++ b/swh/lister/tests/test_cli.py
@@ -35,6 +35,9 @@
         "url": "https://try.gogs.io/",
         "api_token": "secret",
     },
+    "fedora": {
+        "url": "https://eu.edge.kernel.org/fedora/releases/",
+    },
 }
 
 