Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123364
D8386.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
32 KB
Subscribers
None
D8386.diff
View Options
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -26,6 +26,7 @@
- `swh.lister.pypi`
- `swh.lister.tuleap`
- `swh.lister.gogs`
+- `swh.lister.fedora`
Dependencies
------------
diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -48,3 +48,9 @@
[mypy-psycopg2.*]
ignore_missing_imports = True
+
+[mypy-repomd.*]
+ignore_missing_imports = True
+
+[mypy-defusedxml.*]
+ignore_missing_imports = True
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,3 +9,4 @@
dulwich
testing.postgresql
psycopg2
+repomd
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -86,6 +86,7 @@
lister.tuleap=swh.lister.tuleap:register
lister.maven=swh.lister.maven:register
lister.gogs=swh.lister.gogs:register
+ lister.fedora=swh.lister.fedora:register
""",
classifiers=[
"Programming Language :: Python :: 3",
diff --git a/swh/lister/fedora/__init__.py b/swh/lister/fedora/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def register():
+ from .lister import FedoraLister
+
+ return {
+ "lister": FedoraLister,
+ "task_modules": [f"{__name__}.tasks"],
+ }
diff --git a/swh/lister/fedora/lister.py b/swh/lister/fedora/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/lister.py
@@ -0,0 +1,252 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+import logging
+from typing import Any, Dict, Iterator, List, Set, Type
+from urllib.error import HTTPError
+from urllib.parse import urljoin
+
+import repomd
+
+from swh.scheduler.interface import SchedulerInterface
+from swh.scheduler.model import ListedOrigin
+
+from ..pattern import Lister
+
+logger = logging.getLogger(__name__)
+
+
+Release = int
+Edition = str
+PkgName = str
+PkgVersion = str
+FedoraOrigin = str
+FedoraPageType = Type[repomd.Repo]
+"""Each page is a list of packages from a given Fedora (release, edition) pair"""
+
+
+def get_editions(release: Release) -> List[Edition]:
+ """Get list of editions for a given release."""
+ # Ignore dirs that don't contain .rpm files:
+ # Docker,CloudImages,Atomic*,Spins,Live,Cloud_Atomic,Silverblue
+
+ if release < 20:
+ return ["Everything", "Fedora"]
+ elif release < 28:
+ return ["Everything", "Server", "Workstation"]
+ else:
+ return ["Everything", "Server", "Workstation", "Modular"]
+
+
+def get_last_modified(pkg: repomd.Package) -> datetime:
+ """Get timezone aware last modified time in UTC from RPM package metadata."""
+ ts = pkg._element.find("common:time", namespaces=repomd._ns).get("build")
+ return datetime.utcfromtimestamp(int(ts)).replace(tzinfo=timezone.utc)
+
+
+def get_checksums(pkg: repomd.Package) -> Dict[str, str]:
+ """Get checksums associated to rpm archive."""
+ cs = pkg._element.find("common:checksum", namespaces=repomd._ns)
+ cs_type = cs.get("type")
+ if cs_type == "sha":
+ cs_type = "sha1"
+ return {cs_type: cs.text}
+
+
+@dataclass
+class FedoraListerState:
+ """State of Fedora lister"""
+
+ package_versions: Dict[PkgName, Set[PkgVersion]] = field(default_factory=dict)
+ """Dictionary mapping a package name to all the versions found during
+ last listing"""
+
+
+class FedoraLister(Lister[FedoraListerState, FedoraPageType]):
+ """
+ List source packages for given Fedora releases.
+
+ The lister will create a snapshot for each package name from all its
+ available versions.
+
+ If a package snapshot is different from the last listing operation,
+ it will be sent to the scheduler that will create a loading task
+ to archive newly found source code.
+
+ Args:
+ scheduler: instance of SchedulerInterface
+ url: fedora package archives mirror URL
+ releases: list of fedora releases to process
+ """
+
+ LISTER_NAME = "fedora"
+
+ def __init__(
+ self,
+ scheduler: SchedulerInterface,
+ instance: str = "fedora",
+ url: str = "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/",
+ releases: List[Release] = [34, 35, 36],
+ ):
+ super().__init__(
+ scheduler=scheduler,
+ url=url,
+ instance=instance,
+ credentials={},
+ )
+
+ self.releases = releases
+
+ self.listed_origins: Dict[FedoraOrigin, ListedOrigin] = {}
+ "will hold all listed origins info"
+ self.origins_to_send: Set[FedoraOrigin] = set()
+ "will hold updated origins since last listing"
+ self.package_versions: Dict[PkgName, Set[PkgVersion]] = {}
+ "will contain the lister state after a call to run"
+ self.last_page = False
+
+ def state_from_dict(self, d: Dict[str, Any]) -> FedoraListerState:
+ return FedoraListerState(package_versions={k: set(v) for k, v in d.items()})
+
+ def state_to_dict(self, state: FedoraListerState) -> Dict[str, Any]:
+ return {k: list(v) for k, v in state.package_versions.items()}
+
+ def page_request(self, release: Release, edition: Edition) -> FedoraPageType:
+ """Return parsed packages for a given fedora release."""
+ index_url = urljoin(
+ self.url,
+ f"{release}/{edition}/source/SRPMS/"
+ if release < 24
+ else f"{release}/{edition}/source/tree/",
+ )
+
+ repo = repomd.load(index_url) # raises an error if repomd.xml is not found
+ self.last_page = (
+ release == self.releases[-1] and edition == get_editions(release)[-1]
+ )
+
+ logger.debug(
+ "Fetched metadata from url: %s, found %d packages", index_url, len(repo)
+ )
+ # TODO: Extract more fields like "provides" and "requires" from *primary.xml
+ # as extrinsic metadata using the pkg._element.findtext method
+ return repo
+
+ def get_pages(self) -> Iterator[FedoraPageType]:
+ """Return an iterator on parsed fedora packages, one page per (release, edition) pair"""
+
+ for release in self.releases:
+ for edition in get_editions(release):
+ logger.debug("Listing fedora release %s edition %s", release, edition)
+ self.current_release = release
+ self.current_edition = edition
+ try:
+ yield self.page_request(release, edition)
+ except HTTPError as http_error:
+ if http_error.getcode() == 404:
+ logger.debug(
+ "No packages metadata found for fedora release %s edition %s",
+ release,
+ edition,
+ )
+ continue
+ raise
+
+ def origin_url_for_package(self, package_name: PkgName) -> FedoraOrigin:
+ """Return the origin url for the given package"""
+ return f"https://src.fedoraproject.org/rpms/{package_name}"
+
+ def get_origins_from_page(self, page: FedoraPageType) -> Iterator[ListedOrigin]:
+ """Convert a page of fedora package sources into an iterator of ListedOrigin."""
+ assert self.lister_obj.id is not None
+
+ origins_to_send = set()
+
+ # iterate on each package's metadata
+ for pkg_metadata in page:
+ # extract package metadata
+ package_name = pkg_metadata.name
+ package_version = pkg_metadata.version
+ package_build_time = get_last_modified(pkg_metadata)
+ package_download_path = pkg_metadata.location
+
+ # build origin url
+ origin_url = self.origin_url_for_package(package_name)
+ # create package version key as expected by the fedora (rpm) loader
+ package_version_key = pkg_metadata.vr
+
+ # this is the first time a package is listed
+ if origin_url not in self.listed_origins:
+ # create a ListedOrigin object for it that can be later
+ # updated with new package versions info
+ self.listed_origins[origin_url] = ListedOrigin(
+ lister_id=self.lister_obj.id,
+ url=origin_url,
+ visit_type="rpm",
+ extra_loader_arguments={"packages": {}},
+ last_update=package_build_time,
+ )
+
+ # init set that will contain all listed package versions
+ self.package_versions[package_name] = set()
+
+ # origin will be yielded at the end of that method
+ origins_to_send.add(origin_url)
+
+ # update package metadata in parameter that will be provided
+ # to the rpm loader
+ self.listed_origins[origin_url].extra_loader_arguments["packages"][
+ package_version_key
+ ] = {
+ "name": package_name,
+ "version": package_version,
+ "url": urljoin(page.baseurl, package_download_path),
+ "release": self.current_release,
+ "edition": self.current_edition,
+ "buildTime": package_build_time.isoformat(),
+ "checksums": get_checksums(pkg_metadata),
+ }
+
+ last_update = self.listed_origins[origin_url].last_update
+ if last_update is not None and package_build_time > last_update:
+ self.listed_origins[origin_url].last_update = package_build_time
+
+ # add package version key to the set of found versions
+ self.package_versions[package_name].add(package_version_key)
+
+ # package has already been listed during a previous listing process
+ if package_name in self.state.package_versions:
+ new_versions = (
+ self.package_versions[package_name]
+ - self.state.package_versions[package_name]
+ )
+ # no new versions so far, no need to send the origin to the scheduler
+ if not new_versions:
+ origins_to_send.remove(origin_url)
+
+ logger.debug(
+ "Found %s packages to update (new ones or packages with new versions).",
+ len(origins_to_send),
+ )
+ logger.debug(
+ "Current total number of listed packages is equal to %s.",
+ len(self.listed_origins),
+ )
+
+ # accumulate updated origins; they are only yielded once the last page is reached
+ self.origins_to_send.update(origins_to_send)
+
+ if self.last_page:
+ # yield listed origins when all fedora releases and editions processed
+ yield from [
+ self.listed_origins[origin_url] for origin_url in self.origins_to_send
+ ]
+
+ def finalize(self):
+ # set mapping between listed package names and versions as lister state
+ self.state.package_versions = self.package_versions
+ self.updated = len(self.listed_origins) > 0
diff --git a/swh/lister/fedora/tasks.py b/swh/lister/fedora/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/tasks.py
@@ -0,0 +1,21 @@
+# Copyright (C) 2022 The Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from typing import Dict
+
+from celery import shared_task
+
+from .lister import FedoraLister
+
+
+@shared_task(name=__name__ + ".FullFedoraRelister")
+def list_fedora_full(**lister_args) -> Dict[str, int]:
+ """Full update of a Fedora instance"""
+ lister = FedoraLister.from_configfile(**lister_args)
+ return lister.run().dict()
+
+
+@shared_task(name=__name__ + ".ping")
+def _ping() -> str:
+ return "OK"
diff --git a/swh/lister/fedora/tests/__init__.py b/swh/lister/fedora/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary26.xml.gz b/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary26.xml.gz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary36-altered.xml.gz b/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary36-altered.xml.gz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary36.xml.gz b/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary36.xml.gz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/lister/fedora/tests/data/archives.fedoraproject.org/repomd26.xml b/swh/lister/fedora/tests/data/archives.fedoraproject.org/repomd26.xml
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/tests/data/archives.fedoraproject.org/repomd26.xml
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<repomd xmlns="http://linux.duke.edu/metadata/repo" xmlns:rpm="http://linux.duke.edu/metadata/rpm">
+ <revision>1499286311</revision>
+ <data type="primary">
+ <checksum type="sha256">4f677623c24912d86848f86837d398979b5adc2a51d9a2170f11fe42a257f3d3</checksum>
+ <open-checksum type="sha256">db616ad8e4219e23dfc05cd515e017cdc0d59144689ac606951fa42cbb06ae65</open-checksum>
+ <location href="repodata/4f677623c24912d86848f86837d398979b5adc2a51d9a2170f11fe42a257f3d3-primary.xml.gz"/>
+ <timestamp>1499286305</timestamp>
+ <size>5425131</size>
+ <open-size>30064034</open-size>
+ </data>
+ <data type="filelists">
+ <checksum type="sha256">17296af99a4b80bc67fccabe71ecefa02b76e8409372d936c054b8c9de312b6c</checksum>
+ <open-checksum type="sha256">7caabd1205a72d26422756211dcd536336cef643f7f73eb15a470b02ff09a194</open-checksum>
+ <location href="repodata/17296af99a4b80bc67fccabe71ecefa02b76e8409372d936c054b8c9de312b6c-filelists.xml.gz"/>
+ <timestamp>1499286305</timestamp>
+ <size>1650273</size>
+ <open-size>6419422</open-size>
+ </data>
+ <data type="other">
+ <checksum type="sha256">8f1ed139aeaa57f5bc280ce97b82f690e4008c122b4793791ca18e513268b6eb</checksum>
+ <open-checksum type="sha256">786b8d4fa759f0ade3eaab1bde390d12c950dfe217eda1773400f3a3d461522b</open-checksum>
+ <location href="repodata/8f1ed139aeaa57f5bc280ce97b82f690e4008c122b4793791ca18e513268b6eb-other.xml.gz"/>
+ <timestamp>1499286305</timestamp>
+ <size>4396102</size>
+ <open-size>33165783</open-size>
+ </data>
+ <data type="primary_db">
+ <checksum type="sha256">1d2c0be48c35e55669b410cb4dbe767ae4850b4c610e95ca9aee67f7eb31e457</checksum>
+ <open-checksum type="sha256">dc8dbac072ac1412f0ecface57fa57c5ddcac14acc880fe9b467164be733e963</open-checksum>
+ <location href="repodata/1d2c0be48c35e55669b410cb4dbe767ae4850b4c610e95ca9aee67f7eb31e457-primary.sqlite.bz2"/>
+ <timestamp>1499286309</timestamp>
+ <size>7071217</size>
+ <open-size>26177536</open-size>
+ <database_version>10</database_version>
+ </data>
+ <data type="filelists_db">
+ <checksum type="sha256">5e1259759b9bedefc1ff14b81760524841402776e6c1b33014f4f5d6feb40d11</checksum>
+ <open-checksum type="sha256">b293d51dd4e6eb4128e40b6ce228c62b169b1d47be535e56f69b8ad622c4a6ca</open-checksum>
+ <location href="repodata/5e1259759b9bedefc1ff14b81760524841402776e6c1b33014f4f5d6feb40d11-filelists.sqlite.bz2"/>
+ <timestamp>1499286307</timestamp>
+ <size>2227395</size>
+ <open-size>5529600</open-size>
+ <database_version>10</database_version>
+ </data>
+ <data type="other_db">
+ <checksum type="sha256">f6b30bdfe96d2137542704288de1345c01ea14397eb187126d4474648bad5292</checksum>
+ <open-checksum type="sha256">3f5d4619dcabe945b773c1c98ea40b8ead53340291bd504ab3faabfc7b57bb99</open-checksum>
+ <location href="repodata/f6b30bdfe96d2137542704288de1345c01ea14397eb187126d4474648bad5292-other.sqlite.bz2"/>
+ <timestamp>1499286311</timestamp>
+ <size>5264843</size>
+ <open-size>27930624</open-size>
+ <database_version>10</database_version>
+ </data>
+</repomd>
diff --git a/swh/lister/fedora/tests/data/archives.fedoraproject.org/repomd36.xml b/swh/lister/fedora/tests/data/archives.fedoraproject.org/repomd36.xml
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/tests/data/archives.fedoraproject.org/repomd36.xml
@@ -0,0 +1,85 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<repomd xmlns="http://linux.duke.edu/metadata/repo" xmlns:rpm="http://linux.duke.edu/metadata/rpm">
+ <revision>1651698851</revision>
+ <data type="primary">
+ <checksum type="sha256">42155056c6d7b1f0e5437bb2a92c48e6d21a02ee8f09acc726e705c26e960a3c</checksum>
+ <open-checksum type="sha256">a5841e7086be579d58e2dbb7628caebba32d9defa85739455d518bfaf90e39b0</open-checksum>
+ <location href="repodata/42155056c6d7b1f0e5437bb2a92c48e6d21a02ee8f09acc726e705c26e960a3c-primary.xml.gz"/>
+ <timestamp>1651698827</timestamp>
+ <size>7144060</size>
+ <open-size>45898728</open-size>
+ </data>
+ <data type="filelists">
+ <checksum type="sha256">fc915adcdf5710f9f80dfffcec8f03088f09cf80fbc9c801d5a8f45f1f31bb92</checksum>
+ <open-checksum type="sha256">a96a4739268e250e3c3461da716472503ed5ed8b27161fec9a143d4a8ccf5767</open-checksum>
+ <location href="repodata/fc915adcdf5710f9f80dfffcec8f03088f09cf80fbc9c801d5a8f45f1f31bb92-filelists.xml.gz"/>
+ <timestamp>1651698827</timestamp>
+ <size>1934835</size>
+ <open-size>7458268</open-size>
+ </data>
+ <data type="other">
+ <checksum type="sha256">461db9fa87e564d75d74c0dfbf006ea5d18ed646d4cb8dee1c69a4d95dd08d09</checksum>
+ <open-checksum type="sha256">1733c3011a0323fadac711dd25176c9934698176605c3e516b6aabb9b5775e00</open-checksum>
+ <location href="repodata/461db9fa87e564d75d74c0dfbf006ea5d18ed646d4cb8dee1c69a4d95dd08d09-other.xml.gz"/>
+ <timestamp>1651698827</timestamp>
+ <size>3779969</size>
+ <open-size>33166564</open-size>
+ </data>
+ <data type="primary_db">
+ <checksum type="sha256">ac60dd254bfc7557eb646a116bf8083b49fee8e942e1ef50dff7f74004897e74</checksum>
+ <open-checksum type="sha256">c752f5132f2cc5f4f137dade787154316f9503ae816212b8fabf5733cc2d344d</open-checksum>
+ <location href="repodata/ac60dd254bfc7557eb646a116bf8083b49fee8e942e1ef50dff7f74004897e74-primary.sqlite.xz"/>
+ <timestamp>1651698851</timestamp>
+ <size>9058624</size>
+ <open-size>41562112</open-size>
+ <database_version>10</database_version>
+ </data>
+ <data type="filelists_db">
+ <checksum type="sha256">1a279b88531d9c2e24c0bfc9a0d6b4357d70301c24fa42f649c726ed1af1d6a8</checksum>
+ <open-checksum type="sha256">e9b5c17e6004a78d20146aa54fa5ac93a01f4f2a95117588d649e92cfc008473</open-checksum>
+ <location href="repodata/1a279b88531d9c2e24c0bfc9a0d6b4357d70301c24fa42f649c726ed1af1d6a8-filelists.sqlite.xz"/>
+ <timestamp>1651698834</timestamp>
+ <size>1809496</size>
+ <open-size>6471680</open-size>
+ <database_version>10</database_version>
+ </data>
+ <data type="other_db">
+ <checksum type="sha256">850ad17efdebe5f9ccbef03c8aec4e7589bb6a1ca9a6249578968d60ad094a4f</checksum>
+ <open-checksum type="sha256">d13c6da8f7ad2c9060fd5b811b86facc9e926ec9273c0e135c4fe1110f784cdc</open-checksum>
+ <location href="repodata/850ad17efdebe5f9ccbef03c8aec4e7589bb6a1ca9a6249578968d60ad094a4f-other.sqlite.xz"/>
+ <timestamp>1651698838</timestamp>
+ <size>4285108</size>
+ <open-size>27897856</open-size>
+ <database_version>10</database_version>
+ </data>
+ <data type="primary_zck">
+ <checksum type="sha256">fc4205cf1cca7f0c157d1aa9a1348a1742ca7df671fbf7ccccd79221d473145b</checksum>
+ <open-checksum type="sha256">a5841e7086be579d58e2dbb7628caebba32d9defa85739455d518bfaf90e39b0</open-checksum>
+ <header-checksum type="sha256">2074f3da25ad0d45cf2776ad35dd22a6c63fafff319143c2f7dfefa98b99d651</header-checksum>
+ <location href="repodata/fc4205cf1cca7f0c157d1aa9a1348a1742ca7df671fbf7ccccd79221d473145b-primary.xml.zck"/>
+ <timestamp>1651698828</timestamp>
+ <size>6030441</size>
+ <open-size>45898728</open-size>
+ <header-size>231</header-size>
+ </data>
+ <data type="filelists_zck">
+ <checksum type="sha256">6c77673bb8823bf04fd4520c421fd0fc84567db9f23b8aa19f600b0688e46dd9</checksum>
+ <open-checksum type="sha256">a96a4739268e250e3c3461da716472503ed5ed8b27161fec9a143d4a8ccf5767</open-checksum>
+ <header-checksum type="sha256">55fc5e75acd903f01cf18328fec9c6f995bd8f80c5b085aa3e0fe116bb89e891</header-checksum>
+ <location href="repodata/6c77673bb8823bf04fd4520c421fd0fc84567db9f23b8aa19f600b0688e46dd9-filelists.xml.zck"/>
+ <timestamp>1651698829</timestamp>
+ <size>1735208</size>
+ <open-size>7458268</open-size>
+ <header-size>136</header-size>
+ </data>
+ <data type="other_zck">
+ <checksum type="sha256">c87c1b085ef287ba69b1f244d3fff56fc5efc01ffd1d7c10ee22328117651cd5</checksum>
+ <open-checksum type="sha256">1733c3011a0323fadac711dd25176c9934698176605c3e516b6aabb9b5775e00</open-checksum>
+ <header-checksum type="sha256">93624d227c24ff4eb2332fcb038e7157e08ed051b654820def75c5511a1ce191</header-checksum>
+ <location href="repodata/c87c1b085ef287ba69b1f244d3fff56fc5efc01ffd1d7c10ee22328117651cd5-other.xml.zck"/>
+ <timestamp>1651698829</timestamp>
+ <size>3019451</size>
+ <open-size>33166564</open-size>
+ <header-size>206</header-size>
+ </data>
+</repomd>
diff --git a/swh/lister/fedora/tests/test_lister.py b/swh/lister/fedora/tests/test_lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/tests/test_lister.py
@@ -0,0 +1,231 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from io import StringIO
+from pathlib import Path
+from typing import List
+from unittest.mock import MagicMock
+from urllib.error import HTTPError
+
+import pytest
+
+from swh.lister.fedora.lister import FedoraLister, Release, get_editions
+from swh.scheduler.interface import SchedulerInterface
+
+
+def mock_repomd(datadir, mocker, use_altered_fedora36=False):
+ """Mocks the .xml files fetched by repomd for the next lister run"""
+ paths = ["repomd26.xml", "primary26.xml.gz", "repomd36.xml", "primary36.xml.gz"]
+ if use_altered_fedora36:
+ paths[3] = "primary36-altered.xml.gz"
+
+ cm = MagicMock()
+ cm.read.side_effect = [
+ Path(datadir, "archives.fedoraproject.org", path).read_bytes() for path in paths
+ ]
+ cm.__enter__.return_value = cm
+ mocker.patch("repomd.urllib.request.urlopen").return_value = cm
+
+
+def rpm_url(release, path):
+ return (
+ "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/"
+ f"{release}/Everything/source/tree/Packages/{path}"
+ )
+
+
+@pytest.fixture
+def pkg_versions():
+ return {
+ "https://src.fedoraproject.org/rpms/0install": {
+ "2.11-4.fc26": {
+ "name": "0install",
+ "version": "2.11",
+ "release": 26,
+ "edition": "Everything",
+ "buildTime": "2017-02-10T04:59:31+00:00",
+ "url": rpm_url(26, "0/0install-2.11-4.fc26.src.rpm"),
+ "checksums": {
+ # note: we intentionally altered the original
+ # primary26.xml file to test sha1 usage
+ "sha1": "a6fdef5d1026dea208eeeba148f55ac2f545989b",
+ },
+ }
+ },
+ "https://src.fedoraproject.org/rpms/0xFFFF": {
+ "0.3.9-15.fc26": {
+ "name": "0xFFFF",
+ "version": "0.3.9",
+ "release": 26,
+ "edition": "Everything",
+ "buildTime": "2017-02-10T05:01:53+00:00",
+ "url": rpm_url(26, "0/0xFFFF-0.3.9-15.fc26.src.rpm"),
+ "checksums": {
+ "sha256": "96f9c163c0402d2b30e5343c8397a6d50e146c85a446804396b119ef9698231f"
+ },
+ },
+ "0.9-4.fc36": {
+ "name": "0xFFFF",
+ "version": "0.9",
+ "release": 36,
+ "edition": "Everything",
+ "buildTime": "2022-01-19T19:13:53+00:00",
+ "url": rpm_url(36, "0/0xFFFF-0.9-4.fc36.src.rpm"),
+ "checksums": {
+ "sha256": "45eee8d990d502324ae665233c320b8a5469c25d735f1862e094c1878d6ff2cd"
+ },
+ },
+ },
+ "https://src.fedoraproject.org/rpms/2ping": {
+ "4.5.1-2.fc36": {
+ "name": "2ping",
+ "version": "4.5.1",
+ "release": 36,
+ "edition": "Everything",
+ "buildTime": "2022-01-19T19:12:21+00:00",
+ "url": rpm_url(36, "2/2ping-4.5.1-2.fc36.src.rpm"),
+ "checksums": {
+ "sha256": "2ce028d944ebea1cab8c6203c9fed882792478b42fc34682b886a9db16e9de28"
+ },
+ }
+ },
+ }
+
+
+def run_lister(
+ swh_scheduler: SchedulerInterface,
+ releases: List[Release],
+ pkg_versions: dict,
+ origin_count: int,
+ updated: bool = True,
+):
+ """Runs the lister and tests that the listed origins are correct."""
+ lister = FedoraLister(scheduler=swh_scheduler, releases=releases)
+
+ stats = lister.run()
+ scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+ lister_state = lister.get_state_from_scheduler()
+ state_pkg_versions = {k.split("/")[-1]: set(v) for k, v in pkg_versions.items()}
+
+ # One edition from each release (we mocked get_editions)
+ assert stats.pages == (len(releases) if updated else 0)
+ assert stats.origins == origin_count
+
+ assert {
+ o.url: o.extra_loader_arguments["packages"] for o in scheduler_origins
+ } == pkg_versions
+
+ assert lister_state.package_versions == state_pkg_versions
+ assert lister.updated == updated
+
+
+def test_get_editions():
+ assert get_editions(18) == ["Everything", "Fedora"]
+ assert get_editions(26) == ["Everything", "Server", "Workstation"]
+ assert get_editions(34) == ["Everything", "Server", "Workstation", "Modular"]
+
+
+@pytest.mark.parametrize("status_code", [400, 404, 500])
+def test_fedora_lister_http_error(
+ swh_scheduler: SchedulerInterface, mocker: MagicMock, status_code: int
+):
+ """
+ Simulates handling of HTTP errors while fetching packages for fedora releases.
+ """
+ releases = [18]
+
+ is_404 = status_code == 404
+
+ def side_effect(url):
+ if is_404:
+ raise HTTPError(
+ url, status_code, "Not Found", {"content-type": "text/html"}, StringIO()
+ )
+ else:
+ raise HTTPError(
+ url,
+ status_code,
+ "Internal server error",
+ {"content-type": "text/html"},
+ StringIO(),
+ )
+
+ urlopen_patch = mocker.patch("repomd.urllib.request.urlopen")
+ urlopen_patch.side_effect = side_effect
+
+ expected_pkgs: dict = {}
+
+ if is_404:
+ run_lister(
+ swh_scheduler, releases, expected_pkgs, origin_count=0, updated=False
+ )
+ else:
+ with pytest.raises(HTTPError):
+ run_lister(
+ swh_scheduler, releases, expected_pkgs, origin_count=0, updated=False
+ )
+
+
+def test_full_lister_fedora(
+ swh_scheduler: SchedulerInterface,
+ mocker: MagicMock,
+ datadir: Path,
+ pkg_versions: dict,
+):
+ """
+ Simulates a full listing of packages for fedora releases.
+ """
+ releases = [26, 36]
+
+ get_editions_patch = mocker.patch("swh.lister.fedora.lister.get_editions")
+ get_editions_patch.return_value = ["Everything"]
+
+ mock_repomd(datadir, mocker)
+ run_lister(swh_scheduler, releases, pkg_versions, origin_count=3)
+
+
+def test_incremental_lister(
+ swh_scheduler: SchedulerInterface,
+ mocker: MagicMock,
+ datadir: Path,
+ pkg_versions: dict,
+):
+ """
+ Simulates an incremental listing of packages for fedora releases.
+ """
+ releases = [26, 36]
+
+ get_editions_patch = mocker.patch("swh.lister.fedora.lister.get_editions")
+ get_editions_patch.return_value = ["Everything"]
+
+ # First run
+ mock_repomd(datadir, mocker)
+ run_lister(swh_scheduler, releases, pkg_versions, origin_count=3)
+ # Second run (no updates)
+ mock_repomd(datadir, mocker)
+ run_lister(swh_scheduler, releases, pkg_versions, origin_count=0)
+
+ # Use an altered version of primary36.xml in which we updated the version
+ # of package 0xFFFF to 0.10:
+ mock_repomd(datadir, mocker, use_altered_fedora36=True)
+ # Add new version to the set of expected pkg versions:
+ pkg_versions["https://src.fedoraproject.org/rpms/0xFFFF"].update(
+ {
+ "0.10-4.fc36": {
+ "name": "0xFFFF",
+ "version": "0.10",
+ "release": 36,
+ "edition": "Everything",
+ "buildTime": "2022-01-19T19:13:53+00:00",
+ "url": rpm_url(36, "0/0xFFFF-0.10-4.fc36.src.rpm"),
+ "checksums": {
+ "sha256": "45eee8d990d502324ae665233c320b8a5469c25d735f1862e094c1878d6ff2cd"
+ },
+ }
+ }
+ )
+
+ # Third run (0xFFFF in fedora36 editions got updated and it needs to be listed)
+ run_lister(swh_scheduler, releases, pkg_versions, origin_count=1)
diff --git a/swh/lister/fedora/tests/test_tasks.py b/swh/lister/fedora/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/tests/test_tasks.py
@@ -0,0 +1,60 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from unittest.mock import patch
+
+from swh.lister.pattern import ListerStats
+
+
+def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
+ res = swh_scheduler_celery_app.send_task("swh.lister.fedora.tasks.ping")
+ assert res
+ res.wait()
+ assert res.successful()
+ assert res.result == "OK"
+
+
+@patch("swh.lister.fedora.tasks.FedoraLister")
+def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
+ lister.from_configfile.return_value = lister
+ lister.run.return_value = ListerStats(pages=10, origins=500)
+
+ kwargs = dict(
+ url="https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/"
+ )
+ res = swh_scheduler_celery_app.send_task(
+ "swh.lister.fedora.tasks.FullFedoraRelister",
+ kwargs=kwargs,
+ )
+ assert res
+ res.wait()
+ assert res.successful()
+
+ lister.from_configfile.assert_called_once_with(**kwargs)
+ lister.run.assert_called_once_with()
+
+
+@patch("swh.lister.fedora.tasks.FedoraLister")
+def test_full_listing_params(
+ lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
+):
+ lister.from_configfile.return_value = lister
+ lister.run.return_value = ListerStats(pages=10, origins=500)
+
+ kwargs = dict(
+ url="https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/",
+ instance="archives.fedoraproject.org",
+ releases=["36"],
+ )
+ res = swh_scheduler_celery_app.send_task(
+ "swh.lister.fedora.tasks.FullFedoraRelister",
+ kwargs=kwargs,
+ )
+ assert res
+ res.wait()
+ assert res.successful()
+
+ lister.from_configfile.assert_called_once_with(**kwargs)
+ lister.run.assert_called_once_with()
diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py
--- a/swh/lister/tests/test_cli.py
+++ b/swh/lister/tests/test_cli.py
@@ -39,6 +39,9 @@
"url": "https://guix.gnu.org/sources.json",
"origin_upstream": "https://git.savannah.gnu.org/cgit/guix.git/",
},
+ "fedora": {
+ "url": "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases//",
+ },
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Dec 19, 6:34 AM (6 h, 45 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218492
Attached To
D8386: feat(fedora): Introduce fedora lister
Event Timeline
Log In to Comment