Page MenuHomeSoftware Heritage

D8386.diff
No OneTemporary

D8386.diff

diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -26,6 +26,7 @@
- `swh.lister.pypi`
- `swh.lister.tuleap`
- `swh.lister.gogs`
+- `swh.lister.fedora`
Dependencies
------------
diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -48,3 +48,9 @@
[mypy-psycopg2.*]
ignore_missing_imports = True
+
+[mypy-repomd.*]
+ignore_missing_imports = True
+
+[mypy-defusedxml.*]
+ignore_missing_imports = True
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,3 +9,4 @@
dulwich
testing.postgresql
psycopg2
+repomd
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -86,6 +86,7 @@
lister.tuleap=swh.lister.tuleap:register
lister.maven=swh.lister.maven:register
lister.gogs=swh.lister.gogs:register
+ lister.fedora=swh.lister.fedora:register
""",
classifiers=[
"Programming Language :: Python :: 3",
diff --git a/swh/lister/fedora/__init__.py b/swh/lister/fedora/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def register():
+ from .lister import FedoraLister
+
+ return {
+ "lister": FedoraLister,
+ "task_modules": [f"{__name__}.tasks"],
+ }
diff --git a/swh/lister/fedora/lister.py b/swh/lister/fedora/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/lister.py
@@ -0,0 +1,252 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+import logging
+from typing import Any, Dict, Iterator, List, Set, Type
+from urllib.error import HTTPError
+from urllib.parse import urljoin
+
+import repomd
+
+from swh.scheduler.interface import SchedulerInterface
+from swh.scheduler.model import ListedOrigin
+
+from ..pattern import Lister
+
+logger = logging.getLogger(__name__)
+
+
+Release = int
+Edition = str
+PkgName = str
+PkgVersion = str
+FedoraOrigin = str
+FedoraPageType = Type[repomd.Repo]
+"""Each page is a list of packages from a given Fedora (release, edition) pair"""
+
+
+def get_editions(release: Release) -> List[Edition]:
+ """Get list of editions for a given release."""
+ # Ignore dirs that don't contain .rpm files:
+ # Docker,CloudImages,Atomic*,Spins,Live,Cloud_Atomic,Silverblue
+
+ if release < 20:
+ return ["Everything", "Fedora"]
+ elif release < 28:
+ return ["Everything", "Server", "Workstation"]
+ else:
+ return ["Everything", "Server", "Workstation", "Modular"]
+
+
+def get_last_modified(pkg: repomd.Package) -> datetime:
+ """Get timezone aware last modified time in UTC from RPM package metadata."""
+ ts = pkg._element.find("common:time", namespaces=repomd._ns).get("build")
+ return datetime.utcfromtimestamp(int(ts)).replace(tzinfo=timezone.utc)
+
+
+def get_checksums(pkg: repomd.Package) -> Dict[str, str]:
+ """Get checksums associated to rpm archive."""
+ cs = pkg._element.find("common:checksum", namespaces=repomd._ns)
+ cs_type = cs.get("type")
+ if cs_type == "sha":
+ cs_type = "sha1"
+ return {cs_type: cs.text}
+
+
+@dataclass
+class FedoraListerState:
+ """State of Fedora lister"""
+
+ package_versions: Dict[PkgName, Set[PkgVersion]] = field(default_factory=dict)
+ """Dictionary mapping a package name to all the versions found during
+ last listing"""
+
+
+class FedoraLister(Lister[FedoraListerState, FedoraPageType]):
+ """
+ List source packages for given Fedora releases.
+
+ The lister will create a snapshot for each package name from all its
+ available versions.
+
+ If a package snapshot is different from the last listing operation,
+ it will be sent to the scheduler that will create a loading task
+ to archive newly found source code.
+
+ Args:
+ scheduler: instance of SchedulerInterface
+ url: fedora package archives mirror URL
+ releases: list of fedora releases to process
+ """
+
+ LISTER_NAME = "fedora"
+
+ def __init__(
+ self,
+ scheduler: SchedulerInterface,
+ instance: str = "fedora",
+ url: str = "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/",
+ releases: List[Release] = [34, 35, 36],
+ ):
+ super().__init__(
+ scheduler=scheduler,
+ url=url,
+ instance=instance,
+ credentials={},
+ )
+
+ self.releases = releases
+
+ self.listed_origins: Dict[FedoraOrigin, ListedOrigin] = {}
+ "will hold all listed origins info"
+ self.origins_to_send: Set[FedoraOrigin] = set()
+ "will hold updated origins since last listing"
+ self.package_versions: Dict[PkgName, Set[PkgVersion]] = {}
+ "will contain the lister state after a call to run"
+ self.last_page = False
+
+ def state_from_dict(self, d: Dict[str, Any]) -> FedoraListerState:
+ return FedoraListerState(package_versions={k: set(v) for k, v in d.items()})
+
+ def state_to_dict(self, state: FedoraListerState) -> Dict[str, Any]:
+ return {k: list(v) for k, v in state.package_versions.items()}
+
+ def page_request(self, release: Release, edition: Edition) -> FedoraPageType:
+ """Return parsed packages for a given fedora release and edition."""
+ index_url = urljoin(
+ self.url,
+ f"{release}/{edition}/source/SRPMS/"
+ if release < 24
+ else f"{release}/{edition}/source/tree/",
+ )
+
+ repo = repomd.load(index_url) # raises an error if repomd.xml is not found
+ self.last_page = (
+ release == self.releases[-1] and edition == get_editions(release)[-1]
+ )
+
+ logger.debug(
+ "Fetched metadata from url: %s, found %d packages", index_url, len(repo)
+ )
+ # TODO: Extract more fields like "provides" and "requires" from *primary.xml
+ # as extrinsic metadata using the pkg._element.findtext method
+ return repo
+
+ def get_pages(self) -> Iterator[FedoraPageType]:
+ """Return an iterator on parsed fedora packages, one page per (release, edition) pair"""
+
+ for release in self.releases:
+ for edition in get_editions(release):
+ logger.debug("Listing fedora release %s edition %s", release, edition)
+ self.current_release = release
+ self.current_edition = edition
+ try:
+ yield self.page_request(release, edition)
+ except HTTPError as http_error:
+ if http_error.getcode() == 404:
+ logger.debug(
+ "No packages metadata found for fedora release %s edition %s",
+ release,
+ edition,
+ )
+ continue
+ raise
+
+ def origin_url_for_package(self, package_name: PkgName) -> FedoraOrigin:
+ """Return the origin url for the given package"""
+ return f"https://src.fedoraproject.org/rpms/{package_name}"
+
+ def get_origins_from_page(self, page: FedoraPageType) -> Iterator[ListedOrigin]:
+ """Convert a page of fedora package sources into an iterator of ListedOrigin."""
+ assert self.lister_obj.id is not None
+
+ origins_to_send = set()
+
+ # iterate on each package's metadata
+ for pkg_metadata in page:
+ # extract package metadata
+ package_name = pkg_metadata.name
+ package_version = pkg_metadata.version
+ package_build_time = get_last_modified(pkg_metadata)
+ package_download_path = pkg_metadata.location
+
+ # build origin url
+ origin_url = self.origin_url_for_package(package_name)
+ # create package version key as expected by the fedora (rpm) loader
+ package_version_key = pkg_metadata.vr
+
+ # this is the first time a package is listed
+ if origin_url not in self.listed_origins:
+ # create a ListedOrigin object for it that can be later
+ # updated with new package versions info
+ self.listed_origins[origin_url] = ListedOrigin(
+ lister_id=self.lister_obj.id,
+ url=origin_url,
+ visit_type="rpm",
+ extra_loader_arguments={"packages": {}},
+ last_update=package_build_time,
+ )
+
+ # init set that will contain all listed package versions
+ self.package_versions[package_name] = set()
+
+ # origin will be yielded at the end of that method
+ origins_to_send.add(origin_url)
+
+ # update package metadata in parameter that will be provided
+ # to the rpm loader
+ self.listed_origins[origin_url].extra_loader_arguments["packages"][
+ package_version_key
+ ] = {
+ "name": package_name,
+ "version": package_version,
+ "url": urljoin(page.baseurl, package_download_path),
+ "release": self.current_release,
+ "edition": self.current_edition,
+ "buildTime": package_build_time.isoformat(),
+ "checksums": get_checksums(pkg_metadata),
+ }
+
+ last_update = self.listed_origins[origin_url].last_update
+ if last_update is not None and package_build_time > last_update:
+ self.listed_origins[origin_url].last_update = package_build_time
+
+ # add package version key to the set of found versions
+ self.package_versions[package_name].add(package_version_key)
+
+ # package has already been listed during a previous listing process
+ if package_name in self.state.package_versions:
+ new_versions = (
+ self.package_versions[package_name]
+ - self.state.package_versions[package_name]
+ )
+ # no new versions so far, no need to send the origin to the scheduler
+ if not new_versions:
+ origins_to_send.remove(origin_url)
+
+ logger.debug(
+ "Found %s packages to update (new ones or packages with new versions).",
+ len(origins_to_send),
+ )
+ logger.debug(
+ "Current total number of listed packages is equal to %s.",
+ len(self.listed_origins),
+ )
+
+ # yield from origins_to_send.values()
+ self.origins_to_send.update(origins_to_send)
+
+ if self.last_page:
+ # yield listed origins when all fedora releases and editions processed
+ yield from [
+ self.listed_origins[origin_url] for origin_url in self.origins_to_send
+ ]
+
+ def finalize(self):
+ # set mapping between listed package names and versions as lister state
+ self.state.package_versions = self.package_versions
+ self.updated = len(self.listed_origins) > 0
diff --git a/swh/lister/fedora/tasks.py b/swh/lister/fedora/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/tasks.py
@@ -0,0 +1,21 @@
+# Copyright (C) 2022 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from typing import Dict
+
+from celery import shared_task
+
+from .lister import FedoraLister
+
+
+@shared_task(name=__name__ + ".FullFedoraRelister")
+def list_fedora_full(**lister_args) -> Dict[str, int]:
+ """Full update of a Fedora instance"""
+ lister = FedoraLister.from_configfile(**lister_args)
+ return lister.run().dict()
+
+
+@shared_task(name=__name__ + ".ping")
+def _ping() -> str:
+ return "OK"
diff --git a/swh/lister/fedora/tests/__init__.py b/swh/lister/fedora/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary26.xml.gz b/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary26.xml.gz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary36-altered.xml.gz b/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary36-altered.xml.gz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary36.xml.gz b/swh/lister/fedora/tests/data/archives.fedoraproject.org/primary36.xml.gz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/lister/fedora/tests/data/archives.fedoraproject.org/repomd26.xml b/swh/lister/fedora/tests/data/archives.fedoraproject.org/repomd26.xml
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/tests/data/archives.fedoraproject.org/repomd26.xml
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<repomd xmlns="http://linux.duke.edu/metadata/repo" xmlns:rpm="http://linux.duke.edu/metadata/rpm">
+ <revision>1499286311</revision>
+ <data type="primary">
+ <checksum type="sha256">4f677623c24912d86848f86837d398979b5adc2a51d9a2170f11fe42a257f3d3</checksum>
+ <open-checksum type="sha256">db616ad8e4219e23dfc05cd515e017cdc0d59144689ac606951fa42cbb06ae65</open-checksum>
+ <location href="repodata/4f677623c24912d86848f86837d398979b5adc2a51d9a2170f11fe42a257f3d3-primary.xml.gz"/>
+ <timestamp>1499286305</timestamp>
+ <size>5425131</size>
+ <open-size>30064034</open-size>
+ </data>
+ <data type="filelists">
+ <checksum type="sha256">17296af99a4b80bc67fccabe71ecefa02b76e8409372d936c054b8c9de312b6c</checksum>
+ <open-checksum type="sha256">7caabd1205a72d26422756211dcd536336cef643f7f73eb15a470b02ff09a194</open-checksum>
+ <location href="repodata/17296af99a4b80bc67fccabe71ecefa02b76e8409372d936c054b8c9de312b6c-filelists.xml.gz"/>
+ <timestamp>1499286305</timestamp>
+ <size>1650273</size>
+ <open-size>6419422</open-size>
+ </data>
+ <data type="other">
+ <checksum type="sha256">8f1ed139aeaa57f5bc280ce97b82f690e4008c122b4793791ca18e513268b6eb</checksum>
+ <open-checksum type="sha256">786b8d4fa759f0ade3eaab1bde390d12c950dfe217eda1773400f3a3d461522b</open-checksum>
+ <location href="repodata/8f1ed139aeaa57f5bc280ce97b82f690e4008c122b4793791ca18e513268b6eb-other.xml.gz"/>
+ <timestamp>1499286305</timestamp>
+ <size>4396102</size>
+ <open-size>33165783</open-size>
+ </data>
+ <data type="primary_db">
+ <checksum type="sha256">1d2c0be48c35e55669b410cb4dbe767ae4850b4c610e95ca9aee67f7eb31e457</checksum>
+ <open-checksum type="sha256">dc8dbac072ac1412f0ecface57fa57c5ddcac14acc880fe9b467164be733e963</open-checksum>
+ <location href="repodata/1d2c0be48c35e55669b410cb4dbe767ae4850b4c610e95ca9aee67f7eb31e457-primary.sqlite.bz2"/>
+ <timestamp>1499286309</timestamp>
+ <size>7071217</size>
+ <open-size>26177536</open-size>
+ <database_version>10</database_version>
+ </data>
+ <data type="filelists_db">
+ <checksum type="sha256">5e1259759b9bedefc1ff14b81760524841402776e6c1b33014f4f5d6feb40d11</checksum>
+ <open-checksum type="sha256">b293d51dd4e6eb4128e40b6ce228c62b169b1d47be535e56f69b8ad622c4a6ca</open-checksum>
+ <location href="repodata/5e1259759b9bedefc1ff14b81760524841402776e6c1b33014f4f5d6feb40d11-filelists.sqlite.bz2"/>
+ <timestamp>1499286307</timestamp>
+ <size>2227395</size>
+ <open-size>5529600</open-size>
+ <database_version>10</database_version>
+ </data>
+ <data type="other_db">
+ <checksum type="sha256">f6b30bdfe96d2137542704288de1345c01ea14397eb187126d4474648bad5292</checksum>
+ <open-checksum type="sha256">3f5d4619dcabe945b773c1c98ea40b8ead53340291bd504ab3faabfc7b57bb99</open-checksum>
+ <location href="repodata/f6b30bdfe96d2137542704288de1345c01ea14397eb187126d4474648bad5292-other.sqlite.bz2"/>
+ <timestamp>1499286311</timestamp>
+ <size>5264843</size>
+ <open-size>27930624</open-size>
+ <database_version>10</database_version>
+ </data>
+</repomd>
diff --git a/swh/lister/fedora/tests/data/archives.fedoraproject.org/repomd36.xml b/swh/lister/fedora/tests/data/archives.fedoraproject.org/repomd36.xml
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/tests/data/archives.fedoraproject.org/repomd36.xml
@@ -0,0 +1,85 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<repomd xmlns="http://linux.duke.edu/metadata/repo" xmlns:rpm="http://linux.duke.edu/metadata/rpm">
+ <revision>1651698851</revision>
+ <data type="primary">
+ <checksum type="sha256">42155056c6d7b1f0e5437bb2a92c48e6d21a02ee8f09acc726e705c26e960a3c</checksum>
+ <open-checksum type="sha256">a5841e7086be579d58e2dbb7628caebba32d9defa85739455d518bfaf90e39b0</open-checksum>
+ <location href="repodata/42155056c6d7b1f0e5437bb2a92c48e6d21a02ee8f09acc726e705c26e960a3c-primary.xml.gz"/>
+ <timestamp>1651698827</timestamp>
+ <size>7144060</size>
+ <open-size>45898728</open-size>
+ </data>
+ <data type="filelists">
+ <checksum type="sha256">fc915adcdf5710f9f80dfffcec8f03088f09cf80fbc9c801d5a8f45f1f31bb92</checksum>
+ <open-checksum type="sha256">a96a4739268e250e3c3461da716472503ed5ed8b27161fec9a143d4a8ccf5767</open-checksum>
+ <location href="repodata/fc915adcdf5710f9f80dfffcec8f03088f09cf80fbc9c801d5a8f45f1f31bb92-filelists.xml.gz"/>
+ <timestamp>1651698827</timestamp>
+ <size>1934835</size>
+ <open-size>7458268</open-size>
+ </data>
+ <data type="other">
+ <checksum type="sha256">461db9fa87e564d75d74c0dfbf006ea5d18ed646d4cb8dee1c69a4d95dd08d09</checksum>
+ <open-checksum type="sha256">1733c3011a0323fadac711dd25176c9934698176605c3e516b6aabb9b5775e00</open-checksum>
+ <location href="repodata/461db9fa87e564d75d74c0dfbf006ea5d18ed646d4cb8dee1c69a4d95dd08d09-other.xml.gz"/>
+ <timestamp>1651698827</timestamp>
+ <size>3779969</size>
+ <open-size>33166564</open-size>
+ </data>
+ <data type="primary_db">
+ <checksum type="sha256">ac60dd254bfc7557eb646a116bf8083b49fee8e942e1ef50dff7f74004897e74</checksum>
+ <open-checksum type="sha256">c752f5132f2cc5f4f137dade787154316f9503ae816212b8fabf5733cc2d344d</open-checksum>
+ <location href="repodata/ac60dd254bfc7557eb646a116bf8083b49fee8e942e1ef50dff7f74004897e74-primary.sqlite.xz"/>
+ <timestamp>1651698851</timestamp>
+ <size>9058624</size>
+ <open-size>41562112</open-size>
+ <database_version>10</database_version>
+ </data>
+ <data type="filelists_db">
+ <checksum type="sha256">1a279b88531d9c2e24c0bfc9a0d6b4357d70301c24fa42f649c726ed1af1d6a8</checksum>
+ <open-checksum type="sha256">e9b5c17e6004a78d20146aa54fa5ac93a01f4f2a95117588d649e92cfc008473</open-checksum>
+ <location href="repodata/1a279b88531d9c2e24c0bfc9a0d6b4357d70301c24fa42f649c726ed1af1d6a8-filelists.sqlite.xz"/>
+ <timestamp>1651698834</timestamp>
+ <size>1809496</size>
+ <open-size>6471680</open-size>
+ <database_version>10</database_version>
+ </data>
+ <data type="other_db">
+ <checksum type="sha256">850ad17efdebe5f9ccbef03c8aec4e7589bb6a1ca9a6249578968d60ad094a4f</checksum>
+ <open-checksum type="sha256">d13c6da8f7ad2c9060fd5b811b86facc9e926ec9273c0e135c4fe1110f784cdc</open-checksum>
+ <location href="repodata/850ad17efdebe5f9ccbef03c8aec4e7589bb6a1ca9a6249578968d60ad094a4f-other.sqlite.xz"/>
+ <timestamp>1651698838</timestamp>
+ <size>4285108</size>
+ <open-size>27897856</open-size>
+ <database_version>10</database_version>
+ </data>
+ <data type="primary_zck">
+ <checksum type="sha256">fc4205cf1cca7f0c157d1aa9a1348a1742ca7df671fbf7ccccd79221d473145b</checksum>
+ <open-checksum type="sha256">a5841e7086be579d58e2dbb7628caebba32d9defa85739455d518bfaf90e39b0</open-checksum>
+ <header-checksum type="sha256">2074f3da25ad0d45cf2776ad35dd22a6c63fafff319143c2f7dfefa98b99d651</header-checksum>
+ <location href="repodata/fc4205cf1cca7f0c157d1aa9a1348a1742ca7df671fbf7ccccd79221d473145b-primary.xml.zck"/>
+ <timestamp>1651698828</timestamp>
+ <size>6030441</size>
+ <open-size>45898728</open-size>
+ <header-size>231</header-size>
+ </data>
+ <data type="filelists_zck">
+ <checksum type="sha256">6c77673bb8823bf04fd4520c421fd0fc84567db9f23b8aa19f600b0688e46dd9</checksum>
+ <open-checksum type="sha256">a96a4739268e250e3c3461da716472503ed5ed8b27161fec9a143d4a8ccf5767</open-checksum>
+ <header-checksum type="sha256">55fc5e75acd903f01cf18328fec9c6f995bd8f80c5b085aa3e0fe116bb89e891</header-checksum>
+ <location href="repodata/6c77673bb8823bf04fd4520c421fd0fc84567db9f23b8aa19f600b0688e46dd9-filelists.xml.zck"/>
+ <timestamp>1651698829</timestamp>
+ <size>1735208</size>
+ <open-size>7458268</open-size>
+ <header-size>136</header-size>
+ </data>
+ <data type="other_zck">
+ <checksum type="sha256">c87c1b085ef287ba69b1f244d3fff56fc5efc01ffd1d7c10ee22328117651cd5</checksum>
+ <open-checksum type="sha256">1733c3011a0323fadac711dd25176c9934698176605c3e516b6aabb9b5775e00</open-checksum>
+ <header-checksum type="sha256">93624d227c24ff4eb2332fcb038e7157e08ed051b654820def75c5511a1ce191</header-checksum>
+ <location href="repodata/c87c1b085ef287ba69b1f244d3fff56fc5efc01ffd1d7c10ee22328117651cd5-other.xml.zck"/>
+ <timestamp>1651698829</timestamp>
+ <size>3019451</size>
+ <open-size>33166564</open-size>
+ <header-size>206</header-size>
+ </data>
+</repomd>
diff --git a/swh/lister/fedora/tests/test_lister.py b/swh/lister/fedora/tests/test_lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/tests/test_lister.py
@@ -0,0 +1,231 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from io import StringIO
+from pathlib import Path
+from typing import List
+from unittest.mock import MagicMock
+from urllib.error import HTTPError
+
+import pytest
+
+from swh.lister.fedora.lister import FedoraLister, Release, get_editions
+from swh.scheduler.interface import SchedulerInterface
+
+
+def mock_repomd(datadir, mocker, use_altered_fedora36=False):
+ """Mocks the .xml files fetched by repomd for the next lister run"""
+ paths = ["repomd26.xml", "primary26.xml.gz", "repomd36.xml", "primary36.xml.gz"]
+ if use_altered_fedora36:
+ paths[3] = "primary36-altered.xml.gz"
+
+ cm = MagicMock()
+ cm.read.side_effect = [
+ Path(datadir, "archives.fedoraproject.org", path).read_bytes() for path in paths
+ ]
+ cm.__enter__.return_value = cm
+ mocker.patch("repomd.urllib.request.urlopen").return_value = cm
+
+
+def rpm_url(release, path):
+ return (
+ "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/"
+ f"{release}/Everything/source/tree/Packages/{path}"
+ )
+
+
+@pytest.fixture
+def pkg_versions():
+ return {
+ "https://src.fedoraproject.org/rpms/0install": {
+ "2.11-4.fc26": {
+ "name": "0install",
+ "version": "2.11",
+ "release": 26,
+ "edition": "Everything",
+ "buildTime": "2017-02-10T04:59:31+00:00",
+ "url": rpm_url(26, "0/0install-2.11-4.fc26.src.rpm"),
+ "checksums": {
+ # note: we intentionally altered the original
+ # primary26.xml file to test sha1 usage
+ "sha1": "a6fdef5d1026dea208eeeba148f55ac2f545989b",
+ },
+ }
+ },
+ "https://src.fedoraproject.org/rpms/0xFFFF": {
+ "0.3.9-15.fc26": {
+ "name": "0xFFFF",
+ "version": "0.3.9",
+ "release": 26,
+ "edition": "Everything",
+ "buildTime": "2017-02-10T05:01:53+00:00",
+ "url": rpm_url(26, "0/0xFFFF-0.3.9-15.fc26.src.rpm"),
+ "checksums": {
+ "sha256": "96f9c163c0402d2b30e5343c8397a6d50e146c85a446804396b119ef9698231f"
+ },
+ },
+ "0.9-4.fc36": {
+ "name": "0xFFFF",
+ "version": "0.9",
+ "release": 36,
+ "edition": "Everything",
+ "buildTime": "2022-01-19T19:13:53+00:00",
+ "url": rpm_url(36, "0/0xFFFF-0.9-4.fc36.src.rpm"),
+ "checksums": {
+ "sha256": "45eee8d990d502324ae665233c320b8a5469c25d735f1862e094c1878d6ff2cd"
+ },
+ },
+ },
+ "https://src.fedoraproject.org/rpms/2ping": {
+ "4.5.1-2.fc36": {
+ "name": "2ping",
+ "version": "4.5.1",
+ "release": 36,
+ "edition": "Everything",
+ "buildTime": "2022-01-19T19:12:21+00:00",
+ "url": rpm_url(36, "2/2ping-4.5.1-2.fc36.src.rpm"),
+ "checksums": {
+ "sha256": "2ce028d944ebea1cab8c6203c9fed882792478b42fc34682b886a9db16e9de28"
+ },
+ }
+ },
+ }
+
+
+def run_lister(
+ swh_scheduler: SchedulerInterface,
+ releases: List[Release],
+ pkg_versions: dict,
+ origin_count: int,
+ updated: bool = True,
+):
+ """Runs the lister and tests that the listed origins are correct."""
+ lister = FedoraLister(scheduler=swh_scheduler, releases=releases)
+
+ stats = lister.run()
+ scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+ lister_state = lister.get_state_from_scheduler()
+ state_pkg_versions = {k.split("/")[-1]: set(v) for k, v in pkg_versions.items()}
+
+ # One edition from each release (we mocked get_editions)
+ assert stats.pages == (len(releases) if updated else 0)
+ assert stats.origins == origin_count
+
+ assert {
+ o.url: o.extra_loader_arguments["packages"] for o in scheduler_origins
+ } == pkg_versions
+
+ assert lister_state.package_versions == state_pkg_versions
+ assert lister.updated == updated
+
+
+def test_get_editions():
+ assert get_editions(18) == ["Everything", "Fedora"]
+ assert get_editions(26) == ["Everything", "Server", "Workstation"]
+ assert get_editions(34) == ["Everything", "Server", "Workstation", "Modular"]
+
+
+@pytest.mark.parametrize("status_code", [400, 404, 500])
+def test_fedora_lister_http_error(
+ swh_scheduler: SchedulerInterface, mocker: MagicMock, status_code: int
+):
+ """
+ Simulates handling of HTTP errors while fetching packages for fedora releases.
+ """
+ releases = [18]
+
+ is_404 = status_code == 404
+
+ def side_effect(url):
+ if is_404:
+ raise HTTPError(
+ url, status_code, "Not Found", {"content-type": "text/html"}, StringIO()
+ )
+ else:
+ raise HTTPError(
+ url,
+ status_code,
+ "Internal server error",
+ {"content-type": "text/html"},
+ StringIO(),
+ )
+
+ urlopen_patch = mocker.patch("repomd.urllib.request.urlopen")
+ urlopen_patch.side_effect = side_effect
+
+ expected_pkgs: dict = {}
+
+ if is_404:
+ run_lister(
+ swh_scheduler, releases, expected_pkgs, origin_count=0, updated=False
+ )
+ else:
+ with pytest.raises(HTTPError):
+ run_lister(
+ swh_scheduler, releases, expected_pkgs, origin_count=0, updated=False
+ )
+
+
+def test_full_lister_fedora(
+ swh_scheduler: SchedulerInterface,
+ mocker: MagicMock,
+ datadir: Path,
+ pkg_versions: dict,
+):
+ """
+ Simulates a full listing of packages for fedora releases.
+ """
+ releases = [26, 36]
+
+ get_editions_patch = mocker.patch("swh.lister.fedora.lister.get_editions")
+ get_editions_patch.return_value = ["Everything"]
+
+ mock_repomd(datadir, mocker)
+ run_lister(swh_scheduler, releases, pkg_versions, origin_count=3)
+
+
+def test_incremental_lister(
+ swh_scheduler: SchedulerInterface,
+ mocker: MagicMock,
+ datadir: Path,
+ pkg_versions: dict,
+):
+ """
+ Simulates an incremental listing of packages for fedora releases.
+ """
+ releases = [26, 36]
+
+ get_editions_patch = mocker.patch("swh.lister.fedora.lister.get_editions")
+ get_editions_patch.return_value = ["Everything"]
+
+ # First run
+ mock_repomd(datadir, mocker)
+ run_lister(swh_scheduler, releases, pkg_versions, origin_count=3)
+ # Second run (no updates)
+ mock_repomd(datadir, mocker)
+ run_lister(swh_scheduler, releases, pkg_versions, origin_count=0)
+
+ # Use an altered version of primary36.xml in which we updated the version
+ # of package 0xFFFF to 0.10:
+ mock_repomd(datadir, mocker, use_altered_fedora36=True)
+ # Add new version to the set of expected pkg versions:
+ pkg_versions["https://src.fedoraproject.org/rpms/0xFFFF"].update(
+ {
+ "0.10-4.fc36": {
+ "name": "0xFFFF",
+ "version": "0.10",
+ "release": 36,
+ "edition": "Everything",
+ "buildTime": "2022-01-19T19:13:53+00:00",
+ "url": rpm_url(36, "0/0xFFFF-0.10-4.fc36.src.rpm"),
+ "checksums": {
+ "sha256": "45eee8d990d502324ae665233c320b8a5469c25d735f1862e094c1878d6ff2cd"
+ },
+ }
+ }
+ )
+
+ # Third run (0xFFFF in fedora36 editions got updated and it needs to be listed)
+ run_lister(swh_scheduler, releases, pkg_versions, origin_count=1)
diff --git a/swh/lister/fedora/tests/test_tasks.py b/swh/lister/fedora/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fedora/tests/test_tasks.py
@@ -0,0 +1,60 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from unittest.mock import patch
+
+from swh.lister.pattern import ListerStats
+
+
+def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
+ res = swh_scheduler_celery_app.send_task("swh.lister.fedora.tasks.ping")
+ assert res
+ res.wait()
+ assert res.successful()
+ assert res.result == "OK"
+
+
+@patch("swh.lister.fedora.tasks.FedoraLister")
+def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
+ lister.from_configfile.return_value = lister
+ lister.run.return_value = ListerStats(pages=10, origins=500)
+
+ kwargs = dict(
+ url="https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/"
+ )
+ res = swh_scheduler_celery_app.send_task(
+ "swh.lister.fedora.tasks.FullFedoraRelister",
+ kwargs=kwargs,
+ )
+ assert res
+ res.wait()
+ assert res.successful()
+
+ lister.from_configfile.assert_called_once_with(**kwargs)
+ lister.run.assert_called_once_with()
+
+
+@patch("swh.lister.fedora.tasks.FedoraLister")
+def test_full_listing_params(
+ lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
+):
+ lister.from_configfile.return_value = lister
+ lister.run.return_value = ListerStats(pages=10, origins=500)
+
+ kwargs = dict(
+ url="https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/",
+ instance="archives.fedoraproject.org",
+ releases=["36"],
+ )
+ res = swh_scheduler_celery_app.send_task(
+ "swh.lister.fedora.tasks.FullFedoraRelister",
+ kwargs=kwargs,
+ )
+ assert res
+ res.wait()
+ assert res.successful()
+
+ lister.from_configfile.assert_called_once_with(**kwargs)
+ lister.run.assert_called_once_with()
diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py
--- a/swh/lister/tests/test_cli.py
+++ b/swh/lister/tests/test_cli.py
@@ -39,6 +39,9 @@
"url": "https://guix.gnu.org/sources.json",
"origin_upstream": "https://git.savannah.gnu.org/cgit/guix.git/",
},
+ "fedora": {
+ "url": "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases//",
+ },
}

File Metadata

Mime Type
text/plain
Expires
Thu, Dec 19, 6:34 AM (9 h, 30 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218492

Event Timeline