Page MenuHomeSoftware Heritage

D8907.id.diff
No OneTemporary

D8907.id.diff

diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -27,6 +27,7 @@
- `swh.lister.tuleap`
- `swh.lister.gogs`
- `swh.lister.fedora`
+- `swh.lister.hex`
Dependencies
------------
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -87,6 +87,7 @@
lister.maven=swh.lister.maven:register
lister.gogs=swh.lister.gogs:register
lister.fedora=swh.lister.fedora:register
+ lister.hex=swh.lister.hex:register
""",
classifiers=[
"Programming Language :: Python :: 3",
diff --git a/swh/lister/hex/__init__.py b/swh/lister/hex/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/hex/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def register():
+ from .lister import HexLister
+
+ return {
+ "lister": HexLister,
+ "task_modules": [f"{__name__}.tasks"],
+ }
diff --git a/swh/lister/hex/lister.py b/swh/lister/hex/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/hex/lister.py
@@ -0,0 +1,130 @@
+# Copyright (C) 2021-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from dataclasses import asdict, dataclass
+import logging
+from typing import Any, Dict, Iterator, List
+from urllib.parse import urljoin
+
+import iso8601
+
+from swh.scheduler.interface import SchedulerInterface
+from swh.scheduler.model import ListedOrigin
+
+from ..pattern import CredentialsType, Lister
+
+logger = logging.getLogger(__name__)
+
+HexListerPage = List[Dict[str, Any]]
+
+
+def get_tar_url(pkg_name: str, release_version: str):
+ return f"https://repo.hex.pm/tarballs/{pkg_name}-{release_version}.tar"
+
+
+@dataclass
+class HexListerState:
+ """The HexLister instance state. This is used for incremental listing."""
+
+ last_page_id: int = 1
+ """Id of the last page listed on an incremental pass"""
+ last_pkg_name: str = ""
+ """Name of the last package listed on an incremental pass"""
+
+
+class HexLister(Lister[HexListerState, HexListerPage]):
+ """List origins from the Hex package manager (hex.pm)."""
+
+ LISTER_NAME = "hex"
+ VISIT_TYPE = "hex"
+
+ HEX_API_URL = "https://hex.pm/api/"
+ PACKAGES_PATH = "packages/"
+
+ def __init__(
+ self,
+ scheduler: SchedulerInterface,
+ instance: str = "hex",
+ credentials: CredentialsType = None,
+ ):
+ super().__init__(
+ scheduler=scheduler,
+ credentials=credentials,
+ url=self.HEX_API_URL,
+ instance=instance,
+ )
+ # TODO: Add authentication support
+
+ self.session.headers.update({"Accept": "application/json"})
+
+ def state_from_dict(self, d: Dict[str, Any]) -> HexListerState:
+ return HexListerState(**d)
+
+ def state_to_dict(self, state: HexListerState) -> Dict[str, Any]:
+ return asdict(state)
+
+ def get_pages(self) -> Iterator[HexListerPage]:
+ page_id = 1
+ if self.state.last_page_id is not None:
+ page_id = self.state.last_page_id
+
+ url = urljoin(self.url, self.PACKAGES_PATH)
+
+ while page_id is not None:
+ body = self.http_request(
+ url,
+ params={
+ "page": page_id,
+ "sort": "name",
+ }, # sort=name is actually the default
+ ).json()
+
+ yield body
+
+ page_id += 1 # Consider stopping before yielding?
+
+ if len(body) == 0:
+ break # Consider stopping if number of items < 100?
+
+ def get_origins_from_page(self, page: HexListerPage) -> Iterator[ListedOrigin]:
+ """Convert a page of Hex packages into an iterator of ListedOrigins"""
+ assert self.lister_obj.id is not None
+
+ for pkg in page:
+
+ yield ListedOrigin(
+ lister_id=self.lister_obj.id,
+ visit_type=self.VISIT_TYPE,
+ url=pkg["html_url"],
+ last_update=iso8601.parse_date(pkg["updated_at"]),
+ extra_loader_arguments={
+ "releases": {
+ release["url"]: {
+ "package": pkg["name"],
+ "version": release["version"],
+ "tar_url": get_tar_url(pkg["name"], release["version"]),
+ }
+ for release in pkg["releases"]
+ }
+ },
+ )
+
+ def commit_page(self, page: HexListerPage) -> None:
+ if len(page) == 0:
+ return
+
+ last_pkg_name = page[-1]["name"]
+
+ # incoming page should have alphabetically greater
+ # last package name than the one stored in the state
+ if last_pkg_name > self.state.last_pkg_name:
+ self.state.last_pkg_name = last_pkg_name
+ self.state.last_page_id += 1
+
+ def finalize(self) -> None:
+ scheduler_state = self.get_state_from_scheduler()
+
+ if self.state.last_page_id > scheduler_state.last_page_id:
+ self.updated = True
diff --git a/swh/lister/hex/tasks.py b/swh/lister/hex/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/hex/tasks.py
@@ -0,0 +1,23 @@
+# Copyright (C) 2022 The Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from typing import Dict, Optional
+
+from celery import shared_task
+
+from .lister import HexLister
+
+
+@shared_task(name=__name__ + ".FullHexRelister")
+def list_hex_full(
+ instance: Optional[str] = None,
+) -> Dict[str, int]:
+ """Full update of a Hex.pm instance"""
+ lister = HexLister.from_configfile(instance=instance)
+ return lister.run().dict()
+
+
+@shared_task(name=__name__ + ".ping")
+def _ping() -> str:
+ return "OK"
diff --git a/swh/lister/hex/tests/__init__.py b/swh/lister/hex/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/hex/tests/data/https_hex.pm/page1.json b/swh/lister/hex/tests/data/https_hex.pm/page1.json
new file mode 100644
--- /dev/null
+++ b/swh/lister/hex/tests/data/https_hex.pm/page1.json
@@ -0,0 +1,190 @@
+[
+ {
+ "configs": {
+ "erlang.mk": "dep_aadya = hex 0.1.0",
+ "mix.exs": "{:aadya, \"~> 0.1.0\"}",
+ "rebar.config": "{aadya, \"0.1.0\"}"
+ },
+ "docs_html_url": "https://hexdocs.pm/aadya/",
+ "downloads": {
+ "all": 4199,
+ "recent": 2
+ },
+ "html_url": "https://hex.pm/packages/aadya",
+ "inserted_at": "2018-03-12T02:13:42.826404Z",
+ "latest_stable_version": "0.1.0",
+ "latest_version": "0.1.0",
+ "meta": {
+ "description": "CoAP framework",
+ "licenses": [
+ "GNU Lesser General Public License v3.0"
+ ],
+ "links": {
+ "GitHub": "https://gitlab.com/ahamtech/coap/aadya.git"
+ },
+ "maintainers": [
+ "Anwesh Reddy",
+ "Mahesh Reddy",
+ "Malreddy Ankanna"
+ ]
+ },
+ "name": "aadya",
+ "releases": [
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/aadya/releases/0.1.0",
+ "version": "0.1.0"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2018-03-12T02:19:58.150334Z",
+ "url": "https://hex.pm/api/packages/aadya"
+ },
+ {
+ "configs": {
+ "erlang.mk": "dep_active_job = hex 0.1.1",
+ "mix.exs": "{:active_job, \"~> 0.1.1\"}",
+ "rebar.config": "{active_job, \"0.1.1\"}"
+ },
+ "docs_html_url": null,
+ "downloads": {
+ "all": 575,
+ "recent": 8
+ },
+ "html_url": "https://hex.pm/packages/active_job",
+ "inserted_at": "2022-05-04T05:07:26.204862Z",
+ "latest_stable_version": "0.1.1",
+ "latest_version": "0.1.1",
+ "meta": {
+ "description": "Declare job workers that can be run by a variety of queuing backends. This plugin is a port of the Rails ActiveJob gem",
+ "licenses": [
+ "MIT"
+ ],
+ "links": {
+ "GitHub": "https://github.com/chaskiq/ex-rails/active_job"
+ },
+ "maintainers": []
+ },
+ "name": "active_job",
+ "releases": [
+ {
+ "has_docs": false,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/active_job/releases/0.1.1",
+ "version": "0.1.1"
+ },
+ {
+ "has_docs": false,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/active_job/releases/0.1.0",
+ "version": "0.1.0"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2022-06-17T07:01:32.486546Z",
+ "url": "https://hex.pm/api/packages/active_job"
+ },
+ {
+ "configs": {
+ "erlang.mk": "dep_active_jorb = hex 0.1.2",
+ "mix.exs": "{:active_jorb, \"~> 0.1.2\"}",
+ "rebar.config": "{active_jorb, \"0.1.2\"}"
+ },
+ "docs_html_url": "https://hexdocs.pm/active_jorb/",
+ "downloads": {
+ "all": 7148,
+ "recent": 10
+ },
+ "html_url": "https://hex.pm/packages/active_jorb",
+ "inserted_at": "2018-04-10T17:35:34.698754Z",
+ "latest_stable_version": "0.1.2",
+ "latest_version": "0.1.2",
+ "meta": {
+ "description": "A library to enqueue jobs with your Active Job job processor. You may want\nthis when strangling your Rails project.",
+ "licenses": [
+ "MIT"
+ ],
+ "links": {
+ "Github": "https://github.com/PrecisionNutrition/active_jorb"
+ },
+ "maintainers": [
+ "James Herdman"
+ ]
+ },
+ "name": "active_jorb",
+ "releases": [
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/active_jorb/releases/0.1.2",
+ "version": "0.1.2"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/active_jorb/releases/0.1.1",
+ "version": "0.1.1"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/active_jorb/releases/0.1.0",
+ "version": "0.1.0"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2018-04-24T17:42:25.744971Z",
+ "url": "https://hex.pm/api/packages/active_jorb"
+ },
+ {
+ "configs": {
+ "erlang.mk": "dep_acx = hex 0.0.2",
+ "mix.exs": "{:acx, \"~> 0.0.2\"}",
+ "rebar.config": "{acx, \"0.0.2\"}"
+ },
+ "docs_html_url": "https://hexdocs.pm/acx/",
+ "downloads": {
+ "all": 4790,
+ "recent": 8
+ },
+ "html_url": "https://hex.pm/packages/acx",
+ "inserted_at": "2018-01-22T06:52:21.027352Z",
+ "latest_stable_version": "0.0.2",
+ "latest_version": "0.0.2",
+ "meta": {
+ "description": "A Elixir wrap for API of Acx.io exchange.",
+ "licenses": [
+ "MIT"
+ ],
+ "links": {
+ "Github": "https://github.com/2pd/acx-elixir"
+ },
+ "maintainers": [
+ "Liang Shi"
+ ]
+ },
+ "name": "acx",
+ "releases": [
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/acx/releases/0.0.2",
+ "version": "0.0.2"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/acx/releases/0.0.1",
+ "version": "0.0.1"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2018-01-30T04:56:03.053561Z",
+ "url": "https://hex.pm/api/packages/acx"
+ }
+]
\ No newline at end of file
diff --git a/swh/lister/hex/tests/data/https_hex.pm/page2.json b/swh/lister/hex/tests/data/https_hex.pm/page2.json
new file mode 100644
--- /dev/null
+++ b/swh/lister/hex/tests/data/https_hex.pm/page2.json
@@ -0,0 +1,223 @@
+[
+ {
+ "configs": {
+ "erlang.mk": "dep_adam7 = hex 0.4.0",
+ "mix.exs": "{:adam7, \"~> 0.4.0\"}",
+ "rebar.config": "{adam7, \"0.4.0\"}"
+ },
+ "docs_html_url": null,
+ "downloads": {
+ "all": 12746,
+ "recent": 27,
+ "week": 10
+ },
+ "html_url": "https://hex.pm/packages/adam7",
+ "inserted_at": "2015-10-10T05:09:04.399996Z",
+ "latest_stable_version": "0.4.0",
+ "latest_version": "0.4.0",
+ "meta": {
+ "description": "Adam7 interlacing library for Elixir.\nPrimarily used for interlacing and de-interlacing image data for PNGs.",
+ "licenses": [
+ "MIT"
+ ],
+ "links": {
+ "github": "https://github.com/SenecaSystems/imagineer"
+ },
+ "maintainers": [
+ "Chris Maddox"
+ ]
+ },
+ "name": "adam7",
+ "releases": [
+ {
+ "has_docs": false,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/adam7/releases/0.4.0",
+ "version": "0.4.0"
+ },
+ {
+ "has_docs": false,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/adam7/releases/0.3.0",
+ "version": "0.3.0"
+ },
+ {
+ "has_docs": false,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/adam7/releases/0.2.0",
+ "version": "0.2.0"
+ },
+ {
+ "has_docs": false,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/adam7/releases/0.1.1",
+ "version": "0.1.1"
+ },
+ {
+ "has_docs": false,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/adam7/releases/0.1.0",
+ "version": "0.1.0"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2015-10-10T05:09:04.400005Z",
+ "url": "https://hex.pm/api/packages/adam7"
+ },
+ {
+ "configs": {
+ "erlang.mk": "dep_addressBook = hex 0.1.1",
+ "mix.exs": "{:addressBook, \"~> 0.1.1\"}",
+ "rebar.config": "{addressBook, \"0.1.1\"}"
+ },
+ "docs_html_url": "https://hexdocs.pm/addressBook/",
+ "downloads": {
+ "all": 4871,
+ "recent": 8,
+ "week": 4
+ },
+ "html_url": "https://hex.pm/packages/addressBook",
+ "inserted_at": "2017-06-05T19:59:12.978909Z",
+ "latest_stable_version": "0.1.1",
+ "latest_version": "0.1.1",
+ "meta": {
+ "description": "Simple package for managing address book.",
+ "licenses": [
+ "Apache 2.0"
+ ],
+ "links": {
+ "GitHub": "https://github.com/maxiwoj/AddressBook"
+ },
+ "maintainers": [
+ "Maksymilian Wojczuk"
+ ]
+ },
+ "name": "addressBook",
+ "releases": [
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/addressBook/releases/0.1.1",
+ "version": "0.1.1"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/addressBook/releases/0.1.0",
+ "version": "0.1.0"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2017-06-05T21:06:42.788652Z",
+ "url": "https://hex.pm/api/packages/addressBook"
+ },
+ {
+ "configs": {
+ "erlang.mk": "dep_address_us = hex 0.4.1",
+ "mix.exs": "{:address_us, \"~> 0.4.1\"}",
+ "rebar.config": "{address_us, \"0.4.1\"}"
+ },
+ "docs_html_url": "https://hexdocs.pm/address_us/",
+ "downloads": {
+ "all": 55337,
+ "day": 2,
+ "recent": 7105,
+ "week": 194
+ },
+ "html_url": "https://hex.pm/packages/address_us",
+ "inserted_at": "2014-10-10T20:24:11.000000Z",
+ "latest_stable_version": "0.4.1",
+ "latest_version": "0.4.1",
+ "meta": {
+ "description": "Library for parsing US Addresses into their individual parts.",
+ "licenses": [
+ "Apache 2.0"
+ ],
+ "links": {
+ "Docs": "https://smashedtoatoms.github.io/address_us",
+ "GitHub": "https://github.com/smashedtoatoms/address_us"
+ },
+ "maintainers": []
+ },
+ "name": "address_us",
+ "releases": [
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/address_us/releases/0.4.1",
+ "version": "0.4.1"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/address_us/releases/0.4.0",
+ "version": "0.4.0"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/address_us/releases/0.2.1",
+ "version": "0.2.1"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/address_us/releases/0.1.1",
+ "version": "0.1.1"
+ },
+ {
+ "has_docs": false,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/address_us/releases/0.1.0",
+ "version": "0.1.0"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2020-12-11T05:07:11.118292Z",
+ "url": "https://hex.pm/api/packages/address_us"
+ },
+ {
+ "configs": {
+ "erlang.mk": "dep_alchemy_vm = hex 0.8.1",
+ "mix.exs": "{:alchemy_vm, \"~> 0.8.1\"}",
+ "rebar.config": "{alchemy_vm, \"0.8.1\"}"
+ },
+ "docs_html_url": "https://hexdocs.pm/alchemy_vm/",
+ "downloads": {
+ "all": 2368,
+ "recent": 3,
+ "week": 2
+ },
+ "html_url": "https://hex.pm/packages/alchemy_vm",
+ "inserted_at": "2019-03-27T00:32:40.709924Z",
+ "latest_stable_version": "0.8.1",
+ "latest_version": "0.8.1",
+ "meta": {
+ "description": "A WebAssembly Virtual Machine",
+ "licenses": [
+ "MIT"
+ ],
+ "links": {
+ "Elixium Network Website": "https://www.elixiumnetwork.org",
+ "GitHub": "https://github.com/ElixiumNetwork/AlchemyVM"
+ },
+ "maintainers": []
+ },
+ "name": "alchemy_vm",
+ "releases": [
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/alchemy_vm/releases/0.8.1",
+ "version": "0.8.1"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2019-03-27T00:32:47.822901Z",
+ "url": "https://hex.pm/api/packages/alchemy_vm"
+ }
+]
\ No newline at end of file
diff --git a/swh/lister/hex/tests/data/https_hex.pm/page3.json b/swh/lister/hex/tests/data/https_hex.pm/page3.json
new file mode 100644
--- /dev/null
+++ b/swh/lister/hex/tests/data/https_hex.pm/page3.json
@@ -0,0 +1,108 @@
+[
+ {
+ "configs": {
+ "erlang.mk": "dep_quagga_def = hex 0.4.0",
+ "mix.exs": "{:quagga_def, \"~> 0.4.0\"}",
+ "rebar.config": "{quagga_def, \"0.4.0\"}"
+ },
+ "docs_html_url": "https://hexdocs.pm/quagga_def/",
+ "downloads": {
+ "all": 106,
+ "day": 12,
+ "recent": 106,
+ "week": 22
+ },
+ "html_url": "https://hex.pm/packages/quagga_def",
+ "inserted_at": "2022-10-12T07:03:48.666872Z",
+ "latest_stable_version": "0.4.0",
+ "latest_version": "0.4.0",
+ "meta": {
+ "description": "Quagga bamboo clump convention definitions and functions",
+ "licenses": [
+ "MIT"
+ ],
+ "links": {
+ "GitHub": "https://github.com/mwmiller/quagga_def"
+ },
+ "maintainers": []
+ },
+ "name": "quagga_def",
+ "releases": [
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/quagga_def/releases/0.4.0",
+ "version": "0.4.0"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/quagga_def/releases/0.3.0",
+ "version": "0.3.0"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/quagga_def/releases/0.2.0",
+ "version": "0.2.0"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/quagga_def/releases/0.1.0",
+ "version": "0.1.0"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2022-11-29T11:41:15.862303Z",
+ "url": "https://hex.pm/api/packages/quagga_def"
+ },
+ {
+ "configs": {
+ "erlang.mk": "dep_logger_dev = hex 0.1.1",
+ "mix.exs": "{:logger_dev, \"~> 0.1.1\"}",
+ "rebar.config": "{logger_dev, \"0.1.1\"}"
+ },
+ "docs_html_url": "https://hexdocs.pm/logger_dev/",
+ "downloads": {
+ "all": 188,
+ "day": 4,
+ "recent": 188,
+ "week": 48
+ },
+ "html_url": "https://hex.pm/packages/logger_dev",
+ "inserted_at": "2022-09-08T21:37:20.359224Z",
+ "latest_stable_version": "0.1.1",
+ "latest_version": "0.1.1",
+ "meta": {
+ "description": "A more readable formatter for Logger.Backends.Console",
+ "licenses": [
+ "MIT"
+ ],
+ "links": {
+ "GitHub": "https://github.com/protestContest/logger_dev"
+ },
+ "maintainers": []
+ },
+ "name": "logger_dev",
+ "releases": [
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/logger_dev/releases/0.1.1",
+ "version": "0.1.1"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/logger_dev/releases/0.1.0",
+ "version": "0.1.0"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2022-09-09T21:00:14.993273Z",
+ "url": "https://hex.pm/api/packages/logger_dev"
+ }
+]
\ No newline at end of file
diff --git a/swh/lister/hex/tests/test_lister.py b/swh/lister/hex/tests/test_lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/hex/tests/test_lister.py
@@ -0,0 +1,141 @@
+import json
+from pathlib import Path
+from typing import List
+
+import pytest
+
+from swh.lister.hex.lister import HexLister, ListedOrigin
+from swh.scheduler.interface import SchedulerInterface
+
+
+@pytest.fixture
+def hexpm_page(datadir):
+ def get_page(page_id: int):
+ # FIXME: Update the test data to match ?sort=name
+ text = Path(datadir, "https_hex.pm", f"page{page_id}.json").read_text()
+ page_result = json.loads(text)
+ origins = [origin["html_url"] for origin in page_result]
+ return origins, page_result
+
+ return get_page
+
+
+def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedOrigin]):
+ """Asserts that the two collections have the same origin URLs."""
+ assert set(lister_urls) == {origin.url for origin in scheduler_origins}
+
+
+def test_full_lister_hex(
+ swh_scheduler: SchedulerInterface,
+ requests_mock,
+ hexpm_page,
+):
+ """
+ Simulate a full listing of packages for hex (erlang package manager)
+ """
+ p1_origin_urls, p1_json = hexpm_page(1)
+ p2_origin_urls, p2_json = hexpm_page(2)
+ p3_origin_urls, p3_json = hexpm_page(3)
+
+ requests_mock.get("https://hex.pm/api/packages/?page=1", json=p1_json)
+ requests_mock.get("https://hex.pm/api/packages/?page=2", json=p2_json)
+ requests_mock.get("https://hex.pm/api/packages/?page=3", json=p3_json)
+ requests_mock.get("https://hex.pm/api/packages/?page=4", json=[])
+
+ lister = HexLister(swh_scheduler)
+
+ stats = lister.run()
+ scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+ lister_state = lister.get_state_from_scheduler()
+
+ assert stats.pages == 4
+ assert stats.origins == 10 # 4 + 4 + 2 + 0
+
+ check_listed_origins(
+ p1_origin_urls + p2_origin_urls + p3_origin_urls, scheduler_origins
+ )
+
+ assert lister_state.last_page_id == 4
+ assert lister_state.last_pkg_name == "logger_dev"
+ assert lister.updated
+
+
+def test_gogs_incremental_lister(
+ swh_scheduler,
+ requests_mock,
+ hexpm_page,
+):
+ lister = HexLister(swh_scheduler)
+
+ # First run: P1 and P2 return 4 origins each and P3 returns 0
+ p1_origin_urls, p1_json = hexpm_page(1)
+ p2_origin_urls, p2_json = hexpm_page(2)
+
+ requests_mock.get("https://hex.pm/api/packages/?page=1", json=p1_json)
+ requests_mock.get("https://hex.pm/api/packages/?page=2", json=p2_json)
+ requests_mock.get("https://hex.pm/api/packages/?page=3", json=[])
+
+ stats = lister.run()
+
+ assert stats.pages == 3
+ assert stats.origins == 8
+
+ scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+
+ lister_state = lister.get_state_from_scheduler()
+ assert lister_state.last_page_id == 3
+ assert lister.state.last_pkg_name == "alchemy_vm"
+ assert lister.updated
+
+ check_listed_origins(p1_origin_urls + p2_origin_urls, scheduler_origins)
+
+ lister.updated = False # Reset the flag
+
+ # Second run: P3 isn't empty anymore
+ p3_origin_urls, p3_json = hexpm_page(3)
+
+ requests_mock.get("https://hex.pm/api/packages/?page=3", json=p3_json)
+ requests_mock.get(
+ "https://hex.pm/api/packages/?page=4", json=[]
+ ) # TODO: Try with 40x/50x here?
+
+ stats = lister.run()
+
+ assert stats.pages == 2
+ assert stats.origins == 2
+
+ scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+
+ lister_state = lister.get_state_from_scheduler()
+ assert (
+ lister_state.last_page_id == 4
+ ) # TODO: Shouldn't this be 3 given that P4 is empty?
+ assert lister.state.last_pkg_name == "logger_dev"
+ assert lister.updated
+
+ check_listed_origins(
+ p1_origin_urls + p2_origin_urls + p3_origin_urls, scheduler_origins
+ )
+
+ lister.updated = False # Reset the flag
+
+ # Third run: No new origins
+ # The lister should resume from the stored page id (P4), which is still empty
+
+ stats = lister.run()
+
+ assert stats.pages == 1
+ assert (
+ stats.origins == 0
+ ) # FIXME: inconsistent with Gogs lister. Either of them could be wrong
+
+ lister_state = lister.get_state_from_scheduler()
+ assert (
+ lister_state.last_page_id == 4
+ ) # TODO: Shouldn't this be 3 given that P4 is empty?
+ assert lister.state.last_pkg_name == "logger_dev"
+ assert lister.updated is False # No new origins so state isn't updated
+
+ check_listed_origins(
+ p1_origin_urls + p2_origin_urls + p3_origin_urls, scheduler_origins
+ )
diff --git a/swh/lister/hex/tests/test_tasks.py b/swh/lister/hex/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/hex/tests/test_tasks.py
@@ -0,0 +1,56 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from unittest.mock import patch
+
+from swh.lister.pattern import ListerStats
+
+
+def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
+ res = swh_scheduler_celery_app.send_task("swh.lister.hex.tasks.ping")
+ assert res
+ res.wait()
+ assert res.successful()
+ assert res.result == "OK"
+
+
+@patch("swh.lister.hex.tasks.HexLister")
+def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
+ lister.from_configfile.return_value = lister
+ lister.run.return_value = ListerStats(pages=10, origins=500)
+
+ kwargs = dict()
+ res = swh_scheduler_celery_app.send_task(
+ "swh.lister.hex.tasks.FullHexRelister",
+ kwargs=kwargs,
+ )
+ assert res
+ res.wait()
+ assert res.successful()
+
+ actual_kwargs = dict(**kwargs, instance=None)
+
+ lister.from_configfile.assert_called_once_with(**actual_kwargs)
+ lister.run.assert_called_once_with()
+
+
+@patch("swh.lister.hex.tasks.HexLister")
+def test_full_listing_params(
+ lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
+):
+ lister.from_configfile.return_value = lister
+ lister.run.return_value = ListerStats(pages=10, origins=500)
+
+ kwargs = dict(instance="hex.pm")
+ res = swh_scheduler_celery_app.send_task(
+ "swh.lister.hex.tasks.FullHexRelister",
+ kwargs=kwargs,
+ )
+ assert res
+ res.wait()
+ assert res.successful()
+
+ lister.from_configfile.assert_called_once_with(**kwargs)
+ lister.run.assert_called_once_with()
diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py
--- a/swh/lister/tests/test_cli.py
+++ b/swh/lister/tests/test_cli.py
@@ -40,7 +40,7 @@
"origin_upstream": "https://git.savannah.gnu.org/cgit/guix.git/",
},
"fedora": {
- "url": "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases//",
+ "url": "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/",
},
}

File Metadata

Mime Type
text/plain
Expires
Sun, Aug 17, 8:04 PM (1 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216782

Event Timeline