Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9696430
D8907.id.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
31 KB
Subscribers
None
D8907.id.diff
View Options
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -27,6 +27,7 @@
- `swh.lister.tuleap`
- `swh.lister.gogs`
- `swh.liser.fedora`
+- `swh.lister.hex`
Dependencies
------------
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -87,6 +87,7 @@
lister.maven=swh.lister.maven:register
lister.gogs=swh.lister.gogs:register
lister.fedora=swh.lister.fedora:register
+ lister.hex=swh.lister.hex:register
""",
classifiers=[
"Programming Language :: Python :: 3",
diff --git a/swh/lister/hex/__init__.py b/swh/lister/hex/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/hex/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def register():
+    """Describe the Hex lister plugin: its lister class and its task modules.
+
+    The lister class is imported inside the function rather than at module
+    import time.
+    """
+    from .lister import HexLister
+
+    task_modules = [f"{__name__}.tasks"]
+    return {"lister": HexLister, "task_modules": task_modules}
diff --git a/swh/lister/hex/lister.py b/swh/lister/hex/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/hex/lister.py
@@ -0,0 +1,130 @@
+# Copyright (C) 2021-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from dataclasses import asdict, dataclass
+import logging
+from typing import Any, Dict, Iterator, List
+from urllib.parse import urljoin
+
+import iso8601
+
+from swh.scheduler.interface import SchedulerInterface
+from swh.scheduler.model import ListedOrigin
+
+from ..pattern import CredentialsType, Lister
+
+logger = logging.getLogger(__name__)
+
+HexListerPage = List[Dict[str, Any]]
+
+
+def get_tar_url(pkg_name: str, release_version: str):
+    """Return the repo.hex.pm tarball URL for one release of a package."""
+    tarball_name = f"{pkg_name}-{release_version}.tar"
+    return f"https://repo.hex.pm/tarballs/{tarball_name}"
+
+
+@dataclass
+class HexListerState:
+    """The HexLister instance state. This is used for incremental listing."""
+
+    last_page_id: int = 1
+    """Id of the page the next listing pass starts from. It is incremented each
+    time a committed page ends with a package name greater than
+    ``last_pkg_name``, so it ends up one past the last non-empty page listed."""
+    last_pkg_name: str = ""
+    """Name of the last package of the most recent page that advanced the state"""
+
+
+class HexLister(Lister[HexListerState, HexListerPage]):
+    """List origins from the "Hex" forge.
+
+    Packages are read page by page from the ``packages/`` endpoint of the Hex
+    API, requested with ``sort=name``. The state stores the page id to resume
+    from and the name of the last package seen, which makes the listing
+    incremental.
+    """
+
+    LISTER_NAME = "hex"
+    VISIT_TYPE = "hex"
+
+    HEX_API_URL = "https://hex.pm/api/"
+    PACKAGES_PATH = "packages/"
+
+    def __init__(
+        self,
+        scheduler: SchedulerInterface,
+        instance: str = "hex",
+        credentials: CredentialsType = None,
+    ):
+        super().__init__(
+            scheduler=scheduler,
+            credentials=credentials,
+            url=self.HEX_API_URL,
+            instance=instance,
+        )
+        # TODO: Add authentication support
+
+        # Ask the API for JSON responses on every request of this session.
+        self.session.headers.update({"Accept": "application/json"})
+
+    def state_from_dict(self, d: Dict[str, Any]) -> HexListerState:
+        """Build the lister state from its dict form."""
+        return HexListerState(**d)
+
+    def state_to_dict(self, state: HexListerState) -> Dict[str, Any]:
+        """Convert the lister state to a plain dict."""
+        return asdict(state)
+
+    def get_pages(self) -> Iterator[HexListerPage]:
+        """Yield pages of packages, starting at the page id held in the state.
+
+        Pages are requested in increasing page order. The first empty page is
+        yielded as well, and then ends the iteration.
+        """
+        page_id = self.state.last_page_id
+        if page_id is None:
+            page_id = 1
+
+        url = urljoin(self.url, self.PACKAGES_PATH)
+
+        while True:
+            body = self.http_request(
+                url,
+                params={
+                    "page": page_id,
+                    "sort": "name",
+                },  # sort=name is actually the default
+            ).json()
+
+            yield body  # Consider stopping before yielding?
+
+            if not body:
+                break  # Consider stopping if number of items < 100?
+
+            page_id += 1
+
+    def get_origins_from_page(self, page: HexListerPage) -> Iterator[ListedOrigin]:
+        """Yield one ListedOrigin per package of the page.
+
+        Each origin carries, under ``extra_loader_arguments["releases"]``, a
+        mapping from the release API URL to the package name, the release
+        version and the tarball URL of that release.
+        """
+        assert self.lister_obj.id is not None
+
+        for pkg in page:
+            pkg_name = pkg["name"]
+            releases = {
+                release["url"]: {
+                    "package": pkg_name,
+                    "version": release["version"],
+                    "tar_url": get_tar_url(pkg_name, release["version"]),
+                }
+                for release in pkg["releases"]
+            }
+
+            yield ListedOrigin(
+                lister_id=self.lister_obj.id,
+                visit_type=self.VISIT_TYPE,
+                url=pkg["html_url"],
+                last_update=iso8601.parse_date(pkg["updated_at"]),
+                extra_loader_arguments={"releases": releases},
+            )
+
+    def commit_page(self, page: HexListerPage) -> None:
+        """Advance the state after a page has been processed.
+
+        Empty pages leave the state untouched.
+        """
+        if not page:
+            return
+
+        last_pkg_name = page[-1]["name"]
+
+        # incoming page should have alphabetically greater
+        # last package name than the one stored in the state
+        # NOTE(review): any non-empty page that passes this check moves
+        # last_page_id forward, including a partially filled last page, so
+        # the next pass starts after it. Packages later added to that page
+        # (or to earlier ones) look like they would be skipped - confirm.
+        if last_pkg_name > self.state.last_pkg_name:
+            self.state.last_pkg_name = last_pkg_name
+            self.state.last_page_id += 1
+
+    def finalize(self) -> None:
+        """Flag the lister as updated when this run moved the page id forward.
+
+        The comparison is made against the state currently held by the
+        scheduler.
+        """
+        scheduler_state = self.get_state_from_scheduler()
+
+        if self.state.last_page_id > scheduler_state.last_page_id:
+            self.updated = True
diff --git a/swh/lister/hex/tasks.py b/swh/lister/hex/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/hex/tasks.py
@@ -0,0 +1,23 @@
+# Copyright (C) 2022 The Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from typing import Dict, Optional
+
+from celery import shared_task
+
+from .lister import HexLister
+
+
+@shared_task(name=__name__ + ".FullHexRelister")
+def list_hex_full(
+    instance: Optional[str] = None,
+) -> Dict[str, int]:
+    """Run a full listing of a Hex.pm instance and return its stats as a dict"""
+    stats = HexLister.from_configfile(instance=instance).run()
+    return stats.dict()
+
+
+@shared_task(name=__name__ + ".ping")
+def _ping() -> str:
+    """Return the string "OK" unconditionally."""
+    return "OK"
diff --git a/swh/lister/hex/tests/__init__.py b/swh/lister/hex/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/hex/tests/data/https_hex.pm/page1.json b/swh/lister/hex/tests/data/https_hex.pm/page1.json
new file mode 100644
--- /dev/null
+++ b/swh/lister/hex/tests/data/https_hex.pm/page1.json
@@ -0,0 +1,190 @@
+[
+ {
+ "configs": {
+ "erlang.mk": "dep_aadya = hex 0.1.0",
+ "mix.exs": "{:aadya, \"~> 0.1.0\"}",
+ "rebar.config": "{aadya, \"0.1.0\"}"
+ },
+ "docs_html_url": "https://hexdocs.pm/aadya/",
+ "downloads": {
+ "all": 4199,
+ "recent": 2
+ },
+ "html_url": "https://hex.pm/packages/aadya",
+ "inserted_at": "2018-03-12T02:13:42.826404Z",
+ "latest_stable_version": "0.1.0",
+ "latest_version": "0.1.0",
+ "meta": {
+ "description": "CoAP framework",
+ "licenses": [
+ "GNU Lesser General Public License v3.0"
+ ],
+ "links": {
+ "GitHub": "https://gitlab.com/ahamtech/coap/aadya.git"
+ },
+ "maintainers": [
+ "Anwesh Reddy",
+ "Mahesh Reddy",
+ "Malreddy Ankanna"
+ ]
+ },
+ "name": "aadya",
+ "releases": [
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/aadya/releases/0.1.0",
+ "version": "0.1.0"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2018-03-12T02:19:58.150334Z",
+ "url": "https://hex.pm/api/packages/aadya"
+ },
+ {
+ "configs": {
+ "erlang.mk": "dep_active_job = hex 0.1.1",
+ "mix.exs": "{:active_job, \"~> 0.1.1\"}",
+ "rebar.config": "{active_job, \"0.1.1\"}"
+ },
+ "docs_html_url": null,
+ "downloads": {
+ "all": 575,
+ "recent": 8
+ },
+ "html_url": "https://hex.pm/packages/active_job",
+ "inserted_at": "2022-05-04T05:07:26.204862Z",
+ "latest_stable_version": "0.1.1",
+ "latest_version": "0.1.1",
+ "meta": {
+ "description": "Declare job workers that can be run by a variety of queuing backends. This plugin is a port of the Rails ActiveJob gem",
+ "licenses": [
+ "MIT"
+ ],
+ "links": {
+ "GitHub": "https://github.com/chaskiq/ex-rails/active_job"
+ },
+ "maintainers": []
+ },
+ "name": "active_job",
+ "releases": [
+ {
+ "has_docs": false,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/active_job/releases/0.1.1",
+ "version": "0.1.1"
+ },
+ {
+ "has_docs": false,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/active_job/releases/0.1.0",
+ "version": "0.1.0"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2022-06-17T07:01:32.486546Z",
+ "url": "https://hex.pm/api/packages/active_job"
+ },
+ {
+ "configs": {
+ "erlang.mk": "dep_active_jorb = hex 0.1.2",
+ "mix.exs": "{:active_jorb, \"~> 0.1.2\"}",
+ "rebar.config": "{active_jorb, \"0.1.2\"}"
+ },
+ "docs_html_url": "https://hexdocs.pm/active_jorb/",
+ "downloads": {
+ "all": 7148,
+ "recent": 10
+ },
+ "html_url": "https://hex.pm/packages/active_jorb",
+ "inserted_at": "2018-04-10T17:35:34.698754Z",
+ "latest_stable_version": "0.1.2",
+ "latest_version": "0.1.2",
+ "meta": {
+ "description": "A library to enqueue jobs with your Active Job job processor. You may want\nthis when strangling your Rails project.",
+ "licenses": [
+ "MIT"
+ ],
+ "links": {
+ "Github": "https://github.com/PrecisionNutrition/active_jorb"
+ },
+ "maintainers": [
+ "James Herdman"
+ ]
+ },
+ "name": "active_jorb",
+ "releases": [
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/active_jorb/releases/0.1.2",
+ "version": "0.1.2"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/active_jorb/releases/0.1.1",
+ "version": "0.1.1"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/active_jorb/releases/0.1.0",
+ "version": "0.1.0"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2018-04-24T17:42:25.744971Z",
+ "url": "https://hex.pm/api/packages/active_jorb"
+ },
+ {
+ "configs": {
+ "erlang.mk": "dep_acx = hex 0.0.2",
+ "mix.exs": "{:acx, \"~> 0.0.2\"}",
+ "rebar.config": "{acx, \"0.0.2\"}"
+ },
+ "docs_html_url": "https://hexdocs.pm/acx/",
+ "downloads": {
+ "all": 4790,
+ "recent": 8
+ },
+ "html_url": "https://hex.pm/packages/acx",
+ "inserted_at": "2018-01-22T06:52:21.027352Z",
+ "latest_stable_version": "0.0.2",
+ "latest_version": "0.0.2",
+ "meta": {
+ "description": "A Elixir wrap for API of Acx.io exchange.",
+ "licenses": [
+ "MIT"
+ ],
+ "links": {
+ "Github": "https://github.com/2pd/acx-elixir"
+ },
+ "maintainers": [
+ "Liang Shi"
+ ]
+ },
+ "name": "acx",
+ "releases": [
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/acx/releases/0.0.2",
+ "version": "0.0.2"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/acx/releases/0.0.1",
+ "version": "0.0.1"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2018-01-30T04:56:03.053561Z",
+ "url": "https://hex.pm/api/packages/acx"
+ }
+]
\ No newline at end of file
diff --git a/swh/lister/hex/tests/data/https_hex.pm/page2.json b/swh/lister/hex/tests/data/https_hex.pm/page2.json
new file mode 100644
--- /dev/null
+++ b/swh/lister/hex/tests/data/https_hex.pm/page2.json
@@ -0,0 +1,223 @@
+[
+ {
+ "configs": {
+ "erlang.mk": "dep_adam7 = hex 0.4.0",
+ "mix.exs": "{:adam7, \"~> 0.4.0\"}",
+ "rebar.config": "{adam7, \"0.4.0\"}"
+ },
+ "docs_html_url": null,
+ "downloads": {
+ "all": 12746,
+ "recent": 27,
+ "week": 10
+ },
+ "html_url": "https://hex.pm/packages/adam7",
+ "inserted_at": "2015-10-10T05:09:04.399996Z",
+ "latest_stable_version": "0.4.0",
+ "latest_version": "0.4.0",
+ "meta": {
+ "description": "Adam7 interlacing library for Elixir.\nPrimarily used for interlacing and de-interlacing image data for PNGs.",
+ "licenses": [
+ "MIT"
+ ],
+ "links": {
+ "github": "https://github.com/SenecaSystems/imagineer"
+ },
+ "maintainers": [
+ "Chris Maddox"
+ ]
+ },
+ "name": "adam7",
+ "releases": [
+ {
+ "has_docs": false,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/adam7/releases/0.4.0",
+ "version": "0.4.0"
+ },
+ {
+ "has_docs": false,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/adam7/releases/0.3.0",
+ "version": "0.3.0"
+ },
+ {
+ "has_docs": false,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/adam7/releases/0.2.0",
+ "version": "0.2.0"
+ },
+ {
+ "has_docs": false,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/adam7/releases/0.1.1",
+ "version": "0.1.1"
+ },
+ {
+ "has_docs": false,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/adam7/releases/0.1.0",
+ "version": "0.1.0"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2015-10-10T05:09:04.400005Z",
+ "url": "https://hex.pm/api/packages/adam7"
+ },
+ {
+ "configs": {
+ "erlang.mk": "dep_addressBook = hex 0.1.1",
+ "mix.exs": "{:addressBook, \"~> 0.1.1\"}",
+ "rebar.config": "{addressBook, \"0.1.1\"}"
+ },
+ "docs_html_url": "https://hexdocs.pm/addressBook/",
+ "downloads": {
+ "all": 4871,
+ "recent": 8,
+ "week": 4
+ },
+ "html_url": "https://hex.pm/packages/addressBook",
+ "inserted_at": "2017-06-05T19:59:12.978909Z",
+ "latest_stable_version": "0.1.1",
+ "latest_version": "0.1.1",
+ "meta": {
+ "description": "Simple package for managing address book.",
+ "licenses": [
+ "Apache 2.0"
+ ],
+ "links": {
+ "GitHub": "https://github.com/maxiwoj/AddressBook"
+ },
+ "maintainers": [
+ "Maksymilian Wojczuk"
+ ]
+ },
+ "name": "addressBook",
+ "releases": [
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/addressBook/releases/0.1.1",
+ "version": "0.1.1"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/addressBook/releases/0.1.0",
+ "version": "0.1.0"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2017-06-05T21:06:42.788652Z",
+ "url": "https://hex.pm/api/packages/addressBook"
+ },
+ {
+ "configs": {
+ "erlang.mk": "dep_address_us = hex 0.4.1",
+ "mix.exs": "{:address_us, \"~> 0.4.1\"}",
+ "rebar.config": "{address_us, \"0.4.1\"}"
+ },
+ "docs_html_url": "https://hexdocs.pm/address_us/",
+ "downloads": {
+ "all": 55337,
+ "day": 2,
+ "recent": 7105,
+ "week": 194
+ },
+ "html_url": "https://hex.pm/packages/address_us",
+ "inserted_at": "2014-10-10T20:24:11.000000Z",
+ "latest_stable_version": "0.4.1",
+ "latest_version": "0.4.1",
+ "meta": {
+ "description": "Library for parsing US Addresses into their individual parts.",
+ "licenses": [
+ "Apache 2.0"
+ ],
+ "links": {
+ "Docs": "https://smashedtoatoms.github.io/address_us",
+ "GitHub": "https://github.com/smashedtoatoms/address_us"
+ },
+ "maintainers": []
+ },
+ "name": "address_us",
+ "releases": [
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/address_us/releases/0.4.1",
+ "version": "0.4.1"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/address_us/releases/0.4.0",
+ "version": "0.4.0"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/address_us/releases/0.2.1",
+ "version": "0.2.1"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/address_us/releases/0.1.1",
+ "version": "0.1.1"
+ },
+ {
+ "has_docs": false,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/address_us/releases/0.1.0",
+ "version": "0.1.0"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2020-12-11T05:07:11.118292Z",
+ "url": "https://hex.pm/api/packages/address_us"
+ },
+ {
+ "configs": {
+ "erlang.mk": "dep_alchemy_vm = hex 0.8.1",
+ "mix.exs": "{:alchemy_vm, \"~> 0.8.1\"}",
+ "rebar.config": "{alchemy_vm, \"0.8.1\"}"
+ },
+ "docs_html_url": "https://hexdocs.pm/alchemy_vm/",
+ "downloads": {
+ "all": 2368,
+ "recent": 3,
+ "week": 2
+ },
+ "html_url": "https://hex.pm/packages/alchemy_vm",
+ "inserted_at": "2019-03-27T00:32:40.709924Z",
+ "latest_stable_version": "0.8.1",
+ "latest_version": "0.8.1",
+ "meta": {
+ "description": "A WebAssembly Virtual Machine",
+ "licenses": [
+ "MIT"
+ ],
+ "links": {
+ "Elixium Network Website": "https://www.elixiumnetwork.org",
+ "GitHub": "https://github.com/ElixiumNetwork/AlchemyVM"
+ },
+ "maintainers": []
+ },
+ "name": "alchemy_vm",
+ "releases": [
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/alchemy_vm/releases/0.8.1",
+ "version": "0.8.1"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2019-03-27T00:32:47.822901Z",
+ "url": "https://hex.pm/api/packages/alchemy_vm"
+ }
+]
\ No newline at end of file
diff --git a/swh/lister/hex/tests/data/https_hex.pm/page3.json b/swh/lister/hex/tests/data/https_hex.pm/page3.json
new file mode 100644
--- /dev/null
+++ b/swh/lister/hex/tests/data/https_hex.pm/page3.json
@@ -0,0 +1,108 @@
+[
+ {
+ "configs": {
+ "erlang.mk": "dep_quagga_def = hex 0.4.0",
+ "mix.exs": "{:quagga_def, \"~> 0.4.0\"}",
+ "rebar.config": "{quagga_def, \"0.4.0\"}"
+ },
+ "docs_html_url": "https://hexdocs.pm/quagga_def/",
+ "downloads": {
+ "all": 106,
+ "day": 12,
+ "recent": 106,
+ "week": 22
+ },
+ "html_url": "https://hex.pm/packages/quagga_def",
+ "inserted_at": "2022-10-12T07:03:48.666872Z",
+ "latest_stable_version": "0.4.0",
+ "latest_version": "0.4.0",
+ "meta": {
+ "description": "Quagga bamboo clump convention definitions and functions",
+ "licenses": [
+ "MIT"
+ ],
+ "links": {
+ "GitHub": "https://github.com/mwmiller/quagga_def"
+ },
+ "maintainers": []
+ },
+ "name": "quagga_def",
+ "releases": [
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/quagga_def/releases/0.4.0",
+ "version": "0.4.0"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/quagga_def/releases/0.3.0",
+ "version": "0.3.0"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/quagga_def/releases/0.2.0",
+ "version": "0.2.0"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/quagga_def/releases/0.1.0",
+ "version": "0.1.0"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2022-11-29T11:41:15.862303Z",
+ "url": "https://hex.pm/api/packages/quagga_def"
+ },
+ {
+ "configs": {
+ "erlang.mk": "dep_logger_dev = hex 0.1.1",
+ "mix.exs": "{:logger_dev, \"~> 0.1.1\"}",
+ "rebar.config": "{logger_dev, \"0.1.1\"}"
+ },
+ "docs_html_url": "https://hexdocs.pm/logger_dev/",
+ "downloads": {
+ "all": 188,
+ "day": 4,
+ "recent": 188,
+ "week": 48
+ },
+ "html_url": "https://hex.pm/packages/logger_dev",
+ "inserted_at": "2022-09-08T21:37:20.359224Z",
+ "latest_stable_version": "0.1.1",
+ "latest_version": "0.1.1",
+ "meta": {
+ "description": "A more readable formatter for Logger.Backends.Console",
+ "licenses": [
+ "MIT"
+ ],
+ "links": {
+ "GitHub": "https://github.com/protestContest/logger_dev"
+ },
+ "maintainers": []
+ },
+ "name": "logger_dev",
+ "releases": [
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/logger_dev/releases/0.1.1",
+ "version": "0.1.1"
+ },
+ {
+ "has_docs": true,
+ "inserted_at": null,
+ "url": "https://hex.pm/api/packages/logger_dev/releases/0.1.0",
+ "version": "0.1.0"
+ }
+ ],
+ "repository": "hexpm",
+ "retirements": {},
+ "updated_at": "2022-09-09T21:00:14.993273Z",
+ "url": "https://hex.pm/api/packages/logger_dev"
+ }
+]
\ No newline at end of file
diff --git a/swh/lister/hex/tests/test_lister.py b/swh/lister/hex/tests/test_lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/hex/tests/test_lister.py
@@ -0,0 +1,141 @@
+import json
+from pathlib import Path
+from typing import List
+
+import pytest
+
+from swh.lister.hex.lister import HexLister, ListedOrigin
+from swh.scheduler.interface import SchedulerInterface
+
+
+@pytest.fixture
+def hexpm_page(datadir):
+    """Provide a loader for the canned ``page<N>.json`` API responses.
+
+    The loader returns the list of ``html_url`` values of the page together
+    with the decoded page itself.
+    """
+
+    def get_page(page_id: int):
+        # FIXME: Update the test data to match ?sort=name
+        page_path = Path(datadir, "https_hex.pm", f"page{page_id}.json")
+        page_result = json.loads(page_path.read_text())
+        origins = [pkg["html_url"] for pkg in page_result]
+        return origins, page_result
+
+    return get_page
+
+
+def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedOrigin]):
+    """Asserts that the listed URLs and the scheduler origins' URLs are the same set."""
+    expected_urls = set(lister_urls)
+    recorded_urls = {origin.url for origin in scheduler_origins}
+    assert expected_urls == recorded_urls
+
+
+def test_full_lister_hex(
+    swh_scheduler: SchedulerInterface,
+    requests_mock,
+    hexpm_page,
+):
+    """
+    Full listing of hex packages: three populated pages, then an empty one
+    """
+    p1_origin_urls, p1_json = hexpm_page(1)
+    p2_origin_urls, p2_json = hexpm_page(2)
+    p3_origin_urls, p3_json = hexpm_page(3)
+    expected_urls = p1_origin_urls + p2_origin_urls + p3_origin_urls
+
+    mocked_responses = {
+        "https://hex.pm/api/packages/?page=1": p1_json,
+        "https://hex.pm/api/packages/?page=2": p2_json,
+        "https://hex.pm/api/packages/?page=3": p3_json,
+        "https://hex.pm/api/packages/?page=4": [],
+    }
+    for api_url, payload in mocked_responses.items():
+        requests_mock.get(api_url, json=payload)
+
+    lister = HexLister(swh_scheduler)
+    stats = lister.run()
+
+    # The empty fourth page counts as a page but contributes no origin.
+    assert stats.pages == 4
+    assert stats.origins == 10  # 4 + 4 + 2 + 0
+
+    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+    check_listed_origins(expected_urls, scheduler_origins)
+
+    lister_state = lister.get_state_from_scheduler()
+    assert lister_state.last_page_id == 4
+    assert lister_state.last_pkg_name == "logger_dev"
+    assert lister.updated
+
+
+# NOTE(review): the name says "gogs" but this test exercises HexLister;
+# presumably a copy-paste leftover - consider renaming.
+def test_gogs_incremental_lister(
+    swh_scheduler,
+    requests_mock,
+    hexpm_page,
+):
+    """Run the same HexLister three times against a growing set of pages.
+
+    Run 1 sees two populated pages, run 2 sees a third one appear, and run 3
+    sees nothing new. Stats, stored state and the ``updated`` flag are checked
+    after each run.
+    """
+    lister = HexLister(swh_scheduler)
+
+    # First run: P1 and P2 return 4 origins each and P3 returns 0
+    p1_origin_urls, p1_json = hexpm_page(1)
+    p2_origin_urls, p2_json = hexpm_page(2)
+
+    requests_mock.get("https://hex.pm/api/packages/?page=1", json=p1_json)
+    requests_mock.get("https://hex.pm/api/packages/?page=2", json=p2_json)
+    requests_mock.get("https://hex.pm/api/packages/?page=3", json=[])
+
+    stats = lister.run()
+
+    # The empty P3 is counted as a page too.
+    assert stats.pages == 3
+    assert stats.origins == 8
+
+    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+
+    lister_state = lister.get_state_from_scheduler()
+    assert lister_state.last_page_id == 3
+    assert lister.state.last_pkg_name == "alchemy_vm"
+    assert lister.updated
+
+    check_listed_origins(p1_origin_urls + p2_origin_urls, scheduler_origins)
+
+    lister.updated = False  # Reset the flag
+
+    # Second run: P3 isn't empty anymore
+    p3_origin_urls, p3_json = hexpm_page(3)
+
+    # Registering page=3 again replaces the empty response used in the first run.
+    requests_mock.get("https://hex.pm/api/packages/?page=3", json=p3_json)
+    requests_mock.get(
+        "https://hex.pm/api/packages/?page=4", json=[]
+    )  # TODO: Try with 40x/50x here?
+
+    stats = lister.run()
+
+    # Only P3 (2 origins) and the empty P4 are fetched this time.
+    assert stats.pages == 2
+    assert stats.origins == 2
+
+    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+
+    lister_state = lister.get_state_from_scheduler()
+    assert (
+        lister_state.last_page_id == 4
+    )  # TODO: Shouldn't this be 3 given that P4 is empty?
+    assert lister.state.last_pkg_name == "logger_dev"
+    assert lister.updated
+
+    check_listed_origins(
+        p1_origin_urls + p2_origin_urls + p3_origin_urls, scheduler_origins
+    )
+
+    lister.updated = False  # Reset the flag
+
+    # Third run: No new origins
+    # The lister should revisit the last page (P3)
+    # NOTE(review): the stored last_page_id is 4 at this point, so this run
+    # presumably requests page 4 (mocked as empty) rather than P3, which is
+    # consistent with the single page and zero origins asserted below.
+
+    stats = lister.run()
+
+    assert stats.pages == 1
+    assert (
+        stats.origins == 0
+    )  # FIXME: inconsistent with Gogs lister. Either of them could be wrong
+
+    lister_state = lister.get_state_from_scheduler()
+    assert (
+        lister_state.last_page_id == 4
+    )  # TODO: Shouldn't this be 3 given that P4 is empty?
+    assert lister.state.last_pkg_name == "logger_dev"
+    assert lister.updated is False  # No new origins so state isn't updated
+
+    # NOTE(review): scheduler_origins is not fetched again after the third run;
+    # this check reuses the result obtained after the second run.
+    check_listed_origins(
+        p1_origin_urls + p2_origin_urls + p3_origin_urls, scheduler_origins
+    )
diff --git a/swh/lister/hex/tests/test_tasks.py b/swh/lister/hex/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/hex/tests/test_tasks.py
@@ -0,0 +1,56 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from unittest.mock import patch
+
+from swh.lister.pattern import ListerStats
+
+
+def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
+    """The ping task completes successfully and its result is "OK"."""
+    task_result = swh_scheduler_celery_app.send_task("swh.lister.hex.tasks.ping")
+    assert task_result
+    task_result.wait()
+    assert task_result.successful()
+    assert task_result.result == "OK"
+
+
+@patch("swh.lister.hex.tasks.HexLister")
+def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
+    """Sent without arguments, the task builds the lister with ``instance=None``."""
+    lister.from_configfile.return_value = lister
+    lister.run.return_value = ListerStats(pages=10, origins=500)
+
+    task_kwargs = {}
+    task_result = swh_scheduler_celery_app.send_task(
+        "swh.lister.hex.tasks.FullHexRelister",
+        kwargs=task_kwargs,
+    )
+    assert task_result
+    task_result.wait()
+    assert task_result.successful()
+
+    # The task's own default for ``instance`` is what reaches the lister.
+    expected_kwargs = {**task_kwargs, "instance": None}
+
+    lister.from_configfile.assert_called_once_with(**expected_kwargs)
+    lister.run.assert_called_once_with()
+
+
+@patch("swh.lister.hex.tasks.HexLister")
+def test_full_listing_params(
+    lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
+):
+    """An explicit ``instance`` task argument is passed through to the lister."""
+    lister.from_configfile.return_value = lister
+    lister.run.return_value = ListerStats(pages=10, origins=500)
+
+    task_kwargs = {"instance": "hex.pm"}
+    task_result = swh_scheduler_celery_app.send_task(
+        "swh.lister.hex.tasks.FullHexRelister",
+        kwargs=task_kwargs,
+    )
+    assert task_result
+    task_result.wait()
+    assert task_result.successful()
+
+    lister.from_configfile.assert_called_once_with(**task_kwargs)
+    lister.run.assert_called_once_with()
+ lister.run.assert_called_once_with()
diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py
--- a/swh/lister/tests/test_cli.py
+++ b/swh/lister/tests/test_cli.py
@@ -40,7 +40,7 @@
"origin_upstream": "https://git.savannah.gnu.org/cgit/guix.git/",
},
"fedora": {
- "url": "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases//",
+ "url": "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/",
},
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Aug 17, 8:04 PM (1 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216782
Attached To
D8907: feat: Add Hex.pm lister
Event Timeline
Log In to Comment