Page MenuHomeSoftware Heritage

D8160.id29513.diff
No OneTemporary

D8160.id29513.diff

diff --git a/CONTRIBUTORS b/CONTRIBUTORS
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -7,3 +7,4 @@
Boris Baldassari
Léo Andrès
Franck Bret
+Kumar Shivendu
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -24,6 +24,7 @@
- `swh.lister.phabricator`
- `swh.lister.pypi`
- `swh.lister.tuleap`
+- `swh.lister.gogs`
Dependencies
------------
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -74,6 +74,7 @@
lister.sourceforge=swh.lister.sourceforge:register
lister.tuleap=swh.lister.tuleap:register
lister.maven=swh.lister.maven:register
+ lister.gogs=swh.lister.gogs:register
""",
classifiers=[
"Programming Language :: Python :: 3",
diff --git a/swh/lister/gogs/__init__.py b/swh/lister/gogs/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gogs/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def register():
+    """Return the plugin description of the Gogs lister.
+
+    The lister class is imported inside the function, so importing this
+    package alone does not import the lister module.
+
+    Returns:
+        A dict holding the lister class under ``"lister"`` and the names of
+        the task modules under ``"task_modules"``.
+    """
+    from .lister import GogsLister
+
+    task_modules = [f"{__name__}.tasks"]
+    return {"lister": GogsLister, "task_modules": task_modules}
diff --git a/swh/lister/gogs/lister.py b/swh/lister/gogs/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gogs/lister.py
@@ -0,0 +1,144 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import logging
+import random
+from typing import Any, Dict, Iterator, List, Optional
+from urllib.parse import urljoin
+
+import iso8601
+import requests
+from tenacity.before_sleep import before_sleep_log
+
+from swh.lister.utils import throttling_retry
+from swh.scheduler.interface import SchedulerInterface
+from swh.scheduler.model import ListedOrigin
+
+from .. import USER_AGENT
+from ..pattern import CredentialsType, StatelessLister
+
+# Module-level logger, named after this module
+logger = logging.getLogger(__name__)
+
+# Type alias for one page of results, as yielded by `GogsLister.get_pages`:
+# a list of repository records, each one a plain dict
+GogsListerPage = List[Dict[str, Any]]
+
+
+class GogsLister(StatelessLister[GogsListerPage]):
+    """List origins from a Gogs instance.
+
+    Gogs API documentation: https://github.com/gogs/docs-api
+
+    The API is protected behind authentication so credentials/API tokens
+    are mandatory. It supports pagination and provides navigation URLs
+    through the 'Link' header. The default value for page size ('limit')
+    is 10 but the maximum allowed value is 50.
+
+    Args:
+        scheduler: scheduler instance, forwarded to the base lister
+        url: base URL of the API (e.g. ``https://try.gogs.io/api/v1/``)
+        instance: name of the instance, forwarded to the base lister
+        api_token: API token; when None, a token is picked from the
+            credentials instead
+        page_size: value of the 'limit' parameter of the first request
+        credentials: credentials, forwarded to the base lister
+
+    Raises:
+        ValueError: if no API token is given and no credentials are available
+    """
+
+    LISTER_NAME = "gogs"
+
+    VISIT_TYPE = "git"
+
+    # Path of the repository search endpoint, relative to the API base URL
+    REPO_LIST_PATH = "repos/search"
+
+    def __init__(
+        self,
+        scheduler: SchedulerInterface,
+        url: str,
+        instance: Optional[str] = None,
+        api_token: Optional[str] = None,
+        page_size: int = 50,
+        credentials: CredentialsType = None,
+    ):
+        super().__init__(
+            scheduler=scheduler,
+            credentials=credentials,
+            url=url,
+            instance=instance,
+        )
+
+        # Query parameters of the first request only: the following requests
+        # use the complete URLs advertised in the 'Link' header
+        self.query_params = {
+            "limit": page_size,
+            "page": 1,
+        }
+
+        self.api_token = api_token
+        if self.api_token is None:
+            # No explicit token: fall back on one of the configured credentials,
+            # whose "password" field is used as the API token
+            if len(self.credentials) > 0:
+                cred = random.choice(self.credentials)
+                username = cred.get("username")
+                self.api_token = cred["password"]
+                logger.warning(
+                    "Using authentication credentials from user %s", username or "???"
+                )
+            else:
+                raise ValueError("No credentials or API token provided")
+
+        # NOTE(review): never read in this module; kept in case other code
+        # relies on the attribute -- confirm and remove if unused
+        self.max_page_limit = 2
+
+        self.session = requests.Session()
+        self.session.headers.update(
+            {
+                "Accept": "application/json",
+                "User-Agent": USER_AGENT,
+                "Authorization": f"token {self.api_token}",
+            }
+        )
+
+    @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
+    def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response:
+        """Fetch `url` with the authenticated session and return the response.
+
+        Responses with a status other than 200 are logged, and HTTP error
+        statuses raise ``requests.HTTPError`` through ``raise_for_status()``.
+        The call is wrapped by ``throttling_retry``, which logs at WARNING
+        level before sleeping.
+        """
+        logger.debug("Fetching URL %s with params %s", url, params)
+
+        response = self.session.get(url, params=params)
+
+        if response.status_code != 200:
+            logger.warning(
+                "Unexpected HTTP status code %s on %s: %s",
+                response.status_code,
+                response.url,
+                response.content,
+            )
+        response.raise_for_status()
+
+        return response
+
+    @classmethod
+    def results_simplified(cls, body: Dict[str, GogsListerPage]) -> GogsListerPage:
+        """Reduce the repositories found under ``body["data"]`` to the
+        ``id``, ``clone_url`` and ``updated_at`` fields."""
+        fields_filter = ["id", "clone_url", "updated_at"]
+        return [{k: r[k] for k in fields_filter} for r in body["data"]]
+
+    def get_pages(self) -> Iterator[GogsListerPage]:
+        """Yield every page of simplified repository records.
+
+        The first request targets the search endpoint with the initial query
+        parameters; the following ones use the "next" URL of the 'Link'
+        header, until a response has no such link.
+        """
+        # urljoin() replaces the last path segment of a base URL which does not
+        # end with a slash, so make sure the base URL ends with one
+        base_url = self.url if self.url.endswith("/") else self.url + "/"
+        url = urljoin(base_url, self.REPO_LIST_PATH)
+        response = self.page_request(url, self.query_params)
+
+        while True:
+            yield self.results_simplified(response.json())
+
+            # A missing Link header ends the listing instead of failing an
+            # assertion: the page has already been yielded at this point, and
+            # asserts are removed when running with -O anyway
+            if not response.links:
+                logger.debug(
+                    "No Link header in response from %s, assuming last page",
+                    response.url,
+                )
+            next_url = response.links.get("next", {}).get("url")
+            if next_url is None:
+                break
+
+            # The "next" URL already carries its query string
+            response = self.page_request(next_url, {})
+
+    def get_origins_from_page(self, page: GogsListerPage) -> Iterator[ListedOrigin]:
+        """Convert a page of Gogs repositories into a list of ListedOrigins"""
+        assert self.lister_obj.id is not None
+
+        for repo in page:
+            last_update = iso8601.parse_date(repo["updated_at"])
+
+            yield ListedOrigin(
+                lister_id=self.lister_obj.id,
+                visit_type=self.VISIT_TYPE,
+                url=repo["clone_url"],
+                last_update=last_update,
+            )
diff --git a/swh/lister/gogs/tasks.py b/swh/lister/gogs/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gogs/tasks.py
@@ -0,0 +1,28 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from typing import Dict, Optional
+
+from celery import shared_task
+
+from .lister import GogsLister
+
+
+@shared_task(name=__name__ + ".FullGogsRelister")
+def list_gogs_full(
+    url: str,
+    instance: Optional[str] = None,
+    api_token: Optional[str] = None,
+    page_size: Optional[int] = None,
+) -> Dict[str, int]:
+    """Run a full listing of a Gogs instance.
+
+    All arguments are forwarded, as keyword arguments, to
+    ``GogsLister.from_configfile``.
+
+    Returns:
+        The statistics returned by the lister run, converted to a dict.
+    """
+    lister_kwargs = {
+        "url": url,
+        "instance": instance,
+        "api_token": api_token,
+        "page_size": page_size,
+    }
+    lister = GogsLister.from_configfile(**lister_kwargs)
+    stats = lister.run()
+    return stats.dict()
+
+
+@shared_task(name=__name__ + ".ping")
+def _ping() -> str:
+    """Answer "OK"; lets callers check that this task module is reachable."""
+    answer = "OK"
+    return answer
diff --git a/swh/lister/gogs/tests/__init__.py b/swh/lister/gogs/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/gogs/tests/data/https_try.gogs.io/repos_page1 b/swh/lister/gogs/tests/data/https_try.gogs.io/repos_page1
new file mode 100644
--- /dev/null
+++ b/swh/lister/gogs/tests/data/https_try.gogs.io/repos_page1
@@ -0,0 +1,98 @@
+{
+ "data": [
+ {
+ "id": 190,
+ "owner": {
+ "id": 338,
+ "username": "carwyn",
+ "login": "carwyn",
+ "full_name": "",
+ "email": "carwyn@carwyn.com",
+ "avatar_url": "https://secure.gravatar.com/avatar/65a98c538bcc360e9e9739d2af7908b0?d=identicon"
+ },
+ "name": "test-repo",
+ "full_name": "carwyn/test-repo",
+ "description": "An example.",
+ "private": false,
+ "fork": false,
+ "parent": null,
+ "empty": false,
+ "mirror": false,
+ "size": 1024,
+ "html_url": "https://try.gogs.io/carwyn/test-repo",
+ "ssh_url": "git@try.gogs.io:carwyn/test-repo.git",
+ "clone_url": "https://try.gogs.io/carwyn/test-repo.git",
+ "website": "",
+ "stars_count": 0,
+ "forks_count": 0,
+ "watchers_count": 1,
+ "open_issues_count": 0,
+ "default_branch": "master",
+ "created_at": "2015-02-17T21:11:54Z",
+ "updated_at": "2022-03-26T07:28:38Z"
+ },
+ {
+ "id": 258,
+ "owner": {
+ "id": 462,
+ "username": "juquinha",
+ "login": "juquinha",
+ "full_name": "",
+ "email": "juquinha123@mailinator.com",
+ "avatar_url": "https://secure.gravatar.com/avatar/40cdc8c32069ac441ff7f5c9bfe0f9ef?d=identicon"
+ },
+ "name": "zicarepo",
+ "full_name": "juquinha/zicarepo",
+ "description": "Foo test.",
+ "private": false,
+ "fork": false,
+ "parent": null,
+ "empty": false,
+ "mirror": false,
+ "size": 8192,
+ "html_url": "https://try.gogs.io/juquinha/zicarepo",
+ "ssh_url": "git@try.gogs.io:juquinha/zicarepo.git",
+ "clone_url": "https://try.gogs.io/juquinha/zicarepo.git",
+ "website": "",
+ "stars_count": 0,
+ "forks_count": 0,
+ "watchers_count": 1,
+ "open_issues_count": 1,
+ "default_branch": "master",
+ "created_at": "2015-02-24T12:13:57Z",
+ "updated_at": "2022-03-26T07:28:38Z"
+ },
+ {
+ "id": 334,
+ "owner": {
+ "id": 582,
+ "username": "ivilata",
+ "login": "ivilata",
+ "full_name": "",
+ "email": "ivan@pangea.org",
+ "avatar_url": "https://secure.gravatar.com/avatar/ed21e55837a9080c57181f624aefa905?d=identicon"
+ },
+ "name": "footest",
+ "full_name": "ivilata/footest",
+ "description": "Dummy repo for testing issue handling mainly.",
+ "private": false,
+ "fork": false,
+ "parent": null,
+ "empty": false,
+ "mirror": false,
+ "size": 3072,
+ "html_url": "https://try.gogs.io/ivilata/footest",
+ "ssh_url": "git@try.gogs.io:ivilata/footest.git",
+ "clone_url": "https://try.gogs.io/ivilata/footest.git",
+ "website": "",
+ "stars_count": 0,
+ "forks_count": 0,
+ "watchers_count": 1,
+ "open_issues_count": 1,
+ "default_branch": "master",
+ "created_at": "2015-03-03T17:03:45Z",
+ "updated_at": "2022-03-26T07:28:38Z"
+ }
+ ],
+ "ok": true
+}
diff --git a/swh/lister/gogs/tests/data/https_try.gogs.io/repos_page2 b/swh/lister/gogs/tests/data/https_try.gogs.io/repos_page2
new file mode 100644
--- /dev/null
+++ b/swh/lister/gogs/tests/data/https_try.gogs.io/repos_page2
@@ -0,0 +1,98 @@
+{
+ "data": [
+ {
+ "id": 337,
+ "owner": {
+ "id": 585,
+ "username": "zork",
+ "login": "zork",
+ "full_name": "",
+ "email": "f905334@trbvm.com",
+ "avatar_url": "https://secure.gravatar.com/avatar/ebcb8e171a1a47fde8ded46b2618f135?d=identicon"
+ },
+ "name": "zork-repo",
+ "full_name": "zork/zork-repo",
+ "description": "This is a test thing.",
+ "private": false,
+ "fork": false,
+ "parent": null,
+ "empty": false,
+ "mirror": false,
+ "size": 13312,
+ "html_url": "https://try.gogs.io/zork/zork-repo",
+ "ssh_url": "git@try.gogs.io:zork/zork-repo.git",
+ "clone_url": "https://try.gogs.io/zork/zork-repo.git",
+ "website": "",
+ "stars_count": 0,
+ "forks_count": 0,
+ "watchers_count": 1,
+ "open_issues_count": 0,
+ "default_branch": "master",
+ "created_at": "2015-03-03T22:31:53Z",
+ "updated_at": "2022-03-26T07:28:38Z"
+ },
+ {
+ "id": 338,
+ "owner": {
+ "id": 585,
+ "username": "zork",
+ "login": "zork",
+ "full_name": "",
+ "email": "f905334@trbvm.com",
+ "avatar_url": "https://secure.gravatar.com/avatar/ebcb8e171a1a47fde8ded46b2618f135?d=identicon"
+ },
+ "name": "supernova",
+ "full_name": "zork/supernova",
+ "description": "This is a description. Blah blah blah.",
+ "private": false,
+ "fork": false,
+ "parent": null,
+ "empty": false,
+ "mirror": false,
+ "size": 1471488,
+ "html_url": "https://try.gogs.io/zork/supernova",
+ "ssh_url": "git@try.gogs.io:zork/supernova.git",
+ "clone_url": "https://try.gogs.io/zork/supernova.git",
+ "website": "",
+ "stars_count": 0,
+ "forks_count": 0,
+ "watchers_count": 1,
+ "open_issues_count": 0,
+ "default_branch": "master",
+ "created_at": "2015-03-03T22:44:20Z",
+ "updated_at": "2022-03-26T07:28:38Z"
+ },
+ {
+ "id": 339,
+ "owner": {
+ "id": 585,
+ "username": "zork",
+ "login": "zork",
+ "full_name": "",
+ "email": "f905334@trbvm.com",
+ "avatar_url": "https://secure.gravatar.com/avatar/ebcb8e171a1a47fde8ded46b2618f135?d=identicon"
+ },
+ "name": "digits",
+ "full_name": "zork/digits",
+ "description": "Distantly related to the game Mastermind, you are given clues to help determine a random number combination. The object of the game is to guess the solution in as few tries as possible.",
+ "private": false,
+ "fork": false,
+ "parent": null,
+ "empty": false,
+ "mirror": false,
+ "size": 18432,
+ "html_url": "https://try.gogs.io/zork/digits",
+ "ssh_url": "git@try.gogs.io:zork/digits.git",
+ "clone_url": "https://try.gogs.io/zork/digits.git",
+ "website": "",
+ "stars_count": 0,
+ "forks_count": 1,
+ "watchers_count": 1,
+ "open_issues_count": 0,
+ "default_branch": "master",
+ "created_at": "2015-03-03T22:47:56Z",
+ "updated_at": "2022-03-26T07:28:38Z"
+ }
+ ],
+ "ok": true
+}
diff --git a/swh/lister/gogs/tests/test_lister.py b/swh/lister/gogs/tests/test_lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gogs/tests/test_lister.py
@@ -0,0 +1,163 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import json
+from pathlib import Path
+from typing import List
+from unittest.mock import Mock
+
+import pytest
+from requests import HTTPError
+
+from swh.lister.gogs.lister import GogsLister
+from swh.scheduler.model import ListedOrigin
+
+# Base API URL used by the tests below. It ends with a slash because the
+# request URLs are built by appending a relative path to it.
+TRY_GOGS_URL = "https://try.gogs.io/api/v1/"
+
+
+def try_gogs_page(n: int):
+    """Return the repository search URL of page `n`, 3 results per page."""
+    query = f"page={n}&limit=3"
+    return f"{TRY_GOGS_URL}repos/search?{query}"
+
+
+@pytest.fixture
+def trygogs_p1(datadir):
+    """First page of results.
+
+    Returns the raw response text, the response headers (whose Link header
+    points to page 2 as both "next" and "last"), the simplified page and the
+    clone URLs it contains.
+    """
+    raw_body = Path(datadir, "https_try.gogs.io", "repos_page1").read_text()
+    link_header = '<{p2}>; rel="next",<{p2}>; rel="last"'.format(p2=try_gogs_page(2))
+    simplified = GogsLister.results_simplified(json.loads(raw_body))
+    clone_urls = [repo["clone_url"] for repo in simplified]
+    return raw_body, {"Link": link_header}, simplified, clone_urls
+
+
+@pytest.fixture
+def trygogs_p2(datadir):
+    """Second page of results.
+
+    Returns the raw response text, the response headers (whose Link header
+    points to page 1 as both "prev" and "first", with no "next" link), the
+    simplified page and the clone URLs it contains.
+    """
+    raw_body = Path(datadir, "https_try.gogs.io", "repos_page2").read_text()
+    link_header = '<{p1}>; rel="prev",<{p1}>; rel="first"'.format(p1=try_gogs_page(1))
+    simplified = GogsLister.results_simplified(json.loads(raw_body))
+    clone_urls = [repo["clone_url"] for repo in simplified]
+    return raw_body, {"Link": link_header}, simplified, clone_urls
+
+
+@pytest.fixture
+def trygogs_empty_page():
+    """Page without any repository.
+
+    Returns the same kind of tuple as the other page fixtures: the raw
+    response text, the response headers (Link header without a "next" link),
+    the simplified page and the clone URLs it contains (both empty here).
+    """
+    body = {"data": [], "ok": True}
+    text = json.dumps(body)
+    headers = {
+        "Link": '<{p1}>; rel="prev",<{p1}>; rel="first"'.format(p1=try_gogs_page(1))
+    }
+    # Simplify the body as the sibling fixtures do, so that the third element
+    # is a page (a list) and not the raw response body
+    page_result = GogsLister.results_simplified(body)
+    origin_urls = [r["clone_url"] for r in page_result]
+    return text, headers, page_result, origin_urls
+
+
+def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedOrigin]):
+    """Assert that the two collections have the same origin URLs.
+
+    Does not test last_update.
+
+    Args:
+        lister_urls: origin URLs expected from the lister pages
+        scheduler_origins: origins read back from the scheduler
+    """
+    expected_urls = sorted(lister_urls)
+    # Sort the URLs themselves and not the origin objects: the comparison then
+    # does not depend on how (or whether) the origin objects are ordered
+    actual_urls = sorted(origin.url for origin in scheduler_origins)
+
+    assert actual_urls == expected_urls
+
+
+def test_gogs_full_listing(
+    swh_scheduler, requests_mock, mocker, trygogs_p1, trygogs_p2, trygogs_empty_page
+):
+    """Run the lister over two pages of three repositories each, then check
+    the statistics, the pages converted to origins and the recorded origins."""
+    lister = GogsLister(
+        scheduler=swh_scheduler,
+        url=TRY_GOGS_URL,
+        instance="try_gogs",
+        page_size=3,
+        api_token="secret",
+    )
+
+    lister.get_origins_from_page: Mock = mocker.spy(lister, "get_origins_from_page")
+
+    first_text, first_headers, first_page, first_urls = trygogs_p1
+    second_text, second_headers, second_page, second_urls = trygogs_p2
+    empty_text, empty_headers, _, _ = trygogs_empty_page
+
+    # Register the mocked responses of pages 1 to 3
+    mocked_pages = [
+        (first_text, first_headers),
+        (second_text, second_headers),
+        (empty_text, empty_headers),
+    ]
+    for page_number, (text, headers) in enumerate(mocked_pages, start=1):
+        requests_mock.get(try_gogs_page(page_number), text=text, headers=headers)
+
+    stats = lister.run()
+
+    assert stats.pages == 2
+    assert stats.origins == 6
+
+    expected_calls = [mocker.call(first_page), mocker.call(second_page)]
+    lister.get_origins_from_page.assert_has_calls(expected_calls)
+
+    recorded = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+
+    check_listed_origins(first_urls + second_urls, recorded)
+
+    # No state is expected to be stored for this lister
+    assert lister.get_state_from_scheduler() is None
+
+
+def test_gogs_auth_instance(
+    swh_scheduler, requests_mock, trygogs_p1, trygogs_empty_page
+):
+    """Covers authentication with an explicit token, with a token taken from
+    the credentials, the error raised without any of them, and a short run."""
+
+    token = "secret"
+    instance = "try.gogs.io"
+    expected_auth = "token %s" % token
+    creds = {"gogs": {instance: [{"username": "u", "password": token}]}}
+
+    # explicit API token
+    lister = GogsLister(
+        scheduler=swh_scheduler, url=TRY_GOGS_URL, api_token=token, instance=instance
+    )
+    assert "Authorization" in lister.session.headers
+    assert lister.session.headers["Authorization"].lower() == expected_auth
+
+    # neither API token nor credentials
+    no_auth_kwargs = dict(url=TRY_GOGS_URL, instance=instance)
+    with pytest.raises(ValueError, match="No credentials or API token provided"):
+        GogsLister(scheduler=swh_scheduler, **no_auth_kwargs)
+
+    # API token taken from the credentials
+    lister = GogsLister(
+        scheduler=swh_scheduler,
+        url=TRY_GOGS_URL,
+        credentials=creds,
+        instance=instance,
+        page_size=3,
+    )
+    assert "Authorization" in lister.session.headers
+    assert lister.session.headers["Authorization"].lower() == expected_auth
+
+    # the instance name is set and mentions gogs
+    # NOTE(review): `instance` is passed explicitly above, so inference of the
+    # instance name from the URL is not actually exercised here
+    assert lister.instance
+    assert "gogs" in lister.instance
+
+    # mock the first page (any query string) and the empty second page
+    first_text, first_headers, _, _ = trygogs_p1
+    empty_text, empty_headers, _, _ = trygogs_empty_page
+
+    search_url = TRY_GOGS_URL + lister.REPO_LIST_PATH
+    requests_mock.get(search_url, text=first_text, headers=first_headers)
+    requests_mock.get(try_gogs_page(2), text=empty_text, headers=empty_headers)
+
+    # the lister runs without error
+    stats = lister.run()
+
+    assert stats.pages == 2
+    assert stats.origins == 3
+
+
+@pytest.mark.parametrize("http_code", [400, 500, 502])
+def test_gogs_list_http_error(swh_scheduler, requests_mock, http_code):
+    """An HTTP error on the first request makes the run raise HTTPError and
+    leaves no listed origin in the scheduler."""
+
+    lister = GogsLister(scheduler=swh_scheduler, url=TRY_GOGS_URL, api_token="secret")
+
+    search_url = TRY_GOGS_URL + lister.REPO_LIST_PATH
+    requests_mock.get(search_url, status_code=http_code)
+
+    with pytest.raises(HTTPError):
+        lister.run()
+
+    recorded = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+    assert len(recorded) == 0
diff --git a/swh/lister/gogs/tests/test_tasks.py b/swh/lister/gogs/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gogs/tests/test_tasks.py
@@ -0,0 +1,61 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from unittest.mock import patch
+
+from swh.lister.pattern import ListerStats
+
+
+def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
+    """The ping task of the Gogs lister completes and answers "OK"."""
+    async_result = swh_scheduler_celery_app.send_task("swh.lister.gogs.tasks.ping")
+    assert async_result
+    async_result.wait()
+    assert async_result.successful()
+    assert async_result.result == "OK"
+
+
+@patch("swh.lister.gogs.tasks.GogsLister")
+def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
+    """The full listing task, given only a URL, builds the lister with every
+    optional argument set to None and runs it once."""
+    lister.from_configfile.return_value = lister
+    lister.run.return_value = ListerStats(pages=10, origins=500)
+
+    task_kwargs = {"url": "https://try.gogs.io/api/v1/"}
+    async_result = swh_scheduler_celery_app.send_task(
+        "swh.lister.gogs.tasks.FullGogsRelister",
+        kwargs=task_kwargs,
+    )
+    assert async_result
+    async_result.wait()
+    assert async_result.successful()
+
+    # Arguments left out of the task call reach the lister as None
+    expected_kwargs = {
+        **task_kwargs,
+        "instance": None,
+        "api_token": None,
+        "page_size": None,
+    }
+
+    lister.from_configfile.assert_called_once_with(**expected_kwargs)
+    lister.run.assert_called_once_with()
+
+
+@patch("swh.lister.gogs.tasks.GogsLister")
+def test_full_listing_params(
+    lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
+):
+    """The full listing task forwards all of its arguments, unchanged, to the
+    lister and runs it once."""
+    lister.from_configfile.return_value = lister
+    lister.run.return_value = ListerStats(pages=10, origins=500)
+
+    task_kwargs = {
+        "url": "https://gogs-host.com/api/v1/",
+        "instance": "foo",
+        "api_token": "test",
+        "page_size": 50,
+    }
+    async_result = swh_scheduler_celery_app.send_task(
+        "swh.lister.gogs.tasks.FullGogsRelister",
+        kwargs=task_kwargs,
+    )
+    assert async_result
+    async_result.wait()
+    assert async_result.successful()
+
+    lister.from_configfile.assert_called_once_with(**task_kwargs)
+    lister.run.assert_called_once_with()
diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py
--- a/swh/lister/tests/test_cli.py
+++ b/swh/lister/tests/test_cli.py
@@ -31,6 +31,10 @@
"url": "https://repo1.maven.org/maven2/",
"index_url": "http://indexes/export.fld",
},
+ "gogs": {
+ "url": "https://try.gogs.io/",
+ "api_token": "secret",
+ },
}

File Metadata

Mime Type
text/plain
Expires
Thu, Jan 30, 5:10 PM (3 h, 12 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225299

Event Timeline