diff --git a/CONTRIBUTORS b/CONTRIBUTORS
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -7,3 +7,4 @@
 Boris Baldassari
 Léo Andrès
 Franck Bret
+Kumar Shivendu
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -24,6 +24,7 @@
 - `swh.lister.phabricator`
 - `swh.lister.pypi`
 - `swh.lister.tuleap`
+- `swh.lister.gogs`
 
 Dependencies
 ------------
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -74,6 +74,7 @@
         lister.sourceforge=swh.lister.sourceforge:register
         lister.tuleap=swh.lister.tuleap:register
         lister.maven=swh.lister.maven:register
+        lister.gogs=swh.lister.gogs:register
     """,
     classifiers=[
         "Programming Language :: Python :: 3",
diff --git a/swh/lister/gogs/__init__.py b/swh/lister/gogs/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gogs/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def register():
+    """Return the lister class and the task modules of the Gogs lister.
+
+    The lister module is only imported when this function is called.
+    """
+    from .lister import GogsLister
+
+    tasks_module = f"{__name__}.tasks"
+    return {"lister": GogsLister, "task_modules": [tasks_module]}
diff --git a/swh/lister/gogs/lister.py b/swh/lister/gogs/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gogs/lister.py
@@ -0,0 +1,144 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import logging
+import random
+from typing import Any, Dict, Iterator, List, Optional
+from urllib.parse import urljoin
+
+import iso8601
+import requests
+from tenacity.before_sleep import before_sleep_log
+
+from swh.lister.utils import throttling_retry
+from swh.scheduler.interface import SchedulerInterface
+from swh.scheduler.model import ListedOrigin
+
+from ..
import USER_AGENT
+from ..pattern import CredentialsType, StatelessLister
+
+logger = logging.getLogger(__name__)
+
+# Aliasing page results returned by `GogsLister.get_pages` method
+GogsListerPage = List[Dict[str, Any]]
+
+
+class GogsLister(StatelessLister[GogsListerPage]):
+    """List origins from a Gogs instance.
+
+    Gogs API documentation: https://github.com/gogs/docs-api
+
+    The API is protected behind authentication so credentials/API tokens
+    are mandatory. It supports pagination and provides next page URL
+    through the 'next' value of the 'Link' header. The default value for
+    page size ('limit') is 10 but the maximum allowed value is 50.
+    """
+
+    LISTER_NAME = "gogs"
+
+    VISIT_TYPE = "git"
+
+    REPO_LIST_PATH = "repos/search"
+
+    # Page size requested when the caller does not provide one
+    DEFAULT_PAGE_SIZE = 50
+
+    def __init__(
+        self,
+        scheduler: SchedulerInterface,
+        url: str,
+        instance: Optional[str] = None,
+        api_token: Optional[str] = None,
+        page_size: Optional[int] = DEFAULT_PAGE_SIZE,
+        credentials: CredentialsType = None,
+    ):
+        """Set up the query parameters and the authenticated HTTP session.
+
+        Args:
+            scheduler: scheduler instance, forwarded to the base lister
+            url: base URL of the Gogs API, e.g. https://try.gogs.io/api/v1/
+            instance: instance name, forwarded to the base lister
+            api_token: token sent in the ``Authorization`` header; when
+                ``None``, the password of a randomly chosen entry of
+                ``self.credentials`` is used instead
+            page_size: value of the ``limit`` query parameter; ``None``
+                selects ``DEFAULT_PAGE_SIZE``
+            credentials: credentials, forwarded to the base lister
+
+        Raises:
+            ValueError: if there is neither an API token nor any credentials
+
+        """
+        super().__init__(
+            scheduler=scheduler,
+            credentials=credentials,
+            url=url,
+            instance=instance,
+        )
+
+        # The Celery task forwards ``page_size=None`` when its caller leaves
+        # it out, and requests drops parameters whose value is None: without
+        # this fallback the 'limit' parameter would silently disappear.
+        if page_size is None:
+            page_size = self.DEFAULT_PAGE_SIZE
+
+        self.query_params = {
+            "limit": page_size,
+            "page": 1,
+        }
+
+        self.api_token = api_token
+        if self.api_token is None:
+            if not self.credentials:
+                raise ValueError("No credentials or API token provided")
+
+            cred = random.choice(self.credentials)
+            username = cred.get("username")
+            self.api_token = cred["password"]
+            logger.warning(
+                "Using authentication credentials from user %s", username or "???"
+            )
+
+        self.session = requests.Session()
+        self.session.headers.update(
+            {
+                "Accept": "application/json",
+                "User-Agent": USER_AGENT,
+                "Authorization": f"token {self.api_token}",
+            }
+        )
+
+    @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
+    def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response:
+        """Fetch ``url`` with the lister session and return the response.
+
+        Any status code other than 200 is logged as a warning, then
+        ``raise_for_status()`` is called on the response.
+
+        NOTE(review): retries come from the ``throttling_retry`` decorator,
+        whose policy is defined in ``swh.lister.utils``.
+        """
+        logger.debug("Fetching URL %s with params %s", url, params)
+
+        response = self.session.get(url, params=params)
+
+        if response.status_code != 200:
+            logger.warning(
+                "Unexpected HTTP status code %s on %s: %s",
+                response.status_code,
+                response.url,
+                response.content,
+            )
+        response.raise_for_status()
+
+        return response
+
+    @classmethod
+    def results_simplified(cls, body: Dict[str, GogsListerPage]) -> GogsListerPage:
+        """Keep only the repository fields the lister needs.
+
+        Args:
+            body: decoded JSON response; its ``data`` entry lists repositories
+
+        Returns:
+            One dict per repository, restricted to the ``id``, ``clone_url``
+            and ``updated_at`` keys.
+
+        """
+        fields_filter = ["id", "clone_url", "updated_at"]
+        return [{k: r[k] for k in fields_filter} for r in body["data"]]
+
+    def get_pages(self) -> Iterator[GogsListerPage]:
+        """Yield simplified result pages, following the 'next' links."""
+        # urljoin() replaces the last path segment of a base URL that lacks a
+        # trailing slash (".../api/v1" would become ".../api/repos/search"),
+        # so make sure the base ends with one. The path has no leading slash.
+        base_url = self.url if self.url.endswith("/") else f"{self.url}/"
+        url = urljoin(base_url, self.REPO_LIST_PATH)
+        response = self.page_request(url, self.query_params)
+
+        while True:
+            page_results = self.results_simplified(response.json())
+
+            yield page_results
+
+            assert len(response.links) > 0, "API changed: no Link header found"
+            if "next" not in response.links:
+                break
+
+            # The 'next' URL is requested as returned, without extra params
+            url = response.links["next"]["url"]
+            response = self.page_request(url, {})
+
+    def get_origins_from_page(self, page: GogsListerPage) -> Iterator[ListedOrigin]:
+        """Convert a page of Gogs repositories into a list of ListedOrigins"""
+        assert self.lister_obj.id is not None
+
+        for repo in page:
+            last_update = iso8601.parse_date(repo["updated_at"])
+
+            yield ListedOrigin(
+                lister_id=self.lister_obj.id,
+                visit_type=self.VISIT_TYPE,
+                url=repo["clone_url"],
+                last_update=last_update,
+            )
diff --git a/swh/lister/gogs/tasks.py b/swh/lister/gogs/tasks.py
new file mode 100644
---
/dev/null
+++ b/swh/lister/gogs/tasks.py
@@ -0,0 +1,38 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from typing import Dict, Optional
+
+from celery import shared_task
+
+from .lister import GogsLister
+
+
+@shared_task(name=f"{__name__}.FullGogsRelister")
+def list_gogs_full(
+    url: str,
+    instance: Optional[str] = None,
+    api_token: Optional[str] = None,
+    page_size: Optional[int] = None,
+) -> Dict[str, int]:
+    """Run a full listing of the Gogs instance at ``url``.
+
+    Every argument is forwarded to ``GogsLister.from_configfile``; the
+    statistics returned by the lister run are converted to a dict.
+    """
+    gogs_lister = GogsLister.from_configfile(
+        url=url,
+        instance=instance,
+        api_token=api_token,
+        page_size=page_size,
+    )
+    stats = gogs_lister.run()
+    return stats.dict()
+
+
+@shared_task(name=f"{__name__}.ping")
+def _ping() -> str:
+    """Answer "OK"; used to check that the tasks are reachable."""
+    return "OK"
diff --git a/swh/lister/gogs/tests/__init__.py b/swh/lister/gogs/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/gogs/tests/data/https_try.gogs.io/repos_page1 b/swh/lister/gogs/tests/data/https_try.gogs.io/repos_page1
new file mode 100644
--- /dev/null
+++ b/swh/lister/gogs/tests/data/https_try.gogs.io/repos_page1
@@ -0,0 +1,98 @@
+{
+  "data": [
+    {
+      "id": 190,
+      "owner": {
+        "id": 338,
+        "username": "carwyn",
+        "login": "carwyn",
+        "full_name": "",
+        "email": "carwyn@carwyn.com",
+        "avatar_url": "https://secure.gravatar.com/avatar/65a98c538bcc360e9e9739d2af7908b0?d=identicon"
+      },
+      "name": "test-repo",
+      "full_name": "carwyn/test-repo",
+      "description": "An example.",
+      "private": false,
+      "fork": false,
+      "parent": null,
+      "empty": false,
+      "mirror": false,
+      "size": 1024,
+      "html_url": "https://try.gogs.io/carwyn/test-repo",
+      "ssh_url": "git@try.gogs.io:carwyn/test-repo.git",
+      "clone_url": "https://try.gogs.io/carwyn/test-repo.git",
+      "website": "",
+      "stars_count": 0,
+      "forks_count": 0,
+      "watchers_count": 1,
+      "open_issues_count": 0,
+      "default_branch": "master",
+      "created_at": "2015-02-17T21:11:54Z",
+      "updated_at": "2022-03-26T07:28:38Z"
}, + { + "id": 258, + "owner": { + "id": 462, + "username": "juquinha", + "login": "juquinha", + "full_name": "", + "email": "juquinha123@mailinator.com", + "avatar_url": "https://secure.gravatar.com/avatar/40cdc8c32069ac441ff7f5c9bfe0f9ef?d=identicon" + }, + "name": "zicarepo", + "full_name": "juquinha/zicarepo", + "description": "Foo test.", + "private": false, + "fork": false, + "parent": null, + "empty": false, + "mirror": false, + "size": 8192, + "html_url": "https://try.gogs.io/juquinha/zicarepo", + "ssh_url": "git@try.gogs.io:juquinha/zicarepo.git", + "clone_url": "https://try.gogs.io/juquinha/zicarepo.git", + "website": "", + "stars_count": 0, + "forks_count": 0, + "watchers_count": 1, + "open_issues_count": 1, + "default_branch": "master", + "created_at": "2015-02-24T12:13:57Z", + "updated_at": "2022-03-26T07:28:38Z" + }, + { + "id": 334, + "owner": { + "id": 582, + "username": "ivilata", + "login": "ivilata", + "full_name": "", + "email": "ivan@pangea.org", + "avatar_url": "https://secure.gravatar.com/avatar/ed21e55837a9080c57181f624aefa905?d=identicon" + }, + "name": "footest", + "full_name": "ivilata/footest", + "description": "Dummy repo for testing issue handling mainly.", + "private": false, + "fork": false, + "parent": null, + "empty": false, + "mirror": false, + "size": 3072, + "html_url": "https://try.gogs.io/ivilata/footest", + "ssh_url": "git@try.gogs.io:ivilata/footest.git", + "clone_url": "https://try.gogs.io/ivilata/footest.git", + "website": "", + "stars_count": 0, + "forks_count": 0, + "watchers_count": 1, + "open_issues_count": 1, + "default_branch": "master", + "created_at": "2015-03-03T17:03:45Z", + "updated_at": "2022-03-26T07:28:38Z" + } + ], + "ok": true +} diff --git a/swh/lister/gogs/tests/data/https_try.gogs.io/repos_page2 b/swh/lister/gogs/tests/data/https_try.gogs.io/repos_page2 new file mode 100644 --- /dev/null +++ b/swh/lister/gogs/tests/data/https_try.gogs.io/repos_page2 @@ -0,0 +1,98 @@ +{ + "data": [ + { + "id": 337, + 
"owner": { + "id": 585, + "username": "zork", + "login": "zork", + "full_name": "", + "email": "f905334@trbvm.com", + "avatar_url": "https://secure.gravatar.com/avatar/ebcb8e171a1a47fde8ded46b2618f135?d=identicon" + }, + "name": "zork-repo", + "full_name": "zork/zork-repo", + "description": "This is a test thing.", + "private": false, + "fork": false, + "parent": null, + "empty": false, + "mirror": false, + "size": 13312, + "html_url": "https://try.gogs.io/zork/zork-repo", + "ssh_url": "git@try.gogs.io:zork/zork-repo.git", + "clone_url": "https://try.gogs.io/zork/zork-repo.git", + "website": "", + "stars_count": 0, + "forks_count": 0, + "watchers_count": 1, + "open_issues_count": 0, + "default_branch": "master", + "created_at": "2015-03-03T22:31:53Z", + "updated_at": "2022-03-26T07:28:38Z" + }, + { + "id": 338, + "owner": { + "id": 585, + "username": "zork", + "login": "zork", + "full_name": "", + "email": "f905334@trbvm.com", + "avatar_url": "https://secure.gravatar.com/avatar/ebcb8e171a1a47fde8ded46b2618f135?d=identicon" + }, + "name": "supernova", + "full_name": "zork/supernova", + "description": "This is a description. 
Blah blah blah.", + "private": false, + "fork": false, + "parent": null, + "empty": false, + "mirror": false, + "size": 1471488, + "html_url": "https://try.gogs.io/zork/supernova", + "ssh_url": "git@try.gogs.io:zork/supernova.git", + "clone_url": "https://try.gogs.io/zork/supernova.git", + "website": "", + "stars_count": 0, + "forks_count": 0, + "watchers_count": 1, + "open_issues_count": 0, + "default_branch": "master", + "created_at": "2015-03-03T22:44:20Z", + "updated_at": "2022-03-26T07:28:38Z" + }, + { + "id": 339, + "owner": { + "id": 585, + "username": "zork", + "login": "zork", + "full_name": "", + "email": "f905334@trbvm.com", + "avatar_url": "https://secure.gravatar.com/avatar/ebcb8e171a1a47fde8ded46b2618f135?d=identicon" + }, + "name": "digits", + "full_name": "zork/digits", + "description": "Distantly related to the game Mastermind, you are given clues to help determine a random number combination. The object of the game is to guess the solution in as few tries as possible.", + "private": false, + "fork": false, + "parent": null, + "empty": false, + "mirror": false, + "size": 18432, + "html_url": "https://try.gogs.io/zork/digits", + "ssh_url": "git@try.gogs.io:zork/digits.git", + "clone_url": "https://try.gogs.io/zork/digits.git", + "website": "", + "stars_count": 0, + "forks_count": 1, + "watchers_count": 1, + "open_issues_count": 0, + "default_branch": "master", + "created_at": "2015-03-03T22:47:56Z", + "updated_at": "2022-03-26T07:28:38Z" + } + ], + "ok": true +} diff --git a/swh/lister/gogs/tests/test_lister.py b/swh/lister/gogs/tests/test_lister.py new file mode 100644 --- /dev/null +++ b/swh/lister/gogs/tests/test_lister.py @@ -0,0 +1,163 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import json +from pathlib import Path +from typing import 
List
+from unittest.mock import Mock
+
+import pytest
+from requests import HTTPError
+
+from swh.lister.gogs.lister import GogsLister
+from swh.scheduler.model import ListedOrigin
+
+TRY_GOGS_URL = "https://try.gogs.io/api/v1/"
+
+
+def try_gogs_page(n: int) -> str:
+    """Return the repository search URL of page ``n``, 3 results per page."""
+    return TRY_GOGS_URL + f"repos/search?page={n}&limit=3"
+
+
+@pytest.fixture
+def trygogs_p1(datadir):
+    """First page of results; its Link header announces page 2 as 'next'."""
+    text = Path(datadir, "https_try.gogs.io", "repos_page1").read_text()
+    headers = {
+        "Link": '<{p2}>; rel="next",<{p2}>; rel="last"'.format(p2=try_gogs_page(2))
+    }
+    page_result = GogsLister.results_simplified(json.loads(text))
+    origin_urls = [r["clone_url"] for r in page_result]
+    return text, headers, page_result, origin_urls
+
+
+@pytest.fixture
+def trygogs_p2(datadir):
+    """Second page of results; its Link header has no 'next' relation."""
+    text = Path(datadir, "https_try.gogs.io", "repos_page2").read_text()
+    headers = {
+        "Link": '<{p1}>; rel="prev",<{p1}>; rel="first"'.format(p1=try_gogs_page(1))
+    }
+    page_result = GogsLister.results_simplified(json.loads(text))
+    origin_urls = [r["clone_url"] for r in page_result]
+    return text, headers, page_result, origin_urls
+
+
+@pytest.fixture
+def trygogs_empty_page():
+    """Page without any repository and without a 'next' relation."""
+    body = {"data": [], "ok": True}
+    text = json.dumps(body)
+    headers = {
+        "Link": '<{p1}>; rel="prev",<{p1}>; rel="first"'.format(p1=try_gogs_page(1))
+    }
+    # Same tuple layout as the other page fixtures: the third item is the
+    # simplified page, not the raw response body.
+    page_result = GogsLister.results_simplified(body)
+    origin_urls = [r["clone_url"] for r in page_result]
+    return text, headers, page_result, origin_urls
+
+
+def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedOrigin]):
+    """Asserts that the two collections have the same origin URLs.
+
+    Does not test last_update."""
+    # Compare the URLs themselves: sorting ListedOrigin objects would depend
+    # on how the model defines ordering rather than on the URL alone.
+    assert sorted(lister_urls) == sorted(origin.url for origin in scheduler_origins)
+
+
+def test_gogs_full_listing(
+    swh_scheduler, requests_mock, mocker, trygogs_p1, trygogs_p2
+):
+    """Both pages are fetched and each repository is recorded as an origin."""
+    kwargs = dict(
+        url=TRY_GOGS_URL, instance="try_gogs", page_size=3, api_token="secret"
+    )
+    lister = GogsLister(scheduler=swh_scheduler, **kwargs)
+
+    lister.get_origins_from_page: Mock = mocker.spy(lister, "get_origins_from_page")
+
+    p1_text, p1_headers, p1_result, p1_origin_urls = trygogs_p1
+    p2_text, p2_headers, p2_result, p2_origin_urls = trygogs_p2
+
+    # Page 2 has no 'next' link, so no third page is mocked: any request
+    # beyond page 2 would hit an unregistered URL.
+    requests_mock.get(try_gogs_page(1), text=p1_text, headers=p1_headers)
+    requests_mock.get(try_gogs_page(2), text=p2_text, headers=p2_headers)
+
+    stats = lister.run()
+
+    assert stats.pages == 2
+    assert stats.origins == 6
+
+    calls = [mocker.call(p1_result), mocker.call(p2_result)]
+    lister.get_origins_from_page.assert_has_calls(calls)
+
+    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+
+    check_listed_origins(p1_origin_urls + p2_origin_urls, scheduler_origins)
+
+    assert lister.get_state_from_scheduler() is None
+
+
+def test_gogs_auth_instance(
+    swh_scheduler, requests_mock, trygogs_p1, trygogs_empty_page
+):
+    """Covers token authentication, token from credentials,
+    instance inference from URL."""
+
+    api_token = "secret"
+    instance = "try.gogs.io"
+    creds = {"gogs": {instance: [{"username": "u", "password": api_token}]}}
+
+    kwargs1 = dict(url=TRY_GOGS_URL, api_token=api_token, instance=instance)
+    lister = GogsLister(scheduler=swh_scheduler, **kwargs1)
+
+    # test API token
+    assert "Authorization" in lister.session.headers
+    assert lister.session.headers["Authorization"].lower() == "token %s" % api_token
+
+    with pytest.raises(ValueError, match="No credentials or API token provided"):
+        kwargs2 = dict(url=TRY_GOGS_URL, instance=instance)
+        GogsLister(scheduler=swh_scheduler, **kwargs2)
+
+    kwargs3 = dict(url=TRY_GOGS_URL, credentials=creds, instance=instance, page_size=3)
+    lister = GogsLister(scheduler=swh_scheduler, **kwargs3)
+
+    # test API token from credentials
+    assert "Authorization" in lister.session.headers
+    assert lister.session.headers["Authorization"].lower() == "token %s" % api_token
+
+    # test instance inference from URL
+    # NOTE(review): `instance` is passed explicitly above, so these checks do
+    # not exercise inference by themselves -- confirm the intent.
+    assert lister.instance
+    assert "gogs" in lister.instance
+
+    # setup requests mocking
+    p1_text, p1_headers, _, _ = trygogs_p1
+    p2_text, p2_headers, _, _ = trygogs_empty_page
+
+    base_url = TRY_GOGS_URL + lister.REPO_LIST_PATH
+    requests_mock.get(base_url, text=p1_text, headers=p1_headers)
+    requests_mock.get(try_gogs_page(2), text=p2_text, headers=p2_headers)
+    # now check the lister runs without error
+    stats = lister.run()
+
+    assert stats.pages == 2
+    assert stats.origins == 3
+
+
+@pytest.mark.parametrize("http_code", [400, 500, 502])
+def test_gogs_list_http_error(swh_scheduler, requests_mock, http_code):
+    """Test handling of some HTTP errors commonly encountered"""
+
+    lister = GogsLister(scheduler=swh_scheduler, url=TRY_GOGS_URL, api_token="secret")
+
+    base_url = TRY_GOGS_URL + lister.REPO_LIST_PATH
+    requests_mock.get(base_url, status_code=http_code)
+
+    with pytest.raises(HTTPError):
+        lister.run()
+
+    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+    assert len(scheduler_origins) == 0
diff --git a/swh/lister/gogs/tests/test_tasks.py b/swh/lister/gogs/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gogs/tests/test_tasks.py
@@ -0,0 +1,61 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this
distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from unittest.mock import patch
+
+from swh.lister.pattern import ListerStats
+
+
+def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
+    """The ping task completes and answers "OK"."""
+    result = swh_scheduler_celery_app.send_task("swh.lister.gogs.tasks.ping")
+    assert result
+    result.wait()
+    assert result.successful()
+    assert result.result == "OK"
+
+
+@patch("swh.lister.gogs.tasks.GogsLister")
+def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
+    """Arguments the caller leaves out reach the lister as None."""
+    lister.from_configfile.return_value = lister
+    lister.run.return_value = ListerStats(pages=10, origins=500)
+
+    task_kwargs = {"url": "https://try.gogs.io/api/v1/"}
+    result = swh_scheduler_celery_app.send_task(
+        "swh.lister.gogs.tasks.FullGogsRelister",
+        kwargs=task_kwargs,
+    )
+    assert result
+    result.wait()
+    assert result.successful()
+
+    expected_kwargs = {
+        **task_kwargs,
+        "instance": None,
+        "api_token": None,
+        "page_size": None,
+    }
+    lister.from_configfile.assert_called_once_with(**expected_kwargs)
+    lister.run.assert_called_once_with()
+
+
+@patch("swh.lister.gogs.tasks.GogsLister")
+def test_full_listing_params(
+    lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
+):
+    """Explicit task arguments are forwarded to the lister unchanged."""
+    lister.from_configfile.return_value = lister
+    lister.run.return_value = ListerStats(pages=10, origins=500)
+
+    task_kwargs = {
+        "url": "https://gogs-host.com/api/v1/",
+        "instance": "foo",
+        "api_token": "test",
+        "page_size": 50,
+    }
+    result = swh_scheduler_celery_app.send_task(
+        "swh.lister.gogs.tasks.FullGogsRelister",
+        kwargs=task_kwargs,
+    )
+    assert result
+    result.wait()
+    assert result.successful()
+
+    lister.from_configfile.assert_called_once_with(**task_kwargs)
+    lister.run.assert_called_once_with()
diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py
--- a/swh/lister/tests/test_cli.py
+++ b/swh/lister/tests/test_cli.py
@@ -31,6 +31,10 @@
         "url": "https://repo1.maven.org/maven2/",
         "index_url": "http://indexes/export.fld",
     },
+    "gogs": {
+        "url":
"https://try.gogs.io/", + "api_token": "secret", + }, }