Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/gitea/tests/test_lister.py
# Copyright (C) 2017-2020 The Software Heritage developers | # Copyright (C) 2017-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import logging | import json | ||||
import re | from pathlib import Path | ||||
import unittest | from typing import Dict, List, Tuple | ||||
from swh.lister.core.tests.test_lister import HttpListerTesterBase | import pytest | ||||
from swh.lister.gitea.lister import GiteaLister | import requests | ||||
logger = logging.getLogger(__name__) | from swh.lister.gitea.lister import GiteaLister, RepoListPage | ||||
from swh.scheduler.model import ListedOrigin | |||||
class GiteaListerTester(HttpListerTesterBase, unittest.TestCase): | |||||
Lister = GiteaLister | @pytest.fixture | ||||
test_re = re.compile(r"^.*/projects.*page=(\d+).*") | def trygitea_p1(datadir) -> Tuple[str, str, Dict[str, str], RepoListPage, List[str]]: | ||||
lister_subdir = "gitea" | url = "https://try.gitea.io/api/v1/repos/search?sort=id&order=asc&limit=3&page=1" | ||||
good_api_response_file = "data/https_try.gitea.io/api_response.json" | text = Path( | ||||
bad_api_response_file = "data/https_try.gitea.io/api_empty_response.json" | datadir, | ||||
first_index = 1 | "https_try.gitea.io", | ||||
last_index = 2 | "api_v1_repos_search,sort=id,order=asc,limit=3,page=1", | ||||
entries_per_page = 3 | ).read_text() | ||||
convert_type = int | headers: Dict[str, str] = { | ||||
"Link": '<https://try.gitea.io/api/v1/repos/search?limit=3&order=asc&page=2&sort=id>; rel="next",' # noqa | |||||
def response_headers(self, request): | '<https://try.gitea.io/api/v1/repos/search?limit=3&order=asc&page=2282&sort=id>; rel="last"' # noqa | ||||
headers = {} | |||||
if self.request_index(request) == self.first_index: | |||||
headers.update( | |||||
{ | |||||
"Link": "<https://try.gitea.io/api/v1\ | |||||
/repos/search?&page=%s&sort=id>;" | |||||
' rel="next"' % self.last_index | |||||
} | } | ||||
page_result = GiteaLister.results_simplified(json.loads(text)) | |||||
origin_urls = [r["clone_url"] for r in page_result] | |||||
return url, text, headers, page_result, origin_urls | |||||
@pytest.fixture | |||||
def trygitea_p2(datadir) -> Tuple[str, str, Dict[str, str], RepoListPage, List[str]]: | |||||
url = "https://try.gitea.io/api/v1/repos/search?sort=id&order=asc&limit=3&page=2" | |||||
text = Path( | |||||
datadir, | |||||
"https_try.gitea.io", | |||||
"api_v1_repos_search,sort=id,order=asc,limit=3,page=2", | |||||
).read_text() | |||||
headers: Dict[str, str] = { | |||||
"Link": '<https://try.gitea.io/api/v1/repos/search?limit=3&order=asc&page=1&sort=id>; rel="prev",' # noqa | |||||
'<https://try.gitea.io/api/v1/repos/search?limit=3&order=asc&page=1&sort=id>; rel="first"' # noqa | |||||
} | |||||
page_result = GiteaLister.results_simplified(json.loads(text)) | |||||
origin_urls = [r["clone_url"] for r in page_result] | |||||
anlambert: There are a lot of duplicated hardcoded URLs in those fixtures.
You should use a function based… | |||||
return url, text, headers, page_result, origin_urls | |||||
def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedOrigin]): | |||||
"""Asserts that the two collections have the same origin URLs. | |||||
Does not test last_update.""" | |||||
sorted_lister_urls = list(sorted(lister_urls)) | |||||
sorted_scheduler_origins = list(sorted(scheduler_origins)) | |||||
assert len(sorted_lister_urls) == len(sorted_scheduler_origins) | |||||
for l_url, s_origin in zip(sorted_lister_urls, sorted_scheduler_origins): | |||||
assert l_url == s_origin.url | |||||
def test_gitea_full_listing( | |||||
swh_scheduler, requests_mock, mocker, trygitea_p1, trygitea_p2 | |||||
): | |||||
"""Covers full listing of multiple pages, instance inference from URL, | |||||
rate-limit, token authentication, token from credentials, | |||||
page size (required for test), checking page results and listed origins, | |||||
statelessness.""" | |||||
baseurl = "https://try.gitea.io/api/v1/" | |||||
p1_url, p1_text, p1_headers, p1_result, p1_origin_urls = trygitea_p1 | |||||
p2_url, p2_text, p2_headers, p2_result, p2_origin_urls = trygitea_p2 | |||||
requests_mock.get(p1_url, text=p1_text, headers=p1_headers) | |||||
requests_mock.get( | |||||
p2_url, | |||||
[ | |||||
{"status_code": requests.codes.too_many_requests}, | |||||
{"text": p2_text, "headers": p2_headers}, | |||||
], | |||||
) | |||||
instance = "try.gitea.io" | |||||
api_token = "p" | |||||
creds = {"gitea": {instance: [{"username": "u", "password": api_token}]}} | |||||
kwargs = dict(url=baseurl, page_size=3, credentials=creds) | |||||
lister = GiteaLister(scheduler=swh_scheduler, **kwargs) | |||||
assert lister.instance == instance | |||||
assert ( | |||||
"Authorization" in lister.session.headers | |||||
and lister.session.headers["Authorization"].lower() == "token %s" % api_token | |||||
) | ) | ||||
return headers | lister.get_origins_from_page = mocker.spy(lister, "get_origins_from_page") | ||||
# end test setup | |||||
stats = lister.run() | |||||
# start test checks | |||||
assert stats.pages == 2 | |||||
assert stats.origins == 6 | |||||
calls = [mocker.call(p1_result), mocker.call(p2_result)] | |||||
lister.get_origins_from_page.assert_has_calls(calls) | |||||
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).origins | |||||
assert lister.get_state_from_scheduler() is None | |||||
check_listed_origins(p1_origin_urls + p2_origin_urls, scheduler_origins) | |||||
@pytest.mark.parametrize("http_code", [400, 500, 502]) | |||||
def test_gitea_list_http_error(swh_scheduler, requests_mock, trygitea_p1, http_code): | |||||
"""Test handling of some HTTP errors commonly encountered""" | |||||
baseurl = "https://try.gitea.io/api/v1/" | |||||
p1_url, _, _, _, _ = trygitea_p1 | |||||
Done · Inline Actions · You can drop the use of the trygitea_p1 fixture and use baseurl + lister.REPO_LIST_PATH as the requests_mock.get URL. anlambert: you can remove the `trygitea_p1` fixture use and use `baseurl + lister.REPO_LIST_PATH` as… | |||||
requests_mock.get(p1_url, status_code=http_code) | |||||
lister = GiteaLister(scheduler=swh_scheduler, url=baseurl, page_size=3) | |||||
def test_lister_gitea(lister_gitea, requests_mock_datadir): | with pytest.raises(requests.HTTPError): | ||||
lister_gitea.run() | lister.run() | ||||
r = lister_gitea.scheduler.search_tasks(task_type="load-git") | |||||
assert len(r) == 3 | |||||
for row in r: | |||||
assert row["type"] == "load-git" | |||||
# arguments check | |||||
args = row["arguments"]["args"] | |||||
assert len(args) == 0 | |||||
# kwargs | |||||
kwargs = row["arguments"]["kwargs"] | |||||
url = kwargs["url"] | |||||
assert url.startswith("https://try.gitea.io") | |||||
assert row["policy"] == "recurring" | scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).origins | ||||
assert row["priority"] is None | assert len(scheduler_origins) == 0 |
There are a lot of duplicated hardcoded URLs in those fixtures.
You should use a function based on string templating or formatting to improve readability.
Also, you can simplify the names of the JSON data files, to something like data/gitea_repos_pageX.json.
The previous naming was only required when using the requests_mock_datadir fixture.