diff --git a/swh/lister/gitlab/tests/data/https_gite.lirmm.fr/api_response_page1.json b/swh/lister/gitlab/tests/data/https_gite.lirmm.fr/api_response_page1.json new file mode 100644 index 0000000..deee20f --- /dev/null +++ b/swh/lister/gitlab/tests/data/https_gite.lirmm.fr/api_response_page1.json @@ -0,0 +1,42 @@ +[ + { + "avatar_url": null, + "created_at": "2021-01-02T13:25:44.175Z", + "default_branch": "master", + "description": "Miscellaneous utils that are commonly used in multiple projects.", + "forks_count": 0, + "http_url_to_repo": "https://gite.lirmm.fr/yuquan/roboticsutils.git", + "id": 4456, + "last_activity_at": "2021-01-14T11:32:50.672Z", + "name": "RoboticsUtils", + "name_with_namespace": "Wang Yuquan / RoboticsUtils", + "namespace": {}, + "path": "roboticsutils", + "path_with_namespace": "yuquan/roboticsutils", + "readme_url": null, + "ssh_url_to_repo": "git@gite.lirmm.fr:yuquan/roboticsutils.git", + "star_count": 0, + "tag_list": [], + "web_url": "https://gite.lirmm.fr/yuquan/roboticsutils" + }, + { + "avatar_url": "https://gite.lirmm.fr/uploads/-/system/project/avatar/4444/pacq.jpg", + "created_at": "2020-12-15T15:20:15.494Z", + "default_branch": "master", + "description": "", + "forks_count": 0, + "http_url_to_repo": "https://gite.lirmm.fr/constraint-acquisition-team/pacq.git", + "id": 4444, + "last_activity_at": "2020-12-15T19:43:53.678Z", + "name": "PACQ", + "name_with_namespace": "Constraint Acquisition Team / PACQ", + "namespace": {}, + "path": "pacq", + "path_with_namespace": "constraint-acquisition-team/pacq", + "readme_url": "https://gite.lirmm.fr/constraint-acquisition-team/pacq/-/blob/master/README.md", + "ssh_url_to_repo": "git@gite.lirmm.fr:constraint-acquisition-team/pacq.git", + "star_count": 0, + "tag_list": [], + "web_url": "https://gite.lirmm.fr/constraint-acquisition-team/pacq" + } +] diff --git a/swh/lister/gitlab/tests/data/https_gite.lirmm.fr/api_response_page2.json 
b/swh/lister/gitlab/tests/data/https_gite.lirmm.fr/api_response_page2.json new file mode 100644 index 0000000..7ad4d0b --- /dev/null +++ b/swh/lister/gitlab/tests/data/https_gite.lirmm.fr/api_response_page2.json @@ -0,0 +1,42 @@ +[ + { + "avatar_url": null, + "created_at": "2020-12-15T09:20:11.133Z", + "default_branch": "master", + "description": "", + "forks_count": 0, + "http_url_to_repo": "https://gite.lirmm.fr/mgardeisen/citest.git", + "id": 4440, + "last_activity_at": "2021-01-21T14:37:31.022Z", + "name": "CItest", + "name_with_namespace": "Marine Gardeisen / CItest", + "namespace": {}, + "path": "citest", + "path_with_namespace": "mgardeisen/citest", + "readme_url": "https://gite.lirmm.fr/mgardeisen/citest/-/blob/master/README.md", + "ssh_url_to_repo": "git@gite.lirmm.fr:mgardeisen/citest.git", + "star_count": 0, + "tag_list": [], + "web_url": "https://gite.lirmm.fr/mgardeisen/citest" + }, + { + "avatar_url": null, + "created_at": "2020-12-11T09:54:02.710Z", + "default_branch": "master", + "description": "Can be used to enforce the conventional commits specification on a package, generate a changelog recommend the next version to release\r\nSee see https://conventionalcommits.org", + "forks_count": 0, + "http_url_to_repo": "https://gite.lirmm.fr/pid/environments/conventional_commits.git", + "id": 4428, + "last_activity_at": "2021-01-08T11:11:54.178Z", + "name": "conventional_commits", + "name_with_namespace": "pid / environments / conventional_commits", + "namespace": {}, + "path": "conventional_commits", + "path_with_namespace": "pid/environments/conventional_commits", + "readme_url": "https://gite.lirmm.fr/pid/environments/conventional_commits/-/blob/master/README.md", + "ssh_url_to_repo": "git@gite.lirmm.fr:pid/environments/conventional_commits.git", + "star_count": 0, + "tag_list": [], + "web_url": "https://gite.lirmm.fr/pid/environments/conventional_commits" + } +] diff --git 
a/swh/lister/gitlab/tests/data/https_gitlab.com/api_v4_projects,page=1,order_by=id,sort=asc,per_page=20 b/swh/lister/gitlab/tests/data/https_gitlab.com/api_response_page1.json similarity index 100% rename from swh/lister/gitlab/tests/data/https_gitlab.com/api_v4_projects,page=1,order_by=id,sort=asc,per_page=20 rename to swh/lister/gitlab/tests/data/https_gitlab.com/api_response_page1.json diff --git a/swh/lister/gitlab/tests/test_lister.py b/swh/lister/gitlab/tests/test_lister.py index a1d791a..9155291 100644 --- a/swh/lister/gitlab/tests/test_lister.py +++ b/swh/lister/gitlab/tests/test_lister.py @@ -1,74 +1,115 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import json import logging +from pathlib import Path +from typing import Dict, List import pytest +from swh.lister import USER_AGENT from swh.lister.gitlab.lister import GitLabLister, _parse_page_id from swh.lister.pattern import ListerStats logger = logging.getLogger(__name__) -@pytest.fixture -def lister_gitlab(swh_scheduler): - url = "https://gitlab.com/api/v4/" - return GitLabLister(swh_scheduler, url=url) +def api_url(instance: str) -> str: + return f"https://{instance}/api/v4/" -# class GitLabListerTester(HttpListerTesterBase, unittest.TestCase): -# Lister = GitLabLister -# test_re = re.compile(r"^.*/projects.*page=(\d+).*") -# lister_subdir = "gitlab" -# good_api_response_file = "data/gitlab.com/api_response.json" -# bad_api_response_file = "data/gitlab.com/api_empty_response.json" -# first_index = 1 -# entries_per_page = 10 -# convert_type = int +def url_page(api_url: str, page_id: int) -> str: + return f"{api_url}projects?page={page_id}&order_by=id&sort=asc&per_page=20" -# def response_headers(self, request): -# headers = {"RateLimit-Remaining": "1"} -# if self.request_index(request) == 
self.first_index: -# headers.update( -# {"x-next-page": "3",} -# ) -# return headers +def _match_request(request): + return request.headers.get("User-Agent") == USER_AGENT -# def mock_rate_quota(self, n, request, context): -# self.rate_limit += 1 -# context.status_code = 403 -# context.headers["RateLimit-Remaining"] = "0" -# one_second = int((datetime.now() + timedelta(seconds=1.5)).timestamp()) -# context.headers["RateLimit-Reset"] = str(one_second) -# return '{"error":"dummy"}' +def test_lister_gitlab(datadir, swh_scheduler, requests_mock): + """Gitlab lister supports full listing + + """ + instance = "gitlab.com" + url = api_url(instance) + + response = gitlab_page_response(datadir, instance, 1) + + requests_mock.get( + url_page(url, 1), [{"json": response}], additional_matcher=_match_request, + ) + + lister_gitlab = GitLabLister( + swh_scheduler, url=api_url(instance), instance=instance + ) -def test_lister_gitlab(lister_gitlab, requests_mock_datadir): listed_result = lister_gitlab.run() - assert listed_result == ListerStats(pages=1, origins=10) + expected_nb_origins = len(response) + assert listed_result == ListerStats(pages=1, origins=expected_nb_origins) scheduler_origins = lister_gitlab.scheduler.get_listed_origins( lister_gitlab.lister_obj.id ).origins - assert len(scheduler_origins) == 10 + assert len(scheduler_origins) == expected_nb_origins + + for listed_origin in scheduler_origins: + assert listed_origin.visit_type == "git" + assert listed_origin.url.startswith(f"https://{instance}") + + +def gitlab_page_response(datadir, instance: str, page_id: int) -> List[Dict]: + """Return a page of repositories from the test dataset ([] if no such page)""" + datapath = Path(datadir, f"https_{instance}", f"api_response_page{page_id}.json") + return json.loads(datapath.read_text()) if datapath.exists() else [] + + +def test_lister_gitlab_with_pages(swh_scheduler, requests_mock, datadir): + """Gitlab lister supports pagination + + """ + instance = "gite.lirmm.fr" + url = api_url(instance) + 
+ response1 = gitlab_page_response(datadir, instance, 1) + response2 = gitlab_page_response(datadir, instance, 2) + + requests_mock.get( + url_page(url, 1), + [{"json": response1, "headers": {"Link": f"<{url_page(url, 2)}>; rel=next"}}], + additional_matcher=_match_request, + ) + + requests_mock.get( + url_page(url, 2), [{"json": response2}], additional_matcher=_match_request, + ) + + lister = GitLabLister(swh_scheduler, url=url) + listed_result = lister.run() + + expected_nb_origins = len(response1) + len(response2) + assert listed_result == ListerStats(pages=2, origins=expected_nb_origins) + + scheduler_origins = lister.scheduler.get_listed_origins( + lister.lister_obj.id + ).origins + assert len(scheduler_origins) == expected_nb_origins for listed_origin in scheduler_origins: assert listed_origin.visit_type == "git" - assert listed_origin.url.startswith("https://gitlab.com") + assert listed_origin.url.startswith(f"https://{instance}") @pytest.mark.parametrize( "url,expected_result", [ (None, None), ("http://dummy/?query=1", None), ("http://dummy/?foo=bar&page=1&some=result", 1), ("http://dummy/?foo=bar&page=&some=result", None), ], ) def test__parse_page_id(url, expected_result): assert _parse_page_id(url) == expected_result