Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/cgit/tests/test_lister.py
# Copyright (C) 2019-2020 the Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from typing import List | |||||
from swh.lister import __version__ | from swh.lister import __version__ | ||||
from swh.lister.cgit.lister import CGitLister | |||||
def test_lister_cgit_no_page(requests_mock_datadir, lister_cgit): | def test_lister_cgit_no_page(requests_mock_datadir, swh_scheduler): | ||||
anlambert: test name is ambiguous as `get_pages` is called in implementation. | |||||
assert lister_cgit.url == "https://git.savannah.gnu.org/cgit/" | url = "https://git.savannah.gnu.org/cgit/" | ||||
lister_cgit = CGitLister(swh_scheduler, url=url) | |||||
repos = list(lister_cgit.get_repos()) | repos: List[List[str]] = list(lister_cgit.get_pages()) | ||||
assert len(repos) == 977 | flattened_repos = sum(repos, []) | ||||
assert len(flattened_repos) == 977 | |||||
assert repos[0] == "https://git.savannah.gnu.org/cgit/elisp-es.git/" | assert flattened_repos[0] == "https://git.savannah.gnu.org/cgit/elisp-es.git/" | ||||
# note the url below is NOT a subpath of /cgit/ | # note the url below is NOT a subpath of /cgit/ | ||||
assert repos[-1] == "https://git.savannah.gnu.org/path/to/yetris.git/" # noqa | assert ( | ||||
flattened_repos[-1] == "https://git.savannah.gnu.org/path/to/yetris.git/" | |||||
) # noqa | |||||
# note the url below is NOT on the same server | # note the url below is NOT on the same server | ||||
assert repos[-2] == "http://example.org/cgit/xstarcastle.git/" | assert flattened_repos[-2] == "http://example.org/cgit/xstarcastle.git/" | ||||
def test_lister_cgit_model(requests_mock_datadir, swh_scheduler): | |||||
anlambert: test should be renamed here | |||||
url = "https://git.savannah.gnu.org/cgit/" | |||||
lister_cgit = CGitLister(swh_scheduler, url=url) | |||||
def test_lister_cgit_model(requests_mock_datadir, lister_cgit): | repo = next(lister_cgit.get_pages())[0] | ||||
repo = next(lister_cgit.get_repos()) | |||||
model = lister_cgit.build_model(repo) | origin_url = lister_cgit._get_origin_from_repository_url(repo) | ||||
assert model == { | assert origin_url == "https://git.savannah.gnu.org/git/elisp-es.git" | ||||
"uid": "https://git.savannah.gnu.org/cgit/elisp-es.git/", | |||||
"name": "elisp-es.git", | |||||
"origin_type": "git", | |||||
"instance": "git.savannah.gnu.org", | |||||
"origin_url": "https://git.savannah.gnu.org/git/elisp-es.git", | |||||
} | |||||
def test_lister_cgit_with_pages(requests_mock_datadir, lister_cgit): | def test_lister_cgit_with_pages(requests_mock_datadir, swh_scheduler): | ||||
lister_cgit.url = "https://git.tizen/cgit/" | url = "https://git.tizen/cgit/" | ||||
lister_cgit = CGitLister(swh_scheduler, url=url) | |||||
repos = list(lister_cgit.get_repos()) | repos: List[List[str]] = list(lister_cgit.get_pages()) | ||||
flattened_repos = sum(repos, []) | |||||
# we should have 16 repos (listed on 3 pages) | # we should have 16 repos (listed on 3 pages) | ||||
assert len(repos) == 16 | assert len(repos) == 3 | ||||
assert len(flattened_repos) == 16 | |||||
def test_lister_cgit_run(requests_mock_datadir, lister_cgit): | def test_lister_cgit_run(requests_mock_datadir, swh_scheduler): | ||||
lister_cgit.url = "https://git.tizen/cgit/" | url = "https://git.tizen/cgit/" | ||||
lister_cgit.run() | lister_cgit = CGitLister(swh_scheduler, url=url) | ||||
r = lister_cgit.scheduler.search_tasks(task_type="load-git") | lister_cgit.run() | ||||
anlambert: return value of the `run` method should be used to test the number of pages and listed origins | |||||
assert len(r) == 16 | |||||
for row in r: | scheduler_origins = swh_scheduler.get_listed_origins( | ||||
assert row["type"] == "load-git" | lister_cgit.lister_obj.id | ||||
# arguments check | ).origins | ||||
args = row["arguments"]["args"] | assert len(scheduler_origins) == 16 | ||||
assert len(args) == 0 | |||||
for listed_origin in scheduler_origins: | |||||
# kwargs | assert listed_origin.visit_type == "git" | ||||
kwargs = row["arguments"]["kwargs"] | assert listed_origin.url.startswith("https://git.tizen") | ||||
assert len(kwargs) == 1 | |||||
url = kwargs["url"] | |||||
assert url.startswith("https://git.tizen") | |||||
assert row["policy"] == "recurring" | |||||
assert row["priority"] is None | |||||
def test_lister_cgit_requests(requests_mock_datadir, swh_scheduler): | |||||
url = "https://git.tizen/cgit/" | |||||
lister_cgit = CGitLister(swh_scheduler, url=url) | |||||
def test_lister_cgit_requests(requests_mock_datadir, lister_cgit): | |||||
lister_cgit.url = "https://git.tizen/cgit/" | |||||
lister_cgit.run() | lister_cgit.run() | ||||
assert len(requests_mock_datadir.request_history) != 0 | assert len(requests_mock_datadir.request_history) != 0 | ||||
for request in requests_mock_datadir.request_history: | for request in requests_mock_datadir.request_history: | ||||
assert "User-Agent" in request.headers | assert "User-Agent" in request.headers | ||||
user_agent = request.headers["User-Agent"] | user_agent = request.headers["User-Agent"] | ||||
assert "Software Heritage Lister" in user_agent | assert "Software Heritage Lister" in user_agent | ||||
assert __version__ in user_agent | assert __version__ in user_agent |
anlambert: test name is ambiguous as `get_pages` is called in implementation.