Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/pypi/tests/test_lister.py
# Copyright (C) 2019 The Software Heritage developers | # Copyright (C) 2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from pathlib import Path | |||||
from typing import List | |||||
def test_pypi_lister(lister_pypi, requests_mock_datadir): | import pytest | ||||
lister_pypi.run() | import requests | ||||
r = lister_pypi.scheduler.search_tasks(task_type="load-pypi") | from swh.lister.pypi.lister import PyPILister | ||||
assert len(r) == 4 | from swh.scheduler.model import ListedOrigin | ||||
for row in r: | |||||
assert row["type"] == "load-pypi" | |||||
# arguments check | |||||
args = row["arguments"]["args"] | |||||
assert len(args) == 0 | |||||
# kwargs | |||||
kwargs = row["arguments"]["kwargs"] | |||||
assert len(kwargs) == 1 | |||||
@pytest.fixture
def pypi_packages_testdata(datadir):
    """Provide the stored PyPI listing page and the packages expected from it.

    Returns:
        A ``(content, names, urls)`` tuple: the raw bytes of the
        ``https_pypi.org/simple`` file found under ``datadir``, the package
        names the tests expect, and the origin URL built for each name from
        ``PyPILister.PACKAGE_URL``.
    """
    content = (Path(datadir) / "https_pypi.org" / "simple").read_bytes()
    package_names = ["0lever-so", "0lever-utils", "0-orchestrator", "0wned"]
    url_template = PyPILister.PACKAGE_URL
    origin_urls = [url_template.format(package_name=name) for name in package_names]
    return content, package_names, origin_urls
assert row["policy"] == "recurring" | |||||
def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedOrigin]):
    """Assert that the two collections have the same origin URLs.

    Args:
        lister_urls: origin URLs the lister is expected to have produced.
        scheduler_origins: origins read back from the scheduler; only their
            ``url`` attribute is compared.

    Raises:
        AssertionError: if the URLs differ, in count or in value.
    """
    expected_urls = sorted(lister_urls)
    # Sort on the URL itself: the comparison must not depend on how (or
    # whether) ListedOrigin instances order themselves.
    listed_urls = sorted(origin.url for origin in scheduler_origins)
    # A single list comparison also covers the length check and gives a
    # complete diff on failure.
    assert listed_urls == expected_urls
Done. Inline comment (olasd): I guess this would deserve a comment about what's in the mocked response, because that feels a bit magic. Please also check the contents of the listed origins (that is, that the origin URLs are what you'd expect).
def test_pypi_list(swh_scheduler, requests_mock, mocker, pypi_packages_testdata):
    """Check a nominal run: one page is fetched and every package is recorded."""
    page_content, expected_names, expected_urls = pypi_packages_testdata

    # The package list URL answers with the listing page from the fixture,
    # which comes with the package names and origin URLs expected from it.
    mocked_responses = [{"content": page_content, "status_code": 200}]
    requests_mock.get(PyPILister.PACKAGE_LIST_URL, mocked_responses)

    lister = PyPILister(scheduler=swh_scheduler)
    # Wrap both methods in spies so the calls made during the run can be
    # asserted afterwards.
    lister.get_origins_from_page = mocker.spy(lister, "get_origins_from_page")
    lister.session.get = mocker.spy(lister.session, "get")

    stats = lister.run()

    listed = swh_scheduler.get_listed_origins(lister.lister_obj.id).origins

    lister.session.get.assert_called_once_with(lister.PACKAGE_LIST_URL)
    lister.get_origins_from_page.assert_called_once_with(expected_names)

    assert stats.pages == 1
    assert stats.origins == 4
    assert len(listed) == 4
    check_listed_origins(expected_urls, listed)

    assert lister.get_state_from_scheduler() is None
@pytest.mark.parametrize("http_code", [400, 429, 500])
def test_pypi_list_http_error(swh_scheduler, requests_mock, mocker, http_code):
    """Check that an HTTP error on the package list makes the run raise."""
    error_responses = [{"content": None, "status_code": http_code}]
    requests_mock.get(PyPILister.PACKAGE_LIST_URL, error_responses)

    lister = PyPILister(scheduler=swh_scheduler)
    lister.session.get = mocker.spy(lister.session, "get")

    with pytest.raises(requests.HTTPError):
        lister.run()

    # The list URL was requested exactly once.
    lister.session.get.assert_called_once_with(lister.PACKAGE_LIST_URL)

    # Nothing was recorded in the scheduler for this lister.
    listed = swh_scheduler.get_listed_origins(lister.lister_obj.id).origins
    assert len(listed) == 0
s/origins url/origin URLs/