Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/pypi/tests/test_lister.py
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os | |||||
import pytest | |||||
from swh.lister.pypi.lister import PyPILister | |||||
@pytest.fixture
def pypi_packages_page_content(datadir):
    """Return the raw bytes of the stored PyPI package listing page.

    The file is read from ``<datadir>/https_pypi.org/simple`` in binary mode,
    so the result can be handed to a mocked HTTP response as its ``content``.

    Args:
        datadir: directory holding the test data files (provided by another
            fixture; presumably the tests' ``data`` directory -- verify
            against the fixture definition).

    Returns:
        bytes: the complete content of the stored page.
    """
    data_file_path = os.path.join(datadir, "https_pypi.org", "simple")
    with open(data_file_path, "rb") as data_file:
        return data_file.read()
def test_pypi_lister(swh_scheduler, requests_mock_datadir):
    """Run the PyPI lister once and check what it recorded in the scheduler.

    HTTP traffic is answered by the ``requests_mock_datadir`` fixture
    (presumably from the files stored in the test data directory -- verify
    against the fixture definition).  The expected numbers below, one page
    and four origins, describe that mocked package listing.
    """
    lister = PyPILister(scheduler=swh_scheduler)

    stats = lister.run()

    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).origins

    # The mocked listing is served as a single page holding four packages
    assert stats.pages == 1
    assert stats.origins == 4
    assert len(scheduler_origins) == 4

    # Same URL check the previous, task-based version of this test applied
    # to each scheduled origin.
    for origin in scheduler_origins:
        assert "https://pypi.org/project" in origin.url
# Review note (olasd): if the ratelimit logic goes away, then this test can go away too (as well as the pages_content fixture).
def test_pypi_lister_rate_limit_hit(
    swh_scheduler, requests_mock, mocker, pypi_packages_page_content,
):
    """Check that the lister retries after HTTP 429 answers and still lists.

    The package list URL answers 429 twice and then 200 with the stored
    page.  The lister is expected to have slept for ``BACKOFF_FACTOR`` and
    then ``BACKOFF_FACTOR ** 2`` before ending up with the same result as an
    unthrottled run: one page, four origins.
    """
    # Patch sleep as seen from the lister module so waits are recorded
    # instead of being performed.
    mock_sleep = mocker.patch("swh.lister.pypi.lister.time.sleep")

    # Successive requests to the URL consume these responses in order
    requests_mock.get(
        PyPILister.PACKAGE_LIST_URL,
        [
            {"content": None, "status_code": 429},
            {"content": None, "status_code": 429},
            {"content": pypi_packages_page_content, "status_code": 200},
        ],
    )

    lister = PyPILister(scheduler=swh_scheduler)

    stats = lister.run()

    mock_sleep.assert_has_calls(
        [mocker.call(lister.BACKOFF_FACTOR), mocker.call(lister.BACKOFF_FACTOR ** 2)]
    )

    assert stats.pages == 1
    assert stats.origins == 4
    assert len(swh_scheduler.get_listed_origins(lister.lister_obj.id).origins) == 4
# Review note (olasd): please remove this as it is not being used anymore.