Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/pypi/tests/test_lister.py
# Copyright (C) 2019 The Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from collections import defaultdict | |||||
from datetime import datetime, timezone | |||||
from pathlib import Path | from pathlib import Path | ||||
from typing import List | from typing import List, Tuple | ||||
import pytest | import pytest | ||||
import requests | import requests | ||||
from swh.lister.pypi.lister import PyPILister | from swh.lister.pypi.lister import PackageListPage, PyPILister, PyPIListerState | ||||
from swh.scheduler.model import ListedOrigin | from swh.scheduler.model import ListedOrigin | ||||
def pypi_url(package_name: str) -> str:
    """Format ``PyPILister.PACKAGE_URL`` with the given package name."""
    url_template = PyPILister.PACKAGE_URL
    return url_template.format(package_name=package_name)
def pypi_urls(packages: List[str]) -> List[str]:
    """Map every package name to its url (see ``pypi_url``), keeping input order."""
    return list(map(pypi_url, packages))
@pytest.fixture
def pypi_packages_testdata(datadir) -> Tuple[bytes, PackageListPage, List[str]]:
    """Provide the canned package index and what the lister should make of it.

    Returns:
        a 3-tuple made of the raw bytes of the ``https_pypi.org/simple`` data
        file, the expected page of packages (one dict per package, with a
        ``None`` last update) and the expected package urls.

    """
    expected_names = ["0lever-so", "0lever-utils", "0-orchestrator", "0wned"]
    index_bytes = (Path(datadir) / "https_pypi.org" / "simple").read_bytes()
    expected_page = [dict(name=name, last_update=None) for name in expected_names]
    return index_bytes, expected_page, pypi_urls(expected_names)
def check_listed_origins(
    lister_urls: List[str], scheduler_origins: "List[ListedOrigin]"
):
    """Assert that the two collections hold exactly the same origin URLs.

    Only the ``url`` attribute of each scheduler origin is looked at, so the
    check relies neither on the order of the inputs nor on the way origin
    objects compare with one another.

    Args:
        lister_urls: the urls the lister is expected to have listed
        scheduler_origins: the origins actually recorded in the scheduler

    Raises:
        AssertionError: when the two (sorted) url lists differ, be it in
            length or in content.

    """
    expected_urls = sorted(lister_urls)
    # Sort on the url itself: sorting the origin objects and pairing them
    # positionally with the sorted urls is only right if the objects happen
    # to be ordered by url.
    actual_urls = sorted(origin.url for origin in scheduler_origins)
    assert expected_urls == actual_urls
def test_pypi_list(swh_scheduler, requests_mock, mocker, pypi_packages_testdata):
    """A full listing fetches the package index once and records its 4 origins."""
    raw_index, expected_page, expected_urls = pypi_packages_testdata
    requests_mock.get(PyPILister.PACKAGE_LIST_URL, content=raw_index)

    lister = PyPILister(scheduler=swh_scheduler)
    # Spy on the real methods so the calls can be checked once the run is over
    lister.get_origins_from_page = mocker.spy(lister, "get_origins_from_page")
    lister.session.get = mocker.spy(lister.session, "get")

    stats = lister.run()

    listed = swh_scheduler.get_listed_origins(lister.lister_obj.id)
    scheduler_origins = listed.results

    lister.session.get.assert_called_once_with(lister.PACKAGE_LIST_URL)
    lister.get_origins_from_page.assert_called_once_with(expected_page)

    assert stats.pages == 1
    assert stats.origins == 4

    assert len(scheduler_origins) == 4
    check_listed_origins(expected_urls, scheduler_origins)

    expected_state = PyPIListerState(last_visit=None)
    assert lister.get_state_from_scheduler() == expected_state
@pytest.mark.parametrize("http_code", [400, 429, 500]) | @pytest.mark.parametrize("http_code", [400, 429, 500]) | ||||
def test_pypi_list_http_error(swh_scheduler, requests_mock, mocker, http_code): | def test_pypi_list_http_error(swh_scheduler, requests_mock, mocker, http_code): | ||||
requests_mock.get( | requests_mock.get( | ||||
PyPILister.PACKAGE_LIST_URL, [{"content": None, "status_code": http_code},], | PyPILister.PACKAGE_LIST_URL, [{"content": None, "status_code": http_code},], | ||||
) | ) | ||||
def test_lister_pypi_from_configfile(swh_scheduler_config, mocker):
    """``from_configfile`` builds a lister out of the (patched) loaded configuration."""
    fake_config = {
        "scheduler": {"cls": "local", **swh_scheduler_config},
        "credentials": {},
    }
    mocker.patch("swh.lister.pattern.load_from_envvar", return_value=fake_config)

    lister = PyPILister.from_configfile()

    assert lister.scheduler is not None
    assert lister.credentials is not None
@pytest.fixture
def mock_rpc_call(mocker, swh_scheduler):
    """Prepare the scheduler and fake the rpc call done by an incremental listing.

    The lister state stored in the scheduler gets its ``last_visit`` set to a
    fixed date, and ``PyPILister._last_updates_since`` is patched so it returns
    canned changelog entries.

    To retrieve or update the faked data, open a python3 toplevel and execute the
    following:

    .. code:: python

       from datetime import timezone, datetime, timedelta
       from xmlrpc.client import ServerProxy
       from swh.scheduler.utils import utcnow
       RPC_URL = "https://pypi.org/pypi"
       cli = ServerProxy(RPC_URL)
       date_yesterday = utcnow() - timedelta(days=1)
       date_yesterday
       datetime.datetime(2021, 7, 6, 12, 54, 49, 673346, tzinfo=datetime.timezone.utc)
       recent_changes = cli.changelog(int(date_yesterday.timestamp()))
       # recent_changes[0:20] should have sufficient data to update the tests

    Returns:
        a 3-tuple of the following Tuple[datetime, List[Tuple[str, str, int, str]],
        MagicMock] type: the date stored as last visit, the canned changelog
        entries and the mock standing in for ``_last_updates_since``.

    """
    last_visit = datetime(
        year=2021,
        month=7,
        day=6,
        hour=12,
        minute=54,
        second=49,
        microsecond=673346,
        tzinfo=timezone.utc,
    )

    # Record that date as the last visit in the lister state kept by the scheduler
    lister = swh_scheduler.get_or_create_lister(
        name=PyPILister.LISTER_NAME, instance_name=PyPILister.INSTANCE
    )
    lister.current_state = {"last_visit": last_visit.isoformat()}
    swh_scheduler.update_lister(lister)

    changelog = [
        ["coordinate-geometry", "1.0.7", 1625576111, "new release"],
        [
            "coordinate-geometry",
            "1.0.7",
            1625576111,
            "add source file coordinate_geometry-1.0.7.tar.gz",
        ],
        ["py-Ultroid", "45.5b0", 1625576137, "new release"],
        [
            "py-Ultroid",
            "45.5b0",
            1625576137,
            "add py3 file py_Ultroid-45.5b0-py3-none-any.whl",
        ],
        [
            "py-Ultroid",
            "45.5b0",
            1625576139,
            "add source file py-Ultroid-45.5b0.tar.gz",
        ],
        ["bdrk", "0.9.0", 1625576160, "new release"],
        ["bdrk", "0.9.0", 1625576160, "add py3 file bdrk-0.9.0-py3-none-any.whl"],
        ["bdrk", "0.9.0", 1625576163, "add source file bdrk-0.9.0.tar.gz"],
        ["dantro", "0.17.1", 1625576165, "new release"],
        ["dantro", "0.17.1", 1625576165, "add py3 file dantro-0.17.1-py3-none-any.whl"],
        ["dantro", "0.17.1", 1625576167, "add source file dantro-0.17.1.tar.gz"],
        ["bamr", "0.1.1", 1625576222, "new release"],
        ["bamr", "0.1.1", 1625576222, "add source file bamr-0.1.1.tar.gz"],
        ["niozdaspy", None, 1625576223, "create"],
        ["niozdaspy", None, 1625576223, "add Owner nioz"],
        ["niozdaspy", "1.0", 1625576223, "new release"],
        ["niozdaspy", "1.0", 1625576223, "add py3 file niozdaspy-1.0-py3-none-any.whl"],
        ["niozdaspy", "1.0", 1625576225, "add source file niozdaspy-1.0.tar.gz"],
        ["analysis-engine", "0.0.30", 1625576277, "new release"],
        [
            "analysis-engine",
            "0.0.30",
            1625576277,
            "add py3 file analysis_engine-0.0.30-py3-none-any.whl",
        ],
    ]

    rpc_mock = mocker.patch(
        "swh.lister.pypi.lister.PyPILister._last_updates_since",
        return_value=changelog,
    )
    return last_visit, changelog, rpc_mock
def test_lister_pypi_incremental(mock_rpc_call, swh_scheduler):
    """An incremental listing records one origin per updated package.

    Each recorded origin must carry, as ``last_update``, the most recent release
    date found for its package in the (faked) changelog, and the lister state
    must end up with a ``last_visit`` more recent than the one set up by the
    ``mock_rpc_call`` fixture.

    """
    date_yesterday, data, mock = mock_rpc_call

    # Gather every release date (epoch seconds) seen for each package
    updated_packages = defaultdict(list)
    for package, _, release_date, _ in data:
        updated_packages[package].append(release_date)
    assert updated_packages

    # An epoch timestamp denotes an instant in UTC: convert it with an explicit
    # tz. Going through a naive local datetime and then stamping it as UTC
    # shifts the expected value by the UTC offset of the machine running the
    # tests.
    expected_last_updates = {
        pypi_url(package): datetime.fromtimestamp(max(releases), tz=timezone.utc)
        for package, releases in updated_packages.items()
    }

    expected_pypi_urls = pypi_urls(list(updated_packages))

    lister = PyPILister(scheduler=swh_scheduler, incremental=True)

    stats = lister.run()

    assert mock.called
    assert stats.pages == 1
    assert stats.origins == len(updated_packages)

    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    assert len(scheduler_origins) == stats.origins

    check_listed_origins(expected_pypi_urls, scheduler_origins)

    actual_scheduler_state = lister.get_state_from_scheduler()
    # our visit is most recent now
    assert actual_scheduler_state.last_visit > date_yesterday

    for listed_origin in scheduler_origins:
        assert listed_origin.last_update is not None
        assert listed_origin.last_update == expected_last_updates[listed_origin.url]
mock_pypi_xmlrpc?