Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/sourceforge/tests/test_lister.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
import functools | import functools | ||||
import json | import json | ||||
from pathlib import Path | from pathlib import Path | ||||
import re | import re | ||||
from iso8601 import iso8601 | from iso8601 import iso8601 | ||||
import pytest | import pytest | ||||
▲ Show 20 Lines • Show All 350 Lines • ▼ Show 20 Lines | def test_sourceforge_lister_incremental(swh_scheduler, requests_mock, datadir, mocker): | ||||
_check_listed_origins(lister, swh_scheduler) | _check_listed_origins(lister, swh_scheduler) | ||||
def test_sourceforge_lister_retry(swh_scheduler, requests_mock, mocker, datadir): | def test_sourceforge_lister_retry(swh_scheduler, requests_mock, mocker, datadir): | ||||
lister = SourceForgeLister(scheduler=swh_scheduler) | lister = SourceForgeLister(scheduler=swh_scheduler) | ||||
# Exponential retries take a long time, so stub time.sleep | # Exponential retries take a long time, so stub time.sleep | ||||
mocked_sleep = mocker.patch.object(lister.page_request.retry, "sleep") | mocked_sleep = mocker.patch.object(lister.http_request.retry, "sleep") | ||||
requests_mock.get( | requests_mock.get( | ||||
MAIN_SITEMAP_URL, | MAIN_SITEMAP_URL, | ||||
[ | [ | ||||
{"status_code": 429}, | {"status_code": 429}, | ||||
{"status_code": 429}, | {"status_code": 429}, | ||||
{"text": get_main_sitemap(datadir)}, | {"text": get_main_sitemap(datadir)}, | ||||
], | ], | ||||
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines | |||||
@pytest.mark.parametrize("status_code", [500, 503, 504, 403, 404]) | @pytest.mark.parametrize("status_code", [500, 503, 504, 403, 404]) | ||||
def test_sourceforge_lister_http_error( | def test_sourceforge_lister_http_error( | ||||
swh_scheduler, requests_mock, status_code, mocker | swh_scheduler, requests_mock, status_code, mocker | ||||
): | ): | ||||
lister = SourceForgeLister(scheduler=swh_scheduler) | lister = SourceForgeLister(scheduler=swh_scheduler) | ||||
# Exponential retries take a long time, so stub time.sleep | # Exponential retries take a long time, so stub time.sleep | ||||
mocked_sleep = mocker.patch.object(lister.page_request.retry, "sleep") | mocked_sleep = mocker.patch.object(lister.http_request.retry, "sleep") | ||||
requests_mock.get(MAIN_SITEMAP_URL, status_code=status_code) | requests_mock.get(MAIN_SITEMAP_URL, status_code=status_code) | ||||
with pytest.raises(HTTPError): | with pytest.raises(HTTPError): | ||||
lister.run() | lister.run() | ||||
exp_retries = [] | exp_retries = [] | ||||
if status_code >= 500: | if status_code >= 500: | ||||
exp_retries = [1.0, 10.0, 100.0, 1000.0] | exp_retries = [1.0, 10.0, 100.0, 1000.0] | ||||
assert_sleep_calls(mocker, mocked_sleep, exp_retries) | assert_sleep_calls(mocker, mocked_sleep, exp_retries) | ||||
@pytest.mark.parametrize("status_code", [500, 503, 504, 403, 404]) | @pytest.mark.parametrize("status_code", [500, 503, 504, 403, 404]) | ||||
def test_sourceforge_lister_project_error( | def test_sourceforge_lister_project_error( | ||||
datadir, swh_scheduler, requests_mock, status_code, mocker | datadir, swh_scheduler, requests_mock, status_code, mocker | ||||
): | ): | ||||
lister = SourceForgeLister(scheduler=swh_scheduler) | lister = SourceForgeLister(scheduler=swh_scheduler) | ||||
# Exponential retries take a long time, so stub time.sleep | # Exponential retries take a long time, so stub time.sleep | ||||
mocker.patch.object(lister.page_request.retry, "sleep") | mocker.patch.object(lister.http_request.retry, "sleep") | ||||
requests_mock.get( | requests_mock.get( | ||||
MAIN_SITEMAP_URL, | MAIN_SITEMAP_URL, | ||||
text=get_main_sitemap(datadir), | text=get_main_sitemap(datadir), | ||||
additional_matcher=_check_request_headers, | additional_matcher=_check_request_headers, | ||||
) | ) | ||||
requests_mock.get( | requests_mock.get( | ||||
"https://sourceforge.net/allura_sitemap/sitemap-0.xml", | "https://sourceforge.net/allura_sitemap/sitemap-0.xml", | ||||
▲ Show 20 Lines • Show All 67 Lines • Show Last 20 Lines |