Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/golang/tests/test_lister.py
- This file was added.
# Copyright (C) 2022 The Software Heritage developers | |||||
# See the AUTHORS file at the top-level directory of this distribution | |||||
# License: GNU General Public License version 3, or any later version | |||||
# See top-level LICENSE file for more information | |||||
from pathlib import Path | |||||
import iso8601 | |||||
from swh.lister.golang.lister import GolangLister | |||||
from swh.lister.tests.test_utils import assert_sleep_calls | |||||
from swh.lister.utils import WAIT_EXP_BASE | |||||
# Expected (module path, last update timestamp) pairs, ordered by module path.
# The "https://pkg.go.dev/" prefix of each origin URL is omitted here.
expected_listed = [
    ("collectd.org", "2019-04-11T18:47:25.450546+00:00"),
    ("github.com/blang/semver", "2019-04-15T13:54:39.107258+00:00"),
    ("github.com/bmizerany/pat", "2019-04-11T18:47:29.390564+00:00"),
    ("github.com/djherbis/buffer", "2019-04-11T18:47:29.974874+00:00"),
    ("github.com/djherbis/nio", "2019-04-11T18:47:32.283312+00:00"),
    ("github.com/gobuffalo/buffalo-plugins", "2019-04-15T13:54:34.222985+00:00"),
    ("github.com/gobuffalo/buffalo-pop", "2019-04-15T13:54:39.135792+00:00"),
    ("github.com/gobuffalo/clara", "2019-04-15T13:54:40.651916+00:00"),
    ("github.com/gobuffalo/genny", "2019-04-15T13:54:37.841547+00:00"),
    ("github.com/gobuffalo/packr", "2019-04-15T13:54:35.688900+00:00"),
    ("github.com/markbates/refresh", "2019-04-15T13:54:35.250835+00:00"),
    ("github.com/mitchellh/go-homedir", "2019-04-15T13:54:35.678214+00:00"),
    ("github.com/nats-io/nuid", "2019-04-11T18:47:28.102348+00:00"),
    ("github.com/oklog/ulid", "2019-04-11T18:47:23.234198+00:00"),
    ("github.com/pkg/errors", "2019-04-18T02:07:41.336899+00:00"),
    ("golang.org/x/sys", "2019-04-15T13:54:37.555525+00:00"),
    ("golang.org/x/text", "2019-04-10T19:08:52.997264+00:00"),
    # Only one x/tools entry is expected even though there are two versions,
    # and only the latest one's timestamp is used.
    ("golang.org/x/tools", "2019-04-15T13:54:41.905064+00:00"),
]
def _generate_responses(datadir, requests_mock):
    """Queue mocked GET responses for the Go modules index URL.

    For every ``page-*.txt`` fixture found in ``datadir``, three responses are
    queued in this order: an HTTP 429, an HTTP 500, then an HTTP 200 whose
    body is the content of the fixture file.

    Args:
        datadir: directory holding the ``page-*.txt`` fixture files.
        requests_mock: mocker on which the list of GET responses is registered
            for ``GolangLister.GOLANG_MODULES_INDEX_URL``.
    """
    responses = []
    # ``Path.glob`` yields entries in arbitrary (filesystem-dependent) order;
    # sort them so the pages are always served in the same order.
    for file in sorted(Path(datadir).glob("page-*.txt")):
        # Test that throttling and server errors are retried
        responses.append({"text": "", "status_code": 429})
        responses.append({"text": "", "status_code": 500})
        # Also test that the lister appropriately gets out of the infinite loop
        responses.append({"text": file.read_text(), "status_code": 200})

    requests_mock.get(GolangLister.GOLANG_MODULES_INDEX_URL, responses)
def test_golang_lister(swh_scheduler, mocker, requests_mock, datadir):
    """Run a full listing twice and check pages, origins and retry sleeps."""

    def run_listing():
        # Build a fresh lister; exponential retries take a long time, so the
        # retry's sleep is stubbed before the mocked responses are queued.
        golang_lister = GolangLister(scheduler=swh_scheduler)
        sleep_stub = mocker.patch.object(golang_lister.api_request.retry, "sleep")
        _generate_responses(datadir, requests_mock)
        return golang_lister, sleep_stub, golang_lister.run()

    # First listing: one origin per package is expected.
    lister, mocked_sleep, stats = run_listing()

    assert stats.pages == 3
    # The two `golang.org/x/tools` versions are *not* listed as separate origins
    assert stats.origins == 18

    listed = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    scheduler_origins = sorted(listed, key=lambda origin: origin.url)

    # Compare every recorded origin against its expected counterpart at once.
    actual = [
        (origin.url, origin.last_update, origin.visit_type)
        for origin in scheduler_origins
    ]
    expected = [
        (f"https://pkg.go.dev/{path}", iso8601.parse_date(timestamp), "golang")
        for path, timestamp in expected_listed
    ]
    assert actual == expected
    assert len(scheduler_origins) == len(expected_listed)

    # The sleep stub must have been called with exponential retries: one
    # (1, WAIT_EXP_BASE) pair for each of the three pages.
    assert_sleep_calls(mocker, mocked_sleep, [1, WAIT_EXP_BASE] * 3)

    # Doing it all again (without incremental) should give us the same result.
    _, _, stats = run_listing()

    assert stats.pages == 3
    assert stats.origins == 18