Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/packagist/tests/test_lister.py
# Copyright (C) 2019 The Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import unittest | import json | ||||
from unittest.mock import patch | from pathlib import Path | ||||
import requests_mock | import iso8601 | ||||
from swh.lister.core.tests.test_lister import HttpSimpleListerTester | from swh.lister.packagist.lister import PackagistLister | ||||
from swh.lister.packagist.lister import PackagistLister, compute_package_url | |||||
expected_packages = [ | _packages_list = { | ||||
"0.0.0/composer-include-files", | "packageNames": [ | ||||
"0.0.0/laravel-env-shim", | "ljjackson/linnworks", | ||||
"0.0.1/try-make-package", | "lky/wx_article", | ||||
"0099ff/dialogflowphp", | "spryker-eco/computop-api", | ||||
"00f100/array_dot", | |||||
] | ] | ||||
expected_model = { | |||||
"uid": "0099ff/dialogflowphp", | |||||
"name": "0099ff/dialogflowphp", | |||||
"full_name": "0099ff/dialogflowphp", | |||||
"html_url": "https://repo.packagist.org/p/0099ff/dialogflowphp.json", | |||||
"origin_url": "https://repo.packagist.org/p/0099ff/dialogflowphp.json", | |||||
"origin_type": "packagist", | |||||
} | } | ||||
class PackagistListerTester(HttpSimpleListerTester, unittest.TestCase): | def _package_metadata(datadir, package_name): | ||||
Lister = PackagistLister | return json.loads( | ||||
PAGE = "https://packagist.org/packages/list.json" | Path(datadir, f"{package_name.replace('/', '_')}.json").read_text() | ||||
lister_subdir = "packagist" | ) | ||||
good_api_response_file = "data/https_packagist.org/packages_list.json" | |||||
entries = 5 | |||||
@requests_mock.Mocker() | |||||
def test_list_packages(self, http_mocker): | |||||
"""List packages from simple api page should retrieve all packages within | |||||
""" | def _package_origin_info(package_name, package_metadata): | ||||
http_mocker.get(self.PAGE, text=self.mock_response) | origin_url = None | ||||
fl = self.get_fl() | visit_type = None | ||||
packages = fl.list_packages(self.get_api_response(0)) | last_update = None | ||||
for version_info in package_metadata["packages"][package_name].values(): | |||||
origin_url = version_info["source"].get("url") | |||||
visit_type = version_info["source"].get("type") | |||||
if "time" in version_info: | |||||
version_date = iso8601.parse_date(version_info["time"]) | |||||
if last_update is None or version_date > last_update: | |||||
last_update = version_date | |||||
return origin_url, visit_type, last_update | |||||
def _request_without_if_modified_since(request): | |||||
return request.headers.get("If-Modified-Since") is None | |||||
def _request_with_if_modified_since(request): | |||||
return request.headers.get("If-Modified-Since") is not None | |||||
def test_packagist_lister(swh_scheduler, requests_mock, datadir): | |||||
# first listing, should return one origin per package | |||||
lister = PackagistLister(scheduler=swh_scheduler) | |||||
requests_mock.get(lister.PACKAGIST_PACKAGES_LIST_URL, json=_packages_list) | |||||
packages_metadata = {} | |||||
for package_name in _packages_list["packageNames"]: | |||||
metadata = _package_metadata(datadir, package_name) | |||||
packages_metadata[package_name] = metadata | |||||
requests_mock.get( | |||||
f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json", | |||||
json=metadata, | |||||
additional_matcher=_request_without_if_modified_since, | |||||
) | |||||
stats = lister.run() | |||||
for package in expected_packages: | assert stats.pages == 1 | ||||
assert package in packages | assert stats.origins == len(_packages_list["packageNames"]) | ||||
assert lister.updated | |||||
def test_transport_response_simplified(self): | scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results | ||||
"""Test model created by the lister | |||||
""" | for package_name, package_metadata in packages_metadata.items(): | ||||
fl = self.get_fl() | origin_url, visit_type, last_update = _package_origin_info( | ||||
model = fl.transport_response_simplified(["0099ff/dialogflowphp"]) | package_name, package_metadata | ||||
assert len(model) == 1 | ) | ||||
for key, values in model[0].items(): | filtered_origins = [o for o in scheduler_origins if o.url == origin_url] | ||||
assert values == expected_model[key] | assert filtered_origins | ||||
assert filtered_origins[0].visit_type == visit_type | |||||
assert filtered_origins[0].last_update == last_update | |||||
# second listing, should return 0 origins as no package metadata | |||||
# has been updated since first listing | |||||
lister = PackagistLister(scheduler=swh_scheduler) | |||||
for package_name in _packages_list["packageNames"]: | |||||
requests_mock.get( | |||||
f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json", | |||||
additional_matcher=_request_with_if_modified_since, | |||||
status_code=304, | |||||
) | |||||
@patch("swh.lister.packagist.lister.utils.create_task_dict") | assert lister.get_state_from_scheduler().last_listing_date is not None | ||||
def test_task_dict(self, mock_create_tasks): | |||||
"""Test the task creation of lister | |||||
""" | stats = lister.run() | ||||
fl = self.get_fl() | |||||
fl.task_dict( | assert stats.pages == 1 | ||||
origin_type="packagist", origin_url="https://abc", name="test_pack" | assert stats.origins == 0 | ||||
) | assert lister.updated | ||||
mock_create_tasks.assert_called_once_with( | |||||
"load-packagist", "recurring", "test_pack", "https://abc", retries_left=3 | |||||
def test_packagist_lister_missing_metadata(swh_scheduler, requests_mock, datadir): | |||||
lister = PackagistLister(scheduler=swh_scheduler) | |||||
requests_mock.get(lister.PACKAGIST_PACKAGES_LIST_URL, json=_packages_list) | |||||
for package_name in _packages_list["packageNames"]: | |||||
requests_mock.get( | |||||
f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json", | |||||
additional_matcher=_request_without_if_modified_since, | |||||
status_code=404, | |||||
) | ) | ||||
stats = lister.run() | |||||
def test_compute_package_url(): | assert stats.pages == 1 | ||||
expected_url = "https://repo.packagist.org/p/hello.json" | assert stats.origins == 0 | ||||
actual_url = compute_package_url("hello") | |||||
assert actual_url == expected_url | |||||
def test_packagist_lister(lister_packagist, requests_mock_datadir): | def test_packagist_lister_empty_metadata(swh_scheduler, requests_mock, datadir): | ||||
lister_packagist.run() | lister = PackagistLister(scheduler=swh_scheduler) | ||||
requests_mock.get(lister.PACKAGIST_PACKAGES_LIST_URL, json=_packages_list) | |||||
for package_name in _packages_list["packageNames"]: | |||||
requests_mock.get( | |||||
f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json", | |||||
additional_matcher=_request_without_if_modified_since, | |||||
json={"packages": {}}, | |||||
) | |||||
stats = lister.run() | |||||
assert stats.pages == 1 | |||||
assert stats.origins == 0 | |||||
r = lister_packagist.scheduler.search_tasks(task_type="load-packagist") | |||||
assert len(r) == 5 | |||||
for row in r: | def test_packagist_lister_package_with_bitbucket_hg_origin( | ||||
assert row["type"] == "load-packagist" | swh_scheduler, requests_mock, datadir | ||||
# arguments check | ): | ||||
args = row["arguments"]["args"] | package_name = "den1n/contextmenu" | ||||
assert len(args) == 2 | lister = PackagistLister(scheduler=swh_scheduler) | ||||
requests_mock.get( | |||||
lister.PACKAGIST_PACKAGES_LIST_URL, json={"packageNames": [package_name]} | |||||
) | |||||
requests_mock.get( | |||||
f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json", | |||||
additional_matcher=_request_without_if_modified_since, | |||||
json=_package_metadata(datadir, package_name), | |||||
) | |||||
package = args[0] | stats = lister.run() | ||||
url = args[1] | |||||
expected_url = compute_package_url(package) | assert stats.pages == 1 | ||||
assert url == expected_url | assert stats.origins == 0 | ||||
# kwargs | |||||
kwargs = row["arguments"]["kwargs"] | |||||
assert kwargs == {} | |||||
assert row["policy"] == "recurring" | def test_lister_from_configfile(swh_scheduler_config, mocker): | ||||
assert row["priority"] is None | load_from_envvar = mocker.patch("swh.lister.pattern.load_from_envvar") | ||||
load_from_envvar.return_value = { | |||||
"scheduler": {"cls": "local", **swh_scheduler_config}, | |||||
"credentials": {}, | |||||
} | |||||
lister = PackagistLister.from_configfile() | |||||
assert lister.scheduler is not None | |||||
assert lister.credentials is not None |