diff --git a/swh/lister/packagist/lister.py b/swh/lister/packagist/lister.py index 1620e24..fa2d581 100644 --- a/swh/lister/packagist/lister.py +++ b/swh/lister/packagist/lister.py @@ -1,79 +1,98 @@ -# Copyright (C) 2019 the Software Heritage developers +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import random import json -from .models import PackagistModel +import logging +import random + +from typing import Any, List, Mapping from swh.scheduler import utils from swh.lister.core.simple_lister import SimpleLister from swh.lister.core.lister_transports import ListerOnePageApiTransport +from .models import PackagistModel + + +logger = logging.getLogger(__name__) + + +def compute_package_url(repo_name: str) -> str: + """Compute Packagist package url from repo name. + + """ + return 'https://repo.packagist.org/p/%s.json' % repo_name + class PackagistLister(ListerOnePageApiTransport, SimpleLister): """List packages available in the Packagist package manager. The lister sends the request to the url present in the class variable `PAGE`, to receive a list of all the package names present in the Packagist package manager. Iterates over all the packages and constructs the metadata url of the package from the name of the package and creates a loading task. 
Task: Type: load-packagist Policy: recurring Args: Example: Type: load-packagist Policy: recurring Args: 'hypejunction/hypegamemechanics' 'https://repo.packagist.org/p/hypejunction/hypegamemechanics.json' """ MODEL = PackagistModel LISTER_NAME = 'packagist' PAGE = 'https://packagist.org/packages/list.json' instance = 'packagist' def __init__(self, override_config=None): ListerOnePageApiTransport .__init__(self) SimpleLister.__init__(self, override_config=override_config) - def task_dict(self, origin_type, origin_url, **kwargs): + def task_dict(self, origin_type: str, origin_url: str, + **kwargs: Mapping[str, str]) -> Mapping[str, str]: """Return task format dict This is overridden from the lister_base as more information is needed for the ingestion task creation. """ - return utils.create_task_dict('load-%s' % origin_type, - kwargs.get('policy', 'recurring'), - kwargs.get('name'), origin_url) + return utils.create_task_dict( + 'load-%s' % origin_type, + kwargs.get('policy', 'recurring'), + kwargs.get('name'), origin_url, + retries_left=3) - def list_packages(self, response): + def list_packages(self, response: Any) -> List[str]: """List the actual packagist origins from the response. 
""" response = json.loads(response.text) packages = [name for name in response['packageNames']] + logger.debug('Number of packages: %s', len(packages)) random.shuffle(packages) return packages - def get_model_from_repo(self, repo_name): + def get_model_from_repo(self, repo_name: str) -> Mapping[str, str]: """Transform from repository representation to model """ - url = 'https://repo.packagist.org/p/%s.json' % repo_name + url = compute_package_url(repo_name) return { 'uid': repo_name, 'name': repo_name, 'full_name': repo_name, 'html_url': url, 'origin_url': url, 'origin_type': 'packagist', } diff --git a/swh/lister/packagist/tests/conftest.py b/swh/lister/packagist/tests/conftest.py index 507fef9..fe31517 100644 --- a/swh/lister/packagist/tests/conftest.py +++ b/swh/lister/packagist/tests/conftest.py @@ -1 +1,23 @@ +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import pytest + from swh.lister.core.tests.conftest import * # noqa + + +@pytest.fixture +def lister_packagist(swh_listers): + lister = swh_listers['packagist'] + + # Amend the scheduler with an unknown yet load-packagist task type + lister.scheduler.create_task_type({ + 'type': 'load-packagist', + 'description': 'Load packagist origin', + 'backend_name': 'swh.loader.package.tasks.LoaderPackagist', + 'default_interval': '1 day', + }) + + return lister diff --git a/swh/lister/packagist/tests/api_response.json b/swh/lister/packagist/tests/data/packagist.org/packages_list.json similarity index 100% rename from swh/lister/packagist/tests/api_response.json rename to swh/lister/packagist/tests/data/packagist.org/packages_list.json diff --git a/swh/lister/packagist/tests/test_lister.py b/swh/lister/packagist/tests/test_lister.py index fb58424..f8b3a2a 100644 --- a/swh/lister/packagist/tests/test_lister.py +++ 
b/swh/lister/packagist/tests/test_lister.py @@ -1,66 +1,102 @@ -# Copyright (C) 2019 the Software Heritage developers +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import requests_mock + from unittest.mock import patch -from swh.lister.packagist.lister import PackagistLister + +from swh.lister.packagist.lister import PackagistLister, compute_package_url from swh.lister.core.tests.test_lister import HttpSimpleListerTester expected_packages = ['0.0.0/composer-include-files', '0.0.0/laravel-env-shim', '0.0.1/try-make-package', '0099ff/dialogflowphp', '00f100/array_dot'] expected_model = { 'uid': '0099ff/dialogflowphp', 'name': '0099ff/dialogflowphp', 'full_name': '0099ff/dialogflowphp', 'html_url': 'https://repo.packagist.org/p/0099ff/dialogflowphp.json', 'origin_url': 'https://repo.packagist.org/p/0099ff/dialogflowphp.json', 'origin_type': 'packagist', } class PackagistListerTester(HttpSimpleListerTester, unittest.TestCase): Lister = PackagistLister PAGE = 'https://packagist.org/packages/list.json' lister_subdir = 'packagist' - good_api_response_file = 'api_response.json' + good_api_response_file = 'data/packagist.org/packages_list.json' entries = 5 @requests_mock.Mocker() def test_list_packages(self, http_mocker): """List packages from simple api page should retrieve all packages within """ http_mocker.get(self.PAGE, text=self.mock_response) fl = self.get_fl() packages = fl.list_packages(self.get_api_response(0)) for package in expected_packages: assert package in packages def test_transport_response_simplified(self): """Test model created by the lister """ fl = self.get_fl() model = fl.transport_response_simplified(['0099ff/dialogflowphp']) assert len(model) == 1 for key, values in model[0].items(): assert values == expected_model[key] - def 
test_task_dict(self): + @patch('swh.lister.packagist.lister.utils.create_task_dict') + def test_task_dict(self, mock_create_tasks): """Test the task creation of lister """ fl = self.get_fl() - with patch('swh.lister.packagist.lister.utils.create_task_dict') as mock_create_tasks: # noqa - fl.task_dict(origin_type='packagist', origin_url='https://abc', - name='test_pack') + fl.task_dict(origin_type='packagist', origin_url='https://abc', + name='test_pack') mock_create_tasks.assert_called_once_with( - 'load-packagist', 'recurring', 'test_pack', 'https://abc') + 'load-packagist', 'recurring', 'test_pack', 'https://abc', + retries_left=3) + + +def test_compute_package_url(): + expected_url = 'https://repo.packagist.org/p/hello.json' + actual_url = compute_package_url('hello') + assert actual_url == expected_url + + +def test_packagist_lister(lister_packagist, requests_mock_datadir): + lister_packagist.run() + + r = lister_packagist.scheduler.search_tasks(task_type='load-packagist') + assert len(r) == 5 + + for row in r: + assert row['type'] == 'load-packagist' + # arguments check + args = row['arguments']['args'] + assert len(args) == 2 + + package = args[0] + url = args[1] + + expected_url = compute_package_url(package) + assert url == expected_url + + # kwargs + kwargs = row['arguments']['kwargs'] + assert kwargs == {} + + assert row['policy'] == 'recurring' + assert row['priority'] is None