diff --git a/swh/lister/packagist/lister.py b/swh/lister/packagist/lister.py --- a/swh/lister/packagist/lister.py +++ b/swh/lister/packagist/lister.py @@ -1,15 +1,30 @@ -# Copyright (C) 2019 the Software Heritage developers +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import random import json -from .models import PackagistModel +import logging +import random + +from typing import Any, List, Mapping from swh.scheduler import utils from swh.lister.core.simple_lister import SimpleLister from swh.lister.core.lister_transports import ListerOnePageApiTransport +from .models import PackagistModel + + +logger = logging.getLogger(__name__) + + +def compute_package_url(repo_name: str) -> str: + """Compute packagist package url from repo name. + + """ + return 'https://repo.packagist.org/p/%s.json' % repo_name + class PackagistLister(ListerOnePageApiTransport, SimpleLister): """List packages available in the Packagist package manager. @@ -44,31 +59,35 @@ ListerOnePageApiTransport .__init__(self) SimpleLister.__init__(self, override_config=override_config) - def task_dict(self, origin_type, origin_url, **kwargs): + def task_dict(self, origin_type: str, origin_url: str, + **kwargs: Mapping[str, str]) -> Mapping[str, str]: """Return task format dict This is overridden from the lister_base as more information is needed for the ingestion task creation. """ - return utils.create_task_dict('load-%s' % origin_type, - kwargs.get('policy', 'recurring'), - kwargs.get('name'), origin_url) + return utils.create_task_dict( + 'load-%s' % origin_type, + kwargs.get('policy', 'recurring'), + kwargs.get('name'), origin_url, + retries_left=3) - def list_packages(self, response): + def list_packages(self, response: Any) -> List[str]: """List the actual packagist origins from the response.
""" response = json.loads(response.text) packages = [name for name in response['packageNames']] + logger.debug('Number of packages: %s', len(packages)) random.shuffle(packages) return packages - def get_model_from_repo(self, repo_name): + def get_model_from_repo(self, repo_name: str) -> Mapping[str, str]: """Transform from repository representation to model """ - url = 'https://repo.packagist.org/p/%s.json' % repo_name + url = compute_package_url(repo_name) return { 'uid': repo_name, 'name': repo_name, diff --git a/swh/lister/packagist/tests/conftest.py b/swh/lister/packagist/tests/conftest.py --- a/swh/lister/packagist/tests/conftest.py +++ b/swh/lister/packagist/tests/conftest.py @@ -1 +1,23 @@ +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import pytest + from swh.lister.core.tests.conftest import * # noqa + + +@pytest.fixture +def lister_packagist(swh_listers): + lister = swh_listers['packagist'] + + # Amend the scheduler with an unknown yet load-packagist task type + lister.scheduler.create_task_type({ + 'type': 'load-packagist', + 'description': 'Load packagist origin', + 'backend_name': 'swh.loader.package.tasks.LoaderPackagist', + 'default_interval': '1 day', + }) + + return lister diff --git a/swh/lister/packagist/tests/api_response.json b/swh/lister/packagist/tests/data/packagist.org/packages_list.json rename from swh/lister/packagist/tests/api_response.json rename to swh/lister/packagist/tests/data/packagist.org/packages_list.json diff --git a/swh/lister/packagist/tests/test_lister.py b/swh/lister/packagist/tests/test_lister.py --- a/swh/lister/packagist/tests/test_lister.py +++ b/swh/lister/packagist/tests/test_lister.py @@ -1,11 +1,14 @@ -# Copyright (C) 2019 the Software Heritage developers +# Copyright (C) 2019 The Software Heritage developers +# 
See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import requests_mock + from unittest.mock import patch -from swh.lister.packagist.lister import PackagistLister + +from swh.lister.packagist.lister import PackagistLister, compute_package_url from swh.lister.core.tests.test_lister import HttpSimpleListerTester @@ -29,7 +32,7 @@ Lister = PackagistLister PAGE = 'https://packagist.org/packages/list.json' lister_subdir = 'packagist' - good_api_response_file = 'api_response.json' + good_api_response_file = 'data/packagist.org/packages_list.json' entries = 5 @requests_mock.Mocker() @@ -54,13 +57,46 @@ for key, values in model[0].items(): assert values == expected_model[key] - def test_task_dict(self): + @patch('swh.lister.packagist.lister.utils.create_task_dict') + def test_task_dict(self, mock_create_tasks): """Test the task creation of lister """ fl = self.get_fl() - with patch('swh.lister.packagist.lister.utils.create_task_dict') as mock_create_tasks: # noqa - fl.task_dict(origin_type='packagist', origin_url='https://abc', - name='test_pack') + fl.task_dict(origin_type='packagist', origin_url='https://abc', + name='test_pack') mock_create_tasks.assert_called_once_with( - 'load-packagist', 'recurring', 'test_pack', 'https://abc') + 'load-packagist', 'recurring', 'test_pack', 'https://abc', + retries_left=3) + + +def test_compute_package_url(): + expected_url = 'https://repo.packagist.org/p/hello.json' + actual_url = compute_package_url('hello') + assert actual_url == expected_url + + +def test_packagist_lister(lister_packagist, requests_mock_datadir): + lister_packagist.run() + + r = lister_packagist.scheduler.search_tasks(task_type='load-packagist') + assert len(r) == 5 + + for row in r: + assert row['type'] == 'load-packagist' + # arguments check + args = row['arguments']['args'] + assert len(args) == 2 + + package 
= args[0] + url = args[1] + + expected_url = compute_package_url(package) + assert url == expected_url + + # kwargs + kwargs = row['arguments']['kwargs'] + assert kwargs == {} + + assert row['policy'] == 'recurring' + assert row['priority'] is None