diff --git a/README.md b/README.md --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ - `swh.lister.phabricator` - `swh.lister.cran` - `swh.lister.cgit` +- `swh.lister.packagist` Dependencies ------------ @@ -221,6 +222,19 @@ url_prefix='https://anongit.kde.org/') ``` +## lister-packagist + +Once configured, you can execute a Packagist lister using the following instructions +in a `python3` script: + +```lang=python +import logging +from swh.lister.packagist.tasks import packagist_lister + +logging.basicConfig(level=logging.DEBUG) +packagist_lister() +``` + Licensing --------- diff --git a/swh/lister/cli.py b/swh/lister/cli.py --- a/swh/lister/cli.py +++ b/swh/lister/cli.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi', - 'npm', 'phabricator', 'gnu', 'cran', 'cgit'] + 'npm', 'phabricator', 'gnu', 'cran', 'cgit', 'packagist'] @click.group(name='lister', context_settings=CONTEXT_SETTINGS) @@ -133,6 +133,11 @@ url_prefix='http://git.savannah.gnu.org/git/', override_config=override_conf) + elif lister == 'packagist': + from .packagist.models import ModelBase + from .packagist.lister import PackagistLister + _lister = PackagistLister(override_config=override_conf) + else: raise ValueError( 'Invalid lister %s: only supported listers are %s' % diff --git a/swh/lister/core/tests/conftest.py b/swh/lister/core/tests/conftest.py --- a/swh/lister/core/tests/conftest.py +++ b/swh/lister/core/tests/conftest.py @@ -13,6 +13,7 @@ 'swh.lister.gitlab.tasks', 'swh.lister.gnu.tasks', 'swh.lister.npm.tasks', - 'swh.lister.pypi.tasks', + 'swh.lister.packagist.tasks', 'swh.lister.phabricator.tasks', + 'swh.lister.pypi.tasks', ] diff --git a/swh/lister/packagist/__init__.py b/swh/lister/packagist/__init__.py new file mode 100644 diff --git a/swh/lister/packagist/lister.py b/swh/lister/packagist/lister.py new file mode 100644 --- /dev/null +++ b/swh/lister/packagist/lister.py @@ -0,0 +1,84 @@ +# Copyright (C) 
2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import random +import json +from .models import PackagistModel + +from swh.scheduler import utils +from swh.lister.core.simple_lister import SimpleLister +from swh.lister.core.lister_transports import ListerOnePageApiTransport + + +class PackagistLister(ListerOnePageApiTransport, SimpleLister): + """List packages available in the Packagist package manager. + + The lister sends the request to the url present in the class + variable `PAGE`, to receive a list of all the package names + present in the Packagist package manager. Iterates over all the + packages and constructs the metadata url of the package from + the name of the package and creates a loading task. + + Task: + Type: load-packagist + Policy: recurring + Args: + + + + Example: + Type: load-packagist + Policy: recurring + Args: + 'hypejunction/hypegamemechanics' + 'https://repo.packagist.org/p/hypejunction/hypegamemechanics.json' + + """ + MODEL = PackagistModel + LISTER_NAME = 'packagist' + PAGE = 'https://packagist.org/packages/list.json' + instance = 'packagist' + + def __init__(self, override_config=None): + ListerOnePageApiTransport .__init__(self) + SimpleLister.__init__(self, override_config=override_config) + + def task_dict(self, origin_type, origin_url, **kwargs): + """Return task format dict + + This is overridden from the lister_base as more information is + needed for the ingestion task creation. + + """ + return utils.create_task_dict('load-%s' % origin_type, 'recurring', + kwargs.get('name'), origin_url) + + def list_packages(self, response): + """List the actual packagist origins from the response. 
+ + """ + response = json.loads(response.text) + packages = [name for name in response['packageNames']] + random.shuffle(packages) + return packages + + def get_model_from_repo(self, repo_name): + """Transform from repository representation to model + + """ + url = 'https://repo.packagist.org/p/%s.json' % repo_name + return { + 'uid': repo_name, + 'name': repo_name, + 'full_name': repo_name, + 'html_url': url, + 'origin_url': url, + 'origin_type': 'packagist', + } + + def transport_response_simplified(self, response): + """Transform response to list for model manipulation + + """ + return [self.get_model_from_repo(repo_name) for repo_name in response] diff --git a/swh/lister/packagist/models.py b/swh/lister/packagist/models.py new file mode 100644 --- /dev/null +++ b/swh/lister/packagist/models.py @@ -0,0 +1,16 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from sqlalchemy import Column, String + +from ..core.models import ModelBase + + +class PackagistModel(ModelBase): + """a Packagist repository representation + + """ + __tablename__ = 'packagist_repo' + + uid = Column(String, primary_key=True) diff --git a/swh/lister/packagist/tasks.py b/swh/lister/packagist/tasks.py new file mode 100644 --- /dev/null +++ b/swh/lister/packagist/tasks.py @@ -0,0 +1,17 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.scheduler.celery_backend.config import app + +from .lister import PackagistLister + + +@app.task(name=__name__ + '.PackagistListerTask') +def packagist_lister(**lister_args): + PackagistLister(**lister_args).run() + + +@app.task(name=__name__ + '.ping') +def ping(): + return 'OK' diff --git a/swh/lister/packagist/tests/__init__.py b/swh/lister/packagist/tests/__init__.py new file mode 100644 
diff --git a/swh/lister/packagist/tests/api_response.json b/swh/lister/packagist/tests/api_response.json new file mode 100644 --- /dev/null +++ b/swh/lister/packagist/tests/api_response.json @@ -0,0 +1,9 @@ +{ + "packageNames": [ + "0.0.0/composer-include-files", + "0.0.0/laravel-env-shim", + "0.0.1/try-make-package", + "0099ff/dialogflowphp", + "00f100/array_dot" + ] +} \ No newline at end of file diff --git a/swh/lister/packagist/tests/conftest.py b/swh/lister/packagist/tests/conftest.py new file mode 100644 --- /dev/null +++ b/swh/lister/packagist/tests/conftest.py @@ -0,0 +1 @@ +from swh.lister.core.tests.conftest import * # noqa diff --git a/swh/lister/packagist/tests/test_lister.py b/swh/lister/packagist/tests/test_lister.py new file mode 100644 --- /dev/null +++ b/swh/lister/packagist/tests/test_lister.py @@ -0,0 +1,66 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import unittest +import requests_mock +from unittest.mock import patch +from swh.lister.packagist.lister import PackagistLister +from swh.lister.core.tests.test_lister import HttpSimpleListerTester + + +expected_packages = ['0.0.0/composer-include-files', '0.0.0/laravel-env-shim', + '0.0.1/try-make-package', '0099ff/dialogflowphp', + '00f100/array_dot'] + +expected_model = { + 'uid': '0099ff/dialogflowphp', + 'name': '0099ff/dialogflowphp', + 'full_name': '0099ff/dialogflowphp', + 'html_url': + 'https://repo.packagist.org/p/0099ff/dialogflowphp.json', + 'origin_url': + 'https://repo.packagist.org/p/0099ff/dialogflowphp.json', + 'origin_type': 'packagist', + } + + +class PackagistListerTester(HttpSimpleListerTester, unittest.TestCase): + Lister = PackagistLister + PAGE = 'https://packagist.org/packages/list.json' + lister_subdir = 'packagist' + good_api_response_file = 'api_response.json' + entries = 5 + + @requests_mock.Mocker() + def test_list_packages(self, 
http_mocker): + """Listing the simple API page should retrieve all of its packages + + """ + http_mocker.get(self.PAGE, text=self.mock_response) + fl = self.get_fl() + packages = fl.list_packages(self.get_api_response(0)) + + for package in expected_packages: + assert package in packages + + def test_transport_response_simplified(self): + """Test model created by the lister + + """ + fl = self.get_fl() + model = fl.transport_response_simplified(['0099ff/dialogflowphp']) + assert len(model) == 1 + for key, values in model[0].items(): + assert values == expected_model[key] + + def test_task_dict(self): + """Test the task creation of lister + + """ + fl = self.get_fl() + with patch('swh.lister.packagist.lister.utils.create_task_dict') as mock_create_tasks: # noqa + fl.task_dict(origin_type='packagist', origin_url='https://abc', + name='test_pack') + mock_create_tasks.assert_called_once_with( + 'load-packagist', 'recurring', 'test_pack', 'https://abc') diff --git a/swh/lister/packagist/tests/test_tasks.py b/swh/lister/packagist/tests/test_tasks.py new file mode 100644 --- /dev/null +++ b/swh/lister/packagist/tests/test_tasks.py @@ -0,0 +1,31 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from unittest.mock import patch + + +def test_ping(swh_app, celery_session_worker): + res = swh_app.send_task( + 'swh.lister.packagist.tasks.ping') + assert res + res.wait() + assert res.successful() + assert res.result == 'OK' + + +@patch('swh.lister.packagist.tasks.PackagistLister') +def test_lister(lister, swh_app, celery_session_worker): + # setup the mocked PackagistLister + lister.return_value = lister + lister.run.return_value = None + + res = swh_app.send_task( + 'swh.lister.packagist.tasks.PackagistListerTask') + assert res + res.wait() + assert res.successful() + + lister.assert_called_once_with() + 
lister.db_last_index.assert_not_called() + lister.run.assert_called_once_with()