diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -19,6 +19,7 @@
 - `swh.lister.phabricator`
 - `swh.lister.cran`
 - `swh.lister.cgit`
+- `swh.lister.packagist`
 
 Dependencies
 ------------
@@ -221,6 +222,19 @@
             url_prefix='https://anongit.kde.org/')
 ```
 
+## lister-packagist
+
+Once configured, you can execute a Packagist lister using the following instructions
+in a `python3` script:
+
+```lang=python
+import logging
+from swh.lister.packagist.tasks import packagist_lister
+
+logging.basicConfig(level=logging.DEBUG)
+packagist_lister()
+```
+
 Licensing
 ---------
 
diff --git a/swh/lister/cli.py b/swh/lister/cli.py
--- a/swh/lister/cli.py
+++ b/swh/lister/cli.py
@@ -12,7 +12,7 @@
 logger = logging.getLogger(__name__)
 
 SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi',
-                     'npm', 'phabricator', 'gnu', 'cran', 'cgit']
+                     'npm', 'phabricator', 'gnu', 'cran', 'cgit', 'packagist']
 
 
 @click.group(name='lister', context_settings=CONTEXT_SETTINGS)
@@ -133,6 +133,11 @@
                 url_prefix='http://git.savannah.gnu.org/git/',
                 override_config=override_conf)
 
+        elif lister == 'packagist':
+            from .packagist.models import ModelBase
+            from .packagist.lister import PackagistLister
+            _lister = PackagistLister(override_config=override_conf)
+
         else:
             raise ValueError(
                 'Invalid lister %s: only supported listers are %s' %
diff --git a/swh/lister/core/tests/conftest.py b/swh/lister/core/tests/conftest.py
--- a/swh/lister/core/tests/conftest.py
+++ b/swh/lister/core/tests/conftest.py
@@ -13,6 +13,7 @@
         'swh.lister.gitlab.tasks',
         'swh.lister.gnu.tasks',
         'swh.lister.npm.tasks',
-        'swh.lister.pypi.tasks',
+        'swh.lister.packagist.tasks',
         'swh.lister.phabricator.tasks',
+        'swh.lister.pypi.tasks',
     ]
diff --git a/swh/lister/packagist/__init__.py b/swh/lister/packagist/__init__.py
new file mode 100644
diff --git a/swh/lister/packagist/lister.py b/swh/lister/packagist/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/lister.py
@@ -0,0 +1,64 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import random
+
+from .models import PackagistModel
+
+from swh.scheduler import utils
+from swh.lister.core.simple_lister import SimpleLister
+
+
+class PackagistLister(SimpleLister):
+    """Packagist lister, a SimpleLister bound to PackagistModel.
+
+    """
+    MODEL = PackagistModel
+    LISTER_NAME = 'packagist'
+    PAGE = 'https://packagist.org/packages/list.json'
+    instance = 'packagist'
+
+    def task_dict(self, origin_type, origin_url, **kwargs):
+        """Return task format dict
+
+        This is overridden from the lister_base as more information is
+        needed for the ingestion task creation.
+
+        """
+        return utils.create_task_dict('load-%s' % origin_type, 'recurring',
+                                      kwargs.get('name'), origin_url)
+
+    def list_packages(self, response):
+        """List the actual packagist origins from the response.
+
+        The names are read from ``response['packageNames']`` and returned
+        as a new list, in random order.
+
+        """
+        packages = list(response['packageNames'])
+        random.shuffle(packages)
+        return packages
+
+    def get_model_from_repo(self, repo_name):
+        """Transform from repository representation to model
+
+        The package name serves as uid, name and full_name; one url built
+        from it serves as both html_url and origin_url.
+
+        """
+        url = 'https://repo.packagist.org/p/%s.json' % repo_name
+        return {
+            'uid': repo_name,
+            'name': repo_name,
+            'full_name': repo_name,
+            'html_url': url,
+            'origin_url': url,
+            'origin_type': 'packagist',
+        }
+
+    def transport_response_simplified(self, response):
+        """Transform response to list for model manipulation
+
+        """
+        return [self.get_model_from_repo(repo_name) for repo_name in response]
diff --git a/swh/lister/packagist/models.py b/swh/lister/packagist/models.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/models.py
@@ -0,0 +1,16 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from sqlalchemy import Column, String
+
+from ..core.models import ModelBase
+
+
+class PackagistModel(ModelBase):
+    """a Packagist repository representation
+
+    """
+    __tablename__ = 'packagist_repo'
+
+    uid = Column(String, primary_key=True)
diff --git a/swh/lister/packagist/tasks.py b/swh/lister/packagist/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/tasks.py
@@ -0,0 +1,18 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scheduler.celery_backend.config import app
+
+from .lister import PackagistLister
+
+
+@app.task(name=__name__ + '.PackagistListerTask')
+def packagist_lister(**lister_args):
+    """Build a PackagistLister from lister_args and run it."""
+    PackagistLister(**lister_args).run()
+
+
+@app.task(name=__name__ + '.ping')
+def ping():
+    return 'OK'
diff --git a/swh/lister/packagist/tests/__init__.py b/swh/lister/packagist/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/packagist/tests/conftest.py b/swh/lister/packagist/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/tests/conftest.py
@@ -0,0 +1 @@
+from swh.lister.core.tests.conftest import *  # noqa
diff --git a/swh/lister/packagist/tests/test_tasks.py b/swh/lister/packagist/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/tests/test_tasks.py
@@ -0,0 +1,27 @@
+from unittest.mock import patch
+
+
+def test_ping(swh_app, celery_session_worker):
+    res = swh_app.send_task(
+        'swh.lister.packagist.tasks.ping')
+    assert res
+    res.wait()
+    assert res.successful()
+    assert res.result == 'OK'
+
+
+@patch('swh.lister.packagist.tasks.PackagistLister')
+def test_lister(lister, swh_app, celery_session_worker):
+    # setup the mocked PackagistLister
+    lister.return_value = lister
+    lister.run.return_value = None
+
+    res = swh_app.send_task(
+        'swh.lister.packagist.tasks.PackagistListerTask')
+    assert res
+    res.wait()
+    assert res.successful()
+
+    lister.assert_called_once_with()
+    lister.db_last_index.assert_not_called()
+    lister.run.assert_called_once_with()