Page MenuHomeSoftware Heritage

D1584.diff
No OneTemporary

D1584.diff

diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -19,6 +19,7 @@
- `swh.lister.phabricator`
- `swh.lister.cran`
- `swh.lister.cgit`
+- `swh.lister.packagist`
Dependencies
------------
@@ -221,6 +222,19 @@
url_prefix='https://anongit.kde.org/')
```
+## lister-packagist
+
+Once configured, you can execute a Packagist lister using the following instructions
+in a `python3` script:
+
+```lang=python
+import logging
+from swh.lister.packagist.tasks import packagist_lister
+
+logging.basicConfig(level=logging.DEBUG)
+packagist_lister()
+```
+
Licensing
---------
diff --git a/swh/lister/cli.py b/swh/lister/cli.py
--- a/swh/lister/cli.py
+++ b/swh/lister/cli.py
@@ -12,7 +12,7 @@
logger = logging.getLogger(__name__)
SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi',
- 'npm', 'phabricator', 'gnu', 'cran', 'cgit']
+ 'npm', 'phabricator', 'gnu', 'cran', 'cgit', 'packagist']
@click.group(name='lister', context_settings=CONTEXT_SETTINGS)
@@ -133,6 +133,11 @@
url_prefix='http://git.savannah.gnu.org/git/',
override_config=override_conf)
+ elif lister == 'packagist':
+ from .packagist.models import ModelBase
+ from .packagist.lister import PackagistLister
+ _lister = PackagistLister(override_config=override_conf)
+
else:
raise ValueError(
'Invalid lister %s: only supported listers are %s' %
diff --git a/swh/lister/core/tests/conftest.py b/swh/lister/core/tests/conftest.py
--- a/swh/lister/core/tests/conftest.py
+++ b/swh/lister/core/tests/conftest.py
@@ -13,6 +13,7 @@
'swh.lister.gitlab.tasks',
'swh.lister.gnu.tasks',
'swh.lister.npm.tasks',
- 'swh.lister.pypi.tasks',
+ 'swh.lister.packagist.tasks',
'swh.lister.phabricator.tasks',
+ 'swh.lister.pypi.tasks',
]
diff --git a/swh/lister/packagist/__init__.py b/swh/lister/packagist/__init__.py
new file mode 100644
diff --git a/swh/lister/packagist/lister.py b/swh/lister/packagist/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/lister.py
@@ -0,0 +1,84 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import random
+import json
+from .models import PackagistModel
+
+from swh.scheduler import utils
+from swh.lister.core.simple_lister import SimpleLister
+from swh.lister.core.lister_transports import ListerOnePageApiTransport
+
+
+class PackagistLister(ListerOnePageApiTransport, SimpleLister):
+ """List packages available in the Packagist package manager.
+
+ The lister sends a request to the url held in the class
+ variable `PAGE` to receive the list of all package names
+ known to the Packagist package manager. It then iterates over
+ the packages, builds each package's metadata url from its
+ name, and creates a loading task for it.
+
+ Task:
+ Type: load-packagist
+ Policy: recurring
+ Args:
+ <package_name>
+ <package_metadata_url>
+
+ Example:
+ Type: load-packagist
+ Policy: recurring
+ Args:
+ 'hypejunction/hypegamemechanics'
+ 'https://repo.packagist.org/p/hypejunction/hypegamemechanics.json'
+
+ """
+ MODEL = PackagistModel
+ LISTER_NAME = 'packagist'
+ PAGE = 'https://packagist.org/packages/list.json'
+ instance = 'packagist'
+
+ def __init__(self, override_config=None):
+ ListerOnePageApiTransport .__init__(self)
+ SimpleLister.__init__(self, override_config=override_config)
+
+ def task_dict(self, origin_type, origin_url, **kwargs):
+ """Return task format dict
+
+ This is overridden from the lister_base as more information is
+ needed for the ingestion task creation.
+
+ """
+ return utils.create_task_dict('load-%s' % origin_type, 'recurring',
+ kwargs.get('name'), origin_url)
+
+ def list_packages(self, response):
+ """List the actual packagist origins from the response.
+
+ """
+ response = json.loads(response.text)
+ packages = [name for name in response['packageNames']]
+ random.shuffle(packages)
+ return packages
+
+ def get_model_from_repo(self, repo_name):
+ """Transform from repository representation to model
+
+ """
+ url = 'https://repo.packagist.org/p/%s.json' % repo_name
+ return {
+ 'uid': repo_name,
+ 'name': repo_name,
+ 'full_name': repo_name,
+ 'html_url': url,
+ 'origin_url': url,
+ 'origin_type': 'packagist',
+ }
+
+ def transport_response_simplified(self, response):
+ """Transform response to list for model manipulation
+
+ """
+ return [self.get_model_from_repo(repo_name) for repo_name in response]
diff --git a/swh/lister/packagist/models.py b/swh/lister/packagist/models.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/models.py
@@ -0,0 +1,16 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from sqlalchemy import Column, String
+
+from ..core.models import ModelBase
+
+
+class PackagistModel(ModelBase):
+ """a Packagist repository representation
+
+ """
+ __tablename__ = 'packagist_repo'
+
+ uid = Column(String, primary_key=True)
diff --git a/swh/lister/packagist/tasks.py b/swh/lister/packagist/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/tasks.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scheduler.celery_backend.config import app
+
+from .lister import PackagistLister
+
+
+@app.task(name=__name__ + '.PackagistListerTask')
+def packagist_lister(**lister_args):
+ PackagistLister(**lister_args).run()
+
+
+@app.task(name=__name__ + '.ping')
+def ping():
+ return 'OK'
diff --git a/swh/lister/packagist/tests/__init__.py b/swh/lister/packagist/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/packagist/tests/api_response.json b/swh/lister/packagist/tests/api_response.json
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/tests/api_response.json
@@ -0,0 +1,9 @@
+{
+ "packageNames": [
+ "0.0.0/composer-include-files",
+ "0.0.0/laravel-env-shim",
+ "0.0.1/try-make-package",
+ "0099ff/dialogflowphp",
+ "00f100/array_dot"
+ ]
+}
\ No newline at end of file
diff --git a/swh/lister/packagist/tests/conftest.py b/swh/lister/packagist/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/tests/conftest.py
@@ -0,0 +1 @@
+from swh.lister.core.tests.conftest import * # noqa
diff --git a/swh/lister/packagist/tests/test_lister.py b/swh/lister/packagist/tests/test_lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/tests/test_lister.py
@@ -0,0 +1,66 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import unittest
+import requests_mock
+from unittest.mock import patch
+from swh.lister.packagist.lister import PackagistLister
+from swh.lister.core.tests.test_lister import HttpSimpleListerTester
+
+
+expected_packages = ['0.0.0/composer-include-files', '0.0.0/laravel-env-shim',
+ '0.0.1/try-make-package', '0099ff/dialogflowphp',
+ '00f100/array_dot']
+
+expected_model = {
+ 'uid': '0099ff/dialogflowphp',
+ 'name': '0099ff/dialogflowphp',
+ 'full_name': '0099ff/dialogflowphp',
+ 'html_url':
+ 'https://repo.packagist.org/p/0099ff/dialogflowphp.json',
+ 'origin_url':
+ 'https://repo.packagist.org/p/0099ff/dialogflowphp.json',
+ 'origin_type': 'packagist',
+ }
+
+
+class PackagistListerTester(HttpSimpleListerTester, unittest.TestCase):
+ Lister = PackagistLister
+ PAGE = 'https://packagist.org/packages/list.json'
+ lister_subdir = 'packagist'
+ good_api_response_file = 'api_response.json'
+ entries = 5
+
+ @requests_mock.Mocker()
+ def test_list_packages(self, http_mocker):
+ """Listing packages from the simple api page should retrieve them all
+
+ """
+ http_mocker.get(self.PAGE, text=self.mock_response)
+ fl = self.get_fl()
+ packages = fl.list_packages(self.get_api_response(0))
+
+ for package in expected_packages:
+ assert package in packages
+
+ def test_transport_response_simplified(self):
+ """Test model created by the lister
+
+ """
+ fl = self.get_fl()
+ model = fl.transport_response_simplified(['0099ff/dialogflowphp'])
+ assert len(model) == 1
+ for key, values in model[0].items():
+ assert values == expected_model[key]
+
+ def test_task_dict(self):
+ """Test the task creation of lister
+
+ """
+ fl = self.get_fl()
+ with patch('swh.lister.packagist.lister.utils.create_task_dict') as mock_create_tasks: # noqa
+ fl.task_dict(origin_type='packagist', origin_url='https://abc',
+ name='test_pack')
+ mock_create_tasks.assert_called_once_with(
+ 'load-packagist', 'recurring', 'test_pack', 'https://abc')
diff --git a/swh/lister/packagist/tests/test_tasks.py b/swh/lister/packagist/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/tests/test_tasks.py
@@ -0,0 +1,31 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from unittest.mock import patch
+
+
+def test_ping(swh_app, celery_session_worker):
+ res = swh_app.send_task(
+ 'swh.lister.packagist.tasks.ping')
+ assert res
+ res.wait()
+ assert res.successful()
+ assert res.result == 'OK'
+
+
+@patch('swh.lister.packagist.tasks.PackagistLister')
+def test_lister(lister, swh_app, celery_session_worker):
+ # setup the mocked PackagistLister
+ lister.return_value = lister
+ lister.run.return_value = None
+
+ res = swh_app.send_task(
+ 'swh.lister.packagist.tasks.PackagistListerTask')
+ assert res
+ res.wait()
+ assert res.successful()
+
+ lister.assert_called_once_with()
+ lister.db_last_index.assert_not_called()
+ lister.run.assert_called_once_with()

File Metadata

Mime Type
text/plain
Expires
Tue, Dec 17, 2:31 PM (3 d, 3 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3215957

Event Timeline