Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7122984
D1584.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
10 KB
Subscribers
None
D1584.diff
View Options
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -19,6 +19,7 @@
- `swh.lister.phabricator`
- `swh.lister.cran`
- `swh.lister.cgit`
+- `swh.lister.packagist`
Dependencies
------------
@@ -221,6 +222,19 @@
url_prefix='https://anongit.kde.org/')
```
+## lister-packagist
+
+Once configured, you can execute a Packagist lister using the following instructions
+in a `python3` script:
+
+```lang=python
+import logging
+from swh.lister.packagist.tasks import packagist_lister
+
+logging.basicConfig(level=logging.DEBUG)
+packagist_lister()
+```
+
Licensing
---------
diff --git a/swh/lister/cli.py b/swh/lister/cli.py
--- a/swh/lister/cli.py
+++ b/swh/lister/cli.py
@@ -12,7 +12,7 @@
logger = logging.getLogger(__name__)
SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi',
- 'npm', 'phabricator', 'gnu', 'cran', 'cgit']
+ 'npm', 'phabricator', 'gnu', 'cran', 'cgit', 'packagist']
@click.group(name='lister', context_settings=CONTEXT_SETTINGS)
@@ -133,6 +133,11 @@
url_prefix='http://git.savannah.gnu.org/git/',
override_config=override_conf)
+ elif lister == 'packagist':
+ from .packagist.models import ModelBase
+ from .packagist.lister import PackagistLister
+ _lister = PackagistLister(override_config=override_conf)
+
else:
raise ValueError(
'Invalid lister %s: only supported listers are %s' %
diff --git a/swh/lister/core/tests/conftest.py b/swh/lister/core/tests/conftest.py
--- a/swh/lister/core/tests/conftest.py
+++ b/swh/lister/core/tests/conftest.py
@@ -13,6 +13,7 @@
'swh.lister.gitlab.tasks',
'swh.lister.gnu.tasks',
'swh.lister.npm.tasks',
- 'swh.lister.pypi.tasks',
+ 'swh.lister.packagist.tasks',
'swh.lister.phabricator.tasks',
+ 'swh.lister.pypi.tasks',
]
diff --git a/swh/lister/packagist/__init__.py b/swh/lister/packagist/__init__.py
new file mode 100644
diff --git a/swh/lister/packagist/lister.py b/swh/lister/packagist/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/lister.py
@@ -0,0 +1,84 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import random
+import json
+from .models import PackagistModel
+
+from swh.scheduler import utils
+from swh.lister.core.simple_lister import SimpleLister
+from swh.lister.core.lister_transports import ListerOnePageApiTransport
+
+
+class PackagistLister(ListerOnePageApiTransport, SimpleLister):
+ """List packages available in the Packagist package manager.
+
+ The lister sends the request to the url present in the class
+ variable `PAGE`, to receive a list of all the package names
+ present in the Packagist package manager. It iterates over all the
+ packages, constructs the metadata url of each package from
+ its name, and creates a loading task.
+
+ Task:
+ Type: load-packagist
+ Policy: recurring
+ Args:
+ <package_name>
+ <package_metadata_url>
+
+ Example:
+ Type: load-packagist
+ Policy: recurring
+ Args:
+ 'hypejunction/hypegamemechanics'
+ 'https://repo.packagist.org/p/hypejunction/hypegamemechanics.json'
+
+ """
+ MODEL = PackagistModel
+ LISTER_NAME = 'packagist'
+ PAGE = 'https://packagist.org/packages/list.json'
+ instance = 'packagist'
+
+ def __init__(self, override_config=None):
+ ListerOnePageApiTransport .__init__(self)
+ SimpleLister.__init__(self, override_config=override_config)
+
+ def task_dict(self, origin_type, origin_url, **kwargs):
+ """Return task format dict
+
+ This is overridden from the lister_base as more information is
+ needed for the ingestion task creation.
+
+ """
+ return utils.create_task_dict('load-%s' % origin_type, 'recurring',
+ kwargs.get('name'), origin_url)
+
+ def list_packages(self, response):
+ """List the actual packagist origins from the response.
+
+ """
+ response = json.loads(response.text)
+ packages = [name for name in response['packageNames']]
+ random.shuffle(packages)
+ return packages
+
+ def get_model_from_repo(self, repo_name):
+ """Transform from repository representation to model
+
+ """
+ url = 'https://repo.packagist.org/p/%s.json' % repo_name
+ return {
+ 'uid': repo_name,
+ 'name': repo_name,
+ 'full_name': repo_name,
+ 'html_url': url,
+ 'origin_url': url,
+ 'origin_type': 'packagist',
+ }
+
+ def transport_response_simplified(self, response):
+ """Transform response to list for model manipulation
+
+ """
+ return [self.get_model_from_repo(repo_name) for repo_name in response]
diff --git a/swh/lister/packagist/models.py b/swh/lister/packagist/models.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/models.py
@@ -0,0 +1,16 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from sqlalchemy import Column, String
+
+from ..core.models import ModelBase
+
+
+class PackagistModel(ModelBase):
+ """a Packagist repository representation
+
+ """
+ __tablename__ = 'packagist_repo'
+
+ uid = Column(String, primary_key=True)
diff --git a/swh/lister/packagist/tasks.py b/swh/lister/packagist/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/tasks.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scheduler.celery_backend.config import app
+
+from .lister import PackagistLister
+
+
+@app.task(name=__name__ + '.PackagistListerTask')
+def packagist_lister(**lister_args):
+ PackagistLister(**lister_args).run()
+
+
+@app.task(name=__name__ + '.ping')
+def ping():
+ return 'OK'
diff --git a/swh/lister/packagist/tests/__init__.py b/swh/lister/packagist/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/packagist/tests/api_response.json b/swh/lister/packagist/tests/api_response.json
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/tests/api_response.json
@@ -0,0 +1,9 @@
+{
+ "packageNames": [
+ "0.0.0/composer-include-files",
+ "0.0.0/laravel-env-shim",
+ "0.0.1/try-make-package",
+ "0099ff/dialogflowphp",
+ "00f100/array_dot"
+ ]
+}
\ No newline at end of file
diff --git a/swh/lister/packagist/tests/conftest.py b/swh/lister/packagist/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/tests/conftest.py
@@ -0,0 +1 @@
+from swh.lister.core.tests.conftest import * # noqa
diff --git a/swh/lister/packagist/tests/test_lister.py b/swh/lister/packagist/tests/test_lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/tests/test_lister.py
@@ -0,0 +1,66 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import unittest
+import requests_mock
+from unittest.mock import patch
+from swh.lister.packagist.lister import PackagistLister
+from swh.lister.core.tests.test_lister import HttpSimpleListerTester
+
+
+expected_packages = ['0.0.0/composer-include-files', '0.0.0/laravel-env-shim',
+ '0.0.1/try-make-package', '0099ff/dialogflowphp',
+ '00f100/array_dot']
+
+expected_model = {
+ 'uid': '0099ff/dialogflowphp',
+ 'name': '0099ff/dialogflowphp',
+ 'full_name': '0099ff/dialogflowphp',
+ 'html_url':
+ 'https://repo.packagist.org/p/0099ff/dialogflowphp.json',
+ 'origin_url':
+ 'https://repo.packagist.org/p/0099ff/dialogflowphp.json',
+ 'origin_type': 'packagist',
+ }
+
+
+class PackagistListerTester(HttpSimpleListerTester, unittest.TestCase):
+ Lister = PackagistLister
+ PAGE = 'https://packagist.org/packages/list.json'
+ lister_subdir = 'packagist'
+ good_api_response_file = 'api_response.json'
+ entries = 5
+
+ @requests_mock.Mocker()
+ def test_list_packages(self, http_mocker):
+ """List packages from simple api page should retrieve all packages within
+
+ """
+ http_mocker.get(self.PAGE, text=self.mock_response)
+ fl = self.get_fl()
+ packages = fl.list_packages(self.get_api_response(0))
+
+ for package in expected_packages:
+ assert package in packages
+
+ def test_transport_response_simplified(self):
+ """Test model created by the lister
+
+ """
+ fl = self.get_fl()
+ model = fl.transport_response_simplified(['0099ff/dialogflowphp'])
+ assert len(model) == 1
+ for key, values in model[0].items():
+ assert values == expected_model[key]
+
+ def test_task_dict(self):
+ """Test the task creation of lister
+
+ """
+ fl = self.get_fl()
+ with patch('swh.lister.packagist.lister.utils.create_task_dict') as mock_create_tasks: # noqa
+ fl.task_dict(origin_type='packagist', origin_url='https://abc',
+ name='test_pack')
+ mock_create_tasks.assert_called_once_with(
+ 'load-packagist', 'recurring', 'test_pack', 'https://abc')
diff --git a/swh/lister/packagist/tests/test_tasks.py b/swh/lister/packagist/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/tests/test_tasks.py
@@ -0,0 +1,31 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from unittest.mock import patch
+
+
+def test_ping(swh_app, celery_session_worker):
+ res = swh_app.send_task(
+ 'swh.lister.packagist.tasks.ping')
+ assert res
+ res.wait()
+ assert res.successful()
+ assert res.result == 'OK'
+
+
+@patch('swh.lister.packagist.tasks.PackagistLister')
+def test_lister(lister, swh_app, celery_session_worker):
+ # setup the mocked PackagistLister
+ lister.return_value = lister
+ lister.run.return_value = None
+
+ res = swh_app.send_task(
+ 'swh.lister.packagist.tasks.PackagistListerTask')
+ assert res
+ res.wait()
+ assert res.successful()
+
+ lister.assert_called_once_with()
+ lister.db_last_index.assert_not_called()
+ lister.run.assert_called_once_with()
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Dec 17, 2:31 PM (3 d, 3 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3215957
Attached To
D1584: swh.lister.packagist
Event Timeline
Log In to Comment