Page MenuHomeSoftware Heritage

D1584.id5676.diff
No OneTemporary

D1584.id5676.diff

diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -19,6 +19,7 @@
- `swh.lister.phabricator`
- `swh.lister.cran`
- `swh.lister.cgit`
+- `swh.lister.packagist`
Dependencies
------------
@@ -221,6 +222,19 @@
url_prefix='https://anongit.kde.org/')
```
+## lister-packagist
+
+Once configured, you can execute a Packagist lister using the following instructions
+in a `python3` script:
+
+```lang=python
+import logging
+from swh.lister.packagist.tasks import packagist_lister
+
+logging.basicConfig(level=logging.DEBUG)
+packagist_lister()
+```
+
Licensing
---------
diff --git a/swh/lister/cli.py b/swh/lister/cli.py
--- a/swh/lister/cli.py
+++ b/swh/lister/cli.py
@@ -12,7 +12,7 @@
logger = logging.getLogger(__name__)
SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi',
- 'npm', 'phabricator', 'gnu', 'cran', 'cgit']
+ 'npm', 'phabricator', 'gnu', 'cran', 'cgit', 'packagist']
@click.group(name='lister', context_settings=CONTEXT_SETTINGS)
@@ -133,6 +133,11 @@
url_prefix='http://git.savannah.gnu.org/git/',
override_config=override_conf)
+ elif lister == 'packagist':
+ from .packagist.models import ModelBase
+ from .packagist.lister import PackagistLister
+ _lister = PackagistLister(override_config=override_conf)
+
else:
raise ValueError(
'Invalid lister %s: only supported listers are %s' %
diff --git a/swh/lister/core/tests/conftest.py b/swh/lister/core/tests/conftest.py
--- a/swh/lister/core/tests/conftest.py
+++ b/swh/lister/core/tests/conftest.py
@@ -13,6 +13,7 @@
'swh.lister.gitlab.tasks',
'swh.lister.gnu.tasks',
'swh.lister.npm.tasks',
- 'swh.lister.pypi.tasks',
+ 'swh.lister.packagist.tasks',
'swh.lister.phabricator.tasks',
+ 'swh.lister.pypi.tasks',
]
diff --git a/swh/lister/packagist/__init__.py b/swh/lister/packagist/__init__.py
new file mode 100644
diff --git a/swh/lister/packagist/lister.py b/swh/lister/packagist/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/lister.py
@@ -0,0 +1,107 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import random
+import requests
+import json
+
+from .models import PackagistModel
+
+from swh.scheduler import utils
+from swh.lister.core.simple_lister import SimpleLister
+
+
+class PackagistLister(SimpleLister):
+    """Lister for the packages referenced by packagist.org
+
+    The whole package list is read from PAGE and each package name is
+    turned into one model entry.
+
+    """
+    MODEL = PackagistModel
+    LISTER_NAME = 'packagist'
+    PAGE = 'https://packagist.org/packages/list.json'
+    instance = 'packagist'
+
+    def task_dict(self, origin_type, origin_url, **kwargs):
+        """Return task format dict
+
+        This is overridden from the lister_base as more information is
+        needed for the ingestion task creation.
+
+        Args:
+            origin_type: origin type, used to name the 'load-<type>' task
+            origin_url: url of the origin to load
+            **kwargs: only the 'name' entry is read (None when missing)
+
+        Returns:
+            the result of swh.scheduler.utils.create_task_dict
+
+        """
+        return utils.create_task_dict('load-%s' % origin_type, 'recurring',
+                                      kwargs.get('name'), origin_url)
+
+    def list_packages(self, response):
+        """List the actual packagist origins from the response.
+
+        Args:
+            response: mapping holding the names under 'packageNames'
+
+        Returns:
+            a new list of the package names, in random order
+
+        """
+        packages = list(response['packageNames'])
+        random.shuffle(packages)
+        return packages
+
+    def safely_issue_request(self, identifier):
+        """Fetch the package list with a single GET request on PAGE.
+
+        No retry nor rate limit handling is done here.
+
+        Args:
+            identifier: resource identifier (unused)
+        Returns:
+            the decoded JSON body of the server response
+        Raises:
+            requests.HTTPError: when the server answers with an error status
+        """
+        response = requests.get(self.PAGE)
+        # report HTTP failures as such, not as an obscure decoding error
+        response.raise_for_status()
+        return json.loads(response.text)
+
+    def get_model_from_repo(self, repo_name):
+        """Transform from repository representation to model
+
+        Args:
+            repo_name: name of the package
+
+        Returns:
+            dict of model fields; html_url and origin_url hold the same url
+
+        """
+        # the url is stored as is: it must not end with a whitespace
+        url = 'https://repo.packagist.org/p/%s.json' % repo_name
+        return {
+            'uid': repo_name,
+            'name': repo_name,
+            'full_name': repo_name,
+            'html_url': url,
+            'origin_url': url,
+            'origin_type': 'packagist',
+        }
+
+    def transport_response_simplified(self, response):
+        """Transform response to list for model manipulation
+
+        Args:
+            response: iterable of package names
+
+        Returns:
+            list of dicts, one per name, built by get_model_from_repo
+
+        """
+        return [self.get_model_from_repo(repo_name) for repo_name in response]
diff --git a/swh/lister/packagist/models.py b/swh/lister/packagist/models.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/models.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from sqlalchemy import Column, String
+
+from ..core.models import ModelBase
+
+
+class PackagistModel(ModelBase):
+    """a Packagist repository representation
+
+    """
+    __tablename__ = 'packagist_repo'
+
+    # primary key of the 'packagist_repo' table
+    uid = Column(String, primary_key=True)
diff --git a/swh/lister/packagist/tasks.py b/swh/lister/packagist/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/tasks.py
@@ -0,0 +1,19 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scheduler.celery_backend.config import app
+
+from .lister import PackagistLister
+
+
+@app.task(name=__name__ + '.PackagistListerTask')
+def packagist_lister(**lister_args):
+    """Instantiate a PackagistLister with lister_args and run it"""
+    PackagistLister(**lister_args).run()
+
+
+@app.task(name=__name__ + '.ping')
+def ping():
+    """Return 'OK'"""
+    return 'OK'
diff --git a/swh/lister/packagist/tests/__init__.py b/swh/lister/packagist/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/packagist/tests/conftest.py b/swh/lister/packagist/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/tests/conftest.py
@@ -0,0 +1 @@
+from swh.lister.core.tests.conftest import * # noqa
diff --git a/swh/lister/packagist/tests/test_tasks.py b/swh/lister/packagist/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/packagist/tests/test_tasks.py
@@ -0,0 +1,29 @@
+from unittest.mock import patch
+
+
+def test_ping(swh_app, celery_session_worker):
+    """The ping task runs and answers 'OK'"""
+    res = swh_app.send_task(
+        'swh.lister.packagist.tasks.ping')
+    assert res
+    res.wait()
+    assert res.successful()
+    assert res.result == 'OK'
+
+
+@patch('swh.lister.packagist.tasks.PackagistLister')
+def test_lister(lister, swh_app, celery_session_worker):
+    """The lister task builds one lister without arguments and runs it"""
+    # setup the mocked PackagistLister
+    lister.return_value = lister
+    lister.run.return_value = None
+
+    res = swh_app.send_task(
+        'swh.lister.packagist.tasks.PackagistListerTask')
+    assert res
+    res.wait()
+    assert res.successful()
+
+    lister.assert_called_once_with()
+    lister.db_last_index.assert_not_called()
+    lister.run.assert_called_once_with()

File Metadata

Mime Type
text/plain
Expires
Thu, Jan 30, 4:34 PM (2 h, 27 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225851

Event Timeline