Page MenuHomeSoftware Heritage

D1492.id4979.diff
No OneTemporary

D1492.id4979.diff

diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -16,6 +16,7 @@
- `swh.lister.pypi`
- `swh.lister.npm`
- `swh.lister.phabricator`
+- `swh.lister.rcran`
Dependencies
------------
@@ -177,6 +178,18 @@
incremental_phabricator_lister(forge_url='https://forge.softwareheritage.org', api_token='XXXX')
```
+## lister-rcran
+
+Once configured, you can execute an RCRAN lister using the following instructions in a `python3` script:
+
+```lang=python
+import logging
+from swh.lister.rcran.tasks import rcran_lister
+
+logging.basicConfig(level=logging.DEBUG)
+rcran_lister()
+```
+
Licensing
---------
diff --git a/swh/lister/cli.py b/swh/lister/cli.py
--- a/swh/lister/cli.py
+++ b/swh/lister/cli.py
@@ -12,7 +12,7 @@
logger = logging.getLogger(__name__)
SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi',
- 'npm', 'phabricator']
+ 'npm', 'phabricator', 'rcran']
@click.group(name='lister', context_settings=CONTEXT_SETTINGS)
@@ -115,6 +115,11 @@
api_token='',
override_config=override_conf)
+ elif lister == 'rcran':
+ from .rcran.models import ModelBase
+ from .rcran.lister import RCRANLister
+ _lister = RCRANLister(override_config=override_conf)
+
else:
raise ValueError(
'Invalid lister %s: only supported listers are %s' %
diff --git a/swh/lister/core/tests/conftest.py b/swh/lister/core/tests/conftest.py
--- a/swh/lister/core/tests/conftest.py
+++ b/swh/lister/core/tests/conftest.py
@@ -12,4 +12,5 @@
'swh.lister.npm.tasks',
'swh.lister.pypi.tasks',
'swh.lister.phabricator.tasks',
+ 'swh.lister.rcran.tasks',
]
diff --git a/swh/lister/rcran/__init__.py b/swh/lister/rcran/__init__.py
new file mode 100644
diff --git a/swh/lister/rcran/lister.py b/swh/lister/rcran/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/rcran/lister.py
@@ -0,0 +1,116 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+import subprocess
+import json
+import logging
+
+from swh.lister.rcran.models import RCRANModel
+
+from swh.scheduler import utils
+from swh.lister.core.simple_lister import SimpleLister
+
+
+class RCRANLister(SimpleLister):
+ MODEL = RCRANModel
+ LISTER_NAME = 'rcran'
+
+ def task_dict(self, origin_type, origin_url, **kwargs):
+ """Return task format dict
+
+ This is overridden from the lister_base as more information is
+ needed for the ingestion task creation.
+ """
+ return utils.create_task_dict(
+ 'load-%s' % origin_type, 'recurring',
+ kwargs.get('name'), origin_url, kwargs.get('version'),
+ project_metadata=kwargs.get('description'))
+
+ def r_script_request(self):
+ """Runs r script which uses inbuilt API to return a json
+ response containing data about all the R packages
+
+ Returns:
+ JSON response
+ """
+ # This two-line R script calls the built-in API to get the list of
+ # all R packages with their descriptions and converts the API
+ # response to JSON
+ response = subprocess.run('Rscript -e \'db <- tools::CRAN_package_db()'
+ '; jsonlite::toJSON(db)\'',
+ stdout=subprocess.PIPE, shell=True)
+ return json.loads(response.stdout)
+
+ def _compute_url(self, repo):
+ """Returns the package tarball URL
+
+ """
+ return 'https://cran.r-project.org/src/contrib' \
+ '/%(Package)s_%(Version)s.tar.gz' % repo
+
+ def get_model_from_repo(self, repo):
+ """Transform from repository representation to model
+
+ """
+ project_url = self._compute_url(repo)
+ return {
+ 'uid': repo["Package"],
+ 'name': repo["Package"],
+ 'full_name': repo["Title"],
+ 'version': repo["Version"],
+ 'html_url': project_url,
+ 'origin_url': project_url,
+ 'origin_type': 'rcran',
+ 'description': repo["Description"]
+ }
+
+ def transport_response_simplified(self, response):
+ """Transform response to list for model manipulation
+
+ """
+ return [self.get_model_from_repo(repo) for repo in response]
+
+ def ingest_data(self, identifier, checks=False):
+ """Rework the base ingest_data.
+ Run the R script, which returns all packages in one go.
+
+ Simplify and filter response list of repositories. Inject
+ repo information into local db. Queue loader tasks for
+ linked repositories.
+
+ Args:
+ identifier: Resource identifier (unused)
+ checks (bool): Additional checks required (unused)
+
+ """
+ response = self.r_script_request()
+ if not response:
+ return response, []
+ models_list = self.transport_response_simplified(response)
+ models_list = self.filter_before_inject(models_list)
+ all_injected = []
+ for models in utils.grouper(models_list, n=10000):
+ models = list(models)
+ logging.debug('models: %s' % len(models))
+ # inject into local db
+ injected = self.inject_repo_data_into_db(models)
+ # queue workers
+ self.create_missing_origins_and_tasks(models, injected)
+ all_injected.append(injected)
+ # flush
+ self.db_session.commit()
+ self.db_session = self.mk_session()
+
+ return response, all_injected
+
+ def list_packages(self, response):
+ pass
+
+ def transport_quota_check(self):
+ pass
+
+ def transport_request(self):
+ pass
+
+ def transport_response_to_string(self):
+ pass
diff --git a/swh/lister/rcran/models.py b/swh/lister/rcran/models.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/rcran/models.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from sqlalchemy import Column, String
+
+from swh.lister.core.models import ModelBase
+
+
+class RCRANModel(ModelBase):
+ """a RCRAN repository representation
+
+ """
+ __tablename__ = 'rcran_repo'
+
+ uid = Column(String, primary_key=True)
+ version = Column(String)
diff --git a/swh/lister/rcran/tasks.py b/swh/lister/rcran/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/rcran/tasks.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scheduler.celery_backend.config import app
+
+from swh.lister.rcran.lister import RCRANLister
+
+
+@app.task(name=__name__ + '.RCRANListerTask')
+def rcran_lister(**lister_args):
+ RCRANLister(**lister_args).run()
+
+
+@app.task(name=__name__ + '.ping')
+def ping():
+ return 'OK'
diff --git a/swh/lister/rcran/tests/__init__.py b/swh/lister/rcran/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/rcran/tests/conftest.py b/swh/lister/rcran/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/rcran/tests/conftest.py
@@ -0,0 +1 @@
+from swh.lister.core.tests.conftest import * # noqa
diff --git a/swh/lister/rcran/tests/test_tasks.py b/swh/lister/rcran/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/rcran/tests/test_tasks.py
@@ -0,0 +1,27 @@
+from unittest.mock import patch
+
+
+def test_ping(swh_app, celery_session_worker):
+ res = swh_app.send_task(
+ 'swh.lister.rcran.tasks.ping')
+ assert res
+ res.wait()
+ assert res.successful()
+ assert res.result == 'OK'
+
+
+@patch('swh.lister.rcran.tasks.RCRANLister')
+def test_lister(lister, swh_app, celery_session_worker):
+ # setup the mocked RCRANLister
+ lister.return_value = lister
+ lister.run.return_value = None
+
+ res = swh_app.send_task(
+ 'swh.lister.rcran.tasks.RCRANListerTask')
+ assert res
+ res.wait()
+ assert res.successful()
+
+ lister.assert_called_once_with()
+ lister.db_last_index.assert_not_called()
+ lister.run.assert_called_once_with()

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 11:30 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3230992

Event Timeline