Page MenuHomeSoftware Heritage

D1492.id4891.diff
No OneTemporary

D1492.id4891.diff

diff --git a/swh/lister/core/tests/conftest.py b/swh/lister/core/tests/conftest.py
--- a/swh/lister/core/tests/conftest.py
+++ b/swh/lister/core/tests/conftest.py
@@ -12,4 +12,5 @@
'swh.lister.npm.tasks',
'swh.lister.pypi.tasks',
'swh.lister.phabricator.tasks',
+ 'swh.lister.rcran.tasks',
]
diff --git a/swh/lister/rcran/__init__.py b/swh/lister/rcran/__init__.py
new file mode 100644
diff --git a/swh/lister/rcran/list_all_the_packages.R b/swh/lister/rcran/list_all_the_packages.R
new file mode 100755
--- /dev/null
+++ b/swh/lister/rcran/list_all_the_packages.R
@@ -0,0 +1,5 @@
+#!/usr/bin/Rscript
+
+# Print the CRAN package database as JSON.
+# The printed text is what the rcran lister parses with json.loads.
+
+# fetch the package database through R's built-in tools package
+db <- tools::CRAN_package_db();
+# serialize it to JSON with jsonlite
+dbjson <- jsonlite::toJSON(db);
+print(dbjson);
\ No newline at end of file
diff --git a/swh/lister/rcran/lister.py b/swh/lister/rcran/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/rcran/lister.py
@@ -0,0 +1,116 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+import subprocess
+import json
+import logging
+
+from swh.lister.rcran.models import RCRANModel
+
+from swh.scheduler import utils
+from swh.lister.core.simple_lister import SimpleLister
+
+
+class RCRANLister(SimpleLister):
+    """Lister for the R packages published on CRAN.
+
+    The package list is obtained in one go by running the
+    ``list_all_the_packages.R`` script shipped next to this module and
+    parsing its JSON output. Each entry is converted to a model dict,
+    injected into the lister db and used to queue loader tasks.
+
+    """
+    MODEL = RCRANModel
+    LISTER_NAME = 'rcran'
+
+    def __init__(self, override_config=None):
+        SimpleLister.__init__(self, override_config=override_config)
+
+    def task_dict(self, origin_type, origin_url, **kwargs):
+        """(Override) Return task format dict
+
+        This is overridden from the lister_base as more information is
+        needed for the ingestion task creation.
+
+        Args:
+            origin_type (str): origin type, used to build the task type
+                'origin-update-<origin_type>'
+            origin_url (str): url of the origin to load
+            **kwargs: model entries; 'name', 'version' and 'description'
+                are read (each one defaults to None when missing)
+
+        Returns:
+            the result of swh.scheduler.utils.create_task_dict for a
+            recurring task
+
+        """
+        _type = 'origin-update-%s' % origin_type
+        _policy = 'recurring'
+        project_name = kwargs.get('name')
+        project_version = kwargs.get('version')
+        project_metadata = kwargs.get('description')
+        return utils.create_task_dict(
+            _type, _policy, project_name, origin_url, project_version,
+            project_metadata=project_metadata)
+
+    def r_script_request(self):
+        """Run the R script listing all the R packages and parse its output.
+
+        The script is looked up next to this module instead of in the
+        current working directory, so the lister does not depend on where
+        the process was started from.
+
+        Returns:
+            the decoded JSON output of the script, containing data about
+            all the R packages
+
+        Raises:
+            OSError: if the script cannot be executed
+            subprocess.CalledProcessError: if the script exits with a
+                non-zero status
+            ValueError: if the script output is not valid JSON
+
+        """
+        import os  # local import, only needed to locate the script
+
+        script_path = os.path.join(
+            os.path.dirname(os.path.abspath(__file__)),
+            'list_all_the_packages.R')
+        # Only stdout is captured: subprocess.getoutput would also mix
+        # stderr (e.g. R warnings) into the text handed to json.loads and
+        # would silently ignore a failing script.
+        response = subprocess.check_output(
+            [script_path], universal_newlines=True)
+        return json.loads(response)
+
+    def list_packages(self, response):
+        """(Override) List the actual rcran origins from the response.
+
+        Intentionally empty: ingest_data of this class gets the package
+        list from r_script_request and does not call this method.
+
+        """
+        pass
+
+    def _compute_urls(self, repo):
+        """Return the url of the source tarball of a package.
+
+        Args:
+            repo (dict): package entry holding the "Package" and "Version"
+                keys
+
+        Returns:
+            str: 'https://cran.r-project.org/src/contrib/<Package>_<Version>.tar.gz'
+
+        """
+        return (
+            'https://cran.r-project.org/src/contrib/%s_%s.tar.gz' %
+            (repo["Package"], repo["Version"])
+        )
+
+    def get_model_from_repo(self, repo):
+        """(Override) Transform from repository representation to model
+
+        Args:
+            repo (dict): package entry holding the "Package", "Title",
+                "Version" and "Description" keys
+
+        Returns:
+            dict: model representation of the package; the tarball url is
+            used both as 'html_url' and as 'origin_url'
+
+        Raises:
+            KeyError: if one of the expected keys is missing from repo
+
+        """
+        project_url = self._compute_urls(repo)
+        return {
+            'uid': repo["Package"],
+            'name': repo["Package"],
+            'full_name': repo["Title"],
+            'version': repo["Version"],
+            'html_url': project_url,
+            'origin_url': project_url,
+            'origin_type': 'rcran',
+            'description': repo["Description"]
+        }
+
+    def transport_response_simplified(self, response):
+        """(Override) Transform response to list for model manipulation
+
+        Args:
+            response: iterable of package entries, as returned by
+                r_script_request
+
+        Returns:
+            list of model dicts, one per package entry
+
+        """
+        return [self.get_model_from_repo(repo) for repo in response]
+
+    def ingest_data(self, identifier, checks=False):
+        """(Override)Rework the base ingest_data.
+        Request server endpoint which gives all in one go.
+
+        Simplify and filter response list of repositories. Inject
+        repo information into local db. Queue loader tasks for
+        linked repositories.
+
+        Args:
+            identifier: Resource identifier (unused)
+            checks (bool): Additional checks required (unused)
+
+        Returns:
+            tuple (response, injected): the decoded script output and the
+            list of the injection results, one per batch of models (an
+            empty list when the response is empty)
+
+        """
+        response = self.r_script_request()
+        if not response:
+            return response, []
+        models_list = self.transport_response_simplified(response)
+        models_list = self.filter_before_inject(models_list)
+        all_injected = []
+        # process the models in batches of at most 10000 entries
+        for models in utils.grouper(models_list, n=10000):
+            models = list(models)
+            # lazy %-args: the message is only built if debug is enabled
+            logging.debug('models: %s', len(models))
+            # inject into local db
+            injected = self.inject_repo_data_into_db(models)
+            # queue workers
+            self.create_missing_origins_and_tasks(models, injected)
+            all_injected.append(injected)
+            # flush
+            self.db_session.commit()
+            self.db_session = self.mk_session()
+
+        return response, all_injected
diff --git a/swh/lister/rcran/models.py b/swh/lister/rcran/models.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/rcran/models.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from sqlalchemy import Column, String
+
+from swh.lister.core.models import ModelBase
+
+
+class RCRANModel(ModelBase):
+ """An RCRAN repository representation
+
+ One row of the 'rcran_repo' table describes one listed package.
+
+ """
+ __tablename__ = 'rcran_repo'
+
+ # primary key; the lister fills it with the package's "Package" field
+ uid = Column(String, primary_key=True)
+ # the lister fills it with the package's "Version" field
+ version = Column(String)
diff --git a/swh/lister/rcran/tasks.py b/swh/lister/rcran/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/rcran/tasks.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scheduler.celery_backend.config import app
+
+from swh.lister.rcran.lister import RCRANLister
+
+
+@app.task(name=__name__ + '.RCRANListerTask')
+def rcran_lister(**lister_args):
+    """Build a RCRANLister from the given keyword arguments and run it."""
+    lister = RCRANLister(**lister_args)
+    lister.run()
+
+
+@app.task(name=__name__ + '.ping')
+def ping():
+ """Trivial task which always returns 'OK'."""
+ return 'OK'
diff --git a/swh/lister/rcran/tests/__init__.py b/swh/lister/rcran/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/rcran/tests/conftest.py b/swh/lister/rcran/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/rcran/tests/conftest.py
@@ -0,0 +1 @@
+from swh.lister.core.tests.conftest import * # noqa
diff --git a/swh/lister/rcran/tests/test_tasks.py b/swh/lister/rcran/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/rcran/tests/test_tasks.py
@@ -0,0 +1,27 @@
+from unittest.mock import patch
+
+
+def test_ping(swh_app, celery_session_worker):
+    """The rcran ping task can be sent, completes and answers 'OK'."""
+    task_name = 'swh.lister.rcran.tasks.ping'
+    res = swh_app.send_task(task_name)
+    assert res
+    # block until the task has been processed
+    res.wait()
+    assert res.successful()
+    assert res.result == 'OK'
+
+
+@patch('swh.lister.rcran.tasks.RCRANLister')
+def test_lister(lister, swh_app, celery_session_worker):
+    """The lister task instantiates RCRANLister once and runs it."""
+    # The patched class returns itself when instantiated, so that calls
+    # made on the instance are recorded on the very same mock.
+    lister.return_value = lister
+    lister.run.return_value = None
+
+    task_name = 'swh.lister.rcran.tasks.RCRANListerTask'
+    res = swh_app.send_task(task_name)
+    assert res
+    # block until the task has been processed
+    res.wait()
+    assert res.successful()
+
+    # instantiated without argument, run once, no index lookup
+    lister.assert_called_once_with()
+    lister.db_last_index.assert_not_called()
+    lister.run.assert_called_once_with()

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 1:30 AM (11 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3231418

Event Timeline