Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123778
D1492.id4891.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
D1492.id4891.diff
View Options
diff --git a/swh/lister/core/tests/conftest.py b/swh/lister/core/tests/conftest.py
--- a/swh/lister/core/tests/conftest.py
+++ b/swh/lister/core/tests/conftest.py
@@ -12,4 +12,5 @@
'swh.lister.npm.tasks',
'swh.lister.pypi.tasks',
'swh.lister.phabricator.tasks',
+ 'swh.lister.rcran.tasks',
]
diff --git a/swh/lister/rcran/__init__.py b/swh/lister/rcran/__init__.py
new file mode 100644
diff --git a/swh/lister/rcran/list_all_the_packages.R b/swh/lister/rcran/list_all_the_packages.R
new file mode 100755
--- /dev/null
+++ b/swh/lister/rcran/list_all_the_packages.R
@@ -0,0 +1,5 @@
+#!/usr/bin/Rscript
+
+db <- tools::CRAN_package_db();
+dbjson <- jsonlite::toJSON(db);
+print(dbjson);
\ No newline at end of file
diff --git a/swh/lister/rcran/lister.py b/swh/lister/rcran/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/rcran/lister.py
@@ -0,0 +1,116 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+import subprocess
+import json
+import logging
+
+from swh.lister.rcran.models import RCRANModel
+
+from swh.scheduler import utils
+from swh.lister.core.simple_lister import SimpleLister
+
+
+class RCRANLister(SimpleLister):
+ MODEL = RCRANModel
+ LISTER_NAME = 'rcran'
+
+ def __init__(self, override_config=None):
+ SimpleLister.__init__(self, override_config=override_config)
+
+ def task_dict(self, origin_type, origin_url, **kwargs):
+ """(Override) Return task format dict
+
+ This is overridden from the lister_base as more information is
+ needed for the ingestion task creation.
+
+ """
+ _type = 'origin-update-%s' % origin_type
+ _policy = 'recurring'
+ project_name = kwargs.get('name')
+ project_version = kwargs.get('version')
+ project_metadata = kwargs.get('description')
+ return utils.create_task_dict(
+ _type, _policy, project_name, origin_url, project_version,
+ project_metadata=project_metadata)
+
+ def r_script_request(self):
+ """(Override) Runs the R script, which uses a built-in API to return
+ a JSON response containing data about all the R packages.
+
+ Returns:
+ JSON response
+
+ """
+ response = subprocess.getoutput("./list_all_the_packages.R")
+ return json.loads(response)
+
+ def list_packages(self, response):
+ """(Override) List the actual rcran origins from the response.
+
+ """
+ pass
+
+ def _compute_urls(self, repo):
+ """Returns the URL of the package's source tarball on CRAN
+
+ """
+ return (
+ 'https://cran.r-project.org/src/contrib/%s_%s.tar.gz' %
+ (repo["Package"], repo["Version"])
+ )
+
+ def get_model_from_repo(self, repo):
+ """(Override) Transform from repository representation to model
+
+ """
+ project_url = self._compute_urls(repo)
+ return {
+ 'uid': repo["Package"],
+ 'name': repo["Package"],
+ 'full_name': repo["Title"],
+ 'version': repo["Version"],
+ 'html_url': project_url,
+ 'origin_url': project_url,
+ 'origin_type': 'rcran',
+ 'description': repo["Description"]
+ }
+
+ def transport_response_simplified(self, response):
+ """(Override) Transform response to list for model manipulation
+
+ """
+ return [self.get_model_from_repo(repo_name) for repo_name in response]
+
+ def ingest_data(self, identifier, checks=False):
+ """(Override) Rework the base ingest_data.
+ Run the R script, which returns all packages in one go.
+
+ Simplify and filter response list of repositories. Inject
+ repo information into local db. Queue loader tasks for
+ linked repositories.
+
+ Args:
+ identifier: Resource identifier (unused)
+ checks (bool): Additional checks required (unused)
+
+ """
+ response = self.r_script_request()
+ if not response:
+ return response, []
+ models_list = self.transport_response_simplified(response)
+ models_list = self.filter_before_inject(models_list)
+ all_injected = []
+ for models in utils.grouper(models_list, n=10000):
+ models = list(models)
+ logging.debug('models: %s' % len(models))
+ # inject into local db
+ injected = self.inject_repo_data_into_db(models)
+ # queue workers
+ self.create_missing_origins_and_tasks(models, injected)
+ all_injected.append(injected)
+ # flush
+ self.db_session.commit()
+ self.db_session = self.mk_session()
+
+ return response, all_injected
diff --git a/swh/lister/rcran/models.py b/swh/lister/rcran/models.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/rcran/models.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from sqlalchemy import Column, String
+
+from swh.lister.core.models import ModelBase
+
+
+class RCRANModel(ModelBase):
+ """An RCRAN repository representation
+
+ """
+ __tablename__ = 'rcran_repo'
+
+ uid = Column(String, primary_key=True)
+ version = Column(String)
diff --git a/swh/lister/rcran/tasks.py b/swh/lister/rcran/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/rcran/tasks.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scheduler.celery_backend.config import app
+
+from swh.lister.rcran.lister import RCRANLister
+
+
+@app.task(name=__name__ + '.RCRANListerTask')
+def rcran_lister(**lister_args):
+ RCRANLister(**lister_args).run()
+
+
+@app.task(name=__name__ + '.ping')
+def ping():
+ return 'OK'
diff --git a/swh/lister/rcran/tests/__init__.py b/swh/lister/rcran/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/rcran/tests/conftest.py b/swh/lister/rcran/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/rcran/tests/conftest.py
@@ -0,0 +1 @@
+from swh.lister.core.tests.conftest import * # noqa
diff --git a/swh/lister/rcran/tests/test_tasks.py b/swh/lister/rcran/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/rcran/tests/test_tasks.py
@@ -0,0 +1,27 @@
+from unittest.mock import patch
+
+
+def test_ping(swh_app, celery_session_worker):
+ res = swh_app.send_task(
+ 'swh.lister.rcran.tasks.ping')
+ assert res
+ res.wait()
+ assert res.successful()
+ assert res.result == 'OK'
+
+
+@patch('swh.lister.rcran.tasks.RCRANLister')
+def test_lister(lister, swh_app, celery_session_worker):
+ # setup the mocked RCRANLister
+ lister.return_value = lister
+ lister.run.return_value = None
+
+ res = swh_app.send_task(
+ 'swh.lister.rcran.tasks.RCRANListerTask')
+ assert res
+ res.wait()
+ assert res.successful()
+
+ lister.assert_called_once_with()
+ lister.db_last_index.assert_not_called()
+ lister.run.assert_called_once_with()
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 20 2024, 1:30 AM (11 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3231418
Attached To
D1492: CRAN Lister
Event Timeline
Log In to Comment