Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123721
D1492.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
9 KB
Subscribers
None
D1492.diff
View Options
diff --git a/MANIFEST.in b/MANIFEST.in
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -4,4 +4,5 @@
include requirements-swh.txt
include requirements-test.txt
include version.txt
+include swh/lister/cran/list_all_packages.R
recursive-include swh/lister/*/tests/ *.json
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -17,6 +17,7 @@
- `swh.lister.pypi`
- `swh.lister.npm`
- `swh.lister.phabricator`
+- `swh.lister.cran`
Dependencies
------------
@@ -188,6 +189,17 @@
logging.basicConfig(level=logging.DEBUG)
gnu_lister()
+
+## lister-cran
+
+Once configured, you can execute a CRAN lister using the following instructions in a `python3` script:
+
+```lang=python
+import logging
+from swh.lister.cran.tasks import cran_lister
+
+logging.basicConfig(level=logging.DEBUG)
+cran_lister()
```
Licensing
diff --git a/swh/lister/cli.py b/swh/lister/cli.py
--- a/swh/lister/cli.py
+++ b/swh/lister/cli.py
@@ -12,7 +12,7 @@
logger = logging.getLogger(__name__)
SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi',
- 'npm', 'phabricator', 'gnu']
+ 'npm', 'phabricator', 'gnu', 'cran']
@click.group(name='lister', context_settings=CONTEXT_SETTINGS)
@@ -120,6 +120,11 @@
from .gnu.lister import GNULister
_lister = GNULister(override_config=override_conf)
+ elif lister == 'cran':
+ from .cran.models import ModelBase
+ from .cran.lister import CRANLister
+ _lister = CRANLister(override_config=override_conf)
+
else:
raise ValueError(
'Invalid lister %s: only supported listers are %s' %
diff --git a/swh/lister/core/tests/conftest.py b/swh/lister/core/tests/conftest.py
--- a/swh/lister/core/tests/conftest.py
+++ b/swh/lister/core/tests/conftest.py
@@ -6,6 +6,7 @@
def celery_includes():
return [
'swh.lister.bitbucket.tasks',
+ 'swh.lister.cran.tasks',
'swh.lister.debian.tasks',
'swh.lister.github.tasks',
'swh.lister.gitlab.tasks',
diff --git a/swh/lister/cran/__init__.py b/swh/lister/cran/__init__.py
new file mode 100644
diff --git a/swh/lister/cran/list_all_packages.R b/swh/lister/cran/list_all_packages.R
new file mode 100755
--- /dev/null
+++ b/swh/lister/cran/list_all_packages.R
@@ -0,0 +1,9 @@
+#!/usr/bin/Rscript
+
+# This R script calls the built-in API to get the list of
+# all R packages and their descriptions, then converts the API
+# response to a JSON string and prints it
+
+db <- tools::CRAN_package_db()[, c("Package", "Version", "Title", "Description")]
+dbjson <- jsonlite::toJSON(db)
+print(dbjson)
\ No newline at end of file
diff --git a/swh/lister/cran/lister.py b/swh/lister/cran/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cran/lister.py
@@ -0,0 +1,119 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+import subprocess
+import json
+import logging
+import pkg_resources
+
+from swh.lister.cran.models import CRANModel
+
+from swh.scheduler.utils import create_task_dict
+from swh.core import utils
+from swh.lister.core.simple_lister import SimpleLister
+
+
+class CRANLister(SimpleLister):
+ MODEL = CRANModel
+ LISTER_NAME = 'cran'
+ instance = 'cran'
+
+ def task_dict(self, origin_type, origin_url, **kwargs):
+ """Return task format dict
+
+ This is overridden from the lister_base as more information is
+ needed for the ingestion task creation.
+ """
+ return create_task_dict(
+ 'load-%s' % origin_type, 'recurring',
+ kwargs.get('name'), origin_url, kwargs.get('version'),
+ project_metadata=kwargs.get('description'))
+
+ def r_script_request(self):
+ """Runs R script which uses inbuilt API to return a json
+ response containing data about all the R packages
+
+ Returns:
+ List of dictionaries
+ example
+ [
+ {'Package': 'A3',
+ 'Version': '1.0.0',
+ 'Title':
+ 'Accurate, Adaptable, and Accessible Error Metrics for
+ Predictive\nModels',
+ 'Description':
+ 'Supplies tools for tabulating and analyzing the results
+ of predictive models. The methods employed are ... '
+ },
+ {'Package': 'abbyyR',
+ 'Version': '0.5.4',
+ 'Title':
+ 'Access to Abbyy Optical Character Recognition (OCR) API',
+ 'Description': 'Get text from images of text using Abbyy
+ Cloud Optical Character\n ...'
+ }
+ ...
+ ]
+ """
+ file_path = pkg_resources.resource_filename('swh.lister.cran',
+ 'list_all_packages.R')
+ response = subprocess.run(file_path, stdout=subprocess.PIPE,
+ shell=False)
+ return json.loads(response.stdout)
+
+ def get_model_from_repo(self, repo):
+ """Transform from repository representation to model
+
+ """
+ project_url = 'https://cran.r-project.org/src/contrib' \
+ '/%(Package)s_%(Version)s.tar.gz' % repo
+ return {
+ 'uid': repo["Package"],
+ 'name': repo["Package"],
+ 'full_name': repo["Title"],
+ 'version': repo["Version"],
+ 'html_url': project_url,
+ 'origin_url': project_url,
+ 'origin_type': 'cran',
+ 'description': repo["Description"]
+ }
+
+ def transport_response_simplified(self, response):
+ """Transform response to list for model manipulation
+
+ """
+ return [self.get_model_from_repo(repo) for repo in response]
+
+ def ingest_data(self, identifier, checks=False):
+ """Rework the base ingest_data.
+ Run the R listing script, which returns all packages in one go.
+
+ Simplify and filter response list of repositories. Inject
+ repo information into local db. Queue loader tasks for
+ linked repositories.
+
+ Args:
+ identifier: Resource identifier (unused)
+ checks (bool): Additional checks required (unused)
+
+ """
+ response = self.r_script_request()
+ if not response:
+ return response, []
+ models_list = self.transport_response_simplified(response)
+ models_list = self.filter_before_inject(models_list)
+ all_injected = []
+ for models in utils.grouper(models_list, n=10000):
+ models = list(models)
+ logging.debug('models: %s' % len(models))
+ # inject into local db
+ injected = self.inject_repo_data_into_db(models)
+ # queue workers
+ self.create_missing_origins_and_tasks(models, injected)
+ all_injected.append(injected)
+ # flush
+ self.db_session.commit()
+ self.db_session = self.mk_session()
+
+ return response, all_injected
diff --git a/swh/lister/cran/models.py b/swh/lister/cran/models.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cran/models.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from sqlalchemy import Column, String
+
+from swh.lister.core.models import ModelBase
+
+
+class CRANModel(ModelBase):
+ """a CRAN repository representation
+
+ """
+ __tablename__ = 'cran_repo'
+
+ uid = Column(String, primary_key=True)
+ version = Column(String)
diff --git a/swh/lister/cran/tasks.py b/swh/lister/cran/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cran/tasks.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scheduler.celery_backend.config import app
+
+from swh.lister.cran.lister import CRANLister
+
+
+@app.task(name=__name__ + '.CRANListerTask')
+def cran_lister(**lister_args):
+ CRANLister(**lister_args).run()
+
+
+@app.task(name=__name__ + '.ping')
+def ping():
+ return 'OK'
diff --git a/swh/lister/cran/tests/__init__.py b/swh/lister/cran/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/cran/tests/conftest.py b/swh/lister/cran/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cran/tests/conftest.py
@@ -0,0 +1 @@
+from swh.lister.core.tests.conftest import * # noqa
diff --git a/swh/lister/cran/tests/test_tasks.py b/swh/lister/cran/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cran/tests/test_tasks.py
@@ -0,0 +1,27 @@
+from unittest.mock import patch
+
+
+def test_ping(swh_app, celery_session_worker):
+ res = swh_app.send_task(
+ 'swh.lister.cran.tasks.ping')
+ assert res
+ res.wait()
+ assert res.successful()
+ assert res.result == 'OK'
+
+
+@patch('swh.lister.cran.tasks.CRANLister')
+def test_lister(lister, swh_app, celery_session_worker):
+ # setup the mocked CRANLister
+ lister.return_value = lister
+ lister.run.return_value = None
+
+ res = swh_app.send_task(
+ 'swh.lister.cran.tasks.CRANListerTask')
+ assert res
+ res.wait()
+ assert res.successful()
+
+ lister.assert_called_once_with()
+ lister.db_last_index.assert_not_called()
+ lister.run.assert_called_once_with()
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 19 2024, 9:54 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219569
Attached To
D1492: CRAN Lister
Event Timeline
Log In to Comment