Page MenuHomeSoftware Heritage

D2062.id6935.diff
No OneTemporary

D2062.id6935.diff

diff --git a/swh/lister/cran/lister.py b/swh/lister/cran/lister.py
--- a/swh/lister/cran/lister.py
+++ b/swh/lister/cran/lister.py
@@ -1,17 +1,22 @@
# Copyright (C) 2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import subprocess
+
import json
import logging
import pkg_resources
+import subprocess
+
from collections import defaultdict
+from typing import List, Dict
from swh.lister.cran.models import CRANModel
-from swh.scheduler.utils import create_task_dict
-from swh.core import utils
from swh.lister.core.simple_lister import SimpleLister
+from swh.scheduler.utils import create_task_dict
+
+
+logger = logging.getLogger(__name__)
class CRANLister(SimpleLister):
@@ -32,15 +37,17 @@
kwargs.get('name'), origin_url, kwargs.get('version'),
project_metadata=self.descriptions[kwargs.get('name')])
- def r_script_request(self):
- """Runs R script which uses inbuilt API to return a json
- response containing data about all the R packages
+ def safely_issue_request(self, identifier: str) -> List[Dict]:
+ """Run the R script, which uses a built-in API to return a JSON
+ response containing data about all the R packages.
Returns:
- List of dictionaries
- example
+ List of dicts, one per R package.
+
+ Sample:
[
- {'Package': 'A3',
+ {
+ 'Package': 'A3',
'Version': '1.0.0',
'Title':
'Accurate, Adaptable, and Accessible Error Metrics for
@@ -48,22 +55,27 @@
'Description':
'Supplies tools for tabulating and analyzing the results
of predictive models. The methods employed are ... '
- }
- {'Package': 'abbyyR',
+ },
+ {
+ 'Package': 'abbyyR',
'Version': '0.5.4',
'Title':
'Access to Abbyy Optical Character Recognition (OCR) API',
'Description': 'Get text from images of text using Abbyy
- Cloud Optical Character\n ...'
- }
+ Cloud Optical Character\n ...'
+ },
...
]
+
"""
- file_path = pkg_resources.resource_filename('swh.lister.cran',
- 'list_all_packages.R')
- response = subprocess.run(file_path, stdout=subprocess.PIPE,
- shell=False)
- return json.loads(response.stdout)
+ filepath = pkg_resources.resource_filename('swh.lister.cran',
+ 'list_all_packages.R')
+ logger.debug('script list-all-packages.R path: %s', filepath)
+ response = subprocess.run(
+ filepath, stdout=subprocess.PIPE, shell=False)
+ data = json.loads(response.stdout)
+ logger.debug('r-script-request: %s', data)
+ return data
def get_model_from_repo(self, repo):
"""Transform from repository representation to model
@@ -87,36 +99,3 @@
"""
return [self.get_model_from_repo(repo) for repo in response]
-
- def ingest_data(self, identifier, checks=False):
- """Rework the base ingest_data.
- Request server endpoint which gives all in one go.
-
- Simplify and filter response list of repositories. Inject
- repo information into local db. Queue loader tasks for
- linked repositories.
-
- Args:
- identifier: Resource identifier (unused)
- checks (bool): Additional checks required (unused)
-
- """
- response = self.r_script_request()
- if not response:
- return response, []
- models_list = self.transport_response_simplified(response)
- models_list = self.filter_before_inject(models_list)
- all_injected = []
- for models in utils.grouper(models_list, n=10000):
- models = list(models)
- logging.debug('models: %s' % len(models))
- # inject into local db
- injected = self.inject_repo_data_into_db(models)
- # queue workers
- self.create_missing_origins_and_tasks(models, injected)
- all_injected.append(injected)
- # flush
- self.db_session.commit()
- self.db_session = self.mk_session()
-
- return response, all_injected

File Metadata

Mime Type
text/plain
Expires
Dec 21 2024, 3:17 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221351

Event Timeline