Changeset View
Standalone View
swh/lister/cran/lister.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
import logging | import logging | ||||
import pkg_resources | import pkg_resources | ||||
import subprocess | import subprocess | ||||
from typing import List, Mapping | from typing import List, Mapping, Tuple | ||||
from swh.lister.cran.models import CRANModel | from swh.lister.cran.models import CRANModel | ||||
from swh.lister.core.simple_lister import SimpleLister | from swh.lister.core.simple_lister import SimpleLister | ||||
from swh.scheduler.utils import create_task_dict | from swh.scheduler.utils import create_task_dict | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
CRAN_MIRROR = 'https://cran.r-project.org' | |||||
class CRANLister(SimpleLister): | class CRANLister(SimpleLister): | ||||
MODEL = CRANModel | MODEL = CRANModel | ||||
LISTER_NAME = 'cran' | LISTER_NAME = 'cran' | ||||
instance = 'cran' | instance = 'cran' | ||||
def task_dict(self, origin_type, origin_url, **kwargs): | def task_dict(self, origin_type, origin_url, version=None, html_url=None, | ||||
policy=None, **kwargs): | |||||
"""Return task format dict. This creates tasks with args and kwargs | """Return task format dict. This creates tasks with args and kwargs | ||||
set, for example:: | set, for example:: | ||||
args: [] | args: [] | ||||
kwargs: { | kwargs: { | ||||
'url': 'https://cran.r-project.org/Packages/<package>...', | |||||
'artifacts': [{ | |||||
'url': 'https://cran.r-project.org/...', | 'url': 'https://cran.r-project.org/...', | ||||
'version': '0.0.1' | 'version': '0.0.1', | ||||
}] | |||||
} | } | ||||
""" | """ | ||||
policy = kwargs.get('policy', 'oneshot') | if not policy: | ||||
douardda: this html_url seems a bit strange here, and there is no mention of it in the example given in… | |||||
Done Inline Actions
It is. I avoided changing the task_dict method signature as I'm unsure we can do it. ardumont: > Also it looks like this argument is in fact required, so I don't understand why it's… | |||||
Done Inline Actions
Also, that's already the case for the existing variable `version` (and `policy`), so I just aligned that code. task_dict takes the model transformed earlier as a parameter. In this lister's context, html_url is actually an artifact_url, so you do not see html_url; you see artifact_url, which is present in the docstring. ardumont: also that's already the existing variable `version`'s case... (and policy) so... i just aligned… | |||||
version = kwargs.get('version') | policy = 'oneshot' | ||||
artifact_url = html_url | |||||
Done Inline Actions
Heads up: I will also drop that version, which has no place in there. ardumont: Heads up, i will also drop that version which has no part in there.
It's the artifact's version. | |||||
assert origin_type == 'tar' | assert origin_type == 'tar' | ||||
return create_task_dict( | return create_task_dict( | ||||
'load-cran', policy, | 'load-cran', policy, | ||||
url=origin_url, version=version, retries_left=3 | url=origin_url, artifacts=[{ | ||||
'url': artifact_url, | |||||
'version': version | |||||
}], retries_left=3 | |||||
) | ) | ||||
def safely_issue_request(self, identifier): | def safely_issue_request(self, identifier): | ||||
"""Bypass the implementation. It's now the `list_packages` which | """Bypass the implementation. It's now the `list_packages` which | ||||
returns data. | returns data. | ||||
As an implementation detail, we cannot change simply the base | As an implementation detail, we cannot change simply the base | ||||
SimpleLister yet as other implementation still uses it. This shall be | SimpleLister yet as other implementation still uses it. This shall be | ||||
Show All 38 Lines | def list_packages(self, response) -> List[Mapping[str, str]]: | ||||
return read_cran_data() | return read_cran_data() | ||||
def get_model_from_repo( | def get_model_from_repo( | ||||
self, repo: Mapping[str, str]) -> Mapping[str, str]: | self, repo: Mapping[str, str]) -> Mapping[str, str]: | ||||
"""Transform from repository representation to model | """Transform from repository representation to model | ||||
""" | """ | ||||
logger.debug('repo: %s', repo) | logger.debug('repo: %s', repo) | ||||
project_url = compute_package_url(repo) | origin_url, artifact_url = compute_origin_urls(repo) | ||||
package = repo['Package'] | package = repo['Package'] | ||||
version = repo['Version'] | version = repo['Version'] | ||||
return { | return { | ||||
'uid': f'{package}-{version}', | 'uid': f'{package}-{version}', | ||||
'name': package, | 'name': package, | ||||
'full_name': repo['Title'], | 'full_name': repo['Title'], | ||||
'version': version, | 'version': version, | ||||
'html_url': project_url, | 'html_url': artifact_url, | ||||
'origin_url': project_url, | 'origin_url': origin_url, | ||||
Done Inline Actions
Now I'm wondering whether to change this in CRAN's model (to name it appropriately). ardumont: Now i'm wondering whether to change this in cran's model... (to name it appropriately...)
.oO… | |||||
'origin_type': 'tar', | 'origin_type': 'tar', | ||||
} | } | ||||
def read_cran_data() -> List[Mapping[str, str]]: | def read_cran_data() -> List[Mapping[str, str]]: | ||||
"""Execute r script to read cran listing. | """Execute r script to read cran listing. | ||||
""" | """ | ||||
filepath = pkg_resources.resource_filename('swh.lister.cran', | filepath = pkg_resources.resource_filename('swh.lister.cran', | ||||
'list_all_packages.R') | 'list_all_packages.R') | ||||
logger.debug('script list-all-packages.R path: %s', filepath) | logger.debug('script list-all-packages.R path: %s', filepath) | ||||
response = subprocess.run(filepath, stdout=subprocess.PIPE, shell=False) | response = subprocess.run(filepath, stdout=subprocess.PIPE, shell=False) | ||||
return json.loads(response.stdout.decode('utf-8')) | return json.loads(response.stdout.decode('utf-8')) | ||||
def compute_package_url(repo: Mapping[str, str]) -> str: | def compute_origin_urls(repo: Mapping[str, str]) -> Tuple[str, str]: | ||||
"""Compute the package url from the repo dict. | """Compute the package url from the repo dict. | ||||
Args: | Args: | ||||
repo: dict with key 'Package', 'Version' | repo: dict with key 'Package', 'Version' | ||||
Returns: | Returns: | ||||
the package url | the tuple project url, artifact url | ||||
""" | """ | ||||
return 'https://cran.r-project.org/src/contrib' \ | package = repo['Package'] | ||||
'/{Package}_{Version}.tar.gz'.format(**repo) | version = repo['Version'] | ||||
origin_url = f'{CRAN_MIRROR}/package={package}' | |||||
artifact_url = f'{CRAN_MIRROR}/src/contrib/{package}_{version}.tar.gz' | |||||
return origin_url, artifact_url |
This html_url seems a bit strange here, and there is no mention of it in the example given in the docstring.
Also, it looks like this argument is in fact required, so I don't understand why it's retrieved with kwargs.get().