diff --git a/swh/loader/package/cran/loader.py b/swh/loader/package/cran/loader.py
--- a/swh/loader/package/cran/loader.py
+++ b/swh/loader/package/cran/loader.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019 The Software Heritage developers
+# Copyright (C) 2019-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -31,35 +31,40 @@
 class CRANLoader(PackageLoader):
     visit_type = 'cran'
 
-    def __init__(self, url: str, version: str):
+    def __init__(self, url: str, artifacts: List[Dict]):
         """Loader constructor.
 
         Args:
-            url: Origin url to retrieve cran artifact from
-            version: version of the cran artifact
+            url: Origin url to retrieve cran artifact(s) from
+            artifacts: List of artifacts (dicts with 'url' and 'version'
+                keys) associated with the origin url
 
         """
         super().__init__(url=url)
-        self.version = version
         # explicit what we consider the artifact identity
         self.id_keys = ['url', 'version']
-        self.artifact = {'url': url, 'version': version}
+        self.artifacts = artifacts
 
     def get_versions(self) -> List[str]:
-        # only 1 artifact
-        return [self.version]
+        """Return the version of every artifact, in the order given."""
+        return [artifact['version'] for artifact in self.artifacts]
 
     def get_default_version(self) -> str:
-        return self.version
+        """Return the version of the last artifact in the list."""
+        return self.artifacts[-1]['version']
 
     def get_package_info(self, version: str) -> Generator[
             Tuple[str, Dict[str, Any]], None, None]:
-        p_info = {
-            'url': self.url,
-            'filename': path.basename(self.url),
-            'raw': self.artifact,
-        }
-        yield release_name(version), p_info
+        for a_metadata in self.artifacts:
+            url = a_metadata['url']
+            package_version = a_metadata['version']
+            if version == package_version:
+                p_info = {
+                    'url': url,
+                    'filename': path.basename(url),
+                    'raw': a_metadata,
+                }
+                yield release_name(version), p_info
 
     def resolve_revision_from(
             self, known_artifacts: Mapping[bytes, Mapping],
@@ -85,7 +90,7 @@
         metadata = extract_intrinsic_metadata(uncompressed_path)
         normalized_date = normalize_timestamp(parse_date(metadata.get('Date')))
         author = swh_author(parse_author(metadata.get('Maintainer', {})))
-        version = metadata.get('Version', self.version)
+        version = metadata.get('Version', a_metadata['version'])
         return {
             'message': version.encode('utf-8'),
             'type': 'tar',
diff --git a/swh/loader/package/cran/tasks.py b/swh/loader/package/cran/tasks.py
--- a/swh/loader/package/cran/tasks.py
+++ b/swh/loader/package/cran/tasks.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019 The Software Heritage developers
+# Copyright (C) 2019-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -9,6 +9,6 @@
 
 
 @shared_task(name=__name__ + '.LoadCRAN')
-def load_cran(url=None, version=None):
-    """Load archive's artifacts (e.g gnu, etc...)"""
-    return CRANLoader(url, version).load()
+def load_cran(url=None, artifacts=None):
+    """Load CRAN's artifacts"""
+    return CRANLoader(url, artifacts or []).load()
diff --git a/swh/loader/package/cran/tests/test_cran.py b/swh/loader/package/cran/tests/test_cran.py
--- a/swh/loader/package/cran/tests/test_cran.py
+++ b/swh/loader/package/cran/tests/test_cran.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019 The Software Heritage developers
+# Copyright (C) 2019-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -151,8 +151,12 @@
 def test_cran_one_visit(swh_config, requests_mock_datadir):
     version = '2.22-6'
     base_url = 'https://cran.r-project.org'
-    url = f'{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz'  # noqa
-    loader = CRANLoader(url, version=version)
+    origin_url = f'{base_url}/Packages/Recommended_KernSmooth/index.html'
+    artifact_url = f'{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz'  # noqa
+    loader = CRANLoader(origin_url, artifacts=[{
+        'url': artifact_url,
+        'version': version,
+    }])
 
     actual_load_status = loader.load()
 
@@ -174,7 +178,7 @@
     }
     check_snapshot(expected_snapshot, loader.storage)
 
-    origin_visit = next(loader.storage.origin_visit_get(url))
+    origin_visit = next(loader.storage.origin_visit_get(origin_url))
     assert origin_visit['status'] == 'full'
     assert origin_visit['type'] == 'cran'
 
@@ -204,8 +208,12 @@
     """Multiple visits on the same origin, only 1 archive fetch"""
     version = '2.22-6'
     base_url = 'https://cran.r-project.org'
-    url = f'{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz'  # noqa
-    loader = CRANLoader(url, version=version)
+    origin_url = f'{base_url}/Packages/Recommended_KernSmooth/index.html'
+    artifact_url = f'{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz'  # noqa
+    loader = CRANLoader(origin_url, artifacts=[{
+        'url': artifact_url,
+        'version': version,
+    }])
 
     # first visit
     actual_load_status = loader.load()
@@ -228,7 +236,7 @@
     }
     check_snapshot(expected_snapshot, loader.storage)
 
-    origin_visit = next(loader.storage.origin_visit_get(url))
+    origin_visit = next(loader.storage.origin_visit_get(origin_url))
     assert origin_visit['status'] == 'full'
     assert origin_visit['type'] == 'cran'
 
@@ -253,7 +261,7 @@
         'snapshot_id': expected_snapshot_id
     }
 
-    origin_visit2 = next(loader.storage.origin_visit_get(url))
+    origin_visit2 = next(loader.storage.origin_visit_get(origin_url))
     assert origin_visit2['status'] == 'full'
     assert origin_visit2['type'] == 'cran'
 
diff --git a/swh/loader/package/cran/tests/test_tasks.py b/swh/loader/package/cran/tests/test_tasks.py
--- a/swh/loader/package/cran/tests/test_tasks.py
+++ b/swh/loader/package/cran/tests/test_tasks.py
@@ -11,7 +11,13 @@
 
     res = swh_app.send_task(
         'swh.loader.package.cran.tasks.LoadCRAN',
-        kwargs={'url': 'some-url', 'version': '1.2.3'}
+        kwargs={
+            'url': 'some-url',
+            'artifacts': [{
+                'version': '1.2.3',
+                'url': 'artifact-url'
+            }]
+        }
     )
     assert res
     res.wait()