diff --git a/swh/loader/package/conda/loader.py b/swh/loader/package/conda/loader.py
index 5668702..e75c1e1 100644
--- a/swh/loader/package/conda/loader.py
+++ b/swh/loader/package/conda/loader.py
@@ -1,180 +1,196 @@
 # Copyright (C) 2022 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from datetime import datetime
 import json
 from pathlib import Path
 from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple
 
 import attr
 import iso8601
 from packaging.version import parse as parse_version
 import yaml
 
 from swh.loader.package.loader import BasePackageInfo, PackageLoader
 from swh.loader.package.utils import EMPTY_AUTHOR, Person, get_url_body, release_name
 from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone
 from swh.storage.interface import StorageInterface
 
 
 @attr.s
 class CondaPackageInfo(BasePackageInfo):
 
     name = attr.ib(type=str)
     """Name of the package"""
 
     filename = attr.ib(type=str)
     """Archive (tar.gz) file name"""
 
     version = attr.ib(type=str)
-    """Complete version and distribution name. Ex: 'linux-64/0.1.1-py37'
+    """Complete version and distribution name used as branch name. Ex: 'linux-64/0.1.1-py37'
+    """
+
+    release_version = attr.ib(type=str)
+    """Version number used as release name. Ex: '0.1.1-py37-linux-64'
     """
 
     last_modified: Optional[datetime] = attr.ib()
     """File last modified date as release date"""
 
 
 def extract_intrinsic_metadata(dir_path: Path) -> Dict[str, Any]:
     """Extract intrinsic metadata from file at dir_path.
 
     Each Conda package version may have an info/about.json file in the
     archive. If missing we try to get metadata from info/recipe/meta.yaml
 
     See https://docs.conda.io/projects/conda/en/latest/user-guide/concepts/pkg-specs.html?highlight=meta.yaml#info-about-json  # noqa: B950
     for package specifications.
 
     Args:
         dir_path: A directory on disk where a metadata file can be found
 
     Returns:
         A dict mapping from yaml parser
     """
     metadata: Dict[str, Any] = {}
 
     meta_json_path = dir_path / "info" / "about.json"
     meta_yml_path = dir_path / "info" / "recipe" / "meta.yaml"
 
     if meta_json_path.exists():
         try:
             metadata = json.loads(meta_json_path.read_text())
         except json.JSONDecodeError:
             pass
 
     if meta_yml_path.exists() and not metadata:
         try:
             metadata = yaml.safe_load(meta_yml_path.read_text())
         except yaml.YAMLError:
             pass
 
     return metadata
 
 
 class CondaLoader(PackageLoader[CondaPackageInfo]):
     visit_type = "conda"
 
     def __init__(
         self,
         storage: StorageInterface,
         url: str,
         artifacts: List[Dict[str, Any]],
         **kwargs,
     ):
 
         super().__init__(storage=storage, url=url, **kwargs)
         self.url = url
         self.artifacts: Dict[str, Dict] = {
             artifact["version"]: artifact for artifact in artifacts
         }
 
     def _raw_info(self, url: str, **extra_params) -> bytes:
         return get_url_body(url=url, **extra_params)
 
     def get_versions(self) -> Sequence[str]:
         """Get all released versions of a Conda package
 
         Returns:
             A sequence of versions
 
         Example::
 
-            ["0.1.1", "0.10.2"]
+            ["linux-64/0.1.1-py37", "linux-64/0.10.2-py37"]
         """
         versions = list(self.artifacts.keys())
-        versions.sort(key=parse_version)
+        # Sort on the bare version number of "<arch>/<version>-<build>" keys;
+        # [-1] keeps a key without an "<arch>/" prefix sortable instead of raising
+        versions.sort(
+            key=lambda version_key: parse_version(
+                version_key.split("/", 1)[-1].split("-", 1)[0]
+            )
+        )
         return versions
 
     def get_default_version(self) -> str:
         """Get the newest release version of a Conda package
 
         Returns:
             A string representing a version
 
         Example::
 
-            "0.10.2"
+            "linux-64/0.10.2-py37"
         """
         return self.get_versions()[-1]
 
     def get_package_info(self, version: str) -> Iterator[Tuple[str, CondaPackageInfo]]:
         """Get release name and package information from version
 
         Args:
-            version: Package version (e.g: "0.1.0")
+            version: Package version key (e.g: "linux-64/0.1.0-py37")
 
         Returns:
             Iterator of tuple (release_name, p_info)
         """
         data = self.artifacts[version]
         pkgname: str = self.url.split("/")[-1]
         url: str = data["url"]
         filename: str = data["filename"]
 
         last_modified = None
         if data.get("date"):
             last_modified = iso8601.parse_date(data["date"])
 
+        # Release names put the version first ("<version>-<build>-<arch>"); a key
+        # without an "<arch>/" prefix is used unchanged
+        arch, sep, version_and_build = version.partition("/")
+        release_version = f"{version_and_build}-{arch}" if sep else version
+
         p_info = CondaPackageInfo(
             name=pkgname,
             filename=filename,
             url=url,
             version=version,
+            release_version=release_version,
             last_modified=last_modified,
             checksums=data["checksums"],
         )
 
         yield release_name(version), p_info
 
     def build_release(
         self, p_info: CondaPackageInfo, uncompressed_path: str, directory: Sha1Git
     ) -> Optional[Release]:
         # Extract intrinsic metadata from archive to get description and author
         metadata = extract_intrinsic_metadata(Path(uncompressed_path))
 
         author = EMPTY_AUTHOR
         maintainers = metadata.get("extra", {}).get("recipe-maintainers")
         if maintainers and isinstance(maintainers, list) and any(maintainers):
             # TODO: here we have a list of author, see T3887
             author = Person.from_fullname(maintainers[0].encode())
 
         message = (
             f"Synthetic release for Conda source package {p_info.name} "
             f"version {p_info.version}\n"
         )
 
         last_modified = (
             TimestampWithTimezone.from_datetime(p_info.last_modified)
             if p_info.last_modified
             else None
         )
 
         return Release(
-            name=p_info.version.encode(),
+            name=p_info.release_version.encode(),
             author=author,
             date=last_modified,
             message=message.encode(),
             target_type=ObjectType.DIRECTORY,
             target=directory,
             synthetic=True,
         )
diff --git a/swh/loader/package/conda/tests/test_conda.py b/swh/loader/package/conda/tests/test_conda.py
index 6af23ef..b4262ca 100644
--- a/swh/loader/package/conda/tests/test_conda.py
+++ b/swh/loader/package/conda/tests/test_conda.py
@@ -1,156 +1,139 @@
 # Copyright (C) 2022 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from swh.loader.package.conda.loader import CondaLoader
 from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
-from swh.model.hashutil import hash_to_bytes
+from swh.model.hashutil import hash_to_bytes, hash_to_hex
 from swh.model.model import (
     ObjectType,
     Person,
     Release,
     Snapshot,
     SnapshotBranch,
     TargetType,
     TimestampWithTimezone,
 )
 
 ORIGINS = [
     {
         "url": "https://anaconda.org/conda-forge/lifetimes",
         "artifacts": [
             {
                 "url": "https://conda.anaconda.org/conda-forge/linux-64/lifetimes-0.11.1-py36h9f0ad1d_1.tar.bz2",  # noqa: B950
                 "date": "2020-07-06T12:19:36.425000+00:00",
                 "version": "linux-64/0.11.1-py36h9f0ad1d_1",
                 "filename": "lifetimes-0.11.1-py36h9f0ad1d_1.tar.bz2",
                 "checksums": {
                     "md5": "5cbc765bd910a62315f340988f329768",
                     "sha256": "44f91fa4fd77aea15dcba382b3f20e13a6ae1d48eedb9ae6b3c3a0709bbdb76e",  # noqa: B950
                 },
             },
             {
                 "url": "https://conda.anaconda.org/conda-forge/linux-64/lifetimes-0.11.1-py36hc560c46_1.tar.bz2",  # noqa: B950
                 "date": "",  # Empty date
                 "version": "linux-64/0.11.1-py36hc560c46_1",
                 "filename": "lifetimes-0.11.1-py36hc560c46_1.tar.bz2",
                 "checksums": {
                     "md5": "14d2908d3b625ffd3f8d1fc0f20eaa07",
                     "sha256": "46f5c5ab12338ec7b546d930a75c39633762fb2dac5f486fefebabbc4705c6c1",  # noqa: B950
                 },
             },
         ],
     },
 ]
 
 
 def test_get_versions(requests_mock_datadir, swh_storage):
     loader = CondaLoader(
         swh_storage, url=ORIGINS[0]["url"], artifacts=ORIGINS[0]["artifacts"]
     )
     assert loader.get_versions() == [
         "linux-64/0.11.1-py36h9f0ad1d_1",
         "linux-64/0.11.1-py36hc560c46_1",
     ]
 
 
 def test_get_default_version(requests_mock_datadir, swh_storage):
     loader = CondaLoader(
         swh_storage, url=ORIGINS[0]["url"], artifacts=ORIGINS[0]["artifacts"]
     )
     assert loader.get_default_version() == "linux-64/0.11.1-py36hc560c46_1"
 
 
 def test_conda_loader_load_multiple_version(
     datadir, requests_mock_datadir, swh_storage
 ):
     loader = CondaLoader(
         swh_storage, url=ORIGINS[0]["url"], artifacts=ORIGINS[0]["artifacts"]
     )
     load_status = loader.load()
     assert load_status["status"] == "eventful"
     assert load_status["snapshot_id"] is not None
 
-    expected_snapshot_id = "9c20bedf9af54ef7b3937fe4675f9f17b9331b7b"
+    first_release = Release(
+        name=b"0.11.1-py36h9f0ad1d_1-linux-64",
+        message=b"Synthetic release for Conda source package lifetimes version"
+        b" linux-64/0.11.1-py36h9f0ad1d_1\n",
+        target=hash_to_bytes("0c63e5f909e481d8e5832bac8abbd089bca42993"),
+        target_type=ObjectType.DIRECTORY,
+        synthetic=True,
+        author=Person(
+            fullname=b"CamDavidsonPilon", name=b"CamDavidsonPilon", email=None
+        ),
+        date=TimestampWithTimezone.from_iso8601("2020-07-06T12:19:36.425000+00:00"),
+    )
 
-    assert expected_snapshot_id == load_status["snapshot_id"]
+    # This release is expected to have an empty author and no date
+    second_release = Release(
+        name=b"0.11.1-py36hc560c46_1-linux-64",
+        message=b"Synthetic release for Conda source package lifetimes version"
+        b" linux-64/0.11.1-py36hc560c46_1\n",
+        target=hash_to_bytes("45ca406aeb31f51836a8593b619ab216403ce489"),
+        target_type=ObjectType.DIRECTORY,
+        synthetic=True,
+        author=Person(fullname=b"", name=None, email=None),
+        date=None,
+    )
 
     expected_snapshot = Snapshot(
-        id=hash_to_bytes(load_status["snapshot_id"]),
         branches={
             b"releases/linux-64/0.11.1-py36h9f0ad1d_1": SnapshotBranch(
-                target=hash_to_bytes("9848f61b501801799025d632221833a0f988dca2"),
+                target=first_release.id,
                 target_type=TargetType.RELEASE,
             ),
             b"releases/linux-64/0.11.1-py36hc560c46_1": SnapshotBranch(
-                target=hash_to_bytes("fad38f867db9311504a4c340f2b32dcdffa46e27"),
+                target=second_release.id,
                 target_type=TargetType.RELEASE,
             ),
             b"HEAD": SnapshotBranch(
                 target=b"releases/linux-64/0.11.1-py36hc560c46_1",
                 target_type=TargetType.ALIAS,
             ),
         },
     )
 
+    assert hash_to_hex(expected_snapshot.id) == load_status["snapshot_id"]
+
     check_snapshot(expected_snapshot, swh_storage)
 
     stats = get_stats(swh_storage)
     assert {
         "content": 1 + 1,
         "directory": 2 + 3,
         "origin": 1,
         "origin_visit": 1,
         "release": 1 + 1,
         "revision": 0,
         "skipped_content": 0,
         "snapshot": 1,
     } == stats
 
-    assert swh_storage.release_get(
-        [hash_to_bytes("9848f61b501801799025d632221833a0f988dca2")]
-    )[0] == Release(
-        name=b"linux-64/0.11.1-py36h9f0ad1d_1",
-        message=b"Synthetic release for Conda source package lifetimes version"
-        b" linux-64/0.11.1-py36h9f0ad1d_1\n",
-        target=hash_to_bytes("0c63e5f909e481d8e5832bac8abbd089bca42993"),
-        target_type=ObjectType.DIRECTORY,
-        synthetic=True,
-        author=Person(
-            fullname=b"CamDavidsonPilon", name=b"CamDavidsonPilon", email=None
-        ),
-        date=TimestampWithTimezone.from_iso8601("2020-07-06T12:19:36.425000+00:00"),
-        id=hash_to_bytes("9848f61b501801799025d632221833a0f988dca2"),
-    )
-
-    assert_last_visit_matches(
-        swh_storage,
-        url=ORIGINS[0]["url"],
-        status="full",
-        type="conda",
-        snapshot=hash_to_bytes(load_status["snapshot_id"]),
-    )
-
-    # This one have empty author and date
-    assert swh_storage.release_get(
-        [hash_to_bytes("fad38f867db9311504a4c340f2b32dcdffa46e27")]
-    )[0] == Release(
-        name=b"linux-64/0.11.1-py36hc560c46_1",
-        message=b"Synthetic release for Conda source package lifetimes version"
-        b" linux-64/0.11.1-py36hc560c46_1\n",
-        target=hash_to_bytes("45ca406aeb31f51836a8593b619ab216403ce489"),
-        target_type=ObjectType.DIRECTORY,
-        synthetic=True,
-        author=Person(fullname=b"", name=None, email=None),
-        date=None,
-        id=hash_to_bytes("fad38f867db9311504a4c340f2b32dcdffa46e27"),
-    )
-
     assert_last_visit_matches(
         swh_storage,
         url=ORIGINS[0]["url"],
         status="full",
         type="conda",
         snapshot=hash_to_bytes(load_status["snapshot_id"]),
     )