Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/cran/loader.py
Show All 10 Lines | |||||
import logging | import logging | ||||
import re | import re | ||||
from typing import Any, Dict, Iterator, List, Mapping, Optional, Tuple | from typing import Any, Dict, Iterator, List, Mapping, Optional, Tuple | ||||
import attr | import attr | ||||
from debian.deb822 import Deb822 | from debian.deb822 import Deb822 | ||||
from swh.loader.package.loader import BasePackageInfo, PackageLoader | from swh.loader.package.loader import BasePackageInfo, PackageLoader | ||||
from swh.loader.package.utils import release_name, artifact_identity | from swh.loader.package.utils import release_name | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Person, | Person, | ||||
TimestampWithTimezone, | TimestampWithTimezone, | ||||
Sha1Git, | Sha1Git, | ||||
Revision, | Revision, | ||||
RevisionType, | RevisionType, | ||||
) | ) | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
DATE_PATTERN = re.compile(r"^(?P<year>\d{4})-(?P<month>\d{2})$") | DATE_PATTERN = re.compile(r"^(?P<year>\d{4})-(?P<month>\d{2})$") | ||||
@attr.s | |||||
class CRANPackageInfo(BasePackageInfo): | class CRANPackageInfo(BasePackageInfo): | ||||
raw = attr.ib(type=Dict[str, Any]) | raw = attr.ib(type=Dict[str, Any]) | ||||
version = attr.ib(type=str) | |||||
ID_KEYS = ["url", "version"] | |||||
@classmethod | |||||
def from_metadata(cls, a_metadata: Dict[str, Any]) -> "CRANPackageInfo": | |||||
url = a_metadata["url"] | |||||
return CRANPackageInfo( | |||||
url=url, | |||||
filename=path.basename(url), | |||||
raw=a_metadata, | |||||
version=a_metadata["version"], | |||||
) | |||||
class CRANLoader(PackageLoader[CRANPackageInfo]): | class CRANLoader(PackageLoader[CRANPackageInfo]): | ||||
visit_type = "cran" | visit_type = "cran" | ||||
def __init__(self, url: str, artifacts: List[Dict]): | def __init__(self, url: str, artifacts: List[Dict]): | ||||
"""Loader constructor. | """Loader constructor. | ||||
Args: | Args: | ||||
url: Origin url to retrieve cran artifact(s) from | url: Origin url to retrieve cran artifact(s) from | ||||
artifacts: List of associated artifact for the origin url | artifacts: List of associated artifact for the origin url | ||||
""" | """ | ||||
super().__init__(url=url) | super().__init__(url=url) | ||||
# explicit what we consider the artifact identity | # explicit what we consider the artifact identity | ||||
self.id_keys = ["url", "version"] | |||||
self.artifacts = artifacts | self.artifacts = artifacts | ||||
def get_versions(self) -> List[str]: | def get_versions(self) -> List[str]: | ||||
versions = [] | versions = [] | ||||
for artifact in self.artifacts: | for artifact in self.artifacts: | ||||
versions.append(artifact["version"]) | versions.append(artifact["version"]) | ||||
return versions | return versions | ||||
def get_default_version(self) -> str: | def get_default_version(self) -> str: | ||||
return self.artifacts[-1]["version"] | return self.artifacts[-1]["version"] | ||||
def get_package_info(self, version: str) -> Iterator[Tuple[str, CRANPackageInfo]]: | def get_package_info(self, version: str) -> Iterator[Tuple[str, CRANPackageInfo]]: | ||||
for a_metadata in self.artifacts: | for a_metadata in self.artifacts: | ||||
url = a_metadata["url"] | p_info = CRANPackageInfo.from_metadata(a_metadata) | ||||
package_version = a_metadata["version"] | if version == p_info.version: | ||||
if version == package_version: | |||||
p_info = CRANPackageInfo( | |||||
url=url, filename=path.basename(url), raw=a_metadata, | |||||
) | |||||
yield release_name(version), p_info | yield release_name(version), p_info | ||||
def resolve_revision_from( | def resolve_revision_from( | ||||
self, | self, known_artifacts: Mapping[bytes, Mapping], p_info: CRANPackageInfo, | ||||
known_artifacts: Mapping[bytes, Mapping], | |||||
artifact_metadata: Mapping[str, Any], | |||||
) -> Optional[bytes]: | ) -> Optional[bytes]: | ||||
"""Given known_artifacts per revision, try to determine the revision for | """Given known_artifacts per revision, try to determine the revision for | ||||
artifact_metadata | artifact_metadata | ||||
""" | """ | ||||
new_identity = artifact_identity(artifact_metadata, self.id_keys) | new_identity = p_info.artifact_identity() | ||||
for rev_id, known_artifact_meta in known_artifacts.items(): | for rev_id, known_artifact_meta in known_artifacts.items(): | ||||
logging.debug("known_artifact_meta: %s", known_artifact_meta) | logging.debug("known_artifact_meta: %s", known_artifact_meta) | ||||
known_artifact = known_artifact_meta["extrinsic"]["raw"] | known_artifact = known_artifact_meta["extrinsic"]["raw"] | ||||
known_identity = artifact_identity(known_artifact, self.id_keys) | known_identity = CRANPackageInfo.from_metadata( | ||||
known_artifact | |||||
).artifact_identity() | |||||
if new_identity == known_identity: | if new_identity == known_identity: | ||||
return rev_id | return rev_id | ||||
return None | return None | ||||
ardumont: I started reviewing as you suggested (that's only the first package loader I read after the… | |||||
Done Inline Actions: That crossed my mind, but Debian does it differently, and PyPI and NPM do it in another different way, so I'd rather keep it this way, at least for now. vlorentz: That crossed my mind, but Debian does it differently, and PyPI and NPM do it in another… | |||||
Not Done Inline Actions: agreed ;) ardumont: agreed ;) | |||||
def build_revision( | def build_revision( | ||||
self, a_metadata: Mapping[str, Any], uncompressed_path: str, directory: Sha1Git | self, p_info: CRANPackageInfo, uncompressed_path: str, directory: Sha1Git | ||||
) -> Optional[Revision]: | ) -> Optional[Revision]: | ||||
# a_metadata is empty | # a_metadata is empty | ||||
metadata = extract_intrinsic_metadata(uncompressed_path) | metadata = extract_intrinsic_metadata(uncompressed_path) | ||||
date = parse_date(metadata.get("Date")) | date = parse_date(metadata.get("Date")) | ||||
author = Person.from_fullname(metadata.get("Maintainer", "").encode()) | author = Person.from_fullname(metadata.get("Maintainer", "").encode()) | ||||
version = metadata.get("Version", a_metadata["version"]) | version = metadata.get("Version", p_info.version) | ||||
return Revision( | return Revision( | ||||
message=version.encode("utf-8"), | message=version.encode("utf-8"), | ||||
type=RevisionType.TAR, | type=RevisionType.TAR, | ||||
date=date, | date=date, | ||||
author=author, | author=author, | ||||
committer=author, | committer=author, | ||||
committer_date=date, | committer_date=date, | ||||
parents=(), | parents=(), | ||||
directory=directory, | directory=directory, | ||||
synthetic=True, | synthetic=True, | ||||
metadata={ | metadata={ | ||||
"intrinsic": {"tool": "DESCRIPTION", "raw": metadata,}, | "intrinsic": {"tool": "DESCRIPTION", "raw": metadata,}, | ||||
"extrinsic": { | "extrinsic": { | ||||
"provider": self.url, | "provider": self.url, | ||||
"when": self.visit_date.isoformat(), | "when": self.visit_date.isoformat(), | ||||
"raw": a_metadata, | "raw": p_info.raw, | ||||
}, | }, | ||||
}, | }, | ||||
) | ) | ||||
def parse_debian_control(filepath: str) -> Dict[str, Any]: | def parse_debian_control(filepath: str) -> Dict[str, Any]: | ||||
"""Parse debian control at filepath""" | """Parse debian control at filepath""" | ||||
metadata: Dict = {} | metadata: Dict = {} | ||||
▲ Show 20 Lines • Show All 71 Lines • Show Last 20 Lines |
I started reviewing as you suggested (that's only the first package loader I read after the loader.package one).
So I did not read the rest yet...
Though I'm wondering if this can't go one level up now that you've abstracted away the artifact identity computation...
Theoretically, in my mind at least ;), that should work, shouldn't it?