Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/cpan/loader.py
# Copyright (C) 2022 The Software Heritage developers | # Copyright (C) 2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from datetime import datetime | from datetime import datetime | ||||
import logging | import logging | ||||
from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple | from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple | ||||
import attr | import attr | ||||
import iso8601 | import iso8601 | ||||
from packaging.version import parse as parse_version | from packaging.version import parse as parse_version | ||||
from requests import HTTPError | |||||
from swh.loader.package.loader import BasePackageInfo, PackageLoader | from swh.loader.package.loader import ( | ||||
from swh.loader.package.utils import EMPTY_AUTHOR, Person, release_name | BasePackageInfo, | ||||
from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone | PackageLoader, | ||||
RawExtrinsicMetadataCore, | |||||
) | |||||
from swh.loader.package.utils import EMPTY_AUTHOR, Person, get_url_body, release_name | |||||
from swh.model.model import ( | |||||
MetadataAuthority, | |||||
MetadataAuthorityType, | |||||
ObjectType, | |||||
Release, | |||||
Sha1Git, | |||||
TimestampWithTimezone, | |||||
) | |||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
@attr.s
class CpanPackageInfo(BasePackageInfo):
    """Package information for one CPAN release artifact.

    Adds the CPAN-specific fields declared below to ``BasePackageInfo``.
    """

    name = attr.ib(type=str)
    """Package name"""

    version = attr.ib(type=str)
    """Current version of the package"""

    last_modified = attr.ib(type=datetime)
    """Last modified date of the file, used as the release date."""

    author = attr.ib(type=Person)
    """Author of the package"""
class CpanLoader(PackageLoader[CpanPackageInfo]): | class CpanLoader(PackageLoader[CpanPackageInfo]): | ||||
visit_type = "cpan" | visit_type = "cpan" | ||||
EXTRINSIC_METADATA_URL_PATTERN = "{api_base_url}/release/{author}/{release_name}" | |||||
def __init__(
    self,
    storage: StorageInterface,
    url: str,
    api_base_url: str,
    artifacts: List[Dict[str, Any]],
    module_metadata: List[Dict[str, Any]],
    **kwargs,
):
    """Set up the loader and index its inputs by version.

    Args:
        storage: storage instance forwarded to the parent loader
        url: origin url, forwarded to the parent loader and kept on the
            instance
        api_base_url: base url of the API, kept on the instance
        artifacts: one dict per artifact; each must have a ``"version"``
            key, which becomes the lookup key and is removed from the
            stored dict
        module_metadata: one dict per version; each must have a
            ``"version"`` key, which becomes the lookup key (the dict is
            stored unchanged)
        **kwargs: extra keyword arguments forwarded to the parent loader
    """
    super().__init__(storage=storage, url=url, **kwargs)
    self.url = url
    self.api_base_url = api_base_url

    # version -> artifact fields, with the "version" entry stripped out
    artifacts_by_version: Dict[str, Dict] = {}
    for entry in artifacts:
        fields = dict(entry)
        entry_version = fields.pop("version")
        artifacts_by_version[entry_version] = fields
    self.artifacts: Dict[str, Dict] = artifacts_by_version

    # version -> full metadata dict (later duplicates overwrite earlier ones)
    metadata_by_version: Dict[str, Dict] = {}
    for meta in module_metadata:
        metadata_by_version[meta["version"]] = meta
    self.module_metadata: Dict[str, Dict] = metadata_by_version
def get_metadata_authority(self):
    """Return the forge-type metadata authority for https://metacpan.org/."""
    authority_url = "https://metacpan.org/"
    return MetadataAuthority(type=MetadataAuthorityType.FORGE, url=authority_url)
def get_versions(self) -> Sequence[str]: | def get_versions(self) -> Sequence[str]: | ||||
"""Get all released versions of a Perl package | """Get all released versions of a Perl package | ||||
Returns: | Returns: | ||||
A sequence of versions | A sequence of versions | ||||
Example:: | Example:: | ||||
Show All 29 Lines | def get_package_info(self, version: str) -> Iterator[Tuple[str, CpanPackageInfo]]: | ||||
last_modified = iso8601.parse_date(metadata["date"]) | last_modified = iso8601.parse_date(metadata["date"]) | ||||
author = ( | author = ( | ||||
Person.from_fullname(metadata["author"].encode()) | Person.from_fullname(metadata["author"].encode()) | ||||
if metadata["author"] | if metadata["author"] | ||||
else EMPTY_AUTHOR | else EMPTY_AUTHOR | ||||
) | ) | ||||
try: | |||||
extrinsic_metadata_url = self.EXTRINSIC_METADATA_URL_PATTERN.format( | |||||
api_base_url=self.api_base_url, | |||||
author=metadata["cpan_author"], | |||||
release_name=metadata["release_name"], | |||||
) | |||||
version_extrinsic_metadata = get_url_body(extrinsic_metadata_url) | |||||
except HTTPError: | |||||
logger.warning( | |||||
"Could not fetch extrinsic_metadata for module %s version %s", | |||||
metadata["name"], | |||||
version, | |||||
) | |||||
version_extrinsic_metadata = None | |||||
directory_extrinsic_metadata = [] | |||||
if version_extrinsic_metadata: | |||||
directory_extrinsic_metadata.append( | |||||
RawExtrinsicMetadataCore( | |||||
vlorentz: Shouldn't it be `cpan-release-json`? | |||||
Done | Inline Actions | anlambert: Better naming indeed, will update.
format="cpan-release-json", | |||||
metadata=version_extrinsic_metadata, | |||||
) | |||||
) | |||||
p_info = CpanPackageInfo( | p_info = CpanPackageInfo( | ||||
name=metadata["name"], | name=metadata["name"], | ||||
filename=artifact["filename"], | filename=artifact["filename"], | ||||
url=artifact["url"], | url=artifact["url"], | ||||
version=version, | version=version, | ||||
last_modified=last_modified, | last_modified=last_modified, | ||||
author=author, | author=author, | ||||
checksums=artifact["checksums"], | checksums=artifact["checksums"], | ||||
directory_extrinsic_metadata=directory_extrinsic_metadata, | |||||
) | ) | ||||
yield release_name(version), p_info | yield release_name(version), p_info | ||||
def build_release( | def build_release( | ||||
self, p_info: CpanPackageInfo, uncompressed_path: str, directory: Sha1Git | self, p_info: CpanPackageInfo, uncompressed_path: str, directory: Sha1Git | ||||
) -> Optional[Release]: | ) -> Optional[Release]: | ||||
message = ( | message = ( | ||||
Show All 13 Lines |
Shouldn't it be cpan-release-json?