Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/cpan/loader.py
# Copyright (C) 2022 The Software Heritage developers | # Copyright (C) 2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from datetime import datetime | from datetime import datetime | ||||
import json | |||||
import logging | import logging | ||||
from pathlib import Path | |||||
from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple | from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple | ||||
import attr | import attr | ||||
import iso8601 | import iso8601 | ||||
from packaging.version import parse as parse_version | from packaging.version import parse as parse_version | ||||
import yaml | |||||
from swh.loader.package.loader import BasePackageInfo, PackageLoader | from swh.loader.package.loader import BasePackageInfo, PackageLoader | ||||
from swh.loader.package.utils import EMPTY_AUTHOR, Person, release_name | from swh.loader.package.utils import EMPTY_AUTHOR, Person, release_name | ||||
from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone | from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone | ||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
Show All 9 Lines | class CpanPackageInfo(BasePackageInfo): | ||||
last_modified = attr.ib(type=datetime) | last_modified = attr.ib(type=datetime) | ||||
"""File last modified date as release date.""" | """File last modified date as release date.""" | ||||
author = attr.ib(type=Person) | author = attr.ib(type=Person) | ||||
"""Author""" | """Author""" | ||||
def extract_intrinsic_metadata(dir_path: Path) -> Dict[str, Any]:
    """Extract intrinsic metadata from the META file found in dir_path.

    Most Perl package versions ship a META.json file at the root of the
    archive; older versions ship a META.yml instead. When both files are
    present, META.json takes precedence and META.yml is not read.

    See https://perldoc.perl.org/CPAN::Meta for META specifications.

    Args:
        dir_path: A directory on disk where a META.json|.yml can be found

    Returns:
        The parsed metadata as a dict. An empty dict is returned when neither
        file exists, or when the parsed document is not a mapping (for
        instance an empty META.yml, which parses to ``None``).

    Raises:
        Whatever the JSON/YAML parser raises on a syntactically invalid file;
        parse errors are deliberately not swallowed here.
    """
    meta_json_path = dir_path / "META.json"
    meta_yml_path = dir_path / "META.yml"
    metadata: Any = None

    # Decode explicitly as UTF-8 instead of relying on the locale's preferred
    # encoding, so the result does not depend on the host configuration.
    if meta_json_path.exists():
        metadata = json.loads(meta_json_path.read_text(encoding="utf-8"))
    elif meta_yml_path.exists():
        metadata = yaml.safe_load(meta_yml_path.read_text(encoding="utf-8"))

    # Callers index the result by key: never hand back None, a list or a
    # scalar, even if the META file is empty or malformed.
    return metadata if isinstance(metadata, dict) else {}
class CpanLoader(PackageLoader[CpanPackageInfo]): | class CpanLoader(PackageLoader[CpanPackageInfo]): | ||||
visit_type = "cpan" | visit_type = "cpan" | ||||
def __init__( | def __init__( | ||||
self, | self, | ||||
storage: StorageInterface, | storage: StorageInterface, | ||||
url: str, | url: str, | ||||
api_base_url: str, | api_base_url: str, | ||||
▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines | def get_package_info(self, version: str) -> Iterator[Tuple[str, CpanPackageInfo]]: | ||||
checksums=artifact["checksums"], | checksums=artifact["checksums"], | ||||
) | ) | ||||
yield release_name(version), p_info | yield release_name(version), p_info | ||||
def build_release(
    self, p_info: CpanPackageInfo, uncompressed_path: str, directory: Sha1Git
) -> Optional[Release]:
    """Build the synthetic release object for one package version.

    The release author is taken from the ``author`` field of the META
    file found under ``uncompressed_path/<name>-<version>`` when that field
    holds a string (or a list whose first entry is a string); otherwise
    the author carried by ``p_info`` is used.

    Args:
        p_info: Package information for the version being loaded
        uncompressed_path: Directory where the archive was uncompressed
        directory: Identifier of the directory the release targets

    Returns:
        The synthetic release targeting ``directory``.
    """
    # Extract intrinsic metadata from uncompressed_path/META.json|.yml
    intrinsic_metadata = extract_intrinsic_metadata(
        Path(uncompressed_path) / f"{p_info.name}-{p_info.version}"
    )

    # author data from http endpoint are less complete than from META
    author_data = (
        intrinsic_metadata.get("author")
        if isinstance(intrinsic_metadata, dict)
        else None
    )
    if isinstance(author_data, list):
        # Only the first listed author is kept; an empty list means the META
        # file carries no usable author.
        author_data = author_data[0] if author_data else None

    if isinstance(author_data, str):
        author = Person.from_fullname(author_data.encode())
    else:
        # Missing, empty or non-string author: fall back to the package info
        author = p_info.author

    message = (
        f"Synthetic release for Perl source package {p_info.name} "
        f"version {p_info.version}\n"
    )

    return Release(
        name=p_info.version.encode(),
        author=author,
        date=TimestampWithTimezone.from_datetime(p_info.last_modified),
        message=message.encode(),
        target_type=ObjectType.DIRECTORY,
        target=directory,
        synthetic=True,
    )