Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/jar/loader.py
- This file was added.
# Copyright (C) 2019-2021 The Software Heritage developers | |||||
# See the AUTHORS file at the top-level directory of this distribution | |||||
# License: GNU General Public License version 3, or any later version | |||||
# See top-level LICENSE file for more information | |||||
from datetime import datetime, timezone | |||||
import hashlib | |||||
import logging | |||||
from os import path | |||||
import string | |||||
from typing import Any, Dict, Iterator, Mapping, Optional, Sequence, Tuple, Union | |||||
import attr | |||||
from swh.loader.package.loader import BasePackageInfo, PackageLoader, PartialExtID | |||||
from swh.loader.package.utils import release_name | |||||
from swh.model.model import ( | |||||
Person, | |||||
Revision, | |||||
RevisionType, | |||||
Sha1Git, | |||||
TimestampWithTimezone, | |||||
) | |||||
from swh.storage.interface import StorageInterface | |||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Identity used as both author and committer of the revisions built below.
SWH_PERSON = Person(
    fullname=b"Software Heritage",
    name=b"Software Heritage",
    email=b"robot@softwareheritage.org",
)

# Fixed message attached to every revision built by this loader.
REVISION_MESSAGE = b"swh-loader-package: synthetic revision message"
@attr.s
class JarPackageInfo(BasePackageInfo):
    """Package information describing one maven jar artifact.

    Adds the maven coordinates (group id, artifact id, version), the jar
    timestamp and the raw metadata dict to the base package info.
    """

    time = attr.ib(type=Union[str, datetime])
    """Timestamp of the jar file on the server"""
    raw_info = attr.ib(type=Dict[str, Any])
    gid = attr.ib(type=str)
    """Group ID of the maven artifact"""
    aid = attr.ib(type=str)
    """Artifact ID of the maven artifact"""
    version = attr.ib(type=str)
    """Version of the maven artifact"""

    # default format for maven artifacts
    MANIFEST_FORMAT = string.Template("$gid $aid $version")

    def extid(self, manifest_format: Optional[string.Template] = None) -> PartialExtID:
        """Compute the intrinsic identifier of this package info.

        The manifest template is filled in with the group id, artifact id and
        version, and the SHA-256 digest of the resulting text is returned
        together with the ``"maven-jar"`` extid type.

        Args:
            manifest_format: template to use instead of the class-level
                ``MANIFEST_FORMAT``; a falsy value selects the default.

        Returns:
            A ``("maven-jar", <sha256 digest bytes>)`` pair.
        """
        template = manifest_format or self.MANIFEST_FORMAT
        rendered = template.substitute(
            gid=self.gid, aid=self.aid, version=self.version
        )
        digest = hashlib.sha256(rendered.encode()).digest()
        return ("maven-jar", digest)

    @classmethod
    def from_metadata(cls, a_metadata: Dict[str, Any]) -> "JarPackageInfo":
        """Build a package info from one artifact metadata dict.

        Args:
            a_metadata: dict with the mandatory keys ``url``, ``time``,
                ``gid``, ``aid`` and ``version``, and an optional
                ``filename``.

        Returns:
            The corresponding package info; when ``filename`` is missing or
            empty, the last path component of ``url`` is used instead.

        Raises:
            KeyError: if a mandatory key is missing.
        """
        url = a_metadata["url"]
        # Fall back to the trailing component of the url when no usable
        # filename was provided.
        name = a_metadata.get("filename") or path.split(url)[-1]
        return cls(
            url=url,
            filename=name,
            raw_info=a_metadata,
            time=a_metadata["time"],
            gid=a_metadata["gid"],
            aid=a_metadata["aid"],
            version=a_metadata["version"],
        )
class JarLoader(PackageLoader[JarPackageInfo]):
    """Load jar origin's artifact files into swh archive"""

    visit_type = "jar"

    def __init__(
        self,
        storage: StorageInterface,
        url: str,
        artifacts: Sequence[Dict[str, Any]],
        extid_manifest_format: Optional[str] = None,
        max_content_size: Optional[int] = None,
        snapshot_append: bool = False,
    ):
        """Loader constructor.

        For now, this is the lister's task output.

        Args:
            storage: storage instance forwarded to the base loader
            url: Origin url
            artifacts: List of single artifact information with keys:

               - **time**: the timestamp of the jar file, either a
                 :class:`datetime.datetime` or a number of milliseconds
                 since the epoch (int or numeric string)
               - **url**: the artifact url to retrieve filename
               - **filename**: optionally, the file's name
               - **gid**: artifact's groupId
               - **aid**: artifact's artifactId
               - **version**: artifact's version

            extid_manifest_format: template string used to format a manifest,
                which is hashed to get the extid of a package.
                Defaults to ``"$gid $aid $version"``
                (``JarPackageInfo.MANIFEST_FORMAT``)
            max_content_size: forwarded to the base loader
            snapshot_append: if :const:`True`, append latest snapshot content to
                the new snapshot created by the loader

        """
        # NOTE: this must be a plain string literal; an f-string in this
        # position is not recorded as the function's docstring.
        super().__init__(storage=storage, url=url, max_content_size=max_content_size)
        self.artifacts = artifacts  # assume order is enforced in the lister
        self.extid_manifest_format = (
            None
            if extid_manifest_format is None
            else string.Template(extid_manifest_format)
        )
        self.snapshot_append = snapshot_append

    def get_versions(self) -> Sequence[str]:
        """Return the distinct, non-empty versions of the artifacts.

        Order of first appearance is preserved. Duplicates are dropped so
        that a version is not processed more than once, since
        ``get_package_info`` already yields every artifact of a version.
        """
        found = (jar.get("version") for jar in self.artifacts)
        versions = list(dict.fromkeys(v for v in found if v))
        logger.debug("get_versions: %s", versions)
        return versions

    def get_default_version(self) -> str:
        """Return the version of the last artifact.

        Raises:
            IndexError: if there is no artifact at all.
            KeyError: if the last artifact has no ``version`` key.
        """
        # Returning the last item -- there should be only one version anyway.
        return self.artifacts[-1]["version"]

    def get_package_info(self, version: str) -> Iterator[Tuple[str, JarPackageInfo]]:
        """Yield ``(release name, package info)`` for artifacts of ``version``.

        Only the artifacts whose ``version`` equals the requested one are
        considered, instead of unconditionally using the first artifact.
        """
        for a_metadata in self.artifacts:
            if a_metadata.get("version") != version:
                continue
            yield (
                release_name(a_metadata["version"]),
                JarPackageInfo.from_metadata(a_metadata),
            )

    def new_packageinfo_to_extid(
        self, p_info: JarPackageInfo
    ) -> Optional[PartialExtID]:
        """Return the extid of ``p_info``, computed with the manifest format
        configured on this loader (or the package info default when unset).
        """
        return p_info.extid(manifest_format=self.extid_manifest_format)

    def build_revision(
        self, p_info: JarPackageInfo, uncompressed_path: str, directory: Sha1Git
    ) -> Optional[Revision]:
        """Build the synthetic revision pointing at ``directory``.

        The revision date and committer date both come from ``p_info.time``:

        - a timezone-aware datetime is used unchanged;
        - a naive datetime is assumed to be expressed in UTC;
        - anything else is parsed as an integer number of milliseconds since
          the epoch and converted to an aware UTC datetime.

        Args:
            p_info: package information of the artifact being loaded
            uncompressed_path: unused by this loader
            directory: identifier of the loaded directory

        Raises:
            ValueError: if ``p_info.time`` is neither a datetime nor
                parseable as an integer.
        """
        time = p_info.time
        if isinstance(time, datetime):
            parsed_time = (
                time if time.tzinfo is not None else time.replace(tzinfo=timezone.utc)
            )
        else:  # assume it's a timestamp (in milliseconds)
            raw_time = int(str(time))
            # Convert directly in UTC: without ``tz`` the result would be
            # naive *local* time, and labelling that as UTC shifts the date
            # by the host's UTC offset.
            parsed_time = datetime.fromtimestamp(raw_time / 1e3, tz=timezone.utc)
        logger.debug("build_revision: %s dated %s", p_info.url, parsed_time)

        normalized_time = TimestampWithTimezone.from_datetime(parsed_time)
        return Revision(
            type=RevisionType.TAR,
            message=REVISION_MESSAGE,
            date=normalized_time,
            author=SWH_PERSON,
            committer=SWH_PERSON,
            committer_date=normalized_time,
            parents=(),
            directory=directory,
            synthetic=True,
        )

    def extra_branches(self) -> Dict[bytes, Mapping[str, Any]]:
        """Return the branches to add to the new snapshot.

        Empty unless ``snapshot_append`` is set, in which case the branches
        of the last snapshot (if there is one) are returned.
        """
        if not self.snapshot_append:
            return {}
        last_snapshot = self.last_snapshot()
        return last_snapshot.to_dict()["branches"] if last_snapshot else {}