Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/tar.py
- This file was added.
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import iso8601 | |||||
import logging | |||||
from os import path | |||||
from typing import Any, Dict, Generator, Mapping, Optional, Sequence, Tuple | |||||
from swh.loader.package.loader import PackageLoader | |||||
from swh.loader.package.utils import release_name | |||||
from swh.model.identifiers import normalize_timestamp | |||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Person recorded as both author and committer of the revisions built by
# ArchiveLoader.build_revision.
SWH_PERSON = {
    'name': b'Software Heritage',
    'fullname': b'Software Heritage',
    'email': b'robot@softwareheritage.org'
}

# Message of the revisions built by ArchiveLoader.build_revision.
REVISION_MESSAGE = b'swh-loader-package: synthetic revision message'
class ArchiveLoader(PackageLoader):
    """Package loader for archive (tarball) artifacts.

    The artifacts to load are given to the constructor; every artifact is a
    mapping describing one archive of the origin.

    """
    visit_type = 'tar'

    def __init__(self, url: str, artifacts: Sequence[Mapping[str, Any]],
                 pk_artifact_keys: Optional[Sequence[str]] = None):
        """Loader constructor.

        For now, this is the lister's task output.

        Args:
            url: Origin url
            artifacts: List of artifact information with keys:

               **time**: last modification time as either isoformat date
                   string or timestamp
               **url**: the artifact url to retrieve filename
               **version**: artifact's version
               **length**: artifact's size
               **filename**: (optional) artifact's filename; when absent,
                   the last path component of **url** is used

            pk_artifact_keys: Optional List of keys forming a composite
                primary key for an artifact

        """
        super().__init__(url=url)
        self.artifacts = artifacts  # assume order is enforced in the lister
        if not pk_artifact_keys:
            # default keys for gnu
            pk_artifact_keys = ['time', 'url', 'length', 'version']
        self.pk_artifact_keys = pk_artifact_keys

    def get_versions(self) -> Sequence[str]:
        """Return the versions of the artifacts, in artifact order.

        Artifacts without a (truthy) 'version' entry are skipped.

        """
        candidates = (artifact.get('version') for artifact in self.artifacts)
        return [version for version in candidates if version]

    def get_default_version(self) -> str:
        """Return the version of the last artifact.

        Raises:
            IndexError: when there is no artifact
            KeyError: when the last artifact has no 'version' entry

        """
        # It's the most recent, so for this loader, it's the last one
        return self.artifacts[-1]['version']

    def get_package_info(self, version: str) -> Generator[
            Tuple[str, Mapping[str, Any]], None, None]:
        """Yield (release name, package info) for the artifacts of a version.

        Args:
            version: Version to look up in the artifacts

        Yields:
            Tuple of the release name computed from the version and a dict
            with keys 'url', 'filename' and 'raw' (the artifact itself)

        """
        for a_metadata in self.artifacts:
            # Use .get() so that artifacts without a version are skipped,
            # consistently with get_versions, instead of raising KeyError.
            if a_metadata.get('version') != version:
                continue
            url = a_metadata['url']
            p_info = {
                'url': url,
                # fall back on the last path component of the url
                'filename': a_metadata.get('filename') or path.basename(url),
                'raw': a_metadata,
            }
            # FIXME: this code assumes we have only 1 artifact per
            # versioned package
            yield release_name(version), p_info

    def resolve_revision_from(
            self, known_artifacts: Dict, artifact_metadata: Dict) \
            -> Optional[bytes]:
        """Look up an already known revision matching an artifact.

        Args:
            known_artifacts: Dict of revision id to known artifact; each
                known artifact must hold the artifact it was built from
                under ['extrinsic']['raw']
            artifact_metadata: The artifact to look up

        Returns:
            The id of the first known revision whose artifact has the same
            primary key (see pk_artifact_keys) as artifact_metadata, None
            if there is no such revision

        """
        artifact_pk = pk(artifact_metadata, pk_keys=self.pk_artifact_keys)
        for rev_id, known_artifact in known_artifacts.items():
            # log through the module logger, not the root logger
            logger.debug('known_artifact: %s', known_artifact)
            reference_artifact = known_artifact['extrinsic']['raw']
            known_pk = pk(reference_artifact, pk_keys=self.pk_artifact_keys)
            if artifact_pk == known_pk:
                return rev_id
        return None

    def build_revision(self, a_metadata: Mapping[str, Any],
                       uncompressed_path: str) -> Dict:
        """Build the synthetic revision dict for an artifact.

        Args:
            a_metadata: The artifact; its 'time' entry is used as both the
                revision date and committer date
            uncompressed_path: Unused by this loader

        Returns:
            The revision dict, without parents, whose extrinsic metadata
            hold the artifact under the 'raw' key

        """
        # NOTE(review): self.url and self.visit_date are not set in this
        # class; presumably provided by PackageLoader -- confirm.
        time = a_metadata['time']  # assume it's a timestamp
        if isinstance(time, str):  # otherwise, assume it's a parsable date
            time = iso8601.parse_date(time)
        normalized_time = normalize_timestamp(time)
        return {
            'type': 'tar',
            'message': REVISION_MESSAGE,
            'date': normalized_time,
            'author': SWH_PERSON,
            'committer': SWH_PERSON,
            'committer_date': normalized_time,
            'parents': [],
            'metadata': {
                'intrinsic': {},
                'extrinsic': {
                    'provider': self.url,
                    'when': self.visit_date.isoformat(),
                    'raw': a_metadata,
                },
            },
        }
def pk(d: Mapping[str, Any], pk_keys: Sequence[str]) -> Sequence[Any]:
    """Compute the primary key for a dict using the pk_keys as primary key
       composite.

    Args:
        d: A dict entry to compute the primary key on
        pk_keys: Sequence of keys to use as primary key

    Returns:
        The primary key for that dict entry: the list of the values of d
        for each key of pk_keys, in that order. A key missing from d
        contributes None.

    """
    return [d.get(k) for k in pk_keys]