swh/loader/package/tar.py (new file)
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import iso8601 | |||||
import logging | |||||
from os import path | |||||
from typing import Dict, Generator, Optional, Mapping, Sequence, Tuple | |||||
from swh.loader.package.loader import PackageLoader | |||||
from swh.model.identifiers import normalize_timestamp | |||||
# Module-level logger, namespaced to this module (swh.loader.package.tar).
logger = logging.getLogger(__name__)

# Synthetic author/committer recorded on every revision this loader creates:
# the archives carry no author information, so the SWH robot identity is used.
SWH_PERSON = {
    'name': b'Software Heritage',
    'fullname': b'Software Heritage',
    'email': b'robot@softwareheritage.org'
}
# Commit message attached to every synthetic revision built by this loader.
REVISION_MESSAGE = b'swh-loader-package: synthetic revision message'
class TarLoader(PackageLoader):
    """Load an origin made of a list of versioned tarballs.

    Each entry of ``packages`` describes one archive; the loader builds one
    synthetic revision per archive, authored by the SWH robot identity.

    """
    visit_type = 'tar'

    def __init__(self, url: str, packages: Sequence[Mapping[str, str]]):
        """Loader constructor.

        Args:
            url: Origin url
            packages: List of dict with keys:

                - url: the url to retrieve one versioned archive
                - version: version identifier of that archive
                - date: isoformat date string
                - sha256: integrity hash

        """
        super().__init__(url=url)
        # Sort by date so the last entry is the most recent release
        # (relied upon by get_default_release).
        # Note: sorted() already returns a list; no extra list() needed.
        self.packages = sorted(packages, key=lambda v: v['date'])

    def get_versions(self) -> Sequence[str]:
        """Return all known versions, oldest first (packages are date-sorted).

        Entries without a 'version' key are skipped.

        """
        return [p['version'] for p in self.packages if p.get('version')]

    def get_default_release(self) -> str:
        """Return the default (most recent) version.

        Packages are sorted by date in __init__, so it is the last one.

        """
        return self.packages[-1]['version']

    def get_artifacts(self, version: str) -> Generator[
            Tuple[str, str, Dict], None, None]:
        """Yield (filename, url, metadata) for every artifact matching
        ``version``.

        The filename is the last path component of the artifact url.

        """
        for a_metadata in self.packages:
            url = a_metadata['url']
            artifact_version = a_metadata['version']
            if version == artifact_version:
                filename = path.split(url)[-1]
                yield filename, url, a_metadata

    def resolve_revision_from(
            self, known_artifacts: Dict, artifact_metadata: Dict) \
            -> Optional[bytes]:
        """Return the revision id of an already-loaded artifact matching
        ``artifact_metadata``, or None if it is unknown.

        Matching compares the ('time', 'archive', 'length') triple of the
        incoming metadata against the extrinsic raw metadata stored on each
        known revision.
        NOTE(review): these keys differ from the url/version/date/sha256
        keys documented in __init__ — confirm against the actual artifact
        metadata schema.

        """
        def pk(d):
            # Projection used as the artifact identity key; missing keys
            # project to None on both sides, keeping the comparison symmetric.
            return [d.get(k) for k in ['time', 'archive', 'length']]

        artifact_pk = pk(artifact_metadata)
        for rev_id, known_artifact in known_artifacts.items():
            # Fixed: use the module logger, not the root logging module,
            # so messages carry this module's namespace and configuration.
            logger.debug('known_artifact: %s', known_artifact)
            known_pk = pk(known_artifact['extrinsic']['raw'])
            if artifact_pk == known_pk:
                return rev_id
        # Explicit: no known revision matches this artifact.
        return None

    def build_revision(
            self, a_metadata: Dict, a_uncompressed_path: str) -> Dict:
        """Build the synthetic revision dict for one artifact.

        Args:
            a_metadata: the artifact's package entry (must have an isoformat
                'date' key, used for both author and committer dates)
            a_uncompressed_path: path to the uncompressed archive (unused
                here, part of the PackageLoader interface)

        Returns:
            A revision dict with the SWH robot as author/committer and the
            raw artifact metadata stored under metadata.extrinsic.raw.

        """
        normalized_date = normalize_timestamp(
            iso8601.parse_date(a_metadata['date']))
        return {
            'message': REVISION_MESSAGE,
            'date': normalized_date,
            'author': SWH_PERSON,
            'committer': SWH_PERSON,
            'committer_date': normalized_date,
            'parents': [],
            'metadata': {
                'intrinsic': {},
                'extrinsic': {
                    'provider': self.url,
                    'when': self.visit_date.isoformat(),
                    'raw': a_metadata,
                },
            },
        }