Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/deposit/loader.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
import logging | import logging | ||||
import requests | import requests | ||||
from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, Tuple, Union | |||||
import types | import types | ||||
from typing import Any, Dict, Generator, List, Mapping, Optional, Sequence, Tuple, Union | import attr | ||||
from swh.model.hashutil import hash_to_hex, hash_to_bytes | from swh.model.hashutil import hash_to_hex, hash_to_bytes | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Person, | Person, | ||||
Revision, | Revision, | ||||
RevisionType, | RevisionType, | ||||
TimestampWithTimezone, | TimestampWithTimezone, | ||||
Sha1Git, | Sha1Git, | ||||
MetadataAuthority, | MetadataAuthority, | ||||
MetadataAuthorityType, | MetadataAuthorityType, | ||||
MetadataFetcher, | MetadataFetcher, | ||||
MetadataTargetType, | MetadataTargetType, | ||||
RawExtrinsicMetadata, | RawExtrinsicMetadata, | ||||
) | ) | ||||
from swh.loader.package.loader import PackageLoader | from swh.loader.package.loader import PackageLoader, BasePackageInfo | ||||
from swh.loader.package.utils import download | from swh.loader.package.utils import download | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
class DepositLoader(PackageLoader): | class DepositPackageInfo(BasePackageInfo): | ||||
filename = attr.ib(type=str) # instead of Optional[str] | |||||
raw = attr.ib(type=Dict[str, Any]) | |||||
class DepositLoader(PackageLoader[DepositPackageInfo]): | |||||
"""Load pypi origin's artifact releases into swh archive. | """Load pypi origin's artifact releases into swh archive. | ||||
""" | """ | ||||
visit_type = "deposit" | visit_type = "deposit" | ||||
def __init__(self, url: str, deposit_id: str): | def __init__(self, url: str, deposit_id: str): | ||||
"""Constructor | """Constructor | ||||
Show All 12 Lines | class DepositLoader(PackageLoader[DepositPackageInfo]): | ||||
def get_versions(self) -> Sequence[str]: | def get_versions(self) -> Sequence[str]: | ||||
# only 1 branch 'HEAD' with no alias since we only have 1 snapshot | # only 1 branch 'HEAD' with no alias since we only have 1 snapshot | ||||
# branch | # branch | ||||
return ["HEAD"] | return ["HEAD"] | ||||
def get_package_info( | def get_package_info( | ||||
self, version: str | self, version: str | ||||
) -> Generator[Tuple[str, Mapping[str, Any]], None, None]: | ) -> Iterator[Tuple[str, DepositPackageInfo]]: | ||||
p_info = { | p_info = DepositPackageInfo( | ||||
"filename": "archive.zip", | url=self.url, filename="archive.zip", raw=self.metadata, | ||||
"raw": self.metadata, | ) | ||||
} | |||||
yield "HEAD", p_info | yield "HEAD", p_info | ||||
def download_package( | def download_package( | ||||
self, p_info: Mapping[str, Any], tmpdir: str | self, p_info: DepositPackageInfo, tmpdir: str | ||||
) -> List[Tuple[str, Mapping]]: | ) -> List[Tuple[str, Mapping]]: | ||||
"""Override to allow use of the dedicated deposit client | """Override to allow use of the dedicated deposit client | ||||
""" | """ | ||||
return [self.client.archive_get(self.deposit_id, tmpdir, p_info["filename"])] | return [self.client.archive_get(self.deposit_id, tmpdir, p_info.filename)] | ||||
def build_revision( | def build_revision( | ||||
self, a_metadata: Dict, uncompressed_path: str, directory: Sha1Git | self, a_metadata: Dict, uncompressed_path: str, directory: Sha1Git | ||||
) -> Optional[Revision]: | ) -> Optional[Revision]: | ||||
depo = a_metadata.pop("deposit") | depo = a_metadata.pop("deposit") | ||||
# Note: | # Note: | ||||
# `date` and `committer_date` are always transmitted by the deposit read api | # `date` and `committer_date` are always transmitted by the deposit read api | ||||
Show All 20 Lines | ) -> Optional[Revision]: | ||||
committer_date=commit_date, | committer_date=commit_date, | ||||
parents=tuple([hash_to_bytes(p) for p in depo["revision_parents"]]), | parents=tuple([hash_to_bytes(p) for p in depo["revision_parents"]]), | ||||
directory=directory, | directory=directory, | ||||
synthetic=True, | synthetic=True, | ||||
metadata={ | metadata={ | ||||
"extrinsic": { | "extrinsic": { | ||||
"provider": self.client.metadata_url(self.deposit_id), | "provider": self.client.metadata_url(self.deposit_id), | ||||
"when": self.visit_date.isoformat(), | "when": self.visit_date.isoformat(), | ||||
"raw": a_metadata, | "raw": a_metadata, # Actually the processed metadata instead of raw | ||||
ardumont: well yea, the `extrinsic` because it got processed by a third party (the deposit server) and… | |||||
}, | }, | ||||
}, | }, | ||||
) | ) | ||||
def load(self) -> Dict: | def load(self) -> Dict: | ||||
# First making sure the deposit is known prior to trigger a loading | # First making sure the deposit is known prior to trigger a loading | ||||
try: | try: | ||||
self.metadata = self.client.metadata_get(self.deposit_id) | self.metadata = self.client.metadata_get(self.deposit_id) | ||||
▲ Show 20 Lines • Show All 170 Lines • Show Last 20 Lines |
Well, yes: it is `extrinsic` because it got processed by a third party (the deposit server), and `raw` because the loader did not touch it (as far as I recall).