Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/deposit/loader.py
Show First 20 Lines • Show All 55 Lines • ▼ Show 20 Lines | class DepositPackageInfo(BasePackageInfo): | ||||
def from_metadata( | def from_metadata( | ||||
cls, metadata: Dict[str, Any], url: str, filename: str | cls, metadata: Dict[str, Any], url: str, filename: str | ||||
) -> "DepositPackageInfo": | ) -> "DepositPackageInfo": | ||||
# Note: | # Note: | ||||
# `date` and `committer_date` are always transmitted by the deposit read api | # `date` and `committer_date` are always transmitted by the deposit read api | ||||
# which computes itself the values. The loader needs to use those to create the | # which computes itself the values. The loader needs to use those to create the | ||||
# revision. | # revision. | ||||
raw_metadata_from_origin = json.dumps( | |||||
metadata["origin_metadata"]["metadata"] | |||||
).encode() | |||||
metadata = metadata.copy() | metadata = metadata.copy() | ||||
# FIXME: this removes information from 'raw' metadata | # FIXME: this removes information from 'raw' metadata | ||||
depo = metadata.pop("deposit") | depo = metadata.pop("deposit") | ||||
return cls( | return cls( | ||||
url=url, | url=url, | ||||
filename=filename, | filename=filename, | ||||
author_date=depo["author_date"], | author_date=depo["author_date"], | ||||
commit_date=depo["committer_date"], | commit_date=depo["committer_date"], | ||||
client=depo["client"], | client=depo["client"], | ||||
id=depo["id"], | id=depo["id"], | ||||
collection=depo["collection"], | collection=depo["collection"], | ||||
author=parse_author(depo["author"]), | author=parse_author(depo["author"]), | ||||
committer=parse_author(depo["committer"]), | committer=parse_author(depo["committer"]), | ||||
revision_parents=tuple(hash_to_bytes(p) for p in depo["revision_parents"]), | revision_parents=tuple(hash_to_bytes(p) for p in depo["revision_parents"]), | ||||
raw_info=metadata, | raw_info=metadata, | ||||
revision_extrinsic_metadata=[ | |||||
ardumont: why is the discovery-date None?
(do you plan to retrieve it later?)
----
nvm saw that it… | |||||
(None, "sword-v2-atom-codemeta-v2-in-json", raw_metadata_from_origin,), | |||||
], | |||||
) | ) | ||||
class DepositLoader(PackageLoader[DepositPackageInfo]): | class DepositLoader(PackageLoader[DepositPackageInfo]): | ||||
"""Load pypi origin's artifact releases into swh archive. | """Load pypi origin's artifact releases into swh archive. | ||||
""" | """ | ||||
Show All 14 Lines | def __init__(self, url: str, deposit_id: str): | ||||
self.client = ApiClient(url=config_deposit["url"], auth=config_deposit["auth"]) | self.client = ApiClient(url=config_deposit["url"], auth=config_deposit["auth"]) | ||||
self.metadata: Dict[str, Any] = {} | self.metadata: Dict[str, Any] = {} | ||||
def get_versions(self) -> Sequence[str]:
    """Return the versions to load for this deposit.

    There is a single snapshot with one 'HEAD' branch and no alias, so
    'HEAD' is the only version reported.
    """
    versions = ["HEAD"]
    return versions
def get_metadata_authority(self) -> MetadataAuthority:
    """Build the metadata authority from the deposit's provider entry.

    Reads ``self.metadata["origin_metadata"]["provider"]`` and asserts that
    its ``provider_type`` is ``"deposit_client"``.

    Returns:
        A ``MetadataAuthority`` of type ``DEPOSIT_CLIENT`` whose url is the
        provider url and whose metadata holds the provider name plus any
        provider metadata.
    """
    provider_info = self.metadata["origin_metadata"]["provider"]
    assert provider_info["provider_type"] == "deposit_client"

    authority_url = provider_info["provider_url"]
    # The provider metadata may be falsy (e.g. None); treat it as empty.
    # "name" is listed first, so a "name" key in the provider metadata
    # overrides the provider name.
    extra_metadata = provider_info["metadata"] or {}
    authority_metadata = {"name": provider_info["provider_name"], **extra_metadata}

    return MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url=authority_url,
        metadata=authority_metadata,
    )
def get_metadata_fetcher(self) -> MetadataFetcher:
    """Build the metadata fetcher from the deposit's tool entry.

    Reads ``self.metadata["origin_metadata"]["tool"]`` and maps its
    ``name``, ``version`` and ``configuration`` values onto the fetcher's
    ``name``, ``version`` and ``metadata`` fields respectively.
    """
    tool_info = self.metadata["origin_metadata"]["tool"]
    fetcher_name = tool_info["name"]
    fetcher_version = tool_info["version"]
    fetcher_metadata = tool_info["configuration"]
    return MetadataFetcher(
        name=fetcher_name, version=fetcher_version, metadata=fetcher_metadata,
    )
def get_package_info(
    self, version: str
) -> Iterator[Tuple[str, DepositPackageInfo]]:
    """Yield the single ("HEAD", package info) pair for this deposit.

    The package info is built from ``self.metadata`` and ``self.url``, with
    the artifact filename fixed to "archive.zip". The ``version`` argument
    is not read.
    """
    yield (
        "HEAD",
        DepositPackageInfo.from_metadata(
            self.metadata, url=self.url, filename="archive.zip",
        ),
    )
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines | def load(self) -> Dict: | ||||
r = super().load() | r = super().load() | ||||
success = r["status"] != "failed" | success = r["status"] != "failed" | ||||
if success: | if success: | ||||
# Update archive with metadata information | # Update archive with metadata information | ||||
origin_metadata = self.metadata["origin_metadata"] | origin_metadata = self.metadata["origin_metadata"] | ||||
logger.debug("origin_metadata: %s", origin_metadata) | logger.debug("origin_metadata: %s", origin_metadata) | ||||
provider = origin_metadata["provider"] | authority = self.get_metadata_authority() | ||||
assert provider["provider_type"] == "deposit_client" | |||||
authority = MetadataAuthority( | |||||
type=MetadataAuthorityType.DEPOSIT_CLIENT, | |||||
url=provider["provider_url"], | |||||
metadata={ | |||||
"name": provider["provider_name"], | |||||
**(provider["metadata"] or {}), | |||||
}, | |||||
) | |||||
self.storage.metadata_authority_add([authority]) | self.storage.metadata_authority_add([authority]) | ||||
tool = origin_metadata["tool"] | fetcher = self.get_metadata_fetcher() | ||||
fetcher = MetadataFetcher( | |||||
name=tool["name"], | |||||
version=tool["version"], | |||||
metadata=tool["configuration"], | |||||
) | |||||
self.storage.metadata_fetcher_add([fetcher]) | self.storage.metadata_fetcher_add([fetcher]) | ||||
self.storage.object_metadata_add( | self.storage.object_metadata_add( | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.ORIGIN, | type=MetadataTargetType.ORIGIN, | ||||
id=self.url, | id=self.url, | ||||
discovery_date=self.visit_date, | discovery_date=self.visit_date, | ||||
▲ Show 20 Lines • Show All 132 Lines • Show Last 20 Lines |
Why is the discovery date None?
(Do you plan to retrieve it later?)
Never mind, I saw that it now falls back to the visit in the package loader ;)