Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/pypi/loader.py
Show All 12 Lines | |||||
from pkginfo import UnpackedSDist | from pkginfo import UnpackedSDist | ||||
from swh.loader.package.loader import ( | from swh.loader.package.loader import ( | ||||
BasePackageInfo, | BasePackageInfo, | ||||
PackageLoader, | PackageLoader, | ||||
RawExtrinsicMetadataCore, | RawExtrinsicMetadataCore, | ||||
) | ) | ||||
from swh.loader.package.utils import EMPTY_AUTHOR, api_info, cached_method, release_name | from swh.loader.package.utils import EMPTY_AUTHOR, api_info, cached_method, release_name | ||||
from swh.model.hashutil import hash_to_bytes | |||||
from swh.model.model import ( | from swh.model.model import ( | ||||
MetadataAuthority, | MetadataAuthority, | ||||
MetadataAuthorityType, | MetadataAuthorityType, | ||||
Person, | Person, | ||||
Revision, | Revision, | ||||
RevisionType, | RevisionType, | ||||
Sha1Git, | Sha1Git, | ||||
TimestampWithTimezone, | TimestampWithTimezone, | ||||
Show All 22 Lines | def from_metadata(cls, metadata: Dict[str, Any]) -> "PyPIPackageInfo": | ||||
upload_time=metadata["upload_time"], | upload_time=metadata["upload_time"], | ||||
directory_extrinsic_metadata=[ | directory_extrinsic_metadata=[ | ||||
RawExtrinsicMetadataCore( | RawExtrinsicMetadataCore( | ||||
format="pypi-project-json", metadata=json.dumps(metadata).encode(), | format="pypi-project-json", metadata=json.dumps(metadata).encode(), | ||||
) | ) | ||||
], | ], | ||||
) | ) | ||||
def extid(self) -> bytes: | |||||
return hash_to_bytes(self.sha256) | |||||
class PyPILoader(PackageLoader[PyPIPackageInfo]): | class PyPILoader(PackageLoader[PyPIPackageInfo]): | ||||
"""Load pypi origin's artifact releases into swh archive. | """Load pypi origin's artifact releases into swh archive. | ||||
""" | """ | ||||
visit_type = "pypi" | visit_type = "pypi" | ||||
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines | def get_package_info(self, version: str) -> Iterator[Tuple[str, PyPIPackageInfo]]: | ||||
if len(res) == 1: | if len(res) == 1: | ||||
version, p_info = res[0] | version, p_info = res[0] | ||||
yield release_name(version), p_info | yield release_name(version), p_info | ||||
else: | else: | ||||
for version, p_info in res: | for version, p_info in res: | ||||
yield release_name(version, p_info.filename), p_info | yield release_name(version, p_info.filename), p_info | ||||
def resolve_revision_from( | @staticmethod | ||||
self, known_artifacts: Dict, p_info: PyPIPackageInfo | def known_artifact_to_extid(known_artifact: Dict) -> Optional[bytes]: | ||||
) -> Optional[bytes]: | extid_str = _artifact_to_sha256(known_artifact) | ||||
return artifact_to_revision_id(known_artifacts, p_info) | if extid_str is None: | ||||
return None | |||||
try: | |||||
return hash_to_bytes(extid_str) if extid_str else None | |||||
except ValueError: | |||||
return None | |||||
def build_revision( | def build_revision( | ||||
self, p_info: PyPIPackageInfo, uncompressed_path: str, directory: Sha1Git | self, p_info: PyPIPackageInfo, uncompressed_path: str, directory: Sha1Git | ||||
) -> Optional[Revision]: | ) -> Optional[Revision]: | ||||
i_metadata = extract_intrinsic_metadata(uncompressed_path) | i_metadata = extract_intrinsic_metadata(uncompressed_path) | ||||
if not i_metadata: | if not i_metadata: | ||||
return None | return None | ||||
Show All 22 Lines | ) -> Optional[Revision]: | ||||
"provider": self.provider_url, | "provider": self.provider_url, | ||||
"when": self.visit_date.isoformat(), | "when": self.visit_date.isoformat(), | ||||
"raw": p_info.raw_info, | "raw": p_info.raw_info, | ||||
}, | }, | ||||
}, | }, | ||||
) | ) | ||||
def artifact_to_revision_id( | def _artifact_to_sha256(known_artifact: Dict) -> Optional[str]: | ||||
known_artifacts: Dict, p_info: PyPIPackageInfo | """Returns the sha256 from a PyPI 'original_artifact' dict | ||||
) -> Optional[bytes]: | |||||
"""Given metadata artifact, solves the associated revision id. | |||||
The following code allows to deal with 2 metadata formats (column metadata | The following code allows to deal with 2 metadata formats (column metadata | ||||
in 'revision') | in 'revision') | ||||
- old format sample:: | - old format sample:: | ||||
{ | { | ||||
'original_artifact': { | 'original_artifact': { | ||||
Show All 11 Lines | - new format sample:: | ||||
'sha256': '6975816f2c5ad4046acc676ba112f2fff945b01522d63948531f11f11e0892ec', # noqa | 'sha256': '6975816f2c5ad4046acc676ba112f2fff945b01522d63948531f11f11e0892ec', # noqa | ||||
... | ... | ||||
}, | }, | ||||
}], | }], | ||||
... | ... | ||||
} | } | ||||
""" | """ | ||||
sha256 = p_info.sha256 | |||||
for rev_id, known_artifact in known_artifacts.items(): | |||||
original_sha256 = _artifact_to_sha256(known_artifact) | |||||
if sha256 == original_sha256: | |||||
return rev_id | |||||
return None | |||||
def _artifact_to_sha256(known_artifact: Dict) -> Optional[str]: | |||||
"""Returns the sha256 from a PyPI 'original_artifact' dict""" | |||||
original_artifact = known_artifact["original_artifact"] | original_artifact = known_artifact["original_artifact"] | ||||
if isinstance(original_artifact, dict): | if isinstance(original_artifact, dict): | ||||
# previous loader-pypi version stored metadata as dict | # previous loader-pypi version stored metadata as dict | ||||
return original_artifact["sha256"] | return original_artifact["sha256"] | ||||
# new pypi loader actually store metadata dict differently... | # new pypi loader actually store metadata dict differently... | ||||
assert isinstance(original_artifact, list) | assert isinstance(original_artifact, list) | ||||
# current loader-pypi stores metadata as list of dict | # current loader-pypi stores metadata as list of dict | ||||
if len(known_artifact["original_artifact"]) == 0: | if len(known_artifact["original_artifact"]) == 0: | ||||
▲ Show 20 Lines • Show All 94 Lines • Show Last 20 Lines |