Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/pypi/loader.py
Show All 19 Lines | |||||
) | ) | ||||
from swh.loader.package.loader import BasePackageInfo, PackageLoader | from swh.loader.package.loader import BasePackageInfo, PackageLoader | ||||
from swh.loader.package.utils import api_info, release_name, EMPTY_AUTHOR | from swh.loader.package.utils import api_info, release_name, EMPTY_AUTHOR | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
@attr.s | |||||
class PyPIPackageInfo(BasePackageInfo): | class PyPIPackageInfo(BasePackageInfo): | ||||
raw = attr.ib(type=Dict[str, Any]) | raw = attr.ib(type=Dict[str, Any]) | ||||
comment_text = attr.ib(type=Optional[str]) | |||||
sha256 = attr.ib(type=str) | |||||
upload_time = attr.ib(type=str) | |||||
@classmethod | |||||
def from_metadata(cls, metadata: Dict[str, Any]) -> "PyPIPackageInfo": | |||||
return cls( | |||||
url=metadata["url"], | |||||
filename=metadata["filename"], | |||||
raw=metadata, | |||||
comment_text=metadata.get("comment_text"), | |||||
sha256=metadata["digests"]["sha256"], | |||||
upload_time=metadata["upload_time"], | |||||
) | |||||
class PyPILoader(PackageLoader[PyPIPackageInfo]): | class PyPILoader(PackageLoader[PyPIPackageInfo]): | ||||
"""Load pypi origin's artifact releases into swh archive. | """Load pypi origin's artifact releases into swh archive. | ||||
""" | """ | ||||
visit_type = "pypi" | visit_type = "pypi" | ||||
Show All 17 Lines | class PyPILoader(PackageLoader[PyPIPackageInfo]): | ||||
def get_default_version(self) -> str: | def get_default_version(self) -> str: | ||||
return self.info["info"]["version"] | return self.info["info"]["version"] | ||||
def get_package_info(self, version: str) -> Iterator[Tuple[str, PyPIPackageInfo]]: | def get_package_info(self, version: str) -> Iterator[Tuple[str, PyPIPackageInfo]]: | ||||
res = [] | res = [] | ||||
for meta in self.info["releases"][version]: | for meta in self.info["releases"][version]: | ||||
if meta["packagetype"] != "sdist": | if meta["packagetype"] != "sdist": | ||||
continue | continue | ||||
filename = meta["filename"] | p_info = PyPIPackageInfo.from_metadata(meta) | ||||
p_info = PyPIPackageInfo(url=meta["url"], filename=filename, raw=meta,) | |||||
res.append((version, p_info)) | res.append((version, p_info)) | ||||
if len(res) == 1: | if len(res) == 1: | ||||
version, p_info = res[0] | version, p_info = res[0] | ||||
yield release_name(version), p_info | yield release_name(version), p_info | ||||
else: | else: | ||||
for version, p_info in res: | for version, p_info in res: | ||||
yield release_name(version, p_info.filename), p_info | yield release_name(version, p_info.filename), p_info | ||||
def resolve_revision_from( | def resolve_revision_from( | ||||
self, known_artifacts: Dict, artifact_metadata: Dict | self, known_artifacts: Dict, p_info: PyPIPackageInfo | ||||
) -> Optional[bytes]: | ) -> Optional[bytes]: | ||||
return artifact_to_revision_id(known_artifacts, artifact_metadata) | return artifact_to_revision_id(known_artifacts, p_info) | ||||
def build_revision( | def build_revision( | ||||
self, a_metadata: Dict, uncompressed_path: str, directory: Sha1Git | self, p_info: PyPIPackageInfo, uncompressed_path: str, directory: Sha1Git | ||||
) -> Optional[Revision]: | ) -> Optional[Revision]: | ||||
i_metadata = extract_intrinsic_metadata(uncompressed_path) | i_metadata = extract_intrinsic_metadata(uncompressed_path) | ||||
if not i_metadata: | if not i_metadata: | ||||
return None | return None | ||||
# from intrinsic metadata | # from intrinsic metadata | ||||
name = i_metadata["version"] | name = i_metadata["version"] | ||||
_author = author(i_metadata) | _author = author(i_metadata) | ||||
# from extrinsic metadata | # from extrinsic metadata | ||||
message = a_metadata.get("comment_text", "") | message = p_info.comment_text or "" | ||||
message = "%s: %s" % (name, message) if message else name | message = "%s: %s" % (name, message) if message else name | ||||
date = TimestampWithTimezone.from_iso8601(a_metadata["upload_time"]) | date = TimestampWithTimezone.from_iso8601(p_info.upload_time) | ||||
return Revision( | return Revision( | ||||
type=RevisionType.TAR, | type=RevisionType.TAR, | ||||
message=message.encode("utf-8"), | message=message.encode("utf-8"), | ||||
author=_author, | author=_author, | ||||
date=date, | date=date, | ||||
committer=_author, | committer=_author, | ||||
committer_date=date, | committer_date=date, | ||||
parents=(), | parents=(), | ||||
directory=directory, | directory=directory, | ||||
synthetic=True, | synthetic=True, | ||||
metadata={ | metadata={ | ||||
"intrinsic": {"tool": "PKG-INFO", "raw": i_metadata,}, | "intrinsic": {"tool": "PKG-INFO", "raw": i_metadata,}, | ||||
"extrinsic": { | "extrinsic": { | ||||
"provider": self.provider_url, | "provider": self.provider_url, | ||||
"when": self.visit_date.isoformat(), | "when": self.visit_date.isoformat(), | ||||
"raw": a_metadata, | "raw": p_info.raw, | ||||
}, | }, | ||||
}, | }, | ||||
) | ) | ||||
def artifact_to_revision_id( | def artifact_to_revision_id( | ||||
known_artifacts: Dict, artifact_metadata: Dict | known_artifacts: Dict, p_info: PyPIPackageInfo | ||||
) -> Optional[bytes]: | ) -> Optional[bytes]: | ||||
"""Given metadata artifact, solves the associated revision id. | """Given metadata artifact, solves the associated revision id. | ||||
The following code allows to deal with 2 metadata formats (column metadata | The following code allows to deal with 2 metadata formats (column metadata | ||||
in 'revision') | in 'revision') | ||||
- old format sample:: | - old format sample:: | ||||
Show All 13 Lines | - new format sample:: | ||||
'sha256': '6975816f2c5ad4046acc676ba112f2fff945b01522d63948531f11f11e0892ec', # noqa | 'sha256': '6975816f2c5ad4046acc676ba112f2fff945b01522d63948531f11f11e0892ec', # noqa | ||||
... | ... | ||||
}, | }, | ||||
}], | }], | ||||
... | ... | ||||
} | } | ||||
""" | """ | ||||
sha256 = artifact_metadata["digests"]["sha256"] | sha256 = p_info.sha256 | ||||
for rev_id, known_artifact in known_artifacts.items(): | for rev_id, known_artifact in known_artifacts.items(): | ||||
original_artifact = known_artifact["original_artifact"] | original_artifact = known_artifact["original_artifact"] | ||||
if isinstance(original_artifact, dict): | if isinstance(original_artifact, dict): | ||||
# previous loader-pypi version stored metadata as dict | # previous loader-pypi version stored metadata as dict | ||||
original_sha256 = original_artifact["sha256"] | original_sha256 = original_artifact["sha256"] | ||||
if sha256 == original_sha256: | if sha256 == original_sha256: | ||||
return rev_id | return rev_id | ||||
continue | continue | ||||
▲ Show 20 Lines • Show All 93 Lines • Show Last 20 Lines |