Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/npm/loader.py
Show All 26 Lines | |||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
EMPTY_PERSON = Person(fullname=b"", name=None, email=None) | EMPTY_PERSON = Person(fullname=b"", name=None, email=None) | ||||
@attr.s | |||||
class NpmPackageInfo(BasePackageInfo): | class NpmPackageInfo(BasePackageInfo): | ||||
raw = attr.ib(type=Dict[str, Any]) | raw = attr.ib(type=Dict[str, Any]) | ||||
date = attr.ib(type=Optional[str]) | |||||
shasum = attr.ib(type=str) | |||||
"""sha1 checksum""" | |||||
version = attr.ib(type=str) | |||||
@classmethod | |||||
def from_metadata( | |||||
cls, project_metadata: Dict[str, Any], version: str | |||||
) -> "NpmPackageInfo": | |||||
package_metadata = project_metadata["versions"][version] | |||||
url = package_metadata["dist"]["tarball"] | |||||
# No date available in intrinsic metadata: retrieve it from the API | |||||
# metadata, using the version number that the API claims this package | |||||
# has. | |||||
extrinsic_version = package_metadata["version"] | |||||
if "time" in project_metadata: | |||||
date = project_metadata["time"][extrinsic_version] | |||||
elif "mtime" in package_metadata: | |||||
date = package_metadata["mtime"] | |||||
else: | |||||
date = None | |||||
return cls( | |||||
url=url, | |||||
filename=os.path.basename(url), | |||||
date=date, | |||||
shasum=package_metadata["dist"]["shasum"], | |||||
version=extrinsic_version, | |||||
raw=package_metadata, # FIXME: we're losing some of the project metadata | |||||
) | |||||
class NpmLoader(PackageLoader[NpmPackageInfo]): | class NpmLoader(PackageLoader[NpmPackageInfo]): | ||||
"""Load npm origin's artifact releases into swh archive. | """Load npm origin's artifact releases into swh archive. | ||||
""" | """ | ||||
visit_type = "npm" | visit_type = "npm" | ||||
Show All 20 Lines | def info(self) -> Dict[str, Any]: | ||||
return self._info | return self._info | ||||
def get_versions(self) -> Sequence[str]: | def get_versions(self) -> Sequence[str]: | ||||
return sorted(list(self.info["versions"].keys())) | return sorted(list(self.info["versions"].keys())) | ||||
def get_default_version(self) -> str: | def get_default_version(self) -> str: | ||||
return self.info["dist-tags"].get("latest", "") | return self.info["dist-tags"].get("latest", "") | ||||
def get_package_info(self, version: str) -> Iterator[Tuple[str, NpmPackageInfo]]: | def get_package_info( | ||||
meta = self.info["versions"][version] | self, version: str | ||||
url = meta["dist"]["tarball"] | ) -> Iterator[Tuple[str, NpmPackageInfo]]: | ||||
p_info = NpmPackageInfo(url=url, filename=os.path.basename(url), raw=meta,) | p_info = NpmPackageInfo.from_metadata( | ||||
project_metadata=self.info, version=version | |||||
) | |||||
yield release_name(version), p_info | yield release_name(version), p_info | ||||
def resolve_revision_from( | def resolve_revision_from( | ||||
self, known_artifacts: Dict, artifact_metadata: Dict | self, known_artifacts: Dict, p_info: NpmPackageInfo | ||||
) -> Optional[bytes]: | ) -> Optional[bytes]: | ||||
return artifact_to_revision_id(known_artifacts, artifact_metadata) | return artifact_to_revision_id(known_artifacts, p_info) | ||||
def build_revision( | def build_revision( | ||||
self, a_metadata: Dict, uncompressed_path: str, directory: Sha1Git | self, p_info: NpmPackageInfo, uncompressed_path: str, directory: Sha1Git | ||||
) -> Optional[Revision]: | ) -> Optional[Revision]: | ||||
i_metadata = extract_intrinsic_metadata(uncompressed_path) | i_metadata = extract_intrinsic_metadata(uncompressed_path) | ||||
if not i_metadata: | if not i_metadata: | ||||
return None | return None | ||||
# from intrinsic metadata | |||||
author = extract_npm_package_author(i_metadata) | author = extract_npm_package_author(i_metadata) | ||||
message = i_metadata["version"].encode("ascii") | message = i_metadata["version"].encode("ascii") | ||||
# from extrinsic metadata | if p_info.date is None: | ||||
url = p_info.url | |||||
# No date available in intrinsic metadata: retrieve it from the API | artifact_name = os.path.basename(url) | ||||
# metadata, using the version number that the API claims this package | |||||
# has. | |||||
extrinsic_version = a_metadata["version"] | |||||
if "time" in self.info: | |||||
date = self.info["time"][extrinsic_version] | |||||
elif "mtime" in a_metadata: | |||||
date = a_metadata["mtime"] | |||||
else: | |||||
artifact_name = os.path.basename(a_metadata["dist"]["tarball"]) | |||||
raise ValueError( | raise ValueError( | ||||
"Origin %s: Cannot determine upload time for artifact %s." | "Origin %s: Cannot determine upload time for artifact %s." | ||||
% (self.url, artifact_name) | % (p_info.url, artifact_name) | ||||
) | ) | ||||
date = TimestampWithTimezone.from_iso8601(date) | date = TimestampWithTimezone.from_iso8601(p_info.date) | ||||
# FIXME: this is to remain bug-compatible with earlier versions: | # FIXME: this is to remain bug-compatible with earlier versions: | ||||
date = attr.evolve(date, timestamp=attr.evolve(date.timestamp, microseconds=0)) | date = attr.evolve(date, timestamp=attr.evolve(date.timestamp, microseconds=0)) | ||||
r = Revision( | r = Revision( | ||||
type=RevisionType.TAR, | type=RevisionType.TAR, | ||||
message=message, | message=message, | ||||
author=author, | author=author, | ||||
date=date, | date=date, | ||||
committer=author, | committer=author, | ||||
committer_date=date, | committer_date=date, | ||||
parents=(), | parents=(), | ||||
directory=directory, | directory=directory, | ||||
synthetic=True, | synthetic=True, | ||||
metadata={ | metadata={ | ||||
"intrinsic": {"tool": "package.json", "raw": i_metadata,}, | "intrinsic": {"tool": "package.json", "raw": i_metadata,}, | ||||
"extrinsic": { | "extrinsic": { | ||||
"provider": self.provider_url, | "provider": self.provider_url, | ||||
"when": self.visit_date.isoformat(), | "when": self.visit_date.isoformat(), | ||||
"raw": a_metadata, | "raw": p_info.raw, | ||||
}, | }, | ||||
}, | }, | ||||
) | ) | ||||
return r | return r | ||||
def artifact_to_revision_id( | def artifact_to_revision_id( | ||||
known_artifacts: Dict, artifact_metadata: Dict | known_artifacts: Dict, p_info: NpmPackageInfo | ||||
) -> Optional[bytes]: | ) -> Optional[bytes]: | ||||
"""Given metadata artifact, solves the associated revision id. | """Given metadata artifact, solves the associated revision id. | ||||
The following code allows to deal with 2 metadata formats: | The following code allows to deal with 2 metadata formats: | ||||
- old format sample:: | - old format sample:: | ||||
{ | { | ||||
Show All 10 Lines | - new format sample:: | ||||
'sha256': '6975816f2c5ad4046acc676ba112f2fff945b01522d63948531f11f11e0892ec', # noqa | 'sha256': '6975816f2c5ad4046acc676ba112f2fff945b01522d63948531f11f11e0892ec', # noqa | ||||
... | ... | ||||
}, | }, | ||||
}], | }], | ||||
... | ... | ||||
} | } | ||||
""" | """ | ||||
shasum = artifact_metadata["dist"]["shasum"] | shasum = p_info.shasum | ||||
for rev_id, known_artifact in known_artifacts.items(): | for rev_id, known_artifact in known_artifacts.items(): | ||||
known_original_artifact = known_artifact.get("original_artifact") | known_original_artifact = known_artifact.get("original_artifact") | ||||
if not known_original_artifact: | if not known_original_artifact: | ||||
# previous loader-npm version kept original artifact elsewhere | # previous loader-npm version kept original artifact elsewhere | ||||
known_original_artifact = known_artifact.get("package_source") | known_original_artifact = known_artifact.get("package_source") | ||||
if not known_original_artifact: | if not known_original_artifact: | ||||
continue | continue | ||||
original_hash = known_original_artifact["sha1"] | original_hash = known_original_artifact["sha1"] | ||||
▲ Show 20 Lines • Show All 124 Lines • Show Last 20 Lines |