Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/npm/loader.py
Show All 13 Lines | |||||
import chardet | import chardet | ||||
from swh.loader.package.loader import ( | from swh.loader.package.loader import ( | ||||
BasePackageInfo, | BasePackageInfo, | ||||
PackageLoader, | PackageLoader, | ||||
RawExtrinsicMetadataCore, | RawExtrinsicMetadataCore, | ||||
) | ) | ||||
from swh.loader.package.utils import api_info, cached_method, release_name | from swh.loader.package.utils import api_info, cached_method, release_name | ||||
from swh.model.hashutil import hash_to_bytes | |||||
from swh.model.model import ( | from swh.model.model import ( | ||||
MetadataAuthority, | MetadataAuthority, | ||||
MetadataAuthorityType, | MetadataAuthorityType, | ||||
Person, | Person, | ||||
Revision, | Revision, | ||||
RevisionType, | RevisionType, | ||||
Sha1Git, | Sha1Git, | ||||
TimestampWithTimezone, | TimestampWithTimezone, | ||||
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | ) -> "NpmPackageInfo": | ||||
directory_extrinsic_metadata=[ | directory_extrinsic_metadata=[ | ||||
RawExtrinsicMetadataCore( | RawExtrinsicMetadataCore( | ||||
format="replicate-npm-package-json", | format="replicate-npm-package-json", | ||||
metadata=json.dumps(package_metadata).encode(), | metadata=json.dumps(package_metadata).encode(), | ||||
) | ) | ||||
], | ], | ||||
) | ) | ||||
def extid(self) -> bytes: | |||||
return hash_to_bytes(self.shasum) | |||||
class NpmLoader(PackageLoader[NpmPackageInfo]): | class NpmLoader(PackageLoader[NpmPackageInfo]): | ||||
"""Load npm origin's artifact releases into swh archive. | """Load npm origin's artifact releases into swh archive. | ||||
""" | """ | ||||
visit_type = "npm" | visit_type = "npm" | ||||
Show All 38 Lines | def get_metadata_authority(self): | ||||
) | ) | ||||
def get_package_info(self, version: str) -> Iterator[Tuple[str, NpmPackageInfo]]: | def get_package_info(self, version: str) -> Iterator[Tuple[str, NpmPackageInfo]]: | ||||
p_info = NpmPackageInfo.from_metadata( | p_info = NpmPackageInfo.from_metadata( | ||||
project_metadata=self.info(), version=version | project_metadata=self.info(), version=version | ||||
) | ) | ||||
yield release_name(version), p_info | yield release_name(version), p_info | ||||
def resolve_revision_from( | @staticmethod | ||||
self, known_artifacts: Dict, p_info: NpmPackageInfo | def known_artifact_to_extid(known_artifact: Dict) -> Optional[bytes]: | ||||
) -> Optional[bytes]: | extid_str = _artifact_to_sha1(known_artifact) | ||||
return artifact_to_revision_id(known_artifacts, p_info) | if extid_str is None: | ||||
return None | |||||
try: | |||||
return hash_to_bytes(extid_str) | |||||
except ValueError: | |||||
return None | |||||
def build_revision( | def build_revision( | ||||
self, p_info: NpmPackageInfo, uncompressed_path: str, directory: Sha1Git | self, p_info: NpmPackageInfo, uncompressed_path: str, directory: Sha1Git | ||||
) -> Optional[Revision]: | ) -> Optional[Revision]: | ||||
i_metadata = extract_intrinsic_metadata(uncompressed_path) | i_metadata = extract_intrinsic_metadata(uncompressed_path) | ||||
if not i_metadata: | if not i_metadata: | ||||
return None | return None | ||||
author = extract_npm_package_author(i_metadata) | author = extract_npm_package_author(i_metadata) | ||||
Show All 29 Lines | ) -> Optional[Revision]: | ||||
"when": self.visit_date.isoformat(), | "when": self.visit_date.isoformat(), | ||||
"raw": p_info.raw_info, | "raw": p_info.raw_info, | ||||
}, | }, | ||||
}, | }, | ||||
) | ) | ||||
return r | return r | ||||
def artifact_to_revision_id( | def _artifact_to_sha1(known_artifact: Dict) -> Optional[str]: | ||||
known_artifacts: Dict, p_info: NpmPackageInfo | """Returns the sha1 from an NPM 'original_artifact' dict | ||||
) -> Optional[bytes]: | |||||
"""Given metadata artifact, solves the associated revision id. | |||||
The following code allows to deal with 2 metadata formats: | The following code allows to deal with 2 metadata formats: | ||||
- old format sample:: | - old format sample:: | ||||
{ | { | ||||
'package_source': { | 'package_source': { | ||||
'sha1': '05181c12cd8c22035dd31155656826b85745da37', | 'sha1': '05181c12cd8c22035dd31155656826b85745da37', | ||||
} | } | ||||
} | } | ||||
- new format sample:: | - new format sample:: | ||||
{ | { | ||||
'original_artifact': [{ | 'original_artifact': [{ | ||||
'checksums': { | 'checksums': { | ||||
'sha256': '6975816f2c5ad4046acc676ba112f2fff945b01522d63948531f11f11e0892ec', # noqa | 'sha256': '6975816f2c5ad4046acc676ba112f2fff945b01522d63948531f11f11e0892ec', # noqa | ||||
... | ... | ||||
}, | }, | ||||
}], | }], | ||||
... | ... | ||||
} | } | ||||
""" | """ | ||||
shasum = p_info.shasum | |||||
for rev_id, known_artifact in known_artifacts.items(): | |||||
original_hash = _artifact_to_sha1(known_artifact) | |||||
if shasum == original_hash: | |||||
return rev_id | |||||
return None | |||||
def _artifact_to_sha1(known_artifact: Dict) -> Optional[str]: | |||||
"""Returns the sha1 from an NPM 'original_artifact' dict""" | |||||
known_original_artifact = known_artifact.get("original_artifact") | known_original_artifact = known_artifact.get("original_artifact") | ||||
if not known_original_artifact: | if not known_original_artifact: | ||||
# previous loader-npm version kept original artifact elsewhere | # previous loader-npm version kept original artifact elsewhere | ||||
known_original_artifact = known_artifact.get("package_source") | known_original_artifact = known_artifact.get("package_source") | ||||
if not known_original_artifact: | if not known_original_artifact: | ||||
return None | return None | ||||
return known_original_artifact["sha1"] | return known_original_artifact["sha1"] | ||||
else: | else: | ||||
▲ Show 20 Lines • Show All 120 Lines • Show Last 20 Lines |