diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py --- a/swh/loader/package/npm/loader.py +++ b/swh/loader/package/npm/loader.py @@ -212,21 +212,27 @@ """ shasum = p_info.shasum for rev_id, known_artifact in known_artifacts.items(): - known_original_artifact = known_artifact.get("original_artifact") - if not known_original_artifact: - # previous loader-npm version kept original artifact elsewhere - known_original_artifact = known_artifact.get("package_source") - if not known_original_artifact: - continue - original_hash = known_original_artifact["sha1"] - else: - assert isinstance(known_original_artifact, list) - original_hash = known_original_artifact[0]["checksums"]["sha1"] + original_hash = _artifact_to_sha1(known_artifact) if shasum == original_hash: return rev_id + return None +def _artifact_to_sha1(known_artifact: Dict) -> Optional[str]: + """Returns the sha1 from an NPM 'original_artifact' dict""" + known_original_artifact = known_artifact.get("original_artifact") + if not known_original_artifact: + # previous loader-npm version kept original artifact elsewhere + known_original_artifact = known_artifact.get("package_source") + if not known_original_artifact: + return None + return known_original_artifact["sha1"] + else: + assert isinstance(known_original_artifact, list) + return known_original_artifact[0]["checksums"]["sha1"] + + def _author_str(author_data: Union[Dict, List, str]) -> str: """Parse author from package.json author fields diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py --- a/swh/loader/package/pypi/loader.py +++ b/swh/loader/package/pypi/loader.py @@ -188,19 +188,24 @@ """ sha256 = p_info.sha256 for rev_id, known_artifact in known_artifacts.items(): - original_artifact = known_artifact["original_artifact"] - if isinstance(original_artifact, dict): - # previous loader-pypi version stored metadata as dict - original_sha256 = original_artifact["sha256"] - if sha256 == original_sha256: - return rev_id - continue - # new pypi loader actually store metadata dict differently... - assert isinstance(original_artifact, list) - # current loader-pypi stores metadata as list of dict - for original_artifact in known_artifact["original_artifact"]: - if sha256 == original_artifact["checksums"]["sha256"]: - return rev_id + original_sha256 = _artifact_to_sha256(known_artifact) + if sha256 == original_sha256: + return rev_id + + return None + + +def _artifact_to_sha256(known_artifact: Dict) -> Optional[str]: + """Returns the sha256 from a PyPI 'original_artifact' dict""" + original_artifact = known_artifact["original_artifact"] + if isinstance(original_artifact, dict): + # previous loader-pypi version stored metadata as dict + return original_artifact["sha256"] + # new pypi loader actually store metadata dict differently... + assert isinstance(original_artifact, list) + # current loader-pypi stores metadata as list of dict + for original_artifact in known_artifact["original_artifact"]: + return original_artifact["checksums"]["sha256"] return None