def _known_artifact_sha1(known_artifact: Dict) -> Optional[str]:
    """Return the sha1 recorded for one known artifact, or None if absent.

    Args:
        known_artifact: metadata stored for an already loaded revision, in
            either the old ('package_source') or the new
            ('original_artifact') format.

    Returns:
        The recorded sha1, or None when the entry carries no sha1.

    Raises:
        TypeError: if 'original_artifact' is set but is not a list.

    """
    original_artifact = known_artifact.get('original_artifact')
    if original_artifact:
        # Explicit check rather than `assert`, which is stripped under -O.
        if not isinstance(original_artifact, list):
            raise TypeError(
                "'original_artifact' must be a list, got %s"
                % type(original_artifact).__name__)
        return original_artifact[0].get('checksums', {}).get('sha1')

    # previous loader-npm version kept original artifact elsewhere
    package_source = known_artifact.get('package_source')
    if not package_source:
        return None
    return package_source.get('sha1')


def artifact_to_revision_id(
        known_artifacts: Dict, artifact_metadata: Dict) -> Optional[bytes]:
    """Resolve the revision id associated with an artifact's metadata.

    The loader's ``resolve_revision_from`` method delegates to this function.

    Known artifacts may be stored in either of 2 metadata formats:

    - old format sample:

        {
            'package_source': {
                'sha1': '05181c12cd8c22035dd31155656826b85745da37',
            }
        }

    - new format sample (the sha1 of the first entry is the one compared):

        {
            'original_artifact': [{
                'checksums': {
                    'sha1': '05181c12cd8c22035dd31155656826b85745da37',
                    'sha256': "6975816f2c5ad4046acc676ba112f2fff945b01522d63948531f11f11e0892ec",  # noqa
                    ...
                },
            }],
            ...
        }

    Args:
        known_artifacts: mapping of revision id to the metadata stored for
            that revision.
        artifact_metadata: metadata of the artifact to look up; its
            ``['dist']['shasum']`` value is compared to the stored sha1s.

    Returns:
        The id of the first revision whose stored sha1 equals the artifact
        shasum, or None when there is no match.

    Raises:
        KeyError: if ``artifact_metadata`` has no ``['dist']['shasum']``.
        TypeError: if a known artifact's 'original_artifact' is not a list.

    """
    shasum = artifact_metadata['dist']['shasum']
    for rev_id, known_artifact in known_artifacts.items():
        known_sha1 = _known_artifact_sha1(known_artifact)
        # Entries without a sha1 cannot match; skip them instead of failing
        # so that the remaining known artifacts are still considered.
        if known_sha1 is not None and known_sha1 == shasum:
            return rev_id
    return None
from swh.model.hashutil import hash_to_bytes

from swh.loader.package.npm import (
    parse_npm_package_author, extract_npm_package_author,
    artifact_to_revision_id
)


# Fixtures shared by the artifact_to_revision_id tests below.
_ARTIFACT_SHA1 = '05181c12cd8c22035dd31155656826b85745da37'
_REVISION_ID_1 = 'b11ebac8c9d0c9e5063a2df693a18e3aba4b2f92'
_REVISION_ID_2 = '845673bfe8cbd31b1eaf757745a964137e6f9116'


def test_npm_artifact_to_revision_id_none():
    """No revision is resolved when no known artifact carries a checksum

    """
    artifact_metadata = {
        'dist': {
            'shasum': _ARTIFACT_SHA1,
        },
    }

    # Revision ids are bytes, as in the other tests of this function.
    known_artifacts = {
        hash_to_bytes(_REVISION_ID_1): {},
    }

    assert artifact_to_revision_id(known_artifacts, artifact_metadata) is None
    assert artifact_to_revision_id({}, artifact_metadata) is None


def test_npm_artifact_to_revision_id_old_loader_version():
    """Current loader version should solve old metadata scheme

    """
    artifact_metadata = {
        'dist': {
            'shasum': _ARTIFACT_SHA1,
        }
    }

    known_artifacts = {
        hash_to_bytes(_REVISION_ID_1): {
            'package_source': {
                'sha1': "something-wrong"
            }
        },
        hash_to_bytes(_REVISION_ID_2): {
            'package_source': {
                'sha1': _ARTIFACT_SHA1,
            }
        }
    }

    assert artifact_to_revision_id(known_artifacts, artifact_metadata) \
        == hash_to_bytes(_REVISION_ID_2)


def test_npm_artifact_to_revision_id_current_loader_version():
    """Current loader version should be able to solve current metadata scheme

    """
    artifact_metadata = {
        'dist': {
            'shasum': _ARTIFACT_SHA1,
        }
    }

    known_artifacts = {
        hash_to_bytes(_REVISION_ID_1): {
            'original_artifact': [{
                'checksums': {
                    'sha1': _ARTIFACT_SHA1
                },
            }],
        },
        hash_to_bytes(_REVISION_ID_2): {
            'original_artifact': [{
                'checksums': {
                    'sha1': 'something-wrong'
                },
            }],
        },
    }

    assert artifact_to_revision_id(known_artifacts, artifact_metadata) \
        == hash_to_bytes(_REVISION_ID_1)