diff --git a/swh/loader/package/debian/loader.py b/swh/loader/package/debian/loader.py --- a/swh/loader/package/debian/loader.py +++ b/swh/loader/package/debian/loader.py @@ -89,33 +89,11 @@ yield release_name(version), p_info def resolve_revision_from( - self, known_package_artifacts: Dict, artifact_metadata: Dict) \ + self, known_package_artifacts: Mapping, + artifact_metadata: Mapping) \ -> Optional[bytes]: - artifacts_to_fetch = artifact_metadata['files'] - logger.debug('k_p_artifacts: %s', known_package_artifacts) - logger.debug('artifacts_to_fetch: %s', artifacts_to_fetch) - for rev_id, known_artifacts in known_package_artifacts.items(): - logger.debug('Revision: %s', rev_id) - logger.debug('Associated known_artifacts: %s', known_artifacts) - known_artifacts = known_artifacts['extrinsic']['raw']['files'] - rev_found = True - for a_name, k_artifact in known_artifacts.items(): - artifact_to_fetch = artifacts_to_fetch.get(a_name) - logger.debug('artifact_to_fetch: %s', artifact_to_fetch) - if artifact_to_fetch is None: - # as soon as we do not see an artifact, we consider we need - # to check the other revision - rev_found = False - if k_artifact['sha256'] != artifact_to_fetch['sha256']: - # Hash is different, we consider we need to check the other - # revisions - rev_found = False - if rev_found: - logger.debug('Existing revision %s found for new artifacts.', - rev_id) - return rev_id - logger.debug('No existing revision found for the new artifacts.') - return None + return resolve_revision_from( + known_package_artifacts, artifact_metadata) def download_package(self, p_info: Mapping[str, Any], tmpdir: str) -> List[Tuple[str, Mapping]]: @@ -183,6 +161,42 @@ } +def resolve_revision_from(known_package_artifacts: Mapping, + artifact_metadata: Mapping) -> Optional[bytes]: + """Given known package artifacts (resolved from the snapshot of previous + visit) and the new artifact to fetch, try to solve the corresponding + revision. + + """ + artifacts_to_fetch = artifact_metadata.get('files') + if not artifacts_to_fetch: + return None + logger.debug('k_p_artifacts: %s', known_package_artifacts) + logger.debug('artifacts_to_fetch: %s', artifacts_to_fetch) + for rev_id, known_artifacts in known_package_artifacts.items(): + logger.debug('Revision: %s', rev_id) + logger.debug('Associated known_artifacts: %s', known_artifacts) + known_artifacts = known_artifacts['extrinsic']['raw']['files'] + rev_found = True + for a_name, k_artifact in known_artifacts.items(): + artifact_to_fetch = artifacts_to_fetch.get(a_name) + logger.debug('artifact_to_fetch: %s', artifact_to_fetch) + if artifact_to_fetch is None: + # as soon as we do not see an artifact, we consider we need + # to check the other revision + rev_found = False + if k_artifact['sha256'] != artifact_to_fetch['sha256']: + # Hash is different, we consider we need to check the other + # revisions + rev_found = False + if rev_found: + logger.debug('Existing revision %s found for new artifacts.', + rev_id) + return rev_id + logger.debug('No existing revision found for the new artifacts.') + return None + + def uid_to_person(uid: str) -> Mapping[str, str]: """Convert an uid to a person suitable for insertion. diff --git a/swh/loader/package/debian/tests/test_debian.py b/swh/loader/package/debian/tests/test_debian.py --- a/swh/loader/package/debian/tests/test_debian.py +++ b/swh/loader/package/debian/tests/test_debian.py @@ -14,6 +14,7 @@ prepare_person, get_package_metadata, extract_package ) from swh.loader.package.tests.common import check_snapshot, get_stats +from swh.loader.package.debian.loader import resolve_revision_from logger = logging.getLogger(__name__) @@ -386,3 +387,68 @@ } check_snapshot(expected_snapshot, loader.storage) + + +def test_resolve_revision_from_edge_cases(): + """Solving revision with empty data will result in unknown revision + + """ + for package_artifacts in [{}, PACKAGE_FILES]: + actual_revision = resolve_revision_from( + package_artifacts, {}) + assert actual_revision is None + + for known_artifacts in [{}, PACKAGE_FILES]: + actual_revision = resolve_revision_from( + {}, known_artifacts) + assert actual_revision is None + + expected_revision_id = b"(\x07\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xfe\x85\x85O\xfe\xcf\x07" # noqa + + +def test_resolve_revision_from_edge_cases_hit_and_miss(): + """Solving revision with inconsistent data will result in unknown revision + + """ + artifact_metadata = PACKAGE_FILES2 + expected_revision_id = b"(\x07\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xfe\x85\x85O\xfe\xcf\x07" # noqa + known_package_artifacts = { + expected_revision_id: { + 'extrinsic': { + 'provider': 'http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2-3.dsc', # noqa + 'raw': PACKAGE_FILES, + 'when': '2019-12-19T17:04:32.161965+00:00' + }, + # ... removed the unnecessary intermediary data + } + } + + actual_revision = resolve_revision_from( + known_package_artifacts, artifact_metadata + ) + + assert actual_revision is None + + +def test_resolve_revision_from(): + """Solving revision with consistent data will solve the revision + + """ + artifact_metadata = PACKAGE_FILES + expected_revision_id = b"(\x07\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xfe\x85\x85O\xfe\xcf\x07" # noqa + known_package_artifacts = { + expected_revision_id: { + 'extrinsic': { + 'provider': 'http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2-3.dsc', # noqa + 'raw': PACKAGE_FILES, + 'when': '2019-12-19T17:04:32.161965+00:00' + }, + # ... removed the unnecessary intermediary data + } + } + + actual_revision = resolve_revision_from( + known_package_artifacts, artifact_metadata + ) + + assert actual_revision == expected_revision_id