Page MenuHomeSoftware Heritage

D2479.diff
No OneTemporary

D2479.diff

diff --git a/swh/loader/package/debian/loader.py b/swh/loader/package/debian/loader.py
--- a/swh/loader/package/debian/loader.py
+++ b/swh/loader/package/debian/loader.py
@@ -89,33 +89,11 @@
yield release_name(version), p_info
def resolve_revision_from(
- self, known_package_artifacts: Dict, artifact_metadata: Dict) \
+ self, known_package_artifacts: Mapping,
+ artifact_metadata: Mapping) \
-> Optional[bytes]:
- artifacts_to_fetch = artifact_metadata['files']
- logger.debug('k_p_artifacts: %s', known_package_artifacts)
- logger.debug('artifacts_to_fetch: %s', artifacts_to_fetch)
- for rev_id, known_artifacts in known_package_artifacts.items():
- logger.debug('Revision: %s', rev_id)
- logger.debug('Associated known_artifacts: %s', known_artifacts)
- known_artifacts = known_artifacts['extrinsic']['raw']['files']
- rev_found = True
- for a_name, k_artifact in known_artifacts.items():
- artifact_to_fetch = artifacts_to_fetch.get(a_name)
- logger.debug('artifact_to_fetch: %s', artifact_to_fetch)
- if artifact_to_fetch is None:
- # as soon as we do not see an artifact, we consider we need
- # to check the other revision
- rev_found = False
- if k_artifact['sha256'] != artifact_to_fetch['sha256']:
- # Hash is different, we consider we need to check the other
- # revisions
- rev_found = False
- if rev_found:
- logger.debug('Existing revision %s found for new artifacts.',
- rev_id)
- return rev_id
- logger.debug('No existing revision found for the new artifacts.')
- return None
+ return resolve_revision_from(
+ known_package_artifacts, artifact_metadata)
def download_package(self, p_info: Mapping[str, Any],
tmpdir: str) -> List[Tuple[str, Mapping]]:
@@ -183,6 +161,42 @@
}
+def resolve_revision_from(known_package_artifacts: Mapping,
+ artifact_metadata: Mapping) -> Optional[bytes]:
+ """Given known package artifacts (resolved from the snapshot of previous
+ visit) and the new artifact to fetch, try to solve the corresponding
+ revision.
+
+ """
+ artifacts_to_fetch = artifact_metadata.get('files')
+ if not artifacts_to_fetch:
+ return None
+ logger.debug('k_p_artifacts: %s', known_package_artifacts)
+ logger.debug('artifacts_to_fetch: %s', artifacts_to_fetch)
+ for rev_id, known_artifacts in known_package_artifacts.items():
+ logger.debug('Revision: %s', rev_id)
+ logger.debug('Associated known_artifacts: %s', known_artifacts)
+ known_artifacts = known_artifacts['extrinsic']['raw']['files']
+ rev_found = True
+ for a_name, k_artifact in known_artifacts.items():
+ artifact_to_fetch = artifacts_to_fetch.get(a_name)
+ logger.debug('artifact_to_fetch: %s', artifact_to_fetch)
+ if artifact_to_fetch is None:
+ # as soon as we do not see an artifact, we consider we need
+ # to check the other revision
+ rev_found = False
+ if k_artifact['sha256'] != artifact_to_fetch['sha256']:
+ # Hash is different, we consider we need to check the other
+ # revisions
+ rev_found = False
+ if rev_found:
+ logger.debug('Existing revision %s found for new artifacts.',
+ rev_id)
+ return rev_id
+ logger.debug('No existing revision found for the new artifacts.')
+ return None
+
+
def uid_to_person(uid: str) -> Mapping[str, str]:
"""Convert an uid to a person suitable for insertion.
diff --git a/swh/loader/package/debian/tests/test_debian.py b/swh/loader/package/debian/tests/test_debian.py
--- a/swh/loader/package/debian/tests/test_debian.py
+++ b/swh/loader/package/debian/tests/test_debian.py
@@ -14,6 +14,7 @@
prepare_person, get_package_metadata, extract_package
)
from swh.loader.package.tests.common import check_snapshot, get_stats
+from swh.loader.package.debian.loader import resolve_revision_from
logger = logging.getLogger(__name__)
@@ -386,3 +387,68 @@
}
check_snapshot(expected_snapshot, loader.storage)
+
+
+def test_resolve_revision_from_edge_cases():
+ """Solving revision with empty data will result in unknown revision
+
+ """
+ for package_artifacts in [{}, PACKAGE_FILES]:
+ actual_revision = resolve_revision_from(
+ package_artifacts, {})
+ assert actual_revision is None
+
+ for known_artifacts in [{}, PACKAGE_FILES]:
+ actual_revision = resolve_revision_from(
+ {}, known_artifacts)
+ assert actual_revision is None
+
+ expected_revision_id = b"(\x07\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xfe\x85\x85O\xfe\xcf\x07" # noqa
+
+
+def test_resolve_revision_from_edge_cases_hit_and_miss():
+ """Solving revision with inconsistent data will result in unknown revision
+
+ """
+ artifact_metadata = PACKAGE_FILES2
+ expected_revision_id = b"(\x07\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xfe\x85\x85O\xfe\xcf\x07" # noqa
+ known_package_artifacts = {
+ expected_revision_id: {
+ 'extrinsic': {
+ 'provider': 'http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2-3.dsc', # noqa
+ 'raw': PACKAGE_FILES,
+ 'when': '2019-12-19T17:04:32.161965+00:00'
+ },
+ # ... removed the unnecessary intermediary data
+ }
+ }
+
+ actual_revision = resolve_revision_from(
+ known_package_artifacts, artifact_metadata
+ )
+
+ assert actual_revision is None
+
+
+def test_resolve_revision_from():
+ """Solving revision with consistent data will solve the revision
+
+ """
+ artifact_metadata = PACKAGE_FILES
+ expected_revision_id = b"(\x07\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xfe\x85\x85O\xfe\xcf\x07" # noqa
+ known_package_artifacts = {
+ expected_revision_id: {
+ 'extrinsic': {
+ 'provider': 'http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2-3.dsc', # noqa
+ 'raw': PACKAGE_FILES,
+ 'when': '2019-12-19T17:04:32.161965+00:00'
+ },
+ # ... removed the unnecessary intermediary data
+ }
+ }
+
+ actual_revision = resolve_revision_from(
+ known_package_artifacts, artifact_metadata
+ )
+
+ assert actual_revision == expected_revision_id

File Metadata

Mime Type
text/plain
Expires
Tue, Dec 17, 1:58 PM (2 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223176

Event Timeline