diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,4 +1,4 @@ swh.core >= 0.0.75 swh.model >= 0.0.60 swh.scheduler -swh.storage >= 0.0.178 +swh.storage >= 0.0.183 diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -27,6 +27,7 @@ Origin ) from swh.storage import get_storage +from swh.storage.exc import HashCollision from swh.storage.algos.snapshot import snapshot_get_all_branches from swh.loader.package.utils import download @@ -310,7 +311,7 @@ status_load = 'failed' return finalize_visit() - load_exceptions = [] + load_exceptions: List[Exception] = [] for version in self.get_versions(): # for each logger.debug('version: %s', version) @@ -324,6 +325,13 @@ try: revision_id = self._load_revision(p_info, origin) status_load = 'eventful' + except HashCollision as e: + self.storage.clear_buffers() + load_exceptions.append(e) + sentry_sdk.capture_exception(e) + logger.exception('Failed loading branch %s for %s', + branch_name, self.url) + continue except Exception as e: load_exceptions.append(e) sentry_sdk.capture_exception(e) diff --git a/swh/loader/package/nixguix/tests/test_nixguix.py b/swh/loader/package/nixguix/tests/test_nixguix.py --- a/swh/loader/package/nixguix/tests/test_nixguix.py +++ b/swh/loader/package/nixguix/tests/test_nixguix.py @@ -4,15 +4,20 @@ # See top-level LICENSE file for more information import pytest + +from typing import Dict, Optional, Tuple + from json.decoder import JSONDecodeError from swh.loader.package.nixguix.loader import ( NixGuixLoader, retrieve_sources, clean_sources ) - from swh.loader.package.tests.common import ( get_stats, check_snapshot ) +from swh.loader.package.utils import download +from swh.storage.exc import HashCollision + sources_url = 'https://nix-community.github.io/nixpkgs-swh/sources.json' @@ -331,3 +336,58 @@ } check_snapshot(expected_snapshot, 
storage=loader.storage) + + +def fake_download(url: str, dest: str, hashes: Dict = {}, + filename: Optional[str] = None, + auth: Optional[Tuple[str, str]] = None) -> Tuple[str, Dict]: + """Fake download which raises HashCollision + + For tests only. + + """ + if url == 'https://example.com/file.txt': + # instead of failing because it's a file not dealt with by the nix guix + # loader, make it raise a hash collision + raise HashCollision( + 'sha1', 'f92d74e3874587aaf443d1db961d4e26dde13e9c', []) + return download(url, dest, hashes, filename, auth) + + +def test_raise_hash_collision(swh_config, requests_mock_datadir, mocker): + mock_download = mocker.patch('swh.loader.package.loader.download') + mock_download.side_effect = fake_download + + loader = NixGuixLoader(sources_url) + res = loader.load() + + expected_snapshot_id = '0c5881c74283793ebe9a09a105a9381e41380383' + assert res == { + 'status': 'eventful', + 'snapshot_id': expected_snapshot_id, + } + + expected_branches = { + 'https://github.com/owner-1/repository-1/revision-1.tgz': { + 'target': '488ad4e7b8e2511258725063cf43a2b897c503b4', + 'target_type': 'revision', + }, + 'evaluation': { + 'target': 'cc4e04c26672dd74e5fd0fecb78b435fb55368f7', + 'target_type': 'revision', + }, + } + expected_snapshot = { + 'id': expected_snapshot_id, + 'branches': expected_branches, + } + + check_snapshot(expected_snapshot, storage=loader.storage) + + assert len(mock_download.mock_calls) == 2 + + origin_visit = loader.storage.origin_visit_get_latest(sources_url) + + # The visit is partial because at least one hash collision was detected + assert origin_visit['status'] == 'partial' + assert origin_visit['type'] == 'nixguix'