diff --git a/swh/loader/package/debian/loader.py b/swh/loader/package/debian/loader.py --- a/swh/loader/package/debian/loader.py +++ b/swh/loader/package/debian/loader.py @@ -259,7 +259,8 @@ extrinsic_hashes = {'sha256': fileinfo['sha256']} logger.debug('extrinsic_hashes(%s): %s', filename, extrinsic_hashes) filepath, hashes = download(uri, dest=tmpdir, filename=filename, - hashes=extrinsic_hashes) + hashes=extrinsic_hashes, + on_length_error_raise=False) all_hashes[filename] = hashes logger.debug('all_hashes: %s', all_hashes) diff --git a/swh/loader/package/utils.py b/swh/loader/package/utils.py --- a/swh/loader/package/utils.py +++ b/swh/loader/package/utils.py @@ -40,7 +40,8 @@ def download(url: str, dest: str, hashes: Dict = {}, filename: Optional[str] = None, - auth: Optional[Tuple[str, str]] = None) -> Tuple[str, Dict]: + auth: Optional[Tuple[str, str]] = None, + on_length_error_raise: bool = True) -> Tuple[str, Dict]: """Download a remote tarball from url, uncompresses and computes swh hashes on it. @@ -51,6 +52,10 @@ to download (those hashes are expected to be hex string) auth: Optional tuple of login/password (for http authentication service, e.g. deposit) + on_length_error_raise: If true, when length described and actual + downloaded length diverge, raise (default behavior). 
Otherwise, this + will try to continue download (can only be false if hashes to check are + provided) Raises: ValueError in case of any error when fetching/computing (length, @@ -60,6 +65,9 @@ Tuple of local (filepath, hashes of filepath) """ + # accept to bypass error on length check only when hashes are provided + assert on_length_error_raise or (not on_length_error_raise and hashes) + params = copy.deepcopy(DEFAULT_PARAMS) if auth is not None: params['auth'] = auth @@ -85,8 +93,12 @@ actual_length = os.path.getsize(filepath) if length != actual_length: - raise ValueError('Error when checking size: %s != %s' % ( - length, actual_length)) + msg = f'Error when checking size: {length} != {actual_length}' + if on_length_error_raise: + raise ValueError(msg) + else: + logger.warning('%s. Continue assuming erroneous data upstream', + msg) # Also check the expected hashes if provided if hashes: