diff --git a/swh/loader/package/debian/tests/test_debian.py b/swh/loader/package/debian/tests/test_debian.py
--- a/swh/loader/package/debian/tests/test_debian.py
+++ b/swh/loader/package/debian/tests/test_debian.py
@@ -237,27 +237,21 @@
     assert all_hashes == {
         'cicero_0.7.2-3.diff.gz': {
             'checksums': {
-                'blake2s256': '08b1c438e70d2474bab843d826515147fa4a817f8c4baaf3ddfbeb5132183f21',  # noqa
                 'sha1': '0815282053f21601b0ec4adf7a8fe47eace3c0bc',
-                'sha1_git': '834ac91da3a9da8f23f47004bb456dd5bd16fe49',
                 'sha256': 'f039c9642fe15c75bed5254315e2a29f9f2700da0e29d9b0729b3ffc46c8971c'  # noqa
             },
             'filename': 'cicero_0.7.2-3.diff.gz',
             'length': 3964},
         'cicero_0.7.2-3.dsc': {
             'checksums': {
-                'blake2s256': '8c002bead3e35818eaa9d00826f3d141345707c58fb073beaa8abecf4bde45d2',  # noqa
                 'sha1': 'abbec4e8efbbc80278236e1dd136831eac08accd',
-                'sha1_git': '1f94b2086fa1142c2df6b94092f5c5fa11093a8e',
                 'sha256': '35b7f1048010c67adfd8d70e4961aefd8800eb9a83a4d1cc68088da0009d9a03'  # noqa
             },
             'filename': 'cicero_0.7.2-3.dsc',
             'length': 1864},
         'cicero_0.7.2.orig.tar.gz': {
             'checksums': {
-                'blake2s256': '9809aa8d2e2dad7f34cef72883db42b0456ab7c8f1418a636eebd30ab71a15a6',  # noqa
                 'sha1': 'a286efd63fe2c9c9f7bb30255c3d6fcdcf390b43',
-                'sha1_git': 'aa0a38978dce86d531b5b0299b4a616b95c64c74',
                 'sha256': '63f40f2436ea9f67b44e2d4bd669dbabe90e2635a204526c20e0b3c8ee957786'  # noqa
             },
             'filename': 'cicero_0.7.2.orig.tar.gz',
diff --git a/swh/loader/package/tests/test_utils.py b/swh/loader/package/tests/test_utils.py
--- a/swh/loader/package/tests/test_utils.py
+++ b/swh/loader/package/tests/test_utils.py
@@ -30,27 +30,6 @@
         url, status_code)
 
 
-@pytest.mark.fs
-def test_download_fail_length_mismatch(tmp_path, requests_mock):
-    """Mismatch length after download should raise
-
-    """
-    filename = 'requests-0.0.1.tar.gz'
-    url = 'https://pypi.org/pypi/requests/%s' % filename
-    data = 'this is something'
-    wrong_size = len(data) - 3
-    requests_mock.get(url, text=data, headers={
-        'content-length': str(wrong_size)  # wrong size!
-    })
-
-    with pytest.raises(ValueError) as e:
-        download(url, dest=str(tmp_path))
-
-    assert e.value.args[0] == "Error when checking size: %s != %s" % (
-        wrong_size, len(data)
-    )
-
-
 @pytest.mark.fs
 def test_download_ok(tmp_path, requests_mock):
     """Download without issue should provide filename and hashes"""
@@ -72,23 +51,21 @@
 
 
 @pytest.mark.fs
-def test_download_headers(tmp_path, requests_mock):
-    """Check that we send proper headers when downloading files"""
+def test_download_ok_no_header(tmp_path, requests_mock):
+    """Download without issue should provide filename and hashes"""
     filename = 'requests-0.0.1.tar.gz'
     url = 'https://pypi.org/pypi/requests/%s' % filename
     data = 'this is something'
-    requests_mock.get(url, text=data, headers={
-        'content-length': str(len(data))
-    })
+    requests_mock.get(url, text=data)  # no header information
 
     actual_filepath, actual_hashes = download(url, dest=str(tmp_path))
 
-    assert len(requests_mock.request_history) == 1
-    req = requests_mock.request_history[0]
-    assert 'User-Agent' in req.headers
-    user_agent = req.headers['User-Agent']
-    assert 'Software Heritage Loader' in user_agent
-    assert swh.loader.package.__version__ in user_agent
+    actual_filename = os.path.basename(actual_filepath)
+    assert actual_filename == filename
+    assert actual_hashes['length'] == len(data)
+    assert actual_hashes['checksums']['sha1'] == 'fdd1ce606a904b08c816ba84f3125f2af44d92b2'  # noqa
+    assert (actual_hashes['checksums']['sha256'] ==
+            '1d9224378d77925d612c9f926eb9fb92850e6551def8328011b6a972323298d5')
 
 
 @pytest.mark.fs
diff --git a/swh/loader/package/utils.py b/swh/loader/package/utils.py
--- a/swh/loader/package/utils.py
+++ b/swh/loader/package/utils.py
@@ -17,6 +17,9 @@
 logger = logging.getLogger(__name__)
 
 
+DOWNLOAD_HASHES = set(['sha1', 'sha256', 'length'])
+
+
 def api_info(url: str) -> Dict:
     """Basic api client to retrieve information on project. This deals with
        fetching json metadata about pypi projects.
@@ -64,30 +67,21 @@
     if auth is not None:
         params['auth'] = auth
     response = requests.get(url, **params, stream=True)
-    logger.debug('headers: %s', response.headers)
     if response.status_code != 200:
         raise ValueError("Fail to query '%s'. Reason: %s" % (
             url, response.status_code))
-    _length = response.headers.get('content-length')
-    # some server do not provide the content-length header...
-    length = int(_length) if _length is not None else len(response.content)
 
     filename = filename if filename else os.path.basename(url)
     logger.debug('filename: %s', filename)
     filepath = os.path.join(dest, filename)
     logger.debug('filepath: %s', filepath)
 
-    h = MultiHash(length=length)
+    h = MultiHash(hash_names=DOWNLOAD_HASHES)
    with open(filepath, 'wb') as f:
         for chunk in response.iter_content(chunk_size=HASH_BLOCK_SIZE):
             h.update(chunk)
             f.write(chunk)
 
-    actual_length = os.path.getsize(filepath)
-    if length != actual_length:
-        raise ValueError('Error when checking size: %s != %s' % (
-            length, actual_length))
-
     # Also check the expected hashes if provided
     if hashes:
         actual_hashes = h.hexdigest()
@@ -100,12 +94,12 @@
                     'Checksum mismatched: %s != %s' % (
                         url, expected_digest, actual_digest))
 
+    computed_hashes = h.hexdigest()
+    length = computed_hashes.pop('length')
     extrinsic_metadata = {
         'length': length,
         'filename': filename,
-        'checksums': {
-            **h.hexdigest()
-        },
+        'checksums': computed_hashes,
     }
 
     logger.debug('extrinsic_metadata', extrinsic_metadata)
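Not part of the patch above: a minimal, self-contained sketch of the streaming hash-while-download pattern that the patched utils.download() implements, using hashlib in place of swh.model's MultiHash. The function name download_sketch, the HASH_BLOCK_SIZE value, and the HASH_NAMES tuple are illustrative assumptions, not names from the patched module.

import hashlib
import os

import requests

# Assumption: chunk size chosen for illustration; the loader defines its own
# HASH_BLOCK_SIZE constant.
HASH_BLOCK_SIZE = 32768

# Mirrors the patched DOWNLOAD_HASHES minus 'length', which is tracked
# separately below.
HASH_NAMES = ('sha1', 'sha256')


def download_sketch(url, dest, filename=None):
    """Stream `url` into `dest`, hashing each chunk as it is written.

    Returns (filepath, extrinsic_metadata) in the same shape the patched
    swh.loader.package.utils.download produces: no content-length check,
    length is derived from the bytes actually received.
    """
    response = requests.get(url, stream=True)
    if response.status_code != 200:
        raise ValueError("Fail to query '%s'. Reason: %s" % (
            url, response.status_code))

    filename = filename if filename else os.path.basename(url)
    filepath = os.path.join(dest, filename)

    hashers = {name: hashlib.new(name) for name in HASH_NAMES}
    length = 0
    with open(filepath, 'wb') as f:
        for chunk in response.iter_content(chunk_size=HASH_BLOCK_SIZE):
            length += len(chunk)
            for h in hashers.values():
                h.update(chunk)
            f.write(chunk)

    extrinsic_metadata = {
        'length': length,
        'filename': filename,
        'checksums': {name: h.hexdigest() for name, h in hashers.items()},
    }
    return filepath, extrinsic_metadata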