Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/utils.py
Show All 11 Lines | |||||
from swh.model.hashutil import MultiHash, HASH_BLOCK_SIZE | from swh.model.hashutil import MultiHash, HASH_BLOCK_SIZE | ||||
from swh.loader.package import DEFAULT_PARAMS | from swh.loader.package import DEFAULT_PARAMS | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
DOWNLOAD_HASHES = set(['sha1', 'sha256', 'length']) | |||||
def api_info(url: str) -> Dict: | def api_info(url: str) -> Dict: | ||||
"""Basic api client to retrieve information on project. This deals with | """Basic api client to retrieve information on project. This deals with | ||||
fetching json metadata about pypi projects. | fetching json metadata about pypi projects. | ||||
Args: | Args: | ||||
url (str): The api url (e.g PyPI, npm, etc...) | url (str): The api url (e.g PyPI, npm, etc...) | ||||
Raises: | Raises: | ||||
Show All 31 Lines | def download(url: str, dest: str, hashes: Dict = {}, | ||||
Returns: | Returns: | ||||
Tuple of local (filepath, hashes of filepath) | Tuple of local (filepath, hashes of filepath) | ||||
""" | """ | ||||
params = copy.deepcopy(DEFAULT_PARAMS) | params = copy.deepcopy(DEFAULT_PARAMS) | ||||
if auth is not None: | if auth is not None: | ||||
params['auth'] = auth | params['auth'] = auth | ||||
response = requests.get(url, **params, stream=True) | response = requests.get(url, **params, stream=True) | ||||
logger.debug('headers: %s', response.headers) | headers = response.headers | ||||
logger.debug('headers: %s', headers) | |||||
if response.status_code != 200: | if response.status_code != 200: | ||||
raise ValueError("Fail to query '%s'. Reason: %s" % ( | raise ValueError("Fail to query '%s'. Reason: %s" % ( | ||||
url, response.status_code)) | url, response.status_code)) | ||||
_length = response.headers.get('content-length') | |||||
# some servers do not provide the content-length header... | |||||
length = int(_length) if _length is not None else len(response.content) | |||||
filename = filename if filename else os.path.basename(url) | filename = filename if filename else os.path.basename(url) | ||||
logger.debug('filename: %s', filename) | logger.debug('filename: %s', filename) | ||||
filepath = os.path.join(dest, filename) | filepath = os.path.join(dest, filename) | ||||
logger.debug('filepath: %s', filepath) | logger.debug('filepath: %s', filepath) | ||||
h = MultiHash(length=length) | h = MultiHash(hash_names=DOWNLOAD_HASHES) | ||||
with open(filepath, 'wb') as f: | with open(filepath, 'wb') as f: | ||||
for chunk in response.iter_content(chunk_size=HASH_BLOCK_SIZE): | for chunk in response.iter_content(chunk_size=HASH_BLOCK_SIZE): | ||||
h.update(chunk) | h.update(chunk) | ||||
f.write(chunk) | f.write(chunk) | ||||
actual_length = os.path.getsize(filepath) | |||||
if length != actual_length: | |||||
raise ValueError('Error when checking size: %s != %s' % ( | |||||
length, actual_length)) | |||||
# Also check the expected hashes if provided | # Also check the expected hashes if provided | ||||
if hashes: | if hashes: | ||||
actual_hashes = h.hexdigest() | actual_hashes = h.hexdigest() | ||||
for algo_hash in hashes.keys(): | for algo_hash in hashes.keys(): | ||||
actual_digest = actual_hashes[algo_hash] | actual_digest = actual_hashes[algo_hash] | ||||
expected_digest = hashes[algo_hash] | expected_digest = hashes[algo_hash] | ||||
if actual_digest != expected_digest: | if actual_digest != expected_digest: | ||||
raise ValueError( | raise ValueError( | ||||
'Failure when fetching %s. ' | 'Failure when fetching %s. ' | ||||
'Checksum mismatched: %s != %s' % ( | 'Checksum mismatched: %s != %s' % ( | ||||
url, expected_digest, actual_digest)) | url, expected_digest, actual_digest)) | ||||
computed_hashes = h.hexdigest() | |||||
length = computed_hashes.pop('length') | |||||
extrinsic_metadata = { | extrinsic_metadata = { | ||||
'length': length, | 'length': length, | ||||
'filename': filename, | 'filename': filename, | ||||
'checksums': { | 'checksums': { | ||||
**h.hexdigest() | **computed_hashes | ||||
}, | }, | ||||
olasd: Could this simply be written as `'checksums': computed_hashes,`? | |||||
} | } | ||||
logger.debug('extrinsic_metadata', extrinsic_metadata) | logger.debug('extrinsic_metadata', extrinsic_metadata) | ||||
return filepath, extrinsic_metadata | return filepath, extrinsic_metadata | ||||
def release_name(version: str, filename: Optional[str] = None) -> str:
    """Compute the release path for a given version.

    Args:
        version: Version identifier of the release.
        filename: Optional artifact filename to append to the path.

    Returns:
        'releases/<version>/<filename>' when a filename is provided,
        otherwise 'releases/<version>'.

    """
    parts = ['releases', version]
    if filename:
        parts.append(filename)
    return '/'.join(parts)
'checksums': computed_hashes,?