Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/git/converters.py
Show All 30 Lines | def dulwich_blob_to_content_id(blob): | ||||
data = blob.as_raw_string() | data = blob.as_raw_string() | ||||
hashes = MultiHash.from_data(data, HASH_ALGORITHMS).digest() | hashes = MultiHash.from_data(data, HASH_ALGORITHMS).digest() | ||||
hashes['sha1_git'] = blob.sha().digest() | hashes['sha1_git'] = blob.sha().digest() | ||||
hashes['length'] = size | hashes['length'] = size | ||||
return hashes | return hashes | ||||
def dulwich_blob_to_content(blob, log=None, max_content_size=None, | def dulwich_blob_to_content(blob, log=None, max_content_size=None, | ||||
origin_id=None): | origin_url=None): | ||||
"""Convert a dulwich blob to a Software Heritage content""" | """Convert a dulwich blob to a Software Heritage content""" | ||||
if blob.type_name != b'blob': | if blob.type_name != b'blob': | ||||
return | return | ||||
ret = dulwich_blob_to_content_id(blob) | ret = dulwich_blob_to_content_id(blob) | ||||
size = ret['length'] | size = ret['length'] | ||||
if max_content_size: | if max_content_size: | ||||
if size > max_content_size: | if size > max_content_size: | ||||
id = hash_to_hex(ret['sha1_git']) | id = hash_to_hex(ret['sha1_git']) | ||||
if log: | if log: | ||||
log.info('Skipping content %s, too large (%s > %s)' % | log.info('Skipping content %s, too large (%s > %s)' % | ||||
(id, size, max_content_size), extra={ | (id, size, max_content_size), extra={ | ||||
'swh_type': 'loader_git_content_skip', | 'swh_type': 'loader_git_content_skip', | ||||
'swh_id': id, | 'swh_id': id, | ||||
'swh_size': size, | 'swh_size': size, | ||||
}) | }) | ||||
ret['status'] = 'absent' | ret['status'] = 'absent' | ||||
ret['reason'] = 'Content too large' | ret['reason'] = 'Content too large' | ||||
ret['origin'] = origin_id | ret['origin'] = origin_url | ||||
return ret | return ret | ||||
data = blob.as_raw_string() | data = blob.as_raw_string() | ||||
ret['data'] = data | ret['data'] = data | ||||
ret['status'] = 'visible' | ret['status'] = 'visible' | ||||
return ret | return ret | ||||
▲ Show 20 Lines • Show All 169 Lines • Show Last 20 Lines |