Changeset View
Changeset View
Standalone View
Standalone View
swh/model/hashutil.py
Show First 20 Lines • Show All 50 Lines • ▼ Show 20 Lines | |||||
""" | """ | ||||
import binascii | import binascii | ||||
import functools | import functools | ||||
import hashlib | import hashlib | ||||
from io import BytesIO | from io import BytesIO | ||||
import os | import os | ||||
from typing import Callable, Dict | from typing import Callable, Dict, Optional | ||||
ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256", "blake2b512"]) | ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256", "blake2b512"]) | ||||
"""Hashing algorithms supported by this module""" | """Hashing algorithms supported by this module""" | ||||
DEFAULT_ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256"]) | DEFAULT_ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256"]) | ||||
"""Algorithms computed by default when calling the functions from this module. | """Algorithms computed by default when calling the functions from this module. | ||||
Subset of :const:`ALGORITHMS`. | Subset of :const:`ALGORITHMS`. | ||||
▲ Show 20 Lines • Show All 139 Lines • ▼ Show 20 Lines | def _new_hashlib_hash(algo): | ||||
Handle the swh-specific names for the blake2-related algorithms | Handle the swh-specific names for the blake2-related algorithms | ||||
""" | """ | ||||
if algo.startswith("blake2"): | if algo.startswith("blake2"): | ||||
return _new_blake2_hash(algo) | return _new_blake2_hash(algo) | ||||
else: | else: | ||||
return hashlib.new(algo) | return hashlib.new(algo) | ||||
def _new_git_hash(base_algo, git_type, length): | def _git_header(git_type: str, length: int) -> bytes: | ||||
"""Initialize a digest object (as returned by python's hashlib) for the | """Returns the header for a git object of the given type and length. | ||||
requested algorithm, and feed it with the header for a git object of the | |||||
given type and length. | |||||
The header for hashing a git object consists of: | The header of a git object consists of: | ||||
- The type of the object (encoded in ASCII) | - The type of the object (encoded in ASCII) | ||||
- One ASCII space (\x20) | - One ASCII space (\x20) | ||||
- The length of the object (decimal encoded in ASCII) | - The length of the object (decimal encoded in ASCII) | ||||
- One NUL byte | - One NUL byte | ||||
Args: | Args: | ||||
base_algo (str from :const:`ALGORITHMS`): a hashlib-supported algorithm | base_algo (str from :const:`ALGORITHMS`): a hashlib-supported algorithm | ||||
git_type: the type of the git object (supposedly one of 'blob', | git_type: the type of the git object (supposedly one of 'blob', | ||||
'commit', 'tag', 'tree') | 'commit', 'tag', 'tree') | ||||
length: the length of the git object you're encoding | length: the length of the git object you're encoding | ||||
Returns: | Returns: | ||||
a hashutil.hash object | a hashutil.hash object | ||||
""" | """ | ||||
git_object_types = { | |||||
"blob", | |||||
"tree", | |||||
"commit", | |||||
"tag", | |||||
"snapshot", | |||||
"raw_extrinsic_metadata", | |||||
"extid", | |||||
} | |||||
h = _new_hashlib_hash(base_algo) | if git_type not in git_object_types: | ||||
git_header = "%s %d\0" % (git_type, length) | raise ValueError( | ||||
h.update(git_header.encode("ascii")) | "Unexpected git object type %s, expected one of %s" | ||||
% (git_type, ", ".join(sorted(git_object_types))) | |||||
) | |||||
return h | return ("%s %d\0" % (git_type, length)).encode("ascii") | ||||
def _new_hash(algo, length=None): | def _new_hash(algo: str, length: Optional[int] = None): | ||||
"""Initialize a digest object (as returned by python's hashlib) for | """Initialize a digest object (as returned by python's hashlib) for | ||||
the requested algorithm. See the constant ALGORITHMS for the list | the requested algorithm. See the constant ALGORITHMS for the list | ||||
of supported algorithms. If a git-specific hashing algorithm is | of supported algorithms. If a git-specific hashing algorithm is | ||||
requested (e.g., "sha1_git"), the hashing object will be pre-fed | requested (e.g., "sha1_git"), the hashing object will be pre-fed | ||||
with the needed header; for this to work, length must be given. | with the needed header; for this to work, length must be given. | ||||
Args: | Args: | ||||
algo (str): a hashing algorithm (one of ALGORITHMS) | algo (str): a hashing algorithm (one of ALGORITHMS) | ||||
Show All 13 Lines | if algo not in ALGORITHMS: | ||||
"Unexpected hashing algorithm %s, expected one of %s" | "Unexpected hashing algorithm %s, expected one of %s" | ||||
% (algo, ", ".join(sorted(ALGORITHMS))) | % (algo, ", ".join(sorted(ALGORITHMS))) | ||||
) | ) | ||||
if algo.endswith("_git"): | if algo.endswith("_git"): | ||||
if length is None: | if length is None: | ||||
raise ValueError("Missing length for git hashing algorithm") | raise ValueError("Missing length for git hashing algorithm") | ||||
base_algo = algo[:-4] | base_algo = algo[:-4] | ||||
return _new_git_hash(base_algo, "blob", length) | h = _new_hashlib_hash(base_algo) | ||||
h.update(_git_header("blob", length)) | |||||
return h | |||||
return _new_hashlib_hash(algo) | return _new_hashlib_hash(algo) | ||||
def hash_git_data(data, git_type, base_algo="sha1"): | |||||
"""Hash the given data as a git object of type git_type. | |||||
Args: | |||||
data: a bytes object | |||||
git_type: the git object type | |||||
base_algo: the base hashing algorithm used (default: sha1) | |||||
Returns: a dict mapping each algorithm to a bytes digest | |||||
Raises: | |||||
ValueError if the git_type is unexpected. | |||||
""" | |||||
git_object_types = { | |||||
"blob", | |||||
"tree", | |||||
"commit", | |||||
"tag", | |||||
"snapshot", | |||||
"raw_extrinsic_metadata", | |||||
"extid", | |||||
} | |||||
if git_type not in git_object_types: | |||||
raise ValueError( | |||||
"Unexpected git object type %s, expected one of %s" | |||||
% (git_type, ", ".join(sorted(git_object_types))) | |||||
) | |||||
h = _new_git_hash(base_algo, git_type, len(data)) | |||||
h.update(data) | |||||
return h.digest() | |||||
@functools.lru_cache() | @functools.lru_cache() | ||||
def hash_to_hex(hash): | def hash_to_hex(hash): | ||||
"""Converts a hash (in hex or bytes form) to its hexadecimal ascii form | """Converts a hash (in hex or bytes form) to its hexadecimal ascii form | ||||
Args: | Args: | ||||
hash (str or bytes): a :class:`bytes` hash or a :class:`str` containing | hash (str or bytes): a :class:`bytes` hash or a :class:`str` containing | ||||
the hexadecimal form of the hash | the hexadecimal form of the hash | ||||
▲ Show 20 Lines • Show All 49 Lines • Show Last 20 Lines |