diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -17,9 +17,21 @@
 
 extra_requirements = []
 
-
-pyblake2_hashes = {'blake2s256', 'blake2b512'}
-if pyblake2_hashes - set(hashlib.algorithms_available):
+pyblake2_hash_sets = [
+    # Built-in implementation in Python 3.6+
+    {'blake2s', 'blake2b'},
+    # Potentially shipped by OpenSSL 1.1 (e.g. Python 3.5 in Debian stretch
+    # has these)
+    {'blake2s256', 'blake2b512'},
+]
+
+for pyblake2_hashes in pyblake2_hash_sets:
+    if not pyblake2_hashes - set(hashlib.algorithms_available):
+        # The required blake2 hashes have been found
+        break
+else:
+    # None of the possible sets of blake2 hashes are available.
+    # use pyblake2 instead
     extra_requirements.append('pyblake2')
 
 setup(
diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
--- a/swh/model/hashutil.py
+++ b/swh/model/hashutil.py
@@ -43,15 +43,81 @@
 HASH_BLOCK_SIZE = 32768
 """Block size for streaming hash computations made in this module"""
 
-# Load blake2 hashes from pyblake2 if they are not available in the builtin
-# hashlib
-__pyblake2_hashes = {'blake2s256': 'blake2s',
-                     'blake2b512': 'blake2b'}
-__cache = hashlib.__builtin_constructor_cache
-for __hash, __pyblake2_fn in __pyblake2_hashes.items():
-    if __hash not in hashlib.algorithms_available:
-        import pyblake2
-        __cache[__hash] = getattr(pyblake2, __pyblake2_fn)
+_blake2_hash_cache = {}
+
+
+def _new_blake2_hash(algo):
+    """Return a new hash object for the given blake2 algorithm.
+
+    The constructor resolved for each algorithm name is memoized in
+    ``_blake2_hash_cache``, so the lookup only happens on the first call.
+
+    Args:
+        algo (str): a blake2 algorithm name: the family (``blake2b`` or
+            ``blake2s``), optionally followed by the digest size in bits
+            (e.g. ``blake2b512``); the name is matched case-insensitively
+
+    Returns:
+        a freshly initialized hash object
+
+    Raises:
+        ValueError: if ``algo`` is not a blake2 algorithm name, or if its
+            digest size suffix is not an integer multiple of 8
+
+    """
+    if algo in _blake2_hash_cache:
+        return _blake2_hash_cache[algo]()
+
+    lalgo = algo.lower()
+    if not lalgo.startswith('blake2'):
+        raise ValueError('Algorithm %s is not a blake2 hash' % algo)
+
+    blake_family = lalgo[:7]
+
+    digest_size = None
+    if lalgo[7:]:
+        try:
+            digest_size, remainder = divmod(int(lalgo[7:]), 8)
+        except ValueError:
+            raise ValueError(
+                'Unknown digest size for algo %s' % algo
+            ) from None
+        if remainder:
+            raise ValueError(
+                'Digest size for algorithm %s must be a multiple of 8' % algo
+            )
+
+    if lalgo in hashlib.algorithms_available:
+        # Handle the case where OpenSSL ships the given algorithm
+        # (e.g. Python 3.5 on Debian 9 stretch)
+        _blake2_hash_cache[algo] = lambda: hashlib.new(lalgo)
+    else:
+        # Try using the built-in implementation for Python 3.6+
+        if blake_family in hashlib.algorithms_available:
+            blake2 = getattr(hashlib, blake_family)
+        else:
+            import pyblake2
+            blake2 = getattr(pyblake2, blake_family)
+
+        if digest_size is None:
+            # No size suffix in the name: use the constructor's default
+            # digest size (hashlib rejects digest_size=None with TypeError)
+            _blake2_hash_cache[algo] = blake2
+        else:
+            _blake2_hash_cache[algo] = lambda: blake2(digest_size=digest_size)
+
+    return _blake2_hash_cache[algo]()
+
+
+def _new_hashlib_hash(algo):
+    """Initialize a digest object from hashlib.
+
+    Handle the swh-specific names for the blake2-related algorithms
+    """
+    if algo.startswith('blake2'):
+        return _new_blake2_hash(algo)
+    else:
+        return hashlib.new(algo)
 
 
 def _new_git_hash(base_algo, git_type, length):
@@ -75,7 +141,7 @@
         a hashutil.hash object
     """
 
-    h = hashlib.new(base_algo)
+    h = _new_hashlib_hash(base_algo)
     git_header = '%s %d\0' % (git_type, length)
     h.update(git_header.encode('ascii'))
 
@@ -113,7 +179,7 @@
         base_algo = algo[:-4]
         return _new_git_hash(base_algo, 'blob', length)
 
-    return hashlib.new(algo)
+    return _new_hashlib_hash(algo)
 
 
 def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None):
diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py
--- a/swh/model/tests/test_hashutil.py
+++ b/swh/model/tests/test_hashutil.py
@@ -3,6 +3,7 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import hashlib
 import io
 import os
 import tempfile
@@ -16,6 +17,9 @@
 
 class Hashutil(unittest.TestCase):
     def setUp(self):
+        # Reset function cache
+        hashutil._blake2_hash_cache = {}
+
         self.data = b'1984\n'
         self.hex_checksums = {
             'sha1': '62be35bf00ff0c624f4a621e2ea5595a049e0731',
@@ -150,25 +154,103 @@
                           'expected one of blake2b512, blake2s256, '
                           'sha1, sha1_git, sha256')
 
-    @patch('swh.model.hashutil.hashlib')
+    @patch('hashlib.new')
     @istest
-    def new_hash_blake2b(self, mock_hashlib):
-        mock_hashlib.new.return_value = 'some-hashlib-object'
+    def new_hash_blake2b_blake2b512_builtin(self, mock_hashlib_new):
+        if 'blake2b512' not in hashlib.algorithms_available:
+            self.skipTest('blake2b512 not built-in')
+        mock_hashlib_new.return_value = sentinel = object()
 
         h = hashutil._new_hash('blake2b512')
 
-        self.assertEquals(h, 'some-hashlib-object')
-        mock_hashlib.new.assert_called_with('blake2b512')
+        self.assertIs(h, sentinel)
+        mock_hashlib_new.assert_called_with('blake2b512')
 
-    @patch('swh.model.hashutil.hashlib')
+    @patch('hashlib.new')
     @istest
-    def new_hash_blake2s(self, mock_hashlib):
-        mock_hashlib.new.return_value = 'some-hashlib-object'
+    def new_hash_blake2s_blake2s256_builtin(self, mock_hashlib_new):
+        if 'blake2s256' not in hashlib.algorithms_available:
+            self.skipTest('blake2s256 not built-in')
+        mock_hashlib_new.return_value = sentinel = object()
 
         h = hashutil._new_hash('blake2s256')
 
-        self.assertEquals(h, 'some-hashlib-object')
-        mock_hashlib.new.assert_called_with('blake2s256')
+        self.assertIs(h, sentinel)
+        mock_hashlib_new.assert_called_with('blake2s256')
+
+    @istest
+    def new_hash_blake2b_builtin(self):
+        removed_hash = False
+
+        try:
+            if 'blake2b512' in hashlib.algorithms_available:
+                removed_hash = True
+                hashlib.algorithms_available.remove('blake2b512')
+            if 'blake2b' not in hashlib.algorithms_available:
+                self.skipTest('blake2b not built in')
+
+            with patch('hashlib.blake2b') as mock_blake2b:
+                mock_blake2b.return_value = sentinel = object()
+
+                h = hashutil._new_hash('blake2b512')
+
+                self.assertIs(h, sentinel)
+                mock_blake2b.assert_called_with(digest_size=512//8)
+        finally:
+            if removed_hash:
+                hashlib.algorithms_available.add('blake2b512')
+
+    @istest
+    def new_hash_blake2s_builtin(self):
+        removed_hash = False
+
+        try:
+            if 'blake2s256' in hashlib.algorithms_available:
+                removed_hash = True
+                hashlib.algorithms_available.remove('blake2s256')
+            if 'blake2s' not in hashlib.algorithms_available:
+                self.skipTest('blake2s not built in')
+
+            with patch('hashlib.blake2s') as mock_blake2s:
+                mock_blake2s.return_value = sentinel = object()
+
+                h = hashutil._new_hash('blake2s256')
+
+                self.assertIs(h, sentinel)
+                mock_blake2s.assert_called_with(digest_size=256//8)
+        finally:
+            if removed_hash:
+                hashlib.algorithms_available.add('blake2s256')
+
+    @istest
+    def new_hash_blake2b_pyblake2(self):
+        if 'blake2b512' in hashlib.algorithms_available:
+            self.skipTest('blake2b512 built in')
+        if 'blake2b' in hashlib.algorithms_available:
+            self.skipTest('blake2b built in')
+
+        with patch('pyblake2.blake2b') as mock_blake2b:
+            mock_blake2b.return_value = sentinel = object()
+
+            h = hashutil._new_hash('blake2b512')
+
+            self.assertIs(h, sentinel)
+            mock_blake2b.assert_called_with(digest_size=512//8)
+
+    @istest
+    def new_hash_blake2s_pyblake2(self):
+        if 'blake2s256' in hashlib.algorithms_available:
+            self.skipTest('blake2s256 built in')
+        if 'blake2s' in hashlib.algorithms_available:
+            self.skipTest('blake2s built in')
+
+        with patch('pyblake2.blake2s') as mock_blake2s:
+            mock_blake2s.return_value = sentinel = object()
+
+            h = hashutil._new_hash('blake2s256')
+
+            self.assertIs(h, sentinel)
+            mock_blake2s.assert_called_with(digest_size=256//8)
 
 
 class HashlibGit(unittest.TestCase):