# Patch summary (commentary only; no hunk content is changed by these notes).
#
# swh/model/hashutil.py
#   - Adds "md5" to the ALGORITHMS set. DEFAULT_ALGORITHMS appears only as
#     unchanged context, so md5 is supported but not computed by default.
#
# swh/model/tests/test_hashutil.py
#   - Imports DEFAULT_ALGORITHMS and hash_to_bytehex next to MultiHash.
#   - Adds three tests that call MultiHash.from_file with
#     hash_names=DEFAULT_ALGORITHMS | {"md5"} and compare digest(),
#     hexdigest() and bytehexdigest() against the fixture checksums merged
#     with the value computed by hashlib.md5 on the same data.
#   - Updates the expected error message of hashutil._new_hash("blake2:10")
#     so that the listed algorithms include md5.
#
# NOTE(review): line breaks and indentation below were reconstructed from the
# hunk line counts and Python syntax -- confirm against the original patch
# before applying.
diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
--- a/swh/model/hashutil.py
+++ b/swh/model/hashutil.py
@@ -58,7 +58,7 @@
 import os
 from typing import Callable, Dict, Optional
 
-ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256", "blake2b512"])
+ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256", "blake2b512", "md5"])
 """Hashing algorithms supported by this module"""
 
 DEFAULT_ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256"])
diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py
--- a/swh/model/tests/test_hashutil.py
+++ b/swh/model/tests/test_hashutil.py
@@ -13,7 +13,7 @@
 import pytest
 
 from swh.model import hashutil
-from swh.model.hashutil import MultiHash
+from swh.model.hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_bytehex
 
 
 @contextlib.contextmanager
@@ -112,6 +112,36 @@
     assert checksums == hash_test_data.bytehex_checksums
 
 
+def test_multi_hash_file_with_md5(hash_test_data):
+    fobj = io.BytesIO(hash_test_data.data)
+
+    checksums = MultiHash.from_file(
+        fobj, hash_names=DEFAULT_ALGORITHMS | {"md5"}, length=len(hash_test_data.data)
+    ).digest()
+    md5sum = {"md5": hashlib.md5(hash_test_data.data).digest()}
+    assert checksums == {**hash_test_data.checksums, **md5sum}
+
+
+def test_multi_hash_file_hexdigest_with_md5(hash_test_data):
+    fobj = io.BytesIO(hash_test_data.data)
+    length = len(hash_test_data.data)
+    checksums = MultiHash.from_file(
+        fobj, hash_names=DEFAULT_ALGORITHMS | {"md5"}, length=length
+    ).hexdigest()
+    md5sum = {"md5": hashlib.md5(hash_test_data.data).hexdigest()}
+    assert checksums == {**hash_test_data.hex_checksums, **md5sum}
+
+
+def test_multi_hash_file_bytehexdigest_with_md5(hash_test_data):
+    fobj = io.BytesIO(hash_test_data.data)
+    length = len(hash_test_data.data)
+    checksums = MultiHash.from_file(
+        fobj, hash_names=DEFAULT_ALGORITHMS | {"md5"}, length=length
+    ).bytehexdigest()
+    md5sum = {"md5": hash_to_bytehex(hashlib.md5(hash_test_data.data).digest())}
+    assert checksums == {**hash_test_data.bytehex_checksums, **md5sum}
+
+
 def test_multi_hash_file_missing_length(hash_test_data):
     fobj = io.BytesIO(hash_test_data.data)
     with pytest.raises(ValueError, match="Missing length"):
@@ -177,7 +207,7 @@
     expected_message = (
         "Unexpected hashing algorithm blake2:10, "
         "expected one of blake2b512, blake2s256, "
-        "sha1, sha1_git, sha256"
+        "md5, sha1, sha1_git, sha256"
     )
     with pytest.raises(ValueError, match=expected_message):
         hashutil._new_hash("blake2:10")