Add sha384 and sha512 to the supported hash algorithms and allow callers
to choose which hashes are computed when building a content object.

In swh/model/hashutil.py, ALGORITHMS gains "sha384" and "sha512";
DEFAULT_ALGORITHMS is left unchanged.

In swh/model/model.py, _hash_data() and from_data() take a new hash_names
argument (default: DEFAULT_ALGORITHMS) which is forwarded to
MultiHash.from_data(). get_hash() now validates the requested name against
ALGORITHMS instead of DEFAULT_ALGORITHMS, and raises ValueError when the
stored value for that name is None.

NOTE(review): get_hash() calls getattr(self, hash_name) without a default.
If the class does not declare an attribute for every name in ALGORITHMS,
the caller gets AttributeError rather than ValueError. Confirm against the
class definition, which is not part of this patch.

NOTE(review): hash_names defaults to the module-level DEFAULT_ALGORITHMS
set object itself. This presumably relies on MultiHash.from_data() not
mutating its argument; verify.

NOTE(review): the line structure of this patch was restored from the hunk
headers' line counts. Leading indentation and the position of blank
context lines are reconstructed; check them against the target tree
before applying.

diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
--- a/swh/model/hashutil.py
+++ b/swh/model/hashutil.py
@@ -58,7 +58,18 @@
 import os
 from typing import Callable, Dict, Optional, Union
 
-ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256", "blake2b512", "md5"])
+ALGORITHMS = set(
+    [
+        "sha1",
+        "sha256",
+        "sha1_git",
+        "blake2s256",
+        "blake2b512",
+        "sha384",
+        "sha512",
+        "md5",
+    ]
+)
 """Hashing algorithms supported by this module"""
 
 DEFAULT_ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256"])
diff --git a/swh/model/model.py b/swh/model/model.py
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -32,7 +32,13 @@
 
 from . import git_objects
 from .collections import ImmutableDict
-from .hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_bytehex, hash_to_hex
+from .hashutil import (
+    ALGORITHMS,
+    DEFAULT_ALGORITHMS,
+    MultiHash,
+    hash_to_bytehex,
+    hash_to_hex,
+)
 from .swhids import CoreSWHID
 from .swhids import ExtendedObjectType as SwhidExtendedObjectType
 from .swhids import ExtendedSWHID
@@ -1291,9 +1297,9 @@
         )
 
     @staticmethod
-    def _hash_data(data: bytes):
+    def _hash_data(data: bytes, hash_names=DEFAULT_ALGORITHMS):
         """Hash some data, returning most of the fields of a content object"""
-        d = MultiHash.from_data(data).digest()
+        d = MultiHash.from_data(data, hash_names).digest()
         d["data"] = data
         d["length"] = len(data)
 
@@ -1311,9 +1317,13 @@
         return super().from_dict(d)
 
     def get_hash(self, hash_name):
-        if hash_name not in DEFAULT_ALGORITHMS:
+        if hash_name not in ALGORITHMS:
             raise ValueError("{} is not a valid hash name.".format(hash_name))
-        return getattr(self, hash_name)
+        value = getattr(self, hash_name)
+        if value is None:
+            raise ValueError("No result for hash {}".format(hash_name))
+        else:
+            return value
 
     def hashes(self) -> Dict[str, bytes]:
         """Returns a dictionary {hash_name: hash_value}"""
@@ -1371,13 +1381,15 @@
         return content
 
     @classmethod
-    def from_data(cls, data, status="visible", ctime=None) -> "Content":
+    def from_data(
+        cls, data, status="visible", ctime=None, hash_names=DEFAULT_ALGORITHMS
+    ) -> "Content":
         """Generate a Content from a given `data` byte string.
 
         This populates the Content with the hashes and length for the data
         passed as argument, as well as the data itself.
         """
-        d = cls._hash_data(data)
+        d = cls._hash_data(data, hash_names)
         d["status"] = status
         d["ctime"] = ctime
         return cls(**d)