diff --git a/PKG-INFO b/PKG-INFO index e14129d..92d127e 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,42 +1,42 @@ Metadata-Version: 2.1 Name: swh.model -Version: 6.5.0 +Version: 6.5.1 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers Author-email: swh-devel@inria.fr Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-model Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-model/ Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: cli Provides-Extra: testing-minimal Provides-Extra: testing License-File: LICENSE License-File: AUTHORS swh-model ========= Implementation of the Data model of the Software Heritage project, used to archive source code artifacts. This module defines the notion of SoftWare Heritage persistent IDentifiers (SWHIDs) and provides tools to compute them: ```sh $ swh-identify fork.c kmod.c sched/deadline.c swh:1:cnt:2e391c754ae730bd2d8520c2ab497c403220c6e3 fork.c swh:1:cnt:0277d1216f80ae1adeed84a686ed34c9b2931fc2 kmod.c swh:1:cnt:57b939c81bce5d06fa587df8915f05affbe22b82 sched/deadline.c $ swh-identify --no-filename /usr/src/linux/kernel/ swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab ``` diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO index e14129d..92d127e 100644 --- a/swh.model.egg-info/PKG-INFO +++ b/swh.model.egg-info/PKG-INFO @@ -1,42 +1,42 @@ Metadata-Version: 2.1 Name: swh.model -Version: 6.5.0 +Version: 6.5.1 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers Author-email: swh-devel@inria.fr Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-model Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-model/ Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: cli Provides-Extra: testing-minimal Provides-Extra: testing License-File: LICENSE License-File: AUTHORS swh-model ========= Implementation of the Data model of the Software Heritage project, used to archive source code artifacts. 
This module defines the notion of SoftWare Heritage persistent IDentifiers (SWHIDs) and provides tools to compute them: ```sh $ swh-identify fork.c kmod.c sched/deadline.c swh:1:cnt:2e391c754ae730bd2d8520c2ab497c403220c6e3 fork.c swh:1:cnt:0277d1216f80ae1adeed84a686ed34c9b2931fc2 kmod.c swh:1:cnt:57b939c81bce5d06fa587df8915f05affbe22b82 sched/deadline.c $ swh-identify --no-filename /usr/src/linux/kernel/ swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab ``` diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py index 86ecf12..8795b1f 100644 --- a/swh/model/from_disk.py +++ b/swh/model/from_disk.py @@ -1,588 +1,592 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Conversion from filesystem tree to SWH objects. This module allows reading a tree of directories and files from a local filesystem, and converting them to in-memory data structures, which can then be exported to SWH data model objects, as defined in :mod:`swh.model.model`. """ import datetime import enum import fnmatch import glob import os import re import stat from typing import Any, Iterable, Iterator, List, Optional, Pattern, Tuple import attr from attrs_strict import type_validator from typing_extensions import Final from . import model from .exceptions import InvalidDirectoryPath from .git_objects import directory_entry_sort_key from .hashutil import MultiHash, hash_to_hex from .merkle import MerkleLeaf, MerkleNode from .swhids import CoreSWHID, ObjectType @attr.s(frozen=True, slots=True) class DiskBackedContent(model.BaseContent): """Content-like class, which allows lazy-loading data from the disk.""" object_type: Final = "content_file" sha1 = attr.ib(type=bytes, validator=type_validator()) sha1_git = attr.ib(type=model.Sha1Git, validator=type_validator()) sha256 = attr.ib(type=bytes, validator=type_validator()) blake2s256 = attr.ib(type=bytes, validator=type_validator()) length = attr.ib(type=int, validator=type_validator()) status = attr.ib( type=str, validator=attr.validators.in_(["visible", "hidden"]), default="visible", ) ctime = attr.ib( type=Optional[datetime.datetime], validator=type_validator(), default=None, eq=False, ) path = attr.ib(type=Optional[bytes], default=None) @classmethod def from_dict(cls, d): return cls(**d) def __attrs_post_init__(self): if self.path is None: raise TypeError("path must not be None.") def with_data(self) -> model.Content: args = self.to_dict() del args["path"] assert self.path is not None with open(self.path, "rb") as fd: return model.Content.from_dict({**args, "data": fd.read()}) class DentryPerms(enum.IntEnum): """Admissible permissions for directory entries.""" content = 0o100644 """Content""" executable_content = 0o100755 """Executable content (e.g. executable script)""" symlink = 0o120000 """Symbolic link""" directory = 0o040000 """Directory""" revision = 0o160000 """Revision (e.g.
submodule)""" def mode_to_perms(mode): """Convert a file mode to a permission compatible with Software Heritage directory entries Args: mode (int): a file mode as returned by :func:`os.stat` in :attr:`os.stat_result.st_mode` Returns: DentryPerms: one of the following values: :const:`DentryPerms.content`: plain file :const:`DentryPerms.executable_content`: executable file :const:`DentryPerms.symlink`: symbolic link :const:`DentryPerms.directory`: directory """ if stat.S_ISLNK(mode): return DentryPerms.symlink if stat.S_ISDIR(mode): return DentryPerms.directory else: # file is executable in any way if mode & (0o111): return DentryPerms.executable_content else: return DentryPerms.content class Content(MerkleLeaf): """Representation of a Software Heritage content as a node in a Merkle tree. The current Merkle hash for the Content nodes is the `sha1_git`, which makes it consistent with what :class:`Directory` uses for its own hash computation. """ __slots__ = [] # type: List[str] object_type: Final = "content" @classmethod def from_bytes(cls, *, mode, data): """Convert data (raw :class:`bytes`) to a Software Heritage content entry Args: mode (int): a file mode (passed to :func:`mode_to_perms`) data (bytes): raw contents of the file """ ret = MultiHash.from_data(data).digest() ret["length"] = len(data) ret["perms"] = mode_to_perms(mode) ret["data"] = data ret["status"] = "visible" return cls(ret) @classmethod def from_symlink(cls, *, path, mode): """Convert a symbolic link to a Software Heritage content entry""" return cls.from_bytes(mode=mode, data=os.readlink(path)) @classmethod def from_file(cls, *, path, max_content_length=None): """Compute the Software Heritage content entry corresponding to an on-disk file. The returned dictionary contains keys useful for both: - loading the content in the archive (hashes, `length`) - using the content as a directory entry in a directory Args: save_path (bool): add the file path to the entry max_content_length (Optional[int]): if given, all contents larger than this will be skipped. """ file_stat = os.lstat(path) mode = file_stat.st_mode length = file_stat.st_size too_large = max_content_length is not None and length > max_content_length if stat.S_ISLNK(mode): # Symbolic link: return a file whose contents are the link target if too_large: # Unlike large contents, we can't stream symlinks to # MultiHash, and we don't want to fit them in memory if # they exceed max_content_length either. # Thankfully, this should not happen for reasonable values of # max_content_length because of OS/filesystem limitations, # so let's just raise an error. 
raise Exception(f"Symlink too large ({length} bytes)") return cls.from_symlink(path=path, mode=mode) elif not stat.S_ISREG(mode): # not a regular file: return the empty file instead return cls.from_bytes(mode=mode, data=b"") if too_large: skip_reason = "Content too large" else: skip_reason = None hashes = MultiHash.from_path(path).digest() if skip_reason: ret = { **hashes, "status": "absent", "reason": skip_reason, } else: ret = { **hashes, "status": "visible", } ret["path"] = path ret["perms"] = mode_to_perms(mode) ret["length"] = length obj = cls(ret) return obj def swhid(self) -> CoreSWHID: """Return node identifier as a SWHID""" return CoreSWHID(object_type=ObjectType.CONTENT, object_id=self.hash) def __repr__(self): return "Content(id=%s)" % hash_to_hex(self.hash) def compute_hash(self): return self.data["sha1_git"] def to_model(self) -> model.BaseContent: """Builds a `model.BaseContent` object based on this leaf.""" data = self.get_data().copy() data.pop("perms", None) if data["status"] == "absent": data.pop("path", None) return model.SkippedContent.from_dict(data) elif "data" in data: return model.Content.from_dict(data) else: return DiskBackedContent.from_dict(data) def accept_all_directories(dirpath: str, dirname: str, entries: Iterable[Any]) -> bool: """Default filter for :func:`Directory.from_disk` accepting all directories Args: dirname (bytes): directory name entries (list): directory entries """ return True def ignore_empty_directories( dirpath: str, dirname: str, entries: Iterable[Any] ) -> bool: """Filter for :func:`directory_to_objects` ignoring empty directories Args: dirname (bytes): directory name entries (list): directory entries Returns: True if the directory is not empty, false if the directory is empty """ return bool(entries) def ignore_named_directories(names, *, case_sensitive=True): """Filter for :func:`directory_to_objects` to ignore directories named one of names. Args: names (list of bytes): names to ignore case_sensitive (bool): whether to do the filtering in a case sensitive way Returns: a directory filter for :func:`directory_to_objects` """ if not case_sensitive: names = [name.lower() for name in names] def named_filter( dirpath: str, dirname: str, entries: Iterable[Any], names: Iterable[Any] = names, case_sensitive: bool = case_sensitive, ): if case_sensitive: return dirname not in names else: return dirname.lower() not in names return named_filter # TODO: `extract_regex_objs` has been copied and adapted from `swh.scanner`. # In the future `swh.scanner` should use the `swh.model` version and remove its own. def extract_regex_objs( root_path: bytes, patterns: Iterable[bytes] ) -> Iterator[Pattern[bytes]]: """Generates a regex object for each pattern given in input and checks if the path is a subdirectory or relative to the root path. Args: root_path (bytes): path to the root directory - patterns (list of byte): patterns to match + patterns (list of byte): shell patterns to match Yields: an SRE_Pattern object """ absolute_root_path = os.path.abspath(root_path) for pattern in patterns: - for path in glob.glob(pattern): - absolute_path = os.path.abspath(path) - if not absolute_path.startswith(absolute_root_path): + if os.path.isabs(pattern): + pattern = os.path.relpath(pattern, root_path) + # python 3.10 has a `root_dir` argument for glob, but not the previous + # version. 
So we adjust the pattern + test_pattern = os.path.join(absolute_root_path, pattern) + for path in glob.glob(test_pattern): + if os.path.isabs(path) and not path.startswith(absolute_root_path): error_msg = ( b'The path "' + path + b'" is not a subdirectory or relative ' b'to the root directory path: "' + root_path + b'"' ) raise InvalidDirectoryPath(error_msg) regex = fnmatch.translate(pattern.decode()) yield re.compile(regex.encode()) def ignore_directories_patterns(root_path: bytes, patterns: Iterable[bytes]): """Filter for :func:`directory_to_objects` to ignore directories matching certain patterns. Args: root_path (bytes): path of the root directory - patterns (list of byte): patterns to ignore + patterns (list of bytes): patterns to ignore Returns: a directory filter for :func:`directory_to_objects` """ sre_patterns = set(extract_regex_objs(root_path, patterns)) def pattern_filter( dirpath: bytes, dirname: bytes, entries: Iterable[Any], patterns: Iterable[Any] = sre_patterns, root_path: bytes = os.path.abspath(root_path), ): full_path = os.path.abspath(dirpath) relative_path = os.path.relpath(full_path, root_path) return not any([pattern.match(relative_path) for pattern in patterns]) return pattern_filter def iter_directory( directory, ) -> Tuple[List[model.Content], List[model.SkippedContent], List[model.Directory]]: """Return the directory listing from a :class:`Directory` instance loaded from disk. Raises: TypeError in case an unexpected object type is listed. Returns: A tuple of lists of contents, skipped contents, and directories, respectively. """ contents: List[model.Content] = [] skipped_contents: List[model.SkippedContent] = [] directories: List[model.Directory] = [] for obj in directory.iter_tree(): obj = obj.to_model() obj_type = obj.object_type if obj_type in (model.Content.object_type, DiskBackedContent.object_type): # FIXME: read the data from disk later (when the # storage buffer is flushed). obj = obj.with_data() contents.append(obj) elif obj_type == model.SkippedContent.object_type: skipped_contents.append(obj) elif obj_type == model.Directory.object_type: directories.append(obj) else: raise TypeError(f"Unexpected object type from disk: {obj}") return contents, skipped_contents, directories class Directory(MerkleNode): """Representation of a Software Heritage directory as a node in a Merkle Tree. This class can be used to generate, from an on-disk directory, all the objects that need to be sent to the Software Heritage archive. The :func:`from_disk` constructor allows you to generate the data structure from a directory on disk. The resulting :class:`Directory` can then be manipulated as a dictionary, using the path as key. The :func:`collect` method is used to retrieve all the objects that need to be added to the Software Heritage archive since the last collection, by class (contents and directories). When using the dict-like methods to update the contents of the directory, the affected levels of hierarchy are reset and can be collected again using the same method. This enables the efficient collection of updated nodes, for instance when the client is applying diffs.
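A minimal usage sketch (the on-disk path here is hypothetical):

.. code-block:: python

    from swh.model.from_disk import Directory, iter_directory

    root = Directory.from_disk(path=b"/srv/src/project")
    print(root.swhid())  # swh:1:dir:...
    contents, skipped_contents, directories = iter_directory(root)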
""" __slots__ = ["__entries", "__model_object"] object_type: Final = "directory" @classmethod def from_disk( cls, *, path, dir_filter=accept_all_directories, max_content_length=None ): """Compute the Software Heritage objects for a given directory tree Args: path (bytes): the directory to traverse data (bool): whether to add the data to the content objects save_path (bool): whether to add the path to the content objects dir_filter (function): a filter to ignore some directories by name or contents. Takes two arguments: dirname and entries, and returns True if the directory should be added, False if the directory should be ignored. max_content_length (Optional[int]): if given, all contents larger than this will be skipped. """ top_path = path dirs = {} for root, dentries, fentries in os.walk(top_path, topdown=False): entries = {} # Join fentries and dentries in the same processing, as symbolic # links to directories appear in dentries... for name in fentries + dentries: path = os.path.join(root, name) if not os.path.isdir(path) or os.path.islink(path): content = Content.from_file( path=path, max_content_length=max_content_length ) entries[name] = content else: if dir_filter(path, name, dirs[path].entries): entries[name] = dirs[path] dirs[root] = cls({"name": os.path.basename(root), "path": root}) dirs[root].update(entries) return dirs[top_path] def __init__(self, data=None): super().__init__(data=data) self.__entries = None self.__model_object = None def invalidate_hash(self): self.__entries = None self.__model_object = None super().invalidate_hash() @staticmethod def child_to_directory_entry(name, child): if child.object_type == "directory": return { "type": "dir", "perms": DentryPerms.directory, "target": child.hash, "name": name, } elif child.object_type == "content": return { "type": "file", "perms": child.data["perms"], "target": child.hash, "name": name, } else: raise ValueError(f"unknown child {child}") def get_data(self, **kwargs): return { "id": self.hash, "entries": self.entries, } @property def entries(self): """Child nodes, sorted by name in the same way :func:`swh.model.git_objects.directory_git_object` does.""" if self.__entries is None: self.__entries = sorted( ( self.child_to_directory_entry(name, child) for name, child in self.items() ), key=directory_entry_sort_key, ) return self.__entries def swhid(self) -> CoreSWHID: """Return node identifier as a SWHID""" return CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=self.hash) def compute_hash(self): return self.to_model().id def to_model(self) -> model.Directory: """Builds a `model.Directory` object based on this node; ignoring its children.""" if self.__model_object is None: DirectoryEntry = model.DirectoryEntry entries = [] for name, child in self.items(): if child.object_type == "directory": e = DirectoryEntry( type="dir", perms=DentryPerms.directory, target=child.hash, name=name, ) elif child.object_type == "content": e = DirectoryEntry( type="file", perms=child.data["perms"], target=child.hash, name=name, ) else: raise ValueError(f"unknown child {child}") entries.append(e) entries.sort(key=directory_entry_sort_key) self.__model_object = model.Directory(entries=tuple(entries)) return self.__model_object def __getitem__(self, key): if not isinstance(key, bytes): raise ValueError("Can only get a bytes from Directory") # Convenience shortcut if key == b"": return self if b"/" not in key: return super().__getitem__(key) else: key1, key2 = key.split(b"/", 1) return self.__getitem__(key1)[key2] def __setitem__(self, key, 
value): if not isinstance(key, bytes): raise ValueError("Can only set a bytes Directory entry") if not isinstance(value, (Content, Directory)): raise ValueError( "Can only set a Directory entry to a Content or " "Directory" ) if key == b"": raise ValueError("Directory entry must have a name") if b"\x00" in key: raise ValueError("Directory entry name must not contain nul bytes") if b"/" not in key: return super().__setitem__(key, value) else: key1, key2 = key.rsplit(b"/", 1) self[key1].__setitem__(key2, value) def __delitem__(self, key): if not isinstance(key, bytes): raise ValueError("Can only delete a bytes Directory entry") if b"/" not in key: super().__delitem__(key) else: key1, key2 = key.rsplit(b"/", 1) del self[key1][key2] def __contains__(self, key): if b"/" not in key: return super().__contains__(key) else: key1, key2 = key.split(b"/", 1) return super().__contains__(key1) and self[key1].__contains__(key2) def __repr__(self): return "Directory(id=%s, entries=[%s])" % ( hash_to_hex(self.hash), ", ".join(str(entry) for entry in self), ) diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py index 75d9f8b..8d2cb90 100644 --- a/swh/model/hashutil.py +++ b/swh/model/hashutil.py @@ -1,351 +1,353 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Module in charge of hashing function definitions. This is the base module used to compute swh's hashes. Only a subset of hashing algorithms is supported as defined in the ALGORITHMS set. Any provided algorithms not in that list will result in a ValueError explaining the error. This module defines a MultiHash class to ease the computation of the Software Heritage hashes. It allows computing hashes from a file object, a path, or raw data, using an interface similar to the one the standard hashlib module provides. Basic usage examples: - file object: MultiHash.from_file( file_object, hash_names=DEFAULT_ALGORITHMS).digest() - path (filepath): MultiHash.from_path(b'foo').hexdigest() - data (bytes): MultiHash.from_data(b'foo').bytehexdigest() "Complex" usage, defining a swh hashlib instance first: - To compute the length as well, add 'length' to the set of algorithms, for example: .. code-block:: python h = MultiHash(hash_names=set({'length'}).union(DEFAULT_ALGORITHMS)) with open(filepath, 'rb') as f: h.update(f.read(HASH_BLOCK_SIZE)) hashes = h.digest() # returns a dict of {hash_algo_name: hash_in_bytes} - Compute hashes while writing a stream to disk, for example: .. code-block:: python h = MultiHash(length=length) with open(filepath, 'wb') as f: for chunk in r.iter_content(): # r is a stream of some sort h.update(chunk) f.write(chunk) hashes = h.hexdigest() # returns a dict of {hash_algo_name: hash_in_hex} """ import binascii import functools import hashlib from io import BytesIO import os from typing import Callable, Dict, Optional, Union -ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256", "blake2b512", "md5"]) +ALGORITHMS = set( + ["sha1", "sha256", "sha1_git", "blake2s256", "blake2b512", "md5", "sha512"] +) """Hashing algorithms supported by this module""" DEFAULT_ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256"]) """Algorithms computed by default when calling the functions from this module. Subset of :const:`ALGORITHMS`.
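Algorithms outside this default set (e.g. the newly supported sha512) can be requested explicitly, for example:

.. code-block:: python

    MultiHash.from_data(b'foo', hash_names=DEFAULT_ALGORITHMS | {'sha512'}).hexdigest()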
""" HASH_BLOCK_SIZE = 32768 """Block size for streaming hash computations made in this module""" _blake2_hash_cache = {} # type: Dict[str, Callable] class MultiHash: """Hashutil class to support multiple hashes computation. Args: hash_names (set): Set of hash algorithms (+ optionally length) to compute hashes (cf. DEFAULT_ALGORITHMS) length (int): Length of the total sum of chunks to read If the length is provided as algorithm, the length is also computed and returned. """ def __init__(self, hash_names=DEFAULT_ALGORITHMS, length=None): self.state = {} self.track_length = False for name in hash_names: if name == "length": self.state["length"] = 0 self.track_length = True else: self.state[name] = _new_hash(name, length) @classmethod def from_state(cls, state, track_length): ret = cls([]) ret.state = state ret.track_length = track_length @classmethod def from_file(cls, fobj, hash_names=DEFAULT_ALGORITHMS, length=None): ret = cls(length=length, hash_names=hash_names) while True: chunk = fobj.read(HASH_BLOCK_SIZE) if not chunk: break ret.update(chunk) return ret @classmethod def from_path(cls, path, hash_names=DEFAULT_ALGORITHMS): length = os.path.getsize(path) with open(path, "rb") as f: ret = cls.from_file(f, hash_names=hash_names, length=length) return ret @classmethod def from_data(cls, data, hash_names=DEFAULT_ALGORITHMS): length = len(data) fobj = BytesIO(data) return cls.from_file(fobj, hash_names=hash_names, length=length) def update(self, chunk): for name, h in self.state.items(): if name == "length": continue h.update(chunk) if self.track_length: self.state["length"] += len(chunk) def digest(self): return { name: h.digest() if name != "length" else h for name, h in self.state.items() } def hexdigest(self): return { name: h.hexdigest() if name != "length" else h for name, h in self.state.items() } def bytehexdigest(self): return { name: hash_to_bytehex(h.digest()) if name != "length" else h for name, h in self.state.items() } def copy(self): copied_state = { name: h.copy() if name != "length" else h for name, h in self.state.items() } return self.from_state(copied_state, self.track_length) def _new_blake2_hash(algo): """Return a function that initializes a blake2 hash.""" if algo in _blake2_hash_cache: return _blake2_hash_cache[algo]() lalgo = algo.lower() if not lalgo.startswith("blake2"): raise ValueError("Algorithm %s is not a blake2 hash" % algo) blake_family = lalgo[:7] digest_size = None if lalgo[7:]: try: digest_size, remainder = divmod(int(lalgo[7:]), 8) except ValueError: raise ValueError("Unknown digest size for algo %s" % algo) from None if remainder: raise ValueError( "Digest size for algorithm %s must be a multiple of 8" % algo ) blake2 = getattr(hashlib, blake_family) _blake2_hash_cache[algo] = lambda: blake2(digest_size=digest_size) return _blake2_hash_cache[algo]() def _new_hashlib_hash(algo): """Initialize a digest object from hashlib. Handle the swh-specific names for the blake2-related algorithms """ if algo.startswith("blake2"): return _new_blake2_hash(algo) else: return hashlib.new(algo) def git_object_header(git_type: str, length: int) -> bytes: """Returns the header for a git object of the given type and length. 
The header of a git object consists of: - The type of the object (encoded in ASCII) - One ASCII space (\x20) - The length of the object (decimal encoded in ASCII) - One NUL byte Args: git_type: the type of the git object (one of 'blob', 'tree', 'commit', 'tag', 'snapshot', 'raw_extrinsic_metadata', 'extid') length: the length of the git object you're encoding Returns: the git object header, as ascii-encoded bytes """ git_object_types = { "blob", "tree", "commit", "tag", "snapshot", "raw_extrinsic_metadata", "extid", } if git_type not in git_object_types: raise ValueError( "Unexpected git object type %s, expected one of %s" % (git_type, ", ".join(sorted(git_object_types))) ) return ("%s %d\0" % (git_type, length)).encode("ascii") def _new_hash(algo: str, length: Optional[int] = None): """Initialize a digest object (as returned by python's hashlib) for the requested algorithm. See the constant ALGORITHMS for the list of supported algorithms. If a git-specific hashing algorithm is requested (e.g., "sha1_git"), the hashing object will be pre-fed with the needed header; for this to work, length must be given. Args: algo (str): a hashing algorithm (one of ALGORITHMS) length (int): the length of the hashed payload (needed for git-specific algorithms) Returns: a hashutil.hash object Raises: ValueError if algo is unknown, or length is missing for a git-specific hash. """ if algo not in ALGORITHMS: raise ValueError( "Unexpected hashing algorithm %s, expected one of %s" % (algo, ", ".join(sorted(ALGORITHMS))) ) if algo.endswith("_git"): if length is None: raise ValueError("Missing length for git hashing algorithm") base_algo = algo[:-4] h = _new_hashlib_hash(base_algo) h.update(git_object_header("blob", length)) return h return _new_hashlib_hash(algo) def hash_git_data(data, git_type, base_algo="sha1"): """Hash the given data as a git object of type git_type. Args: data: a bytes object git_type: the git object type base_algo: the base hashing algorithm used (default: sha1) Returns: the bytes digest of the git object, computed with base_algo Raises: ValueError if the git_type is unexpected.
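For example, with the blob test data used by this patch's test suite:

.. code-block:: python

    # sha1 of b"blob 3\x00" + b"42\n", i.e. what `git hash-object` computes
    digest = hash_git_data(b"42\n", "blob")
    assert digest.hex() == "d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"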
""" h = _new_hashlib_hash(base_algo) h.update(git_object_header(git_type, len(data))) h.update(data) return h.digest() @functools.lru_cache() def hash_to_hex(hash: Union[str, bytes]) -> str: """Converts a hash (in hex or bytes form) to its hexadecimal ascii form Args: hash (str or bytes): a :class:`bytes` hash or a :class:`str` containing the hexadecimal form of the hash Returns: str: the hexadecimal form of the hash """ if isinstance(hash, str): return hash return binascii.hexlify(hash).decode("ascii") @functools.lru_cache() def hash_to_bytehex(hash: bytes) -> bytes: """Converts a hash to its hexadecimal bytes representation Args: hash (bytes): a :class:`bytes` hash Returns: bytes: the hexadecimal form of the hash, as :class:`bytes` """ return binascii.hexlify(hash) @functools.lru_cache() def hash_to_bytes(hash: Union[str, bytes]) -> bytes: """Converts a hash (in hex or bytes form) to its raw bytes form Args: hash (str or bytes): a :class:`bytes` hash or a :class:`str` containing the hexadecimal form of the hash Returns: bytes: the :class:`bytes` form of the hash """ if isinstance(hash, bytes): return hash return bytes.fromhex(hash) @functools.lru_cache() def bytehex_to_hash(hex: bytes) -> bytes: """Converts a hexadecimal bytes representation of a hash to that hash Args: hash (bytes): a :class:`bytes` containing the hexadecimal form of the hash encoded in ascii Returns: bytes: the :class:`bytes` form of the hash """ return hash_to_bytes(hex.decode()) diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py index 1ab2812..b279f2f 100644 --- a/swh/model/tests/test_hashutil.py +++ b/swh/model/tests/test_hashutil.py @@ -1,324 +1,334 @@ # Copyright (C) 2015-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import contextlib import hashlib import io import os import tempfile from unittest.mock import patch import pytest from swh.model import hashutil from swh.model.hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_bytehex @contextlib.contextmanager def patch_blake2(function_name): try: with patch(function_name) as mock: yield mock finally: # mocking blake2 inserts mock objects in the cache; we need # to clean it before the next test runs hashutil._blake2_hash_cache.clear() @pytest.fixture(autouse=True) def blake2_hash_cache_reset(): # Reset function cache hashutil._blake2_hash_cache = {} @pytest.fixture def hash_test_data(): class HashTestData: data = b"1984\n" hex_checksums = { "sha1": "62be35bf00ff0c624f4a621e2ea5595a049e0731", "sha1_git": "568aaf43d83b2c3df8067f3bedbb97d83260be6d", "sha256": "26602113b4b9afd9d55466b08580d3c2" "4a9b50ee5b5866c0d91fab0e65907311", "blake2s256": "63cfb259e1fdb485bc5c55749697a6b21ef31fb7445f6c78a" "c9422f9f2dc8906", } checksums = { type: bytes.fromhex(cksum) for type, cksum in hex_checksums.items() } bytehex_checksums = { type: hashutil.hash_to_bytehex(cksum) for type, cksum in checksums.items() } git_hex_checksums = { "blob": hex_checksums["sha1_git"], "tree": "5b2e883aa33d2efab98442693ea4dd5f1b8871b0", "commit": "79e4093542e72f0fcb7cbd75cb7d270f9254aa8f", "tag": "d6bf62466f287b4d986c545890716ce058bddf67", } git_checksums = { type: bytes.fromhex(cksum) for type, cksum in git_hex_checksums.items() } return HashTestData def test_multi_hash_data(hash_test_data): checksums = MultiHash.from_data(hash_test_data.data).digest() assert checksums == 
hash_test_data.checksums assert "length" not in checksums def test_multi_hash_data_with_length(hash_test_data): expected_checksums = hash_test_data.checksums.copy() expected_checksums["length"] = len(hash_test_data.data) algos = set(["length"]).union(hashutil.DEFAULT_ALGORITHMS) checksums = MultiHash.from_data(hash_test_data.data, hash_names=algos).digest() assert checksums == expected_checksums assert "length" in checksums def test_multi_hash_data_unknown_hash(hash_test_data): with pytest.raises(ValueError, match="Unexpected hashing algorithm.*unknown-hash"): MultiHash.from_data(hash_test_data.data, ["unknown-hash"]) def test_multi_hash_file(hash_test_data): fobj = io.BytesIO(hash_test_data.data) checksums = MultiHash.from_file(fobj, length=len(hash_test_data.data)).digest() assert checksums == hash_test_data.checksums def test_multi_hash_file_hexdigest(hash_test_data): fobj = io.BytesIO(hash_test_data.data) length = len(hash_test_data.data) checksums = MultiHash.from_file(fobj, length=length).hexdigest() assert checksums == hash_test_data.hex_checksums def test_multi_hash_file_bytehexdigest(hash_test_data): fobj = io.BytesIO(hash_test_data.data) length = len(hash_test_data.data) checksums = MultiHash.from_file(fobj, length=length).bytehexdigest() assert checksums == hash_test_data.bytehex_checksums -def test_multi_hash_file_with_md5(hash_test_data): +EXTRA_HASH_ALGOS = ["md5", "sha512"] + + +@pytest.mark.parametrize("hash_algo", EXTRA_HASH_ALGOS) +def test_multi_hash_file_with_extra_hash_algo(hash_test_data, hash_algo): fobj = io.BytesIO(hash_test_data.data) checksums = MultiHash.from_file( - fobj, hash_names=DEFAULT_ALGORITHMS | {"md5"}, length=len(hash_test_data.data) + fobj, + hash_names=DEFAULT_ALGORITHMS | {hash_algo}, + length=len(hash_test_data.data), ).digest() - md5sum = {"md5": hashlib.md5(hash_test_data.data).digest()} - assert checksums == {**hash_test_data.checksums, **md5sum} + checksum = {hash_algo: hashlib.new(hash_algo, hash_test_data.data).digest()} + assert checksums == {**hash_test_data.checksums, **checksum} -def test_multi_hash_file_hexdigest_with_md5(hash_test_data): +@pytest.mark.parametrize("hash_algo", EXTRA_HASH_ALGOS) +def test_multi_hash_file_hexdigest_with_extra_hash_algo(hash_test_data, hash_algo): fobj = io.BytesIO(hash_test_data.data) length = len(hash_test_data.data) checksums = MultiHash.from_file( - fobj, hash_names=DEFAULT_ALGORITHMS | {"md5"}, length=length + fobj, hash_names=DEFAULT_ALGORITHMS | {hash_algo}, length=length ).hexdigest() - md5sum = {"md5": hashlib.md5(hash_test_data.data).hexdigest()} - assert checksums == {**hash_test_data.hex_checksums, **md5sum} + checksum = {hash_algo: hashlib.new(hash_algo, hash_test_data.data).hexdigest()} + assert checksums == {**hash_test_data.hex_checksums, **checksum} -def test_multi_hash_file_bytehexdigest_with_md5(hash_test_data): +@pytest.mark.parametrize("hash_algo", EXTRA_HASH_ALGOS) +def test_multi_hash_file_bytehexdigest_with_extra_algo(hash_test_data, hash_algo): fobj = io.BytesIO(hash_test_data.data) length = len(hash_test_data.data) checksums = MultiHash.from_file( - fobj, hash_names=DEFAULT_ALGORITHMS | {"md5"}, length=length + fobj, hash_names=DEFAULT_ALGORITHMS | {hash_algo}, length=length ).bytehexdigest() - md5sum = {"md5": hash_to_bytehex(hashlib.md5(hash_test_data.data).digest())} - assert checksums == {**hash_test_data.bytehex_checksums, **md5sum} + checksum = { + hash_algo: hash_to_bytehex(hashlib.new(hash_algo, hash_test_data.data).digest()) + } + assert checksums == 
{**hash_test_data.bytehex_checksums, **checksum} def test_multi_hash_file_missing_length(hash_test_data): fobj = io.BytesIO(hash_test_data.data) with pytest.raises(ValueError, match="Missing length"): MultiHash.from_file(fobj, hash_names=["sha1_git"]) def test_multi_hash_path(hash_test_data): with tempfile.NamedTemporaryFile(delete=False) as f: f.write(hash_test_data.data) hashes = MultiHash.from_path(f.name).digest() os.remove(f.name) assert hash_test_data.checksums == hashes def test_hash_git_data(hash_test_data): checksums = { git_type: hashutil.hash_git_data(hash_test_data.data, git_type) for git_type in hash_test_data.git_checksums } assert checksums == hash_test_data.git_checksums def test_hash_git_data_unknown_git_type(hash_test_data): with pytest.raises( ValueError, match="Unexpected git object type.*unknown-git-type" ): hashutil.hash_git_data(hash_test_data.data, "unknown-git-type") def test_hash_to_hex(hash_test_data): for type in hash_test_data.checksums: hex = hash_test_data.hex_checksums[type] hash = hash_test_data.checksums[type] assert hashutil.hash_to_hex(hex) == hex assert hashutil.hash_to_hex(hash) == hex def test_hash_to_bytes(hash_test_data): for type in hash_test_data.checksums: hex = hash_test_data.hex_checksums[type] hash = hash_test_data.checksums[type] assert hashutil.hash_to_bytes(hex) == hash assert hashutil.hash_to_bytes(hash) == hash def test_hash_to_bytehex(hash_test_data): for algo in hash_test_data.checksums: hex_checksum = hash_test_data.hex_checksums[algo].encode("ascii") assert hex_checksum == hashutil.hash_to_bytehex(hash_test_data.checksums[algo]) def test_bytehex_to_hash(hash_test_data): for algo in hash_test_data.checksums: assert hash_test_data.checksums[algo] == hashutil.bytehex_to_hash( hash_test_data.hex_checksums[algo].encode() ) def test_new_hash_unsupported_hashing_algorithm(): expected_message = ( "Unexpected hashing algorithm blake2:10, " "expected one of blake2b512, blake2s256, " "md5, sha1, sha1_git, sha256, sha512" ) with pytest.raises(ValueError, match=expected_message): hashutil._new_hash("blake2:10") def test_new_hash_blake2b_builtin(): with patch_blake2("hashlib.blake2b") as mock_blake2b: mock_blake2b.return_value = sentinel = object() h = hashutil._new_hash("blake2b512") assert h is sentinel mock_blake2b.assert_called_with(digest_size=512 // 8) def test_new_hash_blake2s_builtin(): with patch_blake2("hashlib.blake2s") as mock_blake2s: mock_blake2s.return_value = sentinel = object() h = hashutil._new_hash("blake2s256") assert h is sentinel mock_blake2s.assert_called_with(digest_size=256 // 8) @pytest.fixture def hashgit_test_data(): class HashGitTestData: blob_data = b"42\n" tree_data = b"".join( [ b"40000 barfoo\0", bytes.fromhex("c3020f6bf135a38c6df" "3afeb5fb38232c5e07087"), b"100644 blah\0", bytes.fromhex("63756ef0df5e4f10b6efa" "33cfe5c758749615f20"), b"100644 hello\0", bytes.fromhex("907b308167f0880fb2a" "5c0e1614bb0c7620f9dc3"), ] ) commit_data = b"""\ tree 1c61f7259dcb770f46b194d941df4f08ff0a3970 author Antoine R. Dumont (@ardumont) 1444054085 +0200 committer Antoine R. Dumont (@ardumont) 1444054085 +0200 initial """ # noqa tag_data = """object 24d012aaec0bc5a4d2f62c56399053d6cc72a241 type commit tag 0.0.1 tagger Antoine R.
Dumont (@ardumont) 1444225145 +0200 blah """.encode( "utf-8" ) # NOQA checksums = { "blob_sha1_git": bytes.fromhex( "d81cc0710eb6cf9efd5b920a8453e1" "e07157b6cd" ), "tree_sha1_git": bytes.fromhex( "ac212302c45eada382b27bfda795db" "121dacdb1c" ), "commit_sha1_git": bytes.fromhex( "e960570b2e6e2798fa4cfb9af2c399" "d629189653" ), "tag_sha1_git": bytes.fromhex( "bc2b99ba469987bcf1272c189ed534" "e9e959f120" ), } return HashGitTestData def test_unknown_header_type(): with pytest.raises(ValueError, match="Unexpected git object type"): hashutil.hash_git_data(b"any-data", "some-unknown-type") def test_hashdata_content(hashgit_test_data): # when actual_hash = hashutil.hash_git_data(hashgit_test_data.blob_data, git_type="blob") # then assert actual_hash == hashgit_test_data.checksums["blob_sha1_git"] def test_hashdata_tree(hashgit_test_data): # when actual_hash = hashutil.hash_git_data(hashgit_test_data.tree_data, git_type="tree") # then assert actual_hash == hashgit_test_data.checksums["tree_sha1_git"] def test_hashdata_revision(hashgit_test_data): # when actual_hash = hashutil.hash_git_data( hashgit_test_data.commit_data, git_type="commit" ) # then assert actual_hash == hashgit_test_data.checksums["commit_sha1_git"] def test_hashdata_tag(hashgit_test_data): # when actual_hash = hashutil.hash_git_data(hashgit_test_data.tag_data, git_type="tag") # then assert actual_hash == hashgit_test_data.checksums["tag_sha1_git"]
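As a companion to the `from_disk.py` hunk above, here is a hedged sketch of how the adjusted pattern handling is meant to be used; the root path and the ignore pattern below are hypothetical, and relative patterns are now resolved against the scanned root:

```python
from swh.model.from_disk import Directory, ignore_directories_patterns

# Build a dir_filter that skips any directory matching */node_modules
# under the (hypothetical) root path
dir_filter = ignore_directories_patterns(b"/srv/src/project", [b"*/node_modules"])
root = Directory.from_disk(path=b"/srv/src/project", dir_filter=dir_filter)
```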