diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py
index 4767c39..a9fc2d3 100644
--- a/swh/model/from_disk.py
+++ b/swh/model/from_disk.py
@@ -1,347 +1,350 @@
 # Copyright (C) 2017-2018 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import enum
 import os
 import stat
 
 from .hashutil import MultiHash, HASH_BLOCK_SIZE
 from .merkle import MerkleLeaf, MerkleNode
 from .identifiers import (
     directory_identifier,
     identifier_to_bytes as id_to_bytes,
     identifier_to_str as id_to_str,
 )
 
 
 class DentryPerms(enum.IntEnum):
     """Admissible permissions for directory entries.
 
     Each member is the octal mode number stored in the ``perms`` field of a
     directory entry. Being an :class:`enum.IntEnum`, members compare equal
     to the plain integers they wrap.
     """
     content = 0o100644
     """Content"""
     executable_content = 0o100755
     """Executable content (e.g. executable script)"""
     symlink = 0o120000
     """Symbolic link"""
     directory = 0o040000
     """Directory"""
     revision = 0o160000
     """Revision (e.g. submodule)"""
 
 
 def mode_to_perms(mode):
     """Map a raw file mode onto the matching Software Heritage permission.
 
     Args:
       mode (int): a file mode as found in the ``st_mode`` field of the
                   result of :func:`os.stat`
 
     Returns:
       DentryPerms: the permission to record for that mode:
         :const:`DentryPerms.symlink` for a symbolic link,
         :const:`DentryPerms.directory` for a directory,
         :const:`DentryPerms.executable_content` for anything else with at
         least one execute bit set,
         :const:`DentryPerms.content` otherwise.
 
     """
     if stat.S_ISLNK(mode):
         perms = DentryPerms.symlink
     elif stat.S_ISDIR(mode):
         perms = DentryPerms.directory
     elif mode & 0o111:
         # at least one of the user/group/other execute bits is set
         perms = DentryPerms.executable_content
     else:
         perms = DentryPerms.content
     return perms
 
 
 class Content(MerkleLeaf):
     """Representation of a Software Heritage content as a node in a Merkle tree.
 
     The current Merkle hash for the Content nodes is the `sha1_git`, which
     makes it consistent with what :class:`Directory` uses for its own hash
     computation.
 
     """
     __slots__ = []
     type = 'content'
 
     @classmethod
     def from_bytes(cls, *, mode, data):
         """Convert data (raw :class:`bytes`) to a Software Heritage content entry
 
         Args:
           mode (int): a file mode (passed to :func:`mode_to_perms`)
           data (bytes): raw contents of the file
 
         Returns:
           Content: a node built from the digests computed by
           :class:`MultiHash`, plus the `length`, `perms` and `data` keys
         """
         length = len(data)
         ret = MultiHash.from_data(data, length=length).digest()
         ret['length'] = length
         ret['perms'] = mode_to_perms(mode)
         ret['data'] = data
 
         return cls(ret)
 
     @classmethod
     def from_symlink(cls, *, path, mode):
         """Convert a symbolic link to a Software Heritage content entry
 
         The link target, as returned by :func:`os.readlink`, is used as the
         raw data of the entry.
 
         Args:
           path: path to the symbolic link
           mode (int): a file mode (passed to :func:`mode_to_perms`)
         """
         return cls.from_bytes(mode=mode, data=os.readlink(path))
 
     @classmethod
     def from_file(cls, *, path, data=False, save_path=False):
         """Compute the Software Heritage content entry corresponding to an
         on-disk file.
 
         The dictionary wrapped by the returned :class:`Content` contains keys
         useful for both:
         - loading the content in the archive (hashes, `length`)
         - using the content as a directory entry in a directory
 
         Args:
           path (bytes): path to the file for which we're computing the
             content entry
           data (bool): add the file data to the entry
           save_path (bool): add the file path to the entry
 
         Returns:
           Content: the entry for the file; a symbolic link yields an entry
           whose data is the link target, and any other non-regular file
           yields an entry for the empty file
 
         """
         # lstat (not stat): a symbolic link must be described itself, not
         # followed to its target
         file_stat = os.lstat(path)
         mode = file_stat.st_mode
 
         if stat.S_ISLNK(mode):
             # Symbolic link: return a file whose contents are the link target
             return cls.from_symlink(path=path, mode=mode)
         elif not stat.S_ISREG(mode):
             # not a regular file: return the empty file instead
             return cls.from_bytes(mode=mode, data=b'')
 
         length = file_stat.st_size
 
         if not data:
             ret = MultiHash.from_path(path).digest()
         else:
             # hash the file and keep its chunks in a single pass, so the
             # data does not have to be read twice
             h = MultiHash(length=length)
             chunks = []
             with open(path, 'rb') as fobj:
-                for chunk in fobj:
+                while True:
+                    chunk = fobj.read(HASH_BLOCK_SIZE)
+                    if not chunk:
+                        break
                     h.update(chunk)
                     chunks.append(chunk)
 
             ret = h.digest()
             ret['data'] = b''.join(chunks)
 
         if save_path:
             ret['path'] = path
         ret['perms'] = mode_to_perms(mode)
         ret['length'] = length
 
         obj = cls(ret)
         return obj
 
     def __repr__(self):
         return 'Content(id=%s)' % id_to_str(self.hash)
 
     def compute_hash(self):
         """Return the `sha1_git` stored in the node data as the node hash."""
         return self.data['sha1_git']
 
 
 def accept_all_directories(dirname, entries):
     """Default filter for :func:`Directory.from_disk` accepting all
     directories
 
     Args:
       dirname (bytes): directory name
       entries (list): directory entries
     Returns:
       True, whatever the arguments are
     """
     return True
 
 
 def ignore_empty_directories(dirname, entries):
     """Directory filter ignoring empty directories
 
     It takes the same two arguments as the ``dir_filter`` callable accepted
     by :func:`Directory.from_disk`.
 
     Args:
       dirname (bytes): directory name (not used by this filter)
       entries (list): directory entries
     Returns:
       True if the directory is not empty, false if the directory is empty
     """
     return bool(entries)
 
 
 def ignore_named_directories(names, *, case_sensitive=True):
     """Build a directory filter rejecting the directories named one of names.
 
     Args:
       names (list of bytes): names to ignore
       case_sensitive (bool): whether names are compared in a case sensitive
         way; when false, both sides of the comparison are lowercased
     Returns:
       a two-argument filter (dirname, entries) returning False for the
       directories to ignore and True for all the others
     """
     # Lowercase the reference names once, rather than on every filter call.
     ignored = names if case_sensitive else [n.lower() for n in names]
 
     def named_filter(dirname, entries,
                      names=ignored, case_sensitive=case_sensitive):
         candidate = dirname if case_sensitive else dirname.lower()
         return candidate not in names
 
     return named_filter
 
 
 class Directory(MerkleNode):
     """Representation of a Software Heritage directory as a node in a Merkle Tree.
 
     This class can be used to generate, from an on-disk directory, all the
     objects that need to be sent to the Software Heritage archive.
 
     The :func:`from_disk` constructor allows you to generate the data structure
     from a directory on disk. The resulting :class:`Directory` can then be
     manipulated as a dictionary, using the path as key.
 
     The :func:`collect` method is used to retrieve all the objects that need to
     be added to the Software Heritage archive since the last collection, by
     class (contents and directories).
 
     When using the dict-like methods to update the contents of the directory,
     the affected levels of hierarchy are reset and can be collected again using
     the same method. This enables the efficient collection of updated nodes,
     for instance when the client is applying diffs.
     """
     __slots__ = ['__entries']
     type = 'directory'
 
     @classmethod
     def from_disk(cls, *, path, data=False, save_path=False,
                   dir_filter=accept_all_directories):
         """Compute the Software Heritage objects for a given directory tree
 
         Args:
           path (bytes): the directory to traverse
           data (bool): whether to add the data to the content objects
           save_path (bool): whether to add the path to the content objects
           dir_filter (function): a filter to ignore some directories by
             name or contents. Takes two arguments: dirname and entries, and
             returns True if the directory should be added, False if the
             directory should be ignored.
 
         Returns:
           Directory: the node built for `path` itself
         """
 
         # `path` is rebound for every entry in the loop below, so keep the
         # requested root under its own name
         top_path = path
         # maps each visited directory path to the Directory built for it
         dirs = {}
 
         # Bottom-up walk (topdown=False): every sub-directory is built and
         # stored in `dirs` before its parent looks it up
         for root, dentries, fentries in os.walk(top_path, topdown=False):
             entries = {}
             # Join fentries and dentries in the same processing, as symbolic
             # links to directories appear in dentries...
             for name in fentries + dentries:
                 path = os.path.join(root, name)
                 if not os.path.isdir(path) or os.path.islink(path):
                     # anything that is not a real (non-symlink) directory
                     # is turned into a Content
                     content = Content.from_file(path=path, data=data,
                                                 save_path=save_path)
                     entries[name] = content
                 else:
                     # rejected directories are simply not attached to their
                     # parent
                     if dir_filter(name, dirs[path].entries):
                         entries[name] = dirs[path]
 
             dirs[root] = cls({'name': os.path.basename(root)})
             dirs[root].update(entries)
 
         return dirs[top_path]
 
     def __init__(self, data=None):
         super().__init__(data=data)
         # lazily computed list of directory entries, see `entries`
         self.__entries = None
 
     def invalidate_hash(self):
         """Drop the cached entries list, then invalidate the hash."""
         self.__entries = None
         super().invalidate_hash()
 
     @staticmethod
     def child_to_directory_entry(name, child):
         """Describe a child node as a directory entry.
 
         Args:
           name: name of the child in the directory
           child: a :class:`Directory` or :class:`Content` node
 
         Returns:
           dict: a dictionary with the `type`, `perms`, `target` and `name`
           keys
 
         Raises:
           ValueError: if child is neither a Directory nor a Content
         """
         if isinstance(child, Directory):
             return {
                 'type': 'dir',
                 'perms': DentryPerms.directory,
                 'target': child.hash,
                 'name': name,
             }
         elif isinstance(child, Content):
             return {
                 'type': 'file',
                 'perms': child.data['perms'],
                 'target': child.hash,
                 'name': name,
             }
         else:
             raise ValueError('unknown child')
 
     def get_data(self, **kwargs):
         """Return a dictionary holding the `id` and `entries` of the node."""
         return {
             'id': self.hash,
             'entries': self.entries,
         }
 
     @property
     def entries(self):
         """List of directory entries, one per child.
 
         The list is computed on first access and kept until
         :func:`invalidate_hash` is called.
         """
         if self.__entries is None:
             self.__entries = [
                 self.child_to_directory_entry(name, child)
                 for name, child in self.items()
             ]
 
         return self.__entries
 
     def compute_hash(self):
         """Compute the directory identifier from the current entries."""
         return id_to_bytes(directory_identifier({'entries': self.entries}))
 
     def __getitem__(self, key):
         """Look up a child by name, or a descendant by '/'-separated path.
 
         Raises:
           ValueError: if key is not a bytes object
         """
         if not isinstance(key, bytes):
             raise ValueError('Can only get a bytes from Directory')
 
         # Convenience shortcut
         if key == b'':
             return self
 
         if b'/' not in key:
             return super().__getitem__(key)
         else:
             # resolve the first path component here, and let the child
             # resolve the remainder
             key1, key2 = key.split(b'/', 1)
             return self.__getitem__(key1)[key2]
 
     def __setitem__(self, key, value):
         """Set a child by name, or a descendant by '/'-separated path.
 
         Raises:
           ValueError: if key is not a bytes object, is empty or contains a
             nul byte, or if value is neither a Content nor a Directory
         """
         if not isinstance(key, bytes):
             raise ValueError('Can only set a bytes Directory entry')
         if not isinstance(value, (Content, Directory)):
             raise ValueError('Can only set a Directory entry to a Content or '
                              'Directory')
 
         if key == b'':
             raise ValueError('Directory entry must have a name')
         if b'\x00' in key:
             raise ValueError('Directory entry name must not contain nul bytes')
 
         if b'/' not in key:
             return super().__setitem__(key, value)
         else:
             # split on the last separator: the parent is looked up through
             # __getitem__, then the entry is set on that parent
             key1, key2 = key.rsplit(b'/', 1)
             self[key1].__setitem__(key2, value)
 
     def __delitem__(self, key):
         """Delete a child by name, or a descendant by '/'-separated path.
 
         Raises:
           ValueError: if key is not a bytes object
         """
         if not isinstance(key, bytes):
             raise ValueError('Can only delete a bytes Directory entry')
 
         if b'/' not in key:
             super().__delitem__(key)
         else:
             # same split as in __setitem__: parent path, then entry name
             key1, key2 = key.rsplit(b'/', 1)
             del self[key1][key2]
 
     def __repr__(self):
         return 'Directory(id=%s, entries=[%s])' % (
             id_to_str(self.hash),
             ', '.join(str(entry) for entry in self),
         )
diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
index d8249bc..24c2f69 100644
--- a/swh/model/hashutil.py
+++ b/swh/model/hashutil.py
@@ -1,455 +1,458 @@
 # Copyright (C) 2015-2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 """Module in charge of hashing function definitions. This is the base
 module used to compute swh's hashes.
 
 Only a subset of hashing algorithms is supported as defined in the
 ALGORITHMS set. Any provided algorithms not in that list will result
 in a ValueError explaining the error.
 
 This module defines a MultiHash class to ease the softwareheritage
 hashing algorithms computation. This allows to compute hashes from
 file object, path, data using a similar interface as what the standard
 hashlib module provides.
 
 Basic usage examples:
 
 - file object: MultiHash.from_file(
                  file_object, hash_names=DEFAULT_ALGORITHMS).digest()
 
 - path (filepath): MultiHash.from_path(b'foo').hexdigest()
 
 - data (bytes): MultiHash.from_data(b'foo').bytehexdigest()
 
 
 "Complex" usage, defining a swh hashlib instance first:
 
 - To compute length, integrate the length to the set of algorithms to
   compute, for example:
 
     h = MultiHash(hash_names=set({'length'}).union(DEFAULT_ALGORITHMS))
     with open(filepath, 'rb') as f:
         h.update(f.read(HASH_BLOCK_SIZE))
     hashes = h.digest()  # returns a dict of {hash_algo_name: hash_in_bytes}
 
   To hash a whole file, call h.update() on every chunk read from it, then
   use h as you would a hashlib object.
 
 - Write alongside computing hashing algorithms (from a stream), example:
 
     h = MultiHash(length=length)
     with open(filepath, 'wb') as f:
         for chunk in r.iter_content():  # r a stream of sort
             h.update(chunk)
             f.write(chunk)
     hashes = h.hexdigest()  # returns a dict of {hash_algo_name: hash_in_hex}
 
     Note: Prior to this, we would have to use chunk_cb (cf. hash_file,
           hash_path)
 
 
 This module also defines the following (deprecated) hashing functions:
 
 - hash_file: Hash the contents of the given file object with the given
   algorithms (defaulting to DEFAULT_ALGORITHMS if none provided).
 
 - hash_data: Hash the given binary blob with the given algorithms
   (defaulting to DEFAULT_ALGORITHMS if none provided).
 
 - hash_path: Hash the contents of the file at the given path with the
   given algorithms (defaulting to DEFAULT_ALGORITHMS if none
   provided).
 
 """
 
 import binascii
 import functools
 import hashlib
 import os
 
 from io import BytesIO
 
 ALGORITHMS = {'sha1', 'sha256', 'sha1_git', 'blake2s256', 'blake2b512'}
 """Hashing algorithms supported by this module"""
 
 DEFAULT_ALGORITHMS = {'sha1', 'sha256', 'sha1_git', 'blake2s256'}
 """Algorithms computed by default when calling the functions from this module.
 
 Subset of :const:`ALGORITHMS`.
 """
 
 HASH_BLOCK_SIZE = 32 * 1024
 """Block size for streaming hash computations made in this module"""
 
 # algorithm name -> zero-argument factory returning a new hash object
 _blake2_hash_cache = {}
 
 
 class MultiHash:
     """Hashutil class to support multiple hashes computation.
 
     Args:
 
         hash_names (set): Set of hash algorithms (+ optionally length)
                           to compute hashes (cf. DEFAULT_ALGORITHMS)
         length (int): Length of the total sum of chunks to read
 
     If the length is provided as algorithm, the length is also
     computed and returned.
 
     """
     def __init__(self, hash_names=DEFAULT_ALGORITHMS, length=None):
         # state maps every requested name to its hash object; the special
         # 'length' name maps to a running byte count instead
         self.state = {}
         self.track_length = False
         for name in hash_names:
             if name == 'length':
                 self.state['length'] = 0
                 self.track_length = True
             else:
                 self.state[name] = _new_hash(name, length)
 
     @classmethod
     def from_state(cls, state, track_length):
         """Build an instance around an already populated state.
 
         Args:
             state (dict): mapping from names to hash objects (and, for the
                           'length' name, to the byte count)
             track_length (bool): whether update() must keep counting bytes
 
         Returns:
             MultiHash: a new instance using `state` as its state
         """
         ret = cls([])
         ret.state = state
         ret.track_length = track_length
         # The instance must be handed back to the caller: without this
         # return, from_state() and therefore copy() evaluate to None.
         return ret
 
     @classmethod
-    def from_file(cls, file, hash_names=DEFAULT_ALGORITHMS, length=None):
+    def from_file(cls, fobj, hash_names=DEFAULT_ALGORITHMS, length=None):
         """Hash everything that can be read from a file-like object.
 
         Args:
             fobj: object read through successive read(HASH_BLOCK_SIZE)
                   calls, until one of them returns an empty chunk
             hash_names (set): names of the hashes to compute
             length (int): total length of the data, forwarded to the
                           constructor
 
         Returns:
             MultiHash: an instance updated with all the chunks read
         """
         ret = cls(length=length, hash_names=hash_names)
-        for chunk in file:
+        while True:
+            chunk = fobj.read(HASH_BLOCK_SIZE)
+            if not chunk:
+                break
             ret.update(chunk)
         return ret
 
     @classmethod
     def from_path(cls, path, hash_names=DEFAULT_ALGORITHMS, length=None,
                   track_length=True):
         """Hash the contents of the file found at path.
 
         Args:
             path: path of the file to open (in binary mode) and hash
             hash_names (set): names of the hashes to compute
             length (int): length of the file; when it is missing (or 0),
                           the size reported by os.path.getsize is used
             track_length (bool): whether to store the length in the state
                                  under the 'length' key
 
         Returns:
             MultiHash: an instance updated with the whole file
         """
         if not length:
             length = os.path.getsize(path)
         with open(path, 'rb') as f:
             ret = cls.from_file(f, hash_names=hash_names, length=length)
         # For compatibility reason with `hash_path`
         if track_length:
             ret.state['length'] = length
         return ret
 
     @classmethod
     def from_data(cls, data, hash_names=DEFAULT_ALGORITHMS, length=None):
         """Hash an in-memory bytes object.
 
         Args:
             data (bytes): the data to hash
             hash_names (set): names of the hashes to compute
             length (int): length of the data; when it is missing (or 0),
                           len(data) is used
 
         Returns:
             MultiHash: an instance updated with the whole data
         """
         if not length:
             length = len(data)
         fobj = BytesIO(data)
         return cls.from_file(fobj, hash_names=hash_names, length=length)
 
     def update(self, chunk):
         """Feed chunk to every hash object, and count it if requested."""
         for name, h in self.state.items():
             if name == 'length':
                 continue
             h.update(chunk)
         if self.track_length:
             self.state['length'] += len(chunk)
 
     def digest(self):
         """Return a dict mapping each name to its digest as bytes.
 
         The 'length' entry, when present, is returned unchanged.
         """
         return {
             name: h.digest() if name != 'length' else h
             for name, h in self.state.items()
         }
 
     def hexdigest(self):
         """Return a dict mapping each name to its hexadecimal digest.
 
         The 'length' entry, when present, is returned unchanged.
         """
         return {
             name: h.hexdigest() if name != 'length' else h
             for name, h in self.state.items()
         }
 
     def bytehexdigest(self):
         """Return a dict mapping each name to its hexadecimal digest, as
         bytes.
 
         The 'length' entry, when present, is returned unchanged.
         """
         return {
             name: hash_to_bytehex(h.digest()) if name != 'length' else h
             for name, h in self.state.items()
         }
 
     def copy(self):
         """Return a new instance whose hash objects are copies of the
         current ones, so that both can be updated independently.
         """
         copied_state = {
             name: h.copy() if name != 'length' else h
             for name, h in self.state.items()
         }
         return self.from_state(copied_state, self.track_length)
 
 
 def _new_blake2_hash(algo):
     """Return a new, empty blake2 hash object for the given algorithm name.
 
     The zero-argument factory building the hash object is stored in
     ``_blake2_hash_cache`` the first time a name is seen, so later calls
     for the same name skip the name parsing below.
 
     Args:
         algo (str): a name starting with 'blake2' (case insensitive),
                     optionally followed by a digest size in bits, e.g.
                     'blake2s256'
 
     Raises:
         ValueError: if the name does not start with 'blake2', or if its
                     size suffix is not an integer multiple of 8
 
     """
     if algo in _blake2_hash_cache:
         return _blake2_hash_cache[algo]()
 
     lalgo = algo.lower()
     if not lalgo.startswith('blake2'):
         raise ValueError('Algorithm %s is not a blake2 hash' % algo)
 
     # first 7 characters: the family name, e.g. 'blake2s' or 'blake2b'
     blake_family = lalgo[:7]
 
     # NOTE(review): when the name carries no size suffix, digest_size stays
     # None and is passed as-is to the blake2 constructor below -- confirm
     # this case cannot happen or is supported.
     digest_size = None
     if lalgo[7:]:
         try:
             # the suffix is a size in bits, the constructor wants bytes
             digest_size, remainder = divmod(int(lalgo[7:]), 8)
         except ValueError:
             raise ValueError(
                 'Unknown digest size for algo %s' % algo
             ) from None
         if remainder:
             raise ValueError(
                 'Digest size for algorithm %s must be a multiple of 8' % algo
             )
 
     if lalgo in hashlib.algorithms_available:
         # Handle the case where OpenSSL ships the given algorithm
         # (e.g. Python 3.5 on Debian 9 stretch)
         _blake2_hash_cache[algo] = lambda: hashlib.new(lalgo)
     else:
         # Try using the built-in implementation for Python 3.6+
         if blake_family in hashlib.algorithms_available:
             blake2 = getattr(hashlib, blake_family)
         else:
             # last resort: the pyblake2 module, imported only when needed
             import pyblake2
             blake2 = getattr(pyblake2, blake_family)
 
         _blake2_hash_cache[algo] = lambda: blake2(digest_size=digest_size)
 
     return _blake2_hash_cache[algo]()
 
 
 def _new_hashlib_hash(algo):
     """Build a new digest object for the given algorithm name.
 
     Names starting with 'blake2' follow the swh-specific naming and are
     delegated to :func:`_new_blake2_hash`; any other name is handed
     directly to :func:`hashlib.new`.
     """
     is_blake2 = algo.startswith('blake2')
     return _new_blake2_hash(algo) if is_blake2 else hashlib.new(algo)
 
 
 def _new_git_hash(base_algo, git_type, length):
     """Build a digest object already fed with a git object header.
 
     The header which precedes the payload of a hashed git object is made
     of:
      - The type of the object (encoded in ASCII)
      - One ASCII space (\x20)
      - The length of the object (decimal encoded in ASCII)
      - One NUL byte
 
     Args:
         base_algo (str from :const:`ALGORITHMS`): a hashlib-supported algorithm
         git_type: the type of the git object (supposedly one of 'blob',
                   'commit', 'tag', 'tree')
         length: the length of the git object you're encoding
 
     Returns:
         a hashutil.hash object, to be updated with the object payload
     """
     hasher = _new_hashlib_hash(base_algo)
     header = '%s %d\0' % (git_type, length)
     hasher.update(header.encode('ascii'))
     return hasher
 
 
 def _new_hash(algo, length=None):
     """Build a digest object (as returned by python's hashlib) for the
     requested algorithm, which must be one of :const:`ALGORITHMS`.
 
     For a git-specific algorithm (a name ending with "_git", e.g.
     "sha1_git"), the returned object has already been fed with the header
     of a git blob of the given length; length is therefore mandatory for
     these algorithms.
 
     Args:
         algo (str): a hashing algorithm (one of ALGORITHMS)
         length (int): the length of the hashed payload (needed for
           git-specific algorithms)
 
     Returns:
         a hashutil.hash object
 
     Raises:
         ValueError if algo is unknown, or length is missing for a git-specific
         hash.
 
     """
     if algo not in ALGORITHMS:
         supported = ', '.join(sorted(ALGORITHMS))
         raise ValueError(
             'Unexpected hashing algorithm %s, expected one of %s' %
             (algo, supported))
 
     if not algo.endswith('_git'):
         return _new_hashlib_hash(algo)
 
     if length is None:
         raise ValueError('Missing length for git hashing algorithm')
     # strip the '_git' suffix to get the name of the underlying algorithm
     return _new_git_hash(algo[:-len('_git')], 'blob', length)
 
 
 def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS,
               chunk_cb=None):
     """(Deprecated) cf. MultiHash.from_file
 
     Compute, with the given algorithms, the hashes of everything that can be
     read from a file object.
 
     Args:
         fobj: a file-like object, read by blocks of HASH_BLOCK_SIZE
         length (int): the length of the contents of the file-like
                       object (for the git-specific algorithms)
         algorithms (set): the hashing algorithms to be used, as an
                           iterable over strings
         chunk_cb (fun): a callback function, called with each chunk of
                         data once the chunk has been hashed
 
     Returns:
         a dict mapping each algorithm to a digest (bytes by default).
 
     Raises:
         ValueError if algorithms contains an unknown hash algorithm.
 
     """
     hasher = MultiHash(algorithms, length)
     chunk = fobj.read(HASH_BLOCK_SIZE)
     # an empty read marks the end of the data
     while chunk:
         hasher.update(chunk)
         if chunk_cb:
             chunk_cb(chunk)
         chunk = fobj.read(HASH_BLOCK_SIZE)
 
     return hasher.digest()
 
 
 def hash_path(path, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None):
     """(deprecated) cf. MultiHash.from_path
 
     Compute, with the given algorithms, the hashes of the file found at the
     given path.
 
     Args:
         path (str): the path of the file to hash
         algorithms (set): the hashing algorithms used
         chunk_cb (fun): a callback function taking a chunk of data as parameter
 
     Returns: a dict mapping each algorithm to a bytes digest, with the size
         of the file added under the 'length' key.
 
     Raises:
         ValueError if algorithms contains an unknown hash algorithm.
         OSError on file access error
 
     """
     size = os.path.getsize(path)
     with open(path, 'rb') as stream:
         digests = hash_file(stream, size, algorithms, chunk_cb=chunk_cb)
     digests['length'] = size
     return digests
 
 
 def hash_data(data, algorithms=DEFAULT_ALGORITHMS):
     """(deprecated) cf. MultiHash.from_data
 
     Hash the given binary blob with the given algorithms.
 
     This is a thin wrapper returning the digest() of
     :func:`MultiHash.from_data`.
 
     Args:
         data (bytes): raw content to hash
         algorithms (set): the hashing algorithms used
 
     Returns: a dict mapping each algorithm to a bytes digest
 
     Raises:
         TypeError if data does not support the buffer interface.
         ValueError if algorithms contains an unknown hash algorithm.
 
     """
     return MultiHash.from_data(data, hash_names=algorithms).digest()
 
 
 def hash_git_data(data, git_type, base_algo='sha1'):
     """Hash the given data as a git object of type git_type.
 
     Args:
         data: a bytes object
         git_type: the git object type
         base_algo: the base hashing algorithm used (default: sha1)
 
     Returns: the digest of the git object header followed by data, as
         returned by the digest() method of the underlying hash object
 
     Raises:
         ValueError if the git_type is unexpected.
     """
 
     git_object_types = {'blob', 'tree', 'commit', 'tag', 'snapshot'}
 
     if git_type not in git_object_types:
         raise ValueError('Unexpected git object type %s, expected one of %s' %
                          (git_type, ', '.join(sorted(git_object_types))))
 
     # the header is fed first by _new_git_hash, the payload comes next
     h = _new_git_hash(base_algo, git_type, len(data))
     h.update(data)
 
     return h.digest()
 
 
 @functools.lru_cache()
 def hash_to_hex(hash):
     """Return the hexadecimal ascii form of a hash (in hex or bytes form)
 
     Args:
       hash (str or bytes): a :class:`bytes` hash or a :class:`str` containing
         the hexadecimal form of the hash
 
     Returns:
       str: the hexadecimal form of the hash; a :class:`str` argument is
       returned unchanged
     """
     if not isinstance(hash, str):
         hexlified = binascii.hexlify(hash)
         return hexlified.decode('ascii')
     return hash
 
 
 @functools.lru_cache()
 def hash_to_bytehex(hash):
     """Converts a hash to its hexadecimal bytes representation
 
     Results are memoized by :func:`functools.lru_cache`.
 
     Args:
       hash (bytes): a :class:`bytes` hash
 
     Returns:
       bytes: the hexadecimal form of the hash, as :class:`bytes`
     """
     return binascii.hexlify(hash)
 
 
 @functools.lru_cache()
 def hash_to_bytes(hash):
     """Return the raw bytes form of a hash (in hex or bytes form)
 
     Args:
       hash (str or bytes): a :class:`bytes` hash or a :class:`str` containing
         the hexadecimal form of the hash
 
     Returns:
       bytes: the :class:`bytes` form of the hash; a :class:`bytes` argument
       is returned unchanged
     """
     return hash if isinstance(hash, bytes) else bytes.fromhex(hash)
 
 
 @functools.lru_cache()
 def bytehex_to_hash(hex):
     """Converts a hexadecimal bytes representation of a hash to that hash
 
     Args:
       hex (bytes): a :class:`bytes` containing the hexadecimal form of the
         hash encoded in ascii
 
     Returns:
       bytes: the :class:`bytes` form of the hash
     """
     # decode to str first: hash_to_bytes returns bytes arguments unchanged
     return hash_to_bytes(hex.decode())