diff --git a/PKG-INFO b/PKG-INFO
index 57272cb..f3f8bd2 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,75 +1,75 @@
 Metadata-Version: 2.1
 Name: swh.indexer
-Version: 0.8.0
+Version: 0.8.1
 Summary: Software Heritage Content Indexer
 Home-page: https://forge.softwareheritage.org/diffusion/78/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
 Project-URL: Funding, https://www.softwareheritage.org/donate
 Project-URL: Source, https://forge.softwareheritage.org/source/swh-indexer
 Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-indexer/
 Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Operating System :: OS Independent
 Classifier: Development Status :: 5 - Production/Stable
 Requires-Python: >=3.7
 Description-Content-Type: text/markdown
 Provides-Extra: testing
 License-File: LICENSE
 License-File: AUTHORS
 
 swh-indexer
 ============
 
 Tools to compute multiple indexes on SWH's raw contents:
 - content:
   - mimetype
   - ctags
   - language
   - fossology-license
   - metadata
 - revision:
   - metadata
 
 An indexer is in charge of:
 - looking up objects
 - extracting information from those objects
 - store those information in the swh-indexer db
 
 There are multiple indexers working on different object types:
   - content indexer: works with content sha1 hashes
   - revision indexer: works with revision sha1 hashes
   - origin indexer: works with origin identifiers
 
 Indexation procedure:
 - receive batch of ids
 - retrieve the associated data depending on object type
 - compute for that object some index
 - store the result to swh's storage
 
 Current content indexers:
 
 - mimetype (queue swh_indexer_content_mimetype): detect the encoding
   and mimetype
 
 - language (queue swh_indexer_content_language): detect the
   programming language
 
 - ctags (queue swh_indexer_content_ctags): compute tags information
 
 - fossology-license (queue swh_indexer_fossology_license): compute the
   license
 
 - metadata: translate file into translated_metadata dict
 
 Current revision indexers:
 
 - metadata: detects files containing metadata and retrieves translated_metadata
   in content_metadata table in storage or run content indexer to translate
   files.
 
 
diff --git a/requirements-test.txt b/requirements-test.txt
index c1f90dc..c343e8f 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,6 +1,9 @@
 confluent-kafka
+hypothesis >= 3.11.0
 pytest
 pytest-mock
-hypothesis>=3.11.0
 swh.scheduler[testing] >= 0.5.0
 swh.storage[testing] >= 0.10.0
+
+types-click
+types-pyyaml
diff --git a/requirements.txt b/requirements.txt
index bde0b88..a94d3ff 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,9 @@
-click
 python-magic >= 0.4.13
+click
+# frozendict is only needed indirectly, as a dependency of pyld.
+# Release 2.1.2 causes segmentation faults, so stay below it.
+# cf. T3815
+frozendict < 2.1.2
 pyld
 xmltodict
 typing-extensions
diff --git a/swh.indexer.egg-info/PKG-INFO b/swh.indexer.egg-info/PKG-INFO
index 57272cb..f3f8bd2 100644
--- a/swh.indexer.egg-info/PKG-INFO
+++ b/swh.indexer.egg-info/PKG-INFO
@@ -1,75 +1,75 @@
 Metadata-Version: 2.1
 Name: swh.indexer
-Version: 0.8.0
+Version: 0.8.1
 Summary: Software Heritage Content Indexer
 Home-page: https://forge.softwareheritage.org/diffusion/78/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
 Project-URL: Funding, https://www.softwareheritage.org/donate
 Project-URL: Source, https://forge.softwareheritage.org/source/swh-indexer
 Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-indexer/
 Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Operating System :: OS Independent
 Classifier: Development Status :: 5 - Production/Stable
 Requires-Python: >=3.7
 Description-Content-Type: text/markdown
 Provides-Extra: testing
 License-File: LICENSE
 License-File: AUTHORS
 
 swh-indexer
 ============
 
 Tools to compute multiple indexes on SWH's raw contents:
 - content:
   - mimetype
   - ctags
   - language
   - fossology-license
   - metadata
 - revision:
   - metadata
 
 An indexer is in charge of:
 - looking up objects
 - extracting information from those objects
 - store those information in the swh-indexer db
 
 There are multiple indexers working on different object types:
   - content indexer: works with content sha1 hashes
   - revision indexer: works with revision sha1 hashes
   - origin indexer: works with origin identifiers
 
 Indexation procedure:
 - receive batch of ids
 - retrieve the associated data depending on object type
 - compute for that object some index
 - store the result to swh's storage
 
 Current content indexers:
 
 - mimetype (queue swh_indexer_content_mimetype): detect the encoding
   and mimetype
 
 - language (queue swh_indexer_content_language): detect the
   programming language
 
 - ctags (queue swh_indexer_content_ctags): compute tags information
 
 - fossology-license (queue swh_indexer_fossology_license): compute the
   license
 
 - metadata: translate file into translated_metadata dict
 
 Current revision indexers:
 
 - metadata: detects files containing metadata and retrieves translated_metadata
   in content_metadata table in storage or run content indexer to translate
   files.
 
 
diff --git a/swh.indexer.egg-info/requires.txt b/swh.indexer.egg-info/requires.txt
index 48a5c82..33fc812 100644
--- a/swh.indexer.egg-info/requires.txt
+++ b/swh.indexer.egg-info/requires.txt
@@ -1,19 +1,22 @@
-click
 python-magic>=0.4.13
+click
+frozendict<2.1.2
 pyld
 xmltodict
 typing-extensions
 swh.core[db,http]>=0.14.0
 swh.model>=0.0.15
 swh.objstorage>=0.2.2
 swh.scheduler>=0.5.2
 swh.storage>=0.22.0
 swh.journal>=0.1.0
 
 [testing]
 confluent-kafka
+hypothesis>=3.11.0
 pytest
 pytest-mock
-hypothesis>=3.11.0
 swh.scheduler[testing]>=0.5.0
 swh.storage[testing]>=0.10.0
+types-click
+types-pyyaml
diff --git a/swh/__init__.py b/swh/__init__.py
index f14e196..b36383a 100644
--- a/swh/__init__.py
+++ b/swh/__init__.py
@@ -1,4 +1,3 @@
 from pkgutil import extend_path
-from typing import Iterable
 
-__path__ = extend_path(__path__, __name__)  # type: Iterable[str]
+__path__ = extend_path(__path__, __name__)
diff --git a/swh/indexer/origin_head.py b/swh/indexer/origin_head.py
index 61c495a..fec42c8 100644
--- a/swh/indexer/origin_head.py
+++ b/swh/indexer/origin_head.py
@@ -1,154 +1,154 @@
 # Copyright (C) 2018-2020  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import logging
 import re
 from typing import Any, Dict, List, Tuple, Union
 
 import click
 
 from swh.indexer.indexer import OriginIndexer
 from swh.model.model import SnapshotBranch, TargetType
 from swh.storage.algos.origin import origin_get_latest_visit_status
 from swh.storage.algos.snapshot import snapshot_get_all_branches
 
 
 class OriginHeadIndexer(OriginIndexer[Dict]):
     """Origin-level indexer.
 
     This indexer is in charge of looking up the revision that acts as the
     "head" of an origin.
 
     In git, this is usually the commit pointed to by the 'master' branch."""
 
     USE_TOOLS = False
 
     def persist_index_computations(self, results: Any) -> Dict[str, int]:
         """Do nothing. The indexer's results are not persistent, they
         should only be piped to another indexer."""
         return {}
 
     # Dispatch
 
     def index(self, id: str, data: None = None, **kwargs) -> List[Dict]:
         origin_url = id
         visit_status = origin_get_latest_visit_status(
             self.storage, origin_url, allowed_statuses=["full"], require_snapshot=True
         )
         if not visit_status:
             return []
         assert visit_status.snapshot is not None
         snapshot = snapshot_get_all_branches(self.storage, visit_status.snapshot)
         if snapshot is None:
             return []
         method = getattr(
             self, "_try_get_%s_head" % visit_status.type, self._try_get_head_generic
         )
 
-        rev_id = method(snapshot.branches)
+        rev_id = method(snapshot.branches)  # type: ignore
         if rev_id is not None:
             return [{"origin_url": origin_url, "revision_id": rev_id,}]
 
         # could not find a head revision
         return []
 
     # Tarballs
 
     _archive_filename_re = re.compile(
         rb"^"
         rb"(?P<pkgname>.*)[-_]"
         rb"(?P<version>[0-9]+(\.[0-9])*)"
         rb"(?P<preversion>[-+][a-zA-Z0-9.~]+?)?"
         rb"(?P<extension>(\.[a-zA-Z0-9]+)+)"
         rb"$"
     )
 
     @classmethod
     def _parse_version(cls: Any, filename: bytes) -> Tuple[Union[float, int], ...]:
         """Extracts the release version from an archive filename,
         to get an ordering whose maximum is likely to be the last
         version of the software
 
         >>> OriginHeadIndexer._parse_version(b'foo')
         (-inf,)
         >>> OriginHeadIndexer._parse_version(b'foo.tar.gz')
         (-inf,)
         >>> OriginHeadIndexer._parse_version(b'gnu-hello-0.0.1.tar.gz')
         (0, 0, 1, 0)
         >>> OriginHeadIndexer._parse_version(b'gnu-hello-0.0.1-beta2.tar.gz')
         (0, 0, 1, -1, 'beta2')
         >>> OriginHeadIndexer._parse_version(b'gnu-hello-0.0.1+foobar.tar.gz')
         (0, 0, 1, 1, 'foobar')
         """
         res = cls._archive_filename_re.match(filename)
         if res is None:
             return (float("-infinity"),)
         version = [int(n) for n in res.group("version").decode().split(".")]
         if res.group("preversion") is None:
             version.append(0)
         else:
             preversion = res.group("preversion").decode()
             if preversion.startswith("-"):
                 version.append(-1)
                 version.append(preversion[1:])
             elif preversion.startswith("+"):
                 version.append(1)
                 version.append(preversion[1:])
             else:
                 assert False, res.group("preversion")
         return tuple(version)
 
     def _try_get_ftp_head(self, branches: Dict[bytes, SnapshotBranch]) -> Any:
         archive_names = list(branches)
         max_archive_name = max(archive_names, key=self._parse_version)
         r = self._try_resolve_target(branches, max_archive_name)
         return r
 
     # Generic
 
     def _try_get_head_generic(self, branches: Dict[bytes, SnapshotBranch]) -> Any:
         # Works on 'deposit', 'pypi', and VCSs.
         return self._try_resolve_target(branches, b"HEAD") or self._try_resolve_target(
             branches, b"master"
         )
 
     def _try_resolve_target(
         self, branches: Dict[bytes, SnapshotBranch], branch_name: bytes
     ) -> Any:
         try:
             branch = branches[branch_name]
             if branch is None:
                 return None
             while branch.target_type == TargetType.ALIAS:
                 branch = branches[branch.target]
                 if branch is None:
                     return None
 
             if branch.target_type == TargetType.REVISION:
                 return branch.target
             elif branch.target_type == TargetType.CONTENT:
                 return None  # TODO
             elif branch.target_type == TargetType.DIRECTORY:
                 return None  # TODO
             elif branch.target_type == TargetType.RELEASE:
                 return None  # TODO
             else:
                 assert False, branch
         except KeyError:
             return None
 
 
 @click.command()
 @click.option(
     "--origins", "-i", help='Origins to lookup, in the "type+url" format', multiple=True
 )
 def main(origins: List[str]) -> None:
     rev_metadata_indexer = OriginHeadIndexer()
     rev_metadata_indexer.run(origins)
 
 
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)
     main()
diff --git a/swh/indexer/storage/model.py b/swh/indexer/storage/model.py
index 3eace16..ad3cbed 100644
--- a/swh/indexer/storage/model.py
+++ b/swh/indexer/storage/model.py
@@ -1,138 +1,138 @@
 # Copyright (C) 2020  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 """Classes used internally by the in-memory idx-storage, and will be
 used for the interface of the idx-storage in the near future."""
 
 from __future__ import annotations
 
 from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar
 
 import attr
 from typing_extensions import Final
 
 from swh.model.model import Sha1Git, dictify
 
 TSelf = TypeVar("TSelf")
 
 
 @attr.s
 class BaseRow:
     UNIQUE_KEY_FIELDS: Tuple = ("id", "indexer_configuration_id")
 
     id = attr.ib(type=Any)
     indexer_configuration_id = attr.ib(type=Optional[int], default=None, kw_only=True)
     tool = attr.ib(type=Optional[Dict], default=None, kw_only=True)
 
     def __attrs_post_init__(self):
         if self.indexer_configuration_id is None and self.tool is None:
             raise TypeError("Either indexer_configuration_id or tool must be not None.")
         if self.indexer_configuration_id is not None and self.tool is not None:
             raise TypeError(
                 "indexer_configuration_id and tool are mutually exclusive; "
                 "only one may be not None."
             )
 
     def anonymize(self: TSelf) -> Optional[TSelf]:
         # Needed to implement swh.journal.writer.ValueProtocol
         return None
 
     def to_dict(self) -> Dict[str, Any]:
         """Wrapper of `attr.asdict` that can be overridden by subclasses
         that have special handling of some of the fields."""
         d = dictify(attr.asdict(self, recurse=False))
         if d["indexer_configuration_id"] is None:
             del d["indexer_configuration_id"]
         if d["tool"] is None:
             del d["tool"]
 
         return d
 
     @classmethod
     def from_dict(cls: Type[TSelf], d) -> TSelf:
-        return cls(**d)  # type: ignore
+        return cls(**d)
 
     def unique_key(self) -> Dict:
         obj = self
 
         # tool["id"] and obj.indexer_configuration_id are the same value, but
         # only one of them is set for any given object
         if obj.indexer_configuration_id is None:
             assert obj.tool  # constructors ensures tool XOR indexer_configuration_id
             obj = attr.evolve(obj, indexer_configuration_id=obj.tool["id"], tool=None)
 
         return {key: getattr(obj, key) for key in self.UNIQUE_KEY_FIELDS}
 
 
 @attr.s
 class ContentMimetypeRow(BaseRow):
     object_type: Final = "content_mimetype"
 
     id = attr.ib(type=Sha1Git)
     mimetype = attr.ib(type=str)
     encoding = attr.ib(type=str)
 
 
 @attr.s
 class ContentLanguageRow(BaseRow):
     object_type: Final = "content_language"
 
     id = attr.ib(type=Sha1Git)
     lang = attr.ib(type=str)
 
 
 @attr.s
 class ContentCtagsRow(BaseRow):
     object_type: Final = "content_ctags"
     UNIQUE_KEY_FIELDS = (
         "id",
         "indexer_configuration_id",
         "name",
         "kind",
         "line",
         "lang",
     )
 
     id = attr.ib(type=Sha1Git)
     name = attr.ib(type=str)
     kind = attr.ib(type=str)
     line = attr.ib(type=int)
     lang = attr.ib(type=str)
 
 
 @attr.s
 class ContentLicenseRow(BaseRow):
     object_type: Final = "content_fossology_license"
     UNIQUE_KEY_FIELDS = ("id", "indexer_configuration_id", "license")
 
     id = attr.ib(type=Sha1Git)
     license = attr.ib(type=str)
 
 
 @attr.s
 class ContentMetadataRow(BaseRow):
     object_type: Final = "content_metadata"
 
     id = attr.ib(type=Sha1Git)
     metadata = attr.ib(type=Dict[str, Any])
 
 
 @attr.s
 class RevisionIntrinsicMetadataRow(BaseRow):
     object_type: Final = "revision_intrinsic_metadata"
 
     id = attr.ib(type=Sha1Git)
     metadata = attr.ib(type=Dict[str, Any])
     mappings = attr.ib(type=List[str])
 
 
 @attr.s
 class OriginIntrinsicMetadataRow(BaseRow):
     object_type: Final = "origin_intrinsic_metadata"
 
     id = attr.ib(type=str)
     metadata = attr.ib(type=Dict[str, Any])
     from_revision = attr.ib(type=Sha1Git)
     mappings = attr.ib(type=List[str])
diff --git a/tox.ini b/tox.ini
index 3afda02..0af6d83 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,78 +1,78 @@
 [tox]
 envlist=black,flake8,mypy,py3
 
 [testenv]
 extras =
   testing
 deps =
   pytest-cov
   swh-scheduler[testing] >= 0.5.0
   swh-storage[testing] >= 0.10.0
   dev: pdbpp
 commands =
   pytest --doctest-modules \
   !slow: --hypothesis-profile=fast \
   slow:  --hypothesis-profile=slow \
          {envsitepackagesdir}/swh/indexer \
          --cov={envsitepackagesdir}/swh/indexer \
          --cov-branch {posargs}
 
 [testenv:black]
 skip_install = true
 deps =
   black==19.10b0
 commands =
   {envpython} -m black --check swh
 
 [testenv:flake8]
 skip_install = true
 deps =
   flake8
 commands =
   {envpython} -m flake8
 
 [testenv:mypy]
 extras =
   testing
 deps =
-  mypy
+  mypy==0.920
 commands =
   mypy swh
 
 # build documentation outside swh-environment using the current
 # git HEAD of swh-docs, is executed on CI for each diff to prevent
 # breaking doc build
 [testenv:sphinx]
 whitelist_externals = make
 usedevelop = true
 extras =
   testing
 deps =
   # fetch and install swh-docs in develop mode
   -e git+https://forge.softwareheritage.org/source/swh-docs#egg=swh.docs
 
 setenv =
   SWH_PACKAGE_DOC_TOX_BUILD = 1
   # turn warnings into errors
   SPHINXOPTS = -W
 commands =
   make -I ../.tox/sphinx/src/swh-docs/swh/ -C docs
 
 
 # build documentation only inside swh-environment using local state
 # of swh-docs package
 [testenv:sphinx-dev]
 whitelist_externals = make
 usedevelop = true
 extras =
   testing
 deps =
   # install swh-docs in develop mode
   -e ../swh-docs
 
 setenv =
   SWH_PACKAGE_DOC_TOX_BUILD = 1
   # turn warnings into errors
   SPHINXOPTS = -W
 commands =
   make -I ../.tox/sphinx-dev/src/swh-docs/swh/ -C docs