diff --git a/swh/loader/git/converters.py b/swh/loader/git/converters.py
index cb176ca..ea9ccf3 100644
--- a/swh/loader/git/converters.py
+++ b/swh/loader/git/converters.py
@@ -1,329 +1,330 @@
 # Copyright (C) 2015-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 """Convert dulwich objects to dictionaries suitable for swh.storage"""

 import logging
 import re
 from typing import Any, Dict, Optional, cast

 import attr
 from dulwich.objects import Blob, Commit, ShaFile, Tag, Tree, _parse_message

 from swh.model.hashutil import (
     DEFAULT_ALGORITHMS,
     MultiHash,
     git_object_header,
     hash_to_bytes,
     hash_to_hex,
 )
 from swh.model.model import (
     BaseContent,
     Content,
     Directory,
     DirectoryEntry,
     HashableObject,
     ObjectType,
     Person,
     Release,
     Revision,
     RevisionType,
     SkippedContent,
     TargetType,
     Timestamp,
     TimestampWithTimezone,
 )

 COMMIT_MODE_MASK = 0o160000
 """Mode/perms of tree entries that point to a commit.
 They are normally equal to this mask, but may have more bits set to 1."""
 TREE_MODE_MASK = 0o040000
 """Mode/perms of tree entries that point to a tree.
 They are normally equal to this mask, but may have more bits set to 1."""

 AUTHORSHIP_LINE_RE = re.compile(rb"^.*> (?P<timestamp>\S+) (?P<timezone>\S+)$")

 logger = logging.getLogger(__name__)


 class HashMismatch(Exception):
     pass


 def check_id(obj: HashableObject) -> None:
     real_id = obj.compute_hash()
     if obj.id != real_id:
         raise HashMismatch(
             f"Expected {type(obj).__name__} hash to be {obj.id.hex()}, "
             f"got {real_id.hex()}"
         )


 def dulwich_blob_to_content_id(obj: ShaFile) -> Dict[str, Any]:
     """Convert a dulwich blob to a Software Heritage content id"""
     if obj.type_name != b"blob":
         raise ValueError("Argument is not a blob.")
     blob = cast(Blob, obj)

     size = blob.raw_length()
     data = blob.as_raw_string()
     hashes = MultiHash.from_data(data, DEFAULT_ALGORITHMS).digest()
     if hashes["sha1_git"] != blob.sha().digest():
         raise HashMismatch(
             f"Expected Content hash to be {blob.sha().digest().hex()}, "
             f"got {hashes['sha1_git'].hex()}"
         )
     hashes["length"] = size
     return hashes


 def dulwich_blob_to_content(obj: ShaFile, max_content_size=None) -> BaseContent:
     """Convert a dulwich blob to a Software Heritage content"""
     if obj.type_name != b"blob":
         raise ValueError("Argument is not a blob.")
     blob = cast(Blob, obj)
     hashes = dulwich_blob_to_content_id(blob)
     if max_content_size is not None and hashes["length"] >= max_content_size:
         return SkippedContent(
             status="absent",
             reason="Content too large",
             **hashes,
         )
     else:
         return Content(
             data=blob.as_raw_string(),
             status="visible",
             **hashes,
         )


 def dulwich_tree_to_directory(obj: ShaFile) -> Directory:
     """Format a tree as a directory"""
     if obj.type_name != b"tree":
         raise ValueError("Argument is not a tree.")
     tree = cast(Tree, obj)

     entries = []

     for entry in tree.iteritems():
         if entry.mode & COMMIT_MODE_MASK == COMMIT_MODE_MASK:
             type_ = "rev"
         elif entry.mode & TREE_MODE_MASK == TREE_MODE_MASK:
             type_ = "dir"
         else:
             type_ = "file"

         entries.append(
             DirectoryEntry(
                 type=type_,
                 perms=entry.mode,
                 name=entry.path.replace(
                     b"/", b"_"
                 ),  # '/' is very rare, and invalid in SWH.
                 target=hash_to_bytes(entry.sha.decode("ascii")),
             )
         )

     dir_ = Directory(
         id=tree.sha().digest(),
         entries=tuple(entries),
     )

     if dir_.compute_hash() != dir_.id:
         expected_id = dir_.id
         actual_id = dir_.compute_hash()
         logger.warning(
             "Expected directory to have id %s, but got %s. Recording raw_manifest.",
             hash_to_hex(expected_id),
             hash_to_hex(actual_id),
         )
         raw_string = tree.as_raw_string()
         dir_ = attr.evolve(
             dir_, raw_manifest=git_object_header("tree", len(raw_string)) + raw_string
         )

     check_id(dir_)

     return dir_


 def parse_author(name_email: bytes) -> Person:
     """Parse an author line"""
     return Person.from_fullname(name_email)


 def dulwich_tsinfo_to_timestamp(
     timestamp,
     timezone: int,
     timezone_neg_utc: bool,
     timezone_bytes: Optional[bytes],
 ) -> TimestampWithTimezone:
     """Convert the dulwich timestamp information to a structure compatible with
     Software Heritage."""
     ts = Timestamp(
         seconds=int(timestamp),
         microseconds=0,
     )
     if timezone_bytes is None:
         # Failed to parse from the raw manifest, fallback to what Dulwich managed to
         # parse.
         return TimestampWithTimezone.from_numeric_offset(
             timestamp=ts,
             offset=timezone // 60,
             negative_utc=timezone_neg_utc,
         )
     else:
         return TimestampWithTimezone(timestamp=ts, offset_bytes=timezone_bytes)


 def dulwich_commit_to_revision(obj: ShaFile) -> Revision:
     if obj.type_name != b"commit":
         raise ValueError("Argument is not a commit.")
     commit = cast(Commit, obj)
     author_timezone = None
     committer_timezone = None
+    assert commit._chunked_text is not None  # to keep mypy happy
     for (field, value) in _parse_message(commit._chunked_text):
         if field == b"author":
             m = AUTHORSHIP_LINE_RE.match(value)
             if m:
                 author_timezone = m.group("timezone")
         elif field == b"committer":
             m = AUTHORSHIP_LINE_RE.match(value)
             if m:
                 committer_timezone = m.group("timezone")

     extra_headers = []

     if commit.encoding is not None:
         extra_headers.append((b"encoding", commit.encoding))

     if commit.mergetag:
         for mergetag in commit.mergetag:
             raw_string = mergetag.as_raw_string()
             assert raw_string.endswith(b"\n")
             extra_headers.append((b"mergetag", raw_string[:-1]))

     if commit.extra:
         extra_headers.extend((k, v) for k, v in commit.extra)

     if commit.gpgsig:
         extra_headers.append((b"gpgsig", commit.gpgsig))

     rev = Revision(
         id=commit.sha().digest(),
         author=parse_author(commit.author),
         date=dulwich_tsinfo_to_timestamp(
             commit.author_time,
             commit.author_timezone,
             commit._author_timezone_neg_utc,
             author_timezone,
         ),
         committer=parse_author(commit.committer),
         committer_date=dulwich_tsinfo_to_timestamp(
             commit.commit_time,
             commit.commit_timezone,
             commit._commit_timezone_neg_utc,
             committer_timezone,
         ),
         type=RevisionType.GIT,
         directory=bytes.fromhex(commit.tree.decode()),
         message=commit.message,
         metadata=None,
         extra_headers=tuple(extra_headers),
         synthetic=False,
         parents=tuple(bytes.fromhex(p.decode()) for p in commit.parents),
     )

     if rev.compute_hash() != rev.id:
         expected_id = rev.id
         actual_id = rev.compute_hash()
         logger.warning(
             "Expected revision to have id %s, but got %s. Recording raw_manifest.",
             hash_to_hex(expected_id),
             hash_to_hex(actual_id),
         )
         raw_string = commit.as_raw_string()
         rev = attr.evolve(
             rev, raw_manifest=git_object_header("commit", len(raw_string)) + raw_string
         )

     check_id(rev)

     return rev


 DULWICH_TARGET_TYPES = {
     b"blob": TargetType.CONTENT,
     b"tree": TargetType.DIRECTORY,
     b"commit": TargetType.REVISION,
     b"tag": TargetType.RELEASE,
 }

 DULWICH_OBJECT_TYPES = {
     b"blob": ObjectType.CONTENT,
     b"tree": ObjectType.DIRECTORY,
     b"commit": ObjectType.REVISION,
     b"tag": ObjectType.RELEASE,
 }


 def dulwich_tag_to_release(obj: ShaFile) -> Release:
     if obj.type_name != b"tag":
         raise ValueError("Argument is not a tag.")
     tag = cast(Tag, obj)

     tagger_timezone = None
     # FIXME: _parse_message is a private function from Dulwich.
     for (field, value) in _parse_message(tag.as_raw_chunks()):
         if field == b"tagger":
             m = AUTHORSHIP_LINE_RE.match(value)
             if m:
                 tagger_timezone = m.group("timezone")

     target_type, target = tag.object
     if tag.tagger:
         author: Optional[Person] = parse_author(tag.tagger)
         if tag.tag_time is None:
             date = None
         else:
             date = dulwich_tsinfo_to_timestamp(
                 tag.tag_time,
                 tag.tag_timezone,
                 tag._tag_timezone_neg_utc,
                 tagger_timezone,
             )
     else:
         author = date = None

     message = tag.message
     if tag.signature:
         message += tag.signature

     rel = Release(
         id=tag.sha().digest(),
         author=author,
         date=date,
         name=tag.name,
         target=bytes.fromhex(target.decode()),
         target_type=DULWICH_OBJECT_TYPES[target_type.type_name],
         message=message,
         metadata=None,
         synthetic=False,
     )

     if rel.compute_hash() != rel.id:
         expected_id = rel.id
         actual_id = rel.compute_hash()
         logger.warning(
             "Expected release to have id %s, but got %s. Recording raw_manifest.",
             hash_to_hex(expected_id),
             hash_to_hex(actual_id),
         )
         raw_string = tag.as_raw_string()
         rel = attr.evolve(
             rel, raw_manifest=git_object_header("tag", len(raw_string)) + raw_string
         )

     check_id(rel)

     return rel
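
Note (outside the patch): a minimal sketch of how the converters above can be
exercised, assuming a local clone readable by dulwich; `repo_path` is a
hypothetical placeholder and error handling is omitted.

    # Illustrative only, not part of the patch.
    from dulwich.repo import Repo

    from swh.loader.git.converters import (
        dulwich_blob_to_content,
        dulwich_commit_to_revision,
        dulwich_tree_to_directory,
    )

    repo = Repo("repo_path")  # hypothetical path to a local git clone
    for sha in repo.object_store:
        obj = repo.object_store[sha]
        if obj.type_name == b"commit":
            revision = dulwich_commit_to_revision(obj)
            # Objects that do not re-hash to their id keep their raw bytes:
            if revision.raw_manifest is not None:
                print("non-canonical commit:", revision.id.hex())
        elif obj.type_name == b"tree":
            directory = dulwich_tree_to_directory(obj)
        elif obj.type_name == b"blob":
            content = dulwich_blob_to_content(obj)
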
diff --git a/swh/loader/git/dumb.py b/swh/loader/git/dumb.py
index c34c19b..35826e9 100644
--- a/swh/loader/git/dumb.py
+++ b/swh/loader/git/dumb.py
@@ -1,204 +1,204 @@
 # Copyright (C) 2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 from __future__ import annotations

 from collections import defaultdict
 import logging
 import stat
 import struct
 from tempfile import SpooledTemporaryFile
 from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Set, cast
 import urllib.parse

 from dulwich.errors import NotGitRepository
 from dulwich.objects import S_IFGITLINK, Commit, ShaFile, Tree
 from dulwich.pack import Pack, PackData, PackIndex, load_pack_index_file
 import requests

 from swh.loader.git.utils import HexBytes

 if TYPE_CHECKING:
     from .loader import RepoRepresentation

 logger = logging.getLogger(__name__)

 HEADERS = {"User-Agent": "Software Heritage dumb Git loader"}


 def check_protocol(repo_url: str) -> bool:
     """Checks if a git repository can be cloned using the dumb protocol.

     Args:
         repo_url: Base URL of a git repository

     Returns:
         Whether the dumb protocol is supported.
     """
     if not repo_url.startswith("http"):
         return False
     url = urllib.parse.urljoin(
         repo_url.rstrip("/") + "/", "info/refs?service=git-upload-pack/"
     )
     logger.debug("Fetching %s", url)
     response = requests.get(url, headers=HEADERS)
     content_type = response.headers.get("Content-Type")
     return (
         response.status_code
         in (
             200,
             304,
         )
         # header is not mandatory in protocol specification
         and (content_type is None or not content_type.startswith("application/x-git-"))
     )


 class GitObjectsFetcher:
     """Git objects fetcher using dumb HTTP protocol.

     Fetches a set of git objects for a repository according to its archival
     state by Software Heritage and provides iterators on them.

     Args:
         repo_url: Base URL of a git repository
         base_repo: State of repository archived by Software Heritage
     """

     def __init__(self, repo_url: str, base_repo: RepoRepresentation):
         self._session = requests.Session()
         self.repo_url = repo_url
         self.base_repo = base_repo
         self.objects: Dict[bytes, Set[bytes]] = defaultdict(set)
         self.refs = self._get_refs()
         self.head = self._get_head() if self.refs else {}
         self.packs = self._get_packs()

     def fetch_object_ids(self) -> None:
         """Fetches identifiers of git objects to load into the archive."""
         wants = self.base_repo.determine_wants(self.refs)

         # process refs
         commit_objects = []
         for ref in wants:
             ref_object = self._get_git_object(ref)
-            if ref_object.get_type() == Commit.type_num:
+            if ref_object.type_num == Commit.type_num:
                 commit_objects.append(cast(Commit, ref_object))
                 self.objects[b"commit"].add(ref)
             else:
                 self.objects[b"tag"].add(ref)

         # perform DFS on commits graph
         while commit_objects:
             commit = commit_objects.pop()
             # fetch tree and blob ids recursively
             self._fetch_tree_objects(commit.tree)
             for parent in commit.parents:
                 if (
                     # commit not already seen in the current load
                     parent not in self.objects[b"commit"]
                     # commit not already archived by a previous load
                     and parent not in self.base_repo.heads
                 ):
                     commit_objects.append(cast(Commit, self._get_git_object(parent)))
                     self.objects[b"commit"].add(parent)

     def iter_objects(self, object_type: bytes) -> Iterable[ShaFile]:
         """Returns a generator on fetched git objects per type.

         Args:
             object_type: Git object type, either b"blob", b"commit", b"tag" or b"tree"

         Returns:
             A generator fetching git objects on the fly.
         """
         return map(self._get_git_object, self.objects[object_type])

     def _http_get(self, path: str) -> SpooledTemporaryFile:
         url = urllib.parse.urljoin(self.repo_url.rstrip("/") + "/", path)
         logger.debug("Fetching %s", url)
         response = self._session.get(url, headers=HEADERS)
         buffer = SpooledTemporaryFile(max_size=100 * 1024 * 1024)
         for chunk in response.iter_content(chunk_size=10 * 1024 * 1024):
             buffer.write(chunk)
         buffer.flush()
         buffer.seek(0)
         return buffer

     def _get_refs(self) -> Dict[bytes, HexBytes]:
         refs = {}
         refs_resp_bytes = self._http_get("info/refs")
         for ref_line in refs_resp_bytes.readlines():
             ref_target, ref_name = ref_line.replace(b"\n", b"").split(b"\t")
             refs[ref_name] = ref_target
         return refs

     def _get_head(self) -> Dict[bytes, HexBytes]:
         head_resp_bytes = self._http_get("HEAD")
         _, head_target = head_resp_bytes.readline().replace(b"\n", b"").split(b" ")
         return {b"HEAD": head_target}

     def _get_pack_data(self, pack_name: str) -> Callable[[], PackData]:
         def _pack_data() -> PackData:
             pack_data_bytes = self._http_get(f"objects/pack/{pack_name}")
             return PackData(pack_name, file=pack_data_bytes)

         return _pack_data

     def _get_pack_idx(self, pack_idx_name: str) -> Callable[[], PackIndex]:
         def _pack_idx() -> PackIndex:
             pack_idx_bytes = self._http_get(f"objects/pack/{pack_idx_name}")
             return load_pack_index_file(pack_idx_name, pack_idx_bytes)

         return _pack_idx

     def _get_packs(self) -> List[Pack]:
         packs = []
         packs_info_bytes = self._http_get("objects/info/packs")
         packs_info = packs_info_bytes.read().decode()
         for pack_info in packs_info.split("\n"):
             if pack_info:
                 pack_name = pack_info.split(" ")[1]
                 pack_idx_name = pack_name.replace(".pack", ".idx")
                 # pack index and data file will be lazily fetched when required
                 packs.append(
                     Pack.from_lazy_objects(
                         self._get_pack_data(pack_name),
                         self._get_pack_idx(pack_idx_name),
                     )
                 )
         return packs

     def _get_git_object(self, sha: bytes) -> ShaFile:
         # try to get the object from a pack file first to avoid flooding
         # git server with numerous HTTP requests
         for pack in list(self.packs):
             try:
                 if sha in pack:
                     return pack[sha]
             except (NotGitRepository, struct.error):
                 # missing (dulwich http client raises NotGitRepository on 404)
                 # or invalid pack index/content, remove it from global packs list
                 logger.debug("A pack file is missing or its content is invalid")
                 self.packs.remove(pack)
         # fetch it from objects/ directory otherwise
         sha_hex = sha.decode()
         object_path = f"objects/{sha_hex[:2]}/{sha_hex[2:]}"
         return ShaFile.from_file(self._http_get(object_path))

     def _fetch_tree_objects(self, sha: bytes) -> None:
         if sha not in self.objects[b"tree"]:
             tree = cast(Tree, self._get_git_object(sha))
             self.objects[b"tree"].add(sha)
             for item in tree.items():
                 if item.mode == S_IFGITLINK:
                     # skip submodules as objects are not stored in repository
                     continue
                 if item.mode & stat.S_IFDIR:
                     self._fetch_tree_objects(item.sha)
                 else:
                     self.objects[b"blob"].add(item.sha)
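
Note (outside the patch): `check_protocol` is the gate for this module; it
probes `info/refs?service=git-upload-pack` and treats the absence of an
`application/x-git-*` content type on a 200/304 response as a dumb server.
A minimal sketch, with a placeholder URL:

    # Illustrative only, not part of the patch.
    from swh.loader.git.dumb import check_protocol

    repo_url = "https://example.org/some/repo.git"  # placeholder URL
    if check_protocol(repo_url):
        # No smart-HTTP service advertisement: the loader can fall back to
        # fetching refs, loose objects and packs over plain HTTP through
        # GitObjectsFetcher.
        print("dumb protocol supported")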