diff --git a/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py b/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py
index 8bfa321..8f9e152 100644
--- a/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py
+++ b/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py
@@ -1,648 +1,663 @@
 #!/usr/local/bin/python
 
 #
 # Copyright (c) 2012 YASUOKA Masahiko <yasuoka@yasuoka.net>
 #
 # Permission to use, copy, modify, and distribute this software for any
 # purpose with or without fee is hereby granted, provided that the above
 # copyright notice and this permission notice appear in all copies.
 #
 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
 # Usage
 #
 #   First import:
 #   % git init --bare /git/openbsd.git
 #   % python cvs2gitdump.py -k OpenBSD -e openbsd.org /cvs/openbsd/src \
 #       > openbsd.dump
 #   % git --git-dir /git/openbsd.git fast-import < openbsd.dump
 #
 #   Periodic import:
 #   % sudo cvsync
 #   % python cvs2gitdump.py -k OpenBSD -e openbsd.org /cvs/openbsd/src \
 #       /git/openbsd.git > openbsd2.dump
 #   % git --git-dir /git/openbsd.git fast-import < openbsd2.dump
 #
 
 import getopt
 import os
 import re
 import subprocess
 import sys
 import time
+from typing import Dict, List, Optional, Tuple, TypeVar
+
 import swh.loader.cvs.rcsparse as rcsparse
 
 CHANGESET_FUZZ_SEC = 300
 
 
 def usage():
     print('usage: cvs2gitdump [-ah] [-z fuzz] [-e email_domain] '
           '[-E log_encodings]\n'
           '\t[-k rcs_keywords] [-b branch] [-m module] [-l last_revision]\n'
           '\tcvsroot [git_dir]', file=sys.stderr)
 
 
-def main():
+def main() -> None:
     email_domain = None
     do_incremental = False
     git_tip = None
     git_branch = 'master'
     dump_all = False
     log_encoding = 'utf-8,iso-8859-1'
     rcs = RcsKeywords()
     modules = []
     last_revision = None
     fuzzsec = CHANGESET_FUZZ_SEC
 
     try:
         opts, args = getopt.getopt(sys.argv[1:], 'ab:hm:z:e:E:k:t:l:')
         for opt, v in opts:
             if opt == '-z':
                 fuzzsec = int(v)
             elif opt == '-e':
                 email_domain = v
             elif opt == '-a':
                 dump_all = True
             elif opt == '-b':
                 git_branch = v
             elif opt == '-E':
                 log_encoding = v
             elif opt == '-k':
                 rcs.add_id_keyword(v)
             elif opt == '-m':
                 if v == '.git':
                     print('Cannot handle the path named \'.git\'',
                           file=sys.stderr)
                     sys.exit(1)
                 modules.append(v)
             elif opt == '-l':
                 last_revision = v
             elif opt == '-h':
                 usage()
                 sys.exit(1)
     except getopt.GetoptError as msg:
         print(msg, file=sys.stderr)
         usage()
         sys.exit(1)
 
     if len(args) == 0 or len(args) > 2:
         usage()
         sys.exit(1)
 
     log_encodings = log_encoding.split(',')
 
     cvsroot = args[0]
     while cvsroot[-1] == '/':
         cvsroot = cvsroot[:-1]
 
     if len(args) == 2:
         do_incremental = True
         git = subprocess.Popen(
             ['git', '--git-dir=' + args[1], '-c',
              'i18n.logOutputEncoding=UTF-8', 'log', '--max-count', '1',
              '--date=raw', '--format=%ae%n%ad%n%H', git_branch],
             encoding='utf-8', stdout=subprocess.PIPE)
+        assert git.stdout is not None
         outs = git.stdout.readlines()
         git.wait()
         if git.returncode != 0:
             print("Couldn't exec git", file=sys.stderr)
             sys.exit(git.returncode)
         git_tip = outs[2].strip()
 
         if last_revision is not None:
             git = subprocess.Popen(
                 ['git', '--git-dir=' + args[1], '-c',
                  'i18n.logOutputEncoding=UTF-8', 'log', '--max-count', '1',
                  '--date=raw', '--format=%ae%n%ad%n%H', last_revision],
                 encoding='utf-8', stdout=subprocess.PIPE)
+            assert git.stdout is not None
             outs = git.stdout.readlines()
             git.wait()
             if git.returncode != 0:
                 print("Coundn't exec git", file=sys.stderr)
                 sys.exit(git.returncode)
         last_author = outs[0].strip()
         last_ctime = float(outs[1].split()[0])
 
         # strip off the domain part from the last author since cvs doesn't have
         # the domain part.
         if do_incremental and email_domain is not None and \
                 last_author.lower().endswith(('@' + email_domain).lower()):
             last_author = last_author[:-1 * (1 + len(email_domain))]
 
     cvs = CvsConv(cvsroot, rcs, not do_incremental, fuzzsec)
     print('** walk cvs tree', file=sys.stderr)
     if len(modules) == 0:
         cvs.walk()
     else:
         for module in modules:
             cvs.walk(module)
 
     changesets = sorted(cvs.changesets)
     nchangesets = len(changesets)
     print('** cvs has %d changeset' % (nchangesets), file=sys.stderr)
 
     if nchangesets <= 0:
         sys.exit(0)
 
     if not dump_all:
         # don't use last 10 minutes for safety
         max_time_max = changesets[-1].max_time - 600
     else:
         max_time_max = changesets[-1].max_time
 
     found_last_revision = False
     markseq = cvs.markseq
     extags = set()
     for k in changesets:
         if do_incremental and not found_last_revision:
             if k.min_time == last_ctime and k.author == last_author:
                 found_last_revision = True
             for tag in k.tags:
                 extags.add(tag)
             continue
         if k.max_time > max_time_max:
             break
 
         marks = {}
 
         for f in k.revs:
             if not do_incremental:
                 marks[f.markseq] = f
             else:
                 markseq = markseq + 1
                 git_dump_file(f.path, f.rev, rcs, markseq)
                 marks[markseq] = f
         log = rcsparse.rcsfile(k.revs[0].path).getlog(k.revs[0].rev)
         for i, e in enumerate(log_encodings):
             try:
                 how = 'ignore' if i == len(log_encodings) - 1 else 'strict'
-                log = log.decode(e, how)
+                log_str = log.decode(e, how)
                 break
             except UnicodeError:
                 pass
-        log = log.encode('utf-8', 'ignore')
+        log = log_str.encode('utf-8', 'ignore')
 
         output('commit refs/heads/' + git_branch)
         markseq = markseq + 1
         output('mark :%d' % (markseq))
         email = k.author if email_domain is None \
             else k.author + '@' + email_domain
         output('author %s <%s> %d +0000' % (k.author, email, k.min_time))
         output('committer %s <%s> %d +0000' % (k.author, email, k.min_time))
 
         output('data', len(log))
         output(log, end='')
         if do_incremental and git_tip is not None:
             output('from', git_tip)
             git_tip = None
 
         for m in marks:
             f = marks[m]
             mode = 0o100755 if os.access(f.path, os.X_OK) else 0o100644
             fn = file_path(cvs.cvsroot, f.path)
             if f.state == 'dead':
                 output('D', fn)
             else:
                 output('M %o :%d %s' % (mode, m, fn))
         output('')
         for tag in k.tags:
             if tag in extags:
                 continue
             output('reset refs/tags/%s' % (tag))
             output('from :%d' % (markseq))
             output('')
 
     if do_incremental and not found_last_revision:
         raise Exception('could not find the last revision')
 
     print('** dumped', file=sys.stderr)
 
 
 #
 # Encode by UTF-8 always for string objects since encoding for git-fast-import
 # is UTF-8.  Also write without conversion for a bytes object (file bodies
 # might be various encodings)
 #
-def output(*args, end='\n'):
+def output(*args, end: str = '\n') -> None:
     if len(args) == 0:
         pass
     elif len(args) > 1 or isinstance(args[0], str):
         lines = ' '.join(
             [arg if isinstance(arg, str) else str(arg) for arg in args])
         sys.stdout.buffer.write(lines.encode('utf-8'))
     else:
         sys.stdout.buffer.write(args[0])
     if len(end) > 0:
         sys.stdout.buffer.write(end.encode('utf-8'))
 
 
 class FileRevision:
-    def __init__(self, path, rev, state, markseq):
+    def __init__(self, path: str, rev: str, state: str, markseq: int) -> None:
         self.path = path
         self.rev = rev
         self.state = state
         self.markseq = markseq
 
 
 class ChangeSetKey:
-    def __init__(self, branch, author, timestamp, log, commitid, fuzzsec):
+    def __init__(
+            self,
+            branch: str,
+            author: str,
+            timestamp: int,
+            log: bytes,
+            commitid: Optional[str],
+            fuzzsec: int
+    ) -> None:
         self.branch = branch
         self.author = author
         self.min_time = timestamp
         self.max_time = timestamp
         self.commitid = commitid
         self.fuzzsec = fuzzsec
-        self.revs = []
-        self.tags = []
+        self.revs: List[FileRevision] = []
+        self.tags: List[str] = []
         self.log_hash = 0
         h = 0
         for c in log:
             h = 31 * h + c
         self.log_hash = h
 
-    def __lt__(self, other):
+    def __lt__(self, other) -> bool:
         return self._cmp(other) < 0
 
-    def __gt__(self, other):
+    def __gt__(self, other) -> bool:
         return self._cmp(other) > 0
 
-    def __eq__(self, other):
+    def __eq__(self, other) -> bool:
         return self._cmp(other) == 0
 
-    def __le__(self, other):
+    def __le__(self, other) -> bool:
         return self._cmp(other) <= 0
 
-    def __ge__(self, other):
+    def __ge__(self, other) -> bool:
         return self._cmp(other) >= 0
 
-    def __ne__(self, other):
+    def __ne__(self, other) -> bool:
         return self._cmp(other) != 0
 
-    def _cmp(self, anon):
+    def _cmp(self, anon) -> int:
+        if not isinstance(anon, ChangeSetKey):
+            raise TypeError()
         # compare by the commitid
         cid = _cmp2(self.commitid, anon.commitid)
         if cid == 0 and self.commitid is not None:
             # both have commitid and they are same
             return 0
 
         # compare by the time
         ma = anon.min_time - self.max_time
         mi = self.min_time - anon.max_time
         ct = self.min_time - anon.min_time
         if ma > self.fuzzsec or mi > self.fuzzsec:
             return ct
 
         if cid != 0:
             # only one has the commitid, this means different commit
             return cid if ct == 0 else ct
 
         # compare by log, branch and author
         c = _cmp2(self.log_hash, anon.log_hash)
         if c == 0:
             c = _cmp2(self.branch, anon.branch)
         if c == 0:
             c = _cmp2(self.author, anon.author)
         if c == 0:
             return 0
 
         return ct if ct != 0 else c
 
-    def merge(self, anot):
+    def merge(self, anot: "ChangeSetKey") -> None:
         self.max_time = max(self.max_time, anot.max_time)
         self.min_time = min(self.min_time, anot.min_time)
         self.revs.extend(anot.revs)
 
-    def __hash__(self):
+    def __hash__(self) -> int:
         return hash(self.branch + '/' + self.author) * 31 + self.log_hash
 
-    def put_file(self, path, rev, state, markseq):
+    def put_file(self, path: str, rev: str, state: str, markseq: int) -> None:
         self.revs.append(FileRevision(path, rev, state, markseq))
 
 
-def _cmp2(a, b):
+TCmp = TypeVar("TCmp", int, str)
+def _cmp2(a: Optional[TCmp], b: Optional[TCmp]) -> int:
     _a = a is not None
     _b = b is not None
-    return (a > b) - (a < b) if _a and _b else (_a > _b) - (_a < _b)
+    return (a > b) - (a < b) if _a and _b else (_a > _b) - (_a < _b)  # type: ignore
 
 
 class CvsConv:
-    def __init__(self, cvsroot, rcs, dumpfile, fuzzsec):
+    def __init__(self, cvsroot: str, rcs: "RcsKeywords", dumpfile: bool, fuzzsec: int) -> None:
         self.cvsroot = cvsroot
         self.rcs = rcs
-        self.changesets = dict()
+        self.changesets: Dict[ChangeSetKey, ChangeSetKey] = dict()
         self.dumpfile = dumpfile
         self.markseq = 0
-        self.tags = dict()
+        self.tags: Dict[str, ChangeSetKey] = dict()
         self.fuzzsec = fuzzsec
 
-    def walk(self, module=None):
+    def walk(self, module: Optional[str] = None) -> None:
         p = [self.cvsroot]
         if module is not None:
             p.append(module)
         path = os.path.join(*p)
 
         for root, dirs, files in os.walk(path):
             if '.git' in dirs:
                 print('Ignore %s: cannot handle the path named \'.git\'' % (
                       root + os.sep + '.git'), file=sys.stderr)
                 dirs.remove('.git')
             if '.git' in files:
                 print('Ignore %s: cannot handle the path named \'.git\'' % (
                       root + os.sep + '.git'), file=sys.stderr)
                 files.remove('.git')
             for f in files:
                 if not f[-2:] == ',v':
                     continue
                 self.parse_file(root + os.sep + f)
 
         for t, c in list(self.tags.items()):
             c.tags.append(t)
 
-    def parse_file(self, path):
-        rtags = dict()
+    def parse_file(self, path: str) -> None:
+        rtags: Dict[str, List[str]] = dict()
         rcsfile = rcsparse.rcsfile(path)
         branches = {'1': 'HEAD', '1.1.1': 'VENDOR'}
-        for k, v in list(rcsfile.symbols.items()):
-            r = v.split('.')
+        for k, v_ in list(rcsfile.symbols.items()):
+            r = v_.split('.')
             if len(r) == 3:
-                branches[v] = 'VENDOR'
+                branches[v_] = 'VENDOR'
             elif len(r) >= 3 and r[-2] == '0':
                 branches['.'.join(r[:-2] + r[-1:])] = k
             if len(r) == 2 and branches[r[0]] == 'HEAD':
-                if v not in rtags:
-                    rtags[v] = list()
-                rtags[v].append(k)
+                if v_ not in rtags:
+                    rtags[v_] = list()
+                rtags[v_].append(k)
 
-        revs = rcsfile.revs.items()
+        revs: List[Tuple[str, Tuple[str, int, str, str, List[str], str, str]]] = list(rcsfile.revs.items())
         # sort by revision descending to priorize 1.1.1.1 than 1.1
-        revs = sorted(revs, key=lambda a: a[1][0], reverse=True)
+        revs.sort(key=lambda a: a[1][0], reverse=True)
         # sort by time
-        revs = sorted(revs, key=lambda a: a[1][1])
+        revs.sort(key=lambda a: a[1][1])
         novendor = False
         have_initial_revision = False
         last_vendor_status = None
         for k, v in revs:
             r = k.split('.')
             if len(r) == 4 and r[0] == '1' and r[1] == '1' and r[2] == '1' \
                     and r[3] == '1':
                 if have_initial_revision:
                     continue
                 if v[3] == 'dead':
                     continue
                 last_vendor_status = v[3]
                 have_initial_revision = True
             elif len(r) == 4 and r[0] == '1' and r[1] == '1' and r[2] == '1':
                 if novendor:
                     continue
                 last_vendor_status = v[3]
             elif len(r) == 2:
                 if r[0] == '1' and r[1] == '1':
                     if have_initial_revision:
                         continue
                     if v[3] == 'dead':
                         continue
                     have_initial_revision = True
                 elif r[0] == '1' and r[1] != '1':
                     novendor = True
                 if last_vendor_status == 'dead' and v[3] == 'dead':
                     last_vendor_status = None
                     continue
                 last_vendor_status = None
             else:
                 # trunk only
                 continue
 
             if self.dumpfile:
                 self.markseq = self.markseq + 1
                 git_dump_file(path, k, self.rcs, self.markseq)
 
             b = '.'.join(r[:-1])
             try:
                 a = ChangeSetKey(
                     branches[b], v[2], v[1], rcsfile.getlog(v[0]), v[6],
                     self.fuzzsec)
             except Exception as e:
                 print('Aborted at %s %s' % (path, v[0]), file=sys.stderr)
                 raise e
 
             a.put_file(path, k, v[3], self.markseq)
             while a in self.changesets:
                 c = self.changesets[a]
                 del self.changesets[a]
                 c.merge(a)
                 a = c
             self.changesets[a] = a
             if k in rtags:
                 for t in rtags[k]:
                     if t not in self.tags or \
                             self.tags[t].max_time < a.max_time:
                         self.tags[t] = a
 
 
-def file_path(r, p):
+def file_path(r: str, p: str) -> str:
     if r.endswith('/'):
         r = r[:-1]
     if p[-2:] == ',v':
         path = p[:-2]               # drop ",v"
     else:
         path = p
-    p = path.split('/')
-    if len(p) > 0 and p[-2] == 'Attic':
-        path = '/'.join(p[:-2] + [p[-1]])
+    p_ = path.split('/')
+    if len(p_) > 1 and p_[-2] == 'Attic':  # [-2] needs at least two components
+        path = '/'.join(p_[:-2] + [p_[-1]])
     if path.startswith(r):
         path = path[len(r) + 1:]
     return path
 
 
-def git_dump_file(path, k, rcs, markseq):
+def git_dump_file(path: str, k: str, rcs: "RcsKeywords", markseq: int) -> None:
     try:
         cont = rcs.expand_keyword(path, rcsparse.rcsfile(path), k)
     except RuntimeError as msg:
         print('Unexpected runtime error on parsing',
               path, k, ':', msg, file=sys.stderr)
         print('unlimit the resource limit may fix this problem.',
               file=sys.stderr)
         sys.exit(1)
     output('blob')
     output('mark :%d' % markseq)
     output('data', len(cont))
     output(cont)
 
 
 class RcsKeywords:
     RCS_KW_AUTHOR   = (1 << 0)
     RCS_KW_DATE     = (1 << 1)
     RCS_KW_LOG      = (1 << 2)
     RCS_KW_NAME     = (1 << 3)
     RCS_KW_RCSFILE  = (1 << 4)
     RCS_KW_REVISION = (1 << 5)
     RCS_KW_SOURCE   = (1 << 6)
     RCS_KW_STATE    = (1 << 7)
     RCS_KW_FULLPATH = (1 << 8)
     RCS_KW_MDOCDATE = (1 << 9)
     RCS_KW_LOCKER   = (1 << 10)
 
     RCS_KW_ID       = (RCS_KW_RCSFILE | RCS_KW_REVISION | RCS_KW_DATE |
                        RCS_KW_AUTHOR | RCS_KW_STATE)
     RCS_KW_HEADER   = (RCS_KW_ID | RCS_KW_FULLPATH)
 
     rcs_expkw = {
         b"Author":   RCS_KW_AUTHOR,
         b"Date":     RCS_KW_DATE,
         b"Header":   RCS_KW_HEADER,
         b"Id":       RCS_KW_ID,
         b"Log":      RCS_KW_LOG,
         b"Name":     RCS_KW_NAME,
         b"RCSfile":  RCS_KW_RCSFILE,
         b"Revision": RCS_KW_REVISION,
         b"Source":   RCS_KW_SOURCE,
         b"State":    RCS_KW_STATE,
         b"Mdocdate": RCS_KW_MDOCDATE,
         b"Locker":   RCS_KW_LOCKER
     }
 
     RCS_KWEXP_NONE    = (1 << 0)
     RCS_KWEXP_NAME    = (1 << 1)    # include keyword name
     RCS_KWEXP_VAL     = (1 << 2)    # include keyword value
     RCS_KWEXP_LKR     = (1 << 3)    # include name of locker
     RCS_KWEXP_OLD     = (1 << 4)    # generate old keyword string
     RCS_KWEXP_ERR     = (1 << 5)    # mode has an error
     RCS_KWEXP_DEFAULT = (RCS_KWEXP_NAME | RCS_KWEXP_VAL)
     RCS_KWEXP_KVL     = (RCS_KWEXP_NAME | RCS_KWEXP_VAL | RCS_KWEXP_LKR)
 
-    def __init__(self):
+    def __init__(self) -> None:
         self.rerecomple()
 
-    def rerecomple(self):
+    def rerecomple(self) -> None:
         pat = b'|'.join(list(self.rcs_expkw.keys()))
         self.re_kw = re.compile(b".*?\\$(" + pat + b")[\\$:]")
 
-    def add_id_keyword(self, keyword):
+    def add_id_keyword(self, keyword) -> None:
         self.rcs_expkw[keyword.encode('ascii')] = self.RCS_KW_ID
         self.rerecomple()
 
-    def kflag_get(self, flags):
+    def kflag_get(self, flags: Optional[str]) -> int:
         if flags is None:
             return self.RCS_KWEXP_DEFAULT
         fl = 0
         for fc in flags:
             if fc == 'k':
                 fl |= self.RCS_KWEXP_NAME
             elif fc == 'v':
                 fl |= self.RCS_KWEXP_VAL
             elif fc == 'l':
                 fl |= self.RCS_KWEXP_LKR
             elif fc == 'o':
                 if len(flags) != 1:
                     fl |= self.RCS_KWEXP_ERR
                 fl |= self.RCS_KWEXP_OLD
             elif fc == 'b':
                 if len(flags) != 1:
                     fl |= self.RCS_KWEXP_ERR
                 fl |= self.RCS_KWEXP_NONE
             else:
                 fl |= self.RCS_KWEXP_ERR
         return fl
 
-    def expand_keyword(self, filename, rcs, r):
+    def expand_keyword(self, filename: str, rcs: rcsparse.rcsfile, r: str) -> bytes:
         rev = rcs.revs[r]
 
         mode = self.kflag_get(rcs.expand)
         if (mode & (self.RCS_KWEXP_NONE | self.RCS_KWEXP_OLD)) != 0:
             return rcs.checkout(rev[0])
 
         ret = []
         for line in rcs.checkout(rev[0]).split(b'\n'):
             logbuf = None
             m = self.re_kw.match(line)
             if m is None:
                 # No RCS Keywords, use it as it is
                 ret += [line]
                 continue
 
             line0 = b''
             while m is not None:
                 try:
                     dsign = m.end(1) + line[m.end(1):].index(b'$')
                 except ValueError:
                     break
                 prefix = line[:m.start(1) - 1]
                 line = line[dsign + 1:]
                 line0 += prefix
                 expbuf = ''
                 if (mode & self.RCS_KWEXP_NAME) != 0:
                     expbuf += '$'
                     expbuf += m.group(1).decode('ascii')
                     if (mode & self.RCS_KWEXP_VAL) != 0:
                         expbuf += ': '
                 if (mode & self.RCS_KWEXP_VAL) != 0:
                     expkw = self.rcs_expkw[m.group(1)]
                     if (expkw & self.RCS_KW_RCSFILE) != 0:
                         expbuf += filename \
                             if (expkw & self.RCS_KW_FULLPATH) != 0 \
                             else os.path.basename(filename)
                         expbuf += " "
                     if (expkw & self.RCS_KW_REVISION) != 0:
                         expbuf += rev[0]
                         expbuf += " "
                     if (expkw & self.RCS_KW_DATE) != 0:
                         expbuf += time.strftime(
                             "%Y/%m/%d %H:%M:%S ", time.gmtime(rev[1]))
                     if (expkw & self.RCS_KW_MDOCDATE) != 0:
                         d = time.gmtime(rev[1])
                         expbuf += time.strftime(
                             "%B%e %Y " if (d.tm_mday < 10) else "%B %e %Y ", d)
                     if (expkw & self.RCS_KW_AUTHOR) != 0:
                         expbuf += rev[2]
                         expbuf += " "
                     if (expkw & self.RCS_KW_STATE) != 0:
                         expbuf += rev[3]
                         expbuf += " "
                     if (expkw & self.RCS_KW_LOG) != 0:
                         p = prefix
                         expbuf += filename \
                             if (expkw & self.RCS_KW_FULLPATH) != 0 \
                             else os.path.basename(filename)
                         expbuf += " "
                         logbuf = p + (
                             'Revision %s  %s  %s\n' % (
                                 rev[0], time.strftime(
                                     "%Y/%m/%d %H:%M:%S", time.gmtime(rev[1])),
                                 rev[2])).encode('ascii')
                         for lline in rcs.getlog(rev[0]).rstrip().split(b'\n'):
                             if len(lline) == 0:
                                 logbuf += p.rstrip() + b'\n'
                             else:
                                 logbuf += p + lline.lstrip() + b'\n'
                         if len(line) == 0:
                             logbuf += p.rstrip()
                         else:
                             logbuf += p + line.lstrip()
                         line = b''
                     if (expkw & self.RCS_KW_SOURCE) != 0:
                         expbuf += filename
                         expbuf += " "
                     if (expkw & (self.RCS_KW_NAME | self.RCS_KW_LOCKER)) != 0:
                         expbuf += " "
                 if (mode & self.RCS_KWEXP_NAME) != 0:
                     expbuf += '$'
                 line0 += expbuf[:255].encode('ascii')
                 m = self.re_kw.match(line)
 
             ret += [line0 + line]
             if logbuf is not None:
                 ret += [logbuf]
         return b'\n'.join(ret)
 
 
 # ----------------------------------------------------------------------
 # entry point
 # ----------------------------------------------------------------------
 if __name__ == '__main__':
     main()
diff --git a/swh/loader/cvs/loader.py b/swh/loader/cvs/loader.py
index d2441fd..563dcf2 100644
--- a/swh/loader/cvs/loader.py
+++ b/swh/loader/cvs/loader.py
@@ -1,470 +1,488 @@
 # Copyright (C) 2015-2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 """Loader in charge of injecting either new or existing cvs repositories to
 swh-storage.
 
 """
 from datetime import datetime
 import os
 import subprocess
 import tempfile
 import time
 from typing import Any, BinaryIO, Dict, Iterator, List, Optional, Sequence, Tuple
 
 from urllib3.util import parse_url
 
 from swh.loader.core.loader import BaseLoader
 from swh.loader.core.utils import clean_dangling_folders
 from swh.loader.cvs.cvs2gitdump.cvs2gitdump import (
     CHANGESET_FUZZ_SEC,
     ChangeSetKey,
     CvsConv,
+    FileRevision,
     RcsKeywords,
     file_path,
 )
-import swh.loader.cvs.cvsclient as cvsclient
+from swh.loader.cvs.cvsclient import CVSClient
 import swh.loader.cvs.rcsparse as rcsparse
 from swh.loader.cvs.rlog import RlogConv
 from swh.loader.exception import NotFound
 from swh.model import from_disk, hashutil
 from swh.model.model import (
     Content,
     Directory,
     Origin,
     Person,
     Revision,
     RevisionType,
     Sha1Git,
     SkippedContent,
     Snapshot,
     SnapshotBranch,
     TargetType,
     TimestampWithTimezone,
 )
 from swh.storage.algos.snapshot import snapshot_get_latest
 from swh.storage.interface import StorageInterface
 
 DEFAULT_BRANCH = b"HEAD"
 
 TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.cvs."
 
 
+class Foo:  # NOTE(review): unused in the visible code; leftover? confirm or drop
+    pass
+
+
 class CvsLoader(BaseLoader):
     """Swh cvs loader.
 
     The repository is local.  The loader deals with
     update on an already previously loaded repository.
 
     """
 
     visit_type = "cvs"
 
     cvs_module_name: str
-    cvsclient: cvsclient.CVSClient
+    cvsclient: CVSClient
 
     # remote CVS repository access (history is parsed from CVS rlog):
     rlog_file: BinaryIO
 
     swh_revision_gen: Iterator[
         Tuple[List[Content], List[SkippedContent], List[Directory], Revision]
     ]
 
     def __init__(
         self,
         storage: StorageInterface,
         url: str,
         origin_url: Optional[str] = None,
         visit_date: Optional[datetime] = None,
         cvsroot_path: Optional[str] = None,
         temp_directory: str = "/tmp",
         max_content_size: Optional[int] = None,
     ):
         super().__init__(
             storage=storage,
             logging_class="swh.loader.cvs.CvsLoader",
             max_content_size=max_content_size,
         )
         self.cvsroot_url = url
         # origin url as unique identifier for origin in swh archive
         self.origin_url = origin_url if origin_url else self.cvsroot_url
         self.temp_directory = temp_directory
 
         # internal state used to store swh objects
         self._contents: List[Content] = []
         self._skipped_contents: List[SkippedContent] = []
         self._directories: List[Directory] = []
         self._revisions: List[Revision] = []
         # internal state, current visit
         self._last_revision: Optional[Revision] = None
         self._visit_status = "full"
         self.visit_date = visit_date
 
         if not cvsroot_path:
             cvsroot_path = tempfile.mkdtemp(
                 suffix="-%s" % os.getpid(),
                 prefix=TEMPORARY_DIR_PREFIX_PATTERN,
                 dir=self.temp_directory,
             )
         self.cvsroot_path = cvsroot_path
 
         self.snapshot: Optional[Snapshot] = None
         self.last_snapshot: Optional[Snapshot] = snapshot_get_latest(
             self.storage, self.origin_url
         )
 
-    def compute_swh_revision(self, k, logmsg) -> Tuple[Revision, from_disk.Directory]:
+    def compute_swh_revision(
+        self, k: ChangeSetKey, logmsg: Optional[bytes]
+    ) -> Tuple[Revision, from_disk.Directory]:
         """Compute swh hash data per CVS changeset.
 
         Returns:
             tuple (rev, swh_directory)
             - rev: current SWH revision computed from checked out work tree
             - swh_directory: dictionary of path, swh hash data with type
 
         """
         # Compute SWH revision from the on-disk state
         swh_dir = from_disk.Directory.from_disk(path=os.fsencode(self.worktree_path))
         parents: Tuple[Sha1Git, ...]
         if self._last_revision:
             parents = (self._last_revision.id,)
         else:
             parents = ()
         revision = self.build_swh_revision(k, logmsg, swh_dir.hash, parents)
         self.log.debug("SWH revision ID: %s", hashutil.hash_to_hex(revision.id))
         self._last_revision = revision
         return (revision, swh_dir)
 
-    def checkout_file_with_rcsparse(self, k, f, rcsfile):
+    def checkout_file_with_rcsparse(
+        self, k: ChangeSetKey, f: FileRevision, rcsfile: rcsparse.rcsfile
+    ) -> None:
         path = file_path(self.cvsroot_path, f.path)
         wtpath = os.path.join(self.worktree_path, path)
         self.log.info("rev %s of file %s" % (f.rev, f.path))
         if f.state == "dead":
             # remove this file from work tree
             try:
                 os.remove(wtpath)
             except FileNotFoundError:
                 pass
         else:
             # create, or update, this file in the work tree
             if not rcsfile:
                 rcsfile = rcsparse.rcsfile(f.path)
             rcs = RcsKeywords()
             contents = rcs.expand_keyword(f.path, rcsfile, f.rev)
             os.makedirs(os.path.dirname(wtpath), exist_ok=True)
             outfile = open(wtpath, mode="wb")
             outfile.write(contents)
             outfile.close()
 
-    def checkout_file_with_cvsclient(self, k, f, cvsclient):
+    def checkout_file_with_cvsclient(
+        self, k: ChangeSetKey, f: FileRevision, cvsclient: CVSClient
+    ) -> None:
         path = file_path(self.cvsroot_path, f.path)
         wtpath = os.path.join(self.worktree_path, path)
         self.log.info("rev %s of file %s" % (f.rev, f.path))
         if f.state == "dead":
             # remove this file from work tree
             try:
                 os.remove(wtpath)
             except FileNotFoundError:
                 pass
         else:
             dirname = os.path.dirname(wtpath)
             os.makedirs(dirname, exist_ok=True)
             self.log.debug("checkout to %s\n" % wtpath)
             fp = cvsclient.checkout(f.path, f.rev, dirname)
             os.rename(fp.name, wtpath)
             try:
                 fp.close()
             except FileNotFoundError:
                 # Well, we have just renamed the file...
                 pass
 
     def process_cvs_changesets(
-        self, cvs_changesets, use_rcsparse,
+        self,
+        cvs_changesets: List[ChangeSetKey],
+        use_rcsparse: bool,
     ) -> Iterator[
         Tuple[List[Content], List[SkippedContent], List[Directory], Revision]
     ]:
         """Process CVS revisions.
 
         At each CVS revision, check out contents and compute swh hashes.
 
+        Args:
+            cvs_changesets: changesets to replay, in the order given
+            use_rcsparse: if true, read file contents and log messages from
+                local RCS files; otherwise use self.cvsclient and self.rlog
+
         Yields:
-            tuple (contents, skipped-contents, directories, revision) of dict as a
-            dictionary with keys, sha1_git, sha1, etc...
+            tuple (contents, skipped-contents, directories, revision), one
+            per changeset
 
         """
         for k in cvs_changesets:
             tstr = time.strftime("%c", time.gmtime(k.max_time))
             self.log.info(
                 "changeset from %s by %s on branch %s", tstr, k.author, k.branch
             )
-            logmsg = ""
+            logmsg: Optional[bytes] = b""
             # Check out all files of this revision and get a log message.
             #
             # The log message is obtained from the first file in the changeset.
             # The message will usually be the same for all affected files, and
             # the SWH archive will only store one version of the log message.
+            #
+            # NOTE(review): if the first file's log message is empty, the
+            # "not logmsg" checks below stay true and the lookup is repeated
+            # for later files, still using k.revs[0].rev -- confirm that this
+            # is intended.
             for f in k.revs:
+                # rcsfile is reset for every file, so the "is None" check
+                # below is always true and each RCS file is parsed once here
                 rcsfile = None
                 if use_rcsparse:
                     if rcsfile is None:
                         rcsfile = rcsparse.rcsfile(f.path)
                     if not logmsg:
                         logmsg = rcsfile.getlog(k.revs[0].rev)
                     self.checkout_file_with_rcsparse(k, f, rcsfile)
                 else:
                     if not logmsg:
                         logmsg = self.rlog.getlog(self.rlog_file, f.path, k.revs[0].rev)
                     self.checkout_file_with_cvsclient(k, f, self.cvsclient)
 
             # TODO: prune empty directories?
             (revision, swh_dir) = self.compute_swh_revision(k, logmsg)
             (contents, skipped_contents, directories) = from_disk.iter_directory(
                 swh_dir
             )
             yield contents, skipped_contents, directories, revision
 
     def prepare_origin_visit(self) -> None:
+        """Set self.origin, using self.origin_url if it is truthy and
+        self.cvsroot_url otherwise."""
         self.origin = Origin(
             url=self.origin_url if self.origin_url else self.cvsroot_url
         )
 
     def pre_cleanup(self) -> None:
         """Cleanup potential dangling files from prior runs (e.g. OOM killed
-           tasks)
+        tasks)
 
         """
+        # presumably only entries of self.temp_directory which match
+        # TEMPORARY_DIR_PREFIX_PATTERN are removed -- see clean_dangling_folders
         clean_dangling_folders(
             self.temp_directory,
             pattern_check=TEMPORARY_DIR_PREFIX_PATTERN,
             log=self.log,
         )
 
     def cleanup(self) -> None:
+        """Log that cleanup was requested; no files are removed here."""
         self.log.info("cleanup")
 
     def fetch_cvs_repo_with_rsync(self, host: str, path: str) -> None:
+        """Mirror a CVS repository from an rsync server into self.cvsroot_path.
+
+        The parent directory of ``path`` is listed first; the copy is only
+        started if the listing contains both a CVSROOT entry and an entry
+        named like self.cvs_module_name.
+
+        Args:
+            host: rsync server host name
+            path: path of the CVS module on the server
+
+        Raises:
+            NotFound: the module or the CVSROOT directory is not listed
+            subprocess.CalledProcessError: an rsync invocation exited with a
+                non-zero status
+        """
         # URL *must* end with a trailing slash in order to get CVSROOT listed
         url = "rsync://%s%s/" % (host, os.path.dirname(path))
+        # NOTE(review): the listing is decoded as ASCII, so a non-ASCII entry
+        # name on the server would raise a decoding error -- confirm that
+        # this is acceptable
         rsync = subprocess.run(["rsync", url], capture_output=True, encoding="ascii")
         rsync.check_returncode()
         have_cvsroot = False
         have_module = False
         for line in rsync.stdout.split("\n"):
             self.log.debug("rsync server: %s", line)
+            # entries are matched on the last space-separated field of a line
             if line.endswith(" CVSROOT"):
                 have_cvsroot = True
             elif line.endswith(" %s" % self.cvs_module_name):
                 have_module = True
             if have_module and have_cvsroot:
                 break
         if not have_module:
             raise NotFound(
                 "CVS module %s not found at %s" % (self.cvs_module_name, url)
             )
         if not have_cvsroot:
             raise NotFound("No CVSROOT directory found at %s" % url)
 
+        # copy the whole parent directory (CVSROOT and modules) recursively
         subprocess.run(["rsync", "-a", url, self.cvsroot_path]).check_returncode()
 
     def prepare(self) -> None:
+        """Prepare the visit: create a work tree, collect all CVS changesets
+        and set up the generator which replays them.
+
+        The scheme of self.origin_url selects the conversion mode:
+
+        - "file" and "rsync": the repository is read from self.cvsroot_path
+          with rcsparse (after an rsync copy for "rsync");
+        - "pserver", "fake" and "ssh": the rlog and the file contents are
+          fetched through a CVSClient.
+
+        On success self.swh_revision_gen is set to the generator returned by
+        process_cvs_changesets().
+
+        Raises:
+            NotFound: the origin URL has no path or an unsupported scheme,
+                the local path does not exist, or no RCS file was found
+        """
         self._last_revision = None
         self.worktree_path = tempfile.mkdtemp(
             suffix="-%s" % os.getpid(),
             prefix=TEMPORARY_DIR_PREFIX_PATTERN,
             dir=self.temp_directory,
         )
         url = parse_url(self.origin_url)
         self.log.debug(
             "prepare; origin_url=%s scheme=%s path=%s",
             self.origin_url,
             url.scheme,
             url.path,
         )
         if not url.path:
             raise NotFound("Invalid CVS origin URL '%s'" % self.origin_url)
         self.cvs_module_name = os.path.basename(url.path)
         os.mkdir(os.path.join(self.worktree_path, self.cvs_module_name))
         if url.scheme == "file":
             if not os.path.exists(url.path):
-                raise NotFound
+                raise NotFound("Path '%s' does not exist" % url.path)
         elif url.scheme == "rsync":
             self.fetch_cvs_repo_with_rsync(url.host, url.path)
 
         if url.scheme == "file" or url.scheme == "rsync":
             # local CVS repository conversion
             have_rcsfile = False
             have_cvsroot = False
             for root, dirs, files in os.walk(self.cvsroot_path):
                 if "CVSROOT" in dirs:
                     have_cvsroot = True
+                    # removing the entry keeps os.walk from descending into it
                     dirs.remove("CVSROOT")
                     continue
                 for f in files:
                     filepath = os.path.join(root, f)
                     if f[-2:] == ",v":
+                        # parsing is only a validity probe; the result is unused
                         rcsfile = rcsparse.rcsfile(filepath)  # noqa: F841
                         self.log.debug(
                             "Looks like we have data to convert; "
                             "found a valid RCS file at %s",
                             filepath,
                         )
                         have_rcsfile = True
                         break
                 if have_rcsfile:
                     break
 
             if not have_rcsfile:
+                # exceptions do not interpolate their arguments, so the
+                # message has to be formatted here
                 raise NotFound(
-                    "Directory %s does not contain any valid RCS files %s",
-                    self.cvsroot_path,
+                    "Directory %s does not contain any valid RCS files"
+                    % self.cvsroot_path
                 )
             if not have_cvsroot:
-                self.log.warn(
+                self.log.warning(
                     "The CVS repository at '%s' lacks a CVSROOT directory; "
                     "we might be ingesting an incomplete copy of the repository",
                     self.cvsroot_path,
                 )
 
             # Unfortunately, there is no way to convert CVS history in an
             # iterative fashion because the data is not indexed by any kind
             # of changeset ID. We need to walk the history of each and every
             # RCS file in the repository during every visit, even if no new
             # changes will be added to the SWH archive afterwards.
             # "CVS’s repository is the software equivalent of a telephone book
             # sorted by telephone number."
             # https://corecursive.com/software-that-doesnt-suck-with-jim-blandy/
             #
             # An implicit assumption made here is that self.cvs_changesets will
             # fit into memory in its entirety. If it won't fit then the CVS walker
             # will need to be modified such that it spools the list of changesets
             # to disk instead.
             cvs = CvsConv(self.cvsroot_path, RcsKeywords(), False, CHANGESET_FUZZ_SEC)
             self.log.info("Walking CVS module %s", self.cvs_module_name)
             cvs.walk(self.cvs_module_name)
             cvs_changesets = sorted(cvs.changesets)
             self.log.info(
                 "CVS changesets found in %s: %d",
                 self.cvs_module_name,
                 len(cvs_changesets),
             )
             self.swh_revision_gen = self.process_cvs_changesets(
                 cvs_changesets, use_rcsparse=True
             )
         elif url.scheme == "pserver" or url.scheme == "fake" or url.scheme == "ssh":
             # remote CVS repository conversion
-            self.cvsclient = cvsclient.CVSClient(url)
+            self.cvsclient = CVSClient(url)
             cvsroot_path = os.path.dirname(url.path)
             self.log.info(
                 "Fetching CVS rlog from %s:%s/%s",
                 url.host,
                 cvsroot_path,
                 self.cvs_module_name,
             )
             self.rlog = RlogConv(cvsroot_path, CHANGESET_FUZZ_SEC)
             self.rlog_file = self.cvsclient.fetch_rlog()
             self.rlog.parse_rlog(self.rlog_file)
             cvs_changesets = sorted(self.rlog.changesets)
             self.log.info(
                 "CVS changesets found for %s: %d",
                 self.cvs_module_name,
                 len(cvs_changesets),
             )
             self.swh_revision_gen = self.process_cvs_changesets(
                 cvs_changesets, use_rcsparse=False
             )
         else:
             raise NotFound("Invalid CVS origin URL '%s'" % self.origin_url)
 
     def fetch_data(self) -> bool:
-        """Fetch the next CVS revision."""
+        """Fetch the next CVS revision.
+
+        Pulls the next item from self.swh_revision_gen and stores it in
+        self._contents, self._skipped_contents, self._directories and
+        self._revisions.
+
+        Returns:
+            True if a revision was fetched; False once the generator is
+            exhausted or after it raised an exception (which is logged).
+        """
         try:
             data = next(self.swh_revision_gen)
         except StopIteration:
             return False
         except Exception:
+            # best effort: the error is logged and the visit stops here
             self.log.exception("Exception in fetch_data:")
             return False  # Stopping iteration
         self._contents, self._skipped_contents, self._directories, rev = data
         self._revisions = [rev]
         return True
 
     def build_swh_revision(
-        self, k: ChangeSetKey, logmsg: bytes, dir_id: bytes, parents: Sequence[bytes]
+        self,
+        k: ChangeSetKey,
+        logmsg: Optional[bytes],
+        dir_id: bytes,
+        parents: Sequence[bytes],
     ) -> Revision:
         """Given a CVS revision, build a swh revision.
 
         Args:
             k: changeset data
             logmsg: the changeset's log message
             dir_id: the tree's hash identifier
             parents: the revision's parents identifier
 
         Returns:
-            The swh revision dictionary.
+            The swh Revision object; author and committer are both built
+            from k.author, and both dates from k.max_time.
 
         """
         author = Person.from_fullname(k.author.encode("UTF-8"))
-        date = TimestampWithTimezone.from_datetime(k.max_time)
+        # presumably k.max_time is an integer UNIX timestamp (it is passed to
+        # time.gmtime() elsewhere) -- TODO confirm that from_dict accepts it
+        date = TimestampWithTimezone.from_dict(k.max_time)
 
         return Revision(
             type=RevisionType.CVS,
             date=date,
             committer_date=date,
             directory=dir_id,
             message=logmsg,
             author=author,
             committer=author,
             synthetic=True,
             extra_headers=[],
             parents=tuple(parents),
         )
 
-    def generate_and_load_snapshot(self, revision) -> Snapshot:
+    def generate_and_load_snapshot(self, revision: Revision) -> Snapshot:
-        """Create the snapshot either from existing revision.
+        """Create a snapshot pointing at the given revision and store it.
 
         Args:
-            revision (dict): Last revision seen if any (None by default)
+            revision: revision which the DEFAULT_BRANCH branch will target
 
         Returns:
-            Optional[Snapshot] The newly created snapshot
+            The newly created snapshot, after adding it to self.storage
 
         """
         snap = Snapshot(
             branches={
                 DEFAULT_BRANCH: SnapshotBranch(
                     target=revision.id, target_type=TargetType.REVISION
                 )
             }
         )
         self.log.debug("snapshot: %s", snap)
         self.storage.snapshot_add([snap])
         return snap
 
     def store_data(self) -> None:
         "Add our current CVS changeset to the archive."
         self.storage.skipped_content_add(self._skipped_contents)
         self.storage.content_add(self._contents)
         self.storage.directory_add(self._directories)
         self.storage.revision_add(self._revisions)
+        # a snapshot targeting the latest revision is written on every call
         assert self._last_revision is not None
         self.snapshot = self.generate_and_load_snapshot(self._last_revision)
         self.log.debug("SWH snapshot ID: %s", hashutil.hash_to_hex(self.snapshot.id))
         self.flush()
         self.loaded_snapshot_id = self.snapshot.id
+        # reset the per-changeset buffers filled by fetch_data()
         self._skipped_contents = []
         self._contents = []
         self._directories = []
         self._revisions = []
 
     def load_status(self) -> Dict[str, Any]:
+        """Return ``{"status": "uneventful"}`` if self.snapshot equals
+        self.last_snapshot, and ``{"status": "eventful"}`` otherwise."""
         assert self.snapshot is not None
         if self.last_snapshot == self.snapshot:
             load_status = "uneventful"
         else:
             load_status = "eventful"
         return {
             "status": load_status,
         }
 
-    def visit_status(self):
+    def visit_status(self) -> str:
+        """Return the value of self._visit_status."""
         return self._visit_status
diff --git a/swh/loader/cvs/rcsparse.pyi b/swh/loader/cvs/rcsparse.pyi
index 2fd3421..b5a7498 100644
--- a/swh/loader/cvs/rcsparse.pyi
+++ b/swh/loader/cvs/rcsparse.pyi
@@ -1,9 +1,27 @@
 # Copyright (C) 2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-from typing import Any
+from collections.abc import Mapping
+from typing import Any, List, Tuple
 
 def __getattr__(name) -> Any: ...
-def rcsfile(path): ...
+
+# Type stub for the RCS file parser object provided by rcsparse.
+class rcsfile:
+    head: str
+    branch: str
+    access: List[str]
+    symbols: Mapping[str, str]  # actually rcsparse.rcstokmap
+    locks: Mapping[str, str]  # actually rcsparse.rcstokmap
+    strict: bool
+    comment: str
+    expand: str
+    # presumably keyed by revision number, with values of the form
+    # (number, date, author, state, branches, revnumstr, commitid), which is
+    # the tuple layout rlog.py mirrors -- TODO confirm against rcsparse
+    revs: Mapping[str, Tuple[str, int, str, str, List[str], str, str]] # actually rcsparse.rcsrevtree
+    desc: str
+
+    def __init__(self, path: str): ...
+
+    # checkout() and getlog() return raw bytes; no text decoding is implied
+    def checkout(self, rev: str = "HEAD") -> bytes: ...
+    def getlog(self, rev: str) -> bytes: ...
+    def sym2rev(self, rev: str = "HEAD") -> str: ...
diff --git a/swh/loader/cvs/rlog.py b/swh/loader/cvs/rlog.py
index 7c93f83..4e2dd2c 100644
--- a/swh/loader/cvs/rlog.py
+++ b/swh/loader/cvs/rlog.py
@@ -1,469 +1,494 @@
 """ RCS/CVS rlog parser, derived from viewvc and cvs2gitdump.py """
 
 # Copyright (C) 1999-2021 The ViewCVS Group. All Rights Reserved.
 #
 # By using ViewVC, you agree to the terms and conditions set forth
 # below:
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
 # are met:
 #
 #   * Redistributions of source code must retain the above copyright
 #     notice, this list of conditions and the following
 #     disclaimer.
 #
 #   * Redistributions in binary form must reproduce the above
 #     copyright notice, this list of conditions and the following
 #     disclaimer in the documentation and/or other materials provided
 #     with the distribution.
 #
 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
 # IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 # Copyright (c) 2012 YASUOKA Masahiko <yasuoka@yasuoka.net>
 #
 # Permission to use, copy, modify, and distribute this software for any
 # purpose with or without fee is hereby granted, provided that the above
 # copyright notice and this permission notice appear in all copies.
 #
 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
 import calendar
 import re
 import time
+from typing import BinaryIO, Dict, List, NamedTuple, Optional, Tuple
 
 from swh.loader.cvs.cvs2gitdump.cvs2gitdump import ChangeSetKey, file_path
 
 # There is no known encoding of path names in CVS. The actual encoding used
 # will depend on the CVS server's operating system and perhaps even the
 # underlying filesystem used to host a CVS repository.
 # It is even conceivable that a given repository may use multiple encodings,
 # e.g. due to migrations of the repository between different servers over time.
 #
 # This issue also affects the CVS network protocol which is communicating
 # paths between the CVS server and the CVS client. For this reason, most
 # public-facing repositories should stick to ASCII in practice.
 #
 # TODO: If known, the actual path encoding used by the repository should
 # be specified as a parameter. This parameter should be a list since
 # multiple encodings may be present in a given repository.
 path_encodings = ["ascii", "utf-8"]
 
 
+class revtuple(NamedTuple):
+    """Revision data parsed from one rlog entry.
+
+    The field order follows the revision tuples of 'rcsparse', so consumers
+    may index it by position (e.g. [1] for the date, [3] for the state).
+    """
+
+    number: str  # revision number string, e.g. "1.2"
+    date: int  # result of calendar.timegm() on the parsed date
+    author: bytes  # kept undecoded; the encoding is arbitrary
+    state: str
+    # the remaining fields are typed as None here; presumably they exist for
+    # positional compatibility with rcsparse only -- TODO confirm
+    branches: None
+    revnumstr: None
+    commitid: None
+
 class RlogConv:
+    """Convert the output of 'cvs rlog' into changesets.
+
+    After parse_rlog() has run, self.changesets holds the changesets found,
+    self.tags maps main-branch tag names to changesets, and self.offsets maps
+    path -> revision number -> offset of the log entry in the rlog file.
+    """
+
-    def __init__(self, cvsroot_path, fuzzsec):
+    def __init__(self, cvsroot_path: str, fuzzsec: int) -> None:
         self.cvsroot_path = cvsroot_path
         self.fuzzsec = fuzzsec
-        self.changesets = dict()
-        self.tags = dict()
-        self.offsets = dict()
-
-    def _process_rlog_revisions(self, path, taginfo, revisions, logmsgs):
+        self.changesets: Dict[ChangeSetKey, ChangeSetKey] = dict()
+        self.tags: Dict[str, ChangeSetKey] = dict()
+        self.offsets: Dict[str, Dict[str, int]] = dict()
+
+    def _process_rlog_revisions(
+        self,
+        path: str,
+        taginfo: Dict[bytes, bytes],
+        revisions: Dict[str, revtuple],
+        logmsgs: Dict[str, Optional[bytes]]
+    ) -> None:
         """ Convert RCS revision history of a file into self.changesets items """
-        rtags = dict()
+        rtags: Dict[str, List[str]] = dict()
         # RCS and CVS represent branches by adding digits to revision numbers.
         # And CVS assigns special meaning to certain revision number ranges.
         #
         # Revision numbers on the main branch have only two digits:
         #
         #  1.1, 1.2, 1.3, ...
         #
         # Branches created with 'cvs tag -b' use even numbers for
         # the third digit:
         #
         #  1.1, 1.2, 1.3, ...  main branch history of the file
         #    |
         #    1.1.2.1, 1.1.2.2 ... a branch (2) forked off r1.1 of the file
         #
         # Branches are given human-readable names by associating
         # RCS tag labels with their revision numbers.
         # Given a file on the above branch which has been changed 10 times
         # since history was forked, the branch tag would look like this:
         #
         #   MY_BRANCH: r1.1.2.10
         #
         # Odd branch numbers are reserved for CVS "vendor" branches.
         # The default vendor branch is 1.1.1.
         # Vendor branches are populated with 'cvs import'.
         # Files on the vendor branch are merged to the main branch automatically
         # unless there are merge conflicts. Such conflicts have to be resolved
         # manually each time 'cvs import' is used to update the vendor branch.
         #
         # See here for details:
         # https://www.gnu.org/software/trans-coord/manual/cvs/html_node/Branches-and-revisions.html#Branches-and-revisions
         #
         # There are also "magic" branch numbers with a zero inserted
         # at the second-rightmost position:
         #
         #  1.1, 1.2, 1.3, ...  main branch history of the file
         #    |
         #    1.1.2.0.1 magic branch (2)
         #
         # This allows CVS to store information about a branch's existence
         # before any files on this branch have been modified.
         # Even-numbered branch revisions appear once the file is modified.
         branches = {"1": "HEAD", "1.1.1": "VENDOR"}
-        for k, v in list(taginfo.items()):
-            r = v.split(".")
+
+        # NOTE(review): taginfo is annotated as Dict[bytes, bytes], but the
+        # loop below treats keys and values as str; splitting a bytes value
+        # on "." would raise TypeError. See the FIXME on the loop header.
+        k: str
+        v_: str
+        for k, v_ in list(taginfo.items()):  # type: ignore  # FIXME, inconsistent types
+            r = v_.split(".")
             if len(r) == 3:
                 # vendor branch number
-                branches[v] = "VENDOR"
+                branches[v_] = "VENDOR"
             elif len(r) >= 3 and r[-2] == "0":
                 # magic branch number
                 branches[".".join(r[:-2] + r[-1:])] = k
             if len(r) == 2 and branches[r[0]] == "HEAD":
                 # main branch number
-                if v not in rtags:
-                    rtags[v] = list()
-                rtags[v].append(k)
+                if v_ not in rtags:
+                    rtags[v_] = list()
+                rtags[v_].append(k)
 
-        revs = revisions.items()
+        revs: List[Tuple[str, revtuple]] = list(revisions.items())
-        # sort by revision descending to priorize 1.1.1.1 than 1.1
+        # sort by revision descending to prioritize 1.1.1.1 over 1.1
-        revs = sorted(revs, key=lambda a: a[1][0], reverse=True)
+        revs.sort(key=lambda a: a[1][0], reverse=True)
         # sort by time
-        revs = sorted(revs, key=lambda a: a[1][1])
+        revs.sort(key=lambda a: a[1][1])
         novendor = False
         have_initial_revision = False
         last_vendor_status = None
         for k, v in revs:
             r = k.split(".")
             if (
                 len(r) == 4
                 and r[0] == "1"
                 and r[1] == "1"
                 and r[2] == "1"
                 and r[3] == "1"
             ):
                 if have_initial_revision:
                     continue
                 if v[3] == "dead":
                     continue
                 last_vendor_status = v[3]
                 have_initial_revision = True
             elif len(r) == 4 and r[0] == "1" and r[1] == "1" and r[2] == "1":
                 if novendor:
                     continue
                 last_vendor_status = v[3]
             elif len(r) == 2:
                 if r[0] == "1" and r[1] == "1":
                     if have_initial_revision:
                         continue
                     if v[3] == "dead":
                         continue
                     have_initial_revision = True
                 elif r[0] == "1" and r[1] != "1":
                     novendor = True
                 if last_vendor_status == "dead" and v[3] == "dead":
                     last_vendor_status = None
                     continue
                 last_vendor_status = None
             else:
                 # trunk only
                 continue
 
             b = ".".join(r[:-1])
             # decode author name in a potentially lossy way;
             # it is only used for internal hashing in this case
             author = v[2].decode("utf-8", "ignore")
-            a = ChangeSetKey(branches[b], author, v[1], logmsgs[k], v[6], self.fuzzsec)
+            logmsg = logmsgs[k]
+            assert logmsg is not None
+            a = ChangeSetKey(branches[b], author, v[1], logmsg, v[6], self.fuzzsec)
 
             a.put_file(path, k, v[3], 0)
+            # fold this file revision into every changeset it compares equal
+            # to, until no equal changeset is left in self.changesets
             while a in self.changesets:
                 c = self.changesets[a]
                 del self.changesets[a]
                 c.merge(a)
                 a = c
             self.changesets[a] = a
             if k in rtags:
                 for t in rtags[k]:
                     if t not in self.tags or self.tags[t].max_time < a.max_time:
                         self.tags[t] = a
 
-    def parse_rlog(self, fp):
+    def parse_rlog(self, fp: BinaryIO) -> None:
+        """Parse a whole rlog stream and record its changesets.
+
+        For every file section, the revisions and log messages are collected
+        together with the offset of each log entry (stored in self.offsets),
+        then handed to _process_rlog_revisions(). Parsing stops at the end of
+        the stream or at the first rlog error message.
+
+        Raises:
+            ValueError: a file section has no filename in its header
+        """
         eof = None
         while eof != _EOF_LOG and eof != _EOF_ERROR:
             filename, branch, taginfo, lockinfo, errmsg, eof = _parse_log_header(fp)
-            revisions = {}
-            logmsgs = {}
+            revisions: Dict[str, revtuple] = {}
+            logmsgs: Dict[str, Optional[bytes]] = {}
             path = ""
             if filename:
                 # There is no known encoding of filenames in CVS.
                 # Attempt to decode the path with our list of known encodings.
                 # If none of them work, forcefully decode the path assuming
                 # the final path encoding provided in the list.
                 for i, e in enumerate(path_encodings):
                     try:
                         how = "ignore" if i == len(path_encodings) - 1 else "strict"
                         fname = filename.decode(e, how)
                         break
                     except UnicodeError:
                         pass
                 path = file_path(self.cvsroot_path, fname)
             elif not eof:
                 raise ValueError("No filename found in rlog header")
             while not eof:
+                # remember where this entry starts so getlog() can seek to it
                 off = fp.tell()
                 rev, logmsg, eof = _parse_log_entry(fp)
                 if rev:
                     revisions[rev[0]] = rev
                     logmsgs[rev[0]] = logmsg
                 if eof != _EOF_LOG and eof != _EOF_ERROR:
                     if path not in self.offsets.keys():
                         self.offsets[path] = dict()
                     if rev:
                         self.offsets[path][rev[0]] = off
 
             self._process_rlog_revisions(path, taginfo, revisions, logmsgs)
 
-    def getlog(self, fp, path, rev):
+    def getlog(self, fp: BinaryIO, path: str, rev: str) -> Optional[bytes]:
+        """Return the log message of revision ``rev`` of ``path``.
+
+        Seeks ``fp`` to the offset recorded by parse_rlog() and parses the
+        log entry found there again. Raises KeyError if no offset was
+        recorded for this path and revision.
+        """
         off = self.offsets[path][rev]
         fp.seek(off)
-        rev, logmsg, eof = _parse_log_entry(fp)
+        _rev, logmsg, eof = _parse_log_entry(fp)
         return logmsg
 
 
 # if your rlog doesn't use 77 '=' characters, then this must change
 LOG_END_MARKER = b"=" * 77 + b"\n"
 ENTRY_END_MARKER = b"-" * 28 + b"\n"
 
 _EOF_FILE = b"end of file entries"  # no more entries for this RCS file
 _EOF_LOG = b"end of log"  # hit the true EOF on the pipe
 _EOF_ERROR = b"error message found"  # rlog issued an error
 
 # rlog error messages look like
 #
 #   rlog: filename/goes/here,v: error message
 #   rlog: filename/goes/here,v:123: error message
 #
 # so we should be able to match them with a regex like
 #
 #   ^rlog\: (.*)(?:\:\d+)?\: (.*)$
 #
 # But for some reason the windows version of rlog omits the "rlog: " prefix
 # for the first error message when the standard error stream has been
 # redirected to a file or pipe. (the prefix is present in subsequent errors
 # and when rlog is run from the console). So the expression below is more
 # complicated
 _re_log_error = re.compile(rb"^(?:rlog\: )*(.*,v)(?:\:\d+)?\: (.*)$")
 
 # CVSNT error messages look like:
 # cvs rcsfile: `C:/path/to/file,v' does not appear to be a valid rcs file
 # cvs [rcsfile aborted]: C:/path/to/file,v: No such file or directory
 # cvs [rcsfile aborted]: cannot open C:/path/to/file,v: Permission denied
 _re_cvsnt_error = re.compile(
     rb"^(?:cvs rcsfile\: |cvs \[rcsfile aborted\]: )"
     rb"(?:\`(.*,v)' |"
     rb"cannot open (.*,v)\: |(.*,v)\: |)"
     rb"(.*)$"
 )
 
 
-def _parse_log_header(fp):
+def _parse_log_header(fp: BinaryIO) -> Tuple[
+        bytes, bytes, Dict[bytes, bytes], Dict[bytes, bytes], bytes, Optional[bytes]
+]:
-    """Parse and RCS/CVS log header.
+    """Parse an RCS/CVS log header.
 
   fp is a file (pipe) opened for reading the log information.
 
   On entry, fp should point to the start of a log entry.
   On exit, fp will have consumed the separator line between the header and
   the first revision log.
 
   If there is no revision information (e.g. the "-h" switch was passed to
-  rlog), then fp will consumed the file separator line on exit.
+  rlog), then fp will have consumed the file separator line on exit.
 
   Returns: filename, default branch, tag dictionary, lock dictionary,
   rlog error message, and eof flag
+
+  Raises ValueError if a CVSNT error line does not contain a filename.
   """
 
     filename = branch = msg = b""
-    taginfo = {}  # tag name => number
-    lockinfo = {}  # revision => locker
+    taginfo: Dict[bytes, bytes] = {}  # tag name => number
+    lockinfo: Dict[bytes, bytes] = {}  # revision => locker
     state = 0  # 0 = base, 1 = parsing symbols, 2 = parsing locks
     eof = None
 
     while 1:
         line = fp.readline()
         if not line:
             # the true end-of-file
             eof = _EOF_LOG
             break
 
         if state == 1:
+            # NOTE(review): indexing bytes yields an int on Python 3, so this
+            # comparison with b"\t" is never true and taginfo stays empty.
+            # Do not change it to line[:1] in isolation: the consumer,
+            # RlogConv._process_rlog_revisions, splits tag values with a str
+            # separator and would fail on bytes values.
             if line[0] == b"\t":
                 [tag, rev] = [x.strip() for x in line.split(b":")]
                 taginfo[tag] = rev
             else:
                 # oops. this line isn't tag info. stop parsing tags.
                 state = 0
 
         if state == 2:
+            # NOTE(review): same int-versus-bytes comparison as above, so
+            # lockinfo stays empty as well
             if line[0] == b"\t":
                 [locker, rev] = [x.strip() for x in line.split(b":")]
                 lockinfo[rev] = locker
             else:
                 # oops. this line isn't lock info. stop parsing tags.
                 state = 0
 
         if state == 0:
             if line[:9] == b"RCS file:":
                 filename = line[10:-1]
             elif line[:5] == b"head:":
                 # head = line[6:-1]
                 pass
             elif line[:7] == b"branch:":
                 branch = line[8:-1]
             elif line[:6] == b"locks:":
                 # start parsing the lock information
                 state = 2
             elif line[:14] == b"symbolic names":
                 # start parsing the tag information
                 state = 1
             elif line == ENTRY_END_MARKER:
                 # end of the headers
                 break
             elif line == LOG_END_MARKER:
                 # end of this file's log information
                 eof = _EOF_FILE
                 break
             else:
                 error = _re_cvsnt_error.match(line)
                 if error:
                     p1, p2, p3, msg = error.groups()
                     filename = p1 or p2 or p3
                     if not filename:
                         raise ValueError(
-                            "Could not get filename from CVSNT error:\n%s" % line
+                            "Could not get filename from CVSNT error:\n%r" % line
                         )
                     eof = _EOF_ERROR
                     break
 
                 error = _re_log_error.match(line)
                 if error:
                     filename, msg = error.groups()
                     if msg[:30] == b"warning: Unknown phrases like ":
                         # don't worry about this warning. it can happen with some RCS
                         # files that have unknown fields in them e.g. "permissions 644;"
                         continue
                     eof = _EOF_ERROR
                     break
 
     return filename, branch, taginfo, lockinfo, msg, eof
 
 
 _re_log_info = re.compile(
     rb"^date:\s+([^;]+);"
     rb"\s+author:\s+([^;]+);"
     rb"\s+state:\s+([^;]+);"
     rb"(\s+lines:\s+([0-9\s+-]+);?)?"
     rb"(\s+commitid:\s+([a-zA-Z0-9]+);)?\n$"
 )
 
 # TODO: _re_rev should be updated to extract the "locked" flag
 _re_rev = re.compile(rb"^revision\s+([0-9.]+).*")
 
 
 def cvs_strptime(timestr):
+    """Parse an rlog date string into a 9-item UTC time tuple.
+
+    Two formats are accepted: "YYYY/MM/DD HH:MM:SS", which is taken as is,
+    and "YYYY-MM-DD HH:MM:SS +ZZZZ", whose UTC offset is applied so that the
+    result can be passed to calendar.timegm(). The last item (tm_isdst) of
+    the returned tuple is always 0.
+
+    Raises ValueError if timestr matches neither format.
+    """
     try:
         return time.strptime(timestr, "%Y/%m/%d %H:%M:%S")[:-1] + (0,)
     except ValueError:
-        return time.strptime(timestr, "%Y-%m-%d %H:%M:%S %z")[:-1] + (0,)
+        tm = time.strptime(timestr, "%Y-%m-%d %H:%M:%S %z")
+        # the offset parsed by %z is only available as an attribute; it is
+        # not part of the tuple items and was silently dropped before
+        offset = getattr(tm, "tm_gmtoff", None)
+        if not offset:
+            return tm[:-1] + (0,)
+        return time.gmtime(calendar.timegm(tm) - offset)[:-1] + (0,)
 
 
-def _parse_log_entry(fp):
+def _parse_log_entry(fp) -> Tuple[Optional[revtuple], Optional[bytes], Optional[bytes]]:
     """Parse a single log entry.
 
   On entry, fp should point to the first line of the entry (the "revision"
   line).
   On exit, fp will have consumed the log separator line (dashes) or the
   end-of-file marker (equals).
 
-  Returns: Revision data tuple, and eof flag (see _EOF_*)
+  Returns: a 3-tuple: the revision data tuple (number string, date, author, state,
+  branches, revnumstr, commitid) or None, the log or None, and eof flag (see _EOF_*)
   """
     rev = None
     line = fp.readline()
     if not line:
         return None, None, _EOF_LOG
     if line == LOG_END_MARKER:
         # Needed because some versions of RCS precede LOG_END_MARKER
         # with ENTRY_END_MARKER
         return None, None, _EOF_FILE
     if line[:8] == b"revision":
         match = _re_rev.match(line)
         if not match:
             return None, None, _EOF_LOG
         rev = match.group(1)
 
         line = fp.readline()
         if not line:
             return None, None, _EOF_LOG
         match = _re_log_info.match(line)
 
     eof = None
     log = b""
     while 1:
         line = fp.readline()
         if not line:
             # true end-of-file
             eof = _EOF_LOG
             break
         if line[:9] == b"branches:":
             continue
         if line == ENTRY_END_MARKER:
             break
         if line == LOG_END_MARKER:
             # end of this file's log information
             eof = _EOF_FILE
             break
 
         log = log + line
 
     if not rev or not match:
         # there was a parsing error
         return None, None, eof
 
     # parse out a time tuple for the local time
     tm = cvs_strptime(match.group(1).decode("UTF-8"))
 
     # rlog seems to assume that two-digit years are 1900-based (so, "04"
     # comes out as "1904", not "2004").
     EPOCH = 1970
     if tm[0] < EPOCH:
         tm = list(tm)
         if (tm[0] - 1900) < 70:
             tm[0] = tm[0] + 100
         if tm[0] < EPOCH:
             raise ValueError("invalid year")
     date = calendar.timegm(tm)
 
     # return a revision tuple compatible with 'rcsparse', the log message,
     # and the EOF marker
     return (
-        (
+        revtuple(
             rev.decode("ascii"),  # revision number string
             date,
             match.group(2),  # author (encoding is arbitrary; don't attempt to decode)
             match.group(3).decode(
                 "ascii"
             ),  # state, usually "Exp" or "dead"; non-ASCII data here would be weird
             None,  # TODO: branches of this rev
             None,  # TODO: revnumstr of previous rev
             None,  # TODO: commitid
         ),
         log,
         eof,
     )