diff --git a/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py b/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py
index abf70e2..572a89d 100644
--- a/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py
+++ b/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py
@@ -1,646 +1,645 @@
 #!/usr/local/bin/python
 #
 # Copyright (c) 2012 YASUOKA Masahiko
 #
 # Permission to use, copy, modify, and distribute this software for any
 # purpose with or without fee is hereby granted, provided that the above
 # copyright notice and this permission notice appear in all copies.
 #
 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
 # Usage
 #
 # First import:
 #   % git init --bare /git/openbsd.git
 #   % python cvs2gitdump.py -k OpenBSD -e openbsd.org /cvs/openbsd/src \
 #       > openbsd.dump
 #   % git --git-dir /git/openbsd.git fast-import < openbsd.dump
 #
 # Periodic import:
 #   % sudo cvsync
 #   % python cvs2gitdump.py -k OpenBSD -e openbsd.org /cvs/openbsd/src \
 #       /git/openbsd.git > openbsd2.dump
 #   % git --git-dir /git/openbsd.git fast-import < openbsd2.dump
 #
 
 import getopt
 import os
 import re
 import subprocess
 import sys
 import time
 
 import swh.loader.cvs.rcsparse as rcsparse
 
 CHANGESET_FUZZ_SEC = 300
 
 
 def usage():
     print('usage: cvs2gitdump [-ah] [-z fuzz] [-e email_domain] '
           '[-E log_encodings]\n'
           '\t[-k rcs_keywords] [-b branch] [-m module] [-l last_revision]\n'
           '\tcvsroot [git_dir]', file=sys.stderr)
 
 
 def main():
     email_domain = None
     do_incremental = False
     git_tip = None
     git_branch = 'master'
     dump_all = False
     log_encoding = 'utf-8,iso-8859-1'
     rcs = RcsKeywords()
     modules = []
     last_revision = None
     fuzzsec = CHANGESET_FUZZ_SEC
 
     try:
         opts, args = getopt.getopt(sys.argv[1:], 'ab:hm:z:e:E:k:t:l:')
         for opt, v in opts:
             if opt == '-z':
                 fuzzsec = int(v)
             elif opt == '-e':
                 email_domain = v
             elif opt == '-a':
                 dump_all = True
             elif opt == '-b':
                 git_branch = v
             elif opt == '-E':
                 log_encoding = v
             elif opt == '-k':
                 rcs.add_id_keyword(v)
             elif opt == '-m':
                 if v == '.git':
                     print('Cannot handle the path named \'.git\'',
                           file=sys.stderr)
                     sys.exit(1)
                 modules.append(v)
             elif opt == '-l':
                 last_revision = v
             elif opt == '-h':
                 usage()
                 sys.exit(1)
     except getopt.GetoptError as msg:
         print(msg, file=sys.stderr)
         usage()
         sys.exit(1)
 
     if len(args) == 0 or len(args) > 2:
         usage()
         sys.exit(1)
 
     log_encodings = log_encoding.split(',')
 
     cvsroot = args[0]
     while cvsroot[-1] == '/':
         cvsroot = cvsroot[:-1]
 
     if len(args) == 2:
         do_incremental = True
         git = subprocess.Popen(
             ['git', '--git-dir=' + args[1], '-c',
              'i18n.logOutputEncoding=UTF-8', 'log', '--max-count', '1',
              '--date=raw', '--format=%ae%n%ad%n%H', git_branch],
             encoding='utf-8', stdout=subprocess.PIPE)
         outs = git.stdout.readlines()
         git.wait()
         if git.returncode != 0:
             print("Couldn't exec git", file=sys.stderr)
             sys.exit(git.returncode)
         git_tip = outs[2].strip()
 
         if last_revision is not None:
             git = subprocess.Popen(
                 ['git', '--git-dir=' + args[1], '-c',
                  'i18n.logOutputEncoding=UTF-8', 'log', '--max-count', '1',
                  '--date=raw', '--format=%ae%n%ad%n%H', last_revision],
                 encoding='utf-8', stdout=subprocess.PIPE)
             outs = git.stdout.readlines()
             git.wait()
             if git.returncode != 0:
                 print("Coundn't exec git", file=sys.stderr)
                 sys.exit(git.returncode)
         last_author = outs[0].strip()
         last_ctime = float(outs[1].split()[0])
 
     # strip off the domain part from the last author since cvs doesn't have
     # the domain part.
     if do_incremental and email_domain is not None and \
             last_author.lower().endswith(('@' + email_domain).lower()):
         last_author = last_author[:-1 * (1 + len(email_domain))]
 
     cvs = CvsConv(cvsroot, rcs, not do_incremental, fuzzsec)
     print('** walk cvs tree', file=sys.stderr)
     if len(modules) == 0:
         cvs.walk()
     else:
         for module in modules:
             cvs.walk(module)
 
     changesets = sorted(cvs.changesets)
     nchangesets = len(changesets)
     print('** cvs has %d changeset' % (nchangesets), file=sys.stderr)
 
     if nchangesets <= 0:
         sys.exit(0)
 
     if not dump_all:
         # don't use last 10 minutes for safety
         max_time_max = changesets[-1].max_time - 600
     else:
         max_time_max = changesets[-1].max_time
 
     found_last_revision = False
     markseq = cvs.markseq
     extags = set()
     for k in changesets:
         if do_incremental and not found_last_revision:
             if k.min_time == last_ctime and k.author == last_author:
                 found_last_revision = True
             for tag in k.tags:
                 extags.add(tag)
             continue
         if k.max_time > max_time_max:
             break
         marks = {}
         for f in k.revs:
             if not do_incremental:
                 marks[f.markseq] = f
             else:
                 markseq = markseq + 1
                 git_dump_file(f.path, f.rev, rcs, markseq)
                 marks[markseq] = f
         log = rcsparse.rcsfile(k.revs[0].path).getlog(k.revs[0].rev)
         for i, e in enumerate(log_encodings):
             try:
                 how = 'ignore' if i == len(log_encodings) - 1 else 'strict'
                 log = log.decode(e, how)
                 break
             except UnicodeError:
                 pass
         log = log.encode('utf-8', 'ignore')
 
         output('commit refs/heads/' + git_branch)
         markseq = markseq + 1
         output('mark :%d' % (markseq))
         email = k.author if email_domain is None \
             else k.author + '@' + email_domain
         output('author %s <%s> %d +0000' % (k.author, email, k.min_time))
         output('committer %s <%s> %d +0000' % (k.author, email, k.min_time))
         output('data', len(log))
         output(log, end='')
         if do_incremental and git_tip is not None:
             output('from', git_tip)
             git_tip = None
         for m in marks:
             f = marks[m]
             mode = 0o100755 if os.access(f.path, os.X_OK) else 0o100644
             fn = file_path(cvs.cvsroot, f.path)
             if f.state == 'dead':
                 output('D', fn)
             else:
                 output('M %o :%d %s' % (mode, m, fn))
         output('')
         for tag in k.tags:
             if tag in extags:
                 continue
             output('reset refs/tags/%s' % (tag))
             output('from :%d' % (markseq))
             output('')
 
     if do_incremental and not found_last_revision:
         raise Exception('could not find the last revision')
 
     print('** dumped', file=sys.stderr)
 
 
 #
 # Encode by UTF-8 always for string objects since encoding for git-fast-import
 # is UTF-8.  Also write without conversion for a bytes object (file bodies
 # might be various encodings)
 #
 def output(*args, end='\n'):
     if len(args) == 0:
         pass
     elif len(args) > 1 or isinstance(args[0], str):
         lines = ' '.join(
             [arg if isinstance(arg, str) else str(arg) for arg in args])
         sys.stdout.buffer.write(lines.encode('utf-8'))
     else:
         sys.stdout.buffer.write(args[0])
     if len(end) > 0:
         sys.stdout.buffer.write(end.encode('utf-8'))
 
 
 class FileRevision:
     def __init__(self, path, rev, state, markseq):
         self.path = path
         self.rev = rev
         self.state = state
         self.markseq = markseq
 
 
 class ChangeSetKey:
     def __init__(self, branch, author, timestamp, log, commitid, fuzzsec):
         self.branch = branch
         self.author = author
         self.min_time = timestamp
         self.max_time = timestamp
         self.commitid = commitid
         self.fuzzsec = fuzzsec
         self.revs = []
         self.tags = []
         self.log_hash = 0
         h = 0
         for c in log:
             h = 31 * h + c
         self.log_hash = h
 
     def __lt__(self, other):
         return self._cmp(other) < 0
 
     def __gt__(self, other):
         return self._cmp(other) > 0
 
     def __eq__(self, other):
         return self._cmp(other) == 0
 
     def __le__(self, other):
         return self._cmp(other) <= 0
 
     def __ge__(self, other):
         return self._cmp(other) >= 0
 
     def __ne__(self, other):
         return self._cmp(other) != 0
 
     def _cmp(self, anon):
         # compare by the commitid
         cid = _cmp2(self.commitid, anon.commitid)
         if cid == 0 and self.commitid is not None:
             # both have commitid and they are same
             return 0
 
         # compare by the time
         ma = anon.min_time - self.max_time
         mi = self.min_time - anon.max_time
         ct = self.min_time - anon.min_time
         if ma > self.fuzzsec or mi > self.fuzzsec:
             return ct
 
         if cid != 0:
             # only one has the commitid, this means different commit
             return cid if ct == 0 else ct
 
         # compare by log, branch and author
         c = _cmp2(self.log_hash, anon.log_hash)
         if c == 0:
             c = _cmp2(self.branch, anon.branch)
         if c == 0:
             c = _cmp2(self.author, anon.author)
         if c == 0:
             return 0
 
         return ct if ct != 0 else c
 
     def merge(self, anot):
         self.max_time = max(self.max_time, anot.max_time)
         self.min_time = min(self.min_time, anot.min_time)
         self.revs.extend(anot.revs)
 
     def __hash__(self):
         return hash(self.branch + '/' + self.author) * 31 + self.log_hash
 
     def put_file(self, path, rev, state, markseq):
         self.revs.append(FileRevision(path, rev, state, markseq))
 
 
 def _cmp2(a, b):
     _a = a is not None
     _b = b is not None
     return (a > b) - (a < b) if _a and _b else (_a > _b) - (_a < _b)
 
 
 class CvsConv:
     def __init__(self, cvsroot, rcs, dumpfile, fuzzsec):
         self.cvsroot = cvsroot
         self.rcs = rcs
         self.changesets = dict()
         self.dumpfile = dumpfile
         self.markseq = 0
         self.tags = dict()
         self.fuzzsec = fuzzsec
 
     def walk(self, module=None):
         p = [self.cvsroot]
         if module is not None:
             p.append(module)
         path = os.path.join(*p)
 
         for root, dirs, files in os.walk(path):
             if '.git' in dirs:
                 print('Ignore %s: cannot handle the path named \'.git\'' % (
                     root + os.sep + '.git'), file=sys.stderr)
                 dirs.remove('.git')
             if '.git' in files:
                 print('Ignore %s: cannot handle the path named \'.git\'' % (
                     root + os.sep + '.git'), file=sys.stderr)
                 files.remove('.git')
             for f in files:
                 if not f[-2:] == ',v':
                     continue
                 self.parse_file(root + os.sep + f)
 
         for t, c in list(self.tags.items()):
             c.tags.append(t)
 
     def parse_file(self, path):
         rtags = dict()
         rcsfile = rcsparse.rcsfile(path)
         branches = {'1': 'HEAD', '1.1.1': 'VENDOR'}
         for k, v in list(rcsfile.symbols.items()):
             r = v.split('.')
             if len(r) == 3:
                 branches[v] = 'VENDOR'
             elif len(r) >= 3 and r[-2] == '0':
                 branches['.'.join(r[:-2] + r[-1:])] = k
             if len(r) == 2 and branches[r[0]] == 'HEAD':
                 if v not in rtags:
                     rtags[v] = list()
                 rtags[v].append(k)
 
         revs = rcsfile.revs.items()
         # sort by revision descending to priorize 1.1.1.1 than 1.1
         revs = sorted(revs, key=lambda a: a[1][0], reverse=True)
         # sort by time
         revs = sorted(revs, key=lambda a: a[1][1])
         novendor = False
         have_initial_revision = False
         last_vendor_status = None
         for k, v in revs:
             r = k.split('.')
             if len(r) == 4 and r[0] == '1' and r[1] == '1' and r[2] == '1' \
                     and r[3] == '1':
                 if have_initial_revision:
                     continue
                 if v[3] == 'dead':
                     continue
                 last_vendor_status = v[3]
                 have_initial_revision = True
             elif len(r) == 4 and r[0] == '1' and r[1] == '1' and r[2] == '1':
                 if novendor:
                     continue
                 last_vendor_status = v[3]
             elif len(r) == 2:
                 if r[0] == '1' and r[1] == '1':
                     if have_initial_revision:
                         continue
                     if v[3] == 'dead':
                         continue
                     have_initial_revision = True
                 elif r[0] == '1' and r[1] != '1':
                     novendor = True
                 if last_vendor_status == 'dead' and v[3] == 'dead':
                     last_vendor_status = None
                     continue
                 last_vendor_status = None
             else:
                 # trunk only
                 continue
 
             if self.dumpfile:
                 self.markseq = self.markseq + 1
                 git_dump_file(path, k, self.rcs, self.markseq)
 
             b = '.'.join(r[:-1])
             try:
                 a = ChangeSetKey(
                     branches[b], v[2], v[1], rcsfile.getlog(v[0]), v[6],
                     self.fuzzsec)
             except Exception as e:
                 print('Aborted at %s %s' % (path, v[0]), file=sys.stderr)
                 raise e
 
             a.put_file(path, k, v[3], self.markseq)
             while a in self.changesets:
                 c = self.changesets[a]
                 del self.changesets[a]
                 c.merge(a)
                 a = c
             self.changesets[a] = a
             if k in rtags:
                 for t in rtags[k]:
                     if t not in self.tags or \
                             self.tags[t].max_time < a.max_time:
                         self.tags[t] = a
 
 
 def file_path(r, p):
     if r.endswith('/'):
         r = r[:-1]
     path = p[:-2]               # drop ",v"
     p = path.split('/')
     if len(p) > 0 and p[-2] == 'Attic':
         path = '/'.join(p[:-2] + [p[-1]])
     if path.startswith(r):
         path = path[len(r) + 1:]
     return path
 
 
 def git_dump_file(path, k, rcs, markseq):
     try:
-        cont = rcs.expand_keyword(path, k)
+        cont = rcs.expand_keyword(path, rcsparse.rcsfile(path), k)
     except RuntimeError as msg:
         print('Unexpected runtime error on parsing', path, k, ':', msg,
               file=sys.stderr)
         print('unlimit the resource limit may fix this problem.',
               file=sys.stderr)
         sys.exit(1)
     output('blob')
     output('mark :%d' % markseq)
     output('data', len(cont))
     output(cont)
 
 
 class RcsKeywords:
     RCS_KW_AUTHOR = (1 << 0)
     RCS_KW_DATE = (1 << 1)
     RCS_KW_LOG = (1 << 2)
     RCS_KW_NAME = (1 << 3)
     RCS_KW_RCSFILE = (1 << 4)
     RCS_KW_REVISION = (1 << 5)
     RCS_KW_SOURCE = (1 << 6)
     RCS_KW_STATE = (1 << 7)
     RCS_KW_FULLPATH = (1 << 8)
     RCS_KW_MDOCDATE = (1 << 9)
     RCS_KW_LOCKER = (1 << 10)
 
     RCS_KW_ID = (RCS_KW_RCSFILE | RCS_KW_REVISION | RCS_KW_DATE |
                  RCS_KW_AUTHOR | RCS_KW_STATE)
     RCS_KW_HEADER = (RCS_KW_ID | RCS_KW_FULLPATH)
 
     rcs_expkw = {
         b"Author": RCS_KW_AUTHOR,
         b"Date": RCS_KW_DATE,
         b"Header": RCS_KW_HEADER,
         b"Id": RCS_KW_ID,
         b"Log": RCS_KW_LOG,
         b"Name": RCS_KW_NAME,
         b"RCSfile": RCS_KW_RCSFILE,
         b"Revision": RCS_KW_REVISION,
         b"Source": RCS_KW_SOURCE,
         b"State": RCS_KW_STATE,
         b"Mdocdate": RCS_KW_MDOCDATE,
         b"Locker": RCS_KW_LOCKER
     }
 
     RCS_KWEXP_NONE = (1 << 0)
     RCS_KWEXP_NAME = (1 << 1)    # include keyword name
     RCS_KWEXP_VAL = (1 << 2)     # include keyword value
     RCS_KWEXP_LKR = (1 << 3)     # include name of locker
     RCS_KWEXP_OLD = (1 << 4)     # generate old keyword string
     RCS_KWEXP_ERR = (1 << 5)     # mode has an error
 
     RCS_KWEXP_DEFAULT = (RCS_KWEXP_NAME | RCS_KWEXP_VAL)
     RCS_KWEXP_KVL = (RCS_KWEXP_NAME | RCS_KWEXP_VAL | RCS_KWEXP_LKR)
 
     def __init__(self):
         self.rerecomple()
 
     def rerecomple(self):
         pat = b'|'.join(list(self.rcs_expkw.keys()))
         self.re_kw = re.compile(b".*?\\$(" + pat + b")[\\$:]")
 
     def add_id_keyword(self, keyword):
         self.rcs_expkw[keyword.encode('ascii')] = self.RCS_KW_ID
         self.rerecomple()
 
     def kflag_get(self, flags):
         if flags is None:
             return self.RCS_KWEXP_DEFAULT
         fl = 0
         for fc in flags:
             if fc == 'k':
                 fl |= self.RCS_KWEXP_NAME
             elif fc == 'v':
                 fl |= self.RCS_KWEXP_VAL
             elif fc == 'l':
                 fl |= self.RCS_KWEXP_LKR
             elif fc == 'o':
                 if len(flags) != 1:
                     fl |= self.RCS_KWEXP_ERR
                 fl |= self.RCS_KWEXP_OLD
             elif fc == 'b':
                 if len(flags) != 1:
                     fl |= self.RCS_KWEXP_ERR
                 fl |= self.RCS_KWEXP_NONE
             else:
                 fl |= self.RCS_KWEXP_ERR
         return fl
 
-    def expand_keyword(self, filename, r):
-        rcs = rcsparse.rcsfile(filename)
+    def expand_keyword(self, filename, rcs, r):
         rev = rcs.revs[r]
 
         mode = self.kflag_get(rcs.expand)
         if (mode & (self.RCS_KWEXP_NONE | self.RCS_KWEXP_OLD)) != 0:
             return rcs.checkout(rev[0])
 
         ret = []
         for line in rcs.checkout(rev[0]).split(b'\n'):
             logbuf = None
             m = self.re_kw.match(line)
             if m is None:
                 # No RCS Keywords, use it as it is
                 ret += [line]
                 continue
 
             line0 = b''
             while m is not None:
                 try:
                     dsign = m.end(1) + line[m.end(1):].index(b'$')
                 except ValueError:
                     break
                 prefix = line[:m.start(1) - 1]
                 line = line[dsign + 1:]
                 line0 += prefix
                 expbuf = ''
                 if (mode & self.RCS_KWEXP_NAME) != 0:
                     expbuf += '$'
                     expbuf += m.group(1).decode('ascii')
                     if (mode & self.RCS_KWEXP_VAL) != 0:
                         expbuf += ': '
                 if (mode & self.RCS_KWEXP_VAL) != 0:
                     expkw = self.rcs_expkw[m.group(1)]
                     if (expkw & self.RCS_KW_RCSFILE) != 0:
                         expbuf += filename \
                             if (expkw & self.RCS_KW_FULLPATH) != 0 \
                             else os.path.basename(filename)
                         expbuf += " "
                     if (expkw & self.RCS_KW_REVISION) != 0:
                         expbuf += rev[0]
                         expbuf += " "
                     if (expkw & self.RCS_KW_DATE) != 0:
                         expbuf += time.strftime(
                             "%Y/%m/%d %H:%M:%S ", time.gmtime(rev[1]))
                     if (expkw & self.RCS_KW_MDOCDATE) != 0:
                         d = time.gmtime(rev[1])
                         expbuf += time.strftime(
                             "%B%e %Y " if (d.tm_mday < 10) else "%B %e %Y ",
                             d)
                     if (expkw & self.RCS_KW_AUTHOR) != 0:
                         expbuf += rev[2]
                         expbuf += " "
                     if (expkw & self.RCS_KW_STATE) != 0:
                         expbuf += rev[3]
                         expbuf += " "
                     if (expkw & self.RCS_KW_LOG) != 0:
                         p = prefix
                         expbuf += filename \
                             if (expkw & self.RCS_KW_FULLPATH) != 0 \
                             else os.path.basename(filename)
                         expbuf += " "
                         logbuf = p + (
                             'Revision %s %s %s\n' % (
                                 rev[0], time.strftime(
                                     "%Y/%m/%d %H:%M:%S",
                                     time.gmtime(rev[1])),
                                 rev[2])).encode('ascii')
                         for lline in rcs.getlog(rev[0]).rstrip().split(b'\n'):
                             if len(lline) == 0:
                                 logbuf += p.rstrip() + b'\n'
                             else:
                                 logbuf += p + lline.lstrip() + b'\n'
                         if len(line) == 0:
                             logbuf += p.rstrip()
                         else:
                             logbuf += p + line.lstrip()
                         line = b''
                     if (expkw & self.RCS_KW_SOURCE) != 0:
                         expbuf += filename
                         expbuf += " "
                     if (expkw & (self.RCS_KW_NAME | self.RCS_KW_LOCKER)) != 0:
                         expbuf += " "
                 if (mode & self.RCS_KWEXP_NAME) != 0:
                     expbuf += '$'
                 line0 += expbuf[:255].encode('ascii')
                 m = self.re_kw.match(line)
 
             ret += [line0 + line]
             if logbuf is not None:
                 ret += [logbuf]
         return b'\n'.join(ret)
 
 
 # ----------------------------------------------------------------------
 # entry point
 # ----------------------------------------------------------------------
 if __name__ == '__main__':
     main()
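
The two hunks above change RcsKeywords.expand_keyword() so that it receives an already-parsed rcsparse.rcsfile object instead of re-parsing the ,v file itself; the only call site in this file, git_dump_file(), now passes rcsparse.rcsfile(path) explicitly. Below is a minimal usage sketch of the revised signature (not part of the diff): the ,v path and revision numbers are hypothetical placeholders, and RcsKeywords is assumed to be importable from this module.

    import swh.loader.cvs.rcsparse as rcsparse
    # assumes RcsKeywords from this module (cvs2gitdump.py) is in scope

    rcs_kw = RcsKeywords()
    rcs_kw.add_id_keyword('OpenBSD')        # same effect as the -k option above

    path = '/cvs/openbsd/src/etc/rc,v'      # hypothetical RCS ,v file
    rcsfile = rcsparse.rcsfile(path)        # parse the ,v file once ...
    for rev in ('1.1', '1.2'):              # hypothetical revision numbers
        # ... and reuse the parsed object for every revision being expanded
        content = rcs_kw.expand_keyword(path, rcsfile, rev)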