diff --git a/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py b/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py
index 572a89d..8bfa321 100644
--- a/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py
+++ b/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py
@@ -1,645 +1,648 @@
#!/usr/local/bin/python
#
# Copyright (c) 2012 YASUOKA Masahiko
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

# Usage
#
# First import:
#   % git init --bare /git/openbsd.git
#   % python cvs2gitdump.py -k OpenBSD -e openbsd.org /cvs/openbsd/src \
#       > openbsd.dump
#   % git --git-dir /git/openbsd.git fast-import < openbsd.dump
#
# Periodic import:
#   % sudo cvsync
#   % python cvs2gitdump.py -k OpenBSD -e openbsd.org /cvs/openbsd/src \
#       /git/openbsd.git > openbsd2.dump
#   % git --git-dir /git/openbsd.git fast-import < openbsd2.dump
#

import getopt
import os
import re
import subprocess
import sys
import time

import swh.loader.cvs.rcsparse as rcsparse

CHANGESET_FUZZ_SEC = 300


def usage():
    print('usage: cvs2gitdump [-ah] [-z fuzz] [-e email_domain] '
          '[-E log_encodings]\n'
          '\t[-k rcs_keywords] [-b branch] [-m module] [-l last_revision]\n'
          '\tcvsroot [git_dir]', file=sys.stderr)


def main():
    email_domain = None
    do_incremental = False
    git_tip = None
    git_branch = 'master'
    dump_all = False
    log_encoding = 'utf-8,iso-8859-1'
    rcs = RcsKeywords()
    modules = []
    last_revision = None
    fuzzsec = CHANGESET_FUZZ_SEC

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'ab:hm:z:e:E:k:t:l:')
        for opt, v in opts:
            if opt == '-z':
                fuzzsec = int(v)
            elif opt == '-e':
                email_domain = v
            elif opt == '-a':
                dump_all = True
            elif opt == '-b':
                git_branch = v
            elif opt == '-E':
                log_encoding = v
            elif opt == '-k':
                rcs.add_id_keyword(v)
            elif opt == '-m':
                if v == '.git':
                    print('Cannot handle the path named \'.git\'',
                          file=sys.stderr)
                    sys.exit(1)
                modules.append(v)
            elif opt == '-l':
                last_revision = v
            elif opt == '-h':
                usage()
                sys.exit(1)
    except getopt.GetoptError as msg:
        print(msg, file=sys.stderr)
        usage()
        sys.exit(1)

    if len(args) == 0 or len(args) > 2:
        usage()
        sys.exit(1)

    log_encodings = log_encoding.split(',')

    cvsroot = args[0]
    while cvsroot[-1] == '/':
        cvsroot = cvsroot[:-1]

    if len(args) == 2:
        do_incremental = True
        git = subprocess.Popen(
            ['git', '--git-dir=' + args[1], '-c',
             'i18n.logOutputEncoding=UTF-8', 'log', '--max-count', '1',
             '--date=raw', '--format=%ae%n%ad%n%H', git_branch],
            encoding='utf-8', stdout=subprocess.PIPE)
        outs = git.stdout.readlines()
        git.wait()
        if git.returncode != 0:
            print("Couldn't exec git", file=sys.stderr)
            sys.exit(git.returncode)
        git_tip = outs[2].strip()

        if last_revision is not None:
            git = subprocess.Popen(
                ['git', '--git-dir=' + args[1], '-c',
                 'i18n.logOutputEncoding=UTF-8', 'log', '--max-count', '1',
                 '--date=raw', '--format=%ae%n%ad%n%H', last_revision],
                encoding='utf-8', stdout=subprocess.PIPE)
            outs = git.stdout.readlines()
            git.wait()
            if git.returncode != 0:
                print("Couldn't exec git", file=sys.stderr)
sys.exit(git.returncode) last_author = outs[0].strip() last_ctime = float(outs[1].split()[0]) # strip off the domain part from the last author since cvs doesn't have # the domain part. if do_incremental and email_domain is not None and \ last_author.lower().endswith(('@' + email_domain).lower()): last_author = last_author[:-1 * (1 + len(email_domain))] cvs = CvsConv(cvsroot, rcs, not do_incremental, fuzzsec) print('** walk cvs tree', file=sys.stderr) if len(modules) == 0: cvs.walk() else: for module in modules: cvs.walk(module) changesets = sorted(cvs.changesets) nchangesets = len(changesets) print('** cvs has %d changeset' % (nchangesets), file=sys.stderr) if nchangesets <= 0: sys.exit(0) if not dump_all: # don't use last 10 minutes for safety max_time_max = changesets[-1].max_time - 600 else: max_time_max = changesets[-1].max_time found_last_revision = False markseq = cvs.markseq extags = set() for k in changesets: if do_incremental and not found_last_revision: if k.min_time == last_ctime and k.author == last_author: found_last_revision = True for tag in k.tags: extags.add(tag) continue if k.max_time > max_time_max: break marks = {} for f in k.revs: if not do_incremental: marks[f.markseq] = f else: markseq = markseq + 1 git_dump_file(f.path, f.rev, rcs, markseq) marks[markseq] = f log = rcsparse.rcsfile(k.revs[0].path).getlog(k.revs[0].rev) for i, e in enumerate(log_encodings): try: how = 'ignore' if i == len(log_encodings) - 1 else 'strict' log = log.decode(e, how) break except UnicodeError: pass log = log.encode('utf-8', 'ignore') output('commit refs/heads/' + git_branch) markseq = markseq + 1 output('mark :%d' % (markseq)) email = k.author if email_domain is None \ else k.author + '@' + email_domain output('author %s <%s> %d +0000' % (k.author, email, k.min_time)) output('committer %s <%s> %d +0000' % (k.author, email, k.min_time)) output('data', len(log)) output(log, end='') if do_incremental and git_tip is not None: output('from', git_tip) git_tip = None for m in marks: f = marks[m] mode = 0o100755 if os.access(f.path, os.X_OK) else 0o100644 fn = file_path(cvs.cvsroot, f.path) if f.state == 'dead': output('D', fn) else: output('M %o :%d %s' % (mode, m, fn)) output('') for tag in k.tags: if tag in extags: continue output('reset refs/tags/%s' % (tag)) output('from :%d' % (markseq)) output('') if do_incremental and not found_last_revision: raise Exception('could not find the last revision') print('** dumped', file=sys.stderr) # # Encode by UTF-8 always for string objects since encoding for git-fast-import # is UTF-8. 
Also write without conversion for a bytes object (file bodies # might be various encodings) # def output(*args, end='\n'): if len(args) == 0: pass elif len(args) > 1 or isinstance(args[0], str): lines = ' '.join( [arg if isinstance(arg, str) else str(arg) for arg in args]) sys.stdout.buffer.write(lines.encode('utf-8')) else: sys.stdout.buffer.write(args[0]) if len(end) > 0: sys.stdout.buffer.write(end.encode('utf-8')) class FileRevision: def __init__(self, path, rev, state, markseq): self.path = path self.rev = rev self.state = state self.markseq = markseq class ChangeSetKey: def __init__(self, branch, author, timestamp, log, commitid, fuzzsec): self.branch = branch self.author = author self.min_time = timestamp self.max_time = timestamp self.commitid = commitid self.fuzzsec = fuzzsec self.revs = [] self.tags = [] self.log_hash = 0 h = 0 for c in log: h = 31 * h + c self.log_hash = h def __lt__(self, other): return self._cmp(other) < 0 def __gt__(self, other): return self._cmp(other) > 0 def __eq__(self, other): return self._cmp(other) == 0 def __le__(self, other): return self._cmp(other) <= 0 def __ge__(self, other): return self._cmp(other) >= 0 def __ne__(self, other): return self._cmp(other) != 0 def _cmp(self, anon): # compare by the commitid cid = _cmp2(self.commitid, anon.commitid) if cid == 0 and self.commitid is not None: # both have commitid and they are same return 0 # compare by the time ma = anon.min_time - self.max_time mi = self.min_time - anon.max_time ct = self.min_time - anon.min_time if ma > self.fuzzsec or mi > self.fuzzsec: return ct if cid != 0: # only one has the commitid, this means different commit return cid if ct == 0 else ct # compare by log, branch and author c = _cmp2(self.log_hash, anon.log_hash) if c == 0: c = _cmp2(self.branch, anon.branch) if c == 0: c = _cmp2(self.author, anon.author) if c == 0: return 0 return ct if ct != 0 else c def merge(self, anot): self.max_time = max(self.max_time, anot.max_time) self.min_time = min(self.min_time, anot.min_time) self.revs.extend(anot.revs) def __hash__(self): return hash(self.branch + '/' + self.author) * 31 + self.log_hash def put_file(self, path, rev, state, markseq): self.revs.append(FileRevision(path, rev, state, markseq)) def _cmp2(a, b): _a = a is not None _b = b is not None return (a > b) - (a < b) if _a and _b else (_a > _b) - (_a < _b) class CvsConv: def __init__(self, cvsroot, rcs, dumpfile, fuzzsec): self.cvsroot = cvsroot self.rcs = rcs self.changesets = dict() self.dumpfile = dumpfile self.markseq = 0 self.tags = dict() self.fuzzsec = fuzzsec def walk(self, module=None): p = [self.cvsroot] if module is not None: p.append(module) path = os.path.join(*p) for root, dirs, files in os.walk(path): if '.git' in dirs: print('Ignore %s: cannot handle the path named \'.git\'' % ( root + os.sep + '.git'), file=sys.stderr) dirs.remove('.git') if '.git' in files: print('Ignore %s: cannot handle the path named \'.git\'' % ( root + os.sep + '.git'), file=sys.stderr) files.remove('.git') for f in files: if not f[-2:] == ',v': continue self.parse_file(root + os.sep + f) for t, c in list(self.tags.items()): c.tags.append(t) def parse_file(self, path): rtags = dict() rcsfile = rcsparse.rcsfile(path) branches = {'1': 'HEAD', '1.1.1': 'VENDOR'} for k, v in list(rcsfile.symbols.items()): r = v.split('.') if len(r) == 3: branches[v] = 'VENDOR' elif len(r) >= 3 and r[-2] == '0': branches['.'.join(r[:-2] + r[-1:])] = k if len(r) == 2 and branches[r[0]] == 'HEAD': if v not in rtags: rtags[v] = list() rtags[v].append(k) revs = 
rcsfile.revs.items() # sort by revision descending to priorize 1.1.1.1 than 1.1 revs = sorted(revs, key=lambda a: a[1][0], reverse=True) # sort by time revs = sorted(revs, key=lambda a: a[1][1]) novendor = False have_initial_revision = False last_vendor_status = None for k, v in revs: r = k.split('.') if len(r) == 4 and r[0] == '1' and r[1] == '1' and r[2] == '1' \ and r[3] == '1': if have_initial_revision: continue if v[3] == 'dead': continue last_vendor_status = v[3] have_initial_revision = True elif len(r) == 4 and r[0] == '1' and r[1] == '1' and r[2] == '1': if novendor: continue last_vendor_status = v[3] elif len(r) == 2: if r[0] == '1' and r[1] == '1': if have_initial_revision: continue if v[3] == 'dead': continue have_initial_revision = True elif r[0] == '1' and r[1] != '1': novendor = True if last_vendor_status == 'dead' and v[3] == 'dead': last_vendor_status = None continue last_vendor_status = None else: # trunk only continue if self.dumpfile: self.markseq = self.markseq + 1 git_dump_file(path, k, self.rcs, self.markseq) b = '.'.join(r[:-1]) try: a = ChangeSetKey( branches[b], v[2], v[1], rcsfile.getlog(v[0]), v[6], self.fuzzsec) except Exception as e: print('Aborted at %s %s' % (path, v[0]), file=sys.stderr) raise e a.put_file(path, k, v[3], self.markseq) while a in self.changesets: c = self.changesets[a] del self.changesets[a] c.merge(a) a = c self.changesets[a] = a if k in rtags: for t in rtags[k]: if t not in self.tags or \ self.tags[t].max_time < a.max_time: self.tags[t] = a def file_path(r, p): if r.endswith('/'): r = r[:-1] - path = p[:-2] # drop ",v" + if p[-2:] == ',v': + path = p[:-2] # drop ",v" + else: + path = p p = path.split('/') if len(p) > 0 and p[-2] == 'Attic': path = '/'.join(p[:-2] + [p[-1]]) if path.startswith(r): path = path[len(r) + 1:] return path def git_dump_file(path, k, rcs, markseq): try: cont = rcs.expand_keyword(path, rcsparse.rcsfile(path), k) except RuntimeError as msg: print('Unexpected runtime error on parsing', path, k, ':', msg, file=sys.stderr) print('unlimit the resource limit may fix this problem.', file=sys.stderr) sys.exit(1) output('blob') output('mark :%d' % markseq) output('data', len(cont)) output(cont) class RcsKeywords: RCS_KW_AUTHOR = (1 << 0) RCS_KW_DATE = (1 << 1) RCS_KW_LOG = (1 << 2) RCS_KW_NAME = (1 << 3) RCS_KW_RCSFILE = (1 << 4) RCS_KW_REVISION = (1 << 5) RCS_KW_SOURCE = (1 << 6) RCS_KW_STATE = (1 << 7) RCS_KW_FULLPATH = (1 << 8) RCS_KW_MDOCDATE = (1 << 9) RCS_KW_LOCKER = (1 << 10) RCS_KW_ID = (RCS_KW_RCSFILE | RCS_KW_REVISION | RCS_KW_DATE | RCS_KW_AUTHOR | RCS_KW_STATE) RCS_KW_HEADER = (RCS_KW_ID | RCS_KW_FULLPATH) rcs_expkw = { b"Author": RCS_KW_AUTHOR, b"Date": RCS_KW_DATE, b"Header": RCS_KW_HEADER, b"Id": RCS_KW_ID, b"Log": RCS_KW_LOG, b"Name": RCS_KW_NAME, b"RCSfile": RCS_KW_RCSFILE, b"Revision": RCS_KW_REVISION, b"Source": RCS_KW_SOURCE, b"State": RCS_KW_STATE, b"Mdocdate": RCS_KW_MDOCDATE, b"Locker": RCS_KW_LOCKER } RCS_KWEXP_NONE = (1 << 0) RCS_KWEXP_NAME = (1 << 1) # include keyword name RCS_KWEXP_VAL = (1 << 2) # include keyword value RCS_KWEXP_LKR = (1 << 3) # include name of locker RCS_KWEXP_OLD = (1 << 4) # generate old keyword string RCS_KWEXP_ERR = (1 << 5) # mode has an error RCS_KWEXP_DEFAULT = (RCS_KWEXP_NAME | RCS_KWEXP_VAL) RCS_KWEXP_KVL = (RCS_KWEXP_NAME | RCS_KWEXP_VAL | RCS_KWEXP_LKR) def __init__(self): self.rerecomple() def rerecomple(self): pat = b'|'.join(list(self.rcs_expkw.keys())) self.re_kw = re.compile(b".*?\\$(" + pat + b")[\\$:]") def add_id_keyword(self, keyword): 
        self.rcs_expkw[keyword.encode('ascii')] = self.RCS_KW_ID
        self.rerecomple()

    def kflag_get(self, flags):
        if flags is None:
            return self.RCS_KWEXP_DEFAULT
        fl = 0
        for fc in flags:
            if fc == 'k':
                fl |= self.RCS_KWEXP_NAME
            elif fc == 'v':
                fl |= self.RCS_KWEXP_VAL
            elif fc == 'l':
                fl |= self.RCS_KWEXP_LKR
            elif fc == 'o':
                if len(flags) != 1:
                    fl |= self.RCS_KWEXP_ERR
                fl |= self.RCS_KWEXP_OLD
            elif fc == 'b':
                if len(flags) != 1:
                    fl |= self.RCS_KWEXP_ERR
                fl |= self.RCS_KWEXP_NONE
            else:
                fl |= self.RCS_KWEXP_ERR
        return fl

    def expand_keyword(self, filename, rcs, r):
        rev = rcs.revs[r]

        mode = self.kflag_get(rcs.expand)
        if (mode & (self.RCS_KWEXP_NONE | self.RCS_KWEXP_OLD)) != 0:
            return rcs.checkout(rev[0])

        ret = []
        for line in rcs.checkout(rev[0]).split(b'\n'):
            logbuf = None
            m = self.re_kw.match(line)
            if m is None:
                # No RCS Keywords, use it as it is
                ret += [line]
                continue

            line0 = b''
            while m is not None:
                try:
                    dsign = m.end(1) + line[m.end(1):].index(b'$')
                except ValueError:
                    break
                prefix = line[:m.start(1) - 1]
                line = line[dsign + 1:]
                line0 += prefix
                expbuf = ''
                if (mode & self.RCS_KWEXP_NAME) != 0:
                    expbuf += '$'
                    expbuf += m.group(1).decode('ascii')
                    if (mode & self.RCS_KWEXP_VAL) != 0:
                        expbuf += ': '
                if (mode & self.RCS_KWEXP_VAL) != 0:
                    expkw = self.rcs_expkw[m.group(1)]
                    if (expkw & self.RCS_KW_RCSFILE) != 0:
                        expbuf += filename \
                            if (expkw & self.RCS_KW_FULLPATH) != 0 \
                            else os.path.basename(filename)
                        expbuf += " "
                    if (expkw & self.RCS_KW_REVISION) != 0:
                        expbuf += rev[0]
                        expbuf += " "
                    if (expkw & self.RCS_KW_DATE) != 0:
                        expbuf += time.strftime(
                            "%Y/%m/%d %H:%M:%S ", time.gmtime(rev[1]))
                    if (expkw & self.RCS_KW_MDOCDATE) != 0:
                        d = time.gmtime(rev[1])
                        expbuf += time.strftime(
                            "%B%e %Y " if (d.tm_mday < 10) else "%B %e %Y ", d)
                    if (expkw & self.RCS_KW_AUTHOR) != 0:
                        expbuf += rev[2]
                        expbuf += " "
                    if (expkw & self.RCS_KW_STATE) != 0:
                        expbuf += rev[3]
                        expbuf += " "
                    if (expkw & self.RCS_KW_LOG) != 0:
                        p = prefix
                        expbuf += filename \
                            if (expkw & self.RCS_KW_FULLPATH) != 0 \
                            else os.path.basename(filename)
                        expbuf += " "
                        logbuf = p + (
                            'Revision %s %s %s\n' % (
                                rev[0], time.strftime(
                                    "%Y/%m/%d %H:%M:%S", time.gmtime(rev[1])),
                                rev[2])).encode('ascii')
                        for lline in rcs.getlog(rev[0]).rstrip().split(b'\n'):
                            if len(lline) == 0:
                                logbuf += p.rstrip() + b'\n'
                            else:
                                logbuf += p + lline.lstrip() + b'\n'
                        if len(line) == 0:
                            logbuf += p.rstrip()
                        else:
                            logbuf += p + line.lstrip()
                        line = b''
                    if (expkw & self.RCS_KW_SOURCE) != 0:
                        expbuf += filename
                        expbuf += " "
                    if (expkw & (self.RCS_KW_NAME | self.RCS_KW_LOCKER)) != 0:
                        expbuf += " "
                if (mode & self.RCS_KWEXP_NAME) != 0:
                    expbuf += '$'
                line0 += expbuf[:255].encode('ascii')
                m = self.re_kw.match(line)

            ret += [line0 + line]
            if logbuf is not None:
                ret += [logbuf]
        return b'\n'.join(ret)


# ----------------------------------------------------------------------
# entry point
# ----------------------------------------------------------------------

if __name__ == '__main__':
    main()
diff --git a/swh/loader/cvs/cvsclient.py b/swh/loader/cvs/cvsclient.py
new file mode 100644
index 0000000..e670f96
--- /dev/null
+++ b/swh/loader/cvs/cvsclient.py
@@ -0,0 +1,334 @@
+# Copyright (C) 2015-2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+"""Minimal CVS client implementation
+
+"""
+
+import socket
+import subprocess
+import os.path
+import tempfile
+import re
+
+from swh.loader.exception import NotFound
+
+CVS_PSERVER_PORT = 2401
+CVS_PROTOCOL_BUFFER_SIZE = 8192
+EXAMPLE_PSERVER_URL = "pserver://user:password@cvs.example.com/cvsroot/repository"
+EXAMPLE_SSH_URL = "ssh://user@cvs.example.com/cvsroot/repository"
+
+VALID_RESPONSES = [ "ok", "error", "Valid-requests", "Checked-in",
+    "New-entry", "Checksum", "Copy-file", "Updated", "Created",
+    "Update-existing", "Merged", "Patched", "Rcs-diff", "Mode",
+    "Removed", "Remove-entry", "Template", "Notified", "Module-expansion",
+    "Wrapper-rcsOption", "M", "Mbinary", "E", "F", "MT" ]
+
+# Trivially encode strings to protect them from innocent eyes (i.e.,
+# inadvertent password compromises, like a network administrator
+# who's watching packets for legitimate reasons and accidentally sees
+# the password protocol go by).
+#
+# This is NOT secure encryption.
+def scramble_password(password):
+    s = ['A']  # scramble scheme version number
+    scramble_shifts = [
+        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+        114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
+        111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
+        41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
+        125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
+        36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
+        58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
+        225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
+        199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
+        174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
+        207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
+        192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
+        227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
+        182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
+        243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152 ]
+    for c in password:
+        s.append('%c' % scramble_shifts[ord(c)])
+    return "".join(s)
+
+
+class CVSProtocolError(Exception):
+    pass
+
+
+_re_kb_opt = re.compile(b'\/-kb\/')
+
+
+class CVSClient:
+
+    def connect_pserver(self, hostname, port, auth):
+        if port is None:
+            port = CVS_PSERVER_PORT
+        if auth is None:
+            raise NotFound("Username and password are required for "
+                           "a pserver connection: %s" % EXAMPLE_PSERVER_URL)
+        try:
+            user = auth.split(':')[0]
+            password = auth.split(':')[1]
+        except IndexError:
+            raise NotFound("Username and password are required for "
+                           "a pserver connection: %s" % EXAMPLE_PSERVER_URL)
+
+        try:
+            self.socket = socket.create_connection((hostname, port))
+        except ConnectionRefusedError:
+            raise NotFound("Could not connect to %s:%s" % (hostname, port))
+
+        scrambled_password = scramble_password(password)
+        request = "BEGIN AUTH REQUEST\n%s/%s\n%s\n%s\nEND AUTH REQUEST\n" \
+            % (self.cvsroot_path, self.cvs_module_name, user,
+               scrambled_password)
+        self.socket.sendall(request.encode('UTF-8'))
+
+        response = self.socket.recv(11)
+        if response != b"I LOVE YOU\n":
+            raise NotFound("pserver authentication failed for %s:%s" %
+                           (hostname, port))
+
+    def connect_ssh(self, hostname, port, auth):
+        command = ['ssh']
+        if auth is not None:
+            # Assume 'auth' contains only a user name.
+            # We do not support password authentication with SSH since the
+            # anoncvs user is usually granted access without a password.
+            command += ['-l', '%s' % auth]
+        if port is not None:
+            command += ['-p', '%d' % port]
+
+        # accept new SSH host keys upon first use; changed host keys will
+        # require intervention
+        command += ['-o', "StrictHostKeyChecking=accept-new"]
+
+        # disable interactive prompting
+        command += ['-o', "BatchMode=yes"]
+
+        # disable further option processing by adding '--'
+        command += ['--']
+
+        command += ['%s' % hostname, 'cvs', 'server']
+        self.ssh = subprocess.Popen(
+            command,
+            bufsize=0,  # use non-buffered I/O to match behaviour of self.socket
+            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+
+    def connect_fake(self, hostname, port, auth):
+        command = ['cvs', 'server']
+        self.ssh = subprocess.Popen(
+            command,
+            bufsize=0,  # use non-buffered I/O to match behaviour of self.socket
+            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+
+    def conn_read_line(self, require_newline=True):
+        if len(self.linebuffer) != 0:
+            return self.linebuffer.pop(0)
+        buf = b''
+        idx = -1
+        while idx == -1:
+            if len(buf) >= CVS_PROTOCOL_BUFFER_SIZE:
+                if require_newline:
+                    raise CVSProtocolError(
+                        "Overlong response from CVS server: %s" % buf)
+                else:
+                    break
+            if self.socket:
+                buf += self.socket.recv(CVS_PROTOCOL_BUFFER_SIZE)
+            elif self.ssh:
+                buf += self.ssh.stdout.read(CVS_PROTOCOL_BUFFER_SIZE)
+            else:
+                raise Exception("No valid connection")
+            if not buf:
+                return None
+            idx = buf.rfind(b'\n')
+        if idx != -1:
+            self.linebuffer = buf[:idx + 1].splitlines(keepends=True)
+        else:
+            if require_newline:
+                raise CVSProtocolError(
+                    "Invalid response from CVS server: %s" % buf)
+            else:
+                self.linebuffer.append(buf)
+        if len(self.incomplete_line) > 0:
+            self.linebuffer[0] = self.incomplete_line + self.linebuffer[0]
+        if idx != -1:
+            self.incomplete_line = buf[idx + 1:]
+        else:
+            self.incomplete_line = b''
+        return self.linebuffer.pop(0)
+
+    def conn_write(self, data):
+        if self.socket:
+            return self.socket.sendall(data)
+        if self.ssh:
+            self.ssh.stdin.write(data)
+            return self.ssh.stdin.flush()
+        raise Exception("No valid connection")
+
+    def conn_write_str(self, s):
+        return self.conn_write(s.encode('UTF-8'))
+
+    def conn_close(self):
+        if self.socket:
+            self.socket.close()
+        if self.ssh:
+            self.ssh.kill()
+            try:
+                self.ssh.wait(timeout=10)
+            except subprocess.TimeoutExpired as e:
+                raise RuntimeError("Could not terminate ssh program: %s" % e)
+
+    def __init__(self, url):
+        """
+        Connect to a CVS server at the specified URL and perform the initial
+        CVS protocol handshake.
+        """
+        self.hostname = url.host
+        self.cvsroot_path = os.path.dirname(url.path)
+        self.cvs_module_name = os.path.basename(url.path)
+        self.socket = None
+        self.ssh = None
+        self.linebuffer = list()
+        self.incomplete_line = b''
+
+        if url.scheme == 'pserver':
+            self.connect_pserver(url.host, url.port, url.auth)
+        elif url.scheme == 'ssh':
+            self.connect_ssh(url.host, url.port, url.auth)
+        elif url.scheme == 'fake':
+            self.connect_fake(url.host, url.port, url.auth)
+        else:
+            raise NotFound("Invalid CVS origin URL '%s'" % url)
+
+        # we should have a connection now
+        assert self.socket or self.ssh
+
+        self.conn_write_str(
+            "Root %s\nValid-responses %s\nvalid-requests\nUseUnchanged\n" %
+            (self.cvsroot_path, ' '.join(VALID_RESPONSES)))
+        response = self.conn_read_line()
+        if not response:
+            raise CVSProtocolError("No response from CVS server")
+        try:
+            if response[0:15] != b"Valid-requests ":
+                raise CVSProtocolError(
+                    "Invalid response from CVS server: %s" % response)
+        except IndexError:
+            raise CVSProtocolError(
+                "Invalid response from CVS server: %s" % response)
+        response = self.conn_read_line()
+        if response != b"ok\n":
+            raise CVSProtocolError(
+                "Invalid response from CVS server: %s" % response)
+
+    def __del__(self):
+        self.conn_close()
+
+    def _parse_rlog_response(self, fp):
+        rlog_output = tempfile.TemporaryFile()
+        expect_error = False
+        for line in fp.readlines():
+            if expect_error:
+                raise CVSProtocolError('CVS server error: %s' % line)
+            if line == b'ok\n':
+                break
+            elif line == b'M \n':
+                continue
+            elif line[0:2] == b'M ':
+                rlog_output.write(line[2:])
+            elif line[0:8] == b'MT text ':
+                rlog_output.write(line[8:-1])
+            elif line[0:8] == b'MT date ':
+                rlog_output.write(line[8:-1])
+            elif line[0:10] == b'MT newline':
+                rlog_output.write(line[10:])
+            elif line[0:6] == b'error ':
+                expect_error = True
+                continue
+            else:
+                raise CVSProtocolError('Bad CVS protocol response: %s' % line)
+        rlog_output.seek(0)
+        return rlog_output
+
+    def fetch_rlog(self):
+        fp = tempfile.TemporaryFile()
+        self.conn_write_str(
+            "Global_option -q\nArgument --\nArgument %s\nrlog\n" %
+            self.cvs_module_name)
+        while True:
+            response = self.conn_read_line()
+            if response is None:
+                raise CVSProtocolError("No response from CVS server")
+            if response[0:2] == b"E ":
+                raise CVSProtocolError(
+                    "Error response from CVS server: %s" % response)
+            fp.write(response)
+            if response == b"ok\n":
+                break
+        fp.seek(0)
+        return self._parse_rlog_response(fp)
+
+    def checkout(self, path, rev, dest_dir):
+        skip_line = False
+        expect_modeline = False
+        expect_bytecount = False
+        have_bytecount = False
+        bytecount = 0
+        dirname = os.path.dirname(path)
+        if dirname:
+            self.conn_write_str("Directory %s\n%s\n" % (dirname, dirname))
+        filename = os.path.basename(path)
+        co_output = tempfile.NamedTemporaryFile(
+            dir=dest_dir, delete=True,
+            prefix='cvsclient-checkout-%s-r%s-' % (filename, rev))
+        # TODO: cvs <= 1.10 servers expect to be given every Directory along the path.
+ self.conn_write_str("Directory %s\n%s\n" + "Global_option -q\n" + "Argument -r%s\n" + "Argument -kb\n" + "Argument --\nArgument %s\nco \n" % (self.cvs_module_name, + self.cvs_module_name, rev, path)) + while True: + if have_bytecount and bytecount > 0: + response = self.conn_read_line(require_newline=False) + if response == None: + raise CVSProtocolError("Incomplete response from CVS server") + co_output.write(response) + bytecount -= len(response) + if bytecount < 0: + raise CVSProtocolError("Overlong response from CVS server: %s" % response) + continue + else: + response = self.conn_read_line() + if response[0:2] == b'E ': + raise CVSProtocolError('Error from CVS server: %s' % response) + if have_bytecount and bytecount == 0 and response == b'ok\n': + break + if skip_line: + skip_line = False + continue + elif expect_bytecount: + try: + bytecount = int(response[0:-1]) # strip trailing \n + except ValueError: + raise CVSProtocolError('Bad CVS protocol response: %s' % response) + have_bytecount = True + continue + elif response == b'M \n': + continue + elif response == b'MT +updated\n': + continue + elif response == b'MT -updated\n': + continue + elif response[0:9] == b'MT fname ': + continue + elif response[0:8] == b'Created ': + skip_line = True + continue + elif response[0:1] == b'/' and _re_kb_opt.search(response): + expect_modeline = True + continue + elif expect_modeline and response[0:2] == b'u=': + expect_modeline = False + expect_bytecount = True + continue + elif response[0:2] == b'M ': + continue + elif response[0:8] == b'MT text ': + continue + elif response[0:10] == b'MT newline': + continue + else: + raise CVSProtocolError('Bad CVS protocol response: %s' % response) + co_output.seek(0) + return co_output diff --git a/swh/loader/cvs/loader.py b/swh/loader/cvs/loader.py index d5c7ae2..e7a187f 100644 --- a/swh/loader/cvs/loader.py +++ b/swh/loader/cvs/loader.py @@ -1,372 +1,466 @@ # Copyright (C) 2015-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Loader in charge of injecting either new or existing cvs repositories to swh-storage. """ from datetime import datetime import os import subprocess import tempfile import time from typing import Iterator, List, Optional, Sequence, Tuple from urllib3.util import parse_url from swh.loader.core.loader import BaseLoader from swh.loader.core.utils import clean_dangling_folders from swh.loader.exception import NotFound import swh.loader.cvs.rcsparse as rcsparse +import swh.loader.cvs.cvsclient as cvsclient +from swh.loader.cvs.rlog import RlogConv from swh.loader.cvs.cvs2gitdump.cvs2gitdump import CvsConv, RcsKeywords, CHANGESET_FUZZ_SEC, file_path, ChangeSetKey from swh.model import from_disk, hashutil from swh.model.model import Person, Revision, RevisionType, TimestampWithTimezone from swh.model.model import ( Content, Directory, Origin, Revision, SkippedContent, Snapshot, SnapshotBranch, TargetType, ) from swh.storage.interface import StorageInterface DEFAULT_BRANCH = b"HEAD" TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.cvs." class CvsLoader(BaseLoader): """Swh cvs loader. The repository is local. The loader deals with update on an already previously loaded repository. 
""" visit_type = "cvs" def __init__( self, storage: StorageInterface, url: str, origin_url: Optional[str] = None, visit_date: Optional[datetime] = None, cvsroot_path: Optional[str] = None, temp_directory: str = "/tmp", max_content_size: Optional[int] = None, ): super().__init__( storage=storage, logging_class="swh.loader.cvs.CvsLoader", max_content_size=max_content_size, ) self.cvsroot_url = url # origin url as unique identifier for origin in swh archive self.origin_url = origin_url if origin_url else self.cvsroot_url self.temp_directory = temp_directory self.done = False + self.cvs_module_name = None - self.cvs_module_path = None + + # XXX At present changeset IDs are recomputed on the fly during every visit. + # If we were able to maintain a cached somewhere which can be indexed by a + # cvs2gitdump.ChangeSetKey and yields an SWH revision hash we could avoid + # doing a lot of redundant work during every visit. + self.cvs_changesets = None + + # remote CVS repository access (history is parsed from CVS rlog): + self.cvsclient = None + self.rlog_file = None + # internal state used to store swh objects self._contents: List[Content] = [] self._skipped_contents: List[SkippedContent] = [] self._directories: List[Directory] = [] self._revisions: List[Revision] = [] self.swh_revision_gen = None # internal state, current visit self._last_revision = None self._visit_status = "full" self._load_status = "uneventful" self.visit_date = visit_date self.cvsroot_path = cvsroot_path self.snapshot = None + def compute_swh_revision(self, k, logmsg): + """Compute swh hash data per CVS changeset. + + Returns: + tuple (rev, swh_directory) + - rev: current SWH revision computed from checked out work tree + - swh_directory: dictionary of path, swh hash data with type + + """ + # Compute SWH revision from the on-disk state + swh_dir = from_disk.Directory.from_disk(path=os.fsencode(self.worktree_path)) + if self._last_revision: + parents = tuple([bytes(self._last_revision.id)]) + else: + parents = () + revision = self.build_swh_revision(k, logmsg, swh_dir.hash, parents) + self.log.debug("SWH revision ID: %s" % hashutil.hash_to_hex(revision.id)) + self._last_revision = revision + if self._load_status == "uneventful": + # We have an eventful load if this revision is not already present in the archive + if not self.storage.revision_get([revision.id])[0]: + self._load_status = "eventful" + return (revision, swh_dir) + def swh_hash_data_per_cvs_changeset(self): """Compute swh hash data per CVS changeset. Yields: tuple (rev, swh_directory) - rev: current SWH revision computed from checked out work tree - swh_directory: dictionary of path, swh hash data with type """ - # XXX At present changeset IDs are recomputed on the fly during every visit. - # If we were able to maintain a cached somewhere which can be indexed by a - # cvs2gitdump.ChangeSetKey and yields an SWH revision hash we could avoid - # doing a lot of redundant work during every visit. 
for k in self.cvs_changesets: tstr = time.strftime('%c', time.gmtime(k.max_time)) self.log.info("changeset from %s by %s on branch %s", tstr, k.author, k.branch); logmsg = "" # Check out the on-disk state of this revision for f in k.revs: rcsfile = None path = file_path(self.cvsroot_path, f.path) wtpath = os.path.join(self.worktree_path, path) self.log.info("rev %s of file %s" % (f.rev, f.path)); if not logmsg: rcsfile = rcsparse.rcsfile(f.path) logmsg = rcsfile.getlog(k.revs[0].rev) if f.state == 'dead': # remove this file from work tree try: os.remove(wtpath) except FileNotFoundError: pass else: # create, or update, this file in the work tree if not rcsfile: rcsfile = rcsparse.rcsfile(f.path) rcs = RcsKeywords() contents = rcs.expand_keyword(f.path, rcsfile, f.rev) try: outfile = open(wtpath, mode='wb') except FileNotFoundError: os.makedirs(os.path.dirname(wtpath)) outfile = open(wtpath, mode='wb') outfile.write(contents) outfile.close() - # Compute SWH revision from the on-disk state - swh_dir = from_disk.Directory.from_disk(path=os.fsencode(self.worktree_path)) - if self._last_revision: - parents = tuple([bytes(self._last_revision.id)]) - else: - parents = () - revision = self.build_swh_revision(k, logmsg, swh_dir.hash, parents) - self.log.debug("SWH revision ID: %s" % hashutil.hash_to_hex(revision.id)) - self._last_revision = revision - if self._load_status == "uneventful": - # We have an eventful load if this revision is not already present in the archive - if not self.storage.revision_get([revision.id])[0]: - self._load_status = "eventful" - + (revision, swh_dir) = self.compute_swh_revision(k, logmsg) yield revision, swh_dir + def swh_hash_data_per_cvs_rlog_changeset(self): + """Compute swh hash data per CVS rlog changeset. + + Yields: + tuple (rev, swh_directory) + - rev: current SWH revision computed from checked out work tree + - swh_directory: dictionary of path, swh hash data with type + + """ + for k in self.cvs_changesets: + tstr = time.strftime('%c', time.gmtime(k.max_time)) + self.log.info("changeset from %s by %s on branch %s", tstr, k.author, k.branch); + logmsg = "" + # Check out the on-disk state of this revision + for f in k.revs: + path = file_path(self.cvsroot_path, f.path) + wtpath = os.path.join(self.worktree_path, path) + self.log.info("rev %s of file %s" % (f.rev, f.path)); + if not logmsg: + logmsg = self.rlog.getlog(self.rlog_file, f.path, k.revs[0].rev) + self.log.debug("f.state is %s\n" % f.state) + if f.state == 'dead': + # remove this file from work tree + try: + os.remove(wtpath) + except FileNotFoundError: + pass + else: + dirname = os.path.dirname(wtpath) + try: + os.makedirs(dirname) + except FileExistsError: + pass + self.log.debug("checkout to %s\n" % wtpath) + fp = self.cvsclient.checkout(f.path, f.rev, dirname) + os.rename(fp.name, wtpath) + try: + fp.close() + except FileNotFoundError: + # Well, we have just renamed the file... + pass + + # TODO: prune empty directories? + (revision, swh_dir) = self.compute_swh_revision(k, logmsg) + yield revision, swh_dir def process_cvs_changesets(self) -> Iterator[ Tuple[List[Content], List[SkippedContent], List[Directory], Revision] ]: """Process CVS revisions. At each CVS revision, check out contents and compute swh hashes. Yields: tuple (contents, skipped-contents, directories, revision) of dict as a dictionary with keys, sha1_git, sha1, etc... 
""" for swh_revision, swh_dir in self.swh_hash_data_per_cvs_changeset(): # Send the associated contents/directories (_contents, _skipped_contents, _directories) = from_disk.iter_directory(swh_dir) yield _contents, _skipped_contents, _directories, swh_revision + def process_cvs_rlog_changesets(self) -> Iterator[ + Tuple[List[Content], List[SkippedContent], List[Directory], Revision] + ]: + """Process CVS rlog revisions. + + At each CVS revision, check out contents and compute swh hashes. + + Yields: + tuple (contents, skipped-contents, directories, revision) of dict as a + dictionary with keys, sha1_git, sha1, etc... + + """ + for swh_revision, swh_dir in self.swh_hash_data_per_cvs_rlog_changeset(): + # Send the associated contents/directories + (_contents, _skipped_contents, _directories) = from_disk.iter_directory(swh_dir) + yield _contents, _skipped_contents, _directories, swh_revision def prepare_origin_visit(self): self.origin = Origin(url=self.origin_url if self.origin_url else self.cvsroot_url) def pre_cleanup(self): """Cleanup potential dangling files from prior runs (e.g. OOM killed tasks) """ clean_dangling_folders( self.temp_directory, pattern_check=TEMPORARY_DIR_PREFIX_PATTERN, log=self.log, ) def cleanup(self): self.log.info("cleanup") def fetch_cvs_repo_with_rsync(self, host, path): # URL *must* end with a trailing slash in order to get CVSROOT listed url = 'rsync://%s%s/' % (host, os.path.dirname(path)) rsync = subprocess.run(['rsync', url], capture_output=True, encoding='ascii') rsync.check_returncode() have_cvsroot = False have_module = False for line in rsync.stdout.split('\n'): self.log.debug("rsync server: %s" % line) if line.endswith(' CVSROOT'): have_cvsroot = True elif line.endswith(' %s' % self.cvs_module_name): have_module = True if have_module and have_cvsroot: break if not have_module: raise NotFound("CVS module %s not found at %s" \ % (self.cvs_module_name, host, url)) if not have_cvsroot: raise NotFound("No CVSROOT directory found at %s" % url) rsync = subprocess.run(['rsync', '-a', url, self.cvsroot_path]) rsync.check_returncode() def prepare(self): self._last_revision = None self._load_status = "uneventful" self.swh_revision_gen = None if not self.cvsroot_path: self.cvsroot_path = tempfile.mkdtemp( suffix="-%s" % os.getpid(), prefix=TEMPORARY_DIR_PREFIX_PATTERN, dir=self.temp_directory, ) self.worktree_path = tempfile.mkdtemp( suffix="-%s" % os.getpid(), prefix=TEMPORARY_DIR_PREFIX_PATTERN, dir=self.temp_directory, ) url = parse_url(self.origin_url) self.log.debug("prepare; origin_url=%s scheme=%s path=%s" % (self.origin_url, url.scheme, url.path)) if not url.path: raise NotFound("Invalid CVS origin URL '%s'" % self.origin_url) self.cvs_module_name = os.path.basename(url.path) os.mkdir(os.path.join(self.worktree_path, self.cvs_module_name)); - self.cvs_module_path = os.path.join(self.cvsroot_path, self.cvs_module_name) if url.scheme == 'file': if not os.path.exists(url.path): raise NotFound elif url.scheme == 'rsync': - self.fetch_cvs_repo_with_rsync(url.host, url.path) + self.fetch_cvs_repo_with_rsync(url.host, url.path) + + if url.scheme == 'file' or url.scheme == 'rsync': + # local CVS repository conversion + have_rcsfile = False + have_cvsroot = False + for root, dirs, files in os.walk(self.cvsroot_path): + if 'CVSROOT' in dirs: + have_cvsroot = True + dirs.remove('CVSROOT') + continue; + for f in files: + filepath = os.path.join(root, f) + if f[-2:] == ',v': + try: + rcsfile = rcsparse.rcsfile(filepath) + except(Exception): + raise + else: + 
self.log.debug("Looks like we have data to convert; " + "found a valid RCS file at %s" % filepath) + have_rcsfile = True + break + if have_rcsfile: + break; + + if not have_rcsfile: + raise NotFound("Directory %s does not contain any valid RCS files %s" % self.cvsroot_path) + if not have_cvsroot: + self.log.warn("The CVS repository at '%s' lacks a CVSROOT directory; " + "we might be ingesting an incomplete copy of the repository" % self.cvsroot_path) + + # Unfortunately, there is no way to convert CVS history in an iterative fashion + # because the data is not indexed by any kind of changeset ID. We need to walk + # the history of each and every RCS file in the repository during every visit, + # even if no new changes will be added to the SWH archive afterwards. + # "CVS’s repository is the software equivalent of a telephone book sorted by telephone number." + # https://corecursive.com/software-that-doesnt-suck-with-jim-blandy/ + # + # An implicit assumption made here is that self.cvs_changesets will fit into + # memory in its entirety. If it won't fit then the CVS walker will need to + # be modified such that it spools the list of changesets to disk instead. + cvs = CvsConv(self.cvsroot_path, RcsKeywords(), False, CHANGESET_FUZZ_SEC) + self.log.info("Walking CVS module %s", self.cvs_module_name) + cvs.walk(self.cvs_module_name) + self.cvs_changesets = sorted(cvs.changesets) + self.log.info('CVS changesets found in %s: %d' % (self.cvs_module_name, len(self.cvs_changesets))) + self.swh_revision_gen = self.process_cvs_changesets() + elif url.scheme == 'pserver' or url.scheme == 'fake': + # remote CVS repository conversion + self.cvsclient = cvsclient.CVSClient(url) + cvsroot_path = os.path.dirname(url.path) + self.log.info("Fetching CVS rlog from %s:%s/%s", url.host, cvsroot_path, self.cvs_module_name) + self.rlog = RlogConv(cvsroot_path, CHANGESET_FUZZ_SEC) + self.rlog_file = self.cvsclient.fetch_rlog() + self.rlog.parse_rlog(self.rlog_file) + self.cvs_changesets = sorted(self.rlog.changesets) + self.log.info('CVS changesets found for %s: %d' % (self.cvs_module_name, len(self.cvs_changesets))) + self.swh_revision_gen = self.process_cvs_rlog_changesets() else: raise NotFound("Invalid CVS origin URL '%s'" % self.origin_url) - have_rcsfile = False - have_cvsroot = False - for root, dirs, files in os.walk(self.cvsroot_path): - if 'CVSROOT' in dirs: - have_cvsroot = True - dirs.remove('CVSROOT') - continue; - for f in files: - filepath = os.path.join(root, f) - if f[-2:] == ',v': - try: - rcsfile = rcsparse.rcsfile(filepath) - except(Exception): - raise - else: - self.log.debug("Looks like we have data to convert; " - "found a valid RCS file at %s" % filepath) - have_rcsfile = True - break - if have_rcsfile: - break; - - if not have_rcsfile: - raise NotFound("Directory %s does not contain any valid RCS files %s" % self.cvsroot_path) - if not have_cvsroot: - self.log.warn("The CVS repository at '%s' lacks a CVSROOT directory; " - "we might be ingesting an incomplete copy of the repository" % self.cvsroot_path) - - # Unfortunately, there is no way to convert CVS history in an iterative fashion - # because the data is not indexed by any kind of changeset ID. We need to walk - # the history of each and every RCS file in the repository during every visit, - # even if no new changes will be added to the SWH archive afterwards. - # "CVS’s repository is the software equivalent of a telephone book sorted by telephone number." 
- # https://corecursive.com/software-that-doesnt-suck-with-jim-blandy/ - # - # An implicit assumption made here is that self.cvs_changesets will fit into - # memory in its entirety. If it won't fit then the CVS walker will need to - # be modified such that it spools the list of changesets to disk instead. - cvs = CvsConv(self.cvsroot_path, RcsKeywords(), False, CHANGESET_FUZZ_SEC) - self.log.info("Walking CVS module %s", self.cvs_module_name) - cvs.walk(self.cvs_module_name) - self.cvs_changesets = sorted(cvs.changesets) - self.log.info('CVS changesets found in %s: %d' % (self.cvs_module_name, len(self.cvs_changesets))) - # SWH revisions are generated and stored iteratively to avoid high memory consumption - self.swh_revision_gen = self.process_cvs_changesets() + def fetch_data(self): """Fetch the next CVS revision.""" try: data = next(self.swh_revision_gen) except StopIteration: return False except Exception as e: self.log.exception(e) return False # Stopping iteration self._contents, self._skipped_contents, self._directories, rev = data self._revisions = [rev] return True def build_swh_revision(self, k: ChangeSetKey, logmsg: bytes, dir_id: bytes, parents: Sequence[bytes] ) -> Revision: """Given a CVS revision, build a swh revision. Args: k: changeset data logmsg: the changeset's log message dir_id: the tree's hash identifier parents: the revision's parents identifier Returns: The swh revision dictionary. """ author = Person.from_fullname(k.author.encode('UTF-8')) date = TimestampWithTimezone.from_datetime(k.max_time) return Revision( type=RevisionType.CVS, date=date, committer_date=date, directory=dir_id, message=logmsg, author=author, committer=author, synthetic=True, extra_headers=[], parents=tuple(parents)) def generate_and_load_snapshot(self, revision) -> Snapshot: """Create the snapshot either from existing revision. Args: revision (dict): Last revision seen if any (None by default) Returns: Optional[Snapshot] The newly created snapshot """ snap = Snapshot( branches={ DEFAULT_BRANCH: SnapshotBranch( target=revision.id, target_type=TargetType.REVISION ) } ) self.log.debug("snapshot: %s" % snap) self.storage.snapshot_add([snap]) return snap def store_data(self): "Add our current CVS changeset to the archive." self.storage.skipped_content_add(self._skipped_contents) self.storage.content_add(self._contents) self.storage.directory_add(self._directories) self.storage.revision_add(self._revisions) self.snapshot = self.generate_and_load_snapshot(self._last_revision) self.log.debug("SWH snapshot ID: %s" % hashutil.hash_to_hex(self.snapshot.id)) self.flush() self.loaded_snapshot_id = self.snapshot.id self._skipped_contents = [] self._contents = [] self._directories = [] self._revisions = [] def load_status(self): return { "status": self._load_status, } def visit_status(self): return self._visit_status diff --git a/swh/loader/cvs/rlog.py b/swh/loader/cvs/rlog.py new file mode 100644 index 0000000..1a046c3 --- /dev/null +++ b/swh/loader/cvs/rlog.py @@ -0,0 +1,391 @@ +# Copyright (C) 2021 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +""" RCS/CVS rlog parser, derived from viewvc and cvs2gitdump.py """ + +# Copyright (C) 1999-2021 The ViewCVS Group. All Rights Reserved. 
+# +# By using ViewVC, you agree to the terms and conditions set forth +# below: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following +# disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS +# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +# IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Copyright (c) 2012 YASUOKA Masahiko +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ +import re +import calendar +import time + +from swh.loader.cvs.cvs2gitdump.cvs2gitdump import CHANGESET_FUZZ_SEC, file_path, ChangeSetKey + +# TODO: actual path encoding should be specified as a parameter +path_encodings = [ 'ascii', 'utf-8' ] + +class RlogConv: + def __init__(self, cvsroot_path, fuzzsec): + self.cvsroot_path = cvsroot_path + self.fuzzsec = fuzzsec + self.changesets = dict() + self.tags = dict() + self.offsets = dict() + + def _process_rlog_entry(self, path, taginfo, revisions, logmsgs): + """ Convert an rlog entry into an item in self.changesets """ + rtags = dict() + branches = {'1': 'HEAD', '1.1.1': 'VENDOR'} + for k, v in list(taginfo.items()): + r = v.split('.') + if len(r) == 3: + branches[v] = 'VENDOR' + elif len(r) >= 3 and r[-2] == '0': + branches['.'.join(r[:-2] + r[-1:])] = k + if len(r) == 2 and branches[r[0]] == 'HEAD': + if v not in rtags: + rtags[v] = list() + rtags[v].append(k) + + revs = revisions.items() + # sort by revision descending to priorize 1.1.1.1 than 1.1 + revs = sorted(revs, key=lambda a: a[1][0], reverse=True) + # sort by time + revs = sorted(revs, key=lambda a: a[1][1]) + novendor = False + have_initial_revision = False + last_vendor_status = None + for k, v in revs: + r = k.split('.') + if len(r) == 4 and r[0] == '1' and r[1] == '1' and r[2] == '1' \ + and r[3] == '1': + if have_initial_revision: + continue + if v[3] == 'dead': + continue + last_vendor_status = v[3] + have_initial_revision = True + elif len(r) == 4 and r[0] == '1' and r[1] == '1' and r[2] == '1': + if novendor: + continue + last_vendor_status = v[3] + elif len(r) == 2: + if r[0] == '1' and r[1] == '1': + if have_initial_revision: + continue + if v[3] == 'dead': + continue + have_initial_revision = True + elif r[0] == '1' and r[1] != '1': + novendor = True + if last_vendor_status == 'dead' and v[3] == 'dead': + last_vendor_status = None + continue + last_vendor_status = None + else: + # trunk only + continue + + b = '.'.join(r[:-1]) + # decode author name in a potentially lossy way; + # it is only used for internal hashing in this case + author = v[2].decode('utf-8', 'ignore') + a = ChangeSetKey( + branches[b], author, v[1], logmsgs[k], v[6], + self.fuzzsec) + + a.put_file(path, k, v[3], 0) + while a in self.changesets: + c = self.changesets[a] + del self.changesets[a] + c.merge(a) + a = c + self.changesets[a] = a + if k in rtags: + for t in rtags[k]: + if t not in self.tags or \ + self.tags[t].max_time < a.max_time: + self.tags[t] = a + + def parse_rlog(self, fp): + eof = None + while eof != _EOF_LOG and eof != _EOF_ERROR: + filename, branch, taginfo, lockinfo, errmsg, eof = _parse_log_header(fp) + revisions = {} + logmsgs = {} + if filename: + for i, e in enumerate(path_encodings): + try: + how = 'ignore' if i == len(path_encodings) - 1 else 'strict' + fname = filename.decode(e, how) + break + except UnicodeError: + pass + while not eof: + off = fp.tell() + rev, logmsg, eof = _parse_log_entry(fp) + if rev: + revisions[rev[0]] = rev + logmsgs[rev[0]] = logmsg + if eof != _EOF_LOG and eof != _EOF_ERROR: + path = file_path(self.cvsroot_path, fname) + if not path in self.offsets.keys(): + self.offsets[path] = dict() + if rev: + self.offsets[path][rev[0]] = off + self._process_rlog_entry(path, taginfo, revisions, logmsgs) + + def getlog(self, fp, path, rev): + off = self.offsets[path][rev] + fp.seek(off) + rev, logmsg, eof = _parse_log_entry(fp) + return logmsg + +# if your rlog doesn't use 77 '=' characters, then this must change +LOG_END_MARKER = b'=' * 77 + b'\n' 
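+
+# Illustrative rlog layout assumed by the parser below (abridged and
+# hypothetical, not output captured from a real repository): per-file header
+# lines, then one entry per revision separated by ENTRY_END_MARKER, with
+# LOG_END_MARKER closing each RCS file's log:
+#
+#   RCS file: /cvsroot/module/foo.c,v
+#   symbolic names:
+#           RELEASE_1_0: 1.2
+#   ----------------------------
+#   revision 1.2
+#   date: 2005/05/01 12:00:00;  author: jrandom;  state: Exp;  lines: +1 -1
+#   log message text
+#   ----------------------------
+#   revision 1.1
+#   ...
+#   =============================================================================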
+ENTRY_END_MARKER = b'-' * 28 + b'\n'
+
+_EOF_FILE = b'end of file entries'  # no more entries for this RCS file
+_EOF_LOG = b'end of log'  # hit the true EOF on the pipe
+_EOF_ERROR = b'error message found'  # rlog issued an error
+
+# rlog error messages look like
+#
+#   rlog: filename/goes/here,v: error message
+#   rlog: filename/goes/here,v:123: error message
+#
+# so we should be able to match them with a regex like
+#
+#   ^rlog\: (.*)(?:\:\d+)?\: (.*)$
+#
+# But for some reason the windows version of rlog omits the "rlog: " prefix
+# for the first error message when the standard error stream has been
+# redirected to a file or pipe. (the prefix is present in subsequent errors
+# and when rlog is run from the console). So the expression below is more
+# complicated
+_re_log_error = re.compile(b'^(?:rlog\: )*(.*,v)(?:\:\d+)?\: (.*)$')
+
+# CVSNT error messages look like:
+# cvs rcsfile: `C:/path/to/file,v' does not appear to be a valid rcs file
+# cvs [rcsfile aborted]: C:/path/to/file,v: No such file or directory
+# cvs [rcsfile aborted]: cannot open C:/path/to/file,v: Permission denied
+_re_cvsnt_error = re.compile(b'^(?:cvs rcsfile\: |cvs \[rcsfile aborted\]: )'
+                             b'(?:\`(.*,v)\' |'
+                             b'cannot open (.*,v)\: |(.*,v)\: |)'
+                             b'(.*)$')
+
+
+def _parse_log_header(fp):
+    """Parse an RCS/CVS log header.
+
+    fp is a file (pipe) opened for reading the log information.
+
+    On entry, fp should point to the start of a log entry.
+    On exit, fp will have consumed the separator line between the header and
+    the first revision log.
+
+    If there is no revision information (e.g. the "-h" switch was passed to
+    rlog), then fp will have consumed the file separator line on exit.
+
+    Returns: filename, default branch, tag dictionary, lock dictionary,
+    rlog error message, and eof flag
+    """
+
+    filename = branch = msg = b""
+    taginfo = {}  # tag name => number
+    lockinfo = {}  # revision => locker
+    state = 0  # 0 = base, 1 = parsing symbols, 2 = parsing locks
+    eof = None
+
+    while 1:
+        line = fp.readline()
+        if not line:
+            # the true end-of-file
+            eof = _EOF_LOG
+            break
+
+        if state == 1:
+            if line[0:1] == b'\t':
+                [tag, rev] = [x.strip() for x in line.split(b':')]
+                taginfo[tag] = rev
+            else:
+                # oops. this line isn't tag info. stop parsing tags.
+                state = 0
+
+        if state == 2:
+            if line[0:1] == b'\t':
+                [locker, rev] = [x.strip() for x in line.split(b':')]
+                lockinfo[rev] = locker
+            else:
+                # oops. this line isn't lock info. stop parsing locks.
+                state = 0
+
+        if state == 0:
+            if line[:9] == b'RCS file:':
+                filename = line[10:-1]
+            elif line[:5] == b'head:':
+                # head = line[6:-1]
+                pass
+            elif line[:7] == b'branch:':
+                branch = line[8:-1]
+            elif line[:6] == b'locks:':
+                # start parsing the lock information
+                state = 2
+            elif line[:14] == b'symbolic names':
+                # start parsing the tag information
+                state = 1
+            elif line == ENTRY_END_MARKER:
+                # end of the headers
+                break
+            elif line == LOG_END_MARKER:
+                # end of this file's log information
+                eof = _EOF_FILE
+                break
+            else:
+                error = _re_cvsnt_error.match(line)
+                if error:
+                    p1, p2, p3, msg = error.groups()
+                    filename = p1 or p2 or p3
+                    if not filename:
+                        raise ValueError("Could not get filename "
+                                         "from CVSNT error:\n%s" % line)
+                    eof = _EOF_ERROR
+                    break
+
+                error = _re_log_error.match(line)
+                if error:
+                    filename, msg = error.groups()
+                    if msg[:30] == b'warning: Unknown phrases like ':
+                        # don't worry about this warning. it can happen with
+                        # some RCS files that have unknown fields in them
+                        # (e.g. "permissions 644;").
+                        continue
+                    eof = _EOF_ERROR
+                    break
+
+    return filename, branch, taginfo, lockinfo, msg, eof
+
+
+_re_log_info = re.compile(b'^date:\s+([^;]+);'
+                          b'\s+author:\s+([^;]+);'
+                          b'\s+state:\s+([^;]+);'
+                          b'(\s+lines:\s+([0-9\s+-]+);?)?'
+                          b'(\s+commitid:\s+([a-zA-Z0-9]+))?\n$')
+
+# TODO: _re_rev should be updated to extract the "locked" flag
+_re_rev = re.compile(b'^revision\s+([0-9.]+).*')
+
+
+def cvs_strptime(timestr):
+    try:
+        return time.strptime(timestr, '%Y/%m/%d %H:%M:%S')[:-1] + (0,)
+    except ValueError:
+        return time.strptime(timestr, '%Y-%m-%d %H:%M:%S %z')[:-1] + (0,)
+
+
+def _parse_log_entry(fp):
+    """Parse a single log entry.
+
+    On entry, fp should point to the first line of the entry (the "revision"
+    line).
+    On exit, fp will have consumed the log separator line (dashes) or the
+    end-of-file marker (equals).
+
+    Returns: Revision data tuple, and eof flag (see _EOF_*)
+    """
+    rev = None
+    line = fp.readline()
+    if not line:
+        return None, None, _EOF_LOG
+    if line == LOG_END_MARKER:
+        # Needed because some versions of RCS precede LOG_END_MARKER
+        # with ENTRY_END_MARKER
+        return None, None, _EOF_FILE
+    if line[:8] == b'revision':
+        match = _re_rev.match(line)
+        if not match:
+            return None, None, _EOF_LOG
+        rev = match.group(1)
+
+    line = fp.readline()
+    if not line:
+        return None, None, _EOF_LOG
+    match = _re_log_info.match(line)
+
+    eof = None
+    log = b''
+    while 1:
+        line = fp.readline()
+        if not line:
+            # true end-of-file
+            eof = _EOF_LOG
+            break
+        if line[:9] == b'branches:':
+            continue
+        if line == ENTRY_END_MARKER:
+            break
+        if line == LOG_END_MARKER:
+            # end of this file's log information
+            eof = _EOF_FILE
+            break
+
+        log = log + line
+
+    if not rev or not match:
+        # there was a parsing error
+        return None, None, eof
+
+    # parse out a time tuple for the local time
+    tm = cvs_strptime(match.group(1).decode('UTF-8'))
+
+    # rlog seems to assume that two-digit years are 1900-based (so, "04"
+    # comes out as "1904", not "2004").
+
+
+def _parse_log_entry(fp):
+    """Parse a single log entry.
+
+    On entry, fp should point to the first line of the entry (the "revision"
+    line).
+    On exit, fp will have consumed the log separator line (dashes) or the
+    end-of-file marker (equals).
+
+    Returns: revision data tuple, log message, and eof flag (see _EOF_*)
+    """
+    rev = None
+    line = fp.readline()
+    if not line:
+        return None, None, _EOF_LOG
+    if line == LOG_END_MARKER:
+        # Needed because some versions of RCS precede LOG_END_MARKER
+        # with ENTRY_END_MARKER
+        return None, None, _EOF_FILE
+    if line[:8] == b'revision':
+        match = _re_rev.match(line)
+        if not match:
+            return None, None, _EOF_LOG
+        rev = match.group(1)
+
+    line = fp.readline()
+    if not line:
+        return None, None, _EOF_LOG
+    match = _re_log_info.match(line)
+
+    eof = None
+    log = b''
+    while True:
+        line = fp.readline()
+        if not line:
+            # true end-of-file
+            eof = _EOF_LOG
+            break
+        if line[:9] == b'branches:':
+            continue
+        if line == ENTRY_END_MARKER:
+            break
+        if line == LOG_END_MARKER:
+            # end of this file's log information
+            eof = _EOF_FILE
+            break
+
+        log = log + line
+
+    if not rev or not match:
+        # there was a parsing error
+        return None, None, eof
+
+    # parse out a time tuple for the local time
+    tm = cvs_strptime(match.group(1).decode('UTF-8'))
+
+    # rlog seems to assume that two-digit years are 1900-based (so, "04"
+    # comes out as "1904", not "2004"); window such years forward by a
+    # century (e.g. 1904 becomes 2004) and reject anything still before
+    # the epoch.
+    EPOCH = 1970
+    if tm[0] < EPOCH:
+        tm = list(tm)
+        if (tm[0] - 1900) < 70:
+            tm[0] = tm[0] + 100
+        if tm[0] < EPOCH:
+            raise ValueError('invalid year')
+    date = calendar.timegm(tm)
+
+    # Return a revision tuple compatible with 'rcsparse', the log message,
+    # and the EOF marker.
+    return (rev.decode('ascii'),  # revision number string
+            date,
+            match.group(2),       # author (encoding is arbitrary; don't
+                                  # attempt to decode)
+            match.group(3).decode('ascii'),  # state, usually "Exp" or
+                                             # "dead"; non-ASCII data here
+                                             # would be weird
+            None,                 # TODO: branches of this rev
+            None,                 # TODO: revnumstr of previous rev
+            None,                 # TODO: commitid
+            ), log, eof
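+
+
+# Minimal sketch of the intended calling pattern (an assumption; the actual
+# driver lives elsewhere in the loader): the header parser runs once per RCS
+# file, then the entry parser is looped until an _EOF_* flag comes back:
+#
+#   filename, branch, taginfo, lockinfo, msg, eof = _parse_log_header(fp)
+#   while not eof:
+#       revdata, log, eof = _parse_log_entry(fp)
+#       if revdata:
+#           rev, date, author, state, _, _, _ = revdata
+#           ...  # record one revision of this RCS file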
diff --git a/swh/loader/cvs/tests/data/nano.rlog.tgz b/swh/loader/cvs/tests/data/nano.rlog.tgz
new file mode 100644
index 0000000..ffffa87
Binary files /dev/null and b/swh/loader/cvs/tests/data/nano.rlog.tgz differ
diff --git a/swh/loader/cvs/tests/data/runbaby.tgz b/swh/loader/cvs/tests/data/runbaby.tgz
index c2256f3..354845d 100644
Binary files a/swh/loader/cvs/tests/data/runbaby.tgz and b/swh/loader/cvs/tests/data/runbaby.tgz differ
diff --git a/swh/loader/cvs/tests/test_loader.py b/swh/loader/cvs/tests/test_loader.py
index c4abe57..d899a9a 100644
--- a/swh/loader/cvs/tests/test_loader.py
+++ b/swh/loader/cvs/tests/test_loader.py
@@ -1,223 +1,259 @@
 # Copyright (C) 2016-2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import os

 import pytest

 from swh.loader.cvs.loader import CvsLoader
 from swh.loader.tests import (
     assert_last_visit_matches,
     check_snapshot,
     get_stats,
     prepare_repository_from_archive,
 )
 from swh.model.hashutil import hash_to_bytes
 from swh.model.model import Snapshot, SnapshotBranch, TargetType

 RUNBABY_SNAPSHOT = Snapshot(
     id=hash_to_bytes("1cff69ab9bd70822d5e3006092f943ccaafdcf57"),
     branches={
         b"HEAD": SnapshotBranch(
             target=hash_to_bytes("ef511d258fa55035c2bc2a5b05cad233cee1d328"),
             target_type=TargetType.REVISION,
         )
     },
 )


 def test_loader_cvs_not_found_no_mock(swh_storage, tmp_path):
     """Given an unknown repository, the loader visit ends up in status not_found"""
     unknown_repo_url = "unknown-repository"
     loader = CvsLoader(swh_storage, unknown_repo_url, cvsroot_path=tmp_path)

     assert loader.load() == {"status": "uneventful"}

     assert_last_visit_matches(
         swh_storage,
         unknown_repo_url,
         status="not_found",
         type="cvs",
     )


 def test_loader_cvs_visit(swh_storage, datadir, tmp_path):
     """Eventful visit should yield 1 snapshot"""
     archive_name = "runbaby"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )

     assert loader.load() == {"status": "eventful"}

     assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="cvs",
         snapshot=RUNBABY_SNAPSHOT.id,
     )

     stats = get_stats(loader.storage)
     assert stats == {
         "content": 5,
         "directory": 2,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 1,
         "skipped_content": 0,
         "snapshot": 1,
     }

     check_snapshot(RUNBABY_SNAPSHOT, loader.storage)


 def test_loader_cvs_2_visits_no_change(swh_storage, datadir, tmp_path):
     """Eventful visit followed by uneventful visit should yield the same
     snapshot
     """
     archive_name = "runbaby"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )

     assert loader.load() == {"status": "eventful"}
     visit_status1 = assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="cvs",
         snapshot=RUNBABY_SNAPSHOT.id,
     )

     assert loader.load() == {"status": "uneventful"}
     visit_status2 = assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="cvs",
         snapshot=RUNBABY_SNAPSHOT.id,
     )

     assert visit_status1.date < visit_status2.date
     assert visit_status1.snapshot == visit_status2.snapshot

     stats = get_stats(loader.storage)
     assert stats["origin_visit"] == 1 + 1  # computed twice the same snapshot
     assert stats["snapshot"] == 1


 GREEK_SNAPSHOT = Snapshot(
     id=hash_to_bytes("5e74af67d69dfd7aea0eb118154d062f71f50120"),
     branches={
         b"HEAD": SnapshotBranch(
             target=hash_to_bytes("e18b92f14cd5b3efb3fcb4ea46cfaf97f25f301b"),
             target_type=TargetType.REVISION,
         )
     },
 )


 def test_loader_cvs_with_file_additions_and_deletions(swh_storage, datadir, tmp_path):
     """Eventful conversion of history with file additions and deletions"""
     archive_name = "greek-repository"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
     repo_url += '/greek-tree'  # CVS module name

     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )

     assert loader.load() == {"status": "eventful"}

     assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="cvs",
         snapshot=GREEK_SNAPSHOT.id,
     )

     stats = get_stats(loader.storage)
     assert stats == {
         "content": 8,
         "directory": 20,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 7,
         "skipped_content": 0,
         "snapshot": 7,
     }

     check_snapshot(GREEK_SNAPSHOT, loader.storage)


 GREEK_SNAPSHOT2 = Snapshot(
     id=hash_to_bytes("048885ae2145ffe81588aea95dcf75c536ecdf26"),
     branches={
         b"HEAD": SnapshotBranch(
             target=hash_to_bytes("55eb1438c03588607ce4b8db8f45e8e23075951b"),
             target_type=TargetType.REVISION,
         )
     },
 )


 def test_loader_cvs_2_visits_with_change(swh_storage, datadir, tmp_path):
     """Eventful visit followed by eventful visit should yield two snapshots"""
     archive_name = "greek-repository"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
     repo_url += '/greek-tree'  # CVS module name

     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )

     assert loader.load() == {"status": "eventful"}
     visit_status1 = assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="cvs",
         snapshot=GREEK_SNAPSHOT.id,
     )

     stats = get_stats(loader.storage)
     assert stats == {
         "content": 8,
         "directory": 20,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 7,
         "skipped_content": 0,
         "snapshot": 7,
     }

     archive_name2 = "greek-repository2"
     archive_path2 = os.path.join(datadir, f"{archive_name2}.tgz")
     repo_url = prepare_repository_from_archive(archive_path2, archive_name, tmp_path)
     repo_url += '/greek-tree'  # CVS module name

     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )

     assert loader.load() == {"status": "eventful"}
     visit_status2 = assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="cvs",
         snapshot=GREEK_SNAPSHOT2.id,
     )

     stats = get_stats(loader.storage)
     assert stats == {
         "content": 10,
         "directory": 23,
         "origin": 1,
         "origin_visit": 2,
         "release": 0,
         "revision": 8,
         "skipped_content": 0,
         "snapshot": 8,
     }

     check_snapshot(GREEK_SNAPSHOT2, loader.storage)

     assert visit_status1.date < visit_status2.date
     assert visit_status1.snapshot != visit_status2.snapshot
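+
+# The test below exercises the loader's CVS protocol client instead of the
+# direct RCS parsing path. The scheme swap it performs is plain string
+# surgery, e.g. 'file:///tmp/repo/runbaby' -> 'fake:///tmp/repo/runbaby'
+# ('fake://' presumably being the scheme the loader's cvsclient maps to
+# spawning a local 'cvs server'; the example URL is illustrative only).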
+
+def test_loader_cvs_visit_pserver(swh_storage, datadir, tmp_path):
+    """Eventful visit to CVS pserver should yield 1 snapshot"""
+    archive_name = "runbaby"
+    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
+    repo_url += '/runbaby'  # CVS module name
+
+    # Ask our cvsclient to connect via the 'cvs server' command
+    repo_url = 'fake://' + repo_url[7:]
+
+    loader = CvsLoader(
+        swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
+    )
+
+    assert loader.load() == {"status": "eventful"}
+
+    assert_last_visit_matches(
+        loader.storage,
+        repo_url,
+        status="full",
+        type="cvs",
+        snapshot=RUNBABY_SNAPSHOT.id,
+    )
+
+    stats = get_stats(loader.storage)
+    assert stats == {
+        "content": 5,
+        "directory": 2,
+        "origin": 1,
+        "origin_visit": 1,
+        "release": 0,
+        "revision": 1,
+        "skipped_content": 0,
+        "snapshot": 1,
+    }
+
+    check_snapshot(RUNBABY_SNAPSHOT, loader.storage)