diff --git a/swh/loader/cvs/cvsclient.py b/swh/loader/cvs/cvsclient.py
index 19cd9d6..6cec698 100644
--- a/swh/loader/cvs/cvsclient.py
+++ b/swh/loader/cvs/cvsclient.py
@@ -1,404 +1,417 @@
 # Copyright (C) 2015-2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 """Minimal CVS client implementation
 
 """
 
 import os.path
-import re
 import socket
 import subprocess
 import tempfile
 
 from swh.loader.exception import NotFound
 
 CVS_PSERVER_PORT = 2401
 CVS_PROTOCOL_BUFFER_SIZE = 8192
 EXAMPLE_PSERVER_URL = "pserver://user:password@cvs.example.com/cvsroot/repository"
 EXAMPLE_SSH_URL = "ssh://user@cvs.example.com/cvsroot/repository"
 
 VALID_RESPONSES = [
     "ok",
     "error",
     "Valid-requests",
     "Checked-in",
     "New-entry",
     "Checksum",
     "Copy-file",
     "Updated",
     "Created",
     "Update-existing",
     "Merged",
     "Patched",
     "Rcs-diff",
     "Mode",
     "Removed",
     "Remove-entry",
     "Template",
     "Notified",
     "Module-expansion",
     "Wrapper-rcsOption",
     "M",
     "Mbinary",
     "E",
     "F",
     "MT",
 ]
 
 # Trivially encode strings to protect them from innocent eyes (i.e.,
 # inadvertent password compromises, like a network administrator
 # who's watching packets for legitimate reasons and accidentally sees
 # the password protocol go by).
 #
 # This is NOT secure encryption.
 
 
 def scramble_password(password):
     s = ["A"]  # scramble scheme version number
     # fmt: off
     scramble_shifts = [
         0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,  # noqa: E241
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,  # noqa: E241,E131,E501
       114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,  # noqa: E241,E131,E501
       111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,  # noqa: E241,E131,E501
        41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,  # noqa: E241,E131,E501
       125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,  # noqa: E241,E131,E501
        36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,  # noqa: E241,E131,E501
        58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,  # noqa: E241,E131,E501
       225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,  # noqa: E241,E131,E501
       199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,  # noqa: E241,E131,E501
       174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,  # noqa: E241,E131,E501
       207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,  # noqa: E241,E131,E501
       192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,  # noqa: E241,E131,E501
       227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,  # noqa: E241,E131,E501
       182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,  # noqa: E241,E131,E501
       243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152]  # noqa: E241,E131,E501
     # fmt: on
     for c in password:
         s.append("%c" % scramble_shifts[ord(c)])
     return "".join(s)
 
 
 class CVSProtocolError(Exception):
     pass
 
 
-_re_kb_opt = re.compile(rb"\/-kb\/")
-
-
 class CVSClient:
     def connect_pserver(self, hostname, port, auth):
         if port is None:
             port = CVS_PSERVER_PORT
         if auth is None:
             raise NotFound(
                 "Username and password are required for "
                 "a pserver connection: %s" % EXAMPLE_PSERVER_URL
             )
         try:
             user = auth.split(":")[0]
             password = auth.split(":")[1]
         except IndexError:
             raise NotFound(
                 "Username and password are required for "
                 "a pserver connection: %s" % EXAMPLE_PSERVER_URL
             )
 
         try:
             self.socket = socket.create_connection((hostname, port))
         except ConnectionRefusedError:
             raise NotFound("Could not connect to %s:%s", hostname, port)
 
         scrambled_password = scramble_password(password)
         request = "BEGIN AUTH REQUEST\n%s\n%s\n%s\nEND AUTH REQUEST\n" % (
             self.cvsroot_path,
             user,
             scrambled_password,
         )
         print("Request: %s\n" % request)
         self.socket.sendall(request.encode("UTF-8"))
 
         response = self.conn_read_line()
         if response != b"I LOVE YOU\n":
             raise NotFound(
                 "pserver authentication failed for %s:%s: %s"
                 % (hostname, port, response)
             )
 
     def connect_ssh(self, hostname, port, auth):
         command = ["ssh"]
         if auth is not None:
             # Assume 'auth' contains only a user name.
             # We do not support password authentication with SSH since the
             # anoncvs user is usually granted access without a password.
             command += ["-l", "%s" % auth]
         if port is not None:
             command += ["-p", "%d" % port]
 
         # accept new SSH hosts keys upon first use; changed host keys
         # will require intervention
         command += ["-o", "StrictHostKeyChecking=accept-new"]
 
         # disable interactive prompting
         command += ["-o", "BatchMode=yes"]
 
         # disable further option processing by adding '--'
         command += ["--"]
 
         command += ["%s" % hostname, "cvs", "server"]
         # use non-buffered I/O to match behaviour of self.socket
         self.ssh = subprocess.Popen(
             command, bufsize=0, stdin=subprocess.PIPE, stdout=subprocess.PIPE
         )
 
     def connect_fake(self, hostname, port, auth):
         command = ["cvs", "server"]
         # use non-buffered I/O to match behaviour of self.socket
         self.ssh = subprocess.Popen(
             command, bufsize=0, stdin=subprocess.PIPE, stdout=subprocess.PIPE
         )
 
     def conn_read_line(self, require_newline=True):
         if len(self.linebuffer) != 0:
             return self.linebuffer.pop(0)
         buf = b""
         idx = -1
         while idx == -1:
             if len(buf) >= CVS_PROTOCOL_BUFFER_SIZE:
                 if require_newline:
                     raise CVSProtocolError(
                         "Overlong response from " "CVS server: %s" % buf
                     )
                 else:
                     break
             if self.socket:
                 buf += self.socket.recv(CVS_PROTOCOL_BUFFER_SIZE)
             elif self.ssh:
                 buf += self.ssh.stdout.read(CVS_PROTOCOL_BUFFER_SIZE)
             else:
                 raise Exception("No valid connection")
             if not buf:
                 return None
             idx = buf.rfind(b"\n")
         if idx != -1:
             self.linebuffer = buf[: idx + 1].splitlines(keepends=True)
         else:
             if require_newline:
                 raise CVSProtocolError("Invalid response from CVS server: %s" % buf)
             else:
                 self.linebuffer.append(buf)
         if len(self.incomplete_line) > 0:
             self.linebuffer[0] = self.incomplete_line + self.linebuffer[0]
         if idx != -1:
             self.incomplete_line = buf[idx + 1 :]
         else:
             self.incomplete_line = b""
         return self.linebuffer.pop(0)
 
     def conn_write(self, data):
         if self.socket:
             return self.socket.sendall(data)
         if self.ssh:
             self.ssh.stdin.write(data)
             return self.ssh.stdin.flush()
         raise Exception("No valid connection")
 
     def conn_write_str(self, s):
         return self.conn_write(s.encode("UTF-8"))
 
     def conn_close(self):
         if self.socket:
             self.socket.close()
         if self.ssh:
             self.ssh.kill()
             try:
                 self.ssh.wait(timeout=10)
             except subprocess.TimeoutExpired as e:
                 raise subprocess.TimeoutExpired(
                     "Could not terminate " "ssh program: %s" % e
                 )
 
     def __init__(self, url):
         """
         Connect to a CVS server at the specified URL and perform the initial
         CVS protocol handshake.
         """
         self.hostname = url.host
         self.cvsroot_path = os.path.dirname(url.path)
         self.cvs_module_name = os.path.basename(url.path)
         self.socket = None
         self.ssh = None
         self.linebuffer = list()
         self.incomplete_line = b""
 
         if url.scheme == "pserver":
             self.connect_pserver(url.host, url.port, url.auth)
         elif url.scheme == "ssh":
             self.connect_ssh(url.host, url.port, url.auth)
         elif url.scheme == "fake":
             self.connect_fake(url.host, url.port, url.auth)
         else:
             raise NotFound("Invalid CVS origin URL '%s'" % url)
 
         # we should have a connection now
         assert self.socket or self.ssh
 
         self.conn_write_str(
             "Root %s\nValid-responses %s\nvalid-requests\n"
             "UseUnchanged\n" % (self.cvsroot_path, " ".join(VALID_RESPONSES))
         )
         response = self.conn_read_line()
         if not response:
             raise CVSProtocolError("No response from CVS server")
         try:
             if response[0:15] != b"Valid-requests ":
                 raise CVSProtocolError(
                     "Invalid response from " "CVS server: %s" % response
                 )
         except IndexError:
             raise CVSProtocolError("Invalid response from CVS server: %s" % response)
         response = self.conn_read_line()
         if response != b"ok\n":
             raise CVSProtocolError("Invalid response from CVS server: %s" % response)
 
     def __del__(self):
         self.conn_close()
 
     def _parse_rlog_response(self, fp):
         rlog_output = tempfile.TemporaryFile()
         expect_error = False
         for line in fp.readlines():
             if expect_error:
                 raise CVSProtocolError("CVS server error: %s" % line)
             if line == b"ok\n":
                 break
             elif line == b"M \n":
                 continue
             elif line[0:2] == b"M ":
                 rlog_output.write(line[2:])
             elif line[0:8] == b"MT text ":
                 rlog_output.write(line[8:-1])
             elif line[0:8] == b"MT date ":
                 rlog_output.write(line[8:-1])
             elif line[0:10] == b"MT newline":
                 rlog_output.write(line[10:])
             elif line[0:7] == b"error  ":
                 expect_error = True
                 continue
             else:
                 raise CVSProtocolError("Bad CVS protocol response: %s" % line)
         rlog_output.seek(0)
         return rlog_output
 
     def fetch_rlog(self):
         fp = tempfile.TemporaryFile()
         self.conn_write_str(
             "Global_option -q\nArgument --\nArgument %s\nrlog\n" % self.cvs_module_name
         )
         while True:
             response = self.conn_read_line()
             if response is None:
                 raise CVSProtocolError("No response from CVS server")
             if response[0:2] == b"E ":
                 raise CVSProtocolError("Error response from CVS server: %s" % response)
             fp.write(response)
             if response == b"ok\n":
                 break
         fp.seek(0)
         return self._parse_rlog_response(fp)
 
-    def checkout(self, path, rev, dest_dir):
+    def checkout(self, path, rev, dest_dir, expand_keywords):
+        """
+        Download a file revision from the cvs server and store
+        the file's contents in a temporary file. If expand_keywords is
+        set then ask the server to expand RCS keywords in file content.
+
+        From the server's point of view this function behaves much
+        like 'cvs update -r rev path'. The server is unaware that
+        we do not actually maintain a CVS working copy. Because of
+        this it sends more information than we need. We simply skip
+        responses that are of no interest to us.
+        """
         skip_line = False
         expect_modeline = False
         expect_bytecount = False
         have_bytecount = False
         bytecount = 0
         dirname = os.path.dirname(path)
         if dirname:
             self.conn_write_str("Directory %s\n%s\n" % (dirname, dirname))
         filename = os.path.basename(path)
         co_output = tempfile.NamedTemporaryFile(
             dir=dest_dir,
             delete=True,
             prefix="cvsclient-checkout-%s-r%s-" % (filename, rev),
         )
+        if expand_keywords:
+            # use server-side per-file default expansion rules
+            karg = ""
+        else:
+            # force binary file mode
+            karg = "Argument -kb\n"
         # TODO: cvs <= 1.10 servers expect to be given every Directory along the path.
         self.conn_write_str(
             "Directory %s\n%s\n"
             "Global_option -q\n"
             "Argument -r%s\n"
-            "Argument -kb\n"
+            "%s"
             "Argument --\nArgument %s\nco \n"
-            % (self.cvs_module_name, self.cvs_module_name, rev, path)
+            % (self.cvs_module_name, self.cvs_module_name, rev, karg, path)
         )
         while True:
             if have_bytecount:
                 if bytecount < 0:
                     raise CVSProtocolError("server sent too much file content data")
                 response = self.conn_read_line(require_newline=False)
                 if response is None:
                     raise CVSProtocolError("Incomplete response from CVS server")
                 if len(response) > bytecount:
                     # When a file lacks a final newline we receive a line which
                     # contains file content as well as CVS protocol response data.
                     # Split last line of file content from CVS protocol data...
                     co_output.write(response[:bytecount])
                     response = response[bytecount:]
                     bytecount = 0
                     # ...and process the CVS protocol response below.
                 else:
                     co_output.write(response)
                     bytecount -= len(response)
                     continue
             else:
                 response = self.conn_read_line()
             if response[0:2] == b"E ":
                 raise CVSProtocolError("Error from CVS server: %s" % response)
             if response == b"ok\n":
                 if have_bytecount:
                     break
                 else:
                     raise CVSProtocolError("server sent 'ok' but no file contents")
             if skip_line:
                 skip_line = False
                 continue
             elif expect_bytecount:
                 try:
                     bytecount = int(response[0:-1])  # strip trailing \n
                 except ValueError:
                     raise CVSProtocolError("Bad CVS protocol response: %s" % response)
                 have_bytecount = True
                 continue
             elif response in (b"M \n", b"MT +updated\n", b"MT -updated\n"):
                 continue
             elif response[0:9] == b"MT fname ":
                 continue
             elif response.split(b" ")[0] in (
                 b"Created",
                 b"Checked-in",
                 b"Update-existing",
                 b"Updated",
                 b"Removed",
             ):
                 skip_line = True
                 continue
-            elif response[0:1] == b"/" and _re_kb_opt.search(response):
+            elif response[0:1] == b"/":
                 expect_modeline = True
                 continue
             elif expect_modeline and response[0:2] == b"u=":
                 expect_modeline = False
                 expect_bytecount = True
                 continue
             elif response[0:2] == b"M ":
                 continue
             elif response[0:8] == b"MT text ":
                 continue
             elif response[0:10] == b"MT newline":
                 continue
             else:
                 raise CVSProtocolError("Bad CVS protocol response: %s" % response)
         co_output.seek(0)
         return co_output
diff --git a/swh/loader/cvs/loader.py b/swh/loader/cvs/loader.py
index 2fba430..1263129 100644
--- a/swh/loader/cvs/loader.py
+++ b/swh/loader/cvs/loader.py
@@ -1,482 +1,482 @@
 # Copyright (C) 2015-2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 """Loader in charge of injecting either new or existing cvs repositories to
 swh-storage.
 
 """
 from datetime import datetime
 import os
 import subprocess
 import tempfile
 import time
 from typing import Any, BinaryIO, Dict, Iterator, List, Optional, Sequence, Tuple
 
 from urllib3.util import parse_url
 
 from swh.loader.core.loader import BaseLoader
 from swh.loader.core.utils import clean_dangling_folders
 from swh.loader.cvs.cvs2gitdump.cvs2gitdump import (
     CHANGESET_FUZZ_SEC,
     ChangeSetKey,
     CvsConv,
     FileRevision,
     RcsKeywords,
     file_path,
 )
 from swh.loader.cvs.cvsclient import CVSClient
 import swh.loader.cvs.rcsparse as rcsparse
 from swh.loader.cvs.rlog import RlogConv
 from swh.loader.exception import NotFound
 from swh.model import from_disk, hashutil
 from swh.model.model import (
     Content,
     Directory,
     Origin,
     Person,
     Revision,
     RevisionType,
     Sha1Git,
     SkippedContent,
     Snapshot,
     SnapshotBranch,
     TargetType,
     TimestampWithTimezone,
 )
 from swh.storage.algos.snapshot import snapshot_get_latest
 from swh.storage.interface import StorageInterface
 
 DEFAULT_BRANCH = b"HEAD"
 
 TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.cvs."
 
 
 class CvsLoader(BaseLoader):
     """Swh cvs loader.
 
     The repository is local.  The loader deals with
     update on an already previously loaded repository.
 
     """
 
     visit_type = "cvs"
 
     cvs_module_name: str
     cvsclient: CVSClient
 
     # remote CVS repository access (history is parsed from CVS rlog):
     rlog_file: BinaryIO
 
     swh_revision_gen: Iterator[
         Tuple[List[Content], List[SkippedContent], List[Directory], Revision]
     ]
 
     def __init__(
         self,
         storage: StorageInterface,
         url: str,
         origin_url: Optional[str] = None,
         visit_date: Optional[datetime] = None,
         cvsroot_path: Optional[str] = None,
         temp_directory: str = "/tmp",
         max_content_size: Optional[int] = None,
     ):
         super().__init__(
             storage=storage,
             logging_class="swh.loader.cvs.CvsLoader",
             max_content_size=max_content_size,
         )
         self.cvsroot_url = url
         # origin url as unique identifier for origin in swh archive
         self.origin_url = origin_url if origin_url else self.cvsroot_url
         self.temp_directory = temp_directory
 
         # internal state used to store swh objects
         self._contents: List[Content] = []
         self._skipped_contents: List[SkippedContent] = []
         self._directories: List[Directory] = []
         self._revisions: List[Revision] = []
         # internal state, current visit
         self._last_revision: Optional[Revision] = None
         self._visit_status = "full"
         self.visit_date = visit_date
 
         if not cvsroot_path:
             cvsroot_path = tempfile.mkdtemp(
                 suffix="-%s" % os.getpid(),
                 prefix=TEMPORARY_DIR_PREFIX_PATTERN,
                 dir=self.temp_directory,
             )
         self.cvsroot_path = cvsroot_path
 
         self.snapshot: Optional[Snapshot] = None
         self.last_snapshot: Optional[Snapshot] = snapshot_get_latest(
             self.storage, self.origin_url
         )
 
     def compute_swh_revision(
         self, k: ChangeSetKey, logmsg: Optional[bytes]
     ) -> Tuple[Revision, from_disk.Directory]:
         """Compute swh hash data per CVS changeset.
 
         Returns:
             tuple (rev, swh_directory)
             - rev: current SWH revision computed from checked out work tree
             - swh_directory: dictionary of path, swh hash data with type
 
         """
         # Compute SWH revision from the on-disk state
         swh_dir = from_disk.Directory.from_disk(path=os.fsencode(self.worktree_path))
         parents: Tuple[Sha1Git, ...]
         if self._last_revision:
             parents = (self._last_revision.id,)
         else:
             parents = ()
         revision = self.build_swh_revision(k, logmsg, swh_dir.hash, parents)
         self.log.debug("SWH revision ID: %s", hashutil.hash_to_hex(revision.id))
         self._last_revision = revision
         return (revision, swh_dir)
 
     def checkout_file_with_rcsparse(
         self, k: ChangeSetKey, f: FileRevision, rcsfile: rcsparse.rcsfile
     ) -> None:
         path = file_path(self.cvsroot_path, f.path)
         wtpath = os.path.join(self.worktree_path, path)
         self.log.info("rev %s of file %s" % (f.rev, f.path))
         if f.state == "dead":
             # remove this file from work tree
             try:
                 os.remove(wtpath)
             except FileNotFoundError:
                 pass
         else:
             # create, or update, this file in the work tree
             if not rcsfile:
                 rcsfile = rcsparse.rcsfile(f.path)
             rcs = RcsKeywords()
             contents = rcs.expand_keyword(f.path, rcsfile, f.rev)
             os.makedirs(os.path.dirname(wtpath), exist_ok=True)
             outfile = open(wtpath, mode="wb")
             outfile.write(contents)
             outfile.close()
 
     def checkout_file_with_cvsclient(
         self, k: ChangeSetKey, f: FileRevision, cvsclient: CVSClient
     ):
         path = file_path(self.cvsroot_path, f.path)
         wtpath = os.path.join(self.worktree_path, path)
         self.log.info("rev %s of file %s" % (f.rev, f.path))
         if f.state == "dead":
             # remove this file from work tree
             try:
                 os.remove(wtpath)
             except FileNotFoundError:
                 pass
         else:
             dirname = os.path.dirname(wtpath)
             os.makedirs(dirname, exist_ok=True)
             self.log.debug("checkout to %s\n" % wtpath)
-            fp = cvsclient.checkout(f.path, f.rev, dirname)
+            fp = cvsclient.checkout(f.path, f.rev, dirname, expand_keywords=True)
             os.rename(fp.name, wtpath)
             try:
                 fp.close()
             except FileNotFoundError:
                 # Well, we have just renamed the file...
                 pass
 
     def process_cvs_changesets(
         self, cvs_changesets: List[ChangeSetKey], use_rcsparse: bool,
     ) -> Iterator[
         Tuple[List[Content], List[SkippedContent], List[Directory], Revision]
     ]:
         """Process CVS revisions.
 
         At each CVS revision, check out contents and compute swh hashes.
 
         Yields:
             tuple (contents, skipped-contents, directories, revision) of dict as a
             dictionary with keys, sha1_git, sha1, etc...
 
         """
         for k in cvs_changesets:
             tstr = time.strftime("%c", time.gmtime(k.max_time))
             self.log.info(
                 "changeset from %s by %s on branch %s", tstr, k.author, k.branch
             )
             logmsg: Optional[bytes] = b""
             # Check out all files of this revision and get a log message.
             #
             # The log message is obtained from the first file in the changeset.
             # The message will usually be the same for all affected files, and
             # the SWH archive will only store one version of the log message.
             for f in k.revs:
                 rcsfile = None
                 if use_rcsparse:
                     if rcsfile is None:
                         rcsfile = rcsparse.rcsfile(f.path)
                     if not logmsg:
                         logmsg = rcsfile.getlog(k.revs[0].rev)
                     self.checkout_file_with_rcsparse(k, f, rcsfile)
                 else:
                     if not logmsg:
                         logmsg = self.rlog.getlog(self.rlog_file, f.path, k.revs[0].rev)
                     self.checkout_file_with_cvsclient(k, f, self.cvsclient)
 
             # TODO: prune empty directories?
             (revision, swh_dir) = self.compute_swh_revision(k, logmsg)
             (contents, skipped_contents, directories) = from_disk.iter_directory(
                 swh_dir
             )
             yield contents, skipped_contents, directories, revision
 
     def prepare_origin_visit(self) -> None:
         self.origin = Origin(
             url=self.origin_url if self.origin_url else self.cvsroot_url
         )
 
     def pre_cleanup(self) -> None:
         """Cleanup potential dangling files from prior runs (e.g. OOM killed
         tasks)
 
         """
         clean_dangling_folders(
             self.temp_directory,
             pattern_check=TEMPORARY_DIR_PREFIX_PATTERN,
             log=self.log,
         )
 
     def cleanup(self) -> None:
         self.log.info("cleanup")
 
     def fetch_cvs_repo_with_rsync(self, host: str, path: str) -> None:
         # URL *must* end with a trailing slash in order to get CVSROOT listed
         url = "rsync://%s%s/" % (host, os.path.dirname(path))
         rsync = subprocess.run(["rsync", url], capture_output=True, encoding="ascii")
         rsync.check_returncode()
         have_cvsroot = False
         have_module = False
         for line in rsync.stdout.split("\n"):
             self.log.debug("rsync server: %s", line)
             if line.endswith(" CVSROOT"):
                 have_cvsroot = True
             elif line.endswith(" %s" % self.cvs_module_name):
                 have_module = True
             if have_module and have_cvsroot:
                 break
         if not have_module:
             raise NotFound(
                 "CVS module %s not found at %s" % (self.cvs_module_name, url)
             )
         if not have_cvsroot:
             raise NotFound("No CVSROOT directory found at %s" % url)
 
         subprocess.run(["rsync", "-a", url, self.cvsroot_path]).check_returncode()
 
     def prepare(self) -> None:
         self._last_revision = None
         self.worktree_path = tempfile.mkdtemp(
             suffix="-%s" % os.getpid(),
             prefix=TEMPORARY_DIR_PREFIX_PATTERN,
             dir=self.temp_directory,
         )
         url = parse_url(self.origin_url)
         self.log.debug(
             "prepare; origin_url=%s scheme=%s path=%s",
             self.origin_url,
             url.scheme,
             url.path,
         )
         if not url.path:
             raise NotFound("Invalid CVS origin URL '%s'" % self.origin_url)
         self.cvs_module_name = os.path.basename(url.path)
         os.mkdir(os.path.join(self.worktree_path, self.cvs_module_name))
         if url.scheme == "file":
             if not os.path.exists(url.path):
                 raise NotFound
         elif url.scheme == "rsync":
             self.fetch_cvs_repo_with_rsync(url.host, url.path)
 
         if url.scheme == "file" or url.scheme == "rsync":
             # local CVS repository conversion
             have_rcsfile = False
             have_cvsroot = False
             for root, dirs, files in os.walk(self.cvsroot_path):
                 if "CVSROOT" in dirs:
                     have_cvsroot = True
                     dirs.remove("CVSROOT")
                     continue
                 for f in files:
                     filepath = os.path.join(root, f)
                     if f[-2:] == ",v":
                         rcsfile = rcsparse.rcsfile(filepath)  # noqa: F841
                         self.log.debug(
                             "Looks like we have data to convert; "
                             "found a valid RCS file at %s",
                             filepath,
                         )
                         have_rcsfile = True
                         break
                 if have_rcsfile:
                     break
 
             if not have_rcsfile:
                 raise NotFound(
                     "Directory %s does not contain any valid RCS files %s",
                     self.cvsroot_path,
                 )
             if not have_cvsroot:
                 self.log.warn(
                     "The CVS repository at '%s' lacks a CVSROOT directory; "
                     "we might be ingesting an incomplete copy of the repository",
                     self.cvsroot_path,
                 )
 
             # Unfortunately, there is no way to convert CVS history in an
             # iterative fashion because the data is not indexed by any kind
             # of changeset ID. We need to walk the history of each and every
             # RCS file in the repository during every visit, even if no new
             # changes will be added to the SWH archive afterwards.
             # "CVS’s repository is the software equivalent of a telephone book
             # sorted by telephone number."
             # https://corecursive.com/software-that-doesnt-suck-with-jim-blandy/
             #
             # An implicit assumption made here is that self.cvs_changesets will
             # fit into memory in its entirety. If it won't fit then the CVS walker
             # will need to be modified such that it spools the list of changesets
             # to disk instead.
             cvs = CvsConv(self.cvsroot_path, RcsKeywords(), False, CHANGESET_FUZZ_SEC)
             self.log.info("Walking CVS module %s", self.cvs_module_name)
             cvs.walk(self.cvs_module_name)
             cvs_changesets = sorted(cvs.changesets)
             self.log.info(
                 "CVS changesets found in %s: %d",
                 self.cvs_module_name,
                 len(cvs_changesets),
             )
             self.swh_revision_gen = self.process_cvs_changesets(
                 cvs_changesets, use_rcsparse=True
             )
         elif url.scheme == "pserver" or url.scheme == "fake" or url.scheme == "ssh":
             # remote CVS repository conversion
             self.cvsclient = CVSClient(url)
             cvsroot_path = os.path.dirname(url.path)
             self.log.info(
                 "Fetching CVS rlog from %s:%s/%s",
                 url.host,
                 cvsroot_path,
                 self.cvs_module_name,
             )
             self.rlog = RlogConv(cvsroot_path, CHANGESET_FUZZ_SEC)
             self.rlog_file = self.cvsclient.fetch_rlog()
             self.rlog.parse_rlog(self.rlog_file)
             cvs_changesets = sorted(self.rlog.changesets)
             self.log.info(
                 "CVS changesets found for %s: %d",
                 self.cvs_module_name,
                 len(cvs_changesets),
             )
             self.swh_revision_gen = self.process_cvs_changesets(
                 cvs_changesets, use_rcsparse=False
             )
         else:
             raise NotFound("Invalid CVS origin URL '%s'" % self.origin_url)
 
     def fetch_data(self) -> bool:
         """Fetch the next CVS revision."""
         try:
             data = next(self.swh_revision_gen)
         except StopIteration:
             return False
         except Exception:
             self.log.exception("Exception in fetch_data:")
             return False  # Stopping iteration
         self._contents, self._skipped_contents, self._directories, rev = data
         self._revisions = [rev]
         return True
 
     def build_swh_revision(
         self,
         k: ChangeSetKey,
         logmsg: Optional[bytes],
         dir_id: bytes,
         parents: Sequence[bytes],
     ) -> Revision:
         """Given a CVS revision, build a swh revision.
 
         Args:
             k: changeset data
             logmsg: the changeset's log message
             dir_id: the tree's hash identifier
             parents: the revision's parents identifier
 
         Returns:
             The swh revision dictionary.
 
         """
         author = Person.from_fullname(k.author.encode("UTF-8"))
         date = TimestampWithTimezone.from_dict(k.max_time)
 
         return Revision(
             type=RevisionType.CVS,
             date=date,
             committer_date=date,
             directory=dir_id,
             message=logmsg,
             author=author,
             committer=author,
             synthetic=True,
             extra_headers=[],
             parents=tuple(parents),
         )
 
     def generate_and_load_snapshot(self, revision: Revision) -> Snapshot:
         """Create the snapshot either from existing revision.
 
         Args:
             revision (dict): Last revision seen if any (None by default)
 
         Returns:
             Optional[Snapshot] The newly created snapshot
 
         """
         snap = Snapshot(
             branches={
                 DEFAULT_BRANCH: SnapshotBranch(
                     target=revision.id, target_type=TargetType.REVISION
                 )
             }
         )
         self.log.debug("snapshot: %s", snap)
         self.storage.snapshot_add([snap])
         return snap
 
     def store_data(self) -> None:
         "Add our current CVS changeset to the archive."
         self.storage.skipped_content_add(self._skipped_contents)
         self.storage.content_add(self._contents)
         self.storage.directory_add(self._directories)
         self.storage.revision_add(self._revisions)
         assert self._last_revision is not None
         self.snapshot = self.generate_and_load_snapshot(self._last_revision)
         self.log.debug("SWH snapshot ID: %s", hashutil.hash_to_hex(self.snapshot.id))
         self.flush()
         self.loaded_snapshot_id = self.snapshot.id
         self._skipped_contents = []
         self._contents = []
         self._directories = []
         self._revisions = []
 
     def load_status(self) -> Dict[str, Any]:
         assert self.snapshot is not None
         if self.last_snapshot == self.snapshot:
             load_status = "uneventful"
         else:
             load_status = "eventful"
         return {
             "status": load_status,
         }
 
     def visit_status(self) -> str:
         return self._visit_status
diff --git a/swh/loader/cvs/tests/data/greek-repository4.tgz b/swh/loader/cvs/tests/data/greek-repository4.tgz
new file mode 100644
index 0000000..b20c3c1
Binary files /dev/null and b/swh/loader/cvs/tests/data/greek-repository4.tgz differ
diff --git a/swh/loader/cvs/tests/test_loader.py b/swh/loader/cvs/tests/test_loader.py
index c54c329..1c07c39 100644
--- a/swh/loader/cvs/tests/test_loader.py
+++ b/swh/loader/cvs/tests/test_loader.py
@@ -1,322 +1,410 @@
 # Copyright (C) 2016-2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import os
 
 from swh.loader.cvs.loader import CvsLoader
 from swh.loader.tests import (
     assert_last_visit_matches,
     check_snapshot,
     get_stats,
     prepare_repository_from_archive,
 )
 from swh.model.hashutil import hash_to_bytes
 from swh.model.model import Snapshot, SnapshotBranch, TargetType
 
 RUNBABY_SNAPSHOT = Snapshot(
     id=hash_to_bytes("1cff69ab9bd70822d5e3006092f943ccaafdcf57"),
     branches={
         b"HEAD": SnapshotBranch(
             target=hash_to_bytes("ef511d258fa55035c2bc2a5b05cad233cee1d328"),
             target_type=TargetType.REVISION,
         )
     },
 )
 
 
 def test_loader_cvs_not_found_no_mock(swh_storage, tmp_path):
     """Given an unknown repository, the loader visit ends up in status not_found"""
     unknown_repo_url = "unknown-repository"
     loader = CvsLoader(swh_storage, unknown_repo_url, cvsroot_path=tmp_path)
 
     assert loader.load() == {"status": "uneventful"}
 
     assert_last_visit_matches(
         swh_storage, unknown_repo_url, status="not_found", type="cvs",
     )
 
 
 def test_loader_cvs_visit(swh_storage, datadir, tmp_path):
     """Eventful visit should yield 1 snapshot"""
     archive_name = "runbaby"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
 
     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )
 
     assert loader.load() == {"status": "eventful"}
 
     assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="cvs",
         snapshot=RUNBABY_SNAPSHOT.id,
     )
 
     stats = get_stats(loader.storage)
     assert stats == {
         "content": 5,
         "directory": 2,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 1,
         "skipped_content": 0,
         "snapshot": 1,
     }
 
     check_snapshot(RUNBABY_SNAPSHOT, loader.storage)
 
 
 def test_loader_cvs_2_visits_no_change(swh_storage, datadir, tmp_path):
     """Eventful visit followed by uneventful visit should yield the same snapshot
 
     """
     archive_name = "runbaby"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
 
     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )
 
     assert loader.load() == {"status": "eventful"}
     visit_status1 = assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="cvs",
         snapshot=RUNBABY_SNAPSHOT.id,
     )
 
     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )
     assert loader.load() == {"status": "uneventful"}
     visit_status2 = assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="cvs",
         snapshot=RUNBABY_SNAPSHOT.id,
     )
 
     assert visit_status1.date < visit_status2.date
     assert visit_status1.snapshot == visit_status2.snapshot
 
     stats = get_stats(loader.storage)
     assert stats["origin_visit"] == 1 + 1  # computed twice the same snapshot
     assert stats["snapshot"] == 1
 
 
 GREEK_SNAPSHOT = Snapshot(
     id=hash_to_bytes("5e74af67d69dfd7aea0eb118154d062f71f50120"),
     branches={
         b"HEAD": SnapshotBranch(
             target=hash_to_bytes("e18b92f14cd5b3efb3fcb4ea46cfaf97f25f301b"),
             target_type=TargetType.REVISION,
         )
     },
 )
 
 
 def test_loader_cvs_with_file_additions_and_deletions(swh_storage, datadir, tmp_path):
     """Eventful conversion of history with file additions and deletions"""
     archive_name = "greek-repository"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
     repo_url += "/greek-tree"  # CVS module name
     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )
 
     assert loader.load() == {"status": "eventful"}
 
     assert_last_visit_matches(
         loader.storage, repo_url, status="full", type="cvs", snapshot=GREEK_SNAPSHOT.id,
     )
 
     stats = get_stats(loader.storage)
     assert stats == {
         "content": 8,
         "directory": 20,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 7,
         "skipped_content": 0,
         "snapshot": 7,
     }
 
     check_snapshot(GREEK_SNAPSHOT, loader.storage)
 
 
 GREEK_SNAPSHOT2 = Snapshot(
     id=hash_to_bytes("048885ae2145ffe81588aea95dcf75c536ecdf26"),
     branches={
         b"HEAD": SnapshotBranch(
             target=hash_to_bytes("55eb1438c03588607ce4b8db8f45e8e23075951b"),
             target_type=TargetType.REVISION,
         )
     },
 )
 
 
 def test_loader_cvs_2_visits_with_change(swh_storage, datadir, tmp_path):
     """Eventful visit followed by eventful visit should yield two snapshots"""
     archive_name = "greek-repository"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
     repo_url += "/greek-tree"  # CVS module name
     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )
 
     assert loader.load() == {"status": "eventful"}
 
     visit_status1 = assert_last_visit_matches(
         loader.storage, repo_url, status="full", type="cvs", snapshot=GREEK_SNAPSHOT.id,
     )
 
     stats = get_stats(loader.storage)
     assert stats == {
         "content": 8,
         "directory": 20,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 7,
         "skipped_content": 0,
         "snapshot": 7,
     }
 
     archive_name2 = "greek-repository2"
     archive_path2 = os.path.join(datadir, f"{archive_name2}.tgz")
     repo_url = prepare_repository_from_archive(archive_path2, archive_name, tmp_path)
     repo_url += "/greek-tree"  # CVS module name
 
     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )
 
     assert loader.load() == {"status": "eventful"}
 
     visit_status2 = assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="cvs",
         snapshot=GREEK_SNAPSHOT2.id,
     )
 
     stats = get_stats(loader.storage)
     assert stats == {
         "content": 10,
         "directory": 23,
         "origin": 1,
         "origin_visit": 2,
         "release": 0,
         "revision": 8,
         "skipped_content": 0,
         "snapshot": 8,
     }
 
     check_snapshot(GREEK_SNAPSHOT2, loader.storage)
 
     assert visit_status1.date < visit_status2.date
     assert visit_status1.snapshot != visit_status2.snapshot
 
 
 def test_loader_cvs_visit_pserver(swh_storage, datadir, tmp_path):
     """Eventful visit to CVS pserver should yield 1 snapshot"""
     archive_name = "runbaby"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
     repo_url += "/runbaby"  # CVS module name
 
     # Ask our cvsclient to connect via the 'cvs server' command
     repo_url = "fake://" + repo_url[7:]
 
     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )
 
     assert loader.load() == {"status": "eventful"}
 
     assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="cvs",
         snapshot=RUNBABY_SNAPSHOT.id,
     )
 
     stats = get_stats(loader.storage)
     assert stats == {
         "content": 5,
         "directory": 2,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 1,
         "skipped_content": 0,
         "snapshot": 1,
     }
 
     check_snapshot(RUNBABY_SNAPSHOT, loader.storage)
 
 
 GREEK_SNAPSHOT3 = Snapshot(
     id=hash_to_bytes("cd801546b0137c82f01b9b67848ba8261d64ebbb"),
     branches={
         b"HEAD": SnapshotBranch(
             target=hash_to_bytes("14980990790ce1921db953c4c9ae03dd8861e8d6"),
             target_type=TargetType.REVISION,
         )
     },
 )
 
 
 def test_loader_cvs_visit_pserver_no_eol(swh_storage, datadir, tmp_path):
     """Visit to CVS pserver with file that lacks trailing eol"""
     archive_name = "greek-repository3"
     extracted_name = "greek-repository"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
     repo_url += "/greek-tree"  # CVS module name
 
     # Ask our cvsclient to connect via the 'cvs server' command
     repo_url = "fake://" + repo_url[7:]
 
     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )
 
     assert loader.load() == {"status": "eventful"}
 
     assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="cvs",
         snapshot=GREEK_SNAPSHOT3.id,
     )
 
     stats = get_stats(loader.storage)
     assert stats == {
         "content": 9,
         "directory": 23,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 8,
         "skipped_content": 0,
         "snapshot": 8,
     }
 
     check_snapshot(GREEK_SNAPSHOT3, loader.storage)
+
+
+GREEK_SNAPSHOT4 = Snapshot(
+    id=hash_to_bytes("11673e2766654bd5fafb5119b418794230d48d6b"),
+    branches={
+        b"HEAD": SnapshotBranch(
+            target=hash_to_bytes("fe4a926d49d2af76e0025a8ba0b4ed159aec6829"),
+            target_type=TargetType.REVISION,
+        )
+    },
+)
+
+
+def test_loader_cvs_visit_expand_id_keyword(swh_storage, datadir, tmp_path):
+    """Visit to CVS repository with file with an RCS Id keyword"""
+    archive_name = "greek-repository4"
+    extracted_name = "greek-repository"
+    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+    repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
+    repo_url += "/greek-tree"  # CVS module name
+
+    loader = CvsLoader(
+        swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
+    )
+
+    assert loader.load() == {"status": "eventful"}
+
+    assert_last_visit_matches(
+        loader.storage,
+        repo_url,
+        status="full",
+        type="cvs",
+        snapshot=GREEK_SNAPSHOT4.id,
+    )
+
+    stats = get_stats(loader.storage)
+    assert stats == {
+        "content": 9,
+        "directory": 22,
+        "origin": 1,
+        "origin_visit": 1,
+        "release": 0,
+        "revision": 8,
+        "skipped_content": 0,
+        "snapshot": 8,
+    }
+
+    check_snapshot(GREEK_SNAPSHOT4, loader.storage)
+
+
+def test_loader_cvs_visit_pserver_expand_id_keyword(swh_storage, datadir, tmp_path):
+    """Visit to CVS pserver with file with an RCS Id keyword"""
+    archive_name = "greek-repository4"
+    extracted_name = "greek-repository"
+    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+    repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
+    repo_url += "/greek-tree"  # CVS module name
+
+    # Ask our cvsclient to connect via the 'cvs server' command
+    repo_url = f"fake://{repo_url[7:]}"
+
+    loader = CvsLoader(
+        swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
+    )
+
+    assert loader.load() == {"status": "eventful"}
+
+    assert_last_visit_matches(
+        loader.storage,
+        repo_url,
+        status="full",
+        type="cvs",
+        snapshot=GREEK_SNAPSHOT4.id,
+    )
+
+    stats = get_stats(loader.storage)
+    assert stats == {
+        "content": 9,
+        "directory": 22,
+        "origin": 1,
+        "origin_visit": 1,
+        "release": 0,
+        "revision": 8,
+        "skipped_content": 0,
+        "snapshot": 8,
+    }
+
+    check_snapshot(GREEK_SNAPSHOT4, loader.storage)