diff --git a/swh/loader/cvs/rlog.py b/swh/loader/cvs/rlog.py
index e09ee0b..e0b8553 100644
--- a/swh/loader/cvs/rlog.py
+++ b/swh/loader/cvs/rlog.py
@@ -1,499 +1,521 @@
 """ RCS/CVS rlog parser, derived from viewvc and cvs2gitdump.py """
 
 # Copyright (C) 1999-2021 The ViewCVS Group. All Rights Reserved.
 #
 # By using ViewVC, you agree to the terms and conditions set forth
 # below:
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
 # are met:
 #
 #   * Redistributions of source code must retain the above copyright
 #     notice, this list of conditions and the following
 #     disclaimer.
 #
 #   * Redistributions in binary form must reproduce the above
 #     copyright notice, this list of conditions and the following
 #     disclaimer in the documentation and/or other materials provided
 #     with the distribution.
 #
 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
 # IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 # Copyright (c) 2012 YASUOKA Masahiko <yasuoka@yasuoka.net>
 #
 # Permission to use, copy, modify, and distribute this software for any
 # purpose with or without fee is hereby granted, provided that the above
 # copyright notice and this permission notice appear in all copies.
 #
 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
 import calendar
 import re
+import string
 import time
 from typing import BinaryIO, Dict, List, NamedTuple, Optional, Tuple
 
 from swh.loader.cvs.cvs2gitdump.cvs2gitdump import ChangeSetKey
 
 # There is no known encoding of path names in CVS. The actual encoding used
 # will depend on the CVS server's operating system and perhaps even the
 # underlying filesystem used to host a CVS repository.
 # It is even conceivable that a given repository may use multiple encodings,
 # e.g. due to migrations of the repository between different servers over time.
 #
 # This issue also affects the CVS network protocol which is communicating
 # paths between the CVS server and the CVS client. For this reason, most
 # public-facing repositories should stick to ASCII in practice.
 #
 # TODO: If known, the actual path encoding used by the repository should
 # be specified as a parameter. This parameter should be a list since
 # multiple encodings may be present in a given repository.
 path_encodings = ["ascii", "utf-8"]
 
 
 class revtuple(NamedTuple):
     number: str
     date: int
     author: bytes
     state: str
     branches: None
     revnumstr: None
-    commitid: None
+    commitid: Optional[str]
 
 
 class RlogConv:
     def __init__(self, cvsroot_path: str, fuzzsec: int) -> None:
         self.cvsroot_path = cvsroot_path
         self.fuzzsec = fuzzsec
         self.changesets: Dict[ChangeSetKey, ChangeSetKey] = dict()
         self.tags: Dict[str, ChangeSetKey] = dict()
         self.offsets: Dict[str, Dict[str, int]] = dict()
 
     def _process_rlog_revisions(
         self,
         path: str,
         taginfo: Dict[bytes, bytes],
         revisions: Dict[str, revtuple],
         logmsgs: Dict[str, Optional[bytes]],
     ) -> None:
         """ Convert RCS revision history of a file into self.changesets items """
         rtags: Dict[str, List[str]] = dict()
         # RCS and CVS represent branches by adding digits to revision numbers.
         # And CVS assigns special meaning to certain revision number ranges.
         #
         # Revision numbers on the main branch have only two digits:
         #
         #  1.1, 1.2, 1.3, ...
         #
         # Branches created with 'cvs tag -b' use even numbers for
         # the third digit:
         #
         #  1.1, 1.2, 1.3, ...  main branch history of the file
         #    |
         #    1.1.2.1, 1.1.2.2 ... a branch (2) forked off r1.1 of the file
         #
         # Branches are given human-readable names by associating
         # RCS tag labels with their revision numbers.
         # Given a file on the above branch which has been changed 10 times
         # since history was forked, the branch tag would look like this:
         #
         #   MY_BRANCH: r1.1.2.10
         #
         # Odd branch numbers are reserved for CVS "vendor" branches.
         # The default vendor branch is 1.1.1.
         # Vendor branches are populated with 'cvs import'.
         # Files on the vendor branch are merged to the main branch automatically
         # unless there are merge conflicts. Such conflicts have to be resolved
         # manually each time 'cvs import' is used to update the vendor branch.
         #
         # See here for details:
         # https://www.gnu.org/software/trans-coord/manual/cvs/html_node/Branches-and-revisions.html#Branches-and-revisions
         #
         # There are also "magic" branch numbers with a zero inserted
         # at the second-rightmost position:
         #
         #  1.1, 1.2, 1.3, ...  main branch history of the file
         #    |
         #    1.1.2.0.1 magic branch (2)
         #
         # This allows CVS to store information about a branch's existence
         # before any files on this branch have been modified.
         # Even-numbered branch revisions appear once the file is modified.
         branches = {"1": "HEAD", "1.1.1": "VENDOR"}
 
         k: str
         v_: str
         for k, v_ in list(taginfo.items()):  # type: ignore  # FIXME, inconsistent types
             r = v_.split(".")
             if len(r) == 3:
                 # vendor branch number
                 branches[v_] = "VENDOR"
             elif len(r) >= 3 and r[-2] == "0":
                 # magic branch number
                 branches[".".join(r[:-2] + r[-1:])] = k
             if len(r) == 2 and branches[r[0]] == "HEAD":
                 # main branch number
                 if v_ not in rtags:
                     rtags[v_] = list()
                 rtags[v_].append(k)
 
         revs: List[Tuple[str, revtuple]] = list(revisions.items())
         # sort by revision descending to priorize 1.1.1.1 than 1.1
         revs.sort(key=lambda a: a[1][0], reverse=True)
         # sort by time
         revs.sort(key=lambda a: a[1][1])
         novendor = False
         have_initial_revision = False
         last_vendor_status = None
         for k, v in revs:
             r = k.split(".")
             if (
                 len(r) == 4
                 and r[0] == "1"
                 and r[1] == "1"
                 and r[2] == "1"
                 and r[3] == "1"
             ):
                 if have_initial_revision:
                     continue
                 if v[3] == "dead":
                     continue
                 last_vendor_status = v[3]
                 have_initial_revision = True
             elif len(r) == 4 and r[0] == "1" and r[1] == "1" and r[2] == "1":
                 if novendor:
                     continue
                 last_vendor_status = v[3]
             elif len(r) == 2:
                 if r[0] == "1" and r[1] == "1":
                     if have_initial_revision:
                         continue
                     if v[3] == "dead":
                         continue
                     have_initial_revision = True
                 elif r[0] == "1" and r[1] != "1":
                     novendor = True
                 if last_vendor_status == "dead" and v[3] == "dead":
                     last_vendor_status = None
                     continue
                 last_vendor_status = None
             else:
                 # trunk only
                 continue
 
             b = ".".join(r[:-1])
             # decode author name in a potentially lossy way;
             # it is only used for internal hashing in this case
             author = v[2].decode("utf-8", "ignore")
             logmsg = logmsgs[k]
             assert logmsg is not None
             a = ChangeSetKey(branches[b], author, v[1], logmsg, v[6], self.fuzzsec)
 
             a.put_file(path, k, v[3], 0)
             while a in self.changesets:
                 c = self.changesets[a]
                 del self.changesets[a]
                 c.merge(a)
                 a = c
             self.changesets[a] = a
             if k in rtags:
                 for t in rtags[k]:
                     if t not in self.tags or self.tags[t].max_time < a.max_time:
                         self.tags[t] = a
 
     def parse_rlog(self, fp: BinaryIO) -> None:
         self.changesets = dict()
         self.tags = dict()
         self.offsets = dict()
         eof = None
         while eof != _EOF_LOG and eof != _EOF_ERROR:
             filename, branch, taginfo, lockinfo, errmsg, eof = _parse_log_header(fp)
             revisions: Dict[str, revtuple] = {}
             logmsgs: Dict[str, Optional[bytes]] = {}
             path = ""
             if filename:
                 # There is no known encoding of filenames in CVS.
                 # Attempt to decode the path with our list of known encodings.
                 # If none of them work, forcefully decode the path assuming
                 # the final path encoding provided in the list.
                 for i, e in enumerate(path_encodings):
                     try:
                         how = "ignore" if i == len(path_encodings) - 1 else "strict"
                         fname = filename.decode(e, how)
                         break
                     except UnicodeError:
                         pass
                 path = fname
             elif not eof:
                 raise ValueError("No filename found in rlog header")
             while not eof:
                 off = fp.tell()
                 rev, logmsg, eof = _parse_log_entry(fp)
                 if rev:
                     revisions[rev[0]] = rev
                     logmsgs[rev[0]] = logmsg
                 if eof != _EOF_LOG and eof != _EOF_ERROR:
                     if path not in self.offsets.keys():
                         self.offsets[path] = dict()
                     if rev:
                         self.offsets[path][rev[0]] = off
 
             self._process_rlog_revisions(path, taginfo, revisions, logmsgs)
 
     def getlog(self, fp: BinaryIO, path: str, rev: str) -> Optional[bytes]:
         off = self.offsets[path][rev]
         fp.seek(off)
         _rev, logmsg, eof = _parse_log_entry(fp)
         return logmsg
 
 
 # if your rlog doesn't use 77 '=' characters, then this must change
 LOG_END_MARKER = b"=" * 77 + b"\n"
 ENTRY_END_MARKER = b"-" * 28 + b"\n"
 
 _EOF_FILE = b"end of file entries"  # no more entries for this RCS file
 _EOF_LOG = b"end of log"  # hit the true EOF on the pipe
 _EOF_ERROR = b"error message found"  # rlog issued an error
 
 # rlog error messages look like
 #
 #   rlog: filename/goes/here,v: error message
 #   rlog: filename/goes/here,v:123: error message
 #
 # so we should be able to match them with a regex like
 #
 #   ^rlog\: (.*)(?:\:\d+)?\: (.*)$
 #
 # But for some reason the windows version of rlog omits the "rlog: " prefix
 # for the first error message when the standard error stream has been
 # redirected to a file or pipe. (the prefix is present in subsequent errors
 # and when rlog is run from the console). So the expression below is more
 # complicated
 _re_log_error = re.compile(rb"^(?:rlog\: )*(.*,v)(?:\:\d+)?\: (.*)$")
 
 # CVSNT error messages look like:
 # cvs rcsfile: `C:/path/to/file,v' does not appear to be a valid rcs file
 # cvs [rcsfile aborted]: C:/path/to/file,v: No such file or directory
 # cvs [rcsfile aborted]: cannot open C:/path/to/file,v: Permission denied
 _re_cvsnt_error = re.compile(
     rb"^(?:cvs rcsfile\: |cvs \[rcsfile aborted\]: )"
     rb"(?:\`(.*,v)' |"
     rb"cannot open (.*,v)\: |(.*,v)\: |)"
     rb"(.*)$"
 )
 
 
 def _parse_log_header(
     fp: BinaryIO,
 ) -> Tuple[
     bytes, bytes, Dict[bytes, bytes], Dict[bytes, bytes], bytes, Optional[bytes]
 ]:
     """Parse and RCS/CVS log header.
 
   fp is a file (pipe) opened for reading the log information.
 
   On entry, fp should point to the start of a log entry.
   On exit, fp will have consumed the separator line between the header and
   the first revision log.
 
   If there is no revision information (e.g. the "-h" switch was passed to
   rlog), then fp will consumed the file separator line on exit.
 
   Returns: filename, default branch, tag dictionary, lock dictionary,
   rlog error message, and eof flag
   """
 
     filename = branch = msg = b""
     taginfo: Dict[bytes, bytes] = {}  # tag name => number
     lockinfo: Dict[bytes, bytes] = {}  # revision => locker
     state = 0  # 0 = base, 1 = parsing symbols, 2 = parsing locks
     eof = None
 
     while 1:
         line = fp.readline()
         if not line:
             # the true end-of-file
             eof = _EOF_LOG
             break
 
         if state == 1:
             if line[0] == b"\t":
                 [tag, rev] = [x.strip() for x in line.split(b":")]
                 taginfo[tag] = rev
             else:
                 # oops. this line isn't tag info. stop parsing tags.
                 state = 0
 
         if state == 2:
             if line[0] == b"\t":
                 [locker, rev] = [x.strip() for x in line.split(b":")]
                 lockinfo[rev] = locker
             else:
                 # oops. this line isn't lock info. stop parsing tags.
                 state = 0
 
         if state == 0:
             if line[:9] == b"RCS file:":
                 filename = line[10:-1]
             elif line[:5] == b"head:":
                 # head = line[6:-1]
                 pass
             elif line[:7] == b"branch:":
                 branch = line[8:-1]
             elif line[:6] == b"locks:":
                 # start parsing the lock information
                 state = 2
             elif line[:14] == b"symbolic names":
                 # start parsing the tag information
                 state = 1
             elif line == ENTRY_END_MARKER:
                 # end of the headers
                 break
             elif line == LOG_END_MARKER:
                 # end of this file's log information
                 eof = _EOF_FILE
                 break
             else:
                 error = _re_cvsnt_error.match(line)
                 if error:
                     p1, p2, p3, msg = error.groups()
                     filename = p1 or p2 or p3
                     if not filename:
                         raise ValueError(
                             "Could not get filename from CVSNT error:\n%r" % line
                         )
                     eof = _EOF_ERROR
                     break
 
                 error = _re_log_error.match(line)
                 if error:
                     filename, msg = error.groups()
                     if msg[:30] == b"warning: Unknown phrases like ":
                         # don't worry about this warning. it can happen with some RCS
                         # files that have unknown fields in them e.g. "permissions 644;"
                         continue
                     eof = _EOF_ERROR
                     break
 
     return filename, branch, taginfo, lockinfo, msg, eof
 
 
 _re_log_info = re.compile(
     rb"^date:\s+([^;]+);"
     rb"\s+author:\s+([^;]+);"
     rb"\s+state:\s+([^;]+);"
     rb"(\s+lines:\s+([0-9\s+-]+);?)?"
     rb"(\s+commitid:\s+([a-zA-Z0-9]+);)?\n$"
 )
 
 # TODO: _re_rev should be updated to extract the "locked" flag
 _re_rev = re.compile(rb"^revision\s+([0-9.]+).*")
 
 
 def cvs_strptime(timestr):
     try:
         return time.strptime(timestr, "%Y/%m/%d %H:%M:%S")[:-1] + (0,)
     except ValueError:
         return time.strptime(timestr, "%Y-%m-%d %H:%M:%S %z")[:-1] + (0,)
 
 
+def _parse_commitid(commitid: bytes) -> Optional[str]:
+    """Extract the bare id from a raw ``commitid: <id>;`` rlog field.
+
+    Raises ValueError on a missing tag/semicolon or a non-alphanumeric id.
+    """
+    tag, colon, value = commitid.decode("ascii").strip().partition(":")
+    if tag != "commitid" or not colon or not value.endswith(";"):
+        raise ValueError("invalid commitid")
+    # _re_log_info accepts any run of whitespace after the colon; drop it all.
+    s = value[: -len(";")].lstrip()
+    if not set(s) <= set(string.ascii_letters + string.digits):
+        raise ValueError("invalid commitid")
+    return s
+
+
 def _parse_log_entry(fp) -> Tuple[Optional[revtuple], Optional[bytes], Optional[bytes]]:
     """Parse a single log entry.
 
   On entry, fp should point to the first line of the entry (the "revision"
   line).
   On exit, fp will have consumed the log separator line (dashes) or the
   end-of-file marker (equals).
 
   Returns: Revision data tuple (number string, date, author, state, branches, revnumstr,
   commitid) if any, log, and eof flag (see _EOF_*)
   """
     rev = None
     line = fp.readline()
     if not line:
         return None, None, _EOF_LOG
     if line == LOG_END_MARKER:
         # Needed because some versions of RCS precede LOG_END_MARKER
         # with ENTRY_END_MARKER
         return None, None, _EOF_FILE
     if line[:8] == b"revision":
         match = _re_rev.match(line)
         if not match:
             return None, None, _EOF_LOG
         rev = match.group(1)
 
         line = fp.readline()
         if not line:
             return None, None, _EOF_LOG
         match = _re_log_info.match(line)
 
     eof = None
     log = b""
     while 1:
         line = fp.readline()
         if not line:
             # true end-of-file
             eof = _EOF_LOG
             break
         if line[:9] == b"branches:":
             continue
         if line == ENTRY_END_MARKER:
             break
         if line == LOG_END_MARKER:
             # end of this file's log information
             eof = _EOF_FILE
             break
 
         log = log + line
 
     if not rev or not match:
         # there was a parsing error
         return None, None, eof
 
     # parse out a time tuple for the local time
     tm = cvs_strptime(match.group(1).decode("UTF-8"))
 
     # rlog seems to assume that two-digit years are 1900-based (so, "04"
     # comes out as "1904", not "2004").
     EPOCH = 1970
     if tm[0] < EPOCH:
         tm = list(tm)
         if (tm[0] - 1900) < 70:
             tm[0] = tm[0] + 100
         if tm[0] < EPOCH:
             raise ValueError("invalid year")
     date = calendar.timegm(tm)
 
+    commitid = match.group(6) or None
+    if commitid:
+        parsed_commitid = _parse_commitid(commitid)
+    else:
+        parsed_commitid = None
+
     # return a revision tuple compatible with 'rcsparse', the log message,
     # and the EOF marker
     return (
         revtuple(
             rev.decode("ascii"),  # revision number string
             date,
             match.group(2),  # author (encoding is arbitrary; don't attempt to decode)
             match.group(3).decode(
                 "ascii"
             ),  # state, usually "Exp" or "dead"; non-ASCII data here would be weird
             None,  # TODO: branches of this rev
             None,  # TODO: revnumstr of previous rev
-            None,  # TODO: commitid
+            parsed_commitid,
         ),
         log,
         eof,
     )
diff --git a/swh/loader/cvs/tests/data/dino-commitid.tgz b/swh/loader/cvs/tests/data/dino-commitid.tgz
new file mode 100644
index 0000000..a444f44
Binary files /dev/null and b/swh/loader/cvs/tests/data/dino-commitid.tgz differ
diff --git a/swh/loader/cvs/tests/test_loader.py b/swh/loader/cvs/tests/test_loader.py
index 91cbf99..41d663d 100644
--- a/swh/loader/cvs/tests/test_loader.py
+++ b/swh/loader/cvs/tests/test_loader.py
@@ -1,627 +1,711 @@
 # Copyright (C) 2016-2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import os
 
 from swh.loader.cvs.loader import CvsLoader
 from swh.loader.tests import (
     assert_last_visit_matches,
     check_snapshot,
     get_stats,
     prepare_repository_from_archive,
 )
 from swh.model.hashutil import hash_to_bytes
 from swh.model.model import Snapshot, SnapshotBranch, TargetType
 
 RUNBABY_SNAPSHOT = Snapshot(
     id=hash_to_bytes("1cff69ab9bd70822d5e3006092f943ccaafdcf57"),
     branches={
         b"HEAD": SnapshotBranch(
             target=hash_to_bytes("ef511d258fa55035c2bc2a5b05cad233cee1d328"),
             target_type=TargetType.REVISION,
         )
     },
 )
 
 
 def test_loader_cvs_not_found_no_mock(swh_storage, tmp_path):
     """Given an unknown repository, the loader visit ends up in status not_found"""
     unknown_repo_url = "unknown-repository"
     loader = CvsLoader(swh_storage, unknown_repo_url, cvsroot_path=tmp_path)
 
     assert loader.load() == {"status": "uneventful"}
 
     assert_last_visit_matches(
         swh_storage, unknown_repo_url, status="not_found", type="cvs",
     )
 
 
 def test_loader_cvs_visit(swh_storage, datadir, tmp_path):
     """Eventful visit should yield 1 snapshot"""
     archive_name = "runbaby"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
 
     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )
 
     assert loader.load() == {"status": "eventful"}
 
     assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="cvs",
         snapshot=RUNBABY_SNAPSHOT.id,
     )
 
     stats = get_stats(loader.storage)
     assert stats == {
         "content": 5,
         "directory": 2,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 1,
         "skipped_content": 0,
         "snapshot": 1,
     }
 
     check_snapshot(RUNBABY_SNAPSHOT, loader.storage)
 
 
 def test_loader_cvs_2_visits_no_change(swh_storage, datadir, tmp_path):
     """Eventful visit followed by uneventful visit should yield the same snapshot
 
     """
     archive_name = "runbaby"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
 
     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )
 
     assert loader.load() == {"status": "eventful"}
     visit_status1 = assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="cvs",
         snapshot=RUNBABY_SNAPSHOT.id,
     )
 
     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )
     assert loader.load() == {"status": "uneventful"}
     visit_status2 = assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="cvs",
         snapshot=RUNBABY_SNAPSHOT.id,
     )
 
     assert visit_status1.date < visit_status2.date
     assert visit_status1.snapshot == visit_status2.snapshot
 
     stats = get_stats(loader.storage)
     assert stats["origin_visit"] == 1 + 1  # computed twice the same snapshot
     assert stats["snapshot"] == 1
 
 
 GREEK_SNAPSHOT = Snapshot(
     id=hash_to_bytes("5e74af67d69dfd7aea0eb118154d062f71f50120"),
     branches={
         b"HEAD": SnapshotBranch(
             target=hash_to_bytes("e18b92f14cd5b3efb3fcb4ea46cfaf97f25f301b"),
             target_type=TargetType.REVISION,
         )
     },
 )
 
 
 def test_loader_cvs_with_file_additions_and_deletions(swh_storage, datadir, tmp_path):
     """Eventful conversion of history with file additions and deletions"""
     archive_name = "greek-repository"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
     repo_url += "/greek-tree"  # CVS module name
     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )
 
     assert loader.load() == {"status": "eventful"}
 
     assert_last_visit_matches(
         loader.storage, repo_url, status="full", type="cvs", snapshot=GREEK_SNAPSHOT.id,
     )
 
     stats = get_stats(loader.storage)
     assert stats == {
         "content": 8,
         "directory": 20,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 7,
         "skipped_content": 0,
         "snapshot": 7,
     }
 
     check_snapshot(GREEK_SNAPSHOT, loader.storage)
 
 
 def test_loader_cvs_pserver_with_file_additions_and_deletions(
     swh_storage, datadir, tmp_path
 ):
     """Eventful CVS pserver conversion with file additions and deletions"""
     archive_name = "greek-repository"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
     repo_url += "/greek-tree"  # CVS module name
 
     # Ask our cvsclient to connect via the 'cvs server' command
     repo_url = f"fake://{repo_url[7:]}"
 
     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )
 
     assert loader.load() == {"status": "eventful"}
 
     assert_last_visit_matches(
         loader.storage, repo_url, status="full", type="cvs", snapshot=GREEK_SNAPSHOT.id,
     )
 
     stats = get_stats(loader.storage)
     assert stats == {
         "content": 8,
         "directory": 20,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 7,
         "skipped_content": 0,
         "snapshot": 7,
     }
 
     check_snapshot(GREEK_SNAPSHOT, loader.storage)
 
 
 GREEK_SNAPSHOT2 = Snapshot(
     id=hash_to_bytes("048885ae2145ffe81588aea95dcf75c536ecdf26"),
     branches={
         b"HEAD": SnapshotBranch(
             target=hash_to_bytes("55eb1438c03588607ce4b8db8f45e8e23075951b"),
             target_type=TargetType.REVISION,
         )
     },
 )
 
 
 def test_loader_cvs_2_visits_with_change(swh_storage, datadir, tmp_path):
     """Eventful visit followed by eventful visit should yield two snapshots"""
     archive_name = "greek-repository"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
     repo_url += "/greek-tree"  # CVS module name
     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )
 
     assert loader.load() == {"status": "eventful"}
 
     visit_status1 = assert_last_visit_matches(
         loader.storage, repo_url, status="full", type="cvs", snapshot=GREEK_SNAPSHOT.id,
     )
 
     stats = get_stats(loader.storage)
     assert stats == {
         "content": 8,
         "directory": 20,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 7,
         "skipped_content": 0,
         "snapshot": 7,
     }
 
     archive_name2 = "greek-repository2"
     archive_path2 = os.path.join(datadir, f"{archive_name2}.tgz")
     repo_url = prepare_repository_from_archive(archive_path2, archive_name, tmp_path)
     repo_url += "/greek-tree"  # CVS module name
 
     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )
 
     assert loader.load() == {"status": "eventful"}
 
     visit_status2 = assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="cvs",
         snapshot=GREEK_SNAPSHOT2.id,
     )
 
     stats = get_stats(loader.storage)
     assert stats == {
         "content": 10,
         "directory": 23,
         "origin": 1,
         "origin_visit": 2,
         "release": 0,
         "revision": 8,
         "skipped_content": 0,
         "snapshot": 8,
     }
 
     check_snapshot(GREEK_SNAPSHOT2, loader.storage)
 
     assert visit_status1.date < visit_status2.date
     assert visit_status1.snapshot != visit_status2.snapshot
 
 
 def test_loader_cvs_visit_pserver(swh_storage, datadir, tmp_path):
     """Eventful visit to CVS pserver should yield 1 snapshot"""
     archive_name = "runbaby"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
     repo_url += "/runbaby"  # CVS module name
 
     # Ask our cvsclient to connect via the 'cvs server' command
     repo_url = "fake://" + repo_url[7:]
 
     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
     )
 
     assert loader.load() == {"status": "eventful"}
 
     assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="cvs",
         snapshot=RUNBABY_SNAPSHOT.id,
     )
 
     stats = get_stats(loader.storage)
     assert stats == {
         "content": 5,
         "directory": 2,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 1,
         "skipped_content": 0,
         "snapshot": 1,
     }
 
     check_snapshot(RUNBABY_SNAPSHOT, loader.storage)
 
 
 GREEK_SNAPSHOT3 = Snapshot(
     id=hash_to_bytes("cd801546b0137c82f01b9b67848ba8261d64ebbb"),
     branches={
         b"HEAD": SnapshotBranch(
             target=hash_to_bytes("14980990790ce1921db953c4c9ae03dd8861e8d6"),
             target_type=TargetType.REVISION,
         )
     },
 )
 
 
 def test_loader_cvs_visit_pserver_no_eol(swh_storage, datadir, tmp_path):
     """Visit to CVS pserver with file that lacks trailing eol"""
     archive_name = "greek-repository3"
     extracted_name = "greek-repository"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
     repo_url += "/greek-tree"  # CVS module name
 
     # Ask our cvsclient to connect via the 'cvs server' command
     repo_url = "fake://" + repo_url[7:]
 
     loader = CvsLoader(
         swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
     )
 
     assert loader.load() == {"status": "eventful"}
 
     assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="cvs",
         snapshot=GREEK_SNAPSHOT3.id,
     )
 
     stats = get_stats(loader.storage)
     assert stats == {
         "content": 9,
         "directory": 23,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 8,
         "skipped_content": 0,
         "snapshot": 8,
     }
 
     check_snapshot(GREEK_SNAPSHOT3, loader.storage)
 
 
 # Snapshot expected once the "greek-repository4" archive has been loaded
 # (shared by the local and the pserver RCS Id keyword tests): a single HEAD
 # branch whose target is a revision.
 GREEK_SNAPSHOT4 = Snapshot(
     branches={
         b"HEAD": SnapshotBranch(
             target_type=TargetType.REVISION,
             target=hash_to_bytes("fe4a926d49d2af76e0025a8ba0b4ed159aec6829"),
         ),
     },
     id=hash_to_bytes("11673e2766654bd5fafb5119b418794230d48d6b"),
 )
 
 
 def test_loader_cvs_visit_expand_id_keyword(swh_storage, datadir, tmp_path):
     """Visit of a CVS repository containing a file with an RCS Id keyword"""
     archive = "greek-repository4"
     checkout_dir = "greek-repository"
     tarball = os.path.join(datadir, archive + ".tgz")
     base_url = prepare_repository_from_archive(tarball, checkout_dir, tmp_path)
     origin_url = base_url + "/greek-tree"  # CVS module name
 
     cvsroot = os.path.join(tmp_path, checkout_dir)
     loader = CvsLoader(swh_storage, origin_url, cvsroot_path=cvsroot)
 
     load_result = loader.load()
     assert load_result == {"status": "eventful"}
 
     expected_visit = dict(status="full", type="cvs", snapshot=GREEK_SNAPSHOT4.id)
     assert_last_visit_matches(loader.storage, origin_url, **expected_visit)
 
     # Object counts the storage must report after the load.
     expected_stats = {
         "content": 9,
         "directory": 22,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 8,
         "skipped_content": 0,
         "snapshot": 8,
     }
     assert get_stats(loader.storage) == expected_stats
 
     check_snapshot(GREEK_SNAPSHOT4, loader.storage)
 
 
 def test_loader_cvs_visit_pserver_expand_id_keyword(swh_storage, datadir, tmp_path):
     """Pserver visit of a repository containing a file with an RCS Id keyword"""
     archive = "greek-repository4"
     checkout_dir = "greek-repository"
     tarball = os.path.join(datadir, archive + ".tgz")
     base_url = prepare_repository_from_archive(tarball, checkout_dir, tmp_path)
 
     # Append the CVS module name, then swap the first 7 characters of the URL
     # (presumably its "file://" scheme; confirm against
     # prepare_repository_from_archive) for "fake://", which asks our cvsclient
     # to connect via the 'cvs server' command.
     module_url = base_url + "/greek-tree"
     origin_url = "fake://" + module_url[7:]
 
     cvsroot = os.path.join(tmp_path, checkout_dir)
     loader = CvsLoader(swh_storage, origin_url, cvsroot_path=cvsroot)
 
     load_result = loader.load()
     assert load_result == {"status": "eventful"}
 
     expected_visit = dict(status="full", type="cvs", snapshot=GREEK_SNAPSHOT4.id)
     assert_last_visit_matches(loader.storage, origin_url, **expected_visit)
 
     # Object counts the storage must report after the load.
     expected_stats = {
         "content": 9,
         "directory": 22,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 8,
         "skipped_content": 0,
         "snapshot": 8,
     }
     assert get_stats(loader.storage) == expected_stats
 
     check_snapshot(GREEK_SNAPSHOT4, loader.storage)
 
 
 # Snapshot expected once the "greek-repository5" archive has been loaded
 # (shared by the local and the pserver deleted/re-added file tests): a
 # single HEAD branch whose target is a revision.
 GREEK_SNAPSHOT5 = Snapshot(
     branches={
         b"HEAD": SnapshotBranch(
             target_type=TargetType.REVISION,
             target=hash_to_bytes("4320f152cc61ed660d25fdeebc787b3099e55a96"),
         ),
     },
     id=hash_to_bytes("ee6faeaf50aa513c53c8ba29194116a5ef88add6"),
 )
 
 
 def test_loader_cvs_with_file_deleted_and_readded(swh_storage, datadir, tmp_path):
     """Eventful load of a history where a file is deleted and later re-added"""
     archive = "greek-repository5"
     checkout_dir = "greek-repository"
     tarball = os.path.join(datadir, archive + ".tgz")
     base_url = prepare_repository_from_archive(tarball, checkout_dir, tmp_path)
     origin_url = base_url + "/greek-tree"  # CVS module name
 
     cvsroot = os.path.join(tmp_path, checkout_dir)
     loader = CvsLoader(swh_storage, origin_url, cvsroot_path=cvsroot)
 
     load_result = loader.load()
     assert load_result == {"status": "eventful"}
 
     expected_visit = dict(status="full", type="cvs", snapshot=GREEK_SNAPSHOT5.id)
     assert_last_visit_matches(loader.storage, origin_url, **expected_visit)
 
     # Object counts the storage must report after the load.
     expected_stats = {
         "content": 9,
         "directory": 22,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 8,
         "skipped_content": 0,
         "snapshot": 8,
     }
     assert get_stats(loader.storage) == expected_stats
 
     check_snapshot(GREEK_SNAPSHOT5, loader.storage)
 
 
 def test_loader_cvs_pserver_with_file_deleted_and_readded(
     swh_storage, datadir, tmp_path
 ):
     """Eventful pserver load of a history with a deleted, then re-added file"""
     archive = "greek-repository5"
     checkout_dir = "greek-repository"
     tarball = os.path.join(datadir, archive + ".tgz")
     base_url = prepare_repository_from_archive(tarball, checkout_dir, tmp_path)
 
     # Append the CVS module name, then swap the first 7 characters of the URL
     # (presumably its "file://" scheme; confirm against
     # prepare_repository_from_archive) for "fake://", which asks our cvsclient
     # to connect via the 'cvs server' command.
     module_url = base_url + "/greek-tree"
     origin_url = "fake://" + module_url[7:]
 
     cvsroot = os.path.join(tmp_path, checkout_dir)
     loader = CvsLoader(swh_storage, origin_url, cvsroot_path=cvsroot)
 
     load_result = loader.load()
     assert load_result == {"status": "eventful"}
 
     expected_visit = dict(status="full", type="cvs", snapshot=GREEK_SNAPSHOT5.id)
     assert_last_visit_matches(loader.storage, origin_url, **expected_visit)
 
     # Object counts the storage must report after the load.
     expected_stats = {
         "content": 9,
         "directory": 22,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 8,
         "skipped_content": 0,
         "snapshot": 8,
     }
     assert get_stats(loader.storage) == expected_stats
 
     check_snapshot(GREEK_SNAPSHOT5, loader.storage)
 
 
 # Snapshot expected once the "dino-readded-file" archive has been loaded
 # (shared by the local and the pserver Attic tests): a single HEAD branch
 # whose target is a revision.
 DINO_SNAPSHOT = Snapshot(
     branches={
         b"HEAD": SnapshotBranch(
             target_type=TargetType.REVISION,
             target=hash_to_bytes("df61a776c401a178cc796545849fc87bdadb2001"),
         ),
     },
     id=hash_to_bytes("417021c16e17c5e0038cf0e73dbf48a6142c8304"),
 )
 
 
 def test_loader_cvs_readded_file_in_attic(swh_storage, datadir, tmp_path):
     """Load a history whose RCS files live in the Attic"""
     # In this repository some file revisions are marked "dead" in the Attic
     # only. The re-added file tests above differ: there, the RCS file left the
     # Attic again as soon as the deleted file was re-added. If the "dead"
     # revisions in the Attic went undetected, the converted history would
     # contain errors.
     archive = "dino-readded-file"
     tarball = os.path.join(datadir, archive + ".tgz")
     base_url = prepare_repository_from_archive(tarball, archive, tmp_path)
     origin_url = base_url + "/src"  # CVS module name
 
     cvsroot = os.path.join(tmp_path, archive)
     loader = CvsLoader(swh_storage, origin_url, cvsroot_path=cvsroot)
 
     load_result = loader.load()
     assert load_result == {"status": "eventful"}
 
     expected_visit = dict(status="full", type="cvs", snapshot=DINO_SNAPSHOT.id)
     assert_last_visit_matches(loader.storage, origin_url, **expected_visit)
 
     # Object counts the storage must report after the load.
     expected_stats = {
         "content": 38,
         "directory": 105,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 35,
         "skipped_content": 0,
         "snapshot": 35,
     }
     assert get_stats(loader.storage) == expected_stats
 
     check_snapshot(DINO_SNAPSHOT, loader.storage)
 
 
 def test_loader_cvs_pserver_readded_file_in_attic(swh_storage, datadir, tmp_path):
     """Load, over pserver, a history whose RCS files live in the Attic"""
     # In this repository some file revisions are marked "dead" in the Attic
     # only. The re-added file tests above differ: there, the RCS file left the
     # Attic again as soon as the deleted file was re-added. If the "dead"
     # revisions in the Attic went undetected, the converted history would
     # contain errors.
     # The pserver case is special because, by default, the "dead" revisions
     # do not appear in the output of 'cvs rlog'.
     archive = "dino-readded-file"
     tarball = os.path.join(datadir, archive + ".tgz")
     base_url = prepare_repository_from_archive(tarball, archive, tmp_path)
 
     # Append the CVS module name, then swap the first 7 characters of the URL
     # (presumably its "file://" scheme; confirm against
     # prepare_repository_from_archive) for "fake://", which asks our cvsclient
     # to connect via the 'cvs server' command.
     module_url = base_url + "/src"
     origin_url = "fake://" + module_url[7:]
 
     cvsroot = os.path.join(tmp_path, archive)
     loader = CvsLoader(swh_storage, origin_url, cvsroot_path=cvsroot)
 
     load_result = loader.load()
     assert load_result == {"status": "eventful"}
 
     expected_visit = dict(status="full", type="cvs", snapshot=DINO_SNAPSHOT.id)
     assert_last_visit_matches(loader.storage, origin_url, **expected_visit)
 
     # Object counts the storage must report after the load.
     expected_stats = {
         "content": 38,
         "directory": 105,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 35,
         "skipped_content": 0,
         "snapshot": 35,
     }
     assert get_stats(loader.storage) == expected_stats
 
     check_snapshot(DINO_SNAPSHOT, loader.storage)
+
+
+DINO_SNAPSHOT2 = Snapshot(
+    id=hash_to_bytes("a9d6ce0b4f22dc4fd752ad4c25ec9ea71ed568d7"),
+    branches={
+        b"HEAD": SnapshotBranch(
+            target=hash_to_bytes("150616a2a3206f00a73f2d6a017dde22c52e4a83"),
+            target_type=TargetType.REVISION,
+        )
+    },
+)
+
+
+def test_loader_cvs_split_commits_by_commitid(swh_storage, datadir, tmp_path):
+    """Conversion of RCS history which needs to be split by commit ID"""
+    # This repository has some file revisions which use the same log message
+    # and can only be told apart by commit IDs. Without commit IDs, these commits
+    # would get merged into a single commit in our conversion result.
+    archive_name = "dino-commitid"
+    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
+    repo_url += "/dino"  # CVS module name
+
+    loader = CvsLoader(
+        swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
+    )
+
+    assert loader.load() == {"status": "eventful"}
+
+    assert_last_visit_matches(
+        loader.storage, repo_url, status="full", type="cvs", snapshot=DINO_SNAPSHOT2.id,
+    )
+
+    check_snapshot(DINO_SNAPSHOT2, loader.storage)
+
+    stats = get_stats(loader.storage)
+    assert stats == {
+        "content": 18,
+        "directory": 36,
+        "origin": 1,
+        "origin_visit": 1,
+        "release": 0,
+        "revision": 18,
+        "skipped_content": 0,
+        "snapshot": 18,
+    }
+
+
+def test_loader_cvs_pserver_split_commits_by_commitid(swh_storage, datadir, tmp_path):
+    """Conversion via pserver which needs to be split by commit ID"""
+    # This repository has some file revisions which use the same log message
+    # and can only be told apart by commit IDs. Without commit IDs, these commits
+    # would get merged into a single commit in our conversion result.
+    archive_name = "dino-commitid"
+    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
+    repo_url += "/dino"  # CVS module name
+
+    # Ask our cvsclient to connect via the 'cvs server' command
+    repo_url = f"fake://{repo_url[7:]}"
+
+    loader = CvsLoader(
+        swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
+    )
+
+    assert loader.load() == {"status": "eventful"}
+
+    assert_last_visit_matches(
+        loader.storage, repo_url, status="full", type="cvs", snapshot=DINO_SNAPSHOT2.id,
+    )
+
+    check_snapshot(DINO_SNAPSHOT2, loader.storage)
+
+    stats = get_stats(loader.storage)
+    assert stats == {
+        "content": 18,
+        "directory": 36,
+        "origin": 1,
+        "origin_visit": 1,
+        "release": 0,
+        "revision": 18,
+        "skipped_content": 0,
+        "snapshot": 18,
+    }