Page MenuHomeSoftware Heritage

D6745.id24533.diff
No OneTemporary

D6745.id24533.diff

diff --git a/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py b/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py
--- a/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py
+++ b/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py
@@ -567,6 +567,14 @@
return fl
def expand_keyword(self, filename: str, rcs: rcsparse.rcsfile, r: str) -> bytes:
+ """
+ Check out a file with keywords expanded. Expansion rules are specific
+ to each keyword, and some cases are specific to undocumented behaviour of CVS.
+ Our implementation does not expand some keywords (see comments in the code).
+ For a list of keywords and their expansion rules, see:
+ https://www.gnu.org/software/trans-coord/manual/cvs/cvs.html#Keyword-list
+ (also available in 'info cvs' if cvs is installed)
+ """
rev = rcs.revs[r]
mode = self.kflag_get(rcs.expand)
@@ -574,24 +582,26 @@
return rcs.checkout(rev[0])
ret = []
- for line in rcs.checkout(rev[0]).split(b'\n'):
+ for line in rcs.checkout(rev[0]).splitlines(keepends=True):
logbuf = None
m = self.re_kw.match(line)
if m is None:
# No RCS Keywords, use it as it is
- ret += [line]
+ ret.append(line)
continue
line0 = b''
while m is not None:
+ logbuf = None
try:
dsign = m.end(1) + line[m.end(1):].index(b'$')
except ValueError:
+ # No RCS Keywords, use it as it is
+ ret.append(line)
break
prefix = line[:m.start(1) - 1]
next_match_segment = copy.deepcopy(line[dsign:])
line = line[dsign + 1:]
- line0 += prefix
expbuf = ''
if (mode & self.RCS_KWEXP_NAME) != 0:
expbuf += '$'
@@ -622,6 +632,28 @@
expbuf += rev[3]
expbuf += " "
if (expkw & self.RCS_KW_LOG) != 0:
+ # Unlike other keywords, the Log keyword expands over multiple lines.
+ # The terminating '$' of the Log keyword appears on the line which
+ # contains the log keyword itself. Then follow all log message lines,
+ # and those lines are followed by content which follows the Log keyword.
+ # For example, the line:
+ #
+ # $Log$ content which follows
+ #
+ # must be expanded like this:
+ #
+ # $Log: delta,v $
+ # Revision 1.2 2021/11/29 14:24:18 stsp
+ # log message line 1
+ # log message line 2
+ # content which follows
+ #
+ # If we did not trim the Log keyword's trailing "$" here then
+ # the last line would read instead:
+ #
+ # $ content which follows
+ assert(next_match_segment[0] == ord('$'))
+ next_match_segment = next_match_segment[1:]
p = prefix
expbuf += filename \
if (expkw & self.RCS_KW_FULLPATH) != 0 \
@@ -632,37 +664,45 @@
rev[0], time.strftime(
"%Y/%m/%d %H:%M:%S", time.gmtime(rev[1])),
rev[2])).encode('ascii')
- for lline in rcs.getlog(rev[0]).rstrip().split(b'\n'):
- if len(lline) == 0:
- logbuf += p.rstrip() + b'\n'
- else:
- logbuf += p + lline.lstrip() + b'\n'
- if len(line) == 0:
- logbuf += p.rstrip()
- else:
- logbuf += p + line.lstrip()
- line = b''
+ for lline in rcs.getlog(rev[0]).splitlines(keepends=True):
+ logbuf += p + lline
if (expkw & self.RCS_KW_SOURCE) != 0:
expbuf += filename
expbuf += " "
if (expkw & (self.RCS_KW_NAME | self.RCS_KW_LOCKER)) != 0:
+ # We do not expand Name and Locker keywords.
+ # The Name keyword is only expanded when a file is checked
+ # out with an explicit tag name. Perhaps this will be needed
+ # if the loader learns about CVS tags some day.
+ # The Locker keyword only expands if the file is currently
+ # locked via 'cvs admin -l', which is not part of the
+ # information we want to preserve about source code.
expbuf += " "
if (mode & self.RCS_KWEXP_NAME) != 0:
expbuf += '$'
- line0 += expbuf[:255].encode('ascii')
+ if logbuf is not None:
+ ret.append(prefix + expbuf.encode('ascii') + b'\n' + logbuf)
+ else:
+ line0 += prefix + expbuf[:255].encode('ascii')
m = self.re_kw.match(next_match_segment)
if m:
line = next_match_segment
- if (mode & self.RCS_KWEXP_NAME) != 0 and line0[-1] == ord('$'):
+ if (mode & self.RCS_KWEXP_NAME) != 0 and (expkw & self.RCS_KW_LOG) == 0 and line0[-1] == ord('$'):
# There is another keyword on this line that needs expansion.
# Avoid a double "$$" in the expanded string. This $ terminates
# the previous keyword and marks the beginning of the next one.
line0 = line0[:-1]
-
- ret += [line0 + line]
- if logbuf is not None:
- ret += [logbuf]
- return b'\n'.join(ret)
+ elif logbuf is not None:
+ # Trim whitespace from the beginning of text following the Log keyword,
+ # but leave a lone trailing empty line as-is. This seems inconsistent,
+ # but testing suggests that it matches CVS's behaviour.
+ if len(line) == 1 and line[0] == ord('\n'):
+ ret.append(line0 + prefix + line)
+ else:
+ ret.append(line0 + prefix + line.lstrip())
+ else:
+ ret.append(line0 + line)
+ return b''.join(ret)
# ----------------------------------------------------------------------
diff --git a/swh/loader/cvs/tests/data/greek-repository8.tgz b/swh/loader/cvs/tests/data/greek-repository8.tgz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/cvs/tests/test_loader.py b/swh/loader/cvs/tests/test_loader.py
--- a/swh/loader/cvs/tests/test_loader.py
+++ b/swh/loader/cvs/tests/test_loader.py
@@ -859,3 +859,91 @@
alpha = paths[b"greek-tree/alpha"]
alpha2 = paths2[b"greek-tree/alpha"]
assert alpha["sha1"] == alpha2["sha1"]
+
+
+GREEK_SNAPSHOT8 = Snapshot(
+ id=hash_to_bytes("b98a2744199723be827d48bad2f65ee1c2df7513"),
+ branches={
+ b"HEAD": SnapshotBranch(
+ target=hash_to_bytes("ee8be88b458b7fbca3037ab05e56552578e66faa"),
+ target_type=TargetType.REVISION,
+ )
+ },
+)
+
+
+def test_loader_cvs_expand_log_keyword(swh_storage, datadir, tmp_path):
+ """Conversion of RCS history with Log keyword in files"""
+ archive_name = "greek-repository8"
+ extracted_name = "greek-repository"
+ archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+ repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
+ repo_url += "/greek-tree" # CVS module name
+
+ loader = CvsLoader(
+ swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
+ )
+
+ assert loader.load() == {"status": "eventful"}
+
+ assert_last_visit_matches(
+ loader.storage,
+ repo_url,
+ status="full",
+ type="cvs",
+ snapshot=GREEK_SNAPSHOT8.id,
+ )
+
+ check_snapshot(GREEK_SNAPSHOT8, loader.storage)
+
+ stats = get_stats(loader.storage)
+ assert stats == {
+ "content": 14,
+ "directory": 31,
+ "origin": 1,
+ "origin_visit": 1,
+ "release": 0,
+ "revision": 11,
+ "skipped_content": 0,
+ "snapshot": 11,
+ }
+
+
+def test_loader_cvs_pserver_expand_log_keyword(swh_storage, datadir, tmp_path):
+ """Conversion of RCS history with Log keyword in files via pserver"""
+ archive_name = "greek-repository8"
+ extracted_name = "greek-repository"
+ archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+ repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
+ repo_url += "/greek-tree" # CVS module name
+
+ # Ask our cvsclient to connect via the 'cvs server' command
+ repo_url = f"fake://{repo_url[7:]}"
+
+ loader = CvsLoader(
+ swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
+ )
+
+ assert loader.load() == {"status": "eventful"}
+
+ assert_last_visit_matches(
+ loader.storage,
+ repo_url,
+ status="full",
+ type="cvs",
+ snapshot=GREEK_SNAPSHOT8.id,
+ )
+
+ check_snapshot(GREEK_SNAPSHOT8, loader.storage)
+
+ stats = get_stats(loader.storage)
+ assert stats == {
+ "content": 14,
+ "directory": 31,
+ "origin": 1,
+ "origin_visit": 1,
+ "release": 0,
+ "revision": 11,
+ "skipped_content": 0,
+ "snapshot": 11,
+ }

File Metadata

Mime Type
text/plain
Expires
Wed, Sep 17, 4:50 PM (18 h, 22 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217191

Event Timeline