Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F11023584
D6745.id24533.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
10 KB
Subscribers
None
D6745.id24533.diff
View Options
diff --git a/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py b/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py
--- a/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py
+++ b/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py
@@ -567,6 +567,14 @@
return fl
def expand_keyword(self, filename: str, rcs: rcsparse.rcsfile, r: str) -> bytes:
+ """
+ Check out a file with keywords expanded. Expansion rules are specific
+ to each keyword, and in some cases rely on undocumented behaviour of CVS.
+ Our implementation does not expand some keywords (see comments in the code).
+ For a list of keywords and their expansion rules, see:
+ https://www.gnu.org/software/trans-coord/manual/cvs/cvs.html#Keyword-list
+ (also available in 'info cvs' if cvs is installed)
+ """
rev = rcs.revs[r]
mode = self.kflag_get(rcs.expand)
@@ -574,24 +582,26 @@
return rcs.checkout(rev[0])
ret = []
- for line in rcs.checkout(rev[0]).split(b'\n'):
+ for line in rcs.checkout(rev[0]).splitlines(keepends=True):
logbuf = None
m = self.re_kw.match(line)
if m is None:
# No RCS Keywords, use it as it is
- ret += [line]
+ ret.append(line)
continue
line0 = b''
while m is not None:
+ logbuf = None
try:
dsign = m.end(1) + line[m.end(1):].index(b'$')
except ValueError:
+ # No terminating '$' found after the keyword, use the line as it is
+ ret.append(line)
break
prefix = line[:m.start(1) - 1]
next_match_segment = copy.deepcopy(line[dsign:])
line = line[dsign + 1:]
- line0 += prefix
expbuf = ''
if (mode & self.RCS_KWEXP_NAME) != 0:
expbuf += '$'
@@ -622,6 +632,28 @@
expbuf += rev[3]
expbuf += " "
if (expkw & self.RCS_KW_LOG) != 0:
+ # Unlike other keywords, the Log keyword expands over multiple lines.
+ # The terminating '$' of the Log keyword appears on the line which
+ # contains the log keyword itself. Then follow all log message lines,
+ # and those lines are followed by content which follows the Log keyword.
+ # For example, the line:
+ #
+ # $Log$ content which follows
+ #
+ # must be expanded like this:
+ #
+ # $Log: delta,v $
+ # Revision 1.2 2021/11/29 14:24:18 stsp
+ # log message line 1
+ # log message line 2
+ # content which follows
+ #
+ # If we did not trim the Log keyword's trailing "$" here then
+ # the last line would read instead:
+ #
+ # $ content which follows
+ assert(next_match_segment[0] == ord('$'))
+ next_match_segment = next_match_segment[1:]
p = prefix
expbuf += filename \
if (expkw & self.RCS_KW_FULLPATH) != 0 \
@@ -632,37 +664,45 @@
rev[0], time.strftime(
"%Y/%m/%d %H:%M:%S", time.gmtime(rev[1])),
rev[2])).encode('ascii')
- for lline in rcs.getlog(rev[0]).rstrip().split(b'\n'):
- if len(lline) == 0:
- logbuf += p.rstrip() + b'\n'
- else:
- logbuf += p + lline.lstrip() + b'\n'
- if len(line) == 0:
- logbuf += p.rstrip()
- else:
- logbuf += p + line.lstrip()
- line = b''
+ for lline in rcs.getlog(rev[0]).splitlines(keepends=True):
+ logbuf += p + lline
if (expkw & self.RCS_KW_SOURCE) != 0:
expbuf += filename
expbuf += " "
if (expkw & (self.RCS_KW_NAME | self.RCS_KW_LOCKER)) != 0:
+ # We do not expand Name and Locker keywords.
+ # The Name keyword is only expanded when a file is checked
+ # out with an explicit tag name. Perhaps this will be needed
+ # if the loader learns about CVS tags some day.
+ # The Locker keyword only expands if the file is currently
+ # locked via 'cvs admin -l', which is not part of the
+ # information we want to preserve about source code.
expbuf += " "
if (mode & self.RCS_KWEXP_NAME) != 0:
expbuf += '$'
- line0 += expbuf[:255].encode('ascii')
+ if logbuf is not None:
+ ret.append(prefix + expbuf.encode('ascii') + b'\n' + logbuf)
+ else:
+ line0 += prefix + expbuf[:255].encode('ascii')
m = self.re_kw.match(next_match_segment)
if m:
line = next_match_segment
- if (mode & self.RCS_KWEXP_NAME) != 0 and line0[-1] == ord('$'):
+ if (mode & self.RCS_KWEXP_NAME) != 0 and (expkw & self.RCS_KW_LOG) == 0 and line0[-1] == ord('$'):
# There is another keyword on this line that needs expansion.
# Avoid a double "$$" in the expanded string. This $ terminates
# the previous keyword and marks the beginning of the next one.
line0 = line0[:-1]
-
- ret += [line0 + line]
- if logbuf is not None:
- ret += [logbuf]
- return b'\n'.join(ret)
+ elif logbuf is not None:
+ # Trim whitespace from the beginning of text following the Log keyword.
+ # But leave a lone trailing empty line as-is. This seems inconsistent,
+ # but testing suggests that this matches CVS's behaviour.
+ if len(line) == 1 and line[0] == ord('\n'):
+ ret.append(line0 + prefix + line)
+ else:
+ ret.append(line0 + prefix + line.lstrip())
+ else:
+ ret.append(line0 + line)
+ return b''.join(ret)
# ----------------------------------------------------------------------
diff --git a/swh/loader/cvs/tests/data/greek-repository8.tgz b/swh/loader/cvs/tests/data/greek-repository8.tgz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/cvs/tests/test_loader.py b/swh/loader/cvs/tests/test_loader.py
--- a/swh/loader/cvs/tests/test_loader.py
+++ b/swh/loader/cvs/tests/test_loader.py
@@ -859,3 +859,91 @@
alpha = paths[b"greek-tree/alpha"]
alpha2 = paths2[b"greek-tree/alpha"]
assert alpha["sha1"] == alpha2["sha1"]
+
+
+GREEK_SNAPSHOT8 = Snapshot(
+ id=hash_to_bytes("b98a2744199723be827d48bad2f65ee1c2df7513"),
+ branches={
+ b"HEAD": SnapshotBranch(
+ target=hash_to_bytes("ee8be88b458b7fbca3037ab05e56552578e66faa"),
+ target_type=TargetType.REVISION,
+ )
+ },
+)
+
+
+def test_loader_cvs_expand_log_keyword(swh_storage, datadir, tmp_path):
+ """Conversion of RCS history with Log keyword in files"""
+ archive_name = "greek-repository8"
+ extracted_name = "greek-repository"
+ archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+ repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
+ repo_url += "/greek-tree" # CVS module name
+
+ loader = CvsLoader(
+ swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
+ )
+
+ assert loader.load() == {"status": "eventful"}
+
+ assert_last_visit_matches(
+ loader.storage,
+ repo_url,
+ status="full",
+ type="cvs",
+ snapshot=GREEK_SNAPSHOT8.id,
+ )
+
+ check_snapshot(GREEK_SNAPSHOT8, loader.storage)
+
+ stats = get_stats(loader.storage)
+ assert stats == {
+ "content": 14,
+ "directory": 31,
+ "origin": 1,
+ "origin_visit": 1,
+ "release": 0,
+ "revision": 11,
+ "skipped_content": 0,
+ "snapshot": 11,
+ }
+
+
+def test_loader_cvs_pserver_expand_log_keyword(swh_storage, datadir, tmp_path):
+ """Conversion of RCS history with Log keyword in files"""
+ archive_name = "greek-repository8"
+ extracted_name = "greek-repository"
+ archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+ repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
+ repo_url += "/greek-tree" # CVS module name
+
+ # Ask our cvsclient to connect via the 'cvs server' command
+ repo_url = f"fake://{repo_url[7:]}"
+
+ loader = CvsLoader(
+ swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
+ )
+
+ assert loader.load() == {"status": "eventful"}
+
+ assert_last_visit_matches(
+ loader.storage,
+ repo_url,
+ status="full",
+ type="cvs",
+ snapshot=GREEK_SNAPSHOT8.id,
+ )
+
+ check_snapshot(GREEK_SNAPSHOT8, loader.storage)
+
+ stats = get_stats(loader.storage)
+ assert stats == {
+ "content": 14,
+ "directory": 31,
+ "origin": 1,
+ "origin_visit": 1,
+ "release": 0,
+ "revision": 11,
+ "skipped_content": 0,
+ "snapshot": 11,
+ }
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Sep 17, 4:50 PM (18 h, 22 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217191
Attached To
D6745: fix expansion of the Log keyword with rsync origins
Event Timeline
Log In to Comment