Page Menu | Home | Software Heritage

D6813.diff
No One | Temporary

D6813.diff

diff --git a/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py b/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py
--- a/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py
+++ b/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py
@@ -652,34 +652,36 @@
# and those lines are followed by content which follows the Log keyword.
# For example, the line:
#
- # $Log$ content which follows
+ # foo $Log$content which follows
#
- # must be expanded like this:
+ # will be expanded like this by CVS:
#
- # $Log: delta,v $
- # Revision 1.2 2021/11/29 14:24:18 stsp
- # log message line 1
- # log message line 2
- # content which follows
+ # foo $Log: delta,v $
+ # foo Revision 1.2 2021/11/29 14:24:18 stsp
+ # foo log message line 1
+ # foo log message line 2
+ # foocontent which follows
+ #
+ # (Side note: Trailing whitespace is stripped from "foo " when
+ # the content which follows gets written to the output file.)
#
# If we did not trim the Log keyword's trailing "$" here then
# the last line would read instead:
#
- # $ content which follows
+ # foo$content which follows
assert(next_match_segment[0] == ord('$'))
next_match_segment = next_match_segment[1:]
- p = prefix
expbuf += filename \
if (expkw & self.RCS_KW_FULLPATH) != 0 \
else os.path.basename(filename)
expbuf += " "
- logbuf = p + (
+ logbuf = prefix + (
'Revision %s %s %s\n' % (
rev[0], time.strftime(
"%Y/%m/%d %H:%M:%S", time.gmtime(rev[1])),
rev[2])).encode('ascii')
for lline in rcs.getlog(rev[0]).splitlines(keepends=True):
- logbuf += p + lline
+ logbuf += prefix + lline
if (expkw & self.RCS_KW_SOURCE) != 0:
expbuf += filename
expbuf += " "
@@ -707,13 +709,10 @@
# the previous keyword and marks the beginning of the next one.
line0 = line0[:-1]
elif logbuf is not None:
- # Trim whitespace from the beginning of text following the Log keyword.
- # But leave a lone trailing empty line as-is. Which seems inconsistent,
- # but testing suggests that this matches CVS's behaviour.
- if len(line) == 1 and line[0] == ord('\n'):
- ret.append(line0 + prefix + line)
- else:
- ret.append(line0 + prefix + line.lstrip())
+ # Trim whitespace from tail of prefix if appending a suffix which
+ # followed the Log keyword on the same line.
+ # Testing suggests that this matches CVS's behaviour.
+ ret.append(line0 + prefix.rstrip() + line)
else:
ret.append(line0 + line)
return b''.join(ret)
diff --git a/swh/loader/cvs/tests/data/rcsbase-log-kw-test-repo.tgz b/swh/loader/cvs/tests/data/rcsbase-log-kw-test-repo.tgz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/cvs/tests/test_loader.py b/swh/loader/cvs/tests/test_loader.py
--- a/swh/loader/cvs/tests/test_loader.py
+++ b/swh/loader/cvs/tests/test_loader.py
@@ -995,3 +995,89 @@
}
check_snapshot(GREEK_SNAPSHOT9, loader.storage)
+
+
+RCSBASE_SNAPSHOT = Snapshot(
+ id=hash_to_bytes("2c75041ba8868df04349c1c8f4c29f992967b8aa"),
+ branches={
+ b"HEAD": SnapshotBranch(
+ target=hash_to_bytes("46f076387ff170dc3d4da5e43d953c1fc744c821"),
+ target_type=TargetType.REVISION,
+ )
+ },
+)
+
+
+def test_loader_cvs_expand_log_keyword2(swh_storage, datadir, tmp_path):
+ """Another conversion of RCS history with Log keyword in files"""
+ archive_name = "rcsbase-log-kw-test-repo"
+ archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+ repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
+ repo_url += "/src" # CVS module name
+
+ loader = CvsLoader(
+ swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
+ )
+
+ assert loader.load() == {"status": "eventful"}
+
+ assert_last_visit_matches(
+ loader.storage,
+ repo_url,
+ status="full",
+ type="cvs",
+ snapshot=RCSBASE_SNAPSHOT.id,
+ )
+
+ check_snapshot(RCSBASE_SNAPSHOT, loader.storage)
+
+ stats = get_stats(loader.storage)
+ assert stats == {
+ "content": 2,
+ "directory": 3,
+ "origin": 1,
+ "origin_visit": 1,
+ "release": 0,
+ "revision": 3,
+ "skipped_content": 0,
+ "snapshot": 1,
+ }
+
+
+def test_loader_cvs_pserver_expand_log_keyword2(swh_storage, datadir, tmp_path):
+ """Another conversion of RCS history with Log keyword in files (cvs server)"""
+ archive_name = "rcsbase-log-kw-test-repo"
+ archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+ repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
+ repo_url += "/src" # CVS module name
+
+ # Ask our cvsclient to connect via the 'cvs server' command
+ repo_url = f"fake://{repo_url[7:]}"
+
+ loader = CvsLoader(
+ swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
+ )
+
+ assert loader.load() == {"status": "eventful"}
+
+ assert_last_visit_matches(
+ loader.storage,
+ repo_url,
+ status="full",
+ type="cvs",
+ snapshot=RCSBASE_SNAPSHOT.id,
+ )
+
+ check_snapshot(RCSBASE_SNAPSHOT, loader.storage)
+
+ stats = get_stats(loader.storage)
+ assert stats == {
+ "content": 2,
+ "directory": 3,
+ "origin": 1,
+ "origin_visit": 1,
+ "release": 0,
+ "revision": 3,
+ "skipped_content": 0,
+ "snapshot": 1,
+ }

File Metadata

Mime Type
text/plain
Expires
Wed, Dec 18, 4:06 PM (5 h, 3 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220618

Event Timeline