Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F8395296
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
27 KB
Subscribers
None
View Options
diff --git a/swh/loader/cvs/tests/data/greek-repository7.tgz b/swh/loader/cvs/tests/data/greek-repository7.tgz
new file mode 100644
index 0000000..8b42fdd
Binary files /dev/null and b/swh/loader/cvs/tests/data/greek-repository7.tgz differ
diff --git a/swh/loader/cvs/tests/test_loader.py b/swh/loader/cvs/tests/test_loader.py
index e35d17f..369a3dd 100644
--- a/swh/loader/cvs/tests/test_loader.py
+++ b/swh/loader/cvs/tests/test_loader.py
@@ -1,799 +1,861 @@
# Copyright (C) 2016-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
+from typing import Any, Dict
from swh.loader.cvs.loader import CvsLoader
from swh.loader.tests import (
assert_last_visit_matches,
check_snapshot,
get_stats,
prepare_repository_from_archive,
)
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Snapshot, SnapshotBranch, TargetType
RUNBABY_SNAPSHOT = Snapshot(
id=hash_to_bytes("1cff69ab9bd70822d5e3006092f943ccaafdcf57"),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("ef511d258fa55035c2bc2a5b05cad233cee1d328"),
target_type=TargetType.REVISION,
)
},
)
def test_loader_cvs_not_found_no_mock(swh_storage, tmp_path):
"""Given an unknown repository, the loader visit ends up in status not_found"""
unknown_repo_url = "unknown-repository"
loader = CvsLoader(swh_storage, unknown_repo_url, cvsroot_path=tmp_path)
assert loader.load() == {"status": "uneventful"}
assert_last_visit_matches(
swh_storage, unknown_repo_url, status="not_found", type="cvs",
)
def test_loader_cvs_visit(swh_storage, datadir, tmp_path):
"""Eventful visit should yield 1 snapshot"""
archive_name = "runbaby"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=RUNBABY_SNAPSHOT.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 5,
"directory": 2,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 1,
"skipped_content": 0,
"snapshot": 1,
}
check_snapshot(RUNBABY_SNAPSHOT, loader.storage)
def test_loader_cvs_2_visits_no_change(swh_storage, datadir, tmp_path):
"""Eventful visit followed by uneventful visit should yield the same snapshot
"""
archive_name = "runbaby"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
visit_status1 = assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=RUNBABY_SNAPSHOT.id,
)
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "uneventful"}
visit_status2 = assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=RUNBABY_SNAPSHOT.id,
)
assert visit_status1.date < visit_status2.date
assert visit_status1.snapshot == visit_status2.snapshot
stats = get_stats(loader.storage)
assert stats["origin_visit"] == 1 + 1 # the same snapshot was computed twice
assert stats["snapshot"] == 1
GREEK_SNAPSHOT = Snapshot(
id=hash_to_bytes("5e74af67d69dfd7aea0eb118154d062f71f50120"),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("e18b92f14cd5b3efb3fcb4ea46cfaf97f25f301b"),
target_type=TargetType.REVISION,
)
},
)
def test_loader_cvs_with_file_additions_and_deletions(swh_storage, datadir, tmp_path):
"""Eventful conversion of history with file additions and deletions"""
archive_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage, repo_url, status="full", type="cvs", snapshot=GREEK_SNAPSHOT.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 8,
"directory": 20,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 7,
"skipped_content": 0,
"snapshot": 7,
}
check_snapshot(GREEK_SNAPSHOT, loader.storage)
def test_loader_cvs_pserver_with_file_additions_and_deletions(
swh_storage, datadir, tmp_path
):
"""Eventful CVS pserver conversion with file additions and deletions"""
archive_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
# Ask our cvsclient to connect via the 'cvs server' command
repo_url = f"fake://{repo_url[7:]}"
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage, repo_url, status="full", type="cvs", snapshot=GREEK_SNAPSHOT.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 8,
"directory": 20,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 7,
"skipped_content": 0,
"snapshot": 7,
}
check_snapshot(GREEK_SNAPSHOT, loader.storage)
GREEK_SNAPSHOT2 = Snapshot(
id=hash_to_bytes("048885ae2145ffe81588aea95dcf75c536ecdf26"),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("55eb1438c03588607ce4b8db8f45e8e23075951b"),
target_type=TargetType.REVISION,
)
},
)
def test_loader_cvs_2_visits_with_change(swh_storage, datadir, tmp_path):
"""Eventful visit followed by eventful visit should yield two snapshots"""
archive_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
visit_status1 = assert_last_visit_matches(
loader.storage, repo_url, status="full", type="cvs", snapshot=GREEK_SNAPSHOT.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 8,
"directory": 20,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 7,
"skipped_content": 0,
"snapshot": 7,
}
archive_name2 = "greek-repository2"
archive_path2 = os.path.join(datadir, f"{archive_name2}.tgz")
repo_url = prepare_repository_from_archive(archive_path2, archive_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
visit_status2 = assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=GREEK_SNAPSHOT2.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 10,
"directory": 23,
"origin": 1,
"origin_visit": 2,
"release": 0,
"revision": 8,
"skipped_content": 0,
"snapshot": 8,
}
check_snapshot(GREEK_SNAPSHOT2, loader.storage)
assert visit_status1.date < visit_status2.date
assert visit_status1.snapshot != visit_status2.snapshot
def test_loader_cvs_visit_pserver(swh_storage, datadir, tmp_path):
"""Eventful visit to CVS pserver should yield 1 snapshot"""
archive_name = "runbaby"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
repo_url += "/runbaby" # CVS module name
# Ask our cvsclient to connect via the 'cvs server' command
repo_url = "fake://" + repo_url[7:]
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=RUNBABY_SNAPSHOT.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 5,
"directory": 2,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 1,
"skipped_content": 0,
"snapshot": 1,
}
check_snapshot(RUNBABY_SNAPSHOT, loader.storage)
GREEK_SNAPSHOT3 = Snapshot(
id=hash_to_bytes("cd801546b0137c82f01b9b67848ba8261d64ebbb"),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("14980990790ce1921db953c4c9ae03dd8861e8d6"),
target_type=TargetType.REVISION,
)
},
)
def test_loader_cvs_visit_pserver_no_eol(swh_storage, datadir, tmp_path):
"""Visit to CVS pserver with file that lacks trailing eol"""
archive_name = "greek-repository3"
extracted_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
# Ask our cvsclient to connect via the 'cvs server' command
repo_url = "fake://" + repo_url[7:]
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=GREEK_SNAPSHOT3.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 9,
"directory": 23,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 8,
"skipped_content": 0,
"snapshot": 8,
}
check_snapshot(GREEK_SNAPSHOT3, loader.storage)
GREEK_SNAPSHOT4 = Snapshot(
id=hash_to_bytes("11673e2766654bd5fafb5119b418794230d48d6b"),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("fe4a926d49d2af76e0025a8ba0b4ed159aec6829"),
target_type=TargetType.REVISION,
)
},
)
def test_loader_cvs_visit_expand_id_keyword(swh_storage, datadir, tmp_path):
"""Visit to CVS repository with file with an RCS Id keyword"""
archive_name = "greek-repository4"
extracted_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=GREEK_SNAPSHOT4.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 9,
"directory": 22,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 8,
"skipped_content": 0,
"snapshot": 8,
}
check_snapshot(GREEK_SNAPSHOT4, loader.storage)
def test_loader_cvs_visit_pserver_expand_id_keyword(swh_storage, datadir, tmp_path):
"""Visit to CVS pserver with file with an RCS Id keyword"""
archive_name = "greek-repository4"
extracted_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
# Ask our cvsclient to connect via the 'cvs server' command
repo_url = f"fake://{repo_url[7:]}"
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=GREEK_SNAPSHOT4.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 9,
"directory": 22,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 8,
"skipped_content": 0,
"snapshot": 8,
}
check_snapshot(GREEK_SNAPSHOT4, loader.storage)
GREEK_SNAPSHOT5 = Snapshot(
id=hash_to_bytes("ee6faeaf50aa513c53c8ba29194116a5ef88add6"),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("4320f152cc61ed660d25fdeebc787b3099e55a96"),
target_type=TargetType.REVISION,
)
},
)
def test_loader_cvs_with_file_deleted_and_readded(swh_storage, datadir, tmp_path):
"""Eventful conversion of history with file deletion and re-addition"""
archive_name = "greek-repository5"
extracted_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=GREEK_SNAPSHOT5.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 9,
"directory": 22,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 8,
"skipped_content": 0,
"snapshot": 8,
}
check_snapshot(GREEK_SNAPSHOT5, loader.storage)
def test_loader_cvs_pserver_with_file_deleted_and_readded(
swh_storage, datadir, tmp_path
):
"""Eventful pserver conversion with file deletion and re-addition"""
archive_name = "greek-repository5"
extracted_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
# Ask our cvsclient to connect via the 'cvs server' command
repo_url = f"fake://{repo_url[7:]}"
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=GREEK_SNAPSHOT5.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 9,
"directory": 22,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 8,
"skipped_content": 0,
"snapshot": 8,
}
check_snapshot(GREEK_SNAPSHOT5, loader.storage)
DINO_SNAPSHOT = Snapshot(
id=hash_to_bytes("417021c16e17c5e0038cf0e73dbf48a6142c8304"),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("df61a776c401a178cc796545849fc87bdadb2001"),
target_type=TargetType.REVISION,
)
},
)
def test_loader_cvs_readded_file_in_attic(swh_storage, datadir, tmp_path):
"""Conversion of history with RCS files in the Attic"""
# This repository has some file revisions marked "dead" in the Attic only.
# This is different to the re-added file tests above, where the RCS file
# was moved out of the Attic again as soon as the corresponding deleted
# file was re-added. Failure to detect the "dead" file revisions in the
# Attic would result in errors in our converted history.
archive_name = "dino-readded-file"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
repo_url += "/src" # CVS module name
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage, repo_url, status="full", type="cvs", snapshot=DINO_SNAPSHOT.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 38,
"directory": 105,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 35,
"skipped_content": 0,
"snapshot": 35,
}
check_snapshot(DINO_SNAPSHOT, loader.storage)
def test_loader_cvs_pserver_readded_file_in_attic(swh_storage, datadir, tmp_path):
"""Conversion over pserver with RCS files in the Attic"""
# This repository has some file revisions marked "dead" in the Attic only.
# This is different to the re-added file tests above, where the RCS file
# was moved out of the Attic again as soon as the corresponding deleted
# file was re-added. Failure to detect the "dead" file revisions in the
# Attic would result in errors in our converted history.
# This has special implications for the pserver case, because the "dead"
# revisions will not appear in the output of 'cvs rlog' by default.
archive_name = "dino-readded-file"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
repo_url += "/src" # CVS module name
# Ask our cvsclient to connect via the 'cvs server' command
repo_url = f"fake://{repo_url[7:]}"
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage, repo_url, status="full", type="cvs", snapshot=DINO_SNAPSHOT.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 38,
"directory": 105,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 35,
"skipped_content": 0,
"snapshot": 35,
}
check_snapshot(DINO_SNAPSHOT, loader.storage)
DINO_SNAPSHOT2 = Snapshot(
id=hash_to_bytes("a9d6ce0b4f22dc4fd752ad4c25ec9ea71ed568d7"),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("150616a2a3206f00a73f2d6a017dde22c52e4a83"),
target_type=TargetType.REVISION,
)
},
)
def test_loader_cvs_split_commits_by_commitid(swh_storage, datadir, tmp_path):
"""Conversion of RCS history which needs to be split by commit ID"""
# This repository has some file revisions which use the same log message
# and can only be told apart by commit IDs. Without commit IDs, these commits
# would get merged into a single commit in our conversion result.
archive_name = "dino-commitid"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
repo_url += "/dino" # CVS module name
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage, repo_url, status="full", type="cvs", snapshot=DINO_SNAPSHOT2.id,
)
check_snapshot(DINO_SNAPSHOT2, loader.storage)
stats = get_stats(loader.storage)
assert stats == {
"content": 18,
"directory": 36,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 18,
"skipped_content": 0,
"snapshot": 18,
}
def test_loader_cvs_pserver_split_commits_by_commitid(swh_storage, datadir, tmp_path):
"""Conversion via pserver which needs to be split by commit ID"""
# This repository has some file revisions which use the same log message
# and can only be told apart by commit IDs. Without commit IDs, these commits
# would get merged into a single commit in our conversion result.
archive_name = "dino-commitid"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
repo_url += "/dino" # CVS module name
# Ask our cvsclient to connect via the 'cvs server' command
repo_url = f"fake://{repo_url[7:]}"
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage, repo_url, status="full", type="cvs", snapshot=DINO_SNAPSHOT2.id,
)
check_snapshot(DINO_SNAPSHOT2, loader.storage)
stats = get_stats(loader.storage)
assert stats == {
"content": 18,
"directory": 36,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 18,
"skipped_content": 0,
"snapshot": 18,
}
GREEK_SNAPSHOT6 = Snapshot(
id=hash_to_bytes("b4c9423b2711c181251deb458d4ab4a3172948ac"),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("f317c720e1929fec0afce10e6a8cfd24ef76dfc7"),
target_type=TargetType.REVISION,
)
},
)
def test_loader_cvs_empty_lines_in_log_message(swh_storage, datadir, tmp_path):
"""Conversion of RCS history with empty lines in a log message"""
archive_name = "greek-repository6"
extracted_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=GREEK_SNAPSHOT6.id,
)
check_snapshot(GREEK_SNAPSHOT6, loader.storage)
stats = get_stats(loader.storage)
assert stats == {
"content": 9,
"directory": 22,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 8,
"skipped_content": 0,
"snapshot": 8,
}
def test_loader_cvs_pserver_empty_lines_in_log_message(swh_storage, datadir, tmp_path):
"""Conversion via pserver with empty lines in a log message"""
archive_name = "greek-repository6"
extracted_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
# Ask our cvsclient to connect via the 'cvs server' command
repo_url = f"fake://{repo_url[7:]}"
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=GREEK_SNAPSHOT6.id,
)
check_snapshot(GREEK_SNAPSHOT6, loader.storage)
stats = get_stats(loader.storage)
assert stats == {
"content": 9,
"directory": 22,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 8,
"skipped_content": 0,
"snapshot": 8,
}
+
+
+def get_head_revision_paths_info(loader: CvsLoader) -> Dict[bytes, Dict[str, Any]]:
+ assert loader.snapshot is not None
+ root_dir = loader.snapshot.branches[b"HEAD"].target
+ revision = loader.storage.revision_get([root_dir])[0]
+ assert revision is not None
+
+ paths = {}
+ for entry in loader.storage.directory_ls(revision.directory, recursive=True):
+ paths[entry["name"]] = entry
+ return paths
+
+
+def test_loader_cvs_with_header_keyword(swh_storage, datadir, tmp_path):
+ """Eventful conversion of history with Header keyword in a file"""
+ archive_name = "greek-repository7"
+ extracted_name = "greek-repository"
+ archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+ repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
+ repo_url += "/greek-tree" # CVS module name
+ loader = CvsLoader(
+ swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
+ )
+
+ assert loader.load() == {"status": "eventful"}
+
+ repo_url = f"fake://{repo_url[7:]}"
+ loader2 = CvsLoader(
+ swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
+ )
+
+ assert loader2.load() == {"status": "eventful"}
+
+ # We cannot verify the snapshot ID. It is unpredictable due to use of the $Header$
+ # RCS keyword which contains the temporary directory where the repository is stored.
+
+ expected_stats = {
+ "content": 9,
+ "directory": 22,
+ "origin": 2,
+ "origin_visit": 2,
+ "release": 0,
+ "revision": 8,
+ "skipped_content": 0,
+ "snapshot": 8,
+ }
+ stats = get_stats(loader.storage)
+ assert stats == expected_stats
+ stats = get_stats(loader2.storage)
+ assert stats == expected_stats
+
+ # Ensure that file 'alpha', which contains a $Header$ keyword,
+ # was imported with equal content via file:// and fake:// URLs.
+
+ paths = get_head_revision_paths_info(loader)
+ paths2 = get_head_revision_paths_info(loader2)
+
+ alpha = paths[b"greek-tree/alpha"]
+ alpha2 = paths2[b"greek-tree/alpha"]
+ assert alpha["sha1"] == alpha2["sha1"]
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Jun 4 2025, 7:37 PM (10 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3276944
Attached To
rDLDCVS CVS Loader
Event Timeline
Log In to Comment