diff --git a/swh/loader/cvs/tests/data/greek-repository7.tgz b/swh/loader/cvs/tests/data/greek-repository7.tgz
new file mode 100644
index 0000000..8b42fdd
Binary files /dev/null and b/swh/loader/cvs/tests/data/greek-repository7.tgz differ
diff --git a/swh/loader/cvs/tests/test_loader.py b/swh/loader/cvs/tests/test_loader.py
index e35d17f..369a3dd 100644
--- a/swh/loader/cvs/tests/test_loader.py
+++ b/swh/loader/cvs/tests/test_loader.py
@@ -1,799 +1,861 @@
# Copyright (C) 2016-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
+from typing import Any, Dict
from swh.loader.cvs.loader import CvsLoader
from swh.loader.tests import (
assert_last_visit_matches,
check_snapshot,
get_stats,
prepare_repository_from_archive,
)
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Snapshot, SnapshotBranch, TargetType
RUNBABY_SNAPSHOT = Snapshot(
id=hash_to_bytes("1cff69ab9bd70822d5e3006092f943ccaafdcf57"),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("ef511d258fa55035c2bc2a5b05cad233cee1d328"),
target_type=TargetType.REVISION,
)
},
)
def test_loader_cvs_not_found_no_mock(swh_storage, tmp_path):
"""Given an unknown repository, the loader visit ends up in status not_found"""
unknown_repo_url = "unknown-repository"
loader = CvsLoader(swh_storage, unknown_repo_url, cvsroot_path=tmp_path)
assert loader.load() == {"status": "uneventful"}
assert_last_visit_matches(
swh_storage, unknown_repo_url, status="not_found", type="cvs",
)
def test_loader_cvs_visit(swh_storage, datadir, tmp_path):
"""Eventful visit should yield 1 snapshot"""
archive_name = "runbaby"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=RUNBABY_SNAPSHOT.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 5,
"directory": 2,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 1,
"skipped_content": 0,
"snapshot": 1,
}
check_snapshot(RUNBABY_SNAPSHOT, loader.storage)
def test_loader_cvs_2_visits_no_change(swh_storage, datadir, tmp_path):
"""Eventful visit followed by uneventful visit should yield the same snapshot
"""
archive_name = "runbaby"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
visit_status1 = assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=RUNBABY_SNAPSHOT.id,
)
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "uneventful"}
visit_status2 = assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=RUNBABY_SNAPSHOT.id,
)
assert visit_status1.date < visit_status2.date
assert visit_status1.snapshot == visit_status2.snapshot
stats = get_stats(loader.storage)
assert stats["origin_visit"] == 1 + 1 # computed twice the same snapshot
assert stats["snapshot"] == 1
GREEK_SNAPSHOT = Snapshot(
id=hash_to_bytes("5e74af67d69dfd7aea0eb118154d062f71f50120"),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("e18b92f14cd5b3efb3fcb4ea46cfaf97f25f301b"),
target_type=TargetType.REVISION,
)
},
)
def test_loader_cvs_with_file_additions_and_deletions(swh_storage, datadir, tmp_path):
"""Eventful conversion of history with file additions and deletions"""
archive_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage, repo_url, status="full", type="cvs", snapshot=GREEK_SNAPSHOT.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 8,
"directory": 20,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 7,
"skipped_content": 0,
"snapshot": 7,
}
check_snapshot(GREEK_SNAPSHOT, loader.storage)
def test_loader_cvs_pserver_with_file_additions_and_deletions(
swh_storage, datadir, tmp_path
):
"""Eventful CVS pserver conversion with file additions and deletions"""
archive_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
# Ask our cvsclient to connect via the 'cvs server' command
repo_url = f"fake://{repo_url[7:]}"
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage, repo_url, status="full", type="cvs", snapshot=GREEK_SNAPSHOT.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 8,
"directory": 20,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 7,
"skipped_content": 0,
"snapshot": 7,
}
check_snapshot(GREEK_SNAPSHOT, loader.storage)
GREEK_SNAPSHOT2 = Snapshot(
id=hash_to_bytes("048885ae2145ffe81588aea95dcf75c536ecdf26"),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("55eb1438c03588607ce4b8db8f45e8e23075951b"),
target_type=TargetType.REVISION,
)
},
)
def test_loader_cvs_2_visits_with_change(swh_storage, datadir, tmp_path):
"""Eventful visit followed by eventful visit should yield two snapshots"""
archive_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
visit_status1 = assert_last_visit_matches(
loader.storage, repo_url, status="full", type="cvs", snapshot=GREEK_SNAPSHOT.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 8,
"directory": 20,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 7,
"skipped_content": 0,
"snapshot": 7,
}
archive_name2 = "greek-repository2"
archive_path2 = os.path.join(datadir, f"{archive_name2}.tgz")
repo_url = prepare_repository_from_archive(archive_path2, archive_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
visit_status2 = assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=GREEK_SNAPSHOT2.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 10,
"directory": 23,
"origin": 1,
"origin_visit": 2,
"release": 0,
"revision": 8,
"skipped_content": 0,
"snapshot": 8,
}
check_snapshot(GREEK_SNAPSHOT2, loader.storage)
assert visit_status1.date < visit_status2.date
assert visit_status1.snapshot != visit_status2.snapshot
def test_loader_cvs_visit_pserver(swh_storage, datadir, tmp_path):
"""Eventful visit to CVS pserver should yield 1 snapshot"""
archive_name = "runbaby"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
repo_url += "/runbaby" # CVS module name
# Ask our cvsclient to connect via the 'cvs server' command
repo_url = "fake://" + repo_url[7:]
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=RUNBABY_SNAPSHOT.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 5,
"directory": 2,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 1,
"skipped_content": 0,
"snapshot": 1,
}
check_snapshot(RUNBABY_SNAPSHOT, loader.storage)
GREEK_SNAPSHOT3 = Snapshot(
id=hash_to_bytes("cd801546b0137c82f01b9b67848ba8261d64ebbb"),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("14980990790ce1921db953c4c9ae03dd8861e8d6"),
target_type=TargetType.REVISION,
)
},
)
def test_loader_cvs_visit_pserver_no_eol(swh_storage, datadir, tmp_path):
"""Visit to CVS pserver with file that lacks trailing eol"""
archive_name = "greek-repository3"
extracted_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
# Ask our cvsclient to connect via the 'cvs server' command
repo_url = "fake://" + repo_url[7:]
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=GREEK_SNAPSHOT3.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 9,
"directory": 23,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 8,
"skipped_content": 0,
"snapshot": 8,
}
check_snapshot(GREEK_SNAPSHOT3, loader.storage)
GREEK_SNAPSHOT4 = Snapshot(
id=hash_to_bytes("11673e2766654bd5fafb5119b418794230d48d6b"),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("fe4a926d49d2af76e0025a8ba0b4ed159aec6829"),
target_type=TargetType.REVISION,
)
},
)
def test_loader_cvs_visit_expand_id_keyword(swh_storage, datadir, tmp_path):
"""Visit to CVS repository with file with an RCS Id keyword"""
archive_name = "greek-repository4"
extracted_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=GREEK_SNAPSHOT4.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 9,
"directory": 22,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 8,
"skipped_content": 0,
"snapshot": 8,
}
check_snapshot(GREEK_SNAPSHOT4, loader.storage)
def test_loader_cvs_visit_pserver_expand_id_keyword(swh_storage, datadir, tmp_path):
"""Visit to CVS pserver with file with an RCS Id keyword"""
archive_name = "greek-repository4"
extracted_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
# Ask our cvsclient to connect via the 'cvs server' command
repo_url = f"fake://{repo_url[7:]}"
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=GREEK_SNAPSHOT4.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 9,
"directory": 22,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 8,
"skipped_content": 0,
"snapshot": 8,
}
check_snapshot(GREEK_SNAPSHOT4, loader.storage)
GREEK_SNAPSHOT5 = Snapshot(
id=hash_to_bytes("ee6faeaf50aa513c53c8ba29194116a5ef88add6"),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("4320f152cc61ed660d25fdeebc787b3099e55a96"),
target_type=TargetType.REVISION,
)
},
)
def test_loader_cvs_with_file_deleted_and_readded(swh_storage, datadir, tmp_path):
"""Eventful conversion of history with file deletion and re-addition"""
archive_name = "greek-repository5"
extracted_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=GREEK_SNAPSHOT5.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 9,
"directory": 22,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 8,
"skipped_content": 0,
"snapshot": 8,
}
check_snapshot(GREEK_SNAPSHOT5, loader.storage)
def test_loader_cvs_pserver_with_file_deleted_and_readded(
swh_storage, datadir, tmp_path
):
"""Eventful pserver conversion with file deletion and re-addition"""
archive_name = "greek-repository5"
extracted_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
# Ask our cvsclient to connect via the 'cvs server' command
repo_url = f"fake://{repo_url[7:]}"
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=GREEK_SNAPSHOT5.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 9,
"directory": 22,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 8,
"skipped_content": 0,
"snapshot": 8,
}
check_snapshot(GREEK_SNAPSHOT5, loader.storage)
DINO_SNAPSHOT = Snapshot(
id=hash_to_bytes("417021c16e17c5e0038cf0e73dbf48a6142c8304"),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("df61a776c401a178cc796545849fc87bdadb2001"),
target_type=TargetType.REVISION,
)
},
)
def test_loader_cvs_readded_file_in_attic(swh_storage, datadir, tmp_path):
"""Conversion of history with RCS files in the Attic"""
# This repository has some file revisions marked "dead" in the Attic only.
# This is different to the re-added file tests above, where the RCS file
# was moved out of the Attic again as soon as the corresponding deleted
# file was re-added. Failure to detect the "dead" file revisions in the
# Attic would result in errors in our converted history.
archive_name = "dino-readded-file"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
repo_url += "/src" # CVS module name
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage, repo_url, status="full", type="cvs", snapshot=DINO_SNAPSHOT.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 38,
"directory": 105,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 35,
"skipped_content": 0,
"snapshot": 35,
}
check_snapshot(DINO_SNAPSHOT, loader.storage)
def test_loader_cvs_pserver_readded_file_in_attic(swh_storage, datadir, tmp_path):
"""Conversion over pserver with RCS files in the Attic"""
# This repository has some file revisions marked "dead" in the Attic only.
# This is different to the re-added file tests above, where the RCS file
# was moved out of the Attic again as soon as the corresponding deleted
# file was re-added. Failure to detect the "dead" file revisions in the
# Attic would result in errors in our converted history.
# This has special implications for the pserver case, because the "dead"
# revisions will not appear in the output of 'cvs rlog' by default.
archive_name = "dino-readded-file"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
repo_url += "/src" # CVS module name
# Ask our cvsclient to connect via the 'cvs server' command
repo_url = f"fake://{repo_url[7:]}"
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage, repo_url, status="full", type="cvs", snapshot=DINO_SNAPSHOT.id,
)
stats = get_stats(loader.storage)
assert stats == {
"content": 38,
"directory": 105,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 35,
"skipped_content": 0,
"snapshot": 35,
}
check_snapshot(DINO_SNAPSHOT, loader.storage)
DINO_SNAPSHOT2 = Snapshot(
id=hash_to_bytes("a9d6ce0b4f22dc4fd752ad4c25ec9ea71ed568d7"),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("150616a2a3206f00a73f2d6a017dde22c52e4a83"),
target_type=TargetType.REVISION,
)
},
)
def test_loader_cvs_split_commits_by_commitid(swh_storage, datadir, tmp_path):
"""Conversion of RCS history which needs to be split by commit ID"""
# This repository has some file revisions which use the same log message
# and can only be told apart by commit IDs. Without commit IDs, these commits
# would get merged into a single commit in our conversion result.
archive_name = "dino-commitid"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
repo_url += "/dino" # CVS module name
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage, repo_url, status="full", type="cvs", snapshot=DINO_SNAPSHOT2.id,
)
check_snapshot(DINO_SNAPSHOT2, loader.storage)
stats = get_stats(loader.storage)
assert stats == {
"content": 18,
"directory": 36,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 18,
"skipped_content": 0,
"snapshot": 18,
}
def test_loader_cvs_pserver_split_commits_by_commitid(swh_storage, datadir, tmp_path):
"""Conversion via pserver which needs to be split by commit ID"""
# This repository has some file revisions which use the same log message
# and can only be told apart by commit IDs. Without commit IDs, these commits
# would get merged into a single commit in our conversion result.
archive_name = "dino-commitid"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
repo_url += "/dino" # CVS module name
# Ask our cvsclient to connect via the 'cvs server' command
repo_url = f"fake://{repo_url[7:]}"
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage, repo_url, status="full", type="cvs", snapshot=DINO_SNAPSHOT2.id,
)
check_snapshot(DINO_SNAPSHOT2, loader.storage)
stats = get_stats(loader.storage)
assert stats == {
"content": 18,
"directory": 36,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 18,
"skipped_content": 0,
"snapshot": 18,
}
GREEK_SNAPSHOT6 = Snapshot(
id=hash_to_bytes("b4c9423b2711c181251deb458d4ab4a3172948ac"),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("f317c720e1929fec0afce10e6a8cfd24ef76dfc7"),
target_type=TargetType.REVISION,
)
},
)
def test_loader_cvs_empty_lines_in_log_message(swh_storage, datadir, tmp_path):
"""Conversion of RCS history with empty lines in a log message"""
archive_name = "greek-repository6"
extracted_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=GREEK_SNAPSHOT6.id,
)
check_snapshot(GREEK_SNAPSHOT6, loader.storage)
stats = get_stats(loader.storage)
assert stats == {
"content": 9,
"directory": 22,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 8,
"skipped_content": 0,
"snapshot": 8,
}
def test_loader_cvs_pserver_empty_lines_in_log_message(swh_storage, datadir, tmp_path):
"""Conversion via pserver with empty lines in a log message"""
archive_name = "greek-repository6"
extracted_name = "greek-repository"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
repo_url += "/greek-tree" # CVS module name
# Ask our cvsclient to connect via the 'cvs server' command
repo_url = f"fake://{repo_url[7:]}"
loader = CvsLoader(
swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
status="full",
type="cvs",
snapshot=GREEK_SNAPSHOT6.id,
)
check_snapshot(GREEK_SNAPSHOT6, loader.storage)
stats = get_stats(loader.storage)
assert stats == {
"content": 9,
"directory": 22,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 8,
"skipped_content": 0,
"snapshot": 8,
}
+
+
+def get_head_revision_paths_info(loader: CvsLoader) -> Dict[bytes, Dict[str, Any]]:
+ assert loader.snapshot is not None
+ root_dir = loader.snapshot.branches[b"HEAD"].target
+ revision = loader.storage.revision_get([root_dir])[0]
+ assert revision is not None
+
+ paths = {}
+ for entry in loader.storage.directory_ls(revision.directory, recursive=True):
+ paths[entry["name"]] = entry
+ return paths
+
+
+def test_loader_cvs_with_header_keyword(swh_storage, datadir, tmp_path):
+ """Eventful conversion of history with Header keyword in a file"""
+ archive_name = "greek-repository7"
+ extracted_name = "greek-repository"
+ archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+ repo_url = prepare_repository_from_archive(archive_path, extracted_name, tmp_path)
+ repo_url += "/greek-tree" # CVS module name
+ loader = CvsLoader(
+ swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
+ )
+
+ assert loader.load() == {"status": "eventful"}
+
+ repo_url = f"fake://{repo_url[7:]}"
+ loader2 = CvsLoader(
+ swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, extracted_name)
+ )
+
+ assert loader2.load() == {"status": "eventful"}
+
+ # We cannot verify the snapshot ID. It is unpredictable due to use of the $Header$
+ # RCS keyword, which contains the temporary directory where the repository is stored.
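+ # For illustration only (hypothetical expansion, not data from the test repository),
+ # a checked-out file carrying $Header$ ends up containing something like:
+ #   $Header: /tmp/pytest-xyz/greek-repository/greek-tree/alpha,v 1.1 2021/11/23 12:00:00 user Exp $
+ # so the file content, and hence the snapshot ID, changes with the checkout path.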
+
+ expected_stats = {
+ "content": 9,
+ "directory": 22,
+ "origin": 2,
+ "origin_visit": 2,
+ "release": 0,
+ "revision": 8,
+ "skipped_content": 0,
+ "snapshot": 8,
+ }
+ stats = get_stats(loader.storage)
+ assert stats == expected_stats
+ stats = get_stats(loader2.storage)
+ assert stats == expected_stats
+
+ # Ensure that file 'alpha', which contains a $Header$ keyword,
+ # was imported with equal content via file:// and fake:// URLs.
+
+ paths = get_head_revision_paths_info(loader)
+ paths2 = get_head_revision_paths_info(loader2)
+
+ alpha = paths[b"greek-tree/alpha"]
+ alpha2 = paths2[b"greek-tree/alpha"]
+ assert alpha["sha1"] == alpha2["sha1"]
