swh/loader/svn/tests/test_loader.py
# ... first 12 lines folded in the original changeset view ...
from swh.loader.tests import (
    get_stats,
)
from swh.loader.svn.loader import (
    SvnLoader,
    SvnLoaderFromRemoteDump,
)
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Snapshot, SnapshotBranch, TargetType


GOURMET_SNAPSHOT = Snapshot(
    id=hash_to_bytes("889cacc2731e3312abfb2b1a0c18ade82a949e07"),
    branches={
        b"HEAD": SnapshotBranch(
            target=hash_to_bytes("4876cb10aec6f708f7466dddf547567b65f6c39c"),
            target_type=TargetType.REVISION,
        )
    },
)

GOURMET_UPDATES_SNAPSHOT = Snapshot(
    id=hash_to_bytes("11086d15317014e43d2438b7ffc712c44f1b8afe"),
    branches={
        b"HEAD": SnapshotBranch(
            target=hash_to_bytes("171dc35522bfd17dda4e90a542a0377fb2fc707a"),
            target_type=TargetType.REVISION,
        )
    },
)
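

# A possible refactoring sketch (hypothetical helper, not part of this changeset):
# every snapshot in this file has the same single-branch shape, so it could be
# built with a small factory that only reuses the swh.model.model API imported
# above. The helper name is illustrative and does not exist in the repository.
def _snapshot_with_head_revision(snapshot_id: str, revision_id: str) -> Snapshot:
    """Build a Snapshot whose only branch is b"HEAD", targeting a revision."""
    return Snapshot(
        id=hash_to_bytes(snapshot_id),
        branches={
            b"HEAD": SnapshotBranch(
                target=hash_to_bytes(revision_id),
                target_type=TargetType.REVISION,
            )
        },
    )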


def test_loader_svn_new_visit(swh_config, datadir, tmp_path):
    """Eventful visit should yield 1 snapshot"""
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    loader = SvnLoader(repo_url, destination_path=tmp_path)

    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )

    stats = get_stats(loader.storage)
    assert stats == {
        "content": 19,
        "directory": 17,
        "origin": 1,
        "origin_visit": 1,
        "person": 1,
        "release": 0,
        "revision": 6,
        "skipped_content": 0,
        "snapshot": 1,
    }

    check_snapshot(GOURMET_SNAPSHOT, loader.storage)


def test_loader_svn_2_visits_no_change(swh_config, datadir, tmp_path):
    """Visiting a repository multiple times with no change in between should
    yield the same snapshot.
    """
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    loader = SvnLoader(repo_url)

    assert loader.load() == {"status": "eventful"}
    visit_status1 = assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )

    assert loader.load() == {"status": "uneventful"}
    visit_status2 = assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )

    assert visit_status1.date < visit_status2.date
    assert visit_status1.snapshot == visit_status2.snapshot

    stats = get_stats(loader.storage)
    assert stats["origin_visit"] == 1 + 1  # the same snapshot was computed twice
    assert stats["snapshot"] == 1

    # ... 11 lines folded in the original changeset view ...
    assert loader.load() == {"status": "eventful"}

    stats = get_stats(loader.storage)
    assert stats["origin_visit"] == 2 + 1
    # ... with no change in repository, this yields the same snapshot
    assert stats["snapshot"] == 1

    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )


def test_loader_tampered_repository(swh_config, datadir, tmp_path):
    """In this scenario, the dump has been tampered with to modify the
    commit log [1]. This results in a hash divergence which is
    detected at startup after a new run for the same origin.

    [... 10 lines folded in the original changeset view ...]
    ```

    """
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    loader = SvnLoader(repo_url)

    assert loader.load() == {"status": "eventful"}
    check_snapshot(GOURMET_SNAPSHOT, loader.storage)

    archive_path2 = os.path.join(datadir, "pkg-gourmet-tampered-rev6-log.tgz")
    repo_tampered_url = prepare_repository_from_archive(
        archive_path2, archive_name, tmp_path
    )

    loader2 = SvnLoader(repo_tampered_url, origin_url=repo_url)
    assert loader2.load() == {"status": "failed"}

# ... 24 lines folded in the original changeset view ...


def test_loader_svn_visit_with_changes(swh_config, datadir, tmp_path):
    # (docstring and repository setup folded in the original view)
    loader = SvnLoader(repo_initial_url)

    assert loader.load() == {"status": "eventful"}
    visit_status1 = assert_last_visit_matches(
        loader.storage,
        repo_initial_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )

    archive_path = os.path.join(datadir, "pkg-gourmet-with-updates.tgz")
    repo_updated_url = prepare_repository_from_archive(
        archive_path, "pkg-gourmet", tmp_path
    )

    loader = SvnLoader(repo_updated_url, origin_url=repo_initial_url)

    assert loader.load() == {"status": "eventful"}
    visit_status2 = assert_last_visit_matches(
        loader.storage,
        repo_updated_url,
        status="full",
        type="svn",
        snapshot=GOURMET_UPDATES_SNAPSHOT.id,
    )

    assert visit_status1.date < visit_status2.date
    assert visit_status1.snapshot != visit_status2.snapshot

    stats = get_stats(loader.storage)
    assert stats == {
        "content": 22,
        "directory": 28,
        "origin": 1,
        "origin_visit": 2,
        "person": 2,
        "release": 0,
        "revision": 11,
        "skipped_content": 0,
        "snapshot": 2,
    }

    check_snapshot(GOURMET_UPDATES_SNAPSHOT, loader.storage)

    # Loading again from scratch yields the same result
    loader = SvnLoader(
        repo_updated_url, origin_url=repo_initial_url, start_from_scratch=True
    )
    assert loader.load() == {"status": "eventful"}
    visit_status3 = assert_last_visit_matches(
        loader.storage,
        repo_updated_url,
        status="full",
        type="svn",
        snapshot=GOURMET_UPDATES_SNAPSHOT.id,
    )
    assert visit_status2.date < visit_status3.date
    assert visit_status3.snapshot == visit_status2.snapshot
    check_snapshot(GOURMET_UPDATES_SNAPSHOT, loader.storage)

    stats = get_stats(loader.storage)
    assert stats["origin"] == 1  # always the same origin
    assert stats["origin_visit"] == 2 + 1  # 1 more visit
    assert stats["snapshot"] == 2  # no new snapshot


def test_loader_svn_visit_start_from_revision(swh_config, datadir, tmp_path):
    # ... 11 lines folded in the original changeset view ...
    loader = SvnLoader(repo_initial_url)

    assert loader.load() == {"status": "eventful"}
    visit_status1 = assert_last_visit_matches(
        loader.storage,
        repo_initial_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )

    revs = list(
        loader.storage.revision_get(
            [hash_to_bytes("95edacc8848369d6fb1608e887d6d2474fd5224f")]
        )
    )
    start_revision = revs[0]

    # ... 12 lines folded in the original changeset view ...
    assert loader.load() == {"status": "eventful"}

    # nonetheless, we obtain the same snapshot (as previous tests on that repository)
    visit_status2 = assert_last_visit_matches(
        loader.storage,
        repo_updated_url,
        status="full",
        type="svn",
        snapshot=GOURMET_UPDATES_SNAPSHOT.id,
    )

    assert visit_status1.date < visit_status2.date
    assert visit_status1.snapshot != visit_status2.snapshot

    stats = get_stats(loader.storage)
    assert stats == {
        "content": 22,
        "directory": 28,
        "origin": 1,
        "origin_visit": 2,
        "person": 2,
        "release": 0,
        "revision": 11,
        "skipped_content": 0,
        "snapshot": 2,
    }

    check_snapshot(GOURMET_UPDATES_SNAPSHOT, loader.storage)


def test_loader_svn_visit_with_eol_style(swh_config, datadir, tmp_path):
    """Check that an svn repo containing a versioned file with CRLF line
    endings and the svn:eol-style property set to 'native' (this is a
    violation of the svn specification, as the file should have been
    stored with LF line endings) can be loaded anyway.
    """
    archive_name = "mediawiki-repo-r407-eol-native-crlf"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    loader = SvnLoader(repo_url)

    assert loader.load() == {"status": "eventful"}
    mediawiki_snapshot = Snapshot(
        id=hash_to_bytes("d6d6e9703f157c5702d9a4a5dec878926ed4ab76"),
        branches={
            b"HEAD": SnapshotBranch(
                target=hash_to_bytes("7da4975c363101b819756d33459f30a866d01b1b"),
                target_type=TargetType.REVISION,
            )
        },
    )
    check_snapshot(mediawiki_snapshot, loader.storage)

    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=mediawiki_snapshot.id,
    )

    stats = get_stats(loader.storage)
    assert stats["origin"] == 1
    assert stats["origin_visit"] == 1
    assert stats["snapshot"] == 1
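
# Background note (added for context, not part of the changeset): with
# svn:eol-style set to "native", Subversion is expected to store the file with
# LF line endings in the repository and translate them to the client platform's
# native end-of-line on checkout; the two tests around here exercise dumps that
# violate that expectation.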


def test_loader_svn_visit_with_mixed_crlf_lf(swh_config, datadir, tmp_path):
    """Check that an svn repo containing a versioned file with mixed
    CRLF/LF line endings and the svn:eol-style property set to 'native'
    (this is a violation of the svn specification, as mixed line endings
    for textual content should not be stored when the svn:eol-style
    property is set) can be loaded anyway.
    """
    archive_name = "pyang-repo-r343-eol-native-mixed-lf-crlf"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    loader = SvnLoader(repo_url)

    assert loader.load() == {"status": "eventful"}
    pyang_snapshot = Snapshot(
        id=hash_to_bytes("6d9590de11b00a5801de0ff3297c5b44bbbf7d24"),
        branches={
            b"HEAD": SnapshotBranch(
                target=hash_to_bytes("9c6962eeb9164a636c374be700672355e34a98a7"),
                target_type=TargetType.REVISION,
            )
        },
    )
    check_snapshot(pyang_snapshot, loader.storage)

    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=pyang_snapshot.id,
    )

    stats = get_stats(loader.storage)
    assert stats["origin"] == 1
    assert stats["origin_visit"] == 1
    assert stats["snapshot"] == 1


def test_loader_svn_with_external_properties(swh_config, datadir, tmp_path):
    """Repository with svn:externals properties cannot be fully ingested yet
    """
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, "pkg-gourmet-with-external-id.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    loader = SvnLoader(repo_url)

    assert loader.load() == {"status": "eventful"}
    gourmet_externals_snapshot = Snapshot(
        id=hash_to_bytes("19cb68d0a3f22372e2b7017ea5e2a2ea5ae3e09a"),
        branches={
            b"HEAD": SnapshotBranch(
                target=hash_to_bytes("82a7a4a09f9549223429143ba36ad77375e33c5c"),
                target_type=TargetType.REVISION,
            )
        },
    )
    check_snapshot(gourmet_externals_snapshot, loader.storage)
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="partial",
        type="svn",
        snapshot=gourmet_externals_snapshot.id,
    )

    stats = get_stats(loader.storage)
    assert stats["origin"] == 1
    assert stats["origin_visit"] == 1
    assert stats["snapshot"] == 1
    # the repository holds 21 revisions, but the last commit carries an
    # 'svn:externals' property, which makes loader-svn stop at the revision
    # prior to the bad one
    assert stats["revision"] == 21 - 1  # commit with the svn:externals property
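
# For context (added note, not part of the changeset): an svn:externals property
# value lists external checkouts to pull into a directory, one definition per
# line, e.g. "vendor/lib http://example.com/repos/lib" in the pre-1.5 syntax or
# "http://example.com/repos/lib vendor/lib" in later ones; a commit introducing
# such a definition is what makes the loader above stop early.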


def test_loader_svn_with_symlink(swh_config, datadir, tmp_path):
    """Repository with symlinks should be ingested ok

    Edge cases:
    - create a file and commit it; remove it, then add a folder with the
      same name and commit again
    - do the same scenario with a symbolic link instead of a file
    """
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(
        datadir, "pkg-gourmet-with-edge-case-links-and-files.tgz"
    )
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    loader = SvnLoader(repo_url)

    assert loader.load() == {"status": "eventful"}
    gourmet_edge_cases_snapshot = Snapshot(
        id=hash_to_bytes("18e60982fe521a2546ab8c3c73a535d80462d9d0"),
        branches={
            b"HEAD": SnapshotBranch(
                target=hash_to_bytes("3f43af2578fccf18b0d4198e48563da7929dc608"),
                target_type=TargetType.REVISION,
            )
        },
    )
    check_snapshot(gourmet_edge_cases_snapshot, loader.storage)
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=gourmet_edge_cases_snapshot.id,
    )

    stats = get_stats(loader.storage)
    assert stats["origin"] == 1
    assert stats["origin_visit"] == 1
    assert stats["snapshot"] == 1
    assert stats["revision"] == 19


def test_loader_svn_with_wrong_symlinks(swh_config, datadir, tmp_path):
    """Repository with wrong symlinks should be ingested ok nonetheless

    Edge cases:
    - wrong symbolic link
    - wrong symbolic link with empty space names
    """
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, "pkg-gourmet-with-wrong-link-cases.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    loader = SvnLoader(repo_url)

    assert loader.load() == {"status": "eventful"}
    gourmet_wrong_links_snapshot = Snapshot(
        id=hash_to_bytes("b17f38acabb90f066dedd30c29f01a02af88a5c4"),
        branches={
            b"HEAD": SnapshotBranch(
                target=hash_to_bytes("cf30d3bb9d5967d0a2bbeacc405f10a5dd9b138a"),
                target_type=TargetType.REVISION,
            )
        },
    )
    check_snapshot(gourmet_wrong_links_snapshot, loader.storage)
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=gourmet_wrong_links_snapshot.id,
    )

    stats = get_stats(loader.storage)
    assert stats["origin"] == 1
    assert stats["origin_visit"] == 1
    assert stats["snapshot"] == 1
    assert stats["revision"] == 21


def test_loader_svn_loader_from_dump_archive(swh_config, datadir, tmp_path):
    # ... 12 lines folded in the original changeset view (docstring and setup) ...
    loaderFromDump = SvnLoaderFromRemoteDump(repo_url)
    assert loaderFromDump.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loaderFromDump.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )

    origin_url = repo_url + "2"  # rename to another origin
    loader = SvnLoader(repo_url, origin_url=origin_url)
    assert loader.load() == {"status": "eventful"}  # because we work on a new origin
    assert_last_visit_matches(
        loader.storage,
        origin_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )
    check_snapshot(GOURMET_SNAPSHOT, loader.storage)

    stats = get_stats(loader.storage)
    assert stats["origin"] == 2  # created one more origin
    assert stats["origin_visit"] == 2
    assert stats["snapshot"] == 1

    loader = SvnLoader(repo_url)  # no change on the origin-url
    assert loader.load() == {"status": "uneventful"}
    assert_last_visit_matches(
        loader.storage,
        origin_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )

    stats = get_stats(loader.storage)
    assert stats["origin"] == 2
    assert stats["origin_visit"] == 3
    assert stats["snapshot"] == 1


def test_loader_user_defined_svn_properties(swh_config, datadir, tmp_path):
    """Edge case: the repository held some user-defined svn properties with
    special encodings, which prevented the repository from being loaded even
    though we do not ingest that information.
    """
    archive_name = "httthttt"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    loader = SvnLoader(repo_url)

    assert loader.load() == {"status": "eventful"}
    expected_snapshot = Snapshot(
        id=hash_to_bytes("70487267f682c07e52a2371061369b6cf5bffa47"),
        branches={
            b"HEAD": SnapshotBranch(
                target=hash_to_bytes("604a17dbb15e8d7ecb3e9f3768d09bf493667a93"),
                target_type=TargetType.REVISION,
            )
        },
    )
    check_snapshot(expected_snapshot, loader.storage)

    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=expected_snapshot.id,
    )

    stats = get_stats(loader.storage)
    assert stats["origin"] == 1
    assert stats["origin_visit"] == 1
    assert stats["snapshot"] == 1
    assert stats["revision"] == 7
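

# How these tests are typically run (assuming a standard pytest setup for this
# package; the exact invocation may differ in swh's tox/CI configuration):
#
#   pytest swh/loader/svn/tests/test_loader.py
#   pytest swh/loader/svn/tests/test_loader.py -k test_loader_svn_new_visit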