Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/svn/tests/test_loader.py
Show First 20 Lines • Show All 86 Lines • ▼ Show 20 Lines | def test_loader_svn_failures(swh_storage, tmp_path, exception, mocker): | ||||
assert loader.load() == {"status": "failed"} | assert loader.load() == {"status": "failed"} | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
swh_storage, existing_repo_url, status="failed", type="svn", | swh_storage, existing_repo_url, status="failed", type="svn", | ||||
) | ) | ||||
def test_loader_svnrdump_not_found(swh_storage, tmp_path, mocker): | |||||
"""Loading an unknown repository url is uneventful and the visit ends up in status not_found""" | |||||
unknown_repo_url = "file:///tmp/svn.code.sf.net/p/white-rats-studios/svn" | |||||
# mock = mocker.patch("swh.loader.svn.loader.SvnRepo") | |||||
# mock.side_effect = exception | |||||
anlambert: To remove I guess. | |||||
ardumont (author): yes, it's done locally, I forgot to update the diff. | |||||
# existing_repo_url = "existing-repo-url" | |||||
loader = SvnLoaderFromRemoteDump( | |||||
swh_storage, unknown_repo_url, destination_path=tmp_path | |||||
) | |||||
assert loader.load() == {"status": "uneventful"} | |||||
assert_last_visit_matches( | |||||
swh_storage, unknown_repo_url, status="not_found", type="svn", | |||||
) | |||||
def test_loader_svn_new_visit(swh_storage, datadir, tmp_path): | def test_loader_svn_new_visit(swh_storage, datadir, tmp_path): | ||||
"""Eventful visit should yield 1 snapshot""" | """Eventful visit should yield 1 snapshot""" | ||||
archive_name = "pkg-gourmet" | archive_name = "pkg-gourmet" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | unknown_repo_url = prepare_repository_from_archive( | ||||
archive_path, archive_name, tmp_path | |||||
) | |||||
loader = SvnLoader(swh_storage, repo_url, destination_path=tmp_path) | loader = SvnLoader(swh_storage, unknown_repo_url, destination_path=tmp_path) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
repo_url, | unknown_repo_url, | ||||
status="full", | status="full", | ||||
type="svn", | type="svn", | ||||
snapshot=GOURMET_SNAPSHOT.id, | snapshot=GOURMET_SNAPSHOT.id, | ||||
) | ) | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats == { | assert stats == { | ||||
"content": 19, | "content": 19, | ||||
Show All 10 Lines | |||||
def test_loader_svn_2_visits_no_change(swh_storage, datadir, tmp_path): | def test_loader_svn_2_visits_no_change(swh_storage, datadir, tmp_path): | ||||
"""Visit multiple times a repository with no change should yield the same snapshot | """Visit multiple times a repository with no change should yield the same snapshot | ||||
""" | """ | ||||
archive_name = "pkg-gourmet" | archive_name = "pkg-gourmet" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | unknown_repo_url = prepare_repository_from_archive( | ||||
anlambert: Not sure if that variable should have been renamed in all other tests. | |||||
ardumont (author): oh no, right ;) | |||||
archive_path, archive_name, tmp_path | |||||
) | |||||
loader = SvnLoader(swh_storage, repo_url) | loader = SvnLoader(swh_storage, unknown_repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
visit_status1 = assert_last_visit_matches( | visit_status1 = assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
repo_url, | unknown_repo_url, | ||||
status="full", | status="full", | ||||
type="svn", | type="svn", | ||||
snapshot=GOURMET_SNAPSHOT.id, | snapshot=GOURMET_SNAPSHOT.id, | ||||
) | ) | ||||
assert loader.load() == {"status": "uneventful"} | assert loader.load() == {"status": "uneventful"} | ||||
visit_status2 = assert_last_visit_matches( | visit_status2 = assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
repo_url, | unknown_repo_url, | ||||
status="full", | status="full", | ||||
type="svn", | type="svn", | ||||
snapshot=GOURMET_SNAPSHOT.id, | snapshot=GOURMET_SNAPSHOT.id, | ||||
) | ) | ||||
assert visit_status1.date < visit_status2.date | assert visit_status1.date < visit_status2.date | ||||
assert visit_status1.snapshot == visit_status2.snapshot | assert visit_status1.snapshot == visit_status2.snapshot | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin_visit"] == 1 + 1 # computed twice the same snapshot | assert stats["origin_visit"] == 1 + 1 # computed twice the same snapshot | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
# even starting from previous revision... | # even starting from previous revision... | ||||
start_revision = loader.storage.revision_get( | start_revision = loader.storage.revision_get( | ||||
[hash_to_bytes("95edacc8848369d6fb1608e887d6d2474fd5224f")] | [hash_to_bytes("95edacc8848369d6fb1608e887d6d2474fd5224f")] | ||||
)[0] | )[0] | ||||
assert start_revision is not None | assert start_revision is not None | ||||
loader = SvnLoader(swh_storage, repo_url, swh_revision=start_revision) | loader = SvnLoader(swh_storage, unknown_repo_url, swh_revision=start_revision) | ||||
assert loader.load() == {"status": "uneventful"} | assert loader.load() == {"status": "uneventful"} | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin_visit"] == 2 + 1 | assert stats["origin_visit"] == 2 + 1 | ||||
# ... with no change in repository, this yields the same snapshot | # ... with no change in repository, this yields the same snapshot | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
repo_url, | unknown_repo_url, | ||||
status="full", | status="full", | ||||
type="svn", | type="svn", | ||||
snapshot=GOURMET_SNAPSHOT.id, | snapshot=GOURMET_SNAPSHOT.id, | ||||
) | ) | ||||
def test_loader_tampered_repository(swh_storage, datadir, tmp_path): | def test_loader_tampered_repository(swh_storage, datadir, tmp_path): | ||||
"""In this scenario, the dump has been tampered with to modify the | """In this scenario, the dump has been tampered with to modify the | ||||
Show All 9 Lines | ``` | ||||
cd pkg-gourmet/ | cd pkg-gourmet/ | ||||
echo "Tampering with commit log message for fun and profit" > log.txt | echo "Tampering with commit log message for fun and profit" > log.txt | ||||
svnadmin setlog . -r 6 log.txt --bypass-hooks | svnadmin setlog . -r 6 log.txt --bypass-hooks | ||||
tar cvf pkg-gourmet-tampered-rev6-log.tgz pkg-gourmet/ | tar cvf pkg-gourmet-tampered-rev6-log.tgz pkg-gourmet/ | ||||
``` | ``` | ||||
""" | """ | ||||
archive_name = "pkg-gourmet" | archive_name = "pkg-gourmet" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | unknown_repo_url = prepare_repository_from_archive( | ||||
archive_path, archive_name, tmp_path | |||||
) | |||||
loader = SvnLoader(swh_storage, repo_url) | loader = SvnLoader(swh_storage, unknown_repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
check_snapshot(GOURMET_SNAPSHOT, loader.storage) | check_snapshot(GOURMET_SNAPSHOT, loader.storage) | ||||
archive_path2 = os.path.join(datadir, "pkg-gourmet-tampered-rev6-log.tgz") | archive_path2 = os.path.join(datadir, "pkg-gourmet-tampered-rev6-log.tgz") | ||||
repo_tampered_url = prepare_repository_from_archive( | repo_tampered_url = prepare_repository_from_archive( | ||||
archive_path2, archive_name, tmp_path | archive_path2, archive_name, tmp_path | ||||
) | ) | ||||
loader2 = SvnLoader(swh_storage, repo_tampered_url, origin_url=repo_url) | loader2 = SvnLoader(swh_storage, repo_tampered_url, origin_url=unknown_repo_url) | ||||
assert loader2.load() == {"status": "failed"} | assert loader2.load() == {"status": "failed"} | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader2.storage, repo_url, status="failed", type="svn", snapshot=None, | loader2.storage, unknown_repo_url, status="failed", type="svn", snapshot=None, | ||||
) | ) | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin"] == 1 | assert stats["origin"] == 1 | ||||
assert stats["origin_visit"] == 2 | assert stats["origin_visit"] == 2 | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
▲ Show 20 Lines • Show All 154 Lines • ▼ Show 20 Lines | def test_loader_svn_visit_with_eol_style(swh_storage, datadir, tmp_path): | ||||
"""Check that a svn repo containing a versioned file with CRLF line | """Check that a svn repo containing a versioned file with CRLF line | ||||
endings with svn:eol-style property set to 'native' (this is a | endings with svn:eol-style property set to 'native' (this is a | ||||
violation of svn specification as the file should have been | violation of svn specification as the file should have been | ||||
stored with LF line endings) can be loaded anyway. | stored with LF line endings) can be loaded anyway. | ||||
""" | """ | ||||
archive_name = "mediawiki-repo-r407-eol-native-crlf" | archive_name = "mediawiki-repo-r407-eol-native-crlf" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | unknown_repo_url = prepare_repository_from_archive( | ||||
archive_path, archive_name, tmp_path | |||||
) | |||||
loader = SvnLoader(swh_storage, repo_url) | loader = SvnLoader(swh_storage, unknown_repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
mediawiki_snapshot = Snapshot( | mediawiki_snapshot = Snapshot( | ||||
id=hash_to_bytes("d6d6e9703f157c5702d9a4a5dec878926ed4ab76"), | id=hash_to_bytes("d6d6e9703f157c5702d9a4a5dec878926ed4ab76"), | ||||
branches={ | branches={ | ||||
b"HEAD": SnapshotBranch( | b"HEAD": SnapshotBranch( | ||||
target=hash_to_bytes("7da4975c363101b819756d33459f30a866d01b1b"), | target=hash_to_bytes("7da4975c363101b819756d33459f30a866d01b1b"), | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
) | ) | ||||
}, | }, | ||||
) | ) | ||||
check_snapshot(mediawiki_snapshot, loader.storage) | check_snapshot(mediawiki_snapshot, loader.storage) | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
repo_url, | unknown_repo_url, | ||||
status="full", | status="full", | ||||
type="svn", | type="svn", | ||||
snapshot=mediawiki_snapshot.id, | snapshot=mediawiki_snapshot.id, | ||||
) | ) | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin"] == 1 | assert stats["origin"] == 1 | ||||
assert stats["origin_visit"] == 1 | assert stats["origin_visit"] == 1 | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
def test_loader_svn_visit_with_mixed_crlf_lf(swh_storage, datadir, tmp_path): | def test_loader_svn_visit_with_mixed_crlf_lf(swh_storage, datadir, tmp_path): | ||||
"""Check that a svn repo containing a versioned file with mixed | """Check that a svn repo containing a versioned file with mixed | ||||
CRLF/LF line endings with svn:eol-style property set to 'native' | CRLF/LF line endings with svn:eol-style property set to 'native' | ||||
(this is a violation of svn specification as mixed line endings | (this is a violation of svn specification as mixed line endings | ||||
for textual content should not be stored when the svn:eol-style | for textual content should not be stored when the svn:eol-style | ||||
property is set) can be loaded anyway. | property is set) can be loaded anyway. | ||||
""" | """ | ||||
archive_name = "pyang-repo-r343-eol-native-mixed-lf-crlf" | archive_name = "pyang-repo-r343-eol-native-mixed-lf-crlf" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | unknown_repo_url = prepare_repository_from_archive( | ||||
archive_path, archive_name, tmp_path | |||||
) | |||||
loader = SvnLoader(swh_storage, repo_url) | loader = SvnLoader(swh_storage, unknown_repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
pyang_snapshot = Snapshot( | pyang_snapshot = Snapshot( | ||||
id=hash_to_bytes("6d9590de11b00a5801de0ff3297c5b44bbbf7d24"), | id=hash_to_bytes("6d9590de11b00a5801de0ff3297c5b44bbbf7d24"), | ||||
branches={ | branches={ | ||||
b"HEAD": SnapshotBranch( | b"HEAD": SnapshotBranch( | ||||
target=hash_to_bytes("9c6962eeb9164a636c374be700672355e34a98a7"), | target=hash_to_bytes("9c6962eeb9164a636c374be700672355e34a98a7"), | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
) | ) | ||||
}, | }, | ||||
) | ) | ||||
check_snapshot(pyang_snapshot, loader.storage) | check_snapshot(pyang_snapshot, loader.storage) | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, repo_url, status="full", type="svn", snapshot=pyang_snapshot.id, | loader.storage, | ||||
unknown_repo_url, | |||||
status="full", | |||||
type="svn", | |||||
snapshot=pyang_snapshot.id, | |||||
) | ) | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin"] == 1 | assert stats["origin"] == 1 | ||||
assert stats["origin_visit"] == 1 | assert stats["origin_visit"] == 1 | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
def test_loader_svn_with_external_properties(swh_storage, datadir, tmp_path): | def test_loader_svn_with_external_properties(swh_storage, datadir, tmp_path): | ||||
"""Repository with svn:external properties cannot be fully ingested yet | """Repository with svn:external properties cannot be fully ingested yet | ||||
""" | """ | ||||
archive_name = "pkg-gourmet" | archive_name = "pkg-gourmet" | ||||
archive_path = os.path.join(datadir, "pkg-gourmet-with-external-id.tgz") | archive_path = os.path.join(datadir, "pkg-gourmet-with-external-id.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | unknown_repo_url = prepare_repository_from_archive( | ||||
archive_path, archive_name, tmp_path | |||||
) | |||||
loader = SvnLoader(swh_storage, repo_url) | loader = SvnLoader(swh_storage, unknown_repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
gourmet_externals_snapshot = Snapshot( | gourmet_externals_snapshot = Snapshot( | ||||
id=hash_to_bytes("19cb68d0a3f22372e2b7017ea5e2a2ea5ae3e09a"), | id=hash_to_bytes("19cb68d0a3f22372e2b7017ea5e2a2ea5ae3e09a"), | ||||
branches={ | branches={ | ||||
b"HEAD": SnapshotBranch( | b"HEAD": SnapshotBranch( | ||||
target=hash_to_bytes("82a7a4a09f9549223429143ba36ad77375e33c5c"), | target=hash_to_bytes("82a7a4a09f9549223429143ba36ad77375e33c5c"), | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
) | ) | ||||
}, | }, | ||||
) | ) | ||||
check_snapshot(gourmet_externals_snapshot, loader.storage) | check_snapshot(gourmet_externals_snapshot, loader.storage) | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
repo_url, | unknown_repo_url, | ||||
status="partial", | status="partial", | ||||
type="svn", | type="svn", | ||||
snapshot=gourmet_externals_snapshot.id, | snapshot=gourmet_externals_snapshot.id, | ||||
) | ) | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin"] == 1 | assert stats["origin"] == 1 | ||||
assert stats["origin_visit"] == 1 | assert stats["origin_visit"] == 1 | ||||
Show All 12 Lines | Edge case: | ||||
Remove it, then add folder holding the same name, commit. | Remove it, then add folder holding the same name, commit. | ||||
- do the same scenario with symbolic link (instead of file) | - do the same scenario with symbolic link (instead of file) | ||||
""" | """ | ||||
archive_name = "pkg-gourmet" | archive_name = "pkg-gourmet" | ||||
archive_path = os.path.join( | archive_path = os.path.join( | ||||
datadir, "pkg-gourmet-with-edge-case-links-and-files.tgz" | datadir, "pkg-gourmet-with-edge-case-links-and-files.tgz" | ||||
) | ) | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | unknown_repo_url = prepare_repository_from_archive( | ||||
archive_path, archive_name, tmp_path | |||||
) | |||||
loader = SvnLoader(swh_storage, repo_url) | loader = SvnLoader(swh_storage, unknown_repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
gourmet_edge_cases_snapshot = Snapshot( | gourmet_edge_cases_snapshot = Snapshot( | ||||
id=hash_to_bytes("18e60982fe521a2546ab8c3c73a535d80462d9d0"), | id=hash_to_bytes("18e60982fe521a2546ab8c3c73a535d80462d9d0"), | ||||
branches={ | branches={ | ||||
b"HEAD": SnapshotBranch( | b"HEAD": SnapshotBranch( | ||||
target=hash_to_bytes("3f43af2578fccf18b0d4198e48563da7929dc608"), | target=hash_to_bytes("3f43af2578fccf18b0d4198e48563da7929dc608"), | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
) | ) | ||||
}, | }, | ||||
) | ) | ||||
check_snapshot(gourmet_edge_cases_snapshot, loader.storage) | check_snapshot(gourmet_edge_cases_snapshot, loader.storage) | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
repo_url, | unknown_repo_url, | ||||
status="full", | status="full", | ||||
type="svn", | type="svn", | ||||
snapshot=gourmet_edge_cases_snapshot.id, | snapshot=gourmet_edge_cases_snapshot.id, | ||||
) | ) | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin"] == 1 | assert stats["origin"] == 1 | ||||
assert stats["origin_visit"] == 1 | assert stats["origin_visit"] == 1 | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
assert stats["revision"] == 19 | assert stats["revision"] == 19 | ||||
def test_loader_svn_with_wrong_symlinks(swh_storage, datadir, tmp_path): | def test_loader_svn_with_wrong_symlinks(swh_storage, datadir, tmp_path): | ||||
"""Repository with wrong symlinks should be ingested ok nonetheless | """Repository with wrong symlinks should be ingested ok nonetheless | ||||
Edge case: | Edge case: | ||||
- wrong symbolic link | - wrong symbolic link | ||||
- wrong symbolic link with empty space names | - wrong symbolic link with empty space names | ||||
""" | """ | ||||
archive_name = "pkg-gourmet" | archive_name = "pkg-gourmet" | ||||
archive_path = os.path.join(datadir, "pkg-gourmet-with-wrong-link-cases.tgz") | archive_path = os.path.join(datadir, "pkg-gourmet-with-wrong-link-cases.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | unknown_repo_url = prepare_repository_from_archive( | ||||
archive_path, archive_name, tmp_path | |||||
) | |||||
loader = SvnLoader(swh_storage, repo_url) | loader = SvnLoader(swh_storage, unknown_repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
gourmet_wrong_links_snapshot = Snapshot( | gourmet_wrong_links_snapshot = Snapshot( | ||||
id=hash_to_bytes("b17f38acabb90f066dedd30c29f01a02af88a5c4"), | id=hash_to_bytes("b17f38acabb90f066dedd30c29f01a02af88a5c4"), | ||||
branches={ | branches={ | ||||
b"HEAD": SnapshotBranch( | b"HEAD": SnapshotBranch( | ||||
target=hash_to_bytes("cf30d3bb9d5967d0a2bbeacc405f10a5dd9b138a"), | target=hash_to_bytes("cf30d3bb9d5967d0a2bbeacc405f10a5dd9b138a"), | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
) | ) | ||||
}, | }, | ||||
) | ) | ||||
check_snapshot(gourmet_wrong_links_snapshot, loader.storage) | check_snapshot(gourmet_wrong_links_snapshot, loader.storage) | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
repo_url, | unknown_repo_url, | ||||
status="full", | status="full", | ||||
type="svn", | type="svn", | ||||
snapshot=gourmet_wrong_links_snapshot.id, | snapshot=gourmet_wrong_links_snapshot.id, | ||||
) | ) | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin"] == 1 | assert stats["origin"] == 1 | ||||
assert stats["origin_visit"] == 1 | assert stats["origin_visit"] == 1 | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
assert stats["revision"] == 21 | assert stats["revision"] == 21 | ||||
def test_loader_svn_loader_from_dump_archive(swh_storage, datadir, tmp_path): | def test_loader_svn_loader_from_dump_archive(swh_storage, datadir, tmp_path): | ||||
"""Repository with wrong symlinks should be ingested ok nonetheless | """Repository with wrong symlinks should be ingested ok nonetheless | ||||
Edge case: | Edge case: | ||||
- wrong symbolic link | - wrong symbolic link | ||||
- wrong symbolic link with empty space names | - wrong symbolic link with empty space names | ||||
""" | """ | ||||
archive_name = "pkg-gourmet" | archive_name = "pkg-gourmet" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | unknown_repo_url = prepare_repository_from_archive( | ||||
archive_path, archive_name, tmp_path | |||||
) | |||||
loaderFromDump = SvnLoaderFromRemoteDump(swh_storage, repo_url) | loaderFromDump = SvnLoaderFromRemoteDump(swh_storage, unknown_repo_url) | ||||
assert loaderFromDump.load() == {"status": "eventful"} | assert loaderFromDump.load() == {"status": "eventful"} | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loaderFromDump.storage, | loaderFromDump.storage, | ||||
repo_url, | unknown_repo_url, | ||||
status="full", | status="full", | ||||
type="svn", | type="svn", | ||||
snapshot=GOURMET_SNAPSHOT.id, | snapshot=GOURMET_SNAPSHOT.id, | ||||
) | ) | ||||
origin_url = repo_url + "2" # rename to another origin | origin_url = unknown_repo_url + "2" # rename to another origin | ||||
loader = SvnLoader(swh_storage, repo_url, origin_url=origin_url) | loader = SvnLoader(swh_storage, unknown_repo_url, origin_url=origin_url) | ||||
assert loader.load() == {"status": "eventful"} # because are working on new origin | assert loader.load() == {"status": "eventful"} # because are working on new origin | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
origin_url, | origin_url, | ||||
status="full", | status="full", | ||||
type="svn", | type="svn", | ||||
snapshot=GOURMET_SNAPSHOT.id, | snapshot=GOURMET_SNAPSHOT.id, | ||||
) | ) | ||||
check_snapshot(GOURMET_SNAPSHOT, loader.storage) | check_snapshot(GOURMET_SNAPSHOT, loader.storage) | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin"] == 2 # created one more origin | assert stats["origin"] == 2 # created one more origin | ||||
assert stats["origin_visit"] == 2 | assert stats["origin_visit"] == 2 | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
loader = SvnLoader(swh_storage, repo_url) # no change on the origin-url | loader = SvnLoader(swh_storage, unknown_repo_url) # no change on the origin-url | ||||
assert loader.load() == {"status": "uneventful"} | assert loader.load() == {"status": "uneventful"} | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
origin_url, | origin_url, | ||||
status="full", | status="full", | ||||
type="svn", | type="svn", | ||||
snapshot=GOURMET_SNAPSHOT.id, | snapshot=GOURMET_SNAPSHOT.id, | ||||
) | ) | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin"] == 2 | assert stats["origin"] == 2 | ||||
assert stats["origin_visit"] == 3 | assert stats["origin_visit"] == 3 | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
# second visit from the dump should be uneventful | # second visit from the dump should be uneventful | ||||
loaderFromDump = SvnLoaderFromRemoteDump(swh_storage, repo_url) | loaderFromDump = SvnLoaderFromRemoteDump(swh_storage, unknown_repo_url) | ||||
assert loaderFromDump.load() == {"status": "uneventful"} | assert loaderFromDump.load() == {"status": "uneventful"} | ||||
def test_loader_user_defined_svn_properties(swh_storage, datadir, tmp_path): | def test_loader_user_defined_svn_properties(swh_storage, datadir, tmp_path): | ||||
"""Edge cases: The repository held some user defined svn-properties with special | """Edge cases: The repository held some user defined svn-properties with special | ||||
encodings, this prevented the repository from being loaded even though we do not | encodings, this prevented the repository from being loaded even though we do not | ||||
ingest those information. | ingest those information. | ||||
""" | """ | ||||
archive_name = "httthttt" | archive_name = "httthttt" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | unknown_repo_url = prepare_repository_from_archive( | ||||
archive_path, archive_name, tmp_path | |||||
) | |||||
loader = SvnLoader(swh_storage, repo_url) | loader = SvnLoader(swh_storage, unknown_repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
expected_snapshot = Snapshot( | expected_snapshot = Snapshot( | ||||
id=hash_to_bytes("70487267f682c07e52a2371061369b6cf5bffa47"), | id=hash_to_bytes("70487267f682c07e52a2371061369b6cf5bffa47"), | ||||
branches={ | branches={ | ||||
b"HEAD": SnapshotBranch( | b"HEAD": SnapshotBranch( | ||||
target=hash_to_bytes("604a17dbb15e8d7ecb3e9f3768d09bf493667a93"), | target=hash_to_bytes("604a17dbb15e8d7ecb3e9f3768d09bf493667a93"), | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
) | ) | ||||
}, | }, | ||||
) | ) | ||||
check_snapshot(expected_snapshot, loader.storage) | check_snapshot(expected_snapshot, loader.storage) | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
repo_url, | unknown_repo_url, | ||||
status="full", | status="full", | ||||
type="svn", | type="svn", | ||||
snapshot=expected_snapshot.id, | snapshot=expected_snapshot.id, | ||||
) | ) | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin"] == 1 | assert stats["origin"] == 1 | ||||
assert stats["origin_visit"] == 1 | assert stats["origin_visit"] == 1 | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
assert stats["revision"] == 7 | assert stats["revision"] == 7 | ||||
def test_loader_svn_dir_added_then_removed(swh_storage, datadir, tmp_path): | def test_loader_svn_dir_added_then_removed(swh_storage, datadir, tmp_path): | ||||
"""Loader should handle directory removal when processing a commit""" | """Loader should handle directory removal when processing a commit""" | ||||
archive_name = "pkg-gourmet" | archive_name = "pkg-gourmet" | ||||
archive_path = os.path.join(datadir, f"{archive_name}-add-remove-dir.tgz") | archive_path = os.path.join(datadir, f"{archive_name}-add-remove-dir.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | unknown_repo_url = prepare_repository_from_archive( | ||||
archive_path, archive_name, tmp_path | |||||
) | |||||
loader = SvnLoader(swh_storage, repo_url, destination_path=tmp_path) | loader = SvnLoader(swh_storage, unknown_repo_url, destination_path=tmp_path) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
assert loader.visit_status() == "full" | assert loader.visit_status() == "full" |
To remove I guess.