Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/svn/tests/test_loader.py
Show All 33 Lines | branches={ | ||||
b"HEAD": SnapshotBranch( | b"HEAD": SnapshotBranch( | ||||
target=hash_to_bytes("171dc35522bfd17dda4e90a542a0377fb2fc707a"), | target=hash_to_bytes("171dc35522bfd17dda4e90a542a0377fb2fc707a"), | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
) | ) | ||||
}, | }, | ||||
) | ) | ||||
def test_loader_svn_not_found_no_mock(swh_config, tmp_path): | def test_loader_svn_not_found_no_mock(swh_storage, tmp_path): | ||||
"""Given an unknown repository, the loader visit ends up in status not_found""" | """Given an unknown repository, the loader visit ends up in status not_found""" | ||||
unknown_repo_url = "unknown-repository" | unknown_repo_url = "unknown-repository" | ||||
loader = SvnLoader(unknown_repo_url, destination_path=tmp_path) | loader = SvnLoader(swh_storage, unknown_repo_url, destination_path=tmp_path) | ||||
assert loader.load() == {"status": "uneventful"} | assert loader.load() == {"status": "uneventful"} | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, unknown_repo_url, status="not_found", type="svn", | swh_storage, unknown_repo_url, status="not_found", type="svn", | ||||
) | ) | ||||
@pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
"exception_msg", ["Unable to connect to a repository at URL", "Unknown URL type",] | "exception_msg", ["Unable to connect to a repository at URL", "Unknown URL type",] | ||||
) | ) | ||||
def test_loader_svn_not_found(swh_config, tmp_path, exception_msg, mocker): | def test_loader_svn_not_found(swh_storage, tmp_path, exception_msg, mocker): | ||||
"""Given unknown repository issues, the loader visit ends up in status not_found""" | """Given unknown repository issues, the loader visit ends up in status not_found""" | ||||
mock = mocker.patch("swh.loader.svn.loader.SvnRepo") | mock = mocker.patch("swh.loader.svn.loader.SvnRepo") | ||||
mock.side_effect = SubversionException(exception_msg, 0) | mock.side_effect = SubversionException(exception_msg, 0) | ||||
unknown_repo_url = "unknown-repository" | unknown_repo_url = "unknown-repository" | ||||
loader = SvnLoader(unknown_repo_url, destination_path=tmp_path) | loader = SvnLoader(swh_storage, unknown_repo_url, destination_path=tmp_path) | ||||
assert loader.load() == {"status": "uneventful"} | assert loader.load() == {"status": "uneventful"} | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, unknown_repo_url, status="not_found", type="svn", | swh_storage, unknown_repo_url, status="not_found", type="svn", | ||||
) | ) | ||||
@pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
"exception", | "exception", | ||||
[ | [ | ||||
SubversionException("Irrelevant message, considered a failure", 10), | SubversionException("Irrelevant message, considered a failure", 10), | ||||
SubversionException("Present but fails to read, considered a failure", 20), | SubversionException("Present but fails to read, considered a failure", 20), | ||||
ValueError("considered a failure"), | ValueError("considered a failure"), | ||||
], | ], | ||||
) | ) | ||||
def test_loader_svn_failures(swh_config, tmp_path, exception, mocker): | def test_loader_svn_failures(swh_storage, tmp_path, exception, mocker): | ||||
"""Given any errors raised, the loader visit ends up in status failed""" | """Given any errors raised, the loader visit ends up in status failed""" | ||||
mock = mocker.patch("swh.loader.svn.loader.SvnRepo") | mock = mocker.patch("swh.loader.svn.loader.SvnRepo") | ||||
mock.side_effect = exception | mock.side_effect = exception | ||||
existing_repo_url = "existing-repo-url" | existing_repo_url = "existing-repo-url" | ||||
loader = SvnLoader(existing_repo_url, destination_path=tmp_path) | loader = SvnLoader(swh_storage, existing_repo_url, destination_path=tmp_path) | ||||
assert loader.load() == {"status": "failed"} | assert loader.load() == {"status": "failed"} | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, existing_repo_url, status="failed", type="svn", | swh_storage, existing_repo_url, status="failed", type="svn", | ||||
) | ) | ||||
def test_loader_svn_new_visit(swh_config, datadir, tmp_path): | def test_loader_svn_new_visit(swh_storage, datadir, tmp_path): | ||||
"""Eventful visit should yield 1 snapshot""" | """Eventful visit should yield 1 snapshot""" | ||||
archive_name = "pkg-gourmet" | archive_name = "pkg-gourmet" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
loader = SvnLoader(repo_url, destination_path=tmp_path) | loader = SvnLoader(swh_storage, repo_url, destination_path=tmp_path) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
repo_url, | repo_url, | ||||
status="full", | status="full", | ||||
type="svn", | type="svn", | ||||
Show All 10 Lines | assert stats == { | ||||
"revision": 6, | "revision": 6, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} | } | ||||
check_snapshot(GOURMET_SNAPSHOT, loader.storage) | check_snapshot(GOURMET_SNAPSHOT, loader.storage) | ||||
def test_loader_svn_2_visits_no_change(swh_config, datadir, tmp_path): | def test_loader_svn_2_visits_no_change(swh_storage, datadir, tmp_path): | ||||
"""Visit multiple times a repository with no change should yield the same snapshot | """Visit multiple times a repository with no change should yield the same snapshot | ||||
""" | """ | ||||
archive_name = "pkg-gourmet" | archive_name = "pkg-gourmet" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
loader = SvnLoader(repo_url) | loader = SvnLoader(swh_storage, repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
visit_status1 = assert_last_visit_matches( | visit_status1 = assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
repo_url, | repo_url, | ||||
status="full", | status="full", | ||||
type="svn", | type="svn", | ||||
snapshot=GOURMET_SNAPSHOT.id, | snapshot=GOURMET_SNAPSHOT.id, | ||||
Show All 16 Lines | def test_loader_svn_2_visits_no_change(swh_storage, datadir, tmp_path): | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
# even starting from previous revision... | # even starting from previous revision... | ||||
start_revision = loader.storage.revision_get( | start_revision = loader.storage.revision_get( | ||||
[hash_to_bytes("95edacc8848369d6fb1608e887d6d2474fd5224f")] | [hash_to_bytes("95edacc8848369d6fb1608e887d6d2474fd5224f")] | ||||
)[0] | )[0] | ||||
assert start_revision is not None | assert start_revision is not None | ||||
loader = SvnLoader(repo_url, swh_revision=start_revision) | loader = SvnLoader(swh_storage, repo_url, swh_revision=start_revision) | ||||
assert loader.load() == {"status": "uneventful"} | assert loader.load() == {"status": "uneventful"} | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin_visit"] == 2 + 1 | assert stats["origin_visit"] == 2 + 1 | ||||
# ... with no change in repository, this yields the same snapshot | # ... with no change in repository, this yields the same snapshot | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
repo_url, | repo_url, | ||||
status="full", | status="full", | ||||
type="svn", | type="svn", | ||||
snapshot=GOURMET_SNAPSHOT.id, | snapshot=GOURMET_SNAPSHOT.id, | ||||
) | ) | ||||
def test_loader_tampered_repository(swh_config, datadir, tmp_path): | def test_loader_tampered_repository(swh_storage, datadir, tmp_path): | ||||
"""In this scenario, the dump has been tampered with to modify the | """In this scenario, the dump has been tampered with to modify the | ||||
commit log [1]. This results in a hash divergence which is | commit log [1]. This results in a hash divergence which is | ||||
detected at startup after a new run for the same origin. | detected at startup after a new run for the same origin. | ||||
In effect, that stops the loading and do nothing. | In effect, that stops the loading and do nothing. | ||||
[1] Tampering with revision 6 log message following: | [1] Tampering with revision 6 log message following: | ||||
``` | ``` | ||||
tar xvf pkg-gourmet.tgz # initial repository ingested | tar xvf pkg-gourmet.tgz # initial repository ingested | ||||
cd pkg-gourmet/ | cd pkg-gourmet/ | ||||
echo "Tampering with commit log message for fun and profit" > log.txt | echo "Tampering with commit log message for fun and profit" > log.txt | ||||
svnadmin setlog . -r 6 log.txt --bypass-hooks | svnadmin setlog . -r 6 log.txt --bypass-hooks | ||||
tar cvf pkg-gourmet-tampered-rev6-log.tgz pkg-gourmet/ | tar cvf pkg-gourmet-tampered-rev6-log.tgz pkg-gourmet/ | ||||
``` | ``` | ||||
""" | """ | ||||
archive_name = "pkg-gourmet" | archive_name = "pkg-gourmet" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
loader = SvnLoader(repo_url) | loader = SvnLoader(swh_storage, repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
check_snapshot(GOURMET_SNAPSHOT, loader.storage) | check_snapshot(GOURMET_SNAPSHOT, loader.storage) | ||||
archive_path2 = os.path.join(datadir, "pkg-gourmet-tampered-rev6-log.tgz") | archive_path2 = os.path.join(datadir, "pkg-gourmet-tampered-rev6-log.tgz") | ||||
repo_tampered_url = prepare_repository_from_archive( | repo_tampered_url = prepare_repository_from_archive( | ||||
archive_path2, archive_name, tmp_path | archive_path2, archive_name, tmp_path | ||||
) | ) | ||||
loader2 = SvnLoader(repo_tampered_url, origin_url=repo_url) | loader2 = SvnLoader(swh_storage, repo_tampered_url, origin_url=repo_url) | ||||
assert loader2.load() == {"status": "failed"} | assert loader2.load() == {"status": "failed"} | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader2.storage, repo_url, status="failed", type="svn", snapshot=None, | loader2.storage, repo_url, status="failed", type="svn", snapshot=None, | ||||
) | ) | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin"] == 1 | assert stats["origin"] == 1 | ||||
assert stats["origin_visit"] == 2 | assert stats["origin_visit"] == 2 | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
def test_loader_svn_visit_with_changes(swh_config, datadir, tmp_path): | def test_loader_svn_visit_with_changes(swh_storage, datadir, tmp_path): | ||||
"""In this scenario, the repository has been updated with new changes. | """In this scenario, the repository has been updated with new changes. | ||||
The loading visit should result in new objects stored and 1 new | The loading visit should result in new objects stored and 1 new | ||||
snapshot. | snapshot. | ||||
""" | """ | ||||
archive_name = "pkg-gourmet" | archive_name = "pkg-gourmet" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_initial_url = prepare_repository_from_archive( | repo_initial_url = prepare_repository_from_archive( | ||||
archive_path, archive_name, tmp_path | archive_path, archive_name, tmp_path | ||||
) | ) | ||||
# repo_initial_url becomes the origin_url we want to visit some more below | # repo_initial_url becomes the origin_url we want to visit some more below | ||||
loader = SvnLoader(repo_initial_url) | loader = SvnLoader(swh_storage, repo_initial_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
visit_status1 = assert_last_visit_matches( | visit_status1 = assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
repo_initial_url, | repo_initial_url, | ||||
status="full", | status="full", | ||||
type="svn", | type="svn", | ||||
snapshot=GOURMET_SNAPSHOT.id, | snapshot=GOURMET_SNAPSHOT.id, | ||||
) | ) | ||||
archive_path = os.path.join(datadir, "pkg-gourmet-with-updates.tgz") | archive_path = os.path.join(datadir, "pkg-gourmet-with-updates.tgz") | ||||
repo_updated_url = prepare_repository_from_archive( | repo_updated_url = prepare_repository_from_archive( | ||||
archive_path, "pkg-gourmet", tmp_path | archive_path, "pkg-gourmet", tmp_path | ||||
) | ) | ||||
loader = SvnLoader(repo_updated_url, origin_url=repo_initial_url,) | loader = SvnLoader(swh_storage, repo_updated_url, origin_url=repo_initial_url,) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
visit_status2 = assert_last_visit_matches( | visit_status2 = assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
repo_updated_url, | repo_updated_url, | ||||
status="full", | status="full", | ||||
type="svn", | type="svn", | ||||
snapshot=GOURMET_UPDATES_SNAPSHOT.id, | snapshot=GOURMET_UPDATES_SNAPSHOT.id, | ||||
Show All 14 Lines | assert stats == { | ||||
"snapshot": 2, | "snapshot": 2, | ||||
} | } | ||||
check_snapshot(GOURMET_UPDATES_SNAPSHOT, loader.storage) | check_snapshot(GOURMET_UPDATES_SNAPSHOT, loader.storage) | ||||
# Start from scratch loading yields the same result | # Start from scratch loading yields the same result | ||||
loader = SvnLoader( | loader = SvnLoader( | ||||
repo_updated_url, origin_url=repo_initial_url, start_from_scratch=True | swh_storage, | ||||
repo_updated_url, | |||||
origin_url=repo_initial_url, | |||||
start_from_scratch=True, | |||||
) | ) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
visit_status3 = assert_last_visit_matches( | visit_status3 = assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
repo_updated_url, | repo_updated_url, | ||||
status="full", | status="full", | ||||
type="svn", | type="svn", | ||||
snapshot=GOURMET_UPDATES_SNAPSHOT.id, | snapshot=GOURMET_UPDATES_SNAPSHOT.id, | ||||
) | ) | ||||
assert visit_status2.date < visit_status3.date | assert visit_status2.date < visit_status3.date | ||||
assert visit_status3.snapshot == visit_status2.snapshot | assert visit_status3.snapshot == visit_status2.snapshot | ||||
check_snapshot(GOURMET_UPDATES_SNAPSHOT, loader.storage) | check_snapshot(GOURMET_UPDATES_SNAPSHOT, loader.storage) | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin"] == 1 # always the same visit | assert stats["origin"] == 1 # always the same visit | ||||
assert stats["origin_visit"] == 2 + 1 # 1 more visit | assert stats["origin_visit"] == 2 + 1 # 1 more visit | ||||
assert stats["snapshot"] == 2 # no new snapshot | assert stats["snapshot"] == 2 # no new snapshot | ||||
def test_loader_svn_visit_start_from_revision(swh_config, datadir, tmp_path): | def test_loader_svn_visit_start_from_revision(swh_storage, datadir, tmp_path): | ||||
"""Starting from existing revision, next visit on changed repo should yield 1 new | """Starting from existing revision, next visit on changed repo should yield 1 new | ||||
snapshot. | snapshot. | ||||
""" | """ | ||||
archive_name = "pkg-gourmet" | archive_name = "pkg-gourmet" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_initial_url = prepare_repository_from_archive( | repo_initial_url = prepare_repository_from_archive( | ||||
archive_path, archive_name, tmp_path | archive_path, archive_name, tmp_path | ||||
) | ) | ||||
# repo_initial_url becomes the origin_url we want to visit some more below | # repo_initial_url becomes the origin_url we want to visit some more below | ||||
loader = SvnLoader(repo_initial_url) | loader = SvnLoader(swh_storage, repo_initial_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
visit_status1 = assert_last_visit_matches( | visit_status1 = assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
repo_initial_url, | repo_initial_url, | ||||
status="full", | status="full", | ||||
type="svn", | type="svn", | ||||
snapshot=GOURMET_SNAPSHOT.id, | snapshot=GOURMET_SNAPSHOT.id, | ||||
) | ) | ||||
start_revision = loader.storage.revision_get( | start_revision = loader.storage.revision_get( | ||||
[hash_to_bytes("95edacc8848369d6fb1608e887d6d2474fd5224f")] | [hash_to_bytes("95edacc8848369d6fb1608e887d6d2474fd5224f")] | ||||
)[0] | )[0] | ||||
assert start_revision is not None | assert start_revision is not None | ||||
archive_path = os.path.join(datadir, "pkg-gourmet-with-updates.tgz") | archive_path = os.path.join(datadir, "pkg-gourmet-with-updates.tgz") | ||||
repo_updated_url = prepare_repository_from_archive( | repo_updated_url = prepare_repository_from_archive( | ||||
archive_path, "pkg-gourmet", tmp_path | archive_path, "pkg-gourmet", tmp_path | ||||
) | ) | ||||
# we'll start from start_revision | # we'll start from start_revision | ||||
loader = SvnLoader( | loader = SvnLoader( | ||||
repo_updated_url, origin_url=repo_initial_url, swh_revision=start_revision | swh_storage, | ||||
repo_updated_url, | |||||
origin_url=repo_initial_url, | |||||
swh_revision=start_revision, | |||||
) | ) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
# nonetheless, we obtain the same snapshot (as previous tests on that repository) | # nonetheless, we obtain the same snapshot (as previous tests on that repository) | ||||
visit_status2 = assert_last_visit_matches( | visit_status2 = assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
repo_updated_url, | repo_updated_url, | ||||
Show All 15 Lines | assert stats == { | ||||
"revision": 11, | "revision": 11, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 2, | "snapshot": 2, | ||||
} | } | ||||
check_snapshot(GOURMET_UPDATES_SNAPSHOT, loader.storage) | check_snapshot(GOURMET_UPDATES_SNAPSHOT, loader.storage) | ||||
def test_loader_svn_visit_with_eol_style(swh_config, datadir, tmp_path): | def test_loader_svn_visit_with_eol_style(swh_storage, datadir, tmp_path): | ||||
"""Check that a svn repo containing a versioned file with CRLF line | """Check that a svn repo containing a versioned file with CRLF line | ||||
endings with svn:eol-style property set to 'native' (this is a | endings with svn:eol-style property set to 'native' (this is a | ||||
violation of svn specification as the file should have been | violation of svn specification as the file should have been | ||||
stored with LF line endings) can be loaded anyway. | stored with LF line endings) can be loaded anyway. | ||||
""" | """ | ||||
archive_name = "mediawiki-repo-r407-eol-native-crlf" | archive_name = "mediawiki-repo-r407-eol-native-crlf" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
loader = SvnLoader(repo_url) | loader = SvnLoader(swh_storage, repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
mediawiki_snapshot = Snapshot( | mediawiki_snapshot = Snapshot( | ||||
id=hash_to_bytes("d6d6e9703f157c5702d9a4a5dec878926ed4ab76"), | id=hash_to_bytes("d6d6e9703f157c5702d9a4a5dec878926ed4ab76"), | ||||
branches={ | branches={ | ||||
b"HEAD": SnapshotBranch( | b"HEAD": SnapshotBranch( | ||||
target=hash_to_bytes("7da4975c363101b819756d33459f30a866d01b1b"), | target=hash_to_bytes("7da4975c363101b819756d33459f30a866d01b1b"), | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
Show All 11 Lines | def test_loader_svn_visit_with_eol_style(swh_storage, datadir, tmp_path): | ||||
) | ) | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin"] == 1 | assert stats["origin"] == 1 | ||||
assert stats["origin_visit"] == 1 | assert stats["origin_visit"] == 1 | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
def test_loader_svn_visit_with_mixed_crlf_lf(swh_config, datadir, tmp_path): | def test_loader_svn_visit_with_mixed_crlf_lf(swh_storage, datadir, tmp_path): | ||||
"""Check that a svn repo containing a versioned file with mixed | """Check that a svn repo containing a versioned file with mixed | ||||
CRLF/LF line endings with svn:eol-style property set to 'native' | CRLF/LF line endings with svn:eol-style property set to 'native' | ||||
(this is a violation of svn specification as mixed line endings | (this is a violation of svn specification as mixed line endings | ||||
for textual content should not be stored when the svn:eol-style | for textual content should not be stored when the svn:eol-style | ||||
property is set) can be loaded anyway. | property is set) can be loaded anyway. | ||||
""" | """ | ||||
archive_name = "pyang-repo-r343-eol-native-mixed-lf-crlf" | archive_name = "pyang-repo-r343-eol-native-mixed-lf-crlf" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
loader = SvnLoader(repo_url) | loader = SvnLoader(swh_storage, repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
pyang_snapshot = Snapshot( | pyang_snapshot = Snapshot( | ||||
id=hash_to_bytes("6d9590de11b00a5801de0ff3297c5b44bbbf7d24"), | id=hash_to_bytes("6d9590de11b00a5801de0ff3297c5b44bbbf7d24"), | ||||
branches={ | branches={ | ||||
b"HEAD": SnapshotBranch( | b"HEAD": SnapshotBranch( | ||||
target=hash_to_bytes("9c6962eeb9164a636c374be700672355e34a98a7"), | target=hash_to_bytes("9c6962eeb9164a636c374be700672355e34a98a7"), | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
) | ) | ||||
}, | }, | ||||
) | ) | ||||
check_snapshot(pyang_snapshot, loader.storage) | check_snapshot(pyang_snapshot, loader.storage) | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, repo_url, status="full", type="svn", snapshot=pyang_snapshot.id, | loader.storage, repo_url, status="full", type="svn", snapshot=pyang_snapshot.id, | ||||
) | ) | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin"] == 1 | assert stats["origin"] == 1 | ||||
assert stats["origin_visit"] == 1 | assert stats["origin_visit"] == 1 | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
def test_loader_svn_with_external_properties(swh_config, datadir, tmp_path): | def test_loader_svn_with_external_properties(swh_storage, datadir, tmp_path): | ||||
"""Repository with svn:external properties cannot be fully ingested yet | """Repository with svn:external properties cannot be fully ingested yet | ||||
""" | """ | ||||
archive_name = "pkg-gourmet" | archive_name = "pkg-gourmet" | ||||
archive_path = os.path.join(datadir, "pkg-gourmet-with-external-id.tgz") | archive_path = os.path.join(datadir, "pkg-gourmet-with-external-id.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
loader = SvnLoader(repo_url) | loader = SvnLoader(swh_storage, repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
gourmet_externals_snapshot = Snapshot( | gourmet_externals_snapshot = Snapshot( | ||||
id=hash_to_bytes("19cb68d0a3f22372e2b7017ea5e2a2ea5ae3e09a"), | id=hash_to_bytes("19cb68d0a3f22372e2b7017ea5e2a2ea5ae3e09a"), | ||||
branches={ | branches={ | ||||
b"HEAD": SnapshotBranch( | b"HEAD": SnapshotBranch( | ||||
target=hash_to_bytes("82a7a4a09f9549223429143ba36ad77375e33c5c"), | target=hash_to_bytes("82a7a4a09f9549223429143ba36ad77375e33c5c"), | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
Show All 14 Lines | def test_loader_svn_with_external_properties(swh_storage, datadir, tmp_path): | ||||
assert stats["origin_visit"] == 1 | assert stats["origin_visit"] == 1 | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
# repository holds 21 revisions, but the last commit holds an 'svn:externals' | # repository holds 21 revisions, but the last commit holds an 'svn:externals' | ||||
# property which will make the loader-svn stops at the last revision prior to the | # property which will make the loader-svn stops at the last revision prior to the | ||||
# bad one | # bad one | ||||
assert stats["revision"] == 21 - 1 # commit with the svn:external property | assert stats["revision"] == 21 - 1 # commit with the svn:external property | ||||
def test_loader_svn_with_symlink(swh_config, datadir, tmp_path): | def test_loader_svn_with_symlink(swh_storage, datadir, tmp_path): | ||||
"""Repository with symlinks should be ingested ok | """Repository with symlinks should be ingested ok | ||||
Edge case: | Edge case: | ||||
- first create a file and commit it. | - first create a file and commit it. | ||||
Remove it, then add folder holding the same name, commit. | Remove it, then add folder holding the same name, commit. | ||||
- do the same scenario with symbolic link (instead of file) | - do the same scenario with symbolic link (instead of file) | ||||
""" | """ | ||||
archive_name = "pkg-gourmet" | archive_name = "pkg-gourmet" | ||||
archive_path = os.path.join( | archive_path = os.path.join( | ||||
datadir, "pkg-gourmet-with-edge-case-links-and-files.tgz" | datadir, "pkg-gourmet-with-edge-case-links-and-files.tgz" | ||||
) | ) | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
loader = SvnLoader(repo_url) | loader = SvnLoader(swh_storage, repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
gourmet_edge_cases_snapshot = Snapshot( | gourmet_edge_cases_snapshot = Snapshot( | ||||
id=hash_to_bytes("18e60982fe521a2546ab8c3c73a535d80462d9d0"), | id=hash_to_bytes("18e60982fe521a2546ab8c3c73a535d80462d9d0"), | ||||
branches={ | branches={ | ||||
b"HEAD": SnapshotBranch( | b"HEAD": SnapshotBranch( | ||||
target=hash_to_bytes("3f43af2578fccf18b0d4198e48563da7929dc608"), | target=hash_to_bytes("3f43af2578fccf18b0d4198e48563da7929dc608"), | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
Show All 12 Lines | def test_loader_svn_with_symlink(swh_storage, datadir, tmp_path): | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin"] == 1 | assert stats["origin"] == 1 | ||||
assert stats["origin_visit"] == 1 | assert stats["origin_visit"] == 1 | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
assert stats["revision"] == 19 | assert stats["revision"] == 19 | ||||
def test_loader_svn_with_wrong_symlinks(swh_config, datadir, tmp_path): | def test_loader_svn_with_wrong_symlinks(swh_storage, datadir, tmp_path): | ||||
"""Repository with wrong symlinks should be ingested ok nonetheless | """Repository with wrong symlinks should be ingested ok nonetheless | ||||
Edge case: | Edge case: | ||||
- wrong symbolic link | - wrong symbolic link | ||||
- wrong symbolic link with empty space names | - wrong symbolic link with empty space names | ||||
""" | """ | ||||
archive_name = "pkg-gourmet" | archive_name = "pkg-gourmet" | ||||
archive_path = os.path.join(datadir, "pkg-gourmet-with-wrong-link-cases.tgz") | archive_path = os.path.join(datadir, "pkg-gourmet-with-wrong-link-cases.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
loader = SvnLoader(repo_url) | loader = SvnLoader(swh_storage, repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
gourmet_wrong_links_snapshot = Snapshot( | gourmet_wrong_links_snapshot = Snapshot( | ||||
id=hash_to_bytes("b17f38acabb90f066dedd30c29f01a02af88a5c4"), | id=hash_to_bytes("b17f38acabb90f066dedd30c29f01a02af88a5c4"), | ||||
branches={ | branches={ | ||||
b"HEAD": SnapshotBranch( | b"HEAD": SnapshotBranch( | ||||
target=hash_to_bytes("cf30d3bb9d5967d0a2bbeacc405f10a5dd9b138a"), | target=hash_to_bytes("cf30d3bb9d5967d0a2bbeacc405f10a5dd9b138a"), | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
Show All 12 Lines | def test_loader_svn_with_wrong_symlinks(swh_storage, datadir, tmp_path): | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin"] == 1 | assert stats["origin"] == 1 | ||||
assert stats["origin_visit"] == 1 | assert stats["origin_visit"] == 1 | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
assert stats["revision"] == 21 | assert stats["revision"] == 21 | ||||
def test_loader_svn_loader_from_dump_archive(swh_storage, datadir, tmp_path):
    """Loading through the svnrdump-based loader, then revisiting through the
    regular loader, should converge on the same snapshot:

    - a first ``SvnLoaderFromRemoteDump`` visit is eventful
    - a regular ``SvnLoader`` visit under a *renamed* origin is eventful again
      (new origin, so everything is new from its point of view)
    - a regular ``SvnLoader`` visit on the original origin is uneventful
    - a second ``SvnLoaderFromRemoteDump`` visit is uneventful

    Throughout, only one snapshot (GOURMET_SNAPSHOT) is ever created.
    """
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    # First ingestion, via the remote-dump loader: eventful, full visit.
    loaderFromDump = SvnLoaderFromRemoteDump(swh_storage, repo_url)
    assert loaderFromDump.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loaderFromDump.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )

    origin_url = repo_url + "2"  # rename to another origin
    loader = SvnLoader(swh_storage, repo_url, origin_url=origin_url)
    # Eventful because we are working on a new origin.
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        origin_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )
    check_snapshot(GOURMET_SNAPSHOT, loader.storage)

    stats = get_stats(loader.storage)
    assert stats["origin"] == 2  # created one more origin
    assert stats["origin_visit"] == 2
    assert stats["snapshot"] == 1

    loader = SvnLoader(swh_storage, repo_url)  # no change on the origin-url
    assert loader.load() == {"status": "uneventful"}
    assert_last_visit_matches(
        loader.storage,
        origin_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )

    stats = get_stats(loader.storage)
    assert stats["origin"] == 2
    assert stats["origin_visit"] == 3
    assert stats["snapshot"] == 1

    # second visit from the dump should be uneventful
    loaderFromDump = SvnLoaderFromRemoteDump(swh_storage, repo_url)
    assert loaderFromDump.load() == {"status": "uneventful"}
def test_loader_user_defined_svn_properties(swh_config, datadir, tmp_path): | def test_loader_user_defined_svn_properties(swh_storage, datadir, tmp_path): | ||||
"""Edge cases: The repository held some user defined svn-properties with special | """Edge cases: The repository held some user defined svn-properties with special | ||||
encodings, this prevented the repository from being loaded even though we do not | encodings, this prevented the repository from being loaded even though we do not | ||||
ingest those information. | ingest those information. | ||||
""" | """ | ||||
archive_name = "httthttt" | archive_name = "httthttt" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
loader = SvnLoader(repo_url) | loader = SvnLoader(swh_storage, repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
expected_snapshot = Snapshot( | expected_snapshot = Snapshot( | ||||
id=hash_to_bytes("70487267f682c07e52a2371061369b6cf5bffa47"), | id=hash_to_bytes("70487267f682c07e52a2371061369b6cf5bffa47"), | ||||
branches={ | branches={ | ||||
b"HEAD": SnapshotBranch( | b"HEAD": SnapshotBranch( | ||||
target=hash_to_bytes("604a17dbb15e8d7ecb3e9f3768d09bf493667a93"), | target=hash_to_bytes("604a17dbb15e8d7ecb3e9f3768d09bf493667a93"), | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
Show All 12 Lines | def test_loader_user_defined_svn_properties(swh_storage, datadir, tmp_path): | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats["origin"] == 1 | assert stats["origin"] == 1 | ||||
assert stats["origin_visit"] == 1 | assert stats["origin_visit"] == 1 | ||||
assert stats["snapshot"] == 1 | assert stats["snapshot"] == 1 | ||||
assert stats["revision"] == 7 | assert stats["revision"] == 7 | ||||
def test_loader_svn_dir_added_then_removed(swh_storage, datadir, tmp_path):
    """Loader should handle directory removal when processing a commit"""
    name = "pkg-gourmet"
    tarball = os.path.join(datadir, f"{name}-add-remove-dir.tgz")
    url = prepare_repository_from_archive(tarball, name, tmp_path)

    svn_loader = SvnLoader(swh_storage, url, destination_path=tmp_path)

    assert svn_loader.load() == {"status": "eventful"}
    assert svn_loader.visit_status() == "full"