Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/swh/loader/svn/tests/resources/pkg-gourmet-tampered-rev6-log.tgz b/swh/loader/svn/tests/resources/pkg-gourmet-tampered-rev6-log.tgz
new file mode 100644
index 0000000..a29df58
Binary files /dev/null and b/swh/loader/svn/tests/resources/pkg-gourmet-tampered-rev6-log.tgz differ
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
index 83d5ac9..60e492c 100644
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -1,818 +1,825 @@
# Copyright (C) 2016-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import copy
import os
import subprocess
from typing import Optional
from swh.loader.core.tests import BaseLoaderTest
from swh.loader.tests.common import assert_last_visit_matches
from swh.loader.package.tests.common import check_snapshot, get_stats
from swh.loader.svn.loader import (
DEFAULT_BRANCH,
SvnLoader,
SvnLoaderFromRemoteDump,
build_swh_snapshot,
)
from swh.model import hashutil
from swh.model.model import Origin, Snapshot
def test_build_swh_snapshot():
    """build_swh_snapshot must wrap a revision id in a one-branch Snapshot.

    The only expected branch is DEFAULT_BRANCH, targeting the given revision.
    """
    revision_id = hashutil.hash_to_bytes("3f51abf3b3d466571be0855dfa67e094f9ceff1b")

    actual = build_swh_snapshot(revision_id)
    assert isinstance(actual, Snapshot)

    default_branch = {"target": revision_id, "target_type": "revision"}
    expected = Snapshot.from_dict({"branches": {DEFAULT_BRANCH: default_branch}})
    assert actual == expected
_LOADER_TEST_CONFIG = {
"check_revision": {"limit": 100, "status": False},
"debug": False,
"log_db": "dbname=softwareheritage-log",
"save_data": False,
"save_data_path": "",
"temp_directory": "/tmp",
"max_content_size": 100 * 1024 * 1024,
"storage": {
"cls": "pipeline",
"steps": [
{"cls": "retry",},
{"cls": "filter",},
{
"cls": "buffer",
"min_batch_size": {
"content": 10000,
"content_bytes": 1073741824,
"directory": 2500,
"revision": 10,
"release": 100,
},
},
{"cls": "memory"},
],
},
}
# Expected snapshot ids (hex digests converted to bytes), one per test archive.

# Snapshot expected after loading pkg-gourmet.tgz.
GOURMET_SNAPSHOT = hashutil.hash_to_bytes("889cacc2731e3312abfb2b1a0c18ade82a949e07")
# NOTE(review): the regular digit pattern suggests a placeholder id rather than
# a computed one; in the visible code it is only referenced by _LAST_SNP_REV.
GOURMET_FLAG_SNAPSHOT = hashutil.hash_to_bytes(
    "0011223344556677889900112233445566778899"
)
# Snapshot expected after loading pkg-gourmet-with-updates.tgz.
GOURMET_UPDATES_SNAPSHOT = hashutil.hash_to_bytes(
    "11086d15317014e43d2438b7ffc712c44f1b8afe"
)
# Snapshot expected after loading pkg-gourmet-with-external-id.tgz.
GOURMET_EXTERNALS_SNAPSHOT = hashutil.hash_to_bytes(
    "19cb68d0a3f22372e2b7017ea5e2a2ea5ae3e09a"
)
# Snapshot expected after loading pkg-gourmet-with-edge-case-links-and-files.tgz.
GOURMET_EDGE_CASES_SNAPSHOT = hashutil.hash_to_bytes(
    "18e60982fe521a2546ab8c3c73a535d80462d9d0"
)
# Snapshot expected after loading pkg-gourmet-with-wrong-link-cases.tgz.
GOURMET_WRONG_LINKS_SNAPSHOT = hashutil.hash_to_bytes(
    "b17f38acabb90f066dedd30c29f01a02af88a5c4"
)
# Snapshot expected after loading mediawiki-repo-r407-eol-native-crlf.tgz.
MEDIAWIKI_SNAPSHOT = hashutil.hash_to_bytes("d6d6e9703f157c5702d9a4a5dec878926ed4ab76")
# Snapshot expected after loading pyang-repo-r343-eol-native-mixed-lf-crlf.tgz.
PYANG_SNAPSHOT = hashutil.hash_to_bytes("6d9590de11b00a5801de0ff3297c5b44bbbf7d24")
class SvnLoaderTest(SvnLoader):
    """An SVNLoader with no persistence.

    Context:
        Load a new svn repository using the swh policy (so no update).

    The configuration comes from _LOADER_TEST_CONFIG instead of a config file,
    and the "latest snapshot/revision" lookup is replaced by a value injected
    at construction time.
    """

    def __init__(
        self,
        url,
        last_snp_rev=None,
        destination_path=None,
        start_from_scratch=False,
        swh_revision=None,
    ):
        """Build the loader and record the canned previous-visit state.

        Args:
            url: svn repository url, also used as the origin url
            last_snp_rev: value returned by swh_latest_snapshot_revision;
              defaults to a fresh empty dict
            destination_path: forwarded to SvnLoader
            start_from_scratch: forwarded to SvnLoader
            swh_revision: forwarded to SvnLoader

        """
        super().__init__(
            url,
            destination_path=destination_path,
            start_from_scratch=start_from_scratch,
            swh_revision=swh_revision,
        )
        self.origin = Origin(url=url)
        # A `{}` default in the signature would be one dict shared by every
        # instance; build a new one per instance instead.
        self.last_snp_rev = {} if last_snp_rev is None else last_snp_rev

    def parse_config_file(self, *args, **kwargs):
        """Return the in-module test configuration, ignoring all arguments."""
        return _LOADER_TEST_CONFIG

    def swh_latest_snapshot_revision(self, origin_url, prev_swh_revision=None):
        """Avoid the storage persistence call and return the expected previous
        revision for that repository.

        Both arguments are ignored; the value given as `last_snp_rev` to the
        constructor is returned as is.

        Check the following for explanation about the hashes:
        - test_loader.org for (swh policy).
        - cf. SvnLoaderTest

        """
        return self.last_snp_rev
class BaseSvnLoaderTest(BaseLoaderTest):
    """Base class for unittest-style svn loader tests.

    Archive preparation is delegated to BaseLoaderTest.setUp; this class then
    picks (or receives) the loader under test and exposes its storage.
    """

    def setUp(
        self,
        archive_name="pkg-gourmet.tgz",
        filename="pkg-gourmet",
        loader=None,
        snapshot=None,
        type="default",
        start_from_scratch=False,
        swh_revision=None,
    ):
        """Prepare the repository and instantiate the loader to test.

        Args:
            archive_name: archive to prepare (forwarded to the parent setUp)
            filename: repository name (forwarded to the parent setUp)
            loader: ready-made loader; when truthy it is used as is
            snapshot: when truthy, passed to the loader as `last_snp_rev`
            type: "default" selects SvnLoaderTest, anything else selects
              SvnLoaderTestFromRemoteDump
            start_from_scratch: forwarded to the loader
            swh_revision: forwarded to the loader

        """
        super().setUp(
            archive_name=archive_name,
            filename=filename,
            prefix_tmp_folder_name="swh.loader.svn.",
            start_path=os.path.dirname(__file__),
        )
        # NOTE(review): repo_url and destination_path are presumably set by
        # BaseLoaderTest.setUp -- confirm against that class.
        self.svn_mirror_url = self.repo_url

        loader_cls = (
            SvnLoaderTest if type == "default" else SvnLoaderTestFromRemoteDump
        )

        if loader:
            self.loader = loader
        else:
            loader_kwargs = dict(
                destination_path=self.destination_path,
                start_from_scratch=start_from_scratch,
                swh_revision=swh_revision,
            )
            # Only override the loader's own default when a snapshot is given
            if snapshot:
                loader_kwargs["last_snp_rev"] = snapshot
            self.loader = loader_cls(self.svn_mirror_url, **loader_kwargs)

        self.storage = self.loader.storage
def prepare_repository_from_archive(
    archive_path: str, filename: Optional[str] = None, tmp_path: str = "/tmp"
) -> str:
    """Extract an archive with ``tar`` and return a ``file://`` url into it.

    Args:
        archive_path: archive holding the folder/repository/dump to ingest
        filename: last component of the returned url; when None or empty, the
          base name of `archive_path` is used instead
        tmp_path: directory in which the archive gets extracted

    Returns:
        The url ``file://{tmp_path}/{name}`` for the loader to ingest

    Raises:
        subprocess.CalledProcessError: when ``tar`` exits with a non-zero code

    """
    extract_cmd = ["tar", "xf", archive_path, "-C", tmp_path]
    subprocess.check_output(extract_cmd)

    # the origin url (or some derivative form) points inside the extraction dir
    name = filename or os.path.basename(archive_path)
    return f"file://{tmp_path}/{name}"
def test_loader_svn_new_visit(swh_config, datadir, tmp_path):
    """A first, eventful visit must produce exactly one snapshot."""
    name = "pkg-gourmet"
    origin_url = prepare_repository_from_archive(
        os.path.join(datadir, f"{name}.tgz"), name, tmp_path
    )

    svn_loader = SvnLoader(origin_url, destination_path=tmp_path)
    assert svn_loader.load() == {"status": "eventful"}

    assert_last_visit_matches(
        svn_loader.storage,
        origin_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT,
    )

    expected_counts = {
        "content": 19,
        "directory": 17,
        "origin": 1,
        "origin_visit": 1,
        "person": 1,
        "release": 0,
        "revision": 6,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert get_stats(svn_loader.storage) == expected_counts

    head_branch = {
        "target": "4876cb10aec6f708f7466dddf547567b65f6c39c",
        "target_type": "revision",
    }
    check_snapshot(
        {"id": GOURMET_SNAPSHOT, "branches": {"HEAD": head_branch}},
        svn_loader.storage,
    )
def test_loader_svn_2_visits_no_change(swh_config, datadir, tmp_path):
    """Re-visiting an unchanged repository must keep yielding the same snapshot.

    Three visits are made: two with the same loader, then one with a new
    loader resuming from an already-stored revision.
    """
    name = "pkg-gourmet"
    origin_url = prepare_repository_from_archive(
        os.path.join(datadir, f"{name}.tgz"), name, tmp_path
    )
    full_gourmet_visit = dict(status="full", type="svn", snapshot=GOURMET_SNAPSHOT)

    first_loader = SvnLoader(origin_url)
    assert first_loader.load() == {"status": "eventful"}
    first_visit = assert_last_visit_matches(
        first_loader.storage, origin_url, **full_gourmet_visit
    )

    # FIXME: This should be uneventful here as there is no change in between visits...
    assert first_loader.load() == {"status": "eventful"}
    second_visit = assert_last_visit_matches(
        first_loader.storage, origin_url, **full_gourmet_visit
    )

    assert first_visit.date < second_visit.date
    assert first_visit.snapshot == second_visit.snapshot

    counts = get_stats(first_loader.storage)
    assert counts["origin_visit"] == 1 + 1  # computed twice the same snapshot
    assert counts["snapshot"] == 1

    # even starting from previous revision...
    start_revision_id = hashutil.hash_to_bytes(
        "95edacc8848369d6fb1608e887d6d2474fd5224f"
    )
    start_revision = list(first_loader.storage.revision_get([start_revision_id]))[0]
    assert start_revision is not None

    resumed_loader = SvnLoader(origin_url, swh_revision=start_revision)
    assert resumed_loader.load() == {"status": "eventful"}

    counts = get_stats(resumed_loader.storage)
    assert counts["origin_visit"] == 2 + 1
    # ... with no change in repository, this yields the same snapshot
    assert counts["snapshot"] == 1

    assert_last_visit_matches(
        resumed_loader.storage, origin_url, **full_gourmet_visit
    )
# Canned "previous visit" state: a snapshot plus a revision dict whose
# extra_headers mark it as svn revision 6 of the repository with the given uuid.
# NOTE(review): presumably shaped like the value a loader's
# swh_latest_snapshot_revision returns (cf. SvnLoaderTest.last_snp_rev) --
# confirm against SvnLoader.
_LAST_SNP_REV = {
    # branch-less snapshot carrying the GOURMET_FLAG_SNAPSHOT id
    "snapshot": Snapshot.from_dict({"id": GOURMET_FLAG_SNAPSHOT, "branches": {}}),
    "revision": {
        "id": hashutil.hash_to_bytes("4876cb10aec6f708f7466dddf547567b65f6c39c"),
        "parents": (
            hashutil.hash_to_bytes("a3a577948fdbda9d1061913b77a1588695eadb41"),
        ),
        "directory": hashutil.hash_to_bytes("0deab3023ac59398ae467fc4bff5583008af1ee2"),
        "target_type": "revision",
        "metadata": {
            "extra_headers": [
                ["svn_repo_uuid", "3187e211-bb14-4c82-9596-0b59d67cd7f4"],
                ["svn_revision", "6"],
            ]
        },
    },
}
-class SvnLoaderTest3(BaseSvnLoaderTest):
+def test_loader_tampered_repository(swh_config, datadir, tmp_path):
"""In this scenario, the dump has been tampered with to modify the
- commit log. This results in a hash divergence which is
- detected at startup.
+ commit log [1]. This results in a hash divergence which is
+ detected at startup after a new run for the same origin.
In effect, that stops the loading and does nothing.
+ [1] Tampering with revision 6 log message following:
+
+ ```
+ tar xvf pkg-gourmet.tgz # initial repository ingested
+ cd pkg-gourmet/
+ echo "Tampering with commit log message for fun and profit" > log.txt
+ svnadmin setlog . -r 6 log.txt --bypass-hooks
+ tar cvf pkg-gourmet-tampered-rev6-log.tgz pkg-gourmet/
+ ```
"""
+ archive_name = "pkg-gourmet"
+ archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+ repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- def setUp(self):
- last_snp_rev = copy.deepcopy(_LAST_SNP_REV)
- last_snp_rev["snapshot"] = None
- # Changed the revision id's hash to simulate history altered
- last_snp_rev["revision"]["id"] = hashutil.hash_to_bytes(
- "badbadbadbadf708f7466dddf547567b65f6c39d"
- )
- # the svn repository pkg-gourmet has been updated with changes
- super().setUp(
- archive_name="pkg-gourmet-with-updates.tgz", snapshot=last_snp_rev
- )
+ loader = SvnLoader(repo_url)
+ assert loader.load() == {"status": "eventful"}
+ expected_snapshot = {
+ "id": GOURMET_SNAPSHOT,
+ "branches": {
+ "HEAD": {
+ "target": "4876cb10aec6f708f7466dddf547567b65f6c39c",
+ "target_type": "revision",
+ }
+ },
+ }
+ check_snapshot(expected_snapshot, loader.storage)
- def test_load(self):
- """Load known repository with history altered should do nothing
+ archive_path2 = os.path.join(datadir, "pkg-gourmet-tampered-rev6-log.tgz")
+ repo_tampered_url = prepare_repository_from_archive(
+ archive_path2, archive_name, tmp_path
+ )
- """
- # when
- assert self.loader.load() == {"status": "failed"}
-
- # then
- # we got the previous run's last revision (rev 6)
- # so 2 news + 1 old
- self.assertCountContents(0)
- self.assertCountDirectories(0)
- self.assertCountRevisions(0)
- self.assertCountReleases(0)
- self.assertCountSnapshots(0)
- self.assertEqual(self.loader.visit_status(), "partial")
-
- visit_status = assert_last_visit_matches(
- self.storage, self.repo_url, status="partial", type="svn"
- )
- assert visit_status.snapshot is None
+ loader2 = SvnLoader(repo_tampered_url, origin_url=repo_url)
+ assert loader2.load() == {"status": "failed"}
+
+ assert_last_visit_matches(
+ loader2.storage, repo_url, status="partial", type="svn", snapshot=None,
+ )
+
+ stats = get_stats(loader.storage)
+ assert stats["origin"] == 1
+ assert stats["origin_visit"] == 2
+ assert stats["snapshot"] == 1
def test_loader_svn_visit_with_changes(swh_config, datadir, tmp_path):
    """Visiting an origin whose repository gained commits stores new objects.

    The second visit must yield one new snapshot; a third visit started from
    scratch must end up on that same snapshot without creating another one.
    """
    name = "pkg-gourmet"
    repo_initial_url = prepare_repository_from_archive(
        os.path.join(datadir, f"{name}.tgz"), name, tmp_path
    )

    # repo_initial_url becomes the origin_url we want to visit some more below
    initial_loader = SvnLoader(repo_initial_url)
    assert initial_loader.load() == {"status": "eventful"}
    first_visit = assert_last_visit_matches(
        initial_loader.storage,
        repo_initial_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT,
    )

    # Extract the updated repository under the same folder name
    repo_updated_url = prepare_repository_from_archive(
        os.path.join(datadir, "pkg-gourmet-with-updates.tgz"), "pkg-gourmet", tmp_path
    )
    updated_visit = dict(status="full", type="svn", snapshot=GOURMET_UPDATES_SNAPSHOT)

    update_loader = SvnLoader(repo_updated_url, origin_url=repo_initial_url)
    assert update_loader.load() == {"status": "eventful"}
    second_visit = assert_last_visit_matches(
        update_loader.storage, repo_updated_url, **updated_visit
    )

    assert first_visit.date < second_visit.date
    assert first_visit.snapshot != second_visit.snapshot

    expected_counts = {
        "content": 22,
        "directory": 28,
        "origin": 1,
        "origin_visit": 2,
        "person": 2,
        "release": 0,
        "revision": 11,
        "skipped_content": 0,
        "snapshot": 2,
    }
    assert get_stats(update_loader.storage) == expected_counts

    expected_snapshot = {
        "id": GOURMET_UPDATES_SNAPSHOT,
        "branches": {
            "HEAD": {
                "target": "171dc35522bfd17dda4e90a542a0377fb2fc707a",
                "target_type": "revision",
            }
        },
    }
    check_snapshot(expected_snapshot, update_loader.storage)

    # Start from scratch loading yields the same result
    scratch_loader = SvnLoader(
        repo_updated_url, origin_url=repo_initial_url, start_from_scratch=True
    )
    assert scratch_loader.load() == {"status": "eventful"}
    third_visit = assert_last_visit_matches(
        scratch_loader.storage, repo_updated_url, **updated_visit
    )

    assert second_visit.date < third_visit.date
    assert third_visit.snapshot == second_visit.snapshot
    check_snapshot(expected_snapshot, scratch_loader.storage)

    counts = get_stats(scratch_loader.storage)
    assert counts["origin"] == 1  # always the same visit
    assert counts["origin_visit"] == 2 + 1  # 1 more visit
    assert counts["snapshot"] == 2  # no new snapshot
def test_loader_svn_visit_start_from_revision(swh_config, datadir, tmp_path):
    """Resuming from a stored revision on a changed repo yields 1 new snapshot."""
    name = "pkg-gourmet"
    repo_initial_url = prepare_repository_from_archive(
        os.path.join(datadir, f"{name}.tgz"), name, tmp_path
    )

    # repo_initial_url becomes the origin_url we want to visit some more below
    initial_loader = SvnLoader(repo_initial_url)
    assert initial_loader.load() == {"status": "eventful"}
    first_visit = assert_last_visit_matches(
        initial_loader.storage,
        repo_initial_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT,
    )

    # Pick a revision stored by the first visit as the resume point
    start_revision_id = hashutil.hash_to_bytes(
        "95edacc8848369d6fb1608e887d6d2474fd5224f"
    )
    start_revision = list(initial_loader.storage.revision_get([start_revision_id]))[0]
    assert start_revision is not None

    repo_updated_url = prepare_repository_from_archive(
        os.path.join(datadir, "pkg-gourmet-with-updates.tgz"), "pkg-gourmet", tmp_path
    )

    # we'll start from start_revision
    resumed_loader = SvnLoader(
        repo_updated_url, origin_url=repo_initial_url, swh_revision=start_revision
    )
    assert resumed_loader.load() == {"status": "eventful"}

    # nonetheless, we obtain the same snapshot (as previous tests on that repository)
    second_visit = assert_last_visit_matches(
        resumed_loader.storage,
        repo_updated_url,
        status="full",
        type="svn",
        snapshot=GOURMET_UPDATES_SNAPSHOT,
    )

    assert first_visit.date < second_visit.date
    assert first_visit.snapshot != second_visit.snapshot

    expected_counts = {
        "content": 22,
        "directory": 28,
        "origin": 1,
        "origin_visit": 2,
        "person": 2,
        "release": 0,
        "revision": 11,
        "skipped_content": 0,
        "snapshot": 2,
    }
    assert get_stats(resumed_loader.storage) == expected_counts

    head_branch = {
        "target": "171dc35522bfd17dda4e90a542a0377fb2fc707a",
        "target_type": "revision",
    }
    check_snapshot(
        {"id": GOURMET_UPDATES_SNAPSHOT, "branches": {"HEAD": head_branch}},
        resumed_loader.storage,
    )
def test_loader_svn_visit_with_eol_style(swh_config, datadir, tmp_path):
    """A file with CRLF endings and svn:eol-style 'native' must still load.

    Such a file violates the svn specification (it should have been stored
    with LF line endings), but the repository is expected to be ingested
    anyway.
    """
    name = "mediawiki-repo-r407-eol-native-crlf"
    origin_url = prepare_repository_from_archive(
        os.path.join(datadir, f"{name}.tgz"), name, tmp_path
    )

    svn_loader = SvnLoader(origin_url)
    assert svn_loader.load() == {"status": "eventful"}

    head_branch = {
        "target": "7da4975c363101b819756d33459f30a866d01b1b",
        "target_type": "revision",
    }
    check_snapshot(
        {"id": MEDIAWIKI_SNAPSHOT, "branches": {"HEAD": head_branch}},
        svn_loader.storage,
    )

    assert_last_visit_matches(
        svn_loader.storage,
        origin_url,
        status="full",
        type="svn",
        snapshot=MEDIAWIKI_SNAPSHOT,
    )

    counts = get_stats(svn_loader.storage)
    for object_type in ("origin", "origin_visit", "snapshot"):
        assert counts[object_type] == 1
def test_loader_svn_visit_with_mixed_crlf_lf(swh_config, datadir, tmp_path):
    """A file mixing CRLF and LF with svn:eol-style 'native' must still load.

    Mixed line endings should not be stored for textual content when the
    svn:eol-style property is set (svn specification violation), but the
    repository is expected to be ingested anyway.
    """
    name = "pyang-repo-r343-eol-native-mixed-lf-crlf"
    origin_url = prepare_repository_from_archive(
        os.path.join(datadir, f"{name}.tgz"), name, tmp_path
    )

    svn_loader = SvnLoader(origin_url)
    assert svn_loader.load() == {"status": "eventful"}

    head_branch = {
        "target": "9c6962eeb9164a636c374be700672355e34a98a7",
        "target_type": "revision",
    }
    check_snapshot(
        {"id": PYANG_SNAPSHOT, "branches": {"HEAD": head_branch}},
        svn_loader.storage,
    )

    assert_last_visit_matches(
        svn_loader.storage,
        origin_url,
        status="full",
        type="svn",
        snapshot=PYANG_SNAPSHOT,
    )

    counts = get_stats(svn_loader.storage)
    for object_type in ("origin", "origin_visit", "snapshot"):
        assert counts[object_type] == 1
def test_loader_svn_with_external_properties(swh_config, datadir, tmp_path):
    """A repository with svn:externals properties is only partially ingested.

    The load is still reported as eventful, but the visit ends up "partial".
    """
    origin_url = prepare_repository_from_archive(
        os.path.join(datadir, "pkg-gourmet-with-external-id.tgz"),
        "pkg-gourmet",
        tmp_path,
    )

    svn_loader = SvnLoader(origin_url)
    assert svn_loader.load() == {"status": "eventful"}

    # repository holds 21 revisions, but the last commit holds an 'svn:externals'
    # property which will make the loader-svn stop at the last revision prior to
    # the bad one
    head_branch = {
        "target": "82a7a4a09f9549223429143ba36ad77375e33c5c",
        "target_type": "revision",
    }
    check_snapshot(
        {"id": GOURMET_EXTERNALS_SNAPSHOT, "branches": {"HEAD": head_branch}},
        svn_loader.storage,
    )

    assert_last_visit_matches(
        svn_loader.storage,
        origin_url,
        status="partial",
        type="svn",
        snapshot=GOURMET_EXTERNALS_SNAPSHOT,
    )

    counts = get_stats(svn_loader.storage)
    for object_type in ("origin", "origin_visit", "snapshot"):
        assert counts[object_type] == 1
    assert counts["revision"] == 21 - 1  # commit with the svn:external property
def test_loader_svn_with_symlink(swh_config, datadir, tmp_path):
    """A repository holding symlinks must be fully ingested.

    Edge case:
    - first create a file and commit it.
      Remove it, then add folder holding the same name, commit.
    - do the same scenario with symbolic link (instead of file)
    """
    origin_url = prepare_repository_from_archive(
        os.path.join(datadir, "pkg-gourmet-with-edge-case-links-and-files.tgz"),
        "pkg-gourmet",
        tmp_path,
    )

    svn_loader = SvnLoader(origin_url)
    assert svn_loader.load() == {"status": "eventful"}

    head_branch = {
        "target": "3f43af2578fccf18b0d4198e48563da7929dc608",
        "target_type": "revision",
    }
    check_snapshot(
        {"id": GOURMET_EDGE_CASES_SNAPSHOT, "branches": {"HEAD": head_branch}},
        svn_loader.storage,
    )

    assert_last_visit_matches(
        svn_loader.storage,
        origin_url,
        status="full",
        type="svn",
        snapshot=GOURMET_EDGE_CASES_SNAPSHOT,
    )

    counts = get_stats(svn_loader.storage)
    for object_type in ("origin", "origin_visit", "snapshot"):
        assert counts[object_type] == 1
    assert counts["revision"] == 19
def test_loader_svn_with_wrong_symlinks(swh_config, datadir, tmp_path):
    """A repository holding wrong symlinks must be fully ingested nonetheless.

    Edge case:
    - wrong symbolic link
    - wrong symbolic link with empty space names
    """
    origin_url = prepare_repository_from_archive(
        os.path.join(datadir, "pkg-gourmet-with-wrong-link-cases.tgz"),
        "pkg-gourmet",
        tmp_path,
    )

    svn_loader = SvnLoader(origin_url)
    assert svn_loader.load() == {"status": "eventful"}

    head_branch = {
        "target": "cf30d3bb9d5967d0a2bbeacc405f10a5dd9b138a",
        "target_type": "revision",
    }
    check_snapshot(
        {"id": GOURMET_WRONG_LINKS_SNAPSHOT, "branches": {"HEAD": head_branch}},
        svn_loader.storage,
    )

    assert_last_visit_matches(
        svn_loader.storage,
        origin_url,
        status="full",
        type="svn",
        snapshot=GOURMET_WRONG_LINKS_SNAPSHOT,
    )

    counts = get_stats(svn_loader.storage)
    for object_type in ("origin", "origin_visit", "snapshot"):
        assert counts[object_type] == 1
    assert counts["revision"] == 21
def test_loader_svn_loader_from_dump_archive(swh_config, datadir, tmp_path):
    """SvnLoaderFromRemoteDump and SvnLoader must agree on the snapshot.

    Scenario:
    - load the repository with SvnLoaderFromRemoteDump
    - load it again with SvnLoader under another origin url: eventful, one
      more origin, same snapshot
    - load it once more with SvnLoader under the initial url: uneventful
    """
    name = "pkg-gourmet"
    repo_url = prepare_repository_from_archive(
        os.path.join(datadir, f"{name}.tgz"), name, tmp_path
    )
    full_gourmet_visit = dict(status="full", type="svn", snapshot=GOURMET_SNAPSHOT)

    dump_loader = SvnLoaderFromRemoteDump(repo_url)
    assert dump_loader.load() == {"status": "eventful"}
    assert_last_visit_matches(dump_loader.storage, repo_url, **full_gourmet_visit)

    origin_url = repo_url + "2"  # rename to another origin
    renamed_loader = SvnLoader(repo_url, origin_url=origin_url)
    # eventful because we are working on a new origin
    assert renamed_loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        renamed_loader.storage, origin_url, **full_gourmet_visit
    )

    head_branch = {
        "target": "4876cb10aec6f708f7466dddf547567b65f6c39c",
        "target_type": "revision",
    }
    check_snapshot(
        {"id": GOURMET_SNAPSHOT, "branches": {"HEAD": head_branch}},
        renamed_loader.storage,
    )

    counts = get_stats(renamed_loader.storage)
    assert counts["origin"] == 2  # created one more origin
    assert counts["origin_visit"] == 2
    assert counts["snapshot"] == 1

    same_origin_loader = SvnLoader(repo_url)  # no change on the origin-url
    assert same_origin_loader.load() == {"status": "uneventful"}
    # NOTE(review): this inspects the renamed origin (origin_url), not repo_url
    # which was just visited -- confirm this is the intended check.
    assert_last_visit_matches(
        same_origin_loader.storage, origin_url, **full_gourmet_visit
    )

    counts = get_stats(same_origin_loader.storage)
    assert counts["origin"] == 2
    assert counts["origin_visit"] == 3
    assert counts["snapshot"] == 1
class SvnLoaderTestFromRemoteDump(SvnLoaderTest, SvnLoaderFromRemoteDump):
    """SvnLoaderFromRemoteDump combined with the SvnLoaderTest overrides.

    Adds nothing of its own: SvnLoaderTest comes first in the bases, so its
    constructor, test configuration and canned previous-visit state apply.
    """
def test_loader_user_defined_svn_properties(swh_config, datadir, tmp_path):
    """User-defined svn properties with special encodings must not block loading.

    Edge case: such properties used to prevent the repository from being
    loaded, even though that information is not ingested.
    """
    name = "httthttt"
    origin_url = prepare_repository_from_archive(
        os.path.join(datadir, f"{name}.tgz"), name, tmp_path
    )

    svn_loader = SvnLoader(origin_url)
    assert svn_loader.load() == {"status": "eventful"}

    snapshot_id = hashutil.hash_to_bytes("70487267f682c07e52a2371061369b6cf5bffa47")
    head_branch = {
        "target": "604a17dbb15e8d7ecb3e9f3768d09bf493667a93",
        "target_type": "revision",
    }
    check_snapshot(
        {"id": snapshot_id, "branches": {"HEAD": head_branch}},
        svn_loader.storage,
    )

    assert_last_visit_matches(
        svn_loader.storage,
        origin_url,
        status="full",
        type="svn",
        snapshot=snapshot_id,
    )

    counts = get_stats(svn_loader.storage)
    for object_type in ("origin", "origin_visit", "snapshot"):
        assert counts[object_type] == 1
    assert counts["revision"] == 7

File Metadata

Mime Type
text/x-diff
Expires
Jul 4 2025, 8:11 AM (10 w, 4 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3292805

Event Timeline