Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9340831
test_loader.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
70 KB
Subscribers
None
test_loader.py
View Options
# Copyright (C) 2016-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import
os
import
shutil
import
subprocess
import
textwrap
from
typing
import
Any
,
Dict
import
pytest
from
subvertpy
import
SubversionException
from
swh.loader.svn.loader
import
(
SvnLoader
,
SvnLoaderFromDumpArchive
,
SvnLoaderFromRemoteDump
,
)
from
swh.loader.svn.svn
import
SvnRepo
from
swh.loader.svn.utils
import
init_svn_repo_from_dump
from
swh.loader.tests
import
(
assert_last_visit_matches
,
check_snapshot
,
get_stats
,
prepare_repository_from_archive
,
)
from
swh.model.from_disk
import
DentryPerms
from
swh.model.hashutil
import
hash_to_bytes
from
swh.model.model
import
Snapshot
,
SnapshotBranch
,
TargetType
from
.utils
import
CommitChange
,
CommitChangeType
,
add_commit
# Expected snapshot for the pristine pkg-gourmet test repository.
GOURMET_SNAPSHOT = Snapshot(
    id=hash_to_bytes("889cacc2731e3312abfb2b1a0c18ade82a949e07"),
    branches={
        b"HEAD": SnapshotBranch(
            target=hash_to_bytes("4876cb10aec6f708f7466dddf547567b65f6c39c"),
            target_type=TargetType.REVISION,
        )
    },
)
# Expected snapshot for pkg-gourmet after the "with-updates" changes land.
GOURMET_UPDATES_SNAPSHOT = Snapshot(
    id=hash_to_bytes("11086d15317014e43d2438b7ffc712c44f1b8afe"),
    branches={
        b"HEAD": SnapshotBranch(
            target=hash_to_bytes("171dc35522bfd17dda4e90a542a0377fb2fc707a"),
            target_type=TargetType.REVISION,
        )
    },
)
def test_loader_svn_not_found_no_mock(swh_storage, tmp_path):
    """Given an unknown repository, the loader visit ends up in status not_found"""
    repo_url = "unknown-repository"
    loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path)

    # Nothing to ingest: the run itself stays uneventful...
    assert loader.load() == {"status": "uneventful"}

    # ...but the visit is recorded as not_found.
    assert_last_visit_matches(
        swh_storage,
        repo_url,
        status="not_found",
        type="svn",
    )
@pytest.mark.parametrize(
    "exception_msg",
    [
        "Unable to connect to a repository at URL",
        "Unknown URL type",
    ],
)
def test_loader_svn_not_found(swh_storage, tmp_path, exception_msg, mocker):
    """Given unknown repository issues, the loader visit ends up in status not_found"""
    # Make the repository connection fail with a "repo missing"-style error.
    mock = mocker.patch("swh.loader.svn.loader.SvnRepo")
    mock.side_effect = SubversionException(exception_msg, 0)

    unknown_repo_url = "unknown-repository"
    loader = SvnLoader(swh_storage, unknown_repo_url, temp_directory=tmp_path)

    assert loader.load() == {"status": "uneventful"}
    assert_last_visit_matches(
        swh_storage,
        unknown_repo_url,
        status="not_found",
        type="svn",
    )
@pytest.mark.parametrize(
    "exception",
    [
        SubversionException("Irrelevant message, considered a failure", 10),
        SubversionException("Present but fails to read, considered a failure", 20),
        ValueError("considered a failure"),
    ],
)
def test_loader_svn_failures(swh_storage, tmp_path, exception, mocker):
    """Given any errors raised, the loader visit ends up in status failed"""
    # Any error other than the "not found" ones must mark the visit failed.
    mock = mocker.patch("swh.loader.svn.loader.SvnRepo")
    mock.side_effect = exception

    existing_repo_url = "existing-repo-url"
    loader = SvnLoader(swh_storage, existing_repo_url, temp_directory=tmp_path)

    assert loader.load() == {"status": "failed"}
    assert_last_visit_matches(
        swh_storage,
        existing_repo_url,
        status="failed",
        type="svn",
    )
def test_loader_svnrdump_not_found(swh_storage, tmp_path, mocker):
    """Loading from remote dump which does not exist should end up as not_found visit"""
    unknown_repo_url = "file:///tmp/svn.code.sf.net/p/white-rats-studios/svn"

    loader = SvnLoaderFromRemoteDump(
        swh_storage, unknown_repo_url, temp_directory=tmp_path
    )

    assert loader.load() == {"status": "uneventful"}
    assert_last_visit_matches(
        swh_storage,
        unknown_repo_url,
        status="not_found",
        type="svn",
    )
def test_loader_svnrdump_no_such_revision(swh_storage, tmp_path, datadir):
    """Visit multiple times an origin with the remote loader should not raise.

    It used to fail the ingestion on the second visit with a "No such revision x,
    160006" message.
    """
    archive_ori_dump = os.path.join(datadir, "penguinsdbtools2018.dump.gz")
    archive_dump_dir = os.path.join(tmp_path, "dump")
    os.mkdir(archive_dump_dir)
    archive_dump = os.path.join(archive_dump_dir, "penguinsdbtools2018.dump.gz")
    # loader now drops the dump as soon as it's mounted so we need to make a copy first
    shutil.copyfile(archive_ori_dump, archive_dump)

    loading_path = str(tmp_path / "loading")
    os.mkdir(loading_path)

    # Prepare the dump as a local svn repository for test purposes
    temp_dir, repo_path = init_svn_repo_from_dump(
        archive_dump, root_dir=tmp_path, gzip=True
    )
    repo_url = f"file://{repo_path}"

    loader = SvnLoaderFromRemoteDump(
        swh_storage, repo_url, temp_directory=loading_path
    )
    assert loader.load() == {"status": "eventful"}
    actual_visit = assert_last_visit_matches(
        swh_storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)

    loader2 = SvnLoaderFromRemoteDump(
        swh_storage, repo_url, temp_directory=loading_path
    )
    # Visiting a second time the same repository should be uneventful...
    assert loader2.load() == {"status": "uneventful"}
    actual_visit2 = assert_last_visit_matches(
        swh_storage,
        repo_url,
        status="full",
        type="svn",
    )

    assert actual_visit.snapshot is not None
    # ... with the same snapshot as the first visit
    assert actual_visit2.snapshot == actual_visit.snapshot
def test_loader_svn_new_visit(swh_storage, datadir, tmp_path):
    """Eventful visit should yield 1 snapshot"""
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path)

    assert loader.load() == {"status": "eventful"}

    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )
    check_snapshot(loader.snapshot, loader.storage)

    # Every object category of the repository must have been ingested.
    stats = get_stats(loader.storage)
    assert stats == {
        "content": 19,
        "directory": 17,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 6,
        "skipped_content": 0,
        "snapshot": 1,
    }

    check_snapshot(GOURMET_SNAPSHOT, loader.storage)
def test_loader_svn_2_visits_no_change(swh_storage, datadir, tmp_path):
    """Visit multiple times a repository with no change should yield the same snapshot"""
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path)

    assert loader.load() == {"status": "eventful"}
    visit_status1 = assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )
    check_snapshot(loader.snapshot, loader.storage)

    # Second visit: nothing changed upstream.
    assert loader.load() == {"status": "uneventful"}
    visit_status2 = assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )

    assert visit_status1.date < visit_status2.date
    assert visit_status1.snapshot == visit_status2.snapshot

    stats = get_stats(loader.storage)
    assert stats["origin_visit"] == 1 + 1  # computed twice the same snapshot
    assert stats["snapshot"] == 1

    # even starting from previous revision...
    start_revision = loader.storage.revision_get(
        [hash_to_bytes("95edacc8848369d6fb1608e887d6d2474fd5224f")]
    )[0]
    assert start_revision is not None
    loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path)
    assert loader.load() == {"status": "uneventful"}

    stats = get_stats(loader.storage)
    assert stats["origin_visit"] == 2 + 1
    # ... with no change in repository, this yields the same snapshot
    assert stats["snapshot"] == 1

    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )
def test_loader_tampered_repository(swh_storage, datadir, tmp_path):
    """In this scenario, the dump has been tampered with to modify the
    commit log [1]. This results in a hash divergence which is
    detected at startup after a new run for the same origin.

    In effect, this will perform a complete reloading of the repository.

    [1] Tampering with revision 6 log message following:

    ```
    tar xvf pkg-gourmet.tgz # initial repository ingested
    cd pkg-gourmet/
    echo "Tampering with commit log message for fun and profit" > log.txt
    svnadmin setlog . -r 6 log.txt --bypass-hooks
    tar cvf pkg-gourmet-tampered-rev6-log.tgz pkg-gourmet/
    ```
    """
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path)
    assert loader.load() == {"status": "eventful"}
    check_snapshot(GOURMET_SNAPSHOT, loader.storage)
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )
    check_snapshot(loader.snapshot, loader.storage)

    # Re-ingest the tampered copy under the same origin URL: the hash
    # divergence on revision 6 must trigger a full reload, ending on a
    # different snapshot.
    archive_path2 = os.path.join(datadir, "pkg-gourmet-tampered-rev6-log.tgz")
    repo_tampered_url = prepare_repository_from_archive(
        archive_path2, archive_name, tmp_path
    )

    loader2 = SvnLoader(
        swh_storage, repo_tampered_url, origin_url=repo_url, temp_directory=tmp_path
    )
    assert loader2.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader2.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=hash_to_bytes("5aa61959e788e281fd6e187053d0f46c68e8d8bb"),
    )
    # Fix: check the snapshot produced by the second (tampered) load; the
    # original code re-checked loader.snapshot, leaving loader2's snapshot
    # (asserted just above) without an integrity check.
    check_snapshot(loader2.snapshot, loader2.storage)

    stats = get_stats(loader.storage)
    assert stats["origin"] == 1
    assert stats["origin_visit"] == 2
    assert stats["snapshot"] == 2
def test_loader_svn_visit_with_changes(swh_storage, datadir, tmp_path):
    """In this scenario, the repository has been updated with new changes.
    The loading visit should result in new objects stored and 1 new
    snapshot.
    """
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_initial_url = prepare_repository_from_archive(
        archive_path, archive_name, tmp_path
    )

    # repo_initial_url becomes the origin_url we want to visit some more below
    loader = SvnLoader(swh_storage, repo_initial_url, temp_directory=tmp_path)

    assert loader.load() == {"status": "eventful"}
    visit_status1 = assert_last_visit_matches(
        loader.storage,
        repo_initial_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )
    check_snapshot(GOURMET_SNAPSHOT, loader.storage)

    archive_path = os.path.join(datadir, "pkg-gourmet-with-updates.tgz")
    repo_updated_url = prepare_repository_from_archive(
        archive_path, "pkg-gourmet", tmp_path
    )

    loader = SvnLoader(
        swh_storage,
        repo_updated_url,
        origin_url=repo_initial_url,
        temp_directory=tmp_path,
    )

    assert loader.load() == {"status": "eventful"}
    visit_status2 = assert_last_visit_matches(
        loader.storage,
        repo_updated_url,
        status="full",
        type="svn",
        snapshot=GOURMET_UPDATES_SNAPSHOT.id,
    )

    assert visit_status1.date < visit_status2.date
    assert visit_status1.snapshot != visit_status2.snapshot

    stats = get_stats(loader.storage)
    assert stats == {
        "content": 22,
        "directory": 28,
        "origin": 1,
        "origin_visit": 2,
        "release": 0,
        "revision": 11,
        "skipped_content": 0,
        "snapshot": 2,
    }

    check_snapshot(GOURMET_UPDATES_SNAPSHOT, loader.storage)

    # Let's start the ingestion from the start, this should yield the same result
    loader = SvnLoader(
        swh_storage,
        repo_updated_url,
        origin_url=repo_initial_url,
        incremental=False,
        temp_directory=tmp_path,
    )
    assert loader.load() == {"status": "eventful"}
    visit_status3 = assert_last_visit_matches(
        loader.storage,
        repo_updated_url,
        status="full",
        type="svn",
        snapshot=GOURMET_UPDATES_SNAPSHOT.id,
    )
    assert visit_status2.date < visit_status3.date
    assert visit_status3.snapshot == visit_status2.snapshot
    check_snapshot(GOURMET_UPDATES_SNAPSHOT, loader.storage)

    stats = get_stats(loader.storage)
    assert stats["origin"] == 1  # always the same visit
    assert stats["origin_visit"] == 2 + 1  # 1 more visit
    assert stats["snapshot"] == 2  # no new snapshot
def test_loader_svn_visit_start_from_revision(swh_storage, datadir, tmp_path):
    """Starting from existing revision, next visit on changed repo should yield 1 new
    snapshot.
    """
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_initial_url = prepare_repository_from_archive(
        archive_path, archive_name, tmp_path
    )

    # repo_initial_url becomes the origin_url we want to visit some more below
    loader = SvnLoader(swh_storage, repo_initial_url, temp_directory=tmp_path)

    assert loader.load() == {"status": "eventful"}
    visit_status1 = assert_last_visit_matches(
        loader.storage,
        repo_initial_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )
    check_snapshot(GOURMET_SNAPSHOT, loader.storage)

    start_revision = loader.storage.revision_get(
        [hash_to_bytes("95edacc8848369d6fb1608e887d6d2474fd5224f")]
    )[0]
    assert start_revision is not None

    archive_path = os.path.join(datadir, "pkg-gourmet-with-updates.tgz")
    repo_updated_url = prepare_repository_from_archive(
        archive_path, "pkg-gourmet", tmp_path
    )

    # we'll start from start_revision
    loader = SvnLoader(
        swh_storage,
        repo_updated_url,
        origin_url=repo_initial_url,
        temp_directory=tmp_path,
    )

    assert loader.load() == {"status": "eventful"}

    # nonetheless, we obtain the same snapshot (as previous tests on that repository)
    visit_status2 = assert_last_visit_matches(
        loader.storage,
        repo_updated_url,
        status="full",
        type="svn",
        snapshot=GOURMET_UPDATES_SNAPSHOT.id,
    )

    assert visit_status1.date < visit_status2.date
    assert visit_status1.snapshot != visit_status2.snapshot

    stats = get_stats(loader.storage)
    assert stats == {
        "content": 22,
        "directory": 28,
        "origin": 1,
        "origin_visit": 2,
        "release": 0,
        "revision": 11,
        "skipped_content": 0,
        "snapshot": 2,
    }

    check_snapshot(GOURMET_UPDATES_SNAPSHOT, loader.storage)
def test_loader_svn_visit_with_eol_style(swh_storage, datadir, tmp_path):
    """Check that a svn repo containing a versioned file with CRLF line
    endings with svn:eol-style property set to 'native' (this is a
    violation of svn specification as the file should have been
    stored with LF line endings) can be loaded anyway.
    """
    archive_name = "mediawiki-repo-r407-eol-native-crlf"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path)

    assert loader.load() == {"status": "eventful"}
    mediawiki_snapshot = Snapshot(
        id=hash_to_bytes("d6d6e9703f157c5702d9a4a5dec878926ed4ab76"),
        branches={
            b"HEAD": SnapshotBranch(
                target=hash_to_bytes("7da4975c363101b819756d33459f30a866d01b1b"),
                target_type=TargetType.REVISION,
            )
        },
    )
    check_snapshot(mediawiki_snapshot, loader.storage)
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=mediawiki_snapshot.id,
    )

    stats = get_stats(loader.storage)
    assert stats["origin"] == 1
    assert stats["origin_visit"] == 1
    assert stats["snapshot"] == 1
def test_loader_svn_visit_with_mixed_crlf_lf(swh_storage, datadir, tmp_path):
    """Check that a svn repo containing a versioned file with mixed
    CRLF/LF line endings with svn:eol-style property set to 'native'
    (this is a violation of svn specification as mixed line endings
    for textual content should not be stored when the svn:eol-style
    property is set) can be loaded anyway.
    """
    archive_name = "pyang-repo-r343-eol-native-mixed-lf-crlf"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path)

    assert loader.load() == {"status": "eventful"}
    pyang_snapshot = Snapshot(
        id=hash_to_bytes("6d9590de11b00a5801de0ff3297c5b44bbbf7d24"),
        branches={
            b"HEAD": SnapshotBranch(
                target=hash_to_bytes("9c6962eeb9164a636c374be700672355e34a98a7"),
                target_type=TargetType.REVISION,
            )
        },
    )
    check_snapshot(pyang_snapshot, loader.storage)
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=pyang_snapshot.id,
    )

    stats = get_stats(loader.storage)
    assert stats["origin"] == 1
    assert stats["origin_visit"] == 1
    assert stats["snapshot"] == 1
def test_loader_svn_with_symlink(swh_storage, datadir, tmp_path):
    """Repository with symlinks should be ingested ok

    Edge case:
    - first create a file and commit it.
      Remove it, then add folder holding the same name, commit.
    - do the same scenario with symbolic link (instead of file)
    """
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(
        datadir, "pkg-gourmet-with-edge-case-links-and-files.tgz"
    )
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path)

    assert loader.load() == {"status": "eventful"}
    gourmet_edge_cases_snapshot = Snapshot(
        id=hash_to_bytes("18e60982fe521a2546ab8c3c73a535d80462d9d0"),
        branches={
            b"HEAD": SnapshotBranch(
                target=hash_to_bytes("3f43af2578fccf18b0d4198e48563da7929dc608"),
                target_type=TargetType.REVISION,
            )
        },
    )
    check_snapshot(gourmet_edge_cases_snapshot, loader.storage)
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=gourmet_edge_cases_snapshot.id,
    )

    stats = get_stats(loader.storage)
    assert stats["origin"] == 1
    assert stats["origin_visit"] == 1
    assert stats["snapshot"] == 1
    assert stats["revision"] == 19
def test_loader_svn_with_wrong_symlinks(swh_storage, datadir, tmp_path):
    """Repository with wrong symlinks should be ingested ok nonetheless

    Edge case:
    - wrong symbolic link
    - wrong symbolic link with empty space names
    """
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, "pkg-gourmet-with-wrong-link-cases.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path)

    assert loader.load() == {"status": "eventful"}
    gourmet_wrong_links_snapshot = Snapshot(
        id=hash_to_bytes("b17f38acabb90f066dedd30c29f01a02af88a5c4"),
        branches={
            b"HEAD": SnapshotBranch(
                target=hash_to_bytes("cf30d3bb9d5967d0a2bbeacc405f10a5dd9b138a"),
                target_type=TargetType.REVISION,
            )
        },
    )
    check_snapshot(gourmet_wrong_links_snapshot, loader.storage)
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=gourmet_wrong_links_snapshot.id,
    )

    stats = get_stats(loader.storage)
    assert stats["origin"] == 1
    assert stats["origin_visit"] == 1
    assert stats["snapshot"] == 1
    assert stats["revision"] == 21
def test_loader_svn_cleanup_loader(swh_storage, datadir, tmp_path):
    """Loader should clean up its working directory after the load"""
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    loading_temp_directory = str(tmp_path / "loading")
    os.mkdir(loading_temp_directory)

    loader = SvnLoader(swh_storage, repo_url, temp_directory=loading_temp_directory)
    assert loader.load() == {"status": "eventful"}

    # the root temporary directory still exists
    assert os.path.exists(loader.temp_directory)
    # but it should be empty
    assert os.listdir(loader.temp_directory) == []
def test_loader_svn_cleanup_loader_from_remote_dump(swh_storage, datadir, tmp_path):
    """Loader should clean up its working directory after the load"""
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    loading_temp_directory = str(tmp_path / "loading")
    os.mkdir(loading_temp_directory)

    loader = SvnLoaderFromRemoteDump(
        swh_storage, repo_url, temp_directory=loading_temp_directory
    )
    assert loader.load() == {"status": "eventful"}

    # the root temporary directory still exists
    assert os.path.exists(loader.temp_directory)
    # but it should be empty
    assert os.listdir(loader.temp_directory) == []
    # the internal temp_dir should be cleaned up though
    assert not os.path.exists(loader.temp_dir)
def test_loader_svn_cleanup_loader_from_dump_archive(swh_storage, datadir, tmp_path):
    """Loader should clean up its working directory after the load"""
    archive_ori_dump = os.path.join(datadir, "penguinsdbtools2018.dump.gz")
    archive_dump_dir = os.path.join(tmp_path, "dump")
    os.mkdir(archive_dump_dir)
    archive_dump = os.path.join(archive_dump_dir, "penguinsdbtools2018.dump.gz")
    # loader now drops the dump as soon as it's mounted so we need to make a copy first
    shutil.copyfile(archive_ori_dump, archive_dump)

    loading_path = str(tmp_path / "loading")
    os.mkdir(loading_path)

    # Prepare the dump as a local svn repository for test purposes
    temp_dir, repo_path = init_svn_repo_from_dump(
        archive_dump, root_dir=tmp_path, gzip=True
    )
    repo_url = f"file://{repo_path}"

    loader = SvnLoaderFromRemoteDump(
        swh_storage, repo_url, temp_directory=loading_path
    )
    assert loader.load() == {"status": "eventful"}

    # the root temporary directory still exists
    assert os.path.exists(loader.temp_directory)
    # but it should be empty
    assert os.listdir(loader.temp_directory) == []
    # the internal temp_dir should be cleaned up though
    assert not os.path.exists(loader.temp_dir)
def test_svn_loader_from_remote_dump(swh_storage, datadir, tmpdir_factory):
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    tmp_path = tmpdir_factory.mktemp("repo1")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    loaderFromDump = SvnLoaderFromRemoteDump(
        swh_storage, repo_url, temp_directory=tmp_path
    )
    assert loaderFromDump.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loaderFromDump.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )

    # rename to another origin
    tmp_path = tmpdir_factory.mktemp("repo2")
    origin_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    loader = SvnLoader(
        swh_storage, repo_url, origin_url=origin_url, temp_directory=tmp_path
    )
    assert loader.load() == {"status": "eventful"}  # because are working on new origin
    assert_last_visit_matches(
        loader.storage,
        origin_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )

    check_snapshot(GOURMET_SNAPSHOT, loader.storage)

    stats = get_stats(loader.storage)
    assert stats["origin"] == 2  # created one more origin
    assert stats["origin_visit"] == 2
    assert stats["snapshot"] == 1

    loader = SvnLoader(
        swh_storage, repo_url, temp_directory=tmp_path
    )  # no change on the origin-url
    assert loader.load() == {"status": "uneventful"}
    assert_last_visit_matches(
        loader.storage,
        origin_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )

    stats = get_stats(loader.storage)
    assert stats["origin"] == 2
    assert stats["origin_visit"] == 3
    assert stats["snapshot"] == 1

    # second visit from the dump should be uneventful
    loaderFromDump = SvnLoaderFromRemoteDump(
        swh_storage, repo_url, temp_directory=tmp_path
    )
    assert loaderFromDump.load() == {"status": "uneventful"}
def test_svn_loader_from_remote_dump_incremental_load_on_stale_repo(
    swh_storage, datadir, tmp_path, mocker
):
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    # first load: a dump file will be created, mounted to a local repository
    # and the latter will be loaded into the archive
    loaderFromDump = SvnLoaderFromRemoteDump(
        swh_storage, repo_url, temp_directory=tmp_path
    )
    assert loaderFromDump.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loaderFromDump.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )

    # second load on same repository: the loader will detect there is no changes
    # since last load and will skip the dump, mount and load phases
    loaderFromDump = SvnLoaderFromRemoteDump(
        swh_storage, repo_url, temp_directory=tmp_path
    )

    # Instrument the expensive phases so we can assert they were skipped.
    loaderFromDump.dump_svn_revisions = mocker.MagicMock()
    init_svn_repo_from_dump = mocker.patch(
        "swh.loader.svn.loader.init_svn_repo_from_dump"
    )
    loaderFromDump.process_svn_revisions = mocker.MagicMock()
    loaderFromDump._check_revision_divergence = mocker.MagicMock()

    assert loaderFromDump.load() == {"status": "uneventful"}
    assert_last_visit_matches(
        loaderFromDump.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )

    # no dump
    loaderFromDump.dump_svn_revisions.assert_not_called()
    # no mount
    init_svn_repo_from_dump.assert_not_called()
    # no loading
    loaderFromDump.process_svn_revisions.assert_not_called()
    # no redundant post_load processing
    loaderFromDump._check_revision_divergence.assert_not_called()
def test_svn_loader_from_remote_dump_incremental_load_on_non_stale_repo(
    swh_storage, datadir, tmp_path, mocker
):
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    # first load
    loader = SvnLoaderFromRemoteDump(swh_storage, repo_url, temp_directory=tmp_path)
    loader.load()

    # update the repository with new revisions
    archive_path = os.path.join(datadir, "pkg-gourmet-with-updates.tgz")
    repo_updated_url = prepare_repository_from_archive(
        archive_path, archive_name, tmp_path
    )

    # second load
    loader = SvnLoaderFromRemoteDump(
        swh_storage, repo_updated_url, temp_directory=tmp_path
    )

    # spy on the dump and load phases: with new upstream revisions they
    # must both run this time
    dump_svn_revisions = mocker.spy(loader, "dump_svn_revisions")
    process_svn_revisions = mocker.spy(loader, "process_svn_revisions")

    loader.load()

    dump_svn_revisions.assert_called()
    process_svn_revisions.assert_called()
def test_loader_user_defined_svn_properties(swh_storage, datadir, tmp_path):
    """Edge cases: The repository held some user defined svn-properties with special
    encodings, this prevented the repository from being loaded even though we do not
    ingest those information.
    """
    archive_name = "httthttt"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    loader = SvnLoader(swh_storage, repo_url)

    assert loader.load() == {"status": "eventful"}
    expected_snapshot = Snapshot(
        id=hash_to_bytes("70487267f682c07e52a2371061369b6cf5bffa47"),
        branches={
            b"HEAD": SnapshotBranch(
                target=hash_to_bytes("604a17dbb15e8d7ecb3e9f3768d09bf493667a93"),
                target_type=TargetType.REVISION,
            )
        },
    )
    check_snapshot(expected_snapshot, loader.storage)
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=expected_snapshot.id,
    )

    stats = get_stats(loader.storage)
    assert stats["origin"] == 1
    assert stats["origin_visit"] == 1
    assert stats["snapshot"] == 1
    assert stats["revision"] == 7
def test_loader_svn_dir_added_then_removed(swh_storage, datadir, tmp_path):
    """Loader should handle directory removal when processing a commit"""
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}-add-remove-dir.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path)

    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)
def test_loader_svn_loader_from_dump_archive(swh_storage, datadir, tmp_path):
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    dump_filename = f"{archive_name}.dump"

    with open(os.path.join(tmp_path, dump_filename), "wb") as dump_file:
        # create compressed dump file of pkg-gourmet repo
        subprocess.run(["svnrdump", "dump", repo_url], stdout=dump_file)
        subprocess.run(["gzip", dump_filename], cwd=tmp_path)

        # load svn repo from that compressed dump file
        loader = SvnLoaderFromDumpArchive(
            swh_storage,
            url=repo_url,
            archive_path=os.path.join(tmp_path, f"{dump_filename}.gz"),
            temp_directory=tmp_path,
        )

        assert loader.load() == {"status": "eventful"}

        assert_last_visit_matches(
            loader.storage,
            repo_url,
            status="full",
            type="svn",
            snapshot=GOURMET_SNAPSHOT.id,
        )

        check_snapshot(GOURMET_SNAPSHOT, loader.storage)

        assert get_stats(loader.storage) == {
            "content": 19,
            "directory": 17,
            "origin": 1,
            "origin_visit": 1,
            "release": 0,
            "revision": 6,
            "skipped_content": 0,
            "snapshot": 1,
        }
def test_loader_eol_style_file_property_handling_edge_case(
    swh_storage, repo_url, tmp_path
):
    # # first commit
    add_commit(
        repo_url,
        (
            "Add a directory containing a file with CRLF end of line "
            "and set svn:eol-style property to native so CRLF will be "
            "replaced by LF in the file when exporting the revision"
        ),
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="directory/file_with_crlf_eol.txt",
                properties={"svn:eol-style": "native"},
                data=b"Hello world!\r\n",
            )
        ],
    )

    # second commit
    add_commit(
        repo_url,
        "Remove previously added directory and file",
        [
            CommitChange(
                change_type=CommitChangeType.Delete,
                path="directory/",
            )
        ],
    )

    # third commit
    add_commit(
        repo_url,
        (
            "Add again same directory containing same file with CRLF end of line "
            "but do not set svn:eol-style property value so CRLF will not be "
            "replaced by LF when exporting the revision"
        ),
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="directory/file_with_crlf_eol.txt",
                data=b"Hello world!\r\n",
            )
        ],
    )

    # instantiate a svn loader checking after each processed revision that
    # the repository filesystem it reconstructed does not differ from a subversion
    # export of that revision
    loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1)

    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)

    assert get_stats(loader.storage) == {
        "content": 2,
        "directory": 5,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 3,
        "skipped_content": 0,
        "snapshot": 1,
    }
def get_head_revision_paths_info(loader: SvnLoader) -> Dict[bytes, Dict[str, Any]]:
    """Map each path of the HEAD revision's root tree to its directory entry.

    Walks the directory of the revision targeted by the snapshot's HEAD
    branch, recursively, and indexes every entry by its (bytes) name.
    """
    assert loader.snapshot is not None
    root_dir = loader.snapshot.branches[b"HEAD"].target
    revision = loader.storage.revision_get([root_dir])[0]
    assert revision is not None

    return {
        entry["name"]: entry
        for entry in loader.storage.directory_ls(revision.directory, recursive=True)
    }
def test_loader_eol_style_on_svn_link_handling(swh_storage, repo_url, tmp_path):
    """svn:eol-style on a regular file must normalize CRLF to LF, while a
    svn:special link with the same property keeps its target text untouched."""
    add_commit(
        repo_url,
        (
            "Add a regular file, a directory and a link to the regular file "
            "in the directory. Set svn:eol-style property for the regular "
            "file and the link. Set svn:special property for the link."
        ),
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="file_with_crlf_eol.txt",
                properties={"svn:eol-style": "native"},
                data=b"Hello world!\r\n",
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="directory/file_with_crlf_eol.txt",
                properties={"svn:eol-style": "native", "svn:special": "*"},
                data=b"link ../file_with_crlf_eol.txt",
            ),
        ],
    )

    # loader diffs its reconstruction against a svn export per revision
    loader = SvnLoader(
        swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
    )
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)

    # check loaded objects are those expected
    assert get_stats(loader.storage) == {
        "content": 2,
        "directory": 2,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 1,
        "skipped_content": 0,
        "snapshot": 1,
    }

    paths = get_head_revision_paths_info(loader)

    # regular file: CRLF was converted to LF
    assert (
        loader.storage.content_get_data(paths[b"file_with_crlf_eol.txt"]["sha1"])
        == b"Hello world!\n"
    )
    # link: stored as a symlink with an unmodified target
    assert paths[b"directory/file_with_crlf_eol.txt"]["perms"] == DentryPerms.symlink
    assert (
        loader.storage.content_get_data(
            paths[b"directory/file_with_crlf_eol.txt"]["sha1"]
        )
        == b"../file_with_crlf_eol.txt"
    )
def test_loader_svn_special_property_unset(swh_storage, repo_url, tmp_path):
    """After unsetting svn:special, former links must become regular files
    whose content is the literal ``link ...`` text."""
    add_commit(
        repo_url,
        (
            "Create a regular file, a link to a file and a link to an "
            "external file. Set the svn:special property on the links."
        ),
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="file.txt",
                data=b"Hello world!\n",
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="link.txt",
                properties={"svn:special": "*"},
                data=b"link ./file.txt",
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="external_link.txt",
                properties={"svn:special": "*"},
                data=b"link /home/user/data.txt",
            ),
        ],
    )

    add_commit(
        repo_url,
        "Unset the svn:special property on the links.",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="link.txt",
                properties={"svn:special": None},
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="external_link.txt",
                properties={"svn:special": None},
            ),
        ],
    )

    # loader diffs its reconstruction against a svn export per revision
    loader = SvnLoader(
        swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
    )
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)

    # check loaded objects are those expected
    assert get_stats(loader.storage) == {
        "content": 5,
        "directory": 2,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 2,
        "skipped_content": 0,
        "snapshot": 1,
    }

    paths = get_head_revision_paths_info(loader)

    # both paths are now plain files carrying the raw link text
    assert paths[b"link.txt"]["perms"] == DentryPerms.content
    assert (
        loader.storage.content_get_data(paths[b"link.txt"]["sha1"])
        == b"link ./file.txt"
    )
    assert paths[b"external_link.txt"]["perms"] == DentryPerms.content
    assert (
        loader.storage.content_get_data(paths[b"external_link.txt"]["sha1"])
        == b"link /home/user/data.txt"
    )
def test_loader_invalid_svn_eol_style_property_value(swh_storage, repo_url, tmp_path):
    """An invalid svn:eol-style value must leave file content untouched."""
    filename = "file_with_crlf_eol.txt"
    file_content = b"Hello world!\r\n"

    add_commit(
        repo_url,
        (
            "Add a file with CRLF end of line and set svn:eol-style property "
            "to an invalid value."
        ),
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path=filename,
                properties={"svn:eol-style": "foo"},
                data=file_content,
            )
        ],
    )

    # loader diffs its reconstruction against a svn export per revision
    loader = SvnLoader(
        swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
    )
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)

    paths = get_head_revision_paths_info(loader)
    # end of lines should not have been processed
    assert (
        loader.storage.content_get_data(paths[filename.encode()]["sha1"])
        == file_content
    )
def test_loader_first_revision_is_not_number_one(
    swh_storage, mocker, repo_url, tmp_path
):
    """Loading must fail (with a partial visit and snapshot) when the svn log
    stream starts after revision 1, as with a partially-replicated mirror."""

    class SvnRepoSkipFirstRevision(SvnRepo):
        def logs(self, revision_start, revision_end):
            """Overrides logs method to skip revision number one in yielded revisions"""
            yield from super().logs(revision_start + 1, revision_end)

    from swh.loader.svn import loader

    mocker.patch.object(loader, "SvnRepo", SvnRepoSkipFirstRevision)

    # three single-file commits; the mocked SvnRepo will hide the first one.
    # NOTE: the commit message and data reference the loop variable so each
    # revision carries a distinct content (hence content == 2 below once the
    # first revision has been skipped).
    for filename in ("foo", "bar", "baz"):
        add_commit(
            repo_url,
            f"Add {filename} file",
            [
                CommitChange(
                    change_type=CommitChangeType.AddOrUpdate,
                    path=filename,
                    data=f"{filename}\n".encode(),
                )
            ],
        )

    loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path)

    # post loading will detect an issue and make a partial visit with a snapshot
    assert loader.load() == {"status": "failed"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="partial",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)

    # only the two non-skipped revisions got loaded
    assert get_stats(loader.storage) == {
        "content": 2,
        "directory": 2,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 2,
        "skipped_content": 0,
        "snapshot": 1,
    }
def test_loader_svn_special_property_on_binary_file(swh_storage, repo_url, tmp_path):
    """When a file has the svn:special property set but is not a svn link,
    it might be truncated under certain conditions when performing an export
    operation."""
    # UTF-16 encoded text prefixed with "!<symlink>" (cygwin-style fake link)
    data = (
        b"!<symlink>\xff\xfea\x00p\x00t\x00-\x00c\x00y\x00g\x00.\x00s\x00h\x00\x00\x00"
    )

    add_commit(
        repo_url,
        (
            "Add a non svn link binary file and set the svn:special property on it."
            "That file will be truncated when exporting it."
        ),
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="binary_file",
                properties={"svn:special": "*"},
                data=data,
            ),
        ],
    )

    add_commit(
        repo_url,
        (
            "Add a non svn link binary file and set the svn:special and "
            "svn:mime-type properties on it."
            "That file will not be truncated when exporting it."
        ),
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="another_binary_file",
                properties={
                    "svn:special": "*",
                    "svn:mime-type": "application/octet-stream",
                },
                data=data,
            ),
        ],
    )

    add_commit(
        repo_url,
        "Remove the svn:special property on the previously added files",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="binary_file",
                properties={"svn:special": None},
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="another_binary_file",
                properties={"svn:special": None},
            ),
        ],
    )

    # loader diffs its reconstruction against a svn export per revision
    loader = SvnLoader(
        swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
    )
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)
def test_loader_last_revision_divergence(swh_storage, datadir, tmp_path):
    """A divergence detected on the last revision must fail the load while
    still recording a partial visit pointing at the snapshot."""
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    class SvnLoaderRevisionDivergence(SvnLoader):
        # force the divergence check to always trip
        def _check_revision_divergence(self, count, rev, dir_id):
            raise ValueError("revision divergence detected")

    loader = SvnLoaderRevisionDivergence(swh_storage, repo_url, temp_directory=tmp_path)

    assert loader.load() == {"status": "failed"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="partial",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )
    check_snapshot(GOURMET_SNAPSHOT, loader.storage)
def test_loader_delete_directory_while_file_has_same_prefix(
    swh_storage, repo_url, tmp_path
):
    """Deleting directory ``foo`` must not clobber sibling file ``foo.c``
    that shares the name prefix."""
    add_commit(
        repo_url,
        "Add a file and a directory with same prefix",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="foo/bar.c",
                data=b'#include "../foo.c"',
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="foo.c",
                data=b"int foo() {return 0;}",
            ),
        ],
    )

    add_commit(
        repo_url,
        "Delete previously added directory and update file content",
        [
            CommitChange(change_type=CommitChangeType.Delete, path="foo"),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="foo.c",
                data=b"int foo() {return 1;}",
            ),
        ],
    )

    # loader diffs its reconstruction against a svn export per revision
    loader = SvnLoader(
        swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
    )
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)
def test_svn_loader_incremental(swh_storage, repo_url, tmp_path):
    """Three successive loads (initial + two incremental) over commits that
    add, modify, then strip svn:eol-style from the same file."""

    def load_and_check():
        # fresh loader each time so the incremental path is exercised
        loader = SvnLoader(
            swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
        )
        assert loader.load() == {"status": "eventful"}
        assert_last_visit_matches(
            loader.storage,
            repo_url,
            status="full",
            type="svn",
        )
        check_snapshot(loader.snapshot, loader.storage)

    # first commit
    add_commit(
        repo_url,
        (
            "Add a directory containing a file with CRLF end of line "
            "and set svn:eol-style property to native so CRLF will be "
            "replaced by LF in the file when exporting the revision"
        ),
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="file_with_crlf_eol.txt",
                properties={"svn:eol-style": "native"},
                data=b"Hello world!\r\n",
            )
        ],
    )
    # first load
    load_and_check()

    # second commit
    add_commit(
        repo_url,
        "Modify previously added file",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="file_with_crlf_eol.txt",
                data=b"Hello World!\r\n",
            )
        ],
    )
    # second load, incremental
    load_and_check()

    # third commit
    add_commit(
        repo_url,
        "Unset svn:eol-style property on file",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="file_with_crlf_eol.txt",
                properties={"svn:eol-style": None},
            )
        ],
    )
    # third load, incremental
    load_and_check()
def test_svn_loader_incremental_replay_start_with_empty_directory(
    swh_storage, mocker, repo_url, tmp_path
):
    """On an incremental load, the replay working directory must be empty
    before revision replay begins."""
    # first commit
    add_commit(
        repo_url,
        ("Add a file"),
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="foo.txt",
                data=b"foo\n",
            )
        ],
    )

    # first load
    loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path)
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)

    # second commit
    add_commit(
        repo_url,
        "Modify previously added file",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="foo.txt",
                data=b"bar\n",
            )
        ],
    )

    class SvnRepoCheckReplayStartWithEmptyDirectory(SvnRepo):
        def swh_hash_data_per_revision(self, start_revision: int, end_revision: int):
            """Overrides swh_hash_data_per_revision method to grab the content
            of the directory where the svn revisions will be replayed before that
            process starts."""
            self.replay_dir_content_before_start = [
                os.path.join(root, name)
                for root, _, files in os.walk(self.local_url)
                for name in files
            ]
            yield from super().swh_hash_data_per_revision(start_revision, end_revision)

    from swh.loader.svn import loader

    mocker.patch.object(loader, "SvnRepo", SvnRepoCheckReplayStartWithEmptyDirectory)

    # second load, incremental
    loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path)
    loader.load()

    # check work directory was empty before replaying revisions
    assert loader.svnrepo.replay_dir_content_before_start == []
def test_loader_svn_executable_property_on_svn_link_handling(
    swh_storage, repo_url, tmp_path
):
    """A link marked svn:executable that later becomes a regular file must
    end up with execution rights."""
    add_commit(
        repo_url,
        (
            "Add an executable file and a svn link to it."
            "Set svn:executable property for both paths."
        ),
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="hello-world",
                properties={"svn:executable": "*"},
                data=b"#!/bin/bash\necho Hello World !",
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="hello",
                properties={"svn:executable": "*", "svn:special": "*"},
                data=b"link hello-world",
            ),
        ],
    )

    add_commit(
        repo_url,
        (
            "Remove executable file, unset link and replace it with executable content."
            "As the link was previously marked as executable, execution rights should"
            "be set after turning it to a regular file."
        ),
        [
            CommitChange(change_type=CommitChangeType.Delete, path="hello-world"),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="hello",
                properties={"svn:special": None},
                data=b"#!/bin/bash\necho Hello World !",
            ),
        ],
    )

    # loader diffs its reconstruction against a svn export per revision
    loader = SvnLoader(
        swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
    )
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)
def test_loader_svn_add_property_on_link(swh_storage, repo_url, tmp_path):
    """Setting an unrelated property (svn:eol-style) on a svn link must not
    break the loader's export check."""
    add_commit(
        repo_url,
        "Add an executable file and a svn link to it.",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="hello-world",
                properties={"svn:executable": "*"},
                data=b"#!/bin/bash\necho Hello World !",
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="hello",
                properties={"svn:special": "*"},
                data=b"link hello-world",
            ),
        ],
    )

    add_commit(
        repo_url,
        "Set svn:eol-style property on link",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="hello",
                properties={"svn:eol-style": "native"},
            ),
        ],
    )

    # loader diffs its reconstruction against a svn export per revision
    loader = SvnLoader(
        swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
    )
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)
def test_loader_svn_link_parsing(swh_storage, repo_url, tmp_path):
    """A svn link whose target text gains a trailing CRLF must still be
    parsed and reconstructed correctly."""
    add_commit(
        repo_url,
        "Add an executable file and a svn link to it.",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="hello-world",
                properties={"svn:executable": "*"},
                data=b"#!/bin/bash\necho Hello World !",
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="hello",
                properties={"svn:special": "*"},
                data=b"link hello-world",
            ),
        ],
    )

    add_commit(
        repo_url,
        "Update svn link content",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="hello",
                data=b"link hello-world\r\n",
            ),
        ],
    )

    # loader diffs its reconstruction against a svn export per revision
    loader = SvnLoader(
        swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
    )
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)
def test_loader_svn_empty_local_dir_before_post_load(swh_storage, datadir, tmp_path):
    """The loader's local working directory must already be cleaned up by the
    time post_load runs on a successful visit."""
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    class SvnLoaderPostLoadLocalDirIsEmpty(SvnLoader):
        def post_load(self, success=True):
            if success:
                # snapshot the working dir's file list at post_load time
                self.local_dirname_content = [
                    os.path.join(root, name)
                    for root, _, files in os.walk(self.svnrepo.local_dirname)
                    for name in files
                ]
            return super().post_load(success)

    loader = SvnLoaderPostLoadLocalDirIsEmpty(
        swh_storage, repo_url, temp_directory=tmp_path
    )

    assert loader.load() == {"status": "eventful"}
    assert loader.local_dirname_content == []

    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )
    check_snapshot(GOURMET_SNAPSHOT, loader.storage)
def _dump_project(tmp_path, origin_url):
    """Dump the repository at ``origin_url`` with ``svnrdump`` and gzip it.

    Args:
        tmp_path: directory where the dump file is written
        origin_url: URL of the subversion repository to dump

    Returns:
        Path to the gzipped dump file (``<tmp_path>/repo.dump.gz``).

    Raises:
        subprocess.CalledProcessError: if ``svnrdump`` or ``gzip`` fails
            (check=True: fail loudly instead of silently producing an
            empty or stale dump archive).
    """
    dump_path = f"{tmp_path}/repo.dump"
    with open(dump_path, "wb") as dump_file:
        subprocess.run(
            ["svnrdump", "dump", origin_url], stdout=dump_file, check=True
        )
    # -f: overwrite a .gz left over from a previous dump into the same tmp_path
    subprocess.run(["gzip", "-f", dump_path], check=True)
    return dump_path + ".gz"
def test_loader_svn_add_property_on_directory_link(swh_storage, repo_url, tmp_path):
    """Setting svn:eol-style on a link that targets a directory must not
    break the loader's export check."""
    add_commit(
        repo_url,
        "Add an executable file in a directory and a svn link to the directory.",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="code/hello-world",
                properties={"svn:executable": "*"},
                data=b"#!/bin/bash\necho Hello World !",
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="hello",
                properties={"svn:special": "*"},
                data=b"link code",
            ),
        ],
    )

    add_commit(
        repo_url,
        "Set svn:eol-style property on link",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="hello",
                properties={"svn:eol-style": "native"},
            ),
        ],
    )

    # loader diffs its reconstruction against a svn export per revision
    loader = SvnLoader(
        swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
    )
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)
@pytest.mark.parametrize(
    "svn_loader_cls", [SvnLoader, SvnLoaderFromDumpArchive, SvnLoaderFromRemoteDump]
)
def test_loader_with_subprojects(swh_storage, repo_url, tmp_path, svn_loader_cls):
    """Load each sub-project of a multi-project repository as its own origin
    and check per-origin object counts."""
    # one commit per sub-project
    add_commit(
        repo_url,
        "Add first project in repository",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="project1/foo.sh",
                data=b"#!/bin/bash\necho foo",
            ),
        ],
    )
    add_commit(
        repo_url,
        "Add second project in repository",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="project2/bar.sh",
                data=b"#!/bin/bash\necho bar",
            ),
        ],
    )
    add_commit(
        repo_url,
        "Add third project in repository",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="project3/baz.sh",
                data=b"#!/bin/bash\necho baz",
            ),
        ],
    )

    for i in range(1, 4):
        # load each project in the repository separately and check behavior
        # is the same if origin URL has a trailing slash or not
        origin_url = f"{repo_url}/project{i}{'/' if i%2 else ''}"
        loader_params = {
            "storage": swh_storage,
            "url": origin_url,
            "origin_url": origin_url,
            "temp_directory": tmp_path,
            "incremental": True,
            "check_revision": 1,
        }

        if svn_loader_cls == SvnLoaderFromDumpArchive:
            loader_params["archive_path"] = _dump_project(tmp_path, origin_url)

        # first visit: eventful, full, with a valid snapshot
        loader = svn_loader_cls(**loader_params)
        assert loader.load() == {"status": "eventful"}
        assert_last_visit_matches(
            loader.storage,
            origin_url,
            status="full",
            type="svn",
        )
        check_snapshot(loader.snapshot, loader.storage)

        if svn_loader_cls == SvnLoaderFromDumpArchive:
            loader_params["archive_path"] = _dump_project(tmp_path, origin_url)

        # second visit with no new commit: uneventful
        loader = svn_loader_cls(**loader_params)
        assert loader.load() == {"status": "uneventful"}

        # each project origin must have
        assert get_stats(loader.storage) == {
            "content": i,  # one content
            "directory": 2 * i,  # two directories
            "origin": i,
            "origin_visit": 2 * i,  # two visits
            "release": 0,
            "revision": i,  # one revision
            "skipped_content": 0,
            "snapshot": i,  # one snapshot
        }
@pytest.mark.parametrize(
    "svn_loader_cls", [SvnLoader, SvnLoaderFromDumpArchive, SvnLoaderFromRemoteDump]
)
def test_loader_subproject_root_dir_removal(
    swh_storage, repo_url, tmp_path, svn_loader_cls
):
    """A sub-project whose root directory was removed then re-added must load
    fully, and a second visit must be uneventful."""
    # add, remove, then re-add the project root directory
    add_commit(
        repo_url,
        "Add project in repository",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="project/foo.sh",
                data=b"#!/bin/bash\necho foo",
            ),
        ],
    )
    add_commit(
        repo_url,
        "Remove project root directory",
        [CommitChange(change_type=CommitChangeType.Delete, path="project/")],
    )
    add_commit(
        repo_url,
        "Re-add project in repository",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="project/foo.sh",
                data=b"#!/bin/bash\necho foo",
            ),
        ],
    )

    origin_url = f"{repo_url}/project"
    loader_params = {
        "storage": swh_storage,
        "url": origin_url,
        "origin_url": origin_url,
        "temp_directory": tmp_path,
        "incremental": True,
        "check_revision": 1,
    }

    if svn_loader_cls == SvnLoaderFromDumpArchive:
        loader_params["archive_path"] = _dump_project(tmp_path, origin_url)

    # first visit: eventful, full, valid snapshot
    loader = svn_loader_cls(**loader_params)
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        origin_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)

    if svn_loader_cls == SvnLoaderFromDumpArchive:
        loader_params["archive_path"] = _dump_project(tmp_path, origin_url)

    # second visit with no new commit: uneventful
    loader = svn_loader_cls(**loader_params)
    assert loader.load() == {"status": "uneventful"}
@pytest.mark.parametrize("svn_loader_cls", [SvnLoader, SvnLoaderFromRemoteDump])
def test_loader_svn_not_found_after_successful_visit(
    swh_storage, datadir, tmp_path, svn_loader_cls
):
    """After a successful visit, a vanished repository must yield an
    uneventful load with a not_found visit and no snapshot."""
    archive_name = "pkg-gourmet"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    # first, a successful full visit
    loader = svn_loader_cls(swh_storage, repo_url, temp_directory=tmp_path)
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
        snapshot=GOURMET_SNAPSHOT.id,
    )
    check_snapshot(loader.snapshot, loader.storage)

    # simulate removal of remote repository
    shutil.rmtree(repo_url.replace("file://", ""))

    # then a second visit against the missing repository
    loader = svn_loader_cls(swh_storage, repo_url, temp_directory=tmp_path)
    assert loader.load() == {"status": "uneventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="not_found",
        type="svn",
        snapshot=None,
    )
def test_loader_svn_from_remote_dump_url_redirect(swh_storage, tmp_path, mocker):
    """SvnLoaderFromRemoteDump must follow an HTTP->HTTPS redirection and dump
    the repository from the redirected URL."""
    repo_url = "http://svn.example.org/repo"
    repo_redirect_url = "https://svn.example.org/repo"

    # mock remote subversion operations
    from swh.loader.svn.svn import client

    mocker.patch("swh.loader.svn.svn.RemoteAccess")
    init_svn_repo_from_dump = mocker.patch(
        "swh.loader.svn.loader.init_svn_repo_from_dump"
    )
    init_svn_repo_from_dump.return_value = ("", "")
    mock_client = mocker.MagicMock()
    mocker.patch.object(client, "Client", mock_client)

    class Info:
        # the info call reports the redirected repository root
        repos_root_url = repo_redirect_url
        url = repo_redirect_url

    mock_client().info.return_value = {"repo": Info()}

    # init remote dump loader and mock some methods
    loader = SvnLoaderFromRemoteDump(swh_storage, repo_url, temp_directory=tmp_path)
    loader.dump_svn_revisions = mocker.MagicMock(return_value=("", -1))
    loader.start_from = mocker.MagicMock(return_value=(0, 0))

    # prepare loading
    loader.prepare()

    # check redirection URL has been used to dump repository
    assert loader.dump_svn_revisions.call_args_list[0][0][0] == repo_redirect_url
@pytest.mark.parametrize("svn_loader_cls", [SvnLoader, SvnLoaderFromRemoteDump])
def test_loader_basic_authentication_required(
    swh_storage, repo_url, tmp_path, svn_loader_cls, svnserve
):
    """Loading over svn:// with mandatory authentication: anonymous access
    fails, credentials embedded in the URL succeed."""
    # add file to empty test repo
    add_commit(
        repo_url,
        "Add project in repository",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="project/foo.sh",
                data=b"#!/bin/bash\necho foo",
            ),
        ],
    )

    # compute repo URLs that will be made available by svnserve
    repo_path = repo_url.replace("file://", "")
    repo_root = os.path.dirname(repo_path)
    repo_name = os.path.basename(repo_path)
    username = "anonymous"
    password = "anonymous"
    port = 12000
    repo_url_no_auth = f"svn://localhost:{port}/{repo_name}"
    repo_url = f"svn://{username}:{password}@localhost:{port}/{repo_name}"

    # disable anonymous access and require authentication on test repo
    with open(os.path.join(repo_path, "conf", "svnserve.conf"), "w") as svnserve_conf:
        svnserve_conf.write(
            textwrap.dedent(
                """
                [general]
                # Authentication realm of the repository.
                realm = test-repository
                password-db = passwd
                # Deny all anonymous access
                anon-access = none
                # Grant authenticated users read and write privileges
                auth-access = write
                """
            )
        )

    # add a user with read/write access on test repo
    with open(os.path.join(repo_path, "conf", "passwd"), "w") as passwd:
        passwd.write(f"[users]\n{username} = {password}")

    # execute svnserve
    svnserve(repo_root, port)

    # check loading failed with no authentication
    loader = svn_loader_cls(swh_storage, repo_url_no_auth, temp_directory=tmp_path)
    assert loader.load() == {"status": "uneventful"}

    # check loading succeeded with authentication
    loader = svn_loader_cls(swh_storage, repo_url, temp_directory=tmp_path)
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)
def test_loader_with_spaces_in_svn_url(swh_storage, repo_url, tmp_path):
    """Exporting a path whose name contains spaces must work through SvnRepo.

    Fix: the export URL used a garbled placeholder instead of the committed
    filename, so the file with spaces was never actually exercised; the
    ``filename`` local was defined but unused.
    """
    filename = "file with spaces.txt"
    content = b"foo"

    add_commit(
        repo_url,
        "Add file with spaces in its name",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path=filename,
                data=content,
            ),
        ],
    )

    svnrepo = SvnRepo(repo_url, repo_url, tmp_path, max_content_length=10000)

    dest_path = f"{tmp_path}/file"
    # export the spaced-name path: SvnRepo must quote the URL internally
    svnrepo.export(f"{repo_url}/{filename}", to=dest_path)

    with open(dest_path, "rb") as f:
        assert f.read() == content
@pytest.mark.parametrize("svn_loader_cls", [SvnLoader, SvnLoaderFromRemoteDump])
def test_loader_repo_with_copyfrom_and_replace_operations(
    swh_storage, repo_url, tmp_path, svn_loader_cls
):
    """Exercise copyfrom commits and a directory-replaced-by-file operation."""
    add_commit(
        repo_url,
        "Create trunk/data folder",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/data/foo",
                data=b"foo",
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/data/bar",
                data=b"bar",
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/data/baz/",
            ),
        ],
    )

    add_commit(
        repo_url,
        "Create trunk/project folder",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/project/",
            ),
        ],
    )

    add_commit(
        repo_url,
        "Create trunk/project/bar as copy of trunk/data/bar from revision 1",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/project/bar",
                copyfrom_path=repo_url + "/trunk/data/bar",
                copyfrom_rev=1,
            ),
        ],
    )

    add_commit(
        repo_url,
        (
            "Create trunk/project/data/ folder as a copy of /trunk/data from revision 1"
            " and replace the trunk/project/data/baz/ folder by a trunk/project/data/baz file"
        ),
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/project/data/",
                copyfrom_path=repo_url + "/trunk/data/",
                copyfrom_rev=1,
            ),
            CommitChange(
                change_type=CommitChangeType.Delete,
                path="trunk/project/data/baz/",
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/project/data/baz",
                data=b"baz",
            ),
        ],
    )

    # loader diffs its reconstruction against a svn export per revision
    loader = svn_loader_cls(
        swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
    )
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Fri, Jul 4, 11:13 AM (3 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3295217
Attached To
rDLDSVN Subversion (SVN) loader
Event Timeline
Log In to Comment