Page Menu | Home | Software Heritage

D6604.diff
No One | Temporary

D6604.diff

diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py
--- a/swh/loader/svn/loader.py
+++ b/swh/loader/svn/loader.py
@@ -111,7 +111,7 @@
self.visit_date = visit_date
self.destination_path = destination_path
self.incremental = incremental
- self.snapshot = None
+ self.snapshot: Optional[Snapshot] = None
# state from previous visit
self.latest_snapshot = None
self.latest_revision = None
diff --git a/swh/loader/svn/ra.py b/swh/loader/svn/ra.py
--- a/swh/loader/svn/ra.py
+++ b/swh/loader/svn/ra.py
@@ -36,11 +36,12 @@
Returns:
bytes: lines with endings normalized
"""
- lines = lines.replace(_eol_style["CRLF"], _eol_style["LF"]).replace(
- _eol_style["CR"], _eol_style["LF"]
- )
- if _eol_style[eol_style] != _eol_style["LF"]:
- lines = lines.replace(_eol_style["LF"], _eol_style[eol_style])
+ if eol_style in _eol_style:
+ lines = lines.replace(_eol_style["CRLF"], _eol_style["LF"]).replace(
+ _eol_style["CR"], _eol_style["LF"]
+ )
+ if _eol_style[eol_style] != _eol_style["LF"]:
+ lines = lines.replace(_eol_style["LF"], _eol_style[eol_style])
return lines
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -7,7 +7,7 @@
from io import BytesIO
import os
import subprocess
-from typing import Dict, List
+from typing import Any, Dict, List
import pytest
from subvertpy import SubversionException, delta, repos
@@ -911,6 +911,18 @@
}
+def get_head_revision_paths_info(loader: SvnLoader) -> Dict[bytes, Dict[str, Any]]:
+ assert loader.snapshot is not None
+ root_dir = loader.snapshot.branches[b"HEAD"].target
+ revision = loader.storage.revision_get([root_dir])[0]
+ assert revision is not None
+
+ paths = {}
+ for entry in loader.storage.directory_ls(revision.directory, recursive=True):
+ paths[entry["name"]] = entry
+ return paths
+
+
def test_loader_eol_style_on_svn_link_handling(swh_storage, tmp_path):
# create a repository
repo_path = os.path.join(tmp_path, "tmprepo")
@@ -963,12 +975,7 @@
"snapshot": 1,
}
- root_dir = loader.snapshot.branches[b"HEAD"].target
- revision = loader.storage.revision_get([root_dir])[0]
-
- paths = {}
- for entry in loader.storage.directory_ls(revision.directory, recursive=True):
- paths[entry["name"]] = entry
+ paths = get_head_revision_paths_info(loader)
assert (
loader.storage.content_get_data(paths[b"file_with_crlf_eol.txt"]["sha1"])
@@ -1058,12 +1065,7 @@
"snapshot": 1,
}
- root_dir = loader.snapshot.branches[b"HEAD"].target
- revision = loader.storage.revision_get([root_dir])[0]
-
- paths = {}
- for entry in loader.storage.directory_ls(revision.directory, recursive=True):
- paths[entry["name"]] = entry
+ paths = get_head_revision_paths_info(loader)
assert paths[b"link.txt"]["perms"] == DentryPerms.content
assert (
@@ -1076,3 +1078,47 @@
loader.storage.content_get_data(paths[b"external_link.txt"]["sha1"])
== b"link /home/user/data.txt"
)
+
+
+def test_loader_invalid_svn_eol_style_property_value(swh_storage, tmp_path):
+ # create a repository
+ repo_path = os.path.join(tmp_path, "tmprepo")
+ repos.create(repo_path)
+ repo_url = f"file://{repo_path}"
+
+ filename = "file_with_crlf_eol.txt"
+ file_content = b"Hello world!\r\n"
+
+ # # first commit
+ add_commit(
+ repo_url,
+ (
+ "Add a file with CRLF end of line and set svn:eol-style property "
+ "to an invalid value."
+ ),
+ [
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path=filename,
+ properties={"svn:eol-style": "foo"},
+ data=file_content,
+ )
+ ],
+ )
+
+ # instantiate a svn loader checking after each processed revision that
+ # the repository filesystem it reconstructed does not differ from a subversion
+ # export of that revision
+ loader = SvnLoader(
+ swh_storage, repo_url, destination_path=tmp_path, check_revision=1
+ )
+
+ assert loader.load() == {"status": "eventful"}
+ assert loader.visit_status() == "full"
+
+ paths = get_head_revision_paths_info(loader)
+ # end of lines should not have been processed
+ assert (
+ loader.storage.content_get_data(paths[filename.encode()]["sha1"])
+ == file_content
+ )

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 7:32 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3228434

Event Timeline