Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7163576
D407.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
10 KB
Subscribers
None
D407.diff
View Options
diff --git a/swh/loader/svn/ra.py b/swh/loader/svn/ra.py
--- a/swh/loader/svn/ra.py
+++ b/swh/loader/svn/ra.py
@@ -19,7 +19,30 @@
from swh.model.from_disk import Content, Directory
-CRLF = b'\r\n'
+_eol_style = {
+ 'native': b'\n',
+ 'CRLF': b'\r\n',
+ 'LF': b'\n',
+ 'CR': b'\r'
+}
+
+
+def _normalize_line_endings(lines, eol_style='native'):
+ """Normalize line endings to unix (\n), windows (\r\n) or mac (\r).
+ Args:
+ lines (bytes): The lines to normalize
+ eol_style (str): The line ending format as defined for
+ svn:eol-style property. Acceptable values are 'native',
+ 'CRLF', 'LF' and 'CR'
+ Returns:
+ bytes: lines with endings normalized
+ """
+ lines = lines.replace(_eol_style['CRLF'], _eol_style['LF'])\
+ .replace(_eol_style['CR'], _eol_style['LF'])
+ if _eol_style[eol_style] != _eol_style['LF']:
+ lines = lines.replace(_eol_style['LF'], _eol_style[eol_style])
+
+ return lines
def apply_txdelta_handler(sbuf, target_stream):
@@ -85,7 +108,7 @@
SVN_PROPERTY_EOL = 'svn:eol-style'
# EOL state check mess
-EOL_CHECK = {}
+EOL_STYLE = {}
class SWHFileEditor:
@@ -112,12 +135,9 @@
# Possibly a symbolic link. We cannot check further at
# that moment though, patch(s) not being applied yet
self.link = True
- elif key == SVN_PROPERTY_EOL: # Detect inconsistent repositories
- if value in ['LF', 'native']:
- EOL_CHECK[self.fullpath] = value
- else:
- if self.fullpath in EOL_CHECK:
- del EOL_CHECK[self.fullpath]
+ elif key == SVN_PROPERTY_EOL:
+ # remember the end-of-line style declared for this file
+ EOL_STYLE[self.fullpath] = value
def __make_symlink(self, src):
"""Convert the svnlink to a symlink on disk.
@@ -198,18 +218,21 @@
elif self.executable == NOEXEC_FLAG:
os.chmod(self.fullpath, 0o644)
- check_eol = EOL_CHECK.get(self.fullpath)
- if check_eol:
- raw_content = open(self.fullpath, 'rb').read()
- if CRLF in raw_content: # CRLF
- msg = 'Inconsistency. CRLF detected in a converted ' \
- 'file %s (%s: %s)' % (
- self.fullpath, SVN_PROPERTY_EOL, check_eol)
- raise ValueError(msg)
-
# And now compute file's checksums
- self.directory[self.path] = Content.from_file(path=self.fullpath,
- data=True)
+ eol_style = EOL_STYLE.get(self.fullpath, None)
+ if eol_style:
+ # make sure line endings are normalized as defined by the
+ # svn:eol-style property, so the file checksum matches the one
+ # obtained after an export or checkout operation with subversion
+ with open(self.fullpath, 'rb') as f:
+ data = f.read()
+ data = _normalize_line_endings(data, eol_style)
+ mode = os.lstat(self.fullpath).st_mode
+ self.directory[self.path] = Content.from_bytes(mode=mode,
+ data=data)
+ else:
+ self.directory[self.path] = Content.from_file(path=self.fullpath,
+ data=True)
class BaseDirSWHEditor:
@@ -262,8 +285,8 @@
shutil.rmtree(fpath)
else:
os.remove(fpath)
- if path in EOL_CHECK:
- del EOL_CHECK[path]
+ if path in EOL_STYLE:
+ del EOL_STYLE[path]
def update_checksum(self):
raise NotImplementedError('This should be implemented.')
diff --git a/swh/loader/svn/tests/svn-test-repos/mediawiki-repo-r407-eol-native-crlf.tgz b/swh/loader/svn/tests/svn-test-repos/mediawiki-repo-r407-eol-native-crlf.tgz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/svn/tests/svn-test-repos/pkg-gourmet-with-eol-corner-cases.tgz b/swh/loader/svn/tests/svn-test-repos/pkg-gourmet-with-eol-corner-cases.tgz
deleted file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/svn/tests/svn-test-repos/pyang-repo-r343-eol-native-mixed-lf-crlf.tgz b/swh/loader/svn/tests/svn-test-repos/pyang-repo-r343-eol-native-mixed-lf-crlf.tgz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -620,67 +620,77 @@
self.assertRevisionsOk(expected_revisions)
-class SWHSvnLoaderUpdateAndTestCornerCasesAboutEolITTest(BaseTestSvnLoader):
+class SWHSvnLoaderTestCornerCaseAboutCrlfEolInRepoITTest(BaseTestSvnLoader):
+ """
+ Check that a svn repo containing a versioned file with CRLF line
+ endings with svn:eol-style property set to 'native' (this is
+ a violation of svn specification as the file should have been
+ stored with LF line endings) can be loaded anyway.
+ """
def setUp(self):
- super().setUp(archive_name='pkg-gourmet-with-eol-corner-cases.tgz')
+ super().setUp(archive_name='mediawiki-repo-r407-eol-native-crlf.tgz',
+ filename='mediawiki-repo-r407-eol-native-crlf')
- self.origin = {'id': 2, 'type': 'svn', 'url': 'file:///dev/null'}
+ self.origin = {'id': 1, 'type': 'svn',
+ 'url': 'https://code.google.com/p/pyang/pyang-repo'}
self.origin_visit = {
'origin': self.origin['id'],
'visit': 1,
}
- self.loader = SWHSvnLoaderUpdateLessRecentNoStorage()
+ self.loader = SWHSvnLoaderNoStorage()
self.loader.prepare(
self.svn_mirror_url, self.destination_path, self.origin)
@istest
def process_repository(self):
- """EOL corner cases and update.
-
"""
- previous_unfinished_revision = {
- 'id': hashutil.hash_to_bytes(
- '171dc35522bfd17dda4e90a542a0377fb2fc707a'),
- 'parents': [hashutil.hash_to_bytes(
- '902f29b4323a9b9de3af6d28e72dd581e76d9397')],
- 'directory': hashutil.hash_to_bytes(
- 'fd24a76c87a3207428e06612b49860fc78e9f6dc'),
- 'target_type': 'revision',
- 'metadata': {
- 'extra_headers': [
- ['svn_repo_uuid', '3187e211-bb14-4c82-9596-0b59d67cd7f4'],
- ['svn_revision', '11']
- ]
- }
+ Process repository with CRLF line endings and svn:eol-style set to 'native'
+ """ # noqa
+ # when
+ self.loader.process_repository(self.origin_visit)
+
+ expected_revisions = {
+ '7da4975c363101b819756d33459f30a866d01b1b': 'f63637223ee0f7d4951ffd2d4d9547a4882c5d8b' # noqa
}
- # when
- self.loader.process_repository(
- self.origin_visit,
- last_known_swh_revision=previous_unfinished_revision)
+ self.assertRevisionsOk(expected_revisions)
- # then
- # we got the previous run's last revision (rev 11)
- # so 8 new
- self.assertEquals(len(self.loader.all_revisions), 8)
- self.assertEquals(len(self.loader.all_releases), 0)
- last_revision = '0148ae3eaa520b73a50802c59f3f416b7a36cf8c'
+class SWHSvnLoaderTestCornerCaseAboutMixedCrlfLfEolInRepoITTest(BaseTestSvnLoader): # noqa
+ """
+ Check that a svn repo containing a versioned file with mixed
+ CRLF/LF line endings with svn:eol-style property set to 'native'
+ (this is a violation of svn specification as mixed line endings
+ for textual content should not be stored when the svn:eol-style
+ property is set) can be loaded anyway.
+ """
+ def setUp(self):
+ super().setUp(archive_name='pyang-repo-r343-eol-native-mixed-lf-crlf.tgz', # noqa
+ filename='pyang-repo-r343-eol-native-mixed-lf-crlf')
+
+ self.origin = {'id': 1, 'type': 'svn',
+ 'url': 'https://code.google.com/m/mediawiki/mediawiki-repo'} # noqa
+
+ self.origin_visit = {
+ 'origin': self.origin['id'],
+ 'visit': 1,
+ }
+
+ self.loader = SWHSvnLoaderNoStorage()
+ self.loader.prepare(
+ self.svn_mirror_url, self.destination_path, self.origin)
+
+ @istest
+ def process_repository(self):
+ """
+ Process repository with mixed CRLF/LF line endings and svn:eol-style set to 'native'
+ """ # noqa
+ self.loader.process_repository(self.origin_visit)
- # cf. test_loader.org for explaining from where those hashes
- # come from
expected_revisions = {
- # revision hash | directory hash
- '027e8769f4786597436ab94a91f85527d04a6cbb': '2d9ca72c6afec6284fb01e459588cbb007017c8c', # noqa
- '4474d96018877742d9697d5c76666c9693353bfc': 'ab111577e0ab39e4a157c476072af48f2641d93f', # noqa
- '97ad21eab92961e2a22ca0285f09c6d1e9a7ffbc': 'ab111577e0ab39e4a157c476072af48f2641d93f', # noqa
- 'd04ea8afcee6205cc8384c091bfc578931c169fd': 'b0a648b02e55a4dce356ac35187a058f89694ec7', # noqa
- 'ded78810401fd354ffe894aa4a1e5c7d30a645d1': 'b0a648b02e55a4dce356ac35187a058f89694ec7', # noqa
- '4ee95e39358712f53c4fc720da3fafee9249ed19': 'c3c98df624733fef4e592bef983f93e2ed02b179', # noqa
- 'ffa901b69ca0f46a2261f42948838d19709cb9f8': 'c3c98df624733fef4e592bef983f93e2ed02b179', # noqa
- last_revision: '844d4646d6c2b4f3a3b2b22ab0ee38c7df07bab2', # noqa
+ '9c6962eeb9164a636c374be700672355e34a98a7': '16aa6b6271f3456d4643999d234cf39fe3d0cc5a' # noqa
}
self.assertRevisionsOk(expected_revisions)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Jan 30 2025, 10:40 AM (6 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220689
Attached To
D407: SVN loader: Normalize line endings when svn:eol-style property is set
Event Timeline
Log In to Comment