Page MenuHomeSoftware Heritage

D407.diff
No OneTemporary

D407.diff

diff --git a/swh/loader/svn/ra.py b/swh/loader/svn/ra.py
--- a/swh/loader/svn/ra.py
+++ b/swh/loader/svn/ra.py
@@ -19,7 +19,30 @@
from swh.model.from_disk import Content, Directory
-CRLF = b'\r\n'
+_eol_style = {
+ 'native': b'\n',
+ 'CRLF': b'\r\n',
+ 'LF': b'\n',
+ 'CR': b'\r'
+}
+
+
+def _normalize_line_endings(lines, eol_style='native'):
+ """Normalize line endings to unix (\n), windows (\r\n) or mac (\r).
+ Args:
+ lines (bytes): The lines to normalize
+ eol_style (str): The line ending format as defined for
+ svn:eol-style property. Acceptable values are 'native',
+ 'CRLF', 'LF' and 'CR'
+ Returns:
+ bytes: lines with endings normalized
+ """
+ lines = lines.replace(_eol_style['CRLF'], _eol_style['LF'])\
+ .replace(_eol_style['CR'], _eol_style['LF'])
+ if _eol_style[eol_style] != _eol_style['LF']:
+ lines = lines.replace(_eol_style['LF'], _eol_style[eol_style])
+
+ return lines
def apply_txdelta_handler(sbuf, target_stream):
@@ -85,7 +108,7 @@
SVN_PROPERTY_EOL = 'svn:eol-style'
# EOL state check mess
-EOL_CHECK = {}
+EOL_STYLE = {}
class SWHFileEditor:
@@ -112,12 +135,9 @@
# Possibly a symbolic link. We cannot check further at
# that moment though, patch(s) not being applied yet
self.link = True
- elif key == SVN_PROPERTY_EOL: # Detect inconsistent repositories
- if value in ['LF', 'native']:
- EOL_CHECK[self.fullpath] = value
- else:
- if self.fullpath in EOL_CHECK:
- del EOL_CHECK[self.fullpath]
+ elif key == SVN_PROPERTY_EOL:
+ # remember the svn:eol-style value set on this file
+ EOL_STYLE[self.fullpath] = value
def __make_symlink(self, src):
"""Convert the svnlink to a symlink on disk.
@@ -198,18 +218,21 @@
elif self.executable == NOEXEC_FLAG:
os.chmod(self.fullpath, 0o644)
- check_eol = EOL_CHECK.get(self.fullpath)
- if check_eol:
- raw_content = open(self.fullpath, 'rb').read()
- if CRLF in raw_content: # CRLF
- msg = 'Inconsistency. CRLF detected in a converted ' \
- 'file %s (%s: %s)' % (
- self.fullpath, SVN_PROPERTY_EOL, check_eol)
- raise ValueError(msg)
-
# And now compute file's checksums
- self.directory[self.path] = Content.from_file(path=self.fullpath,
- data=True)
+ eol_style = EOL_STYLE.get(self.fullpath, None)
+ if eol_style:
+ # normalize line endings as defined by the svn:eol-style
+ # property so the file checksum matches the one obtained after
+ # an export or checkout operation with subversion
+ with open(self.fullpath, 'rb') as f:
+ data = f.read()
+ data = _normalize_line_endings(data, eol_style)
+ mode = os.lstat(self.fullpath).st_mode
+ self.directory[self.path] = Content.from_bytes(mode=mode,
+ data=data)
+ else:
+ self.directory[self.path] = Content.from_file(path=self.fullpath,
+ data=True)
class BaseDirSWHEditor:
@@ -262,8 +285,8 @@
shutil.rmtree(fpath)
else:
os.remove(fpath)
- if path in EOL_CHECK:
- del EOL_CHECK[path]
+ if path in EOL_STYLE:
+ del EOL_STYLE[path]
def update_checksum(self):
raise NotImplementedError('This should be implemented.')
diff --git a/swh/loader/svn/tests/svn-test-repos/mediawiki-repo-r407-eol-native-crlf.tgz b/swh/loader/svn/tests/svn-test-repos/mediawiki-repo-r407-eol-native-crlf.tgz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/svn/tests/svn-test-repos/pkg-gourmet-with-eol-corner-cases.tgz b/swh/loader/svn/tests/svn-test-repos/pkg-gourmet-with-eol-corner-cases.tgz
deleted file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/svn/tests/svn-test-repos/pyang-repo-r343-eol-native-mixed-lf-crlf.tgz b/swh/loader/svn/tests/svn-test-repos/pyang-repo-r343-eol-native-mixed-lf-crlf.tgz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -620,67 +620,77 @@
self.assertRevisionsOk(expected_revisions)
-class SWHSvnLoaderUpdateAndTestCornerCasesAboutEolITTest(BaseTestSvnLoader):
+class SWHSvnLoaderTestCornerCaseAboutCrlfEolInRepoITTest(BaseTestSvnLoader):
+ """
+ Check that a svn repo containing a versioned file with CRLF line
+ endings with svn:eol-style property set to 'native' (this is
+ a violation of svn specification as the file should have been
+ stored with LF line endings) can be loaded anyway.
+ """
def setUp(self):
- super().setUp(archive_name='pkg-gourmet-with-eol-corner-cases.tgz')
+ super().setUp(archive_name='mediawiki-repo-r407-eol-native-crlf.tgz',
+ filename='mediawiki-repo-r407-eol-native-crlf')
- self.origin = {'id': 2, 'type': 'svn', 'url': 'file:///dev/null'}
+ self.origin = {'id': 1, 'type': 'svn',
+ 'url': 'https://code.google.com/p/pyang/pyang-repo'}
self.origin_visit = {
'origin': self.origin['id'],
'visit': 1,
}
- self.loader = SWHSvnLoaderUpdateLessRecentNoStorage()
+ self.loader = SWHSvnLoaderNoStorage()
self.loader.prepare(
self.svn_mirror_url, self.destination_path, self.origin)
@istest
def process_repository(self):
- """EOL corner cases and update.
-
"""
- previous_unfinished_revision = {
- 'id': hashutil.hash_to_bytes(
- '171dc35522bfd17dda4e90a542a0377fb2fc707a'),
- 'parents': [hashutil.hash_to_bytes(
- '902f29b4323a9b9de3af6d28e72dd581e76d9397')],
- 'directory': hashutil.hash_to_bytes(
- 'fd24a76c87a3207428e06612b49860fc78e9f6dc'),
- 'target_type': 'revision',
- 'metadata': {
- 'extra_headers': [
- ['svn_repo_uuid', '3187e211-bb14-4c82-9596-0b59d67cd7f4'],
- ['svn_revision', '11']
- ]
- }
+ Process repository with CRLF line endings and svn:eol-style set to 'native'
+ """ # noqa
+ # when
+ self.loader.process_repository(self.origin_visit)
+
+ expected_revisions = {
+ '7da4975c363101b819756d33459f30a866d01b1b': 'f63637223ee0f7d4951ffd2d4d9547a4882c5d8b' # noqa
}
- # when
- self.loader.process_repository(
- self.origin_visit,
- last_known_swh_revision=previous_unfinished_revision)
+ self.assertRevisionsOk(expected_revisions)
- # then
- # we got the previous run's last revision (rev 11)
- # so 8 new
- self.assertEquals(len(self.loader.all_revisions), 8)
- self.assertEquals(len(self.loader.all_releases), 0)
- last_revision = '0148ae3eaa520b73a50802c59f3f416b7a36cf8c'
+class SWHSvnLoaderTestCornerCaseAboutMixedCrlfLfEolInRepoITTest(BaseTestSvnLoader): # noqa
+ """
+ Check that a svn repo containing a versioned file with mixed
+ CRLF/LF line endings with svn:eol-style property set to 'native'
+ (this is a violation of svn specification as mixed line endings
+ for textual content should not be stored when the svn:eol-style
+ property is set) can be loaded anyway.
+ """
+ def setUp(self):
+ super().setUp(archive_name='pyang-repo-r343-eol-native-mixed-lf-crlf.tgz', # noqa
+ filename='pyang-repo-r343-eol-native-mixed-lf-crlf')
+
+ self.origin = {'id': 1, 'type': 'svn',
+ 'url': 'https://code.google.com/m/mediawiki/mediawiki-repo'} # noqa
+
+ self.origin_visit = {
+ 'origin': self.origin['id'],
+ 'visit': 1,
+ }
+
+ self.loader = SWHSvnLoaderNoStorage()
+ self.loader.prepare(
+ self.svn_mirror_url, self.destination_path, self.origin)
+
+ @istest
+ def process_repository(self):
+ """
+ Process repository with mixed CRLF/LF line endings and svn:eol-style set to 'native'
+ """ # noqa
+ self.loader.process_repository(self.origin_visit)
- # cf. test_loader.org for explaining from where those hashes
- # come from
expected_revisions = {
- # revision hash | directory hash
- '027e8769f4786597436ab94a91f85527d04a6cbb': '2d9ca72c6afec6284fb01e459588cbb007017c8c', # noqa
- '4474d96018877742d9697d5c76666c9693353bfc': 'ab111577e0ab39e4a157c476072af48f2641d93f', # noqa
- '97ad21eab92961e2a22ca0285f09c6d1e9a7ffbc': 'ab111577e0ab39e4a157c476072af48f2641d93f', # noqa
- 'd04ea8afcee6205cc8384c091bfc578931c169fd': 'b0a648b02e55a4dce356ac35187a058f89694ec7', # noqa
- 'ded78810401fd354ffe894aa4a1e5c7d30a645d1': 'b0a648b02e55a4dce356ac35187a058f89694ec7', # noqa
- '4ee95e39358712f53c4fc720da3fafee9249ed19': 'c3c98df624733fef4e592bef983f93e2ed02b179', # noqa
- 'ffa901b69ca0f46a2261f42948838d19709cb9f8': 'c3c98df624733fef4e592bef983f93e2ed02b179', # noqa
- last_revision: '844d4646d6c2b4f3a3b2b22ab0ee38c7df07bab2', # noqa
+ '9c6962eeb9164a636c374be700672355e34a98a7': '16aa6b6271f3456d4643999d234cf39fe3d0cc5a' # noqa
}
self.assertRevisionsOk(expected_revisions)

File Metadata

Mime Type
text/plain
Expires
Jan 30 2025, 10:40 AM (6 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220689

Event Timeline