diff --git a/swh/loader/svn/ra.py b/swh/loader/svn/ra.py --- a/swh/loader/svn/ra.py +++ b/swh/loader/svn/ra.py @@ -19,7 +19,30 @@ from swh.model.from_disk import Content, Directory -CRLF = b'\r\n' +_eol_style = { + 'native': b'\n', + 'CRLF': b'\r\n', + 'LF': b'\n', + 'CR': b'\r' +} + + +def _normalize_line_endings(lines, eol_style='native'): + """Normalize line endings to unix (\n), windows (\r\n) or mac (\r). + Args: + lines (bytes): The lines to normalize + line_ending (str): The line ending format as defined for + svn:eol-style property. Acceptable values are 'native', + 'CRLF', 'LF' and 'CR' + Returns + bytes: lines with endings normalized + """ + lines = lines.replace(_eol_style['CRLF'], _eol_style['LF'])\ + .replace(_eol_style['CR'], _eol_style['LF']) + if _eol_style[eol_style] != _eol_style['LF']: + lines = lines.replace(_eol_style['LF'], _eol_style[eol_style]) + + return lines def apply_txdelta_handler(sbuf, target_stream): @@ -85,7 +108,7 @@ SVN_PROPERTY_EOL = 'svn:eol-style' # EOL state check mess -EOL_CHECK = {} +EOL_STYLE = {} class SWHFileEditor: @@ -112,12 +135,9 @@ # Possibly a symbolic link. We cannot check further at # that moment though, patch(s) not being applied yet self.link = True - elif key == SVN_PROPERTY_EOL: # Detect inconsistent repositories - if value in ['LF', 'native']: - EOL_CHECK[self.fullpath] = value - else: - if self.fullpath in EOL_CHECK: - del EOL_CHECK[self.fullpath] + elif key == SVN_PROPERTY_EOL: + # backup end of line style for file + EOL_STYLE[self.fullpath] = value def __make_symlink(self, src): """Convert the svnlink to a symlink on disk. @@ -198,18 +218,21 @@ elif self.executable == NOEXEC_FLAG: os.chmod(self.fullpath, 0o644) - check_eol = EOL_CHECK.get(self.fullpath) - if check_eol: - raw_content = open(self.fullpath, 'rb').read() - if CRLF in raw_content: # CRLF - msg = 'Inconsistency. CRLF detected in a converted ' \ - 'file %s (%s: %s)' % ( - self.fullpath, SVN_PROPERTY_EOL, check_eol) - raise ValueError(msg) - # And now compute file's checksums - self.directory[self.path] = Content.from_file(path=self.fullpath, - data=True) + eol_style = EOL_STYLE.get(self.fullpath, None) + if eol_style: + # ensure to normalize line endings as defined by svn:eol-style + # property to get the same file checksum as after an export + # or checkout operation with subversion + with open(self.fullpath, 'rb') as f: + data = f.read() + data = _normalize_line_endings(data, eol_style) + mode = os.lstat(self.fullpath).st_mode + self.directory[self.path] = Content.from_bytes(mode=mode, + data=data) + else: + self.directory[self.path] = Content.from_file(path=self.fullpath, + data=True) class BaseDirSWHEditor: @@ -262,8 +285,8 @@ shutil.rmtree(fpath) else: os.remove(fpath) - if path in EOL_CHECK: - del EOL_CHECK[path] + if path in EOL_STYLE: + del EOL_STYLE[path] def update_checksum(self): raise NotImplementedError('This should be implemented.') diff --git a/swh/loader/svn/tests/svn-test-repos/mediawiki-repo-r407-eol-native-crlf.tgz b/swh/loader/svn/tests/svn-test-repos/mediawiki-repo-r407-eol-native-crlf.tgz new file mode 100644 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@