diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py --- a/swh/loader/svn/loader.py +++ b/swh/loader/svn/loader.py @@ -7,14 +7,14 @@ swh-storage. """ - import os +import pty import re import shutil import tempfile from mmap import mmap, ACCESS_WRITE -from subprocess import run, PIPE +from subprocess import Popen from swh.model import hashutil from swh.model.from_disk import Directory @@ -25,7 +25,8 @@ from . import svn, converters from .utils import ( - init_svn_repo_from_dump, init_svn_repo_from_archive_dump + init_svn_repo_from_dump, init_svn_repo_from_archive_dump, + OutputStream ) from .exception import SvnLoaderUneventful from .exception import SvnLoaderHistoryAltered @@ -650,9 +651,21 @@ dump_temp_dir = tempfile.mkdtemp(dir=self.temp_dir) dump_name = ''.join(c for c in svn_url if c.isalnum()) dump_path = '%s/%s.svndump' % (dump_temp_dir, dump_name) + stderr_lines = [] self.log.debug('Executing %s' % ' '.join(svnrdump_cmd)) with open(dump_path, 'wb') as dump_file: - svnrdump = run(svnrdump_cmd, stdout=dump_file, stderr=PIPE) + stderr_r, stderr_w = pty.openpty() + svnrdump = Popen(svnrdump_cmd, stdout=dump_file, stderr=stderr_w) + os.close(stderr_w) + stderr_stream = OutputStream(stderr_r) + readable = True + while readable: + lines, readable = stderr_stream.read_lines() + stderr_lines += lines + for line in lines: + self.log.debug(line) + svnrdump.wait() + os.close(stderr_r) if svnrdump.returncode == 0: return dump_path @@ -661,14 +674,13 @@ # can be loaded. 
class OutputStream:
    """Helper class to read lines from a program output while
    it is running

    Bytes are read from the file descriptor in bounded chunks and kept
    undecoded until a full line is available. Decoding only complete
    lines guarantees that a multi-byte UTF-8 character or a ``\\r\\n``
    sequence split across two reads is still handled correctly.

    Args:
        fileno (int): File descriptor of a program output stream
            (e.g. the read end of a pipe or the master end of a pty)
    """

    # Maximum number of bytes requested from the descriptor per read
    _READ_SIZE = 1000

    def __init__(self, fileno):
        self._fileno = fileno
        # Bytes of the last, not yet terminated, line
        self._buffer = b''

    def read_lines(self):
        """
        Read available lines from the output stream and return them.

        Line terminators (``\\n`` or ``\\r\\n``) are stripped from the
        returned lines. Bytes that are not valid UTF-8 are replaced by
        U+FFFD instead of raising, as the stream content is diagnostic
        output that must not abort the caller.

        Returns:
            Tuple[List[str], bool]: A tuple whose first member is the read
            lines and second member a boolean indicating if there are
            still some other lines available to read.

        Raises:
            OSError: if reading the descriptor fails with an error
                other than EIO
        """
        try:
            chunk = os.read(self._fileno, self._READ_SIZE)
        except OSError as e:
            # EIO is handled as end of stream: reading the master end of
            # a pty typically fails that way once the child closed its end
            if e.errno != errno.EIO:
                raise
            chunk = b''

        if not chunk:
            # End of stream: flush the trailing unterminated line, if any
            pending, self._buffer = self._buffer, b''
            return ([self._decode(pending)] if pending else [], False)

        # Normalize on buffer + chunk so that a '\r\n' pair cut by the
        # read boundary is still collapsed into a single '\n'
        data = (self._buffer + chunk).replace(b'\r\n', b'\n')
        # The last element is the beginning of a line not terminated yet
        # (empty when the data ends with a newline): keep it for later
        *complete, self._buffer = data.split(b'\n')
        return ([self._decode(line) for line in complete], True)

    @staticmethod
    def _decode(raw):
        """Decode a complete line, never failing on invalid bytes."""
        return raw.decode('utf-8', errors='replace')