Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/svn/loader.py
# Copyright (C) 2015-2018 The Software Heritage developers | # Copyright (C) 2015-2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
"""Loader in charge of injecting either new or existing svn mirrors to | """Loader in charge of injecting either new or existing svn mirrors to | ||||
swh-storage. | swh-storage. | ||||
""" | """ | ||||
import os | import os | ||||
import pty | |||||
import re | import re | ||||
import shutil | import shutil | ||||
import tempfile | import tempfile | ||||
from mmap import mmap, ACCESS_WRITE | from mmap import mmap, ACCESS_WRITE | ||||
from subprocess import run, PIPE | from subprocess import Popen | ||||
from swh.model import hashutil | from swh.model import hashutil | ||||
from swh.model.from_disk import Directory | from swh.model.from_disk import Directory | ||||
from swh.model.identifiers import identifier_to_bytes, revision_identifier | from swh.model.identifiers import identifier_to_bytes, revision_identifier | ||||
from swh.model.identifiers import snapshot_identifier | from swh.model.identifiers import snapshot_identifier | ||||
from swh.loader.core.loader import BufferedLoader | from swh.loader.core.loader import BufferedLoader | ||||
from swh.loader.core.utils import clean_dangling_folders | from swh.loader.core.utils import clean_dangling_folders | ||||
from . import svn, converters | from . import svn, converters | ||||
from .utils import ( | from .utils import ( | ||||
init_svn_repo_from_dump, init_svn_repo_from_archive_dump | init_svn_repo_from_dump, init_svn_repo_from_archive_dump, | ||||
OutputStream | |||||
) | ) | ||||
from .exception import SvnLoaderUneventful | from .exception import SvnLoaderUneventful | ||||
from .exception import SvnLoaderHistoryAltered | from .exception import SvnLoaderHistoryAltered | ||||
DEFAULT_BRANCH = b'HEAD' | DEFAULT_BRANCH = b'HEAD' | ||||
▲ Show 20 Lines • Show All 608 Lines • ▼ Show 20 Lines | def dump_svn_revisions(self, svn_url, last_loaded_svn_rev=-1): | ||||
# Build the svnrdump command line | # Build the svnrdump command line | ||||
svnrdump_cmd = ['svnrdump', 'dump', svn_url] | svnrdump_cmd = ['svnrdump', 'dump', svn_url] | ||||
# Launch the svnrdump command while capturing stderr as | # Launch the svnrdump command while capturing stderr as | ||||
# successfully dumped revision numbers are printed to it | # successfully dumped revision numbers are printed to it | ||||
dump_temp_dir = tempfile.mkdtemp(dir=self.temp_dir) | dump_temp_dir = tempfile.mkdtemp(dir=self.temp_dir) | ||||
dump_name = ''.join(c for c in svn_url if c.isalnum()) | dump_name = ''.join(c for c in svn_url if c.isalnum()) | ||||
dump_path = '%s/%s.svndump' % (dump_temp_dir, dump_name) | dump_path = '%s/%s.svndump' % (dump_temp_dir, dump_name) | ||||
stderr_lines = [] | |||||
self.log.debug('Executing %s' % ' '.join(svnrdump_cmd)) | self.log.debug('Executing %s' % ' '.join(svnrdump_cmd)) | ||||
with open(dump_path, 'wb') as dump_file: | with open(dump_path, 'wb') as dump_file: | ||||
svnrdump = run(svnrdump_cmd, stdout=dump_file, stderr=PIPE) | stderr_r, stderr_w = pty.openpty() | ||||
svnrdump = Popen(svnrdump_cmd, stdout=dump_file, stderr=stderr_w) | |||||
os.close(stderr_w) | |||||
stderr_stream = OutputStream(stderr_r) | |||||
readable = True | |||||
while readable: | |||||
lines, readable = stderr_stream.read_lines() | |||||
stderr_lines += lines | |||||
vlorentz: Revisions are fetched in order, right?
If so, all but the last line can be skipped; that will make the logs less verbose. | |||||
Not Done · Inline Actions · ardumont: Yes, the revisions are fetched in order. | |||||
Done · Inline Actions · anlambert: The idea here is to get an idea of the current dump progress, so I think it is better to print all dumped revision info. | |||||
for line in lines: | |||||
self.log.debug(line) | |||||
Not Done · Inline Actions · vlorentz: nitpick:
```
readable = True
while readable:
    lines, readable = stderr_stream.read_lines()
    # ...
``` | |||||
svnrdump.wait() | |||||
os.close(stderr_r) | |||||
if svnrdump.returncode == 0: | if svnrdump.returncode == 0: | ||||
return dump_path | return dump_path | ||||
# There was an error but it does not mean that no revisions | # There was an error but it does not mean that no revisions | ||||
# can be loaded. | # can be loaded. | ||||
# Get the stderr line with latest dumped revision | # Get the stderr line with latest dumped revision | ||||
stderr_lines = svnrdump.stderr.split(b'\n') | |||||
last_dumped_rev = None | last_dumped_rev = None | ||||
if len(stderr_lines) > 1: | if len(stderr_lines) > 1: | ||||
last_dumped_rev = stderr_lines[-2] | last_dumped_rev = stderr_lines[-2] | ||||
if last_dumped_rev: | if last_dumped_rev: | ||||
# Get the latest dumped revision number | # Get the latest dumped revision number | ||||
matched_rev = re.search(b'.*revision ([0-9]+)', last_dumped_rev) | matched_rev = re.search('.*revision ([0-9]+)', last_dumped_rev) | ||||
last_dumped_rev = int(matched_rev.group(1)) if matched_rev else -1 | last_dumped_rev = int(matched_rev.group(1)) if matched_rev else -1 | ||||
# Check if revisions inside the dump file can be loaded anyway | # Check if revisions inside the dump file can be loaded anyway | ||||
if last_dumped_rev > last_loaded_svn_rev: | if last_dumped_rev > last_loaded_svn_rev: | ||||
self.log.debug(('svnrdump did not dump all expected revisions ' | self.log.debug(('svnrdump did not dump all expected revisions ' | ||||
'but revisions range %s:%s are available in ' | 'but revisions range %s:%s are available in ' | ||||
'the generated dump file and will be loaded ' | 'the generated dump file and will be loaded ' | ||||
'into the archive.') % (last_loaded_svn_rev+1, | 'into the archive.') % (last_loaded_svn_rev+1, | ||||
last_dumped_rev)) | last_dumped_rev)) | ||||
▲ Show 20 Lines • Show All 58 Lines • Show Last 20 Lines |
Revisions are fetched in order, right?
If so, all but the last line can be skipped; that will make the logs less verbose.