diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py
index b0065a0..ea3d05e 100644
--- a/swh/loader/svn/loader.py
+++ b/swh/loader/svn/loader.py
@@ -1,515 +1,559 @@
 # Copyright (C) 2015-2018 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 """Loader in charge of injecting either new or existing svn mirrors to
 swh-storage.

 """
 import os
+import psutil
 import shutil
+import tempfile

 from swh.core import utils
 from swh.model import hashutil
 from swh.model.from_disk import Directory
 from swh.model.identifiers import identifier_to_bytes, revision_identifier
 from swh.model.identifiers import snapshot_identifier

 from swh.loader.core.loader import SWHLoader

 from . import svn, converters
 from .utils import init_svn_repo_from_archive_dump
 from .exception import SvnLoaderEventful, SvnLoaderUneventful
 from .exception import SvnLoaderHistoryAltered

 DEFAULT_BRANCH = b'master'


 def _revision_id(revision):
     return identifier_to_bytes(revision_identifier(revision))


 def build_swh_snapshot(revision_id, branch=DEFAULT_BRANCH):
     """Build a swh snapshot from the revision id, origin id, and visit.

     """
     return {
         'id': None,
         'branches': {
             branch: {
                 'target': revision_id,
                 'target_type': 'revision',
             }
         }
     }


+TEMPORARY_DIR_PREFIX = 'swh.loader.svn.'
+
+
 class SWHSvnLoader(SWHLoader):
     """Swh svn loader to load an svn repository.

     The repository is either remote or local. The loader deals with
     updates on an already previously loaded repository.

     Default policy: keep data as close as possible to the original svn
     data. We only add information needed for updating or for continuing
     from the last known revision (svn revision and svn repository's
     uuid).

     """
     CONFIG_BASE_FILENAME = 'loader/svn'

     ADDITIONAL_CONFIG = {
         'check_revision': ('int', 1000),
+        'temp_directory': ('str', '/tmp'),
         'debug': ('bool', False),  # NOT FOR PRODUCTION, False is mandatory
     }

     def __init__(self):
         super().__init__(logging_class='swh.loader.svn.SvnLoader')
         self.check_revision = self.config['check_revision']
         self.origin_id = None
         self.debug = self.config['debug']
         self.last_seen_revision = None
+        self.temp_directory = self.config['temp_directory']
+
+    def pre_cleanup(self):
+        """Clean up potential dangling files from prior runs (e.g. OOM-killed
+        tasks).
+
+        """
+        if not os.path.exists(self.temp_directory):
+            return
+        for filename in os.listdir(self.temp_directory):
+            path_to_cleanup = os.path.join(self.temp_directory, filename)
+            try:
+                # pattern: `swh.loader.svn.{noise}-{pid}`
+                if TEMPORARY_DIR_PREFIX not in filename or \
+                   '-' not in filename:  # silently ignore unknown patterns
+                    continue
+                _, pid = filename.split('-')
+                pid = int(pid.split('.')[0])
+                if psutil.pid_exists(pid):
+                    self.log.debug('PID %s is live, skipping' % pid)
+                    continue
+                # could be removed concurrently, so check existence again
+                if os.path.exists(path_to_cleanup):
+                    shutil.rmtree(path_to_cleanup)
+            except Exception as e:
+                msg = 'Failed to clean dangling path %s: %s' % (
+                    path_to_cleanup, e)
+                self.log.warn(msg)
""" if self.debug: self.log.error('''NOT FOR PRODUCTION - debug flag activated Local repository not cleaned up for investigation: %s''' % ( self.svnrepo.local_url.decode('utf-8'), )) return self.svnrepo.clean_fs() def swh_revision_hash_tree_at_svn_revision(self, revision): """Compute and return the hash tree at a given svn revision. Args: rev (int): the svn revision we want to check Returns: The hash tree directory as bytes. """ local_dirname, local_url = self.svnrepo.export_temporary(revision) h = Directory.from_disk(path=local_url).hash self.svnrepo.clean_fs(local_dirname) return h - def get_svn_repo(self, svn_url, destination_path, origin): + def get_svn_repo(self, svn_url, local_dirname, origin): """Instantiates the needed svnrepo collaborator to permit reading svn repository. Args: svn_url (str): the svn repository url to read from - destination_path (str): the local path on disk to compute data + local_dirname (str): the local path on disk to compute data origin (int): the corresponding origin Returns: Instance of :mod:`swh.loader.svn.svn` clients """ return svn.SWHSvnRepo( svn_url, origin['id'], self.storage, - destination_path=destination_path) + local_dirname=local_dirname) def swh_latest_snapshot_revision(self, origin_id, previous_swh_revision=None): """Look for latest snapshot revision and returns it if any. Args: origin_id (int): Origin identifier previous_swh_revision: (optional) id of a possible previous swh revision Returns: dict: The latest known point in time. Dict with keys: 'revision': latest visited revision 'snapshot': latest snapshot If None is found, return an empty dict. """ storage = self.storage if not previous_swh_revision: # check latest snapshot's revision latest_snap = storage.snapshot_get_latest(origin_id) if latest_snap: branches = latest_snap.get('branches') if not branches: return {} branch = branches.get(DEFAULT_BRANCH) if not branch: return {} target_type = branch['target_type'] if target_type != 'revision': return {} previous_swh_revision = branch['target'] else: return {} revs = list(storage.revision_get([previous_swh_revision])) if revs: return { 'snapshot': latest_snap, 'revision': revs[0] } return {} def build_swh_revision(self, rev, commit, dir_id, parents): """Build the swh revision dictionary. This adds: - the `'synthetic`' flag to true - the '`extra_headers`' containing the repository's uuid and the svn revision number. Args: rev (dict): the svn revision commit (dict): the commit metadata dir_id (bytes): the upper tree's hash identifier parents ([bytes]): the parents' identifiers Returns: The swh revision corresponding to the svn revision. """ return converters.build_swh_revision(rev, commit, self.svnrepo.uuid, dir_id, parents) def check_history_not_altered(self, svnrepo, revision_start, swh_rev): """Given a svn repository, check if the history was not tampered with. 
""" revision_id = swh_rev['id'] parents = swh_rev['parents'] hash_data_per_revs = svnrepo.swh_hash_data_at_revision(revision_start) rev = revision_start rev, _, commit, _, root_dir = list(hash_data_per_revs)[0] dir_id = root_dir.hash swh_revision = self.build_swh_revision(rev, commit, dir_id, parents) swh_revision_id = _revision_id(swh_revision) return swh_revision_id == revision_id def process_repository(self, origin_visit, last_known_swh_revision=None, start_from_scratch=False): """The main idea of this function is to: - iterate over the svn commit logs - extract the svn commit log metadata - compute the hashes from the current directory down to the file - compute the equivalent swh revision - send all those objects for storage - create an swh occurrence pointing to the last swh revision seen - send that occurrence for storage in swh-storage. """ svnrepo = self.svnrepo revision_head = svnrepo.head_revision() if revision_head == 0: # empty repository case revision_start = 0 revision_end = 0 else: # default configuration revision_start = svnrepo.initial_revision() revision_end = revision_head revision_parents = { revision_start: [] } if not start_from_scratch: # Check if we already know a previous revision for that origin if self.latest_snapshot: swh_rev = self.latest_snapshot['revision'] else: swh_rev = None # Determine from which known revision to start swh_rev = self.init_from(last_known_swh_revision, previous_swh_revision=swh_rev) if swh_rev: # Yes, we know a previous revision. Try and update it. extra_headers = dict(swh_rev['metadata']['extra_headers']) revision_start = int(extra_headers['svn_revision']) revision_parents = { revision_start: swh_rev['parents'], } self.log.debug('svn export --ignore-keywords %s@%s' % ( svnrepo.remote_url, revision_start)) if swh_rev and not self.check_history_not_altered( svnrepo, revision_start, swh_rev): msg = 'History of svn %s@%s altered. ' \ 'Skipping...' % ( svnrepo.remote_url, revision_start) raise SvnLoaderHistoryAltered(msg) # now we know history is ok, we start at next revision revision_start = revision_start + 1 # and the parent become the latest know revision for # that repository revision_parents[revision_start] = [swh_rev['id']] if revision_start > revision_end and revision_start is not 1: msg = '%s@%s already injected.' % (svnrepo.remote_url, revision_end) raise SvnLoaderUneventful(msg) self.log.info('Processing revisions [%s-%s] for %s' % ( revision_start, revision_end, svnrepo)) # process and store revision to swh (sent by by blocks of # 'revision_packet_size') return self.process_swh_revisions( svnrepo, revision_start, revision_end, revision_parents) def process_svn_revisions(self, svnrepo, revision_start, revision_end, revision_parents): """Process revisions from revision_start to revision_end and send to swh for storage. At each svn revision, checkout the repository, compute the tree hash and blobs and send for swh storage to store. Then computes and yields the swh revision. Note that at every self.check_revision, an svn export is done and a hash tree is computed to check that no divergence occurred. Yields: swh revision as a dictionary with keys, sha1_git, sha1, etc... 
""" gen_revs = svnrepo.swh_hash_data_per_revision( revision_start, revision_end) swh_revision = None count = 0 for rev, nextrev, commit, new_objects, root_directory in gen_revs: count += 1 # Send the associated contents/directories self.maybe_load_contents(new_objects.get('content', {}).values()) self.maybe_load_directories( new_objects.get('directory', {}).values()) # compute the fs tree's checksums dir_id = root_directory.hash swh_revision = self.build_swh_revision( rev, commit, dir_id, revision_parents[rev]) swh_revision['id'] = _revision_id(swh_revision) self.log.debug('rev: %s, swhrev: %s, dir: %s' % ( rev, hashutil.hash_to_hex(swh_revision['id']), hashutil.hash_to_hex(dir_id))) if (count % self.check_revision) == 0: # hash computation check self.log.debug('Checking hash computations on revision %s...' % rev) checked_dir_id = self.swh_revision_hash_tree_at_svn_revision( rev) if checked_dir_id != dir_id: err = 'Hash tree computation divergence detected ' \ '(%s != %s), stopping!' % ( hashutil.hash_to_hex(dir_id), hashutil.hash_to_hex(checked_dir_id)) raise ValueError(err) if nextrev: revision_parents[nextrev] = [swh_revision['id']] yield swh_revision def process_swh_revisions(self, svnrepo, revision_start, revision_end, revision_parents): """Process and store revision to swh (sent by blocks of revision_packet_size) Returns: The latest revision stored. """ try: swh_revision_gen = self.process_svn_revisions(svnrepo, revision_start, revision_end, revision_parents) revs = [] for revisions in utils.grouper( swh_revision_gen, self.config['revision_packet_size']): revs = list(revisions) self.maybe_load_revisions(revs) last_revision = revs[-1] self.log.debug('Processed %s revisions: [..., %s]' % ( len(revs), hashutil.hash_to_hex(last_revision['id']))) self.last_seen_revision = last_revision except Exception as e: if revs: # flush remaining revisions self.maybe_load_revisions(revs) # Take the last one as the last known revisions known_swh_rev = revs[-1] elif self.last_seen_revision: # We'll try to make a snapshot known_swh_rev = self.last_seen_revision else: raise _id = known_swh_rev.get('id') if not _id: _id = _revision_id(known_swh_rev) # Then notify something is wrong, and we stopped at that rev. raise SvnLoaderEventful(e, swh_revision={ 'id': _id, }) return last_revision def process_swh_snapshot(self, revision=None, snapshot=None): """Create the snapshot either from existing snapshot or revision. 
""" if snapshot: snap = snapshot elif revision: snap = build_swh_snapshot(revision['id']) snap['id'] = identifier_to_bytes(snapshot_identifier(snap)) else: return None self.log.debug('snapshot: %s' % snap) self.maybe_load_snapshot(snap) def prepare_origin_visit(self, *, svn_url, visit_date=None, origin_url=None, **kwargs): self.origin = { 'url': origin_url if origin_url else svn_url, 'type': 'svn', } self.visit_date = visit_date - def prepare(self, *, destination_path, svn_url, + def prepare(self, *, svn_url, destination_path=None, swh_revision=None, start_from_scratch=False, **kwargs): self.start_from_scratch = start_from_scratch if swh_revision: self.last_known_swh_revision = hashutil.hash_to_bytes( swh_revision) else: self.last_known_swh_revision = None self.latest_snapshot = self.swh_latest_snapshot_revision( self.origin_id, self.last_known_swh_revision) - self.svnrepo = self.get_svn_repo( - svn_url, destination_path, self.origin) + + if destination_path: + local_dirname = destination_path + else: + local_dirname = tempfile.mkdtemp(suffix='-%s' % os.getpid(), + prefix=TEMPORARY_DIR_PREFIX, + dir=self.temp_directory) + self.svnrepo = self.get_svn_repo(svn_url, local_dirname, self.origin) def fetch_data(self): """We need to fetch and stream the data to store directly. So fetch_data do actually nothing. The method ``store_data`` below is in charge to do everything, fetch and store. """ pass def store_data(self): """We need to fetch and stream the data to store directly because there is too much data and state changes. Everything is intertwined together (We receive patch and apply on disk and compute at the hashes at the same time) So every data to fetch and store is done here. Note: origin_visit and last_known_swh_revision must have been set in the prepare method. """ origin_visit = {'origin': self.origin_id, 'visit': self.visit} try: latest_rev = self.process_repository( origin_visit, last_known_swh_revision=self.last_known_swh_revision, start_from_scratch=self.start_from_scratch) except SvnLoaderEventful as e: latest_rev = e.swh_revision self.process_swh_snapshot(revision=latest_rev) raise except Exception as e: if self.latest_snapshot and 'snapshot' in self.latest_snapshot: snapshot = self.latest_snapshot['snapshot'] self.process_swh_snapshot(snapshot=snapshot) raise else: self.process_swh_snapshot(revision=latest_rev) def init_from(self, partial_swh_revision, previous_swh_revision): """Function to determine from where to start from. Args: partial_swh_revision: A known revision from which the previous loading did not finish. known_previous_revision: A known revision from which the previous loading did finish. Returns: The revision from which to start or None if nothing (fresh start). """ if partial_swh_revision and not previous_swh_revision: return partial_swh_revision if not partial_swh_revision and previous_swh_revision: return previous_swh_revision if partial_swh_revision and previous_swh_revision: # will determine from which to start from extra_headers1 = dict( partial_swh_revision['metadata']['extra_headers']) extra_headers2 = dict( previous_swh_revision['metadata']['extra_headers']) rev_start1 = int(extra_headers1['svn_revision']) rev_start2 = int(extra_headers2['svn_revision']) if rev_start1 <= rev_start2: return previous_swh_revision return partial_swh_revision return None class SWHSvnLoaderFromDumpArchive(SWHSvnLoader): """Uncompress an archive containing an svn dump, mount the svn dump as an svn repository and load said repository. 
""" def __init__(self, archive_path): super().__init__() self.log.info('Archive to mount and load %s' % archive_path) self.temp_dir, self.repo_path = init_svn_repo_from_archive_dump( - archive_path) + archive_path, + prefix=TEMPORARY_DIR_PREFIX, + suffix='-%s' % os.getpid(), + root_dir=self.temp_directory) def cleanup(self): super().cleanup() if self.temp_dir and os.path.exists(self.temp_dir): - self.log.debug('Clean up temp directory %s for project %s' % ( - self.temp_dir, os.path.basename(self.repo_path))) + msg = 'Clean up temporary directory dump %s for project %s' % ( + self.temp_dir, os.path.basename(self.repo_path)) + self.log.debug(msg) shutil.rmtree(self.temp_dir) diff --git a/swh/loader/svn/svn.py b/swh/loader/svn/svn.py index 0c28967..8bda250 100644 --- a/swh/loader/svn/svn.py +++ b/swh/loader/svn/svn.py @@ -1,270 +1,267 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """SVN client in charge of iterating over svn logs and yield commit representations including the hash tree/content computations per svn commit. """ import os import tempfile import shutil from subvertpy.ra import RemoteAccess, Auth, get_username_provider from subvertpy import client, properties from swh.model.from_disk import Directory from . import ra, converters # When log message contains empty data DEFAULT_AUTHOR_MESSAGE = '' class SWHSvnRepo: """SWH's svn repository representation. + Args: + remote_url (str): + origin_id (int): Associated origin identifier + storage (Storage): Storage to use to execute storage statements + local_dirname (str): Path to write intermediary svn action results + """ - def __init__(self, remote_url, origin_id, storage, - destination_path=None): + def __init__(self, remote_url, origin_id, storage, local_dirname): self.remote_url = remote_url.rstrip('/') self.storage = storage self.origin_id = origin_id - if destination_path: - os.makedirs(destination_path, exist_ok=True) - self.root_dir = destination_path - else: - self.root_dir = '/tmp' - auth = Auth([get_username_provider()]) # one connection for log iteration self.conn_log = RemoteAccess(self.remote_url, auth=auth) # another for replay self.conn = RemoteAccess(self.remote_url, auth=auth) # one client for update operation self.client = client.Client(auth=auth) - self.local_dirname = tempfile.mkdtemp(suffix='.tmp', - prefix='swh.loader.svn.', - dir=self.root_dir) + self.local_dirname = local_dirname local_name = os.path.basename(self.remote_url) self.local_url = os.path.join(self.local_dirname, local_name).encode( 'utf-8') self.uuid = self.conn.get_uuid().encode('utf-8') self.swhreplay = ra.SWHReplay(conn=self.conn, rootpath=self.local_url) def __str__(self): return str({ 'swh-origin': self.origin_id, 'remote_url': self.remote_url, 'local_url': self.local_url, 'uuid': self.uuid, }) def head_revision(self): """Retrieve current head revision. """ return self.conn.get_latest_revnum() def initial_revision(self): """Retrieve the initial revision from which the remote url appeared. """ return 1 def convert_commit_message(self, msg): """Simply encode the commit message. Args: msg (str): the commit message to convert. Returns: The transformed message as bytes. """ if isinstance(msg, bytes): return msg return msg.encode('utf-8') def convert_commit_date(self, date): """Convert the message commit date into a timestamp in swh format. 
     def convert_commit_date(self, date):
         """Convert the commit date into a timestamp in swh format.
         The precision is kept.

         Args:
             date (str): the commit date to convert.

         Returns:
             The transformed date.

         """
         return converters.svn_date_to_swh_date(date)

     def convert_commit_author(self, author):
         """Convert the commit author into an swh person.

         The user becomes a dictionary of the form::

             {
                 name: author,
                 email: '',
                 fullname: author
             }

         Args:
             author (str): the commit author to convert.

         Returns:
             The transformed author as dict.

         """
         return converters.svn_author_to_swh_person(author)

     def __to_entry(self, log_entry):
         changed_paths, rev, revprops, has_children = log_entry

         author_date = self.convert_commit_date(
             revprops.get(properties.PROP_REVISION_DATE))

         author = self.convert_commit_author(
             revprops.get(properties.PROP_REVISION_AUTHOR))

         message = self.convert_commit_message(
             revprops.get(properties.PROP_REVISION_LOG,
                          DEFAULT_AUTHOR_MESSAGE))

         return {
             'rev': rev,
             'author_date': author_date,
             'author_name': author,
             'message': message,
         }

     def logs(self, revision_start, revision_end):
         """Stream svn logs between revision_start and revision_end by chunks
         of block_size logs.

         Yields revisions and associated revision information between
         revision_start and revision_end.

         Args:
             revision_start: the svn revision starting bound
             revision_end: the svn revision ending bound

         Yields:
             tuple: tuple of revisions and logs:

             - revisions: list of revisions in order
             - logs: Dictionary with key revision number and value the log
               entry. The log entry is a dictionary with the following keys:

                 - author_date: date of the commit
                 - author_name: name of the author
                 - message: commit message

         """
         for log_entry in self.conn_log.iter_log(paths=None,
                                                 start=revision_start,
                                                 end=revision_end,
                                                 discover_changed_paths=False):
             yield self.__to_entry(log_entry)

     def export(self, revision):
         """Export the repository at a given revision.

         """
         self.client.export(self.remote_url,
                            to=self.local_url.decode('utf-8'),
                            rev=revision,
                            ignore_keywords=True)

     def export_temporary(self, revision):
         """Export the repository at a given revision to a temporary location.
         It is up to the caller of this function to clean up the temporary
         location when done (cf. the self.clean_fs method).

         Args:
             revision: Revision to export at

         Returns:
             Tuple of (local_dirname, local_url) where local_dirname is the
             temporary location's root folder and local_url is where the
             repository was exported.

         """
         local_dirname = tempfile.mkdtemp(
             prefix='check-revision-%s.' % revision,
             dir=self.local_dirname)
         local_name = os.path.basename(self.remote_url)
         local_url = os.path.join(local_dirname, local_name)
         self.client.export(
             self.remote_url, to=local_url, rev=revision,
             ignore_keywords=True)
         return local_dirname, os.fsencode(local_url)

     def swh_hash_data_per_revision(self, start_revision, end_revision):
         """Compute swh hash data for each revision between start_revision
         and end_revision.

         Args:
             start_revision: starting revision
             end_revision: ending revision

         Yields:
             tuple (rev, nextrev, commit, objects_per_path):

             - rev: current revision
             - nextrev: next revision
             - commit: commit data (author, date, message) for such revision
             - objects_per_path: dictionary of path, swh hash data with type

         """
         for commit in self.logs(start_revision, end_revision):
             rev = commit['rev']
             objects = self.swhreplay.compute_hashes(rev)

             if rev == end_revision:
                 nextrev = None
             else:
                 nextrev = rev + 1

             yield rev, nextrev, commit, objects, self.swhreplay.directory

     def swh_hash_data_at_revision(self, revision):
         """Compute the hash data at revision. Expected to be used for
         updates only.

         """
         # Update the disk at revision
         self.export(revision)

         # Compute the current hashes on disk
         directory = Directory.from_disk(path=os.fsencode(self.local_url),
                                         save_path=True)

         # Update the replay collaborator with the right state
         self.swhreplay = ra.SWHReplay(
             conn=self.conn,
             rootpath=self.local_url,
             directory=directory)

         # Retrieve the commit information for revision
         commit = list(self.logs(revision, revision))[0]

         yield revision, revision + 1, commit, {}, directory

     def clean_fs(self, local_dirname=None):
         """Clean up the local working copy.

         Args:
             local_dirname (str): Path to remove recursively if provided.
                 Otherwise, remove the temporary upper root tree used for
                 svn repository loading.

         """
         if local_dirname:
             shutil.rmtree(local_dirname)
         else:
             shutil.rmtree(self.local_dirname)
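To make the `SWHSvnRepo` contract above concrete, here is a hedged usage sketch. It assumes a local repository reachable at a hypothetical `file://` url, and passes `storage=None` as a placeholder on the assumption that plain log iteration never touches storage; neither the url nor that calling convention comes from the patch itself:

    import tempfile

    from swh.loader.svn.svn import SWHSvnRepo

    repo = SWHSvnRepo(
        'file:///tmp/sample-svn-repo',  # hypothetical local repository
        origin_id=1,
        storage=None,                   # placeholder: unused for log walks
        local_dirname=tempfile.mkdtemp(prefix='swh.loader.svn.'))

    # Walk the full history, printing the metadata __to_entry produces
    head = repo.head_revision()
    for entry in repo.logs(repo.initial_revision(), head):
        print(entry['rev'], entry['author_name'], entry['message'])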
""" # Update the disk at revision self.export(revision) # Compute the current hashes on disk directory = Directory.from_disk(path=os.fsencode(self.local_url), save_path=True) # Update the replay collaborator with the right state self.swhreplay = ra.SWHReplay( conn=self.conn, rootpath=self.local_url, directory=directory) # Retrieve the commit information for revision commit = list(self.logs(revision, revision))[0] yield revision, revision + 1, commit, {}, directory def clean_fs(self, local_dirname=None): """Clean up the local working copy. Args: local_dirname (str): Path to remove recursively if provided. Otherwise, remove the temporary upper root tree used for svn repository loading. """ if local_dirname: shutil.rmtree(local_dirname) else: shutil.rmtree(self.local_dirname) diff --git a/swh/loader/svn/tasks.py b/swh/loader/svn/tasks.py index b671fca..973b444 100644 --- a/swh/loader/svn/tasks.py +++ b/swh/loader/svn/tasks.py @@ -1,53 +1,53 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.scheduler.task import Task from .loader import SWHSvnLoader, SWHSvnLoaderFromDumpArchive class LoadSWHSvnRepositoryTsk(Task): """Import one svn repository to Software Heritage. """ task_queue = 'swh_loader_svn' def run_task(self, *args, **kwargs): """Import a svn repository with swh policy. Args: args: ordered arguments (expected None) kwargs: Dictionary with the following expected keys: - svn_url: (mandatory) svn's repository url - destination_path: (mandatory) root directory to locally retrieve svn's data - swh_revision: (optional) extra SWH revision hex to start from. see swh.loader.svn.SvnLoader.process docstring """ loader = SWHSvnLoader() loader.log = self.log return loader.load(*args, **kwargs) class MountAndLoadSvnRepositoryTsk(Task): task_queue = 'swh_loader_svn_mount_and_load' def run_task(self, archive_path, origin_url=None, visit_date=None, start_from_scratch=False): """1. Mount an svn dump from archive as a local svn repository. 2. Load it through the svn loader. 3. Clean up mounted svn repository archive. """ loader = SWHSvnLoaderFromDumpArchive(archive_path) loader.log = self.log return loader.load(svn_url='file://%s' % loader.repo_path, origin_url=origin_url, visit_date=visit_date, - destination_path=None, + archive_path=archive_path, start_from_scratch=start_from_scratch) diff --git a/swh/loader/svn/utils.py b/swh/loader/svn/utils.py index 9f5214c..90b749f 100644 --- a/swh/loader/svn/utils.py +++ b/swh/loader/svn/utils.py @@ -1,76 +1,76 @@ # Copyright (C) 2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import tempfile import shutil from dateutil import parser from subprocess import PIPE, Popen, call def strdate_to_timestamp(strdate): """Convert a string date to an int timestamp. 
diff --git a/swh/loader/svn/utils.py b/swh/loader/svn/utils.py
index 9f5214c..90b749f 100644
--- a/swh/loader/svn/utils.py
+++ b/swh/loader/svn/utils.py
@@ -1,76 +1,76 @@
 # Copyright (C) 2016 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import os
 import tempfile
 import shutil

 from dateutil import parser
 from subprocess import PIPE, Popen, call


 def strdate_to_timestamp(strdate):
     """Convert a string date to an int timestamp.

     Args:
         strdate: A string representing a date with format like
             'YYYY-mm-DDTHH:MM:SS.800722Z'

     Returns:
         A dict with integer keys 'seconds' and 'microseconds'.

     """
     if strdate:
         dt = parser.parse(strdate)
         ts = {
             'seconds': int(dt.timestamp()),
             'microseconds': dt.microsecond,
         }
     else:  # epoch
         ts = {'seconds': 0, 'microseconds': 0}
     return ts


-def init_svn_repo_from_archive_dump(archive_path, root_temp_dir='/tmp'):
+def init_svn_repo_from_archive_dump(archive_path, prefix=None, suffix=None,
+                                    root_dir='/tmp'):
     """Given a path to an archive containing an svn dump, initialize an svn
     repository with the content of said dump.

     Returns:
         A tuple:
-        - temporary folder: containing the mounted repository
-        - repo_path, path to the mounted repository inside the temporary
-          folder
+        - temporary folder (str): containing the mounted repository
+        - repo_path (str): path to the mounted repository inside the
+          temporary folder

     Raises:
         ValueError in case of failure to run the command to uncompress
         and load the dump.

     """
     project_name = os.path.basename(os.path.dirname(archive_path))
-    temp_dir = tempfile.mkdtemp(suffix='.tmp',
-                                prefix='swh.loader.svn.',
-                                dir=root_temp_dir)
+    temp_dir = tempfile.mkdtemp(prefix=prefix, suffix=suffix, dir=root_dir)

     try:
         repo_path = os.path.join(temp_dir, project_name)

         # create the repository that will be loaded with the dump
         cmd = ['svnadmin', 'create', repo_path]
         r = call(cmd)
         if r != 0:
             raise ValueError(
                 'Failed to initialize empty svn repo for %s' % project_name)

         with Popen(['gzip', '-dc', archive_path], stdout=PIPE) as dump:
             cmd = ['svnadmin', 'load', '-q', repo_path]
             r = call(cmd, stdin=dump.stdout)
             if r != 0:
                 raise ValueError(
                     'Failed to mount the svn dump for project %s' %
                     project_name)
             return temp_dir, repo_path
     except Exception as e:
         shutil.rmtree(temp_dir)
         raise e
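For reference, `strdate_to_timestamp` keeps the sub-second precision in a separate field rather than folding it into a float. A quick illustration of both branches (the sample date reuses the format from the docstring):

    from swh.loader.svn.utils import strdate_to_timestamp

    ts = strdate_to_timestamp('2011-05-31T06:04:39.800722Z')
    print(ts)  # {'seconds': 1306821879, 'microseconds': 800722}

    # A missing date falls back to the epoch
    print(strdate_to_timestamp(None))  # {'seconds': 0, 'microseconds': 0}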