Propagate the loader's 'max_content_size' configuration value to SvnRepo, which
passes it to Directory.from_disk as max_content_length. The swh.model
requirement is raised to >= 0.0.59 and the get_svn_repo helper is removed.

SvnRepo is instantiated with keyword arguments: its positional order is
(remote_url, origin_url, local_dirname, max_content_length), which differs
from the (svn_url, local_dirname, origin_url) order of the removed helper.

diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,4 @@
 swh.storage >= 0.0.163
-swh.model >= 0.0.54
+swh.model >= 0.0.59
 swh.scheduler >= 0.0.39
 swh.loader.core >= 0.0.78
diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py
--- a/swh/loader/svn/loader.py
+++ b/swh/loader/svn/loader.py
@@ -110,6 +110,7 @@
         self.destination_path = destination_path
         self.start_from_scratch = start_from_scratch
         self.swh_revision = swh_revision
+        self.max_content_length = self.config['max_content_size']
 
     def pre_cleanup(self):
         """Cleanup potential dangling files from prior runs (e.g. OOM killed
@@ -148,22 +149,6 @@
         self.svnrepo.clean_fs(local_dirname)
         return h
 
-    def get_svn_repo(self, svn_url, local_dirname, origin_url):
-        """Instantiates the needed svnrepo collaborator to permit reading svn
-        repository.
-
-        Args:
-            svn_url (str): the svn repository url to read from
-            local_dirname (str): the local path on disk to compute data
-            origin_url (str): the corresponding origin url
-
-        Returns:
-            Instance of :mod:`swh.loader.svn.svn` clients
-
-        """
-        return svn.SvnRepo(svn_url,
-                           local_dirname=local_dirname, origin_url=origin_url)
-
     def swh_latest_snapshot_revision(self, origin_url,
                                      previous_swh_revision=None):
         """Look for latest snapshot revision and returns it if any.
@@ -476,8 +461,11 @@
             prefix=TEMPORARY_DIR_PREFIX_PATTERN,
             dir=self.temp_directory)
 
-        self.svnrepo = self.get_svn_repo(
-            self.svn_url, local_dirname, self.origin_url)
+        self.svnrepo = svn.SvnRepo(
+            self.svn_url, origin_url=self.origin_url,
+            local_dirname=local_dirname,
+            max_content_length=self.max_content_length)
+
         try:
             revision_start, revision_end, revision_parents = self.start_from(
                 self.last_known_swh_revision, self.start_from_scratch)
diff --git a/swh/loader/svn/ra.py b/swh/loader/svn/ra.py
--- a/swh/loader/svn/ra.py
+++ b/swh/loader/svn/ra.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2018 The Software Heritage developers
+# Copyright (C) 2016-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -248,8 +248,7 @@
             self.directory[self.path] = Content.from_bytes(mode=mode,
                                                            data=data)
         else:
-            self.directory[self.path] = Content.from_file(path=self.fullpath,
-                                                          data=True)
+            self.directory[self.path] = Content.from_file(path=self.fullpath)
 
 
 class BaseDirEditor:
diff --git a/swh/loader/svn/svn.py b/swh/loader/svn/svn.py
--- a/swh/loader/svn/svn.py
+++ b/swh/loader/svn/svn.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2018 The Software Heritage developers
+# Copyright (C) 2015-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -34,7 +34,8 @@
         local_dirname (str): Path to write intermediary svn action results
 
     """
-    def __init__(self, remote_url, origin_url, local_dirname):
+    def __init__(self, remote_url, origin_url, local_dirname,
+                 max_content_length):
         self.remote_url = remote_url.rstrip('/')
         self.origin_url = origin_url
 
@@ -53,6 +54,7 @@
 
         self.uuid = self.conn.get_uuid().encode('utf-8')
         self.swhreplay = ra.Replay(conn=self.conn, rootpath=self.local_url)
+        self.max_content_length = max_content_length
 
     def __str__(self):
         return str({
@@ -237,8 +239,9 @@
         # Update the disk at revision
         self.export(revision)
         # Compute the current hashes on disk
-        directory = Directory.from_disk(path=os.fsencode(self.local_url),
-                                        save_path=True)
+        directory = Directory.from_disk(
+            path=os.fsencode(self.local_url),
+            max_content_length=self.max_content_length)
 
         # Update the replay collaborator with the right state
         self.swhreplay = ra.Replay(