diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py --- a/swh/loader/svn/loader.py +++ b/swh/loader/svn/loader.py @@ -22,6 +22,7 @@ from swh.model.identifiers import snapshot_identifier from swh.loader.core.loader import BufferedLoader from swh.loader.core.utils import clean_dangling_folders +from swh.storage.algos.snapshot import snapshot_get_all_branches from . import svn, converters from .utils import ( @@ -40,7 +41,7 @@ def build_swh_snapshot(revision_id, branch=DEFAULT_BRANCH): - """Build a swh snapshot from the revision id, origin id, and visit. + """Build a swh snapshot from the revision id, origin url, and visit. """ return { @@ -77,7 +78,7 @@ def __init__(self): super().__init__(logging_class='swh.loader.svn.SvnLoader') - self.origin_id = None + self.origin_url = None self.debug = self.config['debug'] self.last_seen_revision = None self.temp_directory = self.config['temp_directory'] @@ -135,28 +136,28 @@ self.svnrepo.clean_fs(local_dirname) return h - def get_svn_repo(self, svn_url, local_dirname, origin_id): + def get_svn_repo(self, svn_url, local_dirname, origin_url): """Instantiates the needed svnrepo collaborator to permit reading svn repository. Args: svn_url (str): the svn repository url to read from local_dirname (str): the local path on disk to compute data - origin_id (int): the corresponding origin id + origin_url (str): the corresponding origin url Returns: Instance of :mod:`swh.loader.svn.svn` clients """ return svn.SvnRepo(svn_url, - local_dirname=local_dirname, origin_id=origin_id) + local_dirname=local_dirname, origin_url=origin_url) - def swh_latest_snapshot_revision(self, origin_id, + def swh_latest_snapshot_revision(self, origin_url, previous_swh_revision=None): """Look for latest snapshot revision and returns it if any. Args: - origin_id (int): Origin identifier + origin_url (str): Origin identifier previous_swh_revision: (optional) id of a possible previous swh revision @@ -171,18 +172,24 @@ """ storage = self.storage if not previous_swh_revision: # check latest snapshot's revision - latest_snap = storage.snapshot_get_latest(origin_id) - if latest_snap: - branches = latest_snap.get('branches') - if not branches: + visit = storage.origin_visit_get_latest( + origin_url, require_snapshot=True) + if visit: + latest_snap = snapshot_get_all_branches( + storage, visit['snapshot']) + if latest_snap: + branches = latest_snap.get('branches') + if not branches: + return {} + branch = branches.get(DEFAULT_BRANCH) + if not branch: + return {} + target_type = branch['target_type'] + if target_type != 'revision': + return {} + previous_swh_revision = branch['target'] + else: return {} - branch = branches.get(DEFAULT_BRANCH) - if not branch: - return {} - target_type = branch['target_type'] - if target_type != 'revision': - return {} - previous_swh_revision = branch['target'] else: return {} @@ -452,7 +459,7 @@ self.last_known_swh_revision = None self.latest_snapshot = self.swh_latest_snapshot_revision( - self.origin_id, self.last_known_swh_revision) + self.origin_url, self.last_known_swh_revision) if destination_path: local_dirname = destination_path @@ -463,7 +470,7 @@ dir=self.temp_directory) self.svnrepo = self.get_svn_repo( - svn_url, local_dirname, self.origin_id) + svn_url, local_dirname, self.origin_url) try: revision_start, revision_end, revision_parents = self.start_from( self.last_known_swh_revision, self.start_from_scratch) @@ -628,7 +635,7 @@ origin = \ self.storage.origin_get({'type': 'svn', 'url': svn_url}) last_swh_rev = \ - self.swh_latest_snapshot_revision(origin['id'])['revision'] + self.swh_latest_snapshot_revision(origin['url'])['revision'] last_swh_rev_headers = \ dict(last_swh_rev['metadata']['extra_headers']) last_loaded_svn_rev = int(last_swh_rev_headers['svn_revision']) diff --git a/swh/loader/svn/svn.py b/swh/loader/svn/svn.py --- a/swh/loader/svn/svn.py +++ b/swh/loader/svn/svn.py @@ -30,13 +30,13 @@ Args: remote_url (str): - origin_id (int): Associated origin identifier + origin_url (str): Associated origin identifier local_dirname (str): Path to write intermediary svn action results """ - def __init__(self, remote_url, origin_id, local_dirname): + def __init__(self, remote_url, origin_url, local_dirname): self.remote_url = remote_url.rstrip('/') - self.origin_id = origin_id + self.origin_url = origin_url auth = Auth([get_username_provider()]) # one connection for log iteration @@ -56,7 +56,7 @@ def __str__(self): return str({ - 'swh-origin': self.origin_id, + 'swh-origin': self.origin_url, 'remote_url': self.remote_url, 'local_url': self.local_url, 'uuid': self.uuid,