Page Menu | Home | Software Heritage

No One | Temporary

diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py
index 4348eac..b638b94 100644
--- a/swh/loader/svn/loader.py
+++ b/swh/loader/svn/loader.py
@@ -1,252 +1,255 @@
# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import logging
import os
import svn.remote as remote
import svn.local as local
import tempfile
from contextlib import contextmanager
from swh.loader.svn import libloader
from swh.loader.dir import git
def checkout_repo(remote_repo_url, destination_path=None):
    """Checkout a remote svn repository locally.

    The working copy is created in a sub-folder of the parent folder, named
    after the last path component of remote_repo_url.

    Args:
        remote_repo_url: The remote svn url
        destination_path: The optional local parent folder to checkout the
            repository to. It is created when it does not exist yet. When
            omitted, a fresh temporary folder is created under /tmp.

    Returns:
        Dictionary with the following keys:
        - remote: remote instance to manipulate the repo
        - local: local instance to manipulate the local repo
        - remote_url: remote url (same as input)
        - local_url: local url which has been computed

    """
    # os.path.basename returns '' when the url ends with a slash, which
    # would make the checkout land directly in the parent folder. Strip
    # trailing slashes first so a named sub-folder is always used.
    name = os.path.basename(remote_repo_url.rstrip('/'))

    if destination_path:
        os.makedirs(destination_path, exist_ok=True)
        local_dirname = destination_path
    else:
        local_dirname = tempfile.mkdtemp(suffix='swh.loader.svn.',
                                         prefix='tmp.',
                                         dir='/tmp')

    local_repo_url = os.path.join(local_dirname, name)

    remote_client = remote.RemoteClient(remote_repo_url)
    remote_client.checkout(local_repo_url)

    return {'remote': remote_client,
            'local': local.LocalClient(local_repo_url),
            'remote_url': remote_repo_url,
            'local_url': local_repo_url}
def retrieve_last_known_revision(remote_url_repo):
    """Return the last known revision for the given remote url.

    This is currently a stub: the url is not looked at and the answer is
    always 1, the value used when a repository is seen for the first time.

    Args:
        remote_url_repo: The remote svn url (unused for now)

    Returns:
        The last known revision number (always 1 for now).

    """
    # TODO: Contact swh-storage to retrieve the last occurrence for
    # the given origin
    first_revision = 1
    return first_revision
def read_log_entries(repo):
    """Index the log entries of the repository by revision.

    Args:
        repo: Dictionary whose 'local' value provides log_default()

    Returns:
        Dictionary mapping the revision of each log entry to the entry
        itself. When several entries share a revision, the last one wins.

    """
    return {entry.revision: entry for entry in repo['local'].log_default()}
def read_commit(repo, rev):
    """Extract the commit information of a revision from the cached logs.

    Args:
        repo: Dictionary whose 'logs' value maps revisions to log entries
        rev: The revision to look up

    Returns:
        Dictionary with the following keys:
        - message: the log entry's msg
        - author_date: the log entry's date
        - author_name: the log entry's author

    Raises:
        KeyError: when rev has no entry in repo['logs']

    """
    entry = repo['logs'][rev]
    return dict(
        message=entry.msg,
        author_date=entry.date,
        author_name=entry.author,
    )
def read_svn_revisions(repo, latest_revision):
    """Compute the tree of each revision, from the last known revision up
    to (but not including) latest_revision.

    Nothing is yielded when the last known revision is already at, or past,
    latest_revision.

    Args:
        repo: Dictionary with at least the keys 'remote', 'remote_url',
            'local_url' and 'logs'
        latest_revision: The revision number at which the walk stops

    Yields:
        Tuples (rev, commit, objects) where commit is the result of
        read_commit for rev and objects is the result of
        git.walk_and_compute_sha1_from_directory on the working copy
        checked out at rev.

    """
    start = retrieve_last_known_revision(repo['remote_url'])
    # `>=` rather than `==`: a last known revision past latest_revision
    # must stop the walk instead of counting upwards forever.
    if start >= latest_revision:
        return

    # NOTE(review): latest_revision itself is never yielded, as in the
    # original loop -- confirm whether the upper bound should be inclusive.
    with cwd(repo['local_url']):
        for rev in range(start, latest_revision):
            # checkout to the revision rev
            repo['remote'].checkout(revision=rev, path='.')

            # compute git commit
            objects = git.walk_and_compute_sha1_from_directory(
                repo['local_url'].encode('utf-8'))

            commit = read_commit(repo, rev)

            yield rev, commit, objects
def build_swh_revision(repo_uuid, commit, rev, dir_id, parents):
    """Given a svn revision, build a swh revision.

    Args:
        repo_uuid: The svn repository uuid, stored in the extra headers
        commit: Dictionary with the keys 'author_name', 'message' and
            'author_date'
        rev: The svn revision number, stored in the extra headers
        dir_id: Value stored under the 'directory' key
        parents: Value stored under the 'parents' key

    Returns:
        Dictionary describing the swh revision. The author and committer
        entries are the same object, and so are date and committer_date.

    """
    author_name = commit['author_name']
    person = {
        # HACK: some repository have commits without author
        'name': author_name.encode('utf-8') if author_name else b'',
        # HACK: shouldn't we use the same for email?
        'email': b'',
    }

    raw_message = commit['message']
    message = raw_message.encode('utf-8') if raw_message else b''

    when = {
        'timestamp': commit['author_date'],
        'offset': 0,  # HACK: PySvn transforms into datetime with utc timezone
    }

    extra_headers = {
        'svn-repo-uuid': repo_uuid,
        'svn-revision': rev,
    }

    return {
        'date': when,
        'committer_date': when,
        'type': 'svn',
        'directory': dir_id,
        'message': message,
        'author': person,
        'committer': person,
        'synthetic': True,
        'metadata': {
            'extra-headers': extra_headers,
        },
        'parents': parents,
    }
@contextmanager
def cwd(path):
    """Temporarily make path the current working directory.

    The directory that was current on entry is restored on exit, including
    when the body of the with statement raises.

    Args:
        path: The directory to work from

    """
    origin = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        # always go back, whatever happened in the with body
        os.chdir(origin)
class SvnLoader(libloader.SvnLoader):
    """A svn loader.

    This will load the svn repository.

    """
    def __init__(self, config):
        super().__init__(config)
        self.log = logging.getLogger('swh.loader.svn.SvnLoader')

    def process(self, svn_url, origin, destination_path):
        """Load a svn repository.

        Checkout the svn repository locally in destination_path, then
        build one swh revision per svn revision, each one having the
        previously built revision as its sole parent.

        Args:
            - svn_url: svn repository url to import
            - origin: Dictionary origin (not read by this method yet)
              - id: origin's id
              - url: url origin we fetched
              - type: type of the origin
            - destination_path: local parent folder to checkout the
              repository to, handed over to checkout_repo

        Returns:
            Dictionary with the following keys:
            - status: mandatory, the status result as a boolean
            - stderr: optional when status is True, mandatory otherwise
            - objects: the actual objects sent to swh storage

            For now only {'status': True} is returned.

        """
        repo = checkout_repo(svn_url, destination_path)

        # 2. retrieve current svn revision
        repo_metadata = repo['local'].info()
        repo['logs'] = read_log_entries(repo)

        latest_revision = repo_metadata['entry_revision']
        repo_uuid = repo_metadata['repository_uuid']

        # Maps a svn revision number to the list of its swh parent ids.
        # NOTE(review): only rev 1 is seeded, so a walk starting at any
        # other revision raises KeyError below -- revisit once the last
        # known revision is actually read from storage.
        parents = {1: []}  # rev 1 has no parents

        for rev, commit, objects in read_svn_revisions(
                repo, latest_revision):
            dir_id = objects[git.ROOT_TREE_KEY][0]['sha1_git']
            swh_revision = build_swh_revision(repo_uuid, commit, rev,
                                              dir_id, parents[rev])
            swh_revision['id'] = git.compute_revision_sha1_git(swh_revision)
            # the revision just built is the parent of the next one
            parents[rev+1] = [swh_revision['id']]
            # lazy logger arguments: nothing is formatted unless debug
            # logging is enabled
            self.log.debug('rev: %s, swhrev: %s', rev, swh_revision)

        return {'status': True}
class SvnLoaderWithHistory(SvnLoader):
    """A svn loader.

    This will:
    - create the origin if it does not exist
    - open an entry in fetch_history
    - load the svn repository
    - close the entry in fetch_history

    """
    def __init__(self, config):
        super().__init__(config)
        self.log = logging.getLogger('swh.loader.svn.SvnLoaderWithHistory')

    def process(self, svn_url, destination_path):
        """Load a svn repository in backend, tracking it in fetch_history.

        The origin dictionary (type 'svn', url svn_url, plus the id given
        back by the storage) is built here and handed to the parent's
        process.

        Args:
            - svn_url: svn url to import
            - destination_path: local folder handed over to the parent's
              process

        Returns:
            The result dictionary of the parent's process.

        Raises:
            Any exception raised by the parent's process, after the
            fetch_history entry has been closed as a failure.

        """
        origin = {'type': 'svn',
                  'url': svn_url}
        origin['id'] = self.storage.origin_add_one(origin)

        fetch_history_id = self.open_fetch_history(origin['id'])

        try:
            result = super().process(svn_url, origin, destination_path)
        except Exception as exc:
            # Do not leave the fetch_history entry open when the load
            # fails: record the failure, then let the error propagate.
            self.close_fetch_history(
                fetch_history_id,
                {'status': False, 'stderr': repr(exc)})
            raise

        self.close_fetch_history(fetch_history_id, result)
        return result

File Metadata

Mime Type
text/x-diff
Expires
Tue, Aug 19, 12:59 AM (3 w, 9 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3262194

Event Timeline