Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9345236
svn.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
12 KB
Subscribers
None
svn.py
View Options
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""SVN client in charge of iterating over svn logs and yield commit
representations including the hash tree/content computations per
svn commit.
"""
import
os
import
tempfile
import
shutil
from
subvertpy.ra
import
RemoteAccess
,
Auth
,
get_username_provider
from
subvertpy
import
client
,
properties
from
swh.model.from_disk
import
Directory
from
.config
import
DEFAULT_BRANCH
from
.
import
ra
,
converters
# Fallback commit message: used as the default value when a revision's
# properties carry no log message entry.
DEFAULT_AUTHOR_MESSAGE = ''
class SvnRepoException(ValueError):
    """ValueError subclass which also carries the svn repository involved.

    Args:
        svnrepo: the repository object the error relates to; stored as-is
            on the ``svnrepo`` attribute.
        e: the value forwarded to :class:`ValueError` (it becomes the
            exception's single argument).

    """
    def __init__(self, svnrepo, e):
        # Keep a handle on the repository so handlers can inspect it.
        self.svnrepo = svnrepo
        super().__init__(e)
class BaseSvnRepo:
    """Base svn repository representation for swh.

    To customize how the message log properties are converted, subclass
    this class and override::

        def convert_commit_author(self, author)
        def convert_commit_message(self, msg)
        def convert_commit_date(self, date)

    See :class:`SWHSvnRepo` for an example subclass.

    Note: :meth:`swh_hash_data_per_revision` reads ``self.swhreplay``, which
    this class's ``__init__`` does not set. It is set either by a subclass
    (cf. :class:`SWHSvnRepo`) or by :meth:`swh_hash_data_at_revision`.

    Args:
        remote_url (str): url of the remote svn repository; trailing '/'
            characters are stripped.
        origin_id: identifier of the origin, passed to the storage when
            looking up the latest snapshot.
        storage: storage object; this class calls ``snapshot_get_latest``
            and ``revision_get`` on it.
        destination_path (str): optional directory (created if missing)
            under which the temporary working directory is created.
            Defaults to '/tmp' when not provided.

    """
    def __init__(self, remote_url, origin_id, storage,
                 destination_path=None):
        self.remote_url = remote_url.rstrip('/')
        self.storage = storage
        self.origin_id = origin_id

        if destination_path:
            os.makedirs(destination_path, exist_ok=True)
            self.root_dir = destination_path
        else:
            self.root_dir = '/tmp'

        auth = Auth([get_username_provider()])
        # one connection for log iteration
        self.conn_log = RemoteAccess(self.remote_url, auth=auth)
        # another for replay
        self.conn = RemoteAccess(self.remote_url, auth=auth)
        # one client for update operation
        self.client = client.Client(auth=auth)

        # Per-instance scratch directory; removed by clean_fs().
        self.local_dirname = tempfile.mkdtemp(
            suffix='.tmp',
            prefix='swh.loader.svn.',
            dir=self.root_dir)

        local_name = os.path.basename(self.remote_url)

        # local_url is kept as bytes; export() decodes it back to str.
        self.local_url = os.path.join(
            self.local_dirname, local_name).encode('utf-8')

        self.uuid = self.conn.get_uuid().encode('utf-8')

    def __str__(self):
        return str({
            'remote_url': self.remote_url,
            'local_url': self.local_url,
            'uuid': self.uuid,
            'swh-origin': self.origin_id
        })

    def head_revision(self):
        """Retrieve the latest revision number, as reported by the
        connection's ``get_latest_revnum()``.

        """
        return self.conn.get_latest_revnum()

    def initial_revision(self):
        """Retrieve the initial revision from which the remote url appeared.

        Note: This should always be 1 since we won't be dealing with in-depth
        url.

        """
        return 1

    def convert_commit_message(self, msg):
        """Do something with message (e.g add extra line, etc...)

        cf. :class:`SWHSvnRepo` for a simple implementation.

        Args:
            msg (str): the commit message to convert.

        Returns:
            The transformed message as bytes.

        Raises:
            NotImplementedError: always, in this base class.

        """
        raise NotImplementedError('Should be overridden by subclass.')

    def convert_commit_date(self, date):
        """Convert the message date (e.g, convert into timestamp or whatever
        makes sense to you.).

        Args:
            date (str): the commit date to convert.

        Returns:
            The transformed date.

        Raises:
            NotImplementedError: always, in this base class.

        """
        raise NotImplementedError('Should be overridden by subclass.')

    def convert_commit_author(self, author):
        """Convert the commit author (e.g, convert into dict or whatever
        makes sense to you.).

        Args:
            author (str): the commit author to convert.

        Returns:
            The transformed author as dict.

        Raises:
            NotImplementedError: always, in this base class.

        """
        raise NotImplementedError('Should be overridden by subclass.')

    def __to_entry(self, log_entry):
        """Turn one raw log entry into a commit dictionary.

        Args:
            log_entry: 4-tuple (changed_paths, rev, revprops, has_children)
                as produced by the log connection's ``iter_log``.

        Returns:
            dict with keys 'rev', 'author_date', 'author_name' and
            'message', the last three being the output of the
            ``convert_commit_*`` methods.

        """
        changed_paths, rev, revprops, has_children = log_entry

        author_date = self.convert_commit_date(
            revprops.get(properties.PROP_REVISION_DATE))

        author = self.convert_commit_author(
            revprops.get(properties.PROP_REVISION_AUTHOR))

        # Only the message has a fallback value when the property is absent.
        message = self.convert_commit_message(
            revprops.get(properties.PROP_REVISION_LOG,
                         DEFAULT_AUTHOR_MESSAGE))

        return {
            'rev': rev,
            'author_date': author_date,
            'author_name': author,
            'message': message,
        }

    def logs(self, revision_start, revision_end):
        """Stream svn logs between revision_start and revision_end.

        Args:
            revision_start: the svn revision starting bound
            revision_end: the svn revision ending bound

        Yields:
            dict: one dictionary per log entry, with the following keys:

            - rev: revision number of the entry
            - author_date: date of the commit (converted)
            - author_name: author of the commit (converted)
            - message: commit message (converted)

        """
        for log_entry in self.conn_log.iter_log(
                paths=None,
                start=revision_start,
                end=revision_end,
                discover_changed_paths=False):
            yield self.__to_entry(log_entry)

    def export(self, revision):
        """Export the repository to a given version.

        The export is written to ``self.local_url`` with
        ``ignore_keywords=True``.

        Args:
            revision: Revision to export at

        """
        self.client.export(
            self.remote_url,
            to=self.local_url.decode('utf-8'),
            rev=revision,
            ignore_keywords=True)

    def export_temporary(self, revision):
        """Export the repository to a given revision in a temporary location.
        This is up to the caller of this function to clean up the
        temporary location when done (cf. self.clean_fs method)

        Args:
            revision: Revision to export at

        Returns:
            The tuple local_dirname the temporary location root
            folder, local_url where the repository was exported
            (local_url is returned as bytes, local_dirname as str).

        """
        # The temporary folder lives inside this instance's working folder.
        local_dirname = tempfile.mkdtemp(
            prefix='check-revision-%s.' % revision,
            dir=self.local_dirname)
        local_name = os.path.basename(self.remote_url)
        local_url = os.path.join(local_dirname, local_name)
        self.client.export(
            self.remote_url,
            to=local_url,
            rev=revision,
            ignore_keywords=True)
        return local_dirname, os.fsencode(local_url)

    def swh_latest_snapshot_revision(self, previous_swh_revision=None):
        """Look for latest snapshot revision and returns it if any.

        Args:
            previous_swh_revision: (optional) id of a possible
                                   previous swh revision

        Returns:
            dict: The latest known point in time. Dict with keys:

                'revision': latest visited revision
                'snapshot': latest snapshot, or None when
                    previous_swh_revision was provided (no snapshot is
                    looked up in that case)

            If None is found, return an empty dict.

        """
        storage = self.storage
        # Bound up-front so that the final dict can always be built, even
        # when the caller provides previous_swh_revision and the snapshot
        # lookup below is skipped.
        latest_snap = None

        if not previous_swh_revision:
            # check latest snapshot's revision
            latest_snap = storage.snapshot_get_latest(self.origin_id)
            if not latest_snap:
                return {}
            branches = latest_snap.get('branches')
            if not branches:
                return {}
            branch = branches.get(DEFAULT_BRANCH)
            if not branch:
                return {}
            if branch['target_type'] != 'revision':
                return {}
            previous_swh_revision = branch['target']

        revs = list(storage.revision_get([previous_swh_revision]))
        if revs:
            return {
                'snapshot': latest_snap,
                'revision': revs[0]
            }
        return {}

    def swh_hash_data_per_revision(self, start_revision, end_revision):
        """Compute swh hash data per each revision between start_revision and
        end_revision.

        Requires ``self.swhreplay`` to be set (see the class documentation).

        Args:
            start_revision: starting revision
            end_revision: ending revision

        Yields:
            tuple (rev, nextrev, commit, objects_per_path, directory)
            - rev: current revision
            - nextrev: next revision (None once end_revision is reached)
            - commit: commit data (author, date, message) for such revision
            - objects_per_path: result of the replay's ``compute_hashes``
              for that revision
            - directory: the replay's ``directory`` attribute

        """
        for commit in self.logs(start_revision, end_revision):
            rev = commit['rev']
            objects = self.swhreplay.compute_hashes(rev)

            if rev == end_revision:
                nextrev = None
            else:
                nextrev = rev + 1

            yield rev, nextrev, commit, objects, self.swhreplay.directory

    def swh_hash_data_at_revision(self, revision):
        """Compute the hash data at revision.

        Expected to be used for update only.

        As a side effect, ``self.swhreplay`` is replaced by a new replay
        object initialized with the directory computed from disk.

        Args:
            revision: revision to export and hash

        Yields:
            a single tuple (revision, revision + 1, commit, {}, directory)

        """
        # Update the disk at revision
        self.export(revision)
        # Compute the current hashes on disk
        directory = Directory.from_disk(
            path=os.fsencode(self.local_url), save_path=True)

        # Update the replay collaborator with the right state
        self.swhreplay = ra.SWHReplay(
            conn=self.conn,
            rootpath=self.local_url,
            directory=directory)

        # Retrieve the commit information for revision
        commit = list(self.logs(revision, revision))[0]

        yield revision, revision + 1, commit, {}, directory

    def clean_fs(self, local_dirname=None):
        """Clean up the local working copy.

        Args:
            local_dirname (str): Path to remove recursively if
            provided. Otherwise, remove the temporary upper root tree
            used for svn repository loading.

        """
        if local_dirname:
            shutil.rmtree(local_dirname)
        else:
            shutil.rmtree(self.local_dirname)
class SWHSvnRepo(BaseSvnRepo):
    """:class:`BaseSvnRepo` with concrete log property conversions:

    - the commit message is encoded to bytes (utf-8) unless already bytes
    - the commit author goes through ``converters.svn_author_to_swh_person``
    - the commit date goes through ``converters.svn_date_to_swh_date``

    It also sets up the ``swhreplay`` collaborator at construction time.

    """
    def __init__(self, remote_url, origin_id, storage,
                 destination_path=None):
        super().__init__(
            remote_url,
            origin_id,
            storage,
            destination_path=destination_path)
        # Replay collaborator bound to the connection and local export path
        # prepared by the parent constructor.
        replay = ra.SWHReplay(conn=self.conn, rootpath=self.local_url)
        self.swhreplay = replay

    def convert_commit_message(self, msg):
        """Encode the commit message to bytes.

        Args:
            msg (str): the commit message to convert.

        Returns:
            The message as bytes: returned untouched when it is already
            bytes, utf-8 encoded otherwise.

        """
        return msg if isinstance(msg, bytes) else msg.encode('utf-8')

    def convert_commit_date(self, date):
        """Convert the message commit date into a timestamp in swh format.
        The precision is kept.

        Args:
            date (str): the commit date to convert.

        Returns:
            The result of ``converters.svn_date_to_swh_date`` for that date.

        """
        swh_date = converters.svn_date_to_swh_date(date)
        return swh_date

    def convert_commit_author(self, author):
        """Convert the commit author into an swh person.

        The user becomes a dictionary of the form::

            {
              name: author,
              email: '',
              fullname: author
            }

        Args:
            author (str): the commit author to convert.

        Returns:
            The result of ``converters.svn_author_to_swh_person`` for that
            author.

        """
        swh_person = converters.svn_author_to_swh_person(author)
        return swh_person
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Fri, Jul 4, 3:14 PM (5 d, 8 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3431865
Attached To
rDLDSVN Subversion (SVN) loader
Event Timeline
Log In to Comment