# ---------------------------------------------------------------------------
# swh/loader/svn/converters.py
# ---------------------------------------------------------------------------
# Copyright (C) 2015-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from email import utils

from .utils import strdate_to_timestamp


def svn_date_to_swh_date(strdate):
    """Convert a string date to an swh one.

    Args:
        strdate: A string formatted for .utils.strdate_to_timestamp
            to do its job

    Returns:
        A dictionary with keys:
            timestamp: whatever .utils.strdate_to_timestamp returns for
                strdate (not truncated here)
            offset: always 0

    """
    return {
        'timestamp': strdate_to_timestamp(strdate),
        'offset': 0,
    }


def svn_author_to_swh_person(author):
    """Convert an svn author to an swh person.

    Default policy: No information is added.

    Args:
        author (str or bytes): the svn author; a str is encoded to utf-8

    Returns:
        a dictionary with keys:
            fullname: the author as bytes (b'' when author is empty)
            name: the parsed name when author looks like 'name <email>',
                the author itself otherwise, None when author is empty
            email: the parsed email when author looks like 'name <email>',
                None otherwise (plain svn authors carry no email)

    """
    if not author:
        return {'fullname': b'', 'name': None, 'email': None}

    if isinstance(author, str):
        author = author.encode('utf-8')

    if b'<' in author and b'>' in author:
        # 'name <email>' form: let the stdlib split it.
        name, email = utils.parseaddr(author.decode('utf-8'))
        return {
            'fullname': author,
            'name': name.encode('utf-8'),
            'email': email.encode('utf-8'),
        }

    return {'fullname': author, 'email': None, 'name': author}


def svn_author_to_gitsvn_person(author, repo_uuid):
    """Convert an svn author to a person suitable for insertion.

    Default policy: If no email is found, the email is created using
    the author and the repo_uuid.

    Args:
        author (str or bytes): the svn author; a str is encoded to utf-8,
            an empty author is replaced by '(no author)'
        repo_uuid (bytes): the repository's uuid; only used when the
            author carries no email

    Returns:
        a dictionary with keys:
            fullname: the author as given when it already looks like
                'name <email>', b'author <author@repo_uuid>' otherwise
            name: the author's associated name
            email: the parsed email, or b'author@repo_uuid' when the
                author carries none

    """
    if not author:
        author = '(no author)'

    # Accept bytes as well as str, like svn_author_to_swh_person does;
    # calling .encode() on bytes would raise AttributeError.
    if isinstance(author, str):
        author = author.encode('utf-8')

    if b'<' in author and b'>' in author:
        name, email = utils.parseaddr(author.decode('utf-8'))
        return {
            'fullname': author,
            'name': name.encode('utf-8'),
            'email': email.encode('utf-8'),
        }

    # we'll construct the author's fullname the same way git svn does
    # 'user <user@repo-uuid>'
    email = b'@'.join([author, repo_uuid])
    return {
        'fullname': b''.join([author, b' ', b'<', email, b'>']),
        'name': author,
        'email': email,
    }


def build_swh_revision(rev, commit, repo_uuid, dir_id, parents):
    """Given a svn revision, build a swh revision.

    This adds an ['metadata']['extra_headers'] entry with the
    repository's uuid and the svn revision.

    Args:
        - rev: the svn revision number
        - commit: the commit metadata; its 'author_name', 'message' and
          'author_date' entries are read
        - repo_uuid: The repository's uuid
        - dir_id: the tree's hash identifier
        - parents: the revision's parents identifier

    Returns:
        The swh revision dictionary.

    """
    author = commit['author_name']
    msg = commit['message']
    date = commit['author_date']

    metadata = {
        'extra_headers': [
            ['svn_repo_uuid', repo_uuid],
            ['svn_revision', str(rev).encode('utf-8')],
        ]
    }

    # svn has a single author/date per revision: it is used for both the
    # author and the committer sides.
    return {
        'date': date,
        'committer_date': date,
        'type': 'svn',
        'directory': dir_id,
        'message': msg,
        'author': author,
        'committer': author,
        'synthetic': True,
        'metadata': metadata,
        'parents': parents,
    }


def build_gitsvn_swh_revision(rev, commit, dir_id, parents):
    """Given a svn revision, build a swh revision.

    Unlike build_swh_revision, no metadata is attached ('metadata' is
    None); rev is accepted but not used.

    Args:
        - rev: the svn revision number
        - commit: the commit metadata; its 'author_name', 'message' and
          'author_date' entries are read
        - dir_id: the tree's hash identifier
        - parents: the revision's parents identifier

    Returns:
        The swh revision dictionary.

    """
    author = commit['author_name']
    msg = commit['message']
    date = commit['author_date']

    return {
        'date': date,
        'committer_date': date,
        'type': 'svn',
        'directory': dir_id,
        'message': msg,
        'author': author,
        'committer': author,
        'synthetic': True,
        'metadata': None,
        'parents': parents,
    }


# ---------------------------------------------------------------------------
# swh/loader/svn/tests/test_converters.py
# ---------------------------------------------------------------------------
# Copyright (C) 2015-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import unittest

from swh.loader.svn import converters


class TestAuthorConverters(unittest.TestCase):
    def test_svn_author_to_swh_person(self):
        """The author should have name, email and fullname filled.

        """
        actual_person = converters.svn_author_to_swh_person(
            'tony <ynot@dagobah>')
        self.assertEqual(actual_person, {
            'fullname': b'tony <ynot@dagobah>',
            'name': b'tony',
            'email': b'ynot@dagobah',
        })

    def test_svn_author_to_swh_person_no_email(self):
        """The author and fullname should be the same as the input (author).

        """
        actual_person = converters.svn_author_to_swh_person('tony')
        self.assertEqual(actual_person, {
            'fullname': b'tony',
            'name': b'tony',
            'email': None,
        })

    def test_svn_author_to_swh_person_empty_person(self):
        """Empty person has only its fullname filled with the empty
        byte-string.

        """
        actual_person = converters.svn_author_to_swh_person('')
        self.assertEqual(actual_person, {
            'fullname': b'',
            'name': None,
            'email': None,
        })


class TestGitSvnAuthorConverters(unittest.TestCase):
    def test_svn_author_to_gitsvn_person_bytes(self):
        """A bytes author should be accepted the same way a str one is.

        """
        actual_person = converters.svn_author_to_gitsvn_person(
            b'tony',
            repo_uuid=b'some-uuid')
        self.assertEqual(actual_person, {
            'fullname': b'tony <tony@some-uuid>',
            'name': b'tony',
            'email': b'tony@some-uuid',
        })


class TestRevisionConverters(unittest.TestCase):
    def test_build_swh_revision_default(self):
        """This should build the swh revision with the swh revision's extra
        headers about the repository.

        """
        actual_swh_revision = converters.build_swh_revision(
            repo_uuid=b'uuid',
            dir_id='dir-id',
            commit={
                'author_name': {
                    'name': b'theo',
                    'email': b'theo@uuid',
                    'fullname': b'theo <theo@uuid>'
                },
                'message': b'commit message',
                'author_date': {
                    'timestamp': {
                        'seconds': 1088108379,
                        'microseconds': 0,
                    },
                    'offset': 0
                }
            },
            rev=10,
            parents=['123'])

        date = {
            'timestamp': {
                'seconds': 1088108379,
                'microseconds': 0,
            },
            'offset': 0,
        }

        self.assertEqual(actual_swh_revision, {
            'date': date,
            'committer_date': date,
            'type': 'svn',
            'directory': 'dir-id',
            'message': b'commit message',
            'author': {
                'name': b'theo',
                'email': b'theo@uuid',
                'fullname': b'theo <theo@uuid>'
            },
            'committer': {
                'name': b'theo',
                'email': b'theo@uuid',
                'fullname': b'theo <theo@uuid>'
            },
            'synthetic': True,
            'metadata': {
                'extra_headers': [
                    ['svn_repo_uuid', b'uuid'],
                    ['svn_revision', b'10'],
                ]
            },
            'parents': ['123'],
        })


class TestGitSvnRevisionConverters(unittest.TestCase):
    def test_build_gitsvn_swh_revision_default(self):
        """This should build the swh revision without the swh revision's
        extra headers about the repository.

        """
        actual_swh_revision = converters.build_gitsvn_swh_revision(
            dir_id='dir-id',
            commit={
                'author_name': {
                    'name': b'theo',
                    'email': b'theo@uuid',
                    'fullname': b'theo <theo@uuid>'
                },
                'message': b'commit message',
                'author_date': {
                    'timestamp': {
                        'seconds': 1088108379,
                        'microseconds': 0,
                    },
                    'offset': 0
                }
            },
            rev=10,
            parents=['123'])

        date = {
            'timestamp': {
                'seconds': 1088108379,
                'microseconds': 0,
            },
            'offset': 0,
        }

        self.assertEqual(actual_swh_revision, {
            'date': date,
            'committer_date': date,
            'type': 'svn',
            'directory': 'dir-id',
            'message': b'commit message',
            'author': {
                'name': b'theo',
                'email': b'theo@uuid',
                'fullname': b'theo <theo@uuid>'
            },
            'committer': {
                'name': b'theo',
                'email': b'theo@uuid',
                'fullname': b'theo <theo@uuid>'
            },
            'synthetic': True,
            'metadata': None,
            'parents': ['123'],
        })


class ConvertDate(unittest.TestCase):
    def test_svn_date_to_swh_date(self):
        """The timestamp should not be tampered with and include the
        decimals.

        """
        self.assertEqual(
            converters.svn_date_to_swh_date('2011-05-31T06:04:39.500900Z'), {
                'timestamp': {
                    'seconds': 1306821879,
                    'microseconds': 500900,
                },
                'offset': 0
            })

        self.assertEqual(
            converters.svn_date_to_swh_date('2011-05-31T06:04:39.800722Z'), {
                'timestamp': {
                    'seconds': 1306821879,
                    'microseconds': 800722,
                },
                'offset': 0
            })

    def test_svn_date_to_swh_date_epoch(self):
        """Empty date should be EPOCH (timestamp and offset at 0)."""
        # It should return 0, epoch
        self.assertEqual({
            'timestamp': {
                'seconds': 0,
                'microseconds': 0,
            },
            'offset': 0,
        }, converters.svn_date_to_swh_date(''))
        self.assertEqual({
            'timestamp': {
                'seconds': 0,
                'microseconds': 0,
            },
            'offset': 0,
        }, converters.svn_date_to_swh_date(None))