diff --git a/swh/loader/dir/converters.py b/swh/loader/dir/converters.py index 654c818..5670754 100644 --- a/swh/loader/dir/converters.py +++ b/swh/loader/dir/converters.py @@ -1,71 +1,79 @@ -# Copyright (C) 2015-2016 The Software Heritage developers +# Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime +from swh.model import hashutil + def to_datetime(ts): """Convert a timestamp to utc datetime. """ return datetime.datetime.utcfromtimestamp(ts).replace( tzinfo=datetime.timezone.utc) def format_to_minutes(offset_str): """Convert a git string timezone format string (e.g +0200, -0310) to minutes. Args: offset_str: a string representing an offset. Returns: A positive or negative number of minutes of such input """ sign = offset_str[0] hours = int(offset_str[1:3]) minutes = int(offset_str[3:]) + (hours * 60) return minutes if sign == '+' else -1 * minutes def commit_to_revision(commit, log=None): """Format a commit as a revision. """ new_commit = commit.copy() new_commit.update({ 'author': { 'name': commit['author']['name'].encode('utf-8'), 'fullname': commit['author']['fullname'].encode('utf-8'), 'email': commit['author']['email'].encode('utf-8'), }, 'committer': { 'name': commit['committer']['name'].encode('utf-8'), 'fullname': commit['committer']['fullname'].encode('utf-8'), 'email': commit['committer']['email'].encode('utf-8'), }, 'message': commit['message'].encode('utf-8'), 'synthetic': True, - 'parents': [] }) + + if 'parents' in new_commit: + new_commit['parents'] = [hashutil.hash_to_bytes(h) + for h in new_commit['parents']] + else: + new_commit['parents'] = [] + return new_commit def annotated_tag_to_release(release, log=None): """Format a swh release. """ new_release = release.copy() new_release.update({ 'name': release['name'].encode('utf-8'), 'author': { 'name': release['author']['name'].encode('utf-8'), 'fullname': release['author']['fullname'].encode('utf-8'), 'email': release['author']['email'].encode('utf-8'), }, 'message': release['message'].encode('utf-8'), 'synthetic': True }) return new_release diff --git a/swh/loader/dir/tests/test_converters.py b/swh/loader/dir/tests/test_converters.py index 0a9f658..888cbc2 100644 --- a/swh/loader/dir/tests/test_converters.py +++ b/swh/loader/dir/tests/test_converters.py @@ -1,117 +1,167 @@ -# Copyright (C) 2015-2016 The Software Heritage developers +# Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import shutil import tempfile import unittest from nose.tools import istest +from swh.model import hashutil from swh.loader.dir import converters def tmpfile_with_content(fromdir, contentfile): """Create a temporary file with content contentfile in directory fromdir. """ tmpfilepath = tempfile.mktemp( suffix='.swh', prefix='tmp-file-for-test', dir=fromdir) with open(tmpfilepath, 'wb') as f: f.write(contentfile) return tmpfilepath class TestConverters(unittest.TestCase): @classmethod def setUpClass(cls): super().setUpClass() cls.tmpdir = tempfile.mkdtemp(prefix='test-swh-loader-dir.') @classmethod def tearDownClass(cls): shutil.rmtree(cls.tmpdir) super().tearDownClass() @istest def format_to_minutes(self): self.assertEquals(converters.format_to_minutes('+0100'), 60) self.assertEquals(converters.format_to_minutes('-0200'), -120) self.assertEquals(converters.format_to_minutes('+1250'), 12*60+50) self.assertEquals(converters.format_to_minutes('+0000'), 0) self.assertEquals(converters.format_to_minutes('-0000'), 0) @istest def annotated_tag_to_release(self): # given release = { 'name': 'v0.0.1', 'message': 'synthetic-message-input', 'author': {'name': 'author-name', 'email': 'author-email', 'fullname': 'fullname'}, } expected_release = { 'name': b'v0.0.1', 'message': b'synthetic-message-input', 'author': {'name': b'author-name', 'email': b'author-email', 'fullname': b'fullname'}, 'synthetic': True, } # when actual_release = converters.annotated_tag_to_release(release) # then self.assertDictEqual(actual_release, expected_release) @istest def commit_to_revision(self): # given commit = { 'sha1_git': 'commit-git-sha1', 'directory': 'targeted-tree-sha1', 'date': {'timestamp': 1444054085, 'offset': '+0000'}, 'committer_date': {'timestamp': 1444054085, 'offset': '+0000'}, 'type': 'tar', 'message': 'synthetic-message-input', 'author': {'name': 'author-name', 'email': 'author-email', 'fullname': 'fullname'}, 'committer': {'name': 'author-name', 'email': 'author-email', 'fullname': 'fullname'}, 'directory': 'targeted-tree-sha1', } expected_revision = { 'sha1_git': 'commit-git-sha1', 'directory': 'targeted-tree-sha1', 'date': {'timestamp': 1444054085, 'offset': '+0000'}, 'committer_date': {'timestamp': 1444054085, 'offset': '+0000'}, 'type': 'tar', 'message': b'synthetic-message-input', 'author': {'name': b'author-name', 'email': b'author-email', 'fullname': b'fullname'}, 'committer': {'name': b'author-name', 'email': b'author-email', 'fullname': b'fullname'}, 'directory': 'targeted-tree-sha1', 'synthetic': True, 'parents': [] } # when actual_revision = converters.commit_to_revision(commit) # then self.assertEquals(actual_revision, expected_revision) + + @istest + def commit_to_revision_with_parents(self): + """Commit with existing parents should not lose information + + """ + h = '10041ddb6cbc154c24227b1e8759b81dcd99ea3e' + + # given + commit = { + 'sha1_git': 'commit-git-sha1', + 'directory': 'targeted-tree-sha1', + 'date': {'timestamp': 1444054085, 'offset': '+0000'}, + 'committer_date': {'timestamp': 1444054085, 'offset': '+0000'}, + 'type': 'tar', + 'message': 'synthetic-message-input', + 'author': {'name': 'author-name', + 'email': 'author-email', + 'fullname': 'fullname'}, + 'committer': {'name': 'author-name', + 'email': 'author-email', + 'fullname': 'fullname'}, + 'directory': 'targeted-tree-sha1', + 'parents': [h], + } + + expected_revision = { + 'sha1_git': 'commit-git-sha1', + 'directory': 'targeted-tree-sha1', + 'date': {'timestamp': 1444054085, 'offset': '+0000'}, + 'committer_date': {'timestamp': 1444054085, 'offset': '+0000'}, + 'type': 'tar', + 'message': b'synthetic-message-input', + 'author': {'name': b'author-name', + 'email': b'author-email', + 'fullname': b'fullname'}, + 'committer': {'name': b'author-name', + 'email': b'author-email', + 'fullname': b'fullname'}, + 'directory': 'targeted-tree-sha1', + 'synthetic': True, + 'parents': [hashutil.hash_to_bytes(h)] + } + + # when + actual_revision = converters.commit_to_revision(commit) + + # then + self.assertEquals(actual_revision, expected_revision)