diff --git a/swh/loader/dir/converters.py b/swh/loader/dir/converters.py
index 24ce561..8442985 100644
--- a/swh/loader/dir/converters.py
+++ b/swh/loader/dir/converters.py
@@ -1,147 +1,160 @@
 # Copyright (C) 2015 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 """Convert dir objects to dictionaries suitable for swh.storage"""
 
-from datetime import datetime
+import datetime
 
 from swh.loader.dir.git.git import GitType
 from swh.loader.dir.git import git, utils
 
 
+def to_datetime(ts):
+    """Convert a POSIX timestamp to a timezone-aware UTC datetime.
+
+    Args:
+        ts: a POSIX timestamp (seconds since the epoch).
+
+    Returns:
+        A datetime.datetime whose tzinfo is datetime.timezone.utc.
+
+    """
+    return datetime.datetime.fromtimestamp(ts, tz=datetime.timezone.utc)
+
+
 def format_to_minutes(offset_str):
     """Convert a git string timezone format string (e.g +0200, -0310) to
     minutes.
 
     Args:
         offset_str: a string representing an offset.
 
     Returns:
         A positive or negative number of minutes of such input
 
     """
     sign = offset_str[0]
     hours = int(offset_str[1:3])
     minutes = int(offset_str[3:]) + (hours * 60)
     return minutes if sign == '+' else -1 * minutes
 
 
 def blob_to_content(obj, log=None, max_content_size=None, origin_id=None):
     if 'data' not in obj:
         filepath = obj['path']
         content_raw, length = utils._read_raw(filepath)
         obj.update({'data': content_raw,
                     'length': length})
 
     return _blob_to_content(obj, log, max_content_size, origin_id)
 
 
 def _blob_to_content(obj, log=None, max_content_size=None, origin_id=None):
     """Convert to a compliant swh content.
 
     """
     size = obj['length']
     ret = {
         'sha1': obj['sha1'],
         'sha256': obj['sha256'],
         'sha1_git': obj['sha1_git'],
         'data': obj['data'],
         'length': size,
         'perms': obj['perms'].value,
         'type': obj['type'].value
     }
 
     if max_content_size and size > max_content_size:
         if log:
             log.info('Skipping content %s, too large (%s > %s)' %
                      (obj['sha1_git'], size, max_content_size))
         ret.update({'status': 'absent',
                     'reason': 'Content too large',
                     'origin': origin_id})
         return ret
 
     ret.update({
         'status': 'visible'
     })
 
     return ret
 
 
 # Map of type to swh types
 _entry_type_map = {
     GitType.TREE: 'dir',
     GitType.BLOB: 'file',
     GitType.COMM: 'rev',
 }
 
 
 def tree_to_directory(tree, objects, log=None):
     """Format a tree as a directory
 
     """
     entries = []
     for entry in objects[tree['path']]:
         entries.append({
             'type': _entry_type_map[entry['type']],
             'perms': int(entry['perms'].value),
             'name': entry['name'],
             'target': entry['sha1_git']
         })
 
     return {
         'id': tree['sha1_git'],
         'entries': entries
     }
 
 
 def commit_to_revision(commit, objects, log=None):
     """Format a commit as a revision.
 
     """
     upper_directory = objects[git.ROOT_TREE_KEY][0]
 
     return {
         'id': commit['sha1_git'],
         'date':
-            datetime.fromtimestamp(commit['revision_author_date']),
+            to_datetime(commit['revision_author_date']),
         'date_offset':
             format_to_minutes(commit['revision_author_offset']),
         'committer_date':
-            datetime.fromtimestamp(commit['revision_committer_date']),
+            to_datetime(commit['revision_committer_date']),
         'committer_date_offset':
             format_to_minutes(commit['revision_committer_offset']),
         'type': commit['revision_type'],
         'directory': upper_directory['sha1_git'],
         'message': commit['revision_message'],
         'author_name': commit['revision_author_name'],
         'author_email': commit['revision_author_email'],
         'committer_name': commit['revision_committer_name'],
         'committer_email': commit['revision_committer_email'],
         'parents': [],
     }
 
 
 def annotated_tag_to_release(release, log=None):
     """Format a swh release.
 
     """
     return {
         'id': release['sha1_git'],
         'revision': release['revision_sha1_git'],
         'name': release['release_name'],
         'comment': release['release_comment'],
-        'date': datetime.fromtimestamp(release['release_date']),
+        'date': to_datetime(release['release_date']),
         'date_offset': format_to_minutes(release['release_offset']),
         'author_name': release['release_author_name'],
         'author_email': release['release_author_email'],
     }
 
 
 def origin_url_to_origin(origin_url):
     """Format a pygit2.Repository as an origin suitable for swh.storage"""
     return {
         'type': 'dir',
         'url': origin_url,
     }
diff --git a/swh/loader/dir/tests/test_converters.py b/swh/loader/dir/tests/test_converters.py
index 960d7bb..91f1423 100644
--- a/swh/loader/dir/tests/test_converters.py
+++ b/swh/loader/dir/tests/test_converters.py
@@ -1,207 +1,213 @@
 # Copyright (C) 2015 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import unittest
+import datetime
 
 from nose.tools import istest
-from datetime import datetime
 
 from swh.loader.dir import converters
 from swh.loader.dir.git import git
 from swh.loader.dir.git.git import GitType, GitPerm
 
 
 class TestConverters(unittest.TestCase):
     @istest
     def format_to_minutes(self):
         self.assertEquals(converters.format_to_minutes('+0100'), 60)
         self.assertEquals(converters.format_to_minutes('-0200'), -120)
         self.assertEquals(converters.format_to_minutes('+1250'), 12*60+50)
         self.assertEquals(converters.format_to_minutes('+0000'), 0)
         self.assertEquals(converters.format_to_minutes('-0000'), 0)
 
     @istest
     def origin_url_to_origin(self):
         # given
         origin_url = 'foobar'
 
         # when
         self.assertDictEqual({
             'type': 'dir',
             'url': origin_url,
         }, converters.origin_url_to_origin(origin_url))
 
     @istest
     def annotated_tag_to_release(self):
         # given
         release = {
             'sha1_git': '123',
             'revision_sha1_git': '456',
             'release_name': 'some-release',
             'release_comment': 'some-comment-on-release',
             'release_date': 1444054085,
             'release_offset': '-0300',
             'release_author_name': 'someone',
             'release_author_email': 'someone@whatelse.eu'
         }
 
         expected_release = {
             'id': '123',
             'revision': '456',
             'name': 'some-release',
             'comment': 'some-comment-on-release',
-            'date': datetime.fromtimestamp(1444054085),
+            'date': datetime.datetime.fromtimestamp(
+                1444054085,
+                tz=datetime.timezone.utc),
             'date_offset': -180,
             'author_name': 'someone',
             'author_email': 'someone@whatelse.eu',
         }
 
         # when
         actual_release = converters.annotated_tag_to_release(release)
 
         # then
         self.assertDictEqual(
             expected_release,
             actual_release)
 
     @istest
     def _blob_to_content_visible(self):
         obj = {
             'length': 9,
             'data': b'some-data',
             'sha1': b'sha1',
             'sha1_git': b'sha1-git',
             'sha256': b'sha256',
             'perms': GitPerm.BLOB,
             'type': GitType.BLOB
         }
 
         expected_content = {
             'length': 9,
             'data': b'some-data',
             'sha1': b'sha1',
             'sha1_git': b'sha1-git',
             'sha256': b'sha256',
             'perms': GitPerm.BLOB.value,
             'type': GitType.BLOB.value,
             'status': 'visible'
         }
 
         # when
         actual_content = converters._blob_to_content(obj)
 
         # then
         self.assertEqual(expected_content, actual_content)
 
     @istest
     def _blob_to_content_absent(self):
         obj = {
             'length': 9,
             'data': b'some-data',
             'sha1': b'sha1',
             'sha1_git': b'sha1-git',
             'sha256': b'sha256',
             'perms': GitPerm.BLOB,
             'type': GitType.BLOB
         }
 
         expected_content = {
             'length': 9,
             'data': b'some-data',
             'sha1': b'sha1',
             'sha1_git': b'sha1-git',
             'sha256': b'sha256',
             'perms': GitPerm.BLOB.value,
             'type': GitType.BLOB.value,
             'status': 'absent',
             'reason': 'Content too large',
             'origin': 3}
 
         # when
         actual_content = converters._blob_to_content(obj,
                                                      max_content_size=5,
                                                      origin_id=3)
 
         # then
         self.assertEqual(expected_content, actual_content)
 
     @istest
     def tree_to_directory_no_entries(self):
         # given
         tree = {
             'path': 'foo',
             'sha1_git': b'tree_sha1_git'
         }
         objects = {
             'foo': [{'type': GitType.TREE,
                      'perms': GitPerm.TREE,
                      'name': 'bar',
                      'sha1_git': b'sha1-target'},
                     {'type': GitType.BLOB,
                      'perms': GitPerm.BLOB,
                      'name': 'file-foo',
                      'sha1_git': b'file-foo-sha1-target'}]
         }
 
         expected_directory = {
             'id': b'tree_sha1_git',
             'entries': [{'type': 'dir',
                          'perms': int(GitPerm.TREE.value),
                          'name': 'bar',
                          'target': b'sha1-target'},
                         {'type': 'file',
                          'perms': int(GitPerm.BLOB.value),
                          'name': 'file-foo',
                          'target': b'file-foo-sha1-target'}]
         }
 
         # when
         actual_directory = converters.tree_to_directory(tree, objects)
 
         # then
         self.assertEqual(actual_directory, expected_directory)
 
     @istest
     def commit_to_revision(self):
         # given
         commit = {
             'sha1_git': 'commit-git-sha1',
             'revision_author_date': 1444054085,
             'revision_author_offset': '+0000',
             'revision_committer_date': 1444054085,
             'revision_committer_offset': '-0000',
             'revision_type': 'tar',
             'revision_message': 'synthetic-message-input',
             'revision_author_name': 'author-name',
             'revision_author_email': 'author-email',
             'revision_committer_name': 'committer-name',
             'revision_committer_email': 'committer-email',
         }
 
         objects = {
             git.ROOT_TREE_KEY: [{'sha1_git': 'targeted-tree-sha1'}]
         }
 
         expected_revision = {
             'id': 'commit-git-sha1',
-            'date': datetime.fromtimestamp(1444054085),
+            'date': datetime.datetime.fromtimestamp(
+                1444054085,
+                tz=datetime.timezone.utc),
             'date_offset': 0,
-            'committer_date': datetime.fromtimestamp(1444054085),
+            'committer_date': datetime.datetime.fromtimestamp(
+                1444054085,
+                tz=datetime.timezone.utc),
             'committer_date_offset': 0,
             'type': 'tar',
             'directory': 'targeted-tree-sha1',
             'message': 'synthetic-message-input',
             'author_name': 'author-name',
             'author_email': 'author-email',
             'committer_name': 'committer-name',
             'committer_email': 'committer-email',
             'parents': [],
         }
 
         # when
         actual_revision = converters.commit_to_revision(commit, objects)
 
         # then
         self.assertEquals(actual_revision, expected_revision)