diff --git a/AUTHORS b/AUTHORS index 72ac9eb..2d0a34a 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,24 +1,3 @@ -Authors -======= +Copyright (C) 2015 The Software Heritage developers -Below you can find a list of contributors to swh-loader-git and copyright -owners of code that has become part of swh-loader-git. They've contributed in a -variety of ways and this software wouldn't exist without them. - -Thank you! - -(For actual copyright notices, please refer to the individual source files and -the Git repository.) - - -Original authors ----------------- - -* Stefano Zacchiroli -* Antoine R. Dumont -* Nicolas Dandrimont - -Code contributors ------------------ - -* Contribute and ADD YOUR NAME HERE! +See http://www.softwareheritage.org/ for more information. diff --git a/PKG-INFO b/PKG-INFO index 0243a1d..92a71d5 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.loader.git -Version: 0.0.6 +Version: 0.0.7 Summary: Software Heritage git loader Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh.loader.git.egg-info/PKG-INFO b/swh.loader.git.egg-info/PKG-INFO index 0243a1d..92a71d5 100644 --- a/swh.loader.git.egg-info/PKG-INFO +++ b/swh.loader.git.egg-info/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.loader.git -Version: 0.0.6 +Version: 0.0.7 Summary: Software Heritage git loader Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh/loader/git/converters.py b/swh/loader/git/converters.py index 6e75ca4..aef8f7c 100644 --- a/swh/loader/git/converters.py +++ b/swh/loader/git/converters.py @@ -1,160 +1,162 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Convert pygit2 objects to dictionaries suitable for swh.storage""" from pygit2 import GIT_OBJ_COMMIT from swh.core import hashutil from .utils import format_date HASH_ALGORITHMS = ['sha1', 'sha256'] def blob_to_content(id, repo, log=None, max_content_size=None, origin_id=None): """Format a blob as a content""" blob = repo[id] size = blob.size ret = { 'sha1_git': id.raw, 'length': blob.size, 'status': 'absent' } if max_content_size: if size > max_content_size: if log: log.info('Skipping content %s, too large (%s > %s)' % (id.hex, size, max_content_size), extra={ 'swh_type': 'loader_git_content_skip', 'swh_repo': repo.path, 'swh_id': id.hex, 'swh_size': size, }) ret['reason'] = 'Content too large' ret['origin'] = origin_id return ret data = blob.data hashes = hashutil.hashdata(data, HASH_ALGORITHMS) ret.update(hashes) ret['data'] = data ret['status'] = 'visible' return ret def tree_to_directory(id, repo, log=None): """Format a tree as a directory""" ret = { 'id': id.raw, } entries = [] ret['entries'] = entries entry_type_map = { 'tree': 'dir', 'blob': 'file', 'commit': 'rev', } for entry in repo[id]: entries.append({ 'type': entry_type_map[entry.type], 'perms': entry.filemode, 'name': entry._name, 'target': entry.id.raw, }) return ret def commit_to_revision(id, repo, log=None): """Format a commit as a revision""" commit = repo[id] author = commit.author committer = commit.committer return { 'id': id.raw, 'date': format_date(author), 'date_offset': author.offset, 'committer_date': format_date(committer), 'committer_date_offset': committer.offset, 'type': 'git', 'directory': commit.tree_id.raw, 'message': commit.raw_message, 'author_name': author.raw_name, 'author_email': author.raw_email, 'committer_name': committer.raw_name, 'committer_email': committer.raw_email, + 'synthetic': False, 'parents': [p.raw for p in commit.parent_ids], } def annotated_tag_to_release(id, repo, log=None): """Format an annotated tag as a release""" tag = repo[id] tag_pointer = repo[tag.target] if tag_pointer.type != GIT_OBJ_COMMIT: if log: log.warn("Ignoring tag %s pointing at %s %s" % ( tag.id.hex, tag_pointer.__class__.__name__, tag_pointer.id.hex), extra={ 'swh_type': 'loader_git_tag_ignore', 'swh_repo': repo.path, 'swh_tag_id': tag.id.hex, 'swh_tag_dest': { 'type': tag_pointer.__class__.__name__, 'id': tag_pointer.id.hex, }, }) return author = tag.tagger if not author: if log: log.warn("Tag %s has no author, using default values" % id.hex, extra={ 'swh_type': 'loader_git_tag_author_default', 'swh_repo': repo.path, 'swh_tag_id': tag.id.hex, }) author_name = b'' author_email = b'' date = None date_offset = 0 else: author_name = author.raw_name author_email = author.raw_email date = format_date(author) date_offset = author.offset return { 'id': id.raw, 'date': date, 'date_offset': date_offset, 'revision': tag.target.raw, 'comment': tag._message, 'name': tag.name, 'author_name': author_name, 'author_email': author_email, + 'synthetic': False, } def ref_to_occurrence(ref): """Format a reference as an occurrence""" return ref def origin_url_to_origin(origin_url): """Format a pygit2.Repository as an origin suitable for swh.storage""" return { 'type': 'git', 'url': origin_url, } diff --git a/swh/loader/git/tests/test_converters.py b/swh/loader/git/tests/test_converters.py index b901d09..b66d59e 100644 --- a/swh/loader/git/tests/test_converters.py +++ b/swh/loader/git/tests/test_converters.py @@ -1,91 +1,124 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import shutil import subprocess import tempfile import unittest +import datetime from nose.tools import istest import pygit2 import swh.loader.git.converters as converters from swh.core.hashutil import hex_to_hash class TestConverters(unittest.TestCase): @classmethod def setUpClass(cls): super().setUpClass() cls.repo_path = tempfile.mkdtemp() cls.repo = pygit2.init_repository(cls.repo_path, bare=True) fast_export = os.path.join(os.path.dirname(__file__), '../../../../..', 'swh-storage-testdata', 'git-repos', 'example-submodule.fast-export.xz') xz = subprocess.Popen( ['xzcat'], stdin=open(fast_export, 'rb'), stdout=subprocess.PIPE, ) git = subprocess.Popen( ['git', 'fast-import', '--quiet'], stdin=xz.stdout, cwd=cls.repo_path, ) # flush stdout of xz xz.stdout.close() git.communicate() @classmethod def tearDownClass(cls): super().tearDownClass() shutil.rmtree(cls.repo_path) print(cls.repo_path) def setUp(self): super().setUp() self.blob_id = pygit2.Oid( hex='28c6f4023d65f74e3b59a2dea3c4277ed9ee07b0') self.blob = { 'sha1_git': self.blob_id.raw, 'sha1': hex_to_hash('4850a3420a2262ff061cb296fb915430fa92301c'), 'sha256': hex_to_hash('fee7c8a485a10321ad94b64135073cb5' '5f22cb9f57fa2417d2adfb09d310adef'), 'data': (b'[submodule "example-dependency"]\n' b'\tpath = example-dependency\n' b'\turl = https://github.com/githubtraining/' b'example-dependency.git\n'), 'length': 124, 'status': 'visible', } self.blob_hidden = { 'sha1_git': self.blob_id.raw, 'length': 124, 'status': 'absent', 'reason': 'Content too large', 'origin': None, } @istest def blob_to_content(self): content = converters.blob_to_content(self.blob_id, self.repo) self.assertEqual(self.blob, content) @istest def blob_to_content_absent(self): max_length = self.blob['length'] - 1 content = converters.blob_to_content(self.blob_id, self.repo, max_content_size=max_length) self.assertEqual(self.blob_hidden, content) + + @istest + def commit_to_revision(self): + sha1 = '9768d0b576dbaaecd80abedad6dfd0d72f1476da' + commit = self.repo.revparse_single(sha1) + + # when + actual_revision = converters.commit_to_revision(commit.id, self.repo) + + expected_revision = { + 'author_email': b'zack@upsilon.cc', + 'id': hex_to_hash('9768d0b576dbaaecd80abedad6dfd0d72f1476da'), + 'directory': b'\xf0i\\./\xa7\xce\x9dW@#\xc3A7a\xa4s\xe5\x00\xca', + 'type': 'git', + 'committer_name': b'Stefano Zacchiroli', + 'date_offset': 120, + 'committer_email': b'zack@upsilon.cc', + 'committer_date': datetime.datetime(2015, 9, 24, 8, 36, 5, + tzinfo=datetime.timezone.utc), + 'author_name': b'Stefano Zacchiroli', + 'message': b'add submodule dependency\n', + 'date': datetime.datetime(2015, 9, 24, 8, 36, 5, + tzinfo=datetime.timezone.utc), + 'committer_date_offset': 120, + 'parents': [ + b'\xc3\xc5\x88q23`\x9f[\xbb\xb2\xd9\xe7\xf3\xfbJf\x0f?r' + ], + 'synthetic': False, + } + + # then + self.assertEquals(actual_revision, expected_revision) diff --git a/version.txt b/version.txt index ea8fa74..57370b7 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.6-0-g0a23378 \ No newline at end of file +v0.0.7-0-gee29257 \ No newline at end of file