diff --git a/swh/gitloader/git.py b/swh/gitloader/git.py index 467ff7b..c0c023b 100644 --- a/swh/gitloader/git.py +++ b/swh/gitloader/git.py @@ -1,190 +1,187 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import pygit2 import time from datetime import datetime from pygit2 import GIT_REF_OID from pygit2 import GIT_OBJ_COMMIT, GIT_OBJ_TREE, GIT_SORT_TOPOLOGICAL from enum import Enum from swh.core import hashutil from swh.data import swhrepo from swh.store import store class DirectoryTypeEntry(Enum): """Types of git objects. """ file = 'file' directory = 'directory' def date_format(d): """d is expected to be a datetime object. """ return time.strftime("%a, %d %b %Y %H:%M:%S +0000", d.timetuple()) def now(): """Cheat time values.""" return date_format(datetime.utcnow()) def timestamp_to_string(timestamp): """Convert a timestamps to string. """ return date_format(datetime.utcfromtimestamp(timestamp)) HASH_ALGORITHMS=['sha1', 'sha256'] def parse(repo_path): """Given a repository path, parse and return a memory model of such repository.""" def read_signature(signature): return '%s <%s>' % (signature.name, signature.email) def treewalk(repo, tree): """Walk a tree with the same implementation as `os.path`. Returns: tree, trees, blobs """ trees, blobs, dir_entries = [], [], [] for tree_entry in tree: obj = repo.get(tree_entry.oid) if obj is None: logging.warn('skip submodule-commit %s' % tree_entry.hex) continue # submodule! if obj.type == GIT_OBJ_TREE: logging.debug('found tree %s' % tree_entry.hex) nature = DirectoryTypeEntry.directory.value trees.append(tree_entry) else: logging.debug('found content %s' % tree_entry.hex) data = obj.data nature = DirectoryTypeEntry.file.value hashes = hashutil.hashdata(data, HASH_ALGORITHMS) blobs.append({'id': obj.hex, 'type': store.Type.content, 'content-sha1': hashes['sha1'], 'content-sha256': hashes['sha256'], 'content': data, # FIXME: add pointer to data on disk? 'size': obj.size}) dir_entries.append({'name': tree_entry.name, 'type': store.Type.directory_entry, 'target-sha1': obj.hex, 'nature': nature, 'perms': tree_entry.filemode, 'atime': now(), # FIXME: use real data 'mtime': now(), # FIXME: use real data 'ctime': now(), # FIXME: use real data 'parent': tree.hex}) yield tree, dir_entries, trees, blobs for tree_entry in trees: for x in treewalk(repo, repo[tree_entry.oid]): yield x def walk_tree(repo, swh_repo, rev): """Walk the rev revision's directories. """ if swh_repo.already_visited(rev.hex): logging.debug('commit %s already visited, skipped' % rev.hex) return swh_repo for dir_root, dir_entries, _, contents_ref in treewalk(repo, rev.tree): for content_ref in contents_ref: swh_repo.add_content(content_ref) swh_repo.add_directory({'id': dir_root.hex, 'type': store.Type.directory, - 'content': dir_root.read_raw(), # FIXME: add pointer to data on disk? 'entries': dir_entries}) revision_parent_sha1s = list(map(str, rev.parent_ids)) author = {'name': rev.author.name, 'email': rev.author.email, 'type': store.Type.person} committer = {'name': rev.committer.name, 'email': rev.committer.email, 'type': store.Type.person} swh_repo.add_revision({'id': rev.hex, 'type':store.Type.revision, - 'content': rev.read_raw(), # FIXME: add pointer to data on disk? 'date': timestamp_to_string(rev.commit_time), 'directory': rev.tree.hex, 'message': rev.message, 'committer': committer, 'author': author, 'parent-sha1s': revision_parent_sha1s }) swh_repo.add_person(read_signature(rev.author), author) swh_repo.add_person(read_signature(rev.committer), committer) return swh_repo def walk_revision_from(repo, swh_repo, head_rev): """Walk the rev history log from head_rev. - repo is the current repository - rev is the latest rev to start from. """ for rev in repo.walk(head_rev.id, GIT_SORT_TOPOLOGICAL): swh_repo = walk_tree(repo, swh_repo, rev) return swh_repo repo = pygit2.Repository(repo_path) # memory model swh_repo = swhrepo.SWHRepo() # add origin origin = {'type': 'git', 'url': 'file://' + repo.path} swh_repo.add_origin(origin) # add references and crawl them for ref_name in repo.listall_references(): logging.info('walk reference %s' % ref_name) ref = repo.lookup_reference(ref_name) head_rev = repo[ref.target] \ if ref.type is GIT_REF_OID \ else ref.peel(GIT_OBJ_COMMIT) # noqa if isinstance(head_rev, pygit2.Tag): head_start = head_rev.get_object() taggerSig = head_rev.tagger author = {'name': taggerSig.name, 'email': taggerSig.email, 'type': store.Type.person} release = {'id': head_rev.hex, 'type': store.Type.release, - 'content': head_rev.read_raw(), # FIXME: add pointer to data on disk? 'revision': head_rev.target.hex, 'name': ref_name, 'date': now(), # FIXME: find the tag's date, 'author': author, 'comment': head_rev.message} swh_repo.add_release(release) swh_repo.add_person(read_signature(taggerSig), author) else: swh_repo.add_occurrence({'id': head_rev.hex, 'revision': head_rev.hex, 'reference': ref_name, 'url-origin': origin['url'], 'type': store.Type.occurrence}) head_start = head_rev # crawl commits and trees walk_revision_from(repo, swh_repo, head_start) return swh_repo diff --git a/swh/tests/test_api_directory.py b/swh/tests/test_api_directory.py index c1082ae..9222010 100644 --- a/swh/tests/test_api_directory.py +++ b/swh/tests/test_api_directory.py @@ -1,120 +1,119 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.store import db, models from swh.protocols import serial from test_utils import now, app_client, app_client_teardown @attr('slow') class DirectoryTestCase(unittest.TestCase): def setUp(self): self.app, db_url, self.content_storage_dir = app_client() with db.connect(db_url) as db_conn: self.content_sha1_id = 'content-sha1c46ee476a8be155ab049994f717e' content_sha1_hex = 'content-sha1c46ee476a8be155ab049994f717e' content_sha256_hex = 'content-sha2566ee476a8be155ab049994f717e' models.add_content(db_conn, self.content_sha1_id, content_sha1_hex, content_sha256_hex, 10) self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e' models.add_directory(db_conn, self.directory_sha1_hex) def tearDown(self): app_client_teardown(self.content_storage_dir) @istest def get_directory_ok(self): # when rv = self.app.get('/vcs/directories/%s' % self.directory_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == 'directory-sha16ee476a8be155ab049994f717e' @istest def get_directory_not_found(self): # when rv = self.app.get('/vcs/directories/111111f9dd5dc46ee476a8be155ab049994f7170') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def get_directory_not_found_with_bad_format(self): # when rv = self.app.get('/vcs/directories/1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def put_directory_create_and_update(self): directory_sha1='directory-sha16ee476a8be155ab049994f7170' # does not exist rv = self.app.get('/vcs/directories/%s' % directory_sha1) # then assert rv.status_code == 404 assert rv.data == b'Not found!' # we create it - body = serial.dumps({'content': b'directory has content too.', - 'entries': [{'name': 'filename', + body = serial.dumps({'entries': [{'name': 'filename', 'target-sha1': self.content_sha1_id, 'nature': 'file', 'perms': '000', 'atime': now(), 'mtime': now(), 'ctime': now(), 'parent': directory_sha1}, {'name': 'dirname', 'target-sha1': self.directory_sha1_hex, 'nature': 'directory', 'perms': '012', 'atime': now(), 'mtime': now(), 'ctime': now(), 'parent': directory_sha1} ]}) rv = self.app.put('/vcs/directories/%s' % directory_sha1, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # now it exists rv = self.app.get('/vcs/directories/%s' % directory_sha1) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == 'directory-sha16ee476a8be155ab049994f7170' # we update it rv = self.app.put('/vcs/directories/directory-sha16ee476a8be155ab049994f7170', data=serial.dumps({'entry': 'directory-bar'}), headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 200 assert rv.data == b'Successful update!' # still the same rv = self.app.get('/vcs/directories/directory-sha16ee476a8be155ab049994f7170') # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == 'directory-sha16ee476a8be155ab049994f7170' diff --git a/swh/tests/test_api_occurrence.py b/swh/tests/test_api_occurrence.py index 30b95cc..b3f9de1 100644 --- a/swh/tests/test_api_occurrence.py +++ b/swh/tests/test_api_occurrence.py @@ -1,132 +1,131 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.store import db, models from swh.protocols import serial from test_utils import now, app_client, app_client_teardown @attr('slow') class OccurrenceTestCase(unittest.TestCase): def setUp(self): self.app, db_url, self.content_storage_dir = app_client() with db.connect(db_url) as db_conn: self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e' models.add_directory(db_conn, self.directory_sha1_hex) authorAndCommitter = {'name': 'some-name', 'email': 'some-email'} models.add_person(db_conn, authorAndCommitter['name'], authorAndCommitter['email']) self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e' models.add_revision(db_conn, self.revision_sha1_hex, now(), self.directory_sha1_hex, "revision message", authorAndCommitter, authorAndCommitter) self.origin_url = "https://github.com/user/repo" models.add_origin(db_conn, self.origin_url, 'git') self.reference_name = 'master' models.add_occurrence(db_conn, self.origin_url, self.reference_name, self.revision_sha1_hex) self.reference_name2 = 'master2' models.add_occurrence(db_conn, self.origin_url, self.reference_name2, self.revision_sha1_hex) self.revision_sha1_hex_2 = '2-revision-sha1-to-test-existence9994f71' models.add_revision(db_conn, self.revision_sha1_hex_2, now(), self.directory_sha1_hex, "revision message 2", authorAndCommitter, authorAndCommitter) def tearDown(self): app_client_teardown(self.content_storage_dir) @istest def get_occurrence_ok(self): # when rv = self.app.get('/vcs/occurrences/%s' % self.revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data) == [self.reference_name, self.reference_name2] @istest def get_occurrence_not_found(self): # when rv = self.app.get('/vcs/occurrences/inexistant-sha1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def get_occurrence_not_found_with_bad_format(self): # when rv = self.app.get('/vcs/occurrences/1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def put_occurrence_create_and_update(self): occ_revision_sha1_hex = self.revision_sha1_hex_2 rv = self.app.get('/vcs/occurrences/%s' % occ_revision_sha1_hex) # then assert rv.status_code == 404 assert rv.data == b'Not found!' # we create it - body = serial.dumps({'content': b'occurrence content', - 'revision': occ_revision_sha1_hex, # FIXME: redundant with the one from uri.. + body = serial.dumps({'revision': occ_revision_sha1_hex, # FIXME: redundant with the one from uri.. 'reference': 'master', 'url-origin': self.origin_url}) rv = self.app.put('/vcs/occurrences/%s' % occ_revision_sha1_hex, # ... here data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # now it exists rv = self.app.get('/vcs/occurrences/%s' % occ_revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data) == ['master'] # we update it rv = self.app.put('/vcs/occurrences/%s' % occ_revision_sha1_hex, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 200 assert rv.data == b'Successful update!' # still the same rv = self.app.get('/vcs/occurrences/%s' % occ_revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data) == ['master'] diff --git a/swh/tests/test_api_release.py b/swh/tests/test_api_release.py index 31b051d..877d73d 100644 --- a/swh/tests/test_api_release.py +++ b/swh/tests/test_api_release.py @@ -1,120 +1,119 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.store import db, models from swh.protocols import serial from test_utils import now, app_client, app_client_teardown @attr('slow') class ReleaseTestCase(unittest.TestCase): def setUp(self): self.app, db_url, self.content_storage_dir = app_client() with db.connect(db_url) as db_conn: self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e' models.add_directory(db_conn, self.directory_sha1_hex) self.tagAuthor = {'name': 'tony', 'email': 'tony@mail.org'} models.add_person(db_conn, self.tagAuthor['name'], self.tagAuthor['email']) self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e' models.add_revision(db_conn, self.revision_sha1_hex, now(), self.directory_sha1_hex, "revision message", self.tagAuthor, self.tagAuthor) self.release_sha1_hex = 'release-sha1-to-test-existence1234567901' models.add_release(db_conn, self.release_sha1_hex, self.revision_sha1_hex, now(), "0.0.1", "Super release tagged by tony", self.tagAuthor) def tearDown(self): app_client_teardown(self.content_storage_dir) @istest def get_release_ok(self): # when rv = self.app.get('/vcs/releases/%s' % self.release_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == self.release_sha1_hex @istest def get_release_not_found(self): # when rv = self.app.get('/vcs/releases/inexistant-sha1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def get_release_not_found_with_bad_format(self): # when rv = self.app.get('/vcs/releases/1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def put_release_create_and_update(self): release_sha1_hex = 'sha1-release46ee476a8be155ab049994f717e' rv = self.app.get('/vcs/releases/%s' % release_sha1_hex) # then assert rv.status_code == 404 assert rv.data == b'Not found!' # we create it body = serial.dumps({'id': release_sha1_hex, - 'content': b'release also has content', 'revision': self.revision_sha1_hex, 'date': now(), 'name': '0.0.1', 'comment': 'super release tagged by ardumont', 'author': self.tagAuthor}) rv = self.app.put('/vcs/releases/%s' % release_sha1_hex, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # now it exists rv = self.app.get('/vcs/releases/%s' % release_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == release_sha1_hex # we update it rv = self.app.put('/vcs/releases/%s' % release_sha1_hex, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 200 assert rv.data == b'Successful update!' # still the same rv = self.app.get('/vcs/releases/%s' % release_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == release_sha1_hex diff --git a/swh/tests/test_api_revision.py b/swh/tests/test_api_revision.py index dc2a30a..6fa16ae 100644 --- a/swh/tests/test_api_revision.py +++ b/swh/tests/test_api_revision.py @@ -1,111 +1,110 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.store import db, models from swh.protocols import serial from test_utils import now, app_client, app_client_teardown @attr('slow') class RevisionTestCase(unittest.TestCase): def setUp(self): self.app, db_url, self.content_storage_dir = app_client() with db.connect(db_url) as db_conn: self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e' models.add_directory(db_conn, self.directory_sha1_hex) self.authorAndCommitter = {'name': 'some-name', 'email': 'some-email'} models.add_person(db_conn, self.authorAndCommitter['name'], self.authorAndCommitter['email']) self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e' models.add_revision(db_conn, self.revision_sha1_hex, now(), self.directory_sha1_hex, "revision message", self.authorAndCommitter, self.authorAndCommitter) def tearDown(self): app_client_teardown(self.content_storage_dir) @istest def get_revision_ok(self): # when rv = self.app.get('/vcs/revisions/%s' % self.revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == self.revision_sha1_hex @istest def get_revision_not_found(self): # when rv = self.app.get('/vcs/revisions/inexistant-sha1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def get_revision_not_found_with_bad_format(self): # when rv = self.app.get('/vcs/revisions/1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def put_revision_create_and_update(self): revision_sha1_hex = 'sha1-revision46ee476a8be155ab049994f717e' rv = self.app.get('/vcs/revisions/%s' % revision_sha1_hex) # then assert rv.status_code == 404 assert rv.data == b'Not found!' # we create it - body = serial.dumps({'content': b'revision has content too.', - 'date': now(), + body = serial.dumps({'date': now(), 'directory': self.directory_sha1_hex, 'message': 'revision message describing it', 'committer': self.authorAndCommitter, 'author': self.authorAndCommitter, 'parent-sha1s': [self.revision_sha1_hex]}) rv = self.app.put('/vcs/revisions/%s' % revision_sha1_hex, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # now it exists rv = self.app.get('/vcs/revisions/%s' % revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == revision_sha1_hex # we update it rv = self.app.put('/vcs/revisions/%s' % revision_sha1_hex, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 200 assert rv.data == b'Successful update!' # still the same rv = self.app.get('/vcs/revisions/%s' % revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == revision_sha1_hex diff --git a/swh/tests/test_local_loader.py b/swh/tests/test_local_loader.py index 5a6b2c2..ac9a741 100644 --- a/swh/tests/test_local_loader.py +++ b/swh/tests/test_local_loader.py @@ -1,249 +1,249 @@ # coding: utf-8 # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import pygit2 import tempfile import shutil import os from nose.plugins.attrib import attr from nose.tools import istest from swh.store import db, models from swh.gitloader import loader from swh.conf import reader import test_initdb from test_utils import list_files_from from test_git_utils import create_commit_with_content, create_tag @attr('slow') class TestLocalLoader(unittest.TestCase): def setUp(self): """Initialize a git repository for the remaining test to manipulate. """ tmp_git_folder_path = tempfile.mkdtemp(prefix='test-sgloader.', dir='/tmp') self.tmp_git_repo = pygit2.init_repository(tmp_git_folder_path) self.conf_back = reader.read('./resources/test/back.ini', {'port': ('int', 9999)}) self.db_url = self.conf_back['db_url'] self.conf = { 'action': 'load', 'repo_path': self.tmp_git_repo.workdir, 'backend-type': 'local', 'backend': './resources/test/back.ini' } def init_db_setup(self): """Initialize a git repository for the remaining test to manipulate. """ test_initdb.prepare_db(self.db_url) def tearDown(self): """Destroy the test git repository. """ shutil.rmtree(self.tmp_git_repo.workdir) shutil.rmtree(self.conf_back['content_storage_dir'], ignore_errors=True) @istest def should_fail_on_bad_action(self): # when try: loader.load({'action': 'unknown'}) except: pass @istest def should_fail_on_inexistant_folder(self): # when try: loader.load({'action': 'load', 'repo_path': 'something-that-definitely-does-not-exist'}) except: pass @istest def should_fail_on_inexistant_backend_type(self): # when try: loader.load({'action': 'load', 'repo_path': '.', 'backend-type': 'unknown'}) # only local or remote supported except: pass @istest def local_loader(self): """Trigger loader and make sure everything is ok. """ self.init_db_setup() # given commit0 = create_commit_with_content(self.tmp_git_repo, 'blob 0', 'commit msg 0') commit1 = create_commit_with_content(self.tmp_git_repo, 'blob 1', 'commit msg 1', [commit0.hex]) commit2 = create_commit_with_content(self.tmp_git_repo, 'blob 2', 'commit msg 2', [commit1.hex]) commit3 = create_commit_with_content(self.tmp_git_repo, None, 'commit msg 3', [commit2.hex]) commit4 = create_commit_with_content(self.tmp_git_repo, 'blob 4', 'commit msg 4', [commit3.hex]) # when loader.load(self.conf) # then nb_files = len(list_files_from(self.conf_back['content_storage_dir'])) - self.assertEquals(nb_files, 4+5+4, "4 blobs, 4 trees, 5 commits were created so 13 files.") + self.assertEquals(nb_files, 4, "4 blobs.") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 5, "Should be 5 commits") self.assertEquals( models.count_directories(db_conn), 5, "Should be 5 trees") self.assertEquals( models.count_contents(db_conn), 4, "Should be 4 blobs as we created one commit without data!") self.assertEquals( models.count_release(db_conn), 0, "No tag created so 0 release.") self.assertEquals( models.count_occurrence(db_conn), 1, "Should be 1 reference (master) so 1 occurrence.") # given commit5 = create_commit_with_content(self.tmp_git_repo, 'new blob 5', 'commit msg 5', [commit4.hex]) commit6 = create_commit_with_content(self.tmp_git_repo, 'new blob and last 6', 'commit msg 6', [commit5.hex]) commit7 = create_commit_with_content(self.tmp_git_repo, 'new blob 7', 'commit msg 7', [commit6.hex]) # when loader.load(self.conf) # then nb_files = len(list_files_from(self.conf_back['content_storage_dir'])) - self.assertEquals(nb_files, 13+3+3+3, "3 commits + 3 trees + 3 blobs so 9 more.") + self.assertEquals(nb_files, 4+3, "3 new blobs.") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 8, "Should be 5+3 == 8 commits now") self.assertEquals( models.count_directories(db_conn), 8, "Should be 5+3 == 8 trees") self.assertEquals( models.count_contents(db_conn), 7, "Should be 4+3 == 7 blobs") self.assertEquals( models.count_release(db_conn), 0, "No tag created so 0 release.") self.assertEquals( models.count_occurrence(db_conn), 2, "Should be 1 reference which changed twice so 2 occurrences (master changed).") # given create_commit_with_content(self.tmp_git_repo, None, 'commit 8 with parent 2', [commit7.hex]) # when loader.load(self.conf) # then nb_files = len(list_files_from(self.conf_back['content_storage_dir'])) - self.assertEquals(nb_files, 22+1, "1 commit more without blob so no tree either.") + self.assertEquals(nb_files, 7, "no new blob.") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 9, "Should be 8+1 == 9 commits now") self.assertEquals( models.count_directories(db_conn), 8, "Should be 8 trees (new commit without blob so no new tree)") self.assertEquals( models.count_contents(db_conn), 7, "Should be 7 blobs (new commit without new blob)") self.assertEquals( models.count_release(db_conn), 0, "No tag created so 0 release.") self.assertEquals( models.count_occurrence(db_conn), 3, "Should be 1 reference which changed thrice so 3 occurrences (master changed again).") self.assertEquals( models.count_person(db_conn), 2, "1 author + 1 committer") # add tag create_tag(self.tmp_git_repo, '0.0.1', commit5, 'bad ass release 0.0.1, towards infinity...') create_tag(self.tmp_git_repo, '0.0.2', commit7, 'release 0.0.2... and beyond') loader.load(self.conf) # then nb_files = len(list_files_from(self.conf_back['content_storage_dir'])) - self.assertEquals(nb_files, 23+2, "2 tags more.") + self.assertEquals(nb_files, 7, "no new blob.") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 9, "Should be 8+1 == 9 commits now") self.assertEquals( models.count_directories(db_conn), 8, "Should be 8 trees (new commit without blob so no new tree)") self.assertEquals( models.count_contents(db_conn), 7, "Should be 7 blobs (new commit without new blob)") self.assertEquals( models.count_release(db_conn), 2, "Should be 2 annotated tags so 2 releases") self.assertEquals( models.count_occurrence(db_conn), 3, "master did not change this time so still 3 occurrences") self.assertEquals( models.count_person(db_conn), 3, "1 author + 1 committer + 1 tagger") diff --git a/swh/tests/test_remote_loader.py b/swh/tests/test_remote_loader.py index 0b736c4..04912e1 100644 --- a/swh/tests/test_remote_loader.py +++ b/swh/tests/test_remote_loader.py @@ -1,251 +1,251 @@ # coding: utf-8 # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import pygit2 import tempfile import shutil import os from nose.plugins.attrib import attr from nose.tools import istest from swh.store import db, models from swh.gitloader import loader from swh.conf import reader import test_initdb from test_git_utils import create_commit_with_content, create_tag from test_utils import list_files_from @attr('slow') class TestRemoteLoader(unittest.TestCase): def setUp(self): tmp_git_folder_path = tempfile.mkdtemp(prefix='test-sgloader.', dir='/tmp') self.tmp_git_repo = pygit2.init_repository(tmp_git_folder_path) self.conf = reader.read('./resources/test/back.ini', {'port': ('int', 9999)}) self.db_url = self.conf['db_url'] self.conf.update({ 'action': 'load', 'repo_path': self.tmp_git_repo.workdir, 'backend-type': 'remote', 'backend': 'http://localhost:%s' % self.conf['port'] }) # Not the remote loader in charge of creating the folder, so we do it if not os.path.exists(self.conf['content_storage_dir']): os.mkdir(self.conf['content_storage_dir']) def init_db_setup(self): """Initialize a git repository for the remaining test to manipulate. """ test_initdb.prepare_db(self.db_url) def tearDown(self): """Destroy the test git repository. """ shutil.rmtree(self.tmp_git_repo.workdir) shutil.rmtree(self.conf['content_storage_dir']) @istest def should_fail_on_bad_action(self): # when try: loader.load({'action': 'unknown'}) except: pass @istest def should_fail_on_inexistant_folder(self): # when try: loader.load({'action': 'load', 'repo_path': 'something-that-definitely-does-not-exist'}) except: pass @istest def should_fail_on_inexistant_backend_type(self): # when try: loader.load({'action': 'load', 'repo_path': '.', 'backend-type': 'unknown'}) # only local or remote supported except: pass @istest def remote_loader(self): """Trigger loader and make sure everything is ok. """ # given self.init_db_setup() # given commit0 = create_commit_with_content(self.tmp_git_repo, 'blob 0', 'commit msg 0') commit1 = create_commit_with_content(self.tmp_git_repo, 'blob 1', 'commit msg 1', [commit0.hex]) commit2 = create_commit_with_content(self.tmp_git_repo, 'blob 2', 'commit msg 2', [commit1.hex]) commit3 = create_commit_with_content(self.tmp_git_repo, None, 'commit msg 3', [commit2.hex]) commit4 = create_commit_with_content(self.tmp_git_repo, 'blob 4', 'commit msg 4', [commit3.hex]) # when loader.load(self.conf) # then nb_files = len(list_files_from(self.conf['content_storage_dir'])) - self.assertEquals(nb_files, 4+5+4, "4 blobs, 4 trees, 5 commits were created so 13 files.") + self.assertEquals(nb_files, 4, "4 blobs") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 5, "Should be 5 commits") self.assertEquals( models.count_directories(db_conn), 5, "Should be 5 trees") self.assertEquals( models.count_contents(db_conn), 4, "Should be 4 blobs as we created one commit without data!") self.assertEquals( models.count_release(db_conn), 0, "No tag created so 0 release.") self.assertEquals( models.count_occurrence(db_conn), 1, "Should be 1 reference (master) so 1 occurrence.") # given commit5 = create_commit_with_content(self.tmp_git_repo, 'new blob 5', 'commit msg 5', [commit4.hex]) commit6 = create_commit_with_content(self.tmp_git_repo, 'new blob and last 6', 'commit msg 6', [commit5.hex]) commit7 = create_commit_with_content(self.tmp_git_repo, 'new blob 7', 'commit msg 7', [commit6.hex]) # when loader.load(self.conf) # then nb_files = len(list_files_from(self.conf['content_storage_dir'])) - self.assertEquals(nb_files, 13+3+3+3, "3 commits + 3 trees + 3 blobs so 9 more.") + self.assertEquals(nb_files, 4+3, "3 new blobs") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 8, "Should be 5+3 == 8 commits now") self.assertEquals( models.count_directories(db_conn), 8, "Should be 5+3 == 8 trees") self.assertEquals( models.count_contents(db_conn), 7, "Should be 4+3 == 7 blobs") self.assertEquals( models.count_release(db_conn), 0, "No tag created so 0 release.") self.assertEquals( models.count_occurrence(db_conn), 2, "Should be 1 reference which changed twice so 2 occurrences (master changed).") # given create_commit_with_content(self.tmp_git_repo, None, 'commit 8 with parent 2', [commit7.hex]) # when loader.load(self.conf) # then nb_files = len(list_files_from(self.conf['content_storage_dir'])) - self.assertEquals(nb_files, 22+1, "1 commit more without blob so no tree either.") + self.assertEquals(nb_files, 7, "no new blob") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 9, "Should be 8+1 == 9 commits now") self.assertEquals( models.count_directories(db_conn), 8, "Should be 8 trees (new commit without blob so no new tree)") self.assertEquals( models.count_contents(db_conn), 7, "Should be 7 blobs (new commit without new blob)") self.assertEquals( models.count_release(db_conn), 0, "No tag created so 0 release.") self.assertEquals( models.count_occurrence(db_conn), 3, "Should be 1 reference which changed thrice so 3 occurrences (master changed again).") self.assertEquals( models.count_person(db_conn), 2, "1 author + 1 committer") # add tag create_tag(self.tmp_git_repo, '0.0.1', commit5, 'bad ass release 0.0.1, towards infinity...') create_tag(self.tmp_git_repo, '0.0.2', commit7, 'release 0.0.2... and beyond') loader.load(self.conf) # then nb_files = len(list_files_from(self.conf['content_storage_dir'])) - self.assertEquals(nb_files, 23+2, "2 tags more.") + self.assertEquals(nb_files, 7, "no new blob") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 9, "Should be 8+1 == 9 commits now") self.assertEquals( models.count_directories(db_conn), 8, "Should be 8 trees (new commit without blob so no new tree)") self.assertEquals( models.count_contents(db_conn), 7, "Should be 7 blobs (new commit without new blob)") self.assertEquals( models.count_release(db_conn), 2, "Should be 2 annotated tags so 2 releases") self.assertEquals( models.count_occurrence(db_conn), 3, "master did not change this time so still 3 occurrences") self.assertEquals( models.count_person(db_conn), 3, "1 author + 1 committer + 1 tagger")