diff --git a/bin/swh-loader-git b/bin/swh-loader-git index 268820f..3d6ae75 100755 --- a/bin/swh-loader-git +++ b/bin/swh-loader-git @@ -1,67 +1,67 @@ #!/usr/bin/env python3 # Copyright (C) 2015 Stefano Zacchiroli , # Antoine R. Dumont # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import argparse import logging import os -from swh.loader.git.gitloader import loader +from swh.loader.git import loader from swh.loader.git.conf import reader # Default configuration file DEFAULT_CONF_FILE = '~/.config/swh/loader-git.ini' # default configuration (can be overriden by the DEFAULT_CONF_FILE) DEFAULT_CONF = { 'log_dir': ('string', '/tmp/swh-loader-git/log'), 'backend-type': ('string', 'remote'), 'backend': ('string', 'http://localhost:5000'), } # Another example of configuration: # DEFAULT_CONF = { # 'log_dir': ('string', '/tmp/swh-loader-git/log'), # 'backend-type': ('string', 'local'), # 'backend': ('string', '~/.config/swh/back.ini'), # } def parse_args(): """Parse the CLI arguments. """ cli = argparse.ArgumentParser( description='Parse git repository objects to load them into DB.') cli.add_argument('--verbose', '-v', action='store_true', help='Verbosity level in log file.') cli.add_argument('--config', '-c', help='configuration file path') subcli = cli.add_subparsers(dest='action') load_cli = subcli.add_parser('load', help='load Git repo into DB') load_cli.add_argument('repository', help='Git repository path') args = cli.parse_args() if not args.action: cli.error('no action given') return args if __name__ == '__main__': args = parse_args() conf = reader.read(args.config or DEFAULT_CONF_FILE, DEFAULT_CONF) reader.prepare_folders(conf, 'log_dir') conf['action'] = args.action conf['repo_path'] = args.repository logging.basicConfig(filename=os.path.join(conf['log_dir'], 'sgloader.log'), level=logging.DEBUG if args.verbose else logging.INFO) loader.load(conf) diff --git a/swh/loader/git/gitloader/git.py b/swh/loader/git/git.py similarity index 100% rename from swh/loader/git/gitloader/git.py rename to swh/loader/git/git.py diff --git a/swh/loader/git/gitloader/loader.py b/swh/loader/git/loader.py similarity index 95% rename from swh/loader/git/gitloader/loader.py rename to swh/loader/git/loader.py index 4dfce9d..5578ae7 100644 --- a/swh/loader/git/gitloader/loader.py +++ b/swh/loader/git/loader.py @@ -1,53 +1,53 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import os -from swh.loader.git.gitloader import git, remote_store, local_store +from swh.loader.git import git, remote_store, local_store _load_to_back_fn = {'remote': remote_store.load_to_back ,'local': local_store.prepare_and_load_to_back } def check_user_conf(conf): """Check the user's configuration and rejects if problems. """ action = conf['action'] if action != 'load': return 'skip unknown action %s' % action backend_type = conf['backend-type'] if backend_type not in _load_to_back_fn: return 'skip unknown backend-type %s (only `remote`, `local` supported)' % backend_type repo_path = conf['repo_path'] if not os.path.exists(repo_path): return 'Repository %s does not exist.' % repo_path return None def load(conf): """According to action, load the repo_path. used configuration keys: - action: requested action - repo_path: git repository path ('load' action only) - backend-type: backend access's type (remote or local) - backend: url access to backend api """ error_msg = check_user_conf(conf) if error_msg: logging.error(error_msg) raise Exception(error_msg) repo_path = conf['repo_path'] logging.info('load repo_path %s' % repo_path) swhrepo = git.parse(repo_path) _load_to_back_fn[conf['backend-type']](conf['backend'], swhrepo) diff --git a/swh/loader/git/gitloader/local_store.py b/swh/loader/git/local_store.py similarity index 100% rename from swh/loader/git/gitloader/local_store.py rename to swh/loader/git/local_store.py diff --git a/swh/loader/git/gitloader/remote_store.py b/swh/loader/git/remote_store.py similarity index 100% rename from swh/loader/git/gitloader/remote_store.py rename to swh/loader/git/remote_store.py diff --git a/swh/loader/git/tests/test_local_loader.py b/swh/loader/git/tests/test_local_loader.py index b2a758b..220edf8 100644 --- a/swh/loader/git/tests/test_local_loader.py +++ b/swh/loader/git/tests/test_local_loader.py @@ -1,249 +1,249 @@ # coding: utf-8 # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import pygit2 import tempfile import shutil import os from nose.plugins.attrib import attr from nose.tools import istest from swh.loader.git.storage import db, models -from swh.loader.git.gitloader import loader +from swh.loader.git import loader from swh.loader.git.conf import reader import test_initdb from test_utils import list_files_from from test_git_utils import create_commit_with_content, create_tag @attr('slow') class TestLocalLoader(unittest.TestCase): def setUp(self): """Initialize a git repository for the remaining test to manipulate. """ tmp_git_folder_path = tempfile.mkdtemp(prefix='test-sgloader.', dir='/tmp') self.tmp_git_repo = pygit2.init_repository(tmp_git_folder_path) self.conf_back = reader.read('./resources/test/back.ini', {'port': ('int', 9999)}) self.db_url = self.conf_back['db_url'] self.conf = { 'action': 'load', 'repo_path': self.tmp_git_repo.workdir, 'backend-type': 'local', 'backend': './resources/test/back.ini' } def init_db_setup(self): """Initialize a git repository for the remaining test to manipulate. """ test_initdb.prepare_db(self.db_url) def tearDown(self): """Destroy the test git repository. """ shutil.rmtree(self.tmp_git_repo.workdir) shutil.rmtree(self.conf_back['content_storage_dir'], ignore_errors=True) @istest def should_fail_on_bad_action(self): # when try: loader.load({'action': 'unknown'}) except: pass @istest def should_fail_on_inexistant_folder(self): # when try: loader.load({'action': 'load', 'repo_path': 'something-that-definitely-does-not-exist'}) except: pass @istest def should_fail_on_inexistant_backend_type(self): # when try: loader.load({'action': 'load', 'repo_path': '.', 'backend-type': 'unknown'}) # only local or remote supported except: pass @istest def local_loader(self): """Trigger loader and make sure everything is ok. """ self.init_db_setup() # given commit0 = create_commit_with_content(self.tmp_git_repo, 'blob 0', 'commit msg 0') commit1 = create_commit_with_content(self.tmp_git_repo, 'blob 1', 'commit msg 1', [commit0.hex]) commit2 = create_commit_with_content(self.tmp_git_repo, 'blob 2', 'commit msg 2', [commit1.hex]) commit3 = create_commit_with_content(self.tmp_git_repo, None, 'commit msg 3', [commit2.hex]) commit4 = create_commit_with_content(self.tmp_git_repo, 'blob 4', 'commit msg 4', [commit3.hex]) # when loader.load(self.conf) # then nb_files = len(list_files_from(self.conf_back['content_storage_dir'])) self.assertEquals(nb_files, 4, "4 blobs.") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 5, "Should be 5 commits") self.assertEquals( models.count_directories(db_conn), 5, "Should be 5 trees") self.assertEquals( models.count_contents(db_conn), 4, "Should be 4 blobs as we created one commit without data!") self.assertEquals( models.count_release(db_conn), 0, "No tag created so 0 release.") self.assertEquals( models.count_occurrence(db_conn), 1, "Should be 1 reference (master) so 1 occurrence.") # given commit5 = create_commit_with_content(self.tmp_git_repo, 'new blob 5', 'commit msg 5', [commit4.hex]) commit6 = create_commit_with_content(self.tmp_git_repo, 'new blob and last 6', 'commit msg 6', [commit5.hex]) commit7 = create_commit_with_content(self.tmp_git_repo, 'new blob 7', 'commit msg 7', [commit6.hex]) # when loader.load(self.conf) # then nb_files = len(list_files_from(self.conf_back['content_storage_dir'])) self.assertEquals(nb_files, 4+3, "3 new blobs.") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 8, "Should be 5+3 == 8 commits now") self.assertEquals( models.count_directories(db_conn), 8, "Should be 5+3 == 8 trees") self.assertEquals( models.count_contents(db_conn), 7, "Should be 4+3 == 7 blobs") self.assertEquals( models.count_release(db_conn), 0, "No tag created so 0 release.") self.assertEquals( models.count_occurrence(db_conn), 2, "Should be 1 reference which changed twice so 2 occurrences (master changed).") # given create_commit_with_content(self.tmp_git_repo, None, 'commit 8 with parent 2', [commit7.hex]) # when loader.load(self.conf) # then nb_files = len(list_files_from(self.conf_back['content_storage_dir'])) self.assertEquals(nb_files, 7, "no new blob.") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 9, "Should be 8+1 == 9 commits now") self.assertEquals( models.count_directories(db_conn), 8, "Should be 8 trees (new commit without blob so no new tree)") self.assertEquals( models.count_contents(db_conn), 7, "Should be 7 blobs (new commit without new blob)") self.assertEquals( models.count_release(db_conn), 0, "No tag created so 0 release.") self.assertEquals( models.count_occurrence(db_conn), 3, "Should be 1 reference which changed thrice so 3 occurrences (master changed again).") self.assertEquals( models.count_person(db_conn), 2, "1 author + 1 committer") # add tag create_tag(self.tmp_git_repo, '0.0.1', commit5, 'bad ass release 0.0.1, towards infinity...') create_tag(self.tmp_git_repo, '0.0.2', commit7, 'release 0.0.2... and beyond') loader.load(self.conf) # then nb_files = len(list_files_from(self.conf_back['content_storage_dir'])) self.assertEquals(nb_files, 7, "no new blob.") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 9, "Should be 8+1 == 9 commits now") self.assertEquals( models.count_directories(db_conn), 8, "Should be 8 trees (new commit without blob so no new tree)") self.assertEquals( models.count_contents(db_conn), 7, "Should be 7 blobs (new commit without new blob)") self.assertEquals( models.count_release(db_conn), 2, "Should be 2 annotated tags so 2 releases") self.assertEquals( models.count_occurrence(db_conn), 3, "master did not change this time so still 3 occurrences") self.assertEquals( models.count_person(db_conn), 3, "1 author + 1 committer + 1 tagger") diff --git a/swh/loader/git/tests/test_remote_loader.py b/swh/loader/git/tests/test_remote_loader.py index 9e53e41..971f83e 100644 --- a/swh/loader/git/tests/test_remote_loader.py +++ b/swh/loader/git/tests/test_remote_loader.py @@ -1,252 +1,252 @@ # coding: utf-8 # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import pygit2 import tempfile import shutil import os from nose.plugins.attrib import attr from nose.tools import istest from swh.loader.git.storage import db, models -from swh.loader.git.gitloader import loader +from swh.loader.git import loader from swh.loader.git.conf import reader import test_initdb from test_git_utils import create_commit_with_content, create_tag from test_utils import list_files_from @attr('slow') class TestRemoteLoader(unittest.TestCase): def setUp(self): tmp_git_folder_path = tempfile.mkdtemp(prefix='test-sgloader.', dir='/tmp') self.tmp_git_repo = pygit2.init_repository(tmp_git_folder_path) self.conf = reader.read('./resources/test/back.ini', {'port': ('int', 9999)}) self.db_url = self.conf['db_url'] self.conf.update({ 'action': 'load', 'repo_path': self.tmp_git_repo.workdir, 'backend-type': 'remote', 'backend': 'http://localhost:%s' % self.conf['port'] }) # Not the remote loader in charge of creating the folder, so we do it if not os.path.exists(self.conf['content_storage_dir']): os.mkdir(self.conf['content_storage_dir']) def init_db_setup(self): """Initialize a git repository for the remaining test to manipulate. """ test_initdb.prepare_db(self.db_url) def tearDown(self): """Destroy the test git repository. """ shutil.rmtree(self.tmp_git_repo.workdir) shutil.rmtree(self.conf['content_storage_dir']) @istest def should_fail_on_bad_action(self): # when try: loader.load({'action': 'unknown'}) except: # FIXME assert raises pass @istest def should_fail_on_inexistant_folder(self): # when try: loader.load({'action': 'load', 'repo_path': 'something-that-definitely-does-not-exist'}) except: pass @istest def should_fail_on_inexistant_backend_type(self): # when try: loader.load({'action': 'load', 'repo_path': '.', 'backend-type': 'unknown'}) # only local or remote supported except: pass @istest def remote_loader(self): """Trigger loader and make sure everything is ok. """ # given self.init_db_setup() # given commit0 = create_commit_with_content(self.tmp_git_repo, 'blob 0', 'commit msg 0') commit1 = create_commit_with_content(self.tmp_git_repo, 'blob 1', 'commit msg 1', [commit0.hex]) commit2 = create_commit_with_content(self.tmp_git_repo, 'blob 2', 'commit msg 2', [commit1.hex]) commit3 = create_commit_with_content(self.tmp_git_repo, None, 'commit msg 3', [commit2.hex]) commit4 = create_commit_with_content(self.tmp_git_repo, 'blob 4', 'commit msg 4', [commit3.hex]) # when loader.load(self.conf) # then nb_files = len(list_files_from(self.conf['content_storage_dir'])) self.assertEquals(nb_files, 4, "4 blobs") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 5, "Should be 5 commits") self.assertEquals( models.count_directories(db_conn), 5, "Should be 5 trees") self.assertEquals( models.count_contents(db_conn), 4, "Should be 4 blobs as we created one commit without data!") self.assertEquals( models.count_release(db_conn), 0, "No tag created so 0 release.") self.assertEquals( models.count_occurrence(db_conn), 1, "Should be 1 reference (master) so 1 occurrence.") # given commit5 = create_commit_with_content(self.tmp_git_repo, 'new blob 5', 'commit msg 5', [commit4.hex]) commit6 = create_commit_with_content(self.tmp_git_repo, 'new blob and last 6', 'commit msg 6', [commit5.hex]) commit7 = create_commit_with_content(self.tmp_git_repo, 'new blob 7', 'commit msg 7', [commit6.hex]) # when loader.load(self.conf) # then nb_files = len(list_files_from(self.conf['content_storage_dir'])) self.assertEquals(nb_files, 4+3, "3 new blobs") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 8, "Should be 5+3 == 8 commits now") self.assertEquals( models.count_directories(db_conn), 8, "Should be 5+3 == 8 trees") self.assertEquals( models.count_contents(db_conn), 7, "Should be 4+3 == 7 blobs") self.assertEquals( models.count_release(db_conn), 0, "No tag created so 0 release.") self.assertEquals( models.count_occurrence(db_conn), 2, "Should be 1 reference which changed twice so 2 occurrences (master changed).") # given create_commit_with_content(self.tmp_git_repo, None, 'commit 8 with parent 2', [commit7.hex]) # when loader.load(self.conf) # then nb_files = len(list_files_from(self.conf['content_storage_dir'])) self.assertEquals(nb_files, 7, "no new blob") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 9, "Should be 8+1 == 9 commits now") self.assertEquals( models.count_directories(db_conn), 8, "Should be 8 trees (new commit without blob so no new tree)") self.assertEquals( models.count_contents(db_conn), 7, "Should be 7 blobs (new commit without new blob)") self.assertEquals( models.count_release(db_conn), 0, "No tag created so 0 release.") self.assertEquals( models.count_occurrence(db_conn), 3, "Should be 1 reference which changed thrice so 3 occurrences (master changed again).") self.assertEquals( models.count_person(db_conn), 2, "1 author + 1 committer") # add tag create_tag(self.tmp_git_repo, '0.0.1', commit5, 'bad ass release 0.0.1, towards infinity...') create_tag(self.tmp_git_repo, '0.0.2', commit7, 'release 0.0.2... and beyond') loader.load(self.conf) # then nb_files = len(list_files_from(self.conf['content_storage_dir'])) self.assertEquals(nb_files, 7, "no new blob") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 9, "Should be 8+1 == 9 commits now") self.assertEquals( models.count_directories(db_conn), 8, "Should be 8 trees (new commit without blob so no new tree)") self.assertEquals( models.count_contents(db_conn), 7, "Should be 7 blobs (new commit without new blob)") self.assertEquals( models.count_release(db_conn), 2, "Should be 2 annotated tags so 2 releases") self.assertEquals( models.count_occurrence(db_conn), 3, "master did not change this time so still 3 occurrences") self.assertEquals( models.count_person(db_conn), 3, "1 author + 1 committer + 1 tagger")