diff --git a/swh/loader/git/tasks.py b/swh/loader/git/tasks.py index d9bf18d..98e2333 100644 --- a/swh/loader/git/tasks.py +++ b/swh/loader/git/tasks.py @@ -1,119 +1,38 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import datetime -import os +import dateutil.parser from swh.scheduler.task import Task -from .loader import BulkLoader +from .loader import GitLoader from .updater import BulkUpdater -class LoadGitRepository(Task): - """Import a git repository to Software Heritage""" - - task_queue = 'swh_loader_git' - - CONFIG_BASE_FILENAME = 'loader/git.ini' - ADDITIONAL_CONFIG = {} - - def __init__(self): - self.config = BulkLoader.parse_config_file( - base_filename=self.CONFIG_BASE_FILENAME, - additional_configs=[self.ADDITIONAL_CONFIG], - ) - - def run(self, repo_path, origin_url, authority_id, validity): - """Import a git repository""" - loader = BulkLoader(self.config) - loader.log = self.log - - loader.process(repo_path, origin_url, authority_id, validity) - - +# TODO: rename to LoadRemoteGitRepository class UpdateGitRepository(Task): - """Import a git repository as an update""" + """Import a git repository from a remote location""" task_queue = 'swh_loader_git' - CONFIG_BASE_FILENAME = 'loader/git-updater.ini' - ADDITIONAL_CONFIG = {} - - def __init__(self): - self.config = BulkUpdater.parse_config_file( - base_filename=self.CONFIG_BASE_FILENAME, - additional_configs=[self.ADDITIONAL_CONFIG], - ) - def run(self, repo_url, base_url=None): """Import a git repository""" - loader = BulkUpdater(self.config) + loader = BulkUpdater() loader.log = self.log - return loader.process(repo_url, base_url) - + return loader.load(repo_url, base_url) -class LoadGitHubRepository(LoadGitRepository): - """Import a github repository to Software Heritage""" +class LoadDiskGitRepository(Task): + """Import a git repository from disk""" task_queue = 'swh_loader_git' - CONFIG_BASE_FILENAME = 'loader/github.ini' - ADDITIONAL_CONFIG = { - 'github_basepath': ('str', '/srv/storage/space/data/github'), - 'authority_id': ('str', '5f4d4c51-498a-4e28-88b3-b3e4e8396cba'), - 'default_validity': ('str', '1970-01-01 00:00:00+00'), - } - - def run(self, repo_fullname): - authority_id = self.config['authority_id'] - validity = self.config['default_validity'] + def run(self, origin_url, directory, date): + """Import a git repository, cloned in `directory` from `origin_url` at + `date`.""" - repo_path = os.path.join(self.config['github_basepath'], - repo_fullname[0], repo_fullname) - - witness_file = os.path.join(repo_path, 'witness') - if os.path.exists(witness_file): - validity_timestamp = os.stat(witness_file).st_mtime - validity = '%s+00' % datetime.datetime.utcfromtimestamp( - validity_timestamp) - - origin_url = 'https://github.com/%s' % repo_fullname - - super().run(repo_path, origin_url, authority_id, validity) - - -class LoadGitHubRepositoryReleases(LoadGitHubRepository): - """Import a GitHub repository to SoftwareHeritage, only with releases""" - - task_queue = 'swh_loader_git_express' - - def __init__(self): - super(self.__class__, self).__init__() - - self.config.update({ - 'send_contents': False, - 'send_directories': False, - 'send_revisions': False, - 'send_releases': True, - 'send_occurrences': False, - }) - - -class LoadGitHubRepositoryContents(LoadGitHubRepository): - """Import a GitHub repository to SoftwareHeritage, only with contents""" - - task_queue = 'swh_loader_git_express' - - def __init__(self): - super(self.__class__, self).__init__() + loader = GitLoader() + loader.log = self.log - self.config.update({ - 'send_contents': True, - 'send_directories': False, - 'send_revisions': False, - 'send_releases': False, - 'send_occurrences': False, - }) + return loader.load(origin_url, directory, dateutil.parser.parse(date))