diff --git a/PKG-INFO b/PKG-INFO
index 676377d..0243a1d 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,10 +1,10 @@
 Metadata-Version: 1.0
 Name: swh.loader.git
-Version: 0.0.5
+Version: 0.0.6
 Summary: Software Heritage git loader
 Home-page: https://forge.softwareheritage.org/diffusion/DCORE/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Description: UNKNOWN
 Platform: UNKNOWN
diff --git a/bin/swh-loader-git b/bin/swh-loader-git
index 246bbbb..2938319 100755
--- a/bin/swh-loader-git
+++ b/bin/swh-loader-git
@@ -1,38 +1,36 @@
 #!/usr/bin/env python3
 
 import logging
 import sys
 
 from swh.core.logger import PostgresHandler
 
 from swh.loader.git import BulkLoader
 
 ADDITIONAL_CONFIG = {
     'repo_path': ('str', None),
     'origin_url': ('str', 'file:///dev/null'),
     'authority': ('int', 1),
     'validity': ('str', '2015-01-01 00:00:00+00'),
 }
 
-my_config = BulkLoader.parse_config_file(config_filename=sys.argv[1],
-                                         additional_configs=[ADDITIONAL_CONFIG])
+my_config = BulkLoader.parse_config_file(
+    config_filename=sys.argv[1], additional_configs=[ADDITIONAL_CONFIG])
 
 logging.basicConfig(
     level=logging.DEBUG,
     format='%(asctime)s %(name)s %(levelname)s %(message)s',
     handlers=[
         logging.StreamHandler(),
         PostgresHandler(my_config['log_db']),
     ],
 )
 
 requests_log = logging.getLogger("requests")
 requests_log.setLevel(logging.CRITICAL)
 
-
-
 loader = BulkLoader(my_config)
 loader.process(my_config['repo_path'],
                my_config['origin_url'],
                my_config['authority'],
                my_config['validity'])
diff --git a/swh.loader.git.egg-info/PKG-INFO b/swh.loader.git.egg-info/PKG-INFO
index 676377d..0243a1d 100644
--- a/swh.loader.git.egg-info/PKG-INFO
+++ b/swh.loader.git.egg-info/PKG-INFO
@@ -1,10 +1,10 @@
 Metadata-Version: 1.0
 Name: swh.loader.git
-Version: 0.0.5
+Version: 0.0.6
 Summary: Software Heritage git loader
 Home-page: https://forge.softwareheritage.org/diffusion/DCORE/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Description: UNKNOWN
 Platform: UNKNOWN
diff --git a/swh/loader/git/tasks.py b/swh/loader/git/tasks.py
index 994a68a..07e854d 100644
--- a/swh/loader/git/tasks.py
+++ b/swh/loader/git/tasks.py
@@ -1,59 +1,109 @@
 # Copyright (C) 2015 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import datetime
 import os
 
 from swh.core.scheduling import Task
 
 from .loader import BulkLoader
 
 
 class LoadGitRepository(Task):
     """Import a git repository to Software Heritage"""
 
+    task_queue = 'swh_loader_git'
+
     CONFIG_BASE_FILENAME = 'loader/git.ini'
     ADDITIONAL_CONFIG = {}
 
     def __init__(self):
         self.config = BulkLoader.parse_config_file(
             base_filename=self.CONFIG_BASE_FILENAME,
             additional_configs=[self.ADDITIONAL_CONFIG],
         )
 
     def run(self, repo_path, origin_url, authority_id, validity):
         """Import a git repository"""
         loader = BulkLoader(self.config)
         loader.log = self.log
 
         loader.process(repo_path, origin_url, authority_id, validity)
 
 
 class LoadGitHubRepository(LoadGitRepository):
     """Import a github repository to Software Heritage"""
 
+    task_queue = 'swh_loader_git'
+
     CONFIG_BASE_FILENAME = 'loader/github.ini'
     ADDITIONAL_CONFIG = {
         'github_basepath': ('str', '/srv/storage/space/data/github'),
         'authority_id': ('int', 1),
         'default_validity': ('str', '1970-01-01 00:00:00+00'),
     }
 
     def run(self, repo_fullname):
         authority_id = self.config['authority_id']
         validity = self.config['default_validity']
 
         repo_path = os.path.join(self.config['github_basepath'],
                                  repo_fullname[0], repo_fullname)
 
         witness_file = os.path.join(repo_path, 'witness')
         if os.path.exists(witness_file):
             validity_timestamp = os.stat(witness_file).st_mtime
             validity = '%s+00' % datetime.datetime.utcfromtimestamp(
                 validity_timestamp)
 
         origin_url = 'https://github.com/%s' % repo_fullname
 
         super().run(repo_path, origin_url, authority_id, validity)
+
+
+class LoadGitHubRepositoryReleases(LoadGitHubRepository):
+    """Import a GitHub repository to Software Heritage, only with releases.
+
+    Reuses the LoadGitHubRepository configuration and then overrides the
+    ``send_*`` flags so that only ``send_releases`` is enabled.
+    """
+
+    task_queue = 'swh_loader_git_express'
+
+    def __init__(self):
+        # Zero-argument super(): super(self.__class__, self) would recurse
+        # forever in any subclass, since self.__class__ is the runtime type.
+        super().__init__()
+
+        self.config.update({
+            'send_contents': False,
+            'send_directories': False,
+            'send_revisions': False,
+            'send_releases': True,
+            'send_occurrences': False,
+        })
+
+
+class LoadGitHubRepositoryContents(LoadGitHubRepository):
+    """Import a GitHub repository to Software Heritage, only with contents.
+
+    Reuses the LoadGitHubRepository configuration and then overrides the
+    ``send_*`` flags so that only ``send_contents`` is enabled.
+    """
+
+    task_queue = 'swh_loader_git_express'
+
+    def __init__(self):
+        # Zero-argument super(): super(self.__class__, self) would recurse
+        # forever in any subclass, since self.__class__ is the runtime type.
+        super().__init__()
+
+        self.config.update({
+            'send_contents': True,
+            'send_directories': False,
+            'send_revisions': False,
+            'send_releases': False,
+            'send_occurrences': False,
+        })
diff --git a/version.txt b/version.txt
index 3ebb5ee..ea8fa74 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-v0.0.5-0-g87de6a6
\ No newline at end of file
+v0.0.6-0-g0a23378
\ No newline at end of file