diff --git a/swh/loader/tar/loader.py b/swh/loader/tar/loader.py
new file mode 100644
index 0000000..0a1c12f
--- /dev/null
+++ b/swh/loader/tar/loader.py
@@ -0,0 +1,105 @@
+# Copyright (C) 2015 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+import tempfile
+import shutil
+
+from swh.loader.dir import loader
+from swh.loader.tar import tarball
+
+
+class TarLoader(loader.DirLoader):
+    """A tarball loader.
+
+    Uncompresses a tarball into a temporary directory, then delegates
+    the loading of that directory to the parent DirLoader.
+
+    """
+    def process(self, tarpath, origin, revision, release, occurrences):
+        """Load a tarball in backend.
+
+        This will:
+        - persist the origin if it does not exist.
+        - write an entry in fetch_history to mark the loading tarball start
+        - uncompress locally the tarballs in a temporary location
+        - process the content of the tarballs to persist on swh storage
+        - clean up the temporary location
+        - write an entry in fetch_history to mark the loading tarball end
+
+        Args:
+            - tarpath: path to the tarball to uncompress
+            - origin: Dictionary origin, updated in place ('type' defaults
+              to 'tar' when absent, 'id' is set from the storage)
+              - url: url origin we fetched
+              - type: type of the origin
+            - revision: Dictionary of information needed, keys are:
+              - author_name: revision's author name
+              - author_email: revision's author email
+              - author_date: timestamp (e.g. 1444054085)
+              - author_offset: date offset e.g. -0220, +0100
+              - committer_name: revision's committer name
+              - committer_email: revision's committer email
+              - committer_date: timestamp
+              - committer_offset: date offset e.g. -0220, +0100
+              - type: type of revision dir, tar
+              - message: synthetic message for the revision
+            - release: Dictionary of information needed, keys are:
+              - name: release name
+              - date: release timestamp (e.g. 1444054085)
+              - offset: release date offset e.g. -0220, +0100
+              - author_name: release author's name
+              - author_email: release author's email
+              - comment: release's comment message
+            - occurrences: List of occurrence dictionary.
+              Information needed, keys are:
+              - branch: occurrence's branch name
+              - authority_id: authority id (e.g. 1 for swh)
+              - validity: validity date (e.g. 2015-01-01 00:00:00+00)
+
+        Raises:
+            Any exception raised while uncompressing the tarball or
+            loading its content is propagated once the temporary
+            location has been cleaned up; the fetch_history entry is
+            then left open.
+
+        """
+        if 'type' not in origin:  # let the type flow if present
+            origin['type'] = 'tar'
+        origin['id'] = self.storage.origin_add_one(origin)
+
+        # Mark the start of the loading
+        fetch_history_id = self.open_fetch_history(origin['id'])
+
+        # Prepare the extraction path
+        extraction_dir = self.config['extraction_dir']
+        os.makedirs(extraction_dir, 0o755, exist_ok=True)
+        dir_path = tempfile.mkdtemp(prefix='swh.loader.tar-',
+                                    dir=extraction_dir)
+
+        # T62:
+        # - create tarball as content in storage
+        # - transit the information to the loader dir
+
+        # T22: add checksums in revision
+
+        try:
+            self.log.info('Uncompress %s to %s' % (tarpath, dir_path))
+            tarball.uncompress(tarpath, dir_path)
+
+            objects = super().process(dir_path, origin, revision, release,
+                                      occurrences)
+        finally:
+            # Always clean up, whether or not the loading succeeded
+            shutil.rmtree(dir_path)
+
+        # Mark the end of the loading. This is deliberately outside the
+        # 'finally' clause: when the loading fails 'objects' is unbound,
+        # and reading it there raised an UnboundLocalError that masked
+        # the original exception.
+        # NOTE(review): on failure the fetch_history entry stays open;
+        # closing it requires the failure payload close_fetch_history
+        # expects -- confirm against swh.loader.dir.
+        self.close_fetch_history(fetch_history_id, objects)
diff --git a/swh/loader/tar/tasks.py b/swh/loader/tar/tasks.py
index 37b6242..94dc6c2 100644
--- a/swh/loader/tar/tasks.py
+++ b/swh/loader/tar/tasks.py
@@ -1,53 +1,41 @@
 # Copyright (C) 2015 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-import os
-import shutil
-import tempfile
+from swh.core.scheduling import Task
 
-from swh.loader.dir import tasks
-from swh.loader.tar import tarball
+from swh.loader.tar.loader import TarLoader
 
 
-class LoadTarRepository(tasks.LoadDirRepository):
+class LoadTarRepository(Task):
     """Import a tarball to Software Heritage
 
     """
     task_queue = 'swh_loader_tar'
 
     CONFIG_BASE_FILENAME = 'loader/tar.ini'
     ADDITIONAL_CONFIG = {
         'extraction_dir': ('str', '/tmp/swh.loader.tar/'),
     }
 
+    def __init__(self):
+        """Parse the task configuration from CONFIG_BASE_FILENAME."""
+        self.config = TarLoader.parse_config_file(
+            base_filename=self.CONFIG_BASE_FILENAME,
+            additional_configs=[self.ADDITIONAL_CONFIG],
+        )
+
     def run(self, tarpath, origin, revision, release, occurrences):
         """Import a tarball into swh.
 
         Args:
             - tarpath: path to a tarball file
-            - origin, revision, release, occurrences: see LoadDirRepository.run
+            - origin, revision, release, occurrences:
+              cf. swh.loader.tar.loader.TarLoader.process docstring
 
         """
-        extraction_dir = self.config['extraction_dir']
-
-        os.makedirs(extraction_dir, 0o755, exist_ok=True)
-
-        dir_path = tempfile.mkdtemp(prefix='swh.loader.tar-',
-                                    dir=extraction_dir)
-
-        if 'type' not in origin:  # let the type flow if present
-            origin['type'] = 'tar'
-
-        try:
-            self.log.info('Uncompress %s to %s' % (tarpath, dir_path))
-            tarball.uncompress(tarpath, dir_path)
-
-            super().run(dir_path,
-                        origin,
-                        revision,
-                        release,
-                        occurrences)
-        finally:  # always clean up
-            shutil.rmtree(dir_path)
+        loader = TarLoader(self.config)
+        # Share the task's logger with the loader
+        loader.log = self.log
+        loader.process(tarpath, origin, revision, release, occurrences)