diff --git a/PKG-INFO b/PKG-INFO index 9cbba70..423a1c7 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.loader.tar -Version: 0.0.34 +Version: 0.0.35 Summary: Software Heritage Tarball Loader Home-page: https://forge.softwareheritage.org/diffusion/DLDTAR Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/debian/control b/debian/control index 8993181..6078985 100644 --- a/debian/control +++ b/debian/control @@ -1,27 +1,27 @@ Source: swh-loader-tar Maintainer: Software Heritage developers Section: python Priority: optional Build-Depends: debhelper (>= 9), dh-python (>= 2), python3-all, python3-nose, python3-setuptools, python3-swh.core (>= 0.0.36~), - python3-swh.loader.dir (>= 0.0.31~), + python3-swh.loader.dir (>= 0.0.32~), python3-swh.model (>= 0.0.15~), python3-swh.scheduler (>= 0.0.14~), python3-swh.storage (>= 0.0.83~), python3-vcversioner Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/diffusion/DLDTAR/ Package: python3-swh.loader.tar Architecture: all Depends: python3-swh.core (>= 0.0.36~), - python3-swh.loader.dir (>= 0.0.31~), + python3-swh.loader.dir (>= 0.0.32~), python3-swh.scheduler (>= 0.0.14~), python3-swh.storage (>= 0.0.83~), ${misc:Depends}, ${python3:Depends} Description: Software Heritage Tarball Loader diff --git a/requirements-swh.txt b/requirements-swh.txt index baacbb0..26ad0c3 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,5 +1,5 @@ swh.core >= 0.0.36 swh.model >= 0.0.15 swh.scheduler >= 0.0.14 swh.storage >= 0.0.83 -swh.loader.dir >= 0.0.31 +swh.loader.dir >= 0.0.32 diff --git a/swh.loader.tar.egg-info/PKG-INFO b/swh.loader.tar.egg-info/PKG-INFO index 9cbba70..423a1c7 100644 --- a/swh.loader.tar.egg-info/PKG-INFO +++ b/swh.loader.tar.egg-info/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.loader.tar -Version: 0.0.34 +Version: 0.0.35 Summary: Software Heritage Tarball Loader Home-page: https://forge.softwareheritage.org/diffusion/DLDTAR Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh.loader.tar.egg-info/requires.txt b/swh.loader.tar.egg-info/requires.txt index 04e02aa..d761eca 100644 --- a/swh.loader.tar.egg-info/requires.txt +++ b/swh.loader.tar.egg-info/requires.txt @@ -1,9 +1,9 @@ click python-dateutil retrying swh.core>=0.0.36 -swh.loader.dir>=0.0.31 +swh.loader.dir>=0.0.32 swh.model>=0.0.15 swh.scheduler>=0.0.14 swh.storage>=0.0.83 vcversioner diff --git a/swh/loader/tar/loader.py b/swh/loader/tar/loader.py index abf186d..7d4981f 100644 --- a/swh/loader/tar/loader.py +++ b/swh/loader/tar/loader.py @@ -1,110 +1,154 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import tempfile import shutil from swh.core import tarball from swh.loader.core.loader import SWHLoader from swh.loader.dir import loader from swh.loader.tar import utils from swh.model import hashutil class TarLoader(loader.DirLoader): """Tarball loader implementation. This is a subclass of the :class:DirLoader as the main goal of this class is to first uncompress a tarball, then provide the uncompressed directory/tree to be loaded by the DirLoader. This will: - creates an origin (if it does not exist) - creates a fetch_history entry - creates an origin_visit - uncompress locally the tarball in a temporary location - process the content of the tarballs to persist on swh storage - clean up the temporary location - write an entry in fetch_history to mark the loading tarball end (success or failure) """ CONFIG_BASE_FILENAME = 'loader/tar' ADDITIONAL_CONFIG = { 'extraction_dir': ('string', '/tmp') } def __init__(self, logging_class='swh.loader.tar.TarLoader', config=None): super().__init__(logging_class=logging_class, config=config) + self.dir_path = None def load(self, *, tar_path, origin, visit_date, revision, branch_name=None): """Load a tarball in `tarpath` in the Software Heritage Archive. Args: tar_path: tarball to import origin (dict): an origin dictionary as returned by :func:`swh.storage.storage.Storage.origin_get_one` visit_date (str): the date the origin was visited (as an isoformatted string) revision (dict): a revision as passed to :func:`swh.storage.storage.Storage.revision_add`, excluding the `id` and `directory` keys (computed from the directory) branch_name (str): the optional branch_name to use for snapshot """ # Shortcut super() as we use different arguments than the DirLoader. return SWHLoader.load(self, tar_path=tar_path, origin=origin, visit_date=visit_date, revision=revision, branch_name=branch_name) - def prepare(self, *, tar_path, origin, visit_date, revision, + def prepare_origin_visit(self, *, origin, visit_date=None, **kwargs): + self.origin = origin + if 'type' not in self.origin: # let the type flow if present + self.origin['type'] = 'tar' + self.visit_date = visit_date + + def prepare(self, *, tar_path, origin, revision, visit_date=None, branch_name=None): """1. Uncompress the tarball in a temporary directory. 2. Compute some metadata to update the revision. """ - if 'type' not in origin: # let the type flow if present - origin['type'] = 'tar' - # Prepare the extraction path extraction_dir = self.config['extraction_dir'] os.makedirs(extraction_dir, 0o755, exist_ok=True) - dir_path = tempfile.mkdtemp(prefix='swh.loader.tar-', - dir=extraction_dir) + self.dir_path = tempfile.mkdtemp(prefix='swh.loader.tar-', + dir=extraction_dir) # add checksums in revision - self.log.info('Uncompress %s to %s' % (tar_path, dir_path)) - nature = tarball.uncompress(tar_path, dir_path) + self.log.info('Uncompress %s to %s' % (tar_path, self.dir_path)) + nature = tarball.uncompress(tar_path, self.dir_path) if 'metadata' not in revision: artifact = utils.convert_to_hex(hashutil.hash_path(tar_path)) artifact['name'] = os.path.basename(tar_path) artifact['archive_type'] = nature artifact['length'] = os.path.getsize(tar_path) revision['metadata'] = { 'original_artifact': [artifact], } branch = branch_name if branch_name else os.path.basename(tar_path) - super().prepare(dir_path=dir_path, + super().prepare(dir_path=self.dir_path, origin=origin, visit_date=visit_date, revision=revision, release=None, branch_name=branch) def cleanup(self): """Clean up temporary directory where we uncompress the tarball. """ - dir_path = self.dir_path - if dir_path and os.path.exists(dir_path): - shutil.rmtree(dir_path) + if self.dir_path and os.path.exists(self.dir_path): + shutil.rmtree(self.dir_path) + + +if __name__ == '__main__': + import click + import logging + logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s %(process)d %(message)s' + ) + + @click.command() + @click.option('--archive-path', required=1, help='Archive path to load') + @click.option('--origin-url', required=1, help='Origin url to associate') + @click.option('--visit-date', default=None, + help='Visit date time override') + def main(archive_path, origin_url, visit_date): + """Loading archive tryout.""" + import datetime + origin = {'url': origin_url, 'type': 'tar'} + commit_time = int(datetime.datetime.now( + tz=datetime.timezone.utc).timestamp()) + swh_person = { + 'name': 'Software Heritage', + 'fullname': 'Software Heritage', + 'email': 'robot@softwareheritage.org' + } + revision = { + 'date': {'timestamp': commit_time, 'offset': 0}, + 'committer_date': {'timestamp': commit_time, 'offset': 0}, + 'author': swh_person, + 'committer': swh_person, + 'type': 'tar', + 'message': 'swh-loader-tar: synthetic revision message', + 'metadata': {}, + 'synthetic': True, + } + TarLoader().load(tar_path=archive_path, origin=origin, + visit_date=visit_date, revision=revision, + branch_name='master') + + main() diff --git a/version.txt b/version.txt index 267c9fe..6da7cfa 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.34-0-g7738bdb \ No newline at end of file +v0.0.35-0-gd4bd5e1 \ No newline at end of file