diff --git a/PKG-INFO b/PKG-INFO index b24d3a9..c49618f 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.loader.tar -Version: 0.0.17 +Version: 0.0.18 Summary: Software Heritage Tarball Loader Home-page: https://forge.softwareheritage.org/diffusion/DLDTAR Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/debian/control b/debian/control index c357cc1..8db70fd 100644 --- a/debian/control +++ b/debian/control @@ -1,23 +1,23 @@ Source: swh-loader-tar Maintainer: Software Heritage developers Section: python Priority: optional Build-Depends: debhelper (>= 9), dh-python, python3-all, python3-nose, python3-setuptools, python3-swh.core (>= 0.0.14~), python3-swh.scheduler, python3-swh.storage (>= 0.0.31~), - python3-swh.loader.dir (>= 0.0.18~), - python3-swh.loader.core (>= 0.0.5), + python3-swh.loader.dir (>= 0.0.19~), + python3-swh.loader.core (>= 0.0.8~), python3-vcversioner Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/diffusion/DLDTAR/ Package: python3-swh.loader.tar Architecture: all Depends: ${misc:Depends}, ${python3:Depends} Description: Software Heritage Tarball Loader diff --git a/requirements.txt b/requirements.txt index 88fea74..a5c6933 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ # Add here external Python modules dependencies, one per line. Module names # should match https://pypi.python.org/pypi names. For the full spec or # dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html vcversioner swh.core >= 0.0.14 swh.scheduler swh.storage >= 0.0.31 -swh.loader.dir >= 0.0.18 -swh.loader.core >= 0.0.5 +swh.loader.dir >= 0.0.19 +swh.loader.core >= 0.0.8 retrying diff --git a/swh.loader.tar.egg-info/PKG-INFO b/swh.loader.tar.egg-info/PKG-INFO index b24d3a9..c49618f 100644 --- a/swh.loader.tar.egg-info/PKG-INFO +++ b/swh.loader.tar.egg-info/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.loader.tar -Version: 0.0.17 +Version: 0.0.18 Summary: Software Heritage Tarball Loader Home-page: https://forge.softwareheritage.org/diffusion/DLDTAR Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh.loader.tar.egg-info/requires.txt b/swh.loader.tar.egg-info/requires.txt index a11f998..9345108 100644 --- a/swh.loader.tar.egg-info/requires.txt +++ b/swh.loader.tar.egg-info/requires.txt @@ -1,7 +1,7 @@ retrying swh.core>=0.0.14 -swh.loader.core>=0.0.5 -swh.loader.dir>=0.0.18 +swh.loader.core>=0.0.8 +swh.loader.dir>=0.0.19 swh.scheduler swh.storage>=0.0.31 vcversioner diff --git a/swh/loader/tar/build.py b/swh/loader/tar/build.py index 1ee03b7..83f9372 100755 --- a/swh/loader/tar/build.py +++ b/swh/loader/tar/build.py @@ -1,134 +1,136 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from swh.loader.tar import utils # Static setup EPOCH = 0 -UTC_OFFSET = '+0000' -SWH_PERSON = 'Software Heritage' -SWH_MAIL = 'robot@softwareheritage.org' +UTC_OFFSET = 0 +SWH_PERSON = { + 'name': 'Software Heritage', + 'fullname': 'Software Heritage', + 'email': 'robot@softwareheritage.org' +} REVISION_MESSAGE = 'synthetic revision message' RELEASE_MESSAGE = 'synthetic release message' REVISION_TYPE = 'tar' def compute_origin(url_scheme, url_type, root_dirpath, tarpath): """Compute the origin. Args: - url_scheme: scheme to build the origin's url - url_type: origin's type - root_dirpath: the top level root directory path - tarpath: file's absolute path Returns: Dictionary origin with keys: - url: origin's url - type: origin's type """ relative_path = utils.commonname(root_dirpath, tarpath) return { 'url': ''.join([url_scheme, os.path.dirname(relative_path)]), 'type': url_type, } def occurrence_with_date(date, tarpath): """Compute the occurrence using the tarpath's ctime. Args: authority: the authority's uuid tarpath: file's path Returns: Occurrence dictionary (cf. _build_occurrence) """ return { 'branch': os.path.basename(tarpath), 'date': date } def _time_from_path(tarpath): """Compute the modification time from the tarpath. """ return os.lstat(tarpath).st_mtime def compute_revision(tarpath): """Compute a revision. Args: tarpath: absolute path to the tarball Returns: Revision as dict: - - author_date: the modification timestamp as returned by a fstat call - - author_offset: +0000 + - date: the modification timestamp as returned by a fstat call - committer_date: the modification timestamp as returned by a fstat call - - committer_offset: +0000 - - author_name: cf. SWH_PERSON - - author_email: cf. SWH_MAIL - - committer_name: cf. SWH_MAIL - - committer_email: cf. SWH_MAIL + - author: cf. SWH_PERSON + - committer: cf. SWH_PERSON - type: cf. REVISION_TYPE - message: cf. REVISION_MESSAGE """ ts = _time_from_path(tarpath) return { - 'author_date': ts, - 'author_offset': UTC_OFFSET, - 'committer_date': ts, - 'committer_offset': UTC_OFFSET, - 'author_name': SWH_PERSON, - 'author_email': SWH_MAIL, - 'committer_name': SWH_PERSON, - 'committer_email': SWH_MAIL, + 'date': { + 'timestamp': ts, + 'offset': UTC_OFFSET, + }, + 'committer_date': { + 'timestamp': ts, + 'offset': UTC_OFFSET, + }, + 'author': SWH_PERSON, + 'committer': SWH_PERSON, 'type': REVISION_TYPE, 'message': REVISION_MESSAGE, } def compute_release(filename, tarpath): """Compute a release from a given tarpath, filename. If the tarpath does not contain a recognizable release number, the release can be skipped. Args: filename: file's name without path tarpath: file's absolute path Returns: None if the release number cannot be extracted from the filename. Otherwise a synthetic release is computed with the following keys: - name: the release computed from the filename - date: the modification timestamp as returned by a fstat call - - offset: +0000 + - offset: 0 - author_name: '' - author_email: '' - comment: '' """ release_number = utils.release_number(filename) if release_number: return { 'name': release_number, - 'date': _time_from_path(tarpath), - 'offset': UTC_OFFSET, - 'author_name': SWH_PERSON, - 'author_email': SWH_MAIL, - 'comment': RELEASE_MESSAGE, + 'date': { + 'timestamp': _time_from_path(tarpath), + 'offset': UTC_OFFSET, + }, + 'author': SWH_PERSON, + 'message': RELEASE_MESSAGE, } return None diff --git a/swh/loader/tar/loader.py b/swh/loader/tar/loader.py index 38a02ed..002f091 100644 --- a/swh/loader/tar/loader.py +++ b/swh/loader/tar/loader.py @@ -1,101 +1,93 @@ # Copyright (C) 2015-2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import tempfile import shutil -import sys -import traceback from swh.core import hashutil from swh.loader.dir import loader from swh.loader.tar import tarball, utils class TarLoader(loader.DirLoader): """A tarball loader. """ + CONFIG_BASE_FILENAME = 'loader/tar.ini' + + ADDITIONAL_CONFIG = { + 'extraction_dir': ('string', '/tmp') + } + def __init__(self, origin_id): super().__init__(origin_id, logging_class='swh.loader.tar.TarLoader') def process(self, tarpath, origin, revision, release, occurrences): """Load a tarball in backend. This will: - persist the origin if it does not exist. - write an entry in fetch_history to mark the loading tarball start - uncompress locally the tarballs in a temporary location - process the content of the tarballs to persist on swh storage - clean up the temporary location - write an entry in fetch_history to mark the loading tarball end Args: - tarpath: path to the tarball to uncompress - origin: Dictionary origin - url: url origin we fetched - type: type of the origin - revision: Dictionary of information needed, keys are: - author_name: revision's author name - author_email: revision's author email - author_date: timestamp (e.g. 1444054085) - author_offset: date offset e.g. -0220, +0100 - committer_name: revision's committer name - committer_email: revision's committer email - committer_date: timestamp - committer_offset: date offset e.g. -0220, +0100 - type: type of revision dir, tar - message: synthetic message for the revision - release: Dictionary of information needed, keys are: - name: release name - date: release timestamp (e.g. 1444054085) - offset: release date offset e.g. -0220, +0100 - author_name: release author's name - author_email: release author's email - comment: release's comment message - occurrences: List of occurrence dictionary. Information needed, keys are: - branch: occurrence's branch name - authority_id: authority id (e.g. 1 for swh) - validity: validity date (e.g. 2015-01-01 00:00:00+00) """ # Prepare the extraction path extraction_dir = self.config['extraction_dir'] os.makedirs(extraction_dir, 0o755, exist_ok=True) dir_path = tempfile.mkdtemp(prefix='swh.loader.tar-', dir=extraction_dir) # add checksums in revision artifact = utils.convert_to_hex(hashutil.hashfile(tarpath)) artifact['name'] = os.path.basename(tarpath) - # for edge cases (NotImplemented...) - result = {'status': False, 'stderr': ''} - try: self.log.info('Uncompress %s to %s' % (tarpath, dir_path)) nature = tarball.uncompress(tarpath, dir_path) artifact['archive_type'] = nature artifact['length'] = os.path.getsize(tarpath) revision['metadata'] = { 'original_artifact': [artifact], } return super().process(dir_path, origin, revision, release, occurrences) - except: - e_info = sys.exc_info() - if not result['status']: - # Enrich the error message with the tarball - result['stderr'] = 'reason:%s\ntrace:%s\n%s' % ( - e_info[1], - ''.join(traceback.format_tb(e_info[2])), - result.get('stderr', '')) - return result finally: shutil.rmtree(dir_path) diff --git a/swh/loader/tar/tasks.py b/swh/loader/tar/tasks.py index 6ae84fd..91afac7 100644 --- a/swh/loader/tar/tasks.py +++ b/swh/loader/tar/tasks.py @@ -1,41 +1,41 @@ # Copyright (C) 2015-2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.loader.core import tasks from swh.loader.tar.loader import TarLoader class LoadTarRepository(tasks.LoaderCoreTask): """Import a directory to Software Heritage """ task_queue = 'swh_loader_tar' def run(self, tarpath, origin, revision, release, occurrences): """Import a tarball into swh. Args: - tarpath: path to a tarball file - origin, revision, release, occurrences: cf. swh.loader.dir.loader.run docstring """ - storage = TarLoader().storage + storage = TarLoader(origin_id=None).storage if 'type' not in origin: # let the type flow if present origin['type'] = 'tar' origin['id'] = storage.origin_add_one(origin) fetch_history_id = self.open_fetch_history(storage, origin['id']) result = TarLoader(origin['id']).process(tarpath, origin, revision, release, occurrences) self.close_fetch_history(storage, fetch_history_id, result) diff --git a/swh/loader/tar/tests/test_build.py b/swh/loader/tar/tests/test_build.py index d51afbc..e4a7763 100644 --- a/swh/loader/tar/tests/test_build.py +++ b/swh/loader/tar/tests/test_build.py @@ -1,103 +1,106 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from unittest.mock import patch from swh.loader.tar import build class TestBuildUtils(unittest.TestCase): @istest def compute_origin(self): # given expected_origin = { 'url': 'rsync://some/url/package-foo', 'type': 'rsync', } # when actual_origin = build.compute_origin( 'rsync://some/url/', 'rsync', '/some/root/path/', '/some/root/path/package-foo/package-foo-1.2.3.tgz') # then self.assertEquals(actual_origin, expected_origin) @istest def occurrence_with_date(self): # given expected_occurrence = { 'branch': b'package-bar.tgz', 'date': '2015-10-22 08:44:47.422384+00' } # when actual_occurrence = build.occurrence_with_date( '2015-10-22 08:44:47.422384+00', b'/path/to/package-bar.tgz',) # then self.assertEquals(actual_occurrence, expected_occurrence) @istest def compute_release__no_release(self): # given # when actual_release = build.compute_release( 'pack-without-version.tgz', '/some/path/to/pack-without-version.tgz') # then self.assertIsNone(actual_release) @istest def compute_release(self): # given expected_release = { 'name': '1.2.3rc1', - 'date': 'some-time', - 'offset': build.UTC_OFFSET, - 'author_name': build.SWH_PERSON, - 'author_email': build.SWH_MAIL, - 'comment': build.RELEASE_MESSAGE, + 'date': { + 'timestamp': 'some-time', + 'offset': build.UTC_OFFSET, + }, + 'author': build.SWH_PERSON, + 'message': build.RELEASE_MESSAGE, } # when with patch('swh.loader.tar.build._time_from_path', return_value='some-time'): actual_release = build.compute_release( 'foobar-1.2.3rc1.tgz', '/some/path/to/path-without-version.tgz') # then self.assertEquals(expected_release, actual_release) @istest def compute_revision(self): # when with patch('swh.loader.tar.build._time_from_path', return_value='some-other-time'): actual_revision = build.compute_revision('/some/path') expected_revision = { - 'author_date': 'some-other-time', - 'author_offset': build.UTC_OFFSET, - 'committer_date': 'some-other-time', - 'committer_offset': build.UTC_OFFSET, - 'author_name': build.SWH_PERSON, - 'author_email': build.SWH_MAIL, - 'committer_name': build.SWH_PERSON, - 'committer_email': build.SWH_MAIL, + 'date': { + 'timestamp': 'some-other-time', + 'offset': build.UTC_OFFSET, + }, + 'committer_date': { + 'timestamp': 'some-other-time', + 'offset': build.UTC_OFFSET, + }, + 'author': build.SWH_PERSON, + 'committer': build.SWH_PERSON, 'type': build.REVISION_TYPE, 'message': build.REVISION_MESSAGE, } # then self.assertEquals(actual_revision, expected_revision) diff --git a/version.txt b/version.txt index bdf7927..80f6aea 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.17-0-gf590883 \ No newline at end of file +v0.0.18-0-g20a7308 \ No newline at end of file