Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/swh/loader/tar/loader.py b/swh/loader/tar/loader.py
index 2192c2c..ab17f0b 100644
--- a/swh/loader/tar/loader.py
+++ b/swh/loader/tar/loader.py
@@ -1,114 +1,113 @@
# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import logging
import os
import tempfile
import shutil
import sys
import traceback
from swh.loader.dir import loader
from swh.loader.tar import tarball
class TarLoader(loader.DirLoader):
    """A tarball loader.

    Uncompresses a tarball into a temporary directory, then delegates the
    loading of the extracted tree to the parent directory loader.
    """

    def __init__(self, config):
        """Initialize the parent loader with `config` and set up the logger."""
        super().__init__(config)
        self.log = logging.getLogger('swh.loader.tar.TarLoader')

    def process(self, tarpath, origin, revision, release, occurrences):
        """Load a tarball in backend.

        This will:
        - persist the origin if it does not exist.
        - write an entry in fetch_history to mark the loading tarball start
        - uncompress locally the tarballs in a temporary location
        - process the content of the tarballs to persist on swh storage
        - clean up the temporary location
        - write an entry in fetch_history to mark the loading tarball end

        Args:
            - tarpath: path to the tarball to uncompress
            - origin: Dictionary origin
              - url: url origin we fetched
              - type: type of the origin (defaults to 'tar' when absent)
            - revision: Dictionary of information needed, keys are:
              - author_name: revision's author name
              - author_email: revision's author email
              - author_date: timestamp (e.g. 1444054085)
              - author_offset: date offset e.g. -0220, +0100
              - committer_name: revision's committer name
              - committer_email: revision's committer email
              - committer_date: timestamp
              - committer_offset: date offset e.g. -0220, +0100
              - type: type of revision dir, tar
              - message: synthetic message for the revision
            - release: Dictionary of information needed, keys are:
              - name: release name
              - date: release timestamp (e.g. 1444054085)
              - offset: release date offset e.g. -0220, +0100
              - author_name: release author's name
              - author_email: release author's email
              - comment: release's comment message
            - occurrences: List of occurrence dictionary.
              Information needed, keys are:
              - branch: occurrence's branch name
              - authority_id: authority id (e.g. 1 for swh)
              - validity: validity date (e.g. 2015-01-01 00:00:00+00)

        Raises:
            Any exception raised while uncompressing or loading is recorded
            in the fetch history entry and then re-raised unchanged.

        Note:
            `origin` is mutated in place: its 'type' (when missing) and 'id'
            keys are set.

        """
        if 'type' not in origin:  # let the type flow if present
            origin['type'] = 'tar'

        # NOTE(review): self.storage and self.config are presumably set up by
        # the parent loader's __init__ -- confirm against swh.loader.dir.
        origin['id'] = self.storage.origin_add_one(origin)

        # Mark the start of the loading
        fetch_history_id = self.open_fetch_history(origin['id'])

        # Prepare the extraction path
        extraction_dir = self.config['extraction_dir']
        os.makedirs(extraction_dir, 0o755, exist_ok=True)
        dir_path = tempfile.mkdtemp(prefix='swh.loader.tar-',
                                    dir=extraction_dir)

        # T62:
        # - create tarball as content in storage
        # - transit the information to the loader dir
        # T22: add checksums in revision

        # for edge cases (NotImplemented...)
        result = {'status': False, 'stderr': ''}
        try:
            self.log.info('Uncompress %s to %s', tarpath, dir_path)
            tarball.uncompress(tarpath, dir_path)
            result = super().process(dir_path, origin, revision, release,
                                     occurrences)
        except BaseException as exc:
            # Deliberately as broad as a bare `except:`: the failure is only
            # recorded here, then re-raised untouched.
            if not result['status']:
                # Enrich the error message with the failure and its trace.
                # The 'reason:' prefix is added once, in the `finally` clause
                # below, so it must not be repeated here.
                result['stderr'] = '%s\ntrace:%s\n%s' % (
                    exc,
                    ''.join(traceback.format_tb(exc.__traceback__)),
                    result.get('stderr', ''))
            raise
        finally:
            try:
                shutil.rmtree(dir_path)
            finally:
                # Close the fetch history entry even when the cleanup of the
                # temporary directory itself fails.
                if not result['status']:
                    result['stderr'] = 'archive:%s\nreason:%s' % (
                        tarpath,
                        result.get('stderr', ''))

                # mark the end of the loading
                self.close_fetch_history(fetch_history_id, result)

File Metadata

Mime Type
text/x-diff
Expires
Fri, Jul 4, 12:17 PM (2 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3245985

Event Timeline