Page Menu · Home · Software Heritage

No One · Temporary

diff --git a/debian/control b/debian/control
index 0bec5e7..d111162 100644
--- a/debian/control
+++ b/debian/control
@@ -1,24 +1,25 @@
Source: swh-loader-tar
Maintainer: Software Heritage developers <swh-devel@inria.fr>
Section: python
Priority: optional
Build-Depends: debhelper (>= 9),
dh-python,
python3-all,
python3-nose,
python3-setuptools,
python3-swh.core (>= 0.0.14~),
+ python3-swh.model (>= 0.0.13~),
python3-swh.scheduler,
python3-swh.storage (>= 0.0.76~),
python3-swh.loader.dir (>= 0.0.24~),
python3-vcversioner
Standards-Version: 3.9.6
Homepage: https://forge.softwareheritage.org/diffusion/DLDTAR/
Package: python3-swh.loader.tar
Architecture: all
Depends: python3-swh.core (>= 0.0.14~), python3-swh.storage (>= 0.0.76~),
python3-swh.loader.dir (>= 0.0.24~), python3-swh.scheduler,
${misc:Depends},
${python3:Depends}
Description: Software Heritage Tarball Loader
diff --git a/requirements-swh.txt b/requirements-swh.txt
index cd41b69..08432c5 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,5 @@
swh.core >= 0.0.14
+swh.model >= 0.0.13
swh.scheduler
swh.storage >= 0.0.76
swh.loader.dir >= 0.0.24
diff --git a/swh/loader/tar/loader.py b/swh/loader/tar/loader.py
index 7d320f4..9d2ac5d 100644
--- a/swh/loader/tar/loader.py
+++ b/swh/loader/tar/loader.py
@@ -1,100 +1,100 @@
# Copyright (C) 2015-2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import tempfile
import shutil
-from swh.core import hashutil
from swh.loader.dir import loader
from swh.loader.tar import tarball, utils
+from swh.model import hashutil
class TarLoader(loader.DirLoader):
    """A tarball loader:

    - creates an origin if it does not exist
    - creates a fetch_history entry
    - creates an origin_visit
    - uncompress locally the tarballs in a temporary location
    - process the content of the tarballs to persist on swh storage
    - clean up the temporary location
    - write an entry in fetch_history to mark the loading tarball end
      (success or failure)

    Args:
        tarpath: path to the tarball to uncompress
        origin: Dictionary origin
          - url: url origin we fetched
          - type: type of the origin
        visit_date (str): To override the visit date
        revision: Dictionary of information needed, keys are:
          - author_name: revision's author name
          - author_email: revision's author email
          - author_date: timestamp (e.g. 1444054085)
          - author_offset: date offset e.g. -0220, +0100
          - committer_name: revision's committer name
          - committer_email: revision's committer email
          - committer_date: timestamp
          - committer_offset: date offset e.g. -0220, +0100
          - type: type of revision dir, tar
          - message: synthetic message for the revision
        occurrences: List of occurrence dictionary.
          Information needed, keys are:
          - branch: occurrence's branch name
          - authority_id: authority id (e.g. 1 for swh)
          - validity: validity date (e.g. 2015-01-01 00:00:00+00)

    """
    CONFIG_BASE_FILENAME = 'loader/tar'

    ADDITIONAL_CONFIG = {
        'extraction_dir': ('string', '/tmp')
    }

    # Temporary extraction directory; set by prepare(). Defaulting to None
    # at class level makes cleanup() safe even when prepare() failed early
    # (previously cleanup() raised AttributeError in that case).
    dir_path = None

    def __init__(self):
        super().__init__(logging_class='swh.loader.tar.TarLoader')

    def prepare(self, *args, **kwargs):
        """1. Uncompress the tarball in a temporary directory.
           2. Compute some metadata to update the revision.

        """
        tarpath, origin, visit_date, revision, occs = args

        if 'type' not in origin:  # let the type flow if present
            origin['type'] = 'tar'

        # Prepare the extraction path
        extraction_dir = self.config['extraction_dir']
        os.makedirs(extraction_dir, 0o755, exist_ok=True)
        dir_path = tempfile.mkdtemp(prefix='swh.loader.tar-',
                                    dir=extraction_dir)

        # add checksums in revision
        artifact = utils.convert_to_hex(hashutil.hash_path(tarpath))
        artifact['name'] = os.path.basename(tarpath)

        # lazy %-args: the message is only formatted if INFO is enabled
        self.log.info('Uncompress %s to %s', tarpath, dir_path)
        nature = tarball.uncompress(tarpath, dir_path)
        artifact['archive_type'] = nature
        artifact['length'] = os.path.getsize(tarpath)

        revision['metadata'] = {
            'original_artifact': [artifact],
        }

        self.dir_path = dir_path

        super().prepare(dir_path, origin, visit_date, revision, None, occs)

    def cleanup(self):
        """Clean up temporary directory where we uncompress the tarball.

        Idempotent: safe to call even if prepare() never ran or already
        cleaned up.
        """
        dir_path = self.dir_path
        if dir_path and os.path.exists(dir_path):
            shutil.rmtree(dir_path)
            self.dir_path = None
diff --git a/swh/loader/tar/utils.py b/swh/loader/tar/utils.py
index 67706d6..1edbdfc 100644
--- a/swh/loader/tar/utils.py
+++ b/swh/loader/tar/utils.py
@@ -1,78 +1,78 @@
# Copyright (C) 2015-2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import itertools
import random
-from swh.core import hashutil
+from swh.model import hashutil
def commonname(path0, path1, as_str=False):
    """Return the part of *path1* that follows *path0*.

    Implemented as a split on the first occurrence of *path0*; the segment
    right after that first occurrence is returned.

    NOTE(review): *as_str* is accepted but never read — kept only for
    interface compatibility with existing callers.
    """
    segments = path1.split(path0)
    return segments[1]
def convert_to_hex(d):
    """Convert a flat dictionary with bytes in values to the same dictionary
    with hex as values.

    Args:
        d: flat dictionary with sha bytes in their values.

    Returns:
        Mirror dictionary with values as string hex. Falsy inputs
        (``None``, empty dict) are returned unchanged.

    """
    if not d:
        return d

    return {key: hashutil.hash_to_hex(value) for key, value in d.items()}
def grouper(iterable, n, fillvalue=None):
    """Collect data into fixed-length chunks or blocks.

    Args:
        iterable: an iterable
        n: size of block
        fillvalue: value used to pad the last block when *iterable* is
            exhausted mid-block

    Returns:
        An iterator over *n*-sized tuples drawn from *iterable*.

    """
    # n references to the SAME iterator: zip_longest pulls n consecutive
    # items per output tuple, padding the final one with fillvalue.
    shared_it = iter(iterable)
    return itertools.zip_longest(*([shared_it] * n), fillvalue=fillvalue)
def random_blocks(iterable, block=100, fillvalue=None):
    """Given an iterable:

    - slice the iterable in data set of block-sized elements
    - randomize each block
    - yield each element

    Args:
        iterable: iterable of data
        block: number of elements per block
        fillvalue: a fillvalue for the last block if not enough values in
            last block

    Returns:
        An iterable of randomized per block-size elements.

    """
    # Fixes from review: dropped the unused `count` accumulator, stopped
    # rebinding the `iterable` parameter inside the loop, and renamed the
    # ambiguous single-letter `l` (PEP 8 / E741).
    for chunk in grouper(iterable, block, fillvalue=fillvalue):
        shuffled = list(chunk)
        random.shuffle(shuffled)
        yield from shuffled

File Metadata

Mime Type
text/x-diff
Expires
Sat, Jun 21, 9:15 PM (4 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3344582

Event Timeline