Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9125115
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
26 KB
Subscribers
None
View Options
diff --git a/PKG-INFO b/PKG-INFO
index 6a5a665..69d55c8 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,10 +1,10 @@
Metadata-Version: 1.0
Name: swh.loader.tar
-Version: 0.0.31
+Version: 0.0.32
Summary: Software Heritage Tarball Loader
Home-page: https://forge.softwareheritage.org/diffusion/DLDTAR
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
diff --git a/debian/control b/debian/control
index 21ce26e..f639d03 100644
--- a/debian/control
+++ b/debian/control
@@ -1,25 +1,25 @@
Source: swh-loader-tar
Maintainer: Software Heritage developers <swh-devel@inria.fr>
Section: python
Priority: optional
Build-Depends: debhelper (>= 9),
dh-python (>= 2),
python3-all,
python3-nose,
python3-setuptools,
- python3-swh.core (>= 0.0.14~),
+ python3-swh.core (>= 0.0.36~),
python3-swh.model (>= 0.0.15~),
python3-swh.scheduler (>= 0.0.14~),
python3-swh.storage (>= 0.0.83~),
python3-swh.loader.dir (>= 0.0.30~),
python3-vcversioner
Standards-Version: 3.9.6
Homepage: https://forge.softwareheritage.org/diffusion/DLDTAR/
Package: python3-swh.loader.tar
Architecture: all
-Depends: python3-swh.core (>= 0.0.14~), python3-swh.storage (>= 0.0.83~),
+Depends: python3-swh.core (>= 0.0.36~), python3-swh.storage (>= 0.0.83~),
python3-swh.loader.dir (>= 0.0.30~), python3-swh.scheduler (>= 0.0.14~),
${misc:Depends},
${python3:Depends}
Description: Software Heritage Tarball Loader
diff --git a/requirements-swh.txt b/requirements-swh.txt
index 85dfb89..649e66d 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,5 +1,5 @@
-swh.core >= 0.0.14
+swh.core >= 0.0.36
swh.model >= 0.0.15
swh.scheduler >= 0.0.14
swh.storage >= 0.0.83
swh.loader.dir >= 0.0.30
diff --git a/swh.loader.tar.egg-info/PKG-INFO b/swh.loader.tar.egg-info/PKG-INFO
index 6a5a665..69d55c8 100644
--- a/swh.loader.tar.egg-info/PKG-INFO
+++ b/swh.loader.tar.egg-info/PKG-INFO
@@ -1,10 +1,10 @@
Metadata-Version: 1.0
Name: swh.loader.tar
-Version: 0.0.31
+Version: 0.0.32
Summary: Software Heritage Tarball Loader
Home-page: https://forge.softwareheritage.org/diffusion/DLDTAR
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
diff --git a/swh.loader.tar.egg-info/SOURCES.txt b/swh.loader.tar.egg-info/SOURCES.txt
index 4ed34f6..4c71250 100644
--- a/swh.loader.tar.egg-info/SOURCES.txt
+++ b/swh.loader.tar.egg-info/SOURCES.txt
@@ -1,43 +1,42 @@
.gitignore
AUTHORS
LICENSE
MANIFEST.in
Makefile
README
requirements-swh.txt
requirements.txt
setup.py
version.txt
debian/changelog
debian/compat
debian/control
debian/copyright
debian/rules
debian/source/format
docs/.gitignore
docs/Makefile
docs/conf.py
docs/index.rst
docs/_static/.placeholder
docs/_templates/.placeholder
resources/producer/tar-gnu.yml
resources/producer/tar-old-gnu.yml
swh/__init__.py
swh.loader.tar.egg-info/PKG-INFO
swh.loader.tar.egg-info/SOURCES.txt
swh.loader.tar.egg-info/dependency_links.txt
swh.loader.tar.egg-info/requires.txt
swh.loader.tar.egg-info/top_level.txt
swh/loader/__init__.py
swh/loader/tar/__init__.py
swh/loader/tar/build.py
swh/loader/tar/db.py
swh/loader/tar/file.py
swh/loader/tar/loader.py
swh/loader/tar/producer.py
-swh/loader/tar/tarball.py
swh/loader/tar/tasks.py
swh/loader/tar/utils.py
swh/loader/tar/tests/test_build.py
swh/loader/tar/tests/test_loader.py
swh/loader/tar/tests/test_utils.py
\ No newline at end of file
diff --git a/swh.loader.tar.egg-info/requires.txt b/swh.loader.tar.egg-info/requires.txt
index 5b0b80f..e0ec4d4 100644
--- a/swh.loader.tar.egg-info/requires.txt
+++ b/swh.loader.tar.egg-info/requires.txt
@@ -1,9 +1,9 @@
click
python-dateutil
retrying
-swh.core>=0.0.14
+swh.core>=0.0.36
swh.loader.dir>=0.0.30
swh.model>=0.0.15
swh.scheduler>=0.0.14
swh.storage>=0.0.83
vcversioner
diff --git a/swh/loader/tar/build.py b/swh/loader/tar/build.py
index c5ff02d..47e5415 100755
--- a/swh/loader/tar/build.py
+++ b/swh/loader/tar/build.py
@@ -1,116 +1,116 @@
# Copyright (C) 2015-2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
-from swh.loader.tar import utils
+from swh.core import utils
# Static setup
# Constant values used when building synthetic revisions for tarballs.
EPOCH = 0
UTC_OFFSET = 0

# Identity reused for both the author and the committer fields.
SWH_PERSON = dict(
    name='Software Heritage',
    fullname='Software Heritage',
    email='robot@softwareheritage.org',
)

REVISION_MESSAGE = 'synthetic revision message'
REVISION_TYPE = 'tar'
def compute_origin(url_scheme, url_type, root_dirpath, tarpath):
    """Build the origin dictionary for a tarball.

    Args:
        url_scheme: prefix used to build the origin's url
        url_type: value stored as the origin's type
        root_dirpath: the top level root directory path
        tarpath: the tarball's absolute path

    Returns:
        Dictionary origin with keys:
        - url: url_scheme followed by the directory name of the path
          returned by utils.commonname(root_dirpath, tarpath)
        - type: url_type

    """
    tar_dirname = os.path.dirname(utils.commonname(root_dirpath, tarpath))
    origin_url = ''.join((url_scheme, tar_dirname))
    return {'url': origin_url, 'type': url_type}
def compute_occurrence(tarpath):
    """Build the occurrence dictionary for a tarball.

    The branch name is the tarball's file name (last path component).

    Args:
        tarpath: file's path

    Returns:
        Occurrence dictionary with a single 'branch' key.

    """
    branch_name = os.path.basename(tarpath)
    return {'branch': branch_name}
def _time_from_path(tarpath):
    """Compute the modification time from the tarpath.

    The archive itself is inspected (symbolic links are not followed,
    as os.lstat is used).

    Args:
        tarpath (str|bytes): Full path to the archive to extract the
            date from.

    Returns:
        dict representing a timestamp with keys seconds and microseconds
        keys. microseconds is always within [0, 999999].

    Raises:
        OSError: when tarpath cannot be stat'ed.

    """
    # Work from the integer nanosecond value: splitting str(float) on '.'
    # would read the fractional digits as a microsecond count (".05" -> 5
    # instead of 50000) and breaks on exponent-formatted floats.
    mtime_ns = os.lstat(tarpath).st_mtime_ns
    seconds, nanoseconds = divmod(mtime_ns, 10 ** 9)

    return {
        'seconds': seconds,
        'microseconds': nanoseconds // 1000,
    }
def compute_revision(tarpath):
    """Build the synthetic revision dictionary for a tarball.

    Args:
        tarpath: absolute path to the tarball

    Returns:
        Revision as dict:
        - date (dict): timestamp as returned by _time_from_path, with
          offset UTC_OFFSET
        - committer_date (dict): same timestamp and offset as date
        - author: cf. SWH_PERSON
        - committer: cf. SWH_PERSON
        - type: cf. REVISION_TYPE
        - message: cf. REVISION_MESSAGE

    """
    mtime = _time_from_path(tarpath)

    revision = {}
    # Both dates carry the very same timestamp object.
    for date_key in ('date', 'committer_date'):
        revision[date_key] = {'timestamp': mtime, 'offset': UTC_OFFSET}
    for person_key in ('author', 'committer'):
        revision[person_key] = SWH_PERSON
    revision['type'] = REVISION_TYPE
    revision['message'] = REVISION_MESSAGE
    return revision
diff --git a/swh/loader/tar/file.py b/swh/loader/tar/file.py
index af7fa91..57fd6b5 100644
--- a/swh/loader/tar/file.py
+++ b/swh/loader/tar/file.py
@@ -1,89 +1,90 @@
-# Copyright (C) 2015 The Software Heritage developers
+# Copyright (C) 2015-2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import itertools
import os
-from swh.loader.tar import tarball, utils
+from swh.core import tarball
+from swh.loader.tar import utils
def archives_from_dir(path):
    """Walk a directory tree and yield the archives found in it.

    Entries that no longer exist when they are checked are skipped.

    Args:
        path: top level directory

    Returns:
        Generator of tuple tarpath, filename with tarpath a tarball.

    """
    for current_dir, _, names in os.walk(path):
        for name in names:
            candidate = os.path.join(current_dir, name)
            if os.path.exists(candidate) and tarball.is_tarball(candidate):
                yield candidate, name
def archives_from_file(mirror_file):
    """Read a listing file (one tarball path per line) and yield the
    archives it references.

    Paths that do not exist are reported on standard output and skipped.

    Args:
        mirror_file: path to the file containing list of tarpath.

    Returns:
        Generator of tuple tarpath, filename with tarpath a tarball.

    """
    with open(mirror_file, 'r') as listing:
        for line in listing:
            candidate = line.strip()
            if os.path.exists(candidate):
                if tarball.is_tarball(candidate):
                    yield candidate, os.path.basename(candidate)
            else:
                print('WARN: %s does not exist. Skipped.' % candidate)
def archives_from(path):
    """List the archives referenced by path.

    A regular file is read as a listing of tarball paths, a directory is
    walked recursively.

    Args:
        path: top directory to list archives from or custom file format.

    Returns:
        Generator of tuple tarpath, filename with tarpath a tarball.

    Raises:
        ValueError when path is neither a file nor a directory (raised
        when the generator is first consumed).

    """
    if os.path.isfile(path):
        lister = archives_from_file
    elif os.path.isdir(path):
        lister = archives_from_dir
    else:
        raise ValueError(
            'Input incorrect, %s must be a file or a directory.' % path)

    yield from lister(path)
def random_archives_from(path, block, limit=None):
    """Yield the archives found from path, shuffled per block.

    Args:
        path: top directory to list archives from or custom file format.
        block: number of archives per shuffled block
        limit: when truthy, maximum number of shuffled entries consumed
            (padding entries of the last block included)

    Returns:
        Generator of randomized tuple tarpath, filename with tarpath a tarball.

    """
    shuffled = utils.random_blocks(archives_from(path),
                                   block,
                                   fillvalue=(None, None))
    if limit:
        shuffled = itertools.islice(shuffled, limit)

    for tarpath, fname in shuffled:
        # Drop the (None, None) padding added to complete the last block.
        if tarpath and fname:
            yield tarpath, fname
diff --git a/swh/loader/tar/loader.py b/swh/loader/tar/loader.py
index 9a01379..fb205e8 100644
--- a/swh/loader/tar/loader.py
+++ b/swh/loader/tar/loader.py
@@ -1,106 +1,107 @@
# Copyright (C) 2015-2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import tempfile
import shutil
+from swh.core import tarball
from swh.loader.core.loader import SWHLoader
from swh.loader.dir import loader
-from swh.loader.tar import tarball, utils
+from swh.loader.tar import utils
from swh.model import hashutil
class TarLoader(loader.DirLoader):
    """Tarball loader implementation.

    This is a subclass of the :class:DirLoader: the tarball is first
    uncompressed, then the resulting directory/tree is handed over to the
    DirLoader to be loaded.

    This will:

    - creates an origin (if it does not exist)
    - creates a fetch_history entry
    - creates an origin_visit
    - uncompress locally the tarball in a temporary location
    - process the content of the tarballs to persist on swh storage
    - clean up the temporary location
    - write an entry in fetch_history to mark the loading tarball end (success
      or failure)

    """
    CONFIG_BASE_FILENAME = 'loader/tar'

    # Extra configuration entry: parent directory of the temporary
    # extraction directories.
    ADDITIONAL_CONFIG = {
        'extraction_dir': ('string', '/tmp')
    }

    def __init__(self, logging_class='swh.loader.tar.TarLoader', config=None):
        super().__init__(logging_class=logging_class, config=config)

    def load(self, *, tar_path, origin, visit_date, revision, occurrences):
        """Load the tarball at `tar_path` in the Software Heritage Archive.

        Args:
            tar_path: tarball to import
            origin (dict): an origin dictionary as returned by
              :func:`swh.storage.storage.Storage.origin_get_one`
            visit_date (str): the date the origin was visited (as an
              isoformatted string)
            revision (dict): a revision as passed to
              :func:`swh.storage.storage.Storage.revision_add`, excluding the
              `id` and `directory` keys (computed from the directory)
            occurrences (list of dicts): the occurrences to create in the
              generated origin visit. Each dict contains a 'branch' key with
              the branch name as value.

        """
        # Call SWHLoader.load directly (bypassing DirLoader.load) since the
        # arguments differ from the DirLoader's.
        SWHLoader.load(
            self,
            tar_path=tar_path,
            origin=origin,
            visit_date=visit_date,
            revision=revision,
            occurrences=occurrences,
        )

    def prepare(self, *, tar_path, origin, visit_date, revision, occurrences):
        """1. Uncompress the tarball in a temporary directory.
           2. Compute some metadata to update the revision.

        """
        # Keep the origin's type when the caller provided one.
        origin.setdefault('type', 'tar')

        # Create a fresh temporary directory below the configured one.
        extraction_root = self.config['extraction_dir']
        os.makedirs(extraction_root, 0o755, exist_ok=True)
        dir_path = tempfile.mkdtemp(dir=extraction_root,
                                    prefix='swh.loader.tar-')

        self.log.info('Uncompress %s to %s' % (tar_path, dir_path))
        nature = tarball.uncompress(tar_path, dir_path)

        # Describe the original artifact (checksums, name, type, size)
        # unless the caller already supplied metadata.
        if 'metadata' not in revision:
            artifact = utils.convert_to_hex(hashutil.hash_path(tar_path))
            artifact.update(
                name=os.path.basename(tar_path),
                archive_type=nature,
                length=os.path.getsize(tar_path),
            )
            revision['metadata'] = {'original_artifact': [artifact]}

        super().prepare(
            dir_path=dir_path,
            origin=origin,
            visit_date=visit_date,
            revision=revision,
            release=None,
            occurrences=occurrences,
        )

    def cleanup(self):
        """Remove the temporary directory the tarball was uncompressed in.

        """
        extracted = self.dir_path
        if not extracted or not os.path.exists(extracted):
            return
        shutil.rmtree(extracted)
diff --git a/swh/loader/tar/tarball.py b/swh/loader/tar/tarball.py
deleted file mode 100644
index d341180..0000000
--- a/swh/loader/tar/tarball.py
+++ /dev/null
@@ -1,227 +0,0 @@
-# Copyright (C) 2015-2017 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import os
-import stat
-import tarfile
-import zipfile
-
-from os.path import abspath, realpath, join, dirname
-from swh.loader.tar import utils
-
-
-def canonical_abspath(path):
- """Resolve all paths to an absolute and real one.
-
- Args:
- path: to resolve
-
- Returns:
- canonical absolute path to path
-
- """
- return realpath(abspath(path))
-
-
-def badpath(path, basepath):
- """Determine if a path is outside basepath.
-
- Args:
- path: a relative or absolute path of a file or directory
- basepath: the basepath path must be in
-
- Returns:
- True if path is outside basepath, false otherwise.
-
- """
- return not canonical_abspath(join(basepath, path)).startswith(basepath)
-
-
-def badlink(info, basepath):
- """Determine if the tarinfo member is outside basepath.
-
- Args:
- info: TarInfo member representing a symlink or hardlink of tar archive
- basepath: the basepath the info member must be in
-
- Returns:
- True if info is outside basepath, false otherwise.
-
- """
- tippath = canonical_abspath(join(basepath, dirname(info.name)))
- return badpath(info.linkname, basepath=tippath)
-
-
-def is_tarball(filepath):
- """Given a filepath, determine if it represents an archive.
-
- Args:
- filepath: file to test for tarball property
-
- Returns:
- Bool, True if it's a tarball, False otherwise
-
- """
- return tarfile.is_tarfile(filepath) or zipfile.is_zipfile(filepath)
-
-
-def _uncompress_zip(tarpath, dirpath):
- """Uncompress zip archive safely.
-
- As per zipfile is concerned
- (cf. note on https://docs.python.org/3.5/library/zipfile.html#zipfile.ZipFile.extract) # noqa
-
- Args:
- tarpath: path to the archive
- dirpath: directory to uncompress the archive to
-
- """
- with zipfile.ZipFile(tarpath) as z:
- z.extractall(path=dirpath)
-
-
-def _uncompress_tar(tarpath, dirpath):
- """Uncompress tarpath if the tarpath is safe.
- Safe means, no file will be uncompressed outside of dirpath.
-
- Args:
- tarpath: path to the archive
- dirpath: directory to uncompress the archive to
-
- Raises:
- ValueError when a member would be extracted outside dirpath.
-
- """
- def safemembers(tarpath, members, basepath):
- """Given a list of archive members, yield the members (directory,
- file, hard-link) that stays in bounds with basepath. Note
- that symbolic link are authorized to point outside the
- basepath though.
-
- Args:
- tarpath: Name of the tarball
- members: Archive members for such tarball
- basepath: the basepath sandbox
-
- Yields:
- Safe TarInfo member
-
- Raises:
- ValueError when a member would be extracted outside basepath
-
- """
- errormsg = 'Archive {} blocked. Illegal path to %s %s'.format(tarpath)
-
- for finfo in members:
- if finfo.isdir() and badpath(finfo.name, basepath):
- raise ValueError(errormsg % ('directory', finfo.name))
- elif finfo.isfile() and badpath(finfo.name, basepath):
- raise ValueError(errormsg % ('file', finfo.name))
- elif finfo.islnk() and badlink(finfo, basepath):
- raise ValueError(errormsg % ('hard-link', finfo.linkname))
- # Authorize symlinks to point outside basepath
- # elif finfo.issym() and badlink(finfo, basepath):
- # raise ValueError(errormsg % ('symlink', finfo.linkname))
- else:
- yield finfo
-
- with tarfile.open(tarpath) as t:
- members = t.getmembers()
- t.extractall(path=dirpath,
- members=safemembers(tarpath, members, dirpath))
-
-
-def uncompress(tarpath, dest):
- """Uncompress tarpath to dest folder if tarball is supported and safe.
- Safe means, no file will be uncompressed outside of dirpath.
-
- Note that this fixes permissions after successfully
- uncompressing the archive.
-
- Args:
- tarpath: path to tarball to uncompress
- dest: the destination folder where to uncompress the tarball
-
- Returns:
- The nature of the tarball, zip or tar.
-
- Raises:
- ValueError when:
- - an archive member would be extracted outside basepath
- - the archive is not supported
-
- """
- if tarfile.is_tarfile(tarpath):
- _uncompress_tar(tarpath, dest)
- nature = 'tar'
- elif zipfile.is_zipfile(tarpath):
- _uncompress_zip(tarpath, dest)
- nature = 'zip'
- else:
- raise ValueError('File %s is not a supported archive.' % tarpath)
-
- # Fix permissions
- for dirpath, _, fnames in os.walk(dest):
- os.chmod(dirpath, 0o755)
- for fname in fnames:
- fpath = os.path.join(dirpath, fname)
- if not os.path.islink(fpath):
- fpath_exec = os.stat(fpath).st_mode & stat.S_IXUSR
- if not fpath_exec:
- os.chmod(fpath, 0o644)
-
- return nature
-
-
-def ls(rootdir):
- """Generator of filepath, filename from rootdir.
-
- """
- for dirpath, dirnames, fnames in os.walk(rootdir):
- for fname in (dirnames+fnames):
- fpath = os.path.join(dirpath, fname)
- fname = utils.commonname(rootdir, fpath)
- yield fpath, fname
-
-
-def _compress_zip(tarpath, files):
- """Compress dirpath's content as tarpath.
-
- """
- with zipfile.ZipFile(tarpath, 'w') as z:
- for fpath, fname in files:
- z.write(fpath, arcname=fname)
-
-
-def _compress_tar(tarpath, files):
- """Compress dirpath's content as tarpath.
-
- """
- with tarfile.open(tarpath, 'w:bz2') as t:
- for fpath, fname in files:
- t.add(fpath, arcname=fname, recursive=False)
-
-
-def compress(tarpath, nature, dirpath_or_files):
- """Create a tarball tarpath with nature nature.
- The content of the tarball is either dirpath's content (if representing
- a directory path) or dirpath's iterable contents.
-
- Compress the directory dirpath's content to a tarball.
- The tarball being dumped at tarpath.
- The nature of the tarball is determined by the nature argument.
-
- """
- if isinstance(dirpath_or_files, str):
- files = ls(dirpath_or_files)
- else: # iterable of 'filepath, filename'
- files = dirpath_or_files
-
- if nature == 'zip':
- _compress_zip(tarpath, files)
- else:
- _compress_tar(tarpath, files)
-
- return tarpath
diff --git a/swh/loader/tar/tests/test_utils.py b/swh/loader/tar/tests/test_utils.py
index 38f222f..05b43fc 100644
--- a/swh/loader/tar/tests/test_utils.py
+++ b/swh/loader/tar/tests/test_utils.py
@@ -1,59 +1,45 @@
-# Copyright (C) 2015 The Software Heritage developers
+# Copyright (C) 2015-2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import unittest
from nose.tools import istest
from swh.loader.tar import utils
class TestUtils(unittest.TestCase):
- @istest
- def commonname(self):
- # when
- actual_commonname = utils.commonname('/some/where/to/',
- '/some/where/to/go/to')
- # then
- self.assertEquals('go/to', actual_commonname)
-
- # when
- actual_commonname2 = utils.commonname(b'/some/where/to/',
- b'/some/where/to/go/to')
- # then
- self.assertEquals(b'go/to', actual_commonname2)
-
@istest
def convert_to_hex(self):
    """Bytes values are converted to hex, other values are kept as is."""
    # given
    raw_checksums = {
        'sha1_git': b'\xf6\xb7 \x8b+\xcd \x9fq5E\xe6\x03\xffg\x87\xd7\xb9D\xa1',  # noqa
        'sha1': b'\xf4O\xf0\xd4\xc0\xb0\xae\xca\xe4C\xab%\x10\xf7\x12h\x1e\x9f\xac\xeb',  # noqa
        'sha256': b'\xa8\xf9=\xf3\xfek\xa2$\xee\xc7\x1b\xc2\x83\xca\x96\xae8\xaf&\xab\x08\xfa\xb1\x13\xec(.s]\xf6Yb',  # noqa
        'length': 10,
    }  # noqa

    hex_checksums = {
        'sha1_git': 'f6b7208b2bcd209f713545e603ff6787d7b944a1',
        'sha1': 'f44ff0d4c0b0aecae443ab2510f712681e9faceb',
        'sha256': 'a8f93df3fe6ba224eec71bc283ca96ae38af26ab08fab113ec282e735df65962',  # noqa
        'length': 10,
    }

    # when
    converted = utils.convert_to_hex(raw_checksums)

    # then
    self.assertDictEqual(converted, hex_checksums)
@istest
def convert_to_hex_edge_cases(self):
    """Empty dictionary and None are returned unchanged."""
    # when
    converted_empty = utils.convert_to_hex({})
    # then
    self.assertDictEqual(converted_empty, {})

    # when
    converted_none = utils.convert_to_hex(None)
    # then
    self.assertIsNone(converted_none)
diff --git a/swh/loader/tar/utils.py b/swh/loader/tar/utils.py
index e0c9230..c5e9bac 100644
--- a/swh/loader/tar/utils.py
+++ b/swh/loader/tar/utils.py
@@ -1,81 +1,74 @@
# Copyright (C) 2015-2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import itertools
import random
from swh.model import hashutil
-def commonname(path0, path1, as_str=False):
- """Compute the commonname between the path0 and path1.
-
- """
- return path1.split(path0)[1]
-
-
def convert_to_hex(d):
    """Return a copy of a flat dictionary whose bytes values are replaced
    by their hexadecimal string (via hashutil.hash_to_hex).

    Args:
        d: flat dictionary with sha bytes in their values.

    Returns:
        Mirror dictionary with bytes values as string hex and any other
        value kept untouched. A falsy input (None, empty dict) is returned
        as is.

    """
    if not d:
        return d

    return {
        key: hashutil.hash_to_hex(value) if isinstance(value, bytes) else value
        for key, value in d.items()
    }
def grouper(iterable, n, fillvalue=None):
    """Split an iterable into consecutive fixed-length blocks.

    Args:
        iterable: an iterable
        n: size of block
        fillvalue: value used to pad the last block when it is incomplete

    Returns:
        iterator of n-sized tuples

    """
    # The very same iterator is repeated n times, so zip_longest pulls n
    # consecutive items for each tuple it builds.
    shared = iter(iterable)
    return itertools.zip_longest(*((shared,) * n), fillvalue=fillvalue)
def random_blocks(iterable, block=100, fillvalue=None):
    """Given an iterable:
    - slice the iterable in data set of block-sized elements
    - randomized the data set
    - yield each element

    Only the order inside each block is randomized; blocks are yielded in
    their original order. The last block is padded with fillvalue, so
    padding values are yielded too.

    Args:
        iterable: iterable of data
        block: number of elements per block
        fillvalue: a fillvalue for the last block if not enough values in
        last block

    Returns:
        An iterable of randomized per block-size elements.

    """
    for chunk in grouper(iterable, block, fillvalue=fillvalue):
        # grouper yields tuples; shuffle needs a mutable sequence.
        shuffled = list(chunk)
        random.shuffle(shuffled)
        yield from shuffled
diff --git a/version.txt b/version.txt
index 58b808f..e86f201 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-v0.0.31-0-gecb555c
\ No newline at end of file
+v0.0.32-0-gc957097
\ No newline at end of file
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Jun 21, 8:05 PM (3 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3254855
Attached To
rDLDTAR Tarball Loader
Event Timeline
Log In to Comment