diff --git a/debian/control b/debian/control index 8c64902..be018b9 100644 --- a/debian/control +++ b/debian/control @@ -1,30 +1,30 @@ Source: swh-loader-tar Maintainer: Software Heritage developers Section: python Priority: optional Build-Depends: debhelper (>= 9), dh-python (>= 2), python3-all, python3-nose, python3-setuptools, - python3-swh.core (>= 0.0.36~), + python3-swh.core (>= 0.0.46~), python3-swh.loader.core (>= 0.0.35~), python3-swh.loader.dir (>= 0.0.33~), python3-swh.model (>= 0.0.27~), python3-swh.scheduler (>= 0.0.14~), python3-swh.storage (>= 0.0.83~), python3-vcversioner Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/diffusion/DLDTAR/ Package: python3-swh.loader.tar Architecture: all -Depends: python3-swh.core (>= 0.0.36~), +Depends: python3-swh.core (>= 0.0.46~), python3-swh.loader.core (>= 0.0.35~), python3-swh.loader.dir (>= 0.0.33~), python3-swh.model (>= 0.0.27~), python3-swh.scheduler (>= 0.0.14~), python3-swh.storage (>= 0.0.83~), ${misc:Depends}, ${python3:Depends} Description: Software Heritage Tarball Loader diff --git a/requirements-swh.txt b/requirements-swh.txt index e1f2d27..7442ced 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,6 +1,6 @@ -swh.core >= 0.0.36 +swh.core >= 0.0.46 swh.model >= 0.0.27 swh.scheduler >= 0.0.14 swh.storage >= 0.0.83 swh.loader.core >= 0.0.35 swh.loader.dir >= 0.0.33 diff --git a/swh/loader/tar/utils.py b/swh/loader/tar/utils.py index 73a17d6..232832d 100644 --- a/swh/loader/tar/utils.py +++ b/swh/loader/tar/utils.py @@ -1,56 +1,37 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import random -import itertools - -def grouper(iterable, n, fillvalue=None): - """Collect data into fixed-length chunks or blocks. - - Args: - iterable (Iterable): an iterable - n (int): size of block to slice the iterable into - fillvalue (Optional[Something]): value to use as fill-in - values (typically for the last loop, the iterable might be - less than n elements). None by default but could be anything - relevant for the caller (e.g tuple of (None, None)) - - Returns: - fixed-length chunks of blocks as iterables - - """ - args = [iter(iterable)] * n - for _data in itertools.zip_longest(*args, fillvalue=fillvalue): - yield (d for d in _data if d is not fillvalue) +from swh.core.utils import grouper def random_blocks(iterable, block=100, fillvalue=None): """Given an iterable: - slice the iterable in data set of block-sized elements - randomized the block-sized elements - yield each element of that randomized block-sized - continue onto the next block-sized block Args: iterable (Iterable): an iterable block (int): number of elements per block fillvalue (Optional[Something]): value to use as fill-in values (typically for the last loop, the iterable might be less than n elements). None by default but could be anything relevant for the caller (e.g tuple of (None, None)) Yields: random elements per size of block """ count = 0 for iter_ in grouper(iterable, block, fillvalue=fillvalue): count += 1 lst = list(iter_) random.shuffle(lst) for e in lst: yield e