diff --git a/swh/lister/core/indexing_lister.py b/swh/lister/core/indexing_lister.py --- a/swh/lister/core/indexing_lister.py +++ b/swh/lister/core/indexing_lister.py @@ -139,18 +139,22 @@ partition_width = (max_index - min_index) / n_partitions - partitions = [ - [ - format_bound(min_index + i * partition_width), - format_bound(min_index + (i+1) * partition_width), - ] for i in range(n_partitions) + # Generate n_partitions + 1 bounds for n_partitions partitions + bounds = [ + format_bound(min_index + i * partition_width) + for i in range(n_partitions + 1) ] + # Trim duplicate bounds + bounds.append(None) + bounds = [cur + for cur, next in zip(bounds[:-1], bounds[1:]) + if cur != next] + # Remove bounds for lowest and highest partition - partitions[0][0] = None - partitions[-1][1] = None + bounds[0] = bounds[-1] = None - return [tuple(partition) for partition in partitions] + return list(zip(bounds[:-1], bounds[1:])) def db_first_index(self): """Look in the db for the smallest indexable value diff --git a/swh/lister/core/tests/test_indexing_lister.py b/swh/lister/core/tests/test_indexing_lister.py --- a/swh/lister/core/tests/test_indexing_lister.py +++ b/swh/lister/core/tests/test_indexing_lister.py @@ -63,6 +63,19 @@ assert partitions[-1] == (9000, None) +def test_db_partition_indices_small_index_range(): + m = MockedIndexingListerDbPartitionIndices( + num_entries=5000, + first_index=0, + last_index=5, + ) + assert m + + partitions = m.db_partition_indices(100) + + assert partitions == [(None, 1), (1, 2), (2, 3), (3, 4), (4, None)] + + def test_db_partition_indices_date_indices(): # 24 hour delta first = datetime.datetime.fromisoformat('2019-11-01T00:00:00+00:00')