Page MenuHomeSoftware Heritage

D3899.diff
No OneTemporary

D3899.diff

diff --git a/swh/lister/gitea/tests/test_tasks.py b/swh/lister/gitea/tests/test_tasks.py
--- a/swh/lister/gitea/tests/test_tasks.py
+++ b/swh/lister/gitea/tests/test_tasks.py
@@ -4,9 +4,12 @@
# See top-level LICENSE file for more information
from time import sleep
+
from celery.result import GroupResult
+from unittest.mock import patch, call
-from unittest.mock import patch
+from swh.lister.gitea.tasks import NBPAGES
+from swh.lister.utils import split_range
def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
@@ -57,13 +60,11 @@
@patch("swh.lister.gitea.tasks.GiteaLister")
def test_relister(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
+ total_pages = 85
# setup the mocked GiteaLister
lister.return_value = lister
lister.run.return_value = None
- lister.get_pages_information.return_value = (None, 85, None)
- lister.db_partition_indices.return_value = [
- (i, i + 9) for i in range(0, 80, 10)
- ] + [(80, 85)]
+ lister.get_pages_information.return_value = (None, total_pages, None)
res = swh_scheduler_celery_app.send_task("swh.lister.gitea.tasks.FullGiteaRelister")
assert res
@@ -92,25 +93,21 @@
lister.get_pages_information.assert_called_once_with()
# lister.run should have been called once per partition interval
- for i in range(8):
- # XXX inconsistent behavior: max_bound is EXCLUDED here
+ for min_bound, max_bound in split_range(total_pages, NBPAGES):
assert (
- dict(min_bound=10 * i, max_bound=10 * i + 10),
- ) in lister.run.call_args_list
- assert (dict(min_bound=80, max_bound=85),) in lister.run.call_args_list
+ call(min_bound=min_bound, max_bound=max_bound) in lister.run.call_args_list
+ )
@patch("swh.lister.gitea.tasks.GiteaLister")
def test_relister_instance(
lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
):
+ total_pages = 85
# setup the mocked GiteaLister
lister.return_value = lister
lister.run.return_value = None
- lister.get_pages_information.return_value = (None, 85, None)
- lister.db_partition_indices.return_value = [
- (i, i + 9) for i in range(0, 80, 10)
- ] + [(80, 85)]
+ lister.get_pages_information.return_value = (None, total_pages, None)
res = swh_scheduler_celery_app.send_task(
"swh.lister.gitea.tasks.FullGiteaRelister",
@@ -142,9 +139,7 @@
lister.get_pages_information.assert_called_once_with()
# lister.run should have been called once per partition interval
- for i in range(8):
- # XXX inconsistent behavior: max_bound is EXCLUDED here
+ for min_bound, max_bound in split_range(total_pages, NBPAGES):
assert (
- dict(min_bound=10 * i, max_bound=10 * i + 10),
- ) in lister.run.call_args_list
- assert (dict(min_bound=80, max_bound=85),) in lister.run.call_args_list
+ call(min_bound=min_bound, max_bound=max_bound) in lister.run.call_args_list
+ )
diff --git a/swh/lister/gitlab/tests/test_tasks.py b/swh/lister/gitlab/tests/test_tasks.py
--- a/swh/lister/gitlab/tests/test_tasks.py
+++ b/swh/lister/gitlab/tests/test_tasks.py
@@ -4,9 +4,12 @@
# See top-level LICENSE file for more information
from time import sleep
+
from celery.result import GroupResult
+from unittest.mock import patch, call
-from unittest.mock import patch
+from swh.lister.gitea.tasks import NBPAGES
+from swh.lister.utils import split_range
def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
@@ -57,13 +60,11 @@
@patch("swh.lister.gitlab.tasks.GitLabLister")
def test_relister(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
+ total_pages = 85
# setup the mocked GitlabLister
lister.return_value = lister
lister.run.return_value = None
- lister.get_pages_information.return_value = (None, 85, None)
- lister.db_partition_indices.return_value = [
- (i, i + 9) for i in range(0, 80, 10)
- ] + [(80, 85)]
+ lister.get_pages_information.return_value = (None, total_pages, None)
res = swh_scheduler_celery_app.send_task(
"swh.lister.gitlab.tasks.FullGitLabRelister"
@@ -94,25 +95,21 @@
lister.get_pages_information.assert_called_once_with()
# lister.run should have been called once per partition interval
- for i in range(8):
- # XXX inconsistent behavior: max_bound is EXCLUDED here
+ for min_bound, max_bound in split_range(total_pages, NBPAGES):
assert (
- dict(min_bound=10 * i, max_bound=10 * i + 10),
- ) in lister.run.call_args_list
- assert (dict(min_bound=80, max_bound=85),) in lister.run.call_args_list
+ call(min_bound=min_bound, max_bound=max_bound) in lister.run.call_args_list
+ )
@patch("swh.lister.gitlab.tasks.GitLabLister")
def test_relister_instance(
lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
):
+ total_pages = 85
# setup the mocked GitlabLister
lister.return_value = lister
lister.run.return_value = None
- lister.get_pages_information.return_value = (None, 85, None)
- lister.db_partition_indices.return_value = [
- (i, i + 9) for i in range(0, 80, 10)
- ] + [(80, 85)]
+ lister.get_pages_information.return_value = (None, total_pages, None)
res = swh_scheduler_celery_app.send_task(
"swh.lister.gitlab.tasks.FullGitLabRelister",
@@ -144,9 +141,7 @@
lister.get_pages_information.assert_called_once_with()
# lister.run should have been called once per partition interval
- for i in range(8):
- # XXX inconsistent behavior: max_bound is EXCLUDED here
+ for min_bound, max_bound in split_range(total_pages, NBPAGES):
assert (
- dict(min_bound=10 * i, max_bound=10 * i + 10),
- ) in lister.run.call_args_list
- assert (dict(min_bound=80, max_bound=85),) in lister.run.call_args_list
+ call(min_bound=min_bound, max_bound=max_bound) in lister.run.call_args_list
+ )
diff --git a/swh/lister/tests/test_utils.py b/swh/lister/tests/test_utils.py
--- a/swh/lister/tests/test_utils.py
+++ b/swh/lister/tests/test_utils.py
@@ -11,7 +11,12 @@
@pytest.mark.parametrize(
"total_pages,nb_pages,expected_ranges",
- [(14, 5, [(0, 5), (5, 10), (10, 14)]), (19, 10, [(0, 10), (10, 19)])],
+ [
+ (14, 5, [(0, 4), (5, 9), (10, 14)]),
+ (19, 10, [(0, 9), (10, 19)]),
+ (20, 3, [(0, 2), (3, 5), (6, 8), (9, 11), (12, 14), (15, 17), (18, 20)]),
+ (21, 3, [(0, 2), (3, 5), (6, 8), (9, 11), (12, 14), (15, 17), (18, 21),],),
+ ],
)
def test_split_range(total_pages, nb_pages, expected_ranges):
actual_ranges = list(utils.split_range(total_pages, nb_pages))
diff --git a/swh/lister/utils.py b/swh/lister/utils.py
--- a/swh/lister/utils.py
+++ b/swh/lister/utils.py
@@ -1,13 +1,28 @@
-# Copyright (C) 2018 the Software Heritage developers
+# Copyright (C) 2018-2020 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from typing import Iterator, Tuple
-def split_range(total_pages, nb_pages):
+
+def split_range(total_pages: int, nb_pages: int) -> Iterator[Tuple[int, int]]:
+ """Split `total_pages` into mostly `nb_pages` ranges. In some cases, the last range can
+ have one more element.
+
+ >>> split_range(19, 10)
+ [(0, 9), (10, 19)]
+
+ >>> split_range(20, 3)
+ [(0, 2), (3, 5), (6, 8), (9, 11), (12, 14), (15, 17), (18, 20)]
+
+ >>> split_range(21, 3)
+ [(0, 2), (3, 5), (6, 8), (9, 11), (12, 14), (15, 17), (18, 21)]
+
+ """
prev_index = None
for index in range(0, total_pages, nb_pages):
if index is not None and prev_index is not None:
- yield prev_index, index
+ yield prev_index, index - 1
prev_index = index
if index != total_pages:

File Metadata

Mime Type
text/plain
Expires
Tue, Dec 17, 8:02 PM (2 d, 17 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224304

Event Timeline