diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py --- a/swh/scheduler/backend.py +++ b/swh/scheduler/backend.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2021 The Software Heritage developers +# Copyright (C) 2015-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -314,6 +314,7 @@ self, lister_id: Optional[UUID] = None, url: Optional[str] = None, + enabled: Optional[bool] = True, limit: int = 1000, page_token: Optional[ListedOriginPageToken] = None, db=None, @@ -334,6 +335,10 @@ query_filters.append("url = %s") query_params.append(url) + if enabled is not None: + query_filters.append("enabled = %s") + query_params.append(enabled) + if page_token is not None: query_filters.append("(lister_id, url) > %s") # the typeshed annotation for tuple() is too strict. diff --git a/swh/scheduler/interface.py b/swh/scheduler/interface.py --- a/swh/scheduler/interface.py +++ b/swh/scheduler/interface.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2021 The Software Heritage developers +# Copyright (C) 2015-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -385,14 +385,23 @@ self, lister_id: Optional[UUID] = None, url: Optional[str] = None, + enabled: Optional[bool] = True, limit: int = 1000, page_token: Optional[ListedOriginPageToken] = None, ) -> PaginatedListedOriginList: - """Get information on the listed origins matching either the `url` or - `lister_id`, or both arguments. + """Get information on listed origins, possibly filtered, in a paginated way. - Use the `limit` and `page_token` arguments for continuation. The next - page token, if any, is returned in the PaginatedListedOriginList object. 
+ Args: + lister_id: if provided, return origins discovered with that lister + url: if provided, return origins matching that URL + enabled: If :const:`True` return only enabled origins, if :const:`False` + return only disabled origins, if :const:`None` return all origins. + limit: maximum number of origins per page + page_token: continuation token to get the next page of origins; it is + returned in the :class:`PaginatedListedOriginList` object + + Returns: + A page of listed origins """ ... diff --git a/swh/scheduler/tests/conftest.py b/swh/scheduler/tests/conftest.py --- a/swh/scheduler/tests/conftest.py +++ b/swh/scheduler/tests/conftest.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2021 The Software Heritage developers +# Copyright (C) 2016-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -63,6 +63,14 @@ return sum(listed_origins_by_type.values(), []) +@pytest.fixture +def listed_origins_with_non_enabled(listed_origins) -> List[ListedOrigin]: + """Return the listed origins, mutated so that odd-indexed ones are disabled""" + for i, origin in enumerate(listed_origins): + origin.enabled = i % 2 == 0 + return listed_origins + + @pytest.fixture def storage(swh_storage): """An instance of in-memory storage that gets injected diff --git a/swh/scheduler/tests/test_journal_client.py b/swh/scheduler/tests/test_journal_client.py --- a/swh/scheduler/tests/test_journal_client.py +++ b/swh/scheduler/tests/test_journal_client.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -1003,7 +1003,7 @@ ) # Now check that the origin in question is disabled - actual_page = 
swh_scheduler.get_listed_origins(url="bar") + actual_page = swh_scheduler.get_listed_origins(url="bar", enabled=False) assert len(actual_page.results) == 1 assert actual_page.next_page_token is None diff --git a/swh/scheduler/tests/test_scheduler.py b/swh/scheduler/tests/test_scheduler.py --- a/swh/scheduler/tests/test_scheduler.py +++ b/swh/scheduler/tests/test_scheduler.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2021 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -795,6 +795,38 @@ assert ret.next_page_token is None assert len(ret.results) == len(listed_origins) + def test_get_listed_origins_with_enabled_parameter( + self, swh_scheduler, listed_origins_with_non_enabled + ) -> None: + swh_scheduler.record_listed_origins(listed_origins_with_non_enabled) + + # enabled=True: only the enabled listed origins are returned + ret = swh_scheduler.get_listed_origins( + enabled=True, limit=len(listed_origins_with_non_enabled) + 1 + ) + assert ret.next_page_token is None + assert len(ret.results) == len( + [lo for lo in listed_origins_with_non_enabled if lo.enabled] + ) + assert all([lo.enabled for lo in ret.results]) + + # enabled=False: only the disabled listed origins are returned + ret = swh_scheduler.get_listed_origins( + enabled=False, limit=len(listed_origins_with_non_enabled) + 1 + ) + assert ret.next_page_token is None + assert len(ret.results) == len( + [lo for lo in listed_origins_with_non_enabled if not lo.enabled] + ) + assert all([not lo.enabled for lo in ret.results]) + + # enabled=None: no filtering, all listed origins are returned + ret = swh_scheduler.get_listed_origins( + enabled=None, limit=len(listed_origins_with_non_enabled) + 1 + ) + assert ret.next_page_token is None + assert len(ret.results) == len(listed_origins_with_non_enabled) + def _grab_next_visits_setup(self, swh_scheduler, listed_origins_by_type): 
"""Basic origins setup for scheduling policy tests""" visit_type = next(iter(listed_origins_by_type))