diff --git a/PKG-INFO b/PKG-INFO index 3ad5043..e8141e2 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,93 +1,93 @@ Metadata-Version: 2.1 Name: swh.core -Version: 0.2.1 +Version: 0.2.2 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-core/ Description: swh-core ======== core library for swh's modules: - config parser - hash computations - serialization - logging mechanism - database connection - http-based RPC client/server Development ----------- We strongly recommend you to use a [virtualenv][1] if you want to run tests or hack the code. To set up your development environment: ``` (swh) user@host:~/swh-environment/swh-core$ pip install -e .[testing] ``` This will install every Python package needed to run this package's tests. Unit tests can be executed using [pytest][2] or [tox][3]. ``` (swh) user@host:~/swh-environment/swh-core$ pytest ============================== test session starts ============================== platform linux -- Python 3.7.3, pytest-3.10.1, py-1.8.0, pluggy-0.12.0 hypothesis profile 'default' -> database=DirectoryBasedExampleDatabase('/home/ddouard/src/swh-environment/swh-core/.hypothesis/examples') rootdir: /home/ddouard/src/swh-environment/swh-core, inifile: pytest.ini plugins: requests-mock-1.6.0, hypothesis-4.26.4, celery-4.3.0, postgresql-1.4.1 collected 89 items swh/core/api/tests/test_api.py .. [ 2%] swh/core/api/tests/test_async.py .... [ 6%] swh/core/api/tests/test_serializers.py ..... [ 12%] swh/core/db/tests/test_db.py .... [ 16%] swh/core/tests/test_cli.py ...... [ 23%] swh/core/tests/test_config.py .............. [ 39%] swh/core/tests/test_statsd.py ........................................... [ 87%] .... [ 92%] swh/core/tests/test_utils.py ....... [100%] ===================== 89 passed, 9 warnings in 6.94 seconds ===================== ``` Note: this git repository uses [pre-commit][4] hooks to ensure better and more consistent code. It should already be installed in your virtualenv (if not, just type `pip install pre-commit`). Make sure to activate it in your local copy of the git repository: ``` (swh) user@host:~/swh-environment/swh-core$ pre-commit install pre-commit installed at .git/hooks/pre-commit ``` Please read the [developer setup manual][5] for more information on how to hack on Software Heritage. [1]: https://virtualenv.pypa.io [2]: https://docs.pytest.org [3]: https://tox.readthedocs.io [4]: https://pre-commit.com [5]: https://docs.softwareheritage.org/devel/developer-setup.html Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing-core Provides-Extra: logging Provides-Extra: db Provides-Extra: testing-db Provides-Extra: http Provides-Extra: testing diff --git a/swh.core.egg-info/PKG-INFO b/swh.core.egg-info/PKG-INFO index 3ad5043..e8141e2 100644 --- a/swh.core.egg-info/PKG-INFO +++ b/swh.core.egg-info/PKG-INFO @@ -1,93 +1,93 @@ Metadata-Version: 2.1 Name: swh.core -Version: 0.2.1 +Version: 0.2.2 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-core/ Description: swh-core ======== core library for swh's modules: - config parser - hash computations - serialization - logging mechanism - database connection - http-based RPC client/server Development ----------- We strongly recommend you to use a [virtualenv][1] if you want to run tests or hack the code. To set up your development environment: ``` (swh) user@host:~/swh-environment/swh-core$ pip install -e .[testing] ``` This will install every Python package needed to run this package's tests. Unit tests can be executed using [pytest][2] or [tox][3]. ``` (swh) user@host:~/swh-environment/swh-core$ pytest ============================== test session starts ============================== platform linux -- Python 3.7.3, pytest-3.10.1, py-1.8.0, pluggy-0.12.0 hypothesis profile 'default' -> database=DirectoryBasedExampleDatabase('/home/ddouard/src/swh-environment/swh-core/.hypothesis/examples') rootdir: /home/ddouard/src/swh-environment/swh-core, inifile: pytest.ini plugins: requests-mock-1.6.0, hypothesis-4.26.4, celery-4.3.0, postgresql-1.4.1 collected 89 items swh/core/api/tests/test_api.py .. [ 2%] swh/core/api/tests/test_async.py .... [ 6%] swh/core/api/tests/test_serializers.py ..... [ 12%] swh/core/db/tests/test_db.py .... [ 16%] swh/core/tests/test_cli.py ...... [ 23%] swh/core/tests/test_config.py .............. [ 39%] swh/core/tests/test_statsd.py ........................................... [ 87%] .... [ 92%] swh/core/tests/test_utils.py ....... [100%] ===================== 89 passed, 9 warnings in 6.94 seconds ===================== ``` Note: this git repository uses [pre-commit][4] hooks to ensure better and more consistent code. It should already be installed in your virtualenv (if not, just type `pip install pre-commit`). Make sure to activate it in your local copy of the git repository: ``` (swh) user@host:~/swh-environment/swh-core$ pre-commit install pre-commit installed at .git/hooks/pre-commit ``` Please read the [developer setup manual][5] for more information on how to hack on Software Heritage. [1]: https://virtualenv.pypa.io [2]: https://docs.pytest.org [3]: https://tox.readthedocs.io [4]: https://pre-commit.com [5]: https://docs.softwareheritage.org/devel/developer-setup.html Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing-core Provides-Extra: logging Provides-Extra: db Provides-Extra: testing-db Provides-Extra: http Provides-Extra: testing diff --git a/swh.core.egg-info/SOURCES.txt b/swh.core.egg-info/SOURCES.txt index 2b8418e..453ddfc 100644 --- a/swh.core.egg-info/SOURCES.txt +++ b/swh.core.egg-info/SOURCES.txt @@ -1,99 +1,100 @@ .gitignore .pre-commit-config.yaml AUTHORS CODE_OF_CONDUCT.md CONTRIBUTORS LICENSE MANIFEST.in Makefile README.md conftest.py mypy.ini pyproject.toml pytest.ini requirements-db.txt requirements-http.txt requirements-logging.txt requirements-swh.txt requirements-test-db.txt requirements-test.txt requirements.txt setup.cfg setup.py tox.ini version.txt docs/.gitignore docs/Makefile docs/cli.rst docs/conf.py docs/index.rst docs/_static/.placeholder docs/_templates/.placeholder swh/__init__.py swh.core.egg-info/PKG-INFO swh.core.egg-info/SOURCES.txt swh.core.egg-info/dependency_links.txt swh.core.egg-info/entry_points.txt swh.core.egg-info/requires.txt swh.core.egg-info/top_level.txt swh/core/__init__.py swh/core/api_async.py swh/core/config.py swh/core/logger.py swh/core/py.typed swh/core/pytest_plugin.py swh/core/sentry.py swh/core/statsd.py swh/core/tarball.py swh/core/utils.py swh/core/api/__init__.py swh/core/api/asynchronous.py swh/core/api/classes.py swh/core/api/gunicorn_config.py swh/core/api/negotiation.py swh/core/api/serializers.py swh/core/api/tests/__init__.py swh/core/api/tests/server_testing.py swh/core/api/tests/test_async.py +swh/core/api/tests/test_classes.py swh/core/api/tests/test_gunicorn.py swh/core/api/tests/test_rpc_client.py swh/core/api/tests/test_rpc_client_server.py swh/core/api/tests/test_rpc_server.py swh/core/api/tests/test_serializers.py swh/core/cli/__init__.py swh/core/cli/db.py swh/core/db/__init__.py swh/core/db/common.py swh/core/db/db_utils.py swh/core/db/tests/__init__.py swh/core/db/tests/conftest.py swh/core/db/tests/db_testing.py swh/core/db/tests/test_cli.py swh/core/db/tests/test_db.py swh/core/sql/log-schema.sql swh/core/tests/__init__.py swh/core/tests/test_cli.py swh/core/tests/test_config.py swh/core/tests/test_logger.py swh/core/tests/test_pytest_plugin.py swh/core/tests/test_statsd.py swh/core/tests/test_tarball.py swh/core/tests/test_utils.py swh/core/tests/data/archives/groff-1.02.tar.Z swh/core/tests/data/archives/hello.tar swh/core/tests/data/archives/hello.tar.bz2 swh/core/tests/data/archives/hello.tar.gz swh/core/tests/data/archives/hello.tar.lz swh/core/tests/data/archives/hello.tar.x swh/core/tests/data/archives/hello.zip swh/core/tests/data/http_example.com/something.json swh/core/tests/data/https_example.com/file.json swh/core/tests/data/https_example.com/file.json,name=doe,firstname=jane swh/core/tests/data/https_example.com/file.json_visit1 swh/core/tests/data/https_example.com/other.json swh/core/tests/data/https_forge.s.o/api_diffusion,attachments[uris]=1 swh/core/tests/data/https_www.reference.com/web,q=What+Is+an+Example+of+a+URL?,qo=contentPageRelatedSearch,o=600605,l=dir,sga=1 swh/core/tests/fixture/__init__.py swh/core/tests/fixture/conftest.py swh/core/tests/fixture/test_pytest_plugin.py swh/core/tests/fixture/data/https_example.com/file.json \ No newline at end of file diff --git a/swh/core/api/classes.py b/swh/core/api/classes.py index 28f5f69..0aefcae 100644 --- a/swh/core/api/classes.py +++ b/swh/core/api/classes.py @@ -1,25 +1,44 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from dataclasses import dataclass, field from typing import ( + Callable, Generic, + Iterable, List, Optional, TypeVar, ) TResult = TypeVar("TResult") TToken = TypeVar("TToken") @dataclass(eq=True) class PagedResult(Generic[TResult, TToken]): """Represents a page of results; with a token to get the next page""" results: List[TResult] = field(default_factory=list) next_page_token: Optional[TToken] = field(default=None) + + +def stream_results( + f: Callable[..., PagedResult[TResult, TToken]], *args, **kwargs +) -> Iterable[TResult]: + """Consume the paginated result and stream the page results + + """ + if "page_token" in kwargs: + raise TypeError('stream_results has no argument "page_token".') + page_token = None + while True: + page_result = f(*args, page_token=page_token, **kwargs) + yield from page_result.results + page_token = page_result.next_page_token + if page_token is None: + break diff --git a/swh/core/api/tests/test_classes.py b/swh/core/api/tests/test_classes.py new file mode 100644 index 0000000..d63d410 --- /dev/null +++ b/swh/core/api/tests/test_classes.py @@ -0,0 +1,60 @@ +# Copyright (C) 2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.core.api.classes import PagedResult as CorePagedResult, stream_results + +from typing import TypeVar + + +T = TypeVar("T") +TestPagedResult = CorePagedResult[T, bytes] + + +def test_stream_results_no_result(): + def paged_results(page_token) -> TestPagedResult: + return TestPagedResult(results=[], next_page_token=None) + + # only 1 call, no pagination + actual_data = stream_results(paged_results) + assert list(actual_data) == [] + + +def test_stream_results_no_pagination(): + input_data = [ + {"url": "something"}, + {"url": "something2"}, + ] + + def paged_results(page_token) -> TestPagedResult: + return TestPagedResult(results=input_data, next_page_token=None) + + # only 1 call, no pagination + actual_data = stream_results(paged_results) + assert list(actual_data) == input_data + + +def test_stream_results_pagination(): + input_data = [ + {"url": "something"}, + {"url": "something2"}, + ] + input_data2 = [ + {"url": "something3"}, + ] + input_data3 = [ + {"url": "something4"}, + ] + + def page_results2(page_token=None) -> TestPagedResult: + result_per_token = { + None: TestPagedResult(results=input_data, next_page_token=b"two"), + b"two": TestPagedResult(results=input_data2, next_page_token=b"three"), + b"three": TestPagedResult(results=input_data3, next_page_token=None), + } + return result_per_token[page_token] + + # multiple calls to solve the pagination calls + actual_data = stream_results(page_results2) + assert list(actual_data) == input_data + input_data2 + input_data3