diff --git a/PKG-INFO b/PKG-INFO index ec11c59..cdba6ca 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,93 +1,93 @@ Metadata-Version: 2.1 Name: swh.core -Version: 0.3.0 +Version: 0.3.1 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-core/ Description: swh-core ======== core library for swh's modules: - config parser - hash computations - serialization - logging mechanism - database connection - http-based RPC client/server Development ----------- We strongly recommend you to use a [virtualenv][1] if you want to run tests or hack the code. To set up your development environment: ``` (swh) user@host:~/swh-environment/swh-core$ pip install -e .[testing] ``` This will install every Python package needed to run this package's tests. Unit tests can be executed using [pytest][2] or [tox][3]. ``` (swh) user@host:~/swh-environment/swh-core$ pytest ============================== test session starts ============================== platform linux -- Python 3.7.3, pytest-3.10.1, py-1.8.0, pluggy-0.12.0 hypothesis profile 'default' -> database=DirectoryBasedExampleDatabase('/home/ddouard/src/swh-environment/swh-core/.hypothesis/examples') rootdir: /home/ddouard/src/swh-environment/swh-core, inifile: pytest.ini plugins: requests-mock-1.6.0, hypothesis-4.26.4, celery-4.3.0, postgresql-1.4.1 collected 89 items swh/core/api/tests/test_api.py .. [ 2%] swh/core/api/tests/test_async.py .... [ 6%] swh/core/api/tests/test_serializers.py ..... [ 12%] swh/core/db/tests/test_db.py .... [ 16%] swh/core/tests/test_cli.py ...... [ 23%] swh/core/tests/test_config.py .............. [ 39%] swh/core/tests/test_statsd.py ........................................... [ 87%] .... [ 92%] swh/core/tests/test_utils.py ....... [100%] ===================== 89 passed, 9 warnings in 6.94 seconds ===================== ``` Note: this git repository uses [pre-commit][4] hooks to ensure better and more consistent code. It should already be installed in your virtualenv (if not, just type `pip install pre-commit`). Make sure to activate it in your local copy of the git repository: ``` (swh) user@host:~/swh-environment/swh-core$ pre-commit install pre-commit installed at .git/hooks/pre-commit ``` Please read the [developer setup manual][5] for more information on how to hack on Software Heritage. [1]: https://virtualenv.pypa.io [2]: https://docs.pytest.org [3]: https://tox.readthedocs.io [4]: https://pre-commit.com [5]: https://docs.softwareheritage.org/devel/developer-setup.html Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing-core Provides-Extra: logging Provides-Extra: db Provides-Extra: testing-db Provides-Extra: http Provides-Extra: testing diff --git a/swh.core.egg-info/PKG-INFO b/swh.core.egg-info/PKG-INFO index ec11c59..cdba6ca 100644 --- a/swh.core.egg-info/PKG-INFO +++ b/swh.core.egg-info/PKG-INFO @@ -1,93 +1,93 @@ Metadata-Version: 2.1 Name: swh.core -Version: 0.3.0 +Version: 0.3.1 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-core/ Description: swh-core ======== core library for swh's modules: - config parser - hash computations - serialization - logging mechanism - database connection - http-based RPC client/server Development ----------- We strongly recommend you to use a [virtualenv][1] if you want to run tests or hack the code. To set up your development environment: ``` (swh) user@host:~/swh-environment/swh-core$ pip install -e .[testing] ``` This will install every Python package needed to run this package's tests. Unit tests can be executed using [pytest][2] or [tox][3]. ``` (swh) user@host:~/swh-environment/swh-core$ pytest ============================== test session starts ============================== platform linux -- Python 3.7.3, pytest-3.10.1, py-1.8.0, pluggy-0.12.0 hypothesis profile 'default' -> database=DirectoryBasedExampleDatabase('/home/ddouard/src/swh-environment/swh-core/.hypothesis/examples') rootdir: /home/ddouard/src/swh-environment/swh-core, inifile: pytest.ini plugins: requests-mock-1.6.0, hypothesis-4.26.4, celery-4.3.0, postgresql-1.4.1 collected 89 items swh/core/api/tests/test_api.py .. [ 2%] swh/core/api/tests/test_async.py .... [ 6%] swh/core/api/tests/test_serializers.py ..... [ 12%] swh/core/db/tests/test_db.py .... [ 16%] swh/core/tests/test_cli.py ...... [ 23%] swh/core/tests/test_config.py .............. [ 39%] swh/core/tests/test_statsd.py ........................................... [ 87%] .... [ 92%] swh/core/tests/test_utils.py ....... [100%] ===================== 89 passed, 9 warnings in 6.94 seconds ===================== ``` Note: this git repository uses [pre-commit][4] hooks to ensure better and more consistent code. It should already be installed in your virtualenv (if not, just type `pip install pre-commit`). Make sure to activate it in your local copy of the git repository: ``` (swh) user@host:~/swh-environment/swh-core$ pre-commit install pre-commit installed at .git/hooks/pre-commit ``` Please read the [developer setup manual][5] for more information on how to hack on Software Heritage. [1]: https://virtualenv.pypa.io [2]: https://docs.pytest.org [3]: https://tox.readthedocs.io [4]: https://pre-commit.com [5]: https://docs.softwareheritage.org/devel/developer-setup.html Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing-core Provides-Extra: logging Provides-Extra: db Provides-Extra: testing-db Provides-Extra: http Provides-Extra: testing diff --git a/swh/core/collections.py b/swh/core/collections.py index 92fab40..ed7b869 100644 --- a/swh/core/collections.py +++ b/swh/core/collections.py @@ -1,62 +1,61 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import bisect -import collections import itertools from typing import Any, Callable, Generic, Iterator, List, Optional, Tuple, TypeVar SortedListItem = TypeVar("SortedListItem") SortedListKey = TypeVar("SortedListKey") -class SortedList(collections.UserList, Generic[SortedListKey, SortedListItem]): +class SortedList(Generic[SortedListKey, SortedListItem]): data: List[Tuple[SortedListKey, SortedListItem]] # https://github.com/python/mypy/issues/708 # key: Callable[[SortedListItem], SortedListKey] def __init__( self, data: List[SortedListItem] = None, key: Optional[Callable[[SortedListItem], SortedListKey]] = None, ): if key is None: def key(item): return item assert key is not None # for mypy - super().__init__(sorted((key(x), x) for x in data or [])) + self.data = sorted((key(x), x) for x in data or []) self.key: Callable[[SortedListItem], SortedListKey] = key def add(self, item: SortedListItem): k = self.key(item) bisect.insort(self.data, (k, item)) def __iter__(self) -> Iterator[SortedListItem]: for (k, item) in self.data: yield item def iter_from(self, start_key: Any) -> Iterator[SortedListItem]: """Returns an iterator over all the elements whose key is greater or equal to `start_key`. (This is an efficient equivalent to: `(x for x in L if key(x) >= start_key)`) """ from_index = bisect.bisect_left(self.data, (start_key,)) for (k, item) in itertools.islice(self.data, from_index, None): yield item def iter_after(self, start_key: Any) -> Iterator[SortedListItem]: """Same as iter_from, but using a strict inequality.""" it = self.iter_from(start_key) for item in it: if self.key(item) > start_key: yield item break yield from it diff --git a/swh/core/logger.py b/swh/core/logger.py index 1c88883..5243668 100644 --- a/swh/core/logger.py +++ b/swh/core/logger.py @@ -1,118 +1,133 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import logging -from typing import Any, Generator, List, Tuple +from typing import Any, Dict, Generator, List, Tuple from systemd.journal import JournalHandler as _JournalHandler from systemd.journal import send try: from celery import current_task except ImportError: current_task = None EXTRA_LOGDATA_PREFIX = "swh_" +LOGGED_TASK_KWARGS = ("url", "instance") def db_level_of_py_level(lvl): """convert a log level of the logging module to a log level suitable for the logging Postgres DB """ return logging.getLevelName(lvl).lower() -def get_extra_data(record, task_args=True): - """Get the extra data to insert to the database from the logging record""" +def get_extra_data(record: logging.LogRecord) -> Dict[str, Any]: + """Get the extra data to send to the log handler from the logging record. + + This gets the following data: + - all fields in the record data starting with `EXTRA_LOGDATA_PREFIX` + - arguments to the logging call (which can either be a tuple, or a dict + if the arguments were named) + - if this is called within a celery task, the following data: + - the (uu)id of the task + - the name of the task + - any task keyword arguments named for values in `LOGGED_TASK_KWARGS` + """ log_data = record.__dict__ extra_data = { k[len(EXTRA_LOGDATA_PREFIX) :]: v for k, v in log_data.items() if k.startswith(EXTRA_LOGDATA_PREFIX) } args = log_data.get("args") if args: extra_data["logging_args"] = args # Retrieve Celery task info if current_task and current_task.request: extra_data["task"] = { "id": current_task.request.id, "name": current_task.name, } - if task_args: - extra_data["task"].update( - { - "kwargs": current_task.request.kwargs, - "args": current_task.request.args, - } - ) + + for task_arg in LOGGED_TASK_KWARGS: + if task_arg in current_task.request.kwargs: + try: + value = stringify(current_task.request.kwargs[task_arg]) + except Exception: + continue + + extra_data["task"][f"kwarg_{task_arg}"] = value return extra_data def flatten(data: Any, separator: str = "_") -> Generator[Tuple[str, Any], None, None]: """Flatten the data dictionary into a flat structure""" def inner_flatten( data: Any, prefix: List[str] ) -> Generator[Tuple[List[str], Any], None, None]: if isinstance(data, dict): if all(isinstance(key, str) for key in data): for key, value in data.items(): yield from inner_flatten(value, prefix + [key]) else: yield prefix, str(data) elif isinstance(data, (list, tuple)): for key, value in enumerate(data): yield from inner_flatten(value, prefix + [str(key)]) else: yield prefix, data for path, value in inner_flatten(data, []): yield separator.join(path), value -def stringify(value): +def stringify(value: Any) -> str: """Convert value to string""" if isinstance(value, datetime.datetime): return value.isoformat() return str(value) class JournalHandler(_JournalHandler): def emit(self, record): """Write `record` as a journal event. MESSAGE is taken from the message provided by the user, and PRIORITY, LOGGER, THREAD_NAME, CODE_{FILE,LINE,FUNC} fields are appended automatically. In addition, record.MESSAGE_ID will be used if present. + + This also records all the extra data fetched by `get_extra_data`. """ try: - extra_data = flatten(get_extra_data(record, task_args=False)) + extra_data = flatten(get_extra_data(record)) extra_data = { (EXTRA_LOGDATA_PREFIX + key).upper(): stringify(value) for key, value in extra_data } msg = self.format(record) pri = self.mapPriority(record.levelno) send( msg, PRIORITY=format(pri), LOGGER=record.name, THREAD_NAME=record.threadName, CODE_FILE=record.pathname, CODE_LINE=record.lineno, CODE_FUNC=record.funcName, **extra_data, ) except Exception: self.handleError(record) diff --git a/swh/core/tests/test_collections.py b/swh/core/tests/test_collections.py index 22efbc0..b2b4b21 100644 --- a/swh/core/tests/test_collections.py +++ b/swh/core/tests/test_collections.py @@ -1,71 +1,86 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from swh.core.collections import SortedList parametrize = pytest.mark.parametrize( "items", [ [1, 2, 3, 4, 5, 6, 10, 100], [10, 100, 6, 5, 4, 3, 2, 1], [10, 4, 5, 6, 1, 2, 3, 100], ], ) @parametrize def test_sorted_list_iter(items): list1 = SortedList() for item in items: list1.add(item) assert list(list1) == sorted(items) list2 = SortedList(items) assert list(list2) == sorted(items) @parametrize def test_sorted_list_iter__key(items): list1 = SortedList(key=lambda item: -item) for item in items: list1.add(item) assert list(list1) == list(reversed(sorted(items))) list2 = SortedList(items, key=lambda item: -item) assert list(list2) == list(reversed(sorted(items))) @parametrize def test_sorted_list_iter_from(items): list_ = SortedList(items) for split in items: expected = sorted(item for item in items if item >= split) assert list(list_.iter_from(split)) == expected, f"split: {split}" @parametrize def test_sorted_list_iter_from__key(items): list_ = SortedList(items, key=lambda item: -item) for split in items: expected = reversed(sorted(item for item in items if item <= split)) assert list(list_.iter_from(-split)) == list(expected), f"split: {split}" @parametrize def test_sorted_list_iter_after(items): list_ = SortedList(items) for split in items: expected = sorted(item for item in items if item > split) assert list(list_.iter_after(split)) == expected, f"split: {split}" @parametrize def test_sorted_list_iter_after__key(items): list_ = SortedList(items, key=lambda item: -item) for split in items: expected = reversed(sorted(item for item in items if item < split)) assert list(list_.iter_after(-split)) == list(expected), f"split: {split}" + + +@parametrize +def test_contains(items): + list_ = SortedList() + for i in range(len(items)): + for item in items[0:i]: + assert item in list_ + for item in items[i:]: + assert item not in list_ + + list_.add(items[i]) + + for item in items: + assert item in list_