diff --git a/PKG-INFO b/PKG-INFO index e8141e2..51636f1 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,93 +1,93 @@ Metadata-Version: 2.1 Name: swh.core -Version: 0.2.2 +Version: 0.2.3 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-core/ Description: swh-core ======== core library for swh's modules: - config parser - hash computations - serialization - logging mechanism - database connection - http-based RPC client/server Development ----------- We strongly recommend you to use a [virtualenv][1] if you want to run tests or hack the code. To set up your development environment: ``` (swh) user@host:~/swh-environment/swh-core$ pip install -e .[testing] ``` This will install every Python package needed to run this package's tests. Unit tests can be executed using [pytest][2] or [tox][3]. ``` (swh) user@host:~/swh-environment/swh-core$ pytest ============================== test session starts ============================== platform linux -- Python 3.7.3, pytest-3.10.1, py-1.8.0, pluggy-0.12.0 hypothesis profile 'default' -> database=DirectoryBasedExampleDatabase('/home/ddouard/src/swh-environment/swh-core/.hypothesis/examples') rootdir: /home/ddouard/src/swh-environment/swh-core, inifile: pytest.ini plugins: requests-mock-1.6.0, hypothesis-4.26.4, celery-4.3.0, postgresql-1.4.1 collected 89 items swh/core/api/tests/test_api.py .. [ 2%] swh/core/api/tests/test_async.py .... [ 6%] swh/core/api/tests/test_serializers.py ..... [ 12%] swh/core/db/tests/test_db.py .... [ 16%] swh/core/tests/test_cli.py ...... [ 23%] swh/core/tests/test_config.py .............. 
[ 39%] swh/core/tests/test_statsd.py ........................................... [ 87%] .... [ 92%] swh/core/tests/test_utils.py ....... [100%] ===================== 89 passed, 9 warnings in 6.94 seconds ===================== ``` Note: this git repository uses [pre-commit][4] hooks to ensure better and more consistent code. It should already be installed in your virtualenv (if not, just type `pip install pre-commit`). Make sure to activate it in your local copy of the git repository: ``` (swh) user@host:~/swh-environment/swh-core$ pre-commit install pre-commit installed at .git/hooks/pre-commit ``` Please read the [developer setup manual][5] for more information on how to hack on Software Heritage. [1]: https://virtualenv.pypa.io [2]: https://docs.pytest.org [3]: https://tox.readthedocs.io [4]: https://pre-commit.com [5]: https://docs.softwareheritage.org/devel/developer-setup.html Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing-core Provides-Extra: logging Provides-Extra: db Provides-Extra: testing-db Provides-Extra: http Provides-Extra: testing diff --git a/swh.core.egg-info/PKG-INFO b/swh.core.egg-info/PKG-INFO index e8141e2..51636f1 100644 --- a/swh.core.egg-info/PKG-INFO +++ b/swh.core.egg-info/PKG-INFO @@ -1,93 +1,93 @@ Metadata-Version: 2.1 Name: swh.core -Version: 0.2.2 +Version: 0.2.3 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, 
https://forge.softwareheritage.org/source/swh-core Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-core/ Description: swh-core ======== core library for swh's modules: - config parser - hash computations - serialization - logging mechanism - database connection - http-based RPC client/server Development ----------- We strongly recommend you to use a [virtualenv][1] if you want to run tests or hack the code. To set up your development environment: ``` (swh) user@host:~/swh-environment/swh-core$ pip install -e .[testing] ``` This will install every Python package needed to run this package's tests. Unit tests can be executed using [pytest][2] or [tox][3]. ``` (swh) user@host:~/swh-environment/swh-core$ pytest ============================== test session starts ============================== platform linux -- Python 3.7.3, pytest-3.10.1, py-1.8.0, pluggy-0.12.0 hypothesis profile 'default' -> database=DirectoryBasedExampleDatabase('/home/ddouard/src/swh-environment/swh-core/.hypothesis/examples') rootdir: /home/ddouard/src/swh-environment/swh-core, inifile: pytest.ini plugins: requests-mock-1.6.0, hypothesis-4.26.4, celery-4.3.0, postgresql-1.4.1 collected 89 items swh/core/api/tests/test_api.py .. [ 2%] swh/core/api/tests/test_async.py .... [ 6%] swh/core/api/tests/test_serializers.py ..... [ 12%] swh/core/db/tests/test_db.py .... [ 16%] swh/core/tests/test_cli.py ...... [ 23%] swh/core/tests/test_config.py .............. [ 39%] swh/core/tests/test_statsd.py ........................................... [ 87%] .... [ 92%] swh/core/tests/test_utils.py ....... [100%] ===================== 89 passed, 9 warnings in 6.94 seconds ===================== ``` Note: this git repository uses [pre-commit][4] hooks to ensure better and more consistent code. It should already be installed in your virtualenv (if not, just type `pip install pre-commit`). 
Make sure to activate it in your local copy of the git repository: ``` (swh) user@host:~/swh-environment/swh-core$ pre-commit install pre-commit installed at .git/hooks/pre-commit ``` Please read the [developer setup manual][5] for more information on how to hack on Software Heritage. [1]: https://virtualenv.pypa.io [2]: https://docs.pytest.org [3]: https://tox.readthedocs.io [4]: https://pre-commit.com [5]: https://docs.softwareheritage.org/devel/developer-setup.html Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing-core Provides-Extra: logging Provides-Extra: db Provides-Extra: testing-db Provides-Extra: http Provides-Extra: testing diff --git a/swh.core.egg-info/SOURCES.txt b/swh.core.egg-info/SOURCES.txt index 453ddfc..efe1f34 100644 --- a/swh.core.egg-info/SOURCES.txt +++ b/swh.core.egg-info/SOURCES.txt @@ -1,100 +1,102 @@ .gitignore .pre-commit-config.yaml AUTHORS CODE_OF_CONDUCT.md CONTRIBUTORS LICENSE MANIFEST.in Makefile README.md conftest.py mypy.ini pyproject.toml pytest.ini requirements-db.txt requirements-http.txt requirements-logging.txt requirements-swh.txt requirements-test-db.txt requirements-test.txt requirements.txt setup.cfg setup.py tox.ini version.txt docs/.gitignore docs/Makefile docs/cli.rst docs/conf.py docs/index.rst docs/_static/.placeholder docs/_templates/.placeholder swh/__init__.py swh.core.egg-info/PKG-INFO swh.core.egg-info/SOURCES.txt swh.core.egg-info/dependency_links.txt swh.core.egg-info/entry_points.txt swh.core.egg-info/requires.txt swh.core.egg-info/top_level.txt swh/core/__init__.py swh/core/api_async.py +swh/core/collections.py swh/core/config.py swh/core/logger.py swh/core/py.typed 
swh/core/pytest_plugin.py swh/core/sentry.py swh/core/statsd.py swh/core/tarball.py swh/core/utils.py swh/core/api/__init__.py swh/core/api/asynchronous.py swh/core/api/classes.py swh/core/api/gunicorn_config.py swh/core/api/negotiation.py swh/core/api/serializers.py swh/core/api/tests/__init__.py swh/core/api/tests/server_testing.py swh/core/api/tests/test_async.py swh/core/api/tests/test_classes.py swh/core/api/tests/test_gunicorn.py swh/core/api/tests/test_rpc_client.py swh/core/api/tests/test_rpc_client_server.py swh/core/api/tests/test_rpc_server.py swh/core/api/tests/test_serializers.py swh/core/cli/__init__.py swh/core/cli/db.py swh/core/db/__init__.py swh/core/db/common.py swh/core/db/db_utils.py swh/core/db/tests/__init__.py swh/core/db/tests/conftest.py swh/core/db/tests/db_testing.py swh/core/db/tests/test_cli.py swh/core/db/tests/test_db.py swh/core/sql/log-schema.sql swh/core/tests/__init__.py swh/core/tests/test_cli.py +swh/core/tests/test_collections.py swh/core/tests/test_config.py swh/core/tests/test_logger.py swh/core/tests/test_pytest_plugin.py swh/core/tests/test_statsd.py swh/core/tests/test_tarball.py swh/core/tests/test_utils.py swh/core/tests/data/archives/groff-1.02.tar.Z swh/core/tests/data/archives/hello.tar swh/core/tests/data/archives/hello.tar.bz2 swh/core/tests/data/archives/hello.tar.gz swh/core/tests/data/archives/hello.tar.lz swh/core/tests/data/archives/hello.tar.x swh/core/tests/data/archives/hello.zip swh/core/tests/data/http_example.com/something.json swh/core/tests/data/https_example.com/file.json swh/core/tests/data/https_example.com/file.json,name=doe,firstname=jane swh/core/tests/data/https_example.com/file.json_visit1 swh/core/tests/data/https_example.com/other.json swh/core/tests/data/https_forge.s.o/api_diffusion,attachments[uris]=1 swh/core/tests/data/https_www.reference.com/web,q=What+Is+an+Example+of+a+URL?,qo=contentPageRelatedSearch,o=600605,l=dir,sga=1 swh/core/tests/fixture/__init__.py 
# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import bisect
import collections
import itertools
from typing import (
    Any,
    Callable,
    Generic,
    Iterable,
    Iterator,
    List,
    Optional,
    Tuple,
    TypeVar,
)

SortedListItem = TypeVar("SortedListItem")
SortedListKey = TypeVar("SortedListKey")


class SortedList(collections.UserList, Generic[SortedListKey, SortedListItem]):
    """List of items kept sorted by a key computed once per item.

    Internally, ``self.data`` stores ``(key, item)`` pairs ordered by key.
    Iterating over the list (``__iter__``, ``iter_from``, ``iter_after``)
    yields the bare items.  The other methods inherited from ``UserList``
    are not overridden here and therefore operate on the underlying
    ``(key, item)`` pairs.

    Items sharing the same key are ordered by comparing the items
    themselves when they are orderable; otherwise they are kept in
    insertion order.
    """

    data: List[Tuple[SortedListKey, SortedListItem]]

    # https://github.com/python/mypy/issues/708
    # key: Callable[[SortedListItem], SortedListKey]

    def __init__(
        self,
        data: Optional[Iterable[SortedListItem]] = None,
        key: Optional[Callable[[SortedListItem], SortedListKey]] = None,
    ):
        """Build the list from ``data`` (may be ``None`` or empty).

        Args:
            data: initial items, in any order.
            key: function computing the sort key of an item; defaults to
                the identity function.
        """
        if key is None:

            def key(item):
                return item

        assert key is not None  # for mypy

        pairs = [(key(x), x) for x in data or []]
        try:
            ordered = sorted(pairs)
        except TypeError:
            # Two items share a key but cannot be compared with each other:
            # order on the key alone.  ``sorted`` is stable, so such items
            # keep their relative input order.
            ordered = sorted(pairs, key=lambda pair: pair[0])
        super().__init__(ordered)

        self.key: Callable[[SortedListItem], SortedListKey] = key

    def _index_after(self, k: Any) -> int:
        """Return the index just past the last entry whose key is <= ``k``.

        Only keys are compared, never items (same contract as
        ``bisect.bisect_right`` applied to the keys alone).
        """
        lo, hi = 0, len(self.data)
        while lo < hi:
            mid = (lo + hi) // 2
            if k < self.data[mid][0]:
                hi = mid
            else:
                lo = mid + 1
        return lo

    def add(self, item: SortedListItem):
        """Insert ``item`` at the position given by its key."""
        entry = (self.key(item), item)
        try:
            bisect.insort(self.data, entry)
        except TypeError:
            # An existing item with the same key is not orderable against
            # ``item``.  ``insort`` raises before mutating the list, so it is
            # safe to fall back to a key-only insertion after equal keys.
            self.data.insert(self._index_after(entry[0]), entry)

    def __iter__(self) -> Iterator[SortedListItem]:
        """Iterate over the items (without their keys) in key order."""
        for _, item in self.data:
            yield item

    def iter_from(self, start_key: Any) -> Iterator[SortedListItem]:
        """Returns an iterator over all the elements whose key is greater
        or equal to `start_key`.
        (This is an efficient equivalent to:
        `(x for x in L if key(x) >= start_key)`)
        """
        # A 1-tuple sorts before every ``(start_key, item)`` pair, so this
        # lands on the first entry whose key is >= start_key without ever
        # comparing items.
        from_index = bisect.bisect_left(self.data, (start_key,))
        for _, item in itertools.islice(self.data, from_index, None):
            yield item

    def iter_after(self, start_key: Any) -> Iterator[SortedListItem]:
        """Same as iter_from, but using a strict inequality."""
        # Bisect on the stored keys instead of re-computing ``key(item)`` for
        # every element that has to be skipped.
        from_index = self._index_after(start_key)
        for _, item in itertools.islice(self.data, from_index, None):
            yield item
@pytest.fixture
def prepare_shutil_state():
    """Unregister the additional unpack formats from shutil, if present.

    Every format named in ``tarball.ADDITIONAL_ARCHIVE_FORMATS`` that is
    currently registered with ``shutil`` is unregistered, then the
    ``shutil`` module itself is returned.

    """
    import shutil

    already_registered = [entry[0] for entry in shutil.get_unpack_formats()]
    extra_names = (fmt[0] for fmt in tarball.ADDITIONAL_ARCHIVE_FORMATS)
    for format_name in extra_names:
        if format_name not in already_registered:
            continue
        shutil.unregister_unpack_format(format_name)

    return shutil
def test_compress_uncompress_tar_modes(tmp_path):
    """A tar round-trip should keep the permission bits of each file."""
    source_dir = tmp_path / "compressme"
    source_dir.mkdir()

    # One plain file and one executable file, with the same content.
    for filename, mode in (("text.txt", 0o644), ("executable.sh", 0o755)):
        path = source_dir / filename
        path.write_text("echo foo")
        path.chmod(mode)

    archive = tmp_path / "archive.tar"
    tarball.compress(str(archive), "tar", str(source_dir))

    extracted_dir = tmp_path / "destdir"
    tarball.uncompress(str(archive), str(extracted_dir))

    # "executable.sh" sorts before "text.txt".
    extracted = sorted(extracted_dir.iterdir())
    (executable_path, text_path) = extracted
    assert text_path.stat().st_mode == 0o100644
    assert executable_path.stat().st_mode == 0o100755
def test__unpack_tar(tmp_path, datadir):
    """Unpacking a supported tarball into an existing folder should succeed

    """
    archive_name = "groff-1.02.tar.Z"
    archive_path = os.path.join(datadir, "archives", archive_name)
    assert os.path.exists(archive_path)

    # The destination directory must exist before unpacking.
    target_dir = os.path.join(tmp_path, archive_name)
    os.makedirs(target_dir, exist_ok=True)

    returned_dir = tarball._unpack_tar(archive_path, target_dir)

    # The call hands back the directory it extracted into...
    assert target_dir == returned_dir
    # ...and that directory is no longer empty.
    extracted_entries = os.listdir(target_dir)
    assert len(extracted_entries) > 0
def test_uncompress_tarpaths(tmp_path, datadir, prepare_shutil_state):
    """High level call uncompression on un/supported tarballs

    First checks that archives with a ``.Z``, ``.x`` or ``.lz`` suffix are
    rejected with a ``ValueError``, then registers the additional archive
    formats and checks that every archive of the data directory can be
    uncompressed.

    """
    archive_dir = os.path.join(datadir, "archives")
    tarpaths = [
        os.path.join(archive_dir, tarfile) for tarfile in os.listdir(archive_dir)
    ]
    # Guard against a vacuous pass (and an unbound loop counter below) when
    # the data directory holds no archive at all.
    assert tarpaths, f"no archive found in {archive_dir}"

    unsupported_tarpaths = [t for t in tarpaths if t.endswith((".Z", ".x", ".lz"))]

    # not supported yet
    for tarpath in unsupported_tarpaths:
        with pytest.raises(ValueError, match=f"Problem during unpacking {tarpath}."):
            tarball.uncompress(tarpath, dest=tmp_path)

    # register those unsupported formats
    tarball.register_new_archive_formats()

    # unsupported formats are now supported
    for n, tarpath in enumerate(tarpaths, start=1):
        tarball.uncompress(tarpath, dest=tmp_path)

    assert n == len(tarpaths)