diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c2bbc53..64d1c34 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,41 +1,41 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.1.0 + rev: v4.3.0 hooks: - id: trailing-whitespace - id: check-json - id: check-yaml - - repo: https://gitlab.com/pycqa/flake8 - rev: 4.0.1 + - repo: https://github.com/pycqa/flake8 + rev: 5.0.4 hooks: - id: flake8 - additional_dependencies: [flake8-bugbear==22.3.23] + additional_dependencies: [flake8-bugbear==22.9.23] - repo: https://github.com/codespell-project/codespell - rev: v2.1.0 + rev: v2.2.2 hooks: - id: codespell name: Check source code spelling args: [-L crate] stages: [commit] - repo: local hooks: - id: mypy name: mypy entry: mypy args: [swh] pass_filenames: false language: system types: [python] - repo: https://github.com/PyCQA/isort rev: 5.10.1 hooks: - id: isort - repo: https://github.com/python/black - rev: 22.3.0 + rev: 22.10.0 hooks: - id: black diff --git a/PKG-INFO b/PKG-INFO index 828ba2d..ee6daf2 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,39 +1,39 @@ Metadata-Version: 2.1 Name: swh.core -Version: 2.16.1 +Version: 2.17.0 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-core/ Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/x-rst Provides-Extra: testing-core Provides-Extra: logging Provides-Extra: db Provides-Extra: http Provides-Extra: github Provides-Extra: testing License-File: LICENSE License-File: AUTHORS Software Heritage - Core foundations ==================================== Low-level utilities and helpers used by almost all other modules in the stack. core library for swh's modules: - config parser - serialization - logging mechanism - database connection - http-based RPC client/server diff --git a/swh.core.egg-info/PKG-INFO b/swh.core.egg-info/PKG-INFO index 828ba2d..ee6daf2 100644 --- a/swh.core.egg-info/PKG-INFO +++ b/swh.core.egg-info/PKG-INFO @@ -1,39 +1,39 @@ Metadata-Version: 2.1 Name: swh.core -Version: 2.16.1 +Version: 2.17.0 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-core/ Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/x-rst Provides-Extra: testing-core Provides-Extra: logging Provides-Extra: db Provides-Extra: http Provides-Extra: github Provides-Extra: testing License-File: LICENSE License-File: AUTHORS Software Heritage - Core foundations ==================================== Low-level utilities and helpers used by almost all other modules in the stack. core library for swh's modules: - config parser - serialization - logging mechanism - database connection - http-based RPC client/server diff --git a/swh.core.egg-info/SOURCES.txt b/swh.core.egg-info/SOURCES.txt index 2cd17f0..06732d4 100644 --- a/swh.core.egg-info/SOURCES.txt +++ b/swh.core.egg-info/SOURCES.txt @@ -1,145 +1,146 @@ .git-blame-ignore-revs .gitignore .pre-commit-config.yaml AUTHORS CODE_OF_CONDUCT.md CONTRIBUTORS LICENSE MANIFEST.in Makefile Makefile.local README.rst conftest.py mypy.ini pyproject.toml pytest.ini requirements-db-pytestplugin.txt requirements-db.txt requirements-github.txt requirements-http.txt requirements-logging.txt requirements-swh.txt requirements-test.txt requirements.txt setup.cfg setup.py tox.ini docs/.gitignore docs/Makefile docs/README.rst docs/cli.rst docs/conf.py docs/db.rst docs/index.rst docs/_static/.placeholder docs/_templates/.placeholder swh/__init__.py swh/__main__.py swh.core.egg-info/PKG-INFO swh.core.egg-info/SOURCES.txt swh.core.egg-info/dependency_links.txt swh.core.egg-info/entry_points.txt swh.core.egg-info/requires.txt swh.core.egg-info/top_level.txt swh/core/__init__.py swh/core/api_async.py swh/core/collections.py swh/core/config.py swh/core/logger.py swh/core/py.typed swh/core/pytest_plugin.py swh/core/sentry.py swh/core/statsd.py swh/core/tarball.py swh/core/utils.py swh/core/api/__init__.py swh/core/api/asynchronous.py swh/core/api/classes.py swh/core/api/gunicorn_config.py swh/core/api/negotiation.py swh/core/api/serializers.py swh/core/api/tests/__init__.py swh/core/api/tests/conftest.py swh/core/api/tests/server_testing.py swh/core/api/tests/test_async.py swh/core/api/tests/test_classes.py swh/core/api/tests/test_gunicorn.py swh/core/api/tests/test_init.py swh/core/api/tests/test_rpc_client.py swh/core/api/tests/test_rpc_client_server.py swh/core/api/tests/test_rpc_server.py swh/core/api/tests/test_rpc_server_asynchronous.py swh/core/api/tests/test_serializers.py swh/core/cli/__init__.py swh/core/cli/db.py swh/core/db/__init__.py swh/core/db/common.py swh/core/db/db_utils.py swh/core/db/pytest_plugin.py swh/core/db/sql/35-dbversion.sql swh/core/db/sql/36-dbmodule.sql swh/core/db/tests/__init__.py swh/core/db/tests/conftest.py swh/core/db/tests/test_cli.py swh/core/db/tests/test_db.py swh/core/db/tests/test_db_utils.py swh/core/db/tests/data/cli/sql/0-superuser-init.sql swh/core/db/tests/data/cli/sql/15-flavor.sql swh/core/db/tests/data/cli/sql/30-schema.sql swh/core/db/tests/data/cli/sql/40-funcs.sql swh/core/db/tests/data/cli/sql/50-data.sql swh/core/db/tests/data/cli_new/sql/0-superuser-init.sql swh/core/db/tests/data/cli_new/sql/15-flavor.sql swh/core/db/tests/data/cli_new/sql/30-schema.sql swh/core/db/tests/data/cli_new/sql/40-funcs.sql swh/core/db/tests/data/cli_new/sql/50-data.sql swh/core/db/tests/data/cli_new/sql/upgrades/001.sql swh/core/db/tests/data/cli_new/sql/upgrades/002.sql swh/core/db/tests/data/cli_new/sql/upgrades/003.sql swh/core/db/tests/data/cli_new/sql/upgrades/004.sql swh/core/db/tests/data/cli_new/sql/upgrades/005.sql swh/core/db/tests/data/cli_new/sql/upgrades/006.sql swh/core/db/tests/pytest_plugin/__init__.py swh/core/db/tests/pytest_plugin/test_pytest_plugin.py swh/core/db/tests/pytest_plugin/data/0-schema.sql swh/core/db/tests/pytest_plugin/data/1-data.sql swh/core/github/__init__.py swh/core/github/pytest_plugin.py swh/core/github/utils.py swh/core/github/tests/__init__.py swh/core/github/tests/test_github_utils.py swh/core/github/tests/test_pytest_plugin.py swh/core/tests/__init__.py swh/core/tests/test_cli.py swh/core/tests/test_collections.py swh/core/tests/test_config.py swh/core/tests/test_logger.py swh/core/tests/test_pytest_plugin.py swh/core/tests/test_sentry.py swh/core/tests/test_statsd.py swh/core/tests/test_tarball.py swh/core/tests/test_utils.py swh/core/tests/data/archives/ca-certificates-20210603-1-any.pkg.tar.zst swh/core/tests/data/archives/groff-1.02.tar.Z swh/core/tests/data/archives/hello.jar swh/core/tests/data/archives/hello.tar swh/core/tests/data/archives/hello.tar.bz2 swh/core/tests/data/archives/hello.tar.gz swh/core/tests/data/archives/hello.tar.lz swh/core/tests/data/archives/hello.tar.x swh/core/tests/data/archives/hello.tbz swh/core/tests/data/archives/hello.tbz2 +swh/core/tests/data/archives/hello.war swh/core/tests/data/archives/hello.zip swh/core/tests/data/archives/msk316src.zip swh/core/tests/data/archives/tokei-12.1.2.crate swh/core/tests/data/http_example.com/something.json swh/core/tests/data/https_example.com/file.json swh/core/tests/data/https_example.com/file.json,name=doe,firstname=jane swh/core/tests/data/https_example.com/file.json_visit1 swh/core/tests/data/https_example.com/other.json swh/core/tests/data/https_forge.s.o/api_diffusion,attachments[uris]=1 swh/core/tests/data/https_www.reference.com/web,q=What+Is+an+Example+of+a+URL?,qo=contentPageRelatedSearch,o=600605,l=dir,sga=1 swh/core/tests/fixture/__init__.py swh/core/tests/fixture/conftest.py swh/core/tests/fixture/test_pytest_plugin.py swh/core/tests/fixture/data/https_example.com/file.json \ No newline at end of file diff --git a/swh/core/tarball.py b/swh/core/tarball.py index c80cb5c..e9dc637 100644 --- a/swh/core/tarball.py +++ b/swh/core/tarball.py @@ -1,260 +1,260 @@ # Copyright (C) 2015-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import shutil import stat from subprocess import run import tarfile import zipfile import magic from . import utils MIMETYPE_TO_ARCHIVE_FORMAT = { "application/x-compress": "tar.Z|x", "application/x-tar": "tar", "application/x-bzip2": "bztar", "application/gzip": "gztar", "application/x-gzip": "gztar", "application/x-lzip": "tar.lz", "application/zip": "zip", "application/java-archive": "jar", "application/zstd": "tar.zst", "application/x-zstd": "tar.zst", } def _unpack_tar(tarpath: str, extract_dir: str) -> str: """Unpack tarballs unsupported by the standard python library. Examples include tar.Z, tar.lz, tar.x, etc.... As this implementation relies on the `tar` command, this function supports the same compression the tar command supports. This expects the `extract_dir` to exist. Raises: shutil.ReadError in case of issue uncompressing the archive (tarpath does not exist, extract_dir does not exist, etc...) Returns: full path to the uncompressed directory. """ try: run(["tar", "xf", tarpath, "-C", extract_dir], check=True) return extract_dir except Exception as e: raise shutil.ReadError( f"Unable to uncompress {tarpath} to {extract_dir}. Reason: {e}" ) def _unpack_zip(zippath: str, extract_dir: str) -> str: """Unpack zip files unsupported by the standard python library, for instance those with legacy compression type 6 (implode). This expects the `extract_dir` to exist. Raises: shutil.ReadError in case of issue uncompressing the archive (zippath does not exist, extract_dir does not exist, etc...) Returns: full path to the uncompressed directory. """ try: run(["unzip", "-q", "-d", extract_dir, zippath], check=True) return extract_dir except Exception as e: raise shutil.ReadError( f"Unable to uncompress {zippath} to {extract_dir}. Reason: {e}" ) def _unpack_jar(jarpath: str, extract_dir: str) -> str: """Unpack jar files using standard Python module zipfile. This expects the `extract_dir` to exist. Raises: shutil.ReadError in case of issue uncompressing the archive (jarpath does not exist, extract_dir does not exist, etc...) Returns: full path to the uncompressed directory. """ try: with zipfile.ZipFile(jarpath) as jar: jar.extractall(path=extract_dir) return extract_dir except Exception as e: raise shutil.ReadError( f"Unable to uncompress {jarpath} to {extract_dir}. Reason: {e}" ) def _unpack_zst(zstpath: str, extract_dir: str) -> str: """Unpack zst files unsupported by the standard python library. Example include tar.zst This expects the `extract_dir` to exist. Raises: shutil.ReadError in case of issue uncompressing the archive (zstpath """ try: run( ["tar", "--force-local", "-I 'zstd'", "-xf", zstpath, "-C", extract_dir], check=True, ) return extract_dir except Exception as e: raise shutil.ReadError( f"Unable to uncompress {zstpath} to {extract_dir}. Reason: {e}" ) def register_new_archive_formats(): """Register new archive formats to uncompress""" registered_formats = [f[0] for f in shutil.get_unpack_formats()] for name, extensions, function in ADDITIONAL_ARCHIVE_FORMATS: if name in registered_formats: continue shutil.register_unpack_format(name, extensions, function) def uncompress(tarpath: str, dest: str): """Uncompress tarpath to dest folder if tarball is supported. Note that this fixes permissions after successfully uncompressing the archive. Args: tarpath: path to tarball to uncompress dest: the destination folder where to uncompress the tarball, it will be created if it does not exist Raises: ValueError when a problem occurs during unpacking """ try: os.makedirs(dest, exist_ok=True) format = None # try to get archive format from extension for format_, exts, _ in shutil.get_unpack_formats(): if any([tarpath.lower().endswith(ext.lower()) for ext in exts]): format = format_ break # try to get archive format from file mimetype if format is None: m = magic.Magic(mime=True) mime = m.from_file(tarpath) format = MIMETYPE_TO_ARCHIVE_FORMAT.get(mime) shutil.unpack_archive(tarpath, extract_dir=dest, format=format) except shutil.ReadError as e: raise ValueError(f"Problem during unpacking {tarpath}. Reason: {e}") except NotImplementedError: if tarpath.lower().endswith(".zip") or format == "zip": _unpack_zip(tarpath, dest) else: raise except NotADirectoryError: if format and "tar" in format: # some old tarballs might fail to be unpacked by shutil.unpack_archive, # fallback using the tar command as last resort _unpack_tar(tarpath, dest) else: raise normalize_permissions(dest) def normalize_permissions(path: str): """Normalize the permissions of all files and directories under `path`. This makes all subdirectories and files with the user executable bit set mode 0o0755, and all other files mode 0o0644. Args: path: the path under which permissions should be normalized """ os.chmod(path, 0o0755) for dirpath, dnames, fnames in os.walk(path): for dname in dnames: dpath = os.path.join(dirpath, dname) os.chmod(dpath, 0o0755) for fname in fnames: fpath = os.path.join(dirpath, fname) if not os.path.islink(fpath): is_executable = os.stat(fpath).st_mode & stat.S_IXUSR forced_mode = 0o0755 if is_executable else 0o0644 os.chmod(fpath, forced_mode) def _ls(rootdir): """Generator of filepath, filename from rootdir.""" for dirpath, dirnames, fnames in os.walk(rootdir): for fname in dirnames + fnames: fpath = os.path.join(dirpath, fname) fname = utils.commonname(rootdir, fpath) yield fpath, fname def _compress_zip(tarpath, files): """Compress dirpath's content as tarpath.""" with zipfile.ZipFile(tarpath, "w") as z: for fpath, fname in files: z.write(fpath, arcname=fname) def _compress_tar(tarpath, files): """Compress dirpath's content as tarpath.""" with tarfile.open(tarpath, "w:bz2") as t: for fpath, fname in files: t.add(fpath, arcname=fname, recursive=False) def compress(tarpath, nature, dirpath_or_files): """Create a tarball tarpath with nature nature. The content of the tarball is either dirpath's content (if representing a directory path) or dirpath's iterable contents. Compress the directory dirpath's content to a tarball. The tarball being dumped at tarpath. The nature of the tarball is determined by the nature argument. """ if isinstance(dirpath_or_files, str): files = _ls(dirpath_or_files) else: # iterable of 'filepath, filename' files = dirpath_or_files if nature == "zip": _compress_zip(tarpath, files) else: _compress_tar(tarpath, files) return tarpath # Additional uncompression archive format support ADDITIONAL_ARCHIVE_FORMATS = [ # name, extensions, function ("tar.Z|x", [".tar.Z", ".tar.x"], _unpack_tar), - ("jar", [".jar"], _unpack_jar), + ("jar", [".jar", ".war"], _unpack_jar), ("tbz2", [".tbz", "tbz2"], _unpack_tar), # FIXME: make this optional depending on the runtime lzip package install ("tar.lz", [".tar.lz"], _unpack_tar), ("crate", [".crate"], _unpack_tar), ("tar.zst", [".tar.zst", ".tar.zstd"], _unpack_zst), ] register_new_archive_formats() diff --git a/swh/core/tests/data/archives/hello.war b/swh/core/tests/data/archives/hello.war new file mode 100644 index 0000000..b63aeaa Binary files /dev/null and b/swh/core/tests/data/archives/hello.war differ diff --git a/tox.ini b/tox.ini index 9160506..49c7164 100644 --- a/tox.ini +++ b/tox.ini @@ -1,100 +1,101 @@ [tox] envlist=black,flake8,mypy,py3-{core,db,server,github} [testenv] passenv = PYTHONASYNCIODEBUG extras = testing-core core: logging db: db server: http github: github deps = cover: pytest-cov commands = pytest --doctest-modules \ slow: --hypothesis-profile=slow \ cover: --cov={envsitepackagesdir}/swh/core --cov-branch \ core: {envsitepackagesdir}/swh/core/tests \ db: {envsitepackagesdir}/swh/core/db/tests \ server: {envsitepackagesdir}/swh/core/api/tests \ github: {envsitepackagesdir}/swh/core/github/tests \ {posargs} [testenv:py3] skip_install = true deps = tox commands = tox -e py3-core-db-server-github-slow-cover -- {posargs} [testenv:black] skip_install = true deps = - black==22.3.0 + black==22.10.0 commands = {envpython} -m black --check swh [testenv:flake8] skip_install = true deps = - flake8==4.0.1 - flake8-bugbear==22.3.23 + flake8==5.0.4 + flake8-bugbear==22.9.23 + pycodestyle==2.9.1 commands = {envpython} -m flake8 [testenv:mypy] extras = testing-core logging db http github deps = mypy==0.942 commands = mypy swh # build documentation outside swh-environment using the current # git HEAD of swh-docs, is executed on CI for each diff to prevent # breaking doc build [testenv:sphinx] whitelist_externals = make usedevelop = true extras = testing-core logging db http github deps = # fetch and install swh-docs in develop mode -e git+https://forge.softwareheritage.org/source/swh-docs#egg=swh.docs setenv = SWH_PACKAGE_DOC_TOX_BUILD = 1 # turn warnings into errors SPHINXOPTS = -W commands = make -I ../.tox/sphinx/src/swh-docs/swh/ -C docs # build documentation only inside swh-environment using local state # of swh-docs package [testenv:sphinx-dev] whitelist_externals = make usedevelop = true extras = testing-core logging db http github deps = # install swh-docs in develop mode -e ../swh-docs setenv = SWH_PACKAGE_DOC_TOX_BUILD = 1 # turn warnings into errors SPHINXOPTS = -W commands = make -I ../.tox/sphinx-dev/src/swh-docs/swh/ -C docs