diff --git a/PKG-INFO b/PKG-INFO index 8b8f7a4..376f27c 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,39 +1,39 @@ Metadata-Version: 2.1 Name: swh.core -Version: 2.14.1 +Version: 2.15.0 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-core/ Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/x-rst Provides-Extra: testing-core Provides-Extra: logging Provides-Extra: db Provides-Extra: http Provides-Extra: github Provides-Extra: testing License-File: LICENSE License-File: AUTHORS Software Heritage - Core foundations ==================================== Low-level utilities and helpers used by almost all other modules in the stack. core library for swh's modules: - config parser - serialization - logging mechanism - database connection - http-based RPC client/server diff --git a/swh.core.egg-info/PKG-INFO b/swh.core.egg-info/PKG-INFO index 8b8f7a4..376f27c 100644 --- a/swh.core.egg-info/PKG-INFO +++ b/swh.core.egg-info/PKG-INFO @@ -1,39 +1,39 @@ Metadata-Version: 2.1 Name: swh.core -Version: 2.14.1 +Version: 2.15.0 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-core/ Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/x-rst Provides-Extra: testing-core Provides-Extra: logging Provides-Extra: db Provides-Extra: http Provides-Extra: github Provides-Extra: testing License-File: LICENSE License-File: AUTHORS Software Heritage - Core foundations ==================================== Low-level utilities and helpers used by almost all other modules in the stack. core library for swh's modules: - config parser - serialization - logging mechanism - database connection - http-based RPC client/server diff --git a/swh/core/api/asynchronous.py b/swh/core/api/asynchronous.py index 323d305..fd9e7ed 100644 --- a/swh/core/api/asynchronous.py +++ b/swh/core/api/asynchronous.py @@ -1,185 +1,185 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from collections import OrderedDict import functools import logging from typing import Callable, Dict, List, Optional, Tuple, Type, Union import aiohttp.web from aiohttp_utils import Response, negotiation from deprecated import deprecated import multidict from .serializers import ( exception_to_dict, json_dumps, json_loads, msgpack_dumps, msgpack_loads, ) logger = logging.getLogger(__name__) def encode_msgpack(data, **kwargs): return aiohttp.web.Response( body=msgpack_dumps(data), headers=multidict.MultiDict({"Content-Type": "application/x-msgpack"}), **kwargs, ) encode_data_server = Response def render_msgpack(request, data, extra_encoders=None): return msgpack_dumps(data, extra_encoders=extra_encoders) def render_json(request, data, extra_encoders=None): return json_dumps(data, extra_encoders=extra_encoders) def decode_data(data, content_type, extra_decoders=None): """Decode data according to content type, eventually using some extra decoders.""" if not data: return {} if content_type == "application/x-msgpack": r = msgpack_loads(data, extra_decoders=extra_decoders) elif content_type == "application/json": r = json_loads(data, extra_decoders=extra_decoders) else: raise ValueError(f"Wrong content type `{content_type}` for API request") return r async def decode_request(request, extra_decoders=None): """Decode asynchronously the request""" data = await request.read() return decode_data(data, request.content_type, extra_decoders=extra_decoders) async def error_middleware(app, handler): async def middleware_handler(request): try: return await handler(request) except Exception as e: if isinstance(e, aiohttp.web.HTTPException): raise - logger.exception(e) res = exception_to_dict(e) if isinstance(e, app.client_exception_classes): status = 400 else: + logger.exception(e) status = 500 return encode_data_server(res, status=status) return middleware_handler class RPCServerApp(aiohttp.web.Application): """For each endpoint of the given `backend_class`, tells app.route to call a function that decodes the request and sends it to the backend object provided by the factory. :param Any backend_class: The class of the backend, which will be analyzed to look for API endpoints. :param Optional[Callable[[], backend_class]] backend_factory: A function with no argument that returns an instance of `backend_class`. If unset, defaults to calling `backend_class` constructor directly. """ client_exception_classes: Tuple[Type[Exception], ...] = () """Exceptions that should be handled as a client error (eg. object not found, invalid argument)""" extra_type_encoders: List[Tuple[type, str, Callable]] = [] """Value of `extra_encoders` passed to `json_dumps` or `msgpack_dumps` to be able to serialize more object types.""" extra_type_decoders: Dict[str, Callable] = {} """Value of `extra_decoders` passed to `json_loads` or `msgpack_loads` to be able to deserialize more object types.""" def __init__( self, app_name: Optional[str] = None, backend_class: Optional[Callable] = None, backend_factory: Optional[Union[Callable, str]] = None, middlewares=(), **kwargs, ): nego_middleware = negotiation.negotiation_middleware( renderers=self._renderers(), force_rendering=True ) middlewares = ( nego_middleware, error_middleware, ) + middlewares super().__init__(middlewares=middlewares, **kwargs) # swh decorations starts here self.app_name = app_name if backend_class is None and backend_factory is not None: raise ValueError( "backend_factory should only be provided if backend_class is" ) self.backend_class = backend_class if backend_class is not None: backend_factory = backend_factory or backend_class for (meth_name, meth) in backend_class.__dict__.items(): if hasattr(meth, "_endpoint_path"): path = meth._endpoint_path http_method = meth._method path = path if path.startswith("/") else f"/{path}" self.router.add_route( http_method, path, self._endpoint(meth_name, meth, backend_factory), ) def _renderers(self): """Return an ordered list of renderers in order of increasing desirability (!) See mimetype.best_match() docstring """ return OrderedDict( [ ( "application/json", lambda request, data: render_json( request, data, extra_encoders=self.extra_type_encoders ), ), ( "application/x-msgpack", lambda request, data: render_msgpack( request, data, extra_encoders=self.extra_type_encoders ), ), ] ) def _endpoint(self, meth_name, meth, backend_factory): """Create endpoint out of the method `meth`.""" @functools.wraps(meth) # Copy signature and doc async def decorated_meth(request, *args, **kwargs): obj_meth = getattr(backend_factory(), meth_name) data = await request.read() kw = decode_data( data, request.content_type, extra_decoders=self.extra_type_decoders ) result = obj_meth(**kw) return encode_data_server(result) return decorated_meth @deprecated(version="0.0.64", reason="Use the RPCServerApp instead") class SWHRemoteAPI(RPCServerApp): pass diff --git a/swh/core/tarball.py b/swh/core/tarball.py index 11dba55..b248b31 100644 --- a/swh/core/tarball.py +++ b/swh/core/tarball.py @@ -1,250 +1,249 @@ -# Copyright (C) 2015-2021 The Software Heritage developers +# Copyright (C) 2015-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import shutil import stat from subprocess import run import tarfile import zipfile import magic from . import utils +MIMETYPE_TO_ARCHIVE_FORMAT = { + "application/x-compress": "tar.Z|x", + "application/x-tar": "tar", + "application/x-bzip2": "bztar", + "application/gzip": "gztar", + "application/x-lzip": "tar.lz", + "application/zip": "zip", + "application/java-archive": "jar", + "application/zstd": "tar.zst", + "application/x-zstd": "tar.zst", +} + def _unpack_tar(tarpath: str, extract_dir: str) -> str: """Unpack tarballs unsupported by the standard python library. Examples include tar.Z, tar.lz, tar.x, etc.... As this implementation relies on the `tar` command, this function supports the same compression the tar command supports. This expects the `extract_dir` to exist. Raises: shutil.ReadError in case of issue uncompressing the archive (tarpath does not exist, extract_dir does not exist, etc...) Returns: full path to the uncompressed directory. """ try: run(["tar", "xf", tarpath, "-C", extract_dir], check=True) return extract_dir except Exception as e: raise shutil.ReadError( f"Unable to uncompress {tarpath} to {extract_dir}. Reason: {e}" ) def _unpack_zip(zippath: str, extract_dir: str) -> str: """Unpack zip files unsupported by the standard python library, for instance those with legacy compression type 6 (implode). This expects the `extract_dir` to exist. Raises: shutil.ReadError in case of issue uncompressing the archive (zippath does not exist, extract_dir does not exist, etc...) Returns: full path to the uncompressed directory. """ try: run(["unzip", "-q", "-d", extract_dir, zippath], check=True) return extract_dir except Exception as e: raise shutil.ReadError( f"Unable to uncompress {zippath} to {extract_dir}. Reason: {e}" ) def _unpack_jar(jarpath: str, extract_dir: str) -> str: """Unpack jar files using standard Python module zipfile. This expects the `extract_dir` to exist. Raises: shutil.ReadError in case of issue uncompressing the archive (jarpath does not exist, extract_dir does not exist, etc...) Returns: full path to the uncompressed directory. """ try: with zipfile.ZipFile(jarpath) as jar: jar.extractall(path=extract_dir) return extract_dir except Exception as e: raise shutil.ReadError( f"Unable to uncompress {jarpath} to {extract_dir}. Reason: {e}" ) def _unpack_zst(zstpath: str, extract_dir: str) -> str: """Unpack zst files unsupported by the standard python library. Example include tar.zst This expects the `extract_dir` to exist. Raises: shutil.ReadError in case of issue uncompressing the archive (zstpath """ try: run( ["tar", "--force-local", "-I 'zstd'", "-xf", zstpath, "-C", extract_dir], check=True, ) return extract_dir except Exception as e: raise shutil.ReadError( f"Unable to uncompress {zstpath} to {extract_dir}. Reason: {e}" ) def register_new_archive_formats(): """Register new archive formats to uncompress""" registered_formats = [f[0] for f in shutil.get_unpack_formats()] for name, extensions, function in ADDITIONAL_ARCHIVE_FORMATS: if name in registered_formats: continue shutil.register_unpack_format(name, extensions, function) -_mime_to_archive_format = { - "application/x-compress": "tar.Z|x", - "application/x-tar": "tar", - "application/x-bzip2": "bztar", - "application/gzip": "gztar", - "application/x-lzip": "tar.lz", - "application/zip": "zip", - "application/java-archive": "jar", - "application/zstd": "tar.zst", - "application/x-zstd": "tar.zst", -} - - def uncompress(tarpath: str, dest: str): """Uncompress tarpath to dest folder if tarball is supported. Note that this fixes permissions after successfully uncompressing the archive. Args: tarpath: path to tarball to uncompress dest: the destination folder where to uncompress the tarball, it will be created if it does not exist Raises: ValueError when a problem occurs during unpacking """ try: os.makedirs(dest, exist_ok=True) format = None # try to get archive format from extension for format_, exts, _ in shutil.get_unpack_formats(): if any([tarpath.lower().endswith(ext.lower()) for ext in exts]): format = format_ break # try to get archive format from file mimetype if format is None: m = magic.Magic(mime=True) mime = m.from_file(tarpath) - format = _mime_to_archive_format.get(mime) + format = MIMETYPE_TO_ARCHIVE_FORMAT.get(mime) shutil.unpack_archive(tarpath, extract_dir=dest, format=format) except shutil.ReadError as e: raise ValueError(f"Problem during unpacking {tarpath}. Reason: {e}") except NotImplementedError: if tarpath.lower().endswith(".zip") or format == "zip": _unpack_zip(tarpath, dest) else: raise normalize_permissions(dest) def normalize_permissions(path: str): """Normalize the permissions of all files and directories under `path`. This makes all subdirectories and files with the user executable bit set mode 0o0755, and all other files mode 0o0644. Args: path: the path under which permissions should be normalized """ for dirpath, _, fnames in os.walk(path): os.chmod(dirpath, 0o0755) for fname in fnames: fpath = os.path.join(dirpath, fname) if not os.path.islink(fpath): is_executable = os.stat(fpath).st_mode & stat.S_IXUSR forced_mode = 0o0755 if is_executable else 0o0644 os.chmod(fpath, forced_mode) def _ls(rootdir): """Generator of filepath, filename from rootdir.""" for dirpath, dirnames, fnames in os.walk(rootdir): for fname in dirnames + fnames: fpath = os.path.join(dirpath, fname) fname = utils.commonname(rootdir, fpath) yield fpath, fname def _compress_zip(tarpath, files): """Compress dirpath's content as tarpath.""" with zipfile.ZipFile(tarpath, "w") as z: for fpath, fname in files: z.write(fpath, arcname=fname) def _compress_tar(tarpath, files): """Compress dirpath's content as tarpath.""" with tarfile.open(tarpath, "w:bz2") as t: for fpath, fname in files: t.add(fpath, arcname=fname, recursive=False) def compress(tarpath, nature, dirpath_or_files): """Create a tarball tarpath with nature nature. The content of the tarball is either dirpath's content (if representing a directory path) or dirpath's iterable contents. Compress the directory dirpath's content to a tarball. The tarball being dumped at tarpath. The nature of the tarball is determined by the nature argument. """ if isinstance(dirpath_or_files, str): files = _ls(dirpath_or_files) else: # iterable of 'filepath, filename' files = dirpath_or_files if nature == "zip": _compress_zip(tarpath, files) else: _compress_tar(tarpath, files) return tarpath # Additional uncompression archive format support ADDITIONAL_ARCHIVE_FORMATS = [ # name, extensions, function ("tar.Z|x", [".tar.Z", ".tar.x"], _unpack_tar), ("jar", [".jar"], _unpack_jar), ("tbz2", [".tbz", "tbz2"], _unpack_tar), # FIXME: make this optional depending on the runtime lzip package install ("tar.lz", [".tar.lz"], _unpack_tar), ("crate", [".crate"], _unpack_tar), ("tar.zst", [".tar.zst", ".tar.zstd"], _unpack_zst), ] register_new_archive_formats()