diff --git a/PKG-INFO b/PKG-INFO index 2df177c..7e33a65 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,24 +1,24 @@ Metadata-Version: 2.1 Name: swh.objstorage -Version: 0.0.29 +Version: 0.0.30 Summary: Software Heritage Object Storage Home-page: https://forge.softwareheritage.org/diffusion/DOBJS Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest -Project-URL: Source, https://forge.softwareheritage.org/source/swh-objstorage Project-URL: Funding, https://www.softwareheritage.org/donate +Project-URL: Source, https://forge.softwareheritage.org/source/swh-objstorage Description: swh-objstorage ============== Content-addressable object storage for the Software Heritage project. Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing diff --git a/setup.py b/setup.py index 5fd9abc..0df3ad3 100755 --- a/setup.py +++ b/setup.py @@ -1,68 +1,72 @@ #!/usr/bin/env python3 # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from setuptools import setup, find_packages from os import path from io import open here = path.abspath(path.dirname(__file__)) # Get the long description from the README file with open(path.join(here, 'README.md'), encoding='utf-8') as f: long_description = f.read() def parse_requirements(name=None): if name: reqf = 'requirements-%s.txt' % name else: reqf = 'requirements.txt' requirements = [] if not path.exists(reqf): return requirements with open(reqf) as f: for line in f.readlines(): line = line.strip() if not line or line.startswith('#'): continue requirements.append(line) return requirements setup( name='swh.objstorage', description='Software Heritage Object Storage', long_description=long_description, long_description_content_type='text/markdown', author='Software Heritage developers', author_email='swh-devel@inria.fr', url='https://forge.softwareheritage.org/diffusion/DOBJS', packages=find_packages(), scripts=[ 'bin/swh-objstorage-add-dir', 'bin/swh-objstorage-fsck' ], # scripts to package install_requires=parse_requirements() + parse_requirements('swh'), setup_requires=['vcversioner'], extras_require={'testing': parse_requirements('test')}, vcversioner={}, include_package_data=True, + entry_points=''' + [console_scripts] + swh-objstorage=swh.objstorage.cli:main + ''', classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Development Status :: 5 - Production/Stable", ], project_urls={ 'Bug Reports': 'https://forge.softwareheritage.org/maniphest', 'Funding': 'https://www.softwareheritage.org/donate', 'Source': 'https://forge.softwareheritage.org/source/swh-objstorage', }, ) diff --git a/swh.objstorage.egg-info/PKG-INFO b/swh.objstorage.egg-info/PKG-INFO index 2df177c..7e33a65 100644 --- a/swh.objstorage.egg-info/PKG-INFO +++ b/swh.objstorage.egg-info/PKG-INFO @@ -1,24 +1,24 @@ Metadata-Version: 2.1 Name: swh.objstorage -Version: 0.0.29 +Version: 0.0.30 Summary: Software Heritage Object Storage Home-page: https://forge.softwareheritage.org/diffusion/DOBJS Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest -Project-URL: Source, https://forge.softwareheritage.org/source/swh-objstorage Project-URL: Funding, https://www.softwareheritage.org/donate +Project-URL: Source, https://forge.softwareheritage.org/source/swh-objstorage Description: swh-objstorage ============== Content-addressable object storage for the Software Heritage project. Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing diff --git a/swh.objstorage.egg-info/SOURCES.txt b/swh.objstorage.egg-info/SOURCES.txt index dc24170..fe30d75 100644 --- a/swh.objstorage.egg-info/SOURCES.txt +++ b/swh.objstorage.egg-info/SOURCES.txt @@ -1,45 +1,49 @@ MANIFEST.in Makefile README.md requirements-swh.txt requirements.txt setup.py version.txt bin/swh-objstorage-add-dir bin/swh-objstorage-fsck swh/__init__.py swh.objstorage.egg-info/PKG-INFO swh.objstorage.egg-info/SOURCES.txt swh.objstorage.egg-info/dependency_links.txt +swh.objstorage.egg-info/entry_points.txt swh.objstorage.egg-info/requires.txt swh.objstorage.egg-info/top_level.txt swh/objstorage/__init__.py +swh/objstorage/cli.py swh/objstorage/exc.py swh/objstorage/objstorage.py swh/objstorage/objstorage_in_memory.py swh/objstorage/objstorage_pathslicing.py swh/objstorage/objstorage_rados.py swh/objstorage/api/__init__.py swh/objstorage/api/client.py swh/objstorage/api/server.py +swh/objstorage/api/wsgi.py swh/objstorage/cloud/__init__.py swh/objstorage/cloud/objstorage_azure.py swh/objstorage/cloud/objstorage_cloud.py swh/objstorage/multiplexer/__init__.py swh/objstorage/multiplexer/multiplexer_objstorage.py swh/objstorage/multiplexer/striping_objstorage.py swh/objstorage/multiplexer/filter/__init__.py swh/objstorage/multiplexer/filter/filter.py swh/objstorage/multiplexer/filter/id_filter.py swh/objstorage/multiplexer/filter/read_write_filter.py swh/objstorage/tests/__init__.py swh/objstorage/tests/objstorage_testing.py swh/objstorage/tests/test_multiplexer_filter.py swh/objstorage/tests/test_objstorage_api.py swh/objstorage/tests/test_objstorage_azure.py swh/objstorage/tests/test_objstorage_cloud.py swh/objstorage/tests/test_objstorage_in_memory.py swh/objstorage/tests/test_objstorage_instantiation.py swh/objstorage/tests/test_objstorage_multiplexer.py swh/objstorage/tests/test_objstorage_pathslicing.py -swh/objstorage/tests/test_objstorage_striping.py \ No newline at end of file +swh/objstorage/tests/test_objstorage_striping.py +swh/objstorage/tests/test_server.py \ No newline at end of file diff --git a/swh.objstorage.egg-info/entry_points.txt b/swh.objstorage.egg-info/entry_points.txt new file mode 100644 index 0000000..9ffef5b --- /dev/null +++ b/swh.objstorage.egg-info/entry_points.txt @@ -0,0 +1,4 @@ + + [console_scripts] + swh-objstorage=swh.objstorage.cli:main + \ No newline at end of file diff --git a/swh/objstorage/api/server.py b/swh/objstorage/api/server.py index eedbca2..9dff768 100644 --- a/swh/objstorage/api/server.py +++ b/swh/objstorage/api/server.py @@ -1,181 +1,217 @@ -# Copyright (C) 2015-2017 The Software Heritage developers +# Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import asyncio import aiohttp.web -import click +import os -from swh.core import config +from swh.core.config import read as config_read from swh.core.api_async import (SWHRemoteAPI, decode_request, encode_data_server as encode_data) from swh.model import hashutil from swh.objstorage import get_objstorage from swh.objstorage.exc import ObjNotFoundError -DEFAULT_CONFIG_PATH = 'objstorage/server' -DEFAULT_CONFIG = { - 'cls': ('str', 'pathslicing'), - 'args': ('dict', { - 'root': '/srv/softwareheritage/objects', - 'slicing': '0:2/2:4/4:6', - }), - 'client_max_size': ('int', 1024 * 1024 * 1024), -} - - @asyncio.coroutine def index(request): return aiohttp.web.Response(body="SWH Objstorage API server") @asyncio.coroutine def check_config(request): req = yield from decode_request(request) return encode_data(request.app['objstorage'].check_config(**req)) @asyncio.coroutine def contains(request): req = yield from decode_request(request) return encode_data(request.app['objstorage'].__contains__(**req)) @asyncio.coroutine def add_bytes(request): req = yield from decode_request(request) return encode_data(request.app['objstorage'].add(**req)) @asyncio.coroutine def add_batch(request): req = yield from decode_request(request) return encode_data(request.app['objstorage'].add_batch(**req)) @asyncio.coroutine def get_bytes(request): req = yield from decode_request(request) try: ret = request.app['objstorage'].get(**req) except ObjNotFoundError: ret = { 'error': 'object_not_found', 'request': req, } return encode_data(ret, status=404) else: return encode_data(ret) @asyncio.coroutine def get_batch(request): req = yield from decode_request(request) return encode_data(request.app['objstorage'].get_batch(**req)) @asyncio.coroutine def check(request): req = yield from decode_request(request) return encode_data(request.app['objstorage'].check(**req)) @asyncio.coroutine def delete(request): req = yield from decode_request(request) return encode_data(request.app['objstorage'].delete(**req)) # Management methods @asyncio.coroutine def get_random_contents(request): req = yield from decode_request(request) return encode_data(request.app['objstorage'].get_random(**req)) # Streaming methods @asyncio.coroutine def add_stream(request): hex_id = request.match_info['hex_id'] obj_id = hashutil.hash_to_bytes(hex_id) check_pres = (request.query.get('check_presence', '').lower() == 'true') objstorage = request.app['objstorage'] if check_pres and obj_id in objstorage: return encode_data(obj_id) with objstorage.chunk_writer(obj_id) as write: # XXX (3.5): use 'async for chunk in request.content.iter_any()' while not request.content.at_eof(): chunk = yield from request.content.readany() write(chunk) return encode_data(obj_id) @asyncio.coroutine def get_stream(request): hex_id = request.match_info['hex_id'] obj_id = hashutil.hash_to_bytes(hex_id) response = aiohttp.web.StreamResponse() yield from response.prepare(request) for chunk in request.app['objstorage'].get_stream(obj_id, 2 << 20): response.write(chunk) yield from response.drain() return response -@asyncio.coroutine -def set_app_config(app): - if app['config']: - cfg = app['config'] - else: - cfg = config.load_named_config(DEFAULT_CONFIG_PATH, DEFAULT_CONFIG) - if 'client_max_size' in cfg: - app._client_max_size = cfg.pop('client_max_size') - app.update(cfg) +def make_app(config): + """Initialize the remote api application. + """ + app = SWHRemoteAPI() + # retro compatibility configuration settings + app['config'] = config + _cfg = config['objstorage'] + app['objstorage'] = get_objstorage(_cfg['cls'], _cfg['args']) -@asyncio.coroutine -def create_objstorage(app): - app['objstorage'] = get_objstorage(app['cls'], app['args']) - - -app = SWHRemoteAPI() -app['config'] = None -app.router.add_route('GET', '/', index) -app.router.add_route('POST', '/check_config', check_config) -app.router.add_route('POST', '/content/contains', contains) -app.router.add_route('POST', '/content/add', add_bytes) -app.router.add_route('POST', '/content/add/batch', add_batch) -app.router.add_route('POST', '/content/get', get_bytes) -app.router.add_route('POST', '/content/get/batch', get_batch) -app.router.add_route('POST', '/content/get/random', get_random_contents) -app.router.add_route('POST', '/content/check', check) -app.router.add_route('POST', '/content/delete', delete) -app.router.add_route('POST', '/content/add_stream/{hex_id}', add_stream) -app.router.add_route('GET', '/content/get_stream/{hex_id}', get_stream) -app.on_startup.append(set_app_config) -app.on_startup.append(create_objstorage) - - -@click.command() -@click.argument('config-path', required=1) -@click.option('--host', default='0.0.0.0', help="Host to run the server") -@click.option('--port', default=5003, type=click.INT, - help="Binding port of the server") -@click.option('--debug/--nodebug', default=True, - help="Indicates if the server should run in debug mode") -def launch(config_path, host, port, debug): - cfg = config.load_named_config(config_path, DEFAULT_CONFIG) - app['config'] = cfg - app.update(debug=bool(debug)) - aiohttp.web.run_app(app, host=host, port=int(port)) + client_max_size = config.get('client_max_size') + if client_max_size: + app._client_max_size = client_max_size + + app.router.add_route('GET', '/', index) + app.router.add_route('POST', '/check_config', check_config) + app.router.add_route('POST', '/content/contains', contains) + app.router.add_route('POST', '/content/add', add_bytes) + app.router.add_route('POST', '/content/add/batch', add_batch) + app.router.add_route('POST', '/content/get', get_bytes) + app.router.add_route('POST', '/content/get/batch', get_batch) + app.router.add_route('POST', '/content/get/random', get_random_contents) + app.router.add_route('POST', '/content/check', check) + app.router.add_route('POST', '/content/delete', delete) + app.router.add_route('POST', '/content/add_stream/{hex_id}', add_stream) + app.router.add_route('GET', '/content/get_stream/{hex_id}', get_stream) + return app + + +def load_and_check_config(config_file): + """Check the minimal configuration is set to run the api or raise an + error explanation. + + Args: + config_file (str): Path to the configuration file to load + type (str): configuration type. For 'local' type, more + checks are done. + + Raises: + Error if the setup is not as expected + + Returns: + configuration as a dict + + """ + if not config_file: + raise EnvironmentError('Configuration file must be defined') + + if not os.path.exists(config_file): + raise FileNotFoundError('Configuration file %s does not exist' % ( + config_file, )) + + cfg = config_read(config_file) + + if 'objstorage' not in cfg: + raise KeyError( + "Invalid configuration; missing objstorage config entry") + + missing_keys = [] + vcfg = cfg['objstorage'] + for key in ('cls', 'args'): + v = vcfg.get(key) + if v is None: + missing_keys.append(key) + + if missing_keys: + raise KeyError( + "Invalid configuration; missing %s config entry" % ( + ', '.join(missing_keys), )) + + cls = vcfg.get('cls') + if cls == 'pathslicing': + args = vcfg['args'] + for key in ('root', 'slicing'): + v = args.get(key) + if v is None: + missing_keys.append(key) + + if missing_keys: + raise KeyError( + "Invalid configuration; missing args.%s config entry" % ( + ', '.join(missing_keys), )) + + return cfg + + +def make_app_from_configfile(): + """Load configuration and then build application to run + + """ + config_file = os.environ.get('SWH_CONFIG_FILENAME') + config = load_and_check_config(config_file) + return make_app(config=config) if __name__ == '__main__': - launch() + print('Deprecated. Use swh-objstorage') diff --git a/swh/objstorage/api/wsgi.py b/swh/objstorage/api/wsgi.py new file mode 100644 index 0000000..02c4901 --- /dev/null +++ b/swh/objstorage/api/wsgi.py @@ -0,0 +1,8 @@ +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from .server import make_app_from_configfile + +application = make_app_from_configfile() diff --git a/swh/objstorage/cli.py b/swh/objstorage/cli.py new file mode 100644 index 0000000..22a68a3 --- /dev/null +++ b/swh/objstorage/cli.py @@ -0,0 +1,27 @@ +# Copyright (C) 2015-2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import click +import aiohttp.web + +from swh.objstorage.api.server import load_and_check_config, make_app + + +@click.command() +@click.argument('config-path', required=1) +@click.option('--host', default='0.0.0.0', help="Host to run the server") +@click.option('--port', default=5007, type=click.INT, + help="Binding port of the server") +@click.option('--debug/--nodebug', default=True, + help="Indicates if the server should run in debug mode") +def main(config_path, host, port, debug): + cfg = load_and_check_config(config_path) + app = make_app(cfg) + app.update(debug=bool(debug)) + aiohttp.web.run_app(app, host=host, port=int(port)) + + +if __name__ == '__main__': + main() diff --git a/swh/objstorage/tests/test_objstorage_api.py b/swh/objstorage/tests/test_objstorage_api.py index 3dce298..06a3651 100644 --- a/swh/objstorage/tests/test_objstorage_api.py +++ b/swh/objstorage/tests/test_objstorage_api.py @@ -1,42 +1,43 @@ -# Copyright (C) 2015-2018 The Software Heritage developers +# Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import shutil import tempfile import unittest from swh.core.tests.server_testing import ServerTestFixtureAsync from swh.objstorage import get_objstorage -from swh.objstorage.api.server import app +from swh.objstorage.api.server import make_app from swh.objstorage.tests.objstorage_testing import ObjStorageTestFixture class TestRemoteObjStorage(ServerTestFixtureAsync, ObjStorageTestFixture, unittest.TestCase): """ Test the remote archive API. """ def setUp(self): self.tmpdir = tempfile.mkdtemp() self.config = { - 'cls': 'pathslicing', - 'args': { - 'root': self.tmpdir, - 'slicing': '0:1/0:5', - 'allow_delete': True, + 'objstorage': { + 'cls': 'pathslicing', + 'args': { + 'root': self.tmpdir, + 'slicing': '0:1/0:5', + 'allow_delete': True, + } }, 'client_max_size': 8 * 1024 * 1024, } - self.app = app - self.app['config'] = self.config + self.app = make_app(self.config) super().setUp() self.storage = get_objstorage('remote', { 'url': self.url() }) def tearDown(self): super().tearDown() shutil.rmtree(self.tmpdir) diff --git a/swh/objstorage/tests/test_server.py b/swh/objstorage/tests/test_server.py new file mode 100644 index 0000000..02eedea --- /dev/null +++ b/swh/objstorage/tests/test_server.py @@ -0,0 +1,134 @@ +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import copy +import pytest +import yaml + +from swh.objstorage.api.server import load_and_check_config + + +def prepare_config_file(tmpdir, content, name='config.yml'): + """Prepare configuration file in `$tmpdir/name` with content `content`. + + Args: + tmpdir (LocalPath): root directory + content (str/dict): Content of the file either as string or as a dict. + If a dict, converts the dict into a yaml string. + name (str): configuration filename + + Returns + path (str) of the configuration file prepared. + + """ + config_path = tmpdir / name + if isinstance(content, dict): # convert if needed + content = yaml.dump(content) + config_path.write_text(content, encoding='utf-8') + # pytest on python3.5 does not support LocalPath manipulation, so + # convert path to string + return str(config_path) + + +def test_load_and_check_config_no_configuration(): + """Inexistant configuration files raises""" + with pytest.raises(EnvironmentError) as e: + load_and_check_config(None) + + assert e.value.args[0] == 'Configuration file must be defined' + + config_path = '/indexer/inexistant/config.yml' + with pytest.raises(FileNotFoundError) as e: + load_and_check_config(config_path) + + assert e.value.args[0] == 'Configuration file %s does not exist' % ( + config_path, ) + + +def test_load_and_check_config_invalid_configuration_toplevel(tmpdir): + """Invalid configuration raises""" + config = { + 'something': 'useless' + } + config_path = prepare_config_file(tmpdir, content=config) + with pytest.raises(KeyError) as e: + load_and_check_config(config_path) + + assert ( + e.value.args[0] == + 'Invalid configuration; missing objstorage config entry' + ) + + +def test_load_and_check_config_invalid_configuration(tmpdir): + """Invalid configuration raises""" + for data, missing_keys in [ + ({'objstorage': {'something': 'useless'}}, ['cls', 'args']), + ({'objstorage': {'cls': 'something'}}, ['args']), + ]: + config_path = prepare_config_file(tmpdir, content=data) + with pytest.raises(KeyError) as e: + load_and_check_config(config_path) + + assert ( + e.value.args[0] == + 'Invalid configuration; missing %s config entry' % ( + ', '.join(missing_keys), ) + ) + + +def test_load_and_check_config_invalid_configuration_level2(tmpdir): + """Invalid configuration at 2nd level raises""" + config = { + 'objstorage': { + 'cls': 'pathslicing', + 'args': { + 'root': 'root', + 'slicing': 'slicing', + }, + 'client_max_size': '10', + } + } + for key in ('root', 'slicing'): + c = copy.deepcopy(config) + c['objstorage']['args'].pop(key) + config_path = prepare_config_file(tmpdir, c) + with pytest.raises(KeyError) as e: + load_and_check_config(config_path) + + assert ( + e.value.args[0] == + "Invalid configuration; missing args.%s config entry" % key + ) + + +def test_load_and_check_config_fine(tmpdir): + """pathslicing configuration fine loads ok""" + config = { + 'objstorage': { + 'cls': 'pathslicing', + 'args': { + 'root': 'root', + 'slicing': 'slicing', + } + } + } + + config_path = prepare_config_file(tmpdir, config) + cfg = load_and_check_config(config_path) + assert cfg == config + + +def test_load_and_check_config_fine2(tmpdir): + config = { + 'client_max_size': '10', + 'objstorage': { + 'cls': 'remote', + 'args': {} + } + } + config_path = prepare_config_file(tmpdir, config) + cfg = load_and_check_config(config_path) + assert cfg == config diff --git a/version.txt b/version.txt index 603cbe1..e780f5e 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.29-0-g011d0ed \ No newline at end of file +v0.0.30-0-g0ecf2ed \ No newline at end of file