diff --git a/api/client.py b/api/client.py index 8f03086..2daabee 100644 --- a/api/client.py +++ b/api/client.py @@ -1,92 +1,103 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pickle import requests from requests.exceptions import ConnectionError from ...exc import StorageAPIError from ...api.common import (decode_response, encode_data_client as encode_data) class RemoteObjStorage(): """ Proxy to a remote object storage. This class allows to connect to an object storage server via http protocol. Attributes: base_url (string): The url of the server to connect. Must end with a '/' session: The session to send requests. """ def __init__(self, base_url): self.base_url = base_url self.session = requests.Session() def url(self, endpoint): return '%s%s' % (self.base_url, endpoint) def post(self, endpoint, data): try: response = self.session.post( self.url(endpoint), data=encode_data(data), headers={'content-type': 'application/x-msgpack'}, ) except ConnectionError as e: print(str(e)) raise StorageAPIError(e) # XXX: this breaks language-independence and should be # replaced by proper unserialization if response.status_code == 400: raise pickle.loads(decode_response(response)) return decode_response(response) def content_add(self, bytes, obj_id=None): """ Add a new object to the object storage. Args: bytes: content of the object to be added to the storage. obj_id: checksums of `bytes` as computed by ID_HASH_ALGO. When given, obj_id will be trusted to match bytes. If missing, obj_id will be computed on the fly. """ return self.post('content/add', {'bytes': bytes, 'obj_id': obj_id}) def content_get(self, obj_id): """ Retrieve the content of a given object. Args: obj_id: The id of the object. Returns: The content of the requested objects as bytes. 
Raises: ObjNotFoundError: if the requested object is missing """ return self.post('content/get', {'obj_id': obj_id}) + def content_get_random(self, batch_size): + """ Retrieve the ids of a random sample of existing contents. + + Args: + batch_size: Number of content ids requested. + + Returns: + A list of randomly chosen ids of existing contents. + """ + return self.post('content/get/random', {'batch_size': batch_size}) + def content_check(self, obj_id): """ Integrity check for a given object verify that the file object is in place, and that the gzipped content matches the object id Args: obj_id: The id of the object. Raises: ObjNotFoundError: if the requested object is missing Error: if the requested object is corrupt """ self.post('content/check', {'obj_id': obj_id}) diff --git a/api/server.py b/api/server.py index cfb3d25..16a672b 100644 --- a/api/server.py +++ b/api/server.py @@ -1,89 +1,96 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import click import logging from flask import Flask, g, request from swh.core import config from swh.storage.objstorage import ObjStorage from swh.storage.api.common import (BytesRequest, decode_request, error_handler, encode_data_server as encode_data) DEFAULT_CONFIG = { 'storage_base': ('str', '/tmp/swh-storage/objects/'), 'storage_depth': ('int', 3) } app = Flask(__name__) app.request_class = BytesRequest @app.errorhandler(Exception) def my_error_handler(exception): return error_handler(exception, encode_data) @app.before_request def before_request(): g.objstorage = ObjStorage(app.config['storage_base'], app.config['storage_depth']) @app.route('/') def index(): return "SWH Objstorage API server" @app.route('/content') def content(): return str(list(g.storage)) @app.route('/content/add', methods=['POST']) def add_bytes(): return 
encode_data(g.objstorage.add_bytes(**decode_request(request))) @app.route('/content/get', methods=['POST']) def get_bytes(): return encode_data(g.objstorage.get_bytes(**decode_request(request))) +@app.route('/content/get/random', methods=['POST']) +def get_random_contents(): + return encode_data( + g.objstorage.get_random_contents(**decode_request(request)) + ) + + @app.route('/content/check', methods=['POST']) def check(): return encode_data(g.objstorage.check(**decode_request(request))) def run_from_webserver(environ, start_response): """Run the WSGI app from the webserver, loading the configuration. """ config_path = '/etc/softwareheritage/storage/objstorage.ini' app.config.update(config.read(config_path, DEFAULT_CONFIG)) handler = logging.StreamHandler() app.logger.addHandler(handler) return app(environ, start_response) @click.command() @click.argument('config-path', required=1) @click.option('--host', default='0.0.0.0', help="Host to run the server") @click.option('--port', default=5000, type=click.INT, help="Binding port of the server") @click.option('--debug/--nodebug', default=True, help="Indicates if the server should run in debug mode") def launch(config_path, host, port, debug): app.config.update(config.read(config_path, DEFAULT_CONFIG)) app.run(host, port=int(port), debug=bool(debug)) if __name__ == '__main__': launch()