diff --git a/swh/storage/api/client.py b/swh/storage/api/client.py index 2d283d21..d2557ec5 100644 --- a/swh/storage/api/client.py +++ b/swh/storage/api/client.py @@ -1,103 +1,101 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import json - import requests -from swh.core.hashutil import hash_to_hex -from swh.core.json import SWHJSONDecoder, SWHJSONEncoder +from swh.core.serializers import msgpack_dumps, msgpack_loads, SWHJSONDecoder + + +def encode_data(data): + return msgpack_dumps(data) + + +def decode_response(response): + content_type = response.headers['content-type'] + + if content_type.startswith('application/x-msgpack'): + r = msgpack_loads(response.content) + elif content_type.startswith('application/json'): + r = response.json(cls=SWHJSONDecoder) + else: + raise ValueError('Wrong content type `%s` for API response' + % content_type) + + return r class RemoteStorage(): """Proxy to a remote storage API""" def __init__(self, base_url): self.base_url = base_url def url(self, endpoint): return '%s%s' % (self.base_url, endpoint) def post(self, endpoint, data): - raw_data = json.dumps(data, cls=SWHJSONEncoder) - response = requests.post( - self.url(endpoint), - data=raw_data, - headers={'content-type': 'application/json; charset=utf8'}, - ) - - return response.json(cls=SWHJSONDecoder) - - def post_files(self, endpoint, data, files): - raw_data = json.dumps(data, cls=SWHJSONEncoder) - files['metadata'] = raw_data response = requests.post( self.url(endpoint), - files=files, + data=encode_data(data), + headers={'content-type': 'application/x-msgpack'}, ) - return response.json(cls=SWHJSONDecoder) + return decode_response(response) def get(self, endpoint, data): response = requests.get( self.url(endpoint), params=data, ) if response.status_code == 404: return None else: - return response.json(cls=SWHJSONDecoder) + return decode_response(response) def content_add(self, content): - files = {} - for file in content: - if file.get('status', 'visible') != 'visible': - continue - file_id = hash_to_hex(file['sha1']) - files[file_id] = file.pop('data') - return self.post_files('content/add', {'content': content}, files) + return self.post('content/add', {'content': content}) def content_missing(self, content, key_hash='sha1'): return self.post('content/missing', {'content': content, 'key_hash': key_hash}) def content_find(self, content): return self.post('content/present', {'content': content}) def directory_add(self, directories): return self.post('directory/add', {'directories': directories}) def directory_missing(self, directories): return self.post('directory/missing', {'directories': directories}) def directory_get(self, directory): return [tuple(entry) for entry in self.get('directory', {'directory': directory})] def revision_add(self, revisions): return self.post('revision/add', {'revisions': revisions}) def revision_missing(self, revisions): return self.post('revision/missing', {'revisions': revisions}) def release_add(self, releases): return self.post('release/add', {'releases': releases}) def release_missing(self, releases): return self.post('release/missing', {'releases': releases}) def occurrence_add(self, occurrences): return self.post('occurrence/add', {'occurrences': occurrences}) def origin_get(self, origin): origin = self.get('origin', origin) if not origin: return None else: return origin['id'] def origin_add_one(self, origin): return self.post('origin', {'origin': origin}) diff --git a/swh/storage/api/server.py b/swh/storage/api/server.py index 3ba4c9a8..880b6de2 100644 --- a/swh/storage/api/server.py +++ b/swh/storage/api/server.py @@ -1,153 +1,157 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import logging from flask import Flask, Request, Response, abort, g, request from swh.core import config -from swh.core.json import SWHJSONDecoder, SWHJSONEncoder -from swh.core.hashutil import hash_to_hex +from swh.core.serializers import msgpack_dumps, msgpack_loads, SWHJSONDecoder from swh.storage import Storage DEFAULT_CONFIG = { 'db': ('str', 'dbname=softwareheritage-dev'), 'storage_base': ('str', '/tmp/swh-storage/test'), } class BytesRequest(Request): """Request with proper escaping of arbitrary byte sequences.""" encoding = 'utf-8' encoding_errors = 'surrogateescape' app = Flask(__name__) -app.json_encoder = SWHJSONEncoder -app.json_decoder = SWHJSONDecoder app.request_class = BytesRequest -def jsonify(data): +def encode_data(data): return Response( - json.dumps(data, cls=SWHJSONEncoder), - mimetype='application/json', + msgpack_dumps(data), + mimetype='application/x-msgpack', ) +def decode_request(request): + content_type = request.mimetype + data = request.get_data() + + if content_type == 'application/x-msgpack': + r = msgpack_loads(data) + elif content_type == 'application/json': + r = json.loads(data, cls=SWHJSONDecoder) + else: + raise ValueError('Wrong content type `%s` for API request' + % content_type) + + return r + + @app.before_request def before_request(): g.storage = Storage(app.config['db'], app.config['storage_base']) @app.route('/') def index(): return 'Hello' @app.route('/content/missing', methods=['POST']) def content_missing(): - return jsonify(g.storage.content_missing(**request.json)) + return encode_data(g.storage.content_missing(**decode_request(request))) @app.route('/content/present', methods=['POST']) def content_find(): - return jsonify(g.storage.content_find(**request.json)) + return encode_data(g.storage.content_find(**decode_request(request))) @app.route('/content/add', methods=['POST']) def content_add(): - json_data = request.files['metadata'].read().decode('utf-8') - metadata = json.loads(json_data, cls=SWHJSONDecoder)['content'] - for file_data in metadata: - if file_data.get('status', 'visible') == 'visible': - file_id = hash_to_hex(file_data['sha1']) - file = request.files[file_id] - file_data['data'] = file.read() - file.close() - return jsonify(g.storage.content_add(content=metadata)) + return encode_data(g.storage.content_add(**decode_request(request))) @app.route('/directory/missing', methods=['POST']) def directory_missing(): - return jsonify(g.storage.directory_missing(**request.json)) + return encode_data(g.storage.directory_missing(**decode_request(request))) @app.route('/directory/add', methods=['POST']) def directory_add(): - return jsonify(g.storage.directory_add(**request.json)) + return encode_data(g.storage.directory_add(**decode_request(request))) @app.route('/directory', methods=['GET']) def directory_get(): dir = request.args['directory'].encode('utf-8', 'surrogateescape') - return jsonify(g.storage.directory_get(dir)) + return encode_data(g.storage.directory_get(dir)) @app.route('/revision/add', methods=['POST']) def revision_add(): - return jsonify(g.storage.revision_add(**request.json)) + return encode_data(g.storage.revision_add(**decode_request(request))) @app.route('/revision/missing', methods=['POST']) def revision_missing(): - return jsonify(g.storage.revision_missing(**request.json)) + return encode_data(g.storage.revision_missing(**decode_request(request))) @app.route('/release/add', methods=['POST']) def release_add(): - return jsonify(g.storage.release_add(**request.json)) + return encode_data(g.storage.release_add(**decode_request(request))) @app.route('/release/missing', methods=['POST']) def release_missing(): - return jsonify(g.storage.release_missing(**request.json)) + return encode_data(g.storage.release_missing(**decode_request(request))) @app.route('/occurrence/add', methods=['POST']) def occurrence_add(): - return jsonify(g.storage.occurrence_add(**request.json)) + return encode_data(g.storage.occurrence_add(**decode_request(request))) @app.route('/origin', methods=['GET']) def origin_get(): origin = { 'type': request.args['type'], 'url': request.args['url'], } id = g.storage.origin_get(origin) if not id: abort(404) else: origin['id'] = id - return jsonify(origin) + return encode_data(origin) @app.route('/origin', methods=['POST']) def origin_add_one(): - return jsonify(g.storage.origin_add_one(**request.json)) + return encode_data(g.storage.origin_add_one(**decode_request(request))) def run_from_webserver(environ, start_response): """Run the WSGI app from the webserver, loading the configuration.""" config_path = '/etc/softwareheritage/storage/storage.ini' app.config.update(config.read(config_path, DEFAULT_CONFIG)) handler = logging.StreamHandler() app.logger.addHandler(handler) return app(environ, start_response) if __name__ == '__main__': import sys app.config.update(config.read(sys.argv[1], DEFAULT_CONFIG)) app.run(debug=True)