# diff --git a/swh/storage/api.py b/swh/storage/api.py
# index ecc934b9..945146c5 100644
# --- a/swh/storage/api.py
# +++ b/swh/storage/api.py
# @@ -1,134 +1,141 @@
# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import json

from flask import Flask, Request, Response, abort, g, request

from swh.core import config
from swh.core.json import SWHJSONDecoder, SWHJSONEncoder
from swh.core.hashutil import hash_to_hex
from swh.storage import Storage

# Default configuration entries, each a (type name, default value) pair.
DEFAULT_CONFIG = {
    'db': ('str', 'dbname=softwareheritage-dev'),
    'storage_base': ('str', '/tmp/swh-storage/test'),
}


class BytesRequest(Request):
    """Request class using UTF-8 with the 'surrogateescape' error handler,
    so that arbitrary byte sequences are escaped."""

    encoding = 'utf-8'
    encoding_errors = 'surrogateescape'


app = Flask(__name__)
app.json_encoder = SWHJSONEncoder
app.json_decoder = SWHJSONDecoder
app.request_class = BytesRequest


def jsonify(data):
    """Serialize `data` with SWHJSONEncoder and wrap it in a JSON Response."""
    body = json.dumps(data, cls=SWHJSONEncoder)
    return Response(body, mimetype='application/json')


@app.before_request
def before_request():
    """Build a Storage from the app configuration and attach it to `g`."""
    db = app.config['db']
    storage_base = app.config['storage_base']
    g.storage = Storage(db, storage_base)


@app.route('/')
def index():
    """Answer the root URL with a fixed greeting."""
    return 'Hello'


@app.route('/content/missing', methods=['POST'])
def content_missing():
    """Forward the JSON body as keyword arguments to
    storage.content_missing and return the result as JSON."""
    result = g.storage.content_missing(**request.json)
    return jsonify(result)


@app.route('/content/add', methods=['POST'])
def content_add():
    """Add contents sent as a multipart upload.

    The uploaded file named 'metadata' holds a UTF-8 JSON document whose
    'content' key lists the content entries. For each entry, the uploaded
    file named after the hex form of its 'sha1' is read and stored under
    the entry's 'data' key before the list is handed to
    storage.content_add.
    """
    raw_metadata = request.files['metadata'].read()
    decoded = json.loads(raw_metadata.decode('utf-8'), cls=SWHJSONDecoder)
    contents = decoded['content']

    for entry in contents:
        upload = request.files[hash_to_hex(entry['sha1'])]
        entry['data'] = upload.read()

    result = g.storage.content_add(content=contents)
    return jsonify(result)


@app.route('/directory/missing', methods=['POST'])
def directory_missing():
    """Forward the JSON body as keyword arguments to
    storage.directory_missing and return the result as JSON."""
    result = g.storage.directory_missing(**request.json)
    return jsonify(result)
@app.route('/directory/add', methods=['POST'])
def directory_add():
    """Forward the JSON body as keyword arguments to
    storage.directory_add and return the result as JSON."""
    result = g.storage.directory_add(**request.json)
    return jsonify(result)


@app.route('/directory', methods=['GET'])
def directory_get():
    """Return, as JSON, what storage.directory_get yields for the
    'directory' query argument (re-encoded to bytes)."""
    raw_directory = request.args['directory']
    directory = raw_directory.encode('utf-8', 'surrogateescape')
    return jsonify(g.storage.directory_get(directory))


@app.route('/revision/add', methods=['POST'])
def revision_add():
    """Forward the JSON body as keyword arguments to
    storage.revision_add and return the result as JSON."""
    result = g.storage.revision_add(**request.json)
    return jsonify(result)


@app.route('/revision/missing', methods=['POST'])
def revision_missing():
    """Forward the JSON body as keyword arguments to
    storage.revision_missing and return the result as JSON."""
    result = g.storage.revision_missing(**request.json)
    return jsonify(result)


@app.route('/release/add', methods=['POST'])
def release_add():
    """Forward the JSON body as keyword arguments to
    storage.release_add and return the result as JSON."""
    result = g.storage.release_add(**request.json)
    return jsonify(result)


@app.route('/release/missing', methods=['POST'])
def release_missing():
    """Forward the JSON body as keyword arguments to
    storage.release_missing and return the result as JSON."""
    result = g.storage.release_missing(**request.json)
    return jsonify(result)


@app.route('/occurrence/add', methods=['POST'])
def occurrence_add():
    """Forward the JSON body as keyword arguments to
    storage.occurrence_add and return the result as JSON."""
    result = g.storage.occurrence_add(**request.json)
    return jsonify(result)


@app.route('/origin', methods=['GET'])
def origin_get():
    """Look up an origin from the 'type' and 'url' query arguments.

    Responds with 404 when storage.origin_get returns a falsy value;
    otherwise returns the origin, completed with its 'id', as JSON.
    """
    origin = {}
    origin['type'] = request.args['type']
    origin['url'] = request.args['url']

    origin_id = g.storage.origin_get(origin)
    if not origin_id:
        abort(404)

    origin['id'] = origin_id
    return jsonify(origin)


@app.route('/origin', methods=['POST'])
def origin_add_one():
    """Forward the JSON body as keyword arguments to
    storage.origin_add_one and return the result as JSON."""
    result = g.storage.origin_add_one(**request.json)
    return jsonify(result)


def run_from_webserver(environ, start_response):
    """WSGI entry point: load the configuration file, then delegate to app."""
    config_path = '/etc/softwareheritage/storage/storage.ini'
    settings = config.read(config_path, DEFAULT_CONFIG)
    app.config.update(settings)

    return app(environ, start_response)


if __name__ == '__main__':
    import sys

    # The configuration file path is the first command-line argument.
    settings = config.read(sys.argv[1], DEFAULT_CONFIG)
    app.config.update(settings)
    app.run(debug=True)

# diff --git a/swh/storage/remote_storage.py b/swh/storage/remote_storage.py
# index f4689df0..a2270593 100644
# --- a/swh/storage/remote_storage.py
# +++ b/swh/storage/remote_storage.py
# @@ -1,83 +1,98 @@
# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
#
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import json

import requests

from swh.core.hashutil import hash_to_hex
from swh.core.json import SWHJSONDecoder, SWHJSONEncoder


class RemoteStorage():
    """Proxy to a remote storage API"""

    def __init__(self, base_url):
        """Remember the base URL every endpoint is appended to."""
        self.base_url = base_url

    def url(self, endpoint):
        """Return the full URL for `endpoint` (plain concatenation with
        the base URL; no separator is inserted)."""
        return '%s%s' % (self.base_url, endpoint)

    def post(self, endpoint, data):
        """POST `data`, JSON-encoded with SWHJSONEncoder, to `endpoint`
        and return the response decoded with SWHJSONDecoder."""
        raw_data = json.dumps(data, cls=SWHJSONEncoder)
        response = requests.post(
            self.url(endpoint),
            data=raw_data,
            headers={'content-type': 'application/json; charset=utf8'},
        )

        return response.json(cls=SWHJSONDecoder)

    def post_files(self, endpoint, data, files):
        """POST a multipart request to `endpoint`.

        Args:
            endpoint: path appended to the base URL.
            data: object serialized with SWHJSONEncoder and sent as the
                'metadata' part.
            files: mapping of part name to part payload; it is copied,
                not modified.

        Returns:
            The response body decoded with SWHJSONDecoder.
        """
        raw_data = json.dumps(data, cls=SWHJSONEncoder)

        # Work on a copy so the caller's mapping is left untouched.
        parts = dict(files)
        # The /content/add handler reads the JSON document from the part
        # named 'metadata'; sending it under any other name makes the
        # server unable to find it.
        parts['metadata'] = raw_data

        response = requests.post(
            self.url(endpoint),
            files=parts,
        )

        return response.json(cls=SWHJSONDecoder)

    def get(self, endpoint, data):
        """GET `endpoint` with `data` as query parameters.

        Returns None when the server answers 404, otherwise the response
        body decoded with SWHJSONDecoder.
        """
        response = requests.get(
            self.url(endpoint),
            params=data,
        )

        if response.status_code == 404:
            return None
        else:
            return response.json(cls=SWHJSONDecoder)

    def content_add(self, content):
        """Send contents to 'content/add' as a multipart upload.

        Each entry's 'data' value is sent as a part named after the hex
        form of the entry's 'sha1'; the remaining keys are sent in the
        JSON metadata. The caller's entries are not modified.

        Raises:
            KeyError: if an entry lacks 'sha1' or 'data'.
        """
        files = {}
        metadata = []
        for entry in content:
            files[hash_to_hex(entry['sha1'])] = entry['data']
            # Copy every key except the raw data instead of popping it
            # from the caller's dict.
            metadata.append(
                {key: value for key, value in entry.items() if key != 'data'}
            )

        return self.post_files('content/add', {'content': metadata}, files)

    def content_missing(self, content, key_hash='sha1'):
        """POST `content` and `key_hash` to 'content/missing'."""
        return self.post('content/missing', {'content': content,
                                             'key_hash': key_hash})

    def directory_add(self, directories):
        """POST `directories` to 'directory/add'."""
        return self.post('directory/add', {'directories': directories})

    def directory_missing(self, directories):
        """POST `directories` to 'directory/missing'."""
        return self.post('directory/missing', {'directories': directories})

    def directory_get(self, directory):
        """GET 'directory' for `directory` and return its entries as a
        list of tuples."""
        # NOTE(review): self.get returns None on a 404, in which case this
        # iteration raises TypeError -- confirm whether that can happen.
        return [tuple(entry)
                for entry in self.get('directory', {'directory': directory})]

    def revision_add(self, revisions):
        """POST `revisions` to 'revision/add'."""
        return self.post('revision/add', {'revisions': revisions})

    def revision_missing(self, revisions):
        """POST `revisions` to 'revision/missing'."""
        return self.post('revision/missing', {'revisions': revisions})

    def release_add(self, releases):
        """POST `releases` to 'release/add'."""
        return self.post('release/add', {'releases': releases})

    def release_missing(self, releases):
        """POST `releases` to 'release/missing'."""
        return self.post('release/missing', {'releases': releases})

    def occurrence_add(self, occurrences):
        """POST `occurrences` to 'occurrence/add'."""
        return self.post('occurrence/add', {'occurrences': occurrences})

    def origin_get(self, origin):
        """GET 'origin' with `origin` as query parameters and return the
        'id' of the answer, or None when the answer is empty or a 404."""
        origin = self.get('origin', origin)

        if not origin:
            return None
        else:
            return origin['id']

    def origin_add_one(self, origin):
        """POST `origin` to 'origin'."""
        return self.post('origin', {'origin': origin})