diff --git a/swh/backend/api.py b/swh/backend/api.py index f48355c..28baa86 100755 --- a/swh/backend/api.py +++ b/swh/backend/api.py @@ -1,298 +1,292 @@ #!/usr/bin/env python3 # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging from flask import Flask, Response, make_response, request from swh.storage import store, db, service from swh.protocols import serial # api's definition app = Flask(__name__) def read_request_payload(request): """Read the request's payload. """ # TODO: Check the signed pickled data? return serial.load(request.stream) def write_response(data): """Write response from data. """ return Response(serial.dumps(data), mimetype=serial.MIMETYPE) @app.route('/') def hello(): """A simple api to define what the server is all about. FIXME: A redirect towards a static page defining the routes would be nice. """ return 'Dev SWH API' -# dispatch on build object function for the right type -_build_object_fn = service.build_object_fn - # from uri to type _uri_types = {'revisions': store.Type.revision, 'directories': store.Type.directory, 'contents': store.Type.content, 'releases': store.Type.release, 'occurrences': store.Type.occurrence} -def _do_action(action_fn, uri_type, sha1hex, map_result_fn): - uri_type_ok = _uri_types.get(uri_type, None) - if not uri_type_ok: - return make_response('Bad request!', 400) - - vcs_object = _build_object_fn[uri_type_ok](sha1hex, None) - return action_fn(app.config['conf'], vcs_object, map_result_fn) - - -def _do_action_with_payload(action_fn, uri_type, sha1hex, map_result_fn): +def _do_action_with_payload(conf, action_fn, uri_type, id, map_result_fn): uri_type_ok = _uri_types.get(uri_type, None) if uri_type_ok is None: return make_response('Bad request!', 400) - payload = read_request_payload(request) - vcs_object = _build_object_fn[uri_type_ok](sha1hex, payload) - return action_fn(app.config['conf'], vcs_object, map_result_fn) + vcs_object = read_request_payload(request) + vcs_object.update({'id': id, + 'type': uri_type_ok}) + return action_fn(conf, vcs_object, map_result_fn) # occurrence type is not dealt the same way _post_all_uri_types = {'revisions': store.Type.revision, 'directories': store.Type.directory, 'contents': store.Type.content} @app.route('/vcs//', methods=['POST']) def filter_unknowns_type(uri_type): """Filters unknown sha1 to the backend and returns them. """ if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) obj_type = _post_all_uri_types.get(uri_type) if obj_type is None: return make_response('Bad request. Type not supported!', 400) sha1s = read_request_payload(request) config = app.config['conf'] with db.connect(config['db_url']) as db_conn: unknowns_sha1s = service.filter_unknowns_type(db_conn, obj_type, sha1s) if unknowns_sha1s is None: return make_response('Bad request!', 400) else: return write_response(unknowns_sha1s) @app.route('/vcs/persons/', methods=['POST']) def post_person(): """Post a person. """ if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) origin = read_request_payload(request) config = app.config['conf'] with db.connect(config['db_url']) as db_conn: try: person_found = service.find_person(db_conn, origin) if person_found: return write_response(person_found) else: return make_response('Person not found!', 404) except: return make_response('Bad request!', 400) @app.route('/origins/', methods=['POST']) def post_origin(): """Post an origin. """ if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) origin = read_request_payload(request) config = app.config['conf'] with db.connect(config['db_url']) as db_conn: try: origin_found = service.find_origin(db_conn, origin) if origin_found: return write_response(origin_found) else: return make_response('Origin not found!', 404) except: return make_response('Bad request!', 400) @app.route('/origins/', methods=['PUT']) def put_origin(): """Create an origin or returns it if already existing. """ if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) origin = read_request_payload(request) config = app.config['conf'] with db.connect(config['db_url']) as db_conn: try: origin_found = service.add_origin(db_conn, origin) return write_response(origin_found) # FIXME 204 except: return make_response('Bad request!', 400) @app.route('/vcs/persons/', methods=['PUT']) def put_all_persons(): """Store or update given revisions. FIXME: Refactor same behavior with `put_all`. """ if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) payload = read_request_payload(request) obj_type = store.Type.person config = app.config['conf'] with db.connect(config['db_url']) as db_conn: service.add_persons(db_conn, config, obj_type, payload) return make_response('Successful creation!', 204) @app.route('/vcs/revisions/', methods=['PUT']) def put_all_revisions(): """Store or update given revisions. FIXME: Refactor same behavior with `put_all`. """ if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) payload = read_request_payload(request) obj_type = store.Type.revision config = app.config['conf'] with db.connect(config['db_url']) as db_conn: service.add_revisions(db_conn, config, obj_type, payload) return make_response('Successful creation!', 204) @app.route('/vcs//', methods=['PUT']) def put_all(uri_type): """Store or update given objects (uri_type in {contents, directories, releases). """ if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) payload = read_request_payload(request) obj_type = _uri_types[uri_type] config = app.config['conf'] with db.connect(config['db_url']) as db_conn: service.add_objects(db_conn, config, obj_type, payload) return make_response('Successful creation!', 204) -def lookup(config, vcs_object, map_result_fn): - """Looking up type object with sha1. - - config is the configuration needed for the backend to execute query - - vcs_object is the object to look for in the backend - - map_result_fn is a mapping function which takes the backend's result - and transform its output accordingly. - - This function returns an http response of the result. - """ - sha1hex = vcs_object['id'] - logging.debug('read %s %s' % (vcs_object['type'], sha1hex)) - - with db.connect(config['db_url']) as db_conn: - res = store.find(db_conn, vcs_object) - if res: - return write_response(map_result_fn(sha1hex, res)) # 200 - return make_response('Not found!', 404) - - def add_object(config, vcs_object, map_result_fn): """Add object in storage. - config is the configuration needed for the backend to execute query - vcs_object is the object to look for in the backend - map_result_fn is a mapping function which takes the backend's result and transform its output accordingly. This function returns an http response of the result. """ type = vcs_object['type'] - sha1hex = vcs_object['id'] # FIXME: remove useless key and send direct list - logging.debug('store %s %s' % (type, sha1hex)) + id = vcs_object['id'] # FIXME: remove useless key and send direct list + logging.debug('store %s %s' % (type, id)) with db.connect(config['db_url']) as db_conn: if store.find(db_conn, vcs_object): - logging.debug('update %s %s' % (sha1hex, type)) + logging.debug('update %s %s' % (id, type)) return make_response('Successful update!', 200) # immutable else: - logging.debug('store %s %s' % (sha1hex, type)) + logging.debug('store %s %s' % (id, type)) res = store.add(db_conn, config, vcs_object) if res is None: return make_response('Bad request!', 400) elif res is False: - logging.error('store %s %s' % (sha1hex, type)) + logging.error('store %s %s' % (id, type)) return make_response('Internal server error!', 500) else: - return make_response(map_result_fn(sha1hex, res), 204) + return make_response(map_result_fn(id, res), 204) + + +def _do_lookup(conf, uri_type, id, map_result_fn): + """Looking up type object with sha1. + - config is the configuration needed for the backend to execute query + - vcs_object is the object to look for in the backend + - map_result_fn is a mapping function which takes the backend's result + and transform its output accordingly. + + This function returns an http response of the result. + """ + uri_type_ok = _uri_types.get(uri_type, None) + if not uri_type_ok: + return make_response('Bad request!', 400) + + vcs_object = {'id': id, + 'type': uri_type_ok} + + with db.connect(conf['db_url']) as db_conn: + res = store.find(db_conn, vcs_object) + if res: + return write_response(map_result_fn(id, res)) # 200 + return make_response('Not found!', 404) -@app.route('/vcs/occurrences/') -def list_occurrences_for(sha1hex): - """Return the occurrences pointing to the revision sha1hex. +@app.route('/vcs/occurrences/') +def list_occurrences_for(id): + """Return the occurrences pointing to the revision id. """ - return _do_action(lookup, + return _do_lookup(app.config['conf'], 'occurrences', - sha1hex, + id, lambda _, result: list(map(lambda col: col[1], result))) -@app.route('/vcs//') -def object_exists_p(uri_type, sha1hex): - """Assert if the object with sha1 sha1hex, of type uri_type, exists. +@app.route('/vcs//') +def object_exists_p(uri_type, id): + """Assert if the object with sha1 id, of type uri_type, exists. """ - return _do_action(lookup, + return _do_lookup(app.config['conf'], uri_type, - sha1hex, - lambda sha1hex, _: {'id': sha1hex}) + id, + lambda sha1, _: {'id': sha1}) -@app.route('/vcs//', methods=['PUT']) -def put_object(uri_type, sha1hex): +@app.route('/vcs//', methods=['PUT']) +def put_object(uri_type, id): """Put an object in storage. """ - return _do_action_with_payload(add_object, + return _do_action_with_payload(app.config['conf'], + add_object, uri_type, - sha1hex, - lambda _1, _2: 'Successful Creation!') # FIXME use sha1hex or result instead + id, + lambda _1, _2: 'Successful Creation!') # FIXME use id or result instead def run(conf): """Run the api's server. conf is a dictionary of keywords: - 'db_url' the db url's access (through psycopg2 format) - 'content_storage_dir' revisions/directories/contents storage on disk - 'port' to override the default of 5000 (from the underlying layer: flask) - 'debug' activate the verbose logs """ app.config['conf'] = conf # app.config is the app's state (accessible) app.run(port=conf.get('port', None), debug=conf['debug'] == 'true') diff --git a/swh/gitloader/git.py b/swh/gitloader/git.py index 180fce2..7999735 100644 --- a/swh/gitloader/git.py +++ b/swh/gitloader/git.py @@ -1,179 +1,190 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import pygit2 import time from datetime import datetime from pygit2 import GIT_REF_OID from pygit2 import GIT_OBJ_COMMIT, GIT_OBJ_TREE, GIT_SORT_TOPOLOGICAL from enum import Enum from swh.core import hashutil -#from swh import hash from swh.data import swhrepo +from swh.storage import store class DirectoryTypeEntry(Enum): """Types of git objects. """ file = 'file' directory = 'directory' def date_format(d): """d is expected to be a datetime object. """ return time.strftime("%a, %d %b %Y %H:%M:%S +0000", d.timetuple()) def now(): """Cheat time values.""" return date_format(datetime.utcnow()) def timestamp_to_string(timestamp): """Convert a timestamps to string. """ return date_format(datetime.utcfromtimestamp(timestamp)) HASH_ALGORITHMS=['sha1', 'sha256'] def parse(repo_path): """Given a repository path, parse and return a memory model of such repository.""" def read_signature(signature): return '%s <%s>' % (signature.name, signature.email) def treewalk(repo, tree): """Walk a tree with the same implementation as `os.path`. Returns: tree, trees, blobs """ trees, blobs, dir_entries = [], [], [] for tree_entry in tree: obj = repo.get(tree_entry.oid) if obj is None: logging.warn('skip submodule-commit %s' % tree_entry.hex) continue # submodule! if obj.type == GIT_OBJ_TREE: logging.debug('found tree %s' % tree_entry.hex) nature = DirectoryTypeEntry.directory.value trees.append(tree_entry) else: logging.debug('found content %s' % tree_entry.hex) data = obj.data nature = DirectoryTypeEntry.file.value hashes = hashutil.hashdata(data, HASH_ALGORITHMS) blobs.append({'id': obj.hex, + 'type': store.Type.content, 'content-sha1': hashes['sha1'], 'content-sha256': hashes['sha256'], 'content': data, # FIXME: add pointer to data on disk? 'size': obj.size}) dir_entries.append({'name': tree_entry.name, + 'type': store.Type.directory_entry, 'target-sha1': obj.hex, 'nature': nature, 'perms': tree_entry.filemode, 'atime': now(), # FIXME: use real data 'mtime': now(), # FIXME: use real data 'ctime': now(), # FIXME: use real data 'parent': tree.hex}) yield tree, dir_entries, trees, blobs for tree_entry in trees: for x in treewalk(repo, repo[tree_entry.oid]): yield x def walk_tree(repo, swh_repo, rev): """Walk the rev revision's directories. """ if swh_repo.already_visited(rev.hex): logging.debug('commit %s already visited, skipped' % rev.hex) return swh_repo for dir_root, dir_entries, _, contents_ref in treewalk(repo, rev.tree): for content_ref in contents_ref: swh_repo.add_content(content_ref) swh_repo.add_directory({'id': dir_root.hex, - 'content': dir_root.read_raw(), # FIXME: add pointer to data on disk? - 'entries': dir_entries}) + 'type': store.Type.directory, + 'content': dir_root.read_raw(), # FIXME: add pointer to data on disk? + 'entries': dir_entries}) revision_parent_sha1s = list(map(str, rev.parent_ids)) author = {'name': rev.author.name, - 'email': rev.author.email} + 'email': rev.author.email, + 'type': store.Type.person} committer = {'name': rev.committer.name, - 'email': rev.committer.email} + 'email': rev.committer.email, + 'type': store.Type.person} + swh_repo.add_revision({'id': rev.hex, - 'content': rev.read_raw(), # FIXME: add pointer to data on disk? - 'date': timestamp_to_string(rev.commit_time), - 'directory': rev.tree.hex, - 'message': rev.message, - 'committer': committer, - 'author': author, - 'parent-sha1s': revision_parent_sha1s + 'type':store.Type.revision, + 'content': rev.read_raw(), # FIXME: add pointer to data on disk? + 'date': timestamp_to_string(rev.commit_time), + 'directory': rev.tree.hex, + 'message': rev.message, + 'committer': committer, + 'author': author, + 'parent-sha1s': revision_parent_sha1s }) swh_repo.add_person(read_signature(rev.author), author) swh_repo.add_person(read_signature(rev.committer), committer) return swh_repo def walk_revision_from(repo, swh_repo, head_rev): """Walk the rev history log from head_rev. - repo is the current repository - rev is the latest rev to start from. """ for rev in repo.walk(head_rev.id, GIT_SORT_TOPOLOGICAL): swh_repo = walk_tree(repo, swh_repo, rev) return swh_repo repo = pygit2.Repository(repo_path) # memory model swh_repo = swhrepo.SWHRepo() # add origin origin = {'type': 'git', 'url': 'file://' + repo.path} swh_repo.add_origin(origin) # add references and crawl them for ref_name in repo.listall_references(): logging.info('walk reference %s' % ref_name) ref = repo.lookup_reference(ref_name) head_rev = repo[ref.target] \ if ref.type is GIT_REF_OID \ else ref.peel(GIT_OBJ_COMMIT) # noqa if isinstance(head_rev, pygit2.Tag): head_start = head_rev.get_object() taggerSig = head_rev.tagger author = {'name': taggerSig.name, - 'email': taggerSig.email} + 'email': taggerSig.email, + 'type': store.Type.person} release = {'id': head_rev.hex, + 'type': store.Type.release, 'content': head_rev.read_raw(), # FIXME: add pointer to data on disk? 'revision': head_rev.target.hex, 'name': ref_name, 'date': now(), # FIXME: find the tag's date, 'author': author, 'comment': head_rev.message} swh_repo.add_release(release) swh_repo.add_person(read_signature(taggerSig), author) else: swh_repo.add_occurrence({'id': head_rev.hex, - 'reference': ref_name, - 'url-origin': origin['url']}) + 'revision': head_rev.hex, + 'reference': ref_name, + 'url-origin': origin['url'], + 'type': store.Type.occurrence}) head_start = head_rev # crawl commits and trees walk_revision_from(repo, swh_repo, head_start) return swh_repo diff --git a/swh/storage/mapping.py b/swh/storage/mapping.py index c562443..4f7d601 100755 --- a/swh/storage/mapping.py +++ b/swh/storage/mapping.py @@ -1,111 +1,110 @@ #!/usr/bin/env python3 # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime from swh.storage import store def build_content(sha1hex, obj_partial): """Build a content object from the obj_partial. """ obj_partial = obj_partial if obj_partial else {} return {'id': sha1hex, 'type': store.Type.content, 'content-sha1': obj_partial.get('content-sha1'), 'content-sha256': obj_partial.get('content-sha256'), 'content': obj_partial.get('content'), 'size': obj_partial.get('size')} def build_directory(sha1hex, obj_partial): """Build a directory object from the obj_partial. """ obj_partial = obj_partial if obj_partial else {} # FIXME get hack -> split get-post/put directory = {'id': sha1hex, 'type': store.Type.directory, 'content': obj_partial.get('content')} directory_entries = [] for entry in obj_partial.get('entries', []): directory_entry = build_directory_entry(sha1hex, entry) directory_entries.append(directory_entry) directory.update({'entries': directory_entries}) return directory def date_from_string(str_date): """Convert a string date with format '%a, %d %b %Y %H:%M:%S +0000'. """ return datetime.strptime(str_date, '%a, %d %b %Y %H:%M:%S +0000') def build_directory_entry(parent_sha1hex, entry): """Build a directory object from the entry. """ return {'name': entry['name'], 'target-sha1': entry['target-sha1'], 'nature': entry['nature'], 'perms': entry['perms'], 'atime': date_from_string(entry['atime']), 'mtime': date_from_string(entry['mtime']), 'ctime': date_from_string(entry['ctime']), 'parent': entry['parent']} - def build_revision(sha1hex, obj_partial): """Build a revision object from the obj_partial. """ obj = {'id': sha1hex, 'type': store.Type.revision} if obj_partial: obj.update({'content': obj_partial['content'], 'date': date_from_string(obj_partial['date']), 'directory': obj_partial['directory'], 'message': obj_partial['message'], 'author': obj_partial['author'], 'committer': obj_partial['committer'], 'parent-sha1s': obj_partial['parent-sha1s']}) return obj def build_release(sha1hex, obj_partial): """Build a release object from the obj_partial. """ obj = {'id': sha1hex, 'type': store.Type.release} if obj_partial: obj.update({'id': sha1hex, 'content': obj_partial['content'], 'revision': obj_partial['revision'], 'date': obj_partial['date'], 'name': obj_partial['name'], 'comment': obj_partial['comment'], 'author': obj_partial['author']}) return obj def build_occurrence(sha1hex, obj_partial): """Build a content object from the obj_partial. """ obj = {'id': sha1hex, 'type': store.Type.occurrence} if obj_partial: obj.update({'reference': obj_partial['reference'], 'type': store.Type.occurrence, 'revision': sha1hex, 'url-origin': obj_partial['url-origin']}) return obj def build_origin(sha1hex, obj_partial): """Build an origin. """ obj = {'id': obj_partial['url'], 'origin-type': obj_partial['type']} return obj diff --git a/swh/storage/service.py b/swh/storage/service.py index f829a53..d0e0293 100644 --- a/swh/storage/service.py +++ b/swh/storage/service.py @@ -1,91 +1,81 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from swh.storage import store, mapping +from swh.storage import store filter_unknowns_type = store.find_unknowns def find_origin(db_conn, origin): """Find origin. """ origin_found = store.find_origin(db_conn, origin) if origin_found: return {'id': origin_found[0]} return None def find_person(db_conn, person): """Find person. """ person_found = store.find_person(db_conn, person) if person_found: return {'id': person_found[0]} return None def add_origin(db_conn, origin): """Add origin if not already existing. """ origin_found = store.find_origin(db_conn, origin) if origin_found: return {'id': origin_found[0]} else: origin_id = store.add_origin(db_conn, origin) return {'id': origin_id} -build_object_fn = {store.Type.revision: mapping.build_revision, - store.Type.directory: mapping.build_directory, - store.Type.content: mapping.build_content, - store.Type.release: mapping.build_release, - store.Type.occurrence: mapping.build_occurrence} - - def add_revisions(db_conn, conf, obj_type, objs): """Add Revisions. """ couple_parents = [] for obj in objs: # iterate over objects of type uri_type - objfull = build_object_fn[obj_type](obj['id'], obj) - obj_found = store.find(db_conn, objfull) + obj_found = store.find(db_conn, obj) if not obj_found: - store.add(db_conn, conf, objfull) + store.add(db_conn, conf, obj) # deal with revision history - par_shas = objfull.get('parent-sha1s', None) + par_shas = obj.get('parent-sha1s', None) if par_shas: - couple_parents.extend([(objfull['id'], p) for p in par_shas]) + couple_parents.extend([(obj['id'], p) for p in par_shas]) store.add_revision_history(db_conn, couple_parents) return True def add_persons(db_conn, conf, obj_type, objs): """Add persons. conf, obj_type are not used (implementation detail.) """ for obj in objs: obj_found = store.find_person(db_conn, obj) if not obj_found: store.add_person(db_conn, obj) return True def add_objects(db_conn, conf, obj_type, objs): """Add objects. """ for obj in objs: # iterate over objects of type uri_type - obj_to_store = build_object_fn[obj_type](obj['id'], obj) - - obj_found = store.find(db_conn, obj_to_store) + obj_found = store.find(db_conn, obj) if not obj_found: - store.add(db_conn, conf, obj_to_store) + store.add(db_conn, conf, obj) return True diff --git a/swh/storage/store.py b/swh/storage/store.py index c263e1c..721ac9a 100755 --- a/swh/storage/store.py +++ b/swh/storage/store.py @@ -1,198 +1,199 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from io import StringIO from swh.storage import models, fs Type = models.Type + _find_object = {Type.occurrence: models.find_occurrences_for_revision} def find(db_conn, vcs_object): """Find an object according to its sha1hex and type. """ id = vcs_object['id'] # sha1 for every object except for origin (url) type = vcs_object['type'] find_fn = _find_object.get(type, models.find_object) return find_fn(db_conn, id, type) _find_unknown = {Type.revision: models.find_unknown_revisions, Type.content: models.find_unknown_contents, Type.directory: models.find_unknown_directories} def find_unknowns(db_conn, obj_type, sha1s_hex): """Given a list of sha1s, return the non presents one in storage. """ def row_to_sha1(row): """Convert a row (memoryview) to a string sha1. """ return row[0] vals = '\n'.join(sha1s_hex) cpy_data_buffer = StringIO() cpy_data_buffer.write(vals) cpy_data_buffer.seek(0) # move file cursor back at start of file find_unknown_fn = _find_unknown[obj_type] unknowns = find_unknown_fn(db_conn, cpy_data_buffer) cpy_data_buffer.close() return list(map(row_to_sha1, unknowns)) def _add_content(db_conn, vcs_object, sha1hex): """Add a blob to storage. Designed to be wrapped in a db transaction. Returns: - the sha1 if everything went alright. - None if something went wrong Writing exceptions can also be raised and expected to be handled by the caller. """ models.add_content(db_conn, sha1hex, vcs_object['content-sha1'], vcs_object['content-sha256'], vcs_object['size']) return sha1hex def _add_directory(db_conn, vcs_object, sha1hex): """Add a directory to storage. Designed to be wrapped in a db transaction. """ models.add_directory(db_conn, sha1hex) for directory_entry in vcs_object['entries']: _add_directory_entry(db_conn, directory_entry) return sha1hex def _add_directory_entry(db_conn, vcs_object): """Add a directory to storage. Designed to be wrapped in a db transaction. Returns: - the sha1 if everything went alright. - None if something went wrong Writing exceptions can also be raised and expected to be handled by the caller. """ name = vcs_object['name'] parent = vcs_object['parent'] models.add_directory_entry(db_conn, name, vcs_object['target-sha1'], vcs_object['nature'], vcs_object['perms'], vcs_object['atime'], vcs_object['mtime'], vcs_object['ctime'], parent) return name, parent def _add_revision(db_conn, vcs_object, sha1hex): """Add a revision to storage. Designed to be wrapped in a db transaction. Returns: - the sha1 if everything went alright. - None if something went wrong Writing exceptions can also be raised and expected to be handled by the caller. """ models.add_revision(db_conn, sha1hex, vcs_object['date'], vcs_object['directory'], vcs_object['message'], vcs_object['author'], vcs_object['committer'], vcs_object['parent-sha1s']) return sha1hex def _add_release(db_conn, vcs_object, sha1hex): """Add a release. """ models.add_release(db_conn, sha1hex, vcs_object['revision'], vcs_object['date'], vcs_object['name'], vcs_object['comment'], vcs_object['author']) return sha1hex def _add_occurrence(db_conn, vcs_object, sha1hex): """Add an occurrence. """ models.add_occurrence(db_conn, vcs_object['url-origin'], vcs_object['reference'], vcs_object['revision']) return sha1hex def add_person(db_conn, vcs_object): """Add an author. """ return models.add_person(db_conn, vcs_object['name'], vcs_object['email']) _store_fn = {Type.content: _add_content, Type.directory: _add_directory, Type.revision: _add_revision, Type.release: _add_release, Type.occurrence: _add_occurrence} def add_origin(db_conn, origin): """A a new origin and returns its id. """ return models.add_origin(db_conn, origin['url'], origin['type']) def find_origin(db_conn, origin): """Find an existing origin. """ return models.find_origin(db_conn, origin['url'], origin['type']) def find_person(db_conn, person): """Find an existing person. """ return models.find_person(db_conn, person['email'], person['name']) def add(db_conn, config, vcs_object): """Given a sha1hex, type and content, store a given object in the store. """ type = vcs_object['type'] sha1hex = vcs_object['id'] obj_content = vcs_object.get('content') if obj_content: res = fs.write_object(config['content_storage_dir'], sha1hex, obj_content, config['folder_depth'], config['storage_compression']) if not res: return False return _store_fn[type](db_conn, vcs_object, sha1hex) return _store_fn[type](db_conn, vcs_object, sha1hex) def add_revision_history(db_conn, couple_parents): """Given a list of tuple (sha, parent_sha), store in revision_history. """ if len(couple_parents) > 0: models.add_revision_history(db_conn, couple_parents) diff --git a/swh/tests/test_api_occurrence.py b/swh/tests/test_api_occurrence.py index 960b244..a06e9c2 100644 --- a/swh/tests/test_api_occurrence.py +++ b/swh/tests/test_api_occurrence.py @@ -1,129 +1,130 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.storage import db, models from swh.protocols import serial from test_utils import now, app_client @attr('slow') class OccurrenceTestCase(unittest.TestCase): def setUp(self): self.app, db_url = app_client() with db.connect(db_url) as db_conn: self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e' models.add_directory(db_conn, self.directory_sha1_hex) authorAndCommitter = {'name': 'some-name', 'email': 'some-email'} models.add_person(db_conn, authorAndCommitter['name'], authorAndCommitter['email']) self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e' models.add_revision(db_conn, self.revision_sha1_hex, now(), self.directory_sha1_hex, "revision message", authorAndCommitter, authorAndCommitter) self.origin_url = "https://github.com/user/repo" models.add_origin(db_conn, self.origin_url, 'git') self.reference_name = 'master' models.add_occurrence(db_conn, self.origin_url, self.reference_name, self.revision_sha1_hex) self.reference_name2 = 'master2' models.add_occurrence(db_conn, self.origin_url, self.reference_name2, self.revision_sha1_hex) self.revision_sha1_hex_2 = '2-revision-sha1-to-test-existence9994f71' models.add_revision(db_conn, self.revision_sha1_hex_2, now(), self.directory_sha1_hex, "revision message 2", authorAndCommitter, authorAndCommitter) @istest def get_occurrence_ok(self): # when rv = self.app.get('/vcs/occurrences/%s' % self.revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data) == [self.reference_name, self.reference_name2] @istest def get_occurrence_not_found(self): # when rv = self.app.get('/vcs/occurrences/inexistant-sha1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def get_occurrence_not_found_with_bad_format(self): # when rv = self.app.get('/vcs/occurrences/1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def put_occurrence_create_and_update(self): occ_revision_sha1_hex = self.revision_sha1_hex_2 rv = self.app.get('/vcs/occurrences/%s' % occ_revision_sha1_hex) # then assert rv.status_code == 404 assert rv.data == b'Not found!' # we create it body = serial.dumps({'content': b'occurrence content', + 'revision': occ_revision_sha1_hex, # FIXME redundant with the one from uri... 'reference': 'master', 'url-origin': self.origin_url}) - rv = self.app.put('/vcs/occurrences/%s' % occ_revision_sha1_hex, + rv = self.app.put('/vcs/occurrences/%s' % occ_revision_sha1_hex, # ... here data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # now it exists rv = self.app.get('/vcs/occurrences/%s' % occ_revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data) == ['master'] # we update it rv = self.app.put('/vcs/occurrences/%s' % occ_revision_sha1_hex, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 200 assert rv.data == b'Successful update!' # still the same rv = self.app.get('/vcs/occurrences/%s' % occ_revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data) == ['master']