diff --git a/swh/backend/api.py b/swh/backend/api.py index 1a10f3f..f48355c 100755 --- a/swh/backend/api.py +++ b/swh/backend/api.py @@ -1,298 +1,298 @@ #!/usr/bin/env python3 # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging from flask import Flask, Response, make_response, request from swh.storage import store, db, service from swh.protocols import serial # api's definition app = Flask(__name__) def read_request_payload(request): """Read the request's payload. """ # TODO: Check the signed pickled data? return serial.load(request.stream) def write_response(data): """Write response from data. """ return Response(serial.dumps(data), mimetype=serial.MIMETYPE) @app.route('/') def hello(): """A simple api to define what the server is all about. FIXME: A redirect towards a static page defining the routes would be nice. """ return 'Dev SWH API' # dispatch on build object function for the right type _build_object_fn = service.build_object_fn # from uri to type _uri_types = {'revisions': store.Type.revision, 'directories': store.Type.directory, 'contents': store.Type.content, 'releases': store.Type.release, 'occurrences': store.Type.occurrence} def _do_action(action_fn, uri_type, sha1hex, map_result_fn): uri_type_ok = _uri_types.get(uri_type, None) if not uri_type_ok: return make_response('Bad request!', 400) vcs_object = _build_object_fn[uri_type_ok](sha1hex, None) return action_fn(app.config['conf'], vcs_object, map_result_fn) def _do_action_with_payload(action_fn, uri_type, sha1hex, map_result_fn): uri_type_ok = _uri_types.get(uri_type, None) if uri_type_ok is None: return make_response('Bad request!', 400) payload = read_request_payload(request) vcs_object = _build_object_fn[uri_type_ok](sha1hex, payload) return action_fn(app.config['conf'], vcs_object, map_result_fn) # occurrence type is not dealt the same way _post_all_uri_types = {'revisions': store.Type.revision, 'directories': store.Type.directory, 'contents': store.Type.content} @app.route('/vcs//', methods=['POST']) def filter_unknowns_type(uri_type): """Filters unknown sha1 to the backend and returns them. """ if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) obj_type = _post_all_uri_types.get(uri_type) if obj_type is None: return make_response('Bad request. Type not supported!', 400) sha1s = read_request_payload(request) config = app.config['conf'] with db.connect(config['db_url']) as db_conn: unknowns_sha1s = service.filter_unknowns_type(db_conn, obj_type, sha1s) if unknowns_sha1s is None: return make_response('Bad request!', 400) else: return write_response(unknowns_sha1s) @app.route('/vcs/persons/', methods=['POST']) def post_person(): """Post a person. """ if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) origin = read_request_payload(request) config = app.config['conf'] with db.connect(config['db_url']) as db_conn: try: person_found = service.find_person(db_conn, origin) if person_found: return write_response(person_found) else: return make_response('Person not found!', 404) except: return make_response('Bad request!', 400) @app.route('/origins/', methods=['POST']) def post_origin(): """Post an origin. 
""" if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) origin = read_request_payload(request) config = app.config['conf'] with db.connect(config['db_url']) as db_conn: try: origin_found = service.find_origin(db_conn, origin) if origin_found: return write_response(origin_found) else: return make_response('Origin not found!', 404) except: return make_response('Bad request!', 400) @app.route('/origins/', methods=['PUT']) def put_origin(): """Create an origin or returns it if already existing. """ if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) origin = read_request_payload(request) config = app.config['conf'] with db.connect(config['db_url']) as db_conn: try: origin_found = service.add_origin(db_conn, origin) return write_response(origin_found) # FIXME 204 except: return make_response('Bad request!', 400) @app.route('/vcs/persons/', methods=['PUT']) def put_all_persons(): """Store or update given revisions. FIXME: Refactor same behavior with `put_all`. """ if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) payload = read_request_payload(request) obj_type = store.Type.person config = app.config['conf'] with db.connect(config['db_url']) as db_conn: service.add_persons(db_conn, config, obj_type, payload) return make_response('Successful creation!', 204) @app.route('/vcs/revisions/', methods=['PUT']) def put_all_revisions(): """Store or update given revisions. FIXME: Refactor same behavior with `put_all`. """ if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) payload = read_request_payload(request) obj_type = store.Type.revision config = app.config['conf'] with db.connect(config['db_url']) as db_conn: service.add_revisions(db_conn, config, obj_type, payload) return make_response('Successful creation!', 204) @app.route('/vcs//', methods=['PUT']) def put_all(uri_type): """Store or update given objects (uri_type in {contents, directories, releases). """ if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) payload = read_request_payload(request) obj_type = _uri_types[uri_type] config = app.config['conf'] with db.connect(config['db_url']) as db_conn: service.add_objects(db_conn, config, obj_type, payload) return make_response('Successful creation!', 204) def lookup(config, vcs_object, map_result_fn): """Looking up type object with sha1. - config is the configuration needed for the backend to execute query - vcs_object is the object to look for in the backend - map_result_fn is a mapping function which takes the backend's result and transform its output accordingly. This function returns an http response of the result. """ - sha1hex = vcs_object['sha1'] + sha1hex = vcs_object['id'] logging.debug('read %s %s' % (vcs_object['type'], sha1hex)) with db.connect(config['db_url']) as db_conn: res = store.find(db_conn, vcs_object) if res: return write_response(map_result_fn(sha1hex, res)) # 200 return make_response('Not found!', 404) def add_object(config, vcs_object, map_result_fn): """Add object in storage. 
- config is the configuration needed for the backend to execute query - vcs_object is the object to look for in the backend - map_result_fn is a mapping function which takes the backend's result and transform its output accordingly. This function returns an http response of the result. """ type = vcs_object['type'] - sha1hex = vcs_object['sha1'] # FIXME: remove useless key and send direct list + sha1hex = vcs_object['id'] # FIXME: remove useless key and send direct list logging.debug('store %s %s' % (type, sha1hex)) with db.connect(config['db_url']) as db_conn: if store.find(db_conn, vcs_object): logging.debug('update %s %s' % (sha1hex, type)) return make_response('Successful update!', 200) # immutable else: logging.debug('store %s %s' % (sha1hex, type)) res = store.add(db_conn, config, vcs_object) if res is None: return make_response('Bad request!', 400) elif res is False: logging.error('store %s %s' % (sha1hex, type)) return make_response('Internal server error!', 500) else: return make_response(map_result_fn(sha1hex, res), 204) @app.route('/vcs/occurrences/') def list_occurrences_for(sha1hex): """Return the occurrences pointing to the revision sha1hex. """ return _do_action(lookup, 'occurrences', sha1hex, lambda _, result: list(map(lambda col: col[1], result))) @app.route('/vcs//') def object_exists_p(uri_type, sha1hex): """Assert if the object with sha1 sha1hex, of type uri_type, exists. """ return _do_action(lookup, uri_type, sha1hex, lambda sha1hex, _: {'id': sha1hex}) @app.route('/vcs//', methods=['PUT']) def put_object(uri_type, sha1hex): """Put an object in storage. """ return _do_action_with_payload(add_object, uri_type, sha1hex, lambda _1, _2: 'Successful Creation!') # FIXME use sha1hex or result instead def run(conf): """Run the api's server. 
conf is a dictionary of keywords: - 'db_url' the db url's access (through psycopg2 format) - 'content_storage_dir' revisions/directories/contents storage on disk - 'port' to override the default of 5000 (from the underlying layer: flask) - 'debug' activate the verbose logs """ app.config['conf'] = conf # app.config is the app's state (accessible) app.run(port=conf.get('port', None), debug=conf['debug'] == 'true') diff --git a/swh/data/swhrepo.py b/swh/data/swhrepo.py index b82ebe7..df4963d 100644 --- a/swh/data/swhrepo.py +++ b/swh/data/swhrepo.py @@ -1,70 +1,70 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information class SWHRepo(): """Structure with: - sha1s as list - map indexed by sha1 """ def __init__(self): self.origin = {} self.releases = [] self.occurrences = [] self.contents = {} self.directories = {} self.revisions = {} self.persons = {} self.visited = set() def add_origin(self, origin): self.origin = origin def get_origin(self): return self.origin def add_release(self, release): self.releases.append(release) def get_releases(self): return self.releases def add_occurrence(self, occurrence): self.occurrences.append(occurrence) def get_occurrences(self): return self.occurrences def add_content(self, content_ref): - sha1 = content_ref['sha1'] + sha1 = content_ref['id'] self.contents[sha1] = content_ref self.visited.add(sha1) def get_contents(self): return self.contents def add_directory(self, directory): - sha1 = directory['sha1'] + sha1 = directory['id'] self.directories[sha1] = directory self.visited.add(sha1) def get_directories(self): return self.directories def add_revision(self, revision): - sha1 = revision['sha1'] + sha1 = revision['id'] self.revisions[sha1] = revision self.visited.add(sha1) def add_person(self, id, person): self.persons[id] = person def get_persons(self): return self.persons.values() def already_visited(self, sha1): return sha1 in self.visited def get_revisions(self): return self.revisions diff --git a/swh/gitloader/git.py b/swh/gitloader/git.py index 3975177..180fce2 100644 --- a/swh/gitloader/git.py +++ b/swh/gitloader/git.py @@ -1,179 +1,179 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import pygit2 import time from datetime import datetime from pygit2 import GIT_REF_OID from pygit2 import GIT_OBJ_COMMIT, GIT_OBJ_TREE, GIT_SORT_TOPOLOGICAL from enum import Enum from swh.core import hashutil #from swh import hash from swh.data import swhrepo class DirectoryTypeEntry(Enum): """Types of git objects. """ file = 'file' directory = 'directory' def date_format(d): """d is expected to be a datetime object. """ return time.strftime("%a, %d %b %Y %H:%M:%S +0000", d.timetuple()) def now(): """Cheat time values.""" return date_format(datetime.utcnow()) def timestamp_to_string(timestamp): """Convert a timestamps to string. 
""" return date_format(datetime.utcfromtimestamp(timestamp)) HASH_ALGORITHMS=['sha1', 'sha256'] def parse(repo_path): """Given a repository path, parse and return a memory model of such repository.""" def read_signature(signature): return '%s <%s>' % (signature.name, signature.email) def treewalk(repo, tree): """Walk a tree with the same implementation as `os.path`. Returns: tree, trees, blobs """ trees, blobs, dir_entries = [], [], [] for tree_entry in tree: obj = repo.get(tree_entry.oid) if obj is None: logging.warn('skip submodule-commit %s' % tree_entry.hex) continue # submodule! if obj.type == GIT_OBJ_TREE: logging.debug('found tree %s' % tree_entry.hex) nature = DirectoryTypeEntry.directory.value trees.append(tree_entry) else: logging.debug('found content %s' % tree_entry.hex) data = obj.data nature = DirectoryTypeEntry.file.value hashes = hashutil.hashdata(data, HASH_ALGORITHMS) - blobs.append({'sha1': obj.hex, + blobs.append({'id': obj.hex, 'content-sha1': hashes['sha1'], 'content-sha256': hashes['sha256'], 'content': data, # FIXME: add pointer to data on disk? 'size': obj.size}) dir_entries.append({'name': tree_entry.name, 'target-sha1': obj.hex, 'nature': nature, 'perms': tree_entry.filemode, 'atime': now(), # FIXME: use real data 'mtime': now(), # FIXME: use real data 'ctime': now(), # FIXME: use real data 'parent': tree.hex}) yield tree, dir_entries, trees, blobs for tree_entry in trees: for x in treewalk(repo, repo[tree_entry.oid]): yield x def walk_tree(repo, swh_repo, rev): """Walk the rev revision's directories. """ if swh_repo.already_visited(rev.hex): logging.debug('commit %s already visited, skipped' % rev.hex) return swh_repo for dir_root, dir_entries, _, contents_ref in treewalk(repo, rev.tree): for content_ref in contents_ref: swh_repo.add_content(content_ref) - swh_repo.add_directory({'sha1': dir_root.hex, + swh_repo.add_directory({'id': dir_root.hex, 'content': dir_root.read_raw(), # FIXME: add pointer to data on disk? 'entries': dir_entries}) revision_parent_sha1s = list(map(str, rev.parent_ids)) author = {'name': rev.author.name, 'email': rev.author.email} committer = {'name': rev.committer.name, 'email': rev.committer.email} - swh_repo.add_revision({'sha1': rev.hex, + swh_repo.add_revision({'id': rev.hex, 'content': rev.read_raw(), # FIXME: add pointer to data on disk? 'date': timestamp_to_string(rev.commit_time), 'directory': rev.tree.hex, 'message': rev.message, 'committer': committer, 'author': author, 'parent-sha1s': revision_parent_sha1s }) swh_repo.add_person(read_signature(rev.author), author) swh_repo.add_person(read_signature(rev.committer), committer) return swh_repo def walk_revision_from(repo, swh_repo, head_rev): """Walk the rev history log from head_rev. - repo is the current repository - rev is the latest rev to start from. 
""" for rev in repo.walk(head_rev.id, GIT_SORT_TOPOLOGICAL): swh_repo = walk_tree(repo, swh_repo, rev) return swh_repo repo = pygit2.Repository(repo_path) # memory model swh_repo = swhrepo.SWHRepo() # add origin origin = {'type': 'git', 'url': 'file://' + repo.path} swh_repo.add_origin(origin) # add references and crawl them for ref_name in repo.listall_references(): logging.info('walk reference %s' % ref_name) ref = repo.lookup_reference(ref_name) head_rev = repo[ref.target] \ if ref.type is GIT_REF_OID \ else ref.peel(GIT_OBJ_COMMIT) # noqa if isinstance(head_rev, pygit2.Tag): head_start = head_rev.get_object() taggerSig = head_rev.tagger author = {'name': taggerSig.name, 'email': taggerSig.email} - release = {'sha1': head_rev.hex, + release = {'id': head_rev.hex, 'content': head_rev.read_raw(), # FIXME: add pointer to data on disk? 'revision': head_rev.target.hex, 'name': ref_name, 'date': now(), # FIXME: find the tag's date, 'author': author, 'comment': head_rev.message} swh_repo.add_release(release) swh_repo.add_person(read_signature(taggerSig), author) else: - swh_repo.add_occurrence({'sha1': head_rev.hex, + swh_repo.add_occurrence({'id': head_rev.hex, 'reference': ref_name, 'url-origin': origin['url']}) head_start = head_rev # crawl commits and trees walk_revision_from(repo, swh_repo, head_start) return swh_repo diff --git a/swh/storage/mapping.py b/swh/storage/mapping.py index 2b32545..c562443 100755 --- a/swh/storage/mapping.py +++ b/swh/storage/mapping.py @@ -1,111 +1,111 @@ #!/usr/bin/env python3 # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime from swh.storage import store def build_content(sha1hex, obj_partial): """Build a content object from the obj_partial. """ obj_partial = obj_partial if obj_partial else {} - return {'sha1': sha1hex, + return {'id': sha1hex, 'type': store.Type.content, 'content-sha1': obj_partial.get('content-sha1'), 'content-sha256': obj_partial.get('content-sha256'), 'content': obj_partial.get('content'), 'size': obj_partial.get('size')} def build_directory(sha1hex, obj_partial): """Build a directory object from the obj_partial. """ obj_partial = obj_partial if obj_partial else {} # FIXME get hack -> split get-post/put - directory = {'sha1': sha1hex, + directory = {'id': sha1hex, 'type': store.Type.directory, 'content': obj_partial.get('content')} directory_entries = [] for entry in obj_partial.get('entries', []): directory_entry = build_directory_entry(sha1hex, entry) directory_entries.append(directory_entry) directory.update({'entries': directory_entries}) return directory def date_from_string(str_date): """Convert a string date with format '%a, %d %b %Y %H:%M:%S +0000'. """ return datetime.strptime(str_date, '%a, %d %b %Y %H:%M:%S +0000') def build_directory_entry(parent_sha1hex, entry): """Build a directory object from the entry. """ return {'name': entry['name'], 'target-sha1': entry['target-sha1'], 'nature': entry['nature'], 'perms': entry['perms'], 'atime': date_from_string(entry['atime']), 'mtime': date_from_string(entry['mtime']), 'ctime': date_from_string(entry['ctime']), 'parent': entry['parent']} def build_revision(sha1hex, obj_partial): """Build a revision object from the obj_partial. 
""" - obj = {'sha1': sha1hex, + obj = {'id': sha1hex, 'type': store.Type.revision} if obj_partial: obj.update({'content': obj_partial['content'], 'date': date_from_string(obj_partial['date']), 'directory': obj_partial['directory'], 'message': obj_partial['message'], 'author': obj_partial['author'], 'committer': obj_partial['committer'], 'parent-sha1s': obj_partial['parent-sha1s']}) return obj def build_release(sha1hex, obj_partial): """Build a release object from the obj_partial. """ - obj = {'sha1': sha1hex, + obj = {'id': sha1hex, 'type': store.Type.release} if obj_partial: - obj.update({'sha1': sha1hex, + obj.update({'id': sha1hex, 'content': obj_partial['content'], 'revision': obj_partial['revision'], 'date': obj_partial['date'], 'name': obj_partial['name'], 'comment': obj_partial['comment'], 'author': obj_partial['author']}) return obj def build_occurrence(sha1hex, obj_partial): """Build a content object from the obj_partial. """ - obj = {'sha1': sha1hex, + obj = {'id': sha1hex, 'type': store.Type.occurrence} if obj_partial: obj.update({'reference': obj_partial['reference'], 'type': store.Type.occurrence, 'revision': sha1hex, 'url-origin': obj_partial['url-origin']}) return obj def build_origin(sha1hex, obj_partial): """Build an origin. """ obj = {'id': obj_partial['url'], 'origin-type': obj_partial['type']} return obj diff --git a/swh/storage/service.py b/swh/storage/service.py index 370bc78..f829a53 100644 --- a/swh/storage/service.py +++ b/swh/storage/service.py @@ -1,91 +1,91 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.storage import store, mapping filter_unknowns_type = store.find_unknowns def find_origin(db_conn, origin): """Find origin. """ origin_found = store.find_origin(db_conn, origin) if origin_found: return {'id': origin_found[0]} return None def find_person(db_conn, person): """Find person. """ person_found = store.find_person(db_conn, person) if person_found: return {'id': person_found[0]} return None def add_origin(db_conn, origin): """Add origin if not already existing. """ origin_found = store.find_origin(db_conn, origin) if origin_found: return {'id': origin_found[0]} else: origin_id = store.add_origin(db_conn, origin) return {'id': origin_id} build_object_fn = {store.Type.revision: mapping.build_revision, store.Type.directory: mapping.build_directory, store.Type.content: mapping.build_content, store.Type.release: mapping.build_release, store.Type.occurrence: mapping.build_occurrence} def add_revisions(db_conn, conf, obj_type, objs): """Add Revisions. """ couple_parents = [] for obj in objs: # iterate over objects of type uri_type - objfull = build_object_fn[obj_type](obj['sha1'], obj) + objfull = build_object_fn[obj_type](obj['id'], obj) obj_found = store.find(db_conn, objfull) if not obj_found: store.add(db_conn, conf, objfull) # deal with revision history par_shas = objfull.get('parent-sha1s', None) if par_shas: - couple_parents.extend([(objfull['sha1'], p) for p in par_shas]) + couple_parents.extend([(objfull['id'], p) for p in par_shas]) store.add_revision_history(db_conn, couple_parents) return True def add_persons(db_conn, conf, obj_type, objs): """Add persons. conf, obj_type are not used (implementation detail.) 
""" for obj in objs: obj_found = store.find_person(db_conn, obj) if not obj_found: store.add_person(db_conn, obj) return True def add_objects(db_conn, conf, obj_type, objs): """Add objects. """ for obj in objs: # iterate over objects of type uri_type - obj_to_store = build_object_fn[obj_type](obj['sha1'], obj) + obj_to_store = build_object_fn[obj_type](obj['id'], obj) obj_found = store.find(db_conn, obj_to_store) if not obj_found: store.add(db_conn, conf, obj_to_store) return True diff --git a/swh/storage/store.py b/swh/storage/store.py index b4c9206..c263e1c 100755 --- a/swh/storage/store.py +++ b/swh/storage/store.py @@ -1,198 +1,198 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from io import StringIO from swh.storage import models, fs Type = models.Type _find_object = {Type.occurrence: models.find_occurrences_for_revision} def find(db_conn, vcs_object): """Find an object according to its sha1hex and type. """ - id = vcs_object['sha1'] # sha1 for every object except for origin (url) + id = vcs_object['id'] # sha1 for every object except for origin (url) type = vcs_object['type'] find_fn = _find_object.get(type, models.find_object) return find_fn(db_conn, id, type) _find_unknown = {Type.revision: models.find_unknown_revisions, Type.content: models.find_unknown_contents, Type.directory: models.find_unknown_directories} def find_unknowns(db_conn, obj_type, sha1s_hex): """Given a list of sha1s, return the non presents one in storage. """ def row_to_sha1(row): """Convert a row (memoryview) to a string sha1. """ return row[0] vals = '\n'.join(sha1s_hex) cpy_data_buffer = StringIO() cpy_data_buffer.write(vals) cpy_data_buffer.seek(0) # move file cursor back at start of file find_unknown_fn = _find_unknown[obj_type] unknowns = find_unknown_fn(db_conn, cpy_data_buffer) cpy_data_buffer.close() return list(map(row_to_sha1, unknowns)) def _add_content(db_conn, vcs_object, sha1hex): """Add a blob to storage. Designed to be wrapped in a db transaction. Returns: - the sha1 if everything went alright. - None if something went wrong Writing exceptions can also be raised and expected to be handled by the caller. """ models.add_content(db_conn, sha1hex, vcs_object['content-sha1'], vcs_object['content-sha256'], vcs_object['size']) return sha1hex def _add_directory(db_conn, vcs_object, sha1hex): """Add a directory to storage. Designed to be wrapped in a db transaction. """ models.add_directory(db_conn, sha1hex) for directory_entry in vcs_object['entries']: _add_directory_entry(db_conn, directory_entry) return sha1hex def _add_directory_entry(db_conn, vcs_object): """Add a directory to storage. Designed to be wrapped in a db transaction. Returns: - the sha1 if everything went alright. - None if something went wrong Writing exceptions can also be raised and expected to be handled by the caller. """ name = vcs_object['name'] parent = vcs_object['parent'] models.add_directory_entry(db_conn, name, vcs_object['target-sha1'], vcs_object['nature'], vcs_object['perms'], vcs_object['atime'], vcs_object['mtime'], vcs_object['ctime'], parent) return name, parent def _add_revision(db_conn, vcs_object, sha1hex): """Add a revision to storage. Designed to be wrapped in a db transaction. Returns: - the sha1 if everything went alright. 
- None if something went wrong Writing exceptions can also be raised and expected to be handled by the caller. """ models.add_revision(db_conn, sha1hex, vcs_object['date'], vcs_object['directory'], vcs_object['message'], vcs_object['author'], vcs_object['committer'], vcs_object['parent-sha1s']) return sha1hex def _add_release(db_conn, vcs_object, sha1hex): """Add a release. """ models.add_release(db_conn, sha1hex, vcs_object['revision'], vcs_object['date'], vcs_object['name'], vcs_object['comment'], vcs_object['author']) return sha1hex def _add_occurrence(db_conn, vcs_object, sha1hex): """Add an occurrence. """ models.add_occurrence(db_conn, vcs_object['url-origin'], vcs_object['reference'], vcs_object['revision']) return sha1hex def add_person(db_conn, vcs_object): """Add an author. """ return models.add_person(db_conn, vcs_object['name'], vcs_object['email']) _store_fn = {Type.content: _add_content, Type.directory: _add_directory, Type.revision: _add_revision, Type.release: _add_release, Type.occurrence: _add_occurrence} def add_origin(db_conn, origin): """A a new origin and returns its id. """ return models.add_origin(db_conn, origin['url'], origin['type']) def find_origin(db_conn, origin): """Find an existing origin. """ return models.find_origin(db_conn, origin['url'], origin['type']) def find_person(db_conn, person): """Find an existing person. """ return models.find_person(db_conn, person['email'], person['name']) def add(db_conn, config, vcs_object): """Given a sha1hex, type and content, store a given object in the store. """ type = vcs_object['type'] - sha1hex = vcs_object['sha1'] + sha1hex = vcs_object['id'] obj_content = vcs_object.get('content') if obj_content: res = fs.write_object(config['content_storage_dir'], sha1hex, obj_content, config['folder_depth'], config['storage_compression']) if not res: return False return _store_fn[type](db_conn, vcs_object, sha1hex) return _store_fn[type](db_conn, vcs_object, sha1hex) def add_revision_history(db_conn, couple_parents): """Given a list of tuple (sha, parent_sha), store in revision_history. 
""" if len(couple_parents) > 0: models.add_revision_history(db_conn, couple_parents) diff --git a/swh/tests/test_api_content.py b/swh/tests/test_api_content.py index 0e2956a..5dc2add 100644 --- a/swh/tests/test_api_content.py +++ b/swh/tests/test_api_content.py @@ -1,107 +1,107 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.storage import db, models from swh.protocols import serial from test_utils import app_client @attr('slow') class ContentTestCase(unittest.TestCase): def setUp(self): self.app, db_url = app_client() with db.connect(db_url) as db_conn: self.content_sha1_id = '222222f9dd5dc46ee476a8be155ab049994f717e' content_sha1_id = 'blabliblablo' self.content_sha256_hex = '222222f9dd5dc46ee476a8be155ab049994f717e' models.add_content(db_conn, self.content_sha1_id, content_sha1_id, self.content_sha256_hex, 10) @istest def get_content_ok(self): # when rv = self.app.get('/vcs/contents/%s' % self.content_sha1_id) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == '222222f9dd5dc46ee476a8be155ab049994f717e' @istest def get_content_not_found(self): # when rv = self.app.get('/vcs/contents/222222f9dd5dc46ee476a8be155ab049994f7170') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def get_content_not_found_with_bad_format(self): # when rv = self.app.get('/vcs/contents/1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def put_content_create_and_update(self): content_sha1 = 'sha1-contentc46ee476a8be155ab03333333333' # does not exist rv = self.app.get('/vcs/contents/%s' % content_sha1) # then assert rv.status_code == 404 assert rv.data == b'Not found!' # we create it - body = {'sha1': content_sha1, + body = {'id': content_sha1, 'content-sha1': 'content-sha1c46ee476a8be155ab03333333333', 'content-sha256': 'content-sha2566ee476a8be155ab03333333333', 'content': b'bar', 'size': '3'} rv = self.app.put('/vcs/contents/%s' % content_sha1, data=serial.dumps(body), headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # now it exists rv = self.app.get('/vcs/contents/%s' % content_sha1) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == 'sha1-contentc46ee476a8be155ab03333333333' # # we update it - body = {'sha1': content_sha1, + body = {'id': content_sha1, 'content-sha1': 'content-sha1c46ee476a8be155ab03333333333', 'content-sha256': 'content-sha2566ee476a8be155ab03333333333', 'content': b'bar', 'size': '3'} rv = self.app.put('/vcs/contents/%s' % content_sha1, data=serial.dumps(body), headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 200 assert rv.data == b'Successful update!' 
# still the same rv = self.app.get('/vcs/contents/%s' % content_sha1) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == 'sha1-contentc46ee476a8be155ab03333333333' diff --git a/swh/tests/test_api_release.py b/swh/tests/test_api_release.py index d564a8d..e653aa9 100644 --- a/swh/tests/test_api_release.py +++ b/swh/tests/test_api_release.py @@ -1,117 +1,117 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.storage import db, models from swh.protocols import serial from test_utils import now, app_client @attr('slow') class ReleaseTestCase(unittest.TestCase): def setUp(self): self.app, db_url = app_client() with db.connect(db_url) as db_conn: self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e' models.add_directory(db_conn, self.directory_sha1_hex) self.tagAuthor = {'name': 'tony', 'email': 'tony@mail.org'} models.add_person(db_conn, self.tagAuthor['name'], self.tagAuthor['email']) self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e' models.add_revision(db_conn, self.revision_sha1_hex, now(), self.directory_sha1_hex, "revision message", self.tagAuthor, self.tagAuthor) self.release_sha1_hex = 'release-sha1-to-test-existence1234567901' models.add_release(db_conn, self.release_sha1_hex, self.revision_sha1_hex, now(), "0.0.1", "Super release tagged by tony", self.tagAuthor) @istest def get_release_ok(self): # when rv = self.app.get('/vcs/releases/%s' % self.release_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == self.release_sha1_hex @istest def get_release_not_found(self): # when rv = self.app.get('/vcs/releases/inexistant-sha1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def get_release_not_found_with_bad_format(self): # when rv = self.app.get('/vcs/releases/1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def put_release_create_and_update(self): release_sha1_hex = 'sha1-release46ee476a8be155ab049994f717e' rv = self.app.get('/vcs/releases/%s' % release_sha1_hex) # then assert rv.status_code == 404 assert rv.data == b'Not found!' # we create it - body = serial.dumps({'sha1': release_sha1_hex, + body = serial.dumps({'id': release_sha1_hex, 'content': b'release also has content', 'revision': self.revision_sha1_hex, 'date': now(), 'name': '0.0.1', 'comment': 'super release tagged by ardumont', 'author': self.tagAuthor}) rv = self.app.put('/vcs/releases/%s' % release_sha1_hex, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # now it exists rv = self.app.get('/vcs/releases/%s' % release_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == release_sha1_hex # we update it rv = self.app.put('/vcs/releases/%s' % release_sha1_hex, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 200 assert rv.data == b'Successful update!' 
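
# A minimal sketch (not part of the changeset above) of the PUT round-trip both
# test cases rely on: the payload is encoded with swh.protocols.serial and sent
# with the matching Content-Type header, and the backend answers 204 on creation
# or 200 ('Successful update!') when the object already exists. The helper name
# put_vcs_object is hypothetical; `client` is a Flask test client as in the tests.
from swh.protocols import serial

def put_vcs_object(client, uri_type, payload):
    """PUT an 'id'-keyed payload to /vcs/<uri_type>/<id>."""
    return client.put('/vcs/%s/%s' % (uri_type, payload['id']),
                      data=serial.dumps(payload),
                      headers={'Content-Type': serial.MIMETYPE})
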
 
         # still the same
         rv = self.app.get('/vcs/releases/%s' % release_sha1_hex)
 
         # then
         assert rv.status_code == 200
         assert serial.loads(rv.data)['id'] == release_sha1_hex
diff --git a/swh/tests/test_swhrepo.py b/swh/tests/test_swhrepo.py
index 850fbf8..4f4087d 100644
--- a/swh/tests/test_swhrepo.py
+++ b/swh/tests/test_swhrepo.py
@@ -1,57 +1,57 @@
 # Copyright (C) 2015 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import unittest
 
 from nose.tools import istest
 
 from swh.data import swhrepo
 from test_utils import app_client
 
 
 class SWHRepoTestCase(unittest.TestCase):
     def setUp(self):
         self.app, db_url = app_client()
 
     @istest
     def new_swhrepo(self):
         # when
         r = swhrepo.SWHRepo()
         r.add_origin({'url': 'foobar'})
 
-        r.add_content({'sha1': 'some-con-sha1'})
-        r.add_content({'sha1': 'some-con-sha1-2','stuff': 'some-stuff'})
-        r.add_directory({'sha1': 'some-dir-sha1'})
-        r.add_directory({'sha1': 'some-dir-sha1-2'})
-        r.add_revision({'sha1': 'some-rev-sha1'})
-        r.add_revision({'sha1': 'some-rev-sha1-2'})
+        r.add_content({'id': 'some-con-sha1'})
+        r.add_content({'id': 'some-con-sha1-2','stuff': 'some-stuff'})
+        r.add_directory({'id': 'some-dir-sha1'})
+        r.add_directory({'id': 'some-dir-sha1-2'})
+        r.add_revision({'id': 'some-rev-sha1'})
+        r.add_revision({'id': 'some-rev-sha1-2'})
 
         r.add_person('id0', {'name': 'the one'})
         r.add_person('id1', {'name': 'another one'})
 
-        r.add_occurrence({'sha1': 'some-occ-sha1'})
-        r.add_release({'sha1': 'some-rel-sha1'})
+        r.add_occurrence({'id': 'some-occ-sha1'})
+        r.add_release({'id': 'some-rel-sha1'})
 
         # then
         assert r.get_origin() == {'url': 'foobar'}
-        assert r.get_releases() == [{'sha1': 'some-rel-sha1'}]
-        assert r.get_occurrences() == [{'sha1': 'some-occ-sha1'}]
+        assert r.get_releases() == [{'id': 'some-rel-sha1'}]
+        assert r.get_occurrences() == [{'id': 'some-occ-sha1'}]
 
         for sha in ['some-con-sha1', 'some-con-sha1-2',
                     'some-dir-sha1', 'some-dir-sha1-2',
                     'some-rev-sha1', 'some-rev-sha1-2']:
             assert r.already_visited(sha) is True
 
         assert r.already_visited('some-occ-sha1') is False
         assert r.already_visited('some-rel-sha1') is False
 
-        assert r.get_contents() == {'some-con-sha1': {'sha1': 'some-con-sha1'},
-                                    'some-con-sha1-2': {'sha1': 'some-con-sha1-2','stuff': 'some-stuff'}}
-        assert r.get_directories() == {'some-dir-sha1': {'sha1': 'some-dir-sha1'},
-                                       'some-dir-sha1-2': {'sha1': 'some-dir-sha1-2'}}
-        assert r.get_revisions() == {'some-rev-sha1': {'sha1': 'some-rev-sha1'},
-                                     'some-rev-sha1-2': {'sha1': 'some-rev-sha1-2'}}
+        assert r.get_contents() == {'some-con-sha1': {'id': 'some-con-sha1'},
+                                    'some-con-sha1-2': {'id': 'some-con-sha1-2','stuff': 'some-stuff'}}
+        assert r.get_directories() == {'some-dir-sha1': {'id': 'some-dir-sha1'},
+                                       'some-dir-sha1-2': {'id': 'some-dir-sha1-2'}}
+        assert r.get_revisions() == {'some-rev-sha1': {'id': 'some-rev-sha1'},
+                                     'some-rev-sha1-2': {'id': 'some-rev-sha1-2'}}
 
         assert len(r.get_persons()) == 2
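
Taken together, the changes above standardise on a single convention: every VCS object travels as a dict whose primary key is 'id' (the sha1 hex, formerly stored under 'sha1'), from the git loader's SWHRepo accumulator through the REST API down to the storage layer. The sketch below, modelled on service.add_objects from this diff, shows that flow; it assumes the swh.storage modules behave as shown above, and store_objects is a hypothetical wrapper name, not part of the codebase.

from swh.storage import db, mapping, store

def store_objects(config, obj_type, objs):
    """Persist 'id'-keyed objects of one type, adding only those not yet stored."""
    # hypothetical wrapper; mirrors service.add_objects from the diff above
    build = {store.Type.content: mapping.build_content,
             store.Type.directory: mapping.build_directory,
             store.Type.revision: mapping.build_revision}[obj_type]
    with db.connect(config['db_url']) as db_conn:
        for obj in objs:
            full = build(obj['id'], obj)        # 'id' carries the object's sha1 hex
            if not store.find(db_conn, full):   # objects are immutable: add only once
                store.add(db_conn, config, full)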