diff --git a/swh/web/ui/api.py b/swh/web/ui/api.py index 578fd253..2ace540d 100644 --- a/swh/web/ui/api.py +++ b/swh/web/ui/api.py @@ -1,424 +1,424 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from flask import request, url_for, Response, redirect from swh.web.ui import service from swh.web.ui.exc import BadInputExc, NotFoundExc from swh.web.ui.main import app @app.route('/api/1/stat/counters/') def api_stats(): """Return statistics on SWH storage. Returns: SWH storage's statistics """ return service.stat_counters() @app.route('/api/1/search/') @app.route('/api/1/search//') def api_search(q='sha1:bd819b5b28fcde3bf114d16a44ac46250da94ee5'): """Search a content per hash. Args: q is of the form algo_hash:hash with algo_hash in (sha1, sha1_git, sha256) Returns: Dictionary with 'found' key and the associated result. Raises: BadInputExc in case of unknown algo_hash or bad hash Example: GET /api/1/search/sha1:bd819b5b28fcde3bf114d16a44ac46250da94ee5/ """ r = service.lookup_hash(q).get('found') return {'found': True if r else False} def _api_lookup(criteria, lookup_fn, error_msg_if_not_found, enrich_fn=lambda x: x): """Factorize function regarding the api to lookup for data.""" res = lookup_fn(criteria) if not res: raise NotFoundExc(error_msg_if_not_found) if isinstance(res, (map, list)): enriched_data = [] for e in res: enriched_data.append(enrich_fn(e)) return enriched_data return enrich_fn(res) @app.route('/api/1/origin/') @app.route('/api/1/origin//') def api_origin(origin_id=1): """Return information about origin with id origin_id. Args: origin_id: the origin's identifier Returns: Information on the origin if found. Raises: NotFoundExc if the origin is not found. Example: GET /api/1/origin/1/ """ return _api_lookup( origin_id, lookup_fn=service.lookup_origin, error_msg_if_not_found='Origin with id %s not found.' % origin_id) @app.route('/api/1/person/') @app.route('/api/1/person//') def api_person(person_id=1): """Return information about person with identifier person_id. Args: person_id: the person's identifier Returns: Information on the person if found. Raises: NotFoundExc if the person is not found. Example: GET /api/1/person/1/ """ return _api_lookup( person_id, lookup_fn=service.lookup_person, error_msg_if_not_found='Person with id %s not found.' % person_id) def enrich_release(release): """Enrich a release with link to the 'target' of 'type' revision. """ if 'target' in release and \ 'target_type' in release and \ release['target_type'] == 'revision': release['target_url'] = url_for('api_revision', sha1_git=release['target']) return release @app.route('/api/1/release/') @app.route('/api/1/release//') def api_release(sha1_git='3c31de6fdc47031857fda10cfa4caf7044cadefb'): """Return information about release with id sha1_git. Args: sha1_git: the release's hash Returns: Information on the release if found. Raises: BadInputExc in case of unknown algo_hash or bad hash NotFoundExc if the release is not found. Example: GET /api/1/release/b307094f00c3641b0c9da808d894f3a325371414 """ error_msg = 'Release with sha1_git %s not found.' % sha1_git return _api_lookup( sha1_git, lookup_fn=service.lookup_release, error_msg_if_not_found=error_msg, enrich_fn=enrich_release) def enrich_revision_with_urls(revision, context=None): """Enrich revision with links where it makes sense (directory, parents). """ if not context: context = revision['id'] revision['url'] = url_for('api_revision', sha1_git=revision['id']) revision['history_url'] = url_for('api_revision_log', sha1_git=revision['id']) if 'directory' in revision: revision['directory_url'] = url_for('api_directory', sha1_git=revision['directory']) if 'parents' in revision: parents = [] for parent in revision['parents']: parents.append(url_for('api_revision_history', sha1_git_root=context, sha1_git=parent)) revision['parent_urls'] = parents if 'children' in revision: children = [] for child in revision['children']: children.append(url_for('api_revision_history', sha1_git_root=context, sha1_git=child)) revision['children_urls'] = children return revision @app.route('/api/1/revision/') @app.route('/api/1/revision//') def api_revision(sha1_git='a585d2b738bfa26326b3f1f40f0f1eda0c067ccf'): """Return information about revision with id sha1_git. Args: sha1_git: the revision's hash Returns: Information on the revision if found. Raises: BadInputExc in case of unknown algo_hash or bad hash NotFoundExc if the revision is not found. Example: GET /api/1/revision/baf18f9fc50a0b6fef50460a76c33b2ddc57486e """ return _api_lookup( sha1_git, lookup_fn=service.lookup_revision, error_msg_if_not_found='Revision with sha1_git %s not' ' found.' % sha1_git, enrich_fn=enrich_revision_with_urls) @app.route('/api/1/revision//history//') def api_revision_history(sha1_git_root, sha1_git): - """Return information about revision SHA1_GIT, limited to the - sub-graph of all transitive parents of . + """Return information about revision sha1_git, limited to the + sub-graph of all transitive parents of sha1_git_root. In other words, sha1_git is an ancestor of sha1_git_root. Args: sha1_git_root: latest revision of the browsed history sha1_git: one of sha1_git_root's ancestors Returns: Information on sha1_git if it is an ancestor of sha1_git_root including children leading to sha1_git_root Raises: BadInputExc in case of unknown algo_hash or bad hash NotFoundExc if either revision is not found or if sha1_git is not an ancestor of sha1_git_root """ if sha1_git == sha1_git_root: return redirect(url_for('api_revision', sha1_git=sha1_git)) revision = service.lookup_revision_with_context(sha1_git_root, sha1_git) if not revision: raise NotFoundExc( "Possibly sha1_git '%s' is not an ancestor of sha1_git_root '%s'" % (sha1_git, sha1_git_root)) return enrich_revision_with_urls(revision, context=sha1_git_root) @app.route('/api/1/revision//log/') def api_revision_log(sha1_git): """Show all revisions (~git log) starting from sha1_git. The first element returned is the given sha1_git. Args: sha1_git: the revision's hash Returns: Information on the revision if found. Raises: BadInputExc in case of unknown algo_hash or bad hash NotFoundExc if the revision is not found. """ error_msg = 'Revision with sha1_git %s not found.' % sha1_git return _api_lookup(sha1_git, lookup_fn=service.lookup_revision_log, error_msg_if_not_found=error_msg, enrich_fn=enrich_revision_with_urls) def enrich_directory(directory): """Enrich directory with url to content or directory. """ if 'type' in directory: target_type = directory['type'] target = directory['target'] if target_type == 'file': directory['target_url'] = url_for('api_content_with_details', q='sha1_git:%s' % target) else: directory['target_url'] = url_for('api_directory', sha1_git=target) return directory @app.route('/api/1/directory/') @app.route('/api/1/directory//') def api_directory(sha1_git='dcf3289b576b1c8697f2a2d46909d36104208ba3'): """Return information about release with id sha1_git. Args: Directory's sha1_git Raises: BadInputExc in case of unknown algo_hash or bad hash NotFoundExc if the content is not found. Example: GET /api/1/directory/8d7dc91d18546a91564606c3e3695a5ab568d179 """ error_msg = 'Directory with sha1_git %s not found.' % sha1_git return _api_lookup( sha1_git, lookup_fn=service.lookup_directory, error_msg_if_not_found=error_msg, enrich_fn=enrich_directory) @app.route('/api/1/browse/') @app.route('/api/1/browse//') def api_content_checksum_to_origin(q='sha1_git:26ac0281bc74e9bd8a4a4aab1c7c7a' '0c19d4436c'): """Return content information up to one of its origin if the content is found. Args: q is of the form algo_hash:hash with algo_hash in (sha1, sha1_git, sha256) Returns: Information on one possible origin for such content. Raises: BadInputExc in case of unknown algo_hash or bad hash NotFoundExc if the content is not found. Example: GET /api/1/browse/sha1_git:88b9b366facda0b5ff8d8640ee9279bed346f242 """ found = service.lookup_hash(q)['found'] if not found: raise NotFoundExc('Content with %s not found.' % q) return service.lookup_hash_origin(q) @app.route('/api/1/content//raw/') def api_content_raw(q): """Return content's raw data if content is found. Args: q is of the form (algo_hash:)hash with algo_hash in (sha1, sha1_git, sha256). When algo_hash is not provided, 'hash' is considered sha1. Returns: Content's raw data in application/octet-stream Raises: - BadInputExc in case of unknown algo_hash or bad hash - NotFoundExc if the content is not found. """ def generate(content): yield content['data'] content = service.lookup_content_raw(q) if not content: raise NotFoundExc('Content with %s not found.' % q) return Response(generate(content), mimetype='application/octet-stream') def enrich_content(content): """Enrich content with 'data', a link to its raw content. """ content['data_url'] = url_for('api_content_raw', q=content['sha1']) return content @app.route('/api/1/content/') @app.route('/api/1/content//') def api_content_with_details(q='sha256:e2c76e40866bb6b28916387bdfc8649beceb' '523015738ec6d4d540c7fe65232b'): """Return content information if content is found. Args: q is of the form (algo_hash:)hash with algo_hash in (sha1, sha1_git, sha256). When algo_hash is not provided, 'hash' is considered sha1. Returns: Content's information. Raises: - BadInputExc in case of unknown algo_hash or bad hash - NotFoundExc if the content is not found. Example: GET /api/1/content/sha256:e2c76e40866bb6b28916387bdfc8649beceb 523015738ec6d4d540c7fe65232b """ return _api_lookup( q, lookup_fn=service.lookup_content, error_msg_if_not_found='Content with %s not found.' % q, enrich_fn=enrich_content) @app.route('/api/1/uploadnsearch/', methods=['POST']) def api_uploadnsearch(): """Upload the file's content in the post body request. Compute its hash and determine if it exists in the storage. Args: request.files filled with the filename's data to upload. Returns: Dictionary with 'sha1', 'filename' and 'found' predicate depending on whether we find it or not. Raises: BadInputExc in case of the form submitted is incorrect. """ file = request.files.get('filename') if not file: raise BadInputExc('Bad request, missing \'filename\' entry in form.') return service.upload_and_search(file) diff --git a/swh/web/ui/service.py b/swh/web/ui/service.py index c604ba43..e8cd5991 100644 --- a/swh/web/ui/service.py +++ b/swh/web/ui/service.py @@ -1,296 +1,296 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from collections import defaultdict from swh.core import hashutil from swh.web.ui import converters, main, query, upload from swh.web.ui.exc import BadInputExc, NotFoundExc def hash_and_search(filepath): """Hash the filepath's content as sha1, then search in storage if it exists. Args: Filepath of the file to hash and search. Returns: Tuple (hex sha1, found as True or false). The found boolean, according to whether the sha1 of the file is present or not. """ h = hashutil.hashfile(filepath) c = main.storage().content_find(h) if c: r = converters.from_content(c) r['found'] = True return r else: return {'sha1': hashutil.hash_to_hex(h['sha1']), 'found': False} def upload_and_search(file): """Upload a file and compute its hash. """ tmpdir, filename, filepath = upload.save_in_upload_folder(file) res = {'filename': filename} try: content = hash_and_search(filepath) res.update(content) return res finally: # clean up if tmpdir: upload.cleanup(tmpdir) def lookup_hash(q): """Checks if the storage contains a given content checksum Args: query string of the form Returns: Dict with key found to True or False, according to whether the checksum is present or not """ (algo, hash) = query.parse_hash(q) found = main.storage().content_find({algo: hash}) return {'found': found, 'algo': algo} def lookup_hash_origin(q): """Return information about the checksum contained in the query q. Args: query string of the form Returns: origin as dictionary if found for the given content. """ algo, h = query.parse_hash(q) origin = main.storage().content_find_occurrence({algo: h}) return converters.from_origin(origin) def lookup_origin(origin_id): """Return information about the origin with id origin_id. Args: origin_id as string Returns: origin information as dict. """ return main.storage().origin_get({'id': origin_id}) def lookup_person(person_id): """Return information about the person with id person_id. Args: person_id as string Returns: person information as dict. """ persons = main.storage().person_get([person_id]) if not persons: return None return converters.from_person(persons[0]) def lookup_directory(sha1_git): """Return information about the directory with id sha1_git. Args: sha1_git as string Returns: directory information as dict. """ algo, hBinSha1 = query.parse_hash(sha1_git) if algo != 'sha1': # HACK: sha1_git really but they are both sha1... raise BadInputExc('Only sha1_git is supported.') directory_entries = main.storage().directory_get(hBinSha1) if not directory_entries: return None return map(converters.from_directory_entry, directory_entries) def lookup_release(release_sha1_git): """Return information about the release with sha1 release_sha1_git. Args: release_sha1_git: The release's sha1 as hexadecimal Returns: Release information as dict. Raises: ValueError if the identifier provided is not of sha1 nature. """ algo, hBinSha1 = query.parse_hash(release_sha1_git) if algo != 'sha1': # HACK: sha1_git really but they are both sha1... raise BadInputExc('Only sha1_git is supported.') res = main.storage().release_get([hBinSha1]) if res and len(res) >= 1: return converters.from_release(res[0]) return None def lookup_revision(rev_sha1_git): """Return information about the revision with sha1 revision_sha1_git. Args: revision_sha1_git: The revision's sha1 as hexadecimal Returns: Revision information as dict. Raises: ValueError if the identifier provided is not of sha1 nature. """ algo, hBinSha1 = query.parse_hash(rev_sha1_git) if algo != 'sha1': # HACK: sha1_git really but they are both sha1... raise BadInputExc('Only sha1_git is supported.') res = main.storage().revision_get([hBinSha1]) if res and len(res) >= 1: return converters.from_revision(res[0]) return None def lookup_revision_log(rev_sha1_git): """Return information about the revision with sha1 revision_sha1_git. Args: revision_sha1_git: The revision's sha1 as hexadecimal Returns: Revision information as dict. Raises: ValueError if the identifier provided is not of sha1 nature. """ algo, hBinSha1 = query.parse_hash(rev_sha1_git) if algo != 'sha1': # HACK: sha1_git really but they are both sha1... raise BadInputExc('Only sha1_git is supported.') revision_entries = main.storage().revision_log(hBinSha1) return map(converters.from_revision, revision_entries) def lookup_revision_with_context(sha1_git_root, sha1_git): - """Return information about revision SHA1_GIT, limited to the - sub-graph of all transitive parents of . + """Return information about revision sha1_git, limited to the + sub-graph of all transitive parents of sha1_git_root. In other words, sha1_git is an ancestor of sha1_git_root. Args: sha1_git_root: latest revision of the browsed history sha1_git: one of sha1_git_root's ancestors Returns: Information on sha1_git if it is an ancestor of sha1_git_root including children leading to sha1_git_root Raises: BadInputExc in case of unknown algo_hash or bad hash NotFoundExc if either revision is not found or if sha1_git is not an ancestor of sha1_git_root """ revision = lookup_revision(sha1_git) if not revision: raise NotFoundExc('Revision %s not found' % sha1_git) revision_root = lookup_revision(sha1_git_root) if not revision_root: raise NotFoundExc('Revision %s not found' % sha1_git_root) hBinRootSha1 = hashutil.hex_to_hash(sha1_git_root) revision_log = main.storage().revision_log(hBinRootSha1) parents = {} children = defaultdict(list) for rev in revision_log: rev_id = hashutil.hash_to_hex(rev['id']) parents[rev_id] = [] for parent_id in rev['parents']: parent_id = hashutil.hash_to_hex(parent_id) parents[rev_id].append(parent_id) children[parent_id].append(rev_id) if revision['id'] not in parents: raise NotFoundExc('Revision %s is not an ancestor of %s' % (sha1_git, sha1_git_root)) revision['children'] = children[revision['id']] return revision def lookup_content(q): """Lookup the content designed by q. Args: q: The release's sha1 as hexadecimal """ (algo, hash) = query.parse_hash(q) c = main.storage().content_find({algo: hash}) if c: return converters.from_content(c) return None def lookup_content_raw(q): """Lookup the content designed by q. Args: q: query string of the form Returns: dict with 'sha1' and 'data' keys. data representing its raw data decoded. """ (algo, hash) = query.parse_hash(q) c = main.storage().content_find({algo: hash}) if not c: return None sha1 = c['sha1'] contents = main.storage().content_get([sha1]) if contents and len(contents) >= 1: return converters.from_content(contents[0]) return None def stat_counters(): """Return the stat counters for Software Heritage Returns: A dict mapping textual labels to integer values. """ return main.storage().stat_counters()