diff --git a/PKG-INFO b/PKG-INFO index 32dc5e7d..d89a2337 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.web.ui -Version: 0.0.7 +Version: 0.0.8 Summary: Software Heritage Web UI Home-page: https://forge.softwareheritage.org/diffusion/DWUI/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/README b/README index af0fa908..11e51015 100644 --- a/README +++ b/README @@ -1,24 +1,24 @@ swh-web-ui ========== SWH's web application # Configuration file sample -~/.config/swh/web-ui.ini +~/.config/swh/webapp.ini [main] # where to log information log_dir = /tmp/swh/web-ui/log # for dev only debug = true # current server (0.0.0.0 for world opening) host = 127.0.0.1 # its port port = 6543 # the backend this server communicates to api_backend = http://127.0.0.1:5000 diff --git a/swh.web.ui.egg-info/PKG-INFO b/swh.web.ui.egg-info/PKG-INFO index 32dc5e7d..d89a2337 100644 --- a/swh.web.ui.egg-info/PKG-INFO +++ b/swh.web.ui.egg-info/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.web.ui -Version: 0.0.7 +Version: 0.0.8 Summary: Software Heritage Web UI Home-page: https://forge.softwareheritage.org/diffusion/DWUI/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh/web/ui/controller.py b/swh/web/ui/controller.py old mode 100755 new mode 100644 index 539fc8e1..8ffa879f --- a/swh/web/ui/controller.py +++ b/swh/web/ui/controller.py @@ -1,328 +1,333 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import logging -from flask import redirect, render_template, url_for, flash, jsonify, request +from flask import redirect, render_template, url_for, jsonify, request from flask import make_response from swh.core.hashutil import ALGORITHMS from swh.web.ui.main import app from swh.web.ui import service, query from swh.web.ui.decorators import jsonp hash_filter_keys = ALGORITHMS @app.route('/') def main(): """Main application view. At the moment, redirect to the content search view. """ return redirect(url_for('info')) @app.route('/info') def info(): """A simple api to define what the server is all about. """ logging.info('Dev SWH UI') return 'Dev SWH UI' @app.route('/search') def search(): """Search for hashes in swh-storage. """ q = request.args.get('q', '') + env = {'q': q, 'message': '', 'found': None} - if q: - flash("Search hash '%s' posted!" % q) - message = service.lookup_hash(q) - else: - message = '' + try: + if q: + env['found'] = service.lookup_hash(q) + except ValueError: + env['message'] = 'Error: invalid query string' - return render_template('search.html', - q=q, - message=message) + return render_template('search.html', **env) @app.route('/browse/revision/') def revision(sha1_git): """Show commit information. Args: sha1_git: the revision's sha1 Returns: Revision information """ return render_template('revision.html', sha1_git=sha1_git) @app.route('/browse/directory/') def directory(sha1_git): """Show directory information. Args: sha1_git: the directory's sha1 Returns: Directory information """ return render_template('directory.html', sha1_git=sha1_git) @app.route('/browse/directory//') def directory_at_path(sha1_git, p): """Show directory information for the sha1_git at path. Args: sha1_git: the directory's sha1 path: file or directory pointed to Returns: Directory information at sha1_git + path """ return render_template('directory.html', sha1_git=sha1_git, path=p) @app.route('/browse/content/:') def content(hash, sha): """Show content information. Args: hash: hash according to HASH_ALGO, where HASH_ALGO is one of: sha1, sha1_git, sha256. This means that several different URLs (at least one per HASH_ALGO) will point to the same content sha: the sha with 'hash' format Returns: The content's information at sha1_git """ # Checks user input if hash not in hash_filter_keys: return make_response( 'Bad request, sha must be one of sha1, sha1_git, sha256', 400) h = query.categorize_hash(sha) if h == {}: return make_response( 'Bad request, %s is not of type %s' % (sha, hash), 400) if hash == 'sha256' and not h.get(hash): return make_response( 'Bad request, %s is not of type sha256' % (sha,), 400) if hash != 'sha256' and not h.get('sha1') and not h.get('sha1_git'): return make_response( 'Bad request, %s is not of type sha1 or sha1_git' % (sha,), 400) message = service.lookup_hash_origin(h) return render_template('content.html', hash=hash, sha=sha, message=message) @app.route('/browse/release/') def release(sha1_git): """Show release's information. Args: sha1_git: sha1_git for this particular release Returns: Release's information """ return 'Release information at %s' % sha1_git @app.route('/browse/person/') def person(id): """Show Person's information at id. Args: id: person's unique identifier Returns: Person's information """ return 'Person information at %s' % id @app.route('/browse/origin/') def origin(id): """Show origin's information at id. Args: id: origin's unique identifier Returns: Origin's information """ return 'Origin information at %s' % id @app.route('/browse/project/') def project(id): """Show project's information at id. Args: id: project's unique identifier Returns: Project's information """ return 'Project information at %s' % id @app.route('/browse/organization/') def organization(id): """Show organization's information at id. Args: id: organization's unique identifier Returns: Organization's information """ return 'Organization information at %s' % id @app.route('/browse/directory//' '+|/' '|/') def directory_at_origin(timestamp, origin_type, origin_url, branch, path): """Show directory information at timestamp, origin-type, origin-url, branch and path. Those parameters are separated by the `|` terminator. Args: timestamp: the timestamp to look for. can be latest or some iso8601 date format. (TODO: decide the time matching policy.) origin_type: origin's type origin_url: origin's url (can contain `/`) branch: branch name which can contain `/` path: path to directory or file Returns: Directory information at the given parameters. """ return 'Directory at (%s, %s, %s, %s, %s)' % (timestamp, origin_type, origin_url, branch, path) @app.route('/browse/revision//' '+|/') def revision_at_origin_and_branch(timestamp, origin_type, origin_url, branch): """Show revision information at timestamp, origin, and branch. Those parameters are separated by the `|` terminator. Args: timestamp: the timestamp to look for. can be latest or some iso8601 date format. (TODO: decide the time matching policy.) origin_type: origin's type origin_url: origin's url (can contain `/`) branch: branch name which can contain / Returns: Revision information at the given parameters. """ return 'Revision at (ts=%s, type=%s, url=%s, branch=%s)' % (timestamp, origin_type, origin_url, branch) @app.route('/browse/revision//' '+|') def revision_at_origin(timestamp, origin_type, origin_url): """Show revision information at timestamp, origin, and branch. Those parameters are separated by the `|` terminator. Args: timestamp: the timestamp to look for. can be latest or iso8601 date format. (TODO: decide the time matching policy.) origin_type: origin's type origin_url: origin's url (can contain `/`) Returns: Revision information at the given parameters. """ return 'Revision at (timestamp=%s, type=%s, url=%s)' % (timestamp, origin_type, origin_url) @app.route('/api/1/stat/counters') @jsonp def api_stats(): """Return statistics as a JSON object""" return jsonify(service.stat_counters()) +@app.route('/api/1/search//') +@jsonp +def api_search(q): + """Return search results as a JSON object""" + return jsonify({'query': q, + 'found': service.lookup_hash(q)}) + + def run(conf): """Run the api's server. Args: conf is a dictionary of keywords: - 'db_url' the db url's access (through psycopg2 format) - 'content_storage_dir' revisions/directories/contents storage on disk - 'host' to override the default 127.0.0.1 to open or not the server to the world - 'port' to override the default of 5000 (from the underlying layer: flask) - 'debug' activate the verbose logs - 'secret_key' the flask secret key Returns: Never Raises: ? """ - print("""SWH Web UI run -host: %s -port: %s + print("""SWH Web UI available at http://%s:%s/ debug: %s""" % (conf['host'], conf.get('port', None), conf['debug'])) app.secret_key = conf['secret_key'] app.config.update({'conf': conf}) app.run(host=conf['host'], port=conf.get('port', None), debug=conf['debug']) diff --git a/swh/web/ui/query.py b/swh/web/ui/query.py index 96579c68..06d1306d 100644 --- a/swh/web/ui/query.py +++ b/swh/web/ui/query.py @@ -1,40 +1,58 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import re -from swh.core import hashutil +from swh.core.hashutil import ALGORITHMS, hex_to_hash -# Regexp to filter and check inputs -sha256_regexp = '[0-9a-f]{64}' -sha1_regexp = '[0-9a-f]{40}' +SHA256_RE = re.compile(r'^[0-9a-f]{64}$', re.IGNORECASE) +SHA1_RE = re.compile(r'^[0-9a-f]{40}$', re.IGNORECASE) -def categorize_hash(hash): - """Categorize the hash string according to what it is. +def parse_hash(q): + """Detect the hash type of a user submitted query string. Args: - hash: hash string representation (sha1 or sha256) + query string with the following format: "[HASH_TYPE:]HEX_CHECKSUM", + where HASH_TYPE is optional, defaults to "sha1", and can be one of + swh.core.hashutil.ALGORITHMS Returns: - A dictionary of hash indexed by their nature (sha1, sha256) - The dictionary will be empty if nothing matches + A pair (hash_algorithm, byte hash value) Raises: - None + ValueError if the given query string does not correspond to a valid + hash value """ - try: - h = hashutil.hex_to_hash(hash) - except ValueError: # ignore silently to check the other inputs - return {} - - if re.search(sha256_regexp, hash): - return {'sha256': h} - if re.search(sha1_regexp, hash): - return {'sha1': h} - return {} + def guess_algo(q): + if SHA1_RE.match(q): + return 'sha1' + elif SHA256_RE.match(q): + return 'sha256' + else: + raise ValueError('invalid checksum query string') + + def check_algo(algo, hex): + if (algo in set(['sha1', 'sha1_git']) and not SHA1_RE.match(hex)) \ + or (algo == 'sha256' and not SHA256_RE.match(hex)): + raise ValueError('invalid hash for algorithm ' + algo) + + parts = q.split(':') + if len(parts) > 2: + raise ValueError('invalid checksum query string') + elif len(parts) == 1: + parts = (guess_algo(q), q) + elif len(parts) == 2: + check_algo(parts[0], parts[1]) + algo = parts[0] + hash = hex_to_hash(parts[1]) + + if algo not in ALGORITHMS: + raise ValueError('unknown hash algorithm: ' + algo) + + return (algo, hash) diff --git a/swh/web/ui/service.py b/swh/web/ui/service.py old mode 100755 new mode 100644 index e2fb8533..9ae49c75 --- a/swh/web/ui/service.py +++ b/swh/web/ui/service.py @@ -1,84 +1,77 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.web.ui import main from swh.web.ui import query def lookup_hash(q): - """Given a string query q of one hash, lookup its hash to the backend. + """Checks if the storage contains a given content checksum - Args: - query, hash as a string (sha1, sha256, etc...) + Args: query string Returns: - a string message (found, not found or a potential error explanation) + True or False, according to whether the checksum is present or not - Raises: - OSError (no route to host), etc... Network issues in general """ - hash = query.categorize_hash(q) - if hash != {}: - present = main.storage().content_exist(hash) - return 'Found!' if present else 'Not Found' - return """This is not a hash. -Hint: hexadecimal string with length either 20 (sha1) or 32 (sha256).""" + (algo, hash) = query.parse_hash(q) + return main.storage().content_exist({algo: hash}) def _origin_seen(hash, data): """Given an origin, compute a message string with the right information. Args: origin: a dictionary with keys: - origin: a dictionary with type and url keys - occurrence: a dictionary with a validity range Returns: message as a string """ if data is None: return 'Content with hash %s is unknown as of now.' % hash origin_type = data['origin_type'] origin_url = data['origin_url'] revision = data['revision'] branch = data['branch'] path = data['path'] - print("data:", data) + return """The content with hash %s has been seen on origin with type '%s' at url '%s'. The revision was identified at '%s' on branch '%s'. The file's path referenced was '%s'.""" % (hash, origin_type, origin_url, revision, branch, path) def lookup_hash_origin(hash): """Given a hash, return the origin of such content if any is found. Args: hash: key/value dictionary Returns: The origin for such hash if it's found. Raises: OSError (no route to host), etc... Network issues in general """ data = main.storage().content_find_occurrence(hash) return _origin_seen(hash, data) def stat_counters(): """Return the stat counters for Software Heritage Returns: A dict mapping textual labels to integer values. """ return main.storage().stat_counters() diff --git a/swh/web/ui/templates/layout.html b/swh/web/ui/templates/layout.html index aab291e6..279c731a 100644 --- a/swh/web/ui/templates/layout.html +++ b/swh/web/ui/templates/layout.html @@ -1,10 +1,11 @@ -SWH Web UI - -
-
+Software Heritage Archive + +
+
{% for message in get_flashed_messages() %} -
{{ message }}
+
{{ message }}
{% endfor %} {% block body %}{% endblock %} +
diff --git a/swh/web/ui/templates/search.html b/swh/web/ui/templates/search.html index 26f9a1fb..77c0a14d 100644 --- a/swh/web/ui/templates/search.html +++ b/swh/web/ui/templates/search.html @@ -1,13 +1,16 @@ {% extends "layout.html" %} {% block body %} - +{% if message is not none %} +
{{ message | safe }}
+{% endif %} +{% if found is not none %} +
Found: {{ found | safe }}
+{% endif %} {% endblock %} diff --git a/swh/web/ui/tests/test_query.py b/swh/web/ui/tests/test_query.py index f387a6e9..dbae2df7 100644 --- a/swh/web/ui/tests/test_query.py +++ b/swh/web/ui/tests/test_query.py @@ -1,36 +1,33 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from swh.web.ui import query from swh.core import hashutil class QueryTestCase(unittest.TestCase): - @istest - def categorize_hash(self): - input_sha1 = 'f1d2d2f924e986ac86fdf7b36c94bcdf32beec15' - - res = query.categorize_hash(input_sha1) - - self.assertEquals(res, {'sha1': hashutil.hex_to_hash(input_sha1)}) - - def categorize_hash_2(self): - input_sha256 = \ - '084c799cd551dd1d8d5c5f9a5d593b2e931f5e36122ee5c793c1d08a19839cc0' - - res = query.categorize_hash(input_sha256) - self.assertEquals(res, {'sha256': hashutil.hex_to_hash(input_sha256)}) - - def categorize_hash_3(self): - input_bad_length = '1234567890987654' + @istest + def parse_hash(self): + q = 'f1d2d2f924e986ac86fdf7b36c94bcdf32beec15' + r = query.parse_hash(q) + self.assertEquals(r, ('sha1', hashutil.hex_to_hash(q))) - res = query.categorize_hash(input_bad_length) + @istest + def parse_hash_2(self): + q = '084C799CD551DD1D8D5C5F9A5D593B2' \ + 'E931F5E36122ee5c793c1d08a19839cc0' + r = query.parse_hash(q) + self.assertEquals(r, ('sha256', hashutil.hex_to_hash(q))) - self.assertEquals(res, {}) + @istest + def parse_hash_3(self): + q = '1234567890987654' + with self.assertRaises(ValueError): + query.parse_hash(q) diff --git a/version.txt b/version.txt index 5b14ec5e..33f71f9b 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.7-0-gad9eafc \ No newline at end of file +v0.0.8-0-g69d124a \ No newline at end of file