diff --git a/PKG-INFO b/PKG-INFO
index 6485330c..d0adc346 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,10 +1,10 @@
Metadata-Version: 1.0
Name: swh.web.ui
-Version: 0.0.32
+Version: 0.0.33
Summary: Software Heritage Web UI
Home-page: https://forge.softwareheritage.org/diffusion/DWUI/
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
diff --git a/requirements.txt b/requirements.txt
index 096cc139..18e67367 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,19 +1,19 @@
# Add here external Python modules dependencies, one per line. Module names
# should match https://pypi.python.org/pypi names. For the full spec or
# dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html
# Runtime dependencies
Flask
-Flask-API
swh.core >= 0.0.20
swh.storage >= 0.0.33
dateutil
+docutils
# Test dependencies
#Flask-Testing
#blinker
# Non-Python dependencies
#libjs-cryptojs
#libjs-jquery-flot
#libjs-jquery-flot-tooltip
diff --git a/resources/test/webapp.ini b/resources/test/webapp.ini
index 5eba7fba..f84418a0 100644
--- a/resources/test/webapp.ini
+++ b/resources/test/webapp.ini
@@ -1,26 +1,26 @@
[main]
# the dedicated storage arguments (comma separated list of values)
storage_args = http://localhost:5000/
# either remote_storage or local_storage
storage_class = remote_storage
# where to log information
log_dir = /tmp/swh/web-ui/log
# for dev only
debug = true
# current server (0.0.0.0 for world opening)
host = 127.0.0.1
# its port
port = 6543
-# Upload folder for temporary upload and hash
-upload_folder = /tmp/swh-web-ui/uploads
+# Max revisions shown in a log
+max_log_revs = 25
# Allowed extensions for upload (commented or empty means all is accepted)
# Otherwise, comma separated values of extensions.
# upload_allowed_extensions = txt, csv
# upload_allowed_extensions =
diff --git a/swh.web.ui.egg-info/PKG-INFO b/swh.web.ui.egg-info/PKG-INFO
index 6485330c..d0adc346 100644
--- a/swh.web.ui.egg-info/PKG-INFO
+++ b/swh.web.ui.egg-info/PKG-INFO
@@ -1,10 +1,10 @@
Metadata-Version: 1.0
Name: swh.web.ui
-Version: 0.0.32
+Version: 0.0.33
Summary: Software Heritage Web UI
Home-page: https://forge.softwareheritage.org/diffusion/DWUI/
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
diff --git a/swh.web.ui.egg-info/SOURCES.txt b/swh.web.ui.egg-info/SOURCES.txt
index 4a0d569f..2a869de9 100644
--- a/swh.web.ui.egg-info/SOURCES.txt
+++ b/swh.web.ui.egg-info/SOURCES.txt
@@ -1,82 +1,84 @@
.gitignore
AUTHORS
LICENSE
MANIFEST.in
Makefile
Makefile.local
README
README-dev.md
README-uri-scheme.md
requirements.txt
setup.py
version.txt
bin/swh-web-ui-dev
debian/changelog
debian/compat
debian/control
debian/copyright
debian/rules
debian/source/format
docs/Makefile
docs/source/conf.py
docs/source/index.rst
docs/source/modules.rst
docs/source/swh.web.ui.rst
docs/source/_static/dependencies.dot
docs/source/_static/dependencies.png
resources/test/webapp.ini
swh.web.ui.egg-info/PKG-INFO
swh.web.ui.egg-info/SOURCES.txt
swh.web.ui.egg-info/dependency_links.txt
swh.web.ui.egg-info/requires.txt
swh.web.ui.egg-info/top_level.txt
swh/web/ui/__init__.py
swh/web/ui/apidoc.py
swh/web/ui/backend.py
swh/web/ui/converters.py
swh/web/ui/exc.py
swh/web/ui/main.py
swh/web/ui/query.py
swh/web/ui/renderers.py
swh/web/ui/service.py
swh/web/ui/utils.py
swh/web/ui/static/css/bootstrap-responsive.min.css
swh/web/ui/static/css/style.css
swh/web/ui/static/js/calendar.js
-swh/web/ui/static/js/filedrop.js
+swh/web/ui/static/js/search.js
swh/web/ui/static/lib/README
swh/web/ui/static/lib/jquery.flot.min.js
swh/web/ui/static/lib/jquery.flot.selection.min.js
swh/web/ui/static/lib/jquery.flot.time.min.js
swh/web/ui/templates/about.html
swh/web/ui/templates/api.html
+swh/web/ui/templates/apidoc.html
swh/web/ui/templates/content-with-origin.html
swh/web/ui/templates/content.html
swh/web/ui/templates/directory.html
swh/web/ui/templates/entity.html
swh/web/ui/templates/home.html
swh/web/ui/templates/layout.html
swh/web/ui/templates/origin.html
swh/web/ui/templates/person.html
swh/web/ui/templates/release.html
swh/web/ui/templates/revision-directory.html
swh/web/ui/templates/revision-log.html
swh/web/ui/templates/revision.html
swh/web/ui/templates/search.html
swh/web/ui/tests/__init__.py
+swh/web/ui/tests/test_apidoc.py
swh/web/ui/tests/test_app.py
swh/web/ui/tests/test_backend.py
swh/web/ui/tests/test_converters.py
swh/web/ui/tests/test_query.py
swh/web/ui/tests/test_renderers.py
swh/web/ui/tests/test_service.py
swh/web/ui/tests/test_utils.py
swh/web/ui/tests/views/__init__.py
swh/web/ui/tests/views/test_api.py
swh/web/ui/tests/views/test_browse.py
swh/web/ui/tests/views/test_main.py
swh/web/ui/views/__init__.py
swh/web/ui/views/api.py
swh/web/ui/views/browse.py
swh/web/ui/views/errorhandler.py
swh/web/ui/views/main.py
\ No newline at end of file
diff --git a/swh.web.ui.egg-info/requires.txt b/swh.web.ui.egg-info/requires.txt
index 638e2b70..9c2bc271 100644
--- a/swh.web.ui.egg-info/requires.txt
+++ b/swh.web.ui.egg-info/requires.txt
@@ -1,5 +1,5 @@
Flask
-Flask-API
dateutil
+docutils
swh.core>=0.0.20
swh.storage>=0.0.33
diff --git a/swh/web/ui/apidoc.py b/swh/web/ui/apidoc.py
index 895999ad..31e16c0a 100644
--- a/swh/web/ui/apidoc.py
+++ b/swh/web/ui/apidoc.py
@@ -1,49 +1,241 @@
# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import re
+
+from functools import wraps
+from enum import Enum
+
+from flask import request, render_template, url_for
+from flask import g
-import os
-from swh.web.ui import utils, main
from swh.web.ui.main import app
-def _create_url_doc_endpoints(rules):
- def split_path(path, acc):
- rpath = os.path.dirname(path)
- if rpath == '/':
- yield from acc
- else:
- acc.append(rpath+'/')
- yield from split_path(rpath, acc)
-
- url_doc_endpoints = set()
- for rule in rules:
- url_rule = rule['rule']
- url_doc_endpoints.add(url_rule)
- if '<' in url_rule or '>' in url_rule:
- continue
- acc = []
- for rpath in split_path(url_rule, acc):
- if rpath in url_doc_endpoints:
- continue
- yield rpath
- url_doc_endpoints.add(rpath)
-
-
-def install_browsable_api_endpoints():
- """Install browsable endpoints.
-
- """
- url_doc_endpoints = _create_url_doc_endpoints(main.rules())
- for url_doc in url_doc_endpoints:
- endpoint_name = 'doc_api_' + url_doc.strip('/').replace('/', '_')
-
- def view_func(url_doc=url_doc):
- return utils.filter_endpoints(main.rules(),
- url_doc)
- app.add_url_rule(rule=url_doc,
- endpoint=endpoint_name,
- view_func=view_func,
- methods=['GET'])
+class argtypes(Enum): # noqa: N801
+ """Class for centralizing argument type descriptions
+
+ """
+
+ ts = 'timestamp'
+ int = 'integer'
+ str = 'string'
+ path = 'path'
+ sha1 = 'sha1'
+ uuid = 'uuid'
+ sha1_git = 'sha1_git'
+ algo_and_hash = 'algo_hash:hash'
+
+
+class rettypes(Enum): # noqa: N801
+ """Class for centralizing return type descriptions
+
+ """
+ octet_stream = 'octet stream'
+ list = 'list'
+ dict = 'dict'
+
+
+class excs(Enum): # noqa: N801
+ """Class for centralizing exception type descriptions
+
+ """
+
+ badinput = 'BadInputExc'
+ notfound = 'NotFoundExc'
+
+
+class APIUrls(object):
+ """
+ Class to manage API documentation URLs.
+ * Indexes all routes documented using apidoc's decorators.
+ * Tracks endpoint/request processing method relationships for use
+ in generating related urls in API documentation
+ Relies on the load_controllers logic in main.py for initialization.
+
+ """
+ apidoc_routes = {}
+ method_endpoints = {}
+
+ @classmethod
+ def get_app_endpoints(cls):
+ return cls.apidoc_routes
+
+ @classmethod
+ def get_method_endpoints(cls, fname):
+ if len(cls.method_endpoints) == 0:
+ cls.method_endpoints = cls.group_routes_by_method()
+ return cls.method_endpoints[fname]
+
+ @classmethod
+ def group_routes_by_method(cls):
+ """
+ Group URL endpoints according to their processing method.
+ Returns:
+ A dict where keys are the processing method names, and values
+ are the routes that are bound to the key method.
+ """
+ endpoints = {}
+ for rule in app.url_map.iter_rules():
+ rule_dict = {'rule': rule.rule,
+ 'methods': rule.methods}
+ if rule.endpoint not in endpoints:
+ endpoints[rule.endpoint] = [rule_dict]
+ else:
+ endpoints[rule.endpoint].append(rule_dict)
+ return endpoints
+
+ @classmethod
+ def index_add_route(cls, route, docstring):
+ """
+ Add a route to the self-documenting API reference
+ """
+ if route not in cls.apidoc_routes:
+ cls.apidoc_routes[route] = docstring
+
+
+class route(object): # noqa: N801
+ """
+ Decorate an API method to register it in the API doc route index
+ and create the corresponding Flask route.
+ Caution: decorating a method with this requires to also decorate it
+ __at least__ with @returns, or breaks the decorated endpoint
+ Args:
+ route: the documentation page's route
+ noargs: set to True if the route has no arguments, and its result
+ should be displayed anytime its documentation is requested
+ """
+ def __init__(self, route, noargs=False):
+ self.route = route
+ self.noargs = noargs
+
+ def __call__(self, f):
+ APIUrls.index_add_route(self.route, f.__doc__)
+
+ @wraps(f)
+ def doc_func(*args, **kwargs):
+ return f(call_args=(args, kwargs),
+ doc_route=self.route,
+ noargs=self.noargs)
+
+ if not self.noargs:
+ app.add_url_rule(self.route, f.__name__, doc_func)
+
+ return doc_func
+
+
+class arg(object): # noqa: N801
+ """
+ Decorate an API method to display an argument's information on the doc
+ page specified by @route above.
+ Args:
+ name: the argument's name. MUST match the method argument's name to
+ create the example request URL.
+ default: the argument's default value
+ argtype: the argument's type as an Enum value from apidoc.argtypes
+ argdoc: the argument's documentation string
+ """
+ def __init__(self, name, default, argtype, argdoc):
+ self.doc_dict = {
+ 'name': name,
+ 'type': argtype.value,
+ 'doc': argdoc,
+ 'default': default
+ }
+
+ def __call__(self, f):
+ @wraps(f)
+ def arg_fun(*args, **kwargs):
+ if 'args' in kwargs:
+ kwargs['args'].append(self.doc_dict)
+ else:
+ kwargs['args'] = [self.doc_dict]
+ return f(*args, **kwargs)
+ return arg_fun
+
+
+class raises(object): # noqa: N801
+ """
+ Decorate an API method to display information pertaining to an exception
+ that can be raised by this method.
+ Args:
+ exc: the exception name as an Enum value from apidoc.excs
+ doc: the exception's documentation string
+ """
+ def __init__(self, exc, doc):
+ self.exc_dict = {
+ 'exc': exc.value,
+ 'doc': doc
+ }
+
+ def __call__(self, f):
+ @wraps(f)
+ def exc_fun(*args, **kwargs):
+ if 'excs' in kwargs:
+ kwargs['excs'].append(self.exc_dict)
+ else:
+ kwargs['excs'] = [self.exc_dict]
+ return f(*args, **kwargs)
+ return exc_fun
+
+
+class returns(object): # noqa: N801
+ """
+ Decorate an API method to display information about its return value.
+ Caution: this MUST be the last decorator in the apidoc decorator stack,
+ or the decorated endpoint breaks
+ Args:
+ rettype: the return value's type as an Enum value from apidoc.rettypes
+ retdoc: the return value's documentation string
+ """
+ def __init__(self, rettype=None, retdoc=None):
+ self.return_dict = {
+ 'type': rettype.value,
+ 'doc': retdoc
+ }
+
+ def filter_api_url(self, endpoint, route_re, noargs):
+ doc_methods = {'GET', 'HEAD', 'OPTIONS'}
+ if re.match(route_re, endpoint['rule']):
+ if endpoint['methods'] == doc_methods and not noargs:
+ return False
+ return True
+
+ def __call__(self, f):
+ @wraps(f)
+ def ret_fun(*args, **kwargs):
+ # Build documentation
+ env = {
+ 'docstring': f.__doc__,
+ 'route': kwargs['doc_route'],
+ 'return': self.return_dict
+ }
+
+ for arg in ['args', 'excs']:
+ if arg in kwargs:
+ env[arg] = kwargs[arg]
+
+ route_re = re.compile('.*%s$' % kwargs['doc_route'])
+ endpoint_list = APIUrls.get_method_endpoints(f.__name__)
+ other_urls = [url for url in endpoint_list if
+ self.filter_api_url(url, route_re, kwargs['noargs'])]
+ env['urls'] = other_urls
+
+ # Build example endpoint URL
+ if 'args' in env:
+ defaults = {arg['name']: arg['default'] for arg in env['args']}
+ example = url_for(f.__name__, **defaults)
+ env['example'] = re.sub(r'(.*)\?.*', r'\1', example)
+
+ # Prepare and send to mimetype selector if it's not a doc request
+ if re.match(route_re, request.url) and not kwargs['noargs']:
+ return app.response_class(
+ render_template('apidoc.html', **env),
+ content_type='text/html')
+
+ cargs, ckwargs = kwargs['call_args']
+ g.doc_env = env # Store for response processing
+ return f(*cargs, **ckwargs)
+ return ret_fun
diff --git a/swh/web/ui/backend.py b/swh/web/ui/backend.py
index 28a71a4d..229e27b8 100644
--- a/swh/web/ui/backend.py
+++ b/swh/web/ui/backend.py
@@ -1,270 +1,271 @@
# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
from swh.web.ui import main
def content_get(sha1_bin):
"""Lookup the content designed by {algo: hash_bin}.
Args:
sha1_bin: content's binary sha1.
Returns:
Content as dict with 'sha1' and 'data' keys.
data representing its raw data.
"""
contents = main.storage().content_get([sha1_bin])
if contents and len(contents) >= 1:
return contents[0]
return None
def content_find(algo, hash_bin):
"""Retrieve the content with binary hash hash_bin
Args:
algo: nature of the hash hash_bin.
hash_bin: content's hash searched for.
Returns:
A triplet (sha1, sha1_git, sha256) if the content exist
or None otherwise.
"""
return main.storage().content_find({algo: hash_bin})
def content_find_occurrence(algo, hash_bin):
"""Find the content's occurrence.
Args:
algo: nature of the hash hash_bin.
hash_bin: content's hash searched for.
Returns:
The occurrence of the content.
"""
return main.storage().content_find_occurrence({algo: hash_bin})
def content_missing_per_sha1(sha1list):
"""List content missing from storage based on sha1
Args:
sha1s: Iterable of sha1 to check for absence
Returns:
an iterable of sha1s missing from the storage
"""
return main.storage().content_missing_per_sha1(sha1list)
def directory_get(sha1_bin):
"""Retrieve information on one directory.
Args:
sha1_bin: Directory's identifier
Returns:
The directory's information.
"""
res = main.storage().directory_get([sha1_bin])
if res and len(res) >= 1:
return res[0]
-def origin_get(origin_id):
- """Return information about the origin with id origin_id.
+def origin_get(origin):
+ """Return information about the origin matching dict origin.
Args:
- origin_id: origin's identifier
+ origin: origin's dict with keys either 'id' or
+ ('type' AND 'url')
Returns:
Origin information as dict.
"""
- return main.storage().origin_get({'id': origin_id})
+ return main.storage().origin_get(origin)
def person_get(person_id):
"""Return information about the person with id person_id.
Args:
person_id: person's identifier.v
Returns:
Person information as dict.
"""
res = main.storage().person_get([person_id])
if res and len(res) >= 1:
return res[0]
def directory_ls(sha1_git_bin, recursive=False):
"""Return information about the directory with id sha1_git.
Args:
sha1_git: directory's identifier.
recursive: Optional recursive flag default to False
Returns:
Directory information as dict.
"""
directory_entries = main.storage().directory_ls(sha1_git_bin, recursive)
if not directory_entries:
return []
return directory_entries
def release_get(sha1_git_bin):
"""Return information about the release with sha1 sha1_git_bin.
Args:
sha1_git_bin: The release's sha1 as bytes.
Returns:
Release information as dict if found, None otherwise.
Raises:
ValueError if the identifier provided is not of sha1 nature.
"""
res = main.storage().release_get([sha1_git_bin])
if res and len(res) >= 1:
return res[0]
return None
def revision_get(sha1_git_bin):
"""Return information about the revision with sha1 sha1_git_bin.
Args:
sha1_git_bin: The revision's sha1 as bytes.
Returns:
Revision information as dict if found, None otherwise.
Raises:
ValueError if the identifier provided is not of sha1 nature.
"""
res = main.storage().revision_get([sha1_git_bin])
if res and len(res) >= 1:
return res[0]
return None
def revision_get_multiple(sha1_git_bin_list):
"""Return information about the revisions in sha1_git_bin_list
Args:
sha1_git_bin_list: The revisions' sha1s as a list of bytes.
Returns:
Revisions' information as an iterable of dicts if any found,
an empty list otherwise
Raises:
ValueError if the identifier provided is not of sha1 nature.
"""
res = main.storage().revision_get(sha1_git_bin_list)
if res and len(res) >= 1:
return res
return []
-def revision_log(sha1_git_bin, limit=100):
+def revision_log(sha1_git_bin, limit):
"""Return information about the revision with sha1 sha1_git_bin.
Args:
sha1_git_bin: The revision's sha1 as bytes.
limit: the maximum number of revisions returned.
Returns:
Revision information as dict if found, None otherwise.
Raises:
ValueError if the identifier provided is not of sha1 nature.
"""
return main.storage().revision_log([sha1_git_bin], limit)
-def revision_log_by(origin_id, branch_name, ts, limit=100):
+def revision_log_by(origin_id, branch_name, ts, limit):
"""Return information about the revision matching the timestamp
ts, from origin origin_id, in branch branch_name.
Args:
origin_id: origin of the revision
- branch_name: revision's branch.
- timestamp: revision's time frame.
Returns:
Information for the revision matching the criterions.
"""
-
return main.storage().revision_log_by(origin_id,
branch_name,
- ts)
+ ts,
+ limit=limit)
def stat_counters():
"""Return the stat counters for Software Heritage
Returns:
A dict mapping textual labels to integer values.
"""
return main.storage().stat_counters()
def stat_origin_visits(origin_id):
"""Return the dates at which the given origin was scanned for content.
Returns:
An array of dates
"""
return main.storage().origin_visit_get(origin_id)
def revision_get_by(origin_id, branch_name, timestamp):
"""Return occurrence information matching the criterions origin_id,
branch_name, ts.
"""
res = main.storage().revision_get_by(origin_id,
branch_name,
timestamp=timestamp,
limit=1)
if not res:
return None
return res[0]
def directory_entry_get_by_path(directory, path):
"""Return a directory entry by its path.
"""
paths = path.strip(os.path.sep).split(os.path.sep)
return main.storage().directory_entry_get_by_path(
directory,
list(map(lambda p: p.encode('utf-8'), paths)))
def entity_get(uuid):
"""Retrieve the entity per its uuid.
"""
return main.storage().entity_get(uuid)
diff --git a/swh/web/ui/converters.py b/swh/web/ui/converters.py
index 656ac16a..287f95a4 100644
--- a/swh/web/ui/converters.py
+++ b/swh/web/ui/converters.py
@@ -1,228 +1,232 @@
# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import datetime
from swh.core import hashutil
from swh.core.utils import decode_with_escape
from swh.web.ui import utils
def from_swh(dict_swh, hashess={}, bytess={}, dates={}, blacklist={},
convert={}, convert_fn=lambda x: x):
"""Convert from an swh dictionary to something reasonably json
serializable.
Args:
- dict_swh: the origin dictionary needed to be transformed
- hashess: list/set of keys representing hashes values (sha1, sha256,
sha1_git, etc...) as bytes. Those need to be transformed in hexadecimal
string
- bytess: list/set of keys representing bytes values which needs to
be decoded
- blacklist: set of keys to filter out from the conversion
- convert: set of keys whose associated values need to be converted
using convert_fn
- convert_fn: the conversion function to apply on the value of key
in 'convert'
The remaining keys are copied as is in the output.
Returns:
dictionary equivalent as dict_swh only with its keys `converted`.
"""
def convert_hashes_bytes(v):
"""v is supposedly a hash as bytes, returns it converted in hex.
"""
if v and isinstance(v, bytes):
return hashutil.hash_to_hex(v)
return v
def convert_bytes(v):
"""v is supposedly a bytes string, decode as utf-8.
FIXME: Improve decoding policy.
If not utf-8, break!
"""
if v and isinstance(v, bytes):
return v.decode('utf-8')
return v
def convert_date(v):
"""v is a dict with three keys:
timestamp
offset
negative_utc
We convert it to a human-readable string
"""
tz = datetime.timezone(datetime.timedelta(minutes=v['offset']))
date = datetime.datetime.fromtimestamp(v['timestamp'], tz=tz)
datestr = date.isoformat()
if v['offset'] == 0 and v['negative_utc']:
# remove the rightmost + and replace it with a -
return '-'.join(datestr.rsplit('+', 1))
return datestr
if not dict_swh:
return dict_swh
new_dict = {}
for key, value in dict_swh.items():
if key in blacklist:
continue
elif key in dates:
new_dict[key] = convert_date(value)
elif isinstance(value, dict):
new_dict[key] = from_swh(value, hashess, bytess, dates, blacklist,
convert, convert_fn)
elif key in hashess:
new_dict[key] = utils.fmap(convert_hashes_bytes, value)
elif key in bytess:
try:
new_dict[key] = utils.fmap(convert_bytes, value)
except UnicodeDecodeError:
if 'decoding_failures' not in new_dict:
new_dict['decoding_failures'] = [key]
else:
new_dict['decoding_failures'].append(key)
new_dict[key] = utils.fmap(decode_with_escape, value)
elif key in convert:
new_dict[key] = convert_fn(value)
else:
new_dict[key] = value
return new_dict
def from_origin(origin):
"""Convert from an SWH origin to an origin dictionary.
"""
return from_swh(origin,
hashess=set(['revision']),
bytess=set(['path']))
def from_release(release):
"""Convert from an SWH release to a json serializable release dictionary.
Args:
release: Dict with the following keys
- id: identifier of the revision (sha1 in bytes)
- revision: identifier of the revision the release points to (sha1 in
bytes)
- comment: release's comment message (bytes)
- name: release's name (string)
- author: release's author identifier (swh's id)
- synthetic: the synthetic property (boolean)
Returns:
Release dictionary with the following keys:
- id: hexadecimal sha1 (string)
- revision: hexadecimal sha1 (string)
- comment: release's comment message (string)
- name: release's name (string)
- author: release's author identifier (swh's id)
- synthetic: the synthetic property (boolean)
"""
return from_swh(
release,
hashess=set(['id', 'target']),
bytess=set(['message', 'name', 'fullname', 'email']),
dates={'date'},
)
def from_revision(revision):
"""Convert from an SWH revision to a json serializable revision dictionary.
Args:
revision: Dict with the following keys
- id: identifier of the revision (sha1 in bytes)
- directory: identifier of the directory the revision points to (sha1
in bytes)
- author_name, author_email: author's revision name and email
- committer_name, committer_email: committer's revision name and email
- message: revision's message
- date, date_offset: revision's author date
- committer_date, committer_date_offset: revision's commit date
- parents: list of parents for such revision
- synthetic: revision's property nature
- type: revision's type (git, tar or dsc at the moment)
- metadata: if the revision is synthetic, this can reference dynamic
properties.
Returns:
Revision dictionary with the same keys as inputs, only:
- sha1s are in hexadecimal strings (id, directory)
- bytes are decoded in string (author_name, committer_name,
author_email, committer_email)
- remaining keys are left as is
"""
revision = from_swh(revision,
hashess=set(['id',
'directory',
'parents',
'children']),
bytess=set(['name',
'fullname',
'email']),
dates={'date', 'committer_date'})
if revision:
+ if 'parents' in revision:
+ revision['merge'] = len(revision['parents']) > 1
if 'message' in revision:
try:
revision['message'] = revision['message'].decode('utf-8')
except UnicodeDecodeError:
revision['message_decoding_failed'] = True
revision['message'] = None
return revision
def from_content(content):
"""Convert swh content to serializable content dictionary.
"""
+ if content:
+ content = {k: v for k, v in content.items() if k not in ['ctime']}
return from_swh(content,
hashess={'sha1', 'sha1_git', 'sha256'},
bytess={},
blacklist={},
convert={'status'},
convert_fn=lambda v: 'absent' if v == 'hidden' else v)
def from_person(person):
"""Convert swh person to serializable person dictionary.
"""
return from_swh(person,
hashess=set(),
bytess=set(['name', 'fullname', 'email']))
def from_directory_entry(dir_entry):
"""Convert swh person to serializable person dictionary.
"""
return from_swh(dir_entry,
hashess=set(['dir_id',
'sha1_git',
'sha1',
'sha256',
'target']),
bytess=set(['name']),
blacklist={},
convert={'status'},
convert_fn=lambda v: 'absent' if v == 'hidden' else v)
diff --git a/swh/web/ui/main.py b/swh/web/ui/main.py
index 6bdfba22..4837a8e7 100644
--- a/swh/web/ui/main.py
+++ b/swh/web/ui/main.py
@@ -1,150 +1,139 @@
# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import logging
import os
+import json
-from flask.ext.api import FlaskAPI
+from flask import Flask
from swh.core import config
-from swh.web.ui.renderers import RENDERERS, urlize_api_links
+from swh.web.ui.renderers import urlize_api_links
from swh.web.ui.renderers import safe_docstring_display
+from swh.web.ui.renderers import revision_id_from_url
+from swh.web.ui.renderers import SWHMultiResponse
from swh.storage import get_storage
DEFAULT_CONFIG = {
'storage_args': ('list[str]', ['http://localhost:5000/']),
'storage_class': ('str', 'remote_storage'),
'log_dir': ('string', '/tmp/swh/log'),
'debug': ('bool', None),
'host': ('string', '127.0.0.1'),
'port': ('int', 6543),
'secret_key': ('string', 'development key'),
- 'max_upload_size': ('int', 16 * 1024 * 1024),
- 'upload_folder': ('string', '/tmp/swh-web-ui/uploads'),
- 'upload_allowed_extensions': ('list[str]', []) # means all are accepted
+ 'max_log_revs': ('int', 25),
}
-
# api's definition
-app = FlaskAPI(__name__)
+app = Flask(__name__)
+app.response_class = SWHMultiResponse
app.jinja_env.filters['urlize_api_links'] = urlize_api_links
app.jinja_env.filters['safe_docstring_display'] = safe_docstring_display
-
-
-AUTODOC_ENDPOINT_INSTALLED = False
+app.jinja_env.filters['revision_id_from_url'] = revision_id_from_url
def read_config(config_file):
"""Read the configuration file `config_file`, update the app with
parameters (secret_key, conf) and return the parsed configuration as a
dict"""
conf = config.read(config_file, DEFAULT_CONFIG)
- config.prepare_folders(conf, 'log_dir', 'upload_folder')
+ config.prepare_folders(conf, 'log_dir')
conf['storage'] = get_storage(conf['storage_class'], conf['storage_args'])
return conf
def load_controllers():
"""Load the controllers for the application.
"""
from swh.web.ui import views, apidoc # flake8: noqa
- # side-effects here (install autodoc endpoints so do it only once!)
- global AUTODOC_ENDPOINT_INSTALLED
- if not AUTODOC_ENDPOINT_INSTALLED:
- apidoc.install_browsable_api_endpoints()
- AUTODOC_ENDPOINT_INSTALLED = True
-
def rules():
"""Returns rules from the application in dictionary form.
Beware, must be called after swh.web.ui.main.load_controllers funcall.
Returns:
Generator of application's rules.
"""
for rule in app.url_map._rules:
yield {'rule': rule.rule,
'methods': rule.methods,
'endpoint': rule.endpoint}
def storage():
"""Return the current application's storage.
"""
return app.config['conf']['storage']
def run_from_webserver(environ, start_response):
"""Run the WSGI app from the webserver, loading the configuration.
Note: This function is called on a per-request basis so beware the side
effects here!
"""
load_controllers()
config_path = '/etc/softwareheritage/webapp/webapp.ini'
conf = read_config(config_path)
app.secret_key = conf['secret_key']
app.config['conf'] = conf
- app.config['MAX_CONTENT_LENGTH'] = conf['max_upload_size']
- app.config['DEFAULT_RENDERERS'] = RENDERERS
logging.basicConfig(filename=os.path.join(conf['log_dir'], 'web-ui.log'),
level=logging.INFO)
return app(environ, start_response)
def run_debug_from(config_path, verbose=False):
"""Run the api's server in dev mode.
Note: This is called only once (contrast with the production mode
in run_from_webserver function)
Args:
conf is a dictionary of keywords:
- 'db_url' the db url's access (through psycopg2 format)
- 'content_storage_dir' revisions/directories/contents storage on disk
- 'host' to override the default 127.0.0.1 to open or not the server
to the world
- 'port' to override the default of 5000 (from the underlying layer:
flask)
- 'debug' activate the verbose logs
- 'secret_key' the flask secret key
Returns:
Never
"""
load_controllers()
conf = read_config(config_path)
app.secret_key = conf['secret_key']
app.config['conf'] = conf
- app.config['MAX_CONTENT_LENGTH'] = conf['max_upload_size']
- app.config['DEFAULT_RENDERERS'] = RENDERERS
host = conf.get('host', '127.0.0.1')
port = conf.get('port')
debug = conf.get('debug')
log_file = os.path.join(conf['log_dir'], 'web-ui.log')
logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO,
handlers=[logging.FileHandler(log_file),
logging.StreamHandler()])
app.run(host=host, port=port, debug=debug)
diff --git a/swh/web/ui/renderers.py b/swh/web/ui/renderers.py
index b5172ef3..056aaf14 100644
--- a/swh/web/ui/renderers.py
+++ b/swh/web/ui/renderers.py
@@ -1,144 +1,146 @@
# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import re
import yaml
+import json
+
+from docutils.core import publish_parts
+from docutils.writers.html4css1 import Writer, HTMLTranslator
+from inspect import cleandoc
+
+from flask import request, Response, render_template
+from flask import g
-from flask import make_response, request
-from flask.ext.api import renderers, parsers
-from flask_api.mediatypes import MediaType
from swh.web.ui import utils
class SWHFilterEnricher():
"""Global filter on fields.
"""
def filter_by_fields(self, data):
"""Extract a request parameter 'fields' if it exists to permit the
filtering on the data dict's keys.
If such field is not provided, returns the data as is.
"""
fields = request.args.get('fields')
if fields:
fields = set(fields.split(','))
data = utils.filter_field_keys(data, fields)
return data
-class YAMLRenderer(renderers.BaseRenderer, SWHFilterEnricher):
- """Renderer for application/yaml.
- Orchestrate from python data structure to yaml.
-
+class SWHMultiResponse(Response, SWHFilterEnricher):
"""
- media_type = 'application/yaml'
-
- def render(self, data, media_type, **options):
- data = self.filter_by_fields(data)
- return yaml.dump(data, encoding=self.charset)
-
-
-class JSONPEnricher():
- """JSONP rendering.
-
+ A Flask Response subclass.
+ Override force_type to transform dict responses into callable Flask
+ response objects whose mimetype matches the request's Accept header: HTML
+ template render, YAML dump or default to a JSON dump.
"""
- def enrich_with_jsonp(self, data):
- """Defines a jsonp function that extracts a potential 'callback'
- request parameter holding the function name and wraps the data
- inside a call to such function
-
- e.g:
- GET /blah/foo/bar renders: {'output': 'wrapped'}
- GET /blah/foo/bar?callback=fn renders: fn({'output': 'wrapped'})
- """
- jsonp = request.args.get('callback')
- if jsonp:
- return '%s(%s)' % (jsonp, data)
-
- return data
-
-class SWHJSONRenderer(renderers.JSONRenderer,
- SWHFilterEnricher,
- JSONPEnricher):
- """Renderer for application/json.
- Serializes in json the data and returns it.
-
- Also deals with jsonp. If callback is found in request parameter,
- wrap the result as a function with name the value of the parameter
- query 'callback'.
+ @classmethod
+ def make_response_from_mimetype(cls, rv, options={}):
+ if not (isinstance(rv, list) or isinstance(rv, dict)):
+ return rv
+
+ def wants_html(best_match):
+ return best_match == 'text/html' and \
+ request.accept_mimetypes[best_match] > \
+ request.accept_mimetypes['application/json']
+
+ def wants_yaml(best_match):
+ return best_match == 'application/yaml' and \
+ request.accept_mimetypes[best_match] > \
+ request.accept_mimetypes['application/json']
+
+ rv = cls.filter_by_fields(cls, rv)
+ acc_mime = ['application/json', 'application/yaml', 'text/html']
+ best_match = request.accept_mimetypes.best_match(acc_mime)
+ # return a template render
+ if wants_html(best_match):
+ data = json.dumps(rv, sort_keys=True,
+ indent=4, separators=(',', ': '))
+ env = g.get('doc_env', {})
+ env['response_data'] = data
+ env['request'] = request
+ rv = Response(render_template('apidoc.html', **env),
+ content_type='text/html',
+ **options)
+ # return formatted yaml
+ elif wants_yaml(best_match):
+ rv = Response(
+ yaml.dump(rv),
+ content_type='application/yaml',
+ **options)
+ # return formatted json
+ else:
+ # jsonify is unhappy with lists in Flask 0.10.1, use json.dumps
+ rv = Response(
+ json.dumps(rv),
+ content_type='application/json',
+ **options)
+ return rv
+
+ @classmethod
+ def force_type(cls, rv, environ=None):
+ if isinstance(rv, dict) or isinstance(rv, list):
+ rv = cls.make_response_from_mimetype(rv)
+ return super().force_type(rv, environ)
+
+
+def error_response(error_code, error):
+ """Private function to create a custom error response.
"""
- def render(self, data, media_type, **options):
- data = self.filter_by_fields(data)
- res = super().render(data, media_type, **options)
- return self.enrich_with_jsonp(res)
+ error_opts = {'status': error_code}
+ error_data = {'error': str(error)}
+
+ return SWHMultiResponse.make_response_from_mimetype(error_data,
+ options=error_opts)
def urlize_api_links(content):
"""Utility function for decorating api links in browsable api."""
return re.sub(r'"(/api/.*|/browse/.*)"', r'"\1"', content)
-def safe_docstring_display(docstring):
- """Utility function to safely decorate docstring in browsable api."""
- src = r'(Args|Raises?|Throws?|Yields?|Returns?|Examples?|Samples?):.*'
- dest = r'
\1:
'
- return re.sub(src, dest, docstring)
-
-
-class SWHBrowsableAPIRenderer(renderers.BrowsableAPIRenderer):
- """SWH's browsable api renderer.
-
+class NoHeaderHTMLTranslator(HTMLTranslator):
"""
- template = "api.html"
-
-
-RENDERERS = [
- 'swh.web.ui.renderers.SWHJSONRenderer',
- 'swh.web.ui.renderers.SWHBrowsableAPIRenderer',
- 'flask.ext.api.parsers.URLEncodedParser',
- 'swh.web.ui.renderers.YAMLRenderer',
-]
+ Docutils translator subclass to customize the generation of HTML
+ from reST-formatted docstrings
+ """
+ def __init__(self, document):
+ super().__init__(document)
+ self.body_prefix = []
+ self.body_suffix = []
+ def visit_bullet_list(self, node):
+ self.context.append((self.compact_simple, self.compact_p))
+ self.compact_p = None
+ self.compact_simple = self.is_compactable(node)
+ self.body.append(self.starttag(node, 'ul', CLASS='docstring'))
-RENDERERS_INSTANCE = [
- SWHJSONRenderer(),
- SWHBrowsableAPIRenderer(),
- parsers.URLEncodedParser(),
- YAMLRenderer(),
-]
+DOCSTRING_WRITER = Writer()
+DOCSTRING_WRITER.translator_class = NoHeaderHTMLTranslator
-RENDERERS_BY_TYPE = {
- r.media_type: r
- for r in RENDERERS_INSTANCE
-}
+def safe_docstring_display(docstring):
+ """
+ Utility function to htmlize reST-formatted documentation in browsable
+ api.
+ """
+ docstring = cleandoc(docstring)
+ return publish_parts(docstring, writer=DOCSTRING_WRITER)['html_body']
-def error_response(default_error_msg, error_code, error):
- """Private function to create a custom error response.
- """
- # if nothing is requested by client, use json
- default_application_type = 'application/json'
- accept_type = request.headers.get('Accept', default_application_type)
- renderer = RENDERERS_BY_TYPE.get(
- accept_type,
- RENDERERS_BY_TYPE[default_application_type])
-
- # for edge cases, use the elected renderer's media type
- accept_type = renderer.media_type
- response = make_response(default_error_msg, error_code)
- response.headers['Content-Type'] = accept_type
- response.data = renderer.render({"error": str(error)},
- media_type=MediaType(accept_type),
- status=error_code,
- headers={'Content-Type': accept_type})
-
- return response
+def revision_id_from_url(url):
+ """Utility function to obtain a revision's ID from its browsing URL."""
+ return re.sub(r'/browse/revision/([0-9a-f]{40}|[0-9a-f]{64})/.*',
+ r'\1', url)
diff --git a/swh/web/ui/service.py b/swh/web/ui/service.py
index 50437ecf..8b2ec9b7 100644
--- a/swh/web/ui/service.py
+++ b/swh/web/ui/service.py
@@ -1,643 +1,622 @@
# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from collections import defaultdict
from swh.core import hashutil
from swh.web.ui import converters, query, backend
from swh.web.ui.exc import NotFoundExc
def lookup_multiple_hashes(hashes):
"""Lookup the passed hashes in a single DB connection, using batch processing.
Args:
An array of {filename: X, sha1: Y}, string X, hex sha1 string Y.
Returns:
The same array with elements updated with elem['found'] = true if
the hash is present in storage, elem['found'] = false if not.
"""
hashlist = [hashutil.hex_to_hash(elem['sha1']) for elem in hashes]
content_missing = backend.content_missing_per_sha1(hashlist)
missing = [hashutil.hash_to_hex(x) for x in content_missing]
for x in hashes:
x.update({'found': True})
for h in hashes:
if h['sha1'] in missing:
h['found'] = False
return hashes
-def hash_and_search(filepath):
- """Hash the filepath's content as sha1, then search in storage if
- it exists.
-
- Args:
- Filepath of the file to hash and search.
-
- Returns:
- Tuple (hex sha1, found as True or false).
- The found boolean, according to whether the sha1 of the file
- is present or not.
- """
- h = hashutil.hashfile(filepath)
- c = backend.content_find('sha1', h['sha1'])
- if c:
- r = converters.from_content(c)
- r['found'] = True
- return r
- else:
- return {'sha1': hashutil.hash_to_hex(h['sha1']),
- 'found': False}
-
-
def lookup_hash(q):
"""Checks if the storage contains a given content checksum
Args: query string of the form
Returns: Dict with key found containing the hash info if the
hash is present, None if not.
"""
algo, hash = query.parse_hash(q)
found = backend.content_find(algo, hash)
return {'found': found,
'algo': algo}
def search_hash(q):
"""Checks if the storage contains a given content checksum
Args: query string of the form
Returns: Dict with key found to True or False, according to
whether the checksum is present or not
"""
algo, hash = query.parse_hash(q)
found = backend.content_find(algo, hash)
return {'found': found is not None}
def lookup_hash_origin(q):
"""Return information about the checksum contained in the query q.
Args: query string of the form
Returns:
origin as dictionary if found for the given content.
"""
algo, hash = query.parse_hash(q)
origin = backend.content_find_occurrence(algo, hash)
return converters.from_origin(origin)
-def lookup_origin(origin_id):
- """Return information about the origin with id origin_id.
+def lookup_origin(origin):
+ """Return information about the origin matching dict origin.
Args:
- origin_id as string
+ origin: origin's dict with keys either 'id' or
+ ('type' AND 'url')
Returns:
origin information as dict.
"""
- return backend.origin_get(origin_id)
+ return backend.origin_get(origin)
def lookup_person(person_id):
"""Return information about the person with id person_id.
Args:
person_id as string
Returns:
person information as dict.
"""
person = backend.person_get(person_id)
return converters.from_person(person)
def lookup_directory(sha1_git):
"""Return information about the directory with id sha1_git.
Args:
sha1_git as string
Returns:
directory information as dict.
"""
_, sha1_git_bin = query.parse_hash_with_algorithms_or_throws(
sha1_git,
['sha1'], # HACK: sha1_git really
'Only sha1_git is supported.')
dir = backend.directory_get(sha1_git_bin)
if not dir:
return None
directory_entries = backend.directory_ls(sha1_git_bin)
return map(converters.from_directory_entry, directory_entries)
def lookup_directory_with_path(directory_sha1_git, path_string):
"""Return directory information for entry with path path_string w.r.t.
root directory pointed by directory_sha1_git
Args:
- directory_sha1_git: sha1_git corresponding to the directory
to which we append paths to (hopefully) find the entry
- the relative path to the entry starting from the directory pointed by
directory_sha1_git
Raises:
NotFoundExc if the directory entry is not found
"""
_, sha1_git_bin = query.parse_hash_with_algorithms_or_throws(
directory_sha1_git,
['sha1'],
'Only sha1_git is supported.')
queried_dir = backend.directory_entry_get_by_path(
sha1_git_bin, path_string)
if not queried_dir:
raise NotFoundExc(('Directory entry with path %s from %s not found') %
(path_string, directory_sha1_git))
return converters.from_directory_entry(queried_dir)
def lookup_release(release_sha1_git):
"""Return information about the release with sha1 release_sha1_git.
Args:
release_sha1_git: The release's sha1 as hexadecimal
Returns:
Release information as dict.
Raises:
ValueError if the identifier provided is not of sha1 nature.
"""
_, sha1_git_bin = query.parse_hash_with_algorithms_or_throws(
release_sha1_git,
['sha1'],
'Only sha1_git is supported.')
res = backend.release_get(sha1_git_bin)
return converters.from_release(res)
def lookup_revision(rev_sha1_git):
"""Return information about the revision with sha1 revision_sha1_git.
Args:
revision_sha1_git: The revision's sha1 as hexadecimal
Returns:
Revision information as dict.
Raises:
ValueError if the identifier provided is not of sha1 nature.
"""
_, sha1_git_bin = query.parse_hash_with_algorithms_or_throws(
rev_sha1_git,
['sha1'],
'Only sha1_git is supported.')
revision = backend.revision_get(sha1_git_bin)
return converters.from_revision(revision)
def lookup_revision_multiple(sha1_git_list):
"""Return information about the revision with sha1 revision_sha1_git.
Args:
revision_sha1_git: The revision's sha1 as hexadecimal
Returns:
Revision information as dict.
Raises:
ValueError if the identifier provided is not of sha1 nature.
"""
def to_sha1_bin(sha1_hex):
_, sha1_git_bin = query.parse_hash_with_algorithms_or_throws(
sha1_hex,
['sha1'],
'Only sha1_git is supported.')
return sha1_git_bin
sha1_bin_list = (to_sha1_bin(x) for x in sha1_git_list)
revisions = backend.revision_get_multiple(sha1_bin_list)
return (converters.from_revision(x) for x in revisions)
def lookup_revision_message(rev_sha1_git):
"""Return the raw message of the revision with sha1 revision_sha1_git.
Args:
revision_sha1_git: The revision's sha1 as hexadecimal
Returns:
Decoded revision message as dict {'message': }
Raises:
ValueError if the identifier provided is not of sha1 nature.
NotFoundExc if the revision is not found, or if it has no message
"""
_, sha1_git_bin = query.parse_hash_with_algorithms_or_throws(
rev_sha1_git,
['sha1'],
'Only sha1_git is supported.')
revision = backend.revision_get(sha1_git_bin)
if not revision:
raise NotFoundExc('Revision with sha1_git %s not found.'
% rev_sha1_git)
if 'message' not in revision:
raise NotFoundExc('No message for revision with sha1_git %s.'
% rev_sha1_git)
res = {'message': revision['message']}
return res
def lookup_revision_by(origin_id,
branch_name="refs/heads/master",
timestamp=None):
"""Lookup revisions by origin_id, branch_name and timestamp.
If:
- branch_name is not provided, lookup using 'refs/heads/master' as default.
- ts is not provided, use the most recent
Args:
- origin_id: origin of the revision.
- branch_name: revision's branch.
- timestamp: revision's time frame.
Yields:
The revisions matching the criterions.
"""
res = backend.revision_get_by(origin_id, branch_name, timestamp)
return converters.from_revision(res)
-def lookup_revision_log(rev_sha1_git, limit=100):
+def lookup_revision_log(rev_sha1_git, limit):
"""Return information about the revision with sha1 revision_sha1_git.
Args:
revision_sha1_git: The revision's sha1 as hexadecimal
limit: the maximum number of revisions returned
Returns:
Revision information as dict.
Raises:
ValueError if the identifier provided is not of sha1 nature.
"""
_, sha1_git_bin = query.parse_hash_with_algorithms_or_throws(
rev_sha1_git,
['sha1'],
'Only sha1_git is supported.')
revision_entries = backend.revision_log(sha1_git_bin, limit)
return map(converters.from_revision, revision_entries)
-def lookup_revision_log_by(origin_id, branch_name, timestamp):
+def lookup_revision_log_by(origin_id, branch_name, timestamp, limit):
"""Return information about the revision with sha1 revision_sha1_git.
Args:
origin_id: origin of the revision
branch_name: revision's branch
timestamp: revision's time frame
limit: the maximum number of revisions returned
Returns:
Revision information as dict.
Raises:
NotFoundExc if no revision corresponds to the criterion
NotFoundExc if the corresponding revision has no log
"""
revision_entries = backend.revision_log_by(origin_id,
branch_name,
- timestamp)
+ timestamp,
+ limit)
if not revision_entries:
return None
return map(converters.from_revision, revision_entries)
def lookup_revision_with_context_by(origin_id, branch_name, ts, sha1_git,
limit=100):
"""Return information about revision sha1_git, limited to the
sub-graph of all transitive parents of sha1_git_root.
sha1_git_root being resolved through the lookup of a revision by origin_id,
branch_name and ts.
In other words, sha1_git is an ancestor of sha1_git_root.
Args:
- origin_id: origin of the revision.
- branch_name: revision's branch.
- timestamp: revision's time frame.
- sha1_git: one of sha1_git_root's ancestors.
- limit: limit the lookup to 100 revisions back.
Returns:
Pair of (root_revision, revision).
Information on sha1_git if it is an ancestor of sha1_git_root
including children leading to sha1_git_root
Raises:
- BadInputExc in case of unknown algo_hash or bad hash.
- NotFoundExc if either revision is not found or if sha1_git is not an
ancestor of sha1_git_root.
"""
rev_root = backend.revision_get_by(origin_id, branch_name, ts)
if not rev_root:
raise NotFoundExc('Revision with (origin_id: %s, branch_name: %s'
', ts: %s) not found.' % (origin_id,
branch_name,
ts))
return (converters.from_revision(rev_root),
lookup_revision_with_context(rev_root, sha1_git, limit))
def lookup_revision_with_context(sha1_git_root, sha1_git, limit=100):
"""Return information about revision sha1_git, limited to the
sub-graph of all transitive parents of sha1_git_root.
In other words, sha1_git is an ancestor of sha1_git_root.
Args:
sha1_git_root: latest revision. The type is either a sha1 (as an hex
string) or a non converted dict.
sha1_git: one of sha1_git_root's ancestors
limit: limit the lookup to 100 revisions back
Returns:
Information on sha1_git if it is an ancestor of sha1_git_root
including children leading to sha1_git_root
Raises:
BadInputExc in case of unknown algo_hash or bad hash
NotFoundExc if either revision is not found or if sha1_git is not an
ancestor of sha1_git_root
"""
_, sha1_git_bin = query.parse_hash_with_algorithms_or_throws(
sha1_git,
['sha1'],
'Only sha1_git is supported.')
revision = backend.revision_get(sha1_git_bin)
if not revision:
raise NotFoundExc('Revision %s not found' % sha1_git)
if isinstance(sha1_git_root, str):
_, sha1_git_root_bin = query.parse_hash_with_algorithms_or_throws(
sha1_git_root,
['sha1'],
'Only sha1_git is supported.')
revision_root = backend.revision_get(sha1_git_root_bin)
if not revision_root:
raise NotFoundExc('Revision root %s not found' % sha1_git_root)
else:
sha1_git_root_bin = sha1_git_root['id']
revision_log = backend.revision_log(sha1_git_root_bin, limit)
parents = {}
children = defaultdict(list)
for rev in revision_log:
rev_id = rev['id']
parents[rev_id] = []
for parent_id in rev['parents']:
parents[rev_id].append(parent_id)
children[parent_id].append(rev_id)
if revision['id'] not in parents:
raise NotFoundExc('Revision %s is not an ancestor of %s' %
(sha1_git, sha1_git_root))
revision['children'] = children[revision['id']]
return converters.from_revision(revision)
def lookup_directory_with_revision(sha1_git, dir_path=None, with_data=False):
"""Return information on directory pointed by revision with sha1_git.
If dir_path is not provided, display top level directory.
Otherwise, display the directory pointed by dir_path (if it exists).
Args:
sha1_git: revision's hash.
dir_path: optional directory pointed to by that revision.
with_data: boolean that indicates to retrieve the raw data if the path
resolves to a content. Default to False (for the api)
Returns:
Information on the directory pointed to by that revision.
Raises:
BadInputExc in case of unknown algo_hash or bad hash.
NotFoundExc either if the revision is not found or the path referenced
does not exist.
NotImplementedError in case of dir_path exists but do not reference a
type 'dir' or 'file'.
"""
_, sha1_git_bin = query.parse_hash_with_algorithms_or_throws(
sha1_git,
['sha1'],
'Only sha1_git is supported.')
revision = backend.revision_get(sha1_git_bin)
if not revision:
raise NotFoundExc('Revision %s not found' % sha1_git)
dir_sha1_git_bin = revision['directory']
if dir_path:
entity = backend.directory_entry_get_by_path(dir_sha1_git_bin,
dir_path)
if not entity:
raise NotFoundExc(
"Directory or File '%s' pointed to by revision %s not found"
% (dir_path, sha1_git))
else:
entity = {'type': 'dir', 'target': dir_sha1_git_bin}
if entity['type'] == 'dir':
directory_entries = backend.directory_ls(entity['target'])
return {'type': 'dir',
'path': '.' if not dir_path else dir_path,
'revision': sha1_git,
'content': map(converters.from_directory_entry,
directory_entries)}
elif entity['type'] == 'file': # content
content = backend.content_find('sha1_git', entity['target'])
if with_data:
content['data'] = backend.content_get(content['sha1'])['data']
return {'type': 'file',
'path': '.' if not dir_path else dir_path,
'revision': sha1_git,
'content': converters.from_content(content)}
else:
raise NotImplementedError('Entity of type %s not implemented.'
% entity['type'])
def lookup_content(q):
"""Lookup the content designed by q.
Args:
q: The release's sha1 as hexadecimal
"""
algo, hash = query.parse_hash(q)
c = backend.content_find(algo, hash)
return converters.from_content(c)
def lookup_content_raw(q):
"""Lookup the content defined by q.
Args:
q: query string of the form
Returns:
dict with 'sha1' and 'data' keys.
data representing its raw data decoded.
"""
algo, hash = query.parse_hash(q)
c = backend.content_find(algo, hash)
if not c:
return None
content = backend.content_get(c['sha1'])
return converters.from_content(content)
def stat_counters():
"""Return the stat counters for Software Heritage
Returns:
A dict mapping textual labels to integer values.
"""
return backend.stat_counters()
def stat_origin_visits(origin_id):
"""Return the dates at which the given origin was scanned for content.
Returns:
An array of dates in the datetime format
"""
for visit in backend.stat_origin_visits(origin_id):
visit['date'] = visit['date'].timestamp()
yield(visit)
def lookup_entity_by_uuid(uuid):
"""Return the entity's hierarchy from its uuid.
Args:
uuid: entity's identifier.
Returns:
List of hierarchy entities from the entity with uuid.
"""
uuid = query.parse_uuid4(uuid)
return backend.entity_get(uuid)
def lookup_revision_through(revision, limit=100):
"""Retrieve a revision from the criterion stored in revision dictionary.
Args:
revision: Dictionary of criterion to lookup the revision with.
Here are the supported combination of possible values:
- origin_id, branch_name, ts, sha1_git
- origin_id, branch_name, ts
- sha1_git_root, sha1_git
- sha1_git
Returns:
None if the revision is not found or the actual revision.
"""
if 'origin_id' in revision and \
'branch_name' in revision and \
'ts' in revision and \
'sha1_git' in revision:
return lookup_revision_with_context_by(revision['origin_id'],
revision['branch_name'],
revision['ts'],
revision['sha1_git'],
limit)
if 'origin_id' in revision and \
'branch_name' in revision and \
'ts' in revision:
return lookup_revision_by(revision['origin_id'],
revision['branch_name'],
revision['ts'])
if 'sha1_git_root' in revision and \
'sha1_git' in revision:
return lookup_revision_with_context(revision['sha1_git_root'],
revision['sha1_git'],
limit)
if 'sha1_git' in revision:
return lookup_revision(revision['sha1_git'])
# this should not happen
raise NotImplementedError('Should not happen!')
def lookup_directory_through_revision(revision, path=None,
limit=100, with_data=False):
"""Retrieve the directory information from the revision.
Args:
revision: dictionary of criterion representing a revision to lookup
path: directory's path to lookup.
limit: optional query parameter to limit the revisions log.
(default to 100). For now, note that this limit could impede the
transitivity conclusion about sha1_git not being an ancestor of.
with_data: indicate to retrieve the content's raw data if path resolves
to a content.
Returns:
The directory pointing to by the revision criterions at path.
"""
rev = lookup_revision_through(revision, limit)
if not rev:
raise NotFoundExc('Revision with criterion %s not found!' % revision)
return (rev['id'],
lookup_directory_with_revision(rev['id'], path, with_data))
diff --git a/swh/web/ui/static/js/filedrop.js b/swh/web/ui/static/js/filedrop.js
deleted file mode 100644
index 820c4d40..00000000
--- a/swh/web/ui/static/js/filedrop.js
+++ /dev/null
@@ -1,194 +0,0 @@
-/**
- * Search page management
- */
-
-
-$.fn.extend({
- /**
- * Call on any HTMLElement to make that element the recipient of files
- * drag & dropped into it.
- * Files then have their sha1 checksum calculated
- * and searched in SWH.
- * Args:
- * resultDiv: the table where the result should be displayed
- * errorDiv: the element where the error message should be displayed
- */
- filedrop: function(fileLister, searchForm) {
-
- return this.each(function() {
-
- var dragwin = $(this);
- var fileshovering = false;
-
- dragwin.on('dragover', function(event) {
- event.stopPropagation();
- event.preventDefault();
- });
-
- dragwin.on('dragenter', function(event) {
- event.stopPropagation();
- event.preventDefault();
- if (!fileshovering) {
- dragwin.css("border-style", "solid");
- dragwin.css("box-shadow", "inset 0 3px 4px");
- fileshovering = true;
- }
- });
-
- dragwin.on('dragover', function(event) {
- event.stopPropagation();
- event.preventDefault();
- if (!fileshovering) {
- dragwin.css("border-style", "solid");
- dragwin.css("box-shadow", "inset 0 3px 4px");
- fileshovering = true;
- }
- });
-
- dragwin.on('dragleave', function(event) {
- event.stopPropagation();
- event.preventDefault();
- if (fileshovering) {
- dragwin.css("border-style", "dashed");
- dragwin.css("box-shadow", "none");
- fileshovering = false;
- }
- });
-
- dragwin.on('drop', function(event) {
- event.stopPropagation();
- event.preventDefault();
- if (fileshovering) {
- dragwin.css("border-style", "dashed");
- dragwin.css("box-shadow", "none");
- fileshovering = false;
- }
- var myfiles = event.originalEvent.dataTransfer.files;
- if (myfiles.length >= 1) {
- handleFiles(myfiles, fileLister, searchForm);
- }
- });
- });
- },
- /**
- * Call on a jQuery-selected input to make it sensitive to
- * the reception of new files, and have it process received
- * files.
- * Args:
- * fileLister: the element keeping track of the files
- * searchForm: the form whose submission will POST the file
- * information
- */
- filedialog: function(fileLister, searchForm) {
- return this.each(function() {
- var elem = $(this);
- elem.on('change', function(){
- handleFiles(this.files, fileLister, searchForm);
- });
- });
- },
- /**
- * Call on a jQuery-selected element to delegate its click
- * event to the given input instead.
- * Args:
- * input: the element to be clicked when the caller is clicked.
- */
- inputclick: function(input) {
- return this.each(function() {
- $(this).click(function(event) {
- event.preventDefault();
- input.click();
- });
- });
- },
- /**
- * Call on a form to intercept its submission event and
- * check the validity of the text input if present before submitting
- * the form.
- * Args:
- * textInput: the input to validate
- * messageElement: the element where the warning will be written
- */
- checkSubmission: function(textInput, messageElement) {
- var CHECKSUM_RE = /^([0-9a-f]{40}|[0-9a-f]{64})$/i;
- $(this).submit(function(event) {
- event.preventDefault();
- var q = textInput.val();
- if (q && !q.match(CHECKSUM_RE)) {
- messageElement.empty();
- messageElement.html('Please enter a valid SHA-1');
- } else {
- searchForm.submit();
- }
- });
- }
-});
-
-
-var nameList = []; /** Avoid adding the same file twice **/
-
-/**
- * Start reading the supplied files to hash them and add them to the form,
- * and add their names to the file lister pre-search.
- * Args:
- * myfiles: the file array
- * fileLister: the element that will receive the file names
- * searchForm: the form to which we add hidden inputs with the
- * correct values
- */
-function handleFiles(myfiles, fileLister, searchForm) {
- for (var i = 0; i < myfiles.length; i++) {
- var file = myfiles.item(i);
- if (nameList.indexOf(file.name) == -1) {
- nameList.push(file.name);
- var fr = new FileReader();
- fileLister.append(make_row(file.name));
- bind_reader(fr, file.name, searchForm);
- fr.readAsArrayBuffer(file);
- }
- }
-};
-
-/**
- * Bind a given FileReader to hash the file contents when the file
- * has been read
- * Args:
- * filereader: the FileReader object
- * filename: the name of the file being read by the FileReader
- * searchForm: the form the corresponding hidden input will be
- * appended to
- */
-function bind_reader(filereader, filename, searchForm) {
- filereader.onloadend = function(evt) {
- if (evt.target.readyState == FileReader.DONE){
- return fileReadDone(evt.target.result, filename, searchForm);
- }
- };
-}
-
-function make_row(name) {
- return "
"+name+"
";
-}
-
-/**
- * Hash the buffer contents with CryptoJS's SHA1 implementation, and
- * append the result to the given form for submission.
- * Args:
- * buffer: the buffer to be hashed
- * fname: the file name corresponding to the buffer
- * searchForm: the form the inputs should be appended to
- */
-function fileReadDone(buffer, fname, searchForm) {
- var wordArray = CryptoJS.lib.WordArray.create(buffer);
- var sha1 = CryptoJS.SHA1(wordArray);
- /**
- var git_hd = "blob " + wordArray.length + "\0";
- var git_Array = CryptoJS.enc.utf8.parse(git_hd).concat(wordArray);
- var sha256 = CryptoJS.SHA256(wordArray);
- var sha1_git = CryptoJS.SHA1(wordArray);
- **/
- searchForm.append($("", {type: "hidden",
- name: fname,
- value: sha1}
- ));
-}
diff --git a/swh/web/ui/static/js/search.js b/swh/web/ui/static/js/search.js
new file mode 100644
index 00000000..f551ed6c
--- /dev/null
+++ b/swh/web/ui/static/js/search.js
@@ -0,0 +1,242 @@
+/**
+ * Search page management
+ * Args:
+ * textForm: the form containing the text input, if any
+ * fileForm: the form containing the file input, if any
+ * messageElem: the element that should display search messages
+ */
+var SearchFormController = function(textForm, fileForm, messageElem)
+
+{
+ this.textForm = textForm;
+ this.fileForm = fileForm;
+ this.messageElem = messageElem;
+
+ // List of hashes to check against files being processed
+ this.hashed_already = {
+ 'sha1': {},
+ 'sha256': {},
+ 'sha1_git': {}
+ };
+ this.algos = ['sha1', 'sha256', 'sha1_git'];
+ this.CHECKSUM_RE = /^([0-9a-f]{40}|[0-9a-f]{64})$/i;
+ var self = this;
+
+ /**
+ * Show search messages on the page
+ * Args:
+ * msg: the message to show
+ */
+ this.searchMessage = function(msg) {
+ self.messageElem.empty();
+ self.messageElem.text(msg);
+ };
+
+ /**
+ * Setup the text field
+ * Args:
+ * textFormInput: the text form's input
+ */
+ this.setupTextForm = function(textFormInput) {
+ self.textForm.submit(function(event) {
+ var q = textFormInput.val();
+ if (!q) {
+ event.preventDefault();
+ self.searchMessage("Please enter a SHA-1 or SHA-256 checksum.");
+ }
+ else if (q && !q.match(self.CHECKSUM_RE)) {
+ event.preventDefault();
+ self.searchMessage("Invalid SHA-1 or SHA-256 checksum");
+ }
+ });
+ };
+
+ /**
+ * Setup the file drag&drop UI and hashing support.
+ * Args:
+ * fileDropElem: the element receptive to drag & drop
+ * hashedListerElem: the element that receives the hased file descriptions
+ * fileFormInput: the input that actually receives files
+ * clearButton: the button used to clear currently hashed files
+ */
+ this.setupFileForm = function(fileDropElem, hashedListerElem, fileFormInput, clearButton) {
+ if (!FileReader || !CryptoJS) {
+ self.searchMessage("Client-side file hashing is not available for your browser.");
+ return;
+ }
+
+ // Enable clicking on the text element for file picker
+ fileDropElem.click(function(event) {
+ event.preventDefault();
+ fileFormInput.click();
+ });
+
+ // Enable drag&drop
+ var makeDroppable = function(fileReceptionElt) {
+ var fileshovering = false;
+
+ fileReceptionElt.on('dragover', function(event) {
+ event.stopPropagation();
+ event.preventDefault();
+ });
+
+ fileReceptionElt.on('dragenter', function(event) {
+ event.stopPropagation();
+ event.preventDefault();
+ if (!fileshovering) {
+ fileReceptionElt.css("border-style", "solid");
+ fileReceptionElt.css("box-shadow", "inset 0 3px 4px");
+ fileshovering = true;
+ }
+ });
+
+ fileReceptionElt.on('dragover', function(event) {
+ event.stopPropagation();
+ event.preventDefault();
+ if (!fileshovering) {
+ fileReceptionElt.css("border-style", "solid");
+ fileReceptionElt.css("box-shadow", "inset 0 3px 4px");
+ fileshovering = true;
+ }
+ });
+
+ fileReceptionElt.on('dragleave', function(event) {
+ event.stopPropagation();
+ event.preventDefault();
+ if (fileshovering) {
+ fileReceptionElt.css("border-style", "dashed");
+ fileReceptionElt.css("box-shadow", "none");
+ fileshovering = false;
+ }
+ });
+
+ fileReceptionElt.on('drop', function(event) {
+ event.stopPropagation();
+ event.preventDefault();
+ if (fileshovering) {
+ fileReceptionElt.css("border-style", "dashed");
+ fileReceptionElt.css("box-shadow", "none");
+ fileshovering = false;
+ }
+ var myfiles = event.originalEvent.dataTransfer.files;
+ readAndHash(myfiles);
+ });
+ };
+ makeDroppable(fileDropElem);
+
+ // Connect input change and rehash
+ var makeInputChange = function(fileInput) {
+ return fileInput.each(function() {
+ $(this).on('change', function(){
+ readAndHash(this.files);
+ });
+ });
+ };
+ makeInputChange(fileFormInput);
+
+ // Connect clear button
+ var makeClearButton = function(button) {
+ return button.each(function() {
+ $(this).click(function(event) {
+ event.preventDefault();
+ hashedListerElem.empty();
+ self.fileForm.children('.search-hidden').remove();
+ self.hashed_already = {
+ 'sha1': {},
+ 'sha256': {},
+ 'sha1_git': {}
+ };
+ });
+ });
+ };
+ makeClearButton(clearButton);
+
+ var readAndHash = function(filelist) {
+ for (var file_idx = 0; file_idx < filelist.length; file_idx++) {
+ var file = filelist.item(file_idx);
+ var fr = new FileReader();
+ bindReader(fr, file.name);
+ fr.readAsArrayBuffer(file);
+ }
+ };
+
+ var bindReader = function(freader, fname) {
+ freader.onloadend = function(event) {
+ if (event.target.readyState == FileReader.DONE)
+ return dedupAndAdd(event.target.result, fname);
+ else
+ return null;
+ };
+ };
+
+ /**
+ * Hash the buffer with SHA-1, SHA-1_GIT, SHA-256
+ * Args:
+ * buffer: the buffer to hash
+ * fname: the file name corresponding to the buffer
+ * Returns:
+ * a dict of algo_hash: hash
+ */
+ var hashBuffer = function (buffer, fname) {
+ function str2ab(header) {
+ var buf = new ArrayBuffer(header.length);
+ var view = new Uint8Array(buf); // byte view, all we need is ASCII
+ for (var idx = 0, len=header.length; idx < len; idx++)
+ view[idx] = header.charCodeAt(idx);
+ return buf;
+ }
+
+ var content_array = CryptoJS.lib.WordArray.create(buffer);
+ var git_hd_str = 'blob ' + buffer.byteLength + '\0';
+ var git_hd_buffer = str2ab(git_hd_str);
+ var git_hd_array = CryptoJS.lib.WordArray.create(git_hd_buffer);
+
+ var sha1 = CryptoJS.SHA1(content_array);
+ var sha256 = CryptoJS.SHA256(content_array);
+ var sha1_git = CryptoJS.SHA1(git_hd_array.concat(content_array));
+ return {
+ 'sha1': sha1 + '',
+ 'sha256': sha256 + '',
+ 'sha1_git': sha1_git + ''
+ };
+ };
+
+ /**
+ * Hash the buffer and add it to the form if it is unique
+ * If not, display which file has the same content
+ * Args:
+ * buffer: the buffer to hash
+ * fname: the file name corresponding to the buffer
+ */
+ var dedupAndAdd = function(buffer, fname) {
+ var hashes = hashBuffer(buffer);
+ var has_duplicate = false;
+ for (var algo_s in hashes) {
+ if (self.hashed_already[algo_s][hashes[algo_s]] != undefined) {
+ // Duplicate content -- fileLister addition only, as duplicate
+ hashedListerElem.append($('<div>')
+ .addClass('span3')
+ .text(fname + ': duplicate of ' + self.hashed_already[algo_s][hashes[algo_s]]));
+ has_duplicate = true;
+ break;
+ }
+ }
+ // First file read with this content -- fileLister and form addition
+ if (!has_duplicate) {
+ // Add to hashed list
+ for (var algo_c in self.hashed_already)
+ self.hashed_already[algo_c][hashes[algo_c]] = fname;
+ hashedListerElem.append($('
- {% endif %}
+ {% for key in revision.keys() %}
+ {% if key in ['type', 'synthetic'] and key not in ['decoding_failures'] and revision[key] is not none %}
+
+
{{ key }}
+
{{ revision[key] }}
+
+ {% endif %}
+ {% endfor %}
- {% if revision['message'] is not none %}
-
- {% endif %}
-
- {% for key in revision.keys() %}
- {% if key in ['type', 'synthetic'] and key not in ['decoding_failures'] and revision[key] is not none %}
-
-
{{ key }}
-
{{ revision[key] }}
-
- {% endif %}
+ {% for key in ['children_urls', 'parent_urls'] %}
+ {% if revision[key] is not none %}
+