diff --git a/debian/control b/debian/control index a04a0c64..70c599b3 100644 --- a/debian/control +++ b/debian/control @@ -1,42 +1,42 @@ Source: swh-web Maintainer: Software Heritage developers Section: python Priority: optional Build-Depends: curl, debhelper (>= 9), dh-python (>= 2), python3-all, python3-bs4, python3-django (>= 1.10.7~), python3-djangorestframework (>= 3.4.0~), python3-django-webpack-loader, python3-django-js-reverse, python3-docutils, python3-htmlmin, python3-magic (>= 0.3.0~), python3-lxml, python3-nose, python3-pygments, python3-pypandoc, python3-setuptools, python3-sphinx, python3-sphinxcontrib.httpdomain, python3-yaml, python3-swh.core (>= 0.0.40~), - python3-swh.model (>= 0.0.23~), + python3-swh.model (>= 0.0.24~), python3-swh.storage (>= 0.0.101~), python3-swh.indexer.storage (>= 0.0.52~), python3-swh.vault (>= 0.0.20~) Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/diffusion/DWUI/ Package: python3-swh.web Architecture: all Depends: python3-swh.core (>= 0.0.40~), - python3-swh.model (>= 0.0.23~), + python3-swh.model (>= 0.0.24~), python3-swh.storage (>= 0.0.101~), python3-swh.indexer.storage (>= 0.0.52~), python3-swh.vault (>= 0.0.20~), ${misc:Depends}, ${python3:Depends} Description: Software Heritage Web Applications diff --git a/requirements-swh.txt b/requirements-swh.txt index 4e37e849..43d0fe02 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,5 +1,5 @@ swh.core >= 0.0.40 -swh.model >= 0.0.23 +swh.model >= 0.0.24 swh.storage >= 0.0.101 swh.vault >= 0.0.20 swh.indexer.storage >= 0.0.52 diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py index 11ffe2f5..e9d58c63 100644 --- a/swh/web/common/utils.py +++ b/swh/web/common/utils.py @@ -1,296 +1,284 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import re from datetime import datetime, timezone from dateutil import parser as date_parser from dateutil import tz from django.core.cache import cache from django.core import urlresolvers from django.http import QueryDict -from swh.model.fields.hashes import validate_sha1_git +from swh.model.exceptions import ValidationError +from swh.model.identifiers import persistent_identifier from swh.web.common import service from swh.web.common.exc import BadInputExc def reverse(viewname, args=None, kwargs=None, query_params=None, current_app=None, urlconf=None): """An override of django reverse function supporting query parameters. Args: viewname: the name of the django view from which to compute a url args: list of url arguments ordered according to their position it kwargs: dictionary of url arguments indexed by their names query_params: dictionary of query parameters to append to the reversed url current_app: the name of the django app tighted to the view urlconf: url configuration module Returns: The url of the requested view with processed arguments and query parameters """ if kwargs: kwargs = {k: v for k, v in kwargs.items() if v is not None} url = urlresolvers.reverse( viewname, urlconf=urlconf, args=args, kwargs=kwargs, current_app=current_app) if query_params: query_params = {k: v for k, v in query_params.items() if v is not None} if query_params and len(query_params) > 0: query_dict = QueryDict('', mutable=True) for k in sorted(query_params.keys()): query_dict[k] = query_params[k] url += ('?' + query_dict.urlencode(safe='/')) return url def fmap(f, data): """Map f to data at each level. This must keep the origin data structure type: - map -> map - dict -> dict - list -> list - None -> None Args: f: function that expects one argument. data: data to traverse to apply the f function. list, map, dict or bare value. Returns: The same data-structure with modified values by the f function. """ if data is None: return data if isinstance(data, map): return map(lambda y: fmap(f, y), (x for x in data)) if isinstance(data, list): return [fmap(f, x) for x in data] if isinstance(data, dict): return {k: fmap(f, v) for (k, v) in data.items()} return f(data) def datetime_to_utc(date): """Returns datetime in UTC without timezone info Args: date (datetime.datetime): input datetime with timezone info Returns: datetime.datime: datetime in UTC without timezone info """ if date.tzinfo: return date.astimezone(tz.gettz('UTC')).replace(tzinfo=timezone.utc) else: return date def parse_timestamp(timestamp): """Given a time or timestamp (as string), parse the result as UTC datetime. Returns: a timezone-aware datetime representing the parsed value. None if the parsing fails. Samples: - 2016-01-12 - 2016-01-12T09:19:12+0100 - Today is January 1, 2047 at 8:21:00AM - 1452591542 """ if not timestamp: return None try: date = date_parser.parse(timestamp, ignoretz=False, fuzzy=True) return datetime_to_utc(date) except Exception: try: return datetime.utcfromtimestamp(float(timestamp)).replace( tzinfo=timezone.utc) except (ValueError, OverflowError) as e: raise BadInputExc(e) def shorten_path(path): """Shorten the given path: for each hash present, only return the first 8 characters followed by an ellipsis""" sha256_re = r'([0-9a-f]{8})[0-9a-z]{56}' sha1_re = r'([0-9a-f]{8})[0-9a-f]{32}' ret = re.sub(sha256_re, r'\1...', path) return re.sub(sha1_re, r'\1...', ret) def format_utc_iso_date(iso_date, fmt='%d %B %Y, %H:%M UTC'): """Turns a string reprensation of an ISO 8601 date string to UTC and format it into a more human readable one. For instance, from the following input string: '2017-05-04T13:27:13+02:00' the following one is returned: '04 May 2017, 11:27 UTC'. Custom format string may also be provided as parameter Args: iso_date (str): a string representation of an ISO 8601 date fmt (str): optional date formatting string Returns: A formatted string representation of the input iso date """ if not iso_date: return iso_date date = parse_timestamp(iso_date) return date.strftime(fmt) def gen_path_info(path): """Function to generate path data navigation for use with a breadcrumb in the swh web ui. For instance, from a path /folder1/folder2/folder3, it returns the following list:: [{'name': 'folder1', 'path': 'folder1'}, {'name': 'folder2', 'path': 'folder1/folder2'}, {'name': 'folder3', 'path': 'folder1/folder2/folder3'}] Args: path: a filesystem path Returns: A list of path data for navigation as illustrated above. """ path_info = [] if path: sub_paths = path.strip('/').split('/') path_from_root = '' for p in sub_paths: path_from_root += '/' + p path_info.append({'name': p, 'path': path_from_root.strip('/')}) return path_info def get_origin_visits(origin_info): """Function that returns the list of visits for a swh origin. That list is put in cache in order to speedup the navigation in the swh web browse ui. Args: origin_id (int): the id of the swh origin to fetch visits from Returns: A list of dict describing the origin visits:: [{'date': , 'origin': , 'status': <'full' | 'partial'>, 'visit': }, ... ] Raises: NotFoundExc if the origin is not found """ cache_entry_id = 'origin_%s_visits' % origin_info['id'] cache_entry = cache.get(cache_entry_id) if cache_entry: return cache_entry origin_visits = [] per_page = service.MAX_LIMIT last_visit = None while 1: visits = list(service.lookup_origin_visits(origin_info['id'], last_visit=last_visit, per_page=per_page)) origin_visits += visits if len(visits) < per_page: break else: if not last_visit: last_visit = per_page else: last_visit += per_page def _visit_sort_key(visit): ts = parse_timestamp(visit['date']).timestamp() return ts + (float(visit['visit']) / 10e3) for v in origin_visits: if 'metadata' in v: del v['metadata'] origin_visits = [dict(t) for t in set([tuple(d.items()) for d in origin_visits])] origin_visits = sorted(origin_visits, key=lambda v: _visit_sort_key(v)) cache.set(cache_entry_id, origin_visits) return origin_visits def get_swh_persistent_id(object_type, object_id, scheme_version=1): """ Returns the persistent identifier for a swh object based on: * the object type * the object id * the swh identifiers scheme version Args: object_type (str): the swh object type (content/directory/release/revision/snapshot) object_id (str): the swh object id (hexadecimal representation of its hash value) - schem_version (int): the scheme version of the swh + scheme_version (int): the scheme version of the swh persistent identifiers - Returns: str: the swh object persistent identifier + Returns: + str: the swh object persistent identifier Raises: BadInputExc if the provided parameters do not enable to generate a valid identifier """ - swh_id = 'swh:%s:' % scheme_version - if object_type == 'content': - swh_id += 'cnt:' - elif object_type == 'directory': - swh_id += 'dir:' - elif object_type == 'release': - swh_id += 'rel:' - elif object_type == 'revision': - swh_id += 'rev:' - elif object_type == 'snapshot': - swh_id += 'snp:' - else: - raise BadInputExc('Invalid object type (%s) for swh persistent id' % - object_type) try: - validate_sha1_git(object_id) - swh_id += object_id - except Exception: - raise BadInputExc('Invalid object id (%s) for swh persistent id' % - object_id) - return swh_id + swh_id = persistent_identifier(object_type, object_id, scheme_version) + except ValidationError as e: + raise BadInputExc('Invalid object (%s) for swh persistent id. %s' % + (object_id, e)) + else: + return swh_id diff --git a/swh/web/tests/common/test_utils.py b/swh/web/tests/common/test_utils.py index 2dfb070b..53d81a1f 100644 --- a/swh/web/tests/common/test_utils.py +++ b/swh/web/tests/common/test_utils.py @@ -1,159 +1,159 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import unittest from nose.tools import istest from unittest.mock import patch from swh.web.common import utils from swh.web.common.exc import BadInputExc class UtilsTestCase(unittest.TestCase): @istest def shorten_path_noop(self): noops = [ '/api/', '/browse/', '/content/symbol/foobar/' ] for noop in noops: self.assertEqual( utils.shorten_path(noop), noop ) @istest def shorten_path_sha1(self): sha1 = 'aafb16d69fd30ff58afdd69036a26047f3aebdc6' short_sha1 = sha1[:8] + '...' templates = [ '/api/1/content/sha1:%s/', '/api/1/content/sha1_git:%s/', '/api/1/directory/%s/', '/api/1/content/sha1:%s/ctags/', ] for template in templates: self.assertEqual( utils.shorten_path(template % sha1), template % short_sha1 ) @istest def shorten_path_sha256(self): sha256 = ('aafb16d69fd30ff58afdd69036a26047' '213add102934013a014dfca031c41aef') short_sha256 = sha256[:8] + '...' templates = [ '/api/1/content/sha256:%s/', '/api/1/directory/%s/', '/api/1/content/sha256:%s/filetype/', ] for template in templates: self.assertEqual( utils.shorten_path(template % sha256), template % short_sha256 ) @istest def parse_timestamp(self): input_timestamps = [ None, '2016-01-12', '2016-01-12T09:19:12+0100', 'Today is January 1, 2047 at 8:21:00AM', '1452591542', ] output_dates = [ None, datetime.datetime(2016, 1, 12, 0, 0), datetime.datetime(2016, 1, 12, 8, 19, 12, tzinfo=datetime.timezone.utc), datetime.datetime(2047, 1, 1, 8, 21), datetime.datetime(2016, 1, 12, 9, 39, 2, tzinfo=datetime.timezone.utc), ] for ts, exp_date in zip(input_timestamps, output_dates): self.assertEquals(utils.parse_timestamp(ts), exp_date) @istest def format_utc_iso_date(self): self.assertEqual(utils.format_utc_iso_date('2017-05-04T13:27:13+02:00'), # noqa '04 May 2017, 11:27 UTC') @istest def gen_path_info(self): input_path = '/home/user/swh-environment/swh-web/' expected_result = [ {'name': 'home', 'path': 'home'}, {'name': 'user', 'path': 'home/user'}, {'name': 'swh-environment', 'path': 'home/user/swh-environment'}, {'name': 'swh-web', 'path': 'home/user/swh-environment/swh-web'} ] path_info = utils.gen_path_info(input_path) self.assertEquals(path_info, expected_result) input_path = 'home/user/swh-environment/swh-web' path_info = utils.gen_path_info(input_path) self.assertEquals(path_info, expected_result) @patch('swh.web.common.utils.service') @istest def get_origin_visits(self, mock_service): mock_service.MAX_LIMIT = 2 def _lookup_origin_visits(*args, **kwargs): if kwargs['last_visit'] is None: return [{'visit': 1, 'date': '2017-05-06T00:59:10+00:00', 'metadata': {}}, {'visit': 2, 'date': '2017-08-06T00:59:10+00:00', 'metadata': {}} ] else: return [{'visit': 3, 'date': '2017-09-06T00:59:10+00:00', 'metadata': {}} ] mock_service.lookup_origin_visits.side_effect = _lookup_origin_visits origin_info = { 'id': 1, 'type': 'git', 'url': 'https://github.com/foo/bar', } origin_visits = utils.get_origin_visits(origin_info) self.assertEqual(len(origin_visits), 3) @istest def get_swh_persisent_id(self): swh_object_type = 'content' sha1_git = 'aafb16d69fd30ff58afdd69036a26047f3aebdc6' expected_swh_id = 'swh:1:cnt:' + sha1_git self.assertEqual(utils.get_swh_persistent_id(swh_object_type, sha1_git), # noqa expected_swh_id) with self.assertRaises(BadInputExc) as cm: utils.get_swh_persistent_id('foo', sha1_git) - self.assertIn('Invalid object type', cm.exception.args[0]) + self.assertIn('Invalid object', cm.exception.args[0]) with self.assertRaises(BadInputExc) as cm: utils.get_swh_persistent_id(swh_object_type, 'not a valid id') - self.assertIn('Invalid object id', cm.exception.args[0]) + self.assertIn('Invalid object', cm.exception.args[0])