diff --git a/swh/web/ui/converters.py b/swh/web/ui/converters.py
index ab9a338c..170b922f 100644
--- a/swh/web/ui/converters.py
+++ b/swh/web/ui/converters.py
@@ -1,187 +1,180 @@
 # Copyright (C) 2015 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from swh.core import hashutil
-
-
-def fmap(f, data):
-    if isinstance(data, list):
-        return [f(x) for x in data]
-    if isinstance(data, dict):
-        return {k: f(v) for (k, v) in data.items()}
-    return f(data)
+from swh.web.ui import utils
 
 
 def from_swh(dict_swh, hashess={}, bytess={}, blacklist={}, convert={},
              convert_fn=lambda x: x):
     """Convert from an SWH dictionary to something reasonably json
     serializable.
 
     Args:
         - dict_swh: the origin dictionary needed to be transformed
        - hashess: list/set of keys representing hash values (sha1, sha256,
          sha1_git, etc...) as bytes. Those need to be transformed into
          hexadecimal strings
        - bytess: list/set of keys representing bytes values which need to
          be decoded
        - blacklist: set of keys to filter out from the conversion
        - convert: set of keys whose associated values need to be converted
          using convert_fn
        - convert_fn: the conversion function to apply on the value of key
          in 'convert'
 
     The remaining keys are copied as is in the output.
 
     Returns:
         Dictionary equivalent to dict_swh, only with its keys converted.
 
     """
     def convert_hashes_bytes(v):
         """v is supposedly a hash as bytes, return it converted to hex.
 
         """
         if v and isinstance(v, bytes):
             return hashutil.hash_to_hex(v)
         return v
 
     def convert_bytes(v):
         """v is supposedly a bytes string, decode as utf-8.
 
         FIXME: Improve decoding policy.
         If not utf-8, break!
 
         """
         if v and isinstance(v, bytes):
             return v.decode('utf-8')
         return v
 
     if not dict_swh:
         return dict_swh
 
     new_dict = {}
     for key, value in dict_swh.items():
         if key in blacklist:
             continue
         elif isinstance(value, dict):
             new_dict[key] = from_swh(value, hashess, bytess, blacklist,
                                      convert, convert_fn)
         elif key in hashess:
-            new_dict[key] = fmap(convert_hashes_bytes, value)
+            new_dict[key] = utils.fmap(convert_hashes_bytes, value)
         elif key in bytess:
-            new_dict[key] = fmap(convert_bytes, value)
+            new_dict[key] = utils.fmap(convert_bytes, value)
         elif key in convert:
             new_dict[key] = convert_fn(value)
         else:
             new_dict[key] = value
 
     return new_dict
 
 
 def from_origin(origin):
     """Convert from an SWH origin to an origin dictionary.
 
     """
     return from_swh(origin,
                     hashess=set(['revision']),
                     bytess=set(['path']))
 
 
 def from_release(release):
     """Convert from an SWH release to a json serializable release
     dictionary.
 
     Args:
         release: Dict with the following keys
         - id: identifier of the release (sha1 in bytes)
         - target: identifier of the revision the release points to
           (sha1 in bytes)
        - message: release's comment message (bytes)
        - name: release's name (string)
        - author: release's author identifier (swh's id)
        - synthetic: the synthetic property (boolean)
 
     Returns:
         Release dictionary with the following keys:
         - id: hexadecimal sha1 (string)
         - target: hexadecimal sha1 (string)
        - message: release's comment message (string)
        - name: release's name (string)
        - author: release's author identifier (swh's id)
        - synthetic: the synthetic property (boolean)
 
     """
     return from_swh(release,
                     hashess=set(['id', 'target']),
                     bytess=set(['message', 'name', 'email']))
 
 
 def from_revision(revision):
     """Convert from an SWH revision to a json serializable revision
     dictionary.
 
     Args:
         revision: Dict with the following keys
         - id: identifier of the revision (sha1 in bytes)
         - directory: identifier of the directory the revision points to
           (sha1 in bytes)
        - author, committer: author's and committer's identity
        - message: revision's message
        - date, date_offset: revision's author date
        - committer_date, committer_date_offset: revision's commit date
        - parents: list of parents for such revision (sha1 in bytes)
        - synthetic: whether the revision is synthetic (boolean)
        - type: revision's type (git, tar or dsc at the moment)
        - metadata: if the revision is synthetic, this can reference dynamic
          properties.
 
     Returns:
         Revision dictionary with the same keys as the input, only:
         - sha1s are in hexadecimal strings (id, directory, parents)
         - bytes are decoded to strings (name, email, message)
         - remaining keys are left as is
 
     """
     return from_swh(revision,
                     hashess=set(['id', 'directory', 'parents']),
                     bytess=set(['name', 'email', 'message']))
 
 
 def from_content(content):
     """Convert swh content to serializable content dictionary.
 
     """
     return from_swh(content,
                     hashess={'sha1', 'sha1_git', 'sha256'},
                     bytess={},
                     blacklist={},
                     convert={'status'},
                     convert_fn=lambda v: 'absent' if v == 'hidden' else v)
 
 
 def from_person(person):
     """Convert swh person to serializable person dictionary.
 
     """
     return from_swh(person,
                     hashess=set(),
                     bytess=set(['name', 'email']))
 
 
 def from_directory_entry(dir_entry):
     """Convert swh directory entry to serializable directory entry
     dictionary.
 
     """
     return from_swh(dir_entry,
                     hashess=set(['dir_id', 'sha1_git', 'sha1', 'sha256',
                                  'target']),
                     bytess=set(['name']),
                     blacklist={},
                     convert={'status'},
                     convert_fn=lambda v: 'absent' if v == 'hidden' else v)
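Reviewer note (not part of the patch): a minimal sketch of how `from_swh` is expected to behave after this change. It only relies on the signature and helpers shown above; the input values are invented for the example.

    # Illustrative only -- made-up values, exercised against the module above.
    from swh.core import hashutil
    from swh.web.ui import converters

    raw = {
        'id': hashutil.hex_to_hash('aad23fa492a0c5fed0708a6703be875448c86884'),
        'name': b'v0.0.1',
        'status': 'hidden',
    }
    converted = converters.from_swh(
        raw,
        hashess={'id'},       # binary hashes come back as hexadecimal strings
        bytess={'name'},      # bytes values are decoded as utf-8
        convert={'status'},
        convert_fn=lambda v: 'absent' if v == 'hidden' else v)
    assert converted == {
        'id': 'aad23fa492a0c5fed0708a6703be875448c86884',
        'name': 'v0.0.1',
        'status': 'absent',
    }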
diff --git a/swh/web/ui/tests/test_converters.py b/swh/web/ui/tests/test_converters.py
index 48072701..48b06a4d 100644
--- a/swh/web/ui/tests/test_converters.py
+++ b/swh/web/ui/tests/test_converters.py
@@ -1,379 +1,378 @@
 # Copyright (C) 2015 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import datetime
 import unittest
 
 from nose.tools import istest
 
 from swh.core import hashutil
 from swh.web.ui import converters
 
 
 class ConvertersTestCase(unittest.TestCase):
-
     @istest
     def from_swh(self):
         some_input = {
             'a': 'something',
             'b': 'someone',
             'c': b'sharp-0.3.4.tgz',
             'd': hashutil.hex_to_hash(
                 'b04caf10e9535160d90e874b45aa426de762f19f'),
             'e': b'sharp.html/doc_002dS_005fISREG.html',
             'g': [b'utf-8-to-decode', b'another-one'],
             'h': 'something filtered',
             'i': {'e': b'something'},
             'j': {
                 'k': {
                     'l': [b'bytes thing', b'another thingy'],
                     'n': 'dont care either'
                 },
                 'm': 'dont care'
             },
             'o': 'something',
             'p': 'bar',
             'q': 'intact',
             'r': {'p': 'also intact', 'q': 'bar'},
         }
 
         expected_output = {
             'a': 'something',
             'b': 'someone',
             'c': 'sharp-0.3.4.tgz',
             'd': 'b04caf10e9535160d90e874b45aa426de762f19f',
             'e': 'sharp.html/doc_002dS_005fISREG.html',
             'g': ['utf-8-to-decode', 'another-one'],
             'i': {'e': 'something'},
             'j': {
                 'k': {
                     'l': ['bytes thing', 'another thingy']
                 }
             },
             'p': 'foo',
             'q': 'intact',
             'r': {'p': 'also intact', 'q': 'foo'},
         }
 
         def test_convert_fn(v):
             return 'foo' if v == 'bar' else v
 
         actual_output = converters.from_swh(some_input,
                                             hashess={'d', 'o'},
                                             bytess={'c', 'e', 'g', 'l'},
                                             blacklist={'h', 'm', 'n', 'o'},
                                             convert={'p', 'q'},
                                             convert_fn=test_convert_fn)
 
         self.assertEquals(expected_output, actual_output)
 
     @istest
     def from_swh_edge_cases_do_no_conversion_if_none_or_not_bytes(self):
         some_input = {
             'a': 'something',
             'b': None,
             'c': 'someone',
             'd': None,
         }
 
         expected_output = {
             'a': 'something',
             'b': None,
             'c': 'someone',
             'd': None,
         }
 
         actual_output = converters.from_swh(some_input,
                                             hashess={'a', 'b'},
                                             bytess={'c', 'd'})
 
         self.assertEquals(expected_output, actual_output)
 
     @istest
     def from_swh_empty(self):
         # when
         self.assertEquals({}, converters.from_swh({}))
 
     @istest
     def from_swh_none(self):
         # when
         self.assertIsNone(converters.from_swh(None))
 
     @istest
     def from_origin(self):
         # given
         origin_input = {
             'origin_type': 'ftp',
             'origin_url': 'rsync://ftp.gnu.org/gnu/octave',
             'branch': 'octave-3.4.0.tar.gz',
             'revision': b'\xb0L\xaf\x10\xe9SQ`\xd9\x0e\x87KE\xaaBm\xe7b\xf1\x9f',  # noqa
             'path': b'octave-3.4.0/doc/interpreter/octave.html/doc_002dS_005fISREG.html'  # noqa
         }
 
         expected_origin = {
             'origin_type': 'ftp',
             'origin_url': 'rsync://ftp.gnu.org/gnu/octave',
             'branch': 'octave-3.4.0.tar.gz',
             'revision': 'b04caf10e9535160d90e874b45aa426de762f19f',
             'path': 'octave-3.4.0/doc/interpreter/octave.html/doc_002dS_005fISREG.html'  # noqa
         }
 
         # when
         actual_origin = converters.from_origin(origin_input)
 
         # then
         self.assertEqual(actual_origin, expected_origin)
 
     @istest
     def from_release(self):
         release_input = {
             'id': hashutil.hex_to_hash(
                 'aad23fa492a0c5fed0708a6703be875448c86884'),
             'target': hashutil.hex_to_hash(
                 '5e46d564378afc44b31bb89f99d5675195fbdf67'),
             'target_type': 'revision',
             'date': datetime.datetime(2015, 1, 1, 22, 0, 0,
                                       tzinfo=datetime.timezone.utc),
             'author': {
                 'name': b'author name',
                 'email': b'author@email',
             },
             'name': b'v0.0.1',
             'message': b'some comment on release',
             'synthetic': True,
         }
 
         expected_release = {
             'id': 'aad23fa492a0c5fed0708a6703be875448c86884',
             'target': '5e46d564378afc44b31bb89f99d5675195fbdf67',
             'target_type': 'revision',
             'date': datetime.datetime(2015, 1, 1, 22, 0, 0,
                                       tzinfo=datetime.timezone.utc),
             'author': {
                 'name': 'author name',
                 'email': 'author@email',
             },
             'name': 'v0.0.1',
             'message': 'some comment on release',
             'target_type': 'revision',
             'synthetic': True,
         }
 
         # when
         actual_release = converters.from_release(release_input)
 
         # then
         self.assertEqual(actual_release, expected_release)
 
     @istest
     def from_release_no_revision(self):
         release_input = {
             'id': hashutil.hex_to_hash(
                 'b2171ee2bdf119cd99a7ec7eff32fa8013ef9a4e'),
             'target': None,
             'date': datetime.datetime(2016, 3, 2, 10, 0, 0,
                                       tzinfo=datetime.timezone.utc),
             'name': b'v0.1.1',
             'message': b'comment on release',
             'synthetic': False,
             'author': {
                 'name': b'bob',
                 'email': b'bob@alice.net',
             },
         }
 
         expected_release = {
             'id': 'b2171ee2bdf119cd99a7ec7eff32fa8013ef9a4e',
             'target': None,
             'date': datetime.datetime(2016, 3, 2, 10, 0, 0,
                                       tzinfo=datetime.timezone.utc),
             'name': 'v0.1.1',
             'message': 'comment on release',
             'synthetic': False,
             'author': {
                 'name': 'bob',
                 'email': 'bob@alice.net',
             },
         }
 
         # when
         actual_release = converters.from_release(release_input)
 
         # then
         self.assertEqual(actual_release, expected_release)
 
     @istest
     def from_revision(self):
         revision_input = {
             'id': hashutil.hex_to_hash(
                 '18d8be353ed3480476f032475e7c233eff7371d5'),
             'directory': hashutil.hex_to_hash(
                 '7834ef7e7c357ce2af928115c6c6a42b7e2a44e6'),
             'author': {
                 'name': b'Software Heritage',
                 'email': b'robot@softwareheritage.org',
             },
             'committer': {
                 'name': b'Software Heritage',
                 'email': b'robot@softwareheritage.org',
             },
             'message': b'synthetic revision message',
             'date': datetime.datetime(2000, 1, 17, 11, 23, 54,
                                       tzinfo=None),
             'date_offset': 0,
             'committer_date': datetime.datetime(2000, 1, 17, 11, 23, 54,
                                                 tzinfo=None),
             'committer_date_offset': 0,
             'synthetic': True,
             'type': 'tar',
             'parents': [
                 hashutil.hex_to_hash(
                     '29d8be353ed3480476f032475e7c244eff7371d5'),
                 hashutil.hex_to_hash(
                     '30d8be353ed3480476f032475e7c244eff7371d5')
             ],
             'metadata': {
                 'original_artifact': [{
                     'archive_type': 'tar',
                     'name': 'webbase-5.7.0.tar.gz',
                     'sha1': '147f73f369733d088b7a6fa9c4e0273dcd3c7ccd',
                     'sha1_git': '6a15ea8b881069adedf11feceec35588f2cfe8f1',
                     'sha256': '401d0df797110bea805d358b85bcc1ced29549d3d73f'
                     '309d36484e7edf7bb912',
                 }]
             },
         }
 
         expected_revision = {
             'id': '18d8be353ed3480476f032475e7c233eff7371d5',
             'directory': '7834ef7e7c357ce2af928115c6c6a42b7e2a44e6',
             'author': {
                 'name': 'Software Heritage',
                 'email': 'robot@softwareheritage.org',
             },
             'committer': {
                 'name': 'Software Heritage',
                 'email': 'robot@softwareheritage.org',
             },
             'message': 'synthetic revision message',
             'date': datetime.datetime(2000, 1, 17, 11, 23, 54,
                                       tzinfo=None),
             'date_offset': 0,
             'committer_date': datetime.datetime(2000, 1, 17, 11, 23, 54,
                                                 tzinfo=None),
             'committer_date_offset': 0,
             'parents': [
                 '29d8be353ed3480476f032475e7c244eff7371d5',
                 '30d8be353ed3480476f032475e7c244eff7371d5'
             ],
             'type': 'tar',
             'synthetic': True,
             'metadata': {
                 'original_artifact': [{
                     'archive_type': 'tar',
                     'name': 'webbase-5.7.0.tar.gz',
                     'sha1': '147f73f369733d088b7a6fa9c4e0273dcd3c7ccd',
                     'sha1_git': '6a15ea8b881069adedf11feceec35588f2cfe8f1',
                     'sha256': '401d0df797110bea805d358b85bcc1ced29549d3d73f'
                     '309d36484e7edf7bb912'
                 }]
             },
         }
 
         # when
         actual_revision = converters.from_revision(revision_input)
 
         # then
         self.assertEqual(actual_revision, expected_revision)
 
     @istest
     def from_content(self):
         content_input = {
             'sha1': hashutil.hex_to_hash('5c6f0e2750f48fa0bd0c4cf5976ba0b9e0'
                                          '2ebda5'),
             'sha256': hashutil.hex_to_hash('39007420ca5de7cb3cfc15196335507e'
                                            'e76c98930e7e0afa4d2747d3bf96c926'),
             'sha1_git': hashutil.hex_to_hash('40e71b8614fcd89ccd17ca2b1d9e66'
                                              'c5b00a6d03'),
             'data': b'data in bytes',
             'length': 10,
             'status': 'hidden',
         }
 
         # 'status' is filtered
         expected_content = {
             'sha1': '5c6f0e2750f48fa0bd0c4cf5976ba0b9e02ebda5',
             'sha256': '39007420ca5de7cb3cfc15196335507ee76c98930e7e0afa4d274'
                       '7d3bf96c926',
             'sha1_git': '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03',
             'data': b'data in bytes',
             'length': 10,
             'status': 'absent',
         }
 
         # when
         actual_content = converters.from_content(content_input)
 
         # then
         self.assertEqual(actual_content, expected_content)
 
     @istest
     def from_person(self):
         person_input = {
             'id': 10,
             'anything': 'else',
             'name': b'bob',
             'email': b'bob@foo.alice',
         }
 
         expected_person = {
             'id': 10,
             'anything': 'else',
             'name': 'bob',
             'email': 'bob@foo.alice',
         }
 
         # when
         actual_person = converters.from_person(person_input)
 
         # then
         self.assertEqual(actual_person, expected_person)
 
     @istest
     def from_directory_entries(self):
         dir_entries_input = {
             'sha1': hashutil.hex_to_hash('5c6f0e2750f48fa0bd0c4cf5976ba0b9e0'
                                          '2ebda5'),
             'sha256': hashutil.hex_to_hash('39007420ca5de7cb3cfc15196335507e'
                                            'e76c98930e7e0afa4d2747d3bf96c926'),
             'sha1_git': hashutil.hex_to_hash('40e71b8614fcd89ccd17ca2b1d9e66'
                                              'c5b00a6d03'),
             'target': hashutil.hex_to_hash('40e71b8614fcd89ccd17ca2b1d9e66'
                                            'c5b00a6d03'),
             'dir_id': hashutil.hex_to_hash('40e71b8614fcd89ccd17ca2b1d9e66'
                                            'c5b00a6d03'),
             'name': b'bob',
             'type': 10,
             'status': 'hidden',
         }
 
         expected_dir_entries = {
             'sha1': '5c6f0e2750f48fa0bd0c4cf5976ba0b9e02ebda5',
             'sha256': '39007420ca5de7cb3cfc15196335507ee76c98930e7e0afa4d2747'
                       'd3bf96c926',
             'sha1_git': '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03',
             'target': '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03',
             'dir_id': '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03',
             'name': 'bob',
             'type': 10,
             'status': 'absent',
         }
 
         # when
         actual_dir_entries = converters.from_directory_entry(dir_entries_input)
 
         # then
         self.assertEqual(actual_dir_entries, expected_dir_entries)
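Reviewer note (illustrative only, not part of the patch): with `fmap` moved to `swh.web.ui.utils`, multi-valued keys such as a revision's `parents` are converted roughly as below; the hashes are copied from the test data above purely for illustration.

    from swh.core import hashutil
    from swh.web.ui import utils

    parents = [
        hashutil.hex_to_hash('29d8be353ed3480476f032475e7c244eff7371d5'),
        hashutil.hex_to_hash('30d8be353ed3480476f032475e7c244eff7371d5'),
    ]
    # from_swh funnels each hash through utils.fmap, so a list of binary
    # sha1s comes back as a list of hexadecimal strings; a dict of values or
    # a single value would be handled by the other two fmap branches.
    assert utils.fmap(hashutil.hash_to_hex, parents) == [
        '29d8be353ed3480476f032475e7c244eff7371d5',
        '30d8be353ed3480476f032475e7c244eff7371d5',
    ]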
diff --git a/swh/web/ui/tests/test_utils.py b/swh/web/ui/tests/test_utils.py
index 75b1ecc8..cad1dba3 100644
--- a/swh/web/ui/tests/test_utils.py
+++ b/swh/web/ui/tests/test_utils.py
@@ -1,174 +1,183 @@
 # Copyright (C) 2015 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import unittest
 
 from unittest.mock import patch
 from nose.tools import istest
 
 from swh.web.ui import utils
 
 
 class Rule(object):
     rule = ""
     endpoint = None
     methods = []
 
     def __init__(self, rule, methods, endpoint):
         self.rule = rule
         self.endpoint = endpoint
         self.methods = methods
 
 
 class Map(object):
     _rules = []
 
     def __init__(self, rules):
         self._rules = rules
 
 
 class UtilsTestCase(unittest.TestCase):
     def setUp(self):
         self.url_map = Map([Rule('/other/',
                                  methods=set(['GET', 'POST', 'HEAD']),
                                  endpoint='foo'),
                             Rule('/some/old/url/',
                                  methods=set(['GET', 'POST']),
                                  endpoint='blablafn'),
                             Rule('/other/old/url/',
                                  methods=set(['GET', 'HEAD']),
                                  endpoint='bar'),
                             Rule('/other',
                                  methods=set([]),
                                  endpoint=None),
                             Rule('/other2',
                                  methods=set([]),
                                  endpoint=None)])
 
     @istest
     def filter_endpoints_1(self):
         # when
         actual_data = utils.filter_endpoints(self.url_map, '/some')
 
         # then
         self.assertEquals(actual_data, {
             '/some/old/url/': {
                 'methods': ['GET', 'POST'],
                 'endpoint': 'blablafn'
             }
         })
 
     @istest
     def filter_endpoints_2(self):
         # when
         actual_data = utils.filter_endpoints(self.url_map, '/other',
                                              blacklist=['/other2'])
 
         # then
         # rule /other is skipped because it's exactly the prefix url
         # rule /other2 is skipped because it's blacklisted
         self.assertEquals(actual_data, {
             '/other/': {
                 'methods': ['GET', 'HEAD', 'POST'],
                 'endpoint': 'foo'
             },
             '/other/old/url/': {
                 'methods': ['GET', 'HEAD'],
                 'endpoint': 'bar'
             }
         })
 
     @patch('swh.web.ui.utils.flask')
     @istest
     def prepare_directory_listing(self, mock_flask):
         # given
         def mock_url_for(url_key, **kwds):
             if url_key == 'browse_directory':
                 sha1_git = kwds['sha1_git']
                 return '/path/to/url/dir' + '/' + sha1_git
             else:
                 sha1_git = kwds['q']
                 return '/path/to/url/file' + '/' + sha1_git
 
         mock_flask.url_for.side_effect = mock_url_for
 
         inputs = [{'type': 'dir',
                    'target': '123',
                    'name': 'some-dir-name'},
                   {'type': 'file',
                    'sha1': '654',
                    'name': 'some-filename'},
                   {'type': 'dir',
                    'target': '987',
                    'name': 'some-other-dirname'}]
 
         expected_output = [{'link': '/path/to/url/dir/123',
                             'name': 'some-dir-name',
                             'type': 'dir'},
                            {'link': '/path/to/url/file/654',
                             'name': 'some-filename',
                             'type': 'file'},
                            {'link': '/path/to/url/dir/987',
                             'name': 'some-other-dirname',
                             'type': 'dir'}]
 
         # when
         actual_outputs = utils.prepare_directory_listing(inputs)
 
         # then
         self.assertEquals(actual_outputs, expected_output)
 
     @istest
     def filter_field_keys_dict_unknown_keys(self):
         # when
         actual_res = utils.filter_field_keys(
             {'directory': 1, 'file': 2, 'link': 3},
             {'directory1', 'file2'})
 
         # then
         self.assertEqual(actual_res, {})
 
     @istest
     def filter_field_keys_dict(self):
         # when
         actual_res = utils.filter_field_keys(
             {'directory': 1, 'file': 2, 'link': 3},
             {'directory', 'link'})
 
         # then
         self.assertEqual(actual_res, {'directory': 1, 'link': 3})
 
     @istest
     def filter_field_keys_list_unknown_keys(self):
         # when
         actual_res = utils.filter_field_keys(
             [{'directory': 1, 'file': 2, 'link': 3},
              {'1': 1, '2': 2, 'link': 3}],
             {'d'})
 
         # then
         self.assertEqual(actual_res, [{}, {}])
 
     @istest
     def filter_field_keys_list(self):
         # when
         actual_res = utils.filter_field_keys(
             [{'directory': 1, 'file': 2, 'link': 3},
              {'dir': 1, 'fil': 2, 'lin': 3}],
             {'directory', 'dir'})
 
         # then
         self.assertEqual(actual_res, [{'directory': 1}, {'dir': 1}])
 
     @istest
     def filter_field_keys_other(self):
         # given
         inputSet = {1, 2}
 
         # when
         actual_res = utils.filter_field_keys(inputSet, {'a', '1'})
 
         # then
         self.assertEqual(actual_res, inputSet)
+
+    @istest
+    def fmap(self):
+        self.assertEquals([2, 3, 4],
+                          utils.fmap(lambda x: x+1, [1, 2, 3]))
+        self.assertEquals({'a': 2, 'b': 4},
+                          utils.fmap(lambda x: x*2, {'a': 1, 'b': 2}))
+        self.assertEquals(100,
+                          utils.fmap(lambda x: x*10, 10))
diff --git a/swh/web/ui/utils.py b/swh/web/ui/utils.py
index af2ad412..fd754139 100644
--- a/swh/web/ui/utils.py
+++ b/swh/web/ui/utils.py
@@ -1,83 +1,91 @@
 # Copyright (C) 2015 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import flask
 
 
 def filter_endpoints(url_map, prefix_url_rule, blacklist=[]):
     """Filter endpoints by prefix url rule.
 
     Args:
         - url_map: Werkzeug Map of url rules
         - prefix_url_rule: prefix url string
         - blacklist: list of url rules to exclude
 
     Returns:
         Dictionary of url_rule with values methods and endpoint.
         The key is the url, the associated value is a dictionary of
         'methods' (possible http methods) and 'endpoint' (python function)
 
     """
     out = {}
     for r in url_map._rules:
         rule = r.rule
         if rule == prefix_url_rule or rule in blacklist:
             continue
 
         if rule.startswith(prefix_url_rule):
             out[r.rule] = {'methods': sorted(map(str, r.methods)),
                            'endpoint': r.endpoint}
     return out
 
 
 def prepare_directory_listing(files):
     """Given a list of directory entry dicts, return a view-ready list of
     entries with their browse links.
 
     """
     ls = []
     for entry in files:
         new_entry = {'name': entry['name'],
                      'type': entry['type']}
         if entry['type'] == 'dir':
             new_entry['link'] = flask.url_for('browse_directory',
                                               sha1_git=entry['target'])
         else:
             new_entry['link'] = flask.url_for('show_content',
                                               q=entry['sha1'])
         ls.append(new_entry)
 
     return ls
 
 
 def filter_field_keys(obj, field_keys):
     """Given an object instance (dictionary or list) and a set of field
     keys to filter on, return the object instance with only those keys.
 
     Note: Returns obj as is if it's an instance of types not in
     (dictionary, list)
 
     Args:
         - obj: one object (dictionary, list...) to filter.
         - field_keys: set of keys to keep in the object
 
     Returns:
         obj filtered on field_keys
 
     """
     if isinstance(obj, dict):
         filt_dict = {}
         for key, value in obj.items():
             if key in field_keys:
                 filt_dict[key] = value
         return filt_dict
     elif isinstance(obj, list):
         filt_list = []
         for e in obj:
             filt_list.append(filter_field_keys(e, field_keys))
         return filt_list
     return obj
+
+
+def fmap(f, data):
+    if isinstance(data, list):
+        return [f(x) for x in data]
+    if isinstance(data, dict):
+        return {k: f(v) for (k, v) in data.items()}
+    return f(data)
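Reviewer note (illustrative only, not part of the patch): a short sketch of how the `utils` helpers shown above compose when trimming converter output down to the fields a view needs; the entry values are invented for the example.

    from swh.web.ui import utils

    entries = [
        {'name': 'octave.html', 'type': 'file', 'sha1': 'abc', 'length': 10},
        {'name': 'doc', 'type': 'dir', 'target': 'def'},
    ]
    # filter_field_keys recurses into the list and keeps only the requested
    # keys in each dict; any other value type would be returned unchanged.
    assert utils.filter_field_keys(entries, {'name', 'type'}) == [
        {'name': 'octave.html', 'type': 'file'},
        {'name': 'doc', 'type': 'dir'},
    ]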