diff --git a/ardumont/group_by_exception.py b/ardumont/group_by_exception.py index 1df2606..716bfab 100755 --- a/ardumont/group_by_exception.py +++ b/ardumont/group_by_exception.py @@ -1,95 +1,108 @@ #!/usr/bin/env python3 # Copyright (C) 2017 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # Use: # ./kibana_fetch_logs.py | tee temporary-error-file | \ # ./group_by_exception.py | jq > temporary-error-file-groupby-exception import ast import click import json import operator import re import sys from collections import defaultdict, OrderedDict LOADER_TYPES = ['git', 'svn'] def work_on_exception_msg(exception): exception_msg = None if exception.startswith('['): exception_msg = re.sub('\[.*\]', '', exception).lstrip() else: exception_msg = exception return exception_msg[0:50] def group_by(origin_types, loader_type): group = {ori_type: defaultdict(list) for ori_type in origin_types} if loader_type == 'svn': # args = ('path-to-archive', 'some-origin-url') origin_key_to_lookup = 1 elif loader_type == 'git': - # args = {'origin_url: 'some-origin-url} origin_key_to_lookup = 'origin_url' + seen = set() for line in sys.stdin: origin_type = None + origin_url = None line = line.strip() data = ast.literal_eval(line) + args = data['args'] for ori_type in origin_types: - args = data['args'] - if args and ori_type in args[origin_key_to_lookup]: - origin_type = ori_type + try: + if args and ori_type in args[origin_key_to_lookup]: + origin_type = ori_type + origin_url = args[origin_key_to_lookup] + break + except IndexError: # when something is wrong, just be the unknown + # origin_type break - if not origin_type: + if not origin_type: # corner case when we don't have the + # input parameters origin_type = 'unknown' + if origin_url: + if origin_url in seen: + continue + + seen.add(origin_url) + reworked_exception_msg = work_on_exception_msg(data['exception']) group[origin_type][reworked_exception_msg].append(data['args']) return group @click.command() @click.option('--origin-types', default=['gitorious', 'googlecode'], help='Default types of origin to lookup') @click.option('--loader-type', default='svn', help="Type of loader (git, svn)") def main(origin_types, loader_type): if loader_type not in LOADER_TYPES: raise ValueError('Bad input, loader type is one of %s' % LOADER_TYPES) origin_types = origin_types + ['unknown'] group = group_by(origin_types, loader_type) result = {} for ori_type in origin_types: _map = {} total = 0 for k, v in group[ori_type].items(): l = len(v) _map[k] = l total += l out = sorted(_map.items(), key=operator.itemgetter(1), reverse=True) result[ori_type] = { 'total': total, 'errors': OrderedDict(out), } print(json.dumps(result)) if __name__ == '__main__': main()