# Copyright (C) 2015-2016  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import json
import subprocess

import click


# Options used to compute tags
__FLAGS = [
    '--fields=+lnz',  # +l: language of source file containing tag
                      # +n: line number of tag definition
                      # +z: include the symbol's kind (function, variable, ...)
    '--sort=no',      # do not sort the output on tag name
    '--links=no',     # do not follow symlinks
    '--output-format=json',  # outputs in json
]

# Keys of a ctags JSON record that are dropped from the yielded symbols.
_EXCLUDED_KEYS = frozenset(('_type', 'path'))


def run_ctags(path, lang=None):
    """Run ctags on file path and yield the symbols it reports.

    This is a generator: the ctags subprocess is only started when the
    first symbol is requested.

    Args:
        path: path to the file
        lang: language for that path (optional). Currently ignored; the
            language is left for ctags to detect.

    Yields:
        One dict per non-empty line of ctags' JSON output, holding every
        key of the decoded record except '_type' and 'path'.

    Raises:
        subprocess.CalledProcessError: if ctags exits with a non-zero status.
        ValueError: if a non-empty output line is not valid JSON.

    """
    cmd = ['ctags'] + __FLAGS + [path]
    output = subprocess.check_output(cmd, universal_newlines=True)

    # Split on '\n' only: each record is one JSON document per line, and
    # str.splitlines() would also break on separators such as U+2028 that
    # may legally appear unescaped inside a JSON string.
    for line in output.split('\n'):
        if not line:
            continue
        record = json.loads(line)
        yield {k: v for k, v in record.items() if k not in _EXCLUDED_KEYS}


@click.command()
@click.option('--path', required=True, help="Path to execute index on")
def main(path):
    """Print the list of symbols ctags reports for the file at PATH."""
    # --path is required: without it, None would end up in the ctags
    # command line and subprocess would fail with an opaque TypeError.
    print(list(run_ctags(path)))


if __name__ == '__main__':
    main()