python.c
No OneTemporary
Actions

Size

29 KB

Subscribers

None

python.c
View Options

	/*
	* Copyright (c) 2000-2003, Darren Hiebert
	*
	* This source code is released for free distribution under the terms of the
	* GNU General Public License version 2 or (at your option) any later version.
	*
	* This module contains functions for generating tags for Python language
	* files.
	*/
	/*
	* INCLUDE FILES
	*/
	#include "general.h" /* must always come first */

	#include <string.h>
	#include <stdio.h>

	#include "entry.h"
	#include "nestlevel.h"
	#include "options.h"
	#include "read.h"
	#include "main.h"
	#include "vstring.h"
	#include "routines.h"
	#include "debug.h"
	#include "xtag.h"

	/*
	* DATA DECLARATIONS
	*/

	/* Tag kinds produced by this parser. The non-negative values are used as
	 * indices into the PythonKinds table (e.g. PythonKinds[K_MEMBER]), so the
	 * order here has to match the order of that table. */
	typedef enum {
	K_UNDEFINED = -1,
	K_CLASS, K_FUNCTION, K_MEMBER, K_VARIABLE, K_NAMESPACE, K_MODULE, K_UNKNOWN,
	} pythonKind;

	/* Roles passed along with K_MODULE when making reference tags; listed in
	 * the same order as the entries of PythonModuleRoles. */
	typedef enum {
	PYTHON_MODULE_IMPORTED,
	PYTHON_MODULE_NAMESPACE,
	PYTHON_MODULE_INDIRECTLY_IMPORTED,
	} pythonModuleRole;

	/* Roles passed along with K_UNKNOWN when making reference tags; listed in
	 * the same order as the entries of PythonUnknownRoles. */
	typedef enum {
	PYTHON_UNKNOWN_IMPORTED,
	PYTHON_UNKNOWN_INDIRECTLY_IMPORTED,
	} pythonUnknownRole;

	/*
	* DATA DEFINITIONS
	*/

	/* Roles related to `import'
	 * ==========================
	 * import X              X = (kind:module, role:imported)
	 *
	 * import X as Y         X = (kind:module, role:indirectly-imported),
	 *                       Y = (kind:namespace, [nameref:X])
	 *                       ------------------------------------------------
	 *                       Don't confuse with namespace role of module kind.
	 *
	 * from X import *       X = (kind:module, role:namespace)
	 *
	 * from X import Y       X = (kind:module, role:namespace),
	 *                       Y = (kind:unknown, role:imported, [scope:X])
	 *
	 * from X import Y as Z  X = (kind:module, role:namespace),
	 *                       Y = (kind:unknown, role:indirectly-imported, [scope:X])
	 *                       Z = (kind:unknown, [nameref:X.Y]) */

	/* Role descriptions attached to the "module" kind in PythonKinds. Entry
	 * order follows the pythonModuleRole enumeration. */
	static roleDesc PythonModuleRoles [] = {
	{ TRUE, "imported",
	"imported modules" },
	{ TRUE, "namespace",
	"namespace from where classes/variables/functions are imported" },
	{ TRUE, "indirectly-imported",
	"module imported in alternative name" },
	};

	/* Role descriptions attached to the "unknown" kind in PythonKinds. Entry
	 * order follows the pythonUnknownRole enumeration. */
	static roleDesc PythonUnknownRoles [] = {
	{ TRUE, "imported", "imported from the other module" },
	{ TRUE, "indirectly-imported",
	"classes/variables/functions/modules imported in alternative name" },
	};

	/* Kind table registered with the parser definition in PythonParser().
	 * Entries are indexed by the pythonKind enumeration, so the order must not
	 * change independently of that enum. The "module" and "unknown" kinds carry
	 * the role tables defined above. */
	static kindOption PythonKinds[] = {
	{TRUE, 'c', "class", "classes"},
	{TRUE, 'f', "function", "functions"},
	{TRUE, 'm', "member", "class members"},
	{TRUE, 'v', "variable", "variables"},
	{TRUE, 'I', "namespace", "name referring a module defined in other file"},
	{TRUE, 'i', "module", "modules",
	.referenceOnly = TRUE, ATTACH_ROLES(PythonModuleRoles)},
	{TRUE, 'x', "unknown", "name referring a classe/variable/function/module defined in other module",
	.referenceOnly = FALSE, ATTACH_ROLES(PythonUnknownRoles)},
	};

	/* Access levels computed by accessFromIdentifier(); each value is used as
	 * an index into PythonAccesses. */
	typedef enum {
	A_PUBLIC, A_PRIVATE, A_PROTECTED
	} pythonAccess;

	/* Text stored in a tag's "access" extension field, indexed by
	 * pythonAccess. */
	static const char *const PythonAccesses[] = {
	"public", "private", "protected"
	};

	/* The two delimiters of Python triple-quoted strings. Pointers to these
	 * constants are handed around to remember which delimiter opened a long
	 * string literal. */
	static char const * const singletriple = "'''";
	static char const * const doubletriple = "\"\"\"";

	/*
	* FUNCTION DEFINITIONS
	*/

	/* Return TRUE if ''c'' may start an identifier, i.e. it is alphabetic
	 * according to isalpha() or an underscore. */
	static boolean isIdentifierFirstCharacter (int c)
	{
		return (boolean) (isalpha (c) || c == '_');
	}

	/* Callback-shaped wrapper around isIdentifierFirstCharacter(), suitable for
	 * the isAcceptable parameter of skipUntil(). The second argument is
	 * ignored. */
	static boolean isIdentifierFirstCharacterCB (int c, void *dummy __unused__)
	{
	return isIdentifierFirstCharacter (c);
	}

	/* Callback for skipUntil(): return TRUE if ''c'' may start a module name,
	 * i.e. it may start an identifier or is a '.'. The second argument is
	 * ignored. */
	static boolean isModuleFirstCharacterCB (int c, void *dummy __unused__)
	{
		return (boolean) (isIdentifierFirstCharacter (c) || c == '.');
	}

	/* Return TRUE if ''c'' may appear inside an identifier, i.e. it is
	 * alphanumeric according to isalnum() or an underscore. */
	static boolean isIdentifierCharacter (int c)
	{
		return (boolean) (isalnum (c) || c == '_');
	}

	/* Return TRUE if ''c'' may appear inside a (possibly dotted) module name:
	 * any identifier character or a '.'. */
	static boolean isModuleCharacter (int c)
	{
		return (boolean) (isIdentifierCharacter (c) || c == '.');
	}

	/* Derive the access level of ''ident'' from its spelling and context.
	 * Follows PEP-8, and always reports single-underscores as protected.
	 * See:
	 * - http://www.python.org/dev/peps/pep-0008/#method-names-and-instance-variables
	 * - http://www.python.org/dev/peps/pep-0008/#designing-for-inheritance
	 *
	 * ident           - name of the symbol being tagged
	 * kind            - kind of the symbol; only K_MEMBER is treated specially
	 * has_parent      - TRUE if the symbol is nested in some scope
	 * parent_is_class - TRUE if that enclosing scope is a class
	 */
	static pythonAccess accessFromIdentifier (const vString *const ident,
		pythonKind kind, boolean has_parent, boolean parent_is_class)
	{
		const char *const p = vStringValue (ident);
		const size_t len = vStringLength (ident);

		/* inside a function/method, private */
		if (has_parent && !parent_is_class)
			return A_PRIVATE;
		/* not starting with "_", public */
		else if (len < 1 || p[0] != '_')
			return A_PUBLIC;
		/* "__...__": magic methods */
		else if (kind == K_MEMBER && parent_is_class &&
				 len > 3 && p[1] == '_' && p[len - 2] == '_' && p[len - 1] == '_')
			return A_PUBLIC;
		/* "__...": name mangling */
		else if (parent_is_class && len > 1 && p[1] == '_')
			return A_PRIVATE;
		/* "_...": suggested as non-public, but easily accessible */
		else
			return A_PROTECTED;
	}

	/* Store the access level of ''ident'' in ''entry''.
	 * The level is computed by accessFromIdentifier() from the remaining
	 * arguments; its textual form goes into the "access" extension field and
	 * private symbols are additionally flagged as file-scoped. */
	static void addAccessFields (tagEntryInfo *const entry,
		const vString *const ident, pythonKind kind,
		boolean has_parent, boolean parent_is_class)
	{
		const pythonAccess level =
			accessFromIdentifier (ident, kind, has_parent, parent_is_class);

		/* FIXME: should we really set isFileScope in addition to access? */
		if (level == A_PRIVATE)
			entry->isFileScope = TRUE;

		entry->extensionFields.access = PythonAccesses [level];
	}

	/* Complete and emit a tag for a function, method or lambda.
	 *
	 * tag             - entry already initialised by the caller (as K_FUNCTION)
	 * function        - name of the function, used to derive the access level
	 * parent          - name of the enclosing scope; empty when at top level
	 * is_class_parent - non-zero if the enclosing scope is a class
	 * arglist         - signature text stored in the tag as is (not copied)
	 *
	 * Nothing is emitted when the resulting kind (member for class parents,
	 * function otherwise) is disabled. For members a qualified tag entry is
	 * emitted in addition to the plain one.
	 */
	static void makeFunctionTagFull (tagEntryInfo *tag, vString *const function,
		vString *const parent, int is_class_parent, const char *arglist)
	{
		char scope_kind_letter = KIND_NULL;

		if (is_class_parent)
		{
			if (!PythonKinds[K_MEMBER].enabled)
				return;
		}
		else
		{
			if (!PythonKinds[K_FUNCTION].enabled)
				return;
		}

		tag->extensionFields.signature = arglist;

		if (vStringLength (parent) > 0)
		{
			if (is_class_parent)
			{
				tag->kind = &(PythonKinds[K_MEMBER]);
				tag->extensionFields.scopeKind = &(PythonKinds[K_CLASS]);
				tag->extensionFields.scopeName = vStringValue (parent);
				scope_kind_letter = PythonKinds[K_CLASS].letter;
			}
			else
			{
				tag->extensionFields.scopeKind = &(PythonKinds[K_FUNCTION]);
				tag->extensionFields.scopeName = vStringValue (parent);
			}
		}

		addAccessFields (tag, function, is_class_parent ? K_MEMBER : K_FUNCTION,
			vStringLength (parent) > 0, is_class_parent);

		makeTagEntry (tag);

		/* scope_kind_letter is only set for members of a class */
		if ((scope_kind_letter != KIND_NULL)
			&& tag->extensionFields.scopeName)
			makeQualifiedTagEntry (tag);
	}

	/* Create a fresh K_FUNCTION tag entry named ''function'' and hand it to
	 * makeFunctionTagFull() together with the remaining arguments. */
	static void makeFunctionTag (vString *const function,
		vString *const parent, int is_class_parent, const char *arglist)
	{
		tagEntryInfo tag;

		initTagEntry (&tag, vStringValue (function), &(PythonKinds[K_FUNCTION]));
		makeFunctionTagFull (&tag, function, parent, is_class_parent, arglist);
	}

	/* Emit a tag for the class named ''class''.
	 *
	 * inheritance     - text found between the parentheses of the class
	 *                   statement (possibly empty), stored in the tag
	 * parent          - name of the enclosing scope; empty when at top level
	 * is_class_parent - non-zero if the enclosing scope is a class, otherwise
	 *                   the scope is recorded as a function
	 *
	 * Does nothing when the class kind is disabled.
	 */
	static void makeClassTag (vString *const class, vString *const inheritance,
		vString *const parent, int is_class_parent)
	{
		tagEntryInfo tag;

		if (! PythonKinds[K_CLASS].enabled)
			return;

		initTagEntry (&tag, vStringValue (class), &(PythonKinds[K_CLASS]));
		if (vStringLength (parent) > 0)
		{
			if (is_class_parent)
			{
				tag.extensionFields.scopeKind = &(PythonKinds[K_CLASS]);
				tag.extensionFields.scopeName = vStringValue (parent);
			}
			else
			{
				tag.extensionFields.scopeKind = &(PythonKinds[K_FUNCTION]);
				tag.extensionFields.scopeName = vStringValue (parent);
			}
		}
		tag.extensionFields.inheritance = vStringValue (inheritance);
		addAccessFields (&tag, class, K_CLASS, vStringLength (parent) > 0,
			is_class_parent);
		makeTagEntry (&tag);
	}

	/* Emit a tag for the variable named ''var''.
	 * When ''parent'' is non-empty the scope is recorded as a class of that
	 * name. Does nothing when the variable kind is disabled. */
	static void makeVariableTag (vString *const var, vString *const parent,
		boolean is_class_parent)
	{
		tagEntryInfo tag;

		if (! PythonKinds[K_VARIABLE].enabled)
			return;

		initTagEntry (&tag, vStringValue (var), &(PythonKinds[K_VARIABLE]));
		if (vStringLength (parent) > 0)
		{
			tag.extensionFields.scopeKind = &(PythonKinds[K_CLASS]);
			tag.extensionFields.scopeName = vStringValue (parent);
		}
		addAccessFields (&tag, var, K_VARIABLE, vStringLength (parent) > 0,
			is_class_parent);
		makeTagEntry (&tag);
	}

	/* Skip a single or double quoted string.
	 * ''cp'' must point at the opening quote. Returns a pointer just past the
	 * matching closing quote, or to the terminating NUL if the string is not
	 * closed on this line. A backslash escapes the character that follows. */
	static const char *skipString (const char *cp)
	{
		const char *start = cp;
		int escaped = 0;
		for (cp++; *cp; cp++)
		{
			if (escaped)
				escaped--;
			else if (*cp == '\\')
				escaped++;
			else if (*cp == *start)	/* same quote character as the opener */
				return cp + 1;
		}
		return cp;
	}

	/* If ''string'' begins with a triple-quote delimiter, return the matching
	 * delimiter constant (doubletriple or singletriple); otherwise NULL. */
	static char const *find_triple_start0(char const *string)
	{
		if (strncmp(string, doubletriple, 3) == 0)
			return doubletriple;
		else if (strncmp(string, singletriple, 3) == 0)
			return singletriple;
		else
			return NULL;
	}

	/* Scan forward from ''cp'' until ''isAcceptable'' accepts a character.
	 *
	 * Quoted strings (including those with r/u/b prefixes) are skipped as a
	 * whole, and a '#' ends the scan at the end of the line.
	 *
	 * cp                - start of the scan
	 * longStringLiteral - if non-NULL and a triple-quoted string starts, the
	 *                     delimiter is stored here and the end of the line is
	 *                     returned
	 * isAcceptable      - predicate called with each candidate character and
	 *                     ''user_data''
	 *
	 * Returns a pointer to the accepted character, or to the terminating NUL
	 * if none was found.
	 */
	static const char *skipUntil (const char *cp,
		const char **longStringLiteral,
		boolean (* isAcceptable) (int, void*),
		void *user_data)
	{
		int match;
		for (; *cp; cp++)
		{
			if (*cp == '#')
				return strchr(cp, '\0');

			match = 0;
			if (*cp == '"' || *cp == '\'')
			{
				if (longStringLiteral)
				{
					*longStringLiteral = find_triple_start0 (cp);
					if (*longStringLiteral)
						return strchr(cp, '\0');
				}
				match = 1;
			}

			/* these checks find unicode, binary (Python 3) and raw strings */
			if (!match)
			{
				boolean r_first = (*cp == 'r' || *cp == 'R');

				/* "r" | "R" | "u" | "U" | "b" | "B" */
				if (r_first || *cp == 'u' || *cp == 'U' || *cp == 'b' || *cp == 'B')
				{
					unsigned int i = 1;

					/*  r_first -> "rb" | "rB" | "Rb" | "RB"
					   !r_first -> "ur" | "UR" | "Ur" | "uR" | "br" | "Br" | "bR" | "BR" */
					if (( r_first && (cp[i] == 'b' || cp[i] == 'B')) ||
						(!r_first && (cp[i] == 'r' || cp[i] == 'R')))
						i++;

					if (cp[i] == '\'' || cp[i] == '"')
					{
						match = 1;
						cp += i;
					}
				}
			}
			if (match)
			{
				cp = skipString(cp);
				if (!*cp) break;
			}
			if (isAcceptable ((int) *cp, user_data))
				return cp;
			if (match)
				cp--; /* avoid jumping over the character after a skipped string */
		}
		return cp;
	}

	/* Skip everything up to an identifier start.
	 * Returns a pointer to the first character that can start an identifier,
	 * or to the end of the line as determined by skipUntil(). */
	static const char *skipToNextIdentifier (const char *cp)
	{
		return skipUntil (cp, NULL, isIdentifierFirstCharacterCB, NULL);
	}

	/* Skip everything up to a module start.
	 * Like skipToNextIdentifier(), but a '.' is also accepted as the first
	 * character. */
	static const char *skipToNextModule (const char *cp)
	{
		return skipUntil (cp, NULL, isModuleFirstCharacterCB, NULL);
	}


	/* Skip an identifier.
	 * Returns a pointer to the first character at or after ''cp'' that is not
	 * an identifier character. */
	static const char *skipIdentifier (const char *cp)
	{
		while (isIdentifierCharacter ((int) *cp))
			cp++;
		return cp;
	}

	/* Look for the first identifier in ''cp'' that begins with one of the
	 * keywords "def", "class", "cdef" or "cpdef".
	 * Returns a pointer to it, or NULL if the line contains none. Only the
	 * prefix is compared here; callers still have to verify that a complete
	 * keyword was matched. */
	static const char *findDefinitionOrClass (const char *cp)
	{
		while (*cp)
		{
			cp = skipToNextIdentifier (cp);
			if (!strncmp(cp, "def", 3) || !strncmp(cp, "class", 5) ||
				!strncmp(cp, "cdef", 4) || !strncmp(cp, "cpdef", 5))
			{
				return cp;
			}
			cp = skipIdentifier (cp);
		}
		return NULL;
	}

	/* Return a pointer to the first character at or after ''cp'' that is not
	 * whitespace according to isspace(). */
	static const char *skipSpace (const char *cp)
	{
		while (isspace ((int) *cp))
			++cp;
		return cp;
	}

	/* Starting at ''cp'', parse an identifier into ''identifier''.
	 * ''identifier'' is cleared first, so it ends up empty when ''cp'' does
	 * not point at an identifier character. Returns a pointer to the first
	 * character after the identifier. */
	static const char *parseIdentifier (const char *cp, vString *const identifier)
	{
		vStringClear (identifier);
		while (isIdentifierCharacter ((int) *cp))
		{
			vStringPut (identifier, (int) *cp);
			++cp;
		}
		vStringTerminate (identifier);
		return cp;
	}

	/* Starting at ''cp'', parse a (possibly dotted) module name into
	 * ''module''. ''module'' is cleared first. Returns a pointer to the first
	 * character after the module name. */
	static const char *parseModule (const char *cp, vString *const module)
	{
		vStringClear (module);
		while (isModuleCharacter (*cp))
		{
			vStringPut (module, (int) *cp);
			++cp;
		}
		vStringTerminate (module);
		return cp;
	}

	/* Parse a class statement and emit its tag.
	 *
	 * cp              - text directly after the "class" keyword
	 * class           - receives the class name
	 * parent          - name of the enclosing scope (may be empty)
	 * is_class_parent - non-zero if the enclosing scope is a class
	 *
	 * If the name is followed by '(', everything up to the matching ')' is
	 * collected as the inheritance list; further input lines are read when
	 * the ')' is not on the current one.
	 */
	static void parseClass (const char *cp, vString *const class,
		vString *const parent, int is_class_parent)
	{
		vString *const inheritance = vStringNew ();
		vStringClear (inheritance);
		cp = parseIdentifier (cp, class);
		cp = skipSpace (cp);
		if (*cp == '(')
		{
			++cp;
			while (*cp != ')')
			{
				if (*cp == '\0')
				{
					/* Closing parenthesis can be in follow up line. */
					cp = (const char *) readLineFromInputFile ();
					if (!cp) break;
					vStringPut (inheritance, ' ');
					continue;
				}
				vStringPut (inheritance, *cp);
				++cp;
			}
			vStringTerminate (inheritance);
		}
		makeClassTag (class, inheritance, parent, is_class_parent);
		vStringDelete (inheritance);
	}

	/* Parse the list of names following an "import"/"cimport" keyword and emit
	 * reference tags for them.
	 *
	 * cp          - text directly after the keyword
	 * from_module - name of the module in a "from X import ..." statement, or
	 *               NULL for a plain "import ..." statement
	 *
	 * For "from" imports the list may be wrapped in parentheses and span
	 * several lines; further input lines are read until the ')' is seen.
	 */
	static void parseImports (const char *cp, const char *from_module)
	{
		const char *cp_next;
		vString *name, *name_next, *fq;
		boolean maybe_multiline = FALSE;
		boolean found_multiline_end = FALSE;

		name = vStringNew ();
		name_next = vStringNew ();
		fq = vStringNew ();

		cp = skipSpace (cp);
		if (from_module && *cp == '(')
		{
			maybe_multiline = TRUE;
			++cp;
		}

		cp = skipToNextModule (cp);
	nextLine:
		while (*cp)
		{
			cp = parseModule (cp, name);
			cp = skipSpace (cp);
			if (*cp == ')')
				found_multiline_end = TRUE;
			cp = skipToNextModule (cp);
			cp_next = parseIdentifier (cp, name_next);

			if (strcmp (vStringValue (name_next), "as") == 0)
			{
				cp = skipToNextIdentifier (cp_next);
				cp = parseIdentifier (cp, name_next);
				if (from_module)
				{
					/* from x import Y as Z
					   ----------------------------
					   x = (kind:module,  role:namespace),
					   Y = (kind:unknown, role:indirectly-imported),
					   Z = (kind:unknown) */

					/* Y */
					makeSimpleRefTag (name, PythonKinds, K_UNKNOWN,
						PYTHON_UNKNOWN_INDIRECTLY_IMPORTED);
					/* x.Y */
					if (isXtagEnabled(XTAG_QUALIFIED_TAGS))
					{
						tagEntryInfo fqe;
						vStringCatS (fq, from_module);
						vStringPut (fq, '.');
						vStringCat (fq, name);
						initRefTagEntry (&fqe, vStringValue (fq), PythonKinds + K_UNKNOWN,
							PYTHON_UNKNOWN_INDIRECTLY_IMPORTED);
						markTagExtraBit (&fqe, XTAG_QUALIFIED_TAGS);
						makeTagEntry (&fqe);
						vStringClear(fq);
					}
					/* Z */
					makeSimpleTag (name_next, PythonKinds, K_UNKNOWN);
				}
				else
				{
					/* import x as Y
					   ----------------------------
					   X = (kind:module, role:indirectly-imported)
					   Y = (kind:namespace) */
					/* X */
					makeSimpleRefTag (name, PythonKinds, K_MODULE,
						PYTHON_MODULE_INDIRECTLY_IMPORTED);
					/* Y */
					makeSimpleTag (name_next, PythonKinds, K_NAMESPACE);
				}

				cp = skipSpace (cp);
				if (*cp == ')')
				{
					found_multiline_end = TRUE;
					cp++;
				}
				cp = skipToNextIdentifier (cp);
			}
			else
			{
				if (from_module)
				{
					/* from x import Y
					   --------------
					   x = (kind:module,  role:namespace),
					   Y = (kind:unknown, role:imported) */
					/* Y -- the role belongs to the "unknown" kind, so use the
					   pythonUnknownRole constant (numerically the same as
					   PYTHON_MODULE_IMPORTED). */
					makeSimpleRefTag (name, PythonKinds, K_UNKNOWN,
						PYTHON_UNKNOWN_IMPORTED);
					/* x.Y */
					if (isXtagEnabled(XTAG_QUALIFIED_TAGS))
					{
						tagEntryInfo fqe;
						vStringCatS (fq, from_module);
						vStringPut (fq, '.');
						vStringCat (fq, name);
						initRefTagEntry (&fqe, vStringValue (fq),
							PythonKinds + K_UNKNOWN,
							PYTHON_UNKNOWN_IMPORTED);
						markTagExtraBit (&fqe, XTAG_QUALIFIED_TAGS);
						makeTagEntry (&fqe);
						vStringClear(fq);
					}
				}
				else
				{
					/* import X
					   --------------
					   X = (kind:module, role:imported) */
					makeSimpleRefTag (name, PythonKinds, K_MODULE,
						PYTHON_MODULE_IMPORTED);
				}
				/* Don't update cp. Start from the position of name_next. */
			}
		}

		if (maybe_multiline && (!found_multiline_end))
		{
			if ((cp = (const char *) readLineFromInputFile ()) != NULL)
			{
				cp = skipSpace (cp);
				if (*cp == ')')
				{
					cp++;
					found_multiline_end = TRUE;
				}
				else
					goto nextLine;
			}
		}

		vStringDelete (fq);
		vStringDelete (name);
		vStringDelete (name_next);
	}

	/* Parse the remainder of a "from X import ..." statement.
	 *
	 * cp    - text directly after the "from" keyword
	 * dummy - unused; present so the signature matches parseImports()
	 *
	 * If the module name is followed by "import" or "cimport", a reference tag
	 * with the namespace role is emitted for X and the imported names are
	 * handled by parseImports(). Otherwise nothing is emitted.
	 */
	static void parseFromModule (const char *cp, const char *dummy __unused__)
	{
		vString *from_module;
		vString *import_keyword;

		/* from X import ...
		   --------------------
		   X = (kind:module, role:namespace) */

		from_module = vStringNew ();
		import_keyword = vStringNew ();

		cp = skipToNextModule (cp);
		cp = parseModule (cp, from_module);
		cp = skipToNextIdentifier (cp);
		cp = parseIdentifier (cp, import_keyword);

		if (strcmp (vStringValue (import_keyword), "import") == 0
			|| strcmp (vStringValue (import_keyword), "cimport") == 0)
		{
			makeSimpleRefTag (from_module, PythonKinds, K_MODULE,
				PYTHON_MODULE_NAMESPACE);
			parseImports (cp, vStringValue (from_module));
		}

		vStringDelete (import_keyword);
		vStringDelete (from_module);
	}


	/* Handle an import statement, if ''cp'' contains one.
	 * The first identifier on the line must be "import", "cimport" or "from",
	 * followed by whitespace. Returns TRUE if the line was handled as an
	 * import statement and FALSE otherwise. */
	static boolean parseNamespace (const char *cp)
	{
		void (* parse_sub) (const char *, const char *);

		cp = skipToNextIdentifier (cp);

		if (strncmp (cp, "import", 6) == 0)
		{
			cp += 6;
			parse_sub = parseImports;
		}
		else if (strncmp (cp, "cimport", 7) == 0)
		{
			cp += 7;
			parse_sub = parseImports;
		}
		else if (strncmp (cp, "from", 4) == 0)
		{
			cp += 4;
			parse_sub = parseFromModule;
		}
		else
			return FALSE;

		/* continue only if there is some space between the keyword and the identifier */
		if (! isspace (*cp))
			return FALSE;

		cp++;
		cp = skipSpace (cp);

		parse_sub (cp, NULL);
		return TRUE;
	}

	/* modified from get.c getArglistFromStr().
	* warning: terminates rest of string past arglist!
	* note: does not ignore brackets inside strings! */
	/* State carried across input lines while an argument list is scanned by
	 * captureArguments() and parseArglist(). */
	struct argParsingState
	{
	/* buffer the argument text is appended to; NULL when only skipping */
	vString *arglist;
	/* current parenthesis nesting depth; 0 before the first '(' and again
	 * after its matching ')' */
	int level;
	/* set by skipUntil() to the delimiter of a triple-quoted string that
	 * starts inside the argument list, NULL otherwise */
	char const *longStringLiteral;
	};

	/* skipUntil() callback used while scanning an argument list.
	 * If ''arglist'' (a vString *) is non-NULL, ''c'' is appended to it, with
	 * tabs turned into spaces and a space dropped when the buffer already ends
	 * in one. Returns TRUE when ''c'' is a parenthesis, so the scan stops there
	 * and the caller can adjust the nesting level. */
	static boolean gatherArglistCB (int c, void *arglist)
	{
		if (arglist)
		{
			if ('\t' == c)
				c = ' ';

			if (vStringLast ((vString *)arglist) != ' '
				|| c != ' ')
				vStringPut ((vString *)arglist, c);
		}

		if (c == '(' || c == ')')
			return TRUE;
		else
			return FALSE;
	}

	/* Scan one line of an argument list, updating ''state''.
	 *
	 * On the first call (state->level == 0) the scan starts at the first '('
	 * in ''buf''; FALSE is returned if there is none. On later calls the whole
	 * line is treated as a continuation. Scanning stops when the parenthesis
	 * level drops back to 0 or the end of the line is reached; the caller
	 * tells the two apart through state->level. Returns TRUE in both cases.
	 */
	static boolean parseArglist(const char* buf, struct argParsingState *state)
	{
		const char *start, *current;

		start = buf;
		if (state->level == 0)
		{
			if (NULL == (start = strchr(buf, '(')))
				return FALSE;
			else
			{
				if (state->arglist)
					vStringPut (state->arglist, *start);
				state->level = 1;
				start += 1;
			}
		}

		do {
			current = skipUntil (start, &state->longStringLiteral,
				gatherArglistCB, state->arglist);
			switch (*current)
			{
			case '\0':
				break;
			case '(':
				++ state->level;
				break;
			case ')':
				-- state->level;
				break;
			}
			start = current + 1;
		} while (
			/* Still be in parenthesis */
			state->level > 0
			/* the input string is continued. */
			&& *current && *start
			);

		return TRUE;
	}

	static char const *find_triple_end(char const *string, char const **which,
		boolean dontRepeat);

	/* Scan a parenthesised argument list beginning in ''start'', reading more
	 * input lines as needed until the parenthesis is balanced.
	 *
	 * start   - line in which to look for the opening '('
	 * arglist - buffer receiving the argument text, or NULL to only skip it
	 *
	 * When only skipping (arglist == NULL), lines that lie inside a
	 * triple-quoted string opened within the list are passed over until the
	 * string ends. Scanning stops on balanced parentheses, when no '(' is
	 * found at all, or at end of input.
	 */
	static void captureArguments (const char *start, vString *arglist)
	{
		struct argParsingState state;

		state.level = 0;
		state.arglist = arglist;
		state.longStringLiteral = NULL;

		while (start)
		{
			if (arglist == NULL && state.longStringLiteral)
			{
				start = find_triple_end(start, &state.longStringLiteral,
					TRUE);
				/* still inside the long string: try the next line */
				if (arglist == NULL && state.longStringLiteral)
				{
					start = (const char *) readLineFromInputFile ();
					continue;
				}
			}

			if (parseArglist (start, &state) == FALSE)
				/* No '(' is found: broken input */
				break;
			else if (state.level == 0)
				break;
			else
				start = (const char *) readLineFromInputFile ();
		}
	}

	/* Run captureArguments() on ''start'' with a NULL arglist, i.e. scan past
	 * a parenthesised group (reading further input lines if necessary) without
	 * recording its text. */
	static void skipParens (const char *start)
	{
	captureArguments (start, NULL);
	}

	/* Parse a function definition and emit its tag.
	 *
	 * cp              - text directly after the "def" keyword (or after the
	 *                   type declaration of a cdef/cpdef)
	 * def             - receives the function name
	 * parent          - name of the enclosing scope (may be empty)
	 * is_class_parent - non-zero if the enclosing scope is a class
	 *
	 * The argument list is collected into a static buffer that is allocated on
	 * first use and reused by later calls.
	 */
	static void parseFunction (const char *cp, vString *const def,
		vString *const parent, int is_class_parent)
	{
		tagEntryInfo tag;
		static vString *arglist;

		cp = parseIdentifier (cp, def);
		initTagEntry (&tag, vStringValue (def), &(PythonKinds[K_FUNCTION]));

		if (arglist)
			vStringClear (arglist);
		else
			arglist = vStringNew ();
		captureArguments (cp, arglist);
		makeFunctionTagFull (&tag, def, parent, is_class_parent, vStringValue (arglist));
	}

	/* Build the combined name of the scopes enclosing a symbol.
	 *
	 * The names of all nesting levels whose indentation is smaller than
	 * ''indent'' are joined with "." into ''result'' (which is cleared first).
	 * For example, for Method in this code:
	 *   class MyClass:
	 *       def myFunction:
	 *           def SubFunction:
	 *               class SubClass:
	 *                   def Method:
	 *                       pass
	 * the result is:
	 *   MyClass.myFunction.SubFunction.SubClass
	 * A single "." separator is used for classes and functions alike so that
	 * Geany's symbol list grouping works properly.
	 *
	 * Returns whether the innermost enclosing level is a class; FALSE when
	 * there is no enclosing level.
	 */
	static boolean constructParentString(NestingLevels *nls, int indent,
		vString *result)
	{
		int innermost_is_class = FALSE;
		int depth = 0;

		vStringClear (result);
		while (depth < nls->n && nls->levels[depth].indentation < indent)
		{
			NestingLevel *level = &nls->levels[depth];

			/* every name but the first is preceded by the separator */
			if (depth > 0)
				vStringCatS(result, ".");
			vStringCat(result, level->name);
			innermost_is_class = (level->kindIndex == K_CLASS);
			depth++;
		}
		return innermost_is_class;
	}

	/* Drop the nesting levels that a line indented by ''indent'' has left:
	 * the list is cut at the first level whose indentation is greater than or
	 * equal to ''indent''. */
	static void checkIndent(NestingLevels *nls, int indent)
	{
		int depth = 0;

		while (depth < nls->n)
		{
			NestingLevel *level = &nls->levels[depth];

			if (level && indent <= level->indentation)
			{
				/* truncate levels */
				nls->n = depth;
				return;
			}
			depth++;
		}
	}

	/* Record a new class or function scope named ''name'' that starts at
	 * ''indentation''.
	 * The scope takes the place of the first existing level whose indentation
	 * is not smaller than ''indentation''; if there is no such level it is
	 * pushed on top. */
	static void addNestingLevel(NestingLevels *nls, int indentation,
		const vString *name, boolean is_class)
	{
		const int kindIndex = is_class ? K_CLASS : K_FUNCTION;
		NestingLevel *level;
		int slot = 0;

		/* find the first level that is not shallower than the new scope */
		while (slot < nls->n && nls->levels[slot].indentation < indentation)
			slot++;

		if (slot < nls->n)
			/* reuse existing slot */
			level = nestingLevelsTruncate (nls, slot + 1, name, kindIndex);
		else
			level = nestingLevelsPush(nls, name, kindIndex);

		level->indentation = indentation;
	}

	/* Return a pointer to the start of the next triple string, or NULL. Store
	 * the kind of triple string in "which" if the return is not NULL.
	 * Ordinary quoted strings are skipped and the search stops at a '#'.
	 * Note that "which" is also written (with NULL) whenever an ordinary
	 * quote is encountered.
	 */
	static char const *find_triple_start(char const *string, char const **which)
	{
		char const *cp = string;

		for (; *cp; cp++)
		{
			if (*cp == '#')
				break;
			if (*cp == '"' || *cp == '\'')
			{
				*which = find_triple_start0 (cp);
				if (*which)
					return cp;
				cp = skipString(cp);
				if (!*cp) break;
				cp--; /* avoid jumping over the character after a skipped string */
			}
		}
		return NULL;
	}

	/* Find the end of a triple string as pointed to by "which", and update "which"
	 * with any other triple strings following in the given string.
	 *
	 * string     - text to search; assumed to lie inside the triple string
	 * which      - in: delimiter of the open string; out: NULL if every triple
	 *              string on the line was closed, otherwise the delimiter of
	 *              the one still open at the end of the line
	 * dontRepeat - if TRUE, stop right after the first closing delimiter
	 *              instead of looking for further triple strings
	 *
	 * Returns a pointer to where the search stopped.
	 */
	static char const *find_triple_end(char const *string, char const **which,
		boolean dontRepeat)
	{
		char const *s = string;
		while (1)
		{
			char const *last;

			/* Check if the string ends in the same line. */
			last = s;
			s = strstr (s, *which);
			if (!s)
			{
				s = last;
				break;
			}
			s += 3;
			*which = NULL;

			if (dontRepeat)
				break;
			/* If yes, check if another one starts in the same line. */
			last = s;
			s = find_triple_start(s, which);
			if (!s)
			{
				s = last;
				break;
			}
			s += 3;
		}
		return s;
	}

	/* Check whether ''line'' is a simple assignment "name = ..." and return a
	 * pointer to the start of the name, or NULL if it is not.
	 *
	 * If the right-hand side contains a '(' before any further '=' or '#',
	 * *lineContinuation is set to point at it so the caller can skip a
	 * parenthesised expression that may continue on following lines.
	 */
	static const char *findVariable(const char *line, const char** lineContinuation)
	{
		/* Parse global and class variable names (C.x) from assignment statements.
		 * Object attributes (obj.x) are ignored.
		 * Assignment to a tuple 'x, y = 2, 3' not supported.
		 * TODO: ignore duplicate tags from reassignment statements. */
		const char *cp, *sp, *eq, *start;

		cp = strstr(line, "=");
		if (!cp)
			return NULL;
		eq = cp + 1;
		while (*eq)
		{
			if (*eq == '=')
				return NULL;	/* ignore '==' operator and 'x=5,y=6)' function lines */
			if (*eq == '(' || *eq == '#')
				break;	/* allow 'x = func(b=2,y=2,' lines and comments at the end of line */
			eq++;
		}

		if (*eq == '(')
			*lineContinuation = eq;

		/* go backwards to the start of the line, checking we have valid chars */
		start = cp - 1;
		while (start >= line && isspace ((int) *start))
			--start;
		while (start >= line && isIdentifierCharacter ((int) *start))
			--start;
		if (!isIdentifierFirstCharacter(*(start + 1)))
			return NULL;
		sp = start;
		while (sp >= line && isspace ((int) *sp))
			--sp;
		if ((sp + 1) != line)	/* the line isn't a simple variable assignment */
			return NULL;
		/* the line is valid, parse the variable name */
		++start;
		return start;
	}

	/* Skip type declaration that optionally follows a cdef/cpdef.
	 *
	 * cp       - text directly after the cdef/cpdef keyword
	 * is_class - set to TRUE when the declaration is a "cdef class"
	 *
	 * Returns a pointer to the name being defined: the text after "class" for
	 * class declarations, or the start of the token that is directly followed
	 * by '(' for functions. Returns NULL for "extern from" blocks, for
	 * assignments, and when no '(' is found within the first two tokens.
	 */
	static const char *skipTypeDecl (const char *cp, boolean *is_class)
	{
		const char *lastStart = cp, *ptr = cp;
		int loopCount = 0;
		ptr = skipSpace(cp);
		if (!strncmp("extern", ptr, 6)) {
			ptr += 6;
			ptr = skipSpace(ptr);
			if (!strncmp("from", ptr, 4)) { return NULL; }
		}
		if (!strncmp("class", ptr, 5)) {
			ptr += 5 ;
			*is_class = TRUE;
			ptr = skipSpace(ptr);
			return ptr;
		}
		/* limit so that we don't pick off "int item=obj()" */
		while (*ptr && loopCount++ < 2) {
			while (*ptr && *ptr != '=' && *ptr != '(' && !isspace(*ptr)) {
				/* skip over e.g. 'cpdef numpy.ndarray[dtype=double, ndim=1]' */
				if(*ptr == '[') {
					while (*ptr && *ptr != ']') ptr++;
					if (*ptr) ptr++;
				} else {
					ptr++;
				}
			}
			if (!*ptr || *ptr == '=') return NULL;
			if (*ptr == '(') {
				return lastStart; /* if we stopped on a '(' we are done */
			}
			ptr = skipSpace(ptr);
			lastStart = ptr;
			while (*lastStart == '*') lastStart++;  /* cdef int *identifier */
		}
		return NULL;
	}

	/* checks if there is a lambda at position of cp, and return its argument list
	 * if so.
	 * We don't return the lambda name since it is useless for now since we already
	 * know it when we call this function, and it would be a little slower.
	 *
	 * cp      - start of an assignment "name = ..."
	 * arglist - if non-NULL and a lambda is found, receives a newly allocated
	 *           string "(args)" which the caller has to free; left untouched
	 *           when FALSE is returned
	 */
	static boolean varIsLambda (const char *cp, char **arglist)
	{
		boolean is_lambda = FALSE;

		cp = skipSpace (cp);
		cp = skipIdentifier (cp); /* skip the lambda's name */
		cp = skipSpace (cp);
		if (*cp == '=')
		{
			cp++;
			cp = skipSpace (cp);
			if (strncmp (cp, "lambda", 6) == 0)
			{
				const char *tmp;

				cp += 6; /* skip the lambda */
				tmp = skipSpace (cp);
				/* check if there is a space after lambda to detect assignations
				 * starting with 'lambdaXXX' */
				if (tmp != cp)
				{
					vString *args = vStringNew ();

					cp = tmp;
					vStringPut (args, '(');
					for (; *cp != 0 && *cp != ':'; cp++)
						vStringPut (args, *cp);
					vStringPut (args, ')');
					vStringTerminate (args);
					if (arglist)
						*arglist = strdup (vStringValue (args));
					vStringDelete (args);
					is_lambda = TRUE;
				}
			}
		}
		return is_lambda;
	}

	/* checks if @p cp has keyword @p keyword at the start, and fills @p cp_n with
	 * the position of the next non-whitespace after the keyword.
	 * The keyword must be followed by a whitespace character. Returns TRUE on
	 * a match; on FALSE @p cp_n is left untouched. */
	static boolean matchKeyword (const char *keyword, const char *cp, const char **cp_n)
	{
		size_t kw_len = strlen (keyword);
		if (strncmp (cp, keyword, kw_len) == 0 && isspace (cp[kw_len]))
		{
			*cp_n = skipSpace (&cp[kw_len + 1]);
			return TRUE;
		}
		return FALSE;
	}

	/* Parser entry point: read the input file line by line and emit tags for
	 * variables, lambdas, functions, classes and imports.
	 *
	 * Per logical line (backslash continuations are joined first):
	 *  - lines inside a triple-quoted string are skipped until it ends;
	 *  - nesting levels deeper than the current indentation are dropped;
	 *  - a simple assignment yields a variable or lambda tag;
	 *  - a def/class/cdef/cpdef statement yields a function or class tag and
	 *    opens a new nesting level;
	 *  - otherwise the line is tried as an import statement, and failing
	 *    that any parenthesised group it opens is skipped.
	 */
	static void findPythonTags (void)
	{
		vString *const continuation = vStringNew ();
		vString *const name = vStringNew ();
		vString *const parent = vStringNew();

		NestingLevels *const nesting_levels = nestingLevelsNew();

		const char *line;
		int line_skip = 0;
		char const *longStringLiteral = NULL;

		while ((line = (const char *) readLineFromInputFile ()) != NULL)
		{
			const char *variableLineContinuation = NULL;
			const char *cp = line, *candidate;
			char const *longstring;
			char const *keyword, *variable;
			int indent;

			cp = skipSpace (cp);

			if (*cp == '\0')  /* skip blank line */
				continue;

			/* Skip comment if we are not inside a multi-line string. */
			if (*cp == '#' && !longStringLiteral)
				continue;

			/* Deal with line continuation. */
			if (!line_skip) vStringClear(continuation);
			vStringCatS(continuation, line);
			vStringStripTrailing(continuation);
			if (vStringLast(continuation) == '\\')
			{
				vStringChop(continuation);
				vStringCatS(continuation, " ");
				line_skip = 1;
				continue;
			}
			cp = line = vStringValue(continuation);
			cp = skipSpace (cp);
			indent = cp - line;
			line_skip = 0;

			/* Deal with multiline string ending. */
			if (longStringLiteral)
			{
				find_triple_end(cp, &longStringLiteral, FALSE);
				continue;
			}

			checkIndent(nesting_levels, indent);

			/* Find global and class variables */
			variable = findVariable(line, &variableLineContinuation);
			if (variable)
			{
				const char *start = variable;
				char *arglist;
				boolean parent_is_class;

				vStringClear (name);
				while (isIdentifierCharacter ((int) *start))
				{
					vStringPut (name, (int) *start);
					++start;
				}
				vStringTerminate (name);

				parent_is_class = constructParentString(nesting_levels, indent, parent);
				if (varIsLambda (variable, &arglist))
				{
					/* show class members or top-level script lambdas only */
					if (parent_is_class || vStringLength(parent) == 0)
						makeFunctionTag (name, parent, parent_is_class, arglist);
					eFree (arglist);
				}
				else
				{
					/* skip variables in methods */
					if (parent_is_class || vStringLength(parent) == 0)
						makeVariableTag (name, parent, parent_is_class);
				}

				if (variableLineContinuation)
				{
					skipParens (variableLineContinuation);
					continue;
				}
			}

			/* Deal with multiline string start. */
			longstring = find_triple_start(cp, &longStringLiteral);
			if (longstring)
			{
				longstring += 3;
				find_triple_end(longstring, &longStringLiteral, FALSE);
				/* We don't parse for any tags in the rest of the line. */
				continue;
			}

			/* Deal with def and class keywords. */
			keyword = findDefinitionOrClass (cp);
			if (keyword)
			{
				boolean found = FALSE;
				boolean is_class = FALSE;
				if (matchKeyword ("def", keyword, &cp))
				{
					found = TRUE;
				}
				else if (matchKeyword ("class", keyword, &cp))
				{
					found = TRUE;
					is_class = TRUE;
				}
				else if (matchKeyword ("cdef", keyword, &cp))
				{
					candidate = skipTypeDecl (cp, &is_class);
					if (candidate)
					{
						found = TRUE;
						cp = candidate;
					}

				}
				else if (matchKeyword ("cpdef", keyword, &cp))
				{
					candidate = skipTypeDecl (cp, &is_class);
					if (candidate)
					{
						found = TRUE;
						cp = candidate;
					}
				}

				if (found)
				{
					boolean is_parent_class;

					is_parent_class =
						constructParentString(nesting_levels, indent, parent);

					if (is_class)
						parseClass (cp, name, parent, is_parent_class);
					else
						parseFunction(cp, name, parent, is_parent_class);

					addNestingLevel(nesting_levels, indent, name, is_class);
				}
				continue;
			}
			/* Find and parse namespace related elements */
			if (parseNamespace(line))
				continue;

			/* If the current line contains
			   an open parenthesis skip lines till its associated
			   close parenthesis:

			   foo (...
			   ... ) */
			skipParens (line);
		}
		/* Clean up all memory we allocated. */
		vStringDelete (parent);
		vStringDelete (name);
		vStringDelete (continuation);
		nestingLevelsFree (nesting_levels);
	}

	/* Create and return the parser definition for Python: registers the kind
	 * table, the file extensions and name aliases handled by this parser, and
	 * findPythonTags() as the parsing routine. */
	extern parserDefinition *PythonParser (void)
	{
		static const char *const extensions[] = { "py", "pyx", "pxd", "pxi" ,"scons",
											  NULL };
		static const char *const aliases[] = { "python[23]*", "scons",
											   NULL };
		parserDefinition *def = parserNew ("Python");
		def->kinds = PythonKinds;
		def->kindCount = ARRAY_SIZE (PythonKinds);
		def->extensions = extensions;
		def->aliases = aliases;
		def->parser = findPythonTags;
		return def;
	}

	/* vi:set tabstop=4 shiftwidth=4: */

File Metadata

Mime Type: text/x-c
Expires: Jun 4 2025, 6:41 PM (14 w, 1 d ago)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 3398701

python.cNo OneTemporaryActions

python.cView Options

File Metadata

Event Timeline

python.c
No OneTemporary
Actions

python.c
View Options