Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F8394679
cxx_parser_variable.c
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
17 KB
Subscribers
None
cxx_parser_variable.c
View Options
/*
* Copyright (c) 2016, Szymon Tomasz Stefanek
*
* This source code is released for free distribution under the terms of the
* GNU General Public License version 2 or (at your option) any later version.
*
* This module contains functions for parsing and scanning C++ source files
*/
#include
"cxx_parser.h"
#include
"cxx_parser_internal.h"
#include
"cxx_debug.h"
#include
"cxx_keyword.h"
#include
"cxx_token.h"
#include
"cxx_token_chain.h"
#include
"cxx_scope.h"
#include
"vstring.h"
#include
"read.h"
//
// Attempt to extract variable declarations from the chain.
// Returns true if at least one variable was extracted.
// Returns false if a variable declaration could not be identified.
//
// Recognized variable declarations are of the following kinds:
//
// type var;
// type var1,var2;
// type var[];
// type var(constructor args);
// type var{list initializer};
// type var = ...;
// type (*ident)();
// type var:bits;
// type var: range declaration <-- (FIXME: this is only inside for!)
// very complex type with modifiers() namespace::namespace::var = ...;
// type<with template> namespace::var[] = {
// ...
//
// Assumptions:
// - there is a terminator at the end: either ; or {
//
// Notes:
// - Be aware that if this function returns true then the pChain very likely has been modified
// (partially destroyed) as part of the type extraction algorithm.
// If the function returns false the chain has not been modified (and
// to extract something else from it).
//
// - This function is quite tricky.
//
bool
cxxParserExtractVariableDeclarations
(
CXXTokenChain
*
pChain
,
unsigned
int
uFlags
)
{
CXX_DEBUG_ENTER
();
if
(
pChain
->
iCount
<
1
)
{
CXX_DEBUG_LEAVE_TEXT
(
"Chain is empty"
);
return
false
;
}
#ifdef CXX_DO_DEBUGGING
vString
*
pJoinedChain
=
cxxTokenChainJoin
(
pChain
,
NULL
,
0
);
CXX_DEBUG_PRINT
(
"Looking for variable declarations in '%s'"
,
vStringValue
(
pJoinedChain
)
);
vStringDelete
(
pJoinedChain
);
#endif
//
// Strategy:
// - verify that the chain starts with an identifier or keyword (always present)
// - run to one of : ; [] () {} = ,
// - ensure that the previous token is an identifier (except for special cases)
// - go back to skip the eventual scope
// - ensure that there is a leading type
// - if we are at : [], () or {} then run to the next ; = or ,
// - once we have determined that a variable declaration is there
// modify the chain to contain only the type name
// - emit variable tag
// - if we are at , then check if there are more declarations
//
CXXToken
*
t
=
cxxTokenChainFirst
(
pChain
);
enum
CXXScopeType
eScopeType
=
cxxScopeGetType
();
CXX_DEBUG_ASSERT
(
t
,
"There should be an initial token here"
);
if
(
!
cxxTokenTypeIsOneOf
(
t
,
CXXTokenTypeIdentifier
|
CXXTokenTypeKeyword
))
{
CXX_DEBUG_LEAVE_TEXT
(
"Statement does not start with identifier or keyword"
);
return
false
;
}
// Handle the special case of delete/new keywords at the beginning
if
(
cxxTokenTypeIs
(
t
,
CXXTokenTypeKeyword
)
&&
(
(
t
->
eKeyword
==
CXXKeywordDELETE
)
||
(
t
->
eKeyword
==
CXXKeywordNEW
)
)
)
{
CXX_DEBUG_LEAVE_TEXT
(
"Statement looks like a delete or new call"
);
return
false
;
}
bool
bGotVariable
=
false
;
// Loop over the whole statement.
while
(
t
)
{
// Scan up to a notable token: ()[]{}=,;:{
while
(
t
)
{
if
(
cxxTokenTypeIsOneOf
(
t
,
CXXTokenTypeSingleColon
|
CXXTokenTypeParenthesisChain
|
CXXTokenTypeSquareParenthesisChain
|
CXXTokenTypeBracketChain
|
CXXTokenTypeAssignment
|
CXXTokenTypeComma
|
CXXTokenTypeSemicolon
|
CXXTokenTypeOpeningBracket
))
{
// Notable token reached.
break
;
}
if
(
cxxTokenTypeIsOneOf
(
t
,
CXXTokenTypeOperator
|
CXXTokenTypeMultipleAnds
|
CXXTokenTypePointerOperator
|
CXXTokenTypeStringConstant
|
CXXTokenTypeAngleBracketChain
|
CXXTokenTypeCharacterConstant
|
CXXTokenTypeMultipleDots
|
CXXTokenTypeClosingBracket
|
CXXTokenTypeClosingParenthesis
|
CXXTokenTypeClosingSquareParenthesis
|
CXXTokenTypeGreaterThanSign
)
)
{
// Something that should not appear in a variable declaration
CXX_DEBUG_LEAVE_TEXT
(
"Found token '%s' of type 0x%02x that should "
\
"not appear in the initial part of a variable declaration"
,
vStringValue
(
t
->
pszWord
),
t
->
eType
);
return
bGotVariable
;
}
if
(
t
->
eType
==
CXXTokenTypeSmallerThanSign
)
{
// Must be part of template type name (so properly balanced).
t
=
cxxTokenChainSkipToEndOfTemplateAngleBracket
(
t
);
if
(
!
t
)
{
CXX_DEBUG_LEAVE_TEXT
(
"Failed to skip past angle bracket chain"
);
return
bGotVariable
;
}
}
t
=
t
->
pNext
;
}
// Notable token reached?
if
(
!
t
)
{
CXX_DEBUG_LEAVE_TEXT
(
"Nothing interesting here"
);
return
bGotVariable
;
}
CXX_DEBUG_PRINT
(
"Found notable token '%s' of type 0x%02x"
,
vStringValue
(
t
->
pszWord
),
t
->
eType
);
// Now before the notable token there MUST be an identifier
// (eventually hidden in a parenthesis chain) and also a typename.
if
(
!
t
->
pPrev
)
{
CXX_DEBUG_LEAVE_TEXT
(
"Nothing interesting before notable token"
);
return
bGotVariable
;
}
CXXToken
*
pIdentifier
=
NULL
;
CXXToken
*
pTokenBefore
=
NULL
;
// If we have to continue scanning we'll remove the tokens from here
// so they don't end up being part of the type name.
// If this is set to NULL then it means that we cannot determine properly
// what to remove and we should stop scanning after the current variable.
CXXToken
*
pRemoveStart
=
t
;
switch
(
t
->
eType
)
{
case
CXXTokenTypeParenthesisChain
:
{
// At a parenthesis chain we need some additional checks.
if
(
// check for function pointers or nasty arrays
// Possible cases:
// ret type (*variable)(params)
// ret type (* const (variable[4]))(params)
t
->
pNext
&&
(
(
cxxTokenTypeIs
(
t
->
pNext
,
CXXTokenTypeParenthesisChain
)
&&
cxxParserTokenChainLooksLikeFunctionParameterList
(
t
->
pNext
->
pChain
,
NULL
)
)
||
cxxTokenTypeIs
(
t
->
pNext
,
CXXTokenTypeSquareParenthesisChain
)
)
&&
(
pIdentifier
=
cxxTokenChainFirstPossiblyNestedTokenOfType
(
t
->
pChain
,
CXXTokenTypeIdentifier
,
NULL
))
&&
// Discard function declarations with function return types
// like void (*A(B))(C);
(
(
!
pIdentifier
->
pNext
)
||
(
!
cxxTokenTypeIs
(
pIdentifier
->
pNext
,
CXXTokenTypeParenthesisChain
))
)
)
{
// A function pointer.
// There are two parentheses, skip the second too.
pTokenBefore
=
t
->
pPrev
;
t
=
t
->
pNext
->
pNext
;
pRemoveStart
=
t
;
}
else
if
(
(
t
->
pChain
->
iCount
==
3
)
&&
cxxTokenTypeIs
(
cxxTokenChainAt
(
t
->
pChain
,
1
),
CXXTokenTypeParenthesisChain
)
&&
t
->
pPrev
&&
cxxTokenTypeIs
(
t
->
pPrev
,
CXXTokenTypeIdentifier
)
&&
t
->
pPrev
->
pPrev
&&
cxxTokenTypeIs
(
t
->
pPrev
->
pPrev
,
CXXTokenTypeIdentifier
)
)
{
CXX_DEBUG_LEAVE_TEXT
(
"Parenthesis seems to define an __ARGS style prototype"
);
return
bGotVariable
;
}
else
if
(
cxxTokenTypeIs
(
t
->
pPrev
,
CXXTokenTypeIdentifier
)
&&
(
(
eScopeType
==
CXXScopeTypeNamespace
)
||
(
eScopeType
==
CXXScopeTypeFunction
)
)
&&
cxxParserCurrentLanguageIsCPP
()
&&
cxxParserTokenChainLooksLikeConstructorParameterSet
(
t
->
pChain
)
)
{
// ok, *might* be variable instantiation
pIdentifier
=
t
->
pPrev
;
pTokenBefore
=
pIdentifier
->
pPrev
;
}
else
{
CXX_DEBUG_LEAVE_TEXT
(
"No recognizable parenthesis form for a variable"
);
return
bGotVariable
;
}
}
break
;
case
CXXTokenTypeBracketChain
:
if
(
cxxTokenTypeIs
(
t
->
pPrev
,
CXXTokenTypeIdentifier
)
&&
cxxParserCurrentLanguageIsCPP
()
&&
cxxParserTokenChainLooksLikeConstructorParameterSet
(
t
->
pChain
)
)
{
// ok, *might* be new C++ style variable initialization
pIdentifier
=
t
->
pPrev
;
pTokenBefore
=
pIdentifier
->
pPrev
;
}
else
{
CXX_DEBUG_LEAVE_TEXT
(
"Bracket chain that doesn't look like a C++ var init"
);
return
bGotVariable
;
}
break
;
case
CXXTokenTypeSingleColon
:
// check for bitfield
if
(
t
->
pNext
&&
cxxTokenTypeIsOneOf
(
t
->
pNext
,
CXXTokenTypeNumber
|
CXXTokenTypeIdentifier
)
)
{
// ok, looks like a bit field
if
(
cxxTokenTypeIs
(
t
->
pNext
,
CXXTokenTypeNumber
)
&&
t
->
pNext
->
pNext
&&
cxxTokenTypeIsOneOf
(
t
->
pNext
->
pNext
,
CXXTokenTypeComma
|
CXXTokenTypeSemicolon
|
CXXTokenTypeAssignment
)
)
{
// keep bitfield width specification as part of type
pIdentifier
=
t
->
pPrev
;
pTokenBefore
=
pIdentifier
->
pPrev
;
t
=
t
->
pNext
->
pNext
;
}
else
{
// Too complex: strip width specification (the best we can do)
pIdentifier
=
t
->
pPrev
;
pTokenBefore
=
pIdentifier
->
pPrev
;
}
}
else
{
CXX_DEBUG_LEAVE_TEXT
(
"Single colon that doesn't look like a bit field"
);
return
bGotVariable
;
}
break
;
case
CXXTokenTypeSquareParenthesisChain
:
// check for array
// Keep the array specifier as part of type
pIdentifier
=
t
->
pPrev
;
pTokenBefore
=
pIdentifier
->
pPrev
;
while
(
t
->
pNext
&&
cxxTokenTypeIs
(
t
->
pNext
,
CXXTokenTypeSquareParenthesisChain
))
t
=
t
->
pNext
;
if
(
(
!
t
->
pNext
)
||
(
!
cxxTokenTypeIsOneOf
(
t
->
pNext
,
CXXTokenTypeComma
|
CXXTokenTypeSemicolon
|
CXXTokenTypeAssignment
|
CXXTokenTypeBracketChain
))
)
{
CXX_DEBUG_LEAVE_TEXT
(
"No comma, semicolon, = or {} after []"
);
return
bGotVariable
;
}
t
=
t
->
pNext
;
pRemoveStart
=
t
;
break
;
default
:
// Must be identifier
if
(
t
->
pPrev
->
eType
!=
CXXTokenTypeIdentifier
)
{
CXX_DEBUG_LEAVE_TEXT
(
"No identifier before the notable token"
);
return
bGotVariable
;
}
pIdentifier
=
t
->
pPrev
;
pTokenBefore
=
pIdentifier
->
pPrev
;
break
;
}
CXX_DEBUG_ASSERT
(
pIdentifier
,
"We should have found an identifier here"
);
if
(
!
pTokenBefore
)
{
CXX_DEBUG_LEAVE_TEXT
(
"Identifier not preceeded by a type"
);
return
bGotVariable
;
}
CXXToken
*
pScopeEnd
=
pTokenBefore
->
pNext
;
CXXToken
*
pScopeStart
=
NULL
;
// Skip back to the beginning of the scope, if any
while
(
pTokenBefore
->
eType
==
CXXTokenTypeMultipleColons
)
{
if
(
!
cxxParserCurrentLanguageIsCPP
())
{
CXX_DEBUG_LEAVE_TEXT
(
"Syntax error: found multiple colons in C language"
);
return
false
;
}
pTokenBefore
=
pTokenBefore
->
pPrev
;
if
(
!
pTokenBefore
)
{
CXX_DEBUG_LEAVE_TEXT
(
"Identifier preceeded by multiple colons "
\
"but not preceeded by a type"
);
return
bGotVariable
;
}
if
(
cxxTokenTypeIs
(
pTokenBefore
,
CXXTokenTypeGreaterThanSign
))
{
CXXToken
*
pAux
=
cxxTokenChainSkipBackToStartOfTemplateAngleBracket
(
pTokenBefore
);
if
((
!
pAux
)
||
(
!
pAux
->
pPrev
))
{
CXX_DEBUG_LEAVE_TEXT
(
"Identifier preceeded by multiple colons "
\
"and by a >, but failed to skip back to starting <"
);
return
bGotVariable
;
}
pTokenBefore
=
pAux
->
pPrev
;
}
if
(
!
cxxTokenTypeIs
(
pTokenBefore
,
CXXTokenTypeIdentifier
))
{
CXX_DEBUG_LEAVE_TEXT
(
"Identifier preceeded by multiple colons "
\
"with probable syntax error"
);
return
bGotVariable
;
}
pScopeStart
=
pTokenBefore
;
pTokenBefore
=
pTokenBefore
->
pPrev
;
if
(
!
pTokenBefore
)
{
CXX_DEBUG_LEAVE_TEXT
(
"Identifier preceeded by multiple colons "
\
"but not preceeded by a type"
);
return
bGotVariable
;
}
}
if
(
!
bGotVariable
)
{
// now pTokenBefore should be part of a type (either the variable type or return
// type of a function in case of a function pointer)
if
(
!
cxxTokenTypeIsOneOf
(
pTokenBefore
,
CXXTokenTypeIdentifier
|
CXXTokenTypeKeyword
|
CXXTokenTypeStar
|
CXXTokenTypeAnd
))
{
if
(
cxxTokenTypeIs
(
pTokenBefore
,
CXXTokenTypeGreaterThanSign
))
{
// the < > must be balanced
CXXToken
*
t2
=
pTokenBefore
->
pPrev
;
int
iLevel
=
1
;
while
(
t2
)
{
if
(
cxxTokenTypeIs
(
t2
,
CXXTokenTypeGreaterThanSign
))
iLevel
++
;
else
if
(
cxxTokenTypeIs
(
t2
,
CXXTokenTypeSmallerThanSign
))
iLevel
--
;
t2
=
t2
->
pPrev
;
}
if
(
iLevel
!=
0
)
{
CXX_DEBUG_LEAVE_TEXT
(
"The > token is unbalanced and does not "
\
"seem to be part of type name"
);
return
bGotVariable
;
}
}
else
{
CXX_DEBUG_LEAVE_TEXT
(
"Token '%s' of type 0x%02x does not seem "
\
"to be part of type name"
,
vStringValue
(
pTokenBefore
->
pszWord
),
pTokenBefore
->
eType
);
return
bGotVariable
;
}
}
bGotVariable
=
true
;
}
// Goodie. We have an identifier and almost certainly a type here.
// From now on we start destroying the chain: mark the return value as true
// so nobody else will try to extract stuff from it
int
iScopesPushed
=
0
;
if
(
pScopeStart
)
{
// Push the scopes and remove them from the chain so they are not in the way
while
(
pScopeStart
!=
pScopeEnd
)
{
// This is the scope id START. It might contain
// also other tokens like in ...::A<B>::...
CXXToken
*
pPartEnd
=
cxxTokenChainNextTokenOfType
(
pScopeStart
,
CXXTokenTypeMultipleColons
);
CXX_DEBUG_ASSERT
(
pPartEnd
,
"We should have found multiple colons here!"
);
CXX_DEBUG_ASSERT
(
pPartEnd
->
pPrev
,
"And there should be a previous token too"
);
CXXToken
*
pScopeId
=
cxxTokenChainExtractRange
(
pScopeStart
,
pPartEnd
->
pPrev
,
0
);
cxxScopePush
(
pScopeId
,
CXXScopeTypeClass
,
// WARNING: We don't know if it's really a class! (FIXME?)
CXXScopeAccessUnknown
);
CXXToken
*
pAux
=
pPartEnd
->
pNext
;
cxxTokenChainDestroyRange
(
pChain
,
pScopeStart
,
pPartEnd
);
pScopeStart
=
pAux
;
iScopesPushed
++
;
}
}
bool
bKnRStyleParameters
=
(
uFlags
&
CXXExtractVariableDeclarationsKnRStyleParameters
);
tagEntryInfo
*
tag
=
cxxTagBegin
(
bKnRStyleParameters
?
CXXTagKindPARAMETER
:
((
g_cxx
.
uKeywordState
&
CXXParserKeywordStateSeenExtern
)
?
CXXTagKindEXTERNVAR
:
cxxScopeGetVariableKind
()),
pIdentifier
);
if
(
tag
)
{
CXX_DEBUG_ASSERT
(
t
!=
pIdentifier
,
"This should not happen"
);
// remove the identifier
cxxTokenChainTakeRecursive
(
pChain
,
pIdentifier
);
// Fix square parentheses: if they contain something that is not a numeric
// constant then empty them up
CXXToken
*
pPartOfType
=
t
->
pPrev
;
CXX_DEBUG_ASSERT
(
pPartOfType
,
"There should be a part of type name here"
);
while
(
pPartOfType
&&
cxxTokenTypeIs
(
pPartOfType
,
CXXTokenTypeSquareParenthesisChain
))
{
CXXTokenChain
*
pAuxChain
=
pPartOfType
->
pChain
;
if
(
pAuxChain
->
iCount
>
2
)
{
if
(
(
pAuxChain
->
iCount
>
3
)
||
(
!
cxxTokenTypeIs
(
cxxTokenChainAt
(
pAuxChain
,
1
),
CXXTokenTypeNumber
))
)
{
cxxTokenChainDestroyRange
(
pAuxChain
,
cxxTokenChainFirst
(
pAuxChain
)
->
pNext
,
cxxTokenChainLast
(
pAuxChain
)
->
pPrev
);
}
}
pPartOfType
=
pPartOfType
->
pPrev
;
}
// anything that remains is part of type
CXXToken
*
pTypeToken
=
cxxTagCheckAndSetTypeField
(
cxxTokenChainFirst
(
pChain
),
t
->
pPrev
);
tag
->
isFileScope
=
bKnRStyleParameters
?
true
:
(
(
(
eScopeType
==
CXXScopeTypeNamespace
)
&&
(
g_cxx
.
uKeywordState
&
CXXParserKeywordStateSeenStatic
)
&&
(
!
isInputHeaderFile
())
)
||
// locals are always hidden
(
eScopeType
==
CXXScopeTypeFunction
)
||
(
(
eScopeType
!=
CXXScopeTypeNamespace
)
&&
(
eScopeType
!=
CXXScopeTypeFunction
)
&&
(
!
isInputHeaderFile
())
)
);
vString
*
pszProperties
=
NULL
;
if
(
cxxTagFieldEnabled
(
CXXTagFieldProperties
))
{
unsigned
int
uProperties
=
0
;
if
(
g_cxx
.
uKeywordState
&
CXXParserKeywordStateSeenStatic
)
uProperties
|=
CXXTagPropertyStatic
;
if
(
g_cxx
.
uKeywordState
&
CXXParserKeywordStateSeenExtern
)
uProperties
|=
CXXTagPropertyExtern
;
if
(
g_cxx
.
uKeywordState
&
CXXParserKeywordStateSeenMutable
)
uProperties
|=
CXXTagPropertyMutable
;
if
(
g_cxx
.
uKeywordState
&
CXXParserKeywordStateSeenAttributeDeprecated
)
uProperties
|=
CXXTagPropertyDeprecated
;
// Volatile is part of the type, so we don't mark it as a property
//if(g_cxx.uKeywordState & CXXParserKeywordStateSeenVolatile)
// uProperties |= CXXTagPropertyVolatile;
pszProperties
=
cxxTagSetProperties
(
uProperties
);
}
cxxTagCommit
();
if
(
pTypeToken
)
cxxTokenDestroy
(
pTypeToken
);
if
(
pszProperties
)
vStringDelete
(
pszProperties
);
cxxTokenDestroy
(
pIdentifier
);
}
while
(
iScopesPushed
>
0
)
{
cxxScopePop
();
iScopesPushed
--
;
}
if
(
!
t
)
{
CXX_DEBUG_LEAVE_TEXT
(
"Nothing more"
);
return
bGotVariable
;
}
if
(
!
cxxTokenTypeIsOneOf
(
t
,
CXXTokenTypeComma
|
CXXTokenTypeSemicolon
|
CXXTokenTypeOpeningBracket
))
{
t
=
cxxTokenChainNextTokenOfType
(
t
,
CXXTokenTypeComma
|
CXXTokenTypeSemicolon
|
CXXTokenTypeOpeningBracket
);
if
(
!
t
)
{
CXX_DEBUG_LEAVE_TEXT
(
"Didn't find a comma, semicolon or {"
);
return
bGotVariable
;
}
}
if
(
cxxTokenTypeIsOneOf
(
t
,
CXXTokenTypeSemicolon
|
CXXTokenTypeOpeningBracket
))
{
CXX_DEBUG_LEAVE_TEXT
(
"Noting else"
);
return
bGotVariable
;
}
// Comma. Might have other declarations here.
CXX_DEBUG_PRINT
(
"At a comma, might have other declarations here"
);
t
=
t
->
pNext
;
CXX_DEBUG_ASSERT
(
t
,
"There should be something after the comma here!"
);
if
(
!
pRemoveStart
)
{
CXX_DEBUG_LEAVE_TEXT
(
"Could not properly fix type name for next token: stopping here"
);
return
bGotVariable
;
}
cxxTokenChainDestroyRange
(
pChain
,
pRemoveStart
,
t
->
pPrev
);
}
CXX_DEBUG_LEAVE_TEXT
(
"Reached end"
);
return
bGotVariable
;
}
File Metadata
Details
Attached
Mime Type
text/x-c
Expires
Jun 4 2025, 7:27 PM (9 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3399173
Attached To
rPUC universal-ctags debian packaging
Event Timeline
Log In to Comment