1
0
mirror of https://github.com/cc65/cc65.git synced 2024-06-15 02:29:32 +00:00

Merge pull request #1833 from acqn/TokenizerFix

[cc65] Tokenizer fixes
This commit is contained in:
Bob Andrews 2022-08-21 17:45:38 +02:00 committed by GitHub
commit f838ba7341
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 270 additions and 142 deletions

View File

@ -1234,6 +1234,7 @@ static void Primary (ExprDesc* E)
case TOK_ICONST:
case TOK_CCONST:
case TOK_WCCONST:
/* Character and integer constants */
E->IVal = CurTok.IVal;
E->Flags = E_LOC_NONE | E_RTYPE_RVAL;

View File

@ -114,6 +114,7 @@ static void PPhiePrimary (PPExpr* Expr)
switch (CurTok.Tok) {
case TOK_ICONST:
case TOK_CCONST:
case TOK_WCCONST:
/* Character and integer constants */
Expr->IVal = CurTok.IVal;
/* According to the C standard, all signed types act as intmax_t
@ -215,6 +216,12 @@ static void PPhie11 (PPExpr* Expr)
}
}
/* Check for excessive expressions */
if (!TokIsPunc (&CurTok)) {
PPError ("Missing binary operator");
PPErrorSkipLine ();
}
}
@ -854,7 +861,7 @@ void ParsePPExprInLine (PPExpr* Expr)
/* Initialize the parser status */
PPEvaluationFailed = 0;
PPEvaluationEnabled = 1;
NextLineDisabled = 1;
PPParserRunning = 1;
/* Parse */
PPExprInit (Expr);
@ -867,5 +874,5 @@ void ParsePPExprInLine (PPExpr* Expr)
}
/* Restore parser status */
NextLineDisabled = 0;
PPParserRunning = 0;
}

View File

@ -69,7 +69,7 @@
Token CurTok; /* The current token */
Token NextTok; /* The next token */
int NextLineDisabled; /* Disabled to read next line */
int PPParserRunning; /* Is tokenizer used by the preprocessor */
@ -189,8 +189,10 @@ static int SkipWhite (void)
{
while (1) {
while (CurC == '\0') {
/* If reading next line fails or is forbidden, bail out */
if (NextLineDisabled || PreprocessNextLine () == 0) {
/* If reading next line fails or is disabled with directives, bail
** out.
*/
if (PPParserRunning || PreprocessNextLine () == 0) {
return 0;
}
}
@ -246,6 +248,45 @@ int IsSym (char* S)
int IsPPNumber (int Cur, int Next)
/* Tell whether two successive input characters start a pp-number. This is the
** case when Cur is a digit, or when Cur is a '.' that is directly followed by
** a digit. The result is whatever IsDigit yields for the deciding character.
*/
{
    if (Cur == '.') {
        /* A leading dot only counts if a digit comes right after it */
        return IsDigit (Next);
    }

    /* Otherwise the first character itself must be a digit */
    return IsDigit (Cur);
}
void CopyPPNumber (StrBuf* Target)
/* Append the pp-number found at the current input position (CurC/NextC) to
** Target, consuming the copied characters with NextChar. If the input does
** not start with a pp-number, nothing is appended and nothing is consumed.
*/
{
    int AllowPExp;

    /* Bail out early unless a pp-number starts right here */
    if (!IsPPNumber (CurC, NextC)) {
        return;
    }

    /* P-exp is only valid in C99 and later */
    AllowPExp = (IS_Get (&Standard) >= STD_C99);

    for (;;) {
        /* A pp-number continues with identifier characters, digits and dots */
        if (!IsIdent (CurC) && !IsDigit (CurC) && CurC != '.') {
            return;
        }
        SB_AppendChar (Target, CurC);

        if (NextC == '+' || NextC == '-') {
            /* A sign belongs to the number only directly after an exponent
            ** marker: 'e'/'E' always, 'p'/'P' when P-exp is allowed.
            */
            int AfterExpMark = (CurC == 'e' || CurC == 'E' ||
                                (AllowPExp && (CurC == 'p' || CurC == 'P')));

            if (!AfterExpMark) {
                /* The sign starts a new token: consume the character just
                ** copied and stop, leaving the sign in CurC.
                */
                NextChar ();
                return;
            }

            /* Take the sign along with the exponent marker */
            SB_AppendChar (Target, NextC);
            NextChar ();
        }

        NextChar ();
    }
}
static void UnknownChar (char C)
/* Error message for unknown character */
{
@ -371,6 +412,15 @@ static void CharConst (void)
{
int C;
if (CurC == 'L') {
/* Wide character constant */
NextTok.Tok = TOK_WCCONST;
NextChar ();
} else {
/* Narrow character constant */
NextTok.Tok = TOK_CCONST;
}
/* Skip the quote */
NextChar ();
@ -385,9 +435,6 @@ static void CharConst (void)
NextChar ();
}
/* Setup values and attributes */
NextTok.Tok = TOK_CCONST;
/* Translate into target charset */
NextTok.IVal = SignExtendChar (TgtTranslateChar (C));
@ -458,76 +505,77 @@ static void StringConst (void)
static void NumericConst (void)
/* Parse a numeric constant */
{
unsigned Base; /* Temporary number base */
unsigned Prefix; /* Base according to prefix */
StrBuf S = STATIC_STRBUF_INITIALIZER;
unsigned Base; /* Temporary number base according to prefix */
unsigned Index;
StrBuf Src = AUTO_STRBUF_INITIALIZER;
int IsFloat;
char C;
unsigned DigitVal;
unsigned long IVal; /* Value */
/* Get the pp-number first, then parse on it */
CopyPPNumber (&Src);
SB_Terminate (&Src);
SB_Reset (&Src);
/* Check for a leading hex, octal or binary prefix and determine the
** possible integer types.
*/
if (CurC == '0') {
if (SB_Peek (&Src) == '0') {
/* Gobble 0 and examine next char */
NextChar ();
if (toupper (CurC) == 'X') {
Base = Prefix = 16;
NextChar (); /* gobble "x" */
} else if (toupper (CurC) == 'B' && IS_Get (&Standard) >= STD_CC65) {
Base = Prefix = 2;
NextChar (); /* gobble 'b' */
SB_Skip (&Src);
if (toupper (SB_Peek (&Src)) == 'X' &&
IsXDigit (SB_LookAt (&Src, SB_GetIndex (&Src) + 1))) {
Base = 16;
SB_Skip (&Src); /* gobble "x" */
} else if (toupper (SB_Peek (&Src)) == 'B' &&
IS_Get (&Standard) >= STD_CC65 &&
IsDigit (SB_LookAt (&Src, SB_GetIndex (&Src) + 1))) {
Base = 2;
SB_Skip (&Src); /* gobble 'b' */
} else {
Base = 10; /* Assume 10 for now - see below */
Prefix = 8; /* Actual prefix says octal */
}
} else {
Base = Prefix = 10;
Base = 10;
}
/* Because floating point numbers don't have octal prefixes (a number
** with a leading zero is decimal), we first have to read the number
** before converting it, so we can determine if it's a float or an
** integer.
/* Because floating point numbers don't have octal prefixes (a number with
** a leading zero is decimal), we first have to read the number before
** converting it, so we can determine if it's a float or an integer.
*/
while (IsXDigit (CurC) && HexVal (CurC) < Base) {
SB_AppendChar (&S, CurC);
NextChar ();
Index = SB_GetIndex (&Src);
while ((C = SB_Peek (&Src)) != '\0' && (Base <= 10 ? IsDigit (C) : IsXDigit (C))) {
SB_Skip (&Src);
}
SB_Terminate (&S);
/* The following character tells us if we have an integer or floating
** point constant. Note: Hexadecimal floating point constants aren't
** supported in C89.
*/
IsFloat = (CurC == '.' ||
(Base == 10 && toupper (CurC) == 'E') ||
(Base == 16 && toupper (CurC) == 'P' && IS_Get (&Standard) >= STD_C99));
IsFloat = (C == '.' ||
(Base == 10 && toupper (C) == 'E') ||
(Base == 16 && toupper (C) == 'P' && IS_Get (&Standard) >= STD_C99));
/* If we don't have a floating point type, an octal prefix results in an
** octal base.
*/
if (!IsFloat && Prefix == 8) {
/* An octal prefix for an integer type results in an octal base */
if (!IsFloat && Base == 10 && SB_LookAt (&Src, 0) == '0') {
Base = 8;
}
/* Since we do now know the correct base, convert the remembered input
** into a number.
*/
SB_Reset (&S);
/* Since we now know the correct base, convert the input into a number */
SB_SetIndex (&Src, Index);
IVal = 0;
while ((C = SB_Get (&S)) != '\0') {
while ((C = SB_Peek (&Src)) != '\0' && (Base <= 10 ? IsDigit (C) : IsXDigit (C))) {
DigitVal = HexVal (C);
if (DigitVal >= Base) {
Error ("Numeric constant contains digits beyond the radix");
Error ("Invalid digit \"%c\" beyond radix %u constant", C, Base);
SB_Clear (&Src);
break;
}
IVal = (IVal * Base) + DigitVal;
SB_Skip (&Src);
}
/* We don't need the string buffer any longer */
SB_Done (&S);
/* Distinguish between integer and floating point constants */
if (!IsFloat) {
@ -538,27 +586,32 @@ static void NumericConst (void)
** possible to convert the data to unsigned long even if the IT_ULONG
** flag were not set, but we are not doing that.
*/
if (toupper (CurC) == 'U') {
if (toupper (SB_Peek (&Src)) == 'U') {
/* Unsigned type */
NextChar ();
if (toupper (CurC) != 'L') {
SB_Skip (&Src);
if (toupper (SB_Peek (&Src)) != 'L') {
Types = IT_UINT | IT_ULONG;
} else {
NextChar ();
SB_Skip (&Src);
Types = IT_ULONG;
}
} else if (toupper (CurC) == 'L') {
} else if (toupper (SB_Peek (&Src)) == 'L') {
/* Long type */
NextChar ();
if (toupper (CurC) != 'U') {
SB_Skip (&Src);
if (toupper (SB_Peek (&Src)) != 'U') {
Types = IT_LONG | IT_ULONG;
WarnTypes = IT_ULONG;
} else {
NextChar ();
SB_Skip (&Src);
Types = IT_ULONG;
}
} else {
if (Prefix == 10) {
if (SB_Peek (&Src) != '\0') {
Error ("Invalid suffix \"%s\" on integer constant",
SB_GetConstBuf (&Src) + SB_GetIndex (&Src));
}
if (Base == 10) {
/* Decimal constants are of any type but uint */
Types = IT_INT | IT_LONG | IT_ULONG;
WarnTypes = IT_LONG | IT_ULONG;
@ -622,16 +675,16 @@ static void NumericConst (void)
Double FVal = FP_D_FromInt (IVal); /* Convert to double */
/* Check for a fractional part and read it */
if (CurC == '.') {
if (SB_Peek (&Src) == '.') {
Double Scale;
/* Skip the dot */
NextChar ();
SB_Skip (&Src);
/* Read fractional digits */
Scale = FP_D_Make (1.0);
while (IsXDigit (CurC) && (DigitVal = HexVal (CurC)) < Base) {
while (IsXDigit (SB_Peek (&Src)) && (DigitVal = HexVal (SB_Peek (&Src))) < Base) {
/* Get the value of this digit */
Double FracVal = FP_D_Div (FP_D_FromInt (DigitVal * Base), Scale);
/* Add it to the float value */
@ -639,25 +692,25 @@ static void NumericConst (void)
/* Scale base */
Scale = FP_D_Mul (Scale, FP_D_FromInt (DigitVal));
/* Skip the digit */
NextChar ();
SB_Skip (&Src);
}
}
/* Check for an exponent and read it */
if ((Base == 16 && toupper (CurC) == 'F') ||
(Base == 10 && toupper (CurC) == 'E')) {
if ((Base == 16 && toupper (SB_Peek (&Src)) == 'P') ||
(Base == 10 && toupper (SB_Peek (&Src)) == 'E')) {
unsigned Digits;
unsigned Exp;
/* Skip the exponent notifier */
NextChar ();
SB_Skip (&Src);
/* Read an optional sign */
if (CurC == '-') {
NextChar ();
} else if (CurC == '+') {
NextChar ();
if (SB_Peek (&Src) == '-') {
SB_Skip (&Src);
} else if (SB_Peek (&Src) == '+') {
SB_Skip (&Src);
}
/* Read exponent digits. Since we support only 32 bit floats
@ -668,11 +721,11 @@ static void NumericConst (void)
*/
Digits = 0;
Exp = 0;
while (IsDigit (CurC)) {
while (IsDigit (SB_Peek (&Src))) {
if (++Digits <= 3) {
Exp = Exp * 10 + HexVal (CurC);
Exp = Exp * 10 + HexVal (SB_Peek (&Src));
}
NextChar ();
SB_Skip (&Src);
}
/* Check for errors: We must have exponent digits, and not more
@ -691,10 +744,14 @@ static void NumericConst (void)
}
/* Check for a suffix and determine the type of the constant */
if (toupper (CurC) == 'F') {
NextChar ();
if (toupper (SB_Peek (&Src)) == 'F') {
SB_Skip (&Src);
NextTok.Type = type_float;
} else {
if (SB_Peek (&Src) != '\0') {
Error ("Invalid suffix \"%s\" on floating constant",
SB_GetConstBuf (&Src) + SB_GetIndex (&Src));
}
NextTok.Type = type_double;
}
@ -703,6 +760,9 @@ static void NumericConst (void)
NextTok.Tok = TOK_FCONST;
}
/* We don't need the string buffer any longer */
SB_Done (&Src);
}
@ -744,26 +804,34 @@ void NextToken (void)
}
/* Determine the next token from the lookahead */
if (IsDigit (CurC) || (CurC == '.' && IsDigit (NextC))) {
if (IsPPNumber (CurC, NextC)) {
/* A number */
NumericConst ();
return;
}
/* Check for wide character literals */
if (CurC == 'L' && NextC == '\"') {
StringConst ();
return;
/* Check for wide character constants and literals */
if (CurC == 'L') {
if (NextC == '\"') {
StringConst ();
return;
} else if (NextC == '\'') {
CharConst ();
return;
}
}
/* Check for keywords and identifiers */
if (IsSym (token)) {
/* Check for a keyword */
if ((NextTok.Tok = FindKey (token)) != TOK_IDENT) {
/* Reserved word found */
return;
if (!PPParserRunning) {
/* Check for a keyword */
if ((NextTok.Tok = FindKey (token)) != TOK_IDENT) {
/* Reserved word found */
return;
}
}
/* No reserved word, check for special symbols */
if (token[0] == '_' && token[1] == '_') {
/* Special symbols */
@ -1012,6 +1080,15 @@ void NextToken (void)
SetTok (TOK_COMP);
break;
case '#':
NextChar ();
if (CurC == '#') {
SetTok (TOK_DOUBLE_HASH);
} else {
NextTok.Tok = TOK_HASH;
}
break;
default:
UnknownChar (CurC);

View File

@ -79,6 +79,10 @@ typedef enum token_t {
TOK_FASTCALL,
TOK_CDECL,
/* Address sizes */
TOK_FAR,
TOK_NEAR,
/* Tokens denoting types */
TOK_FIRST_TYPE,
TOK_ENUM = TOK_FIRST_TYPE,
@ -95,94 +99,101 @@ typedef enum token_t {
TOK_VOID,
TOK_LAST_TYPE = TOK_VOID,
/* Control statements */
/* Selection statements */
TOK_IF,
TOK_ELSE,
TOK_SWITCH,
/* Iteration statements */
TOK_WHILE,
TOK_DO,
TOK_FOR,
TOK_GOTO,
TOK_IF,
TOK_RETURN,
TOK_SWITCH,
TOK_WHILE,
TOK_ASM,
/* Jump statements */
TOK_GOTO,
TOK_CONTINUE,
TOK_BREAK,
TOK_RETURN,
/* Labels */
TOK_CASE,
TOK_DEFAULT,
TOK_BREAK,
TOK_CONTINUE,
TOK_ELSE,
TOK_ELLIPSIS,
/* Misc. */
TOK_ATTRIBUTE,
TOK_PRAGMA,
TOK_STATIC_ASSERT,
TOK_ASM,
TOK_SIZEOF,
TOK_IDENT,
TOK_SEMI,
/* Primary operators */
TOK_LBRACK,
/* Punctuators */
TOK_FIRST_PUNC,
TOK_LBRACK = TOK_FIRST_PUNC,
TOK_RBRACK,
TOK_LPAREN,
TOK_RPAREN,
TOK_LCURLY,
TOK_RCURLY,
TOK_DOT,
TOK_PTR_REF,
TOK_LCURLY,
TOK_RBRACK,
TOK_COMP,
TOK_INC,
TOK_PLUS_ASSIGN,
TOK_PLUS,
TOK_COMMA,
TOK_DEC,
TOK_MINUS_ASSIGN,
TOK_RCURLY,
TOK_MINUS,
TOK_MUL_ASSIGN,
TOK_ADDR,
TOK_AND = TOK_ADDR, /* Alias */
TOK_STAR,
TOK_MUL = TOK_STAR, /* Alias */
TOK_DIV_ASSIGN,
TOK_DIV,
TOK_BOOL_AND,
TOK_AND_ASSIGN,
TOK_AND,
TOK_NE,
TOK_PLUS,
TOK_MINUS,
TOK_COMP,
TOK_BOOL_NOT,
TOK_BOOL_OR,
TOK_OR_ASSIGN,
TOK_OR,
TOK_EQ,
TOK_ASSIGN,
/* Inequalities */
TOK_LE,
TOK_LT,
TOK_GE,
TOK_GT,
TOK_SHL_ASSIGN,
TOK_SHL,
TOK_SHR_ASSIGN,
TOK_SHR,
TOK_XOR_ASSIGN,
TOK_XOR,
TOK_MOD_ASSIGN,
TOK_DIV,
TOK_MOD,
TOK_SHL,
TOK_SHR,
TOK_LT,
TOK_GT,
TOK_LE,
TOK_GE,
TOK_EQ,
TOK_NE,
TOK_XOR,
TOK_OR,
TOK_BOOL_AND,
TOK_BOOL_OR,
TOK_QUEST,
TOK_COLON,
TOK_RPAREN,
TOK_SCONST,
TOK_SEMI,
TOK_ELLIPSIS,
TOK_ASSIGN,
TOK_MUL_ASSIGN,
TOK_DIV_ASSIGN,
TOK_MOD_ASSIGN,
TOK_PLUS_ASSIGN,
TOK_MINUS_ASSIGN,
TOK_SHL_ASSIGN,
TOK_SHR_ASSIGN,
TOK_AND_ASSIGN,
TOK_XOR_ASSIGN,
TOK_OR_ASSIGN,
TOK_COMMA,
TOK_HASH,
TOK_HASH_HASH,
TOK_DOUBLE_HASH = TOK_HASH_HASH, /* Alias */
TOK_LAST_PUNC = TOK_DOUBLE_HASH,
/* Primary expressions */
TOK_ICONST,
TOK_CCONST,
TOK_WCCONST,
TOK_FCONST,
TOK_SCONST,
TOK_WCSCONST,
TOK_ATTRIBUTE,
TOK_STATIC_ASSERT,
TOK_FAR,
TOK_NEAR,
TOK_IDENT,
TOK_A,
TOK_X,
TOK_Y,
TOK_AX,
TOK_EAX,
TOK_PRAGMA
TOK_EAX
} token_t;
@ -210,7 +221,7 @@ struct Token {
extern Token CurTok; /* The current token */
extern Token NextTok; /* The next token */
extern int NextLineDisabled; /* Disabled to read next line */
extern int PPParserRunning; /* Is tokenizer used by the preprocessor */
@ -220,6 +231,17 @@ extern int NextLineDisabled; /* Disabled to read next line */
#if defined(HAVE_INLINE)
INLINE int TokIsPunc (const Token* T)
/* Return true if the token code of T lies within the punctuator range, i.e.
** between TOK_FIRST_PUNC and TOK_LAST_PUNC, both inclusive.
*/
{
    return (TOK_FIRST_PUNC <= T->Tok && T->Tok <= TOK_LAST_PUNC);
}
#else
/* Macro fallback doing the same range check. Note that T may be evaluated
** twice, so it must not have side effects.
*/
#  define TokIsPunc(T)  \
    (TOK_FIRST_PUNC <= (T)->Tok && (T)->Tok <= TOK_LAST_PUNC)
#endif
#if defined(HAVE_INLINE)
INLINE int TokIsStorageClass (const Token* T)
/* Return true if the token is a storage class specifier */
@ -263,6 +285,14 @@ void SymName (char* S);
int IsSym (char* S);
/* If a symbol follows, read it and return 1, otherwise return 0 */
int IsPPNumber (int Cur, int Next);
/* Return 1 if the two successive characters indicate a pp-number, otherwise
** return 0. A pp-number is indicated by Cur being a digit, or by Cur being
** a '.' with Next being a digit.
*/
void CopyPPNumber (StrBuf* Target);
/* Copy a pp-number from the input to Target. The characters are appended to
** Target. If the input does not start with a pp-number (see IsPPNumber),
** nothing is copied. A '+' or '-' is only taken as part of the number when
** it directly follows an exponent marker ('e'/'E', or 'p'/'P' in C99 and
** later).
*/
void NextToken (void);
/* Get next token from input stream */

13
test/val/pr1833.c Normal file
View File

@ -0,0 +1,13 @@
/* Test for PR #1833 fixes */
#define char 1
#if char && !int && L'A' - L'B' == 'A' - 'B' && L'A' == 'A'
#else
#error
#endif
int main(void)
{
return 0;
}