Support unconvertible preprocessing numbers.

These are tokens that follow the syntax for a preprocessing number, but not for an integer or floating constant after preprocessing. They are now allowed within the preprocessing phases of the compiler. They are not legal after preprocessing, but they may be used as operands of the # and ## preprocessor operators to produce legal tokens.
This commit is contained in:
Stephen Heumann 2024-04-23 21:39:14 -05:00
parent 6b7414384f
commit 8278f7865a
7 changed files with 156 additions and 53 deletions

View File

@ -202,6 +202,7 @@ type
barbarop,pluseqop,minuseqop,asteriskeqop,slasheqop, barbarop,pluseqop,minuseqop,asteriskeqop,slasheqop,
percenteqop,ltlteqop,gtgteqop,andeqop,caroteqop, percenteqop,ltlteqop,gtgteqop,andeqop,caroteqop,
bareqop,poundpoundop,dotdotdotsy, bareqop,poundpoundop,dotdotdotsy,
ppnumber, {preprocessing number (pp-token)}
otherch, {other non-whitespace char (pp-token)} otherch, {other non-whitespace char (pp-token)}
eolsy,eofsy, {control characters} eolsy,eofsy, {control characters}
typedef, {user types} typedef, {user types}
@ -225,7 +226,7 @@ type
tokenSet = set of tokenEnum; tokenSet = set of tokenEnum;
tokenClass = (reservedWord,reservedSymbol,identifier,intConstant,longConstant, tokenClass = (reservedWord,reservedSymbol,identifier,intConstant,longConstant,
longlongConstant,realConstant,stringConstant,otherCharacter, longlongConstant,realConstant,stringConstant,otherCharacter,
macroParameter); preprocessingNumber,macroParameter);
identPtr = ^identRecord; {^ to a symbol table entry} identPtr = ^identRecord; {^ to a symbol table entry}
tokenType = record {a token} tokenType = record {a token}
kind: tokenEnum; {kind of token} kind: tokenEnum; {kind of token}
@ -243,6 +244,7 @@ type
ispstring: boolean; ispstring: boolean;
prefix: charStrPrefixEnum); prefix: charStrPrefixEnum);
otherCharacter: (ch: char); {used for preprocessing tokens only} otherCharacter: (ch: char); {used for preprocessing tokens only}
preprocessingNumber: (errCode: integer); {used for pp tokens only}
macroParameter: (pnum: integer); macroParameter: (pnum: integer);
end; end;

View File

@ -18,7 +18,7 @@ uses CCommon, MM, Scanner, Symbol, CGI;
{$segment 'HEADER'} {$segment 'HEADER'}
const const
symFileVersion = 42; {version number of .sym file format} symFileVersion = 44; {version number of .sym file format}
var var
inhibitHeader: boolean; {should .sym includes be blocked?} inhibitHeader: boolean; {should .sym includes be blocked?}
@ -722,6 +722,7 @@ procedure EndInclude {chPtr: ptr};
WriteByte(ord(token.prefix)); WriteByte(ord(token.prefix));
end; end;
otherCharacter: WriteByte(ord(token.ch)); otherCharacter: WriteByte(ord(token.ch));
preprocessingNumber:WriteWord(token.errCode);
macroParameter: WriteWord(token.pnum); macroParameter: WriteWord(token.pnum);
reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch, reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch,
rbrackch,poundch,poundpoundop] then rbrackch,poundch,poundpoundop] then
@ -1392,6 +1393,7 @@ var
token.prefix := charStrPrefixEnum(ReadByte); token.prefix := charStrPrefixEnum(ReadByte);
end; end;
otherCharacter: token.ch := chr(ReadByte); otherCharacter: token.ch := chr(ReadByte);
preprocessingNumber: token.errCode := ReadWord;
macroParameter: token.pnum := ReadWord; macroParameter: token.pnum := ReadWord;
reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch, reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch,
rbrackch,poundch,poundpoundop] then rbrackch,poundch,poundpoundop] then

View File

@ -805,6 +805,7 @@ if list or (numErr <> 0) then begin
186: msg := @'lint: implicit conversion changes value of constant'; 186: msg := @'lint: implicit conversion changes value of constant';
187: msg := @'expression has incomplete struct or union type'; 187: msg := @'expression has incomplete struct or union type';
188: msg := @'local variable used in asm statement is out of range for addressing mode'; 188: msg := @'local variable used in asm statement is out of range for addressing mode';
189: msg := @'malformed numeric constant';
end; {case} end; {case}
if extraStr <> nil then begin if extraStr <> nil then begin
extraStr^ := concat(msg^,extraStr^); extraStr^ := concat(msg^,extraStr^);
@ -1097,6 +1098,8 @@ case token.kind of
dotdotdotsy: write('...'); dotdotdotsy: write('...');
otherch: write(token.ch); otherch: write(token.ch);
ppNumber: write(token.numString^);
macroParm: write('$', token.pnum:1); macroParm: write('$', token.pnum:1);
@ -1118,16 +1121,11 @@ procedure CheckIdentifier; forward;
{ See if an identifier is a reserved word, macro or typedef } { See if an identifier is a reserved word, macro or typedef }
procedure DoNumber (scanWork: boolean); forward; procedure DoNumber; forward;
{ The current character starts a number - scan it } { Scan a number from workString }
{ }
{ Parameters: }
{ scanWork - get characters from workString? }
{ } { }
{ Globals: } { Globals: }
{ ch - first character in sequence; set to first char }
{ after sequence }
{ workString - string to take numbers from } { workString - string to take numbers from }
@ -1374,6 +1372,12 @@ else if class1 in numericConstants then begin
str2 := @reservedWords[kind2] str2 := @reservedWords[kind2]
else if kind2 = dotch then else if kind2 = dotch then
str2 := @'.' str2 := @'.'
else if (kind2 = plusch)
and (tk1.numString^[length(tk1.numString^)] in ['e','E','p','P']) then
str2 := @'+'
else if (kind2 = minusch)
and (tk1.numString^[length(tk1.numString^)] in ['e','E','p','P']) then
str2 := @'-'
else begin else begin
Error(63); Error(63);
goto 1; goto 1;
@ -1382,7 +1386,7 @@ else if class1 in numericConstants then begin
lt := token; lt := token;
lsaveNumber := saveNumber; lsaveNumber := saveNumber;
saveNumber := true; saveNumber := true;
DoNumber(true); DoNumber;
saveNumber := lsaveNumber; saveNumber := lsaveNumber;
tk1 := token; tk1 := token;
token := lt; token := lt;
@ -1390,14 +1394,15 @@ else if class1 in numericConstants then begin
end {else if class1 in numericConstants} end {else if class1 in numericConstants}
else if kind1 = dotch then begin else if kind1 = dotch then begin
if class2 in numericConstants then begin if class2 in numericConstants then
workString := concat(tk1.numString^, tk2.numString^); if charKinds[ord(tk2.numString^[1])] = digit then begin
lt := token; workString := concat('.', tk2.numString^);
DoNumber(true); lt := token;
tk1 := token; DoNumber;
token := lt; tk1 := token;
goto 1; token := lt;
end; {if} goto 1;
end; {if}
end {else if class1 in numericConstants} end {else if class1 in numericConstants}
else if kind1 = poundch then begin else if kind1 = poundch then begin
@ -2981,6 +2986,9 @@ var
tk2^.token.sval^.str[i] then tk2^.token.sval^.str[i] then
goto 3; goto 3;
end; end;
preprocessingNumber:
if tk1^.token.numString^ <> tk2^.token.numString^ then
goto 3;
macroParameter: macroParameter:
if tk1^.token.pnum <> tk2^.token.pnum then if tk1^.token.pnum <> tk2^.token.pnum then
goto 3; goto 3;
@ -3866,22 +3874,19 @@ Error(err);
end; {Error2} end; {Error2}
procedure DoNumber {scanWork: boolean}; procedure DoNumber;
{ The current character starts a number - scan it } { Scan a number from workString }
{ }
{ Parameters: }
{ scanWork - get characters from workString? }
{ } { }
{ Globals: } { Globals: }
{ ch - first character in sequence; set to first char }
{ after sequence }
{ workString - string to take numbers from } { workString - string to take numbers from }
label 1,2; label 1,2;
var var
atEnd: boolean; {at end of workString?}
c2: char; {next character to process} c2: char; {next character to process}
err: integer; {error code}
i: integer; {loop index} i: integer; {loop index}
isBin: boolean; {is the value a binary number?} isBin: boolean; {is the value a binary number?}
isHex: boolean; {is the value a hex number?} isHex: boolean; {is the value a hex number?}
@ -3904,17 +3909,13 @@ var
{ Return the next character that is a part of the number } { Return the next character that is a part of the number }
begin {NextChar} begin {NextChar}
if scanWork then begin if ord(workString[0]) <> numIndex then begin
if ord(workString[0]) <> numIndex then begin numIndex := numIndex+1;
numIndex := numIndex+1; c2 := workString[numIndex];
c2 := workString[numIndex];
end {if}
else
c2 := ' ';
end {if} end {if}
else begin else begin
NextCh; atEnd := true;
c2 := ch; c2 := ' ';
end; {else} end; {else}
end; {NextChar} end; {NextChar}
@ -3926,8 +3927,10 @@ var
{ code never actually get converted to numeric constants. } { code never actually get converted to numeric constants. }
begin {FlagError} begin {FlagError}
if not skipping then if err = 0 then
Error(errCode); err := errCode
else if err <> errCode then
err := 189;
end; {FlagError} end; {FlagError}
@ -3974,6 +3977,7 @@ var
begin {DoNumber} begin {DoNumber}
atEnd := false; {not at end}
isBin := false; {assume it's not binary} isBin := false; {assume it's not binary}
isHex := false; {assume it's not hex} isHex := false; {assume it's not hex}
isReal := false; {assume it's an integer} isReal := false; {assume it's an integer}
@ -3981,13 +3985,10 @@ isLong := false; {assume a short integer}
isLongLong := false; isLongLong := false;
isFloat := false; isFloat := false;
unsigned := false; {assume signed numbers} unsigned := false; {assume signed numbers}
err := 0; {no error so far}
stringIndex := 0; {no digits so far...} stringIndex := 0; {no digits so far...}
if scanWork then begin {set up the scanner} numIndex := 0; {set up the scanner}
numIndex := 0; NextChar;
NextChar;
end {if}
else
c2 := ch;
if c2 = '.' then begin {handle the case of no leading digits} if c2 = '.' then begin {handle the case of no leading digits}
stringIndex := 1; stringIndex := 1;
numString[1] := '0'; numString[1] := '0';
@ -4229,14 +4230,18 @@ else begin {hex, octal, & binary}
token.class := intConstant; token.class := intConstant;
end; {else} end; {else}
end; {else} end; {else}
if saveNumber then begin if not atEnd then {make sure we read all characters}
sp := pointer(GMalloc(length(numString)+1)); FlagError(189);
CopyString(pointer(sp), @numString); if err <> 0 then begin {handle unconvertible pp-numbers}
token.class := preprocessingNumber;
token.kind := ppnumber;
token.errCode := err;
end; {if}
if saveNumber or (err <> 0) then begin
sp := pointer(GMalloc(length(workString)+1));
CopyString(pointer(sp), @workString);
token.numString := sp; token.numString := sp;
end; {if} end; {if}
if scanWork then {make sure we read all characters}
if ord(workString[0]) <> numIndex then
Error(63);
end; {DoNumber} end; {DoNumber}
@ -4573,7 +4578,8 @@ lintErrors :=
spaceStr := ' '; {strings used in stringization} spaceStr := ' '; {strings used in stringization}
quoteStr := '"'; quoteStr := '"';
{set of classes for numeric constants} {set of classes for numeric constants}
numericConstants := [intConstant,longConstant,longlongConstant,realConstant]; numericConstants :=
[intConstant,longConstant,longlongConstant,realConstant,preprocessingNumber];
new(mp); {__LINE__} new(mp); {__LINE__}
mp^.name := @'__LINE__'; mp^.name := @'__LINE__';
@ -4804,7 +4810,7 @@ repeat
else if lch in ['.','0'..'9'] then begin else if lch in ['.','0'..'9'] then begin
token.name := GetWord; token.name := GetWord;
saveNumber := true; saveNumber := true;
DoNumber(true); DoNumber;
saveNumber := false; saveNumber := false;
end {else if} end {else if}
else if lch = '"' then else if lch = '"' then
@ -5328,6 +5334,44 @@ var
end; {ConcatenateTokenString} end; {ConcatenateTokenString}
procedure Number;

{ Scan a preprocessing number token.  It is converted to an   }
{ integer or floating constant if it matches the syntax for   }
{ one of those, or left as a preprocessing number if not.     }
{                                                             }
{ Globals:                                                    }
{    ch - first character of the number on entry; left at     }
{       the first character that is not part of the number    }
{    workString - receives the text of the number, which is   }
{       then converted by DoNumber                            }

var
   numLen: 0..maxint;                   {# of chars stored; 256 flags overflow}
   lastCh: char;                        {previous character scanned}

begin {Number}
numLen := 0;                            {nothing stored yet}
lastCh := chr(0);
{the token continues through digits, letters, and periods, plus a }
{sign that immediately follows an exponent marker (e, E, p, or P) }
while (charKinds[ord(ch)] in [digit,letter,ch_dot])
   or ((lastCh in ['e','E','p','P'])
      and (charKinds[ord(ch)] in [ch_plus,ch_dash])) do
   begin
   if numLen < 255 then begin
      numLen := numLen + 1;
      workString[numLen] := ch;
      end {if}
   else
      numLen := 256;                    {too long: keep scanning, store no more}
   lastCh := ch;
   NextCh;
   end; {while}
if numLen = 256 then begin
   if not skipping then                 {no error is reported while skipping}
      Error(131);
   numLen := 1;                         {substitute '0' for the overlong token}
   workString[1] := '0';
   end; {if}
workString[0] := chr(numLen);           {set the length byte of workString}
DoNumber;
end; {Number}
begin {NextToken} begin {NextToken}
if ifList = nil then {do pending EndInclude calls} if ifList = nil then {do pending EndInclude calls}
while includeCount <> 0 do begin while includeCount <> 0 do begin
@ -5661,7 +5705,7 @@ case charKinds[ord(ch)] of
ch_dot : begin {tokens that start with '.'} ch_dot : begin {tokens that start with '.'}
if charKinds[ord(PeekCh)] = digit then if charKinds[ord(PeekCh)] = digit then
DoNumber(false) Number
else begin else begin
NextCh; NextCh;
if (ch = '.') and (PeekCh = '.') then begin if (ch = '.') and (PeekCh = '.') then begin
@ -5874,7 +5918,7 @@ case charKinds[ord(ch)] of
end; end;
digit : {numeric constants} digit : {numeric constants}
DoNumber(false); Number;
ch_other: begin {other non-whitespace char (pp-token)} ch_other: begin {other non-whitespace char (pp-token)}
token.kind := otherch; token.kind := otherch;
@ -5932,10 +5976,20 @@ if printMacroExpansions then
if not suppressMacroExpansions then if not suppressMacroExpansions then
if not suppressPrint then if not suppressPrint then
PrintToken(token); {print the token stream} PrintToken(token); {print the token stream}
if token.kind = otherch then if token.kind = otherch then begin
if not (skipping or preprocessing or suppressMacroExpansions) if not (skipping or preprocessing or suppressMacroExpansions)
or doingPPExpression then or doingPPExpression then
Error(1); Error(1);
end {if}
else if token.kind = ppNumber then
if not (skipping or preprocessing or suppressMacroExpansions)
or doingPPExpression then begin
Error(token.errCode);
token.kind := intconst;
token.class := intConstant;
token.ival := 0;
token.numString := @'0';
end; {if}
end; {NextToken} end; {NextToken}

View File

@ -309,6 +309,8 @@ charSym start single character symbols
enum (barbarop,pluseqop,minuseqop,asteriskeqop,slasheqop) enum (barbarop,pluseqop,minuseqop,asteriskeqop,slasheqop)
enum (percenteqop,ltlteqop,gtgteqop,andeqop,caroteqop) enum (percenteqop,ltlteqop,gtgteqop,andeqop,caroteqop)
enum (bareqop,poundpoundop,dotdotdotsy) enum (bareqop,poundpoundop,dotdotdotsy)
enum (ppnumber) preprocessing number
enum (otherch) other non-whitespace char
enum (eolsy,eofsy) control characters enum (eolsy,eofsy) control characters
enum (typedef) user types enum (typedef) user types
! converted operations ! converted operations
@ -466,6 +468,7 @@ icp start in-coming priority for expression
dc i1'3' bareqop dc i1'3' bareqop
dc i1'200' poundpoundop dc i1'200' poundpoundop
dc i1'200' dotdotdotsy dc i1'200' dotdotdotsy
dc i1'200' ppnumber
dc i1'200' otherch dc i1'200' otherch
dc i1'200' eolsy dc i1'200' eolsy
dc i1'200' eofsy dc i1'200' eofsy
@ -644,6 +647,7 @@ isp start in stack priority for expression
dc i1'2' bareqop dc i1'2' bareqop
dc i1'0' poundpoundop dc i1'0' poundpoundop
dc i1'0' dotdotdotsy dc i1'0' dotdotdotsy
dc i1'0' ppnumber
dc i1'0' otherch dc i1'0' otherch
dc i1'0' eolsy dc i1'0' eolsy
dc i1'0' eofsy dc i1'0' eofsy

View File

@ -28,6 +28,7 @@
{1} c99desinit.c {1} c99desinit.c
{1} c99printfa.c {1} c99printfa.c
{1} c99strtold.c {1} c99strtold.c
{1} c99ppnum.c
{1} c11generic.c {1} c11generic.c
{1} c11align.c {1} c11align.c
{1} c11noret.c {1} c11noret.c

View File

@ -0,0 +1,36 @@
/*
* Test handling of preprocessing numbers.
*
* Most of this applies to C89, but hex float and long long are specific to
* C99 and later.
*/
#include <stdio.h>
#include <string.h>
#define COMBINE3(a,b,c) a##b##c
#define STRINGIZE(x) #x
/*
 * Run each preprocessing-number check in turn.  Any check that fails jumps
 * to Fail, which prints the failure message; otherwise the pass message is
 * printed and 0 is returned.
 */
int main(void) {
/* pasting 123, ., and 456 must produce the floating constant 123.456 */
if (COMBINE3(123,.,456) != 123.456)
goto Fail;
/* pasting must also work when the resulting constant has many digits */
if (COMBINE3(1.,08,999999999999999999999999999999999)
!= 1.08999999999999999999999999999999999)
goto Fail;
/* 0x and 09 are not valid constants by themselves, but the pasted */
/* result 0xAB09 is a valid hex constant */
if (COMBINE3(0x,AB,09) != 0xAB09)
goto Fail;
/* a token that is not a valid constant must stringize to its exact text */
if (strcmp(STRINGIZE(.1xyzp+), ".1xyzp+") != 0)
goto Fail;
/* a valid hex constant must stringize with its spelling unchanged */
if (strcmp(STRINGIZE(0xaBcD), "0xaBcD") != 0)
goto Fail;
/* another unconvertible token: a sign after e, then a trailing period */
if (strcmp(STRINGIZE(089ae-.), "089ae-.") != 0)
goto Fail;
/* pasting 123, L, and L gives 123LL, which must have long long size */
if (sizeof(COMBINE3(123,L,L)) < sizeof(long long))
goto Fail;
printf ("Passed Conformance Test c99ppnum\n");
return 0;
/* failure path: there is no explicit return after the message */
Fail:
printf ("Failed Conformance Test c99ppnum\n");
}

View File

@ -1624,6 +1624,10 @@ If you use #pragma debug 0x0010 to enable stack check debug code, the compiler w
17. Incorrect code could be generated in certain circumstances where a long long or unsigned long long member of a structure or array was accessed via a pointer. 17. Incorrect code could be generated in certain circumstances where a long long or unsigned long long member of a structure or array was accessed via a pointer.
18. The ORCA/C preprocessor now allows for preprocessing number tokens that do not match the syntax of an integer or floating constant (e.g. 08Ae-.x). If any such tokens remain after preprocessing, an error will still be reported. Note that this means that code like 0x3e+1 is now treated as a single token that is invalid if it remains after preprocessing, rather than as three tokens that form a valid expression; if you want it to be interpreted as three tokens, you must include whitespace before the +.
19. When numeric tokens beginning with . were used as operands to the ## preprocessing operator, they behaved as if they started with a leading 0, which could lead to an incorrect result (e.g. 123##.456 became 1230.456).
-- Bugs from C 2.1.1 B3 that have been fixed in C 2.2.0 --------------------- -- Bugs from C 2.1.1 B3 that have been fixed in C 2.2.0 ---------------------
1. There were various bugs that could cause incorrect code to be generated in certain cases. Some of these were specific to certain optimization passes, alone or in combination. 1. There were various bugs that could cause incorrect code to be generated in certain cases. Some of these were specific to certain optimization passes, alone or in combination.