Support unconvertible preprocessing numbers.

These are tokens that follow the syntax for a preprocessing number, but not for an integer or floating constant after preprocessing. They are now allowed within the preprocessing phases of the compiler. They are not legal after preprocessing, but they may be used as operands of the # and ## preprocessor operators to produce legal tokens.
This commit is contained in:
Stephen Heumann 2024-04-23 21:39:14 -05:00
parent 6b7414384f
commit 8278f7865a
7 changed files with 156 additions and 53 deletions

View File

@ -202,6 +202,7 @@ type
barbarop,pluseqop,minuseqop,asteriskeqop,slasheqop,
percenteqop,ltlteqop,gtgteqop,andeqop,caroteqop,
bareqop,poundpoundop,dotdotdotsy,
ppnumber, {preprocessing number (pp-token)}
otherch, {other non-whitespace char (pp-token)}
eolsy,eofsy, {control characters}
typedef, {user types}
@ -225,7 +226,7 @@ type
tokenSet = set of tokenEnum;
tokenClass = (reservedWord,reservedSymbol,identifier,intConstant,longConstant,
longlongConstant,realConstant,stringConstant,otherCharacter,
macroParameter);
preprocessingNumber,macroParameter);
identPtr = ^identRecord; {^ to a symbol table entry}
tokenType = record {a token}
kind: tokenEnum; {kind of token}
@ -243,6 +244,7 @@ type
ispstring: boolean;
prefix: charStrPrefixEnum);
otherCharacter: (ch: char); {used for preprocessing tokens only}
preprocessingNumber: (errCode: integer); {used for pp tokens only}
macroParameter: (pnum: integer);
end;

View File

@ -18,7 +18,7 @@ uses CCommon, MM, Scanner, Symbol, CGI;
{$segment 'HEADER'}
const
symFileVersion = 42; {version number of .sym file format}
symFileVersion = 44; {version number of .sym file format}
var
inhibitHeader: boolean; {should .sym includes be blocked?}
@ -722,6 +722,7 @@ procedure EndInclude {chPtr: ptr};
WriteByte(ord(token.prefix));
end;
otherCharacter: WriteByte(ord(token.ch));
preprocessingNumber:WriteWord(token.errCode);
macroParameter: WriteWord(token.pnum);
reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch,
rbrackch,poundch,poundpoundop] then
@ -1392,6 +1393,7 @@ var
token.prefix := charStrPrefixEnum(ReadByte);
end;
otherCharacter: token.ch := chr(ReadByte);
preprocessingNumber: token.errCode := ReadWord;
macroParameter: token.pnum := ReadWord;
reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch,
rbrackch,poundch,poundpoundop] then

View File

@ -805,6 +805,7 @@ if list or (numErr <> 0) then begin
186: msg := @'lint: implicit conversion changes value of constant';
187: msg := @'expression has incomplete struct or union type';
188: msg := @'local variable used in asm statement is out of range for addressing mode';
189: msg := @'malformed numeric constant';
end; {case}
if extraStr <> nil then begin
extraStr^ := concat(msg^,extraStr^);
@ -1097,6 +1098,8 @@ case token.kind of
dotdotdotsy: write('...');
otherch: write(token.ch);
ppNumber: write(token.numString^);
macroParm: write('$', token.pnum:1);
@ -1118,16 +1121,11 @@ procedure CheckIdentifier; forward;
{ See if an identifier is a reserved word, macro or typedef }
procedure DoNumber (scanWork: boolean); forward;
procedure DoNumber; forward;
{ The current character starts a number - scan it }
{ }
{ Parameters: }
{ scanWork - get characters from workString? }
{ Scan a number from workString }
{ }
{ Globals: }
{ ch - first character in sequence; set to first char }
{ after sequence }
{ workString - string to take numbers from }
@ -1374,6 +1372,12 @@ else if class1 in numericConstants then begin
str2 := @reservedWords[kind2]
else if kind2 = dotch then
str2 := @'.'
else if (kind2 = plusch)
and (tk1.numString^[length(tk1.numString^)] in ['e','E','p','P']) then
str2 := @'+'
else if (kind2 = minusch)
and (tk1.numString^[length(tk1.numString^)] in ['e','E','p','P']) then
str2 := @'-'
else begin
Error(63);
goto 1;
@ -1382,7 +1386,7 @@ else if class1 in numericConstants then begin
lt := token;
lsaveNumber := saveNumber;
saveNumber := true;
DoNumber(true);
DoNumber;
saveNumber := lsaveNumber;
tk1 := token;
token := lt;
@ -1390,14 +1394,15 @@ else if class1 in numericConstants then begin
end {else if class1 in numericConstants}
else if kind1 = dotch then begin
if class2 in numericConstants then begin
workString := concat(tk1.numString^, tk2.numString^);
lt := token;
DoNumber(true);
tk1 := token;
token := lt;
goto 1;
end; {if}
if class2 in numericConstants then
if charKinds[ord(tk2.numString^[1])] = digit then begin
workString := concat('.', tk2.numString^);
lt := token;
DoNumber;
tk1 := token;
token := lt;
goto 1;
end; {if}
end {else if kind1 = dotch}
else if kind1 = poundch then begin
@ -2981,6 +2986,9 @@ var
tk2^.token.sval^.str[i] then
goto 3;
end;
preprocessingNumber:
if tk1^.token.numString^ <> tk2^.token.numString^ then
goto 3;
macroParameter:
if tk1^.token.pnum <> tk2^.token.pnum then
goto 3;
@ -3866,22 +3874,19 @@ Error(err);
end; {Error2}
procedure DoNumber {scanWork: boolean};
procedure DoNumber;
{ The current character starts a number - scan it }
{ }
{ Parameters: }
{ scanWork - get characters from workString? }
{ Scan a number from workString }
{ }
{ Globals: }
{ ch - first character in sequence; set to first char }
{ after sequence }
{ workString - string to take numbers from }
label 1,2;
var
atEnd: boolean; {at end of workString?}
c2: char; {next character to process}
err: integer; {error code}
i: integer; {loop index}
isBin: boolean; {is the value a binary number?}
isHex: boolean; {is the value a hex number?}
@ -3904,17 +3909,13 @@ var
{ Return the next character that is a part of the number }
begin {NextChar}
if scanWork then begin
if ord(workString[0]) <> numIndex then begin
numIndex := numIndex+1;
c2 := workString[numIndex];
end {if}
else
c2 := ' ';
if ord(workString[0]) <> numIndex then begin
numIndex := numIndex+1;
c2 := workString[numIndex];
end {if}
else begin
NextCh;
c2 := ch;
atEnd := true;
c2 := ' ';
end; {else}
end; {NextChar}
@ -3926,8 +3927,10 @@ var
{ code never actually get converted to numeric constants. }
begin {FlagError}
if not skipping then
Error(errCode);
if err = 0 then
err := errCode
else if err <> errCode then
err := 189;
end; {FlagError}
@ -3974,6 +3977,7 @@ var
begin {DoNumber}
atEnd := false; {not at end}
isBin := false; {assume it's not binary}
isHex := false; {assume it's not hex}
isReal := false; {assume it's an integer}
@ -3981,13 +3985,10 @@ isLong := false; {assume a short integer}
isLongLong := false;
isFloat := false;
unsigned := false; {assume signed numbers}
err := 0; {no error so far}
stringIndex := 0; {no digits so far...}
if scanWork then begin {set up the scanner}
numIndex := 0;
NextChar;
end {if}
else
c2 := ch;
numIndex := 0; {set up the scanner}
NextChar;
if c2 = '.' then begin {handle the case of no leading digits}
stringIndex := 1;
numString[1] := '0';
@ -4229,14 +4230,18 @@ else begin {hex, octal, & binary}
token.class := intConstant;
end; {else}
end; {else}
if saveNumber then begin
sp := pointer(GMalloc(length(numString)+1));
CopyString(pointer(sp), @numString);
if not atEnd then {make sure we read all characters}
FlagError(189);
if err <> 0 then begin {handle unconvertible pp-numbers}
token.class := preprocessingNumber;
token.kind := ppnumber;
token.errCode := err;
end; {if}
if saveNumber or (err <> 0) then begin
sp := pointer(GMalloc(length(workString)+1));
CopyString(pointer(sp), @workString);
token.numString := sp;
end; {if}
if scanWork then {make sure we read all characters}
if ord(workString[0]) <> numIndex then
Error(63);
end; {DoNumber}
@ -4573,7 +4578,8 @@ lintErrors :=
spaceStr := ' '; {strings used in stringization}
quoteStr := '"';
{set of classes for numeric constants}
numericConstants := [intConstant,longConstant,longlongConstant,realConstant];
numericConstants :=
[intConstant,longConstant,longlongConstant,realConstant,preprocessingNumber];
new(mp); {__LINE__}
mp^.name := @'__LINE__';
@ -4804,7 +4810,7 @@ repeat
else if lch in ['.','0'..'9'] then begin
token.name := GetWord;
saveNumber := true;
DoNumber(true);
DoNumber;
saveNumber := false;
end {else if}
else if lch = '"' then
@ -5328,6 +5334,44 @@ var
end; {ConcatenateTokenString}
procedure Number;

{ Scan a preprocessing number token.  It is converted to an     }
{ integer or floating constant if it matches the syntax for     }
{ one of those, or left as a preprocessing number if not.       }
{                                                               }
{ Globals:                                                      }
{       ch - first character of the token on entry; left at     }
{               the first character after the token             }
{       workString - receives the spelling of the token, which  }
{               is then handed to DoNumber for conversion       }
{                                                               }
{ Notes:                                                        }
{       At most 255 characters are stored.  A longer token is   }
{       still consumed in full, but error 131 is reported       }
{       (unless skipping) and the token is replaced by '0'.     }

var
   numLen: 0..maxint;                   {chars stored so far; 256 = too long}
   lastCh: char;                        {previous character of the token}

begin {Number}
numLen := 0;                            {nothing stored yet}
lastCh := chr(0);
{digits, letters and dots always continue the token; a sign }
{continues it only directly after an exponent letter        }
while (charKinds[ord(ch)] in [digit,letter,ch_dot])
   or ((lastCh in ['e','E','p','P'])
      and (charKinds[ord(ch)] in [ch_plus,ch_dash])) do
   begin
   if numLen < 255 then begin
      numLen := numLen + 1;
      workString[numLen] := ch;
      end {if}
   else
      numLen := 256;                    {flag overflow; keep consuming}
   lastCh := ch;
   NextCh;
   end; {while}
if numLen = 256 then begin              {token was too long}
   if not skipping then
      Error(131);
   numLen := 1;                         {substitute a harmless '0'}
   workString[1] := '0';
   end; {if}
workString[0] := chr(numLen);           {set the string's length byte}
DoNumber;
end; {Number}
begin {NextToken}
if ifList = nil then {do pending EndInclude calls}
while includeCount <> 0 do begin
@ -5661,7 +5705,7 @@ case charKinds[ord(ch)] of
ch_dot : begin {tokens that start with '.'}
if charKinds[ord(PeekCh)] = digit then
DoNumber(false)
Number
else begin
NextCh;
if (ch = '.') and (PeekCh = '.') then begin
@ -5874,7 +5918,7 @@ case charKinds[ord(ch)] of
end;
digit : {numeric constants}
DoNumber(false);
Number;
ch_other: begin {other non-whitespace char (pp-token)}
token.kind := otherch;
@ -5932,10 +5976,20 @@ if printMacroExpansions then
if not suppressMacroExpansions then
if not suppressPrint then
PrintToken(token); {print the token stream}
if token.kind = otherch then
if token.kind = otherch then begin
if not (skipping or preprocessing or suppressMacroExpansions)
or doingPPExpression then
Error(1);
end {if}
else if token.kind = ppNumber then
if not (skipping or preprocessing or suppressMacroExpansions)
or doingPPExpression then begin
Error(token.errCode);
token.kind := intconst;
token.class := intConstant;
token.ival := 0;
token.numString := @'0';
end; {if}
end; {NextToken}

View File

@ -309,6 +309,8 @@ charSym start single character symbols
enum (barbarop,pluseqop,minuseqop,asteriskeqop,slasheqop)
enum (percenteqop,ltlteqop,gtgteqop,andeqop,caroteqop)
enum (bareqop,poundpoundop,dotdotdotsy)
enum (ppnumber) preprocessing number
enum (otherch) other non-whitespace char
enum (eolsy,eofsy) control characters
enum (typedef) user types
! converted operations
@ -466,6 +468,7 @@ icp start in-coming priority for expression
dc i1'3' bareqop
dc i1'200' poundpoundop
dc i1'200' dotdotdotsy
dc i1'200' ppnumber
dc i1'200' otherch
dc i1'200' eolsy
dc i1'200' eofsy
@ -644,6 +647,7 @@ isp start in stack priority for expression
dc i1'2' bareqop
dc i1'0' poundpoundop
dc i1'0' dotdotdotsy
dc i1'0' ppnumber
dc i1'0' otherch
dc i1'0' eolsy
dc i1'0' eofsy

View File

@ -28,6 +28,7 @@
{1} c99desinit.c
{1} c99printfa.c
{1} c99strtold.c
{1} c99ppnum.c
{1} c11generic.c
{1} c11align.c
{1} c11noret.c

View File

@ -0,0 +1,36 @@
/*
* Test handling of preprocessing numbers.
*
* Most of this applies to C89, but hex float and long long are specific to
* C99 and later.
*/
#include <stdio.h>
#include <string.h>
/* Paste three preprocessing tokens into one. */
#define COMBINE3(a,b,c) a##b##c
/* Turn the spelling of the argument into a string literal. */
#define STRINGIZE(x) #x

int main(void) {
    int ok = 1;     /* cleared as soon as any check fails */

    /* Pasting around a '.' must yield a single floating constant. */
    ok = ok && (COMBINE3(123,.,456) == 123.456);
    ok = ok && (COMBINE3(1.,08,999999999999999999999999999999999)
                == 1.08999999999999999999999999999999999);

    /* "0x" alone is not a valid constant, but the pasted result is. */
    ok = ok && (COMBINE3(0x,AB,09) == 0xAB09);

    /* Preprocessing numbers must stringize with their exact spelling. */
    ok = ok && (strcmp(STRINGIZE(.1xyzp+), ".1xyzp+") == 0);
    ok = ok && (strcmp(STRINGIZE(0xaBcD), "0xaBcD") == 0);
    ok = ok && (strcmp(STRINGIZE(089ae-.), "089ae-.") == 0);

    /* Suffix letters pasted on afterwards must form a long long constant. */
    ok = ok && (sizeof(COMBINE3(123,L,L)) >= sizeof(long long));

    if (ok) {
        printf ("Passed Conformance Test c99ppnum\n");
        return 0;
    }

    /* No explicit return on this path: failure is reported by the message. */
    printf ("Failed Conformance Test c99ppnum\n");
}

View File

@ -1624,6 +1624,10 @@ If you use #pragma debug 0x0010 to enable stack check debug code, the compiler w
17. Incorrect code could be generated in certain circumstances where a long long or unsigned long long member of a structure or array was accessed via a pointer.
18. The ORCA/C preprocessor now allows for preprocessing number tokens that do not match the syntax of an integer or floating constant (e.g. 08Ae-.x). If any such tokens remain after preprocessing, an error will still be reported. Note that this means that code like 0x3e+1 is now treated as a single token that is invalid if it remains after preprocessing, rather than as three tokens that form a valid expression; if you want it to be interpreted as three tokens, you must include whitespace before the +.
19. When numeric tokens beginning with . were used as operands to the ## preprocessing operator, they behaved as if they started with a leading 0, which could lead to an incorrect result (e.g. 123##.456 became 1230.456).
-- Bugs from C 2.1.1 B3 that have been fixed in C 2.2.0 ---------------------
1. There were various bugs that could cause incorrect code to be generated in certain cases. Some of these were specific to certain optimization passes, alone or in combination.