From 8278f7865a84213f998c616ed9032a7a12e965fa Mon Sep 17 00:00:00 2001 From: Stephen Heumann Date: Tue, 23 Apr 2024 21:39:14 -0500 Subject: [PATCH] Support unconvertible preprocessing numbers. These are tokens that follow the syntax for a preprocessing number, but not for an integer or floating constant after preprocessing. They are now allowed within the preprocessing phases of the compiler. They are not legal after preprocessing, but they may be used as operands of the # and ## preprocessor operators to produce legal tokens. --- CCommon.pas | 4 +- Header.pas | 4 +- Scanner.pas | 156 +++++++++++++++++++++++------------ Table.asm | 4 + Tests/Conformance/DOIT3 | 1 + Tests/Conformance/c99ppnum.c | 36 ++++++++ cc.notes | 4 + 7 files changed, 156 insertions(+), 53 deletions(-) create mode 100644 Tests/Conformance/c99ppnum.c diff --git a/CCommon.pas b/CCommon.pas index 91b7d50..bb86fd1 100644 --- a/CCommon.pas +++ b/CCommon.pas @@ -202,6 +202,7 @@ type barbarop,pluseqop,minuseqop,asteriskeqop,slasheqop, percenteqop,ltlteqop,gtgteqop,andeqop,caroteqop, bareqop,poundpoundop,dotdotdotsy, + ppnumber, {preprocessing number (pp-token)} otherch, {other non-whitespace char (pp-token)} eolsy,eofsy, {control characters} typedef, {user types} @@ -225,7 +226,7 @@ type tokenSet = set of tokenEnum; tokenClass = (reservedWord,reservedSymbol,identifier,intConstant,longConstant, longlongConstant,realConstant,stringConstant,otherCharacter, - macroParameter); + preprocessingNumber,macroParameter); identPtr = ^identRecord; {^ to a symbol table entry} tokenType = record {a token} kind: tokenEnum; {kind of token} @@ -243,6 +244,7 @@ type ispstring: boolean; prefix: charStrPrefixEnum); otherCharacter: (ch: char); {used for preprocessing tokens only} + preprocessingNumber: (errCode: integer); {used for pp tokens only} macroParameter: (pnum: integer); end; diff --git a/Header.pas b/Header.pas index 804f401..43c8921 100644 --- a/Header.pas +++ b/Header.pas @@ -18,7 +18,7 @@ uses CCommon, MM, 
Scanner, Symbol, CGI; {$segment 'HEADER'} const - symFileVersion = 42; {version number of .sym file format} + symFileVersion = 44; {version number of .sym file format} var inhibitHeader: boolean; {should .sym includes be blocked?} @@ -722,6 +722,7 @@ procedure EndInclude {chPtr: ptr}; WriteByte(ord(token.prefix)); end; otherCharacter: WriteByte(ord(token.ch)); + preprocessingNumber:WriteWord(token.errCode); macroParameter: WriteWord(token.pnum); reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch, rbrackch,poundch,poundpoundop] then @@ -1392,6 +1393,7 @@ var token.prefix := charStrPrefixEnum(ReadByte); end; otherCharacter: token.ch := chr(ReadByte); + preprocessingNumber: token.errCode := ReadWord; macroParameter: token.pnum := ReadWord; reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch, rbrackch,poundch,poundpoundop] then diff --git a/Scanner.pas b/Scanner.pas index d445449..369c85e 100644 --- a/Scanner.pas +++ b/Scanner.pas @@ -805,6 +805,7 @@ if list or (numErr <> 0) then begin 186: msg := @'lint: implicit conversion changes value of constant'; 187: msg := @'expression has incomplete struct or union type'; 188: msg := @'local variable used in asm statement is out of range for addressing mode'; + 189: msg := @'malformed numeric constant'; end; {case} if extraStr <> nil then begin extraStr^ := concat(msg^,extraStr^); @@ -1097,6 +1098,8 @@ case token.kind of dotdotdotsy: write('...'); otherch: write(token.ch); + + ppNumber: write(token.numString^); macroParm: write('$', token.pnum:1); @@ -1118,16 +1121,11 @@ procedure CheckIdentifier; forward; { See if an identifier is a reserved word, macro or typedef } -procedure DoNumber (scanWork: boolean); forward; +procedure DoNumber; forward; -{ The current character starts a number - scan it } -{ } -{ Parameters: } -{ scanWork - get characters from workString? 
} +{ Scan a number from workString } { } { Globals: } -{ ch - first character in sequence; set to first char } -{ after sequence } { workString - string to take numbers from } @@ -1374,6 +1372,12 @@ else if class1 in numericConstants then begin str2 := @reservedWords[kind2] else if kind2 = dotch then str2 := @'.' + else if (kind2 = plusch) + and (tk1.numString^[length(tk1.numString^)] in ['e','E','p','P']) then + str2 := @'+' + else if (kind2 = minusch) + and (tk1.numString^[length(tk1.numString^)] in ['e','E','p','P']) then + str2 := @'-' else begin Error(63); goto 1; @@ -1382,7 +1386,7 @@ else if class1 in numericConstants then begin lt := token; lsaveNumber := saveNumber; saveNumber := true; - DoNumber(true); + DoNumber; saveNumber := lsaveNumber; tk1 := token; token := lt; @@ -1390,14 +1394,15 @@ else if class1 in numericConstants then begin end {else if class1 in numericConstants} else if kind1 = dotch then begin - if class2 in numericConstants then begin - workString := concat(tk1.numString^, tk2.numString^); - lt := token; - DoNumber(true); - tk1 := token; - token := lt; - goto 1; - end; {if} + if class2 in numericConstants then + if charKinds[ord(tk2.numString^[1])] = digit then begin + workString := concat('.', tk2.numString^); + lt := token; + DoNumber; + tk1 := token; + token := lt; + goto 1; + end; {if} end {else if class1 in numericConstants} else if kind1 = poundch then begin @@ -2981,6 +2986,9 @@ var tk2^.token.sval^.str[i] then goto 3; end; + preprocessingNumber: + if tk1^.token.numString^ <> tk2^.token.numString^ then + goto 3; macroParameter: if tk1^.token.pnum <> tk2^.token.pnum then goto 3; @@ -3866,22 +3874,19 @@ Error(err); end; {Error2} -procedure DoNumber {scanWork: boolean}; +procedure DoNumber; -{ The current character starts a number - scan it } -{ } -{ Parameters: } -{ scanWork - get characters from workString? 
} +{ Scan a number from workString } { } { Globals: } -{ ch - first character in sequence; set to first char } -{ after sequence } { workString - string to take numbers from } label 1,2; var + atEnd: boolean; {at end of workString?} c2: char; {next character to process} + err: integer; {error code} i: integer; {loop index} isBin: boolean; {is the value a binary number?} isHex: boolean; {is the value a hex number?} @@ -3904,17 +3909,13 @@ var { Return the next character that is a part of the number } begin {NextChar} - if scanWork then begin - if ord(workString[0]) <> numIndex then begin - numIndex := numIndex+1; - c2 := workString[numIndex]; - end {if} - else - c2 := ' '; + if ord(workString[0]) <> numIndex then begin + numIndex := numIndex+1; + c2 := workString[numIndex]; end {if} else begin - NextCh; - c2 := ch; + atEnd := true; + c2 := ' '; end; {else} end; {NextChar} @@ -3926,8 +3927,10 @@ var { code never actually get converted to numeric constants. } begin {FlagError} - if not skipping then - Error(errCode); + if err = 0 then + err := errCode + else if err <> errCode then + err := 189; end; {FlagError} @@ -3974,6 +3977,7 @@ var begin {DoNumber} +atEnd := false; {not at end} isBin := false; {assume it's not binary} isHex := false; {assume it's not hex} isReal := false; {assume it's an integer} @@ -3981,13 +3985,10 @@ isLong := false; {assume a short integer} isLongLong := false; isFloat := false; unsigned := false; {assume signed numbers} +err := 0; {no error so far} stringIndex := 0; {no digits so far...} -if scanWork then begin {set up the scanner} - numIndex := 0; - NextChar; - end {if} -else - c2 := ch; +numIndex := 0; {set up the scanner} +NextChar; if c2 = '.' 
then begin {handle the case of no leading digits} stringIndex := 1; numString[1] := '0'; @@ -4229,14 +4230,18 @@ else begin {hex, octal, & binary} token.class := intConstant; end; {else} end; {else} -if saveNumber then begin - sp := pointer(GMalloc(length(numString)+1)); - CopyString(pointer(sp), @numString); +if not atEnd then {make sure we read all characters} + FlagError(189); +if err <> 0 then begin {handle unconvertible pp-numbers} + token.class := preprocessingNumber; + token.kind := ppnumber; + token.errCode := err; + end; {if} +if saveNumber or (err <> 0) then begin + sp := pointer(GMalloc(length(workString)+1)); + CopyString(pointer(sp), @workString); token.numString := sp; end; {if} -if scanWork then {make sure we read all characters} - if ord(workString[0]) <> numIndex then - Error(63); end; {DoNumber} @@ -4573,7 +4578,8 @@ lintErrors := spaceStr := ' '; {strings used in stringization} quoteStr := '"'; {set of classes for numeric constants} -numericConstants := [intConstant,longConstant,longlongConstant,realConstant]; +numericConstants := + [intConstant,longConstant,longlongConstant,realConstant,preprocessingNumber]; new(mp); {__LINE__} mp^.name := @'__LINE__'; @@ -4804,7 +4810,7 @@ repeat else if lch in ['.','0'..'9'] then begin token.name := GetWord; saveNumber := true; - DoNumber(true); + DoNumber; saveNumber := false; end {else if} else if lch = '"' then @@ -5328,6 +5334,44 @@ var end; {ConcatenateTokenString} + procedure Number; + + { Scan a preprocessing number token. It is converted to an } + { integer or floating constant if it matches the syntax for } + { one of those, or left as a preprocessing number if not. 
} + + var + numLen: 1..maxint; + lastCh: char; + + begin {Number} + numLen := 0; + lastCh := chr(0); + + while (charKinds[ord(ch)] in [digit,letter,ch_dot]) + or ((lastCh in ['e','E','p','P']) + and (charKinds[ord(ch)] in [ch_plus,ch_dash])) do + begin + if numLen < 255 then begin + numLen := numLen + 1; + workString[numLen] := ch; + end {if} + else + numLen := 256; + lastCh := ch; + NextCh; + end; {while} + if numLen = 256 then begin + if not skipping then + Error(131); + numLen := 1; + workString[1] := '0'; + end; {if} + workString[0] := chr(numLen); + DoNumber; + end; {Number} + + begin {NextToken} if ifList = nil then {do pending EndInclude calls} while includeCount <> 0 do begin @@ -5661,7 +5705,7 @@ case charKinds[ord(ch)] of ch_dot : begin {tokens that start with '.'} if charKinds[ord(PeekCh)] = digit then - DoNumber(false) + Number else begin NextCh; if (ch = '.') and (PeekCh = '.') then begin @@ -5874,7 +5918,7 @@ case charKinds[ord(ch)] of end; digit : {numeric constants} - DoNumber(false); + Number; ch_other: begin {other non-whitespace char (pp-token)} token.kind := otherch; @@ -5932,10 +5976,20 @@ if printMacroExpansions then if not suppressMacroExpansions then if not suppressPrint then PrintToken(token); {print the token stream} -if token.kind = otherch then +if token.kind = otherch then begin if not (skipping or preprocessing or suppressMacroExpansions) or doingPPExpression then Error(1); + end {if} +else if token.kind = ppNumber then + if not (skipping or preprocessing or suppressMacroExpansions) + or doingPPExpression then begin + Error(token.errCode); + token.kind := intconst; + token.class := intConstant; + token.ival := 0; + token.numString := @'0'; + end; {if} end; {NextToken} diff --git a/Table.asm b/Table.asm index 5ab0aba..e4aa1ca 100644 --- a/Table.asm +++ b/Table.asm @@ -309,6 +309,8 @@ charSym start single character symbols enum (barbarop,pluseqop,minuseqop,asteriskeqop,slasheqop) enum (percenteqop,ltlteqop,gtgteqop,andeqop,caroteqop) 
enum (bareqop,poundpoundop,dotdotdotsy) + enum (ppnumber) preprocessing number + enum (otherch) other non-whitespace char enum (eolsy,eofsy) control characters enum (typedef) user types ! converted operations @@ -466,6 +468,7 @@ icp start in-coming priority for expression dc i1'3' bareqop dc i1'200' poundpoundop dc i1'200' dotdotdotsy + dc i1'200' ppnumber dc i1'200' otherch dc i1'200' eolsy dc i1'200' eofsy @@ -644,6 +647,7 @@ isp start in stack priority for expression dc i1'2' bareqop dc i1'0' poundpoundop dc i1'0' dotdotdotsy + dc i1'0' ppnumber dc i1'0' otherch dc i1'0' eolsy dc i1'0' eofsy diff --git a/Tests/Conformance/DOIT3 b/Tests/Conformance/DOIT3 index 0d9021c..a5ce4f2 100644 --- a/Tests/Conformance/DOIT3 +++ b/Tests/Conformance/DOIT3 @@ -28,6 +28,7 @@ {1} c99desinit.c {1} c99printfa.c {1} c99strtold.c +{1} c99ppnum.c {1} c11generic.c {1} c11align.c {1} c11noret.c diff --git a/Tests/Conformance/c99ppnum.c b/Tests/Conformance/c99ppnum.c new file mode 100644 index 0000000..1770256 --- /dev/null +++ b/Tests/Conformance/c99ppnum.c @@ -0,0 +1,36 @@ +/* + * Test handling of preprocessing numbers. + * + * Most of this applies to C89, but hex float and long long are specific to + * C99 and later. 
+ */ + +#include <stdio.h> +#include <string.h> + +#define COMBINE3(a,b,c) a##b##c +#define STRINGIZE(x) #x + +int main(void) { + if (COMBINE3(123,.,456) != 123.456) + goto Fail; + if (COMBINE3(1.,08,999999999999999999999999999999999) + != 1.08999999999999999999999999999999999) + goto Fail; + if (COMBINE3(0x,AB,09) != 0xAB09) + goto Fail; + if (strcmp(STRINGIZE(.1xyzp+), ".1xyzp+") != 0) + goto Fail; + if (strcmp(STRINGIZE(0xaBcD), "0xaBcD") != 0) + goto Fail; + if (strcmp(STRINGIZE(089ae-.), "089ae-.") != 0) + goto Fail; + if (sizeof(COMBINE3(123,L,L)) < sizeof(long long)) + goto Fail; + + printf ("Passed Conformance Test c99ppnum\n"); + return 0; + +Fail: + printf ("Failed Conformance Test c99ppnum\n"); +} diff --git a/cc.notes b/cc.notes index bab12d2..4fe0879 100644 --- a/cc.notes +++ b/cc.notes @@ -1624,6 +1624,10 @@ If you use #pragma debug 0x0010 to enable stack check debug code, the compiler w 17. Incorrect code could be generated in certain circumstances where a long long or unsigned long long member of a structure or array was accessed via a pointer. +18. The ORCA/C preprocessor now allows for preprocessing number tokens that do not match the syntax of an integer or floating constant (e.g. 08Ae-.x). If any such tokens remain after preprocessing, an error will still be reported. Note that this means that code like 0x3e+1 is now treated as a single token that is invalid if it remains after preprocessing, rather than as three tokens that form a valid expression; if you want it to be interpreted as three tokens, you must include whitespace before the +. + +19. When numeric tokens beginning with . were used as operands to the ## preprocessing operator, they behaved as if they started with a leading 0, which could lead to an incorrect result (e.g. 123##.456 became 1230.456). + -- Bugs from C 2.1.1 B3 that have been fixed in C 2.2.0 --------------------- 1. There were various bugs that could cause incorrect code to be generated in certain cases. 
Some of these were specific to certain optimization passes, alone or in combination.