From 9cc72c88452bf6b75a94ba28ddeb5ec2849a6667 Mon Sep 17 00:00:00 2001 From: Stephen Heumann Date: Tue, 8 Nov 2022 18:47:03 -0600 Subject: [PATCH] Support "other character" preprocessing tokens. This implements the catch-all category for preprocessing tokens for "each non-white-space character that cannot be one of the above" (C17 section 6.4). These may appear in skipped code, or in macros or macro parameters if they are never expanded or are stringized during macro processing. The affected characters are $, @, `, and many extended characters. It is still an error if these tokens are used in contexts where they remain present after preprocessing. If #pragma ignore bit 0 is clear, these characters are also reported as errors in skipped code or preprocessor constructs. --- CCommon.pas | 7 +++- Header.pas | 4 +- Scanner.asm | 2 +- Scanner.pas | 26 ++++++++++++- Table.asm | 104 ++++++++++++++++++++++++++-------------------------- 5 files changed, 86 insertions(+), 57 deletions(-) diff --git a/CCommon.pas b/CCommon.pas index b4faa57..a7ea74c 100644 --- a/CCommon.pas +++ b/CCommon.pas @@ -196,6 +196,7 @@ type barbarop,pluseqop,minuseqop,asteriskeqop,slasheqop, percenteqop,ltlteqop,gtgteqop,andeqop,caroteqop, bareqop,poundpoundop,dotdotdotsy, + otherch, {other non-whitespace char (pp-token)} eolsy,eofsy, {control characters} typedef, {user types} uminus,uand,uasterisk, {converted operations} @@ -209,14 +210,15 @@ type (illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc, ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string, ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon, - ch_backslash,letter,digit); + ch_backslash,ch_other,letter,digit); {prefixes of a character/string literal} charStrPrefixEnum = (prefix_none,prefix_L,prefix_u16,prefix_U32,prefix_u8); tokenSet = set of tokenEnum; tokenClass = (reservedWord,reservedSymbol,identifier,intConstant,longConstant, - longlongConstant,realConstant,stringConstant,macroParameter); + longlongConstant,realConstant,stringConstant,otherCharacter, + macroParameter); identPtr = ^identRecord; {^ to a symbol table entry} tokenType = record {a token} kind: tokenEnum; {kind of token} @@ -233,6 +235,7 @@ type stringConstant: (sval: longstringPtr; ispstring: boolean; prefix: charStrPrefixEnum); + otherCharacter: (ch: char); {used for preprocessing tokens only} macroParameter: (pnum: integer); end; diff --git a/Header.pas b/Header.pas index 1507c63..7b53efb 100644 --- a/Header.pas +++ b/Header.pas @@ -18,7 +18,7 @@ uses CCommon, MM, Scanner, Symbol, CGI; {$segment 'HEADER'} const - symFileVersion = 31; {version number of .sym file format} + symFileVersion = 32; {version number of .sym file format} var inhibitHeader: boolean; {should .sym includes be blocked?} @@ -721,6 +721,7 @@ procedure EndInclude {chPtr: ptr}; WriteByte(ord(token.ispstring)); WriteByte(ord(token.prefix)); end; + otherCharacter: WriteByte(ord(token.ch)); macroParameter: WriteWord(token.pnum); reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch, rbrackch,poundch,poundpoundop] then @@ -1360,6 +1361,7 @@ var token.ispstring := ReadByte <> 0; token.prefix := charStrPrefixEnum(ReadByte); end; + otherCharacter: token.ch := chr(ReadByte); macroParameter: token.pnum := ReadWord; reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch, rbrackch,poundch,poundpoundop] then diff --git a/Scanner.asm b/Scanner.asm index 87584f3..515779d 100644 --- a/Scanner.asm +++ b/Scanner.asm @@ -465,7 +465,7 @@ cch equ 13 enum (illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc),0 enum (ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string) enum (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon) - enum (ch_backslash,letter,digit) + enum (ch_backslash,ch_other,letter,digit) ! begin {NextCh} tsc create stack frame diff --git a/Scanner.pas b/Scanner.pas index e43b637..1b765ef 100644 --- a/Scanner.pas +++ b/Scanner.pas @@ -77,6 +77,7 @@ var macros: ^macroTable; {preprocessor macro list} pathList: pathRecordPtr; {additional search paths} printMacroExpansions: boolean; {print the token list?} + preprocessing: boolean; {doing pp directive or macro params?} suppressMacroExpansions: boolean; {suppress printing even if requested?} reportEOL: boolean; {report eolsy as a token?} token: tokenType; {next token to process} @@ -1059,6 +1060,8 @@ case token.kind of write('%:%:'); dotdotdotsy: write('...'); + + otherch: write(token.ch); macroParm: write('$', token.pnum:1); @@ -1838,6 +1841,7 @@ var i: integer; {loop counter} inhibit: boolean; {inhibit parameter expansion?} lexpandMacros: boolean; {local copy of expandMacros} + lPreprocessing: boolean; {local copy of preprocessing} lSuppressMacroExpansions: boolean; {local copy of suppressMacroExpansions} mPtr: macroRecordPtr; {for checking list of macros} newParm: parameterPtr; {for building a new parameter entry} @@ -1861,6 +1865,8 @@ parms := nil; {no parms so far} if macro^.parameters >= 0 then begin {find the values of the parameters} NextToken; {get the '(' (we hope...)} if token.kind = lparench then begin + lPreprocessing := preprocessing; + preprocessing := true; NextToken; {skip the '('} paramCount := 0; {process the parameters} parmEnd := nil; @@ -1912,6 +1918,7 @@ if macro^.parameters >= 0 then begin {find the values of the parameters} PutBackToken(token, true); Error(12); end; {if} + preprocessing := lPreprocessing; end {if} else begin Error(13); @@ -3294,6 +3301,7 @@ var begin {PreProcess} +preprocessing := true; lSuppressMacroExpansions := suppressMacroExpansions; {inhibit token printing} suppressMacroExpansions := true; lReportEOL := reportEOL; {we need to see eol's} @@ -3693,6 +3701,7 @@ expandMacros := true; reportEOL := lReportEOL; {restore flags} suppressMacroExpansions := lSuppressMacroExpansions; skipping := tskipping; +preprocessing := false; if nextLineNumber >= 0 then lineNumber := nextLineNumber; end; {PreProcess} @@ -4447,6 +4456,7 @@ customDefaultName := nil; {no custom default name} pragmaKeepFile := nil; {no #pragma keep file so far} doingFakeFile := false; {not doing a fake file} doingDigitSequence := false; {not expecting a digit sequence} +preprocessing := false; {not preprocessing} {error codes for lint messages} {if changed, also change maxLint} @@ -5681,9 +5691,19 @@ case charKinds[ord(ch)] of CheckIdentifier; end; - digit : {numeric constants} + digit : {numeric constants} DoNumber(false); + ch_other: begin {other non-whitespace char (pp-token)} + token.kind := otherch; + token.class := otherCharacter; + token.ch := ch; + NextCh; + if skipping or preprocessing then + if not skipIllegalTokens then + Error(1); + end; + otherwise: Error(57); end; {case} tokenEnd := currentChPtr; {record the end of the token} @@ -5728,6 +5748,10 @@ if doingPPExpression then begin end; {if} if printMacroExpansions and not suppressMacroExpansions then PrintToken(token); {print the token stream} +if token.kind = otherch then + if not (skipping or preprocessing or suppressMacroExpansions) + or doingPPExpression then + Error(1); end; {NextToken} diff --git a/Table.asm b/Table.asm index c74e460..b5c2ee2 100644 --- a/Table.asm +++ b/Table.asm @@ -19,7 +19,7 @@ charKinds start character set enum (illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc),0 enum (ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string) enum (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon) - enum (ch_backslash,letter,digit) + enum (ch_backslash,ch_other,letter,digit) ! STANDARD dc i'ch_eof' nul @@ -58,7 +58,7 @@ charKinds start character set dc i'ch_exc' ! dc i'ch_string' " dc i'ch_pound' # - dc i'illegal' $ + dc i'ch_other' $ dc i'ch_percent' % dc i'ch_and' & dc i'ch_char' ' @@ -86,7 +86,7 @@ charKinds start character set dc i'ch_eq' = dc i'ch_gt' > dc i'ch_special' ? - dc i'illegal' @ + dc i'ch_other' @ dc i'letter' A dc i'letter' B dc i'letter' C @@ -118,7 +118,7 @@ charKinds start character set dc i'ch_special' ] dc i'ch_carot' ^ dc i'letter' _ - dc i'illegal' ` + dc i'ch_other' ` dc i'letter' a dc i'letter' b dc i'letter' c @@ -183,24 +183,24 @@ charKinds start character set dc i'letter' gs dc i'letter' rs dc i'letter' us - dc i'illegal' space - dc i'illegal' ! - dc i'illegal' " - dc i'illegal' # - dc i'illegal' $ - dc i'illegal' % - dc i'illegal' & + dc i'ch_other' space + dc i'ch_other' ! + dc i'ch_other' " + dc i'ch_other' # + dc i'ch_other' $ + dc i'ch_other' % + dc i'ch_other' & dc i'letter' ' - dc i'illegal' ( - dc i'illegal' ) - dc i'illegal' * - dc i'illegal' + - dc i'illegal' , + dc i'ch_other' ( + dc i'ch_other' ) + dc i'ch_other' * + dc i'ch_other' + + dc i'ch_other' , dc i'ch_special' - dc i'letter' . dc i'letter' / - dc i'illegal' 0 - dc i'illegal' 1 + dc i'ch_other' 0 + dc i'ch_other' 1 dc i'ch_special' 2 dc i'ch_special' 3 dc i'letter' 4 @@ -209,49 +209,49 @@ charKinds start character set dc i'letter' 7 dc i'letter' 8 dc i'letter' 9 - dc i'illegal' : + dc i'ch_other' : dc i'letter' ; dc i'letter' < dc i'letter' = dc i'letter' > dc i'letter' ? - dc i'illegal' @ - dc i'illegal' A - dc i'illegal' B - dc i'illegal' C + dc i'ch_other' @ + dc i'ch_other' A + dc i'ch_other' B + dc i'ch_other' C dc i'letter' D - dc i'illegal' E + dc i'ch_other' E dc i'letter' F dc i'ch_special' G dc i'ch_special' H - dc i'illegal' I + dc i'ch_other' I dc i'ch_white' J dc i'letter' K dc i'letter' L dc i'letter' M dc i'letter' N dc i'letter' O - dc i'illegal' P - dc i'illegal' Q - dc i'illegal' R - dc i'illegal' S - dc i'illegal' T - dc i'illegal' U + dc i'ch_other' P + dc i'ch_other' Q + dc i'ch_other' R + dc i'ch_other' S + dc i'ch_other' T + dc i'ch_other' U dc i'ch_special' V - dc i'illegal' W + dc i'ch_other' W dc i'letter' X dc i'letter' Y - dc i'illegal' Z - dc i'illegal' [ - dc i'illegal' \ - dc i'illegal' ] + dc i'ch_other' Z + dc i'ch_other' [ + dc i'ch_other' \ + dc i'ch_other' ] dc i'letter' ^ dc i'letter' _ - dc i'illegal' ` - dc i'illegal' a - dc i'illegal' b - dc i'illegal' c - dc i'illegal' d + dc i'ch_other' ` + dc i'ch_other' a + dc i'ch_other' b + dc i'ch_other' c + dc i'ch_other' d dc i'letter' e dc i'letter' f dc i'letter' g @@ -263,22 +263,22 @@ charKinds start character set dc i'letter' m dc i'letter' n dc i'letter' o - dc i'illegal' p + dc i'ch_other' p dc i'letter' q dc i'letter' r dc i'letter' s dc i'letter' t dc i'letter' u - dc i'illegal' v - dc i'illegal' w - dc i'illegal' x - dc i'illegal' y - dc i'illegal' z - dc i'illegal' { - dc i'illegal' | - dc i'illegal' } - dc i'illegal' ~ - dc i'illegal' rub + dc i'ch_other' v + dc i'ch_other' w + dc i'ch_other' x + dc i'ch_other' y + dc i'ch_other' z + dc i'ch_other' { + dc i'ch_other' | + dc i'ch_other' } + dc i'ch_other' ~ + dc i'ch_other' rub end charSym start single character symbols