Support "other character" preprocessing tokens.

This implements the catch-all category of preprocessing tokens, "each non-white-space character that cannot be one of the above" (C17 section 6.4). Such tokens may appear in skipped code. They may also appear in macros or macro parameters, as long as the tokens are never expanded or are stringized during macro processing. The affected characters are $, @, `, and many extended characters.

It is still an error if these tokens are used in contexts where they remain present after preprocessing. If bit 0 of #pragma ignore is clear, these characters are reported as errors even in skipped code or preprocessor constructs.
This commit is contained in:
Stephen Heumann 2022-11-08 18:47:03 -06:00
parent d96a5f86f9
commit 9cc72c8845
5 changed files with 86 additions and 57 deletions

View File

@ -196,6 +196,7 @@ type
barbarop,pluseqop,minuseqop,asteriskeqop,slasheqop,
percenteqop,ltlteqop,gtgteqop,andeqop,caroteqop,
bareqop,poundpoundop,dotdotdotsy,
otherch, {other non-whitespace char (pp-token)}
eolsy,eofsy, {control characters}
typedef, {user types}
uminus,uand,uasterisk, {converted operations}
@ -209,14 +210,15 @@ type
(illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc,
ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string,
ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon,
ch_backslash,letter,digit);
ch_backslash,ch_other,letter,digit);
{prefixes of a character/string literal}
charStrPrefixEnum = (prefix_none,prefix_L,prefix_u16,prefix_U32,prefix_u8);
tokenSet = set of tokenEnum;
tokenClass = (reservedWord,reservedSymbol,identifier,intConstant,longConstant,
longlongConstant,realConstant,stringConstant,macroParameter);
longlongConstant,realConstant,stringConstant,otherCharacter,
macroParameter);
identPtr = ^identRecord; {^ to a symbol table entry}
tokenType = record {a token}
kind: tokenEnum; {kind of token}
@ -233,6 +235,7 @@ type
stringConstant: (sval: longstringPtr;
ispstring: boolean;
prefix: charStrPrefixEnum);
otherCharacter: (ch: char); {used for preprocessing tokens only}
macroParameter: (pnum: integer);
end;

View File

@ -18,7 +18,7 @@ uses CCommon, MM, Scanner, Symbol, CGI;
{$segment 'HEADER'}
const
symFileVersion = 31; {version number of .sym file format}
symFileVersion = 32; {version number of .sym file format}
var
inhibitHeader: boolean; {should .sym includes be blocked?}
@ -721,6 +721,7 @@ procedure EndInclude {chPtr: ptr};
WriteByte(ord(token.ispstring));
WriteByte(ord(token.prefix));
end;
otherCharacter: WriteByte(ord(token.ch));
macroParameter: WriteWord(token.pnum);
reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch,
rbrackch,poundch,poundpoundop] then
@ -1360,6 +1361,7 @@ var
token.ispstring := ReadByte <> 0;
token.prefix := charStrPrefixEnum(ReadByte);
end;
otherCharacter: token.ch := chr(ReadByte);
macroParameter: token.pnum := ReadWord;
reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch,
rbrackch,poundch,poundpoundop] then

View File

@ -465,7 +465,7 @@ cch equ 13
enum (illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc),0
enum (ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string)
enum (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon)
enum (ch_backslash,letter,digit)
enum (ch_backslash,ch_other,letter,digit)
! begin {NextCh}
tsc create stack frame

View File

@ -77,6 +77,7 @@ var
macros: ^macroTable; {preprocessor macro list}
pathList: pathRecordPtr; {additional search paths}
printMacroExpansions: boolean; {print the token list?}
preprocessing: boolean; {doing pp directive or macro params?}
suppressMacroExpansions: boolean; {suppress printing even if requested?}
reportEOL: boolean; {report eolsy as a token?}
token: tokenType; {next token to process}
@ -1059,6 +1060,8 @@ case token.kind of
write('%:%:');
dotdotdotsy: write('...');
otherch: write(token.ch);
macroParm: write('$', token.pnum:1);
@ -1838,6 +1841,7 @@ var
i: integer; {loop counter}
inhibit: boolean; {inhibit parameter expansion?}
lexpandMacros: boolean; {local copy of expandMacros}
lPreprocessing: boolean; {local copy of preprocessing}
lSuppressMacroExpansions: boolean; {local copy of suppressMacroExpansions}
mPtr: macroRecordPtr; {for checking list of macros}
newParm: parameterPtr; {for building a new parameter entry}
@ -1861,6 +1865,8 @@ parms := nil; {no parms so far}
if macro^.parameters >= 0 then begin {find the values of the parameters}
NextToken; {get the '(' (we hope...)}
if token.kind = lparench then begin
lPreprocessing := preprocessing;
preprocessing := true;
NextToken; {skip the '('}
paramCount := 0; {process the parameters}
parmEnd := nil;
@ -1912,6 +1918,7 @@ if macro^.parameters >= 0 then begin {find the values of the parameters}
PutBackToken(token, true);
Error(12);
end; {if}
preprocessing := lPreprocessing;
end {if}
else begin
Error(13);
@ -3294,6 +3301,7 @@ var
begin {PreProcess}
preprocessing := true;
lSuppressMacroExpansions := suppressMacroExpansions; {inhibit token printing}
suppressMacroExpansions := true;
lReportEOL := reportEOL; {we need to see eol's}
@ -3693,6 +3701,7 @@ expandMacros := true;
reportEOL := lReportEOL; {restore flags}
suppressMacroExpansions := lSuppressMacroExpansions;
skipping := tskipping;
preprocessing := false;
if nextLineNumber >= 0 then
lineNumber := nextLineNumber;
end; {PreProcess}
@ -4447,6 +4456,7 @@ customDefaultName := nil; {no custom default name}
pragmaKeepFile := nil; {no #pragma keep file so far}
doingFakeFile := false; {not doing a fake file}
doingDigitSequence := false; {not expecting a digit sequence}
preprocessing := false; {not preprocessing}
{error codes for lint messages}
{if changed, also change maxLint}
@ -5681,9 +5691,19 @@ case charKinds[ord(ch)] of
CheckIdentifier;
end;
digit : {numeric constants}
digit : {numeric constants}
DoNumber(false);
ch_other: begin {other non-whitespace char (pp-token)}
token.kind := otherch;
token.class := otherCharacter;
token.ch := ch;
NextCh;
if skipping or preprocessing then
if not skipIllegalTokens then
Error(1);
end;
otherwise: Error(57);
end; {case}
tokenEnd := currentChPtr; {record the end of the token}
@ -5728,6 +5748,10 @@ if doingPPExpression then begin
end; {if}
if printMacroExpansions and not suppressMacroExpansions then
PrintToken(token); {print the token stream}
if token.kind = otherch then
if not (skipping or preprocessing or suppressMacroExpansions)
or doingPPExpression then
Error(1);
end; {NextToken}

104
Table.asm
View File

@ -19,7 +19,7 @@ charKinds start character set
enum (illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc),0
enum (ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string)
enum (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon)
enum (ch_backslash,letter,digit)
enum (ch_backslash,ch_other,letter,digit)
! STANDARD
dc i'ch_eof' nul
@ -58,7 +58,7 @@ charKinds start character set
dc i'ch_exc' !
dc i'ch_string' "
dc i'ch_pound' #
dc i'illegal' $
dc i'ch_other' $
dc i'ch_percent' %
dc i'ch_and' &
dc i'ch_char' '
@ -86,7 +86,7 @@ charKinds start character set
dc i'ch_eq' =
dc i'ch_gt' >
dc i'ch_special' ?
dc i'illegal' @
dc i'ch_other' @
dc i'letter' A
dc i'letter' B
dc i'letter' C
@ -118,7 +118,7 @@ charKinds start character set
dc i'ch_special' ]
dc i'ch_carot' ^
dc i'letter' _
dc i'illegal' `
dc i'ch_other' `
dc i'letter' a
dc i'letter' b
dc i'letter' c
@ -183,24 +183,24 @@ charKinds start character set
dc i'letter' gs
dc i'letter' rs
dc i'letter' us
dc i'illegal' space
dc i'illegal' !
dc i'illegal' "
dc i'illegal' #
dc i'illegal' $
dc i'illegal' %
dc i'illegal' &
dc i'ch_other' space
dc i'ch_other' !
dc i'ch_other' "
dc i'ch_other' #
dc i'ch_other' $
dc i'ch_other' %
dc i'ch_other' &
dc i'letter' '
dc i'illegal' (
dc i'illegal' )
dc i'illegal' *
dc i'illegal' +
dc i'illegal' ,
dc i'ch_other' (
dc i'ch_other' )
dc i'ch_other' *
dc i'ch_other' +
dc i'ch_other' ,
dc i'ch_special' -
dc i'letter' .
dc i'letter' /
dc i'illegal' 0
dc i'illegal' 1
dc i'ch_other' 0
dc i'ch_other' 1
dc i'ch_special' 2
dc i'ch_special' 3
dc i'letter' 4
@ -209,49 +209,49 @@ charKinds start character set
dc i'letter' 7
dc i'letter' 8
dc i'letter' 9
dc i'illegal' :
dc i'ch_other' :
dc i'letter' ;
dc i'letter' <
dc i'letter' =
dc i'letter' >
dc i'letter' ?
dc i'illegal' @
dc i'illegal' A
dc i'illegal' B
dc i'illegal' C
dc i'ch_other' @
dc i'ch_other' A
dc i'ch_other' B
dc i'ch_other' C
dc i'letter' D
dc i'illegal' E
dc i'ch_other' E
dc i'letter' F
dc i'ch_special' G
dc i'ch_special' H
dc i'illegal' I
dc i'ch_other' I
dc i'ch_white' J
dc i'letter' K
dc i'letter' L
dc i'letter' M
dc i'letter' N
dc i'letter' O
dc i'illegal' P
dc i'illegal' Q
dc i'illegal' R
dc i'illegal' S
dc i'illegal' T
dc i'illegal' U
dc i'ch_other' P
dc i'ch_other' Q
dc i'ch_other' R
dc i'ch_other' S
dc i'ch_other' T
dc i'ch_other' U
dc i'ch_special' V
dc i'illegal' W
dc i'ch_other' W
dc i'letter' X
dc i'letter' Y
dc i'illegal' Z
dc i'illegal' [
dc i'illegal' \
dc i'illegal' ]
dc i'ch_other' Z
dc i'ch_other' [
dc i'ch_other' \
dc i'ch_other' ]
dc i'letter' ^
dc i'letter' _
dc i'illegal' `
dc i'illegal' a
dc i'illegal' b
dc i'illegal' c
dc i'illegal' d
dc i'ch_other' `
dc i'ch_other' a
dc i'ch_other' b
dc i'ch_other' c
dc i'ch_other' d
dc i'letter' e
dc i'letter' f
dc i'letter' g
@ -263,22 +263,22 @@ charKinds start character set
dc i'letter' m
dc i'letter' n
dc i'letter' o
dc i'illegal' p
dc i'ch_other' p
dc i'letter' q
dc i'letter' r
dc i'letter' s
dc i'letter' t
dc i'letter' u
dc i'illegal' v
dc i'illegal' w
dc i'illegal' x
dc i'illegal' y
dc i'illegal' z
dc i'illegal' {
dc i'illegal' |
dc i'illegal' }
dc i'illegal' ~
dc i'illegal' rub
dc i'ch_other' v
dc i'ch_other' w
dc i'ch_other' x
dc i'ch_other' y
dc i'ch_other' z
dc i'ch_other' {
dc i'ch_other' |
dc i'ch_other' }
dc i'ch_other' ~
dc i'ch_other' rub
end
charSym start single character symbols