Implement support for digraphs.

Specifically, the following six punctuator tokens are now supported:

<: :> <% %> %: %:%:

These behave the same as the existing tokens [ ] { } # ## (respectively, in the order listed above), apart from their spelling.

This can be useful when the full ASCII character set cannot easily be displayed or input (e.g. on the IIgs text screen with certain language settings).
This commit is contained in:
Stephen Heumann 2020-01-04 21:49:50 -06:00
parent 6f2eb301e5
commit 9036a98e1c
5 changed files with 93 additions and 13 deletions

View File

@ -191,7 +191,8 @@ type
charEnum = {character kinds} charEnum = {character kinds}
(illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc, (illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc,
ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string, ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string,
ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,letter,digit); ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon,
letter,digit);
tokenSet = set of tokenEnum; tokenSet = set of tokenEnum;
tokenClass = (reservedWord,reservedSymbol,identifier,intConstant,longConstant, tokenClass = (reservedWord,reservedSymbol,identifier,intConstant,longConstant,
@ -202,7 +203,7 @@ type
numString: stringPtr; {chars in number (macros only)} numString: stringPtr; {chars in number (macros only)}
case class: tokenClass of {token info} case class: tokenClass of {token info}
reservedWord : (); reservedWord : ();
reservedSymbol: (); reservedSymbol: (isDigraph: boolean);
identifier : (name: stringPtr; identifier : (name: stringPtr;
symbolPtr: identPtr); symbolPtr: identPtr);
intConstant : (ival: integer); intConstant : (ival: integer);

View File

@ -18,7 +18,7 @@ uses CCommon, MM, Scanner, Symbol, CGI;
{$segment 'SCANNER'} {$segment 'SCANNER'}
const const
symFileVersion = 4; {version number of .sym file format} symFileVersion = 5; {version number of .sym file format}
var var
inhibitHeader: boolean; {should .sym includes be blocked?} inhibitHeader: boolean; {should .sym includes be blocked?}
@ -717,6 +717,9 @@ procedure EndInclude {chPtr: ptr};
WriteByte(ord(token.ispstring)); WriteByte(ord(token.ispstring));
end; end;
macroParameter: WriteWord(token.pnum); macroParameter: WriteWord(token.pnum);
reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch,
rbrackch,poundch,poundpoundop] then
WriteByte(ord(token.isDigraph));
otherwise: ; otherwise: ;
end; {case} end; {case}
end; {WriteToken} end; {WriteToken}
@ -1321,6 +1324,9 @@ var
token.ispstring := ReadByte <> 0; token.ispstring := ReadByte <> 0;
end; end;
macroParameter: token.pnum := ReadWord; macroParameter: token.pnum := ReadWord;
reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch,
rbrackch,poundch,poundpoundop] then
token.isDigraph := boolean(ReadByte);
otherwise: ; otherwise: ;
end; {case} end; {case}
end; {ReadToken} end; {ReadToken}

View File

@ -115,7 +115,8 @@ cch equ 13
enum (illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc),0 enum (illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc),0
enum (ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string) enum (ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string)
enum (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,letter,digit) enum (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon)
enum (letter,digit)
tsc create stack frame tsc create stack frame
sec sec

View File

@ -736,8 +736,7 @@ case token.kind of
unsignedsy,voidsy,volatilesy,whilesy: unsignedsy,voidsy,volatilesy,whilesy:
write(reservedWords[token.kind]); write(reservedWords[token.kind]);
tildech,questionch,lparench,rparench,lbrackch,rbrackch,lbracech, tildech,questionch,lparench,rparench,commach,semicolonch,colonch:
rbracech,commach,semicolonch,colonch,poundch:
begin begin
for i := minChar to maxChar do for i := minChar to maxChar do
if charSym[i] = token.kind then begin if charSym[i] = token.kind then begin
@ -746,6 +745,31 @@ case token.kind of
end; {if} end; {if}
end; end;
lbrackch: if not token.isDigraph then
write('[')
else
write('<:');
rbrackch: if not token.isDigraph then
write(']')
else
write(':>');
lbracech: if not token.isDigraph then
write('{')
else
write('<%');
rbracech: if not token.isDigraph then
write('}')
else
write('%>');
poundch: if not token.isDigraph then
write('#')
else
write('%:');
minusch: write('-'); minusch: write('-');
plusch: write('+'); plusch: write('+');
@ -2580,7 +2604,6 @@ reportEOL := true;
tSkipping := skipping; {don't skip the directive name!} tSkipping := skipping; {don't skip the directive name!}
skipping := false; skipping := false;
nextLineNumber := -1; nextLineNumber := -1;
NextCh; {skip the '#' char}
while charKinds[ord(ch)] = ch_white do {skip white space} while charKinds[ord(ch)] = ch_white do {skip white space}
NextCh; NextCh;
if ch in ['a','d','e','i','l','p','u','w'] then begin if ch in ['a','d','e','i','l','p','u','w'] then begin
@ -3671,7 +3694,7 @@ procedure NextToken;
{ Read the next token from the file. } { Read the next token from the file. }
label 1,2,3,4; label 1,2,3,4,5;
type type
three = (s100,s1000,s4000); {these declarations are used for a} three = (s100,s1000,s4000); {these declarations are used for a}
@ -3701,6 +3724,7 @@ var
tPtr: tokenListRecordPtr; {for removing tokens from putback buffer} tPtr: tokenListRecordPtr; {for removing tokens from putback buffer}
tToken: tokenType; {for merging tokens} tToken: tokenType; {for merging tokens}
sPtr,tsPtr: gstringPtr; {for forming string constants} sPtr,tsPtr: gstringPtr; {for forming string constants}
lLastWasReturn: boolean; {local copy of lastWasReturn}
function EscapeCh: integer; function EscapeCh: integer;
@ -3899,11 +3923,13 @@ if tokenList <> nil then begin {get a token put back by a macro}
end; {if} end; {if}
goto 2; goto 2;
end; {if} end; {if}
{skip white space} 5: {skip white space}
while charKinds[ord(ch)] in [illegal,ch_white,ch_eol] do begin while charKinds[ord(ch)] in [illegal,ch_white,ch_eol] do begin
if charKinds[ord(ch)] = illegal then begin if charKinds[ord(ch)] = illegal then begin
if (ch = '#') and (lastWasReturn or (token.kind = eolsy)) then if (ch = '#') and (lastWasReturn or (token.kind = eolsy)) then begin
NextCh; {skip the '#' char}
PreProcess {call the preprocessor} PreProcess {call the preprocessor}
end {if}
else begin else begin
tokenLine := lineNumber; {record a # token} tokenLine := lineNumber; {record a # token}
tokenColumn := ord(ord4(chPtr)-ord4(firstPtr)); tokenColumn := ord(ord4(chPtr)-ord4(firstPtr));
@ -3941,6 +3967,7 @@ case charKinds[ord(ch)] of
ch_special : begin ch_special : begin
token.kind := charSym[ord(ch)]; token.kind := charSym[ord(ch)];
token.isDigraph := false;
NextCh; NextCh;
end; end;
@ -3949,6 +3976,7 @@ case charKinds[ord(ch)] of
ch_pound : begin {tokens that start with '#'} ch_pound : begin {tokens that start with '#'}
NextCh; NextCh;
token.isDigraph := false;
if ch = '#' then begin if ch = '#' then begin
token.kind := poundpoundop; token.kind := poundpoundop;
NextCh; NextCh;
@ -4004,6 +4032,16 @@ case charKinds[ord(ch)] of
token.kind := lteqop; token.kind := lteqop;
NextCh; NextCh;
end end
else if ch = ':' then begin
token.kind := lbrackch; { <: digraph }
token.isDigraph := true;
NextCh;
end
else if ch = '%' then begin
token.kind := lbracech; { <% digraph }
token.isDigraph := true;
NextCh;
end
else else
token.kind := ltch; token.kind := ltch;
end; end;
@ -4075,12 +4113,34 @@ case charKinds[ord(ch)] of
token.kind := barch; token.kind := barch;
end; end;
ch_percent: begin {tokens that start with '%'} ch_percent: begin {tokens that start with '%'}
lLastWasReturn := lastWasReturn or (token.kind = eolsy);
NextCh; NextCh;
if ch = '=' then begin if ch = '=' then begin
token.kind := percenteqop; token.kind := percenteqop;
NextCh; NextCh;
end end
else if ch = '>' then begin
token.kind := rbracech; {%> digraph}
token.isDigraph := true;
NextCh;
end
else if ch = ':' then begin
NextCh;
token.isDigraph := true;
if (ch = '%') and (chPtr <> eofPtr) and (chr(chPtr^) = ':') then begin
token.kind := poundpoundop; {%:%: digraph}
NextCh;
NextCh;
end
else begin
token.kind := poundch; {%: digraph}
if lLastWasReturn then begin
PreProcess;
goto 5;
end;
end;
end
else else
token.kind := percentch; token.kind := percentch;
end; end;
@ -4124,6 +4184,17 @@ case charKinds[ord(ch)] of
end; {else} end; {else}
end; end;
ch_colon : begin {tokens that start with ':'}
NextCh;
if ch = '>' then begin
token.kind := rbrackch; {:> digraph}
token.isDigraph := true;
NextCh;
end
else
token.kind := colonch;
end;
ch_char : CharConstant; {character constants} ch_char : CharConstant; {character constants}
ch_string: begin {string constants} ch_string: begin {string constants}

View File

@ -18,7 +18,8 @@ root start dummy (.root) segment
charKinds start character set charKinds start character set
enum (illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc),0 enum (illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc),0
enum (ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string) enum (ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string)
enum (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,letter,digit) enum (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon)
enum (letter,digit)
! STANDARD ! STANDARD
dc i'ch_eof' nul dc i'ch_eof' nul
@ -79,7 +80,7 @@ charKinds start character set
dc i'digit' 7 dc i'digit' 7
dc i'digit' 8 dc i'digit' 8
dc i'digit' 9 dc i'digit' 9
dc i'ch_special' : dc i'ch_colon' :
dc i'ch_special' ; dc i'ch_special' ;
dc i'ch_lt' < dc i'ch_lt' <
dc i'ch_eq' = dc i'ch_eq' =