Implement support for digraphs.

Specifically, the following six punctuator tokens are now supported:

<: :> <% %> %: %:%:

These behave the same as the existing tokens [, ], {, }, #, and ## respectively (that is, <: is [, :> is ], <% is {, %> is }, %: is #, and %:%: is ##), apart from their spelling.

This can be useful when the full ASCII character set cannot easily be displayed or input (e.g. on the IIgs text screen with certain language settings).
This commit is contained in:
Stephen Heumann 2020-01-04 21:49:50 -06:00
parent 6f2eb301e5
commit 9036a98e1c
5 changed files with 93 additions and 13 deletions

View File

@ -191,7 +191,8 @@ type
charEnum = {character kinds}
(illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc,
ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string,
ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,letter,digit);
ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon,
letter,digit);
tokenSet = set of tokenEnum;
tokenClass = (reservedWord,reservedSymbol,identifier,intConstant,longConstant,
@ -202,7 +203,7 @@ type
numString: stringPtr; {chars in number (macros only)}
case class: tokenClass of {token info}
reservedWord : ();
reservedSymbol: ();
reservedSymbol: (isDigraph: boolean);
identifier : (name: stringPtr;
symbolPtr: identPtr);
intConstant : (ival: integer);

View File

@ -18,7 +18,7 @@ uses CCommon, MM, Scanner, Symbol, CGI;
{$segment 'SCANNER'}
const
symFileVersion = 4; {version number of .sym file format}
symFileVersion = 5; {version number of .sym file format}
var
inhibitHeader: boolean; {should .sym includes be blocked?}
@ -717,6 +717,9 @@ procedure EndInclude {chPtr: ptr};
WriteByte(ord(token.ispstring));
end;
macroParameter: WriteWord(token.pnum);
reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch,
rbrackch,poundch,poundpoundop] then
WriteByte(ord(token.isDigraph));
otherwise: ;
end; {case}
end; {WriteToken}
@ -1321,6 +1324,9 @@ var
token.ispstring := ReadByte <> 0;
end;
macroParameter: token.pnum := ReadWord;
reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch,
rbrackch,poundch,poundpoundop] then
token.isDigraph := boolean(ReadByte);
otherwise: ;
end; {case}
end; {ReadToken}

View File

@ -115,7 +115,8 @@ cch equ 13
enum (illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc),0
enum (ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string)
enum (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,letter,digit)
enum (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon)
enum (letter,digit)
tsc create stack frame
sec

View File

@ -736,8 +736,7 @@ case token.kind of
unsignedsy,voidsy,volatilesy,whilesy:
write(reservedWords[token.kind]);
tildech,questionch,lparench,rparench,lbrackch,rbrackch,lbracech,
rbracech,commach,semicolonch,colonch,poundch:
tildech,questionch,lparench,rparench,commach,semicolonch,colonch:
begin
for i := minChar to maxChar do
if charSym[i] = token.kind then begin
@ -746,6 +745,31 @@ case token.kind of
end; {if}
end;
lbrackch: if not token.isDigraph then
write('[')
else
write('<:');
rbrackch: if not token.isDigraph then
write(']')
else
write(':>');
lbracech: if not token.isDigraph then
write('{')
else
write('<%');
rbracech: if not token.isDigraph then
write('}')
else
write('%>');
poundch: if not token.isDigraph then
write('#')
else
write('%:');
minusch: write('-');
plusch: write('+');
@ -2580,7 +2604,6 @@ reportEOL := true;
tSkipping := skipping; {don't skip the directive name!}
skipping := false;
nextLineNumber := -1;
NextCh; {skip the '#' char}
while charKinds[ord(ch)] = ch_white do {skip white space}
NextCh;
if ch in ['a','d','e','i','l','p','u','w'] then begin
@ -3671,7 +3694,7 @@ procedure NextToken;
{ Read the next token from the file. }
label 1,2,3,4;
label 1,2,3,4,5;
type
three = (s100,s1000,s4000); {these declarations are used for a}
@ -3701,6 +3724,7 @@ var
tPtr: tokenListRecordPtr; {for removing tokens from putback buffer}
tToken: tokenType; {for merging tokens}
sPtr,tsPtr: gstringPtr; {for forming string constants}
lLastWasReturn: boolean; {local copy of lastWasReturn}
function EscapeCh: integer;
@ -3899,11 +3923,13 @@ if tokenList <> nil then begin {get a token put back by a macro}
end; {if}
goto 2;
end; {if}
{skip white space}
5: {skip white space}
while charKinds[ord(ch)] in [illegal,ch_white,ch_eol] do begin
if charKinds[ord(ch)] = illegal then begin
if (ch = '#') and (lastWasReturn or (token.kind = eolsy)) then
if (ch = '#') and (lastWasReturn or (token.kind = eolsy)) then begin
NextCh; {skip the '#' char}
PreProcess {call the preprocessor}
end {if}
else begin
tokenLine := lineNumber; {record a # token}
tokenColumn := ord(ord4(chPtr)-ord4(firstPtr));
@ -3941,6 +3967,7 @@ case charKinds[ord(ch)] of
ch_special : begin
token.kind := charSym[ord(ch)];
token.isDigraph := false;
NextCh;
end;
@ -3949,6 +3976,7 @@ case charKinds[ord(ch)] of
ch_pound : begin {tokens that start with '#'}
NextCh;
token.isDigraph := false;
if ch = '#' then begin
token.kind := poundpoundop;
NextCh;
@ -4004,6 +4032,16 @@ case charKinds[ord(ch)] of
token.kind := lteqop;
NextCh;
end
else if ch = ':' then begin
token.kind := lbrackch; { <: digraph }
token.isDigraph := true;
NextCh;
end
else if ch = '%' then begin
token.kind := lbracech; { <% digraph }
token.isDigraph := true;
NextCh;
end
else
token.kind := ltch;
end;
@ -4075,12 +4113,34 @@ case charKinds[ord(ch)] of
token.kind := barch;
end;
ch_percent: begin {tokens that start with '%'}
ch_percent: begin {tokens that start with '%'}
lLastWasReturn := lastWasReturn or (token.kind = eolsy);
NextCh;
if ch = '=' then begin
token.kind := percenteqop;
NextCh;
end
else if ch = '>' then begin
token.kind := rbracech; {%> digraph}
token.isDigraph := true;
NextCh;
end
else if ch = ':' then begin
NextCh;
token.isDigraph := true;
if (ch = '%') and (chPtr <> eofPtr) and (chr(chPtr^) = ':') then begin
token.kind := poundpoundop; {%:%: digraph}
NextCh;
NextCh;
end
else begin
token.kind := poundch; {%: digraph}
if lLastWasReturn then begin
PreProcess;
goto 5;
end;
end;
end
else
token.kind := percentch;
end;
@ -4124,6 +4184,17 @@ case charKinds[ord(ch)] of
end; {else}
end;
ch_colon : begin {tokens that start with ':'}
NextCh;
if ch = '>' then begin
token.kind := rbrackch; {:> digraph}
token.isDigraph := true;
NextCh;
end
else
token.kind := colonch;
end;
ch_char : CharConstant; {character constants}
ch_string: begin {string constants}

View File

@ -18,7 +18,8 @@ root start dummy (.root) segment
charKinds start character set
enum (illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc),0
enum (ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string)
enum (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,letter,digit)
enum (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon)
enum (letter,digit)
! STANDARD
dc i'ch_eof' nul
@ -79,7 +80,7 @@ charKinds start character set
dc i'digit' 7
dc i'digit' 8
dc i'digit' 9
dc i'ch_special' :
dc i'ch_colon' :
dc i'ch_special' ;
dc i'ch_lt' <
dc i'ch_eq' =