Implement support for digraphs.

Specifically, the following six punctuator tokens are now supported: <:, :>, <%, %>, %:, and %:%:. These behave the same as the existing tokens [, ], {, }, #, and ## (respectively), apart from their spelling. This can be useful when the full ASCII character set cannot easily be displayed or input (e.g. on the IIgs text screen with certain language settings).
2024-06-07 19:29:29 +00:00 · 2020-01-04 21:49:50 -06:00 · 2020-01-04 21:49:50 -06:00 · 9036a98e1c
commit 9036a98e1c
parent 6f2eb301e5
5 changed files with 93 additions and 13 deletions
--- a/CCommon.pas
+++ b/CCommon.pas
@ -191,7 +191,8 @@ type
   charEnum =                           {character kinds}
      (illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc,
       ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string,
-       ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,letter,digit);
+       ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon,
       letter,digit);
   tokenSet = set of tokenEnum;
   tokenClass = (reservedWord,reservedSymbol,identifier,intConstant,longConstant,
@ -202,7 +203,7 @@ type
      numString: stringPtr;             {chars in number (macros only)}
      case class: tokenClass of         {token info}
         reservedWord  : ();
-         reservedSymbol: ();
+         reservedSymbol: (isDigraph: boolean);
         identifier    : (name: stringPtr;
                          symbolPtr: identPtr);
         intConstant   : (ival: integer);
--- a/Header.pas
+++ b/Header.pas
@ -18,7 +18,7 @@ uses CCommon, MM, Scanner, Symbol, CGI;
 {$segment 'SCANNER'}
 const
-   symFileVersion = 4;                  {version number of .sym file format}
+   symFileVersion = 5;                  {version number of .sym file format}
 var
   inhibitHeader: boolean;		{should .sym includes be blocked?}
@ -717,6 +717,9 @@ procedure EndInclude {chPtr: ptr};
                		WriteByte(ord(token.ispstring));
                		end;
            macroParameter:	WriteWord(token.pnum);
            reservedSymbol:	if token.kind in [lbracech,rbracech,lbrackch,
                                   rbrackch,poundch,poundpoundop] then 
                                   WriteByte(ord(token.isDigraph));
 	    otherwise:	;
 	    end; {case}
 	 end; {WriteToken}
@ -1321,6 +1324,9 @@ var
                	        token.ispstring := ReadByte <> 0;
                	        end;
         macroParameter:	token.pnum := ReadWord;
         reservedSymbol:	if token.kind in [lbracech,rbracech,lbrackch,
                                   rbrackch,poundch,poundpoundop] then 
                                   token.isDigraph := boolean(ReadByte);
 	 otherwise:		;
 	 end; {case}
      end; {ReadToken}
--- a/Scanner.asm
+++ b/Scanner.asm
@ -115,7 +115,8 @@ cch      equ   13
         enum  (illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc),0
         enum  (ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string)
-         enum  (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,letter,digit)
+         enum  (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon)
         enum  (letter,digit)
         tsc                            create stack frame
         sec
--- a/Scanner.pas
+++ b/Scanner.pas
@ -736,8 +736,7 @@ case token.kind of
   unsignedsy,voidsy,volatilesy,whilesy:
                     write(reservedWords[token.kind]);
-   tildech,questionch,lparench,rparench,lbrackch,rbrackch,lbracech,
+   tildech,questionch,lparench,rparench,commach,semicolonch,colonch:
   rbracech,commach,semicolonch,colonch,poundch:
                     begin
                     for i := minChar to maxChar do
                        if charSym[i] = token.kind then begin
@ -746,6 +745,31 @@ case token.kind of
                           end; {if}
                     end;
   lbrackch:         if not token.isDigraph then
                        write('[')
                     else
                        write('<:');
   rbrackch:         if not token.isDigraph then
                        write(']')
                     else
                        write(':>');
   lbracech:         if not token.isDigraph then
                        write('{')
                     else
                        write('<%');
   rbracech:         if not token.isDigraph then
                        write('}')
                     else
                        write('%>');
   poundch:          if not token.isDigraph then
                        write('#')
                     else
                        write('%:');
   minusch:          write('-');
   plusch:           write('+');
@ -2580,7 +2604,6 @@ reportEOL := true;
 tSkipping := skipping;                  {don't skip the directive name!}
 skipping := false;
 nextLineNumber := -1;
 NextCh;                                 {skip the '#' char}
 while charKinds[ord(ch)] = ch_white do  {skip white space}
   NextCh;
 if ch in ['a','d','e','i','l','p','u','w'] then begin
@ -3671,7 +3694,7 @@ procedure NextToken;
 { Read the next token from the file.                            }
-label 1,2,3,4;
+label 1,2,3,4,5;
 type
   three = (s100,s1000,s4000);          {these declarations are used for a}
@ -3701,6 +3724,7 @@ var
   tPtr: tokenListRecordPtr;            {for removing tokens from putback buffer}
   tToken: tokenType;                   {for merging tokens}
   sPtr,tsPtr: gstringPtr;              {for forming string constants}
   lLastWasReturn: boolean;             {local copy of lastWasReturn}
   function EscapeCh: integer;
@ -3899,11 +3923,13 @@ if tokenList <> nil then begin          {get a token put back by a macro}
         end; {if}
   goto 2;
   end; {if}
-                                        {skip white space}
+5:                                      {skip white space}
 while charKinds[ord(ch)] in [illegal,ch_white,ch_eol] do begin
   if charKinds[ord(ch)] = illegal then begin
-      if (ch = '#') and (lastWasReturn or (token.kind = eolsy)) then
+      if (ch = '#') and (lastWasReturn or (token.kind = eolsy)) then begin
         NextCh;                        {skip the '#' char}
         PreProcess                     {call the preprocessor}
         end {if}
      else begin
         tokenLine := lineNumber;       {record a # token}
         tokenColumn := ord(ord4(chPtr)-ord4(firstPtr));
@ -3941,6 +3967,7 @@ case charKinds[ord(ch)] of
   ch_special  : begin
      token.kind := charSym[ord(ch)];
      token.isDigraph := false;
      NextCh;
      end;
@ -3949,6 +3976,7 @@ case charKinds[ord(ch)] of
   ch_pound : begin                     {tokens that start with '#'}
      NextCh;
      token.isDigraph := false;
      if ch = '#' then begin
         token.kind := poundpoundop;
         NextCh;
@ -4004,6 +4032,16 @@ case charKinds[ord(ch)] of
         token.kind := lteqop;
         NextCh;
         end
      else if ch = ':' then begin
         token.kind := lbrackch;        { <: digraph }
         token.isDigraph := true;
         NextCh;
         end
      else if ch = '%' then begin
         token.kind := lbracech;        { <% digraph }
         token.isDigraph := true;
         NextCh;
         end
      else
         token.kind := ltch;
      end;
@ -4075,12 +4113,34 @@ case charKinds[ord(ch)] of
         token.kind := barch;
      end;
-   ch_percent: begin                      {tokens that start with '%'}
+   ch_percent: begin                    {tokens that start with '%'}
      lLastWasReturn := lastWasReturn or (token.kind = eolsy);
      NextCh;
      if ch = '=' then begin
         token.kind := percenteqop;
         NextCh;
         end
      else if ch = '>' then begin
         token.kind := rbracech;        {%> digraph}
         token.isDigraph := true;
         NextCh;
         end
      else if ch = ':' then begin
         NextCh;
         token.isDigraph := true;
         if (ch = '%') and (chPtr <> eofPtr) and (chr(chPtr^) = ':') then begin
            token.kind := poundpoundop; {%:%: digraph}
            NextCh;
            NextCh;
            end
         else begin
            token.kind := poundch;      {%: digraph}
            if lLastWasReturn then begin
               PreProcess;
               goto 5;
               end;
            end;
         end
      else
         token.kind := percentch;
      end;
@ -4124,6 +4184,17 @@ case charKinds[ord(ch)] of
         end; {else}
      end;
   ch_colon : begin                     {tokens that start with ':'}
      NextCh;
      if ch = '>' then begin
         token.kind := rbrackch;        {:> digraph}
         token.isDigraph := true;
         NextCh;
         end
      else
         token.kind := colonch;
      end;
   ch_char  : CharConstant;		{character constants}
   ch_string: begin                     {string constants}
--- a/Table.asm
+++ b/Table.asm
@ -18,7 +18,8 @@ root     start                          dummy (.root) segment
 charKinds start                         character set
         enum  (illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc),0
         enum  (ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string)
-         enum  (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,letter,digit)
+         enum  (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon)
         enum  (letter,digit)
 ! STANDARD
         dc    i'ch_eof'                nul
@ -79,7 +80,7 @@ charKinds start                         character set
         dc    i'digit'                 7
         dc    i'digit'                 8
         dc    i'digit'                 9
-         dc    i'ch_special'            :
+         dc    i'ch_colon'              :
         dc    i'ch_special'            ;
         dc    i'ch_lt'                 <
         dc    i'ch_eq'                 =