Implement support for digraphs.

Specifically, the following six punctuator tokens are now supported:
<: :> <% %> %: %:%:
These behave the same as the existing tokens [, ], {, }, #, and ## (respectively), apart from their spelling. This can be useful when the full ASCII character set cannot easily be displayed or input (e.g. on the IIgs text screen with certain language settings).
2020-01-04 21:49:50 -06:00 · 2020-01-04 21:49:50 -06:00 · 9036a98e1c
parent 6f2eb301e5
commit 9036a98e1c
5 changed files with 93 additions and 13 deletions
--- a/CCommon.pas
+++ b/CCommon.pas
@ -191,7 +191,8 @@ type
   charEnum =                           {character kinds}
      (illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc,
       ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string,
-       ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,letter,digit);
+       ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon,
+       letter,digit);

   tokenSet = set of tokenEnum;
   tokenClass = (reservedWord,reservedSymbol,identifier,intConstant,longConstant,
@ -202,7 +203,7 @@ type
      numString: stringPtr;             {chars in number (macros only)}
      case class: tokenClass of         {token info}
         reservedWord  : ();
-         reservedSymbol: ();
+         reservedSymbol: (isDigraph: boolean);
         identifier    : (name: stringPtr;
                          symbolPtr: identPtr);
         intConstant   : (ival: integer);
--- a/Header.pas
+++ b/Header.pas
@ -18,7 +18,7 @@ uses CCommon, MM, Scanner, Symbol, CGI;
 {$segment 'SCANNER'}

 const
-   symFileVersion = 4;                  {version number of .sym file format}
+   symFileVersion = 5;                  {version number of .sym file format}

 var
   inhibitHeader: boolean;		{should .sym includes be blocked?}
@ -717,6 +717,9 @@ procedure EndInclude {chPtr: ptr};
                		WriteByte(ord(token.ispstring));
                		end;
            macroParameter:	WriteWord(token.pnum);
+            reservedSymbol:	if token.kind in [lbracech,rbracech,lbrackch,
+                                   rbrackch,poundch,poundpoundop] then 
+                                   WriteByte(ord(token.isDigraph));
 	    otherwise:	;
 	    end; {case}
 	 end; {WriteToken}
@ -1321,6 +1324,9 @@ var
                	        token.ispstring := ReadByte <> 0;
                	        end;
         macroParameter:	token.pnum := ReadWord;
+         reservedSymbol:	if token.kind in [lbracech,rbracech,lbrackch,
+                                   rbrackch,poundch,poundpoundop] then 
+                                   token.isDigraph := boolean(ReadByte);
 	 otherwise:		;
 	 end; {case}
      end; {ReadToken}
--- a/Scanner.asm
+++ b/Scanner.asm
@ -115,7 +115,8 @@ cch      equ   13

         enum  (illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc),0
         enum  (ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string)
-         enum  (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,letter,digit)
+         enum  (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon)
+         enum  (letter,digit)

         tsc                            create stack frame
         sec
--- a/Scanner.pas
+++ b/Scanner.pas
@ -736,8 +736,7 @@ case token.kind of
   unsignedsy,voidsy,volatilesy,whilesy:
                     write(reservedWords[token.kind]);

-   tildech,questionch,lparench,rparench,lbrackch,rbrackch,lbracech,
-   rbracech,commach,semicolonch,colonch,poundch:
+   tildech,questionch,lparench,rparench,commach,semicolonch,colonch:
                     begin
                     for i := minChar to maxChar do
                        if charSym[i] = token.kind then begin
@ -746,6 +745,31 @@ case token.kind of
                           end; {if}
                     end;

+   lbrackch:         if not token.isDigraph then
+                        write('[')
+                     else
+                        write('<:');
+
+   rbrackch:         if not token.isDigraph then
+                        write(']')
+                     else
+                        write(':>');
+
+   lbracech:         if not token.isDigraph then
+                        write('{')
+                     else
+                        write('<%');
+
+   rbracech:         if not token.isDigraph then
+                        write('}')
+                     else
+                        write('%>');
+
+   poundch:          if not token.isDigraph then
+                        write('#')
+                     else
+                        write('%:');
+
   minusch:          write('-');

   plusch:           write('+');
@ -2580,7 +2604,6 @@ reportEOL := true;
 tSkipping := skipping;                  {don't skip the directive name!}
 skipping := false;
 nextLineNumber := -1;
-NextCh;                                 {skip the '#' char}
 while charKinds[ord(ch)] = ch_white do  {skip white space}
   NextCh;
 if ch in ['a','d','e','i','l','p','u','w'] then begin
@ -3671,7 +3694,7 @@ procedure NextToken;

 { Read the next token from the file.                            }

-label 1,2,3,4;
+label 1,2,3,4,5;

 type
   three = (s100,s1000,s4000);          {these declarations are used for a}
@ -3701,6 +3724,7 @@ var
   tPtr: tokenListRecordPtr;            {for removing tokens from putback buffer}
   tToken: tokenType;                   {for merging tokens}
   sPtr,tsPtr: gstringPtr;              {for forming string constants}
+   lLastWasReturn: boolean;             {local copy of lastWasReturn}


   function EscapeCh: integer;
@ -3899,11 +3923,13 @@ if tokenList <> nil then begin          {get a token put back by a macro}
         end; {if}
   goto 2;
   end; {if}
-                                        {skip white space}
+5:                                      {skip white space}
 while charKinds[ord(ch)] in [illegal,ch_white,ch_eol] do begin
   if charKinds[ord(ch)] = illegal then begin
-      if (ch = '#') and (lastWasReturn or (token.kind = eolsy)) then
+      if (ch = '#') and (lastWasReturn or (token.kind = eolsy)) then begin
+         NextCh;                        {skip the '#' char}
         PreProcess                     {call the preprocessor}
+         end {if}
      else begin
         tokenLine := lineNumber;       {record a # token}
         tokenColumn := ord(ord4(chPtr)-ord4(firstPtr));
@ -3941,6 +3967,7 @@ case charKinds[ord(ch)] of

   ch_special  : begin
      token.kind := charSym[ord(ch)];
+      token.isDigraph := false;
      NextCh;
      end;

@ -3949,6 +3976,7 @@ case charKinds[ord(ch)] of

   ch_pound : begin                     {tokens that start with '#'}
      NextCh;
+      token.isDigraph := false;
      if ch = '#' then begin
         token.kind := poundpoundop;
         NextCh;
@ -4004,6 +4032,16 @@ case charKinds[ord(ch)] of
         token.kind := lteqop;
         NextCh;
         end
+      else if ch = ':' then begin
+         token.kind := lbrackch;        { <: digraph }
+         token.isDigraph := true;
+         NextCh;
+         end
+      else if ch = '%' then begin
+         token.kind := lbracech;        { <% digraph }
+         token.isDigraph := true;
+         NextCh;
+         end
      else
         token.kind := ltch;
      end;
@ -4075,12 +4113,34 @@ case charKinds[ord(ch)] of
         token.kind := barch;
      end;

-   ch_percent: begin                      {tokens that start with '%'}
+   ch_percent: begin                    {tokens that start with '%'}
+      lLastWasReturn := lastWasReturn or (token.kind = eolsy);
      NextCh;
      if ch = '=' then begin
         token.kind := percenteqop;
         NextCh;
         end
+      else if ch = '>' then begin
+         token.kind := rbracech;        {%> digraph}
+         token.isDigraph := true;
+         NextCh;
+         end
+      else if ch = ':' then begin
+         NextCh;
+         token.isDigraph := true;
+         if (ch = '%') and (chPtr <> eofPtr) and (chr(chPtr^) = ':') then begin
+            token.kind := poundpoundop; {%:%: digraph}
+            NextCh;
+            NextCh;
+            end
+         else begin
+            token.kind := poundch;      {%: digraph}
+            if lLastWasReturn then begin
+               PreProcess;
+               goto 5;
+               end;
+            end;
+         end
      else
         token.kind := percentch;
      end;
@ -4124,6 +4184,17 @@ case charKinds[ord(ch)] of
         end; {else}
      end;

+   ch_colon : begin                     {tokens that start with ':'}
+      NextCh;
+      if ch = '>' then begin
+         token.kind := rbrackch;        {:> digraph}
+         token.isDigraph := true;
+         NextCh;
+         end
+      else
+         token.kind := colonch;
+      end;
+
   ch_char  : CharConstant;		{character constants}

   ch_string: begin                     {string constants}
--- a/Table.asm
+++ b/Table.asm
@ -18,7 +18,8 @@ root     start                          dummy (.root) segment
 charKinds start                         character set
         enum  (illegal,ch_special,ch_dash,ch_plus,ch_lt,ch_gt,ch_eq,ch_exc),0
         enum  (ch_and,ch_bar,ch_dot,ch_white,ch_eol,ch_eof,ch_char,ch_string)
-         enum  (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,letter,digit)
+         enum  (ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon)
+         enum  (letter,digit)

 ! STANDARD
         dc    i'ch_eof'                nul
@ -79,7 +80,7 @@ charKinds start                         character set
         dc    i'digit'                 7
         dc    i'digit'                 8
         dc    i'digit'                 9
-         dc    i'ch_special'            :
+         dc    i'ch_colon'              :
         dc    i'ch_special'            ;
         dc    i'ch_lt'                 <
         dc    i'ch_eq'                 =