From b8b7dc2c2b616b304127a2acb154ef2850d22dd1 Mon Sep 17 00:00:00 2001
From: Stephen Heumann <stephenheumann@gmail.com>
Date: Thu, 13 Oct 2022 18:12:02 -0500
Subject: [PATCH] Remove code that treats # as an illegal character in most
 places.

C90 had constraints requiring # and ## tokens to only appear in preprocessing directives, but C99 and later removed those constraints, so this code is no longer necessary when targeting current language versions. (It would be necessary in a "strict C90" mode, if that was ever implemented.)

The main practical effect of this is that # and ## tokens can be passed as parameters to macros, provided the macro either ignores or stringizes that parameter. # and ## tokens still have no role in the grammar of the C language after preprocessing, so they will be unexpected tokens and produce some kind of error if they appear anywhere in the code that remains after preprocessing.

This also contains a change to ensure that a line containing one or more illegal characters (e.g. $) and then a # is not treated as a preprocessing directive.
---
 Asm.pas     |  4 ----
 Parser.pas  | 18 ++----------------
 Scanner.pas | 37 +++++++++++++++++--------------------
 Table.asm   |  2 +-
 4 files changed, 20 insertions(+), 41 deletions(-)

diff --git a/Asm.pas b/Asm.pas
index dd7dfb6..2bf0838 100644
--- a/Asm.pas
+++ b/Asm.pas
@@ -166,10 +166,8 @@ var
    { An error was found: skip to the end & quit                 }
 
    begin {Skip}
-   charKinds[ord('#')] := ch_pound;
    while not (token.kind in [rbracech,eofsy]) do
       NextToken;
-   charKinds[ord('#')] := illegal;
    goto 99;
    end; {Skip}
 
@@ -329,7 +327,6 @@ while not (token.kind in [rbracech,eofsy]) do begin
 
    {find the label and op-code}
    CheckForComment;
-   charKinds[ord('#')] := ch_pound;     {allow # as a token}
    if token.kind <> ident then begin    {error if not an identifier}
       Error(9);
       Skip;
@@ -345,7 +342,6 @@ while not (token.kind in [rbracech,eofsy]) do begin
       opname := token;
       NextToken;
       end; {while}
-   charKinds[ord('#')] := illegal;      {don't allow # as a token}
 
    {identify the op-code}
    if length(opname.name^) = 3 then begin
diff --git a/Parser.pas b/Parser.pas
index ce10e00..7e76491 100644
--- a/Parser.pas
+++ b/Parser.pas
@@ -3625,33 +3625,19 @@ var
 
    var
       braceCount: integer;              {# of unmatched { chars}
-      doingAsm: boolean;                {compiling an asm statement?}
 
    begin {SkipFunction}
    Match(lbracech,27);                  {skip to the closing rbrackch}
    braceCount := 1;
-   doingAsm := false;
-   if isAsm then
-      charKinds[ord('#')] := ch_pound;
    while (not (token.kind = eofsy)) and (braceCount <> 0) do begin
-      if token.kind = asmsy then begin
-         doingAsm := true;
-         charKinds[ord('#')] := ch_pound;
-         end {if}
-      else if token.kind = lbracech then
+      if token.kind = lbracech then
          braceCount := braceCount+1
-      else if token.kind = rbracech then begin
+      else if token.kind = rbracech then
          braceCount := braceCount-1;
-         if doingAsm then begin
-            doingAsm := false;
-            charKinds[ord('#')] := illegal;
-            end; {if}
-         end; {else if}
       NextToken;
       end; {while}
    nameFound := false;                  {no pc_nam for the next function (yet)}
    doingFunction := false;              {no longer doing a function}
-   charKinds[ord('#')] := illegal;      {# is a preprocessor command}
    end; {SkipFunction}
 
 
diff --git a/Scanner.pas b/Scanner.pas
index 6673611..f700022 100644
--- a/Scanner.pas
+++ b/Scanner.pas
@@ -2545,7 +2545,6 @@ var
       mPtr^.saved := false;		{not saved in symbol file}
       mPtr^.tokens := nil;              {no tokens yet}
       mPtr^.isVarargs := false;         {not varargs (yet)}
-      charKinds[ord('#')] := ch_pound;  {allow # as a token}
       if ch = '(' then begin            {scan the parameter list...}
          NextToken;                     {done with the name token...}
          NextToken;                     {skip the opening '('}
@@ -2750,7 +2749,6 @@ var
       parameterList := np^.next;
       dispose(np);
       end; {while}
-   charKinds[ord('#')] := illegal;      {don't allow # as a token}
    saveNumber := false;                 {stop saving numeric strings}
    end; {DoDefine}
 
@@ -3480,7 +3478,6 @@ else if charKinds[ord(ch)] = ch_eol     {allow null commands}
 if not tSkipping then
    Error(8);                            {bad preprocessor command}
 2:
-charKinds[ord('#')] := ch_pound;        {allow # as a token}
 expandMacros := false;                  {skip to the end of the line}
 flagOverflows := false;
 skipping := tSkipping;
@@ -3488,7 +3485,6 @@ while not (token.kind in [eolsy,eofsy]) do
    NextToken;
 flagOverflows := true;
 expandMacros := true;
-charKinds[ord('#')] := illegal;         {don't allow # as a token}
 reportEOL := lReportEOL;                {restore flags}
 suppressMacroExpansions := lSuppressMacroExpansions;
 skipping := tskipping;
@@ -4243,7 +4239,6 @@ new(macros);                            {no preprocessor macros so far}
 for i := 0 to hashSize do
    macros^[i] := nil;
 pathList := nil;			{no additional search paths}
-charKinds[ord('#')] := illegal;         {don't allow # as a token}
 tokenList := nil;                       {nothing in putback buffer}
 saveNumber := false;                    {don't save numbers}
 expandMacros := true;                   {enable macro expansion}
@@ -4619,7 +4614,7 @@ procedure NextToken;
 
 { Read the next token from the file.                            }
 
-label 1,2,3,4,5,6;
+label 1,2,3,4,5,6,7;
 
 type
    three = (s100,s1000,sMAX);           {these declarations are used for a}
@@ -4974,21 +4969,14 @@ if tokenList <> nil then begin          {get a token put back by a macro}
    goto 2;
    end; {if}
 5:                                      {skip white space}
-while charKinds[ord(ch)] in [illegal,ch_white,ch_eol] do begin
-   if charKinds[ord(ch)] = illegal then begin
-      if (ch = '#') and (lastWasReturn or (token.kind = eolsy)) then begin
+while charKinds[ord(ch)] in [illegal,ch_white,ch_eol,ch_pound] do begin
+   if charKinds[ord(ch)] = ch_pound then begin
+      if lastWasReturn or (token.kind = eolsy) then begin
          NextCh;                        {skip the '#' char}
          PreProcess                     {call the preprocessor}
          end {if}
-      else begin
-         tokenLine := lineNumber;       {record a # token}
-         tokenColumn := ord(ord4(chPtr)-ord4(firstPtr));
-         tokenStart := pointer(ord4(chPtr)-1);
-         tokenEnd := chPtr;
-         if (not skipping) or (not (skipIllegalTokens or (ch = '#'))) then
-            Error(1);
-         NextCh;
-         end; {else}
+      else
+         goto 7;
       end {if}
    else if (charKinds[ord(ch)] = ch_eol) and reportEOL then begin
       token.class := reservedSymbol;    {record an eol token}
@@ -5000,6 +4988,16 @@ while charKinds[ord(ch)] in [illegal,ch_white,ch_eol] do begin
       NextCh;
       goto 2;
       end {if}
+   else if charKinds[ord(ch)] = illegal then begin
+      tokenLine := lineNumber;          {record an illegal token}
+      tokenColumn := ord(ord4(chPtr)-ord4(firstPtr));
+      tokenStart := pointer(ord4(chPtr)-1);
+      tokenEnd := chPtr;
+      token.kind := questionch;         {make sure it is not eolsy}
+      if (not skipping) or (not skipIllegalTokens) then
+         Error(1);
+      NextCh;
+      end {else if}
    else begin                           {skip white space}
       if printMacroExpansions and not suppressMacroExpansions then
          if charKinds[ord(ch)] = ch_eol then begin
@@ -5011,6 +5009,7 @@ while charKinds[ord(ch)] in [illegal,ch_white,ch_eol] do begin
       NextCh;
       end;
    end; {while}
+7:
 tokenLine := lineNumber;                {record the position of the token}
 tokenColumn := ord(ord4(currentChPtr)-ord4(firstPtr)+1);
 tokenStart := currentChPtr;
@@ -5183,8 +5182,6 @@ case charKinds[ord(ch)] of
          token.isDigraph := true;
          if (ch = '%') and (chPtr <> eofPtr) and (chr(chPtr^) = ':') then begin
             token.kind := poundpoundop; {%:%: digraph}
-            if charKinds[ord('#')] = illegal then
-               Error(1);
             NextCh;
             NextCh;
             end
diff --git a/Table.asm b/Table.asm
index d0ae402..c74e460 100644
--- a/Table.asm
+++ b/Table.asm
@@ -57,7 +57,7 @@ charKinds start                         character set
          dc    i'ch_white'              space
          dc    i'ch_exc'                !
          dc    i'ch_string'             "
-         dc    i'illegal'               #
+         dc    i'ch_pound'              #
          dc    i'illegal'               $
          dc    i'ch_percent'            %
          dc    i'ch_and'                &