Remove code that treats # as an illegal character in most places.

C90 had constraints requiring # and ## tokens to appear only in preprocessing directives, but C99 and later removed those constraints, so this code is no longer necessary when targeting current language versions. (It would be necessary in a "strict C90" mode, if that were ever implemented.) The main practical effect of this change is that # and ## tokens can be passed as arguments to macros, provided the macro either ignores or stringizes that parameter. # and ## tokens still have no role in the grammar of the C language after preprocessing, so they will be an unexpected token and produce some kind of error if they appear anywhere in the preprocessed code. This commit also contains a change to ensure that a line containing one or more illegal characters (e.g. $) followed by a # is not treated as a preprocessing directive.
2025-03-14 11:32:18 +00:00 · 2022-10-13 18:12:02 -05:00 · 2022-10-13 18:12:02 -05:00 · b8b7dc2c2b
commit b8b7dc2c2b
parent 99a10590b1
4 changed files with 20 additions and 41 deletions
--- a/Asm.pas
+++ b/Asm.pas
@ -166,10 +166,8 @@ var
   { An error was found: skip to the end & quit                 }

   begin {Skip}
-   charKinds[ord('#')] := ch_pound;
   while not (token.kind in [rbracech,eofsy]) do
      NextToken;
-   charKinds[ord('#')] := illegal;
   goto 99;
   end; {Skip}

@ -329,7 +327,6 @@ while not (token.kind in [rbracech,eofsy]) do begin

   {find the label and op-code}
   CheckForComment;
-   charKinds[ord('#')] := ch_pound;     {allow # as a token}
   if token.kind <> ident then begin    {error if not an identifier}
      Error(9);
      Skip;
@ -345,7 +342,6 @@ while not (token.kind in [rbracech,eofsy]) do begin
      opname := token;
      NextToken;
      end; {while}
-   charKinds[ord('#')] := illegal;      {don't allow # as a token}

   {identify the op-code}
   if length(opname.name^) = 3 then begin
--- a/Parser.pas
+++ b/Parser.pas
@ -3625,33 +3625,19 @@ var

   var
      braceCount: integer;              {# of unmatched { chars}
-      doingAsm: boolean;                {compiling an asm statement?}

   begin {SkipFunction}
   Match(lbracech,27);                  {skip to the closing rbrackch}
   braceCount := 1;
-   doingAsm := false;
-   if isAsm then
-      charKinds[ord('#')] := ch_pound;
   while (not (token.kind = eofsy)) and (braceCount <> 0) do begin
-      if token.kind = asmsy then begin
-         doingAsm := true;
-         charKinds[ord('#')] := ch_pound;
-         end {if}
-      else if token.kind = lbracech then
+      if token.kind = lbracech then
         braceCount := braceCount+1
-      else if token.kind = rbracech then begin
+      else if token.kind = rbracech then
         braceCount := braceCount-1;
-         if doingAsm then begin
-            doingAsm := false;
-            charKinds[ord('#')] := illegal;
-            end; {if}
-         end; {else if}
      NextToken;
      end; {while}
   nameFound := false;                  {no pc_nam for the next function (yet)}
   doingFunction := false;              {no longer doing a function}
-   charKinds[ord('#')] := illegal;      {# is a preprocessor command}
   end; {SkipFunction}


--- a/Scanner.pas
+++ b/Scanner.pas
@ -2545,7 +2545,6 @@ var
      mPtr^.saved := false;		{not saved in symbol file}
      mPtr^.tokens := nil;              {no tokens yet}
      mPtr^.isVarargs := false;         {not varargs (yet)}
-      charKinds[ord('#')] := ch_pound;  {allow # as a token}
      if ch = '(' then begin            {scan the parameter list...}
         NextToken;                     {done with the name token...}
         NextToken;                     {skip the opening '('}
@ -2750,7 +2749,6 @@ var
      parameterList := np^.next;
      dispose(np);
      end; {while}
-   charKinds[ord('#')] := illegal;      {don't allow # as a token}
   saveNumber := false;                 {stop saving numeric strings}
   end; {DoDefine}

@ -3480,7 +3478,6 @@ else if charKinds[ord(ch)] = ch_eol     {allow null commands}
 if not tSkipping then
   Error(8);                            {bad preprocessor command}
 2:
-charKinds[ord('#')] := ch_pound;        {allow # as a token}
 expandMacros := false;                  {skip to the end of the line}
 flagOverflows := false;
 skipping := tSkipping;
@ -3488,7 +3485,6 @@ while not (token.kind in [eolsy,eofsy]) do
   NextToken;
 flagOverflows := true;
 expandMacros := true;
-charKinds[ord('#')] := illegal;         {don't allow # as a token}
 reportEOL := lReportEOL;                {restore flags}
 suppressMacroExpansions := lSuppressMacroExpansions;
 skipping := tskipping;
@ -4243,7 +4239,6 @@ new(macros);                            {no preprocessor macros so far}
 for i := 0 to hashSize do
   macros^[i] := nil;
 pathList := nil;			{no additional search paths}
-charKinds[ord('#')] := illegal;         {don't allow # as a token}
 tokenList := nil;                       {nothing in putback buffer}
 saveNumber := false;                    {don't save numbers}
 expandMacros := true;                   {enable macro expansion}
@ -4619,7 +4614,7 @@ procedure NextToken;

 { Read the next token from the file.                            }

-label 1,2,3,4,5,6;
+label 1,2,3,4,5,6,7;

 type
   three = (s100,s1000,sMAX);           {these declarations are used for a}
@ -4974,21 +4969,14 @@ if tokenList <> nil then begin          {get a token put back by a macro}
   goto 2;
   end; {if}
 5:                                      {skip white space}
-while charKinds[ord(ch)] in [illegal,ch_white,ch_eol] do begin
-   if charKinds[ord(ch)] = illegal then begin
-      if (ch = '#') and (lastWasReturn or (token.kind = eolsy)) then begin
+while charKinds[ord(ch)] in [illegal,ch_white,ch_eol,ch_pound] do begin
+   if charKinds[ord(ch)] = ch_pound then begin
+      if lastWasReturn or (token.kind = eolsy) then begin
         NextCh;                        {skip the '#' char}
         PreProcess                     {call the preprocessor}
         end {if}
-      else begin
-         tokenLine := lineNumber;       {record a # token}
-         tokenColumn := ord(ord4(chPtr)-ord4(firstPtr));
-         tokenStart := pointer(ord4(chPtr)-1);
-         tokenEnd := chPtr;
-         if (not skipping) or (not (skipIllegalTokens or (ch = '#'))) then
-            Error(1);
-         NextCh;
-         end; {else}
+      else
+         goto 7;
      end {if}
   else if (charKinds[ord(ch)] = ch_eol) and reportEOL then begin
      token.class := reservedSymbol;    {record an eol token}
@ -5000,6 +4988,16 @@ while charKinds[ord(ch)] in [illegal,ch_white,ch_eol] do begin
      NextCh;
      goto 2;
      end {if}
+   else if charKinds[ord(ch)] = illegal then begin
+      tokenLine := lineNumber;          {record an illegal token}
+      tokenColumn := ord(ord4(chPtr)-ord4(firstPtr));
+      tokenStart := pointer(ord4(chPtr)-1);
+      tokenEnd := chPtr;
+      token.kind := questionch;         {make sure it is not eolsy}
+      if (not skipping) or (not skipIllegalTokens) then
+         Error(1);
+      NextCh;
+      end {else if}
   else begin                           {skip white space}
      if printMacroExpansions and not suppressMacroExpansions then
         if charKinds[ord(ch)] = ch_eol then begin
@ -5011,6 +5009,7 @@ while charKinds[ord(ch)] in [illegal,ch_white,ch_eol] do begin
      NextCh;
      end;
   end; {while}
+7:
 tokenLine := lineNumber;                {record the position of the token}
 tokenColumn := ord(ord4(currentChPtr)-ord4(firstPtr)+1);
 tokenStart := currentChPtr;
@ -5183,8 +5182,6 @@ case charKinds[ord(ch)] of
         token.isDigraph := true;
         if (ch = '%') and (chPtr <> eofPtr) and (chr(chPtr^) = ':') then begin
            token.kind := poundpoundop; {%:%: digraph}
-            if charKinds[ord('#')] = illegal then
-               Error(1);
            NextCh;
            NextCh;
            end
--- a/Table.asm
+++ b/Table.asm
@ -57,7 +57,7 @@ charKinds start                         character set
         dc    i'ch_white'              space
         dc    i'ch_exc'                !
         dc    i'ch_string'             "
-         dc    i'illegal'               #
+         dc    i'ch_pound'              #
         dc    i'illegal'               $
         dc    i'ch_percent'            %
         dc    i'ch_and'                &