From 81934109fc9b926f4f929ada915477d938904c9f Mon Sep 17 00:00:00 2001
From: Stephen Heumann <stephenheumann@gmail.com>
Date: Wed, 13 Mar 2024 22:09:25 -0500
Subject: [PATCH] Fix issues with type names in the third expression of a for
 loop.

There were a couple issues here:
*If the type name contained a semicolon (for struct/union member declarations), a spurious error would be reported.
*Tags or enumeration constants declared in the type name should be in scope within the loop, but were not.

These both stemmed from the way the parser handled the third expression, which was to save the tokens from it and re-inject them at the end of the loop. To get the scope issues right, the expression really needs to be evaluated at the point where it occurs, so we now do that. To enable that while still placing the code at the end of the loop, a mechanism to remove and re-insert sections of generated code is introduced.

Here is an example illustrating the issues:

int main(void) {
        int i, j, x;
        for (i = 0; i < 123; i += sizeof(struct {int a;}))
                for (j = 0; j < 123; j += sizeof(enum E {A,B,C}))
                        x = i + j + A;
}
---
 CGI.Debug  |   2 +-
 CGI.pas    | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 Parser.pas |  68 +++++-----------------------------
 cc.notes   |   2 +
 4 files changed, 119 insertions(+), 60 deletions(-)

diff --git a/CGI.Debug b/CGI.Debug
index 5b93648..2231ecf 100644
--- a/CGI.Debug
+++ b/CGI.Debug
@@ -139,7 +139,7 @@ opt[pc_ckn] := 'ckn';
 end; {InitWriteCode}
 
 
-procedure PrintDAG (tag: stringPtr; code: icptr);
+procedure PrintDAG {tag: stringPtr; code: icptr};
 
 { print a DAG							}
 {								}
diff --git a/CGI.pas b/CGI.pas
index c37ae6f..47a6411 100644
--- a/CGI.pas
+++ b/CGI.pas
@@ -293,6 +293,8 @@ type
          ccPointer      : (pval: longint; pstr: longStringPtr);
       end;
 
+   codeRef = icptr;                     {reference to a code location}
+
 					{basic blocks}
                                         {------------}
    iclist = ^iclistRecord;		{used to form lists of records}
@@ -658,6 +660,21 @@ procedure GenTool (fop: pcodes; fp1, fp2: integer; dispatcher: longint);
 {       dispatcher - tool entry point                           }
 
 
+function GetCodeLocation: codeRef;
+
+{ Get a reference to the current location in the generated      }
+{ code, suitable to be passed to RemoveCode.                    }
+
+
+procedure InsertCode (theCode: codeRef);
+
+{ Insert a section of already-generated code that was           }
+{ previously removed with RemoveCode.                           }
+{                                                               }
+{ parameters:                                                   }
+{       theCode - code removed (returned from RemoveCode)       }
+
+
 {procedure PrintBlocks (tag: stringPtr; bp: blockPtr); {debug}
 
 { print a series of basic blocks				}
@@ -667,6 +684,28 @@ procedure GenTool (fop: pcodes; fp1, fp2: integer; dispatcher: longint);
 {    bp - first block to print					}
 
 
+{procedure PrintDAG (tag: stringPtr; code: icptr); {debug}
+
+{ print a DAG                                                   }
+{                                                               }
+{ parameters:                                                   }
+{    tag - label for lines                                      }
+{    code - first node in DAG                                   }
+
+
+function RemoveCode (start: codeRef): codeRef;
+
+{ Remove a section of already-generated code, from immediately  }
+{ after start up to the latest code generated.  Returns the     }
+{ code removed, so it may be re-inserted later.                 }
+{                                                               }
+{ parameters:                                                   }
+{       start - location to start removing from                 }
+{                                                               }
+{ Note: start must be a top-level pcode (not a subexpression).  }
+{ Note: The region removed must not include a dc_enp.           }
+
+
 function TypeSize (tp: baseTypeEnum): integer;
 
 { Find the size, in bytes, of a variable			}
@@ -1431,6 +1470,74 @@ if codeGeneration then begin
 end; {GenLdcReal}
 
 
+function GetCodeLocation{: codeRef};
+
+{ Get a reference to the current location in the generated      }
+{ code, suitable to be passed to RemoveCode.                    }
+
+begin {GetCodeLocation}
+GetCodeLocation := DAGhead;
+end {GetCodeLocation};
+
+
+procedure InsertCode {theCode: codeRef};
+
+{ Insert a section of already-generated code that was           }
+{ previously removed with RemoveCode.                           }
+{                                                               }
+{ parameters:                                                   }
+{       theCode - code removed (returned from RemoveCode)       }
+
+var
+   lcode: icptr;
+
+begin {InsertCode}
+if theCode <> nil then
+   if codeGeneration then begin
+      lcode := theCode;
+{     PrintDAG(@'Inserting: ', lcode);  {debug}
+      while lcode^.next <> nil do
+         lcode := lcode^.next;
+      lcode^.next := DAGhead;
+      DAGhead := theCode;
+      end; {if}
+end; {InsertCode}
+
+
+function RemoveCode {start: codeRef): codeRef};
+
+{ Remove a section of already-generated code, from immediately  }
+{ after start up to the latest code generated.  Returns the     }
+{ code removed, so it may be re-inserted later.                 }
+{                                                               }
+{ parameters:                                                   }
+{       start - location to start removing from                 }
+{                                                               }
+{ Note: start must be a top-level pcode (not a subexpression).  }
+{ Note: The region removed must not include a dc_enp.           }
+
+var
+   lcode: icptr;
+
+begin {RemoveCode}
+if start = DAGhead then
+   RemoveCode := nil
+else begin
+   RemoveCode := DAGhead;
+   if codeGeneration then begin
+      lcode := DAGhead;
+      while (lcode^.next <> start) and (lcode^.next <> nil) do
+         lcode := lcode^.next;
+      if lcode^.next = nil then
+         Error(cge1);
+      lcode^.next := nil;
+{     PrintDAG(@'Removing: ', DAGhead); {debug}
+      DAGhead := start;
+      end; {if}
+   end; {else}
+end; {RemoveCode}
+
+
 function TypeSize {tp: baseTypeEnum): integer};
 
 { Find the size, in bytes, of a variable			}
diff --git a/Parser.pas b/Parser.pas
index 67c2d87..dd8c4ab 100644
--- a/Parser.pas
+++ b/Parser.pas
@@ -122,13 +122,6 @@ type
       val: longlong;                    {switch value}
       end;
 
-                                        {token stack}
-                                        {-----------}
-   tokenStackPtr = ^tokenStackRecord;
-   tokenStackRecord = record
-      next: tokenStackPtr;
-      token: tokenType;
-      end;
                                         {statement stack}
                                         {---------------}
    statementPtr = ^statementRecord;
@@ -157,7 +150,7 @@ type
             );
          forSt: (
             forLoop: integer;           {branch here to loop}
-            e3List: tokenStackPtr;      {tokens for last expression}
+            e3Code: codeRef;            {code for last expression}
             );
          switchSt: (
             maxVal: longint;            {max switch value}
@@ -690,11 +683,9 @@ var
    { handle a for statement                                      }
  
    var
-      errorFound: boolean;              {did we find an error?}
+      e3Start: codeRef;                 {ref to start of code for expression 3}
       forLoop, continueLab, breakLab: integer; {branch points}
-      parencount: integer;              {number of unmatched '(' chars}
       stPtr: statementPtr;              {work pointer}
-      tl,tk: tokenStackPtr;             {for forming expression list}
 
    begin {ForStatement}
    NextToken;                           {skip the 'for' token}
@@ -733,29 +724,12 @@ var
       end; {if}
    Match(semicolonch,22);
 
-   tl := nil;                           {collect the tokens for the last expression}
-   parencount := 0;
-   errorFound := false;
-   while (token.kind <> eofsy)
-      and ((token.kind <> rparench) or (parencount <> 0))
-      and (token.kind <> semicolonch) do begin
-      new(tk);                          {place the token in the list}
-      tk^.next := tl;
-      tl := tk;
-      tk^.token := token;
-      if token.kind = lparench then     {allow parens in the expression}
-         parencount := parencount+1
-      else if token.kind = rparench then
-         parencount := parencount-1;
-      NextToken;                        {next token}
-      end; {while}
-   if errorFound then                   {if an error was found, dump the list}
-      while tl <> nil do begin
-         tk := tl;
-         tl := tl^.next;
-         dispose(tk);
-         end; {while}
-   stPtr^.e3List := tl;                 {save the list}
+   e3Start := GetCodeLocation;          {generate and save code for expression 3}
+   if token.kind <> rparench then begin
+      Expression(normalExpression, [rparench]);
+      Gen0t(pc_pop, UsualUnaryConversions);
+      end; {if}
+   stPtr^.e3Code := RemoveCode(e3Start);
    Match(rparench,12);                  {get the closing for loop paren}
 
    if c99Scope then PushTable;
@@ -1128,37 +1102,13 @@ procedure EndForStatement;
 { finish off a for statement                                    }
 
 var
-   ltoken: tokenType;                   {for putting ; on stack}
    stPtr: statementPtr;                 {work pointer}
-   tl,tk: tokenStackPtr;                {for forming expression list}
-   lSuppressMacroExpansions: boolean;   {local copy of suppressMacroExpansions}
 
 begin {EndForStatement}
 if c99Scope then PopTable;
 stPtr := statementList;
 Gen1(dc_lab, stPtr^.continueLab);       {define the continue label}
-
-tl := stPtr^.e3List;                    {place the expression back in the list}
-if tl <> nil then begin
-   PutBackToken(token, false, false);
-   ltoken.kind := semicolonch;
-   ltoken.class := reservedSymbol;
-   PutBackToken(ltoken, false, false);
-   while tl <> nil do begin
-      PutBackToken(tl^.token, false, false);
-      tk := tl;
-      tl := tl^.next;
-      dispose(tk);
-      end; {while}
-   lSuppressMacroExpansions := suppressMacroExpansions; {inhibit token echo}
-   suppressMacroExpansions := true;
-   NextToken;                           {evaluate the expression}
-   Expression(normalExpression, [semicolonch]);
-   Gen0t(pc_pop, UsualUnaryConversions);
-   NextToken;                           {skip the semicolon}
-   suppressMacroExpansions := lSuppressMacroExpansions;
-   end; {if}
-
+InsertCode(stPtr^.e3Code);              {insert code for expression 3}
 Gen1(pc_ujp, stPtr^.forLoop);           {loop to the test}
 Gen1(dc_lab, stPtr^.breakLab);          {create the exit label}
 statementList := stPtr^.next;           {pop the statement record}
diff --git a/cc.notes b/cc.notes
index 6cfda7f..c5a9d35 100644
--- a/cc.notes
+++ b/cc.notes
@@ -1616,6 +1616,8 @@ If you use #pragma debug 0x0010 to enable stack check debug code, the compiler w
 
 13. If an empty argument was passed for a macro parameter that was used as an operand of the ## preprocessing operator, the result would likely be incorrect, and subsequent uses of the same macro also might not be expanded correctly.
 
+14. If a struct, union, or enum type name appeared within the third expression in a for loop statement (e.g. in a cast or as the argument to sizeof), ORCA/C could behave incorrectly.  It could report a spurious error if a semicolon occurred within the type name as part of a structure or union member declaration.  Also, any tags or enumeration constants declared by such a type name should be in scope within the loop body, but they were not.
+
 -- Bugs from C 2.1.1 B3 that have been fixed in C 2.2.0 ---------------------
 
 1.  There were various bugs that could cause incorrect code to be generated in certain cases.  Some of these were specific to certain optimization passes, alone or in combination.