Fix issues with type names in the third expression of a for loop.

There were a couple of issues here:

* If the type name contained a semicolon (for struct/union member declarations), a spurious error would be reported.
* Tags or enumeration constants declared in the type name should be in scope within the loop, but were not.

These both stemmed from the way the parser handled the third expression, which was to save the tokens from it and re-inject them at the end of the loop. To get the scope issues right, the expression really needs to be evaluated at the point where it occurs, so we now do that. To enable that while still placing the code at the end of the loop, a mechanism to remove and re-insert sections of generated code is introduced.

Here is an example illustrating the issues:

int main(void) {
        int i, j, x;
        for (i = 0; i < 123; i += sizeof(struct {int a;}))
                for (j = 0; j < 123; j += sizeof(enum E {A,B,C}))
                        x = i + j + A;
}
2024-03-13 22:09:25 -05:00 · 2024-03-13 22:09:25 -05:00 · 81934109fc
parent 72234a4f2b
commit 81934109fc
4 changed files with 119 additions and 60 deletions
--- a/CGI.Debug
+++ b/CGI.Debug
@ -139,7 +139,7 @@ opt[pc_ckn] := 'ckn';
 end; {InitWriteCode}


-procedure PrintDAG (tag: stringPtr; code: icptr);
+procedure PrintDAG {tag: stringPtr; code: icptr};

 { print a DAG							}
 {								}
--- a/CGI.pas
+++ b/CGI.pas
@ -293,6 +293,8 @@ type
         ccPointer      : (pval: longint; pstr: longStringPtr);
      end;

+   codeRef = icptr;                     {reference to a code location}
+
 					{basic blocks}
                                        {------------}
   iclist = ^iclistRecord;		{used to form lists of records}
@ -658,6 +660,21 @@ procedure GenTool (fop: pcodes; fp1, fp2: integer; dispatcher: longint);
 {       dispatcher - tool entry point                           }


+function GetCodeLocation: codeRef;
+
+{ Get a reference to the current location in the generated      }
+{ code, suitable to be passed to RemoveCode.                    }
+
+
+procedure InsertCode (theCode: codeRef);
+
+{ Insert a section of already-generated code that was           }
+{ previously removed with RemoveCode.                           }
+{                                                               }
+{ parameters:                                                   }
+{       theCode - code removed (returned from RemoveCode)       }
+
+
 {procedure PrintBlocks (tag: stringPtr; bp: blockPtr); {debug}

 { print a series of basic blocks				}
@ -667,6 +684,28 @@ procedure GenTool (fop: pcodes; fp1, fp2: integer; dispatcher: longint);
 {    bp - first block to print					}


+{procedure PrintDAG (tag: stringPtr; code: icptr); {debug}
+
+{ print a DAG                                                   }
+{                                                               }
+{ parameters:                                                   }
+{    tag - label for lines                                      }
+{    code - first node in DAG                                   }
+
+
+function RemoveCode (start: codeRef): codeRef;
+
+{ Remove a section of already-generated code, from immediately  }
+{ after start up to the latest code generated.  Returns the     }
+{ code removed, so it may be re-inserted later.                 }
+{                                                               }
+{ parameters:                                                   }
+{       start - location to start removing from                 }
+{                                                               }
+{ Note: start must be a top-level pcode (not a subexpression).  }
+{ Note: The region removed must not include a dc_enp.           }
+
+
 function TypeSize (tp: baseTypeEnum): integer;

 { Find the size, in bytes, of a variable			}
@ -1431,6 +1470,74 @@ if codeGeneration then begin
 end; {GenLdcReal}


+function GetCodeLocation{: codeRef};
+
+{ Get a reference to the current location in the generated      }
+{ code (the current DAGhead), suitable to pass to RemoveCode.   }
+
+begin {GetCodeLocation}
+GetCodeLocation := DAGhead;             {the current location is whatever DAGhead points at now}
+end {GetCodeLocation};
+
+
+procedure InsertCode {theCode: codeRef};
+
+{ Insert a section of already-generated code that was           }
+{ previously removed with RemoveCode.                           }
+{                                                               }
+{ parameters:                                                   }
+{       theCode - code removed (returned from RemoveCode)       }
+
+var
+   lcode: icptr;                        {used to find the last node of theCode}
+
+begin {InsertCode}
+if theCode <> nil then                  {nil means there is nothing to insert}
+   if codeGeneration then begin
+      lcode := theCode;
+{     PrintDAG(@'Inserting: ', lcode);  {debug}
+      while lcode^.next <> nil do       {walk to the last node of theCode}
+         lcode := lcode^.next;
+      lcode^.next := DAGhead;           {link the existing list after the inserted section}
+      DAGhead := theCode;               {the inserted section is now at the head of the list}
+      end; {if}
+end; {InsertCode}
+
+
+function RemoveCode {start: codeRef): codeRef};
+
+{ Remove a section of already-generated code, from immediately  }
+{ after start up to the latest code generated.  Returns the     }
+{ code removed, so it may be re-inserted later.                 }
+{                                                               }
+{ parameters:                                                   }
+{       start - location to start removing from                 }
+{                                                               }
+{ Note: start must be a top-level pcode (not a subexpression).  }
+{ Note: The region removed must not include a dc_enp.           }
+
+var
+   lcode: icptr;                        {work pointer for scanning the list}
+
+begin {RemoveCode}
+if start = DAGhead then                 {DAGhead has not moved since start was taken}
+   RemoveCode := nil                    {so there is nothing to remove}
+else begin
+   RemoveCode := DAGhead;               {the removed section begins at the current head}
+   if codeGeneration then begin
+      lcode := DAGhead;
+      while (lcode^.next <> start) and (lcode^.next <> nil) do {find the node whose next is start}
+         lcode := lcode^.next;
+      if lcode^.next = nil then         {reached the end of the list without finding start}
+         Error(cge1);
+      lcode^.next := nil;               {detach the removed section from the rest of the list}
+{     PrintDAG(@'Removing: ', DAGhead); {debug}
+      DAGhead := start;                 {start becomes the head of the list again}
+      end; {if}
+   end; {else}
+end; {RemoveCode}
+
+
 function TypeSize {tp: baseTypeEnum): integer};

 { Find the size, in bytes, of a variable			}
--- a/Parser.pas
+++ b/Parser.pas
@ -122,13 +122,6 @@ type
      val: longlong;                    {switch value}
      end;

-                                        {token stack}
-                                        {-----------}
-   tokenStackPtr = ^tokenStackRecord;
-   tokenStackRecord = record
-      next: tokenStackPtr;
-      token: tokenType;
-      end;
                                        {statement stack}
                                        {---------------}
   statementPtr = ^statementRecord;
@ -157,7 +150,7 @@ type
            );
         forSt: (
            forLoop: integer;           {branch here to loop}
-            e3List: tokenStackPtr;      {tokens for last expression}
+            e3Code: codeRef;            {code for last expression}
            );
         switchSt: (
            maxVal: longint;            {max switch value}
@ -690,11 +683,9 @@ var
   { handle a for statement                                      }
 
   var
-      errorFound: boolean;              {did we find an error?}
+      e3Start: codeRef;                 {ref to start of code for expression 3}
      forLoop, continueLab, breakLab: integer; {branch points}
-      parencount: integer;              {number of unmatched '(' chars}
      stPtr: statementPtr;              {work pointer}
-      tl,tk: tokenStackPtr;             {for forming expression list}

   begin {ForStatement}
   NextToken;                           {skip the 'for' token}
@ -733,29 +724,12 @@ var
      end; {if}
   Match(semicolonch,22);

-   tl := nil;                           {collect the tokens for the last expression}
-   parencount := 0;
-   errorFound := false;
-   while (token.kind <> eofsy)
-      and ((token.kind <> rparench) or (parencount <> 0))
-      and (token.kind <> semicolonch) do begin
-      new(tk);                          {place the token in the list}
-      tk^.next := tl;
-      tl := tk;
-      tk^.token := token;
-      if token.kind = lparench then     {allow parens in the expression}
-         parencount := parencount+1
-      else if token.kind = rparench then
-         parencount := parencount-1;
-      NextToken;                        {next token}
-      end; {while}
-   if errorFound then                   {if an error was found, dump the list}
-      while tl <> nil do begin
-         tk := tl;
-         tl := tl^.next;
-         dispose(tk);
-         end; {while}
-   stPtr^.e3List := tl;                 {save the list}
+   e3Start := GetCodeLocation;          {generate and save code for expression 3}
+   if token.kind <> rparench then begin
+      Expression(normalExpression, [rparench]);
+      Gen0t(pc_pop, UsualUnaryConversions);
+      end; {if}
+   stPtr^.e3Code := RemoveCode(e3Start);
   Match(rparench,12);                  {get the closing for loop paren}

   if c99Scope then PushTable;
@ -1128,37 +1102,13 @@ procedure EndForStatement;
 { finish off a for statement                                    }

 var
-   ltoken: tokenType;                   {for putting ; on stack}
   stPtr: statementPtr;                 {work pointer}
-   tl,tk: tokenStackPtr;                {for forming expression list}
-   lSuppressMacroExpansions: boolean;   {local copy of suppressMacroExpansions}

 begin {EndForStatement}
 if c99Scope then PopTable;
 stPtr := statementList;
 Gen1(dc_lab, stPtr^.continueLab);       {define the continue label}
-
-tl := stPtr^.e3List;                    {place the expression back in the list}
-if tl <> nil then begin
-   PutBackToken(token, false, false);
-   ltoken.kind := semicolonch;
-   ltoken.class := reservedSymbol;
-   PutBackToken(ltoken, false, false);
-   while tl <> nil do begin
-      PutBackToken(tl^.token, false, false);
-      tk := tl;
-      tl := tl^.next;
-      dispose(tk);
-      end; {while}
-   lSuppressMacroExpansions := suppressMacroExpansions; {inhibit token echo}
-   suppressMacroExpansions := true;
-   NextToken;                           {evaluate the expression}
-   Expression(normalExpression, [semicolonch]);
-   Gen0t(pc_pop, UsualUnaryConversions);
-   NextToken;                           {skip the semicolon}
-   suppressMacroExpansions := lSuppressMacroExpansions;
-   end; {if}
-
+InsertCode(stPtr^.e3Code);              {insert code for expression 3}
 Gen1(pc_ujp, stPtr^.forLoop);           {loop to the test}
 Gen1(dc_lab, stPtr^.breakLab);          {create the exit label}
 statementList := stPtr^.next;           {pop the statement record}
--- a/cc.notes
+++ b/cc.notes
@ -1616,6 +1616,8 @@ If you use #pragma debug 0x0010 to enable stack check debug code, the compiler w

 13. If an empty argument was passed for a macro parameter that was used as an operand of the ## preprocessing operator, the result would likely be incorrect, and subsequent uses of the same macro also might not be expanded correctly.

+14. If a struct, union, or enum type name appeared within the third expression in a for loop statement (e.g. in a cast or as the argument to sizeof), ORCA/C could behave incorrectly.  It could report a spurious error if a semicolon occurred within the type name as part of a structure or union member declaration.  Also, any tags or enumeration constants declared by such a type name should be in scope within the loop body, but they were not.
+
 -- Bugs from C 2.1.1 B3 that have been fixed in C 2.2.0 ---------------------

 1.  There were various bugs that could cause incorrect code to be generated in certain cases.  Some of these were specific to certain optimization passes, alone or in combination.