Fix issues with type names in the third expression of a for loop.

There were a couple issues here:
*If the type name contained a semicolon (for struct/union member declarations), a spurious error would be reported.
*Tags or enumeration constants declared in the type name should be in scope within the loop, but were not.

These both stemmed from the way the parser handled the third expression, which was to save the tokens from it and re-inject them at the end of the loop. To get the scope issues right, the expression really needs to be evaluated at the point where it occurs, so we now do that. To enable that while still placing the code at the end of the loop, a mechanism to remove and re-insert sections of generated code is introduced.

Here is an example illustrating the issues:

int main(void) {
        int i, j, x;
        for (i = 0; i < 123; i += sizeof(struct {int a;}))
                for (j = 0; j < 123; j += sizeof(enum E {A,B,C}))
                        x = i + j + A;
}
This commit is contained in:
Stephen Heumann 2024-03-13 22:09:25 -05:00
parent 72234a4f2b
commit 81934109fc
4 changed files with 119 additions and 60 deletions

View File

@ -139,7 +139,7 @@ opt[pc_ckn] := 'ckn';
end; {InitWriteCode}
procedure PrintDAG (tag: stringPtr; code: icptr);
procedure PrintDAG {tag: stringPtr; code: icptr};
{ print a DAG }
{ }

107
CGI.pas
View File

@ -293,6 +293,8 @@ type
ccPointer : (pval: longint; pstr: longStringPtr);
end;
codeRef = icptr; {reference to a code location}
{basic blocks}
{------------}
iclist = ^iclistRecord; {used to form lists of records}
@ -658,6 +660,21 @@ procedure GenTool (fop: pcodes; fp1, fp2: integer; dispatcher: longint);
{ dispatcher - tool entry point }
function GetCodeLocation: codeRef;
{ Get a reference to the current location in the generated }
{ code, suitable to be passed to RemoveCode. }
procedure InsertCode (theCode: codeRef);
{ Insert a section of already-generated code that was }
{ previously removed with RemoveCode. }
{ }
{ parameters: }
{ theCode - code removed (returned from RemoveCode) }
{procedure PrintBlocks (tag: stringPtr; bp: blockPtr); {debug}
{ print a series of basic blocks }
@ -667,6 +684,28 @@ procedure GenTool (fop: pcodes; fp1, fp2: integer; dispatcher: longint);
{ bp - first block to print }
{procedure PrintDAG (tag: stringPtr; code: icptr); {debug}
{ print a DAG }
{ }
{ parameters: }
{ tag - label for lines }
{ code - first node in DAG }
function RemoveCode (start: codeRef): codeRef;
{ Remove a section of already-generated code, from immediately }
{ after start up to the latest code generated. Returns the }
{ code removed, so it may be re-inserted later. }
{ }
{ parameters: }
{ start - location to start removing from }
{ }
{ Note: start must be a top-level pcode (not a subexpression). }
{ Note: The region removed must not include a dc_enp. }
function TypeSize (tp: baseTypeEnum): integer;
{ Find the size, in bytes, of a variable }
@ -1431,6 +1470,74 @@ if codeGeneration then begin
end; {GenLdcReal}
function GetCodeLocation{: codeRef};
{ Get a reference to the current location in the generated }
{ code, suitable to be passed to RemoveCode. }
begin {GetCodeLocation}
GetCodeLocation := DAGhead;
end {GetCodeLocation};
procedure InsertCode {theCode: codeRef};
{ Insert a section of already-generated code that was }
{ previously removed with RemoveCode. }
{ }
{ parameters: }
{ theCode - code removed (returned from RemoveCode) }
var
lcode: icptr;
begin {InsertCode}
if theCode <> nil then
if codeGeneration then begin
lcode := theCode;
{ PrintDAG(@'Inserting: ', lcode); {debug}
while lcode^.next <> nil do
lcode := lcode^.next;
lcode^.next := DAGhead;
DAGhead := theCode;
end; {if}
end; {InsertCode}
function RemoveCode {start: codeRef): codeRef};
{ Remove a section of already-generated code, from immediately }
{ after start up to the latest code generated. Returns the }
{ code removed, so it may be re-inserted later. }
{ }
{ parameters: }
{ start - location to start removing from }
{ }
{ Note: start must be a top-level pcode (not a subexpression). }
{ Note: The region removed must not include a dc_enp. }
var
lcode: icptr;
begin {RemoveCode}
if start = DAGhead then
RemoveCode := nil
else begin
RemoveCode := DAGhead;
if codeGeneration then begin
lcode := DAGhead;
while (lcode^.next <> start) and (lcode^.next <> nil) do
lcode := lcode^.next;
if lcode^.next = nil then
Error(cge1);
lcode^.next := nil;
{ PrintDAG(@'Removing: ', DAGhead); {debug}
DAGhead := start;
end; {if}
end; {else}
end; {RemoveCode}
function TypeSize {tp: baseTypeEnum): integer};
{ Find the size, in bytes, of a variable }

View File

@ -122,13 +122,6 @@ type
val: longlong; {switch value}
end;
{token stack}
{-----------}
tokenStackPtr = ^tokenStackRecord;
tokenStackRecord = record
next: tokenStackPtr;
token: tokenType;
end;
{statement stack}
{---------------}
statementPtr = ^statementRecord;
@ -157,7 +150,7 @@ type
);
forSt: (
forLoop: integer; {branch here to loop}
e3List: tokenStackPtr; {tokens for last expression}
e3Code: codeRef; {code for last expression}
);
switchSt: (
maxVal: longint; {max switch value}
@ -690,11 +683,9 @@ var
{ handle a for statement }
var
errorFound: boolean; {did we find an error?}
e3Start: codeRef; {ref to start of code for expression 3}
forLoop, continueLab, breakLab: integer; {branch points}
parencount: integer; {number of unmatched '(' chars}
stPtr: statementPtr; {work pointer}
tl,tk: tokenStackPtr; {for forming expression list}
begin {ForStatement}
NextToken; {skip the 'for' token}
@ -733,29 +724,12 @@ var
end; {if}
Match(semicolonch,22);
tl := nil; {collect the tokens for the last expression}
parencount := 0;
errorFound := false;
while (token.kind <> eofsy)
and ((token.kind <> rparench) or (parencount <> 0))
and (token.kind <> semicolonch) do begin
new(tk); {place the token in the list}
tk^.next := tl;
tl := tk;
tk^.token := token;
if token.kind = lparench then {allow parens in the expression}
parencount := parencount+1
else if token.kind = rparench then
parencount := parencount-1;
NextToken; {next token}
end; {while}
if errorFound then {if an error was found, dump the list}
while tl <> nil do begin
tk := tl;
tl := tl^.next;
dispose(tk);
end; {while}
stPtr^.e3List := tl; {save the list}
e3Start := GetCodeLocation; {generate and save code for expression 3}
if token.kind <> rparench then begin
Expression(normalExpression, [rparench]);
Gen0t(pc_pop, UsualUnaryConversions);
end; {if}
stPtr^.e3Code := RemoveCode(e3Start);
Match(rparench,12); {get the closing for loop paren}
if c99Scope then PushTable;
@ -1128,37 +1102,13 @@ procedure EndForStatement;
{ finish off a for statement }
var
ltoken: tokenType; {for putting ; on stack}
stPtr: statementPtr; {work pointer}
tl,tk: tokenStackPtr; {for forming expression list}
lSuppressMacroExpansions: boolean; {local copy of suppressMacroExpansions}
begin {EndForStatement}
if c99Scope then PopTable;
stPtr := statementList;
Gen1(dc_lab, stPtr^.continueLab); {define the continue label}
tl := stPtr^.e3List; {place the expression back in the list}
if tl <> nil then begin
PutBackToken(token, false, false);
ltoken.kind := semicolonch;
ltoken.class := reservedSymbol;
PutBackToken(ltoken, false, false);
while tl <> nil do begin
PutBackToken(tl^.token, false, false);
tk := tl;
tl := tl^.next;
dispose(tk);
end; {while}
lSuppressMacroExpansions := suppressMacroExpansions; {inhibit token echo}
suppressMacroExpansions := true;
NextToken; {evaluate the expression}
Expression(normalExpression, [semicolonch]);
Gen0t(pc_pop, UsualUnaryConversions);
NextToken; {skip the semicolon}
suppressMacroExpansions := lSuppressMacroExpansions;
end; {if}
InsertCode(stPtr^.e3Code); {insert code for expression 3}
Gen1(pc_ujp, stPtr^.forLoop); {loop to the test}
Gen1(dc_lab, stPtr^.breakLab); {create the exit label}
statementList := stPtr^.next; {pop the statement record}

View File

@ -1616,6 +1616,8 @@ If you use #pragma debug 0x0010 to enable stack check debug code, the compiler w
13. If an empty argument was passed for a macro parameter that was used as an operand of the ## preprocessing operator, the result would likely be incorrect, and subsequent uses of the same macro also might not be expanded correctly.
14. If a struct, union, or enum type name appeared within the third expression in a for loop statement (e.g. in a cast or as the argument to sizeof), ORCA/C could behave incorrectly. It could report a spurious error if a semicolon occurred within the type name as part of a structure or union member declaration. Also, any tags or enumeration constants declared by such a type name should be in scope within the loop body, but they were not.
-- Bugs from C 2.1.1 B3 that have been fixed in C 2.2.0 ---------------------
1. There were various bugs that could cause incorrect code to be generated in certain cases. Some of these were specific to certain optimization passes, alone or in combination.