From 3c2b4926183baf594023bd105bdd08abe7c62bba Mon Sep 17 00:00:00 2001 From: Stephen Heumann Date: Wed, 8 Jun 2022 20:58:52 -0500 Subject: [PATCH] Add support for compound literals within functions. The basic approach is to generate a single expression tree containing the code for the initialization plus the reference to the compound literal (or its address). The various subexpressions are joined together with pc_bno pcodes, similar to the code generated for the comma operator. The initializer expressions are placed in a balanced binary tree, so that it is not excessively deep. Note: Common subexpression elimination has poor performance for very large trees. This is not specific to compound literals, but compound literals for relatively large arrays can run into this issue. It will eventually complete and generate a correct program, but it may be quite slow. To avoid this, turn off CSE. --- CCommon.pas | 2 +- Expression.pas | 44 ++++++++++++++++++-- Header.pas | 2 +- Parser.pas | 74 +++++++++++++++++++++++++++++----- Scanner.pas | 2 +- Table.asm | 4 +- Tests/Conformance/c99complit.c | 38 +++++++++++++++-- cc.notes | 12 ++++-- 8 files changed, 154 insertions(+), 24 deletions(-) diff --git a/CCommon.pas b/CCommon.pas index 7a78183..84146ae 100644 --- a/CCommon.pas +++ b/CCommon.pas @@ -199,7 +199,7 @@ type eolsy,eofsy, {control characters} typedef, {user types} uminus,uand,uasterisk, {converted operations} - parameteroper,castoper,opplusplus,opminusminus, + parameteroper,castoper,opplusplus,opminusminus,compoundliteral, macroParm); {macro language} {Note: this enumeration also } diff --git a/Expression.pas b/Expression.pas index 4b28bb7..88d2046 100644 --- a/Expression.pas +++ b/Expression.pas @@ -276,6 +276,17 @@ function MakeCompoundLiteral(tp: typePtr): identPtr; extern; { parameters: } { tp - the type of the compound literal } + +procedure AutoInit (variable: identPtr; line: integer; + isCompoundLiteral: boolean); extern; + +{ generate code to initialize an auto 
variable } +{ } +{ parameters: } +{ variable - the variable to initialize } +{ line - line number (used for debugging) } +{ isCompoundLiteral - initializing a compound literal? } + {-- External unsigned math routines ----------------------------} function lshr (x,y: longint): longint; extern; @@ -2038,7 +2049,10 @@ var {create an operand on the stack} new(sp); - sp^.token.kind := ident; + if id^.class = staticsy then + sp^.token.kind := ident + else + sp^.token.kind := compoundliteral; sp^.token.class := identifier; sp^.token.symbolPtr := id; sp^.token.name := id^.name; @@ -2646,7 +2660,7 @@ kind := tree^.token.kind; {A variable identifier is an l-value unless it is a function or } {non-parameter array } -if kind = ident then begin +if kind in [ident,compoundliteral] then begin if tree^.id^.itype^.kind = arrayType then begin if tree^.id^.storage <> parameter then if doDispose then {prevent spurious errors} @@ -2768,6 +2782,7 @@ var lbitDisp,lbitSize: integer; {for temp storage} lisBitField: boolean; + ldoDispose: boolean; {local copy of doDispose} function ExpressionKind (tree: tokenPtr): typeKind; @@ -2861,6 +2876,15 @@ var expressionType := eType; end; {with} end {if} + else if tree^.token.kind = compoundliteral then begin + + {evaluate a compound literal and load its address} + AutoInit(tree^.id, 0, true); + tree^.token.kind := ident; + LoadAddress(tree); + tree^.token.kind := compoundliteral; + Gen0t(pc_bno, cgULong); + end {if} else if tree^.token.kind = uasterisk then begin {load the address of the item pointed to by the pointer} @@ -3564,6 +3588,20 @@ case tree^.token.kind of end; {case} end; + compoundLiteral: begin + AutoInit(tree^.id, 0, true); + tree^.token.kind := ident; + ldoDispose := doDispose; + doDispose := false; + GenerateCode(tree); + doDispose := ldoDispose; + tree^.token.kind := compoundliteral; + if expressionType^.kind = scalarType then + Gen0t(pc_bno, expressionType^.baseType) + else + Gen0t(pc_bno, cgULong); + end; + 
intConst,uintConst,ushortConst,charConst,scharConst,ucharConst: begin Gen1t(pc_ldc, tree^.token.ival, cgWord); lastwasconst := true; @@ -4469,7 +4507,7 @@ case tree^.token.kind of DoIncDec(tree^.left, pc_lld, pc_gld, pc_ild); uand: begin {unary & (address operator)} - if not (tree^.left^.token.kind in [ident,uasterisk]) then + if not (tree^.left^.token.kind in [ident,compoundliteral,uasterisk]) then L_Value(tree^.left); LoadAddress(tree^.left); end; {case uand} diff --git a/Header.pas b/Header.pas index 3d6c9f0..f36d078 100644 --- a/Header.pas +++ b/Header.pas @@ -18,7 +18,7 @@ uses CCommon, MM, Scanner, Symbol, CGI; {$segment 'SCANNER'} const - symFileVersion = 25; {version number of .sym file format} + symFileVersion = 26; {version number of .sym file format} var inhibitHeader: boolean; {should .sym includes be blocked?} diff --git a/Parser.pas b/Parser.pas index 1748d98..9c84b1a 100644 --- a/Parser.pas +++ b/Parser.pas @@ -48,13 +48,15 @@ procedure TypeName; { typeSpec - pointer to the type } -procedure AutoInit (variable: identPtr; line: integer); +procedure AutoInit (variable: identPtr; line: integer; + isCompoundLiteral: boolean); { generate code to initialize an auto variable } { } { parameters: } { variable - the variable to initialize } { line - line number (used for debugging) } +{ isCompoundLiteral - initializing a compound literal? 
} function MakeFuncIdentifier: identPtr; @@ -4010,7 +4012,7 @@ else {if not isFunction then} begin variable^.lln := GetLocalLabel; Gen2(dc_loc, variable^.lln, long(variable^.itype^.size).lsw); if variable^.state = initialized then - AutoInit(variable, startLine); {initialize auto variable} + AutoInit(variable, startLine, false); {initialize auto variable} end; {if} if (token.kind = commach) and (not doingPrototypes) then begin done := false; {allow multiple variables on one line} @@ -4314,17 +4316,22 @@ case statementList^.kind of end; {DoStatement} -procedure AutoInit {variable: identPtr, line: integer}; +procedure AutoInit {variable: identPtr; line: integer; + isCompoundLiteral: boolean}; { generate code to initialize an auto variable } { } { parameters: } { variable - the variable to initialize } { line - line number (used for debugging) } +{ isCompoundLiteral - initializing a compound literal? } var count: integer; {initializer counter} iPtr: initializerPtr; {pointer to the next initializer} + codeCount: longint; {number of initializer expressions} + treeCount: integer; {current number of distinct trees} + ldoDispose: boolean; {local copy of doDispose} procedure Initialize (id: identPtr; disp: longint; itype: typePtr); @@ -4398,6 +4405,27 @@ var end; {ZeroFill} + procedure AddOperation; + + { Deal with a new initializer expression in a compound } + { literal, adding expression tree nodes as appropriate. } + { This aims to produce a balanced binary tree. 
} + + var + val: longint; + + begin {AddOperation} + treeCount := treeCount + 1; + codeCount := codeCount + 1; + val := codeCount; + while (val & 1) = 0 do begin + Gen0t(pc_bno, cgVoid); + treeCount := treeCount - 1; + val := val >> 1; + end; {while} + end; {AddOperation} + + begin {Initialize} while itype^.kind = definedType do itype := itype^.dType; @@ -4407,7 +4435,8 @@ var tree := iptr^.itree; if tree = nil then goto 2; {don't generate code in error case} LoadAddress; {load the destination address} - doDispose := count = 1; {generate the expression value} + {generate the expression value} + doDispose := ldoDispose and (count = 1); {see if this is a constant} {do assignment conversions} while tree^.token.kind = castoper do @@ -4448,6 +4477,8 @@ var pointerType,functionType: Gen0t(pc_sto, cgULong); end; {case} + if isCompoundLiteral then + AddOperation; 2: end; arrayType: begin @@ -4471,6 +4502,8 @@ var Gen0t(pc_stk, cgULong); Gen0t(pc_bno, cgULong); Gen1tName(pc_cup, 0, cgVoid, @'memcpy'); + if isCompoundLiteral then + AddOperation; end; {if} if size < elements then begin elements := elements - size; @@ -4481,6 +4514,8 @@ var Gen0t(pc_stk, cgWord); Gen0t(pc_bno, cgULong); Gen1tName(pc_cup, -1, cgVoid, @'~ZERO'); + if isCompoundLiteral then + AddOperation; end; {if} iPtr := iPtr^.next; goto 1; @@ -4498,6 +4533,8 @@ var Gen0t(pc_stk, cgWord); Gen0t(pc_bno, cgULong); Gen1tName(pc_cup, -1, cgVoid, @'~ZERO'); + if isCompoundLiteral then + AddOperation; disp := disp + size; count := count - long(elements).lsw; if count = 0 then begin @@ -4535,6 +4572,8 @@ var with expressionType^ do Gen2(pc_mov, long(size).msw, long(size).lsw); Gen0t(pc_pop, UsualUnaryConversions); + if isCompoundLiteral then + AddOperation; end {if} else begin union := itype^.kind = unionType; @@ -4600,13 +4639,28 @@ var begin {AutoInit} iPtr := variable^.iPtr; count := iPtr^.count; +if isCompoundLiteral then begin + treeCount := 0; + codeCount := 0; + ldoDispose := doDispose; + end {if} +else + 
ldoDispose := true; if variable^.class <> staticsy then begin if traceBack or debugFlag then if nameFound or debugFlag then if (statementList <> nil) and not statementList^.doingDeclaration then - RecordLineNumber(line); + if line <> 0 then {compound literals pass line = 0} + RecordLineNumber(line); Initialize(variable, 0, variable^.itype); end; {if} +if isCompoundLiteral then begin + while treeCount > 1 do begin + Gen0t(pc_bno, cgVoid); + treeCount := treeCount - 1; + end; {while} + doDispose := lDoDispose; + end; {if} end; {AutoInit} @@ -4681,26 +4735,24 @@ var class: tokenEnum; {storage class} begin {MakeCompoundLiteral} -if functionTable <> nil then begin - Error(164); +if functionTable <> nil then class := autosy - end {if} else class := staticsy; name := pointer(Malloc(25)); name^ := concat('~CompoundLiteral', cnvis(compoundLiteralNumber)); id := NewSymbol(name, tp, class, variableSpace, defined); -Initializer(id); -MakeCompoundLiteral := id; compoundLiteralNumber := compoundLiteralNumber + 1; if compoundLiteralNumber = 0 then Error(57); +Initializer(id); +MakeCompoundLiteral := id; if class = autosy then begin id^.lln := GetLocalLabel; id^.clnext := compoundLiteralToAllocate; compoundLiteralToAllocate := id; end; -end; {MakeFuncIdentifier} +end; {MakeCompoundLiteral} procedure InitParser; diff --git a/Scanner.pas b/Scanner.pas index c0052b4..b0db751 100644 --- a/Scanner.pas +++ b/Scanner.pas @@ -744,7 +744,7 @@ if list or (numErr <> 0) then begin 161: msg := @'illegal operator in a constant expression'; 162: msg := @'invalid escape sequence'; 163: msg := @'pointer assignment discards qualifier(s)'; - 164: msg := @'compound literals within functions are not supported by ORCA/C'; + {164: msg := @'compound literals within functions are not supported by ORCA/C';} 165: msg := @'''\p'' may not be used in a prefixed string'; 166: msg := @'string literals with these prefixes may not be merged'; 167: msg := @'''L''-prefixed character or string constants are not supported by ORCA/C'; diff --git 
a/Table.asm b/Table.asm index d723057..d99a951 100644 --- a/Table.asm +++ b/Table.asm @@ -312,7 +312,7 @@ charSym start single character symbols enum (eolsy,eofsy) control characters enum (typedef) user types enum (uminus,uand,uasterisk) converted operations - enum (parameteroper,castoper,opplusplus,opminusminus) + enum (parameteroper,castoper,opplusplus,opminusminus,compoundliteral) enum (macroParm) macro language dc i'0,0,0,0,0,0,0,0' nul-bel @@ -474,6 +474,7 @@ icp start in-coming priority for expression dc i1'16' castoper dc i1'16' opplusplus dc i1'16' opminusminus + dc i1'200' compoundliteral dc i1'200' macroParm end @@ -648,6 +649,7 @@ isp start in stack priority for expression dc i1'16' castoper dc i1'16' opplusplus dc i1'16' opminusminus + dc i1'0' compoundliteral dc i1'0' macroParm end diff --git a/Tests/Conformance/c99complit.c b/Tests/Conformance/c99complit.c index d9d07ea..c977b43 100644 --- a/Tests/Conformance/c99complit.c +++ b/Tests/Conformance/c99complit.c @@ -1,8 +1,5 @@ /* * Test of compound literals (C99). - * - * This currently only tests compound literals outside of functions, - * since that is the only place where ORCA/C currently supports them. */ #include <stdio.h> @@ -11,6 +8,14 @@ int *p = (int[]){1,2,3}; int *q = &(int[100]){4,5,6}[1]; struct S *s = &(struct S {int i; double d; void *p;}){100,200.5,&p}; +int f(struct S s) { + return s.i; +} + +double g(struct S *s) { + return s->d + s->i; +} + int main(void) { if (p[2] != 3) goto Fail; @@ -24,6 +29,33 @@ int main(void) { p[2] = s->i; if (p[2] != 100) goto Fail; + + if ((int[]){6,7,8}[2] != 8) + goto Fail; + + if (((char){34} += (long long){53}) != 87) + goto Fail; + + if ((int){(double){(long){(char){22}}}} != (signed char){22}) + goto Fail; + + if (((struct S*)((struct S){0,-.5,&(struct S){-12,14,0}}.p))->d != 14.) 
+ goto Fail; + + if (f((struct S){f((struct S){-12,14,0}),23.5}) != -12) + goto Fail; + + if (g(&(struct S){5,2.5,&(char){7}}) != 7.5) + goto Fail; + + if ((char[100]){12}[99] != 0) + goto Fail; + + if ((char[]){"Hello world"}[10] != 'd') + goto Fail; + + if ((char[100]){"Hello world"}[50] != '\0') + goto Fail; printf ("Passed Conformance Test c99complit\n"); return 0; diff --git a/cc.notes b/cc.notes index 82ed6a2..706da80 100644 --- a/cc.notes +++ b/cc.notes @@ -514,15 +514,21 @@ Generic selection expressions are primarily useful within macros, which can give The type of an 'array' parameter is adjusted to a pointer type, and the type qualifiers are applied to that pointer type (so the x parameter in the example has the type "long * const"). The "static" keyword indicates that when the function is called, the corresponding argument must give access to an array of at least the specified length; if it does not, the behavior is undefined. -26. (C99) ORCA/C now has partial support for compound literals. These are expressions of the following form: +26. (C99) ORCA/C now supports compound literals. These are expressions of the following form: ( type-name ) { initializer-list } -Such an expression behaves similarly to a declaration in that it creates an object of the specified type, initialized with the brace-enclosed initializer list. That object is unnamed, but the compound literal expression acts as a reference to it. Note that a compound literal is not a cast, even though the syntax is similar. As an example, the following declaration creates an unnamed array and initializes p to point to the first element of that array: +Such an expression behaves similarly to a declaration in that it creates an object of the specified type, initialized with the brace-enclosed initializer list. That object is unnamed, but the compound literal expression acts as a reference to it. 
Compound literals within a function have automatic storage duration, while ones outside of any function have static storage duration. Note that a compound literal is not a cast, even though the syntax is similar. + +Compound literals can be used in code similarly to the identifier for a named variable. For example, a compound literal can be used to designate a structure to be passed to a function, either directly or via a pointer, e.g.: + + PaintRect(&(Rect){10,20,100,200}); + +Outside of functions, compound literals can be used in initializers, as in the following declaration, which creates an unnamed array and initializes p to point to the first element of that array: int *p = (int[]){1,2,3}; -ORCA/C supports the use of compound literals outside of functions, where they can be used in initializers for global variables (as in the example above). Compound literals outside of functions have static storage duration. Standard C also allows compound literals to be used within functions (with automatic storage duration), but ORCA/C does not currently support that. +(This declaration could also be used within a function, but in that case the variable p and the unnamed array would have automatic storage duration.) Multi-Character Character Constants