Add a new optimize flag for FP math optimizations that break IEEE rules.

There were several existing optimizations that could change behavior in ways that violated the IEEE standard with regard to infinities, NaNs, or signed zeros. They are now gated behind a new #pragma optimize flag. This change allows intermediate code peephole optimization and common subexpression elimination to be used while maintaining IEEE conformance, but also keeps the rule-breaking optimizations available if desired.

See section F.9.2 of recent C standards for a discussion of how these optimizations violate IEEE rules.
This commit is contained in:
Stephen Heumann 2021-11-29 20:31:15 -06:00
parent 6fa294aa3b
commit b43036409e
7 changed files with 61 additions and 14 deletions

21
CGC.asm
View File

@ -252,3 +252,24 @@ maxLabel equ 3200
stz intLabel intLabel := 0; stz intLabel intLabel := 0;
rtl rtl
end end
datachk on
****************************************************************
*
* function SignBit (val: extended): integer;
*
* returns the sign bit of a floating-point number
* (0 for positive, 1 for negative)
*
* Inputs:
* val - 10-byte extended value (declared as 10:val)
*
* Outputs:
* 2-byte integer: 0 if the sign bit is clear, 1 if it is set
*
* Notes:
* Only the sign bit is examined; the rest of the value is
* ignored. A negative zero therefore yields 1, which lets
* callers tell -0.0 from +0.0 even though the two compare
* equal.
*
* Method: the word at val+8 (the last two bytes of the
* 10-byte value) is shifted left so its top bit lands in
* the carry flag; the low word of val is then cleared and
* the carry is rotated into its bit 0, leaving 0 or 1 there.
* That word is what gets returned, so the parameter's own
* storage is overwritten in the process.
*
* NOTE(review): this relies on 16-bit accumulator/memory
* mode being in effect on entry, so that asl val+8 reaches
* the sign bit - confirm against the subroutine macro.
*
****************************************************************
*
SignBit start cg
subroutine (10:val),0
asl val+8
stz val
rol val
return 2:val
end

View File

@ -107,6 +107,12 @@ procedure InitLabels; extern;
{ } { }
{ Note: also defined in CGI.pas } { Note: also defined in CGI.pas }
function SignBit (val: extended): integer; extern;
{ returns the sign bit of a floating-point number }
{ (0 for positive, 1 for negative) }
{ }
{ parameters: }
{ val - value whose sign bit is wanted }
{ }
{ Only the sign bit is inspected, so negative zero gives 1. }
{ The routine is implemented in assembly in CGC.asm. }
{-- These routines are defined in the compiler, but used from cg --} {-- These routines are defined in the compiler, but used from cg --}
function Calloc (bytes: integer): ptr; extern; function Calloc (bytes: integer): ptr; extern;

View File

@ -327,6 +327,7 @@ var
debugFlag: boolean; {generate debugger calls?} debugFlag: boolean; {generate debugger calls?}
debugStrFlag: boolean; {gsbug/niftylist debug names?} debugStrFlag: boolean; {gsbug/niftylist debug names?}
dataBank: boolean; {save, restore data bank?} dataBank: boolean; {save, restore data bank?}
fastMath: boolean; {do FP math opts that break IEEE rules?}
floatCard: integer; {0 -> SANE; 1 -> FPE} floatCard: integer; {0 -> SANE; 1 -> FPE}
floatSlot: integer; {FPE slot} floatSlot: integer; {FPE slot}
loopOptimizations: boolean; {do loop optimizations?} loopOptimizations: boolean; {do loop optimizations?}
@ -817,6 +818,7 @@ volatile := false; {no volatile qualifiers found}
registers := cLineOptimize; {don't do register optimizations} registers := cLineOptimize; {don't do register optimizations}
peepHole := cLineOptimize; {not doing peephole optimization (yet)} peepHole := cLineOptimize; {not doing peephole optimization (yet)}
npeepHole := cLineOptimize; npeepHole := cLineOptimize;
fastMath := cLineOptimize;
commonSubexpression := cLineOptimize; {not doing common subexpression elimination} commonSubexpression := cLineOptimize; {not doing common subexpression elimination}
loopOptimizations := cLineOptimize; {not doing loop optimizations, yet} loopOptimizations := cLineOptimize; {not doing loop optimizations, yet}

30
DAG.pas
View File

@ -198,7 +198,9 @@ else if (op1 <> nil) and (op2 <> nil) then
CodesMatch := true; CodesMatch := true;
cgReal, cgDouble, cgComp, cgExtended: cgReal, cgDouble, cgComp, cgExtended:
if op1^.rval = op2^.rval then if op1^.rval = op2^.rval then
CodesMatch := true; if (SignBit(op1^.rval) = SignBit(op2^.rval))
or fastMath then
CodesMatch := true;
cgString: cgString:
CodesMatch := LongStrCmp(op1^.str, op2^.str); CodesMatch := LongStrCmp(op1^.str, op2^.str);
cgVoid, ccPointer: cgVoid, ccPointer:
@ -899,8 +901,9 @@ case op^.opcode of {check for optimizations of this node}
if op^.left^.opcode = pc_ldc then if op^.left^.opcode = pc_ldc then
ReverseChildren(op); ReverseChildren(op);
if op^.right^.opcode = pc_ldc then begin if op^.right^.opcode = pc_ldc then begin
if op^.right^.rval = 0.0 then if fastMath then
opv := op^.left; if op^.right^.rval = 0.0 then
opv := op^.left;
end; {if} end; {if}
end; {else} end; {else}
end; {case pc_adr} end; {case pc_adr}
@ -2069,8 +2072,9 @@ case op^.opcode of {check for optimizations of this node}
if rval = 1.0 then if rval = 1.0 then
opv := op^.left opv := op^.left
else if rval = 0.0 then else if rval = 0.0 then
if not SideEffects(op^.left) then if fastMath then
opv := op^.right; if not SideEffects(op^.left) then
opv := op^.right;
end; {if} end; {if}
end; {else} end; {else}
end; {case pc_mpr} end; {case pc_mpr}
@ -2346,15 +2350,17 @@ case op^.opcode of {check for optimizations of this node}
op^.left^.rval := op^.left^.rval - op^.right^.rval; op^.left^.rval := op^.left^.rval - op^.right^.rval;
opv := op^.left; opv := op^.left;
end {if} end {if}
else if op^.left^.rval = 0.0 then begin else if op^.left^.rval = 0.0 then
op^.opcode := pc_ngr; if fastMath then begin
op^.left := op^.right; op^.opcode := pc_ngr;
op^.right := nil; op^.left := op^.right;
end; {else if} op^.right := nil;
end; {if}
end {if} end {if}
else if op^.right^.opcode = pc_ldc then begin else if op^.right^.opcode = pc_ldc then begin
if op^.right^.rval = 0.0 then if fastMath then
opv := op^.left; if op^.right^.rval = 0.0 then
opv := op^.left;
end; {if} end; {if}
end; {case pc_sbr} end; {case pc_sbr}

View File

@ -18,7 +18,7 @@ uses CCommon, MM, Scanner, Symbol, CGI;
{$segment 'SCANNER'} {$segment 'SCANNER'}
const const
symFileVersion = 18; {version number of .sym file format} symFileVersion = 19; {version number of .sym file format}
var var
inhibitHeader: boolean; {should .sym includes be blocked?} inhibitHeader: boolean; {should .sym includes be blocked?}
@ -845,7 +845,8 @@ procedure EndInclude {chPtr: ptr};
| (ord(saveStack) << 3) | (ord(saveStack) << 3)
| (ord(commonSubexpression) << 4) | (ord(commonSubexpression) << 4)
| (ord(loopOptimizations) << 5) | (ord(loopOptimizations) << 5)
| (ord(strictVararg) << 6)); | (ord(strictVararg) << 6)
| (ord(fastMath) << 7));
p_stacksize: WriteWord(stackSize); p_stacksize: WriteWord(stackSize);
@ -1508,6 +1509,7 @@ var
commonSubexpression := odd(val >> 4); commonSubexpression := odd(val >> 4);
loopOptimizations := odd(val >> 5); loopOptimizations := odd(val >> 5);
strictVararg := odd(val >> 6); strictVararg := odd(val >> 6);
fastMath := odd(val >> 7);
end; end;
p_stacksize: stackSize := ReadWord; p_stacksize: stackSize := ReadWord;

View File

@ -3139,6 +3139,7 @@ if ch in ['a','d','e','i','l','p','u','w'] then begin
{ 16 - common subexpression elimination } { 16 - common subexpression elimination }
{ 32 - loop invariant removal } { 32 - loop invariant removal }
{ 64 - remove stack checks for vararg calls} { 64 - remove stack checks for vararg calls}
{ 128 - fp math opts that break IEEE rules }
FlagPragmas(p_optimize); FlagPragmas(p_optimize);
NumericDirective; NumericDirective;
if expressionType^.kind = scalarType then if expressionType^.kind = scalarType then
@ -3152,6 +3153,7 @@ if ch in ['a','d','e','i','l','p','u','w'] then begin
commonSubexpression := odd(val >> 4); commonSubexpression := odd(val >> 4);
loopOptimizations := odd(val >> 5); loopOptimizations := odd(val >> 5);
strictVararg := not odd(val >> 6); strictVararg := not odd(val >> 6);
fastMath := odd(val >> 7);
if saveStack then if saveStack then
npeepHole := false; npeepHole := false;
if token.kind <> eolsy then if token.kind <> eolsy then

View File

@ -492,6 +492,14 @@ Using the extended format provides greater precision and range than the float or
If you want to get a value strictly in a certain type with no extra range or precision, you can store it in a variable of that type or explicitly cast it to that type. (In older versions of ORCA/C, floating-point casts did not remove extra range or precision, but now they do, as required by the C standards.) If you want to get a value strictly in a certain type with no extra range or precision, you can store it in a variable of that type or explicitly cast it to that type. (In older versions of ORCA/C, floating-point casts did not remove extra range or precision, but now they do, as required by the C standards.)
Floating-Point Optimizations
----------------------------
ORCA/C can perform certain optimizations on floating-point computations based on properties that are true for real numbers but are not always true in the IEEE floating-point arithmetic system. In particular, these optimizations can occasionally cause behavior that differs from the IEEE standard in regard to infinities, NaNs, or the sign of zero. For example, replacing x + 0.0 with x gives -0.0 rather than +0.0 when x is -0.0, and replacing x * 0.0 with 0.0 gives the wrong result when x is an infinity, a NaN, or a negative number. Historically, these optimizations were performed mainly as part of intermediate code peephole optimization and in some cases also as part of common subexpression elimination.
A new #pragma optimize bit has now been introduced to control this behavior. Setting bit 7 (a value of 128) allows floating-point math optimizations that may violate the IEEE standard. It currently only has an effect if #pragma optimize bit 0 or bit 4 is also set. If bit 7 is not set, these floating-point optimizations will not be performed. This allows most aspects of intermediate code peephole optimization and common subexpression elimination to be used while preserving IEEE floating-point behavior.
Additions to #pragma ignore Additions to #pragma ignore
--------------------------- ---------------------------
Several additional #pragma ignore bits are now supported. Several additional #pragma ignore bits are now supported.