From d891e672e3d75c0de6ecc6752360e7655897e932 Mon Sep 17 00:00:00 2001 From: Stephen Heumann Date: Thu, 18 Feb 2021 19:17:39 -0600 Subject: [PATCH] Add various intermediate code peephole optimizations. These mainly cover 64-bit arithmetic and shifts, but also include a few optimizations for 16-bit and 32-bit shifts. --- CGC.asm | 54 ++++++++++ CGC.macros | 173 +++++++++++++++++++++++++++++++ CGC.pas | 18 ++++ DAG.pas | 298 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 526 insertions(+), 17 deletions(-) diff --git a/CGC.asm b/CGC.asm index 86bfd11..716a217 100644 --- a/CGC.asm +++ b/CGC.asm @@ -85,6 +85,60 @@ rec_cmp equ 18 disp to comp (SANE) value rtl end +**************************************************************** +* +* procedure CnvXLL (var result: longlong; val: extended); +* +* Convert floating point to long long +* +* Inputs: +* result - longlong to hold the converted value +* val - the real value +* +**************************************************************** + +CnvXLL start cg + + subroutine (4:result,10:val),0 + + pei (val+8) + pei (val+6) + pei (val+4) + pei (val+2) + pei (val) + jsl ~CnvRealLongLong + pl8 [result] + + return + end + +**************************************************************** +* +* procedure CnvXULL (var result: longlong; val: extended); +* +* Convert floating point to unsigned long long +* +* Inputs: +* result - longlong to hold the converted value +* val - the real value +* +**************************************************************** + +CnvXULL start cg + + subroutine (4:result,10:val),0 + + pei (val+8) + pei (val+6) + pei (val+4) + pei (val+2) + pei (val) + jsl ~CnvRealULongLong + pl8 [result] + + return + end + datachk off **************************************************************** * diff --git a/CGC.macros b/CGC.macros index 2a32ccc..3f9e3c4 100644 --- a/CGC.macros +++ b/CGC.macros @@ -186,3 +186,176 @@ LDX #$090A JSL $E10000 MEND + macro +&l pl8 &n1 + lclc &c +&l anop + aif s:longa=1,.a + rep #%00100000 +.a +&c amid &n1,1,1 + aif "&c"<>"{",.b +&c amid &n1,l:&n1,1 + aif "&c"<>"}",.f +&n1 amid &n1,2,l:&n1-2 + pla + sta (&n1) + ldy #2 + pla + sta (&n1),y + ldy #4 + pla + sta (&n1),y + ldy #6 + pla + sta (&n1),y + ago .d +.b + aif "&c"<>"[",.c + pla + sta &n1 + ldy #2 + pla + sta &n1,y + ldy #4 + pla + sta &n1,y + ldy #6 + pla + sta &n1,y + ago .d +.c + pla + sta &n1 + pla + sta &n1+2 + pla + sta &n1+4 + pla + sta &n1+6 +.d + aif s:longa=1,.e + sep #%00100000 +.e + mexit +.f + mnote "Missing closing '}'",16 + mend + MACRO +&lab subroutine &parms,&work +&lab anop + aif c:&work,.a + lclc &work +&work setc 0 +.a + gbla &totallen + gbla &worklen +&worklen seta &work +&totallen seta 0 + aif c:&parms=0,.e + lclc &len + lclc &p + lcla &i +&i seta c:&parms +.b +&p setc &parms(&i) +&len amid &p,2,1 + aif "&len"=":",.c +&len amid &p,1,2 +&p amid &p,4,l:&p-3 + ago .d +.c +&len amid &p,1,1 +&p amid &p,3,l:&p-2 +.d +&p equ &totallen+3+&work +&totallen seta &totallen+&len +&i seta &i-1 + aif &i,^b +.e + tsc + sec + sbc #&work + tcs + inc a + phd + tcd + mend + MACRO +&lab return &r +&lab anop + lclc &len + aif c:&r,.a + lclc &r +&r setc 0 +&len setc 0 + ago .h +.a +&len amid &r,2,1 + aif "&len"=":",.b +&len amid &r,1,2 +&r amid &r,4,l:&r-3 + ago .c +.b +&len amid &r,1,1 +&r amid &r,3,l:&r-2 +.c + aif &len<>2,.d + ldy &r + ago .h +.d + aif &len<>4,.e + ldx &r+2 + ldy &r + ago .h +.e + aif &len<>10,.g + aif &totallen=0,.f + lda &worklen+1 + sta &worklen+&totallen+1 + lda &worklen + sta &worklen+&totallen +.f + pld + tsc + clc + adc #&worklen+&totallen + tcs + phb + plx + ply + lda &r+8 + pha + lda &r+6 + pha + lda &r+4 + pha + lda &r+2 + pha + lda &r + pha + phy + phx + plb + rtl + mexit +.g + mnote 'Not a valid return length',16 + mexit +.h + aif &totallen=0,.i + lda &worklen+1 + sta &worklen+&totallen+1 + lda &worklen + sta &worklen+&totallen +.i + pld + tsc + clc + adc #&worklen+&totallen + tcs + aif &len=0,.j + tya +.j + rtl + mend diff --git a/CGC.pas b/CGC.pas index cd0a2e8..bc8ff55 100644 --- a/CGC.pas +++ b/CGC.pas @@ -67,6 +67,24 @@ procedure CnvSX (rec: realrec); extern; { has space for the result } +procedure CnvXLL (var result: longlong; val: extended); extern; + +{ convert a real number to long long } +{ } +{ parameters: } +{ result - longlong to hold the converted value } +{ val - the real value } + + +procedure CnvXULL (var result: longlong; val: extended); extern; + +{ convert a real number to unsigned long long } +{ } +{ parameters: } +{ result - longlong to hold the converted value } +{ val - the real value } + + procedure InitLabels; extern; { initialize the labels array for a procedure } diff --git a/DAG.pas b/DAG.pas index 1ababf9..486aaa1 100644 --- a/DAG.pas +++ b/DAG.pas @@ -11,7 +11,7 @@ unit DAG; interface -{$segment 'CG'} +{$segment 'DAG'} {$LibPrefix '0/obj/'} @@ -49,6 +49,31 @@ function umod (x,y: longint): longint; extern; function umul (x,y: longint): longint; extern; +function lshr (x,y: longint): longint; extern; + +{-- External 64-bit math routines; imported from Expression.pas --} +{ Procedures for arithmetic and shifts compute "x := x OP y". } + +procedure umul64 (var x: longlong; y: longlong); extern; + +procedure udiv64 (var x: longlong; y: longlong); extern; + +procedure div64 (var x: longlong; y: longlong); extern; + +procedure umod64 (var x: longlong; y: longlong); extern; + +procedure rem64 (var x: longlong; y: longlong); extern; + +procedure add64 (var x: longlong; y: longlong); extern; + +procedure sub64 (var x: longlong; y: longlong); extern; + +procedure shl64 (var x: longlong; y: integer); extern; + +procedure ashr64 (var x: longlong; y: integer); extern; + +procedure lshr64 (var x: longlong; y: integer); extern; + {---------------------------------------------------------------} function CodesMatch (op1, op2: icptr; exact: boolean): boolean; @@ -224,8 +249,8 @@ if opt1 = cgByte then begin opt1 := cgWord; end {if} else if opt1 = cgUByte then begin - op1^.optype := cgUWord; - opt1 := cgUWord; + op1^.optype := cgWord; + opt1 := cgWord; end {else if} else if opt1 in [cgReal, cgDouble, cgComp] then begin op1^.optype := cgExtended; @@ -236,8 +261,8 @@ if opt2 = cgByte then begin opt2 := cgWord; end {if} else if opt2 = cgUByte then begin - op2^.optype := cgUWord; - opt2 := cgUWord; + op2^.optype := cgWord; + opt2 := cgWord; end {else if} else if opt2 in [cgReal, cgDouble, cgComp] then begin op2^.optype := cgExtended; @@ -877,12 +902,18 @@ case op^.opcode of {check for optimizations of this node} end; {case pc_adr} pc_adq: begin {pc_adq} - if op^.left^.opcode = pc_ldc then - ReverseChildren(op); - if op^.right^.opcode = pc_ldc then begin - if (op^.right^.qval.lo = 0) and (op^.right^.qval.hi = 0) then - opv := op^.left; - end; {if} + if (op^.right^.opcode = pc_ldc) and (op^.left^.opcode = pc_ldc) then begin + add64(op^.left^.qval, op^.right^.qval); + opv := op^.left; + end {if} + else begin + if op^.left^.opcode = pc_ldc then + ReverseChildren(op); + if op^.right^.opcode = pc_ldc then begin + if (op^.right^.qval.lo = 0) and (op^.right^.qval.hi = 0) then + opv := op^.left; + end; {if} + end; {else} end; {case pc_adq} pc_and: begin {pc_and} @@ -1106,6 +1137,14 @@ case op^.opcode of {check for optimizations of this node} op^.left^.q := 0; op^.left^.lval := lval; end; + cgQuad,cgUQuad: begin + op^.left^.qval.lo := op^.left^.q; + if op^.left^.qval.lo < 0 then + op^.left^.qval.hi := -1 + else + op^.left^.qval.hi := 0; + op^.left^.q := 0; + end; cgReal,cgDouble,cgComp,cgExtended: begin rval := op^.left^.q; op^.left^.q := 0; @@ -1121,6 +1160,11 @@ case op^.opcode of {check for optimizations of this node} op^.left^.q := 0; op^.left^.lval := lval; end; + cgQuad,cgUQuad: begin + op^.left^.qval.lo := ord4(op^.left^.q) & $0000FFFF; + op^.left^.qval.hi := 0; + op^.left^.q := 0; + end; cgReal,cgDouble,cgComp,cgExtended: begin rval := ord4(op^.left^.q) & $0000FFFF; op^.left^.q := 0; @@ -1136,6 +1180,13 @@ case op^.opcode of {check for optimizations of this node} op^.left^.q := q; end; cgLong, cgULong: ; + cgQuad,cgUQuad: begin + op^.left^.qval.lo := op^.left^.lval; + if op^.left^.qval.lo < 0 then + op^.left^.qval.hi := -1 + else + op^.left^.qval.hi := 0; + end; cgReal,cgDouble,cgComp,cgExtended: begin rval := op^.left^.lval; op^.left^.lval := 0; @@ -1151,6 +1202,10 @@ case op^.opcode of {check for optimizations of this node} op^.left^.q := q; end; cgLong, cgULong: ; + cgQuad,cgUQuad: begin + op^.left^.qval.lo := op^.left^.lval; + op^.left^.qval.hi := 0; + end; cgReal,cgDouble,cgComp,cgExtended: begin lval := op^.left^.lval; op^.left^.lval := 0; @@ -1229,6 +1284,10 @@ case op^.opcode of {check for optimizations of this node} op^.left^.rval := 0.0; op^.left^.lval := lval; end; + cgQuad: + CnvXLL(op^.left^.qval, rval); + cgUQuad: + CnvXULL(op^.left^.qval, rval); cgReal,cgDouble,cgComp,cgExtended: ; otherwise: ; end; @@ -1239,8 +1298,8 @@ case op^.opcode of {check for optimizations of this node} [cgByte,cgUByte,cgWord,cgUWord,cgLong,cgULong,cgReal,cgDouble, cgComp,cgExtended] then if totype.optype in - [cgByte,cgUByte,cgWord,cgUWord,cgLong,cgULong,cgReal,cgDouble, - cgComp,cgExtended] then begin + [cgByte,cgUByte,cgWord,cgUWord,cgLong,cgULong,cgQuad,cgUQuad, + cgReal,cgDouble,cgComp,cgExtended] then begin op^.left^.optype := totype.optype; opv := op^.left; end; {if} @@ -1286,6 +1345,13 @@ case op^.opcode of {check for optimizations of this node} op^.left^.optype := totype.optype; opv := op^.left; end; {if} + if fromtype.optype in [cgQuad,cgUQuad] then + if totype.optype in + [cgByte,cgUByte,cgWord,cgUWord,cgLong,cgULong,cgQuad,cgUQuad] + then begin + op^.left^.optype := totype.optype; + opv := op^.left; + end; {if} end {else if} else if op^.q in [$40,$41,$50,$51] then begin {any long type to byte type} @@ -1372,6 +1438,19 @@ case op^.opcode of {check for optimizations of this node} end; {if} end; {case pc_dvl} + pc_dvq: begin {pc_dvq} + if op^.right^.opcode = pc_ldc then begin + if op^.left^.opcode = pc_ldc then begin + if (op^.right^.qval.lo <> 0) or (op^.right^.qval.hi <> 0) then begin + div64(op^.left^.qval, op^.right^.qval); + opv := op^.left; + end; {if} + end {if} + else if (op^.right^.qval.lo = 1) and (op^.right^.qval.hi = 0) then + opv := op^.left; + end; {if} + end; {case pc_dvq} + pc_dvr: begin {pc_dvr} if op^.right^.opcode = pc_ldc then begin if op^.left^.opcode = pc_ldc then begin @@ -1734,6 +1813,21 @@ case op^.opcode of {check for optimizations of this node} end; {if} end; {case pc_mdl} + pc_mdq: begin {pc_mdq} + if op^.right^.opcode = pc_ldc then + if (op^.right^.qval.lo = 1) and (op^.right^.qval.hi = 0) then begin + if not SideEffects(op^.left) then begin + op^.right^.qval := longlong0; + opv := op^.right; + end; {if} + end {if} + else if op^.left^.opcode = pc_ldc then + if (op^.right^.qval.lo <> 0) or (op^.right^.qval.hi <> 0) then begin + rem64(op^.left^.qval, op^.right^.qval); + opv := op^.left; + end; {if} + end; {case pc_mdq} + pc_mod: begin {pc_mod} if op^.right^.opcode = pc_ldc then if op^.right^.q = 1 then begin @@ -1814,6 +1908,31 @@ case op^.opcode of {check for optimizations of this node} end; {else} end; {case pc_mpl, pc_uml} + pc_mpq, pc_umq: begin {pc_mpq, pc_umq} + if (op^.right^.opcode = pc_ldc) and (op^.left^.opcode = pc_ldc) then begin + umul64(op^.left^.qval, op^.right^.qval); + opv := op^.left; + end {if} + else begin + if op^.left^.opcode = pc_ldc then + ReverseChildren(op); + if op^.right^.opcode = pc_ldc then begin + if (op^.right^.qval.lo = 1) and (op^.right^.qval.hi = 0) then + opv := op^.left + else if (op^.right^.qval.lo = 0) and (op^.right^.qval.hi = 0) then + begin + if not SideEffects(op^.left) then + opv := op^.right; + end {else if} + else if (op^.right^.qval.lo = -1) and (op^.right^.qval.hi = -1) then + if op^.opcode = pc_mpq then begin + op^.opcode := pc_ngq; + op^.right := nil; + end; {if} + end; {if} + end; {else} + end; {case pc_mpq, pc_umq} + pc_mpr: begin {pc_mpr} if (op^.right^.opcode = pc_ldc) and (op^.left^.opcode = pc_ldc) then begin op^.left^.rval := op^.left^.rval*op^.right^.rval; @@ -1909,6 +2028,19 @@ case op^.opcode of {check for optimizations of this node} end; {if} end; {case pc_ngl} + pc_ngq: begin {pc_ngq} + if op^.left^.opcode = pc_ldc then begin + with op^.left^.qval do begin + lo := ~lo; + hi := ~hi; + lo := lo + 1; + if lo = 0 then + hi := hi + 1; + end; {with} + opv := op^.left; + end; {if} + end; {case pc_ngq} + pc_ngr: begin {pc_ngr} if op^.left^.opcode = pc_ldc then begin op^.left^.rval := -op^.left^.rval; @@ -1925,7 +2057,12 @@ case op^.opcode of {check for optimizations of this node} end {if} else if op^.left^.optype in [cgLong,cgULong] then begin q := ord(op^.left^.lval = 0); - lval := 0; + op^.left^.q := q; + op^.left^.optype := cgWord; + opv := op^.left; + end {else if} + else if op^.left^.optype in [cgQuad,cgUQuad] then begin + q := ord((op^.left^.qval.lo = 0) and (op^.left^.qval.hi = 0)); op^.left^.q := q; op^.left^.optype := cgWord; opv := op^.left; @@ -2100,7 +2237,11 @@ case op^.opcode of {check for optimizations of this node} pc_sbq: begin {pc_sbq} if op^.left^.opcode = pc_ldc then begin - if (op^.left^.qval.lo = 0) and (op^.left^.qval.hi = 0) then begin + if op^.right^.opcode = pc_ldc then begin + sub64(op^.left^.qval, op^.right^.qval); + opv := op^.left; + end {if} + else if (op^.left^.qval.lo = 0) and (op^.left^.qval.hi = 0) then begin op^.opcode := pc_ngq; op^.left := op^.right; op^.right := nil; @@ -2115,7 +2256,11 @@ case op^.opcode of {check for optimizations of this node} pc_shl: begin {pc_shl} if op^.right^.opcode = pc_ldc then begin opcode := op^.left^.opcode; - if opcode = pc_shl then begin + if opcode = pc_ldc then begin + op^.left^.q := op^.left^.q << op^.right^.q; + opv := op^.left; + end {if} + else if opcode = pc_shl then begin if op^.left^.right^.opcode = pc_ldc then begin op^.right^.q := op^.right^.q + op^.left^.right^.q; op^.left := op^.left^.left; @@ -2128,10 +2273,56 @@ case op^.opcode of {check for optimizations of this node} op2^.left := op; opv := op2; PeepHoleOptimization(op2^.left); - end; {else if} + end {else if} + else if op^.right^.q = 0 then + opv := op^.left; end; {if} end; {case pc_shl} + pc_shr: begin {pc_shr} + if op^.right^.opcode = pc_ldc then begin + if op^.left^.opcode = pc_ldc then begin + op^.left^.q := op^.left^.q >> op^.right^.q; + opv := op^.left; + end {if} + else if op^.right^.q = 0 then + opv := op^.left; + end; {if} + end; {case pc_shr} + + pc_sll: begin {pc_sll} + if op^.right^.opcode = pc_ldc then begin + if op^.left^.opcode = pc_ldc then begin + op^.left^.lval := op^.left^.lval << op^.right^.lval; + opv := op^.left; + end {if} + else if op^.right^.lval = 0 then + opv := op^.left; + end; {if} + end; {case pc_sll} + + pc_slr: begin {pc_slr} + if op^.right^.opcode = pc_ldc then begin + if op^.left^.opcode = pc_ldc then begin + op^.left^.lval := op^.left^.lval >> op^.right^.lval; + opv := op^.left; + end {if} + else if op^.right^.lval = 0 then + opv := op^.left; + end; {if} + end; {case pc_slr} + + pc_slq: begin {pc_slq} + if op^.right^.opcode = pc_ldc then begin + if op^.left^.opcode = pc_ldc then begin + shl64(op^.left^.qval, op^.right^.q); + opv := op^.left; + end {if} + else if op^.right^.q = 0 then + opv := op^.left; + end; {if} + end; {case pc_slq} + pc_sro, pc_str: begin {pc_sro, pc_str} if op^.optype in [cgReal,cgDouble,cgExtended] then RealStoreOptimizations(op, op^.left); @@ -2156,6 +2347,17 @@ case op^.opcode of {check for optimizations of this node} end; {if} end; {case pc_sto} + pc_sqr: begin {pc_sqr} + if op^.right^.opcode = pc_ldc then begin + if op^.left^.opcode = pc_ldc then begin + ashr64(op^.left^.qval, op^.right^.q); + opv := op^.left; + end {if} + else if op^.right^.q = 0 then + opv := op^.left; + end; {if} + end; {case pc_sqr} + pc_tjp: begin {pc_tjp} opcode := op^.left^.opcode; if opcode = pc_ldc then begin @@ -2259,6 +2461,19 @@ case op^.opcode of {check for optimizations of this node} end; {if} end; {case pc_udl} + pc_udq: begin {pc_udq} + if op^.right^.opcode = pc_ldc then begin + if op^.left^.opcode = pc_ldc then begin + if (op^.right^.qval.lo <> 0) or (op^.right^.qval.hi <> 0) then begin + udiv64(op^.left^.qval, op^.right^.qval); + opv := op^.left; + end; {if} + end {if} + else if (op^.right^.qval.lo = 1) and (op^.right^.qval.hi = 0) then + opv := op^.left; + end; {if} + end; {case pc_udq} + pc_uim: begin {pc_uim} if op^.right^.opcode = pc_ldc then if op^.right^.q = 1 then begin @@ -2305,6 +2520,55 @@ case op^.opcode of {check for optimizations of this node} end; {if} end; {case pc_ulm} + pc_uqm: begin {pc_uqm} + if op^.right^.opcode = pc_ldc then + if (op^.right^.qval.lo = 1) and (op^.right^.qval.hi = 0) then begin + if not SideEffects(op^.left) then begin + op^.right^.qval := longlong0; + opv := op^.right; + end; {if} + end {if} + else if op^.left^.opcode = pc_ldc then + if (op^.right^.qval.lo <> 0) or (op^.right^.qval.hi <> 0) then begin + umod64(op^.left^.qval, op^.right^.qval); + opv := op^.left; + end; {if} + end; {case pc_uqm} + + pc_usr: begin {pc_usr} + if op^.right^.opcode = pc_ldc then begin + if op^.left^.opcode = pc_ldc then begin + lval := lshr(op^.left^.q & $0000FFFF, op^.right^.q); + op^.left^.q := long(lval).lsw; + opv := op^.left; + end {if} + else if op^.right^.q = 0 then + opv := op^.left; + end; {if} + end; {case pc_usr} + + pc_vsr: begin {pc_vsr} + if op^.right^.opcode = pc_ldc then begin + if op^.left^.opcode = pc_ldc then begin + op^.left^.lval := lshr(op^.left^.lval, op^.right^.lval); + opv := op^.left; + end {if} + else if op^.right^.lval = 0 then + opv := op^.left; + end; {if} + end; {case pc_vsr} + + pc_wsr: begin {pc_wsr} + if op^.right^.opcode = pc_ldc then begin + if op^.left^.opcode = pc_ldc then begin + lshr64(op^.left^.qval, op^.right^.q); + opv := op^.left; + end {if} + else if op^.right^.q = 0 then + opv := op^.left; + end; {if} + end; {case pc_wsr} + otherwise: ; end; {case} end; {PeepHoleOptimization}