From 6dcceb5ecbff459c3e6e30f459f49d2b90696da4 Mon Sep 17 00:00:00 2001 From: Duraid Madina Date: Fri, 8 Apr 2005 10:01:48 +0000 Subject: [PATCH] fix bogus division-by-power-of-2 (was wrong for negative input, adds extr insn) fix hack in division (clean up frcpa instruction) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@21153 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/IA64/IA64ISelPattern.cpp | 27 ++++++++++++++++++++------- lib/Target/IA64/IA64InstrInfo.td | 13 +++++-------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/lib/Target/IA64/IA64ISelPattern.cpp b/lib/Target/IA64/IA64ISelPattern.cpp index 03ff36ab04a..13a4689bd31 100644 --- a/lib/Target/IA64/IA64ISelPattern.cpp +++ b/lib/Target/IA64/IA64ISelPattern.cpp @@ -1122,9 +1122,21 @@ pC = pA OR pB switch (ponderIntegerDivisionBy(N.getOperand(1), isSigned, Tmp3)) { case 1: // division by a constant that's a power of 2 Tmp1 = SelectExpr(N.getOperand(0)); - if(isSigned) // becomes a shift right: - BuildMI(BB, IA64::SHRS, 2, Result).addReg(Tmp1).addImm(Tmp3); - else + if(isSigned) { // argument could be negative, so emit some code: + unsigned divAmt=Tmp3; + unsigned tempGR1=MakeReg(MVT::i64); + unsigned tempGR2=MakeReg(MVT::i64); + unsigned tempGR3=MakeReg(MVT::i64); + BuildMI(BB, IA64::SHRS, 2, tempGR1) + .addReg(Tmp1).addImm(divAmt-1); + BuildMI(BB, IA64::EXTRU, 3, tempGR2) + .addReg(tempGR1).addImm(64-divAmt).addImm(divAmt); + BuildMI(BB, IA64::ADD, 2, tempGR3) + .addReg(Tmp1).addReg(tempGR2); + BuildMI(BB, IA64::SHRS, 2, Result) + .addReg(tempGR3).addImm(divAmt); + } + else // unsigned div-by-power-of-2 becomes a simple shift right: BuildMI(BB, IA64::SHRU, 2, Result).addReg(Tmp1).addImm(Tmp3); return Result; // early exit } @@ -1171,10 +1183,11 @@ pC = pA OR pB } // we start by computing an approximate reciprocal (good to 9 bits?) - // note, this instruction writes _both_ TmpF5 (answer) and tmpPR (predicate) - // FIXME: or at least, it should!! - BuildMI(BB, IA64::FRCPAS1FLOAT, 2, TmpF5).addReg(TmpF3).addReg(TmpF4); - BuildMI(BB, IA64::FRCPAS1PREDICATE, 2, TmpPR).addReg(TmpF3).addReg(TmpF4); + // note, this instruction writes _both_ TmpF5 (answer) and TmpPR (predicate) + BuildMI(BB, IA64::FRCPAS1, 4) + .addReg(TmpF5, MachineOperand::Def) + .addReg(TmpPR, MachineOperand::Def) + .addReg(TmpF3).addReg(TmpF4); if(!isModulus) { // if this is a divide, we worry about div-by-zero unsigned bogusPR=MakeReg(MVT::i1); // won't appear, due to twoAddress diff --git a/lib/Target/IA64/IA64InstrInfo.td b/lib/Target/IA64/IA64InstrInfo.td index 0424b3be2ef..6177c9ba996 100644 --- a/lib/Target/IA64/IA64InstrInfo.td +++ b/lib/Target/IA64/IA64InstrInfo.td @@ -120,6 +120,9 @@ def SHRS : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2), def SHRSI : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, s21imm:$imm), "shr $dst = $src1, $imm;;">; +def EXTRU : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, u6imm:$imm1, u6imm:$imm2), + "extr.u $dst = $src1, $imm1, $imm2;;">; + def DEPZ : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, u6imm:$imm1, u6imm:$imm2), "dep.z $dst = $src1, $imm1, $imm2;;">; def SXT1 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt1 $dst = $src;;">; @@ -258,14 +261,8 @@ def CFNMAS1 : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp), "($qp) fnma.s1 $dst = $src1, $src2, $src3;;">; -// FIXME: we 'explode' FRCPA (which should write two registers) into two -// operations that write one each. this is a waste, and is also destroying -// f127. not cool. -def FRCPAS1FLOAT : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2), - "frcpa.s1 $dst , p0 = $src1, $src2;;">; -// XXX: this _will_ break things: (f127) -def FRCPAS1PREDICATE : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2), - "frcpa.s1 f127 , $dst = $src1, $src2;; // XXX FIXME!!!!">; +def FRCPAS1 : AForm<0x03, 0x0b, (ops FP:$dstFR, PR:$dstPR, FP:$src1, FP:$src2), + "frcpa.s1 $dstFR, $dstPR = $src1, $src2;;">; def XMAL : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3), "xma.l $dst = $src1, $src2, $src3;;">;