Shifter ops are not always free. Do not fold them (especially to form

complex load / store addressing mode) when they have higher cost and
when they have more than one use.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@117509 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2010-10-27 23:41:30 +00:00
parent de5fa932b9
commit f40deed62f
4 changed files with 183 additions and 25 deletions

View File

@ -78,8 +78,12 @@ public:
SDNode *Select(SDNode *N);
bool isShifterOpProfitable(const SDValue &Shift,
ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
bool SelectShifterOperandReg(SDValue N, SDValue &A,
SDValue &B, SDValue &C);
bool SelectShiftShifterOperandReg(SDValue N, SDValue &A,
SDValue &B, SDValue &C);
bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
@ -246,6 +250,17 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
}
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
ARM_AM::ShiftOpc ShOpcVal,
unsigned ShAmt) {
if (!Subtarget->isCortexA9())
return true;
if (Shift.hasOneUse())
return true;
// R << 2 is free.
return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
}
bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
SDValue &BaseReg,
SDValue &ShReg,
@ -261,6 +276,32 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
BaseReg = N.getOperand(0);
unsigned ShImmVal = 0;
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
ShReg = CurDAG->getRegister(0, MVT::i32);
ShImmVal = RHS->getZExtValue() & 31;
} else {
ShReg = N.getOperand(1);
if (!isShifterOpProfitable(N, ShOpcVal, ShImmVal))
return false;
}
Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
MVT::i32);
return true;
}
bool ARMDAGToDAGISel::SelectShiftShifterOperandReg(SDValue N,
SDValue &BaseReg,
SDValue &ShReg,
SDValue &Opc) {
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
// Don't match base register only case. That is matched to a separate
// lower complexity pattern with explicit register operand.
if (ShOpcVal == ARM_AM::no_shift) return false;
BaseReg = N.getOperand(0);
unsigned ShImmVal = 0;
// Do not check isShifterOpProfitable. This must return true.
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
ShReg = CurDAG->getRegister(0, MVT::i32);
ShImmVal = RHS->getZExtValue() & 31;
@ -321,7 +362,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
SDValue &Opc) {
if (N.getOpcode() == ISD::MUL) {
if (N.getOpcode() == ISD::MUL &&
(!Subtarget->isCortexA9() || N.hasOneUse())) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
// X * [3,5,9] -> X + X * [2,4,8] etc.
int RHSC = (int)RHS->getZExtValue();
@ -357,6 +399,10 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
}
}
if (Subtarget->isCortexA9() && !N.hasOneUse())
// Compute R +/- (R << N) and reuse it.
return false;
// Otherwise this is R +/- [possibly shifted] R.
ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
@ -371,14 +417,20 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
ShAmt = Sh->getZExtValue();
Offset = N.getOperand(1).getOperand(0);
if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
Offset = N.getOperand(1).getOperand(0);
else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
}
} else {
ShOpcVal = ARM_AM::no_shift;
}
}
// Try matching (R shl C) + (R).
if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift &&
!(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
@ -386,8 +438,15 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
ShAmt = Sh->getZExtValue();
Offset = N.getOperand(0).getOperand(0);
Base = N.getOperand(1);
if (!Subtarget->isCortexA9() ||
(N.hasOneUse() &&
isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
Offset = N.getOperand(0).getOperand(0);
Base = N.getOperand(1);
} else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
}
} else {
ShOpcVal = ARM_AM::no_shift;
}
@ -408,7 +467,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
SDValue &Base,
SDValue &Offset,
SDValue &Opc) {
if (N.getOpcode() == ISD::MUL) {
if (N.getOpcode() == ISD::MUL &&
(!Subtarget->isCortexA9() || N.hasOneUse())) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
// X * [3,5,9] -> X + X * [2,4,8] etc.
int RHSC = (int)RHS->getZExtValue();
@ -474,6 +534,16 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
}
}
if (Subtarget->isCortexA9() && !N.hasOneUse()) {
// Compute R +/- (R << N) and reuse it.
Base = N;
Offset = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
ARM_AM::no_shift),
MVT::i32);
return AM2_BASE;
}
// Otherwise this is R +/- [possibly shifted] R.
ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
@ -488,14 +558,20 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
ShAmt = Sh->getZExtValue();
Offset = N.getOperand(1).getOperand(0);
if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
Offset = N.getOperand(1).getOperand(0);
else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
}
} else {
ShOpcVal = ARM_AM::no_shift;
}
}
// Try matching (R shl C) + (R).
if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift &&
!(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
@ -503,8 +579,15 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
ShAmt = Sh->getZExtValue();
Offset = N.getOperand(0).getOperand(0);
Base = N.getOperand(1);
if (!Subtarget->isCortexA9() ||
(N.hasOneUse() &&
isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
Offset = N.getOperand(0).getOperand(0);
Base = N.getOperand(1);
} else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
}
} else {
ShOpcVal = ARM_AM::no_shift;
}
@ -543,7 +626,12 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
// it.
if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
ShAmt = Sh->getZExtValue();
Offset = N.getOperand(0);
if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
Offset = N.getOperand(0);
else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
}
} else {
ShOpcVal = ARM_AM::no_shift;
}
@ -959,6 +1047,12 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
return false;
}
if (Subtarget->isCortexA9() && !N.hasOneUse()) {
// Compute R + (R << [1,2,3]) and reuse it.
Base = N;
return false;
}
// Look for (R + R) or (R + (R << [1,2,3])).
unsigned ShAmt = 0;
Base = N.getOperand(0);
@ -977,11 +1071,12 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
// it.
if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
ShAmt = Sh->getZExtValue();
if (ShAmt >= 4) {
if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
OffReg = OffReg.getOperand(0);
else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
} else
OffReg = OffReg.getOperand(0);
}
} else {
ShOpcVal = ARM_AM::no_shift;
}

View File

@ -325,6 +325,13 @@ def so_reg : Operand<i32>, // reg reg imm
let PrintMethod = "printSORegOperand";
let MIOperandInfo = (ops GPR, GPR, i32imm);
}
def shift_so_reg : Operand<i32>, // reg reg imm
ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
[shl,srl,sra,rotr]> {
string EncoderMethod = "getSORegOpValue";
let PrintMethod = "printSORegOperand";
let MIOperandInfo = (ops GPR, GPR, i32imm);
}
// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits. so_imm values are
@ -1715,9 +1722,10 @@ def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
let Inst{15-12} = Rd;
}
def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins so_reg:$src),
def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src),
DPSoRegFrm, IIC_iMOVsr,
"mov", "\t$Rd, $src", [(set GPR:$Rd, so_reg:$src)]>, UnaryDP {
"mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg:$src)]>,
UnaryDP {
bits<4> Rd;
bits<12> src;
let Inst{15-12} = Rd;

View File

@ -1,18 +1,72 @@
; RUN: llc < %s -march=arm | grep add | grep lsl
; RUN: llc < %s -march=arm | grep bic | grep asr
; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
; rdar://8576755
define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
%shift.upgrd.1 = zext i8 %sh to i32 ; <i32> [#uses=1]
%A = shl i32 %Y, %shift.upgrd.1 ; <i32> [#uses=1]
%B = add i32 %X, %A ; <i32> [#uses=1]
; A8: test1:
; A8: add r0, r0, r1, lsl r2
; A9: test1:
; A9: add r0, r0, r1, lsl r2
%shift.upgrd.1 = zext i8 %sh to i32
%A = shl i32 %Y, %shift.upgrd.1
%B = add i32 %X, %A
ret i32 %B
}
define i32 @test2(i32 %X, i32 %Y, i8 %sh) {
%shift.upgrd.2 = zext i8 %sh to i32 ; <i32> [#uses=1]
%A = ashr i32 %Y, %shift.upgrd.2 ; <i32> [#uses=1]
%B = xor i32 %A, -1 ; <i32> [#uses=1]
%C = and i32 %X, %B ; <i32> [#uses=1]
; A8: test2:
; A8: bic r0, r0, r1, asr r2
; A9: test2:
; A9: bic r0, r0, r1, asr r2
%shift.upgrd.2 = zext i8 %sh to i32
%A = ashr i32 %Y, %shift.upgrd.2
%B = xor i32 %A, -1
%C = and i32 %X, %B
ret i32 %C
}
define i32 @test3(i32 %base, i32 %base2, i32 %offset) {
entry:
; A8: test3:
; A8: ldr r0, [r0, r2, lsl #2]
; A8: ldr r1, [r1, r2, lsl #2]
; lsl #2 is free
; A9: test3:
; A9: ldr r1, [r1, r2, lsl #2]
; A9: ldr r0, [r0, r2, lsl #2]
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i32*
%tmp4 = add i32 %base2, %tmp1
%tmp5 = inttoptr i32 %tmp4 to i32*
%tmp6 = load i32* %tmp3
%tmp7 = load i32* %tmp5
%tmp8 = add i32 %tmp7, %tmp6
ret i32 %tmp8
}
declare i8* @malloc(...)
define fastcc void @test4() nounwind {
entry:
; A8: test4:
; A8: ldr r1, [r0, r0, lsl #2]
; A8: str r1, [r0, r0, lsl #2]
; A9: test4:
; A9: add r0, r0, r0, lsl #2
; A9: ldr r1, [r0]
; A9: str r1, [r0]
%0 = tail call i8* (...)* @malloc(i32 undef) nounwind
%1 = bitcast i8* %0 to i32*
%2 = sext i16 undef to i32
%3 = getelementptr inbounds i32* %1, i32 %2
%4 = load i32* %3, align 4
%5 = add nsw i32 %4, 1
store i32 %5, i32* %3, align 4
ret void
}

View File

@ -586,6 +586,7 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
MISC("brtarget", "kOperandTypeARMBranchTarget"); // ?
MISC("so_reg", "kOperandTypeARMSoReg"); // R, R, I
MISC("shift_so_reg", "kOperandTypeARMSoReg"); // R, R, I
MISC("t2_so_reg", "kOperandTypeThumb2SoReg"); // R, I
MISC("so_imm", "kOperandTypeARMSoImm"); // I
MISC("rot_imm", "kOperandTypeARMRotImm"); // I