[FastISel][AArch64] Fold mul into the address computation of memory operations.

Teach 'computeAddress' to also fold multiplies into the address computation
(when possible).

This fixes rdar://problem/18369443.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217977 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Juergen Ributzka 2014-09-17 19:19:31 +00:00
parent b3edb56485
commit 5461af97bc
2 changed files with 111 additions and 0 deletions

View File

@ -425,6 +425,19 @@ unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
}
/// \brief Check if the multiply is by a power-of-2 constant.
///
/// Returns true when \p I is a multiply operator and at least one of its two
/// operands is a ConstantInt whose value is a power of two. Returns false for
/// anything that is not a multiply, or for a multiply with no such operand.
static bool isMulPowOf2(const Value *I) {
  const auto *Mul = dyn_cast<MulOperator>(I);
  if (!Mul)
    return false;

  // The constant may be on either side of the multiply, so inspect both
  // operands in order.
  for (unsigned Idx = 0; Idx != 2; ++Idx) {
    const auto *CI = dyn_cast<ConstantInt>(Mul->getOperand(Idx));
    if (CI && CI->getValue().isPowerOf2())
      return true;
  }
  return false;
}
// Computes the address to get to an object.
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
@ -589,7 +602,64 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
return true;
}
break;
case Instruction::Mul: {
if (Addr.getOffsetReg())
break;
if (!isMulPowOf2(U))
break;
const Value *LHS = U->getOperand(0);
const Value *RHS = U->getOperand(1);
// Canonicalize power-of-2 value to the RHS.
if (const auto *C = dyn_cast<ConstantInt>(LHS))
if (C->getValue().isPowerOf2())
std::swap(LHS, RHS);
assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
const auto *C = cast<ConstantInt>(RHS);
unsigned Val = C->getValue().logBase2();
if (Val < 1 || Val > 3)
break;
uint64_t NumBytes = 0;
if (Ty && Ty->isSized()) {
uint64_t NumBits = DL.getTypeSizeInBits(Ty);
NumBytes = NumBits / 8;
if (!isPowerOf2_64(NumBits))
NumBytes = 0;
}
if (NumBytes != (1ULL << Val))
break;
Addr.setShift(Val);
Addr.setExtendType(AArch64_AM::LSL);
if (const auto *I = dyn_cast<Instruction>(LHS))
if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
U = I;
if (const auto *ZE = dyn_cast<ZExtInst>(U))
if (ZE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::UXTW);
LHS = U->getOperand(0);
}
if (const auto *SE = dyn_cast<SExtInst>(U))
if (SE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::SXTW);
LHS = U->getOperand(0);
}
unsigned Reg = getRegForValue(LHS);
if (!Reg)
return false;
Addr.setOffsetReg(Reg);
return true;
}
} // end switch
if (Addr.getReg()) {
if (!Addr.getOffsetReg()) {

View File

@ -339,6 +339,16 @@ define i32 @load_shift_offreg_1(i64 %a) {
ret i32 %3
}
; A multiply by 4 is used directly (via inttoptr) as the address of an i32
; load, with no separate base value. The expected output is a stand-alone
; 'lsl ..., #2' of x0 followed by an 'ldr' that addresses through the shifted
; register only.
define i32 @load_mul_offreg_1(i64 %a) {
; CHECK-LABEL: load_mul_offreg_1
; CHECK: lsl [[REG:x[0-9]+]], x0, #2
; CHECK: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
%1 = mul i64 %a, 4
%2 = inttoptr i64 %1 to i32*
%3 = load i32* %2
ret i32 %3
}
; Load Base Register + Scaled Register Offset
define i32 @load_breg_shift_offreg_1(i64 %a, i64 %b) {
; CHECK-LABEL: load_breg_shift_offreg_1
@ -405,6 +415,15 @@ define i32 @load_breg_shift_offreg_5(i64 %a, i64 %b) {
ret i32 %5
}
; %a is multiplied by 4 and added to %b to form the address of an i32 load.
; The expected output is a single 'ldr' whose address operand is
; [x1, x0, lsl #2], i.e. the multiply appears as the scaled register offset
; of the load rather than as a separate instruction.
define i32 @load_breg_mul_offreg_1(i64 %a, i64 %b) {
; CHECK-LABEL: load_breg_mul_offreg_1
; CHECK: ldr {{w[0-9]+}}, [x1, x0, lsl #2]
%1 = mul i64 %a, 4
%2 = add i64 %1, %b
%3 = inttoptr i64 %2 to i32*
%4 = load i32* %3
ret i32 %4
}
; Load Base Register + Scaled Register Offset + Sign/Zero extension
define i32 @load_breg_zext_shift_offreg_1(i32 %a, i64 %b) {
@ -429,6 +448,17 @@ define i32 @load_breg_zext_shift_offreg_2(i32 %a, i64 %b) {
ret i32 %5
}
; The i32 argument %a is zero-extended to i64, multiplied by 4, and added to
; %b to form the address of an i32 load. The expected output is a single
; 'ldr' with the address operand [x1, w0, uxtw #2], so both the zero-extend
; and the multiply are expressed in the load's addressing mode.
define i32 @load_breg_zext_mul_offreg_1(i32 %a, i64 %b) {
; CHECK-LABEL: load_breg_zext_mul_offreg_1
; CHECK: ldr {{w[0-9]+}}, [x1, w0, uxtw #2]
%1 = zext i32 %a to i64
%2 = mul i64 %1, 4
%3 = add i64 %2, %b
%4 = inttoptr i64 %3 to i32*
%5 = load i32* %4
ret i32 %5
}
define i32 @load_breg_sext_shift_offreg_1(i32 %a, i64 %b) {
; CHECK-LABEL: load_breg_sext_shift_offreg_1
; CHECK: ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
@ -451,6 +481,17 @@ define i32 @load_breg_sext_shift_offreg_2(i32 %a, i64 %b) {
ret i32 %5
}
; The i32 argument %a is sign-extended to i64, multiplied by 4, and added to
; %b to form the address of an i32 load. The expected output is a single
; 'ldr' with the address operand [x1, w0, sxtw #2], so both the sign-extend
; and the multiply are expressed in the load's addressing mode.
define i32 @load_breg_sext_mul_offreg_1(i32 %a, i64 %b) {
; CHECK-LABEL: load_breg_sext_mul_offreg_1
; CHECK: ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
%1 = sext i32 %a to i64
%2 = mul i64 %1, 4
%3 = add i64 %2, %b
%4 = inttoptr i64 %3 to i32*
%5 = load i32* %4
ret i32 %5
}
; Load Scaled Register Offset + Immediate Offset + Sign/Zero extension
define i64 @load_sext_shift_offreg_imm1(i32 %a) {
; CHECK-LABEL: load_sext_shift_offreg_imm1