Reapply "[FastISel][AArch64] Add custom lowering for GEPs."

This is mostly a copy of the existing FastISel GEP code, but we have to
duplicate it for AArch64, because otherwise we would bail out even for simple
cases. This is because the standard fastEmit functions don't cover MUL at all
and ADD is lowered very inefficientily.

The original commit had a bug in the add emit logic, which has been fixed.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219831 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Juergen Ributzka 2014-10-15 18:58:07 +00:00
parent 32c5fde3f1
commit 7440a83e60
3 changed files with 119 additions and 3 deletions

View File

@ -134,6 +134,7 @@ private:
bool selectBitCast(const Instruction *I);
bool selectFRem(const Instruction *I);
bool selectSDiv(const Instruction *I);
bool selectGetElementPtr(const Instruction *I);
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
@ -4556,6 +4557,79 @@ bool AArch64FastISel::selectSDiv(const Instruction *I) {
return true;
}
/// This is mostly a copy of the existing FastISel GEP code, but we have to
/// duplicate it for AArch64, because otherwise we would bail out even for
/// simple cases. This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficientily.
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
unsigned N = getRegForValue(I->getOperand(0));
if (!N)
return false;
bool NIsKill = hasTrivialKill(I->getOperand(0));
// Keep a running tab of the total offset to coalesce multiple N = N + Offset
// into a single N = N + TotalOffset.
uint64_t TotalOffs = 0;
Type *Ty = I->getOperand(0)->getType();
MVT VT = TLI.getPointerTy();
for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) {
const Value *Idx = *OI;
if (auto *StTy = dyn_cast<StructType>(Ty)) {
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
// N = N + Offset
if (Field)
TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
Ty = StTy->getElementType(Field);
} else {
Ty = cast<SequentialType>(Ty)->getElementType();
// If this is a constant subscript, handle it quickly.
if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero())
continue;
// N = N + Offset
TotalOffs +=
DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
continue;
}
if (TotalOffs) {
N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
if (!N)
return false;
NIsKill = true;
TotalOffs = 0;
}
// N = N + Idx * ElementSize;
uint64_t ElementSize = DL.getTypeAllocSize(Ty);
std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
unsigned IdxN = Pair.first;
bool IdxNIsKill = Pair.second;
if (!IdxN)
return false;
if (ElementSize != 1) {
unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
if (!C)
return false;
IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
if (!IdxN)
return false;
IdxNIsKill = true;
}
N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
if (!N)
return false;
}
}
if (TotalOffs) {
N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
if (!N)
return false;
}
updateValueMap(I, N);
return true;
}
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
default:
@ -4627,6 +4701,8 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
return selectRet(I);
case Instruction::FRem:
return selectFRem(I);
case Instruction::GetElementPtr:
return selectGetElementPtr(I);
}
// fall-back to target-independent instruction selection.

View File

@ -15,9 +15,8 @@ define void @main() nounwind {
entry:
; CHECK: main
; CHECK: mov x29, sp
; CHECK: mov x[[REG:[0-9]+]], sp
; CHECK-NEXT: orr x[[REG1:[0-9]+]], xzr, #0x8
; CHECK-NEXT: add x0, x[[REG]], x[[REG1]]
; CHECK: mov [[REG:x[0-9]+]], sp
; CHECK-NEXT: add x0, [[REG]], #8
%E = alloca %struct.S2Ty, align 4
%B = getelementptr inbounds %struct.S2Ty* %E, i32 0, i32 1
call void @takeS1(%struct.S1Ty* %B)

View File

@ -0,0 +1,41 @@
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
%struct.foo = type { i32, i64, float, double }
define double* @test_struct(%struct.foo* %f) {
; CHECK-LABEL: test_struct
; CHECK: add x0, x0, #24
%1 = getelementptr inbounds %struct.foo* %f, i64 0, i32 3
ret double* %1
}
define i32* @test_array1(i32* %a, i64 %i) {
; CHECK-LABEL: test_array1
; CHECK: orr [[REG:x[0-9]+]], xzr, #0x4
; CHECK-NEXT: madd x0, x1, [[REG]], x0
%1 = getelementptr inbounds i32* %a, i64 %i
ret i32* %1
}
define i32* @test_array2(i32* %a) {
; CHECK-LABEL: test_array2
; CHECK: add x0, x0, #16
%1 = getelementptr inbounds i32* %a, i64 4
ret i32* %1
}
define i32* @test_array3(i32* %a) {
; CHECK-LABEL: test_array3
; CHECK: add x0, x0, #1, lsl #12
%1 = getelementptr inbounds i32* %a, i64 1024
ret i32* %1
}
define i32* @test_array4(i32* %a) {
; CHECK-LABEL: test_array4
; CHECK: movz [[REG:x[0-9]+]], #0x1008
; CHECK-NEXR: add x0, x0, [[REG]]
%1 = getelementptr inbounds i32* %a, i64 1026
ret i32* %1
}