diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index ed6b5a53c5b..46263020300 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -134,6 +134,7 @@ private: bool selectBitCast(const Instruction *I); bool selectFRem(const Instruction *I); bool selectSDiv(const Instruction *I); + bool selectGetElementPtr(const Instruction *I); // Utility helper routines. bool isTypeLegal(Type *Ty, MVT &VT); @@ -4556,6 +4557,79 @@ bool AArch64FastISel::selectSDiv(const Instruction *I) { return true; } +/// This is mostly a copy of the existing FastISel GEP code, but we have to +/// duplicate it for AArch64, because otherwise we would bail out even for +/// simple cases. This is because the standard fastEmit functions don't cover +/// MUL at all and ADD is lowered very inefficientily. +bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { + unsigned N = getRegForValue(I->getOperand(0)); + if (!N) + return false; + bool NIsKill = hasTrivialKill(I->getOperand(0)); + + // Keep a running tab of the total offset to coalesce multiple N = N + Offset + // into a single N = N + TotalOffset. + uint64_t TotalOffs = 0; + Type *Ty = I->getOperand(0)->getType(); + MVT VT = TLI.getPointerTy(); + for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) { + const Value *Idx = *OI; + if (auto *StTy = dyn_cast(Ty)) { + unsigned Field = cast(Idx)->getZExtValue(); + // N = N + Offset + if (Field) + TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); + Ty = StTy->getElementType(Field); + } else { + Ty = cast(Ty)->getElementType(); + // If this is a constant subscript, handle it quickly. + if (const auto *CI = dyn_cast(Idx)) { + if (CI->isZero()) + continue; + // N = N + Offset + TotalOffs += + DL.getTypeAllocSize(Ty) * cast(CI)->getSExtValue(); + continue; + } + if (TotalOffs) { + N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); + if (!N) + return false; + NIsKill = true; + TotalOffs = 0; + } + + // N = N + Idx * ElementSize; + uint64_t ElementSize = DL.getTypeAllocSize(Ty); + std::pair Pair = getRegForGEPIndex(Idx); + unsigned IdxN = Pair.first; + bool IdxNIsKill = Pair.second; + if (!IdxN) + return false; + + if (ElementSize != 1) { + unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize); + if (!C) + return false; + IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true); + if (!IdxN) + return false; + IdxNIsKill = true; + } + N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); + if (!N) + return false; + } + } + if (TotalOffs) { + N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); + if (!N) + return false; + } + updateValueMap(I, N); + return true; +} + bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { default: @@ -4627,6 +4701,8 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { return selectRet(I); case Instruction::FRem: return selectFRem(I); + case Instruction::GetElementPtr: + return selectGetElementPtr(I); } // fall-back to target-independent instruction selection. diff --git a/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll b/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll index 34394b2af0a..a8417027ce2 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll @@ -15,9 +15,8 @@ define void @main() nounwind { entry: ; CHECK: main ; CHECK: mov x29, sp -; CHECK: mov x[[REG:[0-9]+]], sp -; CHECK-NEXT: orr x[[REG1:[0-9]+]], xzr, #0x8 -; CHECK-NEXT: add x0, x[[REG]], x[[REG1]] +; CHECK: mov [[REG:x[0-9]+]], sp +; CHECK-NEXT: add x0, [[REG]], #8 %E = alloca %struct.S2Ty, align 4 %B = getelementptr inbounds %struct.S2Ty* %E, i32 0, i32 1 call void @takeS1(%struct.S1Ty* %B) diff --git a/test/CodeGen/AArch64/fast-isel-gep.ll b/test/CodeGen/AArch64/fast-isel-gep.ll new file mode 100644 index 00000000000..465f595245c --- /dev/null +++ b/test/CodeGen/AArch64/fast-isel-gep.ll @@ -0,0 +1,41 @@ +; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s + +%struct.foo = type { i32, i64, float, double } + +define double* @test_struct(%struct.foo* %f) { +; CHECK-LABEL: test_struct +; CHECK: add x0, x0, #24 + %1 = getelementptr inbounds %struct.foo* %f, i64 0, i32 3 + ret double* %1 +} + +define i32* @test_array1(i32* %a, i64 %i) { +; CHECK-LABEL: test_array1 +; CHECK: orr [[REG:x[0-9]+]], xzr, #0x4 +; CHECK-NEXT: madd x0, x1, [[REG]], x0 + %1 = getelementptr inbounds i32* %a, i64 %i + ret i32* %1 +} + +define i32* @test_array2(i32* %a) { +; CHECK-LABEL: test_array2 +; CHECK: add x0, x0, #16 + %1 = getelementptr inbounds i32* %a, i64 4 + ret i32* %1 +} + +define i32* @test_array3(i32* %a) { +; CHECK-LABEL: test_array3 +; CHECK: add x0, x0, #1, lsl #12 + %1 = getelementptr inbounds i32* %a, i64 1024 + ret i32* %1 +} + +define i32* @test_array4(i32* %a) { +; CHECK-LABEL: test_array4 +; CHECK: movz [[REG:x[0-9]+]], #0x1008 +; CHECK-NEXR: add x0, x0, [[REG]] + %1 = getelementptr inbounds i32* %a, i64 1026 + ret i32* %1 +} +