diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 619dfd66d1c..4f5de90afcf 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -2096,8 +2096,7 @@ bool AArch64FastISel::fastLowerArguments() {
   if (CC != CallingConv::C)
     return false;
 
-  // Only handle simple cases like i1/i8/i16/i32/i64/f32/f64 of up to 8 GPR and
-  // FPR each.
+  // Only handle simple cases of up to 8 GPR and FPR each.
   unsigned GPRCnt = 0;
   unsigned FPRCnt = 0;
   unsigned Idx = 0;
@@ -2111,32 +2110,34 @@ bool AArch64FastISel::fastLowerArguments() {
       return false;
 
     Type *ArgTy = Arg.getType();
-    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
+    if (ArgTy->isStructTy() || ArgTy->isArrayTy())
      return false;
 
     EVT ArgVT = TLI.getValueType(ArgTy);
-    if (!ArgVT.isSimple()) return false;
-    switch (ArgVT.getSimpleVT().SimpleTy) {
-    default: return false;
-    case MVT::i1:
-    case MVT::i8:
-    case MVT::i16:
-    case MVT::i32:
-    case MVT::i64:
+    if (!ArgVT.isSimple())
+      return false;
+
+    MVT VT = ArgVT.getSimpleVT().SimpleTy;
+    if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
+      return false;
+
+    if (VT.isVector() &&
+        (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
+      return false;
+
+    if (VT >= MVT::i1 && VT <= MVT::i64)
       ++GPRCnt;
-      break;
-    case MVT::f16:
-    case MVT::f32:
-    case MVT::f64:
+    else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
+             VT.is128BitVector())
       ++FPRCnt;
-      break;
-    }
+    else
+      return false;
 
     if (GPRCnt > 8 || FPRCnt > 8)
       return false;
   }
 
-  static const MCPhysReg Registers[5][8] = {
+  static const MCPhysReg Registers[6][8] = {
     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
       AArch64::W5, AArch64::W6, AArch64::W7 },
     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
@@ -2146,7 +2147,9 @@ bool AArch64FastISel::fastLowerArguments() {
     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
       AArch64::S5, AArch64::S6, AArch64::S7 },
     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
-      AArch64::D5, AArch64::D6, AArch64::D7 }
+      AArch64::D5, AArch64::D6, AArch64::D7 },
+    { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
+      AArch64::Q5, AArch64::Q6, AArch64::Q7 }
   };
 
   unsigned GPRIdx = 0;
@@ -2154,29 +2157,28 @@ bool AArch64FastISel::fastLowerArguments() {
   for (auto const &Arg : F->args()) {
     MVT VT = TLI.getSimpleValueType(Arg.getType());
     unsigned SrcReg;
-    const TargetRegisterClass *RC = nullptr;
-    switch (VT.SimpleTy) {
-    default: llvm_unreachable("Unexpected value type.");
-    case MVT::i1:
-    case MVT::i8:
-    case MVT::i16: VT = MVT::i32; // fall-through
-    case MVT::i32:
-      SrcReg = Registers[0][GPRIdx++]; RC = &AArch64::GPR32RegClass; break;
-    case MVT::i64:
-      SrcReg = Registers[1][GPRIdx++]; RC = &AArch64::GPR64RegClass; break;
-    case MVT::f16:
-      SrcReg = Registers[2][FPRIdx++]; RC = &AArch64::FPR16RegClass; break;
-    case MVT::f32:
-      SrcReg = Registers[3][FPRIdx++]; RC = &AArch64::FPR32RegClass; break;
-    case MVT::f64:
-      SrcReg = Registers[4][FPRIdx++]; RC = &AArch64::FPR64RegClass; break;
-    }
-
-    // Skip unused arguments.
-    if (Arg.use_empty()) {
-      updateValueMap(&Arg, 0);
-      continue;
-    }
+    const TargetRegisterClass *RC;
+    if (VT >= MVT::i1 && VT <= MVT::i32) {
+      SrcReg = Registers[0][GPRIdx++];
+      RC = &AArch64::GPR32RegClass;
+      VT = MVT::i32;
+    } else if (VT == MVT::i64) {
+      SrcReg = Registers[1][GPRIdx++];
+      RC = &AArch64::GPR64RegClass;
+    } else if (VT == MVT::f16) {
+      SrcReg = Registers[2][FPRIdx++];
+      RC = &AArch64::FPR16RegClass;
+    } else if (VT == MVT::f32) {
+      SrcReg = Registers[3][FPRIdx++];
+      RC = &AArch64::FPR32RegClass;
+    } else if ((VT == MVT::f64) || VT.is64BitVector()) {
+      SrcReg = Registers[4][FPRIdx++];
+      RC = &AArch64::FPR64RegClass;
+    } else if (VT.is128BitVector()) {
+      SrcReg = Registers[5][FPRIdx++];
+      RC = &AArch64::FPR128RegClass;
+    } else
+      llvm_unreachable("Unexpected value type.");
 
     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
diff --git a/test/CodeGen/AArch64/fast-isel-vector-arithmetic.ll b/test/CodeGen/AArch64/fast-isel-vector-arithmetic.ll
new file mode 100644
index 00000000000..eaa0db52794
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-vector-arithmetic.ll
@@ -0,0 +1,74 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -fast-isel-abort-args -verify-machineinstrs < %s | FileCheck %s
+
+; Vector Integer Add
+define <8 x i8> @add_v8i8_rr(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: add_v8i8_rr
+; CHECK: add.8b v0, v0, v1
+  %1 = add <8 x i8> %a, %b
+  ret <8 x i8> %1
+}
+
+define <16 x i8> @add_v16i8_rr(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: add_v16i8_rr
+; CHECK: add.16b v0, v0, v1
+  %1 = add <16 x i8> %a, %b
+  ret <16 x i8> %1
+}
+
+define <4 x i16> @add_v4i16_rr(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: add_v4i16_rr
+; CHECK: add.4h v0, v0, v1
+  %1 = add <4 x i16> %a, %b
+  ret <4 x i16> %1
+}
+
+define <8 x i16> @add_v8i16_rr(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: add_v8i16_rr
+; CHECK: add.8h v0, v0, v1
+  %1 = add <8 x i16> %a, %b
+  ret <8 x i16> %1
+}
+
+define <2 x i32> @add_v2i32_rr(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: add_v2i32_rr
+; CHECK: add.2s v0, v0, v1
+  %1 = add <2 x i32> %a, %b
+  ret <2 x i32> %1
+}
+
+define <4 x i32> @add_v4i32_rr(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: add_v4i32_rr
+; CHECK: add.4s v0, v0, v1
+  %1 = add <4 x i32> %a, %b
+  ret <4 x i32> %1
+}
+
+define <2 x i64> @add_v2i64_rr(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: add_v2i64_rr
+; CHECK: add.2d v0, v0, v1
+  %1 = add <2 x i64> %a, %b
+  ret <2 x i64> %1
+}
+
+; Vector Floating-point Add
+define <2 x float> @add_v2f32_rr(<2 x float> %a, <2 x float> %b) {
+; CHECK: add_v2f32_rr
+; CHECK: fadd.2s v0, v0, v1
+  %1 = fadd <2 x float> %a, %b
+  ret <2 x float> %1
+}
+
+define <4 x float> @add_v4f32_rr(<4 x float> %a, <4 x float> %b) {
+; CHECK: add_v4f32_rr
+; CHECK: fadd.4s v0, v0, v1
+  %1 = fadd <4 x float> %a, %b
+  ret <4 x float> %1
+}
+
+define <2 x double> @add_v2f64_rr(<2 x double> %a, <2 x double> %b) {
+; CHECK: add_v2f64_rr
+; CHECK: fadd.2d v0, v0, v1
+  %1 = fadd <2 x double> %a, %b
+  ret <2 x double> %1
+}
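
Note (not part of the patch): with these changes, fastLowerArguments accepts 64-bit and 128-bit vector arguments, counting them against the same 8-FPR budget as scalar floats (D registers for 64-bit vectors, Q registers for 128-bit ones), provided NEON is available and the target is little-endian. As a minimal sketch of what now stays on the FastISel path, consider a hypothetical function mixing scalar and vector arguments (the name and argument list below are illustrative only, not from the patch or its tests):

; Illustrative IR: %a and %b take W0 and X1 (2 of 8 GPRs), while %c, %d,
; and %e take S0, D1, and Q2 (3 of 8 FPRs), so the new argument-lowering
; code handles all five arguments without a SelectionDAG fallback.
define <4 x i32> @mixed_args(i32 %a, i64 %b, float %c,
                             <2 x i32> %d, <4 x i32> %e) {
  %1 = add <4 x i32> %e, %e
  ret <4 x i32> %1
}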