From 94fe5c1fe2a3419d90e612735200dbc2c36f29ef Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Sun, 15 Jun 2014 09:27:06 +0000 Subject: [PATCH] AArch64: improve vector [su]itofp handling. This somehow got missed in the AArch64 merge, so should fix a performance regression since 3.4. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210984 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 35 ++-- .../AArch64/AArch64TargetTransformInfo.cpp | 47 +++-- .../AArch64/arm64-convert-v2i32-v2f64.ll | 29 ---- ...rt-v2f64-v2i32.ll => complex-fp-to-int.ll} | 0 test/CodeGen/AArch64/complex-int-to-fp.ll | 164 ++++++++++++++++++ 5 files changed, 210 insertions(+), 65 deletions(-) delete mode 100644 test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll rename test/CodeGen/AArch64/{arm64-convert-v2f64-v2i32.ll => complex-fp-to-int.ll} (100%) create mode 100644 test/CodeGen/AArch64/complex-int-to-fp.ll diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 79312adbd2c..fd9e69d3231 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1440,32 +1440,23 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue In = Op.getOperand(0); EVT InVT = In.getValueType(); - // v2i32 to v2f32 is legal. - if (VT == MVT::v2f32 && InVT == MVT::v2i32) - return Op; - - // This function only handles v2f64 outputs. - if (VT == MVT::v2f64) { - // Extend the input argument to a v2i64 that we can feed into the - // floating point conversion. Zero or sign extend based on whether - // we're doing a signed or unsigned float conversion. - unsigned Opc = - Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; - assert(Op.getNumOperands() == 1 && "FP conversions take one argument"); - SDValue Promoted = DAG.getNode(Opc, dl, MVT::v2i64, Op.getOperand(0)); - return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Promoted); + if (VT.getSizeInBits() < InVT.getSizeInBits()) { + MVT CastVT = + MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()), + InVT.getVectorNumElements()); + In = DAG.getNode(Op.getOpcode(), dl, CastVT, In); + return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0)); } - // Scalarize v2i64 to v2f32 conversions. - std::vector BuildVectorOps; - for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { - SDValue Sclr = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, In, - DAG.getConstant(i, MVT::i64)); - Sclr = DAG.getNode(Op->getOpcode(), dl, MVT::f32, Sclr); - BuildVectorOps.push_back(Sclr); + if (VT.getSizeInBits() > InVT.getSizeInBits()) { + unsigned CastOpc = + Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + EVT CastVT = VT.changeVectorElementTypeToInteger(); + In = DAG.getNode(CastOpc, dl, CastVT, In); + return DAG.getNode(Op.getOpcode(), dl, VT, In); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BuildVectorOps); + return Op; } SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 33e482a53a4..4fae0a53b32 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -306,28 +306,47 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, static const TypeConversionCostTblEntry ConversionTbl[] = { // LowerVectorINT_TO_FP: { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, + + // Complex: to v2f32 + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 }, + + // Complex: to v4f32 + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + + // Complex: to v2f64 + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + + // LowerVectorFP_TO_INT + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 }, { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 }, + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 }, { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, - { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 }, - { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 }, - { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 4 }, - { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 4 }, - { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 }, - { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4 }, - { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 4 }, - { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 4 }, + + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 }, + { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 }, + { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 }, }; int Idx = ConvertCostTableLookup( diff --git a/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll b/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll deleted file mode 100644 index 4eb0ca71850..00000000000 --- a/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll +++ /dev/null @@ -1,29 +0,0 @@ -; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s - -define <2 x double> @f1(<2 x i32> %v) nounwind readnone { -; CHECK-LABEL: f1: -; CHECK: sshll.2d v0, v0, #0 -; CHECK-NEXT: scvtf.2d v0, v0 -; CHECK-NEXT: ret - %conv = sitofp <2 x i32> %v to <2 x double> - ret <2 x double> %conv -} -define <2 x double> @f2(<2 x i32> %v) nounwind readnone { -; CHECK-LABEL: f2: -; CHECK: ushll.2d v0, v0, #0 -; CHECK-NEXT: ucvtf.2d v0, v0 -; CHECK-NEXT: ret - %conv = uitofp <2 x i32> %v to <2 x double> - ret <2 x double> %conv -} - -; CHECK: autogen_SD19655 -; CHECK: scvtf -; CHECK: ret -define void @autogen_SD19655(<2 x i64>* %addr, <2 x float>* %addrfloat) { - %T = load <2 x i64>* %addr - %F = sitofp <2 x i64> %T to <2 x float> - store <2 x float> %F, <2 x float>* %addrfloat - ret void -} - diff --git a/test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll b/test/CodeGen/AArch64/complex-fp-to-int.ll similarity index 100% rename from test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll rename to test/CodeGen/AArch64/complex-fp-to-int.ll diff --git a/test/CodeGen/AArch64/complex-int-to-fp.ll b/test/CodeGen/AArch64/complex-int-to-fp.ll new file mode 100644 index 00000000000..5c943f95c35 --- /dev/null +++ b/test/CodeGen/AArch64/complex-int-to-fp.ll @@ -0,0 +1,164 @@ +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s + +; CHECK: autogen_SD19655 +; CHECK: scvtf +; CHECK: ret +define void @autogen_SD19655(<2 x i64>* %addr, <2 x float>* %addrfloat) { + %T = load <2 x i64>* %addr + %F = sitofp <2 x i64> %T to <2 x float> + store <2 x float> %F, <2 x float>* %addrfloat + ret void +} + +define <2 x double> @test_signed_v2i32_to_v2f64(<2 x i32> %v) nounwind readnone { +; CHECK-LABEL: test_signed_v2i32_to_v2f64: +; CHECK: sshll.2d [[VAL64:v[0-9]+]], v0, #0 +; CHECK-NEXT: scvtf.2d v0, [[VAL64]] +; CHECK-NEXT: ret + %conv = sitofp <2 x i32> %v to <2 x double> + ret <2 x double> %conv +} + +define <2 x double> @test_unsigned_v2i32_to_v2f64(<2 x i32> %v) nounwind readnone { +; CHECK-LABEL: test_unsigned_v2i32_to_v2f64 +; CHECK: ushll.2d [[VAL64:v[0-9]+]], v0, #0 +; CHECK-NEXT: ucvtf.2d v0, [[VAL64]] +; CHECK-NEXT: ret + %conv = uitofp <2 x i32> %v to <2 x double> + ret <2 x double> %conv +} + +define <2 x double> @test_signed_v2i16_to_v2f64(<2 x i16> %v) nounwind readnone { +; CHECK-LABEL: test_signed_v2i16_to_v2f64: +; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #16 +; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #16 +; CHECK: sshll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0 +; CHECK: scvtf.2d v0, [[VAL64]] + + %conv = sitofp <2 x i16> %v to <2 x double> + ret <2 x double> %conv +} +define <2 x double> @test_unsigned_v2i16_to_v2f64(<2 x i16> %v) nounwind readnone { +; CHECK-LABEL: test_unsigned_v2i16_to_v2f64 +; CHECK: movi d[[MASK:[0-9]+]], #0x00ffff0000ffff +; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]] +; CHECK: ushll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0 +; CHECK: ucvtf.2d v0, [[VAL64]] + + %conv = uitofp <2 x i16> %v to <2 x double> + ret <2 x double> %conv +} + +define <2 x double> @test_signed_v2i8_to_v2f64(<2 x i8> %v) nounwind readnone { +; CHECK-LABEL: test_signed_v2i8_to_v2f64: +; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #24 +; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #24 +; CHECK: sshll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0 +; CHECK: scvtf.2d v0, [[VAL64]] + + %conv = sitofp <2 x i8> %v to <2 x double> + ret <2 x double> %conv +} +define <2 x double> @test_unsigned_v2i8_to_v2f64(<2 x i8> %v) nounwind readnone { +; CHECK-LABEL: test_unsigned_v2i8_to_v2f64 +; CHECK: movi d[[MASK:[0-9]+]], #0x0000ff000000ff +; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]] +; CHECK: ushll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0 +; CHECK: ucvtf.2d v0, [[VAL64]] + + %conv = uitofp <2 x i8> %v to <2 x double> + ret <2 x double> %conv +} + +define <2 x float> @test_signed_v2i64_to_v2f32(<2 x i64> %v) nounwind readnone { +; CHECK-LABEL: test_signed_v2i64_to_v2f32: +; CHECK: scvtf.2d [[VAL64:v[0-9]+]], v0 +; CHECK: fcvtn v0.2s, [[VAL64]].2d + + %conv = sitofp <2 x i64> %v to <2 x float> + ret <2 x float> %conv +} +define <2 x float> @test_unsigned_v2i64_to_v2f32(<2 x i64> %v) nounwind readnone { +; CHECK-LABEL: test_unsigned_v2i64_to_v2f32 +; CHECK: ucvtf.2d [[VAL64:v[0-9]+]], v0 +; CHECK: fcvtn v0.2s, [[VAL64]].2d + + %conv = uitofp <2 x i64> %v to <2 x float> + ret <2 x float> %conv +} + +define <2 x float> @test_signed_v2i16_to_v2f32(<2 x i16> %v) nounwind readnone { +; CHECK-LABEL: test_signed_v2i16_to_v2f32: +; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #16 +; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #16 +; CHECK: scvtf.2s v0, [[VAL32]] + + %conv = sitofp <2 x i16> %v to <2 x float> + ret <2 x float> %conv +} +define <2 x float> @test_unsigned_v2i16_to_v2f32(<2 x i16> %v) nounwind readnone { +; CHECK-LABEL: test_unsigned_v2i16_to_v2f32 +; CHECK: movi d[[MASK:[0-9]+]], #0x00ffff0000ffff +; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]] +; CHECK: ucvtf.2s v0, [[VAL32]] + + %conv = uitofp <2 x i16> %v to <2 x float> + ret <2 x float> %conv +} + +define <2 x float> @test_signed_v2i8_to_v2f32(<2 x i8> %v) nounwind readnone { +; CHECK-LABEL: test_signed_v2i8_to_v2f32: +; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #24 +; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #24 +; CHECK: scvtf.2s v0, [[VAL32]] + + %conv = sitofp <2 x i8> %v to <2 x float> + ret <2 x float> %conv +} +define <2 x float> @test_unsigned_v2i8_to_v2f32(<2 x i8> %v) nounwind readnone { +; CHECK-LABEL: test_unsigned_v2i8_to_v2f32 +; CHECK: movi d[[MASK:[0-9]+]], #0x0000ff000000ff +; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]] +; CHECK: ucvtf.2s v0, [[VAL32]] + + %conv = uitofp <2 x i8> %v to <2 x float> + ret <2 x float> %conv +} + +define <4 x float> @test_signed_v4i16_to_v4f32(<4 x i16> %v) nounwind readnone { +; CHECK-LABEL: test_signed_v4i16_to_v4f32: +; CHECK: sshll.4s [[VAL32:v[0-9]+]], v0, #0 +; CHECK: scvtf.4s v0, [[VAL32]] + + %conv = sitofp <4 x i16> %v to <4 x float> + ret <4 x float> %conv +} + +define <4 x float> @test_unsigned_v4i16_to_v4f32(<4 x i16> %v) nounwind readnone { +; CHECK-LABEL: test_unsigned_v4i16_to_v4f32 +; CHECK: ushll.4s [[VAL32:v[0-9]+]], v0, #0 +; CHECK: ucvtf.4s v0, [[VAL32]] + + %conv = uitofp <4 x i16> %v to <4 x float> + ret <4 x float> %conv +} + +define <4 x float> @test_signed_v4i8_to_v4f32(<4 x i8> %v) nounwind readnone { +; CHECK-LABEL: test_signed_v4i8_to_v4f32: +; CHECK: shl.4h [[TMP:v[0-9]+]], v0, #8 +; CHECK: sshr.4h [[VAL16:v[0-9]+]], [[TMP]], #8 +; CHECK: sshll.4s [[VAL32:v[0-9]+]], [[VAL16]], #0 +; CHECK: scvtf.4s v0, [[VAL32]] + + %conv = sitofp <4 x i8> %v to <4 x float> + ret <4 x float> %conv +} +define <4 x float> @test_unsigned_v4i8_to_v4f32(<4 x i8> %v) nounwind readnone { +; CHECK-LABEL: test_unsigned_v4i8_to_v4f32 +; CHECK: bic.4h v0, #0xff, lsl #8 +; CHECK: ushll.4s [[VAL32:v[0-9]+]], v0, #0 +; CHECK: ucvtf.4s v0, [[VAL32]] + + %conv = uitofp <4 x i8> %v to <4 x float> + ret <4 x float> %conv +}