From d9ced7112e85a566999276099aef5f2ddf6735b7 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 26 Apr 2014 14:53:05 +0000 Subject: [PATCH] X86TTI: i16/i32 vector div with a constant (splat) divisor are reasonably cheap now. Turn vectorization back on. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207320 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetTransformInfo.cpp | 19 +++++ test/Analysis/CostModel/X86/vdiv-cost.ll | 92 +++++++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 test/Analysis/CostModel/X86/vdiv-cost.ll diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 5158004cadb..628e9130a15 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -284,6 +284,21 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + static const CostTblEntry + AVX2UniformConstCostTable[] = { + { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence + { ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence + { ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence + { ISD::UDIV, MVT::v8i32, 15 }, // vpmuludq sequence + }; + + if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && + ST->hasAVX2()) { + int Idx = CostTableLookup(AVX2UniformConstCostTable, ISD, LT.second); + if (Idx != -1) + return LT.first * AVX2UniformConstCostTable[Idx].Cost; + } + static const CostTblEntry AVX2CostTable[] = { // Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to // customize them to detect the cases where shift amount is a scalar one. @@ -351,6 +366,10 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. { ISD::SRA, MVT::v8i16, 1 }, // psraw. { ISD::SRA, MVT::v4i32, 1 }, // psrad. + + { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence + { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence + { ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence }; if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && diff --git a/test/Analysis/CostModel/X86/vdiv-cost.ll b/test/Analysis/CostModel/X86/vdiv-cost.ll new file mode 100644 index 00000000000..4ba1ef07ad7 --- /dev/null +++ b/test/Analysis/CostModel/X86/vdiv-cost.ll @@ -0,0 +1,92 @@ +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2 + +define <4 x i32> @test1(<4 x i32> %a) { + %div = udiv <4 x i32> %a, + ret <4 x i32> %div + +; CHECK: 'Cost Model Analysis' for function 'test1': +; SSE2: Found an estimated cost of 15 for instruction: %div +; AVX2: Found an estimated cost of 15 for instruction: %div +} + +define <8 x i32> @test2(<8 x i32> %a) { + %div = udiv <8 x i32> %a, + ret <8 x i32> %div + +; CHECK: 'Cost Model Analysis' for function 'test2': +; SSE2: Found an estimated cost of 30 for instruction: %div +; AVX2: Found an estimated cost of 15 for instruction: %div +} + +define <8 x i16> @test3(<8 x i16> %a) { + %div = udiv <8 x i16> %a, + ret <8 x i16> %div + +; CHECK: 'Cost Model Analysis' for function 'test3': +; SSE2: Found an estimated cost of 6 for instruction: %div +; AVX2: Found an estimated cost of 6 for instruction: %div +} + +define <16 x i16> @test4(<16 x i16> %a) { + %div = udiv <16 x i16> %a, + ret <16 x i16> %div + +; CHECK: 'Cost Model Analysis' for function 'test4': +; SSE2: Found an estimated cost of 12 for instruction: %div +; AVX2: Found an estimated cost of 6 for instruction: %div +} + +define <8 x i16> @test5(<8 x i16> %a) { + %div = sdiv <8 x i16> %a, + ret <8 x i16> %div + +; CHECK: 'Cost Model Analysis' for function 'test5': +; SSE2: Found an estimated cost of 6 for instruction: %div +; AVX2: Found an estimated cost of 6 for instruction: %div +} + +define <16 x i16> @test6(<16 x i16> %a) { + %div = sdiv <16 x i16> %a, + ret <16 x i16> %div + +; CHECK: 'Cost Model Analysis' for function 'test6': +; SSE2: Found an estimated cost of 12 for instruction: %div +; AVX2: Found an estimated cost of 6 for instruction: %div +} + +define <16 x i8> @test7(<16 x i8> %a) { + %div = sdiv <16 x i8> %a, + ret <16 x i8> %div + +; CHECK: 'Cost Model Analysis' for function 'test7': +; SSE2: Found an estimated cost of 320 for instruction: %div +; AVX2: Found an estimated cost of 320 for instruction: %div +} + +define <4 x i32> @test8(<4 x i32> %a) { + %div = sdiv <4 x i32> %a, + ret <4 x i32> %div + +; CHECK: 'Cost Model Analysis' for function 'test8': +; SSE2: Found an estimated cost of 15 for instruction: %div +; AVX2: Found an estimated cost of 15 for instruction: %div +} + +define <8 x i32> @test9(<8 x i32> %a) { + %div = sdiv <8 x i32> %a, + ret <8 x i32> %div + +; CHECK: 'Cost Model Analysis' for function 'test9': +; SSE2: Found an estimated cost of 30 for instruction: %div +; AVX2: Found an estimated cost of 15 for instruction: %div +} + +define <8 x i32> @test10(<8 x i32> %a) { + %div = sdiv <8 x i32> %a, + ret <8 x i32> %div + +; CHECK: 'Cost Model Analysis' for function 'test10': +; SSE2: Found an estimated cost of 160 for instruction: %div +; AVX2: Found an estimated cost of 160 for instruction: %div +}