From 2d1528b3584ad85f2674f7db14b6ee607da678ec Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Mon, 5 Nov 2012 22:20:53 +0000 Subject: [PATCH] Code Model: Improve the accuracy of the zext/sext/trunc vector cost estimation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167412 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/TargetTransformImpl.cpp | 13 +++++-- test/Analysis/CostModel/X86/cast.ll | 34 +++++++++++++++++++ .../LoopVectorize/X86/conversion-cost.ll | 4 +-- 3 files changed, 46 insertions(+), 5 deletions(-) create mode 100644 test/Analysis/CostModel/X86/cast.ll diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp index 4b427a2b6d4..ca0dd9a54ed 100644 --- a/lib/Target/TargetTransformImpl.cpp +++ b/lib/Target/TargetTransformImpl.cpp @@ -101,7 +101,7 @@ int VectorTargetTransformImpl::InstructionOpcodeToISD(unsigned Opcode) const { case AtomicRMW: return 0; case Trunc: return ISD::TRUNCATE; case ZExt: return ISD::ZERO_EXTEND; - case SExt: return ISD::SEXTLOAD; + case SExt: return ISD::SIGN_EXTEND; case FPToUI: return ISD::FP_TO_UINT; case FPToSI: return ISD::FP_TO_SINT; case UIToFP: return ISD::UINT_TO_FP; @@ -235,9 +235,17 @@ unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst, SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { // Bitcast between types that are legalized to the same type are free. - if (Opcode == Instruction::BitCast) + if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) return 0; + // Assume that Zext is done using AND. + if (Opcode == Instruction::ZExt) + return 1; + + // Assume that sext is done using SHL and SRA. + if (Opcode == Instruction::SExt) + return 2; + // Just check the op cost. If the operation is legal then assume it costs // 1 and multiply by the type-legalization overhead. 
if (!TLI->isOperationExpand(ISD, DstLT.second)) @@ -310,7 +318,6 @@ unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode, return 1; } -/// Returns the expected cost of Vector Insert and Extract. unsigned VectorTargetTransformImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const { diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll new file mode 100644 index 00000000000..f8b1114a7cc --- /dev/null +++ b/test/Analysis/CostModel/X86/cast.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +define i32 @add(i32 %arg) { + + ; -- Same size registers -- + ;CHECK: cost of 1 {{.*}} zext + %A = zext <4 x i1> undef to <4 x i32> + ;CHECK: cost of 2 {{.*}} sext + %B = sext <4 x i1> undef to <4 x i32> + ;CHECK: cost of 0 {{.*}} trunc + %C = trunc <4 x i32> undef to <4 x i1> + + ; -- Different size registers -- + ;CHECK-NOT: cost of 1 {{.*}} zext + %D = zext <8 x i1> undef to <8 x i32> + ;CHECK-NOT: cost of 2 {{.*}} sext + %E = sext <8 x i1> undef to <8 x i32> + ;CHECK-NOT: cost of 2 {{.*}} trunc + %F = trunc <8 x i32> undef to <8 x i1> + + ; -- scalars -- + + ;CHECK: cost of 1 {{.*}} zext + %G = zext i1 undef to i32 + ;CHECK: cost of 0 {{.*}} trunc + %H = trunc i32 undef to i1 + + ;CHECK: cost of 1 {{.*}} ret + ret i32 undef +} + diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll index 19bcdc5d902..8f1bb545fa0 100644 --- a/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = 
"x86_64-apple-macosx10.8.0" ;CHECK: @conversion_cost1 -;CHECK: store <8 x i8> +;CHECK: store <2 x i8> ;CHECK: ret define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp { %1 = icmp sgt i32 %n, 3 @@ -25,7 +25,7 @@ define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) noun } ;CHECK: @conversion_cost2 -;CHECK-NOT: <8 x float> +;CHECK: <2 x float> ;CHECK: ret define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp { %1 = icmp sgt i32 %n, 9