mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-02 10:33:53 +00:00
Fix a bug in the cost calculation of vector casts. Detect situations where bitcasts cost zero.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167170 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2932284f04
commit
0dba9a9a26
@ -211,40 +211,55 @@ unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||
std::pair<unsigned, EVT> DstLT =
|
||||
getTypeLegalizationCost(Dst->getContext(), TLI->getValueType(Dst));
|
||||
|
||||
// If the cast is between same-sized registers, then the check is simple.
|
||||
if (SrcLT.first == DstLT.first &&
|
||||
SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
|
||||
// Just check the op cost:
|
||||
if (!TLI->isOperationExpand(ISD, DstLT.second)) {
|
||||
// The operation is legal. Assume it costs 1. Multiply
|
||||
// by the type-legalization overhead.
|
||||
return SrcLT.first * 1;
|
||||
// Handle scalar conversions.
|
||||
if (!Src->isVectorTy() && !Dst->isVectorTy()) {
|
||||
// Just check the op cost. If the operation is legal then assume it costs 1.
|
||||
if (!TLI->isOperationExpand(ISD, DstLT.second))
|
||||
return 1;
|
||||
|
||||
// Assume that illegal scalar instructions are expensive.
|
||||
return 4;
|
||||
}
|
||||
|
||||
// Check vector-to-vector casts.
|
||||
if (Dst->isVectorTy() && Src->isVectorTy()) {
|
||||
|
||||
// If the cast is between same-sized registers, then the check is simple.
|
||||
if (SrcLT.first == DstLT.first &&
|
||||
SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
|
||||
|
||||
// Bitcasts between types that are legalized to the same type are free.
|
||||
if (Opcode == Instruction::BitCast)
|
||||
return 0;
|
||||
|
||||
// Just check the op cost. If the operation is legal then assume it costs
|
||||
// 1 and multiply by the type-legalization overhead.
|
||||
if (!TLI->isOperationExpand(ISD, DstLT.second))
|
||||
return SrcLT.first * 1;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned ScalarizationCost = 1;
|
||||
|
||||
// Otherwise, assume that the cast is scalarized.
|
||||
if (Dst->isVectorTy()) {
|
||||
// If we are converting vectors and the operation is illegal, or
|
||||
// if the vectors are legalized to different types, estimate the
|
||||
// scalarization costs.
|
||||
unsigned Num = Dst->getVectorNumElements();
|
||||
unsigned Cost = getCastInstrCost(Opcode, Src->getScalarType(),
|
||||
Dst->getScalarType());
|
||||
// return the cost of multiple scalar invocation plus the cost of inserting
|
||||
// and extracting the values.
|
||||
ScalarizationCost *= getScalarizationOverhead(Dst, true, true) + Num * Cost;
|
||||
}
|
||||
|
||||
if (Src->isVectorTy()) {
|
||||
unsigned Num = Src->getVectorNumElements();
|
||||
unsigned Cost = getCastInstrCost(Opcode, Dst->getScalarType(),
|
||||
Src->getScalarType());
|
||||
// return the cost of multiple scalar invocation plus the cost of inserting
|
||||
// and extracting the values.
|
||||
ScalarizationCost *= getScalarizationOverhead(Src, true, true) + Num * Cost;
|
||||
|
||||
// Return the cost of multiple scalar invocation plus the cost of
|
||||
// inserting and extracting the values.
|
||||
return getScalarizationOverhead(Dst, true, true) + Num * Cost;
|
||||
}
|
||||
|
||||
return ScalarizationCost;
|
||||
}
|
||||
// We already handled vector-to-vector and scalar-to-scalar conversions. This
|
||||
// is where we handle bitcast between vectors and scalars. We need to assume
|
||||
// that the conversion is scalarized in one way or another.
|
||||
if (Opcode == Instruction::BitCast)
|
||||
// Illegal bitcasts are done by storing and loading from a stack slot.
|
||||
return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) +
|
||||
(Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0);
|
||||
|
||||
llvm_unreachable("Unhandled cast");
|
||||
}
|
||||
|
||||
unsigned VectorTargetTransformImpl::getCFInstrCost(unsigned Opcode) const {
|
||||
return 1;
|
||||
|
48
test/Transforms/LoopVectorize/X86/conversion-cost.ll
Normal file
48
test/Transforms/LoopVectorize/X86/conversion-cost.ll
Normal file
@ -0,0 +1,48 @@
|
||||
; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
;CHECK: @conversion_cost1
|
||||
;CHECK: store <8 x i8>
|
||||
;CHECK: ret
|
||||
; Loop-vectorizer test input: a counted loop that narrows the 64-bit
; induction variable to i8 and stores it to A[i], for i starting at 3.
; The FileCheck expectations placed just before this function look for an
; 8-wide i8 vector store in the optimized output.
; Parameters:
;   %n - loop bound; the loop is entered only when %n > 3 (signed compare).
;   %A - destination byte buffer, indexed by the induction variable.
;   %B - not referenced anywhere in this function body.
; Returns: i32 undef (the result is not meaningful).
; NOTE(review): presumably this exercises the cost estimate for the
; vectorized i64 -> i8 truncation - confirm against the commit message.
define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
|
||||
; Guard: skip the loop entirely unless %n is greater than 3.
%1 = icmp sgt i32 %n, 3
|
||||
br i1 %1, label %.lr.ph, label %._crit_edge
|
||||
|
||||
.lr.ph: ; preds = %0, %.lr.ph
|
||||
; Induction variable: starts at 3 on entry, then takes the incremented value.
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 3, %0 ]
|
||||
; Narrow the 64-bit index to the 8-bit value that gets stored.
%2 = trunc i64 %indvars.iv to i8
|
||||
%3 = getelementptr inbounds i8* %A, i64 %indvars.iv
|
||||
store i8 %2, i8* %3, align 1
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
; Exit test: compare the low 32 bits of the next index against %n.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %n
|
||||
br i1 %exitcond, label %._crit_edge, label %.lr.ph
|
||||
|
||||
._crit_edge: ; preds = %.lr.ph, %0
|
||||
ret i32 undef
|
||||
}
|
||||
|
||||
;CHECK: @conversion_cost2
|
||||
;CHECK: store <8 x float>
|
||||
;CHECK: ret
|
||||
; Loop-vectorizer test input: a counted loop that computes (i + 3),
; truncates it to i32, converts it to float and stores it to B[i], for i
; starting at 9. The FileCheck expectations placed just before this function
; look for an 8-wide float vector store in the optimized output.
; Parameters:
;   %n - loop bound; the loop is entered only when %n > 9 (signed compare).
;   %A - not referenced anywhere in this function body.
;   %B - destination float buffer, indexed by the induction variable.
; Returns: i32 undef (the result is not meaningful).
; NOTE(review): presumably this exercises the cost estimates for the
; vectorized trunc and sitofp conversions - confirm against the commit message.
define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
|
||||
; Guard: skip the loop entirely unless %n is greater than 9.
%1 = icmp sgt i32 %n, 9
|
||||
br i1 %1, label %.lr.ph, label %._crit_edge
|
||||
|
||||
.lr.ph: ; preds = %0, %.lr.ph
|
||||
; Induction variable: starts at 9 on entry, then takes the incremented value.
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
|
||||
; Value to store: (index + 3), narrowed to i32, then converted to float.
%2 = add nsw i64 %indvars.iv, 3
|
||||
%3 = trunc i64 %2 to i32
|
||||
%4 = sitofp i32 %3 to float
|
||||
%5 = getelementptr inbounds float* %B, i64 %indvars.iv
|
||||
store float %4, float* %5, align 4
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
; Exit test: compare the low 32 bits of the next index against %n.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %n
|
||||
br i1 %exitcond, label %._crit_edge, label %.lr.ph
|
||||
|
||||
._crit_edge: ; preds = %.lr.ph, %0
|
||||
ret i32 undef
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user