mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-27 14:34:58 +00:00
ARM cost model: Fix cost of fptrunc and fpext instructions
Vector fptrunc and fpext instructions simply get split into scalar instructions. radar://13192358 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177159 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
044f841267
commit
c0d8dc0eb6
@ -177,6 +177,23 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||
assert(ISD && "Invalid opcode");
|
||||
|
||||
// Single to/from double precision conversions.
|
||||
static const CostTblEntry<MVT> NEONFltDblTbl[] = {
|
||||
// Vector fptrunc/fpext conversions.
|
||||
{ ISD::FP_ROUND, MVT::v2f64, 2 },
|
||||
{ ISD::FP_EXTEND, MVT::v2f32, 2 },
|
||||
{ ISD::FP_EXTEND, MVT::v4f32, 4 }
|
||||
};
|
||||
|
||||
if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
|
||||
ISD == ISD::FP_EXTEND)) {
|
||||
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
|
||||
int Idx = CostTableLookup<MVT>(NEONFltDblTbl, array_lengthof(NEONFltDblTbl),
|
||||
ISD, LT.second);
|
||||
if (Idx != -1)
|
||||
return LT.first * NEONFltDblTbl[Idx].Cost;
|
||||
}
|
||||
|
||||
EVT SrcTy = TLI->getValueType(Src);
|
||||
EVT DstTy = TLI->getValueType(Dst);
|
||||
|
||||
@ -255,7 +272,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||
return NEONFloatConversionTbl[Idx].Cost;
|
||||
}
|
||||
|
||||
|
||||
// Scalar integer to float conversions.
|
||||
static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = {
|
||||
{ ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
|
||||
@ -311,7 +327,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||
return ARMIntegerConversionTbl[Idx].Cost;
|
||||
}
|
||||
|
||||
|
||||
return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
|
||||
}
|
||||
|
||||
|
@ -166,6 +166,30 @@ define i32 @casts() {
|
||||
; CHECK: cost of 38 {{.*}} trunc
|
||||
%r75 = trunc <16 x i32> undef to <16 x i8>
|
||||
|
||||
; Floating point truncation costs.
|
||||
; CHECK: cost of 1 {{.*}} fptrunc double
|
||||
%r80 = fptrunc double undef to float
|
||||
; CHECK: cost of 2 {{.*}} fptrunc <2 x double
|
||||
%r81 = fptrunc <2 x double> undef to <2 x float>
|
||||
; CHECK: cost of 4 {{.*}} fptrunc <4 x double
|
||||
%r82 = fptrunc <4 x double> undef to <4 x float>
|
||||
; CHECK: cost of 8 {{.*}} fptrunc <8 x double
|
||||
%r83 = fptrunc <8 x double> undef to <8 x float>
|
||||
; CHECK: cost of 16 {{.*}} fptrunc <16 x double
|
||||
%r84 = fptrunc <16 x double> undef to <16 x float>
|
||||
|
||||
; Floating point extension costs.
|
||||
; CHECK: cost of 1 {{.*}} fpext float
|
||||
%r85 = fpext float undef to double
|
||||
; CHECK: cost of 2 {{.*}} fpext <2 x float
|
||||
%r86 = fpext <2 x float> undef to <2 x double>
|
||||
; CHECK: cost of 4 {{.*}} fpext <4 x float
|
||||
%r87 = fpext <4 x float> undef to <4 x double>
|
||||
; CHECK: cost of 8 {{.*}} fpext <8 x float
|
||||
%r88 = fpext <8 x float> undef to <8 x double>
|
||||
; CHECK: cost of 16 {{.*}} fpext <16 x float
|
||||
%r89 = fpext <16 x float> undef to <16 x double>
|
||||
|
||||
;CHECK: cost of 0 {{.*}} ret
|
||||
ret i32 undef
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user