mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-26 21:32:10 +00:00
Improve the X86 cost model for loads and stores.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170830 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c2a537bd08
commit
f5637c3997
@ -17818,6 +17818,30 @@ X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
|
||||
return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty);
|
||||
}
|
||||
|
||||
|
||||
unsigned
|
||||
X86VectorTargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||
unsigned Alignment,
|
||||
unsigned AddressSpace) const {
|
||||
// Legalize the type.
|
||||
std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Src);
|
||||
assert(Opcode == Instruction::Load || Opcode == Instruction::Store &&
|
||||
"Invalid Opcode");
|
||||
|
||||
const X86Subtarget &ST =
|
||||
TLI->getTargetMachine().getSubtarget<X86Subtarget>();
|
||||
|
||||
// Each load/store unit costs 1.
|
||||
unsigned Cost = LT.first * 1;
|
||||
|
||||
// On Sandybridge 256bit load/stores are double pumped
|
||||
// (but not on Haswell).
|
||||
if (LT.second.getSizeInBits() > 128 && !ST.hasAVX2())
|
||||
Cost*=2;
|
||||
|
||||
return Cost;
|
||||
}
|
||||
|
||||
unsigned
|
||||
X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
|
||||
unsigned Index) const {
|
||||
|
@ -953,6 +953,10 @@ namespace llvm {
|
||||
|
||||
// Cost of a scalar/vector arithmetic instruction of type Ty.
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;

// Cost of a load or store of type Src; Opcode must be Instruction::Load
// or Instruction::Store. Alignment/AddressSpace describe the access.
virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
                                 unsigned Alignment,
                                 unsigned AddressSpace) const;

// Cost of an insert/extract element at the given Index of vector type Val.
virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
                                    unsigned Index) const;
|
||||
|
||||
|
64
test/Analysis/CostModel/X86/load_store.ll
Normal file
64
test/Analysis/CostModel/X86/load_store.ll
Normal file
@ -0,0 +1,64 @@
|
||||
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
; Exercise getMemoryOpCost for stores: scalar and vector stores that fit in
; one legal register cost 1; 256-bit stores cost 2 on AVX (double pumped on
; Sandybridge), and <8 x i64> legalizes to two 256-bit stores, hence 4.
define i32 @stores(i32 %arg) {
  ;CHECK: cost of 1 {{.*}} store
  store i8 undef, i8* undef, align 4
  ;CHECK: cost of 1 {{.*}} store
  store i16 undef, i16* undef, align 4
  ;CHECK: cost of 1 {{.*}} store
  store i32 undef, i32* undef, align 4
  ;CHECK: cost of 1 {{.*}} store
  store i64 undef, i64* undef, align 4
  ;CHECK: cost of 2 {{.*}} store
  store i128 undef, i128* undef, align 4

  ;CHECK: cost of 1 {{.*}} store
  store <4 x i16> undef, <4 x i16>* undef, align 4
  ;CHECK: cost of 1 {{.*}} store
  store <4 x i32> undef, <4 x i32>* undef, align 4
  ;CHECK: cost of 2 {{.*}} store
  store <4 x i64> undef, <4 x i64>* undef, align 4

  ;CHECK: cost of 1 {{.*}} store
  store <8 x i16> undef, <8 x i16>* undef, align 4
  ;CHECK: cost of 2 {{.*}} store
  store <8 x i32> undef, <8 x i32>* undef, align 4
  ;CHECK: cost of 4 {{.*}} store
  store <8 x i64> undef, <8 x i64>* undef, align 4

  ret i32 undef
}
||||
; Exercise getMemoryOpCost for loads: mirror of @stores above. Loads that
; legalize to a single register cost 1; 256-bit loads cost 2 without AVX2;
; <8 x i64> splits into two 256-bit loads, hence 4.
define i32 @loads(i32 %arg) {
  ;CHECK: cost of 1 {{.*}} load
  load i8* undef, align 4
  ;CHECK: cost of 1 {{.*}} load
  load i16* undef, align 4
  ;CHECK: cost of 1 {{.*}} load
  load i32* undef, align 4
  ;CHECK: cost of 1 {{.*}} load
  load i64* undef, align 4
  ;CHECK: cost of 2 {{.*}} load
  load i128* undef, align 4

  ;CHECK: cost of 1 {{.*}} load
  load <2 x i32>* undef, align 4
  ;CHECK: cost of 1 {{.*}} load
  load <4 x i32>* undef, align 4
  ;CHECK: cost of 2 {{.*}} load
  load <8 x i32>* undef, align 4

  ;CHECK: cost of 1 {{.*}} load
  load <2 x i64>* undef, align 4
  ;CHECK: cost of 2 {{.*}} load
  load <4 x i64>* undef, align 4
  ;CHECK: cost of 4 {{.*}} load
  load <8 x i64>* undef, align 4

  ret i32 undef
}
|
||||
|
@ -28,16 +28,17 @@ vector.body: ; preds = %for.body.lr.ph, %ve
|
||||
%4 = getelementptr inbounds i32* %B, i64 %3
|
||||
;CHECK: cost of 0 {{.*}} bitcast
|
||||
%5 = bitcast i32* %4 to <8 x i32>*
|
||||
;CHECK: cost of 1 {{.*}} load
|
||||
;CHECK: cost of 2 {{.*}} load
|
||||
%6 = load <8 x i32>* %5, align 4
|
||||
;CHECK: cost of 4 {{.*}} mul
|
||||
%7 = mul nsw <8 x i32> %6, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
|
||||
%8 = getelementptr inbounds i32* %A, i64 %index
|
||||
%9 = bitcast i32* %8 to <8 x i32>*
|
||||
;CHECK: cost of 2 {{.*}} load
|
||||
%10 = load <8 x i32>* %9, align 4
|
||||
;CHECK: cost of 4 {{.*}} add
|
||||
%11 = add nsw <8 x i32> %10, %7
|
||||
;CHECK: cost of 1 {{.*}} store
|
||||
;CHECK: cost of 2 {{.*}} store
|
||||
store <8 x i32> %11, <8 x i32>* %9, align 4
|
||||
%index.next = add i64 %index, 8
|
||||
%12 = icmp eq i64 %index.next, %end.idx.rnd.down
|
||||
|
Loading…
Reference in New Issue
Block a user