mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-01 00:33:09 +00:00
R600: Use masked read sel for texture instructions
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192554 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
91ec4b0cac
commit
f2b3a569ae
@ -1379,6 +1379,11 @@ CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
|
||||
};
|
||||
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
if (NewBldVec[i].getOpcode() == ISD::UNDEF)
|
||||
// We mask write here to teach later passes that the ith element of this
|
||||
// vector is undef. Thus we can use it to reduce 128 bits reg usage,
|
||||
// break false dependencies and additionally make assembly easier to read.
|
||||
RemapSwizzle[i] = 7; // SEL_MASK_WRITE
|
||||
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
|
||||
if (C->isZero()) {
|
||||
RemapSwizzle[i] = 4; // SEL_0
|
||||
|
@ -93,6 +93,7 @@ main_body:
|
||||
}
|
||||
|
||||
; EG-CHECK: @main2
|
||||
; EG-CHECK: T{{[0-9]+}}.XY__
|
||||
; EG-CHECK: T{{[0-9]+}}.YXZ0
|
||||
|
||||
define void @main2() #0 {
|
||||
@ -110,14 +111,12 @@ main_body:
|
||||
%10 = extractelement <4 x float> %9, i32 1
|
||||
%11 = insertelement <4 x float> undef, float %0, i32 0
|
||||
%12 = insertelement <4 x float> %11, float %1, i32 1
|
||||
%13 = insertelement <4 x float> %12, float %2, i32 2
|
||||
%14 = insertelement <4 x float> %13, float %3, i32 3
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %14, i32 60, i32 1)
|
||||
%15 = insertelement <4 x float> undef, float %6, i32 0
|
||||
%16 = insertelement <4 x float> %15, float %8, i32 1
|
||||
%17 = insertelement <4 x float> %16, float %10, i32 2
|
||||
%18 = insertelement <4 x float> %17, float 0.000000e+00, i32 3
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 2)
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %12, i32 60, i32 1)
|
||||
%13 = insertelement <4 x float> undef, float %6, i32 0
|
||||
%14 = insertelement <4 x float> %13, float %8, i32 1
|
||||
%15 = insertelement <4 x float> %14, float %10, i32 2
|
||||
%16 = insertelement <4 x float> %15, float 0.000000e+00, i32 3
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %16, i32 0, i32 2)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user