mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-03 18:32:50 +00:00
R600/SI: fix MIMG writemask adjustment
This fixes piglit: - shaders/glsl-fs-texture2d-masked - shaders/glsl-fs-texture2d-masked-4 Patch by: Marek Olšák Signed-off-by: Marek Olšák <marek.olsak@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193222 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f95b162188
commit
96b5670cf4
@ -1092,7 +1092,9 @@ static unsigned SubIdx2Lane(unsigned Idx) {
|
||||
void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
|
||||
SelectionDAG &DAG) const {
|
||||
SDNode *Users[4] = { };
|
||||
unsigned Writemask = 0, Lane = 0;
|
||||
unsigned Lane = 0;
|
||||
unsigned OldDmask = Node->getConstantOperandVal(0);
|
||||
unsigned NewDmask = 0;
|
||||
|
||||
// Try to figure out the used register components
|
||||
for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
|
||||
@ -1103,29 +1105,42 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
|
||||
I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
|
||||
return;
|
||||
|
||||
// Lane means which subreg of %VGPRa_VGPRb_VGPRc_VGPRd is used.
|
||||
// Note that subregs are packed, i.e. Lane==0 is the first bit set
|
||||
// in OldDmask, so it can be any of X,Y,Z,W; Lane==1 is the second bit
|
||||
// set, etc.
|
||||
Lane = SubIdx2Lane(I->getConstantOperandVal(1));
|
||||
|
||||
// Set which texture component corresponds to the lane.
|
||||
unsigned Comp;
|
||||
for (unsigned i = 0, Dmask = OldDmask; i <= Lane; i++) {
|
||||
assert(Dmask);
|
||||
Comp = ffs(Dmask)-1;
|
||||
Dmask &= ~(1 << Comp);
|
||||
}
|
||||
|
||||
// Abort if we have more than one user per component
|
||||
if (Users[Lane])
|
||||
return;
|
||||
|
||||
Users[Lane] = *I;
|
||||
Writemask |= 1 << Lane;
|
||||
NewDmask |= 1 << Comp;
|
||||
}
|
||||
|
||||
// Abort if all components are used
|
||||
if (Writemask == 0xf)
|
||||
// Abort if there's no change
|
||||
if (NewDmask == OldDmask)
|
||||
return;
|
||||
|
||||
// Adjust the writemask in the node
|
||||
std::vector<SDValue> Ops;
|
||||
Ops.push_back(DAG.getTargetConstant(Writemask, MVT::i32));
|
||||
Ops.push_back(DAG.getTargetConstant(NewDmask, MVT::i32));
|
||||
for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
|
||||
Ops.push_back(Node->getOperand(i));
|
||||
Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
|
||||
|
||||
// If we only got one lane, replace it with a copy
|
||||
if (Writemask == (1U << Lane)) {
|
||||
// (if NewDmask has only one bit set...)
|
||||
if (NewDmask && (NewDmask & (NewDmask-1)) == 0) {
|
||||
SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32);
|
||||
SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
|
||||
SDLoc(), Users[Lane]->getValueType(0),
|
||||
|
93
test/CodeGen/R600/llvm.SI.sample-masked.ll
Normal file
93
test/CodeGen/R600/llvm.SI.sample-masked.ll
Normal file
@ -0,0 +1,93 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
|
||||
|
||||
; Only elements 0, 2 and 3 of the sampled <4 x float> are extracted, so the
; sample is expected to write three registers with mask 13 (0b1101).
; CHECK-LABEL: @v1
; CHECK: IMAGE_SAMPLE VGPR{{[0-9]+}}_VGPR{{[0-9]+}}_VGPR{{[0-9]+}}, 13
define void @v1(i32 %a1) {
entry:
  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
  %2 = extractelement <4 x float> %1, i32 0
  %3 = extractelement <4 x float> %1, i32 2
  %4 = extractelement <4 x float> %1, i32 3
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
  ret void
}
|
||||
|
||||
; Only elements 0, 1 and 3 of the sampled <4 x float> are extracted, so the
; sample is expected to write three registers with mask 11 (0b1011).
; CHECK-LABEL: @v2
; CHECK: IMAGE_SAMPLE VGPR{{[0-9]+}}_VGPR{{[0-9]+}}_VGPR{{[0-9]+}}, 11
define void @v2(i32 %a1) {
entry:
  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
  %2 = extractelement <4 x float> %1, i32 0
  %3 = extractelement <4 x float> %1, i32 1
  %4 = extractelement <4 x float> %1, i32 3
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
  ret void
}
|
||||
|
||||
; Only elements 1, 2 and 3 of the sampled <4 x float> are extracted, so the
; sample is expected to write three registers with mask 14 (0b1110).
; CHECK-LABEL: @v3
; CHECK: IMAGE_SAMPLE VGPR{{[0-9]+}}_VGPR{{[0-9]+}}_VGPR{{[0-9]+}}, 14
define void @v3(i32 %a1) {
entry:
  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
  %2 = extractelement <4 x float> %1, i32 1
  %3 = extractelement <4 x float> %1, i32 2
  %4 = extractelement <4 x float> %1, i32 3
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
  ret void
}
|
||||
|
||||
; Only elements 0, 1 and 2 of the sampled <4 x float> are extracted, so the
; sample is expected to write three registers with mask 7 (0b0111).
; CHECK-LABEL: @v4
; CHECK: IMAGE_SAMPLE VGPR{{[0-9]+}}_VGPR{{[0-9]+}}_VGPR{{[0-9]+}}, 7
define void @v4(i32 %a1) {
entry:
  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
  %2 = extractelement <4 x float> %1, i32 0
  %3 = extractelement <4 x float> %1, i32 1
  %4 = extractelement <4 x float> %1, i32 2
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
  ret void
}
|
||||
|
||||
; Only elements 1 and 3 of the sampled <4 x float> are extracted, so the
; sample is expected to write two registers with mask 10 (0b1010).
; CHECK-LABEL: @v5
; CHECK: IMAGE_SAMPLE VGPR{{[0-9]+}}_VGPR{{[0-9]+}}, 10
define void @v5(i32 %a1) {
entry:
  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
  %2 = extractelement <4 x float> %1, i32 1
  %3 = extractelement <4 x float> %1, i32 3
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
  ret void
}
|
||||
|
||||
; Only elements 1 and 2 of the sampled <4 x float> are extracted, so the
; sample is expected to write two registers with mask 6 (0b0110).
; CHECK-LABEL: @v6
; CHECK: IMAGE_SAMPLE VGPR{{[0-9]+}}_VGPR{{[0-9]+}}, 6
define void @v6(i32 %a1) {
entry:
  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
  %2 = extractelement <4 x float> %1, i32 1
  %3 = extractelement <4 x float> %1, i32 2
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
  ret void
}
|
||||
|
||||
; Only elements 0 and 3 of the sampled <4 x float> are extracted, so the
; sample is expected to write two registers with mask 9 (0b1001).
; CHECK-LABEL: @v7
; CHECK: IMAGE_SAMPLE VGPR{{[0-9]+}}_VGPR{{[0-9]+}}, 9
define void @v7(i32 %a1) {
entry:
  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
  %2 = extractelement <4 x float> %1, i32 0
  %3 = extractelement <4 x float> %1, i32 3
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
  ret void
}
|
||||
|
||||
; Sample intrinsic called by every test function in this file: takes a
; <1 x i32>, a <32 x i8>, a <16 x i8> and an i32, and returns the <4 x float>
; whose elements the tests extract. Declared readnone.
; NOTE(review): presumably address, resource descriptor, sampler descriptor
; and target selector, in that order -- confirm against the intrinsic definition.
declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone
|
||||
|
||||
; Export intrinsic called by every test function to consume the extracted
; components: five i32 control operands followed by four float values.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
Loading…
x
Reference in New Issue
Block a user