R600/SI: dynamically figure out the reg class of MIMG

The register class is chosen depending on the number of bits set
in the writemask.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179166 91177308-0d34-0410-b5e6-96231b3b80d8
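The mechanism, in brief: an MIMG instruction's 4-bit writemask has one bit per result component, so the number of set bits (the popcount) dictates how wide the destination vector register class must be. A minimal standalone sketch of that mapping, using a hypothetical helper name (the real logic is SITargetLowering::AdjustInstrPostInstrSelection in the diff below):

    #include <cstdio>

    // Hypothetical helper mirroring the switch added in this commit:
    // map the popcount of a 4-bit MIMG writemask to a register width.
    static unsigned regWidthForWritemask(unsigned Writemask) {
      unsigned BitsSet = 0;
      for (unsigned i = 0; i < 4; ++i)
        BitsSet += (Writemask >> i) & 1;  // count set component bits
      switch (BitsSet) {
      case 1:  return 32;   // VReg_32: one component live
      case 2:  return 64;   // VReg_64: two components live
      case 3:  return 96;   // VReg_96: three components live
      default: return 128;  // VReg_128: keep the full quadruple
      }
    }

    int main() {
      const unsigned Masks[] = {0x1, 0x3, 0x7, 0xF};
      for (unsigned Mask : Masks)
        std::printf("writemask 0x%X -> VReg_%u\n", Mask,
                    regWidthForWritemask(Mask));
      return 0;
    }

For example, a sample whose shader only reads .xy has writemask 0x3, so the result can live in a 64-bit VGPR pair instead of always occupying a full 128-bit quadruple.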
lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -107,6 +107,9 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
     } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
       isSGPR = false;
       width = 2;
+    } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
+      isSGPR = false;
+      width = 3;
     } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
       isSGPR = true;
       width = 4;
lib/Target/R600/SIISelLowering.cpp
@@ -720,7 +720,7 @@ unsigned SubIdx2Lane(unsigned Idx) {
 void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
                                        SelectionDAG &DAG) const {
   SDNode *Users[4] = { };
-  unsigned Writemask = 0;
+  unsigned Writemask = 0, Lane = 0;
 
   // Try to figure out the used register components
   for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
@@ -731,7 +731,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
         I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
       return;
 
-    unsigned Lane = SubIdx2Lane(I->getConstantOperandVal(1));
+    Lane = SubIdx2Lane(I->getConstantOperandVal(1));
 
     // Abort if we have more than one user per component
     if (Users[Lane])
@@ -752,6 +752,16 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
     Ops.push_back(Node->getOperand(i));
   Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
 
+  // If we only got one lane, replace it with a copy
+  if (Writemask == (1U << Lane)) {
+    SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32);
+    SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+                                      DebugLoc(), MVT::f32,
+                                      SDValue(Node, 0), RC);
+    DAG.ReplaceAllUsesWith(Users[Lane], Copy);
+    return;
+  }
+
   // Update the users of the node with the new indices
   for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
 
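The single-lane fast path above relies on a writemask with exactly one set bit: if only component Lane survives, Writemask equals 1 << Lane, and the whole MIMG result can be forwarded through one 32-bit COPY_TO_REGCLASS. A tiny self-contained check of that predicate (hypothetical helper, not code from this commit):

    #include <cassert>

    // True when the 4-bit writemask selects exactly the given lane,
    // i.e. the MIMG result is effectively a single 32-bit value.
    static bool isSingleLane(unsigned Writemask, unsigned Lane) {
      return Writemask == (1u << Lane);
    }

    int main() {
      assert(isSingleLane(0x4, 2));   // only component 2 (z) is used
      assert(!isSingleLane(0x5, 2));  // components 0 and 2 are used
      return 0;
    }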
@@ -780,3 +790,28 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
 
   return foldOperands(Node, DAG);
 }
+
+/// \brief Assign the register class depending on the number of
+/// bits set in the writemask
+void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+                                                     SDNode *Node) const {
+  if (AMDGPU::isMIMG(MI->getOpcode()) == -1)
+    return;
+
+  unsigned VReg = MI->getOperand(0).getReg();
+  unsigned Writemask = MI->getOperand(1).getImm();
+  unsigned BitsSet = 0;
+  for (unsigned i = 0; i < 4; ++i)
+    BitsSet += Writemask & (1 << i) ? 1 : 0;
+
+  const TargetRegisterClass *RC;
+  switch (BitsSet) {
+  default: return;
+  case 1: RC = &AMDGPU::VReg_32RegClass; break;
+  case 2: RC = &AMDGPU::VReg_64RegClass; break;
+  case 3: RC = &AMDGPU::VReg_96RegClass; break;
+  }
+
+  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+  MRI.setRegClass(VReg, RC);
+}
lib/Target/R600/SIISelLowering.h
@@ -53,6 +53,8 @@ public:
   virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
   virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
+  virtual void AdjustInstrPostInstrSelection(MachineInstr *MI,
+                                             SDNode *Node) const;
 
   int32_t analyzeImmediate(const SDNode *N) const;
 };
lib/Target/R600/SIInstrInfo.cpp
@@ -58,6 +58,10 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
   };
 
+  const int16_t Sub0_2[] = {
+    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
+  };
+
   const int16_t Sub0_1[] = {
     AMDGPU::sub0, AMDGPU::sub1, 0
   };
@@ -125,6 +129,11 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     Opcode = AMDGPU::V_MOV_B32_e32;
     SubIndices = Sub0_1;
 
+  } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
+    assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
+    Opcode = AMDGPU::V_MOV_B32_e32;
+    SubIndices = Sub0_2;
+
   } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
     assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
            AMDGPU::SReg_128RegClass.contains(SrcReg));
lib/Target/R600/SIInstructions.td
@@ -346,6 +346,7 @@ class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
   []> {
   let mayLoad = 1;
   let mayStore = 0;
+  let hasPostISelHook = 1;
 }
 
 //===----------------------------------------------------------------------===//
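For context on how this flag takes effect: when hasPostISelHook is set on an instruction, the SelectionDAG instruction emitter calls the target's AdjustInstrPostInstrSelection on the freshly created MachineInstr, which is what lets the function added above shrink the MIMG register class. A rough mock of that dispatch, with simplified stand-in types rather than LLVM's actual classes:

    #include <cstdio>

    // Simplified stand-ins; real LLVM uses MachineInstr/MCInstrDesc.
    struct MockInstr {
      bool HasPostISelHook;
      unsigned Writemask;
    };

    struct MockTargetLowering {
      void AdjustInstrPostInstrSelection(MockInstr &MI) const {
        std::printf("adjusting reg class, writemask 0x%X\n", MI.Writemask);
      }
    };

    // Mock of the emitter's check: only hooked instructions call back
    // into the target after instruction selection.
    static void emitInstr(MockInstr &MI, const MockTargetLowering &TLI) {
      if (MI.HasPostISelHook)
        TLI.AdjustInstrPostInstrSelection(MI);
    }

    int main() {
      MockInstr Img = {true, 0x3};  // an MIMG load writing two components
      MockTargetLowering TLI;
      emitInstr(Img, TLI);
      return 0;
    }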
@@ -379,6 +380,7 @@ def getCommuteOrig : InstrMapping {
   let ValueCols = [["1"]];
 }
 
+// Test if the supplied opcode is an MIMG instruction
 def isMIMG : InstrMapping {
   let FilterClass = "MIMG_Load_Helper";
   let RowFields = ["Inst"];
lib/Target/R600/SIRegisterInfo.td
@@ -94,6 +94,12 @@ def VGPR_64 : RegisterTuples<[sub0, sub1],
                              [(add (trunc VGPR_32, 255)),
                               (add (shl VGPR_32, 1))]>;
 
+// VGPR 96-bit registers
+def VGPR_96 : RegisterTuples<[sub0, sub1, sub2],
+                             [(add (trunc VGPR_32, 254)),
+                              (add (shl VGPR_32, 1)),
+                              (add (shl VGPR_32, 2))]>;
+
 // VGPR 128-bit registers
 def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
                               [(add (trunc VGPR_32, 253)),
@@ -162,6 +168,10 @@ def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>;
 
 def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
 
+def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
+  let Size = 96;
+}
+
 def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
 
 def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>;
test/CodeGen/R600/llvm.SI.sample.ll
@@ -1,21 +1,21 @@
 ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
 
 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 3
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 3
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 2
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 2
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 1
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 1
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 4
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 4
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 8
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 5
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 5
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 9
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 9
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 6
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 6
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 10
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 10
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 12
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 12
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 8
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8
 
 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
   %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0