R600/SI: Partially move operand legalization to post-isel hook.

Disable the SGPR usage restriction parts of the DAG legalizeOperands.
It should now only be doing immediate folding until it can be replaced
later. The real legalization work is now done by the other
legalizeOperands, SIInstrInfo::legalizeOperands.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218531 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault 2014-09-26 17:54:59 +00:00
parent 29202835d8
commit aed12d4bad
9 changed files with 53 additions and 82 deletions

View File

@ -1648,57 +1648,6 @@ bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
return TRI->getRegClass(RegClass)->hasSubClassEq(RC);
}
/// \brief Make sure that we don't exceed the number of allowed scalars
///
/// \param DAG            DAG used to create any copy/move node that is needed.
/// \param Operand        Operand to check; replaced in place with result 0 of
///                       a new machine node when a copy has to be inserted.
/// \param RegClass       Register class ID the instruction expects for this
///                       operand. Anything that is not a VSrc class is ignored.
/// \param ScalarSlotUsed In/out flag: set to true the first time an operand
///                       that does not fit the VReg class is let through;
///                       later such operands are copied instead.
void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
unsigned RegClass,
bool &ScalarSlotUsed) const {
// Only VSrc operands are subject to the limit handled here.
if (!isVSrc(RegClass))
return;
// First map the operand's register class to a destination class
// (VSrc/VCSrc_32 -> VReg_32, VSrc/VCSrc_64 -> VReg_64).
switch (RegClass) {
case AMDGPU::VSrc_32RegClassID:
case AMDGPU::VCSrc_32RegClassID:
RegClass = AMDGPU::VReg_32RegClassID;
break;
case AMDGPU::VSrc_64RegClassID:
case AMDGPU::VCSrc_64RegClassID:
RegClass = AMDGPU::VReg_64RegClassID;
break;
default:
llvm_unreachable("Unknown vsrc reg class");
}
// Nothing to do if they fit naturally
if (fitsRegClass(DAG, Operand, RegClass))
return;
// If the scalar slot isn't used yet use it now
if (!ScalarSlotUsed) {
ScalarSlotUsed = true;
return;
}
// This is a conservative approach. It is possible that we can't determine the
// correct register class and copy too often, but better safe than sorry.
SDNode *Node;
// We can't use COPY_TO_REGCLASS with FrameIndex arguments. FrameIndex and
// GlobalAddress operands are instead wrapped in an S_MOV_B32 / S_MOV_B64,
// chosen by whether the operand's value type is i32.
if (isa<FrameIndexSDNode>(Operand) ||
isa<GlobalAddressSDNode>(Operand)) {
unsigned Opcode = Operand.getValueType() == MVT::i32 ?
AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
Node = DAG.getMachineNode(Opcode, SDLoc(), Operand.getValueType(),
Operand);
} else {
// Everything else is copied into the mapped VReg class.
SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32);
Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, SDLoc(),
Operand.getValueType(), Operand, RC);
}
// Hand the new node's first result back to the caller as the operand.
Operand = SDValue(Node, 0);
}
/// \returns true if \p Node's operands are different from the SDValue list
/// \p Ops
static bool isNodeChanged(const SDNode *Node, const std::vector<SDValue> &Ops) {
@ -1710,8 +1659,9 @@ static bool isNodeChanged(const SDNode *Node, const std::vector<SDValue> &Ops) {
return false;
}
/// \brief Try to commute instructions and insert copies in order to satisfy the
/// operand constraints.
/// TODO: This needs to be removed. Its current primary purpose is to fold
/// immediates into operands when legal. The legalization parts are redundant
/// with SIInstrInfo::legalizeOperands which is called in a post-isel hook.
SDNode *SITargetLowering::legalizeOperands(MachineSDNode *Node,
SelectionDAG &DAG) const {
// Original encoding (either e32 or e64)
@ -1784,11 +1734,9 @@ SDNode *SITargetLowering::legalizeOperands(MachineSDNode *Node,
// Is this a VSrc or SSrc operand?
unsigned RegClass = Desc->OpInfo[Op].RegClass;
if (isVSrc(RegClass) || isSSrc(RegClass)) {
// Try to fold the immediates
if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
// Folding didn't work, make sure we don't hit the SReg limit.
ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
}
// Try to fold the immediates. If this ends up with multiple constant bus
// uses, it will be legalized later.
foldImm(Ops[i], Immediate, ScalarSlotUsed);
continue;
}
@ -1938,6 +1886,8 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
getTargetMachine().getSubtargetImpl()->getInstrInfo());
TII->legalizeOperands(MI);
if (TII->isMIMG(MI->getOpcode())) {
unsigned VReg = MI->getOperand(0).getReg();
unsigned Writemask = MI->getOperand(1).getImm();

View File

@ -47,8 +47,6 @@ class SITargetLowering : public AMDGPUTargetLowering {
const SDValue &Op) const;
bool fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
unsigned RegClass) const;
void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
unsigned RegClass, bool &ScalarSlotUsed) const;
SDNode *legalizeOperands(MachineSDNode *N, SelectionDAG &DAG) const;
void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;

View File

@ -42,6 +42,10 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
let TSFlags{10} = MUBUF;
let TSFlags{11} = MTBUF;
let TSFlags{12} = FLAT;
// Most instructions require adjustments after selection to satisfy
// operand requirements.
let hasPostISelHook = 1;
}
class Enc32 {

View File

@ -1394,20 +1394,39 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx };
// First we need to consider the instruction's operand requirements before
// legalizing. Some operands are required to be SGPRs, but we are still
// bound by the constant bus requirement to only use one.
//
// If the operand's class is an SGPR, we can never move it.
for (unsigned i = 0; i < 3; ++i) {
int Idx = VOP3Idx[i];
if (Idx == -1)
break;
for (const MachineOperand &MO : MI->implicit_operands()) {
// We only care about reads.
if (MO.isDef())
continue;
if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass)) {
SGPRReg = MI->getOperand(Idx).getReg();
if (MO.getReg() == AMDGPU::VCC) {
SGPRReg = AMDGPU::VCC;
break;
}
if (MO.getReg() == AMDGPU::FLAT_SCR) {
SGPRReg = AMDGPU::FLAT_SCR;
break;
}
}
if (SGPRReg == AMDGPU::NoRegister) {
// First we need to consider the instruction's operand requirements before
// legalizing. Some operands are required to be SGPRs, but we are still
// bound by the constant bus requirement to only use one.
//
// If the operand's class is an SGPR, we can never move it.
for (unsigned i = 0; i < 3; ++i) {
int Idx = VOP3Idx[i];
if (Idx == -1)
break;
if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass)) {
SGPRReg = MI->getOperand(Idx).getReg();
break;
}
}
}
for (unsigned i = 0; i < 3; ++i) {

View File

@ -50,7 +50,7 @@ define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
; SI-LABEL: @fneg_fold
; SI: S_LOAD_DWORDX2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: XOR
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], {{v\[[0-9]+:[0-9]+\]}}
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
define void @fneg_fold_f64(double addrspace(1)* %out, double %in) {
%fsub = fsub double -0.0, %in
%fmul = fmul double %fsub, %in

View File

@ -59,7 +59,7 @@ define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
; FUNC-LABEL: @fneg_fold
; SI: S_LOAD_DWORD [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; SI-NOT: XOR
; SI: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], v{{[0-9]+}}
; SI: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
define void @fneg_fold_f32(float addrspace(1)* %out, float %in) {
%fsub = fsub float -0.0, %in
%fmul = fmul float %fsub, %in

View File

@ -8,7 +8,7 @@
; SI: S_SUB_I32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}}
; SI: V_MOV_B32_e32 [[VDST:v[0-9]+]], [[SDST]]
; SI: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, v[0-9]+}}, [[VDST]]
; SI: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, s[0-9]+}}, [[VDST]]
define void @rotl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
entry:
%0 = shl i32 %x, %y

View File

@ -1,8 +1,8 @@
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
;CHECK-LABEL: @main
;CHECK: V_CMP_O_F32_e32 vcc, {{[sv][0-9]+, v[0-9]+}}
; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
; CHECK-LABEL: @main
; CHECK: V_CMP_O_F32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
; CHECK-NEXT: V_CNDMASK_B32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
define void @main(float %p) {
main_body:
%c = fcmp oeq float %p, %p

View File

@ -1,8 +1,8 @@
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
;CHECK-LABEL: @main
;CHECK: V_CMP_U_F32_e32 vcc, {{[sv][0-9]+, v[0-9]+}}
; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
; CHECK-LABEL: @main
; CHECK: V_CMP_U_F32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
; CHECK-NEXT: V_CNDMASK_B32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
define void @main(float %p) {
main_body:
%c = fcmp une float %p, %p