mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-07-27 12:26:08 +00:00
R600: Move fabs/fneg/sel folding logic into PostProcessIsel
This move makes possible to correctly handle multiples instructions from a single pattern. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190643 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -201,92 +201,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
|||||||
}
|
}
|
||||||
switch (Opc) {
|
switch (Opc) {
|
||||||
default: break;
|
default: break;
|
||||||
case AMDGPUISD::CONST_ADDRESS: {
|
|
||||||
for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
|
|
||||||
I != SDNode::use_end(); I = Next) {
|
|
||||||
Next = llvm::next(I);
|
|
||||||
if (!I->isMachineOpcode()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
unsigned Opcode = I->getMachineOpcode();
|
|
||||||
bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
|
|
||||||
int SrcIdx = I.getOperandNo();
|
|
||||||
int SelIdx;
|
|
||||||
// Unlike MachineInstrs, SDNodes do not have results in their operand
|
|
||||||
// list, so we need to increment the SrcIdx, since
|
|
||||||
// R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
|
|
||||||
if (HasDst) {
|
|
||||||
SrcIdx++;
|
|
||||||
}
|
|
||||||
|
|
||||||
SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
|
|
||||||
if (SelIdx < 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
SDValue CstOffset;
|
|
||||||
if (N->getValueType(0).isVector() ||
|
|
||||||
!SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// Gather constants values
|
|
||||||
int SrcIndices[] = {
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
|
|
||||||
};
|
|
||||||
std::vector<unsigned> Consts;
|
|
||||||
for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
|
|
||||||
int OtherSrcIdx = SrcIndices[i];
|
|
||||||
int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
|
|
||||||
if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (HasDst) {
|
|
||||||
OtherSrcIdx--;
|
|
||||||
OtherSelIdx--;
|
|
||||||
}
|
|
||||||
if (RegisterSDNode *Reg =
|
|
||||||
dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
|
|
||||||
if (Reg->getReg() == AMDGPU::ALU_CONST) {
|
|
||||||
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
|
|
||||||
Consts.push_back(Cst->getZExtValue());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
|
|
||||||
Consts.push_back(Cst->getZExtValue());
|
|
||||||
if (!TII->fitsConstReadLimitations(Consts))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// Convert back to SDNode indices
|
|
||||||
if (HasDst) {
|
|
||||||
SrcIdx--;
|
|
||||||
SelIdx--;
|
|
||||||
}
|
|
||||||
std::vector<SDValue> Ops;
|
|
||||||
for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
|
|
||||||
if (i == SrcIdx) {
|
|
||||||
Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
|
|
||||||
} else if (i == SelIdx) {
|
|
||||||
Ops.push_back(CstOffset);
|
|
||||||
} else {
|
|
||||||
Ops.push_back(I->getOperand(i));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case ISD::BUILD_VECTOR: {
|
case ISD::BUILD_VECTOR: {
|
||||||
unsigned RegClassID;
|
unsigned RegClassID;
|
||||||
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
||||||
@@ -508,38 +422,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
|||||||
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||||
const R600InstrInfo *TII =
|
const R600InstrInfo *TII =
|
||||||
static_cast<const R600InstrInfo*>(TM.getInstrInfo());
|
static_cast<const R600InstrInfo*>(TM.getInstrInfo());
|
||||||
if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
|
|
||||||
bool IsModified = false;
|
|
||||||
do {
|
|
||||||
std::vector<SDValue> Ops;
|
|
||||||
for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
|
|
||||||
I != E; ++I)
|
|
||||||
Ops.push_back(*I);
|
|
||||||
IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
|
|
||||||
if (IsModified) {
|
|
||||||
Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
|
|
||||||
}
|
|
||||||
} while (IsModified);
|
|
||||||
|
|
||||||
}
|
|
||||||
if (Result && Result->isMachineOpcode() &&
|
if (Result && Result->isMachineOpcode() &&
|
||||||
!(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
|
!(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
|
||||||
&& TII->hasInstrModifiers(Result->getMachineOpcode())) {
|
&& TII->hasInstrModifiers(Result->getMachineOpcode())) {
|
||||||
// Fold FNEG/FABS
|
|
||||||
// TODO: Isel can generate multiple MachineInst, we need to recursively
|
|
||||||
// parse Result
|
|
||||||
bool IsModified = false;
|
|
||||||
do {
|
|
||||||
std::vector<SDValue> Ops;
|
|
||||||
for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
|
|
||||||
I != E; ++I)
|
|
||||||
Ops.push_back(*I);
|
|
||||||
IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
|
|
||||||
if (IsModified) {
|
|
||||||
Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
|
|
||||||
}
|
|
||||||
} while (IsModified);
|
|
||||||
|
|
||||||
// If node has a single use which is CLAMP_R600, folds it
|
// If node has a single use which is CLAMP_R600, folds it
|
||||||
if (Result->hasOneUse() && Result->isMachineOpcode()) {
|
if (Result->hasOneUse() && Result->isMachineOpcode()) {
|
||||||
SDNode *PotentialClamp = *Result->use_begin();
|
SDNode *PotentialClamp = *Result->use_begin();
|
||||||
@@ -564,120 +449,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
|||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg,
|
|
||||||
SDValue &Abs, const R600InstrInfo *TII) {
|
|
||||||
switch (Src.getOpcode()) {
|
|
||||||
case ISD::FNEG:
|
|
||||||
Src = Src.getOperand(0);
|
|
||||||
Neg = CurDAG->getTargetConstant(1, MVT::i32);
|
|
||||||
return true;
|
|
||||||
case ISD::FABS:
|
|
||||||
if (!Abs.getNode())
|
|
||||||
return false;
|
|
||||||
Src = Src.getOperand(0);
|
|
||||||
Abs = CurDAG->getTargetConstant(1, MVT::i32);
|
|
||||||
return true;
|
|
||||||
case ISD::BITCAST:
|
|
||||||
Src = Src.getOperand(0);
|
|
||||||
return true;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
|
|
||||||
const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
|
|
||||||
int OperandIdx[] = {
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
|
|
||||||
};
|
|
||||||
int SelIdx[] = {
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_sel)
|
|
||||||
};
|
|
||||||
int NegIdx[] = {
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
|
|
||||||
};
|
|
||||||
int AbsIdx[] = {
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
|
|
||||||
-1
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < 3; i++) {
|
|
||||||
if (OperandIdx[i] < 0)
|
|
||||||
return false;
|
|
||||||
SDValue &Src = Ops[OperandIdx[i] - 1];
|
|
||||||
SDValue &Sel = Ops[SelIdx[i] - 1];
|
|
||||||
SDValue &Neg = Ops[NegIdx[i] - 1];
|
|
||||||
SDValue FakeAbs;
|
|
||||||
SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
|
|
||||||
if (FoldOperand(Src, Sel, Neg, Abs, TII))
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
|
|
||||||
const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
|
|
||||||
int OperandIdx[] = {
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
|
|
||||||
};
|
|
||||||
int SelIdx[] = {
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_X),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Y),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Z),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_W),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_X),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Y),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Z),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_W)
|
|
||||||
};
|
|
||||||
int NegIdx[] = {
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
|
|
||||||
};
|
|
||||||
int AbsIdx[] = {
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
|
|
||||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
|
|
||||||
};
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < 8; i++) {
|
|
||||||
if (OperandIdx[i] < 0)
|
|
||||||
return false;
|
|
||||||
SDValue &Src = Ops[OperandIdx[i] - 1];
|
|
||||||
SDValue &Sel = Ops[SelIdx[i] - 1];
|
|
||||||
SDValue &Neg = Ops[NegIdx[i] - 1];
|
|
||||||
SDValue &Abs = Ops[AbsIdx[i] - 1];
|
|
||||||
if (FoldOperand(Src, Sel, Neg, Abs, TII))
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
|
bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
|
||||||
if (!ptr) {
|
if (!ptr) {
|
||||||
@@ -890,26 +661,27 @@ bool AMDGPUDAGToDAGISel::SelectU24(SDValue Op, SDValue &U24) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
|
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
|
||||||
|
|
||||||
if (Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Go over all selected nodes and try to fold them a bit more
|
|
||||||
const AMDGPUTargetLowering& Lowering =
|
const AMDGPUTargetLowering& Lowering =
|
||||||
(*(const AMDGPUTargetLowering*)getTargetLowering());
|
(*(const AMDGPUTargetLowering*)getTargetLowering());
|
||||||
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
|
bool IsModified = false;
|
||||||
E = CurDAG->allnodes_end(); I != E; ++I) {
|
do {
|
||||||
|
IsModified = false;
|
||||||
|
// Go over all selected nodes and try to fold them a bit more
|
||||||
|
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
|
||||||
|
E = CurDAG->allnodes_end(); I != E; ++I) {
|
||||||
|
|
||||||
SDNode *Node = I;
|
SDNode *Node = I;
|
||||||
|
|
||||||
MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
|
MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
|
||||||
if (!MachineNode)
|
if (!MachineNode)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
|
SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
|
||||||
if (ResNode != Node) {
|
if (ResNode != Node) {
|
||||||
ReplaceUses(Node, ResNode);
|
ReplaceUses(Node, ResNode);
|
||||||
|
IsModified = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
CurDAG->RemoveDeadNodes();
|
||||||
|
} while (IsModified);
|
||||||
}
|
}
|
||||||
|
@@ -1629,3 +1629,182 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
|
|||||||
}
|
}
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
|
||||||
|
SDValue &Abs, SDValue &Sel, SelectionDAG &DAG) {
|
||||||
|
const R600InstrInfo *TII =
|
||||||
|
static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
|
||||||
|
if (!Src.isMachineOpcode())
|
||||||
|
return false;
|
||||||
|
switch (Src.getMachineOpcode()) {
|
||||||
|
case AMDGPU::FNEG_R600:
|
||||||
|
if (!Neg.getNode())
|
||||||
|
return false;
|
||||||
|
Src = Src.getOperand(0);
|
||||||
|
Neg = DAG.getTargetConstant(1, MVT::i32);
|
||||||
|
return true;
|
||||||
|
case AMDGPU::FABS_R600:
|
||||||
|
if (!Abs.getNode())
|
||||||
|
return false;
|
||||||
|
Src = Src.getOperand(0);
|
||||||
|
Abs = DAG.getTargetConstant(1, MVT::i32);
|
||||||
|
return true;
|
||||||
|
case AMDGPU::CONST_COPY: {
|
||||||
|
unsigned Opcode = ParentNode->getMachineOpcode();
|
||||||
|
bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
|
||||||
|
|
||||||
|
if (!Sel.getNode())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
SDValue CstOffset = Src.getOperand(0);
|
||||||
|
if (ParentNode->getValueType(0).isVector())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Gather constants values
|
||||||
|
int SrcIndices[] = {
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
|
||||||
|
};
|
||||||
|
std::vector<unsigned> Consts;
|
||||||
|
for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
|
||||||
|
int OtherSrcIdx = SrcIndices[i];
|
||||||
|
int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
|
||||||
|
if (OtherSrcIdx < 0 || OtherSelIdx < 0)
|
||||||
|
continue;
|
||||||
|
if (HasDst) {
|
||||||
|
OtherSrcIdx--;
|
||||||
|
OtherSelIdx--;
|
||||||
|
}
|
||||||
|
if (RegisterSDNode *Reg =
|
||||||
|
dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
|
||||||
|
if (Reg->getReg() == AMDGPU::ALU_CONST) {
|
||||||
|
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
|
||||||
|
ParentNode->getOperand(OtherSelIdx));
|
||||||
|
Consts.push_back(Cst->getZExtValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
|
||||||
|
Consts.push_back(Cst->getZExtValue());
|
||||||
|
if (!TII->fitsConstReadLimitations(Consts)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
Sel = CstOffset;
|
||||||
|
Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// \brief Fold the instructions after selecting them
|
||||||
|
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
|
||||||
|
SelectionDAG &DAG) const {
|
||||||
|
const R600InstrInfo *TII =
|
||||||
|
static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
|
||||||
|
if (!Node->isMachineOpcode())
|
||||||
|
return Node;
|
||||||
|
unsigned Opcode = Node->getMachineOpcode();
|
||||||
|
SDValue FakeOp;
|
||||||
|
|
||||||
|
std::vector<SDValue> Ops;
|
||||||
|
for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
|
||||||
|
I != E; ++I)
|
||||||
|
Ops.push_back(*I);
|
||||||
|
|
||||||
|
if (Opcode == AMDGPU::DOT_4) {
|
||||||
|
int OperandIdx[] = {
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
|
||||||
|
};
|
||||||
|
int NegIdx[] = {
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
|
||||||
|
};
|
||||||
|
int AbsIdx[] = {
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
|
||||||
|
};
|
||||||
|
for (unsigned i = 0; i < 8; i++) {
|
||||||
|
if (OperandIdx[i] < 0)
|
||||||
|
return Node;
|
||||||
|
SDValue &Src = Ops[OperandIdx[i] - 1];
|
||||||
|
SDValue &Neg = Ops[NegIdx[i] - 1];
|
||||||
|
SDValue &Abs = Ops[AbsIdx[i] - 1];
|
||||||
|
bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
|
||||||
|
int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
|
||||||
|
if (HasDst)
|
||||||
|
SelIdx--;
|
||||||
|
SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
|
||||||
|
if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
|
||||||
|
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (!TII->hasInstrModifiers(Opcode))
|
||||||
|
return Node;
|
||||||
|
int OperandIdx[] = {
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
|
||||||
|
};
|
||||||
|
int NegIdx[] = {
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
|
||||||
|
};
|
||||||
|
int AbsIdx[] = {
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
|
||||||
|
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
|
||||||
|
-1
|
||||||
|
};
|
||||||
|
for (unsigned i = 0; i < 3; i++) {
|
||||||
|
if (OperandIdx[i] < 0)
|
||||||
|
return Node;
|
||||||
|
SDValue &Src = Ops[OperandIdx[i] - 1];
|
||||||
|
SDValue &Neg = Ops[NegIdx[i] - 1];
|
||||||
|
SDValue FakeAbs;
|
||||||
|
SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
|
||||||
|
bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
|
||||||
|
int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
|
||||||
|
if (HasDst)
|
||||||
|
SelIdx--;
|
||||||
|
SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
|
||||||
|
if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
|
||||||
|
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Node;
|
||||||
|
}
|
||||||
|
@@ -67,6 +67,7 @@ private:
|
|||||||
void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
|
void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
|
||||||
unsigned &Channel, unsigned &PtrIncr) const;
|
unsigned &Channel, unsigned &PtrIncr) const;
|
||||||
bool isZero(SDValue Op) const;
|
bool isZero(SDValue Op) const;
|
||||||
|
virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // End namespace llvm;
|
} // End namespace llvm;
|
||||||
|
18
test/CodeGen/R600/complex-folding.ll
Normal file
18
test/CodeGen/R600/complex-folding.ll
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||||
|
|
||||||
|
; CHECK: @main
|
||||||
|
; CHECK-NOT: MOV
|
||||||
|
define void @main() {
|
||||||
|
entry:
|
||||||
|
%0 = call float @llvm.R600.load.input(i32 0)
|
||||||
|
%1 = call float @fabs(float %0)
|
||||||
|
%2 = fptoui float %1 to i32
|
||||||
|
%3 = bitcast i32 %2 to float
|
||||||
|
%4 = insertelement <4 x float> undef, float %3, i32 0
|
||||||
|
call void @llvm.R600.store.swizzle(<4 x float> %4, i32 0, i32 0)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare float @llvm.R600.load.input(i32) readnone
|
||||||
|
declare float @fabs(float ) readnone
|
||||||
|
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
Reference in New Issue
Block a user