mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-13 09:33:50 +00:00
R600: Relax some vector constraints on Dot4.
Dot4 now uses 8 scalar operands instead of 2 vector ones, which allows the register coalescer to remove some unneeded COPYs. This patch also defines some structures/functions that can be used to handle every vector instruction (CUBE, Cayman special instructions, ...) in a similar fashion. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182126 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d3293b49f9
commit
4ed9917147
@ -126,6 +126,7 @@ enum {
|
||||
SMIN,
|
||||
UMIN,
|
||||
URECIP,
|
||||
DOT4,
|
||||
TEXTURE_FETCH,
|
||||
EXPORT,
|
||||
CONST_ADDRESS,
|
||||
|
@ -98,6 +98,80 @@ namespace R600Operands {
|
||||
{0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17,18}
|
||||
};
|
||||
|
||||
// Per-channel operand layout for packed "vector slot" ALU instructions
// (currently only DOT_4; per the commit intent, CUBE and Cayman special
// instructions are expected to reuse it).  The 17 fields of one scalar ALU
// slot are repeated for the X, Y, Z and W channels, followed by the two
// literal operands shared by all four slots.
//
// NOTE(review): getVecOperandIdx() maps these values to machine-operand
// indices with a fixed "+1 for the destination" offset, so this enumerator
// order must stay in sync with the (ins ...) list of the DOT_4 TableGen
// definition — do not reorder.
enum VecOps {
  // Slot X
  UPDATE_EXEC_MASK_X,
  UPDATE_PREDICATE_X,
  WRITE_X,
  OMOD_X,
  DST_REL_X,
  CLAMP_X,
  SRC0_X,
  SRC0_NEG_X,
  SRC0_REL_X,
  SRC0_ABS_X,
  SRC0_SEL_X,
  SRC1_X,
  SRC1_NEG_X,
  SRC1_REL_X,
  SRC1_ABS_X,
  SRC1_SEL_X,
  PRED_SEL_X,
  // Slot Y
  UPDATE_EXEC_MASK_Y,
  UPDATE_PREDICATE_Y,
  WRITE_Y,
  OMOD_Y,
  DST_REL_Y,
  CLAMP_Y,
  SRC0_Y,
  SRC0_NEG_Y,
  SRC0_REL_Y,
  SRC0_ABS_Y,
  SRC0_SEL_Y,
  SRC1_Y,
  SRC1_NEG_Y,
  SRC1_REL_Y,
  SRC1_ABS_Y,
  SRC1_SEL_Y,
  PRED_SEL_Y,
  // Slot Z
  UPDATE_EXEC_MASK_Z,
  UPDATE_PREDICATE_Z,
  WRITE_Z,
  OMOD_Z,
  DST_REL_Z,
  CLAMP_Z,
  SRC0_Z,
  SRC0_NEG_Z,
  SRC0_REL_Z,
  SRC0_ABS_Z,
  SRC0_SEL_Z,
  SRC1_Z,
  SRC1_NEG_Z,
  SRC1_REL_Z,
  SRC1_ABS_Z,
  SRC1_SEL_Z,
  PRED_SEL_Z,
  // Slot W
  UPDATE_EXEC_MASK_W,
  UPDATE_PREDICATE_W,
  WRITE_W,
  OMOD_W,
  DST_REL_W,
  CLAMP_W,
  SRC0_W,
  SRC0_NEG_W,
  SRC0_REL_W,
  SRC0_ABS_W,
  SRC0_SEL_W,
  SRC1_W,
  SRC1_NEG_W,
  SRC1_REL_W,
  SRC1_ABS_W,
  SRC1_SEL_W,
  PRED_SEL_W,
  // Literal constants shared by all four slots.
  IMM_0,
  IMM_1,
  VEC_COUNT  // Total number of per-vector operands.
};
|
||||
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -36,8 +36,7 @@ private:
|
||||
case AMDGPU::INTERP_PAIR_XY:
|
||||
case AMDGPU::INTERP_PAIR_ZW:
|
||||
case AMDGPU::INTERP_VEC_LOAD:
|
||||
case AMDGPU::DOT4_eg_pseudo:
|
||||
case AMDGPU::DOT4_r600_pseudo:
|
||||
case AMDGPU::DOT_4:
|
||||
return 4;
|
||||
case AMDGPU::KILL:
|
||||
return 0;
|
||||
@ -71,8 +70,7 @@ private:
|
||||
case AMDGPU::INTERP_PAIR_ZW:
|
||||
case AMDGPU::INTERP_VEC_LOAD:
|
||||
case AMDGPU::COPY:
|
||||
case AMDGPU::DOT4_eg_pseudo:
|
||||
case AMDGPU::DOT4_r600_pseudo:
|
||||
case AMDGPU::DOT_4:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
@ -182,6 +182,41 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
MI.eraseFromParent();
|
||||
continue;
|
||||
}
|
||||
case AMDGPU::DOT_4: {
|
||||
|
||||
const R600RegisterInfo &TRI = TII->getRegisterInfo();
|
||||
|
||||
unsigned DstReg = MI.getOperand(0).getReg();
|
||||
unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
|
||||
|
||||
for (unsigned Chan = 0; Chan < 4; ++Chan) {
|
||||
bool Mask = (Chan != TRI.getHWRegChan(DstReg));
|
||||
unsigned SubDstReg =
|
||||
AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
|
||||
MachineInstr *BMI =
|
||||
TII->buildSlotOfVectorInstruction(MBB, &MI, Chan, SubDstReg);
|
||||
if (Chan > 0) {
|
||||
BMI->bundleWithPred();
|
||||
}
|
||||
if (Mask) {
|
||||
TII->addFlag(BMI, 0, MO_FLAG_MASK);
|
||||
}
|
||||
if (Chan != 3)
|
||||
TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
|
||||
unsigned Opcode = BMI->getOpcode();
|
||||
// While not strictly necessary from hw point of view, we force
|
||||
// all src operands of a dot4 inst to belong to the same slot.
|
||||
unsigned Src0 = BMI->getOperand(
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0))
|
||||
.getReg();
|
||||
unsigned Src1 = BMI->getOperand(
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1))
|
||||
.getReg();
|
||||
assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1));
|
||||
}
|
||||
MI.eraseFromParent();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
bool IsReduction = TII->isReductionOp(MI.getOpcode());
|
||||
@ -268,12 +303,6 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
case AMDGPU::CUBE_eg_pseudo:
|
||||
Opcode = AMDGPU::CUBE_eg_real;
|
||||
break;
|
||||
case AMDGPU::DOT4_r600_pseudo:
|
||||
Opcode = AMDGPU::DOT4_r600_real;
|
||||
break;
|
||||
case AMDGPU::DOT4_eg_pseudo:
|
||||
Opcode = AMDGPU::DOT4_eg_real;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -631,6 +631,27 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
||||
};
|
||||
return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
|
||||
}
|
||||
case AMDGPUIntrinsic::AMDGPU_dp4: {
|
||||
SDValue Args[8] = {
|
||||
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
|
||||
DAG.getConstant(0, MVT::i32)),
|
||||
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
|
||||
DAG.getConstant(0, MVT::i32)),
|
||||
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
|
||||
DAG.getConstant(1, MVT::i32)),
|
||||
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
|
||||
DAG.getConstant(1, MVT::i32)),
|
||||
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
|
||||
DAG.getConstant(2, MVT::i32)),
|
||||
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
|
||||
DAG.getConstant(2, MVT::i32)),
|
||||
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
|
||||
DAG.getConstant(3, MVT::i32)),
|
||||
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
|
||||
DAG.getConstant(3, MVT::i32))
|
||||
};
|
||||
return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
|
||||
}
|
||||
|
||||
case r600_read_ngroups_x:
|
||||
return LowerImplicitParameter(DAG, VT, DL, 0);
|
||||
|
@ -116,9 +116,6 @@ bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
|
||||
bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
|
||||
switch(Opcode) {
|
||||
default: return false;
|
||||
case AMDGPU::DOT4_r600_pseudo:
|
||||
case AMDGPU::DOT4_eg_pseudo:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@ -866,6 +863,95 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB
|
||||
return MIB;
|
||||
}
|
||||
|
||||
// Expands to one switch case that maps a channel-agnostic operand kind
// (e.g. R600Operands::SRC0) to its per-channel VecOps counterpart
// (SRC0_X/_Y/_Z/_W) selected by the enclosing function's Slot argument.
#define OPERAND_CASE(Label) \
  case Label: { \
    static const R600Operands::VecOps Ops[] = \
    { \
      Label##_X, \
      Label##_Y, \
      Label##_Z, \
      Label##_W \
    }; \
    return Ops[Slot]; \
  }

/// Translate the generic operand kind \p Op into the per-channel vector
/// operand for slot \p Slot (0 = X, 1 = Y, 2 = Z, 3 = W).
/// Aborts on operand kinds that have no per-channel equivalent.
static R600Operands::VecOps
getSlotedOps(R600Operands::Ops Op, unsigned Slot) {
  switch (Op) {
  OPERAND_CASE(R600Operands::UPDATE_EXEC_MASK)
  OPERAND_CASE(R600Operands::UPDATE_PREDICATE)
  OPERAND_CASE(R600Operands::WRITE)
  OPERAND_CASE(R600Operands::OMOD)
  OPERAND_CASE(R600Operands::DST_REL)
  OPERAND_CASE(R600Operands::CLAMP)
  OPERAND_CASE(R600Operands::SRC0)
  OPERAND_CASE(R600Operands::SRC0_NEG)
  OPERAND_CASE(R600Operands::SRC0_REL)
  OPERAND_CASE(R600Operands::SRC0_ABS)
  OPERAND_CASE(R600Operands::SRC0_SEL)
  OPERAND_CASE(R600Operands::SRC1)
  OPERAND_CASE(R600Operands::SRC1_NEG)
  OPERAND_CASE(R600Operands::SRC1_REL)
  OPERAND_CASE(R600Operands::SRC1_ABS)
  OPERAND_CASE(R600Operands::SRC1_SEL)
  OPERAND_CASE(R600Operands::PRED_SEL)
  default:
    llvm_unreachable("Wrong Operand");
  }
}

#undef OPERAND_CASE
|
||||
|
||||
static int
|
||||
getVecOperandIdx(R600Operands::VecOps Op) {
|
||||
return 1 + Op;
|
||||
}
|
||||
|
||||
|
||||
/// Materialize one scalar slot (\p Slot: 0 = X .. 3 = W) of the packed
/// vector pseudo \p MI as a real per-channel instruction writing \p DstReg,
/// inserted before \p MI.  The immediate flag operands (neg/abs/rel/sel,
/// write masks, clamp, ...) are copied from the corresponding per-channel
/// operands of the pseudo.  Only DOT_4 is handled so far.
MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
    const {
  assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
  unsigned Opcode;
  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  // Pre-Evergreen (R600-class) chips use the r600 encoding of DOT4.
  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
    Opcode = AMDGPU::DOT4_r600;
  else
    Opcode = AMDGPU::DOT4_eg;
  MachineBasicBlock::iterator I = MI;
  // Fetch this slot's two source registers from the pseudo's operand list.
  MachineOperand &Src0 = MI->getOperand(
      getVecOperandIdx(getSlotedOps(R600Operands::SRC0, Slot)));
  MachineOperand &Src1 = MI->getOperand(
      getVecOperandIdx(getSlotedOps(R600Operands::SRC1, Slot)));
  MachineInstr *MIB = buildDefaultInstruction(
      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
  // Immediate operands to forward from the pseudo's slot to the real
  // instruction; SRC0/SRC1 themselves were consumed above, and PRED_SEL
  // is left at the value buildDefaultInstruction produced.
  static const R600Operands::Ops Operands[14] = {
    R600Operands::UPDATE_EXEC_MASK,
    R600Operands::UPDATE_PREDICATE,
    R600Operands::WRITE,
    R600Operands::OMOD,
    R600Operands::DST_REL,
    R600Operands::CLAMP,
    R600Operands::SRC0_NEG,
    R600Operands::SRC0_REL,
    R600Operands::SRC0_ABS,
    R600Operands::SRC0_SEL,
    R600Operands::SRC1_NEG,
    R600Operands::SRC1_REL,
    R600Operands::SRC1_ABS,
    R600Operands::SRC1_SEL,
  };

  for (unsigned i = 0; i < 14; i++) {
    MachineOperand &MO = MI->getOperand(
        getVecOperandIdx(getSlotedOps(Operands[i], Slot)));
    assert (MO.isImm());
    setImmOperand(MIB, Operands[i], MO.getImm());
  }
  // NOTE(review): operand 20 is cleared unconditionally here; it appears to
  // be a trailing flag/literal operand of the real DOT4 — confirm against
  // the R600_2OP operand layout.
  MIB->getOperand(20).setImm(0);
  return MIB;
}
|
||||
|
||||
MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I,
|
||||
unsigned DstReg,
|
||||
|
@ -198,6 +198,11 @@ namespace llvm {
|
||||
unsigned Src0Reg,
|
||||
unsigned Src1Reg = 0) const;
|
||||
|
||||
MachineInstr *buildSlotOfVectorInstruction(MachineBasicBlock &MBB,
|
||||
MachineInstr *MI,
|
||||
unsigned Slot,
|
||||
unsigned DstReg) const;
|
||||
|
||||
MachineInstr *buildMovImm(MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I,
|
||||
unsigned DstReg,
|
||||
|
@ -593,6 +593,13 @@ def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
|
||||
[SDNPVariadic]
|
||||
>;
|
||||
|
||||
def DOT4 : SDNode<"AMDGPUISD::DOT4",
|
||||
SDTypeProfile<1, 8, [SDTCisFP<0>, SDTCisVT<1, f32>, SDTCisVT<2, f32>,
|
||||
SDTCisVT<3, f32>, SDTCisVT<4, f32>, SDTCisVT<5, f32>,
|
||||
SDTCisVT<6, f32>, SDTCisVT<7, f32>, SDTCisVT<8, f32>]>,
|
||||
[]
|
||||
>;
|
||||
|
||||
def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>;
|
||||
|
||||
def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>;
|
||||
@ -1229,17 +1236,49 @@ class CNDGE_Common <bits<5> inst> : R600_3OP <
|
||||
[(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))]
|
||||
>;
|
||||
|
||||
multiclass DOT4_Common <bits<11> inst> {
|
||||
|
||||
def _pseudo : R600_REDUCTION <inst,
|
||||
(ins R600_Reg128:$src0, R600_Reg128:$src1),
|
||||
"DOT4 $dst $src0, $src1",
|
||||
[(set f32:$dst, (int_AMDGPU_dp4 v4f32:$src0, v4f32:$src1))]
|
||||
>;
|
||||
|
||||
def _real : R600_2OP <inst, "DOT4", []>;
|
||||
// Pseudo instruction carrying all four scalar slots of a two-source vector
// operation as explicit per-channel operands (17 fields per slot, matching
// R600Operands::VecOps), plus the two shared literals.  Using scalar
// operands instead of 128-bit vector registers lets the register coalescer
// remove unneeded COPYs; R600ExpandSpecialInstrs later splits the pseudo
// into one real instruction per slot via buildSlotOfVectorInstruction.
let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
class R600_VEC2OP<list<dag> pattern> : InstR600 <(outs R600_Reg32:$dst), (ins
// Slot X
  UEM:$update_exec_mask_X, UP:$update_pred_X, WRITE:$write_X,
  OMOD:$omod_X, REL:$dst_rel_X, CLAMP:$clamp_X,
  R600_TReg32_X:$src0_X, NEG:$src0_neg_X, REL:$src0_rel_X, ABS:$src0_abs_X, SEL:$src0_sel_X,
  R600_TReg32_X:$src1_X, NEG:$src1_neg_X, REL:$src1_rel_X, ABS:$src1_abs_X, SEL:$src1_sel_X,
  R600_Pred:$pred_sel_X,
// Slot Y
  UEM:$update_exec_mask_Y, UP:$update_pred_Y, WRITE:$write_Y,
  OMOD:$omod_Y, REL:$dst_rel_Y, CLAMP:$clamp_Y,
  R600_TReg32_Y:$src0_Y, NEG:$src0_neg_Y, REL:$src0_rel_Y, ABS:$src0_abs_Y, SEL:$src0_sel_Y,
  R600_TReg32_Y:$src1_Y, NEG:$src1_neg_Y, REL:$src1_rel_Y, ABS:$src1_abs_Y, SEL:$src1_sel_Y,
  R600_Pred:$pred_sel_Y,
// Slot Z
  UEM:$update_exec_mask_Z, UP:$update_pred_Z, WRITE:$write_Z,
  OMOD:$omod_Z, REL:$dst_rel_Z, CLAMP:$clamp_Z,
  R600_TReg32_Z:$src0_Z, NEG:$src0_neg_Z, REL:$src0_rel_Z, ABS:$src0_abs_Z, SEL:$src0_sel_Z,
  R600_TReg32_Z:$src1_Z, NEG:$src1_neg_Z, REL:$src1_rel_Z, ABS:$src1_abs_Z, SEL:$src1_sel_Z,
  R600_Pred:$pred_sel_Z,
// Slot W
  UEM:$update_exec_mask_W, UP:$update_pred_W, WRITE:$write_W,
  OMOD:$omod_W, REL:$dst_rel_W, CLAMP:$clamp_W,
  R600_TReg32_W:$src0_W, NEG:$src0_neg_W, REL:$src0_rel_W, ABS:$src0_abs_W, SEL:$src0_sel_W,
  R600_TReg32_W:$src1_W, NEG:$src1_neg_W, REL:$src1_rel_W, ABS:$src1_abs_W, SEL:$src1_sel_W,
  R600_Pred:$pred_sel_W,
  LITERAL:$literal0, LITERAL:$literal1),
  "",
  pattern,
  AnyALU> {}
}

// Selects the AMDGPUISD::DOT4 node (8 scalar f32 operands) into the packed
// pseudo above; per-channel register classes force each source pair into
// the matching hardware channel.
def DOT_4 : R600_VEC2OP<[(set R600_Reg32:$dst, (DOT4
  R600_TReg32_X:$src0_X, R600_TReg32_X:$src1_X,
  R600_TReg32_Y:$src0_Y, R600_TReg32_Y:$src1_Y,
  R600_TReg32_Z:$src0_Z, R600_TReg32_Z:$src1_Z,
  R600_TReg32_W:$src0_W, R600_TReg32_W:$src1_W))]>;


// Real per-slot DOT4 encoding; instantiated as DOT4_r600 / DOT4_eg with the
// chip-specific opcode bits.
class DOT4_Common <bits<11> inst> : R600_2OP <inst, "DOT4", []>;
|
||||
|
||||
|
||||
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
|
||||
multiclass CUBE_Common <bits<11> inst> {
|
||||
|
||||
@ -1412,7 +1451,7 @@ let Predicates = [isR600] in {
|
||||
def CNDE_r600 : CNDE_Common<0x18>;
|
||||
def CNDGT_r600 : CNDGT_Common<0x19>;
|
||||
def CNDGE_r600 : CNDGE_Common<0x1A>;
|
||||
defm DOT4_r600 : DOT4_Common<0x50>;
|
||||
def DOT4_r600 : DOT4_Common<0x50>;
|
||||
defm CUBE_r600 : CUBE_Common<0x52>;
|
||||
def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
|
||||
def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
|
||||
@ -1611,7 +1650,7 @@ let Predicates = [isEGorCayman] in {
|
||||
def CNDGE_eg : CNDGE_Common<0x1B>;
|
||||
def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
|
||||
def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
|
||||
defm DOT4_eg : DOT4_Common<0xBE>;
|
||||
def DOT4_eg : DOT4_Common<0xBE>;
|
||||
defm CUBE_eg : CUBE_Common<0xC0>;
|
||||
|
||||
let hasSideEffects = 1 in {
|
||||
|
@ -185,6 +185,7 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
|
||||
case AMDGPU::INTERP_PAIR_XY:
|
||||
case AMDGPU::INTERP_PAIR_ZW:
|
||||
case AMDGPU::INTERP_VEC_LOAD:
|
||||
case AMDGPU::DOT_4:
|
||||
return AluT_XYZW;
|
||||
case AMDGPU::COPY:
|
||||
if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
|
||||
@ -252,8 +253,7 @@ int R600SchedStrategy::getInstKind(SUnit* SU) {
|
||||
case AMDGPU::INTERP_PAIR_XY:
|
||||
case AMDGPU::INTERP_PAIR_ZW:
|
||||
case AMDGPU::INTERP_VEC_LOAD:
|
||||
case AMDGPU::DOT4_eg_pseudo:
|
||||
case AMDGPU::DOT4_r600_pseudo:
|
||||
case AMDGPU::DOT_4:
|
||||
return IDAlu;
|
||||
case AMDGPU::TEX_VTX_CONSTBUF:
|
||||
case AMDGPU::TEX_VTX_TEXBUF:
|
||||
|
@ -86,7 +86,8 @@ private:
|
||||
if (BI->getOperand(OperandIdx).getImm() == 0)
|
||||
continue;
|
||||
unsigned Dst = BI->getOperand(0).getReg();
|
||||
if (BI->getOpcode() == AMDGPU::DOT4_r600_real) {
|
||||
if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
|
||||
BI->getOpcode() == AMDGPU::DOT4_eg) {
|
||||
Result[Dst] = AMDGPU::PV_X;
|
||||
continue;
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc < %s -march=r600 | FileCheck %s
|
||||
|
||||
;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
|
||||
;CHECK-NEXT: CNDGE T{{[0-9].[XYZW]}}, PV.x
|
||||
;CHECK: CNDGE * T{{[0-9].[XYZW]}}, PV.x
|
||||
|
||||
define void @main() #0 {
|
||||
main_body:
|
||||
|
Loading…
x
Reference in New Issue
Block a user