mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-13 22:24:07 +00:00
Update GATHER instructions to support 2 read-write operands. Patch from myself and Manman Ren.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160110 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -572,8 +572,14 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
|
|||||||
// Classify VEX_B, VEX_4V, VEX_R, VEX_X
|
// Classify VEX_B, VEX_4V, VEX_R, VEX_X
|
||||||
unsigned NumOps = Desc.getNumOperands();
|
unsigned NumOps = Desc.getNumOperands();
|
||||||
unsigned CurOp = 0;
|
unsigned CurOp = 0;
|
||||||
if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1)
|
if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0)
|
||||||
++CurOp;
|
++CurOp;
|
||||||
|
else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) {
|
||||||
|
assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
|
||||||
|
// Special case for GATHER with 2 TIED_TO operands
|
||||||
|
// Skip the first 2 operands: dst, mask_wb
|
||||||
|
CurOp += 2;
|
||||||
|
}
|
||||||
|
|
||||||
switch (TSFlags & X86II::FormMask) {
|
switch (TSFlags & X86II::FormMask) {
|
||||||
case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this!");
|
case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this!");
|
||||||
@ -971,11 +977,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
|||||||
// FIXME: This should be handled during MCInst lowering.
|
// FIXME: This should be handled during MCInst lowering.
|
||||||
unsigned NumOps = Desc.getNumOperands();
|
unsigned NumOps = Desc.getNumOperands();
|
||||||
unsigned CurOp = 0;
|
unsigned CurOp = 0;
|
||||||
if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1)
|
if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0)
|
||||||
++CurOp;
|
++CurOp;
|
||||||
else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, MCOI::TIED_TO)== 0)
|
else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) {
|
||||||
// Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
|
assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
|
||||||
--NumOps;
|
// Special case for GATHER with 2 TIED_TO operands
|
||||||
|
// Skip the first 2 operands: dst, mask_wb
|
||||||
|
CurOp += 2;
|
||||||
|
}
|
||||||
|
|
||||||
// Keep track of the current byte being emitted.
|
// Keep track of the current byte being emitted.
|
||||||
unsigned CurByte = 0;
|
unsigned CurByte = 0;
|
||||||
|
@ -935,8 +935,15 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
|
|||||||
// Classify VEX_B, VEX_4V, VEX_R, VEX_X
|
// Classify VEX_B, VEX_4V, VEX_R, VEX_X
|
||||||
unsigned NumOps = Desc->getNumOperands();
|
unsigned NumOps = Desc->getNumOperands();
|
||||||
unsigned CurOp = 0;
|
unsigned CurOp = 0;
|
||||||
if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) != -1)
|
if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0)
|
||||||
++CurOp;
|
++CurOp;
|
||||||
|
else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) {
|
||||||
|
assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
|
||||||
|
// Special case for GATHER with 2 TIED_TO operands
|
||||||
|
// Skip the first 2 operands: dst, mask_wb
|
||||||
|
CurOp += 2;
|
||||||
|
}
|
||||||
|
|
||||||
switch (TSFlags & X86II::FormMask) {
|
switch (TSFlags & X86II::FormMask) {
|
||||||
case X86II::MRMInitReg:
|
case X86II::MRMInitReg:
|
||||||
// Duplicate register.
|
// Duplicate register.
|
||||||
@ -1117,11 +1124,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
|
|||||||
// If this is a two-address instruction, skip one of the register operands.
|
// If this is a two-address instruction, skip one of the register operands.
|
||||||
unsigned NumOps = Desc->getNumOperands();
|
unsigned NumOps = Desc->getNumOperands();
|
||||||
unsigned CurOp = 0;
|
unsigned CurOp = 0;
|
||||||
if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) != -1)
|
if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0)
|
||||||
++CurOp;
|
++CurOp;
|
||||||
else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1,MCOI::TIED_TO)== 0)
|
else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) {
|
||||||
// Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
|
assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
|
||||||
--NumOps;
|
// Special case for GATHER with 2 TIED_TO operands
|
||||||
|
// Skip the first 2 operands: dst, mask_wb
|
||||||
|
CurOp += 2;
|
||||||
|
}
|
||||||
|
|
||||||
uint64_t TSFlags = Desc->TSFlags;
|
uint64_t TSFlags = Desc->TSFlags;
|
||||||
|
|
||||||
|
@ -1966,14 +1966,22 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
|
|||||||
if (!Scale)
|
if (!Scale)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(),
|
||||||
|
MVT::Other);
|
||||||
|
|
||||||
// Memory Operands: Base, Scale, Index, Disp, Segment
|
// Memory Operands: Base, Scale, Index, Disp, Segment
|
||||||
SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32);
|
SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32);
|
||||||
SDValue Segment = CurDAG->getRegister(0, MVT::i32);
|
SDValue Segment = CurDAG->getRegister(0, MVT::i32);
|
||||||
const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx,
|
const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx,
|
||||||
Disp, Segment, VMask, Chain};
|
Disp, Segment, VMask, Chain};
|
||||||
SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
|
SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
|
||||||
VSrc.getValueType(), MVT::Other,
|
VTs, Ops, array_lengthof(Ops));
|
||||||
Ops, array_lengthof(Ops));
|
// Node has 2 outputs: VDst and MVT::Other.
|
||||||
|
// ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other.
|
||||||
|
// We replace VDst of Node with VDst of ResNode, and Other of Node with Other
|
||||||
|
// of ResNode.
|
||||||
|
ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
|
||||||
|
ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2));
|
||||||
return ResNode;
|
return ResNode;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2034,7 +2042,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
|||||||
}
|
}
|
||||||
SDNode *RetVal = SelectGather(Node, Opc);
|
SDNode *RetVal = SelectGather(Node, Opc);
|
||||||
if (RetVal)
|
if (RetVal)
|
||||||
return RetVal;
|
// We already called ReplaceUses inside SelectGather.
|
||||||
|
return NULL;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8038,19 +8038,19 @@ defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
|
|||||||
// VGATHER - GATHER Operations
|
// VGATHER - GATHER Operations
|
||||||
multiclass avx2_gather<bits<8> opc, string OpcodeStr,
|
multiclass avx2_gather<bits<8> opc, string OpcodeStr,
|
||||||
RegisterClass RC256, X86MemOperand memop256> {
|
RegisterClass RC256, X86MemOperand memop256> {
|
||||||
def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
|
def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst, VR128:$mask_wb),
|
||||||
(ins VR128:$src1, v128mem:$src2, VR128:$mask),
|
(ins VR128:$src1, v128mem:$src2, VR128:$mask),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
|
"\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
|
||||||
[]>, VEX_4VOp3;
|
[]>, VEX_4VOp3;
|
||||||
def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst),
|
def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst, RC256:$mask_wb),
|
||||||
(ins RC256:$src1, memop256:$src2, RC256:$mask),
|
(ins RC256:$src1, memop256:$src2, RC256:$mask),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
|
"\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
|
||||||
[]>, VEX_4VOp3, VEX_L;
|
[]>, VEX_4VOp3, VEX_L;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst, $mask = $mask_wb" in {
|
||||||
defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, v128mem>, VEX_W;
|
defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, v128mem>, VEX_W;
|
||||||
defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, v256mem>, VEX_W;
|
defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, v256mem>, VEX_W;
|
||||||
defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, v256mem>;
|
defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, v256mem>;
|
||||||
|
@ -1136,3 +1136,22 @@ define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1,
|
|||||||
}
|
}
|
||||||
declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*,
|
declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*,
|
||||||
<4 x i64>, <4 x i32>, i8) nounwind readonly
|
<4 x i64>, <4 x i32>, i8) nounwind readonly
|
||||||
|
|
||||||
|
; PR13298
|
||||||
|
define <8 x float> @test_gather_mask(<8 x float> %a0, float* %a,
|
||||||
|
<8 x i32> %idx, <8 x float> %mask,
|
||||||
|
float* nocapture %out) {
|
||||||
|
; CHECK: test_gather_mask
|
||||||
|
; CHECK: vmovdqa %ymm2, [[DEST:%.*]]
|
||||||
|
; CHECK: vgatherdps [[DEST]]
|
||||||
|
;; gather with mask
|
||||||
|
%a_i8 = bitcast float* %a to i8*
|
||||||
|
%res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
|
||||||
|
i8* %a_i8, <8 x i32> %idx, <8 x float> %mask, i8 4) ;
|
||||||
|
|
||||||
|
;; for debugging, we'll just dump out the mask
|
||||||
|
%out_ptr = bitcast float * %out to <8 x float> *
|
||||||
|
store <8 x float> %mask, <8 x float> * %out_ptr, align 4
|
||||||
|
|
||||||
|
ret <8 x float> %res
|
||||||
|
}
|
||||||
|
@ -277,8 +277,8 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void RecognizableInstr::processInstr(DisassemblerTables &tables,
|
void RecognizableInstr::processInstr(DisassemblerTables &tables,
|
||||||
const CodeGenInstruction &insn,
|
const CodeGenInstruction &insn,
|
||||||
InstrUID uid)
|
InstrUID uid)
|
||||||
{
|
{
|
||||||
// Ignore "asm parser only" instructions.
|
// Ignore "asm parser only" instructions.
|
||||||
if (insn.TheDef->getValueAsBit("isAsmParserOnly"))
|
if (insn.TheDef->getValueAsBit("isAsmParserOnly"))
|
||||||
@ -508,13 +508,13 @@ bool RecognizableInstr::has256BitOperands() const {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RecognizableInstr::handleOperand(
|
void RecognizableInstr::handleOperand(bool optional, unsigned &operandIndex,
|
||||||
bool optional,
|
unsigned &physicalOperandIndex,
|
||||||
unsigned &operandIndex,
|
unsigned &numPhysicalOperands,
|
||||||
unsigned &physicalOperandIndex,
|
const unsigned *operandMapping,
|
||||||
unsigned &numPhysicalOperands,
|
OperandEncoding (*encodingFromString)
|
||||||
unsigned *operandMapping,
|
(const std::string&,
|
||||||
OperandEncoding (*encodingFromString)(const std::string&, bool hasOpSizePrefix)) {
|
bool hasOpSizePrefix)) {
|
||||||
if (optional) {
|
if (optional) {
|
||||||
if (physicalOperandIndex >= numPhysicalOperands)
|
if (physicalOperandIndex >= numPhysicalOperands)
|
||||||
return;
|
return;
|
||||||
@ -563,7 +563,6 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
|
|||||||
|
|
||||||
const std::vector<CGIOperandList::OperandInfo> &OperandList = *Operands;
|
const std::vector<CGIOperandList::OperandInfo> &OperandList = *Operands;
|
||||||
|
|
||||||
unsigned operandIndex;
|
|
||||||
unsigned numOperands = OperandList.size();
|
unsigned numOperands = OperandList.size();
|
||||||
unsigned numPhysicalOperands = 0;
|
unsigned numPhysicalOperands = 0;
|
||||||
|
|
||||||
@ -575,12 +574,13 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
|
|||||||
|
|
||||||
assert(numOperands <= X86_MAX_OPERANDS && "X86_MAX_OPERANDS is not large enough");
|
assert(numOperands <= X86_MAX_OPERANDS && "X86_MAX_OPERANDS is not large enough");
|
||||||
|
|
||||||
for (operandIndex = 0; operandIndex < numOperands; ++operandIndex) {
|
for (unsigned operandIndex = 0; operandIndex < numOperands; ++operandIndex) {
|
||||||
if (OperandList[operandIndex].Constraints.size()) {
|
if (OperandList[operandIndex].Constraints.size()) {
|
||||||
const CGIOperandList::ConstraintInfo &Constraint =
|
const CGIOperandList::ConstraintInfo &Constraint =
|
||||||
OperandList[operandIndex].Constraints[0];
|
OperandList[operandIndex].Constraints[0];
|
||||||
if (Constraint.isTied()) {
|
if (Constraint.isTied()) {
|
||||||
operandMapping[operandIndex] = Constraint.getTiedOperand();
|
operandMapping[operandIndex] = operandIndex;
|
||||||
|
operandMapping[Constraint.getTiedOperand()] = operandIndex;
|
||||||
} else {
|
} else {
|
||||||
++numPhysicalOperands;
|
++numPhysicalOperands;
|
||||||
operandMapping[operandIndex] = operandIndex;
|
operandMapping[operandIndex] = operandIndex;
|
||||||
@ -621,7 +621,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
|
|||||||
class##EncodingFromString);
|
class##EncodingFromString);
|
||||||
|
|
||||||
// operandIndex should always be < numOperands
|
// operandIndex should always be < numOperands
|
||||||
operandIndex = 0;
|
unsigned operandIndex = 0;
|
||||||
// physicalOperandIndex should always be < numPhysicalOperands
|
// physicalOperandIndex should always be < numPhysicalOperands
|
||||||
unsigned physicalOperandIndex = 0;
|
unsigned physicalOperandIndex = 0;
|
||||||
|
|
||||||
|
@ -204,7 +204,7 @@ private:
|
|||||||
unsigned &operandIndex,
|
unsigned &operandIndex,
|
||||||
unsigned &physicalOperandIndex,
|
unsigned &physicalOperandIndex,
|
||||||
unsigned &numPhysicalOperands,
|
unsigned &numPhysicalOperands,
|
||||||
unsigned *operandMapping,
|
const unsigned *operandMapping,
|
||||||
OperandEncoding (*encodingFromString)
|
OperandEncoding (*encodingFromString)
|
||||||
(const std::string&,
|
(const std::string&,
|
||||||
bool hasOpSizePrefix));
|
bool hasOpSizePrefix));
|
||||||
|
Reference in New Issue
Block a user