mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-28 21:34:23 +00:00
Cleanup pcmp(e/i)str(m/i) instruction definitions and load folding support.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167652 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
89443ff7ae
commit
9c7ae01f39
@ -2679,85 +2679,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
|||||||
|
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: Custom handling because TableGen doesn't support multiple implicit
|
|
||||||
// defs in an instruction pattern
|
|
||||||
case X86ISD::PCMPESTRI: {
|
|
||||||
SDValue N0 = Node->getOperand(0);
|
|
||||||
SDValue N1 = Node->getOperand(1);
|
|
||||||
SDValue N2 = Node->getOperand(2);
|
|
||||||
SDValue N3 = Node->getOperand(3);
|
|
||||||
SDValue N4 = Node->getOperand(4);
|
|
||||||
|
|
||||||
// Make sure last argument is a constant
|
|
||||||
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N4);
|
|
||||||
if (!Cst)
|
|
||||||
break;
|
|
||||||
|
|
||||||
uint64_t Imm = Cst->getZExtValue();
|
|
||||||
|
|
||||||
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
|
|
||||||
X86::EAX, N1, SDValue()).getValue(1);
|
|
||||||
InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
|
|
||||||
N3, InFlag).getValue(1);
|
|
||||||
|
|
||||||
SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag };
|
|
||||||
unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr :
|
|
||||||
X86::PCMPESTRIrr;
|
|
||||||
InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops,
|
|
||||||
array_lengthof(Ops)), 0);
|
|
||||||
|
|
||||||
if (!SDValue(Node, 0).use_empty()) {
|
|
||||||
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
|
|
||||||
X86::ECX, NVT, InFlag);
|
|
||||||
InFlag = Result.getValue(2);
|
|
||||||
ReplaceUses(SDValue(Node, 0), Result);
|
|
||||||
}
|
|
||||||
if (!SDValue(Node, 1).use_empty()) {
|
|
||||||
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
|
|
||||||
X86::EFLAGS, NVT, InFlag);
|
|
||||||
InFlag = Result.getValue(2);
|
|
||||||
ReplaceUses(SDValue(Node, 1), Result);
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
// FIXME: Custom handling because TableGen doesn't support multiple implicit
|
|
||||||
// defs in an instruction pattern
|
|
||||||
case X86ISD::PCMPISTRI: {
|
|
||||||
SDValue N0 = Node->getOperand(0);
|
|
||||||
SDValue N1 = Node->getOperand(1);
|
|
||||||
SDValue N2 = Node->getOperand(2);
|
|
||||||
|
|
||||||
// Make sure last argument is a constant
|
|
||||||
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N2);
|
|
||||||
if (!Cst)
|
|
||||||
break;
|
|
||||||
|
|
||||||
uint64_t Imm = Cst->getZExtValue();
|
|
||||||
|
|
||||||
SDValue Ops[] = { N0, N1, getI8Imm(Imm) };
|
|
||||||
unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr :
|
|
||||||
X86::PCMPISTRIrr;
|
|
||||||
SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops,
|
|
||||||
array_lengthof(Ops)), 0);
|
|
||||||
|
|
||||||
if (!SDValue(Node, 0).use_empty()) {
|
|
||||||
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
|
|
||||||
X86::ECX, NVT, InFlag);
|
|
||||||
InFlag = Result.getValue(2);
|
|
||||||
ReplaceUses(SDValue(Node, 0), Result);
|
|
||||||
}
|
|
||||||
if (!SDValue(Node, 1).use_empty()) {
|
|
||||||
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
|
|
||||||
X86::EFLAGS, NVT, InFlag);
|
|
||||||
InFlag = Result.getValue(2);
|
|
||||||
ReplaceUses(SDValue(Node, 1), Result);
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SDNode *ResNode = SelectCode(Node);
|
SDNode *ResNode = SelectCode(Node);
|
||||||
|
@ -12045,6 +12045,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||||||
case X86ISD::FNMSUB: return "X86ISD::FNMSUB";
|
case X86ISD::FNMSUB: return "X86ISD::FNMSUB";
|
||||||
case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB";
|
case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB";
|
||||||
case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD";
|
case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD";
|
||||||
|
case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI";
|
||||||
|
case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -12839,8 +12841,8 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
|
|||||||
// or XMM0_V32I8 in AVX all of this code can be replaced with that
|
// or XMM0_V32I8 in AVX all of this code can be replaced with that
|
||||||
// in the .td file.
|
// in the .td file.
|
||||||
MachineBasicBlock *
|
MachineBasicBlock *
|
||||||
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
|
X86TargetLowering::EmitPCMPSTRM(MachineInstr *MI, MachineBasicBlock *BB,
|
||||||
unsigned numArgs, bool memArg) const {
|
bool Implicit, bool MemArg) const {
|
||||||
assert(Subtarget->hasSSE42() &&
|
assert(Subtarget->hasSSE42() &&
|
||||||
"Target must have SSE4.2 or AVX features enabled");
|
"Target must have SSE4.2 or AVX features enabled");
|
||||||
|
|
||||||
@ -12848,23 +12850,30 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
|
|||||||
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
|
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
|
||||||
unsigned Opc;
|
unsigned Opc;
|
||||||
if (!Subtarget->hasAVX()) {
|
if (!Subtarget->hasAVX()) {
|
||||||
if (memArg)
|
if (MemArg)
|
||||||
Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
|
Opc = Implicit ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
|
||||||
else
|
else
|
||||||
Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
|
Opc = Implicit ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
|
||||||
} else {
|
} else {
|
||||||
if (memArg)
|
if (MemArg)
|
||||||
Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm;
|
Opc = Implicit ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm;
|
||||||
else
|
else
|
||||||
Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
|
Opc = Implicit ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned NumArgs = Implicit ? 3 : 5;
|
||||||
|
if (MemArg)
|
||||||
|
NumArgs += X86::AddrNumOperands;
|
||||||
|
|
||||||
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
|
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
|
||||||
for (unsigned i = 0; i < numArgs; ++i) {
|
for (unsigned i = 0; i < NumArgs; ++i) {
|
||||||
MachineOperand &Op = MI->getOperand(i+1);
|
MachineOperand &Op = MI->getOperand(i+1);
|
||||||
if (!(Op.isReg() && Op.isImplicit()))
|
if (!(Op.isReg() && Op.isImplicit()))
|
||||||
MIB.addOperand(Op);
|
MIB.addOperand(Op);
|
||||||
}
|
}
|
||||||
|
if (MemArg)
|
||||||
|
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
|
||||||
|
|
||||||
BuildMI(*BB, MI, dl,
|
BuildMI(*BB, MI, dl,
|
||||||
TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
|
TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
|
||||||
.addReg(X86::XMM0);
|
.addReg(X86::XMM0);
|
||||||
@ -12873,6 +12882,50 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
|
|||||||
return BB;
|
return BB;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: Custom handling because TableGen doesn't support multiple implicit
|
||||||
|
// defs in an instruction pattern
|
||||||
|
MachineBasicBlock *
|
||||||
|
X86TargetLowering::EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB,
|
||||||
|
bool Implicit, bool MemArg) const {
|
||||||
|
assert(Subtarget->hasSSE42() &&
|
||||||
|
"Target must have SSE4.2 or AVX features enabled");
|
||||||
|
|
||||||
|
DebugLoc dl = MI->getDebugLoc();
|
||||||
|
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
|
||||||
|
unsigned Opc;
|
||||||
|
if (!Subtarget->hasAVX()) {
|
||||||
|
if (MemArg)
|
||||||
|
Opc = Implicit ? X86::PCMPISTRIrm : X86::PCMPESTRIrm;
|
||||||
|
else
|
||||||
|
Opc = Implicit ? X86::PCMPISTRIrr : X86::PCMPESTRIrr;
|
||||||
|
} else {
|
||||||
|
if (MemArg)
|
||||||
|
Opc = Implicit ? X86::VPCMPISTRIrm : X86::VPCMPESTRIrm;
|
||||||
|
else
|
||||||
|
Opc = Implicit ? X86::VPCMPISTRIrr : X86::VPCMPESTRIrr;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned NumArgs = Implicit ? 3 : 5;
|
||||||
|
if (MemArg)
|
||||||
|
NumArgs += X86::AddrNumOperands;
|
||||||
|
|
||||||
|
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
|
||||||
|
for (unsigned i = 0; i < NumArgs; ++i) {
|
||||||
|
MachineOperand &Op = MI->getOperand(i+1);
|
||||||
|
if (!(Op.isReg() && Op.isImplicit()))
|
||||||
|
MIB.addOperand(Op);
|
||||||
|
}
|
||||||
|
if (MemArg)
|
||||||
|
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
|
||||||
|
|
||||||
|
BuildMI(*BB, MI, dl,
|
||||||
|
TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
|
||||||
|
.addReg(X86::ECX);
|
||||||
|
|
||||||
|
MI->eraseFromParent();
|
||||||
|
return BB;
|
||||||
|
}
|
||||||
|
|
||||||
MachineBasicBlock *
|
MachineBasicBlock *
|
||||||
X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const {
|
X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const {
|
||||||
DebugLoc dl = MI->getDebugLoc();
|
DebugLoc dl = MI->getDebugLoc();
|
||||||
@ -13891,24 +13944,51 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
|
|||||||
case X86::VPCMPESTRM128REG:
|
case X86::VPCMPESTRM128REG:
|
||||||
case X86::PCMPESTRM128MEM:
|
case X86::PCMPESTRM128MEM:
|
||||||
case X86::VPCMPESTRM128MEM: {
|
case X86::VPCMPESTRM128MEM: {
|
||||||
unsigned NumArgs;
|
bool Implicit, MemArg;
|
||||||
bool MemArg;
|
|
||||||
switch (MI->getOpcode()) {
|
switch (MI->getOpcode()) {
|
||||||
default: llvm_unreachable("illegal opcode!");
|
default: llvm_unreachable("illegal opcode!");
|
||||||
case X86::PCMPISTRM128REG:
|
case X86::PCMPISTRM128REG:
|
||||||
case X86::VPCMPISTRM128REG:
|
case X86::VPCMPISTRM128REG:
|
||||||
NumArgs = 3; MemArg = false; break;
|
Implicit = true; MemArg = false; break;
|
||||||
case X86::PCMPISTRM128MEM:
|
case X86::PCMPISTRM128MEM:
|
||||||
case X86::VPCMPISTRM128MEM:
|
case X86::VPCMPISTRM128MEM:
|
||||||
NumArgs = 3; MemArg = true; break;
|
Implicit = true; MemArg = true; break;
|
||||||
case X86::PCMPESTRM128REG:
|
case X86::PCMPESTRM128REG:
|
||||||
case X86::VPCMPESTRM128REG:
|
case X86::VPCMPESTRM128REG:
|
||||||
NumArgs = 5; MemArg = false; break;
|
Implicit = false; MemArg = false; break;
|
||||||
case X86::PCMPESTRM128MEM:
|
case X86::PCMPESTRM128MEM:
|
||||||
case X86::VPCMPESTRM128MEM:
|
case X86::VPCMPESTRM128MEM:
|
||||||
NumArgs = 5; MemArg = true; break;
|
Implicit = false; MemArg = true; break;
|
||||||
}
|
}
|
||||||
return EmitPCMP(MI, BB, NumArgs, MemArg);
|
return EmitPCMPSTRM(MI, BB, Implicit, MemArg);
|
||||||
|
}
|
||||||
|
|
||||||
|
// String/text processing lowering.
|
||||||
|
case X86::PCMPISTRIREG:
|
||||||
|
case X86::VPCMPISTRIREG:
|
||||||
|
case X86::PCMPISTRIMEM:
|
||||||
|
case X86::VPCMPISTRIMEM:
|
||||||
|
case X86::PCMPESTRIREG:
|
||||||
|
case X86::VPCMPESTRIREG:
|
||||||
|
case X86::PCMPESTRIMEM:
|
||||||
|
case X86::VPCMPESTRIMEM: {
|
||||||
|
bool Implicit, MemArg;
|
||||||
|
switch (MI->getOpcode()) {
|
||||||
|
default: llvm_unreachable("illegal opcode!");
|
||||||
|
case X86::PCMPISTRIREG:
|
||||||
|
case X86::VPCMPISTRIREG:
|
||||||
|
Implicit = true; MemArg = false; break;
|
||||||
|
case X86::PCMPISTRIMEM:
|
||||||
|
case X86::VPCMPISTRIMEM:
|
||||||
|
Implicit = true; MemArg = true; break;
|
||||||
|
case X86::PCMPESTRIREG:
|
||||||
|
case X86::VPCMPESTRIREG:
|
||||||
|
Implicit = false; MemArg = false; break;
|
||||||
|
case X86::PCMPESTRIMEM:
|
||||||
|
case X86::VPCMPESTRIMEM:
|
||||||
|
Implicit = false; MemArg = true; break;
|
||||||
|
}
|
||||||
|
return EmitPCMPSTRI(MI, BB, Implicit, MemArg);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Thread synchronization.
|
// Thread synchronization.
|
||||||
|
@ -871,13 +871,15 @@ namespace llvm {
|
|||||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||||
LLVMContext &Context) const;
|
LLVMContext &Context) const;
|
||||||
|
|
||||||
/// Utility function to emit string processing sse4.2 instructions
|
/// Utility functions to emit string processing sse4.2 instructions
|
||||||
/// that return in xmm0.
|
/// that return in xmm0 (PCMPxSTRM) or in ecx (PCMPxSTRI).
|
||||||
/// This takes the instruction to expand, the associated machine basic
|
/// This takes the instruction to expand, the associated machine basic
|
||||||
/// block, the number of args, and whether or not the second arg is
|
/// block, whether the lengths are implicit, and whether the second arg is
|
||||||
/// in memory or not.
|
/// in memory.
|
||||||
MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
|
MachineBasicBlock *EmitPCMPSTRM(MachineInstr *MI, MachineBasicBlock *BB,
|
||||||
unsigned argNum, bool inMem) const;
|
bool Implicit, bool MemArg) const;
|
||||||
|
MachineBasicBlock *EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB,
|
||||||
|
bool Implicit, bool MemArg) const;
|
||||||
|
|
||||||
/// Utility functions to emit monitor and mwait instructions. These
|
/// Utility functions to emit monitor and mwait instructions. These
|
||||||
/// need to make sure that the arguments to the intrinsic are in the
|
/// need to make sure that the arguments to the intrinsic are in the
|
||||||
|
@ -7002,8 +7002,8 @@ multiclass pseudo_pcmpistrm<string asm> {
|
|||||||
imm:$src3))]>;
|
imm:$src3))]>;
|
||||||
def MEM : PseudoI<(outs VR128:$dst),
|
def MEM : PseudoI<(outs VR128:$dst),
|
||||||
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
|
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
|
||||||
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128
|
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1,
|
||||||
VR128:$src1, (load addr:$src2), imm:$src3))]>;
|
(bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Defs = [EFLAGS], usesCustomInserter = 1 in {
|
let Defs = [EFLAGS], usesCustomInserter = 1 in {
|
||||||
@ -7011,24 +7011,22 @@ let Defs = [EFLAGS], usesCustomInserter = 1 in {
|
|||||||
defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[UseSSE42]>;
|
defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[UseSSE42]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in {
|
multiclass pcmpistrm_SS42AI<string asm> {
|
||||||
def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
|
def rr : SS42AI<0x62, MRMSrcReg, (outs),
|
||||||
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
|
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
|
||||||
"vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
|
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
|
||||||
|
[]>, OpSize;
|
||||||
let mayLoad = 1 in
|
let mayLoad = 1 in
|
||||||
def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
|
def rm :SS42AI<0x62, MRMSrcMem, (outs),
|
||||||
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
|
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
|
||||||
"vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
|
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
|
||||||
|
[]>, OpSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in {
|
let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in {
|
||||||
def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
|
let Predicates = [HasAVX] in
|
||||||
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
|
defm VPCMPISTRM128 : pcmpistrm_SS42AI<"vpcmpistrm">, VEX;
|
||||||
"pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
|
defm PCMPISTRM128 : pcmpistrm_SS42AI<"pcmpistrm"> ;
|
||||||
let mayLoad = 1 in
|
|
||||||
def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
|
|
||||||
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
|
|
||||||
"pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Packed Compare Explicit Length Strings, Return Mask
|
// Packed Compare Explicit Length Strings, Return Mask
|
||||||
@ -7039,8 +7037,8 @@ multiclass pseudo_pcmpestrm<string asm> {
|
|||||||
VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
|
VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
|
||||||
def MEM : PseudoI<(outs VR128:$dst),
|
def MEM : PseudoI<(outs VR128:$dst),
|
||||||
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
|
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
|
||||||
[(set VR128:$dst, (int_x86_sse42_pcmpestrm128
|
[(set VR128:$dst, (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
|
||||||
VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
|
(bc_v16i8 (memopv2i64 addr:$src3)), EDX, imm:$src5))]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
|
let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
|
||||||
@ -7048,64 +7046,94 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
|
|||||||
defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[UseSSE42]>;
|
defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[UseSSE42]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasAVX],
|
multiclass SS42AI_pcmpestrm<string asm> {
|
||||||
Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
|
def rr : SS42AI<0x60, MRMSrcReg, (outs),
|
||||||
def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
|
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
|
||||||
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
|
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
|
||||||
"vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
|
[]>, OpSize;
|
||||||
let mayLoad = 1 in
|
let mayLoad = 1 in
|
||||||
def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
|
def rm : SS42AI<0x60, MRMSrcMem, (outs),
|
||||||
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
|
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
|
||||||
"vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
|
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
|
||||||
|
[]>, OpSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
|
let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
|
||||||
def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
|
let Predicates = [HasAVX] in
|
||||||
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
|
defm VPCMPESTRM128 : SS42AI_pcmpestrm<"vpcmpestrm">, VEX;
|
||||||
"pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
|
defm PCMPESTRM128 : SS42AI_pcmpestrm<"pcmpestrm">;
|
||||||
let mayLoad = 1 in
|
|
||||||
def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
|
|
||||||
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
|
|
||||||
"pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Packed Compare Implicit Length Strings, Return Index
|
// Packed Compare Implicit Length Strings, Return Index
|
||||||
let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in {
|
multiclass pseudo_pcmpistri<string asm> {
|
||||||
multiclass SS42AI_pcmpistri<string asm> {
|
def REG : PseudoI<(outs GR32:$dst),
|
||||||
def rr : SS42AI<0x63, MRMSrcReg, (outs),
|
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
|
||||||
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
|
[(set GR32:$dst, EFLAGS,
|
||||||
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
|
(X86pcmpistri VR128:$src1, VR128:$src2, imm:$src3))]>;
|
||||||
[]>, OpSize;
|
def MEM : PseudoI<(outs GR32:$dst),
|
||||||
let mayLoad = 1 in
|
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
|
||||||
def rm : SS42AI<0x63, MRMSrcMem, (outs),
|
[(set GR32:$dst, EFLAGS, (X86pcmpistri VR128:$src1,
|
||||||
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
|
(bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>;
|
||||||
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
|
|
||||||
[]>, OpSize;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasAVX] in
|
let Defs = [EFLAGS], usesCustomInserter = 1 in {
|
||||||
defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
|
defm VPCMPISTRI : pseudo_pcmpistri<"#VPCMPISTRI">, Requires<[HasAVX]>;
|
||||||
defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
|
defm PCMPISTRI : pseudo_pcmpistri<"#PCMPISTRI">, Requires<[UseSSE42]>;
|
||||||
|
}
|
||||||
|
|
||||||
|
multiclass SS42AI_pcmpistri<string asm> {
|
||||||
|
def rr : SS42AI<0x63, MRMSrcReg, (outs),
|
||||||
|
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
|
||||||
|
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
|
||||||
|
[]>, OpSize;
|
||||||
|
let mayLoad = 1 in
|
||||||
|
def rm : SS42AI<0x63, MRMSrcMem, (outs),
|
||||||
|
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
|
||||||
|
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
|
||||||
|
[]>, OpSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in {
|
||||||
|
let Predicates = [HasAVX] in
|
||||||
|
defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
|
||||||
|
defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
|
||||||
|
}
|
||||||
|
|
||||||
// Packed Compare Explicit Length Strings, Return Index
|
// Packed Compare Explicit Length Strings, Return Index
|
||||||
let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
|
multiclass pseudo_pcmpestri<string asm> {
|
||||||
multiclass SS42AI_pcmpestri<string asm> {
|
def REG : PseudoI<(outs GR32:$dst),
|
||||||
def rr : SS42AI<0x61, MRMSrcReg, (outs),
|
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
|
||||||
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
|
[(set GR32:$dst, EFLAGS,
|
||||||
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
|
(X86pcmpestri VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
|
||||||
[]>, OpSize;
|
def MEM : PseudoI<(outs GR32:$dst),
|
||||||
let mayLoad = 1 in
|
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
|
||||||
def rm : SS42AI<0x61, MRMSrcMem, (outs),
|
[(set GR32:$dst, EFLAGS,
|
||||||
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
|
(X86pcmpestri VR128:$src1, EAX, (bc_v16i8 (memopv2i64 addr:$src3)), EDX,
|
||||||
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
|
imm:$src5))]>;
|
||||||
[]>, OpSize;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasAVX] in
|
let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
|
||||||
defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
|
defm VPCMPESTRI : pseudo_pcmpestri<"#VPCMPESTRI">, Requires<[HasAVX]>;
|
||||||
defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
|
defm PCMPESTRI : pseudo_pcmpestri<"#PCMPESTRI">, Requires<[UseSSE42]>;
|
||||||
|
}
|
||||||
|
|
||||||
|
multiclass SS42AI_pcmpestri<string asm> {
|
||||||
|
def rr : SS42AI<0x61, MRMSrcReg, (outs),
|
||||||
|
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
|
||||||
|
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
|
||||||
|
[]>, OpSize;
|
||||||
|
let mayLoad = 1 in
|
||||||
|
def rm : SS42AI<0x61, MRMSrcMem, (outs),
|
||||||
|
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
|
||||||
|
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
|
||||||
|
[]>, OpSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
|
||||||
|
let Predicates = [HasAVX] in
|
||||||
|
defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
|
||||||
|
defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
|
||||||
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// SSE4.2 - CRC Instructions
|
// SSE4.2 - CRC Instructions
|
||||||
|
@ -1140,9 +1140,9 @@ declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) noun
|
|||||||
|
|
||||||
|
|
||||||
define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
|
define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
|
||||||
; CHECK: movl
|
; CHECK: movl $7
|
||||||
; CHECK: movl
|
; CHECK: movl $7
|
||||||
; CHECK: vpcmpestri
|
; CHECK: vpcmpestri $7
|
||||||
; CHECK: movl
|
; CHECK: movl
|
||||||
%res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
|
%res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
|
||||||
ret i32 %res
|
ret i32 %res
|
||||||
@ -1150,6 +1150,18 @@ define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
|
|||||||
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
|
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
|
||||||
|
; CHECK: movl $7
|
||||||
|
; CHECK: movl $7
|
||||||
|
; CHECK: vpcmpestri $7, (
|
||||||
|
; CHECK: movl
|
||||||
|
%1 = load <16 x i8>* %a0
|
||||||
|
%2 = load <16 x i8>* %a2
|
||||||
|
%res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1]
|
||||||
|
ret i32 %res
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
|
define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
|
||||||
; CHECK: movl
|
; CHECK: movl
|
||||||
; CHECK: movl
|
; CHECK: movl
|
||||||
@ -1216,8 +1228,19 @@ define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
|
|||||||
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
|
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) {
|
||||||
|
; CHECK: movl $7
|
||||||
|
; CHECK: movl $7
|
||||||
|
; CHECK: vpcmpestrm $7,
|
||||||
|
; CHECK-NOT: vmov
|
||||||
|
%1 = load <16 x i8>* %a2
|
||||||
|
%res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
|
||||||
|
ret <16 x i8> %res
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
|
define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
|
||||||
; CHECK: vpcmpistri
|
; CHECK: vpcmpistri $7
|
||||||
; CHECK: movl
|
; CHECK: movl
|
||||||
%res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
|
%res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
|
||||||
ret i32 %res
|
ret i32 %res
|
||||||
@ -1225,6 +1248,16 @@ define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
|
|||||||
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
|
||||||
|
; CHECK: vpcmpistri $7, (
|
||||||
|
; CHECK: movl
|
||||||
|
%1 = load <16 x i8>* %a0
|
||||||
|
%2 = load <16 x i8>* %a1
|
||||||
|
%res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1]
|
||||||
|
ret i32 %res
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
|
define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
|
||||||
; CHECK: vpcmpistri
|
; CHECK: vpcmpistri
|
||||||
; CHECK: seta
|
; CHECK: seta
|
||||||
@ -1271,7 +1304,7 @@ declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind rea
|
|||||||
|
|
||||||
|
|
||||||
define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
|
define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
|
||||||
; CHECK: vpcmpistrm
|
; CHECK: vpcmpistrm $7
|
||||||
; CHECK-NOT: vmov
|
; CHECK-NOT: vmov
|
||||||
%res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
|
%res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
|
||||||
ret <16 x i8> %res
|
ret <16 x i8> %res
|
||||||
@ -1279,6 +1312,15 @@ define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
|
|||||||
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) {
|
||||||
|
; CHECK: vpcmpistrm $7, (
|
||||||
|
; CHECK-NOT: vmov
|
||||||
|
%1 = load <16 x i8>* %a1
|
||||||
|
%res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
|
||||||
|
ret <16 x i8> %res
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
|
define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||||
; CHECK: vaddss
|
; CHECK: vaddss
|
||||||
%res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
|
%res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user