Implement sse4.2 string/text processing instructions:

Add patterns and instruction encoding information.
Add custom lowering to deal with hardwired return register of
uncertain type (xmm0).


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79377 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Eric Christopher 2009-08-18 22:50:32 +00:00
parent 4b9e1d291c
commit b120ab4057
4 changed files with 178 additions and 1 deletions

View File

@ -7595,6 +7595,43 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
return nextMBB;
}
MachineBasicBlock *
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
unsigned numArgs, bool memArg) const {
MachineFunction *F = BB->getParent();
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
unsigned Opc;
if (memArg) {
Opc = numArgs == 3 ?
X86::PCMPISTRM128rm :
X86::PCMPESTRM128rm;
} else {
Opc = numArgs == 3 ?
X86::PCMPISTRM128rr :
X86::PCMPESTRM128rr;
}
MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc));
for (unsigned i = 0; i < numArgs; ++i) {
MachineOperand &Op = MI->getOperand(i+1);
if (!(Op.isReg() && Op.isImplicit()))
MIB.addOperand(Op);
}
BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
.addReg(X86::XMM0);
F->DeleteMachineInstr(MI);
return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
MachineInstr *MI,
@ -7804,6 +7841,17 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
return BB;
}
// String/text processing lowering.
case X86::PCMPISTRM128REG:
return EmitPCMP(MI, BB, 3, false /* in-mem */);
case X86::PCMPISTRM128MEM:
return EmitPCMP(MI, BB, 3, true /* in-mem */);
case X86::PCMPESTRM128REG:
return EmitPCMP(MI, BB, 5, false /* in mem */);
case X86::PCMPESTRM128MEM:
return EmitPCMP(MI, BB, 5, true /* in mem */);
// Atomic Lowering.
case X86::ATOMAND32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
X86::AND32ri, X86::MOV32rm,

View File

@ -693,6 +693,14 @@ namespace llvm {
const Value *DstSV, uint64_t DstSVOff,
const Value *SrcSV, uint64_t SrcSVOff);
/// Utility function to emit string processing sse4.2 instructions
/// that return in xmm0.
// This takes the instruction to expand, the associated machine basic
// block, the number of args, and whether or not the second arg is
// in memory or not.
MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
unsigned argNum, bool inMem) const;
/// Utility function to emit atomic bitwise operations (and, or, xor).
// It takes the bitwise instruction to expand, the associated machine basic
// block, and the associated X86 opcodes for reg/reg and reg/imm.

View File

@ -235,6 +235,11 @@ class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, TF, Requires<[HasSSE42]>;
// SS42AI = SSE 4.2 instructions with TA prefix
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE42]>;
// X86-64 Instruction templates...
//
@ -288,4 +293,3 @@ class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> patter
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;

View File

@ -3657,6 +3657,11 @@ def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movntdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;
//===----------------------------------------------------------------------===//
// SSE4.2 Instructions
//===----------------------------------------------------------------------===//
/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
let Constraints = "$src1 = $dst" in {
multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
@ -3739,3 +3744,115 @@ let Constraints = "$src1 = $dst" in {
(int_x86_sse42_crc32_64 GR64:$src1, GR64:$src2))]>,
OpSize, REX_W;
}
// String/text processing instructions.
let Defs = [EFLAGS], usesCustomDAGSchedInserter = 1 in {
def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"#PCMPISTRM128rr PSEUDO!",
[(set VR128:$dst,
(int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
imm:$src3))]>, OpSize;
def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"#PCMPISTRM128rm PSEUDO!",
[(set VR128:$dst,
(int_x86_sse42_pcmpistrm128 VR128:$src1,
(load addr:$src2),
imm:$src3))]>, OpSize;
}
let Defs = [XMM0, EFLAGS] in {
def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}",
[]>, OpSize;
def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}",
[]>, OpSize;
}
let Defs = [EFLAGS], Uses = [EAX, EDX],
usesCustomDAGSchedInserter = 1 in {
def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"#PCMPESTRM128rr PSEUDO!",
[(set VR128:$dst,
(int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
VR128:$src3,
EDX, imm:$src5))]>, OpSize;
def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"#PCMPESTRM128rm PSEUDO!",
[(set VR128:$dst,
(int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
(load addr:$src3),
EDX, imm:$src5))]>, OpSize;
}
let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
def PCMPESTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}",
[]>, OpSize;
def PCMPESTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}",
[]>, OpSize;
}
let Defs = [ECX, EFLAGS] in {
multiclass SS42AI_pcmpistri<Intrinsic IntId128> {
def rr : SS42AI<0x63, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
[(set ECX,
(IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
(implicit EFLAGS)]>,
OpSize;
def rm : SS42AI<0x63, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
[(set ECX,
(IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
(implicit EFLAGS)]>,
OpSize;
}
}
defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
let Defs = [ECX, EFLAGS] in {
let Uses = [EAX, EDX] in {
multiclass SS42AI_pcmpestri<Intrinsic IntId128> {
def rr : SS42AI<0x61, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
[(set ECX,
(IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
(implicit EFLAGS)]>,
OpSize;
def rm : SS42AI<0x61, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
[(set ECX,
(IntId128 VR128:$src1, EAX, (load addr:$src3),
EDX, imm:$src5)),
(implicit EFLAGS)]>,
OpSize;
}
}
}
defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;