Replace vpermd/vpermps intrinsic patterns with custom lowering to target-specific nodes.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154801 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper 2012-04-16 07:13:00 +00:00
parent 7d31d75a77
commit ffa6c40ecf
2 changed files with 14 additions and 18 deletions

View File

@ -9597,6 +9597,12 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_avx_vpermil_pd_256: case Intrinsic::x86_avx_vpermil_pd_256:
return DAG.getNode(X86ISD::VPERMILP, dl, Op.getValueType(), return DAG.getNode(X86ISD::VPERMILP, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2)); Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
// Operands intentionally swapped. Mask is last operand to intrinsic,
// but second operand for node/instruction.
return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(1));
// ptest and testp intrinsics. The intrinsic these come from are designed to // ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest // return an integer value, not just an instruction so lower it to the ptest

View File

@ -7735,24 +7735,26 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
// //
multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
Intrinsic Int> { ValueType OpVT> {
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2), (ins VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (Int VR256:$src2, VR256:$src1))]>, VEX_4V; [(set VR256:$dst,
(OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>, VEX_4V;
def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2), (ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (Int (bitconvert (mem_frag addr:$src2)), [(set VR256:$dst,
VR256:$src1))]>, (OpVT (X86VPermv VR256:$src1,
(bitconvert (mem_frag addr:$src2)))))]>,
VEX_4V; VEX_4V;
} }
defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, int_x86_avx2_permd>; defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, v8i32>;
let ExeDomain = SSEPackedSingle in let ExeDomain = SSEPackedSingle in
defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>; defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, v8f32>;
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
ValueType OpVT> { ValueType OpVT> {
@ -7775,18 +7777,6 @@ defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, v4i64>, VEX_W;
let ExeDomain = SSEPackedDouble in let ExeDomain = SSEPackedDouble in
defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, v4f64>, VEX_W; defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, v4f64>, VEX_W;
let Predicates = [HasAVX2] in {
def : Pat<(v8i32 (X86VPermv VR256:$src1, VR256:$src2)),
(VPERMDYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v8f32 (X86VPermv VR256:$src1, VR256:$src2)),
(VPERMPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v8i32 (X86VPermv VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
(VPERMDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8f32 (X86VPermv VR256:$src1, (memopv8f32 addr:$src2))),
(VPERMPSYrm VR256:$src1, addr:$src2)>;
}
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks // VPERM2I128 - Permute Floating-Point Values in 128-bit chunks
// //