diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index 4a5b3d6b131..f4abba98c08 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -1092,17 +1092,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx_vpermil_pd : GCCBuiltin<"__builtin_ia32_vpermilpd">, + def int_x86_avx_vpermil_pd : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx_vpermil_ps : GCCBuiltin<"__builtin_ia32_vpermilps">, + def int_x86_avx_vpermil_ps : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx_vpermil_pd_256 : GCCBuiltin<"__builtin_ia32_vpermilpd256">, + def int_x86_avx_vpermil_pd_256 : Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx_vpermil_ps_256 : GCCBuiltin<"__builtin_ia32_vpermilps256">, + def int_x86_avx_vpermil_ps_256 : Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b2eb0432e4c..7660b953699 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9488,6 +9488,12 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx2_vperm2i128: return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::x86_avx_vpermil_ps: + case Intrinsic::x86_avx_vpermil_pd: + case Intrinsic::x86_avx_vpermil_ps_256: + case Intrinsic::x86_avx_vpermil_pd_256: + return DAG.getNode(X86ISD::VPERMILP, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index fb70b9cf0af..96403197ace 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7129,8 +7129,8 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", // multiclass avx_permil opc_rm, bits<8> opc_rmi, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop_f, - X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag, - Intrinsic IntVar, Intrinsic IntImm> { + X86MemOperand x86memop_i, PatFrag i_frag, + Intrinsic IntVar, ValueType vt> { def rr : AVX8I opc_rm, bits<8> opc_rmi, string OpcodeStr, def ri : AVXAIi8, VEX; + [(set RC:$dst, (vt (X86VPermilp RC:$src1, (i8 imm:$src2))))]>, VEX; def mi : AVXAIi8, VEX; + [(set RC:$dst, + (vt (X86VPermilp (memop addr:$src1), (i8 imm:$src2))))]>, VEX; } let ExeDomain = SSEPackedSingle in { defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, - memopv4f32, memopv2i64, - int_x86_avx_vpermilvar_ps, - int_x86_avx_vpermil_ps>; + memopv2i64, int_x86_avx_vpermilvar_ps, v4f32>; defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, - memopv8f32, memopv4i64, - int_x86_avx_vpermilvar_ps_256, - int_x86_avx_vpermil_ps_256>; + memopv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>; } let ExeDomain = SSEPackedDouble in { defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem, - memopv2f64, memopv2i64, - int_x86_avx_vpermilvar_pd, - int_x86_avx_vpermil_pd>; + memopv2i64, int_x86_avx_vpermilvar_pd, v2f64>; defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, - memopv4f64, memopv4i64, - int_x86_avx_vpermilvar_pd_256, - int_x86_avx_vpermil_pd_256>; + memopv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>; } let Predicates = [HasAVX] in { -def : Pat<(v8f32 (X86VPermilp VR256:$src1, (i8 imm:$imm))), - (VPERMILPSYri VR256:$src1, imm:$imm)>; -def : Pat<(v4f64 (X86VPermilp VR256:$src1, (i8 imm:$imm))), - (VPERMILPDYri VR256:$src1, imm:$imm)>; def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))), (VPERMILPSYri VR256:$src1, imm:$imm)>; def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))), (VPERMILPDYri VR256:$src1, imm:$imm)>; -def : Pat<(v8f32 (X86VPermilp (memopv8f32 addr:$src1), (i8 imm:$imm))), - (VPERMILPSYmi addr:$src1, imm:$imm)>; -def : Pat<(v4f64 (X86VPermilp (memopv4f64 addr:$src1), (i8 imm:$imm))), - (VPERMILPDYmi addr:$src1, imm:$imm)>; def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)), (i8 imm:$imm))), (VPERMILPSYmi addr:$src1, imm:$imm)>; def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))), (VPERMILPDYmi addr:$src1, imm:$imm)>; -def : Pat<(v4f32 (X86VPermilp VR128:$src1, (i8 imm:$imm))), - (VPERMILPSri VR128:$src1, imm:$imm)>; -def : Pat<(v2f64 (X86VPermilp VR128:$src1, (i8 imm:$imm))), - (VPERMILPDri VR128:$src1, imm:$imm)>; def : Pat<(v2i64 (X86VPermilp VR128:$src1, (i8 imm:$imm))), (VPERMILPDri VR128:$src1, imm:$imm)>; -def : Pat<(v4f32 (X86VPermilp (memopv4f32 addr:$src1), (i8 imm:$imm))), - (VPERMILPSmi addr:$src1, imm:$imm)>; -def : Pat<(v2f64 (X86VPermilp (memopv2f64 addr:$src1), (i8 imm:$imm))), - (VPERMILPDmi addr:$src1, imm:$imm)>; def : Pat<(v2i64 (X86VPermilp (memopv2i64 addr:$src1), (i8 imm:$imm))), (VPERMILPDmi addr:$src1, imm:$imm)>; }