Add SSE2 packed shift intrinsics and instruction definitions.

This preamble precedes the first file header and is not part of any hunk.

Files touched:
  * include/llvm/IntrinsicsX86.td gains eight IntrNoMem intrinsics,
    int_x86_sse2_{psll,psrl}_{w,d,q} and int_x86_sse2_psra_{w,d}.  Each type
    list has the form [T, T, llvm_v4i32_ty] with T one of v8i16, v4i32, v2i64.
  * lib/Target/X86/X86InstrSSE.td gains rr / rm / ri definitions for
    PSLL{W,D,Q}, PSRL{W,D,Q} and PSRA{W,D}.  Each pattern selects the matching
    intrinsic; the rm forms wrap the load in bc_v4i32 and the ri forms wrap
    the immediate in scalar_to_vector.  PSRLDQri moves from MRM7r to MRM3r, so
    it no longer has the same opcode (0x73) and format as PSLLDQri.

Review fixes folded into this revision:
  * Every *rm definition takes an i128mem:$src2 operand but was declared with
    format MRMSrcReg.  They now use MRMSrcMem, the format PSADBWrm already
    uses for the same operand list.
  * The *rr and *rm definitions have no immediate operand, so they now derive
    from PDI (as PSADBWrm does) rather than PDIi8.  The *ri definitions keep
    PDIi8.  NOTE(review): this assumes PDIi8 differs from PDI only in
    declaring an 8-bit immediate; confirm against the class definitions.

Notes for whoever applies this:
  * Line breaks and leading whitespace were reconstructed from a copy of the
    patch that had lost them, so context lines may not match the target files
    byte for byte.  Use whitespace-insensitive matching if the hunks reject.
  * The blob ids on the "index" lines are those of the original patch and do
    not reflect the review fixes above.

diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 7634fb67791..c63323843ae 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -313,12 +313,36 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
 // Integer shift ops.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_sse2_psll_w :
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_psll_d :
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_psll_q :
+              Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
   def int_x86_sse2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi128">,
               Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
                          llvm_int_ty], [IntrNoMem]>;
+  def int_x86_sse2_psrl_w :
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_psrl_d :
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_psrl_q :
+              Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
   def int_x86_sse2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi128">,
               Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
                          llvm_int_ty], [IntrNoMem]>;
+  def int_x86_sse2_psra_w :
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_psra_d :
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
 }
 
 // Conversion ops
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 45093bcaa14..98ca527578c 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1522,10 +1522,108 @@ def PSADBWrm : PDI<0xE0, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2)
 }
 
 let isTwoAddress = 1 in {
+def PSLLWrr : PDI<0xF1, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                  "psllw {$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst, (int_x86_sse2_psll_w VR128:$src1,
+                                     VR128:$src2))]>;
+def PSLLWrm : PDI<0xF1, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                  "psllw {$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst, (int_x86_sse2_psll_w VR128:$src1,
+                                     (bc_v4i32 (loadv2i64 addr:$src2))))]>;
+def PSLLWri : PDIi8<0x71, MRM6r, (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
+                    "psllw {$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_psll_w VR128:$src1,
+                                       (scalar_to_vector (i32 imm:$src2))))]>;
+def PSLLDrr : PDI<0xF2, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                  "pslld {$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst, (int_x86_sse2_psll_d VR128:$src1,
+                                     VR128:$src2))]>;
+def PSLLDrm : PDI<0xF2, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                  "pslld {$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst, (int_x86_sse2_psll_d VR128:$src1,
+                                     (bc_v4i32 (loadv2i64 addr:$src2))))]>;
+def PSLLDri : PDIi8<0x72, MRM6r, (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
+                    "pslld {$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_psll_d VR128:$src1,
+                                       (scalar_to_vector (i32 imm:$src2))))]>;
+def PSLLQrr : PDI<0xF3, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                  "psllq {$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst, (int_x86_sse2_psll_q VR128:$src1,
+                                     VR128:$src2))]>;
+def PSLLQrm : PDI<0xF3, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                  "psllq {$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst, (int_x86_sse2_psll_q VR128:$src1,
+                                     (bc_v4i32 (loadv2i64 addr:$src2))))]>;
+def PSLLQri : PDIi8<0x73, MRM6r, (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
+                    "psllq {$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_psll_q VR128:$src1,
+                                       (scalar_to_vector (i32 imm:$src2))))]>;
 def PSLLDQri : PDIi8<0x73, MRM7r, (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
                      "pslldq {$src2, $dst|$dst, $src2}", []>;
-def PSRLDQri : PDIi8<0x73, MRM7r, (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
+
+def PSRLWrr : PDI<0xD1, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                  "psrlw {$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst, (int_x86_sse2_psrl_w VR128:$src1,
+                                     VR128:$src2))]>;
+def PSRLWrm : PDI<0xD1, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                  "psrlw {$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst, (int_x86_sse2_psrl_w VR128:$src1,
+                                     (bc_v4i32 (loadv2i64 addr:$src2))))]>;
+def PSRLWri : PDIi8<0x71, MRM2r, (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
+                    "psrlw {$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_psrl_w VR128:$src1,
+                                       (scalar_to_vector (i32 imm:$src2))))]>;
+def PSRLDrr : PDI<0xD2, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                  "psrld {$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst, (int_x86_sse2_psrl_d VR128:$src1,
+                                     VR128:$src2))]>;
+def PSRLDrm : PDI<0xD2, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                  "psrld {$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst, (int_x86_sse2_psrl_d VR128:$src1,
+                                     (bc_v4i32 (loadv2i64 addr:$src2))))]>;
+def PSRLDri : PDIi8<0x72, MRM2r, (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
+                    "psrld {$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_psrl_d VR128:$src1,
+                                       (scalar_to_vector (i32 imm:$src2))))]>;
+def PSRLQrr : PDI<0xD3, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                  "psrlq {$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst, (int_x86_sse2_psrl_q VR128:$src1,
+                                     VR128:$src2))]>;
+def PSRLQrm : PDI<0xD3, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                  "psrlq {$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst, (int_x86_sse2_psrl_q VR128:$src1,
+                                     (bc_v4i32 (loadv2i64 addr:$src2))))]>;
+def PSRLQri : PDIi8<0x73, MRM2r, (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
+                    "psrlq {$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_psrl_q VR128:$src1,
+                                       (scalar_to_vector (i32 imm:$src2))))]>;
+def PSRLDQri : PDIi8<0x73, MRM3r, (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
                      "psrldq {$src2, $dst|$dst, $src2}", []>;
+
+def PSRAWrr : PDI<0xE1, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                  "psraw {$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst, (int_x86_sse2_psra_w VR128:$src1,
+                                     VR128:$src2))]>;
+def PSRAWrm : PDI<0xE1, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                  "psraw {$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst, (int_x86_sse2_psra_w VR128:$src1,
+                                     (bc_v4i32 (loadv2i64 addr:$src2))))]>;
+def PSRAWri : PDIi8<0x71, MRM4r, (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
+                    "psraw {$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_psra_w VR128:$src1,
+                                       (scalar_to_vector (i32 imm:$src2))))]>;
+def PSRADrr : PDI<0xE2, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                  "psrad {$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst, (int_x86_sse2_psra_d VR128:$src1,
+                                     VR128:$src2))]>;
+def PSRADrm : PDI<0xE2, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                  "psrad {$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst, (int_x86_sse2_psra_d VR128:$src1,
+                                     (bc_v4i32 (loadv2i64 addr:$src2))))]>;
+def PSRADri : PDIi8<0x72, MRM4r, (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
+                    "psrad {$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_psra_d VR128:$src1,
+                                       (scalar_to_vector (i32 imm:$src2))))]>;
 }
 
 // Logical