diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index 06ea3ae3b51..58ad8d40afb 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -1570,9 +1570,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_mmx_movnt_dq : GCCBuiltin<"__builtin_ia32_movntq">, Intrinsic<[], [llvm_ptr_ty, llvm_v1i64_ty], []>; -// def int_x86_mmx_palignr_b : GCCBuiltin<"__builtin_ia32_palignr">, -// Intrinsic<[llvm_v1i64_ty], [llvm_1i64_ty, -// llvm_v1i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_mmx_palignr_b : GCCBuiltin<"__builtin_ia32_palignr">, + Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, + llvm_v1i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_mmx_pextr_w : Intrinsic<[llvm_i32_ty], [llvm_v1i64_ty, llvm_i32_ty], diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 11d4179534d..2c8414ec751 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -18,7 +18,7 @@ //===----------------------------------------------------------------------===// let Constraints = "$src1 = $dst" in { - // MMXI_binop_rm - Simple MMX binary operator. + // MMXI_binop_rm - Simple MMX binary operator based on llvm operator. multiclass MMXI_binop_rm opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, bit Commutable = 0> { def rr : MMXI; } + // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic. multiclass MMXI_binop_rm_int opc, string OpcodeStr, Intrinsic IntId, bit Commutable = 0> { def rr : MMXI; } + // MMXI_binop_rm_int2 - Simple MMX binary operator based on intrinsic, with a + // different name for the generated instructions than MMXI_binop_rm uses. + // Thus int2 and rm can coexist for different implementations of the same + // instruction, while int and rm cannot. This is temporary during transition + // to intrinsic-only implementation. When it is removed, remove the FIXME + // from X86RecognizableInstr.cpp. + multiclass MMXI_binop_rm_int2 opc, string OpcodeStr, Intrinsic IntId, + bit Commutable = 0> { + def irr : MMXI { + let isCommutable = Commutable; + } + def irm : MMXI; + } + // MMXI_binop_rm_v1i64 - Simple MMX binary operator whose type is v1i64. // // FIXME: we could eliminate this and use MMXI_binop_rm instead if tblgen knew @@ -188,11 +210,14 @@ def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), // Arithmetic Instructions // -- Addition -defm MMX_PADDB : MMXI_binop_rm<0xFC, "paddb", add, v8i8, 1>; -defm MMX_PADDW : MMXI_binop_rm<0xFD, "paddw", add, v4i16, 1>; -defm MMX_PADDD : MMXI_binop_rm<0xFE, "paddd", add, v2i32, 1>; -defm MMX_PADDQ : MMXI_binop_rm<0xD4, "paddq", add, v1i64, 1>; - +defm MMX_PADDB : MMXI_binop_rm<0xFC, "paddb", add, v8i8, 1>, + MMXI_binop_rm_int2<0xFC, "paddb", int_x86_mmx_padd_b, 1>; +defm MMX_PADDW : MMXI_binop_rm<0xFD, "paddw", add, v4i16, 1>, + MMXI_binop_rm_int2<0xFD, "paddw", int_x86_mmx_padd_w, 1>; +defm MMX_PADDD : MMXI_binop_rm<0xFE, "paddd", add, v2i32, 1>, + MMXI_binop_rm_int2<0xFE, "paddd", int_x86_mmx_padd_d, 1>; +defm MMX_PADDQ : MMXI_binop_rm<0xD4, "paddq", add, v1i64, 1>, + MMXI_binop_rm_int2<0xD4, "paddq", int_x86_mmx_padd_q, 1>; defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, 1>; defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, 1>; @@ -200,10 +225,14 @@ defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, 1>; defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, 1>; // -- Subtraction -defm MMX_PSUBB : MMXI_binop_rm<0xF8, "psubb", sub, v8i8>; -defm MMX_PSUBW : MMXI_binop_rm<0xF9, "psubw", sub, v4i16>; -defm MMX_PSUBD : MMXI_binop_rm<0xFA, "psubd", sub, v2i32>; -defm MMX_PSUBQ : MMXI_binop_rm<0xFB, "psubq", sub, v1i64>; +defm MMX_PSUBB : MMXI_binop_rm<0xF8, "psubb", sub, v8i8>, + MMXI_binop_rm_int2<0xF8, "psubb", int_x86_mmx_psub_b>; +defm MMX_PSUBW : MMXI_binop_rm<0xF9, "psubw", sub, v4i16>, + MMXI_binop_rm_int2<0xF9, "psubw", int_x86_mmx_psub_w>; +defm MMX_PSUBD : MMXI_binop_rm<0xFA, "psubd", sub, v2i32>, + MMXI_binop_rm_int2<0xFA, "psubd", int_x86_mmx_psub_d>; +defm MMX_PSUBQ : MMXI_binop_rm<0xFB, "psubq", sub, v1i64>, + MMXI_binop_rm_int2<0xFB, "psubq", int_x86_mmx_psub_q>; defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b>; defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>; @@ -212,7 +241,8 @@ defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b>; defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w>; // -- Multiplication -defm MMX_PMULLW : MMXI_binop_rm<0xD5, "pmullw", mul, v4i16, 1>; +defm MMX_PMULLW : MMXI_binop_rm<0xD5, "pmullw", mul, v4i16, 1>, + MMXI_binop_rm_int2<0xD5, "pmullw", int_x86_mmx_pmull_w, 1>; defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, 1>; defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, 1>; @@ -233,9 +263,13 @@ defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, 1>; defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, 1>; // Logical Instructions -defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>; -defm MMX_POR : MMXI_binop_rm_v1i64<0xEB, "por" , or, 1>; -defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>; +defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>, + MMXI_binop_rm_int2<0xDB, "pand", int_x86_mmx_pand, 1>; +defm MMX_POR : MMXI_binop_rm_v1i64<0xEB, "por" , or, 1>, + MMXI_binop_rm_int2<0xEB, "por" , int_x86_mmx_por, 1>; +defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>, + MMXI_binop_rm_int2<0xEF, "pxor", int_x86_mmx_pxor, 1>; +defm MMX_PANDN : MMXI_binop_rm_int2<0xDF, "pandn", int_x86_mmx_pandn, 1>; let Constraints = "$src1 = $dst" in { def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg, @@ -363,6 +397,18 @@ let Constraints = "$src1 = $dst" in { (v2i32 (mmx_unpckl VR64:$src1, (bc_v2i32 (load_mmx addr:$src2)))))]>; } +defm MMX_PUNPCKHBW : MMXI_binop_rm_int2<0x68, "punpckhbw", + int_x86_mmx_punpckhbw>; +defm MMX_PUNPCKHWD : MMXI_binop_rm_int2<0x69, "punpckhwd", + int_x86_mmx_punpckhwd>; +defm MMX_PUNPCKHDQ : MMXI_binop_rm_int2<0x6A, "punpckhdq", + int_x86_mmx_punpckhdq>; +defm MMX_PUNPCKLBW : MMXI_binop_rm_int2<0x60, "punpcklbw", + int_x86_mmx_punpcklbw>; +defm MMX_PUNPCKLWD : MMXI_binop_rm_int2<0x61, "punpcklwd", + int_x86_mmx_punpcklwd>; +defm MMX_PUNPCKLDQ : MMXI_binop_rm_int2<0x62, "punpckldq", + int_x86_mmx_punpckldq>; // -- Pack Instructions defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index d203c2733ce..878e26f9a33 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3718,13 +3718,22 @@ def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), // SSSE3 - Packed Align Instruction Patterns //===---------------------------------------------------------------------===// -multiclass ssse3_palign_mm { +multiclass ssse3_palign_mm { def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; + def R64irr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), + (ins VR64:$src1, VR64:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>; + def R64irm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), + (ins VR64:$src1, i64mem:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR64:$dst, (IntId VR64:$src1, + (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>; } multiclass ssse3_palign { @@ -3748,7 +3757,7 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; let Constraints = "$src1 = $dst" in defm PALIGN : ssse3_palign<"palignr">, - ssse3_palign_mm<"palignr">; + ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b>; let AddedComplexity = 5 in { diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index 4dba85b1668..a0d9aa2da26 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -311,7 +311,7 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const { return FILTER_STRONG; // Special cases. - + if (Name.find("PCMPISTRI") != Name.npos && Name != "PCMPISTRI") return FILTER_WEAK; if (Name.find("PCMPESTRI") != Name.npos && Name != "PCMPESTRI") @@ -368,6 +368,12 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const { (Name.find("to") != Name.npos))) return FILTER_WEAK; + // Filter out the intrinsic form of instructions that also have an llvm + // operator form. FIXME this is temporary. + if (Name.find("irm") != Name.npos || + Name.find("irr") != Name.npos) + return FILTER_WEAK; + return FILTER_NORMAL; }