From cdd1eeca2c2da2e1c9b48e04f2f779ffe5cf3666 Mon Sep 17 00:00:00 2001 From: Nate Begeman Date: Tue, 12 Feb 2008 22:51:28 +0000 Subject: [PATCH] SSE4.1 64b integer insert/extract pattern support Move formats into the formats file git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@47035 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 13 +++++---- lib/Target/X86/X86Instr64bit.td | 43 ++++++++++++++++++++++++++---- lib/Target/X86/X86InstrFormats.td | 29 ++++++++++++++++++++ lib/Target/X86/X86InstrSSE.td | 29 -------------------- 4 files changed, 73 insertions(+), 41 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ab970e556f8..3e85538d9e0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -636,7 +636,6 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); - // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones. setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); // Custom lower build_vector, vector_shuffle, and extract_vector_elt. @@ -652,9 +651,12 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); - if (Subtarget->is64Bit()) + if (Subtarget->is64Bit()) { + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); + } // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 
for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { @@ -698,11 +700,8 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); if (Subtarget->is64Bit()) { - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); - - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); } } diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 9895853b283..5c7f093f6ab 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -1271,8 +1271,41 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), // X86-64 SSE4.1 Instructions //===----------------------------------------------------------------------===// -// PEXTRB, unary, TA, 0x14, REX.W -// PEXTRW, unary, TA, 0x15, REX.W -// PEXTRQ, unary, TA, 0x16, REX.W -// EXTRACTPS, unary, TA, 0x17, REX.W -// PINSRQ, 2addr, binary, TA, 0x22, REX.W +/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination +multiclass SS41I_extract64 opc, string OpcodeStr> { + def rr : SS4AI, OpSize, REX_W; + def mr : SS4AI, OpSize, REX_W; +} + +defm PEXTRQ : SS41I_extract64<0x16, "pextrq">; + +let isTwoAddress = 1 in { + multiclass SS41I_insert64 opc, string OpcodeStr> { + def rr : SS4AI, + OpSize, REX_W; + def rm : SS4AI, OpSize, REX_W; + } +} + +defm PINSRQ : SS41I_insert64<0x22, "pinsrq">; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 3767e277476..86a327d64fc 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -185,6 +185,35 @@ class S3I o, Format F, dag outs, dag ins, string asm, list pattern> : I, TB, 
OpSize, Requires<[HasSSE3]>; +// SSSE3 Instruction Templates: +// +// SS38I - SSSE3 instructions with T8 prefix. +// SS3AI - SSSE3 instructions with TA prefix. +// +// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version +// uses the MMX registers. We put those instructions here because they better +// fit into the SSSE3 instruction category rather than the MMX category. + +class SS38I o, Format F, dag outs, dag ins, string asm, + list pattern> + : I, T8, Requires<[HasSSSE3]>; +class SS3AI o, Format F, dag outs, dag ins, string asm, + list pattern> + : I, TA, Requires<[HasSSSE3]>; + +// SSE4.1 Instruction Templates: +// +// SS48I - SSE 4.1 instructions with T8 prefix. +// SS4AI - SSE 4.1 instructions with TA prefix. +// +class SS48I o, Format F, dag outs, dag ins, string asm, + list pattern> + : I, T8, Requires<[HasSSE41]>; +class SS4AI o, Format F, dag outs, dag ins, string asm, + list pattern> + : I, TA, Requires<[HasSSE41]>; + + // X86-64 Instruction templates... // diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index e50716b2c20..183ee2c69f9 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2434,22 +2434,6 @@ let AddedComplexity = 20 in // SSSE3 Instructions //===----------------------------------------------------------------------===// -// SSSE3 Instruction Templates: -// -// SS38I - SSSE3 instructions with T8 prefix. -// SS3AI - SSSE3 instructions with TA prefix. -// -// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version -// uses the MMX registers. We put those instructions here because they better -// fit into the SSSE3 instruction category rather than the MMX category. 
- -class SS38I o, Format F, dag outs, dag ins, string asm, - list pattern> - : I, T8, Requires<[HasSSSE3]>; -class SS3AI o, Format F, dag outs, dag ins, string asm, - list pattern> - : I, TA, Requires<[HasSSSE3]>; - /// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8. multiclass SS3I_unop_rm_int_8 opc, string OpcodeStr, Intrinsic IntId64, Intrinsic IntId128> { @@ -3038,19 +3022,6 @@ def : Pat<(store (v16i8 VR128:$src), addr:$dst), // SSE4.1 Instructions //===----------------------------------------------------------------------===// -// SSE4.1 Instruction Templates: -// -// SS48I - SSE 4.1 instructions with T8 prefix. -// SS41AI - SSE 4.1 instructions with TA prefix. -// -class SS48I o, Format F, dag outs, dag ins, string asm, - list pattern> - : I, T8, Requires<[HasSSE41]>; -class SS4AI o, Format F, dag outs, dag ins, string asm, - list pattern> - : I, TA, Requires<[HasSSE41]>; - - multiclass sse41_fp_unop_rm opcss, bits<8> opcps, bits<8> opcsd, bits<8> opcpd, string OpcodeStr,