From c8eb880a7fb0958a3a048a82c8558beec11f1209 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 6 Nov 2011 23:04:08 +0000 Subject: [PATCH] More AVX2 instructions and their intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143895 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IntrinsicsX86.td | 21 ++++- .../X86/Disassembler/X86DisassemblerDecoder.c | 11 ++- .../X86DisassemblerDecoderCommon.h | 3 +- lib/Target/X86/X86InstrFormats.td | 2 +- lib/Target/X86/X86InstrSSE.td | 78 +++++++++++++++++-- test/CodeGen/X86/avx2-intrinsics-x86.ll | 40 ++++++++++ utils/TableGen/X86DisassemblerTables.cpp | 7 +- utils/TableGen/X86RecognizableInstr.cpp | 9 ++- 8 files changed, 152 insertions(+), 19 deletions(-) diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index b443a096cf5..753f77bbf5f 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -1112,7 +1112,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx_vperm2f128_ps_256 : - GCCBuiltin<"__builtin_ia32_vperm2f128_ps256">, + GCCBuiltin<"__builtin_ia32_vperm2f128_ps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx_vperm2f128_si_256 : @@ -1716,6 +1716,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; } +// Vector permutation +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
+ def int_x86_avx2_permd : GCCBuiltin<"__builtin_ia32_permvarsi256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; + def int_x86_avx2_permq : GCCBuiltin<"__builtin_ia32_permdi256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx2_permps : GCCBuiltin<"__builtin_ia32_permvarsf256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; + def int_x86_avx2_permpd : GCCBuiltin<"__builtin_ia32_permdf256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx2_vperm2i128 : GCCBuiltin<"__builtin_ia32_permti256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; +} + // Misc. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">, diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index f9b0fe5d51b..1a248073392 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -773,17 +773,20 @@ static int getID(struct InternalInstruction* insn) { if (insn->rexPrefix & 0x08) attrMask |= ATTR_REXW; - + if (getIDWithAttrMask(&instructionID, insn, attrMask)) return -1; - + /* The following clauses compensate for limitations of the tables. */ - - if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW)) { + + if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) && + !(attrMask & ATTR_OPSIZE)) { /* * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit * has precedence since there are no L-bit with W-bit entries in the tables. * So if the L-bit isn't significant we should use the W-bit instead. + * We only need to do this if the instruction doesn't specify OpSize since + * there is a VEX_L_W_OPSIZE table. 
*/ const struct InstructionSpecifier *spec; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 8b7933545a5..a7ef0cc4325 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -111,7 +111,8 @@ enum attributeBits { ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \ ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\ ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\ - ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") + ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") \ + ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize") #define ENUM_ENTRY(n, r, d) n, diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index b7c172e03df..ecd6a93ef0e 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -460,7 +460,7 @@ class AVX28I o, Format F, dag outs, dag ins, string asm, list pattern> : I, T8, OpSize, Requires<[HasAVX2]>; -class AVX2Ii8 o, Format F, dag outs, dag ins, string asm, +class AVX2AIi8 o, Format F, dag outs, dag ins, string asm, list pattern> : Ii8, TA, OpSize, Requires<[HasAVX2]>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index acd9a804386..de7326a2801 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7310,14 +7310,17 @@ def : Pat<(v4i64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))), //===----------------------------------------------------------------------===// // VPERM2F128 - Permute Floating-Point Values in 128-bit chunks // +let neverHasSideEffects = 1 in { def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, VEX_4V; +let mayLoad = 1 in def VPERM2F128rm : 
AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, i8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, VEX_4V; +} def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; @@ -7402,18 +7405,18 @@ defm VCVTPS2PHY : f16c_ps2ph; // AVX2 Instructions //===----------------------------------------------------------------------===// -/// AVX2I_binop_rmi_int - AVX2 binary operator with 8-bit immediate -multiclass AVX2I_binop_rmi_int opc, string OpcodeStr, +/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate +multiclass AVX2_binop_rmi_int opc, string OpcodeStr, Intrinsic IntId, RegisterClass RC, PatFrag memop_frag, X86MemOperand x86memop> { let isCommutable = 1 in - def rri : AVX2Ii8, VEX_4V; - def rmi : AVX2Ii8 opc, string OpcodeStr, } let isCommutable = 0 in { -defm VPBLENDD : AVX2I_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128, - VR128, memopv16i8, i128mem>; -defm VPBLENDDY : AVX2I_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256, - VR256, memopv32i8, i256mem>; +defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128, + VR128, memopv16i8, i128mem>; +defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256, + VR256, memopv32i8, i256mem>; } //===----------------------------------------------------------------------===// @@ -7465,3 +7468,62 @@ defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32, defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, int_x86_avx2_pbroadcastq_128, int_x86_avx2_pbroadcastq_256>; + +//===----------------------------------------------------------------------===// +// VPERM - Permute instructions +// + +multiclass avx2_perm opc, string OpcodeStr, PatFrag mem_frag, + Intrinsic Int> { + def Yrr : AVX28I, VEX_4V; + def Yrm : AVX28I, + VEX_4V; +} + +defm VPERMD : avx2_perm<0x36, "vpermd", memopv8i32, 
int_x86_avx2_permd>; +defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>; + +multiclass avx2_perm_imm opc, string OpcodeStr, PatFrag mem_frag, + Intrinsic Int> { + def Yrr : AVX2AIi8, VEX; + def Yrm : AVX2AIi8, + VEX; +} + +defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, int_x86_avx2_permq>, + VEX_W; +defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>, + VEX_W; + +//===----------------------------------------------------------------------===// +// VPERM2I128 - Permute Integer Values in 128-bit chunks +// +def VPERM2I128rr : AVXAIi8<0x46, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, i8imm:$src3), + "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + [(set VR256:$dst, + (int_x86_avx2_vperm2i128 VR256:$src1, VR256:$src2, imm:$src3))]>, + VEX_4V; +def VPERM2I128rm : AVXAIi8<0x46, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, f256mem:$src2, i8imm:$src3), + "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + [(set VR256:$dst, + (int_x86_avx2_vperm2i128 VR256:$src1, (memopv4i64 addr:$src2), + imm:$src3))]>, + VEX_4V; diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll index 24471e6f1a0..7d67b998d81 100644 --- a/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -846,3 +846,43 @@ define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) { ret <4 x i64> %res } declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly + + +define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) { + ; CHECK: vpermd + %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly + + +define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vpermps + %res = call <8 x float> 
@llvm.x86.avx2.permps(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly + + +define <4 x i64> @test_x86_avx2_permq(<4 x i64> %a0) { + ; CHECK: vpermq + %res = call <4 x i64> @llvm.x86.avx2.permq(<4 x i64> %a0, i8 7) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.permq(<4 x i64>, i8) nounwind readonly + + +define <4 x double> @test_x86_avx2_permpd(<4 x double> %a0) { + ; CHECK: vpermpd + %res = call <4 x double> @llvm.x86.avx2.permpd(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx2.permpd(<4 x double>, i8) nounwind readonly + + +define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) { + ; CHECK: vperm2i128 + %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 1) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp index e8c9a489732..61e94521f59 100644 --- a/utils/TableGen/X86DisassemblerTables.cpp +++ b/utils/TableGen/X86DisassemblerTables.cpp @@ -95,7 +95,10 @@ static inline bool inheritsFrom(InstructionContext child, case IC_VEX_L: case IC_VEX_L_XS: case IC_VEX_L_XD: + return false; case IC_VEX_L_OPSIZE: + return inheritsFrom(child, IC_VEX_L_W_OPSIZE); + case IC_VEX_L_W_OPSIZE: return false; default: llvm_unreachable("Unknown instruction class"); @@ -494,7 +497,9 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const { for (index = 0; index < 256; ++index) { o.indent(i * 2); - if ((index & ATTR_VEXL) && (index & ATTR_OPSIZE)) + if ((index & ATTR_VEXL) && (index & ATTR_REXW) && (index & ATTR_OPSIZE)) + o << "IC_VEX_L_W_OPSIZE"; + else if ((index & ATTR_VEXL) && (index & ATTR_OPSIZE)) o << 
"IC_VEX_L_OPSIZE"; else if ((index & ATTR_VEXL) && (index & ATTR_XD)) o << "IC_VEX_L_XD"; diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index d4c9629226f..3478809c7aa 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -285,9 +285,12 @@ InstructionContext RecognizableInstr::insnContext() const { InstructionContext insnContext; if (HasVEX_4VPrefix || HasVEX_4VOp3Prefix|| HasVEXPrefix) { - if (HasVEX_LPrefix && HasVEX_WPrefix) - llvm_unreachable("Don't support VEX.L and VEX.W together"); - else if (HasOpSizePrefix && HasVEX_LPrefix) + if (HasVEX_LPrefix && HasVEX_WPrefix) { + if (HasOpSizePrefix) + insnContext = IC_VEX_L_W_OPSIZE; + else + llvm_unreachable("Don't support VEX.L and VEX.W together"); + } else if (HasOpSizePrefix && HasVEX_LPrefix) insnContext = IC_VEX_L_OPSIZE; else if (HasOpSizePrefix && HasVEX_WPrefix) insnContext = IC_VEX_W_OPSIZE;