From 6bd9567a6a1ba62118cdd258ddc52ea8f82ff511 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 16 Jun 2008 20:29:38 +0000 Subject: [PATCH] - Add "Commutative" property to intrinsics. This allows tblgen to generate the commuted variants for dagisel matching code. - Mark lots of X86 intrinsics as "Commutative" to allow load folding. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52353 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Intrinsics.td | 3 + include/llvm/IntrinsicsX86.td | 216 +++++++++++++++----------- test/CodeGen/X86/commute-intrinsic.ll | 15 ++ utils/TableGen/CodeGenDAGPatterns.cpp | 36 ++++- utils/TableGen/CodeGenDAGPatterns.h | 4 + utils/TableGen/CodeGenIntrinsics.h | 4 + utils/TableGen/CodeGenTarget.cpp | 3 + 7 files changed, 189 insertions(+), 92 deletions(-) create mode 100644 test/CodeGen/X86/commute-intrinsic.ll diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index 18c42e41f8a..866107cbfb3 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -48,6 +48,9 @@ def IntrWriteArgMem : IntrinsicProperty; // default if the intrinsic has no other Intr*Mem property. def IntrWriteMem : IntrinsicProperty; +// Commutative - This intrinsic is commutative: X op Y == Y op X. +def Commutative : IntrinsicProperty; + //===----------------------------------------------------------------------===// // Types used by intrinsics. //===----------------------------------------------------------------------===// diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index 372f7211df6..dbb8496cb41 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -19,13 +19,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse_add_ss : GCCBuiltin<"__builtin_ia32_addss">, Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, - llvm_v4f32_ty], [IntrNoMem]>; + llvm_v4f32_ty], [IntrNoMem, Commutative]>; def int_x86_sse_sub_ss : GCCBuiltin<"__builtin_ia32_subss">, Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_sse_mul_ss : GCCBuiltin<"__builtin_ia32_mulss">, Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, - llvm_v4f32_ty], [IntrNoMem]>; + llvm_v4f32_ty], [IntrNoMem, Commutative]>; def int_x86_sse_div_ss : GCCBuiltin<"__builtin_ia32_divss">, Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; @@ -176,13 +176,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse2_add_sd : GCCBuiltin<"__builtin_ia32_addsd">, Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, - llvm_v2f64_ty], [IntrNoMem]>; + llvm_v2f64_ty], [IntrNoMem, Commutative]>; def int_x86_sse2_sub_sd : GCCBuiltin<"__builtin_ia32_subsd">, Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_sse2_mul_sd : GCCBuiltin<"__builtin_ia32_mulsd">, Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, - llvm_v2f64_ty], [IntrNoMem]>; + llvm_v2f64_ty], [IntrNoMem, Commutative]>; def int_x86_sse2_div_sd : GCCBuiltin<"__builtin_ia32_divsd">, Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; @@ -256,16 +256,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb128">, Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, - llvm_v16i8_ty], [IntrNoMem]>; + llvm_v16i8_ty], [IntrNoMem, Commutative]>; def int_x86_sse2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw128">, Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, - llvm_v8i16_ty], [IntrNoMem]>; + llvm_v8i16_ty], [IntrNoMem, Commutative]>; def int_x86_sse2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb128">, Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, - llvm_v16i8_ty], [IntrNoMem]>; + llvm_v16i8_ty], [IntrNoMem, Commutative]>; def int_x86_sse2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw128">, Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, - llvm_v8i16_ty], [IntrNoMem]>; + llvm_v8i16_ty], [IntrNoMem, Commutative]>; def int_x86_sse2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb128">, Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -280,37 +280,37 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_v8i16_ty], [IntrNoMem]>; def int_x86_sse2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw128">, Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, - llvm_v8i16_ty], [IntrNoMem]>; + llvm_v8i16_ty], [IntrNoMem, Commutative]>; def int_x86_sse2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw128">, Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, - llvm_v8i16_ty], [IntrNoMem]>; + llvm_v8i16_ty], [IntrNoMem, Commutative]>; def int_x86_sse2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq128">, Intrinsic<[llvm_v2i64_ty, llvm_v4i32_ty, - llvm_v4i32_ty], [IntrNoMem]>; + llvm_v4i32_ty], [IntrNoMem, Commutative]>; def int_x86_sse2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd128">, Intrinsic<[llvm_v4i32_ty, llvm_v8i16_ty, - llvm_v8i16_ty], [IntrNoMem]>; + llvm_v8i16_ty], [IntrNoMem, Commutative]>; def int_x86_sse2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb128">, Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, - llvm_v16i8_ty], [IntrNoMem]>; + llvm_v16i8_ty], [IntrNoMem, Commutative]>; def int_x86_sse2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw128">, Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, - llvm_v8i16_ty], [IntrNoMem]>; + llvm_v8i16_ty], [IntrNoMem, Commutative]>; def int_x86_sse2_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub128">, Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, - llvm_v16i8_ty], [IntrNoMem]>; + llvm_v16i8_ty], [IntrNoMem, Commutative]>; def int_x86_sse2_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw128">, Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, - llvm_v8i16_ty], [IntrNoMem]>; + llvm_v8i16_ty], [IntrNoMem, Commutative]>; def int_x86_sse2_pminu_b : GCCBuiltin<"__builtin_ia32_pminub128">, Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, - llvm_v16i8_ty], [IntrNoMem]>; + llvm_v16i8_ty], [IntrNoMem, Commutative]>; def int_x86_sse2_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw128">, Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, - llvm_v8i16_ty], [IntrNoMem]>; + llvm_v8i16_ty], [IntrNoMem, Commutative]>; def int_x86_sse2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw128">, Intrinsic<[llvm_v2i64_ty, llvm_v16i8_ty, - llvm_v16i8_ty], [IntrNoMem]>; + llvm_v16i8_ty], [IntrNoMem, Commutative]>; } // Integer shift ops. @@ -553,24 +553,24 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_ssse3_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw">, Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, - llvm_v4i16_ty], [IntrNoMem]>; + llvm_v4i16_ty], [IntrNoMem, Commutative]>; def int_x86_ssse3_phadd_w_128 : GCCBuiltin<"__builtin_ia32_phaddw128">, Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, - llvm_v8i16_ty], [IntrNoMem]>; + llvm_v8i16_ty], [IntrNoMem, Commutative]>; def int_x86_ssse3_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd">, Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty, - llvm_v2i32_ty], [IntrNoMem]>; + llvm_v2i32_ty], [IntrNoMem, Commutative]>; def int_x86_ssse3_phadd_d_128 : GCCBuiltin<"__builtin_ia32_phaddd128">, Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, - llvm_v4i32_ty], [IntrNoMem]>; + llvm_v4i32_ty], [IntrNoMem, Commutative]>; def int_x86_ssse3_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw">, Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, - llvm_v4i16_ty], [IntrNoMem]>; + llvm_v4i16_ty], [IntrNoMem, Commutative]>; def int_x86_ssse3_phadd_sw_128 : GCCBuiltin<"__builtin_ia32_phaddsw128">, Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, - llvm_v4i32_ty], [IntrNoMem]>; + llvm_v4i32_ty], [IntrNoMem, Commutative]>; def int_x86_ssse3_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw">, Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, @@ -595,17 +595,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_ssse3_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw">, Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, - llvm_v4i16_ty], [IntrNoMem]>; + llvm_v4i16_ty], [IntrNoMem, Commutative]>; def int_x86_ssse3_pmadd_ub_sw_128 : GCCBuiltin<"__builtin_ia32_pmaddubsw128">, Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, - llvm_v8i16_ty], [IntrNoMem]>; + llvm_v8i16_ty], [IntrNoMem, Commutative]>; def int_x86_ssse3_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw">, Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, - llvm_v4i16_ty], [IntrNoMem]>; + llvm_v4i16_ty], [IntrNoMem, Commutative]>; def int_x86_ssse3_pmul_hr_sw_128 : GCCBuiltin<"__builtin_ia32_pmulhrsw128">, Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, - llvm_v8i16_ty], [IntrNoMem]>; + llvm_v8i16_ty], [IntrNoMem, Commutative]>; } // Shuffle ops @@ -692,128 +692,170 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Vector sign and zero extend let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_pmovsxbd : GCCBuiltin<"__builtin_ia32_pmovsxbd128">, - Intrinsic<[llvm_v4i32_ty, llvm_v16i8_ty]>; + Intrinsic<[llvm_v4i32_ty, llvm_v16i8_ty], + [IntrNoMem]>; def int_x86_sse41_pmovsxbq : GCCBuiltin<"__builtin_ia32_pmovsxbq128">, - Intrinsic<[llvm_v2i64_ty, llvm_v16i8_ty]>; + Intrinsic<[llvm_v2i64_ty, llvm_v16i8_ty], + [IntrNoMem]>; def int_x86_sse41_pmovsxbw : GCCBuiltin<"__builtin_ia32_pmovsxbw128">, - Intrinsic<[llvm_v8i16_ty, llvm_v16i8_ty]>; + Intrinsic<[llvm_v8i16_ty, llvm_v16i8_ty], + [IntrNoMem]>; def int_x86_sse41_pmovsxdq : GCCBuiltin<"__builtin_ia32_pmovsxdq128">, - Intrinsic<[llvm_v2i64_ty, llvm_v4i32_ty]>; + Intrinsic<[llvm_v2i64_ty, llvm_v4i32_ty], + [IntrNoMem]>; def int_x86_sse41_pmovsxwd : GCCBuiltin<"__builtin_ia32_pmovsxwd128">, - Intrinsic<[llvm_v4i32_ty, llvm_v8i16_ty]>; + Intrinsic<[llvm_v4i32_ty, llvm_v8i16_ty], + [IntrNoMem]>; def int_x86_sse41_pmovsxwq : GCCBuiltin<"__builtin_ia32_pmovsxwq128">, - Intrinsic<[llvm_v2i64_ty, llvm_v8i16_ty]>; + Intrinsic<[llvm_v2i64_ty, llvm_v8i16_ty], + [IntrNoMem]>; def int_x86_sse41_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd128">, - Intrinsic<[llvm_v4i32_ty, llvm_v16i8_ty]>; + Intrinsic<[llvm_v4i32_ty, llvm_v16i8_ty], + [IntrNoMem]>; def int_x86_sse41_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq128">, - Intrinsic<[llvm_v2i64_ty, llvm_v16i8_ty]>; + Intrinsic<[llvm_v2i64_ty, llvm_v16i8_ty], + [IntrNoMem]>; def int_x86_sse41_pmovzxbw : GCCBuiltin<"__builtin_ia32_pmovzxbw128">, - Intrinsic<[llvm_v8i16_ty, llvm_v16i8_ty]>; + Intrinsic<[llvm_v8i16_ty, llvm_v16i8_ty], + [IntrNoMem]>; def int_x86_sse41_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq128">, - Intrinsic<[llvm_v2i64_ty, llvm_v4i32_ty]>; + Intrinsic<[llvm_v2i64_ty, llvm_v4i32_ty], + [IntrNoMem]>; def int_x86_sse41_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd128">, - Intrinsic<[llvm_v4i32_ty, llvm_v8i16_ty]>; + Intrinsic<[llvm_v4i32_ty, llvm_v8i16_ty], + [IntrNoMem]>; def int_x86_sse41_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq128">, - Intrinsic<[llvm_v2i64_ty, llvm_v8i16_ty]>; + Intrinsic<[llvm_v2i64_ty, llvm_v8i16_ty], + [IntrNoMem]>; } // Vector min element let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_phminposuw : GCCBuiltin<"__builtin_ia32_phminposuw128">, - Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty]>; + Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; } // Vector compare, min, max let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_pcmpeqq : GCCBuiltin<"__builtin_ia32_pcmpeqq">, - Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty]>; + Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem, Commutative]>; def int_x86_sse41_pmaxsb : GCCBuiltin<"__builtin_ia32_pmaxsb128">, - Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty]>; + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem, Commutative]>; def int_x86_sse41_pmaxsd : GCCBuiltin<"__builtin_ia32_pmaxsd128">, - Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty]>; + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem, Commutative]>; def int_x86_sse41_pmaxud : GCCBuiltin<"__builtin_ia32_pmaxud128">, - Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty]>; + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem, Commutative]>; def int_x86_sse41_pmaxuw : GCCBuiltin<"__builtin_ia32_pmaxuw128">, - Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty]>; + Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem, Commutative]>; def int_x86_sse41_pminsb : GCCBuiltin<"__builtin_ia32_pminsb128">, - Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty]>; + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem, Commutative]>; def int_x86_sse41_pminsd : GCCBuiltin<"__builtin_ia32_pminsd128">, - Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty]>; + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem, Commutative]>; def int_x86_sse41_pminud : GCCBuiltin<"__builtin_ia32_pminud128">, - Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty]>; + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem, Commutative]>; def int_x86_sse41_pminuw : GCCBuiltin<"__builtin_ia32_pminuw128">, - Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty]>; + Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem, Commutative]>; } // Vector pack let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_packusdw : GCCBuiltin<"__builtin_ia32_packusdw128">, - Intrinsic<[llvm_v8i16_ty, llvm_v4i32_ty, llvm_v4i32_ty]>; + Intrinsic<[llvm_v8i16_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; } // Vector multiply let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_pmuldq : GCCBuiltin<"__builtin_ia32_pmuldq128">, - Intrinsic<[llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty]>; + Intrinsic<[llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem, Commutative]>; def int_x86_sse41_pmulld : GCCBuiltin<"__builtin_ia32_pmulld128">, - Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty]>; + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem, Commutative]>; } // Vector extract let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_pextrb : - Intrinsic<[llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty]>; + Intrinsic<[llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem]>; def int_x86_sse41_pextrd : - Intrinsic<[llvm_i32_ty, llvm_v4i32_ty, llvm_i32_ty]>; + Intrinsic<[llvm_i32_ty, llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem]>; def int_x86_sse41_pextrq : - Intrinsic<[llvm_i64_ty, llvm_v2i64_ty, llvm_i32_ty]>; + Intrinsic<[llvm_i64_ty, llvm_v2i64_ty, llvm_i32_ty], + [IntrNoMem]>; def int_x86_sse41_extractps : GCCBuiltin<"__builtin_ia32_extractps128">, - Intrinsic<[llvm_i32_ty, llvm_v4f32_ty, llvm_i32_ty]>; + Intrinsic<[llvm_i32_ty, llvm_v4f32_ty, llvm_i32_ty], + [IntrNoMem]>; } // Vector insert let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_pinsrb : GCCBuiltin<"__builtin_ia32_vec_set_v16qi">, - Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty]>; + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; def int_x86_sse41_pinsrd : GCCBuiltin<"__builtin_ia32_vec_set_v4si">, - Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty]>; + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; def int_x86_sse41_pinsrq : GCCBuiltin<"__builtin_ia32_vec_set_v2di">, - Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty]>; + Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty], + [IntrNoMem]>; def int_x86_sse41_insertps : GCCBuiltin<"__builtin_ia32_insertps128">, - Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty]>; + Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty], + [IntrNoMem]>; } // Vector blend let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb128">, - Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty]>; + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; def int_x86_sse41_pblendw : GCCBuiltin<"__builtin_ia32_pblendw128">, - Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty]>; + Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem]>; def int_x86_sse41_blendpd : GCCBuiltin<"__builtin_ia32_blendpd">, - Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty]>; + Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty], + [IntrNoMem]>; def int_x86_sse41_blendps : GCCBuiltin<"__builtin_ia32_blendps">, - Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty]>; + Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty], + [IntrNoMem]>; def int_x86_sse41_blendvpd : GCCBuiltin<"__builtin_ia32_blendvpd">, - Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty]>; + Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; def int_x86_sse41_blendvps : GCCBuiltin<"__builtin_ia32_blendvps">, - Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty]>; + Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; } // Vector dot product let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_dppd : GCCBuiltin<"__builtin_ia32_dppd">, - Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty]>; + Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty], + [IntrNoMem, Commutative]>; def int_x86_sse41_dpps : GCCBuiltin<"__builtin_ia32_dpps">, - Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty]>; + Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty], + [IntrNoMem, Commutative]>; } // Vector sum of absolute differences let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">, - Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty]>; + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, Commutative]>; } -// Vector sum of absolute differences +// Cacheability support ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa">, Intrinsic<[llvm_v2i64_ty, llvm_ptr_ty], [IntrReadMem]>; @@ -836,17 +878,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Addition def int_x86_mmx_padds_b : GCCBuiltin<"__builtin_ia32_paddsb">, Intrinsic<[llvm_v8i8_ty, llvm_v8i8_ty, - llvm_v8i8_ty], [IntrNoMem]>; + llvm_v8i8_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_padds_w : GCCBuiltin<"__builtin_ia32_paddsw">, Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, - llvm_v4i16_ty], [IntrNoMem]>; + llvm_v4i16_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb">, Intrinsic<[llvm_v8i8_ty, llvm_v8i8_ty, - llvm_v8i8_ty], [IntrNoMem]>; + llvm_v8i8_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw">, Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, - llvm_v4i16_ty], [IntrNoMem]>; + llvm_v4i16_ty], [IntrNoMem, Commutative]>; // Subtraction def int_x86_mmx_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb">, @@ -866,45 +908,45 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Multiplication def int_x86_mmx_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw">, Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, - llvm_v4i16_ty], [IntrNoMem]>; + llvm_v4i16_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw">, Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, - llvm_v4i16_ty], [IntrNoMem]>; + llvm_v4i16_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq">, Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty, - llvm_v2i32_ty], [IntrNoMem]>; + llvm_v2i32_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd">, Intrinsic<[llvm_v2i32_ty, llvm_v4i16_ty, - llvm_v4i16_ty], [IntrNoMem]>; + llvm_v4i16_ty], [IntrNoMem, Commutative]>; // Averages def int_x86_mmx_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb">, Intrinsic<[llvm_v8i8_ty, llvm_v8i8_ty, - llvm_v8i8_ty], [IntrNoMem]>; + llvm_v8i8_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw">, Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, - llvm_v4i16_ty], [IntrNoMem]>; + llvm_v4i16_ty], [IntrNoMem, Commutative]>; // Maximum def int_x86_mmx_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub">, Intrinsic<[llvm_v8i8_ty, llvm_v8i8_ty, - llvm_v8i8_ty], [IntrNoMem]>; + llvm_v8i8_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw">, Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, - llvm_v4i16_ty], [IntrNoMem]>; + llvm_v4i16_ty], [IntrNoMem, Commutative]>; // Minimum def int_x86_mmx_pminu_b : GCCBuiltin<"__builtin_ia32_pminub">, Intrinsic<[llvm_v8i8_ty, llvm_v8i8_ty, - llvm_v8i8_ty], [IntrNoMem]>; + llvm_v8i8_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw">, Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, - llvm_v4i16_ty], [IntrNoMem]>; + llvm_v4i16_ty], [IntrNoMem, Commutative]>; // Packed sum of absolute differences def int_x86_mmx_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw">, Intrinsic<[llvm_v4i16_ty, llvm_v8i8_ty, - llvm_v8i8_ty], [IntrNoMem]>; + llvm_v8i8_ty], [IntrNoMem, Commutative]>; } // Integer shift ops. diff --git a/test/CodeGen/X86/commute-intrinsic.ll b/test/CodeGen/X86/commute-intrinsic.ll new file mode 100644 index 00000000000..12c0e03f6f4 --- /dev/null +++ b/test/CodeGen/X86/commute-intrinsic.ll @@ -0,0 +1,15 @@ +; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+sse2 -relocation-model=static | not grep movaps + +@a = external global <2 x i64> ; <<2 x i64>*> [#uses=1] + +define <2 x i64> @madd(<2 x i64> %b) nounwind { +entry: + %tmp2 = load <2 x i64>* @a, align 16 ; <<2 x i64>> [#uses=1] + %tmp6 = bitcast <2 x i64> %b to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp9 = bitcast <2 x i64> %tmp2 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp11 = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd( <8 x i16> %tmp9, <8 x i16> %tmp6 ) nounwind readnone ; <<4 x i32>> [#uses=1] + %tmp14 = bitcast <4 x i32> %tmp11 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp14 +} + +declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp index 882c7245f61..44dbe6c95d1 100644 --- a/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/utils/TableGen/CodeGenDAGPatterns.cpp @@ -742,6 +742,15 @@ getIntrinsicInfo(const CodeGenDAGPatterns &CDP) const { return &CDP.getIntrinsicInfo(IID); } +/// isCommutativeIntrinsic - Return true if the node corresponds to a +/// commutative intrinsic. +bool +TreePatternNode::isCommutativeIntrinsic(const CodeGenDAGPatterns &CDP) const { + if (const CodeGenIntrinsic *Int = getIntrinsicInfo(CDP)) + return Int->isCommutative; + return false; +} + /// ApplyTypeConstraints - Apply all of the type constraints relevent to /// this node and its children in the tree. This returns true if it makes a @@ -999,11 +1008,13 @@ bool TreePatternNode::canPatternMatch(std::string &Reason, // If this node is a commutative operator, check that the LHS isn't an // immediate. const SDNodeInfo &NodeInfo = CDP.getSDNodeInfo(getOperator()); - if (NodeInfo.hasProperty(SDNPCommutative)) { + bool isCommIntrinsic = isCommutativeIntrinsic(CDP); + if (NodeInfo.hasProperty(SDNPCommutative) || isCommIntrinsic) { // Scan all of the operands of the node and make sure that only the last one // is a constant node, unless the RHS also is. if (!OnlyOnRHSOfCommutative(getChild(getNumChildren()-1))) { - for (unsigned i = 0, e = getNumChildren()-1; i != e; ++i) + bool Skip = isCommIntrinsic ? 1 : 0; // First operand is intrinsic id. + for (unsigned i = Skip, e = getNumChildren()-1; i != e; ++i) if (OnlyOnRHSOfCommutative(getChild(i))) { Reason="Immediate value must be on the RHS of commutative operators!"; return false; @@ -2250,8 +2261,10 @@ static void GenerateVariantsOf(TreePatternNode *N, CombineChildVariants(N, ChildVariants, OutVariants, CDP, DepVars); // If this node is commutative, consider the commuted order. - if (NodeInfo.hasProperty(SDNPCommutative)) { - assert(N->getNumChildren()==2 &&"Commutative but doesn't have 2 children!"); + bool isCommIntrinsic = N->isCommutativeIntrinsic(CDP); + if (NodeInfo.hasProperty(SDNPCommutative) || isCommIntrinsic) { + assert((N->getNumChildren()==2 || isCommIntrinsic) && + "Commutative but doesn't have 2 children!"); // Don't count children which are actually register references. unsigned NC = 0; for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i) { @@ -2265,7 +2278,20 @@ static void GenerateVariantsOf(TreePatternNode *N, NC++; } // Consider the commuted order. - if (NC == 2) + if (isCommIntrinsic) { + // Commutative intrinsic. First operand is the intrinsic id, 2nd and 3rd + // operands are the commutative operands, and there might be more operands + // after those. + assert(NC >= 3 && + "Commutative intrinsic should have at least 3 childrean!"); + std::vector > Variants; + Variants.push_back(ChildVariants[0]); // Intrinsic id. + Variants.push_back(ChildVariants[2]); + Variants.push_back(ChildVariants[1]); + for (unsigned i = 3; i != NC; ++i) + Variants.push_back(ChildVariants[i]); + CombineChildVariants(N, Variants, OutVariants, CDP, DepVars); + } else if (NC == 2) CombineChildVariants(N, ChildVariants[1], ChildVariants[0], OutVariants, CDP, DepVars); } diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h index 40cfa88cd2f..50c39bcf169 100644 --- a/utils/TableGen/CodeGenDAGPatterns.h +++ b/utils/TableGen/CodeGenDAGPatterns.h @@ -221,6 +221,10 @@ public: /// getIntrinsicInfo - If this node corresponds to an intrinsic, return the /// CodeGenIntrinsic information for it, otherwise return a null pointer. const CodeGenIntrinsic *getIntrinsicInfo(const CodeGenDAGPatterns &CDP) const; + + /// isCommutativeIntrinsic - Return true if the node is an intrinsic which is + /// marked isCommutative. + bool isCommutativeIntrinsic(const CodeGenDAGPatterns &CDP) const; void print(std::ostream &OS) const; void dump() const; diff --git a/utils/TableGen/CodeGenIntrinsics.h b/utils/TableGen/CodeGenIntrinsics.h index a66c30b6cb8..4de93864a8a 100644 --- a/utils/TableGen/CodeGenIntrinsics.h +++ b/utils/TableGen/CodeGenIntrinsics.h @@ -49,6 +49,10 @@ namespace llvm { // types. bool isOverloaded; + // isCommutative - True if the intrinsic is commutative. + // + bool isCommutative; + CodeGenIntrinsic(Record *R); }; diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp index a76f5cd55f8..9b3864780da 100644 --- a/utils/TableGen/CodeGenTarget.cpp +++ b/utils/TableGen/CodeGenTarget.cpp @@ -404,6 +404,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) { std::string DefName = R->getName(); ModRef = WriteMem; isOverloaded = false; + isCommutative = false; if (DefName.size() <= 4 || std::string(DefName.begin(), DefName.begin()+4) != "int_") @@ -469,6 +470,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) { ModRef = WriteArgMem; else if (Property->getName() == "IntrWriteMem") ModRef = WriteMem; + else if (Property->getName() == "Commutative") + isCommutative = true; else assert(0 && "Unknown property!"); }