From 4db2dbf921d76cb0ea018a4813ad4c4032cdf15a Mon Sep 17 00:00:00 2001
From: Jakob Stoklund Olesen
Date: Thu, 26 Jul 2012 22:59:06 +0000
Subject: [PATCH] Eliminate sub_ss, sub_sd from broadcast patterns.

The (COPY_TO_REGCLASS GR32:$src, VR128) pattern looks odd, but
copyPhysReg does the right thing with it. (The old pattern would
eventually produce the same cross-class copy).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160830 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrSSE.td | 58 ++++++++++-------------------------
 1 file changed, 16 insertions(+), 42 deletions(-)

diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index c4d5ad07660..fb3953b2c8b 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7704,24 +7704,18 @@ let Predicates = [HasAVX2] in {
   // is used by additional users, which prevents the pattern selection.
   let AddedComplexity = 20 in {
   def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
-            (VBROADCASTSSrr
-              (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
+            (VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
   def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
-            (VBROADCASTSSYrr
-              (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
+            (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
   def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
-            (VBROADCASTSDYrr
-              (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>;
+            (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
 
   def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
-            (VBROADCASTSSrr
-              (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>;
+            (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
   def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
-            (VBROADCASTSSYrr
-              (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>;
+            (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
   def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
-            (VBROADCASTSDYrr
-              (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd))>;
+            (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
   }
 }
 
@@ -7745,46 +7739,26 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
   let AddedComplexity = 20 in {
   // 128bit broadcasts:
   def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
-            (VPSHUFDri
-              (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0)>;
+            (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
   def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
             (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
-              (VPSHUFDri
-                (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0),
-              sub_xmm),
-              (VPSHUFDri
-                (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss),
-                0), 1)>;
+              (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
+              (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
   def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
             (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
-              (VPSHUFDri
-                (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd),
-                0x44),
-              sub_xmm),
-              (VPSHUFDri
-                (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd),
-                0x44), 1)>;
+              (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm),
+              (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>;
 
   def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
-            (VPSHUFDri
-              (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), 0)>;
+            (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>;
   def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
             (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
-              (VPSHUFDri
-                (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), 0),
-              sub_xmm),
-              (VPSHUFDri
-                (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss),
-                0), 1)>;
+              (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), sub_xmm),
+              (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), 1)>;
   def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
             (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
-              (VPSHUFDri
-                (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd),
-                0x44),
-              sub_xmm),
-              (VPSHUFDri
-                (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd),
-                0x44), 1)>;
+              (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm),
+              (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
   }
 }
 