AVX: We lower VECTOR_SHUFFLE and BUILD_VECTOR nodes into vbroadcast instructions

using the pattern (vbroadcast (i32load src)). In some cases, after we generate
this pattern new users are added to the load node, which prevent the selection
of the blend pattern. This commit provides fallback patterns which perform
in-vector broadcast (using in-vector vbroadcast in AVX2 and pshufd on AVX1).



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@155437 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nadav Rotem 2012-04-24 11:07:03 +00:00
parent adb082cd6a
commit d1a79136e3
2 changed files with 83 additions and 5 deletions

View File

@ -7723,6 +7723,20 @@ let Predicates = [HasAVX2] in {
(VPBROADCASTQrm addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
(VPBROADCASTQYrm addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
let AddedComplexity = 20 in {
def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
(VBROADCASTSSrr
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
(VBROADCASTSSYrr
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
(VBROADCASTSDrr
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>;
}
}
// AVX1 broadcast patterns
@ -7735,11 +7749,38 @@ def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
(VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
(VBROADCASTSDrm addr:$src)>;
def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
(VBROADCASTSSrm addr:$src)>;
def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
(VBROADCASTSSrm addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
let AddedComplexity = 20 in {
// 128bit broadcasts:
def : Pat<(v2f64 (X86VBroadcast FR64:$src)),
(VPSHUFDri
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd), 0)>;
def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
(VPSHUFDri
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0)>;
def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
(VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
(VPSHUFDri
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0),
sub_xmm),
(VPSHUFDri
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss),
0), 1)>;
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
(VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
(VPSHUFDri
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd), 0),
sub_xmm),
(VPSHUFDri
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd),
0), 1)>;
}
}
//===----------------------------------------------------------------------===//

View File

@ -160,6 +160,15 @@ entry:
ret <8 x i32> %g
}
; CHECK: V113
; CHECK: vbroadcastss
; CHECK: ret
define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
entry:
%g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
ret <8 x float> %g
}
; CHECK: _e2
; CHECK: vbroadcastss
; CHECK: ret
@ -179,9 +188,37 @@ define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
%vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
%vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
%vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
%vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 3
%vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 3
%vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 3
%vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 3
%vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 4
%vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 5
%vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 6
%vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 7
ret <8 x i8> %vecinit7.i
}
define void @crash() nounwind alwaysinline {
WGLoopsEntry:
br i1 undef, label %ret, label %footer329VF
footer329VF:
%A.0.inVF = fmul float undef, 6.553600e+04
%B.0.in407VF = fmul <8 x float> undef, <float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04>
%A.0VF = fptosi float %A.0.inVF to i32
%B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32>
%0 = and <8 x i32> %B.0408VF, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%1 = and i32 %A.0VF, 65535
%temp1098VF = insertelement <8 x i32> undef, i32 %1, i32 0
%vector1099VF = shufflevector <8 x i32> %temp1098VF, <8 x i32> undef, <8 x i32> zeroinitializer
br i1 undef, label %preload1201VF, label %footer349VF
preload1201VF:
br label %footer349VF
footer349VF:
%2 = mul nsw <8 x i32> undef, %0
%3 = mul nsw <8 x i32> undef, %vector1099VF
br label %footer329VF
ret:
ret void
}