Files
llvm-6502/test/CodeGen/X86/vec_set-3.ll
Chandler Carruth ae98867126 [x86] Teach the new v4i32 shuffle lowering some more tricks to recognize
vzext patterns and insert-element patterns that for SSE4 have dedicated
instructions.

With this we can enable the experimental mode in a regression test that
happens to cover some of the past set of issues. You can see that the
new logic does significantly better here on the floating point cases.

A follow-up to this change and the previous ones will hoist the logic
into helpers so it can be shared across element type sizes as in this
particular case it generalizes cleanly.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217136 91177308-0d34-0410-b5e6-96231b3b80d8
2014-09-04 09:26:30 +00:00

55 lines
1.8 KiB
LLVM

; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn | FileCheck %s
; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-EXP
define <4 x float> @test(float %a) {
; CHECK-LABEL: test:
; CHECK: movss {{.*}}, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; CHECK-NEXT: retl
;
; CHECK-EXP-LABEL: test:
; CHECK-EXP: insertps $285, {{.*}}, %xmm0
; CHECK-EXP-NEXT: retl
entry:
%tmp = insertelement <4 x float> zeroinitializer, float %a, i32 1
%tmp5 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 2
%tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3
ret <4 x float> %tmp6
}
define <2 x i64> @test2(i32 %a) {
; CHECK-LABEL: test2:
; CHECK: movd {{.*}}, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
; CHECK-NEXT: retl
;
; CHECK-EXP-LABEL: test2:
; CHECK-EXP: movd {{.*}}, %xmm0
; CHECK-EXP-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
; CHECK-EXP-NEXT: retl
entry:
%tmp7 = insertelement <4 x i32> zeroinitializer, i32 %a, i32 2
%tmp9 = insertelement <4 x i32> %tmp7, i32 0, i32 3
%tmp10 = bitcast <4 x i32> %tmp9 to <2 x i64>
ret <2 x i64> %tmp10
}
define <4 x float> @test3(<4 x float> %A) {
; CHECK-LABEL: test3:
; CHECK: xorps %[[X1:xmm[0-9]+]], %[[X1]]
; CHECK-NEXT: movss %xmm0, %[[X1]]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = [[X1]][1,0,1,1]
; CHECK-NEXT: retl
;
; CHECK-EXP-LABEL: test3:
; CHECK-EXP: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
; CHECK-EXP-NEXT: retl
%tmp0 = extractelement <4 x float> %A, i32 0
%tmp1 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef >, float %tmp0, i32 1
%tmp2 = insertelement <4 x float> %tmp1, float 0.000000e+00, i32 2
ret <4 x float> %tmp2
}