llvm-6502/test/CodeGen/X86/sse1.ll

; Tests for SSE1 and below, without SSE2+.
; RUN: llc < %s -march=x86 -mcpu=pentium3 -O3 | FileCheck %s
; RUN: llc < %s -march=x86-64 -mcpu=pentium3 -O3 | FileCheck %s

; test1: smoke test. Returns an all-zero <8 x i16> and never reads its
; <8 x i32> argument. The single FileCheck directive below only matches the
; function name, so this passes as long as llc emits output for the function
; under both run configurations; no particular instruction sequence is
; required.
define <8 x i16> @test1(<8 x i32> %a) nounwind {
; CHECK: test1
  ret <8 x i16> zeroinitializer
}

; test2: truncates every i32 lane of an 8-element vector to i16 and returns
; the result. Only the function name is matched by FileCheck, so this verifies
; that llc emits output for the function rather than any specific instructions.
; NOTE(review): presumably this exercises legalization of the wide <8 x i32>
; operand on an SSE1-only target -- confirm against the originating commit.
define <8 x i16> @test2(<8 x i32> %a) nounwind {
; CHECK: test2
  %c = trunc <8 x i32> %a to <8 x i16>            ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %c
}

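; PR7993: test3 is disabled (commented out) as a workaround; it was causing
; failures on x86 builders that lack SSE2.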
;define <4 x i32> @test3(<4 x i16> %a) nounwind {
;  %c = sext <4 x i16> %a to <4 x i32>             ; <<4 x i32>> [#uses=1]
;  ret <4 x i32> %c
;}

; test4: builds a <2 x float> whose lane 0 is A[0] + B[0] and whose lane 1 is
; A[1] - B[1].
; This should not emit shuffles to populate the top 2 elements of the 4-element
; vector that this ends up returning.
; rdar://8368414
define <2 x float> @test4(<2 x float> %A, <2 x float> %B) nounwind {
entry:
  ; Pull both lanes out of each operand.
  %tmp7 = extractelement <2 x float> %A, i32 0
  %tmp5 = extractelement <2 x float> %A, i32 1
  %tmp3 = extractelement <2 x float> %B, i32 0
  %tmp1 = extractelement <2 x float> %B, i32 1
  ; Lane 0 is added, lane 1 is subtracted. Note that %add.i is produced by an
  ; fsub despite its name.
  %add.r = fadd float %tmp7, %tmp3
  %add.i = fsub float %tmp5, %tmp1
  ; Rebuild the result starting from undef, so only lanes 0 and 1 are ever
  ; given a defined value.
  %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
  %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
  ret <2 x float> %tmp9
; Expected output, in order: the test4 label, two "shufps $1" instructions
; (presumably the lane-1 extractions -- confirm), one unpcklps, then ret.
; The negative checks interleaved between them reject a "shufps $16" anywhere
; in that span, i.e. the shuffle that would only fill the unused top lanes.
; CHECK: test4:
; CHECK-NOT: shufps	$16
; CHECK: shufps	$1, 
; CHECK-NOT: shufps	$16
; CHECK: shufps	$1, 
; CHECK-NOT: shufps	$16
; CHECK: unpcklps
; CHECK-NOT: shufps	$16
; CHECK: ret
}
fix sse1 only codegen in x86-64 mode, which is something we apparently try to support. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112168 91177308-0d34-0410-b5e6-96231b3b80d8 2010-08-26 05:24:29 +00:00			`; Tests for SSE1 and below, without SSE2+.`
Make sure this forces the x86 targets git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112169 91177308-0d34-0410-b5e6-96231b3b80d8 2010-08-26 05:25:05 +00:00			`; RUN: llc < %s -march=x86 -mcpu=pentium3 -O3 \| FileCheck %s`
			`; RUN: llc < %s -march=x86-64 -mcpu=pentium3 -O3 \| FileCheck %s`
fix sse1 only codegen in x86-64 mode, which is something we apparently try to support. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112168 91177308-0d34-0410-b5e6-96231b3b80d8 2010-08-26 05:24:29 +00:00
			`define <8 x i16> @test1(<8 x i32> %a) nounwind {`
			`; CHECK: test1`
			`ret <8 x i16> zeroinitializer`
			`}`
implement SplitVecOp_CONCAT_VECTORS, fixing the included testcase with SSE1. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112171 91177308-0d34-0410-b5e6-96231b3b80d8 2010-08-26 05:51:22 +00:00
			`define <8 x i16> @test2(<8 x i32> %a) nounwind {`
			`; CHECK: test2`
			`%c = trunc <8 x i32> %a to <8 x i16> ; <<8 x i16>> [#uses=1]`
			`ret <8 x i16> %c`
			`}`
Add a hackaround for PR7993 which is causing failures on x86 builders that lack sse2. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112175 91177308-0d34-0410-b5e6-96231b3b80d8 2010-08-26 06:57:07 +00:00
			`; PR7993`
			`;define <4 x i32> @test3(<4 x i16> %a) nounwind {`
			`; %c = sext <4 x i16> %a to <4 x i32> ; <<4 x i32>> [#uses=1]`
			`; ret <4 x i32> %c`
			`;}`
fix the BuildVector -> unpcklps logic to not do pointless shuffles when the top elements of a vector are undefined. This happens all the time for X86-64 ABI stuff because only the low 2 elements of a 4 element vector are defined. For example, on: _Complex float f32(_Complex float A, _Complex float B) { return A+B; } We used to produce (with SSE2, SSE4.1+ uses insertps): _f32: ## @f32 movdqa %xmm0, %xmm2 addss %xmm1, %xmm2 pshufd $16, %xmm2, %xmm2 pshufd $1, %xmm1, %xmm1 pshufd $1, %xmm0, %xmm0 addss %xmm1, %xmm0 pshufd $16, %xmm0, %xmm1 movdqa %xmm2, %xmm0 unpcklps %xmm1, %xmm0 ret We now produce: _f32: ## @f32 movdqa %xmm0, %xmm2 addss %xmm1, %xmm2 pshufd $1, %xmm1, %xmm1 pshufd $1, %xmm0, %xmm3 addss %xmm1, %xmm3 movaps %xmm2, %xmm0 unpcklps %xmm3, %xmm0 ret This implements rdar://8368414 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112378 91177308-0d34-0410-b5e6-96231b3b80d8 2010-08-28 17:28:30 +00:00
			`; This should not emit shuffles to populate the top 2 elements of the 4-element`
			`; vector that this ends up returning.`
			`; rdar://8368414`
			`define <2 x float> @test4(<2 x float> %A, <2 x float> %B) nounwind {`
			`entry:`
			`%tmp7 = extractelement <2 x float> %A, i32 0`
			`%tmp5 = extractelement <2 x float> %A, i32 1`
			`%tmp3 = extractelement <2 x float> %B, i32 0`
			`%tmp1 = extractelement <2 x float> %B, i32 1`
			`%add.r = fadd float %tmp7, %tmp3`
			`%add.i = fsub float %tmp5, %tmp1`
			`%tmp11 = insertelement <2 x float> undef, float %add.r, i32 0`
			`%tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1`
			`ret <2 x float> %tmp9`
			`; CHECK: test4:`
			`; CHECK-NOT: shufps $16`
			`; CHECK: shufps $1,`
			`; CHECK-NOT: shufps $16`
			`; CHECK: shufps $1,`
			`; CHECK-NOT: shufps $16`
			`; CHECK: unpcklps`
			`; CHECK-NOT: shufps $16`
			`; CHECK: ret`
			`}`