llvm-6502/test/CodeGen/X86/sse_partial_update.ll

; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse2 -mcpu=nehalem | FileCheck %s

; rdar: 12558838
; PR14221
; There is a mismatch between the intrinsic and the actual instruction.
; The actual instruction performs a partial update of its destination, while
; the intrinsic passes through the upper FP values. Here, we make sure the
; source and destination of rsqrtss (t1) and rcpss (t2) are the same.
define void @t1(<4 x float> %a) nounwind uwtable ssp {
entry:
; CHECK: t1:
; CHECK: rsqrtss %xmm0, %xmm0
  ; Apply the scalar reciprocal-square-root intrinsic to the incoming vector.
  %rsqrt = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a) nounwind
  ; Element 0 of the intrinsic result, widened to double.
  %lane0 = extractelement <4 x float> %rsqrt, i32 0
  %lane0.f64 = fpext float %lane0 to double
  ; Element 1 of the intrinsic result is used as well, widened to double.
  %lane1 = extractelement <4 x float> %rsqrt, i32 1
  %lane1.f64 = fpext float %lane1 to double
  ; Hand both widened elements to the external callee.
  tail call void @callee(double %lane0.f64, double %lane1.f64) nounwind
  ret void
}
declare void @callee(double, double)
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone

define void @t2(<4 x float> %a) nounwind uwtable ssp {
entry:
; CHECK: t2:
; CHECK: rcpss %xmm0, %xmm0
  ; Apply the scalar reciprocal intrinsic to the incoming vector.
  %rcp = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a) nounwind
  ; Element 0 of the intrinsic result, widened to double.
  %lane0 = extractelement <4 x float> %rcp, i32 0
  %lane0.f64 = fpext float %lane0 to double
  ; Element 1 of the intrinsic result is used as well, widened to double.
  %lane1 = extractelement <4 x float> %rcp, i32 1
  %lane1.f64 = fpext float %lane1 to double
  ; Hand both widened elements to the external callee.
  tail call void @callee(double %lane0.f64, double %lane1.f64) nounwind
  ret void
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
X86 SSE: update rsqrtss and rcpss to use two source operands, with the first source operand tied to the destination operand. This is to accurately model the corresponding instructions where the upper bits are unmodified. rdar://12558838 PR14221 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167064 91177308-0d34-0410-b5e6-96231b3b80d8 2012-10-30 23:53:59 +00:00			`; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse2 -mcpu=nehalem \| FileCheck %s`

			`; rdar: 12558838`
			`; PR14221`
			`; There is a mismatch between the intrinsic and the actual instruction.`
			`; The actual instruction has a partial update of dest, while the intrinsic`
			`; passes through the upper FP values. Here, we make sure the source and`
			`; destination of rsqrtss are the same.`
			`define void @t1(<4 x float> %a) nounwind uwtable ssp {`
			`entry:`
			`; CHECK: t1:`
			`; CHECK: rsqrtss %xmm0, %xmm0`
			`%0 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a) nounwind`
			`%a.addr.0.extract = extractelement <4 x float> %0, i32 0`
			`%conv = fpext float %a.addr.0.extract to double`
			`%a.addr.4.extract = extractelement <4 x float> %0, i32 1`
			`%conv3 = fpext float %a.addr.4.extract to double`
			`tail call void @callee(double %conv, double %conv3) nounwind`
			`ret void`
			`}`
			`declare void @callee(double, double)`
			`declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone`

			`define void @t2(<4 x float> %a) nounwind uwtable ssp {`
			`entry:`
			`; CHECK: t2:`
			`; CHECK: rcpss %xmm0, %xmm0`
			`%0 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a) nounwind`
			`%a.addr.0.extract = extractelement <4 x float> %0, i32 0`
			`%conv = fpext float %a.addr.0.extract to double`
			`%a.addr.4.extract = extractelement <4 x float> %0, i32 1`
			`%conv3 = fpext float %a.addr.4.extract to double`
			`tail call void @callee(double %conv, double %conv3) nounwind`
			`ret void`
			`}`
			`declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone`