mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-15 07:34:33 +00:00
4269590166
This patch adds shuffle matching for the SSE3 MOVDDUP, MOVSLDUP and MOVSHDUP instructions. The big use of these is that they avoid many single source shuffles from needing to use (pre-AVX) dual source instructions such as SHUFPD/SHUFPS, which cause extra moves and prevent load folds. Adding these instructions uncovered an issue in XFormVExtractWithShuffleIntoLoad which crashed on single operand shuffle instructions (now fixed). It also involved fixing getTargetShuffleMask to correctly identify these instructions as unary shuffles. Also adds a missing tablegen pattern for MOVDDUP. Differential Revision: http://reviews.llvm.org/D7042 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226716 91177308-0d34-0410-b5e6-96231b3b80d8
70 lines
2.4 KiB
LLVM
70 lines
2.4 KiB
LLVM
; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse2 -mcpu=nehalem | FileCheck %s
|
|
|
|
; rdar: 12558838
; PR14221
; There is a mismatch between the intrinsic and the actual instruction.
; The actual instruction has a partial update of dest, while the intrinsic
; passes through the upper FP values. Here, we make sure the source and
; destination of each scalar unary op are the same.
|
|
|
|
; Applies the scalar reciprocal-square-root intrinsic to %a, then extends
; lanes 0 and 1 of the result to double and forwards both to @callee in a
; tail call. The directives below require the emitted rsqrtss to use %xmm0
; as both its source and its destination operand.
define void @rsqrtss(<4 x float> %a) nounwind uwtable ssp {
entry:
; CHECK-LABEL: rsqrtss:
; CHECK: rsqrtss %xmm0, %xmm0
; CHECK-NEXT: cvtss2sd %xmm0
; CHECK-NEXT: movshdup
; CHECK-NEXT: cvtss2sd %xmm0
; CHECK-NEXT: movap
; CHECK-NEXT: jmp

  %0 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a) nounwind
  ; Lane 0 of the intrinsic result, widened to double.
  %a.addr.0.extract = extractelement <4 x float> %0, i32 0
  %conv = fpext float %a.addr.0.extract to double
  ; Lane 1 of the intrinsic result, widened to double.
  %a.addr.4.extract = extractelement <4 x float> %0, i32 1
  %conv3 = fpext float %a.addr.4.extract to double
  tail call void @callee(double %conv, double %conv3) nounwind
  ret void
}

; External sink for the two converted lanes; it is only declared in this file.
declare void @callee(double, double)
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
|
|
|
|
; Applies the scalar reciprocal intrinsic to %a, then extends lanes 0 and 1
; of the result to double and forwards both to @callee in a tail call. The
; directives below require the emitted rcpss to use %xmm0 as both its source
; and its destination operand.
define void @rcpss(<4 x float> %a) nounwind uwtable ssp {
entry:
; CHECK-LABEL: rcpss:
; CHECK: rcpss %xmm0, %xmm0
; CHECK-NEXT: cvtss2sd %xmm0
; CHECK-NEXT: movshdup
; CHECK-NEXT: cvtss2sd %xmm0
; CHECK-NEXT: movap
; CHECK-NEXT: jmp

  %0 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a) nounwind
  ; Lane 0 of the intrinsic result, widened to double.
  %a.addr.0.extract = extractelement <4 x float> %0, i32 0
  %conv = fpext float %a.addr.0.extract to double
  ; Lane 1 of the intrinsic result, widened to double.
  %a.addr.4.extract = extractelement <4 x float> %0, i32 1
  %conv3 = fpext float %a.addr.4.extract to double
  tail call void @callee(double %conv, double %conv3) nounwind
  ret void
}

declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
|
|
|
|
; Applies the scalar square-root intrinsic to %a, then extends lanes 0 and 1
; of the result to double and forwards both to @callee in a tail call. The
; directives below require the emitted sqrtss to use %xmm0 as both its source
; and its destination operand.
define void @sqrtss(<4 x float> %a) nounwind uwtable ssp {
entry:
; CHECK-LABEL: sqrtss:
; CHECK: sqrtss %xmm0, %xmm0
; CHECK-NEXT: cvtss2sd %xmm0
; CHECK-NEXT: movshdup
; CHECK-NEXT: cvtss2sd %xmm0
; CHECK-NEXT: movap
; CHECK-NEXT: jmp

  %0 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a) nounwind
  ; Lane 0 of the intrinsic result, widened to double.
  %a.addr.0.extract = extractelement <4 x float> %0, i32 0
  %conv = fpext float %a.addr.0.extract to double
  ; Lane 1 of the intrinsic result, widened to double.
  %a.addr.4.extract = extractelement <4 x float> %0, i32 1
  %conv3 = fpext float %a.addr.4.extract to double
  tail call void @callee(double %conv, double %conv3) nounwind
  ret void
}

declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
|