mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-06 09:44:39 +00:00
Model sqrtsd as a binary operation with one source operand tied to the destination (PR14221)
This patch fixes the following miscompile: define void @sqrtsd(<2 x double> %a) nounwind uwtable ssp { %0 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a) nounwind %a0 = extractelement <2 x double> %0, i32 0 %conv = fptrunc double %a0 to float %a1 = extractelement <2 x double> %0, i32 1 %conv3 = fptrunc double %a1 to float tail call void @callee2(float %conv, float %conv3) nounwind ret void } Current codegen: sqrtsd %xmm0, %xmm1 ## high element of %xmm1 is undef here xorps %xmm0, %xmm0 cvtsd2ss %xmm1, %xmm0 shufpd $1, %xmm1, %xmm1 cvtsd2ss %xmm1, %xmm1 ## operating on undef value jmp _callee This is a continuation of http://llvm.org/viewvc/llvm-project?view=revision&revision=224624 ( http://reviews.llvm.org/D6330 ) which was itself a continuation of r167064 ( http://llvm.org/viewvc/llvm-project?view=revision&revision=167064 ). All of these patches are partial fixes for PR14221 ( http://llvm.org/bugs/show_bug.cgi?id=14221 ); this should be the final patch needed to resolve that bug. Differential Revision: http://reviews.llvm.org/D6885 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227111 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8a5ad05c13
commit
956d6f0cf5
@ -3647,8 +3647,10 @@ let Predicates = [HasAVX] in {
|
||||
}
|
||||
|
||||
/// sse2_fp_unop_s - SSE2 unops in scalar form.
|
||||
// FIXME: Combine the following sse2 classes with the sse1 classes above.
|
||||
// The only usage of these is for SQRT[S/P]D. See sse12_fp_binop* for example.
|
||||
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode, Intrinsic F64Int, OpndItins itins> {
|
||||
SDNode OpNode, OpndItins itins> {
|
||||
let Predicates = [HasAVX], hasSideEffects = 0 in {
|
||||
def V#NAME#SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst),
|
||||
(ins FR64:$src1, FR64:$src2),
|
||||
@ -3681,16 +3683,18 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
|
||||
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
|
||||
[(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
|
||||
Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;
|
||||
let isCodeGenOnly = 1 in {
|
||||
def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (F64Int VR128:$src))], itins.rr>,
|
||||
Sched<[itins.Sched]>;
|
||||
def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
|
||||
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>,
|
||||
Sched<[itins.Sched.Folded]>;
|
||||
}
|
||||
let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
|
||||
def SDr_Int :
|
||||
SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
|
||||
[], itins.rr>, Sched<[itins.Sched]>;
|
||||
|
||||
let mayLoad = 1, hasSideEffects = 0 in
|
||||
def SDm_Int :
|
||||
SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
|
||||
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
|
||||
[], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
|
||||
} // isCodeGenOnly, Constraints
|
||||
}
|
||||
|
||||
/// sse2_fp_unop_p - SSE2 unops in vector forms.
|
||||
@ -3732,8 +3736,7 @@ let Predicates = [HasAVX] in {
|
||||
// Square root.
|
||||
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>,
|
||||
sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>,
|
||||
sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
|
||||
SSE_SQRTSD>,
|
||||
sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD>,
|
||||
sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;
|
||||
|
||||
// Reciprocal approximations. Note that these typically require refinement
|
||||
@ -3812,6 +3815,8 @@ let Predicates = [UseSSE1] in {
|
||||
(RCPSSr_Int VR128:$src, VR128:$src)>;
|
||||
def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
|
||||
(SQRTSSr_Int VR128:$src, VR128:$src)>;
|
||||
def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
|
||||
(SQRTSDr_Int VR128:$src, VR128:$src)>;
|
||||
}
|
||||
|
||||
// There is no f64 version of the reciprocal approximation instructions.
|
||||
|
@ -67,3 +67,26 @@ entry:
|
||||
ret void
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
|
||||
|
||||
define void @sqrtsd(<2 x double> %a) nounwind uwtable ssp {
|
||||
entry:
|
||||
; CHECK-LABEL: sqrtsd:
|
||||
; CHECK: sqrtsd %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtsd2ss %xmm0
|
||||
; CHECK-NEXT: shufpd
|
||||
; CHECK-NEXT: cvtsd2ss %xmm0
|
||||
; CHECK-NEXT: movap
|
||||
; CHECK-NEXT: jmp
|
||||
|
||||
%0 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a) nounwind
|
||||
%a0 = extractelement <2 x double> %0, i32 0
|
||||
%conv = fptrunc double %a0 to float
|
||||
%a1 = extractelement <2 x double> %0, i32 1
|
||||
%conv3 = fptrunc double %a1 to float
|
||||
tail call void @callee2(float %conv, float %conv3) nounwind
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @callee2(float, float)
|
||||
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user