mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-11-02 22:04:55 +00:00
[X86][SSE] Vector double -> float conversion memory folding (cvtpd2ps)
Added a missing memory folding relationship for the (V)CVTPD2PS instruction - we can safely fold these for stack reloads.

Differential Revision: http://reviews.llvm.org/D6663

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224383 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e1bf514b2b
commit
2385e98928
@ -450,6 +450,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
||||
{ X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
|
||||
{ X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 },
|
||||
{ X86::CVTPD2DQrr, X86::CVTPD2DQrm, TB_ALIGN_16 },
|
||||
{ X86::CVTPD2PSrr, X86::CVTPD2PSrm, TB_ALIGN_16 },
|
||||
{ X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 },
|
||||
{ X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
|
||||
{ X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
|
||||
@ -531,6 +532,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
||||
{ X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
|
||||
{ X86::VCVTDQ2PSrr, X86::VCVTDQ2PSrm, 0 },
|
||||
{ X86::VCVTPD2DQrr, X86::VCVTPD2DQXrm, 0 },
|
||||
{ X86::VCVTPD2PSrr, X86::VCVTPD2PSXrm, 0 },
|
||||
{ X86::VCVTPS2DQrr, X86::VCVTPS2DQrm, 0 },
|
||||
{ X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 },
|
||||
{ X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
|
||||
@ -569,6 +571,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
||||
// AVX 256-bit foldable instructions
|
||||
{ X86::VCVTDQ2PSYrr, X86::VCVTDQ2PSYrm, 0 },
|
||||
{ X86::VCVTPD2DQYrr, X86::VCVTPD2DQYrm, 0 },
|
||||
{ X86::VCVTPD2PSYrr, X86::VCVTPD2PSYrm, 0 },
|
||||
{ X86::VCVTPS2DQYrr, X86::VCVTPS2DQYrm, 0 },
|
||||
{ X86::VCVTTPD2DQYrr, X86::VCVTTPD2DQYrm, 0 },
|
||||
{ X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0 },
|
||||
|
@ -37,6 +37,21 @@ define void @stack_fold_cvtdq2ps(<128 x i32>* %a, <128 x i32>* %b, <128 x float>
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stack-reload folding test for the packed double -> float conversion.
; The body loads two <128 x double> vectors from %a and %b, forms their sum and
; difference, truncates both results to <128 x float>, adds the truncated
; vectors and stores the result through %c.
; The FileCheck pattern below expects a vcvtpd2psy whose source operand is a
; stack slot addressed off %rsp and annotated as a 32-byte folded reload.
; NOTE(review): the very wide vector types are presumably chosen to create
; enough register pressure to force spills - confirm against the other tests
; in this file.
define void @stack_fold_cvtpd2ps(<128 x double>* %a, <128 x double>* %b, <128 x float>* %c) {
|
||||
;CHECK-LABEL: stack_fold_cvtpd2ps
|
||||
;CHECK: vcvtpd2psy {{[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
|
||||
; Load both double-precision inputs.
%1 = load <128 x double>* %a
|
||||
%2 = load <128 x double>* %b
|
||||
; Two independent results (%3, %4) are computed from the same inputs; each is
; converted separately below.
%3 = fadd <128 x double> %1, %2
|
||||
%4 = fsub <128 x double> %1, %2
|
||||
; The double -> float truncations are the operations under test.
%5 = fptrunc <128 x double> %3 to <128 x float>
|
||||
%6 = fptrunc <128 x double> %4 to <128 x float>
|
||||
; Combine both converted vectors so that neither conversion is dead, then
; write the result out through %c.
%7 = fadd <128 x float> %5, %6
|
||||
store <128 x float> %7, <128 x float>* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @stack_fold_cvttpd2dq(<64 x double>* %a, <64 x double>* %b, <64 x i32>* %c) #0 {
|
||||
;CHECK-LABEL: stack_fold_cvttpd2dq
|
||||
;CHECK: vcvttpd2dqy {{[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
|
Loading…
Reference in New Issue
Block a user