mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
fix for PR20354 - Miscompile of fabs due to vectorization
This is intended to be the minimal change needed to fix PR20354 ( http://llvm.org/bugs/show_bug.cgi?id=20354 ). The check for a vector operation was wrong: it tested the type of the bitcast's integer operand, but we need to check that the fabs itself is not a vector operation. For the vector case, this patch will not generate the optimal code: a constant pool load and 'and' op will be generated instead of just returning a value that we can calculate in advance (as we do for the scalar case). I've put a 'TODO' comment for that here and expect to have that patch ready soon. There is a very similar optimization that we can do in visitFNEG, so I've put another 'TODO' there and expect to have another patch for that too. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214670 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
48e1bd7287
commit
0f2bc49126
@ -7311,6 +7311,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
|
||||
|
||||
// Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
|
||||
// constant pool values.
|
||||
// TODO: We can also optimize for vectors here, but we need to make sure
|
||||
// that the sign mask is created properly for each vector element.
|
||||
if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
|
||||
!VT.isVector() &&
|
||||
N0.getNode()->hasOneUse() &&
|
||||
@ -7403,10 +7405,12 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
|
||||
|
||||
// Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
|
||||
// constant pool values.
|
||||
// TODO: We can also optimize for vectors here, but we need to make sure
|
||||
// that the sign mask is created properly for each vector element.
|
||||
if (!TLI.isFAbsFree(VT) &&
|
||||
N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
|
||||
N0.getOperand(0).getValueType().isInteger() &&
|
||||
!N0.getOperand(0).getValueType().isVector()) {
|
||||
!VT.isVector()) {
|
||||
SDValue Int = N0.getOperand(0);
|
||||
EVT IntVT = Int.getValueType();
|
||||
if (IntVT.isInteger() && !IntVT.isVector()) {
|
||||
|
@ -1,9 +1,9 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s
|
||||
; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
|
||||
|
||||
|
||||
define <2 x double> @fabs_v2f64(<2 x double> %p)
|
||||
{
|
||||
; CHECK: fabs_v2f64
|
||||
; CHECK-LABEL: fabs_v2f64
|
||||
; CHECK: vandps
|
||||
%t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
|
||||
ret <2 x double> %t
|
||||
@ -12,7 +12,7 @@ declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
|
||||
|
||||
define <4 x float> @fabs_v4f32(<4 x float> %p)
|
||||
{
|
||||
; CHECK: fabs_v4f32
|
||||
; CHECK-LABEL: fabs_v4f32
|
||||
; CHECK: vandps
|
||||
%t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
|
||||
ret <4 x float> %t
|
||||
@ -21,7 +21,7 @@ declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
|
||||
|
||||
define <4 x double> @fabs_v4f64(<4 x double> %p)
|
||||
{
|
||||
; CHECK: fabs_v4f64
|
||||
; CHECK-LABEL: fabs_v4f64
|
||||
; CHECK: vandps
|
||||
%t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
|
||||
ret <4 x double> %t
|
||||
@ -30,9 +30,31 @@ declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
|
||||
|
||||
define <8 x float> @fabs_v8f32(<8 x float> %p)
|
||||
{
|
||||
; CHECK: fabs_v8f32
|
||||
; CHECK-LABEL: fabs_v8f32
|
||||
; CHECK: vandps
|
||||
%t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
|
||||
ret <8 x float> %t
|
||||
}
|
||||
declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
|
||||
|
||||
; PR20354: when generating code for a vector fabs op,
|
||||
; make sure the correct mask is used for all vector elements.
|
||||
; CHECK-LABEL: LCPI4_0
|
||||
; CHECK: .long 2147483648
|
||||
; CHECK: .long 2147483648
|
||||
; CHECK-LABEL: LCPI4_1
|
||||
; CHECK: .long 2147483647
|
||||
; CHECK: .long 2147483647
|
||||
; CHECK-LABEL: fabs_v2f32_1
|
||||
; CHECK: vmovdqa LCPI4_0, %xmm0
|
||||
; CHECK: vpand LCPI4_1, %xmm0, %xmm0
|
||||
; CHECK: vmovd %xmm0, %eax
|
||||
; CHECK: vpextrd $1, %xmm0, %edx
|
||||
define i64 @fabs_v2f32_1() {
|
||||
%highbits = bitcast i64 9223372039002259456 to <2 x float> ; 0x8000_0000_8000_0000
|
||||
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %highbits)
|
||||
%ret = bitcast <2 x float> %fabs to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)
|
||||
|
Loading…
Reference in New Issue
Block a user