[DAGCombiner] Fix wrong folding of a build_vector into a blend with zero.

Method 'visitBUILD_VECTOR' in the DAGCombiner knows how to combine a
build_vector of a bunch of extract_vector_elt nodes and constant zero nodes
into a shuffle blend with a zero vector.

However, method 'visitBUILD_VECTOR' forgot that a floating point
build_vector may contain negative zero as well as positive zero.

Example:

define <2 x double> @example(<2 x double> %A) {
entry:
  %0 = extractelement <2 x double> %A, i32 0
  %1 = insertelement <2 x double> undef, double %0, i32 0
  %2 = insertelement <2 x double> %1, double -0.0, i32 1
  ret <2 x double> %2
}

Before this patch, llc (with -mattr=+sse4.1) wrongly generated
  movq   %xmm0, %xmm0  # xmm0 = xmm0[0],zero

So, the sign bit of the negative zero was effectively lost.

This patch fixes the problem by adding explicit checks for positive zero.

With this patch, llc produces the following code for the example above:
  movhpd .LCPI0_0(%rip), %xmm0

where .LCPI0_0 referes to a 'double -0'.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@239070 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Andrea Di Biagio 2015-06-04 19:15:01 +00:00
parent 65cae0d169
commit c07ee0c4ff
2 changed files with 46 additions and 3 deletions

View File

@ -1587,6 +1587,11 @@ static bool isNullConstant(SDValue V) {
return Const != nullptr && Const->isNullValue();
}
static bool isNullFPConstant(SDValue V) {
ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
return Const != nullptr && Const->isZero() && !Const->isNegative();
}
static bool isAllOnesConstant(SDValue V) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
return Const != nullptr && Const->isAllOnesValue();
@ -11912,9 +11917,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (Op.getOpcode() == ISD::UNDEF) continue;
// See if we can combine this build_vector into a blend with a zero vector.
if (!VecIn2.getNode() && (isNullConstant(Op) ||
(Op.getOpcode() == ISD::ConstantFP &&
cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) {
if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
UsesZeroVector = true;
continue;
}

View File

@ -0,0 +1,40 @@
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s
; Verify that the DAGCombiner doesn't wrongly fold a build_vector into a
; blend with a zero vector if the build_vector contains negative zero.
;
; TODO: the codegen for function 'test_negative_zero_1' is sub-optimal.
; Ideally, we should generate a single shuffle blend operation.
define <4 x float> @test_negative_zero_1(<4 x float> %A) {
; CHECK-LABEL: test_negative_zero_1:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movapd %xmm0, %xmm1
; CHECK-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
; CHECK-NEXT: xorps %xmm2, %xmm2
; CHECK-NEXT: blendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT: retq
entry:
%0 = extractelement <4 x float> %A, i32 0
%1 = insertelement <4 x float> undef, float %0, i32 0
%2 = insertelement <4 x float> %1, float -0.0, i32 1
%3 = extractelement <4 x float> %A, i32 2
%4 = insertelement <4 x float> %2, float %3, i32 2
%5 = insertelement <4 x float> %4, float 0.0, i32 3
ret <4 x float> %5
}
define <2 x double> @test_negative_zero_2(<2 x double> %A) {
; CHECK-LABEL: test_negative_zero_2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movhpd {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
entry:
%0 = extractelement <2 x double> %A, i32 0
%1 = insertelement <2 x double> undef, double %0, i32 0
%2 = insertelement <2 x double> %1, double -0.0, i32 1
ret <2 x double> %2
}