[PowerPC] Enable splat generation for BUILD_VECTOR with little endian

When enabling PPC64LE, I disabled some optimizations of BUILD_VECTOR
nodes for little endian because wrong results were produced.  I've
subsequently investigated and found this is due to a call to
BuildVectorSDNode::isConstantSplat that was always specifying
big-endian.  With this changed to correctly identify the target
endianness, the optimizations work as expected.

I found another case of a call to the same method with big-endian
hardcoded, in PPC::isAllNegativeZeroVector().  I discovered this was
an orphaned method with no callers, so I've just removed it.

The existing test/CodeGen/PowerPC/vec_constants.ll checks these
optimizations, so for testing I've just added a variant for little
endian.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234011 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bill Schmidt 2015-04-03 13:48:24 +00:00
parent e5ecd32488
commit 1c4b6de5ed
3 changed files with 4 additions and 41 deletions

View File

@ -1287,22 +1287,6 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
return true;
}
/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) {
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
APInt APVal, APUndef;
unsigned BitSize;
bool HasAnyUndefs;
if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
return CFP->getValueAPF().isNegZero();
return false;
}
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
@ -6609,7 +6593,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
unsigned SplatBitSize;
bool HasAnyUndefs;
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
HasAnyUndefs, 0, true) || SplatBitSize > 32)
HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
SplatBitSize > 32)
return SDValue();
unsigned SplatBits = APSplatBits.getZExtValue();
@ -6676,22 +6661,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
// The remaining cases assume either big endian element order or
// a splat-size that equates to the element size of the vector
// to be built. An example that doesn't work for little endian is
// {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits
// and a vector element size of 16 bits. The code below will
// produce the vector in big endian element order, which for little
// endian is {-1, 0, -1, 0, -1, 0, -1, 0}.
// For now, just avoid these optimizations in that case.
// FIXME: Develop correct optimizations for LE with mismatched
// splat and element sizes.
if (Subtarget.isLittleEndian() &&
SplatSize != Op.getValueType().getVectorElementType().getSizeInBits())
return SDValue();
// Check to see if this is a wide variety of vsplti*, binop self cases.
static const signed char SplatCsts[] = {
-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,

View File

@ -368,10 +368,6 @@ namespace llvm {
/// VSPLTB/VSPLTH/VSPLTW.
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool isAllNegativeZeroVector(SDNode *N);
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);

View File

@ -1,7 +1,5 @@
; RUN: llc -O0 -mcpu=pwr7 < %s | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
; RUN: llc -O0 -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
; RUN: llc -O0 -mcpu=pwr7 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind {
%tmp = load <4 x i32>, <4 x i32>* %P1 ; <<4 x i32>> [#uses=1]