From 1c4b6de5edb308a4bea0cb75013e5a2cafcbe65e Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Fri, 3 Apr 2015 13:48:24 +0000 Subject: [PATCH] [PowerPC] Enable splat generation for BUILD_VECTOR with little endian When enabling PPC64LE, I disabled some optimizations of BUILD_VECTOR nodes for little endian because wrong results were produced. I've subsequently investigated and found this is due to a call to BuildVectorSDNode::isConstantSplat that was always specifying big-endian. With this changed to correctly identify the target endianness, the optimizations work as expected. I found another case of a call to the same method with big-endian hardcoded, in PPC::isAllNegativeZeroVector(). I discovered this was an orphaned method with no callers, so I've just removed it. The existing test/CodeGen/PowerPC/vec_constants.ll checks these optimizations, so for testing I've just added a variant for little endian. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234011 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 35 ++------------------------ lib/Target/PowerPC/PPCISelLowering.h | 4 --- test/CodeGen/PowerPC/vec_constants.ll | 6 ++--- 3 files changed, 4 insertions(+), 41 deletions(-) diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 1c2659cbb70..7e2e9b3d808 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1287,22 +1287,6 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { return true; } -/// isAllNegativeZeroVector - Returns true if all elements of build_vector -/// are -0.0. 
-bool PPC::isAllNegativeZeroVector(SDNode *N) { - BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N); - - APInt APVal, APUndef; - unsigned BitSize; - bool HasAnyUndefs; - - if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true)) - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) - return CFP->getValueAPF().isNegZero(); - - return false; -} - /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, @@ -6609,7 +6593,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, unsigned SplatBitSize; bool HasAnyUndefs; if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, - HasAnyUndefs, 0, true) || SplatBitSize > 32) + HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || + SplatBitSize > 32) return SDValue(); unsigned SplatBits = APSplatBits.getZExtValue(); @@ -6676,22 +6661,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } - // The remaining cases assume either big endian element order or - // a splat-size that equates to the element size of the vector - // to be built. An example that doesn't work for little endian is - // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits - // and a vector element size of 16 bits. The code below will - // produce the vector in big endian element order, which for little - // endian is {-1, 0, -1, 0, -1, 0, -1, 0}. - - // For now, just avoid these optimizations in that case. - // FIXME: Develop correct optimizations for LE with mismatched - // splat and element sizes. - - if (Subtarget.isLittleEndian() && - SplatSize != Op.getValueType().getVectorElementType().getSizeInBits()) - return SDValue(); - // Check to see if this is a wide variety of vsplti*, binop self cases. 
static const signed char SplatCsts[] = { -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 8afd7ef2ef2..695fc3ff5bb 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -368,10 +368,6 @@ namespace llvm { /// VSPLTB/VSPLTH/VSPLTW. bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize); - /// isAllNegativeZeroVector - Returns true if all elements of build_vector - /// are -0.0. - bool isAllNegativeZeroVector(SDNode *N); - /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG); diff --git a/test/CodeGen/PowerPC/vec_constants.ll b/test/CodeGen/PowerPC/vec_constants.ll index 45df814365e..858b85dce54 100644 --- a/test/CodeGen/PowerPC/vec_constants.ll +++ b/test/CodeGen/PowerPC/vec_constants.ll @@ -1,7 +1,5 @@ -; RUN: llc -O0 -mcpu=pwr7 < %s | FileCheck %s - -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" -target triple = "powerpc64-unknown-linux-gnu" +; RUN: llc -O0 -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -O0 -mcpu=pwr7 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind { %tmp = load <4 x i32>, <4 x i32>* %P1 ; <<4 x i32>> [#uses=1]