Make the BUILD_VECTOR lowering code much more aggressive w.r.t constant vectors.

Remove some done items from the todo list.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27729 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2006-04-16 01:01:29 +00:00
parent 7f6cc0ccb5
commit b17f1679e3
2 changed files with 106 additions and 67 deletions

View File

@ -936,8 +936,6 @@ static SDOperand LowerSRA(SDOperand Op, SelectionDAG &DAG) {
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
//
// Note that VectorBits/UndefBits are returned in 'little endian' form, so
// elements 0,1 go in VectorBits[0] and 2,3 go in VectorBits[1] for a v4i32.
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
uint64_t UndefBits[2]) {
// Start with zero'd results.
@ -948,7 +946,7 @@ static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
SDOperand OpVal = BV->getOperand(i);
unsigned PartNo = i >= e/2; // In the upper 128 bits?
unsigned SlotNo = i & (e/2-1); // Which subpiece of the uint64_t it is.
unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
uint64_t EltBits = 0;
if (OpVal.getOpcode() == ISD::UNDEF) {
@ -974,6 +972,59 @@ static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
return false;
}
// If this is a splat (repetition) of a value across the whole vector, return
// the smallest size that splats it. For example, "0x01010101010101..." is a
// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
const uint64_t Undef128[2],
unsigned &SplatBits, unsigned &SplatUndef,
unsigned &SplatSize) {
// Don't let undefs prevent splats from matching. See if the top 64-bits are
// the same as the lower 64-bits, ignoring undefs.
if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
return false; // Can't be a splat if two pieces don't match.
uint64_t Bits64 = Bits128[0] | Bits128[1];
uint64_t Undef64 = Undef128[0] & Undef128[1];
// Check that the top 32-bits are the same as the lower 32-bits, ignoring
// undefs.
if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
return false; // Can't be a splat if two pieces don't match.
uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
// If the top 16-bits are different than the lower 16-bits, ignoring
// undefs, we have an i32 splat.
if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) {
SplatBits = Bits32;
SplatUndef = Undef32;
SplatSize = 4;
return true;
}
uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
// If the top 8-bits are different than the lower 8-bits, ignoring
// undefs, we have an i16 splat.
if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) {
SplatBits = Bits16;
SplatUndef = Undef16;
SplatSize = 2;
return true;
}
// Otherwise, we have an 8-bit splat.
SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
SplatSize = 1;
return true;
}
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
@ -989,54 +1040,52 @@ static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))
return SDOperand(); // Not a constant vector.
// See if this is all zeros.
if ((VectorBits[0] | VectorBits[1]) == 0) {
// Canonicalize all zero vectors to be v4i32.
if (Op.getValueType() != MVT::v4i32) {
SDOperand Z = DAG.getConstant(0, MVT::i32);
Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
// If this is a splat (repetition) of a value across the whole vector, return
// the smallest size that splats it. For example, "0x01010101010101..." is a
// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
// SplatSize = 1 byte.
unsigned SplatBits, SplatUndef, SplatSize;
if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){
bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0;
// First, handle single instruction cases.
// All zeros?
if (SplatBits == 0) {
// Canonicalize all zero vectors to be v4i32.
if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
SDOperand Z = DAG.getConstant(0, MVT::i32);
Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
}
return Op;
}
return Op;
}
// Check to see if this is something we can use VSPLTI* to form.
MVT::ValueType CanonicalVT = MVT::Other;
SDNode *CST = 0;
if ((CST = PPC::get_VSPLTI_elt(Op.Val, 4, DAG).Val)) // vspltisw
CanonicalVT = MVT::v4i32;
else if ((CST = PPC::get_VSPLTI_elt(Op.Val, 2, DAG).Val)) // vspltish
CanonicalVT = MVT::v8i16;
else if ((CST = PPC::get_VSPLTI_elt(Op.Val, 1, DAG).Val)) // vspltisb
CanonicalVT = MVT::v16i8;
// If this matches one of the vsplti* patterns, force it to the canonical
// type for the pattern.
if (CST) {
if (Op.getValueType() != CanonicalVT) {
// Convert the splatted element to the right element type.
SDOperand Elt = DAG.getNode(ISD::TRUNCATE,
MVT::getVectorBaseType(CanonicalVT),
SDOperand(CST, 0));
std::vector<SDOperand> Ops(MVT::getVectorNumElements(CanonicalVT), Elt);
SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, Ops);
Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
int32_t SextVal= int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);
if (SextVal >= -16 && SextVal <= 15) {
const MVT::ValueType VTys[] = { // canonical VT to use for each size.
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
};
MVT::ValueType CanonicalVT = VTys[SplatSize-1];
// If this is a non-canonical splat for this value,
if (Op.getValueType() != CanonicalVT || HasAnyUndefs) {
SDOperand Elt = DAG.getConstant(SplatBits,
MVT::getVectorBaseType(CanonicalVT));
std::vector<SDOperand> Ops(MVT::getVectorNumElements(CanonicalVT), Elt);
SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, Ops);
Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
}
return Op;
}
return Op;
}
// If this is some other splat of 4-byte elements, see if we can handle it
// in another way.
// FIXME: Make this more undef happy and work with other widths (1,2 bytes).
if (VectorBits[0] == VectorBits[1] &&
unsigned(VectorBits[0]) == unsigned(VectorBits[0] >> 32)) {
unsigned Bits = unsigned(VectorBits[0]);
// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
// 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). These are important
// for fneg/fabs.
if (Bits == 0x80000000 || Bits == 0x7FFFFFFF) {
if (SplatSize == 4 &&
SplatBits == 0x80000000 || SplatBits == (0x7FFFFFFF&~SplatUndef)) {
// Make -1 and vspltisw -1:
SDOperand OnesI = DAG.getConstant(~0U, MVT::i32);
SDOperand OnesV = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
@ -1049,13 +1098,13 @@ static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
OnesV, OnesV);
// If this is 0x7FFF_FFFF, xor by OnesV to invert it.
if (Bits == 0x7FFFFFFF)
if (SplatBits == 0x80000000)
Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
}
}
return SDOperand();
}

View File

@ -40,8 +40,16 @@ a load/store/lve*x sequence.
//===----------------------------------------------------------------------===//
There are a wide range of vector constants we can generate with combinations of
altivec instructions. Examples
GCC does: "t=vsplti*, r = t+t" for constants it can't generate with one vsplti
altivec instructions.
Examples, these work with all widths:
Splat(+/- 16,18,20,22,24,28,30): t = vspliti I/2, r = t+t
Splat(+/- 17,19,21,23,25,29): t = vsplti +/-15, t2 = vsplti I-15, r=t + t2
Splat(31): t = vsplti FB, r = srl t,t
Splat(256): t = vsplti 1, r = vsldoi t, t, 1
Lots more are listed here:
http://www.informatik.uni-bremen.de/~hobold/AltiVec.html
This should be added to the ISD::BUILD_VECTOR case in
PPCTargetLowering::LowerOperation.
@ -52,19 +60,6 @@ FABS/FNEG can be codegen'd with the appropriate and/xor of -0.0.
//===----------------------------------------------------------------------===//
Codegen the constant here with something better than a constant pool load.
void %test_f(<4 x float>* %P, <4 x float>* %Q, float %X) {
%tmp = load <4 x float>* %Q
%tmp = cast <4 x float> %tmp to <4 x int>
%tmp1 = and <4 x int> %tmp, < int 2147483647, int 2147483647, int 2147483647, int 2147483647 >
%tmp2 = cast <4 x int> %tmp1 to <4 x float>
store <4 x float> %tmp2, <4 x float>* %P
ret void
}
//===----------------------------------------------------------------------===//
For functions that use altivec AND have calls, we are VRSAVE'ing all call
clobbered regs.
@ -92,11 +87,6 @@ cond code on CR6.
//===----------------------------------------------------------------------===//
SROA should turn "vector unions" into the appropriate insert/extract element
instructions.
//===----------------------------------------------------------------------===//
We need a way to teach tblgen that some operands of an intrinsic are required to
be constants. The verifier should enforce this constraint.