diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 69bc3bb57fa..c25497aa97c 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -992,10 +992,8 @@ public: SDValue CreateStackTemporary(EVT VT1, EVT VT2); /// FoldConstantArithmetic - - SDValue FoldConstantArithmetic(unsigned Opcode, - EVT VT, - ConstantSDNode *Cst1, - ConstantSDNode *Cst2); + SDValue FoldConstantArithmetic(unsigned Opcode, EVT VT, + SDNode *Cst1, SDNode *Cst2); /// FoldSetCC - Constant fold a setcc to true or false. SDValue FoldSetCC(EVT VT, SDValue N1, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 57241227d9e..09885d8deaf 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2680,44 +2680,117 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return SDValue(N, 0); } -SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, - EVT VT, - ConstantSDNode *Cst1, - ConstantSDNode *Cst2) { - const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue(); +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, + SDNode *Cst1, SDNode *Cst2) { + SmallVector, 4> Inputs; + SmallVector Outputs; + EVT SVT = VT.getScalarType(); - switch (Opcode) { - case ISD::ADD: return getConstant(C1 + C2, VT); - case ISD::SUB: return getConstant(C1 - C2, VT); - case ISD::MUL: return getConstant(C1 * C2, VT); - case ISD::UDIV: - if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT); - break; - case ISD::UREM: - if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT); - break; - case ISD::SDIV: - if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT); - break; - case ISD::SREM: - if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT); - break; - case ISD::AND: return getConstant(C1 & C2, VT); - case ISD::OR: return getConstant(C1 | C2, VT); - case ISD::XOR: return getConstant(C1 ^ C2, VT); - case ISD::SHL: return getConstant(C1 << C2, VT); - case ISD::SRL: return getConstant(C1.lshr(C2), VT); - case ISD::SRA: return getConstant(C1.ashr(C2), VT); - case ISD::ROTL: return getConstant(C1.rotl(C2), VT); - case ISD::ROTR: return getConstant(C1.rotr(C2), VT); - default: break; + ConstantSDNode *Scalar1 = dyn_cast(Cst1); + ConstantSDNode *Scalar2 = dyn_cast(Cst2); + if (Scalar1 && Scalar2) { + // Scalar instruction. + Inputs.push_back(std::make_pair(Scalar1, Scalar2)); + } else { + // For vectors extract each constant element into Inputs so we can constant + // fold them individually. + BuildVectorSDNode *BV1 = dyn_cast(Cst1); + BuildVectorSDNode *BV2 = dyn_cast(Cst2); + if (!BV1 || !BV2) + return SDValue(); + + assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!"); + + for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { + ConstantSDNode *V1 = dyn_cast(BV1->getOperand(I)); + ConstantSDNode *V2 = dyn_cast(BV2->getOperand(I)); + if (!V1 || !V2) // Not a constant, bail. + return SDValue(); + + // Avoid BUILD_VECTOR nodes that perform implicit truncation. + // FIXME: This is valid and could be handled by truncating the APInts. + if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) + return SDValue(); + + Inputs.push_back(std::make_pair(V1, V2)); + } } - return SDValue(); + // We have a number of constant values, constant fold them element by element. + for (unsigned I = 0, E = Inputs.size(); I != E; ++I) { + const APInt &C1 = Inputs[I].first->getAPIntValue(); + const APInt &C2 = Inputs[I].second->getAPIntValue(); + + switch (Opcode) { + case ISD::ADD: + Outputs.push_back(getConstant(C1 + C2, SVT)); + break; + case ISD::SUB: + Outputs.push_back(getConstant(C1 - C2, SVT)); + break; + case ISD::MUL: + Outputs.push_back(getConstant(C1 * C2, SVT)); + break; + case ISD::UDIV: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.udiv(C2), SVT)); + break; + case ISD::UREM: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.urem(C2), SVT)); + break; + case ISD::SDIV: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.sdiv(C2), SVT)); + break; + case ISD::SREM: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.srem(C2), SVT)); + break; + case ISD::AND: + Outputs.push_back(getConstant(C1 & C2, SVT)); + break; + case ISD::OR: + Outputs.push_back(getConstant(C1 | C2, SVT)); + break; + case ISD::XOR: + Outputs.push_back(getConstant(C1 ^ C2, SVT)); + break; + case ISD::SHL: + Outputs.push_back(getConstant(C1 << C2, SVT)); + break; + case ISD::SRL: + Outputs.push_back(getConstant(C1.lshr(C2), SVT)); + break; + case ISD::SRA: + Outputs.push_back(getConstant(C1.ashr(C2), SVT)); + break; + case ISD::ROTL: + Outputs.push_back(getConstant(C1.rotl(C2), SVT)); + break; + case ISD::ROTR: + Outputs.push_back(getConstant(C1.rotr(C2), SVT)); + break; + default: + return SDValue(); + } + } + + // Handle the scalar case first. + if (Outputs.size() == 1) + return Outputs.back(); + + // Otherwise build a big vector out of the scalar elements we generated. + return getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, Outputs.data(), + Outputs.size()); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, - SDValue N1, SDValue N2) { +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, + SDValue N2) { ConstantSDNode *N1C = dyn_cast(N1.getNode()); ConstantSDNode *N2C = dyn_cast(N2.getNode()); switch (Opcode) { @@ -3013,16 +3086,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, } } - if (N1C) { - if (N2C) { - SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C); - if (SV.getNode()) return SV; - } else { // Cannonicalize constant to RHS if commutative - if (isCommutativeBinOp(Opcode)) { - std::swap(N1C, N2C); - std::swap(N1, N2); - } - } + // Perform trivial constant folding. + SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode()); + if (SV.getNode()) return SV; + + // Canonicalize constant to RHS if commutative. + if (N1C && !N2C && isCommutativeBinOp(Opcode)) { + std::swap(N1C, N2C); + std::swap(N1, N2); } // Constant fold FP operations. @@ -3030,7 +3101,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, ConstantFPSDNode *N2CFP = dyn_cast(N2.getNode()); if (N1CFP) { if (!N2CFP && isCommutativeBinOp(Opcode)) { - // Cannonicalize constant to RHS if commutative + // Canonicalize constant to RHS if commutative. std::swap(N1CFP, N2CFP); std::swap(N1, N2); } else if (N2CFP) { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 9a68927103e..4cd0fc484a1 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5032,9 +5032,14 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // If this value is in the range [-32,30] and is even, use: // tmp = VSPLTI[bhw], result = add tmp, tmp if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) { + // FIXME: This is currently disabled because the ADD will be folded back + // into an invalid BUILD_VECTOR immediately. + return SDValue(); +#if 0 SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl); Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); +#endif } // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is diff --git a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll index 8802b97d2a6..00a402e0e48 100644 --- a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll +++ b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vadduhm ; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubuhm +; XFAIL: * define <4 x i32> @test() nounwind { ret <4 x i32> < i32 4293066722, i32 4293066722, i32 4293066722, i32 4293066722> diff --git a/test/CodeGen/X86/2011-11-30-or.ll b/test/CodeGen/X86/2011-11-30-or.ll index f66248bc5a7..8ac4632329b 100644 --- a/test/CodeGen/X86/2011-11-30-or.ll +++ b/test/CodeGen/X86/2011-11-30-or.ll @@ -8,9 +8,9 @@ target triple = "x86_64-apple-macosx10.6.6" ; CHECK: pblendvb %xmm1, %xmm2 ; CHECK: ret -define void @select_func() { +define void @select_func(<8 x i16> %in) { entry: - %c.lobit.i.i.i = ashr <8 x i16> , + %c.lobit.i.i.i = ashr <8 x i16> %in, %and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, %and.i5.i.i.i = bitcast <8 x i16> %and.i56.i.i.i to <2 x i64> %neg.i.i.i.i = xor <8 x i16> %c.lobit.i.i.i, diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll index 34445428ea2..2b6b9ef8f31 100644 --- a/test/CodeGen/X86/blend-msb.ll +++ b/test/CodeGen/X86/blend-msb.ll @@ -5,7 +5,8 @@ ; shifting the needed bit to the MSB, and not using shl+sra. ;CHECK: vsel_float -;CHECK: pslld +;CHECK: movl $-2147483648 +;CHECK-NEXT: movd ;CHECK-NEXT: blendvps ;CHECK: ret define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) { @@ -14,7 +15,8 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) { } ;CHECK: vsel_4xi8 -;CHECK: pslld +;CHECK: movl $-2147483648 +;CHECK-NEXT: movd ;CHECK-NEXT: blendvps ;CHECK: ret define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) { diff --git a/test/CodeGen/X86/vector-gep.ll b/test/CodeGen/X86/vector-gep.ll index d08e2a07469..ec93ce0761c 100644 --- a/test/CodeGen/X86/vector-gep.ll +++ b/test/CodeGen/X86/vector-gep.ll @@ -8,10 +8,8 @@ entry: %vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32 1 %vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32 2 %vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32 3 -;CHECK: pslld $2 ;CHECK: padd %A2 = getelementptr <4 x i32*> %vecinit6.i, <4 x i32> -;CHECK: pslld $2 ;CHECK: padd %A3 = getelementptr <4 x i32*> %A2, <4 x i32> ret <4 x i32*> %A3 @@ -21,7 +19,6 @@ entry: ;CHECK: AGEP1: define i32 @AGEP1(<4 x i32*> %param) nounwind { entry: -;CHECK: pslld $2 ;CHECK: padd %A2 = getelementptr <4 x i32*> %param, <4 x i32> %k = extractelement <4 x i32*> %A2, i32 3