From abbbfbd6726c7af8b27479b4311fe6bb6c40b52b Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 20 Apr 2011 01:19:45 +0000 Subject: [PATCH] Rewrite the expander for umulo/smulo to remember to sign extend the input manually and pass all (now) 4 arguments to the mul libcall. Add a new ExpandLibCall for just this (copied gratuitously from type legalization). Fixes rdar://9292577 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129842 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 66 ++++++++++++++++++++---- test/CodeGen/ARM/umulo-32.ll | 27 ++++++++++ 2 files changed, 84 insertions(+), 9 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index e605e5ae03c..2b6c56eafd7 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -142,6 +142,9 @@ private: DebugLoc dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); + SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, + unsigned NumOps, bool isSigned, DebugLoc dl); + std::pair ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, @@ -2056,6 +2059,40 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, return CallInfo.first; } +/// ExpandLibCall - Generate a libcall taking the given operands as arguments +/// and returning a result of type RetVT. +SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, DebugLoc dl) { + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + std::pair CallInfo = + TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); + + return CallInfo.first; +} + // ExpandChainLibCall - Expand a node into a call to a libcall. Similar to // ExpandLibCall except that the first operand is the in-chain. std::pair @@ -3355,6 +3392,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::UMULO: case ISD::SMULO: { EVT VT = Node->getValueType(0); + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); SDValue BottomHalf; @@ -3372,7 +3410,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TopHalf = BottomHalf.getValue(1); } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2))) { - EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); @@ -3385,7 +3422,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // have a libcall big enough. // Also, we can fall back to a division in some cases, but that's a big // performance hit in the general case. - EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (WideVT == MVT::i16) LC = RTLIB::MUL_I16; @@ -3396,15 +3432,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, else if (WideVT == MVT::i128) LC = RTLIB::MUL_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); - LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); - RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); + + // The high part is obtained by SRA'ing all but one of the bits of low + // part. + unsigned LoSize = VT.getSizeInBits(); + SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); - SDValue Ret = ExpandLibCall(LC, Node, isSigned); - BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret); - TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret, - DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy())); - TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf); + // Here we're passing the 2 arguments explicitly as 4 arguments that are + // pre-lowered to the correct types. This all depends upon WideVT not + // being a legal type for the architecture and thus has to be split to + // two arguments. + SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; + SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); + BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, + DAG.getIntPtrConstant(0)); + TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, + DAG.getIntPtrConstant(1)); } + if (isSigned) { Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy(BottomHalf.getValueType())); diff --git a/test/CodeGen/ARM/umulo-32.ll b/test/CodeGen/ARM/umulo-32.ll index aa7d28a6234..fa5c0168fef 100644 --- a/test/CodeGen/ARM/umulo-32.ll +++ b/test/CodeGen/ARM/umulo-32.ll @@ -12,3 +12,30 @@ define i32 @func(i32 %a) nounwind { } declare %umul.ty @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone + +define i32 @f(i32 %argc, i8** %argv) ssp { +; CHECK: func +; CHECK: str r0 +; CHECK: movs r2 +; CHECK: mov r1 +; CHECK: mov r3 +; CHECK: muldi3 +%1 = alloca i32, align 4 +%2 = alloca i32, align 4 +%3 = alloca i8**, align 4 +%m_degree = alloca i32, align 4 +store i32 0, i32* %1 +store i32 %argc, i32* %2, align 4 +store i8** %argv, i8*** %3, align 4 +store i32 10, i32* %m_degree, align 4 +%4 = load i32* %m_degree, align 4 +%5 = call %umul.ty @llvm.umul.with.overflow.i32(i32 %4, i32 8) +%6 = extractvalue %umul.ty %5, 1 +%7 = extractvalue %umul.ty %5, 0 +%8 = select i1 %6, i32 -1, i32 %7 +%9 = call noalias i8* @_Znam(i32 %8) +%10 = bitcast i8* %9 to double* +ret i32 0 +} + +declare noalias i8* @_Znam(i32)