diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index 089e61f0ff7..e76362d36f7 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -203,12 +203,15 @@ def int_siglongjmp : Intrinsic<[llvm_void_ty, llvm_ptr_ty, llvm_i32_ty]>;
 
 // None of these intrinsics accesses memory at all.
 let Properties = [IntrNoMem] in {
-  def int_part_select :
-    Intrinsic<[llvm_int_ty, llvm_int_ty, llvm_i32_ty, llvm_i32_ty]>;
   def int_bswap: Intrinsic<[llvm_int_ty, llvm_int_ty]>;
   def int_ctpop: Intrinsic<[llvm_i32_ty, llvm_int_ty]>;
   def int_ctlz : Intrinsic<[llvm_i32_ty, llvm_int_ty]>;
   def int_cttz : Intrinsic<[llvm_i32_ty, llvm_int_ty]>;
+  def int_part_select :
+    Intrinsic<[llvm_int_ty, llvm_int_ty, llvm_i32_ty, llvm_i32_ty]>;
+  def int_part_set :
+    Intrinsic<[llvm_int_ty, llvm_int_ty, llvm_int_ty, llvm_i32_ty,
+               llvm_i32_ty]>;
 }
 
 //===------------------------ Debugger Intrinsics -------------------------===//
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 2c64b20a32f..907a5c0ea05 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -236,14 +236,17 @@ static Value *LowerCTLZ(Value *V, Instruction *IP) {
   return LowerCTPOP(V, IP);
 }
 
-/// Convert the llvm.bit.part_select.iX.iY.iZ intrinsic. This intrinsic takes
-/// three integer operands of arbitrary bit width. The first operand is the
-/// value from which to select the bits. The second and third operands define a
-/// range of bits to select. The result is the bits selected and has a
-/// corresponding width of Left-Right (second operand - third operand).
-/// @see IEEE 1666-2005, System C, Section 7.2.6, pg 175.
-/// @brief Lowering of llvm.bit.part_select intrinsic.
-static Instruction *LowerBitPartSelect(CallInst *CI) {
+/// Convert the llvm.part.select.iX.iY intrinsic. This intrinsic takes
+/// three integer arguments. The first argument is the Value from which the
+/// bits will be selected. It may be of any bit width. The second and third
+/// arguments specify a range of bits to select with the second argument
+/// specifying the low bit and the third argument specifying the high bit. Both
+/// must be type i32. The result is the corresponding selected bits from the
+/// Value in the same width as the Value (first argument). If the low bit index
+/// is higher than the high bit index then the inverse selection is done and
+/// the bits are returned in inverse order.
+/// @brief Lowering of llvm.part.select intrinsic.
+static Instruction *LowerPartSelect(CallInst *CI) {
   // Make sure we're dealing with a part select intrinsic here
   Function *F = CI->getCalledFunction();
   const FunctionType *FT = F->getFunctionType();
@@ -268,8 +271,8 @@ static Instruction *LowerBitPartSelect(CallInst *CI) {
 
     // Get the arguments to the function
     Value* Val = F->getOperand(0);
-    Value* Left = F->getOperand(1);
-    Value* Right = F->getOperand(2);
+    Value* Right = F->getOperand(1);
+    Value* Left = F->getOperand(2);
 
     // We want to select a range of bits here such that [Left, Right] is shifted
     // down to the low bits. However, it is quite possible that Left is smaller
@@ -406,6 +409,189 @@ static Instruction *LowerBitPartSelect(CallInst *CI) {
   return new CallInst(F, Args, 3, CI->getName(), CI);
 }
 
+/// Convert the llvm.part.set.iX.iY.iZ intrinsic. This intrinsic takes
+/// four integer arguments (iAny %Value, iAny %Replacement, i32 %Low, i32 %High)
+/// The first two arguments can be any bit width. The result is the same width
+/// as %Value. The operation replaces bits between %Low and %High with the value
+/// in %Replacement. If %Replacement is not the same width, it is truncated or
+/// zero extended as appropriate to fit the bits being replaced. If %Low is
+/// greater than %High then the inverse set of bits are replaced.
+/// @brief Lowering of llvm.part.set intrinsic.
+/// @param CI the call to the llvm.part.set intrinsic that is being lowered.
+/// @returns CI unchanged if the callee does not have the expected prototype,
+/// otherwise a new call to the generated internal implementation function.
+static Instruction *LowerPartSet(CallInst *CI) {
+  // Make sure we're dealing with a part set intrinsic here
+  Function *F = CI->getCalledFunction();
+  const FunctionType *FT = F->getFunctionType();
+  if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
+      FT->getNumParams() != 4 || !FT->getParamType(0)->isInteger() ||
+      !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger() ||
+      !FT->getParamType(3)->isInteger())
+    return CI;
+
+  // Get the intrinsic implementation function by converting all the . to _
+  // in the intrinsic's function name and then reconstructing the function
+  // declaration.
+  std::string Name(F->getName());
+  for (unsigned i = 4; i < Name.length(); ++i)
+    if (Name[i] == '.')
+      Name[i] = '_';
+  Module* M = F->getParent();
+  F = cast<Function>(M->getOrInsertFunction(Name, FT));
+  F->setLinkage(GlobalValue::InternalLinkage);
+
+  // If we haven't defined the impl function yet, do so now
+  if (F->isDeclaration()) {
+    // Note: the following code is based on code generated by llvm2cpp with
+    // the following input. This is just *one* example of a generated function.
+    // The functions vary by bit width of result and first two arguments.
+    // The generated code has been changed to deal with any bit width not just
+    // the 32/64 bitwidths used in the sample below.
+    //
+    // define i64 @part_set(i64 %Val, i32 %Rep, i32 %Lo, i32 %Hi) {
+    // entry:
+    //   %is_forward = icmp ult i32 %Lo, %Hi
+    //   %Lo.pn = select i1 %is_forward, i32 %Hi, i32 %Lo
+    //   %Hi.pn = select i1 %is_forward, i32 %Lo, i32 %Hi
+    //   %iftmp.16.0 = sub i32 %Lo.pn, %Hi.pn
+    //   icmp ult i32 %iftmp.16.0, 32
+    //   br i1 %1, label %cond_true11, label %cond_next19
+    // cond_true11:
+    //   %tmp13 = sub i32 32, %iftmp.16.0
+    //   %tmp14 = lshr i32 -1, %tmp13
+    //   %tmp16 = and i32 %tmp14, %Rep
+    //   br label %cond_next19
+    // cond_next19:
+    //   %iftmp.17.0 = phi i32 [ %tmp16, %cond_true11 ], [ %Rep, %entry ]
+    //   %tmp2021 = zext i32 %iftmp.17.0 to i64
+    //   icmp ugt i32 %Lo, %Hi
+    //   br i1 %2, label %cond_next60, label %cond_true24
+    // cond_true24:
+    //   %tmp25.cast = zext i32 %Hi to i64
+    //   %tmp26 = lshr i64 -1, %tmp25.cast
+    //   %tmp27.cast = zext i32 %Lo to i64
+    //   %tmp28 = shl i64 %tmp26, %tmp27.cast
+    //   %tmp28not = xor i64 %tmp28, -1
+    //   %tmp31 = shl i64 %tmp2021, %tmp27.cast
+    //   %tmp34 = and i64 %tmp28not, %Val
+    //   %Val_addr.064 = or i64 %tmp31, %tmp34
+    //   ret i64 %Val_addr.064
+    // cond_next60:
+    //   %tmp39.cast = zext i32 %Lo to i64
+    //   %tmp40 = shl i64 -1, %tmp39.cast
+    //   %tmp41.cast = zext i32 %Hi to i64
+    //   %tmp42 = shl i64 -1, %tmp41.cast
+    //   %tmp45.demorgan = or i64 %tmp42, %tmp40
+    //   %tmp45 = xor i64 %tmp45.demorgan, -1
+    //   %tmp47 = and i64 %tmp45, %Val
+    //   %tmp50 = shl i64 %tmp2021, %tmp39.cast
+    //   %tmp52 = sub i32 32, %Hi
+    //   %tmp52.cast = zext i32 %tmp52 to i64
+    //   %tmp54 = lshr i64 %tmp2021, %tmp52.cast
+    //   %tmp57 = or i64 %tmp50, %tmp47
+    //   %Val_addr.0 = or i64 %tmp57, %tmp54
+    //   ret i64 %Val_addr.0
+    // }
+
+    // Get the arguments for the function.
+    Function::arg_iterator args = F->arg_begin();
+    Value* Val = args++; Val->setName("Val");
+    Value* Rep = args++; Rep->setName("Rep");
+    Value* Lo = args++; Lo->setName("Lo");
+    Value* Hi = args++; Hi->setName("Hi");
+
+    // Get some types we need
+    const IntegerType* ValTy = cast<IntegerType>(Val->getType());
+    const IntegerType* RepTy = cast<IntegerType>(Rep->getType());
+    uint32_t ValBits = ValTy->getBitWidth();
+    uint32_t RepBits = RepTy->getBitWidth();
+
+    // Constant Definitions
+    ConstantInt* RepBitWidth = ConstantInt::get(Type::Int32Ty, RepBits);
+    ConstantInt* RepMask = ConstantInt::getAllOnesValue(RepTy);
+    ConstantInt* ValMask = ConstantInt::getAllOnesValue(ValTy);
+
+    BasicBlock* entry = new BasicBlock("entry",F,0);
+    BasicBlock* large = new BasicBlock("large",F,0);
+    BasicBlock* small = new BasicBlock("small",F,0);
+    BasicBlock* forward = new BasicBlock("cond_true24",F,0);
+    BasicBlock* reverse = new BasicBlock("cond_next60",F,0);
+
+    // Block entry (entry)
+    // First, convert Lo and Hi to ValTy bit width
+    // NOTE(review): from here on Lo, Hi and NumBits are ValTy, whereas
+    // RepBitWidth is an i32 constant and RepMask/Rep are RepTy. The icmp, sub
+    // and lshr below mix those operands, and Rep3 is unconditionally zero
+    // extended to ValTy, so the operand widths look inconsistent for most
+    // Val/Rep combinations -- TODO confirm and normalize the widths.
+    if (ValBits > 32) {
+      Hi = new ZExtInst(Hi, ValTy, "", entry);
+      Lo = new ZExtInst(Lo, ValTy, "", entry);
+    } else if (ValBits < 32) {
+      Hi = new TruncInst(Hi, ValTy, "", entry);
+      Lo = new TruncInst(Lo, ValTy, "", entry);
+    }
+    ICmpInst* is_forward =
+      new ICmpInst(ICmpInst::ICMP_ULT, Lo, Hi, "", entry);
+    SelectInst* Lo_pn = new SelectInst(is_forward, Hi, Lo, "", entry);
+    SelectInst* Hi_pn = new SelectInst(is_forward, Lo, Hi, "", entry);
+    BinaryOperator* NumBits = BinaryOperator::createSub(Lo_pn, Hi_pn, "",entry);
+    ICmpInst* is_large =
+      new ICmpInst(ICmpInst::ICMP_ULT, NumBits, RepBitWidth, "", entry);
+    new BranchInst(large, small, is_large, entry);
+
+    // Block "large"
+    BinaryOperator* MaskBits =
+      BinaryOperator::createSub(RepBitWidth, NumBits, "", large);
+    BinaryOperator* Mask1 =
+      BinaryOperator::createLShr(RepMask, MaskBits, "", large);
+    BinaryOperator* Rep2 = BinaryOperator::createAnd(Mask1, Rep, "", large);
+    new BranchInst(small, large);
+
+    // Block "small"
+    PHINode* Rep3 = new PHINode(RepTy, "", small);
+    Rep3->reserveOperandSpace(2);
+    Rep3->addIncoming(Rep2, large);
+    Rep3->addIncoming(Rep, entry);
+    CastInst* Rep4 = new ZExtInst(Rep3, ValTy, "", small);
+    ICmpInst* is_reverse =
+      new ICmpInst(ICmpInst::ICMP_UGT, Lo, Hi, "", small);
+    new BranchInst(reverse, forward, is_reverse, small);
+
+    // Block "forward"
+    Value* t1 = BinaryOperator::createLShr(ValMask, Hi, "", forward);
+    Value* t2 = BinaryOperator::createShl(t1, Lo, "", forward);
+    Value* nott2 = BinaryOperator::createXor(t2, ValMask, "", forward);
+    Value* t3 = BinaryOperator::createShl(Rep4, Lo, "", forward);
+    Value* t4 = BinaryOperator::createAnd(nott2, Val, "", forward);
+    Value* FRslt = BinaryOperator::createOr(t3, t4, "", forward);
+    new ReturnInst(FRslt, forward);
+
+    // Block "reverse"
+    Value* t5 = BinaryOperator::createShl(ValMask, Lo, "", reverse);
+    Value* t6 = BinaryOperator::createShl(ValMask, Hi, "", reverse);
+    Value* t7 = BinaryOperator::createOr(t6, t5, "", reverse);
+    Value* t8 = BinaryOperator::createXor(t7, ValMask, "", reverse);
+    Value* t9 = BinaryOperator::createAnd(t8, Val, "", reverse);
+    Value* t10 = BinaryOperator::createShl(Rep4, Lo, "", reverse);
+    Value* t11 = BinaryOperator::createSub(RepBitWidth, Hi, "", reverse);
+    Value* t12 = new ZExtInst(t11, ValTy, "", reverse);
+    Value* t13 = BinaryOperator::createLShr(Rep4, t12, "",reverse);
+    Value* t14 = BinaryOperator::createOr(t10, t9, "", reverse);
+    Value* RRslt = BinaryOperator::createOr(t14, t13, "", reverse);
+    new ReturnInst(RRslt, reverse);
+  }
+
+  // Return a call to the implementation function
+  Value *Args[4];
+  Args[0] = CI->getOperand(0);
+  Args[1] = CI->getOperand(1);
+  Args[2] = CI->getOperand(2);
+  Args[3] = CI->getOperand(3);
+  return new CallInst(F, Args, 4, CI->getName(), CI);
+}
+
 void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
   Function *Callee = CI->getCalledFunction();
 
@@ -476,7 +662,11 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
   }
 
   case Intrinsic::part_select:
-    CI->replaceAllUsesWith(LowerBitPartSelect(CI));
+    CI->replaceAllUsesWith(LowerPartSelect(CI));
+    break;
+
+  case Intrinsic::part_set:
+    CI->replaceAllUsesWith(LowerPartSet(CI));
     break;
 
   case Intrinsic::stacksave:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 9683d486eca..77374f63c84 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2691,7 +2691,12 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
   }
   case Intrinsic::part_select: {
     // Currently not implemented: just abort
-    assert(0 && "bit_part_select intrinsic not implemented");
+    assert(0 && "part_select intrinsic not implemented");
+    abort();
+  }
+  case Intrinsic::part_set: {
+    // Currently not implemented: just abort
+    assert(0 && "part_set intrinsic not implemented");
     abort();
   }
   case Intrinsic::bswap:
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 89a08b0770d..d09f157b7b1 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -1028,13 +1028,14 @@ void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F, ...) {
         if (GotBits < 16 || GotBits % 16 != 0)
           CheckFailed("Intrinsic requires even byte width argument", F);
         /* FALL THROUGH */
+      case Intrinsic::part_set:
       case Intrinsic::part_select:
         if (ArgNo == 1) {
           unsigned ResultBits =
             cast<IntegerType>(FTy->getReturnType())->getBitWidth();
           if (GotBits != ResultBits)
-            CheckFailed("Intrinsic requires parameter and result bit "
-                        "widths to match", F);
+            CheckFailed("Intrinsic requires the bit widths of the first "
+                        "parameter and the result to match", F);
         }
         break;
       }