mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-25 10:27:04 +00:00 
			
		
		
		
	Teach the inline cost analysis about calls that can be simplified and
how to propagate constants through insert and extract value
instructions.
With the recent improvements to instsimplify, this allows inline cost
analysis to constant fold through intrinsic functions, including notably
the with.overflow intrinsic math routines which often show up inside of
STL abstractions. This is yet another piece in the puzzle of breaking
down the code for:
  void f() {
    std::vector<int> v;
    v.push_back(1);
  }
But it still isn't enough. There are a pile of bugs in inline cost still
blocking this.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171195 91177308-0d34-0410-b5e6-96231b3b80d8
			
			
This commit is contained in:
		| @@ -96,6 +96,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { | ||||
|                            int InstructionCost); | ||||
|   bool isGEPOffsetConstant(GetElementPtrInst &GEP); | ||||
|   bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); | ||||
|   bool simplifyCallSite(Function *F, CallSite CS); | ||||
|   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); | ||||
|  | ||||
|   // Custom analysis routines. | ||||
| @@ -124,6 +125,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { | ||||
|   bool visitBinaryOperator(BinaryOperator &I); | ||||
|   bool visitLoad(LoadInst &I); | ||||
|   bool visitStore(StoreInst &I); | ||||
|   bool visitExtractValue(ExtractValueInst &I); | ||||
|   bool visitInsertValue(InsertValueInst &I); | ||||
|   bool visitCallSite(CallSite CS); | ||||
|  | ||||
| public: | ||||
| @@ -610,6 +613,73 @@ bool CallAnalyzer::visitStore(StoreInst &I) { | ||||
|   return false; | ||||
| } | ||||
|  | ||||
| bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { | ||||
|   // Constant folding for extract value is trivial: use the aggregate operand if it is already a Constant, otherwise fall back to the constant recorded for it in SimplifiedValues (null when none is recorded). | ||||
|   Constant *C = dyn_cast<Constant>(I.getAggregateOperand()); | ||||
|   if (!C) | ||||
|     C = SimplifiedValues.lookup(I.getAggregateOperand()); | ||||
|   if (C) { | ||||
|     SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices()); | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   // No constant aggregate is known. SROA can look through these but give them a cost. NOTE(review): presumably a false return is what charges the cost -- confirm against the visitor contract. | ||||
|   return false; | ||||
| } | ||||
|  | ||||
| bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { | ||||
|   // Constant folding for insert value is trivial, but it needs BOTH the aggregate and the inserted value to be constants; each is taken directly if it is a Constant, otherwise looked up in SimplifiedValues. | ||||
|   Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand()); | ||||
|   if (!AggC) | ||||
|     AggC = SimplifiedValues.lookup(I.getAggregateOperand()); | ||||
|   Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand()); | ||||
|   if (!InsertedC) | ||||
|     InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand()); | ||||
|   if (AggC && InsertedC) { | ||||
|     SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC, | ||||
|                                                         I.getIndices()); | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   // At least one operand is not a known constant. SROA can look through these but give them a cost. NOTE(review): presumably a false return is what charges the cost -- confirm against the visitor contract. | ||||
|   return false; | ||||
| } | ||||
|  | ||||
| /// \brief Try to simplify a call site. | ||||
| /// | ||||
| /// Takes a concrete function and callsite and tries to actually simplify it by | ||||
| /// analyzing the arguments and call itself with instsimplify. Returns true if | ||||
| /// it has simplified the callsite to some other entity (a constant), making it | ||||
| /// free; in that case the constant is recorded in SimplifiedValues for the call instruction. Returns false when F cannot be constant folded, when any argument does not map to a constant, or when ConstantFoldCall yields no result. | ||||
| bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { | ||||
|   // FIXME: Using the instsimplify logic directly for this is inefficient | ||||
|   // because we have to continually rebuild the argument list even when no | ||||
|   // simplifications can be performed. Until that is fixed with remapping | ||||
|   // inside of instsimplify, directly constant fold calls here. | ||||
|   if (!canConstantFoldCallTo(F)) | ||||
|     return false; | ||||
|  | ||||
|   // Try to re-map the arguments to constants: each one must either be a Constant already or have a constant recorded in SimplifiedValues; bail out on the first that is neither. | ||||
|   SmallVector<Constant *, 4> ConstantArgs; | ||||
|   ConstantArgs.reserve(CS.arg_size()); | ||||
|   for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); | ||||
|        I != E; ++I) { | ||||
|     Constant *C = dyn_cast<Constant>(*I); | ||||
|     if (!C) | ||||
|       C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I)); | ||||
|     if (!C) | ||||
|       return false; // This argument doesn't map to a constant. | ||||
|  | ||||
|     ConstantArgs.push_back(C); | ||||
|   } | ||||
|   if (Constant *C = ConstantFoldCall(F, ConstantArgs)) { | ||||
|     SimplifiedValues[CS.getInstruction()] = C; | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   return false; | ||||
| } | ||||
|  | ||||
| bool CallAnalyzer::visitCallSite(CallSite CS) { | ||||
|   if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() && | ||||
|       !F.getFnAttributes().hasAttribute(Attribute::ReturnsTwice)) { | ||||
| @@ -621,20 +691,26 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { | ||||
|       cast<CallInst>(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate)) | ||||
|     ContainsNoDuplicateCall = true; | ||||
|  | ||||
|   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { | ||||
|     switch (II->getIntrinsicID()) { | ||||
|     default: | ||||
|       return Base::visitCallSite(CS); | ||||
|  | ||||
|     case Intrinsic::memset: | ||||
|     case Intrinsic::memcpy: | ||||
|     case Intrinsic::memmove: | ||||
|       // SROA can usually chew through these intrinsics, but they aren't free. | ||||
|       return false; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   if (Function *F = CS.getCalledFunction()) { | ||||
|     // When we have a concrete function, first try to simplify it directly. | ||||
|     if (simplifyCallSite(F, CS)) | ||||
|       return true; | ||||
|  | ||||
|     // Next check if it is an intrinsic we know about. | ||||
|     // FIXME: Lift this into part of the InstVisitor. | ||||
|     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { | ||||
|       switch (II->getIntrinsicID()) { | ||||
|       default: | ||||
|         return Base::visitCallSite(CS); | ||||
|  | ||||
|       case Intrinsic::memset: | ||||
|       case Intrinsic::memcpy: | ||||
|       case Intrinsic::memmove: | ||||
|         // SROA can usually chew through these intrinsics, but they aren't free. | ||||
|         return false; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     if (F == CS.getInstruction()->getParent()->getParent()) { | ||||
|       // This flag will fully abort the analysis, so don't bother with anything | ||||
|       // else. | ||||
|   | ||||
| @@ -111,6 +111,44 @@ bb.false: | ||||
|   ret i32 %sub | ||||
| } | ||||
|  | ||||
| declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b) | ||||
|  | ||||
| define i8 @caller4(i8 %z) { | ||||
| ; Check that we can constant fold through intrinsics such as the | ||||
| ; overflow-detecting arithmetic intrinsics. These are particularly important | ||||
| ; as they are used heavily in standard library code and generic C++ code where | ||||
| ; the arguments are often constant but complete generality is required. | ||||
| ; | ||||
| ; CHECK: @caller4 | ||||
| ; CHECK-NOT: call | ||||
| ; CHECK: ret i8 -1 | ||||
|  | ||||
| entry: | ||||
|   %x = call i8 @callee4(i8 254, i8 14, i8 %z) | ||||
|   ret i8 %x | ||||
| } | ||||
|  | ||||
| define i8 @callee4(i8 %x, i8 %y, i8 %z) { | ||||
|   %uadd = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y) | ||||
|   %o = extractvalue {i8, i1} %uadd, 1 | ||||
|   br i1 %o, label %bb.true, label %bb.false | ||||
|  | ||||
| bb.true: | ||||
|   ret i8 -1 | ||||
|  | ||||
| bb.false: | ||||
|   ; This block mustn't be counted in the inline cost: with the constants passed by @caller4 (254 + 14 overflows i8) the overflow bit %o folds to true, so only bb.true is reachable. | ||||
|   %z1 = add i8 %z, 1 | ||||
|   %z2 = add i8 %z1, 1 | ||||
|   %z3 = add i8 %z2, 1 | ||||
|   %z4 = add i8 %z3, 1 | ||||
|   %z5 = add i8 %z4, 1 | ||||
|   %z6 = add i8 %z5, 1 | ||||
|   %z7 = add i8 %z6, 1 | ||||
|   %z8 = add i8 %z7, 1 | ||||
|   ret i8 %z8 | ||||
| } | ||||
|  | ||||
|  | ||||
| define i32 @PR13412.main() { | ||||
| ; This is a somewhat complicated three layer subprogram that was reported to | ||||
|   | ||||
		Reference in New Issue
	
	Block a user