diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index d6cb261d42b..278292f4ccb 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -60,6 +60,7 @@ SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), STATISTIC(NumSpeculations, "Number of speculative executed instructions"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); +STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); namespace { @@ -3252,12 +3253,19 @@ namespace { uint64_t TableSize, ConstantInt *Offset, const SmallVector, 4>& Values, - Constant *DefaultValue); + Constant *DefaultValue, + const TargetData *TD); /// BuildLookup - Build instructions with Builder to retrieve the value at /// the position given by Index in the lookup table. Value *BuildLookup(Value *Index, IRBuilder<> &Builder); + /// WouldFitInRegister - Return true if a table with TableSize elements of + /// type ElementType would fit in a target-legal register. + static bool WouldFitInRegister(const TargetData *TD, + uint64_t TableSize, + const Type *ElementType); + private: // Depending on the contents of the table, it can be represented in // different ways. @@ -3266,6 +3274,11 @@ namespace { // store that single value and return it for each lookup. SingleValueKind, + // For small tables with integer elements, we can pack them into a bitmap + // that fits into a target-legal register. Values are retrieved by + // shift and mask operations. + BitMapKind, + // The table is stored as an array of values. Values are retrieved by load // instructions from the table. ArrayKind @@ -3274,6 +3287,10 @@ namespace { // For SingleValueKind, this is the single value. Constant *SingleValue; + // For BitMapKind, this is the bitmap. + ConstantInt *BitMap; + IntegerType *BitMapElementTy; + // For ArrayKind, this is the array. GlobalVariable *Array; }; @@ -3283,7 +3300,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M, uint64_t TableSize, ConstantInt *Offset, const SmallVector, 4>& Values, - Constant *DefaultValue) { + Constant *DefaultValue, + const TargetData *TD) { assert(Values.size() && "Can't build lookup table without values."); assert(TableSize >= Values.size() && "Can't fit values in table."); @@ -3323,6 +3341,21 @@ SwitchLookupTable::SwitchLookupTable(Module &M, return; } + // If the type is integer and the table fits in a register, build a bitmap. + if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) { + IntegerType *IT = cast(DefaultValue->getType()); + APInt TableInt(TableSize * IT->getBitWidth(), 0); + for (uint64_t I = TableSize; I > 0; --I) { + TableInt <<= IT->getBitWidth(); + TableInt |= cast(TableContents[I - 1])->getZExtValue(); + } + BitMap = ConstantInt::get(M.getContext(), TableInt); + BitMapElementTy = IT; + Kind = BitMapKind; + ++NumBitMaps; + return; + } + // Store the table in an array. ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize); Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); @@ -3339,6 +3372,32 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { switch (Kind) { case SingleValueKind: return SingleValue; + case BitMapKind: { + // Type of the bitmap (e.g. i59). + IntegerType *MapTy = BitMap->getType(); + + // Cast Index to the same type as the bitmap. + // Note: The Index is <= the number of elements in the table, so + // truncating it to the width of the bitmask is safe. + Value *ShiftAmt = Index; + IntegerType *IndexTy = cast(Index->getType()); + if (IndexTy->getBitWidth() < MapTy->getBitWidth()) + ShiftAmt = Builder.CreateZExt(ShiftAmt, MapTy, "switch.zext"); + else if (IndexTy->getBitWidth() > MapTy->getBitWidth()) + ShiftAmt = Builder.CreateTrunc(ShiftAmt, MapTy, "switch.trunc"); + + // Multiply the shift amount by the element width. + ShiftAmt = Builder.CreateMul(ShiftAmt, + ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()), + "switch.shiftamt"); + + // Shift down. + Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt, + "switch.downshift"); + // Mask off. + return Builder.CreateTrunc(DownShifted, BitMapElementTy, + "switch.masked"); + } case ArrayKind: { Value *GEPIndices[] = { Builder.getInt32(0), Index }; Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices, @@ -3349,35 +3408,53 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { llvm_unreachable("Unknown lookup table kind!"); } +bool SwitchLookupTable::WouldFitInRegister(const TargetData *TD, + uint64_t TableSize, + const Type *ElementType) { + if (!TD) + return false; + const IntegerType *IT = dyn_cast(ElementType); + if (!IT) + return false; + // FIXME: If the type is wider than it needs to be, e.g. i8 but all values + // are <= 15, we could try to narrow the type. + return TD->fitsInLegalInteger(TableSize * IT->getBitWidth()); +} + /// ShouldBuildLookupTable - Determine whether a lookup table should be built /// for this switch, based on the number of caes, size of the table and the /// types of the results. static bool ShouldBuildLookupTable(SwitchInst *SI, - uint64_t TableSize) { + uint64_t TableSize, + const TargetData *TD, + const SmallDenseMap& ResultTypes) { // The table density should be at least 40%. This is the same criterion as for // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. // FIXME: Find the best cut-off. if (SI->getNumCases() * 10 >= TableSize * 4) return true; - return false; + // If each table would fit in a register, we should build it anyway. + for (SmallDenseMap::const_iterator I = ResultTypes.begin(), + E = ResultTypes.end(); I != E; ++I) { + if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second)) + return false; + } + return true; } /// SwitchToLookupTable - If the switch is only used to initialize one or more /// phi nodes in a common successor block with different constant values, /// replace the switch with lookup tables. static bool SwitchToLookupTable(SwitchInst *SI, - IRBuilder<> &Builder) { + IRBuilder<> &Builder, + const TargetData* TD) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); // FIXME: Handle unreachable cases. // FIXME: If the switch is too sparse for a lookup table, perhaps we could // split off a dense part and build a lookup table for that. - // FIXME: If the results are all integers and the lookup table would fit in a - // target-legal register, we should store them as a bitmap and use shift/mask - // to look up the result. - // FIXME: This creates arrays of GEPs to constant strings, which means each // GEP needs a runtime relocation in PIC code. We should just build one big // string and lookup indices into that. @@ -3441,7 +3518,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1)) return false; uint64_t TableSize = RangeSpread.getLimitedValue() + 1; - if (!ShouldBuildLookupTable(SI, TableSize)) + if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes)) return false; // Create the BB that does the lookups. @@ -3467,7 +3544,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, PHINode *PHI = PHIs[I]; SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI], - DefaultResults[PHI]); + DefaultResults[PHI], TD); Value *Result = Table.BuildLookup(TableIndex, Builder); @@ -3537,7 +3614,7 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (ForwardSwitchConditionToPHI(SI)) return SimplifyCFG(BB) | true; - if (SwitchToLookupTable(SI, Builder)) + if (SwitchToLookupTable(SI, Builder, TD)) return SimplifyCFG(BB) | true; return false; diff --git a/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll index f0bd688050b..97c80b9d48a 100644 --- a/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll +++ b/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll @@ -6,17 +6,14 @@ target triple = "x86_64-unknown-linux-gnu" ; The table for @f ; CHECK: @switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1] -; The int table for @h -; CHECK: @switch.table1 = private unnamed_addr constant [4 x i8] c"*\09X\05" - ; The float table for @h -; CHECK: @switch.table2 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000] +; CHECK: @switch.table1 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000] ; The table for @foostring -; CHECK: @switch.table3 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)] +; CHECK: @switch.table2 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)] ; The table for @earlyreturncrash -; CHECK: @switch.table4 = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5] +; CHECK: @switch.table3 = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5] ; A simple int-to-int selection switch. ; It is dense enough to be replaced by table lookup. @@ -88,14 +85,15 @@ sw.epilog: ; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4 ; CHECK-NEXT: br i1 %0, label %switch.lookup, label %sw.epilog ; CHECK: switch.lookup: -; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8]* @switch.table1, i32 0, i32 %switch.tableidx -; CHECK-NEXT: %switch.load = load i8* %switch.gep -; CHECK-NEXT: %switch.gep1 = getelementptr inbounds [4 x float]* @switch.table2, i32 0, i32 %switch.tableidx -; CHECK-NEXT: %switch.load2 = load float* %switch.gep1 +; CHECK-NEXT: %switch.shiftamt = mul i32 %switch.tableidx, 8 +; CHECK-NEXT: %switch.downshift = lshr i32 89655594, %switch.shiftamt +; CHECK-NEXT: %switch.masked = trunc i32 %switch.downshift to i8 +; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x float]* @switch.table1, i32 0, i32 %switch.tableidx +; CHECK-NEXT: %switch.load = load float* %switch.gep ; CHECK-NEXT: br label %sw.epilog ; CHECK: sw.epilog: -; CHECK-NEXT: %a.0 = phi i8 [ %switch.load, %switch.lookup ], [ 7, %entry ] -; CHECK-NEXT: %b.0 = phi float [ %switch.load2, %switch.lookup ], [ 0x4023FAE140000000, %entry ] +; CHECK-NEXT: %a.0 = phi i8 [ %switch.masked, %switch.lookup ], [ 7, %entry ] +; CHECK-NEXT: %b.0 = phi float [ %switch.load, %switch.lookup ], [ 0x4023FAE140000000, %entry ] ; CHECK-NEXT: call void @dummy(i8 signext %a.0, float %b.0) ; CHECK-NEXT: ret void } @@ -137,7 +135,7 @@ return: ; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4 ; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return ; CHECK: switch.lookup: -; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table3, i32 0, i32 %switch.tableidx +; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table2, i32 0, i32 %switch.tableidx ; CHECK-NEXT: %switch.load = load i8** %switch.gep ; CHECK-NEXT: ret i8* %switch.load } @@ -166,9 +164,70 @@ sw.epilog: ; CHECK: @earlyreturncrash ; CHECK: switch.lookup: -; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32]* @switch.table4, i32 0, i32 %switch.tableidx +; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32]* @switch.table3, i32 0, i32 %switch.tableidx ; CHECK-NEXT: %switch.load = load i32* %switch.gep ; CHECK-NEXT: ret i32 %switch.load ; CHECK: sw.epilog: ; CHECK-NEXT: ret i32 7 } + + +; Example 7 from http://blog.regehr.org/archives/320 +; It is not dense enough for a regular table, but the results +; can be packed into a bitmap. + +define i32 @crud(i8 zeroext %c) { +entry: + %cmp = icmp ult i8 %c, 33 + br i1 %cmp, label %lor.end, label %switch.early.test + +switch.early.test: + switch i8 %c, label %lor.rhs [ + i8 92, label %lor.end + i8 62, label %lor.end + i8 60, label %lor.end + i8 59, label %lor.end + i8 58, label %lor.end + i8 46, label %lor.end + i8 44, label %lor.end + i8 34, label %lor.end + i8 39, label %switch.edge + ] + +switch.edge: br label %lor.end +lor.rhs: br label %lor.end + +lor.end: + %0 = phi i1 [ true, %switch.early.test ], + [ false, %lor.rhs ], + [ true, %entry ], + [ true, %switch.early.test ], + [ true, %switch.early.test ], + [ true, %switch.early.test ], + [ true, %switch.early.test ], + [ true, %switch.early.test ], + [ true, %switch.early.test ], + [ true, %switch.early.test ], + [ true, %switch.edge ] + %lor.ext = zext i1 %0 to i32 + ret i32 %lor.ext + +; CHECK: @crud +; CHECK: entry: +; CHECK-NEXT: %cmp = icmp ult i8 %c, 33 +; CHECK-NEXT: br i1 %cmp, label %lor.end, label %switch.early.test +; CHECK: switch.early.test: +; CHECK-NEXT: %switch.tableidx = sub i8 %c, 34 +; CHECK-NEXT: %0 = icmp ult i8 %switch.tableidx, 59 +; CHECK-NEXT: br i1 %0, label %switch.lookup, label %lor.end +; CHECK: switch.lookup: +; CHECK-NEXT: %switch.zext = zext i8 %switch.tableidx to i59 +; CHECK-NEXT: %switch.shiftamt = mul i59 %switch.zext, 1 +; CHECK-NEXT: %switch.downshift = lshr i59 -288230375765830623, %switch.shiftamt +; CHECK-NEXT: %switch.masked = trunc i59 %switch.downshift to i1 +; CHECK-NEXT: br label %lor.end +; CHECK: lor.end: +; CHECK-NEXT: %1 = phi i1 [ true, %entry ], [ %switch.masked, %switch.lookup ], [ false, %switch.early.test ] +; CHECK-NEXT: %lor.ext = zext i1 %1 to i32 +; CHECK-NEXT: ret i32 %lor.ext +}