diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 51fbbf3b655..5dfd4472bbd 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -6053,6 +6053,14 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
   // form "i != 47 & i != 87". Same state transitions as for true elements.
   int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
 
+  /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
+  /// define a state machine that triggers for ranges of values that the index
+  /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'.
+  /// This is -2 when undefined, -3 when overdefined, and otherwise the last
+  /// index in the range (inclusive). We use -2 for undefined here because we
+  /// use relative comparisons and don't want 0-1 to match -1.
+  int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
+
   // MagicBitvector - This is a magic bitvector where we set a bit if the
   // comparison is true for element 'i'. If there are 64 elements or less in
   // the array, this will fully represent all the comparison results.
@@ -6067,7 +6075,15 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
                                                   Init->getOperand(i),
                                                   CompareRHS, TD);
     // If the result is undef for this element, ignore it.
-    if (isa<UndefValue>(C)) continue;
+    if (isa<UndefValue>(C)) {
+      // Extend range state machines to cover this element in case there is an
+      // undef in the middle of the range.
+      if (TrueRangeEnd == (int)i-1)
+        TrueRangeEnd = i;
+      if (FalseRangeEnd == (int)i-1)
+        FalseRangeEnd = i;
+      continue;
+    }
 
     // If we can't compute the result for any of the elements, we have to give
     // up evaluating the entire conditional.
@@ -6077,32 +6093,54 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
     // update our state machines.
     bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
 
-    // State machine for single index comparison.
+    // State machine for single/double/range index comparison.
     if (IsTrueForElt) {
       // Update the TrueElement state machine.
       if (FirstTrueElement == Undefined)
-        FirstTrueElement = i;
-      else if (SecondTrueElement == Undefined)
-        SecondTrueElement = i;
-      else
-        SecondTrueElement = Overdefined;
+        FirstTrueElement = TrueRangeEnd = i;  // First true element.
+      else {
+        // Update double-compare state machine.
+        if (SecondTrueElement == Undefined)
+          SecondTrueElement = i;
+        else
+          SecondTrueElement = Overdefined;
+
+        // Update range state machine.
+        if (TrueRangeEnd == (int)i-1)
+          TrueRangeEnd = i;
+        else
+          TrueRangeEnd = Overdefined;
+      }
     } else {
       // Update the FalseElement state machine.
       if (FirstFalseElement == Undefined)
-        FirstFalseElement = i;
-      else if (SecondFalseElement == Undefined)
-        SecondFalseElement = i;
-      else
-        SecondFalseElement = Overdefined;
+        FirstFalseElement = FalseRangeEnd = i;  // First false element.
+      else {
+        // Update double-compare state machine.
+        if (SecondFalseElement == Undefined)
+          SecondFalseElement = i;
+        else
+          SecondFalseElement = Overdefined;
+
+        // Update range state machine.
+        if (FalseRangeEnd == (int)i-1)
+          FalseRangeEnd = i;
+        else
+          FalseRangeEnd = Overdefined;
+      }
     }
 
+
     // If this element is in range, update our magic bitvector.
     if (i < 64 && IsTrueForElt)
       MagicBitvector |= 1ULL << i;
 
-    // If all of our states become overdefined, bail out early.
-    if (i >= 64 && SecondTrueElement == Overdefined &&
-        SecondFalseElement == Overdefined)
+    // If all of our states become overdefined, bail out early. Since the
+    // predicate is expensive, only check it every 8 elements. This is only
+    // really useful for really huge arrays.
+    if ((i & 7) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
+        SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
+        FalseRangeEnd == Overdefined)
       return 0;
   }
 
@@ -6110,6 +6148,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
   // order the state machines in complexity of the generated code.
   Value *Idx = GEP->getOperand(2);
 
+
   // If the comparison is only true for one or two elements, emit direct
   // comparisons.
   if (SecondTrueElement != Overdefined) {
@@ -6150,6 +6189,37 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
     return BinaryOperator::CreateAnd(C1, C2);
   }
 
+  // If the comparison can be replaced with a range comparison for the elements
+  // where it is true, emit the range check.
+  if (TrueRangeEnd != Overdefined) {
+    assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
+
+    // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
+    if (FirstTrueElement) {
+      Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
+      Idx = Builder->CreateAdd(Idx, Offs);
+    }
+
+    Value *End = ConstantInt::get(Idx->getType(),
+                                  TrueRangeEnd-FirstTrueElement+1);
+    return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
+  }
+
+  // False range check.
+  if (FalseRangeEnd != Overdefined) {
+    assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
+    // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
+    if (FirstFalseElement) {
+      Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
+      Idx = Builder->CreateAdd(Idx, Offs);
+    }
+
+    Value *End = ConstantInt::get(Idx->getType(),
+                                  FalseRangeEnd-FirstFalseElement);
+    return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
+  }
+
+
   // If a 32-bit or 64-bit magic bitvector captures the entire comparison state
   // of this load, replace it with computation that does:
   //   ((magic_cst >> i) & 1) != 0
@@ -6166,14 +6236,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
     return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
   }
 
-  // TODO: Range check
-  // TODO: GEP 0, i, 4
   // TODO: A[i]&4 == 0
-
-  //errs() << "XFORM: " << *GV << "\n";
-  //errs() << "\t" << *GEP << "\n";
-  //errs() << "\t " << ICI << "\n\n\n\n";
-
+  // TODO: GEP 0, i, 4
   return 0;
 }
 
diff --git a/test/Transforms/InstCombine/load-cmp.ll b/test/Transforms/InstCombine/load-cmp.ll
index fabafed93f2..69e13cb1d06 100644
--- a/test/Transforms/InstCombine/load-cmp.ll
+++ b/test/Transforms/InstCombine/load-cmp.ll
@@ -2,7 +2,8 @@
 
 @G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
                                      i16 73, i16 82, i16 69, i16 68, i16 0]
-@GD = internal constant [3 x double] [double 1.0, double 4.0, double -20.0]
+@GD = internal constant [6 x double]
+   [double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0]
 
 define i1 @test1(i32 %X) {
   %P = getelementptr [10 x i16]* @G16, i32 0, i32 %X
@@ -25,12 +26,12 @@ define i1 @test2(i32 %X) {
 }
 
 define i1 @test3(i32 %X) {
-  %P = getelementptr [3 x double]* @GD, i32 0, i32 %X
+  %P = getelementptr [6 x double]* @GD, i32 0, i32 %X
   %Q = load double* %P
   %R = fcmp oeq double %Q, 1.0
   ret i1 %R
 ; CHECK: @test3
-; CHECK-NEXT: %R = icmp eq i32 %X, 0
+; CHECK-NEXT: %R = icmp eq i32 %X, 1
 ; CHECK-NEXT: ret i1 %R
 }
 
@@ -57,3 +58,25 @@ define i1 @test5(i32 %X) {
 ; CHECK-NEXT: %R = or i1
 ; CHECK-NEXT: ret i1 %R
 }
+
+define i1 @test6(i32 %X) {
+  %P = getelementptr [6 x double]* @GD, i32 0, i32 %X
+  %Q = load double* %P
+  %R = fcmp ogt double %Q, 0.0
+  ret i1 %R
+; CHECK: @test6
+; CHECK-NEXT: add i32 %X, -1
+; CHECK-NEXT: %R = icmp ult i32 {{.*}}, 3
+; CHECK-NEXT: ret i1 %R
+}
+
+define i1 @test7(i32 %X) {
+  %P = getelementptr [6 x double]* @GD, i32 0, i32 %X
+  %Q = load double* %P
+  %R = fcmp olt double %Q, 0.0
+  ret i1 %R
+; CHECK: @test7
+; CHECK-NEXT: add i32 %X, -1
+; CHECK-NEXT: %R = icmp ugt i32 {{.*}}, 2
+; CHECK-NEXT: ret i1 %R
+}