From 8c5ad3a5da7419af4d31963c3824836b5da52478 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Thu, 11 Feb 2010 06:26:33 +0000 Subject: [PATCH] Rename ValueRequiresCast to ShouldOptimizeCast, to better reflect what it does. Enhance it to return false for vector sign extensions of vector comparisons, which is the idiom used to get a splatted vector for a vector comparison. Doing this breaks vector-casts.ll, so add some compensating transformations to handle the important case they cover without depending on this canonicalization. This fixes rdar://7434900, a serious pessimization of vector compares. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@95855 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombine.h | 11 +- .../InstCombine/InstCombineAndOrXor.cpp | 118 +++++++++++------- .../InstCombine/InstCombineCasts.cpp | 23 ++-- test/Transforms/InstCombine/vector-casts.ll | 16 +++ 4 files changed, 113 insertions(+), 55 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 536790004e8..09accb6b857 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -199,11 +199,12 @@ private: SmallVectorImpl &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); - /// ValueRequiresCast - Return true if the cast from "V to Ty" actually - /// results in any code being generated. It does not require codegen if V is - /// simple enough or if the cast can be folded into other casts. - bool ValueRequiresCast(Instruction::CastOps opcode,const Value *V, - const Type *Ty); + /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually + /// results in any code being generated and is interesting to optimize out. If + /// the cast can be eliminated by some other simple transformation, we prefer + /// to do the simplification first. 
+ bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V, + const Type *Ty); Instruction *visitCallSite(CallSite CS); bool transformConstExprCastCall(CallSite CS); diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 28fd70ef69f..515753fae49 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -932,24 +932,49 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (ICmpInst *LHS = dyn_cast(Op0)) if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) return Res; - + + // If and'ing two fcmp, try combine them into one. + if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) + if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) + if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) + return Res; + + // fold (and (cast A), (cast B)) -> (cast (and A, B)) if (CastInst *Op0C = dyn_cast(Op0)) - if (CastInst *Op1C = dyn_cast(Op1)) - if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ? - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVector() && - // Only do this if the casts both really cause code to be generated. - ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType()) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType())) { - Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0), - Op1C->getOperand(0), I.getName()); + if (CastInst *Op1C = dyn_cast(Op1)) { + const Type *SrcTy = Op0C->getOperand(0)->getType(); + if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ? + SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVector()) { + Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); + + // Only do this if the casts both really cause code to be generated. 
+ if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { + Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } + + // If this is and(cast(icmp), cast(icmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (ICmpInst *RHS = dyn_cast(Op1COp)) + if (ICmpInst *LHS = dyn_cast(Op0COp)) + if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) { + InsertNewInstBefore(Res, I); + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + } + + // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (FCmpInst *RHS = dyn_cast(Op1COp)) + if (FCmpInst *LHS = dyn_cast(Op0COp)) + if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) { + InsertNewInstBefore(Res, I); + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + } } + } // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts. if (BinaryOperator *SI1 = dyn_cast(Op1)) { @@ -965,13 +990,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } } - // If and'ing two fcmp, try combine them into one. - if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) { - if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) - if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) - return Res; - } - return Changed ? 
&I : 0; } @@ -1669,37 +1687,51 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) return Res; + // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) + if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) + if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) + if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) + return Res; + // fold (or (cast A), (cast B)) -> (cast (or A, B)) if (CastInst *Op0C = dyn_cast(Op0)) { if (CastInst *Op1C = dyn_cast(Op1)) if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? - if (!isa(Op0C->getOperand(0)) || - !isa(Op1C->getOperand(0))) { - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVector() && + const Type *SrcTy = Op0C->getOperand(0)->getType(); + if (SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVector()) { + Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); + + if ((!isa(Op0COp) || !isa(Op1COp)) && // Only do this if the casts both really cause code to be // generated. - ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType()) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType())) { - Value *NewOp = Builder->CreateOr(Op0C->getOperand(0), - Op1C->getOperand(0), I.getName()); + ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { + Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } + + // If this is or(cast(icmp), cast(icmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. 
+ if (ICmpInst *RHS = dyn_cast(Op1COp)) + if (ICmpInst *LHS = dyn_cast(Op0COp)) + if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) { + InsertNewInstBefore(Res, I); + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + } + + // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (FCmpInst *RHS = dyn_cast(Op1COp)) + if (FCmpInst *LHS = dyn_cast(Op0COp)) + if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) { + InsertNewInstBefore(Res, I); + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + } } } } - - // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) - if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) { - if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) - if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) - return Res; - } - return Changed ? &I : 0; } @@ -1986,10 +2018,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { const Type *SrcTy = Op0C->getOperand(0)->getType(); if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && // Only do this if the casts both really cause code to be generated. 
- ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType()) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType())) { + ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0), + I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0), + I.getType())) { Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), Op1C->getOperand(0), I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 09cd21f1443..68e17e59f6a 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -255,17 +255,26 @@ isEliminableCastPair( return Instruction::CastOps(Res); } -/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results -/// in any code being generated. It does not require codegen if V is simple -/// enough or if the cast can be folded into other casts. -bool InstCombiner::ValueRequiresCast(Instruction::CastOps opcode,const Value *V, - const Type *Ty) { +/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually +/// results in any code being generated and is interesting to optimize out. If +/// the cast can be eliminated by some other simple transformation, we prefer +/// to do the simplification first. +bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, + const Type *Ty) { + // Noop casts and casts of constants should be eliminated trivially. if (V->getType() == Ty || isa(V)) return false; - // If this is another cast that can be eliminated, it isn't codegen either. + // If this is another cast that can be eliminated, we prefer to have it + // eliminated. 
if (const CastInst *CI = dyn_cast(V)) - if (isEliminableCastPair(CI, opcode, Ty, TD)) + if (isEliminableCastPair(CI, opc, Ty, TD)) return false; + + // If this is a vector sext from a compare, then we don't want to break the + // idiom where each element of the extended vector is either zero or all ones. + if (opc == Instruction::SExt && isa(V) && isa(Ty)) + return false; + return true; } diff --git a/test/Transforms/InstCombine/vector-casts.ll b/test/Transforms/InstCombine/vector-casts.ll index 470d4854753..24bd04dcb2d 100644 --- a/test/Transforms/InstCombine/vector-casts.ll +++ b/test/Transforms/InstCombine/vector-casts.ll @@ -51,6 +51,22 @@ entry: } +; rdar://7434900 +define <2 x i64> @test5(<4 x float> %a, <4 x float> %b) nounwind readnone { +entry: + %cmp = fcmp ult <4 x float> %a, zeroinitializer + %sext = sext <4 x i1> %cmp to <4 x i32> + %cmp4 = fcmp ult <4 x float> %b, zeroinitializer + %sext5 = sext <4 x i1> %cmp4 to <4 x i32> + %and = and <4 x i32> %sext, %sext5 + %conv = bitcast <4 x i32> %and to <2 x i64> + ret <2 x i64> %conv + +; CHECK: @test5 +; CHECK: sext <4 x i1> %cmp to <4 x i32> +; CHECK: sext <4 x i1> %cmp4 to <4 x i32> +} + define void @convert(<2 x i32>* %dst.addr, <2 x i64> %src) nounwind { entry: