Rename ValueRequiresCast to ShouldOptimizeCast, to better reflect

what it does.  Enhance it to return false (i.e. decline to optimize) for
vector sign extensions of vector comparisons, which is the idiom used
to get a splatted vector for a vector comparison.

Doing this breaks vector-casts.ll, so add some compensating
transformations to handle the important case that test covers without
depending on this canonicalization.

This fixes rdar://7434900, a serious pessimization of vector compares.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@95855 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2010-02-11 06:26:33 +00:00
parent 2e1cdbf92d
commit 8c5ad3a5da
4 changed files with 113 additions and 55 deletions

View File

@ -199,11 +199,12 @@ private:
SmallVectorImpl<Value*> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
/// ValueRequiresCast - Return true if the cast from "V to Ty" actually
/// results in any code being generated. It does not require codegen if V is
/// simple enough or if the cast can be folded into other casts.
bool ValueRequiresCast(Instruction::CastOps opcode,const Value *V,
const Type *Ty);
/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
/// results in any code being generated and is interesting to optimize out. If
/// the cast can be eliminated by some other simple transformation, we prefer
/// to do the simplification first.
bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V,
const Type *Ty);
Instruction *visitCallSite(CallSite CS);
bool transformConstExprCastCall(CallSite CS);

View File

@ -932,24 +932,49 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS))
return Res;
// If and'ing two fcmp, try combine them into one.
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
return Res;
// fold (and (cast A), (cast B)) -> (cast (and A, B))
if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ?
const Type *SrcTy = Op0C->getOperand(0)->getType();
if (SrcTy == Op1C->getOperand(0)->getType() &&
SrcTy->isIntOrIntVector() &&
// Only do this if the casts both really cause code to be generated.
ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
I.getType()) &&
ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
I.getType())) {
Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0),
Op1C->getOperand(0), I.getName());
if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) {
const Type *SrcTy = Op0C->getOperand(0)->getType();
if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ?
SrcTy == Op1C->getOperand(0)->getType() &&
SrcTy->isIntOrIntVector()) {
Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
// Only do this if the casts both really cause code to be generated.
if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName());
return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
// If this is and(cast(icmp), cast(icmp)), try to fold this even if the
// cast is otherwise not optimizable. This happens for vector sexts.
if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) {
InsertNewInstBefore(Res, I);
return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
}
// If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the
// cast is otherwise not optimizable. This happens for vector sexts.
if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) {
InsertNewInstBefore(Res, I);
return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
}
}
}
// (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts.
if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
@ -965,13 +990,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
}
// If and'ing two fcmp, try combine them into one.
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
return Res;
}
return Changed ? &I : 0;
}
@ -1669,37 +1687,51 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS))
return Res;
// (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
return Res;
// fold (or (cast A), (cast B)) -> (cast (or A, B))
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
!isa<ICmpInst>(Op1C->getOperand(0))) {
const Type *SrcTy = Op0C->getOperand(0)->getType();
if (SrcTy == Op1C->getOperand(0)->getType() &&
SrcTy->isIntOrIntVector() &&
const Type *SrcTy = Op0C->getOperand(0)->getType();
if (SrcTy == Op1C->getOperand(0)->getType() &&
SrcTy->isIntOrIntVector()) {
Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) &&
// Only do this if the casts both really cause code to be
// generated.
ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
I.getType()) &&
ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
I.getType())) {
Value *NewOp = Builder->CreateOr(Op0C->getOperand(0),
Op1C->getOperand(0), I.getName());
ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
// If this is or(cast(icmp), cast(icmp)), try to fold this even if the
// cast is otherwise not optimizable. This happens for vector sexts.
if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) {
InsertNewInstBefore(Res, I);
return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
}
// If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
// cast is otherwise not optimizable. This happens for vector sexts.
if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) {
InsertNewInstBefore(Res, I);
return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
}
}
}
}
// (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
return Res;
}
return Changed ? &I : 0;
}
@ -1986,10 +2018,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
const Type *SrcTy = Op0C->getOperand(0)->getType();
if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
// Only do this if the casts both really cause code to be generated.
ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
I.getType()) &&
ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
I.getType())) {
ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0),
I.getType()) &&
ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0),
I.getType())) {
Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
Op1C->getOperand(0), I.getName());
return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());

View File

@ -255,17 +255,26 @@ isEliminableCastPair(
return Instruction::CastOps(Res);
}
/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results
/// in any code being generated. It does not require codegen if V is simple
/// enough or if the cast can be folded into other casts.
bool InstCombiner::ValueRequiresCast(Instruction::CastOps opcode,const Value *V,
const Type *Ty) {
/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
/// results in any code being generated and is interesting to optimize out. If
/// the cast can be eliminated by some other simple transformation, we prefer
/// to do the simplification first.
bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
const Type *Ty) {
// Noop casts and casts of constants should be eliminated trivially.
if (V->getType() == Ty || isa<Constant>(V)) return false;
// If this is another cast that can be eliminated, it isn't codegen either.
// If this is another cast that can be eliminated, we prefer to have it
// eliminated.
if (const CastInst *CI = dyn_cast<CastInst>(V))
if (isEliminableCastPair(CI, opcode, Ty, TD))
if (isEliminableCastPair(CI, opc, Ty, TD))
return false;
// If this is a vector sext from a compare, then we don't want to break the
// idiom where each element of the extended vector is either zero or all ones.
if (opc == Instruction::SExt && isa<CmpInst>(V) && isa<VectorType>(Ty))
return false;
return true;
}

View File

@ -51,6 +51,22 @@ entry:
}
; rdar://7434900
; test5: 'and' of two sign-extended <4 x i1> fcmp results, bitcast to <2 x i64>.
; The FileCheck directives at the end of the function expect both sext
; instructions (from %cmp and from %cmp4) to still be present in the output,
; i.e. the and-of-sexts is not rewritten into a single sext of an 'and'.
define <2 x i64> @test5(<4 x float> %a, <4 x float> %b) nounwind readnone {
entry:
%cmp = fcmp ult <4 x float> %a, zeroinitializer
%sext = sext <4 x i1> %cmp to <4 x i32>
%cmp4 = fcmp ult <4 x float> %b, zeroinitializer
%sext5 = sext <4 x i1> %cmp4 to <4 x i32>
%and = and <4 x i32> %sext, %sext5
%conv = bitcast <4 x i32> %and to <2 x i64>
ret <2 x i64> %conv
; CHECK: @test5
; CHECK: sext <4 x i1> %cmp to <4 x i32>
; CHECK: sext <4 x i1> %cmp4 to <4 x i32>
}
define void @convert(<2 x i32>* %dst.addr, <2 x i64> %src) nounwind {
entry: