Rename ValueRequiresCast to ShouldOptimizeCast, to better reflect
what it does.  Enhance it to return false for vector sign
extensions from vector comparisons (so we decline to optimize those
casts away), since that is the idiom used to get a splatted vector
-- each element all zeros or all ones -- from a vector comparison.

Doing this breaks vector-casts.ll, so add some compensating
transformations that handle the important cases it covers without
depending on this canonicalization.

This fixes rdar://7434900, a serious pessimization of vector compares.
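
For reference, the idiom being preserved looks roughly like this in IR
(an illustrative sketch modeled on the new test5 in vector-casts.ll; the
function name and exact operands are made up):

  define <4 x i32> @mask_and(<4 x float> %a, <4 x float> %b) nounwind readnone {
  entry:
    %cmp.a  = fcmp ult <4 x float> %a, zeroinitializer  ; <4 x i1>
    %mask.a = sext <4 x i1> %cmp.a to <4 x i32>         ; each lane is 0 or -1
    %cmp.b  = fcmp ult <4 x float> %b, zeroinitializer
    %mask.b = sext <4 x i1> %cmp.b to <4 x i32>
    %and    = and <4 x i32> %mask.a, %mask.b            ; combine the full-width masks
    ret <4 x i32> %and
  }

Keeping both sexts and doing the 'and' at <4 x i32> matches what SIMD
compare instructions produce anyway; the previous canonicalization narrowed
the 'and' to <4 x i1> and re-extended it, which is the pessimization being
fixed.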


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@95855 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Chris Lattner
Date:   2010-02-11 06:26:33 +00:00
Commit: 8c5ad3a5da (parent 2e1cdbf92d)

4 changed files with 113 additions and 55 deletions


@@ -199,11 +199,12 @@ private:
                                    SmallVectorImpl<Value*> &NewIndices);
   Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
 
-  /// ValueRequiresCast - Return true if the cast from "V to Ty" actually
-  /// results in any code being generated. It does not require codegen if V is
-  /// simple enough or if the cast can be folded into other casts.
-  bool ValueRequiresCast(Instruction::CastOps opcode,const Value *V,
-                         const Type *Ty);
+  /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+  /// results in any code being generated and is interesting to optimize out. If
+  /// the cast can be eliminated by some other simple transformation, we prefer
+  /// to do the simplification first.
+  bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V,
+                          const Type *Ty);
 
   Instruction *visitCallSite(CallSite CS);
   bool transformConstExprCastCall(CallSite CS);


@@ -932,24 +932,49 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
   if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
     if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS))
       return Res;
 
+  // If and'ing two fcmp, try combine them into one.
+  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+      if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
+        return Res;
+
   // fold (and (cast A), (cast B)) -> (cast (and A, B))
   if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
-    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
-      if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ?
-        const Type *SrcTy = Op0C->getOperand(0)->getType();
-        if (SrcTy == Op1C->getOperand(0)->getType() &&
-            SrcTy->isIntOrIntVector() &&
-            // Only do this if the casts both really cause code to be generated.
-            ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
-                              I.getType()) &&
-            ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
-                              I.getType())) {
-          Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0),
-                                            Op1C->getOperand(0), I.getName());
+    if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) {
+      const Type *SrcTy = Op0C->getOperand(0)->getType();
+      if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ?
+          SrcTy == Op1C->getOperand(0)->getType() &&
+          SrcTy->isIntOrIntVector()) {
+        Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+        // Only do this if the casts both really cause code to be generated.
+        if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+            ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+          Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName());
           return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
         }
+
+        // If this is and(cast(icmp), cast(icmp)), try to fold this even if the
+        // cast is otherwise not optimizable. This happens for vector sexts.
+        if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+          if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+            if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) {
+              InsertNewInstBefore(Res, I);
+              return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+            }
+
+        // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the
+        // cast is otherwise not optimizable. This happens for vector sexts.
+        if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+          if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+            if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) {
+              InsertNewInstBefore(Res, I);
+              return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+            }
       }
+    }
 
   // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts.
   if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
@@ -965,13 +990,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
     }
   }
 
-  // If and'ing two fcmp, try combine them into one.
-  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
-    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
-      if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
-        return Res;
-  }
-
   return Changed ? &I : 0;
 }
@@ -1669,37 +1687,51 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
       if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS))
         return Res;
 
+  // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
+  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+      if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
+        return Res;
+
   // fold (or (cast A), (cast B)) -> (cast (or A, B))
   if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
     if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
       if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
-        if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
-            !isa<ICmpInst>(Op1C->getOperand(0))) {
-          const Type *SrcTy = Op0C->getOperand(0)->getType();
-          if (SrcTy == Op1C->getOperand(0)->getType() &&
-              SrcTy->isIntOrIntVector() &&
-              // Only do this if the casts both really cause code to be
-              // generated.
-              ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
-                                I.getType()) &&
-              ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
-                                I.getType())) {
-            Value *NewOp = Builder->CreateOr(Op0C->getOperand(0),
-                                             Op1C->getOperand(0), I.getName());
+        const Type *SrcTy = Op0C->getOperand(0)->getType();
+        if (SrcTy == Op1C->getOperand(0)->getType() &&
+            SrcTy->isIntOrIntVector()) {
+          Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+          if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) &&
+              // Only do this if the casts both really cause code to be
+              // generated.
+              ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+              ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+            Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
             return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
           }
+
+          // If this is or(cast(icmp), cast(icmp)), try to fold this even if the
+          // cast is otherwise not optimizable. This happens for vector sexts.
+          if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+            if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+              if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) {
+                InsertNewInstBefore(Res, I);
+                return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+              }
+
+          // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
+          // cast is otherwise not optimizable. This happens for vector sexts.
+          if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+            if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+              if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) {
+                InsertNewInstBefore(Res, I);
+                return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+              }
         }
       }
   }
 
-  // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
-  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
-    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
-      if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
-        return Res;
-  }
-
   return Changed ? &I : 0;
 }
@@ -1986,10 +2018,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       const Type *SrcTy = Op0C->getOperand(0)->getType();
       if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
           // Only do this if the casts both really cause code to be generated.
-          ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
+          ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0),
                             I.getType()) &&
-          ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
+          ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0),
                             I.getType())) {
         Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
                                           Op1C->getOperand(0), I.getName());
         return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());


@@ -255,17 +255,26 @@ isEliminableCastPair(
   return Instruction::CastOps(Res);
 }
 
-/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results
-/// in any code being generated. It does not require codegen if V is simple
-/// enough or if the cast can be folded into other casts.
-bool InstCombiner::ValueRequiresCast(Instruction::CastOps opcode,const Value *V,
-                                     const Type *Ty) {
+/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+/// results in any code being generated and is interesting to optimize out. If
+/// the cast can be eliminated by some other simple transformation, we prefer
+/// to do the simplification first.
+bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
+                                      const Type *Ty) {
+  // Noop casts and casts of constants should be eliminated trivially.
   if (V->getType() == Ty || isa<Constant>(V)) return false;
 
-  // If this is another cast that can be eliminated, it isn't codegen either.
+  // If this is another cast that can be eliminated, we prefer to have it
+  // eliminated.
   if (const CastInst *CI = dyn_cast<CastInst>(V))
-    if (isEliminableCastPair(CI, opcode, Ty, TD))
+    if (isEliminableCastPair(CI, opc, Ty, TD))
       return false;
+
+  // If this is a vector sext from a compare, then we don't want to break the
+  // idiom where each element of the extended vector is either zero or all ones.
+  if (opc == Instruction::SExt && isa<CmpInst>(V) && isa<VectorType>(Ty))
+    return false;
+
   return true;
 }


@@ -51,6 +51,22 @@ entry:
 }
 
+; rdar://7434900
+define <2 x i64> @test5(<4 x float> %a, <4 x float> %b) nounwind readnone {
+entry:
+  %cmp = fcmp ult <4 x float> %a, zeroinitializer
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %cmp4 = fcmp ult <4 x float> %b, zeroinitializer
+  %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
+  %and = and <4 x i32> %sext, %sext5
+  %conv = bitcast <4 x i32> %and to <2 x i64>
+  ret <2 x i64> %conv
+; CHECK: @test5
+; CHECK: sext <4 x i1> %cmp to <4 x i32>
+; CHECK: sext <4 x i1> %cmp4 to <4 x i32>
+}
+
 define void @convert(<2 x i32>* %dst.addr, <2 x i64> %src) nounwind {
 entry: