Optimize sext/zext insertion algorithm in back-end.

With this optimization, we will not always insert zext for values crossing
basic blocks, but insert sext if the users of a value crossing basic block
has preference of sign predicate.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218101 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jiangning Liu 2014-09-19 05:30:35 +00:00
parent a562871c67
commit 61519cd699
6 changed files with 324 additions and 12 deletions

View File

@ -21,6 +21,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Target/TargetRegisterInfo.h"
@ -106,6 +107,10 @@ public:
KnownZero(1, 0) {}
};
/// Record the preferred extend type (ISD::SIGN_EXTEND or ISD::ZERO_EXTEND)
/// for a value.
DenseMap<const Value *, ISD::NodeType> PreferredExtendType;
/// VisitedBBs - The set of basic blocks visited thus far by instruction
/// selection.
SmallPtrSet<const BasicBlock*, 4> VisitedBBs;

View File

@ -56,6 +56,28 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
return false;
}
static ISD::NodeType getPreferredExtendForValue(const Value *V) {
// For the users of the source value being used for compare instruction, if
// the number of signed predicate is greater than unsigned predicate, we
// prefer to use SIGN_EXTEND.
//
// With this optimization, we would be able to reduce some redundant sign or
// zero extension instruction, and eventually more machine CSE opportunities
// can be exposed.
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
unsigned NumOfSigned = 0, NumOfUnsigned = 0;
for (const User *U : V->users()) {
if (const auto *CI = dyn_cast<CmpInst>(U)) {
NumOfSigned += CI->isSigned();
NumOfUnsigned += CI->isUnsigned();
}
}
if (NumOfSigned > NumOfUnsigned)
ExtendKind = ISD::SIGN_EXTEND;
return ExtendKind;
}
void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
SelectionDAG *DAG) {
const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
@ -182,6 +204,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
}
}
}
// Decide the preferred extend type for a value.
PreferredExtendType[I] = getPreferredExtendForValue(I);
}
// Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This

View File

@ -862,7 +862,26 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
switch (CCCode) {
default: llvm_unreachable("Unknown integer comparison!");
case ISD::SETEQ:
case ISD::SETNE:
case ISD::SETNE: {
SDValue OpL = GetPromotedInteger(NewLHS);
SDValue OpR = GetPromotedInteger(NewRHS);
// We would prefer to promote the comparison operand with sign extension,
// if we find the operand is actually to truncate an AssertSext. With this
// optimization, we can avoid inserting real truncate instruction, which
// is redudant eventually.
if (OpL->getOpcode() == ISD::AssertSext &&
cast<VTSDNode>(OpL->getOperand(1))->getVT() == NewLHS.getValueType() &&
OpR->getOpcode() == ISD::AssertSext &&
cast<VTSDNode>(OpR->getOperand(1))->getVT() == NewRHS.getValueType()) {
NewLHS = OpL;
NewRHS = OpR;
} else {
NewLHS = ZExtPromotedInteger(NewLHS);
NewRHS = ZExtPromotedInteger(NewRHS);
}
break;
}
case ISD::SETUGE:
case ISD::SETUGT:
case ISD::SETULE:

View File

@ -646,8 +646,10 @@ namespace {
/// specified value into the registers specified by this object. This uses
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
void getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
SDValue &Chain, SDValue *Flag, const Value *V) const;
void
getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain,
SDValue *Flag, const Value *V,
ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
/// AddInlineAsmOperands - Add this value to the specified inlineasm node
/// operand list. This adds the code marker, matching input operand index
@ -762,9 +764,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
SDValue &Chain, SDValue *Flag,
const Value *V) const {
SDValue &Chain, SDValue *Flag, const Value *V,
ISD::NodeType PreferredExtendType) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
ISD::NodeType ExtendKind = PreferredExtendType;
// Get the list of the values's legal parts.
unsigned NumRegs = Regs.size();
@ -773,8 +776,9 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
EVT ValueVT = ValueVTs[Value];
unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
MVT RegisterVT = RegVTs[Value];
ISD::NodeType ExtendKind =
TLI.isZExtFree(Val, RegisterVT)? ISD::ZERO_EXTEND: ISD::ANY_EXTEND;
if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
ExtendKind = ISD::ZERO_EXTEND;
getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
&Parts[Part], NumParts, RegisterVT, V, ExtendKind);
@ -7429,7 +7433,12 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType());
SDValue Chain = DAG.getEntryNode();
RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V);
ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
FuncInfo.PreferredExtendType.end())
? ISD::ANY_EXTEND
: FuncInfo.PreferredExtendType[V];
RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
PendingExports.push_back(Chain);
}

View File

@ -509,7 +509,7 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]]
ret i8 %old
}
@ -534,7 +534,7 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]]
ret i16 %old
}
@ -607,7 +607,7 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]]
ret i8 %old
}
@ -632,7 +632,7 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]]
ret i16 %old
}

View File

@ -0,0 +1,254 @@
; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 | FileCheck %s
; The following cases are for i16
%struct.s_signed_i16 = type { i16, i16, i16 }
%struct.s_unsigned_i16 = type { i16, i16, i16 }
@cost_s_i8_i16 = common global %struct.s_signed_i16 zeroinitializer, align 2
@cost_u_i16 = common global %struct.s_unsigned_i16 zeroinitializer, align 2
define void @test_i16_2cmp_signed_1() {
; CHECK-LABEL: test_i16_2cmp_signed_1
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK-NEXT: b.gt
; CHECK-NOT: cmp
; CHECK: b.ne
entry:
%0 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
%1 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
%cmp = icmp sgt i16 %0, %1
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
br label %if.end8
if.else: ; preds = %entry
%cmp5 = icmp eq i16 %0, %1
br i1 %cmp5, label %if.then7, label %if.end8
if.then7: ; preds = %if.else
store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
br label %if.end8
if.end8: ; preds = %if.else, %if.then7, %if.then
ret void
}
define void @test_i16_2cmp_signed_2() {
; CHECK-LABEL: test_i16_2cmp_signed_2
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK-NEXT: b.le
; CHECK-NOT: cmp
; CHECK: b.ge
entry:
%0 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
%1 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
%cmp = icmp sgt i16 %0, %1
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
br label %if.end8
if.else: ; preds = %entry
%cmp5 = icmp slt i16 %0, %1
br i1 %cmp5, label %if.then7, label %if.end8
if.then7: ; preds = %if.else
store i16 %1, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
br label %if.end8
if.end8: ; preds = %if.else, %if.then7, %if.then
ret void
}
define void @test_i16_2cmp_unsigned_1() {
; CHECK-LABEL: test_i16_2cmp_unsigned_1
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK-NEXT: b.hi
; CHECK-NOT: cmp
; CHECK: b.ne
entry:
%0 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
%1 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
%cmp = icmp ugt i16 %0, %1
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
br label %if.end8
if.else: ; preds = %entry
%cmp5 = icmp eq i16 %0, %1
br i1 %cmp5, label %if.then7, label %if.end8
if.then7: ; preds = %if.else
store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
br label %if.end8
if.end8: ; preds = %if.else, %if.then7, %if.then
ret void
}
define void @test_i16_2cmp_unsigned_2() {
; CHECK-LABEL: test_i16_2cmp_unsigned_2
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK-NEXT: b.ls
; CHECK-NOT: cmp
; CHECK: b.hs
entry:
%0 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
%1 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
%cmp = icmp ugt i16 %0, %1
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
br label %if.end8
if.else: ; preds = %entry
%cmp5 = icmp ult i16 %0, %1
br i1 %cmp5, label %if.then7, label %if.end8
if.then7: ; preds = %if.else
store i16 %1, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
br label %if.end8
if.end8: ; preds = %if.else, %if.then7, %if.then
ret void
}
; The following cases are for i8
%struct.s_signed_i8 = type { i8, i8, i8 }
%struct.s_unsigned_i8 = type { i8, i8, i8 }
@cost_s = common global %struct.s_signed_i8 zeroinitializer, align 2
@cost_u_i8 = common global %struct.s_unsigned_i8 zeroinitializer, align 2
define void @test_i8_2cmp_signed_1() {
; CHECK-LABEL: test_i8_2cmp_signed_1
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK-NEXT: b.gt
; CHECK-NOT: cmp
; CHECK: b.ne
entry:
%0 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
%1 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
%cmp = icmp sgt i8 %0, %1
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
br label %if.end8
if.else: ; preds = %entry
%cmp5 = icmp eq i8 %0, %1
br i1 %cmp5, label %if.then7, label %if.end8
if.then7: ; preds = %if.else
store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
br label %if.end8
if.end8: ; preds = %if.else, %if.then7, %if.then
ret void
}
define void @test_i8_2cmp_signed_2() {
; CHECK-LABEL: test_i8_2cmp_signed_2
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK-NEXT: b.le
; CHECK-NOT: cmp
; CHECK: b.ge
entry:
%0 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
%1 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
%cmp = icmp sgt i8 %0, %1
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
br label %if.end8
if.else: ; preds = %entry
%cmp5 = icmp slt i8 %0, %1
br i1 %cmp5, label %if.then7, label %if.end8
if.then7: ; preds = %if.else
store i8 %1, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
br label %if.end8
if.end8: ; preds = %if.else, %if.then7, %if.then
ret void
}
define void @test_i8_2cmp_unsigned_1() {
; CHECK-LABEL: test_i8_2cmp_unsigned_1
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK-NEXT: b.hi
; CHECK-NOT: cmp
; CHECK: b.ne
entry:
%0 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
%1 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
%cmp = icmp ugt i8 %0, %1
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
br label %if.end8
if.else: ; preds = %entry
%cmp5 = icmp eq i8 %0, %1
br i1 %cmp5, label %if.then7, label %if.end8
if.then7: ; preds = %if.else
store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
br label %if.end8
if.end8: ; preds = %if.else, %if.then7, %if.then
ret void
}
define void @test_i8_2cmp_unsigned_2() {
; CHECK-LABEL: test_i8_2cmp_unsigned_2
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK-NEXT: b.ls
; CHECK-NOT: cmp
; CHECK: b.hs
entry:
%0 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
%1 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
%cmp = icmp ugt i8 %0, %1
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
br label %if.end8
if.else: ; preds = %entry
%cmp5 = icmp ult i8 %0, %1
br i1 %cmp5, label %if.then7, label %if.end8
if.then7: ; preds = %if.else
store i8 %1, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
br label %if.end8
if.end8: ; preds = %if.else, %if.then7, %if.then
ret void
}
; Make sure the case below won't crash.
; The optimization of ZERO_EXTEND and SIGN_EXTEND in type legalization stage can't assert
; the operand of a set_cc is always a TRUNCATE.
define i1 @foo(float %inl, float %inr) {
%lval = fptosi float %inl to i8
%rval = fptosi float %inr to i8
%sum = icmp eq i8 %lval, %rval
ret i1 %sum
}