diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h
index 057bd8f84fc..e94c10569f3 100644
--- a/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -21,6 +21,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -106,6 +107,10 @@ public:
       KnownZero(1, 0) {}
   };

+  /// Record the preferred extend type (ISD::SIGN_EXTEND or ISD::ZERO_EXTEND)
+  /// for a value.
+  DenseMap<const Value *, ISD::NodeType> PreferredExtendType;
+
   /// VisitedBBs - The set of basic blocks visited thus far by instruction
   /// selection.
   SmallPtrSet<const BasicBlock*, 4> VisitedBBs;
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 1f58d7c301f..eb1508b8ab0 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -56,6 +56,28 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
   return false;
 }

+static ISD::NodeType getPreferredExtendForValue(const Value *V) {
+  // If the users of this value are compare instructions, and the number of
+  // signed predicates is greater than the number of unsigned predicates, we
+  // prefer to use SIGN_EXTEND.
+  //
+  // With this optimization, we can remove some redundant sign or zero
+  // extension instructions, and eventually expose more machine CSE
+  // opportunities.
+  ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+  unsigned NumOfSigned = 0, NumOfUnsigned = 0;
+  for (const User *U : V->users()) {
+    if (const auto *CI = dyn_cast<CmpInst>(U)) {
+      NumOfSigned += CI->isSigned();
+      NumOfUnsigned += CI->isUnsigned();
+    }
+  }
+  if (NumOfSigned > NumOfUnsigned)
+    ExtendKind = ISD::SIGN_EXTEND;
+
+  return ExtendKind;
+}
+
 void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
                                SelectionDAG *DAG) {
   const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
@@ -182,6 +204,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
           }
         }
       }
+
+      // Decide the preferred extend type for a value.
+      PreferredExtendType[I] = getPreferredExtendForValue(I);
     }

   // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 435ecfabd3f..6b00bb10824 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -862,7 +862,26 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
   switch (CCCode) {
   default: llvm_unreachable("Unknown integer comparison!");
   case ISD::SETEQ:
-  case ISD::SETNE:
+  case ISD::SETNE: {
+    SDValue OpL = GetPromotedInteger(NewLHS);
+    SDValue OpR = GetPromotedInteger(NewRHS);
+
+    // We would prefer to promote the comparison operands with sign extension
+    // if we find that an operand is the truncate of an AssertSext node. With
+    // this optimization, we can avoid inserting a real truncate instruction,
+    // which is redundant eventually.
+    if (OpL->getOpcode() == ISD::AssertSext &&
+        cast<VTSDNode>(OpL->getOperand(1))->getVT() == NewLHS.getValueType() &&
+        OpR->getOpcode() == ISD::AssertSext &&
+        cast<VTSDNode>(OpR->getOperand(1))->getVT() == NewRHS.getValueType()) {
+      NewLHS = OpL;
+      NewRHS = OpR;
+    } else {
+      NewLHS = ZExtPromotedInteger(NewLHS);
+      NewRHS = ZExtPromotedInteger(NewRHS);
+    }
+    break;
+  }
   case ISD::SETUGE:
   case ISD::SETUGT:
   case ISD::SETULE:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3ab0deaea6b..6d302f98f55 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -646,8 +646,10 @@ namespace {
     /// specified value into the registers specified by this object. This uses
     /// Chain/Flag as the input and updates them for the output Chain/Flag.
     /// If the Flag pointer is NULL, no flag is used.
-    void getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
-                       SDValue &Chain, SDValue *Flag, const Value *V) const;
+    void
+    getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain,
+                  SDValue *Flag, const Value *V,
+                  ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;

     /// AddInlineAsmOperands - Add this value to the specified inlineasm node
     /// operand list. This adds the code marker, matching input operand index
@@ -762,9 +764,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
 /// Chain/Flag as the input and updates them for the output Chain/Flag.
 /// If the Flag pointer is NULL, no flag is used.
 void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
-                                 SDValue &Chain, SDValue *Flag,
-                                 const Value *V) const {
+                                 SDValue &Chain, SDValue *Flag, const Value *V,
+                                 ISD::NodeType PreferredExtendType) const {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  ISD::NodeType ExtendKind = PreferredExtendType;

   // Get the list of the values's legal parts.
   unsigned NumRegs = Regs.size();
@@ -773,8 +776,9 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
     EVT ValueVT = ValueVTs[Value];
     unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
     MVT RegisterVT = RegVTs[Value];
-    ISD::NodeType ExtendKind =
-      TLI.isZExtFree(Val, RegisterVT)? ISD::ZERO_EXTEND: ISD::ANY_EXTEND;
+
+    if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
+      ExtendKind = ISD::ZERO_EXTEND;

     getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
                    &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
@@ -7429,7 +7433,12 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
   const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
   RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType());
   SDValue Chain = DAG.getEntryNode();
-  RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V);
+
+  ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
+                              FuncInfo.PreferredExtendType.end())
+                                 ? ISD::ANY_EXTEND
+                                 : FuncInfo.PreferredExtendType[V];
+  RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
   PendingExports.push_back(Chain);
 }

diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll
index 26301b92f9f..ef209e9c6e5 100644
--- a/test/CodeGen/AArch64/atomic-ops.ll
+++ b/test/CodeGen/AArch64/atomic-ops.ll
@@ -509,7 +509,7 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb

-; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]]

    ret i8 %old
 }
@@ -534,7 +534,7 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb

-; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]]

    ret i16 %old
 }
@@ -607,7 +607,7 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb

-; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]]

    ret i8 %old
 }
@@ -632,7 +632,7 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb

-; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]]

    ret i16 %old
 }
diff --git a/test/CodeGen/AArch64/rm_redundant_cmp.ll b/test/CodeGen/AArch64/rm_redundant_cmp.ll
new file mode 100644
index 00000000000..36dc118ed1a
--- /dev/null
+++ b/test/CodeGen/AArch64/rm_redundant_cmp.ll
@@ -0,0 +1,254 @@
+; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 | FileCheck %s
+
+; The following cases are for i16
+
+%struct.s_signed_i16 = type { i16, i16, i16 }
+%struct.s_unsigned_i16 = type { i16, i16, i16 }
+
+@cost_s_i8_i16 = common global %struct.s_signed_i16 zeroinitializer, align 2
+@cost_u_i16 = common global %struct.s_unsigned_i16 zeroinitializer, align 2
+
+define void @test_i16_2cmp_signed_1() {
+; CHECK-LABEL: test_i16_2cmp_signed_1
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK-NEXT: b.gt
+; CHECK-NOT: cmp
+; CHECK: b.ne
+entry:
+  %0 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
+  %1 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
+  %cmp = icmp sgt i16 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.else:                                          ; preds = %entry
+  %cmp5 = icmp eq i16 %0, %1
+  br i1 %cmp5, label %if.then7, label %if.end8
+
+if.then7:                                         ; preds = %if.else
+  store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then7, %if.then
+  ret void
+}
+
+define void @test_i16_2cmp_signed_2() {
+; CHECK-LABEL: test_i16_2cmp_signed_2
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK-NEXT: b.le
+; CHECK-NOT: cmp
+; CHECK: b.ge
+entry:
+  %0 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
+  %1 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
+  %cmp = icmp sgt i16 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.else:                                          ; preds = %entry
+  %cmp5 = icmp slt i16 %0, %1
+  br i1 %cmp5, label %if.then7, label %if.end8
+
+if.then7:                                         ; preds = %if.else
+  store i16 %1, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then7, %if.then
+  ret void
+}
+
+define void @test_i16_2cmp_unsigned_1() {
+; CHECK-LABEL: test_i16_2cmp_unsigned_1
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK-NEXT: b.hi
+; CHECK-NOT: cmp
+; CHECK: b.ne
+entry:
+  %0 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
+  %1 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
+  %cmp = icmp ugt i16 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.else:                                          ; preds = %entry
+  %cmp5 = icmp eq i16 %0, %1
+  br i1 %cmp5, label %if.then7, label %if.end8
+
+if.then7:                                         ; preds = %if.else
+  store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then7, %if.then
+  ret void
+}
+
+define void @test_i16_2cmp_unsigned_2() {
+; CHECK-LABEL: test_i16_2cmp_unsigned_2
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK-NEXT: b.ls
+; CHECK-NOT: cmp
+; CHECK: b.hs
+entry:
+  %0 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
+  %1 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
+  %cmp = icmp ugt i16 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.else:                                          ; preds = %entry
+  %cmp5 = icmp ult i16 %0, %1
+  br i1 %cmp5, label %if.then7, label %if.end8
+
+if.then7:                                         ; preds = %if.else
+  store i16 %1, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then7, %if.then
+  ret void
+}
+
+; The following cases are for i8
+
+%struct.s_signed_i8 = type { i8, i8, i8 }
+%struct.s_unsigned_i8 = type { i8, i8, i8 }
+
+@cost_s = common global %struct.s_signed_i8 zeroinitializer, align 2
+@cost_u_i8 = common global %struct.s_unsigned_i8 zeroinitializer, align 2
+
+
+define void @test_i8_2cmp_signed_1() {
+; CHECK-LABEL: test_i8_2cmp_signed_1
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK-NEXT: b.gt
+; CHECK-NOT: cmp
+; CHECK: b.ne
+entry:
+  %0 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
+  %1 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
+  %cmp = icmp sgt i8 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.else:                                          ; preds = %entry
+  %cmp5 = icmp eq i8 %0, %1
+  br i1 %cmp5, label %if.then7, label %if.end8
+
+if.then7:                                         ; preds = %if.else
+  store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then7, %if.then
+  ret void
+}
+
+define void @test_i8_2cmp_signed_2() {
+; CHECK-LABEL: test_i8_2cmp_signed_2
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK-NEXT: b.le
+; CHECK-NOT: cmp
+; CHECK: b.ge
+entry:
+  %0 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
+  %1 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
+  %cmp = icmp sgt i8 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.else:                                          ; preds = %entry
+  %cmp5 = icmp slt i8 %0, %1
+  br i1 %cmp5, label %if.then7, label %if.end8
+
+if.then7:                                         ; preds = %if.else
+  store i8 %1, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then7, %if.then
+  ret void
+}
+
+define void @test_i8_2cmp_unsigned_1() {
+; CHECK-LABEL: test_i8_2cmp_unsigned_1
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK-NEXT: b.hi
+; CHECK-NOT: cmp
+; CHECK: b.ne
+entry:
+  %0 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
+  %1 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
+  %cmp = icmp ugt i8 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.else:                                          ; preds = %entry
+  %cmp5 = icmp eq i8 %0, %1
+  br i1 %cmp5, label %if.then7, label %if.end8
+
+if.then7:                                         ; preds = %if.else
+  store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then7, %if.then
+  ret void
+}
+
+define void @test_i8_2cmp_unsigned_2() {
+; CHECK-LABEL: test_i8_2cmp_unsigned_2
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK-NEXT: b.ls
+; CHECK-NOT: cmp
+; CHECK: b.hs
+entry:
+  %0 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
+  %1 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
+  %cmp = icmp ugt i8 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.else:                                          ; preds = %entry
+  %cmp5 = icmp ult i8 %0, %1
+  br i1 %cmp5, label %if.then7, label %if.end8
+
+if.then7:                                         ; preds = %if.else
+  store i8 %1, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then7, %if.then
+  ret void
+}
+
+; Make sure the case below won't crash.
+
+; The optimization of ZERO_EXTEND and SIGN_EXTEND in the type legalization stage must not
+; assume that the operand of a setcc is always a TRUNCATE.
+
+define i1 @foo(float %inl, float %inr) {
+  %lval = fptosi float %inl to i8
+  %rval = fptosi float %inr to i8
+  %sum = icmp eq i8 %lval, %rval
+  ret i1 %sum
+}
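
For reference (not part of the patch itself), here is a minimal C++ sketch of the source pattern the new rm_redundant_cmp.ll tests model: the same pair of sub-register-width values is compared twice, once with a signed relational predicate and once for equality. Because the value is used by more signed than unsigned comparisons, the heuristic above copies it to its virtual register with SIGN_EXTEND, both icmps then see identically extended operands, and MachineCSE can remove the second cmp. The struct and variable names below are illustrative only.

// Illustrative only: roughly mirrors test_i16_2cmp_signed_1 in the new test file.
struct S { short a, b, out; };
S cost;                           // plays the role of @cost_s_i8_i16

void pick() {
  if (cost.a > cost.b)            // icmp sgt i16 -> cmp + b.gt
    cost.out = cost.a;
  else if (cost.a == cost.b)      // icmp eq i16 -> reuses the same cmp, b.ne
    cost.out = cost.a;
}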