diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 6212918d6c4..2aed0848002 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2390,21 +2390,29 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
   }
 
   if (N0 == N1) {
+    // The sext(setcc()) => setcc() optimization relies on the appropriate
+    // constant being emitted.
+    uint64_t EqVal;
+    switch (getBooleanContents(N0.getValueType().isVector())) {
+    default: llvm_unreachable("Unknown boolean contents!");
+    case UndefinedBooleanContent:
+    case ZeroOrOneBooleanContent:
+      EqVal = ISD::isTrueWhenEqual(Cond);
+      break;
+    case ZeroOrNegativeOneBooleanContent:
+      EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0;
+      break;
+    }
+
     // We can always fold X == X for integer setcc's.
     if (N0.getValueType().isInteger()) {
-      switch (getBooleanContents(N0.getValueType().isVector())) {
-      case UndefinedBooleanContent:
-      case ZeroOrOneBooleanContent:
-        return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
-      case ZeroOrNegativeOneBooleanContent:
-        return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT);
-      }
+      return DAG.getConstant(EqVal, VT);
     }
     unsigned UOF = ISD::getUnorderedFlavor(Cond);
     if (UOF == 2) // FP operators that are undefined on NaNs.
-      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+      return DAG.getConstant(EqVal, VT);
     if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
-      return DAG.getConstant(UOF, VT);
+      return DAG.getConstant(EqVal, VT);
     // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
     // if it is not already.
     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
diff --git a/test/CodeGen/X86/sext-setcc-self.ll b/test/CodeGen/X86/sext-setcc-self.ll
new file mode 100644
index 00000000000..c64a43c643c
--- /dev/null
+++ b/test/CodeGen/X86/sext-setcc-self.ll
@@ -0,0 +1,55 @@
+; RUN: llc -march=x86 -mcpu=nehalem < %s | FileCheck %s
+
+define <4 x i32> @test_ueq(<4 x float> %in) {
+entry:
+  ; CHECK: pcmpeqd %xmm0, %xmm0
+  ; CHECK-NEXT: ret
+  %0 = fcmp ueq <4 x float> %in, %in
+  %1 = sext <4 x i1> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @test_uge(<4 x float> %in) {
+entry:
+  ; CHECK: pcmpeqd %xmm0, %xmm0
+  ; CHECK-NEXT: ret
+  %0 = fcmp uge <4 x float> %in, %in
+  %1 = sext <4 x i1> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @test_ule(<4 x float> %in) {
+entry:
+  ; CHECK: pcmpeqd %xmm0, %xmm0
+  ; CHECK-NEXT: ret
+  %0 = fcmp ule <4 x float> %in, %in
+  %1 = sext <4 x i1> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @test_one(<4 x float> %in) {
+entry:
+  ; CHECK: xorps %xmm0, %xmm0
+  ; CHECK-NEXT: ret
+  %0 = fcmp one <4 x float> %in, %in
+  %1 = sext <4 x i1> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @test_ogt(<4 x float> %in) {
+entry:
+  ; CHECK: xorps %xmm0, %xmm0
+  ; CHECK-NEXT: ret
+  %0 = fcmp ogt <4 x float> %in, %in
+  %1 = sext <4 x i1> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @test_olt(<4 x float> %in) {
+entry:
+  ; CHECK: xorps %xmm0, %xmm0
+  ; CHECK-NEXT: ret
+  %0 = fcmp olt <4 x float> %in, %in
+  %1 = sext <4 x i1> %0 to <4 x i32>
+  ret <4 x i32> %1
+}