diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6b23d6104ff..95c99e1dc27 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -380,9 +380,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setOperationAction(ISD::FLT_ROUNDS_       , MVT::i32  , Custom);
 
   setOperationAction(ISD::CTTZ_ZERO_UNDEF   , MVT::i8   , Expand);
-  setOperationAction(ISD::CTTZ_ZERO_UNDEF   , MVT::i16  , Expand);
-  setOperationAction(ISD::CTTZ_ZERO_UNDEF   , MVT::i32  , Expand);
-  setOperationAction(ISD::CTTZ_ZERO_UNDEF   , MVT::i64  , Expand);
   if (Subtarget->hasBMI()) {
     setOperationAction(ISD::CTTZ            , MVT::i8   , Promote);
   } else {
@@ -394,9 +391,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   }
 
   setOperationAction(ISD::CTLZ_ZERO_UNDEF   , MVT::i8   , Expand);
-  setOperationAction(ISD::CTLZ_ZERO_UNDEF   , MVT::i16  , Expand);
-  setOperationAction(ISD::CTLZ_ZERO_UNDEF   , MVT::i32  , Expand);
-  setOperationAction(ISD::CTLZ_ZERO_UNDEF   , MVT::i64  , Expand);
   if (Subtarget->hasLZCNT()) {
     setOperationAction(ISD::CTLZ            , MVT::i8   , Promote);
   } else {
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index e0cf669beed..f593092df76 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -1753,3 +1753,20 @@ def : Pat<(and GR64:$src1, i64immSExt8:$src2),
           (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
 def : Pat<(and GR64:$src1, i64immSExt32:$src2),
           (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Bit scan instruction patterns to match explicit zero-undef behavior.
+def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>;
+def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>;
+def : Pat<(cttz_zero_undef GR64:$src), (BSF64rr GR64:$src)>;
+def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm addr:$src)>;
+def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm addr:$src)>;
+def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>;
+def : Pat<(ctlz_zero_undef GR16:$src), (XOR16ri (BSR16rr GR16:$src), 15)>;
+def : Pat<(ctlz_zero_undef GR32:$src), (XOR32ri (BSR32rr GR32:$src), 31)>;
+def : Pat<(ctlz_zero_undef GR64:$src), (XOR64ri8 (BSR64rr GR64:$src), 63)>;
+def : Pat<(ctlz_zero_undef (loadi16 addr:$src)),
+          (XOR16ri (BSR16rm addr:$src), 15)>;
+def : Pat<(ctlz_zero_undef (loadi32 addr:$src)),
+          (XOR32ri (BSR32rm addr:$src), 31)>;
+def : Pat<(ctlz_zero_undef (loadi64 addr:$src)),
+          (XOR64ri8 (BSR64rm addr:$src), 63)>;
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
index 9b26efd10de..4e080309b6f 100644
--- a/test/CodeGen/X86/clz.ll
+++ b/test/CodeGen/X86/clz.ll
@@ -1,48 +1,65 @@
 ; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
 
 define i32 @t1(i32 %x) nounwind {
-	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 true )
-	ret i32 %tmp
+  %tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 true )
+  ret i32 %tmp
 ; CHECK: t1:
 ; CHECK: bsrl
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: xorl $31,
+; CHECK: ret
 }
 
 declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
 
 define i32 @t2(i32 %x) nounwind {
-	%tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 true )
-	ret i32 %tmp
+  %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 true )
+  ret i32 %tmp
 ; CHECK: t2:
 ; CHECK: bsfl
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: ret
 }
 
 declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
 
 define i16 @t3(i16 %x, i16 %y) nounwind {
 entry:
-	%tmp1 = add i16 %x, %y
-	%tmp2 = tail call i16 @llvm.ctlz.i16( i16 %tmp1, i1 true ) ; [#uses=1]
-	ret i16 %tmp2
+  %tmp1 = add i16 %x, %y
+  %tmp2 = tail call i16 @llvm.ctlz.i16( i16 %tmp1, i1 true ) ; [#uses=1]
+  ret i16 %tmp2
 ; CHECK: t3:
 ; CHECK: bsrw
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: xorw $15,
+; CHECK: ret
 }
 
 declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone
 
+define i32 @t4(i32 %n) nounwind {
+entry:
+; Generate a cmov to handle zero inputs when necessary.
+; CHECK: t4:
+; CHECK: bsrl
+; CHECK: cmov
+; CHECK: xorl $31,
+; CHECK: ret
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %n, i1 false)
+  ret i32 %tmp1
+}
+
+define i32 @t5(i32 %n) nounwind {
+entry:
 ; Don't generate the cmovne when the source is known non-zero (and bsr would
 ; not set ZF).
 ; rdar://9490949
-
-define i32 @t4(i32 %n) nounwind {
-entry:
-; CHECK: t4:
+; CHECK: t5:
 ; CHECK: bsrl
 ; CHECK-NOT: cmov
+; CHECK: xorl $31,
 ; CHECK: ret
   %or = or i32 %n, 1
-  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %or, i1 true)
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %or, i1 false)
   ret i32 %tmp1
 }
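Side note (not part of the patch): the ctlz_zero_undef patterns above lean on the identity that, for a nonzero N-bit value, the leading-zero count is (N-1) minus the bit index that BSR reports, and that this subtraction can be written as an XOR with N-1 because the BSR result never exceeds N-1, so no borrow can occur. The C++ sketch below is only an illustrative check of that identity for the 32-bit case; it is not taken from the patch, the helper name bsr32 is invented for the example, and __builtin_clz (a GCC/Clang builtin) is undefined for a zero argument, mirroring the "zero undef" contract of the intrinsics being matched.

#include <cassert>
#include <cstdint>

// Index of the most significant set bit, i.e. what the x86 BSR instruction
// produces for a nonzero input (hypothetical helper for this sketch only).
static unsigned bsr32(uint32_t x) {
  unsigned idx = 0;
  while (x >>= 1)
    ++idx;
  return idx;
}

int main() {
  // Check clz(x) == 31 - bsr(x) == 31 ^ bsr(x) for a spread of nonzero values.
  const uint32_t samples[] = {1u, 2u, 3u, 0x80u, 0xFFu, 0x12345678u,
                              0x80000000u, 0xFFFFFFFFu};
  for (uint32_t x : samples) {
    unsigned clz = static_cast<unsigned>(__builtin_clz(x));
    assert(clz == 31u - bsr32(x));
    assert(clz == (31u ^ bsr32(x))); // the XOR form used by the XOR32ri pattern
  }
  return 0;
}

The 16- and 64-bit patterns apply the same trick with the constants 15 and 63, while the BSF patterns need no fixup at all because the trailing-zero count is exactly the bit index BSF reports.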