Fix ctlz and cttz. The LLVM definition requires them to return the number of bits in the source type when the value is zero.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45029 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2007-12-14 08:30:15 +00:00
parent 7c07aebd19
commit 152804e9c1
3 changed files with 53 additions and 15 deletions

View File

@ -5352,15 +5352,26 @@ SDOperand X86TargetLowering::LowerCTLZ(SDOperand Op, SelectionDAG &DAG) {
Op = Op.getOperand(0); Op = Op.getOperand(0);
if (VT == MVT::i8) { if (VT == MVT::i8) {
// Zero extend to i32 since there is not an i8 bsr.
OpVT = MVT::i32; OpVT = MVT::i32;
Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op); Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);
} }
if (VT == MVT::i32 || VT == MVT::i64)
return DAG.getNode(ISD::XOR, OpVT, DAG.getNode(X86ISD::BSR, OpVT, Op),
DAG.getConstant(NumBits-1, OpVT));
Op = DAG.getNode(ISD::SUB, OpVT, DAG.getConstant(NumBits-1, OpVT), // Issue a bsr (scan bits in reverse) which also sets EFLAGS.
DAG.getNode(X86ISD::BSR, OpVT, Op)); SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
Op = DAG.getNode(X86ISD::BSR, VTs, Op);
// If src is zero (i.e. bsr sets ZF), returns NumBits.
SmallVector<SDOperand, 4> Ops;
Ops.push_back(Op);
Ops.push_back(DAG.getConstant(NumBits+NumBits-1, OpVT));
Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
Ops.push_back(Op.getValue(1));
Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4);
// Finally xor with NumBits-1.
Op = DAG.getNode(ISD::XOR, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
if (VT == MVT::i8) if (VT == MVT::i8)
Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op); Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);
return Op; return Op;
@ -5369,13 +5380,26 @@ SDOperand X86TargetLowering::LowerCTLZ(SDOperand Op, SelectionDAG &DAG) {
SDOperand X86TargetLowering::LowerCTTZ(SDOperand Op, SelectionDAG &DAG) { SDOperand X86TargetLowering::LowerCTTZ(SDOperand Op, SelectionDAG &DAG) {
MVT::ValueType VT = Op.getValueType(); MVT::ValueType VT = Op.getValueType();
MVT::ValueType OpVT = VT; MVT::ValueType OpVT = VT;
unsigned NumBits = MVT::getSizeInBits(VT);
Op = Op.getOperand(0); Op = Op.getOperand(0);
if (VT == MVT::i8) { if (VT == MVT::i8) {
OpVT = MVT::i32; OpVT = MVT::i32;
Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op); Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);
} }
Op = DAG.getNode(X86ISD::BSF, OpVT, Op);
// Issue a bsf (scan bits forward) which also sets EFLAGS.
SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
Op = DAG.getNode(X86ISD::BSF, VTs, Op);
// If src is zero (i.e. bsf sets ZF), returns NumBits.
SmallVector<SDOperand, 4> Ops;
Ops.push_back(Op);
Ops.push_back(DAG.getConstant(NumBits, OpVT));
Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
Ops.push_back(Op.getValue(1));
Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4);
if (VT == MVT::i8) if (VT == MVT::i8)
Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op); Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);
return Op; return Op;

View File

@ -451,29 +451,33 @@ def XCHG32rm : I<0x87, MRMSrcMem,
let Defs = [EFLAGS] in { let Defs = [EFLAGS] in {
def BSF16rr : I<0xBC, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), def BSF16rr : I<0xBC, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
"bsf{w}\t{$src, $dst||$dst, $src}", "bsf{w}\t{$src, $dst||$dst, $src}",
[(set GR16:$dst, (X86bsf GR16:$src))]>, TB; [(set GR16:$dst, (X86bsf GR16:$src)), (implicit EFLAGS)]>, TB;
def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsf{w}\t{$src, $dst||$dst, $src}", "bsf{w}\t{$src, $dst||$dst, $src}",
[(set GR16:$dst, (X86bsf (loadi16 addr:$src)))]>, TB; [(set GR16:$dst, (X86bsf (loadi16 addr:$src))),
(implicit EFLAGS)]>, TB;
def BSF32rr : I<0xBC, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), def BSF32rr : I<0xBC, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
"bsf{l}\t{$src, $dst||$dst, $src}", "bsf{l}\t{$src, $dst||$dst, $src}",
[(set GR32:$dst, (X86bsf GR32:$src))]>, TB; [(set GR32:$dst, (X86bsf GR32:$src)), (implicit EFLAGS)]>, TB;
def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsf{l}\t{$src, $dst||$dst, $src}", "bsf{l}\t{$src, $dst||$dst, $src}",
[(set GR32:$dst, (X86bsf (loadi32 addr:$src)))]>, TB; [(set GR32:$dst, (X86bsf (loadi32 addr:$src))),
(implicit EFLAGS)]>, TB;
def BSR16rr : I<0xBD, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), def BSR16rr : I<0xBD, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
"bsr{w}\t{$src, $dst||$dst, $src}", "bsr{w}\t{$src, $dst||$dst, $src}",
[(set GR16:$dst, (X86bsr GR16:$src))]>, TB; [(set GR16:$dst, (X86bsr GR16:$src)), (implicit EFLAGS)]>, TB;
def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsr{w}\t{$src, $dst||$dst, $src}", "bsr{w}\t{$src, $dst||$dst, $src}",
[(set GR16:$dst, (X86bsr (loadi16 addr:$src)))]>, TB; [(set GR16:$dst, (X86bsr (loadi16 addr:$src))),
(implicit EFLAGS)]>, TB;
def BSR32rr : I<0xBD, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), def BSR32rr : I<0xBD, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
"bsr{l}\t{$src, $dst||$dst, $src}", "bsr{l}\t{$src, $dst||$dst, $src}",
[(set GR32:$dst, (X86bsr GR32:$src))]>, TB; [(set GR32:$dst, (X86bsr GR32:$src)), (implicit EFLAGS)]>, TB;
def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsr{l}\t{$src, $dst||$dst, $src}", "bsr{l}\t{$src, $dst||$dst, $src}",
[(set GR32:$dst, (X86bsr (loadi32 addr:$src)))]>, TB; [(set GR32:$dst, (X86bsr (loadi32 addr:$src))),
(implicit EFLAGS)]>, TB;
} // Defs = [EFLAGS] } // Defs = [EFLAGS]
def LEA16r : I<0x8D, MRMSrcMem, def LEA16r : I<0x8D, MRMSrcMem,

View File

@ -1,5 +1,6 @@
; RUN: llvm-as < %s | llc -march=x86 | grep bsr ; RUN: llvm-as < %s | llc -march=x86 | grep bsr | count 2
; RUN: llvm-as < %s | llc -march=x86 | grep bsf ; RUN: llvm-as < %s | llc -march=x86 | grep bsf
; RUN: llvm-as < %s | llc -march=x86 | grep cmov | count 3
define i32 @t1(i32 %x) nounwind { define i32 @t1(i32 %x) nounwind {
%tmp = tail call i32 @llvm.ctlz.i32( i32 %x ) %tmp = tail call i32 @llvm.ctlz.i32( i32 %x )
@ -14,3 +15,12 @@ define i32 @t2(i32 %x) nounwind {
} }
declare i32 @llvm.cttz.i32(i32) nounwind readnone declare i32 @llvm.cttz.i32(i32) nounwind readnone
define i16 @t3(i16 %x, i16 %y) nounwind {
entry:
%tmp1 = add i16 %x, %y
%tmp2 = tail call i16 @llvm.ctlz.i16( i16 %tmp1 ) ; <i16> [#uses=1]
ret i16 %tmp2
}
declare i16 @llvm.ctlz.i16(i16) nounwind readnone