From 18efe269b12624d74c0af6104e88864d6a932344 Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Fri, 14 Dec 2007 02:13:44 +0000
Subject: [PATCH] Implement ctlz and cttz with bsr and bsf.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45024 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 57 +++++++++++++++++++++++++-----
 lib/Target/X86/X86ISelLowering.h | 7 ++++
 lib/Target/X86/X86InstrInfo.td | 31 ++++++++++++++++
 lib/Target/X86/X86InstrX86-64.td | 17 +++++++++
 test/CodeGen/X86/clz.ll | 16 +++++++++
 5 files changed, 119 insertions(+), 9 deletions(-)
 create mode 100644 test/CodeGen/X86/clz.ll

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 07e19aad34b..1d64b1d859c 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -210,18 +210,18 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
   setOperationAction(ISD::FLT_ROUNDS , MVT::i32 , Custom);

   setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
-  setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
-  setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
+  setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
+  setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
   setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
-  setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
-  setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
+  setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
+  setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
   setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
-  setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
-  setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
+  setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
+  setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
   if (Subtarget->is64Bit()) {
     setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
-    setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
-    setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
+    setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
+    setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
   }

   setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
@@ -5345,6 +5345,42 @@ SDOperand X86TargetLowering::LowerFLT_ROUNDS(SDOperand Op, SelectionDAG &DAG) {
                       ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal);
 }

+/// LowerCTLZ - Lower ctlz to (bsr x) ^ (NumBits-1).  bsr gives the index of the
+/// top set bit, which is at most NumBits-1, so the xor is NumBits-1 - index.
+/// FIXME: bsr leaves its result undefined for a zero input; not handled here.
+SDOperand X86TargetLowering::LowerCTLZ(SDOperand Op, SelectionDAG &DAG) {
+  MVT::ValueType VT = Op.getValueType();
+  MVT::ValueType OpVT = VT;
+  unsigned NumBits = MVT::getSizeInBits(VT);
+  Op = Op.getOperand(0);
+  if (VT == MVT::i8) {
+    // No 8-bit bsr is defined; scan the value zero-extended to i32 instead.
+    OpVT = MVT::i32;
+    Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);
+  }
+  Op = DAG.getNode(ISD::XOR, OpVT, DAG.getNode(X86ISD::BSR, OpVT, Op),
+                   DAG.getConstant(NumBits-1, OpVT));
+  if (VT == MVT::i8)
+    Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);
+  return Op;
+}
+
+/// LowerCTTZ - cttz(x) == bsf x.  FIXME: bsf is undefined for a zero input.
+SDOperand X86TargetLowering::LowerCTTZ(SDOperand Op, SelectionDAG &DAG) {
+  MVT::ValueType VT = Op.getValueType();
+  MVT::ValueType OpVT = VT;
+  Op = Op.getOperand(0);
+  if (VT == MVT::i8) {
+    // No 8-bit bsf is defined; scan the value zero-extended to i32 instead.
+    OpVT = MVT::i32;
+    Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);
+  }
+  Op = DAG.getNode(X86ISD::BSF, OpVT, Op);
+  if (VT == MVT::i8)
+    Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);
+  return Op;
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
@@ -5387,7 +5423,8 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
   case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
   case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
   case ISD::FLT_ROUNDS: return LowerFLT_ROUNDS(Op, DAG);
-
+  case ISD::CTLZ: return LowerCTLZ(Op, DAG);
+  case ISD::CTTZ: return LowerCTTZ(Op, DAG);

   // FIXME: REMOVE THIS WHEN LegalizeDAGTypes lands.
case ISD::READCYCLECOUNTER: @@ -5407,6 +5444,8 @@ SDNode *X86TargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) { const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return NULL; + case X86ISD::BSF: return "X86ISD::BSF"; + case X86ISD::BSR: return "X86ISD::BSR"; case X86ISD::SHLD: return "X86ISD::SHLD"; case X86ISD::SHRD: return "X86ISD::SHRD"; case X86ISD::FAND: return "X86ISD::FAND"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 5fe49f8d81b..c366a461dfc 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -28,6 +28,11 @@ namespace llvm { // Start the numbering where the builtin ops leave off. FIRST_NUMBER = ISD::BUILTIN_OP_END+X86::INSTRUCTION_LIST_END, + /// BSF - Bit scan forward. + /// BSR - Bit scan reverse. + BSF, + BSR, + /// SHLD, SHRD - Double shift instructions. These correspond to /// X86::SHLDxx and X86::SHRDxx instructions. SHLD, @@ -489,6 +494,8 @@ namespace llvm { SDOperand LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG); SDOperand LowerTRAMPOLINE(SDOperand Op, SelectionDAG &DAG); SDOperand LowerFLT_ROUNDS(SDOperand Op, SelectionDAG &DAG); + SDOperand LowerCTLZ(SDOperand Op, SelectionDAG &DAG); + SDOperand LowerCTTZ(SDOperand Op, SelectionDAG &DAG); SDNode *ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG); SDNode *ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG); }; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 1fb162d0cb2..28ef8388a90 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -57,6 +57,8 @@ def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; +def X86bsf : SDNode<"X86ISD::BSF", SDTIntUnaryOp>; +def X86bsr : SDNode<"X86ISD::BSR", SDTIntUnaryOp>; def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>; def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>; 
@@ -445,6 +447,35 @@ def XCHG32rm : I<0x87, MRMSrcMem,
                  (outs), (ins GR32:$src1, i32mem:$src2),
                  "xchg{l}\t{$src2|$src1}, {$src1|$src2}", []>;

+// Bit scan instructions (two-byte 0F BC/BD opcodes that take a ModRM byte).
+let Defs = [EFLAGS] in {
+def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+                 "bsf{w}\t{$src, $dst|$dst, $src}",
+                 [(set GR16:$dst, (X86bsf GR16:$src))]>, TB, OpSize;
+def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+                 "bsf{w}\t{$src, $dst|$dst, $src}",
+                 [(set GR16:$dst, (X86bsf (loadi16 addr:$src)))]>, TB, OpSize;
+def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+                 "bsf{l}\t{$src, $dst|$dst, $src}",
+                 [(set GR32:$dst, (X86bsf GR32:$src))]>, TB;
+def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+                 "bsf{l}\t{$src, $dst|$dst, $src}",
+                 [(set GR32:$dst, (X86bsf (loadi32 addr:$src)))]>, TB;
+
+def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+                 "bsr{w}\t{$src, $dst|$dst, $src}",
+                 [(set GR16:$dst, (X86bsr GR16:$src))]>, TB, OpSize;
+def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+                 "bsr{w}\t{$src, $dst|$dst, $src}",
+                 [(set GR16:$dst, (X86bsr (loadi16 addr:$src)))]>, TB, OpSize;
+def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+                 "bsr{l}\t{$src, $dst|$dst, $src}",
+                 [(set GR32:$dst, (X86bsr GR32:$src))]>, TB;
+def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+                 "bsr{l}\t{$src, $dst|$dst, $src}",
+                 [(set GR32:$dst, (X86bsr (loadi32 addr:$src)))]>, TB;
+} // Defs = [EFLAGS]
+
 def LEA16r : I<0x8D, MRMSrcMem,
                  (outs GR16:$dst), (ins i32mem:$src),
                  "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
diff --git a/lib/Target/X86/X86InstrX86-64.td b/lib/Target/X86/X86InstrX86-64.td
index 6dea8401348..f153c4d7a65 100644
--- a/lib/Target/X86/X86InstrX86-64.td
+++ b/lib/Target/X86/X86InstrX86-64.td
@@ -167,6 +167,23 @@ def XCHG64mr : RI<0x87, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
 def XCHG64rm : RI<0x87, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
                   "xchg{q}\t{$src2|$src1}, {$src1|$src2}", []>;

+// Bit scan instructions (two-byte 0F BC/BD opcodes that take a ModRM byte).
+let Defs = [EFLAGS] in {
+def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+                  "bsf{q}\t{$src, $dst|$dst, $src}",
+                  [(set GR64:$dst, (X86bsf GR64:$src))]>, TB;
+def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+                  "bsf{q}\t{$src, $dst|$dst, $src}",
+                  [(set GR64:$dst, (X86bsf (loadi64 addr:$src)))]>, TB;
+
+def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+                  "bsr{q}\t{$src, $dst|$dst, $src}",
+                  [(set GR64:$dst, (X86bsr GR64:$src))]>, TB;
+def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+                  "bsr{q}\t{$src, $dst|$dst, $src}",
+                  [(set GR64:$dst, (X86bsr (loadi64 addr:$src)))]>, TB;
+} // Defs = [EFLAGS]
+
 // Repeat string ops
 let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI] in
 def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
new file mode 100644
index 00000000000..0505529cc48
--- /dev/null
+++ b/test/CodeGen/X86/clz.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep bsr
+; RUN: llvm-as < %s | llc -march=x86 | grep bsf
+
+define i32 @t1(i32 %x) nounwind {
+  %tmp = tail call i32 @llvm.ctlz.i32( i32 %x )
+  ret i32 %tmp
+}
+
+declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+
+define i32 @t2(i32 %x) nounwind {
+  %tmp = tail call i32 @llvm.cttz.i32( i32 %x )
+  ret i32 %tmp
+}
+
+declare i32 @llvm.cttz.i32(i32) nounwind readnone