diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 133ae7066e9..104b91fd353 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -104,6 +104,8 @@ def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true", "Support 16-bit floating point conversion instructions">; def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true", "Support LZCNT instruction">; +def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true", + "Support BMI instructions">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -158,6 +160,12 @@ def : Proc<"core-avx-i", [FeatureSSE42, FeatureCMPXCHG16B, FeatureAES, FeatureCLMUL, FeatureRDRAND, FeatureF16C]>; +// Haswell +def : Proc<"core-avx2", [FeatureSSE42, FeatureCMPXCHG16B, FeatureAES, + FeatureCLMUL, FeatureRDRAND, FeatureF16C, + FeatureFMA3, FeatureMOVBE, FeatureLZCNT, + FeatureBMI]>; + def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [Feature3DNow]>; def : Proc<"k6-3", [Feature3DNow]>; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 251064b3bb0..f85c201d01c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -379,11 +379,15 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FREM , MVT::f80 , Expand); setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); - setOperationAction(ISD::CTTZ , MVT::i8 , Custom); - setOperationAction(ISD::CTTZ , MVT::i16 , Custom); - setOperationAction(ISD::CTTZ , MVT::i32 , Custom); - if (Subtarget->is64Bit()) - setOperationAction(ISD::CTTZ , MVT::i64 , Custom); + if (Subtarget->hasBMI()) { + setOperationAction(ISD::CTTZ , MVT::i8 , Promote); + } else { + setOperationAction(ISD::CTTZ , MVT::i8 , Custom); + setOperationAction(ISD::CTTZ , MVT::i16 , Custom); + setOperationAction(ISD::CTTZ , MVT::i32 , Custom); + if (Subtarget->is64Bit()) + setOperationAction(ISD::CTTZ , MVT::i64 , Custom); + } if (Subtarget->hasLZCNT()) { setOperationAction(ISD::CTLZ , MVT::i8 , Promote); diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index c43351a0293..cc358feb97e 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -478,6 +478,7 @@ def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">; def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">; def HasF16C : Predicate<"Subtarget->hasF16C()">; def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">; +def HasBMI : Predicate<"Subtarget->hasBMI()">; def FPStackf32 : Predicate<"!Subtarget->hasXMM()">; def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; @@ -1372,6 +1373,37 @@ let Predicates = [HasLZCNT], Defs = [EFLAGS] in { (implicit EFLAGS)]>, XS; } +//===----------------------------------------------------------------------===// +// TZCNT Instruction +// +let Predicates = [HasBMI], Defs = [EFLAGS] in { + def TZCNT16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "tzcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (cttz GR16:$src)), (implicit EFLAGS)]>, XS, + OpSize; + def TZCNT16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "tzcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (cttz (loadi16 addr:$src))), + (implicit EFLAGS)]>, XS, OpSize; + + def TZCNT32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "tzcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (cttz GR32:$src)), (implicit EFLAGS)]>, XS; + def TZCNT32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "tzcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (cttz (loadi32 addr:$src))), + (implicit EFLAGS)]>, XS; + + def TZCNT64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "tzcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (cttz GR64:$src)), (implicit EFLAGS)]>, + XS; + def TZCNT64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "tzcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (cttz (loadi64 addr:$src))), + (implicit EFLAGS)]>, XS; +} + //===----------------------------------------------------------------------===// // Subsystems. //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index c2f60be3219..7064dd06fa3 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -290,6 +290,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, , HasRDRAND(false) , HasF16C(false) , HasLZCNT(false) + , HasBMI(false) , IsBTMemSlow(false) , IsUAMemFast(false) , HasVectorUAMem(false) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index f67575a94df..3258d3d0ada 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -102,6 +102,9 @@ protected: /// HasLZCNT - Processor has LZCNT instruction. bool HasLZCNT; + /// HasBMI - Processor has BMI1 instructions. + bool HasBMI; + /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; @@ -188,6 +191,7 @@ public: bool hasRDRAND() const { return HasRDRAND; } bool hasF16C() const { return HasF16C; } bool hasLZCNT() const { return HasLZCNT; } + bool hasBMI() const { return HasBMI; } bool isBTMemSlow() const { return IsBTMemSlow; } bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll new file mode 100644 index 00000000000..e0d1b583aa9 --- /dev/null +++ b/test/CodeGen/X86/bmi.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -march=x86-64 -mattr=+bmi | FileCheck %s + +define i32 @t1(i32 %x) nounwind { + %tmp = tail call i32 @llvm.cttz.i32( i32 %x ) + ret i32 %tmp +; CHECK: t1: +; CHECK: tzcntl +} + +declare i32 @llvm.cttz.i32(i32) nounwind readnone + +define i16 @t2(i16 %x) nounwind { + %tmp = tail call i16 @llvm.cttz.i16( i16 %x ) + ret i16 %tmp +; CHECK: t2: +; CHECK: tzcntw +} + +declare i16 @llvm.cttz.i16(i16) nounwind readnone + +define i64 @t3(i64 %x) nounwind { + %tmp = tail call i64 @llvm.cttz.i64( i64 %x ) + ret i64 %tmp +; CHECK: t3: +; CHECK: tzcntq +} + +declare i64 @llvm.cttz.i64(i64) nounwind readnone + +define i8 @t4(i8 %x) nounwind { + %tmp = tail call i8 @llvm.cttz.i8( i8 %x ) + ret i8 %tmp +; CHECK: t4: +; CHECK: tzcntw +} + +declare i8 @llvm.cttz.i8(i8) nounwind readnone + diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt index 1540d12a280..8ca3015c2cd 100644 --- a/test/MC/Disassembler/X86/simple-tests.txt +++ b/test/MC/Disassembler/X86/simple-tests.txt @@ -497,3 +497,12 @@ # CHECK: lzcntq %rax, %rax 0xf3 0x48 0x0f 0xbd 0xc0 + +# CHECK: tzcntl %eax, %eax +0xf3 0x0f 0xbc 0xc0 + +# CHECK: tzcntw %ax, %ax +0x66 0xf3 0x0f 0xbc 0xc0 + +# CHECK: tzcntq %rax, %rax +0xf3 0x48 0x0f 0xbc 0xc0 diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt index 146b6667e51..e58356bd432 100644 --- a/test/MC/Disassembler/X86/x86-32.txt +++ b/test/MC/Disassembler/X86/x86-32.txt @@ -477,3 +477,9 @@ # CHECK: lzcntw %ax, %ax 0x66 0xf3 0x0f 0xbd 0xc0 + +# CHECK: tzcntl %eax, %eax +0xf3 0x0f 0xbc 0xc0 + +# CHECK: tzcntw %ax, %ax +0x66 0xf3 0x0f 0xbc 0xc0