diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index 3352d6740a5..4eb696090be 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -2602,7 +2602,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
                          llvm_i32_ty], [IntrNoMem]>;
   def int_x86_tbm_bextri_u64 : GCCBuiltin<"__builtin_ia32_bextri_u64">,
         Intrinsic<[llvm_i64_ty], [llvm_i64_ty,
-                   llvm_i32_ty], [IntrNoMem]>;
+                   llvm_i64_ty], [IntrNoMem]>;
   def int_x86_tbm_blcfill_u32 : GCCBuiltin<"__builtin_ia32_blcfill_u32">,
         Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6289f9ebdbf..5178cad0056 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -17578,22 +17578,6 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
     if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
         isAllOnes(N1.getOperand(1)))
       return DAG.getNode(X86ISD::BLSR, DL, VT, N0);
-
-    // Check for BEXTR
-    if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL) {
-      ConstantSDNode *MaskNode = dyn_cast<ConstantSDNode>(N1);
-      ConstantSDNode *ShiftNode = dyn_cast<ConstantSDNode>(N0.getOperand(1));
-      if (MaskNode && ShiftNode) {
-        uint64_t Mask = MaskNode->getZExtValue();
-        uint64_t Shift = ShiftNode->getZExtValue();
-        if (isMask_64(Mask)) {
-          uint64_t MaskSize = CountPopulation_64(Mask);
-          if (Shift + MaskSize <= VT.getSizeInBits())
-            return DAG.getNode(X86ISD::BEXTR, DL, VT, N0.getOperand(0),
-                               DAG.getConstant(Shift | (MaskSize << 8), VT));
-        }
-      }
-    }
   }
 
   if (Subtarget->hasBMI2()) {
@@ -17622,6 +17606,23 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  // Check for BEXTR.
+  if ((Subtarget->hasBMI() || Subtarget->hasTBM()) &&
+      (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)) {
+    ConstantSDNode *MaskNode = dyn_cast<ConstantSDNode>(N1);
+    ConstantSDNode *ShiftNode = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (MaskNode && ShiftNode) {
+      uint64_t Mask = MaskNode->getZExtValue();
+      uint64_t Shift = ShiftNode->getZExtValue();
+      if (isMask_64(Mask)) {
+        uint64_t MaskSize = CountPopulation_64(Mask);
+        if (Shift + MaskSize <= VT.getSizeInBits())
+          return DAG.getNode(X86ISD::BEXTR, DL, VT, N0.getOperand(0),
+                             DAG.getConstant(Shift | (MaskSize << 8), VT));
+      }
+    }
+  } // BEXTR
+
   return SDValue();
 }
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 41d04390f90..b63fbe99283 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1875,14 +1875,16 @@
 def : Pat<(X86bzhi (loadi64 addr:$src1), GR8:$src2),
           (BZHI64rm addr:$src1,
            (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-def : Pat<(X86bextr GR32:$src1, GR32:$src2),
-          (BEXTR32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(X86bextr (loadi32 addr:$src1), GR32:$src2),
-          (BEXTR32rm addr:$src1, GR32:$src2)>;
-def : Pat<(X86bextr GR64:$src1, GR64:$src2),
-          (BEXTR64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(X86bextr (loadi64 addr:$src1), GR64:$src2),
-          (BEXTR64rm addr:$src1, GR64:$src2)>;
+let Predicates = [HasBMI] in {
+  def : Pat<(X86bextr GR32:$src1, GR32:$src2),
+            (BEXTR32rr GR32:$src1, GR32:$src2)>;
+  def : Pat<(X86bextr (loadi32 addr:$src1), GR32:$src2),
+            (BEXTR32rm addr:$src1, GR32:$src2)>;
+  def : Pat<(X86bextr GR64:$src1, GR64:$src2),
+            (BEXTR64rr GR64:$src1, GR64:$src2)>;
+  def : Pat<(X86bextr (loadi64 addr:$src1), GR64:$src2),
+            (BEXTR64rm addr:$src1, GR64:$src2)>;
+} // HasBMI
 
 multiclass tbm_ternary_imm_intr<bits<8> opc, RegisterClass RC, string OpcodeStr,
                                 X86MemOperand x86memop, PatFrag ld_frag,
-                                Intrinsic Int> {
-  def rr : Ii32<opc,  MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i32imm:$cntl),
+                                Intrinsic Int, Operand immtype,
+                                SDPatternOperator immoperator> {
+  def ri : Ii32<opc,  MRMSrcReg, (outs RC:$dst), (ins RC:$src1, immtype:$cntl),
                 !strconcat(OpcodeStr,
                            "\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
-                [(set RC:$dst, (Int RC:$src1, imm:$cntl))]>,
+                [(set RC:$dst, (Int RC:$src1, immoperator:$cntl))]>,
            XOP, XOPA, VEX;
-  def mr : Ii32<opc,  MRMSrcMem, (outs RC:$dst),
-                (ins x86memop:$src1, i32imm:$cntl),
+  def mi : Ii32<opc,  MRMSrcMem, (outs RC:$dst),
+                (ins x86memop:$src1, immtype:$cntl),
                 !strconcat(OpcodeStr,
                            "\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
-                [(set RC:$dst, (Int (ld_frag addr:$src1), imm:$cntl))]>,
+                [(set RC:$dst, (Int (ld_frag addr:$src1), immoperator:$cntl))]>,
            XOP, XOPA, VEX;
 }
 
 defm BEXTRI32 : tbm_ternary_imm_intr<0x10, GR32, "bextr", i32mem, loadi32,
-                                     int_x86_tbm_bextri_u32>;
+                                     int_x86_tbm_bextri_u32, i32imm, imm>;
 defm BEXTRI64 : tbm_ternary_imm_intr<0x10, GR64, "bextr", i64mem, loadi64,
-                                     int_x86_tbm_bextri_u64>, VEX_W;
+                                     int_x86_tbm_bextri_u64, i64i32imm,
+                                     i64immSExt32>, VEX_W;
 
 multiclass tbm_binary_rm<bits<8> opc, Format FormReg, Format FormMem,
                          RegisterClass RC, string OpcodeStr,
@@ -1985,6 +1989,67 @@
 defm TZMSK   : tbm_binary_intr<0x01, "tzmsk", MRM4r, MRM4m,
                                int_x86_tbm_tzmsk_u64>;
 } // isAsmParserOnly, HasTBM, EFLAGS
 
+//===----------------------------------------------------------------------===//
+// Pattern fragments to auto generate TBM instructions.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasTBM] in {
+  def : Pat<(X86bextr GR32:$src1, (i32 imm:$src2)),
+            (BEXTRI32ri GR32:$src1, imm:$src2)>;
+  def : Pat<(X86bextr (loadi32 addr:$src1), (i32 imm:$src2)),
+            (BEXTRI32mi addr:$src1, imm:$src2)>;
+  def : Pat<(X86bextr GR64:$src1, i64immSExt32:$src2),
+            (BEXTRI64ri GR64:$src1, i64immSExt32:$src2)>;
+  def : Pat<(X86bextr (loadi64 addr:$src1), i64immSExt32:$src2),
+            (BEXTRI64mi addr:$src1, i64immSExt32:$src2)>;
+
+  // FIXME: patterns for the load versions are not implemented
+  def : Pat<(and GR32:$src, (add GR32:$src, 1)),
+            (BLCFILL_32rr GR32:$src)>;
+  def : Pat<(and GR64:$src, (add GR64:$src, 1)),
+            (BLCFILL_64rr GR64:$src)>;
+
+  def : Pat<(or GR32:$src, (not (add GR32:$src, 1))),
+            (BLCI_32rr GR32:$src)>;
+  def : Pat<(or GR64:$src, (not (add GR64:$src, 1))),
+            (BLCI_64rr GR64:$src)>;
+
+  def : Pat<(and (not GR32:$src), (add GR32:$src, 1)),
+            (BLCIC_32rr GR32:$src)>;
+  def : Pat<(and (not GR64:$src), (add GR64:$src, 1)),
+            (BLCIC_64rr GR64:$src)>;
+
+  def : Pat<(xor GR32:$src, (add GR32:$src, 1)),
+            (BLCMSK_32rr GR32:$src)>;
+  def : Pat<(xor GR64:$src, (add GR64:$src, 1)),
+            (BLCMSK_64rr GR64:$src)>;
+
+  def : Pat<(or GR32:$src, (add GR32:$src, 1)),
+            (BLCS_32rr GR32:$src)>;
+  def : Pat<(or GR64:$src, (add GR64:$src, 1)),
+            (BLCS_64rr GR64:$src)>;
+
+  def : Pat<(or GR32:$src, (add GR32:$src, -1)),
+            (BLSFILL_32rr GR32:$src)>;
+  def : Pat<(or GR64:$src, (add GR64:$src, -1)),
+            (BLSFILL_64rr GR64:$src)>;
+
+  def : Pat<(or (not GR32:$src), (add GR32:$src, -1)),
+            (BLSIC_32rr GR32:$src)>;
+  def : Pat<(or (not GR64:$src), (add GR64:$src, -1)),
+            (BLSIC_64rr GR64:$src)>;
+
+  def : Pat<(or (not GR32:$src), (add GR32:$src, 1)),
+            (T1MSKC_32rr GR32:$src)>;
+  def : Pat<(or (not GR64:$src), (add GR64:$src, 1)),
+            (T1MSKC_64rr GR64:$src)>;
+
+  def : Pat<(and (not GR32:$src), (add GR32:$src, -1)),
+            (TZMSK_32rr GR32:$src)>;
+  def : Pat<(and (not GR64:$src), (add GR64:$src, -1)),
+            (TZMSK_64rr GR64:$src)>;
+} // HasTBM
+
 //===----------------------------------------------------------------------===//
 // Subsystems.
 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/tbm-intrinsics-x86_64.ll b/test/CodeGen/X86/tbm-intrinsics-x86_64.ll
index add061fc7f5..16343c0dbab 100644
--- a/test/CodeGen/X86/tbm-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/tbm-intrinsics-x86_64.ll
@@ -26,11 +26,11 @@ entry:
   ; CHECK-LABEL: test_x86_tbm_bextri_u64:
   ; CHECK-NOT: mov
   ; CHECK: bextr $
-  %0 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i32 2814)
+  %0 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i64 2814)
   ret i64 %0
 }
 
-declare i64 @llvm.x86.tbm.bextri.u64(i64, i32) nounwind readnone
+declare i64 @llvm.x86.tbm.bextri.u64(i64, i64) nounwind readnone
 
 define i64 @test_x86_tbm_bextri_u64_m(i64* nocapture %a) nounwind readonly {
 entry:
@@ -38,7 +38,7 @@ entry:
   ; CHECK-NOT: mov
   ; CHECK: bextr $
   %tmp1 = load i64* %a, align 8
-  %0 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %tmp1, i32 2814)
+  %0 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %tmp1, i64 2814)
   ret i64 %0
 }
diff --git a/test/CodeGen/X86/tbm_patterns.ll b/test/CodeGen/X86/tbm_patterns.ll
new file mode 100644
index 00000000000..a8d55481b5b
--- /dev/null
+++ b/test/CodeGen/X86/tbm_patterns.ll
@@ -0,0 +1,233 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+tbm < %s | FileCheck %s
+
+define i32 @test_x86_tbm_bextri_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_bextri_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: bextr $
+  %0 = lshr i32 %a, 4
+  %1 = and i32 %0, 4095
+  ret i32 %1
+}
+
+define i32 @test_x86_tbm_bextri_u32_m(i32* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_bextri_u32_m:
+  ; CHECK-NOT: mov
+  ; CHECK: bextr $
+  %0 = load i32* %a
+  %1 = lshr i32 %0, 4
+  %2 = and i32 %1, 4095
+  ret i32 %2
+}
+
+define i64 @test_x86_tbm_bextri_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_bextri_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: bextr $
+  %0 = lshr i64 %a, 4
+  %1 = and i64 %0, 4095
+  ret i64 %1
+}
+
+define i64 @test_x86_tbm_bextri_u64_m(i64* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_bextri_u64_m:
+  ; CHECK-NOT: mov
+  ; CHECK: bextr $
+  %0 = load i64* %a
+  %1 = lshr i64 %0, 4
+  %2 = and i64 %1, 4095
+  ret i64 %2
+}
+
+define i32 @test_x86_tbm_blcfill_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blcfill_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: blcfill %
+  %0 = add i32 %a, 1
+  %1 = and i32 %0, %a
+  ret i32 %1
+}
+
+define i64 @test_x86_tbm_blcfill_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blcfill_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: blcfill %
+  %0 = add i64 %a, 1
+  %1 = and i64 %0, %a
+  ret i64 %1
+}
+
+define i32 @test_x86_tbm_blci_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blci_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: blci %
+  %0 = add i32 1, %a
+  %1 = xor i32 %0, -1
+  %2 = or i32 %1, %a
+  ret i32 %2
+}
+
+define i64 @test_x86_tbm_blci_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blci_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: blci %
+  %0 = add i64 1, %a
+  %1 = xor i64 %0, -1
+  %2 = or i64 %1, %a
+  ret i64 %2
+}
+
+define i32 @test_x86_tbm_blcic_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blcic_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: blcic %
+  %0 = xor i32 %a, -1
+  %1 = add i32 %a, 1
+  %2 = and i32 %1, %0
+  ret i32 %2
+}
+
+define i64 @test_x86_tbm_blcic_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blcic_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: blcic %
+  %0 = xor i64 %a, -1
+  %1 = add i64 %a, 1
+  %2 = and i64 %1, %0
+  ret i64 %2
+}
+
+define i32 @test_x86_tbm_blcmsk_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blcmsk_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: blcmsk %
+  %0 = add i32 %a, 1
+  %1 = xor i32 %0, %a
+  ret i32 %1
+}
+
+define i64 @test_x86_tbm_blcmsk_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blcmsk_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: blcmsk %
+  %0 = add i64 %a, 1
+  %1 = xor i64 %0, %a
+  ret i64 %1
+}
+
+define i32 @test_x86_tbm_blcs_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blcs_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: blcs %
+  %0 = add i32 %a, 1
+  %1 = or i32 %0, %a
+  ret i32 %1
+}
+
+define i64 @test_x86_tbm_blcs_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blcs_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: blcs %
+  %0 = add i64 %a, 1
+  %1 = or i64 %0, %a
+  ret i64 %1
+}
+
+define i32 @test_x86_tbm_blsfill_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blsfill_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: blsfill %
+  %0 = add i32 %a, -1
+  %1 = or i32 %0, %a
+  ret i32 %1
+}
+
+define i64 @test_x86_tbm_blsfill_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blsfill_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: blsfill %
+  %0 = add i64 %a, -1
+  %1 = or i64 %0, %a
+  ret i64 %1
+}
+
+define i32 @test_x86_tbm_blsic_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blsic_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: blsic %
+  %0 = xor i32 %a, -1
+  %1 = add i32 %a, -1
+  %2 = or i32 %0, %1
+  ret i32 %2
+}
+
+define i64 @test_x86_tbm_blsic_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blsic_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: blsic %
+  %0 = xor i64 %a, -1
+  %1 = add i64 %a, -1
+  %2 = or i64 %0, %1
+  ret i64 %2
+}
+
+define i32 @test_x86_tbm_t1mskc_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_t1mskc_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: t1mskc %
+  %0 = xor i32 %a, -1
+  %1 = add i32 %a, 1
+  %2 = or i32 %0, %1
+  ret i32 %2
+}
+
+define i64 @test_x86_tbm_t1mskc_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_t1mskc_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: t1mskc %
+  %0 = xor i64 %a, -1
+  %1 = add i64 %a, 1
+  %2 = or i64 %0, %1
+  ret i64 %2
+}
+
+define i32 @test_x86_tbm_tzmsk_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_tzmsk_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: tzmsk %
+  %0 = xor i32 %a, -1
+  %1 = add i32 %a, -1
+  %2 = and i32 %0, %1
+  ret i32 %2
+}
+
+define i64 @test_x86_tbm_tzmsk_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_tzmsk_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: tzmsk %
+  %0 = xor i64 %a, -1
+  %1 = add i64 %a, -1
+  %2 = and i64 %0, %1
+  ret i64 %2
+}
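
The new PerformAndCombine check and the BEXTRI patterns both describe the extracted field as a single immediate, Shift | (MaskSize << 8). The snippet below is a minimal standalone C++ sketch of that control-word layout and of the (x >> shift) & mask shape the combine recognizes; it is not part of the patch, and the helper names are illustrative only.

// Standalone sketch: the BEXTR/BEXTRI control word packs the start bit in
// bits [7:0] and the field length in bits [15:8], which is exactly what
// DAG.getConstant(Shift | (MaskSize << 8), VT) builds in the combine.
#include <cassert>
#include <cstdint>

uint64_t bextrControl(uint64_t Shift, uint64_t MaskSize) {
  return Shift | (MaskSize << 8);
}

// Reference semantics of a 32-bit BEXTR: extract Len bits starting at Start,
// i.e. (X >> Start) & ((1 << Len) - 1), with out-of-range fields handled.
uint32_t bextr32Reference(uint32_t X, uint64_t Control) {
  uint64_t Start = Control & 0xff;
  uint64_t Len = (Control >> 8) & 0xff;
  if (Start >= 32)
    return 0;
  uint32_t Shifted = X >> Start;
  if (Len >= 32)
    return Shifted;
  return Shifted & ((1u << Len) - 1);
}

int main() {
  // (a >> 4) & 4095, as in test_x86_tbm_bextri_u32, gives Shift = 4 and
  // MaskSize = 12, so the control word is 0xc04; Shift + MaskSize <= 32,
  // which satisfies the guard in the combine.
  uint32_t A = 0x12345678;
  assert(bextrControl(4, 12) == 0xc04);
  assert(bextr32Reference(A, bextrControl(4, 12)) == ((A >> 4) & 4095));
  return 0;
}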
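
The HasTBM pattern fragments and the tbm_patterns.ll tests all rest on the same scalar bit-manipulation identities. The following standalone C++ sketch restates each identity next to the instruction it should select to; it is not part of the patch, and the function names are invented for illustration.

// Standalone sketch of the identities matched by the new
// let Predicates = [HasTBM] patterns (32-bit forms shown).
#include <cassert>
#include <cstdint>

uint32_t blcfill(uint32_t X) { return X & (X + 1); }  // BLCFILL: clear trailing ones
uint32_t blci(uint32_t X)    { return X | ~(X + 1); } // BLCI: all ones except lowest clear bit
uint32_t blcic(uint32_t X)   { return ~X & (X + 1); } // BLCIC: isolate lowest clear bit
uint32_t blcmsk(uint32_t X)  { return X ^ (X + 1); }  // BLCMSK: mask through lowest clear bit
uint32_t blcs(uint32_t X)    { return X | (X + 1); }  // BLCS: set lowest clear bit
uint32_t blsfill(uint32_t X) { return X | (X - 1); }  // BLSFILL: fill below lowest set bit
uint32_t blsic(uint32_t X)   { return ~X | (X - 1); } // BLSIC: all ones except lowest set bit
uint32_t t1mskc(uint32_t X)  { return ~X | (X + 1); } // T1MSKC: inverse mask of trailing ones
uint32_t tzmsk(uint32_t X)   { return ~X & (X - 1); } // TZMSK: mask of trailing zeros

int main() {
  uint32_t X = 0x2c; // 0b101100: trailing zeros are bits 0-1, lowest set bit is bit 2
  assert(blcfill(X) == 0x2cu);
  assert(blci(X)    == 0xfffffffeu);
  assert(blcic(X)   == 0x1u);
  assert(blcmsk(X)  == 0x1u);
  assert(blcs(X)    == 0x2du);
  assert(blsfill(X) == 0x2fu);
  assert(blsic(X)   == 0xfffffffbu);
  assert(t1mskc(X)  == 0xffffffffu);
  assert(tzmsk(X)   == 0x3u);
  return 0;
}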