From b6ac30a15591a8dab3cff3f0891d7e1ca9476826 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 30 Aug 2013 06:52:21 +0000 Subject: [PATCH] Teach X86 backend to create BMI2 BZHI instructions from (and X, (add (shl 1, Y), -1)). Fixes PR17038. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189653 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 68 ++++++++++++++++++++++-------- lib/Target/X86/X86ISelLowering.h | 1 + lib/Target/X86/X86InstrInfo.td | 10 +++++ test/CodeGen/X86/bmi.ll | 45 ++++++++++++++++++++ 4 files changed, 106 insertions(+), 18 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 81008e92868..ecbd24febc6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -13473,6 +13473,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::BLSI: return "X86ISD::BLSI"; case X86ISD::BLSMSK: return "X86ISD::BLSMSK"; case X86ISD::BLSR: return "X86ISD::BLSR"; + case X86ISD::BZHI: return "X86ISD::BZHI"; case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::TESTP: return "X86ISD::TESTP"; @@ -17279,33 +17280,64 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, if (R.getNode()) return R; - // Create BLSI, and BLSR instructions + // Create BLSI, BLSR, and BZHI instructions // BLSI is X & (-X) // BLSR is X & (X-1) - if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) { + // BZHI is X & ((1 << Y) - 1) + if (VT == MVT::i32 || VT == MVT::i64) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDLoc DL(N); - // Check LHS for neg - if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 && - isZero(N0.getOperand(0))) - return DAG.getNode(X86ISD::BLSI, DL, VT, N1); + if (Subtarget->hasBMI()) { + // Check LHS for neg + if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 && + isZero(N0.getOperand(0))) + return 
DAG.getNode(X86ISD::BLSI, DL, VT, N1); - // Check RHS for neg - if (N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 && - isZero(N1.getOperand(0))) - return DAG.getNode(X86ISD::BLSI, DL, VT, N0); + // Check RHS for neg + if (N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 && + isZero(N1.getOperand(0))) + return DAG.getNode(X86ISD::BLSI, DL, VT, N0); - // Check LHS for X-1 - if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 && - isAllOnes(N0.getOperand(1))) - return DAG.getNode(X86ISD::BLSR, DL, VT, N1); + // Check LHS for X-1 + if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 && + isAllOnes(N0.getOperand(1))) + return DAG.getNode(X86ISD::BLSR, DL, VT, N1); - // Check RHS for X-1 - if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 && - isAllOnes(N1.getOperand(1))) - return DAG.getNode(X86ISD::BLSR, DL, VT, N0); + // Check RHS for X-1 + if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 && + isAllOnes(N1.getOperand(1))) + return DAG.getNode(X86ISD::BLSR, DL, VT, N0); + } + + if (Subtarget->hasBMI2()) { + // Check for (and (add (shl 1, Y), -1), X) + if (N0.getOpcode() == ISD::ADD && isAllOnes(N0.getOperand(1))) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::SHL) { + SDValue N001 = N00.getOperand(1); + assert(N001.getValueType() == MVT::i8 && "unexpected type"); + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N00.getOperand(0)); + if (C && C->getZExtValue() == 1) + return DAG.getNode(X86ISD::BZHI, DL, VT, N1, + DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N001)); + } + } + + // Check for (and X, (add (shl 1, Y), -1)) + if (N1.getOpcode() == ISD::ADD && isAllOnes(N1.getOperand(1))) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::SHL) { + SDValue N101 = N10.getOperand(1); + assert(N101.getValueType() == MVT::i8 && "unexpected type"); + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N10.getOperand(0)); + if (C && C->getZExtValue() == 1) + return DAG.getNode(X86ISD::BZHI, DL, VT, N0, + DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N101)); + } + } +
} return SDValue(); } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 632a5b63bac..6a2e4d27905 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -295,6 +295,7 @@ namespace llvm { BLSI, // BLSI - Extract lowest set isolated bit BLSMSK, // BLSMSK - Get mask up to lowest set bit BLSR, // BLSR - Reset lowest set bit + BZHI, // BZHI - Zero high bits UMUL, // LOW, HI, FLAGS = umul LHS, RHS diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index c2d180713e5..41e2c4435ae 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -253,6 +253,7 @@ def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>; def X86blsi : SDNode<"X86ISD::BLSI", SDTIntUnaryOp>; def X86blsmsk : SDNode<"X86ISD::BLSMSK", SDTIntUnaryOp>; def X86blsr : SDNode<"X86ISD::BLSR", SDTIntUnaryOp>; +def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntBinOp>; def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; @@ -1856,6 +1857,15 @@ let Predicates = [HasBMI2], Defs = [EFLAGS] in { int_x86_bmi_bzhi_64, loadi64>, VEX_W; } +def : Pat<(X86bzhi GR32:$src1, GR32:$src2), + (BZHI32rr GR32:$src1, GR32:$src2)>; +def : Pat<(X86bzhi (loadi32 addr:$src1), GR32:$src2), + (BZHI32rm addr:$src1, GR32:$src2)>; +def : Pat<(X86bzhi GR64:$src1, GR64:$src2), + (BZHI64rr GR64:$src1, GR64:$src2)>; +def : Pat<(X86bzhi (loadi64 addr:$src1), GR64:$src2), + (BZHI64rm addr:$src1, GR64:$src2)>; + multiclass bmi_pdep_pext { diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll index 4eda8888b66..757e98d28d2 100644 --- a/test/CodeGen/X86/bmi.ll +++ b/test/CodeGen/X86/bmi.ll @@ -146,6 +146,51 @@ define i64 @bzhi64(i64 %x, i64 %y) nounwind readnone { declare i64 @llvm.x86.bmi.bzhi.64(i64, i64) nounwind readnone +define i32 @bzhi32b(i32 %x, i8 zeroext %index) #0 { +entry: + %conv = zext i8 %index to i32 + %shl = shl i32 1, %conv + %sub = add nsw i32 %shl, -1 + %and = and i32 %sub, %x + ret i32 %and +; 
CHECK-LABEL: bzhi32b: +; CHECK: bzhil +} + +define i32 @bzhi32b_load(i32* %w, i8 zeroext %index) #0 { +entry: + %x = load i32* %w + %conv = zext i8 %index to i32 + %shl = shl i32 1, %conv + %sub = add nsw i32 %shl, -1 + %and = and i32 %sub, %x + ret i32 %and +; CHECK-LABEL: bzhi32b_load: +; CHECK: bzhil {{.*}}, ({{.*}}), {{.*}} +} + +define i32 @bzhi32c(i32 %x, i8 zeroext %index) #0 { +entry: + %conv = zext i8 %index to i32 + %shl = shl i32 1, %conv + %sub = add nsw i32 %shl, -1 + %and = and i32 %x, %sub + ret i32 %and +; CHECK-LABEL: bzhi32c: +; CHECK: bzhil +} + +define i64 @bzhi64b(i64 %x, i8 zeroext %index) #0 { +entry: + %conv = zext i8 %index to i64 + %shl = shl i64 1, %conv + %sub = add nsw i64 %shl, -1 + %and = and i64 %x, %sub + ret i64 %and +; CHECK-LABEL: bzhi64b: +; CHECK: bzhiq +} + define i32 @blsi32(i32 %x) nounwind readnone { %tmp = sub i32 0, %x %tmp2 = and i32 %x, %tmp