diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9974d8c997d..4c40fe1803e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -990,6 +990,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::BUILD_VECTOR); setTargetDAGCombine(ISD::SELECT); + setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); @@ -9157,6 +9158,53 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// PerformANDCombine - Look for SSE and instructions of this form: +/// (and x, (build_vector c1,c2,c3,c4)). If there exists a use of a build_vector +/// that's the bitwise complement of the mask, then transform the node to +/// (and (xor x, (build_vector -1,-1,-1,-1)), (build_vector ~c1,~c2,~c3,~c4)). +static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + EVT VT = N->getValueType(0); + if (!VT.isVector() || !VT.isInteger()) + return SDValue(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (N0.getOpcode() == ISD::XOR || !N1.hasOneUse()) + return SDValue(); + + if (N1.getOpcode() == ISD::BUILD_VECTOR) { + unsigned NumElts = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + SmallVector Mask; + Mask.reserve(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Arg = N1.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) { + Mask.push_back(Arg); + continue; + } + ConstantSDNode *C = dyn_cast(Arg); + if (!C) return SDValue(); + Mask.push_back(DAG.getConstant(~C->getAPIntValue(), EltVT)); + } + N1 = DAG.getNode(ISD::BUILD_VECTOR, N1.getDebugLoc(), VT, + &Mask[0], NumElts); + if (!N1.use_empty()) { + unsigned Bits = EltVT.getSizeInBits(); + Mask.clear(); + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(DAG.getConstant(APInt::getAllOnesValue(Bits), EltVT)); + SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + VT, &Mask[0], NumElts); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, + N0, NewMask), N1); + } + } + + return SDValue(); +} /// PerformMulCombine - Optimize a single multiply with constant into two /// in order to implement it with two cheaper instructions, e.g. @@ -9305,7 +9353,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { if (ConstantSDNode *C = dyn_cast(InVec.getOperand(2))) { - unsigned SplatIdx = cast(ShAmtOp)->getSplatIndex(); + unsigned SplatIdx= cast(ShAmtOp)->getSplatIndex(); if (C->getZExtValue() == SplatIdx) BaseShAmt = InVec.getOperand(1); } @@ -9690,6 +9738,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this); case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); + case ISD::AND: return PerformANDCombine(N, DAG, DCI); case ISD::MUL: return PerformMulCombine(N, DAG, DCI); case ISD::SHL: case ISD::SRA: diff --git a/test/CodeGen/X86/lsr-reuse-trunc.ll b/test/CodeGen/X86/lsr-reuse-trunc.ll index d1d714491fa..a663a220e62 100644 --- a/test/CodeGen/X86/lsr-reuse-trunc.ll +++ b/test/CodeGen/X86/lsr-reuse-trunc.ll @@ -1,10 +1,19 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s ; Full strength reduction wouldn't reduce register pressure, so LSR should ; stick with indexing here. -; CHECK: movaps (%rsi,%rax,4), %xmm3 -; CHECK: movaps %xmm3, (%rdi,%rax,4) +; Also checks andps and andnps shares the same constantpool. Previously llvm +; will codegen two andps, one using 0x80000000, the other 0x7fffffff. +; rdar://7323335 + +; CHECK: movaps LCPI1_0 +; CHECK: movaps LCPI1_1 +; CHECK-NOT: movaps LCPI1_2 +; CHECK: movaps (%rsi,%rax,4), %xmm2 +; CHECK: andps +; CHECK: andnps +; CHECK: movaps %xmm2, (%rdi,%rax,4) ; CHECK: addq $4, %rax ; CHECK: cmpl %eax, (%rdx) ; CHECK-NEXT: jg diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll index 01d73736d6c..e1d0fe76657 100644 --- a/test/CodeGen/X86/sink-hoist.ll +++ b/test/CodeGen/X86/sink-hoist.ll @@ -63,7 +63,6 @@ entry: ; CHECK: vv: ; CHECK: LCPI4_0(%rip), %xmm0 ; CHECK: LCPI4_1(%rip), %xmm1 -; CHECK: LCPI4_2(%rip), %xmm2 ; CHECK: align ; CHECK-NOT: LCPI ; CHECK: ret