Add a neat little two's complement hack for x86.

On x86 we can't encode an immediate LHS of a sub directly. If the RHS comes from a XOR with a constant we can
fold the negation into the xor and add one to the immediate of the sub. Then we can turn the sub into an add,
which can be commuted and encoded efficiently.

This pattern (a constant minus an xor with a constant) is generated for __builtin_clz and friends.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@136167 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Benjamin Kramer 2011-07-26 22:42:13 +00:00
parent 4ea496846a
commit 162ee5c725
3 changed files with 49 additions and 28 deletions

View File

@ -2076,12 +2076,11 @@ generates (x86_64):
jb LBB0_2
## BB#1:
decl %edi
movl $63, %eax
bsrl %edi, %ecx
cmovel %eax, %ecx
xorl $31, %ecx
movl $32, %eax
subl %ecx, %eax
movl $63, %ecx
bsrl %edi, %eax
cmovel %ecx, %eax
xorl $-32, %eax
addl $33, %eax
LBB0_2:
ret
@ -2091,26 +2090,10 @@ The cmov and the early test are redundant:
jb LBB0_2
## BB#1:
decl %edi
bsrl %edi, %ecx
xorl $31, %ecx
movl $32, %eax
subl %ecx, %eax
bsrl %edi, %eax
xorl $-32, %eax
addl $33, %eax
LBB0_2:
ret
If we want to get really fancy we could use some two's complement magic:
xorl %eax, %eax
cmpl $2, %edi
jb LBB0_2
## BB#1:
decl %edi
bsrl %edi, %ecx
xorl $-32, %ecx
leal 33(%ecx), %eax
LBB0_2:
ret
This is only useful on targets that can't encode the first operand of a sub
directly. The rule is C1 - (X^C2) -> (C1+1) + (X^~C2).
//===---------------------------------------------------------------------===//

View File

@ -12550,7 +12550,7 @@ static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
// (add Y, (setne X, 0)) -> sbb -1, Y
// (sub (sete X, 0), Y) -> sbb 0, Y
// (sub (setne X, 0), Y) -> adc -1, Y
static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) {
static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
DebugLoc DL = N->getDebugLoc();
// Look through ZExts.
@ -12586,6 +12586,33 @@ static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) {
DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
}
/// Target-specific DAG combine for ISD::SUB nodes.
///
/// X86 cannot encode an immediate as the left-hand operand of a sub. When the
/// node has the shape (sub C1, (xor X, C2)) and the xor has no other user, the
/// two's complement identity A - B == A + ~B + 1 lets us rewrite it as
/// (add (xor X, ~C2), C1 + 1), which needs no extra register for C1.
///
/// \param N   the ISD::SUB node being combined.
/// \param DAG the SelectionDAG used to create replacement nodes.
/// \returns the replacement ADD when the pattern matches; otherwise whatever
///          OptimizeConditionalInDecrement returns for \p N.
static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  // X86 can't encode an immediate LHS of a sub. See if we can push the
  // negation into a preceding instruction.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op0)) {
    // If the RHS of the sub is a XOR with one use and a constant, invert the
    // immediate. Then add one to the LHS of the sub so we can turn
    // X-Y -> X+~Y+1, saving one register.
    if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR &&
        isa<ConstantSDNode>(Op1.getOperand(1))) {
      // Do the arithmetic on APInts at the operands' own bit width rather than
      // going through getSExtValue(), which only supports constants that fit
      // in 64 bits and would therefore break on wider integer types.
      APInt XorC = cast<ConstantSDNode>(Op1.getOperand(1))->getAPIntValue();
      EVT VT = Op0.getValueType();
      SDValue NewXor = DAG.getNode(ISD::XOR, Op1.getDebugLoc(), VT,
                                   Op1.getOperand(0),
                                   DAG.getConstant(~XorC, VT));
      return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, NewXor,
                         DAG.getConstant(C->getAPIntValue() + 1, VT));
    }
  }

  return OptimizeConditionalInDecrement(N, DAG);
}
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@ -12595,8 +12622,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
case ISD::ADD:
case ISD::SUB: return OptimizeConditonalInDecrement(N, DAG);
case ISD::ADD: return OptimizeConditionalInDecrement(N, DAG);
case ISD::SUB: return PerformSubCombine(N, DAG);
case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
case ISD::SHL:

11
test/CodeGen/X86/sub.ll Normal file
View File

@ -0,0 +1,11 @@
; RUN: llc -march=x86 < %s | FileCheck %s
; Computes 32 - (x ^ 31). The expected output below is an xor with the
; inverted immediate (-32) immediately followed by an add of 33, i.e. the
; subtraction from a constant has been folded into xor + add.
define i32 @test1(i32 %x) {
%xor = xor i32 %x, 31
%sub = sub i32 32, %xor
ret i32 %sub
; CHECK: test1:
; CHECK: xorl $-32
; CHECK-NEXT: addl $33
; CHECK-NEXT: ret
}