Add a DAGCombine to turn (ctpop x) u< 2 into (x & x-1) == 0.

This shaves off 4 popcounts from the hacked 186.crafty source.

This is enabled even when a native popcount instruction is available. The
combined code is one operation longer but it should be faster nevertheless.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123621 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Benjamin Kramer 2011-01-17 12:04:57 +00:00
parent 8702e8be8d
commit d822892455
2 changed files with 55 additions and 0 deletions

View File

@ -1870,6 +1870,30 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
// Fold "population count is at most one" / "more than one" comparisons into
// the x & (x-1) bit trick so that no ctpop needs to be computed at all.
SDValue CTPOP = N0;
// Look through truncs that don't change the value of a ctpop.
if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
  CTPOP = N0.getOperand(0);
// A ctpop over an N-bit value yields a result in [0, N]. Representing N
// itself takes strictly more than Log2_32_Ceil(N) bits when N is a power of
// two (e.g. 64 needs 7 bits, not 6), so the truncated type must be strictly
// wider than that. Accepting equality would let a count of N wrap to 0 in
// the narrow type, making "(trunc (ctpop x)) u< 2" true for an all-ones x
// while the replacement "(x & x-1) == 0" is false.
if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
    (N0 == CTPOP || N0.getValueType().getSizeInBits() >
                    Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) {
  // The replacement is built in the ctpop's own (untruncated) type.
  EVT CTVT = CTPOP.getValueType();
  SDValue CTOp = CTPOP.getOperand(0);
  // (ctpop x) u< 2 -> (x & x-1) == 0
  // (ctpop x) u> 1 -> (x & x-1) != 0
  if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
    // x-1 flips the lowest set bit and everything below it, so x & (x-1)
    // clears the lowest set bit; the result is zero exactly when x has at
    // most one bit set.
    SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
                              DAG.getConstant(1, CTVT));
    SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
    // "u< 2" becomes an equality test against zero, "u> 1" the inequality.
    ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
    return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC);
  }
  // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
}
// If the LHS is '(and load, const)', the RHS is 0,
// the test is for equality or unsigned, and all 1 bits of the const are
// in the same partial word, see if we can shorten the load.

View File

@ -0,0 +1,31 @@
; RUN: llc -march=x86-64 < %s | FileCheck %s
; Population-count intrinsic on i64, called by both test functions below.
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
; Compares the truncated population count of %x with "ugt 1". The FileCheck
; directives below expect an lea/test/setne sequence in the llc output.
define i32 @test1(i64 %x) nounwind readnone {
  %pop = tail call i64 @llvm.ctpop.i64(i64 %x)
  %pop32 = trunc i64 %pop to i32
  %many = icmp ugt i32 %pop32, 1
  %res = zext i1 %many to i32
  ret i32 %res
; CHECK: test1:
; CHECK: leaq -1(%rdi)
; CHECK-NEXT: testq
; CHECK-NEXT: setne
; CHECK: ret
}
; Compares the truncated population count of %x with "ult 2". The FileCheck
; directives below expect an lea/test/sete sequence in the llc output.
define i32 @test2(i64 %x) nounwind readnone {
  %pop = tail call i64 @llvm.ctpop.i64(i64 %x)
  %pop32 = trunc i64 %pop to i32
  %few = icmp ult i32 %pop32, 2
  %res = zext i1 %few to i32
  ret i32 %res
; CHECK: test2:
; CHECK: leaq -1(%rdi)
; CHECK-NEXT: testq
; CHECK-NEXT: sete
; CHECK: ret
}