[PowerPC] Fold i1 extensions with other ops

Consider this function from our README.txt file:

  int foo(int a, int b) { return (a < b) << 4; }

We now explicitly track CR bits by default, so the comment in the README.txt
about not really having a SETCC is no longer accurate, but we did generate this
somewhat silly code:

        cmpw 0, 3, 4
        li 3, 0
        li 12, 1
        isel 3, 12, 3, 0
        sldi 3, 3, 4
        blr

which generates the zext as a select between 0 and 1, and then shifts the
result by a constant amount. Here we preprocess the DAG in order to fold the
results of operations on an extension of an i1 value into the SELECT_I[48]
pseudo instruction when the resulting constant can be materialized using one
instruction (just like the 0 and 1). This was not implemented as a DAGCombine
because the resulting code would have been anti-canonical and would depend on
replacing chained user nodes, which does not fit well into the lowering
paradigm. Now we generate:

        cmpw 0, 3, 4
        li 3, 0
        li 12, 16
        isel 3, 12, 3, 0
        blr

which is less silly.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225203 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2015-01-05 21:10:24 +00:00
parent dd0552884b
commit ccc83e4a08
3 changed files with 125 additions and 17 deletions

View File

@ -217,6 +217,7 @@ private:
void PeepholeCROps();
SDValue combineToCMPB(SDNode *N);
void foldBoolExts(SDValue &Res, SDNode *&N);
bool AllUsersSelectZero(SDNode *N);
void SwapAllSelectUsers(SDNode *N);
@ -3173,6 +3174,73 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
return Res;
}
// When CR bit registers are enabled, an extension of an i1 value to an i32
// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
// involves constant materialization of a 0 or a 1 (or -1) or both. If the
// result of the extension is then operated upon by some operator that can be
// constant folded with that constant, and the folded constant can be
// materialized using only one instruction (like a zero or one), then we
// should fold those operations into the select.
//
// \param Res [out] Set to the replacement select node when at least one user
//            was folded; left untouched otherwise.
// \param N   [in,out] On entry, the candidate extension node. Each time a
//            user is folded, N is advanced to that user, so on return it
//            names the node that Res is meant to replace.
void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
  if (!PPCSubTarget->useCRBits())
    return;

  if (N->getOpcode() != ISD::ZERO_EXTEND &&
      N->getOpcode() != ISD::SIGN_EXTEND &&
      N->getOpcode() != ISD::ANY_EXTEND)
    return;

  if (N->getOperand(0).getValueType() != MVT::i1)
    return;

  // The fold rewrites the single user of the extension; with several users
  // there is no unique node to replace.
  if (!N->hasOneUse())
    return;

  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Cond = N->getOperand(0);
  // Value the extension produces when the condition is true: all-ones for a
  // sign extension, one otherwise. The false value is always zero.
  SDValue ConstTrue =
    CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, VT);
  SDValue ConstFalse = CurDAG->getConstant(0, VT);

  do {
    SDNode *User = *N->use_begin();
    if (User->getNumOperands() != 2)
      break;

    // Constant-fold User with every occurrence of N among its two operands
    // replaced by Val. Yields a null SDValue when folding is not possible.
    auto TryFold = [this, N, User](SDValue Val) {
      SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
      SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
      SDValue O1 = UserO1.getNode() == N ? Val : UserO1;

      return CurDAG->FoldConstantArithmetic(User->getOpcode(),
                                            User->getValueType(0),
                                            O0.getNode(), O1.getNode());
    };

    SDValue TrueRes = TryFold(ConstTrue);
    if (!TrueRes)
      break;
    SDValue FalseRes = TryFold(ConstFalse);
    if (!FalseRes)
      break;

    // For us to materialize these using one instruction, we must be able to
    // represent them as signed 16-bit integers. Read the constants
    // sign-extended: a zero-extended read turns a negative i32 value such as
    // -1 into 0xFFFFFFFF, which would spuriously fail the signed range check.
    int64_t True = cast<ConstantSDNode>(TrueRes)->getSExtValue(),
            False = cast<ConstantSDNode>(FalseRes)->getSExtValue();
    if (!isInt<16>(True) || !isInt<16>(False))
      break;

    // We can replace User with a new SELECT node, and try again to see if we
    // can fold the select with its user.
    Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
    N = User;
    ConstTrue = TrueRes;
    ConstFalse = FalseRes;
  } while (N->hasOneUse());
}
void PPCDAGToDAGISel::PreprocessISelDAG() {
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
++Position;
@ -3191,6 +3259,9 @@ void PPCDAGToDAGISel::PreprocessISelDAG() {
break;
}
if (!Res)
foldBoolExts(Res, N);
if (Res) {
DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
DEBUG(N->dump(CurDAG));

View File

@ -252,23 +252,6 @@ anything though, because the compares still wouldn't be shared.
===-------------------------------------------------------------------------===
We should custom expand setcc instead of pretending that we have it. That
would allow us to expose the access of the crbit after the mfcr, allowing
that access to be trivially folded into other ops. A simple example:
int foo(int a, int b) { return (a < b) << 4; }
compiles into:
_foo:
cmpw cr7, r3, r4
mfcr r2, 1
rlwinm r2, r2, 29, 31, 31
slwi r3, r2, 4
blr
===-------------------------------------------------------------------------===
Fold add and sub with constant into non-extern, non-weak addresses so this:
static int a;

View File

@ -0,0 +1,54 @@
; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
; foo computes (a < b) << 4. The zext of the compare result and the shift by
; 4 are expected to fold into the select constants, so the checks below look
; for one register loaded with 0, one loaded with 16, and an isel choosing
; between them, with no separate shift instruction being matched.
; Function Attrs: nounwind readnone
define signext i32 @foo(i32 signext %a, i32 signext %b) #0 {
entry:
%cmp = icmp slt i32 %a, %b
%conv = zext i1 %cmp to i32
%shl = shl nuw nsw i32 %conv, 4
ret i32 %shl
; CHECK-LABEL: @foo
; CHECK-DAG: cmpw
; CHECK-DAG: li [[REG1:[0-9]+]], 0
; CHECK-DAG: li [[REG2:[0-9]+]], 16
; CHECK: isel 3, [[REG2]], [[REG1]],
; CHECK: blr
}
; foo2 chains two foldable users after the zext: a shift left by 4 followed
; by an or with 5. Both are expected to fold into the select constants, so the
; true value becomes (1 << 4) | 5 = 21 and the false value (0 << 4) | 5 = 5.
; Function Attrs: nounwind readnone
define signext i32 @foo2(i32 signext %a, i32 signext %b) #0 {
entry:
%cmp = icmp slt i32 %a, %b
%conv = zext i1 %cmp to i32
%shl = shl nuw nsw i32 %conv, 4
%add1 = or i32 %shl, 5
ret i32 %add1
; CHECK-LABEL: @foo2
; CHECK-DAG: cmpw
; CHECK-DAG: li [[REG1:[0-9]+]], 5
; CHECK-DAG: li [[REG2:[0-9]+]], 21
; CHECK: isel 3, [[REG2]], [[REG1]],
; CHECK: blr
}
; foo3 is the same as foo but compares with sle instead of slt. The expected
; isel has its source operands in the opposite order: the first is the literal
; 0 and the second is the register holding 16, so only one li is matched.
; NOTE(review): presumably the literal 0 operand of isel reads as the constant
; zero rather than register 0 -- confirm against the Power ISA.
; Function Attrs: nounwind readnone
define signext i32 @foo3(i32 signext %a, i32 signext %b) #0 {
entry:
%cmp = icmp sle i32 %a, %b
%conv = zext i1 %cmp to i32
%shl = shl nuw nsw i32 %conv, 4
ret i32 %shl
; CHECK-LABEL: @foo3
; CHECK-DAG: cmpw
; CHECK-DAG: li [[REG1:[0-9]+]], 16
; CHECK: isel 3, 0, [[REG1]],
; CHECK: blr
}
attributes #0 = { nounwind readnone }