From 7466678003f38f985d5b2dffd0917643137b11cf Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 26 Feb 2011 22:48:07 +0000 Subject: [PATCH] Add some DAGCombines for (adde 0, 0, glue), which are useful to optimize legalized code for large integer arithmetic. 1. Inform users of ADDEs with two 0 operands that it never sets carry 2. Fold other ADDs or ADDCs into the ADDE if possible It would be neat if we could do the same thing for SETCC+ADD eventually, but we can't do that in target independent code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126557 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 38 ++++++++++++++++++++++++ lib/Target/README.txt | 37 ----------------------- test/CodeGen/X86/adde-carry.ll | 26 ++++++++++++++++ 3 files changed, 64 insertions(+), 37 deletions(-) create mode 100644 test/CodeGen/X86/adde-carry.ll diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9cc70a30927..943731fe5a5 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1290,6 +1290,16 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, return SDValue(); } +/// isCarryMaterialization - Returns true if V is an ADDE node that is known to +/// return 0 or 1 depending on the carry flag. 
+static bool isCarryMaterialization(SDValue V) { + if (V.getOpcode() != ISD::ADDE) + return false; + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(0)); + return C && C->isNullValue() && V.getOperand(0) == V.getOperand(1); +} + SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1453,6 +1463,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } + // add (adde 0, 0, glue), X -> adde X, 0, glue + if (N0->hasOneUse() && isCarryMaterialization(N0)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), + DAG.getVTList(VT, MVT::Glue), N1, N0.getOperand(0), + N0.getOperand(2)); + + // add X, (adde 0, 0, glue) -> adde X, 0, glue + if (N1->hasOneUse() && isCarryMaterialization(N1)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), + DAG.getVTList(VT, MVT::Glue), N0, N1.getOperand(0), + N1.getOperand(2)); + return SDValue(); } @@ -1496,6 +1518,16 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { N->getDebugLoc(), MVT::Glue)); } + // addc (adde 0, 0, glue), X -> adde X, 0, glue + if (N0->hasOneUse() && isCarryMaterialization(N0)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N1, + DAG.getConstant(0, VT), N0.getOperand(2)); + + // addc X, (adde 0, 0, glue) -> adde X, 0, glue + if (N1->hasOneUse() && isCarryMaterialization(N1)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N0, + DAG.getConstant(0, VT), N1.getOperand(2)); + return SDValue(); } @@ -1506,6 +1538,12 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + // If both operands are null we know that carry out will always be false. 
+ if (N0C && N0C->isNullValue() && N0 == N1) + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), + MVT::Glue)); + // canonicalize constant to RHS if (N0C && !N1C) return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), diff --git a/lib/Target/README.txt b/lib/Target/README.txt index f85914b61d9..e01df010436 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -1780,43 +1780,6 @@ case it choses instead to keep the max operation obvious. //===---------------------------------------------------------------------===// -Take the following testcase on x86-64 (similar testcases exist for all targets -with addc/adde): - -define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b, -i64 %c) nounwind { -entry: - %0 = zext i64 %a to i128 ; [#uses=1] - %1 = zext i64 %b to i128 ; [#uses=1] - %2 = add i128 %1, %0 ; [#uses=2] - %3 = zext i64 %c to i128 ; [#uses=1] - %4 = shl i128 %3, 64 ; [#uses=1] - %5 = add i128 %4, %2 ; [#uses=1] - %6 = lshr i128 %5, 64 ; [#uses=1] - %7 = trunc i128 %6 to i64 ; [#uses=1] - store i64 %7, i64* %s, align 8 - %8 = trunc i128 %2 to i64 ; [#uses=1] - store i64 %8, i64* %t, align 8 - ret void -} - -Generated code: - addq %rcx, %rdx - sbbq %rax, %rax - subq %rax, %r8 - movq %r8, (%rdi) - movq %rdx, (%rsi) - ret - -Expected code: - addq %rcx, %rdx - adcq $0, %r8 - movq %r8, (%rdi) - movq %rdx, (%rsi) - ret - -//===---------------------------------------------------------------------===// - Switch lowering generates less than ideal code for the following switch: define void @a(i32 %x) nounwind { entry: diff --git a/test/CodeGen/X86/adde-carry.ll b/test/CodeGen/X86/adde-carry.ll new file mode 100644 index 00000000000..98c4f993431 --- /dev/null +++ b/test/CodeGen/X86/adde-carry.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s -check-prefix=CHECK-64 +; RUN: llc -march=x86 < %s | FileCheck %s -check-prefix=CHECK-32 + +define void @a(i64* nocapture %s, i64* nocapture 
%t, i64 %a, i64 %b, i64 %c) nounwind { +entry: + %0 = zext i64 %a to i128 + %1 = zext i64 %b to i128 + %2 = add i128 %1, %0 + %3 = zext i64 %c to i128 + %4 = shl i128 %3, 64 + %5 = add i128 %4, %2 + %6 = lshr i128 %5, 64 + %7 = trunc i128 %6 to i64 + store i64 %7, i64* %s, align 8 + %8 = trunc i128 %2 to i64 + store i64 %8, i64* %t, align 8 + ret void + +; CHECK-32: addl +; CHECK-32: adcl +; CHECK-32: adcl $0 +; CHECK-32: adcl $0 + +; CHECK-64: addq +; CHECK-64: adcq $0 +}