/* Reassociation for trees.
   Copyright (C) 2005-2018 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop.h"
#include "flags.h"
#include "tree-ssa.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "params.h"
#include "builtins.h"
#include "gimplify.h"
#include "case-cfn-macros.h"

/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (They do some things more/less
   than we do, in different orders, etc).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_*

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   3a. Combine repeated factors with the same occurrence counts
   into a __builtin_powi call that will later be optimized into
   an optimal number of multiplies.

   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else will clean it up ATM.

   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this if we really cared, is to build the new op
   tree from the leaves to the roots, merging as you go, and putting the
   new op on the end of the worklist, until you are left with one
   thing on the worklist.

   IE if you have to rewrite the following set of operands (listed with
   rank in parentheses), with opcode PLUS_EXPR:

   a (1),  b (1),  c (1),  d (2), e (2)

   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op.  So at
   least in this situation, you can't win.)

   Then build a binary op of d + e
   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   So we have

   build binary op
   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or less ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of the
   same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of
   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.  As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */

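/* As an illustrative sketch of step 3a above (a hypothetical chain, not
   a particular testcase): given a linearized multiply

     t = x * x * x * y * y * y * z

   the factors x and y both occur three times, so the pass can emit

     tmp = x * y;
     t = __builtin_powi (tmp, 3) * z;

   and the __builtin_powi call is later expanded into an optimal
   sequence of multiplies.  */
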
/* Enable insertion of __builtin_powi calls during execute_reassoc. See
|
|
point 3a in the pass header comment. */
|
|
static bool reassoc_insert_powi_p;
|
|
|
|
/* Statistics */
|
|
static struct
|
|
{
|
|
int linearized;
|
|
int constants_eliminated;
|
|
int ops_eliminated;
|
|
int rewritten;
|
|
int pows_encountered;
|
|
int pows_created;
|
|
} reassociate_stats;
|
|
|
|
/* Operator, rank pair. */
|
|
struct operand_entry
|
|
{
|
|
unsigned int rank;
|
|
unsigned int id;
|
|
tree op;
|
|
unsigned int count;
|
|
gimple *stmt_to_insert;
|
|
};
|
|
|
|
static object_allocator<operand_entry> operand_entry_pool
|
|
("operand entry pool");
|
|
|
|
/* This is used to assign a unique ID to each struct operand_entry
|
|
so that qsort results are identical on different hosts. */
|
|
static unsigned int next_operand_entry_id;
|
|
|
|
/* Starting rank number for a given basic block, so that we can rank
|
|
operations using unmovable instructions in that BB based on the bb
|
|
depth. */
|
|
static long *bb_rank;
|
|
|
|
/* Operand->rank hashtable. */
|
|
static hash_map<tree, long> *operand_rank;
|
|
|
|
/* Vector of SSA_NAMEs on which after reassociate_bb is done with
|
|
all basic blocks the CFG should be adjusted - basic blocks
|
|
split right after that SSA_NAME's definition statement and before
|
|
the only use, which must be a bit ior. */
|
|
static vec<tree> reassoc_branch_fixups;
|
|
|
|
/* Forward decls. */
|
|
static long get_rank (tree);
|
|
static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
|
|
|
|
/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
|
|
possibly added by gsi_remove. */
|
|
|
|
bool
|
|
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
|
|
{
|
|
gimple *stmt = gsi_stmt (*gsi);
|
|
|
|
if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
|
|
return gsi_remove (gsi, true);
|
|
|
|
gimple_stmt_iterator prev = *gsi;
|
|
gsi_prev (&prev);
|
|
unsigned uid = gimple_uid (stmt);
|
|
basic_block bb = gimple_bb (stmt);
|
|
bool ret = gsi_remove (gsi, true);
|
|
if (!gsi_end_p (prev))
|
|
gsi_next (&prev);
|
|
else
|
|
prev = gsi_start_bb (bb);
|
|
gimple *end_stmt = gsi_stmt (*gsi);
|
|
while ((stmt = gsi_stmt (prev)) != end_stmt)
|
|
{
|
|
gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
|
|
gimple_set_uid (stmt, uid);
|
|
gsi_next (&prev);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/* Bias amount for loop-carried phis. We want this to be larger than
|
|
the depth of any reassociation tree we can see, but not larger than
|
|
the rank difference between two blocks. */
|
|
#define PHI_LOOP_BIAS (1 << 15)
|
|
|
|
/* Rank assigned to a phi statement. If STMT is a loop-carried phi of
|
|
an innermost loop, and the phi has only a single use which is inside
|
|
the loop, then the rank is the block rank of the loop latch plus an
|
|
extra bias for the loop-carried dependence. This causes expressions
|
|
calculated into an accumulator variable to be independent for each
|
|
iteration of the loop. If STMT is some other phi, the rank is the
|
|
block rank of its containing block. */
|
|
static long
|
|
phi_rank (gimple *stmt)
|
|
{
|
|
basic_block bb = gimple_bb (stmt);
|
|
struct loop *father = bb->loop_father;
|
|
tree res;
|
|
unsigned i;
|
|
use_operand_p use;
|
|
gimple *use_stmt;
|
|
|
|
/* We only care about real loops (those with a latch). */
|
|
if (!father->latch)
|
|
return bb_rank[bb->index];
|
|
|
|
/* Interesting phis must be in headers of innermost loops. */
|
|
if (bb != father->header
|
|
|| father->inner)
|
|
return bb_rank[bb->index];
|
|
|
|
/* Ignore virtual SSA_NAMEs. */
|
|
res = gimple_phi_result (stmt);
|
|
if (virtual_operand_p (res))
|
|
return bb_rank[bb->index];
|
|
|
|
/* The phi definition must have a single use, and that use must be
|
|
within the loop. Otherwise this isn't an accumulator pattern. */
|
|
if (!single_imm_use (res, &use, &use_stmt)
|
|
|| gimple_bb (use_stmt)->loop_father != father)
|
|
return bb_rank[bb->index];
|
|
|
|
/* Look for phi arguments from within the loop. If found, bias this phi. */
|
|
for (i = 0; i < gimple_phi_num_args (stmt); i++)
|
|
{
|
|
tree arg = gimple_phi_arg_def (stmt, i);
|
|
if (TREE_CODE (arg) == SSA_NAME
|
|
&& !SSA_NAME_IS_DEFAULT_DEF (arg))
|
|
{
|
|
gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
|
|
if (gimple_bb (def_stmt)->loop_father == father)
|
|
return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
|
|
}
|
|
}
|
|
|
|
/* Must be an uninteresting phi. */
|
|
return bb_rank[bb->index];
|
|
}
|
|
|
|
/* If EXP is an SSA_NAME defined by a PHI statement that represents a
|
|
loop-carried dependence of an innermost loop, return TRUE; else
|
|
return FALSE. */
|
|
static bool
|
|
loop_carried_phi (tree exp)
|
|
{
|
|
gimple *phi_stmt;
|
|
long block_rank;
|
|
|
|
if (TREE_CODE (exp) != SSA_NAME
|
|
|| SSA_NAME_IS_DEFAULT_DEF (exp))
|
|
return false;
|
|
|
|
phi_stmt = SSA_NAME_DEF_STMT (exp);
|
|
|
|
if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
|
|
return false;
|
|
|
|
/* Non-loop-carried phis have block rank. Loop-carried phis have
|
|
an additional bias added in. If this phi doesn't have block rank,
|
|
it's biased and should not be propagated. */
|
|
block_rank = bb_rank[gimple_bb (phi_stmt)->index];
|
|
|
|
if (phi_rank (phi_stmt) != block_rank)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
/* Return the maximum of RANK and the rank that should be propagated
|
|
from expression OP. For most operands, this is just the rank of OP.
|
|
For loop-carried phis, the value is zero to avoid undoing the bias
|
|
in favor of the phi. */
|
|
static long
|
|
propagate_rank (long rank, tree op)
|
|
{
|
|
long op_rank;
|
|
|
|
if (loop_carried_phi (op))
|
|
return rank;
|
|
|
|
op_rank = get_rank (op);
|
|
|
|
return MAX (rank, op_rank);
|
|
}
|
|
|
|
/* Look up the operand rank structure for expression E. */
|
|
|
|
static inline long
|
|
find_operand_rank (tree e)
|
|
{
|
|
long *slot = operand_rank->get (e);
|
|
return slot ? *slot : -1;
|
|
}
|
|
|
|
/* Insert {E,RANK} into the operand rank hashtable. */
|
|
|
|
static inline void
|
|
insert_operand_rank (tree e, long rank)
|
|
{
|
|
gcc_assert (rank > 0);
|
|
gcc_assert (!operand_rank->put (e, rank));
|
|
}
|
|
|
|
/* Given an expression E, return the rank of the expression. */
|
|
|
|
static long
|
|
get_rank (tree e)
|
|
{
|
|
/* SSA_NAME's have the rank of the expression they are the result
|
|
of.
|
|
For globals and uninitialized values, the rank is 0.
|
|
For function arguments, use the pre-setup rank.
|
|
For PHI nodes, stores, asm statements, etc, we use the rank of
|
|
the BB.
|
|
For simple operations, the rank is the maximum rank of any of
|
|
its operands, or the bb_rank, whichever is less.
|
|
I make no claims that this is optimal, however, it gives good
|
|
results. */
|
|
|
|
/* We make an exception to the normal ranking system to break
|
|
dependences of accumulator variables in loops. Suppose we
|
|
have a simple one-block loop containing:
|
|
|
|
x_1 = phi(x_0, x_2)
|
|
b = a + x_1
|
|
c = b + d
|
|
x_2 = c + e
|
|
|
|
As shown, each iteration of the calculation into x is fully
|
|
dependent upon the iteration before it. We would prefer to
|
|
see this in the form:
|
|
|
|
x_1 = phi(x_0, x_2)
|
|
b = a + d
|
|
c = b + e
|
|
x_2 = c + x_1
|
|
|
|
If the loop is unrolled, the calculations of b and c from
|
|
different iterations can be interleaved.
|
|
|
|
To obtain this result during reassociation, we bias the rank
|
|
of the phi definition x_1 upward, when it is recognized as an
|
|
accumulator pattern. The artificial rank causes it to be
|
|
added last, providing the desired independence. */
|
|
|
|
if (TREE_CODE (e) == SSA_NAME)
|
|
{
|
|
ssa_op_iter iter;
|
|
gimple *stmt;
|
|
long rank;
|
|
tree op;
|
|
|
|
if (SSA_NAME_IS_DEFAULT_DEF (e))
|
|
return find_operand_rank (e);
|
|
|
|
stmt = SSA_NAME_DEF_STMT (e);
|
|
if (gimple_code (stmt) == GIMPLE_PHI)
|
|
return phi_rank (stmt);
|
|
|
|
if (!is_gimple_assign (stmt))
|
|
return bb_rank[gimple_bb (stmt)->index];
|
|
|
|
/* If we already have a rank for this expression, use that. */
|
|
rank = find_operand_rank (e);
|
|
if (rank != -1)
|
|
return rank;
|
|
|
|
/* Otherwise, find the maximum rank for the operands. As an
|
|
exception, remove the bias from loop-carried phis when propagating
|
|
the rank so that dependent operations are not also biased. */
|
|
/* Simply walk over all SSA uses - this takes advantage of the
   fact that non-SSA operands are is_gimple_min_invariant and
   thus have rank 0.  */
|
|
rank = 0;
|
|
FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
|
|
rank = propagate_rank (rank, op);
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "Rank for ");
|
|
print_generic_expr (dump_file, e);
|
|
fprintf (dump_file, " is %ld\n", (rank + 1));
|
|
}
|
|
|
|
/* Note the rank in the hashtable so we don't recompute it. */
|
|
insert_operand_rank (e, (rank + 1));
|
|
return (rank + 1);
|
|
}
|
|
|
|
/* Constants, globals, etc., are rank 0 */
|
|
return 0;
|
|
}
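
/* A hypothetical illustration of the ranking above, assuming the
   pre-setup rank of the parameters a_1 and b_2 is 1:

     t_3 = a_1 + b_2;    rank (t_3) = MAX (1, 1) + 1 = 2
     t_4 = t_3 * a_1;    rank (t_4) = MAX (2, 1) + 1 = 3
     t_5 = t_4 + 5;      rank (t_5) = MAX (3, 0) + 1 = 4

   Constants contribute rank 0, and each defining statement gets one
   more than the maximum rank among its operands.  */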
|
|
|
|
|
|
/* We want integer ones to end up last no matter what, since they are
|
|
the ones we can do the most with. */
|
|
#define INTEGER_CONST_TYPE 1 << 4
|
|
#define FLOAT_ONE_CONST_TYPE 1 << 3
|
|
#define FLOAT_CONST_TYPE 1 << 2
|
|
#define OTHER_CONST_TYPE 1 << 1
|
|
|
|
/* Classify an invariant tree into integer, float, or other, so that
|
|
we can sort them to be near other constants of the same type. */
|
|
static inline int
|
|
constant_type (tree t)
|
|
{
|
|
if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
|
|
return INTEGER_CONST_TYPE;
|
|
else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
|
|
{
|
|
/* Sort -1.0 and 1.0 constants last, while in some cases
|
|
const_binop can't optimize some inexact operations, multiplication
|
|
by -1.0 or 1.0 can be always merged with others. */
|
|
if (real_onep (t) || real_minus_onep (t))
|
|
return FLOAT_ONE_CONST_TYPE;
|
|
return FLOAT_CONST_TYPE;
|
|
}
|
|
else
|
|
return OTHER_CONST_TYPE;
|
|
}
|
|
|
|
/* qsort comparison function to sort operand entries PA and PB by rank
|
|
so that the sorted array is ordered by rank in decreasing order. */
|
|
static int
|
|
sort_by_operand_rank (const void *pa, const void *pb)
|
|
{
|
|
const operand_entry *oea = *(const operand_entry *const *)pa;
|
|
const operand_entry *oeb = *(const operand_entry *const *)pb;
|
|
|
|
if (oeb->rank != oea->rank)
|
|
return oeb->rank > oea->rank ? 1 : -1;
|
|
|
|
/* It's nicer for optimize_expression if constants that are likely
|
|
to fold when added/multiplied/whatever are put next to each
|
|
other. Since all constants have rank 0, order them by type. */
|
|
if (oea->rank == 0)
|
|
{
|
|
if (constant_type (oeb->op) != constant_type (oea->op))
|
|
return constant_type (oea->op) - constant_type (oeb->op);
|
|
else
|
|
/* To make sorting result stable, we use unique IDs to determine
|
|
order. */
|
|
return oeb->id > oea->id ? 1 : -1;
|
|
}
|
|
|
|
if (TREE_CODE (oea->op) != SSA_NAME)
|
|
{
|
|
if (TREE_CODE (oeb->op) != SSA_NAME)
|
|
return oeb->id > oea->id ? 1 : -1;
|
|
else
|
|
return 1;
|
|
}
|
|
else if (TREE_CODE (oeb->op) != SSA_NAME)
|
|
return -1;
|
|
|
|
/* Lastly, make sure the versions that are the same go next to each
|
|
other. */
|
|
if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
|
|
{
|
|
/* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
|
|
versions of removed SSA_NAMEs, so if possible, prefer to sort
|
|
based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
|
|
See PR60418. */
|
|
gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
|
|
gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
|
|
basic_block bba = gimple_bb (stmta);
|
|
basic_block bbb = gimple_bb (stmtb);
|
|
if (bbb != bba)
|
|
{
|
|
/* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
|
|
but the other might not. */
|
|
if (!bba)
|
|
return 1;
|
|
if (!bbb)
|
|
return -1;
|
|
/* If neither is, compare bb_rank. */
|
|
if (bb_rank[bbb->index] != bb_rank[bba->index])
|
|
return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
|
|
}
|
|
|
|
bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
|
|
bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
|
|
if (da != db)
|
|
return da ? 1 : -1;
|
|
|
|
return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
|
|
}
|
|
|
|
return oeb->id > oea->id ? 1 : -1;
|
|
}
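
/* For illustration, a hypothetical operand vector for a float PLUS_EXPR

     { a_2 (rank 1), 1.0, c_7 (rank 3), 2.5 }

   sorts to

     { c_7, a_2, 2.5, 1.0 }

   i.e. decreasing rank, with the rank-0 constants last, ordered by
   constant_type (1.0 and -1.0 sort after the other float constants
   because they can always be merged).  */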
|
|
|
|
/* Add an operand entry to *OPS for the tree operand OP. */
|
|
|
|
static void
|
|
add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
|
|
{
|
|
operand_entry *oe = operand_entry_pool.allocate ();
|
|
|
|
oe->op = op;
|
|
oe->rank = get_rank (op);
|
|
oe->id = next_operand_entry_id++;
|
|
oe->count = 1;
|
|
oe->stmt_to_insert = stmt_to_insert;
|
|
ops->safe_push (oe);
|
|
}
|
|
|
|
/* Add an operand entry to *OPS for the tree operand OP with repeat
|
|
count REPEAT. */
|
|
|
|
static void
|
|
add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
|
|
HOST_WIDE_INT repeat)
|
|
{
|
|
operand_entry *oe = operand_entry_pool.allocate ();
|
|
|
|
oe->op = op;
|
|
oe->rank = get_rank (op);
|
|
oe->id = next_operand_entry_id++;
|
|
oe->count = repeat;
|
|
oe->stmt_to_insert = NULL;
|
|
ops->safe_push (oe);
|
|
|
|
reassociate_stats.pows_encountered++;
|
|
}
|
|
|
|
/* Return true if STMT is reassociable operation containing a binary
|
|
operation with tree code CODE, and is inside LOOP. */
|
|
|
|
static bool
|
|
is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
|
|
{
|
|
basic_block bb = gimple_bb (stmt);
|
|
|
|
if (gimple_bb (stmt) == NULL)
|
|
return false;
|
|
|
|
if (!flow_bb_inside_loop_p (loop, bb))
|
|
return false;
|
|
|
|
if (is_gimple_assign (stmt)
|
|
&& gimple_assign_rhs_code (stmt) == code
|
|
&& has_single_use (gimple_assign_lhs (stmt)))
|
|
{
|
|
tree rhs1 = gimple_assign_rhs1 (stmt);
|
|
tree rhs2 = gimple_assign_rhs2 (stmt);
|
|
if (TREE_CODE (rhs1) == SSA_NAME
|
|
&& SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
|
|
return false;
|
|
if (rhs2
|
|
&& TREE_CODE (rhs2) == SSA_NAME
|
|
&& SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
/* Return true if STMT is a nop-conversion. */
|
|
|
|
static bool
|
|
gimple_nop_conversion_p (gimple *stmt)
|
|
{
|
|
if (gassign *ass = dyn_cast <gassign *> (stmt))
|
|
{
|
|
if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
|
|
&& tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
|
|
TREE_TYPE (gimple_assign_rhs1 (ass))))
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that operation.  Otherwise, return NULL.  */
|
|
|
|
static tree
|
|
get_unary_op (tree name, enum tree_code opcode)
|
|
{
|
|
gimple *stmt = SSA_NAME_DEF_STMT (name);
|
|
|
|
/* Look through nop conversions (sign changes). */
|
|
if (gimple_nop_conversion_p (stmt)
|
|
&& TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
|
|
stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
|
|
|
|
if (!is_gimple_assign (stmt))
|
|
return NULL_TREE;
|
|
|
|
if (gimple_assign_rhs_code (stmt) == opcode)
|
|
return gimple_assign_rhs1 (stmt);
|
|
return NULL_TREE;
|
|
}
|
|
|
|
/* Return true if OP1 and OP2 have the same value if casted to either type. */
|
|
|
|
static bool
|
|
ops_equal_values_p (tree op1, tree op2)
|
|
{
|
|
if (op1 == op2)
|
|
return true;
|
|
|
|
tree orig_op1 = op1;
|
|
if (TREE_CODE (op1) == SSA_NAME)
|
|
{
|
|
gimple *stmt = SSA_NAME_DEF_STMT (op1);
|
|
if (gimple_nop_conversion_p (stmt))
|
|
{
|
|
op1 = gimple_assign_rhs1 (stmt);
|
|
if (op1 == op2)
|
|
return true;
|
|
}
|
|
}
|
|
|
|
if (TREE_CODE (op2) == SSA_NAME)
|
|
{
|
|
gimple *stmt = SSA_NAME_DEF_STMT (op2);
|
|
if (gimple_nop_conversion_p (stmt))
|
|
{
|
|
op2 = gimple_assign_rhs1 (stmt);
|
|
if (op1 == op2
|
|
|| orig_op1 == op2)
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
/* If CURR and LAST are a pair of ops that OPCODE allows us to
|
|
eliminate through equivalences, do so, remove them from OPS, and
|
|
return true. Otherwise, return false. */
|
|
|
|
static bool
|
|
eliminate_duplicate_pair (enum tree_code opcode,
|
|
vec<operand_entry *> *ops,
|
|
bool *all_done,
|
|
unsigned int i,
|
|
operand_entry *curr,
|
|
operand_entry *last)
|
|
{
|
|
|
|
/* If we have two of the same op, and the opcode is & |, min, or max,
|
|
we can eliminate one of them.
|
|
If we have two of the same op, and the opcode is ^, we can
|
|
eliminate both of them. */
|
|
|
|
if (last && last->op == curr->op)
|
|
{
|
|
switch (opcode)
|
|
{
|
|
case MAX_EXPR:
|
|
case MIN_EXPR:
|
|
case BIT_IOR_EXPR:
|
|
case BIT_AND_EXPR:
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "Equivalence: ");
|
|
print_generic_expr (dump_file, curr->op);
|
|
fprintf (dump_file, " [&|minmax] ");
|
|
print_generic_expr (dump_file, last->op);
|
|
fprintf (dump_file, " -> ");
|
|
print_generic_stmt (dump_file, last->op);
|
|
}
|
|
|
|
ops->ordered_remove (i);
|
|
reassociate_stats.ops_eliminated ++;
|
|
|
|
return true;
|
|
|
|
case BIT_XOR_EXPR:
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "Equivalence: ");
|
|
print_generic_expr (dump_file, curr->op);
|
|
fprintf (dump_file, " ^ ");
|
|
print_generic_expr (dump_file, last->op);
|
|
fprintf (dump_file, " -> nothing\n");
|
|
}
|
|
|
|
reassociate_stats.ops_eliminated += 2;
|
|
|
|
if (ops->length () == 2)
|
|
{
|
|
ops->truncate (0);
|
|
add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
|
|
*all_done = true;
|
|
}
|
|
else
|
|
{
|
|
ops->ordered_remove (i-1);
|
|
ops->ordered_remove (i-1);
|
|
}
|
|
|
|
return true;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
return false;
|
|
}
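
/* Two hypothetical examples of the eliminations above, applied to an
   operand list combined with a single opcode:

     a & a & b   ->  a & b     (one duplicate dropped for &, |, MIN, MAX)
     a ^ a ^ b   ->  b         (both duplicates cancel for ^)

   If the duplicates were the only operands, the ^ case leaves a single
   zero constant instead.  */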
|
|
|
|
static vec<tree> plus_negates;
|
|
|
|
/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
|
|
expression, look in OPS for a corresponding positive operation to cancel
|
|
it out. If we find one, remove the other from OPS, replace
|
|
OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
|
|
return false. */
|
|
|
|
static bool
|
|
eliminate_plus_minus_pair (enum tree_code opcode,
|
|
vec<operand_entry *> *ops,
|
|
unsigned int currindex,
|
|
operand_entry *curr)
|
|
{
|
|
tree negateop;
|
|
tree notop;
|
|
unsigned int i;
|
|
operand_entry *oe;
|
|
|
|
if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
|
|
return false;
|
|
|
|
negateop = get_unary_op (curr->op, NEGATE_EXPR);
|
|
notop = get_unary_op (curr->op, BIT_NOT_EXPR);
|
|
if (negateop == NULL_TREE && notop == NULL_TREE)
|
|
return false;
|
|
|
|
/* Any non-negated version will have a rank that is one less than
|
|
the current rank. So once we hit those ranks, if we don't find
|
|
one, we can stop. */
|
|
|
|
for (i = currindex + 1;
|
|
ops->iterate (i, &oe)
|
|
&& oe->rank >= curr->rank - 1 ;
|
|
i++)
|
|
{
|
|
if (negateop
|
|
&& ops_equal_values_p (oe->op, negateop))
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "Equivalence: ");
|
|
print_generic_expr (dump_file, negateop);
|
|
fprintf (dump_file, " + -");
|
|
print_generic_expr (dump_file, oe->op);
|
|
fprintf (dump_file, " -> 0\n");
|
|
}
|
|
|
|
ops->ordered_remove (i);
|
|
add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
|
|
ops->ordered_remove (currindex);
|
|
reassociate_stats.ops_eliminated ++;
|
|
|
|
return true;
|
|
}
|
|
else if (notop
|
|
&& ops_equal_values_p (oe->op, notop))
|
|
{
|
|
tree op_type = TREE_TYPE (oe->op);
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "Equivalence: ");
|
|
print_generic_expr (dump_file, notop);
|
|
fprintf (dump_file, " + ~");
|
|
print_generic_expr (dump_file, oe->op);
|
|
fprintf (dump_file, " -> -1\n");
|
|
}
|
|
|
|
ops->ordered_remove (i);
|
|
add_to_ops_vec (ops, build_all_ones_cst (op_type));
|
|
ops->ordered_remove (currindex);
|
|
reassociate_stats.ops_eliminated ++;
|
|
|
|
return true;
|
|
}
|
|
}
|
|
|
|
/* If CURR->OP is a negate expr without nop conversion in a plus expr:
|
|
save it for later inspection in repropagate_negates(). */
|
|
if (negateop != NULL_TREE
|
|
&& gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
|
|
plus_negates.safe_push (curr->op);
|
|
|
|
return false;
|
|
}
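
/* Hypothetical examples of the cancellations above, for a PLUS_EXPR
   operand list:

     t_1 = -y;   ops = { x, t_1, y }  ->  { x, 0 }    (y + -y -> 0)
     t_2 = ~y;   ops = { x, t_2, y }  ->  { x, -1 }   (y + ~y -> -1)

   The replacement constant keeps the list non-empty so the remaining
   optimizations and the rewrite step proceed normally.  */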
|
|
|
|
/* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
|
|
bitwise not expression, look in OPS for a corresponding operand to
|
|
cancel it out. If we find one, remove the other from OPS, replace
|
|
OPS[CURRINDEX] with 0, and return true. Otherwise, return
|
|
false. */
|
|
|
|
static bool
|
|
eliminate_not_pairs (enum tree_code opcode,
|
|
vec<operand_entry *> *ops,
|
|
unsigned int currindex,
|
|
operand_entry *curr)
|
|
{
|
|
tree notop;
|
|
unsigned int i;
|
|
operand_entry *oe;
|
|
|
|
if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
|
|
|| TREE_CODE (curr->op) != SSA_NAME)
|
|
return false;
|
|
|
|
notop = get_unary_op (curr->op, BIT_NOT_EXPR);
|
|
if (notop == NULL_TREE)
|
|
return false;
|
|
|
|
/* Any non-not version will have a rank that is one less than
|
|
the current rank. So once we hit those ranks, if we don't find
|
|
one, we can stop. */
|
|
|
|
for (i = currindex + 1;
|
|
ops->iterate (i, &oe)
|
|
&& oe->rank >= curr->rank - 1;
|
|
i++)
|
|
{
|
|
if (oe->op == notop)
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "Equivalence: ");
|
|
print_generic_expr (dump_file, notop);
|
|
if (opcode == BIT_AND_EXPR)
|
|
fprintf (dump_file, " & ~");
|
|
else if (opcode == BIT_IOR_EXPR)
|
|
fprintf (dump_file, " | ~");
|
|
print_generic_expr (dump_file, oe->op);
|
|
if (opcode == BIT_AND_EXPR)
|
|
fprintf (dump_file, " -> 0\n");
|
|
else if (opcode == BIT_IOR_EXPR)
|
|
fprintf (dump_file, " -> -1\n");
|
|
}
|
|
|
|
if (opcode == BIT_AND_EXPR)
|
|
oe->op = build_zero_cst (TREE_TYPE (oe->op));
|
|
else if (opcode == BIT_IOR_EXPR)
|
|
oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
|
|
|
|
reassociate_stats.ops_eliminated += ops->length () - 1;
|
|
ops->truncate (0);
|
|
ops->quick_push (oe);
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
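
/* Hypothetical examples of the not-pair elimination above:

     t_1 = ~x;   x & t_1 & y & z  ->  0     (x & ~x absorbs the whole &)
     t_2 = ~x;   x | t_2 | y | z  ->  -1    (x | ~x absorbs the whole |)

   Unlike the other eliminations, the entire operand list collapses to
   the single absorbing constant.  */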
|
|
|
|
/* Use constant value that may be present in OPS to try to eliminate
|
|
operands. Note that this function is only really used when we've
|
|
eliminated ops for other reasons, or merged constants. Across
|
|
single statements, fold already does all of this, plus more. There
|
|
is little point in duplicating logic, so I've only included the
|
|
identities that I could ever construct testcases to trigger. */
|
|
|
|
static void
|
|
eliminate_using_constants (enum tree_code opcode,
|
|
vec<operand_entry *> *ops)
|
|
{
|
|
operand_entry *oelast = ops->last ();
|
|
tree type = TREE_TYPE (oelast->op);
|
|
|
|
if (oelast->rank == 0
|
|
&& (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
|
|
{
|
|
switch (opcode)
|
|
{
|
|
case BIT_AND_EXPR:
|
|
if (integer_zerop (oelast->op))
|
|
{
|
|
if (ops->length () != 1)
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
fprintf (dump_file, "Found & 0, removing all other ops\n");
|
|
|
|
reassociate_stats.ops_eliminated += ops->length () - 1;
|
|
|
|
ops->truncate (0);
|
|
ops->quick_push (oelast);
|
|
return;
|
|
}
|
|
}
|
|
else if (integer_all_onesp (oelast->op))
|
|
{
|
|
if (ops->length () != 1)
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
fprintf (dump_file, "Found & -1, removing\n");
|
|
ops->pop ();
|
|
reassociate_stats.ops_eliminated++;
|
|
}
|
|
}
|
|
break;
|
|
case BIT_IOR_EXPR:
|
|
if (integer_all_onesp (oelast->op))
|
|
{
|
|
if (ops->length () != 1)
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
fprintf (dump_file, "Found | -1, removing all other ops\n");
|
|
|
|
reassociate_stats.ops_eliminated += ops->length () - 1;
|
|
|
|
ops->truncate (0);
|
|
ops->quick_push (oelast);
|
|
return;
|
|
}
|
|
}
|
|
else if (integer_zerop (oelast->op))
|
|
{
|
|
if (ops->length () != 1)
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
fprintf (dump_file, "Found | 0, removing\n");
|
|
ops->pop ();
|
|
reassociate_stats.ops_eliminated++;
|
|
}
|
|
}
|
|
break;
|
|
case MULT_EXPR:
|
|
if (integer_zerop (oelast->op)
|
|
|| (FLOAT_TYPE_P (type)
|
|
&& !HONOR_NANS (type)
|
|
&& !HONOR_SIGNED_ZEROS (type)
|
|
&& real_zerop (oelast->op)))
|
|
{
|
|
if (ops->length () != 1)
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
fprintf (dump_file, "Found * 0, removing all other ops\n");
|
|
|
|
reassociate_stats.ops_eliminated += ops->length () - 1;
|
|
ops->truncate (0);
|
|
ops->quick_push (oelast);
|
|
return;
|
|
}
|
|
}
|
|
else if (integer_onep (oelast->op)
|
|
|| (FLOAT_TYPE_P (type)
|
|
&& !HONOR_SNANS (type)
|
|
&& real_onep (oelast->op)))
|
|
{
|
|
if (ops->length () != 1)
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
fprintf (dump_file, "Found * 1, removing\n");
|
|
ops->pop ();
|
|
reassociate_stats.ops_eliminated++;
|
|
return;
|
|
}
|
|
}
|
|
break;
|
|
case BIT_XOR_EXPR:
|
|
case PLUS_EXPR:
|
|
case MINUS_EXPR:
|
|
if (integer_zerop (oelast->op)
|
|
|| (FLOAT_TYPE_P (type)
|
|
&& (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
|
|
&& fold_real_zero_addition_p (type, oelast->op,
|
|
opcode == MINUS_EXPR)))
|
|
{
|
|
if (ops->length () != 1)
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
fprintf (dump_file, "Found [|^+] 0, removing\n");
|
|
ops->pop ();
|
|
reassociate_stats.ops_eliminated++;
|
|
return;
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
}
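
/* Hypothetical examples of the constant-based eliminations above:

     a & b & 0    ->  0         (absorbing element, all other ops dropped)
     a & b & -1   ->  a & b     (identity element removed)
     a * b * 1    ->  a * b
     a + b + 0    ->  a + b

   Only the last operand of the list needs to be inspected, because the
   earlier sorting and constant merging already left at most one
   constant at the end.  */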
|
|
|
|
|
|
static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
|
|
bool, bool);
|
|
|
|
/* Structure for tracking and counting operands. */
|
|
struct oecount {
|
|
unsigned int cnt;
|
|
unsigned int id;
|
|
enum tree_code oecode;
|
|
tree op;
|
|
};
|
|
|
|
|
|
/* The heap for the oecount hashtable and the sorted list of operands. */
|
|
static vec<oecount> cvec;
|
|
|
|
|
|
/* Oecount hashtable helpers. */
|
|
|
|
struct oecount_hasher : int_hash <int, 0, 1>
|
|
{
|
|
static inline hashval_t hash (int);
|
|
static inline bool equal (int, int);
|
|
};
|
|
|
|
/* Hash function for oecount. */
|
|
|
|
inline hashval_t
|
|
oecount_hasher::hash (int p)
|
|
{
|
|
const oecount *c = &cvec[p - 42];
|
|
return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
|
|
}
|
|
|
|
/* Comparison function for oecount. */
|
|
|
|
inline bool
|
|
oecount_hasher::equal (int p1, int p2)
|
|
{
|
|
const oecount *c1 = &cvec[p1 - 42];
|
|
const oecount *c2 = &cvec[p2 - 42];
|
|
return c1->oecode == c2->oecode && c1->op == c2->op;
|
|
}
|
|
|
|
/* Comparison function for qsort sorting oecount elements by count. */
|
|
|
|
static int
|
|
oecount_cmp (const void *p1, const void *p2)
|
|
{
|
|
const oecount *c1 = (const oecount *)p1;
|
|
const oecount *c2 = (const oecount *)p2;
|
|
if (c1->cnt != c2->cnt)
|
|
return c1->cnt > c2->cnt ? 1 : -1;
|
|
else
|
|
/* If counts are identical, use unique IDs to stabilize qsort. */
|
|
return c1->id > c2->id ? 1 : -1;
|
|
}
|
|
|
|
/* Return TRUE iff STMT represents a builtin call that raises OP
|
|
to some exponent. */
|
|
|
|
static bool
|
|
stmt_is_power_of_op (gimple *stmt, tree op)
|
|
{
|
|
if (!is_gimple_call (stmt))
|
|
return false;
|
|
|
|
switch (gimple_call_combined_fn (stmt))
|
|
{
|
|
CASE_CFN_POW:
|
|
CASE_CFN_POWI:
|
|
return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
|
|
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/* Given STMT which is a __builtin_pow* call, decrement its exponent
|
|
in place and return the result. Assumes that stmt_is_power_of_op
|
|
was previously called for STMT and returned TRUE. */
|
|
|
|
static HOST_WIDE_INT
|
|
decrement_power (gimple *stmt)
|
|
{
|
|
REAL_VALUE_TYPE c, cint;
|
|
HOST_WIDE_INT power;
|
|
tree arg1;
|
|
|
|
switch (gimple_call_combined_fn (stmt))
|
|
{
|
|
CASE_CFN_POW:
|
|
arg1 = gimple_call_arg (stmt, 1);
|
|
c = TREE_REAL_CST (arg1);
|
|
power = real_to_integer (&c) - 1;
|
|
real_from_integer (&cint, VOIDmode, power, SIGNED);
|
|
gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
|
|
return power;
|
|
|
|
CASE_CFN_POWI:
|
|
arg1 = gimple_call_arg (stmt, 1);
|
|
power = TREE_INT_CST_LOW (arg1) - 1;
|
|
gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
|
|
return power;
|
|
|
|
default:
|
|
gcc_unreachable ();
|
|
}
|
|
}
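
/* For example (hypothetical calls), decrement_power rewrites

     t_1 = __builtin_pow (x, 3.0);   ->  t_1 = __builtin_pow (x, 2.0);   returns 2
     t_2 = __builtin_powi (x, 5);    ->  t_2 = __builtin_powi (x, 4);    returns 4

   i.e. the exponent argument is decremented in place and the new
   exponent is returned to the caller.  */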
|
|
|
|
/* Create a new SSA name for the LHS of STMT, replace all uses of the
   old LHS with it, and return the new SSA name.  */
|
|
|
|
static tree
|
|
make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
|
|
{
|
|
gimple *use_stmt;
|
|
use_operand_p use;
|
|
imm_use_iterator iter;
|
|
tree new_lhs, new_debug_lhs = NULL_TREE;
|
|
tree lhs = gimple_get_lhs (stmt);
|
|
|
|
new_lhs = make_ssa_name (TREE_TYPE (lhs));
|
|
gimple_set_lhs (stmt, new_lhs);
|
|
|
|
/* Also need to update GIMPLE_DEBUGs. */
|
|
FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
|
|
{
|
|
tree repl = new_lhs;
|
|
if (is_gimple_debug (use_stmt))
|
|
{
|
|
if (new_debug_lhs == NULL_TREE)
|
|
{
|
|
new_debug_lhs = make_node (DEBUG_EXPR_DECL);
|
|
gdebug *def_temp
|
|
= gimple_build_debug_bind (new_debug_lhs,
|
|
build2 (opcode, TREE_TYPE (lhs),
|
|
new_lhs, op),
|
|
stmt);
|
|
DECL_ARTIFICIAL (new_debug_lhs) = 1;
|
|
TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
|
|
SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
|
|
gimple_set_uid (def_temp, gimple_uid (stmt));
|
|
gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
|
|
gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
|
|
}
|
|
repl = new_debug_lhs;
|
|
}
|
|
FOR_EACH_IMM_USE_ON_STMT (use, iter)
|
|
SET_USE (use, repl);
|
|
update_stmt (use_stmt);
|
|
}
|
|
return new_lhs;
|
|
}
|
|
|
|
/* Give each statement in STMTS_TO_FIX a new SSA name for its LHS,
   replacing all uses of the old names.  Also do this for the stmt
   that defines DEF if *DEF is not OP.  */
|
|
|
|
static void
|
|
make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
|
|
vec<gimple *> &stmts_to_fix)
|
|
{
|
|
unsigned i;
|
|
gimple *stmt;
|
|
|
|
if (*def != op
|
|
&& TREE_CODE (*def) == SSA_NAME
|
|
&& (stmt = SSA_NAME_DEF_STMT (*def))
|
|
&& gimple_code (stmt) != GIMPLE_NOP)
|
|
*def = make_new_ssa_for_def (stmt, opcode, op);
|
|
|
|
FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
|
|
make_new_ssa_for_def (stmt, opcode, op);
|
|
}
|
|
|
|
/* Find the single immediate use of STMT's LHS, and replace it
|
|
with OP. Remove STMT. If STMT's LHS is the same as *DEF,
|
|
replace *DEF with OP as well. */
|
|
|
|
static void
|
|
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
|
|
{
|
|
tree lhs;
|
|
gimple *use_stmt;
|
|
use_operand_p use;
|
|
gimple_stmt_iterator gsi;
|
|
|
|
if (is_gimple_call (stmt))
|
|
lhs = gimple_call_lhs (stmt);
|
|
else
|
|
lhs = gimple_assign_lhs (stmt);
|
|
|
|
gcc_assert (has_single_use (lhs));
|
|
single_imm_use (lhs, &use, &use_stmt);
|
|
if (lhs == *def)
|
|
*def = op;
|
|
SET_USE (use, op);
|
|
if (TREE_CODE (op) != SSA_NAME)
|
|
update_stmt (use_stmt);
|
|
gsi = gsi_for_stmt (stmt);
|
|
unlink_stmt_vdef (stmt);
|
|
reassoc_remove_stmt (&gsi);
|
|
release_defs (stmt);
|
|
}
|
|
|
|
/* Walks the linear chain with result *DEF searching for an operation
|
|
with operand OP and code OPCODE removing that from the chain. *DEF
|
|
is updated if there is only one operand but no operation left. */
|
|
|
|
static void
|
|
zero_one_operation (tree *def, enum tree_code opcode, tree op)
|
|
{
|
|
tree orig_def = *def;
|
|
gimple *stmt = SSA_NAME_DEF_STMT (*def);
|
|
/* PR72835 - Record the stmt chain that has to be updated such that
   we don't use the same LHS when the values computed are different.  */
|
|
auto_vec<gimple *, 64> stmts_to_fix;
|
|
|
|
do
|
|
{
|
|
tree name;
|
|
|
|
if (opcode == MULT_EXPR)
|
|
{
|
|
if (stmt_is_power_of_op (stmt, op))
|
|
{
|
|
if (decrement_power (stmt) == 1)
|
|
{
|
|
if (stmts_to_fix.length () > 0)
|
|
stmts_to_fix.pop ();
|
|
propagate_op_to_single_use (op, stmt, def);
|
|
}
|
|
break;
|
|
}
|
|
else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
|
|
{
|
|
if (gimple_assign_rhs1 (stmt) == op)
|
|
{
|
|
tree cst = build_minus_one_cst (TREE_TYPE (op));
|
|
if (stmts_to_fix.length () > 0)
|
|
stmts_to_fix.pop ();
|
|
propagate_op_to_single_use (cst, stmt, def);
|
|
break;
|
|
}
|
|
else if (integer_minus_onep (op)
|
|
|| real_minus_onep (op))
|
|
{
|
|
gimple_assign_set_rhs_code
|
|
(stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
name = gimple_assign_rhs1 (stmt);
|
|
|
|
/* If this is the operation we look for and one of the operands
|
|
is ours simply propagate the other operand into the stmts
|
|
single use. */
|
|
if (gimple_assign_rhs_code (stmt) == opcode
|
|
&& (name == op
|
|
|| gimple_assign_rhs2 (stmt) == op))
|
|
{
|
|
if (name == op)
|
|
name = gimple_assign_rhs2 (stmt);
|
|
if (stmts_to_fix.length () > 0)
|
|
stmts_to_fix.pop ();
|
|
propagate_op_to_single_use (name, stmt, def);
|
|
break;
|
|
}
|
|
|
|
/* We might have a multiply of two __builtin_pow* calls, and
|
|
the operand might be hiding in the rightmost one. Likewise
|
|
this can happen for a negate. */
|
|
if (opcode == MULT_EXPR
|
|
&& gimple_assign_rhs_code (stmt) == opcode
|
|
&& TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
|
|
&& has_single_use (gimple_assign_rhs2 (stmt)))
|
|
{
|
|
gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
|
|
if (stmt_is_power_of_op (stmt2, op))
|
|
{
|
|
if (decrement_power (stmt2) == 1)
|
|
propagate_op_to_single_use (op, stmt2, def);
|
|
else
|
|
stmts_to_fix.safe_push (stmt2);
|
|
break;
|
|
}
|
|
else if (is_gimple_assign (stmt2)
|
|
&& gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
|
|
{
|
|
if (gimple_assign_rhs1 (stmt2) == op)
|
|
{
|
|
tree cst = build_minus_one_cst (TREE_TYPE (op));
|
|
propagate_op_to_single_use (cst, stmt2, def);
|
|
break;
|
|
}
|
|
else if (integer_minus_onep (op)
|
|
|| real_minus_onep (op))
|
|
{
|
|
stmts_to_fix.safe_push (stmt2);
|
|
gimple_assign_set_rhs_code
|
|
(stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Continue walking the chain. */
|
|
gcc_assert (name != op
|
|
&& TREE_CODE (name) == SSA_NAME);
|
|
stmt = SSA_NAME_DEF_STMT (name);
|
|
stmts_to_fix.safe_push (stmt);
|
|
}
|
|
while (1);
|
|
|
|
if (stmts_to_fix.length () > 0 || *def == orig_def)
|
|
make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
|
|
}
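
/* A hypothetical sketch of zero_one_operation removing one factor OP == x
   from the multiply chain computing *DEF:

     t_1 = x * a;
     t_2 = t_1 * b;     ->     t_4 = a * b;
     d_3 = t_2 * c;            d_5 = t_4 * c;     and *DEF becomes d_5

   The statement defining t_1 goes away, its other operand is propagated
   into the chain, and the remaining statements get fresh SSA names
   (PR72835) so that old and new values never share an LHS.  */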
|
|
|
|
/* Returns true if statement S1 dominates statement S2. Like
|
|
stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
|
|
|
|
static bool
|
|
reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
|
|
{
|
|
basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
|
|
|
|
/* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
|
|
SSA_NAME. Assume it lives at the beginning of function and
|
|
thus dominates everything. */
|
|
if (!bb1 || s1 == s2)
|
|
return true;
|
|
|
|
/* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
|
|
if (!bb2)
|
|
return false;
|
|
|
|
if (bb1 == bb2)
|
|
{
|
|
/* PHIs in the same basic block are assumed to be
|
|
executed all in parallel, if only one stmt is a PHI,
|
|
it dominates the other stmt in the same basic block. */
|
|
if (gimple_code (s1) == GIMPLE_PHI)
|
|
return true;
|
|
|
|
if (gimple_code (s2) == GIMPLE_PHI)
|
|
return false;
|
|
|
|
gcc_assert (gimple_uid (s1) && gimple_uid (s2));
|
|
|
|
if (gimple_uid (s1) < gimple_uid (s2))
|
|
return true;
|
|
|
|
if (gimple_uid (s1) > gimple_uid (s2))
|
|
return false;
|
|
|
|
gimple_stmt_iterator gsi = gsi_for_stmt (s1);
|
|
unsigned int uid = gimple_uid (s1);
|
|
for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
{
|
|
gimple *s = gsi_stmt (gsi);
|
|
if (gimple_uid (s) != uid)
|
|
break;
|
|
if (s == s2)
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
|
|
}
|
|
|
|
/* Insert STMT after INSERT_POINT. */
|
|
|
|
static void
|
|
insert_stmt_after (gimple *stmt, gimple *insert_point)
|
|
{
|
|
gimple_stmt_iterator gsi;
|
|
basic_block bb;
|
|
|
|
if (gimple_code (insert_point) == GIMPLE_PHI)
|
|
bb = gimple_bb (insert_point);
|
|
else if (!stmt_ends_bb_p (insert_point))
|
|
{
|
|
gsi = gsi_for_stmt (insert_point);
|
|
gimple_set_uid (stmt, gimple_uid (insert_point));
|
|
gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
|
|
return;
|
|
}
|
|
else
|
|
/* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
|
|
thus if it must end a basic block, it should be a call that can
|
|
throw, or some assignment that can throw. If it throws, the LHS
|
|
of it will not be initialized though, so only valid places using
|
|
the SSA_NAME should be dominated by the fallthru edge. */
|
|
bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
|
|
gsi = gsi_after_labels (bb);
|
|
if (gsi_end_p (gsi))
|
|
{
|
|
gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
|
|
gimple_set_uid (stmt,
|
|
gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
|
|
}
|
|
else
|
|
gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
|
|
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
|
|
}
|
|
|
|
/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
|
|
the result. Places the statement after the definition of either
|
|
OP1 or OP2. Returns the new statement. */
|
|
|
|
static gimple *
|
|
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
|
|
{
|
|
gimple *op1def = NULL, *op2def = NULL;
|
|
gimple_stmt_iterator gsi;
|
|
tree op;
|
|
gassign *sum;
|
|
|
|
/* Create the addition statement. */
|
|
op = make_ssa_name (type);
|
|
sum = gimple_build_assign (op, opcode, op1, op2);
|
|
|
|
/* Find an insertion place and insert. */
|
|
if (TREE_CODE (op1) == SSA_NAME)
|
|
op1def = SSA_NAME_DEF_STMT (op1);
|
|
if (TREE_CODE (op2) == SSA_NAME)
|
|
op2def = SSA_NAME_DEF_STMT (op2);
|
|
if ((!op1def || gimple_nop_p (op1def))
|
|
&& (!op2def || gimple_nop_p (op2def)))
|
|
{
|
|
gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
|
|
if (gsi_end_p (gsi))
|
|
{
|
|
gimple_stmt_iterator gsi2
|
|
= gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
|
|
gimple_set_uid (sum,
|
|
gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
|
|
}
|
|
else
|
|
gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
|
|
gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
|
|
}
|
|
else
|
|
{
|
|
gimple *insert_point;
|
|
if ((!op1def || gimple_nop_p (op1def))
|
|
|| (op2def && !gimple_nop_p (op2def)
|
|
&& reassoc_stmt_dominates_stmt_p (op1def, op2def)))
|
|
insert_point = op2def;
|
|
else
|
|
insert_point = op1def;
|
|
insert_stmt_after (sum, insert_point);
|
|
}
|
|
update_stmt (sum);
|
|
|
|
return sum;
|
|
}
|
|
|
|
/* Perform un-distribution of divisions and multiplications.
|
|
A * X + B * X is transformed into (A + B) * X and A / X + B / X
|
|
to (A + B) / X for real X.
|
|
|
|
The algorithm is organized as follows.
|
|
|
|
- First we walk the addition chain *OPS looking for summands that
|
|
are defined by a multiplication or a real division. This results
|
|
in the candidates bitmap with relevant indices into *OPS.
|
|
|
|
- Second we build the chains of multiplications or divisions for
|
|
these candidates, counting the number of occurrences of (operand, code)
|
|
pairs in all of the candidates chains.
|
|
|
|
- Third we sort the (operand, code) pairs by number of occurrence and
|
|
process them starting with the pair with the most uses.
|
|
|
|
* For each such pair we walk the candidates again to build a
|
|
second candidate bitmap noting all multiplication/division chains
|
|
that have at least one occurrence of (operand, code).
|
|
|
|
* We build an alternate addition chain only covering these
|
|
candidates with one (operand, code) operation removed from their
|
|
multiplication/division chain.
|
|
|
|
* The first candidate gets replaced by the alternate addition chain
|
|
multiplied/divided by the operand.
|
|
|
|
* All candidate chains get disabled for further processing and
|
|
processing of (operand, code) pairs continues.
|
|
|
|
The alternate addition chains built are re-processed by the main
|
|
reassociation algorithm which allows optimizing a * x * y + b * y * x
|
|
to (a + b ) * x * y in one invocation of the reassociation pass. */
|
|
|
|
static bool
|
|
undistribute_ops_list (enum tree_code opcode,
|
|
vec<operand_entry *> *ops, struct loop *loop)
|
|
{
|
|
unsigned int length = ops->length ();
|
|
operand_entry *oe1;
|
|
unsigned i, j;
|
|
unsigned nr_candidates, nr_candidates2;
|
|
sbitmap_iterator sbi0;
|
|
vec<operand_entry *> *subops;
|
|
bool changed = false;
|
|
unsigned int next_oecount_id = 0;
|
|
|
|
if (length <= 1
|
|
|| opcode != PLUS_EXPR)
|
|
return false;
|
|
|
|
/* Build a list of candidates to process. */
|
|
auto_sbitmap candidates (length);
|
|
bitmap_clear (candidates);
|
|
nr_candidates = 0;
|
|
FOR_EACH_VEC_ELT (*ops, i, oe1)
|
|
{
|
|
enum tree_code dcode;
|
|
gimple *oe1def;
|
|
|
|
if (TREE_CODE (oe1->op) != SSA_NAME)
|
|
continue;
|
|
oe1def = SSA_NAME_DEF_STMT (oe1->op);
|
|
if (!is_gimple_assign (oe1def))
|
|
continue;
|
|
dcode = gimple_assign_rhs_code (oe1def);
|
|
if ((dcode != MULT_EXPR
|
|
&& dcode != RDIV_EXPR)
|
|
|| !is_reassociable_op (oe1def, dcode, loop))
|
|
continue;
|
|
|
|
bitmap_set_bit (candidates, i);
|
|
nr_candidates++;
|
|
}
|
|
|
|
if (nr_candidates < 2)
|
|
return false;
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "searching for un-distribute opportunities ");
|
|
print_generic_expr (dump_file,
|
|
(*ops)[bitmap_first_set_bit (candidates)]->op, 0);
|
|
fprintf (dump_file, " %d\n", nr_candidates);
|
|
}
|
|
|
|
/* Build linearized sub-operand lists and the counting table. */
|
|
cvec.create (0);
|
|
|
|
hash_table<oecount_hasher> ctable (15);
|
|
|
|
/* ??? Macro arguments cannot have multi-argument template types in
|
|
them. This typedef is needed to workaround that limitation. */
|
|
typedef vec<operand_entry *> vec_operand_entry_t_heap;
|
|
subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
|
|
EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
|
|
{
|
|
gimple *oedef;
|
|
enum tree_code oecode;
|
|
unsigned j;
|
|
|
|
oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
|
|
oecode = gimple_assign_rhs_code (oedef);
|
|
linearize_expr_tree (&subops[i], oedef,
|
|
associative_tree_code (oecode), false);
|
|
|
|
FOR_EACH_VEC_ELT (subops[i], j, oe1)
|
|
{
|
|
oecount c;
|
|
int *slot;
|
|
int idx;
|
|
c.oecode = oecode;
|
|
c.cnt = 1;
|
|
c.id = next_oecount_id++;
|
|
c.op = oe1->op;
|
|
cvec.safe_push (c);
|
|
idx = cvec.length () + 41;
|
|
slot = ctable.find_slot (idx, INSERT);
|
|
if (!*slot)
|
|
{
|
|
*slot = idx;
|
|
}
|
|
else
|
|
{
|
|
cvec.pop ();
|
|
cvec[*slot - 42].cnt++;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Sort the counting table. */
|
|
cvec.qsort (oecount_cmp);
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
oecount *c;
|
|
fprintf (dump_file, "Candidates:\n");
|
|
FOR_EACH_VEC_ELT (cvec, j, c)
|
|
{
|
|
fprintf (dump_file, " %u %s: ", c->cnt,
|
|
c->oecode == MULT_EXPR
|
|
? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
|
|
print_generic_expr (dump_file, c->op);
|
|
fprintf (dump_file, "\n");
|
|
}
|
|
}
|
|
|
|
/* Process the (operand, code) pairs in order of most occurrence. */
|
|
auto_sbitmap candidates2 (length);
|
|
while (!cvec.is_empty ())
|
|
{
|
|
oecount *c = &cvec.last ();
|
|
if (c->cnt < 2)
|
|
break;
|
|
|
|
/* Now collect the operands in the outer chain that contain
|
|
the common operand in their inner chain. */
|
|
bitmap_clear (candidates2);
|
|
nr_candidates2 = 0;
|
|
EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
|
|
{
|
|
gimple *oedef;
|
|
enum tree_code oecode;
|
|
unsigned j;
|
|
tree op = (*ops)[i]->op;
|
|
|
|
/* If we undistributed in this chain already this may be
|
|
a constant. */
|
|
if (TREE_CODE (op) != SSA_NAME)
|
|
continue;
|
|
|
|
oedef = SSA_NAME_DEF_STMT (op);
|
|
oecode = gimple_assign_rhs_code (oedef);
|
|
if (oecode != c->oecode)
|
|
continue;
|
|
|
|
FOR_EACH_VEC_ELT (subops[i], j, oe1)
|
|
{
|
|
if (oe1->op == c->op)
|
|
{
|
|
bitmap_set_bit (candidates2, i);
|
|
++nr_candidates2;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (nr_candidates2 >= 2)
|
|
{
|
|
operand_entry *oe1, *oe2;
|
|
gimple *prod;
|
|
int first = bitmap_first_set_bit (candidates2);
|
|
|
|
/* Build the new addition chain. */
|
|
oe1 = (*ops)[first];
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "Building (");
|
|
print_generic_expr (dump_file, oe1->op);
|
|
}
|
|
zero_one_operation (&oe1->op, c->oecode, c->op);
|
|
EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
|
|
{
|
|
gimple *sum;
|
|
oe2 = (*ops)[i];
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, " + ");
|
|
print_generic_expr (dump_file, oe2->op);
|
|
}
|
|
zero_one_operation (&oe2->op, c->oecode, c->op);
|
|
sum = build_and_add_sum (TREE_TYPE (oe1->op),
|
|
oe1->op, oe2->op, opcode);
|
|
oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
|
|
oe2->rank = 0;
|
|
oe1->op = gimple_get_lhs (sum);
|
|
}
|
|
|
|
/* Apply the multiplication/division. */
|
|
prod = build_and_add_sum (TREE_TYPE (oe1->op),
|
|
oe1->op, c->op, c->oecode);
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
|
|
print_generic_expr (dump_file, c->op);
|
|
fprintf (dump_file, "\n");
|
|
}
|
|
|
|
/* Record it in the addition chain and disable further
|
|
undistribution with this op. */
|
|
oe1->op = gimple_assign_lhs (prod);
|
|
oe1->rank = get_rank (oe1->op);
|
|
subops[first].release ();
|
|
|
|
changed = true;
|
|
}
|
|
|
|
cvec.pop ();
|
|
}
|
|
|
|
for (i = 0; i < ops->length (); ++i)
|
|
subops[i].release ();
|
|
free (subops);
|
|
cvec.release ();
|
|
|
|
return changed;
|
|
}
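
/* A hypothetical example of undistribution: for the addition chain

     { a * x, b * x, c }

   the pair (x, MULT_EXPR) occurs in two candidates, so the pass builds

     s_1 = a + b;
     p_2 = s_1 * x;

   and the chain becomes { p_2, 0, c }; the zero entry is cleaned up by
   the later constant handling.  */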
|
|
|
|
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
|
|
expression, examine the other OPS to see if any of them are comparisons
|
|
of the same values, which we may be able to combine or eliminate.
|
|
For example, we can rewrite (a < b) | (a == b) as (a <= b). */
|
|
|
|
static bool
|
|
eliminate_redundant_comparison (enum tree_code opcode,
|
|
vec<operand_entry *> *ops,
|
|
unsigned int currindex,
|
|
operand_entry *curr)
|
|
{
|
|
tree op1, op2;
|
|
enum tree_code lcode, rcode;
|
|
gimple *def1, *def2;
|
|
int i;
|
|
operand_entry *oe;
|
|
|
|
if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
|
|
return false;
|
|
|
|
/* Check that CURR is a comparison. */
|
|
if (TREE_CODE (curr->op) != SSA_NAME)
|
|
return false;
|
|
def1 = SSA_NAME_DEF_STMT (curr->op);
|
|
if (!is_gimple_assign (def1))
|
|
return false;
|
|
lcode = gimple_assign_rhs_code (def1);
|
|
if (TREE_CODE_CLASS (lcode) != tcc_comparison)
|
|
return false;
|
|
op1 = gimple_assign_rhs1 (def1);
|
|
op2 = gimple_assign_rhs2 (def1);
|
|
|
|
/* Now look for a similar comparison in the remaining OPS. */
|
|
for (i = currindex + 1; ops->iterate (i, &oe); i++)
|
|
{
|
|
tree t;
|
|
|
|
if (TREE_CODE (oe->op) != SSA_NAME)
|
|
continue;
|
|
def2 = SSA_NAME_DEF_STMT (oe->op);
|
|
if (!is_gimple_assign (def2))
|
|
continue;
|
|
rcode = gimple_assign_rhs_code (def2);
|
|
if (TREE_CODE_CLASS (rcode) != tcc_comparison)
|
|
continue;
|
|
|
|
/* If we got here, we have a match. See if we can combine the
|
|
two comparisons. */
|
|
if (opcode == BIT_IOR_EXPR)
|
|
t = maybe_fold_or_comparisons (lcode, op1, op2,
|
|
rcode, gimple_assign_rhs1 (def2),
|
|
gimple_assign_rhs2 (def2));
|
|
else
|
|
t = maybe_fold_and_comparisons (lcode, op1, op2,
|
|
rcode, gimple_assign_rhs1 (def2),
|
|
gimple_assign_rhs2 (def2));
|
|
if (!t)
|
|
continue;
|
|
|
|
/* maybe_fold_and_comparisons and maybe_fold_or_comparisons
|
|
always give us a boolean_type_node value back. If the original
|
|
BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
|
|
we need to convert. */
|
|
if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
|
|
t = fold_convert (TREE_TYPE (curr->op), t);
|
|
|
|
if (TREE_CODE (t) != INTEGER_CST
|
|
&& !operand_equal_p (t, curr->op, 0))
|
|
{
|
|
enum tree_code subcode;
|
|
tree newop1, newop2;
|
|
if (!COMPARISON_CLASS_P (t))
|
|
continue;
|
|
extract_ops_from_tree (t, &subcode, &newop1, &newop2);
|
|
STRIP_USELESS_TYPE_CONVERSION (newop1);
|
|
STRIP_USELESS_TYPE_CONVERSION (newop2);
|
|
if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
|
|
continue;
|
|
}
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "Equivalence: ");
|
|
print_generic_expr (dump_file, curr->op);
|
|
fprintf (dump_file, " %s ", op_symbol_code (opcode));
|
|
print_generic_expr (dump_file, oe->op);
|
|
fprintf (dump_file, " -> ");
|
|
print_generic_expr (dump_file, t);
|
|
fprintf (dump_file, "\n");
|
|
}
|
|
|
|
/* Now we can delete oe, as it has been subsumed by the new combined
|
|
expression t. */
|
|
ops->ordered_remove (i);
|
|
reassociate_stats.ops_eliminated ++;
|
|
|
|
/* If t is the same as curr->op, we're done. Otherwise we must
|
|
replace curr->op with t. Special case is if we got a constant
|
|
back, in which case we add it to the end instead of in place of
|
|
the current entry. */
|
|
if (TREE_CODE (t) == INTEGER_CST)
|
|
{
|
|
ops->ordered_remove (currindex);
|
|
add_to_ops_vec (ops, t);
|
|
}
|
|
else if (!operand_equal_p (t, curr->op, 0))
|
|
{
|
|
gimple *sum;
|
|
enum tree_code subcode;
|
|
tree newop1;
|
|
tree newop2;
|
|
gcc_assert (COMPARISON_CLASS_P (t));
|
|
extract_ops_from_tree (t, &subcode, &newop1, &newop2);
|
|
STRIP_USELESS_TYPE_CONVERSION (newop1);
|
|
STRIP_USELESS_TYPE_CONVERSION (newop2);
|
|
gcc_checking_assert (is_gimple_val (newop1)
|
|
&& is_gimple_val (newop2));
|
|
sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
|
|
curr->op = gimple_get_lhs (sum);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
/* Transform repeated addition of same values into multiply with
|
|
constant. */
|
|
static bool
|
|
transform_add_to_multiply (vec<operand_entry *> *ops)
|
|
{
|
|
operand_entry *oe;
|
|
tree op = NULL_TREE;
|
|
int j;
|
|
int i, start = -1, end = 0, count = 0;
|
|
auto_vec<std::pair <int, int> > indxs;
|
|
bool changed = false;
|
|
|
|
if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
|
|
&& (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
|
|
|| !flag_unsafe_math_optimizations))
|
|
return false;
|
|
|
|
/* Look for repeated operands. */
|
|
FOR_EACH_VEC_ELT (*ops, i, oe)
|
|
{
|
|
if (start == -1)
|
|
{
|
|
count = 1;
|
|
op = oe->op;
|
|
start = i;
|
|
}
|
|
else if (operand_equal_p (oe->op, op, 0))
|
|
{
|
|
count++;
|
|
end = i;
|
|
}
|
|
else
|
|
{
|
|
if (count > 1)
|
|
indxs.safe_push (std::make_pair (start, end));
|
|
count = 1;
|
|
op = oe->op;
|
|
start = i;
|
|
}
|
|
}
|
|
|
|
if (count > 1)
|
|
indxs.safe_push (std::make_pair (start, end));
|
|
|
|
for (j = indxs.length () - 1; j >= 0; --j)
|
|
{
|
|
/* Convert repeated operand addition to multiplication. */
|
|
start = indxs[j].first;
|
|
end = indxs[j].second;
|
|
op = (*ops)[start]->op;
|
|
count = end - start + 1;
|
|
for (i = end; i >= start; --i)
|
|
ops->unordered_remove (i);
|
|
tree tmp = make_ssa_name (TREE_TYPE (op));
|
|
tree cst = build_int_cst (integer_type_node, count);
|
|
gassign *mul_stmt
|
|
= gimple_build_assign (tmp, MULT_EXPR,
|
|
op, fold_convert (TREE_TYPE (op), cst));
|
|
gimple_set_visited (mul_stmt, true);
|
|
add_to_ops_vec (ops, tmp, mul_stmt);
|
|
changed = true;
|
|
}
|
|
|
|
return changed;
|
|
}
|
|
|
|
|
|
/* Perform various identities and other optimizations on the list of
|
|
operand entries, stored in OPS. The tree code for the binary
|
|
operation between all the operands is OPCODE. */
|
|
|
|
static void
|
|
optimize_ops_list (enum tree_code opcode,
|
|
vec<operand_entry *> *ops)
|
|
{
|
|
unsigned int length = ops->length ();
|
|
unsigned int i;
|
|
operand_entry *oe;
|
|
operand_entry *oelast = NULL;
|
|
bool iterate = false;
|
|
|
|
if (length == 1)
|
|
return;
|
|
|
|
oelast = ops->last ();
|
|
|
|
/* If the last two are constants, pop the constants off, merge them
|
|
and try the next two. */
|
|
if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
|
|
{
|
|
operand_entry *oelm1 = (*ops)[length - 2];
|
|
|
|
if (oelm1->rank == 0
|
|
&& is_gimple_min_invariant (oelm1->op)
|
|
&& useless_type_conversion_p (TREE_TYPE (oelm1->op),
|
|
TREE_TYPE (oelast->op)))
|
|
{
|
|
tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
|
|
oelm1->op, oelast->op);
|
|
|
|
if (folded && is_gimple_min_invariant (folded))
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
fprintf (dump_file, "Merging constants\n");
|
|
|
|
ops->pop ();
|
|
ops->pop ();
|
|
|
|
add_to_ops_vec (ops, folded);
|
|
reassociate_stats.constants_eliminated++;
|
|
|
|
optimize_ops_list (opcode, ops);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
eliminate_using_constants (opcode, ops);
|
|
oelast = NULL;
|
|
|
|
for (i = 0; ops->iterate (i, &oe);)
|
|
{
|
|
bool done = false;
|
|
|
|
if (eliminate_not_pairs (opcode, ops, i, oe))
|
|
return;
|
|
if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
|
|
|| (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
|
|
|| (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
|
|
{
|
|
if (done)
|
|
return;
|
|
iterate = true;
|
|
oelast = NULL;
|
|
continue;
|
|
}
|
|
oelast = oe;
|
|
i++;
|
|
}
|
|
|
|
length = ops->length ();
|
|
oelast = ops->last ();
|
|
|
|
if (iterate)
|
|
optimize_ops_list (opcode, ops);
|
|
}
|
|
|
|
/* The following functions are subroutines to optimize_range_tests and allow
|
|
it to try to change a logical combination of comparisons into a range
|
|
test.
|
|
|
|
For example, both
|
|
X == 2 || X == 5 || X == 3 || X == 4
|
|
and
|
|
X >= 2 && X <= 5
|
|
are converted to
|
|
(unsigned) (X - 2) <= 3
|
|
|
|
For more information see comments above fold_range_test in fold-const.c;
this implementation is for GIMPLE. */
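/* Worked instance of the example above (added illustration): with X in
{ 2, 3, 4, 5 }, X - 2 lies in { 0, 1, 2, 3 }, so (unsigned) (X - 2) <= 3
holds for exactly those four values; any X < 2 wraps around to a large
unsigned number and fails the single test.  */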
struct range_entry
|
|
{
|
|
tree exp;
|
|
tree low;
|
|
tree high;
|
|
bool in_p;
|
|
bool strict_overflow_p;
|
|
unsigned int idx, next;
|
|
};
|
|
|
|
/* This is similar to make_range in fold-const.c, but on top of
|
|
GIMPLE instead of trees. If EXP is non-NULL, it should be
|
|
an SSA_NAME and STMT argument is ignored, otherwise STMT
|
|
argument should be a GIMPLE_COND. */
|
|
|
|
static void
|
|
init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
|
|
{
|
|
int in_p;
|
|
tree low, high;
|
|
bool is_bool, strict_overflow_p;
|
|
|
|
r->exp = NULL_TREE;
|
|
r->in_p = false;
|
|
r->strict_overflow_p = false;
|
|
r->low = NULL_TREE;
|
|
r->high = NULL_TREE;
|
|
if (exp != NULL_TREE
|
|
&& (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
|
|
return;
|
|
|
|
/* Start with simply saying "EXP != 0" and then look at the code of EXP
|
|
and see if we can refine the range. Some of the cases below may not
|
|
happen, but it doesn't seem worth worrying about this. We "continue"
|
|
the outer loop when we've changed something; otherwise we "break"
|
|
the switch, which will "break" the while. */
|
|
low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
|
|
high = low;
|
|
in_p = 0;
|
|
strict_overflow_p = false;
|
|
is_bool = false;
|
|
if (exp == NULL_TREE)
|
|
is_bool = true;
|
|
else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
|
|
{
|
|
if (TYPE_UNSIGNED (TREE_TYPE (exp)))
|
|
is_bool = true;
|
|
else
|
|
return;
|
|
}
|
|
else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
|
|
is_bool = true;
|
|
|
|
while (1)
|
|
{
|
|
enum tree_code code;
|
|
tree arg0, arg1, exp_type;
|
|
tree nexp;
|
|
location_t loc;
|
|
|
|
if (exp != NULL_TREE)
|
|
{
|
|
if (TREE_CODE (exp) != SSA_NAME
|
|
|| SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
|
|
break;
|
|
|
|
stmt = SSA_NAME_DEF_STMT (exp);
|
|
if (!is_gimple_assign (stmt))
|
|
break;
|
|
|
|
code = gimple_assign_rhs_code (stmt);
|
|
arg0 = gimple_assign_rhs1 (stmt);
|
|
arg1 = gimple_assign_rhs2 (stmt);
|
|
exp_type = TREE_TYPE (exp);
|
|
}
|
|
else
|
|
{
|
|
code = gimple_cond_code (stmt);
|
|
arg0 = gimple_cond_lhs (stmt);
|
|
arg1 = gimple_cond_rhs (stmt);
|
|
exp_type = boolean_type_node;
|
|
}
|
|
|
|
if (TREE_CODE (arg0) != SSA_NAME)
|
|
break;
|
|
loc = gimple_location (stmt);
|
|
switch (code)
|
|
{
|
|
case BIT_NOT_EXPR:
|
|
if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
|
|
/* Ensure the range is either +[-,0], +[0,0],
|
|
-[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
|
|
-[1,1]. If it is e.g. +[-,-] or -[-,-]
|
|
or similar expression of unconditional true or
|
|
false, it should not be negated. */
|
|
&& ((high && integer_zerop (high))
|
|
|| (low && integer_onep (low))))
|
|
{
|
|
in_p = !in_p;
|
|
exp = arg0;
|
|
continue;
|
|
}
|
|
break;
|
|
case SSA_NAME:
|
|
exp = arg0;
|
|
continue;
|
|
CASE_CONVERT:
|
|
if (is_bool)
|
|
{
|
|
if ((TYPE_PRECISION (exp_type) == 1
|
|
|| TREE_CODE (exp_type) == BOOLEAN_TYPE)
|
|
&& TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
|
|
return;
|
|
}
|
|
else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
|
|
{
|
|
if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
|
|
is_bool = true;
|
|
else
|
|
return;
|
|
}
|
|
else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
|
|
is_bool = true;
|
|
goto do_default;
|
|
case EQ_EXPR:
|
|
case NE_EXPR:
|
|
case LT_EXPR:
|
|
case LE_EXPR:
|
|
case GE_EXPR:
|
|
case GT_EXPR:
|
|
is_bool = true;
|
|
/* FALLTHRU */
|
|
default:
|
|
if (!is_bool)
|
|
return;
|
|
do_default:
|
|
nexp = make_range_step (loc, code, arg0, arg1, exp_type,
|
|
&low, &high, &in_p,
|
|
&strict_overflow_p);
|
|
if (nexp != NULL_TREE)
|
|
{
|
|
exp = nexp;
|
|
gcc_assert (TREE_CODE (exp) == SSA_NAME);
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
if (is_bool)
|
|
{
|
|
r->exp = exp;
|
|
r->in_p = in_p;
|
|
r->low = low;
|
|
r->high = high;
|
|
r->strict_overflow_p = strict_overflow_p;
|
|
}
|
|
}
|
|
|
|
/* Comparison function for qsort. Sort entries
|
|
without SSA_NAME exp first, then with SSA_NAMEs sorted
|
|
by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
|
|
by increasing ->low and if ->low is the same, by increasing
|
|
->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
|
|
maximum. */
|
|
|
|
static int
|
|
range_entry_cmp (const void *a, const void *b)
|
|
{
|
|
const struct range_entry *p = (const struct range_entry *) a;
|
|
const struct range_entry *q = (const struct range_entry *) b;
|
|
|
|
if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
|
|
{
|
|
if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
|
|
{
|
|
/* Group range_entries for the same SSA_NAME together. */
|
|
if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
|
|
return -1;
|
|
else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
|
|
return 1;
|
|
/* If ->low is different, NULL low goes first, then by
|
|
ascending low. */
|
|
if (p->low != NULL_TREE)
|
|
{
|
|
if (q->low != NULL_TREE)
|
|
{
|
|
tree tem = fold_binary (LT_EXPR, boolean_type_node,
|
|
p->low, q->low);
|
|
if (tem && integer_onep (tem))
|
|
return -1;
|
|
tem = fold_binary (GT_EXPR, boolean_type_node,
|
|
p->low, q->low);
|
|
if (tem && integer_onep (tem))
|
|
return 1;
|
|
}
|
|
else
|
|
return 1;
|
|
}
|
|
else if (q->low != NULL_TREE)
|
|
return -1;
|
|
/* If ->high is different, NULL high goes last, before that by
|
|
ascending high. */
|
|
if (p->high != NULL_TREE)
|
|
{
|
|
if (q->high != NULL_TREE)
|
|
{
|
|
tree tem = fold_binary (LT_EXPR, boolean_type_node,
|
|
p->high, q->high);
|
|
if (tem && integer_onep (tem))
|
|
return -1;
|
|
tem = fold_binary (GT_EXPR, boolean_type_node,
|
|
p->high, q->high);
|
|
if (tem && integer_onep (tem))
|
|
return 1;
|
|
}
|
|
else
|
|
return -1;
|
|
}
|
|
else if (q->high != NULL_TREE)
|
|
return 1;
|
|
/* If both ranges are the same, sort below by ascending idx. */
|
|
}
|
|
else
|
|
return 1;
|
|
}
|
|
else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
|
|
return -1;
|
|
|
|
if (p->idx < q->idx)
|
|
return -1;
|
|
else
|
|
{
|
|
gcc_checking_assert (p->idx > q->idx);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
/* Helper function for update_range_test. Force EXPR into an SSA_NAME,
|
|
insert needed statements BEFORE or after GSI. */
|
|
|
|
static tree
|
|
force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
|
|
{
|
|
enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
|
|
tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
|
|
if (TREE_CODE (ret) != SSA_NAME)
|
|
{
|
|
gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
|
|
if (before)
|
|
gsi_insert_before (gsi, g, GSI_SAME_STMT);
|
|
else
|
|
gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
|
|
ret = gimple_assign_lhs (g);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/* Helper routine of optimize_range_tests.
[EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
|
|
RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
|
|
OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
|
|
is NULL, OTHERRANGEP should not be NULL, and then OTHERRANGEP points to
|
|
an array of COUNT pointers to other ranges. Return
|
|
true if the range merge has been successful.
|
|
If OPCODE is ERROR_MARK, this is called from within
|
|
maybe_optimize_range_tests and is performing inter-bb range optimization.
|
|
In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
|
|
oe->rank. */
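/* Sketch of the flow below (added note): build_range_check produces the
combined test, it is forced into an SSA_NAME next to the statement that
defined the original operand, and every subsumed range has its operand
entry replaced (a neutral 0/1 constant in the inter-bb case, otherwise
error_mark_node) so the superseded tests are optimized away.  */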
static bool
|
|
update_range_test (struct range_entry *range, struct range_entry *otherrange,
|
|
struct range_entry **otherrangep,
|
|
unsigned int count, enum tree_code opcode,
|
|
vec<operand_entry *> *ops, tree exp, gimple_seq seq,
|
|
bool in_p, tree low, tree high, bool strict_overflow_p)
|
|
{
|
|
operand_entry *oe = (*ops)[range->idx];
|
|
tree op = oe->op;
|
|
gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
|
|
: last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
|
|
location_t loc = gimple_location (stmt);
|
|
tree optype = op ? TREE_TYPE (op) : boolean_type_node;
|
|
tree tem = build_range_check (loc, optype, unshare_expr (exp),
|
|
in_p, low, high);
|
|
enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
|
|
gimple_stmt_iterator gsi;
|
|
unsigned int i, uid;
|
|
|
|
if (tem == NULL_TREE)
|
|
return false;
|
|
|
|
/* If op is default def SSA_NAME, there is no place to insert the
|
|
new comparison. Give up, unless we can use OP itself as the
|
|
range test. */
|
|
if (op && SSA_NAME_IS_DEFAULT_DEF (op))
|
|
{
|
|
if (op == range->exp
|
|
&& ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
|
|
|| TREE_CODE (optype) == BOOLEAN_TYPE)
|
|
&& (op == tem
|
|
|| (TREE_CODE (tem) == EQ_EXPR
|
|
&& TREE_OPERAND (tem, 0) == op
|
|
&& integer_onep (TREE_OPERAND (tem, 1))))
|
|
&& opcode != BIT_IOR_EXPR
|
|
&& (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
|
|
{
|
|
stmt = NULL;
|
|
tem = op;
|
|
}
|
|
else
|
|
return false;
|
|
}
|
|
|
|
if (strict_overflow_p && issue_strict_overflow_warning (wc))
|
|
warning_at (loc, OPT_Wstrict_overflow,
|
|
"assuming signed overflow does not occur "
|
|
"when simplifying range test");
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
struct range_entry *r;
|
|
fprintf (dump_file, "Optimizing range tests ");
|
|
print_generic_expr (dump_file, range->exp);
|
|
fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
|
|
print_generic_expr (dump_file, range->low);
|
|
fprintf (dump_file, ", ");
|
|
print_generic_expr (dump_file, range->high);
|
|
fprintf (dump_file, "]");
|
|
for (i = 0; i < count; i++)
|
|
{
|
|
if (otherrange)
|
|
r = otherrange + i;
|
|
else
|
|
r = otherrangep[i];
|
|
if (r->exp
|
|
&& r->exp != range->exp
|
|
&& TREE_CODE (r->exp) == SSA_NAME)
|
|
{
|
|
fprintf (dump_file, " and ");
|
|
print_generic_expr (dump_file, r->exp);
|
|
}
|
|
else
|
|
fprintf (dump_file, " and");
|
|
fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
|
|
print_generic_expr (dump_file, r->low);
|
|
fprintf (dump_file, ", ");
|
|
print_generic_expr (dump_file, r->high);
|
|
fprintf (dump_file, "]");
|
|
}
|
|
fprintf (dump_file, "\n into ");
|
|
print_generic_expr (dump_file, tem);
|
|
fprintf (dump_file, "\n");
|
|
}
|
|
|
|
if (opcode == BIT_IOR_EXPR
|
|
|| (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
|
|
tem = invert_truthvalue_loc (loc, tem);
|
|
|
|
tem = fold_convert_loc (loc, optype, tem);
|
|
if (stmt)
|
|
{
|
|
gsi = gsi_for_stmt (stmt);
|
|
uid = gimple_uid (stmt);
|
|
}
|
|
else
|
|
{
|
|
gsi = gsi_none ();
|
|
uid = 0;
|
|
}
|
|
if (stmt == NULL)
|
|
gcc_checking_assert (tem == op);
|
|
/* In rare cases range->exp can be equal to lhs of stmt.
In that case we have to insert after the stmt rather than before
it. If stmt is a PHI, insert it at the start of the basic block. */
|
|
else if (op != range->exp)
|
|
{
|
|
gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
|
|
tem = force_into_ssa_name (&gsi, tem, true);
|
|
gsi_prev (&gsi);
|
|
}
|
|
else if (gimple_code (stmt) != GIMPLE_PHI)
|
|
{
|
|
gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
|
|
tem = force_into_ssa_name (&gsi, tem, false);
|
|
}
|
|
else
|
|
{
|
|
gsi = gsi_after_labels (gimple_bb (stmt));
|
|
if (!gsi_end_p (gsi))
|
|
uid = gimple_uid (gsi_stmt (gsi));
|
|
else
|
|
{
|
|
gsi = gsi_start_bb (gimple_bb (stmt));
|
|
uid = 1;
|
|
while (!gsi_end_p (gsi))
|
|
{
|
|
uid = gimple_uid (gsi_stmt (gsi));
|
|
gsi_next (&gsi);
|
|
}
|
|
}
|
|
gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
|
|
tem = force_into_ssa_name (&gsi, tem, true);
|
|
if (gsi_end_p (gsi))
|
|
gsi = gsi_last_bb (gimple_bb (stmt));
|
|
else
|
|
gsi_prev (&gsi);
|
|
}
|
|
for (; !gsi_end_p (gsi); gsi_prev (&gsi))
|
|
if (gimple_uid (gsi_stmt (gsi)))
|
|
break;
|
|
else
|
|
gimple_set_uid (gsi_stmt (gsi), uid);
|
|
|
|
oe->op = tem;
|
|
range->exp = exp;
|
|
range->low = low;
|
|
range->high = high;
|
|
range->in_p = in_p;
|
|
range->strict_overflow_p = false;
|
|
|
|
for (i = 0; i < count; i++)
|
|
{
|
|
if (otherrange)
|
|
range = otherrange + i;
|
|
else
|
|
range = otherrangep[i];
|
|
oe = (*ops)[range->idx];
|
|
/* Now change all the other range test immediate uses, so that
|
|
those tests will be optimized away. */
|
|
if (opcode == ERROR_MARK)
|
|
{
|
|
if (oe->op)
|
|
oe->op = build_int_cst (TREE_TYPE (oe->op),
|
|
oe->rank == BIT_IOR_EXPR ? 0 : 1);
|
|
else
|
|
oe->op = (oe->rank == BIT_IOR_EXPR
|
|
? boolean_false_node : boolean_true_node);
|
|
}
|
|
else
|
|
oe->op = error_mark_node;
|
|
range->exp = NULL_TREE;
|
|
range->low = NULL_TREE;
|
|
range->high = NULL_TREE;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/* Optimize X == CST1 || X == CST2
|
|
if popcount (CST1 ^ CST2) == 1 into
|
|
(X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
|
|
Similarly for ranges. E.g.
|
|
X != 2 && X != 3 && X != 10 && X != 11
|
|
will be transformed by the previous optimization into
|
|
!((X - 2U) <= 1U || (X - 10U) <= 1U)
|
|
and this loop can transform that into
|
|
!(((X & ~8) - 2U) <= 1U). */
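/* Small two-constant illustration (added, hypothetical values): for
X == 12 || X == 14 we have 12 ^ 14 == 2, whose popcount is 1, so the
pair collapses to (X & ~2) == 12, one comparison instead of two.  */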
static bool
|
|
optimize_range_tests_xor (enum tree_code opcode, tree type,
|
|
tree lowi, tree lowj, tree highi, tree highj,
|
|
vec<operand_entry *> *ops,
|
|
struct range_entry *rangei,
|
|
struct range_entry *rangej)
|
|
{
|
|
tree lowxor, highxor, tem, exp;
|
|
/* Check lowi ^ lowj == highi ^ highj and
|
|
popcount (lowi ^ lowj) == 1. */
|
|
lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
|
|
if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
|
|
return false;
|
|
if (!integer_pow2p (lowxor))
|
|
return false;
|
|
highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
|
|
if (!tree_int_cst_equal (lowxor, highxor))
|
|
return false;
|
|
|
|
tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
|
|
exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
|
|
lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
|
|
highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
|
|
if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
|
|
NULL, rangei->in_p, lowj, highj,
|
|
rangei->strict_overflow_p
|
|
|| rangej->strict_overflow_p))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
/* Optimize X == CST1 || X == CST2
|
|
if popcount (CST2 - CST1) == 1 into
|
|
((X - CST1) & ~(CST2 - CST1)) == 0.
|
|
Similarly for ranges. E.g.
|
|
X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
|
|
|| X == 75 || X == 45
|
|
will be transformed by the previous optimization into
|
|
(X - 43U) <= 3U || (X - 75U) <= 3U
|
|
and this loop can transform that into
|
|
((X - 43U) & ~(75U - 43U)) <= 3U. */
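/* Small two-constant illustration (added, hypothetical values): for
X == 5 || X == 13 the difference 13 - 5 == 8 has popcount 1, so the
pair becomes ((X - 5) & ~8) == 0; X == 5 yields 0, X == 13 yields 8,
and masking out bit 3 maps both to zero.  */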
static bool
|
|
optimize_range_tests_diff (enum tree_code opcode, tree type,
|
|
tree lowi, tree lowj, tree highi, tree highj,
|
|
vec<operand_entry *> *ops,
|
|
struct range_entry *rangei,
|
|
struct range_entry *rangej)
|
|
{
|
|
tree tem1, tem2, mask;
|
|
/* Check highi - lowi == highj - lowj. */
|
|
tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
|
|
if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
|
|
return false;
|
|
tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
|
|
if (!tree_int_cst_equal (tem1, tem2))
|
|
return false;
|
|
/* Check popcount (lowj - lowi) == 1. */
|
|
tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
|
|
if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
|
|
return false;
|
|
if (!integer_pow2p (tem1))
|
|
return false;
|
|
|
|
type = unsigned_type_for (type);
|
|
tem1 = fold_convert (type, tem1);
|
|
tem2 = fold_convert (type, tem2);
|
|
lowi = fold_convert (type, lowi);
|
|
mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
|
|
tem1 = fold_build2 (MINUS_EXPR, type,
|
|
fold_convert (type, rangei->exp), lowi);
|
|
tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
|
|
lowj = build_int_cst (type, 0);
|
|
if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
|
|
NULL, rangei->in_p, lowj, tem2,
|
|
rangei->strict_overflow_p
|
|
|| rangej->strict_overflow_p))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
/* Perform the common checks shared by optimize_range_tests_xor and
optimize_range_tests_diff.
If OPTIMIZE_XOR is true, call optimize_range_tests_xor,
otherwise call optimize_range_tests_diff. */
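/* Added note: the double loop below only pairs out-of-range entries
that test the same expression, scans at most 63 following entries, and
requires the two ranges to be disjoint (lowj > highi) before trying
either of the merges above.  */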
static bool
|
|
optimize_range_tests_1 (enum tree_code opcode, int first, int length,
|
|
bool optimize_xor, vec<operand_entry *> *ops,
|
|
struct range_entry *ranges)
|
|
{
|
|
int i, j;
|
|
bool any_changes = false;
|
|
for (i = first; i < length; i++)
|
|
{
|
|
tree lowi, highi, lowj, highj, type, tem;
|
|
|
|
if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
|
|
continue;
|
|
type = TREE_TYPE (ranges[i].exp);
|
|
if (!INTEGRAL_TYPE_P (type))
|
|
continue;
|
|
lowi = ranges[i].low;
|
|
if (lowi == NULL_TREE)
|
|
lowi = TYPE_MIN_VALUE (type);
|
|
highi = ranges[i].high;
|
|
if (highi == NULL_TREE)
|
|
continue;
|
|
for (j = i + 1; j < length && j < i + 64; j++)
|
|
{
|
|
bool changes;
|
|
if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
|
|
continue;
|
|
lowj = ranges[j].low;
|
|
if (lowj == NULL_TREE)
|
|
continue;
|
|
highj = ranges[j].high;
|
|
if (highj == NULL_TREE)
|
|
highj = TYPE_MAX_VALUE (type);
|
|
/* Check lowj > highi. */
|
|
tem = fold_binary (GT_EXPR, boolean_type_node,
|
|
lowj, highi);
|
|
if (tem == NULL_TREE || !integer_onep (tem))
|
|
continue;
|
|
if (optimize_xor)
|
|
changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
|
|
highi, highj, ops,
|
|
ranges + i, ranges + j);
|
|
else
|
|
changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
|
|
highi, highj, ops,
|
|
ranges + i, ranges + j);
|
|
if (changes)
|
|
{
|
|
any_changes = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
return any_changes;
|
|
}
|
|
|
|
/* Helper function of optimize_range_tests_to_bit_test. Handle a single
|
|
range, EXP, LOW, HIGH, compute bit mask of bits to test and return
|
|
EXP on success, NULL otherwise. */
|
|
|
|
static tree
|
|
extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
|
|
wide_int *mask, tree *totallowp)
|
|
{
|
|
tree tem = int_const_binop (MINUS_EXPR, high, low);
|
|
if (tem == NULL_TREE
|
|
|| TREE_CODE (tem) != INTEGER_CST
|
|
|| TREE_OVERFLOW (tem)
|
|
|| tree_int_cst_sgn (tem) == -1
|
|
|| compare_tree_int (tem, prec) != -1)
|
|
return NULL_TREE;
|
|
|
|
unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
|
|
*mask = wi::shifted_mask (0, max, false, prec);
|
|
if (TREE_CODE (exp) == BIT_AND_EXPR
|
|
&& TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
|
|
{
|
|
widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
|
|
msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
|
|
if (wi::popcount (msk) == 1
|
|
&& wi::ltu_p (msk, prec - max))
|
|
{
|
|
*mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
|
|
max += msk.to_uhwi ();
|
|
exp = TREE_OPERAND (exp, 0);
|
|
if (integer_zerop (low)
|
|
&& TREE_CODE (exp) == PLUS_EXPR
|
|
&& TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
|
|
{
|
|
tree ret = TREE_OPERAND (exp, 0);
|
|
STRIP_NOPS (ret);
|
|
widest_int bias
|
|
= wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
|
|
TYPE_PRECISION (TREE_TYPE (low))));
|
|
tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
|
|
if (totallowp)
|
|
{
|
|
*totallowp = tbias;
|
|
return ret;
|
|
}
|
|
else if (!tree_int_cst_lt (totallow, tbias))
|
|
return NULL_TREE;
|
|
bias = wi::to_widest (tbias);
|
|
bias -= wi::to_widest (totallow);
|
|
if (bias >= 0 && bias < prec - max)
|
|
{
|
|
*mask = wi::lshift (*mask, bias);
|
|
return ret;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (totallowp)
|
|
return exp;
|
|
if (!tree_int_cst_lt (totallow, low))
|
|
return exp;
|
|
tem = int_const_binop (MINUS_EXPR, low, totallow);
|
|
if (tem == NULL_TREE
|
|
|| TREE_CODE (tem) != INTEGER_CST
|
|
|| TREE_OVERFLOW (tem)
|
|
|| compare_tree_int (tem, prec - max) == 1)
|
|
return NULL_TREE;
|
|
|
|
*mask = wi::lshift (*mask, wi::to_widest (tem));
|
|
return exp;
|
|
}
|
|
|
|
/* Attempt to optimize small range tests using bit test.
|
|
E.g.
|
|
X != 43 && X != 76 && X != 44 && X != 78 && X != 49
|
|
&& X != 77 && X != 46 && X != 75 && X != 45 && X != 82
|
|
has been optimized by earlier optimizations into:
((X - 43U) & ~32U) > 3U && X != 49 && X != 82
|
|
As all the 43 through 82 range is less than 64 numbers,
|
|
for 64-bit word targets optimize that into:
|
|
(X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
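/* How the mask in the example above is formed (added explanation): each
value V in the set contributes bit V - 43 of the word, so 43 through 46
set bits 0-3, 49 sets bit 6, 75 through 78 set bits 32-35 and 82 sets
bit 39, giving the constant 0x8F0000004FULL.  */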
static bool
|
|
optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
|
|
vec<operand_entry *> *ops,
|
|
struct range_entry *ranges)
|
|
{
|
|
int i, j;
|
|
bool any_changes = false;
|
|
int prec = GET_MODE_BITSIZE (word_mode);
|
|
auto_vec<struct range_entry *, 64> candidates;
|
|
|
|
for (i = first; i < length - 2; i++)
|
|
{
|
|
tree lowi, highi, lowj, highj, type;
|
|
|
|
if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
|
|
continue;
|
|
type = TREE_TYPE (ranges[i].exp);
|
|
if (!INTEGRAL_TYPE_P (type))
|
|
continue;
|
|
lowi = ranges[i].low;
|
|
if (lowi == NULL_TREE)
|
|
lowi = TYPE_MIN_VALUE (type);
|
|
highi = ranges[i].high;
|
|
if (highi == NULL_TREE)
|
|
continue;
|
|
wide_int mask;
|
|
tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
|
|
highi, &mask, &lowi);
|
|
if (exp == NULL_TREE)
|
|
continue;
|
|
bool strict_overflow_p = ranges[i].strict_overflow_p;
|
|
candidates.truncate (0);
|
|
int end = MIN (i + 64, length);
|
|
for (j = i + 1; j < end; j++)
|
|
{
|
|
tree exp2;
|
|
if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
|
|
continue;
|
|
if (ranges[j].exp == exp)
|
|
;
|
|
else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
|
|
{
|
|
exp2 = TREE_OPERAND (ranges[j].exp, 0);
|
|
if (exp2 == exp)
|
|
;
|
|
else if (TREE_CODE (exp2) == PLUS_EXPR)
|
|
{
|
|
exp2 = TREE_OPERAND (exp2, 0);
|
|
STRIP_NOPS (exp2);
|
|
if (exp2 != exp)
|
|
continue;
|
|
}
|
|
else
|
|
continue;
|
|
}
|
|
else
|
|
continue;
|
|
lowj = ranges[j].low;
|
|
if (lowj == NULL_TREE)
|
|
continue;
|
|
highj = ranges[j].high;
|
|
if (highj == NULL_TREE)
|
|
highj = TYPE_MAX_VALUE (type);
|
|
wide_int mask2;
|
|
exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
|
|
highj, &mask2, NULL);
|
|
if (exp2 != exp)
|
|
continue;
|
|
mask |= mask2;
|
|
strict_overflow_p |= ranges[j].strict_overflow_p;
|
|
candidates.safe_push (&ranges[j]);
|
|
}
|
|
|
|
/* If we need otherwise 3 or more comparisons, use a bit test. */
|
|
if (candidates.length () >= 2)
|
|
{
|
|
tree high = wide_int_to_tree (TREE_TYPE (lowi),
|
|
wi::to_widest (lowi)
|
|
+ prec - 1 - wi::clz (mask));
|
|
operand_entry *oe = (*ops)[ranges[i].idx];
|
|
tree op = oe->op;
|
|
gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
|
|
: last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
|
|
location_t loc = gimple_location (stmt);
|
|
tree optype = op ? TREE_TYPE (op) : boolean_type_node;
|
|
|
|
/* See if it isn't cheaper to pretend the minimum value of the
|
|
range is 0, if maximum value is small enough.
|
|
We can then avoid the subtraction of the minimum value, but the
mask constant could perhaps be more expensive. */
|
|
if (compare_tree_int (lowi, 0) > 0
|
|
&& compare_tree_int (high, prec) < 0)
|
|
{
|
|
int cost_diff;
|
|
HOST_WIDE_INT m = tree_to_uhwi (lowi);
|
|
rtx reg = gen_raw_REG (word_mode, 10000);
|
|
bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
|
|
cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
|
|
GEN_INT (-m)), speed_p);
|
|
rtx r = immed_wide_int_const (mask, word_mode);
|
|
cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
|
|
word_mode, speed_p);
|
|
r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
|
|
cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
|
|
word_mode, speed_p);
|
|
if (cost_diff > 0)
|
|
{
|
|
mask = wi::lshift (mask, m);
|
|
lowi = build_zero_cst (TREE_TYPE (lowi));
|
|
}
|
|
}
|
|
|
|
tree tem = build_range_check (loc, optype, unshare_expr (exp),
|
|
false, lowi, high);
|
|
if (tem == NULL_TREE || is_gimple_val (tem))
|
|
continue;
|
|
tree etype = unsigned_type_for (TREE_TYPE (exp));
|
|
exp = fold_build2_loc (loc, MINUS_EXPR, etype,
|
|
fold_convert_loc (loc, etype, exp),
|
|
fold_convert_loc (loc, etype, lowi));
|
|
exp = fold_convert_loc (loc, integer_type_node, exp);
|
|
tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
|
|
exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
|
|
build_int_cst (word_type, 1), exp);
|
|
exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
|
|
wide_int_to_tree (word_type, mask));
|
|
exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
|
|
build_zero_cst (word_type));
|
|
if (is_gimple_val (exp))
|
|
continue;
|
|
|
|
/* The shift might have undefined behavior if TEM is true,
|
|
but reassociate_bb isn't prepared to have basic blocks
|
|
split when it is running. So, temporarily emit code
|
|
with BIT_IOR_EXPR instead of &&, and fix it up in
|
|
branch_fixup. */
|
|
gimple_seq seq;
|
|
tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
|
|
gcc_assert (TREE_CODE (tem) == SSA_NAME);
|
|
gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
|
|
gimple_seq seq2;
|
|
exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
|
|
gimple_seq_add_seq_without_update (&seq, seq2);
|
|
gcc_assert (TREE_CODE (exp) == SSA_NAME);
|
|
gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
|
|
gimple *g = gimple_build_assign (make_ssa_name (optype),
|
|
BIT_IOR_EXPR, tem, exp);
|
|
gimple_set_location (g, loc);
|
|
gimple_seq_add_stmt_without_update (&seq, g);
|
|
exp = gimple_assign_lhs (g);
|
|
tree val = build_zero_cst (optype);
|
|
if (update_range_test (&ranges[i], NULL, candidates.address (),
|
|
candidates.length (), opcode, ops, exp,
|
|
seq, false, val, val, strict_overflow_p))
|
|
{
|
|
any_changes = true;
|
|
reassoc_branch_fixups.safe_push (tem);
|
|
}
|
|
else
|
|
gimple_seq_discard (seq);
|
|
}
|
|
}
|
|
return any_changes;
|
|
}
|
|
|
|
/* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1. */
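/* Rationale (added note): a bitwise OR is zero iff every operand is
zero, and a bitwise AND is all-ones iff every operand is all-ones, so
the single combined comparison is equivalent to the original chain of
equality tests.  */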
static bool
|
|
optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
|
|
vec<operand_entry *> *ops,
|
|
struct range_entry *ranges)
|
|
{
|
|
int i;
|
|
unsigned int b;
|
|
bool any_changes = false;
|
|
auto_vec<int, 128> buckets;
|
|
auto_vec<int, 32> chains;
|
|
auto_vec<struct range_entry *, 32> candidates;
|
|
|
|
for (i = first; i < length; i++)
|
|
{
|
|
if (ranges[i].exp == NULL_TREE
|
|
|| TREE_CODE (ranges[i].exp) != SSA_NAME
|
|
|| !ranges[i].in_p
|
|
|| TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
|
|
|| TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
|
|
|| ranges[i].low == NULL_TREE
|
|
|| ranges[i].low != ranges[i].high)
|
|
continue;
|
|
|
|
bool zero_p = integer_zerop (ranges[i].low);
|
|
if (!zero_p && !integer_all_onesp (ranges[i].low))
|
|
continue;
|
|
|
|
b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
|
|
if (buckets.length () <= b)
|
|
buckets.safe_grow_cleared (b + 1);
|
|
if (chains.length () <= (unsigned) i)
|
|
chains.safe_grow (i + 1);
|
|
chains[i] = buckets[b];
|
|
buckets[b] = i + 1;
|
|
}
|
|
|
|
FOR_EACH_VEC_ELT (buckets, b, i)
|
|
if (i && chains[i - 1])
|
|
{
|
|
int j, k = i;
|
|
for (j = chains[i - 1]; j; j = chains[j - 1])
|
|
{
|
|
gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
|
|
gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
|
|
if (reassoc_stmt_dominates_stmt_p (gk, gj))
|
|
k = j;
|
|
}
|
|
tree type1 = TREE_TYPE (ranges[k - 1].exp);
|
|
tree type2 = NULL_TREE;
|
|
bool strict_overflow_p = false;
|
|
candidates.truncate (0);
|
|
for (j = i; j; j = chains[j - 1])
|
|
{
|
|
tree type = TREE_TYPE (ranges[j - 1].exp);
|
|
strict_overflow_p |= ranges[j - 1].strict_overflow_p;
|
|
if (j == k
|
|
|| useless_type_conversion_p (type1, type))
|
|
;
|
|
else if (type2 == NULL_TREE
|
|
|| useless_type_conversion_p (type2, type))
|
|
{
|
|
if (type2 == NULL_TREE)
|
|
type2 = type;
|
|
candidates.safe_push (&ranges[j - 1]);
|
|
}
|
|
}
|
|
unsigned l = candidates.length ();
|
|
for (j = i; j; j = chains[j - 1])
|
|
{
|
|
tree type = TREE_TYPE (ranges[j - 1].exp);
|
|
if (j == k)
|
|
continue;
|
|
if (useless_type_conversion_p (type1, type))
|
|
;
|
|
else if (type2 == NULL_TREE
|
|
|| useless_type_conversion_p (type2, type))
|
|
continue;
|
|
candidates.safe_push (&ranges[j - 1]);
|
|
}
|
|
gimple_seq seq = NULL;
|
|
tree op = NULL_TREE;
|
|
unsigned int id;
|
|
struct range_entry *r;
|
|
candidates.safe_push (&ranges[k - 1]);
|
|
FOR_EACH_VEC_ELT (candidates, id, r)
|
|
{
|
|
gimple *g;
|
|
if (id == 0)
|
|
{
|
|
op = r->exp;
|
|
continue;
|
|
}
|
|
if (id == l)
|
|
{
|
|
g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
|
|
gimple_seq_add_stmt_without_update (&seq, g);
|
|
op = gimple_assign_lhs (g);
|
|
}
|
|
tree type = TREE_TYPE (r->exp);
|
|
tree exp = r->exp;
|
|
if (id >= l && !useless_type_conversion_p (type1, type))
|
|
{
|
|
g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
|
|
gimple_seq_add_stmt_without_update (&seq, g);
|
|
exp = gimple_assign_lhs (g);
|
|
}
|
|
g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
|
|
(b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
|
|
op, exp);
|
|
gimple_seq_add_stmt_without_update (&seq, g);
|
|
op = gimple_assign_lhs (g);
|
|
}
|
|
candidates.pop ();
|
|
if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
|
|
candidates.length (), opcode, ops, op,
|
|
seq, true, ranges[k - 1].low,
|
|
ranges[k - 1].low, strict_overflow_p))
|
|
any_changes = true;
|
|
else
|
|
gimple_seq_discard (seq);
|
|
}
|
|
|
|
return any_changes;
|
|
}
|
|
|
|
/* Attempt to optimize for signed a and b where b is known to be >= 0:
|
|
a >= 0 && a < b into (unsigned) a < (unsigned) b
|
|
a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
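/* Why one unsigned comparison suffices (added note): when a is signed
and negative, (unsigned) a has the sign bit set and is therefore larger
than (unsigned) b for any b >= 0, so the unsigned compare fails exactly
in the cases the dropped a >= 0 test would have rejected.  */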
static bool
|
|
optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
|
|
vec<operand_entry *> *ops,
|
|
struct range_entry *ranges,
|
|
basic_block first_bb)
|
|
{
|
|
int i;
|
|
bool any_changes = false;
|
|
hash_map<tree, int> *map = NULL;
|
|
|
|
for (i = first; i < length; i++)
|
|
{
|
|
if (ranges[i].exp == NULL_TREE
|
|
|| TREE_CODE (ranges[i].exp) != SSA_NAME
|
|
|| !ranges[i].in_p)
|
|
continue;
|
|
|
|
tree type = TREE_TYPE (ranges[i].exp);
|
|
if (!INTEGRAL_TYPE_P (type)
|
|
|| TYPE_UNSIGNED (type)
|
|
|| ranges[i].low == NULL_TREE
|
|
|| !integer_zerop (ranges[i].low)
|
|
|| ranges[i].high != NULL_TREE)
|
|
continue;
|
|
/* EXP >= 0 here. */
|
|
if (map == NULL)
|
|
map = new hash_map <tree, int>;
|
|
map->put (ranges[i].exp, i);
|
|
}
|
|
|
|
if (map == NULL)
|
|
return false;
|
|
|
|
for (i = 0; i < length; i++)
|
|
{
|
|
bool in_p = ranges[i].in_p;
|
|
if (ranges[i].low == NULL_TREE
|
|
|| ranges[i].high == NULL_TREE)
|
|
continue;
|
|
if (!integer_zerop (ranges[i].low)
|
|
|| !integer_zerop (ranges[i].high))
|
|
{
|
|
if (ranges[i].exp
|
|
&& TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
|
|
&& TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
|
|
&& integer_onep (ranges[i].low)
|
|
&& integer_onep (ranges[i].high))
|
|
in_p = !in_p;
|
|
else
|
|
continue;
|
|
}
|
|
|
|
gimple *stmt;
|
|
tree_code ccode;
|
|
tree rhs1, rhs2;
|
|
if (ranges[i].exp)
|
|
{
|
|
if (TREE_CODE (ranges[i].exp) != SSA_NAME)
|
|
continue;
|
|
stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
|
|
if (!is_gimple_assign (stmt))
|
|
continue;
|
|
ccode = gimple_assign_rhs_code (stmt);
|
|
rhs1 = gimple_assign_rhs1 (stmt);
|
|
rhs2 = gimple_assign_rhs2 (stmt);
|
|
}
|
|
else
|
|
{
|
|
operand_entry *oe = (*ops)[ranges[i].idx];
|
|
stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
|
|
if (gimple_code (stmt) != GIMPLE_COND)
|
|
continue;
|
|
ccode = gimple_cond_code (stmt);
|
|
rhs1 = gimple_cond_lhs (stmt);
|
|
rhs2 = gimple_cond_rhs (stmt);
|
|
}
|
|
|
|
if (TREE_CODE (rhs1) != SSA_NAME
|
|
|| rhs2 == NULL_TREE
|
|
|| TREE_CODE (rhs2) != SSA_NAME)
|
|
continue;
|
|
|
|
switch (ccode)
|
|
{
|
|
case GT_EXPR:
|
|
case GE_EXPR:
|
|
case LT_EXPR:
|
|
case LE_EXPR:
|
|
break;
|
|
default:
|
|
continue;
|
|
}
|
|
if (in_p)
|
|
ccode = invert_tree_comparison (ccode, false);
|
|
switch (ccode)
|
|
{
|
|
case GT_EXPR:
|
|
case GE_EXPR:
|
|
std::swap (rhs1, rhs2);
|
|
ccode = swap_tree_comparison (ccode);
|
|
break;
|
|
case LT_EXPR:
|
|
case LE_EXPR:
|
|
break;
|
|
default:
|
|
gcc_unreachable ();
|
|
}
|
|
|
|
int *idx = map->get (rhs1);
|
|
if (idx == NULL)
|
|
continue;
|
|
|
|
/* maybe_optimize_range_tests allows statements without side-effects
in the basic blocks as long as they are consumed in the same bb.
Make sure rhs2's def stmt is not among them, otherwise we can't
safely use get_nonzero_bits on it. E.g. in:
# RANGE [-83, 1] NONZERO 173
|
|
# k_32 = PHI <k_47(13), k_12(9)>
|
|
...
|
|
if (k_32 >= 0)
|
|
goto <bb 5>; [26.46%]
|
|
else
|
|
goto <bb 9>; [73.54%]
|
|
|
|
<bb 5> [local count: 140323371]:
|
|
# RANGE [0, 1] NONZERO 1
|
|
_5 = (int) k_32;
|
|
# RANGE [0, 4] NONZERO 4
|
|
_21 = _5 << 2;
|
|
# RANGE [0, 4] NONZERO 4
|
|
iftmp.0_44 = (char) _21;
|
|
if (k_32 < iftmp.0_44)
|
|
goto <bb 6>; [84.48%]
|
|
else
|
|
goto <bb 9>; [15.52%]
|
|
the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
|
|
k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
|
|
to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
|
|
those stmts even for negative k_32 and the value ranges would be no
|
|
longer guaranteed and so the optimization would be invalid. */
|
|
if (opcode == ERROR_MARK)
|
|
{
|
|
gimple *g = SSA_NAME_DEF_STMT (rhs2);
|
|
basic_block bb2 = gimple_bb (g);
|
|
if (bb2
|
|
&& bb2 != first_bb
|
|
&& dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
|
|
{
|
|
/* As an exception, handle a few common cases. */
|
|
if (gimple_assign_cast_p (g)
|
|
&& INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g)))
|
|
&& TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (g)))
|
|
&& (TYPE_PRECISION (TREE_TYPE (rhs2))
|
|
> TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (g)))))
|
|
/* Zero-extension is always ok. */ ;
|
|
else if (is_gimple_assign (g)
|
|
&& gimple_assign_rhs_code (g) == BIT_AND_EXPR
|
|
&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
|
|
&& !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
|
|
/* Masking with INTEGER_CST with MSB clear is always ok
|
|
too. */ ;
|
|
else
|
|
continue;
|
|
}
|
|
}
|
|
|
|
wide_int nz = get_nonzero_bits (rhs2);
|
|
if (wi::neg_p (nz))
|
|
continue;
|
|
|
|
/* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
|
|
and RHS2 is known to be RHS2 >= 0. */
|
|
tree utype = unsigned_type_for (TREE_TYPE (rhs1));
|
|
|
|
enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
|
|
if ((ranges[*idx].strict_overflow_p
|
|
|| ranges[i].strict_overflow_p)
|
|
&& issue_strict_overflow_warning (wc))
|
|
warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
|
|
"assuming signed overflow does not occur "
|
|
"when simplifying range test");
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
struct range_entry *r = &ranges[*idx];
|
|
fprintf (dump_file, "Optimizing range test ");
|
|
print_generic_expr (dump_file, r->exp);
|
|
fprintf (dump_file, " +[");
|
|
print_generic_expr (dump_file, r->low);
|
|
fprintf (dump_file, ", ");
|
|
print_generic_expr (dump_file, r->high);
|
|
fprintf (dump_file, "] and comparison ");
|
|
print_generic_expr (dump_file, rhs1);
|
|
fprintf (dump_file, " %s ", op_symbol_code (ccode));
|
|
print_generic_expr (dump_file, rhs2);
|
|
fprintf (dump_file, "\n into (");
|
|
print_generic_expr (dump_file, utype);
|
|
fprintf (dump_file, ") ");
|
|
print_generic_expr (dump_file, rhs1);
|
|
fprintf (dump_file, " %s (", op_symbol_code (ccode));
|
|
print_generic_expr (dump_file, utype);
|
|
fprintf (dump_file, ") ");
|
|
print_generic_expr (dump_file, rhs2);
|
|
fprintf (dump_file, "\n");
|
|
}
|
|
|
|
operand_entry *oe = (*ops)[ranges[i].idx];
|
|
ranges[i].in_p = 0;
|
|
if (opcode == BIT_IOR_EXPR
|
|
|| (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
|
|
{
|
|
ranges[i].in_p = 1;
|
|
ccode = invert_tree_comparison (ccode, false);
|
|
}
|
|
|
|
unsigned int uid = gimple_uid (stmt);
|
|
gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
|
|
gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
|
|
gimple_set_uid (g, uid);
|
|
rhs1 = gimple_assign_lhs (g);
|
|
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
|
g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
|
|
gimple_set_uid (g, uid);
|
|
rhs2 = gimple_assign_lhs (g);
|
|
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
|
if (tree_swap_operands_p (rhs1, rhs2))
|
|
{
|
|
std::swap (rhs1, rhs2);
|
|
ccode = swap_tree_comparison (ccode);
|
|
}
|
|
if (gimple_code (stmt) == GIMPLE_COND)
|
|
{
|
|
gcond *c = as_a <gcond *> (stmt);
|
|
gimple_cond_set_code (c, ccode);
|
|
gimple_cond_set_lhs (c, rhs1);
|
|
gimple_cond_set_rhs (c, rhs2);
|
|
update_stmt (stmt);
|
|
}
|
|
else
|
|
{
|
|
tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
|
|
if (!INTEGRAL_TYPE_P (ctype)
|
|
|| (TREE_CODE (ctype) != BOOLEAN_TYPE
|
|
&& TYPE_PRECISION (ctype) != 1))
|
|
ctype = boolean_type_node;
|
|
g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
|
|
gimple_set_uid (g, uid);
|
|
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
|
if (oe->op && ctype != TREE_TYPE (oe->op))
|
|
{
|
|
g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
|
|
NOP_EXPR, gimple_assign_lhs (g));
|
|
gimple_set_uid (g, uid);
|
|
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
|
}
|
|
ranges[i].exp = gimple_assign_lhs (g);
|
|
oe->op = ranges[i].exp;
|
|
ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
|
|
ranges[i].high = ranges[i].low;
|
|
}
|
|
ranges[i].strict_overflow_p = false;
|
|
oe = (*ops)[ranges[*idx].idx];
|
|
/* Now change all the other range test immediate uses, so that
|
|
those tests will be optimized away. */
|
|
if (opcode == ERROR_MARK)
|
|
{
|
|
if (oe->op)
|
|
oe->op = build_int_cst (TREE_TYPE (oe->op),
|
|
oe->rank == BIT_IOR_EXPR ? 0 : 1);
|
|
else
|
|
oe->op = (oe->rank == BIT_IOR_EXPR
|
|
? boolean_false_node : boolean_true_node);
|
|
}
|
|
else
|
|
oe->op = error_mark_node;
|
|
ranges[*idx].exp = NULL_TREE;
|
|
ranges[*idx].low = NULL_TREE;
|
|
ranges[*idx].high = NULL_TREE;
|
|
any_changes = true;
|
|
}
|
|
|
|
delete map;
|
|
return any_changes;
|
|
}
|
|
|
|
/* Optimize range tests, similarly how fold_range_test optimizes
|
|
it on trees. The tree code for the binary
|
|
operation between all the operands is OPCODE.
|
|
If OPCODE is ERROR_MARK, optimize_range_tests is called from within
|
|
maybe_optimize_range_tests for inter-bb range optimization.
|
|
In that case if oe->op is NULL, oe->id is bb->index whose
|
|
GIMPLE_COND is && or ||ed into the test, and oe->rank says
|
|
the actual opcode.
|
|
FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
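/* Overview of the steps below (added summary): the operands are first
turned into range_entry records and sorted, consecutive ranges on the
same SSA_NAME are merged via merge_ranges, then the xor/diff, bit-test,
bitwise-compare and variable-bound subroutines above are tried in turn,
and finally the ops vector is compacted if anything changed.  */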
static bool
|
|
optimize_range_tests (enum tree_code opcode,
|
|
vec<operand_entry *> *ops, basic_block first_bb)
|
|
{
|
|
unsigned int length = ops->length (), i, j, first;
|
|
operand_entry *oe;
|
|
struct range_entry *ranges;
|
|
bool any_changes = false;
|
|
|
|
if (length == 1)
|
|
return false;
|
|
|
|
ranges = XNEWVEC (struct range_entry, length);
|
|
for (i = 0; i < length; i++)
|
|
{
|
|
oe = (*ops)[i];
|
|
ranges[i].idx = i;
|
|
init_range_entry (ranges + i, oe->op,
|
|
oe->op
|
|
? NULL
|
|
: last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
|
|
/* For | invert it now, we will invert it again before emitting
|
|
the optimized expression. */
|
|
if (opcode == BIT_IOR_EXPR
|
|
|| (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
|
|
ranges[i].in_p = !ranges[i].in_p;
|
|
}
|
|
|
|
qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
|
|
for (i = 0; i < length; i++)
|
|
if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
|
|
break;
|
|
|
|
/* Try to merge ranges. */
|
|
for (first = i; i < length; i++)
|
|
{
|
|
tree low = ranges[i].low;
|
|
tree high = ranges[i].high;
|
|
int in_p = ranges[i].in_p;
|
|
bool strict_overflow_p = ranges[i].strict_overflow_p;
|
|
int update_fail_count = 0;
|
|
|
|
for (j = i + 1; j < length; j++)
|
|
{
|
|
if (ranges[i].exp != ranges[j].exp)
|
|
break;
|
|
if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
|
|
ranges[j].in_p, ranges[j].low, ranges[j].high))
|
|
break;
|
|
strict_overflow_p |= ranges[j].strict_overflow_p;
|
|
}
|
|
|
|
if (j == i + 1)
|
|
continue;
|
|
|
|
if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
|
|
opcode, ops, ranges[i].exp, NULL, in_p,
|
|
low, high, strict_overflow_p))
|
|
{
|
|
i = j - 1;
|
|
any_changes = true;
|
|
}
|
|
/* Avoid quadratic complexity if all merge_ranges calls would succeed,
|
|
while update_range_test would fail. */
|
|
else if (update_fail_count == 64)
|
|
i = j - 1;
|
|
else
|
|
++update_fail_count;
|
|
}
|
|
|
|
any_changes |= optimize_range_tests_1 (opcode, first, length, true,
|
|
ops, ranges);
|
|
|
|
if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
|
|
any_changes |= optimize_range_tests_1 (opcode, first, length, false,
|
|
ops, ranges);
|
|
if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
|
|
any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
|
|
ops, ranges);
|
|
any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
|
|
ops, ranges);
|
|
any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
|
|
ranges, first_bb);
|
|
|
|
if (any_changes && opcode != ERROR_MARK)
|
|
{
|
|
j = 0;
|
|
FOR_EACH_VEC_ELT (*ops, i, oe)
|
|
{
|
|
if (oe->op == error_mark_node)
|
|
continue;
|
|
else if (i != j)
|
|
(*ops)[j] = oe;
|
|
j++;
|
|
}
|
|
ops->truncate (j);
|
|
}
|
|
|
|
XDELETEVEC (ranges);
|
|
return any_changes;
|
|
}
|
|
|
|
/* A subroutine of optimize_vec_cond_expr to extract and canonicalize
|
|
the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
|
|
otherwise the comparison code. */
|
|
|
|
static tree_code
|
|
ovce_extract_ops (tree var, gassign **rets, bool *reti)
|
|
{
|
|
if (TREE_CODE (var) != SSA_NAME)
|
|
return ERROR_MARK;
|
|
|
|
gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
|
|
if (stmt == NULL)
|
|
return ERROR_MARK;
|
|
|
|
/* ??? If we start creating more COND_EXPR, we could perform
|
|
this same optimization with them. For now, simplify. */
|
|
if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
|
|
return ERROR_MARK;
|
|
|
|
tree cond = gimple_assign_rhs1 (stmt);
|
|
tree_code cmp = TREE_CODE (cond);
|
|
if (TREE_CODE_CLASS (cmp) != tcc_comparison)
|
|
return ERROR_MARK;
|
|
|
|
/* ??? For now, allow only canonical true and false result vectors.
|
|
We could expand this to other constants should the need arise,
|
|
but at the moment we don't create them. */
|
|
tree t = gimple_assign_rhs2 (stmt);
|
|
tree f = gimple_assign_rhs3 (stmt);
|
|
bool inv;
|
|
if (integer_all_onesp (t))
|
|
inv = false;
|
|
else if (integer_all_onesp (f))
|
|
{
|
|
cmp = invert_tree_comparison (cmp, false);
|
|
inv = true;
|
|
}
|
|
else
|
|
return ERROR_MARK;
|
|
if (!integer_zerop (f))
|
|
return ERROR_MARK;
|
|
|
|
/* Success! */
|
|
if (rets)
|
|
*rets = stmt;
|
|
if (reti)
|
|
*reti = inv;
|
|
return cmp;
|
|
}
|
|
|
|
/* Optimize the condition of VEC_COND_EXPRs which have been combined
|
|
with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
|
|
|
|
static bool
|
|
optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
|
|
{
|
|
unsigned int length = ops->length (), i, j;
|
|
bool any_changes = false;
|
|
|
|
if (length == 1)
|
|
return false;
|
|
|
|
for (i = 0; i < length; ++i)
|
|
{
|
|
tree elt0 = (*ops)[i]->op;
|
|
|
|
gassign *stmt0;
|
|
bool invert;
|
|
tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
|
|
if (cmp0 == ERROR_MARK)
|
|
continue;
|
|
|
|
for (j = i + 1; j < length; ++j)
|
|
{
|
|
tree &elt1 = (*ops)[j]->op;
|
|
|
|
gassign *stmt1;
|
|
tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
|
|
if (cmp1 == ERROR_MARK)
|
|
continue;
|
|
|
|
tree cond0 = gimple_assign_rhs1 (stmt0);
|
|
tree x0 = TREE_OPERAND (cond0, 0);
|
|
tree y0 = TREE_OPERAND (cond0, 1);
|
|
|
|
tree cond1 = gimple_assign_rhs1 (stmt1);
|
|
tree x1 = TREE_OPERAND (cond1, 0);
|
|
tree y1 = TREE_OPERAND (cond1, 1);
|
|
|
|
tree comb;
|
|
if (opcode == BIT_AND_EXPR)
|
|
comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
|
|
else if (opcode == BIT_IOR_EXPR)
|
|
comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
|
|
else
|
|
gcc_unreachable ();
|
|
if (comb == NULL)
|
|
continue;
|
|
|
|
/* Success! */
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "Transforming ");
|
|
print_generic_expr (dump_file, cond0);
|
|
fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
|
|
print_generic_expr (dump_file, cond1);
|
|
fprintf (dump_file, " into ");
|
|
print_generic_expr (dump_file, comb);
|
|
fputc ('\n', dump_file);
|
|
}
|
|
|
|
gimple_assign_set_rhs1 (stmt0, comb);
|
|
if (invert)
|
|
std::swap (*gimple_assign_rhs2_ptr (stmt0),
|
|
*gimple_assign_rhs3_ptr (stmt0));
|
|
update_stmt (stmt0);
|
|
|
|
elt1 = error_mark_node;
|
|
any_changes = true;
|
|
}
|
|
}
|
|
|
|
if (any_changes)
|
|
{
|
|
operand_entry *oe;
|
|
j = 0;
|
|
FOR_EACH_VEC_ELT (*ops, i, oe)
|
|
{
|
|
if (oe->op == error_mark_node)
|
|
continue;
|
|
else if (i != j)
|
|
(*ops)[j] = oe;
|
|
j++;
|
|
}
|
|
ops->truncate (j);
|
|
}
|
|
|
|
return any_changes;
|
|
}
|
|
|
|
/* Return true if STMT is a cast like:
|
|
<bb N>:
|
|
...
|
|
_123 = (int) _234;
|
|
|
|
<bb M>:
|
|
# _345 = PHI <_123(N), 1(...), 1(...)>
|
|
where _234 has bool type, _123 has a single use, and
|
|
bb N has a single successor M. This is commonly used in
|
|
the last block of a range test.
|
|
|
|
Also return true if STMT is a tcc_comparison like:
|
|
<bb N>:
|
|
...
|
|
_234 = a_2(D) == 2;
|
|
|
|
<bb M>:
|
|
# _345 = PHI <_234(N), 1(...), 1(...)>
|
|
_346 = (int) _345;
|
|
where _234 has bool type, a single use, and
|
|
bb N has a single successor M. This is commonly used in
|
|
the last block of a range test. */
|
|
|
|
static bool
|
|
final_range_test_p (gimple *stmt)
|
|
{
|
|
basic_block bb, rhs_bb, lhs_bb;
|
|
edge e;
|
|
tree lhs, rhs;
|
|
use_operand_p use_p;
|
|
gimple *use_stmt;
|
|
|
|
if (!gimple_assign_cast_p (stmt)
|
|
&& (!is_gimple_assign (stmt)
|
|
|| (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
|
|
!= tcc_comparison)))
|
|
return false;
|
|
bb = gimple_bb (stmt);
|
|
if (!single_succ_p (bb))
|
|
return false;
|
|
e = single_succ_edge (bb);
|
|
if (e->flags & EDGE_COMPLEX)
|
|
return false;
|
|
|
|
lhs = gimple_assign_lhs (stmt);
|
|
rhs = gimple_assign_rhs1 (stmt);
|
|
if (gimple_assign_cast_p (stmt)
|
|
&& (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
|
|
|| TREE_CODE (rhs) != SSA_NAME
|
|
|| TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
|
|
return false;
|
|
|
|
if (!gimple_assign_cast_p (stmt)
|
|
&& (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
|
|
return false;
|
|
|
|
/* Test whether lhs is consumed only by a PHI in the only successor bb. */
|
|
if (!single_imm_use (lhs, &use_p, &use_stmt))
|
|
return false;
|
|
|
|
if (gimple_code (use_stmt) != GIMPLE_PHI
|
|
|| gimple_bb (use_stmt) != e->dest)
|
|
return false;
|
|
|
|
/* And that the rhs is defined in the same loop. */
|
|
if (gimple_assign_cast_p (stmt))
|
|
{
|
|
if (TREE_CODE (rhs) != SSA_NAME
|
|
|| !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
|
|
|| !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
if (TREE_CODE (lhs) != SSA_NAME
|
|
|| !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
|
|
|| !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/* Return true if BB is suitable basic block for inter-bb range test
|
|
optimization. If BACKWARD is true, BB should be the only predecessor
|
|
of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
|
|
or compared with to find a common basic block to which all conditions
|
|
branch if true resp. false. If BACKWARD is false, TEST_BB should
|
|
be the only predecessor of BB. */
|
|
|
|
static bool
|
|
suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
|
|
bool backward)
|
|
{
|
|
edge_iterator ei, ei2;
|
|
edge e, e2;
|
|
gimple *stmt;
|
|
gphi_iterator gsi;
|
|
bool other_edge_seen = false;
|
|
bool is_cond;
|
|
|
|
if (test_bb == bb)
|
|
return false;
|
|
/* Check last stmt first. */
|
|
stmt = last_stmt (bb);
|
|
if (stmt == NULL
|
|
|| (gimple_code (stmt) != GIMPLE_COND
|
|
&& (backward || !final_range_test_p (stmt)))
|
|
|| gimple_visited_p (stmt)
|
|
|| stmt_could_throw_p (stmt)
|
|
|| *other_bb == bb)
|
|
return false;
|
|
is_cond = gimple_code (stmt) == GIMPLE_COND;
|
|
if (is_cond)
|
|
{
|
|
/* If last stmt is GIMPLE_COND, verify that one of the succ edges
|
|
goes to the next bb (if BACKWARD, it is TEST_BB), and the other
|
|
to *OTHER_BB (if not set yet, try to find it out). */
|
|
if (EDGE_COUNT (bb->succs) != 2)
|
|
return false;
|
|
FOR_EACH_EDGE (e, ei, bb->succs)
|
|
{
|
|
if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
|
|
return false;
|
|
if (e->dest == test_bb)
|
|
{
|
|
if (backward)
|
|
continue;
|
|
else
|
|
return false;
|
|
}
|
|
if (e->dest == bb)
|
|
return false;
|
|
if (*other_bb == NULL)
|
|
{
|
|
FOR_EACH_EDGE (e2, ei2, test_bb->succs)
|
|
if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
|
|
return false;
|
|
else if (e->dest == e2->dest)
|
|
*other_bb = e->dest;
|
|
if (*other_bb == NULL)
|
|
return false;
|
|
}
|
|
if (e->dest == *other_bb)
|
|
other_edge_seen = true;
|
|
else if (backward)
|
|
return false;
|
|
}
|
|
if (*other_bb == NULL || !other_edge_seen)
|
|
return false;
|
|
}
|
|
else if (single_succ (bb) != *other_bb)
|
|
return false;
|
|
|
|
/* Now check all PHIs of *OTHER_BB. */
|
|
e = find_edge (bb, *other_bb);
|
|
e2 = find_edge (test_bb, *other_bb);
|
|
for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
{
|
|
gphi *phi = gsi.phi ();
|
|
/* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
|
|
corresponding to BB and TEST_BB predecessor must be the same. */
|
|
if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
|
|
gimple_phi_arg_def (phi, e2->dest_idx), 0))
|
|
{
|
|
/* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
|
|
one of the PHIs should have the lhs of the last stmt in
|
|
that block as PHI arg and that PHI should have 0 or 1
|
|
corresponding to it in all other range test basic blocks
|
|
considered. */
|
|
if (!is_cond)
|
|
{
|
|
if (gimple_phi_arg_def (phi, e->dest_idx)
|
|
== gimple_assign_lhs (stmt)
|
|
&& (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
|
|
|| integer_onep (gimple_phi_arg_def (phi,
|
|
e2->dest_idx))))
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
gimple *test_last = last_stmt (test_bb);
|
|
if (gimple_code (test_last) != GIMPLE_COND
|
|
&& gimple_phi_arg_def (phi, e2->dest_idx)
|
|
== gimple_assign_lhs (test_last)
|
|
&& (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
|
|
|| integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
|
|
continue;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/* Return true if BB doesn't have side-effects that would disallow
|
|
range test optimization, all SSA_NAMEs set in the bb are consumed
|
|
in the bb and there are no PHIs. */
|
|
|
|
static bool
|
|
no_side_effect_bb (basic_block bb)
|
|
{
|
|
gimple_stmt_iterator gsi;
|
|
gimple *last;
|
|
|
|
if (!gimple_seq_empty_p (phi_nodes (bb)))
|
|
return false;
|
|
last = last_stmt (bb);
|
|
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
{
|
|
gimple *stmt = gsi_stmt (gsi);
|
|
tree lhs;
|
|
imm_use_iterator imm_iter;
|
|
use_operand_p use_p;
|
|
|
|
if (is_gimple_debug (stmt))
|
|
continue;
|
|
if (gimple_has_side_effects (stmt))
|
|
return false;
|
|
if (stmt == last)
|
|
return true;
|
|
if (!is_gimple_assign (stmt))
|
|
return false;
|
|
lhs = gimple_assign_lhs (stmt);
|
|
if (TREE_CODE (lhs) != SSA_NAME)
|
|
return false;
|
|
if (gimple_assign_rhs_could_trap_p (stmt))
|
|
return false;
|
|
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
|
|
{
|
|
gimple *use_stmt = USE_STMT (use_p);
|
|
if (is_gimple_debug (use_stmt))
|
|
continue;
|
|
if (gimple_bb (use_stmt) != bb)
|
|
return false;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
|
|
return true and fill in *OPS recursively. */
|
|
|
|
static bool
|
|
get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
|
|
struct loop *loop)
|
|
{
|
|
gimple *stmt = SSA_NAME_DEF_STMT (var);
|
|
tree rhs[2];
|
|
int i;
|
|
|
|
if (!is_reassociable_op (stmt, code, loop))
|
|
return false;
|
|
|
|
rhs[0] = gimple_assign_rhs1 (stmt);
|
|
rhs[1] = gimple_assign_rhs2 (stmt);
|
|
gimple_set_visited (stmt, true);
|
|
for (i = 0; i < 2; i++)
|
|
if (TREE_CODE (rhs[i]) == SSA_NAME
|
|
&& !get_ops (rhs[i], code, ops, loop)
|
|
&& has_single_use (rhs[i]))
|
|
{
|
|
operand_entry *oe = operand_entry_pool.allocate ();
|
|
|
|
oe->op = rhs[i];
|
|
oe->rank = code;
|
|
oe->id = 0;
|
|
oe->count = 1;
|
|
oe->stmt_to_insert = NULL;
|
|
ops->safe_push (oe);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/* Find the ops that were added by get_ops starting from VAR, see if
|
|
they were changed during update_range_test and if yes, create new
|
|
stmts. */
|
|
|
|
static tree
|
|
update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
|
|
unsigned int *pidx, struct loop *loop)
|
|
{
|
|
gimple *stmt = SSA_NAME_DEF_STMT (var);
|
|
tree rhs[4];
|
|
int i;
|
|
|
|
if (!is_reassociable_op (stmt, code, loop))
|
|
return NULL;
|
|
|
|
rhs[0] = gimple_assign_rhs1 (stmt);
|
|
rhs[1] = gimple_assign_rhs2 (stmt);
|
|
rhs[2] = rhs[0];
|
|
rhs[3] = rhs[1];
|
|
for (i = 0; i < 2; i++)
|
|
if (TREE_CODE (rhs[i]) == SSA_NAME)
|
|
{
|
|
rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
|
|
if (rhs[2 + i] == NULL_TREE)
|
|
{
|
|
if (has_single_use (rhs[i]))
|
|
rhs[2 + i] = ops[(*pidx)++]->op;
|
|
else
|
|
rhs[2 + i] = rhs[i];
|
|
}
|
|
}
|
|
if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
|
|
&& (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
|
|
{
|
|
gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
|
|
var = make_ssa_name (TREE_TYPE (var));
|
|
gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
|
|
rhs[2], rhs[3]);
|
|
gimple_set_uid (g, gimple_uid (stmt));
|
|
gimple_set_visited (g, true);
|
|
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
|
}
|
|
return var;
|
|
}
|
|
|
|
/* Structure to track the initial value passed to get_ops and
|
|
the range in the ops vector for each basic block. */
|
|
|
|
struct inter_bb_range_test_entry
|
|
{
|
|
tree op;
|
|
unsigned int first_idx, last_idx;
|
|
};
|
|
|
|
/* Inter-bb range test optimization.
|
|
|
|
Returns TRUE if a gimple conditional is optimized to a true/false,
|
|
otherwise return FALSE.
|
|
|
|
This indicates to the caller that it should run a CFG cleanup pass
|
|
once reassociation is completed. */
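/* As a rough illustration (the condition below is made up): a test such as
       if (a == 3 || a == 5 || a == 7)
   is typically split by the gimplifier into a chain of basic blocks, each
   ending in a GIMPLE_COND that branches to a common join block.  This
   function walks such a chain, collects the individual tests into the ops
   vector and, when more than one was found, hands them to
   optimize_range_tests to be merged into fewer range checks.  */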
|
|
|
|
static bool
|
|
maybe_optimize_range_tests (gimple *stmt)
|
|
{
|
|
basic_block first_bb = gimple_bb (stmt);
|
|
basic_block last_bb = first_bb;
|
|
basic_block other_bb = NULL;
|
|
basic_block bb;
|
|
edge_iterator ei;
|
|
edge e;
|
|
auto_vec<operand_entry *> ops;
|
|
auto_vec<inter_bb_range_test_entry> bbinfo;
|
|
bool any_changes = false;
|
|
bool cfg_cleanup_needed = false;
|
|
|
|
/* Consider only basic blocks that end with GIMPLE_COND or
|
|
a cast statement satisfying final_range_test_p. All
|
|
but the last bb in the first_bb .. last_bb range
|
|
should end with GIMPLE_COND. */
|
|
if (gimple_code (stmt) == GIMPLE_COND)
|
|
{
|
|
if (EDGE_COUNT (first_bb->succs) != 2)
|
|
return cfg_cleanup_needed;
|
|
}
|
|
else if (final_range_test_p (stmt))
|
|
other_bb = single_succ (first_bb);
|
|
else
|
|
return cfg_cleanup_needed;
|
|
|
|
if (stmt_could_throw_p (stmt))
|
|
return cfg_cleanup_needed;
|
|
|
|
/* As relative ordering of post-dominator sons isn't fixed,
|
|
maybe_optimize_range_tests can be called first on any
|
|
bb in the range we want to optimize. So, start searching
|
|
backwards, if first_bb can be set to a predecessor. */
|
|
while (single_pred_p (first_bb))
|
|
{
|
|
basic_block pred_bb = single_pred (first_bb);
|
|
if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
|
|
break;
|
|
if (!no_side_effect_bb (first_bb))
|
|
break;
|
|
first_bb = pred_bb;
|
|
}
|
|
/* If first_bb is last_bb, other_bb hasn't been computed yet.
|
|
Before starting forward search in last_bb successors, find
|
|
out the other_bb. */
|
|
if (first_bb == last_bb)
|
|
{
|
|
other_bb = NULL;
|
|
/* As non-GIMPLE_COND last stmt always terminates the range,
|
|
if forward search didn't discover anything, just give up. */
|
|
if (gimple_code (stmt) != GIMPLE_COND)
|
|
return cfg_cleanup_needed;
|
|
/* Look at both successors. Either it ends with a GIMPLE_COND
|
|
and satisfies suitable_cond_bb, or ends with a cast and
|
|
other_bb is that cast's successor. */
|
|
FOR_EACH_EDGE (e, ei, first_bb->succs)
|
|
if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
|
|
|| e->dest == first_bb)
|
|
return cfg_cleanup_needed;
|
|
else if (single_pred_p (e->dest))
|
|
{
|
|
stmt = last_stmt (e->dest);
|
|
if (stmt
|
|
&& gimple_code (stmt) == GIMPLE_COND
|
|
&& EDGE_COUNT (e->dest->succs) == 2)
|
|
{
|
|
if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
|
|
break;
|
|
else
|
|
other_bb = NULL;
|
|
}
|
|
else if (stmt
|
|
&& final_range_test_p (stmt)
|
|
&& find_edge (first_bb, single_succ (e->dest)))
|
|
{
|
|
other_bb = single_succ (e->dest);
|
|
if (other_bb == first_bb)
|
|
other_bb = NULL;
|
|
}
|
|
}
|
|
if (other_bb == NULL)
|
|
return cfg_cleanup_needed;
|
|
}
|
|
/* Now do the forward search, moving last_bb to successor bbs
|
|
that aren't other_bb. */
|
|
while (EDGE_COUNT (last_bb->succs) == 2)
|
|
{
|
|
FOR_EACH_EDGE (e, ei, last_bb->succs)
|
|
if (e->dest != other_bb)
|
|
break;
|
|
if (e == NULL)
|
|
break;
|
|
if (!single_pred_p (e->dest))
|
|
break;
|
|
if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
|
|
break;
|
|
if (!no_side_effect_bb (e->dest))
|
|
break;
|
|
last_bb = e->dest;
|
|
}
|
|
if (first_bb == last_bb)
|
|
return cfg_cleanup_needed;
|
|
/* Here basic blocks first_bb through last_bb's predecessor
|
|
end with GIMPLE_COND, all of them have one of the edges to
|
|
other_bb and another to another block in the range,
|
|
all blocks except first_bb don't have side-effects and
|
|
last_bb ends with either GIMPLE_COND, or cast satisfying
|
|
final_range_test_p. */
|
|
for (bb = last_bb; ; bb = single_pred (bb))
|
|
{
|
|
enum tree_code code;
|
|
tree lhs, rhs;
|
|
inter_bb_range_test_entry bb_ent;
|
|
|
|
bb_ent.op = NULL_TREE;
|
|
bb_ent.first_idx = ops.length ();
|
|
bb_ent.last_idx = bb_ent.first_idx;
|
|
e = find_edge (bb, other_bb);
|
|
stmt = last_stmt (bb);
|
|
gimple_set_visited (stmt, true);
|
|
if (gimple_code (stmt) != GIMPLE_COND)
|
|
{
|
|
use_operand_p use_p;
|
|
gimple *phi;
|
|
edge e2;
|
|
unsigned int d;
|
|
|
|
lhs = gimple_assign_lhs (stmt);
|
|
rhs = gimple_assign_rhs1 (stmt);
|
|
gcc_assert (bb == last_bb);
|
|
|
|
/* stmt is
|
|
_123 = (int) _234;
|
|
OR
|
|
_234 = a_2(D) == 2;
|
|
|
|
followed by:
|
|
<bb M>:
|
|
# _345 = PHI <_123(N), 1(...), 1(...)>
|
|
|
|
or 0 instead of 1. If it is 0, the _234
|
|
range test is anded together with all the
|
|
other range tests, if it is 1, it is ored with
|
|
them. */
|
|
single_imm_use (lhs, &use_p, &phi);
|
|
gcc_assert (gimple_code (phi) == GIMPLE_PHI);
|
|
e2 = find_edge (first_bb, other_bb);
|
|
d = e2->dest_idx;
|
|
gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
|
|
if (integer_zerop (gimple_phi_arg_def (phi, d)))
|
|
code = BIT_AND_EXPR;
|
|
else
|
|
{
|
|
gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
|
|
code = BIT_IOR_EXPR;
|
|
}
|
|
|
|
/* If _234 SSA_NAME_DEF_STMT is
|
|
_234 = _567 | _789;
|
|
(or &, corresponding to 1/0 in the phi arguments),
|
|
push into ops the individual range test arguments
|
|
of the bitwise or resp. and, recursively. */
|
|
if (TREE_CODE (rhs) == SSA_NAME
|
|
&& (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
|
|
!= tcc_comparison)
|
|
&& !get_ops (rhs, code, &ops,
|
|
loop_containing_stmt (stmt))
|
|
&& has_single_use (rhs))
|
|
{
|
|
/* Otherwise, push the _234 range test itself. */
|
|
operand_entry *oe = operand_entry_pool.allocate ();
|
|
|
|
oe->op = rhs;
|
|
oe->rank = code;
|
|
oe->id = 0;
|
|
oe->count = 1;
|
|
oe->stmt_to_insert = NULL;
|
|
ops.safe_push (oe);
|
|
bb_ent.last_idx++;
|
|
bb_ent.op = rhs;
|
|
}
|
|
else if (is_gimple_assign (stmt)
|
|
&& (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
|
|
== tcc_comparison)
|
|
&& !get_ops (lhs, code, &ops,
|
|
loop_containing_stmt (stmt))
|
|
&& has_single_use (lhs))
|
|
{
|
|
operand_entry *oe = operand_entry_pool.allocate ();
|
|
oe->op = lhs;
|
|
oe->rank = code;
|
|
oe->id = 0;
|
|
oe->count = 1;
|
|
ops.safe_push (oe);
|
|
bb_ent.last_idx++;
|
|
bb_ent.op = lhs;
|
|
}
|
|
else
|
|
{
|
|
bb_ent.last_idx = ops.length ();
|
|
bb_ent.op = rhs;
|
|
}
|
|
bbinfo.safe_push (bb_ent);
|
|
continue;
|
|
}
|
|
/* Otherwise stmt is GIMPLE_COND. */
|
|
code = gimple_cond_code (stmt);
|
|
lhs = gimple_cond_lhs (stmt);
|
|
rhs = gimple_cond_rhs (stmt);
|
|
if (TREE_CODE (lhs) == SSA_NAME
|
|
&& INTEGRAL_TYPE_P (TREE_TYPE (lhs))
|
|
&& ((code != EQ_EXPR && code != NE_EXPR)
|
|
|| rhs != boolean_false_node
|
|
/* Either push into ops the individual bitwise
|
|
or resp. and operands, depending on which
|
|
edge is other_bb. */
|
|
|| !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
|
|
^ (code == EQ_EXPR))
|
|
? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
|
|
loop_containing_stmt (stmt))))
|
|
{
|
|
/* Or push the GIMPLE_COND stmt itself. */
|
|
operand_entry *oe = operand_entry_pool.allocate ();
|
|
|
|
oe->op = NULL;
|
|
oe->rank = (e->flags & EDGE_TRUE_VALUE)
|
|
? BIT_IOR_EXPR : BIT_AND_EXPR;
|
|
/* oe->op = NULL signals that there is no SSA_NAME
|
|
for the range test, and oe->id instead is the
|
|
basic block number at whose end the GIMPLE_COND
|
|
is. */
|
|
oe->id = bb->index;
|
|
oe->count = 1;
|
|
oe->stmt_to_insert = NULL;
|
|
ops.safe_push (oe);
|
|
bb_ent.op = NULL;
|
|
bb_ent.last_idx++;
|
|
}
|
|
else if (ops.length () > bb_ent.first_idx)
|
|
{
|
|
bb_ent.op = lhs;
|
|
bb_ent.last_idx = ops.length ();
|
|
}
|
|
bbinfo.safe_push (bb_ent);
|
|
if (bb == first_bb)
|
|
break;
|
|
}
|
|
if (ops.length () > 1)
|
|
any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
|
|
if (any_changes)
|
|
{
|
|
unsigned int idx, max_idx = 0;
|
|
/* update_ops relies on has_single_use predicates returning the
|
|
same values as it did during get_ops earlier. Additionally it
|
|
never removes statements, only adds new ones and it should walk
|
|
from the single imm use and check the predicate already before
|
|
making those changes.
|
|
On the other side, the handling of GIMPLE_COND directly can turn
|
|
previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
|
|
it needs to be done in a separate loop afterwards. */
|
|
for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
|
|
{
|
|
if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
|
|
&& bbinfo[idx].op != NULL_TREE)
|
|
{
|
|
tree new_op;
|
|
|
|
max_idx = idx;
|
|
stmt = last_stmt (bb);
|
|
new_op = update_ops (bbinfo[idx].op,
|
|
(enum tree_code)
|
|
ops[bbinfo[idx].first_idx]->rank,
|
|
ops, &bbinfo[idx].first_idx,
|
|
loop_containing_stmt (stmt));
|
|
if (new_op == NULL_TREE)
|
|
{
|
|
gcc_assert (bb == last_bb);
|
|
new_op = ops[bbinfo[idx].first_idx++]->op;
|
|
}
|
|
if (bbinfo[idx].op != new_op)
|
|
{
|
|
imm_use_iterator iter;
|
|
use_operand_p use_p;
|
|
gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
|
|
|
|
FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
|
|
if (is_gimple_debug (use_stmt))
|
|
continue;
|
|
else if (gimple_code (use_stmt) == GIMPLE_COND
|
|
|| gimple_code (use_stmt) == GIMPLE_PHI)
|
|
FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
|
|
SET_USE (use_p, new_op);
|
|
else if ((is_gimple_assign (use_stmt)
|
|
&& (TREE_CODE_CLASS
|
|
(gimple_assign_rhs_code (use_stmt))
|
|
== tcc_comparison)))
|
|
cast_or_tcc_cmp_stmt = use_stmt;
|
|
else if (gimple_assign_cast_p (use_stmt))
|
|
cast_or_tcc_cmp_stmt = use_stmt;
|
|
else
|
|
gcc_unreachable ();
|
|
|
|
if (cast_or_tcc_cmp_stmt)
|
|
{
|
|
gcc_assert (bb == last_bb);
|
|
tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
|
|
tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
|
|
enum tree_code rhs_code
|
|
= gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
|
|
? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
|
|
: CONVERT_EXPR;
|
|
gassign *g;
|
|
if (is_gimple_min_invariant (new_op))
|
|
{
|
|
new_op = fold_convert (TREE_TYPE (lhs), new_op);
|
|
g = gimple_build_assign (new_lhs, new_op);
|
|
}
|
|
else
|
|
g = gimple_build_assign (new_lhs, rhs_code, new_op);
|
|
gimple_stmt_iterator gsi
|
|
= gsi_for_stmt (cast_or_tcc_cmp_stmt);
|
|
gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
|
|
gimple_set_visited (g, true);
|
|
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
|
FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
|
|
if (is_gimple_debug (use_stmt))
|
|
continue;
|
|
else if (gimple_code (use_stmt) == GIMPLE_COND
|
|
|| gimple_code (use_stmt) == GIMPLE_PHI)
|
|
FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
|
|
SET_USE (use_p, new_lhs);
|
|
else
|
|
gcc_unreachable ();
|
|
}
|
|
}
|
|
}
|
|
if (bb == first_bb)
|
|
break;
|
|
}
|
|
for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
|
|
{
|
|
if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
|
|
&& bbinfo[idx].op == NULL_TREE
|
|
&& ops[bbinfo[idx].first_idx]->op != NULL_TREE)
|
|
{
|
|
gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
|
|
|
|
if (idx > max_idx)
|
|
max_idx = idx;
|
|
|
|
/* If we collapse the conditional to a true/false
|
|
condition, then bubble that knowledge up to our caller. */
|
|
if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
|
|
{
|
|
gimple_cond_make_false (cond_stmt);
|
|
cfg_cleanup_needed = true;
|
|
}
|
|
else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
|
|
{
|
|
gimple_cond_make_true (cond_stmt);
|
|
cfg_cleanup_needed = true;
|
|
}
|
|
else
|
|
{
|
|
gimple_cond_set_code (cond_stmt, NE_EXPR);
|
|
gimple_cond_set_lhs (cond_stmt,
|
|
ops[bbinfo[idx].first_idx]->op);
|
|
gimple_cond_set_rhs (cond_stmt, boolean_false_node);
|
|
}
|
|
update_stmt (cond_stmt);
|
|
}
|
|
if (bb == first_bb)
|
|
break;
|
|
}
|
|
|
|
/* The above changes could result in basic blocks after the first
|
|
modified one, up to and including last_bb, to be executed even if
|
|
they would not be in the original program. If the value ranges of
|
|
assignment lhs' in those bbs were dependent on the conditions
|
|
guarding those basic blocks which now can change, the VRs might
|
|
be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
|
|
are only used within the same bb, it should not be a big deal if
|
|
we just reset all the VRs in those bbs. See PR68671. */
|
|
for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
|
|
reset_flow_sensitive_info_in_bb (bb);
|
|
}
|
|
return cfg_cleanup_needed;
|
|
}
|
|
|
|
/* Return true if OPERAND is defined by a PHI node which uses the LHS
|
|
of STMT in its operands. This is also known as a "destructive
|
|
update" operation. */
|
|
|
|
static bool
|
|
is_phi_for_stmt (gimple *stmt, tree operand)
|
|
{
|
|
gimple *def_stmt;
|
|
gphi *def_phi;
|
|
tree lhs;
|
|
use_operand_p arg_p;
|
|
ssa_op_iter i;
|
|
|
|
if (TREE_CODE (operand) != SSA_NAME)
|
|
return false;
|
|
|
|
lhs = gimple_assign_lhs (stmt);
|
|
|
|
def_stmt = SSA_NAME_DEF_STMT (operand);
|
|
def_phi = dyn_cast <gphi *> (def_stmt);
|
|
if (!def_phi)
|
|
return false;
|
|
|
|
FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
|
|
if (lhs == USE_FROM_PTR (arg_p))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
/* Remove def stmt of VAR if VAR has zero uses and recurse
|
|
on rhs1 operand if so. */
|
|
|
|
static void
|
|
remove_visited_stmt_chain (tree var)
|
|
{
|
|
gimple *stmt;
|
|
gimple_stmt_iterator gsi;
|
|
|
|
while (1)
|
|
{
|
|
if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
|
|
return;
|
|
stmt = SSA_NAME_DEF_STMT (var);
|
|
if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
|
|
{
|
|
var = gimple_assign_rhs1 (stmt);
|
|
gsi = gsi_for_stmt (stmt);
|
|
reassoc_remove_stmt (&gsi);
|
|
release_defs (stmt);
|
|
}
|
|
else
|
|
return;
|
|
}
|
|
}
|
|
|
|
/* This function checks three consecutive operands in
|
|
passed operands vector OPS starting from OPINDEX and
|
|
swaps two operands if it is profitable for binary operation
|
|
consuming OPINDEX + 1 and OPINDEX + 2 operands.
|
|
|
|
We pair ops with the same rank if possible.
|
|
|
|
The alternative we try is to see if STMT is a destructive
|
|
update style statement, which is like:
|
|
b = phi (a, ...)
|
|
a = c + b;
|
|
In that case, we want to use the destructive update form to
|
|
expose the possible vectorizer sum reduction opportunity.
|
|
If so, the third operand will be the phi node. This
|
|
check is not performed if STMT is null.
|
|
|
|
We could, of course, try to be better as noted above, and do a
|
|
lot of work to try to find these opportunities in >3 operand
|
|
cases, but it is unlikely to be worth it. */
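/* For example, if the three entries starting at OPINDEX have ranks
   {3, 3, 2}, the first and third entries are swapped to give {2, 3, 3},
   so the binary operation consuming the OPINDEX + 1 and OPINDEX + 2
   operands sees the two equal-rank entries.  */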
|
|
|
|
static void
|
|
swap_ops_for_binary_stmt (vec<operand_entry *> ops,
|
|
unsigned int opindex, gimple *stmt)
|
|
{
|
|
operand_entry *oe1, *oe2, *oe3;
|
|
|
|
oe1 = ops[opindex];
|
|
oe2 = ops[opindex + 1];
|
|
oe3 = ops[opindex + 2];
|
|
|
|
if ((oe1->rank == oe2->rank
|
|
&& oe2->rank != oe3->rank)
|
|
|| (stmt && is_phi_for_stmt (stmt, oe3->op)
|
|
&& !is_phi_for_stmt (stmt, oe1->op)
|
|
&& !is_phi_for_stmt (stmt, oe2->op)))
|
|
std::swap (*oe1, *oe3);
|
|
else if ((oe1->rank == oe3->rank
|
|
&& oe2->rank != oe3->rank)
|
|
|| (stmt && is_phi_for_stmt (stmt, oe2->op)
|
|
&& !is_phi_for_stmt (stmt, oe1->op)
|
|
&& !is_phi_for_stmt (stmt, oe3->op)))
|
|
std::swap (*oe1, *oe2);
|
|
}
|
|
|
|
/* If definition of RHS1 or RHS2 dominates STMT, return the later of those
|
|
two definitions, otherwise return STMT. */
|
|
|
|
static inline gimple *
|
|
find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
|
|
{
|
|
if (TREE_CODE (rhs1) == SSA_NAME
|
|
&& reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
|
|
stmt = SSA_NAME_DEF_STMT (rhs1);
|
|
if (TREE_CODE (rhs2) == SSA_NAME
|
|
&& reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
|
|
stmt = SSA_NAME_DEF_STMT (rhs2);
|
|
return stmt;
|
|
}
|
|
|
|
/* If the stmt that defines operand has to be inserted, insert it
|
|
before the use. */
|
|
static void
|
|
insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
|
|
{
|
|
gcc_assert (is_gimple_assign (stmt_to_insert));
|
|
tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
|
|
tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
|
|
gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
|
|
gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
|
|
gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
|
|
|
|
/* If the insert point is not stmt, then insert_point would be
|
|
the point where operand rhs1 or rhs2 is defined. In this case,
|
|
stmt_to_insert has to be inserted afterwards. This would
|
|
only happen when the stmt insertion point is flexible. */
|
|
if (stmt == insert_point)
|
|
gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
|
|
else
|
|
insert_stmt_after (stmt_to_insert, insert_point);
|
|
}
|
|
|
|
|
|
/* Recursively rewrite our linearized statements so that the operators
|
|
match those in OPS[OPINDEX], putting the computation in rank
|
|
order. Return new lhs.
|
|
CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
|
|
the current stmt and during recursive invocations.
|
|
NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
|
|
recursive invocations. */
|
|
|
|
static tree
|
|
rewrite_expr_tree (gimple *stmt, unsigned int opindex,
|
|
vec<operand_entry *> ops, bool changed, bool next_changed)
|
|
{
|
|
tree rhs1 = gimple_assign_rhs1 (stmt);
|
|
tree rhs2 = gimple_assign_rhs2 (stmt);
|
|
tree lhs = gimple_assign_lhs (stmt);
|
|
operand_entry *oe;
|
|
|
|
/* The final recursion case for this function is that you have
|
|
exactly two operations left.
|
|
If we had exactly one op in the entire list to start with, we
|
|
would have never called this function, and the tail recursion
|
|
rewrites them one at a time. */
|
|
if (opindex + 2 == ops.length ())
|
|
{
|
|
operand_entry *oe1, *oe2;
|
|
|
|
oe1 = ops[opindex];
|
|
oe2 = ops[opindex + 1];
|
|
|
|
if (rhs1 != oe1->op || rhs2 != oe2->op)
|
|
{
|
|
gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
|
|
unsigned int uid = gimple_uid (stmt);
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "Transforming ");
|
|
print_gimple_stmt (dump_file, stmt, 0);
|
|
}
|
|
|
|
/* If the stmt that defines operand has to be inserted, insert it
|
|
before the use. */
|
|
if (oe1->stmt_to_insert)
|
|
insert_stmt_before_use (stmt, oe1->stmt_to_insert);
|
|
if (oe2->stmt_to_insert)
|
|
insert_stmt_before_use (stmt, oe2->stmt_to_insert);
|
|
/* Even when changed is false, reassociation could have e.g. removed
|
|
some redundant operations, so unless we are just swapping the
|
|
arguments or unless there is no change at all (then we just
|
|
return lhs), force creation of a new SSA_NAME. */
|
|
if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
|
|
{
|
|
gimple *insert_point
|
|
= find_insert_point (stmt, oe1->op, oe2->op);
|
|
lhs = make_ssa_name (TREE_TYPE (lhs));
|
|
stmt
|
|
= gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
|
|
oe1->op, oe2->op);
|
|
gimple_set_uid (stmt, uid);
|
|
gimple_set_visited (stmt, true);
|
|
if (insert_point == gsi_stmt (gsi))
|
|
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
|
|
else
|
|
insert_stmt_after (stmt, insert_point);
|
|
}
|
|
else
|
|
{
|
|
gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
|
|
== stmt);
|
|
gimple_assign_set_rhs1 (stmt, oe1->op);
|
|
gimple_assign_set_rhs2 (stmt, oe2->op);
|
|
update_stmt (stmt);
|
|
}
|
|
|
|
if (rhs1 != oe1->op && rhs1 != oe2->op)
|
|
remove_visited_stmt_chain (rhs1);
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, " into ");
|
|
print_gimple_stmt (dump_file, stmt, 0);
|
|
}
|
|
}
|
|
return lhs;
|
|
}
|
|
|
|
/* If we hit here, we should have 3 or more ops left. */
|
|
gcc_assert (opindex + 2 < ops.length ());
|
|
|
|
/* Rewrite the next operator. */
|
|
oe = ops[opindex];
|
|
|
|
/* If the stmt that defines operand has to be inserted, insert it
|
|
before the use. */
|
|
if (oe->stmt_to_insert)
|
|
insert_stmt_before_use (stmt, oe->stmt_to_insert);
|
|
|
|
/* Recurse on the LHS of the binary operator, which is guaranteed to
|
|
be the non-leaf side. */
|
|
tree new_rhs1
|
|
= rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
|
|
changed || oe->op != rhs2 || next_changed,
|
|
false);
|
|
|
|
if (oe->op != rhs2 || new_rhs1 != rhs1)
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "Transforming ");
|
|
print_gimple_stmt (dump_file, stmt, 0);
|
|
}
|
|
|
|
/* If changed is false, this is either opindex == 0
|
|
or all outer rhs2's were equal to corresponding oe->op,
|
|
and powi_result is NULL.
|
|
That means lhs is equivalent before and after reassociation.
|
|
Otherwise ensure the old lhs SSA_NAME is not reused and
|
|
create a new stmt as well, so that any debug stmts will be
|
|
properly adjusted. */
|
|
if (changed)
|
|
{
|
|
gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
|
|
unsigned int uid = gimple_uid (stmt);
|
|
gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
|
|
|
|
lhs = make_ssa_name (TREE_TYPE (lhs));
|
|
stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
|
|
new_rhs1, oe->op);
|
|
gimple_set_uid (stmt, uid);
|
|
gimple_set_visited (stmt, true);
|
|
if (insert_point == gsi_stmt (gsi))
|
|
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
|
|
else
|
|
insert_stmt_after (stmt, insert_point);
|
|
}
|
|
else
|
|
{
|
|
gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
|
|
== stmt);
|
|
gimple_assign_set_rhs1 (stmt, new_rhs1);
|
|
gimple_assign_set_rhs2 (stmt, oe->op);
|
|
update_stmt (stmt);
|
|
}
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, " into ");
|
|
print_gimple_stmt (dump_file, stmt, 0);
|
|
}
|
|
}
|
|
return lhs;
|
|
}
|
|
|
|
/* Find out how many cycles we need to compute statements chain.
|
|
OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
|
|
maximum number of independent statements we may execute per cycle. */
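/* A worked example of the computation below: for OPS_NUM == 10 and
   CPU_WIDTH == 2 we get res = 10 / (2 * 2) = 2 and
   rest = 10 - 2 * 2 = 6; since 6 is not a power of two the result is
   2 + floor_log2 (6) + 1, i.e. 5 cycles.  */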
|
|
|
|
static int
|
|
get_required_cycles (int ops_num, int cpu_width)
|
|
{
|
|
int res;
|
|
int elog;
|
|
unsigned int rest;
|
|
|
|
/* While we have more than 2 * cpu_width operands
|
|
we may reduce the number of operands by cpu_width
|
|
per cycle. */
|
|
res = ops_num / (2 * cpu_width);
|
|
|
|
/* The remaining operand count may be reduced twice per cycle
|
|
until we have only one operand. */
|
|
rest = (unsigned)(ops_num - res * cpu_width);
|
|
elog = exact_log2 (rest);
|
|
if (elog >= 0)
|
|
res += elog;
|
|
else
|
|
res += floor_log2 (rest) + 1;
|
|
|
|
return res;
|
|
}
|
|
|
|
/* Returns an optimal number of registers to use for computation of
|
|
given statements. */
|
|
|
|
static int
|
|
get_reassociation_width (int ops_num, enum tree_code opc,
|
|
machine_mode mode)
|
|
{
|
|
int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
|
|
int width;
|
|
int width_min;
|
|
int cycles_best;
|
|
|
|
if (param_width > 0)
|
|
width = param_width;
|
|
else
|
|
width = targetm.sched.reassociation_width (opc, mode);
|
|
|
|
if (width == 1)
|
|
return width;
|
|
|
|
/* Get the minimal time required for sequence computation. */
|
|
cycles_best = get_required_cycles (ops_num, width);
|
|
|
|
/* Check if we may use less width and still compute sequence for
|
|
the same time. It will allow us to reduce registers usage.
|
|
get_required_cycles is monotonically increasing with lower width
|
|
so we can perform a binary search for the minimal width that still
|
|
results in the optimal cycle count. */
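/* For example (figures from get_required_cycles): with ops_num == 8 and
   an initial width of 4 the best cycle count is 3, while widths 3 and 2
   would each need 4 cycles, so the search below settles on width == 4.  */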
|
|
width_min = 1;
|
|
while (width > width_min)
|
|
{
|
|
int width_mid = (width + width_min) / 2;
|
|
|
|
if (get_required_cycles (ops_num, width_mid) == cycles_best)
|
|
width = width_mid;
|
|
else if (width_min < width_mid)
|
|
width_min = width_mid;
|
|
else
|
|
break;
|
|
}
|
|
|
|
return width;
|
|
}
|
|
|
|
/* Recursively rewrite our linearized statements so that the operators
|
|
match those in OPS[OPINDEX], putting the computation in rank
|
|
order and trying to allow operations to be executed in
|
|
parallel. */
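/* Roughly, for a PLUS_EXPR chain over four operands a, b, c, d and
   WIDTH == 2 this rebuilds the chain as
       t1 = c + d;
       t2 = a + b;
       lhs = t1 + t2;
   (modulo the operand reordering done by swap_ops_for_binary_stmt), so
   that t1 and t2 can be computed in parallel.  */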
|
|
|
|
static void
|
|
rewrite_expr_tree_parallel (gassign *stmt, int width,
|
|
vec<operand_entry *> ops)
|
|
{
|
|
enum tree_code opcode = gimple_assign_rhs_code (stmt);
|
|
int op_num = ops.length ();
|
|
gcc_assert (op_num > 0);
|
|
int stmt_num = op_num - 1;
|
|
gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
|
|
int op_index = op_num - 1;
|
|
int stmt_index = 0;
|
|
int ready_stmts_end = 0;
|
|
int i = 0;
|
|
gimple *stmt1 = NULL, *stmt2 = NULL;
|
|
tree last_rhs1 = gimple_assign_rhs1 (stmt);
|
|
|
|
/* We start expression rewriting from the top statements.
|
|
So, in this loop we create a full list of statements
|
|
we will work with. */
|
|
stmts[stmt_num - 1] = stmt;
|
|
for (i = stmt_num - 2; i >= 0; i--)
|
|
stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
|
|
|
|
for (i = 0; i < stmt_num; i++)
|
|
{
|
|
tree op1, op2;
|
|
|
|
/* Determine whether we should use results of
|
|
already handled statements or not. */
|
|
if (ready_stmts_end == 0
|
|
&& (i - stmt_index >= width || op_index < 1))
|
|
ready_stmts_end = i;
|
|
|
|
/* Now we choose operands for the next statement. A nonzero
|
|
value in ready_stmts_end means here that we should use
|
|
the result of already generated statements as a new operand. */
|
|
if (ready_stmts_end > 0)
|
|
{
|
|
op1 = gimple_assign_lhs (stmts[stmt_index++]);
|
|
if (ready_stmts_end > stmt_index)
|
|
op2 = gimple_assign_lhs (stmts[stmt_index++]);
|
|
else if (op_index >= 0)
|
|
{
|
|
operand_entry *oe = ops[op_index--];
|
|
stmt2 = oe->stmt_to_insert;
|
|
op2 = oe->op;
|
|
}
|
|
else
|
|
{
|
|
gcc_assert (stmt_index < i);
|
|
op2 = gimple_assign_lhs (stmts[stmt_index++]);
|
|
}
|
|
|
|
if (stmt_index >= ready_stmts_end)
|
|
ready_stmts_end = 0;
|
|
}
|
|
else
|
|
{
|
|
if (op_index > 1)
|
|
swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
|
|
operand_entry *oe2 = ops[op_index--];
|
|
operand_entry *oe1 = ops[op_index--];
|
|
op2 = oe2->op;
|
|
stmt2 = oe2->stmt_to_insert;
|
|
op1 = oe1->op;
|
|
stmt1 = oe1->stmt_to_insert;
|
|
}
|
|
|
|
/* If we emit the last statement then we should put
|
|
operands into the last statement. It will also
|
|
break the loop. */
|
|
if (op_index < 0 && stmt_index == i)
|
|
i = stmt_num - 1;
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "Transforming ");
|
|
print_gimple_stmt (dump_file, stmts[i], 0);
|
|
}
|
|
|
|
/* If the stmt that defines operand has to be inserted, insert it
|
|
before the use. */
|
|
if (stmt1)
|
|
insert_stmt_before_use (stmts[i], stmt1);
|
|
if (stmt2)
|
|
insert_stmt_before_use (stmts[i], stmt2);
|
|
stmt1 = stmt2 = NULL;
|
|
|
|
/* We keep the original statement only for the last one. All
|
|
others are recreated. */
|
|
if (i == stmt_num - 1)
|
|
{
|
|
gimple_assign_set_rhs1 (stmts[i], op1);
|
|
gimple_assign_set_rhs2 (stmts[i], op2);
|
|
update_stmt (stmts[i]);
|
|
}
|
|
else
|
|
{
|
|
stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
|
|
}
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, " into ");
|
|
print_gimple_stmt (dump_file, stmts[i], 0);
|
|
}
|
|
}
|
|
|
|
remove_visited_stmt_chain (last_rhs1);
|
|
}
|
|
|
|
/* Transform STMT, which is really (A + B) + (C + D), into the left
|
|
linear form, ((A+B)+C)+D.
|
|
Recurse on D if necessary. */
|
|
|
|
static void
|
|
linearize_expr (gimple *stmt)
|
|
{
|
|
gimple_stmt_iterator gsi;
|
|
gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
|
|
gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
|
|
gimple *oldbinrhs = binrhs;
|
|
enum tree_code rhscode = gimple_assign_rhs_code (stmt);
|
|
gimple *newbinrhs = NULL;
|
|
struct loop *loop = loop_containing_stmt (stmt);
|
|
tree lhs = gimple_assign_lhs (stmt);
|
|
|
|
gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
|
|
&& is_reassociable_op (binrhs, rhscode, loop));
|
|
|
|
gsi = gsi_for_stmt (stmt);
|
|
|
|
gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
|
|
binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
|
|
gimple_assign_rhs_code (binrhs),
|
|
gimple_assign_lhs (binlhs),
|
|
gimple_assign_rhs2 (binrhs));
|
|
gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
|
|
gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
|
|
gimple_set_uid (binrhs, gimple_uid (stmt));
|
|
|
|
if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
|
|
newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "Linearized: ");
|
|
print_gimple_stmt (dump_file, stmt, 0);
|
|
}
|
|
|
|
reassociate_stats.linearized++;
|
|
update_stmt (stmt);
|
|
|
|
gsi = gsi_for_stmt (oldbinrhs);
|
|
reassoc_remove_stmt (&gsi);
|
|
release_defs (oldbinrhs);
|
|
|
|
gimple_set_visited (stmt, true);
|
|
gimple_set_visited (binlhs, true);
|
|
gimple_set_visited (binrhs, true);
|
|
|
|
/* Tail recurse on the new rhs if it still needs reassociation. */
|
|
if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
|
|
/* ??? This should probably be linearize_expr (newbinrhs) but I don't
|
|
want to change the algorithm while converting to tuples. */
|
|
linearize_expr (stmt);
|
|
}
|
|
|
|
/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
|
|
it. Otherwise, return NULL. */
|
|
|
|
static gimple *
|
|
get_single_immediate_use (tree lhs)
|
|
{
|
|
use_operand_p immuse;
|
|
gimple *immusestmt;
|
|
|
|
if (TREE_CODE (lhs) == SSA_NAME
|
|
&& single_imm_use (lhs, &immuse, &immusestmt)
|
|
&& is_gimple_assign (immusestmt))
|
|
return immusestmt;
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
|
|
representing the negated value. Insertions of any necessary
|
|
instructions go before GSI.
|
|
This function is recursive in that, if you hand it "a_5" as the
|
|
value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
|
|
transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
|
|
|
|
static tree
|
|
negate_value (tree tonegate, gimple_stmt_iterator *gsip)
|
|
{
|
|
gimple *negatedefstmt = NULL;
|
|
tree resultofnegate;
|
|
gimple_stmt_iterator gsi;
|
|
unsigned int uid;
|
|
|
|
/* If we are trying to negate a name, defined by an add, negate the
|
|
add operands instead. */
|
|
if (TREE_CODE (tonegate) == SSA_NAME)
|
|
negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
|
|
if (TREE_CODE (tonegate) == SSA_NAME
|
|
&& is_gimple_assign (negatedefstmt)
|
|
&& TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
|
|
&& has_single_use (gimple_assign_lhs (negatedefstmt))
|
|
&& gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
|
|
{
|
|
tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
|
|
tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
|
|
tree lhs = gimple_assign_lhs (negatedefstmt);
|
|
gimple *g;
|
|
|
|
gsi = gsi_for_stmt (negatedefstmt);
|
|
rhs1 = negate_value (rhs1, &gsi);
|
|
|
|
gsi = gsi_for_stmt (negatedefstmt);
|
|
rhs2 = negate_value (rhs2, &gsi);
|
|
|
|
gsi = gsi_for_stmt (negatedefstmt);
|
|
lhs = make_ssa_name (TREE_TYPE (lhs));
|
|
gimple_set_visited (negatedefstmt, true);
|
|
g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
|
|
gimple_set_uid (g, gimple_uid (negatedefstmt));
|
|
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
|
return lhs;
|
|
}
|
|
|
|
tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
|
|
resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
|
|
NULL_TREE, true, GSI_SAME_STMT);
|
|
gsi = *gsip;
|
|
uid = gimple_uid (gsi_stmt (gsi));
|
|
for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
|
|
{
|
|
gimple *stmt = gsi_stmt (gsi);
|
|
if (gimple_uid (stmt) != 0)
|
|
break;
|
|
gimple_set_uid (stmt, uid);
|
|
}
|
|
return resultofnegate;
|
|
}
|
|
|
|
/* Return true if we should break up the subtract in STMT into an add
|
|
with negate. This is true when the subtract operands are really
|
|
adds, or the subtract itself is used in an add expression. In
|
|
either case, breaking up the subtract into an add with negate
|
|
exposes the adds to reassociation. */
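/* A small made-up example: for
       x = a - b;
       y = x + c;
   this returns true because the subtract feeds an addition, and
   break_up_subtract will then rewrite the first statement roughly as
   x = a + -b (negate_value may fold the negation further).  */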
|
|
|
|
static bool
|
|
should_break_up_subtract (gimple *stmt)
|
|
{
|
|
tree lhs = gimple_assign_lhs (stmt);
|
|
tree binlhs = gimple_assign_rhs1 (stmt);
|
|
tree binrhs = gimple_assign_rhs2 (stmt);
|
|
gimple *immusestmt;
|
|
struct loop *loop = loop_containing_stmt (stmt);
|
|
|
|
if (TREE_CODE (binlhs) == SSA_NAME
|
|
&& is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
|
|
return true;
|
|
|
|
if (TREE_CODE (binrhs) == SSA_NAME
|
|
&& is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
|
|
return true;
|
|
|
|
if (TREE_CODE (lhs) == SSA_NAME
|
|
&& (immusestmt = get_single_immediate_use (lhs))
|
|
&& is_gimple_assign (immusestmt)
|
|
&& (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
|
|
|| (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
|
|
&& gimple_assign_rhs1 (immusestmt) == lhs)
|
|
|| gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
/* Transform STMT from A - B into A + -B. */
|
|
|
|
static void
|
|
break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
|
|
{
|
|
tree rhs1 = gimple_assign_rhs1 (stmt);
|
|
tree rhs2 = gimple_assign_rhs2 (stmt);
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "Breaking up subtract ");
|
|
print_gimple_stmt (dump_file, stmt, 0);
|
|
}
|
|
|
|
rhs2 = negate_value (rhs2, gsip);
|
|
gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
|
|
update_stmt (stmt);
|
|
}
|
|
|
|
/* Determine whether STMT is a builtin call that raises an SSA name
|
|
to an integer power and has only one use. If so, and this is early
|
|
reassociation and unsafe math optimizations are permitted, place
|
|
the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
|
|
If any of these conditions does not hold, return FALSE. */
|
|
|
|
static bool
|
|
acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
|
|
{
|
|
tree arg1;
|
|
REAL_VALUE_TYPE c, cint;
|
|
|
|
switch (gimple_call_combined_fn (stmt))
|
|
{
|
|
CASE_CFN_POW:
|
|
if (flag_errno_math)
|
|
return false;
|
|
|
|
*base = gimple_call_arg (stmt, 0);
|
|
arg1 = gimple_call_arg (stmt, 1);
|
|
|
|
if (TREE_CODE (arg1) != REAL_CST)
|
|
return false;
|
|
|
|
c = TREE_REAL_CST (arg1);
|
|
|
|
if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
|
|
return false;
|
|
|
|
*exponent = real_to_integer (&c);
|
|
real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
|
|
if (!real_identical (&c, &cint))
|
|
return false;
|
|
|
|
break;
|
|
|
|
CASE_CFN_POWI:
|
|
*base = gimple_call_arg (stmt, 0);
|
|
arg1 = gimple_call_arg (stmt, 1);
|
|
|
|
if (!tree_fits_shwi_p (arg1))
|
|
return false;
|
|
|
|
*exponent = tree_to_shwi (arg1);
|
|
break;
|
|
|
|
default:
|
|
return false;
|
|
}
|
|
|
|
/* Expanding negative exponents is generally unproductive, so we don't
|
|
complicate matters with those. Exponents of zero and one should
|
|
have been handled by expression folding. */
|
|
if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/* Try to derive and add operand entry for OP to *OPS. Return false if
|
|
unsuccessful. */
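/* Two cases are handled here, shown with made-up names for a MULT_EXPR
   chain: an operand defined as t_1 = -x_2 is replaced by the pair
   { x_2, -1 } (subject to the floating-point safety checks below), and,
   when unsafe math optimizations permit it, an operand defined as
   t_3 = __builtin_powi (y_4, 3) is recorded as the repeated factor y_4
   with count 3.  */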
|
|
|
|
static bool
|
|
try_special_add_to_ops (vec<operand_entry *> *ops,
|
|
enum tree_code code,
|
|
tree op, gimple* def_stmt)
|
|
{
|
|
tree base = NULL_TREE;
|
|
HOST_WIDE_INT exponent = 0;
|
|
|
|
if (TREE_CODE (op) != SSA_NAME
|
|
|| ! has_single_use (op))
|
|
return false;
|
|
|
|
if (code == MULT_EXPR
|
|
&& reassoc_insert_powi_p
|
|
&& flag_unsafe_math_optimizations
|
|
&& is_gimple_call (def_stmt)
|
|
&& acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
|
|
{
|
|
add_repeat_to_ops_vec (ops, base, exponent);
|
|
gimple_set_visited (def_stmt, true);
|
|
return true;
|
|
}
|
|
else if (code == MULT_EXPR
|
|
&& is_gimple_assign (def_stmt)
|
|
&& gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
|
|
&& !HONOR_SNANS (TREE_TYPE (op))
|
|
&& (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
|
|
|| !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
|
|
{
|
|
tree rhs1 = gimple_assign_rhs1 (def_stmt);
|
|
tree cst = build_minus_one_cst (TREE_TYPE (op));
|
|
add_to_ops_vec (ops, rhs1);
|
|
add_to_ops_vec (ops, cst);
|
|
gimple_set_visited (def_stmt, true);
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/* Recursively linearize a binary expression that is the RHS of STMT.
|
|
Place the operands of the expression tree in the vector named OPS. */
|
|
|
|
static void
|
|
linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
|
|
bool is_associative, bool set_visited)
|
|
{
|
|
tree binlhs = gimple_assign_rhs1 (stmt);
|
|
tree binrhs = gimple_assign_rhs2 (stmt);
|
|
gimple *binlhsdef = NULL, *binrhsdef = NULL;
|
|
bool binlhsisreassoc = false;
|
|
bool binrhsisreassoc = false;
|
|
enum tree_code rhscode = gimple_assign_rhs_code (stmt);
|
|
struct loop *loop = loop_containing_stmt (stmt);
|
|
|
|
if (set_visited)
|
|
gimple_set_visited (stmt, true);
|
|
|
|
if (TREE_CODE (binlhs) == SSA_NAME)
|
|
{
|
|
binlhsdef = SSA_NAME_DEF_STMT (binlhs);
|
|
binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
|
|
&& !stmt_could_throw_p (binlhsdef));
|
|
}
|
|
|
|
if (TREE_CODE (binrhs) == SSA_NAME)
|
|
{
|
|
binrhsdef = SSA_NAME_DEF_STMT (binrhs);
|
|
binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
|
|
&& !stmt_could_throw_p (binrhsdef));
|
|
}
|
|
|
|
/* If the LHS is not reassociable, but the RHS is, we need to swap
|
|
them. If neither is reassociable, there is nothing we can do, so
|
|
just put them in the ops vector. If the LHS is reassociable,
|
|
linearize it. If both are reassociable, then linearize the RHS
|
|
and the LHS. */
|
|
|
|
if (!binlhsisreassoc)
|
|
{
|
|
/* If this is not an associative operation, like division, give up. */
|
|
if (!is_associative)
|
|
{
|
|
add_to_ops_vec (ops, binrhs);
|
|
return;
|
|
}
|
|
|
|
if (!binrhsisreassoc)
|
|
{
|
|
if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
|
|
add_to_ops_vec (ops, binrhs);
|
|
|
|
if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
|
|
add_to_ops_vec (ops, binlhs);
|
|
|
|
return;
|
|
}
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "swapping operands of ");
|
|
print_gimple_stmt (dump_file, stmt, 0);
|
|
}
|
|
|
|
swap_ssa_operands (stmt,
|
|
gimple_assign_rhs1_ptr (stmt),
|
|
gimple_assign_rhs2_ptr (stmt));
|
|
update_stmt (stmt);
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, " is now ");
|
|
print_gimple_stmt (dump_file, stmt, 0);
|
|
}
|
|
|
|
/* We want to make it so the lhs is always the reassociative op,
|
|
so swap. */
|
|
std::swap (binlhs, binrhs);
|
|
}
|
|
else if (binrhsisreassoc)
|
|
{
|
|
linearize_expr (stmt);
|
|
binlhs = gimple_assign_rhs1 (stmt);
|
|
binrhs = gimple_assign_rhs2 (stmt);
|
|
}
|
|
|
|
gcc_assert (TREE_CODE (binrhs) != SSA_NAME
|
|
|| !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
|
|
rhscode, loop));
|
|
linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
|
|
is_associative, set_visited);
|
|
|
|
if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
|
|
add_to_ops_vec (ops, binrhs);
|
|
}
|
|
|
|
/* Repropagate the negates back into subtracts, since no other pass
|
|
currently does it. */
|
|
|
|
static void
|
|
repropagate_negates (void)
|
|
{
|
|
unsigned int i = 0;
|
|
tree negate;
|
|
|
|
FOR_EACH_VEC_ELT (plus_negates, i, negate)
|
|
{
|
|
gimple *user = get_single_immediate_use (negate);
|
|
|
|
if (!user || !is_gimple_assign (user))
|
|
continue;
|
|
|
|
/* The negate operand can be either operand of a PLUS_EXPR
|
|
(it can be the LHS if the RHS is a constant for example).
|
|
|
|
Force the negate operand to the RHS of the PLUS_EXPR, then
|
|
transform the PLUS_EXPR into a MINUS_EXPR. */
|
|
if (gimple_assign_rhs_code (user) == PLUS_EXPR)
|
|
{
|
|
/* If the negated operand appears on the LHS of the
|
|
PLUS_EXPR, exchange the operands of the PLUS_EXPR
|
|
to force the negated operand to the RHS of the PLUS_EXPR. */
|
|
if (gimple_assign_rhs1 (user) == negate)
|
|
{
|
|
swap_ssa_operands (user,
|
|
gimple_assign_rhs1_ptr (user),
|
|
gimple_assign_rhs2_ptr (user));
|
|
}
|
|
|
|
/* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
|
|
the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
|
|
if (gimple_assign_rhs2 (user) == negate)
|
|
{
|
|
tree rhs1 = gimple_assign_rhs1 (user);
|
|
tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
|
|
gimple_stmt_iterator gsi = gsi_for_stmt (user);
|
|
gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
|
|
update_stmt (user);
|
|
}
|
|
}
|
|
else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
|
|
{
|
|
if (gimple_assign_rhs1 (user) == negate)
|
|
{
|
|
/* We have
|
|
x = -a
|
|
y = x - b
|
|
which we transform into
|
|
x = a + b
|
|
y = -x .
|
|
This pushes down the negate, which we can possibly merge
|
|
into some other operation, hence insert it into the
|
|
plus_negates vector. */
|
|
gimple *feed = SSA_NAME_DEF_STMT (negate);
|
|
tree a = gimple_assign_rhs1 (feed);
|
|
tree b = gimple_assign_rhs2 (user);
|
|
gimple_stmt_iterator gsi = gsi_for_stmt (feed);
|
|
gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
|
|
tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
|
|
gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
|
|
gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
|
|
gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
|
|
user = gsi_stmt (gsi2);
|
|
update_stmt (user);
|
|
reassoc_remove_stmt (&gsi);
|
|
release_defs (feed);
|
|
plus_negates.safe_push (gimple_assign_lhs (user));
|
|
}
|
|
else
|
|
{
|
|
/* Transform "x = -a; y = b - x" into "y = b + a", getting
|
|
rid of one operation. */
|
|
gimple *feed = SSA_NAME_DEF_STMT (negate);
|
|
tree a = gimple_assign_rhs1 (feed);
|
|
tree rhs1 = gimple_assign_rhs1 (user);
|
|
gimple_stmt_iterator gsi = gsi_for_stmt (user);
|
|
gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
|
|
update_stmt (gsi_stmt (gsi));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Returns true if OP is of a type for which we can do reassociation.
|
|
That is for integral or non-saturating fixed-point types, and for
|
|
floating point types when associative-math is enabled. */
|
|
|
|
static bool
|
|
can_reassociate_p (tree op)
|
|
{
|
|
tree type = TREE_TYPE (op);
|
|
if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
|
|
return false;
|
|
if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
|
|
|| NON_SAT_FIXED_POINT_TYPE_P (type)
|
|
|| (flag_associative_math && FLOAT_TYPE_P (type)))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
/* Break up subtract operations in block BB.
|
|
|
|
We do this top down because we don't know whether the subtract is
|
|
part of a possible chain of reassociation except at the top.
|
|
|
|
IE given
|
|
d = f + g
|
|
c = a + e
|
|
b = c - d
|
|
q = b - r
|
|
k = t - q
|
|
|
|
we want to break up k = t - q, but we won't until we've transformed q
|
|
= b - r, which won't be broken up until we transform b = c - d.
|
|
|
|
En passant, clear the GIMPLE visited flag on every statement
|
|
and set UIDs within each basic block. */
|
|
|
|
static void
|
|
break_up_subtract_bb (basic_block bb)
|
|
{
|
|
gimple_stmt_iterator gsi;
|
|
basic_block son;
|
|
unsigned int uid = 1;
|
|
|
|
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
{
|
|
gimple *stmt = gsi_stmt (gsi);
|
|
gimple_set_visited (stmt, false);
|
|
gimple_set_uid (stmt, uid++);
|
|
|
|
if (!is_gimple_assign (stmt)
|
|
|| !can_reassociate_p (gimple_assign_lhs (stmt)))
|
|
continue;
|
|
|
|
/* Look for simple gimple subtract operations. */
|
|
if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
|
|
{
|
|
if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
|
|
|| !can_reassociate_p (gimple_assign_rhs2 (stmt)))
|
|
continue;
|
|
|
|
/* Check for a subtract used only in an addition. If this
|
|
is the case, transform it into add of a negate for better
|
|
reassociation. IE transform C = A-B into C = A + -B if C
|
|
is only used in an addition. */
|
|
if (should_break_up_subtract (stmt))
|
|
break_up_subtract (stmt, &gsi);
|
|
}
|
|
else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
|
|
&& can_reassociate_p (gimple_assign_rhs1 (stmt)))
|
|
plus_negates.safe_push (gimple_assign_lhs (stmt));
|
|
}
|
|
for (son = first_dom_son (CDI_DOMINATORS, bb);
|
|
son;
|
|
son = next_dom_son (CDI_DOMINATORS, son))
|
|
break_up_subtract_bb (son);
|
|
}
|
|
|
|
/* Used for repeated factor analysis. */
|
|
struct repeat_factor
|
|
{
|
|
/* An SSA name that occurs in a multiply chain. */
|
|
tree factor;
|
|
|
|
/* Cached rank of the factor. */
|
|
unsigned rank;
|
|
|
|
/* Number of occurrences of the factor in the chain. */
|
|
HOST_WIDE_INT count;
|
|
|
|
/* An SSA name representing the product of this factor and
|
|
all factors appearing later in the repeated factor vector. */
|
|
tree repr;
|
|
};
|
|
|
|
|
|
static vec<repeat_factor> repeat_factor_vec;
|
|
|
|
/* Used for sorting the repeat factor vector. Sort primarily by
|
|
ascending occurrence count, secondarily by descending rank. */
|
|
|
|
static int
|
|
compare_repeat_factors (const void *x1, const void *x2)
|
|
{
|
|
const repeat_factor *rf1 = (const repeat_factor *) x1;
|
|
const repeat_factor *rf2 = (const repeat_factor *) x2;
|
|
|
|
if (rf1->count != rf2->count)
|
|
return rf1->count - rf2->count;
|
|
|
|
return rf2->rank - rf1->rank;
|
|
}
|
|
|
|
/* Look for repeated operands in OPS in the multiply tree rooted at
|
|
STMT. Replace them with an optimal sequence of multiplies and powi
|
|
builtin calls, and remove the used operands from OPS. Return an
|
|
SSA name representing the value of the replacement sequence. */
|
|
|
|
static tree
|
|
attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
|
|
{
|
|
unsigned i, j, vec_len;
|
|
int ii;
|
|
operand_entry *oe;
|
|
repeat_factor *rf1, *rf2;
|
|
repeat_factor rfnew;
|
|
tree result = NULL_TREE;
|
|
tree target_ssa, iter_result;
|
|
tree type = TREE_TYPE (gimple_get_lhs (stmt));
|
|
tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
|
|
gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
|
|
gimple *mul_stmt, *pow_stmt;
|
|
|
|
/* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
|
|
target. */
|
|
if (!powi_fndecl)
|
|
return NULL_TREE;
|
|
|
|
/* Allocate the repeated factor vector. */
|
|
repeat_factor_vec.create (10);
|
|
|
|
/* Scan the OPS vector for all SSA names in the product and build
|
|
up a vector of occurrence counts for each factor. */
|
|
FOR_EACH_VEC_ELT (*ops, i, oe)
|
|
{
|
|
if (TREE_CODE (oe->op) == SSA_NAME)
|
|
{
|
|
FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
|
|
{
|
|
if (rf1->factor == oe->op)
|
|
{
|
|
rf1->count += oe->count;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (j >= repeat_factor_vec.length ())
|
|
{
|
|
rfnew.factor = oe->op;
|
|
rfnew.rank = oe->rank;
|
|
rfnew.count = oe->count;
|
|
rfnew.repr = NULL_TREE;
|
|
repeat_factor_vec.safe_push (rfnew);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Sort the repeated factor vector by (a) increasing occurrence count,
|
|
and (b) decreasing rank. */
|
|
repeat_factor_vec.qsort (compare_repeat_factors);
|
|
|
|
/* It is generally best to combine as many base factors as possible
|
|
into a product before applying __builtin_powi to the result.
|
|
However, the sort order chosen for the repeated factor vector
|
|
allows us to cache partial results for the product of the base
|
|
factors for subsequent use. When we already have a cached partial
|
|
result from a previous iteration, it is best to make use of it
|
|
before looking for another __builtin_pow opportunity.
|
|
|
|
As an example, consider x * x * y * y * y * z * z * z * z.
|
|
We want to first compose the product x * y * z, raise it to the
|
|
second power, then multiply this by y * z, and finally multiply
|
|
by z. This can be done in 5 multiplies provided we cache y * z
|
|
for use in both expressions:
|
|
|
|
t1 = y * z
|
|
t2 = t1 * x
|
|
t3 = t2 * t2
|
|
t4 = t1 * t3
|
|
result = t4 * z
|
|
|
|
If we instead ignored the cached y * z and first multiplied by
|
|
the __builtin_pow opportunity z * z, we would get the inferior:
|
|
|
|
t1 = y * z
|
|
t2 = t1 * x
|
|
t3 = t2 * t2
|
|
t4 = z * z
|
|
t5 = t3 * t4
|
|
result = t5 * y */
|
|
|
|
vec_len = repeat_factor_vec.length ();
|
|
|
|
/* Repeatedly look for opportunities to create a builtin_powi call. */
|
|
while (true)
|
|
{
|
|
HOST_WIDE_INT power;
|
|
|
|
/* First look for the largest cached product of factors from
|
|
preceding iterations. If found, create a builtin_powi for
|
|
it if the minimum occurrence count for its factors is at
|
|
least 2, or just use this cached product as our next
|
|
multiplicand if the minimum occurrence count is 1. */
|
|
FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
|
|
{
|
|
if (rf1->repr && rf1->count > 0)
|
|
break;
|
|
}
|
|
|
|
if (j < vec_len)
|
|
{
|
|
power = rf1->count;
|
|
|
|
if (power == 1)
|
|
{
|
|
iter_result = rf1->repr;
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
unsigned elt;
|
|
repeat_factor *rf;
|
|
fputs ("Multiplying by cached product ", dump_file);
|
|
for (elt = j; elt < vec_len; elt++)
|
|
{
|
|
rf = &repeat_factor_vec[elt];
|
|
print_generic_expr (dump_file, rf->factor);
|
|
if (elt < vec_len - 1)
|
|
fputs (" * ", dump_file);
|
|
}
|
|
fputs ("\n", dump_file);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
|
|
pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
|
|
build_int_cst (integer_type_node,
|
|
power));
|
|
gimple_call_set_lhs (pow_stmt, iter_result);
|
|
gimple_set_location (pow_stmt, gimple_location (stmt));
|
|
gimple_set_uid (pow_stmt, gimple_uid (stmt));
|
|
gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
unsigned elt;
|
|
repeat_factor *rf;
|
|
fputs ("Building __builtin_pow call for cached product (",
|
|
dump_file);
|
|
for (elt = j; elt < vec_len; elt++)
|
|
{
|
|
rf = &repeat_factor_vec[elt];
|
|
print_generic_expr (dump_file, rf->factor);
|
|
if (elt < vec_len - 1)
|
|
fputs (" * ", dump_file);
|
|
}
|
|
fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
|
|
power);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* Otherwise, find the first factor in the repeated factor
|
|
vector whose occurrence count is at least 2. If no such
|
|
factor exists, there are no builtin_powi opportunities
|
|
remaining. */
|
|
FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
|
|
{
|
|
if (rf1->count >= 2)
|
|
break;
|
|
}
|
|
|
|
if (j >= vec_len)
|
|
break;
|
|
|
|
power = rf1->count;
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
unsigned elt;
|
|
repeat_factor *rf;
|
|
fputs ("Building __builtin_pow call for (", dump_file);
|
|
for (elt = j; elt < vec_len; elt++)
|
|
{
|
|
rf = &repeat_factor_vec[elt];
|
|
print_generic_expr (dump_file, rf->factor);
|
|
if (elt < vec_len - 1)
|
|
fputs (" * ", dump_file);
|
|
}
|
|
fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
|
|
}
|
|
|
|
reassociate_stats.pows_created++;
|
|
|
|
/* Visit each element of the vector in reverse order (so that
|
|
high-occurrence elements are visited first, and within the
|
|
same occurrence count, lower-ranked elements are visited
|
|
first). Form a linear product of all elements in this order
|
|
whose occurrence count is at least that of element J.
|
|
Record the SSA name representing the product of each element
|
|
with all subsequent elements in the vector. */
|
|
if (j == vec_len - 1)
|
|
rf1->repr = rf1->factor;
|
|
else
|
|
{
|
|
for (ii = vec_len - 2; ii >= (int)j; ii--)
|
|
{
|
|
tree op1, op2;
|
|
|
|
rf1 = &repeat_factor_vec[ii];
|
|
rf2 = &repeat_factor_vec[ii + 1];
|
|
|
|
/* Init the last factor's representative to be itself. */
|
|
if (!rf2->repr)
|
|
rf2->repr = rf2->factor;
|
|
|
|
op1 = rf1->factor;
|
|
op2 = rf2->repr;
|
|
|
|
target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
|
|
mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
|
|
op1, op2);
|
|
gimple_set_location (mul_stmt, gimple_location (stmt));
|
|
gimple_set_uid (mul_stmt, gimple_uid (stmt));
|
|
gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
|
|
rf1->repr = target_ssa;
|
|
|
|
/* Don't reprocess the multiply we just introduced. */
|
|
gimple_set_visited (mul_stmt, true);
|
|
}
|
|
}
|
|
|
|
/* Form a call to __builtin_powi for the maximum product
|
|
just formed, raised to the power obtained earlier. */
|
|
rf1 = &repeat_factor_vec[j];
|
|
iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
|
|
pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
|
|
build_int_cst (integer_type_node,
|
|
power));
|
|
gimple_call_set_lhs (pow_stmt, iter_result);
|
|
gimple_set_location (pow_stmt, gimple_location (stmt));
|
|
gimple_set_uid (pow_stmt, gimple_uid (stmt));
|
|
gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
|
|
}
|
|
|
|
/* If we previously formed at least one other builtin_powi call,
|
|
form the product of this one and those others. */
|
|
if (result)
|
|
{
|
|
tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
|
|
mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
|
|
result, iter_result);
|
|
gimple_set_location (mul_stmt, gimple_location (stmt));
|
|
gimple_set_uid (mul_stmt, gimple_uid (stmt));
|
|
gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
|
|
gimple_set_visited (mul_stmt, true);
|
|
result = new_result;
|
|
}
|
|
else
|
|
result = iter_result;
|
|
|
|
/* Decrement the occurrence count of each element in the product
|
|
by the count found above, and remove this many copies of each
|
|
factor from OPS. */
|
|
for (i = j; i < vec_len; i++)
|
|
{
|
|
unsigned k = power;
|
|
unsigned n;
|
|
|
|
rf1 = &repeat_factor_vec[i];
|
|
rf1->count -= power;
|
|
|
|
FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
|
|
{
|
|
if (oe->op == rf1->factor)
|
|
{
|
|
if (oe->count <= k)
|
|
{
|
|
ops->ordered_remove (n);
|
|
k -= oe->count;
|
|
|
|
if (k == 0)
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
oe->count -= k;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* At this point all elements in the repeated factor vector have a
|
|
remaining occurrence count of 0 or 1, and those with a count of 1
|
|
don't have cached representatives. Re-sort the ops vector and
|
|
clean up. */
|
|
ops->qsort (sort_by_operand_rank);
|
|
repeat_factor_vec.release ();
|
|
|
|
/* Return the final product computed herein. Note that there may
|
|
still be some elements with single occurrence count left in OPS;
|
|
those will be handled by the normal reassociation logic. */
|
|
return result;
|
|
}
|
|
|
|
/* Attempt to optimize
|
|
CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
|
|
CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
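/* Numerically, e.g. 2.0 * copysign (3.0, y) becomes copysign (6.0, y);
   for a negative constant the result of the new copysign call is
   additionally negated.  */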
|
|
|
|

static void
attempt_builtin_copysign (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int length = ops->length ();
  tree cst = ops->last ()->op;

  if (length == 1 || TREE_CODE (cst) != REAL_CST)
    return;

  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME
          && has_single_use (oe->op))
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
          if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
            {
              tree arg0, arg1;
              switch (gimple_call_combined_fn (old_call))
                {
                CASE_CFN_COPYSIGN:
                CASE_CFN_COPYSIGN_FN:
                  arg0 = gimple_call_arg (old_call, 0);
                  arg1 = gimple_call_arg (old_call, 1);
                  /* The first argument of copysign must be a constant,
                     otherwise there's nothing to do.  */
                  if (TREE_CODE (arg0) == REAL_CST)
                    {
                      tree type = TREE_TYPE (arg0);
                      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
                      /* If we couldn't fold to a single constant, skip it.
                         That happens e.g. for inexact multiplication when
                         -frounding-math.  */
                      if (mul == NULL_TREE)
                        break;
                      /* Instead of adjusting OLD_CALL, let's build a new
                         call to not leak the LHS and prevent keeping bogus
                         debug statements.  DCE will clean up the old call.  */
                      gcall *new_call;
                      if (gimple_call_internal_p (old_call))
                        new_call = gimple_build_call_internal
                          (IFN_COPYSIGN, 2, mul, arg1);
                      else
                        new_call = gimple_build_call
                          (gimple_call_fndecl (old_call), 2, mul, arg1);
                      tree lhs = make_ssa_name (type);
                      gimple_call_set_lhs (new_call, lhs);
                      gimple_set_location (new_call,
                                           gimple_location (old_call));
                      insert_stmt_after (new_call, old_call);
                      /* We've used the constant, get rid of it.  */
                      ops->pop ();
                      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
                      /* Handle the CST1 < 0 case by negating the result.  */
                      if (cst1_neg)
                        {
                          tree negrhs = make_ssa_name (TREE_TYPE (lhs));
                          gimple *negate_stmt
                            = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
                          insert_stmt_after (negate_stmt, new_call);
                          oe->op = negrhs;
                        }
                      else
                        oe->op = lhs;
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        {
                          fprintf (dump_file, "Optimizing copysign: ");
                          print_generic_expr (dump_file, cst);
                          fprintf (dump_file, " * COPYSIGN (");
                          print_generic_expr (dump_file, arg0);
                          fprintf (dump_file, ", ");
                          print_generic_expr (dump_file, arg1);
                          fprintf (dump_file, ") into %sCOPYSIGN (",
                                   cst1_neg ? "-" : "");
                          print_generic_expr (dump_file, mul);
                          fprintf (dump_file, ", ");
                          print_generic_expr (dump_file, arg1);
                          fprintf (dump_file, ")\n");
                        }
                      return;
                    }
                  break;
                default:
                  break;
                }
            }
        }
    }
}
/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
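/* For example, given x_1 = y_2 * z_3 and NEW_RHS T_4, the statement
   becomes x_1 = T_4; remove_visited_stmt_chain then walks the chain of
   statements that fed the old RHS1 and deletes those that were visited
   and are now dead (the SSA names here are illustrative only).  */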

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (stmt);
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}

/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
                            tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}

/* Reassociate expressions in basic block BB and in its children in the
   post-dominator tree.

   Bubble up return status from maybe_optimize_range_tests.  */
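/* Note: each block is scanned backwards, so the root of an expression
   tree is linearized before the statements feeding it; those feeding
   statements are marked visited and are skipped (or removed if they
   became dead) when the walk reaches them.  */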

static bool
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple *stmt = last_stmt (bb);
  bool cfg_cleanup_needed = false;

  if (stmt && !gimple_visited_p (stmt))
    cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
          && !stmt_could_throw_p (stmt))
        {
          tree lhs, rhs1, rhs2;
          enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

          /* If this is not a gimple binary expression, there is
             nothing for us to do with it.  */
          if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
            continue;

          /* If this was part of an already processed statement,
             we don't need to touch it again.  */
          if (gimple_visited_p (stmt))
            {
              /* This statement might have become dead because of previous
                 reassociations.  */
              if (has_zero_uses (gimple_get_lhs (stmt)))
                {
                  reassoc_remove_stmt (&gsi);
                  release_defs (stmt);
                  /* We might end up removing the last stmt above, which
                     places the iterator at the end of the sequence.
                     Reset it to the last stmt in that case; this might
                     itself be the end of the sequence if we removed the
                     last statement of the sequence, in which case we
                     need to bail out.  */
                  if (gsi_end_p (gsi))
                    {
                      gsi = gsi_last_bb (bb);
                      if (gsi_end_p (gsi))
                        break;
                    }
                }
              continue;
            }

          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);

          /* For non-bit or min/max operations we can't associate
             all types.  Verify that here.  */
          if (rhs_code != BIT_IOR_EXPR
              && rhs_code != BIT_AND_EXPR
              && rhs_code != BIT_XOR_EXPR
              && rhs_code != MIN_EXPR
              && rhs_code != MAX_EXPR
              && (!can_reassociate_p (lhs)
                  || !can_reassociate_p (rhs1)
                  || !can_reassociate_p (rhs2)))
            continue;

          if (associative_tree_code (rhs_code))
            {
              auto_vec<operand_entry *> ops;
              tree powi_result = NULL_TREE;
              bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              ops.qsort (sort_by_operand_rank);
              int orig_len = ops.length ();
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }

              if (rhs_code == PLUS_EXPR
                  && transform_add_to_multiply (&ops))
                ops.qsort (sort_by_operand_rank);

              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                {
                  if (is_vector)
                    optimize_vec_cond_expr (rhs_code, &ops);
                  else
                    optimize_range_tests (rhs_code, &ops, NULL);
                }

              if (rhs_code == MULT_EXPR && !is_vector)
                {
                  attempt_builtin_copysign (&ops);

                  if (reassoc_insert_powi_p
                      && flag_unsafe_math_optimizations)
                    powi_result = attempt_builtin_powi (stmt, &ops);
                }

              operand_entry *last;
              bool negate_result = false;
              if (ops.length () > 1
                  && rhs_code == MULT_EXPR)
                {
                  last = ops.last ();
                  if ((integer_minus_onep (last->op)
                       || real_minus_onep (last->op))
                      && !HONOR_SNANS (TREE_TYPE (lhs))
                      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
                          || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
                    {
                      ops.pop ();
                      negate_result = true;
                    }
                }

              tree new_lhs = lhs;
              /* If the operand vector is now empty, all operands were
                 consumed by the __builtin_powi optimization.  */
              if (ops.length () == 0)
                transform_stmt_to_copy (&gsi, stmt, powi_result);
              else if (ops.length () == 1)
                {
                  tree last_op = ops.last ()->op;

                  /* If the stmt that defines the operand has to be
                     inserted, insert it before the use.  */
                  if (ops.last ()->stmt_to_insert)
                    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
                  if (powi_result)
                    transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                powi_result);
                  else
                    transform_stmt_to_copy (&gsi, stmt, last_op);
                }
              else
                {
                  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
                  int ops_num = ops.length ();
                  int width = get_reassociation_width (ops_num, rhs_code, mode);

                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file,
                             "Width = %d was chosen for reassociation\n", width);
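
                  /* WIDTH is the number of independent accumulation
                     chains rewrite_expr_tree_parallel may build below;
                     a width of 1 (or too few operands) keeps the usual
                     single left-linear chain via rewrite_expr_tree.  */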

                  /* For binary bit operations, if there are at least 3
                     operands and the last operand in OPS is a constant,
                     move it to the front.  This helps ensure that we
                     generate (X & Y) & C rather than (X & C) & Y.  The
                     former will often match a canonical bit test when
                     we get to RTL.  */
                  if (ops.length () > 2
                      && (rhs_code == BIT_AND_EXPR
                          || rhs_code == BIT_IOR_EXPR
                          || rhs_code == BIT_XOR_EXPR)
                      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
                    std::swap (*ops[0], *ops[ops_num - 1]);

                  if (width > 1
                      && ops.length () > 3)
                    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
                                                width, ops);
                  else
                    {
                      /* When there are three operands left, we want
                         to make sure the ones that get the double
                         binary op are chosen wisely.  */
                      int len = ops.length ();
                      if (len >= 3)
                        swap_ops_for_binary_stmt (ops, len - 3, stmt);

                      new_lhs = rewrite_expr_tree (stmt, 0, ops,
                                                   powi_result != NULL
                                                   || negate_result,
                                                   len != orig_len);
                    }

                  /* If we combined some repeated factors into a
                     __builtin_powi call, multiply that result by the
                     reassociated operands.  */
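                  /* The statement that currently defines LHS is
                     retargeted to a fresh "reassocpow" temporary, and
                     LHS is then redefined as powi_result times that
                     temporary.  */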
                  if (powi_result)
                    {
                      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
                      tree type = TREE_TYPE (lhs);
                      tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocpow");
                      gimple_set_lhs (lhs_stmt, target_ssa);
                      update_stmt (lhs_stmt);
                      if (lhs != new_lhs)
                        {
                          target_ssa = new_lhs;
                          new_lhs = lhs;
                        }
                      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
                                                      powi_result, target_ssa);
                      gimple_set_location (mul_stmt, gimple_location (stmt));
                      gimple_set_uid (mul_stmt, gimple_uid (stmt));
                      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
                    }
                }
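
              /* Re-attach the negation that was peeled off the operand
                 list above: the value just computed is moved to a
                 temporary and LHS is redefined as its negation.  */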
              if (negate_result)
                {
                  stmt = SSA_NAME_DEF_STMT (lhs);
                  tree tmp = make_ssa_name (TREE_TYPE (lhs));
                  gimple_set_lhs (stmt, tmp);
                  if (lhs != new_lhs)
                    tmp = new_lhs;
                  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
                                                           tmp);
                  gimple_set_uid (neg_stmt, gimple_uid (stmt));
                  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
                  update_stmt (stmt);
                }
            }
        }
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    cfg_cleanup_needed |= reassociate_bb (son);

  return cfg_cleanup_needed;
}

/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...; // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...; // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      gimple *use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
                  && is_gimple_assign (use_stmt)
                  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
                  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple *g = gimple_build_cond (NE_EXPR, var,
                                     build_zero_cst (TREE_TYPE (var)),
                                     NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = profile_probability::even ();
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      then_bb->count -= etrue->count ();

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
        othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
        othervar = gimple_assign_rhs1 (use_stmt);
      else
        gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}

void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
void debug_ops_vector (vec<operand_entry *> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry *> ops)
{
  operand_entry *oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}

/* Bubble up return status from reassociate_bb.  */
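/* Note: break_up_subtract_bb recurses over the dominator tree starting
   at the entry block, while reassociate_bb (above) recurses over the
   post-dominator tree starting at the exit block.  */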

static bool
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}

/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
        insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
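  /* The shift by 16 below keeps the base ranks of consecutive blocks
     widely separated, presumably so that adjustments made to ranks
     within a block (such as the PHI loop bias used by phi_rank) stay
     in the low bits and never spill into the next block's range.  */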
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}

/* Clean up after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
                            reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
                            reassociate_stats.pows_created);

  delete operand_rank;
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}

/* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
   insertion of __builtin_powi calls.

   Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p)
{
  reassoc_insert_powi_p = insert_powi_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}

namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
    }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *)
    { return execute_reassoc (insert_powi_p); }

private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}