2015-08-28 15:33:40 +00:00
|
|
|
|
/* Shrink-wrapping related optimizations.
|
2022-10-27 18:55:19 +00:00
|
|
|
|
Copyright (C) 1987-2022 Free Software Foundation, Inc.
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
|
|
|
|
This file is part of GCC.
|
|
|
|
|
|
|
|
|
|
GCC is free software; you can redistribute it and/or modify it under
|
|
|
|
|
the terms of the GNU General Public License as published by the Free
|
|
|
|
|
Software Foundation; either version 3, or (at your option) any later
|
|
|
|
|
version.
|
|
|
|
|
|
|
|
|
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
|
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
|
|
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
|
|
|
for more details.
|
|
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
|
along with GCC; see the file COPYING3. If not see
|
|
|
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
|
|
|
|
|
|
/* This file handles shrink-wrapping related optimizations. */
|
|
|
|
|
|
|
|
|
|
#include "config.h"
|
|
|
|
|
#include "system.h"
|
|
|
|
|
#include "coretypes.h"
|
2017-04-10 11:32:00 +00:00
|
|
|
|
#include "backend.h"
|
|
|
|
|
#include "target.h"
|
2015-08-28 15:33:40 +00:00
|
|
|
|
#include "rtl.h"
|
2017-04-10 11:32:00 +00:00
|
|
|
|
#include "tree.h"
|
|
|
|
|
#include "cfghooks.h"
|
|
|
|
|
#include "df.h"
|
2017-10-07 00:16:47 +00:00
|
|
|
|
#include "memmodel.h"
|
2017-04-10 11:32:00 +00:00
|
|
|
|
#include "tm_p.h"
|
2015-08-28 15:33:40 +00:00
|
|
|
|
#include "regs.h"
|
2017-10-07 00:16:47 +00:00
|
|
|
|
#include "insn-config.h"
|
2017-04-10 11:32:00 +00:00
|
|
|
|
#include "emit-rtl.h"
|
2015-08-28 15:33:40 +00:00
|
|
|
|
#include "output.h"
|
|
|
|
|
#include "tree-pass.h"
|
|
|
|
|
#include "cfgrtl.h"
|
2017-10-07 00:16:47 +00:00
|
|
|
|
#include "cfgbuild.h"
|
2015-08-28 15:33:40 +00:00
|
|
|
|
#include "bb-reorder.h"
|
|
|
|
|
#include "shrink-wrap.h"
|
|
|
|
|
#include "regcprop.h"
|
|
|
|
|
#include "rtl-iter.h"
|
2017-04-10 11:32:00 +00:00
|
|
|
|
#include "valtrack.h"
|
2022-10-27 18:55:19 +00:00
|
|
|
|
#include "function-abi.h"
|
|
|
|
|
#include "print-rtl.h"
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
|
|
|
|
/* Return true if INSN requires the stack frame to be set up.
|
|
|
|
|
PROLOGUE_USED contains the hard registers used in the function
|
|
|
|
|
prologue. SET_UP_BY_PROLOGUE is the set of registers we expect the
|
|
|
|
|
prologue to set up for the function. */
|
|
|
|
|
bool
|
|
|
|
|
requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
|
|
|
|
|
HARD_REG_SET set_up_by_prologue)
|
|
|
|
|
{
|
|
|
|
|
df_ref def, use;
|
|
|
|
|
HARD_REG_SET hardregs;
|
|
|
|
|
unsigned regno;
|
|
|
|
|
|
2022-10-27 18:55:19 +00:00
|
|
|
|
if (CALL_P (insn) && !FAKE_CALL_P (insn))
|
2015-08-28 15:33:40 +00:00
|
|
|
|
return !SIBLING_CALL_P (insn);
|
|
|
|
|
|
|
|
|
|
/* We need a frame to get the unique CFA expected by the unwinder. */
|
|
|
|
|
if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
CLEAR_HARD_REG_SET (hardregs);
|
|
|
|
|
FOR_EACH_INSN_DEF (def, insn)
|
|
|
|
|
{
|
|
|
|
|
rtx dreg = DF_REF_REG (def);
|
|
|
|
|
|
|
|
|
|
if (!REG_P (dreg))
|
|
|
|
|
continue;
|
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg));
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
|
|
|
|
if (hard_reg_set_intersect_p (hardregs, prologue_used))
|
|
|
|
|
return true;
|
2022-10-27 18:55:19 +00:00
|
|
|
|
hardregs &= ~crtl->abi->full_reg_clobbers ();
|
2015-08-28 15:33:40 +00:00
|
|
|
|
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
|
|
|
|
|
if (TEST_HARD_REG_BIT (hardregs, regno)
|
|
|
|
|
&& df_regs_ever_live_p (regno))
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
FOR_EACH_INSN_USE (use, insn)
|
|
|
|
|
{
|
|
|
|
|
rtx reg = DF_REF_REG (use);
|
|
|
|
|
|
|
|
|
|
if (!REG_P (reg))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
add_to_hard_reg_set (&hardregs, GET_MODE (reg),
|
|
|
|
|
REGNO (reg));
|
|
|
|
|
}
|
|
|
|
|
if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* See whether there has a single live edge from BB, which dest uses
|
|
|
|
|
[REGNO, END_REGNO). Return the live edge if its dest bb has
|
|
|
|
|
one or two predecessors. Otherwise return NULL. */
|
|
|
|
|
|
|
|
|
|
static edge
|
|
|
|
|
live_edge_for_reg (basic_block bb, int regno, int end_regno)
|
|
|
|
|
{
|
|
|
|
|
edge e, live_edge;
|
|
|
|
|
edge_iterator ei;
|
|
|
|
|
bitmap live;
|
|
|
|
|
int i;
|
|
|
|
|
|
|
|
|
|
live_edge = NULL;
|
|
|
|
|
FOR_EACH_EDGE (e, ei, bb->succs)
|
|
|
|
|
{
|
|
|
|
|
live = df_get_live_in (e->dest);
|
|
|
|
|
for (i = regno; i < end_regno; i++)
|
|
|
|
|
if (REGNO_REG_SET_P (live, i))
|
|
|
|
|
{
|
|
|
|
|
if (live_edge && live_edge != e)
|
|
|
|
|
return NULL;
|
|
|
|
|
live_edge = e;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* We can sometimes encounter dead code. Don't try to move it
|
|
|
|
|
into the exit block. */
|
|
|
|
|
if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
/* Reject targets of abnormal edges. This is needed for correctness
|
|
|
|
|
on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
|
|
|
|
|
exception edges even though it is generally treated as call-saved
|
|
|
|
|
for the majority of the compilation. Moving across abnormal edges
|
|
|
|
|
isn't going to be interesting for shrink-wrap usage anyway. */
|
|
|
|
|
if (live_edge->flags & EDGE_ABNORMAL)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
/* When live_edge->dest->preds == 2, we can create a new block on
|
|
|
|
|
the edge to make it meet the requirement. */
|
|
|
|
|
if (EDGE_COUNT (live_edge->dest->preds) > 2)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
return live_edge;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Try to move INSN from BB to a successor. Return true on success.
|
|
|
|
|
USES and DEFS are the set of registers that are used and defined
|
|
|
|
|
after INSN in BB. SPLIT_P indicates whether a live edge from BB
|
|
|
|
|
is splitted or not. */
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
|
2022-10-27 18:55:19 +00:00
|
|
|
|
const_hard_reg_set uses,
|
|
|
|
|
const_hard_reg_set defs,
|
2017-04-10 11:32:00 +00:00
|
|
|
|
bool *split_p,
|
|
|
|
|
struct dead_debug_local *debug)
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
|
|
|
|
rtx set, src, dest;
|
2018-12-28 15:30:48 +00:00
|
|
|
|
bitmap live_out, live_in, bb_uses = NULL, bb_defs = NULL;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
unsigned int i, dregno, end_dregno;
|
|
|
|
|
unsigned int sregno = FIRST_PSEUDO_REGISTER;
|
|
|
|
|
unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
|
|
|
|
|
basic_block next_block;
|
|
|
|
|
edge live_edge;
|
2017-04-10 11:32:00 +00:00
|
|
|
|
rtx_insn *dinsn;
|
|
|
|
|
df_ref def;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
|
|
|
|
/* Look for a simple register assignment. We don't use single_set here
|
|
|
|
|
because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
|
|
|
|
|
destinations. */
|
|
|
|
|
if (!INSN_P (insn))
|
|
|
|
|
return false;
|
|
|
|
|
set = PATTERN (insn);
|
|
|
|
|
if (GET_CODE (set) != SET)
|
|
|
|
|
return false;
|
|
|
|
|
src = SET_SRC (set);
|
|
|
|
|
dest = SET_DEST (set);
|
|
|
|
|
|
|
|
|
|
/* For the destination, we want only a register. Also disallow STACK
|
|
|
|
|
or FRAME related adjustments. They are likely part of the prologue,
|
|
|
|
|
so keep them in the entry block. */
|
|
|
|
|
if (!REG_P (dest)
|
|
|
|
|
|| dest == stack_pointer_rtx
|
|
|
|
|
|| dest == frame_pointer_rtx
|
|
|
|
|
|| dest == hard_frame_pointer_rtx)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
/* For the source, we want one of:
|
|
|
|
|
(1) A (non-overlapping) register
|
|
|
|
|
(2) A constant,
|
|
|
|
|
(3) An expression involving no more than one register.
|
|
|
|
|
|
|
|
|
|
That last point comes from the code following, which was originally
|
|
|
|
|
written to handle only register move operations, and still only handles
|
|
|
|
|
a single source register when checking for overlaps. Happily, the
|
|
|
|
|
same checks can be applied to expressions like (plus reg const). */
|
|
|
|
|
|
|
|
|
|
if (CONSTANT_P (src))
|
|
|
|
|
;
|
|
|
|
|
else if (!REG_P (src))
|
|
|
|
|
{
|
|
|
|
|
rtx src_inner = NULL_RTX;
|
|
|
|
|
|
|
|
|
|
if (can_throw_internal (insn))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
subrtx_var_iterator::array_type array;
|
|
|
|
|
FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
|
|
|
|
|
{
|
|
|
|
|
rtx x = *iter;
|
|
|
|
|
switch (GET_RTX_CLASS (GET_CODE (x)))
|
|
|
|
|
{
|
|
|
|
|
case RTX_CONST_OBJ:
|
|
|
|
|
case RTX_COMPARE:
|
|
|
|
|
case RTX_COMM_COMPARE:
|
|
|
|
|
case RTX_BIN_ARITH:
|
|
|
|
|
case RTX_COMM_ARITH:
|
|
|
|
|
case RTX_UNARY:
|
|
|
|
|
case RTX_TERNARY:
|
|
|
|
|
/* Constant or expression. Continue. */
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case RTX_OBJ:
|
|
|
|
|
case RTX_EXTRA:
|
|
|
|
|
switch (GET_CODE (x))
|
|
|
|
|
{
|
|
|
|
|
case UNSPEC:
|
|
|
|
|
case SUBREG:
|
|
|
|
|
case STRICT_LOW_PART:
|
|
|
|
|
case PC:
|
|
|
|
|
case LO_SUM:
|
|
|
|
|
/* Ok. Continue. */
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case REG:
|
|
|
|
|
/* Fail if we see a second inner register. */
|
|
|
|
|
if (src_inner != NULL)
|
|
|
|
|
return false;
|
|
|
|
|
src_inner = x;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (src_inner != NULL)
|
|
|
|
|
src = src_inner;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Make sure that the source register isn't defined later in BB. */
|
|
|
|
|
if (REG_P (src))
|
|
|
|
|
{
|
|
|
|
|
sregno = REGNO (src);
|
|
|
|
|
end_sregno = END_REGNO (src);
|
|
|
|
|
if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Make sure that the destination register isn't referenced later in BB. */
|
|
|
|
|
dregno = REGNO (dest);
|
|
|
|
|
end_dregno = END_REGNO (dest);
|
|
|
|
|
if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
|
|
|
|
|
|| overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
/* See whether there is a successor block to which we could move INSN. */
|
|
|
|
|
live_edge = live_edge_for_reg (bb, dregno, end_dregno);
|
|
|
|
|
if (!live_edge)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
next_block = live_edge->dest;
|
|
|
|
|
/* Create a new basic block on the edge. */
|
|
|
|
|
if (EDGE_COUNT (next_block->preds) == 2)
|
|
|
|
|
{
|
|
|
|
|
/* split_edge for a block with only one successor is meaningless. */
|
|
|
|
|
if (EDGE_COUNT (bb->succs) == 1)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
/* If DF_LIVE doesn't exist, i.e. at -O1, just give up. */
|
|
|
|
|
if (!df_live)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
basic_block old_dest = live_edge->dest;
|
|
|
|
|
next_block = split_edge (live_edge);
|
|
|
|
|
|
|
|
|
|
/* We create a new basic block. Call df_grow_bb_info to make sure
|
|
|
|
|
all data structures are allocated. */
|
|
|
|
|
df_grow_bb_info (df_live);
|
|
|
|
|
|
|
|
|
|
bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
|
|
|
|
|
df_get_live_in (old_dest));
|
|
|
|
|
df_set_bb_dirty (next_block);
|
|
|
|
|
|
|
|
|
|
/* We should not split more than once for a function. */
|
|
|
|
|
if (*split_p)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
*split_p = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* At this point we are committed to moving INSN, but let's try to
|
|
|
|
|
move it as far as we can. */
|
|
|
|
|
do
|
|
|
|
|
{
|
2018-12-28 15:30:48 +00:00
|
|
|
|
if (MAY_HAVE_DEBUG_BIND_INSNS)
|
2017-04-10 11:32:00 +00:00
|
|
|
|
{
|
|
|
|
|
FOR_BB_INSNS_REVERSE (bb, dinsn)
|
2018-12-28 15:30:48 +00:00
|
|
|
|
if (DEBUG_BIND_INSN_P (dinsn))
|
2017-04-10 11:32:00 +00:00
|
|
|
|
{
|
|
|
|
|
df_ref use;
|
|
|
|
|
FOR_EACH_INSN_USE (use, dinsn)
|
|
|
|
|
if (refers_to_regno_p (dregno, end_dregno,
|
|
|
|
|
DF_REF_REG (use), (rtx *) NULL))
|
|
|
|
|
dead_debug_add (debug, use, DF_REF_REGNO (use));
|
|
|
|
|
}
|
|
|
|
|
else if (dinsn == insn)
|
|
|
|
|
break;
|
|
|
|
|
}
|
2015-08-28 15:33:40 +00:00
|
|
|
|
live_out = df_get_live_out (bb);
|
|
|
|
|
live_in = df_get_live_in (next_block);
|
|
|
|
|
bb = next_block;
|
|
|
|
|
|
|
|
|
|
/* Check whether BB uses DEST or clobbers DEST. We need to add
|
|
|
|
|
INSN to BB if so. Either way, DEST is no longer live on entry,
|
|
|
|
|
except for any part that overlaps SRC (next loop). */
|
2018-12-28 15:30:48 +00:00
|
|
|
|
if (!*split_p)
|
|
|
|
|
{
|
|
|
|
|
bb_uses = &DF_LR_BB_INFO (bb)->use;
|
|
|
|
|
bb_defs = &DF_LR_BB_INFO (bb)->def;
|
|
|
|
|
}
|
2015-08-28 15:33:40 +00:00
|
|
|
|
if (df_live)
|
|
|
|
|
{
|
|
|
|
|
for (i = dregno; i < end_dregno; i++)
|
|
|
|
|
{
|
|
|
|
|
if (*split_p
|
|
|
|
|
|| REGNO_REG_SET_P (bb_uses, i)
|
|
|
|
|
|| REGNO_REG_SET_P (bb_defs, i)
|
|
|
|
|
|| REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
|
|
|
|
|
next_block = NULL;
|
|
|
|
|
CLEAR_REGNO_REG_SET (live_out, i);
|
|
|
|
|
CLEAR_REGNO_REG_SET (live_in, i);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Check whether BB clobbers SRC. We need to add INSN to BB if so.
|
|
|
|
|
Either way, SRC is now live on entry. */
|
|
|
|
|
for (i = sregno; i < end_sregno; i++)
|
|
|
|
|
{
|
|
|
|
|
if (*split_p
|
|
|
|
|
|| REGNO_REG_SET_P (bb_defs, i)
|
|
|
|
|
|| REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
|
|
|
|
|
next_block = NULL;
|
|
|
|
|
SET_REGNO_REG_SET (live_out, i);
|
|
|
|
|
SET_REGNO_REG_SET (live_in, i);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
/* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
|
|
|
|
|
DF_REF_CONDITIONAL defs. So if DF_LIVE doesn't exist, i.e.
|
|
|
|
|
at -O1, just give up searching NEXT_BLOCK. */
|
|
|
|
|
next_block = NULL;
|
|
|
|
|
for (i = dregno; i < end_dregno; i++)
|
|
|
|
|
{
|
|
|
|
|
CLEAR_REGNO_REG_SET (live_out, i);
|
|
|
|
|
CLEAR_REGNO_REG_SET (live_in, i);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (i = sregno; i < end_sregno; i++)
|
|
|
|
|
{
|
|
|
|
|
SET_REGNO_REG_SET (live_out, i);
|
|
|
|
|
SET_REGNO_REG_SET (live_in, i);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* If we don't need to add the move to BB, look for a single
|
|
|
|
|
successor block. */
|
|
|
|
|
if (next_block)
|
|
|
|
|
{
|
|
|
|
|
live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
|
|
|
|
|
if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
|
|
|
|
|
break;
|
|
|
|
|
next_block = live_edge->dest;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
while (next_block);
|
|
|
|
|
|
|
|
|
|
/* For the new created basic block, there is no dataflow info at all.
|
|
|
|
|
So skip the following dataflow update and check. */
|
|
|
|
|
if (!(*split_p))
|
|
|
|
|
{
|
|
|
|
|
/* BB now defines DEST. It only uses the parts of DEST that overlap SRC
|
|
|
|
|
(next loop). */
|
|
|
|
|
for (i = dregno; i < end_dregno; i++)
|
|
|
|
|
{
|
|
|
|
|
CLEAR_REGNO_REG_SET (bb_uses, i);
|
|
|
|
|
SET_REGNO_REG_SET (bb_defs, i);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* BB now uses SRC. */
|
|
|
|
|
for (i = sregno; i < end_sregno; i++)
|
|
|
|
|
SET_REGNO_REG_SET (bb_uses, i);
|
|
|
|
|
}
|
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
/* Insert debug temps for dead REGs used in subsequent debug insns. */
|
|
|
|
|
if (debug->used && !bitmap_empty_p (debug->used))
|
|
|
|
|
FOR_EACH_INSN_DEF (def, insn)
|
|
|
|
|
dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
|
|
|
|
|
DEBUG_TEMP_BEFORE_WITH_VALUE);
|
|
|
|
|
|
2019-06-02 15:48:37 +00:00
|
|
|
|
rtx_insn *insn_copy = emit_insn_after (PATTERN (insn), bb_note (bb));
|
|
|
|
|
/* Update the LABEL_NUSES count on any referenced labels. The ideal
|
|
|
|
|
solution here would be to actually move the instruction instead
|
|
|
|
|
of copying/deleting it as this loses some notations on the
|
|
|
|
|
insn. */
|
|
|
|
|
mark_jump_label (PATTERN (insn), insn_copy, 0);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
delete_insn (insn);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Look for register copies in the first block of the function, and move
|
|
|
|
|
them down into successor blocks if the register is used only on one
|
|
|
|
|
path. This exposes more opportunities for shrink-wrapping. These
|
|
|
|
|
kinds of sets often occur when incoming argument registers are moved
|
|
|
|
|
to call-saved registers because their values are live across one or
|
|
|
|
|
more calls during the function. */
|
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
static void
|
2015-08-28 15:33:40 +00:00
|
|
|
|
prepare_shrink_wrap (basic_block entry_block)
|
|
|
|
|
{
|
|
|
|
|
rtx_insn *insn, *curr;
|
|
|
|
|
rtx x;
|
|
|
|
|
HARD_REG_SET uses, defs;
|
|
|
|
|
df_ref def, use;
|
|
|
|
|
bool split_p = false;
|
2017-04-10 11:32:00 +00:00
|
|
|
|
unsigned int i;
|
|
|
|
|
struct dead_debug_local debug;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
|
|
|
|
if (JUMP_P (BB_END (entry_block)))
|
|
|
|
|
{
|
|
|
|
|
/* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries
|
|
|
|
|
to sink the copies from parameter to callee saved register out of
|
|
|
|
|
entry block. copyprop_hardreg_forward_bb_without_debug_insn is called
|
|
|
|
|
to release some dependences. */
|
|
|
|
|
copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
|
|
|
|
|
}
|
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
dead_debug_local_init (&debug, NULL, NULL);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
CLEAR_HARD_REG_SET (uses);
|
|
|
|
|
CLEAR_HARD_REG_SET (defs);
|
2017-04-10 11:32:00 +00:00
|
|
|
|
|
2015-08-28 15:33:40 +00:00
|
|
|
|
FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
|
|
|
|
|
if (NONDEBUG_INSN_P (insn)
|
|
|
|
|
&& !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
|
2017-04-10 11:32:00 +00:00
|
|
|
|
&split_p, &debug))
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
|
|
|
|
/* Add all defined registers to DEFs. */
|
|
|
|
|
FOR_EACH_INSN_DEF (def, insn)
|
|
|
|
|
{
|
|
|
|
|
x = DF_REF_REG (def);
|
|
|
|
|
if (REG_P (x) && HARD_REGISTER_P (x))
|
2017-04-10 11:32:00 +00:00
|
|
|
|
for (i = REGNO (x); i < END_REGNO (x); i++)
|
|
|
|
|
SET_HARD_REG_BIT (defs, i);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Add all used registers to USESs. */
|
|
|
|
|
FOR_EACH_INSN_USE (use, insn)
|
|
|
|
|
{
|
|
|
|
|
x = DF_REF_REG (use);
|
|
|
|
|
if (REG_P (x) && HARD_REGISTER_P (x))
|
2017-04-10 11:32:00 +00:00
|
|
|
|
for (i = REGNO (x); i < END_REGNO (x); i++)
|
|
|
|
|
SET_HARD_REG_BIT (uses, i);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2017-04-10 11:32:00 +00:00
|
|
|
|
|
|
|
|
|
dead_debug_local_finish (&debug, NULL);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
2019-06-02 15:48:37 +00:00
|
|
|
|
/* Return whether basic block PRO can get the prologue. It cannot if it
|
2017-04-10 11:32:00 +00:00
|
|
|
|
has incoming complex edges that need a prologue inserted (we make a new
|
|
|
|
|
block for the prologue, so those edges would need to be redirected, which
|
2019-06-02 15:48:37 +00:00
|
|
|
|
does not work). It also cannot if there exist registers live on entry
|
2017-04-10 11:32:00 +00:00
|
|
|
|
to PRO that are clobbered by the prologue. */
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
can_get_prologue (basic_block pro, HARD_REG_SET prologue_clobbered)
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
|
|
|
|
edge e;
|
2017-04-10 11:32:00 +00:00
|
|
|
|
edge_iterator ei;
|
|
|
|
|
FOR_EACH_EDGE (e, ei, pro->preds)
|
2022-10-27 18:55:19 +00:00
|
|
|
|
if (e->flags & EDGE_COMPLEX
|
2017-04-10 11:32:00 +00:00
|
|
|
|
&& !dominated_by_p (CDI_DOMINATORS, e->src, pro))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
HARD_REG_SET live;
|
|
|
|
|
REG_SET_TO_HARD_REG_SET (live, df_get_live_in (pro));
|
|
|
|
|
if (hard_reg_set_intersect_p (live, prologue_clobbered))
|
|
|
|
|
return false;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
return true;
|
|
|
|
|
}
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
/* Return whether we can duplicate basic block BB for shrink wrapping. We
|
|
|
|
|
cannot if the block cannot be duplicated at all, or if any of its incoming
|
|
|
|
|
edges are complex and come from a block that does not require a prologue
|
|
|
|
|
(we cannot redirect such edges), or if the block is too big to copy.
|
|
|
|
|
PRO is the basic block before which we would put the prologue, MAX_SIZE is
|
|
|
|
|
the maximum size block we allow to be copied. */
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size)
|
|
|
|
|
{
|
|
|
|
|
if (!can_duplicate_block_p (bb))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
edge e;
|
|
|
|
|
edge_iterator ei;
|
|
|
|
|
FOR_EACH_EDGE (e, ei, bb->preds)
|
|
|
|
|
if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
|
|
|
|
|
&& !dominated_by_p (CDI_DOMINATORS, e->src, pro))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
unsigned size = 0;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
rtx_insn *insn;
|
|
|
|
|
FOR_BB_INSNS (bb, insn)
|
|
|
|
|
if (NONDEBUG_INSN_P (insn))
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-04-10 11:32:00 +00:00
|
|
|
|
size += get_attr_min_length (insn);
|
|
|
|
|
if (size > max_size)
|
|
|
|
|
return false;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
2017-04-10 11:32:00 +00:00
|
|
|
|
|
|
|
|
|
return true;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
/* Do whatever needs to be done for exits that run without prologue.
|
|
|
|
|
Sibcalls need nothing done. Normal exits get a simple_return inserted. */
|
2017-04-10 11:32:00 +00:00
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
static void
|
|
|
|
|
handle_simple_exit (edge e)
|
2017-04-10 11:32:00 +00:00
|
|
|
|
{
|
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
if (e->flags & EDGE_SIBCALL)
|
|
|
|
|
{
|
2022-10-27 18:55:19 +00:00
|
|
|
|
/* Tell function.cc to take no further action on this edge. */
|
2017-10-07 00:16:47 +00:00
|
|
|
|
e->flags |= EDGE_IGNORE;
|
2017-04-10 11:32:00 +00:00
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
e->flags &= ~EDGE_FALLTHRU;
|
|
|
|
|
emit_barrier_after_bb (e->src);
|
|
|
|
|
return;
|
|
|
|
|
}
|
2017-04-10 11:32:00 +00:00
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
/* If the basic block the edge comes from has multiple successors,
|
|
|
|
|
split the edge. */
|
|
|
|
|
if (EDGE_COUNT (e->src->succs) > 1)
|
|
|
|
|
{
|
|
|
|
|
basic_block old_bb = e->src;
|
|
|
|
|
rtx_insn *end = BB_END (old_bb);
|
|
|
|
|
rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end);
|
|
|
|
|
basic_block new_bb = create_basic_block (note, note, old_bb);
|
|
|
|
|
BB_COPY_PARTITION (new_bb, old_bb);
|
|
|
|
|
BB_END (old_bb) = end;
|
|
|
|
|
|
|
|
|
|
redirect_edge_succ (e, new_bb);
|
2018-12-28 15:30:48 +00:00
|
|
|
|
new_bb->count = e->count ();
|
2017-10-07 00:16:47 +00:00
|
|
|
|
e->flags |= EDGE_FALLTHRU;
|
|
|
|
|
|
2018-12-28 15:30:48 +00:00
|
|
|
|
e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
|
2017-10-07 00:16:47 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
e->flags &= ~EDGE_FALLTHRU;
|
|
|
|
|
rtx_jump_insn *ret = emit_jump_insn_after (targetm.gen_simple_return (),
|
|
|
|
|
BB_END (e->src));
|
|
|
|
|
JUMP_LABEL (ret) = simple_return_rtx;
|
|
|
|
|
emit_barrier_after_bb (e->src);
|
|
|
|
|
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "Made simple_return with UID %d in bb %d\n",
|
|
|
|
|
INSN_UID (ret), e->src->index);
|
2017-04-10 11:32:00 +00:00
|
|
|
|
}
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
|
|
|
|
/* Try to perform a kind of shrink-wrapping, making sure the
|
|
|
|
|
prologue/epilogue is emitted only around those parts of the
|
2017-04-10 11:32:00 +00:00
|
|
|
|
function that require it.
|
|
|
|
|
|
|
|
|
|
There will be exactly one prologue, and it will be executed either
|
|
|
|
|
zero or one time, on any path. Depending on where the prologue is
|
|
|
|
|
placed, some of the basic blocks can be reached via both paths with
|
|
|
|
|
and without a prologue. Such blocks will be duplicated here, and the
|
|
|
|
|
edges changed to match.
|
|
|
|
|
|
|
|
|
|
Paths that go to the exit without going through the prologue will use
|
|
|
|
|
a simple_return instead of the epilogue. We maximize the number of
|
|
|
|
|
those, making sure to only duplicate blocks that can be duplicated.
|
|
|
|
|
If the prologue can then still be placed in multiple locations, we
|
|
|
|
|
place it as early as possible.
|
|
|
|
|
|
|
|
|
|
An example, where we duplicate blocks with control flow (legend:
|
|
|
|
|
_B_egin, _R_eturn and _S_imple_return; edges without arrowhead should
|
|
|
|
|
be taken to point down or to the right, to simplify the diagram; here,
|
|
|
|
|
block 3 needs a prologue, the rest does not):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
B B
|
|
|
|
|
| |
|
|
|
|
|
2 2
|
|
|
|
|
|\ |\
|
|
|
|
|
| 3 becomes | 3
|
|
|
|
|
|/ | \
|
|
|
|
|
4 7 4
|
|
|
|
|
|\ |\ |\
|
|
|
|
|
| 5 | 8 | 5
|
|
|
|
|
|/ |/ |/
|
|
|
|
|
6 9 6
|
|
|
|
|
| | |
|
|
|
|
|
R S R
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(bb 4 is duplicated to 7, and so on; the prologue is inserted on the
|
|
|
|
|
edge 2->3).
|
|
|
|
|
|
|
|
|
|
Another example, where part of a loop is duplicated (again, bb 3 is
|
|
|
|
|
the only block that needs a prologue):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
B 3<-- B ->3<--
|
|
|
|
|
| | | | | | |
|
|
|
|
|
| v | becomes | | v |
|
|
|
|
|
2---4--- 2---5-- 4---
|
|
|
|
|
| | |
|
|
|
|
|
R S R
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3).
|
|
|
|
|
|
|
|
|
|
ENTRY_EDGE is the edge where the prologue will be placed, possibly
|
2017-10-07 00:16:47 +00:00
|
|
|
|
changed by this function. PROLOGUE_SEQ is the prologue we will insert. */
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
|
|
|
|
void
|
2017-10-07 00:16:47 +00:00
|
|
|
|
try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq)
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-04-10 11:32:00 +00:00
|
|
|
|
/* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes
|
|
|
|
|
no sense to shrink-wrap: then do not shrink-wrap! */
|
|
|
|
|
|
|
|
|
|
if (!SHRINK_WRAPPING_ENABLED)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
if (crtl->profile && !targetm.profile_before_prologue ())
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
if (crtl->calls_eh_return)
|
|
|
|
|
return;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
bool empty_prologue = true;
|
|
|
|
|
for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
|
|
|
|
|
if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END))
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-04-10 11:32:00 +00:00
|
|
|
|
empty_prologue = false;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
break;
|
|
|
|
|
}
|
2017-04-10 11:32:00 +00:00
|
|
|
|
if (empty_prologue)
|
|
|
|
|
return;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
/* Move some code down to expose more shrink-wrapping opportunities. */
|
|
|
|
|
|
|
|
|
|
basic_block entry = (*entry_edge)->dest;
|
|
|
|
|
prepare_shrink_wrap (entry);
|
|
|
|
|
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
|
|
|
|
|
|
|
|
|
|
/* Compute the registers set and used in the prologue. */
|
|
|
|
|
|
|
|
|
|
HARD_REG_SET prologue_clobbered, prologue_used;
|
|
|
|
|
CLEAR_HARD_REG_SET (prologue_clobbered);
|
|
|
|
|
CLEAR_HARD_REG_SET (prologue_used);
|
|
|
|
|
for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
|
|
|
|
|
if (NONDEBUG_INSN_P (insn))
|
|
|
|
|
{
|
|
|
|
|
HARD_REG_SET this_used;
|
|
|
|
|
CLEAR_HARD_REG_SET (this_used);
|
|
|
|
|
note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used);
|
2022-10-27 18:55:19 +00:00
|
|
|
|
this_used &= ~prologue_clobbered;
|
|
|
|
|
prologue_used |= this_used;
|
|
|
|
|
note_stores (insn, record_hard_reg_sets, &prologue_clobbered);
|
2017-04-10 11:32:00 +00:00
|
|
|
|
}
|
|
|
|
|
CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
|
|
|
|
|
if (frame_pointer_needed)
|
|
|
|
|
CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
|
|
|
|
|
|
|
|
|
|
/* Find out what registers are set up by the prologue; any use of these
|
|
|
|
|
cannot happen before the prologue. */
|
|
|
|
|
|
|
|
|
|
struct hard_reg_set_container set_up_by_prologue;
|
|
|
|
|
CLEAR_HARD_REG_SET (set_up_by_prologue.set);
|
|
|
|
|
add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM);
|
|
|
|
|
add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
|
|
|
|
|
if (frame_pointer_needed)
|
|
|
|
|
add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
|
|
|
|
|
HARD_FRAME_POINTER_REGNUM);
|
|
|
|
|
if (pic_offset_table_rtx
|
|
|
|
|
&& (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
|
|
|
|
|
add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
|
|
|
|
|
PIC_OFFSET_TABLE_REGNUM);
|
|
|
|
|
if (crtl->drap_reg)
|
|
|
|
|
add_to_hard_reg_set (&set_up_by_prologue.set,
|
|
|
|
|
GET_MODE (crtl->drap_reg),
|
|
|
|
|
REGNO (crtl->drap_reg));
|
|
|
|
|
if (targetm.set_up_by_prologue)
|
|
|
|
|
targetm.set_up_by_prologue (&set_up_by_prologue);
|
|
|
|
|
|
|
|
|
|
/* We will insert the prologue before the basic block PRO. PRO should
|
|
|
|
|
dominate all basic blocks that need the prologue to be executed
|
|
|
|
|
before them. First, make PRO the "tightest wrap" possible. */
|
|
|
|
|
|
|
|
|
|
calculate_dominance_info (CDI_DOMINATORS);
|
|
|
|
|
|
|
|
|
|
basic_block pro = 0;
|
|
|
|
|
|
|
|
|
|
basic_block bb;
|
|
|
|
|
edge e;
|
|
|
|
|
edge_iterator ei;
|
|
|
|
|
FOR_EACH_BB_FN (bb, cfun)
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-04-10 11:32:00 +00:00
|
|
|
|
rtx_insn *insn;
|
|
|
|
|
FOR_BB_INSNS (bb, insn)
|
|
|
|
|
if (NONDEBUG_INSN_P (insn)
|
|
|
|
|
&& requires_stack_frame_p (insn, prologue_used,
|
|
|
|
|
set_up_by_prologue.set))
|
|
|
|
|
{
|
|
|
|
|
if (dump_file)
|
2022-10-27 18:55:19 +00:00
|
|
|
|
{
|
|
|
|
|
fprintf (dump_file, "Block %d needs prologue due to insn %d:\n",
|
|
|
|
|
bb->index, INSN_UID (insn));
|
|
|
|
|
print_rtl_single (dump_file, insn);
|
|
|
|
|
}
|
2017-04-10 11:32:00 +00:00
|
|
|
|
pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* If nothing needs a prologue, just put it at the start. This really
|
|
|
|
|
shouldn't happen, but we cannot fix it here. */
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
if (pro == 0)
|
|
|
|
|
{
|
2015-08-28 15:33:40 +00:00
|
|
|
|
if (dump_file)
|
2017-04-10 11:32:00 +00:00
|
|
|
|
fprintf(dump_file, "Nothing needs a prologue, but it isn't empty; "
|
|
|
|
|
"putting it at the start.\n");
|
|
|
|
|
pro = entry;
|
|
|
|
|
}
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n",
|
|
|
|
|
pro->index);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
/* Now see if we can put the prologue at the start of PRO. Putting it
|
|
|
|
|
there might require duplicating a block that cannot be duplicated,
|
|
|
|
|
or in some cases we cannot insert the prologue there at all. If PRO
|
|
|
|
|
wont't do, try again with the immediate dominator of PRO, and so on.
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
The blocks that need duplicating are those reachable from PRO but
|
|
|
|
|
not dominated by it. We keep in BB_WITH a bitmap of the blocks
|
|
|
|
|
reachable from PRO that we already found, and in VEC a stack of
|
|
|
|
|
those we still need to consider (to find successors). */
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2018-12-28 15:30:48 +00:00
|
|
|
|
auto_bitmap bb_with;
|
2017-04-10 11:32:00 +00:00
|
|
|
|
bitmap_set_bit (bb_with, pro->index);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
vec<basic_block> vec;
|
|
|
|
|
vec.create (n_basic_blocks_for_fn (cfun));
|
|
|
|
|
vec.quick_push (pro);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
unsigned max_grow_size = get_uncond_jump_length ();
|
2022-10-27 18:55:19 +00:00
|
|
|
|
max_grow_size *= param_max_grow_copy_bb_insns;
|
|
|
|
|
|
|
|
|
|
basic_block checked_pro = NULL;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2022-10-27 18:55:19 +00:00
|
|
|
|
while (pro != entry)
|
2017-04-10 11:32:00 +00:00
|
|
|
|
{
|
2022-10-27 18:55:19 +00:00
|
|
|
|
if (pro != checked_pro)
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2022-10-27 18:55:19 +00:00
|
|
|
|
while (pro != entry && !can_get_prologue (pro, prologue_clobbered))
|
|
|
|
|
{
|
|
|
|
|
pro = get_immediate_dominator (CDI_DOMINATORS, pro);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2022-10-27 18:55:19 +00:00
|
|
|
|
if (bitmap_set_bit (bb_with, pro->index))
|
|
|
|
|
vec.quick_push (pro);
|
|
|
|
|
}
|
|
|
|
|
checked_pro = pro;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
2022-10-27 18:55:19 +00:00
|
|
|
|
if (vec.is_empty ())
|
|
|
|
|
break;
|
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
basic_block bb = vec.pop ();
|
|
|
|
|
if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size))
|
|
|
|
|
while (!dominated_by_p (CDI_DOMINATORS, bb, pro))
|
|
|
|
|
{
|
|
|
|
|
gcc_assert (pro != entry);
|
|
|
|
|
|
|
|
|
|
pro = get_immediate_dominator (CDI_DOMINATORS, pro);
|
|
|
|
|
|
|
|
|
|
if (bitmap_set_bit (bb_with, pro->index))
|
|
|
|
|
vec.quick_push (pro);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FOR_EACH_EDGE (e, ei, bb->succs)
|
|
|
|
|
if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
|
|
|
|
|
&& bitmap_set_bit (bb_with, e->dest->index))
|
|
|
|
|
vec.quick_push (e->dest);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n",
|
|
|
|
|
pro->index);
|
|
|
|
|
|
|
|
|
|
/* If we can move PRO back without having to duplicate more blocks, do so.
|
|
|
|
|
We do this because putting the prologue earlier is better for scheduling.
|
|
|
|
|
|
|
|
|
|
We can move back to a block PRE if every path from PRE will eventually
|
|
|
|
|
need a prologue, that is, PRO is a post-dominator of PRE. PRE needs
|
|
|
|
|
to dominate every block reachable from itself. We keep in BB_TMP a
|
|
|
|
|
bitmap of the blocks reachable from PRE that we already found, and in
|
|
|
|
|
VEC a stack of those we still need to consider.
|
|
|
|
|
|
|
|
|
|
Any block reachable from PRE is also reachable from all predecessors
|
|
|
|
|
of PRE, so if we find we need to move PRE back further we can leave
|
|
|
|
|
everything not considered so far on the stack. Any block dominated
|
|
|
|
|
by PRE is also dominated by all other dominators of PRE, so anything
|
|
|
|
|
found good for some PRE does not need to be reconsidered later.
|
|
|
|
|
|
|
|
|
|
We don't need to update BB_WITH because none of the new blocks found
|
|
|
|
|
can jump to a block that does not need the prologue. */
|
|
|
|
|
|
|
|
|
|
if (pro != entry)
|
|
|
|
|
{
|
|
|
|
|
calculate_dominance_info (CDI_POST_DOMINATORS);
|
|
|
|
|
|
2018-12-28 15:30:48 +00:00
|
|
|
|
auto_bitmap bb_tmp;
|
2017-04-10 11:32:00 +00:00
|
|
|
|
bitmap_copy (bb_tmp, bb_with);
|
|
|
|
|
basic_block last_ok = pro;
|
|
|
|
|
vec.truncate (0);
|
|
|
|
|
|
|
|
|
|
while (pro != entry)
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-04-10 11:32:00 +00:00
|
|
|
|
basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro);
|
|
|
|
|
if (!dominated_by_p (CDI_POST_DOMINATORS, pre, pro))
|
|
|
|
|
break;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
if (bitmap_set_bit (bb_tmp, pre->index))
|
|
|
|
|
vec.quick_push (pre);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
bool ok = true;
|
|
|
|
|
while (!vec.is_empty ())
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-04-10 11:32:00 +00:00
|
|
|
|
if (!dominated_by_p (CDI_DOMINATORS, vec.last (), pre))
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-04-10 11:32:00 +00:00
|
|
|
|
ok = false;
|
|
|
|
|
break;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
basic_block bb = vec.pop ();
|
|
|
|
|
FOR_EACH_EDGE (e, ei, bb->succs)
|
|
|
|
|
if (bitmap_set_bit (bb_tmp, e->dest->index))
|
|
|
|
|
vec.quick_push (e->dest);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
2017-04-10 11:32:00 +00:00
|
|
|
|
|
|
|
|
|
if (ok && can_get_prologue (pre, prologue_clobbered))
|
|
|
|
|
last_ok = pre;
|
|
|
|
|
|
|
|
|
|
pro = pre;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
pro = last_ok;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-04-10 11:32:00 +00:00
|
|
|
|
free_dominance_info (CDI_POST_DOMINATORS);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
vec.release ();
|
|
|
|
|
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n",
|
|
|
|
|
pro->index);
|
|
|
|
|
|
|
|
|
|
if (pro == entry)
|
|
|
|
|
{
|
|
|
|
|
free_dominance_info (CDI_DOMINATORS);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Compute what fraction of the frequency and count of the blocks that run
|
|
|
|
|
both with and without prologue are for running with prologue. This gives
|
|
|
|
|
the correct answer for reducible flow graphs; for irreducible flow graphs
|
|
|
|
|
our profile is messed up beyond repair anyway. */
|
|
|
|
|
|
2018-12-28 15:30:48 +00:00
|
|
|
|
profile_count num = profile_count::zero ();
|
|
|
|
|
profile_count den = profile_count::zero ();
|
2017-04-10 11:32:00 +00:00
|
|
|
|
|
|
|
|
|
FOR_EACH_EDGE (e, ei, pro->preds)
|
|
|
|
|
if (!dominated_by_p (CDI_DOMINATORS, e->src, pro))
|
|
|
|
|
{
|
2018-12-28 15:30:48 +00:00
|
|
|
|
if (e->count ().initialized_p ())
|
|
|
|
|
num += e->count ();
|
|
|
|
|
if (e->src->count.initialized_p ())
|
|
|
|
|
den += e->src->count;
|
2017-04-10 11:32:00 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* All is okay, so do it. */
|
|
|
|
|
|
|
|
|
|
crtl->shrink_wrapped = true;
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "Performing shrink-wrapping.\n");
|
|
|
|
|
|
|
|
|
|
/* Copy the blocks that can run both with and without prologue. The
|
|
|
|
|
originals run with prologue, the copies without. Store a pointer to
|
|
|
|
|
the copy in the ->aux field of the original. */
|
|
|
|
|
|
|
|
|
|
FOR_EACH_BB_FN (bb, cfun)
|
|
|
|
|
if (bitmap_bit_p (bb_with, bb->index)
|
|
|
|
|
&& !dominated_by_p (CDI_DOMINATORS, bb, pro))
|
|
|
|
|
{
|
|
|
|
|
basic_block dup = duplicate_block (bb, 0, 0);
|
|
|
|
|
|
|
|
|
|
bb->aux = dup;
|
|
|
|
|
|
|
|
|
|
if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup)))
|
|
|
|
|
emit_barrier_after_bb (dup);
|
|
|
|
|
|
|
|
|
|
if (EDGE_COUNT (dup->succs) == 0)
|
|
|
|
|
emit_barrier_after_bb (dup);
|
|
|
|
|
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index);
|
2018-12-28 15:30:48 +00:00
|
|
|
|
|
|
|
|
|
if (num == profile_count::zero () || den.nonzero_p ())
|
|
|
|
|
bb->count = bb->count.apply_scale (num, den);
|
2017-04-10 11:32:00 +00:00
|
|
|
|
dup->count -= bb->count;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Now change the edges to point to the copies, where appropriate. */
|
|
|
|
|
|
|
|
|
|
FOR_EACH_BB_FN (bb, cfun)
|
|
|
|
|
if (!dominated_by_p (CDI_DOMINATORS, bb, pro))
|
|
|
|
|
{
|
|
|
|
|
basic_block src = bb;
|
|
|
|
|
if (bitmap_bit_p (bb_with, bb->index))
|
|
|
|
|
src = (basic_block) bb->aux;
|
|
|
|
|
|
|
|
|
|
FOR_EACH_EDGE (e, ei, src->succs)
|
|
|
|
|
{
|
|
|
|
|
if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (bitmap_bit_p (bb_with, e->dest->index)
|
|
|
|
|
&& !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-04-10 11:32:00 +00:00
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "Redirecting edge %d->%d to %d\n",
|
|
|
|
|
e->src->index, e->dest->index,
|
|
|
|
|
((basic_block) e->dest->aux)->index);
|
|
|
|
|
redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
2017-04-10 11:32:00 +00:00
|
|
|
|
else if (e->flags & EDGE_FALLTHRU
|
|
|
|
|
&& bitmap_bit_p (bb_with, bb->index))
|
|
|
|
|
force_nonfallthru (e);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Also redirect the function entry edge if necessary. */
|
|
|
|
|
|
|
|
|
|
FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
|
|
|
|
|
if (bitmap_bit_p (bb_with, e->dest->index)
|
|
|
|
|
&& !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
|
|
|
|
|
{
|
|
|
|
|
basic_block split_bb = split_edge (e);
|
|
|
|
|
e = single_succ_edge (split_bb);
|
|
|
|
|
redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
/* Make a simple_return for those exits that run without prologue. */
|
2017-04-10 11:32:00 +00:00
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
FOR_EACH_BB_REVERSE_FN (bb, cfun)
|
2017-04-10 11:32:00 +00:00
|
|
|
|
if (!bitmap_bit_p (bb_with, bb->index))
|
|
|
|
|
FOR_EACH_EDGE (e, ei, bb->succs)
|
|
|
|
|
if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
|
2017-10-07 00:16:47 +00:00
|
|
|
|
handle_simple_exit (e);
|
2017-04-10 11:32:00 +00:00
|
|
|
|
|
|
|
|
|
/* Finally, we want a single edge to put the prologue on. Make a new
|
|
|
|
|
block before the PRO block; the edge beteen them is the edge we want.
|
|
|
|
|
Then redirect those edges into PRO that come from blocks without the
|
|
|
|
|
prologue, to point to the new block instead. The new prologue block
|
|
|
|
|
is put at the end of the insn chain. */
|
|
|
|
|
|
|
|
|
|
basic_block new_bb = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
|
|
|
|
|
BB_COPY_PARTITION (new_bb, pro);
|
2018-12-28 15:30:48 +00:00
|
|
|
|
new_bb->count = profile_count::zero ();
|
2017-04-10 11:32:00 +00:00
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "Made prologue block %d\n", new_bb->index);
|
|
|
|
|
|
|
|
|
|
for (ei = ei_start (pro->preds); (e = ei_safe_edge (ei)); )
|
|
|
|
|
{
|
|
|
|
|
if (bitmap_bit_p (bb_with, e->src->index)
|
|
|
|
|
|| dominated_by_p (CDI_DOMINATORS, e->src, pro))
|
|
|
|
|
{
|
|
|
|
|
ei_next (&ei);
|
|
|
|
|
continue;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
2018-12-28 15:30:48 +00:00
|
|
|
|
new_bb->count += e->count ();
|
2017-04-10 11:32:00 +00:00
|
|
|
|
|
|
|
|
|
redirect_edge_and_branch_force (e, new_bb);
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "Redirected edge from %d\n", e->src->index);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
2017-04-10 11:32:00 +00:00
|
|
|
|
|
|
|
|
|
*entry_edge = make_single_succ_edge (new_bb, pro, EDGE_FALLTHRU);
|
|
|
|
|
force_nonfallthru (*entry_edge);
|
|
|
|
|
|
|
|
|
|
free_dominance_info (CDI_DOMINATORS);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
2017-10-07 00:16:47 +00:00
|
|
|
|
|
|
|
|
|
/* Separate shrink-wrapping
|
|
|
|
|
|
|
|
|
|
Instead of putting all of the prologue and epilogue in one spot, we
|
|
|
|
|
can put parts of it in places where those components are executed less
|
|
|
|
|
frequently. The following code does this, for prologue and epilogue
|
|
|
|
|
components that can be put in more than one location, and where those
|
|
|
|
|
components can be executed more than once (the epilogue component will
|
|
|
|
|
always be executed before the prologue component is executed a second
|
|
|
|
|
time).
|
|
|
|
|
|
|
|
|
|
What exactly is a component is target-dependent. The more usual
|
|
|
|
|
components are simple saves/restores to/from the frame of callee-saved
|
|
|
|
|
registers. This code treats components abstractly (as an sbitmap),
|
|
|
|
|
letting the target handle all details.
|
|
|
|
|
|
|
|
|
|
Prologue components are placed in such a way that for every component
|
|
|
|
|
the prologue is executed as infrequently as possible. We do this by
|
|
|
|
|
walking the dominator tree, comparing the cost of placing a prologue
|
|
|
|
|
component before a block to the sum of costs determined for all subtrees
|
|
|
|
|
of that block.
|
|
|
|
|
|
|
|
|
|
From this placement, we then determine for each component all blocks
|
|
|
|
|
where at least one of this block's dominators (including itself) will
|
|
|
|
|
get a prologue inserted. That then is how the components are placed.
|
|
|
|
|
We could place the epilogue components a bit smarter (we can save a
|
|
|
|
|
bit of code size sometimes); this is a possible future improvement.
|
|
|
|
|
|
|
|
|
|
Prologues and epilogues are preferably placed into a block, either at
|
|
|
|
|
the beginning or end of it, if it is needed for all predecessor resp.
|
|
|
|
|
successor edges; or placed on the edge otherwise.
|
|
|
|
|
|
|
|
|
|
If the placement of any prologue/epilogue leads to a situation we cannot
|
|
|
|
|
handle (for example, an abnormal edge would need to be split, or some
|
|
|
|
|
targets want to use some specific registers that may not be available
|
|
|
|
|
where we want to put them), separate shrink-wrapping for the components
|
|
|
|
|
in that prologue/epilogue is aborted. */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Print the sbitmap COMPONENTS to the DUMP_FILE if not empty, with the
|
|
|
|
|
label LABEL. */
|
|
|
|
|
static void
|
|
|
|
|
dump_components (const char *label, sbitmap components)
|
|
|
|
|
{
|
|
|
|
|
if (bitmap_empty_p (components))
|
|
|
|
|
return;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
fprintf (dump_file, " [%s", label);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
for (unsigned int j = 0; j < components->n_bits; j++)
|
|
|
|
|
if (bitmap_bit_p (components, j))
|
|
|
|
|
fprintf (dump_file, " %u", j);
|
|
|
|
|
|
|
|
|
|
fprintf (dump_file, "]");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* The data we collect for each bb. */
|
|
|
|
|
struct sw {
|
|
|
|
|
/* What components does this BB need? */
|
|
|
|
|
sbitmap needs_components;
|
|
|
|
|
|
|
|
|
|
/* What components does this BB have? This is the main decision this
|
|
|
|
|
pass makes. */
|
|
|
|
|
sbitmap has_components;
|
|
|
|
|
|
|
|
|
|
/* The components for which we placed code at the start of the BB (instead
|
|
|
|
|
of on all incoming edges). */
|
|
|
|
|
sbitmap head_components;
|
|
|
|
|
|
|
|
|
|
/* The components for which we placed code at the end of the BB (instead
|
|
|
|
|
of on all outgoing edges). */
|
|
|
|
|
sbitmap tail_components;
|
|
|
|
|
|
|
|
|
|
/* The frequency of executing the prologue for this BB, if a prologue is
|
|
|
|
|
placed on this BB. This is a pessimistic estimate (no prologue is
|
|
|
|
|
needed for edges from blocks that have the component under consideration
|
|
|
|
|
active already). */
|
|
|
|
|
gcov_type own_cost;
|
|
|
|
|
|
|
|
|
|
/* The frequency of executing the prologue for this BB and all BBs
|
|
|
|
|
dominated by it. */
|
|
|
|
|
gcov_type total_cost;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* A helper function for accessing the pass-specific info. */
|
|
|
|
|
static inline struct sw *
|
|
|
|
|
SW (basic_block bb)
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-10-07 00:16:47 +00:00
|
|
|
|
gcc_assert (bb->aux);
|
|
|
|
|
return (struct sw *) bb->aux;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Create the pass-specific data structures for separately shrink-wrapping
|
|
|
|
|
with components COMPONENTS. */
|
|
|
|
|
static void
|
|
|
|
|
init_separate_shrink_wrap (sbitmap components)
|
|
|
|
|
{
|
|
|
|
|
basic_block bb;
|
|
|
|
|
FOR_ALL_BB_FN (bb, cfun)
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-10-07 00:16:47 +00:00
|
|
|
|
bb->aux = xcalloc (1, sizeof (struct sw));
|
|
|
|
|
|
|
|
|
|
SW (bb)->needs_components = targetm.shrink_wrap.components_for_bb (bb);
|
|
|
|
|
|
|
|
|
|
/* Mark all basic blocks without successor as needing all components.
|
|
|
|
|
This avoids problems in at least cfgcleanup, sel-sched, and
|
|
|
|
|
regrename (largely to do with all paths to such a block still
|
|
|
|
|
needing the same dwarf CFI info). */
|
|
|
|
|
if (EDGE_COUNT (bb->succs) == 0)
|
|
|
|
|
bitmap_copy (SW (bb)->needs_components, components);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
if (dump_file)
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-10-07 00:16:47 +00:00
|
|
|
|
fprintf (dump_file, "bb %d components:", bb->index);
|
|
|
|
|
dump_components ("has", SW (bb)->needs_components);
|
|
|
|
|
fprintf (dump_file, "\n");
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
2017-10-07 00:16:47 +00:00
|
|
|
|
|
|
|
|
|
SW (bb)->has_components = sbitmap_alloc (SBITMAP_SIZE (components));
|
|
|
|
|
SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components));
|
|
|
|
|
SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components));
|
|
|
|
|
bitmap_clear (SW (bb)->has_components);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
2017-10-07 00:16:47 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Destroy the pass-specific data. */
|
|
|
|
|
static void
|
|
|
|
|
fini_separate_shrink_wrap (void)
|
|
|
|
|
{
|
|
|
|
|
basic_block bb;
|
|
|
|
|
FOR_ALL_BB_FN (bb, cfun)
|
|
|
|
|
if (bb->aux)
|
|
|
|
|
{
|
|
|
|
|
sbitmap_free (SW (bb)->needs_components);
|
|
|
|
|
sbitmap_free (SW (bb)->has_components);
|
|
|
|
|
sbitmap_free (SW (bb)->head_components);
|
|
|
|
|
sbitmap_free (SW (bb)->tail_components);
|
|
|
|
|
free (bb->aux);
|
|
|
|
|
bb->aux = 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
/* Place the prologue for component WHICH, in the basic blocks dominated
|
|
|
|
|
by HEAD. Do a DFS over the dominator tree, and set bit WHICH in the
|
|
|
|
|
HAS_COMPONENTS of a block if either the block has that bit set in
|
|
|
|
|
NEEDS_COMPONENTS, or it is cheaper to place the prologue here than in all
|
|
|
|
|
dominator subtrees separately. */
|
|
|
|
|
static void
|
|
|
|
|
place_prologue_for_one_component (unsigned int which, basic_block head)
|
|
|
|
|
{
|
|
|
|
|
/* The block we are currently dealing with. */
|
|
|
|
|
basic_block bb = head;
|
|
|
|
|
/* Is this the first time we visit this block, i.e. have we just gone
|
|
|
|
|
down the tree. */
|
|
|
|
|
bool first_visit = true;
|
|
|
|
|
|
|
|
|
|
/* Walk the dominator tree, visit one block per iteration of this loop.
|
|
|
|
|
Each basic block is visited twice: once before visiting any children
|
|
|
|
|
of the block, and once after visiting all of them (leaf nodes are
|
|
|
|
|
visited only once). As an optimization, we do not visit subtrees
|
|
|
|
|
that can no longer influence the prologue placement. */
|
|
|
|
|
for (;;)
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-10-07 00:16:47 +00:00
|
|
|
|
/* First visit of a block: set the (children) cost accumulator to zero;
|
|
|
|
|
if the block does not have the component itself, walk down. */
|
|
|
|
|
if (first_visit)
|
|
|
|
|
{
|
|
|
|
|
/* Initialize the cost. The cost is the block execution frequency
|
|
|
|
|
that does not come from backedges. Calculating this by simply
|
|
|
|
|
adding the cost of all edges that aren't backedges does not
|
|
|
|
|
work: this does not always add up to the block frequency at
|
|
|
|
|
all, and even if it does, rounding error makes for bad
|
|
|
|
|
decisions. */
|
2018-12-28 15:30:48 +00:00
|
|
|
|
SW (bb)->own_cost = bb->count.to_frequency (cfun);
|
2017-10-07 00:16:47 +00:00
|
|
|
|
|
|
|
|
|
edge e;
|
|
|
|
|
edge_iterator ei;
|
|
|
|
|
FOR_EACH_EDGE (e, ei, bb->preds)
|
|
|
|
|
if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
|
|
|
|
|
{
|
|
|
|
|
if (SW (bb)->own_cost > EDGE_FREQUENCY (e))
|
|
|
|
|
SW (bb)->own_cost -= EDGE_FREQUENCY (e);
|
|
|
|
|
else
|
|
|
|
|
SW (bb)->own_cost = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
SW (bb)->total_cost = 0;
|
|
|
|
|
|
|
|
|
|
if (!bitmap_bit_p (SW (bb)->needs_components, which)
|
|
|
|
|
&& first_dom_son (CDI_DOMINATORS, bb))
|
|
|
|
|
{
|
|
|
|
|
bb = first_dom_son (CDI_DOMINATORS, bb);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
/* If this block does need the component itself, or it is cheaper to
|
|
|
|
|
put the prologue here than in all the descendants that need it,
|
|
|
|
|
mark it so. If this block's immediate post-dominator is dominated
|
|
|
|
|
by this block, and that needs the prologue, we can put it on this
|
|
|
|
|
block as well (earlier is better). */
|
|
|
|
|
if (bitmap_bit_p (SW (bb)->needs_components, which)
|
|
|
|
|
|| SW (bb)->total_cost > SW (bb)->own_cost)
|
|
|
|
|
{
|
|
|
|
|
SW (bb)->total_cost = SW (bb)->own_cost;
|
|
|
|
|
bitmap_set_bit (SW (bb)->has_components, which);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
basic_block kid = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
|
|
|
|
|
if (dominated_by_p (CDI_DOMINATORS, kid, bb)
|
|
|
|
|
&& bitmap_bit_p (SW (kid)->has_components, which))
|
|
|
|
|
{
|
|
|
|
|
SW (bb)->total_cost = SW (bb)->own_cost;
|
|
|
|
|
bitmap_set_bit (SW (bb)->has_components, which);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* We are back where we started, so we are done now. */
|
|
|
|
|
if (bb == head)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/* We now know the cost of the subtree rooted at the current block.
|
|
|
|
|
Accumulate this cost in the parent. */
|
|
|
|
|
basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb);
|
|
|
|
|
SW (parent)->total_cost += SW (bb)->total_cost;
|
|
|
|
|
|
|
|
|
|
/* Don't walk the tree down unless necessary. */
|
|
|
|
|
if (next_dom_son (CDI_DOMINATORS, bb)
|
|
|
|
|
&& SW (parent)->total_cost <= SW (parent)->own_cost)
|
|
|
|
|
{
|
|
|
|
|
bb = next_dom_son (CDI_DOMINATORS, bb);
|
|
|
|
|
first_visit = true;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
bb = parent;
|
|
|
|
|
first_visit = false;
|
|
|
|
|
}
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
/* Set HAS_COMPONENTS in every block to the maximum it can be set to without
|
|
|
|
|
setting it on any path from entry to exit where it was not already set
|
|
|
|
|
somewhere (or, for blocks that have no path to the exit, consider only
|
2019-06-02 15:48:37 +00:00
|
|
|
|
paths from the entry to the block itself). Return whether any changes
|
|
|
|
|
were made to some HAS_COMPONENTS. */
|
|
|
|
|
static bool
|
2017-10-07 00:16:47 +00:00
|
|
|
|
spread_components (sbitmap components)
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-10-07 00:16:47 +00:00
|
|
|
|
basic_block entry_block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
|
|
|
|
|
basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (cfun);
|
|
|
|
|
|
|
|
|
|
/* A stack of all blocks left to consider, and a bitmap of all blocks
|
|
|
|
|
on that stack. */
|
|
|
|
|
vec<basic_block> todo;
|
|
|
|
|
todo.create (n_basic_blocks_for_fn (cfun));
|
2018-12-28 15:30:48 +00:00
|
|
|
|
auto_bitmap seen;
|
2017-10-07 00:16:47 +00:00
|
|
|
|
|
2018-12-28 15:30:48 +00:00
|
|
|
|
auto_sbitmap old (SBITMAP_SIZE (components));
|
2017-10-07 00:16:47 +00:00
|
|
|
|
|
|
|
|
|
/* Find for every block the components that are *not* needed on some path
|
|
|
|
|
from the entry to that block. Do this with a flood fill from the entry
|
|
|
|
|
block. Every block can be visited at most as often as the number of
|
|
|
|
|
components (plus one), and usually much less often. */
|
|
|
|
|
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "Spreading down...\n");
|
|
|
|
|
|
|
|
|
|
basic_block bb;
|
|
|
|
|
FOR_ALL_BB_FN (bb, cfun)
|
|
|
|
|
bitmap_clear (SW (bb)->head_components);
|
|
|
|
|
|
|
|
|
|
bitmap_copy (SW (entry_block)->head_components, components);
|
|
|
|
|
|
2015-08-28 15:33:40 +00:00
|
|
|
|
edge e;
|
|
|
|
|
edge_iterator ei;
|
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
todo.quick_push (single_succ (entry_block));
|
|
|
|
|
bitmap_set_bit (seen, single_succ (entry_block)->index);
|
|
|
|
|
while (!todo.is_empty ())
|
|
|
|
|
{
|
|
|
|
|
bb = todo.pop ();
|
|
|
|
|
|
|
|
|
|
bitmap_copy (old, SW (bb)->head_components);
|
|
|
|
|
|
|
|
|
|
FOR_EACH_EDGE (e, ei, bb->preds)
|
|
|
|
|
bitmap_ior (SW (bb)->head_components, SW (bb)->head_components,
|
|
|
|
|
SW (e->src)->head_components);
|
|
|
|
|
|
|
|
|
|
bitmap_and_compl (SW (bb)->head_components, SW (bb)->head_components,
|
|
|
|
|
SW (bb)->has_components);
|
|
|
|
|
|
|
|
|
|
if (!bitmap_equal_p (old, SW (bb)->head_components))
|
|
|
|
|
FOR_EACH_EDGE (e, ei, bb->succs)
|
|
|
|
|
if (bitmap_set_bit (seen, e->dest->index))
|
|
|
|
|
todo.quick_push (e->dest);
|
|
|
|
|
|
|
|
|
|
bitmap_clear_bit (seen, bb->index);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Find for every block the components that are *not* needed on some reverse
|
|
|
|
|
path from the exit to that block. */
|
|
|
|
|
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "Spreading up...\n");
|
|
|
|
|
|
|
|
|
|
/* First, mark all blocks not reachable from the exit block as not needing
|
|
|
|
|
any component on any path to the exit. Mark everything, and then clear
|
|
|
|
|
again by a flood fill. */
|
|
|
|
|
|
|
|
|
|
FOR_ALL_BB_FN (bb, cfun)
|
|
|
|
|
bitmap_copy (SW (bb)->tail_components, components);
|
|
|
|
|
|
|
|
|
|
FOR_EACH_EDGE (e, ei, exit_block->preds)
|
|
|
|
|
{
|
|
|
|
|
todo.quick_push (e->src);
|
|
|
|
|
bitmap_set_bit (seen, e->src->index);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
while (!todo.is_empty ())
|
|
|
|
|
{
|
|
|
|
|
bb = todo.pop ();
|
|
|
|
|
|
|
|
|
|
if (!bitmap_empty_p (SW (bb)->tail_components))
|
|
|
|
|
FOR_EACH_EDGE (e, ei, bb->preds)
|
|
|
|
|
if (bitmap_set_bit (seen, e->src->index))
|
|
|
|
|
todo.quick_push (e->src);
|
|
|
|
|
|
|
|
|
|
bitmap_clear (SW (bb)->tail_components);
|
|
|
|
|
|
|
|
|
|
bitmap_clear_bit (seen, bb->index);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* And then, flood fill backwards to find for every block the components
|
|
|
|
|
not needed on some path to the exit. */
|
|
|
|
|
|
|
|
|
|
bitmap_copy (SW (exit_block)->tail_components, components);
|
|
|
|
|
|
|
|
|
|
FOR_EACH_EDGE (e, ei, exit_block->preds)
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-10-07 00:16:47 +00:00
|
|
|
|
todo.quick_push (e->src);
|
|
|
|
|
bitmap_set_bit (seen, e->src->index);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
while (!todo.is_empty ())
|
|
|
|
|
{
|
|
|
|
|
bb = todo.pop ();
|
|
|
|
|
|
|
|
|
|
bitmap_copy (old, SW (bb)->tail_components);
|
|
|
|
|
|
|
|
|
|
FOR_EACH_EDGE (e, ei, bb->succs)
|
|
|
|
|
bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components,
|
|
|
|
|
SW (e->dest)->tail_components);
|
|
|
|
|
|
|
|
|
|
bitmap_and_compl (SW (bb)->tail_components, SW (bb)->tail_components,
|
|
|
|
|
SW (bb)->has_components);
|
|
|
|
|
|
|
|
|
|
if (!bitmap_equal_p (old, SW (bb)->tail_components))
|
|
|
|
|
FOR_EACH_EDGE (e, ei, bb->preds)
|
|
|
|
|
if (bitmap_set_bit (seen, e->src->index))
|
|
|
|
|
todo.quick_push (e->src);
|
|
|
|
|
|
|
|
|
|
bitmap_clear_bit (seen, bb->index);
|
|
|
|
|
}
|
|
|
|
|
|
2018-12-28 15:30:48 +00:00
|
|
|
|
todo.release ();
|
|
|
|
|
|
2022-10-27 18:55:19 +00:00
|
|
|
|
/* Finally, mark everything not needed both forwards and backwards. */
|
2017-10-07 00:16:47 +00:00
|
|
|
|
|
2019-06-02 15:48:37 +00:00
|
|
|
|
bool did_changes = false;
|
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
FOR_EACH_BB_FN (bb, cfun)
|
|
|
|
|
{
|
2019-06-02 15:48:37 +00:00
|
|
|
|
bitmap_copy (old, SW (bb)->has_components);
|
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
bitmap_and (SW (bb)->head_components, SW (bb)->head_components,
|
|
|
|
|
SW (bb)->tail_components);
|
|
|
|
|
bitmap_and_compl (SW (bb)->has_components, components,
|
|
|
|
|
SW (bb)->head_components);
|
2019-06-02 15:48:37 +00:00
|
|
|
|
|
|
|
|
|
if (!did_changes && !bitmap_equal_p (old, SW (bb)->has_components))
|
|
|
|
|
did_changes = true;
|
2017-10-07 00:16:47 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FOR_ALL_BB_FN (bb, cfun)
|
|
|
|
|
{
|
|
|
|
|
if (dump_file)
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-10-07 00:16:47 +00:00
|
|
|
|
fprintf (dump_file, "bb %d components:", bb->index);
|
|
|
|
|
dump_components ("has", SW (bb)->has_components);
|
|
|
|
|
fprintf (dump_file, "\n");
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
2017-10-07 00:16:47 +00:00
|
|
|
|
}
|
2019-06-02 15:48:37 +00:00
|
|
|
|
|
|
|
|
|
return did_changes;
|
2017-10-07 00:16:47 +00:00
|
|
|
|
}
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
/* If we cannot handle placing some component's prologues or epilogues where
|
|
|
|
|
we decided we should place them, unmark that component in COMPONENTS so
|
|
|
|
|
that it is not wrapped separately. */
|
|
|
|
|
static void
|
|
|
|
|
disqualify_problematic_components (sbitmap components)
|
|
|
|
|
{
|
2018-12-28 15:30:48 +00:00
|
|
|
|
auto_sbitmap pro (SBITMAP_SIZE (components));
|
|
|
|
|
auto_sbitmap epi (SBITMAP_SIZE (components));
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
basic_block bb;
|
|
|
|
|
FOR_EACH_BB_FN (bb, cfun)
|
|
|
|
|
{
|
|
|
|
|
edge e;
|
|
|
|
|
edge_iterator ei;
|
|
|
|
|
FOR_EACH_EDGE (e, ei, bb->succs)
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-10-07 00:16:47 +00:00
|
|
|
|
/* Find which components we want pro/epilogues for here. */
|
|
|
|
|
bitmap_and_compl (epi, SW (e->src)->has_components,
|
|
|
|
|
SW (e->dest)->has_components);
|
|
|
|
|
bitmap_and_compl (pro, SW (e->dest)->has_components,
|
|
|
|
|
SW (e->src)->has_components);
|
|
|
|
|
|
|
|
|
|
/* Ask the target what it thinks about things. */
|
|
|
|
|
if (!bitmap_empty_p (epi))
|
|
|
|
|
targetm.shrink_wrap.disqualify_components (components, e, epi,
|
|
|
|
|
false);
|
|
|
|
|
if (!bitmap_empty_p (pro))
|
|
|
|
|
targetm.shrink_wrap.disqualify_components (components, e, pro,
|
|
|
|
|
true);
|
|
|
|
|
|
|
|
|
|
/* If this edge doesn't need splitting, we're fine. */
|
|
|
|
|
if (single_pred_p (e->dest)
|
|
|
|
|
&& e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
/* If the edge can be split, that is fine too. */
|
|
|
|
|
if ((e->flags & EDGE_ABNORMAL) == 0)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
/* We also can handle sibcalls. */
|
|
|
|
|
if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
|
|
|
|
|
{
|
|
|
|
|
gcc_assert (e->flags & EDGE_SIBCALL);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
/* Remove from consideration those components we would need
|
|
|
|
|
pro/epilogues for on edges where we cannot insert them. */
|
|
|
|
|
bitmap_and_compl (components, components, epi);
|
|
|
|
|
bitmap_and_compl (components, components, pro);
|
|
|
|
|
|
|
|
|
|
if (dump_file && !bitmap_subset_p (epi, components))
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-10-07 00:16:47 +00:00
|
|
|
|
fprintf (dump_file, " BAD epi %d->%d", e->src->index,
|
|
|
|
|
e->dest->index);
|
|
|
|
|
if (e->flags & EDGE_EH)
|
|
|
|
|
fprintf (dump_file, " for EH");
|
|
|
|
|
dump_components ("epi", epi);
|
|
|
|
|
fprintf (dump_file, "\n");
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
2017-10-07 00:16:47 +00:00
|
|
|
|
|
|
|
|
|
if (dump_file && !bitmap_subset_p (pro, components))
|
|
|
|
|
{
|
|
|
|
|
fprintf (dump_file, " BAD pro %d->%d", e->src->index,
|
|
|
|
|
e->dest->index);
|
|
|
|
|
if (e->flags & EDGE_EH)
|
|
|
|
|
fprintf (dump_file, " for EH");
|
|
|
|
|
dump_components ("pro", pro);
|
|
|
|
|
fprintf (dump_file, "\n");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Place code for prologues and epilogues for COMPONENTS where we can put
|
|
|
|
|
that code at the start of basic blocks. */
|
|
|
|
|
static void
|
|
|
|
|
emit_common_heads_for_components (sbitmap components)
|
|
|
|
|
{
|
2018-12-28 15:30:48 +00:00
|
|
|
|
auto_sbitmap pro (SBITMAP_SIZE (components));
|
|
|
|
|
auto_sbitmap epi (SBITMAP_SIZE (components));
|
|
|
|
|
auto_sbitmap tmp (SBITMAP_SIZE (components));
|
2017-10-07 00:16:47 +00:00
|
|
|
|
|
|
|
|
|
basic_block bb;
|
|
|
|
|
FOR_ALL_BB_FN (bb, cfun)
|
|
|
|
|
bitmap_clear (SW (bb)->head_components);
|
|
|
|
|
|
|
|
|
|
FOR_EACH_BB_FN (bb, cfun)
|
|
|
|
|
{
|
|
|
|
|
/* Find which prologue resp. epilogue components are needed for all
|
|
|
|
|
predecessor edges to this block. */
|
|
|
|
|
|
|
|
|
|
/* First, select all possible components. */
|
|
|
|
|
bitmap_copy (epi, components);
|
|
|
|
|
bitmap_copy (pro, components);
|
|
|
|
|
|
|
|
|
|
edge e;
|
|
|
|
|
edge_iterator ei;
|
|
|
|
|
FOR_EACH_EDGE (e, ei, bb->preds)
|
|
|
|
|
{
|
|
|
|
|
if (e->flags & EDGE_ABNORMAL)
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-10-07 00:16:47 +00:00
|
|
|
|
bitmap_clear (epi);
|
|
|
|
|
bitmap_clear (pro);
|
|
|
|
|
break;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
/* Deselect those epilogue components that should not be inserted
|
|
|
|
|
for this edge. */
|
|
|
|
|
bitmap_and_compl (tmp, SW (e->src)->has_components,
|
|
|
|
|
SW (e->dest)->has_components);
|
|
|
|
|
bitmap_and (epi, epi, tmp);
|
|
|
|
|
|
|
|
|
|
/* Similar, for the prologue. */
|
|
|
|
|
bitmap_and_compl (tmp, SW (e->dest)->has_components,
|
|
|
|
|
SW (e->src)->has_components);
|
|
|
|
|
bitmap_and (pro, pro, tmp);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
|
|
|
|
|
fprintf (dump_file, " bb %d", bb->index);
|
|
|
|
|
|
|
|
|
|
if (dump_file && !bitmap_empty_p (epi))
|
|
|
|
|
dump_components ("epi", epi);
|
|
|
|
|
if (dump_file && !bitmap_empty_p (pro))
|
|
|
|
|
dump_components ("pro", pro);
|
|
|
|
|
|
|
|
|
|
if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
|
|
|
|
|
fprintf (dump_file, "\n");
|
|
|
|
|
|
|
|
|
|
/* Place code after the BB note. */
|
|
|
|
|
if (!bitmap_empty_p (pro))
|
|
|
|
|
{
|
|
|
|
|
start_sequence ();
|
|
|
|
|
targetm.shrink_wrap.emit_prologue_components (pro);
|
|
|
|
|
rtx_insn *seq = get_insns ();
|
|
|
|
|
end_sequence ();
|
|
|
|
|
record_prologue_seq (seq);
|
|
|
|
|
|
|
|
|
|
emit_insn_after (seq, bb_note (bb));
|
|
|
|
|
|
|
|
|
|
bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, pro);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!bitmap_empty_p (epi))
|
|
|
|
|
{
|
|
|
|
|
start_sequence ();
|
|
|
|
|
targetm.shrink_wrap.emit_epilogue_components (epi);
|
|
|
|
|
rtx_insn *seq = get_insns ();
|
|
|
|
|
end_sequence ();
|
|
|
|
|
record_epilogue_seq (seq);
|
|
|
|
|
|
|
|
|
|
emit_insn_after (seq, bb_note (bb));
|
|
|
|
|
|
|
|
|
|
bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, epi);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Place code for prologues and epilogues for COMPONENTS where we can put
|
|
|
|
|
that code at the end of basic blocks. */
|
|
|
|
|
static void
|
|
|
|
|
emit_common_tails_for_components (sbitmap components)
|
|
|
|
|
{
|
2018-12-28 15:30:48 +00:00
|
|
|
|
auto_sbitmap pro (SBITMAP_SIZE (components));
|
|
|
|
|
auto_sbitmap epi (SBITMAP_SIZE (components));
|
|
|
|
|
auto_sbitmap tmp (SBITMAP_SIZE (components));
|
2017-10-07 00:16:47 +00:00
|
|
|
|
|
|
|
|
|
basic_block bb;
|
|
|
|
|
FOR_ALL_BB_FN (bb, cfun)
|
|
|
|
|
bitmap_clear (SW (bb)->tail_components);
|
|
|
|
|
|
|
|
|
|
FOR_EACH_BB_FN (bb, cfun)
|
|
|
|
|
{
|
|
|
|
|
/* Find which prologue resp. epilogue components are needed for all
|
|
|
|
|
successor edges from this block. */
|
|
|
|
|
if (EDGE_COUNT (bb->succs) == 0)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
/* First, select all possible components. */
|
|
|
|
|
bitmap_copy (epi, components);
|
|
|
|
|
bitmap_copy (pro, components);
|
|
|
|
|
|
|
|
|
|
edge e;
|
|
|
|
|
edge_iterator ei;
|
|
|
|
|
FOR_EACH_EDGE (e, ei, bb->succs)
|
|
|
|
|
{
|
|
|
|
|
if (e->flags & EDGE_ABNORMAL)
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-10-07 00:16:47 +00:00
|
|
|
|
bitmap_clear (epi);
|
|
|
|
|
bitmap_clear (pro);
|
|
|
|
|
break;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
2017-10-07 00:16:47 +00:00
|
|
|
|
|
|
|
|
|
/* Deselect those epilogue components that should not be inserted
|
|
|
|
|
for this edge, and also those that are already put at the head
|
|
|
|
|
of the successor block. */
|
|
|
|
|
bitmap_and_compl (tmp, SW (e->src)->has_components,
|
|
|
|
|
SW (e->dest)->has_components);
|
|
|
|
|
bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
|
|
|
|
|
bitmap_and (epi, epi, tmp);
|
|
|
|
|
|
|
|
|
|
/* Similarly, for the prologue. */
|
|
|
|
|
bitmap_and_compl (tmp, SW (e->dest)->has_components,
|
|
|
|
|
SW (e->src)->has_components);
|
|
|
|
|
bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
|
|
|
|
|
bitmap_and (pro, pro, tmp);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* If the last insn of this block is a control flow insn we cannot
|
|
|
|
|
put anything after it. We can put our code before it instead,
|
|
|
|
|
but only if that jump insn is a simple jump. */
|
|
|
|
|
rtx_insn *last_insn = BB_END (bb);
|
|
|
|
|
if (control_flow_insn_p (last_insn) && !simplejump_p (last_insn))
|
|
|
|
|
{
|
|
|
|
|
bitmap_clear (epi);
|
|
|
|
|
bitmap_clear (pro);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
|
|
|
|
|
fprintf (dump_file, " bb %d", bb->index);
|
|
|
|
|
|
|
|
|
|
if (dump_file && !bitmap_empty_p (epi))
|
|
|
|
|
dump_components ("epi", epi);
|
|
|
|
|
if (dump_file && !bitmap_empty_p (pro))
|
|
|
|
|
dump_components ("pro", pro);
|
|
|
|
|
|
|
|
|
|
if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
|
|
|
|
|
fprintf (dump_file, "\n");
|
|
|
|
|
|
|
|
|
|
/* Put the code at the end of the BB, but before any final jump. */
|
|
|
|
|
if (!bitmap_empty_p (epi))
|
|
|
|
|
{
|
|
|
|
|
start_sequence ();
|
|
|
|
|
targetm.shrink_wrap.emit_epilogue_components (epi);
|
|
|
|
|
rtx_insn *seq = get_insns ();
|
|
|
|
|
end_sequence ();
|
|
|
|
|
record_epilogue_seq (seq);
|
|
|
|
|
|
|
|
|
|
if (control_flow_insn_p (last_insn))
|
|
|
|
|
emit_insn_before (seq, last_insn);
|
|
|
|
|
else
|
|
|
|
|
emit_insn_after (seq, last_insn);
|
|
|
|
|
|
|
|
|
|
bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, epi);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!bitmap_empty_p (pro))
|
|
|
|
|
{
|
|
|
|
|
start_sequence ();
|
|
|
|
|
targetm.shrink_wrap.emit_prologue_components (pro);
|
|
|
|
|
rtx_insn *seq = get_insns ();
|
|
|
|
|
end_sequence ();
|
|
|
|
|
record_prologue_seq (seq);
|
|
|
|
|
|
|
|
|
|
if (control_flow_insn_p (last_insn))
|
|
|
|
|
emit_insn_before (seq, last_insn);
|
|
|
|
|
else
|
|
|
|
|
emit_insn_after (seq, last_insn);
|
|
|
|
|
|
|
|
|
|
bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, pro);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Place prologues and epilogues for COMPONENTS on edges, if we haven't already
|
|
|
|
|
placed them inside blocks directly. */
|
|
|
|
|
static void
|
|
|
|
|
insert_prologue_epilogue_for_components (sbitmap components)
|
|
|
|
|
{
|
2018-12-28 15:30:48 +00:00
|
|
|
|
auto_sbitmap pro (SBITMAP_SIZE (components));
|
|
|
|
|
auto_sbitmap epi (SBITMAP_SIZE (components));
|
2017-10-07 00:16:47 +00:00
|
|
|
|
|
|
|
|
|
basic_block bb;
|
|
|
|
|
FOR_EACH_BB_FN (bb, cfun)
|
|
|
|
|
{
|
|
|
|
|
if (!bb->aux)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
edge e;
|
|
|
|
|
edge_iterator ei;
|
|
|
|
|
FOR_EACH_EDGE (e, ei, bb->succs)
|
|
|
|
|
{
|
|
|
|
|
/* Find which pro/epilogue components are needed on this edge. */
|
|
|
|
|
bitmap_and_compl (epi, SW (e->src)->has_components,
|
|
|
|
|
SW (e->dest)->has_components);
|
|
|
|
|
bitmap_and_compl (pro, SW (e->dest)->has_components,
|
|
|
|
|
SW (e->src)->has_components);
|
|
|
|
|
bitmap_and (epi, epi, components);
|
|
|
|
|
bitmap_and (pro, pro, components);
|
|
|
|
|
|
|
|
|
|
/* Deselect those we already have put at the head or tail of the
|
|
|
|
|
edge's dest resp. src. */
|
|
|
|
|
bitmap_and_compl (epi, epi, SW (e->dest)->head_components);
|
|
|
|
|
bitmap_and_compl (pro, pro, SW (e->dest)->head_components);
|
|
|
|
|
bitmap_and_compl (epi, epi, SW (e->src)->tail_components);
|
|
|
|
|
bitmap_and_compl (pro, pro, SW (e->src)->tail_components);
|
|
|
|
|
|
|
|
|
|
if (!bitmap_empty_p (epi) || !bitmap_empty_p (pro))
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-10-07 00:16:47 +00:00
|
|
|
|
if (dump_file)
|
|
|
|
|
{
|
|
|
|
|
fprintf (dump_file, " %d->%d", e->src->index,
|
|
|
|
|
e->dest->index);
|
|
|
|
|
dump_components ("epi", epi);
|
|
|
|
|
dump_components ("pro", pro);
|
|
|
|
|
if (e->flags & EDGE_SIBCALL)
|
|
|
|
|
fprintf (dump_file, " (SIBCALL)");
|
|
|
|
|
else if (e->flags & EDGE_ABNORMAL)
|
|
|
|
|
fprintf (dump_file, " (ABNORMAL)");
|
|
|
|
|
fprintf (dump_file, "\n");
|
|
|
|
|
}
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
/* Put the epilogue components in place. */
|
|
|
|
|
start_sequence ();
|
|
|
|
|
targetm.shrink_wrap.emit_epilogue_components (epi);
|
|
|
|
|
rtx_insn *seq = get_insns ();
|
|
|
|
|
end_sequence ();
|
|
|
|
|
record_epilogue_seq (seq);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
if (e->flags & EDGE_SIBCALL)
|
|
|
|
|
{
|
|
|
|
|
gcc_assert (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun));
|
|
|
|
|
|
|
|
|
|
rtx_insn *insn = BB_END (e->src);
|
|
|
|
|
gcc_assert (CALL_P (insn) && SIBLING_CALL_P (insn));
|
|
|
|
|
emit_insn_before (seq, insn);
|
|
|
|
|
}
|
|
|
|
|
else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
|
|
|
|
|
{
|
|
|
|
|
gcc_assert (e->flags & EDGE_FALLTHRU);
|
|
|
|
|
basic_block new_bb = split_edge (e);
|
|
|
|
|
emit_insn_after (seq, BB_END (new_bb));
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
insert_insn_on_edge (seq, e);
|
|
|
|
|
|
|
|
|
|
/* Put the prologue components in place. */
|
|
|
|
|
start_sequence ();
|
|
|
|
|
targetm.shrink_wrap.emit_prologue_components (pro);
|
|
|
|
|
seq = get_insns ();
|
|
|
|
|
end_sequence ();
|
|
|
|
|
record_prologue_seq (seq);
|
|
|
|
|
|
|
|
|
|
insert_insn_on_edge (seq, e);
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
commit_edge_insertions ();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* The main entry point to this subpass. FIRST_BB is where the prologue
|
|
|
|
|
would be normally put. */
|
|
|
|
|
void
|
|
|
|
|
try_shrink_wrapping_separate (basic_block first_bb)
|
|
|
|
|
{
|
|
|
|
|
if (!(SHRINK_WRAPPING_ENABLED
|
|
|
|
|
&& flag_shrink_wrap_separate
|
|
|
|
|
&& optimize_function_for_speed_p (cfun)
|
|
|
|
|
&& targetm.shrink_wrap.get_separate_components))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/* We don't handle "strange" functions. */
|
|
|
|
|
if (cfun->calls_alloca
|
|
|
|
|
|| cfun->calls_setjmp
|
|
|
|
|
|| cfun->can_throw_non_call_exceptions
|
|
|
|
|
|| crtl->calls_eh_return
|
|
|
|
|
|| crtl->has_nonlocal_goto
|
|
|
|
|
|| crtl->saves_all_registers)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/* Ask the target what components there are. If it returns NULL, don't
|
|
|
|
|
do anything. */
|
|
|
|
|
sbitmap components = targetm.shrink_wrap.get_separate_components ();
|
|
|
|
|
if (!components)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/* We need LIVE info, not defining anything in the entry block and not
|
|
|
|
|
using anything in the exit block. A block then needs a component if
|
|
|
|
|
the register for that component is in the IN or GEN or KILL set for
|
|
|
|
|
that block. */
|
|
|
|
|
df_scan->local_flags |= DF_SCAN_EMPTY_ENTRY_EXIT;
|
|
|
|
|
df_update_entry_exit_and_calls ();
|
|
|
|
|
df_live_add_problem ();
|
|
|
|
|
df_live_set_all_dirty ();
|
|
|
|
|
df_analyze ();
|
|
|
|
|
|
|
|
|
|
calculate_dominance_info (CDI_DOMINATORS);
|
|
|
|
|
calculate_dominance_info (CDI_POST_DOMINATORS);
|
|
|
|
|
|
|
|
|
|
init_separate_shrink_wrap (components);
|
|
|
|
|
|
|
|
|
|
sbitmap_iterator sbi;
|
|
|
|
|
unsigned int j;
|
|
|
|
|
EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi)
|
|
|
|
|
place_prologue_for_one_component (j, first_bb);
|
|
|
|
|
|
2019-06-02 15:48:37 +00:00
|
|
|
|
/* Try to minimize the number of saves and restores. Do this as long as
|
|
|
|
|
it changes anything. This does not iterate more than a few times. */
|
|
|
|
|
int spread_times = 0;
|
|
|
|
|
while (spread_components (components))
|
|
|
|
|
{
|
|
|
|
|
spread_times++;
|
|
|
|
|
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "Now spread %d times.\n", spread_times);
|
|
|
|
|
}
|
2017-10-07 00:16:47 +00:00
|
|
|
|
|
|
|
|
|
disqualify_problematic_components (components);
|
|
|
|
|
|
|
|
|
|
/* Don't separately shrink-wrap anything where the "main" prologue will
|
|
|
|
|
go; the target code can often optimize things if it is presented with
|
|
|
|
|
all components together (say, if it generates store-multiple insns). */
|
|
|
|
|
bitmap_and_compl (components, components, SW (first_bb)->has_components);
|
|
|
|
|
|
|
|
|
|
if (bitmap_empty_p (components))
|
2015-08-28 15:33:40 +00:00
|
|
|
|
{
|
2017-10-07 00:16:47 +00:00
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "Not wrapping anything separately.\n");
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
if (dump_file)
|
|
|
|
|
{
|
|
|
|
|
fprintf (dump_file, "The components we wrap separately are");
|
|
|
|
|
dump_components ("sep", components);
|
|
|
|
|
fprintf (dump_file, "\n");
|
2017-04-10 11:32:00 +00:00
|
|
|
|
|
2017-10-07 00:16:47 +00:00
|
|
|
|
fprintf (dump_file, "... Inserting common heads...\n");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
emit_common_heads_for_components (components);
|
|
|
|
|
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "... Inserting common tails...\n");
|
|
|
|
|
|
|
|
|
|
emit_common_tails_for_components (components);
|
|
|
|
|
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "... Inserting the more difficult ones...\n");
|
|
|
|
|
|
|
|
|
|
insert_prologue_epilogue_for_components (components);
|
|
|
|
|
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "... Done.\n");
|
|
|
|
|
|
|
|
|
|
targetm.shrink_wrap.set_handled_components (components);
|
|
|
|
|
|
|
|
|
|
crtl->shrink_wrapped_separate = true;
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|
2017-10-07 00:16:47 +00:00
|
|
|
|
|
|
|
|
|
fini_separate_shrink_wrap ();
|
|
|
|
|
|
|
|
|
|
sbitmap_free (components);
|
|
|
|
|
free_dominance_info (CDI_DOMINATORS);
|
|
|
|
|
free_dominance_info (CDI_POST_DOMINATORS);
|
|
|
|
|
|
|
|
|
|
/* All done. */
|
|
|
|
|
df_scan->local_flags &= ~DF_SCAN_EMPTY_ENTRY_EXIT;
|
|
|
|
|
df_update_entry_exit_and_calls ();
|
|
|
|
|
df_live_set_all_dirty ();
|
|
|
|
|
df_analyze ();
|
2015-08-28 15:33:40 +00:00
|
|
|
|
}
|