1641 lines
51 KiB
C++
1641 lines
51 KiB
C++
/* SLP - Pattern matcher on SLP trees
|
|
Copyright (C) 2020-2022 Free Software Foundation, Inc.
|
|
|
|
This file is part of GCC.
|
|
|
|
GCC is free software; you can redistribute it and/or modify it under
|
|
the terms of the GNU General Public License as published by the Free
|
|
Software Foundation; either version 3, or (at your option) any later
|
|
version.
|
|
|
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with GCC; see the file COPYING3. If not see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
#include "config.h"
|
|
#include "system.h"
|
|
#include "coretypes.h"
|
|
#include "backend.h"
|
|
#include "target.h"
|
|
#include "rtl.h"
|
|
#include "tree.h"
|
|
#include "gimple.h"
|
|
#include "tree-pass.h"
|
|
#include "ssa.h"
|
|
#include "optabs-tree.h"
|
|
#include "insn-config.h"
|
|
#include "recog.h" /* FIXME: for insn_data */
|
|
#include "fold-const.h"
|
|
#include "stor-layout.h"
|
|
#include "gimple-iterator.h"
|
|
#include "cfgloop.h"
|
|
#include "tree-vectorizer.h"
|
|
#include "langhooks.h"
|
|
#include "gimple-walk.h"
|
|
#include "dbgcnt.h"
|
|
#include "tree-vector-builder.h"
|
|
#include "vec-perm-indices.h"
|
|
#include "gimple-fold.h"
|
|
#include "internal-fn.h"
|
|
|
|
/* SLP Pattern matching mechanism.
|
|
|
|
This extension to the SLP vectorizer allows one to transform the generated SLP
|
|
tree based on any pattern. The difference between this and the normal vect
|
|
pattern matcher is that unlike the former, this matcher allows you to match
|
|
with instructions that do not belong to the same SSA dominator graph.
|
|
|
|
The only requirement that this pattern matcher has is that you are only
|
|
allowed to either match an entire group or none.
|
|
|
|
The pattern matcher currently only allows you to perform replacements to
|
|
internal functions.
|
|
|
|
Once the patterns are matched it is one way, these cannot be undone. It is
|
|
currently not supported to match patterns recursively.
|
|
|
|
To add a new pattern, implement the vect_pattern class and add the type to
|
|
slp_patterns.
|
|
|
|
*/
|
|
|
|
/*******************************************************************************
|
|
* vect_pattern class
|
|
******************************************************************************/
|
|
|
|
/* Default implementation of recognize that performs matching, validation and
|
|
replacement of nodes but that can be overriden if required. */
|
|
|
|
static bool
|
|
vect_pattern_validate_optab (internal_fn ifn, slp_tree node)
|
|
{
|
|
tree vectype = SLP_TREE_VECTYPE (node);
|
|
if (ifn == IFN_LAST || !vectype)
|
|
return false;
|
|
|
|
if (dump_enabled_p ())
|
|
dump_printf_loc (MSG_NOTE, vect_location,
|
|
"Found %s pattern in SLP tree\n",
|
|
internal_fn_name (ifn));
|
|
|
|
if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
|
|
{
|
|
if (dump_enabled_p ())
|
|
dump_printf_loc (MSG_NOTE, vect_location,
|
|
"Target supports %s vectorization with mode %T\n",
|
|
internal_fn_name (ifn), vectype);
|
|
}
|
|
else
|
|
{
|
|
if (dump_enabled_p ())
|
|
{
|
|
if (!vectype)
|
|
dump_printf_loc (MSG_NOTE, vect_location,
|
|
"Target does not support vector type for %T\n",
|
|
SLP_TREE_DEF_TYPE (node));
|
|
else
|
|
dump_printf_loc (MSG_NOTE, vect_location,
|
|
"Target does not support %s for vector type "
|
|
"%T\n", internal_fn_name (ifn), vectype);
|
|
}
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/*******************************************************************************
|
|
* General helper types
|
|
******************************************************************************/
|
|
|
|
/* The COMPLEX_OPERATION enum names the pairs of operations that can be
   matched when looking for expressions of interest for complex number
   addition and mla.  Each enumerator is named after the operation of the
   first node followed by the operation of the second node; CMPLX_NONE
   means no pair was recognized.  */

typedef enum _complex_operation : unsigned {
  PLUS_PLUS = 0,
  MINUS_PLUS = 1,
  PLUS_MINUS = 2,
  MULT_MULT = 3,
  CMPLX_NONE = 4
} complex_operation_t;
|
|
|
|
/*******************************************************************************
|
|
* General helper functions
|
|
******************************************************************************/
|
|
|
|
/* Helper function of linear_loads_p that checks to see if the load permutation
|
|
is sequential and in monotonically increasing order of loads with no gaps.
|
|
*/
|
|
|
|
static inline complex_perm_kinds_t
|
|
is_linear_load_p (load_permutation_t loads)
|
|
{
|
|
if (loads.length() == 0)
|
|
return PERM_UNKNOWN;
|
|
|
|
unsigned load, i;
|
|
complex_perm_kinds_t candidates[4]
|
|
= { PERM_ODDODD
|
|
, PERM_EVENEVEN
|
|
, PERM_EVENODD
|
|
, PERM_ODDEVEN
|
|
};
|
|
|
|
int valid_patterns = 4;
|
|
FOR_EACH_VEC_ELT (loads, i, load)
|
|
{
|
|
unsigned adj_load = load % 2;
|
|
if (candidates[0] != PERM_UNKNOWN && adj_load != 1)
|
|
{
|
|
candidates[0] = PERM_UNKNOWN;
|
|
valid_patterns--;
|
|
}
|
|
if (candidates[1] != PERM_UNKNOWN && adj_load != 0)
|
|
{
|
|
candidates[1] = PERM_UNKNOWN;
|
|
valid_patterns--;
|
|
}
|
|
if (candidates[2] != PERM_UNKNOWN && load != i)
|
|
{
|
|
candidates[2] = PERM_UNKNOWN;
|
|
valid_patterns--;
|
|
}
|
|
if (candidates[3] != PERM_UNKNOWN
|
|
&& load != (i % 2 == 0 ? i + 1 : i - 1))
|
|
{
|
|
candidates[3] = PERM_UNKNOWN;
|
|
valid_patterns--;
|
|
}
|
|
|
|
if (valid_patterns == 0)
|
|
return PERM_UNKNOWN;
|
|
}
|
|
|
|
for (i = 0; i < sizeof(candidates); i++)
|
|
if (candidates[i] != PERM_UNKNOWN)
|
|
return candidates[i];
|
|
|
|
return PERM_UNKNOWN;
|
|
}
|
|
|
|
/* Combine complex_perm_kinds A and B into a new permute kind that describes the
|
|
resulting operation. */
|
|
|
|
static inline complex_perm_kinds_t
|
|
vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b)
|
|
{
|
|
if (a == b)
|
|
return a;
|
|
|
|
if (a == PERM_TOP)
|
|
return b;
|
|
|
|
if (b == PERM_TOP)
|
|
return a;
|
|
|
|
return PERM_UNKNOWN;
|
|
}
|
|
|
|
/* Check to see if all loads rooted in ROOT are linear. Linearity is
|
|
defined as having no gaps between values loaded. */
|
|
|
|
static complex_perm_kinds_t
|
|
linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root)
|
|
{
|
|
if (!root)
|
|
return PERM_UNKNOWN;
|
|
|
|
unsigned i;
|
|
complex_perm_kinds_t *tmp;
|
|
|
|
if ((tmp = perm_cache->get (root)) != NULL)
|
|
return *tmp;
|
|
|
|
complex_perm_kinds_t retval = PERM_UNKNOWN;
|
|
perm_cache->put (root, retval);
|
|
|
|
/* If it's a load node, then just read the load permute. */
|
|
if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
|
|
{
|
|
retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
|
|
perm_cache->put (root, retval);
|
|
return retval;
|
|
}
|
|
else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
|
|
{
|
|
retval = PERM_TOP;
|
|
perm_cache->put (root, retval);
|
|
return retval;
|
|
}
|
|
|
|
complex_perm_kinds_t kind = PERM_TOP;
|
|
|
|
slp_tree child;
|
|
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child)
|
|
{
|
|
complex_perm_kinds_t res = linear_loads_p (perm_cache, child);
|
|
kind = vect_merge_perms (kind, res);
|
|
/* Unknown and Top are not valid on blends as they produce no permute. */
|
|
retval = kind;
|
|
if (kind == PERM_UNKNOWN || kind == PERM_TOP)
|
|
return retval;
|
|
}
|
|
|
|
retval = kind;
|
|
|
|
perm_cache->put (root, retval);
|
|
return retval;
|
|
}
|
|
|
|
|
|
/* This function attempts to make a node rooted in NODE is linear. If the node
|
|
if already linear than the node itself is returned in RESULT.
|
|
|
|
If the node is not linear then a new VEC_PERM_EXPR node is created with a
|
|
lane permute that when applied will make the node linear. If such a
|
|
permute cannot be created then FALSE is returned from the function.
|
|
|
|
Here linearity is defined as having a sequential, monotically increasing
|
|
load position inside the load permute generated by the loads reachable from
|
|
NODE. */
|
|
|
|
static slp_tree
|
|
vect_build_swap_evenodd_node (slp_tree node)
|
|
{
|
|
/* Attempt to linearise the permute. */
|
|
vec<std::pair<unsigned, unsigned> > zipped;
|
|
zipped.create (SLP_TREE_LANES (node));
|
|
|
|
for (unsigned x = 0; x < SLP_TREE_LANES (node); x+=2)
|
|
{
|
|
zipped.quick_push (std::make_pair (0, x+1));
|
|
zipped.quick_push (std::make_pair (0, x));
|
|
}
|
|
|
|
/* Create the new permute node and store it instead. */
|
|
slp_tree vnode = vect_create_new_slp_node (1, VEC_PERM_EXPR);
|
|
SLP_TREE_LANE_PERMUTATION (vnode) = zipped;
|
|
SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (node);
|
|
SLP_TREE_CHILDREN (vnode).quick_push (node);
|
|
SLP_TREE_REF_COUNT (vnode) = 1;
|
|
SLP_TREE_LANES (vnode) = SLP_TREE_LANES (node);
|
|
SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (node);
|
|
SLP_TREE_REF_COUNT (node)++;
|
|
return vnode;
|
|
}
|
|
|
|
/* Checks to see of the expression represented by NODE is a gimple assign with
|
|
code CODE. */
|
|
|
|
static inline bool
|
|
vect_match_expression_p (slp_tree node, tree_code code)
|
|
{
|
|
if (!node
|
|
|| !SLP_TREE_REPRESENTATIVE (node))
|
|
return false;
|
|
|
|
gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
|
|
if (!is_gimple_assign (expr)
|
|
|| gimple_assign_rhs_code (expr) != code)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/* Check if the given lane permute in PERMUTES matches an alternating sequence
|
|
of {even odd even odd ...}. This to account for unrolled loops. Further
|
|
mode there resulting permute must be linear. */
|
|
|
|
static inline bool
|
|
vect_check_evenodd_blend (lane_permutation_t &permutes,
|
|
unsigned even, unsigned odd)
|
|
{
|
|
if (permutes.length () == 0
|
|
|| permutes.length () % 2 != 0)
|
|
return false;
|
|
|
|
unsigned val[2] = {even, odd};
|
|
unsigned seed = 0;
|
|
for (unsigned i = 0; i < permutes.length (); i++)
|
|
if (permutes[i].first != val[i % 2]
|
|
|| permutes[i].second != seed++)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/* This function will match the two gimple expressions representing NODE1 and
|
|
NODE2 in parallel and returns the pair operation that represents the two
|
|
expressions in the two statements.
|
|
|
|
If match is successful then the corresponding complex_operation is
|
|
returned and the arguments to the two matched operations are returned in OPS.
|
|
|
|
If TWO_OPERANDS it is expected that the LANES of the parent VEC_PERM select
|
|
from the two nodes alternatingly.
|
|
|
|
If unsuccessful then CMPLX_NONE is returned and OPS is untouched.
|
|
|
|
e.g. the following gimple statements
|
|
|
|
stmt 0 _39 = _37 + _12;
|
|
stmt 1 _6 = _38 - _36;
|
|
|
|
will return PLUS_MINUS along with OPS containing {_37, _12, _38, _36}.
|
|
*/
|
|
|
|
static complex_operation_t
|
|
vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes,
|
|
bool two_operands = true, vec<slp_tree> *ops = NULL)
|
|
{
|
|
complex_operation_t result = CMPLX_NONE;
|
|
|
|
if (vect_match_expression_p (node1, MINUS_EXPR)
|
|
&& vect_match_expression_p (node2, PLUS_EXPR)
|
|
&& (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
|
|
result = MINUS_PLUS;
|
|
else if (vect_match_expression_p (node1, PLUS_EXPR)
|
|
&& vect_match_expression_p (node2, MINUS_EXPR)
|
|
&& (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
|
|
result = PLUS_MINUS;
|
|
else if (vect_match_expression_p (node1, PLUS_EXPR)
|
|
&& vect_match_expression_p (node2, PLUS_EXPR))
|
|
result = PLUS_PLUS;
|
|
else if (vect_match_expression_p (node1, MULT_EXPR)
|
|
&& vect_match_expression_p (node2, MULT_EXPR))
|
|
result = MULT_MULT;
|
|
|
|
if (result != CMPLX_NONE && ops != NULL)
|
|
{
|
|
if (two_operands)
|
|
{
|
|
auto l0node = SLP_TREE_CHILDREN (node1);
|
|
auto l1node = SLP_TREE_CHILDREN (node2);
|
|
|
|
/* Check if the tree is connected as we expect it. */
|
|
if (!((l0node[0] == l1node[0] && l0node[1] == l1node[1])
|
|
|| (l0node[0] == l1node[1] && l0node[1] == l1node[0])))
|
|
return CMPLX_NONE;
|
|
}
|
|
ops->safe_push (node1);
|
|
ops->safe_push (node2);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/* Overload of vect_detect_pair_op that matches against the representative
|
|
statements in the children of NODE. It is expected that NODE has exactly
|
|
two children and when TWO_OPERANDS then NODE must be a VEC_PERM. */
|
|
|
|
static complex_operation_t
|
|
vect_detect_pair_op (slp_tree node, bool two_operands = true,
|
|
vec<slp_tree> *ops = NULL)
|
|
{
|
|
if (!two_operands && SLP_TREE_CODE (node) == VEC_PERM_EXPR)
|
|
return CMPLX_NONE;
|
|
|
|
if (SLP_TREE_CHILDREN (node).length () != 2)
|
|
return CMPLX_NONE;
|
|
|
|
vec<slp_tree> children = SLP_TREE_CHILDREN (node);
|
|
lane_permutation_t &lanes = SLP_TREE_LANE_PERMUTATION (node);
|
|
|
|
return vect_detect_pair_op (children[0], children[1], lanes, two_operands,
|
|
ops);
|
|
}
|
|
|
|
/*******************************************************************************
|
|
* complex_pattern class
|
|
******************************************************************************/
|
|
|
|
/* SLP Complex Numbers pattern matching.
|
|
|
|
As an example, the following simple loop:
|
|
|
|
double a[restrict N]; double b[restrict N]; double c[restrict N];
|
|
|
|
for (int i=0; i < N; i+=2)
|
|
{
|
|
c[i] = a[i] - b[i+1];
|
|
c[i+1] = a[i+1] + b[i];
|
|
}
|
|
|
|
which represents a complex addition with a rotation of 90 degrees around the
|
|
argand plane. i.e. if `a` and `b` were complex numbers then this would be the
|
|
same as `a + (b * I)`.
|
|
|
|
Here the expressions for `c[i]` and `c[i+1]` are independent but have to be
|
|
both recognized in order for the pattern to work. As an SLP tree this is
|
|
represented as
|
|
|
|
+--------------------------------+
|
|
| stmt 0 *_9 = _10; |
|
|
| stmt 1 *_15 = _16; |
|
|
+--------------------------------+
|
|
|
|
|
|
|
|
v
|
|
+--------------------------------+
|
|
| stmt 0 _10 = _4 - _8; |
|
|
| stmt 1 _16 = _12 + _14; |
|
|
| lane permutation { 0[0] 1[1] } |
|
|
+--------------------------------+
|
|
| |
|
|
| |
|
|
| |
|
|
+-----+ | | +-----+
|
|
| | | | | |
|
|
+-----| { } |<-----+ +----->| { } --------+
|
|
| | | +------------------| | |
|
|
| +-----+ | +-----+ |
|
|
| | | |
|
|
| | | |
|
|
| +------|------------------+ |
|
|
| | | |
|
|
v v v v
|
|
+--------------------------+ +--------------------------------+
|
|
| stmt 0 _8 = *_7; | | stmt 0 _4 = *_3; |
|
|
| stmt 1 _14 = *_13; | | stmt 1 _12 = *_11; |
|
|
| load permutation { 1 0 } | | load permutation { 0 1 } |
|
|
+--------------------------+ +--------------------------------+
|
|
|
|
The pattern matcher allows you to replace both statements 0 and 1 or none at
|
|
all. Because this operation is a two operands operation the actual nodes
|
|
being replaced are those in the { } nodes. The actual scalar statements
|
|
themselves are not replaced or used during the matching but instead the
|
|
SLP_TREE_REPRESENTATIVE statements are inspected. You are also allowed to
|
|
replace and match on any number of nodes.
|
|
|
|
Because the pattern matcher matches on the representative statement for the
|
|
SLP node, in the case of two_operators it allows you to match the children of the
|
|
node. This is done using the method `recognize ()`.
|
|
|
|
*/
|
|
|
|
/* The complex_pattern class contains common code for pattern matchers that work
|
|
on complex numbers. These provide functionality to allow de-construction and
|
|
validation of sequences depicting/transforming REAL and IMAG pairs. */
|
|
|
|
class complex_pattern : public vect_pattern
|
|
{
|
|
protected:
|
|
auto_vec<slp_tree> m_workset;
|
|
complex_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
|
|
: vect_pattern (node, m_ops, ifn)
|
|
{
|
|
this->m_workset.safe_push (*node);
|
|
}
|
|
|
|
public:
|
|
void build (vec_info *);
|
|
|
|
static internal_fn
|
|
matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
|
|
vec<slp_tree> *);
|
|
};
|
|
|
|
/* Create a replacement pattern statement for each node in m_node and inserts
|
|
the new statement into m_node as the new representative statement. The old
|
|
statement is marked as being in a pattern defined by the new statement. The
|
|
statement is created as call to internal function IFN with m_num_args
|
|
arguments.
|
|
|
|
Futhermore the new pattern is also added to the vectorization information
|
|
structure VINFO and the old statement STMT_INFO is marked as unused while
|
|
the new statement is marked as used and the number of SLP uses of the new
|
|
statement is incremented.
|
|
|
|
The newly created SLP nodes are marked as SLP only and will be dissolved
|
|
if SLP is aborted.
|
|
|
|
The newly created gimple call is returned and the BB remains unchanged.
|
|
|
|
This default method is designed to only match against simple operands where
|
|
all the input and output types are the same.
|
|
*/
|
|
|
|
void
|
|
complex_pattern::build (vec_info *vinfo)
|
|
{
|
|
stmt_vec_info stmt_info;
|
|
|
|
auto_vec<tree> args;
|
|
args.create (this->m_num_args);
|
|
args.quick_grow_cleared (this->m_num_args);
|
|
slp_tree node;
|
|
unsigned ix;
|
|
stmt_vec_info call_stmt_info;
|
|
gcall *call_stmt = NULL;
|
|
|
|
/* Now modify the nodes themselves. */
|
|
FOR_EACH_VEC_ELT (this->m_workset, ix, node)
|
|
{
|
|
/* Calculate the location of the statement in NODE to replace. */
|
|
stmt_info = SLP_TREE_REPRESENTATIVE (node);
|
|
stmt_vec_info reduc_def
|
|
= STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
|
|
gimple* old_stmt = STMT_VINFO_STMT (stmt_info);
|
|
tree lhs_old_stmt = gimple_get_lhs (old_stmt);
|
|
tree type = TREE_TYPE (lhs_old_stmt);
|
|
|
|
/* Create the argument set for use by gimple_build_call_internal_vec. */
|
|
for (unsigned i = 0; i < this->m_num_args; i++)
|
|
args[i] = lhs_old_stmt;
|
|
|
|
/* Create the new pattern statements. */
|
|
call_stmt = gimple_build_call_internal_vec (this->m_ifn, args);
|
|
tree var = make_temp_ssa_name (type, call_stmt, "slp_patt");
|
|
gimple_call_set_lhs (call_stmt, var);
|
|
gimple_set_location (call_stmt, gimple_location (old_stmt));
|
|
gimple_call_set_nothrow (call_stmt, true);
|
|
|
|
/* Adjust the book-keeping for the new and old statements for use during
|
|
SLP. This is required to get the right VF and statement during SLP
|
|
analysis. These changes are created after relevancy has been set for
|
|
the nodes as such we need to manually update them. Any changes will be
|
|
undone if SLP is cancelled. */
|
|
call_stmt_info
|
|
= vinfo->add_pattern_stmt (call_stmt, stmt_info);
|
|
|
|
/* Make sure to mark the representative statement pure_slp and
|
|
relevant and transfer reduction info. */
|
|
STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope;
|
|
STMT_SLP_TYPE (call_stmt_info) = pure_slp;
|
|
STMT_VINFO_REDUC_DEF (call_stmt_info) = reduc_def;
|
|
|
|
gimple_set_bb (call_stmt, gimple_bb (stmt_info->stmt));
|
|
STMT_VINFO_VECTYPE (call_stmt_info) = SLP_TREE_VECTYPE (node);
|
|
STMT_VINFO_SLP_VECT_ONLY_PATTERN (call_stmt_info) = true;
|
|
|
|
/* Since we are replacing all the statements in the group with the same
|
|
thing it doesn't really matter. So just set it every time a new stmt
|
|
is created. */
|
|
SLP_TREE_REPRESENTATIVE (node) = call_stmt_info;
|
|
SLP_TREE_LANE_PERMUTATION (node).release ();
|
|
SLP_TREE_CODE (node) = CALL_EXPR;
|
|
}
|
|
}
|
|
|
|
/*******************************************************************************
|
|
* complex_add_pattern class
|
|
******************************************************************************/
|
|
|
|
class complex_add_pattern : public complex_pattern
|
|
{
|
|
protected:
|
|
complex_add_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
|
|
: complex_pattern (node, m_ops, ifn)
|
|
{
|
|
this->m_num_args = 2;
|
|
}
|
|
|
|
public:
|
|
void build (vec_info *);
|
|
static internal_fn
|
|
matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
|
|
slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
|
|
|
|
static vect_pattern*
|
|
recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
|
|
slp_tree *);
|
|
|
|
static vect_pattern*
|
|
mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
|
|
{
|
|
return new complex_add_pattern (node, m_ops, ifn);
|
|
}
|
|
};
|
|
|
|
/* Perform a replacement of the detected complex add pattern with the new
|
|
instruction sequences. */
|
|
|
|
void
|
|
complex_add_pattern::build (vec_info *vinfo)
|
|
{
|
|
SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
|
|
|
|
slp_tree node = this->m_ops[0];
|
|
vec<slp_tree> children = SLP_TREE_CHILDREN (node);
|
|
|
|
/* First re-arrange the children. */
|
|
SLP_TREE_CHILDREN (*this->m_node)[0] = children[0];
|
|
SLP_TREE_CHILDREN (*this->m_node)[1] =
|
|
vect_build_swap_evenodd_node (children[1]);
|
|
|
|
SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[0])++;
|
|
SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[1])++;
|
|
vect_free_slp_tree (this->m_ops[0]);
|
|
vect_free_slp_tree (this->m_ops[1]);
|
|
|
|
complex_pattern::build (vinfo);
|
|
}
|
|
|
|
/* Pattern matcher for trying to match complex addition pattern in SLP tree.
|
|
|
|
If no match is found then IFN is set to IFN_LAST.
|
|
This function matches the patterns shaped as:
|
|
|
|
c[i] = a[i] - b[i+1];
|
|
c[i+1] = a[i+1] + b[i];
|
|
|
|
If a match occurred then TRUE is returned, else FALSE. The initial match is
|
|
expected to be in OP1 and the initial match operands in args0. */
|
|
|
|
internal_fn
|
|
complex_add_pattern::matches (complex_operation_t op,
|
|
slp_tree_to_load_perm_map_t *perm_cache,
|
|
slp_compat_nodes_map_t * /* compat_cache */,
|
|
slp_tree *node, vec<slp_tree> *ops)
|
|
{
|
|
internal_fn ifn = IFN_LAST;
|
|
|
|
/* Find the two components. Rotation in the complex plane will modify
|
|
the operations:
|
|
|
|
* Rotation 0: + +
|
|
* Rotation 90: - +
|
|
* Rotation 180: - -
|
|
* Rotation 270: + -
|
|
|
|
Rotation 0 and 180 can be handled by normal SIMD code, so we don't need
|
|
to care about them here. */
|
|
if (op == MINUS_PLUS)
|
|
ifn = IFN_COMPLEX_ADD_ROT90;
|
|
else if (op == PLUS_MINUS)
|
|
ifn = IFN_COMPLEX_ADD_ROT270;
|
|
else
|
|
return ifn;
|
|
|
|
/* verify that there is a permute, otherwise this isn't a pattern we
|
|
we support. */
|
|
gcc_assert (ops->length () == 2);
|
|
|
|
vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]);
|
|
|
|
/* First node must be unpermuted. */
|
|
if (linear_loads_p (perm_cache, children[0]) != PERM_EVENODD)
|
|
return IFN_LAST;
|
|
|
|
/* Second node must be permuted. */
|
|
if (linear_loads_p (perm_cache, children[1]) != PERM_ODDEVEN)
|
|
return IFN_LAST;
|
|
|
|
if (!vect_pattern_validate_optab (ifn, *node))
|
|
return IFN_LAST;
|
|
|
|
return ifn;
|
|
}
|
|
|
|
/* Attempt to recognize a complex add pattern. */
|
|
|
|
vect_pattern*
|
|
complex_add_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
|
|
slp_compat_nodes_map_t *compat_cache,
|
|
slp_tree *node)
|
|
{
|
|
auto_vec<slp_tree> ops;
|
|
complex_operation_t op
|
|
= vect_detect_pair_op (*node, true, &ops);
|
|
internal_fn ifn
|
|
= complex_add_pattern::matches (op, perm_cache, compat_cache, node, &ops);
|
|
if (ifn == IFN_LAST)
|
|
return NULL;
|
|
|
|
return new complex_add_pattern (node, &ops, ifn);
|
|
}
|
|
|
|
/*******************************************************************************
|
|
* complex_mul_pattern
|
|
******************************************************************************/
|
|
|
|
/* Helper function to check if PERM is KIND or PERM_TOP. */
|
|
|
|
static inline bool
|
|
is_eq_or_top (slp_tree_to_load_perm_map_t *perm_cache,
|
|
slp_tree op1, complex_perm_kinds_t kind1,
|
|
slp_tree op2, complex_perm_kinds_t kind2)
|
|
{
|
|
complex_perm_kinds_t perm1 = linear_loads_p (perm_cache, op1);
|
|
if (perm1 != kind1 && perm1 != PERM_TOP)
|
|
return false;
|
|
|
|
complex_perm_kinds_t perm2 = linear_loads_p (perm_cache, op2);
|
|
if (perm2 != kind2 && perm2 != PERM_TOP)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/* Which of the two right-hand multiplication operands, if any, was found
   wrapped in a negate.  */
enum _conj_status
{
  CONJ_NONE = 0,  /* No single negated operand.  */
  CONJ_FST = 1,   /* Only the first operand is negated.  */
  CONJ_SND = 2    /* Only the second operand is negated.  */
};
|
|
|
|
static inline bool
|
|
compatible_complex_nodes_p (slp_compat_nodes_map_t *compat_cache,
|
|
slp_tree a, int *pa, slp_tree b, int *pb)
|
|
{
|
|
bool *tmp;
|
|
std::pair<slp_tree, slp_tree> key = std::make_pair(a, b);
|
|
if ((tmp = compat_cache->get (key)) != NULL)
|
|
return *tmp;
|
|
|
|
compat_cache->put (key, false);
|
|
|
|
if (SLP_TREE_CHILDREN (a).length () != SLP_TREE_CHILDREN (b).length ())
|
|
return false;
|
|
|
|
if (SLP_TREE_DEF_TYPE (a) != SLP_TREE_DEF_TYPE (b))
|
|
return false;
|
|
|
|
/* Only internal nodes can be loads, as such we can't check further if they
|
|
are externals. */
|
|
if (SLP_TREE_DEF_TYPE (a) != vect_internal_def)
|
|
{
|
|
for (unsigned i = 0; i < SLP_TREE_SCALAR_OPS (a).length (); i++)
|
|
{
|
|
tree op1 = SLP_TREE_SCALAR_OPS (a)[pa[i % 2]];
|
|
tree op2 = SLP_TREE_SCALAR_OPS (b)[pb[i % 2]];
|
|
if (!operand_equal_p (op1, op2, 0))
|
|
return false;
|
|
}
|
|
|
|
compat_cache->put (key, true);
|
|
return true;
|
|
}
|
|
|
|
auto a_stmt = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (a));
|
|
auto b_stmt = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (b));
|
|
|
|
if (gimple_code (a_stmt) != gimple_code (b_stmt))
|
|
return false;
|
|
|
|
/* code, children, type, externals, loads, constants */
|
|
if (gimple_num_args (a_stmt) != gimple_num_args (b_stmt))
|
|
return false;
|
|
|
|
/* At this point, a and b are known to be the same gimple operations. */
|
|
if (is_gimple_call (a_stmt))
|
|
{
|
|
if (!compatible_calls_p (dyn_cast <gcall *> (a_stmt),
|
|
dyn_cast <gcall *> (b_stmt)))
|
|
return false;
|
|
}
|
|
else if (!is_gimple_assign (a_stmt))
|
|
return false;
|
|
else
|
|
{
|
|
tree_code acode = gimple_assign_rhs_code (a_stmt);
|
|
tree_code bcode = gimple_assign_rhs_code (b_stmt);
|
|
if ((acode == REALPART_EXPR || acode == IMAGPART_EXPR)
|
|
&& (bcode == REALPART_EXPR || bcode == IMAGPART_EXPR))
|
|
return true;
|
|
|
|
if (acode != bcode)
|
|
return false;
|
|
}
|
|
|
|
if (!SLP_TREE_LOAD_PERMUTATION (a).exists ()
|
|
|| !SLP_TREE_LOAD_PERMUTATION (b).exists ())
|
|
{
|
|
for (unsigned i = 0; i < gimple_num_args (a_stmt); i++)
|
|
{
|
|
tree t1 = gimple_arg (a_stmt, i);
|
|
tree t2 = gimple_arg (b_stmt, i);
|
|
if (TREE_CODE (t1) != TREE_CODE (t2))
|
|
return false;
|
|
|
|
/* If SSA name then we will need to inspect the children
|
|
so we can punt here. */
|
|
if (TREE_CODE (t1) == SSA_NAME)
|
|
continue;
|
|
|
|
if (!operand_equal_p (t1, t2, 0))
|
|
return false;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
auto dr1 = STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (a));
|
|
auto dr2 = STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (b));
|
|
/* Don't check the last dimension as that's checked by the lineary
|
|
checks. This check is also much stricter than what we need
|
|
because it doesn't consider loading from adjacent elements
|
|
in the same struct as loading from the same base object.
|
|
But for now, I'll play it safe. */
|
|
if (!same_data_refs (dr1, dr2, 1))
|
|
return false;
|
|
}
|
|
|
|
for (unsigned i = 0; i < SLP_TREE_CHILDREN (a).length (); i++)
|
|
{
|
|
if (!compatible_complex_nodes_p (compat_cache,
|
|
SLP_TREE_CHILDREN (a)[i], pa,
|
|
SLP_TREE_CHILDREN (b)[i], pb))
|
|
return false;
|
|
}
|
|
|
|
compat_cache->put (key, true);
|
|
return true;
|
|
}
|
|
|
|
static inline bool
|
|
vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
|
|
slp_compat_nodes_map_t *compat_cache,
|
|
vec<slp_tree> &left_op,
|
|
vec<slp_tree> &right_op,
|
|
bool subtract,
|
|
enum _conj_status *_status)
|
|
{
|
|
auto_vec<slp_tree> ops;
|
|
enum _conj_status stats = CONJ_NONE;
|
|
|
|
/* The complex operations can occur in two layouts and two permute sequences
|
|
so declare them and re-use them. */
|
|
int styles[][4] = { { 0, 2, 1, 3} /* {L1, R1} + {L2, R2}. */
|
|
, { 0, 3, 1, 2} /* {L1, R2} + {L2, R1}. */
|
|
};
|
|
|
|
/* Now for the corresponding permutes that go with these values. */
|
|
complex_perm_kinds_t perms[][4]
|
|
= { { PERM_EVENEVEN, PERM_ODDODD, PERM_EVENODD, PERM_ODDEVEN }
|
|
, { PERM_EVENODD, PERM_ODDEVEN, PERM_EVENEVEN, PERM_ODDODD }
|
|
};
|
|
|
|
/* These permutes are used during comparisons of externals on which
|
|
we require strict equality. */
|
|
int cq[][4][2]
|
|
= { { { 0, 0 }, { 1, 1 }, { 0, 1 }, { 1, 0 } }
|
|
, { { 0, 1 }, { 1, 0 }, { 0, 0 }, { 1, 1 } }
|
|
};
|
|
|
|
/* Default to style and perm 0, most operations use this one. */
|
|
int style = 0;
|
|
int perm = subtract ? 1 : 0;
|
|
|
|
/* Check if we have a negate operation, if so absorb the node and continue
|
|
looking. */
|
|
bool neg0 = vect_match_expression_p (right_op[0], NEGATE_EXPR);
|
|
bool neg1 = vect_match_expression_p (right_op[1], NEGATE_EXPR);
|
|
|
|
/* Determine which style we're looking at. We only have different ones
|
|
whenever a conjugate is involved. */
|
|
if (neg0 && neg1)
|
|
;
|
|
else if (neg0)
|
|
{
|
|
right_op[0] = SLP_TREE_CHILDREN (right_op[0])[0];
|
|
stats = CONJ_FST;
|
|
if (subtract)
|
|
perm = 0;
|
|
}
|
|
else if (neg1)
|
|
{
|
|
right_op[1] = SLP_TREE_CHILDREN (right_op[1])[0];
|
|
stats = CONJ_SND;
|
|
perm = 1;
|
|
}
|
|
|
|
*_status = stats;
|
|
|
|
/* Flatten the inputs after we've remapped them. */
|
|
ops.create (4);
|
|
ops.safe_splice (left_op);
|
|
ops.safe_splice (right_op);
|
|
|
|
/* Extract out the elements to check. */
|
|
slp_tree op0 = ops[styles[style][0]];
|
|
slp_tree op1 = ops[styles[style][1]];
|
|
slp_tree op2 = ops[styles[style][2]];
|
|
slp_tree op3 = ops[styles[style][3]];
|
|
|
|
/* Do cheapest test first. If failed no need to analyze further. */
|
|
if (linear_loads_p (perm_cache, op0) != perms[perm][0]
|
|
|| linear_loads_p (perm_cache, op1) != perms[perm][1]
|
|
|| !is_eq_or_top (perm_cache, op2, perms[perm][2], op3, perms[perm][3]))
|
|
return false;
|
|
|
|
return compatible_complex_nodes_p (compat_cache, op0, cq[perm][0], op1,
|
|
cq[perm][1])
|
|
&& compatible_complex_nodes_p (compat_cache, op2, cq[perm][2], op3,
|
|
cq[perm][3]);
|
|
}
|
|
|
|
/* This function combines two nodes containing only even and only odd lanes
|
|
together into a single node which contains the nodes in even/odd order
|
|
by using a lane permute.
|
|
|
|
The lanes in EVEN and ODD are duplicated 2 times inside the vectors.
|
|
So for a lanes = 4 EVEN contains {EVEN1, EVEN1, EVEN2, EVEN2}.
|
|
|
|
The tree REPRESENTATION is taken from the supplied REP along with the
|
|
vectype which must be the same between all three nodes.
|
|
*/
|
|
|
|
static slp_tree
|
|
vect_build_combine_node (slp_tree even, slp_tree odd, slp_tree rep)
|
|
{
|
|
vec<std::pair<unsigned, unsigned> > perm;
|
|
perm.create (SLP_TREE_LANES (rep));
|
|
|
|
for (unsigned x = 0; x < SLP_TREE_LANES (rep); x+=2)
|
|
{
|
|
perm.quick_push (std::make_pair (0, x));
|
|
perm.quick_push (std::make_pair (1, x+1));
|
|
}
|
|
|
|
slp_tree vnode = vect_create_new_slp_node (2, SLP_TREE_CODE (even));
|
|
SLP_TREE_CODE (vnode) = VEC_PERM_EXPR;
|
|
SLP_TREE_LANE_PERMUTATION (vnode) = perm;
|
|
|
|
SLP_TREE_CHILDREN (vnode).create (2);
|
|
SLP_TREE_CHILDREN (vnode).quick_push (even);
|
|
SLP_TREE_CHILDREN (vnode).quick_push (odd);
|
|
SLP_TREE_REF_COUNT (even)++;
|
|
SLP_TREE_REF_COUNT (odd)++;
|
|
SLP_TREE_REF_COUNT (vnode) = 1;
|
|
|
|
SLP_TREE_LANES (vnode) = SLP_TREE_LANES (rep);
|
|
gcc_assert (perm.length () == SLP_TREE_LANES (vnode));
|
|
/* Representation is set to that of the current node as the vectorizer
|
|
can't deal with VEC_PERMs with no representation, as would be the
|
|
case with invariants. */
|
|
SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (rep);
|
|
SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (rep);
|
|
return vnode;
|
|
}
|
|
|
|
class complex_mul_pattern : public complex_pattern
|
|
{
|
|
protected:
|
|
complex_mul_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
|
|
: complex_pattern (node, m_ops, ifn)
|
|
{
|
|
this->m_num_args = 2;
|
|
}
|
|
|
|
public:
|
|
void build (vec_info *);
|
|
static internal_fn
|
|
matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
|
|
slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
|
|
|
|
static vect_pattern*
|
|
recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
|
|
slp_tree *);
|
|
|
|
static vect_pattern*
|
|
mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
|
|
{
|
|
return new complex_mul_pattern (node, m_ops, ifn);
|
|
}
|
|
|
|
};
|
|
|
|
/* Pattern matcher for trying to match complex multiply and complex multiply
|
|
and accumulate pattern in SLP tree. If the operation matches then IFN
|
|
is set to the operation it matched and the arguments to the two
|
|
replacement statements are put in m_ops.
|
|
|
|
If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
|
|
|
|
This function matches the patterns shaped as:
|
|
|
|
double ax = (b[i+1] * a[i]);
|
|
double bx = (a[i+1] * b[i]);
|
|
|
|
c[i] = c[i] - ax;
|
|
c[i+1] = c[i+1] + bx;
|
|
|
|
If a match occurred then TRUE is returned, else FALSE. The initial match is
|
|
expected to be in OP1 and the initial match operands in args0. */
|
|
|
|
internal_fn
|
|
complex_mul_pattern::matches (complex_operation_t op,
|
|
slp_tree_to_load_perm_map_t *perm_cache,
|
|
slp_compat_nodes_map_t *compat_cache,
|
|
slp_tree *node, vec<slp_tree> *ops)
|
|
{
|
|
internal_fn ifn = IFN_LAST;
|
|
|
|
if (op != MINUS_PLUS)
|
|
return IFN_LAST;
|
|
|
|
auto childs = *ops;
|
|
auto l0node = SLP_TREE_CHILDREN (childs[0]);
|
|
|
|
bool mul0 = vect_match_expression_p (l0node[0], MULT_EXPR);
|
|
bool mul1 = vect_match_expression_p (l0node[1], MULT_EXPR);
|
|
if (!mul0 && !mul1)
|
|
return IFN_LAST;
|
|
|
|
/* Now operand2+4 may lead to another expression. */
|
|
auto_vec<slp_tree> left_op, right_op;
|
|
slp_tree add0 = NULL;
|
|
|
|
/* Check if we may be a multiply add. */
|
|
if (!mul0
|
|
&& vect_match_expression_p (l0node[0], PLUS_EXPR))
|
|
{
|
|
auto vals = SLP_TREE_CHILDREN (l0node[0]);
|
|
/* Check if it's a multiply, otherwise no idea what this is. */
|
|
if (!(mul0 = vect_match_expression_p (vals[1], MULT_EXPR)))
|
|
return IFN_LAST;
|
|
|
|
/* Check if the ADD is linear, otherwise it's not valid complex FMA. */
|
|
if (linear_loads_p (perm_cache, vals[0]) != PERM_EVENODD)
|
|
return IFN_LAST;
|
|
|
|
left_op.safe_splice (SLP_TREE_CHILDREN (vals[1]));
|
|
add0 = vals[0];
|
|
}
|
|
else
|
|
left_op.safe_splice (SLP_TREE_CHILDREN (l0node[0]));
|
|
|
|
right_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
|
|
|
|
if (left_op.length () != 2
|
|
|| right_op.length () != 2
|
|
|| !mul0
|
|
|| !mul1
|
|
|| linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN)
|
|
return IFN_LAST;
|
|
|
|
enum _conj_status status;
|
|
if (!vect_validate_multiplication (perm_cache, compat_cache, left_op,
|
|
right_op, false, &status))
|
|
return IFN_LAST;
|
|
|
|
if (status == CONJ_NONE)
|
|
{
|
|
if (add0)
|
|
ifn = IFN_COMPLEX_FMA;
|
|
else
|
|
ifn = IFN_COMPLEX_MUL;
|
|
}
|
|
else
|
|
{
|
|
if(add0)
|
|
ifn = IFN_COMPLEX_FMA_CONJ;
|
|
else
|
|
ifn = IFN_COMPLEX_MUL_CONJ;
|
|
}
|
|
|
|
if (!vect_pattern_validate_optab (ifn, *node))
|
|
return IFN_LAST;
|
|
|
|
ops->truncate (0);
|
|
ops->create (add0 ? 4 : 3);
|
|
|
|
if (add0)
|
|
ops->quick_push (add0);
|
|
|
|
complex_perm_kinds_t kind = linear_loads_p (perm_cache, left_op[0]);
|
|
if (kind == PERM_EVENODD || kind == PERM_TOP)
|
|
{
|
|
ops->quick_push (left_op[1]);
|
|
ops->quick_push (right_op[1]);
|
|
ops->quick_push (left_op[0]);
|
|
}
|
|
else if (kind == PERM_EVENEVEN && status != CONJ_SND)
|
|
{
|
|
ops->quick_push (left_op[0]);
|
|
ops->quick_push (right_op[0]);
|
|
ops->quick_push (left_op[1]);
|
|
}
|
|
else
|
|
{
|
|
ops->quick_push (left_op[0]);
|
|
ops->quick_push (right_op[1]);
|
|
ops->quick_push (left_op[1]);
|
|
}
|
|
|
|
return ifn;
|
|
}
|
|
|
|
/* Attempt to recognize a complex mul pattern. */
|
|
|
|
vect_pattern*
|
|
complex_mul_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
|
|
slp_compat_nodes_map_t *compat_cache,
|
|
slp_tree *node)
|
|
{
|
|
auto_vec<slp_tree> ops;
|
|
complex_operation_t op
|
|
= vect_detect_pair_op (*node, true, &ops);
|
|
internal_fn ifn
|
|
= complex_mul_pattern::matches (op, perm_cache, compat_cache, node, &ops);
|
|
if (ifn == IFN_LAST)
|
|
return NULL;
|
|
|
|
return new complex_mul_pattern (node, &ops, ifn);
|
|
}
|
|
|
|
/* Perform a replacement of the detected complex mul pattern with the new
|
|
instruction sequences. */
|
|
|
|
void
|
|
complex_mul_pattern::build (vec_info *vinfo)
|
|
{
|
|
slp_tree node;
|
|
unsigned i;
|
|
switch (this->m_ifn)
|
|
{
|
|
case IFN_COMPLEX_MUL:
|
|
case IFN_COMPLEX_MUL_CONJ:
|
|
{
|
|
slp_tree newnode
|
|
= vect_build_combine_node (this->m_ops[0], this->m_ops[1],
|
|
*this->m_node);
|
|
SLP_TREE_REF_COUNT (this->m_ops[2])++;
|
|
|
|
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
|
|
vect_free_slp_tree (node);
|
|
|
|
/* First re-arrange the children. */
|
|
SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
|
|
SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[2];
|
|
SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
|
|
break;
|
|
}
|
|
case IFN_COMPLEX_FMA:
|
|
case IFN_COMPLEX_FMA_CONJ:
|
|
{
|
|
SLP_TREE_REF_COUNT (this->m_ops[0])++;
|
|
slp_tree newnode
|
|
= vect_build_combine_node (this->m_ops[1], this->m_ops[2],
|
|
*this->m_node);
|
|
SLP_TREE_REF_COUNT (this->m_ops[3])++;
|
|
|
|
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
|
|
vect_free_slp_tree (node);
|
|
|
|
/* First re-arrange the children. */
|
|
SLP_TREE_CHILDREN (*this->m_node).safe_grow (3);
|
|
SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[3];
|
|
SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
|
|
SLP_TREE_CHILDREN (*this->m_node)[2] = this->m_ops[0];
|
|
|
|
/* Tell the builder to expect an extra argument. */
|
|
this->m_num_args++;
|
|
break;
|
|
}
|
|
default:
|
|
gcc_unreachable ();
|
|
}
|
|
|
|
/* And then rewrite the node itself. */
|
|
complex_pattern::build (vinfo);
|
|
}
|
|
|
|
/*******************************************************************************
|
|
* complex_fms_pattern class
|
|
******************************************************************************/
|
|
|
|
class complex_fms_pattern : public complex_pattern
|
|
{
|
|
protected:
|
|
complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
|
|
: complex_pattern (node, m_ops, ifn)
|
|
{
|
|
this->m_num_args = 3;
|
|
}
|
|
|
|
public:
|
|
void build (vec_info *);
|
|
static internal_fn
|
|
matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
|
|
slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
|
|
|
|
static vect_pattern*
|
|
recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
|
|
slp_tree *);
|
|
|
|
static vect_pattern*
|
|
mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
|
|
{
|
|
return new complex_fms_pattern (node, m_ops, ifn);
|
|
}
|
|
};
|
|
|
|
|
|
/* Pattern matcher for trying to match complex multiply and subtract pattern
|
|
in SLP tree. If the operation matches then IFN is set to the operation
|
|
it matched and the arguments to the two replacement statements are put in
|
|
m_ops.
|
|
|
|
If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
|
|
|
|
This function matches the patterns shaped as:
|
|
|
|
double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
|
|
double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
|
|
|
|
c[i] = c[i] - ax;
|
|
c[i+1] = c[i+1] + bx;
|
|
|
|
If a match occurred then TRUE is returned, else FALSE. The initial match is
|
|
expected to be in OP1 and the initial match operands in args0. */
|
|
|
|
internal_fn
|
|
complex_fms_pattern::matches (complex_operation_t op,
|
|
slp_tree_to_load_perm_map_t *perm_cache,
|
|
slp_compat_nodes_map_t *compat_cache,
|
|
slp_tree * ref_node, vec<slp_tree> *ops)
|
|
{
|
|
internal_fn ifn = IFN_LAST;
|
|
|
|
/* We need to ignore the two_operands nodes that may also match,
|
|
for that we can check if they have any scalar statements and also
|
|
check that it's not a permute node as we're looking for a normal
|
|
MINUS_EXPR operation. */
|
|
if (op != CMPLX_NONE)
|
|
return IFN_LAST;
|
|
|
|
slp_tree root = *ref_node;
|
|
if (!vect_match_expression_p (root, MINUS_EXPR))
|
|
return IFN_LAST;
|
|
|
|
/* TODO: Support invariants here, with the new layout CADD now
|
|
can match before we get a chance to try CFMS. */
|
|
auto nodes = SLP_TREE_CHILDREN (root);
|
|
if (!vect_match_expression_p (nodes[1], MULT_EXPR)
|
|
|| vect_detect_pair_op (nodes[0]) != PLUS_MINUS)
|
|
return IFN_LAST;
|
|
|
|
auto childs = SLP_TREE_CHILDREN (nodes[0]);
|
|
auto l0node = SLP_TREE_CHILDREN (childs[0]);
|
|
|
|
/* Now operand2+4 may lead to another expression. */
|
|
auto_vec<slp_tree> left_op, right_op;
|
|
left_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
|
|
right_op.safe_splice (SLP_TREE_CHILDREN (nodes[1]));
|
|
|
|
/* If these nodes don't have any children then they're
|
|
not ones we're interested in. */
|
|
if (left_op.length () != 2
|
|
|| right_op.length () != 2
|
|
|| !vect_match_expression_p (l0node[1], MULT_EXPR))
|
|
return IFN_LAST;
|
|
|
|
enum _conj_status status;
|
|
if (!vect_validate_multiplication (perm_cache, compat_cache, right_op,
|
|
left_op, true, &status))
|
|
return IFN_LAST;
|
|
|
|
if (status == CONJ_NONE)
|
|
ifn = IFN_COMPLEX_FMS;
|
|
else
|
|
ifn = IFN_COMPLEX_FMS_CONJ;
|
|
|
|
if (!vect_pattern_validate_optab (ifn, *ref_node))
|
|
return IFN_LAST;
|
|
|
|
ops->truncate (0);
|
|
ops->create (4);
|
|
|
|
complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]);
|
|
if (kind == PERM_EVENODD)
|
|
{
|
|
ops->quick_push (l0node[0]);
|
|
ops->quick_push (right_op[0]);
|
|
ops->quick_push (right_op[1]);
|
|
ops->quick_push (left_op[1]);
|
|
}
|
|
else
|
|
{
|
|
ops->quick_push (l0node[0]);
|
|
ops->quick_push (right_op[1]);
|
|
ops->quick_push (right_op[0]);
|
|
ops->quick_push (left_op[0]);
|
|
}
|
|
|
|
return ifn;
|
|
}
|
|
|
|
/* Attempt to recognize a complex mul pattern. */
|
|
|
|
vect_pattern*
|
|
complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
|
|
slp_compat_nodes_map_t *compat_cache,
|
|
slp_tree *node)
|
|
{
|
|
auto_vec<slp_tree> ops;
|
|
complex_operation_t op
|
|
= vect_detect_pair_op (*node, true, &ops);
|
|
internal_fn ifn
|
|
= complex_fms_pattern::matches (op, perm_cache, compat_cache, node, &ops);
|
|
if (ifn == IFN_LAST)
|
|
return NULL;
|
|
|
|
return new complex_fms_pattern (node, &ops, ifn);
|
|
}
|
|
|
|
/* Perform a replacement of the detected complex mul pattern with the new
|
|
instruction sequences. */
|
|
|
|
void
|
|
complex_fms_pattern::build (vec_info *vinfo)
|
|
{
|
|
slp_tree node;
|
|
unsigned i;
|
|
slp_tree newnode =
|
|
vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node);
|
|
SLP_TREE_REF_COUNT (this->m_ops[0])++;
|
|
SLP_TREE_REF_COUNT (this->m_ops[1])++;
|
|
|
|
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
|
|
vect_free_slp_tree (node);
|
|
|
|
SLP_TREE_CHILDREN (*this->m_node).release ();
|
|
SLP_TREE_CHILDREN (*this->m_node).create (3);
|
|
|
|
/* First re-arrange the children. */
|
|
SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[1]);
|
|
SLP_TREE_CHILDREN (*this->m_node).quick_push (newnode);
|
|
SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[0]);
|
|
|
|
/* And then rewrite the node itself. */
|
|
complex_pattern::build (vinfo);
|
|
}
|
|
|
|
/*******************************************************************************
|
|
* complex_operations_pattern class
|
|
******************************************************************************/
|
|
|
|
/* This function combines all the existing pattern matchers above into one class
|
|
that shares the functionality between them. The initial match is shared
|
|
between all complex operations. */
|
|
|
|
class complex_operations_pattern : public complex_pattern
|
|
{
|
|
protected:
|
|
complex_operations_pattern (slp_tree *node, vec<slp_tree> *m_ops,
|
|
internal_fn ifn)
|
|
: complex_pattern (node, m_ops, ifn)
|
|
{
|
|
this->m_num_args = 0;
|
|
}
|
|
|
|
public:
|
|
void build (vec_info *);
|
|
static internal_fn
|
|
matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
|
|
slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
|
|
|
|
static vect_pattern*
|
|
recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
|
|
slp_tree *);
|
|
};
|
|
|
|
/* Dummy matches implementation for proxy object. */
|
|
|
|
internal_fn
|
|
complex_operations_pattern::
|
|
matches (complex_operation_t /* op */,
|
|
slp_tree_to_load_perm_map_t * /* perm_cache */,
|
|
slp_compat_nodes_map_t * /* compat_cache */,
|
|
slp_tree * /* ref_node */, vec<slp_tree> * /* ops */)
|
|
{
|
|
return IFN_LAST;
|
|
}
|
|
|
|
/* Attempt to recognize a complex mul pattern. */
|
|
|
|
vect_pattern*
|
|
complex_operations_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
|
|
slp_compat_nodes_map_t *ccache,
|
|
slp_tree *node)
|
|
{
|
|
auto_vec<slp_tree> ops;
|
|
complex_operation_t op
|
|
= vect_detect_pair_op (*node, true, &ops);
|
|
internal_fn ifn = IFN_LAST;
|
|
|
|
ifn = complex_fms_pattern::matches (op, perm_cache, ccache, node, &ops);
|
|
if (ifn != IFN_LAST)
|
|
return complex_fms_pattern::mkInstance (node, &ops, ifn);
|
|
|
|
ifn = complex_mul_pattern::matches (op, perm_cache, ccache, node, &ops);
|
|
if (ifn != IFN_LAST)
|
|
return complex_mul_pattern::mkInstance (node, &ops, ifn);
|
|
|
|
ifn = complex_add_pattern::matches (op, perm_cache, ccache, node, &ops);
|
|
if (ifn != IFN_LAST)
|
|
return complex_add_pattern::mkInstance (node, &ops, ifn);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/* Dummy implementation of build. */
|
|
|
|
void
|
|
complex_operations_pattern::build (vec_info * /* vinfo */)
|
|
{
|
|
gcc_unreachable ();
|
|
}
|
|
|
|
|
|
/* The addsub_pattern. */
|
|
|
|
class addsub_pattern : public vect_pattern
|
|
{
|
|
public:
|
|
addsub_pattern (slp_tree *node, internal_fn ifn)
|
|
: vect_pattern (node, NULL, ifn) {};
|
|
|
|
void build (vec_info *);
|
|
|
|
static vect_pattern*
|
|
recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
|
|
slp_tree *);
|
|
};
|
|
|
|
/* Attempt to recognize an add/sub blend rooted at *NODE_.

   The node must be a two-input VEC_PERM_EXPR whose inputs are one PLUS_EXPR
   and one MINUS_EXPR on the same pair of operands, with the lanes taken
   alternately from the two inputs without being moved.  Depending on which
   operation feeds the even lanes, whether the first operand of the
   subtraction is a multiplication, and what the target supports, a pattern
   for IFN_VEC_FMADDSUB, IFN_VEC_FMSUBADD or IFN_VEC_ADDSUB is returned.
   Returns NULL if nothing matches.  The two cache arguments are unused.  */

vect_pattern *
addsub_pattern::recognize (slp_tree_to_load_perm_map_t *,
                           slp_compat_nodes_map_t *, slp_tree *node_)
{
  slp_tree node = *node_;

  /* The first two permutation entries are inspected below, so require at
     least two lanes in addition to an even lane count.  */
  if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
      || SLP_TREE_CHILDREN (node).length () != 2
      || SLP_TREE_LANE_PERMUTATION (node).length () < 2
      || SLP_TREE_LANE_PERMUTATION (node).length () % 2)
    return NULL;

  /* Match a blend of a plus and a minus op with the same number of plus and
     minus lanes on the same operands.  */
  unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
  unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
  if (l0 == l1)
    return NULL;

  slp_tree l0node = SLP_TREE_CHILDREN (node)[l0];
  slp_tree l1node = SLP_TREE_CHILDREN (node)[l1];

  bool l0add_p = vect_match_expression_p (l0node, PLUS_EXPR);
  if (!l0add_p && !vect_match_expression_p (l0node, MINUS_EXPR))
    return NULL;
  bool l1add_p = vect_match_expression_p (l1node, PLUS_EXPR);
  if (!l1add_p && !vect_match_expression_p (l1node, MINUS_EXPR))
    return NULL;

  /* Exactly one of the two inputs has to be the addition.  Two additions or
     two subtractions (possibly with swapped operands) are not an add/sub
     blend and must not be rewritten as one.  */
  if (l0add_p == l1add_p)
    return NULL;

  /* Both operations have to work on the same two operands, in either
     order.  */
  if (!((SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[0]
         && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[1])
        || (SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[1]
            && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[0])))
    return NULL;

  for (unsigned i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i)
    {
      std::pair<unsigned, unsigned> perm = SLP_TREE_LANE_PERMUTATION (node)[i];
      /* It has to be alternating -, +, -,
         While we could permute the .ADDSUB inputs and the .ADDSUB output
         that's only profitable over the add + sub + blend if at least
         one of the permute is optimized which we can't determine here.  */
      if (perm.first != ((i & 1) ? l1 : l0)
          || perm.second != i)
        return NULL;
    }

  /* Now we have either { -, +, -, + ... } (!l0add_p) or { +, -, +, - ... }
     (l0add_p), see whether we have FMA variants.
     NOTE(review): nothing here consults the floating-point contraction
     setting before forming the fused variants -- confirm that is checked
     elsewhere.  */
  if (!l0add_p
      && vect_match_expression_p (SLP_TREE_CHILDREN (l0node)[0], MULT_EXPR))
    {
      /* (c * d) -+ a */
      if (vect_pattern_validate_optab (IFN_VEC_FMADDSUB, node))
        return new addsub_pattern (node_, IFN_VEC_FMADDSUB);
    }
  else if (l0add_p
           && vect_match_expression_p (SLP_TREE_CHILDREN (l1node)[0], MULT_EXPR))
    {
      /* (c * d) +- a */
      if (vect_pattern_validate_optab (IFN_VEC_FMSUBADD, node))
        return new addsub_pattern (node_, IFN_VEC_FMSUBADD);
    }

  /* The plain variant only exists for the { -, +, -, + ... } order.  */
  if (!l0add_p && vect_pattern_validate_optab (IFN_VEC_ADDSUB, node))
    return new addsub_pattern (node_, IFN_VEC_ADDSUB);

  return NULL;
}
|
|
|
|
void
|
|
addsub_pattern::build (vec_info *vinfo)
|
|
{
|
|
slp_tree node = *m_node;
|
|
|
|
unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
|
|
unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
|
|
|
|
switch (m_ifn)
|
|
{
|
|
case IFN_VEC_ADDSUB:
|
|
{
|
|
slp_tree sub = SLP_TREE_CHILDREN (node)[l0];
|
|
slp_tree add = SLP_TREE_CHILDREN (node)[l1];
|
|
|
|
/* Modify the blend node in-place. */
|
|
SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0];
|
|
SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1];
|
|
SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
|
|
SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
|
|
|
|
/* Build IFN_VEC_ADDSUB from the sub representative operands. */
|
|
stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub);
|
|
gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2,
|
|
gimple_assign_rhs1 (rep->stmt),
|
|
gimple_assign_rhs2 (rep->stmt));
|
|
gimple_call_set_lhs (call, make_ssa_name
|
|
(TREE_TYPE (gimple_assign_lhs (rep->stmt))));
|
|
gimple_call_set_nothrow (call, true);
|
|
gimple_set_bb (call, gimple_bb (rep->stmt));
|
|
stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, rep);
|
|
SLP_TREE_REPRESENTATIVE (node) = new_rep;
|
|
STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
|
|
STMT_SLP_TYPE (new_rep) = pure_slp;
|
|
STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
|
|
STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
|
|
STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (rep));
|
|
SLP_TREE_CODE (node) = ERROR_MARK;
|
|
SLP_TREE_LANE_PERMUTATION (node).release ();
|
|
|
|
vect_free_slp_tree (sub);
|
|
vect_free_slp_tree (add);
|
|
break;
|
|
}
|
|
case IFN_VEC_FMADDSUB:
|
|
case IFN_VEC_FMSUBADD:
|
|
{
|
|
slp_tree sub, add;
|
|
if (m_ifn == IFN_VEC_FMADDSUB)
|
|
{
|
|
sub = SLP_TREE_CHILDREN (node)[l0];
|
|
add = SLP_TREE_CHILDREN (node)[l1];
|
|
}
|
|
else /* m_ifn == IFN_VEC_FMSUBADD */
|
|
{
|
|
sub = SLP_TREE_CHILDREN (node)[l1];
|
|
add = SLP_TREE_CHILDREN (node)[l0];
|
|
}
|
|
slp_tree mul = SLP_TREE_CHILDREN (sub)[0];
|
|
/* Modify the blend node in-place. */
|
|
SLP_TREE_CHILDREN (node).safe_grow (3, true);
|
|
SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (mul)[0];
|
|
SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (mul)[1];
|
|
SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (sub)[1];
|
|
SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
|
|
SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
|
|
SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[2])++;
|
|
|
|
/* Build IFN_VEC_FMADDSUB from the mul/sub representative operands. */
|
|
stmt_vec_info srep = SLP_TREE_REPRESENTATIVE (sub);
|
|
stmt_vec_info mrep = SLP_TREE_REPRESENTATIVE (mul);
|
|
gcall *call = gimple_build_call_internal (m_ifn, 3,
|
|
gimple_assign_rhs1 (mrep->stmt),
|
|
gimple_assign_rhs2 (mrep->stmt),
|
|
gimple_assign_rhs2 (srep->stmt));
|
|
gimple_call_set_lhs (call, make_ssa_name
|
|
(TREE_TYPE (gimple_assign_lhs (srep->stmt))));
|
|
gimple_call_set_nothrow (call, true);
|
|
gimple_set_bb (call, gimple_bb (srep->stmt));
|
|
stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, srep);
|
|
SLP_TREE_REPRESENTATIVE (node) = new_rep;
|
|
STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
|
|
STMT_SLP_TYPE (new_rep) = pure_slp;
|
|
STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
|
|
STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
|
|
STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (srep));
|
|
SLP_TREE_CODE (node) = ERROR_MARK;
|
|
SLP_TREE_LANE_PERMUTATION (node).release ();
|
|
|
|
vect_free_slp_tree (sub);
|
|
vect_free_slp_tree (add);
|
|
break;
|
|
}
|
|
default:;
|
|
}
|
|
}
|
|
|
|
/*******************************************************************************
|
|
* Pattern matching definitions
|
|
******************************************************************************/
|
|
|
|
#define SLP_PATTERN(x) &x::recognize
|
|
vect_pattern_decl_t slp_patterns[]
|
|
{
|
|
/* For least amount of back-tracking and more efficient matching
|
|
order patterns from the largest to the smallest. Especially if they
|
|
overlap in what they can detect. */
|
|
|
|
SLP_PATTERN (complex_operations_pattern),
|
|
SLP_PATTERN (addsub_pattern)
|
|
};
|
|
#undef SLP_PATTERN
|
|
|
|
/* Set the number of SLP pattern matchers available. */
|
|
size_t num__slp_patterns = sizeof(slp_patterns)/sizeof(vect_pattern_decl_t);
|