mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-22 13:29:44 +00:00
[x86] Remove the last vestiges of the BLENDI-based ADDSUB pattern matching.

This design just fundamentally didn't work because ADDSUB is available prior to any legal lowerings of BLENDI nodes. Instead, we have a dedicated ADDSUB synthetic ISD node which is pattern matched trivially into the instructions. These nodes are then recognized by both the existing and a trivial new lowering combine in the backend.

Removing these patterns required adding 2 missing shuffle masks to the DAG combine, without which tests would have failed. Added the masks and a helpful assert as well to catch if anything ever goes wrong here.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217851 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c9bc145e31
commit
2e363ece75
@ -19990,6 +19990,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
|
||||
/// they're unused.
|
||||
static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
|
||||
SDLoc DL(N);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
// We only handle target-independent shuffles.
|
||||
// FIXME: It would be easy and harmless to use the target shuffle mask
|
||||
@ -20021,9 +20022,17 @@ static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
|
||||
|
||||
// We're looking for blends between FADD and FSUB nodes. We insist on these
|
||||
// nodes being lined up in a specific expected pattern.
|
||||
if (!isShuffleEquivalent(Mask, 0, 5, 2, 7))
|
||||
if (!(isShuffleEquivalent(Mask, 0, 3) ||
|
||||
isShuffleEquivalent(Mask, 0, 5, 2, 7) ||
|
||||
isShuffleEquivalent(Mask, 0, 9, 2, 11, 4, 13, 6, 15)))
|
||||
return SDValue();
|
||||
|
||||
// Only specific types are legal at this point, assert so we notice if and
|
||||
// when these change.
|
||||
assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v8f32 ||
|
||||
VT == MVT::v4f64) &&
|
||||
"Unknown vector type encountered!");
|
||||
|
||||
// FIXME: Munge the inputs through no-op shuffles that drop the undef lanes to
|
||||
// allow nuking any instructions that feed only those lanes.
|
||||
|
||||
|
@ -5387,39 +5387,6 @@ let Predicates = [HasAVX] in {
|
||||
(VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
|
||||
def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (v4f64 (memop addr:$rhs)))),
|
||||
(VADDSUBPDYrm VR256:$lhs, f256mem:$rhs)>;
|
||||
|
||||
// Constant 170 corresponds to the binary mask '10101010'.
|
||||
// When used as a blend mask, it allows selecting eight elements from two
|
||||
// input vectors as follows:
|
||||
// - Even-numbered values in the destination are copied from
|
||||
// the corresponding elements in the first input vector;
|
||||
// - Odd-numbered values in the destination are copied from
|
||||
// the corresponding elements in the second input vector.
|
||||
|
||||
def : Pat<(v8f32 (X86Blendi (v8f32 (fsub VR256:$lhs, VR256:$rhs)),
|
||||
(v8f32 (fadd VR256:$lhs, VR256:$rhs)), (i8 170))),
|
||||
(VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>;
|
||||
|
||||
// Constant 10 corresponds to the binary mask '1010'.
|
||||
// In the two patterns below, constant 10 is used as a blend mask to select
|
||||
// - the 1st and 3rd element from the first input vector (the 'fsub' node);
|
||||
// - the 2nd and 4th element from the second input vector (the 'fadd' node).
|
||||
|
||||
def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
|
||||
(v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))),
|
||||
(VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
|
||||
def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
|
||||
(v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))),
|
||||
(VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
|
||||
def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
|
||||
(v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i8 10))),
|
||||
(VADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
|
||||
def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
|
||||
(v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i8 2))),
|
||||
(VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
|
||||
def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
|
||||
(v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
|
||||
(VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
|
||||
}
|
||||
|
||||
let Predicates = [UseSSE3] in {
|
||||
@ -5431,22 +5398,6 @@ let Predicates = [UseSSE3] in {
|
||||
(ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
|
||||
def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 (memop addr:$rhs)))),
|
||||
(ADDSUBPDrm VR128:$lhs, f128mem:$rhs)>;
|
||||
|
||||
// Constant 10 corresponds to the binary mask '1010'.
|
||||
// In the pattern below, it is used as a blend mask to select:
|
||||
// - the 1st and 3rd element from the first input vector (the fsub node);
|
||||
// - the 2nd and 4th element from the second input vector (the fadd node).
|
||||
|
||||
def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
|
||||
(v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i8 10))),
|
||||
(ADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
|
||||
|
||||
def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
|
||||
(v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i8 2))),
|
||||
(ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
|
||||
def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
|
||||
(v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
|
||||
(ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
Loading…
x
Reference in New Issue
Block a user