[x86] Start fixing our emission of ADDSUBPS and ADDSUBPD instructions by

introducing a synthetic X86 ISD node representing this generic operation. The relevant patterns for mapping these nodes into the concrete instructions are also added, and a gnarly bit of C++ code in the target-specific DAG combiner is replaced with simple code emitting this primitive. The next step is to generically combine blends of adds and subs into this node so that we can drop the reliance on an SSE4.1 ISD node (BLENDI) when matching an SSE3 feature (ADDSUB). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217819 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-06 09:44:39 +00:00 · 2014-09-15 20:09:47 +00:00 · 2014-09-15 20:09:47 +00:00 · fa6cf7e73c
commit fa6cf7e73c
parent 6c7ec4aae9
4 changed files with 37 additions and 26 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -6501,14 +6501,14 @@ static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,

  for (unsigned i = 0, e = NumElts; i != e; i++) {
    SDValue Op = BV->getOperand(i);
-      
+
    // Skip 'undef' values.
    unsigned Opcode = Op.getOpcode();
    if (Opcode == ISD::UNDEF) {
      std::swap(ExpectedOpcode, NextExpectedOpcode);
      continue;
    }
-      
+
    // Early exit if we found an unexpected opcode.
    if (Opcode != ExpectedOpcode)
      return SDValue();
@ -6565,31 +6565,9 @@ static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
  // Don't try to fold this build_vector into a VSELECT if it has
  // too many UNDEF operands.
  if (AddFound && SubFound && InVec0.getOpcode() != ISD::UNDEF &&
-      InVec1.getOpcode() != ISD::UNDEF) {
-    // Emit a sequence of vector add and sub followed by a VSELECT.
-    // The new VSELECT will be lowered into a BLENDI.
-    // At ISel stage, we pattern-match the sequence 'add + sub + BLENDI'
-    // and emit a single ADDSUB instruction.
-    SDValue Sub = DAG.getNode(ExpectedOpcode, DL, VT, InVec0, InVec1);
-    SDValue Add = DAG.getNode(NextExpectedOpcode, DL, VT, InVec0, InVec1);
+      InVec1.getOpcode() != ISD::UNDEF)
+    return DAG.getNode(X86ISD::ADDSUB, DL, VT, InVec0, InVec1);

-    // Construct the VSELECT mask.
-    EVT MaskVT = VT.changeVectorElementTypeToInteger();
-    EVT SVT = MaskVT.getVectorElementType();
-    unsigned SVTBits = SVT.getSizeInBits();
-    SmallVector<SDValue, 8> Ops;
-
-    for (unsigned i = 0, e = NumElts; i != e; ++i) {
-      APInt Value = i & 1 ? APInt::getNullValue(SVTBits) :
-                            APInt::getAllOnesValue(SVTBits);
-      SDValue Constant = DAG.getConstant(Value, SVT);
-      Ops.push_back(Constant);
-    }
-
-    SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVT, Ops);
-    return DAG.getSelect(DL, VT, Mask, Sub, Add);
-  }
-  
  return SDValue();
 }

--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@ -193,6 +193,9 @@ namespace llvm {
      /// BLENDI - Blend where the selector is an immediate.
      BLENDI,

+      /// ADDSUB - Combined add and sub on an FP vector.
+      ADDSUB,
+
      // SUBUS - Integer sub with unsigned saturation.
      SUBUS,

--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@ -248,6 +248,9 @@ def X86Vextract   : SDNode<"X86ISD::VEXTRACT",  SDTypeProfile<1, 2,
                              [SDTCisVec<1>, SDTCisPtrTy<2>]>, []>;

 def X86Blendi    : SDNode<"X86ISD::BLENDI",   SDTBlend>;
+
+def X86Addsub    : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>;
+
 def X86Fmadd     : SDNode<"X86ISD::FMADD",     SDTFma>;
 def X86Fnmadd    : SDNode<"X86ISD::FNMADD",    SDTFma>;
 def X86Fmsub     : SDNode<"X86ISD::FMSUB",     SDTFma>;
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@ -5370,6 +5370,24 @@ let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {

 // Patterns used to select 'addsub' instructions.
 let Predicates = [HasAVX] in {
+  def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 VR128:$rhs))),
+            (VADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
+  def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 (memop addr:$rhs)))),
+            (VADDSUBPSrm VR128:$lhs, f128mem:$rhs)>;
+  def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 VR128:$rhs))),
+            (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+  def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 (memop addr:$rhs)))),
+            (VADDSUBPDrm VR128:$lhs, f128mem:$rhs)>;
+
+  def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs), (v8f32 VR256:$rhs))),
+            (VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>;
+  def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs), (v8f32 (memop addr:$rhs)))),
+            (VADDSUBPSYrm VR256:$lhs, f256mem:$rhs)>;
+  def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (v4f64 VR256:$rhs))),
+            (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
+  def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (v4f64 (memop addr:$rhs)))),
+            (VADDSUBPDYrm VR256:$lhs, f256mem:$rhs)>;
+
  // Constant 170 corresponds to the binary mask '10101010'.
  // When used as a blend mask, it allows selecting eight elements from two
  // input vectors as follow:
@ -5405,6 +5423,15 @@ let Predicates = [HasAVX] in {
 }

 let Predicates = [UseSSE3] in {
+  def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 VR128:$rhs))),
+            (ADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
+  def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 (memop addr:$rhs)))),
+            (ADDSUBPSrm VR128:$lhs, f128mem:$rhs)>;
+  def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 VR128:$rhs))),
+            (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+  def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 (memop addr:$rhs)))),
+            (ADDSUBPDrm VR128:$lhs, f128mem:$rhs)>;
+
  // Constant 10 corresponds to the binary mask '1010'.
  // In the pattern below, it is used as a blend mask to select:
  // - the 1st and 3rd element from the first input vector (the fsub node);