[X86][SSE] Sign extension for target vector sizes less than 128 bits (pt1)

This patch adds support for sign extension to sub-128-bit vector types, such as v2i32. It pads the source vector with UNDEF subvectors so that the extended result is 128 bits wide, performs the sign extension at that width (e.g. as v4i32), and then extracts the target subvector. Patch 1/2 of D10589 - the second patch covers the conversion of v2i8/v2i16 to v2f64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241323 91177308-0d34-0410-b5e6-96231b3b80d8
2025-11-02 07:17:36 +00:00 · 2015-07-03 07:51:01 +00:00
parent 6890be345e
commit e3c6222c76
2 changed files with 60 additions and 6 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -24233,23 +24233,37 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
    return SDValue();
  }
-  if (VT.isVector()) {
+  if (VT.isVector() && Subtarget->hasSSE2()) {
-    auto ExtendToVec128 = [&DAG](SDLoc DL, SDValue N) {
+    auto ExtendVecSize = [&DAG](SDLoc DL, SDValue N, unsigned Size) {
      EVT InVT = N.getValueType();
      EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
-                                   128 / InVT.getScalarSizeInBits());
+                                   Size / InVT.getScalarSizeInBits());
-      SmallVector<SDValue, 8> Opnds(128 / InVT.getSizeInBits(),
+      SmallVector<SDValue, 8> Opnds(Size / InVT.getSizeInBits(),
                                    DAG.getUNDEF(InVT));
      Opnds[0] = N;
      return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds);
    };
    // If target-size is less than 128-bits, extend to a type that would extend
    // to 128 bits, extend that and extract the original target vector.
    if (VT.getSizeInBits() < 128 && !(128 % VT.getSizeInBits()) &&
        (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
        (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
      unsigned Scale = 128 / VT.getSizeInBits();
      EVT ExVT =
          EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits());
      SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits());
      SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, ExVT, Ex);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt,
                         DAG.getIntPtrConstant(0, DL));
    }
    // If target-size is 128-bits, then convert to ISD::SIGN_EXTEND_VECTOR_INREG
    // which ensures lowering to X86ISD::VSEXT (pmovsx*).
    if (VT.getSizeInBits() == 128 &&
        (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
        (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
-      SDValue ExOp = ExtendToVec128(DL, N0);
+      SDValue ExOp = ExtendVecSize(DL, N0, 128);
      return DAG.getSignExtendVectorInReg(ExOp, DL, VT);
    }
@@ -24268,7 +24282,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
           ++i, Offset += NumSubElts) {
        SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
                                     DAG.getIntPtrConstant(Offset, DL));
-        SrcVec = ExtendToVec128(DL, SrcVec);
+        SrcVec = ExtendVecSize(DL, SrcVec, 128);
        SrcVec = DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT);
        Opnds.push_back(SrcVec);
      }
--- a/test/CodeGen/X86/vector-sext.ll
+++ b/test/CodeGen/X86/vector-sext.ll
@@ -117,6 +117,46 @@ entry:
  ret <4 x i64>%B
 }
 ; Sign-extends the two lowest i8 elements of %A to <2 x i16> and returns the
 ; result bitcast to i32. The <2 x i16> result is narrower than 128 bits, so
 ; this exercises sign extension to a sub-128-bit vector type.
 define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp {
 ; SSE2-LABEL: sext_2i8_to_i32:
 ; SSE2:       # BB#0: # %entry
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSE2-NEXT:    psraw $8, %xmm0
 ; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: sext_2i8_to_i32:
 ; SSSE3:       # BB#0: # %entry
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSSE3-NEXT:    psraw $8, %xmm0
 ; SSSE3-NEXT:    movd %xmm0, %eax
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: sext_2i8_to_i32:
 ; SSE41:       # BB#0: # %entry
 ; SSE41-NEXT:    pmovsxbw %xmm0, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: sext_2i8_to_i32:
 ; AVX:       # BB#0: # %entry
 ; AVX-NEXT:    vpmovsxbw %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
 ;
 ; X32-SSE41-LABEL: sext_2i8_to_i32:
 ; X32-SSE41:       # BB#0: # %entry
 ; X32-SSE41:         pmovsxbw %xmm0, %xmm0
 ; X32-SSE41-NEXT:    movd %xmm0, %eax
 ; X32-SSE41-NEXT:    popl %edx
 ; X32-SSE41-NEXT:    retl
 entry:
  %Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %Ex = sext <2 x i8> %Shuf to <2 x i16>
  %Bc = bitcast <2 x i16> %Ex to i32
  ret i32 %Bc
 }
 define <4 x i32> @load_sext_test1(<4 x i16> *%ptr) {
 ; SSE2-LABEL: load_sext_test1:
 ; SSE2:       # BB#0: # %entry