[X86][SSE] Fix PerformSExtCombine bug that accessed the wrong return value of an aggregate type.

Fix to rL237885 to ensure that it accesses the correct return value of an aggregate type. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240223 91177308-0d34-0410-b5e6-96231b3b80d8
2025-07-24 06:25:18 +00:00 · 2015-06-20 16:19:24 +00:00
parent 793ca75174
commit 1eabc9fb9d
2 changed files with 53 additions and 3 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -24239,7 +24239,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
-  EVT InVT = N0->getValueType(0);
+  EVT InVT = N0.getValueType();
  EVT InSVT = InVT.getScalarType();
  SDLoc DL(N);

@@ -24257,7 +24257,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
  }

  if (!DCI.isBeforeLegalizeOps()) {
-    if (N0.getValueType() == MVT::i1) {
+    if (InVT == MVT::i1) {
      SDValue Zero = DAG.getConstant(0, DL, VT);
      SDValue AllOnes =
        DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);
@@ -24268,7 +24268,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,

  if (VT.isVector()) {
    auto ExtendToVec128 = [&DAG](SDLoc DL, SDValue N) {
-      EVT InVT = N->getValueType(0);
+      EVT InVT = N.getValueType();
      EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
                                   128 / InVT.getScalarSizeInBits());
      SmallVector<SDValue, 8> Opnds(128 / InVT.getSizeInBits(),
--- a/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/test/CodeGen/X86/vec_int_to_fp.ll
@@ -1328,3 +1328,53 @@ define <8 x float> @uitofp_8vf32_i8(<16 x i8> %a) {
  %cvt = uitofp <8 x i8> %shuf to <8 x float>
  ret <8 x float> %cvt
 }
+
+;
+; Aggregates
+;
+
+%Arguments = type <{ <8 x i8>, <8 x i16>, <8 x float>* }>
+define void @aggregate_sitofp_8f32_i16(%Arguments* nocapture readonly %a0) {
+; SSE2-LABEL: aggregate_sitofp_8f32_i16:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movq 24(%rdi), %rax
+; SSE2-NEXT:    movdqu 8(%rdi), %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    psrad $16, %xmm1
+; SSE2-NEXT:    cvtdq2ps %xmm1, %xmm1
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    psrad $16, %xmm0
+; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
+; SSE2-NEXT:    movaps %xmm0, (%rax)
+; SSE2-NEXT:    movaps %xmm1, 16(%rax)
+; SSE2-NEXT:    retq
+;
+; AVX1-LABEL: aggregate_sitofp_8f32_i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    movq 24(%rdi), %rax
+; AVX1-NEXT:    vmovdqu 8(%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm1
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT:    vmovaps %ymm0, (%rax)
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: aggregate_sitofp_8f32_i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    movq 24(%rdi), %rax
+; AVX2-NEXT:    vpmovsxwd 8(%rdi), %ymm0
+; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
+; AVX2-NEXT:    vmovaps %ymm0, (%rax)
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+ %1 = load %Arguments, %Arguments* %a0, align 1
+ %2 = extractvalue %Arguments %1, 1
+ %3 = extractvalue %Arguments %1, 2
+ %4 = sitofp <8 x i16> %2 to <8 x float>
+ store <8 x float> %4, <8 x float>* %3, align 32
+ ret void
+}