From f3ff7c32f78f9fafe72a182837f42e14a12cff70 Mon Sep 17 00:00:00 2001 From: Cameron McInally Date: Wed, 5 Mar 2014 19:41:16 +0000 Subject: [PATCH] Lower AVX v4i64->v4i32 truncate to one shuffle. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@202996 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 14 ++------------ test/CodeGen/X86/avx-trunc.ll | 8 +++++--- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5058fb9b7a5..951d3ded99d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9134,24 +9134,14 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { DAG.getIntPtrConstant(0)); } - // On AVX, v4i64 -> v4i32 becomes a sequence that uses PSHUFD and MOVLHPS. SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, DAG.getIntPtrConstant(0)); SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, DAG.getIntPtrConstant(2)); - OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo); OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi); - - // The PSHUFD mask: - static const int ShufMask1[] = {0, 2, 0, 0}; - SDValue Undef = DAG.getUNDEF(VT); - OpLo = DAG.getVectorShuffle(VT, DL, OpLo, Undef, ShufMask1); - OpHi = DAG.getVectorShuffle(VT, DL, OpHi, Undef, ShufMask1); - - // The MOVLHPS mask: - static const int ShufMask2[] = {0, 1, 4, 5}; - return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask2); + static const int ShufMask[] = {0, 2, 4, 6}; + return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask); } if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) { diff --git a/test/CodeGen/X86/avx-trunc.ll b/test/CodeGen/X86/avx-trunc.ll index 58d0a356909..bf8d9a7f1a4 100644 --- a/test/CodeGen/X86/avx-trunc.ll +++ b/test/CodeGen/X86/avx-trunc.ll @@ -1,13 +1,15 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s define <4 x i32> @trunc_64_32(<4 x i64> %A) nounwind uwtable readnone ssp{ -; CHECK: trunc_64_32 -; CHECK: pshufd +; CHECK-LABEL: trunc_64_32 +; CHECK: shufps +; CHECK-NOT: pshufd +; CHECK-NOT: movlhps %B = trunc <4 x i64> %A to <4 x i32> ret <4 x i32>%B } define <8 x i16> @trunc_32_16(<8 x i32> %A) nounwind uwtable readnone ssp{ -; CHECK: trunc_32_16 +; CHECK-LABEL: trunc_32_16 ; CHECK: pshufb %B = trunc <8 x i32> %A to <8 x i16> ret <8 x i16>%B