From 8c8fe42a0d81f002fbae486ed45027d32dae7eeb Mon Sep 17 00:00:00 2001
From: Adam Nemet <anemet@apple.com>
Date: Mon, 17 Mar 2014 17:06:14 +0000
Subject: [PATCH] [VectorLegalizer/X86] Don't unvectorize fp_to_uint for
 v8f32->v8i16

Rather than LegalizeAction::Expand, this needs LegalizeAction::Promote to get
promoted to fp_to_sint v8f32->v8i32.  This is a legal operation on AVX.

For that to work properly, we also need to teach the legalizer about the
specific promotion required here.  The default vector promotion uses
bitcasting to a vector type of the same total size.  We want to promote the
vector element type, effectively widening the operation and then truncating
the result.  This is analogous to the current logic of how int_to_fp is
promoted.

The change also factors out some code from the int_to_fp promotion code to
ValueType::widenIntegerVectorElementType.  This is now shared between
int_to_fp and fp_to_int.

There is no longer need for the custom lowering of fp_to_sint f32->v8i16 in
X86.  It can now go through the new target-independent fp_to_*int promotion
logic.

I also checked that no other target uses Promote for these ops yet, so there
shouldn't be any unexpected change in behavior.

Fixes <rdar://problem/16202247>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204058 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/ValueTypes.h             |  8 ++++
 .../SelectionDAG/LegalizeVectorOps.cpp        | 48 ++++++++++++++++---
 lib/Target/X86/X86ISelLowering.cpp            | 15 +++---
 test/CodeGen/X86/avx-cvt-2.ll                 | 43 +++++++++++++++++
 4 files changed, 98 insertions(+), 16 deletions(-)
 create mode 100644 test/CodeGen/X86/avx-cvt-2.ll

diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index b5dc14a7047..8cf26fa30d3 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -280,6 +280,14 @@ namespace llvm {
       return getIntegerVT(Context, (EVTSize + 1) / 2);
     }
 
+    /// \brief Return a VT for an integer vector type with the size of the
+    /// elements doubled. The typed returned may be an extended type.
+    EVT widenIntegerVectorElementType(LLVMContext &Context) const {
+      EVT EltVT = getVectorElementType();
+      EltVT = EVT::getIntegerVT(Context, 2 * EltVT.getSizeInBits());
+      return EVT::getVectorVT(Context, EltVT, getVectorNumElements());
+    }
+
     /// isPow2VectorType - Returns true if the given vector is a power of 2.
     bool isPow2VectorType() const {
       unsigned NElts = getVectorNumElements();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 8f039817dd4..551d0549c8c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -77,6 +77,10 @@ class VectorLegalizer {
   // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input
   // operand to the next size up.
   SDValue PromoteVectorOpINT_TO_FP(SDValue Op);
+  // Implements FP_TO_[SU]INT vector promotion of the result type; it is
+  // promoted to the next size up integer type.  The result is then truncated
+  // back to the original type.
+  SDValue PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned);
 
   public:
   bool Run();
@@ -274,6 +278,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
       Result = PromoteVectorOpINT_TO_FP(Op);
       Changed = true;
       break;
+    case ISD::FP_TO_UINT:
+    case ISD::FP_TO_SINT:
+      // Promote the operation by extending the operand.
+      Result = PromoteVectorOpFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT);
+      Changed = true;
+      break;
     }
     break;
   case TargetLowering::Legal: break;
@@ -352,14 +362,9 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) {
   //
   // Increase the bitwidth of the element to the next pow-of-two
   // (which is greater than 8 bits).
-  unsigned NumElts = VT.getVectorNumElements();
-  EVT EltVT = VT.getVectorElementType();
-  EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits());
-  assert(EltVT.isSimple() && "Promoting to a non-simple vector type!");
-
-  // Build a new vector type and check if it is legal.
-  MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
 
+  EVT NVT = VT.widenIntegerVectorElementType(*DAG.getContext());
+  assert(NVT.isSimple() && "Promoting to a non-simple vector type!");
   SDLoc dl(Op);
   SmallVector<SDValue, 4> Operands(Op.getNumOperands());
 
@@ -376,6 +381,35 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) {
                      Operands.size());
 }
 
+// For FP_TO_INT we promote the result type to a vector type with wider
+// elements and then truncate the result.  This is different from the default
+// PromoteVector which uses bitcast to promote thus assumning that the
+// promoted vector type has the same overall size.
+SDValue VectorLegalizer::PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned) {
+  assert(Op.getNode()->getNumValues() == 1 &&
+         "Can't promote a vector with multiple results!");
+  EVT VT = Op.getValueType();
+
+  EVT NewVT;
+  unsigned NewOpc;
+  while (1) {
+    NewVT = VT.widenIntegerVectorElementType(*DAG.getContext());
+    assert(NewVT.isSimple() && "Promoting to a non-simple vector type!");
+    if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) {
+      NewOpc = ISD::FP_TO_SINT;
+      break;
+    }
+    if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewVT)) {
+      NewOpc = ISD::FP_TO_UINT;
+      break;
+    }
+  }
+
+  SDLoc loc(Op);
+  SDValue promoted  = DAG.getNode(NewOpc, SDLoc(Op), NewVT, Op.getOperand(0));
+  return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted);
+}
+
 
 SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
   SDLoc dl(Op);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 2c991aa8e24..9169d8c5e11 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1154,9 +1154,12 @@ void X86TargetLowering::resetOperationActions() {
     setOperationAction(ISD::FNEG,               MVT::v4f64, Custom);
     setOperationAction(ISD::FABS,               MVT::v4f64, Custom);
 
-    setOperationAction(ISD::FP_TO_SINT,         MVT::v8i16, Custom);
-
+    // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
+    // even though v8i16 is a legal type.
+    setOperationAction(ISD::FP_TO_SINT,         MVT::v8i16, Promote);
+    setOperationAction(ISD::FP_TO_UINT,         MVT::v8i16, Promote);
     setOperationAction(ISD::FP_TO_SINT,         MVT::v8i32, Legal);
+
     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i16, Promote);
     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i32, Legal);
     setOperationAction(ISD::FP_ROUND,           MVT::v4f32, Legal);
@@ -9225,13 +9228,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
 SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
                                            SelectionDAG &DAG) const {
   MVT VT = Op.getSimpleValueType();
-  if (VT.isVector()) {
-    if (VT == MVT::v8i16)
-      return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT,
-                         DAG.getNode(ISD::FP_TO_SINT, SDLoc(Op),
-                                     MVT::v8i32, Op.getOperand(0)));
-    return SDValue();
-  }
+  assert(!VT.isVector());
 
   std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
     /*IsSigned=*/ true, /*IsReplace=*/ false);
diff --git a/test/CodeGen/X86/avx-cvt-2.ll b/test/CodeGen/X86/avx-cvt-2.ll
new file mode 100644
index 00000000000..8cc7190fcc6
--- /dev/null
+++ b/test/CodeGen/X86/avx-cvt-2.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s
+
+; Check that we generate vector conversion from float to narrower int types
+
+%f32vec_t = type <8 x float>
+%i16vec_t = type <8 x i16>
+%i8vec_t =  type <8 x i8>
+
+define void @fptoui16(%f32vec_t %a, %i16vec_t *%p) {
+; CHECK-LABEL: fptoui16:
+; CHECK: vcvttps2dq %ymm
+; CHECK-NOT: vcvttss2si
+  %b = fptoui %f32vec_t %a to %i16vec_t
+  store %i16vec_t %b, %i16vec_t * %p
+  ret void
+}
+
+define void @fptosi16(%f32vec_t %a, %i16vec_t *%p) {
+; CHECK-LABEL: fptosi16:
+; CHECK: vcvttps2dq %ymm
+; CHECK-NOT: vcvttss2si
+  %b = fptosi %f32vec_t %a to %i16vec_t
+  store %i16vec_t %b, %i16vec_t * %p
+  ret void
+}
+
+define void @fptoui8(%f32vec_t %a, %i8vec_t *%p) {
+; CHECK-LABEL: fptoui8:
+; CHECK: vcvttps2dq %ymm
+; CHECK-NOT: vcvttss2si
+  %b = fptoui %f32vec_t %a to %i8vec_t
+  store %i8vec_t %b, %i8vec_t * %p
+  ret void
+}
+
+define void @fptosi8(%f32vec_t %a, %i8vec_t *%p) {
+; CHECK-LABEL: fptosi8:
+; CHECK: vcvttps2dq %ymm
+; CHECK-NOT: vcvttss2si
+  %b = fptosi %f32vec_t %a to %i8vec_t
+  store %i8vec_t %b, %i8vec_t * %p
+  ret void
+}