From bd1729e5d4d0c860732d5d1560554dd3e049452e Mon Sep 17 00:00:00 2001
From: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Date: Tue, 10 Feb 2015 12:04:41 +0000
Subject: [PATCH] [X86][FastIsel] Avoid introducing legacy SSE instructions if
 the target has AVX.

This patch teaches X86FastISel how to select AVX instructions for scalar
float/double convert operations.

Before this patch, X86FastISel always selected legacy SSE instructions
for FPExt (from float to double) and FPTrunc (from double to float).

For example:
\code
  define double @foo(float %f) {
    %conv = fpext float %f to double
    ret double %conv
  }
\end code

Before (with -mattr=+avx -fast-isel) X86FastIsel selected a CVTSS2SDrr which is
legacy SSE:
  cvtss2sd %xmm0, %xmm0

With this patch, X86FastIsel selects a VCVTSS2SDrr instead:
  vcvtss2sd %xmm0, %xmm0, %xmm0

Added test fast-isel-fptrunc-fpext.ll to check both the register-register and
the register-memory float/double conversion variants.

Differential Revision: http://reviews.llvm.org/D7438


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228682 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FastISel.cpp              | 64 +++++++++++---------
 test/CodeGen/X86/fast-isel-fptrunc-fpext.ll | 65 +++++++++++++++++++++
 2 files changed, 101 insertions(+), 28 deletions(-)
 create mode 100644 test/CodeGen/X86/fast-isel-fptrunc-fpext.ll

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 158e5b4fee1..91ecdf10c99 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -123,6 +123,9 @@ private:
 
   bool X86SelectTrunc(const Instruction *I);
 
+  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
+                               const TargetRegisterClass *RC);
+
   bool X86SelectFPExt(const Instruction *I);
   bool X86SelectFPTrunc(const Instruction *I);
 
@@ -2001,41 +2004,46 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
   return false;
 }
 
+// Helper method used by X86SelectFPExt and X86SelectFPTrunc.
+bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
+                                          unsigned TargetOpc,
+                                          const TargetRegisterClass *RC) {
+  assert((I->getOpcode() == Instruction::FPExt ||
+          I->getOpcode() == Instruction::FPTrunc) &&
+         "Instruction must be an FPExt or FPTrunc!");
+
+  unsigned OpReg = getRegForValue(I->getOperand(0));
+  if (OpReg == 0)
+    return false;
+
+  unsigned ResultReg = createResultReg(RC);
+  MachineInstrBuilder MIB;
+  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
+                ResultReg);
+  if (Subtarget->hasAVX())
+    MIB.addReg(OpReg);
+  MIB.addReg(OpReg);
+  updateValueMap(I, ResultReg);
+  return true;
+}
+
 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
-  // fpext from float to double.
-  if (X86ScalarSSEf64 &&
-      I->getType()->isDoubleTy()) {
-    const Value *V = I->getOperand(0);
-    if (V->getType()->isFloatTy()) {
-      unsigned OpReg = getRegForValue(V);
-      if (OpReg == 0) return false;
-      unsigned ResultReg = createResultReg(&X86::FR64RegClass);
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-              TII.get(X86::CVTSS2SDrr), ResultReg)
-        .addReg(OpReg);
-      updateValueMap(I, ResultReg);
-      return true;
-    }
+  if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
+      I->getOperand(0)->getType()->isFloatTy()) {
+    // fpext from float to double.
+    unsigned Opc = Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
+    return X86SelectFPExtOrFPTrunc(I, Opc, &X86::FR64RegClass);
   }
 
   return false;
 }
 
 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
-  if (X86ScalarSSEf64) {
-    if (I->getType()->isFloatTy()) {
-      const Value *V = I->getOperand(0);
-      if (V->getType()->isDoubleTy()) {
-        unsigned OpReg = getRegForValue(V);
-        if (OpReg == 0) return false;
-        unsigned ResultReg = createResultReg(&X86::FR32RegClass);
-        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-                TII.get(X86::CVTSD2SSrr), ResultReg)
-          .addReg(OpReg);
-        updateValueMap(I, ResultReg);
-        return true;
-      }
-    }
+  if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
+      I->getOperand(0)->getType()->isDoubleTy()) {
+    // fptrunc from double to float.
+    unsigned Opc = Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
+    return X86SelectFPExtOrFPTrunc(I, Opc, &X86::FR32RegClass);
   }
 
   return false;
diff --git a/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll b/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
new file mode 100644
index 00000000000..e898989b5c3
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel | FileCheck %s --check-prefix=ALL --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+;
+; Verify that fast-isel doesn't select legacy SSE instructions on targets that
+; feature AVX.
+;
+; Test cases are obtained from the following code snippet:
+; ///
+; double single_to_double_rr(float x) {
+;   return (double)x;
+; }
+; float double_to_single_rr(double x) {
+;   return (float)x;
+; }
+; double single_to_double_rm(float *x) {
+;   return (double)*x;
+; }
+; float double_to_single_rm(double *x) {
+;   return (float)*x;
+; }
+; ///
+
+define double @single_to_double_rr(float %x) {
+; ALL-LABEL: single_to_double_rr:
+; SSE-NOT: vcvtss2sd
+; AVX: vcvtss2sd %xmm0, %xmm0, %xmm0
+; ALL: ret
+entry:
+  %conv = fpext float %x to double
+  ret double %conv
+}
+
+define float @double_to_single_rr(double %x) {
+; ALL-LABEL: double_to_single_rr:
+; SSE-NOT: vcvtsd2ss
+; AVX: vcvtsd2ss %xmm0, %xmm0, %xmm0
+; ALL: ret
+entry:
+  %conv = fptrunc double %x to float
+  ret float %conv
+}
+
+define double @single_to_double_rm(float* %x) {
+; ALL-LABEL: single_to_double_rm:
+; SSE: cvtss2sd (%rdi), %xmm0
+; AVX: vmovss (%rdi), %xmm0
+; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; ALL-NEXT: ret
+entry:
+  %0 = load float* %x, align 4
+  %conv = fpext float %0 to double
+  ret double %conv
+}
+
+define float @double_to_single_rm(double* %x) {
+; ALL-LABEL: double_to_single_rm:
+; SSE: cvtsd2ss (%rdi), %xmm0
+; AVX: vmovsd (%rdi), %xmm0
+; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
+; ALL-NEXT: ret
+entry:
+  %0 = load double* %x, align 8
+  %conv = fptrunc double %0 to float
+  ret float %conv
+}