From 68e132866236f5d59271d2c7ffb77a9c8e743752 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard@amd.com>
Date: Fri, 12 Jul 2013 18:14:56 +0000
Subject: [PATCH] R600/SI: Add initial double precision support for SI

Patch by: Niels Ole Salscheider

Reviewed-by: Tom Stellard <thomas.stellard@amd.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186177 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/AMDGPUISelLowering.cpp |  6 ++++++
 lib/Target/R600/SIISelLowering.cpp     |  1 +
 lib/Target/R600/SIInstructions.td      | 30 +++++++++++++++++++++++++-
 test/CodeGen/R600/fadd64.ll            | 13 +++++++++++
 test/CodeGen/R600/fdiv64.ll            | 14 ++++++++++++
 test/CodeGen/R600/fmul64.ll            | 13 +++++++++++
 test/CodeGen/R600/load64.ll            | 20 +++++++++++++++++
 7 files changed, 96 insertions(+), 1 deletion(-)
 create mode 100644 test/CodeGen/R600/fadd64.ll
 create mode 100644 test/CodeGen/R600/fdiv64.ll
 create mode 100644 test/CodeGen/R600/fmul64.ll
 create mode 100644 test/CodeGen/R600/load64.ll

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 7fad3bbc6c8..9891ad32fa7 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -60,12 +60,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::STORE, MVT::v4f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
 
+  setOperationAction(ISD::STORE, MVT::f64, Promote);
+  AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
+
   setOperationAction(ISD::LOAD, MVT::f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
 
   setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
 
+  setOperationAction(ISD::LOAD, MVT::f64, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
+
   setOperationAction(ISD::MUL, MVT::i64, Expand);
 
   setOperationAction(ISD::UDIV, MVT::i32, Expand);
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index a314bc40c48..4d0fdf3c038 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -45,6 +45,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
 
   addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass);
   addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
+  addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass);
 
   addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
   addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 5a1bf305f29..8436b67a20d 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -663,7 +663,9 @@ defm V_RSQ_LEGACY_F32 : VOP1_32 <
   [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))]
 >;
 defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
-defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>;
+defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64",
+  [(set f64:$dst, (fdiv FP_ONE, f64:$src0))]
+>;
 defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
 defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>;
 defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>;
@@ -1008,10 +1010,25 @@ def V_LSHR_B64 : VOP3_64_Shift <0x00000162, "V_LSHR_B64",
 >;
 def V_ASHR_I64 : VOP3_64_Shift <0x00000163, "V_ASHR_I64", []>;
 
+let isCommutable = 1 in {
+
 def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>;
 def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
 def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>;
 def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
+
+} // isCommutable = 1
+
+def : Pat <
+  (fadd f64:$src0, f64:$src1),
+  (V_ADD_F64 $src0, $src1, (i64 0))
+>;
+
+def : Pat <
+  (fmul f64:$src0, f64:$src1),
+  (V_MUL_F64 $src0, $src1, (i64 0))
+>;
+
 def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
 
 let isCommutable = 1 in {
@@ -1434,6 +1451,10 @@ def : BitConvert <i32, f32, VReg_32>;
 def : BitConvert <f32, i32, SReg_32>;
 def : BitConvert <f32, i32, VReg_32>;
 
+def : BitConvert <i64, f64, VReg_64>;
+
+def : BitConvert <f64, i64, VReg_64>;
+
 /********** =================== **********/
 /********** Src & Dst modifiers **********/
 /********** =================== **********/
@@ -1522,6 +1543,11 @@ def : Pat<
   (V_MUL_F32_e32 $src0, (V_RCP_F32_e32 $src1))
 >;
 
+def : Pat<
+  (fdiv f64:$src0, f64:$src1),
+  (V_MUL_F64 $src0, (V_RCP_F64_e32 $src1), (i64 0))
+>;
+
 def : Pat <
   (fcos f32:$src0),
   (V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
@@ -1672,6 +1698,8 @@ multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
   >;
 }
 
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64,
+                          global_load, constant_load>;
 defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32,
                           global_load, constant_load>;
 defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32,
diff --git a/test/CodeGen/R600/fadd64.ll b/test/CodeGen/R600/fadd64.ll
new file mode 100644
index 00000000000..130302f634f
--- /dev/null
+++ b/test/CodeGen/R600/fadd64.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s
+
+; CHECK: @fadd_f64
+; CHECK: V_ADD_F64 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}}
+
+define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                      double addrspace(1)* %in2) {
+   %r0 = load double addrspace(1)* %in1
+   %r1 = load double addrspace(1)* %in2
+   %r2 = fadd double %r0, %r1
+   store double %r2, double addrspace(1)* %out
+   ret void
+}
diff --git a/test/CodeGen/R600/fdiv64.ll b/test/CodeGen/R600/fdiv64.ll
new file mode 100644
index 00000000000..76c5ca37697
--- /dev/null
+++ b/test/CodeGen/R600/fdiv64.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s
+
+; CHECK: @fdiv_f64
+; CHECK: V_RCP_F64_e32 {{VGPR[0-9]+_VGPR[0-9]+}}
+; CHECK: V_MUL_F64 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}}
+
+define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                      double addrspace(1)* %in2) {
+   %r0 = load double addrspace(1)* %in1
+   %r1 = load double addrspace(1)* %in2
+   %r2 = fdiv double %r0, %r1
+   store double %r2, double addrspace(1)* %out
+   ret void
+}
diff --git a/test/CodeGen/R600/fmul64.ll b/test/CodeGen/R600/fmul64.ll
new file mode 100644
index 00000000000..8a57d4a86b3
--- /dev/null
+++ b/test/CodeGen/R600/fmul64.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s
+
+; CHECK: @fmul_f64
+; CHECK: V_MUL_F64 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}}
+
+define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                      double addrspace(1)* %in2) {
+   %r0 = load double addrspace(1)* %in1
+   %r1 = load double addrspace(1)* %in2
+   %r2 = fmul double %r0, %r1
+   store double %r2, double addrspace(1)* %out
+   ret void
+}
diff --git a/test/CodeGen/R600/load64.ll b/test/CodeGen/R600/load64.ll
new file mode 100644
index 00000000000..3b4a8f8f3b0
--- /dev/null
+++ b/test/CodeGen/R600/load64.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s
+
+; load a f64 value from the global address space.
+; CHECK: @load_f64
+; CHECK: BUFFER_LOAD_DWORDX2 VGPR{{[0-9]+}}
+define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
+entry:
+  %0 = load double addrspace(1)* %in
+  store double %0, double addrspace(1)* %out
+  ret void
+}
+
+; Load a f64 value from the constant address space.
+; CHECK: @load_const_addrspace_f64
+; CHECK: S_LOAD_DWORDX2 SGPR{{[0-9]+}}
+define void @load_const_addrspace_f64(double addrspace(1)* %out, double addrspace(2)* %in) {
+  %1 = load double addrspace(2)* %in
+  store double %1, double addrspace(1)* %out
+  ret void
+}