From 1cb1107c660bdade8b033bae10bf223d977691e5 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard@amd.com>
Date: Tue, 25 Jun 2013 02:39:20 +0000
Subject: [PATCH] R600: Fix typo in R600Schedule.td

This should only make a difference in programs that use a lot of the
vector ALU instructions like BFI_INT and BIT_ALIGN.  There is a slight
improvement in the phatk bitcoin mining kernel with this patch on
Evergreen (vector size == 1):

Before:
1173 Instruction Groups / 9520 dwords

After:
1167 Instruction Groups / 9510 dwords

Reviewed-by: Vincent Lejeune <vljn at ovi.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184819 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/R600Schedule.td |  4 ++--
 test/CodeGen/R600/packetizer.ll | 34 +++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 2 deletions(-)
 create mode 100644 test/CodeGen/R600/packetizer.ll
diff --git a/lib/Target/R600/R600Schedule.td b/lib/Target/R600/R600Schedule.td
index 78a460ae9d7..207233d7e76 100644
--- a/lib/Target/R600/R600Schedule.td
+++ b/lib/Target/R600/R600Schedule.td
@@ -29,7 +29,7 @@ def R600_VLIW5_Itin : ProcessorItineraries <
   [],
   [
     InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
-    InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_X, ALU_W]>]>,
+    InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
     InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
     InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
   ]
@@ -40,7 +40,7 @@ def R600_VLIW4_Itin : ProcessorItineraries <
   [],
   [
     InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
-    InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_X, ALU_W]>]>,
+    InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
     InstrItinData<TransALU, [InstrStage<1, [ALU_NULL]>]>,
     InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
   ]
diff --git a/test/CodeGen/R600/packetizer.ll b/test/CodeGen/R600/packetizer.ll
new file mode 100644
index 00000000000..0a405c57ea9
--- /dev/null
+++ b/test/CodeGen/R600/packetizer.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
+
+; CHECK: @test
+; CHECK: BIT_ALIGN_INT T{{[0-9]}}.X
+; CHECK: BIT_ALIGN_INT T{{[0-9]}}.Y
+; CHECK: BIT_ALIGN_INT T{{[0-9]}}.Z
+; CHECK: BIT_ALIGN_INT * T{{[0-9]}}.W
+
+define void @test(i32 addrspace(1)* %out, i32 %x_arg, i32 %y_arg, i32 %z_arg, i32 %w_arg, i32 %e) {
+entry:
+  %shl = sub i32 32, %e
+  %x = add i32 %x_arg, 1
+  %x.0 = shl i32 %x, %shl
+  %x.1 = lshr i32 %x, %e
+  %x.2 = or i32 %x.0, %x.1
+  %y = add i32 %y_arg, 1
+  %y.0 = shl i32 %y, %shl
+  %y.1 = lshr i32 %y, %e
+  %y.2 = or i32 %y.0, %y.1
+  %z = add i32 %z_arg, 1
+  %z.0 = shl i32 %z, %shl
+  %z.1 = lshr i32 %z, %e
+  %z.2 = or i32 %z.0, %z.1
+  %w = add i32 %w_arg, 1
+  %w.0 = shl i32 %w, %shl
+  %w.1 = lshr i32 %w, %e
+  %w.2 = or i32 %w.0, %w.1
+  %xy = or i32 %x.2, %y.2
+  %zw = or i32 %z.2, %w.2
+  %xyzw = or i32 %xy, %zw
+  store i32 %xyzw, i32 addrspace(1)* %out
+  ret void
+}