From b1e8cad61e64dd7f56b0c62b53f9c1fc86d599f7 Mon Sep 17 00:00:00 2001
From: Dan Gohman <gohman@apple.com>
Date: Mon, 28 Jul 2008 22:18:25 +0000
Subject: [PATCH] Add x86 isel patterns to match what would be a
 ZERO_EXTEND_INREG operation, which is represented in codegen as an 'and'
 operation. This matches them with movz instructions, instead of leaving them
 to be matched by and instructions with an immediate field.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@54147 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/SelectionDAGNodes.h |  2 +
 lib/Target/X86/X86Instr64bit.td          |  6 +++
 lib/Target/X86/X86InstrInfo.td           | 10 ++++
 test/CodeGen/X86/zext-inreg-0.ll         | 62 ++++++++++++++++++++++++
 test/CodeGen/X86/zext-inreg-1.ll         | 18 +++++++
 5 files changed, 98 insertions(+)
 create mode 100644 test/CodeGen/X86/zext-inreg-0.ll
 create mode 100644 test/CodeGen/X86/zext-inreg-1.ll

diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index dbb12947f20..73edcb8d49a 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -377,6 +377,8 @@ namespace ISD {
     // extending the low 8 bits of a 32-bit register to fill the top 24 bits
     // with the 7th bit).  The size of the smaller type is indicated by the 1th
     // operand, a ValueType node.
+    // Note that there is intentionally no corresponding ZERO_EXTEND_INREG; an
+    // AND with an appropriate constant is used instead.
     SIGN_EXTEND_INREG,
 
     /// FP_TO_[US]INT - Convert a floating point value to a signed or unsigned
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 23a403068bf..932e1b7afc7 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1240,6 +1240,12 @@ def : Pat<(and GR64:$src, i64immFFFFFFFF),
           (SUBREG_TO_REG (i64 0), 
             (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)),
             x86_subreg_32bit)>;
+// r & (2^16-1) ==> movz
+def : Pat<(and GR64:$src, 0xffff),
+          (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR64:$src, 0xff),
+          (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>;
 
 // (shl x, 1) ==> (add x, x)
 def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 4f3a3af0f97..9996195407a 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -2763,6 +2763,16 @@ def : Pat<(i32 (and (loadi32 addr:$src), (i32 65535))),(MOVZX32rm16 addr:$src)>;
 // Some peepholes
 //===----------------------------------------------------------------------===//
 
+// r & (2^16-1) ==> movz
+def : Pat<(and GR32:$src1, 0xffff),
+           (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+           (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+           (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, x86_subreg_8bit)))>;
+
 // (shl x, 1) ==> (add x, x)
 def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr  GR8 :$src1, GR8 :$src1)>;
 def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
diff --git a/test/CodeGen/X86/zext-inreg-0.ll b/test/CodeGen/X86/zext-inreg-0.ll
new file mode 100644
index 00000000000..a4ffd67f388
--- /dev/null
+++ b/test/CodeGen/X86/zext-inreg-0.ll
@@ -0,0 +1,62 @@
+; RUN: llvm-as < %s | llc -march=x86 | not grep and
+; RUN: llvm-as < %s | llc -march=x86-64 | not grep and
+
+; These should use movzbl instead of 'and 255'.
+; This is related to not having a ZERO_EXTEND_INREG opcode.
+
+define i32 @a(i32 %d) nounwind  {
+        %e = add i32 %d, 1
+        %retval = and i32 %e, 255
+        ret i32 %retval
+}
+define i32 @b(float %d) nounwind  {
+        %tmp12 = fptoui float %d to i8
+        %retval = zext i8 %tmp12 to i32
+        ret i32 %retval
+}
+define i32 @c(i32 %d) nounwind  {
+        %e = add i32 %d, 1
+        %retval = and i32 %e, 65535
+        ret i32 %retval
+}
+define i64 @d(i64 %d) nounwind  {
+        %e = add i64 %d, 1
+        %retval = and i64 %e, 255
+        ret i64 %retval
+}
+define i64 @e(i64 %d) nounwind  {
+        %e = add i64 %d, 1
+        %retval = and i64 %e, 65535
+        ret i64 %retval
+}
+define i64 @f(i64 %d) nounwind  {
+        %e = add i64 %d, 1
+        %retval = and i64 %e, 4294967295
+        ret i64 %retval
+}
+
+define i32 @g(i8 %d) nounwind  {
+        %e = add i8 %d, 1
+        %retval = zext i8 %e to i32
+        ret i32 %retval
+}
+define i32 @h(i16 %d) nounwind  {
+        %e = add i16 %d, 1
+        %retval = zext i16 %e to i32
+        ret i32 %retval
+}
+define i64 @i(i8 %d) nounwind  {
+        %e = add i8 %d, 1
+        %retval = zext i8 %e to i64
+        ret i64 %retval
+}
+define i64 @j(i16 %d) nounwind  {
+        %e = add i16 %d, 1
+        %retval = zext i16 %e to i64
+        ret i64 %retval
+}
+define i64 @k(i32 %d) nounwind  {
+        %e = add i32 %d, 1
+        %retval = zext i32 %e to i64
+        ret i64 %retval
+}
diff --git a/test/CodeGen/X86/zext-inreg-1.ll b/test/CodeGen/X86/zext-inreg-1.ll
new file mode 100644
index 00000000000..4a80fe5fe35
--- /dev/null
+++ b/test/CodeGen/X86/zext-inreg-1.ll
@@ -0,0 +1,18 @@
+; RUN: llvm-as < %s | llc -march=x86 | not grep and
+
+; These tests differ from the ones in zext-inreg-0.ll in that
+; on x86-64 they do require and instructions.
+
+; These should use movzbl instead of 'and 255'.
+; This is related to not having a ZERO_EXTEND_INREG node.
+
+define i64 @g(i64 %d) nounwind  {
+        %e = add i64 %d, 1
+        %retval = and i64 %e, 1099511627775
+        ret i64 %retval
+}
+define i64 @h(i64 %d) nounwind  {
+        %e = add i64 %d, 1
+        %retval = and i64 %e, 281474976710655
+        ret i64 %retval
+}