Fix PR9267; add vector zext support.

The DAGCombiner folds a zext of a load into a single extending load (zextload). This patch disables that fold for vector types, since none of the supported targets knows how to perform a load plus vector_zext in one instruction.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126080 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-13 20:32:21 +00:00 · 2011-02-20 12:37:50 +00:00 · 2011-02-20 12:37:50 +00:00 · ed9b934f65
commit ed9b934f65
parent 1a4021a2be
3 changed files with 78 additions and 5 deletions
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@ -4614,10 +4614,11 @@ entry:
 <h5>Arguments:</h5>
-<p>The '<tt>zext</tt>' instruction takes a value to cast, which must be of
+<p>The '<tt>zext</tt>' instruction takes a value to cast, and a type to cast it to.
-   <a href="#t_integer">integer</a> type, and a type to cast it to, which must
+   Both types must be of <a href="#t_integer">integer</a> types, or vectors
-   also be of <a href="#t_integer">integer</a> type. The bit size of the
+   of the same number of integers.
-   <tt>value</tt> must be smaller than the bit size of the destination type,
+   The bit size of the <tt>value</tt> must be smaller than
   the bit size of the destination type,
   <tt>ty2</tt>.</p>
 <h5>Semantics:</h5>
@ -4630,6 +4631,7 @@ entry:
 <pre>
  %X = zext i32 257 to i64              <i>; yields i64:257</i>
  %Y = zext i1 true to i32              <i>; yields i32:1</i>
  %Z = zext &lt;2 x i16&gt; &lt;i16 8, i16 7&gt; to &lt;2 x i32&gt; <i>; yields &lt;i32 8, i32 7&gt;</i>
 </pre>
 </div>
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -3887,7 +3887,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  }
  // fold (zext (load x)) -> (zext (truncate (zextload x)))
-  if (ISD::isNON_EXTLoad(N0.getNode()) &&
+  // None of the supported targets knows how to perform load and vector_zext
  // in one instruction.  We only perform this transformation on scalar zext.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
    bool DoXform = true;
--- a/test/CodeGen/X86/vec_zext.ll
+++ b/test/CodeGen/X86/vec_zext.ll
@ -0,0 +1,69 @@
 ; RUN: llc < %s -march=x86-64
 ; PR 9267
 ; PR 9267 case: zext <4 x i16> -> <4 x i32> applied directly to loaded values.
 ; Two vectors are loaded from undef pointers, each is zero-extended, and the
 ; results are added; the sum is both stored (to an undef pointer) and returned.
 define<4 x i32> @func_16_32() {
  %F = load <4 x i16>* undef
  %G = zext <4 x i16> %F to <4 x i32>
  %H = load <4 x i16>* undef
  %Y = zext <4 x i16> %H to <4 x i32>
  %T = add <4 x i32> %Y, %G
  store <4 x i32>%T , <4 x i32>* undef
  ret <4 x i32> %T
 }
 ; zext <4 x i16> -> <4 x i64> applied directly to loaded values.
 ; Two vectors are loaded from undef pointers, each is zero-extended, and the
 ; results are combined with xor; the result is both stored (to an undef
 ; pointer) and returned.
 define<4 x i64> @func_16_64() {
  %F = load <4 x i16>* undef
  %G = zext <4 x i16> %F to <4 x i64>
  %H = load <4 x i16>* undef
  %Y = zext <4 x i16> %H to <4 x i64>
  %T = xor <4 x i64> %Y, %G
  store <4 x i64>%T , <4 x i64>* undef
  ret <4 x i64> %T
 }
 ; zext <4 x i32> -> <4 x i64> applied directly to loaded values.
 ; Two vectors are loaded from undef pointers, each is zero-extended, and the
 ; bitwise or of the two extended vectors is returned.
 define<4 x i64> @func_32_64() {
  %F = load <4 x i32>* undef
  %G = zext <4 x i32> %F to <4 x i64>
  %H = load <4 x i32>* undef
  %Y = zext <4 x i32> %H to <4 x i64>
  %T = or <4 x i64> %Y, %G
  ret <4 x i64> %T
 }
 ; zext <4 x i8> -> <4 x i16> applied directly to loaded values.
 ; Two vectors are loaded from undef pointers, each is zero-extended, and the
 ; sum of the two extended vectors is returned.
 define<4 x i16> @func_8_16() {
  %F = load <4 x i8>* undef
  %G = zext <4 x i8> %F to <4 x i16>
  %H = load <4 x i8>* undef
  %Y = zext <4 x i8> %H to <4 x i16>
  %T = add <4 x i16> %Y, %G
  ret <4 x i16> %T
 }
 ; zext <4 x i8> -> <4 x i32> applied directly to loaded values.
 ; Two vectors are loaded from undef pointers, each is zero-extended, and the
 ; difference %Y - %G of the two extended vectors is returned.
 define<4 x i32> @func_8_32() {
  %F = load <4 x i8>* undef
  %G = zext <4 x i8> %F to <4 x i32>
  %H = load <4 x i8>* undef
  %Y = zext <4 x i8> %H to <4 x i32>
  %T = sub <4 x i32> %Y, %G
  ret <4 x i32> %T
 }
 ; zext <4 x i8> -> <4 x i64> applied directly to loaded values.
 ; Two vectors are loaded from undef pointers, each is zero-extended, and the
 ; sum of the two extended vectors is returned.
 define<4 x i64> @func_8_64() {
  %F = load <4 x i8>* undef
  %G = zext <4 x i8> %F to <4 x i64>
  %H = load <4 x i8>* undef
  %Y = zext <4 x i8> %H to <4 x i64>
  %T = add <4 x i64> %Y, %G
  ret <4 x i64> %T
 }
 ; zext of a constant <4 x i16> vector (<0, 3, 8, 7>) to <4 x i32>; no load is
 ; involved, so this covers vector zext on a constant operand.
 define<4 x i32> @const_16_32() {
  %G = zext <4 x i16> <i16 0, i16 3, i16 8, i16 7> to <4 x i32>
  ret <4 x i32> %G
 }
 ; zext of a constant <4 x i16> vector (<0, 3, 8, 7>) to <4 x i64>; no load is
 ; involved, so this covers vector zext on a constant operand.
 define<4 x i64> @const_16_64() {
  %G = zext <4 x i16> <i16 0, i16 3, i16 8, i16 7> to <4 x i64>
  ret <4 x i64> %G
 }