diff --git a/docs/LangRef.html b/docs/LangRef.html index fa6a0784653..81fa8cc6e89 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -4614,10 +4614,11 @@ entry:
Arguments:
-

The 'zext' instruction takes a value to cast, which must be of - integer type, and a type to cast it to, which must - also be of integer type. The bit size of the - value must be smaller than the bit size of the destination type, +

The 'zext' instruction takes a value to cast, and a type to cast it to. + Both types must be of integer types, or vectors + of the same number of integers. + The bit size of the value must be smaller than + the bit size of the destination type, ty2.

Semantics:
@@ -4630,6 +4631,7 @@ entry:
   %X = zext i32 257 to i64              ; yields i64:257
   %Y = zext i1 true to i32              ; yields i32:1
+  %Z = zext <2 x i16> <i16 8, i16 7> to <2 x i32> ; yields <i32 8, i32 7>
 
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 90356021f60..c5f0324ac41 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3887,7 +3887,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } // fold (zext (load x)) -> (zext (truncate (zextload x))) - if (ISD::isNON_EXTLoad(N0.getNode()) && + // None of the supported targets knows how to perform load and vector_zext + // in one instruction. We only perform this transformation on scalar zext. + if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { bool DoXform = true; diff --git a/test/CodeGen/X86/vec_zext.ll b/test/CodeGen/X86/vec_zext.ll new file mode 100644 index 00000000000..615a50b7afc --- /dev/null +++ b/test/CodeGen/X86/vec_zext.ll @@ -0,0 +1,69 @@ +; RUN: llc < %s -march=x86-64 +; PR 9267 + +define<4 x i32> @func_16_32() { + %F = load <4 x i16>* undef + %G = zext <4 x i16> %F to <4 x i32> + %H = load <4 x i16>* undef + %Y = zext <4 x i16> %H to <4 x i32> + %T = add <4 x i32> %Y, %G + store <4 x i32>%T , <4 x i32>* undef + ret <4 x i32> %T +} + +define<4 x i64> @func_16_64() { + %F = load <4 x i16>* undef + %G = zext <4 x i16> %F to <4 x i64> + %H = load <4 x i16>* undef + %Y = zext <4 x i16> %H to <4 x i64> + %T = xor <4 x i64> %Y, %G + store <4 x i64>%T , <4 x i64>* undef + ret <4 x i64> %T +} + +define<4 x i64> @func_32_64() { + %F = load <4 x i32>* undef + %G = zext <4 x i32> %F to <4 x i64> + %H = load <4 x i32>* undef + %Y = zext <4 x i32> %H to <4 x i64> + %T = or <4 x i64> %Y, %G + ret <4 x i64> %T +} + +define<4 x i16> @func_8_16() { + %F = load <4 x i8>* undef + %G = zext <4 x i8> %F to <4 x i16> + %H = load <4 x i8>* undef + %Y = zext <4 x i8> %H to <4 x i16> + %T = add <4 x i16> %Y, %G + ret <4 x i16> %T +} + +define<4 x i32> @func_8_32() { + %F = load <4 x i8>* undef + %G = zext <4 x i8> %F to <4 x i32> + %H = load <4 x i8>* undef + %Y = zext <4 x i8> %H to <4 x i32> + %T = sub <4 x i32> %Y, %G + ret <4 x i32> %T +} + +define<4 x i64> @func_8_64() { + %F = load <4 x i8>* undef + %G = zext <4 x i8> %F to <4 x i64> + %H = load <4 x i8>* undef + %Y = zext <4 x i8> %H to <4 x i64> + %T = add <4 x i64> %Y, %G + ret <4 x i64> %T +} + +define<4 x i32> @const_16_32() { + %G = zext <4 x i16> to <4 x i32> + ret <4 x i32> %G +} + +define<4 x i64> @const_16_64() { + %G = zext <4 x i16> to <4 x i64> + ret <4 x i64> %G +} +