Summary of this patch (free text ahead of the first "diff --git" header):

  * lib/Target/X86/X86InstrMMX.td: adds three anonymous patterns, wrapped in
    "let AddedComplexity = 20", that select MMX_MOVZDI2PDIrm for an
    X86vzmovl whose operand is a bitcast of load_mmx, one each for the
    v8i8, v4i16 and v2i32 result types.
  * test/CodeGen/X86/mmx-vzmovl.ll: new test. It loads a <1 x i64>, views it
    as <2 x i32>, ANDs it with <i32 -1, i32 0> (keeps element 0, clears
    element 1) and stores the result; the RUN lines grep the llc output for
    "movd" and "movq".

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of the patch. Confirm the context-line whitespace
against the target tree before applying.

diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index bf0dcd20e25..f412f843455 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -585,6 +585,15 @@ let AddedComplexity = 15 in {
             (MMX_MOVZDI2PDIrr GR32:$src)>;
 }
 
+let AddedComplexity = 20 in {
+  def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (load_mmx addr:$src)))),
+            (MMX_MOVZDI2PDIrm addr:$src)>;
+  def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (load_mmx addr:$src)))),
+            (MMX_MOVZDI2PDIrm addr:$src)>;
+  def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
+            (MMX_MOVZDI2PDIrm addr:$src)>;
+}
+
 // Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower
 // 8 or 16-bits matter.
 def : Pat<(bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))),
diff --git a/test/CodeGen/X86/mmx-vzmovl.ll b/test/CodeGen/X86/mmx-vzmovl.ll
new file mode 100644
index 00000000000..95f95794531
--- /dev/null
+++ b/test/CodeGen/X86/mmx-vzmovl.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep movd
+; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep movq
+
+define void @foo(<1 x i64>* %a, <1 x i64>* %b) nounwind {
+entry:
+  %0 = load <1 x i64>* %a, align 8 ; <<1 x i64>> [#uses=1]
+  %1 = bitcast <1 x i64> %0 to <2 x i32> ; <<2 x i32>> [#uses=1]
+  %2 = and <2 x i32> %1, < i32 -1, i32 0 > ; <<2 x i32>> [#uses=1]
+  %3 = bitcast <2 x i32> %2 to <1 x i64> ; <<1 x i64>> [#uses=1]
+  store <1 x i64> %3, <1 x i64>* %b, align 8
+  br label %bb2
+
+bb2: ; preds = %entry
+  ret void
+}