From 1e9ccd68d40c3d79b2f25f471553914d73bdee58 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Fri, 20 Jan 2012 20:59:56 +0000 Subject: [PATCH] ARM vector any_extends need to be selected to vmovl. We have patterns for vector sext and zext operations but were missing anyext. Without those patterns, codegen will fail when the selection DAG has any_extend nodes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148568 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 3 +++ test/CodeGen/ARM/vmov.ll | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 781d1583408..a2df5bde880 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5024,6 +5024,9 @@ defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, // VMOVL : Vector Lengthening Move defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>; defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>; +def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>; +def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>; +def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>; // Vector Conversions. diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll index be95657915c..0c2387960b4 100644 --- a/test/CodeGen/ARM/vmov.ll +++ b/test/CodeGen/ARM/vmov.ll @@ -381,3 +381,20 @@ entry: store <4 x float> %b, <4 x float> *%p ret void } + +; Vector any_extends must be selected as either vmovl.u or vmovl.s. +; rdar://10723651 +define void @any_extend(<4 x i1> %x, <4 x i32> %y) nounwind ssp { +entry: +;CHECK: any_extend +;CHECK: vmovl + %and.i186 = zext <4 x i1> %x to <4 x i32> + %add.i185 = sub <4 x i32> %and.i186, %y + %sub.i = sub <4 x i32> %add.i185, zeroinitializer + %add.i = add <4 x i32> %sub.i, zeroinitializer + %vmovn.i = trunc <4 x i32> %add.i to <4 x i16> + tail call void @llvm.arm.neon.vst1.v4i16(i8* undef, <4 x i16> %vmovn.i, i32 2) + unreachable +} + +declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind