From 2ae0eec1c03fa005136b8724faab38048878f253 Mon Sep 17 00:00:00 2001 From: Anton Korobeynikov Date: Mon, 2 Nov 2009 00:12:06 +0000 Subject: [PATCH] Handle splats of undefs properly. This includes the testcase for PR5364 as well. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@85767 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 3 +++ test/CodeGen/ARM/2009-11-02-NegativeLane.ll | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 test/CodeGen/ARM/2009-11-02-NegativeLane.ll diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 88649ab2bed..41a597a00fb 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2735,6 +2735,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { int Lane = SVN->getSplatIndex(); + // If this is undef splat, generate it via "just" vdup, if possible. + if (Lane == -1) Lane = 0; + if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); } diff --git a/test/CodeGen/ARM/2009-11-02-NegativeLane.ll b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll new file mode 100644 index 00000000000..f2288c3710e --- /dev/null +++ b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll @@ -0,0 +1,20 @@ +; RUN: llc -mcpu=cortex-a8 < %s | grep vdup.32 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv7-eabi" + +define arm_aapcs_vfpcc void @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind { +entry: + br i1 undef, label %return, label %bb + +bb: ; preds = %bb, %entry + %0 = load float* undef, align 4 ; [#uses=1] + %1 = insertelement <4 x float> undef, float %0, i32 2 ; <<4 x float>> [#uses=1] + %2 = insertelement <4 x float> %1, float undef, i32 3 ; <<4 x float>> [#uses=1] + %3 = fmul <4 x float> undef, %2 ; <<4 x float>> [#uses=1] + %4 = extractelement <4 x float> %3, i32 1 ; [#uses=1] + store float %4, float* undef, align 4 + br i1 undef, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +}