mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-11 11:34:02 +00:00
ARM: Creating a vector from a lane of another.
The VDUP instruction source register doesn't allow a non-constant lane index, so make sure we don't construct an ARM::VDUPLANE node asking it to do so. rdar://13328063 http://llvm.org/bugs/show_bug.cgi?id=13963 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176413 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
65da9f1ce1
commit
7bf504c58f
@ -4452,8 +4452,11 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
|
||||
|
||||
// If we are VDUPing a value that comes directly from a vector, that will
|
||||
// cause an unnecessary move to and from a GPR, where instead we could
|
||||
// just use VDUPLANE.
|
||||
if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
|
||||
// just use VDUPLANE. We can only do this if the lane being extracted
|
||||
// is at a constant index, as the VDUP from lane instructions only have
|
||||
// constant-index forms.
|
||||
if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
|
||||
isa<ConstantSDNode>(Value->getOperand(1))) {
|
||||
// We need to create a new undef vector to use for the VDUPLANE if the
|
||||
// size of the vector from which we get the value is different than the
|
||||
// size of the vector that we need to create. We will insert the element
|
||||
|
@ -0,0 +1,14 @@
|
||||
; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
|
||||
|
||||
define void @bar(<4 x i32>* %p, i32 %lane, <4 x i32> %phitmp) nounwind {
|
||||
; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[SOURCE:[0-9]+]]:128]
|
||||
; CHECK: add.w r[[ADDR:[0-9]+]], r[[SOURCE]], {{r[0-9]+}}, lsl #2
|
||||
; CHECK: vld1.32 {[[DREG:d[0-9]+]][], [[DREG2:d[0-9]+]][]}, [r[[ADDR]]:32]
|
||||
; CHECK: vst1.32 {[[DREG]], [[DREG2]]}, [r0]
|
||||
%val = extractelement <4 x i32> %phitmp, i32 %lane
|
||||
%r1 = insertelement <4 x i32> undef, i32 %val, i32 1
|
||||
%r2 = insertelement <4 x i32> %r1, i32 %val, i32 2
|
||||
%r3 = insertelement <4 x i32> %r2, i32 %val, i32 3
|
||||
store <4 x i32> %r3, <4 x i32>* %p, align 4
|
||||
ret void
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user