mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-27 14:34:58 +00:00
Improve ARM64 vector creation
This patch improves the performance of vector creation in cases where several of the lanes in the vector are a constant floating point value. It also includes new patterns to fold together some of the instructions when the value is 0.0f. Test cases included. rdar://16349427 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206496 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
4af58f145d
commit
5540570374
@ -4891,7 +4891,7 @@ FailedModImm:
|
||||
if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
|
||||
isConstant = false;
|
||||
|
||||
if (isa<ConstantSDNode>(V)) {
|
||||
if (isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V)) {
|
||||
++NumConstantLanes;
|
||||
if (!ConstantValue.getNode())
|
||||
ConstantValue = V;
|
||||
@ -4955,7 +4955,7 @@ FailedModImm:
|
||||
for (unsigned i = 0; i < NumElts; ++i) {
|
||||
SDValue V = Op.getOperand(i);
|
||||
SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
|
||||
if (!isa<ConstantSDNode>(V)) {
|
||||
if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V)) {
|
||||
// Note that type legalization likely mucked about with the VT of the
|
||||
// source operand, so we may have to convert it here before inserting.
|
||||
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
|
||||
|
@ -3472,6 +3472,9 @@ def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
|
||||
def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
|
||||
def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;
|
||||
|
||||
def : Pat<(v2f64 (ARM64dup (f64 fpimm0))), (MOVIv2d_ns (i32 0))>;
|
||||
def : Pat<(v4f32 (ARM64dup (f32 fpimm0))), (MOVIv2d_ns (i32 0))>;
|
||||
|
||||
// EDIT per word & halfword: 2s, 4h, 4s, & 8h
|
||||
defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;
|
||||
def : Pat<(v2i32 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
|
||||
|
33
test/CodeGen/ARM64/vector-insertion.ll
Normal file
33
test/CodeGen/ARM64/vector-insertion.ll
Normal file
@ -0,0 +1,33 @@
|
||||
; RUN: llc -march=arm64 -mcpu=generic < %s | FileCheck %s
|
||||
|
||||
define void @test0f(float* nocapture %x, float %a) #0 {
|
||||
entry:
|
||||
%0 = insertelement <4 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %a, i32 0
|
||||
%1 = bitcast float* %x to <4 x float>*
|
||||
store <4 x float> %0, <4 x float>* %1, align 16
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test0f
|
||||
; CHECK: movi.2d v[[TEMP:[0-9]+]], #0000000000000000
|
||||
; CHECK: ins.s v[[TEMP]][0], v{{[0-9]+}}[0]
|
||||
; CHECK: str q[[TEMP]], [x0]
|
||||
; CHECK: ret
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
define void @test1f(float* nocapture %x, float %a) #0 {
|
||||
entry:
|
||||
%0 = insertelement <4 x float> <float undef, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, float %a, i32 0
|
||||
%1 = bitcast float* %x to <4 x float>*
|
||||
store <4 x float> %0, <4 x float>* %1, align 16
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test1f
|
||||
; CHECK: fmov s[[TEMP:[0-9]+]], #1.000000e+00
|
||||
; CHECK: dup.4s v[[TEMP2:[0-9]+]], v[[TEMP]][0]
|
||||
; CHECK: ins.s v[[TEMP2]][0], v0[0]
|
||||
; CHECK: str q[[TEMP2]], [x0]
|
||||
; CHECK: ret
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user