mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-28 06:32:09 +00:00
vml[as].f32 cause stalls in following advanced SIMD instructions. Avoid using
them for scalar floating point operations for now.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@85697 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1820ec5e7a
commit
8cd0a8cb82
@ -2841,13 +2841,16 @@ def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul,1>;
|
||||
def : N3VDsPat<fmul, VMULfd_sfp>;
|
||||
|
||||
// Vector Multiply-Accumulate/Subtract used for single-precision FP
|
||||
let neverHasSideEffects = 1 in
|
||||
def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>;
|
||||
def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;
|
||||
// vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so
|
||||
// we want to avoid them for now. e.g., alternating vmla/vadd instructions.
|
||||
|
||||
let neverHasSideEffects = 1 in
|
||||
def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>;
|
||||
def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;
|
||||
//let neverHasSideEffects = 1 in
|
||||
//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>;
|
||||
//def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;
|
||||
|
||||
//let neverHasSideEffects = 1 in
|
||||
//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>;
|
||||
//def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;
|
||||
|
||||
// Vector Absolute used for single-precision FP
|
||||
let neverHasSideEffects = 1 in
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
|
||||
; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
|
||||
; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
|
||||
|
||||
define float @test(float %acc, float %a, float %b) {
|
||||
|
@ -7,8 +7,10 @@ entry:
|
||||
; VFP2: fnmacs
|
||||
; NEON: fnmacs
|
||||
|
||||
; NEONFP: vmls
|
||||
; NEONFP-NOT: vmls
|
||||
; NEONFP-NOT: fcpys
|
||||
; NEONFP: vmul.f32
|
||||
; NEONFP: vsub.f32
|
||||
; NEONFP: fmrs
|
||||
|
||||
%0 = fmul float %a, %b
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep fcpys | count 5
|
||||
; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep fcpys | count 4
|
||||
|
||||
define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind {
|
||||
entry:
|
||||
|
Loading…
x
Reference in New Issue
Block a user