ARM: add a couple more NEON predicates.

The fused multiply instructions were added in VFPv4 but are still NEON
instructions, in particular they shouldn't be available on a Cortex-M4 not
matter how floaty it is.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193342 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tim Northover 2013-10-24 12:48:05 +00:00
parent eac623a18b
commit e2dee623e0
2 changed files with 16 additions and 5 deletions

View File

@ -4301,19 +4301,19 @@ def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
v2f32, fmul_su, fadd_mlx>,
Requires<[HasVFP4,UseFusedMAC]>;
Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
v4f32, fmul_su, fadd_mlx>,
Requires<[HasVFP4,UseFusedMAC]>;
Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
// Fused Vector Multiply Subtract (floating-point)
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
v2f32, fmul_su, fsub_mlx>,
Requires<[HasVFP4,UseFusedMAC]>;
Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
v4f32, fmul_su, fsub_mlx>,
Requires<[HasVFP4,UseFusedMAC]>;
Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),

View File

@ -1,6 +1,8 @@
@ RUN: llvm-mc < %s -triple armv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4 | FileCheck %s --check-prefix=ARM
@ RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4 | FileCheck %s --check-prefix=THUMB
@ RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mcpu=cortex-m4 | FileCheck %s --check-prefix=THUMB_V7EM
@ RUN: not llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mcpu=cortex-m4 > %t 2> %t2
@ RUN: FileCheck %s < %t --check-prefix=THUMB_V7EM
@ RUN: FileCheck %s < %t2 --check-prefix=THUMB_V7EM-ERRORS
@ ARM: vfma.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xe2,0xee]
@ THUMB: vfma.f64 d16, d18, d17 @ encoding: [0xe2,0xee,0xa1,0x0b]
@ -13,10 +15,14 @@ vfma.f32 s2, s4, s0
@ ARM: vfma.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x42,0xf2]
@ THUMB: vfma.f32 d16, d18, d17 @ encoding: [0x42,0xef,0xb1,0x0c]
@ THUMB_V7EM-ERRORS: error: instruction requires: NEON
@ THUMB_V7EM-ERRORS-NEXT: vfma.f32 d16, d18, d17
vfma.f32 d16, d18, d17
@ ARM: vfma.f32 q2, q4, q0 @ encoding: [0x50,0x4c,0x08,0xf2]
@ THUMB: vfma.f32 q2, q4, q0 @ encoding: [0x08,0xef,0x50,0x4c]
@ THUMB_V7EM-ERRORS: error: instruction requires: NEON
@ THUMB_V7EM-ERRORS-NEXT: vfma.f32 q2, q4, q0
vfma.f32 q2, q4, q0
@ ARM: vfnma.f64 d16, d18, d17 @ encoding: [0xe1,0x0b,0xd2,0xee]
@ -39,10 +45,14 @@ vfms.f32 s2, s4, s0
@ ARM: vfms.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x62,0xf2]
@ THUMB: vfms.f32 d16, d18, d17 @ encoding: [0x62,0xef,0xb1,0x0c]
@ THUMB_V7EM-ERRORS: error: instruction requires: NEON
@ THUMB_V7EM-ERRORS-NEXT: vfms.f32 d16, d18, d17
vfms.f32 d16, d18, d17
@ ARM: vfms.f32 q2, q4, q0 @ encoding: [0x50,0x4c,0x28,0xf2]
@ THUMB: vfms.f32 q2, q4, q0 @ encoding: [0x28,0xef,0x50,0x4c]
@ THUMB_V7EM-ERRORS: error: instruction requires: NEON
@ THUMB_V7EM-ERRORS-NEXT: vfms.f32 q2, q4, q0
vfms.f32 q2, q4, q0
@ ARM: vfnms.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xd2,0xee]
@ -51,4 +61,5 @@ vfnms.f64 d16, d18, d17
@ ARM: vfnms.f32 s2, s4, s0 @ encoding: [0x00,0x1a,0x92,0xee]
@ THUMB: vfnms.f32 s2, s4, s0 @ encoding: [0x92,0xee,0x00,0x1a]
@ THUMB_V7EM: vfnms.f32 s2, s4, s0 @ encoding: [0x92,0xee,0x00,0x1a]
vfnms.f32 s2, s4, s0