From d7795540d0538fb79e70d0519858d463ac4375af Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Thu, 21 Oct 2010 00:48:00 +0000 Subject: [PATCH] Implement correct encodings for NEON vadd, both integer and floating point. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116981 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 38 ++++++++++++++++++---- test/MC/ARM/neon-fp-encoding.ll | 56 +++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 6 deletions(-) create mode 100644 test/MC/ARM/neon-fp-encoding.ll diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 95458a5d572..b9e819403e0 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1177,9 +1177,21 @@ class N3VD op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V { + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), N3RegFrm, itin, + OpcodeStr, Dt, "$Dd, $Dn, $Dm", "", + [(set DPR:$Dd, (ResTy (OpNode (OpTy DPR:$Dn), (OpTy DPR:$Dm))))]> { + // Instruction operands. + bits<5> Dd; + bits<5> Dn; + bits<5> Dm; + + let Inst{15-12} = Dd{3-0}; + let Inst{22} = Dd{4}; + let Inst{19-16} = Dn{3-0}; + let Inst{7} = Dn{4}; + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let isCommutable = Commutable; } // Same as N3VD but no data type. @@ -1220,10 +1232,24 @@ class N3VQ op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V { + (outs QPR:$Dd), (ins QPR:$Dn, QPR:$Dm), N3RegFrm, itin, + OpcodeStr, Dt, "$Dd, $Dn, $Dm", "", + [(set QPR:$Dd, (ResTy (OpNode (OpTy QPR:$Dn), (OpTy QPR:$Dm))))]> { let isCommutable = Commutable; + + bits<4> Dd; + bits<4> Dn; + bits<4> Dm; + + let Inst{15-13} = Dd{2-0}; + let Inst{22} = Dd{3}; + let Inst{12} = 0; + let Inst{19-17} = Dn{2-0}; + let Inst{7} = Dn{3}; + let Inst{16} = 0; + let Inst{3-1} = Dm{2-0}; + let Inst{5} = Dm{3}; + let Inst{0} = 0; } class N3VQX op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, diff --git a/test/MC/ARM/neon-fp-encoding.ll b/test/MC/ARM/neon-fp-encoding.ll new file mode 100644 index 00000000000..3da1ec9d5af --- /dev/null +++ b/test/MC/ARM/neon-fp-encoding.ll @@ -0,0 +1,56 @@ +; RUN: llc -show-mc-encoding -march=arm -mcpu=cortex-a8 -mattr=+neon < %s | FileCheck %s + +; CHECK: vadd_8xi8 +define <8 x i8> @vadd_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B +; CHECK: vadd.i8 d16, d17, d16 @ encoding: [0xa0,0x08,0x41,0xf2] + %tmp3 = add <8 x i8> %tmp1, %tmp2 + ret <8 x i8> %tmp3 +} + +; CHECK: vadd_4xi16 +define <4 x i16> @vadd_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B +; CHECK: vadd.i16 d16, d17, d16 @ encoding: [0xa0,0x08,0x51,0xf2] + %tmp3 = add <4 x i16> %tmp1, %tmp2 + ret <4 x i16> %tmp3 +} + +; CHECK: vadd_1xi64 +define <1 x i64> @vadd_1xi64(<1 x i64>* %A, <1 x i64>* %B) nounwind { + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B +; CHECK: vadd.i64 d16, d17, d16 @ encoding: [0xa0,0x08,0x71,0xf2] + %tmp3 = add <1 x i64> %tmp1, %tmp2 + ret <1 x i64> %tmp3 +} + +; CHECK: vadd_2xi32 +define <2 x i32> @vadd_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B +; CHECK: vadd.i32 d16, d17, d16 @ encoding: [0xa0,0x08,0x61,0xf2] + %tmp3 = add <2 x i32> %tmp1, %tmp2 + ret <2 x i32> %tmp3 +} + +; CHECK: vadd_2xfloat +define <2 x float> @vadd_2xfloat(<2 x float>* %A, <2 x float>* %B) nounwind { + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B +; CHECK: vadd.f32 d16, d16, d17 @ encoding: [0xa1,0x0d,0x40,0xf2] + %tmp3 = fadd <2 x float> %tmp1, %tmp2 + ret <2 x float> %tmp3 +} + +; CHECK: vadd_4xfloat +define <4 x float> @vadd_4xfloat(<4 x float>* %A, <4 x float>* %B) nounwind { + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B +; CHECK: vadd.f32 q8, q8, q9 @ encoding: [0xe2,0x0d,0x40,0xf2] + %tmp3 = fadd <4 x float> %tmp1, %tmp2 + ret <4 x float> %tmp3 +} +