From 4ea250524f77a67102118747dad6ee69f9f3b3aa Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Thu, 4 Jul 2013 14:57:20 +0000
Subject: [PATCH] Add support for MC assembling and disassembling of vsel{ge,
 gt, eq, vs} instructions.

This adds a new decoder table/namespace 'VFPV8', as these instructions have their
top 4 bits as 0b1111, while other Thumb instructions have 0b1110.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185642 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMInstrFormats.td             | 58 +++++++++++++++++++
 lib/Target/ARM/ARMInstrVFP.td                 | 19 ++++++
 lib/Target/ARM/AsmParser/ARMAsmParser.cpp     |  4 +-
 .../ARM/Disassembler/ARMDisassembler.cpp      | 15 +++++
 test/MC/ARM/v8fp.s                            | 18 ++++++
 test/MC/Disassembler/ARM/v8fp.txt             | 25 ++++++++
 6 files changed, 137 insertions(+), 2 deletions(-)
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 239632f3538..91564da8e62 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1548,6 +1548,35 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
   let Inst{4}     = op4;
 }
 
+// FP, binary, not predicated
+class ADbInp<bits<5> opcod1, bits<2> opcod2, dag oops, dag iops,
+           InstrItinClass itin, string asm, list<dag> pattern>
+  : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone,
+          VFPBinaryFrm, itin, asm, "", pattern>
+{
+  // Instruction operands.
+  bits<5> Dd;
+  bits<5> Dn;
+  bits<5> Dm;
+
+  let Inst{31-28} = 0b1111;
+
+  // Encode instruction operands.
+  let Inst{3-0}   = Dm{3-0};
+  let Inst{5}     = Dm{4};
+  let Inst{19-16} = Dn{3-0};
+  let Inst{7}     = Dn{4};
+  let Inst{15-12} = Dd{3-0};
+  let Inst{22}    = Dd{4};
+
+  let Inst{27-23} = opcod1;
+  let Inst{21-20} = opcod2;
+  let Inst{11-9}  = 0b101;
+  let Inst{8}     = 1; // double precision
+  let Inst{6}     = 0;
+  let Inst{4}     = 0;
+}
+
 // Single precision, unary
 class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
            bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
@@ -1607,6 +1636,35 @@ class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
   let Inst{4}     = op4;
 }
 
+// Single precision, binary, not predicated
+class ASbInp<bits<5> opcod1, bits<2> opcod2, dag oops, dag iops,
+           InstrItinClass itin, string asm, list<dag> pattern>
+  : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone,
+          VFPBinaryFrm, itin, asm, "", pattern>
+{
+  // Instruction operands.
+  bits<5> Sd;
+  bits<5> Sn;
+  bits<5> Sm;
+
+  let Inst{31-28} = 0b1111;
+
+  // Encode instruction operands.
+  let Inst{3-0}   = Sm{4-1};
+  let Inst{5}     = Sm{0};
+  let Inst{19-16} = Sn{4-1};
+  let Inst{7}     = Sn{0};
+  let Inst{15-12} = Sd{4-1};
+  let Inst{22}    = Sd{0};
+
+  let Inst{27-23} = opcod1;
+  let Inst{21-20} = opcod2;
+  let Inst{11-9}  = 0b101;
+  let Inst{8}     = 0; // Single precision
+  let Inst{6}     = 0;
+  let Inst{4}     = 0;
+}
+
 // Single precision binary, if no NEON. Same as ASbI except not available if
 // NEON is enabled.
 class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index e1d470f1e2b..dcac7544373 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -333,6 +333,25 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
   let D = VFPNeonA8Domain;
 }
 
+multiclass vsel_inst<string op, bits<2> opc> {
+  let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
+    def S : ASbInp<0b11100, opc,
+                   (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+                   NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"),
+                   []>, Requires<[HasV8FP]>;
+
+    def D : ADbInp<0b11100, opc,
+                   (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+                   NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"),
+                   []>, Requires<[HasV8FP]>;
+  }
+}
+
+defm VSELGT : vsel_inst<"gt", 0b11>;
+defm VSELGE : vsel_inst<"ge", 0b10>;
+defm VSELEQ : vsel_inst<"eq", 0b00>;
+defm VSELVS : vsel_inst<"vs", 0b01>;
+
 // Match reassociated forms only if not sign dependent rounding.
 def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
           (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index bd4ea535567..687ea3f9410 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -4905,7 +4905,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
       Mnemonic == "vcgt"  || Mnemonic == "vcle"   || Mnemonic == "smlal" ||
       Mnemonic == "umaal" || Mnemonic == "umlal"  || Mnemonic == "vabal" ||
       Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" ||
-      Mnemonic == "fmuls")
+      Mnemonic == "fmuls" || Mnemonic.startswith("vsel"))
     return Mnemonic;
 
   // First, split out any predication code. Ignore mnemonics we know aren't
@@ -5005,7 +5005,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
   if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" ||
       Mnemonic == "cps" ||  Mnemonic == "it" ||  Mnemonic == "cbz" ||
       Mnemonic == "trap" || Mnemonic == "setend" ||
-      Mnemonic.startswith("cps")) {
+      Mnemonic.startswith("cps") || Mnemonic.startswith("vsel")) {
     // These mnemonics are never predicable
     CanAcceptPredicationCode = false;
   } else if (!isThumb()) {
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 31941c10ea4..d5b749d14aa 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -455,6 +455,13 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     return result;
   }
 
+  MI.clear();
+  result = decodeInstruction(DecoderTableVFPV832, MI, insn, Address, this, STI);
+  if (result != MCDisassembler::Fail) {
+    Size = 4;
+    return result;
+  }
+
   MI.clear();
   result = decodeInstruction(DecoderTableNEONData32, MI, insn, Address,
                              this, STI);
@@ -764,6 +771,14 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     }
   }
 
+  MI.clear();
+  result = decodeInstruction(DecoderTableVFPV832, MI, insn32, Address, this, STI);
+  if (result != MCDisassembler::Fail) {
+    Size = 4;
+    UpdateThumbVFPPredicate(MI);
+    return result;
+  }
+
   if (fieldFromInstruction(insn32, 28, 4) == 0xE) {
     MI.clear();
     result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address,
diff --git a/test/MC/ARM/v8fp.s b/test/MC/ARM/v8fp.s
index f6bf7b9e95e..440a94ca1ad 100644
--- a/test/MC/ARM/v8fp.s
+++ b/test/MC/ARM/v8fp.s
@@ -21,3 +21,21 @@
 @ CHECK: vcvtbeq.f64.f16 d3, s1     @ encoding: [0x60,0x3b,0xb2,0x0e]
   vcvtblt.f16.f64 s4, d1
 @ CHECK: vcvtblt.f16.f64 s4, d1     @ encoding: [0x41,0x2b,0xb3,0xbe]
+
+@ VSEL
+  vselge.f32 s4, s1, s23
+@ CHECK: vselge.f32 s4, s1, s23    @ encoding: [0xab,0x2a,0x20,0xfe]
+  vselge.f64 d30, d31, d23
+@ CHECK: vselge.f64 d30, d31, d23  @ encoding: [0xa7,0xeb,0x6f,0xfe]
+  vselgt.f32 s0, s1, s0
+@ CHECK: vselgt.f32 s0, s1, s0    @ encoding: [0x80,0x0a,0x30,0xfe]
+  vselgt.f64 d5, d10, d20
+@ CHECK: vselgt.f64 d5, d10, d20  @ encoding: [0x24,0x5b,0x3a,0xfe]
+  vseleq.f32 s30, s28, s23
+@ CHECK: vseleq.f32 s30, s28, s23 @ encoding: [0x2b,0xfa,0x0e,0xfe]
+  vseleq.f64 d2, d4, d8
+@ CHECK: vseleq.f64 d2, d4, d8    @ encoding: [0x08,0x2b,0x04,0xfe]
+  vselvs.f32 s21, s16, s14
+@ CHECK: vselvs.f32 s21, s16, s14 @ encoding: [0x07,0xaa,0x58,0xfe]
+  vselvs.f64 d0, d1, d31
+@ CHECK: vselvs.f64 d0, d1, d31   @ encoding: [0x2f,0x0b,0x11,0xfe]
diff --git a/test/MC/Disassembler/ARM/v8fp.txt b/test/MC/Disassembler/ARM/v8fp.txt
index 76125dacfd3..a4a91c14959 100644
--- a/test/MC/Disassembler/ARM/v8fp.txt
+++ b/test/MC/Disassembler/ARM/v8fp.txt
@@ -23,3 +23,28 @@
 
 0x41 0x2b 0xb3 0xbe
 # CHECK: vcvtblt.f16.f64 s4, d1
+
+
+0xab 0x2a 0x20 0xfe
+# CHECK: vselge.f32 s4, s1, s23
+
+0xa7 0xeb 0x6f 0xfe
+# CHECK: vselge.f64 d30, d31, d23
+
+0x80 0x0a 0x30 0xfe
+# CHECK: vselgt.f32 s0, s1, s0
+
+0x24 0x5b 0x3a 0xfe
+# CHECK: vselgt.f64 d5, d10, d20
+
+0x2b 0xfa 0x0e 0xfe
+# CHECK: vseleq.f32 s30, s28, s23
+
+0x08 0x2b 0x04 0xfe
+# CHECK: vseleq.f64 d2, d4, d8
+
+0x07 0xaa 0x58 0xfe
+# CHECK: vselvs.f32 s21, s16, s14
+
+0x2f 0x0b 0x11 0xfe
+# CHECK: vselvs.f64 d0, d1, d31