From 88cf038436a142611424c895c601731ffa7c993f Mon Sep 17 00:00:00 2001
From: Bill Wendling <isanbard@gmail.com>
Date: Thu, 14 Oct 2010 01:02:08 +0000
Subject: [PATCH] ARM VFP: add encodings for all multiply add/subtract
 instructions, add missing patterns for some of them, and add encodings
 for VMRS and VMSR (replacing the hand-written ARMCodeEmitter case).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116464 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMCodeEmitter.cpp |  10 +-
 lib/Target/ARM/ARMInstrVFP.td     | 158 ++++++++++++++++++++----------
 test/MC/ARM/simple-fp-encoding.ll |  94 ++++++++++++++++++
 3 files changed, 200 insertions(+), 62 deletions(-)
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index b86c8c902ee..9254fcd9174 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -1595,7 +1595,7 @@ void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) {
   // Set the conditional execution predicate
   Binary |= II->getPredicate(&MI) << ARMII::CondShift;
 
-  switch(Opcode) {
+  switch (Opcode) {
   default:
     llvm_unreachable("ARMCodeEmitter::emitMiscInstruction");
 
@@ -1603,14 +1603,6 @@ void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) {
     // No further encoding needed.
     break;
 
-  case ARM::VMRS:
-  case ARM::VMSR: {
-    const MachineOperand &MO0 = MI.getOperand(0);
-    // Encode Rt.
-    Binary |= getARMRegisterNumbering(MO0.getReg()) << ARMII::RegRdShift;
-    break;
-  }
-
   case ARM::FCONSTD:
   case ARM::FCONSTS: {
     // Encode Dd / Sd.
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 3a13f37b0f1..a67e64594a3 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -25,16 +25,16 @@ def arm_ftoui  : SDNode<"ARMISD::FTOUI",  SDT_FTOI>;
 def arm_ftosi  : SDNode<"ARMISD::FTOSI",  SDT_FTOI>;
 def arm_sitof  : SDNode<"ARMISD::SITOF",  SDT_ITOF>;
 def arm_uitof  : SDNode<"ARMISD::UITOF",  SDT_ITOF>;
-def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>;
+def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag, SDNPOutFlag]>;
 def arm_cmpfp  : SDNode<"ARMISD::CMPFP",  SDT_ARMCmp, [SDNPOutFlag]>;
 def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>;
 def arm_fmdrr  : SDNode<"ARMISD::VMOVDRR",  SDT_VMOVDRR>;
 
+
 //===----------------------------------------------------------------------===//
 // Operand Definitions.
 //
 
-
 def vfp_f32imm : Operand<f32>,
                  PatLeaf<(f32 fpimm), [{
       return ARM::getVFPf32Imm(N->getValueAPF()) != -1;
@@ -672,7 +672,7 @@ def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
 def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
                                 (outs SPR:$Sd), (ins DPR:$Dm),
                                 IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
-                                [(set SPR:$Sd, (int_arm_vcvtru (f64 DPR:$Dm)))]> {
+                                [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>{
   let Inst{7} = 0; // Z bit
 }
 
@@ -735,7 +735,7 @@ def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1,
                        (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
                  IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]>;
-}
+} // End of 'let isCodeGenOnly = 1 in'
 
 // Fixed-Point to FP:
 
@@ -779,7 +779,7 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
                        (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
                  IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]>;
-}
+} // End of 'let isCodeGenOnly = 1 in'
 
 } // End of 'let Constraints = "$src = $dst" in'
 
@@ -787,62 +787,100 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
 // FP FMA Operations.
 //
 
-def VMLAD : ADbI_vmlX<0b11100, 0b00, 0, 0,
-                (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
-                IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b",
-                [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b),
-                                      (f64 DPR:$dstin)))]>,
-                RegConstraint<"$dstin = $dst">;
+class ADbI_vmlX_Encode<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4,
+                       dag oops, dag iops, InstrItinClass itin, string opc,
+                       string asm, list<dag> pattern>
+  : ADbI_vmlX<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> {
+  // ADbI_vmlX plus three 5-bit register operand fields and their encoding.
+  bits<5> Dd;
+  bits<5> Dn;
+  bits<5> Dm;
 
+  // Encode operands: low four bits in one field, top bit in a separate bit.
+  let Inst{19-16} = Dn{3-0};
+  let Inst{7}     = Dn{4};
+  let Inst{15-12} = Dd{3-0};
+  let Inst{22}    = Dd{4};
+  let Inst{3-0}   = Dm{3-0};
+  let Inst{5}     = Dm{4};
+}
 
-def VNMLSD : ADbI_vmlX<0b11100, 0b01, 0, 0,
-                (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
-                IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b",
-                [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b),
-                                (f64 DPR:$dstin)))]>,
-                RegConstraint<"$dstin = $dst">;
+def VMLAD : ADbI_vmlX_Encode<0b11100, 0b00, 0, 0,
+                             (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+                             IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm",
+                             [(set DPR:$Dd, (fadd (fmul DPR:$Dn, DPR:$Dm),
+                                                  (f64 DPR:$Ddin)))]>,
+                             RegConstraint<"$Ddin = $Dd">;
 
-def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
-                (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
-                IIC_fpMAC32, "vnmls", ".f32\t$dst, $a, $b",
-                [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
-                RegConstraint<"$dstin = $dst">;
+def VMLAS : ASbIn_Encode<0b11100, 0b00, 0, 0,
+                         (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+                         IIC_fpMAC32, "vmla", ".f32\t$Sd, $Sn, $Sm",
+                         [(set SPR:$Sd, (fadd (fmul SPR:$Sn, SPR:$Sm),
+                                              SPR:$Sdin))]>,
+                         RegConstraint<"$Sdin = $Sd">;
 
-def VMLSD : ADbI_vmlX<0b11100, 0b00, 1, 0,
-                 (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
-                 IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b",
-             [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)),
-                             (f64 DPR:$dstin)))]>,
-                RegConstraint<"$dstin = $dst">;
+def : Pat<(fadd DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
+          (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
+def : Pat<(fadd SPR:$dstin, (fmul SPR:$a, SPR:$b)),
+          (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
 
-def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
-                  (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
-                  IIC_fpMAC32, "vmls", ".f32\t$dst, $a, $b",
-             [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
-                RegConstraint<"$dstin = $dst">;
+def VMLSD : ADbI_vmlX_Encode<0b11100, 0b00, 1, 0,
+                             (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+                             IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm",
+                             [(set DPR:$Dd, (fadd (fneg (fmul DPR:$Dn,DPR:$Dm)),
+                                                        (f64 DPR:$Ddin)))]>,
+                             RegConstraint<"$Ddin = $Dd">;
+
+def VMLSS : ASbIn_Encode<0b11100, 0b00, 1, 0,
+                         (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+                         IIC_fpMAC32, "vmls", ".f32\t$Sd, $Sn, $Sm",
+                         [(set SPR:$Sd, (fadd (fneg (fmul SPR:$Sn, SPR:$Sm)),
+                                                    SPR:$Sdin))]>,
+                         RegConstraint<"$Sdin = $Sd">;
 
 def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
           (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
 def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)),
           (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
 
-def VNMLAD : ADbI_vmlX<0b11100, 0b01, 1, 0,
-                 (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
-                 IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b",
-             [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)),
-                             (f64 DPR:$dstin)))]>,
-                RegConstraint<"$dstin = $dst">;
+def VNMLAD : ADbI_vmlX_Encode<0b11100, 0b01, 1, 0,  // Dd = -(Dn * Dm) - Dd
+                              (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+                              IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm",
+                              [(set DPR:$Dd,(fsub (fneg (fmul DPR:$Dn,DPR:$Dm)),
+                                                  (f64 DPR:$Ddin)))]>,
+                              RegConstraint<"$Ddin = $Dd">;
+
+def VNMLAS : ASbI_Encode<0b11100, 0b01, 1, 0,
+                         (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+                         IIC_fpMAC32, "vnmla", ".f32\t$Sd, $Sn, $Sm",
+                         [(set SPR:$Sd, (fsub (fneg (fmul SPR:$Sn, SPR:$Sm)),
+                                              SPR:$Sdin))]>,
+                         RegConstraint<"$Sdin = $Sd">;
+
+def : Pat<(fsub (fneg (fmul DPR:$a, (f64 DPR:$b))), DPR:$dstin),
+          (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
+def : Pat<(fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin),
+          (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
+
+def VNMLSD : ADbI_vmlX_Encode<0b11100, 0b01, 0, 0,
+                              (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+                              IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
+                              [(set DPR:$Dd, (fsub (fmul DPR:$Dn, DPR:$Dm),
+                                                   (f64 DPR:$Ddin)))]>,
+                              RegConstraint<"$Ddin = $Dd">;
+
+def VNMLSS : ASbI_Encode<0b11100, 0b01, 0, 0,
+                         (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+                         IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
+                         [(set SPR:$Sd, (fsub (fmul SPR:$Sn, SPR:$Sm),
+                                              SPR:$Sdin))]>,
+                         RegConstraint<"$Sdin = $Sd">;
+
+def : Pat<(fsub (fmul DPR:$a, (f64 DPR:$b)), DPR:$dstin),
+          (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
+def : Pat<(fsub (fmul SPR:$a, SPR:$b), SPR:$dstin),
+          (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
 
-def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
-                (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
-                IIC_fpMAC32, "vnmla", ".f32\t$dst, $a, $b",
-             [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
-                RegConstraint<"$dstin = $dst">;
 
 //===----------------------------------------------------------------------===//
 // FP Conditional moves.
@@ -894,20 +932,34 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
 
 // FPSCR <-> GPR (for disassembly only)
 let hasSideEffects = 1, Uses = [FPSCR] in
-def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT,
-                 "vmrs", "\t$dst, fpscr",
-             [(set GPR:$dst, (int_arm_get_fpscr))]> {
+def VMRS : VFPAI<(outs GPR:$Rt), (ins), VFPMiscFrm, IIC_fpSTAT,
+                 "vmrs", "\t$Rt, fpscr",
+                 [(set GPR:$Rt, (int_arm_get_fpscr))]> {
+  // Instruction operand.
+  bits<4> Rt;
+
+  // Encode instruction operand.
+  let Inst{15-12} = Rt;
+
   let Inst{27-20} = 0b11101111;
   let Inst{19-16} = 0b0001;
   let Inst{11-8}  = 0b1010;
   let Inst{7}     = 0;
+  let Inst{6-5}   = 0b00;
   let Inst{4}     = 1;
+  let Inst{3-0}   = 0b0000;
 }
 
 let Defs = [FPSCR] in 
 def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, 
                  "vmsr", "\tfpscr, $src",
-             [(int_arm_set_fpscr GPR:$src)]> {
+                 [(int_arm_set_fpscr GPR:$src)]> {
+  // Instruction operand.
+  bits<4> src;
+
+  // Encode instruction operand.
+  let Inst{15-12} = src;
+
   let Inst{27-20} = 0b11101110;
   let Inst{19-16} = 0b0001;
   let Inst{11-8}  = 0b1010;
diff --git a/test/MC/ARM/simple-fp-encoding.ll b/test/MC/ARM/simple-fp-encoding.ll
index 98565e9dfce..90f81696bda 100644
--- a/test/MC/ARM/simple-fp-encoding.ll
+++ b/test/MC/ARM/simple-fp-encoding.ll
@@ -256,3 +256,97 @@ entry:
   %conv = fptoui float %a to i32
   ret i32 %conv
 }
+
+define double @f90(double %a, double %b, double %c) nounwind readnone {
+entry:
+; CHECK: f90
+; FIXME: vmla.f64 d16, d18, d17      @ encoding: [0xa1,0x0b,0x42,0xee]
+  %mul = fmul double %a, %b
+  %add = fadd double %mul, %c
+  ret double %add
+}
+
+define float @f91(float %a, float %b, float %c) nounwind readnone {
+entry:
+; CHECK: f91
+; CHECK: vmla.f32 s2, s1, s0         @ encoding: [0x80,0x1a,0x00,0xee]
+  %mul = fmul float %a, %b
+  %add = fadd float %mul, %c
+  ret float %add
+}
+
+define double @f94(double %a, double %b, double %c) nounwind readnone {
+entry:
+; CHECK: f94
+; CHECK: vmls.f64 d16, d18, d17      @ encoding: [0xe1,0x0b,0x42,0xee]
+  %mul = fmul double %a, %b
+  %sub = fsub double %c, %mul
+  ret double %sub
+}
+
+define float @f95(float %a, float %b, float %c) nounwind readnone {
+entry:
+; CHECK: f95
+; CHECK: vmls.f32 s2, s1, s0         @ encoding: [0xc0,0x1a,0x00,0xee]
+  %mul = fmul float %a, %b
+  %sub = fsub float %c, %mul
+  ret float %sub
+}
+
+define double @f96(double %a, double %b, double %c) nounwind readnone {
+entry:
+; CHECK: f96
+; CHECK: vnmla.f64 d16, d18, d17     @ encoding: [0xe1,0x0b,0x52,0xee]
+  %mul = fmul double %a, %b
+  %sub = fsub double -0.000000e+00, %mul   ; -0.0 - (a * b), i.e. -(a * b)
+  %sub3 = fsub double %sub, %c             ; -(a * b) - c
+  ret double %sub3
+}
+
+define float @f97(float %a, float %b, float %c) nounwind readnone {
+entry:
+; CHECK: f97
+; CHECK: vnmla.f32 s2, s1, s0        @ encoding: [0xc0,0x1a,0x10,0xee]
+  %mul = fmul float %a, %b
+  %sub = fsub float -0.000000e+00, %mul    ; -0.0 - (a * b), i.e. -(a * b)
+  %sub3 = fsub float %sub, %c              ; -(a * b) - c
+  ret float %sub3
+}
+
+define double @f92(double %a, double %b, double %c) nounwind readnone {
+entry:
+; CHECK: f92
+; CHECK: vnmls.f64 d16, d18, d17     @ encoding: [0xa1,0x0b,0x52,0xee]
+  %mul = fmul double %a, %b
+  %sub = fsub double %mul, %c
+  ret double %sub
+}
+
+define float @f93(float %a, float %b, float %c) nounwind readnone {
+entry:
+; CHECK: f93
+; CHECK: vnmls.f32 s2, s1, s0        @ encoding: [0x80,0x1a,0x10,0xee]
+  %mul = fmul float %a, %b
+  %sub = fsub float %mul, %c
+  ret float %sub
+}
+
+define i32 @f100() nounwind readnone {
+entry:
+; CHECK: f100
+; CHECK: vmrs r0, fpscr              @ encoding: [0x10,0x0a,0xf1,0xee]
+  %0 = tail call i32 @llvm.arm.get.fpscr()
+  ret i32 %0
+}
+
+declare i32 @llvm.arm.get.fpscr() nounwind readnone
+
+define void @f101(i32 %a) nounwind {
+entry:
+; CHECK: f101
+; CHECK: vmsr fpscr, r0              @ encoding: [0x10,0x0a,0xe1,0xee]
+  tail call void @llvm.arm.set.fpscr(i32 %a)
+  ret void
+}
+
+declare void @llvm.arm.set.fpscr(i32) nounwind