From 103ba845f09252d90a05109af7174f54bf412daf Mon Sep 17 00:00:00 2001
From: Renato Golin
Date: Tue, 16 Jul 2013 09:32:17 +0000
Subject: [PATCH] ARM EABI divmod support

This patch enables calls to __aeabi_idivmod when in EABI mode, by using
the remainder value returned in registers (R1), enabled by the ARM triple
"none-eabi". Note that Darwin and GNUEABI triples will continue lowering
in the GNU style, that is, using the stack for the remainder.

SREM/UREM lowering for 64-bit types still needs to be fixed to use
divmod.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186390 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelLowering.cpp |  80 +++++++++++-
 lib/Target/ARM/ARMISelLowering.h   |   1 +
 lib/Target/ARM/ARMSubtarget.h      |   8 ++
 test/CodeGen/ARM/divmod-eabi.ll    | 202 +++++++++++++++++++++++++++++
 4 files changed, 289 insertions(+), 2 deletions(-)
 create mode 100644 test/CodeGen/ARM/divmod-eabi.ll

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index fdc015b80ae..3648199989a 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -693,10 +693,36 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::SDIV, MVT::i32, Expand);
     setOperationAction(ISD::UDIV, MVT::i32, Expand);
   }
+
+  // FIXME: Also set divmod for SREM on EABI
   setOperationAction(ISD::SREM, MVT::i32, Expand);
   setOperationAction(ISD::UREM, MVT::i32, Expand);
-  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
-  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+  // Register based DivRem for AEABI (RTABI 4.2)
+  if (Subtarget->isTargetAEABI()) {
+    setLibcallName(RTLIB::SDIVREM_I8,  "__aeabi_idivmod");
+    setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
+    setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
+    setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
+    setLibcallName(RTLIB::UDIVREM_I8,  "__aeabi_uidivmod");
+    setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
+    setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
+    setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");
+
+    setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);
+
+    setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
+    setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+  } else {
+    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+  }
 
   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
@@ -5863,6 +5889,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SUBE:          return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
   case ISD::ATOMIC_LOAD:
   case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
+  case ISD::SDIVREM:
+  case ISD::UDIVREM:      return LowerDivRem(Op, DAG);
   }
 }
 
@@ -10677,6 +10705,54 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
 }
 
+SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
+  assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only");
+  unsigned Opcode = Op->getOpcode();
+  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
+         "Invalid opcode for Div/Rem lowering");
+  bool isSigned = (Opcode == ISD::SDIVREM);
+  EVT VT = Op->getValueType(0);
+  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+
+  RTLIB::Libcall LC;
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("Unexpected request for libcall!");
+  case MVT::i8:  LC = isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
+  case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+  case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+  case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+  }
+
+  SDValue InChain = DAG.getEntryNode();
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
+    EVT ArgVT = Op->getOperand(i).getValueType();
+    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+    Entry.Node = Op->getOperand(i);
+    Entry.Ty = ArgTy;
+    Entry.isSExt = isSigned;
+    Entry.isZExt = !isSigned;
+    Args.push_back(Entry);
+  }
+
+  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+                                         getPointerTy());
+
+  Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL);
+
+  SDLoc dl(Op);
+  TargetLowering::
+    CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true,
+                         0, getLibcallCallingConv(LC), /*isTailCall=*/false,
+                         /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+                         Callee, Args, DAG, dl);
+  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+
+  return CallInfo.first;
+}
+
 bool
 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   // The ARM target isn't yet aware of offsets.
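
For reference, the RTABI 4.2 contract that LowerDivRem models: the helper
returns the quotient and the remainder together in core registers, which is
why the return type above is built as a two-element struct of Ty. A minimal
sketch (not part of this patch; sum_quot_rem is a made-up name, and it
assumes a little-endian AAPCS target with an AEABI runtime linked, where a
64-bit return also lands in R0/R1):

    // Sketch only: the RTABI declares __aeabi_idivmod as returning
    // { int quot; int rem; } in registers (quotient in r0, remainder
    // in r1).  Binding it as a 64-bit return expresses the same thing
    // on a little-endian AAPCS target.
    extern "C" long long __aeabi_idivmod(int numerator, int denominator);

    int sum_quot_rem(int a, int b) {
      long long qr = __aeabi_idivmod(a, b);   // one call, both results
      int quot = static_cast<int>(qr);        // low word:  r0
      int rem  = static_cast<int>(qr >> 32);  // high word: r1
      return quot + rem;  // the single add the tests below expect
    }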
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index ed6c4057b65..beba5ce7715 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -457,6 +457,7 @@ namespace llvm {
                               const ARMSubtarget *ST) const;
     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                               const ARMSubtarget *ST) const;
+    SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
 
     /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
     /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 63ba6c562a3..ad7f1b3e348 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -280,6 +280,14 @@ public:
   bool isTargetNaCl() const { return TargetTriple.getOS() == Triple::NaCl; }
   bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
   bool isTargetELF() const { return !isTargetDarwin(); }
+  // ARM EABI is the bare-metal EABI described in ARM ABI documents and
+  // can be accessed via -target arm-none-eabi. This is NOT GNUEABI.
+  // FIXME: Add a flag for bare-metal for that target and set Triple::EABI
+  // even for GNUEABI, so we can make a distinction here and still conform to
+  // the EABI on GNU (and Android) mode. This requires a change in Clang, too.
+  bool isTargetAEABI() const {
+    return TargetTriple.getEnvironment() == Triple::EABI;
+  }
 
   bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
   bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
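
To make the predicate above concrete, a small standalone sketch (not part of
this patch; it assumes llvm::Triple's parsing behavior of this era, and the
asserts are purely illustrative) of which environments take the new path:

    #include "llvm/ADT/Triple.h"
    #include <cassert>

    int main() {
      // Bare-metal EABI: the environment the new lowering keys on.
      llvm::Triple Bare(llvm::Triple::normalize("armv7-none-eabi"));
      assert(Bare.getEnvironment() == llvm::Triple::EABI);

      // GNUEABI stays on the old GNU-style lowering for now (see the
      // FIXME above about bare-metal vs GNU/Android).
      llvm::Triple Gnu(llvm::Triple::normalize("armv7-linux-gnueabi"));
      assert(Gnu.getEnvironment() == llvm::Triple::GNUEABI);
      return 0;
    }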
diff --git a/test/CodeGen/ARM/divmod-eabi.ll b/test/CodeGen/ARM/divmod-eabi.ll
new file mode 100644
index 00000000000..ab98491ec34
--- /dev/null
+++ b/test/CodeGen/ARM/divmod-eabi.ll
@@ -0,0 +1,202 @@
+; RUN: llc -mtriple armv7-none-eabi %s -o - | FileCheck %s --check-prefix=EABI
+; RUN: llc -mtriple armv7-linux-gnueabi %s -o - | FileCheck %s --check-prefix=GNU
+; RUN: llc -mtriple armv7-apple-darwin %s -o - | FileCheck %s --check-prefix=DARWIN
+
+define signext i16 @f16(i16 signext %a, i16 signext %b) {
+; EABI: f16:
+; GNU: f16:
+; DARWIN: f16:
+entry:
+  %conv = sext i16 %a to i32
+  %conv1 = sext i16 %b to i32
+  %div = sdiv i32 %conv, %conv1
+  %rem = srem i32 %conv, %conv1
+; EABI: __aeabi_idivmod
+; EABI: mov [[div:r[0-9]+]], r0
+; EABI: mov [[rem:r[0-9]+]], r1
+; GNU: __aeabi_idiv
+; GNU: mov [[sum:r[0-9]+]], r0
+; GNU: __modsi3
+; GNU: add [[sum]]{{.*}}r0
+; DARWIN: ___divsi3
+; DARWIN: mov [[sum:r[0-9]+]], r0
+; DARWIN: __modsi3
+; DARWIN: add [[sum]]{{.*}}r0
+  %rem8 = srem i32 %conv1, %conv
+; EABI: __aeabi_idivmod
+; GNU: __modsi3
+; DARWIN: __modsi3
+  %add = add nsw i32 %rem, %div
+  %add13 = add nsw i32 %add, %rem8
+  %conv14 = trunc i32 %add13 to i16
+; EABI: add r0{{.*}}r1
+; EABI: sxth r0, r0
+; GNU: add r0{{.*}}[[sum]]
+; GNU: sxth r0, r0
+; DARWIN: add r0{{.*}}[[sum]]
+; DARWIN: sxth r0, r0
+  ret i16 %conv14
+}
+
+define i32 @f32(i32 %a, i32 %b) {
+; EABI: f32:
+; GNU: f32:
+; DARWIN: f32:
+entry:
+  %div = sdiv i32 %a, %b
+  %rem = srem i32 %a, %b
+; EABI: __aeabi_idivmod
+; EABI: mov [[div:r[0-9]+]], r0
+; EABI: mov [[rem:r[0-9]+]], r1
+; GNU: __aeabi_idiv
+; GNU: mov [[sum:r[0-9]+]], r0
+; GNU: __modsi3
+; GNU: add [[sum]]{{.*}}r0
+; DARWIN: ___divsi3
+; DARWIN: mov [[sum:r[0-9]+]], r0
+; DARWIN: __modsi3
+; DARWIN: add [[sum]]{{.*}}r0
+  %rem1 = srem i32 %b, %a
+; EABI: __aeabi_idivmod
+; GNU: __modsi3
+; DARWIN: __modsi3
+  %add = add nsw i32 %rem, %div
+  %add2 = add nsw i32 %add, %rem1
+; EABI: add r0{{.*}}r1
+; GNU: add r0{{.*}}[[sum]]
+; DARWIN: add r0{{.*}}[[sum]]
+  ret i32 %add2
+}
+
+define i32 @uf(i32 %a, i32 %b) {
+; EABI: uf:
+; GNU: uf:
+; DARWIN: uf:
+entry:
+  %div = udiv i32 %a, %b
+  %rem = urem i32 %a, %b
+; EABI: __aeabi_uidivmod
+; GNU: __aeabi_uidiv
+; GNU: mov [[sum:r[0-9]+]], r0
+; GNU: __umodsi3
+; GNU: add [[sum]]{{.*}}r0
+; DARWIN: ___udivsi3
+; DARWIN: mov [[sum:r[0-9]+]], r0
+; DARWIN: __umodsi3
+; DARWIN: add [[sum]]{{.*}}r0
+  %rem1 = urem i32 %b, %a
+; EABI: __aeabi_uidivmod
+; GNU: __umodsi3
+; DARWIN: __umodsi3
+  %add = add nuw i32 %rem, %div
+  %add2 = add nuw i32 %add, %rem1
+; EABI: add r0{{.*}}r1
+; GNU: add r0{{.*}}[[sum]]
+; DARWIN: add r0{{.*}}[[sum]]
+  ret i32 %add2
+}
+
+; FIXME: AEABI is not lowering long u/srem into u/ldivmod
+define i64 @longf(i64 %a, i64 %b) {
+; EABI: longf:
+; GNU: longf:
+; DARWIN: longf:
+entry:
+  %div = sdiv i64 %a, %b
+  %rem = srem i64 %a, %b
+; EABI: __aeabi_ldivmod
+; GNU: __aeabi_ldivmod
+; GNU: mov [[div1:r[0-9]+]], r0
+; GNU: mov [[div2:r[0-9]+]], r1
+; DARWIN: ___divdi3
+; DARWIN: mov [[div1:r[0-9]+]], r0
+; DARWIN: mov [[div2:r[0-9]+]], r1
+; DARWIN: __moddi3
+  %add = add nsw i64 %rem, %div
+; GNU: adds r0{{.*}}[[div1]]
+; GNU: adc r1{{.*}}[[div2]]
+; DARWIN: adds r0{{.*}}[[div1]]
+; DARWIN: adc r1{{.*}}[[div2]]
+  ret i64 %add
+}
+
+define i32 @g1(i32 %a, i32 %b) {
+; EABI: g1:
+; GNU: g1:
+; DARWIN: g1:
+entry:
+  %div = sdiv i32 %a, %b
+  %rem = srem i32 %a, %b
+; EABI: __aeabi_idivmod
+; GNU: __aeabi_idiv
+; GNU: mov [[sum:r[0-9]+]], r0
+; GNU: __modsi3
+; DARWIN: ___divsi3
+; DARWIN: mov [[sum:r[0-9]+]], r0
+; DARWIN: __modsi3
+  %add = add nsw i32 %rem, %div
+; EABI: add r0{{.*}}r1
+; GNU: add r0{{.*}}[[sum]]
+; DARWIN: add r0{{.*}}[[sum]]
+  ret i32 %add
+}
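+
+; Note (exposition only, not a CHECK line): after "bl __aeabi_idivmod" the
+; quotient is live in r0 and the remainder in r1, so a div+rem pair folds
+; into one call followed by a plain add of r0 and r1, and a rem-only
+; function (g2 below) needs just "mov r0, r1" to return the remainder.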
+
+; On both Darwin and GNU, this is just a call to __modsi3
+define i32 @g2(i32 %a, i32 %b) {
+; EABI: g2:
+; GNU: g2:
+; DARWIN: g2:
+entry:
+  %rem = srem i32 %a, %b
+; EABI: __aeabi_idivmod
+; GNU: __modsi3
+; DARWIN: __modsi3
+  ret i32 %rem
+; EABI: mov r0, r1
+}
+
+define i32 @g3(i32 %a, i32 %b) {
+; EABI: g3:
+; GNU: g3:
+; DARWIN: g3:
+entry:
+  %rem = srem i32 %a, %b
+; EABI: __aeabi_idivmod
+; EABI: mov [[mod:r[0-9]+]], r1
+; GNU: __modsi3
+; GNU: mov [[sum:r[0-9]+]], r0
+; DARWIN: __modsi3
+; DARWIN: mov [[sum:r[0-9]+]], r0
+  %rem1 = srem i32 %b, %rem
+; EABI: __aeabi_idivmod
+; GNU: __modsi3
+; DARWIN: __modsi3
+  %add = add nsw i32 %rem1, %rem
+; EABI: add r0, r1, [[mod]]
+; GNU: add r0{{.*}}[[sum]]
+; DARWIN: add r0{{.*}}[[sum]]
+  ret i32 %add
+}
+
+define i32 @g4(i32 %a, i32 %b) {
+; EABI: g4:
+; GNU: g4:
+; DARWIN: g4:
+entry:
+  %div = sdiv i32 %a, %b
+; EABI: __aeabi_idivmod
+; EABI: mov [[div:r[0-9]+]], r0
+; GNU: __aeabi_idiv
+; GNU: mov [[sum:r[0-9]+]], r0
+; DARWIN: ___divsi3
+; DARWIN: mov [[sum:r[0-9]+]], r0
+  %rem = srem i32 %b, %div
+; EABI: __aeabi_idivmod
+; GNU: __modsi3
+; DARWIN: __modsi3
+  %add = add nsw i32 %rem, %div
+; EABI: add r0, r1, [[div]]
+; GNU: add r0{{.*}}[[sum]]
+; DARWIN: add r0{{.*}}[[sum]]
+  ret i32 %add
+}
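
For completeness, a host-side reference sketch (not part of the patch;
idivmod_ref is a hypothetical name) for the values the __aeabi_idivmod calls
above are expected to produce, under the assumption that the helper follows
C division semantics as the AEABI runtime specifies: truncation toward zero,
with the remainder taking the sign of the numerator:

    #include <cassert>
    #include <utility>

    // Reference for __aeabi_idivmod: one division producing both results.
    static std::pair<int, int> idivmod_ref(int num, int den) {
      int quot = num / den;         // would come back in r0 on the target
      int rem  = num - quot * den;  // would come back in r1 on the target
      return std::make_pair(quot, rem);
    }

    int main() {
      assert(idivmod_ref(7, 3)  == std::make_pair(2, 1));
      assert(idivmod_ref(-7, 3) == std::make_pair(-2, -1)); // truncates toward zero
      return 0;
    }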