From 103ba845f09252d90a05109af7174f54bf412daf Mon Sep 17 00:00:00 2001
From: Renato Golin
Date: Tue, 16 Jul 2013 09:32:17 +0000
Subject: [PATCH] ARM EABI divmod support

This patch enables calls to __aeabi_idivmod when in EABI mode, by using
the remainder value returned in registers (R1), enabled by the ARM triple
"none-eabi". Note that Darwin and GNUEABI triples will continue lowering
in the GNU style, that is, using the stack for the remainder.

SREM/UREM lowering for 64-bit types still needs to be fixed to use
divmod.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186390 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelLowering.cpp |  80 +++++++++++-
 lib/Target/ARM/ARMISelLowering.h   |   1 +
 lib/Target/ARM/ARMSubtarget.h      |   8 ++
 test/CodeGen/ARM/divmod-eabi.ll    | 202 +++++++++++++++++++++++++++++
 4 files changed, 289 insertions(+), 2 deletions(-)
 create mode 100644 test/CodeGen/ARM/divmod-eabi.ll

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index fdc015b80ae..3648199989a 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -693,10 +693,36 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::SDIV, MVT::i32, Expand);
     setOperationAction(ISD::UDIV, MVT::i32, Expand);
   }
+
+  // FIXME: Also set divmod for SREM on EABI
   setOperationAction(ISD::SREM, MVT::i32, Expand);
   setOperationAction(ISD::UREM, MVT::i32, Expand);
-  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
-  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+  // Register based DivRem for AEABI (RTABI 4.2)
+  if (Subtarget->isTargetAEABI()) {
+    setLibcallName(RTLIB::SDIVREM_I8,  "__aeabi_idivmod");
+    setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
+    setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
+    setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
+    setLibcallName(RTLIB::UDIVREM_I8,  "__aeabi_uidivmod");
+    setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
+    setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
+    setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");
+
+    setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);
+
+    setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
+    setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+  } else {
+    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+  }
 
   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
@@ -5863,6 +5889,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SUBE:          return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
   case ISD::ATOMIC_LOAD:
   case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
+  case ISD::SDIVREM:
+  case ISD::UDIVREM:      return LowerDivRem(Op, DAG);
   }
 }
 
@@ -10677,6 +10705,54 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
 }
 
+SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
+  assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only");
+  unsigned Opcode = Op->getOpcode();
+  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
+         "Invalid opcode for Div/Rem lowering");
+  bool isSigned = (Opcode == ISD::SDIVREM);
+  EVT VT = Op->getValueType(0);
+  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+
+  RTLIB::Libcall LC;
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("Unexpected request for libcall!");
+  case MVT::i8:  LC = isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
+  case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+  case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+  case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+  }
+
+  SDValue InChain = DAG.getEntryNode();
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
+    EVT ArgVT = Op->getOperand(i).getValueType();
+    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+    Entry.Node = Op->getOperand(i);
+    Entry.Ty = ArgTy;
+    Entry.isSExt = isSigned;
+    Entry.isZExt = !isSigned;
+    Args.push_back(Entry);
+  }
+
+  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+                                         getPointerTy());
+
+  Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL);
+
+  SDLoc dl(Op);
+  TargetLowering::
+    CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true,
+                         0, getLibcallCallingConv(LC), /*isTailCall=*/false,
+                         /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+                         Callee, Args, DAG, dl);
+  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+
+  return CallInfo.first;
+}
+
 bool
 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   // The ARM target isn't yet aware of offsets.
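
For reference, the RTABI 4.2 contract that LowerDivRem models: the helper
returns the quotient and the remainder together in core registers, which is
why the return type above is built as a two-element struct of Ty. A minimal
sketch (not part of this patch; sum_quot_rem is a made-up name, and it
assumes a little-endian AAPCS target with an AEABI runtime linked, where a
64-bit return also lands in R0/R1):

    // Sketch only: the RTABI declares __aeabi_idivmod as returning
    // { int quot; int rem; } in registers (quotient in r0, remainder
    // in r1).  Binding it as a 64-bit return expresses the same thing
    // on a little-endian AAPCS target.
    extern "C" long long __aeabi_idivmod(int numerator, int denominator);

    int sum_quot_rem(int a, int b) {
      long long qr = __aeabi_idivmod(a, b);   // one call, both results
      int quot = static_cast<int>(qr);        // low word:  r0
      int rem  = static_cast<int>(qr >> 32);  // high word: r1
      return quot + rem;  // the single add the tests below expect
    }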
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index ed6c4057b65..beba5ce7715 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -457,6 +457,7 @@ namespace llvm {
                               const ARMSubtarget *ST) const;
     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                               const ARMSubtarget *ST) const;
+    SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
 
     /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
     /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 63ba6c562a3..ad7f1b3e348 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -280,6 +280,14 @@ public:
   bool isTargetNaCl() const { return TargetTriple.getOS() == Triple::NaCl; }
   bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
   bool isTargetELF() const { return !isTargetDarwin(); }
+  // ARM EABI is the bare-metal EABI described in ARM ABI documents and
+  // can be accessed via -target arm-none-eabi. This is NOT GNUEABI.
+  // FIXME: Add a flag for bare-metal for that target and set Triple::EABI
+  // even for GNUEABI, so we can make a distinction here and still conform to
+  // the EABI on GNU (and Android) mode. This requires a change in Clang, too.
+  bool isTargetAEABI() const {
+    return TargetTriple.getEnvironment() == Triple::EABI;
+  }
 
   bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
   bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
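
To make the predicate above concrete, a small standalone sketch (not part of
this patch; it assumes llvm::Triple's parsing behavior of this era, and the
asserts are purely illustrative) of which environments take the new path:

    #include "llvm/ADT/Triple.h"
    #include <cassert>

    int main() {
      // Bare-metal EABI: the environment the new lowering keys on.
      llvm::Triple Bare(llvm::Triple::normalize("armv7-none-eabi"));
      assert(Bare.getEnvironment() == llvm::Triple::EABI);

      // GNUEABI stays on the old GNU-style lowering for now (see the
      // FIXME above about bare-metal vs GNU/Android).
      llvm::Triple Gnu(llvm::Triple::normalize("armv7-linux-gnueabi"));
      assert(Gnu.getEnvironment() == llvm::Triple::GNUEABI);
      return 0;
    }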
diff --git a/test/CodeGen/ARM/divmod-eabi.ll b/test/CodeGen/ARM/divmod-eabi.ll
new file mode 100644
index 00000000000..ab98491ec34
--- /dev/null
+++ b/test/CodeGen/ARM/divmod-eabi.ll
@@ -0,0 +1,202 @@
+; RUN: llc -mtriple armv7-none-eabi %s -o - | FileCheck %s --check-prefix=EABI
+; RUN: llc -mtriple armv7-linux-gnueabi %s -o - | FileCheck %s --check-prefix=GNU
+; RUN: llc -mtriple armv7-apple-darwin %s -o - | FileCheck %s --check-prefix=DARWIN
+
+define signext i16 @f16(i16 signext %a, i16 signext %b) {
+; EABI: f16:
+; GNU: f16:
+; DARWIN: f16:
+entry:
+  %conv = sext i16 %a to i32
+  %conv1 = sext i16 %b to i32
+  %div = sdiv i32 %conv, %conv1
+  %rem = srem i32 %conv, %conv1
+; EABI: __aeabi_idivmod
+; EABI: mov [[div:r[0-9]+]], r0
+; EABI: mov [[rem:r[0-9]+]], r1
+; GNU: __aeabi_idiv
+; GNU: mov [[sum:r[0-9]+]], r0
+; GNU: __modsi3
+; GNU: add [[sum]]{{.*}}r0
+; DARWIN: ___divsi3
+; DARWIN: mov [[sum:r[0-9]+]], r0
+; DARWIN: __modsi3
+; DARWIN: add [[sum]]{{.*}}r0
+  %rem8 = srem i32 %conv1, %conv
+; EABI: __aeabi_idivmod
+; GNU: __modsi3
+; DARWIN: __modsi3
+  %add = add nsw i32 %rem, %div
+  %add13 = add nsw i32 %add, %rem8
+  %conv14 = trunc i32 %add13 to i16
+; EABI: add r0{{.*}}r1
+; EABI: sxth r0, r0
+; GNU: add r0{{.*}}[[sum]]
+; GNU: sxth r0, r0
+; DARWIN: add r0{{.*}}[[sum]]
+; DARWIN: sxth r0, r0
+  ret i16 %conv14
+}
+
+define i32 @f32(i32 %a, i32 %b) {
+; EABI: f32:
+; GNU: f32:
+; DARWIN: f32:
+entry:
+  %div = sdiv i32 %a, %b
+  %rem = srem i32 %a, %b
+; EABI: __aeabi_idivmod
+; EABI: mov [[div:r[0-9]+]], r0
+; EABI: mov [[rem:r[0-9]+]], r1
+; GNU: __aeabi_idiv
+; GNU: mov [[sum:r[0-9]+]], r0
+; GNU: __modsi3
+; GNU: add [[sum]]{{.*}}r0
+; DARWIN: ___divsi3
+; DARWIN: mov [[sum:r[0-9]+]], r0
+; DARWIN: __modsi3
+; DARWIN: add [[sum]]{{.*}}r0
+  %rem1 = srem i32 %b, %a
+; EABI: __aeabi_idivmod
+; GNU: __modsi3
+; DARWIN: __modsi3
+  %add = add nsw i32 %rem, %div
+  %add2 = add nsw i32 %add, %rem1
+; EABI: add r0{{.*}}r1
+; GNU: add r0{{.*}}[[sum]]
+; DARWIN: add r0{{.*}}[[sum]]
+  ret i32 %add2
+}
+
+define i32 @uf(i32 %a, i32 %b) {
+; EABI: uf:
+; GNU: uf:
+; DARWIN: uf:
+entry:
+  %div = udiv i32 %a, %b
+  %rem = urem i32 %a, %b
+; EABI: __aeabi_uidivmod
+; GNU: __aeabi_uidiv
+; GNU: mov [[sum:r[0-9]+]], r0
+; GNU: __umodsi3
+; GNU: add [[sum]]{{.*}}r0
+; DARWIN: ___udivsi3
+; DARWIN: mov [[sum:r[0-9]+]], r0
+; DARWIN: __umodsi3
+; DARWIN: add [[sum]]{{.*}}r0
+  %rem1 = urem i32 %b, %a
+; EABI: __aeabi_uidivmod
+; GNU: __umodsi3
+; DARWIN: __umodsi3
+  %add = add nuw i32 %rem, %div
+  %add2 = add nuw i32 %add, %rem1
+; EABI: add r0{{.*}}r1
+; GNU: add r0{{.*}}[[sum]]
+; DARWIN: add r0{{.*}}[[sum]]
+  ret i32 %add2
+}
+
+; FIXME: AEABI is not lowering long u/srem into u/ldivmod
+define i64 @longf(i64 %a, i64 %b) {
+; EABI: longf:
+; GNU: longf:
+; DARWIN: longf:
+entry:
+  %div = sdiv i64 %a, %b
+  %rem = srem i64 %a, %b
+; EABI: __aeabi_ldivmod
+; GNU: __aeabi_ldivmod
+; GNU: mov [[div1:r[0-9]+]], r0
+; GNU: mov [[div2:r[0-9]+]], r1
+; DARWIN: ___divdi3
+; DARWIN: mov [[div1:r[0-9]+]], r0
+; DARWIN: mov [[div2:r[0-9]+]], r1
+; DARWIN: __moddi3
+  %add = add nsw i64 %rem, %div
+; GNU: adds r0{{.*}}[[div1]]
+; GNU: adc r1{{.*}}[[div2]]
+; DARWIN: adds r0{{.*}}[[div1]]
+; DARWIN: adc r1{{.*}}[[div2]]
+  ret i64 %add
+}
+
+define i32 @g1(i32 %a, i32 %b) {
+; EABI: g1:
+; GNU: g1:
+; DARWIN: g1:
+entry:
+  %div = sdiv i32 %a, %b
+  %rem = srem i32 %a, %b
+; EABI: __aeabi_idivmod
+; GNU: __aeabi_idiv
+; GNU: mov [[sum:r[0-9]+]], r0
+; GNU: __modsi3
+; DARWIN: ___divsi3
+; DARWIN: mov [[sum:r[0-9]+]], r0
+; DARWIN: __modsi3
+  %add = add nsw i32 %rem, %div
+; EABI: add r0{{.*}}r1
+; GNU: add r0{{.*}}[[sum]]
+; DARWIN: add r0{{.*}}[[sum]]
+  ret i32 %add
+}
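+
+; Note (exposition only, not a CHECK line): after "bl __aeabi_idivmod" the
+; quotient is live in r0 and the remainder in r1, so a div+rem pair folds
+; into one call followed by a plain add of r0 and r1, and a rem-only
+; function (g2 below) needs just "mov r0, r1" to return the remainder.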
+
+; On both Darwin and GNU, this is just a call to __modsi3
+define i32 @g2(i32 %a, i32 %b) {
+; EABI: g2:
+; GNU: g2:
+; DARWIN: g2:
+entry:
+  %rem = srem i32 %a, %b
+; EABI: __aeabi_idivmod
+; GNU: __modsi3
+; DARWIN: __modsi3
+  ret i32 %rem
+; EABI: mov r0, r1
+}
+
+define i32 @g3(i32 %a, i32 %b) {
+; EABI: g3:
+; GNU: g3:
+; DARWIN: g3:
+entry:
+  %rem = srem i32 %a, %b
+; EABI: __aeabi_idivmod
+; EABI: mov [[mod:r[0-9]+]], r1
+; GNU: __modsi3
+; GNU: mov [[sum:r[0-9]+]], r0
+; DARWIN: __modsi3
+; DARWIN: mov [[sum:r[0-9]+]], r0
+  %rem1 = srem i32 %b, %rem
+; EABI: __aeabi_idivmod
+; GNU: __modsi3
+; DARWIN: __modsi3
+  %add = add nsw i32 %rem1, %rem
+; EABI: add r0, r1, [[mod]]
+; GNU: add r0{{.*}}[[sum]]
+; DARWIN: add r0{{.*}}[[sum]]
+  ret i32 %add
+}
+
+define i32 @g4(i32 %a, i32 %b) {
+; EABI: g4:
+; GNU: g4:
+; DARWIN: g4:
+entry:
+  %div = sdiv i32 %a, %b
+; EABI: __aeabi_idivmod
+; EABI: mov [[div:r[0-9]+]], r0
+; GNU: __aeabi_idiv
+; GNU: mov [[sum:r[0-9]+]], r0
+; DARWIN: ___divsi3
+; DARWIN: mov [[sum:r[0-9]+]], r0
+  %rem = srem i32 %b, %div
+; EABI: __aeabi_idivmod
+; GNU: __modsi3
+; DARWIN: __modsi3
+  %add = add nsw i32 %rem, %div
+; EABI: add r0, r1, [[div]]
+; GNU: add r0{{.*}}[[sum]]
+; DARWIN: add r0{{.*}}[[sum]]
+  ret i32 %add
+}
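
For completeness, a host-side reference sketch (not part of the patch;
idivmod_ref is a hypothetical name) for the values the __aeabi_idivmod calls
above are expected to produce, under the assumption that the helper follows
C division semantics as the AEABI runtime specifies: truncation toward zero,
with the remainder taking the sign of the numerator:

    #include <cassert>
    #include <utility>

    // Reference for __aeabi_idivmod: one division producing both results.
    static std::pair<int, int> idivmod_ref(int num, int den) {
      int quot = num / den;         // would come back in r0 on the target
      int rem  = num - quot * den;  // would come back in r1 on the target
      return std::make_pair(quot, rem);
    }

    int main() {
      assert(idivmod_ref(7, 3)  == std::make_pair(2, 1));
      assert(idivmod_ref(-7, 3) == std::make_pair(-2, -1)); // truncates toward zero
      return 0;
    }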