diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index a584188a101..9c937edee4c 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -37,6 +37,26 @@ def RetCC_PPC : CallingConv<[
 ]>;
 
 
+// Note that we don't currently have calling conventions for 64-bit
+// PowerPC, but handle all the complexities of the ABI in the lowering
+// logic. FIXME: See if the logic can be simplified with use of CCs.
+// This may require some extensions to current table generation.
+
+// Simple return-value convention for 64-bit ELF PowerPC fast isel.
+// All small ints are promoted to i64. Vector types, quadword ints,
+// and multiple register returns are "supported" to avoid compile
+// errors, but none are handled by the fast selector.
+def RetCC_PPC64_ELF_FIS : CallingConv<[
+  CCIfType<[i8], CCPromoteToType<i64>>,
+  CCIfType<[i16], CCPromoteToType<i64>>,
+  CCIfType<[i32], CCPromoteToType<i64>>,
+  CCIfType<[i64], CCAssignToReg<[X3, X4]>>,
+  CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
+  CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
+  CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
+  CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
+]>;
+
 //===----------------------------------------------------------------------===//
 // PowerPC System V Release 4 32-bit ABI
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index ebc705704d9..8db4432734d 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -95,6 +95,8 @@ class PPCFastISel : public FastISel {
   private:
     bool SelectBranch(const Instruction *I);
     bool SelectIndirectBr(const Instruction *I);
+    bool SelectRet(const Instruction *I);
+    bool SelectIntExt(const Instruction *I);
 
   // Utility routines.
   private:
@@ -109,6 +111,10 @@ class PPCFastISel : public FastISel {
     unsigned PPCMaterialize64BitInt(int64_t Imm,
                                     const TargetRegisterClass *RC);
 
+  // Call handling routines.
+  private:
+    CCAssignFn *usePPC32CCs(unsigned Flag);
+
   private:
   #include "PPCGenFastISel.inc"
 
@@ -116,6 +122,21 @@ class PPCFastISel : public FastISel {
 
 } // end anonymous namespace
 
+#include "PPCGenCallingConv.inc"
+
+// Function whose sole purpose is to kill compiler warnings
+// stemming from unused functions included from PPCGenCallingConv.inc.
+CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
+  if (Flag == 1)
+    return CC_PPC32_SVR4;
+  else if (Flag == 2)
+    return CC_PPC32_SVR4_ByVal;
+  else if (Flag == 3)
+    return CC_PPC32_SVR4_VarArg;
+  else
+    return RetCC_PPC;
+}
+
 static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
   switch (Pred) {
     // These are not representable with any single compare.
@@ -309,13 +330,164 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
   return true;
 }
 
+// Attempt to fast-select a return instruction.
+bool PPCFastISel::SelectRet(const Instruction *I) {
+
+  if (!FuncInfo.CanLowerReturn)
+    return false;
+
+  const ReturnInst *Ret = cast<ReturnInst>(I);
+  const Function &F = *I->getParent()->getParent();
+
+  // Build a list of return value registers.
+  SmallVector<unsigned, 4> RetRegs;
+  CallingConv::ID CC = F.getCallingConv();
+
+  if (Ret->getNumOperands() > 0) {
+    SmallVector<ISD::OutputArg, 4> Outs;
+    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+
+    // Analyze the return values, assigning a location to each one.
+    SmallVector<CCValAssign, 16> ValLocs;
+    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, *Context);
+    CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
+    const Value *RV = Ret->getOperand(0);
+
+    // FIXME: Only one output register for now.
+    if (ValLocs.size() > 1)
+      return false;
+
+    // Special case for returning a constant integer of any size.
+    // Materialize the constant as an i64 and copy it to the return
+    // register. This avoids an unnecessary extend or truncate.
+    if (isa<ConstantInt>(*RV)) {
+      const Constant *C = cast<Constant>(RV);
+      unsigned SrcReg = PPCMaterializeInt(C, MVT::i64);
+      unsigned RetReg = ValLocs[0].getLocReg();
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+              RetReg).addReg(SrcReg);
+      RetRegs.push_back(RetReg);
+
+    } else {
+      unsigned Reg = getRegForValue(RV);
+
+      if (Reg == 0)
+        return false;
+
+      // Copy the result values into the output registers.
+      for (unsigned i = 0; i < ValLocs.size(); ++i) {
+
+        CCValAssign &VA = ValLocs[i];
+        assert(VA.isRegLoc() && "Can only return in registers!");
+        RetRegs.push_back(VA.getLocReg());
+        unsigned SrcReg = Reg + VA.getValNo();
+
+        EVT RVEVT = TLI.getValueType(RV->getType());
+        if (!RVEVT.isSimple())
+          return false;
+        MVT RVVT = RVEVT.getSimpleVT();
+        MVT DestVT = VA.getLocVT();
+
+        if (RVVT != DestVT && RVVT != MVT::i8 &&
+            RVVT != MVT::i16 && RVVT != MVT::i32)
+          return false;
+
+        if (RVVT != DestVT) {
+          switch (VA.getLocInfo()) {
+            default:
+              llvm_unreachable("Unknown loc info!");
+            case CCValAssign::Full:
+              llvm_unreachable("Full value assign but types don't match?");
+            case CCValAssign::AExt:
+            case CCValAssign::ZExt: {
+              const TargetRegisterClass *RC =
+                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+              unsigned TmpReg = createResultReg(RC);
+              if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
+                return false;
+              SrcReg = TmpReg;
+              break;
+            }
+            case CCValAssign::SExt: {
+              const TargetRegisterClass *RC =
+                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+              unsigned TmpReg = createResultReg(RC);
+              if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
+                return false;
+              SrcReg = TmpReg;
+              break;
+            }
+          }
+        }
+
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                TII.get(TargetOpcode::COPY), RetRegs[i])
+          .addReg(SrcReg);
+      }
+    }
+  }
+
+  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                                    TII.get(PPC::BLR));
+
+  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+    MIB.addReg(RetRegs[i], RegState::Implicit);
+
+  return true;
+}
+
 // Attempt to emit an integer extend of SrcReg into DestReg. Both
 // signed and zero extensions are supported. Return false if we
-// can't handle it. Not yet implemented.
+// can't handle it.
 bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                 unsigned DestReg, bool IsZExt) {
-  return (SrcVT == MVT::i8 && SrcReg && DestVT == MVT::i8 && DestReg
-          && IsZExt && false);
+  if (DestVT != MVT::i32 && DestVT != MVT::i64)
+    return false;
+  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
+    return false;
+
+  // Signed extensions use EXTSB, EXTSH, EXTSW.
+  if (!IsZExt) {
+    unsigned Opc;
+    if (SrcVT == MVT::i8)
+      Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
+    else if (SrcVT == MVT::i16)
+      Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
+    else {
+      assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
+      Opc = PPC::EXTSW_32_64;
+    }
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+      .addReg(SrcReg);
+
+  // Unsigned 32-bit extensions use RLWINM.
+  } else if (DestVT == MVT::i32) {
+    unsigned MB;
+    if (SrcVT == MVT::i8)
+      MB = 24;
+    else {
+      assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
+      MB = 16;
+    }
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLWINM),
+            DestReg)
+      .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
+
+  // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
+  } else {
+    unsigned MB;
+    if (SrcVT == MVT::i8)
+      MB = 56;
+    else if (SrcVT == MVT::i16)
+      MB = 48;
+    else
+      MB = 32;
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(PPC::RLDICL_32_64), DestReg)
+      .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
+  }
+
+  return true;
 }
 
 // Attempt to fast-select an indirect branch instruction.
@@ -335,6 +507,45 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
   return true;
 }
 
+// Attempt to fast-select an integer extend instruction.
+bool PPCFastISel::SelectIntExt(const Instruction *I) {
+  Type *DestTy = I->getType();
+  Value *Src = I->getOperand(0);
+  Type *SrcTy = Src->getType();
+
+  bool IsZExt = isa<ZExtInst>(I);
+  unsigned SrcReg = getRegForValue(Src);
+  if (!SrcReg) return false;
+
+  EVT SrcEVT, DestEVT;
+  SrcEVT = TLI.getValueType(SrcTy, true);
+  DestEVT = TLI.getValueType(DestTy, true);
+  if (!SrcEVT.isSimple())
+    return false;
+  if (!DestEVT.isSimple())
+    return false;
+
+  MVT SrcVT = SrcEVT.getSimpleVT();
+  MVT DestVT = DestEVT.getSimpleVT();
+
+  // If we know the register class needed for the result of this
+  // instruction, use it. Otherwise pick the register class of the
+  // correct size that does not contain X0/R0, since we don't know
+  // whether downstream uses permit that assignment.
+  unsigned AssignedReg = FuncInfo.ValueMap[I];
+  const TargetRegisterClass *RC =
+    (AssignedReg ? MRI.getRegClass(AssignedReg) :
+     (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
+      &PPC::GPRC_and_GPRC_NOR0RegClass));
+  unsigned ResultReg = createResultReg(RC);
+
+  if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
+    return false;
+
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
 // Attempt to fast-select an instruction that wasn't handled by
 // the table-generated machinery.
 bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
@@ -344,6 +555,11 @@ bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
       return SelectBranch(I);
     case Instruction::IndirectBr:
       return SelectIndirectBr(I);
+    case Instruction::Ret:
+      return SelectRet(I);
+    case Instruction::ZExt:
+    case Instruction::SExt:
+      return SelectIntExt(I);
     // Here add other flavors of Instruction::XXX that automated
     // cases don't catch. For example, switches are terminators
     // that aren't yet handled.
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index f78bb3873ee..92579040b6e 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -506,6 +506,14 @@ defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS),
                         [(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
 } // Interpretation64Bit
 
+// For fast-isel:
+let isCodeGenOnly = 1 in {
+def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$rA), (ins gprc:$rS),
+                           "extsb $rA, $rS", IntSimple, []>, isPPC64;
+def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$rA), (ins gprc:$rS),
+                           "extsh $rA, $rS", IntSimple, []>, isPPC64;
+} // isCodeGenOnly for fast-isel
+
 defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS),
                        "extsw", "$rA, $rS", IntSimple,
                        [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
@@ -569,6 +577,14 @@ defm RLDICL : MDForm_1r<30, 0,
                         (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
                         "rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI,
                         []>, isPPC64;
+// For fast-isel:
+let isCodeGenOnly = 1 in
+def RLDICL_32_64 : MDForm_1<30, 0,
+                            (outs g8rc:$rA),
+                            (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
+                            "rldicl $rA, $rS, $SH, $MBE", IntRotateDI,
+                            []>, isPPC64;
+// End fast-isel.
 defm RLDICR : MDForm_1r<30, 1,
                         (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
                         "rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI,
diff --git a/test/CodeGen/PowerPC/fast-isel-ext.ll b/test/CodeGen/PowerPC/fast-isel-ext.ll
new file mode 100644
index 00000000000..753305a68dd
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-ext.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+
+; zext
+
+define i32 @zext_8_32(i8 %a) nounwind ssp {
+; ELF64: zext_8_32
+  %r = zext i8 %a to i32
+; ELF64: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
+  ret i32 %r
+}
+
+define i32 @zext_16_32(i16 %a) nounwind ssp {
+; ELF64: zext_16_32
+  %r = zext i16 %a to i32
+; ELF64: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
+  ret i32 %r
+}
+
+define i64 @zext_8_64(i8 %a) nounwind ssp {
+; ELF64: zext_8_64
+  %r = zext i8 %a to i64
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
+  ret i64 %r
+}
+
+define i64 @zext_16_64(i16 %a) nounwind ssp {
+; ELF64: zext_16_64
+  %r = zext i16 %a to i64
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+  ret i64 %r
+}
+
+define i64 @zext_32_64(i32 %a) nounwind ssp {
+; ELF64: zext_32_64
+  %r = zext i32 %a to i64
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
+  ret i64 %r
+}
+
+; sext
+
+define i32 @sext_8_32(i8 %a) nounwind ssp {
+; ELF64: sext_8_32
+  %r = sext i8 %a to i32
+; ELF64: extsb
+  ret i32 %r
+}
+
+define i32 @sext_16_32(i16 %a) nounwind ssp {
+; ELF64: sext_16_32
+  %r = sext i16 %a to i32
+; ELF64: extsh
+  ret i32 %r
+}
+
+define i64 @sext_8_64(i8 %a) nounwind ssp {
+; ELF64: sext_8_64
+  %r = sext i8 %a to i64
+; ELF64: extsb
+  ret i64 %r
+}
+
+define i64 @sext_16_64(i16 %a) nounwind ssp {
+; ELF64: sext_16_64
+  %r = sext i16 %a to i64
+; ELF64: extsh
+  ret i64 %r
+}
+
+define i64 @sext_32_64(i32 %a) nounwind ssp {
+; ELF64: sext_32_64
+  %r = sext i32 %a to i64
+; ELF64: extsw
+  ret i64 %r
+}
diff --git a/test/CodeGen/PowerPC/fast-isel-ret.ll b/test/CodeGen/PowerPC/fast-isel-ret.ll
new file mode 100644
index 00000000000..fa19f8b11fd
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-ret.ll
@@ -0,0 +1,142 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+
+define signext i8 @ret2(i8 signext %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret2
+; ELF64: extsb
+; ELF64: blr
+  ret i8 %a
+}
+
+define zeroext i8 @ret3(i8 signext %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret3
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
+; ELF64: blr
+  ret i8 %a
+}
+
+define signext i16 @ret4(i16 signext %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret4
+; ELF64: extsh
+; ELF64: blr
+  ret i16 %a
+}
+
+define zeroext i16 @ret5(i16 signext %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret5
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+; ELF64: blr
+  ret i16 %a
+}
+
+define i16 @ret6(i16 %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret6
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+; ELF64: blr
+  ret i16 %a
+}
+
+define signext i32 @ret7(i32 signext %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret7
+; ELF64: extsw
+; ELF64: blr
+  ret i32 %a
+}
+
+define zeroext i32 @ret8(i32 signext %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret8
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
+; ELF64: blr
+  ret i32 %a
+}
+
+define i32 @ret9(i32 %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret9
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
+; ELF64: blr
+  ret i32 %a
+}
+
+define i64 @ret10(i64 %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret10
+; ELF64-NOT: exts
+; ELF64-NOT: rldicl
+; ELF64: blr
+  ret i64 %a
+}
+
+define float @ret11(float %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret11
+; ELF64: blr
+  ret float %a
+}
+
+define double @ret12(double %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret12
+; ELF64: blr
+  ret double %a
+}
+
+define i8 @ret13() nounwind uwtable ssp {
+entry:
+; ELF64: ret13
+; ELF64: li
+; ELF64: blr
+  ret i8 15;
+}
+
+define i16 @ret14() nounwind uwtable ssp {
+entry:
+; ELF64: ret14
+; ELF64: li
+; ELF64: blr
+  ret i16 -225;
+}
+
+define i32 @ret15() nounwind uwtable ssp {
+entry:
+; ELF64: ret15
+; ELF64: lis
+; ELF64: ori
+; ELF64: blr
+  ret i32 278135;
+}
+
+define i64 @ret16() nounwind uwtable ssp {
+entry:
+; ELF64: ret16
+; ELF64: li
+; ELF64: sldi
+; ELF64: oris
+; ELF64: ori
+; ELF64: blr
+  ret i64 27813515225;
+}
+
+define float @ret17() nounwind uwtable ssp {
+entry:
+; ELF64: ret17
+; ELF64: addis
+; ELF64: lfs
+; ELF64: blr
+  ret float 2.5;
+}
+
+define double @ret18() nounwind uwtable ssp {
+entry:
+; ELF64: ret18
+; ELF64: addis
+; ELF64: lfd
+; ELF64: blr
+  ret double 2.5e-33;
+}