From a01b583dbc82bb41a5615ceddd2d2c99972aa65e Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Sun, 18 Jan 2015 12:08:47 +0000 Subject: [PATCH] [PowerPC] Initial PPC64 calling-convention changes for fastcc The default calling convention specified by the PPC64 ELF (V1 and V2) ABI is designed to work with both prototyped and non-prototyped/varargs functions. As a result, GPRs and stack space are allocated for every argument, even those that are passed in floating-point or vector registers. GlobalOpt::OptimizeFunctions will transform local non-varargs functions (that do not have their address taken) to use the 'fast' calling convention. When functions are using the 'fast' calling convention, don't allocate GPRs for arguments passed in other types of registers, and don't allocate stack space for arguments passed in registers. Other changes for the fast calling convention may be added in the future. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226399 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFastISel.cpp | 5 +- lib/Target/PowerPC/PPCISelLowering.cpp | 219 +++++-- .../CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll | 56 ++ test/CodeGen/PowerPC/ppc64-fastcc.ll | 540 ++++++++++++++++++ 4 files changed, 757 insertions(+), 63 deletions(-) create mode 100644 test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll create mode 100644 test/CodeGen/PowerPC/ppc64-fastcc.ll diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 13bd0c7be2c..7af601e8f73 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -1275,7 +1275,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, // Prepare to assign register arguments. Every argument uses up a // GPR protocol register even if it's passed in a floating-point - // register. + // register (unless we're using the fast calling convention). 
unsigned NextGPR = PPC::X3; unsigned NextFPR = PPC::F1; @@ -1325,7 +1325,8 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, unsigned ArgReg; if (ArgVT == MVT::f32 || ArgVT == MVT::f64) { ArgReg = NextFPR++; - ++NextGPR; + if (CC != CallingConv::Fast) + ++NextGPR; } else ArgReg = NextGPR++; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 20e5115445e..58eddd7e910 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2623,6 +2623,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + assert(!(CallConv == CallingConv::Fast && isVarArg) && + "fastcc not supported on varargs functions"); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && @@ -2674,7 +2677,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( // although the first ones are often in registers. unsigned ArgOffset = LinkageSize; - unsigned GPR_idx, FPR_idx = 0, VR_idx = 0; + unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; SmallVector<SDValue, 8> MemOps; Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; @@ -2689,19 +2692,31 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); CurArgIdx = Ins[ArgNo].OrigArgIndex; - /* Respect alignment of argument on the stack. */ - unsigned Align = - CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize); - ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; - unsigned CurArgOffset = ArgOffset; + // We re-align the argument offset for each argument, except when using the + // fast calling convention, when we need to make sure we do that only when + // we'll actually use a stack slot. 
+ unsigned CurArgOffset, Align; + auto ComputeArgOffset = [&]() { + /* Respect alignment of argument on the stack. */ + Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize); + ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; + CurArgOffset = ArgOffset; + }; - /* Compute GPR index associated with argument offset. */ - GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; - GPR_idx = std::min(GPR_idx, Num_GPR_Regs); + if (CallConv != CallingConv::Fast) { + ComputeArgOffset(); + + /* Compute GPR index associated with argument offset. */ + GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; + GPR_idx = std::min(GPR_idx, Num_GPR_Regs); + } // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. if (Flags.isByVal()) { + if (CallConv == CallingConv::Fast) + ComputeArgOffset(); + // ObjSize is the true size, ArgSize rounded up to multiple of registers. ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; @@ -2745,7 +2760,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( InVals.push_back(Arg); if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store; @@ -2807,7 +2822,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( // passed directly. Clang may use those instead of "byval" aggregate // types to avoid forcing arguments to memory unnecessarily. if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) @@ -2815,10 +2830,14 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( // value to MVT::i64 and then truncate to the correct register size. 
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); } else { + if (CallConv == CallingConv::Fast) + ComputeArgOffset(); + needsLoad = true; ArgSize = PtrByteSize; } - ArgOffset += 8; + if (CallConv != CallingConv::Fast || needsLoad) + ArgOffset += 8; break; case MVT::f32: @@ -2838,11 +2857,11 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; - } else if (GPR_idx != Num_GPR_Regs) { + } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) { // This can only ever happen in the presence of f32 array types, // since otherwise we never run out of FPRs before running out // of GPRs. - unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::f32) { @@ -2854,16 +2873,21 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal); } else { + if (CallConv == CallingConv::Fast) + ComputeArgOffset(); + needsLoad = true; } // When passing an array of floats, the array occupies consecutive // space in the argument area; only round up to the next doubleword // at the end of the array. Otherwise, each float takes 8 bytes. - ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize; - ArgOffset += ArgSize; - if (Flags.isInConsecutiveRegsLast()) - ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; + if (CallConv != CallingConv::Fast || needsLoad) { + ArgSize = Flags.isInConsecutiveRegs() ? 
ObjSize : PtrByteSize; + ArgOffset += ArgSize; + if (Flags.isInConsecutiveRegsLast()) + ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; + } break; case MVT::v4f32: case MVT::v4i32: @@ -2881,9 +2905,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++VR_idx; } else { + if (CallConv == CallingConv::Fast) + ComputeArgOffset(); + needsLoad = true; } - ArgOffset += 16; + if (CallConv != CallingConv::Fast || needsLoad) + ArgOffset += 16; break; } @@ -4270,6 +4298,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallConv == CallingConv::Fast) MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); + assert(!(CallConv == CallingConv::Fast && isVarArg) && + "fastcc not supported on varargs functions"); + // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage @@ -4277,6 +4308,30 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false, isELFv2ABI); unsigned NumBytes = LinkageSize; + unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + + static const MCPhysReg GPR[] = { + PPC::X3, PPC::X4, PPC::X5, PPC::X6, + PPC::X7, PPC::X8, PPC::X9, PPC::X10, + }; + static const MCPhysReg *FPR = GetFPR(); + + static const MCPhysReg VR[] = { + PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, + PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 + }; + static const MCPhysReg VSRH[] = { + PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, + PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 + }; + + const unsigned NumGPRs = array_lengthof(GPR); + const unsigned NumFPRs = 13; + const unsigned NumVRs = array_lengthof(VR); + + // When using the fast calling convention, we don't provide backing for + // arguments that will be in 
registers. + unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0; // Add up all the space actually used. for (unsigned i = 0; i != NumOps; ++i) { @@ -4284,6 +4339,35 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, EVT ArgVT = Outs[i].VT; EVT OrigVT = Outs[i].ArgVT; + if (CallConv == CallingConv::Fast) { + if (Flags.isByVal()) + NumGPRsUsed += (Flags.getByValSize()+7)/8; + else + switch (ArgVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected ValueType for argument!"); + case MVT::i1: + case MVT::i32: + case MVT::i64: + if (++NumGPRsUsed <= NumGPRs) + continue; + break; + case MVT::f32: + case MVT::f64: + if (++NumFPRsUsed <= NumFPRs) + continue; + break; + case MVT::v4f32: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + case MVT::v2f64: + case MVT::v2i64: + if (++NumVRsUsed <= NumVRs) + continue; + break; + } + } + /* Respect alignment of argument on the stack. */ unsigned Align = CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); @@ -4340,26 +4424,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // must be stored to our stack, and loaded into integer regs as well, if // any integer regs are available for argument passing. 
unsigned ArgOffset = LinkageSize; - unsigned GPR_idx, FPR_idx = 0, VR_idx = 0; - - static const MCPhysReg GPR[] = { - PPC::X3, PPC::X4, PPC::X5, PPC::X6, - PPC::X7, PPC::X8, PPC::X9, PPC::X10, - }; - static const MCPhysReg *FPR = GetFPR(); - - static const MCPhysReg VR[] = { - PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, - PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 - }; - static const MCPhysReg VSRH[] = { - PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, - PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 - }; - - const unsigned NumGPRs = array_lengthof(GPR); - const unsigned NumFPRs = 13; - const unsigned NumVRs = array_lengthof(VR); SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<TailCallArgumentInfo, 8> TailCallArguments; @@ -4371,22 +4435,31 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, EVT ArgVT = Outs[i].VT; EVT OrigVT = Outs[i].ArgVT; - /* Respect alignment of argument on the stack. */ - unsigned Align = - CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); - ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; - - /* Compute GPR index associated with argument offset. */ - GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; - GPR_idx = std::min(GPR_idx, NumGPRs); - // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. SDValue PtrOff; - PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); + // We re-align the argument offset for each argument, except when using the + // fast calling convention, when we need to make sure we do that only when + // we'll actually use a stack slot. + auto ComputePtrOff = [&]() { + /* Respect alignment of argument on the stack. 
*/ + unsigned Align = + CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); + ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; - PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); + PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); + + PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); + }; + + if (CallConv != CallingConv::Fast) { + ComputePtrOff(); + + /* Compute GPR index associated with argument offset. */ + GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; + GPR_idx = std::min(GPR_idx, NumGPRs); + } // Promote integers to 64-bit values. if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { @@ -4411,6 +4484,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, if (Size == 0) continue; + if (CallConv == CallingConv::Fast) + ComputePtrOff(); + // All aggregates smaller than 8 bytes must be passed right-justified. if (Size==1 || Size==2 || Size==4) { EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); @@ -4419,7 +4495,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, MachinePointerInfo(), VT, false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); - RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); ArgOffset += PtrByteSize; continue; @@ -4481,7 +4557,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, MachinePointerInfo(), false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); - RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); // Done with this argument. ArgOffset += PtrByteSize; @@ -4517,13 +4593,19 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // passed directly. Clang may use those instead of "byval" aggregate // types to avoid forcing arguments to memory unnecessarily. 
if (GPR_idx != NumGPRs) { - RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); } else { + if (CallConv == CallingConv::Fast) + ComputePtrOff(); + LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, false, MemOpChains, TailCallArguments, dl); + if (CallConv == CallingConv::Fast) + ArgOffset += PtrByteSize; } - ArgOffset += PtrByteSize; + if (CallConv != CallingConv::Fast) + ArgOffset += PtrByteSize; break; case MVT::f32: case MVT::f64: { @@ -4537,6 +4619,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // then the parameter save area. For now, put all arguments to vararg // routines always in both locations (FPR *and* GPR or stack slot). bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs; + bool NeededLoad = false; // First load the argument into the next available FPR. if (FPR_idx != NumFPRs) @@ -4545,7 +4628,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // Next, load the argument into GPR or stack slot if needed. if (!NeedGPROrStack) ; - else if (GPR_idx != NumGPRs) { + else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) { // In the non-vararg case, this can only ever happen in the // presence of f32 array types, since otherwise we never run // out of FPRs before running out of GPRs. @@ -4584,8 +4667,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, ArgVal = SDValue(); if (ArgVal.getNode()) - RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal)); } else { + if (CallConv == CallingConv::Fast) + ComputePtrOff(); + // Single-precision floating-point values are mapped to the // second (rightmost) word of the stack doubleword. 
if (Arg.getValueType() == MVT::f32 && @@ -4597,14 +4683,18 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, false, MemOpChains, TailCallArguments, dl); + + NeededLoad = true; } // When passing an array of floats, the array occupies consecutive // space in the argument area; only round up to the next doubleword // at the end of the array. Otherwise, each float takes 8 bytes. - ArgOffset += (Arg.getValueType() == MVT::f32 && - Flags.isInConsecutiveRegs()) ? 4 : 8; - if (Flags.isInConsecutiveRegsLast()) - ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; + if (CallConv != CallingConv::Fast || NeededLoad) { + ArgOffset += (Arg.getValueType() == MVT::f32 && + Flags.isInConsecutiveRegs()) ? 4 : 8; + if (Flags.isInConsecutiveRegsLast()) + ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; + } break; } case MVT::v4f32: @@ -4663,11 +4753,18 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, RegsToPass.push_back(std::make_pair(VReg, Arg)); } else { + if (CallConv == CallingConv::Fast) + ComputePtrOff(); + LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, true, MemOpChains, TailCallArguments, dl); + if (CallConv == CallingConv::Fast) + ArgOffset += 16; } - ArgOffset += 16; + + if (CallConv != CallingConv::Fast) + ArgOffset += 16; break; } } diff --git a/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll b/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll new file mode 100644 index 00000000000..941513fb927 --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll @@ -0,0 +1,56 @@ +; RUN: llc -mcpu=pwr7 -mattr=-vsx -fast-isel -fast-isel-abort < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define fastcc i64 @g1(i64 %g1, double %f1, i64 %g2, double %f2, i64 %g3, double %f3, i64 %g4, double %f4) #0 { + ret i64 %g1 + +; 
CHECK-LABEL: @g1 +; CHECK-NOT: mr 3, +; CHECK: blr +} + +define fastcc i64 @g2(i64 %g1, double %f1, i64 %g2, double %f2, i64 %g3, double %f3, i64 %g4, double %f4) #0 { + ret i64 %g2 + +; CHECK-LABEL: @g2 +; CHECK: mr 3, 4 +; CHECK-NEXT: blr +} + +define fastcc i64 @g3(i64 %g1, double %f1, i64 %g2, double %f2, i64 %g3, double %f3, i64 %g4, double %f4) #0 { + ret i64 %g3 + +; CHECK-LABEL: @g3 +; CHECK: mr 3, 5 +; CHECK-NEXT: blr +} + +define fastcc double @f2(i64 %g1, double %f1, i64 %g2, double %f2, i64 %g3, double %f3, i64 %g4, double %f4) #0 { + ret double %f2 + +; CHECK-LABEL: @f2 +; CHECK: fmr 1, 2 +; CHECK-NEXT: blr +} + +define void @cg2(i64 %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, i64 %v, double 0.0, i64 0, double 0.0, i64 0, double 0.0) + ret void + +; CHECK-LABEL: @cg2 +; CHECK: mr 4, 3 +; CHECK: blr +} + +define void @cf2(double %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, i64 0, double %v, i64 0, double 0.0, i64 0, double 0.0) + ret void + +; CHECK-LABEL: @cf2 +; CHECK: mr 2, 1 +; CHECK: blr +} + +attributes #0 = { nounwind } + diff --git a/test/CodeGen/PowerPC/ppc64-fastcc.ll b/test/CodeGen/PowerPC/ppc64-fastcc.ll new file mode 100644 index 00000000000..bb1365a3b67 --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64-fastcc.ll @@ -0,0 +1,540 @@ +; RUN: llc -mcpu=pwr7 -mattr=-vsx < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define fastcc i64 @g1(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double 
%f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret i64 %g1 + +; CHECK-LABEL: @g1 +; CHECK-NOT: mr 3, +; CHECK: blr +} + +define fastcc i64 @g2(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret i64 %g2 + +; CHECK-LABEL: @g2 +; CHECK: mr 3, 4 +; CHECK-NEXT: blr +} + +define fastcc i64 @g3(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret i64 %g3 + +; CHECK-LABEL: @g3 +; CHECK: mr 3, 5 +; CHECK-NEXT: blr +} + +define fastcc i64 @g4(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x 
i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret i64 %g4 + +; CHECK-LABEL: @g4 +; CHECK: mr 3, 6 +; CHECK-NEXT: blr +} + +define fastcc i64 @g5(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret i64 %g5 + +; CHECK-LABEL: @g5 +; CHECK: mr 3, 7 +; CHECK-NEXT: blr +} + +define fastcc i64 @g6(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret i64 %g6 + +; CHECK-LABEL: @g6 +; CHECK: mr 3, 8 +; CHECK-NEXT: blr +} + +define fastcc i64 @g7(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> 
%v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret i64 %g7 + +; CHECK-LABEL: @g7 +; CHECK: mr 3, 9 +; CHECK-NEXT: blr +} + +define fastcc i64 @g8(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret i64 %g8 + +; CHECK-LABEL: @g8 +; CHECK: mr 3, 10 +; CHECK-NEXT: blr +} + +define fastcc i64 @g9(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret i64 %g9 + +; CHECK-LABEL: @g9 +; CHECK: ld 3, 48(1) +; CHECK-NEXT: blr +} + +define fastcc i64 @g10(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 
x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret i64 %g10 + +; CHECK-LABEL: @g10 +; CHECK: ld 3, 56(1) +; CHECK-NEXT: blr +} + +define fastcc i64 @g11(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret i64 %g11 + +; CHECK-LABEL: @g11 +; CHECK: ld 3, 64(1) +; CHECK-NEXT: blr +} + +define fastcc double @f1(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret double %f1 + +; CHECK-LABEL: @f1 +; CHECK-NOT: fmr 1, +; CHECK: blr +} + +define fastcc double @f2(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, 
i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret double %f2 + +; CHECK-LABEL: @f2 +; CHECK: fmr 1, 2 +; CHECK-NEXT: blr +} + +define fastcc double @f3(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret double %f3 + +; CHECK-LABEL: @f3 +; CHECK: fmr 1, 3 +; CHECK-NEXT: blr +} + +define fastcc double @f4(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret double %f4 + +; CHECK-LABEL: @f4 +; CHECK: 
fmr 1, 4 +; CHECK-NEXT: blr +} + +define fastcc double @f5(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret double %f5 + +; CHECK-LABEL: @f5 +; CHECK: fmr 1, 5 +; CHECK-NEXT: blr +} + +define fastcc double @f6(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret double %f6 + +; CHECK-LABEL: @f6 +; CHECK: fmr 1, 6 +; CHECK-NEXT: blr +} + +define fastcc double @f7(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, 
i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret double %f7 + +; CHECK-LABEL: @f7 +; CHECK: fmr 1, 7 +; CHECK-NEXT: blr +} + +define fastcc double @f8(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret double %f8 + +; CHECK-LABEL: @f8 +; CHECK: fmr 1, 8 +; CHECK-NEXT: blr +} + +define fastcc double @f9(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret double %f9 + +; CHECK-LABEL: @f9 +; CHECK: fmr 1, 9 +; CHECK-NEXT: blr +} + +define fastcc double @f10(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, 
<4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret double %f10 + +; CHECK-LABEL: @f10 +; CHECK: fmr 1, 10 +; CHECK-NEXT: blr +} + +define fastcc double @f11(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret double %f11 + +; CHECK-LABEL: @f11 +; CHECK: fmr 1, 11 +; CHECK-NEXT: blr +} + +define fastcc double @f12(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret double %f12 + +; CHECK-LABEL: @f12 +; CHECK: fmr 1, 12 +; CHECK-NEXT: blr +} + +define fastcc double @f13(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x 
i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret double %f13 + +; CHECK-LABEL: @f13 +; CHECK: fmr 1, 13 +; CHECK-NEXT: blr +} + +define fastcc double @f14(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret double %f14 + +; CHECK-LABEL: @f14 +; CHECK: lfd 1, 120(1) +; CHECK-NEXT: blr +} + +define fastcc double @f15(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret double %f15 + +; CHECK-LABEL: @f15 +; CHECK: lfd 1, 152(1) +; CHECK-NEXT: blr +} + +define fastcc <4 x i32> @v1(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double 
%f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret <4 x i32> %v1 + +; CHECK-LABEL: @v1 +; CHECK-NOT: vor 2, +; CHECK: blr +} + +define fastcc <4 x i32> @v2(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret <4 x i32> %v2 + +; CHECK-LABEL: @v2 +; CHECK: vor 2, 3, 3 +; CHECK-NEXT: blr +} + +define fastcc <4 x i32> @v3(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret <4 x i32> %v3 + +; CHECK-LABEL: @v3 +; CHECK: 
vor 2, 4, 4 +; CHECK-NEXT: blr +} + +define fastcc <4 x i32> @v4(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret <4 x i32> %v4 + +; CHECK-LABEL: @v4 +; CHECK: vor 2, 5, 5 +; CHECK-NEXT: blr +} + +define fastcc <4 x i32> @v5(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret <4 x i32> %v5 + +; CHECK-LABEL: @v5 +; CHECK: vor 2, 6, 6 +; CHECK-NEXT: blr +} + +define fastcc <4 x i32> @v6(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double 
%f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret <4 x i32> %v6 + +; CHECK-LABEL: @v6 +; CHECK: vor 2, 7, 7 +; CHECK-NEXT: blr +} + +define fastcc <4 x i32> @v7(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret <4 x i32> %v7 + +; CHECK-LABEL: @v7 +; CHECK: vor 2, 8, 8 +; CHECK-NEXT: blr +} + +define fastcc <4 x i32> @v8(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret <4 x i32> %v8 + +; CHECK-LABEL: @v8 +; CHECK: vor 2, 9, 9 +; CHECK-NEXT: blr +} + +define fastcc <4 x i32> @v9(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, 
double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret <4 x i32> %v9 + +; CHECK-LABEL: @v9 +; CHECK: vor 2, 10, 10 +; CHECK-NEXT: blr +} + +define fastcc <4 x i32> @v10(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret <4 x i32> %v10 + +; CHECK-LABEL: @v10 +; CHECK: vor 2, 11, 11 +; CHECK-NEXT: blr +} + +define fastcc <4 x i32> @v11(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret <4 x i32> %v11 + +; CHECK-LABEL: @v11 +; CHECK: vor 2, 12, 12 +; CHECK-NEXT: blr +} + +define fastcc <4 x i32> @v12(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double 
%f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret <4 x i32> %v12 + +; CHECK-LABEL: @v12 +; CHECK: vor 2, 13, 13 +; CHECK-NEXT: blr +} + +define fastcc <4 x i32> @v13(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret <4 x i32> %v13 + +; CHECK-LABEL: @v13 +; CHECK: addi [[REG1:[0-9]+]], 1, 96 +; CHECK-NEXT: lvx 2, 0, [[REG1]] +; CHECK-NEXT: blr +} + +define fastcc <4 x i32> @v14(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret <4 x i32> %v14 + +; CHECK-LABEL: @v14 +; CHECK: addi 
[[REG1:[0-9]+]], 1, 128 +; CHECK-NEXT: lvx 2, 0, [[REG1]] +; CHECK-NEXT: blr +} + +define fastcc <4 x i32> @v15(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 { + ret <4 x i32> %v15 + +; CHECK-LABEL: @v15 +; CHECK: addi [[REG1:[0-9]+]], 1, 160 +; CHECK-NEXT: lvx 2, 0, [[REG1]] +; CHECK-NEXT: blr +} + +define void @cg1(i64 %v) #0 { + tail call fastcc i64 @g1(i64 %v, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cg1 +; CHECK-NOT: {{^[ \t]*}}mr 3, +; CHECK: blr +} + +define void @cg2(i64 %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 %v, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, 
double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cg2 +; CHECK: mr 4, 3 +; CHECK: blr +} + +define void @cg3(i64 %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 %v, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cg3 +; CHECK: mr 5, 3 +; CHECK: blr +} + +define void @cg4(i64 %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 %v, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cg4 +; CHECK: mr 6, 3 +; CHECK: blr +} + +define void @cg5(i64 %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 %v, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cg5 +; CHECK: mr 7, 3 +; CHECK: blr 
+} + +define void @cg6(i64 %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 %v, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cg6 +; CHECK: mr 8, 3 +; CHECK: blr +} + +define void @cg7(i64 %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 %v, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cg7 +; CHECK: mr 9, 3 +; CHECK: blr +} + +define void @cg8(i64 %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 %v, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cg8 +; CHECK: mr 10, 3 +; CHECK: blr +} + +define void @cg9(i64 %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x 
i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 %v, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cg9 +; CHECK: mr [[REG1:[0-9]+]], 3 +; CHECK: std [[REG1]], 48(1) +; CHECK: blr +} + +define void @cg10(i64 %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 %v, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cg10 +; CHECK: mr [[REG1:[0-9]+]], 3 +; CHECK: std [[REG1]], 56(1) +; CHECK: blr +} + +define void @cg11(i64 %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 %v, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cg11 +; CHECK: mr [[REG1:[0-9]+]], 3 +; CHECK: std [[REG1]], 64(1) +; CHECK: blr +} + +define void @cf1(double %v) #0 { + tail call fastcc i64 @g1(i64 0, double %v, <4 x i32> , i64 0, 
double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cf1 +; CHECK-NOT: fmr 1, +; CHECK: blr +} + +define void @cf2(double %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double %v, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cf2 +; CHECK: fmr 2, 1 +; CHECK: blr +} + +define void @cf3(double %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double %v, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cf3 +; CHECK: fmr 3, 1 +; CHECK: blr +} + +define void @cf4(double %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double %v, <4 x i32> , i64 0, double 0.0, <4 x 
i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cf4 +; CHECK: fmr 4, 1 +; CHECK: blr +} + +define void @cf5(double %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double %v, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cf5 +; CHECK: fmr 5, 1 +; CHECK: blr +} + +define void @cf14(double %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double %v, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cf14 +; CHECK: stfd 1, 120(1) +; CHECK: blr +} + +define void @cf15(double %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 
0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double %v, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cf15 +; CHECK: stfd 1, 152(1) +; CHECK: blr +} + +define void @cv2(<4 x i32> %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> %v, i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cv2 +; CHECK: vor 3, 2, 2 +; CHECK: blr +} + +define void @cv3(<4 x i32> %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> %v, i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cv3 +; CHECK: vor 4, 2, 2 +; CHECK: blr +} + +define void @cv13(<4 x i32> %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> 
, i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> %v, i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cv13 +; CHECK: li [[REG1:[0-9]+]], 96 +; CHECK: stvx 2, 1, [[REG1]] +; CHECK: blr +} + +define void @cv14(<4 x i32> %v) #0 { + tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> %v, i64 0, double 0.0, <4 x i32> , i64 0, double 0.0, <4 x i32> ) + ret void + +; CHECK-LABEL: @cv14 +; CHECK: li [[REG1:[0-9]+]], 128 +; CHECK: stvx 2, 1, [[REG1]] +; CHECK: blr +} + +attributes #0 = { nounwind } +