diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 13bd0c7be2c..7af601e8f73 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -1275,7 +1275,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
 
   // Prepare to assign register arguments.  Every argument uses up a
   // GPR protocol register even if it's passed in a floating-point
-  // register.
+  // register (unless we're using the fast calling convention).
   unsigned NextGPR = PPC::X3;
   unsigned NextFPR = PPC::F1;
 
@@ -1325,7 +1325,8 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
     unsigned ArgReg;
     if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
       ArgReg = NextFPR++;
-      ++NextGPR;
+      if (CC != CallingConv::Fast)
+        ++NextGPR;
     } else
       ArgReg = NextGPR++;
 
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 20e5115445e..58eddd7e910 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2623,6 +2623,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
   MachineFrameInfo *MFI = MF.getFrameInfo();
   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
 
+  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
+         "fastcc not supported on varargs functions");
+
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   // Potential tail calls could cause overwriting of argument stack slots.
   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
@@ -2674,7 +2677,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
   // although the first ones are often in registers.
 
   unsigned ArgOffset = LinkageSize;
-  unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
+  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
   SmallVector<SDValue, 8> MemOps;
   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
   unsigned CurArgIdx = 0;
@@ -2689,19 +2692,31 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
     std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
     CurArgIdx = Ins[ArgNo].OrigArgIndex;
 
-    /* Respect alignment of argument on the stack.  */
-    unsigned Align =
-      CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
-    ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
-    unsigned CurArgOffset = ArgOffset;
+    // We re-align the argument offset for each argument, except when using the
+    // fast calling convention, when we need to make sure we do that only when
+    // we'll actually use a stack slot.
+    unsigned CurArgOffset, Align;
+    auto ComputeArgOffset = [&]() {
+      /* Respect alignment of argument on the stack.  */
+      Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
+      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+      CurArgOffset = ArgOffset;
+    };
 
-    /* Compute GPR index associated with argument offset.  */
-    GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
-    GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
+    if (CallConv != CallingConv::Fast) {
+      ComputeArgOffset();
+
+      /* Compute GPR index associated with argument offset.  */
+      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
+    }
 
     // FIXME the codegen can be much improved in some cases.
     // We do not have to keep everything in memory.
     if (Flags.isByVal()) {
+      if (CallConv == CallingConv::Fast)
+        ComputeArgOffset();
+
       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
       ObjSize = Flags.getByValSize();
       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
@@ -2745,7 +2760,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
         InVals.push_back(Arg);
 
         if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+          unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
           SDValue Store;
 
@@ -2807,7 +2822,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
       // passed directly.  Clang may use those instead of "byval" aggregate
       // types to avoid forcing arguments to memory unnecessarily.
       if (GPR_idx != Num_GPR_Regs) {
-        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
 
         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
@@ -2815,10 +2830,14 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
           // value to MVT::i64 and then truncate to the correct register size.
           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
       } else {
+        if (CallConv == CallingConv::Fast)
+          ComputeArgOffset();
+
         needsLoad = true;
         ArgSize = PtrByteSize;
       }
-      ArgOffset += 8;
+      if (CallConv != CallingConv::Fast || needsLoad)
+        ArgOffset += 8;
       break;
 
     case MVT::f32:
@@ -2838,11 +2857,11 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
         ++FPR_idx;
-      } else if (GPR_idx != Num_GPR_Regs) {
+      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
         // This can only ever happen in the presence of f32 array types,
         // since otherwise we never run out of FPRs before running out
         // of GPRs.
-        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
 
         if (ObjectVT == MVT::f32) {
@@ -2854,16 +2873,21 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
           ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
       } else {
+        if (CallConv == CallingConv::Fast)
+          ComputeArgOffset();
+
         needsLoad = true;
       }
 
       // When passing an array of floats, the array occupies consecutive
       // space in the argument area; only round up to the next doubleword
       // at the end of the array.  Otherwise, each float takes 8 bytes.
-      ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
-      ArgOffset += ArgSize;
-      if (Flags.isInConsecutiveRegsLast())
-        ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+      if (CallConv != CallingConv::Fast || needsLoad) {
+        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
+        ArgOffset += ArgSize;
+        if (Flags.isInConsecutiveRegsLast())
+          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+      }
       break;
     case MVT::v4f32:
     case MVT::v4i32:
@@ -2881,9 +2905,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
         ++VR_idx;
       } else {
+        if (CallConv == CallingConv::Fast)
+          ComputeArgOffset();
+
         needsLoad = true;
       }
-      ArgOffset += 16;
+      if (CallConv != CallingConv::Fast || needsLoad)
+        ArgOffset += 16;
       break;
     }
 
@@ -4270,6 +4298,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
       CallConv == CallingConv::Fast)
     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
 
+  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
+         "fastcc not supported on varargs functions");
+
   // Count how many bytes are to be pushed on the stack, including the linkage
   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
@@ -4277,6 +4308,30 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
   unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
                                                           isELFv2ABI);
   unsigned NumBytes = LinkageSize;
+  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+  static const MCPhysReg GPR[] = {
+    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+  };
+  static const MCPhysReg *FPR = GetFPR();
+
+  static const MCPhysReg VR[] = {
+    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+  };
+  static const MCPhysReg VSRH[] = {
+    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+    PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+  };
+
+  const unsigned NumGPRs = array_lengthof(GPR);
+  const unsigned NumFPRs = 13;
+  const unsigned NumVRs  = array_lengthof(VR);
+
+  // When using the fast calling convention, we don't provide backing for
+  // arguments that will be in registers.
+  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
 
   // Add up all the space actually used.
   for (unsigned i = 0; i != NumOps; ++i) {
@@ -4284,6 +4339,35 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
     EVT ArgVT = Outs[i].VT;
     EVT OrigVT = Outs[i].ArgVT;
 
+    if (CallConv == CallingConv::Fast) {
+      if (Flags.isByVal())
+        NumGPRsUsed += (Flags.getByValSize()+7)/8;
+      else
+        switch (ArgVT.getSimpleVT().SimpleTy) {
+        default: llvm_unreachable("Unexpected ValueType for argument!");
+        case MVT::i1:
+        case MVT::i32:
+        case MVT::i64:
+          if (++NumGPRsUsed <= NumGPRs)
+            continue;
+          break;
+        case MVT::f32:
+        case MVT::f64:
+          if (++NumFPRsUsed <= NumFPRs)
+            continue;
+          break;
+        case MVT::v4f32:
+        case MVT::v4i32:
+        case MVT::v8i16:
+        case MVT::v16i8:
+        case MVT::v2f64:
+        case MVT::v2i64:
+          if (++NumVRsUsed <= NumVRs)
+            continue;
+          break;
+        }
+    }
+
     /* Respect alignment of argument on the stack.  */
     unsigned Align =
       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
@@ -4340,26 +4424,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
   // must be stored to our stack, and loaded into integer regs as well, if
   // any integer regs are available for argument passing.
   unsigned ArgOffset = LinkageSize;
-  unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
-
-  static const MCPhysReg GPR[] = {
-    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
-    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
-  };
-  static const MCPhysReg *FPR = GetFPR();
-
-  static const MCPhysReg VR[] = {
-    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
-    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
-  };
-  static const MCPhysReg VSRH[] = {
-    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
-    PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
-  };
-
-  const unsigned NumGPRs = array_lengthof(GPR);
-  const unsigned NumFPRs = 13;
-  const unsigned NumVRs  = array_lengthof(VR);
 
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
@@ -4371,22 +4435,31 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
     EVT ArgVT = Outs[i].VT;
     EVT OrigVT = Outs[i].ArgVT;
 
-    /* Respect alignment of argument on the stack.  */
-    unsigned Align =
-      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
-    ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
-
-    /* Compute GPR index associated with argument offset.  */
-    GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
-    GPR_idx = std::min(GPR_idx, NumGPRs);
-
     // PtrOff will be used to store the current argument to the stack if a
     // register cannot be found for it.
     SDValue PtrOff;
 
-    PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+    // We re-align the argument offset for each argument, except when using the
+    // fast calling convention, when we need to make sure we do that only when
+    // we'll actually use a stack slot.
+    auto ComputePtrOff = [&]() {
+      /* Respect alignment of argument on the stack.  */
+      unsigned Align =
+        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
 
-    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+      PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+
+      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+    };
+
+    if (CallConv != CallingConv::Fast) {
+      ComputePtrOff();
+
+      /* Compute GPR index associated with argument offset.  */
+      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+      GPR_idx = std::min(GPR_idx, NumGPRs);
+    }
 
     // Promote integers to 64-bit values.
     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
@@ -4411,6 +4484,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
       if (Size == 0)
         continue;
 
+      if (CallConv == CallingConv::Fast)
+        ComputePtrOff();
+
       // All aggregates smaller than 8 bytes must be passed right-justified.
       if (Size==1 || Size==2 || Size==4) {
         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
@@ -4419,7 +4495,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                                       MachinePointerInfo(), VT,
                                       false, false, false, 0);
         MemOpChains.push_back(Load.getValue(1));
-        RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
+        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
 
         ArgOffset += PtrByteSize;
         continue;
@@ -4481,7 +4557,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                                      MachinePointerInfo(),
                                      false, false, false, 0);
           MemOpChains.push_back(Load.getValue(1));
-          RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
+          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
 
           // Done with this argument.
           ArgOffset += PtrByteSize;
@@ -4517,13 +4593,19 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
       // passed directly.  Clang may use those instead of "byval" aggregate
       // types to avoid forcing arguments to memory unnecessarily.
       if (GPR_idx != NumGPRs) {
-        RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
+        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
       } else {
+        if (CallConv == CallingConv::Fast)
+          ComputePtrOff();
+
         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                          true, isTailCall, false, MemOpChains,
                          TailCallArguments, dl);
+        if (CallConv == CallingConv::Fast)
+          ArgOffset += PtrByteSize;
       }
-      ArgOffset += PtrByteSize;
+      if (CallConv != CallingConv::Fast)
+        ArgOffset += PtrByteSize;
       break;
     case MVT::f32:
     case MVT::f64: {
@@ -4537,6 +4619,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
       // then the parameter save area.  For now, put all arguments to vararg
       // routines always in both locations (FPR *and* GPR or stack slot).
       bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
+      bool NeededLoad = false;
 
       // First load the argument into the next available FPR.
       if (FPR_idx != NumFPRs)
@@ -4545,7 +4628,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
       // Next, load the argument into GPR or stack slot if needed.
       if (!NeedGPROrStack)
         ;
-      else if (GPR_idx != NumGPRs) {
+      else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
         // In the non-vararg case, this can only ever happen in the
         // presence of f32 array types, since otherwise we never run
         // out of FPRs before running out of GPRs.
@@ -4584,8 +4667,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
           ArgVal = SDValue();
 
         if (ArgVal.getNode())
-          RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal));
+          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
       } else {
+        if (CallConv == CallingConv::Fast)
+          ComputePtrOff();
+
         // Single-precision floating-point values are mapped to the
         // second (rightmost) word of the stack doubleword.
         if (Arg.getValueType() == MVT::f32 &&
@@ -4597,14 +4683,18 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
           LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                            true, isTailCall, false, MemOpChains,
                            TailCallArguments, dl);
+
+        NeededLoad = true;
       }
       // When passing an array of floats, the array occupies consecutive
       // space in the argument area; only round up to the next doubleword
       // at the end of the array.  Otherwise, each float takes 8 bytes.
-      ArgOffset += (Arg.getValueType() == MVT::f32 &&
-                    Flags.isInConsecutiveRegs()) ? 4 : 8;
-      if (Flags.isInConsecutiveRegsLast())
-        ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+      if (CallConv != CallingConv::Fast || NeededLoad) {
+        ArgOffset += (Arg.getValueType() == MVT::f32 &&
+                      Flags.isInConsecutiveRegs()) ? 4 : 8;
+        if (Flags.isInConsecutiveRegsLast())
+          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+      }
       break;
     }
     case MVT::v4f32:
@@ -4663,11 +4753,18 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
           RegsToPass.push_back(std::make_pair(VReg, Arg));
       } else {
+        if (CallConv == CallingConv::Fast)
+          ComputePtrOff();
+
         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                          true, isTailCall, true, MemOpChains,
                          TailCallArguments, dl);
+        if (CallConv == CallingConv::Fast)
+          ArgOffset += 16;
       }
-      ArgOffset += 16;
+
+      if (CallConv != CallingConv::Fast)
+        ArgOffset += 16;
       break;
     }
   }
diff --git a/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll b/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll
new file mode 100644
index 00000000000..941513fb927
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll
@@ -0,0 +1,56 @@
+; RUN: llc -mcpu=pwr7 -mattr=-vsx -fast-isel -fast-isel-abort < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define fastcc i64 @g1(i64 %g1, double %f1, i64 %g2, double %f2, i64 %g3, double %f3, i64 %g4, double %f4) #0 {
+  ret i64 %g1
+
+; CHECK-LABEL: @g1
+; CHECK-NOT: mr 3,
+; CHECK: blr
+}
+
+define fastcc i64 @g2(i64 %g1, double %f1, i64 %g2, double %f2, i64 %g3, double %f3, i64 %g4, double %f4) #0 {
+  ret i64 %g2
+
+; CHECK-LABEL: @g2
+; CHECK: mr 3, 4
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g3(i64 %g1, double %f1, i64 %g2, double %f2, i64 %g3, double %f3, i64 %g4, double %f4) #0 {
+  ret i64 %g3
+
+; CHECK-LABEL: @g3
+; CHECK: mr 3, 5
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f2(i64 %g1, double %f1, i64 %g2, double %f2, i64 %g3, double %f3, i64 %g4, double %f4) #0 {
+  ret double %f2
+
+; CHECK-LABEL: @f2
+; CHECK: fmr 1, 2
+; CHECK-NEXT: blr
+}
+
+define void @cg2(i64 %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, i64 %v, double 0.0, i64 0, double 0.0, i64 0, double 0.0)
+  ret void
+
+; CHECK-LABEL: @cg2
+; CHECK: mr 4, 3
+; CHECK: blr
+}
+
+define void @cf2(double %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, i64 0, double %v, i64 0, double 0.0, i64 0, double 0.0)
+  ret void
+
+; CHECK-LABEL: @cf2
+; CHECK: fmr 2, 1
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/ppc64-fastcc.ll b/test/CodeGen/PowerPC/ppc64-fastcc.ll
new file mode 100644
index 00000000000..bb1365a3b67
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-fastcc.ll
@@ -0,0 +1,540 @@
+; RUN: llc -mcpu=pwr7 -mattr=-vsx < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define fastcc i64 @g1(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret i64 %g1
+
+; CHECK-LABEL: @g1
+; CHECK-NOT: mr 3,
+; CHECK: blr
+}
+
+define fastcc i64 @g2(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret i64 %g2
+
+; CHECK-LABEL: @g2
+; CHECK: mr 3, 4
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g3(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret i64 %g3
+
+; CHECK-LABEL: @g3
+; CHECK: mr 3, 5
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g4(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret i64 %g4
+
+; CHECK-LABEL: @g4
+; CHECK: mr 3, 6
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g5(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret i64 %g5
+
+; CHECK-LABEL: @g5
+; CHECK: mr 3, 7
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g6(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret i64 %g6
+
+; CHECK-LABEL: @g6
+; CHECK: mr 3, 8
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g7(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret i64 %g7
+
+; CHECK-LABEL: @g7
+; CHECK: mr 3, 9
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g8(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret i64 %g8
+
+; CHECK-LABEL: @g8
+; CHECK: mr 3, 10
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g9(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret i64 %g9
+
+; CHECK-LABEL: @g9
+; CHECK: ld 3, 48(1)
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g10(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret i64 %g10
+
+; CHECK-LABEL: @g10
+; CHECK: ld 3, 56(1)
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g11(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret i64 %g11
+
+; CHECK-LABEL: @g11
+; CHECK: ld 3, 64(1)
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f1(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret double %f1
+
+; CHECK-LABEL: @f1
+; CHECK-NOT: fmr 1,
+; CHECK: blr
+}
+
+define fastcc double @f2(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret double %f2
+
+; CHECK-LABEL: @f2
+; CHECK: fmr 1, 2
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f3(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret double %f3
+
+; CHECK-LABEL: @f3
+; CHECK: fmr 1, 3
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f4(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret double %f4
+
+; CHECK-LABEL: @f4
+; CHECK: fmr 1, 4
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f5(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret double %f5
+
+; CHECK-LABEL: @f5
+; CHECK: fmr 1, 5
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f6(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret double %f6
+
+; CHECK-LABEL: @f6
+; CHECK: fmr 1, 6
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f7(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret double %f7
+
+; CHECK-LABEL: @f7
+; CHECK: fmr 1, 7
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f8(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret double %f8
+
+; CHECK-LABEL: @f8
+; CHECK: fmr 1, 8
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f9(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret double %f9
+
+; CHECK-LABEL: @f9
+; CHECK: fmr 1, 9
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f10(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret double %f10
+
+; CHECK-LABEL: @f10
+; CHECK: fmr 1, 10
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f11(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret double %f11
+
+; CHECK-LABEL: @f11
+; CHECK: fmr 1, 11
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f12(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret double %f12
+
+; CHECK-LABEL: @f12
+; CHECK: fmr 1, 12
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f13(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret double %f13
+
+; CHECK-LABEL: @f13
+; CHECK: fmr 1, 13
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f14(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret double %f14
+
+; CHECK-LABEL: @f14
+; CHECK: lfd 1, 120(1)
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f15(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret double %f15
+
+; CHECK-LABEL: @f15
+; CHECK: lfd 1, 152(1)
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v1(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret <4 x i32> %v1
+
+; CHECK-LABEL: @v1
+; CHECK-NOT: vor 2,
+; CHECK: blr
+}
+
+define fastcc <4 x i32> @v2(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret <4 x i32> %v2
+
+; CHECK-LABEL: @v2
+; CHECK: vor 2, 3, 3
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v3(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret <4 x i32> %v3
+
+; CHECK-LABEL: @v3
+; CHECK: vor 2, 4, 4
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v4(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret <4 x i32> %v4
+
+; CHECK-LABEL: @v4
+; CHECK: vor 2, 5, 5
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v5(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret <4 x i32> %v5
+
+; CHECK-LABEL: @v5
+; CHECK: vor 2, 6, 6
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v6(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret <4 x i32> %v6
+
+; CHECK-LABEL: @v6
+; CHECK: vor 2, 7, 7
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v7(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret <4 x i32> %v7
+
+; CHECK-LABEL: @v7
+; CHECK: vor 2, 8, 8
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v8(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret <4 x i32> %v8
+
+; CHECK-LABEL: @v8
+; CHECK: vor 2, 9, 9
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v9(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret <4 x i32> %v9
+
+; CHECK-LABEL: @v9
+; CHECK: vor 2, 10, 10
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v10(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret <4 x i32> %v10
+
+; CHECK-LABEL: @v10
+; CHECK: vor 2, 11, 11
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v11(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret <4 x i32> %v11
+
+; CHECK-LABEL: @v11
+; CHECK: vor 2, 12, 12
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v12(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret <4 x i32> %v12
+
+; CHECK-LABEL: @v12
+; CHECK: vor 2, 13, 13
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v13(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret <4 x i32> %v13
+
+; CHECK-LABEL: @v13
+; CHECK: addi [[REG1:[0-9]+]], 1, 96
+; CHECK-NEXT: lvx 2, 0, [[REG1]]
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v14(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret <4 x i32> %v14
+
+; CHECK-LABEL: @v14
+; CHECK: addi [[REG1:[0-9]+]], 1, 128
+; CHECK-NEXT: lvx 2, 0, [[REG1]]
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v15(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+  ret <4 x i32> %v15
+
+; CHECK-LABEL: @v15
+; CHECK: addi [[REG1:[0-9]+]], 1, 160
+; CHECK-NEXT: lvx 2, 0, [[REG1]]
+; CHECK-NEXT: blr
+}
+
+define void @cg1(i64 %v) #0 {
+  tail call fastcc i64 @g1(i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cg1
+; CHECK-NOT: {{^[ \t]*}}mr 3,
+; CHECK: blr
+}
+
+define void @cg2(i64 %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cg2
+; CHECK: mr 4, 3
+; CHECK: blr
+}
+
+define void @cg3(i64 %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cg3
+; CHECK: mr 5, 3
+; CHECK: blr
+}
+
+define void @cg4(i64 %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cg4
+; CHECK: mr 6, 3
+; CHECK: blr
+}
+
+define void @cg5(i64 %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cg5
+; CHECK: mr 7, 3
+; CHECK: blr
+}
+
+define void @cg6(i64 %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cg6
+; CHECK: mr 8, 3
+; CHECK: blr
+}
+
+define void @cg7(i64 %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cg7
+; CHECK: mr 9, 3
+; CHECK: blr
+}
+
+define void @cg8(i64 %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cg8
+; CHECK: mr 10, 3
+; CHECK: blr
+}
+
+define void @cg9(i64 %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cg9
+; CHECK: mr [[REG1:[0-9]+]], 3
+; CHECK: std [[REG1]], 48(1)
+; CHECK: blr
+}
+
+define void @cg10(i64 %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cg10
+; CHECK: mr [[REG1:[0-9]+]], 3
+; CHECK: std [[REG1]], 56(1)
+; CHECK: blr
+}
+
+define void @cg11(i64 %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
  ret void
+
+; CHECK-LABEL: @cg11
+; CHECK: mr [[REG1:[0-9]+]], 3
+; CHECK: std [[REG1]], 64(1)
+; CHECK: blr
+}
+
+define void @cf1(double %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cf1
+; CHECK-NOT: fmr 1,
+; CHECK: blr
+}
+
+define void @cf2(double %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cf2
+; CHECK: fmr 2, 1
+; CHECK: blr
+}
+
+define void @cf3(double %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cf3
+; CHECK: fmr 3, 1
+; CHECK: blr
+}
+
+define void @cf4(double %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cf4
+; CHECK: fmr 4, 1
+; CHECK: blr
+}
+
+define void @cf5(double %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cf5
+; CHECK: fmr 5, 1
+; CHECK: blr
+}
+
+define void @cf14(double %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cf14
+; CHECK: stfd 1, 120(1)
+; CHECK: blr
+}
+
+define void @cf15(double %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cf15
+; CHECK: stfd 1, 152(1)
+; CHECK: blr
+}
+
+define void @cv2(<4 x i32> %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> %v, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cv2
+; CHECK: vor 3, 2, 2
+; CHECK: blr
+}
+
+define void @cv3(<4 x i32> %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> %v, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cv3
+; CHECK: vor 4, 2, 2
+; CHECK: blr
+}
+
+define void @cv13(<4 x i32> %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> %v, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cv13
+; CHECK: li [[REG1:[0-9]+]], 96
+; CHECK: stvx 2, 1, [[REG1]]
+; CHECK: blr
+}
+
+define void @cv14(<4 x i32> %v) #0 {
+  tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> %v, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+
+; CHECK-LABEL: @cv14
+; CHECK: li [[REG1:[0-9]+]], 128
+; CHECK: stvx 2, 1, [[REG1]]
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+
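As a quick reference (a sketch only, with hypothetical function names; not part of the patch), the behavior the tests above pin down is that fastcc now allocates GPRs, FPRs, and VRs independently, so an i64 that follows a double takes the next free GPR instead of skipping one:

; Sketch only; assumes the powerpc64-unknown-linux-gnu triple used by the tests.
define fastcc i64 @second_gpr_fast(i64 %g1, double %f1, i64 %g2) nounwind {
  ret i64 %g2    ; fastcc: %g2 arrives in r4, so this compiles to "mr 3, 4"
}

define i64 @second_gpr_elf(i64 %g1, double %f1, i64 %g2) nounwind {
  ret i64 %g2    ; default ELF ABI: r4 shadows %f1, so this compiles to "mr 3, 5"
}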