diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index c9b490b617d..b44b6c3ae14 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -215,10 +215,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::VASTART , MVT::Other, Custom); // VAARG is custom lowered with the 32-bit SVR4 ABI. - if ( TM.getSubtarget().isSVR4ABI() - && !TM.getSubtarget().isPPC64()) + if (TM.getSubtarget().isSVR4ABI() + && !TM.getSubtarget().isPPC64()) { setOperationAction(ISD::VAARG, MVT::Other, Custom); - else + setOperationAction(ISD::VAARG, MVT::i64, Custom); + } else setOperationAction(ISD::VAARG, MVT::Other, Expand); // Use the default implementation. @@ -1262,9 +1263,110 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const { + SDNode *Node = Op.getNode(); + EVT VT = Node->getValueType(0); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SDValue InChain = Node->getOperand(0); + SDValue VAListPtr = Node->getOperand(1); + const Value *SV = cast(Node->getOperand(2))->getValue(); + DebugLoc dl = Node->getDebugLoc(); - llvm_unreachable("VAARG not yet implemented for the SVR4 ABI!"); - return SDValue(); // Not reached + assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); + + // gpr_index + SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, + VAListPtr, MachinePointerInfo(SV), MVT::i8, + false, false, 0); + InChain = GprIndex.getValue(1); + + if (VT == MVT::i64) { + // Check if GprIndex is even + SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, + DAG.getConstant(1, MVT::i32)); + SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, + DAG.getConstant(0, MVT::i32), ISD::SETNE); + SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, + DAG.getConstant(1, MVT::i32)); + // Align GprIndex to be even if it isn't + GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, + GprIndex); + } + + // fpr index is 1 byte after gpr + SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, + DAG.getConstant(1, MVT::i32)); + + // fpr + SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, + FprPtr, MachinePointerInfo(SV), MVT::i8, + false, false, 0); + InChain = FprIndex.getValue(1); + + SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, + DAG.getConstant(8, MVT::i32)); + + SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, + DAG.getConstant(4, MVT::i32)); + + // areas + SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, + MachinePointerInfo(), false, false, 0); + InChain = OverflowArea.getValue(1); + + SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, + MachinePointerInfo(), false, false, 0); + InChain = RegSaveArea.getValue(1); + + // select overflow_area if index > 8 + SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, + DAG.getConstant(8, MVT::i32), ISD::SETLT); + + SDValue Area = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, RegSaveArea, + OverflowArea); + + // adjustment constant gpr_index * 4/8 + SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, + VT.isInteger() ? GprIndex : FprIndex, + DAG.getConstant(VT.isInteger() ? 4 : 8, + MVT::i32)); + + // OurReg = RegSaveArea + RegConstant + SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, + RegConstant); + + // Floating types are 32 bytes into RegSaveArea + if (VT.isFloatingPoint()) + OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, + DAG.getConstant(32, MVT::i32)); + + // increase {f,g}pr_index by 1 (or 2 if VT is i64) + SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, + VT.isInteger() ? GprIndex : FprIndex, + DAG.getConstant(VT == MVT::i64 ? 2 : 1, + MVT::i32)); + + InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, + VT.isInteger() ? VAListPtr : FprPtr, + MachinePointerInfo(SV), + MVT::i8, false, false, 0); + + // determine if we should load from reg_save_area or overflow_area + SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); + + // increase overflow_area by 4/8 if gpr/fpr > 8 + SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, + DAG.getConstant(VT.isInteger() ? 4 : 8, + MVT::i32)); + + OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, + OverflowAreaPlusN); + + InChain = DAG.getTruncStore(InChain, dl, OverflowArea, + OverflowAreaPtr, + MachinePointerInfo(), + MVT::i32, false, false, 0); + + return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0); } SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, @@ -4429,11 +4531,27 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { void PPCTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, SelectionDAG &DAG) const { + const TargetMachine &TM = getTargetMachine(); DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { default: assert(false && "Do not know how to custom type legalize this operation!"); return; + case ISD::VAARG: { + if (!TM.getSubtarget().isSVR4ABI() + || TM.getSubtarget().isPPC64()) + return; + + EVT VT = N->getValueType(0); + + if (VT == MVT::i64) { + SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget); + + Results.push_back(NewNode); + Results.push_back(NewNode.getValue(1)); + } + return; + } case ISD::FP_ROUND_INREG: { assert(N->getValueType(0) == MVT::ppcf128); assert(N->getOperand(0).getValueType() == MVT::ppcf128); diff --git a/test/CodeGen/PowerPC/ppc32-vaarg.ll b/test/CodeGen/PowerPC/ppc32-vaarg.ll new file mode 100644 index 00000000000..604299183f9 --- /dev/null +++ b/test/CodeGen/PowerPC/ppc32-vaarg.ll @@ -0,0 +1,167 @@ +; RUN: llc -O0 < %s | FileCheck %s +;ModuleID = 'test.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" +target triple = "powerpc-unknown-freebsd9.0" + +%struct.__va_list_tag = type { i8, i8, i16, i8*, i8* } + +@var1 = common global i64 0, align 8 +@var2 = common global double 0.0, align 8 +@var3 = common global i32 0, align 4 + +define void @ppcvaargtest(%struct.__va_list_tag* %ap) nounwind { + entry: + %x = va_arg %struct.__va_list_tag* %ap, i64; Get from r5,r6 +; CHECK: lbz 4, 0(3) +; CHECK-NEXT: lwz 5, 4(3) +; CHECK-NEXT: rlwinm 6, 4, 0, 31, 31 +; CHECK-NEXT: cmplwi 0, 6, 0 +; CHECK-NEXT: addi 6, 4, 1 +; CHECK-NEXT: stw 3, -4(1) +; CHECK-NEXT: stw 6, -8(1) +; CHECK-NEXT: stw 4, -12(1) +; CHECK-NEXT: stw 5, -16(1) +; CHECK-NEXT: bne 0, .LBB0_2 +; CHECK-NEXT: # BB#1: # %entry +; CHECK-NEXT: lwz 3, -12(1) +; CHECK-NEXT: stw 3, -8(1) +; CHECK-NEXT: .LBB0_2: # %entry +; CHECK-NEXT: lwz 3, -8(1) +; CHECK-NEXT: lwz 4, -4(1) +; CHECK-NEXT: lwz 5, 8(4) +; CHECK-NEXT: slwi 6, 3, 2 +; CHECK-NEXT: addi 7, 3, 2 +; CHECK-NEXT: cmpwi 0, 3, 8 +; CHECK-NEXT: lwz 3, -16(1) +; CHECK-NEXT: addi 8, 3, 4 +; CHECK-NEXT: add 5, 5, 6 +; CHECK-NEXT: mfcr 0 # cr0 +; CHECK-NEXT: stw 0, -20(1) +; CHECK-NEXT: stw 5, -24(1) +; CHECK-NEXT: stw 3, -28(1) +; CHECK-NEXT: stw 7, -32(1) +; CHECK-NEXT: stw 8, -36(1) +; CHECK-NEXT: blt 0, .LBB0_4 +; CHECK-NEXT: # BB#3: # %entry +; CHECK-NEXT: lwz 3, -36(1) +; CHECK-NEXT: stw 3, -28(1) +; CHECK-NEXT: .LBB0_4: # %entry +; CHECK-NEXT: lwz 3, -28(1) +; CHECK-NEXT: lwz 4, -32(1) +; CHECK-NEXT: lwz 5, -4(1) +; CHECK-NEXT: stb 4, 0(5) +; CHECK-NEXT: lwz 4, -24(1) +; CHECK-NEXT: lwz 0, -20(1) +; CHECK-NEXT: mtcrf 128, 0 +; CHECK-NEXT: stw 3, -40(1) +; CHECK-NEXT: stw 4, -44(1) +; CHECK-NEXT: blt 0, .LBB0_6 +; CHECK-NEXT: # BB#5: # %entry +; CHECK-NEXT: lwz 3, -16(1) +; CHECK-NEXT: stw 3, -44(1) +; CHECK-NEXT: .LBB0_6: # %entry +; CHECK-NEXT: lwz 3, -44(1) +; CHECK-NEXT: lwz 4, -40(1) +; CHECK-NEXT: lwz 5, -4(1) +; CHECK-NEXT: stw 4, 4(5) + store i64 %x, i64* @var1, align 8 +; CHECK-NEXT: lis 4, var1@ha +; CHECK-NEXT: lwz 6, 4(3) +; CHECK-NEXT: lwz 3, 0(3) +; CHECK-NEXT: la 7, var1@l(4) +; CHECK-NEXT: stw 3, var1@l(4) +; CHECK-NEXT: stw 6, 4(7) + %y = va_arg %struct.__va_list_tag* %ap, double; From f1 +; CHECK-NEXT: lbz 3, 1(5) +; CHECK-NEXT: lwz 4, 4(5) +; CHECK-NEXT: lwz 6, 8(5) +; CHECK-NEXT: slwi 7, 3, 3 +; CHECK-NEXT: add 6, 6, 7 +; CHECK-NEXT: addi 7, 3, 1 +; CHECK-NEXT: cmpwi 0, 3, 8 +; CHECK-NEXT: addi 3, 4, 8 +; CHECK-NEXT: addi 6, 6, 32 +; CHECK-NEXT: mr 8, 4 +; CHECK-NEXT: mfcr 0 # cr0 +; CHECK-NEXT: stw 0, -48(1) +; CHECK-NEXT: stw 4, -52(1) +; CHECK-NEXT: stw 6, -56(1) +; CHECK-NEXT: stw 7, -60(1) +; CHECK-NEXT: stw 3, -64(1) +; CHECK-NEXT: stw 8, -68(1) +; CHECK-NEXT: blt 0, .LBB0_8 +; CHECK-NEXT: # BB#7: # %entry +; CHECK-NEXT: lwz 3, -64(1) +; CHECK-NEXT: stw 3, -68(1) +; CHECK-NEXT: .LBB0_8: # %entry +; CHECK-NEXT: lwz 3, -68(1) +; CHECK-NEXT: lwz 4, -60(1) +; CHECK-NEXT: lwz 5, -4(1) +; CHECK-NEXT: stb 4, 1(5) +; CHECK-NEXT: lwz 4, -56(1) +; CHECK-NEXT: lwz 0, -48(1) +; CHECK-NEXT: mtcrf 128, 0 +; CHECK-NEXT: stw 4, -72(1) +; CHECK-NEXT: stw 3, -76(1) +; CHECK-NEXT: blt 0, .LBB0_10 +; CHECK-NEXT: # BB#9: # %entry +; CHECK-NEXT: lwz 3, -52(1) +; CHECK-NEXT: stw 3, -72(1) +; CHECK-NEXT: .LBB0_10: # %entry +; CHECK-NEXT: lwz 3, -72(1) +; CHECK-NEXT: lwz 4, -76(1) +; CHECK-NEXT: lwz 5, -4(1) +; CHECK-NEXT: stw 4, 4(5) +; CHECK-NEXT: lfd 0, 0(3) + store double %y, double* @var2, align 8 +; CHECK-NEXT: lis 3, var2@ha +; CHECK-NEXT: stfd 0, var2@l(3) + %z = va_arg %struct.__va_list_tag* %ap, i32; From r7 +; CHECK-NEXT: lbz 3, 0(5) +; CHECK-NEXT: lwz 4, 4(5) +; CHECK-NEXT: lwz 6, 8(5) +; CHECK-NEXT: slwi 7, 3, 2 +; CHECK-NEXT: addi 8, 3, 1 +; CHECK-NEXT: cmpwi 0, 3, 8 +; CHECK-NEXT: addi 3, 4, 4 +; CHECK-NEXT: add 6, 6, 7 +; CHECK-NEXT: mr 7, 4 +; CHECK-NEXT: stw 6, -80(1) +; CHECK-NEXT: stw 8, -84(1) +; CHECK-NEXT: stw 3, -88(1) +; CHECK-NEXT: stw 4, -92(1) +; CHECK-NEXT: stw 7, -96(1) +; CHECK-NEXT: mfcr 0 # cr0 +; CHECK-NEXT: stw 0, -100(1) +; CHECK-NEXT: blt 0, .LBB0_12 +; CHECK-NEXT: # BB#11: # %entry +; CHECK-NEXT: lwz 3, -88(1) +; CHECK-NEXT: stw 3, -96(1) +; CHECK-NEXT: .LBB0_12: # %entry +; CHECK-NEXT: lwz 3, -96(1) +; CHECK-NEXT: lwz 4, -84(1) +; CHECK-NEXT: lwz 5, -4(1) +; CHECK-NEXT: stb 4, 0(5) +; CHECK-NEXT: lwz 4, -80(1) +; CHECK-NEXT: lwz 0, -100(1) +; CHECK-NEXT: mtcrf 128, 0 +; CHECK-NEXT: stw 4, -104(1) +; CHECK-NEXT: stw 3, -108(1) +; CHECK-NEXT: blt 0, .LBB0_14 +; CHECK-NEXT: # BB#13: # %entry +; CHECK-NEXT: lwz 3, -92(1) +; CHECK-NEXT: stw 3, -104(1) +; CHECK-NEXT: .LBB0_14: # %entry +; CHECK-NEXT: lwz 3, -104(1) +; CHECK-NEXT: lwz 4, -108(1) +; CHECK-NEXT: lwz 5, -4(1) +; CHECK-NEXT: stw 4, 4(5) +; CHECK-NEXT: lwz 3, 0(3) + store i32 %z, i32* @var3, align 4 +; CHECK-NEXT: lis 4, var3@ha +; CHECK-NEXT: stw 3, var3@l(4) + ret void +; CHECK-NEXT: stw 5, -112(1) +; CHECK-NEXT: blr +} +