diff --git a/lib/Target/IA64/IA64ISelPattern.cpp b/lib/Target/IA64/IA64ISelPattern.cpp index 1c53950bfe8..1b1c81fdcc9 100644 --- a/lib/Target/IA64/IA64ISelPattern.cpp +++ b/lib/Target/IA64/IA64ISelPattern.cpp @@ -118,6 +118,12 @@ namespace { BuildMI(BB, IA64::MOV, 1, IA64::rp).addReg(RP); } + void restoreSP_RP(MachineBasicBlock* BB) + { + BuildMI(BB, IA64::MOV, 1, IA64::r12).addReg(SP); + BuildMI(BB, IA64::MOV, 1, IA64::rp).addReg(RP); + } + void restoreRP(MachineBasicBlock* BB) { BuildMI(BB, IA64::MOV, 1, IA64::rp).addReg(RP); @@ -159,9 +165,11 @@ IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { unsigned argPreg[8]; unsigned argOpc[8]; - unsigned used_FPArgs=0; // how many FP args have been used so far? - + unsigned used_FPArgs = 0; // how many FP args have been used so far? + + unsigned ArgOffset = 0; int count = 0; + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { SDOperand newroot, argt; @@ -206,8 +214,9 @@ IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { } } else { // more than 8 args go into the frame // Create the frame index object for this incoming parameter... - int FI = MFI->CreateFixedObject(8, 16 + 8 * (count - 8)); - + ArgOffset = 16 + 8 * (count - 8); + int FI = MFI->CreateFixedObject(8, ArgOffset); + // Create the SelectionDAG nodes corresponding to a load //from this parameter SDOperand FIN = DAG.getFrameIndex(FI, MVT::i64); @@ -218,9 +227,10 @@ IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { DAG.setRoot(newroot.getValue(1)); ArgValues.push_back(argt); } - -// Create a vreg to hold the output of (what will become) -// the "alloc" instruction + + + // Create a vreg to hold the output of (what will become) + // the "alloc" instruction VirtGPR = MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64)); BuildMI(&BB, IA64::PSEUDO_ALLOC, 0, VirtGPR); // we create a PSEUDO_ALLOC (pseudo)instruction for now @@ -239,10 +249,29 @@ IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { BuildMI(&BB, IA64::MOV, 1, RP).addReg(IA64::rp); // ..hmm. - for (int i = 0; i < count && i < 8; ++i) { - BuildMI(&BB, argOpc[i], 1, argVreg[i]).addReg(argPreg[i]); + unsigned tempOffset=0; + + // if this is a varargs function, we simply lower llvm.va_start by + // pointing to the first entry + if(F.isVarArg()) { + tempOffset=0; + VarArgsFrameIndex = MFI->CreateFixedObject(8, tempOffset); } + // here we actually do the moving of args, and store them to the stack + // too if this is a varargs function: + for (int i = 0; i < count && i < 8; ++i) { + BuildMI(&BB, argOpc[i], 1, argVreg[i]).addReg(argPreg[i]); + if(F.isVarArg()) { + // if this is a varargs function, we copy the input registers to the stack + int FI = MFI->CreateFixedObject(8, tempOffset); + tempOffset+=8; //XXX: is it safe to use r22 like this? + BuildMI(&BB, IA64::MOV, 1, IA64::r22).addFrameIndex(FI); + // FIXME: we should use st8.spill here, one day + BuildMI(&BB, IA64::ST8, 1, IA64::r22).addReg(argPreg[i]); + } + } + return ArgValues; } @@ -253,19 +282,26 @@ IA64TargetLowering::LowerCallTo(SDOperand Chain, MachineFunction &MF = DAG.getMachineFunction(); -// fow now, we are overly-conservative and pretend that all 8 -// outgoing registers (out0-out7) are always used. FIXME - -// update comment line 137 of MachineFunction.h - MF.getInfo()->outRegsUsed=8; - unsigned NumBytes = 16; - if (Args.size() > 8) + unsigned outRegsUsed = 0; + + if (Args.size() > 8) { NumBytes += (Args.size() - 8) * 8; + outRegsUsed = 8; + } else { + outRegsUsed = Args.size(); + } + + // FIXME? this WILL fail if we ever try to pass around an arg that + // consumes more than a single output slot (a 'real' double, int128 + // some sort of aggregate etc.), as we'll underestimate how many 'outX' + // registers we use. Hopefully, the assembler will notice. + MF.getInfo()->outRegsUsed= + std::max(outRegsUsed, MF.getInfo()->outRegsUsed); Chain = DAG.getNode(ISD::ADJCALLSTACKDOWN, MVT::Other, Chain, DAG.getConstant(NumBytes, getPointerTy())); - + std::vector args_to_use; for (unsigned i = 0, e = Args.size(); i != e; ++i) { @@ -317,11 +353,25 @@ IA64TargetLowering::LowerVAStart(SDOperand Chain, SelectionDAG &DAG) { std::pair IA64TargetLowering:: LowerVAArgNext(bool isVANext, SDOperand Chain, SDOperand VAList, const Type *ArgTy, SelectionDAG &DAG) { - - assert(0 && "LowerVAArgNext not done yet!\n"); - abort(); + + MVT::ValueType ArgVT = getValueType(ArgTy); + SDOperand Result; + if (!isVANext) { + Result = DAG.getLoad(ArgVT, DAG.getEntryNode(), VAList); + } else { + unsigned Amt; + if (ArgVT == MVT::i32 || ArgVT == MVT::f32) + Amt = 8; + else { + assert((ArgVT == MVT::i64 || ArgVT == MVT::f64) && + "Other types should have been promoted for varargs!"); + Amt = 8; + } + Result = DAG.getNode(ISD::ADD, VAList.getValueType(), VAList, + DAG.getConstant(Amt, VAList.getValueType())); + } + return std::make_pair(Result, Chain); } - std::pair IA64TargetLowering:: LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth, @@ -469,7 +519,8 @@ unsigned ISel::SelectExpr(SDOperand N) { << " the stack alignment yet!"; abort(); } - + +/* Select(N.getOperand(0)); if (ConstantSDNode* CN = dyn_cast(N.getOperand(1))) { @@ -487,7 +538,11 @@ unsigned ISel::SelectExpr(SDOperand N) { // Subtract size from stack pointer, thereby allocating some space. BuildMI(BB, IA64::SUB, 2, IA64::r12).addReg(IA64::r12).addReg(Tmp1); } - +*/ + Select(N.getOperand(0)); + Tmp1 = SelectExpr(N.getOperand(1)); + // Subtract size from stack pointer, thereby allocating some space. + BuildMI(BB, IA64::SUB, 2, IA64::r12).addReg(IA64::r12).addReg(Tmp1); // Put a pointer to the space into the result register, by copying the // stack pointer. BuildMI(BB, IA64::MOV, 1, Result).addReg(IA64::r12); @@ -569,16 +624,17 @@ unsigned ISel::SelectExpr(SDOperand N) { case ISD::GlobalAddress: { GlobalValue *GV = cast(N)->getGlobal(); unsigned Tmp1 = MakeReg(MVT::i64); + BuildMI(BB, IA64::ADD, 2, Tmp1).addGlobalAddress(GV).addReg(IA64::r1); - //r1==GP BuildMI(BB, IA64::LD8, 1, Result).addReg(Tmp1); + return Result; } case ISD::ExternalSymbol: { const char *Sym = cast(N)->getSymbol(); - assert(0 && "ISD::ExternalSymbol not done yet\n"); - //XXX BuildMI(BB, IA64::MOV, 1, Result).addExternalSymbol(Sym); +// assert(0 && "sorry, but what did you want an ExternalSymbol for again?"); + BuildMI(BB, IA64::MOV, 1, Result).addExternalSymbol(Sym); // XXX return Result; } @@ -944,7 +1000,8 @@ pC = pA OR pB case ISD::UREM: isModulus=true; isSigned=false; break; } - unsigned TmpPR=MakeReg(MVT::i1); // we need a scratch predicate register, + unsigned TmpPR=MakeReg(MVT::i1); // we need two scratch + unsigned TmpPR2=MakeReg(MVT::i1); // predicate registers, unsigned TmpF1=MakeReg(MVT::f64); // and one metric truckload of FP regs. unsigned TmpF2=MakeReg(MVT::f64); // lucky we have IA64? unsigned TmpF3=MakeReg(MVT::f64); // well, the real FIXME is to have @@ -960,7 +1017,7 @@ pC = pA OR pB unsigned TmpF13=MakeReg(MVT::f64); unsigned TmpF14=MakeReg(MVT::f64); unsigned TmpF15=MakeReg(MVT::f64); - + // OK, emit some code: if(!isFP) { @@ -989,6 +1046,14 @@ pC = pA OR pB BuildMI(BB, IA64::FRCPAS1FLOAT, 2, TmpF5).addReg(TmpF3).addReg(TmpF4); BuildMI(BB, IA64::FRCPAS1PREDICATE, 2, TmpPR).addReg(TmpF3).addReg(TmpF4); + if(!isModulus) { // if this is a divide, we worry about div-by-zero + unsigned bogusPR=MakeReg(MVT::i1); // won't appear, due to twoAddress + // TPCMPNE below + BuildMI(BB, IA64::CMPEQ, 2, bogusPR).addReg(IA64::r0).addReg(IA64::r0); + BuildMI(BB, IA64::TPCMPNE, 3, TmpPR2).addReg(bogusPR) + .addReg(IA64::r0).addReg(IA64::r0).addReg(TmpPR); + } + // now we apply newton's method, thrice! (FIXME: this is ~72 bits of // precision, don't need this much for f32/i32) BuildMI(BB, IA64::CFNMAS1, 4, TmpF6) @@ -1023,8 +1088,16 @@ pC = pA OR pB } if(!isModulus) { - if(isFP) - BuildMI(BB, IA64::FMOV, 1, Result).addReg(TmpF15); + if(isFP) { // extra worrying about div-by-zero + unsigned bogoResult=MakeReg(MVT::f64); + + // we do a 'conditional fmov' (of the correct result, depending + // on how the frcpa predicate turned out) + BuildMI(BB, IA64::PFMOV, 2, bogoResult) + .addReg(TmpF12).addReg(TmpPR2); + BuildMI(BB, IA64::CFMOV, 2, Result) + .addReg(bogoResult).addReg(TmpF15).addReg(TmpPR); + } else BuildMI(BB, IA64::GETFSIG, 1, Result).addReg(TmpF15); } else { // this is a modulus @@ -1329,6 +1402,7 @@ pC = pA OR pB case MVT::f64: BuildMI(BB, IA64::FMOV, 1, FPArgs[used_FPArgs++]) .addReg(argvregs[i]); + // FIXME: we don't need to do this _all_ the time: BuildMI(BB, IA64::GETFD, 1, intArgs[i]).addReg(argvregs[i]); break; } @@ -1363,6 +1437,9 @@ pC = pA OR pB break; } } + + /* XXX we want to re-enable direct branches! crippling them now + * to stress-test indirect branches.: //build the right kind of call if (GlobalAddressSDNode *GASD = dyn_cast(N.getOperand(1))) @@ -1370,23 +1447,43 @@ pC = pA OR pB BuildMI(BB, IA64::BRCALL, 1).addGlobalAddress(GASD->getGlobal(),true); IA64Lowering.restoreGP_SP_RP(BB); } - - else if (ExternalSymbolSDNode *ESSDN = + ^^^^^^^^^^^^^ we want this code one day XXX */ + if (ExternalSymbolSDNode *ESSDN = dyn_cast(N.getOperand(1))) - { - BuildMI(BB, IA64::BRCALL, 0) + { // FIXME : currently need this case for correctness, to avoid + // "non-pic code with imm relocation against dynamic symbol" errors + BuildMI(BB, IA64::BRCALL, 1) .addExternalSymbol(ESSDN->getSymbol(), true); IA64Lowering.restoreGP_SP_RP(BB); } else { - // no need to restore GP as we are doing an indirect call Tmp1 = SelectExpr(N.getOperand(1)); - // b6 is a scratch branch register, we load the target: - BuildMI(BB, IA64::MOV, 1, IA64::B6).addReg(Tmp1); + + unsigned targetEntryPoint=MakeReg(MVT::i64); + unsigned targetGPAddr=MakeReg(MVT::i64); + unsigned currentGP=MakeReg(MVT::i64); + + // b6 is a scratch branch register, we load the target entry point + // from the base of the function descriptor + BuildMI(BB, IA64::LD8, 1, targetEntryPoint).addReg(Tmp1); + BuildMI(BB, IA64::MOV, 1, IA64::B6).addReg(targetEntryPoint); + + // save the current GP: + BuildMI(BB, IA64::MOV, 1, currentGP).addReg(IA64::r1); + + /* TODO: we need to make sure doing this never, ever loads a + * bogus value into r1 (GP). */ + // load the target GP (which is at mem[functiondescriptor+8]) + BuildMI(BB, IA64::ADDIMM22, 2, targetGPAddr) + .addReg(Tmp1).addImm(8); // FIXME: addimm22? why not postincrement ld + BuildMI(BB, IA64::LD8, 1, IA64::r1).addReg(targetGPAddr); + // and then jump: (well, call) BuildMI(BB, IA64::BRCALL, 1).addReg(IA64::B6); - IA64Lowering.restoreGP_SP_RP(BB); - } + // and finally restore the old GP + BuildMI(BB, IA64::MOV, 1, IA64::r1).addReg(currentGP); + IA64Lowering.restoreSP_RP(BB); + } switch (Node->getValueType(0)) { default: assert(0 && "Unknown value type for call result!"); diff --git a/lib/Target/IA64/README b/lib/Target/IA64/README index 86606dd827b..ec94299e28f 100644 --- a/lib/Target/IA64/README +++ b/lib/Target/IA64/README @@ -77,7 +77,9 @@ INFO: KNOWN DEFECTS AT THE CURRENT TIME: - - no varargs + - C++ vtables contain naked function pointers, not function descriptors, + which is bad. see http://llvm.cs.uiuc.edu/bugs/show_bug.cgi?id=406 + - varargs are broken - alloca doesn't work (indeed, stack frame layout is bogus) - no support for big-endian environments - (not really the backend, but...) the CFE has some issues on IA64.