diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp
index 6b87f7ecf6b..2e1ea044c1e 100644
--- a/lib/Target/X86/InstSelectSimple.cpp
+++ b/lib/Target/X86/InstSelectSimple.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/CodeGen/FunctionFrameInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/InstVisitor.h"
 #include "llvm/Target/MRegisterInfo.h"
@@ -73,29 +74,12 @@ namespace {
     bool runOnFunction(Function &Fn) {
       F = &MachineFunction::construct(&Fn, TM);
 
+      // Create all of the machine basic blocks for the function...
       for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
         F->getBasicBlockList().push_back(MBBMap[I] = new MachineBasicBlock(I));
 
-      // Emit instructions to load the arguments...  The function's arguments
-      // look like this:
-      //
-      // [EBP]     -- copy of old EBP
-      // [EBP + 4] -- return address
-      // [EBP + 8] -- first argument (leftmost lexically)
-      //
-      // So we want to start with counter = 2.
-      //
       BB = &F->front();
-      unsigned ArgOffset = 8;
-      for (Function::aiterator I = Fn.abegin(), E = Fn.aend(); I != E;
-           ++I, ArgOffset += 4) {
-        unsigned Reg = getReg(*I);
-
-        // Load it out of the stack frame at EBP + 4*argPos.
-
-        // FIXME: This should load the argument of the appropriate size!!
-        addRegOffset(BuildMI(BB, X86::MOVmr32, 4, Reg), X86::EBP, ArgOffset);
-      }
+      LoadArgumentsToVirtualRegs(Fn);
 
       // Instruction select everything except PHI nodes
       visit(Fn);
@@ -123,6 +107,10 @@ namespace {
       BB = MBBMap[&LLVM_BB];
     }
 
+    /// LoadArgumentsToVirtualRegs - Load all of the arguments to this function
+    /// from the stack into virtual registers.
+    ///
+    void LoadArgumentsToVirtualRegs(Function &F);
 
     /// SelectPHINodes - Insert machine code to generate phis.  This is tricky
     /// because we have to generate our sources into the source basic blocks,
@@ -170,9 +158,12 @@ namespace {
     void visitLoadInst(LoadInst &I);
     void visitStoreInst(StoreInst &I);
     void visitGetElementPtrInst(GetElementPtrInst &I);
-    void visitMallocInst(MallocInst &I);
-    void visitFreeInst(FreeInst &I);
     void visitAllocaInst(AllocaInst &I);
+
+    // We assume that by this point, malloc instructions have been
+    // lowered to calls, and dlsym will magically find malloc for us.
+    void visitMallocInst(MallocInst &I) { visitInstruction (I); }
+    void visitFreeInst(FreeInst &I) { visitInstruction(I); }
 
     // Other operators
     void visitShiftInst(ShiftInst &I);
@@ -341,6 +332,59 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB,
   }
 }
 
+/// LoadArgumentsToVirtualRegs - Load all of the arguments to this function from
+/// the stack into virtual registers.
+///
+void ISel::LoadArgumentsToVirtualRegs(Function &Fn) {
+  // Emit instructions to load the arguments...  On entry to a function on the
+  // X86, the stack frame looks like this:
+  //
+  // [ESP]     -- return address
+  // [ESP + 4] -- first argument (leftmost lexically) if four bytes in size
+  // [ESP + 8] -- second argument, if four bytes in size
+  //     ...
+  //
+  unsigned ArgOffset = 0;
+  FunctionFrameInfo *FFI = F->getFrameInfo();
+
+  for (Function::aiterator I = Fn.abegin(), E = Fn.aend(); I != E; ++I) {
+    unsigned Reg = getReg(*I);
+
+    ArgOffset += 4;      // Each argument takes at least 4 bytes on the stack...
+    int FI;              // Frame object index
+
+    switch (getClassB(I->getType())) {
+    case cByte:
+      FI = FFI->CreateFixedObject(1, ArgOffset);
+      addFrameReference(BuildMI(BB, X86::MOVmr8, 4, Reg), FI);
+      break;
+    case cShort:
+      FI = FFI->CreateFixedObject(2, ArgOffset);
+      addFrameReference(BuildMI(BB, X86::MOVmr16, 4, Reg), FI);
+      break;
+    case cInt:
+      FI = FFI->CreateFixedObject(4, ArgOffset);
+      addFrameReference(BuildMI(BB, X86::MOVmr32, 4, Reg), FI);
+      break;
+    case cFP:
+      unsigned Opcode;
+      if (I->getType() == Type::FloatTy) {
+        Opcode = X86::FLDr32;
+        FI = FFI->CreateFixedObject(4, ArgOffset);
+      } else {
+        Opcode = X86::FLDr64;
+        ArgOffset += 4;   // doubles require 4 additional bytes
+        FI = FFI->CreateFixedObject(8, ArgOffset);
+      }
+      addFrameReference(BuildMI(BB, Opcode, 4, Reg), FI);
+      break;
+    default:
+      assert(0 && "Unhandled argument type!");
+    }
+  }
+}
+
+
 /// SelectPHINodes - Insert machine code to generate phis.  This is tricky
 /// because we have to generate our sources into the source basic blocks, not
 /// the current one.
@@ -366,7 +410,8 @@ void ISel::SelectPHINodes() {
       // available in a virtual register, insert the computation code into
      // PredMBB
      //
-
+      // FIXME: This should insert the code into the BOTTOM of the block, not
+      // the top of the block.  This just makes for huge live ranges...
       MachineBasicBlock::iterator PI = PredMBB->begin();
       while ((*PI)->getOpcode() == X86::PHI) ++PI;
 
@@ -532,7 +577,7 @@ void ISel::visitBranchInst(BranchInst &BI) {
 
   // Compare condition with zero, followed by jump-if-equal to ifFalse, and
   // jump-if-nonequal to ifTrue
-  unsigned int condReg = getReg(BI.getCondition());
+  unsigned condReg = getReg(BI.getCondition());
   BuildMI(BB, X86::CMPri8, 2).addReg(condReg).addZImm(0);
   BuildMI(BB, X86::JNE, 1).addPCDisp(BI.getSuccessor(0));
   BuildMI(BB, X86::JE, 1).addPCDisp(BI.getSuccessor(1));
@@ -543,32 +588,64 @@ void ISel::visitBranchInst(BranchInst &BI) {
 
 /// visitCallInst - Push args on stack and do a procedure call instruction.
 void ISel::visitCallInst(CallInst &CI) {
-  // keep a counter of how many bytes we pushed on the stack
-  unsigned bytesPushed = 0;
+  // Count how many bytes are to be pushed on the stack...
+  unsigned NumBytes = 0;
 
-  // Push the arguments on the stack in reverse order, as specified by
-  // the ABI.
-  for (unsigned i = CI.getNumOperands()-1; i >= 1; --i) {
-    Value *v = CI.getOperand(i);
-    switch (getClass(v->getType())) {
-    case cByte:
-    case cShort:
-      // Promote V to 32 bits wide, and move the result into EAX,
-      // then push EAX.
-      promote32 (X86::EAX, v);
-      BuildMI(BB, X86::PUSHr32, 1).addReg(X86::EAX);
-      bytesPushed += 4;
-      break;
-    case cInt: {
-      unsigned Reg = getReg(v);
-      BuildMI(BB, X86::PUSHr32, 1).addReg(Reg);
-      bytesPushed += 4;
-      break;
-    }
-    default:
-      // FIXME: long/ulong/float/double args not handled.
-      visitInstruction(CI);
-      break;
+  if (CI.getNumOperands() > 1) {
+    for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i)
+      switch (getClass(CI.getOperand(i)->getType())) {
+      case cByte: case cShort: case cInt:
+        NumBytes += 4;
+        break;
+      case cLong:
+        NumBytes += 8;
+        break;
+      case cFP:
+        NumBytes += CI.getOperand(i)->getType() == Type::FloatTy ? 4 : 8;
+        break;
+      default: assert(0 && "Unknown class!");
+      }
+
+    // Adjust the stack pointer for the new arguments...
+    BuildMI(BB, X86::ADJCALLSTACKDOWN, 1).addZImm(NumBytes);
+
+    // Arguments go on the stack in reverse order, as specified by the ABI.
+    unsigned ArgOffset = 0;
+    for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i) {
+      Value *Arg = CI.getOperand(i);
+      switch (getClass(Arg->getType())) {
+      case cByte:
+      case cShort: {
+        // Promote arg to 32 bits wide into a temporary register...
+        unsigned R = makeAnotherReg(Type::UIntTy);
+        promote32(R, Arg);
+        addRegOffset(BuildMI(BB, X86::MOVrm32, 5),
+                     X86::ESP, ArgOffset).addReg(R);
+        break;
+      }
+      case cInt:
+        addRegOffset(BuildMI(BB, X86::MOVrm32, 5),
+                     X86::ESP, ArgOffset).addReg(getReg(Arg));
+        break;
+
+      case cFP:
+        if (Arg->getType() == Type::FloatTy) {
+          addRegOffset(BuildMI(BB, X86::FSTr32, 5),
+                       X86::ESP, ArgOffset).addReg(getReg(Arg));
+        } else {
+          assert(Arg->getType() == Type::DoubleTy && "Unknown FP type!");
+          ArgOffset += 4;
+          addRegOffset(BuildMI(BB, X86::FSTr32, 5),
+                       X86::ESP, ArgOffset).addReg(getReg(Arg));
+        }
+        break;
+
+      default:
+        // FIXME: long/ulong/float/double args not handled.
+        visitInstruction(CI);
+        break;
+      }
+      ArgOffset += 4;
     }
   }
 
@@ -580,9 +657,7 @@ void ISel::visitCallInst(CallInst &CI) {
     BuildMI(BB, X86::CALLr32, 1).addReg(Reg);
   }
 
-  // Adjust the stack by `bytesPushed' amount if non-zero
-  if (bytesPushed > 0)
-    BuildMI(BB, X86::ADDri32,2, X86::ESP).addReg(X86::ESP).addZImm(bytesPushed);
+  BuildMI(BB, X86::ADJCALLSTACKUP, 1).addZImm(NumBytes);
 
   // If there is a return value, scavenge the result from the location the call
   // leaves it in...
@@ -771,7 +846,7 @@ void ISel::visitShiftInst (ShiftInst &I) {
   if (OperandClass > cInt)
     visitInstruction(I); // Can't handle longs yet!
 
-  if (ConstantUInt *CUI = dyn_cast <ConstantUInt> (I.getOperand (1)))
+  if (ConstantUInt *CUI = dyn_cast <ConstantUInt> (I.getOperand (1))) {
     // The shift amount is constant, guaranteed to be a ubyte. Get its value.
     assert(CUI->getType() == Type::UByteTy && "Shift amount not a ubyte?");
 
@@ -956,9 +1031,9 @@ ISel::visitCastInst (CastInst &CI) {
 
   const Type *targetType = CI.getType ();
   Value *operand = CI.getOperand (0);
-  unsigned int operandReg = getReg (operand);
+  unsigned operandReg = getReg (operand);
   const Type *sourceType = operand->getType ();
-  unsigned int destReg = getReg (CI);
+  unsigned destReg = getReg (CI);
   //
   // Currently we handle:
   //
@@ -1075,11 +1150,11 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
        oe = IdxEnd; oi != oe; ++oi) {
     Value *idx = *oi;
     unsigned nextBasePtrReg = makeAnotherReg(Type::UIntTy);
-    if (const StructType *StTy = dyn_cast <StructType> (Ty)) {
+    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
       // It's a struct access. idx is the index into the structure,
       // which names the field. This index must have ubyte type.
-      const ConstantUInt *CUI = cast <ConstantUInt> (idx);
-      assert (CUI->getType () == Type::UByteTy
+      const ConstantUInt *CUI = cast<ConstantUInt>(idx);
+      assert(CUI->getType() == Type::UByteTy
               && "Funny-looking structure index in GEP");
       // Use the TargetData structure to pick out what the layout of
       // the structure is in memory. Since the structure index must
@@ -1088,14 +1163,14 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
       // structure member offsets.
       unsigned idxValue = CUI->getValue();
       unsigned memberOffset =
-        TD.getStructLayout (StTy)->MemberOffsets[idxValue];
+        TD.getStructLayout(StTy)->MemberOffsets[idxValue];
       // Emit an ADD to add memberOffset to the basePtr.
       BMI(MBB, IP, X86::ADDri32, 2,
-          nextBasePtrReg).addReg (basePtrReg).addZImm (memberOffset);
+          nextBasePtrReg).addReg(basePtrReg).addZImm(memberOffset);
       // The next type is the member of the structure selected by the
       // index.
-      Ty = StTy->getElementTypes ()[idxValue];
-    } else if (const SequentialType *SqTy = cast <SequentialType> (Ty)) {
+      Ty = StTy->getElementTypes()[idxValue];
+    } else if (const SequentialType *SqTy = cast<SequentialType>(Ty)) {
       // It's an array or pointer access: [ArraySize x ElementType].
       // idx is the index into the array. Unlike with structure
@@ -1103,7 +1178,7 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
       // time.
       assert(idx->getType() == Type::LongTy && "Bad GEP array index!");
 
-      // We want to add basePtrReg to (idxReg * sizeof ElementType). First, we
+      // We want to add basePtrReg to(idxReg * sizeof ElementType). First, we
       // must find the size of the pointed-to type (Not coincidentally, the next
       // type is the type of the elements in the array).
       Ty = SqTy->getElementType();
@@ -1143,7 +1218,7 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
       }
       // Emit an ADD to add OffsetReg to the basePtr.
       BMI(MBB, IP, X86::ADDrr32, 2,
-          nextBasePtrReg).addReg (basePtrReg).addReg (OffsetReg);
+          nextBasePtrReg).addReg(basePtrReg).addReg(OffsetReg);
     }
   }
   // Now that we are here, further indices refer to subtypes of this
@@ -1154,51 +1229,62 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
   // basePtrReg. Move it to the register where we were expected to
   // put the answer. A 32-bit move should do it, because we are in
   // ILP32 land.
-  BMI(MBB, IP, X86::MOVrr32, 1, TargetReg).addReg (basePtrReg);
+  BMI(MBB, IP, X86::MOVrr32, 1, TargetReg).addReg(basePtrReg);
 }
 
-/// visitMallocInst - I know that personally, whenever I want to remember
-/// something, I have to clear off some space in my brain.
-void
-ISel::visitMallocInst (MallocInst &I)
-{
-  // We assume that by this point, malloc instructions have been
-  // lowered to calls, and dlsym will magically find malloc for us.
-  // So we do not want to see malloc instructions here.
-  visitInstruction (I);
-}
-
-
-/// visitFreeInst - same story as MallocInst
-void
-ISel::visitFreeInst (FreeInst &I)
-{
-  // We assume that by this point, free instructions have been
-  // lowered to calls, and dlsym will magically find free for us.
-  // So we do not want to see free instructions here.
-  visitInstruction (I);
-}
-
-
-/// visitAllocaInst - I want some stack space. Come on, man, I said I
-/// want some freakin' stack space.
-void
-ISel::visitAllocaInst (AllocaInst &I)
-{
+/// visitAllocaInst - If this is a fixed size alloca, allocate space from the
+/// frame manager, otherwise do it the hard way.
+///
+void ISel::visitAllocaInst(AllocaInst &I) {
   // Find the data size of the alloca inst's getAllocatedType.
-  const Type *allocatedType = I.getAllocatedType ();
-  const TargetData &TD = TM.DataLayout;
-  unsigned allocatedTypeSize = TD.getTypeSize (allocatedType);
-  // Keep stack 32-bit aligned.
-  unsigned int allocatedTypeWords = allocatedTypeSize / 4;
-  if (allocatedTypeSize % 4 != 0) { allocatedTypeWords++; }
+  const Type *Ty = I.getAllocatedType();
+  unsigned TySize = TM.getTargetData().getTypeSize(Ty);
+
+  // If this is a fixed size alloca in the entry block for the function,
+  // statically stack allocate the space.
+  //
+  if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(I.getArraySize())) {
+    if (I.getParent() == I.getParent()->getParent()->begin()) {
+      TySize *= CUI->getValue();   // Get total allocated size...
+      unsigned Alignment = TM.getTargetData().getTypeAlignment(Ty);
+
+      // Create a new stack object using the frame manager...
+      int FrameIdx = F->getFrameInfo()->CreateStackObject(TySize, Alignment);
+      addFrameReference(BuildMI(BB, X86::LEAr32, 5, getReg(I)), FrameIdx);
+      return;
+    }
+  }
+
+  // Create a register to hold the temporary result of multiplying the type size
+  // constant by the variable amount.
+  unsigned TotalSizeReg = makeAnotherReg(Type::UIntTy);
+  unsigned SrcReg1 = getReg(I.getArraySize());
+  unsigned SizeReg = makeAnotherReg(Type::UIntTy);
+  BuildMI(BB, X86::MOVir32, 1, SizeReg).addZImm(TySize);
+
+  // TotalSizeReg = mul <SrcReg1>, <SizeReg>
+  MachineBasicBlock::iterator MBBI = BB->end();
+  doMultiply(BB, MBBI, TotalSizeReg, Type::UIntTy, SrcReg1, SizeReg);
+
+  // AddedSize = add <TotalSizeReg>, 15
+  unsigned AddedSizeReg = makeAnotherReg(Type::UIntTy);
+  BuildMI(BB, X86::ADDri32, 2, AddedSizeReg).addReg(TotalSizeReg).addZImm(15);
+
+  // AlignedSize = and <AddedSizeReg>, ~15
+  unsigned AlignedSize = makeAnotherReg(Type::UIntTy);
+  BuildMI(BB, X86::ANDri32, 2, AlignedSize).addReg(AddedSizeReg).addZImm(~15);
+
   // Subtract size from stack pointer, thereby allocating some space.
-  BuildMI(BB, X86::SUBri32, 2,
-          X86::ESP).addReg(X86::ESP).addZImm(allocatedTypeWords * 4);
+  BuildMI(BB, X86::SUBri32, 2, X86::ESP).addReg(X86::ESP).addZImm(AlignedSize);
+
   // Put a pointer to the space into the result register, by copying
   // the stack pointer.
-  BuildMI (BB, X86::MOVrr32, 1, getReg (I)).addReg (X86::ESP);
+  BuildMI(BB, X86::MOVrr32, 1, getReg(I)).addReg(X86::ESP);
+
+  // Inform the Frame Information that we have just allocated a variable sized
+  // object.
+  F->getFrameInfo()->CreateVariableSizedObject();
 }