llvm-6502/lib/Bytecode/Reader/InstructionReader.cpp
Chris Lattner 4ee8ef2a5d This patch substantially simplifies and cleans up handling of basic blocks
in the bytecode parser.  Before we tried to shoehorn basic blocks into the
"getValue" code path with other types of values.  For a variety of reasons
this was a bad idea, so this patch separates it out into its own data structure.

This simplifies the code, makes it fit in 80 columns, and is also much faster.
In a testcase provided by Bill, which has lots of PHI nodes, this patch speeds
up bytecode parsing from taking 6.9s to taking 2.32s.  More speedups to
follow later.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@8977 91177308-0d34-0410-b5e6-96231b3b80d8
2003-10-08 22:52:54 +00:00

442 lines
14 KiB
C++

//===- InstructionReader.cpp - Code to read an instruction from bytecode --===//
//
// This file defines the mechanism to read an instruction from a bytecode
// stream.
//
// Note that this library should be as fast as possible, reentrant, and
// threadsafe!!
//
// TODO: Change from getValue(Raw.Arg1) etc, to getArg(Raw, 1)
// Make it check type, so that casts are checked.
//
//===----------------------------------------------------------------------===//
#include "ReaderInternals.h"
#include "llvm/iTerminators.h"
#include "llvm/iMemory.h"
#include "llvm/iPHINode.h"
#include "llvm/iOther.h"
/// ParseRawInst - Decode the encoding of one instruction from the bytecode
/// stream into a RawInst.  Only the type slot is resolved (via getType); the
/// operands are left as raw slot numbers.
///
/// The low two bits of the first word select the encoding: 1, 2 or 3 mean that
/// many operands are packed into that same word together with the opcode and
/// the type slot; 0 means the instruction is re-read as a sequence of VBR
/// values (opcode, type, operand count, operands) followed by alignment
/// padding.
///
/// Buf is advanced past the instruction.  Result->VarArgs is only assigned
/// when NumOperands > 3; it then holds operand #3 and up and is NOT freed by
/// this function or by anything visible here on the RawInst itself, so the
/// caller must delete it.
///
/// Throws std::string when the buffer cannot be read or aligned, when the type
/// slot does not resolve, or when a VBR-encoded instruction claims to have
/// zero operands.
std::auto_ptr<RawInst>
BytecodeParser::ParseRawInst(const unsigned char *&Buf,
                             const unsigned char *EndBuf) {
  unsigned Op, Typ;
  std::auto_ptr<RawInst> Result = std::auto_ptr<RawInst>(new RawInst());
  if (read(Buf, EndBuf, Op))
    throw std::string("Error reading from buffer.");

  // bits   Instruction format:        Common to all formats
  // --------------------------
  // 01-00: Encoding selector: number of packed operands, 0 = VBR encoding
  // 07-02: Opcode
  Result->NumOperands = (Op >> 0) & 03;
  Result->Opcode      = (Op >> 2) & 63;

  switch (Result->NumOperands) {
  case 1:
    // bits   Instruction format:
    // --------------------------
    // 19-08: Resulting type plane
    // 31-20: Operand #1 (if set to (2^12-1), then zero operands)
    //
    Result->Ty   = getType((Op >> 8) & 4095);
    Result->Arg1 = (Op >> 20) & 4095;
    if (Result->Arg1 == 4095)    // Handle special encoding for 0 operands...
      Result->NumOperands = 0;
    break;
  case 2:
    // bits   Instruction format:
    // --------------------------
    // 15-08: Resulting type plane
    // 23-16: Operand #1
    // 31-24: Operand #2
    //
    Result->Ty   = getType((Op >> 8) & 255);
    Result->Arg1 = (Op >> 16) & 255;
    Result->Arg2 = (Op >> 24) & 255;
    break;
  case 3:
    // bits   Instruction format:
    // --------------------------
    // 13-08: Resulting type plane
    // 19-14: Operand #1
    // 25-20: Operand #2
    // 31-26: Operand #3
    //
    Result->Ty   = getType((Op >> 8) & 63);
    Result->Arg1 = (Op >> 14) & 63;
    Result->Arg2 = (Op >> 20) & 63;
    Result->Arg3 = (Op >> 26) & 63;
    break;
  case 0: {
    // The word read above was not a packed instruction.  Step back over it
    // and decode the same bytes as VBR values instead.
    Buf -= 4;
    if (read_vbr(Buf, EndBuf, Result->Opcode))
      throw std::string("Error reading from buffer.");
    Result->Opcode >>= 2;   // Drop the two encoding-selector bits

    if (read_vbr(Buf, EndBuf, Typ))
      throw std::string("Error reading from buffer.");
    Result->Ty = getType(Typ);
    if (Result->Ty == 0)
      throw std::string("Invalid type read in instruction.");

    if (read_vbr(Buf, EndBuf, Result->NumOperands))
      throw std::string("Error reading from buffer.");

    // Operands beyond the second are collected here and only handed over to
    // the RawInst once the whole instruction was read successfully.  Storing
    // the raw pointer in Result right away would leak the vector whenever one
    // of the throws below fires.
    std::auto_ptr<std::vector<unsigned> > ExtraArgs;

    switch (Result->NumOperands) {
    case 0:
      throw std::string("Zero-argument instruction found; this is invalid.");
    case 1:
      if (read_vbr(Buf, EndBuf, Result->Arg1))
        throw std::string("Error reading from buffer");
      break;
    case 2:
      if (read_vbr(Buf, EndBuf, Result->Arg1) ||
          read_vbr(Buf, EndBuf, Result->Arg2))
        throw std::string("Error reading from buffer");
      break;
    case 3:
      if (read_vbr(Buf, EndBuf, Result->Arg1) ||
          read_vbr(Buf, EndBuf, Result->Arg2) ||
          read_vbr(Buf, EndBuf, Result->Arg3))
        throw std::string("Error reading from buffer");
      break;
    default: {
      if (read_vbr(Buf, EndBuf, Result->Arg1) ||
          read_vbr(Buf, EndBuf, Result->Arg2))
        throw std::string("Error reading from buffer");

      // Allocate a vector to hold arguments 3, 4, 5, 6 ...
      const unsigned NumExtra = Result->NumOperands-2;
      ExtraArgs.reset(new std::vector<unsigned>(NumExtra));
      for (unsigned a = 0; a != NumExtra; ++a)
        if (read_vbr(Buf, EndBuf, (*ExtraArgs)[a]))
          throw std::string("Error reading from buffer");
      break;
    }
    }

    if (align32(Buf, EndBuf))
      throw std::string("Unaligned bytecode buffer.");

    // Nothing can fail for this encoding any more: publish the extra operands.
    // VarArgs is deliberately left untouched when there are none, because the
    // three-operand form stores Arg3 instead.
    if (ExtraArgs.get())
      Result->VarArgs = ExtraArgs.release();
    break;
  }
  }

  // The packed encodings do not validate the type slot on their own.  Apply
  // the same check the VBR encoding performs so that callers never see a null
  // type.
  if (Result->Ty == 0)
    throw std::string("Invalid type read in instruction.");

#if 0
  std::cerr << "NO: "  << Result->NumOperands << " opcode: " << Result->Opcode
            << " Ty: "<< Result->Ty->getDescription()<< " arg1: "<< Result->Arg1
            << " arg2: " << Result->Arg2 << " arg3: "   << Result->Arg3 << "\n";
#endif
  return Result;
}
/// ParseInstruction - Read one instruction from the bytecode stream and build
/// the corresponding Instruction object.
///
/// Buf is advanced past the instruction by ParseRawInst.  On success the new
/// instruction is stored in Res and false is returned.  True is returned when
/// the operands do not fit the opcode or the opcode is not recognized; Res is
/// not assigned in that case.  Exceptions thrown by ParseRawInst propagate to
/// the caller.
bool BytecodeParser::ParseInstruction(const unsigned char *&Buf,
                                      const unsigned char *EndBuf,
                                      Instruction *&Res) {
  std::auto_ptr<RawInst> Raw = ParseRawInst(Buf, EndBuf);

  // ParseRawInst heap-allocates Raw->VarArgs exactly when the instruction has
  // more than three operands, and leaves the field unset otherwise.  Own the
  // vector here so that it is released on every exit path, including the early
  // error returns and opcodes that never look at the extra operands.  Code
  // below must therefore only touch Raw->VarArgs when NumOperands > 3.
  std::auto_ptr<std::vector<unsigned> >
    VarArgsOwner(Raw->NumOperands > 3 ? Raw->VarArgs : 0);

  if (Raw->Opcode >= Instruction::BinaryOpsBegin &&
      Raw->Opcode <  Instruction::BinaryOpsEnd  && Raw->NumOperands == 2) {
    Res = BinaryOperator::create((Instruction::BinaryOps)Raw->Opcode,
                                 getValue(Raw->Ty, Raw->Arg1),
                                 getValue(Raw->Ty, Raw->Arg2));
    return false;
  }

  Value *V;
  switch (Raw->Opcode) {
  case Instruction::VarArg:
  case Instruction::Cast: {
    // Operand #1 is the value, operand #2 is the slot of the destination type.
    V = getValue(Raw->Ty, Raw->Arg1);
    const Type *Ty = getType(Raw->Arg2);
    if (V == 0 || Ty == 0) { std::cerr << "Invalid cast!\n"; return true; }
    if (Raw->Opcode == Instruction::Cast)
      Res = new CastInst(V, Ty);
    else
      Res = new VarArgInst(V, Ty);
    return false;
  }

  case Instruction::PHINode: {
    // Operands are (value, block) pairs: there must be at least one pair and
    // an even number of operands.  Validate before building anything.
    if (Raw->NumOperands < 2 || Raw->NumOperands == 3) {
      std::cerr << "Invalid phi node encountered!\n";
      return true;
    }
    if (Raw->NumOperands > 3 && (Raw->VarArgs->size() & 1)) {
      std::cerr << "PHI Node with ODD number of arguments!\n";
      return true;
    }

    PHINode *PN = new PHINode(Raw->Ty);
    PN->addIncoming(getValue(Raw->Ty, Raw->Arg1),
                    getBasicBlock(Raw->Arg2));
    if (Raw->NumOperands > 3) {
      std::vector<unsigned> &args = *Raw->VarArgs;
      for (unsigned i = 0; i < args.size(); i+=2)
        PN->addIncoming(getValue(Raw->Ty, args[i]), getBasicBlock(args[i+1]));
    }
    Res = PN;
    return false;
  }

  case Instruction::Shl:
  case Instruction::Shr:
    Res = new ShiftInst((Instruction::OtherOps)Raw->Opcode,
                        getValue(Raw->Ty, Raw->Arg1),
                        getValue(Type::UByteTyID, Raw->Arg2));
    return false;

  case Instruction::Ret:
    if (Raw->NumOperands == 0) {
      Res = new ReturnInst(); return false;
    } else if (Raw->NumOperands == 1) {
      Res = new ReturnInst(getValue(Raw->Ty, Raw->Arg1)); return false;
    }
    break;   // Any other operand count is reported as unrecognized below

  case Instruction::Br:
    if (Raw->NumOperands == 1) {
      Res = new BranchInst(getBasicBlock(Raw->Arg1));
      return false;
    } else if (Raw->NumOperands == 3) {
      Res = new BranchInst(getBasicBlock(Raw->Arg1), getBasicBlock(Raw->Arg2),
                           getValue(Type::BoolTyID , Raw->Arg3));
      return false;
    }
    break;   // Any other operand count is reported as unrecognized below

  case Instruction::Switch: {
    // Operand #1 is the condition, #2 the default destination, followed by
    // (constant, block) pairs.  A lone third operand or an odd tail is
    // malformed; reject it before allocating so Res never dangles.
    if (Raw->NumOperands == 3 ||
        (Raw->NumOperands > 3 && (Raw->VarArgs->size() & 1))) {
      std::cerr << "Switch statement with odd number of arguments!\n";
      return true;
    }

    SwitchInst *I =
      new SwitchInst(getValue(Raw->Ty, Raw->Arg1), getBasicBlock(Raw->Arg2));

    if (Raw->NumOperands > 3) {
      std::vector<unsigned> &args = *Raw->VarArgs;
      for (unsigned i = 0; i < args.size(); i += 2) {
        // A case label that is missing or is not a constant is a parse error.
        Constant *CaseVal =
          dyn_cast_or_null<Constant>(getValue(Raw->Ty, args[i]));
        if (CaseVal == 0) {
          delete I;
          return true;
        }
        I->addCase(CaseVal, getBasicBlock(args[i+1]));
      }
    }
    // With fewer than three operands there are no destinations.  Weird, but
    // accepted.
    Res = I;
    return false;
  }

  case Instruction::Call: {
    Value *F = getValue(Raw->Ty, Raw->Arg1);
    if (F == 0) return true;

    // Check to make sure we have a pointer to method type
    const PointerType *PTy = dyn_cast<PointerType>(F->getType());
    if (PTy == 0) return true;
    const FunctionType *FTy = dyn_cast<FunctionType>(PTy->getElementType());
    if (FTy == 0) return true;

    std::vector<Value *> Params;
    const FunctionType::ParamTypes &PL = FTy->getParamTypes();

    if (!FTy->isVarArg()) {
      if (Raw->NumOperands == 0) {
        std::cerr << "Invalid call instruction encountered!\n";
        return true;
      }

      // Gather the slots of the actual arguments (operand #2 and up) so that
      // every one of them goes through the same checks.
      std::vector<unsigned> ArgSlots;
      if (Raw->NumOperands >= 2)
        ArgSlots.push_back(Raw->Arg2);
      if (Raw->NumOperands == 3)
        ArgSlots.push_back(Raw->Arg3);
      else if (Raw->NumOperands > 3)
        ArgSlots.insert(ArgSlots.end(),
                        Raw->VarArgs->begin(), Raw->VarArgs->end());

      FunctionType::ParamTypes::const_iterator It = PL.begin();
      for (unsigned i = 0; i != ArgSlots.size(); ++i) {
        if (It == PL.end()) return true;     // More actuals than formals
        Value *Arg = getValue(*It++, ArgSlots[i]);
        if (Arg == 0) return true;
        Params.push_back(Arg);
      }
      if (It != PL.end()) return true;       // Fewer actuals than formals
    } else {
      // Operand #2 is not read for vararg calls; the arguments are the
      // operands from #3 on, as (type slot, value slot) pairs.  Exactly three
      // operands would be a type without a value.
      if (Raw->NumOperands == 3) return true;

      if (Raw->NumOperands > 3) {
        std::vector<unsigned> &args = *Raw->VarArgs;
        if ((args.size() & 1) != 0)
          return true;  // Must be pairs of type/value
        for (unsigned i = 0; i < args.size(); i+=2) {
          const Type *Ty = getType(args[i]);
          if (Ty == 0)
            return true;

          Value *Arg = getValue(Ty, args[i+1]);
          if (Arg == 0) return true;
          Params.push_back(Arg);
        }
      }
    }

    Res = new CallInst(F, Params);
    return false;
  }

  case Instruction::Invoke: {
    Value *F = getValue(Raw->Ty, Raw->Arg1);
    if (F == 0) return true;

    // Check to make sure we have a pointer to method type
    const PointerType *PTy = dyn_cast<PointerType>(F->getType());
    if (PTy == 0) return true;
    const FunctionType *FTy = dyn_cast<FunctionType>(PTy->getElementType());
    if (FTy == 0) return true;

    std::vector<Value *> Params;
    const FunctionType::ParamTypes &PL = FTy->getParamTypes();

    BasicBlock *Normal, *Except;

    if (!FTy->isVarArg()) {
      // Operand #2 is the normal destination, #3 the exceptional one, and
      // anything after that is an argument.
      if (Raw->NumOperands < 3) return true;

      Normal = getBasicBlock(Raw->Arg2);
      if (Raw->NumOperands == 3) {
        Except = getBasicBlock(Raw->Arg3);
        // No arguments were supplied, so the callee must not expect any.
        if (PL.begin() != PL.end()) return true;
      } else {
        std::vector<unsigned> &args = *Raw->VarArgs;
        Except = getBasicBlock(args[0]);

        FunctionType::ParamTypes::const_iterator It = PL.begin();
        for (unsigned i = 1; i < args.size(); i++) {
          if (It == PL.end()) return true;
          Params.push_back(getValue(*It++, args[i]));
          if (Params.back() == 0) return true;
        }
        if (It != PL.end()) return true;
      }
    } else {
      // Everything after operand #2 is a (type slot, value slot) pair, and
      // the first two pairs must be the two destination labels.  Without
      // extra operands there is no VarArgs vector to read them from.
      if (Raw->NumOperands <= 3) return true;

      std::vector<unsigned> &args = *Raw->VarArgs;
      if (args.size() < 4) return true;
      if (args[0] != Type::LabelTyID || args[2] != Type::LabelTyID)
        return true;

      Normal = getBasicBlock(args[1]);
      Except = getBasicBlock(args[3]);

      if ((args.size() & 1) != 0)
        return true;  // Must be pairs of type/value
      for (unsigned i = 4; i < args.size(); i+=2) {
        Params.push_back(getValue(args[i], args[i+1]));
        if (Params.back() == 0) return true;
      }
    }

    Res = new InvokeInst(F, Normal, Except, Params);
    return false;
  }

  case Instruction::Malloc:
  case Instruction::Alloca: {
    if (Raw->NumOperands > 2) return true;

    // A null size means "no array size operand".  A size operand that was
    // given but cannot be resolved is an error, not an absent operand.
    V = 0;
    if (Raw->NumOperands) {
      V = getValue(Type::UIntTyID, Raw->Arg1);
      if (V == 0) return true;
    }

    const PointerType *PTy = dyn_cast<PointerType>(Raw->Ty);
    if (PTy == 0) return true;

    if (Raw->Opcode == Instruction::Malloc)
      Res = new MallocInst(PTy->getElementType(), V);
    else
      Res = new AllocaInst(PTy->getElementType(), V);
    return false;
  }

  case Instruction::Free:
    V = getValue(Raw->Ty, Raw->Arg1);
    if (V == 0 || !isa<PointerType>(V->getType())) return true;
    Res = new FreeInst(V);
    return false;

  case Instruction::GetElementPtr: {
    std::vector<Value*> Idx;
    if (!isa<PointerType>(Raw->Ty)) return true;
    const CompositeType *TopTy = dyn_cast<CompositeType>(Raw->Ty);

    if (Raw->NumOperands == 0) {
      std::cerr << "Invalid getelementptr encountered!\n";
      return true;
    }

    // Gather the slots of the indices (operand #2 and up).
    std::vector<unsigned> IdxSlots;
    if (Raw->NumOperands >= 2)
      IdxSlots.push_back(Raw->Arg2);
    if (Raw->NumOperands == 3)
      IdxSlots.push_back(Raw->Arg3);
    else if (Raw->NumOperands > 3)
      IdxSlots.insert(IdxSlots.end(),
                      Raw->VarArgs->begin(), Raw->VarArgs->end());

    for (unsigned i = 0, E = IdxSlots.size(); i != E; ++i) {
      // The type being indexed decides the type of the index: the pointer
      // type itself for the first index, and whatever the indices so far
      // select for each following one.
      const CompositeType *CurTy = TopTy;
      if (i != 0) {
        const Type *ETy = GetElementPtrInst::getIndexedType(Raw->Ty, Idx, true);
        CurTy = dyn_cast_or_null<CompositeType>(ETy);
      }
      if (!CurTy) return true;

      Idx.push_back(V = getValue(CurTy->getIndexType(), IdxSlots[i]));
      if (!V) return true;
    }

    Value *Ptr = getValue(Raw->Ty, Raw->Arg1);
    if (Ptr == 0) return true;
    Res = new GetElementPtrInst(Ptr, Idx);
    return false;
  }

  case 62:   // volatile load
  case Instruction::Load:
    if (Raw->NumOperands != 1) return true;
    if (!isa<PointerType>(Raw->Ty)) return true;
    Res = new LoadInst(getValue(Raw->Ty, Raw->Arg1), "", Raw->Opcode == 62);
    return false;

  case 63:   // volatile store
  case Instruction::Store: {
    if (!isa<PointerType>(Raw->Ty) || Raw->NumOperands != 2) return true;

    // Operand #2 is the pointer; its element type is the type of operand #1,
    // the value being stored.
    Value *Ptr = getValue(Raw->Ty, Raw->Arg2);
    if (Ptr == 0) return true;
    const Type *ValTy = cast<PointerType>(Ptr->getType())->getElementType();
    Res = new StoreInst(getValue(ValTy, Raw->Arg1), Ptr, Raw->Opcode == 63);
    return false;
  }

  case Instruction::Unwind:
    if (Raw->NumOperands != 0) return true;
    Res = new UnwindInst();
    return false;
  }  // end switch(Raw->Opcode)

  std::cerr << "Unrecognized instruction! " << Raw->Opcode
            << " ADDR = 0x" << (void*)Buf << "\n";
  return true;
}