diff --git a/include/llvm/MC/MCModuleYAML.h b/include/llvm/MC/MCModuleYAML.h new file mode 100644 index 00000000000..0ea29dafe45 --- /dev/null +++ b/include/llvm/MC/MCModuleYAML.h @@ -0,0 +1,41 @@ +//===- MCModuleYAML.h - MCModule YAMLIO implementation ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares classes for handling the YAML representation +/// of MCModule. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_MCMODULEYAML_H +#define LLVM_OBJECT_MCMODULEYAML_H + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCModule.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +class MCInstrInfo; +class MCRegisterInfo; + +/// \brief Dump a YAML representation of the MCModule \p MCM to \p OS. +/// \returns The empty string on success, an error message on failure. +StringRef mcmodule2yaml(raw_ostream &OS, const MCModule &MCM, + const MCInstrInfo &MII, const MCRegisterInfo &MRI); + +/// \brief Creates a new module and returns it in \p MCM. +/// \returns The empty string on success, an error message on failure. +StringRef yaml2mcmodule(OwningPtr &MCM, StringRef YamlContent, + const MCInstrInfo &MII, const MCRegisterInfo &MRI); + +} // end namespace llvm + +#endif diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index 89e2aaf48b8..fffe886a4a1 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -25,6 +25,7 @@ add_llvm_library(LLVMMC MCMachOStreamer.cpp MCMachObjectTargetWriter.cpp MCModule.cpp + MCModuleYAML.cpp MCNullStreamer.cpp MCObjectFileInfo.cpp MCObjectDisassembler.cpp diff --git a/lib/MC/MCModuleYAML.cpp b/lib/MC/MCModuleYAML.cpp new file mode 100644 index 00000000000..e2de57849b9 --- /dev/null +++ b/lib/MC/MCModuleYAML.cpp @@ -0,0 +1,461 @@ +//===- MCModuleYAML.cpp - MCModule YAMLIO implementation ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of MCModule. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCModuleYAML.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/MC/MCAtom.h" +#include "llvm/MC/MCFunction.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Object/YAML.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/YAMLTraits.h" +#include + +namespace llvm { + +namespace { + +// This class is used to map opcode and register names to enum values. +// +// There are at least 3 obvious ways to do this: +// 1- Generate an MII/MRI method using a tablegen StringMatcher +// 2- Write an MII/MRI method using std::lower_bound and the assumption that +// the enums are sorted (starting at a fixed value). +// 3- Do the matching manually as is done here. +// +// Why 3? +// 1- A StringMatcher function for thousands of entries would incur +// a non-negligible binary size overhead. +// 2- The lower_bound comparators would be somewhat involved and aren't +// obviously reusable (see LessRecordRegister in llvm/TableGen/Record.h) +// 3- This isn't actually something useful outside tests (but the same argument +// can be made against having {MII,MRI}::getName). +// +// If this becomes useful outside this specific situation, feel free to do +// the Right Thing (tm) and move the functionality to MII/MRI. +// +class InstrRegInfoHolder { + typedef StringMap EnumValByNameTy; + EnumValByNameTy InstEnumValueByName; + EnumValByNameTy RegEnumValueByName; + +public: + const MCInstrInfo &MII; + const MCRegisterInfo &MRI; + InstrRegInfoHolder(const MCInstrInfo &MII, const MCRegisterInfo &MRI) + : InstEnumValueByName(NextPowerOf2(MII.getNumOpcodes())), + RegEnumValueByName(NextPowerOf2(MRI.getNumRegs())), MII(MII), MRI(MRI) { + for (int i = 0, e = MII.getNumOpcodes(); i != e; ++i) + InstEnumValueByName[MII.getName(i)] = i; + for (int i = 0, e = MRI.getNumRegs(); i != e; ++i) + RegEnumValueByName[MRI.getName(i)] = i; + } + + bool matchRegister(StringRef Name, unsigned &Reg) { + EnumValByNameTy::const_iterator It = RegEnumValueByName.find(Name); + if (It == RegEnumValueByName.end()) + return false; + Reg = It->getValue(); + return true; + } + bool matchOpcode(StringRef Name, unsigned &Opc) { + EnumValByNameTy::const_iterator It = InstEnumValueByName.find(Name); + if (It == InstEnumValueByName.end()) + return false; + Opc = It->getValue(); + return true; + } +}; + +} // end unnamed namespace + +namespace MCModuleYAML { + +LLVM_YAML_STRONG_TYPEDEF(unsigned, OpcodeEnum) + +struct Operand { + MCOperand MCOp; +}; + +struct Inst { + OpcodeEnum Opcode; + std::vector Operands; + uint64_t Size; +}; + +struct Atom { + MCAtom::AtomKind Type; + yaml::Hex64 StartAddress; + uint64_t Size; + + std::vector Insts; + object::yaml::BinaryRef Data; +}; + +struct BasicBlock { + yaml::Hex64 Address; + std::vector Preds; + std::vector Succs; +}; + +struct Function { + StringRef Name; + std::vector BasicBlocks; +}; + +struct Module { + std::vector Atoms; + std::vector Functions; +}; + +} // end namespace MCModuleYAML +} // end namespace llvm + +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::Hex64) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::MCModuleYAML::Operand) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Inst) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Atom) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::BasicBlock) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Function) + +namespace llvm { + +namespace yaml { + +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &IO, MCAtom::AtomKind &Kind); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, MCModuleYAML::Atom &A); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, MCModuleYAML::Inst &I); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, MCModuleYAML::BasicBlock &BB); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, MCModuleYAML::Function &Fn); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, MCModuleYAML::Module &M); +}; + +template <> struct ScalarTraits { + static void output(const MCModuleYAML::Operand &, void *, + llvm::raw_ostream &); + static StringRef input(StringRef, void *, MCModuleYAML::Operand &); +}; + +template <> struct ScalarTraits { + static void output(const MCModuleYAML::OpcodeEnum &, void *, + llvm::raw_ostream &); + static StringRef input(StringRef, void *, MCModuleYAML::OpcodeEnum &); +}; + +void ScalarEnumerationTraits::enumeration( + IO &IO, MCAtom::AtomKind &Value) { + IO.enumCase(Value, "Text", MCAtom::TextAtom); + IO.enumCase(Value, "Data", MCAtom::DataAtom); +} + +void MappingTraits::mapping(IO &IO, MCModuleYAML::Atom &A) { + IO.mapRequired("StartAddress", A.StartAddress); + IO.mapRequired("Size", A.Size); + IO.mapRequired("Type", A.Type); + if (A.Type == MCAtom::TextAtom) + IO.mapRequired("Content", A.Insts); + else if (A.Type == MCAtom::DataAtom) + IO.mapRequired("Content", A.Data); +} + +void MappingTraits::mapping(IO &IO, MCModuleYAML::Inst &I) { + IO.mapRequired("Inst", I.Opcode); + IO.mapRequired("Size", I.Size); + IO.mapRequired("Ops", I.Operands); +} + +void +MappingTraits::mapping(IO &IO, + MCModuleYAML::BasicBlock &BB) { + IO.mapRequired("Address", BB.Address); + IO.mapRequired("Preds", BB.Preds); + IO.mapRequired("Succs", BB.Succs); +} + +void MappingTraits::mapping(IO &IO, + MCModuleYAML::Function &F) { + IO.mapRequired("Name", F.Name); + IO.mapRequired("BasicBlocks", F.BasicBlocks); +} + +void MappingTraits::mapping(IO &IO, + MCModuleYAML::Module &M) { + IO.mapRequired("Atoms", M.Atoms); + IO.mapOptional("Functions", M.Functions); +} + +void +ScalarTraits::output(const MCModuleYAML::Operand &Val, + void *Ctx, raw_ostream &Out) { + InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; + + // FIXME: Doesn't support FPImm and expr/inst, but do these make sense? + if (Val.MCOp.isImm()) + Out << "I" << Val.MCOp.getImm(); + else if (Val.MCOp.isReg()) + Out << "R" << IRI->MRI.getName(Val.MCOp.getReg()); + else + llvm_unreachable("Trying to output invalid MCOperand!"); +} + +StringRef +ScalarTraits::input(StringRef Scalar, void *Ctx, + MCModuleYAML::Operand &Val) { + InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; + char Type = 0; + if (Scalar.size() >= 1) + Type = Scalar.front(); + if (Type != 'R' && Type != 'I') + return "Operand must start with 'R' (register) or 'I' (immediate)."; + if (Type == 'R') { + unsigned Reg; + if (!IRI->matchRegister(Scalar.substr(1), Reg)) + return "Invalid register name."; + Val.MCOp = MCOperand::CreateReg(Reg); + } else if (Type == 'I') { + int64_t RIVal; + if (Scalar.substr(1).getAsInteger(10, RIVal)) + return "Invalid immediate value."; + Val.MCOp = MCOperand::CreateImm(RIVal); + } else { + Val.MCOp = MCOperand(); + } + return StringRef(); +} + +void ScalarTraits::output( + const MCModuleYAML::OpcodeEnum &Val, void *Ctx, raw_ostream &Out) { + InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; + Out << IRI->MII.getName(Val); +} + +StringRef +ScalarTraits::input(StringRef Scalar, void *Ctx, + MCModuleYAML::OpcodeEnum &Val) { + InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; + unsigned Opc; + if (!IRI->matchOpcode(Scalar, Opc)) + return "Invalid instruction opcode."; + Val = Opc; + return ""; +} + +} // end namespace yaml + +namespace { + +class MCModule2YAML { + const MCModule &MCM; + MCModuleYAML::Module YAMLModule; + void dumpAtom(const MCAtom *MCA); + void dumpFunction(const MCFunction *MCF); + void dumpBasicBlock(const MCBasicBlock *MCBB); + +public: + MCModule2YAML(const MCModule &MCM); + MCModuleYAML::Module &getYAMLModule(); +}; + +class YAML2MCModule { + MCModule &MCM; + +public: + YAML2MCModule(MCModule &MCM); + StringRef parse(const MCModuleYAML::Module &YAMLModule); +}; + +} // end unnamed namespace + +MCModule2YAML::MCModule2YAML(const MCModule &MCM) : MCM(MCM), YAMLModule() { + for (MCModule::const_atom_iterator AI = MCM.atom_begin(), AE = MCM.atom_end(); + AI != AE; ++AI) + dumpAtom(*AI); + for (MCModule::const_func_iterator FI = MCM.func_begin(), FE = MCM.func_end(); + FI != FE; ++FI) + dumpFunction(*FI); +} + +void MCModule2YAML::dumpAtom(const MCAtom *MCA) { + YAMLModule.Atoms.resize(YAMLModule.Atoms.size() + 1); + MCModuleYAML::Atom &A = YAMLModule.Atoms.back(); + A.Type = MCA->getKind(); + A.StartAddress = MCA->getBeginAddr(); + A.Size = MCA->getEndAddr() - MCA->getBeginAddr() + 1; + if (const MCTextAtom *TA = dyn_cast(MCA)) { + const size_t InstCount = TA->size(); + A.Insts.resize(InstCount); + for (size_t i = 0; i != InstCount; ++i) { + const MCDecodedInst &MCDI = TA->at(i); + A.Insts[i].Opcode = MCDI.Inst.getOpcode(); + A.Insts[i].Size = MCDI.Size; + const unsigned OpCount = MCDI.Inst.getNumOperands(); + A.Insts[i].Operands.resize(OpCount); + for (unsigned oi = 0; oi != OpCount; ++oi) + A.Insts[i].Operands[oi].MCOp = MCDI.Inst.getOperand(oi); + } + } else if (const MCDataAtom *DA = dyn_cast(MCA)) { + A.Data = DA->getData(); + } else { + llvm_unreachable("Unknown atom type."); + } +} + +void MCModule2YAML::dumpFunction(const MCFunction *MCF) { + YAMLModule.Functions.resize(YAMLModule.Functions.size() + 1); + MCModuleYAML::Function &F = YAMLModule.Functions.back(); + F.Name = MCF->getName(); + for (MCFunction::const_iterator BBI = MCF->begin(), BBE = MCF->end(); + BBI != BBE; ++BBI) { + const MCBasicBlock *MCBB = *BBI; + F.BasicBlocks.resize(F.BasicBlocks.size() + 1); + MCModuleYAML::BasicBlock &BB = F.BasicBlocks.back(); + BB.Address = MCBB->getInsts()->getBeginAddr(); + for (MCBasicBlock::pred_const_iterator PI = MCBB->pred_begin(), + PE = MCBB->pred_end(); + PI != PE; ++PI) + BB.Preds.push_back((*PI)->getInsts()->getBeginAddr()); + for (MCBasicBlock::succ_const_iterator SI = MCBB->succ_begin(), + SE = MCBB->succ_end(); + SI != SE; ++SI) + BB.Succs.push_back((*SI)->getInsts()->getBeginAddr()); + } +} + +MCModuleYAML::Module &MCModule2YAML::getYAMLModule() { return YAMLModule; } + +YAML2MCModule::YAML2MCModule(MCModule &MCM) : MCM(MCM) {} + +StringRef YAML2MCModule::parse(const MCModuleYAML::Module &YAMLModule) { + typedef std::vector::const_iterator AtomIt; + typedef std::vector::const_iterator InstIt; + typedef std::vector::const_iterator OpIt; + + typedef DenseMap AddrToTextAtomTy; + AddrToTextAtomTy TAByAddr; + + for (AtomIt AI = YAMLModule.Atoms.begin(), AE = YAMLModule.Atoms.end(); + AI != AE; ++AI) { + uint64_t StartAddress = AI->StartAddress; + if (AI->Size == 0) + return "Atoms can't be empty!"; + uint64_t EndAddress = StartAddress + AI->Size - 1; + switch (AI->Type) { + case MCAtom::TextAtom: { + MCTextAtom *TA = MCM.createTextAtom(StartAddress, EndAddress); + TAByAddr[StartAddress] = TA; + for (InstIt II = AI->Insts.begin(), IE = AI->Insts.end(); II != IE; + ++II) { + MCInst MI; + MI.setOpcode(II->Opcode); + for (OpIt OI = II->Operands.begin(), OE = II->Operands.end(); OI != OE; + ++OI) + MI.addOperand(OI->MCOp); + TA->addInst(MI, II->Size); + } + break; + } + case MCAtom::DataAtom: { + MCDataAtom *DA = MCM.createDataAtom(StartAddress, EndAddress); + SmallVector Data; + raw_svector_ostream OS(Data); + AI->Data.writeAsBinary(OS); + OS.flush(); + for (size_t i = 0, e = Data.size(); i != e; ++i) + DA->addData((uint8_t)Data[i]); + break; + } + } + } + + typedef std::vector::const_iterator FuncIt; + typedef std::vector::const_iterator BBIt; + typedef std::vector::const_iterator AddrIt; + for (FuncIt FI = YAMLModule.Functions.begin(), + FE = YAMLModule.Functions.end(); + FI != FE; ++FI) { + MCFunction *MCFN = MCM.createFunction(FI->Name); + for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end(); + BBI != BBE; ++BBI) { + AddrToTextAtomTy::const_iterator It = TAByAddr.find(BBI->Address); + if (It == TAByAddr.end()) + return "Basic block start address doesn't match any text atom!"; + MCFN->createBlock(*It->second); + } + for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end(); + BBI != BBE; ++BBI) { + MCBasicBlock *MCBB = MCFN->find(BBI->Address); + if (!MCBB) + return "Couldn't find matching basic block in function."; + for (AddrIt PI = BBI->Preds.begin(), PE = BBI->Preds.end(); PI != PE; + ++PI) { + MCBasicBlock *Pred = MCFN->find(*PI); + if (!Pred) + return "Couldn't find predecessor basic block."; + MCBB->addPredecessor(Pred); + } + for (AddrIt SI = BBI->Succs.begin(), SE = BBI->Succs.end(); SI != SE; + ++SI) { + MCBasicBlock *Succ = MCFN->find(*SI); + if (!Succ) + return "Couldn't find predecessor basic block."; + MCBB->addSuccessor(Succ); + } + } + } + return ""; +} + +StringRef mcmodule2yaml(raw_ostream &OS, const MCModule &MCM, + const MCInstrInfo &MII, const MCRegisterInfo &MRI) { + MCModule2YAML Dumper(MCM); + InstrRegInfoHolder IRI(MII, MRI); + yaml::Output YOut(OS, (void *)&IRI); + YOut << Dumper.getYAMLModule(); + return ""; +} + +StringRef yaml2mcmodule(OwningPtr &MCM, StringRef YamlContent, + const MCInstrInfo &MII, const MCRegisterInfo &MRI) { + MCM.reset(new MCModule); + YAML2MCModule Parser(*MCM); + MCModuleYAML::Module YAMLModule; + InstrRegInfoHolder IRI(MII, MRI); + yaml::Input YIn(YamlContent, (void *)&IRI); + YIn >> YAMLModule; + if (error_code ec = YIn.error()) + return ec.message(); + StringRef err = Parser.parse(YAMLModule); + if (!err.empty()) + return err; + return ""; +} + +} // end namespace llvm diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index 122ac833987..a4cd6a2f731 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -31,6 +31,7 @@ #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCModule.h" +#include "llvm/MC/MCModuleYAML.h" #include "llvm/MC/MCObjectDisassembler.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectSymbolizer.h" @@ -61,6 +62,7 @@ #include #include #include + using namespace llvm; using namespace object; @@ -139,6 +141,12 @@ static cl::opt CFG("cfg", cl::desc("Create a CFG for every function found in the object" " and write it to a graphviz file")); +// FIXME: Does it make sense to have a dedicated tool for yaml cfg output? +static cl::opt +YAMLCFG("yaml-cfg", + cl::desc("Create a CFG and write it as a YAML MCModule."), + cl::value_desc("yaml output file")); + static StringRef ToolName; bool llvm::error(error_code ec) { @@ -178,6 +186,7 @@ static const Target *getTarget(const ObjectFile *Obj = NULL) { } // Write a graphviz file for the CFG inside an MCFunction. +// FIXME: Use GraphWriter static void emitDOTFile(const char *FileName, const MCFunction &f, MCInstPrinter *IP) { // Start a new dot file. @@ -333,7 +342,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { return; } - if (CFG) { + if (CFG || !YAMLCFG.empty()) { OwningPtr OD( new MCObjectDisassembler(*Obj, *DisAsm, *MIA)); OwningPtr Mod(OD->buildModule(/* withCFG */ true)); @@ -350,14 +359,25 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { } } } - for (MCModule::const_func_iterator FI = Mod->func_begin(), - FE = Mod->func_end(); - FI != FE; ++FI) { - static int filenum = 0; - emitDOTFile((Twine((*FI)->getName()) + "_" + - utostr(filenum) + ".dot").str().c_str(), - **FI, IP.get()); - ++filenum; + if (CFG) { + for (MCModule::const_func_iterator FI = Mod->func_begin(), + FE = Mod->func_end(); + FI != FE; ++FI) { + static int filenum = 0; + emitDOTFile((Twine((*FI)->getName()) + "_" + + utostr(filenum) + ".dot").str().c_str(), + **FI, IP.get()); + ++filenum; + } + } + if (!YAMLCFG.empty()) { + std::string Error; + raw_fd_ostream YAMLOut(YAMLCFG.c_str(), Error); + if (!Error.empty()) { + errs() << "llvm-objdump: warning: " << Error << '\n'; + return; + } + mcmodule2yaml(YAMLOut, *Mod, *MII, *MRI); } }