llvm-6502/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp

//===-- BPFMCCodeEmitter.cpp - Convert BPF code to machine code -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BPFMCCodeEmitter class.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define DEBUG_TYPE "mccodeemitter"

namespace {
class BPFMCCodeEmitter : public MCCodeEmitter {
  BPFMCCodeEmitter(const BPFMCCodeEmitter &) = delete;
  void operator=(const BPFMCCodeEmitter &) = delete;
  const MCRegisterInfo &MRI;

public:
  BPFMCCodeEmitter(const MCRegisterInfo &mri) : MRI(mri) {}

  ~BPFMCCodeEmitter() {}

  // getBinaryCodeForInstr - TableGen'erated function for getting the
  // binary encoding for an instruction.
  uint64_t getBinaryCodeForInstr(const MCInst &MI,
                                 SmallVectorImpl<MCFixup> &Fixups,
                                 const MCSubtargetInfo &STI) const;

  // getMachineOpValue - Return binary encoding of operand. If the machin
  // operand requires relocation, record the relocation and return zero.
  unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
                             SmallVectorImpl<MCFixup> &Fixups,
                             const MCSubtargetInfo &STI) const;

  uint64_t getMemoryOpValue(const MCInst &MI, unsigned Op,
                            SmallVectorImpl<MCFixup> &Fixups,
                            const MCSubtargetInfo &STI) const;

  void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                         SmallVectorImpl<MCFixup> &Fixups,
                         const MCSubtargetInfo &STI) const override;
};
}

// Factory for the BPF machine-code emitter. Only the register info is handed
// to the emitter; MCII and Ctx are accepted but not used here. The returned
// object is allocated with `new`, so the caller is responsible for it.
MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII,
                                            const MCRegisterInfo &MRI,
                                            MCContext &Ctx) {
  MCCodeEmitter *Emitter = new BPFMCCodeEmitter(MRI);
  return Emitter;
}

// Return the binary encoding of operand MO of instruction MI.
//
//  - Register operand: the register's encoding value from MRI.
//  - Immediate operand: the immediate, converted to unsigned (so only the
//    bits that fit in an unsigned survive).
//  - Expression operand: must be a symbol reference. A fixup at offset 0 is
//    appended to Fixups and 0 is returned as a placeholder. The fixup kind
//    depends on the opcode:
//        BPF::JAL      -> FK_SecRel_4 (function call name)
//        BPF::LD_imm64 -> FK_SecRel_8
//        anything else -> FK_PCRel_2  (basic block label)
unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI,
                                             const MCOperand &MO,
                                             SmallVectorImpl<MCFixup> &Fixups,
                                             const MCSubtargetInfo &STI) const {
  if (MO.isReg())
    return MRI.getEncodingValue(MO.getReg());
  if (MO.isImm())
    return static_cast<unsigned>(MO.getImm());

  assert(MO.isExpr() && "Operand is neither register, immediate nor expression");

  const MCExpr *Expr = MO.getExpr();

  // The kind is checked directly inside the assert so that no local variable
  // is left unused when asserts are compiled out (NDEBUG builds).
  assert(Expr->getKind() == MCExpr::SymbolRef &&
         "Only symbol references are supported as expression operands");

  if (MI.getOpcode() == BPF::JAL)
    // func call name
    Fixups.push_back(MCFixup::Create(0, Expr, FK_SecRel_4));
  else if (MI.getOpcode() == BPF::LD_imm64)
    Fixups.push_back(MCFixup::Create(0, Expr, FK_SecRel_8));
  else
    // bb label
    Fixups.push_back(MCFixup::Create(0, Expr, FK_PCRel_2));

  return 0;
}

// Write the single byte C to OS and advance the caller's running byte count
// CurByte by one.
// NOTE(review): this helper is a non-static free function, so it has external
// linkage; confirm no other translation unit defines the same symbol.
void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) {
  const char Raw = static_cast<char>(C);
  OS << Raw;
  CurByte += 1;
}

// Write the low Size bytes of Val to OS in little-endian order (least
// significant byte first). CurByte is advanced once per byte written.
// Size must not exceed 8.
void EmitLEConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
                    raw_ostream &OS) {
  assert(Size <= 8 && "size too big in emit constant");

  unsigned Remaining = Size;
  while (Remaining != 0) {
    // Peel off the current lowest byte, then shift the next one into place.
    EmitByte(Val & 255, CurByte, OS);
    Val >>= 8;
    --Remaining;
  }
}

// Write the low Size bytes of Val to OS in big-endian order (most significant
// of those bytes first). CurByte is advanced once per byte written.
// Size must not exceed 8; a Size of 0 writes nothing.
void EmitBEConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
                    raw_ostream &OS) {
  assert(Size <= 8 && "size too big in emit constant");

  // Count down with an unsigned counter that stops at zero. Computing
  // (Size - 1) * 8 up front would wrap around for Size == 0 and then depend
  // on an implementation-defined unsigned-to-int conversion to terminate.
  for (unsigned BytesLeft = Size; BytesLeft != 0; --BytesLeft) {
    const unsigned Shift = (BytesLeft - 1) * 8;
    EmitByte((Val >> Shift) & 255, CurByte, OS);
  }
}

void BPFMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                                         SmallVectorImpl<MCFixup> &Fixups,
                                         const MCSubtargetInfo &STI) const {
  unsigned Opcode = MI.getOpcode();
  // Keep track of the current byte being emitted
  unsigned CurByte = 0;

  if (Opcode == BPF::LD_imm64) {
    uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI);
    EmitByte(Value >> 56, CurByte, OS);
    EmitByte(((Value >> 48) & 0xff), CurByte, OS);
    EmitLEConstant(0, 2, CurByte, OS);
    EmitLEConstant(Value & 0xffffFFFF, 4, CurByte, OS);

    const MCOperand &MO = MI.getOperand(1);
    uint64_t Imm = MO.isImm() ? MO.getImm() : 0;
    EmitByte(0, CurByte, OS);
    EmitByte(0, CurByte, OS);
    EmitLEConstant(0, 2, CurByte, OS);
    EmitLEConstant(Imm >> 32, 4, CurByte, OS);
  } else {
    // Get instruction encoding and emit it
    uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI);
    EmitByte(Value >> 56, CurByte, OS);
    EmitByte((Value >> 48) & 0xff, CurByte, OS);
    EmitLEConstant((Value >> 32) & 0xffff, 2, CurByte, OS);
    EmitLEConstant(Value & 0xffffFFFF, 4, CurByte, OS);
  }
}

// Encode BPF Memory Operand
uint64_t BPFMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op,
                                            SmallVectorImpl<MCFixup> &Fixups,
                                            const MCSubtargetInfo &STI) const {
  uint64_t Encoding;
  const MCOperand Op1 = MI.getOperand(1);
  assert(Op1.isReg() && "First operand is not register.");
  Encoding = MRI.getEncodingValue(Op1.getReg());
  Encoding <<= 16;
  MCOperand Op2 = MI.getOperand(2);
  assert(Op2.isImm() && "Second operand is not immediate.");
  Encoding |= Op2.getImm() & 0xffff;
  return Encoding;
}

#include "BPFGenMCCodeEmitter.inc"
BPF backend Summary: V8->V9: - cleanup tests V7->V8: - addressed feedback from David: - switched to range-based 'for' loops - fixed formatting of tests V6->V7: - rebased and adjusted AsmPrinter args - CamelCased .td, fixed formatting, cleaned up names, removed unused patterns - diffstat: 3 files changed, 203 insertions(+), 227 deletions(-) V5->V6: - addressed feedback from Chandler: - reinstated full verbose standard banner in all files - fixed variables that were not in CamelCase - fixed names of #ifdef in header files - removed redundant braces in if/else chains with single statements - fixed comments - removed trailing empty line - dropped debug annotations from tests - diffstat of these changes: 46 files changed, 456 insertions(+), 469 deletions(-) V4->V5: - fix setLoadExtAction() interface - clang-formated all where it made sense V3->V4: - added CODE_OWNERS entry for BPF backend V2->V3: - fix metadata in tests V1->V2: - addressed feedback from Tom and Matt - removed top level change to configure (now everything via 'experimental-backend') - reworked error reporting via DiagnosticInfo (similar to R600) - added few more tests - added cmake build - added Triple::bpf - tested on linux and darwin V1 cover letter: --------------------- recently linux gained "universal in-kernel virtual machine" which is called eBPF or extended BPF. The name comes from "Berkeley Packet Filter", since new instruction set is based on it. This patch adds a new backend that emits extended BPF instruction set. The concept and development are covered by the following articles: http://lwn.net/Articles/599755/ http://lwn.net/Articles/575531/ http://lwn.net/Articles/603983/ http://lwn.net/Articles/606089/ http://lwn.net/Articles/612878/ One of use cases: dtrace/systemtap alternative. 
bpf syscall manpage: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=b4fc1a460f3017e958e6a8ea560ea0afd91bf6fe instruction set description and differences vs classic BPF: http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/networking/filter.txt Short summary of instruction set: - 64-bit registers R0 - return value from in-kernel function, and exit value for BPF program R1 - R5 - arguments from BPF program to in-kernel function R6 - R9 - callee saved registers that in-kernel function will preserve R10 - read-only frame pointer to access stack - two-operand instructions like +, -, *, mov, load/store - implicit prologue/epilogue (invisible stack pointer) - no floating point, no simd Short history of extended BPF in kernel: interpreter in 3.15, x64 JIT in 3.16, arm64 JIT, verifier, bpf syscall in 3.18, more to come in the future. It's a very small and simple backend. There is no support for global variables, arbitrary function calls, floating point, varargs, exceptions, indirect jumps, arbitrary pointer arithmetic, alloca, etc. From C front-end point of view it's very restricted. It's done on purpose, since kernel rejects all programs that it cannot prove safe. It rejects programs with loops and with memory accesses via arbitrary pointers. When kernel accepts the program it is guaranteed that program will terminate and will not crash the kernel. This patch implements all 'must have' bits. There are several things on TODO list, so this is not the end of development. Most of the code is a boiler plate code, copy-pasted from other backends. Only odd things are lack or < and <= instructions, specialized load_byte intrinsics and 'compare and goto' as single instruction. Current instruction set is fixed, but more instructions can be added in the future. 
Signed-off-by: Alexei Starovoitov <alexei.starovoitov@gmail.com> Subscribers: majnemer, chandlerc, echristo, joerg, pete, rengolin, kristof.beyls, arsenm, t.p.northover, tstellarAMD, aemerson, llvm-commits Differential Revision: http://reviews.llvm.org/D6494 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227008 91177308-0d34-0410-b5e6-96231b3b80d8 2015-01-24 17:51:26 +00:00			`//===-- BPFMCCodeEmitter.cpp - Convert BPF code to machine code -----------===//`
			`//`
			`// The LLVM Compiler Infrastructure`
			`//`
			`// This file is distributed under the University of Illinois Open Source`
			`// License. See LICENSE.TXT for details.`
			`//`
			`//===----------------------------------------------------------------------===//`
			`//`
			`// This file implements the BPFMCCodeEmitter class.`
			`//`
			`//===----------------------------------------------------------------------===//`

			`#include "MCTargetDesc/BPFMCTargetDesc.h"`
			`#include "llvm/MC/MCCodeEmitter.h"`
			`#include "llvm/MC/MCFixup.h"`
			`#include "llvm/MC/MCInst.h"`
			`#include "llvm/MC/MCInstrInfo.h"`
			`#include "llvm/MC/MCRegisterInfo.h"`
			`#include "llvm/MC/MCSubtargetInfo.h"`
			`#include "llvm/MC/MCSymbol.h"`
			`#include "llvm/ADT/Statistic.h"`
			`#include "llvm/Support/raw_ostream.h"`
			`using namespace llvm;`

			`#define DEBUG_TYPE "mccodeemitter"`

			`namespace {`
			`class BPFMCCodeEmitter : public MCCodeEmitter {`
Removing LLVM_DELETED_FUNCTION, as MSVC 2012 was the last reason for requiring the macro. NFC; LLVM edition. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229340 91177308-0d34-0410-b5e6-96231b3b80d8 2015-02-15 22:54:22 +00:00			`BPFMCCodeEmitter(const BPFMCCodeEmitter &) = delete;`
			`void operator=(const BPFMCCodeEmitter &) = delete;`
BPF backend Summary: V8->V9: - cleanup tests V7->V8: - addressed feedback from David: - switched to range-based 'for' loops - fixed formatting of tests V6->V7: - rebased and adjusted AsmPrinter args - CamelCased .td, fixed formatting, cleaned up names, removed unused patterns - diffstat: 3 files changed, 203 insertions(+), 227 deletions(-) V5->V6: - addressed feedback from Chandler: - reinstated full verbose standard banner in all files - fixed variables that were not in CamelCase - fixed names of #ifdef in header files - removed redundant braces in if/else chains with single statements - fixed comments - removed trailing empty line - dropped debug annotations from tests - diffstat of these changes: 46 files changed, 456 insertions(+), 469 deletions(-) V4->V5: - fix setLoadExtAction() interface - clang-formated all where it made sense V3->V4: - added CODE_OWNERS entry for BPF backend V2->V3: - fix metadata in tests V1->V2: - addressed feedback from Tom and Matt - removed top level change to configure (now everything via 'experimental-backend') - reworked error reporting via DiagnosticInfo (similar to R600) - added few more tests - added cmake build - added Triple::bpf - tested on linux and darwin V1 cover letter: --------------------- recently linux gained "universal in-kernel virtual machine" which is called eBPF or extended BPF. The name comes from "Berkeley Packet Filter", since new instruction set is based on it. This patch adds a new backend that emits extended BPF instruction set. The concept and development are covered by the following articles: http://lwn.net/Articles/599755/ http://lwn.net/Articles/575531/ http://lwn.net/Articles/603983/ http://lwn.net/Articles/606089/ http://lwn.net/Articles/612878/ One of use cases: dtrace/systemtap alternative. 
bpf syscall manpage: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=b4fc1a460f3017e958e6a8ea560ea0afd91bf6fe instruction set description and differences vs classic BPF: http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/networking/filter.txt Short summary of instruction set: - 64-bit registers R0 - return value from in-kernel function, and exit value for BPF program R1 - R5 - arguments from BPF program to in-kernel function R6 - R9 - callee saved registers that in-kernel function will preserve R10 - read-only frame pointer to access stack - two-operand instructions like +, -, *, mov, load/store - implicit prologue/epilogue (invisible stack pointer) - no floating point, no simd Short history of extended BPF in kernel: interpreter in 3.15, x64 JIT in 3.16, arm64 JIT, verifier, bpf syscall in 3.18, more to come in the future. It's a very small and simple backend. There is no support for global variables, arbitrary function calls, floating point, varargs, exceptions, indirect jumps, arbitrary pointer arithmetic, alloca, etc. From C front-end point of view it's very restricted. It's done on purpose, since kernel rejects all programs that it cannot prove safe. It rejects programs with loops and with memory accesses via arbitrary pointers. When kernel accepts the program it is guaranteed that program will terminate and will not crash the kernel. This patch implements all 'must have' bits. There are several things on TODO list, so this is not the end of development. Most of the code is a boiler plate code, copy-pasted from other backends. Only odd things are lack or < and <= instructions, specialized load_byte intrinsics and 'compare and goto' as single instruction. Current instruction set is fixed, but more instructions can be added in the future. 
Signed-off-by: Alexei Starovoitov <alexei.starovoitov@gmail.com> Subscribers: majnemer, chandlerc, echristo, joerg, pete, rengolin, kristof.beyls, arsenm, t.p.northover, tstellarAMD, aemerson, llvm-commits Differential Revision: http://reviews.llvm.org/D6494 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227008 91177308-0d34-0410-b5e6-96231b3b80d8 2015-01-24 17:51:26 +00:00			`const MCRegisterInfo &MRI;`

			`public:`
			`BPFMCCodeEmitter(const MCRegisterInfo &mri) : MRI(mri) {}`

			`~BPFMCCodeEmitter() {}`

			`// getBinaryCodeForInstr - TableGen'erated function for getting the`
			`// binary encoding for an instruction.`
			`uint64_t getBinaryCodeForInstr(const MCInst &MI,`
			`SmallVectorImpl<MCFixup> &Fixups,`
			`const MCSubtargetInfo &STI) const;`

			`// getMachineOpValue - Return binary encoding of operand. If the machin`
			`// operand requires relocation, record the relocation and return zero.`
			`unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,`
			`SmallVectorImpl<MCFixup> &Fixups,`
			`const MCSubtargetInfo &STI) const;`

			`uint64_t getMemoryOpValue(const MCInst &MI, unsigned Op,`
			`SmallVectorImpl<MCFixup> &Fixups,`
			`const MCSubtargetInfo &STI) const;`

			`void EncodeInstruction(const MCInst &MI, raw_ostream &OS,`
			`SmallVectorImpl<MCFixup> &Fixups,`
			`const MCSubtargetInfo &STI) const override;`
			`};`
			`}`

			`MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII,`
			`const MCRegisterInfo &MRI,`
			`MCContext &Ctx) {`
			`return new BPFMCCodeEmitter(MRI);`
			`}`

			`unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI,`
			`const MCOperand &MO,`
			`SmallVectorImpl<MCFixup> &Fixups,`
			`const MCSubtargetInfo &STI) const {`
			`if (MO.isReg())`
			`return MRI.getEncodingValue(MO.getReg());`
			`if (MO.isImm())`
			`return static_cast<unsigned>(MO.getImm());`

			`assert(MO.isExpr());`

			`const MCExpr *Expr = MO.getExpr();`
			`MCExpr::ExprKind Kind = Expr->getKind();`

			`assert(Kind == MCExpr::SymbolRef);`

			`if (MI.getOpcode() == BPF::JAL)`
			`// func call name`
			`Fixups.push_back(MCFixup::Create(0, Expr, FK_SecRel_4));`
			`else if (MI.getOpcode() == BPF::LD_imm64)`
			`Fixups.push_back(MCFixup::Create(0, Expr, FK_SecRel_8));`
			`else`
			`// bb label`
			`Fixups.push_back(MCFixup::Create(0, Expr, FK_PCRel_2));`

			`return 0;`
			`}`

			`// Emit one byte through output stream`
			`void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) {`
			`OS << (char)C;`
			`++CurByte;`
			`}`

			`// Emit a series of bytes (little endian)`
			`void EmitLEConstant(uint64_t Val, unsigned Size, unsigned &CurByte,`
			`raw_ostream &OS) {`
			`assert(Size <= 8 && "size too big in emit constant");`

			`for (unsigned i = 0; i != Size; ++i) {`
			`EmitByte(Val & 255, CurByte, OS);`
			`Val >>= 8;`
			`}`
			`}`

			`// Emit a series of bytes (big endian)`
			`void EmitBEConstant(uint64_t Val, unsigned Size, unsigned &CurByte,`
			`raw_ostream &OS) {`
			`assert(Size <= 8 && "size too big in emit constant");`

			`for (int i = (Size - 1) * 8; i >= 0; i -= 8)`
			`EmitByte((Val >> i) & 255, CurByte, OS);`
			`}`

			`void BPFMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,`
			`SmallVectorImpl<MCFixup> &Fixups,`
			`const MCSubtargetInfo &STI) const {`
			`unsigned Opcode = MI.getOpcode();`
			`// Keep track of the current byte being emitted`
			`unsigned CurByte = 0;`

			`if (Opcode == BPF::LD_imm64) {`
			`uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI);`
			`EmitByte(Value >> 56, CurByte, OS);`
			`EmitByte(((Value >> 48) & 0xff), CurByte, OS);`
			`EmitLEConstant(0, 2, CurByte, OS);`
			`EmitLEConstant(Value & 0xffffFFFF, 4, CurByte, OS);`

			`const MCOperand &MO = MI.getOperand(1);`
			`uint64_t Imm = MO.isImm() ? MO.getImm() : 0;`
			`EmitByte(0, CurByte, OS);`
			`EmitByte(0, CurByte, OS);`
			`EmitLEConstant(0, 2, CurByte, OS);`
			`EmitLEConstant(Imm >> 32, 4, CurByte, OS);`
			`} else {`
			`// Get instruction encoding and emit it`
			`uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI);`
			`EmitByte(Value >> 56, CurByte, OS);`
			`EmitByte((Value >> 48) & 0xff, CurByte, OS);`
			`EmitLEConstant((Value >> 32) & 0xffff, 2, CurByte, OS);`
			`EmitLEConstant(Value & 0xffffFFFF, 4, CurByte, OS);`
			`}`
			`}`

			`// Encode BPF Memory Operand`
			`uint64_t BPFMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op,`
			`SmallVectorImpl<MCFixup> &Fixups,`
			`const MCSubtargetInfo &STI) const {`
			`uint64_t Encoding;`
			`const MCOperand Op1 = MI.getOperand(1);`
			`assert(Op1.isReg() && "First operand is not register.");`
			`Encoding = MRI.getEncodingValue(Op1.getReg());`
			`Encoding <<= 16;`
			`MCOperand Op2 = MI.getOperand(2);`
			`assert(Op2.isImm() && "Second operand is not immediate.");`
			`Encoding \|= Op2.getImm() & 0xffff;`
			`return Encoding;`
			`}`

			`#include "BPFGenMCCodeEmitter.inc"`