commit 5d5ddba7f10596fc574fb0639012d720b9d66c62 Author: Steven Flintham Date: Wed Jun 25 18:47:24 2014 +0100 First public release diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1be6b44 --- /dev/null +++ b/.gitignore @@ -0,0 +1,42 @@ +*~ +*.o +*.lo +.deps +.libs +Makefile +Makefile.in +aclocal.m4 +autom4te.cache +config.guess +config.h +config.log +config.status +config.sub +configure +depcomp +examples/.dirstamp +examples/lib1 +install-sh +lib6502-jit* +lib6502-jit* +libtool +ltmain.sh +m4/libtool.m4 +m4/ltoptions.m4 +m4/ltsugar.m4 +m4/ltversion.m4 +m4/lt~obsolete.m4 +missing +run6502 +stamp-h1 +test/.dirstamp +test/*.mc +test/basic-callback +test/call-illegal-callback-modify-code +test/irq-nmi +test/setjmp-trick +test/stack-code-brk +test/stack-code-jsr +test/write-callback-modify-code +test/z-self-modify-1.mc +test/z-self-modify-1.out diff --git a/AddressRange.cpp b/AddressRange.cpp new file mode 100644 index 0000000..1ec95cb --- /dev/null +++ b/AddressRange.cpp @@ -0,0 +1,42 @@ +/* Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. 
+ */ + +#include "AddressRange.h" + +#include + +#include "const.h" + +AddressRange::AddressRange(uint16_t addr) +: range_begin_(addr), range_end_(range_begin_ + 1) +{ +} + +AddressRange::AddressRange(uint32_t range_begin, uint32_t range_end) +: range_begin_(range_begin), range_end_(range_end) +{ + assert(range_begin_ < memory_size); + assert(range_end_ <= (memory_size + 0xff)); + assert(range_begin_ < range_end_); +} + +bool AddressRange::all_memory() const +{ + // This doesn't catch some degenerate cases (e.g. range_begin_ = 0x1, + // range_end_ = 0x10002) but that doesn't matter. + return (range_begin_ == 0) && (range_end_ == memory_size); +} diff --git a/AddressRange.h b/AddressRange.h new file mode 100644 index 0000000..f03744a --- /dev/null +++ b/AddressRange.h @@ -0,0 +1,101 @@ +/* Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. + */ + +// An AddressRange represents a contiguous range of addresses in the emulated +// memory, expressed as a half-open interval ("begin" is included, "end" is +// excluded). To allow convenient handling of cases where addresses wrap around +// at the top of memory, end may be as large as 0x100ff; this allows the +// effective address range of an instruction like LDA &ffff,Y to be represented. 
+// (The "largest" address accessed is &00fe, and since the interval is half-open +// end needs to allow a value one larger.) + +#ifndef ADDRESSRANGE_H +#define ADDRESSRANGE_H + +#include + +class AddressRange +{ +public: + // Convenience function; equivalent to AddressRange(addr, addr + 1) without + // any need to worry about whether addr + 1 will wrap to 0. + AddressRange(uint16_t addr); + + AddressRange(uint32_t range_begin, uint32_t range_end); + + uint32_t range_begin() const + { + return range_begin_; + } + + uint32_t range_end() const + { + return range_end_; + } + + // Return true iff AddressRange covers the whole of memory. + bool all_memory() const; + + class const_iterator + { + friend class AddressRange; + + public: + uint16_t operator*() const + { + // Truncating down to 16 bits gives exactly the behaviour we + // require if this is a range which uses values >= 0x10000 to + // indicate wrapping around to the start of memory. + return static_cast(v_); + } + + const_iterator &operator++() + { + ++v_; + return *this; + } + + bool operator!=(const const_iterator &rhs) + { + return v_ != rhs.v_; + } + + private: + const_iterator(uint32_t v) + : v_(v) + { + } + + uint32_t v_; + }; + + const_iterator begin() const + { + return const_iterator(range_begin_); + } + + const_iterator end() const + { + return const_iterator(range_end_); + } + +private: + uint32_t range_begin_; + uint32_t range_end_; +}; + +#endif diff --git a/AddressSet.cpp b/AddressSet.cpp new file mode 100644 index 0000000..971ba5a --- /dev/null +++ b/AddressSet.cpp @@ -0,0 +1,95 @@ +/* Copyright (c) 2014 Steven Flintham + * + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. + */ + +#include "AddressSet.h" + +#include +#include +#include + +#include "AddressRange.h" +#include "util.h" + +void AddressSet::insert(uint16_t address) +{ + set_.insert(address); +} + +void AddressSet::insert(const AddressRange &range) +{ + for (AddressRange::const_iterator it = range.begin(); it != range.end(); + ++it) + { + set_.insert(*it); + } +} + +namespace +{ + std::string dump_range(uint32_t range_start, uint32_t range_end) + { + std::stringstream s; + s << std::hex << std::setfill('0'); + if ((range_start + 1) == range_end) + { + s << "0x" << std::setw(4) << range_start; + } + else + { + // It's probably more readable to dump in this (inclusive) format + // than to insist on using the half-open intervals which are + // "natural" in the code itself. 
+ s << "0x" << std::setw(4) << range_start << "-" << + "0x" << std::setw(4) << (range_end - 1); + } + return s.str(); + } +} + +std::string AddressSet::dump(int indent) const +{ + std::stringstream s; + + bool in_range = false; + uint32_t range_start; + uint32_t range_last; + for (AddressSet::const_iterator it = set_.begin(); it != set_.end(); ++it) + { + uint16_t i = *it; + if (!in_range) + { + range_start = i; + range_last = i; + in_range = true; + } + else + { + if (i != (range_last + 1)) + { + s << spaces(indent) << + dump_range(range_start, range_last + 1) << "\n"; + range_start = i; + } + range_last = i; + } + } + if (in_range) + { + s << spaces(indent) << dump_range(range_start, range_last + 1) << "\n"; + } + return s.str(); +} diff --git a/AddressSet.h b/AddressSet.h new file mode 100644 index 0000000..d9d8ef4 --- /dev/null +++ b/AddressSet.h @@ -0,0 +1,66 @@ +/* Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. + */ + +#ifndef ADDRESSSET_H +#define ADDRESSSET_H + +#include +#include +#include + +class AddressRange; + +class AddressSet +{ +private: + // This might not be the perfect representation, but it's simple and clean, + // so let's stick with it unless profiling shows this is a problem. 
+ typedef std::set Container; + +public: + AddressSet() + { + } + + void insert(uint16_t address); + + void insert(const AddressRange &range); + + typedef Container::const_iterator const_iterator; + + const_iterator begin() const + { + return set_.begin(); + } + + const_iterator end() const + { + return set_.end(); + } + + Container::size_type size() const + { + return set_.size(); + } + + std::string dump(int indent) const; + +private: + std::set set_; +}; + +#endif diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..2cf8818 --- /dev/null +++ b/COPYING @@ -0,0 +1,60 @@ +TL;DR: If you're redistributing this you should read through the text below and +examine the headers on the individual files, but basically the C/C++ source +code (with the exception of valgrind.h, which can be removed if necessary) was +all written by Ian Piumarta or Steven Flintham and is licensed under the "MIT +(X11 flavour)" licence at the bottom of this file, just as lib6502 itself is. +The autotools infrastructure support is GPL licensed but has exceptions for use +(as is the case here) in autoconfigured packages. + + + +valgrind.h has its own license; see the comments at the top of that file. + +build-aux/tap-driver.sh (used as part of "make check") is GPLv2 licensed with +an exception (which I believe applies to this package) allowing distribution +under "the same distribution terms that you use for the rest of that program". +See the comments at the top of that file for more details. + +m4/boost.m4 (used to autoconfigure the build against the Boost libraries) is +GPLv3 licensed with an exception (which I believe applies to this package) +allowing distribution under "terms of your choice". See the comments at the top +of that file for more details. + +The text below is from Ian Piumarta's lib6502's COPYING file. lib6502-jit +contains almost all of the code and documentation from lib6502 itself. 
+ +As the author of the remaining parts of lib6502-jit, I am granting the same +permissions and have added my own copyright notice, but the text below is +otherwise unchanged. + +-- Steven Flintham + + + +Distasteful though it is for me to have to induce from afar any perturbation +into your pursuit of happiness, this MIT (X11 flavour) license is at least +relatively benign. Investigation into copyright stupidity reveals that it is +effectively impossible to dedicate (formally) any software to the public +domain (the only sure path to this most enlightened status being to leave the +software to expire naturally from its 25-, 50-, 75- or whatever-year copyright +rot). I fear this is not going to change before the revolution comes. In the +meantime the only way I can *guarantee* you any rights at all to this software +would (unfortunately) appear to be... + + Copyright (c) 2005 Ian Piumarta + Copyright (c) 2014 Steven Flintham + + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the 'Software'), to + deal in the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, and/or sell copies + of the Software, and to permit persons to whom the Software is furnished to + do so, provided that the above copyright notice(s) and this permission + notice appear in all copies or substantial portions of the Software. + + Inclusion of the above copyright notice(s) and this permission notice in + supporting documentation would be appreciated, but is not required. + + THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. 
diff --git a/CREDITS b/CREDITS new file mode 100644 index 0000000..c7d726b --- /dev/null +++ b/CREDITS @@ -0,0 +1,33 @@ +lib6502-jit implements (nearly) the same API as Ian Piumarta's lib6502 +(http://www.piumarta.com/software/lib6502/) and includes virtually all of +lib6502's code and documentation with only minor modifications; the lib6502 +emulation code is used to implement the interpreted and hybrid emulation modes +in lib6502-jit. The contents of the examples and man directories are almost +verbatim copies of those in lib6502. Thanks to Ian for making lib6502 +available. Please do not send bug reports regarding lib6502-jit to Ian! + +This distribution itself doesn't contain any LLVM code, but obviously without +the LLVM project lib6502-jit could not exist. + +valgrind.h is taken from Valgrind (http://valgrind.org/). + +build-aux/tap-driver.sh is part of GNU Automake and was taken from +https://raw.githubusercontent.com/kergoth/automake/master/lib/tap-driver.sh. + +m4/boost.m4 (used to autoconfigure the build against the Boost libraries) is +taken from https://github.com/tsuna/boost.m4. + +While I'd be lying if I said I enjoyed working with Autotools, I am grateful +for the work people have put in to make it possible to build packages portably +on a range of different platforms. + +The technique (but not the code) used to translate a JITted function's machine +code into assembly in Function::dump_machine_code() is taken from the libjit +(https://www.gnu.org/software/libjit/) dump_object_code() function. + +The algorithm used to implement ADC/SBC in decimal mode is taken from +http://www.6502.org/tutorials/decimal_mode.html. The test program on the same +page was used to validate the implementation. + +Klaus Dormann's "6502 functional test" and "65C02 extended opcodes test" were +used to validate the behaviour of the emulation. 
diff --git a/Function.cpp b/Function.cpp new file mode 100644 index 0000000..d766bec --- /dev/null +++ b/Function.cpp @@ -0,0 +1,417 @@ +/* Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. + */ + +#include "Function.h" + +#include +#include +#include +#include +#include "valgrind.h" + +#include "const.h" +#include "LLVMStuff.h" +#include "M6502Internal.h" +#include "Registers.h" +#include "util.h" + +// Note that we call update_memory_snapshot() after invoking callbacks here, but +// not before. It would be correct to do so, but it's not necessary. Firstly, we +// arrange that the memory snapshot is kept up-to-date during execution under +// our control (i.e. not involving callbacks), so it isn't necessary. Secondly, +// even if it were necessary, it would be redundant, since any actions needed +// as a result of the update can wait until after the callback is called and the +// call after the callback would perform them. + +namespace +{ + // We have the callback_pc argument to allow us to special-case the + // contents of the PC register for lib6502 compatibility. 
Without this + // we would always pass registers.pc, which is "address of the next + // instruction to execute if the callback doesn't intervene" in PC; + // this agrees with lib6502 for JMP (absolute and indirect) but not for JSR + // or BRK. + uint16_t handle_call_callback(M6502 *mpu, uint16_t callback_pc, + uint8_t opcode) + { + Registers ®isters = mpu->internal->registers_; + uint16_t default_next_pc = registers.pc; + if (mpu->callbacks->call[registers.pc] != 0) + { + registers.pc = callback_pc; + registers.to_M6502_Registers(mpu); + TRACE("Call callback, mpu " << mpu << ", address 0x" << std::hex << + std::setfill('0') << std::setw(4) << default_next_pc << + ", data 0x" << std::setw(2) << static_cast(opcode)); + uint16_t address = default_next_pc; + if (opcode == opcode_brk) + { + address = callback_pc - 2; // lib6502 does this + } + int callback_result = + mpu->callbacks->call[default_next_pc](mpu, address, opcode); + TRACE("Callback returned 0x" << std::hex << std::setfill('0') << + std::setw(4) << callback_result); + registers.from_M6502_Registers(mpu); + mpu->internal->function_manager_.update_memory_snapshot(); + if (callback_result != 0) + { + return callback_result; + } + } + return default_next_pc; + } + + uint16_t get_stacked_pc(M6502 *mpu, int offset) + { + uint8_t s = mpu->internal->registers_.s; + + for (; offset > 0; --offset) + { + ++s; + } + + ++s; + uint8_t pushed_pc_low = mpu->memory[0x100 + s]; + ++s; + uint8_t pushed_pc_high = mpu->memory[0x100 + s]; + return pushed_pc_low | (pushed_pc_high << 8); + } + + uint16_t handle_push_and_control_transfer_opcode( + M6502 *mpu, uint16_t callback_pc, uint8_t opcode, int bytes_pushed) + { + assert(bytes_pushed >= 2); + + uint8_t s = mpu->internal->registers_.s; + for (int i = 0; i < bytes_pushed; ++i) + { + ++s; + mpu->internal->function_manager_.code_modified_at(0x100 + s); + } + + return handle_call_callback(mpu, callback_pc, opcode); + } +} + +Function::Function( + M6502 *mpu, uint16_t address, const 
AddressSet &code_range, + const AddressSet &optimistic_writes, llvm::Function *llvm_function) +: mpu_(mpu), + llvm_stuff_(mpu->internal->llvm_stuff_), + address_(address), + code_range_(code_range), + optimistic_writes_(optimistic_writes), + llvm_function_(llvm_function), + jitted_function_(reinterpret_cast( + llvm_stuff_.execution_engine_->getPointerToFunction(llvm_function))) +{ + llvm_stuff_.execution_engine_->runJITOnFunction(llvm_function_, &mci_); +} + +Function::~Function() +{ + TRACE("Destructor for Function at address " << std::hex << + std::setfill('0') << std::setw(4) << address_); + + VALGRIND_DISCARD_TRANSLATIONS(mci_.address(), mci_.size()); + llvm_function_->eraseFromParent(); +} + +void Function::handle_complex_result(FunctionBuilder::Result result) const +{ + Registers ®isters = mpu_->internal->registers_; + + switch (result) + { + case FunctionBuilder::result_control_transfer_direct: + CANT_HAPPEN("Direct case reached handle_complex_result()"); + + case FunctionBuilder::result_control_transfer_indirect: + registers.pc = handle_call_callback(mpu_, registers.pc, + registers.data); + break; + + case FunctionBuilder::result_brk: + registers.pc = handle_push_and_control_transfer_opcode( + mpu_, get_stacked_pc(mpu_, 1), opcode_brk, 3); + break; + + case FunctionBuilder::result_jsr_complex: + registers.pc = handle_push_and_control_transfer_opcode( + mpu_, get_stacked_pc(mpu_, 0) + 1, opcode_jsr, 2); + break; + + case FunctionBuilder::result_illegal_instruction: + { + registers.to_M6502_Registers(mpu_); + TRACE("Illegal instruction callback, mpu " << mpu_ << + ", address 0x" << std::hex << std::setfill('0') << + std::setw(4) << registers.addr << ", data 0x" << + std::setw(2) << static_cast(registers.data)); + uint16_t new_pc = + mpu_->callbacks->illegal_instruction[registers.data]( + mpu_, registers.addr, registers.data); + TRACE("Callback returned 0x" << std::hex << std::setfill('0') << + std::setw(4) << new_pc); + registers.from_M6502_Registers(mpu_); + 
mpu_->internal->function_manager_.update_memory_snapshot(); + if (new_pc != 0) + { + registers.pc = new_pc; + } + break; + } + + case FunctionBuilder::result_write_to_code: + TRACE("Code modified at 0x" << std::hex << std::setfill('0') << + std::setw(4) << registers.addr); + mpu_->internal->function_manager_.code_modified_at(registers.addr); + break; + + case FunctionBuilder::result_write_callback: + { + TRACE("Write callback at 0x" << std::hex << std::setfill('0') << + std::setw(4) << registers.addr << " with data 0x" << + std::setw(4) << static_cast(registers.data)); + // We *don't* invoke Registers.{to,from}_M6502Registers() before + // and after the callback. We could do this, but lib6502 itself + // (and therefore the lib6502 code used for interpreting in + // lib6502-jit) doesn't do that, so this could be confusing + // for client code. (For example, a callback might be written + // to rely on this, it would work if called from compiled code + // but wouldn't work if called from interpreted mode. So its + // behaviour in hybrid mode would be random.) 
+ (void) mpu_->callbacks->write[registers.addr]( + mpu_, registers.addr, registers.data); + mpu_->internal->function_manager_.update_memory_snapshot(); + break; + } + + case FunctionBuilder::result_invalid_bounds: + CANT_HAPPEN("Invalid bounds inside Function for address 0x" << + std::hex << std::setfill('0') << std::setw(4) << + address_); + + default: + CANT_HAPPEN("Unknown result " << result << " from JIT function"); + } +} + +#ifdef LOG + +namespace +{ + std::string indent(int n, const std::string &s) + { + std::string prefix = spaces(n); + return apply_prefix(prefix, s); + } +} + +std::string Function::dump_all() const +{ + std::stringstream s; + s << "Function at 0x" << std::hex << std::setfill('0') << std::setw(4) << + address_ << ":\n"; + s << spaces(1) << "Code range:\n" << code_range_.dump(2) << "\n"; + s << spaces(1) << "Optimistic writes at:\n" << optimistic_writes_.dump(2) << + "\n"; + s << spaces(1) << "6502 machine code:\n" << indent(2, disassembly_) << "\n"; + s << spaces(1) << "Unoptimised IR:\n" << indent(2, unoptimised_ir_) << "\n"; + s << spaces(1) << "Optimised IR:\n" << indent(2, optimised_ir_) << "\n";; + s << spaces(1) << "Host machine code:\n" << indent(2, dump_machine_code()); + return s.str(); +} + +#endif + +namespace +{ + template + class AutoClose : boost::noncopyable + { + public: + AutoClose(Handle h) + : open_(true), h_(h) + { + } + + int close() + { + open_ = false; + return close_fn(h_); + } + + ~AutoClose() + { + if (open_) + { + close_fn(h_); // ignore return code, nothing we can do if it fails + } + } + + private: + bool open_; + Handle h_; + }; + + typedef int (*FdClose)(int); + typedef AutoClose FdAutoClose; + typedef int (*PopenClose)(FILE *); + typedef AutoClose PopenAutoClose; +} + +#ifdef LOG + +std::string Function::dump_machine_code() const +{ + try + { + // What a performance! 
The basic idea of outputting .bytes directives, + // assembling those and then disassembling the result is taken from + // libjit's dump_object_code(); the implementation is not copied. + + char as_output_file[] = "/tmp/lib6502-jit-XXXXXX"; + + errno = 0; + + // mkstemp() creates a unique filename and opens it. We unlink the file + // immediately so it has no name; this minimises (but does not + // eliminate; we might be killed between mkstemp() and unlink()) the + // chance of the file being left lying around. Since we need a name for + // the 'as' and 'objdump' commands, we use /dev/fd/nn to refer to it + // afterwards. + int fd = mkstemp(as_output_file); + if (fd == -1) + { + fail_errno_or("mkstemp() failed"); + } + FdAutoClose auto_close_fd(fd); + if (unlink(as_output_file) == -1) + { + fail_errno_or("unlink() failed"); + } + + { + std::stringstream as_command; + as_command << "as -o /dev/fd/" << fd << " 2>/dev/null"; + FILE *f = popen(as_command.str().c_str(), "w"); + if (f == 0) + { + fail_errno_or("popen() failed (for 'as')"); + } + PopenAutoClose auto_close_f(f); + unsigned char *p = static_cast(mci_.address()); + unsigned char *end = p + mci_.size(); + for (; p < end; ++p) + { + if (fprintf(f, ".byte %d\n", *p) < 0) + { + fail("Error writing to 'as' pipe"); + } + } + if (auto_close_f.close() != 0) + { + fail_errno_or("Error closing 'as' pipe"); + } + } + + if (lseek(fd, 0, SEEK_SET) == static_cast(-1)) + { + fail_errno_or("Error seeking on temporary file"); + } + + std::stringstream objdump_command; + // As far as I can tell, there's no guarantee how mci_.address() [a + // pointer type] will be represented in the stringstream, but in + // practice this code is not very portable anyway and this is the least + // of our worries... 
+ objdump_command << "objdump --adjust-vma=" << + mci_.address() << " -d /dev/fd/" << fd << " 2>&1"; + FILE *g = popen(objdump_command.str().c_str(), "r"); + if (g == 0) + { + fail_errno_or("popen() failed (for 'objdump')"); + } + PopenAutoClose auto_close_g(g); + + std::stringstream code; + char buffer[1024]; + size_t bytes_read; + while ((bytes_read = fread(buffer, 1, sizeof(buffer), g)) > 0) + { + code << std::string(buffer, bytes_read); + } + if (ferror(g)) + { + fail("Error reading from 'objdump' pipe"); + } + if (auto_close_g.close() != 0) + { + fail_errno_or("Error closing 'objdump' pipe"); + } + if (auto_close_fd.close() != 0) + { + fail_errno_or("Error closing temporary file"); + } + + return code.str(); + } + catch (std::exception &e) + { + // Dumping out the generated machine code is decidedly not critical, so + // we don't allow the exception to propagate. + return std::string("Unable to dump machine code: ") + e.what(); + } +} + +void Function::fail(const std::string &error) const +{ + throw std::runtime_error(error); +} + +void Function::fail_errno_or(const std::string &error) const +{ + if (errno == 0) + { + fail(error); + } + else + { + // strerror_r() exists in various versions. If you have problems getting + // this to compile, it's probably OK to just use: + // const char *error = strerror(errno); + // given a) the limited amount of threading here and b) the fact this is + // only used to report rare errors in debug-only logging code. If push + // really comes to shove you can just do: + // const char *error = 0; + // and you'll just get unhelpful error messages. 
+ char buffer[1024]; + const char *error = strerror_r(errno, buffer, sizeof(buffer)); + if (error != 0) + { + fail(error); + } + else + { + fail("Error occurred, and strerror() probably failed as well"); + } + } +} + +#endif diff --git a/Function.h b/Function.h new file mode 100644 index 0000000..63fd6e8 --- /dev/null +++ b/Function.h @@ -0,0 +1,112 @@ +/* Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. 
+ */ + +#ifndef FUNCTION_H +#define FUNCTION_H + +#include +#include +#include "llvm/CodeGen/MachineCodeInfo.h" +#include "llvm/IR/Value.h" + +#include "AddressSet.h" +#include "FunctionBuilder.h" +#include "lib6502.h" + +struct LLVMStuff; + +class Function : boost::noncopyable +{ +public: + Function(M6502 *mpu, uint16_t address, const AddressSet &code_range, + const AddressSet &optimistic_writes, + llvm::Function *llvm_function); + ~Function(); + + uint16_t address() const + { + return address_; + } + + const AddressSet &code_range() const + { + return code_range_; + } + + const AddressSet &optimistic_writes() const + { + return optimistic_writes_; + } + + void execute() const + { + FunctionBuilder::Result result = + static_cast((*jitted_function_)()); + if (result != FunctionBuilder::result_control_transfer_direct) + { + handle_complex_result(result); + } + } + + #ifdef LOG + void set_disassembly(const std::string &s) + { + disassembly_ = s; + } + + void set_unoptimised_ir(const std::string &s) + { + unoptimised_ir_ = s; + } + + void set_optimised_ir(const std::string &s) + { + optimised_ir_ = s; + } + + std::string dump_all() const; + + std::string dump_machine_code() const; + #endif + +private: + void handle_complex_result(FunctionBuilder::Result result) const; + + #ifdef LOG + void fail(const std::string &error) const; + void fail_errno_or(const std::string &error) const; + #endif + + M6502 *mpu_; + LLVMStuff &llvm_stuff_; + uint16_t address_; + AddressSet code_range_; + AddressSet optimistic_writes_; + llvm::Function *llvm_function_; + llvm::MachineCodeInfo mci_; + + typedef int (*JitFunction)(); + JitFunction jitted_function_; + + #ifdef LOG + std::string disassembly_; + std::string unoptimised_ir_; + std::string optimised_ir_; + #endif +}; + +#endif diff --git a/FunctionBuilder.cpp b/FunctionBuilder.cpp new file mode 100644 index 0000000..9d7efb8 --- /dev/null +++ b/FunctionBuilder.cpp @@ -0,0 +1,3571 @@ +/* Copyright (c) 2014 Steven Flintham + * + * All 
rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. + */ + +#include "FunctionBuilder.h" + +// Throughout this file we must be careful to avoid incorrect wrap-around +// handling; for example, it's wrong to do memory[pc + 2] because if pc is +// 0xffff this will access off the end of memory. We must always use uint16_t +// intermediate values to get the right wrapping behaviour. Similar +// considerations apply when using zero-page addressing; we must ensure we wrap +// around at 0xff. 
+ +#include "config.h" + +#include +#include +#include +#include "llvm/Analysis/Passes.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/TypeBuilder.h" + +#if defined HAVE_LLVM_ANALYSIS_VERIFIER_H + #include "llvm/Analysis/Verifier.h" +#elif defined HAVE_LLVM_IR_VERIFIER_H + #include "llvm/IR/Verifier.h" +#else + #error Need LLVM Verifier.h +#endif + +#include "llvm/PassManager.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include + +#include "AddressRange.h" +#include "const.h" +#include "Function.h" +#include "LLVMStuff.h" +#include "M6502Internal.h" +#include "Registers.h" +#include "util.h" + + + +namespace llvm +{ + template + class TypeBuilder + { + public: + static StructType *get(LLVMContext &context) + { + static StructType *t = StructType::create(context, "M6502"); + return t; + } + }; + + template + class TypeBuilder + { + public: + static StructType *get(LLVMContext &context) + { + static StructType *t = StructType::create("Registers", + TypeBuilder, xcompile>::get(context), // a + TypeBuilder, xcompile>::get(context), // x + TypeBuilder, xcompile>::get(context), // y + TypeBuilder, xcompile>::get(context), // s + TypeBuilder::get(context), // flag_n + TypeBuilder::get(context), // flag_v + TypeBuilder::get(context), // flag_d + TypeBuilder::get(context), // flag_i + TypeBuilder::get(context), // flag_z + TypeBuilder::get(context), // flag_c + TypeBuilder, xcompile>::get(context), // pc + TypeBuilder, xcompile>::get(context), // addr + TypeBuilder, xcompile>::get(context), // data + NULL); + return t; + } + }; +} + +namespace +{ + const std::string hex_prefix = "&"; + + bool callback_in_bounds(const M6502_Callback *callbacks, + const AddressRange &bounds) + { + for (AddressRange::const_iterator it = bounds.begin(); + it != bounds.end(); ++it) + { + if (callbacks[*it] != 0) + { + return true; + } + } + return false; + } +} + + 
+ +// BoundedAddress contains an llvm::Value of type i16 which refers to +// an address in the emulated memory. It additionally contains a range of +// possible addresses which the llvm::Value can evaluate to (derived from the +// addressing mode which created it). This is used to optimise the generated +// code. + +class FunctionBuilder::BoundedAddress +{ +public: + // Construct a BoundedAddress with the widest possible bounds; this + // is always safe, but if possible should be avoided as it reduces + // optimisation potential. + BoundedAddress(FunctionBuilder &fb, llvm::Value *addr); + + // Construct a BoundedAddress with the given bounds. + BoundedAddress(FunctionBuilder &fb, llvm::Value *addr, + const AddressRange &bounds); + + llvm::Value *addr() const + { + return addr_; + } + + const AddressRange &bounds() const + { + return bounds_; + } + + friend + std::ostream &operator<<(std::ostream &s, const BoundedAddress &ba) + { + std::stringstream t; + t << "[0x" << std::hex << std::setfill('0') << std::setw(4) << + ba.bounds().range_begin() << ", 0x" << std::setw(4) << + ba.bounds().range_end() << ")"; + s << t.str(); + return s; + } + +private: + llvm::Value *addr_; + AddressRange bounds_; +}; + +FunctionBuilder::BoundedAddress::BoundedAddress( + FunctionBuilder &fb, llvm::Value *addr) +: addr_(addr), bounds_(0, memory_size) +{ + assert(addr->getType() == fb.i16_type_); +} + +FunctionBuilder::BoundedAddress::BoundedAddress( + FunctionBuilder &fb, llvm::Value *addr, const AddressRange &bounds) +: addr_(addr), bounds_(bounds) +{ + assert(addr->getType() == fb.i16_type_); + +#ifndef NDEBUG + llvm::ConstantInt *addr_ci = llvm::dyn_cast(addr); + if (addr_ci != 0) + { + // We can verify the claimed bounds at compile time. + uint16_t addr16 = addr_ci->getLimitedValue(); + assert(addr16 == bounds.range_begin()); + assert(addr16 == (bounds.range_end() - 1)); + } + else + { + // We can't verify the claimed bounds at compile time, so generate code + // to check at runtime. 
+ + llvm::BasicBlock *bounds_maybe_ok_block = + llvm::BasicBlock::Create(fb.context_, "bounds_maybe_ok_block", + fb.llvm_function_); + llvm::BasicBlock *bounds_not_ok_block = + llvm::BasicBlock::Create(fb.context_, "bounds_not_ok"); + llvm::BasicBlock *bounds_ok_block = + llvm::BasicBlock::Create(fb.context_, "bounds_ok"); + + if (bounds.range_end() <= memory_size) + { + TRACE("Generating bounds check code for non-wrapped case"); + llvm::Value *lower_bound_ok = + fb.builder_.CreateICmpUGE( + addr, fb.constant_u16(bounds.range_begin())); + fb.builder_.CreateCondBr(lower_bound_ok, bounds_maybe_ok_block, + bounds_not_ok_block); + fb.builder_.SetInsertPoint(bounds_maybe_ok_block); + llvm::Value *upper_bound_ok = + fb.builder_.CreateICmpULE( + addr, fb.constant_u16(bounds.range_end() - 1)); + fb.builder_.CreateCondBr(upper_bound_ok, bounds_ok_block, + bounds_not_ok_block); + } + else + { + TRACE("Generating bounds check code for wrapped case"); + llvm::Value *in_upper_range = + fb.builder_.CreateICmpUGE( + addr, fb.constant_u16(bounds.range_begin())); + fb.builder_.CreateCondBr(in_upper_range, bounds_ok_block, + bounds_maybe_ok_block); + fb.builder_.SetInsertPoint(bounds_maybe_ok_block); + // We want to truncate bounds.range_end() - 1 to 16 bits here. 
+ llvm::Value *in_lower_range = + fb.builder_.CreateICmpULE( + addr, fb.constant_u16(bounds.range_end() - 1)); + fb.builder_.CreateCondBr(in_lower_range, bounds_ok_block, + bounds_not_ok_block); + } + + fb.llvm_function_->getBasicBlockList().push_back(bounds_not_ok_block); + fb.builder_.SetInsertPoint(bounds_not_ok_block); + fb.return_invalid_bounds(); + + fb.llvm_function_->getBasicBlockList().push_back(bounds_ok_block); + fb.builder_.SetInsertPoint(bounds_ok_block); + } +#endif +} + + + +FunctionBuilder::FunctionBuilder( + M6502 *mpu, const uint8_t *ct_memory, JitBool *code_at_address, + uint16_t address) +: built_(false), + mpu_(mpu), + code_at_address_(code_at_address), + address_(address), + ct_memory_(ct_memory), + callbacks_(*(mpu->callbacks)), + instructions_(0), + max_instructions_(std::max(1, mpu->internal->max_instructions_)), + context_(llvm::getGlobalContext()), + native_int_type_(llvm::TypeBuilder::get(context_)), + callback_type_(llvm::TypeBuilder::get(context_)), + i1_type_(llvm::TypeBuilder, false>::get(context_)), + i8_type_(llvm::TypeBuilder, false>::get(context_)), + i16_type_(llvm::TypeBuilder, false>::get(context_)), + i32_type_(llvm::TypeBuilder, false>::get(context_)), + i64_type_(llvm::TypeBuilder, false>::get(context_)), + jit_bool_type_(llvm::TypeBuilder::get(context_)), + builder_(mpu_->internal->llvm_stuff_.builder_), + address_block_(), + code_generated_for_address_() +{ + llvm::FunctionType *ft = llvm::TypeBuilder::get(context_); + std::stringstream name; + name << "x" << std::hex << std::setw(4) << std::setfill('0') << address_; + llvm_function_ = llvm::Function::Create( + ft, llvm::Function::PrivateLinkage, name.str(), + mpu_->internal->llvm_stuff_.module_.get()); + + llvm::BasicBlock *BB = + llvm::BasicBlock::Create(context_, "prologue", llvm_function_); + builder_.SetInsertPoint(BB); + + mpu_llvm_ = constant_ptr(mpu, "mpu"); + code_at_address_llvm_ = constant_ptr(code_at_address, "code_at_address"); + registers_ = 
constant_ptr(&(mpu->internal->registers_), "registers"); + read_callbacks_ = constant_ptr(callbacks_.read, "read_callbacks"); + write_callbacks_ = constant_ptr(callbacks_.write, "write_callbacks"); + call_callbacks_ = constant_ptr(callbacks_.call, "call_callbacks"); + memory_base_ = constant_ptr(mpu->memory, "memory"); + + function_result_ = + builder_.CreateAlloca(native_int_type_, 0, "function_result"); + + // Function prologue: Copy the registers from Registers into local + // variables for use. The epilogue will reverse this process before the + // function returns for registers which actually get modified. (The + // LLVM optimiser is then able to remove loads which would just load + // unused values.) + initialise_i8_reg(a_ , 0, "a"); + initialise_i8_reg(x_ , 1, "x"); + initialise_i8_reg(y_ , 2, "y"); + initialise_i8_reg(s_ , 3, "s"); + initialise_jb_reg(flag_n_, 4, "flag_n"); + initialise_jb_reg(flag_v_, 5, "flag_v"); + initialise_jb_reg(flag_d_, 6, "flag_d"); + initialise_jb_reg(flag_i_, 7, "flag_i"); + initialise_jb_reg(flag_z_, 8, "flag_z"); + initialise_jb_reg(flag_c_, 9, "flag_c"); + + pc_ = builder_.CreateAlloca(i16_type_, 0, "pc"); + builder_.CreateStore( + builder_.CreateLoad( + builder_.CreateStructGEP(registers_, 10), false, "pc"), + pc_); + + // Temporary variable used when invoking read callbacks; no need to + // initialise. + read_callback_result_ = + builder_.CreateAlloca(i8_type_, 0, "read_callback_result"); + + // Temporary variables for ADC/SBC implementation; no need to initialise. 
+ p_tmp_ = builder_.CreateAlloca(i8_type_, 0, "p_tmp"); + l_tmp_ = builder_.CreateAlloca(i8_type_, 0, "l_tmp"); + s_tmp_ = builder_.CreateAlloca(i16_type_, 0, "s_tmp"); + t_tmp_ = builder_.CreateAlloca(i16_type_, 0, "t_tmp"); + + epilogue_ = llvm::BasicBlock::Create(context_, "epilogue"); +} + +// The Register objects are initialised using these functions instead of +// constructors mainly because we need a builder_ with an associated BasicBlock +// to initialise a Register, and we don't have that when the FunctionBuilder +// object is first constructed. + +void FunctionBuilder::initialise_i8_reg( + Register &r, int structure_index, const std::string &name) +{ + llvm::Value *v = builder_.CreateAlloca(i8_type_, 0, name); + builder_.CreateStore( + builder_.CreateLoad( + builder_.CreateStructGEP(registers_, structure_index), false, name), + v); + r.v_ = v; + r.modified_ = false; +} + +void FunctionBuilder::initialise_jb_reg( + Register &r, int structure_index, const std::string &name) +{ + llvm::Value *v = builder_.CreateAlloca(jit_bool_type_, 0, name); + builder_.CreateStore( + builder_.CreateLoad( + builder_.CreateStructGEP(registers_, structure_index), false, name), + v); + r.v_ = v; + r.modified_ = false; +} + +void FunctionBuilder::ensure_address_block_created(uint16_t addr) +{ + if (address_block_[addr] == 0) + { + std::stringstream s; + s << "l" << std::hex << std::setw(4) << std::setfill('0') << addr; + address_block_[addr] = + llvm::BasicBlock::Create(context_, s.str(), llvm_function_); + } +} + +boost::shared_ptr FunctionBuilder::build() +{ + // This can't be invoked twice on the same FunctionBuilder object; + // at present, for example, attempts to insert into 'epilogue_' crash + // (presumably because it's been used to generate code already). There + // is no reason to do this and I'm not going to convolute things to make + // this pointless case work. Even asserting that this doesn't happen + // seems like overkill, but let's do it anyway. 
+ assert(!built_); + + // While it doesn't strictly matter, the fact that pending_ is a std::set + // means it will internally sort the addresses. This makes it more likely + // that multiple backward jumps will only result in one stretch of code + // being produced, since the furthest jump backwards will be JITted first. + pending_.insert(address_); + while (!pending_.empty()) + { + // We take addresses to JIT at from pending_ to start with, and when + // there's no "better" address... + uint16_t ct_pc = *(pending_.begin()); + + // ... but if we can continue JITting where we left off, we prefer + // to do that. Since each block of code emitted by build_at() is + // independent, this doesn't alter the behaviour of the generated + // code, but it avoids gratuitous discontinuities in the generated + // code compared with the source machine code. + do + { + pending_.erase(ct_pc); + uint16_t new_ct_pc = build_at(ct_pc); + if (new_ct_pc == ct_pc) + { + // build_at() did no work. + } + else if (new_ct_pc > ct_pc) + { + code_range_.insert(AddressRange(ct_pc, new_ct_pc)); + } + else + { + // PC wrapped around during the translation. 
+ uint32_t range_end = new_ct_pc; + range_end += memory_size; + code_range_.insert(AddressRange(ct_pc, range_end)); + } + ct_pc = new_ct_pc; + } + while (pending_.find(ct_pc) != pending_.end()); + } + + LLVMStuff &llvm_stuff = mpu_->internal->llvm_stuff_; + llvm::FunctionPassManager fpm(llvm_stuff.module_.get()); + +#ifdef HAVE_LLVM_DATA_LAYOUT_PASS + fpm.add(new llvm::DataLayoutPass(llvm_stuff.module_.get())); +#else + fpm.add( + new llvm::DataLayout(*llvm_stuff.execution_engine_->getDataLayout())); +#endif + fpm.add(llvm::createBasicAliasAnalysisPass()); + fpm.add(llvm::createPromoteMemoryToRegisterPass()); + fpm.add(llvm::createInstructionCombiningPass()); + fpm.add(llvm::createReassociatePass()); + fpm.add(llvm::createGVNPass()); + fpm.add(llvm::createCFGSimplificationPass()); + fpm.doInitialization(); + + // We could have passed llvm_function_ to BasicBlock::Create() earlier + // and then we wouldn't need to do this push_back() here, but doing + // this means the epilogue appears at the end of the IR. It makes no + // functional difference but it seems slightly more logical to read. 
+ llvm_function_->getBasicBlockList().push_back(epilogue_); + + builder_.SetInsertPoint(epilogue_); + if (a_.modified_) + { + builder_.CreateStore( + builder_.CreateLoad(a_.v_), + builder_.CreateStructGEP(registers_, 0)); + } + if (x_.modified_) + { + builder_.CreateStore( + builder_.CreateLoad(x_.v_), + builder_.CreateStructGEP(registers_, 1)); + } + if (y_.modified_) + { + builder_.CreateStore( + builder_.CreateLoad(y_.v_), + builder_.CreateStructGEP(registers_, 2)); + } + if (s_.modified_) + { + builder_.CreateStore( + builder_.CreateLoad(s_.v_), + builder_.CreateStructGEP(registers_, 3)); + } + if (flag_n_.modified_) + { + builder_.CreateStore( + register_load(flag_n_), + builder_.CreateStructGEP(registers_, 4)); + } + if (flag_v_.modified_) + { + builder_.CreateStore( + register_load(flag_v_), + builder_.CreateStructGEP(registers_, 5)); + } + if (flag_d_.modified_) + { + builder_.CreateStore( + register_load(flag_d_), + builder_.CreateStructGEP(registers_, 6)); + } + if (flag_i_.modified_) + { + builder_.CreateStore( + register_load(flag_i_), + builder_.CreateStructGEP(registers_, 7)); + } + if (flag_z_.modified_) + { + builder_.CreateStore( + register_load(flag_z_), + builder_.CreateStructGEP(registers_, 8)); + } + if (flag_c_.modified_) + { + builder_.CreateStore( + register_load(flag_c_), + builder_.CreateStructGEP(registers_, 9)); + } + builder_.CreateStore( + builder_.CreateLoad(pc_), + builder_.CreateStructGEP(registers_, 10)); + + builder_.CreateRet(builder_.CreateLoad(function_result_)); + + #ifdef LOG + std::string unoptimised_ir; + { + llvm::raw_string_ostream s(unoptimised_ir); + llvm_function_->print(s); + s.str(); + } + #endif + llvm::verifyFunction(*llvm_function_); + + fpm.run(*llvm_function_); + #ifdef LOG + std::string optimised_ir; + { + llvm::raw_string_ostream s(optimised_ir); + llvm_function_->print(s); + s.str(); + } + #endif + + boost::shared_ptr f( + new Function(mpu_, address_, code_range_, optimistic_writes_, + llvm_function_)); + 
#ifdef LOG + f->set_disassembly(disassembly_.str()); + f->set_unoptimised_ir(unoptimised_ir); + f->set_optimised_ir(optimised_ir); + #endif + + built_ = true; + return f; +} + +// This translates a linear stream of 6502 instructions into LLVM IR. The +// generation stops either when we've translated enough 6502 instructions +// or when we hit an instruction which unconditionally transfers control +// elsewhere. Branch targets found during the translation are added to pending_ +// for further consideration; at a minimum, address_block[] entries with +// associated code to transfer control to those addresses must be generated +// for each of these before terminating the build process for the function. +// +// The address of the first byte not translated is returned. +uint16_t FunctionBuilder::build_at(uint16_t ct_pc) +{ + TRACE("Translating linear stream of instructions at 0x" << std::hex << + std::setfill('0') << std::setw(4) << ct_pc); + + const uint16_t original_ct_pc = ct_pc; + // If we already translated this stretch of code, we don't need to do + // anything at all. + if (code_generated_for_address_[ct_pc]) + { + TRACE("Already translated this linear stream"); + return ct_pc; + } + + while (true) + { + TRACE("Translating at 0x" << std::hex << std::setfill('0') << + std::setw(4) << ct_pc << ", opcode 0x" << std::setw(2) << + static_cast(ct_memory_[ct_pc])); + + const uint16_t this_opcode_at = ct_pc; + + if (code_generated_for_address_[ct_pc]) + { + // We already translated this instruction, so we can stop + // translating and just jump there. Since this is just linear + // flow of control from the perspective of the 6502 code, this + // cannot trigger a call callback. 
+ TRACE("Already translated this instruction"); + if (builder_.GetInsertBlock()->getTerminator() == 0) + { + control_transfer_to(constant_u16(ct_pc), opcode_implicit); + } + break; + } + + // Each instruction forms its own basic block (since we build up the + // IR as we go, we can't know where we might want to branch into, + // so we cannot merge multiple instructions into a single basic + // block). Basic blocks must end with a terminator, so if there isn't + // already a terminator at the end of the previous instruction's basic + // block, we insert an unconditional branch to this instruction's + // basic block. If there is already a terminator, we stop translating + // this stream of instructions unless this is the first instruction + // in this linear sequence; this way we avoid generating unreachable + // code if the previous instruction (for example) returned some kind + // of status code to our caller. (If the following instruction is + // reachable in some other way, it will be translated separately - + // as the first instruction in a linear sequence - because it will + // be present in pending.) + bool insert_block_has_terminator = + (builder_.GetInsertBlock()->getTerminator() != 0); + if (insert_block_has_terminator && (ct_pc != original_ct_pc)) + { + TRACE("Not translating as not first instruction in linear stream " + "and previous instruction's basic block has a terminator"); + break; + } + ensure_address_block_created(ct_pc); + if (!insert_block_has_terminator) + { + builder_.CreateBr(address_block_[ct_pc]); + } + builder_.SetInsertPoint(address_block_[ct_pc]); + + // Note that we only set this flag for the opcode byte, not the + // whole length of the instruction. Apart from being easiest, + // this is actually correct. Someone might do LDA #:STA or something weird like that and + // interleave instructions. 
+ code_generated_for_address_[ct_pc] = true; + + if (instructions_ >= max_instructions_) + { + TRACE("Translated maximum number of instructions"); + // We must *not* use control_transfer_to() here; it would see + // that we have set code_generated_for_address_ and generate a + // branch to here, i.e. an infinite loop. It is correct that we + // have set code_generated_for_address_ since we must set that + // if we generate a corresponding address_block entry and we must + // do that so that any branches to this address can be resolved. + return_control_transfer_direct(constant_u16(ct_pc)); + break; + } + ++instructions_; + + uint8_t opcode = ct_memory_[ct_pc]; + if (opcode == opcode_brk) + { + disassemble1(ct_pc, "BRK"); + + llvm::Value *new_pc_low = memory_read(abs(0xfffe)); + llvm::Value *new_pc_high = memory_read(abs(0xffff)); + llvm::Value *new_pc = create_u16(new_pc_low, new_pc_high); + + // Because BRK pushes three bytes onto the stack, we devolve + // responsibility for checking for code living on the stack + // being modified to our caller (by returning result_brk), so + // we use push*raw() here. (We don't support optimistic writes; + // BRK isn't performance critical so there's no payoff for the + // extra complexity.) 
+ + uint16_t pc_to_stack = this_opcode_at + 2; + push_u16_raw(pc_to_stack); + + llvm::Value *p = flag_byte(); + p = builder_.CreateOr(p, constant_u8(flagB | flagX)); + push_u8_raw(p); + + register_store(constant_jb(jit_bool_true), flag_i_); + register_store(constant_jb(jit_bool_false), flag_d_); + + return_brk(new_pc); + } + else if (opcode == 0x01) + { + uint8_t operand; + disassemble2(ct_pc, "ORA (", operand, ",X)"); + ora(memory_read( + zp_pre_index(constant_u8(operand), register_load(x_)))); + } + else if (opcode == 0x02) + { + illegal_instruction(ct_pc, 2); + } + else if (opcode == 0x03) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x04) + { + uint8_t operand; + disassemble2(ct_pc, "TSB ", operand); + memory_op(&FunctionBuilder::tsb, zp(operand), ct_pc); + } + else if (opcode == 0x05) + { + uint8_t operand; + disassemble2(ct_pc, "ORA ", operand); + ora(memory_read(zp(operand))); + } + else if (opcode == 0x06) + { + uint8_t operand; + disassemble2(ct_pc, "ASL ", operand); + memory_op(&FunctionBuilder::asl, zp(operand), ct_pc); + } + else if (opcode == 0x07) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x08) + { + disassemble1(ct_pc, "PHP"); + + llvm::Value *p = flag_byte(); + p = builder_.CreateOr(p, constant_u8(flagB | flagX)); + push_u8(p, ct_pc); + } + else if (opcode == 0x09) + { + uint8_t operand; + disassemble2(ct_pc, "ORA #", operand); + ora(constant_u8(operand)); + } + else if (opcode == 0x0a) + { + disassemble1(ct_pc, "ASL A"); + register_op(&FunctionBuilder::asl, a_); + } + else if (opcode == 0x0b) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x0c) + { + uint16_t operand; + disassemble3(ct_pc, "TSB ", operand); + memory_op(&FunctionBuilder::tsb, abs(operand), ct_pc); + } + else if (opcode == 0x0d) + { + uint16_t operand; + disassemble3(ct_pc, "ORA ", operand); + ora(memory_read(abs(operand))); + } + else if (opcode == 0x0e) + { + uint16_t operand; + disassemble3(ct_pc, "ASL ", operand); + 
memory_op(&FunctionBuilder::asl, abs(operand), ct_pc); + } + else if (opcode == 0x0f) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == opcode_bpl) + { + uint16_t target; + disassemble_branch(ct_pc, "BPL ", target); + pending_.insert(target); + branch(flag_n_, false, target); + } + else if (opcode == 0x11) + { + uint8_t operand; + disassemble2(ct_pc, "ORA (", operand, "),Y"); + ora(memory_read( + zp_post_index(constant_u8(operand), register_load(y_)))); + } + else if (opcode == 0x12) + { + uint8_t operand; + disassemble2(ct_pc, "ORA (", operand, ")"); + ora(memory_read( + zp_post_index(constant_u8(operand), constant_u8(0)))); + } + else if (opcode == 0x13) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x14) + { + uint8_t operand; + disassemble2(ct_pc, "TRB ", operand); + memory_op(&FunctionBuilder::trb, zp(operand), ct_pc); + } + else if (opcode == 0x15) + { + uint8_t operand; + disassemble2(ct_pc, "ORA ", operand, ",X"); + ora(memory_read(zp_index(constant_u8(operand), register_load(x_)))); + } + else if (opcode == 0x16) + { + uint8_t operand; + disassemble2(ct_pc, "ASL ", operand, ",X"); + memory_op(&FunctionBuilder::asl, + zp_index(constant_u8(operand), register_load(x_)), ct_pc); + } + else if (opcode == 0x17) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x18) + { + disassemble1(ct_pc, "CLC"); + register_store(constant_jb(jit_bool_false), flag_c_); + } + else if (opcode == 0x19) + { + uint16_t operand; + disassemble3(ct_pc, "ORA ", operand, ",Y"); + ora(memory_read( + abs_index(constant_u16(operand), register_load(y_)))); + } + else if (opcode == 0x1a) + { + disassemble1(ct_pc, "INC A"); + register_op(&FunctionBuilder::inc, a_); + } + else if (opcode == 0x1b) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x1c) + { + uint16_t operand; + disassemble3(ct_pc, "TRB ", operand); + memory_op(&FunctionBuilder::trb, abs(operand), ct_pc); + } + else if (opcode == 0x1d) + { + uint16_t operand; + 
disassemble3(ct_pc, "ORA ", operand, ",X"); + ora(memory_read( + abs_index(constant_u16(operand), register_load(x_)))); + } + else if (opcode == 0x1e) + { + uint16_t operand; + disassemble3(ct_pc, "ASL ", operand, ",X"); + memory_op( + &FunctionBuilder::asl, + abs_index(constant_u16(operand), register_load(x_)), + ct_pc); + } + else if (opcode == 0x1f) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == opcode_jsr) + { + uint16_t operand; + disassemble3(ct_pc, "JSR ", operand); + uint16_t mangled_return_addr = ct_pc - 1; + + // We are pushing two bytes onto the stack here and possibly + // requiring our caller to handle the control transfer, so the + // standard mechanisms for handling writes to code and control + // transfer aren't enough. control_transfer_to() contains special + // logic for JSR and we just use push_u16_raw() here. + push_u16_raw(mangled_return_addr); + + // We generally want to translate the subroutine code into + // this function, so control_transfer_to() can perform the + // control transfer with a simple branch. However, if there is + // a call callback, control_transfer_to() will have to arrange + // a control transfer via the generated function's caller. It + // would be strictly harmless for us to translate the subroutine + // code anyway, as it will just never be executed, but it is + // both pointless and makes the generated IR less readable (it + // has a superficially buggy appearance, since it will show a + // translation of possibly junk code at the callback address + // which may never actually execute). + bool is_call_callback = (callbacks_.call[operand] != 0); + if (!is_call_callback) + { + pending_.insert(operand); + + // We can predict that the RTS in the subroutine we are + // about to call will return to the immediately following + // instruction. (This is not guaranteed; the subroutine + // might fiddle with the stack. 
If that happens the "code" + // at ct_pc might be junk, but that's an acceptable risk; + // we will translate it but it will never be executed, and + // any stream of bytes can be translated even if the code + // is nonsense.) + pending_.insert(ct_pc); + predicted_rts_targets_[operand].insert(ct_pc); + } + + control_transfer_to(constant_u16(operand), opcode); + } + else if (opcode == 0x21) + { + uint8_t operand; + disassemble2(ct_pc, "AND (", operand, ",X)"); + And(memory_read( + zp_pre_index(constant_u8(operand), register_load(x_)))); + } + else if (opcode == 0x22) + { + illegal_instruction(ct_pc, 2); + } + else if (opcode == 0x23) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x24) + { + uint8_t operand; + disassemble2(ct_pc, "BIT ", operand); + bit(memory_read(zp(operand))); + } + else if (opcode == 0x25) + { + uint8_t operand; + disassemble2(ct_pc, "AND ", operand); + And(memory_read(zp(operand))); + } + else if (opcode == 0x26) + { + uint8_t operand; + disassemble2(ct_pc, "ROL ", operand); + memory_op(&FunctionBuilder::rol, zp(operand), ct_pc); + } + else if (opcode == 0x27) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x28) + { + disassemble1(ct_pc, "PLP"); + pop_flags(); + } + else if (opcode == 0x29) + { + uint8_t operand; + disassemble2(ct_pc, "AND #", operand); + And(constant_u8(operand)); + } + else if (opcode == 0x2a) + { + disassemble1(ct_pc, "ROL A"); + register_op(&FunctionBuilder::rol, a_); + } + else if (opcode == 0x2b) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x2c) + { + uint16_t operand; + disassemble3(ct_pc, "BIT ", operand); + bit(memory_read(abs(operand))); + } + else if (opcode == 0x2d) + { + uint16_t operand; + disassemble3(ct_pc, "AND ", operand); + And(memory_read(abs(operand))); + } + else if (opcode == 0x2e) + { + uint16_t operand; + disassemble3(ct_pc, "ROL ", operand); + memory_op(&FunctionBuilder::rol, abs(operand), ct_pc); + } + else if (opcode == 0x2f) + { + 
illegal_instruction(ct_pc, 1); + } + else if (opcode == opcode_bmi) + { + uint16_t target; + disassemble_branch(ct_pc, "BMI ", target); + pending_.insert(target); + branch(flag_n_, true, target); + } + else if (opcode == 0x31) + { + uint8_t operand; + disassemble2(ct_pc, "AND (", operand, "),Y"); + And(memory_read( + zp_post_index(constant_u8(operand), register_load(y_)))); + } + else if (opcode == 0x32) + { + uint8_t operand; + disassemble2(ct_pc, "AND (", operand, ")"); + And(memory_read( + zp_post_index(constant_u8(operand), constant_u8(0)))); + } + else if (opcode == 0x33) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x34) + { + uint8_t operand; + disassemble2(ct_pc, "BIT ", operand, ",X"); + bit(memory_read(zp_index(constant_u8(operand), register_load(x_)))); + } + else if (opcode == 0x35) + { + uint8_t operand; + disassemble2(ct_pc, "AND ", operand, ",X"); + And(memory_read(zp_index(constant_u8(operand), register_load(x_)))); + } + else if (opcode == 0x36) + { + uint8_t operand; + disassemble2(ct_pc, "ROL ", operand, ",X"); + memory_op(&FunctionBuilder::rol, + zp_index(constant_u8(operand), register_load(x_)), ct_pc); + } + else if (opcode == 0x37) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x38) + { + disassemble1(ct_pc, "SEC"); + register_store(constant_jb(jit_bool_true), flag_c_); + } + else if (opcode == 0x39) + { + uint16_t operand; + disassemble3(ct_pc, "AND ", operand, ",Y"); + And(memory_read( + abs_index(constant_u16(operand), register_load(y_)))); + } + else if (opcode == 0x3a) + { + disassemble1(ct_pc, "DEC A"); + register_op(&FunctionBuilder::dec, a_); + } + else if (opcode == 0x3b) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x3c) + { + uint16_t operand; + disassemble3(ct_pc, "BIT ", operand, ",X"); + bit(memory_read( + abs_index(constant_u16(operand), register_load(x_)))); + } + else if (opcode == 0x3d) + { + uint16_t operand; + disassemble3(ct_pc, "AND ", operand, ",X"); + And(memory_read( + 
abs_index(constant_u16(operand), register_load(x_)))); + } + else if (opcode == 0x3e) + { + uint16_t operand; + disassemble3(ct_pc, "ROL ", operand, ",X"); + memory_op( + &FunctionBuilder::rol, + abs_index(constant_u16(operand), register_load(x_)), + ct_pc); + } + else if (opcode == 0x3f) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == opcode_rti) + { + disassemble1(ct_pc, "RTI"); + pop_flags(); + llvm::Value *new_pc = pop_u16(); + control_transfer_to(new_pc, opcode); + } + else if (opcode == 0x41) + { + uint8_t operand; + disassemble2(ct_pc, "EOR (", operand, ",X)"); + eor(memory_read( + zp_pre_index(constant_u8(operand), register_load(x_)))); + } + else if (opcode == 0x42) + { + illegal_instruction(ct_pc, 2); + } + else if (opcode == 0x43) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x44) + { + illegal_instruction(ct_pc, 2); + } + else if (opcode == 0x45) + { + uint8_t operand; + disassemble2(ct_pc, "EOR ", operand); + eor(memory_read(zp(operand))); + } + else if (opcode == 0x46) + { + uint8_t operand; + disassemble2(ct_pc, "LSR ", operand); + memory_op(&FunctionBuilder::lsr, zp(operand), ct_pc); + } + else if (opcode == 0x47) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x48) + { + disassemble1(ct_pc, "PHA"); + push_u8(register_load(a_), ct_pc); + } + else if (opcode == 0x49) + { + uint8_t operand; + disassemble2(ct_pc, "EOR #", operand); + eor(constant_u8(operand)); + } + else if (opcode == 0x4a) + { + disassemble1(ct_pc, "LSR A"); + register_op(&FunctionBuilder::lsr, a_); + } + else if (opcode == 0x4b) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == opcode_jmp_abs) + { + uint16_t operand; + disassemble3(ct_pc, "JMP ", operand); + pending_.insert(operand); + control_transfer_to(constant_u16(operand), opcode); + } + else if (opcode == 0x4d) + { + uint16_t operand; + disassemble3(ct_pc, "EOR ", operand); + eor(memory_read(abs(operand))); + } + else if (opcode == 0x4e) + { + uint16_t operand; + 
disassemble3(ct_pc, "LSR ", operand); + memory_op(&FunctionBuilder::lsr, abs(operand), ct_pc); + } + else if (opcode == 0x4f) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == opcode_bvc) + { + uint16_t target; + disassemble_branch(ct_pc, "BVC ", target); + pending_.insert(target); + branch(flag_v_, false, target); + } + else if (opcode == 0x51) + { + uint8_t operand; + disassemble2(ct_pc, "EOR (", operand, "),Y"); + eor(memory_read( + zp_post_index(constant_u8(operand), register_load(y_)))); + } + else if (opcode == 0x52) + { + uint8_t operand; + disassemble2(ct_pc, "EOR (", operand, ")"); + eor(memory_read( + zp_post_index(constant_u8(operand), constant_u8(0)))); + } + else if (opcode == 0x53) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x54) + { + illegal_instruction(ct_pc, 2); + } + else if (opcode == 0x55) + { + uint8_t operand; + disassemble2(ct_pc, "EOR ", operand, ",X"); + eor(memory_read(zp_index(constant_u8(operand), register_load(x_)))); + } + else if (opcode == 0x56) + { + uint8_t operand; + disassemble2(ct_pc, "LSR ", operand, ",X"); + memory_op(&FunctionBuilder::lsr, + zp_index(constant_u8(operand), register_load(x_)), ct_pc); + } + else if (opcode == 0x57) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x58) + { + disassemble1(ct_pc, "CLI"); + register_store(constant_jb(jit_bool_false), flag_i_); + } + else if (opcode == 0x59) + { + uint16_t operand; + disassemble3(ct_pc, "EOR ", operand, ",Y"); + eor(memory_read( + abs_index(constant_u16(operand), register_load(y_)))); + } + else if (opcode == 0x5a) + { + disassemble1(ct_pc, "PHY"); + push_u8(register_load(y_), ct_pc); + } + else if (opcode == 0x5b) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x5c) + { + illegal_instruction(ct_pc, 3); + } + else if (opcode == 0x5d) + { + uint16_t operand; + disassemble3(ct_pc, "EOR ", operand, ",X"); + eor(memory_read( + abs_index(constant_u16(operand), register_load(x_)))); + } + else if (opcode == 
0x5e) + { + uint16_t operand; + disassemble3(ct_pc, "LSR ", operand, ",X"); + memory_op( + &FunctionBuilder::lsr, + abs_index(constant_u16(operand), register_load(x_)), + ct_pc); + } + else if (opcode == 0x5f) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == opcode_rts) + { + disassemble1(ct_pc, "RTS"); + llvm::Value *new_pc = check_predicted_rts(original_ct_pc); + control_transfer_to(new_pc, opcode); + } + else if (opcode == 0x61) + { + uint8_t operand; + disassemble2(ct_pc, "ADC (", operand, ",X)"); + adc(memory_read( + zp_pre_index(constant_u8(operand), register_load(x_)))); + } + else if (opcode == 0x62) + { + illegal_instruction(ct_pc, 2); + } + else if (opcode == 0x63) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x64) + { + uint8_t operand; + disassemble2(ct_pc, "STZ ", operand); + memory_write(zp(operand), constant_u8(0), ct_pc); + } + else if (opcode == 0x65) + { + uint8_t operand; + disassemble2(ct_pc, "ADC ", operand); + adc(memory_read(zp(operand))); + } + else if (opcode == 0x66) + { + uint8_t operand; + disassemble2(ct_pc, "ROR ", operand); + memory_op(&FunctionBuilder::ror, zp(operand), ct_pc); + } + else if (opcode == 0x67) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x68) + { + disassemble1(ct_pc, "PLA"); + llvm::Value *data = pop_u8(); + register_store(data, a_); + set_nz(data); + } + else if (opcode == 0x69) + { + uint8_t operand; + disassemble2(ct_pc, "ADC #", operand); + adc(constant_u8(operand)); + } + else if (opcode == 0x6a) + { + disassemble1(ct_pc, "ROR A"); + register_op(&FunctionBuilder::ror, a_); + } + else if (opcode == 0x6b) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == opcode_jmp_ind_abs) + { + uint16_t operand; + disassemble3(ct_pc, "JMP (", operand, ")"); + llvm::Value *low_byte = memory_read_untrapped(abs(operand)); + // We're emulating the 65C02 here so we don't wrap if operand + // is of the form &xxFF. (Unless xx is FF, of course.) 
+ uint16_t high_byte_at = operand + 1; + llvm::Value *high_byte = memory_read_untrapped(abs(high_byte_at)); + llvm::Value *new_pc = create_u16(low_byte, high_byte); + control_transfer_to(new_pc, opcode); + } + else if (opcode == 0x6d) + { + uint16_t operand; + disassemble3(ct_pc, "ADC ", operand); + adc(memory_read(abs(operand))); + } + else if (opcode == 0x6e) + { + uint16_t operand; + disassemble3(ct_pc, "ROR ", operand); + memory_op(&FunctionBuilder::ror, abs(operand), ct_pc); + } + else if (opcode == 0x6f) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == opcode_bvs) + { + uint16_t target; + disassemble_branch(ct_pc, "BVS ", target); + pending_.insert(target); + branch(flag_v_, true, target); + } + else if (opcode == 0x71) + { + uint8_t operand; + disassemble2(ct_pc, "ADC (", operand, "),Y"); + adc(memory_read( + zp_post_index(constant_u8(operand), register_load(y_)))); + } + else if (opcode == 0x72) + { + uint8_t operand; + disassemble2(ct_pc, "ADC (", operand, ")"); + adc(memory_read( + zp_post_index(constant_u8(operand), constant_u8(0)))); + } + else if (opcode == 0x73) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x74) + { + uint8_t operand; + disassemble2(ct_pc, "STZ ", operand, ",X"); + memory_write(zp_index(constant_u8(operand), register_load(x_)), + constant_u8(0), ct_pc); + } + else if (opcode == 0x75) + { + uint8_t operand; + disassemble2(ct_pc, "ADC ", operand, ",X"); + adc(memory_read(zp_index(constant_u8(operand), register_load(x_)))); + } + else if (opcode == 0x76) + { + uint8_t operand; + disassemble2(ct_pc, "ROR ", operand, ",X"); + memory_op(&FunctionBuilder::ror, + zp_index(constant_u8(operand), register_load(x_)), ct_pc); + } + else if (opcode == 0x77) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x78) + { + disassemble1(ct_pc, "SEI"); + register_store(constant_jb(jit_bool_true), flag_i_); + } + else if (opcode == 0x79) + { + uint16_t operand; + disassemble3(ct_pc, "ADC ", operand, ",Y"); + 
adc(memory_read( + abs_index(constant_u16(operand), register_load(y_)))); + } + else if (opcode == 0x7a) + { + disassemble1(ct_pc, "PLY"); + llvm::Value *data = pop_u8(); + register_store(data, y_); + set_nz(data); + } + else if (opcode == 0x7b) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == opcode_jmp_indx_abs) + { + uint16_t operand; + disassemble3(ct_pc, "JMP (", operand, ",X)"); + llvm::Value *low_byte_at = + builder_.CreateAdd( + constant_u16(operand), + zext_i16(register_load(x_))); + llvm::Value *high_byte_at = + builder_.CreateAdd(low_byte_at, constant_u16(1)); + llvm::Value *low_byte = + memory_read_untrapped(BoundedAddress(*this, low_byte_at)); + llvm::Value *high_byte = + memory_read_untrapped(BoundedAddress(*this, high_byte_at)); + llvm::Value *new_pc = create_u16(low_byte, high_byte); + control_transfer_to(new_pc, opcode); + } + else if (opcode == 0x7d) + { + uint16_t operand; + disassemble3(ct_pc, "ADC ", operand, ",X"); + adc(memory_read( + abs_index(constant_u16(operand), register_load(x_)))); + } + else if (opcode == 0x7e) + { + uint16_t operand; + disassemble3(ct_pc, "ROR ", operand, ",X"); + memory_op( + &FunctionBuilder::ror, + abs_index(constant_u16(operand), register_load(x_)), + ct_pc); + } + else if (opcode == 0x7f) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == opcode_bra) + { + uint16_t target; + disassemble_branch(ct_pc, "BRA ", target); + pending_.insert(target); + control_transfer_to(constant_u16(target), opcode); + } + else if (opcode == 0x81) + { + uint8_t operand; + disassemble2(ct_pc, "STA (", operand, ",X)"); + memory_write(zp_pre_index(constant_u8(operand), register_load(x_)), + register_load(a_), ct_pc); + } + else if (opcode == 0x82) + { + illegal_instruction(ct_pc, 2); + } + else if (opcode == 0x83) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x84) + { + uint8_t operand; + disassemble2(ct_pc, "STY ", operand); + memory_write(zp(operand), register_load(y_), ct_pc); + } + else if 
(opcode == 0x85) + { + uint8_t operand; + disassemble2(ct_pc, "STA ", operand); + memory_write(zp(operand), register_load(a_), ct_pc); + } + else if (opcode == 0x86) + { + uint8_t operand; + disassemble2(ct_pc, "STX ", operand); + memory_write(zp(operand), register_load(x_), ct_pc); + } + else if (opcode == 0x87) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x88) + { + disassemble1(ct_pc, "DEY"); + register_op(&FunctionBuilder::dec, y_); + } + else if (opcode == 0x89) + { + uint8_t operand; + disassemble2(ct_pc, "BIT #", operand); + // Note that unlike other BIT opcodes, this one only affects + // the Z flag. + llvm::Value *tmp = + builder_.CreateAnd(register_load(a_), constant_u8(operand)); + set_z(tmp); + } + else if (opcode == 0x8a) + { + disassemble1(ct_pc, "TXA"); + transfer(x_, a_); + } + else if (opcode == 0x8b) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x8c) + { + uint16_t operand; + disassemble3(ct_pc, "STY ", operand); + memory_write(abs(operand), register_load(y_), ct_pc); + } + else if (opcode == 0x8d) + { + uint16_t operand; + disassemble3(ct_pc, "STA ", operand); + memory_write(abs(operand), register_load(a_), ct_pc); + } + else if (opcode == 0x8e) + { + uint16_t operand; + disassemble3(ct_pc, "STX ", operand); + memory_write(abs(operand), register_load(x_), ct_pc); + } + else if (opcode == 0x8f) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == opcode_bcc) + { + uint16_t target; + disassemble_branch(ct_pc, "BCC ", target); + pending_.insert(target); + branch(flag_c_, false, target); + } + else if (opcode == 0x91) + { + uint8_t operand; + disassemble2(ct_pc, "STA (", operand, "),Y"); + memory_write(zp_post_index(constant_u8(operand), register_load(y_)), + register_load(a_), ct_pc); + } + else if (opcode == 0x92) + { + uint8_t operand; + disassemble2(ct_pc, "STA (", operand, ")"); + memory_write(zp_post_index(constant_u8(operand), constant_u8(0)), + register_load(a_), ct_pc); + } + else if (opcode == 
0x93) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x94) + { + uint8_t operand; + disassemble2(ct_pc, "STY ", operand, ",X"); + memory_write(zp_index(constant_u8(operand), register_load(x_)), + register_load(y_), ct_pc); + } + else if (opcode == 0x95) + { + uint8_t operand; + disassemble2(ct_pc, "STA ", operand, ",X"); + memory_write(zp_index(constant_u8(operand), register_load(x_)), + register_load(a_), ct_pc); + } + else if (opcode == 0x96) + { + uint8_t operand; + disassemble2(ct_pc, "STX ", operand, ",Y"); + memory_write(zp_index(constant_u8(operand), register_load(y_)), + register_load(x_), ct_pc); + } + else if (opcode == 0x97) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x98) + { + disassemble1(ct_pc, "TYA"); + transfer(y_, a_); + } + else if (opcode == 0x99) + { + uint16_t operand; + disassemble3(ct_pc, "STA ", operand, ",Y"); + memory_write(abs_index(constant_u16(operand), register_load(y_)), + register_load(a_), ct_pc); + } + else if (opcode == 0x9a) + { + disassemble1(ct_pc, "TXS"); + // We don't use transfer() even though we do for TSX; TXS doesn't + // set any flags. 
+ register_store(register_load(x_), s_); + } + else if (opcode == 0x9b) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0x9c) + { + uint16_t operand; + disassemble3(ct_pc, "STZ ", operand); + memory_write(abs(operand), constant_u8(0), ct_pc); + } + else if (opcode == 0x9d) + { + uint16_t operand; + disassemble3(ct_pc, "STA ", operand, ",X"); + memory_write(abs_index(constant_u16(operand), register_load(x_)), + register_load(a_), ct_pc); + } + else if (opcode == 0x9e) + { + uint16_t operand; + disassemble3(ct_pc, "STZ ", operand, ",X"); + memory_write(abs_index(constant_u16(operand), register_load(x_)), + constant_u8(0), ct_pc); + } + else if (opcode == 0x9f) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xa0) + { + uint8_t operand; + disassemble2(ct_pc, "LDY #", operand); + ld(y_, constant_u8(operand)); + } + else if (opcode == 0xa1) + { + uint8_t operand; + disassemble2(ct_pc, "LDA (", operand, ",X)"); + ld(a_, memory_read( + zp_pre_index(constant_u8(operand), register_load(x_)))); + } + else if (opcode == 0xa2) + { + uint8_t operand; + disassemble2(ct_pc, "LDX #", operand); + ld(x_, constant_u8(operand)); + } + else if (opcode == 0xa3) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xa4) + { + uint8_t operand; + disassemble2(ct_pc, "LDY ", operand); + ld(y_, memory_read(zp(operand))); + } + else if (opcode == 0xa5) + { + uint8_t operand; + disassemble2(ct_pc, "LDA ", operand); + ld(a_, memory_read(zp(operand))); + } + else if (opcode == 0xa6) + { + uint8_t operand; + disassemble2(ct_pc, "LDX ", operand); + ld(x_, memory_read(zp(operand))); + } + else if (opcode == 0xa7) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xa8) + { + disassemble1(ct_pc, "TAY"); + transfer(a_, y_); + } + else if (opcode == 0xa9) + { + uint8_t operand; + disassemble2(ct_pc, "LDA #", operand); + ld(a_, constant_u8(operand)); + } + else if (opcode == 0xaa) + { + disassemble1(ct_pc, "TAX"); + transfer(a_, x_); + } + else if (opcode 
== 0xab) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xac) + { + uint16_t operand; + disassemble3(ct_pc, "LDY ", operand); + ld(y_, memory_read(abs(operand))); + } + else if (opcode == 0xad) + { + uint16_t operand; + disassemble3(ct_pc, "LDA ", operand); + ld(a_, memory_read(abs(operand))); + } + else if (opcode == 0xae) + { + uint16_t operand; + disassemble3(ct_pc, "LDX ", operand); + ld(x_, memory_read(abs(operand))); + } + else if (opcode == 0xaf) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == opcode_bcs) + { + uint16_t target; + disassemble_branch(ct_pc, "BCS ", target); + pending_.insert(target); + branch(flag_c_, true, target); + } + else if (opcode == 0xb1) + { + uint8_t operand; + disassemble2(ct_pc, "LDA (", operand, "),Y"); + ld(a_, memory_read( + zp_post_index(constant_u8(operand), register_load(y_)))); + } + else if (opcode == 0xb2) + { + uint8_t operand; + disassemble2(ct_pc, "LDA (", operand, ")"); + ld(a_, memory_read( + zp_post_index(constant_u8(operand), constant_u8(0)))); + } + else if (opcode == 0xb3) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xb4) + { + uint8_t operand; + disassemble2(ct_pc, "LDY ", operand, ",X"); + ld(y_, memory_read( + zp_index(constant_u8(operand), register_load(x_)))); + } + else if (opcode == 0xb5) + { + uint8_t operand; + disassemble2(ct_pc, "LDA ", operand, ",X"); + ld(a_, memory_read( + zp_index(constant_u8(operand), register_load(x_)))); + } + else if (opcode == 0xb6) + { + uint8_t operand; + disassemble2(ct_pc, "LDX ", operand, ",Y"); + ld(x_, memory_read( + zp_index(constant_u8(operand), register_load(y_)))); + } + else if (opcode == 0xb7) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xb8) + { + disassemble1(ct_pc, "CLV"); + register_store(constant_jb(jit_bool_false), flag_v_); + } + else if (opcode == 0xb9) + { + uint16_t operand; + disassemble3(ct_pc, "LDA ", operand, ",Y"); + ld(a_, memory_read( + abs_index(constant_u16(operand), 
register_load(y_)))); + } + else if (opcode == 0xba) + { + disassemble1(ct_pc, "TSX"); + transfer(s_, x_); + } + else if (opcode == 0xbb) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xbc) + { + uint16_t operand; + disassemble3(ct_pc, "LDY ", operand, ",X"); + ld(y_, memory_read( + abs_index(constant_u16(operand), register_load(x_)))); + } + else if (opcode == 0xbd) + { + uint16_t operand; + disassemble3(ct_pc, "LDA ", operand, ",X"); + ld(a_, memory_read( + abs_index(constant_u16(operand), register_load(x_)))); + } + else if (opcode == 0xbe) + { + uint16_t operand; + disassemble3(ct_pc, "LDX ", operand, ",Y"); + ld(x_, memory_read( + abs_index(constant_u16(operand), register_load(y_)))); + } + else if (opcode == 0xbf) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xc0) + { + uint8_t operand; + disassemble2(ct_pc, "CPY #", operand); + cmp(register_load(y_), constant_u8(operand)); + } + else if (opcode == 0xc1) + { + uint8_t operand; + disassemble2(ct_pc, "CMP (", operand, ",X)"); + cmp(register_load(a_), + memory_read( + zp_pre_index(constant_u8(operand), register_load(x_)))); + } + else if (opcode == 0xc2) + { + illegal_instruction(ct_pc, 2); + } + else if (opcode == 0xc3) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xc4) + { + uint8_t operand; + disassemble2(ct_pc, "CPY ", operand); + cmp(register_load(y_), memory_read(zp(operand))); + } + else if (opcode == 0xc5) + { + uint8_t operand; + disassemble2(ct_pc, "CMP ", operand); + cmp(register_load(a_), memory_read(zp(operand))); + } + else if (opcode == 0xc6) + { + uint8_t operand; + disassemble2(ct_pc, "DEC ", operand); + memory_op(&FunctionBuilder::dec, zp(operand), ct_pc); + } + else if (opcode == 0xc7) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xc8) + { + disassemble1(ct_pc, "INY"); + register_op(&FunctionBuilder::inc, y_); + } + else if (opcode == 0xc9) + { + uint8_t operand; + disassemble2(ct_pc, "CMP #", operand); + cmp(register_load(a_), 
constant_u8(operand)); + } + else if (opcode == 0xca) + { + disassemble1(ct_pc, "DEX"); + register_op(&FunctionBuilder::dec, x_); + } + else if (opcode == 0xcb) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xcc) + { + uint16_t operand; + disassemble3(ct_pc, "CPY ", operand); + cmp(register_load(y_), memory_read(abs(operand))); + } + else if (opcode == 0xcd) + { + uint16_t operand; + disassemble3(ct_pc, "CMP ", operand); + cmp(register_load(a_), memory_read(abs(operand))); + } + else if (opcode == 0xce) + { + uint16_t operand; + disassemble3(ct_pc, "DEC ", operand); + memory_op(&FunctionBuilder::dec, abs(operand), ct_pc); + } + else if (opcode == 0xcf) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == opcode_bne) + { + uint16_t target; + disassemble_branch(ct_pc, "BNE ", target); + pending_.insert(target); + branch(flag_z_, false, target); + } + else if (opcode == 0xd1) + { + uint8_t operand; + disassemble2(ct_pc, "CMP (", operand, "),Y"); + cmp(register_load(a_), + memory_read( + zp_post_index(constant_u8(operand), register_load(y_)))); + } + else if (opcode == 0xd2) + { + uint8_t operand; + disassemble2(ct_pc, "CMP (", operand, ")"); + cmp(register_load(a_), + memory_read( + zp_post_index(constant_u8(operand), constant_u8(0)))); + } + else if (opcode == 0xd3) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xd4) + { + illegal_instruction(ct_pc, 2); + } + else if (opcode == 0xd5) + { + uint8_t operand; + disassemble2(ct_pc, "CMP ", operand, ",X"); + cmp(register_load(a_), + memory_read( + zp_index(constant_u8(operand), register_load(x_)))); + } + else if (opcode == 0xd6) + { + uint8_t operand; + disassemble2(ct_pc, "DEC ", operand, ",X"); + memory_op(&FunctionBuilder::dec, + zp_index(constant_u8(operand), register_load(x_)), ct_pc); + } + else if (opcode == 0xd7) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xd8) + { + disassemble1(ct_pc, "CLD"); + register_store(constant_jb(jit_bool_false), flag_d_); + } 
+ else if (opcode == 0xd9) + { + uint16_t operand; + disassemble3(ct_pc, "CMP ", operand, ",Y"); + cmp(register_load(a_), + memory_read( + abs_index(constant_u16(operand), register_load(y_)))); + } + else if (opcode == 0xda) + { + disassemble1(ct_pc, "PHX"); + push_u8(register_load(x_), ct_pc); + } + else if (opcode == 0xdb) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xdc) + { + illegal_instruction(ct_pc, 3); + } + else if (opcode == 0xdd) + { + uint16_t operand; + disassemble3(ct_pc, "CMP ", operand, ",X"); + cmp(register_load(a_), + memory_read( + abs_index(constant_u16(operand), register_load(x_)))); + } + else if (opcode == 0xde) + { + uint16_t operand; + disassemble3(ct_pc, "DEC ", operand, ",X"); + memory_op( + &FunctionBuilder::dec, + abs_index(constant_u16(operand), register_load(x_)), + ct_pc); + } + else if (opcode == 0xdf) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xe0) + { + uint8_t operand; + disassemble2(ct_pc, "CPX #", operand); + cmp(register_load(x_), constant_u8(operand)); + } + else if (opcode == 0xe1) + { + uint8_t operand; + disassemble2(ct_pc, "SBC (", operand, ",X)"); + sbc(memory_read( + zp_pre_index(constant_u8(operand), register_load(x_)))); + } + else if (opcode == 0xe2) + { + illegal_instruction(ct_pc, 2); + } + else if (opcode == 0xe3) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xe4) + { + uint8_t operand; + disassemble2(ct_pc, "CPX ", operand); + cmp(register_load(x_), memory_read(zp(operand))); + } + else if (opcode == 0xe5) + { + uint8_t operand; + disassemble2(ct_pc, "SBC ", operand); + sbc(memory_read(zp(operand))); + } + else if (opcode == 0xe6) + { + uint8_t operand; + disassemble2(ct_pc, "INC ", operand); + memory_op(&FunctionBuilder::inc, zp(operand), ct_pc); + } + else if (opcode == 0xe7) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xe8) + { + disassemble1(ct_pc, "INX"); + register_op(&FunctionBuilder::inc, x_); + } + else if (opcode == 0xe9) + { + 
uint8_t operand; + disassemble2(ct_pc, "SBC #", operand); + sbc(constant_u8(operand)); + } + else if (opcode == 0xea) + { + disassemble1(ct_pc, "NOP"); + } + else if (opcode == 0xeb) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xec) + { + uint16_t operand; + disassemble3(ct_pc, "CPX ", operand); + cmp(register_load(x_), memory_read(abs(operand))); + } + else if (opcode == 0xed) + { + uint16_t operand; + disassemble3(ct_pc, "SBC ", operand); + sbc(memory_read(abs(operand))); + } + else if (opcode == 0xee) + { + uint16_t operand; + disassemble3(ct_pc, "INC ", operand); + memory_op(&FunctionBuilder::inc, abs(operand), ct_pc); + } + else if (opcode == 0xef) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == opcode_beq) + { + uint16_t target; + disassemble_branch(ct_pc, "BEQ ", target); + pending_.insert(target); + branch(flag_z_, true, target); + } + else if (opcode == 0xf1) + { + uint8_t operand; + disassemble2(ct_pc, "SBC (", operand, "),Y"); + sbc(memory_read( + zp_post_index(constant_u8(operand), register_load(y_)))); + } + else if (opcode == 0xf2) + { + uint8_t operand; + disassemble2(ct_pc, "SBC (", operand, ")"); + sbc(memory_read( + zp_post_index(constant_u8(operand), constant_u8(0)))); + } + else if (opcode == 0xf3) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xf4) + { + illegal_instruction(ct_pc, 2); + } + else if (opcode == 0xf5) + { + uint8_t operand; + disassemble2(ct_pc, "SBC ", operand, ",X"); + sbc(memory_read(zp_index(constant_u8(operand), register_load(x_)))); + } + else if (opcode == 0xf6) + { + uint8_t operand; + disassemble2(ct_pc, "INC ", operand, ",X"); + memory_op(&FunctionBuilder::inc, + zp_index(constant_u8(operand), register_load(x_)), ct_pc); + } + else if (opcode == 0xf7) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xf8) + { + disassemble1(ct_pc, "SED"); + register_store(constant_jb(jit_bool_true), flag_d_); + } + else if (opcode == 0xf9) + { + uint16_t operand; + 
disassemble3(ct_pc, "SBC ", operand, ",Y"); + sbc(memory_read( + abs_index(constant_u16(operand), register_load(y_)))); + } + else if (opcode == 0xfa) + { + disassemble1(ct_pc, "PLX"); + llvm::Value *data = pop_u8(); + register_store(data, x_); + set_nz(data); + } + else if (opcode == 0xfb) + { + illegal_instruction(ct_pc, 1); + } + else if (opcode == 0xfc) + { + illegal_instruction(ct_pc, 3); + } + else if (opcode == 0xfd) + { + uint16_t operand; + disassemble3(ct_pc, "SBC ", operand, ",X"); + sbc(memory_read( + abs_index(constant_u16(operand), register_load(x_)))); + } + else if (opcode == 0xfe) + { + uint16_t operand; + disassemble3(ct_pc, "INC ", operand, ",X"); + memory_op( + &FunctionBuilder::inc, + abs_index(constant_u16(operand), register_load(x_)), + ct_pc); + } + else if (opcode == 0xff) + { + illegal_instruction(ct_pc, 1); + } + else + { + CANT_HAPPEN("Unknown opcode 0x" << std::hex << opcode); + } + } + + return ct_pc; +} + +// Return the 8-bit operand of the instruction whose opcode is located at +// the given address. +uint8_t FunctionBuilder::operand8(uint16_t opcode_at) +{ + uint16_t addr = opcode_at; + return ct_memory_[++addr]; +} + +// Return the 16-bit operand of the instruction whose opcode is located at +// the given address. 
+uint16_t FunctionBuilder::operand16(uint16_t opcode_at) +{ + uint16_t addr = opcode_at; + uint8_t operand_low = ct_memory_[++addr]; + uint8_t operand_high = ct_memory_[++addr]; + return operand_low | (operand_high << 8); +} + +llvm::Value *FunctionBuilder::constant_i1(bool c) +{ + return llvm::ConstantInt::get(i1_type_, c); +} + +llvm::Value *FunctionBuilder::constant_u8(uint8_t c) +{ + return llvm::ConstantInt::get(i8_type_, c); +} + +llvm::Value *FunctionBuilder::constant_u16(uint16_t c) +{ + return llvm::ConstantInt::get(i16_type_, c); +} + +llvm::Value *FunctionBuilder::constant_u32(uint32_t c) +{ + return llvm::ConstantInt::get(i32_type_, c); +} + +llvm::Value *FunctionBuilder::constant_u64(uint64_t c) +{ + return llvm::ConstantInt::get(i64_type_, c); +} + +llvm::Value *FunctionBuilder::constant_i(int c) +{ + return llvm::ConstantInt::get(native_int_type_, c); +} + +llvm::Value *FunctionBuilder::constant_jb(JitBool c) +{ + return llvm::ConstantInt::get(jit_bool_type_, c); +} + +llvm::Value *FunctionBuilder::convert_i1_to_jb(llvm::Value *v) +{ + assert(v->getType() == i1_type_); + return builder_.CreateZExt(v, jit_bool_type_); +} + +llvm::Value *FunctionBuilder::convert_i8_to_jb(llvm::Value *v) +{ + assert(v->getType() == i8_type_); + return v; +} + +llvm::Value *FunctionBuilder::convert_i16_to_jb(llvm::Value *v) +{ + assert(v->getType() == i16_type_); + return convert_i1_to_jb(builder_.CreateICmpNE(v, constant_u16(0))); +} + +// JitBool values should be tested via jit_bool_is_*() and not directly; +// this is because they use a 0=false, non-0=true representation. It's not +// correct to assume they are either 0 or 1. 
+ +llvm::Value *FunctionBuilder::jit_bool_is_true(llvm::Value *v) +{ + assert(v->getType() == jit_bool_type_); + return builder_.CreateICmpNE(v, constant_u8(0)); +} + +llvm::Value *FunctionBuilder::jit_bool_is_false(llvm::Value *v) +{ + assert(v->getType() == jit_bool_type_); + return builder_.CreateICmpEQ(v, constant_u8(0)); +} + +llvm::Value *FunctionBuilder::convert_i1_to_i8(llvm::Value *v) +{ + assert(v->getType() == i1_type_); + return builder_.CreateZExt(v, i8_type_); +} + +llvm::Value *FunctionBuilder::zext_i16(llvm::Value *v) +{ + return builder_.CreateZExt(v, i16_type_); +} + +llvm::Value *FunctionBuilder::zext_i32(llvm::Value *v) +{ + return builder_.CreateZExt(v, i32_type_); +} + +llvm::Value *FunctionBuilder::sext_i16(llvm::Value *v) +{ + return builder_.CreateSExt(v, i16_type_); +} + +llvm::Value *FunctionBuilder::trunc_i8(llvm::Value *v) +{ + return builder_.CreateTrunc(v, i8_type_); +} + +llvm::Value *FunctionBuilder::create_u16( + llvm::Value *low_byte, llvm::Value *high_byte) +{ + return builder_.CreateOr( + zext_i16(low_byte), + builder_.CreateShl(zext_i16(high_byte), 8)); +} + +llvm::Value *FunctionBuilder::register_load(const Register &r) +{ + return builder_.CreateLoad(r.v_); +} + +void FunctionBuilder::register_store(llvm::Value *v, Register &r) +{ + builder_.CreateStore(v, r.v_); + r.modified_ = true; +} + +void FunctionBuilder::register_op(OpFn op, Register &r) +{ + llvm::Value *data = register_load(r); + data = (this->*op)(data); + register_store(data, r); +} + +void FunctionBuilder::memory_op( + OpFn op, const BoundedAddress &ba, uint16_t next_opcode_at) +{ + llvm::Value *data = memory_read(ba); + data = (this->*op)(data); + memory_write(ba, data, next_opcode_at); +} + +void FunctionBuilder::adc(llvm::Value *data) +{ + llvm::BasicBlock *done_adc_block = + llvm::BasicBlock::Create(context_, "done_adc"); + llvm::BasicBlock *adc_binary_block = + llvm::BasicBlock::Create(context_, "adc_binary", llvm_function_); + llvm::BasicBlock 
*adc_decimal_block = + llvm::BasicBlock::Create(context_, "adc_decimal", llvm_function_); + llvm::Value *d_clear = jit_bool_is_false(register_load(flag_d_)); + builder_.CreateCondBr(d_clear, adc_binary_block, adc_decimal_block); + llvm_function_->getBasicBlockList().push_back(done_adc_block); + builder_.SetInsertPoint(adc_binary_block); + adc_binary(data); + builder_.CreateBr(done_adc_block); + builder_.SetInsertPoint(adc_decimal_block); + adc_decimal(data); + builder_.CreateBr(done_adc_block); + builder_.SetInsertPoint(done_adc_block); +} + +void FunctionBuilder::adc_binary(llvm::Value *data) +{ + llvm::Value *carry_16 = zext_i16(jit_bool_is_true(register_load(flag_c_))); + + llvm::Value *a_u16 = zext_i16(register_load(a_)); + llvm::Value *data_u16 = zext_i16(data); + llvm::Value *sum_u16 = + builder_.CreateAdd(builder_.CreateAdd(a_u16, data_u16), carry_16); + + llvm::Value *a_s16 = builder_.CreateSExt(register_load(a_), i16_type_); + llvm::Value *data_s16 = builder_.CreateSExt(data, i16_type_); + llvm::Value *sum_s16 = + builder_.CreateAdd(builder_.CreateAdd(a_s16, data_s16), carry_16); + + llvm::Value *new_a = trunc_i8(sum_u16); + register_store(new_a, a_); + set_nz(new_a); + + llvm::Value *b8 = builder_.CreateAnd( + sum_u16, + constant_u16(0x100)); + register_store(convert_i16_to_jb(b8), flag_c_); + + llvm::Value *negative_as_unsigned = + jit_bool_is_true(register_load(flag_n_)); + llvm::Value *negative_as_signed = + builder_.CreateICmpSLT(sum_s16, constant_u16(0)); + llvm::Value *new_v_as_i1 = + builder_.CreateXor(negative_as_unsigned, negative_as_signed); + register_store(convert_i1_to_jb(new_v_as_i1), flag_v_); +} + +void FunctionBuilder::adc_decimal(llvm::Value *data) +{ + // This algorithm taken from http://www.6502.org/tutorials/decimal_mode.html + + llvm::Value *carry = jit_bool_is_true(register_load(flag_c_)); + + builder_.CreateStore( + builder_.CreateAdd( + builder_.CreateAdd( + builder_.CreateAnd( + register_load(a_), + constant_u8(0x0f)), + 
builder_.CreateAnd( + data, + constant_u8(0x0f))), + convert_i1_to_i8(carry)), + l_tmp_); + + llvm::BasicBlock *adjust_l_block = + llvm::BasicBlock::Create(context_, "adjust_l", llvm_function_); + llvm::BasicBlock *l_done_block = + llvm::BasicBlock::Create(context_, "l_done", llvm_function_); + builder_.CreateCondBr( + builder_.CreateICmpUGE( + builder_.CreateLoad(l_tmp_), + constant_u8(0x0a)), + adjust_l_block, l_done_block); + + builder_.SetInsertPoint(adjust_l_block); + builder_.CreateStore( + builder_.CreateAdd( + builder_.CreateAnd( + builder_.CreateAdd( + builder_.CreateLoad(l_tmp_), + constant_u8(0x06)), + constant_u8(0x0f)), + constant_u8(0x10)), + l_tmp_); + builder_.CreateBr(l_done_block); + + builder_.SetInsertPoint(l_done_block); + + llvm::Value *a_and_0xf0 = + builder_.CreateAnd( + register_load(a_), + constant_u8(0xf0)); + llvm::Value *data_and_0xf0 = + builder_.CreateAnd( + data, + constant_u8(0xf0)); + + builder_.CreateStore( + builder_.CreateAdd( + builder_.CreateAdd( + zext_i16(a_and_0xf0), + zext_i16(data_and_0xf0)), + zext_i16(builder_.CreateLoad(l_tmp_))), + s_tmp_); + + llvm::BasicBlock *adjust_s_block = + llvm::BasicBlock::Create(context_, "adjust_s", llvm_function_); + llvm::BasicBlock *s_done_block = + llvm::BasicBlock::Create(context_, "s_done", llvm_function_); + builder_.CreateCondBr( + builder_.CreateICmpUGE( + builder_.CreateLoad(s_tmp_), + constant_u16(0xa0)), + adjust_s_block, s_done_block); + + builder_.SetInsertPoint(adjust_s_block); + builder_.CreateStore( + builder_.CreateAdd( + builder_.CreateLoad(s_tmp_), + constant_u16(0x60)), + s_tmp_); + builder_.CreateBr(s_done_block); + + builder_.SetInsertPoint(s_done_block); + builder_.CreateStore( + builder_.CreateAdd( + builder_.CreateAdd( + sext_i16(a_and_0xf0), + sext_i16(data_and_0xf0)), + zext_i16(builder_.CreateLoad(l_tmp_))), + t_tmp_); + + llvm::BasicBlock *v_not_done_block = + llvm::BasicBlock::Create(context_, "v_not_done", llvm_function_); + llvm::BasicBlock *v_false_block = 
+ llvm::BasicBlock::Create(context_, "v_false", llvm_function_); + llvm::BasicBlock *v_done_block = + llvm::BasicBlock::Create(context_, "v_done", llvm_function_); + register_store(constant_jb(jit_bool_true), flag_v_); + builder_.CreateCondBr( + builder_.CreateICmpSLT( + builder_.CreateLoad(t_tmp_), + constant_u16(-128)), + v_done_block, v_not_done_block); + builder_.SetInsertPoint(v_not_done_block); + builder_.CreateCondBr( + builder_.CreateICmpSGT( + builder_.CreateLoad(t_tmp_), + constant_u16(127)), + v_done_block, v_false_block); + builder_.SetInsertPoint(v_false_block); + register_store(constant_jb(jit_bool_false), flag_v_); + builder_.CreateBr(v_done_block); + builder_.SetInsertPoint(v_done_block); + + register_store(trunc_i8(builder_.CreateLoad(s_tmp_)), a_); + set_nz(register_load(a_)); + register_store( + convert_i1_to_jb( + builder_.CreateICmpUGE( + builder_.CreateLoad(s_tmp_), + constant_u16(0x100))), + flag_c_); +} + +void FunctionBuilder::And(llvm::Value *data) +{ + llvm::Value *result = builder_.CreateAnd(register_load(a_), data); + register_store(result, a_); + set_nz(result); +} + +llvm::Value *FunctionBuilder::asl(llvm::Value *data) +{ + register_store( + convert_i8_to_jb(builder_.CreateAnd(data, constant_u8(0x80))), flag_c_); + llvm::Value *result = builder_.CreateShl(data, 1); + set_nz(result); + return result; +} + +void FunctionBuilder::bit(llvm::Value *data) +{ + register_store( + convert_i8_to_jb(builder_.CreateAnd(data, constant_u8(0x80))), flag_n_); + register_store( + convert_i8_to_jb(builder_.CreateAnd(data, constant_u8(0x40))), flag_v_); + llvm::Value *tmp = builder_.CreateAnd(register_load(a_), data); + set_z(tmp); +} + +void FunctionBuilder::branch(Register &flag, bool branch_if, uint16_t target) +{ + llvm::BasicBlock *not_taken_block = + llvm::BasicBlock::Create(context_, "branch_not_taken", llvm_function_); + ensure_address_block_created(target); + llvm::Value *flag_set = jit_bool_is_true(register_load(flag)); + if (branch_if) + { + 
builder_.CreateCondBr(flag_set, address_block_[target], + not_taken_block); + } + else + { + builder_.CreateCondBr(flag_set, not_taken_block, + address_block_[target]); + } + builder_.SetInsertPoint(not_taken_block); +} + +void FunctionBuilder::cmp(llvm::Value *r, llvm::Value *data) +{ + llvm::Value *sum = builder_.CreateSub(r, data); + set_nz(sum); + register_store(convert_i1_to_jb(builder_.CreateICmpUGE(r, data)), flag_c_); +} + +llvm::Value *FunctionBuilder::dec(llvm::Value *data) +{ + llvm::Value *result = builder_.CreateSub(data, constant_u8(1)); + set_nz(result); + return result; +} + +void FunctionBuilder::eor(llvm::Value *data) +{ + llvm::Value *result = builder_.CreateXor(register_load(a_), data); + register_store(result, a_); + set_nz(result); +} + +llvm::Value *FunctionBuilder::inc(llvm::Value *data) +{ + llvm::Value *result = builder_.CreateAdd(data, constant_u8(1)); + set_nz(result); + return result; +} + +void FunctionBuilder::ld(Register &r, llvm::Value *data) +{ + register_store(data, r); + set_nz(data); +} + +llvm::Value *FunctionBuilder::lsr(llvm::Value *data) +{ + register_store( + convert_i8_to_jb(builder_.CreateAnd(data, constant_u8(0x1))), flag_c_); + llvm::Value *result = builder_.CreateLShr(data, 1); + set_nz(result); + return result; +} + +void FunctionBuilder::ora(llvm::Value *data) +{ + llvm::Value *result = builder_.CreateOr(register_load(a_), data); + register_store(result, a_); + set_nz(result); +} + +void FunctionBuilder::pop_flags() +{ + llvm::Value *p = pop_u8(); + register_store( + convert_i8_to_jb(builder_.CreateAnd(p, constant_u8(flagN))), flag_n_); + register_store( + convert_i8_to_jb(builder_.CreateAnd(p, constant_u8(flagV))), flag_v_); + register_store( + convert_i8_to_jb(builder_.CreateAnd(p, constant_u8(flagD))), flag_d_); + register_store( + convert_i8_to_jb(builder_.CreateAnd(p, constant_u8(flagI))), flag_i_); + register_store( + convert_i8_to_jb(builder_.CreateAnd(p, constant_u8(flagZ))), flag_z_); + register_store( + 
convert_i8_to_jb(builder_.CreateAnd(p, constant_u8(flagC))), flag_c_); +} + +llvm::Value *FunctionBuilder::pop_u8() +{ + llvm::Value *new_s = builder_.CreateAdd(register_load(s_), constant_u8(1)); + register_store(new_s, s_); + return memory_read_untrapped(abs_index(constant_u16(stack), new_s)); +} + + +llvm::Value *FunctionBuilder::pop_u16() +{ + llvm::Value *low_byte = pop_u8(); + llvm::Value *high_byte = pop_u8(); + return create_u16(low_byte, high_byte); +} + +void FunctionBuilder::push_u8_raw(llvm::Value *data) +{ + memory_write_raw(abs_index(constant_u16(stack), register_load(s_)), data); + register_store(builder_.CreateSub(register_load(s_), constant_u8(1)), s_); +} + +void FunctionBuilder::push_u16_raw(uint16_t u) +{ + uint8_t high_byte = u >> 8; + uint8_t low_byte = u & 0xff; + push_u8_raw(constant_u8(high_byte)); + push_u8_raw(constant_u8(low_byte)); +} + +// Push the given value onto the stack. +// +// Note that because the push may invalidate code living on the stack, +// this may generate intructions which return control to the caller to +// deal with that, so within a given opcode being translated, no further +// code-generating functions should be called after this. 
+void FunctionBuilder::push_u8(llvm::Value *data, uint16_t next_opcode_at) +{ + llvm::Value *old_s = register_load(s_); + const BoundedAddress &ba = abs_index(constant_u16(stack), old_s); + register_store(builder_.CreateSub(old_s, constant_u8(1)), s_); + memory_write_untrapped(ba, data, next_opcode_at); +} + +llvm::Value *FunctionBuilder::rol(llvm::Value *data) +{ + llvm::Value *new_low_bit = + convert_i1_to_i8(jit_bool_is_true(register_load(flag_c_))); + register_store( + convert_i8_to_jb(builder_.CreateAnd(data, constant_u8(0x80))), flag_c_); + llvm::Value *result = + builder_.CreateOr(builder_.CreateShl(data, 1), new_low_bit); + set_nz(result); + return result; +} + +llvm::Value *FunctionBuilder::ror(llvm::Value *data) +{ + llvm::Value *c_as_bit = + convert_i1_to_i8(jit_bool_is_true(register_load(flag_c_))); + llvm::Value *new_high_bit = builder_.CreateShl(c_as_bit, 7); + register_store( + convert_i8_to_jb(builder_.CreateAnd(data, constant_u8(0x1))), flag_c_); + llvm::Value *result = + builder_.CreateOr(builder_.CreateLShr(data, 1), new_high_bit); + set_nz(result); + return result; +} + +void FunctionBuilder::sbc(llvm::Value *data) +{ + llvm::BasicBlock *done_sbc_block = + llvm::BasicBlock::Create(context_, "done_sbc"); + llvm::BasicBlock *sbc_binary_block = + llvm::BasicBlock::Create(context_, "sbc_binary", llvm_function_); + llvm::BasicBlock *sbc_decimal_block = + llvm::BasicBlock::Create(context_, "sbc_decimal", llvm_function_); + llvm::Value *d_clear = jit_bool_is_false(register_load(flag_d_)); + builder_.CreateCondBr(d_clear, sbc_binary_block, sbc_decimal_block); + llvm_function_->getBasicBlockList().push_back(done_sbc_block); + builder_.SetInsertPoint(sbc_binary_block); + sbc_binary(data); + builder_.CreateBr(done_sbc_block); + builder_.SetInsertPoint(sbc_decimal_block); + sbc_decimal(data); + builder_.CreateBr(done_sbc_block); + builder_.SetInsertPoint(done_sbc_block); +} + +void FunctionBuilder::sbc_binary(llvm::Value *data) +{ + llvm::Value *borrow_16 = 
+ zext_i16(jit_bool_is_false(register_load(flag_c_))); + + sbc_overflow(data, borrow_16); // must do this before storing new value to a + + llvm::Value *a_u16 = zext_i16(register_load(a_)); + llvm::Value *data_u16 = zext_i16(data); + llvm::Value *result_u16 = + builder_.CreateSub(builder_.CreateSub(a_u16, data_u16), borrow_16); + + llvm::Value *new_a = trunc_i8(result_u16); + register_store(new_a, a_); + set_nz(new_a); + + register_store( + convert_i1_to_jb( + builder_.CreateICmpEQ( + builder_.CreateAnd(result_u16, constant_u16(0x100)), + constant_u16(0))), + flag_c_); +} + +void FunctionBuilder::sbc_decimal(llvm::Value *data) +{ + llvm::Value *borrow = jit_bool_is_false(register_load(flag_c_)); + llvm::Value *borrow_16 = zext_i16(borrow); + + sbc_overflow(data, borrow_16); // must do this before modifying a + + builder_.CreateStore( + builder_.CreateSub( + builder_.CreateSub( + builder_.CreateAnd( + register_load(a_), + constant_u8(0x0f)), + builder_.CreateAnd( + data, + constant_u8(0x0f))), + convert_i1_to_i8(borrow)), + l_tmp_); + + builder_.CreateStore( + builder_.CreateSub( + builder_.CreateSub( + zext_i16(register_load(a_)), + zext_i16(data)), + borrow_16), + s_tmp_); + + register_store( + convert_i1_to_jb( + builder_.CreateICmpEQ( + builder_.CreateAnd( + builder_.CreateLoad(s_tmp_), + constant_u16(0x100)), + constant_u16(0))), + flag_c_); + + llvm::BasicBlock *s_adjust1_block = + llvm::BasicBlock::Create(context_, "s_adjust1", llvm_function_); + llvm::BasicBlock *done_s_adjust1_block = + llvm::BasicBlock::Create(context_, "done_s_adjust1", llvm_function_); + builder_.CreateCondBr( + builder_.CreateICmpSLT( + builder_.CreateLoad(s_tmp_), + constant_u16(0)), + s_adjust1_block, + done_s_adjust1_block); + + builder_.SetInsertPoint(s_adjust1_block); + builder_.CreateStore( + builder_.CreateSub( + builder_.CreateLoad(s_tmp_), + constant_u16(0x60)), + s_tmp_); + builder_.CreateBr(done_s_adjust1_block); + + builder_.SetInsertPoint(done_s_adjust1_block); + + 
llvm::BasicBlock *s_adjust2_block = + llvm::BasicBlock::Create(context_, "s_adjust2", llvm_function_); + llvm::BasicBlock *done_s_adjust2_block = + llvm::BasicBlock::Create(context_, "done_s_adjust2", llvm_function_); + builder_.CreateCondBr( + builder_.CreateICmpSLT( + builder_.CreateLoad(l_tmp_), + constant_u8(0)), + s_adjust2_block, + done_s_adjust2_block); + + builder_.SetInsertPoint(s_adjust2_block); + builder_.CreateStore( + builder_.CreateSub( + builder_.CreateLoad(s_tmp_), + constant_u16(0x06)), + s_tmp_); + builder_.CreateBr(done_s_adjust2_block); + + builder_.SetInsertPoint(done_s_adjust2_block); + register_store(trunc_i8(builder_.CreateLoad(s_tmp_)), a_); + set_nz(register_load(a_)); +} + +void FunctionBuilder::sbc_overflow( + llvm::Value *data, llvm::Value *borrow_16) +{ + llvm::Value *a_s16 = sext_i16(register_load(a_)); + llvm::Value *data_s16 = sext_i16(data); + llvm::Value *result_s16 = + builder_.CreateSub(builder_.CreateSub(a_s16, data_s16), borrow_16); + + llvm::Value *negative_as_unsigned = + builder_.CreateICmpNE( + builder_.CreateAnd(result_s16, constant_u16(0x80)), + constant_u16(0)); + llvm::Value *negative_as_signed = + builder_.CreateICmpSLT(result_s16, constant_u16(0)); + + register_store( + convert_i1_to_jb( + builder_.CreateXor(negative_as_unsigned, negative_as_signed)), + flag_v_); +} + +void FunctionBuilder::transfer( + const Register &from, Register &to) +{ + llvm::Value *data = builder_.CreateLoad(from.v_); + register_store(data, to); + set_nz(data); +} + +llvm::Value *FunctionBuilder::trb(llvm::Value *data) +{ + set_z(builder_.CreateAnd(data, register_load(a_))); + + llvm::Value *result = + builder_.CreateAnd( + data, + builder_.CreateXor( + register_load(a_), + constant_u8(0xff))); + return result; +} + +llvm::Value *FunctionBuilder::tsb(llvm::Value *data) +{ + set_z(builder_.CreateAnd(data, register_load(a_))); + + llvm::Value *result = + builder_.CreateOr( + data, + register_load(a_)); + return result; +} + +void 
FunctionBuilder::set_nz(llvm::Value *data) +{ + register_store(convert_i8_to_jb(builder_.CreateAnd(data, 0x80)), flag_n_); + set_z(data); +} + +void FunctionBuilder::set_z(llvm::Value *data) +{ + register_store( + convert_i1_to_jb(builder_.CreateICmpEQ(data, constant_u8(0))), flag_z_); +} + +llvm::Value *FunctionBuilder::flag_byte() +{ + builder_.CreateStore(constant_u8(0), p_tmp_); + + flag_byte_bit(flag_n_, flagN); + flag_byte_bit(flag_v_, flagV); + flag_byte_bit(flag_d_, flagD); + flag_byte_bit(flag_i_, flagI); + flag_byte_bit(flag_z_, flagZ); + flag_byte_bit(flag_c_, flagC); + + return builder_.CreateLoad(p_tmp_); +} + +void FunctionBuilder::flag_byte_bit(const Register &flag_reg, uint8_t flag_bit) +{ + llvm::BasicBlock *bit_set_block = + llvm::BasicBlock::Create(context_, "bit_set", llvm_function_); + llvm::BasicBlock *bit_done_block = + llvm::BasicBlock::Create(context_, "bit_done", llvm_function_); + llvm::Value *bit_set = jit_bool_is_true(register_load(flag_reg)); + builder_.CreateCondBr(bit_set, bit_set_block, bit_done_block); + + builder_.SetInsertPoint(bit_set_block); + builder_.CreateStore( + builder_.CreateOr(builder_.CreateLoad(p_tmp_), flag_bit), p_tmp_); + builder_.CreateBr(bit_done_block); + + builder_.SetInsertPoint(bit_done_block); +} + +void FunctionBuilder::illegal_instruction(uint16_t &ct_pc, int bytes) +{ + uint16_t opcode_at = ct_pc; + uint8_t opcode = ct_memory_[opcode_at]; + + std::stringstream s; + s << "illegal " << hex_prefix << std::hex << std::setw(2) << + std::setfill('0') << static_cast(opcode) << " "; + switch (bytes) + { + case 1: + disassemble1(ct_pc, s.str()); + break; + + case 2: + { + uint8_t operand; + disassemble2(ct_pc, s.str(), operand); + break; + } + + case 3: + { + uint16_t operand; + disassemble3(ct_pc, s.str(), operand); + break; + } + + default: + CANT_HAPPEN("Invalid byte count (ct_pc 0x" << std::hex << ct_pc << + ", " << std::dec << "bytes " << bytes << ")"); + } + + if (callbacks_.illegal_instruction[opcode] != 0) 
+ { + return_illegal_instruction(ct_pc, opcode_at, opcode); + } + else + { + // Illegal instructions are defined on the 65C02 to be no-ops. + } +} + +FunctionBuilder::BoundedAddress FunctionBuilder::zp(uint8_t addr) +{ + // We still generate a u16 for the actual llvm::Value. It probably doesn't + // make any difference but it seems logical as memory address "are" 16 bits, + // even if 8-bit ones are handled more efficiently on a real 6502. + return BoundedAddress(*this, constant_u16(addr), AddressRange(addr)); +} + +FunctionBuilder::BoundedAddress FunctionBuilder::abs(uint16_t addr) +{ + return BoundedAddress(*this, constant_u16(addr), AddressRange(addr)); +} + +FunctionBuilder::BoundedAddress FunctionBuilder::abs_index( + llvm::Value *abs, llvm::Value *index) +{ + assert(abs->getType() == i16_type_); + assert(index->getType() == i8_type_); + + llvm::ConstantInt *abs_ci = llvm::cast(abs); + uint16_t range_begin = abs_ci->getLimitedValue(); + uint32_t range_end = range_begin; + range_end += 0x100; + + return BoundedAddress(*this, builder_.CreateAdd(abs, zext_i16(index)), + AddressRange(range_begin, range_end)); +} + +FunctionBuilder::BoundedAddress FunctionBuilder::zp_index( + llvm::Value *zp, llvm::Value *index) +{ + assert(zp->getType() == i8_type_); + assert(index->getType() == i8_type_); + + return BoundedAddress(*this, zext_i16(builder_.CreateAdd(zp, index)), + AddressRange(0, 0x100)); +} + +FunctionBuilder::BoundedAddress FunctionBuilder::zp_post_index( + llvm::Value *zp, llvm::Value *index) +{ + assert(zp->getType() == i8_type_); + assert(index->getType() == i8_type_); + + llvm::Value *low_byte = + memory_read_untrapped(BoundedAddress(*this, zext_i16(zp))); + llvm::Value *high_byte_at = builder_.CreateAdd(zp, constant_u8(1)); + llvm::Value *high_byte = + memory_read_untrapped(BoundedAddress(*this, zext_i16(high_byte_at))); + llvm::Value *base_addr = create_u16(low_byte, high_byte); + return BoundedAddress(*this, + builder_.CreateAdd(base_addr, 
zext_i16(index))); +} + +FunctionBuilder::BoundedAddress FunctionBuilder::zp_pre_index( + llvm::Value *zp, llvm::Value *index) +{ + assert(zp->getType() == i8_type_); + assert(index->getType() == i8_type_); + + llvm::Value *low_byte_at = builder_.CreateAdd(zp, index); + llvm::Value *high_byte_at = builder_.CreateAdd(low_byte_at, constant_u8(1)); + llvm::Value *low_byte = + memory_read_untrapped(BoundedAddress(*this, zext_i16(low_byte_at))); + llvm::Value *high_byte = + memory_read_untrapped(BoundedAddress(*this, zext_i16(high_byte_at))); + return BoundedAddress(*this, create_u16(low_byte, high_byte)); +} + +llvm::Value *FunctionBuilder::check_predicted_rts(uint16_t subroutine_addr) +{ + llvm::Value *mangled_pc = pop_u16(); + llvm::Value *new_pc = builder_.CreateAdd(mangled_pc, constant_u16(1)); + + // It would be correct to just return new_pc at this point; our caller + // will use it to arrange a control transfer. Since that is a run-time + // determined value, the control transfer would have to be done by + // returning from the generated function. We may be able to make some + // plausible guesses (currently never guaranteed to be correct) which + // we can verify at run time and which if correct allow the RTS to be + // handled as a branch within the generated function. This should save + // a bit of overhead on not returning from the function and re-entering + // another and may also allow the optimiser some additional leeway. 
+ + const AddressSet &targets = predicted_rts_targets_[subroutine_addr]; + TRACE("Generating predicted RTS code; " << targets.size() << " target(s)"); + for (AddressSet::const_iterator it = targets.begin(); it != targets.end(); + ++it) + { + const uint16_t target = *it; + llvm::BasicBlock *prediction_correct = + llvm::BasicBlock::Create(context_, "prediction_correct", + llvm_function_); + llvm::BasicBlock *prediction_incorrect = + llvm::BasicBlock::Create(context_, "prediction_incorrect", + llvm_function_); + builder_.CreateCondBr( + builder_.CreateICmpEQ(constant_u16(target), new_pc), + prediction_correct, prediction_incorrect); + builder_.SetInsertPoint(prediction_correct); + control_transfer_to(constant_u16(target), opcode_rts); + builder_.SetInsertPoint(prediction_incorrect); + } + + return new_pc; +} + +void FunctionBuilder::control_transfer_to(llvm::Value *target, uint8_t opcode) +{ + assert(target->getType() == i16_type_); + + switch (opcode) + { + case opcode_rts: + case opcode_rti: + case opcode_bra: + case opcode_bcc: + case opcode_bcs: + case opcode_bvc: + case opcode_bvs: + case opcode_beq: + case opcode_bne: + case opcode_bmi: + case opcode_bpl: + case opcode_implicit: + // This control transfer never triggers a call callback. + break; + + case opcode_jsr: + { + // This control transfer triggers a call callback if present. The + // target address is known at compile time. + llvm::ConstantInt *target_ci = + llvm::cast(target); + uint16_t target16 = target_ci->getLimitedValue(); + if (callbacks_.call[target16] != 0) + { + return_jsr_complex(target); + return; + } + + // We also need to check if the two bytes pushed onto the stack by + // the JSR have invalidated any JITted code and return control to + // our caller if so. + // + // Note that we work with a tmp_s i8 local so that if the stack + // pointer wrapped during the JSR pushes we will still work + // correctly here. 
+ llvm::Value *tmp_s = + builder_.CreateAdd(register_load(s_), constant_u8(1)); + llvm::Value *stack_addr1 = + builder_.CreateAdd(constant_u16(stack), zext_i16(tmp_s)); + tmp_s = builder_.CreateAdd(tmp_s, constant_u8(1)); + llvm::Value *stack_addr2 = + builder_.CreateAdd(constant_u16(stack), zext_i16(tmp_s)); + + llvm::BasicBlock *code_not_modified_block = + llvm::BasicBlock::Create(context_, "code_not_modified"); + llvm::BasicBlock *code_addr1_not_modified_block = + llvm::BasicBlock::Create(context_, "code_addr1_not_modified", + llvm_function_); + llvm::BasicBlock *code_modified_block = + llvm::BasicBlock::Create(context_, "code_modified", + llvm_function_); + + const AddressRange stack_range(stack, stack + 0x100); + llvm::Value *stack_addr1_is_code = + is_code_at(BoundedAddress(*this, stack_addr1, stack_range)); + builder_.CreateCondBr(stack_addr1_is_code, code_modified_block, + code_addr1_not_modified_block); + + builder_.SetInsertPoint(code_addr1_not_modified_block); + llvm::Value *stack_addr2_is_code = + is_code_at(BoundedAddress(*this, stack_addr2, stack_range)); + builder_.CreateCondBr(stack_addr2_is_code, code_modified_block, + code_not_modified_block); + + builder_.SetInsertPoint(code_modified_block); + return_jsr_complex(target); + + llvm_function_->getBasicBlockList().push_back( + code_not_modified_block); + builder_.SetInsertPoint(code_not_modified_block); + break; + } + + case opcode_jmp_abs: + { + // This control transfer triggers a call callback if present. The + // target address is known at compile time. + llvm::ConstantInt *target_ci = + llvm::cast(target); + uint16_t target16 = target_ci->getLimitedValue(); + if (callbacks_.call[target16] != 0) + { + return_control_transfer_indirect(target, opcode); + return; + } + break; + } + + case opcode_jmp_ind_abs: + case opcode_jmp_indx_abs: + { + // This control transfer triggers a call callback if present. The + // target address is only known at run time. 
+ assert(!llvm::isa(target)); + llvm::Value *call_callback_addr = builder_.CreateGEP( + call_callbacks_, + llvm::ArrayRef(zext_i32(target))); + llvm::Value *call_callback = + builder_.CreateLoad(call_callback_addr); + llvm::BasicBlock *call_callback_block = + llvm::BasicBlock::Create(context_, "call_callback", + llvm_function_); + llvm::BasicBlock *no_call_callback_block = + llvm::BasicBlock::Create(context_, "no_call_callback", + llvm_function_); + llvm::Value *call_callback_not_null = + builder_.CreateIsNotNull(call_callback); + builder_.CreateCondBr(call_callback_not_null, call_callback_block, + no_call_callback_block); + + builder_.SetInsertPoint(call_callback_block); + return_control_transfer_indirect(target, opcode); + + builder_.SetInsertPoint(no_call_callback_block); + break; + } + + default: + CANT_HAPPEN("Unexpected opcode 0x" << std::hex << opcode); + } + + llvm::ConstantInt *target_ci = llvm::dyn_cast(target); + if ((target_ci != 0) && ( + code_generated_for_address_[target_ci->getLimitedValue()] || + (pending_.find(target_ci->getLimitedValue()) != pending_.end()))) + { + ensure_address_block_created(target_ci->getLimitedValue()); + // The target is within this function, so we can just branch there. + builder_.CreateBr(address_block_[target_ci->getLimitedValue()]); + } + else + { + // The target isn't (knowably) within this function, so we have to + // get there via our caller. + return_control_transfer_direct(target); + } +} + +// All memory reads should be done via a call to this function, unless they are +// explicitly exempt from read callbacks. 
+llvm::Value *FunctionBuilder::memory_read(const BoundedAddress &ba) +{ + llvm::Value *addr = ba.addr(); + + llvm::ConstantInt *addr_ci = llvm::dyn_cast(addr); + if (addr_ci != 0) + { + uint16_t addr16 = addr_ci->getLimitedValue(); + TRACE("Load at compile-time constant address 0x" << std::hex << + std::setfill('0') << std::setw(4) << addr16); + if (callbacks_.read[addr16] != 0) + { + TRACE("Read callback exists at constant address"); + llvm::Value *callback = + constant_ptr(callbacks_.read[addr16], "read_callback"); + return call_read_callback(callback, addr); + } + + // Actually do the read from memory. + return memory_read_untrapped(ba); + } + else + { + if (callback_in_bounds(callbacks_.read, ba.bounds())) + { + TRACE("Read callback may exist; runtime check required"); + llvm::Value *read_callback_addr = builder_.CreateGEP( + read_callbacks_, llvm::ArrayRef(zext_i32(addr))); + llvm::Value *read_callback = + builder_.CreateLoad(read_callback_addr); + llvm::BasicBlock *read_callback_block = + llvm::BasicBlock::Create(context_, "read_callback", + llvm_function_); + llvm::BasicBlock *no_read_callback_block = + llvm::BasicBlock::Create(context_, "no_read_callback", + llvm_function_); + llvm::BasicBlock *memory_read_done_block = + llvm::BasicBlock::Create(context_, "memory_read_done"); + llvm::Value *read_callback_not_null = + builder_.CreateIsNotNull(read_callback); + builder_.CreateCondBr(read_callback_not_null, read_callback_block, + no_read_callback_block); + + builder_.SetInsertPoint(read_callback_block); + llvm::Value *result = call_read_callback(read_callback, ba.addr()); + builder_.CreateStore(result, read_callback_result_); + builder_.CreateBr(memory_read_done_block); + + builder_.SetInsertPoint(no_read_callback_block); + builder_.CreateStore(memory_read_untrapped(ba), + read_callback_result_); + builder_.CreateBr(memory_read_done_block); + + llvm_function_->getBasicBlockList().push_back( + memory_read_done_block); + 
builder_.SetInsertPoint(memory_read_done_block); + return builder_.CreateLoad(read_callback_result_); + } + else + { + TRACE("No read callback within address bounds"); + // Actually do the read from memory. + return memory_read_untrapped(ba); + } + } +} + +llvm::Value *FunctionBuilder::memory_read_untrapped(const BoundedAddress &ba) +{ + llvm::Value *host_addr = builder_.CreateGEP( + memory_base_, llvm::ArrayRef(zext_i32(ba.addr()))); + return builder_.CreateLoad(host_addr); +} + +// All memory writes should be done via a call to this function, unless they +// are explicitly exempt from triggering write callbacks. +// +// Note that because this may return to the caller to indicate +// result_write_to_code or result_write_callback, it must be the last +// code-generation function called when translating an opcode, as any +// subsequent code may not be executed. +void FunctionBuilder::memory_write(const BoundedAddress &ba, + llvm::Value *data, uint16_t next_opcode_at) +{ + llvm::ConstantInt *addr_ci = llvm::dyn_cast(ba.addr()); + if (addr_ci != 0) + { + uint16_t addr16 = addr_ci->getLimitedValue(); + TRACE("Store at compile-time constant address 0x" << std::hex << + std::setfill('0') << std::setw(4) << addr16); + if (callbacks_.write[addr16] != 0) + { + TRACE("Write callback exists at constant address"); + return_write_callback(next_opcode_at, ba.addr(), data); + return; + } + } + else + { + if (callback_in_bounds(callbacks_.write, ba.bounds())) + { + TRACE("Write callback may exist; runtime check required"); + llvm::Value *write_callback_addr = builder_.CreateGEP( + write_callbacks_, + llvm::ArrayRef(zext_i32(ba.addr()))); + llvm::Value *write_callback = + builder_.CreateLoad(write_callback_addr); + llvm::BasicBlock *write_callback_block = + llvm::BasicBlock::Create(context_, "write_callback", + llvm_function_); + llvm::BasicBlock *no_write_callback_block = + llvm::BasicBlock::Create(context_, "no_write_callback", + llvm_function_); + llvm::Value 
*write_callback_not_null = + builder_.CreateIsNotNull(write_callback); + builder_.CreateCondBr(write_callback_not_null, write_callback_block, + no_write_callback_block); + + builder_.SetInsertPoint(write_callback_block); + return_write_callback(next_opcode_at, ba.addr(), data); + + builder_.SetInsertPoint(no_write_callback_block); + } + else + { + TRACE("No write callback within address bounds"); + } + } + + memory_write_untrapped(ba, data, next_opcode_at); +} + +// Note that (like lib6502 proper) we don't externalise our registers before +// invoking the (read/write) callback or internalise them afterwards, so +// the callback doesn't see correct information if it examines the CPU state. +llvm::Value *FunctionBuilder::call_callback( + llvm::Value *callback, llvm::Value *addr, + llvm::Value *data) +{ + return builder_.CreateCall3(callback, mpu_llvm_, addr, data, + "callback_result"); +} + +llvm::Value *FunctionBuilder::call_read_callback( + llvm::Value *callback, llvm::Value *addr) +{ + llvm::Value *result_int = call_callback(callback, addr, constant_u8(0)); + return builder_.CreateTrunc(result_int, i8_type_); +} + +// Write to memory with no checks for modification of already JITted code or +// write callbacks. 
+void FunctionBuilder::memory_write_raw(const BoundedAddress &ba, + llvm::Value *data) +{ + llvm::Value *host_addr = builder_.CreateGEP( + memory_base_, llvm::ArrayRef(zext_i32(ba.addr()))); + builder_.CreateStore(data, host_addr); +} + +llvm::Value *FunctionBuilder::is_code_at(const BoundedAddress &ba) +{ + const AddressRange &bounds = ba.bounds(); + bool use_optimistic_write = !bounds.all_memory(); + for (AddressRange::const_iterator it = bounds.begin(); + use_optimistic_write && (it != bounds.end()); ++it) + { + uint16_t i = *it; + if (code_at_address_[i]) + { + TRACE("BoundedAddress " << ba << + " includes known code at 0x" << std::hex << + std::setfill('0') << std::setw(4) << i << + "; can't use optimistic write"); + use_optimistic_write = false; + } + } + + if (use_optimistic_write) + { + optimistic_writes_.insert(ba.bounds()); + return constant_i1(false); + } + else + { + llvm::Value *code_at_address_flag_addr = builder_.CreateGEP( + code_at_address_llvm_, + llvm::ArrayRef(zext_i32(ba.addr()))); + return jit_bool_is_true(builder_.CreateLoad(code_at_address_flag_addr)); + } +} + +// Write to memory, checking for modification of already JITted code but +// not for write callbacks. +// +// Note that because this may return to the caller to indicate +// result_write_to_code, it must be the last code-generation function called +// when translating an opcode, as any subsequent code may not be executed. +void FunctionBuilder::memory_write_untrapped( + const BoundedAddress &ba, llvm::Value *data, + uint16_t next_opcode_at) +{ + // Actually do the write. + memory_write_raw(ba, data); + + // Check for writes which modify JITted code. + llvm::Value *just_modified_code = is_code_at(ba); + + // The optimiser would eliminate the dead branches if just_modified_code + // is a constant false value, but to make the IR easier to read and perhaps + // help the optimiser out, let's not generate pointless code in this case. 
+ llvm::ConstantInt *just_modified_ci = + llvm::dyn_cast(just_modified_code); + if ((just_modified_ci != 0) && !(just_modified_ci->getLimitedValue())) + { + return; + } + + llvm::BasicBlock *code_modified_block = + llvm::BasicBlock::Create(context_, "code_modified", llvm_function_); + llvm::BasicBlock *code_not_modified_block = + llvm::BasicBlock::Create(context_, "code_not_modified", llvm_function_); + builder_.CreateCondBr(just_modified_code, code_modified_block, + code_not_modified_block); + + builder_.SetInsertPoint(code_modified_block); + return_write_to_code(next_opcode_at, ba.addr()); + + builder_.SetInsertPoint(code_not_modified_block); +} + +void FunctionBuilder::return_pc(Result result, llvm::Value *new_pc) +{ + builder_.CreateStore(constant_i(result), function_result_); + builder_.CreateStore(new_pc, pc_); + builder_.CreateBr(epilogue_); +} + +void FunctionBuilder::return_pc_addr(Result result, llvm::Value *new_pc, + llvm::Value *addr) +{ + builder_.CreateStore(constant_i(result), function_result_); + builder_.CreateStore(new_pc, pc_); + builder_.CreateStore(addr, builder_.CreateStructGEP(registers_, 11)); + builder_.CreateBr(epilogue_); +} + +void FunctionBuilder::return_pc_data(Result result, llvm::Value *new_pc, + llvm::Value *data) +{ + builder_.CreateStore(constant_i(result), function_result_); + builder_.CreateStore(new_pc, pc_); + builder_.CreateStore(data, builder_.CreateStructGEP(registers_, 12)); + builder_.CreateBr(epilogue_); +} + +void FunctionBuilder::return_pc_addr_data( + Result result, llvm::Value *new_pc, llvm::Value *addr, llvm::Value *data) +{ + builder_.CreateStore(constant_i(result), function_result_); + builder_.CreateStore(new_pc, pc_); + builder_.CreateStore(addr, builder_.CreateStructGEP(registers_, 11)); + builder_.CreateStore(data, builder_.CreateStructGEP(registers_, 12)); + builder_.CreateBr(epilogue_); +} + +void FunctionBuilder::return_control_transfer_direct(llvm::Value *new_pc) +{ + 
return_pc(result_control_transfer_direct, new_pc); +} + +void FunctionBuilder::return_control_transfer_indirect( + llvm::Value *new_pc, uint8_t opcode) +{ + return_pc_data(result_control_transfer_indirect, new_pc, + constant_u8(opcode)); +} + +void FunctionBuilder::return_brk(llvm::Value *new_pc) +{ + return_pc(result_brk, new_pc); +} + +void FunctionBuilder::return_jsr_complex(llvm::Value *new_pc) +{ + return_pc(result_jsr_complex, new_pc); +} + +void FunctionBuilder::return_illegal_instruction( + uint16_t new_pc, uint16_t opcode_at, uint8_t opcode) +{ + return_pc_addr_data(result_illegal_instruction, constant_u16(new_pc), + constant_u16(opcode_at), constant_u8(opcode)); +} + +void FunctionBuilder::return_write_to_code(uint16_t new_pc, llvm::Value *addr) +{ + return_pc_addr(result_write_to_code, constant_u16(new_pc), addr); +} + +void FunctionBuilder::return_write_callback( + uint16_t new_pc, llvm::Value *addr, llvm::Value *data) +{ + return_pc_addr_data( + result_write_callback, constant_u16(new_pc), addr, data); +} + +void FunctionBuilder::return_invalid_bounds() +{ + builder_.CreateStore(constant_i(result_invalid_bounds), function_result_); + builder_.CreateBr(epilogue_); +} + +void FunctionBuilder::disassemble1(uint16_t &addr, const std::string &s) +{ + disassemble_hex_dump(addr, 1); + disassembly_ << s << "\n"; + ++addr; +} + +void FunctionBuilder::disassemble2( + uint16_t &addr, const std::string &prefix, uint8_t &operand, + const std::string &suffix) +{ + disassemble_hex_dump(addr, 2); + operand = operand8(addr); + disassembly_ << prefix << hex_prefix << std::setw(2) << + static_cast(operand) << suffix; + + // This is a bit of a special case, but it works so... 
+ std::string::size_type l = prefix.length(); + if ((l > 1) && (prefix[l - 1] == '#') && isprint(operand)) + { + disassembly_ << " ('" << static_cast(operand) << "')"; + } + + disassembly_ << "\n"; + + addr += 2; +} + +void FunctionBuilder::disassemble3( + uint16_t &addr, const std::string &prefix, uint16_t &operand, + const std::string &suffix) +{ + disassemble_hex_dump(addr, 3); + operand = operand16(addr); + disassembly_ << prefix << hex_prefix << std::setw(4) << operand << suffix << + "\n"; + addr += 3; +} + +void FunctionBuilder::disassemble_branch( + uint16_t &addr, const std::string &s, uint16_t &target) +{ + disassemble_hex_dump(addr, 2); + uint8_t operand = operand8(addr); + int offset = (operand < 0x80) ? operand : -(0x100 - operand); + // The branch is relative to the PC *after* it's been moved past the + // branch instruction. + addr += 2; + target = addr + offset; + disassembly_ << s << hex_prefix << std::setw(4) << target << "\n"; +} + +void FunctionBuilder::disassemble_hex_dump(uint16_t addr, int bytes) +{ + assert(bytes <= 3); + disassembly_ << std::hex << std::setw(4) << std::setfill('0') << addr << + " "; + for (int i = 0; i < 3; ++i) + { + if (i < bytes) + { + disassembly_ << std::setw(2) << + static_cast(ct_memory_[addr + i]) << " "; + } + else + { + disassembly_ << " "; + } + } +} diff --git a/FunctionBuilder.h b/FunctionBuilder.h new file mode 100644 index 0000000..da2df8d --- /dev/null +++ b/FunctionBuilder.h @@ -0,0 +1,364 @@ +/* Copyright (c) 2014 Steven Flintham + * + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. + */ + +#ifndef FUNCTIONBUILDER_H +#define FUNCTIONBUILDER_H + +#include +#include +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/TypeBuilder.h" +#include "llvm/IR/Value.h" +#include +#include +#include + +#include "AddressSet.h" +#include "const.h" +#include "JitBool.h" +#include "lib6502.h" + +class Function; +struct LLVMStuff; + +class FunctionBuilder : boost::noncopyable +{ +public: + // Create a FunctionBuilder object which can be used to build a Function + // representing the code starting at 'address'. The Function object built + // will operate on the given M6502 object. The 'code_at_address' array + // will be used at compile time and at runtime to decide if writes to + // memory may invalidate already JITted code. The memory inside the M6502 + // object will be used when the Funtion object executes, but ct_memory + // will be used at compile time to determine the instructions to compile; + // see FunctionManager for more on this. + FunctionBuilder(M6502 *mpu, const uint8_t *ct_memory, + JitBool *code_at_address, uint16_t address); + + boost::shared_ptr build(); + + // Status codes returned by the JITted function + enum Result + { + // Control has transferred to the address in registers.pc. 
No call + // callback should be invoked, either because the JITted function knows + // there is no applicable call callback or because the control transfer + // is via an instruction which does not trigger call callbacks. + result_control_transfer_direct, + + // Control has transferred to the address in registers.pc via an + // instruction which is eligible for call callbacks. registers.data + // contains the opcode of the instruction which transferred + // control. The caller should check for an applicable call + // callback. registers.addr is *not* updated; the addr value for + // the callback is registers.pc. + result_control_transfer_indirect, + + // A BRK instruction has just been executed and registers.pc updated + // to point to the BRK vector. The caller should check to see if the + // stack pushes implicitly performed by BRK have invalidated any + // already-JITted code and for a call callback on the BRK vector. + // Neither registers.addr nor registers.data are updated. + result_brk, + + // A JSR instruction has just been executed and registers.pc + // updated to point to the destination address. One or both of the + // following may be true: - the stack pushes implicitly performed + // have invalidated some + // already-JITted code + // - a call callback is registered on the destination address It is not + // guaranteed that either of these is the case, although in practice + // with this implementation at least one should be true. Not all JSR + // instructions will necessarily cause the JITted function to return + // this value, hence the result code is result_jsr_*complex* not just + // result_jsr. Neither registers.addr nor registers.data are updated. + result_jsr_complex, + + // An illegal instruction has been executed and registers.pc updated to + // point to the following opcode. registers.addr contains the address + // of the illegal instruction and registers.data its opcode. The + // caller should check to see if a callback is registered. 
+ result_illegal_instruction, + + // A memory write has been executed which changed an address marked + // as holding code. registers.addr contains the address modified. The + // caller should invalidate any JITted functions for this address. + result_write_to_code, + + // A memory write has occurred which triggers a write callback. Memory + // has not been updated. registers.addr and registers.data contain the + // address and the data being written respectively. The caller should + // invoke the write callback and check for writes to already-JITted + // code. + result_write_callback, + + // Internal bounds generated for an instruction's address range were + // found to be invalid by self-checking code. This can only occur + // in debug builds and then only if there is a bug in FunctionBuilder. + result_invalid_bounds + }; + +private: + uint16_t build_at(uint16_t ct_pc); + + uint8_t operand8(uint16_t opcode_at); + uint16_t operand16(uint16_t opcode_at); + + llvm::Value *constant_i1(bool c); + llvm::Value *constant_u8(uint8_t c); + llvm::Value *constant_u16(uint16_t c); + llvm::Value *constant_u32(uint32_t c); + llvm::Value *constant_u64(uint64_t c); + + template + llvm::Value *constant_ptr(T *p, const std::string &name) + { + llvm::Value *v = constant_u64(reinterpret_cast(p)); + // The name passed in never seems to be used, but maybe this will + // change in the future. It doesn't really do us any harm to pass + // it in anyway. 
+ return builder_.CreateIntToPtr( + v, llvm::TypeBuilder::get(llvm::getGlobalContext()), + name); + } + + llvm::Value *constant_i(int c); + + llvm::Value *constant_jb(JitBool c); + llvm::Value *convert_i1_to_jb(llvm::Value *v); + llvm::Value *convert_i8_to_jb(llvm::Value *v); + llvm::Value *convert_i16_to_jb(llvm::Value *v); + llvm::Value *jit_bool_is_true(llvm::Value *v); + llvm::Value *jit_bool_is_false(llvm::Value *v); + + llvm::Value *convert_i1_to_i8(llvm::Value *v); + + llvm::Value *zext_i16(llvm::Value *v); + llvm::Value *zext_i32(llvm::Value *v); + llvm::Value *sext_i16(llvm::Value *v); + llvm::Value *trunc_i8(llvm::Value *v); + llvm::Value *create_u16(llvm::Value *low_byte, llvm::Value *high_byte); + + struct Register + { + llvm::Value *v_; + bool modified_; + }; + void initialise_i8_reg(Register &r, int structure_index, + const std::string &name); + void initialise_jb_reg(Register &r, int structure_index, + const std::string &name); + + void ensure_address_block_created(uint16_t addr); + + void return_pc(Result result, llvm::Value *new_pc); + void return_pc_addr(Result result, llvm::Value *new_pc, llvm::Value *addr); + void return_pc_data(Result result, llvm::Value *new_pc, llvm::Value *data); + void return_pc_addr_data(Result result, llvm::Value *new_pc, + llvm::Value *addr, llvm::Value *data); + void return_control_transfer_direct(llvm::Value *new_pc); + void return_control_transfer_indirect(llvm::Value *new_pc, uint8_t opcode); + void return_brk(llvm::Value *new_pc); + void return_jsr_complex(llvm::Value *new_pc); + void return_illegal_instruction(uint16_t new_pc, uint16_t opcode_at, + uint8_t opcode); + void return_write_to_code(uint16_t new_pc, llvm::Value *addr); + void return_write_callback(uint16_t new_pc, llvm::Value *addr, + llvm::Value *data); + void return_invalid_bounds(); + + class BoundedAddress; + + llvm::Value *register_load(const Register &r); + void register_store(llvm::Value *v, Register &r); + + typedef llvm::Value 
*(FunctionBuilder::*OpFn)(llvm::Value *data); + void register_op(OpFn op, Register &r); + void memory_op(OpFn op, const BoundedAddress &ba, uint16_t next_opcode_at); + + llvm::Value *is_code_at(const BoundedAddress &addr); + + void adc(llvm::Value *data); + void adc_llvm(llvm::Value *data); + void adc_binary(llvm::Value *data); + void adc_decimal(llvm::Value *data); + void adc_binary_llvm(llvm::Value *data); + void adc_decimal_llvm(llvm::Value *data); + void And(llvm::Value *data); + llvm::Value *asl(llvm::Value *data); + void bit(llvm::Value *data); + void branch(Register &flag, bool branch_if, uint16_t target); + void cmp(llvm::Value *r, llvm::Value *data); + void cmp_llvm(llvm::Value *r, llvm::Value *data); + llvm::Value *dec(llvm::Value *data); + void eor(llvm::Value *data); + llvm::Value *inc(llvm::Value *data); + void ld(Register &r, llvm::Value *data); + llvm::Value *lsr(llvm::Value *data); + void ora(llvm::Value *data); + void pop_flags(); + llvm::Value *pop_u8(); + llvm::Value *pop_u16(); + void push_u8_raw(llvm::Value *data); + void push_u16_raw(uint16_t u); + void push_u8(llvm::Value *data, uint16_t next_opcode_at); + llvm::Value *rol(llvm::Value *data); + llvm::Value *ror(llvm::Value *data); + void sbc(llvm::Value *data); + void sbc_binary(llvm::Value *data); + void sbc_decimal(llvm::Value *data); + void sbc_overflow(llvm::Value *data, + llvm::Value *borrow); + void transfer(const Register &from, Register &to); + llvm::Value *trb(llvm::Value *data); + llvm::Value *tsb(llvm::Value *data); + + void set_nz(llvm::Value *data); + void set_z(llvm::Value *data); + + llvm::Value *flag_byte(); + void flag_byte_bit(const Register &flag_reg, uint8_t flag_bit); + + void illegal_instruction(uint16_t &ct_pc, int bytes); + + BoundedAddress zp(uint8_t addr); + BoundedAddress abs(uint16_t addr); + BoundedAddress abs_index(llvm::Value *abs, + llvm::Value *index); + BoundedAddress zp_index(llvm::Value *zp, + llvm::Value *r); + BoundedAddress zp_post_index( + llvm::Value 
*zp, llvm::Value *index); + BoundedAddress zp_pre_index( + llvm::Value *zp, llvm::Value *index); + + llvm::Value *check_predicted_rts(uint16_t subroutine_addr); + + // A special opcode used as the third argument to control_transfer_to + // when there is no explicit opcode causing the control transfer; this + // is just a documented way to signal that the control transfer is direct + // and cannot trigger a call callback. + enum { + opcode_implicit = 0xff + }; + void control_transfer_to(llvm::Value *target, uint8_t opcode); + + llvm::Value *memory_read(const BoundedAddress &ba); + llvm::Value *memory_read_untrapped(const BoundedAddress &ba); + + void memory_write(const BoundedAddress &ba, + llvm::Value *data, uint16_t next_opcode_at); + void memory_write_untrapped(const BoundedAddress &ba, + llvm::Value *data, uint16_t next_opcode_at); + void memory_write_raw(const BoundedAddress &ba, + llvm::Value *data); + + llvm::Value *call_callback( + llvm::Value *callback, llvm::Value *addr, + llvm::Value *data); + llvm::Value *call_read_callback( + llvm::Value *callback, llvm::Value *addr); + + void disassemble1(uint16_t &addr, const std::string &s); + void disassemble2(uint16_t &addr, const std::string &prefix, + uint8_t &operand, const std::string &suffix = ""); + void disassemble3(uint16_t &addr, const std::string &prefix, + uint16_t &operand, const std::string &suffix = ""); + void disassemble_branch(uint16_t &addr, const std::string &s, + uint16_t &target); + void disassemble_hex_dump(uint16_t addr, int bytes); + + bool built_; + + M6502 *const mpu_; + JitBool *code_at_address_; + const uint16_t address_; + const uint8_t *const ct_memory_; + // callbacks_ is strictly redundant as it's available inside mpu, but + // it's convenient. 
+ const M6502_Callbacks &callbacks_; + + AddressSet code_range_; + AddressSet optimistic_writes_; + + std::stringstream disassembly_; + + int instructions_; + const int max_instructions_; + + // This could be an AddressSet but since we "rely" on the order of + // iteration for pending_ it seems better to be explicit; we don't need + // any of the range-handling convenience of AddressSet here anyway. + std::set pending_; + + std::map predicted_rts_targets_; + + llvm::LLVMContext &context_; + + llvm::Type *const native_int_type_; + llvm::PointerType *const callback_type_; + llvm::Type *const i1_type_; + llvm::Type *const i8_type_; + llvm::Type *const i16_type_; + llvm::Type *const i32_type_; + llvm::Type *const i64_type_; + llvm::Type *const jit_bool_type_; + + llvm::IRBuilder<> &builder_; + + llvm::Function *llvm_function_; + + llvm::Value *registers_; + llvm::Value *code_at_address_llvm_; + llvm::Value *read_callbacks_; + llvm::Value *write_callbacks_; + llvm::Value *call_callbacks_; + llvm::Value *memory_base_; + llvm::Value *mpu_llvm_; + + llvm::Value *function_result_; + + // Note that address_block_ and code_generated_for_address_ aren't + // redundant; address_block_ elements are created (for example) when + // a branch means the corresponding address must have a BasicBlock + // created for use as a branch target, but that doesn't mean code has + // been generated for it yet. 
+ llvm::BasicBlock *address_block_[memory_size]; + bool code_generated_for_address_[memory_size]; + + Register a_; + Register x_; + Register y_; + Register s_; + Register flag_n_; + Register flag_v_; + Register flag_d_; + Register flag_i_; + Register flag_z_; + Register flag_c_; + llvm::Value *pc_; + + llvm::Value *read_callback_result_; + llvm::Value *p_tmp_; + llvm::Value *l_tmp_; + llvm::Value *s_tmp_; + llvm::Value *t_tmp_; + + llvm::BasicBlock *epilogue_; +}; + +#endif diff --git a/FunctionManager.cpp b/FunctionManager.cpp new file mode 100644 index 0000000..51f60b7 --- /dev/null +++ b/FunctionManager.cpp @@ -0,0 +1,310 @@ +/* Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. 
+ */
+
+#include "FunctionManager.h"
+
+#include
+
+#include "Function.h"
+#include "FunctionBuilder.h"
+#include "M6502Internal.h"
+#include "Registers.h"
+#include "util.h"
+
+// Start with the JIT thread marked idle and no work pending. The thread
+// itself is not created here; build_function_lazy() creates it on first use.
+FunctionManager::FunctionManager(M6502 *mpu)
+: jit_thread_idle_(true), work_available_(false), quit_(false), mpu_(mpu),
+  memory_snapshot_(), function_for_address_(), code_at_address_()
+{
+}
+
+// If the JIT thread was ever created, ask it to quit and wait for it to
+// finish before the members it uses are destroyed.
+FunctionManager::~FunctionManager()
+{
+    if (jit_thread_.get_id() != boost::thread::id())
+    {
+        TRACE("Notifying JIT thread to quit");
+        {
+            // quit_ is read by the JIT thread while it holds this mutex.
+            boost::mutex::scoped_lock lock(jit_thread_cv_mutex_);
+            quit_ = true;
+        }
+        jit_thread_cv_.notify_all();
+        TRACE("Joining with JIT thread");
+        jit_thread_.join();
+    }
+}
+
+// Return the current value of jit_thread_idle_, read under its mutex.
+bool FunctionManager::jit_thread_idle()
+{
+    boost::mutex::scoped_lock lock(jit_thread_idle_mutex_);
+    return jit_thread_idle_;
+}
+
+// Bring memory_snapshot_ into line with the emulated memory. Any address
+// marked in code_at_address_ whose byte has changed since the snapshot was
+// taken is reported via code_modified_at().
+void FunctionManager::update_memory_snapshot()
+{
+    assert(jit_thread_idle());
+
+    const uint8_t *memory = mpu_->memory;
+    for (size_t i = 0; i < memory_size; ++i)
+    {
+        if (code_at_address_[i] && (memory_snapshot_[i] != memory[i]))
+        {
+            code_modified_at(i);
+        }
+        memory_snapshot_[i] = memory[i];
+    }
+}
+
+// Build a Function for the code starting at 'address', translating the
+// instructions found in 'ct_memory', and register it with add_function().
+// This must use 'address' rather than registers.pc: build_function_thread()
+// calls build_function() with jit_thread_address_ on the JIT thread, so the
+// CPU's current pc is not necessarily the address we were asked to build,
+// and build_function() looks the result up via function_for_address_[address].
+// The returned pointer remains owned by function_for_address_owner_.
+Function *FunctionManager::build_function_internal(
+    uint16_t address, const uint8_t *ct_memory)
+{
+    TRACE("Building Function for code at 0x" << std::hex << std::setfill('0') <<
+          std::setw(4) << address);
+    FunctionBuilder fb(mpu_, ct_memory, code_at_address_, address);
+    boost::shared_ptr<Function> f(fb.build());
+    add_function(f);
+    return f.get();
+}
+
+Function *FunctionManager::build_function(uint16_t address,
+                                          const uint8_t *ct_memory)
+{
+    Function *f;
+    int pass = 0;
+    do
+    {
+        assert(pass < 2);
+        ++pass;
+
+        f = build_function_internal(address, ct_memory);
+
+        bool f_is_optimistic_self_writer = false;
+        const AddressSet &code_range = f->code_range();
+        for (AddressSet::const_iterator it = code_range.begin();
+             it != code_range.end(); ++it)
+        {
+            uint16_t i = *it;
+            if (code_at_address_[i] &&
!optimistic_writers_for_address_[i].empty()) + { + // There is now code at an address where optimistic writes are + // performed. Future code generation won't create optimistic + // writes there because code_at_address_[i] has now been set, + // but we need to destroy existing functions which perform + // that write so they will be regenerated. + const FunctionSet &optimistic_writers = + optimistic_writers_for_address_[i]; + f_is_optimistic_self_writer = + (optimistic_writers.find(f) != optimistic_writers.end()); + destroy_functions_in_set(optimistic_writers_for_address_[i]); + if (f_is_optimistic_self_writer) + { + // destroy_functions_in_set() has now destroyed f, so a) + // code_range is no longer a valid reference b) there's + // no need to continue iterating over f's code range. + break; + } + + } + } + + // We might just have destroyed the function we built, if it modified + // its own code, so we need to loop round if so. + f = function_for_address_[address]; + if (f == 0) + { + assert(f_is_optimistic_self_writer); + TRACE("Rebuilding just-created function"); + } + } + while (f == 0); + + TRACE(f->dump_all()); + + return f; +} + +void FunctionManager::build_function_lazy(uint16_t address) +{ + assert(jit_thread_idle()); + + TRACE("Will build Function for address 0x" << std::hex << + std::setfill('0') << std::setw(4) << address << " in background"); + + // We only create the JIT thread the first time it's needed; this avoids it + // existing if the library is being used in interpreted or compiled mode. 
+ if (jit_thread_.get_id() == boost::thread::id()) + { + TRACE("Creating JIT thread"); + boost::thread t( + std::mem_fun(&FunctionManager::build_function_thread), this); + jit_thread_.swap(t); + } + + { + boost::mutex::scoped_lock lock(jit_thread_idle_mutex_); + jit_thread_idle_ = false; + } + { + boost::mutex::scoped_lock lock(jit_thread_cv_mutex_); + work_available_ = true; + jit_thread_address_ = address; + } + jit_thread_cv_.notify_all(); +} + +void FunctionManager::build_function_thread() +{ + try + { + TRACE("JIT thread started"); + boost::mutex::scoped_lock jit_thread_cv_mutex_lock( + jit_thread_cv_mutex_); + while (true) + { + while (!quit_ && !work_available_) + { + TRACE("JIT thread waiting to be signalled"); + jit_thread_cv_.wait(jit_thread_cv_mutex_lock); + } + + if (quit_) + { + TRACE("JIT thread quitting"); + return; + } + else + { + TRACE("JIT thread about to build Function at address 0x" << + std::hex << std::setfill('0') << std::setw(4) << + jit_thread_address_); + assert(work_available_); + assert(!jit_thread_idle_); + + // Note that we translate code from memory_snapshot_ + // not mpu_->memory. This is important, even though we + // have update_memory_snapshot() which "should" invalidate + // Function objects which depend on modified code before any + // of them are used. The reason is that if a memory location + // is temporarily modified by the interpreter before it can + // be translated, then modified back to its original value + // by the interpreter before update_memory_snapshot() is + // called, update_memory_snapshot() can't notice the change, + // but the change has been compiled into the Function object. + // (See test/z-self-modify-2.xa; this breaks in hybrid mode + // if memory_snapshot_ isn't used here.) 
+ build_function(jit_thread_address_, memory_snapshot_); + work_available_ = false; + + boost::mutex::scoped_lock jit_thread_idle_lock( + jit_thread_idle_mutex_); + jit_thread_idle_ = true; + } + } + } + catch (std::exception &e) + { + die(e.what()); + } +} + +void FunctionManager::add_function(const boost::shared_ptr &f) +{ + function_for_address_[f->address()] = f.get(); + function_for_address_owner_[f->address()] = f; + + const AddressSet &code_range = f->code_range(); + for (AddressSet::const_iterator it = code_range.begin(); + it != code_range.end(); ++it) + { + uint16_t i = *it; + functions_covering_address_[i].insert(f.get()); + code_at_address_[i] = true; + } + + const AddressSet &optimistic_writes = f->optimistic_writes(); + for (AddressSet::const_iterator it = optimistic_writes.begin(); + it != optimistic_writes.end(); ++it) + { + uint16_t i = *it; + optimistic_writers_for_address_[i].insert(f.get()); + } +} + +void FunctionManager::code_modified_at(uint16_t address) +{ + // We could just return immediately if code_at_address_[address] is false; + // sometimes we call this function without bothering to check first. + // In practice I doubt this has a significant impact on performance. + + TRACE("Code modified at 0x" << std::hex << std::setfill('0') << + std::setw(4) << address); + + destroy_functions_in_set(functions_covering_address_[address]); + + // Keep memory_snapshot_ up-to-date; this avoids harmless-but-inefficient + // destruction of perfectly valid Function objects when + // update_memory_snapshot() is called next. + memory_snapshot_[address] = mpu_->memory[address]; +} + +void FunctionManager::destroy_functions_in_set(FunctionSet &function_set) +{ + // We iterate over the set like this because destroy_function() will erase + // the function from function_set, thereby invalidating any iterator we are + // holding on to. 
+ while (!function_set.empty()) + { + destroy_function(*function_set.begin()); + } +} + +void FunctionManager::destroy_function(Function *f) +{ + const AddressSet &code_range = f->code_range(); + for (AddressSet::const_iterator it = code_range.begin(); + it != code_range.end(); ++it) + { + uint16_t i = *it; + size_t erased_count = functions_covering_address_[i].erase(f); + ASSERT_EQUAL(erased_count, 1); + // We do *not* clear code_at_address_[i] even if + // functions_covering_address_[i] is now empty; this records the fact + // that we have executed code at this address. This is critical for + // the current implementation of build_function(); code_at_address_ + // being set is used to control optimistic vs non-optimistic writes, + // and if code_at_address_ was cleared when a function was destroyed + // a self-modifying function would cause an infinite loop inside + // build_function(). It would be OK to clear code_at_address_ for any + // addresses with empty functions_covering_address_ sets at the end + // of build_function(), but we currently don't. + } + + const AddressSet &optimistic_writes = f->optimistic_writes(); + for (AddressSet::const_iterator it = optimistic_writes.begin(); + it != optimistic_writes.end(); ++it) + { + uint16_t i = *it; + size_t erased_count = optimistic_writers_for_address_[i].erase(f); + ASSERT_EQUAL(erased_count, 1); + } + + assert(function_for_address_[f->address()] == f); + function_for_address_[f->address()] = 0; + // Do this last as it will cause the Function object to be deleted. + assert(function_for_address_owner_[f->address()].get() == f); + function_for_address_owner_[f->address()].reset(); +} diff --git a/FunctionManager.h b/FunctionManager.h new file mode 100644 index 0000000..141fe7a --- /dev/null +++ b/FunctionManager.h @@ -0,0 +1,151 @@ +/* Copyright (c) 2014 Steven Flintham + * + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. + */ + +#ifndef FUNCTIONMANAGER_H +#define FUNCTIONMANAGER_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "const.h" +#include "JitBool.h" +#include "lib6502.h" + +class Function; + +class FunctionManager : boost::noncopyable +{ +public: + FunctionManager(M6502 *mpu); + ~FunctionManager(); + + bool jit_thread_idle(); + + void update_memory_snapshot(); + + // Return a Function object representing the code starting at 'address'; if + // one does not already exist it will be created. This never returns null. + Function *get_function(uint16_t address) + { + Function *f = function_for_address_[address]; + if (f != 0) + { + return f; + } + else + { + return build_function(address, mpu_->memory); + } + } + + // Return a Function object representing the code starting at 'address', + // if one is available, otherwise return null. When null is returned + // a background thread may be used to generate a Function object which + // can be returned if the request is repeated in the future. + // + // This function may only be called if the last call to jit_thread_idle() + // returned true and no call has been made to get_function_lazy() since + // jit_thread_idle() was called. 
+ // + // Currently a background thread will *always* be invoked if null is + // returned, but this is not guaranteed. For example, we may wish to + // refuse to waste time building a Function object which we expect to + // be invalidated by self-modifying code shortly afterwards. + Function *get_function_lazy(uint16_t address) + { + // This assert() is perfectly correct, but it single-handedly destroys + // the performance of a debug build; it's just not *that* valuable. + // assert(jit_thread_idle()); + + Function *f = function_for_address_[address]; + if (f != 0) + { + return f; + } + else + { + build_function_lazy(address); + return 0; + } + } + + void code_modified_at(uint16_t address); + +private: + void add_function(const boost::shared_ptr &f); + + Function *build_function(uint16_t address, const uint8_t *ct_memory); + Function *build_function_internal(uint16_t address, + const uint8_t *ct_memory); + + void build_function_lazy(uint16_t address); + void build_function_thread(); + + typedef std::set FunctionSet; + void destroy_functions_in_set(FunctionSet &function_set); + + void destroy_function(Function *f); + + boost::thread jit_thread_; + + boost::mutex jit_thread_idle_mutex_; + bool jit_thread_idle_; + + boost::mutex jit_thread_cv_mutex_; + boost::condition_variable jit_thread_cv_; + bool work_available_; + uint16_t jit_thread_address_; + bool quit_; + + M6502 *mpu_; + + // A copy of the emulated CPU's memory, used to detect changes to already + // JITted code which happen in callbacks and to avoid problems with JITting + // while the interpreter is running (in hybrid mode). + uint8_t memory_snapshot_[memory_size]; + + // We maintain this array of shared_ptr's which actually own the + // Function objects. + boost::shared_ptr function_for_address_owner_[memory_size]; + + // We maintain a parallel array of raw pointers here so that we have + // the option to allow JITted code to access it. 
+ Function *function_for_address_[memory_size]; + + // This tracks the Function objects which contain code generated based on + // individual addresses, i.e. the Function objects which are invalidated by + // a store to a given memory location. + FunctionSet functions_covering_address_[memory_size]; + + // This tracks the Function objects which perform optimistic writes to + // individual addresses, i.e. the Function objects which are invalidated if + // it turns out an address is in fact used to hold code. + FunctionSet optimistic_writers_for_address_[memory_size]; + + // This tracks whether we have ever executed code at a given address; + // destroying all the functions in the corresponding element of + // functions_covering_address does *not* mean this is cleared. + JitBool code_at_address_[memory_size]; +}; + +#endif diff --git a/JitBool.h b/JitBool.h new file mode 100644 index 0000000..818008e --- /dev/null +++ b/JitBool.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. + */ + +// JitBool is a typedef representing the type used for boolean flags in the +// JITted code, i.e. the CPU flag values and the 'code modified at' flag for +// each memory address. 
In reality this is not likely to change, but this at
+// least helps to identify code which needs to change to support a different
+// representation. FunctionBuilder.cpp also contains a number of helper
+// functions which depend on the underlying type of JitBool.
+
+#ifndef JITBOOL_H
+#define JITBOOL_H
+
+// uint8_t comes from <stdint.h>; include it here so this header can be used
+// regardless of what the including file has already pulled in.
+#include <stdint.h>
+
+typedef uint8_t JitBool;
+const JitBool jit_bool_false = 0;
+const JitBool jit_bool_true = 1;
+
+#endif
diff --git a/LLVMStuff.cpp b/LLVMStuff.cpp
new file mode 100644
index 0000000..faebdf8
--- /dev/null
+++ b/LLVMStuff.cpp
@@ -0,0 +1,41 @@
+/* Copyright (c) 2014 Steven Flintham
+ *
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the 'Software'),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, provided that the above copyright notice(s) and this
+ * permission notice appear in all copies of the Software and that both the
+ * above copyright notice(s) and this permission notice appear in supporting
+ * documentation.
+ *
+ * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK.
+ */ + +#include "LLVMStuff.h" + +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/TargetSelect.h" + +LLVMStuff::LLVMStuff() +: module_(new llvm::Module("lib6502-jit", llvm::getGlobalContext())), + builder_(llvm::getGlobalContext()) +{ + llvm::InitializeNativeTarget(); + + std::string error; + execution_engine_ = + llvm::EngineBuilder(module_.get()).setErrorStr(&error).create(); + if (execution_engine_ == 0) + { + throw std::runtime_error("Could not create LLVM ExecutionEngine: " + + error); + } +} + +LLVMStuff::~LLVMStuff() +{ +} diff --git a/LLVMStuff.h b/LLVMStuff.h new file mode 100644 index 0000000..7ba9d31 --- /dev/null +++ b/LLVMStuff.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. 
+ */ + +#ifndef LLVMSTUFF_H +#define LLVMSTUFF_H + +#include +#include +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include + +struct LLVMStuff : boost::noncopyable +{ + LLVMStuff(); + ~LLVMStuff(); + + llvm::ExecutionEngine *execution_engine_; + boost::shared_ptr module_; + llvm::IRBuilder<> builder_; + +}; + +#endif diff --git a/M6502Internal.h b/M6502Internal.h new file mode 100644 index 0000000..c54131c --- /dev/null +++ b/M6502Internal.h @@ -0,0 +1,43 @@ +/* Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. 
+ */ + +#ifndef M6502INTERNAL_H +#define M6502INTERNAL_H + +#include "FunctionManager.h" +#include "lib6502.h" +#include "LLVMStuff.h" +#include "Registers.h" + +struct _M6502_Internal +{ + _M6502_Internal(M6502 *mpu) + : function_manager_(mpu), mode_(M6502_ModeHybrid), + max_instructions_(default_max_instructions_) + { + } + + Registers registers_; + LLVMStuff llvm_stuff_; + FunctionManager function_manager_; + + M6502_Mode mode_; + static const int default_max_instructions_ = 500; + int max_instructions_; +}; + +#endif diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..879e06d --- /dev/null +++ b/Makefile.am @@ -0,0 +1,130 @@ +ACLOCAL_AMFLAGS = -I m4 +AM_CPPFLAGS = `$(LLVMCONFIG) --cppflags` $(BOOST_CPPFLAGS) +# lib6502.c generates spurious warnings with -Wall, so we want -Wno-parentheses +# too. It's not easy to have per-source-file build flags in automake, so we +# just apply this to all C files. +AM_CFLAGS = -Wall -Wno-parentheses +AM_CXXFLAGS = `$(LLVMCONFIG) --cxxflags` -fexceptions -Wall +AM_LDFLAGS = $(BOOST_THREAD_LDFLAGS) +LIBS = `$(LLVMCONFIG) --ldflags --libs core jit native --system-libs` $(BOOST_THREAD_LIBS) + +# Some of these are included automatically, but I'd rather be explicit. 
+EXTRA_DIST = \ + examples/README \ + COPYING \ + CREDITS \ + lib6502-compatibility.txt \ + README \ + README.lib6502 \ + TODO \ + man/* \ + test/*.xa \ + test/*.mst \ + test/run-c-tests.sh \ + test/run-run6502-tests.sh \ + test/run-c-tests.py \ + test/run-run6502-tests.py + +man1_MANS = man/*.1 +man3_MANS = man/*.3 +lib_LTLIBRARIES = lib6502-jit.la +include_HEADERS = lib6502.h +bin_PROGRAMS = run6502 +noinst_PROGRAMS = \ + examples/lib1 +check_PROGRAMS = \ + test/basic-callback \ + test/call-illegal-callback-modify-code \ + test/irq-nmi \ + test/setjmp-trick \ + test/stack-code-brk \ + test/stack-code-jsr \ + test/write-callback-modify-code + +lib6502_jit_la_SOURCES = \ + AddressRange.cpp \ + AddressRange.h \ + AddressSet.cpp \ + AddressSet.h \ + const.h \ + Function.cpp \ + Function.h \ + FunctionBuilder.cpp \ + FunctionBuilder.h \ + FunctionManager.cpp \ + FunctionManager.h \ + JitBool.h \ + lib6502.c \ + lib6502.h \ + lib6502-jit.cpp \ + LLVMStuff.cpp \ + LLVMStuff.h \ + M6502Internal.h \ + Registers.cpp \ + Registers.h \ + util.cpp \ + util.h \ + valgrind.h + +run6502_SOURCES = \ + run6502.c +run6502_LINK = $(CXXLINK) +run6502_LDADD = lib6502-jit.la + +examples_lib1_SOURCES = \ + examples/lib1.c +examples_lib1_LINK = $(CXXLINK) +examples_lib1_LDADD = lib6502-jit.la + +test_basic_callback_SOURCES = \ + test/basic-callback.c \ + test/test-utils.c \ + test/test-utils.h +test_basic_callback_LINK = $(CXXLINK) +test_basic_callback_LDADD = lib6502-jit.la + +test_call_illegal_callback_modify_code_SOURCES = \ + test/call-illegal-callback-modify-code.c \ + test/test-utils.c \ + test/test-utils.h +test_call_illegal_callback_modify_code_LINK = $(CXXLINK) +test_call_illegal_callback_modify_code_LDADD = lib6502-jit.la + +test_irq_nmi_SOURCES = \ + test/irq-nmi.c \ + test/test-utils.c \ + test/test-utils.h +test_irq_nmi_LINK = $(CXXLINK) +test_irq_nmi_LDADD = lib6502-jit.la + +test_setjmp_trick_SOURCES = \ + test/setjmp-trick.c \ + test/test-utils.c \ + test/test-utils.h 
+test_setjmp_trick_LINK = $(CXXLINK) +test_setjmp_trick_LDADD = lib6502-jit.la + +test_stack_code_brk_SOURCES = \ + test/stack-code-brk.c \ + test/test-utils.c \ + test/test-utils.h +test_stack_code_brk_LINK = $(CXXLINK) +test_stack_code_brk_LDADD = lib6502-jit.la + +test_stack_code_jsr_SOURCES = \ + test/stack-code-jsr.c \ + test/test-utils.c \ + test/test-utils.h +test_stack_code_jsr_LINK = $(CXXLINK) +test_stack_code_jsr_LDADD = lib6502-jit.la + +test_write_callback_modify_code_SOURCES = \ + test/write-callback-modify-code.c \ + test/test-utils.c \ + test/test-utils.h +test_write_callback_modify_code_LINK = $(CXXLINK) +test_write_callback_modify_code_LDADD = lib6502-jit.la + +TESTS = \ + test/run-c-tests.sh \ + test/run-run6502-tests.sh diff --git a/README b/README new file mode 100644 index 0000000..0620f77 --- /dev/null +++ b/README @@ -0,0 +1,84 @@ +lib6502-jit is a (mostly) compatible implementation of Ian Piumarta's lib6502 +which uses LLVM to perform JIT compilation of 6502 machine code to host code. +This will doubtless be useful to the large community of people stuck doing +number-crunching tasks with legacy 6502 code. :-) + +README.lib6502 is a copy of the original lib6502 README. You should probably go +and read that before reading any further. + +lib6502-compatibility.txt documents the differences between lib6502 and +lib6502-jit. + +CREDITS contains acknowledgements of the various people and groups on whose +work lib6502-jit is built. + +COPYING contains license details for lib6502-jit. + +TODO contains some notes on possible enhancements to lib6502-jit. 
+ +How to build: + +You'll need the following installed: +- a C/C++ compiler (I've tested with gcc 4.7.2, gcc 4.8.2 and clang 3.5) +- LLVM development libraries (I've tested with various 3.5 pre-release snapshots) +- boost (including boost::thread) (I've tested with 1.49, 1.54 and 1.55) + +I have somewhat reluctantly set up an autotools build system; compiling and +linking against LLVM and boost::thread on different platforms was otherwise +just that bit too fiddly. So in theory all you need to do is: + + ./configure + make + +I suggest you actually do: + CFLAGS='-g -O3' CXXFLAGS='-g -O3' ./configure +to increase the optimisation level. (I would have made that the default, but +apparently that would go against user expectations for an autotools build +system.) + +"make install" should work as well if you feel inclined to do so, but it's not +necessary. + +I've tested on three platforms, and for what it's worth here are more detailed +instructions for those: + +Ubuntu (14.04 x86): + apt-get install libboost-dev libboost-thread-dev llvm-3.5-dev libedit-dev + export CFLAGS='-g -O3' + export CXXFLAGS='-g -O3' + ./configure --with-llvm-config=llvm-config-3.5 + make + +Debian (7.5 x86-64): + apt-get install libboost-dev libboost-thread-dev + [I used the llvm-3.5-dev package from the wheezy repository here: http://llvm.org/apt/] + export CFLAGS='-g -O3' + export CXXFLAGS='-g -O3' + ./configure + make + +FreeBSD (10.0-RELEASE x86-64): + pkg install boost-all-1.55.0 + pkg install llvm-devel-3.5.r203994 + export CFLAGS='-g -O3' + export CXXFLAGS='-g -O3' + ./configure --with-llvm-config=/usr/local/llvm-devel/bin/llvm-config + make + +There are some tests which will run if you type "make check". Some will be +skipped unless you have the "xa" assembler +(http://www.floodgap.com/retrotech/xa/) on your PATH. + +The above assumes you downloaded a lib6502-jit*tar.bz2 package, which will +contain a "configure" script. 
This is not (following what I understand to be +best practice) checked into source control, so if you downloaded the source +using something like git or svn, you need to either: +- download the tarball - it will be much easier, especially if you're just + taking a quick look at lib6502-jit and don't plan to make changes to the code + (yet) +- install autoconf, automake and libtool, then cross your fingers and run + "autoreconf -i", which will generate a "configure" script for you if you're + lucky. + +If you have any queries, comments or bug reports, please drop me (Steven +Flintham) an e-mail at lib6502-jit@lemma.co.uk. diff --git a/README.lib6502 b/README.lib6502 new file mode 100644 index 0000000..b79e595 --- /dev/null +++ b/README.lib6502 @@ -0,0 +1,136 @@ + lib6502 - 6502 Microprocessor Emulator + + Version: 1.0 + + +WHAT IF I'M TOO LAZY TO READ 'README'S? + + make + make install + more examples/README + + +WHAT IS LIB6502? + + lib6502 is a library that emulates the 6502 microprocessor. It + comes with a small 'shell', run6502, that can execute 6502 programs + from the command line. + + lib6502 is distributed under the MIT license: it is non-infectious + and will not make your projects contagious to others the instant you + choose to use lib6502 in them. See the file COPYING for details. + + +WHERE IS THE LATEST SOURCE CODE? + + Source code for lib6502 is available from the author's home page at + 'http://piumarta.com/software'. You can download the most recent + release or use Subversion to get the very latest sources. + + +WHERE IS THE DOCUMENTATION? + + Manual pages for run6502 and lib6502 (and all the functions it + exports) should be available once it is installed. Each includes a + short 'examples' section. Use the 'man' command to read them. + + Your best place to start looking for documentation on the 6502 + itself is 'http://6502.org'. A google search of the web will also + turn up vast quantities of information about (and programs for) the + 6502. 
+ + +HOW DO I INSTALL IT? + + It's not really big enough to warrant the whole 'configure' thing. + Any system with an ANSI compiler and C library should be able to + compile it out of the box. After unpacking the archive, just type: + + make + + to build it. If the compiler blows up immediately, edit the + Makefile and play with the '-g' and '-O' flags and then try again. + If you really can't make the compiler happy you've found a bug (read + the next section but one). Otherwise, if you want to put it + somewhere more permanent then type: + + make install + + (as root) to install it. It goes into /usr/local by default; if you + want it elsewhere then set PREFIX in the make command. For example: + + make install PREFIX=/usr + + will put everything under '/usr'. + + When you get bored with it, go back to the source directory and + type: + + make uninstall + + (with the same PREFIX you specified during the install, if + necessary.) + + +WHAT CAN I DO WITH IT? + + See the file EXAMPLES for some suggestions (all of them polite). + + If that leaves you wanting more, read the source for run6502 -- it + exercises just about every feature in lib6502. + + +HOW DO I REPORT PROBLEMS?^W^WCONTACT THE ORIGINAL AUTHOR? + + [If you wish to get in touch with the author of lib6502, this is the + address to use. Since lib6502-jit is based on lib6502 but has been + heavily modified, please do *not* report problems to this address; + use the address in README instead. -- Steve] + + Send e-mail to the author at: firstName (at) lastName (dot) com + + (For suitable values of firstName and lastName, see the last section + of this file.) + + If you're still confused, contact him at: http://piumarta.com + + +HOW CAN I HELP? + + Use it. Find bugs. Fix bugs. Make it faster. Evangelism: spread + it to as many other projects as possible, especially those that + might be using a slower emulator! Read the manual pages to see + what's considered missing, then add it, then send it in. 
+ + (One thing that would be really handy, and isn't mentioned in the + manual pages, is a test suite. Figure out how to test every mode in + every instruction with every possible combination of operand values + and condition codes and verify the behaviour is correct. Then write + it down in the form of a program and send it in. If it's a + self-contained program that runs once to completion then we can + probably find some real hardware to test against the test suite.) + + If you know how to write software that emulates peripheral hardware + devices, google up some details on the popular 6502-based + microcomputers (Acorn, Commodore, etc.) and add some serious system + emulation to run6502. Make it all pluggable (think dynamic + libraries over an 'agnostic' core), so we can change machines at the + flip of a (command-line) switch. (The callback mechanism in lib6502 + was designed with this kind of 'pluggable hardware emulation' in + mind.) + + +WHO WROTE THIS STUFF, AND WHY? + + lib6502 was written by Ian Piumarta. + + While writing ccg (an entirely different project that creates + runtime assemblers for dynamic code generators) he decided to + include support for an 8-bit microprocessor, just for fun. He chose + the 6502 because it was used in the first computer he owned and + programmed (an Ohio Scientific Superboard II, when he was 14) as + well as the second (an Acorn 'BBC Model B', about four years later). + lib6502 started as a 'glorified switch statement' that ran some + small test programs spewed into memory by ccg, but rapidly got out + of control over the course of a weekend. You're looking at the + result. diff --git a/Registers.cpp b/Registers.cpp new file mode 100644 index 0000000..7070557 --- /dev/null +++ b/Registers.cpp @@ -0,0 +1,59 @@ +/* Copyright (c) 2014 Steven Flintham + * + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. + */ + +#include "Registers.h" + +#include "const.h" +#include "lib6502.h" +#include "M6502Internal.h" + +void Registers::to_M6502_Registers(M6502 *mpu) const +{ + M6502_Registers &er = *(mpu->registers); + Registers &ir = mpu->internal->registers_; + + er.a = ir.a; + er.x = ir.x; + er.y = ir.y; + er.s = ir.s; + er.p = 0; + if (ir.flag_n) er.p |= flagN; + if (ir.flag_v) er.p |= flagV; + if (ir.flag_d) er.p |= flagD; + if (ir.flag_i) er.p |= flagI; + if (ir.flag_z) er.p |= flagZ; + if (ir.flag_c) er.p |= flagC; + er.pc = ir.pc; +} + +void Registers::from_M6502_Registers(const M6502 *mpu) +{ + M6502_Registers &er = *(mpu->registers); + Registers &ir = mpu->internal->registers_; + + ir.a = er.a; + ir.x = er.x; + ir.y = er.y; + ir.s = er.s; + ir.flag_n = ((er.p & flagN) != 0); + ir.flag_v = ((er.p & flagV) != 0); + ir.flag_d = ((er.p & flagD) != 0); + ir.flag_i = ((er.p & flagI) != 0); + ir.flag_z = ((er.p & flagZ) != 0); + ir.flag_c = ((er.p & flagC) != 0); + ir.pc = er.pc; +} diff --git a/Registers.h b/Registers.h new file mode 100644 index 0000000..467065a --- /dev/null +++ b/Registers.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2014 Steven Flintham + * + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. + */ + +#ifndef REGISTERS_H +#define REGISTERS_H + +#include +#include + +#include "JitBool.h" + +typedef struct _M6502 M6502; + +struct Registers : boost::noncopyable +{ + uint8_t a; + uint8_t x; + uint8_t y; + uint8_t s; + JitBool flag_n; + JitBool flag_v; + JitBool flag_d; + JitBool flag_i; + JitBool flag_z; + JitBool flag_c; + uint16_t pc; + + // Pseudo-registers used to communicate state for callbacks; see the + // comment describing the Result enumeration in FunctionBuilder.h. + uint16_t addr; + uint8_t data; + + void to_M6502_Registers(M6502 *mpu) const; + void from_M6502_Registers(const M6502 *mpu); +}; + +#endif diff --git a/TODO b/TODO new file mode 100644 index 0000000..d57ecb6 --- /dev/null +++ b/TODO @@ -0,0 +1,67 @@ +It would be interesting to see if this works OK on an ARM machine. + + +Running e.g. z-self-modify-1 to completion in -mc -mx 1 mode shows the memory +for the run6502 process grows steadily, but valgrind doesn't show any leaks. A +quick web search suggests this might be internal leaks in LLVM (which are only +exposed by things like this which continually JIT). I am inclined to leave this +and perhaps come back to it once LLVM 3.5 is actually released; if there's +still a problem then it might be worth tracking it down. 
+ + +Would it be helpful to pass branch weights to CreateCondBr()? For example, +where we have a computed address which might trigger a read/write callback, we +could calculate the proportion of addresses in the address range which have +callbacks on them and use that as the probability of taking the callback-exists +branch. + + +We could potentially use Function objects to deduce properties of stretches of +code and use that information to improve the generated code. For example, if we +observed that a Function object didn't contain any external calls or any +stack-modification instructions except RTS then we could inline it in any +callers (adding its code ranges to their code ranges, of course) and the RTS +could be a no-op. (For 100% accuracy, the JSR should still push the return +address on the stack but not modify the stack pointer. Code executed later on +might peek at the stack and expect those values to be there.) This might in +turn allow the callers of that Function to be inlined themselves. This is just +an example. It may be that in practice deciding when to re-translate code would +cause a sufficient performance impact to just not be worth it in the first +place. + + +We could add support for counting the number of cycles executed by the JITted +code; lib6502 itself has some support for this in the form of the tick* macros, +but they don't do anything by default. + + +Would there be any performance improvement to be had by having Function objects +(tail) call one another where possible? + + +Hybrid mode currently makes no attempt to avoid re-generating Function objects +which are continually being invalidated due to self-modifying code. It might be +nice if some heuristic caused us to avoid this unnecessary work and just let +the interpreter always handle that code. + +On a related but distinct note, currently once an element of +FunctionManager::code_at_address_ is set, it is never cleared. 
This might cause +us to avoid optimistic writes which in reality would be OK. We could use some +heuristic to decide when to destroy Function objects which have not been +executed in a long time, and start clearing code_at_address_ elements when all +functions covering an address are removed. (See the note in +FunctionManager::destroyFunction(); this clearing must be done *outside* the +loop in FunctionManager::buildFunction(), or the implementation of +buildFunction() must be tweaked.) + +However, it may be that it just isn't worth being that clever. Any such code +would need to be triggered inside the main loop between executions of Function +objects. We could do it only every nth time, and keeping track of how many +times we've been round probably wouldn't significantly harm performance, but be +careful. + + +Would a different default value for max_instructions be better? + + +Are there any other LLVM optimisation passes which would be helpful? diff --git a/build-aux/tap-driver.sh b/build-aux/tap-driver.sh new file mode 100755 index 0000000..c011298 --- /dev/null +++ b/build-aux/tap-driver.sh @@ -0,0 +1,649 @@ +#! /bin/sh +# Copyright (C) 2011 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to or send patches to +# . + +scriptversion=2011-12-27.17; # UTC + +# Make unconditional expansion of undefined variables an error. This +# helps a lot in preventing typo-related bugs. +set -u + +me=tap-driver.sh + +fatal () +{ + echo "$me: fatal: $*" >&2 + exit 1 +} + +usage_error () +{ + echo "$me: $*" >&2 + print_usage >&2 + exit 2 +} + +print_usage () +{ + cat < + # + trap : 1 3 2 13 15 + if test $merge -gt 0; then + exec 2>&1 + else + exec 2>&3 + fi + "$@" + echo $? + ) | LC_ALL=C ${AM_TAP_AWK-awk} \ + -v me="$me" \ + -v test_script_name="$test_name" \ + -v log_file="$log_file" \ + -v trs_file="$trs_file" \ + -v expect_failure="$expect_failure" \ + -v merge="$merge" \ + -v ignore_exit="$ignore_exit" \ + -v comments="$comments" \ + -v diag_string="$diag_string" \ +' +# FIXME: the usages of "cat >&3" below could be optimized when using +# FIXME: GNU awk, and/on on systems that supports /dev/fd/. + +# Implementation note: in what follows, `result_obj` will be an +# associative array that (partly) simulates a TAP result object +# from the `TAP::Parser` perl module. + +## ----------- ## +## FUNCTIONS ## +## ----------- ## + +function fatal(msg) +{ + print me ": " msg | "cat >&2" + exit 1 +} + +function abort(where) +{ + fatal("internal error " where) +} + +# Convert a boolean to a "yes"/"no" string. +function yn(bool) +{ + return bool ? "yes" : "no"; +} + +function add_test_result(result) +{ + if (!test_results_index) + test_results_index = 0 + test_results_list[test_results_index] = result + test_results_index += 1 + test_results_seen[result] = 1; +} + +# Whether the test script should be re-run by "make recheck". 
+function must_recheck() +{ + for (k in test_results_seen) + if (k != "XFAIL" && k != "PASS" && k != "SKIP") + return 1 + return 0 +} + +# Whether the content of the log file associated to this test should +# be copied into the "global" test-suite.log. +function copy_in_global_log() +{ + for (k in test_results_seen) + if (k != "PASS") + return 1 + return 0 +} + +# FIXME: this can certainly be improved ... +function get_global_test_result() +{ + if ("ERROR" in test_results_seen) + return "ERROR" + if ("FAIL" in test_results_seen || "XPASS" in test_results_seen) + return "FAIL" + all_skipped = 1 + for (k in test_results_seen) + if (k != "SKIP") + all_skipped = 0 + if (all_skipped) + return "SKIP" + return "PASS"; +} + +function stringify_result_obj(result_obj) +{ + if (result_obj["is_unplanned"] || result_obj["number"] != testno) + return "ERROR" + + if (plan_seen == LATE_PLAN) + return "ERROR" + + if (result_obj["directive"] == "TODO") + return result_obj["is_ok"] ? "XPASS" : "XFAIL" + + if (result_obj["directive"] == "SKIP") + return result_obj["is_ok"] ? "SKIP" : COOKED_FAIL; + + if (length(result_obj["directive"])) + abort("in function stringify_result_obj()") + + return result_obj["is_ok"] ? COOKED_PASS : COOKED_FAIL +} + +function decorate_result(result) +{ + color_name = color_for_result[result] + if (color_name) + return color_map[color_name] "" result "" color_map["std"] + # If we are not using colorized output, or if we do not know how + # to colorize the given result, we should return it unchanged. + return result +} + +function report(result, details) +{ + if (result ~ /^(X?(PASS|FAIL)|SKIP|ERROR)/) + { + msg = ": " test_script_name + add_test_result(result) + } + else if (result == "#") + { + msg = " " test_script_name ":" + } + else + { + abort("in function report()") + } + if (length(details)) + msg = msg " " details + # Output on console might be colorized. 
+ print decorate_result(result) msg + # Log the result in the log file too, to help debugging (this is + # especially true when said result is a TAP error or "Bail out!"). + print result msg | "cat >&3"; +} + +function testsuite_error(error_message) +{ + report("ERROR", "- " error_message) +} + +function handle_tap_result() +{ + details = result_obj["number"]; + if (length(result_obj["description"])) + details = details " " result_obj["description"] + + if (plan_seen == LATE_PLAN) + { + details = details " # AFTER LATE PLAN"; + } + else if (result_obj["is_unplanned"]) + { + details = details " # UNPLANNED"; + } + else if (result_obj["number"] != testno) + { + details = sprintf("%s # OUT-OF-ORDER (expecting %d)", + details, testno); + } + else if (result_obj["directive"]) + { + details = details " # " result_obj["directive"]; + if (length(result_obj["explanation"])) + details = details " " result_obj["explanation"] + } + + report(stringify_result_obj(result_obj), details) +} + +# `skip_reason` should be empty whenever planned > 0. +function handle_tap_plan(planned, skip_reason) +{ + planned += 0 # Avoid getting confused if, say, `planned` is "00" + if (length(skip_reason) && planned > 0) + abort("in function handle_tap_plan()") + if (plan_seen) + { + # Error, only one plan per stream is acceptable. + testsuite_error("multiple test plans") + return; + } + planned_tests = planned + # The TAP plan can come before or after *all* the TAP results; we speak + # respectively of an "early" or a "late" plan. If we see the plan line + # after at least one TAP result has been seen, assume we have a late + # plan; in this case, any further test result seen after the plan will + # be flagged as an error. + plan_seen = (testno >= 1 ? LATE_PLAN : EARLY_PLAN) + # If testno > 0, we have an error ("too many tests run") that will be + # automatically dealt with later, so do not worry about it here. 
If + # $plan_seen is true, we have an error due to a repeated plan, and that + # has already been dealt with above. Otherwise, we have a valid "plan + # with SKIP" specification, and should report it as a particular kind + # of SKIP result. + if (planned == 0 && testno == 0) + { + if (length(skip_reason)) + skip_reason = "- " skip_reason; + report("SKIP", skip_reason); + } +} + +function extract_tap_comment(line) +{ + if (index(line, diag_string) == 1) + { + # Strip leading `diag_string` from `line`. + line = substr(line, length(diag_string) + 1) + # And strip any leading and trailing whitespace left. + sub("^[ \t]*", "", line) + sub("[ \t]*$", "", line) + # Return what is left (if any). + return line; + } + return ""; +} + +# When this function is called, we know that line is a TAP result line, +# so that it matches the (perl) RE "^(not )?ok\b". +function setup_result_obj(line) +{ + # Get the result, and remove it from the line. + result_obj["is_ok"] = (substr(line, 1, 2) == "ok" ? 1 : 0) + sub("^(not )?ok[ \t]*", "", line) + + # If the result has an explicit number, get it and strip it; otherwise, + # automatically assing the next progresive number to it. + if (line ~ /^[0-9]+$/ || line ~ /^[0-9]+[^a-zA-Z0-9_]/) + { + match(line, "^[0-9]+") + # The final `+ 0` is to normalize numbers with leading zeros. + result_obj["number"] = substr(line, 1, RLENGTH) + 0 + line = substr(line, RLENGTH + 1) + } + else + { + result_obj["number"] = testno + } + + if (plan_seen == LATE_PLAN) + # No further test results are acceptable after a "late" TAP plan + # has been seen. + result_obj["is_unplanned"] = 1 + else if (plan_seen && testno > planned_tests) + result_obj["is_unplanned"] = 1 + else + result_obj["is_unplanned"] = 0 + + # Strip trailing and leading whitespace. + sub("^[ \t]*", "", line) + sub("[ \t]*$", "", line) + + # This will have to be corrected if we have a "TODO"/"SKIP" directive. 
+ result_obj["description"] = line + result_obj["directive"] = "" + result_obj["explanation"] = "" + + if (index(line, "#") == 0) + return # No possible directive, nothing more to do. + + # Directives are case-insensitive. + rx = "[ \t]*#[ \t]*([tT][oO][dD][oO]|[sS][kK][iI][pP])[ \t]*" + + # See whether we have the directive, and if yes, where. + pos = match(line, rx "$") + if (!pos) + pos = match(line, rx "[^a-zA-Z0-9_]") + + # If there was no TAP directive, we have nothing more to do. + if (!pos) + return + + # Let`s now see if the TAP directive has been escaped. For example: + # escaped: ok \# SKIP + # not escaped: ok \\# SKIP + # escaped: ok \\\\\# SKIP + # not escaped: ok \ # SKIP + if (substr(line, pos, 1) == "#") + { + bslash_count = 0 + for (i = pos; i > 1 && substr(line, i - 1, 1) == "\\"; i--) + bslash_count += 1 + if (bslash_count % 2) + return # Directive was escaped. + } + + # Strip the directive and its explanation (if any) from the test + # description. + result_obj["description"] = substr(line, 1, pos - 1) + # Now remove the test description from the line, that has been dealt + # with already. + line = substr(line, pos) + # Strip the directive, and save its value (normalized to upper case). + sub("^[ \t]*#[ \t]*", "", line) + result_obj["directive"] = toupper(substr(line, 1, 4)) + line = substr(line, 5) + # Now get the explanation for the directive (if any), with leading + # and trailing whitespace removed. 
+ sub("^[ \t]*", "", line) + sub("[ \t]*$", "", line) + result_obj["explanation"] = line +} + +function get_test_exit_message(status) +{ + if (status == 0) + return "" + if (status !~ /^[1-9][0-9]*$/) + abort("getting exit status") + if (status < 127) + exit_details = "" + else if (status == 127) + exit_details = " (command not found?)" + else if (status >= 128 && status <= 255) + exit_details = sprintf(" (terminated by signal %d?)", status - 128) + else if (status > 256 && status <= 384) + # We used to report an "abnormal termination" here, but some Korn + # shells, when a child process die due to signal number n, can leave + # in $? an exit status of 256+n instead of the more standard 128+n. + # Apparently, both behaviours are allowed by POSIX (2008), so be + # prepared to handle them both. See also Austing Group report ID + # 0000051 + exit_details = sprintf(" (terminated by signal %d?)", status - 256) + else + # Never seen in practice. + exit_details = " (abnormal termination)" + return sprintf("exited with status %d%s", status, exit_details) +} + +function write_test_results() +{ + print ":global-test-result: " get_global_test_result() > trs_file + print ":recheck: " yn(must_recheck()) > trs_file + print ":copy-in-global-log: " yn(copy_in_global_log()) > trs_file + for (i = 0; i < test_results_index; i += 1) + print ":test-result: " test_results_list[i] > trs_file + close(trs_file); +} + +BEGIN { + +## ------- ## +## SETUP ## +## ------- ## + +'"$init_colors"' + +# Properly initialized once the TAP plan is seen. +planned_tests = 0 + +COOKED_PASS = expect_failure ? "XPASS": "PASS"; +COOKED_FAIL = expect_failure ? "XFAIL": "FAIL"; + +# Enumeration-like constants to remember which kind of plan (if any) +# has been seen. It is important that NO_PLAN evaluates "false" as +# a boolean. +NO_PLAN = 0 +EARLY_PLAN = 1 +LATE_PLAN = 2 + +testno = 0 # Number of test results seen so far. +bailed_out = 0 # Whether a "Bail out!" directive has been seen. 
+ +# Whether the TAP plan has been seen or not, and if yes, which kind +# it is ("early" is seen before any test result, "late" otherwise). +plan_seen = NO_PLAN + +## --------- ## +## PARSING ## +## --------- ## + +is_first_read = 1 + +while (1) + { + # Involutions required so that we are able to read the exit status + # from the last input line. + st = getline + if (st < 0) # I/O error. + fatal("I/O error while reading from input stream") + else if (st == 0) # End-of-input + { + if (is_first_read) + abort("in input loop: only one input line") + break + } + if (is_first_read) + { + is_first_read = 0 + nextline = $0 + continue + } + else + { + curline = nextline + nextline = $0 + $0 = curline + } + # Copy any input line verbatim into the log file. + print | "cat >&3" + # Parsing of TAP input should stop after a "Bail out!" directive. + if (bailed_out) + continue + + # TAP test result. + if ($0 ~ /^(not )?ok$/ || $0 ~ /^(not )?ok[^a-zA-Z0-9_]/) + { + testno += 1 + setup_result_obj($0) + handle_tap_result() + } + # TAP plan (normal or "SKIP" without explanation). + else if ($0 ~ /^1\.\.[0-9]+[ \t]*$/) + { + # The next two lines will put the number of planned tests in $0. + sub("^1\\.\\.", "") + sub("[^0-9]*$", "") + handle_tap_plan($0, "") + continue + } + # TAP "SKIP" plan, with an explanation. + else if ($0 ~ /^1\.\.0+[ \t]*#/) + { + # The next lines will put the skip explanation in $0, stripping + # any leading and trailing whitespace. This is a little more + # tricky in truth, since we want to also strip a potential leading + # "SKIP" string from the message. + sub("^[^#]*#[ \t]*(SKIP[: \t][ \t]*)?", "") + sub("[ \t]*$", ""); + handle_tap_plan(0, $0) + } + # "Bail out!" magic. + # Older versions of prove and TAP::Harness (e.g., 3.17) did not + # recognize a "Bail out!" directive when preceded by leading + # whitespace, but more modern versions (e.g., 3.23) do. So we + # emulate the latter, "more modern" behaviour. 
+ else if ($0 ~ /^[ \t]*Bail out!/) + { + bailed_out = 1 + # Get the bailout message (if any), with leading and trailing + # whitespace stripped. The message remains stored in `$0`. + sub("^[ \t]*Bail out![ \t]*", ""); + sub("[ \t]*$", ""); + # Format the error message for the + bailout_message = "Bail out!" + if (length($0)) + bailout_message = bailout_message " " $0 + testsuite_error(bailout_message) + } + # Maybe we have too look for dianogtic comments too. + else if (comments != 0) + { + comment = extract_tap_comment($0); + if (length(comment)) + report("#", comment); + } + } + +## -------- ## +## FINISH ## +## -------- ## + +# A "Bail out!" directive should cause us to ignore any following TAP +# error, as well as a non-zero exit status from the TAP producer. +if (!bailed_out) + { + if (!plan_seen) + { + testsuite_error("missing test plan") + } + else if (planned_tests != testno) + { + bad_amount = testno > planned_tests ? "many" : "few" + testsuite_error(sprintf("too %s tests run (expected %d, got %d)", + bad_amount, planned_tests, testno)) + } + if (!ignore_exit) + { + # Fetch exit status from the last line. + exit_message = get_test_exit_message(nextline) + if (exit_message) + testsuite_error(exit_message) + } + } + +write_test_results() + +exit 0 + +} # End of "BEGIN" block. +' + +# TODO: document that we consume the file descriptor 3 :-( +} 3>"$log_file" + +test $? -eq 0 || fatal "I/O or internal error" + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "; # UTC" +# End: diff --git a/config.h.in b/config.h.in new file mode 100644 index 0000000..5fa9546 --- /dev/null +++ b/config.h.in @@ -0,0 +1,89 @@ +/* config.h.in. Generated from configure.ac by autoheader. 
*/ + +/* Defined if the requested minimum BOOST version is satisfied */ +#undef HAVE_BOOST + +/* Define to 1 if you have */ +#undef HAVE_BOOST_SCOPED_PTR_HPP + +/* Define to 1 if you have */ +#undef HAVE_BOOST_SHARED_PTR_HPP + +/* Define to 1 if you have */ +#undef HAVE_BOOST_SYSTEM_ERROR_CODE_HPP + +/* Define to 1 if you have */ +#undef HAVE_BOOST_THREAD_HPP + +/* Define to 1 if you have the header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H + +/* Set to 1 if you have the "llvm/Analysis/Verifier.h" header file */ +#undef HAVE_LLVM_ANALYSIS_VERIFIER_H + +/* Set to 1 if you have the llvm::DataLayoutPass class */ +#undef HAVE_LLVM_DATA_LAYOUT_PASS + +/* Set to 1 if you have the "llvm/IR/Verifier.h" header file */ +#undef HAVE_LLVM_IR_VERIFIER_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_UNISTD_H + +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#undef LT_OBJDIR + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Package copyright */ +#undef PACKAGE_COPYRIGHT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. 
*/ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Version number of package */ +#undef VERSION diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..46bd45b --- /dev/null +++ b/configure.ac @@ -0,0 +1,94 @@ +AC_INIT([lib6502-jit], [1.0], [lib6502-jit@lemma.co.uk]) +AC_CONFIG_AUX_DIR([build-aux]) +AC_CONFIG_MACRO_DIR([m4]) +AM_INIT_AUTOMAKE([-Wall -Werror foreign subdir-objects no-dist-gzip dist-bzip2]) +AM_MAINTAINER_MODE([enable]) +LT_INIT([disable-shared]) +AC_CONFIG_HEADERS([config.h]) +AC_CONFIG_FILES([Makefile]) +AC_REQUIRE_AUX_FILE([tap-driver.sh]) + +# Copyright for configure.ac *only* +AC_COPYRIGHT([Copyright (c) 2014 Steven Flintham]) + +AC_DEFINE([PACKAGE_COPYRIGHT], ["(C) - see COPYING"], [Package copyright]) + +# for tap-driver.sh +AC_PROG_AWK + +AC_PROG_CC +AC_PROG_CXX + +BOOST_REQUIRE +BOOST_SMART_PTR +BOOST_THREAD + +# I want to: +# - use "llvm-config" (relying on PATH) if the user doesn't do anything +# special, but +# - allow the user to say --with-llvm-config=XXX to use XXX instead of +# llvm-config, where XXX might need to be found on the PATH (e.g. if +# the program is called llvm-config-3.5) or might be an absolute/ +# relative filename +# In both of the above cases, I want to actually check explicitly the +# llvm-config program can be found. This doesn't seem to be supported by +# autoconf: +# - AC_CHECK_PROG() and AC_PATH_PROG() both insist on the program name being a +# leaf name with no included path. +# - AC_CHECK_FILE() (not unreasonably) doesn't look on PATH for the file +# (and wouldn't check for executability) +# So I have to just hack it with "which" and hope. 
+AC_ARG_WITH( + [llvm-config], + [AS_HELP_STRING( + [--with-llvm-config=FILE], + [filename of llvm-config executable (if not on PATH)])], + [LLVMCONFIG="$withval"], + [LLVMCONFIG="llvm-config"]) +echo -n "checking for $LLVMCONFIG... " +AS_IF( + [which "$LLVMCONFIG" >/dev/null], + [echo yes], + [echo no + AC_MSG_ERROR([llvm-config not found; try --with-llvm-config=FILE?])]) + +AC_SUBST(LLVMCONFIG) + +# These variables are sacred to the user. But we need to set them in order for +# configure's test programs to find the LLVM headers. I am probably doing this +# completely wrong. In twenty years or so maybe I will achieve auto-enlightenment +# and look back at this and laugh. +SACRED_CPPFLAGS="$CPPFLAGS" +SACRED_CXXFLAGS="$CXXFLAGS" + +CPPFLAGS=["`$LLVMCONFIG --cppflags` $CPPFLAGS"] +CXXFLAGS=["`$LLVMCONFIG --cxxflags` -fexceptions $CXXFLAGS"] + +AC_LANG(C++) + +# This header moves around a bit, check for the two known possible locations. + +AC_CHECK_HEADER( + [llvm/IR/Verifier.h], + [AC_DEFINE([HAVE_LLVM_IR_VERIFIER_H], 1, [Set to 1 if you have the "llvm/IR/Verifier.h" header file])]) +AC_CHECK_HEADER( + [llvm/Analysis/Verifier.h], + [AC_DEFINE([HAVE_LLVM_ANALYSIS_VERIFIER_H], 1, [Set to 1 if you have the "llvm/Analysis/Verifier.h" header file])]) +# TODO: Can I get configure to fail if neither of the previous tests +# succeeds? Otherwise configure will succeed but the build will fail. + +# This header always exists, but DataLayoutPass isn't always present. 
+AC_CHECK_HEADER( + [llvm/IR/DataLayout.h], + [], + [AC_MSG_ERROR([llvm/IR/DataLayout.h not found])]) +AC_CHECK_TYPE( + [llvm::DataLayoutPass], + [AC_DEFINE([HAVE_LLVM_DATA_LAYOUT_PASS], 1, [Set to 1 if you have the llvm::DataLayoutPass class])], + [], + [#include "llvm/IR/DataLayout.h"]) + +CPPFLAGS="$SACRED_CPPFLAGS" +CXXFLAGS="$SACRED_CXXFLAGS" + +AC_OUTPUT diff --git a/const.h b/const.h new file mode 100644 index 0000000..c2bbdfd --- /dev/null +++ b/const.h @@ -0,0 +1,58 @@ +/* Copyright (c) 2005 Ian Piumarta + * Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. 
+ */ + +#ifndef CONST_H +#define CONST_H + +#include <stdint.h> + +namespace +{ + const uint8_t opcode_brk = 0x00; + const uint8_t opcode_rti = 0x40; + const uint8_t opcode_rts = 0x60; + const uint8_t opcode_bra = 0x80; + const uint8_t opcode_bcc = 0x90; + const uint8_t opcode_bcs = 0xb0; + const uint8_t opcode_bvc = 0x50; + const uint8_t opcode_bvs = 0x70; + const uint8_t opcode_beq = 0xf0; + const uint8_t opcode_bne = 0xd0; + const uint8_t opcode_bpl = 0x10; + const uint8_t opcode_bmi = 0x30; + const uint8_t opcode_jsr = 0x20; + const uint8_t opcode_jmp_abs = 0x4c; + const uint8_t opcode_jmp_ind_abs = 0x6c; + const uint8_t opcode_jmp_indx_abs = 0x7c; + + enum { + flagN= (1<<7), /* negative */ + flagV= (1<<6), /* overflow */ + flagX= (1<<5), /* unused */ + flagB= (1<<4), /* irq from brk */ + flagD= (1<<3), /* decimal mode */ + flagI= (1<<2), /* irq disable */ + flagZ= (1<<1), /* zero */ + flagC= (1<<0) /* carry */ + }; + + const uint32_t memory_size = 0x10000; + const uint16_t stack = 0x100; +} + +#endif diff --git a/examples/README b/examples/README new file mode 100644 index 0000000..e22418c --- /dev/null +++ b/examples/README @@ -0,0 +1,406 @@ +lib6502 - 6502 Microprocessor Emulator + +EXAMPLES + + This file has three sections: + + 1. PROGRAMS that you can compile and run + 2. COMMANDS that you can copy and paste into a terminal + 3. ADVANCED stuff that requires some additional setup + + A few numbered footnotes appear at the end and are referenced in the + text in square brackets [6]. + +---------------------------------------------------------------- + +1. PROGRAMS + + (We're going to start in 'serious mode'. Bear with me.) + + The file 'lib1.c' contains the example from the run6502 manual page. + Just compile and run it: + + cc -o lib1 lib1.c + ./lib1 + + The file has been commented extensively to explain exactly what is + going on. + +---------------------------------------------------------------- + +2.
COMMANDS + + (Much more fun: this is the section that appeals to the geek in me.) + + 6502 machine code is pretty straightforward. (Many 6502 programmers + remember a time from their misguided childhood when they could + compose and edit programs directly in hexadecimal using their 'front + panel' monitor program -- the next best thing to programming with a + row of switches and lamps, but I digress and will leave that story + until the pdp11 emulator is ready. ;-) We can use this fact to + generate an entire program without needing an assembler. The 'perl' + program is available on most Unixy (and several other) systems and + makes it easy to create binary files from a string of hex digits. + (There is a program called 'xxd' that's very good at this kind of + thing, but you might not have it.) + + First the program (stolen from lib1.c): + + 1000 ldx #41 A241 + 1002 txa 8A + 1003 jsr FFEE 20EEFF + 1006 inx E8 + 1007 cpx #5B E05B + 1009 bne 1002 D0F7 + 100B lda #0A A90A + 100D jsr FFEE 20EEFF + 1010 brk 00 + + In C-like syntax it is equivalent to: + + regX = 'A'; + do { + regA = regX; + putchar(regA); + regX++; + } while (regX != 'Z' + 1); + putchar('\n'); + + (which by today's standards is a *huge* amount of stuff packed into + just 17 bytes of 'compiled' code -- on a 386 the same program is + around 65 bytes [1], and more like 88 bytes on a 32-bit RISC [2]). + + The column on the right is the machine code in hexadecimal.
When + strung out in a line it looks like this: + + A2418A20EEFFE8E05BD0F7A90A20EEFF00 + + We can tell perl to 'pack' this hexadecimal string into binary and + save the output in a file: + + echo A2418A20EEFFE8E05BD0F7A90A20EEFF00 | + perl -e 'print pack "H*",<STDIN>' > temp.img + + To check the contents of the file, we can load it into run6502 and + then disassemble it: + + run6502 -l 1000 temp.img -d 1000 +11 -x + + The '-l 1000 temp.img' loads the file into the 6502's memory at + address 0x1000, and the '-d 1000 +11' disassembles 17 bytes (11 in + hex) of code starting at 0x1000. The final '-x' tells run6502 not + to try to execute the code. The output should look just like the + program listing above. + + This is almost all we need to run it; just a few details remain. + + - The emulator doesn't know where to start execution. We need to + set the 'reset' vector to 0x1000 -- the address of the first + instruction in the program. The '-R 1000' option does this. + + - The program calls the 'putchar' function at address 0xFFEE to + send a character to the terminal. run6502 can emulate this for + us, with the '-P FFEE' option. + + - We have to have some way to make the processor stop execution + (there is no 'halt' instruction on the 6502, at least not the + early versions). The trick is in the last instruction 'BRK', + that generates a 'software interrupt' -- eventually jumping to + the address in the 'interrupt vector'. If we don't set the + interrupt vector explicitly it remains empty (zero) and BRK will + try to transfer control to address 0. The '-X 0' option tells + run6502 to stop executing if/when the program attempts to + transfer control to address 0 -- which it will, when it executes + the 'BRK' instruction with an empty interrupt vector. QED :-) + + Here, then, is the complete command to run our program: + + run6502 -l 1000 temp.img -R 1000 -P FFEE -X 0 + + This program is relocatable.
You can load it at address 4321 + (change both the -l and -R options) and it will work just fine. + + Google for "6502 Reference Card" (with the quotes), grab a pencil + and paper, and you can start writing 6502 programs immediately! (If + you really want to experience what it was like in the late 1970s, + but without the added fun of entering each hex digit one at a time + into a monitor program, simply avoid the temptation ever to look at + your hand-assembled code with the '-d' option. ;-) + + If you really start liking this and want to write longer programs in + text files with the hex split over many lines, you'll need a perl + script that can deal with newlines in the input. Something like + this should do the trick... + + #!/usr/bin/perl + + while (<STDIN>) { + chomp; + print pack "H*", $_ + } + + (This script is included in the 'examples' directory, in a file + called 'hex2bin', to save you 15 seconds of copy and paste.) + + Need a fun project? Write a 6502 assembler... in 6502 machine code, + of course! Read in the assembly language text via 'getchar' (see + the '-G' option) and write out the assembled binary via 'putchar' + (the '-P' option, that we've already seen). Soon you'll be able to: + + cat prog.s | + run6502 -l 1000 asm.img -R 1000 -G FFE0 -P FFEE -X 0 > prog.img + + run6502 -l 1000 prog.img -R 1000 -G FFE0 -P FFEE -X 0 + + (The first prog.s you write should probably be the assembler itself, + transcribed from the paper copy used to hand-assemble the assembler + binary. This significant milestone can be reached with a + surprisingly simple assembler. After this pivotal moment the + assembler, assembling itself, can very quickly become very + powerful.) + +---------------------------------------------------------------- + +3. ADVANCED + + (Official justification: let's run something big and non-trivial. + More likely: a flimsy excuse for a trip down memory lane.)
+ + The remaining examples assume that you have access to two ROM images + from the Acorn 'BBC Model B' microcomputer: the operating system and + the BASIC language . (Just crawl into the attic, fire up the old + Beeb, '*SAVE' the images into files, and then transfer them to your + Unix box over RS423. Under no circumstances should you google for + 'Acorn BBC B OS ROMs zip', without the quotes. That would be + naughty, and probably illegal -- at least until the glorious day + when the revolution finally comes.) + + After brushing yourself down (the attic is kind of dusty, no?) save + the two ROM images as 'OS12.ROM' and 'BASIC2.ROM'. + + The first thing we can do is use run6502 as an editor to merge the + two ROMs into a single image file: + + run6502 \ + -l C000 OS12.ROM \ + -l 8000 BASIC2.ROM \ + -s 0000 +10000 bbc.img \ + -x + + (This is a single command, with '\' continuation characters joining + the lines into one. Your shell should figure it out if you just + copy and paste.) It leaves a file 'bbc.img' containing both the OS + and BASIC. + + To run this image we need the '-B' option. It enables some minimal, + totally lame, hardware emulation of the BBC computer -- just enough + to boot the 'virtual beeb' into BASIC [3]: + + run6502 -l 0 bbc.img -B + + If all goes well, you should be greeted with a 'beep' and a message + telling you what computer you have (BBC Computer), how much RAM is + available (32K), the language you've been dropped into (BASIC), and + a '>' prompt. Turn on 'CAPS LOCK' (many of us remember those days, + and some of us even used to speak in ALL CAPS) and play: + + PRINT 3+4 + + or maybe: + + 10 FOR A%=1 TO 10 + 20 PRINT A% + 30 NEXT + LIST + RUN + + or even: + + 10 P%=&2800 + 20 O%=P% + 30 [ + 40 opt3 + 50 lda #10 + 60 jsr &FFEE + 70 ldx #65 + 80 .l txa + 90 jsr &FFEE + 100 inx + 110 cpx #91 + 120 bne l + 130 lda #10 + 140 jmp &FFEE + 150 ] + 160 CALL &2800 + LIST + RUN + + (How cool is that? 
;-) + + One final thing: there is an option '-i' that works just like '-l' + except that it looks to see if the image file begins with '#!'. If + so, it skips over the first line of the file, up to and including + the first newline. Why? The system call that executes programs on + Unixy systems makes the same check. If the user executes a text + file 'foo' starting with '#!prog ...' then the OS loads and runs + 'prog' instead, passing all the '...'s and the name of the text file + 'foo' as arguments [4]. If you have 'temp.img' left over from + the second example, open it in a text editor and add a single line + at the beginning that reads: + + #!run6502 -i 1000 + + (If 'run6502' is not in your current working directory then you will + have to use the full path to the file: '#!/usr/bin/run6502' or + '#!/usr/local/bin/6502' or whatever. No spaces before the '#'!) + + Now make the image executable: + + chmod +x temp.img + + and then (as if you hadn't already guessed) execute it: + + ./temp.img + + Saves an awful lot of tedious typing. [5] + + Have fun! + +---------------------------------------------------------------- + +FOOTNOTES + + +[1] Here is the 'alphabet' program, verbatim, compiled (with + optimisation) on a 386. It's 66 bytes long, almost four times + longer than the 6502 version. (If I were more generous I might + consider that fair: 32 bits divided by 8 bits is four.)
+ + 0: 55 push %ebp + 1: 89 e5 mov %esp,%ebp + 3: 53 push %ebx + 4: 83 ec 14 sub $0x14,%esp + 7: bb 41 00 00 00 mov $0x41,%ebx + c: a1 00 00 00 00 mov 0x0,%eax + 11: 89 44 24 04 mov %eax,0x4(%esp) + 15: 89 1c 24 mov %ebx,(%esp) + 18: e8 fc ff ff ff call 19 + 1d: 43 inc %ebx + 1e: 83 fb 5b cmp $0x5b,%ebx + 21: 75 e9 jne c + 23: a1 00 00 00 00 mov 0x0,%eax + 28: 89 44 24 04 mov %eax,0x4(%esp) + 2c: c7 04 24 0a 00 00 00 movl $0xa,(%esp) + 33: e8 fc ff ff ff call 34 + 38: b8 00 00 00 00 mov $0x0,%eax + 3d: 83 c4 14 add $0x14,%esp + 40: 5b pop %ebx + 41: 5d pop %ebp + 42: c3 ret + + +[2] Here is the 'alphabet' program, verbatim, compiled (with + optimisation) on a PowerPC. It's 88 bytes long, more than five + times longer than the 6502 version. (I don't care what you say: + Apple Macs rule and mine has oodles of RAM to spare.) + + 00000000 mfspr r0,lr + 00000004 stmw r29,0xfff4(r1) + 00000008 stw r0,0x8(r1) + 0000000c stwu r1,0xffb0(r1) + 00000010 bcl 20,31,0x14 + 00000014 mfspr r31,lr + 00000018 li r30,0x41 + 0000001c addis r2,r31,ha16(0xa4-0x14) + 00000020 lwz r29,lo16(0xa4-0x14)(r2) + 00000024 or r3,r30,r30 + 00000028 addi r4,r29,0x58 + 0000002c bl 0x7c ; symbol stub for: _fputc + 00000030 cmpwi cr7,r30,0x5a + 00000034 addi r30,r30,0x1 + 00000038 bne cr7,0x24 + 0000003c li r3,0xa + 00000040 bl 0x5c ; symbol stub for: _fputc + 00000044 li r3,0x0 + 00000048 lwz r0,0x58(r1) + 0000004c addi r1,r1,0x50 + 00000050 mtspr lr,r0 + 00000054 lmw r29,0xfff4(r1) + 00000058 blr + + +[3] Time to 'fess up with an undocumented 'feature'. We ran our + 'bbc.img' file like this: + + run6502 -l 0 bbc.img -B + + I grew tired of typing all those '-'s and made run6502 check to + see if it was invoked with a single, non-option argument. + Running: + + run6502 bbc.img + + is precisely equivalent to the '-l -B' form above. I don't feel + too guilty about this since the manual page suggests that + providing a single, non-option argument is illegal usage. 
+ + +[4] Okay, that might be a little confusing. Here it is written out in + full. If you have a text file called 'foo' containing + + #!/usr/bin/prog -gobble + blah blah blah + blah blah blah + + that is executable, and then you execute it like a compiled + program + + ./foo + + then the OS will notice the '#!' and run the following command + instead: + + /usr/bin/prog -gobble ./foo + + The '-gobble' tells 'prog' to eat the first line, leaving just the + blah that follows. (The reason for choosing '#!' is that '#' is + the comment character in the standard Unix shell, with the obvious + happy consequences for shell scripts.) + + +[5] We can play the same '#!' game with our 'bbc.img' file. Open it + up and add the line + + #!/usr/local/bin/run6502 -B -l 0 + + (or whatever, according to the location of the 'run6502' program), + make it executable + + chmod +x bbc.img + + and execute it: + + ./bbc.img + + To save a whopping 32K of zeros at the beginning of the file, + create the image again with + + run6502 \ + -l C000 OS12.ROM \ + -l 8000 BASIC2.ROM \ + -s 8000 +8000 bbc.img \ + -x + + and run it with + + run6502 -l 8000 bbc.img -B + + and, if you like, insert the single line + + #!/usr/local/bin/run6502 -B -l 8000 + + at the start of the image file and make it executable: + + ./bbc.img + + +[6] There is no footnote 6. diff --git a/examples/hex2bin b/examples/hex2bin new file mode 100755 index 0000000..82c2a44 --- /dev/null +++ b/examples/hex2bin @@ -0,0 +1,6 @@ +#!/usr/bin/perl + +while (<STDIN>) { + chomp; + print pack "H*", $_ +} diff --git a/examples/lib1.c b/examples/lib1.c new file mode 100644 index 0000000..6b89520 --- /dev/null +++ b/examples/lib1.c @@ -0,0 +1,108 @@ +#include <stdio.h> +#include <stdlib.h> + +#include "lib6502.h" + +/* Emulated OS functions. */ + +#define WRCH 0xFFEE /* Write accumulator to stdout. */ + +/* Write the accumulator to stdout. This function will be invoked + * when the emulated program calls 0xFFEE.
+ */ +int wrch(M6502 *mpu, uint16_t address, uint8_t data) +{ + int pc; + + /* Write the character. + */ + putchar(mpu->registers->a); + + /* We arrived here from a JSR instruction. The stack contains the + * saved PC. Pop it off the stack. + */ + pc = mpu->memory[++mpu->registers->s + 0x100]; + pc |= mpu->memory[++mpu->registers->s + 0x100] << 8; + + /* The JSR instruction pushes the value of PC before it has been + * incremented to point to the instruction after the JSR. Return PC + * + 1 as the address for the next insn. Returning non-zero + * indicates that we handled the 'subroutine' ourselves, and the + * emulator should pretend the original 'JSR' never happened at + * all. + */ + return pc + 1; /* JSR pushes next insn addr - 1 */ +} + + +/* Exit gracefully. We arrange for this function to be called when + * the emulator tries to transfer control to address 0. + */ +int done(M6502 *mpu, uint16_t address, uint8_t data) +{ + char buffer[64]; + + /* Dump the internal state of the processor. + */ + M6502_dump(mpu, buffer); + + /* Print a cute message and quit. + */ + printf("\nBRK instruction\n%s\n", buffer); + exit(0); +} + +int main() +{ + M6502 *mpu = M6502_new(0, 0, 0); /* Make a 6502 */ + unsigned pc = 0x1000; /* PC for 'assembly' */ + + /* Install the two callback functions defined above. + */ + M6502_setCallback(mpu, call, WRCH, wrch); /* Calling FFEE -> wrch() */ + M6502_setCallback(mpu, call, 0, done); /* Calling 0 -> done() */ + + /* A few macros that dump bytes into the 6502's memory. + */ +# define gen1(X) (mpu->memory[pc++]= (uint8_t)(X)) +# define gen2(X,Y) gen1(X); gen1(Y) +# define gen3(X,Y,Z) gen1(X); gen2(Y,Z) + + /* Hand-assemble the program.
+ */ + gen2(0xA2, 'A' ); // LDX #'A' + gen1(0x8A ); // TXA + gen3(0x20,0xEE,0xFF); // JSR FFEE + gen1(0xE8 ); // INX + gen2(0xE0, 'Z'+1 ); // CPX #'Z'+1 + gen2(0xD0, -9 ); // BNE 0x1002 + gen2(0xA9, '\n' ); // LDA #'\n' + gen3(0x20,0xEE,0xFF); // JSR FFEE + gen2(0x00,0x00 ); // BRK + + /* Just for fun: disassemble the program. + */ + { + char insn[64]; + uint16_t ip= 0x1000; + while (ip < pc) + { + int isz = M6502_disassemble(mpu, ip, insn); + printf("%04X %s\n", ip, insn); + ip += isz; + } + } + + /* Point the RESET vector at the first instruction in the assembled + * program. + */ + M6502_setVector(mpu, RST, 0x1000); + + /* Reset the 6502 and run the program. + */ + M6502_reset(mpu); + M6502_run(mpu); + M6502_delete(mpu); /* We never reach here, but what the hey. */ + + return 0; +} diff --git a/lib6502-compatibility.txt b/lib6502-compatibility.txt new file mode 100644 index 0000000..23f88d2 --- /dev/null +++ b/lib6502-compatibility.txt @@ -0,0 +1,54 @@ +At the time of writing the latest lib6502 release is v1.3; older versions are +not considered here. + +Some things which work fine with lib6502 itself are not supported when using +lib6502-jit in hybrid (the default) or compiled execution modes. All of the +following will result in undefined behaviour unless interpreted mode is used: + +* Modifying memory which contains 6502 code (whether executed yet or not) + inside a read callback. (All other types of callbacks are allowed to + modify memory freely, including modifying code.) + +* Defining a callback after calling M6502_run(); for example, doing so inside + another callback. + +* Checking the B and X flags in the processor status register + (M6502_Registers.p) inside a callback. lib6502 tracks these flags as if they + have a real existence at all times. lib6502-jit's compiler only sets them + appropriately when pushing a copy of the processor status register onto the + stack.
This difference is *not* visible to code executing on the emulated CPU, + only to callbacks. In hybrid mode, which behaviour you get will depend on + whether your callback is invoked from the interpreter or compiled code. + +The following differences exist between lib6502 and lib6502-jit in all modes, +including interpreted mode: + +* lib6502 is likely to be slightly faster than lib6502-jit in interpreted mode, + since the latter's interpreter code contains additional tests to stop + executing at certain points after n instructions have been executed. + +* Illegal instructions are treated as no-ops by default in lib6502-jit; lib6502 + aborts if an illegal instruction is executed. + +* Illegal instruction callbacks are a lib6502-jit extension and are not + available in lib6502. + +* Call callbacks in lib6502 always receive a 0 as the data argument; + lib6502-jit supplies the opcode triggering the callback as the data argument. + +* A few bugs in lib6502's emulation are resolved in lib6502-jit: + - BRK clears the D flag + - ADC/SBC exactly match the behaviour of a real 65C02 in decimal mode + - BIT #imm only modifies the Z flag, leaving N and V untouched + - TSB sets the Z flag correctly + - TRB sets the Z flag and updates memory correctly + +* lib6502's run6502 -B option skips every other (ROM name) argument; + lib6502-jit's doesn't. + +lib6502-jit's stance is that anything the code executing on the emulated CPU +does is fair game and must be handled, but that the library's client code has a +responsibility to cooperate and not do tricky things like those documented +above. If you have what you think is a reasonable requirement for behaviour +which is supported by lib6502 but doesn't work on lib6502-jit please get in +touch. 
diff --git a/lib6502-jit.cpp b/lib6502-jit.cpp new file mode 100644 index 0000000..02da212 --- /dev/null +++ b/lib6502-jit.cpp @@ -0,0 +1,190 @@ +/* lib6502-jit.cpp -- MOS Technology 6502 emulator -*- C -*- */ + +/* Copyright (c) 2005 Ian Piumarta + * Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. 
+ */ + +#include "const.h" +#include "Function.h" +#include "FunctionBuilder.h" +#include "FunctionManager.h" +#include "M6502Internal.h" +#include "Registers.h" +#include "util.h" + +static void outOfMemory(void) +{ + die("out of memory"); +} + +M6502 *M6502_new(M6502_Registers *registers, M6502_Memory memory, M6502_Callbacks *callbacks) +{ + M6502 *mpu= (M6502 *) calloc(1, sizeof(M6502)); + if (!mpu) outOfMemory(); + + if (!registers) { registers = (M6502_Registers *)calloc(1, sizeof(M6502_Registers)); mpu->flags |= M6502_RegistersAllocated; } + if (!memory ) { memory = (uint8_t *)calloc(1, sizeof(M6502_Memory )); mpu->flags |= M6502_MemoryAllocated; } + if (!callbacks) { callbacks = (M6502_Callbacks *)calloc(1, sizeof(M6502_Callbacks)); mpu->flags |= M6502_CallbacksAllocated; } + + if (!registers || !memory || !callbacks) outOfMemory(); + + mpu->registers = registers; + mpu->memory = memory; + mpu->callbacks = callbacks; + + try + { + mpu->internal = new _M6502_Internal(mpu); + } + catch (std::exception &e) + { + die(e.what()); + } + + return mpu; +} + +void M6502_delete(M6502 *mpu) +{ + if (mpu->flags & M6502_CallbacksAllocated) free(mpu->callbacks); + if (mpu->flags & M6502_MemoryAllocated ) free(mpu->memory); + if (mpu->flags & M6502_RegistersAllocated) free(mpu->registers); + delete mpu->internal; + + free(mpu); +} + +void M6502_setMode(M6502 *mpu, M6502_Mode mode, int arg) +{ + mpu->internal->mode_ = mode; + + if (arg == 0) + { + arg = M6502_Internal::default_max_instructions_; + } + mpu->internal->max_instructions_ = arg; +} + +extern "C" void M6502_run_interpreted(M6502 *mpu, int instructions_left); + +// I don't know if it's "supposed" to work, but it doesn't seem completely +// unreasonable for a lib6502 client to do a setjmp() before invoking +// M6502_run() and have a callback function longjmp() out of the emulation. I +// believe this will work with lib6502 itself, and I would like this emulation +// to do the same. 
(Note that currently for both lib6502 and lib6502-jit, +// read/write callbacks don't see an up-to-date M6502_Registers object and so +// the setjmp/longjmp trick would result in restarting execution in the wrong +// place with the wrong registers. Call callbacks and illegal instruction +// callbacks should work though.) +// +// To this end, M6502_run_compiled() and M6502_run_hybrid() both update the +// Registers object from the M6502_Registers object on entry to pick up the +// current state. They also both ensure they call update_memory_snapshot() as +// appropriate in case the caller modified memory before invoking M6502_run() +// again. + +static void M6502_run_compiled(M6502 *mpu) +{ + FunctionManager &function_manager = mpu->internal->function_manager_; + function_manager.update_memory_snapshot(); + + Registers &registers = mpu->internal->registers_; + registers.from_M6502_Registers(mpu); + + while (true) + { + Function *f = function_manager.get_function(registers.pc); + TRACE("Executing Function object for address 0x" << std::hex << + std::setfill('0') << std::setw(4) << registers.pc); + f->execute(); + } +} + +#ifdef LOG + +static std::string M6502_dump_str(M6502 *mpu) +{ + char buffer[64]; + M6502_dump(mpu, buffer); + return buffer; +} + +#endif + +static void M6502_run_hybrid(M6502 *mpu) +{ + FunctionManager &function_manager = mpu->internal->function_manager_; + Registers &registers = mpu->internal->registers_; + registers.from_M6502_Registers(mpu); + TRACE("About to interpret, CPU state: " << M6502_dump_str(mpu)); + while (true) + { + const int instructions_to_interpret = 100; + M6502_run_interpreted(mpu, instructions_to_interpret); + if (function_manager.jit_thread_idle()) + { + TRACE("JIT thread is idle"); + registers.from_M6502_Registers(mpu); + function_manager.update_memory_snapshot(); + Function *f; + while ((f = function_manager.get_function_lazy(registers.pc)) != 0) + { + TRACE("Executing Function object for address 0x" << std::hex <<
std::setfill('0') << std::setw(4) << registers.pc); + f->execute(); + } + TRACE("No Function object available for address 0x" << std::hex << + std::setfill('0') << std::setw(4) << registers.pc << + ", falling back to interpreter"); + registers.to_M6502_Registers(mpu); + TRACE("About to interpret, CPU state: " << M6502_dump_str(mpu)); + } + } +} + +void M6502_run(M6502 *mpu) +{ + try + { + switch (mpu->internal->mode_) + { + case M6502_ModeInterpreted: + while (true) + { + M6502_run_interpreted(mpu, std::numeric_limits<int>::max()); + } + break; + + case M6502_ModeCompiled: + M6502_run_compiled(mpu); + break; + + case M6502_ModeHybrid: + M6502_run_hybrid(mpu); + break; + + default: + die("Unknown execution mode in M6502_run()"); + } + + die("M6502_run() returned!"); + } + catch (std::exception &e) + { + die(e.what()); + } +} diff --git a/lib6502.c b/lib6502.c new file mode 100644 index 0000000..866e1b9 --- /dev/null +++ b/lib6502.c @@ -0,0 +1,910 @@ +/* lib6502.c -- MOS Technology 6502 emulator -*- C -*- */ + +/* Copyright (c) 2005 Ian Piumarta + * Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK.
+ */ + +/* BUGS: + * - RTS and RTI do not check the return address for a callback + * - the disassembler cannot be configured to read two bytes for BRK + * - architectural variations (unimplemented/extended instructions) not implemented + * - ANSI versions (free from gcc extensions) of the dispatch macros are missing + * - emulator+disassembler in same object file (library is kind of pointless) + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "lib6502.h" + +typedef uint8_t byte; +typedef uint16_t word; + +enum { + flagN= (1<<7), /* negative */ + flagV= (1<<6), /* overflow */ + flagX= (1<<5), /* unused */ + flagB= (1<<4), /* irq from brk */ + flagD= (1<<3), /* decimal mode */ + flagI= (1<<2), /* irq disable */ + flagZ= (1<<1), /* zero */ + flagC= (1<<0) /* carry */ +}; + +#define getN() (P & flagN) +#define getV() (P & flagV) +#define getB() (P & flagB) +#define getD() (P & flagD) +#define getI() (P & flagI) +#define getZ() (P & flagZ) +#define getC() (P & flagC) + +#define setNVZC(N,V,Z,C) (P= (P & ~(flagN | flagV | flagZ | flagC)) | (N) | ((V)<<6) | ((Z)<<1) | (C)) +#define setNZC(N,Z,C) (P= (P & ~(flagN | flagZ | flagC)) | (N) | ((Z)<<1) | (C)) +#define setNZ(N,Z) (P= (P & ~(flagN | flagZ )) | (N) | ((Z)<<1) ) +#define setZ(Z) (P= (P & ~( flagZ )) | ((Z)<<1) ) +#define setC(C) (P= (P & ~( flagC)) | (C)) + +#define NAND(P, Q) (!((P) & (Q))) + +#define tick(n) +#define tickIf(p) + +/* memory access (indirect if callback installed) -- ARGUMENTS ARE EVALUATED MORE THAN ONCE! */ + +#define putMemory(ADDR, BYTE) \ + ( writeCallback[ADDR] \ + ? writeCallback[ADDR](mpu, ADDR, BYTE) \ + : (memory[ADDR]= BYTE) ) + +#define getMemory(ADDR) \ + ( readCallback[ADDR] \ + ?
readCallback[ADDR](mpu, ADDR, 0) \ + : memory[ADDR] ) + +/* stack access (always direct) */ + +#define push(BYTE) (memory[0x0100 + S--]= (BYTE)) +#define pop() (memory[++S + 0x0100]) + +/* addressing modes (memory access direct) */ + +#define implied(ticks) \ + tick(ticks); + +#define immediate(ticks) \ + tick(ticks); \ + ea= PC++; + +#define abs(ticks) \ + tick(ticks); \ + ea= memory[PC] + (memory[PC + 1] << 8); \ + PC += 2; + +#define relative(ticks) \ + tick(ticks); \ + ea= memory[PC++]; \ + if (ea & 0x80) ea -= 0x100; \ + tickIf((ea >> 8) != (PC >> 8)); + +#define indirect(ticks) \ + tick(ticks); \ + { \ + word tmp; \ + tmp= memory[PC] + (memory[PC + 1] << 8); \ + ea = memory[tmp] + (memory[tmp + 1] << 8); \ + PC += 2; \ + } + +#define absx(ticks) \ + tick(ticks); \ + ea= memory[PC] + (memory[PC + 1] << 8); \ + PC += 2; \ + tickIf((ticks == 4) && ((ea >> 8) != ((ea + X) >> 8))); \ + ea += X; + +#define absy(ticks) \ + tick(ticks); \ + ea= memory[PC] + (memory[PC + 1] << 8); \ + PC += 2; \ + tickIf((ticks == 4) && ((ea >> 8) != ((ea + Y) >> 8))); \ + ea += Y + +#define zp(ticks) \ + tick(ticks); \ + ea= memory[PC++]; + +#define zpx(ticks) \ + tick(ticks); \ + ea= memory[PC++] + X; \ + ea &= 0x00ff; + +#define zpy(ticks) \ + tick(ticks); \ + ea= memory[PC++] + Y; \ + ea &= 0x00ff; + +#define indx(ticks) \ + tick(ticks); \ + { \ + byte tmp= memory[PC++] + X; \ + ea= memory[tmp] + (memory[tmp + 1] << 8); \ + } + +#define indy(ticks) \ + tick(ticks); \ + { \ + byte tmp= memory[PC++]; \ + ea= memory[tmp] + (memory[tmp + 1] << 8); \ + tickIf((ticks == 5) && ((ea >> 8) != ((ea + Y) >> 8))); \ + ea += Y; \ + } + +#define indabsx(ticks) \ + tick(ticks); \ + { \ + word tmp; \ + tmp= memory[PC ] + (memory[PC + 1] << 8) + X; \ + ea = memory[tmp] + (memory[tmp + 1] << 8); \ + } + +#define indzp(ticks) \ + tick(ticks); \ + { \ + byte tmp; \ + tmp= memory[PC++]; \ + ea = memory[tmp] + (memory[tmp + 1] << 8); \ + } + +/* insns */ + +#define adc(ticks, adrmode) \ + 
adrmode(ticks); \ + { \ + byte B= getMemory(ea); \ + if (!getD()) \ + { \ + int c= A + B + getC(); \ + int v= (int8_t)A + (int8_t)B + getC(); \ + fetch(); \ + A= c; \ + setNVZC((A & 0x80), (((A & 0x80) > 0) ^ (v < 0)), (A == 0), ((c & 0x100) > 0)); \ + next(); \ + } \ + else \ + { \ + /* Algorithm taken from http://www.6502.org/tutorials/decimal_mode.html */ \ + /* inelegant & slow, but consistent with the hw for illegal digits */ \ + int l, s, t, v; \ + l= (A & 0x0F) + (B & 0x0F) + getC(); \ + if (l >= 0x0A) { l = ((l + 0x06) & 0x0F) + 0x10; } \ + s= (A & 0xF0) + (B & 0xF0) + l; \ + t= (int8_t)(A & 0xF0) + (int8_t)(B & 0xF0) + (int8_t)l; \ + v= (t < -128) || (t > 127); \ + if (s >= 0xA0) { s += 0x60; } \ + fetch(); \ + A= s; \ + /* only C is valid on NMOS 6502 */ \ + setNVZC(s & 0x80, v, !A, (s >= 0x100)); \ + tick(1); \ + next(); \ + } \ + } + +#define sbc(ticks, adrmode) \ + adrmode(ticks); \ + { \ + byte B= getMemory(ea); \ + if (!getD()) \ + { \ + int b= 1 - (P &0x01); \ + int c= A - B - b; \ + int v= (int8_t)A - (int8_t) B - b; \ + fetch(); \ + A= c; \ + setNVZC(A & 0x80, ((A & 0x80) > 0) ^ ((v & 0x100) != 0), A == 0, c >= 0); \ + next(); \ + } \ + else \ + { \ + /* Algorithm taken from http://www.6502.org/tutorials/decimal_mode.html */ \ + int b= 1 - (P &0x01); \ + int l= (A & 0x0F) - (B & 0x0F) - b; \ + int s= A - B + getC() - 1; \ + int c= !(s & 0x100); \ + int v= (int8_t)A - (int8_t) B - b; \ + if (s < 0) { s -= 0x60; } \ + if (l < 0) { s -= 0x06; } \ + fetch(); \ + A = s; \ + /* only C is valid on NMOS 6502 */ \ + setNVZC(s & 0x80, ((v & 0x80) > 0) ^ ((v & 0x100) != 0), !A, c); \ + tick(1); \ + next(); \ + } \ + } + +#define cmpR(ticks, adrmode, R) \ + adrmode(ticks); \ + fetch(); \ + { \ + byte B= getMemory(ea); \ + byte d= R - B; \ + setNZC(d & 0x80, !d, R >= B); \ + } \ + next(); + +#define cmp(ticks, adrmode) cmpR(ticks, adrmode, A) +#define cpx(ticks, adrmode) cmpR(ticks, adrmode, X) +#define cpy(ticks, adrmode) cmpR(ticks, adrmode, Y) + +#define 
dec(ticks, adrmode) \ + adrmode(ticks); \ + fetch(); \ + { \ + byte B= getMemory(ea); \ + --B; \ + putMemory(ea, B); \ + setNZ(B & 0x80, !B); \ + } \ + next(); + +#define decR(ticks, adrmode, R) \ + fetch(); \ + tick(ticks); \ + --R; \ + setNZ(R & 0x80, !R); \ + next(); + +#define dea(ticks, adrmode) decR(ticks, adrmode, A) +#define dex(ticks, adrmode) decR(ticks, adrmode, X) +#define dey(ticks, adrmode) decR(ticks, adrmode, Y) + +#define inc(ticks, adrmode) \ + adrmode(ticks); \ + fetch(); \ + { \ + byte B= getMemory(ea); \ + ++B; \ + putMemory(ea, B); \ + setNZ(B & 0x80, !B); \ + } \ + next(); + +#define incR(ticks, adrmode, R) \ + fetch(); \ + tick(ticks); \ + ++R; \ + setNZ(R & 0x80, !R); \ + next(); + +#define ina(ticks, adrmode) incR(ticks, adrmode, A) +#define inx(ticks, adrmode) incR(ticks, adrmode, X) +#define iny(ticks, adrmode) incR(ticks, adrmode, Y) + +#define bit(ticks, adrmode) \ + adrmode(ticks); \ + fetch(); \ + { \ + byte B= getMemory(ea); \ + P= (P & ~(flagN | flagV | flagZ)) \ + | (B & (0xC0)) | (((A & B) == 0) << 1); \ + } \ + next(); + +/* BIT is unique in varying its behaviour based on addressing mode; + * BIT immediate only modifies the Z flag. 
+ * http://6502.org/tutorials/65c02opcodes.html + */ +#define bim(ticks, adrmode) \ + adrmode(ticks); \ + fetch(); \ + { \ + byte B= getMemory(ea); \ + setZ((A & B) == 0); \ + } \ + next(); + +#define tsb(ticks, adrmode) \ + adrmode(ticks); \ + fetch(); \ + { \ + byte b= getMemory(ea); \ + setZ(!(b & A)); \ + b |= A; \ + putMemory(ea, b); \ + } \ + next(); + +#define trb(ticks, adrmode) \ + adrmode(ticks); \ + fetch(); \ + { \ + byte b= getMemory(ea); \ + setZ(!(b & A)); \ + b &= (A ^ 0xFF); \ + putMemory(ea, b); \ + } \ + next(); + +#define bitwise(ticks, adrmode, op) \ + adrmode(ticks); \ + fetch(); \ + A op##= getMemory(ea); \ + setNZ(A & 0x80, !A); \ + next(); + +#define and(ticks, adrmode) bitwise(ticks, adrmode, &) +#define eor(ticks, adrmode) bitwise(ticks, adrmode, ^) +#define ora(ticks, adrmode) bitwise(ticks, adrmode, |) + +#define asl(ticks, adrmode) \ + adrmode(ticks); \ + { \ + unsigned int i= getMemory(ea) << 1; \ + putMemory(ea, i); \ + fetch(); \ + setNZC(i & 0x80, !i, i >> 8); \ + } \ + next(); + +#define asla(ticks, adrmode) \ + tick(ticks); \ + fetch(); \ + { \ + int c= A >> 7; \ + A <<= 1; \ + setNZC(A & 0x80, !A, c); \ + } \ + next(); + +#define lsr(ticks, adrmode) \ + adrmode(ticks); \ + { \ + byte b= getMemory(ea); \ + int c= b & 1; \ + fetch(); \ + b >>= 1; \ + putMemory(ea, b); \ + setNZC(0, !b, c); \ + } \ + next(); + +#define lsra(ticks, adrmode) \ + tick(ticks); \ + fetch(); \ + { \ + int c= A & 1; \ + A >>= 1; \ + setNZC(0, !A, c); \ + } \ + next(); + +#define rol(ticks, adrmode) \ + adrmode(ticks); \ + { \ + word b= (getMemory(ea) << 1) | getC(); \ + fetch(); \ + putMemory(ea, b); \ + setNZC(b & 0x80, !(b & 0xFF), b >> 8); \ + } \ + next(); + +#define rola(ticks, adrmode) \ + tick(ticks); \ + fetch(); \ + { \ + word b= (A << 1) | getC(); \ + A= b; \ + setNZC(A & 0x80, !A, b >> 8); \ + } \ + next(); + +#define ror(ticks, adrmode) \ + adrmode(ticks); \ + { \ + int c= getC(); \ + byte m= getMemory(ea); \ + byte b= (c << 7) | (m >> 1); \ + 
fetch(); \ + putMemory(ea, b); \ + setNZC(b & 0x80, !b, m & 1); \ + } \ + next(); + +#define rora(ticks, adrmode) \ + adrmode(ticks); \ + { \ + int ci= getC(); \ + int co= A & 1; \ + fetch(); \ + A= (ci << 7) | (A >> 1); \ + setNZC(A & 0x80, !A, co); \ + } \ + next(); + +#define tRS(ticks, adrmode, R, S) \ + fetch(); \ + tick(ticks); \ + S= R; \ + setNZ(S & 0x80, !S); \ + next(); + +#define tax(ticks, adrmode) tRS(ticks, adrmode, A, X) +#define txa(ticks, adrmode) tRS(ticks, adrmode, X, A) +#define tay(ticks, adrmode) tRS(ticks, adrmode, A, Y) +#define tya(ticks, adrmode) tRS(ticks, adrmode, Y, A) +#define tsx(ticks, adrmode) tRS(ticks, adrmode, S, X) + +#define txs(ticks, adrmode) \ + fetch(); \ + tick(ticks); \ + S= X; \ + next(); + +#define ldR(ticks, adrmode, R) \ + adrmode(ticks); \ + fetch(); \ + R= getMemory(ea); \ + setNZ(R & 0x80, !R); \ + next(); + +#define lda(ticks, adrmode) ldR(ticks, adrmode, A) +#define ldx(ticks, adrmode) ldR(ticks, adrmode, X) +#define ldy(ticks, adrmode) ldR(ticks, adrmode, Y) + +#define stR(ticks, adrmode, R) \ + adrmode(ticks); \ + fetch(); \ + putMemory(ea, R); \ + next(); + +#define sta(ticks, adrmode) stR(ticks, adrmode, A) +#define stx(ticks, adrmode) stR(ticks, adrmode, X) +#define sty(ticks, adrmode) stR(ticks, adrmode, Y) +#define stz(ticks, adrmode) stR(ticks, adrmode, 0) + +/* We only set keep_running to false if we branch; this is just + * an attempt to pick points to JIT at which we have a chance of + * hitting a second time. 
+ */ +#define branch(ticks, adrmode, cond) \ + if (cond) \ + { \ + adrmode(ticks); \ + PC += ea; \ + tick(1); \ + keep_running= (instructions_left > 0); \ + } \ + else \ + { \ + tick(ticks); \ + PC++; \ + } \ + fetch(); \ + next(); + +#define bcc(ticks, adrmode) branch(ticks, adrmode, !getC()) +#define bcs(ticks, adrmode) branch(ticks, adrmode, getC()) +#define bne(ticks, adrmode) branch(ticks, adrmode, !getZ()) +#define beq(ticks, adrmode) branch(ticks, adrmode, getZ()) +#define bpl(ticks, adrmode) branch(ticks, adrmode, !getN()) +#define bmi(ticks, adrmode) branch(ticks, adrmode, getN()) +#define bvc(ticks, adrmode) branch(ticks, adrmode, !getV()) +#define bvs(ticks, adrmode) branch(ticks, adrmode, getV()) + +#define bra(ticks, adrmode) \ + adrmode(ticks); \ + PC += ea; \ + keep_running= (instructions_left > 0); \ + fetch(); \ + tick(1); \ + next(); + +#define jmp(ticks, adrmode) \ + { \ + adrmode(ticks); \ + byte opcode= mpu->memory[PC-3]; \ + PC= ea; \ + if (mpu->callbacks->call[ea]) \ + { \ + word addr; \ + externalise(); \ + if ((addr= mpu->callbacks->call[ea](mpu, ea, opcode)))\ + { \ + internalise(); \ + PC= addr; \ + } \ + } \ + keep_running= (instructions_left > 0); \ + fetch(); \ + next(); \ + } + +#define jsr(ticks, adrmode) \ + PC++; \ + push(PC >> 8); \ + push(PC & 0xff); \ + PC--; \ + adrmode(ticks); \ + if (mpu->callbacks->call[ea]) \ + { \ + word addr; \ + externalise(); \ + if ((addr= mpu->callbacks->call[ea](mpu, ea, 0x20))) \ + { \ + internalise(); \ + PC= addr; \ + keep_running= (instructions_left > 0); \ + fetch(); \ + next(); \ + } \ + } \ + PC=ea; \ + keep_running= (instructions_left > 0); \ + fetch(); \ + next(); + +#define rts(ticks, adrmode) \ + tick(ticks); \ + PC = pop(); \ + PC |= (pop() << 8); \ + PC++; \ + keep_running= (instructions_left > 0); \ + fetch(); \ + next(); + +#define brk(ticks, adrmode) \ + tick(ticks); \ + PC++; \ + push(PC >> 8); \ + push(PC & 0xff); \ + P |= flagB; \ + /* 
http://www.6502.org/tutorials/65c02opcodes.html - unlike + * the 6502, the 65C02 clears D on BRK. + */ \ + P &= ~flagD; \ + push(P | flagX); \ + P |= flagI; \ + { \ + word hdlr= getMemory(0xfffe) + (getMemory(0xffff) << 8); \ + if (mpu->callbacks->call[hdlr]) \ + { \ + word addr; \ + externalise(); \ + if ((addr= mpu->callbacks->call[hdlr](mpu, PC - 2, 0))) \ + { \ + internalise(); \ + hdlr= addr; \ + } \ + } \ + PC= hdlr; \ + } \ + keep_running= (instructions_left > 0); \ + fetch(); \ + next(); + +#define rti(ticks, adrmode) \ + tick(ticks); \ + P= pop(); \ + PC= pop(); \ + PC |= (pop() << 8); \ + keep_running= (instructions_left > 0); \ + fetch(); \ + next(); + +#define nop(ticks, adrmode) \ + fetch(); \ + tick(ticks); \ + next(); + +#define ill(ticks, adrmode) \ + { \ + word addr= PC-1; \ + byte instruction= memory[addr]; \ + tick(ticks); \ + if (mpu->callbacks->illegal_instruction[instruction]) \ + { \ + adrmode(ticks); \ + externalise(); \ + if (addr= (mpu->callbacks->illegal_instruction[instruction](mpu, addr, \ + instruction))) \ + { \ + mpu->registers->pc= addr; \ + } \ + internalise(); \ + fetch(); \ + next(); \ + } \ + else \ + { \ + adrmode(ticks); \ + fetch(); \ + next(); \ + } \ + }; + +#define phR(ticks, adrmode, R) \ + fetch(); \ + tick(ticks); \ + push(R); \ + next(); + +#define pha(ticks, adrmode) phR(ticks, adrmode, A) +#define phx(ticks, adrmode) phR(ticks, adrmode, X) +#define phy(ticks, adrmode) phR(ticks, adrmode, Y) +#define php(ticks, adrmode) phR(ticks, adrmode, P | flagX | flagB) + +#define plR(ticks, adrmode, R) \ + fetch(); \ + tick(ticks); \ + R= pop(); \ + setNZ(R & 0x80, !R); \ + next(); + +#define pla(ticks, adrmode) plR(ticks, adrmode, A) +#define plx(ticks, adrmode) plR(ticks, adrmode, X) +#define ply(ticks, adrmode) plR(ticks, adrmode, Y) + +#define plp(ticks, adrmode) \ + fetch(); \ + tick(ticks); \ + P= pop(); \ + next(); + +#define clF(ticks, adrmode, F) \ + fetch(); \ + tick(ticks); \ + P &= ~F; \ + next(); + +#define 
clc(ticks, adrmode) clF(ticks, adrmode, flagC) +#define cld(ticks, adrmode) clF(ticks, adrmode, flagD) +#define cli(ticks, adrmode) clF(ticks, adrmode, flagI) +#define clv(ticks, adrmode) clF(ticks, adrmode, flagV) + +#define seF(ticks, adrmode, F) \ + fetch(); \ + tick(ticks); \ + P |= F; \ + next(); + +#define sec(ticks, adrmode) seF(ticks, adrmode, flagC) +#define sed(ticks, adrmode) seF(ticks, adrmode, flagD) +#define sei(ticks, adrmode) seF(ticks, adrmode, flagI) + +#define do_insns(_) \ + _(00, brk, implied, 7); _(01, ora, indx, 6); _(02, ill, zp, 2); _(03, ill, implied, 2); \ + _(04, tsb, zp, 3); _(05, ora, zp, 3); _(06, asl, zp, 5); _(07, ill, implied, 2); \ + _(08, php, implied, 3); _(09, ora, immediate, 3); _(0a, asla,implied, 2); _(0b, ill, implied, 2); \ + _(0c, tsb, abs, 4); _(0d, ora, abs, 4); _(0e, asl, abs, 6); _(0f, ill, implied, 2); \ + _(10, bpl, relative, 2); _(11, ora, indy, 5); _(12, ora, indzp, 3); _(13, ill, implied, 2); \ + _(14, trb, zp, 3); _(15, ora, zpx, 4); _(16, asl, zpx, 6); _(17, ill, implied, 2); \ + _(18, clc, implied, 2); _(19, ora, absy, 4); _(1a, ina, implied, 2); _(1b, ill, implied, 2); \ + _(1c, trb, abs, 4); _(1d, ora, absx, 4); _(1e, asl, absx, 7); _(1f, ill, implied, 2); \ + _(20, jsr, abs, 6); _(21, and, indx, 6); _(22, ill, zp, 2); _(23, ill, implied, 2); \ + _(24, bit, zp, 3); _(25, and, zp, 3); _(26, rol, zp, 5); _(27, ill, implied, 2); \ + _(28, plp, implied, 4); _(29, and, immediate, 3); _(2a, rola,implied, 2); _(2b, ill, implied, 2); \ + _(2c, bit, abs, 4); _(2d, and, abs, 4); _(2e, rol, abs, 6); _(2f, ill, implied, 2); \ + _(30, bmi, relative, 2); _(31, and, indy, 5); _(32, and, indzp, 3); _(33, ill, implied, 2); \ + _(34, bit, zpx, 4); _(35, and, zpx, 4); _(36, rol, zpx, 6); _(37, ill, implied, 2); \ + _(38, sec, implied, 2); _(39, and, absy, 4); _(3a, dea, implied, 2); _(3b, ill, implied, 2); \ + _(3c, bit, absx, 4); _(3d, and, absx, 4); _(3e, rol, absx, 7); _(3f, ill, implied, 2); \ + _(40, rti, implied, 6); 
_(41, eor, indx, 6); _(42, ill, zp, 2); _(43, ill, implied, 2); \ + _(44, ill, zp, 3); _(45, eor, zp, 3); _(46, lsr, zp, 5); _(47, ill, implied, 2); \ + _(48, pha, implied, 3); _(49, eor, immediate, 3); _(4a, lsra,implied, 2); _(4b, ill, implied, 2); \ + _(4c, jmp, abs, 3); _(4d, eor, abs, 4); _(4e, lsr, abs, 6); _(4f, ill, implied, 2); \ + _(50, bvc, relative, 2); _(51, eor, indy, 5); _(52, eor, indzp, 3); _(53, ill, implied, 2); \ + _(54, ill, zp, 4); _(55, eor, zpx, 4); _(56, lsr, zpx, 6); _(57, ill, implied, 2); \ + _(58, cli, implied, 2); _(59, eor, absy, 4); _(5a, phy, implied, 3); _(5b, ill, implied, 2); \ + _(5c, ill, abs, 8); _(5d, eor, absx, 4); _(5e, lsr, absx, 7); _(5f, ill, implied, 2); \ + _(60, rts, implied, 6); _(61, adc, indx, 6); _(62, ill, zp, 2); _(63, ill, implied, 2); \ + _(64, stz, zp, 3); _(65, adc, zp, 3); _(66, ror, zp, 5); _(67, ill, implied, 2); \ + _(68, pla, implied, 4); _(69, adc, immediate, 3); _(6a, rora,implied, 2); _(6b, ill, implied, 2); \ + _(6c, jmp, indirect, 5); _(6d, adc, abs, 4); _(6e, ror, abs, 6); _(6f, ill, implied, 2); \ + _(70, bvs, relative, 2); _(71, adc, indy, 5); _(72, adc, indzp, 3); _(73, ill, implied, 2); \ + _(74, stz, zpx, 4); _(75, adc, zpx, 4); _(76, ror, zpx, 6); _(77, ill, implied, 2); \ + _(78, sei, implied, 2); _(79, adc, absy, 4); _(7a, ply, implied, 4); _(7b, ill, implied, 2); \ + _(7c, jmp, indabsx, 6); _(7d, adc, absx, 4); _(7e, ror, absx, 7); _(7f, ill, implied, 2); \ + _(80, bra, relative, 2); _(81, sta, indx, 6); _(82, ill, zp, 2); _(83, ill, implied, 2); \ + _(84, sty, zp, 2); _(85, sta, zp, 2); _(86, stx, zp, 2); _(87, ill, implied, 2); \ + _(88, dey, implied, 2); _(89, bim, immediate, 2); _(8a, txa, implied, 2); _(8b, ill, implied, 2); \ + _(8c, sty, abs, 4); _(8d, sta, abs, 4); _(8e, stx, abs, 4); _(8f, ill, implied, 2); \ + _(90, bcc, relative, 2); _(91, sta, indy, 6); _(92, sta, indzp, 3); _(93, ill, implied, 2); \ + _(94, sty, zpx, 4); _(95, sta, zpx, 4); _(96, stx, zpy, 4); _(97, ill, 
implied, 2); \ + _(98, tya, implied, 2); _(99, sta, absy, 5); _(9a, txs, implied, 2); _(9b, ill, implied, 2); \ + _(9c, stz, abs, 4); _(9d, sta, absx, 5); _(9e, stz, absx, 5); _(9f, ill, implied, 2); \ + _(a0, ldy, immediate, 3); _(a1, lda, indx, 6); _(a2, ldx, immediate, 3); _(a3, ill, implied, 2); \ + _(a4, ldy, zp, 3); _(a5, lda, zp, 3); _(a6, ldx, zp, 3); _(a7, ill, implied, 2); \ + _(a8, tay, implied, 2); _(a9, lda, immediate, 3); _(aa, tax, implied, 2); _(ab, ill, implied, 2); \ + _(ac, ldy, abs, 4); _(ad, lda, abs, 4); _(ae, ldx, abs, 4); _(af, ill, implied, 2); \ + _(b0, bcs, relative, 2); _(b1, lda, indy, 5); _(b2, lda, indzp, 3); _(b3, ill, implied, 2); \ + _(b4, ldy, zpx, 4); _(b5, lda, zpx, 4); _(b6, ldx, zpy, 4); _(b7, ill, implied, 2); \ + _(b8, clv, implied, 2); _(b9, lda, absy, 4); _(ba, tsx, implied, 2); _(bb, ill, implied, 2); \ + _(bc, ldy, absx, 4); _(bd, lda, absx, 4); _(be, ldx, absy, 4); _(bf, ill, implied, 2); \ + _(c0, cpy, immediate, 3); _(c1, cmp, indx, 6); _(c2, ill, zp, 2); _(c3, ill, implied, 2); \ + _(c4, cpy, zp, 3); _(c5, cmp, zp, 3); _(c6, dec, zp, 5); _(c7, ill, implied, 2); \ + _(c8, iny, implied, 2); _(c9, cmp, immediate, 3); _(ca, dex, implied, 2); _(cb, ill, implied, 2); \ + _(cc, cpy, abs, 4); _(cd, cmp, abs, 4); _(ce, dec, abs, 6); _(cf, ill, implied, 2); \ + _(d0, bne, relative, 2); _(d1, cmp, indy, 5); _(d2, cmp, indzp, 3); _(d3, ill, implied, 2); \ + _(d4, ill, zp, 4); _(d5, cmp, zpx, 4); _(d6, dec, zpx, 6); _(d7, ill, implied, 2); \ + _(d8, cld, implied, 2); _(d9, cmp, absy, 4); _(da, phx, implied, 3); _(db, ill, implied, 2); \ + _(dc, ill, abs, 4); _(dd, cmp, absx, 4); _(de, dec, absx, 7); _(df, ill, implied, 2); \ + _(e0, cpx, immediate, 3); _(e1, sbc, indx, 6); _(e2, ill, zp, 2); _(e3, ill, implied, 2); \ + _(e4, cpx, zp, 3); _(e5, sbc, zp, 3); _(e6, inc, zp, 5); _(e7, ill, implied, 2); \ + _(e8, inx, implied, 2); _(e9, sbc, immediate, 3); _(ea, nop, implied, 2); _(eb, ill, implied, 2); \ + _(ec, cpx, abs, 4); _(ed, 
sbc, abs, 4); _(ee, inc, abs, 6); _(ef, ill, implied, 2); \ + _(f0, beq, relative, 2); _(f1, sbc, indy, 5); _(f2, sbc, indzp, 3); _(f3, ill, implied, 2); \ + _(f4, ill, zp, 4); _(f5, sbc, zpx, 4); _(f6, inc, zpx, 6); _(f7, ill, implied, 2); \ + _(f8, sed, implied, 2); _(f9, sbc, absy, 4); _(fa, plx, implied, 4); _(fb, ill, implied, 2); \ + _(fc, ill, abs, 4); _(fd, sbc, absx, 4); _(fe, inc, absx, 7); _(ff, ill, implied, 2); + + + +void M6502_irq(M6502 *mpu) +{ + if (!(mpu->registers->p & flagI)) + { + mpu->memory[0x0100 + mpu->registers->s--] = (byte)(mpu->registers->pc >> 8); + mpu->memory[0x0100 + mpu->registers->s--] = (byte)(mpu->registers->pc & 0xff); + mpu->memory[0x0100 + mpu->registers->s--] = mpu->registers->p; + mpu->registers->p &= ~flagB; + mpu->registers->p |= flagI; + mpu->registers->pc = M6502_getVector(mpu, IRQ); + } +} + + +void M6502_nmi(M6502 *mpu) +{ + mpu->memory[0x0100 + mpu->registers->s--] = (byte)(mpu->registers->pc >> 8); + mpu->memory[0x0100 + mpu->registers->s--] = (byte)(mpu->registers->pc & 0xff); + mpu->memory[0x0100 + mpu->registers->s--] = mpu->registers->p; + mpu->registers->p &= ~flagB; + mpu->registers->p |= flagI; + mpu->registers->pc = M6502_getVector(mpu, NMI); +} + + +void M6502_reset(M6502 *mpu) +{ + mpu->registers->p &= ~flagD; + mpu->registers->p |= flagI; + mpu->registers->pc = M6502_getVector(mpu, RST); +} + + +/* the compiler should eliminate all calls to this function */ + +static void oops(void) +{ + fprintf(stderr, "\noops -- instruction dispatch missing\n"); +} + + +void M6502_run_interpreted(M6502 *mpu, int instructions_left) +{ + int keep_running= 1; + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) + + static void *itab[256]= { &&_00, &&_01, &&_02, &&_03, &&_04, &&_05, &&_06, &&_07, &&_08, &&_09, &&_0a, &&_0b, &&_0c, &&_0d, &&_0e, &&_0f, + &&_10, &&_11, &&_12, &&_13, &&_14, &&_15, &&_16, &&_17, &&_18, &&_19, &&_1a, &&_1b, &&_1c, &&_1d, &&_1e, &&_1f, + &&_20, &&_21, &&_22, &&_23, &&_24, &&_25, &&_26, &&_27, 
&&_28, &&_29, &&_2a, &&_2b, &&_2c, &&_2d, &&_2e, &&_2f, + &&_30, &&_31, &&_32, &&_33, &&_34, &&_35, &&_36, &&_37, &&_38, &&_39, &&_3a, &&_3b, &&_3c, &&_3d, &&_3e, &&_3f, + &&_40, &&_41, &&_42, &&_43, &&_44, &&_45, &&_46, &&_47, &&_48, &&_49, &&_4a, &&_4b, &&_4c, &&_4d, &&_4e, &&_4f, + &&_50, &&_51, &&_52, &&_53, &&_54, &&_55, &&_56, &&_57, &&_58, &&_59, &&_5a, &&_5b, &&_5c, &&_5d, &&_5e, &&_5f, + &&_60, &&_61, &&_62, &&_63, &&_64, &&_65, &&_66, &&_67, &&_68, &&_69, &&_6a, &&_6b, &&_6c, &&_6d, &&_6e, &&_6f, + &&_70, &&_71, &&_72, &&_73, &&_74, &&_75, &&_76, &&_77, &&_78, &&_79, &&_7a, &&_7b, &&_7c, &&_7d, &&_7e, &&_7f, + &&_80, &&_81, &&_82, &&_83, &&_84, &&_85, &&_86, &&_87, &&_88, &&_89, &&_8a, &&_8b, &&_8c, &&_8d, &&_8e, &&_8f, + &&_90, &&_91, &&_92, &&_93, &&_94, &&_95, &&_96, &&_97, &&_98, &&_99, &&_9a, &&_9b, &&_9c, &&_9d, &&_9e, &&_9f, + &&_a0, &&_a1, &&_a2, &&_a3, &&_a4, &&_a5, &&_a6, &&_a7, &&_a8, &&_a9, &&_aa, &&_ab, &&_ac, &&_ad, &&_ae, &&_af, + &&_b0, &&_b1, &&_b2, &&_b3, &&_b4, &&_b5, &&_b6, &&_b7, &&_b8, &&_b9, &&_ba, &&_bb, &&_bc, &&_bd, &&_be, &&_bf, + &&_c0, &&_c1, &&_c2, &&_c3, &&_c4, &&_c5, &&_c6, &&_c7, &&_c8, &&_c9, &&_ca, &&_cb, &&_cc, &&_cd, &&_ce, &&_cf, + &&_d0, &&_d1, &&_d2, &&_d3, &&_d4, &&_d5, &&_d6, &&_d7, &&_d8, &&_d9, &&_da, &&_db, &&_dc, &&_dd, &&_de, &&_df, + &&_e0, &&_e1, &&_e2, &&_e3, &&_e4, &&_e5, &&_e6, &&_e7, &&_e8, &&_e9, &&_ea, &&_eb, &&_ec, &&_ed, &&_ee, &&_ef, + &&_f0, &&_f1, &&_f2, &&_f3, &&_f4, &&_f5, &&_f6, &&_f7, &&_f8, &&_f9, &&_fa, &&_fb, &&_fc, &&_fd, &&_fe, &&_ff }; + + register void **itabp= &itab[0]; + register void *tpc; + +# define begin() ++instructions_left; fetch(); next() +# define fetch() tpc= itabp[memory[PC++]] +# define next() --instructions_left; if (keep_running) goto *tpc; else goto done +# define dispatch(num, name, mode, cycles) _##num: name(cycles, mode) oops(); next() +# define end() done: --PC + +#else /* (!__GNUC__) || (__STRICT_ANSI__) */ + +# define begin() for 
(;keep_running;--instructions_left) switch (memory[PC++]) { +# define fetch() +# define next() break +# define dispatch(num, name, mode, cycles) case 0x##num: name(cycles, mode); next() +# define end() } + +#endif + + register byte *memory= mpu->memory; + register word PC; + word ea; + byte A, X, Y, P, S; + M6502_Callback *readCallback= mpu->callbacks->read; + M6502_Callback *writeCallback= mpu->callbacks->write; + +# define internalise() A= mpu->registers->a; X= mpu->registers->x; Y= mpu->registers->y; P= mpu->registers->p; S= mpu->registers->s; PC= mpu->registers->pc +# define externalise() mpu->registers->a= A; mpu->registers->x= X; mpu->registers->y= Y; mpu->registers->p= P; mpu->registers->s= S; mpu->registers->pc= PC + + internalise(); + + begin(); + do_insns(dispatch); + end(); + + externalise(); + +# undef begin +# undef internalise +# undef externalise +# undef fetch +# undef next +# undef dispatch +# undef end +} + + +int M6502_disassemble(M6502 *mpu, word ip, char buffer[64]) +{ + char *s= buffer; + byte *b= mpu->memory + ip; + + switch (b[0]) + { +# define _implied return 1; +# define _immediate sprintf(s, "#%02X", b[1]); return 2; +# define _zp sprintf(s, "%02X", b[1]); return 2; +# define _zpx sprintf(s, "%02X,X", b[1]); return 2; +# define _zpy sprintf(s, "%02X,Y", b[1]); return 2; +# define _abs sprintf(s, "%02X%02X", b[2], b[1]); return 3; +# define _absx sprintf(s, "%02X%02X,X", b[2], b[1]); return 3; +# define _absy sprintf(s, "%02X%02X,Y", b[2], b[1]); return 3; +# define _relative sprintf(s, "%04X", ip + 2 + (int8_t)b[1]); return 2; +# define _indirect sprintf(s, "(%02X%02X)", b[2], b[1]); return 3; +# define _indzp sprintf(s, "(%02X)", b[1]); return 2; +# define _indx sprintf(s, "(%02X,X)", b[1]); return 2; +# define _indy sprintf(s, "(%02X),Y", b[1]); return 2; +# define _indabsx sprintf(s, "(%02X%02X,X)", b[2], b[1]); return 3; + +# define disassemble(num, name, mode, cycles) case 0x##num: s += sprintf(s, "%s ", #name); _##mode + 
do_insns(disassemble); +# undef _do + } + + return 0; +} + + +void M6502_dump(M6502 *mpu, char buffer[64]) +{ + M6502_Registers *r= mpu->registers; + uint8_t p= r->p; +# define P(N,C) (p & (1 << (N)) ? (C) : '-') + sprintf(buffer, "PC=%04X SP=%04X A=%02X X=%02X Y=%02X P=%02X %c%c%c%c%c%c%c%c", + r->pc, 0x0100 + r->s, + r->a, r->x, r->y, r->p, + P(7,'N'), P(6,'V'), P(5,'?'), P(4,'B'), P(3,'D'), P(2,'I'), P(1,'Z'), P(0,'C')); +# undef P +} diff --git a/lib6502.h b/lib6502.h new file mode 100644 index 0000000..41fc9f2 --- /dev/null +++ b/lib6502.h @@ -0,0 +1,120 @@ +/* lib6502.h -- MOS Technology 6502 emulator -*- C -*- */ + +/* Copyright (c) 2005 Ian Piumarta + * Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. 
+ */ + +#ifndef __m6502_h +#define __m6502_h + +#include +#include + +#ifdef __cplusplus + extern "C" +{ +#endif + +typedef struct _M6502 M6502; +typedef struct _M6502_Registers M6502_Registers; +typedef struct _M6502_Callbacks M6502_Callbacks; +typedef struct _M6502_Internal M6502_Internal; + +typedef int (*M6502_Callback)(M6502 *mpu, uint16_t address, uint8_t data); + +typedef M6502_Callback M6502_CallbackTable[0x10000]; +typedef M6502_Callback M6502_IllegalInstructionCallbackTable[0x100]; +typedef uint8_t M6502_Memory[0x10000]; + +enum { + M6502_NMIVector= 0xfffa, M6502_NMIVectorLSB= 0xfffa, M6502_NMIVectorMSB= 0xfffb, + M6502_RSTVector= 0xfffc, M6502_RSTVectorLSB= 0xfffc, M6502_RSTVectorMSB= 0xfffd, + M6502_IRQVector= 0xfffe, M6502_IRQVectorLSB= 0xfffe, M6502_IRQVectorMSB= 0xffff +}; + +struct _M6502_Registers +{ + uint8_t a; /* accumulator */ + uint8_t x; /* X index register */ + uint8_t y; /* Y index register */ + uint8_t p; /* processor status register */ + uint8_t s; /* stack pointer */ + uint16_t pc; /* program counter */ +}; + +struct _M6502_Callbacks +{ + M6502_CallbackTable read; + M6502_CallbackTable write; + M6502_CallbackTable call; + M6502_IllegalInstructionCallbackTable illegal_instruction; +}; + +struct _M6502_Internal; + +struct _M6502 +{ + M6502_Registers *registers; + uint8_t *memory; + M6502_Callbacks *callbacks; + unsigned int flags; + + /* The following is implementation-specific; client code should only use the + * above members. 
+ */ + M6502_Internal *internal; +}; + +enum { + M6502_RegistersAllocated = 1 << 0, + M6502_MemoryAllocated = 1 << 1, + M6502_CallbacksAllocated = 1 << 2 +}; + +typedef enum { + M6502_ModeInterpreted, + M6502_ModeCompiled, + M6502_ModeHybrid +} M6502_Mode; + +extern M6502 *M6502_new(M6502_Registers *registers, M6502_Memory memory, M6502_Callbacks *callbacks); +extern void M6502_reset(M6502 *mpu); +extern void M6502_nmi(M6502 *mpu); +extern void M6502_irq(M6502 *mpu); +extern void M6502_run(M6502 *mpu); +extern int M6502_disassemble(M6502 *mpu, uint16_t addr, char buffer[64]); +extern void M6502_dump(M6502 *mpu, char buffer[64]); +extern void M6502_delete(M6502 *mpu); +extern void M6502_setMode(M6502 *mpu, M6502_Mode mode, int arg); + +#define M6502_getVector(MPU, VEC) \ + ( ( ((MPU)->memory[M6502_##VEC##VectorLSB]) ) \ + | ((MPU)->memory[M6502_##VEC##VectorMSB] << 8) ) + +#define M6502_setVector(MPU, VEC, ADDR) \ + ( ( ((MPU)->memory[M6502_##VEC##VectorLSB]= ((uint8_t)(ADDR)) & 0xff) ) \ + , ((MPU)->memory[M6502_##VEC##VectorMSB]= (uint8_t)((ADDR) >> 8)) ) + +#define M6502_getCallback(MPU, TYPE, ADDR) ((MPU)->callbacks->TYPE[ADDR]) +#define M6502_setCallback(MPU, TYPE, ADDR, FN) ((MPU)->callbacks->TYPE[ADDR]= (FN)) + + +#ifdef __cplusplus +} +#endif + +#endif /* __m6502_h */ diff --git a/m4/boost.m4 b/m4/boost.m4 new file mode 100644 index 0000000..0a46b0e --- /dev/null +++ b/m4/boost.m4 @@ -0,0 +1,1338 @@ +# boost.m4: Locate Boost headers and libraries for autoconf-based projects. +# Copyright (C) 2007-2011, 2014 Benoit Sigoure +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# Additional permission under section 7 of the GNU General Public +# License, version 3 ("GPLv3"): +# +# If you convey this file as part of a work that contains a +# configuration script generated by Autoconf, you may do so under +# terms of your choice. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. + +m4_define([_BOOST_SERIAL], [m4_translit([ +# serial 22 +], [# +], [])]) + +# Original sources can be found at http://github.com/tsuna/boost.m4 +# You can fetch the latest version of the script by doing: +# wget http://github.com/tsuna/boost.m4/raw/master/build-aux/boost.m4 + +# ------ # +# README # +# ------ # + +# This file provides several macros to use the various Boost libraries. +# The first macro is BOOST_REQUIRE. It will simply check if it's possible to +# find the Boost headers of a given (optional) minimum version and it will +# define BOOST_CPPFLAGS accordingly. It will add an option --with-boost to +# your configure so that users can specify non standard locations. +# If the user's environment contains BOOST_ROOT and --with-boost was not +# specified, --with-boost=$BOOST_ROOT is implicitly used. +# For more README and documentation, go to http://github.com/tsuna/boost.m4 +# Note: THESE MACROS ASSUME THAT YOU USE LIBTOOL. If you don't, don't worry, +# simply read the README, it will show you what to do step by step. + +m4_pattern_forbid([^_?(BOOST|Boost)_]) + + +# _BOOST_SED_CPP(SED-PROGRAM, PROGRAM, +# [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +# -------------------------------------------------------- +# Same as AC_EGREP_CPP, but leave the result in conftest.i. +# +# SED-PROGRAM is *not* overquoted, as in AC_EGREP_CPP. 
It is expanded +# in double-quotes, so escape your double quotes. +# +# It could be useful to turn this into a macro which extracts the +# value of any macro. +m4_define([_BOOST_SED_CPP], +[AC_LANG_PUSH([C++])dnl +AC_LANG_PREPROC_REQUIRE()dnl +AC_REQUIRE([AC_PROG_SED])dnl +AC_LANG_CONFTEST([AC_LANG_SOURCE([[$2]])]) +AS_IF([dnl eval is necessary to expand ac_cpp. +dnl Ultrix and Pyramid sh refuse to redirect output of eval, so use subshell. +dnl Beware of Windows end-of-lines, for instance if we are running +dnl some Windows programs under Wine. In that case, boost/version.hpp +dnl is certainly using "\r\n", but the regular Unix shell will only +dnl strip `\n' with backquotes, not the `\r'. This results in +dnl boost_cv_lib_version='1_37\r' for instance, which breaks +dnl everything else. +dnl Cannot use 'dnl' after [$4] because a trailing dnl may break AC_CACHE_CHECK +(eval "$ac_cpp conftest.$ac_ext") 2>&AS_MESSAGE_LOG_FD | + tr -d '\r' | + $SED -n -e "$1" >conftest.i 2>&1], + [$3], + [$4]) +rm -rf conftest* +AC_LANG_POP([C++])dnl +])# _BOOST_SED_CPP + + + +# BOOST_REQUIRE([VERSION], [ACTION-IF-NOT-FOUND]) +# ----------------------------------------------- +# Look for Boost. If version is given, it must either be a literal of the form +# "X.Y.Z" where X, Y and Z are integers (the ".Z" part being optional) or a +# variable "$var". +# Defines the value BOOST_CPPFLAGS. This macro only checks for headers with +# the required version, it does not check for any of the Boost libraries. +# On success, defines HAVE_BOOST. On failure, calls the optional +# ACTION-IF-NOT-FOUND action if one was supplied. +# Otherwise aborts with an error message. +AC_DEFUN([BOOST_REQUIRE], +[AC_REQUIRE([AC_PROG_CXX])dnl +AC_REQUIRE([AC_PROG_GREP])dnl +echo "$as_me: this is boost.m4[]_BOOST_SERIAL" >&AS_MESSAGE_LOG_FD +boost_save_IFS=$IFS +boost_version_req=$1 +IFS=. 
+set x $boost_version_req 0 0 0 +IFS=$boost_save_IFS +shift +boost_version_req=`expr "$[1]" '*' 100000 + "$[2]" '*' 100 + "$[3]"` +boost_version_req_string=$[1].$[2].$[3] +AC_ARG_WITH([boost], + [AS_HELP_STRING([--with-boost=DIR], + [prefix of Boost $1 @<:@guess@:>@])])dnl +AC_ARG_VAR([BOOST_ROOT],[Location of Boost installation])dnl +# If BOOST_ROOT is set and the user has not provided a value to +# --with-boost, then treat BOOST_ROOT as if it the user supplied it. +if test x"$BOOST_ROOT" != x; then + if test x"$with_boost" = x; then + AC_MSG_NOTICE([Detected BOOST_ROOT; continuing with --with-boost=$BOOST_ROOT]) + with_boost=$BOOST_ROOT + else + AC_MSG_NOTICE([Detected BOOST_ROOT=$BOOST_ROOT, but overridden by --with-boost=$with_boost]) + fi +fi +AC_SUBST([DISTCHECK_CONFIGURE_FLAGS], + ["$DISTCHECK_CONFIGURE_FLAGS '--with-boost=$with_boost'"])dnl +boost_save_CPPFLAGS=$CPPFLAGS + AC_CACHE_CHECK([for Boost headers version >= $boost_version_req_string], + [boost_cv_inc_path], + [boost_cv_inc_path=no +AC_LANG_PUSH([C++])dnl +m4_pattern_allow([^BOOST_VERSION$])dnl + AC_LANG_CONFTEST([AC_LANG_PROGRAM([[#include +#if !defined BOOST_VERSION +# error BOOST_VERSION is not defined +#elif BOOST_VERSION < $boost_version_req +# error Boost headers version < $boost_version_req +#endif +]])]) + # If the user provided a value to --with-boost, use it and only it. + case $with_boost in #( + ''|yes) set x '' /opt/local/include /usr/local/include /opt/include \ + /usr/include C:/Boost/include;; #( + *) set x "$with_boost/include" "$with_boost";; + esac + shift + for boost_dir + do + # Without --layout=system, Boost (or at least some versions) installs + # itself in /include/boost-. This inner loop helps to + # find headers in such directories. + # + # Any ${boost_dir}/boost-x_xx directories are searched in reverse version + # order followed by ${boost_dir}. The final '.' is a sentinel for + # searching $boost_dir" itself. Entries are whitespace separated. 
+ # + # I didn't indent this loop on purpose (to avoid over-indented code) + boost_layout_system_search_list=`cd "$boost_dir" 2>/dev/null \ + && ls -1 | "${GREP}" '^boost-' | sort -rn -t- -k2 \ + && echo .` + for boost_inc in $boost_layout_system_search_list + do + if test x"$boost_inc" != x.; then + boost_inc="$boost_dir/$boost_inc" + else + boost_inc="$boost_dir" # Uses sentinel in boost_layout_system_search_list + fi + if test x"$boost_inc" != x; then + # We are going to check whether the version of Boost installed + # in $boost_inc is usable by running a compilation that + # #includes it. But if we pass a -I/some/path in which Boost + # is not installed, the compiler will just skip this -I and + # use other locations (either from CPPFLAGS, or from its list + # of system include directories). As a result we would use + # header installed on the machine instead of the /some/path + # specified by the user. So in that precise case (trying + # $boost_inc), make sure the version.hpp exists. + # + # Use test -e as there can be symlinks. 
+ test -e "$boost_inc/boost/version.hpp" || continue + CPPFLAGS="$CPPFLAGS -I$boost_inc" + fi + AC_COMPILE_IFELSE([], [boost_cv_inc_path=yes], [boost_cv_version=no]) + if test x"$boost_cv_inc_path" = xyes; then + if test x"$boost_inc" != x; then + boost_cv_inc_path=$boost_inc + fi + break 2 + fi + done + done +AC_LANG_POP([C++])dnl + ]) + case $boost_cv_inc_path in #( + no) + boost_errmsg="cannot find Boost headers version >= $boost_version_req_string" + m4_if([$2], [], [AC_MSG_ERROR([$boost_errmsg])], + [AC_MSG_NOTICE([$boost_errmsg])]) + $2 + ;;#( + yes) + BOOST_CPPFLAGS= + ;;#( + *) + AC_SUBST([BOOST_CPPFLAGS], ["-I$boost_cv_inc_path"])dnl + ;; + esac + if test x"$boost_cv_inc_path" != xno; then + AC_DEFINE([HAVE_BOOST], [1], + [Defined if the requested minimum BOOST version is satisfied]) + AC_CACHE_CHECK([for Boost's header version], + [boost_cv_lib_version], + [m4_pattern_allow([^BOOST_LIB_VERSION$])dnl + _BOOST_SED_CPP([/^boost-lib-version = /{s///;s/\"//g;p;q;}], + [#include +boost-lib-version = BOOST_LIB_VERSION], + [boost_cv_lib_version=`cat conftest.i`])]) + # e.g. "134" for 1_34_1 or "135" for 1_35 + boost_major_version=`echo "$boost_cv_lib_version" | sed 's/_//;s/_.*//'` + case $boost_major_version in #( + '' | *[[!0-9]]*) + AC_MSG_ERROR([invalid value: boost_major_version=$boost_major_version]) + ;; + esac +fi +CPPFLAGS=$boost_save_CPPFLAGS +])# BOOST_REQUIRE + + +# BOOST_STATIC() +# -------------- +# Add the "--enable-static-boost" configure argument. If this argument is given +# on the command line, static versions of the libraries will be looked up. 
+# Passing --disable-static-boost (or --enable-static-boost=no) keeps the
+# shared libraries preferred, exactly as if the option had been omitted.
+AC_DEFUN([BOOST_STATIC],
+  [AC_ARG_ENABLE([static-boost],
+     [AS_HELP_STRING([--enable-static-boost],
+               [Prefer the static boost libraries over the shared ones [no]])],
+     [dnl The given-action also runs for --disable-static-boost and for
+dnl --enable-static-boost=no so honour enableval instead of assuming yes.
+if test x"$enableval" = xno; then
+  enable_static_boost=no
+else
+  enable_static_boost=yes
+fi],
+     [enable_static_boost=no])])# BOOST_STATIC
+
+
+# BOOST_FIND_HEADER([HEADER-NAME], [ACTION-IF-NOT-FOUND], [ACTION-IF-FOUND])
+# --------------------------------------------------------------------------
+# Wrapper around AC_CHECK_HEADER for Boost headers. Useful to check for
+# some parts of the Boost library which are only made of headers and don't
+# require linking (such as Boost.Foreach).
+#
+# Default ACTION-IF-NOT-FOUND: Fail with a fatal error unless Boost couldn't be
+# found in the first place, in which case by default a notice is issued to the
+# user. Presumably if we haven't died already it's because it's OK to not have
+# Boost, which is why only a notice is issued instead of a hard error.
+#
+# Default ACTION-IF-FOUND: define the preprocessor symbol HAVE_<HEADER-NAME> in
+# case of success (where HEADER-NAME is written LIKE_THIS, e.g.,
+# HAVE_BOOST_FOREACH_HPP).
+AC_DEFUN([BOOST_FIND_HEADER],
+[AC_REQUIRE([BOOST_REQUIRE])dnl
+if test x"$boost_cv_inc_path" = xno; then
+  m4_default([$2], [AC_MSG_NOTICE([Boost not available, not searching for $1])])
+else
+AC_LANG_PUSH([C++])dnl
+boost_save_CPPFLAGS=$CPPFLAGS
+CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
+AC_CHECK_HEADER([$1],
+  [m4_default([$3], [AC_DEFINE(AS_TR_CPP([HAVE_$1]), [1],
+                               [Define to 1 if you have <$1>])])],
+  [m4_default([$2], [AC_MSG_ERROR([cannot find $1])])])
+CPPFLAGS=$boost_save_CPPFLAGS
+AC_LANG_POP([C++])dnl
+fi
+])# BOOST_FIND_HEADER
+
+
+# BOOST_FIND_LIBS([COMPONENT-NAME], [CANDIDATE-LIB-NAMES],
+# [PREFERRED-RT-OPT], [HEADER-NAME], [CXX-TEST],
+# [CXX-PROLOGUE])
+# --------------------------------------------------------------
+# Look for the Boost library COMPONENT-NAME (e.g., `thread', for
+# libboost_thread) under the possible CANDIDATE-LIB-NAMES (e.g.,
+# "thread_win32 thread").
Check that HEADER-NAME works and check that
+# libboost_LIB-NAME can link with the code CXX-TEST. The optional
+# argument CXX-PROLOGUE can be used to include some C++ code before
+# the `main' function.
+#
+# Invokes BOOST_FIND_HEADER([HEADER-NAME]) (see above).
+#
+# Boost libraries typically come compiled with several flavors (with different
+# runtime options) so PREFERRED-RT-OPT is the preferred suffix. A suffix is one
+# or more of the following letters: sgdpn (in that order). s = static
+# runtime, d = debug build, g = debug/diagnostic runtime, p = STLPort build,
+# n = (unsure) STLPort build without iostreams from STLPort (it looks like `n'
+# must always be used along with `p'). Additionally, PREFERRED-RT-OPT can
+# start with `mt-' to indicate that there is a preference for multi-thread
+# builds. Some sample values for PREFERRED-RT-OPT: (nothing), mt, d, mt-d, gdp
+# ... If you want to make sure you have a specific version of Boost
+# (eg, >= 1.33) you *must* invoke BOOST_REQUIRE before this macro.
+#
+# On success the following variables are AC_SUBSTed, where the component name
+# is transformed by AS_TR_CPP: BOOST_<COMPONENT-NAME>_LDFLAGS,
+# BOOST_<COMPONENT-NAME>_LDPATH and BOOST_<COMPONENT-NAME>_LIBS, plus
+# BOOST_LDPATH. If Boost headers were found but no working link flags can be
+# determined, configure aborts with an error.
+AC_DEFUN([BOOST_FIND_LIBS],
+[AC_REQUIRE([BOOST_REQUIRE])dnl
+AC_REQUIRE([_BOOST_FIND_COMPILER_TAG])dnl
+AC_REQUIRE([BOOST_STATIC])dnl
+AC_REQUIRE([_BOOST_GUESS_WHETHER_TO_USE_MT])dnl
+if test x"$boost_cv_inc_path" = xno; then
+  AC_MSG_NOTICE([Boost not available, not searching for the Boost $1 library])
+else
+dnl The else branch is huge and wasn't indented on purpose.
+AC_LANG_PUSH([C++])dnl
+AS_VAR_PUSHDEF([Boost_lib], [boost_cv_lib_$1])dnl
+AS_VAR_PUSHDEF([Boost_lib_LDFLAGS], [boost_cv_lib_$1_LDFLAGS])dnl
+AS_VAR_PUSHDEF([Boost_lib_LDPATH], [boost_cv_lib_$1_LDPATH])dnl
+AS_VAR_PUSHDEF([Boost_lib_LIBS], [boost_cv_lib_$1_LIBS])dnl
+BOOST_FIND_HEADER([$4])
+boost_save_CPPFLAGS=$CPPFLAGS
+CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
+AC_CACHE_CHECK([for the Boost $1 library], [Boost_lib],
+               [_BOOST_FIND_LIBS($@)])
+case $Boost_lib in #(
+  (no) _AC_MSG_LOG_CONFTEST
+    AC_MSG_ERROR([cannot find the flags to link with Boost $1])
+    ;;
+esac
+AC_SUBST(AS_TR_CPP([BOOST_$1_LDFLAGS]), [$Boost_lib_LDFLAGS])dnl
+AC_SUBST(AS_TR_CPP([BOOST_$1_LDPATH]), [$Boost_lib_LDPATH])dnl
+AC_SUBST([BOOST_LDPATH], [$Boost_lib_LDPATH])dnl
+AC_SUBST(AS_TR_CPP([BOOST_$1_LIBS]), [$Boost_lib_LIBS])dnl
+CPPFLAGS=$boost_save_CPPFLAGS
+AS_VAR_POPDEF([Boost_lib])dnl
+AS_VAR_POPDEF([Boost_lib_LDFLAGS])dnl
+AS_VAR_POPDEF([Boost_lib_LDPATH])dnl
+AS_VAR_POPDEF([Boost_lib_LIBS])dnl
+AC_LANG_POP([C++])dnl
+fi
+])
+
+
+# BOOST_FIND_LIB([LIB-NAME],
+# [PREFERRED-RT-OPT], [HEADER-NAME], [CXX-TEST],
+# [CXX-PROLOGUE])
+# --------------------------------------------------------------
+# Backward compatibility wrapper for BOOST_FIND_LIBS. LIB-NAME is passed both
+# as the component name and as the only candidate library name.
+AC_DEFUN([BOOST_FIND_LIB],
+[BOOST_FIND_LIBS([$1], $@)])
+
+
+# _BOOST_FIND_LIBS([LIB-NAME], [CANDIDATE-LIB-NAMES],
+# [PREFERRED-RT-OPT], [HEADER-NAME], [CXX-TEST],
+# [CXX-PROLOGUE])
+# --------------------------------------------------------------
+# Real implementation of BOOST_FIND_LIBS: rely on these local macros:
+# Boost_lib, Boost_lib_LDFLAGS, Boost_lib_LDPATH, Boost_lib_LIBS
+#
+# The algorithm is as follows: first look for a given library name
+# according to the user's PREFERRED-RT-OPT. For each library name, we
+# prefer to use the ones that carry the tag (toolset name). Each
+# library is searched through the various standard paths where Boost is
+# usually installed.
If we can't find the standard variants, we try
+# to enforce -mt (for instance on MacOSX, libboost_thread.dylib
+# doesn't exist but there's -obviously- libboost_thread-mt.dylib).
+AC_DEFUN([_BOOST_FIND_LIBS],
+[Boost_lib=no
+  case "$3" in #(
+    (mt | mt-) boost_mt=-mt; boost_rtopt=;; #(
+    (mt* | mt-*) boost_mt=-mt; boost_rtopt=`expr "X$3" : 'Xmt-*\(.*\)'`;; #(
+    (*) boost_mt=; boost_rtopt=$3;;
+  esac
+  if test $enable_static_boost = yes; then
+    boost_rtopt="s$boost_rtopt"
+  fi
+  # Find the proper debug variant depending on what we've been asked to find.
+dnl boost_rt_d is not read anywhere else in this macro, so the value computed
+dnl here does not influence which libraries are probed below.
+  case $boost_rtopt in #(
+    (*d*) boost_rt_d=$boost_rtopt;; #(
+    (*[[sgpn]]*) # Insert the `d' at the right place (in between `sg' and `pn')
+      boost_rt_d=`echo "$boost_rtopt" | sed 's/\(s*g*\)\(p*n*\)/\1d\2/'`;; #(
+    (*) boost_rt_d='-d';;
+  esac
+  # If the PREFERRED-RT-OPT are not empty, prepend a `-'.
+  test -n "$boost_rtopt" && boost_rtopt="-$boost_rtopt"
+  $boost_guess_use_mt && boost_mt=-mt
+  # Look for the abs path of the static archive.
+  # $libext is computed by Libtool but let's make sure it's non empty.
+  test -z "$libext" &&
+    AC_MSG_ERROR([the libext variable is empty, did you invoke Libtool?])
+  boost_save_ac_objext=$ac_objext
+  # Generate the test file.
+  AC_LANG_CONFTEST([AC_LANG_PROGRAM([#include <$4>
+$6], [$5])])
+dnl Optimization hacks: compiling C++ is slow, especially with Boost. What
+dnl we're trying to do here is guess the right combination of link flags
+dnl (LIBS / LDFLAGS) to use a given library. This can take several
+dnl iterations before it succeeds and is thus *very* slow. So what we do
+dnl instead is that we compile the code first (and thus get an object file,
+dnl typically conftest.o). Then we try various combinations of link flags
+dnl until we succeed to link conftest.o in an executable. The problem is
+dnl that the various TRY_LINK / COMPILE_IFELSE macros of Autoconf always
+dnl remove all the temporary files including conftest.o. So the trick here
+dnl is to temporarily change the value of ac_objext so that conftest.o is
+dnl preserved across tests. This is obviously fragile and I will burn in
+dnl hell for not respecting Autoconf's documented interfaces, but in the
+dnl mean time, it optimizes the macro by a factor of 5 to 30.
+dnl Another small optimization: the first argument of AC_COMPILE_IFELSE left
+dnl empty because the test file is generated only once above (before we
+dnl start the for loops).
+  AC_COMPILE_IFELSE([],
+    [ac_objext=do_not_rm_me_plz],
+    [AC_MSG_ERROR([cannot compile a test that uses Boost $1])])
+  ac_objext=$boost_save_ac_objext
+  boost_failed_libs=
+# Don't bother to indent the following nested for loops, only the 2
+# innermost ones matter.
+for boost_lib_ in $2; do
+for boost_tag_ in -$boost_cv_lib_tag ''; do
+for boost_ver_ in -$boost_cv_lib_version ''; do
+for boost_mt_ in $boost_mt -mt ''; do
+for boost_rtopt_ in $boost_rtopt '' -d; do
+  for boost_lib in \
+    boost_$boost_lib_$boost_tag_$boost_mt_$boost_rtopt_$boost_ver_ \
+    boost_$boost_lib_$boost_tag_$boost_rtopt_$boost_ver_ \
+    boost_$boost_lib_$boost_tag_$boost_mt_$boost_ver_ \
+    boost_$boost_lib_$boost_tag_$boost_ver_
+  do
+    # Avoid testing twice the same lib
+    case $boost_failed_libs in #(
+      (*@$boost_lib@*) continue;;
+    esac
+    # If with_boost is empty, we'll search in /lib first, which is not quite
+    # right so instead we'll try to a location based on where the headers are.
+    boost_tmp_lib=$with_boost
+    test x"$with_boost" = x && boost_tmp_lib=${boost_cv_inc_path%/include}
+    for boost_ldpath in "$boost_tmp_lib/lib" '' \
+             /opt/local/lib* /usr/local/lib* /opt/lib* /usr/lib* \
+             "$with_boost" C:/Boost/lib /lib*
+    do
+      # Don't waste time with directories that don't exist.
+      if test x"$boost_ldpath" != x && test ! -e "$boost_ldpath"; then
+        continue
+      fi
+      boost_save_LDFLAGS=$LDFLAGS
+      # Are we looking for a static library?
+      case $boost_ldpath:$boost_rtopt_ in #(
+        (*?*:*s*) # Yes (Non empty boost_ldpath + s in rt opt)
+          Boost_lib_LIBS="$boost_ldpath/lib$boost_lib.$libext"
+          test -e "$Boost_lib_LIBS" || continue;; #(
+        (*) # No: use -lboost_foo to find the shared library.
+          Boost_lib_LIBS="-l$boost_lib";;
+      esac
+      boost_save_LIBS=$LIBS
+      LIBS="$Boost_lib_LIBS $LIBS"
+      test x"$boost_ldpath" != x && LDFLAGS="$LDFLAGS -L$boost_ldpath"
+dnl First argument of AC_LINK_IFELSE left empty because the test file is
+dnl generated only once above (before we start the for loops).
+      _BOOST_AC_LINK_IFELSE([],
+                            [Boost_lib=yes], [Boost_lib=no])
+      ac_objext=$boost_save_ac_objext
+      LDFLAGS=$boost_save_LDFLAGS
+      LIBS=$boost_save_LIBS
+      if test x"$Boost_lib" = xyes; then
+        # Check or used cached result of whether or not using -R or
+        # -rpath makes sense. Some implementations of ld, such as for
+        # Mac OSX, require -rpath but -R is the flag known to work on
+        # other systems. https://github.com/tsuna/boost.m4/issues/19
+        AC_CACHE_VAL([boost_cv_rpath_link_ldflag],
+          [case $boost_ldpath in
+           '') # Nothing to do.
+             boost_cv_rpath_link_ldflag=
+             boost_rpath_link_ldflag_found=yes;;
+           *)
+            for boost_cv_rpath_link_ldflag in -Wl,-R, -Wl,-rpath,; do
+              LDFLAGS="$boost_save_LDFLAGS -L$boost_ldpath $boost_cv_rpath_link_ldflag$boost_ldpath"
+              LIBS="$boost_save_LIBS $Boost_lib_LIBS"
+              _BOOST_AC_LINK_IFELSE([],
+                [boost_rpath_link_ldflag_found=yes
+                break],
+                [boost_rpath_link_ldflag_found=no])
+            done
+            ;;
+          esac
+          AS_IF([test "x$boost_rpath_link_ldflag_found" != "xyes"],
+            [AC_MSG_ERROR([Unable to determine whether to use -R or -rpath])])
+          LDFLAGS=$boost_save_LDFLAGS
+          LIBS=$boost_save_LIBS
+          ])
+        test x"$boost_ldpath" != x &&
+          Boost_lib_LDFLAGS="-L$boost_ldpath $boost_cv_rpath_link_ldflag$boost_ldpath"
+        Boost_lib_LDPATH="$boost_ldpath"
+        break 7
+      else
+        boost_failed_libs="$boost_failed_libs@$boost_lib@"
+      fi
+    done
+  done
+done
+done
+done
+done
+done # boost_lib_
+rm -f conftest.$ac_objext
+])
+
+
+
+# --------------------------------------- #
+# Checks for the various Boost libraries. #
+# --------------------------------------- #
+
+# List of boost libraries: http://www.boost.org/libs/libraries.htm
+# The page http://beta.boost.org/doc/libs is useful: it gives the first release
+# version of each library (among other things).
+
+# BOOST_DEFUN(LIBRARY, CODE)
+# --------------------------
+# Define BOOST_<LIBRARY-UPPERCASE> as a macro that runs CODE.
+#
+# Use indir to avoid the warning on underquoted macro name given to AC_DEFUN.
+m4_define([BOOST_DEFUN],
+[m4_indir([AC_DEFUN],
+          m4_toupper([BOOST_$1]),
+[m4_pushdef([BOOST_Library], [$1])dnl
+$2
+m4_popdef([BOOST_Library])dnl
+])
+])
+
+# BOOST_ARRAY()
+# -------------
+# Look for Boost.Array
+BOOST_DEFUN([Array],
+[BOOST_FIND_HEADER([boost/array.hpp])])
+
+
+# BOOST_ASIO()
+# ------------
+# Look for Boost.Asio (new in Boost 1.35).
+BOOST_DEFUN([Asio],
+[AC_REQUIRE([BOOST_SYSTEM])dnl
+BOOST_FIND_HEADER([boost/asio.hpp])])
+
+
+# BOOST_BIND()
+# ------------
+# Look for Boost.Bind.
+BOOST_DEFUN([Bind],
+[BOOST_FIND_HEADER([boost/bind.hpp])])
+
+
+# BOOST_CHRONO([PREFERRED-RT-OPT])
+# --------------------------------
+# Look for Boost.Chrono. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Chrono],
+[# Do we have to check for Boost.System? This link-time dependency was
+# added as of 1.35.0. If we have a version <1.35, we must not attempt to
+# find Boost.System as it didn't exist by then.
+if test $boost_major_version -ge 135; then
+  BOOST_SYSTEM([$1])
+fi # end of the Boost.System check.
+boost_chrono_save_LIBS=$LIBS
+boost_chrono_save_LDFLAGS=$LDFLAGS
+m4_pattern_allow([^BOOST_SYSTEM_(LIBS|LDFLAGS)$])dnl
+LIBS="$LIBS $BOOST_SYSTEM_LIBS"
+LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS"
+BOOST_FIND_LIB([chrono], [$1],
+               [boost/chrono.hpp],
+               [boost::chrono::thread_clock d;])
+dnl For static builds the libraries of Boost.System are appended to the
+dnl result of this check, not to the result of the filesystem check.
+if test $enable_static_boost = yes && test $boost_major_version -ge 135; then
+  BOOST_CHRONO_LIBS="$BOOST_CHRONO_LIBS $BOOST_SYSTEM_LIBS"
+fi
+LIBS=$boost_chrono_save_LIBS
+LDFLAGS=$boost_chrono_save_LDFLAGS
+])# BOOST_CHRONO
+
+
+# BOOST_CONVERSION()
+# ------------------
+# Look for Boost.Conversion (cast / lexical_cast)
+BOOST_DEFUN([Conversion],
+[BOOST_FIND_HEADER([boost/cast.hpp])
+BOOST_FIND_HEADER([boost/lexical_cast.hpp])
+])# BOOST_CONVERSION
+
+
+# BOOST_CRC()
+# -----------
+# Look for Boost.CRC
+BOOST_DEFUN([CRC],
+[BOOST_FIND_HEADER([boost/crc.hpp])
+])# BOOST_CRC
+
+
+# BOOST_DATE_TIME([PREFERRED-RT-OPT])
+# -----------------------------------
+# Look for Boost.Date_Time. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Date_Time],
+[BOOST_FIND_LIB([date_time], [$1],
+                [boost/date_time/posix_time/posix_time.hpp],
+                [boost::posix_time::ptime t;])
+])# BOOST_DATE_TIME
+
+
+# BOOST_FILESYSTEM([PREFERRED-RT-OPT])
+# ------------------------------------
+# Look for Boost.Filesystem. For the documentation of PREFERRED-RT-OPT, see
+# the documentation of BOOST_FIND_LIB above.
+# Do not check for boost/filesystem.hpp because this file was introduced in
+# 1.34.
+BOOST_DEFUN([Filesystem],
+[# Do we have to check for Boost.System? This link-time dependency was
+# added as of 1.35.0. If we have a version <1.35, we must not attempt to
+# find Boost.System as it didn't exist by then.
+if test $boost_major_version -ge 135; then
+  BOOST_SYSTEM([$1])
+fi # end of the Boost.System check.
+dnl Temporarily add the flags found for Boost.System so that the link test
+dnl below can use them. LIBS and LDFLAGS are restored at the end.
+boost_filesystem_save_LIBS=$LIBS
+boost_filesystem_save_LDFLAGS=$LDFLAGS
+m4_pattern_allow([^BOOST_SYSTEM_(LIBS|LDFLAGS)$])dnl
+LIBS="$LIBS $BOOST_SYSTEM_LIBS"
+LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS"
+BOOST_FIND_LIB([filesystem], [$1],
+                [boost/filesystem/path.hpp], [boost::filesystem::path p;])
+dnl When static libraries were requested the libraries of Boost.System are
+dnl also exposed through the filesystem result, presumably because a static
+dnl archive does not pull in its own dependencies.
+if test $enable_static_boost = yes && test $boost_major_version -ge 135; then
+  BOOST_FILESYSTEM_LIBS="$BOOST_FILESYSTEM_LIBS $BOOST_SYSTEM_LIBS"
+fi
+LIBS=$boost_filesystem_save_LIBS
+LDFLAGS=$boost_filesystem_save_LDFLAGS
+])# BOOST_FILESYSTEM
+
+
+# BOOST_FLYWEIGHT()
+# -----------------
+# Look for Boost.Flyweight. The pthread flag computed by _BOOST_PTHREAD_FLAG
+# is substituted as BOOST_FLYWEIGHT_LIBS.
+BOOST_DEFUN([Flyweight],
+[dnl There's a hidden dependency on pthreads.
+AC_REQUIRE([_BOOST_PTHREAD_FLAG])dnl
+BOOST_FIND_HEADER([boost/flyweight.hpp])
+AC_SUBST([BOOST_FLYWEIGHT_LIBS], [$boost_cv_pthread_flag])
+])
+
+
+# BOOST_FOREACH()
+# ---------------
+# Look for Boost.Foreach.
+BOOST_DEFUN([Foreach],
+[BOOST_FIND_HEADER([boost/foreach.hpp])])
+
+
+# BOOST_FORMAT()
+# --------------
+# Look for Boost.Format.
+# Note: we can't check for boost/format/format_fwd.hpp because the header isn't
+# standalone.
It can't be compiled because it triggers the following error:
+# boost/format/detail/config_macros.hpp:88: error: 'locale' in namespace 'std'
+# does not name a type
+BOOST_DEFUN([Format],
+[BOOST_FIND_HEADER([boost/format.hpp])])
+
+
+# BOOST_FUNCTION()
+# ----------------
+# Look for Boost.Function
+BOOST_DEFUN([Function],
+[BOOST_FIND_HEADER([boost/function.hpp])])
+
+
+# BOOST_GEOMETRY()
+# ----------------
+# Look for Boost.Geometry (new since 1.47.0).
+BOOST_DEFUN([Geometry],
+[BOOST_FIND_HEADER([boost/geometry.hpp])
+])# BOOST_GEOMETRY
+
+
+# BOOST_GRAPH([PREFERRED-RT-OPT])
+# -------------------------------
+# Look for Boost.Graph. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Graph],
+[BOOST_FIND_LIB([graph], [$1],
+                [boost/graph/adjacency_list.hpp], [boost::adjacency_list<> g;])
+])# BOOST_GRAPH
+
+
+# BOOST_IOSTREAMS([PREFERRED-RT-OPT])
+# -----------------------------------
+# Look for Boost.IOStreams. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([IOStreams],
+[BOOST_FIND_LIB([iostreams], [$1],
+                [boost/iostreams/device/file_descriptor.hpp],
+                [boost::iostreams::file_descriptor fd; fd.close();])
+])# BOOST_IOSTREAMS
+
+
+# BOOST_HASH()
+# ------------
+# Look for Boost.Functional/Hash
+BOOST_DEFUN([Hash],
+[BOOST_FIND_HEADER([boost/functional/hash.hpp])])
+
+
+# BOOST_LAMBDA()
+# --------------
+# Look for Boost.Lambda
+BOOST_DEFUN([Lambda],
+[BOOST_FIND_HEADER([boost/lambda/lambda.hpp])])
+
+
+# BOOST_LOG([PREFERRED-RT-OPT])
+# -----------------------------
+# Look for Boost.Log. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Log],
+[BOOST_FIND_LIB([log], [$1],
+    [boost/log/core/core.hpp],
+    [boost::log::attribute a; a.get_value();])
+])# BOOST_LOG
+
+
+# BOOST_LOG_SETUP([PREFERRED-RT-OPT])
+# -----------------------------------
+# Look for the setup library of Boost.Log (boost_log_setup).
For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Log_Setup],
+[AC_REQUIRE([BOOST_LOG])dnl
+dnl basic_settings is a class template and needs its character type
+dnl argument, otherwise the test program cannot compile.
+BOOST_FIND_LIB([log_setup], [$1],
+    [boost/log/utility/setup/from_settings.hpp],
+    [boost::log::basic_settings<char> bs; bs.empty();])
+])# BOOST_LOG_SETUP
+
+
+# BOOST_MATH()
+# ------------
+# Look for Boost.Math
+# TODO: This library isn't header-only but it comes in multiple different
+# flavors that don't play well with BOOST_FIND_LIB (e.g, libboost_math_c99,
+# libboost_math_c99f, libboost_math_c99l, libboost_math_tr1,
+# libboost_math_tr1f, libboost_math_tr1l). This macro must be fixed to do the
+# right thing anyway.
+BOOST_DEFUN([Math],
+[BOOST_FIND_HEADER([boost/math/special_functions.hpp])])
+
+
+# BOOST_MPI([PREFERRED-RT-OPT])
+# -------------------------------
+# Look for Boost MPI. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above. Uses MPICXX variable if it is
+# set, otherwise tries CXX
+#
+BOOST_DEFUN([MPI],
+[boost_save_CXX=${CXX}
+boost_save_CXXCPP=${CXXCPP}
+if test x"${MPICXX}" != x; then
+  CXX=${MPICXX}
+  CXXCPP="${MPICXX} -E"
+fi
+BOOST_FIND_LIB([mpi], [$1],
+               [boost/mpi.hpp],
+               [int argc = 0;
+                char **argv = 0;
+                boost::mpi::environment env(argc,argv);])
+CXX=${boost_save_CXX}
+CXXCPP=${boost_save_CXXCPP}
+])# BOOST_MPI
+
+
+# BOOST_MULTIARRAY()
+# ------------------
+# Look for Boost.MultiArray
+BOOST_DEFUN([MultiArray],
+[BOOST_FIND_HEADER([boost/multi_array.hpp])])
+
+
+# BOOST_NUMERIC_UBLAS()
+# --------------------------
+# Look for Boost.NumericUblas (Basic Linear Algebra)
+BOOST_DEFUN([Numeric_Ublas],
+[BOOST_FIND_HEADER([boost/numeric/ublas/vector.hpp])
+])# BOOST_NUMERIC_UBLAS
+
+
+# BOOST_NUMERIC_CONVERSION()
+# --------------------------
+# Look for Boost.NumericConversion (policy-based numeric conversion)
+BOOST_DEFUN([Numeric_Conversion],
+[BOOST_FIND_HEADER([boost/numeric/conversion/converter.hpp])
+])# BOOST_NUMERIC_CONVERSION
+
+
+#
BOOST_OPTIONAL()
+# ----------------
+# Look for Boost.Optional
+BOOST_DEFUN([Optional],
+[BOOST_FIND_HEADER([boost/optional.hpp])])
+
+
+# BOOST_PREPROCESSOR()
+# --------------------
+# Look for Boost.Preprocessor
+BOOST_DEFUN([Preprocessor],
+[BOOST_FIND_HEADER([boost/preprocessor/repeat.hpp])])
+
+
+# BOOST_UNORDERED()
+# -----------------
+# Look for Boost.Unordered
+BOOST_DEFUN([Unordered],
+[BOOST_FIND_HEADER([boost/unordered_map.hpp])])
+
+
+# BOOST_UUID()
+# ------------
+# Look for Boost.Uuid
+BOOST_DEFUN([Uuid],
+[BOOST_FIND_HEADER([boost/uuid/uuid.hpp])])
+
+
+# BOOST_PROGRAM_OPTIONS([PREFERRED-RT-OPT])
+# -----------------------------------------
+# Look for Boost.Program_options. For the documentation of PREFERRED-RT-OPT,
+# see the documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Program_Options],
+[BOOST_FIND_LIB([program_options], [$1],
+                [boost/program_options.hpp],
+                [boost::program_options::options_description d("test");])
+])# BOOST_PROGRAM_OPTIONS
+
+
+
+# _BOOST_PYTHON_CONFIG(VARIABLE, FLAG)
+# ------------------------------------
+# Save VARIABLE, and define it via `python-config --FLAG`.
+# Substitute BOOST_PYTHON_VARIABLE.
+#
+# The previous value of VARIABLE is kept in boost_python_save_VARIABLE and is
+# not restored here; that is left to the caller. Error output of python-config
+# is discarded, so when python-config is missing or fails BOOST_PYTHON_VARIABLE
+# is empty and only a space is appended to VARIABLE.
+m4_define([_BOOST_PYTHON_CONFIG],
+[AC_SUBST([BOOST_PYTHON_$1],
+          [`python-config --$2 2>/dev/null`])dnl
+boost_python_save_$1=$$1
+$1="$$1 $BOOST_PYTHON_$1"])
+
+
+# BOOST_PYTHON([PREFERRED-RT-OPT])
+# --------------------------------
+# Look for Boost.Python. For the documentation of PREFERRED-RT-OPT,
+# see the documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Python],
+[_BOOST_PYTHON_CONFIG([CPPFLAGS], [includes])
+_BOOST_PYTHON_CONFIG([LDFLAGS], [ldflags])
+_BOOST_PYTHON_CONFIG([LIBS], [libs])
+m4_pattern_allow([^BOOST_PYTHON_MODULE$])dnl
+BOOST_FIND_LIBS([python], [python python3], [$1],
+                [boost/python.hpp],
+                [], [BOOST_PYTHON_MODULE(empty) {}])
+dnl Undo the temporary additions made by the three config calls above.
+CPPFLAGS=$boost_python_save_CPPFLAGS
+LDFLAGS=$boost_python_save_LDFLAGS
+LIBS=$boost_python_save_LIBS
+])# BOOST_PYTHON
+
+
+# BOOST_REF()
+# -----------
+# Look for Boost.Ref
+BOOST_DEFUN([Ref],
+[BOOST_FIND_HEADER([boost/ref.hpp])])
+
+
+# BOOST_REGEX([PREFERRED-RT-OPT])
+# -------------------------------
+# Look for Boost.Regex. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Regex],
+[BOOST_FIND_LIB([regex], [$1],
+                [boost/regex.hpp],
+                [boost::regex exp("*"); boost::regex_match("foo", exp);])
+])# BOOST_REGEX
+
+
+# BOOST_SERIALIZATION([PREFERRED-RT-OPT])
+# ---------------------------------------
+# Look for Boost.Serialization. For the documentation of PREFERRED-RT-OPT, see
+# the documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Serialization],
+[BOOST_FIND_LIB([serialization], [$1],
+                [boost/archive/text_oarchive.hpp],
+                [std::ostream* o = 0; // Cheap way to get an ostream...
+                boost::archive::text_oarchive t(*o);])
+])# BOOST_SERIALIZATION
+
+
+# BOOST_SIGNALS([PREFERRED-RT-OPT])
+# ---------------------------------
+# Look for Boost.Signals. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Signals],
+[dnl boost::signal is a class template and needs a signature argument,
+dnl otherwise the test program cannot compile.
+BOOST_FIND_LIB([signals], [$1],
+                [boost/signal.hpp],
+                [boost::signal<void ()> s;])
+])# BOOST_SIGNALS
+
+
+# BOOST_SIGNALS2()
+# ----------------
+# Look for Boost.Signals2 (new since 1.39.0).
+BOOST_DEFUN([Signals2],
+[BOOST_FIND_HEADER([boost/signals2.hpp])
+])# BOOST_SIGNALS2
+
+
+# BOOST_SMART_PTR()
+# -----------------
+# Look for Boost.SmartPtr
+BOOST_DEFUN([Smart_Ptr],
+[BOOST_FIND_HEADER([boost/scoped_ptr.hpp])
+BOOST_FIND_HEADER([boost/shared_ptr.hpp])
+])
+
+
+# BOOST_STATICASSERT()
+# --------------------
+# Look for Boost.StaticAssert
+BOOST_DEFUN([StaticAssert],
+[BOOST_FIND_HEADER([boost/static_assert.hpp])])
+
+
+# BOOST_STRING_ALGO()
+# -------------------
+# Look for Boost.StringAlgo
+BOOST_DEFUN([String_Algo],
+[BOOST_FIND_HEADER([boost/algorithm/string.hpp])
+])
+
+
+# BOOST_SYSTEM([PREFERRED-RT-OPT])
+# --------------------------------
+# Look for Boost.System. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above. This library was introduced in Boost
+# 1.35.0.
+BOOST_DEFUN([System],
+[BOOST_FIND_LIB([system], [$1],
+                [boost/system/error_code.hpp],
+                [boost::system::error_code e; e.clear();])
+])# BOOST_SYSTEM
+
+
+# BOOST_TEST([PREFERRED-RT-OPT])
+# ------------------------------
+# Look for Boost.Test. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Test],
+[m4_pattern_allow([^BOOST_CHECK$])dnl
+BOOST_FIND_LIB([unit_test_framework], [$1],
+               [boost/test/unit_test.hpp], [BOOST_CHECK(2 == 2);],
+               [using boost::unit_test::test_suite;
+               test_suite* init_unit_test_suite(int argc, char ** argv)
+               { return NULL; }])
+])# BOOST_TEST
+
+
+# BOOST_THREAD([PREFERRED-RT-OPT])
+# --------------------------------
+# Look for Boost.Thread. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Thread],
+[dnl Having the pthread flag is required at least on GCC3 where
+dnl boost/thread.hpp would complain if we try to compile without
+dnl -pthread on GNU/Linux.
+AC_REQUIRE([_BOOST_PTHREAD_FLAG])dnl
+boost_thread_save_LIBS=$LIBS
+boost_thread_save_LDFLAGS=$LDFLAGS
+boost_thread_save_CPPFLAGS=$CPPFLAGS
+# Link-time dependency from thread to system was added as of 1.49.0.
+if test $boost_major_version -ge 149; then
+BOOST_SYSTEM([$1])
+fi # end of the Boost.System check.
+m4_pattern_allow([^BOOST_SYSTEM_(LIBS|LDFLAGS)$])dnl
+LIBS="$LIBS $BOOST_SYSTEM_LIBS $boost_cv_pthread_flag"
+LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS"
+CPPFLAGS="$CPPFLAGS $boost_cv_pthread_flag"
+
+# When compiling for the Windows platform, the threads library is named
+# differently.
+dnl The suffix variable is only assigned for mingw hosts. On any other host
+dnl it keeps whatever value it already had, which is normally empty.
+case $host_os in
+  (*mingw*) boost_thread_lib_ext=_win32;;
+esac
+BOOST_FIND_LIBS([thread], [thread$boost_thread_lib_ext],
+                [$1],
+                [boost/thread.hpp], [boost::thread t; boost::mutex m;])
+
+dnl NOTE(review): the LDFLAGS assignment below replaces, rather than extends,
+dnl the value that the library search above substituted for the thread
+dnl library. Confirm that dropping that value is intended.
+BOOST_THREAD_LIBS="$BOOST_THREAD_LIBS $BOOST_SYSTEM_LIBS $boost_cv_pthread_flag"
+BOOST_THREAD_LDFLAGS="$BOOST_SYSTEM_LDFLAGS"
+BOOST_CPPFLAGS="$BOOST_CPPFLAGS $boost_cv_pthread_flag"
+LIBS=$boost_thread_save_LIBS
+LDFLAGS=$boost_thread_save_LDFLAGS
+CPPFLAGS=$boost_thread_save_CPPFLAGS
+])# BOOST_THREAD
+
+AU_ALIAS([BOOST_THREADS], [BOOST_THREAD])
+
+
+# BOOST_TOKENIZER()
+# -----------------
+# Look for Boost.Tokenizer
+BOOST_DEFUN([Tokenizer],
+[BOOST_FIND_HEADER([boost/tokenizer.hpp])])
+
+
+# BOOST_TRIBOOL()
+# ---------------
+# Look for Boost.Tribool
+BOOST_DEFUN([Tribool],
+[BOOST_FIND_HEADER([boost/logic/tribool_fwd.hpp])
+BOOST_FIND_HEADER([boost/logic/tribool.hpp])
+])
+
+
+# BOOST_TUPLE()
+# -------------
+# Look for Boost.Tuple
+BOOST_DEFUN([Tuple],
+[BOOST_FIND_HEADER([boost/tuple/tuple.hpp])])
+
+
+# BOOST_TYPETRAITS()
+# ------------------
+# Look for Boost.TypeTraits
+BOOST_DEFUN([TypeTraits],
+[BOOST_FIND_HEADER([boost/type_traits.hpp])])
+
+
+# BOOST_UTILITY()
+# ---------------
+# Look for Boost.Utility (noncopyable, result_of, base-from-member idiom,
+# etc.)
+BOOST_DEFUN([Utility],
+[BOOST_FIND_HEADER([boost/utility.hpp])])
+
+
+# BOOST_VARIANT()
+# ---------------
+# Look for Boost.Variant.
+BOOST_DEFUN([Variant],
+[BOOST_FIND_HEADER([boost/variant/variant_fwd.hpp])
+BOOST_FIND_HEADER([boost/variant.hpp])])
+
+
+# BOOST_POINTER_CONTAINER()
+# -------------------------
+# Look for Boost.PointerContainer
+BOOST_DEFUN([Pointer_Container],
+[BOOST_FIND_HEADER([boost/ptr_container/ptr_deque.hpp])
+BOOST_FIND_HEADER([boost/ptr_container/ptr_list.hpp])
+BOOST_FIND_HEADER([boost/ptr_container/ptr_vector.hpp])
+BOOST_FIND_HEADER([boost/ptr_container/ptr_array.hpp])
+BOOST_FIND_HEADER([boost/ptr_container/ptr_set.hpp])
+BOOST_FIND_HEADER([boost/ptr_container/ptr_map.hpp])
+])# BOOST_POINTER_CONTAINER
+
+
+# BOOST_WAVE([PREFERRED-RT-OPT])
+# ------------------------------
+# NOTE: If you intend to use Wave/Spirit with thread support, make sure you
+# call BOOST_THREAD first.
+# Look for Boost.Wave. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Wave],
+[AC_REQUIRE([BOOST_FILESYSTEM])dnl
+AC_REQUIRE([BOOST_DATE_TIME])dnl
+dnl The thread check is not required here, so the thread variables used below
+dnl only contribute when the caller ran that check earlier.
+boost_wave_save_LIBS=$LIBS
+boost_wave_save_LDFLAGS=$LDFLAGS
+m4_pattern_allow([^BOOST_((FILE)?SYSTEM|DATE_TIME|THREAD)_(LIBS|LDFLAGS)$])dnl
+LIBS="$LIBS $BOOST_SYSTEM_LIBS $BOOST_FILESYSTEM_LIBS $BOOST_DATE_TIME_LIBS \
+$BOOST_THREAD_LIBS"
+LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS $BOOST_FILESYSTEM_LDFLAGS \
+$BOOST_DATE_TIME_LDFLAGS $BOOST_THREAD_LDFLAGS"
+BOOST_FIND_LIB([wave], [$1],
+                [boost/wave.hpp],
+                [boost::wave::token_id id; get_token_name(id);])
+LIBS=$boost_wave_save_LIBS
+LDFLAGS=$boost_wave_save_LDFLAGS
+])# BOOST_WAVE
+
+
+# BOOST_XPRESSIVE()
+# -----------------
+# Look for Boost.Xpressive (new since 1.36.0).
+BOOST_DEFUN([Xpressive],
+[BOOST_FIND_HEADER([boost/xpressive/xpressive.hpp])])
+
+
+# ----------------- #
+# Internal helpers.
# +# ----------------- # + + +# _BOOST_PTHREAD_FLAG() +# --------------------- +# Internal helper for BOOST_THREAD. Computes boost_cv_pthread_flag +# which must be used in CPPFLAGS and LIBS. +# +# Yes, we *need* to put the -pthread thing in CPPFLAGS because with GCC3, +# boost/thread.hpp will trigger a #error if -pthread isn't used: +# boost/config/requires_threads.hpp:47:5: #error "Compiler threading support +# is not turned on. Please set the correct command line options for +# threading: -pthread (Linux), -pthreads (Solaris) or -mthreads (Mingw32)" +# +# Based on ACX_PTHREAD: http://autoconf-archive.cryp.to/acx_pthread.html +AC_DEFUN([_BOOST_PTHREAD_FLAG], +[AC_REQUIRE([AC_PROG_CXX])dnl +AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_LANG_PUSH([C++])dnl +AC_CACHE_CHECK([for the flags needed to use pthreads], [boost_cv_pthread_flag], +[ boost_cv_pthread_flag= + # The ordering *is* (sometimes) important. Some notes on the + # individual items follow: + # (none): in case threads are in libc; should be tried before -Kthread and + # other compiler flags to prevent continual compiler warnings + # -lpthreads: AIX (must check this before -lpthread) + # -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) + # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) + # -llthread: LinuxThreads port on FreeBSD (also preferred to -pthread) + # -pthread: GNU Linux/GCC (kernel threads), BSD/GCC (userland threads) + # -pthreads: Solaris/GCC + # -mthreads: MinGW32/GCC, Lynx/GCC + # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it + # doesn't hurt to check since this sometimes defines pthreads too; + # also defines -D_REENTRANT) + # ... -mt is also the pthreads flag for HP/aCC + # -lpthread: GNU Linux, etc.
+ # --thread-safe: KAI C++ + case $host_os in #( + *solaris*) + # On Solaris (at least, for some versions), libc contains stubbed + # (non-functional) versions of the pthreads routines, so link-based + # tests will erroneously succeed. (We need to link with -pthreads/-mt/ + # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather + # a function called by this macro, so we could check for that, but + # who knows whether they'll stub that too in a future libc.) So, + # we'll just look for -pthreads and -lpthread first: + boost_pthread_flags="-pthreads -lpthread -mt -pthread";; #( + *) + boost_pthread_flags="-lpthreads -Kthread -kthread -llthread -pthread \ + -pthreads -mthreads -lpthread --thread-safe -mt";; + esac + # Generate the test file once; every candidate flag below re-links it. + AC_LANG_CONFTEST([AC_LANG_PROGRAM([#include <pthread.h>], + [pthread_t th; pthread_join(th, 0); + pthread_attr_init(0); pthread_cleanup_push(0, 0); + pthread_create(0,0,0,0); pthread_cleanup_pop(0);])]) + for boost_pthread_flag in '' $boost_pthread_flags; do + boost_pthread_ok=false +dnl Re-use the test file already generated. + boost_pthreads__save_LIBS=$LIBS + LIBS="$LIBS $boost_pthread_flag" + AC_LINK_IFELSE([], + [if grep ".*$boost_pthread_flag" conftest.err; then + echo "This flag seems to have triggered warnings" >&AS_MESSAGE_LOG_FD + else + boost_pthread_ok=:; boost_cv_pthread_flag=$boost_pthread_flag + fi]) + LIBS=$boost_pthreads__save_LIBS + $boost_pthread_ok && break + done +]) +AC_LANG_POP([C++])dnl +])# _BOOST_PTHREAD_FLAG + + +# _BOOST_gcc_test(MAJOR, MINOR) +# ----------------------------- +# Internal helper for _BOOST_FIND_COMPILER_TAG. +m4_define([_BOOST_gcc_test], +["defined __GNUC__ && __GNUC__ == $1 && __GNUC_MINOR__ == $2 && !defined __ICC @ gcc$1$2"])dnl + +# _BOOST_mingw_test(MAJOR, MINOR) +# ------------------------------- +# Internal helper for _BOOST_FIND_COMPILER_TAG.
+m4_define([_BOOST_mingw_test], +["defined __GNUC__ && __GNUC__ == $1 && __GNUC_MINOR__ == $2 && !defined __ICC && \ + (defined WIN32 || defined WINNT || defined _WIN32 || defined __WIN32 \ + || defined __WIN32__ || defined __WINNT || defined __WINNT__) @ mgw$1$2"])dnl + + +# _BOOST_FIND_COMPILER_TAG() +# -------------------------- +# Internal. When Boost is installed without --layout=system, each library +# filename will hold a suffix that encodes the compiler used during the +# build. The Boost build system seems to call this a `tag'. +AC_DEFUN([_BOOST_FIND_COMPILER_TAG], +[AC_REQUIRE([AC_PROG_CXX])dnl +AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_CACHE_CHECK([for the toolset name used by Boost for $CXX], [boost_cv_lib_tag], +[boost_cv_lib_tag=unknown +if test x$boost_cv_inc_path != xno; then + AC_LANG_PUSH([C++])dnl + # The following tests are mostly inspired by boost/config/auto_link.hpp + # The list is sorted from most recent/common to oldest compiler (in order + # to increase the likelihood of finding the right compiler with the + # least number of compilation attempts). + # Beware that some tests are sensitive to the order (for instance, we must + # look for MinGW before looking for GCC3). + # I used one compilation test per compiler with a #error to recognize + # each compiler so that it works even when cross-compiling (let me know + # if you know a better approach). + # Known missing tags (known from Boost's tools/build/v2/tools/common.jam): + # como, edg, kcc, bck, mp, sw, tru, xlc + # I'm not sure about my test for `il' (be careful: Intel's ICC pre-defines + # the same defines as GCC's).
+ for i in \ + _BOOST_mingw_test(4,8) \ + _BOOST_gcc_test(4, 8) \ + _BOOST_mingw_test(4,7) \ + _BOOST_gcc_test(4, 7) \ + _BOOST_mingw_test(4,6) \ + _BOOST_gcc_test(4, 6) \ + _BOOST_mingw_test(4,5) \ + _BOOST_gcc_test(4, 5) \ + _BOOST_mingw_test(4,4) \ + _BOOST_gcc_test(4, 4) \ + _BOOST_mingw_test(4,3) \ + _BOOST_gcc_test(4, 3) \ + _BOOST_mingw_test(4,2) \ + _BOOST_gcc_test(4, 2) \ + _BOOST_mingw_test(4,1) \ + _BOOST_gcc_test(4, 1) \ + _BOOST_mingw_test(4,0) \ + _BOOST_gcc_test(4, 0) \ + "defined __GNUC__ && __GNUC__ == 3 && !defined __ICC \ + && (defined WIN32 || defined WINNT || defined _WIN32 || defined __WIN32 \ + || defined __WIN32__ || defined __WINNT || defined __WINNT__) @ mgw" \ + _BOOST_gcc_test(3, 4) \ + _BOOST_gcc_test(3, 3) \ + "defined _MSC_VER && _MSC_VER >= 1500 @ vc90" \ + "defined _MSC_VER && _MSC_VER == 1400 @ vc80" \ + _BOOST_gcc_test(3, 2) \ + "defined _MSC_VER && _MSC_VER == 1310 @ vc71" \ + _BOOST_gcc_test(3, 1) \ + _BOOST_gcc_test(3, 0) \ + "defined __BORLANDC__ @ bcb" \ + "defined __ICC && (defined __unix || defined __unix__) @ il" \ + "defined __ICL @ iw" \ + "defined _MSC_VER && _MSC_VER == 1300 @ vc7" \ + _BOOST_gcc_test(2, 95) \ + "defined __MWERKS__ && __MWERKS__ <= 0x32FF @ cw9" \ + "defined _MSC_VER && _MSC_VER < 1300 && !defined UNDER_CE @ vc6" \ + "defined _MSC_VER && _MSC_VER < 1300 && defined UNDER_CE @ evc4" \ + "defined __MWERKS__ && __MWERKS__ <= 0x31FF @ cw8" + do + boost_tag_test=`expr "X$i" : 'X\([[^@]]*\) @ '` + boost_tag=`expr "X$i" : 'X[[^@]]* @ \(.*\)'` + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ +#if $boost_tag_test +/* OK */ +#else +# error $boost_tag_test +#endif +]])], [boost_cv_lib_tag=$boost_tag; break], []) + done +AC_LANG_POP([C++])dnl + case $boost_cv_lib_tag in #( + # Some newer (>= 1.35?) versions of Boost seem to only use "gcc" as opposed + # to "gcc41" for instance. + *-gcc | *'-gcc ') :;; #( Don't re-add -gcc: it's already in there.
+ gcc*) + boost_tag_x= + case $host_os in #( + darwin*) + if test $boost_major_version -ge 136; then + # The `x' added in r46793 of Boost. + boost_tag_x=x + fi;; + esac + # We can specify multiple tags in this variable because it's used by + # BOOST_FIND_LIB that does a `for tag in -$boost_cv_lib_tag' ... + boost_cv_lib_tag="$boost_tag_x$boost_cv_lib_tag -${boost_tag_x}gcc" + ;; #( + unknown) + AC_MSG_WARN([[could not figure out which toolset name to use for $CXX]]) + boost_cv_lib_tag= + ;; + esac +fi])dnl end of AC_CACHE_CHECK +])# _BOOST_FIND_COMPILER_TAG + + +# _BOOST_GUESS_WHETHER_TO_USE_MT() +# -------------------------------- +# Compile a small test to try to guess whether we should favor MT (Multi +# Thread) flavors of Boost. Sets boost_guess_use_mt accordingly. +AC_DEFUN([_BOOST_GUESS_WHETHER_TO_USE_MT], +[# Check whether we had better use `mt' even though we weren't asked to. +AC_LANG_PUSH([C++])dnl +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ +#if defined _REENTRANT || defined _MT || defined __MT__ +/* use -mt */ +#else +# error MT not needed +#endif +]])], [boost_guess_use_mt=:], [boost_guess_use_mt=false]) +AC_LANG_POP([C++])dnl +]) + +# _BOOST_AC_LINK_IFELSE(PROGRAM, [ACTION-IF-TRUE], [ACTION-IF-FALSE]) +# ------------------------------------------------------------------- +# Fork of _AC_LINK_IFELSE that preserves conftest.o across calls. Fragile, +# will break when Autoconf changes its internals. Requires that you manually +# rm -f conftest.$ac_objext in between two really different tests, otherwise +# you will try to link a conftest.o left behind by a previous test. +# Used to aggressively optimize BOOST_FIND_LIB (see the big comment in this +# macro). +# +# Don't use "break" in the actions, as it would short-circuit some code +# this macro runs after the actions. +m4_define([_BOOST_AC_LINK_IFELSE], +[m4_ifvaln([$1], [AC_LANG_CONFTEST([$1])])dnl +rm -f conftest$ac_exeext +boost_save_ac_ext=$ac_ext +boost_use_source=: +# If we already have a .o, re-use it.
We change $ac_ext so that $ac_link +# tries to link the existing object file instead of compiling from source. +test -f conftest.$ac_objext && ac_ext=$ac_objext && boost_use_source=false && + _AS_ECHO_LOG([re-using the existing conftest.$ac_objext]) +AS_IF([_AC_DO_STDERR($ac_link) && { + test -z "$ac_[]_AC_LANG_ABBREV[]_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + $as_executable_p conftest$ac_exeext +dnl FIXME: use AS_TEST_X instead when 2.61 is widespread enough. + }], + [$2], + [if $boost_use_source; then + _AC_MSG_LOG_CONFTEST + fi + $3]) +ac_objext=$boost_save_ac_objext # NOTE(review): boost_save_ac_objext is not assigned in this macro - presumably the caller sets it; confirm. +ac_ext=$boost_save_ac_ext +dnl Delete also the IPA/IPO (Inter Procedural Analysis/Optimization) +dnl information created by the PGI compiler (conftest_ipa8_conftest.oo), +dnl as it would interfere with the next link command. +rm -f core conftest.err conftest_ipa8_conftest.oo \ + conftest$ac_exeext m4_ifval([$1], [conftest.$ac_ext])[]dnl +])# _BOOST_AC_LINK_IFELSE + +# Local Variables: +# mode: autoconf +# End: diff --git a/man/M6502_delete.3 b/man/M6502_delete.3 new file mode 100644 index 0000000..4bd1ff4 --- /dev/null +++ b/man/M6502_delete.3 @@ -0,0 +1 @@ +.so man3/lib6502.3 diff --git a/man/M6502_disassemble.3 b/man/M6502_disassemble.3 new file mode 100644 index 0000000..4bd1ff4 --- /dev/null +++ b/man/M6502_disassemble.3 @@ -0,0 +1 @@ +.so man3/lib6502.3 diff --git a/man/M6502_dump.3 b/man/M6502_dump.3 new file mode 100644 index 0000000..4bd1ff4 --- /dev/null +++ b/man/M6502_dump.3 @@ -0,0 +1 @@ +.so man3/lib6502.3 diff --git a/man/M6502_getCallback.3 b/man/M6502_getCallback.3 new file mode 100644 index 0000000..4bd1ff4 --- /dev/null +++ b/man/M6502_getCallback.3 @@ -0,0 +1 @@ +.so man3/lib6502.3 diff --git a/man/M6502_getVector.3 b/man/M6502_getVector.3 new file mode 100644 index 0000000..4bd1ff4 --- /dev/null +++ b/man/M6502_getVector.3 @@ -0,0 +1 @@ +.so man3/lib6502.3 diff --git a/man/M6502_irq.3 b/man/M6502_irq.3
new file mode 100644 index 0000000..4bd1ff4 --- /dev/null +++ b/man/M6502_irq.3 @@ -0,0 +1 @@ +.so man3/lib6502.3 diff --git a/man/M6502_new.3 b/man/M6502_new.3 new file mode 100644 index 0000000..4bd1ff4 --- /dev/null +++ b/man/M6502_new.3 @@ -0,0 +1 @@ +.so man3/lib6502.3 diff --git a/man/M6502_nmi.3 b/man/M6502_nmi.3 new file mode 100644 index 0000000..4bd1ff4 --- /dev/null +++ b/man/M6502_nmi.3 @@ -0,0 +1 @@ +.so man3/lib6502.3 diff --git a/man/M6502_reset.3 b/man/M6502_reset.3 new file mode 100644 index 0000000..4bd1ff4 --- /dev/null +++ b/man/M6502_reset.3 @@ -0,0 +1 @@ +.so man3/lib6502.3 diff --git a/man/M6502_run.3 b/man/M6502_run.3 new file mode 100644 index 0000000..4bd1ff4 --- /dev/null +++ b/man/M6502_run.3 @@ -0,0 +1 @@ +.so man3/lib6502.3 diff --git a/man/M6502_setCallback.3 b/man/M6502_setCallback.3 new file mode 100644 index 0000000..4bd1ff4 --- /dev/null +++ b/man/M6502_setCallback.3 @@ -0,0 +1 @@ +.so man3/lib6502.3 diff --git a/man/M6502_setMode.3 b/man/M6502_setMode.3 new file mode 100644 index 0000000..4bd1ff4 --- /dev/null +++ b/man/M6502_setMode.3 @@ -0,0 +1 @@ +.so man3/lib6502.3 diff --git a/man/M6502_setVector.3 b/man/M6502_setVector.3 new file mode 100644 index 0000000..4bd1ff4 --- /dev/null +++ b/man/M6502_setVector.3 @@ -0,0 +1 @@ +.so man3/lib6502.3 diff --git a/man/lib6502.3 b/man/lib6502.3 new file mode 100644 index 0000000..4551a3d --- /dev/null +++ b/man/lib6502.3 @@ -0,0 +1,555 @@ +.\" Copyright (c) 2005 Ian Piumarta +.\" Copyright (c) 2014 Steven Flintham +.\" +.\" Permission is hereby granted, free of charge, to any person +.\" obtaining a copy of this software and associated documentation +.\" files (the 'Software'), to deal in the Software without +.\" restriction, including without limitation the rights to use, copy, +.\" modify, merge, publish, distribute, and/or sell copies of the +.\" Software, and to permit persons to whom the Software is furnished +.\" to do so, provided that the above copyright notice(s) and this +.\" 
permission notice appear in all copies of the Software and that +.\" both the above copyright notice(s) and this permission notice +.\" appear in supporting documentation. +.\" +.\" THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. +.\" +.Dd June 7, 2014 +.Dt LIB6502 3 LOCAL +.Os "" +.\" ---------------------------------------------------------------- +.Sh NAME +.\" +.Nm lib6502 +.Nd 6502 microprocessor emulator +.\" ---------------------------------------------------------------- +.Sh SYNOPSIS +.\" +.In stdint.h +.In lib6502.h +.Ft M6502 * +.Fn M6502_new "M6502_Registers *registers" "M6502_Memory memory" "M6502_Callbacks *callbacks" +.Ft void +.Fn M6502_reset "M6502 *mpu" +.Ft void +.Fn M6502_nmi "M6502 *mpu" +.Ft void +.Fn M6502_irq "M6502 *mpu" +.Ft uint16_t +.Fn M6502_getVector "M6502 *mpu" "vector" +.Ft uint16_t +.Fn M6502_setVector "M6502 *mpu" "vector" "uint16_t address" +.Ft M6502_Callback +.Fn M6502_getCallback "M6502 *mpu" "type" "uint16_t address" +.Ft M6502_Callback +.Fn M6502_setCallback "M6502 *mpu" "type" "uint16_t address" "M6502_Callback callback" +.Ft void +.Fn M6502_run "M6502 *mpu" +.Ft int +.Fn M6502_disassemble "M6502 *mpu" "uint16_t address" "char buffer[64]" +.Ft void +.Fn M6502_dump "M6502 *mpu" "char buffer[64]" +.Ft void +.Fn M6502_delete "M6502 *mpu" +.Ft void +.Fn M6502_setMode "M6502 *mpu" "M6502_Mode mode" "int arg" +.\" ---------------------------------------------------------------- +.Sh DESCRIPTION +.\" +.Fn M6502_new +creates an instance of a 6502 microprocessor. +.Fn M6502_reset , +.Fn M6502_nmi +and +.Fn M6502_irq +place it into the states associated with the hardware signals for +reset, non-maskable interrupt and interrupt request, respectively. +The macros +.Fn M6502_getVector +and +.Fn M6502_setVector +read and write the vectors through which the processor jumps in +response to the above signals. 
The macros +.Fn M6502_getCallback +and +.Fn M6502_setCallback +read and write client-supplied functions that intercept accesses to +memory. +.Fn M6502_run +begins emulated execution. +.Fn M6502_dump +and +.Fn M6502_disassemble +create human-readable representations of processor or memory state. +.Fn M6502_delete +frees all resources associated with a processor instance. +.Fn M6502_setMode +specifies the emulation mode to use for a processor instance. Each of +these functions and macros is described in more detail below. +.Pp +.Fn M6502_new +returns a pointer to a +.Fa M6502 +structure containing at least the following members: +.Bd -literal +struct _M6502 +{ + M6502_Registers *registers; /* processor state */ + uint8_t *memory; /* memory image */ + M6502_Callbacks *callbacks; /* r/w/x/illegal callbacks */ +}; +.Ed +.Pp +These members are initialised according to the supplied +.Fa registers , +.Fa memory +and +.Fa callbacks +arguments. If a given argument is NULL, the corresponding member is +initialised automatically with a suitable (non-NULL) value. +.Pp +The members of +.Fa M6502 +are as follows: +.Bl -tag -width ".Fa callbacks" +.It Fa registers +the processor state, containing all registers and condition codes. +.It Fa memory +a block of at least 64 kilobytes of storage containing the processor's +memory. (An array type +.Vt M6502_Memory , +suitable for defining values to pass as the +.Fa memory +argument, is defined in the +.In lib6502.h +include file.) +.It Fa callbacks +a structure mapping processor memory accesses to client callback +functions. +.El +.Pp +Access to the contents of the +.Fa registers +and +.Fa memory +members can be made directly.
+The +.Fa registers +member is a +.Vt M6502_Registers +containing the following members: +.Bd -literal +struct _M6502_Registers +{ + uint8_t a; /* accumulator */ + uint8_t x; /* X index register */ + uint8_t y; /* Y index register */ + uint8_t p; /* processor status register */ + uint8_t s; /* stack pointer */ + uint16_t pc; /* program counter */ +}; +.Ed +.Pp +The +.Fa memory +member is an array of +.Vt unsigned char +and can be indexed directly. In addition, two convenience macros +.Fn M6502_getVector +and +.Fn M6502_setVector +provide access to the reset and interrupt vectors within +.Fa memory . +.Fn M6502_getVector +returns the address stored in the named +.Fa vector +which must be precisely one of the following: +.Bl -tag -width ".Dv RST" -offset indent +.It Dv RST +the reset vector. +.It Dv NMI +the non-maskable interrupt vector. +.It Dv IRQ +the interrupt request vector. +.El +.Pp +.Fn M6502_setVector +stores its +.Fa address +argument in the named +.Fa vector +and returns the new value. +.Pp +The +.Fa callbacks +member contains an opaque structure mapping processor memory accesses +to client callback functions. Whenever the processor performs an +access for which a corresponding entry exists in the +.Fa callbacks +structure, the emulator suspends execution and invokes the callback to +complete the operation. Each callback function should have a +signature equivalent to: +.Bd -ragged -offset indent +int +.Va callback +(M6502 *mpu, uint16_t address, uint8_t data); +.Ed +.Pp +The macros +.Fn M6502_getCallback +and +.Fn M6502_setCallback +read and write entries in the +.Fa callbacks +structure. These macros identify a unique memory access operation +from the specified +.Fa address +on which it operates and +.Fa type +of access involved. The +.Fa type +argument must be one of the following: +.Bl -tag -width ".Dv write" +.It Dv read +the +.Fa callback +is invoked when the processor attempts to read from the +given address.
The emulator passes the effective address of the +operation to the callback in its +.Fa address +argument. (The +.Fa data +argument is undefined.) The value returned by the callback will be +used by the emulator as the result of the read operation. +.It Dv write +the +.Fa callback +is invoked when the processor attempts to write to the +given address. The emulator passes the effective address of the +operation to the callback in its +.Fa address +argument and the byte being written in the +.Fa data +argument. The emulator will not perform the write operation before +invoking the callback; if the write should complete, the callback must +modify the processor's +.Fa memory +explicitly. The value returned from the callback is ignored. +.It Dv call +the +.Fa callback +is invoked when the processor attempts to transfer control to the +given address by any instruction other than a relative branch. The +emulator passes the destination address to the callback in its +.Fa address +argument and the instruction that initiated the control transfer in +its +.Fa data +argument (one of JMP, JSR, BRK, RTS or RTI). If the callback returns +zero (the callback refuses to handle the operation) the emulator will +allow the operation to complete as normal. If the callback returns a +non-zero address (indicating that the callback has handled the +operation internally) the emulator will transfer control to that +address. +.It Dv illegal_instruction +the +.Fa callback +is invoked when the processor attempts to execute the illegal instruction +whose opcode is the given "address". The emulator passes the address of the +instruction to the callback in its +.Fa address +argument and the instruction itself in the +.Fa data +argument. If the callback returns a non-zero address the +emulator will transfer control to that address, otherwise execution will +continue at the next instruction.
+.El +.Pp +.Fn M6502_getCallback +returns zero if there is no callback associated with the given +.Fa type +and +.Fa address . +Passing zero as the +.Fa callback +argument of +.Fn M6502_setCallback +removes any callback that might have been associated with +.Fa type +and +.Fa address . +.Pp +.Fn M6502_run +emulates processor execution in the given +.Fa mpu +by repeatedly fetching the instruction addressed by +.Fa pc +and dispatching to it. This function normally never returns. +.Pp +.Fn M6502_dump +writes a (NUL-terminated) symbolic representation of the processor's +internal state into the supplied +.Fa buffer . +Typical output resembles: +.Bd -literal -offset indent +PC=1010 SP=01FE A=0A X=5B Y=00 P=D1 NV-B---C +.Ed +.Pp +.Fn M6502_disassemble +writes a (NUL-terminated) symbolic representation of the instruction +in the processor's memory at the given +.Fa address +into the supplied +.Fa buffer . +It returns the size (in bytes) of the instruction. (In other words, +the amount by which +.Fa address +should be incremented to arrive at the next instruction.) +Typical output resembles: +.Bd -literal -offset indent +1009 cpx #5B +.Ed +.Pp +(The +.Fa buffer +arguments are oversized to allow for future expansion.) +.Pp +.Fn M6502_delete +frees the resources associated with the given +.Fa mpu . +Any members that were allocated implicitly (passed as NULL to +.Fn M6502_new ) +are deallocated. Members that were initialised from non-NULL +arguments are not deallocated. +.Pp +.Fn M6502_setMode +is a lib6502-jit extension which sets the emulation mode to use for the +instance to +.Fa mode , +which must be precisely one of the following: +.Bl -tag -width ".Dv RST" -offset indent +.It Dv M6502_ModeInterpreted +6502 code will be interpreted, much as in lib6502 itself. +.It Dv M6502_ModeCompiled +6502 code will always be compiled to host code before executing. This can result +in jerky execution as emulation halts during compilation.
Self-modifying code +will work correctly, but if this happens a lot the repeated re-compilations +will result in very slow execution. +.It Dv M6502_ModeHybrid +6502 code will be compiled to host code but the interpreter will be used to +continue execution during compilation. Execution will be smooth and relatively +fast but performance of repeatedly executed code will vary (in theory, improve) +over time. Repeated self-modification by code will cause re-compilations but +performance will still be reasonable as the interpreter will continue execution; +the main downside is that CPU will be taken up by the compilation. (On a +machine with two or more idle cores, this is wasteful but should not +significantly harm performance, as one core will run the interpreter while the +other handles the compilation.) This is the default mode. +.El +.Pp +.Fa arg +is the maximum number of 6502 instructions to be compiled into a single unit +of code when hybrid or compiled mode is selected; it is ignored in interpreted +mode. Specifying 0 will give a reasonable default value. +.Pp +.\" ---------------------------------------------------------------- +.Sh IMPLEMENTATION NOTES +.\" +You can share the +.Fa memory +and +.Fa callbacks +members of +.Vt M6502 +between multiple instances to simulate multiprocessor hardware. +.\" ---------------------------------------------------------------- +.Sh RETURN VALUES +.\" +.Fn M6502_new +returns a pointer to a +.Vt M6502 +structure. +.Fn M6502_getVector +and +.Fn M6502_setVector +return the contents of the given +.Fa vector . +.Fn M6502_getCallback +and +.Fn M6502_setCallback +return the +.Vt M6502_Callback +function associated with the given +.Fa address +and access +.Fa type . +.Fn M6502_disassemble +returns the size (in bytes) of the instruction at the given +.Fa address . 
+.Fn M6502_reset , +.Fn M6502_nmi , +.Fn M6502_irq , +.Fn M6502_run , +.Fn M6502_dump , +.Fn M6502_delete +and +.Fn M6502_setMode +don't return anything (unless you forgot to include +.In lib6502.h ) . +.\" ---------------------------------------------------------------- +.Sh EXAMPLES +.\" +The following program creates a 6502 processor, sets up callbacks for +printing characters and halting after a BRK instruction, stores a +program into memory that prints the alphabet, disassembles the program +on stdout, and then executes the program. +.Bd -literal -offset indent -compact + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#include "lib6502.h" + +#define WRCH 0xFFEE + +int wrch(M6502 *mpu, uint16_t address, uint8_t data) +{ + int pc; + putchar(mpu->registers->a); + pc = mpu->memory[++mpu->registers->s + 0x100]; + pc |= mpu->memory[++mpu->registers->s + 0x100] << 8; + return pc + 1; /* JSR pushes next insn addr - 1 */ +} + +int done(M6502 *mpu, uint16_t address, uint8_t data) +{ + char buffer[64]; + M6502_dump(mpu, buffer); + printf("\\nBRK instruction\\n%s\\n", buffer); + exit(0); +} + +int main(int argc, char **argv) +{ + M6502 *mpu = M6502_new(0, 0, 0); + unsigned pc = 0x1000; + + mpu->callbacks->call[WRCH] = wrch; /* write character */ + mpu->callbacks->call[0000] = done; /* reached after BRK */ + +# define gen1(X) (mpu->memory[pc++] = (uint8_t)(X)) +# define gen2(X,Y) gen1(X); gen1(Y) +# define gen3(X,Y,Z) gen1(X); gen2(Y,Z) + + gen2(0xA2, 'A' ); /* LDX #'A' */ + gen1(0x8A ); /* TXA */ + gen3(0x20,0xEE,0xFF); /* JSR FFEE */ + gen1(0xE8 ); /* INX */ + gen2(0xE0, 'Z'+1 ); /* CPX #'Z'+1 */ + gen2(0xD0, -9 ); /* BNE 1002 */ + gen2(0xA9, '\\n' ); /* LDA #'\\n' */ + gen3(0x20,0xEE,0xFF); /* JSR FFEE */ + gen2(0x00,0x00 ); /* BRK */ + + { + uint16_t ip = 0x1000; + while (ip < pc) + { + char insn[64]; + ip += M6502_disassemble(mpu, ip, insn); + printf("%04X %s\\n", ip, insn); + } + } + + M6502_setVector(mpu, RST, 0x1000); + + M6502_reset(mpu); + M6502_run(mpu); + M6502_delete(mpu); +
+ return 0; +} +.Ed +.\" ---------------------------------------------------------------- +.Sh DIAGNOSTICS +.\" +If +.Fn M6502_new +cannot allocate sufficient memory it prints "out of memory" to stderr +and exits with a non-zero status. +.Pp +If +.Fn M6502_run +encounters an illegal or undefined instruction, it prints "undefined +instruction" and the processor's state to stderr, then exits with a +non-zero status. +.\" ---------------------------------------------------------------- +.Sh COMPATIBILITY +.\" +M6502 is a generic name. The initial letter is mandated by C naming +conventions and chosen in deference to MOS Technology, the original +designers of the processor. To the best of my knowledge the 'M' +prefix was never stamped on a physical 6502. +.Pp +The emulator implements the CMOS version of the processor (NMOS bugs +in effective address calculations involving page boundaries are +corrected). lib6502 does not tolerate the execution of undefined +instructions (which were all no-ops in the first-generation CMOS +hardware); lib6502-jit treats them as no-ops. It would be nice to +support the several alternative instruction sets (model-specific +undocumented instructions in NMOS models, and various documented +extensions in the later CMOS models) but there are currently no plans +to do so. +.Pp +The emulated 6502 will run much faster than real hardware on any +modern computer. The fastest 6502 hardware available at the time of +writing has a clock speed of 14 MHz. On a 2 GHz PowerPC, the emulated +6502 runs at almost 300 MHz (in interpreted mode). +.\" ---------------------------------------------------------------- +.Sh SEE ALSO +.\" +.Xr run6502 1 +.Pp +For development tools, documentation and source code: +.Pa http://6502.org +.\" ---------------------------------------------------------------- +.Sh AUTHORS +.\" +The original lib6502 software and manual pages were written by Ian Piumarta. +Additional changes to create lib6502-jit were made by Steven Flintham. 
+.Pp +The software is provided as-is, with absolutely no warranty, in the +hope that you will enjoy and benefit from it. You may use (entirely +at your own risk) and redistribute it under the terms of a very +liberal license that does not seek to restrict your rights in any way +(unlike certain so-called 'open source' licenses that significantly +limit your freedom in the name of 'free' software that is, ultimately, +anything but free). See the file COPYING for details. +.\" ---------------------------------------------------------------- +.Sh BUGS +.\" +.Fn M6502_getVector +and +.Fn M6502_setVector +evaluate their arguments more than once. +.Pp +The out-of-memory condition and attempted execution of +illegal/undefined instructions should not be fatal errors. +.Pp +There is no way to limit the duration of execution within +.Fn M6502_run +to a certain number of instructions or cycles. +.Pp +The emulator should support some means of implicit interrupt +generation, either by polling or in response to (Unix) signals. +.Pp +The +.Sx COMPATIBILITY +section in this manual page has been diverted from its legitimate +purpose. +.Pp +The plural of 'callback' really ought to be 'callsback'. +.Pp +Please send bug reports (and feature requests) to: +lib6502-jit@lemma.co.uk.
diff --git a/man/run6502.1 b/man/run6502.1 new file mode 100644 index 0000000..98f761f --- /dev/null +++ b/man/run6502.1 @@ -0,0 +1,396 @@ +.\" Copyright (c) 2005 Ian Piumarta +.\" Copyright (c) 2014 Steven Flintham +.\" +.\" Permission is hereby granted, free of charge, to any person +.\" obtaining a copy of this software and associated documentation +.\" files (the 'Software'), to deal in the Software without +.\" restriction, including without limitation the rights to use, copy, +.\" modify, merge, publish, distribute, and/or sell copies of the +.\" Software, and to permit persons to whom the Software is furnished +.\" to do so, provided that the above copyright notice(s) and this +.\" permission notice appear in all copies of the Software and that +.\" both the above copyright notice(s) and this permission notice +.\" appear in supporting documentation. +.\" +.\" THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. +.\" +.Dd October 31, 2005 +.Dt RUN6502 1 LOCAL +.Os "" +.\" ---------------------------------------------------------------- +.Sh NAME +.\" +.Nm run6502 +.Nd execute a 6502 microprocessor program +.\" ---------------------------------------------------------------- +.Sh SYNOPSIS +.\" +.Nm run6502 +.Op Ar option ... +.Nm run6502 +.Op Ar option ... +.Fl B +.Op Ar +.\" ---------------------------------------------------------------- +.Sh DESCRIPTION +The +.Nm run6502 +command emulates the execution of a 6502 microprocessor. It creates a +memory image from the contents of one or more files on the command +line and then simulates a power-on hardware reset to begin execution. +.Pp +In its first form, +.Nm run6502 +emulates an embedded 6502 processor with 64 kilobytes of RAM, no +memory-mapped hardware, and no input-output capabilities. Limited +interaction with the machine is possible only through the +.Fl G , M +and +.Fl P +options. 
+.Pp +In its second form (with the +.Fl B +option) +.Nm run6502 +provides minimal emulation of Acorn 'BBC Model B' hardware with 32 +kilobytes of RAM, 16 kilobytes of paged language ROMs, and 16 +kilobytes of operating system ROM. A few MOS calls are intercepted to +provide keyboard input and screen output via stdin and stdout. +Switching between the sixteen paged read-only memory banks is also +supported by the usual memory-mapped control register. Any +.Ar file +arguments after the +.Fl B +are loaded into successive paged ROM banks (starting at 15 and working +down towards 0) before execution begins. +.\" ---------------------------------------------------------------- +.Ss Options +.\" +.Bl -tag -width indent +.It Fl B +enable minimal Acorn 'BBC Model B' hardware emulation: +.Bl -bullet +.It +the contents of memory between addresses 0x8000 and 0xBFFF are copied +into paged ROM number 0; +.It +memory between 0x8000 and 0xBFFF becomes bank-switchable between +sixteen different ROM images; +.It +the memory-mapped pages ('FRED', 'JIM' and 'SHEILA') between 0xFC00 +and 0xFEFF are initialised to harmless values; +.It +the upper half of the address space is write-protected; and +.It +callbacks are installed on several OS entry points to provide +input-output via stdin and stdout. +.El +.Pp +Any remaining non-option arguments on the command line will name files +to be loaded successively into paged ROMs, starting at 15 and working +downwards towards 0. +.It Fl d Ar addr Ar end +dump memory from the address +.Ar addr +(given in hexadecimal) up to (but not including) +.Ar end . +The +.Ar end +argument is either an absolute address or a relative address specified +as a '+' character followed by the number (in hexadecimal) of bytes to +dump. 
In other words, the following two options dump the same region +of memory: +.Bd -ragged -offset indent +.Fl d +8000 C000 +.Ed +.Bd -ragged -offset indent -compact +.Fl d +8000 +4000 +.Ed +.Pp +The format of the dump cannot currently be modified and consists of +the current address followed by one, two or three hexadecimal bytes, +and a symbolic representation of the instruction at that address. +.It Fl G Ar addr +arrange that subroutine calls to +.Ar addr +will behave as if there were an implementation of +.Xr getchar 3 +at that address, reading a character from stdin and returning it in +the accumulator. +.It Fl h +print a summary of the available options and then exit. +.It Fl I Ar addr +set the IRQ (interrupt request) vector (the address to which the +processor will transfer control upon execution of a BRK instruction). +Setting this address to zero will cause execution to halt (and the +emulator to exit) when a BRK instruction is encountered. +.It Fl i Ar addr Ar file +Load +.Ar file +into the memory image at the address +.Ar addr +(in hexadecimal), skipping over any initial '#!' interpreter line. +.It Fl l Ar addr Ar file +Load +.Ar file +into the memory image at the address +.Ar addr +(in hexadecimal). +.It Fl M Ar addrio +arrange that memory reads from address +.Ar addrio +will return the next character on stdin (blocking if necessary), and +memory writes to +.Ar addrio +will send the value written to stdout. +.It Fl mc +use compiled emulation mode. All code is compiled into host machine +code. This can make the emulation very jerky as execution halts +while compiling. +.It Fl mh +use hybrid emulation mode. Code is compiled into +host machine code, but while this is happening an interpreter allows +execution to continue. This is the default mode. +.It Fl mi +use interpreted emulation mode. All code is interpreted. 
+.It Fl mx Ar count +in compiled and hybrid emulation modes, set the maximum number of +6502 instructions which are translated as a unit to +.Ar count . +This has no effect in interpreted mode. A reasonable default is +chosen if this is not specified. +.It Fl N Ar addr +set the NMI (non-maskable interrupt) vector to +.Ar addr . +.It Fl P Ar addr +arrange that subroutine calls to +.Ar addr +will behave as if there were an implementation of +.Xr putchar 3 +at that address, writing the contents of the accumulator to stdout. +.It Fl R Ar addr +set the RST (hardware reset) vector. The processor will transfer +control to this address when emulated execution begins. +.It Fl s Ar addr Ar end Ar file +save the contents of memory from the address +.Ar addr +up to +.Ar end +(exclusive) to the given +.Ar file . +As with the +.Fl d +option, +.Ar end +can be absolute or '+' followed by a byte count. +.It Fl v +print version information and then exit. +.It Fl X Ar addr +arrange that any transfer of control to the address +.Ar addr +will cause an immediate exit with zero exit status. +.It Fl x +exit immediately. (Useful after +.Fl d +or when +.Nm run6502 +is being used as a trivial 'image editor', with several +.Fl l +options followed by +.Fl s +and +.Fl x . ) +.It Ar +following a +.Fl B +option, load one or more ROM image +files +into successive paged ROM slots. 
Other than the paging aspect, this +is equivalent to: +.Bd -ragged -offset indent +.Fl l Ar 8000 Ar image +.Ed +.El +.\" ---------------------------------------------------------------- +.Sh EXAMPLES +.\" +.Ss A Very Simple Program +The +.Xr perl 1 +command can be used to create a binary file from hexadecimal input: +.Bd -literal + echo a2418a20eeffe8e05bd0f7a90a20eeff00 | + perl -e 'print pack "H*",<STDIN>' > temp.img +.Ed +.Pp +The file can be loaded and executed with: +.Bd -literal + run6502 -l 1000 temp.img -R 1000 -P FFEE -X 0 +.Ed +.Pp +The contents of the file can be inspected symbolically with: +.Bd -literal + run6502 -l 1000 temp.img -d 1000 +12 +.Ed +.Pp +The options passed to +.Nm run6502 +in the above examples have the following effects: +.Bl -tag -width offset +.It \-l 1000 temp.img +loads the file +.Pa temp.img +into memory at address 0x1000. +.It \-R 1000 +sets the reset vector (the address of the first instruction to be executed +after 'power on') to 0x1000. +.It \-P FFEE +arranges for calls to address 0xFFEE to behave as if there were an +implementation of +.Xr putchar 3 +at that address. +.It \-X 0 +arranges for transfers of control to address 0 to exit from the +emulator. This works in the above example because the final 'BRK' +instruction causes an implicit subroutine call through an +uninitialised interrupt vector to location 0. To see this +instruction... +.It \-d 1000 +12 +disassembles 18 bytes of memory at address 0x1000. +.El +.Ss Standalone Images +The +.Fl i +option is designed for use in the 'interpreter command' appearing on +the first line of an executable script. Adding the line +.Bd -literal + #!run6502 -R 1000 -P FFEE -X 0 -i 1000 +.Ed +.Pp +(with no leading spaces and a single trailing newline character) +to the +.Pa temp.img +file from the first example turns it into a script. 
If the file is +made executable with +.Bd -literal + chmod +x temp.img +.Ed +.Pp +it can be run like a standalone program: +.Bd -literal + ./temp.img +.Ed +.Ss A Very Complex Program +Consider a pair of files named +.Pa os1.2 +and +.Pa basic2 +containing (legally-acquired, of course) ROM images of Acorn MOS 1.2 +and BBC Basic 2. The following command loads each of the images into +memory at the appropriate address, cleans up the regions of memory +containing memory-mapped i/o on the BBC computer, saves a snapshot of +the entire memory to the file +.Pa image +and then exits: +.Bd -literal + run6502 -l C000 os1.2 -l 8000 basic2 -B -s 0 +10000 image -x +.Ed +.Pp +Running the generated image with +.Bd -literal + run6502 image +.Ed +.Pp +will cold-start the emulated hardware, run the OS for a while, and +then drop into the language ROM. Basic programs can then be entered, +edited and run from the terminal. +.Pp +More details are given in the +.Pa README +file available in the +.Pa examples +directory of the distribution. +.Ss Exercises +Create a standalone image (one that can be run as a program, with +a '#!' interpreter line at the beginning) that contains Basic2 and +OS1.2 (as described above). This image should be no larger than 32K +(memory below 0x8000, which would be full of zeroes, should not appear +in the image file). +.\" ---------------------------------------------------------------- +.Sh DIAGNOSTICS +.\" +If nothing goes wrong, none. Otherwise lots. They should be +self-explanatory. I'm too lazy to enumerate them. +.\" ---------------------------------------------------------------- +.Sh COMPATIBILITY +.\" +See +.Xr lib6502 3 +for a discussion of the emulated instruction set. +.\" ---------------------------------------------------------------- +.Sh SEE ALSO +.\" +.Xr lib6502 3 +.Pp +The file +.Pa examples/README +in the lib6502 distribution. 
(Depending on your system this may be +installed in +.Pa /usr/doc/lib6502 , +.Pa /usr/local/doc/lib6502 , +.Pa /usr/share/doc/lib6502 , +or similar.) +.Pp +.Pa http://piumarta.com/software/lib6502 +for updates and documentation to lib6502. +.Pp +.Pa https://github.com/ZornsLemma/lib6502-jit +for updates and documentation to lib6502-jit. +.Pp +.Pa http://6502.org +for lots of 6502-related resources. +.\" ---------------------------------------------------------------- +.Sh AUTHORS +.\" +The original lib6502 software and manual pages were written by Ian Piumarta. +Additional changes to create lib6502-jit were made by Steven Flintham. +.Pp +The software is provided as-is, with absolutely no warranty, in the +hope that you will enjoy and benefit from it. You may use (entirely +at your own risk) and redistribute it under the terms of a very +liberal license that does not seek to restrict your rights in any way +(unlike certain so-called 'open source' licenses that significantly +limit your freedom in the name of 'free' software that is, ultimately, +anything but free). See the file COPYING for details. +.\" ---------------------------------------------------------------- +.Sh BUGS +.\" +.Bl -bullet +.It +Options must appear one at a time. +.It +Any attempt (in a load or save operation) to transfer data beyond +0xFFFF is silently truncated at the end of memory. +.It +There is no way to specify the slot into which a ROM image should be +loaded, other than implicitly according to the order of arguments on +the command line. +.It +Execution can only be started via the emulated power-up reset. There +is no support for 'warm-starting' execution in an image at an +arbitrary address. +.It +Even though the emulator fully supports them, there is no way to +artificially generate a hardware interrupt request, non-maskable +interrupt, or reset condition. If you need these, read +.Xr lib6502 3 +and write your own shell. 
+.It +The Acorn 'BBC Model B' hardware emulation is totally lame. +.El +.Pp +Please send bug reports (and feature requests) to : +lib6502-jit@lemma.co.uk. diff --git a/run6502.c b/run6502.c new file mode 100644 index 0000000..2e3731a --- /dev/null +++ b/run6502.c @@ -0,0 +1,599 @@ +/* run6502.c -- 6502 emulator shell -*- C -*- */ + +/* Copyright (c) 2005 Ian Piumarta + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. + */ + +/* Last edited: 2005-11-02 01:18:58 by piumarta on margaux.local + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "lib6502.h" + +#undef VERSION +#define VERSION PACKAGE_NAME " " PACKAGE_VERSION " " PACKAGE_COPYRIGHT + +typedef uint8_t byte; +typedef uint16_t word; + +static char *program= 0; + +static M6502_Mode mode= M6502_ModeHybrid; +static int max_insns= 0; /* default */ + +static byte bank[0x10][0x4000]; + +static uint64_t system_time_base; + + +void fail(const char *fmt, ...) 
+{ + va_list ap; + fflush(stdout); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fprintf(stderr, "\n"); + exit(1); +} + + +void pfail(const char *msg) +{ + fflush(stdout); + perror(msg); + exit(1); +} + + +#define rts \ + { \ + word pc; \ + pc = mpu->memory[++mpu->registers->s + 0x100]; \ + pc |= mpu->memory[++mpu->registers->s + 0x100] << 8; \ + return pc + 1; \ + } + + +uint64_t pseudo_system_time(void) +{ + struct timespec t; + if (clock_gettime(CLOCK_MONOTONIC, &t) == -1) + { + pfail("clock_gettime() failed"); + } + long hsec= t.tv_nsec / 10000000; + return (((uint64_t) t.tv_sec) * 100) + hsec; +} + +int osword(M6502 *mpu, word address, byte data) +{ + byte *params= mpu->memory + mpu->registers->x + (mpu->registers->y << 8); + + switch (mpu->registers->a) + { + case 0x00: /* input line */ + /* On entry: XY+0,1=>string area, + * XY+2=maximum line length, + * XY+3=minimum acceptable ASCII value, + * XY+4=maximum acceptable ASCII value. + * On exit: Y is the line length (excluding CR), + * C is set if Escape terminated input. 
+ */ + { + word offset= params[0] + (params[1] << 8); + byte *buffer= mpu->memory + offset; + byte length= params[2], minVal= params[3], maxVal= params[4], b= 0; + if (!fgets((char *) buffer, length, stdin)) + { + putchar('\n'); + exit(0); + } + for (b= 0; b < length; ++b) + if ((buffer[b] < minVal) || (buffer[b] > maxVal) || ('\n' == buffer[b])) + break; + buffer[b]= 13; + mpu->registers->y= b; + mpu->registers->p &= 0xFE; + break; + } + + case 0x01: /* read system time */ + /* On exit: XY+0..4=>5 byte time in hundredths of a second + */ + { + uint64_t system_time= pseudo_system_time() - system_time_base; + int i; + for (i= 0; i < 5; ++i) + { + params[i]= system_time & 0xFF; + system_time>>= 8; + } + break; + } + + case 0x05: /* read I/O processor memory */ + /* On entry: XY+0..3=>address to read from + * On exit: XY+4 =>the byte read + */ + { + word addr= params[0] + (params[1] << 8); + params[4]= mpu->memory[addr]; + break; + } + + default: + { + char state[64]; + M6502_dump(mpu, state); + fflush(stdout); + fprintf(stderr, "\nOSWORD %s\n", state); + fail("ABORT"); + } + break; + } + + rts; +} + + +int osbyte(M6502 *mpu, word address, byte data) +{ + switch (mpu->registers->a) + { + case 0x7A: /* perform keyboard scan */ + mpu->registers->x= 0x00; + break; + + case 0x7E: /* acknowledge detection of escape condition */ + return 1; + break; + + case 0x82: /* read machine higher order address */ + mpu->registers->y= 0x00; + mpu->registers->x= 0x00; + break; + + case 0x83: /* read top of OS ram address (OSHWM) */ + mpu->registers->y= 0x0E; + mpu->registers->x= 0x00; + break; + + case 0x84: /* read bottom of display ram address */ + mpu->registers->y= 0x80; + mpu->registers->x= 0x00; + break; + + case 0x89: /* motor control */ + break; + + case 0xDA: /* read/write number of items in vdu queue (stored at 0x026A) */ + return 0; + break; + + default: + { + char state[64]; + M6502_dump(mpu, state); + fflush(stdout); + fprintf(stderr, "\nOSBYTE %s\n", state); + 
fail("ABORT"); + } + break; + } + + rts; +} + + +int oscli(M6502 *mpu, word address, byte data) +{ + byte *params= mpu->memory + mpu->registers->x + (mpu->registers->y << 8); + char command[1024], *ptr= command; + int ret; + while (('*' == *params) || (' ' == *params)) + ++params; + while (13 != *params) + *ptr++= *params++; + *ptr= '\0'; + ret= system(command); + if ((ret == -1) || (WIFEXITED(ret) && (WEXITSTATUS(ret) == 127))) + { + fflush(stdout); + fprintf(stderr, "\nsystem() failed\n"); + } + rts; +} + + +int oswrch(M6502 *mpu, word address, byte data) +{ + switch (mpu->registers->a) + { + case 0x0C: + fputs("\033[2J\033[H", stdout); + break; + + default: + putchar(mpu->registers->a); + break; + } + fflush(stdout); + rts; +} + + +static int writeROM(M6502 *mpu, word address, byte value) +{ + return 0; +} + + +static int bankSelect(M6502 *mpu, word address, byte value) +{ + memcpy(mpu->memory + 0x8000, bank[value & 0x0F], 0x4000); + return 0; +} + + +static int doBtraps(int argc, char **argv, M6502 *mpu) +{ + unsigned addr; + + /* Acorn Model B ROM and memory-mapped IO */ + + for (addr= 0x8000; addr <= 0xFBFF; ++addr) mpu->callbacks->write[addr]= writeROM; + for (addr= 0xFC00; addr <= 0xFEFF; ++addr) mpu->memory[addr]= 0xFF; + for (addr= 0xFE30; addr <= 0xFE33; ++addr) mpu->callbacks->write[addr]= bankSelect; + for (addr= 0xFE40; addr <= 0xFE4F; ++addr) mpu->memory[addr]= 0x00; + for (addr= 0xFF00; addr <= 0xFFFF; ++addr) mpu->callbacks->write[addr]= writeROM; + + /* anything already loaded at 0x8000 appears in bank 0 */ + + memcpy(bank[0x00], mpu->memory + 0x8000, 0x4000); + + /* fake a few interesting OS calls */ + +# define trap(vec, addr, func) mpu->callbacks->call[addr]= (func) + trap(0x020C, 0xFFF1, osword); + trap(0x020A, 0xFFF4, osbyte); +//trap(0x0208, 0xFFF7, oscli ); /* enable this to send '*COMMAND's to system(3) :-) */ + trap(0x020E, 0xFFEE, oswrch); + trap(0x020E, 0xE0A4, oswrch); /* NVWRCH */ +#undef trap + + system_time_base= 
pseudo_system_time(); + + return 0; +} + + +static void usage(int status) +{ + FILE *stream= status ? stderr : stdout; + fprintf(stream, VERSION"\n"); + fprintf(stream, "please send bug reports to: %s\n", PACKAGE_BUGREPORT); + fprintf(stream, "\n"); + fprintf(stream, "usage: %s [option ...]\n", program); + fprintf(stream, " %s [option ...] -B [image ...]\n", program); + fprintf(stream, " -B -- minimal Acorn 'BBC Model B' compatibility\n"); + fprintf(stream, " -d addr last -- dump memory between addr and last\n"); + fprintf(stream, " -G addr -- emulate getchar(3) at addr\n"); + fprintf(stream, " -h -- help (print this message)\n"); + fprintf(stream, " -I addr -- set IRQ vector\n"); + fprintf(stream, " -l addr file -- load file at addr\n"); + fprintf(stream, " -M addr -- emulate memory-mapped stdio at addr\n"); + fprintf(stream, " -mc -- use compiled emulation mode\n"); + fprintf(stream, " -mh -- use hybrid emulation mode (default)\n"); + fprintf(stream, " -mi -- use interpreted emulation mode\n"); + fprintf(stream, " -mx count -- maximum instructions to JIT (-mc/-mh)\n"); + fprintf(stream, " -N addr -- set NMI vector\n"); + fprintf(stream, " -P addr -- emulate putchar(3) at addr\n"); + fprintf(stream, " -R addr -- set RST vector\n"); + fprintf(stream, " -s addr last file -- save memory from addr to last in file\n"); + fprintf(stream, " -v -- print version number then exit\n"); + fprintf(stream, " -X addr -- terminate emulation if PC reaches addr\n"); + fprintf(stream, " -x -- exit without further ado\n"); + fprintf(stream, " image -- '-l 8000 image' in available ROM slot\n"); + fprintf(stream, "\n"); + fprintf(stream, "'last' can be an address (non-inclusive) or '+size' (in bytes)\n"); + exit(status); +} + + +static int doHelp(int argc, char **argv, M6502 *mpu) +{ + usage(0); + return 0; +} + + +static int doVersion(int argc, char **argv, M6502 *mpu) +{ + puts(VERSION); + exit(0); + return 0; +} + + +static unsigned long htol(char *hex) +{ + char *end; + unsigned 
long l= strtol(hex, &end, 16); + if (*end) fail("bad hex number: %s", hex); + return l; +} + + +static int loadInterpreter(M6502 *mpu, word start, const char *path) +{ + FILE *file= 0; + int count= 0; + byte *memory= mpu->memory + start; + size_t max= 0x10000 - start; + int c= 0; + + if ((!(file= fopen(path, "r"))) || ('#' != fgetc(file)) || ('!' != fgetc(file))) + return 0; + while ((c= fgetc(file)) >= ' ') + ; + while ((count= fread(memory, 1, max, file)) > 0) + { + memory += count; + max -= count; + } + fclose(file); + return 1; +} + + +static int save(M6502 *mpu, word address, unsigned length, const char *path) +{ + FILE *file= 0; + int count= 0; + if (!(file= fopen(path, "w"))) + return 0; + while ((count= fwrite(mpu->memory + address, 1, length, file))) + { + address += count; + length -= count; + } + fclose(file); + return 1; +} + + +static int load(M6502 *mpu, word address, const char *path) +{ + FILE *file= 0; + int count= 0; + size_t max= 0x10000 - address; + if (!(file= fopen(path, "r"))) + return 0; + while ((count= fread(mpu->memory + address, 1, max, file)) > 0) + { + address += count; + max -= count; + } + fclose(file); + return 1; +} + + +static int doLoadInterpreter(int argc, char **argv, M6502 *mpu) +{ + if (argc < 3) usage(1); + if (!loadInterpreter(mpu, htol(argv[1]), argv[2])) pfail(argv[2]); + return 2; +} + + +static int doLoad(int argc, char **argv, M6502 *mpu) /* -l addr file */ +{ + if (argc < 3) usage(1); + if (!load(mpu, htol(argv[1]), argv[2])) pfail(argv[2]); + return 2; +} + + +static int doSave(int argc, char **argv, M6502 *mpu) /* -l addr size file */ +{ + if (argc < 4) usage(1); + if (!save(mpu, htol(argv[1]), htol(argv[2]), argv[3])) pfail(argv[3]); + return 3; +} + + +static int doMode(M6502_Mode m) +{ + mode= m; + return 0; +} + + +static int doMaxInsns(int argc, char **argv, M6502 *mpu) +{ + if (argc < 2) usage(1); + char *end; + unsigned long l= strtol(argv[1], &end, 10); + if (*end) fail("bad number: %s", argv[1]); + 
max_insns= l; + return 1; +} + + +#define doVEC(VEC) \ + static int do##VEC(int argc, char **argv, M6502 *mpu) \ + { \ + unsigned addr= 0; \ + if (argc < 2) usage(1); \ + addr= htol(argv[1]); \ + M6502_setVector(mpu, VEC, addr); \ + return 1; \ + } + +doVEC(IRQ); +doVEC(NMI); +doVEC(RST); + +#undef doVEC + + +static int gTrap(M6502 *mpu, word addr, byte data) { mpu->registers->a= getchar(); rts; } +static int pTrap(M6502 *mpu, word addr, byte data) { putchar(mpu->registers->a); rts; } + +static int doGtrap(int argc, char **argv, M6502 *mpu) +{ + unsigned addr; + if (argc < 2) usage(1); + addr= htol(argv[1]); + M6502_setCallback(mpu, call, addr, gTrap); + return 1; +} + +static int doPtrap(int argc, char **argv, M6502 *mpu) +{ + unsigned addr; + if (argc < 2) usage(1); + addr= htol(argv[1]); + M6502_setCallback(mpu, call, addr, pTrap); + return 1; +} + + +static int mTrapRead(M6502 *mpu, word addr, byte data) { return getchar(); } +static int mTrapWrite(M6502 *mpu, word addr, byte data) { return putchar(data); } + +static int doMtrap(int argc, char **argv, M6502 *mpu) +{ + unsigned addr= 0; + if (argc < 2) usage(1); + addr= htol(argv[1]); + M6502_setCallback(mpu, read, addr, mTrapRead); + M6502_setCallback(mpu, write, addr, mTrapWrite); + return 1; +} + + +static int xTrap(M6502 *mpu, word addr, byte data) { exit(0); return 0; } + +static int doXtrap(int argc, char **argv, M6502 *mpu) +{ + unsigned addr= 0; + if (argc < 2) usage(1); + addr= htol(argv[1]); + M6502_setCallback(mpu, call, addr, xTrap); + return 1; +} + + +static int doDisassemble(int argc, char **argv, M6502 *mpu) +{ + unsigned addr= 0, last= 0; + if (argc < 3) usage(1); + addr= htol(argv[1]); + last= ('+' == *argv[2]) ? 
addr + htol(1 + argv[2]) : htol(argv[2]); + while (addr < last) + { + char insn[64]; + int i= 0, size= M6502_disassemble(mpu, addr, insn); + printf("%04X ", addr); + while (i++ < size) printf("%02X", mpu->memory[addr + i - 1]); + while (i++ < 4) printf(" "); + putchar(' '); + i= 0; + while (i++ < size) putchar(isgraph(mpu->memory[addr + i - 1]) ? mpu->memory[addr + i - 1] : ' '); + while (i++ < 4) putchar(' '); + printf(" %s\n", insn); + addr += size; + } + return 2; +} + + +int main(int argc, char **argv) +{ + M6502 *mpu= M6502_new(0, 0, 0); + int bTraps= 0; + + program= argv[0]; + + if ((2 == argc) && ('-' != *argv[1])) + { + if ((!loadInterpreter(mpu, 0, argv[1])) && (!load(mpu, 0, argv[1]))) + pfail(argv[1]); + doBtraps(0, 0, mpu); + } + else + while (++argv, --argc > 0) + { + int n= 0; + if (!strcmp(*argv, "-B")) bTraps= 1; + else if (!strcmp(*argv, "-d")) n= doDisassemble(argc, argv, mpu); + else if (!strcmp(*argv, "-G")) n= doGtrap(argc, argv, mpu); + else if (!strcmp(*argv, "-h")) n= doHelp(argc, argv, mpu); + else if (!strcmp(*argv, "-i")) n= doLoadInterpreter(argc, argv, mpu); + else if (!strcmp(*argv, "-I")) n= doIRQ(argc, argv, mpu); + else if (!strcmp(*argv, "-l")) n= doLoad(argc, argv, mpu); + else if (!strcmp(*argv, "-M")) n= doMtrap(argc, argv, mpu); + else if (!strcmp(*argv, "-mc")) n= doMode(M6502_ModeCompiled); + else if (!strcmp(*argv, "-mh")) n= doMode(M6502_ModeHybrid); + else if (!strcmp(*argv, "-mi")) n= doMode(M6502_ModeInterpreted); + else if (!strcmp(*argv, "-mx")) n= doMaxInsns(argc, argv, mpu); + else if (!strcmp(*argv, "-N")) n= doNMI(argc, argv, mpu); + else if (!strcmp(*argv, "-P")) n= doPtrap(argc, argv, mpu); + else if (!strcmp(*argv, "-R")) n= doRST(argc, argv, mpu); + else if (!strcmp(*argv, "-s")) n= doSave(argc, argv, mpu); + else if (!strcmp(*argv, "-v")) n= doVersion(argc, argv, mpu); + else if (!strcmp(*argv, "-X")) n= doXtrap(argc, argv, mpu); + else if (!strcmp(*argv, "-x")) exit(0); + else if ('-' == **argv) usage(1); + 
else + { + /* doBtraps() left 0x8000+0x4000 in bank 0, so load */ + /* additional images starting at 15 and work down */ + static int bankSel= 0x0F; + if (!bTraps) usage(1); + if (bankSel < 0) fail("too many images"); + if (!load(mpu, 0x8000, argv[0])) pfail(argv[0]); + memcpy(bank[bankSel--], + 0x8000 + mpu->memory, + 0x4000); + n= 0; + } + argc -= n; + argv += n; + } + + M6502_setMode(mpu, mode, max_insns); + + if (bTraps) + doBtraps(0, 0, mpu); + + M6502_reset(mpu); + M6502_run(mpu); + M6502_delete(mpu); + + return 0; +} diff --git a/test/addr-wrap-1.mst b/test/addr-wrap-1.mst new file mode 100644 index 0000000..24de910 --- /dev/null +++ b/test/addr-wrap-1.mst @@ -0,0 +1 @@ +Y \ No newline at end of file diff --git a/test/addr-wrap-1.xa b/test/addr-wrap-1.xa new file mode 100644 index 0000000..a49e9d4 --- /dev/null +++ b/test/addr-wrap-1.xa @@ -0,0 +1,25 @@ +#include "config.xa" + + LDA #1 + STA $00 + STA $05 + STA $0A + LDY #$80 + CLC + LDA #0 +LOOP + ADC $FF80,Y + INY + BNE LOOP + CMP #3 + BNE FAIL + +SUCCESS + LDA #'Y' + JSR OSWRCH + JMP QUIT + +FAIL + LDA #'N' + JSR OSWRCH + JMP QUIT diff --git a/test/basic-callback.c b/test/basic-callback.c new file mode 100644 index 0000000..d2ffb27 --- /dev/null +++ b/test/basic-callback.c @@ -0,0 +1,122 @@ +/* Copyright (c) 2005 Ian Piumarta + * Copyright (c) 2014 Steven Flintham + * + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. + */ + +#include +#include + +#include "lib6502.h" +#include "test-utils.h" + +int done(M6502 *mpu, uint16_t address, uint8_t data) +{ + char buffer[64]; + M6502_dump_masked(mpu, buffer); + printf("\nBRK instruction: address %04X opcode %02X\n%s\n", address, data, buffer); + exit(0); +} + +int call(M6502 *mpu, uint16_t address, uint8_t data) +{ + char buffer[64]; + M6502_dump_masked(mpu, buffer); + printf("\ncall: address %04X opcode %02X\n%s\n", address, data, buffer); + return 0; +} + +int rd(M6502 *mpu, uint16_t address, uint8_t data) +{ + char buffer[64]; + M6502_dump_masked(mpu, buffer); + printf("\nrd: address %04X opcode %02X\n%s\n", address, data, buffer); + return 0; +} + +int wr(M6502 *mpu, uint16_t address, uint8_t data) +{ + char buffer[64]; + M6502_dump_masked(mpu, buffer); + printf("\nwr: address %04X opcode %02X\n%s\n", address, data, buffer); + return 0; +} + +int ill(M6502 *mpu, uint16_t address, uint8_t data) +{ + char buffer[64]; + M6502_dump_masked(mpu, buffer); + printf("\nill: address %04X opcode %02X memory %02X\n%s\n", address, data, mpu->memory[address], buffer); + return 0; +} + +int main(int argc, char *argv[]) +{ + M6502 *mpu = M6502_new(0, 0, 0); + parse_args(argc, argv, mpu); + + unsigned pc = 0x1000; + + M6502_setCallback(mpu, call, 0, 
done); + M6502_setCallback(mpu, call, 0x2000, call); + M6502_setCallback(mpu, call, 0x3000, call); + M6502_setCallback(mpu, call, 0x4000, call); + M6502_setCallback(mpu, read, 0x5000, rd ); + M6502_setCallback(mpu, write, 0x5000, wr ); + M6502_setCallback(mpu, illegal_instruction, 0x13, ill ); + M6502_setCallback(mpu, illegal_instruction, 0x44, ill ); + M6502_setCallback(mpu, illegal_instruction, 0x5c, ill ); + +# define gen1(X) (mpu->memory[pc++]= (uint8_t)(X)) +# define gen2(X,Y) gen1(X); gen1(Y) +# define gen3(X,Y,Z) gen1(X); gen2(Y,Z) + + gen1(0x13 ); + gen1(0x44 ); + gen1(0x13 ); // not executed, 0x44 is a two-byte illegal instruction + gen1(0x5C ); + gen1(0x13 ); // not executed, 0x5C is a two-byte illegal instruction + gen1(0x13 ); // not executed, 0x5C is a two-byte illegal instruction + gen3(0x20,0x00,0x20); // JSR &2000 + gen3(0xad,0x00,0x50); // LDA &5000 + gen2(0x64,0x70 ); // STZ &70 + gen2(0xa9,0x50 ); // LDA #&50 + gen2(0x85,0x71 ); // STA &71 + gen2(0xb2,0x70 ); // LDA (&70) + gen2(0x92,0x70 ); // STA (&70) + gen2(0x00,0x00 ); // BRK + + pc = 0x2000; + gen3(0x8d,0x00,0x50); // STA &5000 + gen3(0x4c,0x00,0x30); // JMP &3000 + + pc = 0x3000; + gen2(0xa9,0x00 ); // LDA #0 + gen3(0x8d,0x76,0x32); // STA &3276 + gen2(0xa9,0x40 ); // LDA #&40 + gen3(0x8d,0x77,0x32); // STA &3277 + gen3(0x6c,0x76,0x32); // JMP (&3276) + + pc = 0x4000; + gen1(0x60 ); // RTS + + M6502_setVector(mpu, RST, 0x1000); + + M6502_reset(mpu); + M6502_run(mpu); + M6502_delete(mpu); /* We never reach here, but what the hey. 
*/ + + return 0; +} diff --git a/test/basic-callback.mst b/test/basic-callback.mst new file mode 100644 index 0000000..2c713d3 --- /dev/null +++ b/test/basic-callback.mst @@ -0,0 +1,33 @@ + +ill: address 1000 opcode 13 memory 13 +PC=1001 SP=0100 A=00 X=00 Y=00 P=04 -----I-- + +ill: address 1001 opcode 44 memory 44 +PC=1003 SP=0100 A=00 X=00 Y=00 P=04 -----I-- + +ill: address 1003 opcode 5C memory 5C +PC=1006 SP=0100 A=00 X=00 Y=00 P=04 -----I-- + +call: address 2000 opcode 20 +PC=1009 SP=01FE A=00 X=00 Y=00 P=04 -----I-- + +wr: address 5000 opcode 00 +PC=1009 SP=01FE A=00 X=00 Y=00 P=04 -----I-- + +call: address 3000 opcode 4C +PC=3000 SP=01FE A=00 X=00 Y=00 P=04 -----I-- + +call: address 4000 opcode 6C +PC=4000 SP=01FE A=40 X=00 Y=00 P=04 -----I-- + +rd: address 5000 opcode 00 +PC=4000 SP=01FE A=40 X=00 Y=00 P=04 -----I-- + +rd: address 5000 opcode 00 +PC=4000 SP=01FE A=40 X=00 Y=00 P=04 -----I-- + +wr: address 5000 opcode 00 +PC=4000 SP=01FE A=40 X=00 Y=00 P=04 -----I-- + +BRK instruction: address 1016 opcode 00 +PC=1018 SP=01FD A=00 X=00 Y=00 P=06 -----IZ- diff --git a/test/call-illegal-callback-modify-code.c b/test/call-illegal-callback-modify-code.c new file mode 100644 index 0000000..bf5ec76 --- /dev/null +++ b/test/call-illegal-callback-modify-code.c @@ -0,0 +1,121 @@ +/* Copyright (c) 2005 Ian Piumarta + * Copyright (c) 2014 Steven Flintham + * + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. + */ + +#include +#include + +#include "lib6502.h" +#include "test-utils.h" + +static uint16_t call_modify1_addr; +static uint16_t call_modify2_addr; +static uint16_t ill_modify1_addr; +static uint16_t ill_modify2_addr; + +int done(M6502 *mpu, uint16_t address, uint8_t data) +{ + char buffer[64]; + M6502_dump_masked(mpu, buffer); + printf("\nBRK instruction: address %04X opcode %02X\n%s\n", address, data, buffer); + exit(0); +} + +int call(M6502 *mpu, uint16_t address, uint8_t data) +{ + char buffer[64]; + M6502_dump_masked(mpu, buffer); + printf("\ncall: address %04X opcode %02X\n%s\n", address, data, buffer); + mpu->memory[call_modify1_addr] += 1; + mpu->memory[call_modify2_addr] += 2; + return 0; +} + +int ill(M6502 *mpu, uint16_t address, uint8_t data) +{ + char buffer[64]; + M6502_dump_masked(mpu, buffer); + printf("\nill: address %04X opcode %02X memory %02X\n%s\n", address, data, mpu->memory[address], buffer); + mpu->memory[ill_modify1_addr] += 1; + mpu->memory[ill_modify2_addr] += 2; + return 0; +} + +int oswrch(M6502 *mpu, uint16_t address, uint8_t data) +{ + putchar(mpu->registers->a); + mpu->memory[0xffee] = 0x60; // RTS + return 0; +} + +int main(int argc, char *argv[]) +{ + M6502 *mpu = M6502_new(0, 0, 0); + parse_args(argc, argv, mpu); + + unsigned pc = 
0x1000; + + M6502_setCallback(mpu, call, 0, done ); + M6502_setCallback(mpu, call, 0x2000, call ); + M6502_setCallback(mpu, illegal_instruction, 0x13, ill ); + M6502_setCallback(mpu, call, 0xffee, oswrch); + +# define gen1(X) (mpu->memory[pc++]= (uint8_t)(X)) +# define gen2(X,Y) gen1(X); gen1(Y) +# define gen3(X,Y,Z) gen1(X); gen2(Y,Z) + + gen3(0x20,0x00,0x30); // JSR &3000 + gen1(0x13 ); // ill &13 + gen3(0x20,0x00,0x30); // JSR &3000 + gen1(0x13 ); // ill &13 + gen3(0x20,0x00,0x30); // JSR &3000 + gen3(0x20,0x00,0x20); // JSR &2000 + gen3(0x20,0x00,0x30); // JSR &3000 + gen3(0x20,0x00,0x20); // JSR &2000 + gen3(0x20,0x00,0x30); // JSR &3000 + gen2(0x00,0x00 ); // BRK + + pc = 0x2000; + gen1(0x60 ); // RTS + + pc = 0x3000; + gen2(0xa9,'C' ); // LDA #'C' + gen3(0x20,0xee,0xff); // JSR &FFEE + call_modify1_addr = pc + 1; + gen2(0xa9,'A' ); // LDA #'A' + gen3(0x20,0xee,0xff); // JSR &FFEE + call_modify2_addr = pc + 1; + gen2(0xa9,'A' ); // LDA #'A' + gen3(0x20,0xee,0xff); // JSR &FFEE + ill_modify1_addr = pc + 1; + gen2(0xa9,'A' ); // LDA #'A' + gen3(0x20,0xee,0xff); // JSR &FFEE + ill_modify2_addr = pc + 1; + gen2(0xa9,'A' ); // LDA #'A' + gen3(0x20,0xee,0xff); // JSR &FFEE + gen2(0xa9,'\n' ); // LDA #'\n' + gen3(0x20,0xee,0xff); // JSR &FFEE + gen1(0x60 ); // RTS + + M6502_setVector(mpu, RST, 0x1000); + + M6502_reset(mpu); + M6502_run(mpu); + M6502_delete(mpu); /* We never reach here, but what the hey. 
*/ + + return 0; +} diff --git a/test/call-illegal-callback-modify-code.mst b/test/call-illegal-callback-modify-code.mst new file mode 100644 index 0000000..cc5acff --- /dev/null +++ b/test/call-illegal-callback-modify-code.mst @@ -0,0 +1,20 @@ +CAAAA + +ill: address 1003 opcode 13 memory 13 +PC=1004 SP=0100 A=0A X=00 Y=00 P=04 -----I-- +CAABC + +ill: address 1007 opcode 13 memory 13 +PC=1008 SP=0100 A=0A X=00 Y=00 P=04 -----I-- +CAACE + +call: address 2000 opcode 20 +PC=100E SP=01FE A=0A X=00 Y=00 P=04 -----I-- +CBCCE + +call: address 2000 opcode 20 +PC=1014 SP=01FE A=0A X=00 Y=00 P=04 -----I-- +CCECE + +BRK instruction: address 1017 opcode 00 +PC=1019 SP=01FD A=0A X=00 Y=00 P=04 -----I-- diff --git a/test/config.xa b/test/config.xa new file mode 100644 index 0000000..a7e0560 --- /dev/null +++ b/test/config.xa @@ -0,0 +1,4 @@ +OSWRCH = $FFEE +QUIT = $F000 + +*= $1E00 diff --git a/test/interleave.mst b/test/interleave.mst new file mode 100644 index 0000000..24de910 --- /dev/null +++ b/test/interleave.mst @@ -0,0 +1 @@ +Y \ No newline at end of file diff --git a/test/interleave.xa b/test/interleave.xa new file mode 100644 index 0000000..8fb5ee0 --- /dev/null +++ b/test/interleave.xa @@ -0,0 +1,38 @@ +#include "config.xa" + + JSR SETX10 + CPX #10 + BNE FAIL + JSR SETX30 + CPX #30 + BNE FAIL + JSR SETX20 + CPX #20 + BNE FAIL + JSR SETX30 + CPX #30 + BNE FAIL + JSR SETX10 + CPX #10 + BNE FAIL + JSR SETX20 + CPX #20 + BNE FAIL + +SUCCESS + LDA #'Y' + JSR OSWRCH + JMP QUIT + +FAIL + LDA #'N' + JSR OSWRCH + JMP QUIT + +; example taken from http://www.6502.org/tutorials/6502opcodes.html +SETX10 LDX #10 + .byte $2C +SETX20 LDX #20 + .byte $2C +SETX30 LDX #30 + RTS diff --git a/test/irq-nmi.c b/test/irq-nmi.c new file mode 100644 index 0000000..ae95352 --- /dev/null +++ b/test/irq-nmi.c @@ -0,0 +1,116 @@ +/* Copyright (c) 2005 Ian Piumarta + * Copyright (c) 2014 Steven Flintham + * + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. + */ + +#include +#include + +#include "lib6502.h" +#include "test-utils.h" + +int brk(M6502 *mpu, uint16_t address, uint8_t data) +{ + char buffer[64]; + M6502_dump_masked(mpu, buffer); + printf("\nBRK: address %04X opcode %02X\n%s\n", address, data, buffer); + exit(0); +} + +int ill(M6502 *mpu, uint16_t address, uint8_t data) +{ + char buffer[64]; + M6502_dump_masked(mpu, buffer); + printf("\nill: address %04X opcode %02X memory %02X\n%s\n", address, data, mpu->memory[address], buffer); + if (data == 0x03) + { + M6502_nmi(mpu); + } + else if (data == 0x13) + { + M6502_irq(mpu); + } + + return 0; +} + +int oswrch(M6502 *mpu, uint16_t address, uint8_t data) +{ + putchar(mpu->registers->a); + mpu->memory[0xffee] = 0x60; // RTS + return 0; +} + + +int main(int argc, char *argv[]) +{ + M6502 *mpu = M6502_new(0, 0, 0); + parse_args(argc, argv, mpu); + + unsigned pc = 0x1000; + + /* 0x3000 is the IRQ/BRK vector, but call callbacks don't trigger on + * interrupts, so this is only called on BRK. 
+ */ + M6502_setCallback(mpu, call, 0x3000, brk ); + + M6502_setCallback(mpu, illegal_instruction, 0x03, ill ); + M6502_setCallback(mpu, illegal_instruction, 0x13, ill ); + M6502_setCallback(mpu, call, 0xffee, oswrch); + +# define gen1(X) (mpu->memory[pc++]= (uint8_t)(X)) +# define gen2(X,Y) gen1(X); gen1(Y) +# define gen3(X,Y,Z) gen1(X); gen2(Y,Z) + + gen1(0x58 ); // CLI + gen2(0xa9,'A' ); // LDA #'A' + gen3(0x20,0xee,0xff); // JSR &ffee + gen1(0x03 ); // NMI + gen2(0xa9,'B' ); // LDA #'B' + gen3(0x20,0xee,0xff); // JSR &ffee + gen1(0x13 ); // IRQ + gen2(0xa9,'C' ); // LDA #'C' + gen3(0x20,0xee,0xff); // JSR &ffee + gen1(0x78 ); // SEI + gen1(0x13 ); // IRQ (ignored) + gen1(0x03 ); // NMI + gen1(0x13 ); // IRQ (ignored) + gen2(0xa9,'D' ); // LDA #'D' + gen3(0x20,0xee,0xff); // JSR &ffee + gen1(0x58 ); // CLI + gen1(0x13 ); // IRQ + gen2(0x00,0x00 ); // BRK + + pc = 0x2000; + gen2(0xa9,'N' ); // LDA #'N' + gen3(0x20,0xee,0xff); // JSR &ffee + gen1(0x40 ); // RTI + + pc = 0x3000; + gen2(0xa9,'I' ); // LDA #'I' + gen3(0x20,0xee,0xff); // JSR &ffee + gen1(0x40 ); // RTI + + M6502_setVector(mpu, RST, 0x1000); + M6502_setVector(mpu, NMI, 0x2000); + M6502_setVector(mpu, IRQ, 0x3000); + + M6502_reset(mpu); + M6502_run(mpu); + M6502_delete(mpu); /* We never reach here, but what the hey. 
*/ + + return 0; +} diff --git a/test/irq-nmi.mst b/test/irq-nmi.mst new file mode 100644 index 0000000..bf7d32b --- /dev/null +++ b/test/irq-nmi.mst @@ -0,0 +1,21 @@ +A +ill: address 1006 opcode 03 memory 03 +PC=1007 SP=0100 A=41 X=00 Y=00 P=00 -------- +NB +ill: address 100C opcode 13 memory 13 +PC=100D SP=0100 A=42 X=00 Y=00 P=00 -------- +IC +ill: address 1013 opcode 13 memory 13 +PC=1014 SP=0100 A=43 X=00 Y=00 P=04 -----I-- + +ill: address 1014 opcode 03 memory 03 +PC=1015 SP=0100 A=43 X=00 Y=00 P=04 -----I-- +N +ill: address 1015 opcode 13 memory 13 +PC=1016 SP=0100 A=4E X=00 Y=00 P=04 -----I-- +D +ill: address 101C opcode 13 memory 13 +PC=101D SP=0100 A=44 X=00 Y=00 P=00 -------- +I +BRK: address 101D opcode 00 +PC=101F SP=01FD A=49 X=00 Y=00 P=04 -----I-- diff --git a/test/pc-wrap-1.mst b/test/pc-wrap-1.mst new file mode 100644 index 0000000..24de910 --- /dev/null +++ b/test/pc-wrap-1.mst @@ -0,0 +1 @@ +Y \ No newline at end of file diff --git a/test/pc-wrap-1.xa b/test/pc-wrap-1.xa new file mode 100644 index 0000000..c703803 --- /dev/null +++ b/test/pc-wrap-1.xa @@ -0,0 +1,28 @@ +#include "config.xa" + +; It's not important this is self-modifying code, this is just the easiest way +; to get code at the relevant addresses without fighting with the assembler and +; the fact run6502 will clobber the top of memory to set up various vectors. 
+ + LDA #$A9 ; LDA #n + STA $FFFE + STA $00 + LDA #'N' + STA $FFFF + LDA #'Y' + STA $01 + + LDA #$20 ; JSR abs + STA $02 + LDA #$EE + STA $03 + LDA #$FF + STA $04 + LDA #$4C ; JMP abs + STA $05 + LDA #<QUIT + STA $06 + LDA #>QUIT + STA $07 + + JMP $FFFE diff --git a/test/pc-wrap-2.mst b/test/pc-wrap-2.mst new file mode 100644 index 0000000..24de910 --- /dev/null +++ b/test/pc-wrap-2.mst @@ -0,0 +1 @@ +Y \ No newline at end of file diff --git a/test/pc-wrap-2.xa b/test/pc-wrap-2.xa new file mode 100644 index 0000000..c70763f --- /dev/null +++ b/test/pc-wrap-2.xa @@ -0,0 +1,28 @@ +#include "config.xa" + +; It's not important this is self-modifying code, this is just the easiest way +; to get code at the relevant addresses without fighting with the assembler and +; the fact run6502 will clobber the top of memory to set up various vectors. + + LDA #$A9 ; LDA #n + STA $FFFD + STA $FFFF + LDA #'N' + STA $FFFE + LDA #'Y' + STA $00 + + LDA #$20 ; JSR abs + STA $01 + LDA #$EE + STA $02 + LDA #$FF + STA $03 + LDA #$4C ; JMP abs + STA $04 + LDA #<QUIT + STA $05 + LDA #>QUIT + STA $06 + + JMP $FFFD diff --git a/test/run-c-tests.py b/test/run-c-tests.py new file mode 100755 index 0000000..b4a628e --- /dev/null +++ b/test/run-c-tests.py @@ -0,0 +1,33 @@ +#!/usr/bin/python + +from __future__ import print_function +import subprocess + +tests = [ + 'basic-callback', + 'call-illegal-callback-modify-code', + 'irq-nmi', + 'setjmp-trick', + 'stack-code-brk', + 'stack-code-jsr', + 'write-callback-modify-code' +] + +test_args = [ + '-mi', + '-mh', + '-mc -mx 1', + '-mc' +] + +print('1..', len(tests) * len(test_args), sep='') +i = 1 +for test_arg in test_args: + for test in tests: + result = subprocess.check_output(['test/' + test] + test_arg.split()) + expected_result = open('test/' + test + '.mst', 'rb').read() + if result == expected_result: + print('ok', i, test, test_arg) + else: + print('not ok', i, test, test_arg) + i += 1 diff --git a/test/run-c-tests.sh b/test/run-c-tests.sh new file mode 100755 index 0000000..7c60f3c --- 
/dev/null +++ b/test/run-c-tests.sh @@ -0,0 +1,2 @@ +#!/bin/sh +python test/run-c-tests.py diff --git a/test/run-run6502-tests.py b/test/run-run6502-tests.py new file mode 100755 index 0000000..378989e --- /dev/null +++ b/test/run-run6502-tests.py @@ -0,0 +1,59 @@ +#!/usr/bin/python + +from __future__ import print_function +import glob +import os +import subprocess + +os.chdir('test') + +# It's quite likely the "xa" assembler is not installed; don't generate +# scary test failures if that's the case. +xa_installed = True +try: + result = subprocess.check_output(['xa', '--version']) + if result.find(b'xa65') == -1: + xa_installed = False +except: + xa_installed = False + +# By default we skip slow tests (those with names starting z-) in '-mc' +# modes. +skip_slow_mc = (os.getenv('RUN_SLOW_TESTS', '0') == '0') + +# Since we didn't have to hard-code the test names in the Makefile.am, we +# use wildcards here. +tests = sorted([t for t in glob.glob('*.xa') if t != 'config.xa']) + +test_args = [ + '-mi', + '-mh', + '-mc -mx 1', + '-mc' +] + +print('1..', len(tests) * len(test_args), sep='') +i = 0 +for test_arg in test_args: + for test in tests: + i += 1 + basename = test[0:-3] + + if not xa_installed: + print('ok', i, '# skipped (xa not installed):', test, test_arg) + continue + + if skip_slow_mc and basename[0:2] == 'z-' and test_arg[0:3] == '-mc': + print('ok', i, '# skipped (slow -mc):', test, test_arg) + continue + + xa_out = basename + '.mc' + subprocess.check_call(['xa', '-o', xa_out, test]) + result = subprocess.check_output( + ['../run6502', '-l', '1e00', xa_out, '-R', '1e00', '-G', 'ffe0', + '-P', 'ffee', '-X', 'f000'] + test_arg.split()) + expected_result = open(basename + '.mst', 'rb').read() + if result == expected_result: + print('ok', i, test, test_arg) + else: + print('not ok', i, test, test_arg) diff --git a/test/run-run6502-tests.sh b/test/run-run6502-tests.sh new file mode 100755 index 0000000..c0e21dd --- /dev/null +++ b/test/run-run6502-tests.sh @@ 
-0,0 +1,2 @@ +#!/bin/sh +python test/run-run6502-tests.py diff --git a/test/setjmp-trick.c b/test/setjmp-trick.c new file mode 100644 index 0000000..f363d2e --- /dev/null +++ b/test/setjmp-trick.c @@ -0,0 +1,125 @@ +/* Copyright (c) 2005 Ian Piumarta + * Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. 
+ */ + +#include +#include +#include + +#include "lib6502.h" +#include "test-utils.h" + +static jmp_buf env; + +int done(M6502 *mpu, uint16_t address, uint8_t data) +{ + char buffer[64]; + M6502_dump_masked(mpu, buffer); + printf("\nBRK instruction: address %04X opcode %02X\n%s\n", address, data, buffer); + longjmp(env, 1); + exit(0); +} + +int call(M6502 *mpu, uint16_t address, uint8_t data) +{ + char buffer[64]; + M6502_dump_masked(mpu, buffer); + printf("\ncall: address %04X opcode %02X\n%s\n", address, data, buffer); + mpu->registers->pc = address; + longjmp(env, 2); + return 0; +} + +int ill(M6502 *mpu, uint16_t address, uint8_t data) +{ + char buffer[64]; + M6502_dump_masked(mpu, buffer); + printf("\nill: address %04X opcode %02X memory %02X\n%s\n", address, data, mpu->memory[address], buffer); + longjmp(env, 3); + return 0; +} + +int main(int argc, char *argv[]) +{ + M6502 *mpu = M6502_new(0, 0, 0); + parse_args(argc, argv, mpu); + + unsigned pc = 0x1000; + + /* Read and write callbacks don't provide the correct, up-to-date CPU state + * in the M6502 object, so this trick is a non-starter with them. 
+ */ + + M6502_setCallback(mpu, call, 0, done); + M6502_setCallback(mpu, call, 0x2000, call); + M6502_setCallback(mpu, call, 0x3000, call); + M6502_setCallback(mpu, call, 0x4000, call); + M6502_setCallback(mpu, illegal_instruction, 0x13, ill ); + M6502_setCallback(mpu, illegal_instruction, 0x44, ill ); + M6502_setCallback(mpu, illegal_instruction, 0x5c, ill ); + +# define gen1(X) (mpu->memory[pc++]= (uint8_t)(X)) +# define gen2(X,Y) gen1(X); gen1(Y) +# define gen3(X,Y,Z) gen1(X); gen2(Y,Z) + + gen1(0x13 ); + gen1(0x44 ); + gen1(0x13 ); // not executed, 0x44 is a two-byte illegal instruction + gen1(0x5C ); + gen1(0x13 ); // not executed, 0x5C is a two-byte illegal instruction + gen1(0x13 ); // not executed, 0x5C is a two-byte illegal instruction + gen3(0x20,0x00,0x20); // JSR &2000 + gen3(0xad,0x00,0x50); // LDA &5000 + gen2(0x00,0x00 ); // BRK + + pc = 0x2000; + gen3(0x8d,0x00,0x50); // STA &5000 + gen3(0x4c,0x00,0x30); // JMP &3000 + + pc = 0x3000; + gen2(0xa9,0x00 ); // LDA #0 + gen3(0x8d,0x76,0x32); // STA &3276 + gen2(0xa9,0x40 ); // LDA #&40 + gen3(0x8d,0x77,0x32); // STA &3277 + gen3(0x6c,0x76,0x32); // JMP (&3276) + + pc = 0x4000; + gen1(0x60 ); // RTS + + M6502_setVector(mpu, RST, 0x1000); + + M6502_reset(mpu); + while (1) + { + volatile int result = setjmp(env); + if (result == 0) + { + M6502_run(mpu); + } + else + { + printf("\nsetjmp() returned %d\n", result); + if (result == 1) + { + break; + } + } + } + M6502_delete(mpu); + + return 0; +} diff --git a/test/setjmp-trick.mst b/test/setjmp-trick.mst new file mode 100644 index 0000000..ac0bcd5 --- /dev/null +++ b/test/setjmp-trick.mst @@ -0,0 +1,35 @@ + +ill: address 1000 opcode 13 memory 13 +PC=1001 SP=0100 A=00 X=00 Y=00 P=04 -----I-- + +setjmp() returned 3 + +ill: address 1001 opcode 44 memory 44 +PC=1003 SP=0100 A=00 X=00 Y=00 P=04 -----I-- + +setjmp() returned 3 + +ill: address 1003 opcode 5C memory 5C +PC=1006 SP=0100 A=00 X=00 Y=00 P=04 -----I-- + +setjmp() returned 3 + +call: address 2000 opcode 20 
+PC=1009 SP=01FE A=00 X=00 Y=00 P=04 -----I-- + +setjmp() returned 2 + +call: address 3000 opcode 4C +PC=3000 SP=01FE A=00 X=00 Y=00 P=04 -----I-- + +setjmp() returned 2 + +call: address 4000 opcode 6C +PC=4000 SP=01FE A=40 X=00 Y=00 P=04 -----I-- + +setjmp() returned 2 + +BRK instruction: address 100C opcode 00 +PC=100E SP=01FD A=00 X=00 Y=00 P=06 -----IZ- + +setjmp() returned 1 diff --git a/test/stack-code-brk.c b/test/stack-code-brk.c new file mode 100644 index 0000000..8ac2b75 --- /dev/null +++ b/test/stack-code-brk.c @@ -0,0 +1,108 @@ +/* Copyright (c) 2005 Ian Piumarta + * Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. 
+ */ + +#include +#include + +#include "lib6502.h" +#include "test-utils.h" + +int done(M6502 *mpu, uint16_t address, uint8_t data) +{ + exit(0); +} + +int oswrch(M6502 *mpu, uint16_t address, uint8_t data) +{ + putchar(mpu->registers->a); + mpu->memory[0xffee] = 0x60; // RTS + return 0; +} + +# define gen1(X) (mpu->memory[pc++]= (uint8_t)(X)) +# define gen2(X,Y) gen1(X); gen1(Y) +# define gen3(X,Y,Z) gen1(X); gen2(Y,Z) + +int main(int argc, char *argv[]) +{ + M6502 *mpu = M6502_new(0, 0, 0); + parse_args(argc, argv, mpu); + + unsigned pc = 0x1000; + unsigned saved_pc; + + M6502_setCallback(mpu, call, 0xf000, done ); + M6502_setCallback(mpu, call, 0xffee, oswrch); + + gen2(0xa2, 0xff ); // LDX #&FF + gen1(0x9a ); // TXS + gen2(0xa9, 'A' ); // LDA #'A' + + // LDA #'B' is 0xa9, 0x42. So if we execute a BRK at 0x42a7, it will + // push 0x42, 0xa9 and the flags onto the stack. Since the stack grows + // downwards those bytes will be in the right order for execution. We'll + // additionally push an LDX immediate opcode so we can "execute" the flags + // value. We can nearly force the flags to be whatever we like using PLP, + // although the BRK will set the B and X bits in the stacked value. We + // demonstrate this by explicitly masking off those bits in the values we + // force into the flags. 
+ enum { + flagX= (1<<5), /* unused */ + flagB= (1<<4) /* irq from brk */ + }; + uint8_t mask = ~(flagX | flagB); + gen2(0xa0, '0' & mask); // LDY #('0' with B/X masked off) + gen1(0x5a ); // PHY + gen1(0x28 ); // PLP + gen3(0x4c, 0xa7, 0x42); // JMP &42A7 + pc = 0x42a7; + gen2(0x00, 0x00 ); // BRK + saved_pc = pc; + pc = 0x0; // BRK vector + gen2(0xa9, 0xa2 ); // LDA # + gen1(0x48 ); // PHA + gen3(0x4c, 0xfc, 0x01); // JMP &01FC + pc = 0x200; + gen3(0x20, 0xee, 0xff); // JSR &FFEE + gen1(0x8a ); // TXA + gen3(0x20, 0xee, 0xff); // JSR &FFEE + gen1(0x68 ); // PLA + gen1(0x40 ); // RTI + pc = saved_pc; + + // Let's do the same thing again, but this time code has already been + // executed from that address on the stack, so we're verifying the change + // is picked up. We do LDA #'C' this time, so we execute the BRK from + // 0x43a7. + gen2(0xa0, '1' & mask); // LDY #('1' with B/X masked off) + gen1(0x5a ); // PHY + gen1(0x28 ); // PLP + gen3(0x4c, 0xa7, 0x43); // JMP &43A7 + pc = 0x43a7; + gen2(0x00, 0x00 ); // BRK + + gen3(0x4c, 0x00, 0xf0); // JMP &F000 (quit) + + M6502_setVector(mpu, RST, 0x1000); + + M6502_reset(mpu); + M6502_run(mpu); + M6502_delete(mpu); /* We never reach here, but what the hey. */ + + return 0; +} diff --git a/test/stack-code-brk.mst b/test/stack-code-brk.mst new file mode 100644 index 0000000..467dbb8 --- /dev/null +++ b/test/stack-code-brk.mst @@ -0,0 +1 @@ +B0C1 \ No newline at end of file diff --git a/test/stack-code-jsr.c b/test/stack-code-jsr.c new file mode 100644 index 0000000..5cac6bf --- /dev/null +++ b/test/stack-code-jsr.c @@ -0,0 +1,90 @@ +/* Copyright (c) 2005 Ian Piumarta + * Copyright (c) 2014 Steven Flintham + * + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. + */ + +#include +#include + +#include "lib6502.h" +#include "test-utils.h" + +int done(M6502 *mpu, uint16_t address, uint8_t data) +{ + char buffer[64]; + M6502_dump_masked(mpu, buffer); + printf("\nBRK instruction: address %04X opcode %02X\n%s\n", address, data, buffer); + exit(0); +} + +int oswrch(M6502 *mpu, uint16_t address, uint8_t data) +{ + putchar(mpu->registers->a); + mpu->memory[0xffee] = 0x60; // RTS + return 0; +} + +# define gen1(X) (mpu->memory[pc++]= (uint8_t)(X)) +# define gen2(X,Y) gen1(X); gen1(Y) +# define gen3(X,Y,Z) gen1(X); gen2(Y,Z) + +int main(int argc, char *argv[]) +{ + M6502 *mpu = M6502_new(0, 0, 0); + parse_args(argc, argv, mpu); + + unsigned pc = 0x1000; + unsigned saved_pc; + + M6502_setCallback(mpu, call, 0, done ); + M6502_setCallback(mpu, call, 0xffee, oswrch); + + gen2(0xa2, 0xff ); // LDX #&FF + gen1(0x9a ); // TXS + gen2(0xa9, 'A' ); // LDA #'A' + + // LDA #'B' is 0xa9, 0x42. So if we execute a JSR at 0x42a7, it will + // push 0x42 and then 0xa9 onto the stack. Since the stack grows downwards + // those bytes will be in the right order for execution. 
+ gen3(0x4c, 0xa7, 0x42); // JMP &42A7 + pc = 0x42a7; + gen3(0x20, 0x00, 0x30); // JSR &3000 + saved_pc = pc; + pc = 0x3000; + gen3(0x4c, 0xfe, 0x01); // JMP &01FE + pc = 0x200; + gen3(0x20, 0xee, 0xff); // JSR &FFEE + gen1(0x60 ); // RTS + pc = saved_pc; + + // Let's do the same thing again, but this time code has already been + // executed from that address on the stack, so we're verifying the change + // is picked up. We do LDA #'C' this time, so we execute the JSR from + // 0x43a7. + gen3(0x4c, 0xa7, 0x43); // JMP &43A7 + pc = 0x43a7; + gen3(0x20, 0x00, 0x30); // JSR &3000 + + gen2(0x00, 0x00 ); // BRK + + M6502_setVector(mpu, RST, 0x1000); + + M6502_reset(mpu); + M6502_run(mpu); + M6502_delete(mpu); /* We never reach here, but what the hey. */ + + return 0; +} diff --git a/test/stack-code-jsr.mst b/test/stack-code-jsr.mst new file mode 100644 index 0000000..62ee1a3 --- /dev/null +++ b/test/stack-code-jsr.mst @@ -0,0 +1,3 @@ +BC +BRK instruction: address 43AA opcode 00 +PC=43AC SP=01FC A=43 X=FF Y=00 P=04 -----I-- diff --git a/test/test-utils.c b/test/test-utils.c new file mode 100644 index 0000000..b17819c --- /dev/null +++ b/test/test-utils.c @@ -0,0 +1,106 @@ +/* test-utils.c -- utility functions for C test programs */ + +/* Copyright (c) 2005 Ian Piumarta + * Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. 
+ * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. + */ + +/* Some of this code is copy-and-pasted from run6502.c, but there's not enough + * of it for me to want to complicate things even slightly by trying to share + * it, especially since this is test code and somewhat distinct. + */ + +#include +#include +#include +#include + +#include "lib6502.h" + +static const char *program= 0; +static M6502_Mode mode= M6502_ModeHybrid; +static int max_insns= 0; /* default */ + +enum { + flagX= (1<<5), /* unused */ + flagB= (1<<4) /* irq from brk */ +}; + +void fail(const char *fmt, ...) +{ + va_list ap; + fflush(stdout); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fprintf(stderr, "\n"); + exit(1); +} + +static void usage(int status) +{ + FILE *stream = stderr; + fprintf(stream, "usage: %s [option ...]\n", program); + fprintf(stream, " -h -- help (print this message)\n"); + fprintf(stream, " -mc -- use compiled emulation mode\n"); + fprintf(stream, " -mh -- use hybrid emulation mode (default)\n"); + fprintf(stream, " -mi -- use interpreted emulation mode\n"); + fprintf(stream, " -mx count -- maximum instructions to JIT (-mc/-mh)\n"); + exit(status); +} + +static int doMode(M6502_Mode m) +{ + mode= m; + return 0; +} + +static int doMaxInsns(int argc, char **argv, M6502 *mpu) +{ + if (argc < 2) usage(1); + char *end; + unsigned long l= strtol(argv[1], &end, 10); + if (*end) fail("bad number: %s", argv[1]); + max_insns= l; + return 1; +} + +void parse_args(int argc, char *argv[], M6502 *mpu) +{ + program= argv[0]; + while (++argv, --argc > 0) + { + int n= 0; + if (!strcmp(*argv, "-h")) usage(0); + else if (!strcmp(*argv, "-mc")) n= doMode(M6502_ModeCompiled); + else if (!strcmp(*argv, "-mh")) n= doMode(M6502_ModeHybrid); + else if (!strcmp(*argv, "-mi")) n= doMode(M6502_ModeInterpreted); + else if (!strcmp(*argv, "-mx")) n= doMaxInsns(argc, argv, mpu); + else usage(1); + argc -= n; + argv += n; + } + + M6502_setMode(mpu, mode, 
max_insns); +} + +void M6502_dump_masked(M6502 *mpu, char buffer[64]) +{ + uint8_t orig_p = mpu->registers->p; + mpu->registers->p &= ~(flagB | flagX); + M6502_dump(mpu, buffer); + mpu->registers->p = orig_p; +} diff --git a/test/test-utils.h b/test/test-utils.h new file mode 100644 index 0000000..5b15dd7 --- /dev/null +++ b/test/test-utils.h @@ -0,0 +1,30 @@ +/* test-utils.h -- utility functions for C test programs */ + +/* Copyright (c) 2005 Ian Piumarta + * Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. 
+ */ + +#ifndef PARSEARGS_H +#define PARSEARGS_H + +#include "lib6502.h" + +void parse_args(int argc, char *argv[], M6502 *mpu); + +void M6502_dump_masked(M6502 *mpu, char buffer[64]); + +#endif diff --git a/test/trivial-test.mst b/test/trivial-test.mst new file mode 100644 index 0000000..24de910 --- /dev/null +++ b/test/trivial-test.mst @@ -0,0 +1 @@ +Y \ No newline at end of file diff --git a/test/trivial-test.xa b/test/trivial-test.xa new file mode 100644 index 0000000..1448a22 --- /dev/null +++ b/test/trivial-test.xa @@ -0,0 +1,5 @@ +#include "config.xa" + + LDA #'Y' + JSR OSWRCH + JMP QUIT diff --git a/test/write-callback-modify-code.c b/test/write-callback-modify-code.c new file mode 100644 index 0000000..cb35317 --- /dev/null +++ b/test/write-callback-modify-code.c @@ -0,0 +1,100 @@ +/* Copyright (c) 2005 Ian Piumarta + * Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. 
+ */ + +#include +#include + +#include "lib6502.h" +#include "test-utils.h" + +int done(M6502 *mpu, uint16_t address, uint8_t data) +{ + char buffer[64]; + M6502_dump_masked(mpu, buffer); + printf("\nBRK instruction: address %04X opcode %02X\n%s\n", address, data, buffer); + exit(0); +} + +int oswrch(M6502 *mpu, uint16_t address, uint8_t data) +{ + putchar(mpu->registers->a); + mpu->memory[0xffee] = 0x60; // RTS + return 0; +} + +# define gen1(X) (mpu->memory[pc++]= (uint8_t)(X)) +# define gen2(X,Y) gen1(X); gen1(Y) +# define gen3(X,Y,Z) gen1(X); gen2(Y,Z) + +int wr(M6502 *mpu, uint16_t address, uint8_t data) +{ + if (address != 0x42) + { + abort(); + } + + unsigned pc = 0x6000; + gen2(0xa9, data); // LDA #data + gen3(0x4c, 0x00, 0x20); // JMP &2000 + return 0; +} + +int main(int argc, char *argv[]) +{ + M6502 *mpu = M6502_new(0, 0, 0); + parse_args(argc, argv, mpu); + + unsigned pc = 0x1000; + + M6502_setCallback(mpu, call, 0, done); + M6502_setCallback(mpu, call, 0xffee, oswrch); + M6502_setCallback(mpu, write, 0x42, wr ); + + gen2(0xa9, '>' ); // LDA #'>' + gen3(0x20, 0xee, 0xff); // JSR &FFEE + gen2(0xa2, 'A' ); // LDX #'A' + gen3(0x8e, 0x42, 0x00); // STX &0042 + gen3(0x20, 0x00, 0x60); // JSR &6000 + gen1(0xe8 ); // INX + gen2(0xe0, 'Z'+1 ); // CPX #('Z'+1) + gen2(0x90, 0xf5 ); // BCC to STX + + gen2(0xa0, 0x05 ); // LDY #&05 + gen2(0xa9, '>' ); // LDA #'>' + gen3(0x20, 0xee, 0xff); // JSR &FFEE + gen2(0xa2, 'A' ); // LDX #'A' + gen2(0x96, 0x42-0x05 ); // STX (&42-&05),Y + gen3(0x20, 0x00, 0x60); // JSR &6000 + gen1(0xe8 ); // INX + gen2(0xe0, 'Z'+1 ); // CPX #('Z'+1) + gen2(0x90, 0xf6 ); // BCC to STX + + gen2(0x00, 0x00 ); // BRK + + pc = 0x2000; + gen3(0x20, 0xee, 0xff); // JSR &FFEE + gen1(0x60 ); // RTS + + M6502_setVector(mpu, RST, 0x1000); + + M6502_reset(mpu); + M6502_run(mpu); + M6502_delete(mpu); /* We never reach here, but what the hey. 
*/ + + return 0; +} diff --git a/test/write-callback-modify-code.mst b/test/write-callback-modify-code.mst new file mode 100644 index 0000000..65de187 --- /dev/null +++ b/test/write-callback-modify-code.mst @@ -0,0 +1,3 @@ +>ABCDEFGHIJKLMNOPQRSTUVWXYZ>ABCDEFGHIJKLMNOPQRSTUVWXYZ +BRK instruction: address 1025 opcode 00 +PC=1027 SP=01FD A=5A X=5B Y=05 P=07 -----IZC diff --git a/test/z-self-modify-1.mst b/test/z-self-modify-1.mst new file mode 100644 index 0000000..24de910 --- /dev/null +++ b/test/z-self-modify-1.mst @@ -0,0 +1 @@ +Y \ No newline at end of file diff --git a/test/z-self-modify-1.xa b/test/z-self-modify-1.xa new file mode 100644 index 0000000..cdf31e2 --- /dev/null +++ b/test/z-self-modify-1.xa @@ -0,0 +1,94 @@ +; This test attempts to confirm that in hybrid mode, the JITted code is +; discarded correctly if it's modified by the interpreter. + +#include "config.xa" + +COUNT1 = $71 +COUNT2 = $72 +COUNT3 = $73 + +; We loop lots to get as much chance of a problem occurring as possible. + STZ COUNT1 +LOOP1 + LDY #0 +LOOP2 + LDX #0 +LOOP3 + +; The heart of the test. We LDA #n, then CMP
. If the two don't +; match we have a problem. +LDAOP + LDA #3 + CMP LDAOP+1 + BNE FAIL + +; We now modify the LDA operand... + INC LDAOP+1 + +; ... and occupy as much of the interpreter's time as possible while the JIT +; thread picks up the modified version (if it's not working from the snapshot). +; In reality we probably go round multiple times before the JIT completes. + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + +; And round and round we go. + DEX + BNE LOOP3 + DEY + BNE LOOP2 + DEC COUNT1 + BNE LOOP1 + +OK + LDA #'Y' + JSR OSWRCH + JMP QUIT +FAIL + LDA #'N' + JSR OSWRCH + JMP QUIT diff --git a/test/z-self-modify-2.mst b/test/z-self-modify-2.mst new file mode 100644 index 0000000..24de910 --- /dev/null +++ b/test/z-self-modify-2.mst @@ -0,0 +1 @@ +Y \ No newline at end of file diff --git a/test/z-self-modify-2.xa b/test/z-self-modify-2.xa new file mode 100644 index 0000000..81d21c4 --- /dev/null +++ b/test/z-self-modify-2.xa @@ -0,0 +1,125 @@ +; This test attempts to confirm that a subtle potential bug in the hybrid JIT +; implementation is not present. +; +; The potential problem is as follows: +; - we decide to JIT some code +; - we take a snapshot of memory +; - we kick off a JIT thread which *works off the main memory array*, not the +; snapshot +; - in the meantime the interpreter executes some code which modifies the code +; being JITted before it is actually jitted. +; - we JIT the modified version of the code +; - the interpreter then executes some code which reverts the change (A) +; - we decide to execute the JITted function. We check memory against the memory +; snapshot taken when we started JITting and find no differences in any +; addresses which contain code, because of the previous step marked (A). 
+; - boom, our JITted code is not doing what it should. +; +; The fix for this problem is simply to ensure that the JIT thread works off +; the snapshot of memory taken when we launched the JIT thread. Note that even +; if we fail to do this, self-modifying code which doesn't "undo" itself will +; be noticed when we use the memory snapshot to decide if the JITted code is +; still valid. +; +; This test case should execute correctly in all modes (of course), but in +; hybrid mode it should *fail* if the implementation is temporarily changed to +; JIT from mpu->memory and not memory_snapshot. At the time of writing it does. + + + +#include "config.xa" + +COUNT1 = $71 +COUNT2 = $72 +COUNT3 = $73 + +; We loop lots to get as much chance of a problem occurring as possible. + STZ COUNT1 +LOOP1 + LDY #0 +LOOP2 + LDX #0 +LOOP3 + +; The heart of the test. We LDA #n, then CMP the LDA operand byte at LDAOP+1
. If the two don't +; match we have a problem. +LDAOP + LDA #3 + CMP LDAOP+1 + BNE FAIL + +; We now modify the LDA operand... + INC LDAOP+1 + +; ... and occupy as much of the interpreter's time as possible while the JIT +; thread picks up the modified version (if it's not working from the snapshot). +; In reality we probably go round multiple times before the JIT completes. + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + +; We now put the operand back. Since we only switch from interpreting to JITting +; on a control transfer, we know the transition will occur at a point when we've +; put the operand back, which is helpful. + DEC LDAOP+1 + +; And round and round we go. + DEX + BNE LOOP3 + DEY + BNE LOOP2 + DEC COUNT1 + BNE LOOP1 + +OK + LDA #'Y' + JSR OSWRCH + JMP QUIT +FAIL + LDA #'N' + JSR OSWRCH + JMP QUIT diff --git a/util.cpp b/util.cpp new file mode 100644 index 0000000..dbcecab --- /dev/null +++ b/util.cpp @@ -0,0 +1,57 @@ +/* Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. 
+ */ + +#include "util.h" + +#include +#include +#include + +boost::mutex log_mutex; + +void log(const std::string &s) +{ + boost::mutex::scoped_lock scoped_lock(log_mutex); + std::cerr << s << std::endl; +} + +void die(const char *s) +{ + fflush(stdout); + fprintf(stderr, "\n%s\n", s); + abort(); +} + +std::string spaces(int n) +{ + return std::string(4 * n, ' '); +} + +std::string apply_prefix(const std::string &prefix, const std::string &s) +{ + std::string result = prefix; + for (std::string::size_type i = 0; i < s.length(); ++i) + { + result += s[i]; + if ((s[i] == '\n') && ((i + 1) < s.length())) + { + result.append(prefix); + } + } + return result; +} + diff --git a/util.h b/util.h new file mode 100644 index 0000000..c7967c6 --- /dev/null +++ b/util.h @@ -0,0 +1,73 @@ +/* Copyright (c) 2014 Steven Flintham + * + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 'Software'), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that both the + * above copyright notice(s) and this permission notice appear in supporting + * documentation. + * + * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. 
+ */ + +#ifndef UTIL_H +#define UTIL_H + +#include +#include +#include +#include + +#include +#include + +#define CANT_HAPPEN(s) \ + do { \ + std::stringstream stream; \ + stream << __FILE__ << ":" << __LINE__ << ":" << s; \ + throw std::runtime_error(stream.str()); \ + } \ + while (false) + +#ifdef LOG + #define TRACE(s) \ + do { \ + std::stringstream prefix; \ + prefix << __FILE__ << ":" << __LINE__ << "\t" << \ + boost::this_thread::get_id() << "\t"; \ + std::stringstream message; \ + message << s; \ + log(apply_prefix(prefix.str(), message.str())); \ + } \ + while (false) +#else + #define TRACE(s) \ + do { \ + } \ + while (false) +#endif + +// Avoid spurious "unused variable" warnings from regular assert(). +#ifndef NDEBUG + #define ASSERT_EQUAL(x, y) assert((x) == (y)) +#else + #define ASSERT_EQUAL(x, y) \ + do { \ + x = x; \ + } \ + while (0); +#endif + +extern boost::mutex log_mutex; +void log(const std::string &s); +void die(const char *s); + +std::string spaces(int n); +std::string apply_prefix(const std::string &prefix, const std::string &s); + +#endif diff --git a/valgrind.h b/valgrind.h new file mode 100644 index 0000000..222a58e --- /dev/null +++ b/valgrind.h @@ -0,0 +1,4060 @@ +/* -*- c -*- + ---------------------------------------------------------------- + + Notice that the following BSD-style license applies to this one + file (valgrind.h) only. The rest of Valgrind is licensed under the + terms of the GNU General Public License, version 2, unless + otherwise indicated. See the COPYING file in the source + distribution for details. + + ---------------------------------------------------------------- + + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2011 Julian Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. 
Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------------------------------------------------------- + + Notice that the above BSD-style license applies to this one file + (valgrind.h) only. The entire rest of Valgrind is licensed under + the terms of the GNU General Public License, version 2. See the + COPYING file in the source distribution for details. + + ---------------------------------------------------------------- +*/ + + +/* This file is for inclusion into client (your!) code. + + You can use these macros to manipulate and query Valgrind's + execution inside your own programs. 
+ + The resulting executables will still run without Valgrind, just a + little bit more slowly than they otherwise would, but otherwise + unchanged. When not running on valgrind, each client request + consumes very few (eg. 7) instructions, so the resulting performance + loss is negligible unless you plan to execute client requests + millions of times per second. Nevertheless, if that is still a + problem, you can compile with the NVALGRIND symbol defined (gcc + -DNVALGRIND) so that client requests are not even compiled in. */ + +#ifndef __VALGRIND_H +#define __VALGRIND_H + + +/* ------------------------------------------------------------------ */ +/* VERSION NUMBER OF VALGRIND */ +/* ------------------------------------------------------------------ */ + +/* Specify Valgrind's version number, so that user code can + conditionally compile based on our version number. Note that these + were introduced at version 3.6 and so do not exist in version 3.5 + or earlier. The recommended way to use them to check for "version + X.Y or later" is (eg) + +#if defined(__VALGRIND_MAJOR__) && defined(__VALGRIND_MINOR__) \ + && (__VALGRIND_MAJOR__ > 3 \ + || (__VALGRIND_MAJOR__ == 3 && __VALGRIND_MINOR__ >= 6)) +*/ +#define __VALGRIND_MAJOR__ 3 +#define __VALGRIND_MINOR__ 6 + + +#include + +/* Nb: this file might be included in a file compiled with -ansi. So + we can't use C++ style "//" comments nor the "asm" keyword (instead + use "__asm__"). */ + +/* Derive some tags indicating what the target platform is. Note + that in this file we're using the compiler's CPP symbols for + identifying architectures, which are different to the ones we use + within the rest of Valgrind. Note, __powerpc__ is active for both + 32 and 64-bit PPC, whereas __powerpc64__ is only active for the + latter (on Linux, that is). 
+ + Misc note: how to find out what's predefined in gcc by default: + gcc -Wp,-dM somefile.c +*/ +#undef PLAT_x86_darwin +#undef PLAT_amd64_darwin +#undef PLAT_x86_win32 +#undef PLAT_x86_linux +#undef PLAT_amd64_linux +#undef PLAT_ppc32_linux +#undef PLAT_ppc64_linux +#undef PLAT_arm_linux +#undef PLAT_s390x_linux + + +#if defined(__APPLE__) && defined(__i386__) +# define PLAT_x86_darwin 1 +#elif defined(__APPLE__) && defined(__x86_64__) +# define PLAT_amd64_darwin 1 +#elif defined(__MINGW32__) || defined(__CYGWIN32__) \ + || (defined(_WIN32) && defined(_M_IX86)) +# define PLAT_x86_win32 1 +#elif defined(__linux__) && defined(__i386__) +# define PLAT_x86_linux 1 +#elif defined(__linux__) && defined(__x86_64__) +# define PLAT_amd64_linux 1 +#elif defined(__linux__) && defined(__powerpc__) && !defined(__powerpc64__) +# define PLAT_ppc32_linux 1 +#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__) +# define PLAT_ppc64_linux 1 +#elif defined(__linux__) && defined(__arm__) +# define PLAT_arm_linux 1 +#elif defined(__linux__) && defined(__s390__) && defined(__s390x__) +# define PLAT_s390x_linux 1 +#else +/* If we're not compiling for our target platform, don't generate + any inline asms. */ +# if !defined(NVALGRIND) +# define NVALGRIND 1 +# endif +#endif + + +/* ------------------------------------------------------------------ */ +/* ARCHITECTURE SPECIFICS for SPECIAL INSTRUCTIONS. There is nothing */ +/* in here of use to end-users -- skip to the next section. */ +/* ------------------------------------------------------------------ */ + +/* + * VALGRIND_DO_CLIENT_REQUEST(): a statement that invokes a Valgrind client + * request. Accepts both pointers and integers as arguments. + * + * VALGRIND_DO_CLIENT_REQUEST_STMT(): a statement that invokes a Valgrind + * client request that does not return a value. 
+ + * VALGRIND_DO_CLIENT_REQUEST_EXPR(): a C expression that invokes a Valgrind + * client request and whose value equals the client request result. Accepts + * both pointers and integers as arguments. Note that such calls are not + * necessarily pure functions -- they may have side effects. + */ + +#define VALGRIND_DO_CLIENT_REQUEST(_zzq_rlval, _zzq_default, \ + _zzq_request, _zzq_arg1, _zzq_arg2, \ + _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + do { (_zzq_rlval) = VALGRIND_DO_CLIENT_REQUEST_EXPR((_zzq_default), \ + (_zzq_request), (_zzq_arg1), (_zzq_arg2), \ + (_zzq_arg3), (_zzq_arg4), (_zzq_arg5)); } while (0) + +#define VALGRIND_DO_CLIENT_REQUEST_STMT(_zzq_request, _zzq_arg1, \ + _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + do { (void) VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \ + (_zzq_request), (_zzq_arg1), (_zzq_arg2), \ + (_zzq_arg3), (_zzq_arg4), (_zzq_arg5)); } while (0) + +#if defined(NVALGRIND) + +/* Define NVALGRIND to completely remove the Valgrind magic sequence + from the compiled code (analogous to NDEBUG's effects on + assert()) */ +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + (_zzq_default) + +#else /* ! NVALGRIND */ + +/* The following defines the magic code sequences which the JITter + spots and handles magically. Don't look too closely at them as + they will rot your brain. + + The assembly code sequences for all architectures is in this one + file. This is because this file must be stand-alone, and we don't + want to have multiple files. + + For VALGRIND_DO_CLIENT_REQUEST, we must ensure that the default + value gets put in the return slot, so that everything works when + this is executed not under Valgrind. Args are passed in a memory + block, and so there's no intrinsic limit to the number that could + be passed, but it's currently five. 
+ + The macro args are: + _zzq_rlval result lvalue + _zzq_default default value (result returned when running on real CPU) + _zzq_request request code + _zzq_arg1..5 request params + + The other two macros are used to support function wrapping, and are + a lot simpler. VALGRIND_GET_NR_CONTEXT returns the value of the + guest's NRADDR pseudo-register and whatever other information is + needed to safely run the call original from the wrapper: on + ppc64-linux, the R2 value at the divert point is also needed. This + information is abstracted into a user-visible type, OrigFn. + + VALGRIND_CALL_NOREDIR_* behaves the same as the following on the + guest, but guarantees that the branch instruction will not be + redirected: x86: call *%eax, amd64: call *%rax, ppc32/ppc64: + branch-and-link-to-r11. VALGRIND_CALL_NOREDIR is just text, not a + complete inline asm, since it needs to be combined with more magic + inline asm stuff to be useful. +*/ + +/* ------------------------- x86-{linux,darwin} ---------------- */ + +#if defined(PLAT_x86_linux) || defined(PLAT_x86_darwin) \ + || (defined(PLAT_x86_win32) && defined(__GNUC__)) + +typedef + struct { + unsigned int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "roll $3, %%edi ; roll $13, %%edi\n\t" \ + "roll $29, %%edi ; roll $19, %%edi\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + __extension__ \ + ({volatile unsigned int _zzq_args[6]; \ + volatile unsigned int _zzq_result; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EDX = client_request ( %EAX ) */ \ + "xchgl %%ebx,%%ebx" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "memory" \ + ); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EAX = guest_NRADDR */ \ + "xchgl %%ecx,%%ecx" \ + : "=a" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_EAX \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%EAX */ \ + "xchgl %%edx,%%edx\n\t" +#endif /* PLAT_x86_linux || PLAT_x86_darwin || (PLAT_x86_win32 && __GNUC__) */ + +/* ------------------------- x86-Win32 ------------------------- */ + +#if defined(PLAT_x86_win32) && !defined(__GNUC__) + +typedef + struct { + unsigned int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#if defined(_MSC_VER) + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + __asm rol edi, 3 __asm rol edi, 13 \ + __asm rol edi, 29 __asm rol edi, 19 + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + valgrind_do_client_request_expr((uintptr_t)(_zzq_default), \ + (uintptr_t)(_zzq_request), (uintptr_t)(_zzq_arg1), \ + (uintptr_t)(_zzq_arg2), (uintptr_t)(_zzq_arg3), \ + (uintptr_t)(_zzq_arg4), (uintptr_t)(_zzq_arg5)) + +static __inline uintptr_t +valgrind_do_client_request_expr(uintptr_t _zzq_default, uintptr_t _zzq_request, + uintptr_t _zzq_arg1, uintptr_t _zzq_arg2, + uintptr_t _zzq_arg3, uintptr_t _zzq_arg4, + uintptr_t _zzq_arg5) +{ + volatile uintptr_t _zzq_args[6]; + volatile unsigned int _zzq_result; + _zzq_args[0] = (uintptr_t)(_zzq_request); + _zzq_args[1] = (uintptr_t)(_zzq_arg1); + _zzq_args[2] = (uintptr_t)(_zzq_arg2); + _zzq_args[3] = (uintptr_t)(_zzq_arg3); + _zzq_args[4] = (uintptr_t)(_zzq_arg4); + _zzq_args[5] = (uintptr_t)(_zzq_arg5); + __asm { __asm lea eax, _zzq_args __asm mov edx, _zzq_default + __SPECIAL_INSTRUCTION_PREAMBLE + /* %EDX = client_request ( %EAX ) */ + __asm xchg ebx,ebx + __asm mov _zzq_result, edx + } + return _zzq_result; +} + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned int __addr; \ + __asm { __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EAX = guest_NRADDR */ \ + __asm xchg ecx,ecx \ + __asm mov __addr, eax \ + } \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_EAX ERROR + +#else +#error Unsupported compiler. +#endif + +#endif /* PLAT_x86_win32 */ + +/* ------------------------ amd64-{linux,darwin} --------------- */ + +#if defined(PLAT_amd64_linux) || defined(PLAT_amd64_darwin) + +typedef + struct { + unsigned long long int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rolq $3, %%rdi ; rolq $13, %%rdi\n\t" \ + "rolq $61, %%rdi ; rolq $51, %%rdi\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + __extension__ \ + ({ volatile unsigned long long int _zzq_args[6]; \ + volatile unsigned long long int _zzq_result; \ + _zzq_args[0] = (unsigned long long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %RDX = client_request ( %RAX ) */ \ + "xchgq %%rbx,%%rbx" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "memory" \ + ); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned long long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %RAX = guest_NRADDR */ \ + "xchgq %%rcx,%%rcx" \ + : "=a" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_RAX \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%RAX */ \ + "xchgq %%rdx,%%rdx\n\t" +#endif /* PLAT_amd64_linux || PLAT_amd64_darwin */ + +/* ------------------------ ppc32-linux ------------------------ */ + +#if defined(PLAT_ppc32_linux) + +typedef + struct { + unsigned int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ + "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + __extension__ \ + ({ unsigned int _zzq_args[6]; \ + unsigned int _zzq_result; \ + unsigned int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 3,%1\n\t" /*default*/ \ + "mr 4,%2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" /*result*/ \ + : "=b" (_zzq_result) \ + : "b" (_zzq_default), "b" (_zzq_ptr) \ + : "cc", "memory", "r3", "r4"); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" +#endif /* PLAT_ppc32_linux */ + +/* ------------------------ ppc64-linux ------------------------ */ + +#if defined(PLAT_ppc64_linux) + +typedef + struct { + unsigned long long int nraddr; /* where's the code? */ + unsigned long long int r2; /* what tocptr do we need? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ + "rotldi 0,0,61 ; rotldi 0,0,51\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + __extension__ \ + ({ unsigned long long int _zzq_args[6]; \ + unsigned long long int _zzq_result; \ + unsigned long long int* _zzq_ptr; \ + _zzq_args[0] = (unsigned long long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 3,%1\n\t" /*default*/ \ + "mr 4,%2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" /*result*/ \ + : "=b" (_zzq_result) \ + : "b" (_zzq_default), "b" (_zzq_ptr) \ + : "cc", "memory", "r3", "r4"); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned long long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" + +#endif /* PLAT_ppc64_linux */ + +/* ------------------------- arm-linux ------------------------- */ + +#if defined(PLAT_arm_linux) + +typedef + struct { + unsigned int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "mov r12, r12, ror #3 ; mov r12, r12, ror #13 \n\t" \ + "mov r12, r12, ror #29 ; mov r12, r12, ror #19 \n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + __extension__ \ + ({volatile unsigned int _zzq_args[6]; \ + volatile unsigned int _zzq_result; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + __asm__ volatile("mov r3, %1\n\t" /*default*/ \ + "mov r4, %2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* R3 = client_request ( R4 ) */ \ + "orr r10, r10, r10\n\t" \ + "mov %0, r3" /*result*/ \ + : "=r" (_zzq_result) \ + : "r" (_zzq_default), "r" (&_zzq_args[0]) \ + : "cc","memory", "r3", "r4"); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* R3 = guest_NRADDR */ \ + "orr r11, r11, r11\n\t" \ + "mov %0, r3" \ + : "=r" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R4 */ \ + "orr r12, r12, r12\n\t" + +#endif /* PLAT_arm_linux */ + +/* ------------------------ s390x-linux ------------------------ */ + +#if defined(PLAT_s390x_linux) + +typedef + struct { + unsigned long long int nraddr; /* where's the code? */ + } + OrigFn; + +/* __SPECIAL_INSTRUCTION_PREAMBLE will be used to identify Valgrind specific + * code. This detection is implemented in platform specific toIR.c + * (e.g. VEX/priv/guest_s390_decoder.c). 
+ */ +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "lr 15,15\n\t" \ + "lr 1,1\n\t" \ + "lr 2,2\n\t" \ + "lr 3,3\n\t" + +#define __CLIENT_REQUEST_CODE "lr 2,2\n\t" +#define __GET_NR_CONTEXT_CODE "lr 3,3\n\t" +#define __CALL_NO_REDIR_CODE "lr 4,4\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + __extension__ \ + ({volatile unsigned long long int _zzq_args[6]; \ + volatile unsigned long long int _zzq_result; \ + _zzq_args[0] = (unsigned long long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ + __asm__ volatile(/* r2 = args */ \ + "lgr 2,%1\n\t" \ + /* r3 = default */ \ + "lgr 3,%2\n\t" \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + __CLIENT_REQUEST_CODE \ + /* results = r3 */ \ + "lgr %0, 3\n\t" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "2", "3", "memory" \ + ); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned long long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + __GET_NR_CONTEXT_CODE \ + "lgr %0, 3\n\t" \ + : "=a" (__addr) \ + : \ + : "cc", "3", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_R1 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + __CALL_NO_REDIR_CODE + +#endif /* PLAT_s390x_linux */ + +/* Insert assembly code for other platforms here... */ + +#endif /* NVALGRIND */ + + +/* ------------------------------------------------------------------ */ +/* PLATFORM SPECIFICS for FUNCTION WRAPPING. This is all very */ +/* ugly. It's the least-worst tradeoff I can think of. 
*/ +/* ------------------------------------------------------------------ */ + +/* This section defines magic (a.k.a appalling-hack) macros for doing + guaranteed-no-redirection macros, so as to get from function + wrappers to the functions they are wrapping. The whole point is to + construct standard call sequences, but to do the call itself with a + special no-redirect call pseudo-instruction that the JIT + understands and handles specially. This section is long and + repetitious, and I can't see a way to make it shorter. + + The naming scheme is as follows: + + CALL_FN_{W,v}_{v,W,WW,WWW,WWWW,5W,6W,7W,etc} + + 'W' stands for "word" and 'v' for "void". Hence there are + different macros for calling arity 0, 1, 2, 3, 4, etc, functions, + and for each, the possibility of returning a word-typed result, or + no result. +*/ + +/* Use these to write the name of your wrapper. NOTE: duplicates + VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. NOTE also: inserts + the default behaviour equivalence class tag "0000" into the name. + See pub_tool_redir.h for details -- normally you don't need to + think about this, though. */ + +/* Use an extra level of macroisation so as to ensure the soname/fnname + args are fully macro-expanded before pasting them together. */ +#define VG_CONCAT4(_aa,_bb,_cc,_dd) _aa##_bb##_cc##_dd + +#define I_WRAP_SONAME_FNNAME_ZU(soname,fnname) \ + VG_CONCAT4(_vgw00000ZU_,soname,_,fnname) + +#define I_WRAP_SONAME_FNNAME_ZZ(soname,fnname) \ + VG_CONCAT4(_vgw00000ZZ_,soname,_,fnname) + +/* Use this macro from within a wrapper function to collect the + context (address and possibly other info) of the original function. + Once you have that you can then use it in one of the CALL_FN_ + macros. The type of the argument _lval is OrigFn. */ +#define VALGRIND_GET_ORIG_FN(_lval) VALGRIND_GET_NR_CONTEXT(_lval) + +/* Derivatives of the main macros below, for calling functions + returning void.
*/ + +#define CALL_FN_v_v(fnptr) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_v(_junk,fnptr); } while (0) + +#define CALL_FN_v_W(fnptr, arg1) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_W(_junk,fnptr,arg1); } while (0) + +#define CALL_FN_v_WW(fnptr, arg1,arg2) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WW(_junk,fnptr,arg1,arg2); } while (0) + +#define CALL_FN_v_WWW(fnptr, arg1,arg2,arg3) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WWW(_junk,fnptr,arg1,arg2,arg3); } while (0) + +#define CALL_FN_v_WWWW(fnptr, arg1,arg2,arg3,arg4) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WWWW(_junk,fnptr,arg1,arg2,arg3,arg4); } while (0) + +#define CALL_FN_v_5W(fnptr, arg1,arg2,arg3,arg4,arg5) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_5W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5); } while (0) + +#define CALL_FN_v_6W(fnptr, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_6W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5,arg6); } while (0) + +#define CALL_FN_v_7W(fnptr, arg1,arg2,arg3,arg4,arg5,arg6,arg7) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_7W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5,arg6,arg7); } while (0) + +/* ------------------------- x86-{linux,darwin} ---------------- */ + +#if defined(PLAT_x86_linux) || defined(PLAT_x86_darwin) + +/* These regs are trashed by the hidden call. No need to mention eax + as gcc can already see that, plus causes gcc to bomb. */ +#define __CALLER_SAVED_REGS /*"eax"*/ "ecx", "edx" + +/* These CALL_FN_ macros assume that on x86-linux, sizeof(unsigned + long) == 4. 
*/ +/* Shape shared by every x86 CALL_FN_W_* macro below: _argvec[0] holds + _orig.nraddr and _argvec[1..n] hold the arguments, each cast to + unsigned long. %eax enters the asm pointing at _argvec[0]; the + arguments are pushed last-to-first from 4*i(%eax), then the target + address is loaded into %eax itself and VALGRIND_CALL_NOREDIR_EAX is + issued. Where present, the leading subl is padding sized so that + padding plus pushed bytes totals 16, 32 or 48, which the trailing + addl removes. Whatever is left in %eax is assigned to lval. */ +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + "subl $12, %%esp\n\t" /* pad: 12+4 = 16 */ \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $16, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + "subl $8, %%esp\n\t" /* pad: 8+8 = 16 */ \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $16, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned
long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + "subl $4, %%esp\n\t" /* pad: 4+12 = 16 */ \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $16, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + "pushl 16(%%eax)\n\t" /* no pad: 4*4 = 16 */ \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $16, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + "subl $12, %%esp\n\t" /* pad: 12+20 = 32 */ \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $32, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + :
/*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + "subl $8, %%esp\n\t" /* pad: 8+24 = 32 */ \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $32, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + "subl $4, %%esp\n\t" /* pad: 4+28 = 32 */ \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $32, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory",
__CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + "pushl 32(%%eax)\n\t" /* no pad: 8*4 = 32 */ \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $32, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + "subl $12, %%esp\n\t" /* pad: 12+36 = 48 */ \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl
8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $48, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + "subl $8, %%esp\n\t" /* pad: 8+40 = 48 */ \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $48, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] =
(unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + "subl $4, %%esp\n\t" /* pad: 4+44 = 48 */ \ + "pushl 44(%%eax)\n\t" \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $48, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + "pushl 48(%%eax)\n\t" /* no pad: 12*4 = 48 */ \ + "pushl 44(%%eax)\n\t" \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /*
target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $48, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_x86_linux || PLAT_x86_darwin */ + +/* ------------------------ amd64-{linux,darwin} --------------- */ + +#if defined(PLAT_amd64_linux) || defined(PLAT_amd64_darwin) + +/* ARGREGS: rdi rsi rdx rcx r8 r9 (the rest on stack in R-to-L order) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS /*"rax",*/ "rcx", "rdx", "rsi", \ + "rdi", "r8", "r9", "r10", "r11" + +/* This is all pretty complex. It's so as to make stack unwinding + work reliably. See bug 243270. The basic problem is the sub and + add of 128 of %rsp in all of the following macros. If gcc believes + the CFA is in %rsp, then unwinding may fail, because what's at the + CFA is not what gcc "expected" when it constructs the CFIs for the + places where the macros are instantiated. + + But we can't just add a CFI annotation to increase the CFA offset + by 128, to match the sub of 128 from %rsp, because we don't know + whether gcc has chosen %rsp as the CFA at that point, or whether it + has chosen some other register (eg, %rbp). In the latter case, + adding a CFI annotation to change the CFA offset is simply wrong. + + So the solution is to get hold of the CFA using + __builtin_dwarf_cfa(), put it in a known register, and add a + CFI annotation to say what the register is. We choose %rbp for + this (perhaps perversely), because: + + (1) %rbp is already subject to unwinding. If a new register was + chosen then the unwinder would have to unwind it in all stack + traces, which is expensive, and + + (2) %rbp is already subject to precise exception updates in the + JIT. If a new register was chosen, we'd have to have precise + exceptions for it too, which reduces performance of the + generated code. + + However ..
one extra complication. We can't just whack the result + of __builtin_dwarf_cfa() into %rbp and then add %rbp to the + list of trashed registers at the end of the inline assembly + fragments; gcc won't allow %rbp to appear in that list. Hence + instead we need to stash %rbp in %r15 for the duration of the asm, + and say that %r15 is trashed instead. gcc seems happy to go with + that. + + Oh .. and this all needs to be conditionalised so that it is + unchanged from before this commit, when compiled with older gccs + that don't support __builtin_dwarf_cfa. Furthermore, since + this header file is freestanding, it has to be independent of + config.h, and so the following conditionalisation cannot depend on + configure time checks. + + Although it's not clear from + 'defined(__GNUC__) && defined(__GCC_HAVE_DWARF2_CFI_ASM)', + this expression excludes Darwin. + .cfi directives in Darwin assembly appear to be completely + different and I haven't investigated how they work. + + For even more entertainment value, note we have to use the + completely undocumented __builtin_dwarf_cfa(), which appears to + really compute the CFA, whereas __builtin_frame_address(0) claims + to but actually doesn't. See + https://bugs.kde.org/show_bug.cgi?id=243270#c47 +*/ +#if defined(__GNUC__) && defined(__GCC_HAVE_DWARF2_CFI_ASM) +# define __FRAME_POINTER \ + ,"r"(__builtin_dwarf_cfa()) /* extra asm input holding the CFA; it is operand %2 in the CALL_FN_ macros */ +# define VALGRIND_CFI_PROLOGUE \ + "movq %%rbp, %%r15\n\t" /* stash %rbp in %r15 */ \ + "movq %2, %%rbp\n\t" /* %rbp := CFA */ \ + ".cfi_remember_state\n\t" /* save the current CFI rules */ \ + ".cfi_def_cfa rbp, 0\n\t" /* CFA is now %rbp + 0 */ +# define VALGRIND_CFI_EPILOGUE \ + "movq %%r15, %%rbp\n\t" /* put the stashed %rbp back */ \ + ".cfi_restore_state\n\t" /* back to the saved CFI rules */ +#else /* no CFI asm support: the three helpers expand to nothing */ +# define __FRAME_POINTER +# define VALGRIND_CFI_PROLOGUE +# define VALGRIND_CFI_EPILOGUE +#endif + + +/* These CALL_FN_ macros assume that on amd64-linux, sizeof(unsigned + long) == 8. */ + +/* NB 9 Sept 07. There is a nasty kludge here in all these CALL_FN_ + macros.
In order not to trash the stack redzone, we need to drop + %rsp by 128 before the hidden call, and restore afterwards. The + nastyness is that it is only by luck that the stack still appears + to be unwindable during the hidden call - since then the behaviour + of any routine using this macro does not match what the CFI data + says. Sigh. + + Why is this important? Imagine that a wrapper has a stack + allocated local, and passes to the hidden call, a pointer to it. + Because gcc does not know about the hidden call, it may allocate + that local in the redzone. Unfortunately the hidden call may then + trash it before it comes to use it. So we must step clear of the + redzone, for the duration of the hidden call, to make it safe. + + Probably the same problem afflicts the other redzone-style ABIs too + (ppc64-linux); but for those, the stack is + self describing (none of this CFI nonsense) so at least messing + with the stack pointer doesn't give a danger of non-unwindable + stack. */ +/* Shape shared by every amd64 CALL_FN_W_* macro below: %rax enters + the asm pointing at _argvec[0] (= _orig.nraddr). Arguments 1..6 + are loaded from 8*i(%rax) into rdi, rsi, rdx, rcx, r8, r9; any + further arguments are pushed last-to-first. The target address is + then loaded into %rax itself and VALGRIND_CALL_NOREDIR_RAX is + issued. %rsp is dropped by 128 first (136 when an odd number of + words is pushed, which makes the total adjustment a multiple of + 16) and restored afterwards. VALGRIND_CFI_PROLOGUE/EPILOGUE + bracket the sequence and r15 is listed as trashed. Whatever is + left in %rax is assigned to lval. */ +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "subq $128,%%rsp\n\t" /* step clear of the redzone */ \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "subq $128,%%rsp\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ +
VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "subq $128,%%rsp\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "subq $128,%%rsp\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ +
volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "subq $128,%%rsp\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "subq $128,%%rsp\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned
long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "subq $128,%%rsp\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "subq $136,%%rsp\n\t" /* 128 + 8 pad: 136+8 = 144 */ \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $8, %%rsp\n" \ + "addq $136,%%rsp\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig,
arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "subq $128,%%rsp\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $16, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "subq $136,%%rsp\n\t" /* 128 + 8 pad: 136+24 = 160 */ \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq
40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $24, %%rsp\n" \ + "addq $136,%%rsp\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "subq $128,%%rsp\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $32, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn
_orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "subq $136,%%rsp\n\t" /* 128 + 8 pad: 136+40 = 176 */ \ + "pushq 88(%%rax)\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $40, %%rsp\n" \ + "addq $136,%%rsp\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ +
_argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "subq $128,%%rsp\n\t" \ + "pushq 96(%%rax)\n\t" \ + "pushq 88(%%rax)\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $48, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_amd64_linux || PLAT_amd64_darwin */ + +/* ------------------------ ppc32-linux ------------------------ */ + +#if defined(PLAT_ppc32_linux) + +/* This is useful for finding out about the on-stack stuff: + + extern int f9 ( int,int,int,int,int,int,int,int,int ); + extern int f10 ( int,int,int,int,int,int,int,int,int,int ); + extern int f11 ( int,int,int,int,int,int,int,int,int,int,int ); + extern int f12 ( int,int,int,int,int,int,int,int,int,int,int,int ); + + int g9 ( void ) { + return f9(11,22,33,44,55,66,77,88,99); + } + int g10 ( void ) { + return f10(11,22,33,44,55,66,77,88,99,110); + } + int g11 ( void ) { + return f11(11,22,33,44,55,66,77,88,99,110,121); + } + int g12 ( void ) { + return f12(11,22,33,44,55,66,77,88,99,110,121,132); + } +*/ + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call.
*/ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* These CALL_FN_ macros assume that on ppc32-linux, + sizeof(unsigned long) == 4. + + Shape shared by the macros below: r11 is pointed at _argvec[0] + (= _orig.nraddr), register argument i is loaded from 4*i(r11) + into r(2+i), the target address is then loaded into r11 itself and + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 is issued; afterwards r3 is + copied to _res, which is assigned to lval. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* &_argvec[0]->r11 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); /* parenthesised so the cast covers the whole argument expression */ \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" /* arg2->r4 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval =
(__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" /* arg2->r4 */ \ + "lwz 5,12(11)\n\t" /* arg3->r5 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" /* arg2->r4 */ \ + "lwz 5,12(11)\n\t" /* arg3->r5 */ \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile(
\ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" /* arg2->r4 */ \ + "lwz 5,12(11)\n\t" /* arg3->r5 */ \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" /* arg5->r7 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" /* arg2->r4 */ \ + "lwz 5,12(11)\n\t" /* arg3->r5 */ \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" /* arg5->r7 */ \ + "lwz 8,24(11)\n\t" /* arg6->r8 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz
6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned 
long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-16\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,16\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-16\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,16\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", 
__CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-32\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,16(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,32\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned 
long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + _argvec[12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-32\n\t" \ + /* arg12 */ \ + "lwz 3,48(11)\n\t" \ + "stw 3,20(1)\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,16(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,32\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc32_linux */ + +/* ------------------------ ppc64-linux ------------------------ */ + +#if defined(PLAT_ppc64_linux) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned + long) == 8. 
*/ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ 
volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = 
(unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* 
_argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 
*/ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long 
_argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,128" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned 
long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,128" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = 
(unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,144" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ 
+ "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ \ + /* arg12 */ \ + "ld 3,96(11)\n\t" \ + "std 3,136(1)\n\t" \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,144" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc64_linux */ + +/* ------------------------- arm-linux ------------------------- */ + +#if defined(PLAT_arm_linux) + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS "r0", "r1", "r2", "r3","r4","r14" + +/* These CALL_FN_ macros assume that on arm-linux, sizeof(unsigned + long) == 4. 
*/ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "mov %0, r0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "mov %0, r0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "mov %0, r0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + 
_argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "mov %0, r0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + "ldr r0, [%1, #20] \n\t" \ + "push {r0} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "add sp, sp, #4 \n\t" \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + 
: /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "push {r0, r1} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "add sp, sp, #8 \n\t" \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "push {r0, r1, r2} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "add sp, sp, #12 \n\t" \ + "mov %0, r0" \ + : 
/*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "push {r0, r1, r2, r3} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "add sp, sp, #16 \n\t" \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" 
\ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "ldr r4, [%1, #36] \n\t" \ + "push {r0, r1, r2, r3, r4} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "add sp, sp, #20 \n\t" \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + "ldr r0, [%1, #40] \n\t" \ + "push {r0} \n\t" \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "ldr r4, [%1, #36] \n\t" \ + "push {r0, r1, r2, r3, r4} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "add sp, sp, #24 \n\t" \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = 
(orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + "ldr r0, [%1, #40] \n\t" \ + "ldr r1, [%1, #44] \n\t" \ + "push {r0, r1} \n\t" \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "ldr r4, [%1, #36] \n\t" \ + "push {r0, r1, r2, r3, r4} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "add sp, sp, #28 \n\t" \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory",__CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + 
_argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + "ldr r0, [%1, #40] \n\t" \ + "ldr r1, [%1, #44] \n\t" \ + "ldr r2, [%1, #48] \n\t" \ + "push {r0, r1, r2} \n\t" \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "ldr r4, [%1, #36] \n\t" \ + "push {r0, r1, r2, r3, r4} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "add sp, sp, #32 \n\t" \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_arm_linux */ + +/* ------------------------- s390x-linux ------------------------- */ + +#if defined(PLAT_s390x_linux) + +/* Similar workaround as amd64 (see above), but we use r11 as frame + pointer and save the old r11 in r7. r11 might be used for + argvec, therefore we copy argvec in r1 since r1 is clobbered + after the call anyway. */ +#if defined(__GNUC__) && defined(__GCC_HAVE_DWARF2_CFI_ASM) +# define __FRAME_POINTER \ + ,"d"(__builtin_dwarf_cfa()) +# define VALGRIND_CFI_PROLOGUE \ + ".cfi_remember_state\n\t" \ + "lgr 1,%1\n\t" /* copy the argvec pointer in r1 */ \ + "lgr 7,11\n\t" \ + "lgr 11,%2\n\t" \ + ".cfi_def_cfa r11, 0\n\t" +# define VALGRIND_CFI_EPILOGUE \ + "lgr 11, 7\n\t" \ + ".cfi_restore_state\n\t" +#else +# define __FRAME_POINTER +# define VALGRIND_CFI_PROLOGUE \ + "lgr 1,%1\n\t" +# define VALGRIND_CFI_EPILOGUE +#endif + + + + +/* These regs are trashed by the hidden call. Note that we overwrite + r14 in s390_irgen_noredir (VEX/priv/guest_s390_irgen.c) to give the + function a proper return address. All others are ABI defined call + clobbers. 
*/ +#define __CALLER_SAVED_REGS "0","1","2","3","4","5","14", \ + "f0","f1","f2","f3","f4","f5","f6","f7" + + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-160\n\t" \ + "lg 1, 0(1)\n\t" /* target->r1 */ \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,160\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "d" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +/* The call abi has the arguments in r2-r6 and stack */ +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-160\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,160\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1, arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-160\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,160\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : 
/*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1, arg2, arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-160\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,160\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1, arg2, arg3, arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-160\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,160\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1, arg2, arg3, arg4, arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + 
_argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-160\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,160\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-168\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,168\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = 
(unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-176\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "mvc 168(8,15), 56(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,176\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7 ,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-184\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "mvc 168(8,15), 56(1)\n\t" \ + "mvc 176(8,15), 64(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,184\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7 ,arg8, arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + 
volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-192\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "mvc 168(8,15), 56(1)\n\t" \ + "mvc 176(8,15), 64(1)\n\t" \ + "mvc 184(8,15), 72(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,192\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7 ,arg8, arg9, arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-200\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "mvc 168(8,15), 56(1)\n\t" \ + "mvc 176(8,15), 64(1)\n\t" \ + "mvc 184(8,15), 72(1)\n\t" \ + "mvc 192(8,15), 
80(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,200\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7 ,arg8, arg9, arg10, arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-208\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "mvc 168(8,15), 56(1)\n\t" \ + "mvc 176(8,15), 64(1)\n\t" \ + "mvc 184(8,15), 72(1)\n\t" \ + "mvc 192(8,15), 80(1)\n\t" \ + "mvc 200(8,15), 88(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,208\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7 ,arg8, arg9, arg10, arg11, arg12)\ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned 
long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + _argvec[12] = (unsigned long)arg12; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-216\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "mvc 168(8,15), 56(1)\n\t" \ + "mvc 176(8,15), 64(1)\n\t" \ + "mvc 184(8,15), 72(1)\n\t" \ + "mvc 192(8,15), 80(1)\n\t" \ + "mvc 200(8,15), 88(1)\n\t" \ + "mvc 208(8,15), 96(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,216\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + + +#endif /* PLAT_s390x_linux */ + + +/* ------------------------------------------------------------------ */ +/* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS. */ +/* */ +/* ------------------------------------------------------------------ */ + +/* Some request codes. There are many more of these, but most are not + exposed to end-user view. These are the public ones, all of the + form 0x1000 + small_number. + + Core ones are in the range 0x00000000--0x0000ffff. The non-public + ones start at 0x2000. +*/ + +/* These macros are used by tools -- they must be public, but don't + embed them into other programs. */ +#define VG_USERREQ_TOOL_BASE(a,b) \ + ((unsigned int)(((a)&0xff) << 24 | ((b)&0xff) << 16)) +#define VG_IS_TOOL_USERREQ(a, b, v) \ + (VG_USERREQ_TOOL_BASE(a,b) == ((v) & 0xffff0000)) + +/* !! ABIWARNING !! ABIWARNING !! 
ABIWARNING !! ABIWARNING !! + This enum comprises an ABI exported by Valgrind to programs + which use client requests. DO NOT CHANGE THE ORDER OF THESE + ENTRIES, NOR DELETE ANY -- add new ones at the end. */ +typedef + enum { VG_USERREQ__RUNNING_ON_VALGRIND = 0x1001, + VG_USERREQ__DISCARD_TRANSLATIONS = 0x1002, + + /* These allow any function to be called from the simulated + CPU but run on the real CPU. Nb: the first arg passed to + the function is always the ThreadId of the running + thread! So CLIENT_CALL0 actually requires a 1 arg + function, etc. */ + VG_USERREQ__CLIENT_CALL0 = 0x1101, + VG_USERREQ__CLIENT_CALL1 = 0x1102, + VG_USERREQ__CLIENT_CALL2 = 0x1103, + VG_USERREQ__CLIENT_CALL3 = 0x1104, + + /* Can be useful in regression testing suites -- eg. can + send Valgrind's output to /dev/null and still count + errors. */ + VG_USERREQ__COUNT_ERRORS = 0x1201, + + /* Allows a string (gdb monitor command) to be passed to the tool + Used for interaction with vgdb/gdb */ + VG_USERREQ__GDB_MONITOR_COMMAND = 0x1202, + + /* These are useful and can be interpreted by any tool that + tracks malloc() et al, by using vg_replace_malloc.c. */ + VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301, + VG_USERREQ__RESIZEINPLACE_BLOCK = 0x130b, + VG_USERREQ__FREELIKE_BLOCK = 0x1302, + /* Memory pool support. */ + VG_USERREQ__CREATE_MEMPOOL = 0x1303, + VG_USERREQ__DESTROY_MEMPOOL = 0x1304, + VG_USERREQ__MEMPOOL_ALLOC = 0x1305, + VG_USERREQ__MEMPOOL_FREE = 0x1306, + VG_USERREQ__MEMPOOL_TRIM = 0x1307, + VG_USERREQ__MOVE_MEMPOOL = 0x1308, + VG_USERREQ__MEMPOOL_CHANGE = 0x1309, + VG_USERREQ__MEMPOOL_EXISTS = 0x130a, + + /* Allow printfs to valgrind log. */ + /* The first two pass the va_list argument by value, which + assumes it is the same size as or smaller than a UWord, + which generally isn't the case. Hence are deprecated. + The second two pass the vargs by reference and so are + immune to this problem. 
*/ + /* both :: char* fmt, va_list vargs (DEPRECATED) */ + VG_USERREQ__PRINTF = 0x1401, + VG_USERREQ__PRINTF_BACKTRACE = 0x1402, + /* both :: char* fmt, va_list* vargs */ + VG_USERREQ__PRINTF_VALIST_BY_REF = 0x1403, + VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF = 0x1404, + + /* Stack support. */ + VG_USERREQ__STACK_REGISTER = 0x1501, + VG_USERREQ__STACK_DEREGISTER = 0x1502, + VG_USERREQ__STACK_CHANGE = 0x1503, + + /* Wine support */ + VG_USERREQ__LOAD_PDB_DEBUGINFO = 0x1601, + + /* Querying of debug info. */ + VG_USERREQ__MAP_IP_TO_SRCLOC = 0x1701, + + /* Disable/enable error reporting level. Takes a single + Word arg which is the delta to this thread's error + disablement indicator. Hence 1 disables or further + disables errors, and -1 moves back towards enablement. + Other values are not allowed. */ + VG_USERREQ__CHANGE_ERR_DISABLEMENT = 0x1801 + } Vg_ClientRequest; + +#if !defined(__GNUC__) +# define __extension__ /* */ +#endif + + +/* Returns the number of Valgrinds this code is running under. That + is, 0 if running natively, 1 if running under Valgrind, 2 if + running under Valgrind which is running under another Valgrind, + etc. */ +#define RUNNING_ON_VALGRIND \ + (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* if not */, \ + VG_USERREQ__RUNNING_ON_VALGRIND, \ + 0, 0, 0, 0, 0) \ + + +/* Discard translation of code in the range [_qzz_addr .. _qzz_addr + + _qzz_len - 1]. Useful if you are debugging a JITter or some such, + since it provides a way to make sure valgrind will retranslate the + invalidated area. Returns no value. */ +#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DISCARD_TRANSLATIONS, \ + _qzz_addr, _qzz_len, 0, 0, 0) + + +/* These requests are for getting Valgrind itself to print something. + Possibly with a backtrace. This is a really ugly hack. The return value + is the number of characters printed, excluding the "**** " part at the + start and the backtrace (if present). 
*/ + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +/* Modern GCC will optimize the static routine out if unused, + and unused attribute will shut down warnings about it. */ +static int VALGRIND_PRINTF(const char *format, ...) + __attribute__((format(__printf__, 1, 2), __unused__)); +#endif +static int +#if defined(_MSC_VER) +__inline +#endif +VALGRIND_PRINTF(const char *format, ...) +{ +#if defined(NVALGRIND) + return 0; +#else /* NVALGRIND */ +#if defined(_MSC_VER) + uintptr_t _qzz_res; +#else + unsigned long _qzz_res; +#endif + va_list vargs; + va_start(vargs, format); +#if defined(_MSC_VER) + _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR(0, + VG_USERREQ__PRINTF_VALIST_BY_REF, + (uintptr_t)format, + (uintptr_t)&vargs, + 0, 0, 0); +#else + _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR(0, + VG_USERREQ__PRINTF_VALIST_BY_REF, + (unsigned long)format, + (unsigned long)&vargs, + 0, 0, 0); +#endif + va_end(vargs); + return (int)_qzz_res; +#endif /* NVALGRIND */ +} + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +static int VALGRIND_PRINTF_BACKTRACE(const char *format, ...) + __attribute__((format(__printf__, 1, 2), __unused__)); +#endif +static int +#if defined(_MSC_VER) +__inline +#endif +VALGRIND_PRINTF_BACKTRACE(const char *format, ...) +{ +#if defined(NVALGRIND) + return 0; +#else /* NVALGRIND */ +#if defined(_MSC_VER) + uintptr_t _qzz_res; +#else + unsigned long _qzz_res; +#endif + va_list vargs; + va_start(vargs, format); +#if defined(_MSC_VER) + _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR(0, + VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF, + (uintptr_t)format, + (uintptr_t)&vargs, + 0, 0, 0); +#else + _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR(0, + VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF, + (unsigned long)format, + (unsigned long)&vargs, + 0, 0, 0); +#endif + va_end(vargs); + return (int)_qzz_res; +#endif /* NVALGRIND */ +} + + +/* These requests allow control to move from the simulated CPU to the + real CPU, calling an arbitrary function.
+ + Note that the current ThreadId is inserted as the first argument. + So this call: + + VALGRIND_NON_SIMD_CALL2(f, arg1, arg2) + + requires f to have this signature: + + Word f(Word tid, Word arg1, Word arg2) + + where "Word" is a word-sized type. + + Note that these client requests are not entirely reliable. For example, + if you call a function with them that subsequently calls printf(), + there's a high chance Valgrind will crash. Generally, your prospects of + these working are made higher if the called function does not refer to + any global variables, and does not refer to any libc or other functions + (printf et al). Any kind of entanglement with libc or dynamic linking is + likely to have a bad outcome, for tricky reasons which we've grappled + with a lot in the past. +*/ +#define VALGRIND_NON_SIMD_CALL0(_qyy_fn) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */, \ + VG_USERREQ__CLIENT_CALL0, \ + _qyy_fn, \ + 0, 0, 0, 0) + +#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */, \ + VG_USERREQ__CLIENT_CALL1, \ + _qyy_fn, \ + _qyy_arg1, 0, 0, 0) + +#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */, \ + VG_USERREQ__CLIENT_CALL2, \ + _qyy_fn, \ + _qyy_arg1, _qyy_arg2, 0, 0) + +#define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */, \ + VG_USERREQ__CLIENT_CALL3, \ + _qyy_fn, \ + _qyy_arg1, _qyy_arg2, \ + _qyy_arg3, 0) + + +/* Counts the number of errors that have been recorded by a tool. Nb: + the tool must record the errors with VG_(maybe_record_error)() or + VG_(unique_error)() for them to be counted. 
*/ +#define VALGRIND_COUNT_ERRORS \ + (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + 0 /* default return */, \ + VG_USERREQ__COUNT_ERRORS, \ + 0, 0, 0, 0, 0) + +/* Several Valgrind tools (Memcheck, Massif, Helgrind, DRD) rely on knowing + when heap blocks are allocated in order to give accurate results. This + happens automatically for the standard allocator functions such as + malloc(), calloc(), realloc(), memalign(), new, new[], free(), delete, + delete[], etc. + + But if your program uses a custom allocator, this doesn't automatically + happen, and Valgrind will not do as well. For example, if you allocate + superblocks with mmap() and then allocate chunks of the superblocks, all + Valgrind's observations will be at the mmap() level and it won't know that + the chunks should be considered separate entities. In Memcheck's case, + that means you probably won't get heap block overrun detection (because + there won't be redzones marked as unaddressable) and you definitely won't + get any leak detection. + + The following client requests allow a custom allocator to be annotated so + that it can be handled accurately by Valgrind. + + VALGRIND_MALLOCLIKE_BLOCK marks a region of memory as having been allocated + by a malloc()-like function. For Memcheck (an illustrative case), this + does two things: + + - It records that the block has been allocated. This means any addresses + within the block mentioned in error messages will be + identified as belonging to the block. It also means that if the block + isn't freed it will be detected by the leak checker. + + - It marks the block as being addressable and undefined (if 'is_zeroed' is + not set), or addressable and defined (if 'is_zeroed' is set). This + controls how accesses to the block by the program are handled. + + 'addr' is the start of the usable block (ie. after any + redzone), 'sizeB' is its size.
'rzB' is the redzone size if the allocator + can apply redzones -- these are blocks of padding at the start and end of + each block. Adding redzones is recommended as it makes it much more likely + Valgrind will spot block overruns. `is_zeroed' indicates if the memory is + zeroed (or filled with another predictable value), as is the case for + calloc(). + + VALGRIND_MALLOCLIKE_BLOCK should be put immediately after the point where a + heap block -- that will be used by the client program -- is allocated. + It's best to put it at the outermost level of the allocator if possible; + for example, if you have a function my_alloc() which calls + internal_alloc(), and the client request is put inside internal_alloc(), + stack traces relating to the heap block will contain entries for both + my_alloc() and internal_alloc(), which is probably not what you want. + + For Memcheck users: if you use VALGRIND_MALLOCLIKE_BLOCK to carve out + custom blocks from within a heap block, B, that has been allocated with + malloc/calloc/new/etc, then block B will be *ignored* during leak-checking + -- the custom blocks will take precedence. + + VALGRIND_FREELIKE_BLOCK is the partner to VALGRIND_MALLOCLIKE_BLOCK. For + Memcheck, it does two things: + + - It records that the block has been deallocated. This assumes that the + block was annotated as having been allocated via + VALGRIND_MALLOCLIKE_BLOCK. Otherwise, an error will be issued. + + - It marks the block as being unaddressable. + + VALGRIND_FREELIKE_BLOCK should be put immediately after the point where a + heap block is deallocated. + + VALGRIND_RESIZEINPLACE_BLOCK informs a tool about reallocation. For + Memcheck, it does four things: + + - It records that the size of a block has been changed. This assumes that + the block was annotated as having been allocated via + VALGRIND_MALLOCLIKE_BLOCK. Otherwise, an error will be issued. + + - If the block shrunk, it marks the freed memory as being unaddressable. 
+ + - If the block grew, it marks the new area as undefined and defines a red + zone past the end of the new block. + + - The V-bits of the overlap between the old and the new block are preserved. + + VALGRIND_RESIZEINPLACE_BLOCK should be put after allocation of the new block + and before deallocation of the old block. + + In many cases, these three client requests will not be enough to get your + allocator working well with Memcheck. More specifically, if your allocator + writes to freed blocks in any way then a VALGRIND_MAKE_MEM_UNDEFINED call + will be necessary to mark the memory as addressable just before the zeroing + occurs, otherwise you'll get a lot of invalid write errors. For example, + you'll need to do this if your allocator recycles freed blocks, but it + zeroes them before handing them back out (via VALGRIND_MALLOCLIKE_BLOCK). + Alternatively, if your allocator reuses freed blocks for allocator-internal + data structures, VALGRIND_MAKE_MEM_UNDEFINED calls will also be necessary. + + Really, what's happening is a blurring of the lines between the client + program and the allocator... after VALGRIND_FREELIKE_BLOCK is called, the + memory should be considered unaddressable to the client program, but the + allocator knows more than the rest of the client program and so may be able + to safely access it. Extra client requests are necessary for Valgrind to + understand the distinction between the allocator and the rest of the + program. + + Ignored if addr == 0. +*/ +#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MALLOCLIKE_BLOCK, \ + addr, sizeB, rzB, is_zeroed, 0) + +/* VALGRIND_RESIZEINPLACE_BLOCK: see the comment for VALGRIND_MALLOCLIKE_BLOCK for details. + Ignored if addr == 0.
+*/ +#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__RESIZEINPLACE_BLOCK, \ + addr, oldSizeB, newSizeB, rzB, 0) + +/* VALGRIND_FREELIKE_BLOCK: see the comment for VALGRIND_MALLOCLIKE_BLOCK for details. + Ignored if addr == 0. +*/ +#define VALGRIND_FREELIKE_BLOCK(addr, rzB) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__FREELIKE_BLOCK, \ + addr, rzB, 0, 0, 0) + +/* Create a memory pool. */ +#define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__CREATE_MEMPOOL, \ + pool, rzB, is_zeroed, 0, 0) + +/* Destroy a memory pool. */ +#define VALGRIND_DESTROY_MEMPOOL(pool) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DESTROY_MEMPOOL, \ + pool, 0, 0, 0, 0) + +/* Associate a piece of memory with a memory pool. */ +#define VALGRIND_MEMPOOL_ALLOC(pool, addr, size) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MEMPOOL_ALLOC, \ + pool, addr, size, 0, 0) + +/* Disassociate a piece of memory from a memory pool. */ +#define VALGRIND_MEMPOOL_FREE(pool, addr) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MEMPOOL_FREE, \ + pool, addr, 0, 0, 0) + +/* Disassociate any pieces outside a particular range. */ +#define VALGRIND_MEMPOOL_TRIM(pool, addr, size) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MEMPOOL_TRIM, \ + pool, addr, size, 0, 0) + +/* Move a memory pool from anchor address poolA to anchor address poolB. */ +#define VALGRIND_MOVE_MEMPOOL(poolA, poolB) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MOVE_MEMPOOL, \ + poolA, poolB, 0, 0, 0) + +/* Resize and/or move a piece of a memory pool: addrA is its old address, addrB and size its new extent. */ +#define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MEMPOOL_CHANGE, \ + pool, addrA, addrB, size, 0) + +/* Return 1 if a mempool exists, else 0. */ +#define VALGRIND_MEMPOOL_EXISTS(pool) \ + (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \ + VG_USERREQ__MEMPOOL_EXISTS, \ + pool, 0, 0, 0, 0) + +/* Mark a piece of memory as being a stack.
Returns a stack id. */ +#define VALGRIND_STACK_REGISTER(start, end) \ + (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \ + VG_USERREQ__STACK_REGISTER, \ + start, end, 0, 0, 0) + +/* Unmark the piece of memory associated with a stack id as being a + stack. */ +#define VALGRIND_STACK_DEREGISTER(id) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__STACK_DEREGISTER, \ + id, 0, 0, 0, 0) + +/* Change the start and end address of the stack with the given id. */ +#define VALGRIND_STACK_CHANGE(id, start, end) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__STACK_CHANGE, \ + id, start, end, 0, 0) + +/* Load PDB debug info for Wine PE image_map. */ +#define VALGRIND_LOAD_PDB_DEBUGINFO(fd, ptr, total_size, delta) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__LOAD_PDB_DEBUGINFO, \ + fd, ptr, total_size, delta, 0) + +/* Map a code address to a source file name and line number. buf64 + must point to a 64-byte buffer in the caller's address space. The + result will be dumped in there and is guaranteed to be + zero-terminated. If no info is found, the first byte is set to zero. */ +#define VALGRIND_MAP_IP_TO_SRCLOC(addr, buf64) \ + (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \ + VG_USERREQ__MAP_IP_TO_SRCLOC, \ + addr, buf64, 0, 0, 0) + +/* Disable error reporting for this thread. Behaves in a stack-like + way, so you can safely call this multiple times provided that + VALGRIND_ENABLE_ERROR_REPORTING is called the same number of times + to re-enable reporting. The first call of this macro disables + reporting. Subsequent calls have no effect except to increase the + number of VALGRIND_ENABLE_ERROR_REPORTING calls needed to re-enable + reporting. Child threads do not inherit this setting from their + parents -- they are always created with reporting enabled. */ +#define VALGRIND_DISABLE_ERROR_REPORTING \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__CHANGE_ERR_DISABLEMENT, \ + 1, 0, 0, 0, 0) + +/* Re-enable error reporting, as per comments on + VALGRIND_DISABLE_ERROR_REPORTING.
*/ +#define VALGRIND_ENABLE_ERROR_REPORTING \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__CHANGE_ERR_DISABLEMENT, \ + -1, 0, 0, 0, 0) +/* Undefine the PLAT_* platform macros so they do not stay defined past the end of this header. */ +#undef PLAT_x86_darwin +#undef PLAT_amd64_darwin +#undef PLAT_x86_win32 +#undef PLAT_x86_linux +#undef PLAT_amd64_linux +#undef PLAT_ppc32_linux +#undef PLAT_ppc64_linux +#undef PLAT_arm_linux +#undef PLAT_s390x_linux + +#endif /* __VALGRIND_H */