1
0
mirror of https://github.com/safiire/n65.git synced 2025-01-24 17:31:38 +00:00

This is a big rewrite including: A scoped symbol table, segment and

bank management, Use of promises to resolve symbols that are used before
they are defined.  A base class for all instructions and assembler
directives.  Hopefully my scoped symbols can be used to create C like
data structures in the zero page, i.e. sprite.x.  New code to produce the
final ROM.  Basically everything was rewritten.
This commit is contained in:
Safiire 2015-03-04 09:58:50 -08:00
parent 4235742c7d
commit 2c938f7312
26 changed files with 1047 additions and 703 deletions

1
.gitignore vendored
View File

@ -1,4 +1,5 @@
*.nes
*.nes.yaml
.DS_Store
Desktop.ini
Thumbs.db

View File

@ -4,10 +4,8 @@
##
## Usage: ./assembler_6502.rb <infile.asm>
##
## This the front end of the Assembler, just processes commandline arguments
## and passes them to the actual assembler.
## This file runs the assembler through the commandline frontend.
require_relative 'lib/assembler'
require_relative 'lib/front_end'
Assembler6502::FrontEnd.new(ARGV).run

View File

@ -12,7 +12,7 @@
;;;;
; Here is a good spot to associate zero page memory addresses
; with quick access variables in the program.
.segment prog 0
.org $0200
sprite:
@ -395,6 +395,7 @@ bg:
; This is CHR-ROM page 1, which starts at 0x0000, but I'm skipping the first bit because
; the first bunch of ASCII characters are not represented. This is the commodore 64's
; character ROM.
.segment char 0
.org $0200
.bytes $00,$00,$00,$00,$00,$00,$00,$00,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF ; Character 32

View File

@ -1,180 +1,200 @@
require_relative 'module_functions'
require_relative 'opcodes'
require_relative 'memory'
require_relative 'directive'
require_relative 'instruction'
require_relative 'label'
require_relative 'symbol_table'
require_relative 'memory_space'
require_relative 'parser'
module Assembler6502
####
## The Main Assembler
class Assembler
attr_reader :program_counter, :current_segment, :current_bank, :symbol_table, :virtual_memory, :promises
## Custom exceptions
class INESHeaderNotFound < StandardError; end
class MapperNotSupported < StandardError; end
##### Custom exceptions
class AddressOutOfRange < StandardError; end
class InvalidSegment < StandardError; end
class WriteOutOfBounds < StandardError; end
class INESHeaderAlreadySet < StandardError; end
class FileNotFound < StandardError; end
####
## Assemble from an asm file to a nes ROM
def self.from_file(infile, outfile)
assembler = self.new(File.read(infile))
byte_array = assembler.assemble
fail(FileNotFound, infile) unless File.exists?(infile)
assembler = self.new
program = File.read(infile)
puts "Building #{infile}"
## Process each line in the file
program.split(/\n/).each do |line|
assembler.assemble_one_line(line)
print '.'
end
puts
## Second pass to resolve any missing symbols.
print "Second pass, resolving symbols..."
assembler.fulfill_promises
puts " Done."
## Let's export the symbol table to a file
print "Writing symbol table to #{outfile}.yaml..."
File.open("#{outfile}.yaml", 'w') do |fp|
fp.write(assembler.symbol_table.export_to_yaml)
end
puts "Done."
## For right now, let's just emit the first prog bank
File.open(outfile, 'w') do |fp|
fp.write(byte_array.pack('C*'))
fp.write(assembler.emit_binary_rom)
end
puts "All Done :)"
end
####
## Instantiate a new Assembler with a full asm
## file as given in a string.
def initialize(assembly_code)
## Initialize with a bank 1 of prog space for starters
def initialize
@ines_header = nil
@assembly_code = assembly_code
@program_counter = 0x0
@current_segment = :prog
@current_bank = 0x0
@symbol_table = SymbolTable.new
@promises = []
@virtual_memory = {
:prog => [MemorySpace.create_prog_rom],
:char => []
}
end
####
## New ROM assembly, this is so simplified, and needs to take banks into account
## This will happen once I fully understand mappers and banks.
def assemble
## Assemble into a virtual memory space
virtual_memory = assemble_in_virtual_memory
## This is the main assemble method, it parses one line into an object
## which when given a reference to this assembler, controls the assembler
## itself through public methods, executing assembler directives, and
## emitting bytes into our virtual memory spaces. Empty lines or lines
## with only comments parse to nil, and we just ignore them.
def assemble_one_line(line)
parsed_object = Parser.parse(line)
## First we need to be sure we have an iNES header
fail(MapperNotSupported, "Mapper #{@ines_header.mapper} not supported") if @ines_header.mapper != 0
unless parsed_object.nil?
exec_result = parsed_object.exec(self)
## First we need to be sure we have an iNES header
fail(INESHeaderNotFound) if @ines_header.nil?
## If we have returned a promise save it for the second pass
@promises << exec_result if exec_result.kind_of?(Proc)
end
end
## Now we want to create a ROM layout for PROG
## This is simplified and only holds max two PROG entries
prog_rom = MemorySpace.new(@ines_header.prog * MemorySpace::ProgROMSize)
case @ines_header.prog
when 0
fail("You must have at least one PROG section")
exit(1)
when 1
prog_rom.write(0x0, virtual_memory.read(0xc000, MemorySpace::ProgROMSize))
when 2
prog_rom.write(0x0, virtual_memory.read(0x8000, MemorySpace::ProgROMSize))
prog_rom.write(MemorySpace::ProgROMSize, virtual_memory.read(0xC000, MemorySpace::ProgROMSize))
else
fail("I can't support more than 2 PROG sections")
exit(1)
####
## Drain the promise queue, most recently queued first, calling each
## promise so that operations which hit an undefined symbol when first
## encountered get another attempt at resolving it.
def fulfill_promises
  loop do
    pending = @promises.pop
    break unless pending
    pending.call
  end
end
####
## Write bytes into a virtual memory space.  The destination defaults to
## the current program counter, segment and bank.  Bounds checking is
## delegated to the memory space's own #write.
## Note that the program counter is advanced by bytes.size on every call,
## including calls that pass an explicit pc.
## Returns the updated program counter.
def write_memory(bytes, pc = @program_counter, segment = @current_segment, bank = @current_bank)
  get_virtual_memory_space(segment, bank).write(pc, bytes)
  @program_counter += bytes.size
end
####
## Store the iNES header.  Raises INESHeaderAlreadySet if a header has
## already been stored.  Returns the header.
def set_ines_header(ines_header)
  already_set = !@ines_header.nil?
  raise INESHeaderAlreadySet if already_set
  @ines_header = ines_header
end
####
## Move the program counter to the given address.
## Raises AddressOutOfRange when address_within_range? rejects it.
def program_counter=(address)
  if address_within_range?(address)
    @program_counter = address
  else
    raise AddressOutOfRange
  end
end
####
## Switch the current segment.  The argument is converted with #to_sym
## first; anything valid_segment? rejects raises InvalidSegment.
def current_segment=(segment)
  normalized = segment.to_sym
  return @current_segment = normalized if valid_segment?(normalized)
  raise InvalidSegment, "#{normalized} is not a valid segment. Try prog or char"
end
####
## Switch the current bank within the current segment.  If no memory
## space exists yet at that bank number, one is created with
## MemorySpace.create_bank and stored at that index.
## NOTE(review): selecting a bank number beyond the next free index
## appears to leave nil entries in between -- confirm callers cope.
def current_bank=(bank_number)
  segment = @current_segment
  if get_virtual_memory_space(segment, bank_number).nil?
    @virtual_memory[segment][bank_number] = MemorySpace.create_bank(segment)
  end
  @current_bank = bank_number
end
####
## Emit a binary ROM
def emit_binary_rom
progs = @virtual_memory[:prog]
chars = @virtual_memory[:char]
puts "iNES Header"
puts "+ #{progs.size} PROG ROM bank#{progs.size != 1 ? 's' : ''}"
puts "+ #{chars.size} CHAR ROM bank#{chars.size != 1 ? 's' : ''}"
rom_size = 0x10
rom_size += MemorySpace::BankSizes[:prog] * progs.size
rom_size += MemorySpace::BankSizes[:char] * chars.size
puts "= Output ROM will be #{rom_size} bytes"
rom = MemorySpace.new(rom_size, :rom)
offset = 0x0
offset += rom.write(0x0, @ines_header.emit_bytes)
progs.each do |prog|
offset += rom.write(offset, prog.read(0x8000, MemorySpace::BankSizes[:prog]))
end
## Now we want to create a ROM layout for CHAR
## This is simplified and only holds max two CHAR entries
char_rom = MemorySpace.new(@ines_header.char * MemorySpace::CharROMSize)
case @ines_header.char
when 0
when 1
char_rom.write(0x0, virtual_memory.read(0x0000, MemorySpace::CharROMSize))
when 2
char_rom.write(0x0, virtual_memory.read(0x0000, MemorySpace::CharROMSize))
char_rom.write(MemorySpace::CharROMSize, virtual_memory.read(0x2000, MemorySpace::CharROMSize))
else
fail("I can't support more than 2 CHAR sections")
exit(1)
end
if @ines_header.char.zero?
@ines_header.emit_bytes + prog_rom.emit_bytes
else
@ines_header.emit_bytes + prog_rom.emit_bytes + char_rom.emit_bytes
chars.each do |char|
offset += rom.write(offset, char.read(0x0, MemorySpace::BankSizes[:char]))
end
rom.emit_bytes.pack('C*')
end
private
####
## Run the assembly process into a virtual memory object
def assemble_in_virtual_memory
address = 0x0
labels = {}
memory = MemorySpace.new
unresolved_instructions = []
## Look up the memory space stored for a segment at the given bank
## number; nil when nothing is stored at that index.
def get_virtual_memory_space(segment, bank_number)
  banks = @virtual_memory[segment]
  banks[bank_number]
end
puts "Assembling, first pass..."
@assembly_code.split(/\n/).each do |raw_line|
sanitized = Assembler6502.sanitize_line(raw_line)
next if sanitized.empty?
parsed_line = Assembler6502::Instruction.parse(sanitized, address)
case parsed_line
when INESHeader
fail(SyntaxError, "Already got ines header") unless @ines_header.nil?
@ines_header = parsed_line
puts "\tGot iNES Header"
when Org
address = parsed_line.address
puts "\tMoving to address: $%X" % address
####
## True when the address fits in 16 bits, i.e. lies in 0..0xFFFF.
def address_within_range?(address)
  !(address < 0 || address >= 0x1_0000)
end
when Label
puts "\tLabel #{parsed_line.label} = $%X" % parsed_line.address
labels[parsed_line.label.to_sym] = parsed_line
when Instruction
if parsed_line.unresolved_symbols?
puts "\tSaving instruction with unresolved symbols #{parsed_line}, for second pass"
unresolved_instructions << parsed_line
else
puts "\tWriting instruction #{parsed_line}"
memory.write(parsed_line.address, parsed_line.emit_bytes)
end
address += parsed_line.length
when IncBin
puts "\t Including binary file #{parsed_line.filepath}"
memory.write(parsed_line.address, parsed_line.emit_bytes)
address += parsed_line.size
when DW
if parsed_line.unresolved_symbols?
puts "\tSaving #{parsed_line} directive with unresolved symbols, for second pass"
unresolved_instructions << parsed_line
else
puts "\tWriting #{parsed_line} to memory"
memory.write(address, parsed_line.emit_bytes)
end
address += 2
when Bytes
bytes = parsed_line.emit_bytes
puts "\tWriting raw #{bytes.size} bytes to #{sprintf("$%X", address)}"
memory.write(address, bytes)
address += bytes.size
when ASCII
bytes = parsed_line.emit_bytes
puts "\tWriting ascii string to memory \"#{bytes.pack('C*')}\""
memory.write(address, bytes)
address += bytes.size
else
fail(SyntaxError, sprintf("%.4X: Failed to parse: #{parsed_line}", address))
end
end
puts "Second pass: Resolving Symbols..."
unresolved_instructions.each do |instruction|
if instruction.unresolved_symbols?
instruction.resolve_symbols(labels)
end
puts "\tResolved #{instruction}"
memory.write(instruction.address, instruction.emit_bytes)
end
puts 'Done'
memory
####
## Only the :prog and :char symbols name a valid segment.
def valid_segment?(segment)
  %i[prog char].include?(segment)
end
end

View File

@ -1,192 +0,0 @@
require 'json'
module Assembler6502
####
## Holds the four iNES header fields and renders the 16 byte header.
class INESHeader
  attr_reader :prog, :char, :mapper, :mirror

  ####
  ## Defaults: one PROG bank, no CHAR banks, mapper 0, mirror 1.
  def initialize(prog = 0x1, char = 0x0, mapper = 0x0, mirror = 0x1)
    @prog = prog
    @char = char
    @mapper = mapper
    @mirror = mirror
  end

  ####
  ## Size of the final ROM binary in bytes: a 16 byte header, 16KB per
  ## PROG bank and 8KB per CHAR bank.
  def rom_size
    0x10 + (0x4000 * @prog) + (0x2000 * @char)
  end

  ####
  ## The header bytes: "NES", 0x1a, the four fields, then eight zero
  ## bytes of padding.  Not exactly right, but it works for now.
  def emit_bytes
    'NES'.bytes + [0x1a, @prog, @char, @mapper, @mirror] + Array.new(8, 0x0)
  end
end
####
## Value object for an .org directive
class Org
  ####
  ## Remember the start address
  def initialize(address)
    @address = address
  end

  ####
  ## The address given at construction
  def address
    @address
  end
end
####
## Includes the raw contents of a binary file
class IncBin
  attr_reader :address, :filepath

  class FileNotFound < StandardError; end

  ####
  ## Reads the whole file eagerly as an array of byte values.
  ## Raises FileNotFound when filepath does not exist.
  def initialize(filepath, address)
    @filepath = filepath
    @address = address

    ## File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
    unless File.exist?(filepath)
      fail(FileNotFound, ".incbin can't find #{filepath}")
    end
    ## Binary read so no newline or encoding translation touches the data
    @data = File.binread(filepath).unpack('C*')
  end

  ####
  ## Number of bytes read from the file
  def size
    @data.size
  end

  ####
  ## The file contents as an array of integers 0..255
  def emit_bytes
    @data
  end
end
####
## Data Word: a 16-bit value, either a literal integer or a Symbol
## naming a label to be resolved later.
class DW
  attr_reader :address

  class WordTooLarge < StandardError; end

  def initialize(value, address)
    @value = value
    @address = address
  end

  ####
  ## True while the value is still a symbolic label reference
  def unresolved_symbols?
    @value.kind_of?(Symbol)
  end

  ####
  ## Replace the symbol with the address of the matching label, if the
  ## labels hash has one.  Unknown labels leave the value untouched.
  def resolve_symbols(labels)
    return unless unresolved_symbols?
    label = labels[@value]
    @value = label.address unless label.nil?
  end

  ####
  ## Pretty print.  The label is passed as a format argument rather than
  ## interpolated into the format string.
  def to_s
    if @value.kind_of?(Symbol)
      sprintf("$%.4X | .dw %s", @address, @value)
    else
      sprintf("$%.4X | .dw $%.4X", @address, @value)
    end
  end

  ####
  ## Emit the word as two bytes, low byte first.  'v' is explicitly
  ## little-endian; the previous 'S' used the byte order of the host.
  def emit_bytes
    fail('Need to resolve symbol in .dw directive') if unresolved_symbols?
    [@value & 0xFFFF].pack('v').bytes
  end
end
####
## A literal run of bytes given as comma separated hex values
class Bytes
  ####
  ## Split on commas and convert each piece from hex; a value above
  ## 0xff raises SyntaxError.
  def initialize(bytes)
    @bytes = bytes.split(',').map { |byte_string| to_byte(byte_string) }
  end

  ####
  ## The parsed integers
  def emit_bytes
    @bytes
  end

  private

  ####
  ## Drop any '$' characters and read what is left as base 16
  def to_byte(byte_string)
    integer = byte_string.delete('$').to_i(16)
    fail(SyntaxError, "#{integer} is too large for one byte") if integer > 0xff
    integer
  end
end
####
## Inserts the bytes of a text string straight into the ROM
class ASCII
  ####
  ## Keep the string to emit
  def initialize(string)
    @string = string
  end

  ####
  ## The string as an array of byte values
  def emit_bytes
    @string.unpack('C*')
  end
end
####
## Recognizes assembler directives (lines starting with a dot)
class Directive
  ####
  ## Build the object for the directive on this line.  Returns nil when
  ## the line is not a directive at all, and raises SyntaxError for a
  ## line that starts with a dot but matches no known directive.
  def self.parse(directive_line, address)
    sanitized = Assembler6502.sanitize_line(directive_line)
    case sanitized
    when /^\.ines (.+)$/
      fields = JSON.parse(Regexp.last_match(1))
      INESHeader.new(*fields.values_at('prog', 'char', 'mapper', 'mirror'))
    when /^\.org\s+\$([0-9A-F]{4})$/
      Org.new(Regexp.last_match(1).to_i(16))
    when /^\.incbin "([^"]+)"$/
      IncBin.new(Regexp.last_match(1), address)
    when /^\.dw\s+\$([0-9A-F]{1,4})$/
      DW.new(Regexp.last_match(1).to_i(16), address)
    when /^\.dw\s+([A-Za-z_][A-Za-z0-9_]+)/
      DW.new(Regexp.last_match(1).to_sym, address)
    when /^\.ascii\s+"([^"]+)"$/
      ASCII.new(Regexp.last_match(1))
    when /^\.bytes\s+(.+)$/
      Bytes.new(Regexp.last_match(1))
    when /^\./
      fail(SyntaxError, "Syntax Error in Directive '#{sanitized}'")
    end
  end
end
end

42
lib/directives/ascii.rb Normal file
View File

@ -0,0 +1,42 @@
require_relative '../instruction_base'
module Assembler6502
####
## This directive writes the bytes of a quoted string into memory
class ASCII < InstructionBase
  ####
  ## Try to parse an .ascii directive.  Returns nil when the line is
  ## not one (an empty quoted string does not match either).
  def self.parse(line)
    match_data = line.match(/^\.ascii\s+"([^"]+)"$/)
    return nil if match_data.nil?
    ## Use the MatchData like the other directives do, not the $1 global
    ASCII.new(match_data[1])
  end

  ####
  ## Initialize with the text between the quotes
  def initialize(string)
    @string = string
  end

  ####
  ## Execute on the assembler: write the string's bytes to memory
  def exec(assembler)
    assembler.write_memory(@string.bytes)
  end

  ####
  ## Display
  def to_s
    ".ascii \"#{@string}\""
  end
end
end

49
lib/directives/bytes.rb Normal file
View File

@ -0,0 +1,49 @@
require_relative '../instruction_base'
module Assembler6502
####
## This directive writes a comma separated list of hex bytes
class Bytes < InstructionBase
  ####
  ## Try to parse a .bytes directive.  Returns nil when the line is not
  ## one; raises SyntaxError when any listed value is not a hex number
  ## in the range $00..$FF.
  def self.parse(line)
    match_data = line.match(/^\.bytes\s+(.+)$/)
    return nil if match_data.nil?

    bytes_array = match_data[1].split(',').map { |byte_string| parse_byte(byte_string) }
    Bytes.new(bytes_array)
  end

  ####
  ## Convert one list entry such as "$1F" into an integer.  Integer()
  ## is used instead of to_i so that garbage is reported rather than
  ## silently becoming 0.
  def self.parse_byte(byte_string)
    token = byte_string.strip
    integer = begin
      Integer(token.sub(/\A\$/, ''), 16)
    rescue ArgumentError
      fail(SyntaxError, "#{token} is not a valid hexadecimal byte")
    end
    fail(SyntaxError, "#{integer} is too large for one byte") if integer > 0xff
    fail(SyntaxError, "#{integer} is negative, bytes must be $00-$FF") if integer < 0
    integer
  end
  private_class_method :parse_byte

  ####
  ## Initialize with an array of integers, one per byte
  def initialize(bytes_array)
    @bytes_array = bytes_array
  end

  ####
  ## Execute on the assembler: write the bytes to memory
  def exec(assembler)
    assembler.write_memory(@bytes_array)
  end

  ####
  ## Display, I don't want to write all these out
  def to_s
    ".bytes (#{@bytes_array.length})"
  end
end
end

91
lib/directives/dw.rb Normal file
View File

@ -0,0 +1,91 @@
require_relative '../instruction_base'
module Assembler6502
####
## This directive writes a 16-bit data word, given either as a hex
## literal or as the name of a symbol.
class DW < InstructionBase
  ####
  ## Try to parse a .dw directive, returns nil if the line is not one.
  def self.parse(line)
    ## Maybe it is a straight up bit of hex (either letter case, as .org accepts)
    match_data = line.match(/^\.dw\s+\$([0-9A-Fa-f]{1,4})$/)
    return DW.new(match_data[1].to_i(16)) unless match_data.nil?

    ## Or maybe it points to a symbol
    match_data = line.match(/^\.dw\s+([A-Za-z_][A-Za-z0-9_\.]+)/)
    return DW.new(match_data[1]) unless match_data.nil?

    nil
  end

  ####
  ## Initialize with an Integer word, or a String symbol name
  def initialize(value)
    @value = value
  end

  ####
  ## Execute on the assembler.  An Integer value is written right away.
  ## A String value is looked up in the symbol table; if that raises
  ## SymbolTable::UndefinedSymbol a placeholder is written and a lambda
  ## is returned which performs the real write when called later.
  def exec(assembler)
    ## Save these current values into the closure
    pc = assembler.program_counter
    segment = assembler.current_segment
    bank = assembler.current_bank

    ## Create a promise, if this symbol is not defined yet.
    promise = lambda do
      value = assembler.symbol_table.resolve_symbol(@value)
      assembler.write_memory(word_bytes(value), pc, segment, bank)
    end

    ## Try to execute it now, or setup the promise to return.
    ## Integer instead of Fixnum: Fixnum no longer exists in Ruby 3.2+.
    case @value
    when Integer
      assembler.write_memory(word_bytes(@value))
    when String
      begin
        promise.call
      rescue SymbolTable::UndefinedSymbol
        ## Must still advance PC before returning promise, so we'll write
        ## a place holder value of 0xDEAD
        assembler.write_memory([0xDE, 0xAD], pc, segment, bank)
        return promise
      end
    else
      fail("Uknown argument in .dw directive")
    end
  end

  ####
  ## Display, with the literal zero padded to four hex digits
  def to_s
    case @value
    when String
      ".dw #{@value}"
    when Integer
      ".dw $%.4X" % @value
    end
  end

  private

  ####
  ## Low 16 bits of value as two bytes, low byte first.  'v' is
  ## little-endian on every host, unlike the native-endian 'S'.
  def word_bytes(value)
    [value & 0xFFFF].pack('v').bytes
  end
end
end

51
lib/directives/incbin.rb Normal file
View File

@ -0,0 +1,51 @@
require_relative '../instruction_base'
module Assembler6502
####
## This directive instruction can include a binary file
class IncBin < InstructionBase
  #### Custom Exceptions
  class FileNotFound < StandardError; end

  ####
  ## Try to parse an incbin directive, returns nil if the line is not one
  def self.parse(line)
    match_data = line.match(/^\.incbin "([^"]+)"$/)
    return nil if match_data.nil?
    filename = match_data[1]
    IncBin.new(filename)
  end

  ####
  ## Initialize with filename
  def initialize(filename)
    @filename = filename
  end

  ####
  ## Execute on the assembler: read the file and write its bytes to
  ## memory.  Raises FileNotFound when the file does not exist.
  def exec(assembler)
    ## File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
    unless File.exist?(@filename)
      fail(FileNotFound, ".incbin can't find #{@filename}")
    end
    ## Binary read so no newline or encoding translation touches the data
    data = File.binread(@filename).unpack('C*')
    assembler.write_memory(data)
  end

  ####
  ## Display
  def to_s
    ".incbin \"#{@filename}\""
  end
end
end

View File

@ -0,0 +1,53 @@
require 'json'
require_relative '../instruction_base'
module Assembler6502
####
## The .ines directive: carries the header fields and hands itself to
## the assembler as its iNES header.
class INESHeader < InstructionBase
  attr_reader :prog, :char, :mapper, :mirror

  ####
  ## Parse ".ines <json>"; nil when the line is something else.  The
  ## prog, char, mapper and mirror keys are read from the JSON object.
  def self.parse(line)
    match_data = line.match(/^\.ines (.+)$/)
    return nil if match_data.nil?
    fields = JSON.parse(match_data[1])
    INESHeader.new(*fields.values_at('prog', 'char', 'mapper', 'mirror'))
  end

  ####
  ## Construct a header from its four fields
  def initialize(prog, char, mapper, mirror)
    @prog = prog
    @char = char
    @mapper = mapper
    @mirror = mirror
  end

  ####
  ## Called by the assembler: register this object as the header
  def exec(assembler)
    assembler.set_ines_header(self)
  end

  ####
  ## The 16 header bytes: "NES", 0x1a, the four fields, eight zeros.
  ## This is not exactly right, but it works for now.
  def emit_bytes
    'NES'.bytes + [0x1a, @prog, @char, @mapper, @mirror] + Array.new(8, 0x0)
  end

  ####
  ## Display
  def to_s
    %(.ines {"prog": #{@prog}, "char": #{@char}, "mapper": #{@mapper}, "mirror": #{@mirror}})
  end
end
end

46
lib/directives/label.rb Normal file
View File

@ -0,0 +1,46 @@
module Assembler6502
####
## A label line such as "start:".  Executing it defines the label's
## name in the assembler's symbol table with the program counter at
## that moment as its value.
class Label
  ####
  ## Returns a Label for a line consisting only of a name followed by
  ## a colon, otherwise nil.
  def self.parse(line)
    match_data = line.match(/^([a-zA-Z][a-zA-Z0-9_]+):$/)
    match_data.nil? ? nil : new(match_data[1].to_sym)
  end

  ####
  ## Build a label from its name
  def initialize(symbol)
    @symbol = symbol
  end

  ####
  ## Define this label in the symbol table at the current address
  def exec(assembler)
    address = assembler.program_counter
    assembler.symbol_table.define_symbol(@symbol, address)
  end

  ####
  ## Display
  def to_s
    "#{@symbol}:"
  end
end
end

47
lib/directives/org.rb Normal file
View File

@ -0,0 +1,47 @@
require_relative '../instruction_base'
module Assembler6502
####
## This is an .org directive
class Org < InstructionBase
  attr_reader :address

  ####
  ## Try to parse an .org statement (exactly four hex digits after the
  ## dollar sign), returns nil if the line is not one.
  def self.parse(line)
    match_data = line.match(/^\.org\s+\$([0-9A-Fa-f]{4})$/)
    return nil if match_data.nil?
    address = match_data[1].to_i(16)
    Org.new(address)
  end

  ####
  ## Initialized with address to switch to
  def initialize(address)
    @address = address
  end

  ####
  ## Exec this directive on the assembler: set its program counter
  def exec(assembler)
    assembler.program_counter = address
  end

  ####
  ## Display.  "%.2X" / "%.4X" zero-pad the address; the previous
  ## "%2.X" / "%4.X" padded with spaces instead (e.g. "$ 200").
  def to_s
    if @address <= 0xff
      ".org $%.2X" % @address
    else
      ".org $%.4X" % @address
    end
  end
end
end

45
lib/directives/segment.rb Normal file
View File

@ -0,0 +1,45 @@
require_relative '../instruction_base'
module Assembler6502
####
## This directive switches the assembler to a segment and bank
class Segment < InstructionBase
  ####
  ## Try to parse a .segment directive, returns nil if the line is not
  ## one.  The leading dot is matched literally, and because matching
  ## is case-insensitive the segment name is downcased before use.
  def self.parse(line)
    match_data = line.match(/^\.segment (prog|char) (\d+)$/i)
    return nil if match_data.nil?
    Segment.new(match_data[1].downcase, match_data[2].to_i)
  end

  ####
  ## Initialize with the segment name and bank number
  def initialize(segment, bank)
    @bank = bank
    @segment = segment
  end

  ####
  ## Execute the segment and bank change on the assembler
  def exec(assembler)
    assembler.current_segment = @segment
    assembler.current_bank = @bank
  end

  ####
  ## Display
  def to_s
    ".segment #{@segment} #{@bank}"
  end
end
end

View File

@ -1,14 +0,0 @@
module Assembler6502
####
## Base class for anything which can emit bytes.  Calling emit_bytes on
## a class that has not overridden it raises NotImplementedError.
class EmitsBytes
  def emit_bytes
    raise NotImplementedError, "#{self.class} must implement emit_bytes"
  end
end
end

View File

@ -1,4 +1,5 @@
require 'optparse'
require_relative 'assembler'
module Assembler6502
@ -53,16 +54,16 @@ module Assembler6502
exit(1)
end
begin
#begin
Assembler6502::Assembler.from_file(input_file, @options[:output_file])
rescue StandardError => error
STDERR.puts("Assemble Failed!")
STDERR.puts(error.class)
if error.message
STDERR.puts(error.message)
end
exit(1)
end
#rescue StandardError => error
#STDERR.puts("Assemble Failed!")
#STDERR.puts(error.class)
#if error.message
#STDERR.puts(error.message)
#end
#exit(1)
#end
end
private

View File

@ -1,3 +1,4 @@
require_relative 'opcodes'
module Assembler6502
@ -16,7 +17,7 @@ module Assembler6502
Hex8 = '\$([A-Fa-f0-9]{2})'
Hex16 = '\$([A-Fa-f0-9]{4})'
Immediate = '\#\$([0-9A-F]{2})'
Sym = '([a-zZ-Z_][a-zA-Z0-9_]+)'
Sym = '([a-zZ-Z_][a-zA-Z0-9_\.]+)'
Branches = '(BPL|BMI|BVC|BVS|BCC|BCS|BNE|BEQ|bpl|bmi|bvc|bvs|bcc|bcs|bne|beq)'
XReg = '[Xx]'
YReg = '[Yy]'
@ -105,48 +106,34 @@ module Assembler6502
####
## Parse one line of assembly, returns nil if the line
## is ultimately empty of instructions or labels
## is ultimately empty of asm instructions
## Raises SyntaxError if the line is malformed in some way
def self.parse(asm_line, address)
def self.parse(line)
## First, sanitize the line, which removes whitespace, and comments.
sanitized = Assembler6502.sanitize_line(asm_line)
## Empty lines assemble to nothing
return nil if sanitized.empty?
## Let's see if this line is an assembler directive
directive = Directive.parse(sanitized, address)
return directive unless directive.nil?
## Let's see if this line is a label, and try
## to create a label for the current address
label = Label.parse_label(sanitized, address)
return label unless label.nil?
## We must have some asm, so try to parse it in each addressing mode
## Try to parse this line in each addressing mode
AddressingModes.each do |mode, parse_info|
## We have regexes that match each addressing mode
match_data = parse_info[:regex].match(sanitized)
match_data = parse_info[:regex].match(line)
unless match_data.nil?
## We must have a straight instruction without labels, construct
## We must have a straight instruction without symbols, construct
## an Instruction from the match_data, and return it
_, op, arg = match_data.to_a
return Instruction.new(op, arg, mode, address)
arg = arg.to_i(16) unless arg.nil?
return Instruction.new(op, arg, mode)
else
## Can this addressing mode even use labels?
unless parse_info[:regex_label].nil?
## See if it does in fact have a label/symbolic argument
match_data = parse_info[:regex_label].match(sanitized)
## See if it does in fact have a symbolic argument
match_data = parse_info[:regex_label].match(line)
unless match_data.nil?
## Yep, the arg is a label, we can resolve that to an address later
## But for now we will create an Instruction where the label is a
## symbol reference to the label we found, ie. arg.to_sym
## We have found an assembly instruction containing a symbolic
## argument. We can resolve this symbol later by looking at the
## symbol table in the #exec method
match_array = match_data.to_a
## If we have a 4 element array, this means we matched something
@ -155,11 +142,11 @@ module Assembler6502
## Instruction, by passing an extra argument
if match_array.size == 4
_, op, byte_selector, arg = match_array
return Instruction.new(op, arg.to_sym, mode, address, byte_selector.to_sym)
return Instruction.new(op, arg, mode, byte_selector.to_sym)
puts "I found one with #{byte_selector} #{arg}"
else
_, op, arg = match_array
return Instruction.new(op, arg.to_sym, mode, address)
return Instruction.new(op, arg, mode)
end
end
end
@ -167,7 +154,7 @@ module Assembler6502
end
## We just don't recognize this line of asm, it must be a Syntax Error
fail(SyntaxError, sprintf("%.4X: ", address) + asm_line)
fail(SyntaxError, line)
end
@ -175,56 +162,105 @@ module Assembler6502
## Create an instruction. Having the instruction op a downcased symbol is nice
## because that can later be used to index into our opcodes hash in OpCodes
## OpCodes contains the definitions of each OpCode
def initialize(op, arg, mode, address, byte_selector = nil)
def initialize(op, arg, mode, byte_selector = nil)
## Lookup the definition of this opcode, otherwise it is an invalid instruction
@byte_selector = byte_selector.nil? ? nil : byte_selector.to_sym
fail(InvalidInstruction, "Bad Byte selector: #{byte_selector}") unless [:>, :<, nil].include?(@byte_selector)
@op = op.downcase.to_sym
definition = OpCodes[@op]
fail(InvalidInstruction, op) if definition.nil?
@arg = arg
## Be sure the mode is an actually supported mode.
@mode = mode.to_sym
fail(InvalidAddressingMode, mode) unless AddressingModes.has_key?(@mode)
## Make sure the address is in range
if address < 0x0 || address > 0xFFFF
fail(AddressOutOfRange, address)
end
@address = address
## Argument can either be a symbolic label, a hexidecimal number, or nil.
@arg = case arg
when Symbol then arg
when String
if arg.match(/[0-9A-F]{1,4}/).nil?
fail(SyntaxError, "#{arg} is not a valid hexidecimal number")
else
arg.to_i(16)
end
when nil then nil
else
fail(SyntaxError, "Cannot parse argument #{arg}")
end
if definition[@mode].nil?
fail(InvalidInstruction, "#{op} cannot be used in #{mode} mode")
end
@description, @flags = definition.values_at(:description, :flags)
@hex, @length, @cycles, @boundry_add = definition[@mode].values_at(:hex, :len, :cycles, :boundry_add)
end
####
## Does this instruction have unresolved symbols?
def unresolved_symbols?
@arg.kind_of?(Symbol)
## Execute writes the emitted bytes to virtual memory, and updates PC
## If there is a symbolic argument, we can try to resolve it now, or
## promise to resolve it later.
## Returns the promise (a lambda) only when the symbol was undefined;
## the assembler is expected to call it once the symbol exists.
def exec(assembler)
  ## Save these current values into the closure/promise
  pc = assembler.program_counter
  segment = assembler.current_segment
  bank = assembler.current_bank

  ## Create a promise if this symbol is not defined yet.
  promise = lambda do
    @arg = assembler.symbol_table.resolve_symbol(@arg)
    ## If the instruction uses a byte selector, we need to apply that.
    @arg = apply_byte_selector(@byte_selector, @arg)
    ## If the instruction is relative we need to work out how far away it is
    @arg = @arg - pc - 2 if @mode == :relative
    assembler.write_memory(emit_bytes, pc, segment, bank)
  end

  ## Integer instead of Fixnum: the Fixnum constant no longer exists in
  ## Ruby 3.2+, and Integer matches the same values on older Rubies.
  case @arg
  when Integer, NilClass
    assembler.write_memory(emit_bytes)
  when String
    begin
      promise.call
    rescue SymbolTable::UndefinedSymbol
      placeholder = [@hex, 0xDE, 0xAD][0...@length]
      ## I still have to write a placeholder instruction of the right
      ## length. The promise will come back and resolve the address.
      assembler.write_memory(placeholder, pc, segment, bank)
      return promise
    end
  end
end
####
## Narrow a value according to a byte selector: nil leaves it alone,
## :> takes high_byte, :< takes low_byte.  Any other selector gives nil.
def apply_byte_selector(byte_selector, value)
  if byte_selector.nil?
    value
  elsif byte_selector == :>
    high_byte(value)
  elsif byte_selector == :<
    low_byte(value)
  end
end
####
## Bytes for this instruction, chosen by its length: the opcode alone,
## the opcode plus the argument, or the opcode plus break_16 of the
## argument.  Any other length raises.
def emit_bytes
  if @length == 1
    [@hex]
  elsif @length == 2
    [@hex, @arg]
  elsif @length == 3
    [@hex] + break_16(@arg)
  else
    fail("Can't handle instructions > 3 bytes")
  end
end
####
## Resolve symbols
=begin
def resolve_symbols(symbols)
if unresolved_symbols?
if symbols[@arg].nil?
@ -257,42 +293,18 @@ module Assembler6502
end
end
end
####
## Emit bytes from asm structure
def emit_bytes
fail(UnresolvedSymbols, "Symbol #{@arg.inspect} needs to be resolved") if unresolved_symbols?
case @length
when 1
[@hex]
when 2
[@hex, @arg]
when 3
[@hex] + break_16(@arg)
else
fail("Can't handle instructions > 3 bytes")
end
end
####
## Hex dump of this instruction
def hexdump
emit_bytes.map{|byte| sprintf("%.2X", byte & 0xFF)}
end
=end
####
## Pretty Print
def to_s
if unresolved_symbols?
display = AddressingModes[@mode][:display]
sprintf("%.4X | %s %s", @address, @op, @arg.to_s)
else
display = AddressingModes[@mode][:display]
sprintf("%.4X | #{display}", @address, @op, @arg)
end
#display = AddressingModes[@mode][:display]
#if @arg.kind_of?(String)
#sprintf("#{display} (#{@mode}, #{@arg})", @op, 0x0)
#else
#sprintf("#{display} (#{@mode})", @op, @arg)
#end
end

29
lib/instruction_base.rb Normal file
View File

@ -0,0 +1,29 @@
module Assembler6502
class InstructionBase
  #####
  ## Sort of a "pure virtual" class method, not really tho.
  ## Inside a class method self is already the class, so `name` gives
  ## the subclass name (self.class.name would just say "Class").
  def self.parse(line)
    fail(NotImplementedError, "#{name} must implement self.parse")
  end

  ####
  ## Does this instruction have unresolved symbols?
  ## Always false here.
  def unresolved_symbols?
    false
  end

  ####
  ## Another method subclasses will be expected to implement
  def exec(assembler)
    fail(NotImplementedError, "#{self.class.name} must implement exec")
  end
end
end

View File

@ -1,63 +0,0 @@
module Assembler6502
####
## A named address in the program
class Label
  attr_reader :label, :address

  ####
  ## Returns a Label when the sanitized line contains a symbol followed
  ## by a colon, otherwise nil.
  def self.parse_label(asm_line, address)
    sanitized = Assembler6502.sanitize_line(asm_line)
    match_data = sanitized.match(/#{Instruction::Sym}:/)
    return nil if match_data.nil?
    new(match_data[1], address)
  end

  ####
  ## Bind a label name to an address
  def initialize(label, address)
    @label = label
    @address = address
  end

  ####
  ## Pretty print
  def to_s
    format("%.4X | #{@label}", @address)
  end

  ####
  ## A label occupies no bytes
  def length
    0
  end

  ####
  ## Nothing to emit
  def emit_bytes
    []
  end

  ####
  ## Mode
  def mode
    "label"
  end

  ####
  ## Description
  def description
    format("Label pointing to $%.4X", @address)
  end
end
end

View File

@ -1,49 +0,0 @@
module Assembler6502
####
## A flat, zero-filled array of bytes standing in for an address
## space; 2**16 entries unless a size is given.
class MemorySpace
  #### Some constants, the size of PROG and CHAR ROM
  INESHeaderSize = 0x10
  ProgROMSize = 0x4000
  CharROMSize = 0x2000

  ####
  ## Create a completely zeroed memory space, 2**16 by default
  def initialize(size = 2**16)
    @memory = Array.new(size, 0x0)
  end

  ####
  ## Read count entries starting at address
  ## TODO: This could use some boundry checking
  def read(address, count)
    last = address + count - 1
    @memory[address..last]
  end

  ####
  ## Store each byte at consecutive positions starting at address
  ## TODO: This could use some boundry checking
  def write(address, bytes)
    bytes.each.with_index(address) { |byte, slot| @memory[slot] = byte }
  end

  ####
  ## The backing array, for writing to disk
  def emit_bytes
    @memory
  end
end
end

150
lib/memory_space.rb Normal file
View File

@ -0,0 +1,150 @@
module Assembler6502

  ####
  ##  Let's use this to simulate a virtual address space
  ##  Either a 16kb prog rom or 8kb char rom space.
  ##  It can also be used to create arbitrary sized spaces
  ##  for example to build the final binary ROM in.
  ##
  ##  Addresses handed to read and write are CPU/PPU addresses; they are
  ##  normalized into offsets of the backing array according to the bank
  ##  type (:prog is mirrored at $8000 and $C000, :char lives at
  ##  $0000-$1FFF, any other type is used unchanged).
  class MemorySpace

    #### Custom exceptions
    class AccessOutsideProgRom < StandardError; end
    class AccessOutsideCharRom < StandardError; end
    class AccessOutOfBounds < StandardError; end


    #### Some constants, the size of PROG and CHAR ROM
    BankSizes = {
      :ines => 0x10,     #  16b
      :prog => 0x4000,   #  16kb
      :char => 0x2000,   #   8kb
    }.freeze


    ####
    ##  Create a new PROG ROM
    def self.create_prog_rom
      create_bank(:prog)
    end


    ####
    ##  Create a new CHAR ROM
    def self.create_char_rom
      create_bank(:char)
    end


    ####
    ##  Create a new bank of one of the types listed in BankSizes
    def self.create_bank(type)
      new(BankSizes[type], type)
    end


    ####
    ##  Create a completely zeroed memory space of size bytes.
    ##  type selects how addresses are normalized (see normalize_address).
    def initialize(size, type)
      @type = type
      @memory = Array.new(size, 0x0)
    end


    ####
    ##  Normalized read from memory.
    ##  Returns an Array of count bytes starting at address; a count of
    ##  zero (or less) yields an empty Array.
    ##  Raises AccessOutsideProgRom / AccessOutsideCharRom when an end of
    ##  the range is outside the bank's address window, and
    ##  AccessOutOfBounds when the range does not fit inside the bank.
    def read(address, count)
      from_normalized = normalize_range(address, count)
      count > 0 ? @memory[from_normalized, count] : []
    end


    ####
    ##  Normalized write to memory.
    ##  Stores bytes consecutively starting at address and returns the
    ##  number of bytes written.  Writing an empty list is a no-op that
    ##  returns 0.  Raises the same exceptions as read.
    def write(address, bytes)
      from_normalized = normalize_range(address, bytes.size)
      bytes.each_with_index do |byte, index|
        @memory[from_normalized + index] = byte
      end
      bytes.size
    end


    ####
    ##  Return the memory as an array of bytes to write to disk
    def emit_bytes
      @memory
    end


    private

    ####
    ##  Validate the range of length bytes starting at address and return
    ##  the normalized offset of its first byte.
    ##  Only the first byte is checked for an empty range, so that an empty
    ##  access at the very start of a bank does not trip over address - 1.
    def normalize_range(address, length)
      from_normalized = normalize_address(address)
      ensure_addresses_in_bounds!([from_normalized])
      return from_normalized unless length > 0

      last_address = address + length - 1
      to_normalized = normalize_address(last_address)
      ensure_addresses_in_bounds!([to_normalized])

      ##  Both ends can be individually valid while the range still wraps
      ##  around a mirror boundary (e.g. $BFFF-$C000 in a prog bank).  Such
      ##  a range is not contiguous in the bank and writing it would run
      ##  past the end of the backing array.
      if to_normalized < from_normalized
        fail(AccessOutOfBounds, sprintf("Range $%.4X-$%.4X does not fit contiguously in this %s bank", address, last_address, @type))
      end
      from_normalized
    end


    ####
    ##  Are the given (already normalized) addresses in bounds?
    ##  If not blow up with AccessOutOfBounds, otherwise return true.
    def ensure_addresses_in_bounds!(addresses)
      addresses.each do |address|
        unless address >= 0 && address < @memory.size
          fail(AccessOutOfBounds, sprintf("Address $%.4X is out of bounds in this %s bank", address, @type))
        end
      end
      true
    end


    ####
    ##  Since prog rom can be loaded at either 0x8000 or 0xC000
    ##  We should normalize the addresses to fit properly into
    ##  these banks, basically it acts like it is mirroring addresses
    ##  in those segments.  Char rom doesn't need this.  This will also
    ##  fail if you are accessing outside of the address space.
    def normalize_address(address)
      case @type
      when :prog
        if address_inside_prog_rom1?(address)
          return address - 0x8000
        end
        if address_inside_prog_rom2?(address)
          return address - 0xC000
        end
        fail(AccessOutsideProgRom, sprintf("Address $%.4X is outside PROG ROM", address))
      when :char
        unless address_inside_char_rom?(address)
          fail(AccessOutsideCharRom, sprintf("Address $%.4X is outside CHAR ROM", address))
        end
        address
      else
        address
      end
    end


    ####
    ##  Is this address inside the prog rom 1 area?
    def address_inside_prog_rom1?(address)
      address >= 0x8000 && address < 0xC000
    end


    ####
    ##  Is this address inside the prog rom 2 area?
    def address_inside_prog_rom2?(address)
      address >= 0xC000 && address <= 0xffff
    end


    ####
    ##  Is this address inside the char rom area?
    def address_inside_char_rom?(address)
      address >= 0x0000 && address <= 0x1fff
    end

  end

end

View File

@ -1,12 +0,0 @@
module Assembler6502

  ####
  ##  Keep only the text before the first ';' and trim the whitespace
  ##  around it.  A blank or comment-only line comes back as "".
  def sanitize_line(asm_line)
    before_comment = asm_line.split(';').first
    before_comment.to_s.strip.chomp
  end
  module_function :sanitize_line

end

80
lib/parser.rb Normal file
View File

@ -0,0 +1,80 @@
module Assembler6502
  require_relative 'instruction'
  require_relative 'directives/ines_header'
  require_relative 'directives/org'
  require_relative 'directives/segment'
  require_relative 'directives/incbin'
  require_relative 'directives/dw'
  require_relative 'directives/bytes'
  require_relative 'directives/ascii'
  require_relative 'directives/label'


  ####
  ##  Works out what kind of source line it has been handed (label,
  ##  directive or instruction) and returns the object produced by the
  ##  matching parse method.
  class Parser

    #### Custom Exceptions
    class CannotParse < StandardError; end


    ##  Directive classes, tried in this order
    Directives = [INESHeader, Org, Segment, IncBin, DW, Bytes, ASCII]


    ####
    ##  Parse one line of program source.
    ##  Returns nil for a blank or comment-only line, otherwise the first
    ##  non-nil result of Label.parse, a directive's parse, or
    ##  Instruction.parse.  Raises CannotParse if none of them accept it.
    def self.parse(line)
      code = sanitize_line(line)
      return nil if code.empty?

      ##  Labels get the first look
      label = Label.parse(code)
      return label unless label.nil?

      ##  Then directives
      directive = parse_directive(code)
      return directive unless directive.nil?

      ##  Anything else has to be an asm instruction
      instruction = Instruction.parse(code)
      return instruction unless instruction.nil?

      ##  Nobody recognised it
      fail(CannotParse, code)
    end


    ##  NOTE(review): a bare `private` does not apply to methods defined
    ##  with `def self.`, so the two helpers below are still callable from
    ##  outside; `private_class_method` would be needed to hide them.
    private

    ####
    ##  Drop the ; comment and surrounding whitespace from one line
    def self.sanitize_line(line)
      before_comment = line.split(';').first
      before_comment.to_s.strip.chomp
    end


    ####
    ##  Offer a line starting with '.' to each directive class in turn and
    ##  return the first non-nil result, or nil when there is none.
    def self.parse_directive(line)
      return nil unless line.start_with?('.')

      Directives.each do |candidate|
        parsed = candidate.parse(line)
        return parsed unless parsed.nil?
      end
      nil
    end

  end

end

View File

@ -78,6 +78,17 @@ module Assembler6502
value
end
####
## Export the symbol table as YAML
def export_to_yaml
  ##  Serialize @symbols to YAML, then rewrite every run of decimal digits
  ##  that ends a line as a 4 digit, 0x prefixed hex number.
  yaml_text = @symbols.to_yaml
  yaml_text.gsub(/(\d+)$/) { |digits| sprintf("0x%.4X", digits.to_i) }
end
private
####

View File

@ -1,135 +0,0 @@
gem 'minitest'
require 'minitest/autorun'
require 'minitest/unit'
require_relative '../lib/assembler.rb'
####
## Tests for Assembler6502::Assembler. Each test builds an assembler
## from a small snippet of 6502 source held in a heredoc and compares its
## output with a list of expected bytes written as lowercase hex strings.
## The label operands are expected to encode as $06xx addresses.
class TestAssembler < MiniTest::Test
## Minitest setup hook; it has no executable body here and only carries notes.
def setup
## Remember the modes which can use 16-bit absolute labels are:
## - absolute
## - absolute_x
## - absolute_y
## The JMP instruction can use 16-bit labels
## - absolute
## - indirect (it is the only indirect instruction)
##
## The Branching instructions can use labels, but they are all relative 8-bit addresses
end
## ADC in each listed addressing mode, including label operands.
## Compares against assembler.hexdump.
def test_adc
asm = <<-'ASM'
ADC #$FF ; Immediate
label:
ADC $FF ; Zero Page
ADC $FF, X ; Zero Page X
ADC $FFFF ; Absolute
ADC $FFFF, X ; Absolute X
ADC $FFFF, Y ; Absolute Y
ADC label ; Absolute Label
ADC label, X ; Absolute X Label
ADC label, Y ; Absolute Y Label
ADC ($FF, X) ; Indirect X
ADC ($FF), Y ; Indirect Y
ASM
assembler = Assembler6502::Assembler.new(asm)
correct = %w{69 ff 65 ff 75 ff 6d ff ff 7d ff ff 79 ff ff 6d 02 06 7d 02 06 79 02 06 61 ff 71 ff}
assert_equal(correct, assembler.hexdump)
end
## AND in the same set of addressing modes as test_adc.
## Compares against assembler.hexdump.
def test_and
asm = <<-'ASM'
AND #$FF ; Immediate
label:
AND $FF ; Zero Page
AND $FF, X ; Zero Page X
AND $FFFF ; Absolute
AND $FFFF, X ; Absolute X
AND $FFFF, Y ; Absolute Y
AND label ; Absolute Label
AND label, X ; Absolute X Label
AND label, Y ; Absolute Y Label
AND ($FF, X) ; Indirect X
AND ($FF), Y ; Indirect Y
ASM
assembler = Assembler6502::Assembler.new(asm)
correct = %w{29 ff 25 ff 35 ff 2d ff ff 3d ff ff 39 ff ff 2d 02 06 3d 02 06 39 02 06 21 ff 31 ff}
assert_equal(correct, assembler.hexdump)
end
## ASL: implied, zero page and absolute forms, plus label operands.
## Unlike the two tests above, this and the remaining tests compare
## against assembler.assemble rather than assembler.hexdump.
def test_asl
asm = <<-'ASM'
ASL ; Implied
label:
ASL $FF ; Zero Page
ASL $FF, X ; Zero Page X
ASL $FFFF ; Absolute
ASL $FFFF, X ; Absolute X
ASL label ; Absolute Label
ASL label, X ; Absolute X Label
ASM
assembler = Assembler6502::Assembler.new(asm)
correct = %w{0a 06 ff 16 ff 0e ff ff 1e ff ff 0e 01 06 1e 01 06}
assert_equal(correct, assembler.assemble)
end
## BIT with zero page, absolute and label operands.
def test_bit
asm = <<-'ASM'
BIT $FF ; Zero Page
label:
BIT $FFFF ; Absolute
BIT label ; Absolute
ASM
assembler = Assembler6502::Assembler.new(asm)
correct = %w{24 ff 2c ff ff 2c 02 06}
assert_equal(correct, assembler.assemble)
end
## Every branch instruction targeting a label defined earlier in the
## snippet; the expected bytes hold the relative 8-bit offsets.
def test_branches
asm = <<-'ASM'
LDX #$08
decrement:
DEX
STX $0200
CPX #$03
BNE decrement
STX $0201
BPL decrement
BMI decrement
BVC decrement
BVS decrement
BCC decrement
BCS decrement
BEQ decrement
BRK
ASM
assembler = Assembler6502::Assembler.new(asm)
correct = %w{a2 08 ca 8e 00 02 e0 03 d0 f8 8e 01 02 10 f3 30 f1 50 ef 70 ed 90 eb b0 e9 f0 e7 00}
assert_equal(correct, assembler.assemble)
end
## Single-byte stack and transfer instructions, plus NOP.
def test_stack_instructions
asm = <<-'ASM'
TXS
TSX
PHA
PLA
PHP
PLP
NOP
ASM
assembler = Assembler6502::Assembler.new(asm)
correct = %w{9a ba 48 68 08 28 ea}
assert_equal(correct, assembler.assemble)
end
end

82
test/test_memory_space.rb Normal file
View File

@ -0,0 +1,82 @@
gem 'minitest'
require 'minitest/autorun'
require 'minitest/unit'
require_relative '../lib/memory_space.rb'
####
##  Tests for MemorySpace: bank creation, mirrored prog rom access,
##  out of range access, and writes that reach the last byte of the bank.
##
##  Uses the Minitest constant; the legacy MiniTest alias is no longer
##  defined by default in current minitest releases.
class TestMemorySpace < Minitest::Test
  include Assembler6502


  ####
  ##  NOTE(review): the leading underscore keeps Minitest from discovering
  ##  this test (only test_* methods are run) -- presumably disabled on
  ##  purpose, confirm before renaming.
  def _test_create_prog_rom
    ##  First just try to read all of it
    space = MemorySpace.create_prog_rom
    contents = space.read(0x8000, 0x4000)
    assert_equal(0x4000, contents.size)
    assert(contents.all?(&:zero?))

    ##  It is mirrored so this should also work
    space = MemorySpace.create_prog_rom
    contents = space.read(0xC000, 0x4000)
    assert_equal(0x4000, contents.size)
    assert(contents.all?(&:zero?))
  end


  ####
  ##  Disabled, see the note on _test_create_prog_rom
  def _test_writing
    ##  Write some bytes into prog 2 area
    space = MemorySpace.create_prog_rom
    space.write(0xC000, "hi there".bytes)

    ##  Read them back..
    contents = space.read(0xC000, 8)
    assert_equal('hi there', contents.pack('C*'))

    ##  Should be mirrored in prog 1
    contents = space.read(0x8000, 8)
    assert_equal('hi there', contents.pack('C*'))
  end


  ####
  ##  Disabled, see the note on _test_create_prog_rom
  def _test_reading_out_of_bounds
    space = MemorySpace.create_prog_rom
    assert_raises(MemorySpace::AccessOutsideProgRom) do
      space.read(0x200, 10)
    end

    ##  But that is valid char rom area, so no explody
    space = MemorySpace.create_char_rom
    space.read(0x200, 10)

    ##  But something like this should explode
    space = MemorySpace.create_char_rom
    assert_raises(MemorySpace::AccessOutsideCharRom) do
      space.read(0x8001, 10)
    end
  end


  ####
  ##  There seem to be problems writing bytes right to
  ##  the end of the memory map, specifically where the
  ##  vector table is in prog rom, so let's test that.
  ##  Every write is read back so the test checks the stored bytes
  ##  as well as the absence of an exception.
  def test_writing_to_end
    space = MemorySpace.create_prog_rom
    bytes = [0xDE, 0xAD]

    ##  Write the NMI address to FFFA
    assert_equal(2, space.write(0xFFFA, bytes))
    assert_equal(bytes, space.read(0xFFFA, 2))

    ##  Write the entry point to FFFC
    assert_equal(2, space.write(0xFFFC, bytes))
    assert_equal(bytes, space.read(0xFFFC, 2))

    ##  Write the irq to FFFE, this was reported to fail, saying
    ##  it was trying to write to $10000 for some reason.
    assert_equal(2, space.write(0xFFFE, bytes))
    assert_equal(bytes, space.read(0xFFFE, 2))

    ##  Write to the very first
    assert_equal(2, space.write(0x8000, bytes))
    assert_equal(bytes, space.read(0x8000, 2))
  end

end

View File

@ -5,7 +5,7 @@ require 'minitest/unit'
require_relative '../lib/symbol_table.rb'
class TestAssembler < MiniTest::Test
class TestSymbolTable < MiniTest::Test
include Assembler6502
####