diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a91f608 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.nes +.DS_Store +Desktop.ini +Thumbs.db +*.deb diff --git a/README.md b/README.md index ac33353..fbf3f5b 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,35 @@ # assembler6502 + An assembler for the 6502 Chip written in Ruby + + 6502 Assembler + + Usage: ./assembler\_6502.rb infile.asm -o outfile.nes + + This is a pretty straightforward assembler that is currently set up + to produce iNES ROM formatted binaries from simple assembly listings. + It is good at knowing which addressing modes are and are not allowed for + each instruction, and contains some examples of correct syntax. + + Parsing is done by Regular Expression, because, well the language is + so regular, it actually took less time than anything else I've tried + to parse in the past, including Scheme using parsec. + + It handles labels, and does a two-pass assembly, first assembling + the byte codes, and then going back and filling in the proper addresses + where labels were used. + + I have used this to compile some code for the NES, and it ran correctly + on FCEUX, got it to make some sounds, etc. + + Some Todos: + - I need to add the .byte operator to add data bytes. + - I need to add the #\<$800 and #\>$800 style operators to select the + MSB and LSB of immediate values during assembly. + - I need to make the text/code/data sections easier to configure, it is + currently set to 0x8000 like NES Prog ROM + - I need to add commandline options through the OptionParser library + - I may make this into a Rubygem + - I need to split the project up into one class per file like usual. + - Maybe I can put some better error messages. + - I should just make a 6502 CPU emulator probably now too.
diff --git a/assembler_6502.rb b/assembler_6502.rb new file mode 100755 index 0000000..8cacbd4 --- /dev/null +++ b/assembler_6502.rb @@ -0,0 +1,91 @@ +#!/usr/bin/env ruby +############################################################################### +## 6502 Assembler +## +## Usage: ./assembler_6502.rb infile.asm -o outfile.nes +## +## This is a pretty straightforward assembler that is currently set up +## to produce iNES ROM formatted binaries from simple assembly listings. +## It is good at knowing which addressing modes are and are not allowed for +## each instruction, and contains some examples of correct syntax. +## +## Parsing is done by Regular Expression, because, well the language is +## so regular, it actually took less time than anything else I've tried +## to parse in the past, including Scheme using parsec. +## +## It handles labels, and does a two-pass assembly, first assembling +## the byte codes, and then going back and filling in the proper addresses +## where labels were used. +## +## I have used this to compile some code for the NES, and it ran correctly +## on FCEUX, got it to make some sounds, etc. +## +## Some Todos: +## - I need to add the .byte operator to add data bytes. +## - I need to add the #<$800 and #>$800 style operators to select the +## MSB and LSB of immediate values during assembly. +## - I need to make the text/code/data sections easier to configure, it is +## currently set to 0x8000 like NES Prog ROM +## - I need to add commandline options through the OptionParser library +## - I may make this into a Rubygem +## - I need to split the project up into one class per file like usual. +## - Maybe I can put some better error messages. +## - I should just make a 6502 CPU emulator probably now too.
+ + +require 'yaml' +require 'ostruct' +require 'optparse' +require_relative 'lib/directive' +require_relative 'lib/assembler' +require_relative 'lib/instruction' +require_relative 'lib/label' + +module Assembler6502 + + ##### + ## Load in my OpCode definitions + MyDirectory = File.expand_path(File.dirname(__FILE__)) + OpCodes = YAML.load_file("#{MyDirectory}/data/opcodes.yaml") + + #### + ## Clean up a line of assembly + def sanitize_line(asm_line) + sanitized = asm_line.split(';').first || "" + sanitized.strip.chomp + end + module_function :sanitize_line + + + #### + ## Run the assembler using commandline arguments + def run + options = {:in_file => nil, :out_file => 'a.nes'} + parser = OptionParser.new do |opts| + opts.banner = "Usage: #{$0} [options]" + + opts.on('-o', '--outfile filename', 'outfile') do |out_file| + options[:out_file] = out_file; + end + + opts.on('-h', '--help', 'Displays Help') do + puts opts + exit + end + end + parser.parse!(ARGV) + options[:in_file] = ARGV.shift + unless ARGV.empty? + STDERR.puts "Ignoring extra commandline options #{ARGV.join(' ')}" + end + if options.values.any?(&:nil?) 
+ STDERR.puts "Missing options try --help" + exit(1) + end + Assembler6502::Assembler.from_file(options[:in_file], options[:out_file]) + end + module_function :run + +end + +Assembler6502.run if __FILE__ == $0 # only when executed directly, not when required by the tests diff --git a/background.asm b/background.asm new file mode 100644 index 0000000..de381e9 --- /dev/null +++ b/background.asm @@ -0,0 +1,76 @@ + .inesprg 1 ; 1x 16KB PRG code + .ineschr 1 ; 1x 8KB CHR data + .inesmap 0 ; mapper 0 = NROM, no bank swapping + .inesmir 1 ; background mirroring + + +;;;;;;;;;;;;;;; + + + .bank 0 + .org $C000 +RESET: + SEI ; disable IRQs + CLD ; disable decimal mode + LDX #$40 + STX $4017 ; disable APU frame IRQ + LDX #$FF + TXS ; Set up stack + INX ; now X = 0 + STX $2000 ; disable NMI + STX $2001 ; disable rendering + STX $4010 ; disable DMC IRQs + +vblankwait1: ; First wait for vblank to make sure PPU is ready + BIT $2002 + BPL vblankwait1 + +clrmem: + LDA #$00 + STA $0000, x + STA $0100, x + STA $0200, x + STA $0400, x + STA $0500, x + STA $0600, x + STA $0700, x + LDA #$FE + STA $0300, x + INX + BNE clrmem + +vblankwait2: ; Second wait for vblank, PPU is ready after this + BIT $2002 + BPL vblankwait2 + + + LDA #%10000000 ;intensify blues + STA $2001 + +Forever: + JMP Forever ;jump back to Forever, infinite loop + + + +NMI: + RTI + +;;;;;;;;;;;;;; + + + + .bank 1 + .org $FFFA ;first of the three vectors starts here + .dw NMI ;when an NMI happens (once per frame if enabled) the + ;processor will jump to the label NMI: + .dw RESET ;when the processor first turns on or is reset, it will jump + ;to the label RESET: + .dw 0 ;external interrupt IRQ is not used in this tutorial + + +;;;;;;;;;;;;;; + + + .bank 2 + .org $0000 + .incbin "mario.chr" ;includes 8KB graphics file from SMB1 \ No newline at end of file diff --git a/beep.asm b/beep.asm new file mode 100644 index 0000000..3fe6ae7 --- /dev/null +++ b/beep.asm @@ -0,0 +1,11 @@ +reset: + LDA #$01 ; square 1 + STA $4015 + LDA #$08 ; period low + STA $4002 + LDA #$02 ; period high + STA $4003 + LDA #$BF
; volume + STA $4000 +forever: + JMP forever diff --git a/data/opcodes.yaml b/data/opcodes.yaml new file mode 100644 index 0000000..cd5180b --- /dev/null +++ b/data/opcodes.yaml @@ -0,0 +1,1030 @@ +--- +:adc: + :description: "ADd with Carry" + :flags: + - :s + - :v + - :z + - :c + :immediate: + :hex: 0x69 + :len: 2 + :cycles: 2 + :boundry_add: false + :zero_page: + :hex: 0x65 + :len: 2 + :cycles: 3 + :boundry_add: false + :zero_page_x: + :hex: 0x75 + :len: 2 + :cycles: 4 + :boundry_add: false + :absolute: + :hex: 0x6D + :len: 3 + :cycles: 4 + :boundry_add: false + :absolute_x: + :hex: 0x7D + :len: 3 + :cycles: 4 + :boundry_add: true + :absolute_y: + :hex: 0x79 + :len: 3 + :cycles: 4 + :boundry_add: true + :indirect_x: + :hex: 0x61 + :len: 2 + :cycles: 6 + :boundry_add: false + :indirect_y: + :hex: 0x71 + :len: 2 + :cycles: 5 + :boundry_add: true + +:and: + :description: "bitwise AND with accumulator" + :flags: + - :s + - :z + :immediate: + :hex: 0x29 + :len: 2 + :cycles: 2 + :boundry_add: false + :zero_page: + :hex: 0x25 + :len: 2 + :cycles: 3 + :boundry_add: false + :zero_page_x: + :hex: 0x35 + :len: 2 + :cycles: 4 + :boundry_add: false + :absolute: + :hex: 0x2D + :len: 3 + :cycles: 4 + :boundry_add: false + :absolute_x: + :hex: 0x3D + :len: 3 + :cycles: 4 + :boundry_add: true + :absolute_y: + :hex: 0x39 + :len: 3 + :cycles: 4 + :boundry_add: true + :indirect_x: + :hex: 0x21 + :len: 2 + :cycles: 6 + :boundry_add: false + :indirect_y: + :hex: 0x31 + :len: 2 + :cycles: 5 + :boundry_add: true + +:asl: + :description: "Arithmetic Shift Left" + :flags: + - :s + - :z + - :c + :implied: + :hex: 0xA + :len: 1 + :cycles: 2 + :boundry_add: false + :zero_page: + :hex: 0x6 + :len: 2 + :cycles: 5 + :boundry_add: false + :zero_page_x: + :hex: 0x16 + :len: 2 + :cycles: 6 + :boundry_add: false + :absolute: + :hex: 0xE + :len: 3 + :cycles: 6 + :boundry_add: false + :absolute_x: + :hex: 0x1E + :len: 3 + :cycles: 7 + :boundry_add: false + +:bit: + :description: "test BITs" + :flags: 
+ - :n + - :v + - :z + :zero_page: + :hex: 0x24 + :len: 2 + :cycles: 3 + :boundry_add: false + :absolute: + :hex: 0x2C + :len: 3 + :cycles: 4 + :boundry_add: false + +:bpl: + :branch: true + :description: "Branch on PLus" + :flags: [] + :relative: + :hex: 0x10 + :len: 2 +:bmi: + :branch: true + :description: "Branch on MInus" + :flags: [] + :relative: + :hex: 0x30 + :len: 2 +:bvc: + :branch: true + :description: "Branch on oVerflow Clear" + :flags: [] + :relative: + :hex: 0x50 + :len: 2 +:bvs: + :branch: true + :description: "Branch on oVerflow Set" + :flags: [] + :relative: + :hex: 0x70 + :len: 2 +:bcc: + :branch: true + :description: "Branch on Carry Clear" + :flags: [] + :relative: + :hex: 0x90 + :len: 2 +:bcs: + :branch: true + :description: "Branch on Carry Set" + :flags: [] + :relative: + :hex: 0xB0 + :len: 2 +:bne: + :branch: true + :description: "Branch on Not Equal" + :flags: [] + :relative: + :hex: 0xD0 + :len: 2 +:beq: + :branch: true + :description: "Branch on Equal" + :flags: [] + :relative: + :hex: 0xF0 + :len: 2 + + +:brk: + :description: "BReaK" + :flags: + - :b + :implied: + :hex: 0x0 + :len: 1 + :cycles: 7 + :boundry_add: false + +:cmp: + :description: "CoMPare accumulator" + :flags: + - :s + - :c + - :z + :immediate: + :hex: 0xC9 + :len: 2 + :cycles: 2 + :boundry_add: false + :zero_page: + :hex: 0xC5 + :len: 2 + :cycles: 3 + :boundry_add: false + :zero_page_x: + :hex: 0xD5 + :len: 2 + :cycles: 4 + :boundry_add: false + :absolute: + :hex: 0xCD + :len: 3 + :cycles: 4 + :boundry_add: false + :absolute_x: + :hex: 0xDD + :len: 3 + :cycles: 4 + :boundry_add: true + :absolute_y: + :hex: 0xD9 + :len: 3 + :cycles: 4 + :boundry_add: true + :indirect_x: + :hex: 0xC1 + :len: 2 + :cycles: 6 + :boundry_add: false + :indirect_y: + :hex: 0xD1 + :len: 2 + :cycles: 5 + :boundry_add: true + +:cpx: + :description: "ComPare X register" + :flags: + - :s + - :c + - :z + :immediate: + :hex: 0xE0 + :len: 2 + :cycles: 2 + :boundry_add: false + :zero_page: + :hex: 0xE4 + 
:len: 2 + :cycles: 3 + :boundry_add: false + :absolute: + :hex: 0xEC + :len: 3 + :cycles: 4 + :boundry_add: false + +:cpy: + :description: "ComPare Y register" + :flags: + - :s + - :c + - :z + :immediate: + :hex: 0xC0 + :len: 2 + :cycles: 2 + :boundry_add: false + :zero_page: + :hex: 0xC4 + :len: 2 + :cycles: 3 + :boundry_add: false + :absolute: + :hex: 0xCC + :len: 3 + :cycles: 4 + :boundry_add: false + +:dec: + :description: "DECrement memory" + :flags: + - :s + - :z + :zero_page: + :hex: 0xC6 + :len: 2 + :cycles: 5 + :boundry_add: false + :zero_page_x: + :hex: 0xD6 + :len: 2 + :cycles: 6 + :boundry_add: false + :absolute: + :hex: 0xCE + :len: 3 + :cycles: 6 + :boundry_add: false + :absolute_x: + :hex: 0xDE + :len: 3 + :cycles: 7 + :boundry_add: false + +:eor: + :description: "bitwise Exclusive OR" + :flags: + - :s + - :z + :immediate: + :hex: 0x49 + :len: 2 + :cycles: 2 + :boundry_add: false + :zero_page: + :hex: 0x45 + :len: 2 + :cycles: 3 + :boundry_add: false + :zero_page_x: + :hex: 0x55 + :len: 2 + :cycles: 4 + :boundry_add: false + :absolute: + :hex: 0x4D + :len: 3 + :cycles: 4 + :boundry_add: false + :absolute_x: + :hex: 0x5D + :len: 3 + :cycles: 4 + :boundry_add: true + :absolute_y: + :hex: 0x59 + :len: 3 + :cycles: 4 + :boundry_add: true + :indirect_x: + :hex: 0x41 + :len: 2 + :cycles: 6 + :boundry_add: false + :indirect_y: + :hex: 0x51 + :len: 2 + :cycles: 5 + :boundry_add: true + +:clc: + :description: "CLear Carry" + :flags: + - :c + :implied: + :hex: 0x18 + :len: 1 + :cycles: 2 +:sec: + :description: "SEt Carry" + :flags: + - :c + :implied: + :hex: 0x38 + :len: 1 + :cycles: 2 +:cli: + :description: "CLear Interrupt" + :flags: + - :i + :implied: + :hex: 0x58 + :len: 1 + :cycles: 2 +:sei: + :description: "SEt Interrupt" + :flags: + - :i + :implied: + :hex: 0x78 + :len: 1 + :cycles: 2 +:clv: + :description: "CLear oVerflow" + :flags: + - :v + :implied: + :hex: 0xB8 + :len: 1 + :cycles: 2 +:cld: + :description: "CLear Decimal" + :flags: + - :d + :implied:
+ :hex: 0xD8 + :len: 1 + :cycles: 2 +:sed: + :description: "SEt Decimal" + :flags: + - :d + :implied: + :hex: 0xF8 + :len: 1 + :cycles: 2 + +:inc: + :description: "INCrement memory" + :flags: + - :s + - :z + :zero_page: + :hex: 0xE6 + :len: 2 + :cycles: 5 + :boundry_add: false + :zero_page_x: + :hex: 0xF6 + :len: 2 + :cycles: 6 + :boundry_add: false + :absolute: + :hex: 0xEE + :len: 3 + :cycles: 6 + :boundry_add: false + :absolute_x: + :hex: 0xFE + :len: 3 + :cycles: 7 + :boundry_add: false + +:jmp: + :description: "JuMP" + :flags: [] + :absolute: + :hex: 0x4C + :len: 3 + :cycles: 3 + :boundry_add: false + :indirect: + :hex: 0x6C + :len: 3 + :cycles: 5 + :boundry_add: false + +:jsr: + :description: "Jump to SubRoutine" + :flags: [] + :absolute: + :hex: 0x20 + :len: 3 + :cycles: 6 + :boundry_add: false + +:lda: + :description: "LoaD Accumulator" + :flags: + - :s + - :z + :immediate: + :hex: 0xA9 + :len: 2 + :cycles: 2 + :boundry_add: false + :zero_page: + :hex: 0xA5 + :len: 2 + :cycles: 3 + :boundry_add: false + :zero_page_x: + :hex: 0xB5 + :len: 2 + :cycles: 4 + :boundry_add: false + :absolute: + :hex: 0xAD + :len: 3 + :cycles: 4 + :boundry_add: false + :absolute_x: + :hex: 0xBD + :len: 3 + :cycles: 4 + :boundry_add: true + :absolute_y: + :hex: 0xB9 + :len: 3 + :cycles: 4 + :boundry_add: true + :indirect_x: + :hex: 0xA1 + :len: 2 + :cycles: 6 + :boundry_add: false + :indirect_y: + :hex: 0xB1 + :len: 2 + :cycles: 5 + :boundry_add: true + +:ldx: + :description: "LoaD X register" + :flags: + - :s + - :z + :immediate: + :hex: 0xA2 + :len: 2 + :cycles: 2 + :boundry_add: false + :zero_page: + :hex: 0xA6 + :len: 2 + :cycles: 3 + :boundry_add: false + :zero_page_y: + :hex: 0xB6 + :len: 2 + :cycles: 4 + :boundry_add: false + :absolute: + :hex: 0xAE + :len: 3 + :cycles: 4 + :boundry_add: false + :absolute_y: + :hex: 0xBE + :len: 3 + :cycles: 4 + :boundry_add: true + +:ldy: + :description: "LoaD Y register" + :flags: + - :s + - :z + :immediate: + :hex: 0xA0 + :len: 2 + 
:cycles: 2 + :boundry_add: false + :zero_page: + :hex: 0xA4 + :len: 2 + :cycles: 3 + :boundry_add: false + :zero_page_x: + :hex: 0xB4 + :len: 2 + :cycles: 4 + :boundry_add: false + :absolute: + :hex: 0xAC + :len: 3 + :cycles: 4 + :boundry_add: false + :absolute_x: + :hex: 0xBC + :len: 3 + :cycles: 4 + :boundry_add: true + +:lsr: + :description: "Logical Shift Right" + :flags: + - :s + - :z + - :c + :implied: + :hex: 0x4A + :len: 1 + :cycles: 2 + :boundry_add: false + :zero_page: + :hex: 0x46 + :len: 2 + :cycles: 5 + :boundry_add: false + :zero_page_x: + :hex: 0x56 + :len: 2 + :cycles: 6 + :boundry_add: false + :absolute: + :hex: 0x4E + :len: 3 + :cycles: 6 + :boundry_add: false + :absolute_x: + :hex: 0x5E + :len: 3 + :cycles: 7 + :boundry_add: false + +:nop: + :description: "No OPeration" + :flags: [] + :implied: + :hex: 0xEA + :len: 1 + :cycles: 2 + :boundry_add: false + +:ora: + :description: "bitwise OR with Accumulator" + :flags: + - :s + - :z + :immediate: + :hex: 0x9 + :len: 2 + :cycles: 2 + :boundry_add: false + :zero_page: + :hex: 0x5 + :len: 2 + :cycles: 3 + :boundry_add: false + :zero_page_x: + :hex: 0x15 + :len: 2 + :cycles: 4 + :boundry_add: false + :absolute: + :hex: 0xD + :len: 3 + :cycles: 4 + :boundry_add: false + :absolute_x: + :hex: 0x1D + :len: 3 + :cycles: 4 + :boundry_add: true + :absolute_y: + :hex: 0x19 + :len: 3 + :cycles: 4 + :boundry_add: true + :indirect_x: + :hex: 0x1 + :len: 2 + :cycles: 6 + :boundry_add: false + :indirect_y: + :hex: 0x11 + :len: 2 + :cycles: 5 + :boundry_add: true + +:tax: + :description: "Transfer A to X" + :flags: + - :s + - :z + :implied: + :hex: 0xAA + :len: 1 + :cycles: 2 + :boundry_add: false +:txa: + :description: "Transfer X to A" + :flags: + - :s + - :z + :implied: + :hex: 0x8A + :len: 1 + :cycles: 2 + :boundry_add: false +:dex: + :description: "DEcrement X" + :flags: + - :s + - :z + :implied: + :hex: 0xCA + :len: 1 + :cycles: 2 + :boundry_add: false +:inx: + :description: "INcrement X" + :flags: + - :s + - :z 
+ :implied: + :hex: 0xE8 + :len: 1 + :cycles: 2 + :boundry_add: false +:tay: + :description: "Transfer A to Y" + :flags: + - :s + - :z + :implied: + :hex: 0xA8 + :len: 1 + :cycles: 2 + :boundry_add: false +:tya: + :description: "Transfer Y to A" + :flags: + - :s + - :z + :implied: + :hex: 0x98 + :len: 1 + :cycles: 2 + :boundry_add: false +:dey: + :description: "DEcrement Y" + :flags: + - :s + - :z + :implied: + :hex: 0x88 + :len: 1 + :cycles: 2 + :boundry_add: false +:iny: + :description: "INcrement Y" + :flags: + - :s + - :z + :implied: + :hex: 0xC8 + :len: 1 + :cycles: 2 + :boundry_add: false + +:rol: + :description: "ROtate Left" + :flags: + - :s + - :z + - :c + :implied: + :hex: 0x2A + :len: 1 + :cycles: 2 + :boundry_add: false + :zero_page: + :hex: 0x26 + :len: 2 + :cycles: 5 + :boundry_add: false + :zero_page_x: + :hex: 0x36 + :len: 2 + :cycles: 6 + :boundry_add: false + :absolute: + :hex: 0x2E + :len: 3 + :cycles: 6 + :boundry_add: false + :absolute_x: + :hex: 0x3E + :len: 3 + :cycles: 7 + :boundry_add: false + +:ror: + :description: "ROtate Right" + :flags: + - :s + - :z + - :c + :implied: + :hex: 0x6A + :len: 1 + :cycles: 2 + :boundry_add: false + :zero_page: + :hex: 0x66 + :len: 2 + :cycles: 5 + :boundry_add: false + :zero_page_x: + :hex: 0x76 + :len: 2 + :cycles: 6 + :boundry_add: false + :absolute: + :hex: 0x6E + :len: 3 + :cycles: 6 + :boundry_add: false + :absolute_x: + :hex: 0x7E + :len: 3 + :cycles: 7 + :boundry_add: false + +:rti: + :description: "ReTurn from Interrupt, TODO: Flags could be wrong" + :flags: + - :n + - :v + - :u + - :b + - :d + - :i + - :z + - :c + :implied: + :hex: 0x40 + :len: 1 + :cycles: 6 + :boundry_add: false + +:rts: + :description: "ReTurn from Subroutine" + :flags: [] + :implied: + :hex: 0x60 + :len: 1 + :cycles: 6 + :boundry_add: false + +:sbc: + :description: "SuBtract with Carry" + :flags: + - :s + - :v + - :z + - :c + :immediate: + :hex: 0xE9 + :len: 2 + :cycles: 2 + :boundry_add: false + :zero_page: + :hex: 0xE5 +
:len: 2 + :cycles: 3 + :boundry_add: false + :zero_page_x: + :hex: 0xF5 + :len: 2 + :cycles: 4 + :boundry_add: false + :absolute: + :hex: 0xED + :len: 3 + :cycles: 4 + :boundry_add: false + :absolute_x: + :hex: 0xFD + :len: 3 + :cycles: 4 + :boundry_add: true + :absolute_y: + :hex: 0xF9 + :len: 3 + :cycles: 4 + :boundry_add: true + :indirect_x: + :hex: 0xE1 + :len: 2 + :cycles: 6 + :boundry_add: false + :indirect_y: + :hex: 0xF1 + :len: 2 + :cycles: 5 + :boundry_add: true + +:sta: + :description: "STore Accumulator" + :flags: [] + :zero_page: + :hex: 0x85 + :len: 2 + :cycles: 3 + :boundry_add: false + :zero_page_x: + :hex: 0x95 + :len: 2 + :cycles: 4 + :boundry_add: false + :absolute: + :hex: 0x8D + :len: 3 + :cycles: 4 + :boundry_add: false + :absolute_x: + :hex: 0x9D + :len: 3 + :cycles: 5 + :boundry_add: false + :absolute_y: + :hex: 0x99 + :len: 3 + :cycles: 5 + :boundry_add: false + :indirect_x: + :hex: 0x81 + :len: 2 + :cycles: 6 + :boundry_add: false + :indirect_y: + :hex: 0x91 + :len: 2 + :cycles: 6 + :boundry_add: false + +:txs: + :description: "Transfer X to Stack ptr" + :flags: [] + :implied: + :hex: 0x9A + :len: 1 + :cycles: 2 + :boundry_add: false +:tsx: + :description: "Transfer Stack ptr to X" + :flags: [] + :implied: + :hex: 0xBA + :len: 1 + :cycles: 2 + :boundry_add: false + +:pha: + :description: "PusH Accumulator" + :flags: [] + :implied: + :hex: 0x48 + :len: 1 + :cycles: 3 + :boundry_add: false +:pla: + :description: "PuLl Accumulator" + :flags: [] + :implied: + :hex: 0x68 + :len: 1 + :cycles: 4 + :boundry_add: false + +:php: + :description: "PusH Processor status" + :flags: [] + :implied: + :hex: 0x08 + :len: 1 + :cycles: 3 + :boundry_add: false +:plp: + :description: "PuLl Processor status" + :flags: + - :n + - :v + - :u + - :b + - :d + - :i + - :z + - :c + :implied: + :hex: 0x28 + :len: 1 + :cycles: 4 + :boundry_add: false + +:stx: + :description: "STore X register" + :flags: [] + :zero_page: + :hex: 0x86 + :len: 2 + :cycles: 3 + :boundry_add: 
false + :zero_page_y: + :hex: 0x96 + :len: 2 + :cycles: 4 + :boundry_add: false + :absolute: + :hex: 0x8E + :len: 3 + :cycles: 4 + :boundry_add: false + +:sty: + :description: "STore Y register" + :flags: [] + :zero_page: + :hex: 0x84 + :len: 2 + :cycles: 3 + :boundry_add: false + :zero_page_x: + :hex: 0x94 + :len: 2 + :cycles: 4 + :boundry_add: false + :absolute: + :hex: 0x8C + :len: 3 + :cycles: 4 + :boundry_add: false diff --git a/lib/assembler.rb b/lib/assembler.rb new file mode 100644 index 0000000..88c9012 --- /dev/null +++ b/lib/assembler.rb @@ -0,0 +1,97 @@ + +module Assembler6502 + + #### + ## An assembler + class Assembler + attr_reader :assembly_code + + #### + ## Assemble from a file to a file + def self.from_file(infile, outfile) + assembler = self.new(File.read(infile)) + byte_array = self.create_ines_header + assembler.assemble(0x8000) + + File.open(outfile, 'wb') do |fp| # binary mode: the ROM image is raw bytes, not text + fp.write(byte_array.pack('C*')) + end + end + + + #### + ## iNES Header + def self.create_ines_header + [0x4E, 0x45, 0x53, 0x1a, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0] + end + + + #### + ## Assemble 6502 Mnemonics into a program + def initialize(assembly_code, label_index = {}) + @assembly_code = assembly_code + @foreign_labels = label_index + end + + + #### + ## Assemble the 6502 assembly + def assemble(start_address = 0x600) + program_data = first_pass_parse(@assembly_code, start_address, @foreign_labels) + @foreign_labels.merge!(program_data.labels) + second_pass_resolve(program_data.instructions, @foreign_labels) + rescue => exception + STDERR.puts "Error:\n\t#{exception.message}" + exit(1) + end + + + #### + ## Just a hexdump + def hexdump + assemble.map{|byte| sprintf("%.2x", (byte & 0xFF))} + end + + + #### + ## First pass of the assembler just parses each line.
+ ## Collecting labels, and leaving labels in instructions + ## as placeholders, you can provide the code's start address, + ## or arbitrary labels that are not found in the given asm + def first_pass_parse(assembly_code, address = 0x0600, labels = {}) + instructions = [] + + assembly_code.split(/\n/).each do |line| + parsed_line = Assembler6502::Instruction.parse(line, address) + case parsed_line + when Label + labels[parsed_line.label.to_sym] = parsed_line + when Instruction + instructions << parsed_line + address += parsed_line.length + when nil + else + fail(SyntaxError, sprintf("%.4X: Failed to parse: %s", address, line)) + end + end + OpenStruct.new(:instructions => instructions, :labels => labels) + end + + + #### + ## The second pass makes each instruction emit bytes + ## while also using knowledge of label addresses to + ## resolve absolute and relative usage of labels. + def second_pass_resolve(instructions, labels) + instructions.inject([]) do |sum, instruction| + if instruction.unresolved_symbols? + instruction.resolve_symbols(labels) + end + puts instruction + sum += instruction.emit_bytes + sum + end + end + + end + +end diff --git a/lib/directive.rb b/lib/directive.rb new file mode 100644 index 0000000..2fd6432 --- /dev/null +++ b/lib/directive.rb @@ -0,0 +1,28 @@ + +module Assembler6502 + + #### + ## This parses an assembler directive + class Directive + + ##### + ## Some directives are: + ## .inesprg x ; x * 16KB of PRG code + ## .ineschr x ; x * 8KB of CHR data + ## .inesmap x ; mapper.
0 = NROM, I don't know the other types + ## .inesmir x ; background mirroring, I don't know what this should be so x = 1 + ## .bank x ; Sets the bank number, there are 8 banks of 8192 bytes = 2**16 + ## .org $hhhh ; Positions the code at hex address $hhhh + ## .incbin "a" ; Assembles the contents of a binary file into current address + ## .dw x ; Assemble a 16-bit word at current address, x can be a label + ## .bytes a b c ; Assemble a sequence of bytes at the current address + + #### + ## This will return a new Directive, or nil if it is something else. + def self.parse(directive_line) + + end + + end + +end diff --git a/lib/instruction.rb b/lib/instruction.rb new file mode 100644 index 0000000..cf84b02 --- /dev/null +++ b/lib/instruction.rb @@ -0,0 +1,273 @@ + +module Assembler6502 + + #### + ## Represents a single 6502 Instruction + class Instruction + attr_reader :op, :arg, :mode, :hex, :description, :length, :cycle, :cycles, :boundry_add, :flags, :address + + ## Custom Exceptions + class InvalidInstruction < StandardError; end + class UnresolvedSymbols < StandardError; end + class InvalidAddressingMode < StandardError; end + class AddressOutOfRange < StandardError; end + + Mnemonic = '([A-Z]{3})' + Hex8 = '\$([0-9A-F]{2})' + Hex16 = '\$([0-9A-F]{4})' + Immediate = '\#\$([0-9A-F]{2})' + Sym = '([A-Za-z_][A-Za-z0-9_]+)' + Branches = '(BPL|BMI|BVC|BVS|BCC|BCS|BNE|BEQ)' + + AddressingModes = { + :relative => { + :example => 'B** my_label', + :display => '%s $%.4X', + :regex => /$^/, # Will never match this one + :regex_label => /^#{Branches}\s+#{Sym}$/ + }, + + :immediate => { + :example => 'AAA #$FF', + :display => '%s #$%.2X', + :regex => /^#{Mnemonic}\s+#{Immediate}$/ + }, + + :implied => { + :example => 'AAA', + :display => '%s', + :regex => /^#{Mnemonic}$/ + }, + + :zero_page => { + :example => 'AAA $FF', + :display => '%s $%.2X', + :regex => /^#{Mnemonic}\s+#{Hex8}$/ + }, + + :zero_page_x => { + :example => 'AAA $FF, X', + :display => '%s $%.2X, X', + :regex =>
/^#{Mnemonic}\s+#{Hex8}\s?,\s?X$/ + }, + + :zero_page_y => { + :example => 'AAA $FF, Y', + :display => '%s $%.2X, Y', + :regex => /^#{Mnemonic}\s+#{Hex8}\s?,\s?Y$/ + }, + + :absolute => { + :example => 'AAA $FFFF', + :display => '%s $%.4X', + :regex => /^#{Mnemonic}\s+#{Hex16}$/, + :regex_label => /^#{Mnemonic}\s+#{Sym}$/ + }, + + :absolute_x => { + :example => 'AAA $FFFF, X', + :display => '%s $%.4X, X', + :regex => /^#{Mnemonic}\s+#{Hex16}\s?,\s?X$/, + :regex_label => /^#{Mnemonic}\s+#{Sym}\s?,\s?X$/ + }, + + :absolute_y => { + :example => 'AAA $FFFF, Y', + :display => '%s $%.4X, Y', + :regex => /^#{Mnemonic}\s+#{Hex16}\s?,\s?Y$/, + :regex_label => /^#{Mnemonic}\s+#{Sym}\s?,\s?Y$/ + }, + + :indirect => { + :example => 'AAA ($FFFF)', + :display => '%s ($%.4X)', + :regex => /^#{Mnemonic}\s+\(#{Hex16}\)$/, + :regex_label => /^#{Mnemonic}\s+\(#{Sym}\)$/ + }, + + :indirect_x => { + :example => 'AAA ($FF, X)', + :display => '%s ($%.2X, X)', + :regex => /^#{Mnemonic}\s+\(#{Hex8}\s?,\s?X\)$/, + :regex_label => /^#{Mnemonic}\s+\(#{Sym}\s?,\s?X\)$/ + }, + + :indirect_y => { + :example => 'AAA ($FF, X)', + :display => '%s ($%.2X), Y', + :regex => /^#{Mnemonic}\s+\(#{Hex8}\)\s?,\s?Y$/, + :regex_label => /^#{Mnemonic}\s+\(#{Sym}\)\s?,\s?Y$/ + } + } + + #### + ## Parse one line of assembly, returns nil if the line + ## is ultimately empty of instructions or labels + ## Raises SyntaxError if the line is malformed in some way + def self.parse(asm_line, address) + + ## First, sanitize the line, which removes whitespace, and comments. + sanitized = Assembler6502.sanitize_line(asm_line) + + ## Empty lines assemble to nothing + return nil if sanitized.empty? + + ## Let's see if this line is a label, and try + ## to create a label for the current address + label = Label.parse_label(sanitized, address) + return label unless label.nil? 
+ + ## We must have some asm, so try to parse it in each addressing mode + AddressingModes.each do |mode, parse_info| + + ## We have regexes that match each addressing mode + match_data = parse_info[:regex].match(sanitized) + + unless match_data.nil? + ## We must have a straight instruction without labels, construct + ## an Instruction from the match_data, and return it + _, op, arg = match_data.to_a + return Instruction.new(op, arg, mode, address) + + else + ## Can this addressing mode even use labels? + unless parse_info[:regex_label].nil? + + ## See if it does in fact have a label/symbolic argument + match_data = parse_info[:regex_label].match(sanitized) + + unless match_data.nil? + ## Yep, the arg is a label, we can resolve that to an address later + ## But for now we will create an Instruction where the label is a + ## symbol reference to the label we found, ie. arg.to_sym + _, op, arg = match_data.to_a + return Instruction.new(op, arg.to_sym, mode, address) + end + end + end + end + + ## We just don't recognize this line of asm, it must be a Syntax Error + fail(SyntaxError, sprintf("%.4X: %s", address, asm_line)) # %s keeps any '%' in the source line from being read as a format directive + end + + + #### + ## Create an instruction. Having the instruction op a downcased symbol is nice + ## because that can later be used to index into our opcodes hash in OpCodes + ## OpCodes contains the definitions of each OpCode + def initialize(op, arg, mode, address) + + ## Lookup the definition of this opcode, otherwise it is an invalid instruction + @op = op.downcase.to_sym + definition = OpCodes[@op] + fail(InvalidInstruction, op) if definition.nil? + + ## Be sure the mode is an actually supported mode. + @mode = mode.to_sym + fail(InvalidAddressingMode, mode) unless AddressingModes.has_key?(@mode) + + ## Make sure the address is in range + if address < 0x0 || address > 0xFFFF + fail(AddressOutOfRange, address) + end + @address = address + + ## Argument can either be a symbolic label, a hexidecimal number, or nil.
+ @arg = case arg + when Symbol then arg + when String + if arg.match(/\A[0-9A-F]{1,4}\z/).nil? + fail(SyntaxError, "#{arg} is not a valid hexidecimal number") + else + arg.to_i(16) + end + when nil then nil + else + fail(SyntaxError, "Cannot parse argument #{arg}") + end + + if definition[@mode].nil? + fail(InvalidInstruction, "#{op} cannot be used in #{mode} mode") + end + @description, @flags = definition.values_at(:description, :flags) + @hex, @length, @cycles, @boundry_add = definition[@mode].values_at(:hex, :len, :cycles, :boundry_add) + end + + + #### + ## Does this instruction have unresolved symbols? + def unresolved_symbols? + @arg.kind_of?(Symbol) + end + + + #### + ## Resolve symbols + def resolve_symbols(symbols) + if unresolved_symbols? + if symbols[@arg].nil? + fail(SyntaxError, "Unknown symbol #{@arg.inspect}") + end + + ## Based on this instructions length, we should resolve the address + ## to either an absolute one, or a relative one. The only relative addresses + ## are the branching ones, which are 2 bytes in size, hence the extra 2 byte difference + case @length + when 2 + @arg = symbols[@arg].address - @address - 2 + when 3 + @arg = symbols[@arg].address + else + fail(SyntaxError, "Probably can't use symbol #{@arg.inspect} with #{@op}") + end + end + end + + + #### + ## Emit bytes from asm structure + def emit_bytes + fail(UnresolvedSymbols, "Symbol #{@arg.inspect} needs to be resolved") if unresolved_symbols? + case @length + when 1 + [@hex] + when 2 + [@hex, @arg] + when 3 + [@hex] + break_16(@arg) + else + fail("Can't handle instructions > 3 bytes") + end + end + + + #### + ## Hex dump of this instruction + def hexdump + emit_bytes.map{|byte| sprintf("%.2X", byte & 0xFF)} + end + + + #### + ## Pretty Print + def to_s + if unresolved_symbols?
+ display = AddressingModes[@mode][:display] + sprintf("%.4X | %s %s", @address, @op, @arg.to_s) + else + display = AddressingModes[@mode][:display] + sprintf("%.4X | #{display}", @address, @op, @arg) + end + end + + private + #### + ## Break an integer into two 8-bit parts + def break_16(integer) + [integer & 0x00FF, (integer & 0xFF00) >> 8] + end + + end + +end diff --git a/lib/label.rb b/lib/label.rb new file mode 100644 index 0000000..b94e7b5 --- /dev/null +++ b/lib/label.rb @@ -0,0 +1,63 @@ +module Assembler6502 + + #### + ## Represents a label + class Label + attr_reader :label, :address + + def self.parse_label(asm_line, address) + sanitized = Assembler6502.sanitize_line(asm_line) + match_data = sanitized.match(/\A([A-Za-z_][A-Za-z0-9_]+):/) # anchored, underscores allowed, so "my_label:" is not captured as "label" + + unless match_data.nil? + _, label = match_data.to_a + self.new(label, address) + else + nil + end + end + + + #### + ## Create a label on an address + def initialize(label, address) + @label = label + @address = address + end + + + #### + ## Pretty print + def to_s + sprintf("%.4X | #{@label}", @address) + end + + + #### + ## Labels take no space + def length + 0 + end + + + #### + ## Emit bytes, (none) + def emit_bytes + [] + end + + #### + ## Mode + def mode + "label" + end + + #### + ## Description + def description + sprintf("Label pointing to $%.4X", @address) + end + + end + +end diff --git a/mario.chr b/mario.chr new file mode 100644 index 0000000..d150ccc Binary files /dev/null and b/mario.chr differ diff --git a/test/test_assembler.rb b/test/test_assembler.rb new file mode 100644 index 0000000..ab41227 --- /dev/null +++ b/test/test_assembler.rb @@ -0,0 +1,139 @@ +gem 'minitest' +require 'minitest/autorun' +require 'minitest/unit' + +require_relative '../assembler_6502.rb' + +class TestAssembler < MiniTest::Test + + def setup + ## Remember the modes which can use 16-bit absolute labels are: + ## - absolute + ## - absolute_x + ## - absolute_y + ## The JMP instruction can use 16-bit labels + ## - absolute + ## -
indirect (it is the only indirect instruction) + ## + ## The Branching instructions can use labels, but they are all relative 8-bit addresses + end + + + def test_adc + asm = <<-'ASM' + ADC #$FF ; Immediate + label: + ADC $FF ; Zero Page + ADC $FF, X ; Zero Page X + ADC $FFFF ; Absolute + ADC $FFFF, X ; Absolute X + ADC $FFFF, Y ; Absolute Y + ADC label ; Absolute Label + ADC label, X ; Absolute X Label + ADC label, Y ; Absolute Y Label + ADC ($FF, X) ; Indirect X + ADC ($FF), Y ; Indirect Y + ASM + assembler = Assembler6502::Assembler.new(asm) + correct = %w{69 ff 65 ff 75 ff 6d ff ff 7d ff ff 79 ff ff 6d 02 06 7d 02 06 79 02 06 61 ff 71 ff} + assert_equal(correct, assembler.hexdump) + end + + + def test_and + asm = <<-'ASM' + AND #$FF ; Immediate + label: + AND $FF ; Zero Page + AND $FF, X ; Zero Page X + AND $FFFF ; Absolute + AND $FFFF, X ; Absolute X + AND $FFFF, Y ; Absolute Y + AND label ; Absolute Label + AND label, X ; Absolute X Label + AND label, Y ; Absolute Y Label + AND ($FF, X) ; Indirect X + AND ($FF), Y ; Indirect Y + ASM + assembler = Assembler6502::Assembler.new(asm) + correct = %w{29 ff 25 ff 35 ff 2d ff ff 3d ff ff 39 ff ff 2d 02 06 3d 02 06 39 02 06 21 ff 31 ff} + assert_equal(correct, assembler.hexdump) + end + + + def test_asl + asm = <<-'ASM' + ASL ; Implied + label: + ASL $FF ; Zero Page + ASL $FF, X ; Zero Page X + ASL $FFFF ; Absolute + ASL $FFFF, X ; Absolute X + ASL label ; Absolute Label + ASL label, X ; Absolute X Label + ASM + assembler = Assembler6502::Assembler.new(asm) + correct = %w{0a 06 ff 16 ff 0e ff ff 1e ff ff 0e 01 06 1e 01 06} + assert_equal(correct, assembler.hexdump) + end + + + def test_bit + asm = <<-'ASM' + BIT $FF ; Zero Page + label: + BIT $FFFF ; Absolute + BIT label ; Absolute + ASM + assembler = Assembler6502::Assembler.new(asm) + correct = %w{24 ff 2c ff ff 2c 02 06} + assert_equal(correct, assembler.hexdump) + end + + + def test_branches + asm = <<-'ASM' + LDX #$08 + decrement: + DEX + STX $0200 + CPX #$03 + 
BNE decrement + STX $0201 + BPL decrement + BMI decrement + BVC decrement + BVS decrement + BCC decrement + BCS decrement + BEQ decrement + BRK + ASM + assembler = Assembler6502::Assembler.new(asm) + correct = %w{a2 08 ca 8e 00 02 e0 03 d0 f8 8e 01 02 10 f3 30 f1 50 ef 70 ed 90 eb b0 e9 f0 e7 00} + assert_equal(correct, assembler.hexdump) + end + + + def test_stack_instructions + asm = <<-'ASM' + TXS + TSX + PHA + PLA + PHP + PLP + NOP + ASM + assembler = Assembler6502::Assembler.new(asm) + correct = %w{9a ba 48 68 08 28 ea} + assert_equal(correct, assembler.hexdump) + end + + + + + + +end + diff --git a/utils/opcode_table_to_yaml.rb b/utils/opcode_table_to_yaml.rb new file mode 100755 index 0000000..b42719b --- /dev/null +++ b/utils/opcode_table_to_yaml.rb @@ -0,0 +1,91 @@ +#!/usr/bin/env ruby + +############################################################################### +## http://www.6502.org/tutorials/6502opcodes.html +## This web page has information about each and every 6502 instruction +## Specifically: +## +## - Description of what each of the instructions do +## - Which modes are supported by which instructions, immediate, zero page +## zero page x, and y, absolute, indirect, relative etc. +## - The hex codes each instruction assembles to, in each mode. +## - The lengths in bytes of each instruction, by mode +## - The possibly variable number of cycles each instruction takes. +## +## There are 56 of them, and in my programmer laziness I just wrote this +## script to parse the page into the data structure that you see in +## opcodes.yaml. This really helped in creating the assembler, and +## it had basically everything I needed to know, and sped up writing +## this by huge factor. So, yay to this page, and this script! + +require 'yaml' + +## Instruction name, and output structure to fill in. 
+name = :adc +output = {name => {}} + +## Copy paste the tables from that website into this heredoc: +text =<<-TEXT +Immediate ADC #$44 $69 2 2 +Zero Page ADC $44 $65 2 3 +Zero Page,X ADC $44,X $75 2 4 +Absolute ADC $4400 $6D 3 4 +Absolute,X ADC $4400,X $7D 3 4+ +Absolute,Y ADC $4400,Y $79 3 4+ +Indirect,X ADC ($44,X) $61 2 6 +Indirect,Y ADC ($44),Y $71 2 5+ +TEXT + + +## And now iterate over each line to extract the info +lines = text.split(/\n/) +lines.each do |line| + + ## Grab out the values we care about + parts = line.split + cycles, len, hex = parts[-1], parts[-2], parts[-3] + hex = "0x%X" % hex.gsub('$', '').to_i(16) + + match_data = cycles.match(/([0-9]+)(\+?)/) + cycles = match_data[1] + boundary = match_data[2] + hash = {:hex => hex, :len => len, :cycles => cycles, :boundry_add => boundary != ""} + + ## And now decide which mode the line belongs to, collecting each listed mode + hash = case line + when /^Accumulator/ + {:accumulator => hash} + when /^Immediate/ + {:immediate => hash} + when /^Zero Page,X/ + {:zero_page_x => hash} + when /^Zero Page,Y/ + {:zero_page_y => hash} + when /^Zero Page/ + {:zero_page => hash} + when /^Absolute,X/ + {:absolute_x => hash} + when /^Absolute,Y/ + {:absolute_y => hash} + when /^Absolute/ + {:absolute => hash} + when /^Indirect,X/ + {:indirect_x => hash} + when /^Indirect,Y/ + {:indirect_y => hash} + when /^Indirect/ + {:indirect => hash} + when /^Implied/ + {:implied => hash} + else + {} + end + output[name].merge!(hash) +end + +## Now output some yaml, and I only had to do this about 45 times +## instead of laboriously and mistak-pronely doing it by hand. +puts YAML.dump(output).gsub("'", '') + +## See opcodes.yaml +