1
0
mirror of https://github.com/mgcaret/of816.git synced 2025-01-01 06:30:35 +00:00
of816/utils/fs2asm.rb

673 lines
16 KiB
Ruby
Executable File

#!/usr/bin/ruby
# This program converts Forth source code colon definitons to OF816 assembly-language
# dictionary entries suitable for assembling with ca65.
#
# It is, in effect, a Forth compiler in its own right.
#
# Usage: fs2asm.rb index1.yaml [index2.yaml ...] forth-source.fs
# Outputs the results on stdout.
#
# The index files are generated with index.rb, and should reflect all of the dictionaries
# present in the system that will be used by the input Forth code or may have label
# collisions.
# As a trivial example, this source code:
#
# \ output boolean value as text
# : .bool ( f - )
# true if
# s" true"
# else
# s" false"
# then
# type
# ;
#
# is converted to:
#
# ; output boolean value as text
# dword DOT_BOOL,".bool"
# ENTER
# ; ( f - )
# .dword TRUE
# .dword _IF ; IF
# .dword l01 ; false branch
# SLIT "true"
# .dword _JUMP
# .dword l02
# l01: ; ELSE
# SLIT "false"
# l02: ; THEN
# .dword TYPE
# EXIT
# eword
# Features supported outside of colon definitions:
#
# The input number base can be changed with DECIMAL HEX BINARY and OCTAL.
#
# There is a basic stack that can contain numbers or labels.
# numbers may be placed on the stack in the usual fashion, and labels can be placed with
# ' (single-quote/tick).
# They can be used within definitions via LITERAL or COMPILE,
# no arithmetic is supported at this time.
#
# Words may be changed to headerless and back to normal with HEADERS and HEADERLESS.
# Unsupported items:
#
# Setting flags on words.
#
# RECURSE $HEX( TO and END-CODE
# quotations: [: and ;]
# no-name and temporary definitions: :NONAME :TEMP
# Bugs:
#
# Things that don't match words are silently converted into numbers without validation
require 'yaml'
@LABEL_CHARS = 10
@OP_CHARS = 7
@PARM_CHARS = 15
def emit_comment(comment)
if @in_colon
@asm += "#{" " * @LABEL_CHARS}; #{comment}\n"
else
@asm += "; #{comment}\n"
end
end
def emit_line(label = '', opcode = '', parm = '', comment = nil)
l = label || ''
line = "%-#{@LABEL_CHARS}s%-#{@OP_CHARS}s%-#{@PARM_CHARS}s%s" % [
label != '' ? "#{l}:" : '',
"#{opcode} ",
"#{parm}",
comment ? "; #{comment}" : ''
]
@asm += "#{line.rstrip}\n"
end
def emit_def_start(label, fname, flags = nil, comment = nil)
wmacro = @headers ? 'dword' : 'hword'
fn = fname
if fname.include?('"')
fn = fname.gsub(/"/, "'")
wmacro += 'q'
end
line = "%-#{@LABEL_CHARS}s%-#{@OP_CHARS}s%-#{@PARM_CHARS}s%s" % [
"#{wmacro} ",
"#{label},\"#{fn}\"#{flags ? ",#{flags}" : ''} ",
'',
comment ? "; #{comment}" : ''
]
@asm += "#{line.rstrip}\n"
end
def emit_def_end
@asm += "eword\n"
end
def emit_dword(label, comment = nil)
emit_line('', '.dword', label, comment)
end
# consume a character from the input source
def consume
c = @fs[@inptr]
@inptr += 1 if c
return c
end
# Skip blanks, consume characters until the next one is a blank.
# For the purposes of this function, anything ASCII 32 (" ") or less is a blank (including
# newlines). Discard the final blank and return the consumed characters
def parse_word
consumed = ""
while c = consume
if c.ord > 32
@inptr -= 1 # putback
break
end
end
while c = consume
break if c.ord <= 32
consumed += c
end
return consumed
end
# Consume characters until we reach newline or char. Discard the final newline or char
# and return the consumed characters
def parse(char = " ")
consumed = ""
while c = consume
break if c == char || c == "\n"
consumed += c
end
return consumed
end
def label_exist?(label)
@dictionary.map {|_k, v| v['label']}.include?(label)
end
def to_label(str)
label = str.upcase.tr('#$<>()', 'ndlglr')
label.gsub!(/\!/, '_STORE_')
label.gsub!(/\@/, '_AT_')
label.gsub!(/\&/, '_AND_')
label.gsub!(/\^/, 'c')
label.gsub!(/\*/, '_STAR_')
label.gsub!(/\;/, '_SEMI_')
label.gsub!(/\:/, '_COLON_')
label.gsub!(/\?/, 'q')
label.gsub!(/\'/, 'q')
label.gsub!(/\"/, 'Q')
label.gsub!(/\//, '_SLASH_')
label.gsub!(/\\/, '_BACKSLASH_')
label.gsub!(/\./, '_DOT_')
label.gsub!(/\,/, '_COMMA_')
label.gsub!(/\+/, '_PLUS_')
label.gsub!(/\-/, '_MINUS_')
label.gsub!(/\=/, '_EQUAL_')
label.tr!('^A-Za-z0-9', '_')
label.gsub!(/_+/, '_')
label.gsub!(/^_+/, '')
label.gsub!(/_+$/, '')
if label_exist?(label)
label += "_00"
while label_exist?(label)
label.next!
end
end
return label
end
def l_label
@lserial += 1
"l%02d" % @lserial
end
# consume a comment and emit as assembler comment
def f_slash_comment
emit_comment(parse("\n"))
end
def f_paren_comment
emit_comment("( #{parse(')')})")
end
# Process colon definitions
def f_colon
name = parse_word
unless name && name != ''
abort("expecting name for colon definition")
end
label = to_label(name)
if @dictionary.key?(name)
emit_comment("WARNING: #{name} is not unique, old def no longer available")
end
@dictionary[name] = {
"label" => label
}
emit_def_start(label, name)
emit_line('', 'ENTER')
@in_colon = true
@lserial = 0
while @in_colon
word = parse_word.upcase
next if word == ""
if @dictionary[word] && @dictionary[word]['flags'] && @dictionary[word]['flags'].include?('F_IMMED')
if @macros.key?(word)
@macros[word].call
else
abort("immediate word #{word} does not have matching macro")
end
elsif @dictionary[word]
emit_dword(@dictionary[word]['label'])
elsif @macros.key?(word)
@macros[word].call
else
emit_line('', 'ONLIT', word.to_i(@base))
end
end
end
def f_semicolon
abort('; while not in definition') unless @in_colon
emit_line('', 'EXIT')
emit_def_end
emit_line
@in_colon = false
abort("control-flow stack not empty") unless @cflow.empty?
end
def f_s_quote
abort('S" while not in definition') unless @in_colon
str = parse('"')
emit_line('', 'SLIT', "\"#{str}\"")
end
def f_dot_quote
abort('." while not in definition') unless @in_colon
s_quote
emit_dword('TYPE')
end
def f_abort_quote
s_quote
emit_dword('_ABORTQ')
end
def f_left_square_bracket
@in_colon = false
end
def f_right_square_bracket
@in_colon = true
end
def f_ahead(comment = nil)
abort('AHEAD outside of definition') unless @in_colon
l = l_label
emit_dword('_JUMP', comment || 'AHEAD')
emit_dword(l)
@cflow.push({d: :orig, l: l})
end
def f_if
abort('IF outside of definition') unless @in_colon
l = l_label
emit_dword('_IF', 'IF')
emit_dword(l, "false branch")
@cflow.push({d: :orig, l: l})
end
def f_else
abort('ELSE outside of definition') unless @in_colon
l = l_label
emit_dword('_JUMP')
emit_dword(l)
orig = @cflow.pop
abort('ELSE control flow mismatch') if orig[:d] != :orig
emit_line(orig[:l], '', '','ELSE')
@cflow.push({d: :orig, l: l})
end
def f_then
abort('THEN outside of definition') unless @in_colon
orig = @cflow.pop
abort('THEN control flow mismatch') if orig[:d] != :orig
emit_line(orig[:l], '', '','THEN')
end
def f_begin
abort('BEGIN outside of definition') unless @in_colon
l = l_label
emit_line(l, '', '','BEGIN')
@cflow.push({d: :dest, l: l})
end
def f_while
abort('WHILE outside of definition') unless @in_colon
emit_dword('_IF', 'WHILE')
l = l_label
emit_dword(l)
dest = @cflow.pop
@cflow.push({d: :orig, l: l})
@cflow.push(dest)
end
def f_until
abort('UNTIL outside of definition') unless @in_colon
dest = @cflow.pop
abort('UNTIL control flow mismatch') if dest[:d] != :dest
emit_dword('_IF', 'UNTIL')
emit_dword(dest[:l])
end
def f_repeat
abort('REPEAT outside of definition') unless @in_colon
dest = @cflow.pop
abort('REPEAT control flow mismatch (dest)') if dest[:d] != :dest
emit_dword('_JUMP', 'REPEAT (dest)')
emit_dword(dest[:l])
orig = @cflow.pop
abort('REPEAT control flow mismatch (orig)') if orig[:d] != :orig
emit_line(orig[:l])
end
def f_again
abort('AGAIN outside of definition') unless @in_colon
dest = @cflow.pop
abort('AGAIN control flow mismatch') if dest[:d] != :dest
emit_dword('_JUMP', 'AGAIN')
emit_dword(dest[:l])
end
def f_do
abort('DO outside of definition') unless @in_colon
emit_dword('_DO', 'DO')
ahead('to LEAVE target')
l = l_label
emit_line(l, '', '', '+LOOP target')
@cflow.push({d: :loop, l: l})
end
def f_qdo
abort('DO outside of definition') unless @in_colon
emit_dword('_QDO', '?DO')
ahead('to LEAVE target')
l = l_label
emit_line(l, '', '', '+LOOP target')
@cflow.push({d: :loop, l: l})
end
def f_ploop
abort('+LOOP outside of definition') unless @in_colon
emit_dword('_PLOOP', '+LOOP')
lp = @cflow.pop
abort('LOOP/+LOOP control flow mismatch (target)') if lp[:d] != :loop
emit_dword(lp[:l], 'to +LOOP target')
orig = @cflow.pop
abort('LOOP/+LOOP control flow mismatch (LEAVE)') if orig[:d] != :orig
emit_line(orig[:l], '', '','LEAVE target')
emit_dword('UNLOOP')
end
def f_loop
abort('LOOP outside of definition') unless @in_colon
emit_dword('ONE')
f_ploop
end
def f_case
abort('CASE outside of definition') unless @in_colon
emit_dword('_SKIP2', 'CASE')
l = l_label
@cflow.push({d: :case, l: l})
emit_line(l, '', '', 'matched case target')
ahead('to end of CASE')
end
def f_of
abort('OF outside of definition') unless @in_colon
l = l_label
emit_dword('_OF', 'OF')
@cflow.push({d: :of, l: l})
emit_dword(l, 'no match branch')
end
def f_endof
abort('ENDOF outside of definition') unless @in_colon
ot = @cflow.pop
abort('ENDOF control flow mismatch (OF)') if ot[:d] != :of
ct = @cflow[-2] # matched case target
abort('ENDOF control flow mismatch (matched CASE)') if ct[:d] != :case
emit_dword('_JUMP', 'ENDOF (matched if we got here)')
emit_dword(ct[:l])
emit_line(ot[:l])
end
def f_endcase
abort('ENDCASE outside of definition') unless @in_colon
emit_dword('DROP', 'ENDCASE')
at = @cflow.pop
abort('ENDCASE control flow mismatch (failed match jump)') if at[:d] != :orig
abort('ENDCASE control flow mismatch (CASE)') if @cflow.pop[:d] != :case
emit_line(at[:l])
end
def f_tick
abort('BUG: \' macro executed inside definition') if @in_colon
w = parse_word.upcase
abort("#{w}?") unless @dictionary.key?(w)
@stack.push(@dictionary[w]['label'])
end
def f_ctick
abort('[\'] outside of definition') unless @in_colon
w = parse_word.upcase
abort("#{w}?") unless @dictionary.key?(w)
l = @dictionary[w]['label']
emit_line('', 'ONLIT', l, "' #{w}")
end
def f_cchar
abort('[CHAR] outside of definition') unless @in_colon
c = parse_word
emit_line('', 'ONLIT', c.ord, "[CHAR] #{c}")
end
def f_ascii
c = parse_word
if @in_colon
emit_line('', 'ONLIT', c.ord, "ASCII #{c}")
else
@stack.push(c.ord)
end
end
def f_control
c = parse_word
if @in_colon
emit_line('', 'ONLIT', c.ord & 0x1F, "CONTROL #{c}")
else
@stack.push(c.ord & 0x1F)
end
end
def f_ccompile
abort('[COMPILE] outside of definition') unless @in_colon
w = parse_word.upcase
abort("#{w}?") unless @dictionary.key?(w)
l = @dictionary[w]['label']
emit_dword('_COMP_LIT', '[COMPILE]')
emit_dword(l)
end
def f_literal
abort('LITERAL outside of definition') unless @in_colon
emit_line('', 'ONLIT', @stack.pop)
end
def f_2literal
abort('LITERAL outside of definition') unless @in_colon
n2 = @stack.pop
n1 = @stack.pop
emit_line('', 'ONLIT', n1)
emit_line('', 'ONLIT', n2)
end
def f_postpone
abort('POSTPONE outside of definition') unless @in_colon
w = parse_word.upcase
abort("#{w}?") unless @dictionary.key?(w)
l = @dictionary[w]['label']
if @dictionary[w]['flags'] && @dictionary[w]['flags'].include?('F_IMMED')
emit_dword(l, 'POSTPONE (imm)')
else
emit_dword('_COMP_LIT', 'POSTPONE')
emit_dword(l)
end
end
def f_compile
abort('COMPILE outside of definition') unless @in_colon
emit_dword('_COMP_LIT', 'COMPILE')
end
def f_dnum
@stack.push(parse_word.to_i(10))
f_literal if @in_colon
end
def f_hnum
@stack.push(parse_word.to_i(16))
f_literal if @in_colon
end
def f_onum
@stack.push(parse_word.to_i(8))
f_literal if @in_colon
end
def f_binary
@base = 2
end
def f_octal
@base = 8
end
def f_decimal
@base = 10
end
def f_hex
@base = 16
end
def f_semis
abort(';CODE outside of definition') unless @in_colon
emit_line('', 'CODE')
end
def f_does
abort('DOES> outside of definition') unless @in_colon
f_semis
emit_line('', 'jsl', 'f:_does')
emit_line('', 'ENTER')
emit_dword('RPLUCKADDR')
emit_dword('INCR')
end
def f_compilecomma
abort('COMPILE, outside of definition') unless @in_colon
emit_dword(@stack.pop)
end
def f_headers
abort('HEADERS inside of definition') if @in_colon
@headers = true
emit_comment('headers')
emit_line
end
def f_headerless
abort('HEADERLESS inside of definition') if @in_colon
@headers = false
emit_comment('headerless')
emit_line
end
# Macros. For words that are normally immediates and other utility things, is searched:
# - for any and all words outside of a colon definition
# - whenever a word inside a colon definition is flagged as immediate
# - before attempts to convert an unfound word to a number
@macros = {
'\\' => method(:f_slash_comment),
'(' => method(:f_paren_comment),
'.(' => method(:f_paren_comment),
':' => method(:f_colon),
';' => method(:f_semicolon),
'S"' => method(:f_s_quote),
'."' => method(:f_dot_quote),
'"' => method(:f_s_quote), # TODO: implement IEEE 1275-1994/toke version of this
'ABORT"' => method(:f_abort_quote),
'[' => method(:f_left_square_bracket),
']' => method(:f_right_square_bracket),
'AHEAD' => method(:f_ahead),
'IF' => method(:f_if),
'ELSE' => method(:f_else),
'THEN' => method(:f_then),
'BEGIN' => method(:f_begin),
'WHILE' => method(:f_while),
'UNTIL' => method(:f_until),
'REPEAT' => method(:f_repeat),
'AGAIN' => method(:f_again),
'DO' => method(:f_do),
'QDO' => method(:f_qdo),
'+LOOP' => method(:f_ploop),
'LOOP' => method(:f_loop),
'CASE' => method(:f_case),
'OF' => method(:f_of),
'ENDOF' => method(:f_endof),
'ENDCASE' => method(:f_endcase),
'RECURSIVE' => ->{}, # no-op, it is always findable with this program
'\'' => method(:f_tick),
'[\']' => method(:f_ctick),
'[CHAR]' => method(:f_cchar),
'ASCII' => method(:f_ascii),
'CONTROL' => method(:f_control),
'[COMPILE]' => method(:f_ccompile),
'LITERAL' => method(:f_literal),
'2LITERAL' => method(:f_2literal),
'POSTPONE' => method(:f_postpone),
'COMPILE' => method(:f_compile),
'H#' => method(:f_hnum),
'D#' => method(:f_dnum),
'O#' => method(:f_onum),
'BINARY' => method(:f_binary),
'OCTAL' => method(:f_octal),
'DECIMAL' => method(:f_decimal),
'HEX' => method(:f_hex),
';CODE' => method(:f_semis),
'DOES>' => method(:f_does),
'COMPILE,' => method(:f_compilecomma),
'HEADERS' => method(:f_headers),
'HEADERLESS' => method(:f_headerless),
}
# This is the sum of all index files loaded on the command line
@dictionary = {}
while ARGV.count > 1
yfile = ARGV.shift
if File.readable?(yfile)
@dictionary.merge!(YAML.load(File.read(yfile)))
end
end
src_file = ARGV.shift
unless src_file && File.readable?(src_file)
abort("Cannot read source file!")
end
@base = 16
@fs = File.read(src_file).chars
@asm = ""
@inptr = 0
@in_colon = false
@cflow = []
@stack = []
@lserial = 0
@headers = true
while @inptr < @fs.length
word = parse_word.upcase
next if word == ""
if @macros.key?(word)
@macros[word].call
elsif @dictionary.key?(word)
if word.to_i(@base).to_s(@base).upcase == word.upcase
@stack.push(word.to_i(@base))
else
abort("#{word}: words outside of colon definitions must be macros")
end
else
@stack.push(word.to_i(@base))
end
end
abort("data stack not empty: #{@stack.inspect}") unless @stack.empty?
puts @asm