From e9b5bc820085cd8d45ac35943d343becc8912876 Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Tue, 27 Feb 2018 20:39:46 +0100 Subject: [PATCH] tinyvm --- requirements.txt | 2 + testvm.txt | 46 ++++----- tinyvm/__init__.py | 1 + tinyvm/core.py | 1 + tinyvm/main.py | 2 + tinyvm/parse.py | 24 +++-- tinyvm/vm.py | 253 +++++++++++++++++++++++++++------------------ 7 files changed, 194 insertions(+), 135 deletions(-) diff --git a/requirements.txt b/requirements.txt index a8497ebc9..c109b2627 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,4 @@ attrs ply +git+https://github.com/irmen/cbmcodecs.git@master + diff --git a/testvm.txt b/testvm.txt index 28f9f6b7f..12489db45 100644 --- a/testvm.txt +++ b/testvm.txt @@ -1,35 +1,29 @@ ; source code for a tinyvm program %block b1 %vardefs -var byte v1 0 -var word w1 2222 -var sword ws -3333 -const byte c1 99 -const sword cws -5444 -var array_byte linefeed 12 11 -var array_byte ba 10 33 -var array_byte ba2 10 [1 2 3 4 5 6 7 8 9 10] -var matrix_byte mxb 4 5 33 -var matrix_byte mxb2 3 2 [1 2 3 4 5 6] +var byte vb 1 +var word vw 2233 +var sbyte vbs -2 +var sword vws -3344 +var float vf 1.234 +var array_byte ab 3 1 +var array_sbyte asb 3 -2 +var array_word aw 3 4455 +var array_sword asw 3 -5566 +var matrix_byte mb 2 3 1 +var matrix_sbyte msb 2 3 -2 +var array_byte ab2 5 1 +var array_sbyte asb2 5 -2 +var array_word aw2 5 6677 +var array_sword asw2 5 -8899 +var matrix_byte mb2 4 5 1 +var matrix_sbyte msb2 4 5 -2 %end_vardefs %instructions - nop - nop -l1: - nop - push cws - jump_if_false l3 - push c1 - push2 c1 cws - syscall hexstr_signed - push w1 - syscall decimalstr_signed - syscall printstr - push linefeed - syscall printstr -l3: - return + push vw + pop vb + terminate %end_instructions %subblocks diff --git a/tinyvm/__init__.py b/tinyvm/__init__.py index e69de29bb..5bb534f79 100644 --- a/tinyvm/__init__.py +++ b/tinyvm/__init__.py @@ -0,0 +1 @@ +# package diff --git a/tinyvm/core.py b/tinyvm/core.py index 4db61911a..2d216752e 100644 --- a/tinyvm/core.py +++ b/tinyvm/core.py @@ -24,6 +24,7 @@ class Opcode(enum.IntEnum): CMP_GT = 302 CMP_LTE = 303 CMP_GTE = 304 + TEST = 305 RETURN = 500 JUMP = 501 JUMP_IF_TRUE = 502 diff --git a/tinyvm/main.py b/tinyvm/main.py index c087bd0ce..9a6604e7b 100644 --- a/tinyvm/main.py +++ b/tinyvm/main.py @@ -9,5 +9,7 @@ source = open(sys.argv[1]).read() parser = Parser(source) program = parser.parse() timerprogram = Program([]) +# zero page and hardware stack of a 6502 cpu are off limits for now +VM.readonly_mem_ranges = [(0x00, 0xff), (0x100, 0x1ff), (0xa000, 0xbfff), (0xe000, 0xffff)] vm = VM(program, timerprogram) vm.run() diff --git a/tinyvm/parse.py b/tinyvm/parse.py index 9b7394812..5713ba1e3 100644 --- a/tinyvm/parse.py +++ b/tinyvm/parse.py @@ -1,3 +1,4 @@ +import array from typing import Optional, List, Tuple, Dict from .core import DataType, Opcode, Program, Block, Variable, Instruction from .vm import StackValueType @@ -49,6 +50,16 @@ class Parser: self.lineno += 1 return block + def get_array_type(self, dtype: DataType) -> str: + return { + DataType.ARRAY_BYTE: 'B', + DataType.ARRAY_SBYTE: 'b', + DataType.ARRAY_WORD: 'H', + DataType.ARRAY_SWORD: 'h', + DataType.MATRIX_BYTE: 'B', + DataType.MATRIX_SBYTE: 'b' + }[dtype] + def parse_vardefs(self) -> List[Variable]: assert self.source[self.lineno].startswith("%vardefs") self.lineno += 1 @@ -68,24 +79,23 @@ class Parser: args = argstr.split(maxsplit=1) length = int(args[0]) valuestr = args[1] + typecode = self.get_array_type(dtype) if valuestr[0] == '[' and valuestr[-1] == ']': - value = bytearray([int(v) for v in valuestr[1:-1].split()]) + value = array.array(typecode, [int(v) for v in valuestr[1:-1].split()]) else: - value = bytearray([int(valuestr)]) * length + value = array.array(typecode, [int(valuestr)]) * length elif dtype in (DataType.MATRIX_BYTE, DataType.MATRIX_SBYTE): args = argstr.split(maxsplit=2) length = int(args[0]) height = int(args[1]) valuestr = args[2] + typecode = self.get_array_type(dtype) if valuestr[0] == '[' and valuestr[-1] == ']': - value = bytearray([int(v) for v in valuestr[1:-1].split()]) + value = array.array(typecode, [int(v) for v in valuestr[1:-1].split()]) else: - value = bytearray([int(valuestr)]) * length * height + value = array.array(typecode, [int(valuestr)] * length * height) else: raise TypeError("weird dtype", dtype) - if vartype == "const" and dtype not in (DataType.BYTE, DataType.WORD, DataType.SBYTE, - DataType.SWORD, DataType.FLOAT, DataType.BOOL): - raise TypeError("invalid const datatype", dtype) variables.append(Variable(name, dtype, value, length, height, vartype == "const")) self.lineno += 1 self.skip_empty() diff --git a/tinyvm/vm.py b/tinyvm/vm.py index bff211a1d..bae083c79 100644 --- a/tinyvm/vm.py +++ b/tinyvm/vm.py @@ -8,6 +8,8 @@ # 16-bit words (two 8-bit bytes, signed and unsigned) (stored in LSB order), # 5-byte MFLPT floating point # addressing is possible via byte index (for the $0000-$00ff range) or via an unsigned word. +# there is NO memory management at all; all of the mem is globally shared and always available in full. +# certain blocks of memory can be marked as read-only (write attempts will then crash the vm) # # MEMORY ACCESS: via explicit load and store instructions, # to put a value onto the stack or store the value on the top of the stack, @@ -23,13 +25,14 @@ # CPU: stack based execution, no registers. # unlimited dynamic variables (v0, v1, ...) that have a value and a type. # types: -# 1-bit boolean (can be CONST), -# 8-bit byte (singed and unsigned) (can be CONST), -# 16-bit words (two 8-bit bytes, signed and unsigned) (can be CONST), -# floating point (can be CONST), +# 1-bit boolean, +# 8-bit byte (singed and unsigned), +# 16-bit words (two 8-bit bytes, signed and unsigned), +# floating point, # array of bytes (signed and unsigned), # array of words (signed and unsigned), # matrix (2-dimensional array) of bytes (signed and unsigned). +# all of these can have the flag CONST as well which means they cannot be modified. # # push (constant, # mark, unwind to previous mark. @@ -39,22 +42,25 @@ # nop # push var / push2 var1, var2 # pop var / pop2 var1, var2 -# various arithmetic operations, logical operations, boolean comparison operations +# various arithmetic operations, logical operations, boolean test and comparison operations # jump label # jump_if_true label, jump_if_false label -# jump_if_status_XX label special system dependent status register conditional check such as carry bit or overflow bit) +# @todo jump_if_status_XX label special system dependent status register conditional check such as carry bit or overflow bit) # return (return values on stack) # syscall function (special system dependent implementation) # call function (arguments are on stack) -# enter / exit (block for function, loop) +# enter / exit (function call frame) # # TIMER INTERRUPT: triggered every 1/60th of a second. # executes on a DIFFERENT stack and with a different PROGRAM LIST, -# but with the ALL THE SAME DYNAMIC VARIABLES. +# but with access to ALL THE SAME DYNAMIC VARIABLES. # import time import itertools +import collections +import array +import pprint from .core import Instruction, Variable, Block, Program, Opcode, CONDITIONAL_OPCODES from typing import Dict, List, Tuple, Union from il65.emit import mflpt5_to_float, to_mflpt5 @@ -64,9 +70,21 @@ class ExecutionError(Exception): pass +class TerminateExecution(SystemExit): + pass + + +class MemoryAccessError(Exception): + pass + + class Memory: def __init__(self): self.mem = bytearray(65536) + self.readonly = bytearray(65536) + + def mark_readonly(self, start: int, end: int) -> None: + self.readonly[start:end+1] = [1] * (end-start+1) def get_byte(self, index: int) -> int: return self.mem[index] @@ -84,31 +102,42 @@ class Memory: return mflpt5_to_float(self.mem[index: index+5]) def set_byte(self, index: int, value: int) -> None: + if self.readonly[index]: + raise MemoryAccessError("read-only", index) self.mem[index] = value def set_sbyte(self, index: int, value: int) -> None: + if self.readonly[index]: + raise MemoryAccessError("read-only", index) self.mem[index] = value + 256 def set_word(self, index: int, value: int) -> None: + if self.readonly[index] or self.readonly[index+1]: + raise MemoryAccessError("read-only", index) hi, lo = divmod(value, 256) self.mem[index] = lo self.mem[index+1] = hi def set_sword(self, index: int, value: int) -> None: + if self.readonly[index] or self.readonly[index+1]: + raise MemoryAccessError("read-only", index) hi, lo = divmod(value + 65536, 256) self.mem[index] = lo self.mem[index+1] = hi def set_float(self, index: int, value: float) -> None: + if any(self.readonly[index:index+5]): + raise MemoryAccessError("read-only", index) self.mem[index: index+5] = to_mflpt5(value) -StackValueType = Union[bool, int, float, bytearray] +StackValueType = Union[bool, int, float, bytearray, array.array] class Stack: def __init__(self): self.stack = [] + self.pop_history = collections.deque(maxlen=10) def debug_peek(self, size: int) -> List[StackValueType]: return self.stack[-size:] @@ -117,13 +146,22 @@ class Stack: return len(self.stack) def pop(self) -> StackValueType: - return self.stack.pop() + x = self.stack.pop() + self.pop_history.append(x) + return x def pop2(self) -> Tuple[StackValueType, StackValueType]: - return self.stack.pop(), self.stack.pop() + x, y = self.stack.pop(), self.stack.pop() + self.pop_history.append(x) + self.pop_history.append(y) + return x, y def pop3(self) -> Tuple[StackValueType, StackValueType, StackValueType]: - return self.stack.pop(), self.stack.pop(), self.stack.pop() + x, y, z = self.stack.pop(), self.stack.pop(), self.stack.pop() + self.pop_history.append(x) + self.pop_history.append(y) + self.pop_history.append(z) + return x, y, z def push(self, item: StackValueType) -> None: self._typecheck(item) @@ -150,14 +188,15 @@ class Stack: self.stack[-2] = x def _typecheck(self, value: StackValueType): - if type(value) not in (bool, int, float, bytearray): - raise TypeError("stack can only contain bool, int, float, bytearray") + if type(value) not in (bool, int, float, bytearray, array.array): + raise TypeError("stack can only contain bool, int, float, (byte)array") # noinspection PyPep8Naming,PyUnusedLocal,PyMethodMayBeStatic class VM: - str_encoding = "iso-8859-15" # @todo machine encoding via cbmcodecs or something? - str_alt_encoding = "iso-8859-15" # @todo machine encoding via cbmcodecs or something? + str_encoding = "iso-8859-15" + str_alt_encoding = "iso-8859-15" + readonly_mem_ranges = [] # type: List[Tuple[int, int]] def __init__(self, program: Program, timerprogram: Program) -> None: opcode_names = [oc.name for oc in Opcode] @@ -169,6 +208,8 @@ class VM: if not method[7:] in opcode_names: raise RuntimeError("opcode method for undefined opcode " + method) self.memory = Memory() + for start, end in self.readonly_mem_ranges: + self.memory.mark_readonly(start, end) self.main_stack = Stack() self.timer_stack = Stack() (self.main_program, self.timer_program), self.variables, self.labels = self.flatten_programs(program, timerprogram) @@ -178,15 +219,21 @@ class VM: self.stack = self.main_stack self.pc = None # type: Instruction self.previous_pc = None # type: Instruction - assert all(i.next for i in self.main_program if i.opcode != Opcode.TERMINATE), "main: all instrs next must be set" - assert all(i.next for i in self.timer_program if i.opcode != Opcode.TERMINATE), "main: all instrs next must be set" - assert all(i.condnext for i in self.main_program if i.opcode in CONDITIONAL_OPCODES), "timer: all conditional instrs condnext must be set" - assert all(i.condnext for i in self.timer_program if i.opcode in CONDITIONAL_OPCODES), "timer: all conditional instrs condnext must be set" + self.system = System(self) + assert all(i.next for i in self.main_program + if i.opcode != Opcode.TERMINATE), "main: all instrs next must be set" + assert all(i.next for i in self.timer_program + if i.opcode != Opcode.TERMINATE), "main: all instrs next must be set" + assert all(i.condnext for i in self.main_program + if i.opcode in CONDITIONAL_OPCODES), "timer: all conditional instrs condnext must be set" + assert all(i.condnext for i in self.timer_program + if i.opcode in CONDITIONAL_OPCODES), "timer: all conditional instrs condnext must be set" + print("[TinyVM starting up.]") def flatten_programs(self, *programs: Program) -> Tuple[List[List[Instruction]], Dict[str, Variable], Dict[str, Instruction]]: - variables = {} # type: Dict[str, Variable] - labels = {} # type: Dict[str, Instruction] - flat_programs = [] # type: List[List[Instruction]] + variables = {} # type: Dict[str, Variable] + labels = {} # type: Dict[str, Instruction] + flat_programs = [] # type: List[List[Instruction]] for program in programs: for block in program.blocks: flat = self.flatten(block, variables, labels) @@ -256,12 +303,14 @@ class VM: # self.pc = self.previous_pc # self.program = self.mainprogram # self.stack = self.mainstack - print("Step", steps) - self.debug_stack() next_pc = getattr(self, "opcode_" + self.pc.opcode.name)(self.pc) if next_pc: self.pc = self.pc.next steps += 1 + except TerminateExecution as x: + why = str(x) + print("[TinyVM execution halted{:s}]\n".format(": "+why if why else ".")) + return except Exception as x: print("EXECUTION ERROR") self.debug_stack(5) @@ -270,43 +319,40 @@ class VM: def debug_stack(self, size: int=5) -> None: stack = self.stack.debug_peek(size) if len(stack) > 0: - print(" stack (top {:d}):".format(size)) + print("* stack (top {:d}):".format(size)) for i, value in enumerate(reversed(stack), start=1): - print(" {:d}. {:s} {!r}".format(i, type(value).__name__, value)) + print(" {:d}. {:s} {!r}".format(i, type(value).__name__, value)) else: - print(" stack is empty.") + print("* stack is empty.") + if self.stack.pop_history: + print("* last {:d} values popped from stack (most recent last):".format(self.stack.pop_history.maxlen)) + pprint.pprint(list(self.stack.pop_history), indent=2, compact=True, width=20) # type: ignore if self.pc is not None: - print(" instruction:", self.pc) + print("* instruction:", self.pc) - def _encodestr(self, string: str, alt: bool=False) -> bytearray: - return bytearray(string, self.str_alt_encoding if alt else self.str_encoding) - - def _decodestr(self, bb: bytearray, alt: bool=False) -> str: - return str(bb, self.str_alt_encoding if alt else self.str_encoding) + def assign_variable(self, variable: Variable, value: StackValueType) -> None: + assert not variable.const, "cannot modify a const" + variable.value = value def opcode_NOP(self, instruction: Instruction) -> bool: # do nothing return True def opcode_TERMINATE(self, instruction: Instruction) -> bool: - # immediately terminate the VM - raise ExecutionError("virtual machine terminated") + raise TerminateExecution() def opcode_PUSH(self, instruction: Instruction) -> bool: - # push a value onto the stack value = self.variables[instruction.args[0]].value self.stack.push(value) return True def opcode_PUSH2(self, instruction: Instruction) -> bool: - # push two values onto the stack value1 = self.variables[instruction.args[0]].value value2 = self.variables[instruction.args[1]].value self.stack.push2(value1, value2) return True def opcode_PUSH3(self, instruction: Instruction) -> bool: - # push three values onto the stack value1 = self.variables[instruction.args[0]].value value2 = self.variables[instruction.args[1]].value value3 = self.variables[instruction.args[2]].value @@ -314,115 +360,97 @@ class VM: return True def opcode_POP(self, instruction: Instruction) -> bool: - # pop value from stack and store it in a variable value = self.stack.pop() variable = self.variables[instruction.args[0]] - assert not variable.const - variable.value = value + self.assign_variable(variable, value) return True def opcode_POP2(self, instruction: Instruction) -> bool: - # pop two values from tack and store it in two variables value1, value2 = self.stack.pop2() variable = self.variables[instruction.args[0]] - assert not variable.const - variable.value = value1 + self.assign_variable(variable, value1) variable = self.variables[instruction.args[1]] - assert not variable.const - variable.value = value2 + self.assign_variable(variable, value2) return True def opcode_POP3(self, instruction: Instruction) -> bool: - # pop three values from tack and store it in two variables value1, value2, value3 = self.stack.pop3() variable = self.variables[instruction.args[0]] - assert not variable.const - variable.value = value1 + self.assign_variable(variable, value1) variable = self.variables[instruction.args[1]] - assert not variable.const - variable.value = value2 + self.assign_variable(variable, value2) variable = self.variables[instruction.args[2]] - assert not variable.const - variable.value = value3 + self.assign_variable(variable, value3) return True def opcode_ADD(self, instruction: Instruction) -> bool: - # add top to second value on stack and replace them with the result - v1, v2 = self.stack.pop2() - self.stack.push(v2 + v1) # type: ignore + second, first = self.stack.pop2() + self.stack.push(first + second) # type: ignore return True def opcode_SUB(self, instruction: Instruction) -> bool: - # subtract top from second value on stack and replace them with the result - v1, v2 = self.stack.pop2() - self.stack.push(v2 - v1) # type: ignore + second, first = self.stack.pop2() + self.stack.push(first - second) # type: ignore return True def opcode_MUL(self, instruction: Instruction) -> bool: - # multiply top with second value on stack and replace them with the result - v1, v2 = self.stack.pop2() - self.stack.push(v2 * v1) # type: ignore + second, first = self.stack.pop2() + self.stack.push(first * second) # type: ignore return True def opcode_DIV(self, instruction: Instruction) -> bool: - # divide second value by top value on stack and replace them with the result - v1, v2 = self.stack.pop2() - self.stack.push(v2 / v1) # type: ignore + second, first = self.stack.pop2() + self.stack.push(first / second) # type: ignore return True def opcode_AND(self, instruction: Instruction) -> bool: - # second value LOGICAL_AND top value on stack and replace them with the result - v1, v2 = self.stack.pop2() - self.stack.push(v2 and v1) + second, first = self.stack.pop2() + self.stack.push(first and second) return True def opcode_OR(self, instruction: Instruction) -> bool: - # second value LOGICAL_OR top value on stack and replace them with the result - v1, v2 = self.stack.pop2() - self.stack.push(v2 or v1) + second, first = self.stack.pop2() + self.stack.push(first or second) return True def opcode_XOR(self, instruction: Instruction) -> bool: - # second value LOGICAL_XOR top value on stack and replace them with the result - v1, v2 = self.stack.pop2() - i1 = 1 if v1 else 0 - i2 = 1 if v2 else 0 - self.stack.push(bool(i1 ^ i2)) + second, first = self.stack.pop2() + ifirst = 1 if first else 0 + isecond = 1 if second else 0 + self.stack.push(bool(ifirst ^ isecond)) return True def opcode_NOT(self, instruction: Instruction) -> bool: - # replace top value on stack with its LOGICAL_NOT self.stack.push(not self.stack.pop()) return True def opcode_CMP_EQ(self, instruction: Instruction) -> bool: - # replace second and top value on stack with their == comparison - v1, v2 = self.stack.pop2() - self.stack.push(v2 == v1) + second, first = self.stack.pop2() + self.stack.push(first == second) + return True + + def opcode_TEST(self, instruction: Instruction) -> bool: + self.stack.push(bool(self.stack.pop())) return True def opcode_CMP_LT(self, instruction: Instruction) -> bool: - # replace second and top value on stack with their < comparison - v1, v2 = self.stack.pop2() - self.stack.push(v2 < v1) # type: ignore + second, first = self.stack.pop2() + self.stack.push(first < second) # type: ignore return True def opcode_CMP_GT(self, instruction: Instruction) -> bool: - # replace second and top value on stack with their > comparison - v1, v2 = self.stack.pop2() - self.stack.push(v2 > v1) # type: ignore + second, first = self.stack.pop2() + self.stack.push(first > second) # type: ignore return True def opcode_CMP_LTE(self, instruction: Instruction) -> bool: - # replace second and top value on stack with their <= comparison - v1, v2 = self.stack.pop2() - self.stack.push(v2 <= v1) # type: ignore + second, first = self.stack.pop2() + self.stack.push(first <= second) # type: ignore return True def opcode_CMP_GTE(self, instruction: Instruction) -> bool: - # replace second and top value on stack with their >= comparison - v1, v2 = self.stack.pop2() - self.stack.push(v2 >= v1) # type: ignore + second, first = self.stack.pop2() + self.stack.push(first >= second) # type: ignore return True def opcode_RETURN(self, instruction: Instruction) -> bool: @@ -451,36 +479,57 @@ class VM: return False def opcode_SYSCALL(self, instruction: Instruction) -> bool: - call = getattr(self, "syscall_" + instruction.args[0], None) + call = getattr(self.system, "syscall_" + instruction.args[0], None) if call: return call() else: raise RuntimeError("no syscall method for " + instruction.args[0]) + +class System: + def __init__(self, vm: VM) -> None: + self.vm = vm + + def _encodestr(self, string: str, alt: bool=False) -> bytearray: + return bytearray(string, self.vm.str_alt_encoding if alt else self.vm.str_encoding) + + def _decodestr(self, bb: bytearray, alt: bool=False) -> str: + return str(bb, self.vm.str_alt_encoding if alt else self.vm.str_encoding) + def syscall_printstr(self) -> bool: - value = self.stack.pop() + value = self.vm.stack.pop() if isinstance(value, bytearray): print(self._decodestr(value), end="") return True else: - raise TypeError("printstr expects bytearray value", value) + raise TypeError("printstr expects bytearray", value) def syscall_decimalstr_signed(self) -> bool: - value = self.stack.pop() + value = self.vm.stack.pop() if type(value) is int: - self.stack.push(self._encodestr(str(value))) + self.vm.stack.push(self._encodestr(str(value))) return True else: - raise TypeError("decimalstr expects int value", value) + raise TypeError("decimalstr expects int", value) def syscall_hexstr_signed(self) -> bool: - value = self.stack.pop() + value = self.vm.stack.pop() if type(value) is int: if value >= 0: # type: ignore strvalue = "${:x}".format(value) else: strvalue = "-${:x}".format(-value) # type: ignore - self.stack.push(self._encodestr(strvalue)) + self.vm.stack.push(self._encodestr(strvalue)) return True else: - raise TypeError("hexstr expects int value", value) + raise TypeError("hexstr expects int", value) + + def syscall_memwrite_byte(self) -> bool: + value, address = self.vm.stack.pop2() + self.vm.memory.set_byte(address, value) # type: ignore + return True + + def syscall_memwrite_sbyte(self) -> bool: + value, address = self.vm.stack.pop2() + self.vm.memory.set_sbyte(address, value) # type: ignore + return True