py65/py65/assembler.py

import re
from py65.utils.addressing import AddressParser


class Assembler:
    """ Assembles single assembly language statements into lists of
    opcode and operand values, using the opcode table (``disassemble``)
    and the width/format attributes of the MPU given to the constructor.
    """

    # Splits a whitespace-normalized statement into three groups:
    #   1. the mnemonic (three letters plus an optional digit 0-7), the
    #      whitespace that follows it and an optional opening parenthesis
    #   2. the target: a number, a label or an immediate value
    #   3. whatever trails the target: index register, closing parenthesis
    # The letter class is spelled A-Za-z on purpose: the range A-z would
    # also accept the punctuation that sits between 'Z' and 'a' in ASCII.
    Statement = re.compile(r'^([A-Za-z]{3}[0-7]?\s+'
                           r'\(?\s*)([^,\s\)]+)(\s*[,xXyY\s]*\)?'
                           r'[,xXyY\s]*)$')

    # (addressing mode, operand template) pairs.  In a template, "00"
    # stands for a byte that must be zero and "FF" for a byte that is
    # captured.  assemble() tries the templates in this order and uses
    # the first one the opcode supports, so the zero page forms must come
    # before the absolute forms that also match the same operand text.
    Addressing = (
        ('zpi', "($00FF)"),
        ('zpx', "$00FF,X"),
        ('zpy', "$00FF,Y"),
        ('zpg', "$00FF"),
        ('inx', "($00FF,X)"),
        ('iax', "($FFFF,X)"),
        ('iny', "($00FF),Y"),
        ('ind', "($FFFF)"),
        ('abx', "$FFFF,X"),
        ('aby', "$FFFF,Y"),
        ('abs', "$FFFF"),
        ('rel', "$FFFF"),
        ('imp', ""),
        ('acc', ""),
        ('acc', "A"),
        ('imm', "#$FF")
    )

    def __init__(self, mpu, address_parser=None):
        """ Build the operand patterns for the given MPU.

        If a configured AddressParser is passed, symbolic addresses
        may be used in the assembly statements.  When none is passed,
        a default AddressParser is created.
        """
        self._mpu = mpu

        if address_parser is None:
            address_parser = AddressParser()
        self._address_parser = address_parser

        # One hex digit encodes 4 bits.  Floor division keeps the digit
        # count an integer on Python 3 as well as Python 2.
        numchars = mpu.BYTE_WIDTH // 4

        self._addressing = []
        for mode, template in self.Addressing:
            pat = "^" + re.escape(template) + "$"
            pat = pat.replace('00', '0{%d}' % numchars)
            pat = pat.replace('FF', '([0-9A-F]{%d})' % numchars)
            self._addressing.append([mode, re.compile(pat)])

    def assemble(self, statement, pc=0):
        """ Assemble the given assembly language statement.  If the statement
        uses relative addressing, the program counter (pc) must also be given.

        The result is a list of integers: the opcode followed by its
        operand values, low value first for two-value operands.

        Raises SyntaxError when no addressing mode supported by the opcode
        matches the operand, and OverflowError when an immediate value is
        out of range or the assembled values would run past the top of
        memory.  Exceptions raised by the address parser propagate.
        """
        opcode, operand = self.normalize_and_split(statement)

        for mode, pattern in self._addressing:
            match = pattern.match(operand)
            if not match:
                continue

            # the opcode's position in the table is its encoded value;
            # a miss means the opcode does not support this mode
            try:
                assembled = [self._mpu.disassemble.index((opcode, mode))]
            except ValueError:
                continue

            operands = match.groups()

            if mode == 'rel':
                # relative branch: the offset is counted from the address
                # after the two-value instruction.  It is masked to the
                # MPU's byte width rather than range-checked.
                absolute = int(''.join(operands), 16)
                relative = (absolute - pc) - 2
                assembled.append(relative & self._mpu.byteMask)
            else:
                if len(operands) == 2:
                    # written high value first, emitted low value first
                    operands = (operands[1], operands[0])
                assembled.extend(int(digits, 16) for digits in operands)

            # raise if the assembled bytes would exceed top of memory
            if (pc + len(assembled)) > (2 ** self._mpu.ADDR_WIDTH):
                raise OverflowError

            return assembled

        # assembly failed
        raise SyntaxError(statement)

    def normalize_and_split(self, statement):
        """ Given an assembly language statement like "lda $c12,x", normalize
        the statement by uppercasing it, removing unnecessary whitespace,
        and parsing the address part using AddressParser.  The result of
        the normalization is a tuple of two strings (opcode, operand).

        Everything after the opcode is kept as the operand, with the
        whitespace inside it removed, so "lda $10 ,x" and "lda $10, x"
        both give ('LDA', '$0010,X') on an MPU with four-digit addresses.

        Raises SyntaxError for an incomplete immediate value and
        OverflowError for an immediate value that does not fit in a byte.
        """
        statement = ' '.join(statement.split())

        # normalize target in operand
        match = self.Statement.match(statement)
        if match:
            before, target, after = match.groups()

            # target is an immediate value
            if target.startswith('#'):
                try:
                    if target[1] in ("'", '"'):  # quoted ascii character
                        number = ord(target[2])
                    else:
                        number = self._address_parser.number(target[1:])
                except IndexError:
                    raise SyntaxError(statement)

                if (number < 0) or (number > self._mpu.byteMask):
                    raise OverflowError

                # keep the trailing text so that something like "#$10,x"
                # is rejected by assemble() instead of silently accepted
                statement = (before + '#$' +
                             self._mpu.BYTE_FORMAT % number + after)

            # target is the accumulator
            elif target in ('a', 'A'):
                pass

            # target is an address or label
            else:
                address = self._address_parser.number(target)
                statement = (before + '$' +
                             self._mpu.ADDR_FORMAT % address + after)

        # separate opcode and operand; whitespace was collapsed to single
        # spaces above, so removing spaces removes all operand whitespace
        opcode, _, operand = statement.partition(' ')
        operand = operand.replace(' ', '')
        return (opcode.upper(), operand.upper())
Added assembler. 2008-11-21 05:44:25 +00:00			`import re`
Reorganized utilities into separate modules. 2009-04-05 19:04:36 +00:00			`from py65.utils.addressing import AddressParser`
Added assembler. 2008-11-21 05:44:25 +00:00
PEP8 2012-11-19 20:44:30 +00:00
Added assembler. 2008-11-21 05:44:25 +00:00			`class Assembler:`
Fix assembling opcodes where the mnemonic has a digit 2012-11-19 06:17:38 +00:00			`Statement = re.compile(r'^([A-z]{3}[0-7]?\s+'`
Added assembler. 2008-11-21 05:44:25 +00:00			`r'\(?\s)([^,\s\)]+)(\s[,xXyY\s]*\)?'`
			`r'[,xXyY\s]*)$')`
Merge branch 'master' of https://github.com/BigEd/py65 Conflicts: src/py65/assembler.py src/py65/disassembler.py src/py65/tests/devices/test_mpu6502.py 2012-01-01 22:59:55 +00:00
Use tuples for the addressing patterns 2014-12-15 01:26:18 +00:00			`Addressing = (`
Generate regexps to make addressing easier to read 2014-12-15 02:05:25 +00:00			`('zpi', "($00FF)"),`
			`('zpx', "$00FF,X"),`
			`('zpy', "$00FF,Y"),`
			`('zpg', "$00FF"),`
			`('inx', "($00FF,X)"),`
			`('iax', "($FFFF,X)"),`
			`('iny', "($00FF),Y"),`
			`('ind', "($FFFF)"),`
			`('abx', "$FFFF,X"),`
			`('aby', "$FFFF,Y"),`
			`('abs', "$FFFF"),`
			`('rel', "$FFFF"),`
			`('imp', ""),`
			`('acc', ""),`
			`('acc', "A"),`
			`('imm', "#$FF")`
Use tuples for the addressing patterns 2014-12-15 01:26:18 +00:00			`)`
PEP8 2012-11-19 20:44:30 +00:00
Use same constructor signature for assembler and disassembler. 2009-08-19 04:54:29 +00:00			`def __init__(self, mpu, address_parser=None):`
Added assembler. 2008-11-21 05:44:25 +00:00			`""" If a configured AddressParser is passed, symbolic addresses`
			`may be used in the assembly statements.`
			`"""`
Remove duplication from addressing mode patterns 2014-12-15 01:17:35 +00:00			`self._mpu = mpu`

Added assembler. 2008-11-21 05:44:25 +00:00			`if address_parser is None:`
			`address_parser = AddressParser()`
			`self._address_parser = address_parser`

Remove duplication from addressing mode patterns 2014-12-15 01:17:35 +00:00			`self._addressing = []`
			`numchars = mpu.BYTE_WIDTH / 4 # 1 byte = 2 chars in hex`
Generate regexps to make addressing easier to read 2014-12-15 02:05:25 +00:00			`for mode, format in self.Addressing:`
			`pat = "^" + re.escape(format) + "$"`
			`pat = pat.replace('00', '0{%d}' % numchars)`
			`pat = pat.replace('FF', '([0-9A-F]{%d})' % numchars)`
			`self._addressing.append([mode, re.compile(pat)])`
finish basic support for 65Org16, all tests passing 2011-08-20 20:50:26 +00:00
Added assembler. 2008-11-21 05:44:25 +00:00			`def assemble(self, statement, pc=0000):`
			`""" Assemble the given assembly language statement. If the statement`
			`uses relative addressing, the program counter (pc) must also be given.`
Raise an exception when assembly fails 2012-11-19 01:05:12 +00:00			`The result is a list of bytes. Raises when assembly fails.`
Added assembler. 2008-11-21 05:44:25 +00:00			`"""`
Fix assembling opcodes where the mnemonic has a digit 2012-11-19 06:17:38 +00:00			`opcode, operand = self.normalize_and_split(statement)`
Merge branch 'master' of https://github.com/BigEd/py65 Conflicts: src/py65/assembler.py src/py65/disassembler.py src/py65/tests/devices/test_mpu6502.py 2012-01-01 22:59:55 +00:00
Remove duplication from addressing mode patterns 2014-12-15 01:17:35 +00:00			`for mode, pattern in self._addressing:`
Fix assembling opcodes where the mnemonic has a digit 2012-11-19 06:17:38 +00:00			`match = pattern.match(operand)`
Added assembler. 2008-11-21 05:44:25 +00:00
			`if match:`
Add support for ASCII chars as immediate values 2014-12-15 02:50:28 +00:00			`# check if opcode supports this addressing mode`
Added assembler. 2008-11-21 05:44:25 +00:00			`try:`
PEP8 2012-11-19 20:44:30 +00:00			`bytes = [self._mpu.disassemble.index((opcode, mode))]`
Added assembler. 2008-11-21 05:44:25 +00:00			`except ValueError:`
			`continue`

			`operands = match.groups()`

			`if mode == 'rel':`
			`# relative branch`
			`absolute = int(''.join(operands), 16)`
			`relative = (absolute - pc) - 2`
Remove duplication from addressing mode patterns 2014-12-15 01:17:35 +00:00			`relative = relative & self._mpu.byteMask`
			`operands = [(self._mpu.BYTE_FORMAT % relative)]`
Added assembler. 2008-11-21 05:44:25 +00:00
			`elif len(operands) == 2:`
			`# swap bytes`
			`operands = (operands[1], operands[0])`

PEP8 2012-11-19 20:44:30 +00:00			`operands = [int(hex, 16) for hex in operands]`
Added assembler. 2008-11-21 05:44:25 +00:00			`bytes.extend(operands)`
Raise OverflowError if assembling exceeds the top of memory 2012-11-25 19:07:12 +00:00
			`# raise if the assembled bytes would exceed top of memory`
			`if (pc + len(bytes)) > (2 ** self._mpu.ADDR_WIDTH):`
			`raise OverflowError`

Added assembler. 2008-11-21 05:44:25 +00:00			`return bytes`

			`# assembly failed`
Raise an exception when assembly fails 2012-11-19 01:05:12 +00:00			`raise SyntaxError(statement)`
Merge branch 'master' of https://github.com/BigEd/py65 Conflicts: src/py65/assembler.py src/py65/disassembler.py src/py65/tests/devices/test_mpu6502.py 2012-01-01 22:59:55 +00:00
Added assembler. 2008-11-21 05:44:25 +00:00			`def normalize_and_split(self, statement):`
			`""" Given an assembly language statement like "lda $c12,x", normalize`
			`the statement by uppercasing it, removing unnecessary whitespace,`
			`and parsing the address part using AddressParser. The result of`
Fix assembling opcodes where the mnemonic has a digit 2012-11-19 06:17:38 +00:00			`the normalization is a tuple of two strings (opcode, operand).`
Added assembler. 2008-11-21 05:44:25 +00:00			`"""`
Fix interactive assembly on Python 3 Closes #81 Closes #78 Closes #65 Closes #64 Closes #63 2024-04-12 19:24:29 +00:00			`statement = ' '.join(statement.split())`
Assembling now tolerates extra whitespace between opcode and operand 2014-01-26 05:30:27 +00:00
Added assembler. 2008-11-21 05:44:25 +00:00			`# normalize target in operand`
			`match = self.Statement.match(statement)`
			`if match:`
			`before, target, after = match.groups()`

Add support for ASCII chars as immediate values 2014-12-15 02:50:28 +00:00			`# target is an immediate value`
Added assembler. 2008-11-21 05:44:25 +00:00			`if target.startswith('#'):`
Handle all syntax errors in immediate values 2014-12-15 02:55:32 +00:00			`try:`
			`if target[1] in ("'", '"'): # quoted ascii character`
Add support for ASCII chars as immediate values 2014-12-15 02:50:28 +00:00			`number = ord(target[2])`
Handle all syntax errors in immediate values 2014-12-15 02:55:32 +00:00			`else:`
			`number = self._address_parser.number(target[1:])`
			`except IndexError:`
			`raise SyntaxError(statement)`
Add support for ASCII chars as immediate values 2014-12-15 02:50:28 +00:00
Handle all syntax errors in immediate values 2014-12-15 02:55:32 +00:00			`if (number < 0) or (number > self._mpu.byteMask):`
Added assembler. 2008-11-21 05:44:25 +00:00			`raise OverflowError`
Remove duplication from addressing mode patterns 2014-12-15 01:17:35 +00:00			`statement = before + '#$' + self._mpu.BYTE_FORMAT % number`
Added assembler. 2008-11-21 05:44:25 +00:00
			`# target is the accumulator`
Merge branch 'master' of https://github.com/BigEd/py65 Conflicts: src/py65/assembler.py src/py65/disassembler.py src/py65/tests/devices/test_mpu6502.py 2012-01-01 22:59:55 +00:00			`elif target in ('a', 'A'):`
Added assembler. 2008-11-21 05:44:25 +00:00			`pass`

			`# target is an address or label`
			`else:`
			`address = self._address_parser.number(target)`
Remove duplication from addressing mode patterns 2014-12-15 01:17:35 +00:00			`statement = before + '$' + self._mpu.ADDR_FORMAT % address + after`
Merge branch 'master' of https://github.com/BigEd/py65 Conflicts: src/py65/assembler.py src/py65/disassembler.py src/py65/tests/devices/test_mpu6502.py 2012-01-01 22:59:55 +00:00
Fix assembling opcodes where the mnemonic has a digit 2012-11-19 06:17:38 +00:00			`# separate opcode and operand`
			`splitted = statement.split(" ", 2)`
			`opcode = splitted[0].strip().upper()`
			`if len(splitted) > 1:`
			`operand = splitted[1].strip().upper()`
			`else:`
			`operand = ''`
Added assembler. 2008-11-21 05:44:25 +00:00			`return (opcode, operand)`