Update documentation.

This commit is contained in:
Michael Martin 2012-06-09 01:06:25 -07:00
parent 07f807d680
commit ffd96a8c2f
23 changed files with 3264 additions and 276 deletions

View File

@ -302,10 +302,10 @@
</para>
<programlisting>
.macro store16 ; `store16 dest, src
lda #&lt;_2
sta _1
lda #&gt;_2
sta _1+1
lda #&lt;_2
sta _1
lda #&gt;_2
sta _1+1
.macend
</programlisting>
<para>
@ -361,91 +361,202 @@
follow.
</para>
<itemizedlist>
<listitem><para><literal>.advance</literal> <emphasis>address</emphasis>:
Forces the program counter to
be <emphasis>address</emphasis>. Unlike
the <literal>.org</literal>
directive, <literal>.advance</literal> outputs zeroes until the
program counter reaches a specified address. Attempting
to <literal>.advance</literal> to a point behind the current
program counter is an assemble-time error.</para></listitem>
<listitem><para><literal>.alias</literal> <emphasis>label</emphasis> <emphasis>value</emphasis>: The
.alias directive assigns an arbitrary value to a label. This
value may be an arbitrary argument, but cannot reference any
label that has not already been defined (this prevents
recursive label dependencies).</para></listitem>
<listitem><para><literal>.byte</literal> <emphasis>arg</emphasis> [ , <emphasis>arg</emphasis>, ... ]:
Specifies a series of arguments, which are evaluated, and
strings, which are included as raw ASCII data. The final
results of these arguments must be one byte in size. Separate
constants are separated by commas.</para></listitem>
<listitem><para><literal>.checkpc</literal> <emphasis>address</emphasis>: Ensures that the
program counter is less than or equal to the address
specified, and emits an assemble-time error if it is not.
<emphasis>This produces no code in the final binary - it is there to
ensure that linking a large amount of data together does not
overstep memory boundaries.</emphasis></para></listitem>
<listitem><para><literal>.data</literal> <emphasis>[label]</emphasis>: Sets the segment to
the segment name specified and disallows output. If no label
is given, switches to the default data segment.</para></listitem>
<listitem><para><literal>.incbin</literal> <emphasis>filename</emphasis>: Inserts the
contents of the file specified as binary data. Use it to
include graphics information, precompiled code, or other
non-assembler data.</para></listitem>
<listitem><para><literal>.include</literal> <emphasis>filename</emphasis>: Includes the
entirety of the file specified at that point in the program.
Use this to order your final sources.</para></listitem>
<listitem><para><literal>.org</literal> <emphasis>address</emphasis>: Sets the program
counter to the address specified. <emphasis>This does not emit any
code in and of itself, nor does it overwrite anything that
previously existed.</emphasis> If you wish to jump ahead in memory,
use <literal>.advance</literal>.</para></listitem>
<listitem><para><literal>.require</literal> <emphasis>filename</emphasis>: Includes the entirety
of the file specified at that point in the program. Unlike <literal>.include</literal>,
however, code included with <literal>.require</literal> will only be inserted once.
The <literal>.require</literal> directive is useful for ensuring that certain code libraries
are somewhere in the final binary. They are also very useful for guaranteeing that
macro libraries are available.</para></listitem>
<listitem><para><literal>.space</literal> <emphasis>label</emphasis> <emphasis>size</emphasis>: This
directive is used to organize global variables. It defines the
label specified to be at the current location of the program
counter, and then advances the program counter <emphasis>size</emphasis>
steps ahead. No actual code is produced. This is equivalent
to <literal>label: .org ^+size</literal>.</para></listitem>
<listitem><para><literal>.text</literal> <emphasis>[label]</emphasis>: Sets the segment to
the segment name specified and allows output. If no label is
given, switches to the default text segment.</para></listitem>
<listitem><para><literal>.word</literal> <emphasis>arg</emphasis> [ , <emphasis>arg</emphasis>, ... ]:
Like <literal>.byte</literal>, but values are all treated as two-byte
values and stored low-end first (as is the 6502's wont). Use
this to create jump tables (an unadorned label will evaluate
to that label's location) or otherwise store 16-bit
data.</para></listitem>
<listitem><para><literal>.dword</literal> <emphasis>arg</emphasis> [ , <emphasis>arg</emphasis>, ...]:
Like <literal>.word</literal>, but for 32-bit values.</para></listitem>
<listitem><para><literal>.wordbe</literal> <emphasis>arg</emphasis> [ , <emphasis>arg</emphasis>, ...]:
Like <literal>.word</literal>, but stores the value in a big-endian format (high byte first).</para></listitem>
<listitem><para><literal>.dwordbe</literal> <emphasis>arg</emphasis> [ , <emphasis>arg</emphasis>, ...]:
Like <literal>.dword</literal>, but stores the value high byte first.</para></listitem>
<listitem><para><literal>.scope</literal>: Starts a new scope block. Labels
that begin with an underscore are only reachable from within
their innermost enclosing <literal>.scope</literal> statement.</para></listitem>
<listitem><para><literal>.scend</literal>: Ends a scope block. Makes the
temporary labels defined since the last <literal>.scope</literal>
statement unreachable, and permits them to be redefined in a
new scope.</para></listitem>
<listitem><para><literal>.macro</literal> <emphasis>name</emphasis>: Begins a macro
definition block. This is a scope block that can be inlined
at arbitrary points with <literal>.invoke</literal>. Arguments to the
macro will be bound to temporary labels with names like
<literal>_1</literal>, <literal>_2</literal>, etc.</para></listitem>
<listitem><para><literal>.macend</literal>: Ends a macro definition
block.</para></listitem>
<listitem><para><literal>.invoke</literal> <emphasis>label</emphasis> [<emphasis>argument</emphasis> [,
<emphasis>argument</emphasis> ...]]: invokes (inlines) the specified
macro, binding the values of the arguments to the ones the
macro definition intends to read. A shorthand for <literal>.invoke</literal>
is the name of the macro to invoke, backquoted.</para></listitem>
</itemizedlist>
<listitem>
<para>
<literal>.outfile</literal> <emphasis>filename</emphasis>:
Sets the filename for the output binary if one has not
already been set. If no name is ever set, the output will
be written to <literal>ophis.bin</literal>.
</para>
</listitem>
<listitem>
<para>
<literal>.advance</literal> <emphasis>address</emphasis>:
Forces the program counter to
be <emphasis>address</emphasis>. Unlike
the <literal>.org</literal>
directive, <literal>.advance</literal> outputs zeroes
until the program counter reaches a specified
address. Attempting to <literal>.advance</literal> to a
point behind the current program counter is an
assemble-time error.
</para>
</listitem>
<listitem>
<para>
<literal>.alias</literal> <emphasis>label</emphasis> <emphasis>value</emphasis>:
The .alias directive assigns an arbitrary value to a
label. This value may be an arbitrary argument, but
cannot reference any label that has not already been
defined (this prevents recursive label
dependencies).
</para>
</listitem>
<listitem>
<para>
<literal>.byte</literal> <emphasis>arg</emphasis> [
, <emphasis>arg</emphasis>, ... ]: Specifies a series of
arguments, which are evaluated, and strings, which are
included as raw ASCII data. The final results of these
arguments must be one byte in size. Separate constants
are separated by commas.
</para>
</listitem>
<listitem>
<para>
<literal>.checkpc</literal> <emphasis>address</emphasis>:
Ensures that the program counter is less than or equal to
the address specified, and emits an assemble-time error
if it is not. <emphasis>This produces no code in the
final binary - it is there to ensure that linking a large
amount of data together does not overstep memory
boundaries.</emphasis>
</para>
</listitem>
<listitem>
<para>
<literal>.data</literal> <emphasis>[label]</emphasis>:
Sets the segment to the segment name specified and
disallows output. If no label is given, switches to the
default data segment.
</para>
</listitem>
<listitem>
<para>
<literal>.incbin</literal> <emphasis>filename</emphasis>:
Inserts the contents of the file specified as binary
data. Use it to include graphics information, precompiled
code, or other non-assembler data.
</para>
</listitem>
<listitem>
<para>
<literal>.include</literal> <emphasis>filename</emphasis>:
Includes the entirety of the file specified at that point
in the program. Use this to order your final sources, if
you aren't doing it via the command line.
</para>
</listitem>
<listitem>
<para>
<literal>.org</literal> <emphasis>address</emphasis>:
Sets the program counter to the address
specified. <emphasis>This does not emit any code in and
of itself, nor does it overwrite anything that previously
existed.</emphasis> If you wish to jump ahead in memory,
use <literal>.advance</literal>.
</para>
</listitem>
<listitem>
<para>
<literal>.require</literal> <emphasis>filename</emphasis>:
Includes the entirety of the file specified at that point
in the program. Unlike <literal>.include</literal>,
however, code included with <literal>.require</literal>
will only be inserted once.
The <literal>.require</literal> directive is useful for
ensuring that certain code libraries are somewhere in the
final binary. It is also very useful for guaranteeing
that macro libraries are available.
</para>
</listitem>
<listitem>
<para>
<literal>.space</literal> <emphasis>label</emphasis> <emphasis>size</emphasis>:
This directive is used to organize global variables. It
defines the label specified to be at the current location
of the program counter, and then advances the program
counter <emphasis>size</emphasis> steps ahead. No actual
code is produced. This is equivalent to <literal>label:
.org ^+size</literal>.
</para>
</listitem>
<listitem>
<para>
<literal>.text</literal> <emphasis>[label]</emphasis>:
Sets the segment to the segment name specified and allows
output. If no label is given, switches to the default
text segment.
</para>
</listitem>
<listitem>
<para>
<literal>.word</literal> <emphasis>arg</emphasis> [
, <emphasis>arg</emphasis>, ... ]:
Like <literal>.byte</literal>, but values are all treated
as two-byte values and stored low-end first (as is the
6502's wont). Use this to create jump tables (an
unadorned label will evaluate to that label's location)
or otherwise store 16-bit data.
</para>
</listitem>
<listitem>
<para>
<literal>.dword</literal> <emphasis>arg</emphasis> [
, <emphasis>arg</emphasis>, ...]:
Like <literal>.word</literal>, but for 32-bit
values.
</para>
</listitem>
<listitem>
<para>
<literal>.wordbe</literal> <emphasis>arg</emphasis> [
, <emphasis>arg</emphasis>, ...]:
Like <literal>.word</literal>, but stores the value in a
big-endian format (high byte first).
</para>
</listitem>
<listitem>
<para>
<literal>.dwordbe</literal> <emphasis>arg</emphasis> [
, <emphasis>arg</emphasis>, ...]:
Like <literal>.dword</literal>, but stores the value high
byte first.
</para>
</listitem>
<listitem>
<para>
<literal>.scope</literal>: Starts a new scope
block. Labels that begin with an underscore are only
reachable from within their innermost
enclosing <literal>.scope</literal>
statement.
</para>
</listitem>
<listitem>
<para>
<literal>.scend</literal>: Ends a scope block. Makes the
temporary labels defined since the
last <literal>.scope</literal> statement unreachable, and
permits them to be redefined in a new
scope.
</para>
</listitem>
<listitem>
<para>
<literal>.macro</literal> <emphasis>name</emphasis>:
Begins a macro definition block. This is a scope block
that can be inlined at arbitrary points
with <literal>.invoke</literal>. Arguments to the macro
will be bound to temporary labels with names like
<literal>_1</literal>, <literal>_2</literal>, etc.
</para>
</listitem>
<listitem>
<para>
<literal>.macend</literal>: Ends a macro definition block.
</para>
</listitem>
<listitem>
<para>
<literal>.invoke</literal> <emphasis>label</emphasis> [<emphasis>argument</emphasis> [,
<emphasis>argument</emphasis> ...]]: invokes (inlines) the
specified macro, binding the values of the arguments to the
ones the macro definition intends to read. A shorthand
for <literal>.invoke</literal> is the name of the macro to
invoke, backquoted.
</para>
</listitem>
</itemizedlist>
</section>
</appendix>

185
doc/hll1.sgm Normal file
View File

@ -0,0 +1,185 @@
<chapter id="hll-1">
<title>The Second Step</title>
<para>
This essay discusses how to do 16-or-more bit addition and
subtraction on the 6502, and how to do unsigned comparisons
properly, thus making 16-bit arithmetic less necessary.
</para>
<section>
<title>The problem</title>
<para>
The <literal>ADC</literal>, <literal>SBC</literal>, <literal>INX</literal>,
and <literal>INY</literal> instructions are the only real
arithmetic instructions the 6502 chip has. In and of themselves,
they aren't too useful for general applications: the accumulator
can only hold 8 bits, and thus can't store any value over 255.
Matters get even worse when we're branching based on
values; <literal>BMI</literal> and <literal>BPL</literal> hinge on
the seventh (sign) bit of the result, so we can't represent any
value above 127.
</para>
</section>
<section>
<title>The solution</title>
<para>
We have two solutions available to us. First, we can use
the <quote>unsigned</quote> discipline, which involves checking
different flags, but lets us deal with values between 0 and 255
instead of -128 to 127. Second, we can trade speed and register
persistence for multiple precision arithmetic, using 16-bit
integers (-32768 to 32767, or 0-65535), 24-bit, or more.
</para>
<para>
Multiplication, division, and floating point arithmetic are beyond
the scope of this essay. The best way to deal with those is to
find a math library on the web (I
recommend <ulink url="http://www.6502.org/"></ulink>) and use the
routines there.
</para>
</section>
<section>
<title>Unsigned arithmetic</title>
<para>
When writing control code that hinges on numbers, we should always
strive to have our comparison be with zero; that way, no explicit
compare is necessary, and we can branch simply
with <literal>BEQ/BNE</literal>, which test the zero flag.
Otherwise, we use <literal>CMP</literal>.
The <literal>CMP</literal> command subtracts its argument from the
accumulator (without borrow), updates the flags, but throws away
the result. If the value is equal, the result is zero.
(<literal>CMP</literal> followed by <literal>BEQ</literal>
branches if the argument is equal to the accumulator; this is
probably why it's called <literal>BEQ</literal> and not something
like <literal>BZS</literal>.)
</para>
<para>
Intuitively, then, to check if the accumulator is <emphasis>less
than</emphasis> some value, we <literal>CMP</literal> against that
value and <literal>BMI</literal>. The <literal>BMI</literal>
command branches based on the Negative Flag, which is equal to the
seventh bit of <literal>CMP</literal>'s subtract. That's exactly
what we need, for signed arithmetic. However, this produces
problems if you're writing a boundary detector on your screen or
something and find that 192 &lt; 4. 192 is outside of a signed
byte's range, and is interpreted as if it were -64. This will not
do for most graphics applications, where your values will be
ranging from 0-319 or 0-199 or 0-255.
</para>
<para>
Instead, we take advantage of the implied subtraction
that <literal>CMP</literal> does. When subtracting, the result's
carry bit starts at 1, and gets borrowed from if necessary. Let
us consider some four-bit subtractions.
</para>
<programlisting>
 C|3210             C|3210
 ------             ------
 1|1001    9        1|1001    9
  |0100  - 4         |1100  -12
 ------  ---        ------  ---
 1|0101    5        0|1101   -3
</programlisting>
<para>
The <literal>CMP</literal> command properly modifies the carry bit
to reflect this. When computing A-B, the carry bit is set if A
&gt;= B, and it's clear if A &lt; B. Consider the following two
code sequences.
</para>
<programlisting>
   (1)                (2)
 CMP #$C0           CMP #$C0
 BMI label          BCC label
</programlisting>
<para>
The code in the first column treats the value in the accumulator
as a signed value, and branches if the value is less than -64.
(Because of overflow issues, it will actually branch for
accumulator values between $40 and $BF, even though it <emphasis>should</emphasis>
only be doing it for values between $80 and $BF. To see why,
compare $40 to $C0 and look at the result.) The second column
code treats the accumulator as holding an unsigned value, and
branches if the value is less than 192. It will branch for
accumulator values $00-$BF.
</para>
</section>
<section>
<title>16-bit addition and subtraction</title>
<para>
Time to use the carry bit for what it was meant to do. Adding two
8-bit numbers can produce a 9-bit result. That 9th bit is stored
in the carry flag. The <literal>ADC</literal> command adds the
carry value to its result, as well. Thus, carries work just as
we'd expect them to. Suppose we're storing two 16-bit values, low
byte first, in $C100-1 and $C102-3. To add them together and
store them in $C104-5, this is very easy:
</para>
<programlisting>
CLC
LDA $C100
ADC $C102
STA $C104
LDA $C101
ADC $C103
STA $C105
</programlisting>
<para>
Subtraction is identical, but you set the carry bit first
with <literal>SEC</literal> (because borrow is the complement of
carry&mdash;think about how the unsigned compare works if this
puzzles you) and, of course, use the <literal>SBC</literal>
instruction instead of <literal>ADC</literal>.
</para>
<para>
The carry/borrow bit is set appropriately to let you continue,
too. As long as you just keep working your way up to bytes of
ever-higher significance, this generalizes to 24 (do it three
times instead of two) or 32 (four, etc.) bit integers.
</para>
</section>
<section>
<title>16-bit comparisons</title>
<para>
Doing comparisons on extended precision values is about the same
as doing them on 8-bit values, but you have to have the value you
test in memory, since it won't fit in the accumulator all at once.
You don't have to store the values back anywhere, either, since
all you care about is the final state of the flags. For example,
here's a signed comparison, branching to <literal>label</literal>
if the value in $C100-1 is less than 1000 ($03E8):
</para>
<programlisting>
SEC
LDA $C100
SBC #$E8
LDA $C101 ; We only need the carry bit from that subtract
SBC #$03
BMI label
</programlisting>
<para>
All the commentary on signed and unsigned compares holds for
16-bit (or higher) integers just as it does for the 8-bit
ones.
</para>
</section>
</chapter>

880
doc/hll2.sgm Normal file
View File

@ -0,0 +1,880 @@
<chapter id="hll2">
<title>Structured Programming</title>
<para>
This essay discusses the machine language equivalents of the
basic <quote>structured programming</quote> concepts that are part
of the <quote>imperative</quote> family of programming languages:
if/then/else, for/next, while loops, and procedures. It also
discusses basic use of variables, as well as arrays, multi-byte data
types (records), and sub-byte data types (bitfields). It closes by
hand-compiling pseudo-code for an insertion sort on linked lists
into assembler. A complete Commodore 64 application is included as
a sample with this essay.
</para>
<section>
<title>Control constructs</title>
<section>
<title>Branches: <literal>if x then y else z</literal></title>
<para>
This is almost the most basic control construct.
The <emphasis>most</emphasis> basic is <literal>if x then
y</literal>, which is a simple branch instruction
(bcc/bcs/beq/bmi/bne/bpl/bvc/bvs) past the <quote>then</quote>
clause if the conditional is false:
</para>
<programlisting>
iny
bne no'overflow
inx
no'overflow:
;; rest of code
</programlisting>
<para>
This increments the value of the y register, and if it just
wrapped back around to zero, it increments the x register too.
It is basically equivalent to the C statement <literal>if
((++y)==0) ++x;</literal>. We need a few more labels to handle
else clauses as well.
</para>
<programlisting>
;; Computation of the conditional expression.
;; We assume for the sake of the example that
;; we want to execute the THEN clause if the
;; zero bit is set, otherwise the ELSE
;; clause. This will happen after a CMP,
;; which is the most common kind of 'if'
;; statement anyway.
BNE else'clause
;; THEN clause code goes here.
JMP end'of'if'stmt
else'clause:
;; ELSE clause code goes here.
end'of'if'stmt:
;; ... rest of code.
</programlisting>
</section>
<section>
<title>Free loops: <literal>while x do y</literal></title>
<para>
A <emphasis>free loop</emphasis> is one that might execute any
number of times. These are basically just a combination
of <literal>if</literal> and <literal>goto</literal>. For
a <quote>while x do y</quote> loop, that executes zero or more
times, you'd have code like this...
</para>
<programlisting>
loop'begin:
;; ... computation of condition, setting zero
;; bit if loop is finished...
beq loop'done
;; ... loop body goes here
jmp loop'begin
loop'done:
;; ... rest of program.
</programlisting>
<para>
If you want to ensure that the loop body executes at least once
(do y while x), just move the test to the end.
</para>
<programlisting>
loop'begin:
;; ... loop body goes here
;; ... computation of condition, setting zero
;; bit if loop is finished...
bne loop'begin
;; ... rest of program.
</programlisting>
<para>
The choice of zero bit is kind of arbitrary here. If the
condition involves the carry bit, or overflow, or negative, then
replace the beq with bcs/bvs/bmi appropriately.
</para>
</section>
<section>
<title>Bounded loops: <literal>for i = x to y do z</literal></title>
<para>
A special case of loops is one where you know exactly how many
times you're going through it&mdash;this is called
a <emphasis>bounded</emphasis> loop. Suppose you're copying 16
bytes from $C000 to $D000. The C code for that would look
something like this:
</para>
<programlisting>
char *a = (char *)0xC000;
char *b = (char *)0xD000;
int i;
for (i = 0; i &lt; 16; i++) { b[i] = a[i]; }
</programlisting>
<para>
C doesn't directly support bounded loops;
its <literal>for</literal> statement is just <quote>syntactic
sugar</quote> for a while statement. However, we can take
advantage of special purpose machine instructions to get very
straightforward code:
</para>
<programlisting>
ldx #$00
loop:
lda $c000, x
sta $d000, x
inx
cpx #$10
bmi loop
</programlisting>
<para>
However, remember that every arithmetic operation,
including <literal>inx</literal> and <literal>dex</literal>,
sets the various flags, including the Zero bit. That means that
if we can make our computation <emphasis>end</emphasis> when the
counter hits zero, we can shave off some bytes:
</para>
<programlisting>
ldx #$10
loop:
lda $bfff, x
sta $cfff, x
dex
bne loop
</programlisting>
<para>
Notice that we had to change the addresses we're indexing from,
because x takes a slightly different range of values. The space
savings is small here, and it's become slightly more unclear.
(It also hasn't actually saved any time, because the lda and sta
instructions are crossing a page boundary where they weren't
before&mdash;but if the start or end arrays began at $b020 or
something this wouldn't be an issue.) This tends to work better
when the precise value of the counter isn't used in the
computation&mdash;so let us consider the NES, which uses memory
location $2007 as a port to its video memory. Suppose we wish
to jam 4,096 copies of the hex value $20 into the video memory.
We can write this <emphasis>very</emphasis> cleanly, using the X
and Y registers as indices in a nested loop.
</para>
<programlisting>
ldx #$10
ldy #$00
lda #$20
loop:
sta $2007
iny
bne loop
dex
bne loop
</programlisting>
<para>
Work through this code. Convince yourself that
the <literal>sta</literal> is executed exactly 16*256 = 4096
times.
</para>
<para>
This is an example of a <emphasis>nested</emphasis> loop: a loop
inside a loop. Since our internal loop didn't need the X or Y
registers, we got to use both of them, which is nice, because
they have special incrementing and decrementing instructions.
The accumulator lacks these instructions, so it is a poor choice
to use for index variables. If you have a bounded loop and
don't have access to registers, use memory locations
instead:
</para>
<programlisting>
lda #$10
sta counter ; loop 16 times
loop:
;; Do stuff that trashes all the registers
dec counter
bne loop
</programlisting>
<para>
That's it! These are the basic control constructs for using
inside of procedures. Before talking about how to organize
procedures, I'll briefly cover the way the 6502 handles its
stack, because stacks and procedures are very tightly
intertwined.
</para>
</section>
</section>
<section>
<title>The stack</title>
<para>
The 6502 has an onboard stack in page 1. You can modify the stack
pointer by storing values in the X register and
using <literal>txs</literal>; an <quote>empty</quote> stack is
value $FF. Going into a procedure pushes the address of the next
instruction onto the stack, and RTS pops that value off and jumps
there. (Well, not precisely. JSR actually pushes a value that's
one byte short, and RTS loads the value, increases it by
one, and THEN jumps there. But that's only an issue if you're
using RTS to implement jump tables.) On an interrupt, the next
instruction's address is pushed on the stack, then the processor
flags, and it jumps to the handler. The return from interrupt
restores the flags and the PC, just as if nothing had
happened.
</para>
<para>
The stack only has 256 possible entries; since addresses take two
bytes to store, that means that if you call something that calls
something that calls something that (etc., etc., 129 times), your
computation will fail. This can happen faster if you save
registers or memory values on the stack (see below).
</para>
</section>
<section>
<title>Procedures and register saving</title>
<para>
All programming languages are designed around the concept of
procedures.<footnote><para>Yes, all of them. Functional languages
just let you do more things with them, logic programming has
implicit calls to query procedures, and
object-oriented <quote>methods</quote> are just normal procedures
that take one extra argument in secret.</para></footnote>
Procedures let you break a computation up into different parts,
then use them independently. However, compilers do a lot of work
for you behind the scenes to let you think this. Consider the
following assembler code. How many times does the loop
execute?
</para>
<programlisting>
        ldx #$10
loop:   jsr do'stuff
        dex
        bne loop
</programlisting>
<para>
The correct answer is <quote>I don't know, but
it <emphasis>should</emphasis> be 16.</quote> The reason we don't
know is because we're assuming here that
the <literal>do'stuff</literal> routine doesn't change the value
of the X register. If it does, then all sorts of chaos could
result. For major routines that aren't called often but are
called in places where the register state is important, you should
store the old registers on the stack with code like this:
</para>
<programlisting>
do'stuff:
pha
txa
pha
tya
pha
;; Rest of do'stuff goes here
pla
tay
pla
tax
pla
rts
</programlisting>
<para>
(Remember, the last item pushed onto the stack is the first one
pulled off, so you have to restore them in reverse order.) That's
three more bytes on the stack, so you don't want to do this if you
don't absolutely have to. If <literal>do'stuff</literal>
actually <emphasis>doesn't</emphasis> touch X, there's no need to
save and restore the value. This technique is
called <emphasis>callee-save</emphasis>.
</para>
<para>
The reverse technique is called <emphasis>caller-save</emphasis>
and pushes important registers onto the stack before the routine
is called, then restores them afterwards. Each technique has its
advantages and disadvantages. The best way to handle it in your
own code is to mark at the top of each routine which registers
need to be saved by the caller. (It's also useful to note things
like how it takes arguments and how it returns values.)
</para>
</section>
<section>
<title>Variables</title>
<para>
Variables come in several flavors.
</para>
<section>
<title>Global variables</title>
<para>
Global variables are variables that can be reached from any
point in the program. Since the 6502 has no memory protection,
these are easy to declare. Take some random chunk of unused
memory and declare it to be the global variables area. All
reasonable assemblers have commands that let you give a symbolic
name to a memory location&mdash;you can use this to give your
globals names.
</para>
</section>
<section>
<title>Local variables</title>
<para>
All modern languages have some concept of <quote>local
variables</quote>, which are data values unique to that
invocation of that procedure. In modern architectures, this data
is stored into and read directly off of the stack. The 6502
doesn't really let you do this cleanly; I'll discuss ways of
handling it in a later essay. If you're implementing a system
from scratch, you can design your memory model to not require
such extreme measures. There are three basic techniques.
</para>
<section>
<title>Treat local variables like registers</title>
<para>
This means that any memory location you use, you save on the
stack and restore afterwards. This
can <emphasis>really</emphasis> eat up stack space, and it's
really slow, it's often pointless, and it has a tendency to
overflow the stack. I can't recommend it. But it does let
you do recursion right, if you don't need to save much memory
and you aren't recursing very deep.
</para>
</section>
<section>
<title>Procedure-based memory allocation</title>
<para>
With this technique, you give each procedure its own little
chunk of memory for use with its data. All the variables are
still, technically, globals; a
routine <emphasis>could</emphasis> interfere with another's,
but the discipline of <quote>only mess with real globals, and
your own locals</quote> is very, very easy to maintain.
</para>
<para>
This has many advantages. It's <emphasis>very</emphasis>
fast, both to write and to run, because loading a variable is
an Absolute or Zero Page instruction. Also, any procedure may
call any other procedure, as long as it doesn't wind up
calling itself at some point.
</para>
<para>
It has two major disadvantages. First, if many routines need
a lot of space, it can consume more memory than it should.
Also, this technique can require significant assembler
support&mdash;you must ensure that no procedure's local
variables are defined in the same place as any other
procedure, and it essentially requires a full symbolic linker
to do right. Ophis includes commands for <emphasis>memory
segmentation simulation</emphasis> that automate most of this
task, and make writing general libraries feasible.
</para>
</section>
<section>
<title>Partition-based memory allocation</title>
<para>
It's not <emphasis>really</emphasis> necessary that no
procedure overwrite memory used by any other procedure. It's
only required that procedures don't write on the memory that
their <emphasis>callers</emphasis> use. Suppose that your
program is organized into a bunch of procedures, and each falls
into one of three sets:
</para>
<itemizedlist>
<listitem><para>Procedures in set A don't call anyone.</para></listitem>
<listitem><para>Procedures in set B only call procedures in set A.</para></listitem>
<listitem><para>Procedures in set C only call procedures in sets A or B.</para></listitem>
</itemizedlist>
<para>
Now, each <emphasis>set</emphasis> can be given its own chunk
of memory, and we can be absolutely sure that no procedures
overwrite each other. Even if every procedure in set C uses
the <emphasis>same</emphasis> memory location, they'll never
step on each other, because there's no way to get to any other
routine in set C <emphasis>from</emphasis> any routine in set
C.
</para>
<para>
This has the same time efficiencies as procedure-based memory
allocation, and, given a thoughtful design aimed at using this
technique, also can use significantly less memory at run time.
It also requires much less assembler support, as addresses
for variables may be assigned by hand without having to worry
about those addresses already being used. However, it does
impose a very tight discipline on the design of the overall
system, so you'll have to do a lot more work before you start
actually writing code.
</para>
</section>
</section>
<section>
<title>Constants</title>
<para>
Constants are <quote>variables</quote> that don't change. If
you know that the value you're using is not going to change, you
should fold it into the code, either as an Immediate operand
wherever it's used, or (if it's more complicated than that)
as <literal>.byte</literal> commands in between the procedures.
This is especially important for ROM-based systems such as the
NES; the NES has very little RAM available, so constants should
be kept in the more plentiful ROM wherever possible.
</para>
</section>
</section>
<section>
<title>Data structures</title>
<para>
So far, we've been treating data as a bunch of one-byte values.
There really isn't a lot you can do just with bytes. This section
talks about how to deal with larger and smaller elements.
</para>
<section>
<title>Arrays</title>
<para>
An <emphasis>array</emphasis> is a bunch of data elements in a
row. An array of bytes is very easy to handle with the 6502
chip, because the various indexed addressing modes handle it for
you. Just load the index into the X or Y register and do an
absolute indexed load. In general, these are going to be
zero-indexed (that is, a 32-byte array is indexed from 0 to 31.)
This code would initialize a byte array with 32 entries to
0:
</para>
<programlisting>
ldx #$00 ; X indexes the array, starting at entry 0
txa ; A is the fill value, which is also 0
loop:
sta array,x ; array[X] = 0
inx
cpx #$20 ; Stop once all 32 ($20) entries are written
bne loop
</programlisting>
<para>
(If you count down to save instructions, remember to adjust the
base address so that it's still writing the same memory
location.)
</para>
<para>
This approach to arrays has some limits. Primary among them is
that we can't have arrays of size larger than 256; we can't fit
our index into the index register. In order to address larger
arrays, we need to use the indirect indexed addressing mode. We
use 16-bit addition to add the offset to the base pointer, then
set the Y register to 0 and then load the value
with <literal>lda (ptr),y</literal>.
</para>
<para>
Well, actually, we can do better than that. Suppose we want to
clear out 8K of ram, from $2000 to $4000. We can use the Y
register to hold the low byte of our offset, and only update the
high byte when necessary. That produces the following
loop:
</para>
<programlisting>
lda #$00 ; Set pointer value to base ($2000)
sta ptr ; (low byte of the pointer)
lda #$20
sta ptr+1 ; (high byte of the pointer)
lda #$00 ; Storing a zero
ldx #$20 ; 8,192 ($2000) iterations: high byte
ldy #$00 ; low byte.
loop:
sta (ptr),y ; Write to the address in ptr, plus Y
iny
bne loop ; If we haven't wrapped around, go back
inc ptr+1 ; Otherwise update high byte
dex ; bump counter
bne loop ; and continue if we aren't done
</programlisting>
<para>
This code could be optimized further; the loop prelude in
particular loads a lot of redundant values that could be
compressed down further:
</para>
<programlisting>
lda #$00 ; A = 0: pointer low byte and the value to store
sta ptr
tay ; Y = 0: starting offset within the page
ldx #$20 ; X = $20: pointer high byte and page count
stx ptr+1 ; ptr now holds $2000
</programlisting>
<para>
That's not directly relevant to arrays, but these sorts of
things are good things to keep in mind when writing your code.
Done well, they can make it much smaller and faster; done
carelessly, they can force a lot of bizarre dependencies on your
code and make it impossible to modify later.
</para>
</section>
<section>
<title>Records</title>
<para>
A <emphasis>record</emphasis> is a collection of values all
referred to as one variable. This has no immediate
representation in assembler. If you have a global variable
that's two bytes and a code pointer, this is exactly equivalent
to three separate variables. You can just put one label in
front of it, and refer to the first byte
as <literal>label</literal>, the second
as <literal>label+1</literal>, and the code pointer
as <literal>label+2</literal>.
</para>
<para>
This really applies to all data structures that take up more
than one byte. When dealing with the pointer, a 16-bit value,
we refer to the low byte as <literal>ptr</literal>
(or <literal>label+2</literal>, in the example above), and the
high byte as <literal>ptr+1</literal>
(or <literal>label+3</literal>).
</para>
<para>
Arrays of records are more interesting. There are two
possibilities for these. The way most high level languages
treat it is by keeping the records contiguous. If you have an
array of records that each hold two sixteen bit integers, then the records are stored
in order, one at a time. The first is in location $1000, the
next in $1004, the next in $1008, and so on. You can do this
with the 6502, but you'll probably have to use the indirect
indexed mode if you want to be able to iterate
conveniently.
</para>
<para>
Another, more unusual, but more efficient approach is to keep
each byte as a separate array, just like in the arrays example
above. To illustrate, here's a little bit of code to go through
a contiguous array of 16 bit integers, adding their values to
some <literal>total</literal> variable:
</para>
<programlisting>
ldx #$10 ; Number of elements in the array
ldy #$00 ; Byte index from array start
loop:
clc ; Each 16-bit add starts with no carry in
lda array, y ; Low byte
adc total
sta total
lda array+1, y ; High byte
adc total+1 ; Picks up the carry out of the low-byte add
sta total+1
iny ; Jump ahead to next entry
iny
dex ; Check for loop termination
bne loop
</programlisting>
<para>
And here's the same loop, keeping the high and low bytes in
separate arrays:
</para>
<programlisting>
ldx #$00 ; Element index, shared by both arrays
loop:
clc ; Each 16-bit add starts with no carry in
lda lowbyte,x ; Low byte
adc total
sta total
lda highbyte,x ; High byte
adc total+1 ; Picks up the carry out of the low-byte add
sta total+1
inx ; The next element is one index away
cpx #$10 ; Stop after 16 ($10) elements
bne loop
</programlisting>
<para>
Which approach is the right one depends on what you're doing.
For large arrays, the first approach is better, as you only need
to maintain one base pointer. For smaller arrays, the easier
indexing makes the second approach more convenient.
</para>
</section>
<section>
<title>Bitfields</title>
<para>
To store values that are smaller than a byte, you can save space
by putting multiple values in a byte. To extract a sub-byte
value, use the bitmasking commands:
</para>
<itemizedlist>
<listitem><para>To set bits, use the <literal>ORA</literal> command. <literal>ORA #$0F</literal> sets the lower four bits to 1 and leaves the rest unchanged.</para></listitem>
<listitem><para>To clear bits, use the <literal>AND</literal> command. <literal>AND #$F0</literal> sets the lower four bits to 0 and leaves the rest unchanged.</para></listitem>
<listitem><para>To reverse bits, use the <literal>EOR</literal> command. <literal>EOR #$0F</literal> reverses the lower four bits and leaves the rest unchanged.</para></listitem>
<listitem><para>To test if a bit is 0, AND away everything but that bit, then see if the Zero bit was set. If the bit is in the top two bits of a memory location, you can use the BIT command instead (which stores bit 7 in the Negative bit, and bit 6 in the Overflow bit).</para></listitem>
</itemizedlist>
</section>
</section>
<section>
<title>A modest example: Insertion sort on linked lists</title>
<para>
To demonstrate these techniques, we will now produce code to
perform insertion sort on a linked list. We'll start by defining
our data structure, then defining the routines we want to write,
then producing actual code for those routines. A downloadable
version that will run unmodified on a Commodore 64 closes the
chapter.
</para>
<section>
<title>The data structure</title>
<para>
We don't really want to have to deal with pointers if we can
possibly avoid it, but it's hard to do a linked list without
them. Instead of pointers, we will
use <emphasis>cursors</emphasis>: small integers that represent
the index into the array of values. This lets us use the
many-small-byte-arrays technique for our data. Furthermore, our
random data that we're sorting never has to move, so we may
declare it as a constant and only bother with changing the
values of <literal>head</literal> and
the <literal>next</literal> arrays. The data record definition
looks like this:
</para>
<programlisting>
head : byte;
data : const int[16] = [838, 618, 205, 984, 724, 301, 249, 946,
925, 43, 114, 697, 985, 633, 312, 86];
next : byte[16];
</programlisting>
<para>
Exactly how this gets represented will vary from assembler to
assembler. Ophis does it like this:
</para>
<programlisting>
.data
.space head 1 ; Cursor of the first element in the list
.space next 16 ; One successor cursor per element
.text
; The values being sorted never change, so they are assembled in as
; constants: lb holds the low byte of each entry, hb the high byte.
lb: .byte &lt;$838,&lt;$618,&lt;$205,&lt;$984,&lt;$724,&lt;$301,&lt;$249,&lt;$946
.byte &lt;$925,&lt;$043,&lt;$114,&lt;$697,&lt;$985,&lt;$633,&lt;$312,&lt;$086
hb: .byte &gt;$838,&gt;$618,&gt;$205,&gt;$984,&gt;$724,&gt;$301,&gt;$249,&gt;$946
.byte &gt;$925,&gt;$043,&gt;$114,&gt;$697,&gt;$985,&gt;$633,&gt;$312,&gt;$086
</programlisting>
</section>
<section>
<title>Doing an insertion sort</title>
<para>
To do an insertion sort, we clear the list by setting the 'head'
value to -1, and then insert each element into the list one at a
time, placing each element in its proper order in the list. We
can consider the lb/hb structure alone as an array of 16
integers, and just insert each one into the list one at a
time.
</para>
<programlisting>
procedure insertion_sort
head := -1;
for i := 0 to 15 do
insert_elt i
end
end
</programlisting>
<para>
This translates pretty directly. We'll have insert_elt take its
argument in the X register, and loop with that. However, given
that insert_elt is going to be a complex procedure, we'll save
the value first. The assembler code becomes:
</para>
<programlisting>
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; insertion'sort: Builds the sorted list over the lb/hb data by
;                 inserting elements 0 through 15 one after another.
; Arguments: None.
; Modifies: All registers destroyed, head and next array sorted.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
insertion'sort:
lda #$FF ; $FF is the list terminator, so storing it
sta head ; in 'head' leaves the list empty
ldx #$00 ; X = index of the next element to insert
insertion'sort'loop:
txa ; insert_elt destroys the registers, so keep
pha ; a copy of the index on the stack
jsr insert_elt
pla ; Recover the index...
tax
inx ; ...and move on to the next element
cpx #$10 ; Done once all 16 ($10) have been inserted
bne insertion'sort'loop
rts
</programlisting>
</section>
<section>
<title>Inserting an element</title>
<para>
The pseudocode for inserting an element is a bit more
complicated. If the list is empty, or the value we're inserting
goes at the front, then we have to update the value
of <literal>head</literal>. Otherwise, we can iterate through
the list until we find the element that our value fits in after
(so, the first element whose successor is larger than our
value). Then we update the next pointers directly and exit.
</para>
<programlisting>
procedure insert_elt i
begin
(* Empty list: i becomes the only element. *)
if head = -1 then begin
head := i;
next[i] := -1;
return;
end;
val := data[i];
(* val belongs in front of the current first element. *)
if val &lt; data[head] then begin
next[i] := head;
head := i;
return;
end;
(* Walk past every element that is smaller than val... *)
current := head;
while (next[current] &lt;&gt; -1 and val &gt; data[next[current]]) do
current := next[current];
end;
(* ...and link i in directly after the last of them. *)
next[i] := next[current];
next[current] := i;
end;
</programlisting>
<para>
This produces the following rather hefty chunk of code:
</para>
<programlisting>
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; insert_elt: Insert an element into the linked list. Maintains the
; list in sorted, ascending order. Used by
; insertion'sort.
; Arguments: X register holds the index of the element to add.
; Modifies: All registers destroyed; head and next arrays updated.
; Also overwrites the lbtoinsert, hbtoinsert and
; indextoinsert scratch bytes declared below.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
.data
.space lbtoinsert 1 ; Low byte of the value being inserted
.space hbtoinsert 1 ; High byte of the value being inserted
.space indextoinsert 1 ; Index (cursor) of the element being inserted
.text
insert_elt:
ldy head ; If the list is empty, make
cpy #$FF ; head point at it, and return.
bne insert_elt'list'not'empty
stx head
tya ; A = $FF, the terminator, so the
sta next,x ; new element has no successor
rts
insert_elt'list'not'empty:
lda lb,x ; Cache the data we're inserting
sta lbtoinsert
lda hb,x
sta hbtoinsert
stx indextoinsert
ldy head ; Compare the first value with
sec ; the data. If the data must
lda lb,y ; be inserted at the front...
sbc lbtoinsert ; (only the borrow from this low-byte subtract is kept)
lda hb,y
sbc hbtoinsert ; Negative result: data[head] is less than the argument
bmi insert_elt'not'smallest
tya ; Set its next pointer to the
sta next,x ; old head, update the head
stx head ; pointer, and return.
rts
insert_elt'not'smallest:
ldx head ; X is the current element from here on
insert_elt'loop: ; At this point, we know that
lda next,x ; argument > data[X].
tay ; Y = next[X]
cpy #$FF ; if next[X] = #$FF, insert arg at end.
beq insert_elt'insert'after'current
lda lb,y ; Otherwise, compare arg to
sec ; data[next[X]]. If we insert
sbc lbtoinsert ; before that...
lda hb,y
sbc hbtoinsert ; Negative result: data[next[X]] is less than the argument
bmi insert_elt'goto'next
insert_elt'insert'after'current: ; Fix up all the next links
tya ; A = next[X], the old successor
ldy indextoinsert
sta next,y ; next[new] = old successor
tya ; A = index of the new element
sta next,x ; next[X] = new element
rts ; and return.
insert_elt'goto'next: ; Otherwise, let X = next[X]
tya ; and go looping again.
tax
jmp insert_elt'loop
</programlisting>
</section>
<section>
<title>The complete application</title>
<para>
The full application, which deals with interfacing with CBM
BASIC and handles console I/O and such, is
in <xref linkend="structure-src" endterm="structure-fname">.
</para>
</section>
</section>
</chapter>

297
doc/hll3.sgm Normal file
View File

@ -0,0 +1,297 @@
<chapter id="hll3">
<title>Pointers and Indirection</title>
<para>
The basics of pointers versus cursors (or, at the 6502 assembler
level, the indirect indexed addressing mode versus the absolute
indexed ones) were covered in <xref linkend="hll2">. This essay seeks
to explain the uses of the indirect modes, and how to implement
pointer operations with them. It does <emphasis>not</emphasis> seek to explain
why you'd want to use pointers for something to begin with; for a
tutorial on proper pointer usage, consult any decent C textbook.
</para>
<section>
<title>The absolute basics</title>
<para>
A pointer is a variable holding the address of a memory location.
Memory locations take 16 bits to represent on the 6502: thus, we
need two bytes to hold it. Any decent assembler will have ways of
taking the high and low bytes of an address; use these to acquire
the raw values you need. The 6502 chip does not have any
simple <quote>pure</quote> indirect modes (except
for <literal>JMP</literal>, which is a matter for a later essay);
all are indexed, and they're indexed different ways depending on
which index register you use.
</para>
<section>
<title>The simplest example</title>
<para>
When doing a simple, direct dereference (that is, something
equivalent to the C code <literal>c=*b;</literal>) the code
looks like this:
</para>
<programlisting>
ldy #0 ; No offset: read exactly where b points
lda (b), y ; A = the byte at the address stored in b
sta c
</programlisting>
<para>
Even with this simple example, there are several important
things to notice.
</para>
<itemizedlist>
<listitem>
<para>
The variable <literal>b</literal> <emphasis>must be on the
zero page</emphasis>, and furthermore, it <emphasis>cannot
be $FF.</emphasis> All your pointer values need to be
either stored on the zero page to begin with or copied
there before use.
</para>
</listitem>
<listitem>
<para>
The <literal>y</literal> in the <literal>lda</literal>
statement must be y. It cannot be x (that's a different
form of indirection), and it cannot be a constant. If
you're doing a lot of indirection, be sure to keep your Y
register free to handle the indexing on the
pointers.
</para>
</listitem>
<listitem>
<para>
The <literal>b</literal> variable is used alone. Statements
like <literal>lda (b+2), y</literal> are syntactically valid
and sometimes even correct: it dereferences the value next
to <literal>b</literal> after adding y to the value therein.
However, it is almost guaranteed that what you <emphasis>really</emphasis>
wanted to do was compute <literal>*(b+2)</literal> (that is,
take the address stored in b, add 2 to <emphasis>that</emphasis>,
and dereference that value); see the next section for how to
do this properly.
</para>
</listitem>
</itemizedlist>
<para>
In nearly all cases, it is the Y-register's version (Indirect
Indexed) that you want to use when you're dealing with pointers.
Even though either version could be used for this example, we
use the Y register to establish this habit.
</para>
</section>
</section>
<section>
<title>Pointer arithmetic</title>
<para>
Pointer arithmetic is an obscenely powerful and dangerous
technique. However, it's the most straightforward way to deal
with enormous arrays, structs, indexable stacks, and nearly
everything you do in C. (C has no native array or string types
primarily because it allows arbitrary pointer arithmetic, which is
strong enough to handle all of those without complaint and at
blazing speed. It also allows for all kinds of buffer overrun
security holes, but let's face it, who's going to be cracking root
on your Apple II?) There are a number of ways to implement this
on the 6502. We'll deal with them in increasing order of design
complexity.
</para>
<section>
<title>The straightforward, slow way</title>
<para>
When computing a pointer value, you simply treat the pointer as
if it were a 16-bit integer. Do all the math you need, then
when the time comes to dereference it, simply do a direct
dereference as above. This is definitely doable, and it's not
difficult. However, it is costly in both space and time.
</para>
<para>
When dealing with arbitrary indices large enough that they won't
fit in the Y register, or when creating values that you don't
intend to dereference (such as subtracting two pointers to find
the length of a string), this is also the only truly usable
technique.
</para>
</section>
<section>
<title>The clever fast way</title>
<para>
But wait, you say. Often when we compute a value, at least one
of the operations is going to be an addition, and we're almost
certain to have that value be less than 256! Surely we may save
ourselves an operation by loading that value into the Y register
and having the load operation itself perform the final
addition!
</para>
<para>
Very good. This is the fastest technique, and sometimes it's
even the most readable. These cases usually involve repeated
reading of various fields from a structure or record. The base
pointer always points to the base of the structure (or the top
of the local variable list, or what have you) and the Y register
takes values that index into that structure. This lets you keep
the pointer variable in memory largely static and requires no
explicit arithmetic instructions at all.
</para>
<para>
However, this technique is highly opaque and should always be
well documented, indicating exactly what you think you're
pointing at. Then, when you get garbage results, you can
compare your comments and the resulting Y values with the actual
definition of the structure to see who's screwing up.
</para>
<para>
For a case where we still need to do arithmetic, consider the
classic case of needing to clear out a large chunk of memory.
The following code fills the 4KB of memory between $C000 and
$D000 with zeroes:
</para>
<programlisting>
lda #$C0 ; Store #$C000 in mem: high byte...
sta mem+1
lda #$00 ; ...then low byte, which also leaves A = 0
sta mem
ldx #$10 ; x holds number of times to execute outer loop:
; 16 pages of 256 bytes each = 4KB
tay ; accumulator and y are both 0
loop: sta (mem), y
iny
bne loop ; Inner loop ends when y wraps around to 0
inc mem+1 ; "Carry" from the iny to the core pointer
dex ; Decrement outer loop count, quit if done
bne loop
</programlisting>
<para>
Used carefully, proper use of the Y register can make your code
smaller, faster, <emphasis>and</emphasis> more readable. Used
carelessly it can make your code an unreadable, unmaintainable
mess. Use it wisely, and with care, and it will be your
greatest ally in writing flexible code.
</para>
</section>
</section>
<section>
<title>What about Indexed Indirect?</title>
<para>
This essay has concerned itself almost exclusively with the
Indirect Indexed&mdash;or (Indirect), Y&mdash;mode. What about Indexed
Indirect&mdash;(Indirect, X)? This is a <emphasis>much</emphasis>
less useful mode than the Y register's version. While the Y
register indirection lets you implement pointers and arrays in
full generality, the X register is useful for pretty much only one
application: lookup tables for single byte values.
</para>
<para>
Even coming up with a motivating example for this is difficult,
but here goes. Suppose you have multiple, widely disparate
sections of memory that you're watching for signals. The
following routine takes a resource index in the accumulator and
returns the status byte for the corresponding resource.
</para>
<programlisting>
; This data is sitting on the zero page somewhere
resource_status_table: .word resource0_status, resource1_status
.word resource2_status, resource3_status
; etc. etc. etc.
; This is the actual program code
.text
; getstatus: takes a resource index in the accumulator and returns the
; status byte for that resource in the accumulator. X is overwritten.
getstatus:
asl ; Multiply argument by 2 before putting it in X, so that it
tax ; produces a value that's properly word-indexed. (ASL always
; shifts in a zero, so the carry need not be cleared first.)
lda (resource_status_table, x) ; Load through the X-th table entry
rts
</programlisting>
<para>
Why having a routine such as this is better than just having the
calling routine access resourceN_status itself as an absolute
memory load is left as an exercise for the reader. That aside,
this code fragment does serve as a reminder that when indexing an
array of anything other than bytes, you must multiply your index
by the size of the objects you want to index. C does this
automatically&mdash;assembler does not. Stay sharp.
</para>
</section>
<section>
<title>Comparison with the other indexed forms</title>
<para>
Pointers are slow. It sounds odd saying this, when C is the
fastest language around on modern machines precisely because of
its powerful and extensive use of pointers. However, modern
architectures are designed to be optimized for C-style code (as an
example, the x86 architecture allows statements like <literal>mov
eax, [ebx+4*edi]</literal> as a single instruction), while the
6502 is not. An (Indirect), Y operation can take up to 6 cycles
to complete just on its own, while the preparation of that command
costs additional time <emphasis>and</emphasis> scribbles over a
bunch of registers, meaning memory operations to save the values
and yet more time spent. The simple code given at the beginning
of this essay&mdash;loading <literal>*b</literal> into the
accumulator&mdash;takes 7 cycles, not counting the 10 it takes to
load b with the appropriate value to begin with. If b is known to
contain a specific value, we can write a single Absolute mode
instruction to load its value, which takes only 4 cycles and also
preserves the value in the Y register. Clearly, Absolute mode
should be used whenever possible.
</para>
<para>
One might be tempted to use self-modifying code to solve this
problem. This actually doesn't pay off nearly enough for the hassle
it generates; for self-modifying code, the address must be
generated, then stored in the instruction, and then the data must
be loaded. Cost: 16 cycles for 2 immediate loads, 2 absolute
stores, and 1 absolute load. For the straight pointer
dereference, we generate the address, store it in the pointer,
clear the index, then dereference that. Cost: 17 cycles for 3
immediate loads, 2 zero page stores, and 1 indirect indexed load.
Furthermore, unlike in the self-modifying case, loops where simple
arithmetic is being continuously performed only require repeating
the final load instruction, which allows for much greater time
savings over an equivalent self-modifying loop.
</para>
<para>
(This point is also completely moot for NES programmers or anyone
else whose programs are sitting in ROM, because programs stored on
a ROM cannot modify themselves.)
</para>
</section>
<section>
<title>Conclusion</title>
<para>
That's pretty much it for pointers. Though they tend to make
programs hairy, and learning how to properly deal with pointers is
what separates real C programmers from the novices, the basic
mechanics of them are not complex. With pointers you can do
efficient passing of large structures, pass-by-reference,
complicated return values, and dynamic memory management&mdash;and
now these wondrous toys may be added to your assembler programs,
too (assuming you have that kind of space to play with).
</para>
</section>
</chapter>

270
doc/hll4.sgm Normal file
View File

@ -0,0 +1,270 @@
<chapter>
<title>Functionals</title>
<para>
This essay deals with indirect calls. These are the core of an
enormous number of high level languages: LISP's closures, C's
function pointers, C++ and Java's virtual method calls, and some
implementations of the <literal>switch</literal> statement.
</para>
<para>
These techniques vary in complexity, and most will not be
appropriate for large-scale assembler projects. Of them, however,
the Data-Directed approach is the most likely to lead to organized
and maintainable code.
</para>
<section>
<title>Function Pointers</title>
<para>
Because assembly language is totally untyped, function pointers
are the same as any other sixteen-bit integer. This makes
representing them really quite easy; most assemblers should permit
routines to be declared simply by naming the routine as
a <literal>.word</literal> directly.
</para>
<para>
To actually invoke these methods, copy them to some sixteen-bit
location (say, <literal>target</literal>) and then invoking the
method is a simple matter of using an indirect jump:
the <literal>JMP&nbsp;(target)</literal> instruction.
</para>
<para>
There's really only one subtlety here, and it's that the indirect
jump is an indirect <emphasis>jump</emphasis>, not an
indirect <emphasis>function call</emphasis>. Thus, if some
function <literal>A</literal> makes an indirect jump to some
routine, when that routine returns, it returns to whoever
called <literal>A</literal>, not <literal>A</literal>
itself.
</para>
<para>
There are several ways of dealing with this, but only one correct
way, which is to structure your procedures so that any call
to <literal>JMP&nbsp;(xxxx)</literal> occurs at the very
end.
</para>
</section>
<section>
<title>A quick digression on how subroutines work</title>
<para>
Ordinarily, subroutines are called with <literal>JSR</literal> and
finished with <literal>RTS</literal>. The <literal>JSR</literal>
instruction takes its own address, adds 2 to it, and pushes this
16-bit value on the stack, high byte first, then low byte (so that
the low byte will be popped off first).
</para>
<para>
But wait, you may object. All <literal>JSR</literal> instructions
are three bytes long. This <quote>return address</quote> is in
the middle of the instruction. And you would be quite right;
the <literal>RTS</literal> instruction pops off the 16-bit
address, adds one to it, and <emphasis>then</emphasis> sets the
program counter to that value.
</para>
<para>
So it <emphasis>is</emphasis> possible to set up
a <quote><literal>JSR</literal> indirect</quote> kind of operation
by adding two to the indirect jump's address and then pushing that
value onto the stack before making the jump; however, you wouldn't
want to do this. It takes six bytes and trashes your accumulator,
and you can get the same functionality with half the space and
with no register corruption by simply defining the indirect jump
to be a one-instruction routine and <literal>JSR</literal>-ing to
it directly. As an added bonus, that way if you have multiple
indirect jumps through the same pointer, you don't need to
duplicate the jump instruction.
</para>
<para>
Does this mean that abusing <literal>JSR</literal>
and <literal>RTS</literal> is a dead-end, though? Not at all...
</para>
</section>
<section>
<title>Dispatch-on-type and Data-Directed Assembler</title>
<para>
Most of the time, you care about function pointers because you've
arranged them in some kind of table. You hand it an index
representing the type of your argument, or which method it is
you're calling, or some other determinator, and then you index
into an array of routines and execute the right one.
</para>
<para>
Writing a generic routine to do this is kind of a pain. First you
have to pass a 16-bit pointer in, then you have to dereference it
to figure out where your table is, then you have to do an indexed
dereference on <emphasis>that</emphasis> to get the routine you
want to run, then you need to copy it out to somewhere fixed so
that you can write your jump instruction. And making this
non-generic doesn't help a whole lot, since that only saves you
the first two steps, but now you have to write them out in every
single indexed jump instruction. If only there were some way to
easily and quickly pass in a local pointer directly...
</para>
<para>
Something, say, like the <literal>JSR</literal> instruction, only not for
program code.
</para>
<para>
Or we could just use the <literal>JSR</literal> statement itself,
but only call this routine at the ends of other routines, much
like we were organizing for indirect jumps to begin with. This
lets us set up routines that look like this:
</para>
<programlisting>
jump'table'alpha:
jsr do'jump'table ; Control never comes back here: the return address
; this pushes is only used to locate the table below
.word alpha'0, alpha'1, alpha'2
</programlisting>
<para>
Where the <literal>alpha'x</literal> routines are the ones to be
called when the index has that value. This leaves the
implementation of do'jump'table, which in this case uses the Y
register to hold the index:
</para>
<programlisting>
do'jump'table:
sta _scratch ; Preserve A for the target routine
pla ; Pull the return address pushed by the JSR:
sta _jmpptr ; low byte first...
pla
sta _jmpptr+1 ; ...then high byte
tya ; Turn the index in Y into a byte offset:
asl ; two bytes per table entry,
tay
iny ; plus one, as the pulled address is one short of the table
lda (_jmpptr), y ; Low byte of the selected routine
sta _target
iny
lda (_jmpptr), y ; High byte of the selected routine
sta _target+1
lda _scratch ; Restore A
jmp (_target) ; The target's RTS returns to whoever called the table routine
</programlisting>
<para>
The <literal>TYA:ASL:TAY:INY</literal> sequence can actually be
omitted if you don't mind having your Y indices be 1, 3, 5, 7, 9,
etc., instead of 0, 1, 2, 3, 4, etc. Likewise, the instructions
dealing with <literal>_scratch</literal> can be omitted if you
don't mind trashing the accumulator. Keeping the accumulator and
X register pristine for the target call comes in handy, though,
because it means we can pass in a pointer argument purely in
registers. This will come in handy soon...
</para>
</section>
<section>
<title>VTables and Object-Oriented Assembler</title>
<para>
The usual technique for getting something that looks
object-oriented in non-object-oriented languages is to fill a
structure with function pointers, and have those functions take
the structure itself as an argument. This works just fine in
assembler, of course (and doesn't really require anything more
than your traditional jump-indirects), but it's also possible to
use a lot of the standard optimizations that languages such as C++
provide.
</para>
<para>
The most important of these is the <emphasis>vtable</emphasis>.
Each object type has its own vtable, and it's a list of function
pointers for all the methods that type provides. This is a space
savings over the traditional structs-with-function-pointers
approach because when you have many objects of the same class, you
only have to represent the vtable once. So that all objects may
be treated identically, the vtable location is traditionally fixed
as being the first entry in the corresponding structure.
</para>
<para>
Virtual method invocation takes an object pointer (traditionally
called <literal>self</literal> or <literal>this</literal>) and a
method index and invokes the appropriate method on that object.
Gee, where have we seen that before?
</para>
<programlisting>
; Vtable routine for the sprite type: do'jump'table dispatches on the
; method index in .Y to the matching routine in the .word list.
sprite'vtable:
jsr do'jump'table
.word sprite'init, sprite'update, sprite'render ; methods 0, 1 and 2
</programlisting>
<para>
We mentioned before that vtables are generally the first entries
in objects. We can play another nasty trick here, paying an
additional byte per object to have the vtable be not merely a
pointer to its vtable routine, but an actual jump instruction to
it. (That is, if an object is at location X, then location X is
the byte value <literal>$4C</literal>,
representing <literal>JMP</literal>, location X+1 is the low byte
of the vtable, and location X+2 is the high byte of the vtable.)
Given that, our <literal>invokevirtual</literal> function becomes
very simple indeed:
</para>
<programlisting>
; invokevirtual: call a method on an object.
; In: .A/.X = low/high byte of the object's address; .Y = method index,
; which is not touched here.
invokevirtual:
sta this ; this = object address, low byte...
stx this+1 ; ...and high byte
jmp (this) ; jump to the object's first byte (the JMP opcode described above)
</programlisting>
<para>
Which, combined with all our previous work here, takes
the <literal>this</literal> pointer in <literal>.AX</literal> and
a method identifier in <literal>.Y</literal> and invokes that
method on that object. Arguments besides <literal>this</literal>
need to be set up before the call
to <literal>invokevirtual</literal>, probably in some global
argument array somewhere as discussed back in <xref linkend="hll2">.
</para>
</section>
<section>
<title>A final reminder</title>
<para>
We've been talking about all these routines as if they could be
copy-pasted or hand-compiled from C++ or Java code. This isn't
really the case, primarily because <quote>local variables</quote>
in your average assembler routines aren't really local, so
multiple calls to the same method will tend to trash the program
state. And since a lot of the machinery described here shares a
lot of memory (in particular, every single method invocation
everywhere shares a <literal>this</literal>), attempting to shift
over standard OO code into this format is likely to fail
miserably.
</para>
<para>
You can get an awful lot of flexibility out of even just one layer
of method-calls, though, given a thoughtful
design. The <literal>do'jump'table</literal> routine, or one very
like it, was extremely common in NES games in the mid-1980s and
later, usually as the beginning of the frame-update loop.
</para>
<para>
If you find you really need multiple layers of method calls,
though, then you really are going to need a full-on program stack,
and that's going to be several kinds of mess. That's the topic
for the final chapter.
</para>
</section>
</chapter>

218
doc/hll5.sgm Normal file
View File

@ -0,0 +1,218 @@
<chapter>
<title>Call Stacks</title>
<para>
All our previous work has been assuming FORTRAN-style calling
conventions. In this, all procedure-local variables are actually
secretly globals. This means that a function that calls itself will
end up stomping on its previous values, and everything will be
hideously scrambled. Various workarounds for this are covered
in <xref linkend="hll2">. Here, we solve the problem fully.
</para>
<section>
<title>Recursion</title>
<para>
A procedure in C or other similar languages declares a chunk of
storage that's unique to that invocation. This chunk is just
large enough to hold the return address and all the local
variables, and is called the <emphasis>stack frame</emphasis>.
Stack frames are arranged on a <emphasis>call stack</emphasis>;
when a function is called, the stack grows with the new frame, and
when that function returns, its frame is destroyed. Once the main
function returns, the stack is empty.
</para>
<para>
Most modern architectures are designed to let you implement
variable access like this directly, without touching the registers
at all. The x86 architecture even dedicates a register to
function explicitly as the <emphasis>stack pointer</emphasis>, and
then one could read, say, the fifth 16-bit variable into the
register AX with the command <literal>MOV AX, [SP+10]</literal>.
</para>
<para>
As we saw in <xref linkend="hll3">, the 6502 isn't nearly as
convenient. We'd need to keep the stack pointer somewhere on the
zero page, then load the Y register with 10, then load the
accumulator with an indirect-indexed load. This is verbose, keeps
trashing our registers, and it's very, very slow.
</para>
<para>
So, in the spirit of programmers everywhere, we'll cheat.
</para>
</section>
<section>
<title>Our Goals</title>
<para>
The system we develop should have all of the following
characteristics.
</para>
<itemizedlist>
<listitem><para>It should be <emphasis>intuitive to program for</emphasis>. The procedure bodies should be easily readable and writable by humans, even in assembler form.</para></listitem>
<listitem><para>It should be <emphasis>efficient</emphasis>. Variable accesses are very common, so procedures shouldn't cost much to run.</para></listitem>
<listitem><para>It should allow <emphasis>multiple arity</emphasis> in both arguments and return values. We won't require that an unlimited amount of information be passable, but it should allow more than the three bytes the registers give us.</para></listitem>
<listitem><para>It should permit <emphasis>tail call elimination</emphasis>, an optimization that will allow certain forms of recursion to actually not grow the stack.</para></listitem>
</itemizedlist>
<para>
Here is a system that meets all these properties.
</para>
<itemizedlist>
<listitem><para>Reserve two bytes of the zero page for a stack pointer. At the beginning of the program, set it to the top of memory.</para></listitem>
<listitem><para>Divide the remainder of Zero Page into two parts:
<itemizedlist>
<listitem><para>The <emphasis>scratch space</emphasis>, which is where arguments and return values go, and which may be scrambled by any function call, and</para></listitem>
<listitem><para>The <emphasis>local area</emphasis>, which all functions must restore to their initial state once finished.</para></listitem>
</itemizedlist>
</para></listitem>
<listitem><para>Assign to each procedure a <emphasis>frame size</emphasis> S, which is a maximum size on the amount of the local area the procedure can use. The procedure's variables will sit in the first S bytes of the local area.</para></listitem>
<listitem><para>Upon entering the procedure, push the first S bytes of the local area onto the stack; upon exit, pop those S bytes back on top of the local area.</para></listitem>
<listitem><para>While the procedure is running, only touch the local area and the scratch space.</para></listitem>
</itemizedlist>
<para>This meets our design criteria neatly:</para>
<itemizedlist>
<listitem><para>It's as intuitive as such a system will get. You have to call <literal>init'stack</literal> at the beginning, and you need to ensure that <literal>save'stack</literal> and <literal>restore'stack</literal> are called right. The procedure's program text can pretend that it's just referring to its own variables, just like with the old style. If a procedure doesn't call <emphasis>anyone</emphasis>, then it can just do all its work in the scratch space.</para></listitem>
<listitem><para>It's efficient; the inside of the procedure is likely to be faster and smaller than its FORTRAN-style counterpart, because all variable references are on the Zero Page.</para></listitem>
<listitem><para>Both arguments and return values can be as large as the scratch space. It's not infinite, but it's probably good enough.</para></listitem>
<listitem><para>Tail call elimination is possible; just restore the stack before making the JMP to the tail call target.</para></listitem>
</itemizedlist>
<para>
The necessary support code is pretty straightforward. The stack
modification routines take the size of the frame in the
accumulator. While saving the local area, <literal>save'stack</literal>
also copies over the corresponding values from the scratch space. (This is because
most functions will be wanting to keep their arguments around
across calls.)
</para>
<programlisting>
.scope
; Stack routines: init'stack, save'stack, restore'stack.
; The software stack grows downward; _sp points at the most recently
; saved frame.
.data zp
.space _sp $02 ; software stack pointer
.space _counter $01 ; bytes left to copy in the current loop
.space fun'args $10 ; scratch space: arguments and return values
.space fun'vars $40 ; local area: saved and restored around calls
.text
; init'stack: point the software stack at $A000. Clobbers .A.
init'stack:
lda #$00
sta _sp
lda #$A0
sta _sp+1
rts
; save'stack: open a frame of .A bytes.
; Pushes fun'vars[0..A-1] onto the software stack, then reloads those
; bytes from fun'args[0..A-1]. .A must be non-zero: the loop tests
; _counter only after decrementing it, so 0 would copy 256 bytes.
; Clobbers .A and .Y.
save'stack:
sta _counter
sec ; _sp = _sp - frame size (16-bit)
lda _sp
sbc _counter
sta _sp
lda _sp+1
sbc #$00
sta _sp+1
ldy #$00
* lda fun'vars, y ; save the old local...
sta (_sp), y
lda fun'args, y ; ...and replace it with the matching argument
sta fun'vars, y
iny
dec _counter
bne -
rts
; restore'stack: close a frame of .A bytes (same non-zero rule as above).
; Copies the saved bytes back into fun'vars, then adds .A to _sp.
; Clobbers .A and .Y; fun'args is not written, so return values survive.
restore'stack:
pha ; keep the size for the pointer adjustment below
sta _counter
ldy #$00
* lda (_sp), y
sta fun'vars, y
iny
dec _counter
bne -
pla
clc ; _sp = _sp + frame size (16-bit)
adc _sp
sta _sp
lda _sp+1
adc #$00
sta _sp+1
rts
.scend
</programlisting>
</section>
<section>
<title>Example: Fibonacci Numbers</title>
<para>
About the simplest <quote>interesting</quote> recursive function
is the Fibonacci numbers. The function fib(x) is defined as being
1 if x is 0 or 1, and being fib(x-2)+fib(x-1) otherwise.
</para>
<para>
Actually expressing it like that directly produces a very
inefficient implementation, but it's a simple demonstration of the
system. Here's code for expressing the fib function:
</para>
<programlisting>
.scope
; Uint16 fib (Uint8 x): compute Xth Fibonacci number.
; fib(0) = fib(1) = 1.
; Argument in fun'args; 16-bit result returned in fun'args/fun'args+1.
; Stack usage: 3.
fib: lda #$03
jsr save'stack ; frame: fun'vars = x, +1/+2 = first partial result
lda fun'vars ; If x is below 2, goto _base.
cmp #$02
bcc _base
dec fun'args ; Otherwise, call fib(x-1)...
jsr fib
lda fun'args ; Copy the result to local variables...
sta fun'vars+1
lda fun'args+1
sta fun'vars+2
lda fun'vars ; Call fib(x-2)...
sec
sbc #$02
sta fun'args
jsr fib
clc ; And add the old result to it, leaving the
lda fun'args ; sum in the result location.
adc fun'vars+1
sta fun'args
lda fun'args+1
adc fun'vars+2
sta fun'args+1
jmp _done ; and then we're done.
_base: ldy #$01 ; In the base case, the result is just 1.
sty fun'args
dey
sty fun'args+1
_done: lda #$03 ; Same frame size as was given to save'stack.
jsr restore'stack
rts
.scend
</programlisting>
<para>
The full application, which deals with interfacing with CBM BASIC
and handles console I/O and such, is in <xref linkend="fib-src"
endterm="fib-fname">.
</para>
</section>
</chapter>

View File

@ -9,21 +9,60 @@
<!ENTITY samplecode SYSTEM "samplecode.sgm">
<!ENTITY pre1 SYSTEM "preface.sgm">
<!ENTITY cmdref SYSTEM "cmdref.sgm">
<!ENTITY hll1 SYSTEM "hll1.sgm">
<!ENTITY hll2 SYSTEM "hll2.sgm">
<!ENTITY hll3 SYSTEM "hll3.sgm">
<!ENTITY hll4 SYSTEM "hll4.sgm">
<!ENTITY hll5 SYSTEM "hll5.sgm">
]>
<book>
<bookinfo>
<title>Programming with Ophis</title>
<author><firstname>Michael</firstname><surname>Martin</surname></author>
<copyright><year>2006-7</year><holder>Michael Martin</holder></copyright>
<copyright><year>2006-2012</year><holder>Michael Martin</holder></copyright>
</bookinfo>
&pre1;
&part1;
&part2;
&part3;
&part4;
&part5;
&part6;
&part7;
<part label="I">
<title>Using the Ophis Assembler</title>
<partintro>
<para>
The chapters in Part 1 are a tutorial guiding you through the
features and programming model of the Ophis assembler. It uses
the Commodore 64 as its target platform.
</para>
<para>
This is not a tutorial on 6502 assembly language; those are
available elsewhere.
</para>
</partintro>
&part1;
&part2;
&part3;
&part4;
&part5;
&part6;
&part7;
</part>
<part label="II">
<title>To HLL and Back</title>
<partintro>
<para>
This is a compilation of an essay series I wrote from
2002-2005 explaining how to apply constructs from
high-level languages (HLLs) in your assembly language projects.
</para>
<para>
The examples have been updated and modernized for Ophis 2, and
while the examples all target the Commodore 64, they are more
generally applicable.
</para>
</partintro>
&hll1;
&hll2;
&hll3;
&hll4;
&hll5;
</part>
&samplecode;
&cmdref;
</book>

View File

@ -1,6 +1,5 @@
<preface>
<title>Preface</title>
<para>
The Ophis project started on a lark back in 2001. My graduate
studies required me to learn Perl and Python, and I'd been playing
@ -8,42 +7,50 @@
to learn both languages by writing a simple cross-assembler for
the 6502 chip the C-64 used in both.
</para>
<para>
The Perl version was quickly abandoned, but the Python one slowly
grew in scope and power over the years, and by 2005 was a very
powerful, flexible macro assembler that saw more use than I'd
expect. In 2007 I finally got around to implementing the last few
features I really wanted and polishing it up for general release.
The Perl one&mdash;uncreatively
dubbed <quote>Perl65</quote>&mdash;was quickly abandoned, but the
Python one saw more work. When it came time to name it, one of the
things I had been hoping to do with the assembler was to produce
working Apple II programs. <quote>Ophis</quote> is Greek
for <quote>snake</quote>, and a number of traditions also use it
as the actual <emphasis>name</emphasis> of the serpent in the
Garden of Eden. So, Pythons, snakes, and stories involving really
old Apples all combined to name the
assembler.<footnote><para>Ironically, cross-platform development
for the Apple II is extremely difficult, and while Ophis has been
very successfully used to develop code for the Commodore 64,
Nintendo Entertainment System, and Atari 2600, it has yet to
actually be deployed on any of the Apples which inspired its
name.</para></footnote>
</para>
<para>
Part of that process has been formatting the various little
tutorials and references I'd created into a single, unified
document&mdash;the one you are now reading.
Ophis slowly grew in scope and power over the years, and by 2005
was a very powerful, flexible macro assembler that saw more use
than I'd expect. In 2007 Ophis 1.0 was formally released.
However, Ophis was written for Python 2.1, and this became more and
more untenable as time went by. As I started receiving patches
for parts of Ophis, and as I used it for some projects of my own,
it became clear that Ophis needed to be modernized and to become
better able to interoperate with other toolchains. It was this
process that led to Ophis 2.
</para>
<para>
This is an updated edition of <emphasis>Programming With
Ophis</emphasis>, including documentation for all new features
introduced and expanding the examples to include simple
demonstration programs for platforms besides the Commodore 64. It
also includes updated versions of the <emphasis>To HLL and
Back</emphasis> essays I wrote using Ophis and Perl65 as example
languages.
</para>
<section>
<title>Why <quote>Ophis</quote>?</title>
<para>
It's actually a kind of a horrific pun. See, I was using Python
at the time, and one of the things I had been hoping to do with
the assembler was to produce working Apple II
programs. <quote>Ophis</quote> is Greek
for <quote>snake</quote>, and a number of traditions also use it
as the actual <emphasis>name</emphasis> of the serpent in the
Garden of Eden. So, Pythons, snakes, and stories involving
really old Apples all combined to name the assembler.
</para>
</section>
<section>
<title>Getting a copy of Ophis</title>
<para>
If you're reading this as part of the Ophis install, you clearly
already have it. If not, as of this writing the homepage for
the Ophis assembler
is <ulink url="http://hkn.eecs.berkeley.edu/~mcmartin/ophis/"></ulink>. If
As of this writing, the Ophis assembler is hosted on GitHub. The
latest downloads and documentation will be available
at <ulink url="http://github.com/michaelcmartin/Ophis"></ulink>. If
this is out-of-date, a Web search on <quote>Ophis 6502
assembler</quote> (without the quotation marks) should yield its
page.
@ -58,17 +65,30 @@
somewhere in your path.
</para>
<para>
Windows users that have Python installed can use the same source
distributions that the other operating systems
use; <command>ophis.bat</command> will arrange the environment
variables accordingly and invoke the main script.
For Windows users, a prepackaged system made
with <command>py2exe</command> is also available. The default
Windows installer will use this. In this case, all you need to
do is have <command>ophis.exe</command> in your path.
</para>
</section>
<section>
<title>About the examples</title>
<para>
Versions of the examples in this book are available from the Ophis site. Windows users will find them packaged with the distribution; all other users can get them as a separate download or pull them directly from GitHub.
</para>
<para>
If you are on Windows and do not have Python installed, a
prepackaged system made with <command>py2exe</command> is also
available. The default Windows installer will use this. In
this case, all you need to do is
have <command>ophis.exe</command> in your path.
The code in this book is available in
the <literal>examples/</literal> subdirectory, while extra
examples will be in subdirectories of their own with brief
descriptions.
</para>
<para>
Most examples will require use of <emphasis>platform
headers</emphasis>&mdash;standardized header files that set
useful constants for the target system and, if needed, contain
small programs to allow the program to be loaded and run. These
are stored in the <literal>platform/</literal> subdirectory.
</para>
</section>
</preface>

View File

@ -5,10 +5,11 @@
of this manual.
</para>
<section id="tutor1-src">
<title id="tutor1-fname"><filename>tutor1.oph</filename></title>
<title id="tutor1-fname"><filename>hello1.oph</filename></title>
<programlisting>
.word $0801
.org $0801
.outfile "hello.prg"
.word next, 10 ; Next line and current line number
.byte $9e," 2064",0 ; SYS 2064
@ -28,10 +29,11 @@ hello: .byte "HELLO, WORLD!", 0
</programlisting>
</section>
<section id="tutor2-src">
<title id="tutor2-fname"><filename>tutor2.oph</filename></title>
<title id="tutor2-fname"><filename>hello2.oph</filename></title>
<programlisting>
.word $0801
.org $0801
.outfile "hello.prg"
.scope
.word _next, 10 ; Next line and current line number
@ -68,85 +70,86 @@ _next: .word 0 ; End of program
.advance 2064
.require "kernal.oph"
.require "../platform/c64kernal.oph"
</programlisting>
</section>
<section id="kernal-src">
<title id="kernal-fname"><filename>kernal.oph</filename></title>
<title id="kernal-fname"><filename>c64kernal.oph</filename></title>
<programlisting>
; KERNAL routine aliases (C64)
.alias acptr $ffa5
.alias chkin $ffc6
.alias chkout $ffc9
.alias chrin $ffcf
.alias chrout $ffd2
.alias ciout $ffa8
.alias cint $ff81
.alias clall $ffe7
.alias close $ffc3
.alias clrchn $ffcc
.alias getin $ffe4
.alias iobase $fff3
.alias ioinit $ff84
.alias listen $ffb1
.alias load $ffd5
.alias membot $ff9c
.alias memtop $ff99
.alias open $ffc0
.alias plot $fff0
.alias ramtas $ff87
.alias rdtim $ffde
.alias readst $ffb7
.alias restor $ff8a
.alias save $ffd8
.alias scnkey $ff9f
.alias screen $ffed
.alias second $ff93
.alias setlfs $ffba
.alias setmsg $ff90
.alias setnam $ffbd
.alias settim $ffdb
.alias settmo $ffa2
.alias stop $ffe1
.alias talk $ffb4
.alias tksa $ff96
.alias udtim $ffea
.alias unlsn $ffae
.alias untlk $ffab
.alias vector $ff8d
.alias acptr $ffa5
.alias chkin $ffc6
.alias chkout $ffc9
.alias chrin $ffcf
.alias chrout $ffd2
.alias ciout $ffa8
.alias cint $ff81
.alias clall $ffe7
.alias close $ffc3
.alias clrchn $ffcc
.alias getin $ffe4
.alias iobase $fff3
.alias ioinit $ff84
.alias listen $ffb1
.alias load $ffd5
.alias membot $ff9c
.alias memtop $ff99
.alias open $ffc0
.alias plot $fff0
.alias ramtas $ff87
.alias rdtim $ffde
.alias readst $ffb7
.alias restor $ff8a
.alias save $ffd8
.alias scnkey $ff9f
.alias screen $ffed
.alias second $ff93
.alias setlfs $ffba
.alias setmsg $ff90
.alias setnam $ffbd
.alias settim $ffdb
.alias settmo $ffa2
.alias stop $ffe1
.alias talk $ffb4
.alias tksa $ff96
.alias udtim $ffea
.alias unlsn $ffae
.alias untlk $ffab
.alias vector $ff8d
; Character codes for the colors.
.alias color'0 144
.alias color'1 5
.alias color'2 28
.alias color'3 159
.alias color'4 156
.alias color'5 30
.alias color'6 31
.alias color'7 158
.alias color'8 129
.alias color'9 149
.alias color'10 150
.alias color'11 151
.alias color'12 152
.alias color'13 153
.alias color'14 154
.alias color'15 155
.alias color'0 144
.alias color'1 5
.alias color'2 28
.alias color'3 159
.alias color'4 156
.alias color'5 30
.alias color'6 31
.alias color'7 158
.alias color'8 129
.alias color'9 149
.alias color'10 150
.alias color'11 151
.alias color'12 152
.alias color'13 153
.alias color'14 154
.alias color'15 155
; ...and reverse video
.alias reverse'on 18
.alias reverse'off 146
.alias reverse'on 18
.alias reverse'off 146
; ...and character set
.alias upper'case 142
.alias lower'case 14
.alias upper'case 142
.alias lower'case 14
</programlisting>
</section>
<section id="tutor3-src">
<title id="tutor3-fname"><filename>tutor3.oph</filename></title>
<title id="tutor3-fname"><filename>hello3.oph</filename></title>
<programlisting>
.include "c64-1.oph"
.outfile "hello.prg"
.macro print
ldx #0
@ -194,9 +197,10 @@ target10: .byte "UNIVERSE", 0
</programlisting>
</section>
<section id="tutor4a-src">
<title id="tutor4a-fname"><filename>tutor4a.oph</filename></title>
<title id="tutor4a-fname"><filename>hello4a.oph</filename></title>
<programlisting>
.include "c64-1.oph"
.outfile "hello.prg"
.macro print
ldx #0
@ -265,9 +269,10 @@ delay: tax
</programlisting>
</section>
<section id="tutor4b-src">
<title id="tutor4b-fname"><filename>tutor4b.oph</filename></title>
<title id="tutor4b-fname"><filename>hello4b.oph</filename></title>
<programlisting>
.include "c64-1.oph"
.outfile "hello.prg"
.macro print
ldx #0
@ -338,9 +343,10 @@ delay: tax
</programlisting>
</section>
<section id="tutor4c-src">
<title id="tutor4c-fname"><filename>tutor4c.oph</filename></title>
<title id="tutor4c-fname"><filename>hello4c.oph</filename></title>
<programlisting>
.include "c64-1.oph"
.outfile "hello.prg"
.macro print
ldx #0
@ -414,9 +420,10 @@ delay: tax
</programlisting>
</section>
<section id="tutor5-src">
<title id="tutor5-fname"><filename>tutor5.oph</filename></title>
<title id="tutor5-fname"><filename>hello5.oph</filename></title>
<programlisting>
.include "c64-1.oph"
.outfile "hello.prg"
.data
.org $C000
@ -494,9 +501,10 @@ delay: sta _tmp ; save argument (rdtim destroys it)
</programlisting>
</section>
<section id="tutor6-src">
<title id="tutor6-fname"><filename>tutor6.oph</filename></title>
<title id="tutor6-fname"><filename>hello6.oph</filename></title>
<programlisting>
.include "c64-1.oph"
.outfile "hello.prg"
.data
.org $C000
@ -601,43 +609,63 @@ _done: rts
</programlisting>
</section>
<section id="c64-2-src">
<title id="c64-2-fname"><filename>c64-2.oph</filename></title>
<title id="c64-2-fname"><filename>c64_0.oph</filename></title>
<programlisting>
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Commodore 64 Basic Runtime File
;;
;; Include this at the TOP of your C64 program, and it will handle
;; hiding away the BASIC ROM and data and restoring it at the end.
;;
;; You will have a contiguous block of RAM from $0800 to $CF81, and
;; Zero Page access from $02 to $7F in the segment "zp".
.word $0801
.org $0801
; BASIC program that just calls our machine language code
.scope
.word _next, 10 ; Next line and current line number
.byte $9e," 2064",0 ; SYS 2064
.byte $9e," 2062",0 ; SYS 2062
_next: .word 0 ; End of program
.scend
.advance $0810
.require "kernal.oph"
.data zp
.data zp ; Zero Page memory segment.
.org $0002
.text
.scope
; Cache BASIC's zero page at top of available RAM.
ldx #$7E
* lda $01, x
sta $CF81, x
; Cache BASIC zero page at top of available RAM
ldx #$7E
* lda $01, x
sta $CF81, x
dex
bne -
bne -
jsr _main
; Swap out the BASIC ROM for RAM
lda $01
and #$fe
ora #$06
sta $01
; Restore BASIC's zero page and return control.
; Run the real program
jsr _main
ldx #$7E
* lda $CF81, x
sta $01, x
; Restore BASIC ROM
lda $01
ora #$07
sta $01
; Restore BASIC zero page
ldx #$7E
* lda $CF81, x
sta $01, x
dex
bne -
bne -
; Back to BASIC
rts
_main:
@ -646,9 +674,11 @@ _main:
</programlisting>
</section>
<section id="tutor7-src">
<title id="tutor7-fname"><filename>tutor7.oph</filename></title>
<title id="tutor7-fname"><filename>hello7.oph</filename></title>
<programlisting>
.include "c64-2.oph"
.include "../platform/c64_0.oph"
.require "../platform/c64kernal.oph"
.outfile "hello.prg"
.data
.org $C000
@ -744,6 +774,461 @@ _done: rts
.data zp
.checkpc $80
</programlisting>
</section>
<section id="structure-src">
<title id="structure-fname"><filename>structuredemo.oph</filename></title>
<programlisting>
.include "../platform/c64_0.oph"
.require "../platform/c64kernal.oph"
.outfile "structuredemo.prg"
; Main program: show the raw data, build the sorted list, show it again.
jsr print'unsorted
jsr insertion'sort
jsr print'list
rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Linked list data: head, next, lb, hb.
; lb/hb: Low/high bytes of the data array. These are immutable and
; kept with the program text.
; head: Array index of the first element in the list, or #$FF if the
; list is empty
; next: Array of successor indices. If you've just read element X,
; the value of memory location next+X is the index of the
; next element. If next is #$FF, you've reached the end of
; the list.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
.data
.org $C000
.space head 1
.space next 16 ; one successor index per data element
.text
; The 16 values to sort, split into a low-byte and a high-byte array.
lb: .byte &lt;$838,&lt;$618,&lt;$205,&lt;$984,&lt;$724,&lt;$301,&lt;$249,&lt;$946
.byte &lt;$925,&lt;$043,&lt;$114,&lt;$697,&lt;$985,&lt;$633,&lt;$312,&lt;$086
hb: .byte &gt;$838,&gt;$618,&gt;$205,&gt;$984,&gt;$724,&gt;$301,&gt;$249,&gt;$946
.byte &gt;$925,&gt;$043,&gt;$114,&gt;$697,&gt;$985,&gt;$633,&gt;$312,&gt;$086
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; insertion'sort: Rebuilds the linked list (head/next) so that it
; visits the lb/hb array in sorted order.
; Arguments: None.
; Modifies: All registers destroyed, head and next array sorted.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
insertion'sort:
ldx #$FF ; $FF is the end-of-list marker, so storing
stx head ; it in 'head' empties the list
inx ; X wraps to 0: first array index
insertion'sort'each:
txa ; insert_elt destroys X, so park the
pha ; index on the hardware stack
jsr insert_elt
pla
tax
inx
cpx #$10 ; all 16 elements inserted?
bne insertion'sort'each
rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; insert_elt: Insert an element into the linked list. Maintains the
; list in sorted, ascending order. Used by
; insertion'sort. Values are compared as unsigned
; 16-bit quantities, so the branches test the borrow
; (carry) of the subtraction rather than its sign.
; Arguments: X register holds the index of the element to add.
; Modifies: All registers destroyed; head and next arrays updated
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
.data
.space lbtoinsert 1
.space hbtoinsert 1
.space indextoinsert 1
.text
insert_elt:
ldy head ; If the list is empty, make
cpy #$FF ; head point at it, and return.
bne insert_elt'list'not'empty
stx head
tya ; .Y is still #$FF here, so this marks
sta next,x ; the new element as the end of the list.
rts
insert_elt'list'not'empty:
lda lb,x ; Cache the data we're inserting
sta lbtoinsert
lda hb,x
sta hbtoinsert
stx indextoinsert
ldy head ; Compare the first value with
sec ; the data. If the data must
lda lb,y ; be inserted at the front...
sbc lbtoinsert
lda hb,y
sbc hbtoinsert
bcc insert_elt'not'smallest ; borrow: head value is below the data
tya ; Set its next pointer to the
sta next,x ; old head, update the head
stx head ; pointer, and return.
rts
insert_elt'not'smallest:
ldx head
insert_elt'loop: ; At this point, we know that
lda next,x ; argument &gt; data[X].
tay
cpy #$FF ; if next[X] = #$FF, insert arg at end.
beq insert_elt'insert'after'current
lda lb,y ; Otherwise, compare arg to
sec ; data[next[X]]. If we insert
sbc lbtoinsert ; before that...
lda hb,y
sbc hbtoinsert
bcc insert_elt'goto'next ; borrow: successor is still below arg
insert_elt'insert'after'current: ; Fix up all the next links
tya ; new element's successor = old next[X]
ldy indextoinsert
sta next,y
tya ; next[X] = new element
sta next,x
rts ; and return.
insert_elt'goto'next: ; Otherwise, let X = next[X]
tya ; and go looping again.
tax
jmp insert_elt'loop
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; print'unsorted: Prints the header, then every lb/hb array entry in
; storage order as four hex digits and a space.
; Standalone procedure.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
print'unsorted:
ldx #&gt;unsorted'hdr
lda #&lt;unsorted'hdr
jsr put'string
ldy #$00
print'unsorted'next:
lda hb, y ; high byte first...
jsr print'hex
lda lb, y ; ...then low byte
jsr print'hex
lda #$20 ; separating space
jsr chrout
iny
cpy #$10 ; 16 entries in all
bne print'unsorted'next
lda #$0D ; finish the line
jsr chrout
rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; print'list: Prints the header, then walks the linked list from
; head, printing each value as four hex digits and a
; space.
; Standalone procedure.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
print'list:
ldx #&gt;sorted'hdr
lda #&lt;sorted'hdr
jsr put'string
ldy head
print'list'next:
cpy #$FF ; end-of-list marker?
beq print'list'end
lda hb, y ; high byte first...
jsr print'hex
lda lb, y ; ...then low byte
jsr print'hex
lda #$20 ; separating space
jsr chrout
lda next, y ; follow the link to the successor
tay
jmp print'list'next
print'list'end:
lda #$0D ; finish the line
jsr chrout
rts
;; Header strings used by print'unsorted and print'list.
unsorted'hdr:
.byte 147 ; Clear screen first!
.byte "UNSORTED DATA:",13,0
sorted'hdr:
.byte "SORTED DATA:",13,0
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; print'hex: outputs a two-character hex representation of a one-
; byte value.
; Arguments: Byte to print in accumulator
; Modifies: .A and .X
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
print'hex:
pha ; keep the full byte for the low digit
lsr ; move the high nibble down to bits 0-3; LSR
lsr ; shifts in zeroes by itself, so no CLC is
lsr ; needed beforehand
lsr
tax
lda hexstr,x
jsr chrout
pla
and #$0F ; isolate the low nibble
tax
lda hexstr,X
jsr chrout
rts
; Character data array for print'hex.
hexstr: .byte "0123456789ABCDEF"
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; put'string: prints a zero-terminated string of fewer than 256
; characters. If no terminator turns up before the index
; wraps around, the routine simply returns.
; Arguments: Low byte of string address in .A, high byte in .X
; Modifies: .A and .Y
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
.data zp
.space put'string'addr 2
.text
put'string:
stx put'string'addr+1
sta put'string'addr
ldy #$00
put'string'next:
lda (put'string'addr),y
beq put'string'end ; terminator reached
jsr chrout
iny
bne put'string'next ; Y wrapped to 0: give up after 256 bytes
put'string'end:
rts
</programlisting>
</section>
<section id="fib-src">
<title id="fib-fname"><filename>fibonacci.oph</filename></title>
<programlisting>
.include "../platform/c64_0.oph"
.require "../platform/c64kernal.oph"
.outfile "fibonacci.prg"
; Main program: prints a table of the first 20 Fibonacci numbers.
; The counter lives in fun'vars, which fib saves and restores.
jsr init'stack ; Set up the call stack before fib uses it
lda #&lt;opening ; Print opening text
sta fun'args
lda #&gt;opening
sta fun'args+1
jsr print'string
lda #$00
sta fun'vars ; Count num from 0 to 19
* lda fun'vars ; Main loop: print num, with leading space if &lt;10
cmp #$09 ; (the value shown is num+1, hence 9 here)
bcs +
lda #$20
jsr chrout
lda fun'vars
* sta fun'args ; Copy num to args, print it, plus ": "
inc fun'args
lda #$00
sta fun'args+1
jsr print'dec
lda #$3A
jsr chrout
lda #$20
jsr chrout
lda fun'vars ; Copy num to args, call fib, print result
sta fun'args
jsr fib
jsr print'dec
lda #$0D ; Newline
jsr chrout
inc fun'vars ; Increment num; if it's 20, we're done.
lda fun'vars
cmp #20
bne -- ; Otherwise, loop.
rts
opening:
.byte 147, " FIBONACCI SEQUENCE",13,13,0
.scope
; Uint16 fib (Uint8 x): compute Xth Fibonacci number.
; fib(0) = fib(1) = 1.
; Argument in fun'args; 16-bit result returned in fun'args/fun'args+1.
; Stack usage: 3.
fib: lda #$03
jsr save'stack ; frame: fun'vars = x, +1/+2 = first partial result
lda fun'vars ; If x &lt; 2, goto _base.
cmp #$02
bcc _base
dec fun'args ; Otherwise, call fib(x-1)...
jsr fib
lda fun'args ; Copy the result to local variable...
sta fun'vars+1
lda fun'args+1
sta fun'vars+2
lda fun'vars ; Call fib(x-2)...
sec
sbc #$02
sta fun'args
jsr fib
clc ; And add the old result to it, leaving it
lda fun'args ; in the 'result' location.
adc fun'vars+1
sta fun'args
lda fun'args+1
adc fun'vars+2
sta fun'args+1
jmp _done ; and then we're done.
_base: ldy #$01 ; In the base case, just copy 1 to the
sty fun'args ; result.
dey
sty fun'args+1
_done: lda #$03 ; Same frame size as was given to save'stack.
jsr restore'stack
rts
.scend
.scope
; Stack routines: init'stack, save'stack, restore'stack
; The software stack grows downward; _sp points at the most recently
; saved frame.
.data zp
.space _sp $02 ; software stack pointer
.space _counter $01 ; bytes left to copy in the current loop
.space fun'args $10 ; scratch space: arguments and return values
.space fun'vars $40 ; local area: saved and restored around calls
.text
; init'stack: point the software stack at $A000. Clobbers .A.
init'stack:
lda #$00
sta _sp
lda #$A0
sta _sp+1
rts
; save'stack: open a frame of .A bytes.
; Pushes fun'vars[0..A-1] onto the software stack, then reloads those
; bytes from fun'args[0..A-1]. .A must be non-zero: the loop tests
; _counter only after decrementing it, so 0 would copy 256 bytes.
; Clobbers .A and .Y.
save'stack:
sta _counter
sec ; _sp = _sp - frame size (16-bit)
lda _sp
sbc _counter
sta _sp
lda _sp+1
sbc #$00
sta _sp+1
ldy #$00
* lda fun'vars, y ; save the old local...
sta (_sp), y
lda fun'args, y ; ...and replace it with the matching argument
sta fun'vars, y
iny
dec _counter
bne -
rts
; restore'stack: close a frame of .A bytes (same non-zero rule as above).
; Copies the saved bytes back into fun'vars, then adds .A to _sp.
; Clobbers .A and .Y; fun'args is not written, so return values survive.
restore'stack:
pha ; keep the size for the pointer adjustment below
sta _counter
ldy #$00
* lda (_sp), y
sta fun'vars, y
iny
dec _counter
bne -
pla
clc ; _sp = _sp + frame size (16-bit)
adc _sp
sta _sp
lda _sp+1
adc #$00
sta _sp+1
rts
.scend
; Utility functions. print'dec prints an unsigned 16-bit integer.
; It's ugly and long, mainly because we don't bother with niceties
; like "division". print'string prints a zero-terminated string.
.scope
.data
.org fun'args ; Overlay the temporaries on the argument area:
.space _val 2 ; value to print, i.e. the caller's argument
.space _step 2 ; power of ten currently being counted out
.space _res 1 ; digit found for the current power of ten
.space _allowzero 1 ; non-zero once any digit has been printed
.text
; print'dec: print the 16-bit value in fun'args as decimal, without
; leading zeroes. Overwrites fun'args through fun'args+5.
print'dec:
lda #$00
sta _allowzero ; suppress leading zeroes to begin with
lda #&lt;10000
sta _step
lda #&gt;10000
sta _step+1
jsr repsub'16 ; ten-thousands digit
lda #&lt;1000
sta _step
lda #&gt;1000
sta _step+1
jsr repsub'16 ; thousands digit
lda #0
sta _step+1
lda #100
sta _step
jsr repsub'16 ; hundreds digit
lda #10
sta _step
jsr repsub'16 ; tens digit
lda _val ; what remains is the ones digit;
jsr _print ; always print it, even if it is zero
rts
; repsub'16: count how many times _step can be subtracted from _val,
; leave the remainder in _val, and print the count as a digit unless
; it is a leading zero.
repsub'16:
lda #$00
sta _res
* lda _val ; trial subtraction: only the carry is kept
sec
sbc _step
lda _val+1
sbc _step+1
bcc _done ; borrow: _val is below _step, digit complete
lda _val ; otherwise do the subtraction for real
sec
sbc _step
sta _val
lda _val+1
sbc _step+1
sta _val+1
inc _res
jmp -
_done: lda _res
ora _allowzero ; zero digit and nothing printed yet?
beq _ret ; then skip it
sta _allowzero ; from now on zeroes must be printed
lda _res
_print: clc
adc #'0 ; digit value to character code
jsr chrout
_ret: rts
.scend
; print'string: prints the zero-terminated string whose address is in
; fun'args/fun'args+1. If no terminator is found within 256 bytes the
; routine returns quietly instead of wrapping around and looping
; forever. Clobbers .A and .Y.
print'string:
ldy #$00
* lda (fun'args), y
beq + ; terminator reached
jsr chrout
iny
bne - ; Y wrapped to 0: stop after 256 bytes
* rts
</programlisting>
</section>
</appendix>

View File

@ -256,6 +256,31 @@ hello: .byte "HELLO, WORLD!", 0
summary of available command line options.
</para>
<para>
Ophis takes a list of source files and produces an output file
based on assembling each file you give it, in order. You can add
a line to your program like this to name the output file:
</para>
<programlisting>
.outfile "hello.prg"
</programlisting>
<para>
Alternately, you can use the <option>-o</option> option on the
command line. This will override any <literal>.outfile</literal>
directives. If you don't specify any name, it will put the
output into a file named <filename>ophis.bin</filename>.
</para>
<para>
If you are using Ophis as part of some larger toolchain, you can
also make it run in <emphasis>pipe mode</emphasis>. If you give
a dash <option>-</option> as an input file or as the output
target, Ophis will use standard input or output for
communication.
</para>
<table frame="all">
<title>Ophis Options</title>
<tgroup cols='2'>
@ -266,13 +291,11 @@ hello: .byte "HELLO, WORLD!", 0
</row>
</thead>
<tbody>
<row><entry><option>-6510</option></entry><entry>Allows the 6510 undocumented opcodes as listed in the VICE documentation.</entry></row>
<row><entry><option>-65c02</option></entry><entry>Allows opcodes and addressing modes added by the 65C02.</entry></row>
<row><entry><option>-v 0</option></entry><entry>Quiet operation. Only reports errors.</entry></row>
<row><entry><option>-v 1</option></entry><entry>Default operation. Reports files as they are loaded, and gives statistics on the final output.</entry></row>
<row><entry><option>-v 2</option></entry><entry>Verbose operation. Names each assembler pass as it runs.</entry></row>
<row><entry><option>-v 3</option></entry><entry>Debug operation: Dumps the entire IR after each pass.</entry></row>
<row><entry><option>-v 4</option></entry><entry>Full debug operation: Dumps the entire IR and symbol table after each pass.</entry></row>
<row><entry><option>-o FILE</option></entry><entry>Overrides the default filename for output.</entry></row>
<row><entry><option>-u</option></entry><entry>Allows the 6510 undocumented opcodes as listed in the VICE documentation.</entry></row>
<row><entry><option>-c</option></entry><entry>Allows opcodes and addressing modes added by the 65C02.</entry></row>
<row><entry><option>-q</option></entry><entry>Quiet operation. Only reports warnings and errors.</entry></row>
<row><entry><option>-v</option></entry><entry>Verbose operation. Reports files as they are loaded.</entry></row>
</tbody>
</tgroup>
</table>
@ -283,30 +306,16 @@ hello: .byte "HELLO, WORLD!", 0
here:
</para>
<screen>
localhost$ ophis tutor1.oph tutor1.prg -v 2
Loading tutor1.oph
Running: Macro definition pass
Running: Macro expansion pass
Running: Label initialization pass
Fixpoint failed, looping back
Running: Label initialization pass
Running: Circularity check pass
Running: Expression checking pass
Running: Easy addressing modes pass
Running: Label Update Pass
Fixpoint failed, looping back
Running: Label Update Pass
Running: Instruction Collapse Pass
Running: Mode Normalization pass
Running: Label Update Pass
Running: Assembler
localhost$ ophis -v hello1.oph
Loading hello1.oph
Assembly complete: 45 bytes output (14 code, 29 data, 2 filler)
</screen>
<para>
If your emulator can run <filename>PRG</filename> files
directly, this file will now run (and
print <computeroutput>HELLO, WORLD!</computeroutput>) as many
times as you type <userinput>RUN</userinput>. Otherwise, use
This will produce a file named <filename>hello.prg</filename>. If
your emulator can run <filename>PRG</filename> files directly,
this file will now run (and print <computeroutput>HELLO,
WORLD!</computeroutput>) as many times as you
type <userinput>RUN</userinput>. Otherwise, use
a <filename>D64</filename> management utility to put
the <filename>PRG</filename> on a <filename>D64</filename>, then
load and run the file off that.

View File

@ -53,7 +53,10 @@
the KERNAL values are standard, we do not reproduce them here.
(The files in question are <xref linkend="c64-1-src"
endterm="c64-1-fname"> and <xref linkend="kernal-src"
endterm="kernal-fname">.)
endterm="kernal-fname">.) The <filename>c64kernal.oph</filename>
header is likely to be useful in your own projects, and it is
available in the <literal>platform/</literal> directory for easy
inclusion.
</para>
</section>
<section>

View File

@ -64,11 +64,11 @@ target10: .byte "Universe", 0
and lowercase are reversed, so we have messages
like <computeroutput>hELLO, sOLAR sYSTEM!</computeroutput>. For
the specific case of PETSCII, we can just fix our strings, but
that's less of an option if we're writing for the Apple II's
character set, or targeting a game console that puts its letters
in arbitrary locations. We need to remap how strings are turned
into byte values. The <literal>.charmap</literal>
and <literal>.charmapbin</literal> directives do what we need.
that's less of an option if we're writing for a game console that
puts its letters in arbitrary locations. We need to remap how
strings are turned into byte values.
The <literal>.charmap</literal> and <literal>.charmapbin</literal>
directives do what we need.
</para>
<para>
@ -102,9 +102,6 @@ target10: .byte "Universe", 0
specifies an external file, 256 bytes long, that is loaded in at
that point. A binary character map for the Commodore 64 is
provided with the sample programs
as <filename>petscii.map</filename>. There are also three
files, <filename>a2normal.map</filename>, <filename>a2inverse.map</filename>,
and <filename>a2blink.map</filename> that handle the Apple II's
very nonstandard character encodings.
as <filename>petscii.map</filename>.
</para>
</chapter>

View File

@ -48,27 +48,46 @@
locations $02-$7F are used by the BASIC interpreter, and
locations $80-$FF are used by the KERNAL. We don't need the
BASIC interpreter, though, so we can back up all of $02-$7F at
the start of our program and restore it all when we're done:
the start of our program and restore it all when we're done.
</para>
<para>
In fact, since we're disabling BASIC, we can actually also swap
out its ROM entirely and get a contiguous block of RAM from
$0002 to $CFFF:
</para>
<programlisting>
.scope
; Cache BASIC's zero page at top of available RAM.
ldx #$7E
* lda $01, x
sta $CF81, x
; Cache BASIC zero page at top of available RAM
ldx #$7E
* lda $01, x
sta $CF81, x
dex
bne -
bne -
jsr _main
; Swap out the BASIC ROM for RAM
lda $01
and #$fe
ora #$06
sta $01
; Restore BASIC's zero page and return control.
; Run the real program
jsr _main
ldx #$7E
* lda $CF81, x
sta $01, x
; Restore BASIC ROM
lda $01
ora #$07
sta $01
; Restore BASIC zero page
ldx #$7E
* lda $CF81, x
sta $01, x
dex
bne -
bne -
; Back to BASIC
rts
_main:
@ -79,7 +98,9 @@ _main:
<para>
The new, improved header file is <xref linkend="c64-2-src"
endterm="c64-2-fname">.
endterm="c64-2-fname">. This,
like <filename>c64kernal.oph</filename>, is available for use in
your own projects in the <literal>platform/</literal> directory.
</para>
<para>

213
examples/fibonacci.oph Normal file
View File

@ -0,0 +1,213 @@
.include "../platform/c64_0.oph"
.require "../platform/c64kernal.oph"
.outfile "fibonacci.prg"
; Main program: print the banner, then for num = 0..19 print one line
; holding num+1, a colon and a space, and fib(num).
;
; The software stack pointer has to be set up before fib is first
; called: save'stack and restore'stack address memory through it, and
; init'stack is the only routine that gives it a value.
        jsr init'stack
        lda #<opening           ; Print opening text
        sta fun'args
        lda #>opening
        sta fun'args+1
        jsr print'string
        lda #$00
        sta fun'vars            ; Count num from 0 to 19
*       lda fun'vars            ; Main loop: print num+1, with leading space if <10
        cmp #$09
        bcs +
        lda #$20
        jsr chrout
        lda fun'vars
*       sta fun'args            ; Copy num to args, add one, print it, plus ": "
        inc fun'args
        lda #$00
        sta fun'args+1
        jsr print'dec
        lda #$3A
        jsr chrout
        lda #$20
        jsr chrout
        lda fun'vars            ; Copy num to args, call fib, print result
        sta fun'args
        jsr fib
        jsr print'dec
        lda #$0D                ; Newline
        jsr chrout
        inc fun'vars            ; Increment num; if it's 20, we're done.
        lda fun'vars
        cmp #20
        bne --                  ; Otherwise, loop.
        rts
opening:
        .byte 147, " FIBONACCI SEQUENCE",13,13,0
.scope
; Uint16 fib (Uint8 x): compute the Xth Fibonacci number.
; fib(0) = fib(1) = 1.
; Arguments: x in fun'args.
; Result: 16 bits, low byte in fun'args and high byte in fun'args+1.
; Stack usage: 3 bytes per call. The frame holds x in fun'vars and
; the saved result of fib(x-1) in fun'vars+1 and fun'vars+2.
fib:
        lda #$03                ; Open a three-byte frame; x lands in fun'vars
        jsr save'stack
        lda fun'vars            ; x below 2 is the base case
        cmp #$02
        bcc _trivial
        dec fun'args            ; fun'args still holds x: ask for fib(x-1)
        jsr fib
        lda fun'args+1          ; Keep that result in the frame
        sta fun'vars+2
        lda fun'args
        sta fun'vars+1
        sec                     ; Now ask for fib(x-2)
        lda fun'vars
        sbc #$02
        sta fun'args
        jsr fib
        lda fun'args            ; result = fib(x-2) + fib(x-1), 16 bits wide
        clc
        adc fun'vars+1
        sta fun'args
        lda fun'args+1
        adc fun'vars+2
        sta fun'args+1
        jmp _leave
_trivial:
        ldy #$01                ; Base case: the result is 1
        sty fun'args
        dey
        sty fun'args+1
_leave:
        lda #$03                ; Close the frame opened on entry
        jsr restore'stack
        rts
.scend
.scope
; Stack routines: init'stack, save'stack, restore'stack
; Together these implement a software stack for function-local
; variables. The stack grows downward: save'stack lowers _sp by the
; frame size and restore'stack raises it again.
.data zp
; _sp: 16-bit pointer to the most recently saved frame.
.space _sp $02
; _counter: number of bytes still to copy in the current save/restore.
.space _counter $01
; fun'args: argument area; fib leaves its 16-bit result here as well.
.space fun'args $10
; fun'vars: local variable area that save'stack and restore'stack preserve.
.space fun'vars $40
.text
; init'stack: point _sp at $A000.
; Modifies: .A
init'stack:
lda #$00
sta _sp
lda #$A0
sta _sp+1
rts
; save'stack: open a new frame of N bytes.
; Arguments: .A = N, the frame size in bytes.
; Subtracts N from _sp, saves the first N bytes of fun'vars at the new
; _sp, and copies the first N bytes of fun'args into fun'vars.
; The loop tests _counter only after its first pass, so N = 0 is not
; treated as an empty frame.
; Modifies: .A and .Y (Y = N on exit), _sp, _counter, fun'vars.
save'stack:
sta _counter
sec
lda _sp ; _sp = _sp - N, 16 bits wide
sbc _counter
sta _sp
lda _sp+1
sbc #$00 ; Propagate the borrow into the high byte
sta _sp+1
ldy #$00
* lda fun'vars, y ; Save the caller's local...
sta (_sp), y
lda fun'args, y ; ...and replace it with the matching argument
sta fun'vars, y
iny
dec _counter
bne -
rts
; restore'stack: close the frame opened by save'stack.
; Arguments: .A = N, the same frame size that was given to save'stack.
; Copies N bytes from the frame at _sp back into fun'vars, then adds N
; to _sp. fun'args is left untouched.
; Modifies: .A and .Y, _sp, _counter, fun'vars.
restore'stack:
pha ; Keep N: the loop below counts _counter down to zero
sta _counter
ldy #$00
* lda (_sp), y
sta fun'vars, y
iny
dec _counter
bne -
pla ; _sp = _sp + N, 16 bits wide
clc
adc _sp
sta _sp
lda _sp+1
adc #$00 ; Propagate the carry into the high byte
sta _sp+1
rts
.scend
; Decimal output. print'dec writes the unsigned 16-bit number found in
; fun'args (low byte) and fun'args+1 (high byte) through chrout, without
; leading zeroes. No division is used: each digit is found by counting
; how often a power of ten can be subtracted. The working variables
; are overlaid on fun'args, so fun'args through fun'args+5 are
; overwritten. print'string, which follows this scope, prints a
; zero-terminated string.
.scope
.data
.org fun'args
.space _value 2
.space _unit 2
.space _digit 1
.space _started 1
.text
print'dec:
        lda #$00
        sta _started            ; Nothing printed yet: hide leading zeroes
        lda #>10000
        sta _unit+1
        lda #<10000
        sta _unit
        jsr repsub'16           ; Ten-thousands
        lda #>1000
        sta _unit+1
        lda #<1000
        sta _unit
        jsr repsub'16           ; Thousands
        lda #100
        sta _unit
        lda #0
        sta _unit+1             ; High byte stays zero from here on
        jsr repsub'16           ; Hundreds
        lda #10
        sta _unit
        jsr repsub'16           ; Tens
        lda _value              ; The remainder is the ones digit,
        jsr _putdigit           ; printed even when it is zero
        rts

; repsub'16: count how many times _unit fits into _value, leave the
; remainder in _value, and print the count unless it is a leading zero.
repsub'16:
        lda #$00
        sta _digit
*       sec                     ; Trial run: keep only the carry
        lda _value
        sbc _unit
        lda _value+1
        sbc _unit+1
        bcc _emit               ; Borrow: _value is smaller than _unit
        sec                     ; It fits, so subtract for real
        lda _value
        sbc _unit
        sta _value
        lda _value+1
        sbc _unit+1
        sta _value+1
        inc _digit
        jmp -
_emit:
        lda _digit
        ora _started            ; Zero only for a leading zero
        beq _skip
        sta _started            ; Later zeroes must be printed
        lda _digit
_putdigit:
        clc
        adc #'0                 ; Value 0-9 to character code
        jsr chrout
_skip:
        rts
.scend
; print'string: write a zero-terminated string through chrout.
; Arguments: fun'args holds the low byte of the string address and
;            fun'args+1 the high byte.
; At most 256 bytes are examined. If no terminator is found before
; the index wraps around to zero, the routine returns quietly instead
; of printing the same bytes forever.
; Modifies: .A and .Y. The loop relies on chrout leaving .Y intact.
print'string:
        ldy #$00
*       lda (fun'args), y
        beq +                   ; Terminator reached
        jsr chrout
        iny
        bne -                   ; Falls through once Y wraps to zero
*       rts

View File

@ -1,5 +1,6 @@
.word $0801
.org $0801
.outfile "hello.prg"
.word next, 10 ; Next line and current line number
.byte $9e," 2064",0 ; SYS 2064

View File

@ -1,5 +1,6 @@
.word $0801
.org $0801
.outfile "hello.prg"
.scope
.word _next, 10 ; Next line and current line number

View File

@ -1,4 +1,5 @@
.include "c64-1.oph"
.outfile "hello.prg"
.macro print
ldx #0

View File

@ -1,4 +1,5 @@
.include "c64-1.oph"
.outfile "hello.prg"
.macro print
ldx #0

View File

@ -1,4 +1,5 @@
.include "c64-1.oph"
.outfile "hello.prg"
.macro print
ldx #0

View File

@ -1,4 +1,5 @@
.include "c64-1.oph"
.outfile "hello.prg"
.macro print
ldx #0

View File

@ -1,4 +1,5 @@
.include "c64-1.oph"
.outfile "hello.prg"
.data
.org $C000

View File

@ -1,4 +1,5 @@
.include "c64-1.oph"
.outfile "hello.prg"
.data
.org $C000

232
examples/structuredemo.oph Normal file
View File

@ -0,0 +1,232 @@
.include "../platform/c64_0.oph"
.require "../platform/c64kernal.oph"
.outfile "structuredemo.prg"
; Program entry: show the data in array order, build the sorted linked
; list, then show the data again by walking that list.
        jsr print'unsorted
        jsr insertion'sort
        jsr print'list
        rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Linked list data: head, next, lb, hb.
; lb/hb: Low/high bytes of the data array. These are immutable and
; kept with the program text.
; head: Array index of the first element in the list, or #$FF if the
; list is empty
; next: Array of successor indices. If you've just read element X,
; the value of memory location next+X is the index of the
; next element. If next is #$FF, you've reached the end of
; the list.
;
; The array has 16 elements. Element i is the 16-bit value whose low
; byte is lb+i and whose high byte is hb+i, so the two tables below
; must list the same values in the same order.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
.data
.org $C000
; head is one byte at $C000; next is the 16 bytes that follow it.
.space head 1
.space next 16
.text
lb: .byte <$838,<$618,<$205,<$984,<$724,<$301,<$249,<$946
.byte <$925,<$043,<$114,<$697,<$985,<$633,<$312,<$086
hb: .byte >$838,>$618,>$205,>$984,>$724,>$301,>$249,>$946
.byte >$925,>$043,>$114,>$697,>$985,>$633,>$312,>$086
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; insertion'sort: Builds the sorted linked list over the lb/hb data
;                 by emptying the list and then inserting elements
;                 0 through 15 one at a time.
; Arguments: None.
; Modifies: All registers destroyed, head and next array sorted.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
insertion'sort:
        ldx #$00                ; Start with element 0
        lda #$FF                ; The terminator in 'head' marks the list empty
        sta head
insertion'sort'loop:
        txa                     ; insert_elt destroys X, so keep the
        pha                     ; index on the hardware stack
        jsr insert_elt
        pla
        tax
        inx
        cpx #16                 ; All 16 elements inserted?
        bne insertion'sort'loop
        rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; insert_elt: Insert an element into the linked list. Maintains the
;             list in sorted, ascending order. Used by
;             insertion'sort.
;             The 16-bit values are compared as unsigned numbers:
;             each comparison subtracts the new value from a list
;             value and branches on the borrow (carry clear), so the
;             ordering is right for the whole $0000-$FFFF range.
; Arguments: X register holds the index of the element to add.
; Modifies: All registers destroyed; head and next arrays updated
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
.data
.space lbtoinsert 1
.space hbtoinsert 1
.space indextoinsert 1
.text
insert_elt:
        ldy head                ; If the list is empty, make
        cpy #$FF                ; head point at it, and return.
        bne insert_elt'list'not'empty
        stx head
        tya                     ; A = #$FF: the new element is also the last
        sta next,x
        rts
insert_elt'list'not'empty:
        lda lb,x                ; Cache the data we're inserting
        sta lbtoinsert
        lda hb,x
        sta hbtoinsert
        stx indextoinsert
        ldy head                ; Compare the first value with
        sec                     ; the data. If the data must
        lda lb,y                ; be inserted at the front...
        sbc lbtoinsert
        lda hb,y
        sbc hbtoinsert
        bcc insert_elt'not'smallest     ; Borrow: data[head] is below the argument
        tya                     ; Set its next pointer to the
        sta next,x              ; old head, update the head
        stx head                ; pointer, and return.
        rts
insert_elt'not'smallest:
        ldx head
insert_elt'loop:                ; At this point, we know that
        lda next,x              ; argument > data[X].
        tay
        cpy #$FF                ; if next[X] = #$FF, insert arg at end.
        beq insert_elt'insert'after'current
        lda lb,y                ; Otherwise, compare arg to
        sec                     ; data[next[X]]. If we insert
        sbc lbtoinsert          ; before that...
        lda hb,y
        sbc hbtoinsert
        bcc insert_elt'goto'next        ; Borrow: data[next[X]] is below the argument
insert_elt'insert'after'current: ; Fix up all the next links
        tya                     ; next[arg] = old next[X]
        ldy indextoinsert
        sta next,y
        tya                     ; next[X] = arg
        sta next,x
        rts                     ; and return.
insert_elt'goto'next:           ; Otherwise, let X = next[X]
        tya                     ; and go looping again.
        tax
        jmp insert_elt'loop
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; print'unsorted: Prints the unsorted-data header, then all 16 values
;                 in array order as four hex digits each, separated
;                 by spaces and followed by a line break.
;                 Standalone procedure.
;                 Y is the loop index, so this depends on chrout
;                 leaving .Y intact.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
print'unsorted:
        ldx #>unsorted'hdr      ; put'string takes low byte in A, high in X
        lda #<unsorted'hdr
        jsr put'string
        ldy #$00
print'unsorted'loop:
        lda hb,y                ; High byte first...
        jsr print'hex
        lda lb,y                ; ...then the low byte
        jsr print'hex
        lda #$20                ; Space between values
        jsr chrout
        iny
        cpy #16
        bne print'unsorted'loop
        lda #$0D                ; Finish the line
        jsr chrout
        rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; print'list: Prints the sorted-data header, then follows the linked
;             list from head through the next array, printing each
;             value as four hex digits and a space, until the #$FF
;             terminator is reached.
;             Standalone procedure.
;             Y holds the current element index, so this depends on
;             chrout leaving .Y intact.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
print'list:
        ldx #>sorted'hdr        ; put'string takes low byte in A, high in X
        lda #<sorted'hdr
        jsr put'string
        ldy head
print'list'loop:
        cpy #$FF                ; End of list (or empty list)?
        beq print'list'done
        lda hb,y                ; High byte first...
        jsr print'hex
        lda lb,y                ; ...then the low byte
        jsr print'hex
        lda #$20                ; Space between values
        jsr chrout
        lda next,y              ; Y = next[Y]
        tay
        jmp print'list'loop
print'list'done:
        lda #$0D                ; Finish the line
        jsr chrout
        rts
;; String data for the above routines.
;; Each header ends with 13, the same code the print routines send
;; with lda #$0D to finish a line, followed by the zero terminator
;; that put'string stops at.
unsorted'hdr:
.byte 147 ; Clear screen first!
.byte "UNSORTED DATA:",13,0
sorted'hdr:
.byte "SORTED DATA:",13,0
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; print'hex: outputs a two-character hex representation of a one-
;            byte value, high nibble first.
; Arguments: Byte to print in accumulator
; Modifies: .A and .X
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
print'hex:
        pha                     ; Keep the byte for the low nibble
        lsr                     ; Move the high nibble down. LSR always
        lsr                     ; shifts a zero into bit 7, so the carry
        lsr                     ; needs no clearing beforehand.
        lsr
        tax
        lda hexstr,x
        jsr chrout
        pla
        and #$0F                ; Now the low nibble
        tax
        lda hexstr,x
        jsr chrout
        rts
; Character data array for print'hex.
hexstr: .byte "0123456789ABCDEF"
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; put'string: writes a zero-terminated string to the screen through
;             chrout. Strings must be shorter than 256 bytes: when
;             the index wraps around without a terminator having
;             been found, the routine simply returns.
; Arguments: Low byte of string address in .A, high byte in .X
; Modifies: .A and .Y
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
.data zp
.space put'string'addr 2
.text
put'string:
        stx put'string'addr+1   ; Build the zero-page pointer
        sta put'string'addr
        ldy #$00
put'string'loop:
        lda (put'string'addr),y
        beq put'string'done     ; Terminator reached
        jsr chrout
        iny
        bne put'string'loop     ; Falls through once Y wraps to zero
put'string'done:
        rts