Irmen de Jong a735939d1e removed confusing GPL software license reference and copyright header from library files. (because of exclusion in output files)
Reworded software license and exclusion clause somewhat again in attempt to make it even clearer.
2022-05-30 20:12:20 +02:00

1597 lines
28 KiB
NASM

; Internal Math library routines - always included by the compiler
; Generic machine independent 6502 code.
;
; some more interesting routines can be found here:
; http://6502org.wikidot.com/software-math
; http://codebase64.org/doku.php?id=base:6502_6510_maths
;
math_store_reg .byte 0 ; temporary storage
multiply_bytes .proc
; -- multiply 2 bytes A and Y, result as byte in A (signed or unsigned)
sta P8ZP_SCRATCH_B1 ; num1
sty P8ZP_SCRATCH_REG ; num2
lda #0
beq _enterloop
_doAdd clc
adc P8ZP_SCRATCH_B1
_loop asl P8ZP_SCRATCH_B1
_enterloop lsr P8ZP_SCRATCH_REG
bcs _doAdd
bne _loop
rts
.pend
multiply_bytes_into_word .proc
; -- multiply 2 bytes A and Y, result as word in A/Y (unsigned)
sta P8ZP_SCRATCH_B1
sty P8ZP_SCRATCH_REG
stx math_store_reg
lda #0
ldx #8
lsr P8ZP_SCRATCH_B1
- bcc +
clc
adc P8ZP_SCRATCH_REG
+ ror a
ror P8ZP_SCRATCH_B1
dex
bne -
tay
lda P8ZP_SCRATCH_B1
ldx math_store_reg
rts
.pend
multiply_words .proc
; -- multiply two 16-bit words into a 32-bit result (signed and unsigned)
; input: A/Y = first 16-bit number, P8ZP_SCRATCH_W1 in ZP = second 16-bit number
; output: multiply_words.result 4-bytes/32-bits product, LSB order (low-to-high)
; clobbers: A
sta P8ZP_SCRATCH_W2
sty P8ZP_SCRATCH_W2+1
stx P8ZP_SCRATCH_REG
mult16 lda #0
sta result+2 ; clear upper bits of product
sta result+3
ldx #16 ; for all 16 bits...
- lsr P8ZP_SCRATCH_W1+1 ; divide multiplier by 2
ror P8ZP_SCRATCH_W1
bcc +
lda result+2 ; get upper half of product and add multiplicand
clc
adc P8ZP_SCRATCH_W2
sta result+2
lda result+3
adc P8ZP_SCRATCH_W2+1
+ ror a ; rotate partial product
sta result+3
ror result+2
ror result+1
ror result
dex
bne -
ldx P8ZP_SCRATCH_REG
rts
result .byte 0,0,0,0
.pend
divmod_b_asm .proc
; signed byte division: make everything positive and fix sign afterwards
sta P8ZP_SCRATCH_B1
tya
eor P8ZP_SCRATCH_B1
php ; save sign
lda P8ZP_SCRATCH_B1
bpl +
eor #$ff
sec
adc #0 ; make it positive
+ pha
tya
bpl +
eor #$ff
sec
adc #0 ; make it positive
tay
+ pla
jsr divmod_ub_asm
sta _remainder
plp
bpl +
tya
eor #$ff
sec
adc #0 ; negate result
tay
+ rts
_remainder .byte 0
.pend
divmod_ub_asm .proc
; -- divide A by Y, result quotient in Y, remainder in A (unsigned)
; division by zero will result in quotient = 255 and remainder = original number
sty P8ZP_SCRATCH_REG
sta P8ZP_SCRATCH_B1
stx math_store_reg
lda #0
ldx #8
asl P8ZP_SCRATCH_B1
- rol a
cmp P8ZP_SCRATCH_REG
bcc +
sbc P8ZP_SCRATCH_REG
+ rol P8ZP_SCRATCH_B1
dex
bne -
ldy P8ZP_SCRATCH_B1
ldx math_store_reg
rts
.pend
divmod_w_asm .proc
; signed word division: make everything positive and fix sign afterwards
sta P8ZP_SCRATCH_W2
sty P8ZP_SCRATCH_W2+1
lda P8ZP_SCRATCH_W1+1
eor P8ZP_SCRATCH_W2+1
php ; save sign
lda P8ZP_SCRATCH_W1+1
bpl +
lda #0
sec
sbc P8ZP_SCRATCH_W1
sta P8ZP_SCRATCH_W1
lda #0
sbc P8ZP_SCRATCH_W1+1
sta P8ZP_SCRATCH_W1+1
+ lda P8ZP_SCRATCH_W2+1
bpl +
lda #0
sec
sbc P8ZP_SCRATCH_W2
sta P8ZP_SCRATCH_W2
lda #0
sbc P8ZP_SCRATCH_W2+1
sta P8ZP_SCRATCH_W2+1
+ tay
lda P8ZP_SCRATCH_W2
jsr divmod_uw_asm
plp ; restore sign
bpl +
sta P8ZP_SCRATCH_W2
sty P8ZP_SCRATCH_W2+1
lda #0
sec
sbc P8ZP_SCRATCH_W2
pha
lda #0
sbc P8ZP_SCRATCH_W2+1
tay
pla
+ rts
.pend
divmod_uw_asm .proc
; -- divide two unsigned words (16 bit each) into 16 bit results
; input: P8ZP_SCRATCH_W1 in ZP: 16 bit number, A/Y: 16 bit divisor
; output: P8ZP_SCRATCH_W2 in ZP: 16 bit remainder, A/Y: 16 bit division result
; division by zero will result in quotient = 65535 and remainder = divident
dividend = P8ZP_SCRATCH_W1
remainder = P8ZP_SCRATCH_W2
result = dividend ;save memory by reusing divident to store the result
sta _divisor
sty _divisor+1
stx P8ZP_SCRATCH_REG
lda #0 ;preset remainder to 0
sta remainder
sta remainder+1
ldx #16 ;repeat for each bit: ...
- asl dividend ;dividend lb & hb*2, msb -> Carry
rol dividend+1
rol remainder ;remainder lb & hb * 2 + msb from carry
rol remainder+1
lda remainder
sec
sbc _divisor ;substract divisor to see if it fits in
tay ;lb result -> Y, for we may need it later
lda remainder+1
sbc _divisor+1
bcc + ;if carry=0 then divisor didn't fit in yet
sta remainder+1 ;else save substraction result as new remainder,
sty remainder
inc result ;and INCrement result cause divisor fit in 1 times
+ dex
bne -
lda result
ldy result+1
ldx P8ZP_SCRATCH_REG
rts
_divisor .word 0
.pend
randseed .proc
; -- reset the random seeds for the byte and word random generators
; arguments: uword seed in A/Y clobbers A
; (default starting values are: A=$2c Y=$9e)
sta randword._seed
sty randword._seed+1
clc
adc #14
sta randbyte._seed
rts
.pend
randbyte .proc
; -- 8 bit pseudo random number generator into A (by just reusing randword)
jmp randword
.pend
randword .proc
; -- 16 bit pseudo random number generator into AY
; rand64k ;Factors of 65535: 3 5 17 257
lda sr1+1
asl a
asl a
eor sr1+1
asl a
eor sr1+1
asl a
asl a
eor sr1+1
asl a
rol sr1 ;shift this left, "random" bit comes from low
rol sr1+1
; rand32k ;Factors of 32767: 7 31 151 are independent and can be combined
lda sr2+1
asl a
eor sr2+1
asl a
asl a
ror sr2 ;shift this right, random bit comes from high - nicer when eor with sr1
rol sr2+1
lda sr1+1 ;can be left out
eor sr2+1 ;if you dont use
tay ;y as suggested
lda sr1 ;mix up lowbytes of SR1
eor sr2 ;and SR2 to combine both
rts
sr1 .word $a55a
sr2 .word $7653
.pend
; ----------- optimized multiplications (stack) : ---------
stack_mul_byte_3 .proc
; X + X*2
lda P8ESTACK_LO+1,x
asl a
clc
adc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
rts
.pend
stack_mul_word_3 .proc
; W*2 + W
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_REG
lda P8ESTACK_LO+1,x
asl a
rol P8ZP_SCRATCH_REG
clc
adc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
lda P8ZP_SCRATCH_REG
adc P8ESTACK_HI+1,x
sta P8ESTACK_HI+1,x
rts
.pend
stack_mul_byte_5 .proc
; X*4 + X
lda P8ESTACK_LO+1,x
asl a
asl a
clc
adc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
rts
.pend
stack_mul_word_5 .proc
; W*4 + W
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_REG
lda P8ESTACK_LO+1,x
asl a
rol P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
clc
adc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
lda P8ZP_SCRATCH_REG
adc P8ESTACK_HI+1,x
sta P8ESTACK_HI+1,x
rts
.pend
stack_mul_byte_6 .proc
; (X*2 + X)*2
lda P8ESTACK_LO+1,x
asl a
clc
adc P8ESTACK_LO+1,x
asl a
sta P8ESTACK_LO+1,x
rts
.pend
stack_mul_word_6 .proc
; (W*2 + W)*2
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_REG
lda P8ESTACK_LO+1,x
asl a
rol P8ZP_SCRATCH_REG
clc
adc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
lda P8ZP_SCRATCH_REG
adc P8ESTACK_HI+1,x
asl P8ESTACK_LO+1,x
rol a
sta P8ESTACK_HI+1,x
rts
.pend
stack_mul_byte_7 .proc
; X*8 - X
lda P8ESTACK_LO+1,x
asl a
asl a
asl a
sec
sbc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
rts
.pend
stack_mul_word_7 .proc
; W*8 - W
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_REG
lda P8ESTACK_LO+1,x
asl a
rol P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
sec
sbc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
lda P8ZP_SCRATCH_REG
sbc P8ESTACK_HI+1,x
sta P8ESTACK_HI+1,x
rts
.pend
stack_mul_byte_9 .proc
; X*8 + X
lda P8ESTACK_LO+1,x
asl a
asl a
asl a
clc
adc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
rts
.pend
stack_mul_word_9 .proc
; W*8 + W
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_REG
lda P8ESTACK_LO+1,x
asl a
rol P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
clc
adc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
lda P8ZP_SCRATCH_REG
adc P8ESTACK_HI+1,x
sta P8ESTACK_HI+1,x
rts
.pend
stack_mul_byte_10 .proc
; (X*4 + X)*2
lda P8ESTACK_LO+1,x
asl a
asl a
clc
adc P8ESTACK_LO+1,x
asl a
sta P8ESTACK_LO+1,x
rts
.pend
stack_mul_word_10 .proc
; (W*4 + W)*2
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_REG
lda P8ESTACK_LO+1,x
asl a
rol P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
clc
adc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
lda P8ZP_SCRATCH_REG
adc P8ESTACK_HI+1,x
asl P8ESTACK_LO+1,x
rol a
sta P8ESTACK_HI+1,x
rts
.pend
stack_mul_byte_11 .proc
; (X*2 + X)*4 - X
lda P8ESTACK_LO+1,x
asl a
clc
adc P8ESTACK_LO+1,x
asl a
asl a
sec
sbc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
rts
.pend
; mul_word_11 is skipped (too much code)
stack_mul_byte_12 .proc
; (X*2 + X)*4
lda P8ESTACK_LO+1,x
asl a
clc
adc P8ESTACK_LO+1,x
asl a
asl a
sta P8ESTACK_LO+1,x
rts
.pend
stack_mul_word_12 .proc
; (W*2 + W)*4
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_REG
lda P8ESTACK_LO+1,x
asl a
rol P8ZP_SCRATCH_REG
clc
adc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
lda P8ZP_SCRATCH_REG
adc P8ESTACK_HI+1,x
asl P8ESTACK_LO+1,x
rol a
asl P8ESTACK_LO+1,x
rol a
sta P8ESTACK_HI+1,x
rts
.pend
stack_mul_byte_13 .proc
; (X*2 + X)*4 + X
lda P8ESTACK_LO+1,x
asl a
clc
adc P8ESTACK_LO+1,x
asl a
asl a
clc
adc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
rts
.pend
; mul_word_13 is skipped (too much code)
stack_mul_byte_14 .proc
; (X*8 - X)*2
lda P8ESTACK_LO+1,x
asl a
asl a
asl a
sec
sbc P8ESTACK_LO+1,x
asl a
sta P8ESTACK_LO+1,x
rts
.pend
; mul_word_14 is skipped (too much code)
stack_mul_byte_15 .proc
; X*16 - X
lda P8ESTACK_LO+1,x
asl a
asl a
asl a
asl a
sec
sbc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
rts
.pend
stack_mul_word_15 .proc
; W*16 - W
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_REG
lda P8ESTACK_LO+1,x
asl a
rol P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
sec
sbc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
lda P8ZP_SCRATCH_REG
sbc P8ESTACK_HI+1,x
sta P8ESTACK_HI+1,x
rts
.pend
stack_mul_byte_20 .proc
; (X*4 + X)*4
lda P8ESTACK_LO+1,x
asl a
asl a
clc
adc P8ESTACK_LO+1,x
asl a
asl a
sta P8ESTACK_LO+1,x
rts
.pend
stack_mul_word_20 .proc
; (W*4 + W)*4
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_REG
lda P8ESTACK_LO+1,x
asl a
rol P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
clc
adc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
lda P8ZP_SCRATCH_REG
adc P8ESTACK_HI+1,x
asl P8ESTACK_LO+1,x
rol a
asl P8ESTACK_LO+1,x
rol a
sta P8ESTACK_HI+1,x
rts
.pend
stack_mul_byte_25 .proc
; (X*2 + X)*8 + X
lda P8ESTACK_LO+1,x
asl a
clc
adc P8ESTACK_LO+1,x
asl a
asl a
asl a
clc
adc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
rts
.pend
stack_mul_word_25 .proc
; W = (W*2 + W) *8 + W
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_W1+1
lda P8ESTACK_LO+1,x
asl a
rol P8ZP_SCRATCH_W1+1
clc
adc P8ESTACK_LO+1,x
sta P8ZP_SCRATCH_W1
lda P8ZP_SCRATCH_W1+1
adc P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_W1+1
lda P8ZP_SCRATCH_W1
asl a
rol P8ZP_SCRATCH_W1+1
asl a
rol P8ZP_SCRATCH_W1+1
asl a
rol P8ZP_SCRATCH_W1+1
clc
adc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
lda P8ZP_SCRATCH_W1+1
adc P8ESTACK_HI+1,x
sta P8ESTACK_HI+1,x
rts
.pend
stack_mul_byte_40 .proc
lda P8ESTACK_LO+1,x
and #7
tay
lda mul_byte_40._forties,y
sta P8ESTACK_LO+1,x
rts
.pend
stack_mul_word_40 .proc
; (W*4 + W)*8
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_REG
lda P8ESTACK_LO+1,x
asl a
rol P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
clc
adc P8ESTACK_LO+1,x
sta P8ESTACK_LO+1,x
lda P8ZP_SCRATCH_REG
adc P8ESTACK_HI+1,x
asl P8ESTACK_LO+1,x
rol a
asl P8ESTACK_LO+1,x
rol a
asl P8ESTACK_LO+1,x
rol a
sta P8ESTACK_HI+1,x
rts
.pend
stack_mul_byte_50 .proc
lda P8ESTACK_LO+1,x
and #7
tay
lda mul_byte_50._fifties, y
sta P8ESTACK_LO+1,x
rts
.pend
stack_mul_word_50 .proc
; W = W * 25 * 2
jsr stack_mul_word_25
asl P8ESTACK_LO+1,x
rol P8ESTACK_HI+1,x
rts
.pend
stack_mul_byte_80 .proc
lda P8ESTACK_LO+1,x
and #3
tay
lda mul_byte_80._eighties, y
sta P8ESTACK_LO+1,x
rts
.pend
stack_mul_word_80 .proc
; W = W * 40 * 2
jsr stack_mul_word_40
asl P8ESTACK_LO+1,x
rol P8ESTACK_HI+1,x
rts
.pend
stack_mul_byte_100 .proc
lda P8ESTACK_LO+1,x
and #3
tay
lda mul_byte_100._hundreds, y
sta P8ESTACK_LO+1,x
rts
.pend
stack_mul_word_100 .proc
; W = W * 25 * 4
jsr stack_mul_word_25
asl P8ESTACK_LO+1,x
rol P8ESTACK_HI+1,x
asl P8ESTACK_LO+1,x
rol P8ESTACK_HI+1,x
rts
.pend
stack_mul_word_320 .proc
; stackW = stackLo * 256 + stackLo * 64 (stackHi doesn't matter)
ldy P8ESTACK_LO+1,x
lda #0
sta P8ESTACK_HI+1,x
tya
asl a
rol P8ESTACK_HI+1,x
asl a
rol P8ESTACK_HI+1,x
asl a
rol P8ESTACK_HI+1,x
asl a
rol P8ESTACK_HI+1,x
asl a
rol P8ESTACK_HI+1,x
asl a
rol P8ESTACK_HI+1,x
sta P8ESTACK_LO+1,x
tya
clc
adc P8ESTACK_HI+1,x
sta P8ESTACK_HI+1,x
rts
.pend
stack_mul_word_640 .proc
; stackW = (stackLo * 2 * 320) (stackHi doesn't matter)
asl P8ESTACK_LO+1,x
jmp stack_mul_word_320
.pend
; ----------- optimized multiplications (in-place A (byte) and ?? (word)) : ---------
mul_byte_3 .proc
; A = A + A*2
sta P8ZP_SCRATCH_REG
asl a
clc
adc P8ZP_SCRATCH_REG
rts
.pend
mul_word_3 .proc
; AY = AY*2 + AY
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
sta P8ZP_SCRATCH_W2
sty P8ZP_SCRATCH_W2+1
asl a
rol P8ZP_SCRATCH_W1+1
clc
adc P8ZP_SCRATCH_W2
sta P8ZP_SCRATCH_W1
lda P8ZP_SCRATCH_W1+1
adc P8ZP_SCRATCH_W2+1
tay
lda P8ZP_SCRATCH_W1
rts
.pend
mul_byte_5 .proc
; A = A*4 + A
sta P8ZP_SCRATCH_REG
asl a
asl a
clc
adc P8ZP_SCRATCH_REG
rts
.pend
mul_word_5 .proc
; AY = AY*4 + AY
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
sta P8ZP_SCRATCH_W2
sty P8ZP_SCRATCH_W2+1
asl a
rol P8ZP_SCRATCH_W1+1
asl a
rol P8ZP_SCRATCH_W1+1
clc
adc P8ZP_SCRATCH_W2
sta P8ZP_SCRATCH_W1
lda P8ZP_SCRATCH_W1+1
adc P8ZP_SCRATCH_W2+1
tay
lda P8ZP_SCRATCH_W1
rts
.pend
mul_byte_6 .proc
; A = (A*2 + A)*2
sta P8ZP_SCRATCH_REG
asl a
clc
adc P8ZP_SCRATCH_REG
asl a
rts
.pend
mul_word_6 .proc
; AY = (AY*2 + AY)*2
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
sta P8ZP_SCRATCH_W2
sty P8ZP_SCRATCH_W2+1
asl a
rol P8ZP_SCRATCH_W1+1
clc
adc P8ZP_SCRATCH_W2
sta P8ZP_SCRATCH_W1
tay
lda P8ZP_SCRATCH_W1+1
adc P8ZP_SCRATCH_W2+1
sta P8ZP_SCRATCH_W1+1
tya
asl a
rol P8ZP_SCRATCH_W1+1
ldy P8ZP_SCRATCH_W1+1
rts
.pend
mul_byte_7 .proc
; A = A*8 - A
sta P8ZP_SCRATCH_REG
asl a
asl a
asl a
sec
sbc P8ZP_SCRATCH_REG
rts
.pend
mul_word_7 .proc
; AY = AY*8 - AY
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
sta P8ZP_SCRATCH_W2
sty P8ZP_SCRATCH_W2+1
asl a
rol P8ZP_SCRATCH_W1+1
asl a
rol P8ZP_SCRATCH_W1+1
asl a
rol P8ZP_SCRATCH_W1+1
sec
sbc P8ZP_SCRATCH_W2
sta P8ZP_SCRATCH_W1
lda P8ZP_SCRATCH_W1+1
sbc P8ZP_SCRATCH_W2+1
tay
lda P8ZP_SCRATCH_W1
rts
.pend
mul_byte_9 .proc
; A = A*8 + A
sta P8ZP_SCRATCH_REG
asl a
asl a
asl a
clc
adc P8ZP_SCRATCH_REG
rts
.pend
mul_word_9 .proc
; AY = AY*8 + AY
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
sta P8ZP_SCRATCH_W2
sty P8ZP_SCRATCH_W2+1
asl a
rol P8ZP_SCRATCH_W1+1
asl a
rol P8ZP_SCRATCH_W1+1
asl a
rol P8ZP_SCRATCH_W1+1
clc
adc P8ZP_SCRATCH_W2
sta P8ZP_SCRATCH_W1
lda P8ZP_SCRATCH_W1+1
adc P8ZP_SCRATCH_W2+1
tay
lda P8ZP_SCRATCH_W1
rts
rts
.pend
mul_byte_10 .proc
; A=(A*4 + A)*2
sta P8ZP_SCRATCH_REG
asl a
asl a
clc
adc P8ZP_SCRATCH_REG
asl a
rts
.pend
mul_word_10 .proc
; AY=(AY*4 + AY)*2
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
sta P8ZP_SCRATCH_W2
sty P8ZP_SCRATCH_W2+1
asl a
rol P8ZP_SCRATCH_W1+1
asl a
rol P8ZP_SCRATCH_W1+1
clc
adc P8ZP_SCRATCH_W2
sta P8ZP_SCRATCH_W1
lda P8ZP_SCRATCH_W1+1
adc P8ZP_SCRATCH_W2+1
sta P8ZP_SCRATCH_W1+1
lda P8ZP_SCRATCH_W1
asl a
rol P8ZP_SCRATCH_W1+1
ldy P8ZP_SCRATCH_W1+1
rts
.pend
mul_byte_11 .proc
; A=(A*2 + A)*4 - A
sta P8ZP_SCRATCH_REG
asl a
clc
adc P8ZP_SCRATCH_REG
asl a
asl a
sec
sbc P8ZP_SCRATCH_REG
rts
.pend
; mul_word_11 is skipped (too much code)
mul_byte_12 .proc
; A=(A*2 + A)*4
sta P8ZP_SCRATCH_REG
asl a
clc
adc P8ZP_SCRATCH_REG
asl a
asl a
rts
.pend
mul_word_12 .proc
; AY=(AY*2 + AY)*4
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
sta P8ZP_SCRATCH_W2
sty P8ZP_SCRATCH_W2+1
asl a
rol P8ZP_SCRATCH_W1+1
clc
adc P8ZP_SCRATCH_W2
sta P8ZP_SCRATCH_W1
lda P8ZP_SCRATCH_W1+1
adc P8ZP_SCRATCH_W2+1
sta P8ZP_SCRATCH_W1+1
lda P8ZP_SCRATCH_W1
asl a
rol P8ZP_SCRATCH_W1+1
asl a
rol P8ZP_SCRATCH_W1+1
ldy P8ZP_SCRATCH_W1+1
rts
.pend
mul_byte_13 .proc
; A=(A*2 + A)*4 + A
sta P8ZP_SCRATCH_REG
asl a
clc
adc P8ZP_SCRATCH_REG
asl a
asl a
clc
adc P8ZP_SCRATCH_REG
rts
.pend
; mul_word_13 is skipped (too much code)
mul_byte_14 .proc
; A=(A*8 - A)*2
sta P8ZP_SCRATCH_REG
asl a
asl a
asl a
sec
sbc P8ZP_SCRATCH_REG
asl a
rts
.pend
; mul_word_14 is skipped (too much code)
mul_byte_15 .proc
; A=A*16 - A
sta P8ZP_SCRATCH_REG
asl a
asl a
asl a
asl a
sec
sbc P8ZP_SCRATCH_REG
rts
.pend
mul_word_15 .proc
; AY = AY * 16 - AY
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
sta P8ZP_SCRATCH_W2
sty P8ZP_SCRATCH_W2+1
asl a
rol P8ZP_SCRATCH_W1+1
asl a
rol P8ZP_SCRATCH_W1+1
asl a
rol P8ZP_SCRATCH_W1+1
asl a
rol P8ZP_SCRATCH_W1+1
sec
sbc P8ZP_SCRATCH_W2
sta P8ZP_SCRATCH_W1
lda P8ZP_SCRATCH_W1+1
sbc P8ZP_SCRATCH_W2+1
tay
lda P8ZP_SCRATCH_W1
rts
.pend
mul_byte_20 .proc
; A=(A*4 + A)*4
sta P8ZP_SCRATCH_REG
asl a
asl a
clc
adc P8ZP_SCRATCH_REG
asl a
asl a
rts
.pend
mul_word_20 .proc
; AY = AY * 10 * 2
jsr mul_word_10
sty P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
ldy P8ZP_SCRATCH_REG
rts
.pend
mul_byte_25 .proc
; A=(A*2 + A)*8 + A
sta P8ZP_SCRATCH_REG
asl a
clc
adc P8ZP_SCRATCH_REG
asl a
asl a
asl a
clc
adc P8ZP_SCRATCH_REG
rts
.pend
mul_word_25 .proc
; AY = (AY*2 + AY) *8 + AY
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
sta P8ZP_SCRATCH_W2
sty P8ZP_SCRATCH_W2+1
asl a
rol P8ZP_SCRATCH_W1+1
clc
adc P8ZP_SCRATCH_W2
sta P8ZP_SCRATCH_W1
lda P8ZP_SCRATCH_W1+1
adc P8ZP_SCRATCH_W2+1
sta P8ZP_SCRATCH_W1+1
lda P8ZP_SCRATCH_W1
asl a
rol P8ZP_SCRATCH_W1+1
asl a
rol P8ZP_SCRATCH_W1+1
asl a
rol P8ZP_SCRATCH_W1+1
clc
adc P8ZP_SCRATCH_W2
sta P8ZP_SCRATCH_W1
lda P8ZP_SCRATCH_W1+1
adc P8ZP_SCRATCH_W2+1
tay
lda P8ZP_SCRATCH_W1
rts
.pend
mul_byte_40 .proc
and #7
tay
lda _forties,y
rts
_forties .byte 0*40, 1*40, 2*40, 3*40, 4*40, 5*40, 6*40, 7*40 & 255
.pend
mul_word_40 .proc
; AY = (AY*4 + AY)*8
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
sta P8ZP_SCRATCH_W2
sty P8ZP_SCRATCH_W2+1
asl a
rol P8ZP_SCRATCH_W1+1
asl a
rol P8ZP_SCRATCH_W1+1
clc
adc P8ZP_SCRATCH_W2
sta P8ZP_SCRATCH_W1
lda P8ZP_SCRATCH_W1+1
adc P8ZP_SCRATCH_W2+1
asl P8ZP_SCRATCH_W1
rol a
asl P8ZP_SCRATCH_W1
rol a
asl P8ZP_SCRATCH_W1
rol a
tay
lda P8ZP_SCRATCH_W1
rts
.pend
mul_byte_50 .proc
and #7
tay
lda _fifties, y
rts
_fifties .byte 0*50, 1*50, 2*50, 3*50, 4*50, 5*50, 6*50 & 255, 7*50 & 255
.pend
mul_word_50 .proc
; AY = AY * 25 * 2
jsr mul_word_25
sty P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
ldy P8ZP_SCRATCH_REG
rts
.pend
mul_byte_80 .proc
and #3
tay
lda _eighties, y
rts
_eighties .byte 0*80, 1*80, 2*80, 3*80
.pend
mul_word_80 .proc
; AY = AY * 40 * 2
jsr mul_word_40
sty P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
ldy P8ZP_SCRATCH_REG
rts
.pend
mul_byte_100 .proc
and #3
tay
lda _hundreds, y
rts
_hundreds .byte 0*100, 1*100, 2*100, 3*100 & 255
.pend
mul_word_100 .proc
; AY = AY * 25 * 4
jsr mul_word_25
sty P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
ldy P8ZP_SCRATCH_REG
rts
.pend
mul_word_320 .proc
; AY = A * 256 + A * 64 (msb in Y doesn't matter)
sta P8ZP_SCRATCH_B1
ldy #0
sty P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
asl a
rol P8ZP_SCRATCH_REG
pha
clc
lda P8ZP_SCRATCH_B1
adc P8ZP_SCRATCH_REG
tay
pla
rts
.pend
mul_word_640 .proc
; AY = (A * 2 * 320) (msb in Y doesn't matter)
asl a
jmp mul_word_320
.pend
; ----------- end optimized multiplications -----------
; bit shifts.
; anything below 3 is done inline. anything above 7 is done via other optimizations.
shift_left_w_7 .proc
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_B1
lda P8ESTACK_LO+1,x
asl a
rol P8ZP_SCRATCH_B1
_shift6 asl a
rol P8ZP_SCRATCH_B1
_shift5 asl a
rol P8ZP_SCRATCH_B1
_shift4 asl a
rol P8ZP_SCRATCH_B1
_shift3 asl a
rol P8ZP_SCRATCH_B1
asl a
rol P8ZP_SCRATCH_B1
asl a
rol P8ZP_SCRATCH_B1
sta P8ESTACK_LO+1,x
lda P8ZP_SCRATCH_B1
sta P8ESTACK_HI+1,x
rts
.pend
shift_left_w_6 .proc
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_B1
lda P8ESTACK_LO+1,x
jmp shift_left_w_7._shift6
.pend
shift_left_w_5 .proc
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_B1
lda P8ESTACK_LO+1,x
jmp shift_left_w_7._shift5
.pend
shift_left_w_4 .proc
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_B1
lda P8ESTACK_LO+1,x
jmp shift_left_w_7._shift4
.pend
shift_left_w_3 .proc
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_B1
lda P8ESTACK_LO+1,x
jmp shift_left_w_7._shift3
.pend
shift_left_w .proc
; -- variable number of shifts left
inx
ldy P8ESTACK_LO,x
bne _shift
rts
_shift asl P8ESTACK_LO+1,x
rol P8ESTACK_HI+1,x
dey
bne _shift
rts
.pend
shift_right_uw .proc
; -- uword variable number of shifts right
inx
ldy P8ESTACK_LO,x
bne _shift
rts
_shift lsr P8ESTACK_HI+1,x
ror P8ESTACK_LO+1,x
dey
bne _shift
rts
.pend
shift_right_uw_7 .proc
lda P8ESTACK_LO+1,x
sta P8ZP_SCRATCH_B1
lda P8ESTACK_HI+1,x
lsr a
ror P8ZP_SCRATCH_B1
_shift6 lsr a
ror P8ZP_SCRATCH_B1
_shift5 lsr a
ror P8ZP_SCRATCH_B1
_shift4 lsr a
ror P8ZP_SCRATCH_B1
_shift3 lsr a
ror P8ZP_SCRATCH_B1
lsr a
ror P8ZP_SCRATCH_B1
lsr a
ror P8ZP_SCRATCH_B1
sta P8ESTACK_HI+1,x
lda P8ZP_SCRATCH_B1
sta P8ESTACK_LO+1,x
rts
.pend
shift_right_uw_6 .proc
lda P8ESTACK_LO+1,x
sta P8ZP_SCRATCH_B1
lda P8ESTACK_HI+1,x
jmp shift_right_uw_7._shift6
.pend
shift_right_uw_5 .proc
lda P8ESTACK_LO+1,x
sta P8ZP_SCRATCH_B1
lda P8ESTACK_HI+1,x
jmp shift_right_uw_7._shift5
.pend
shift_right_uw_4 .proc
lda P8ESTACK_LO+1,x
sta P8ZP_SCRATCH_B1
lda P8ESTACK_HI+1,x
jmp shift_right_uw_7._shift4
.pend
shift_right_uw_3 .proc
lda P8ESTACK_LO+1,x
sta P8ZP_SCRATCH_B1
lda P8ESTACK_HI+1,x
jmp shift_right_uw_7._shift3
.pend
shift_right_w_7 .proc
lda P8ESTACK_LO+1,x
sta P8ZP_SCRATCH_W1
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_W1+1
asl a
ror P8ZP_SCRATCH_W1+1
ror P8ZP_SCRATCH_W1
lda P8ZP_SCRATCH_W1+1
_shift6 asl a
ror P8ZP_SCRATCH_W1+1
ror P8ZP_SCRATCH_W1
lda P8ZP_SCRATCH_W1+1
_shift5 asl a
ror P8ZP_SCRATCH_W1+1
ror P8ZP_SCRATCH_W1
lda P8ZP_SCRATCH_W1+1
_shift4 asl a
ror P8ZP_SCRATCH_W1+1
ror P8ZP_SCRATCH_W1
lda P8ZP_SCRATCH_W1+1
_shift3 asl a
ror P8ZP_SCRATCH_W1+1
ror P8ZP_SCRATCH_W1
lda P8ZP_SCRATCH_W1+1
asl a
ror P8ZP_SCRATCH_W1+1
ror P8ZP_SCRATCH_W1
lda P8ZP_SCRATCH_W1+1
asl a
ror P8ZP_SCRATCH_W1+1
ror P8ZP_SCRATCH_W1
lda P8ZP_SCRATCH_W1
sta P8ESTACK_LO+1,x
lda P8ZP_SCRATCH_W1+1
sta P8ESTACK_HI+1,x
rts
.pend
shift_right_w_6 .proc
lda P8ESTACK_LO+1,x
sta P8ZP_SCRATCH_W1
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_W1+1
jmp shift_right_w_7._shift6
.pend
shift_right_w_5 .proc
lda P8ESTACK_LO+1,x
sta P8ZP_SCRATCH_W1
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_W1+1
jmp shift_right_w_7._shift5
.pend
shift_right_w_4 .proc
lda P8ESTACK_LO+1,x
sta P8ZP_SCRATCH_W1
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_W1+1
jmp shift_right_w_7._shift4
.pend
shift_right_w_3 .proc
lda P8ESTACK_LO+1,x
sta P8ZP_SCRATCH_W1
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_W1+1
jmp shift_right_w_7._shift3
.pend
shift_right_w .proc
; -- signed word variable number of shifts right
inx
ldy P8ESTACK_LO,x
bne _shift
rts
_shift lda P8ESTACK_HI+1,x
asl a
ror P8ESTACK_HI+1,x
ror P8ESTACK_LO+1,x
dey
bne _shift
rts
.pend
; support for bit shifting that is too large to be unrolled:
lsr_byte_A .proc
; -- lsr signed byte in A times the value in Y (assume >0)
cmp #0
bmi _negative
- lsr a
dey
bne -
rts
_negative lsr a
ora #$80
dey
bne _negative
rts
.pend
square .proc
; -- calculate square root of signed word in AY, result in AY
; routine by Lee Davsion, source: http://6502.org/source/integers/square.htm
; using this routine is about twice as fast as doing a regular multiplication.
;
; Calculates the 16 bit unsigned integer square of the signed 16 bit integer in
; Numberl/Numberh. The result is always in the range 0 to 65025 and is held in
; Squarel/Squareh
;
; The maximum input range is only +/-255 and no checking is done to ensure that
; this is so.
;
; This routine is useful if you are trying to draw circles as for any circle
;
; x^2+y^2=r^2 where x and y are the co-ordinates of any point on the circle and
; r is the circle radius
numberl = P8ZP_SCRATCH_W1 ; number to square low byte
numberh = P8ZP_SCRATCH_W1+1 ; number to square high byte
squarel = P8ZP_SCRATCH_W2 ; square low byte
squareh = P8ZP_SCRATCH_W2+1 ; square high byte
tempsq = P8ZP_SCRATCH_B1 ; temp byte for intermediate result
sta numberl
sty numberh
stx P8ZP_SCRATCH_REG
lda #$00 ; clear a
sta squarel ; clear square low byte
; (no need to clear the high byte, it gets shifted out)
lda numberl ; get number low byte
ldx numberh ; get number high byte
bpl _nonneg ; if +ve don't negate it
; else do a two's complement
eor #$ff ; invert
sec ; +1
adc #$00 ; and add it
_nonneg:
sta tempsq ; save abs(number)
ldx #$08 ; set bit count
_nextr2bit:
asl squarel ; low byte *2
rol squareh ; high byte *2+carry from low
asl a ; shift number byte
bcc _nosqadd ; don't do add if c = 0
tay ; save a
clc ; clear carry for add
lda tempsq ; get number
adc squarel ; add number^2 low byte
sta squarel ; save number^2 low byte
lda #$00 ; clear a
adc squareh ; add number^2 high byte
sta squareh ; save number^2 high byte
tya ; get a back
_nosqadd:
dex ; decrement bit count
bne _nextr2bit ; go do next bit
lda squarel
ldy squareh
ldx P8ZP_SCRATCH_REG
rts
.pend