// Test the fast multiplication library .pc = $801 "Basic" :BasicUpstart(main) .pc = $80d "Program" .label BGCOL = $d021 .label print_char_cursor = $f .label print_line_cursor = 7 main: { lda #5 sta BGCOL jsr print_cls jsr mulf_init jsr mul16u_compare jsr mul16s_compare rts } // Perform many possible word multiplications (slow and fast) and compare the results mul16s_compare: { .label a = 3 .label b = 5 .label ms = $b .label mn = $19 .label mf = $11 .label i = 2 lda print_line_cursor sta print_char_cursor lda print_line_cursor+1 sta print_char_cursor+1 lda #0 sta i lda #<-$7fff sta b lda #>-$7fff sta b+1 lda #<-$7fff sta a lda #>-$7fff sta a+1 b1: lda #str sta print_str.str+1 jsr print_str ldy #0 b2: clc lda a adc #<$d2b sta a lda a+1 adc #>$d2b sta a+1 clc lda b adc #<$ffd sta b lda b+1 adc #>$ffd sta b+1 jsr muls16s jsr mul16s jsr mulf16s lda ms cmp mf bne !+ lda ms+1 cmp mf+1 bne !+ lda ms+2 cmp mf+2 bne !+ lda ms+3 cmp mf+3 beq b6 !: ldx #0 jmp b3 b6: ldx #1 b3: lda ms cmp mn bne !+ lda ms+1 cmp mn+1 bne !+ lda ms+2 cmp mn+2 bne !+ lda ms+3 cmp mn+3 beq b4 !: ldx #0 b4: cpx #0 bne b5 lda #2 sta BGCOL jsr mul16s_error rts b5: iny cpy #$10 bne b2 inc i lda #$10 cmp i beq !b1+ jmp b1 !b1: jsr print_ln lda print_line_cursor sta print_char_cursor lda print_line_cursor+1 sta print_char_cursor+1 lda #str1 sta print_str.str+1 jsr print_str jsr print_ln rts str1: .text "signed word multiply results match!@" } // Print a newline print_ln: { b1: lda #$28 clc adc print_line_cursor sta print_line_cursor bcc !+ inc print_line_cursor+1 !: lda print_line_cursor+1 cmp print_char_cursor+1 bcc b1 bne !+ lda print_line_cursor cmp print_char_cursor bcc b1 !: rts } // Print a zero-terminated string // print_str(byte* zeropage(9) str) print_str: { .label str = 9 b1: ldy #0 lda (str),y cmp #'@' bne b2 rts b2: ldy #0 lda (str),y sta (print_char_cursor),y inc print_char_cursor bne !+ inc print_char_cursor+1 !: inc str bne !+ inc str+1 !: jmp b1 } // mul16s_error(signed word zeropage(3) a, signed word zeropage(5) b, signed dword zeropage($b) ms, signed dword zeropage($19) mn, signed dword zeropage($11) mf) mul16s_error: { .label a = 3 .label b = 5 .label ms = $b .label mn = $19 .label mf = $11 lda #str sta print_str.str+1 jsr print_str jsr print_sword lda #str1 sta print_str.str+1 jsr print_str lda b sta print_sword.w lda b+1 sta print_sword.w+1 jsr print_sword lda #str2 sta print_str.str+1 jsr print_str jsr print_sdword lda #str3 sta print_str.str+1 jsr print_str lda mn sta print_sdword.dw lda mn+1 sta print_sdword.dw+1 lda mn+2 sta print_sdword.dw+2 lda mn+3 sta print_sdword.dw+3 jsr print_sdword lda #str4 sta print_str.str+1 jsr print_str lda mf sta print_sdword.dw lda mf+1 sta print_sdword.dw+1 lda mf+2 sta print_sdword.dw+2 lda mf+3 sta print_sdword.dw+3 jsr print_sdword jsr print_ln rts str: .text "signed word multiply mismatch @" } // Print a signed dword as HEX // print_sdword(signed dword zeropage($b) dw) print_sdword: { .label dw = $b lda dw+3 bpl b1 lda #'-' jsr print_char sec lda dw eor #$ff adc #0 sta dw lda dw+1 eor #$ff adc #0 sta dw+1 lda dw+2 eor #$ff adc #0 sta dw+2 lda dw+3 eor #$ff adc #0 sta dw+3 b1: jsr print_dword rts } // Print a dword as HEX // print_dword(dword zeropage($b) dw) print_dword: { .label dw = $b lda dw+2 sta print_word.w lda dw+3 sta print_word.w+1 jsr print_word lda dw sta print_word.w lda dw+1 sta print_word.w+1 jsr print_word rts } // Print a word as HEX // print_word(word zeropage(3) w) print_word: { .label w = 3 lda w+1 tax jsr print_byte lda w tax jsr print_byte rts } // Print a byte as HEX // print_byte(byte register(X) b) print_byte: { txa lsr lsr lsr lsr tay lda print_hextab,y jsr print_char lda #$f axs #0 lda print_hextab,x jsr print_char rts } // Print a single char // print_char(byte register(A) ch) print_char: { ldy #0 sta (print_char_cursor),y inc print_char_cursor bne !+ inc print_char_cursor+1 !: rts } // Print a signed word as HEX // print_sword(signed word zeropage(3) w) print_sword: { .label w = 3 lda w+1 bpl b1 lda #'-' jsr print_char sec lda w eor #$ff adc #0 sta w lda w+1 eor #$ff adc #0 sta w+1 b1: jsr print_word rts } // Fast multiply two signed words to a signed double word result // Fixes offsets introduced by using unsigned multiplication // mulf16s(signed word zeropage(3) a, signed word zeropage(5) b) mulf16s: { .label _9 = $38 .label _13 = $3a .label _16 = $38 .label _17 = $3a .label m = $11 .label return = $11 .label a = 3 .label b = 5 lda a sta mulf16u.a lda a+1 sta mulf16u.a+1 lda b sta mulf16u.b lda b+1 sta mulf16u.b+1 jsr mulf16u lda a+1 bpl b1 lda m+2 sta _9 lda m+3 sta _9+1 lda _16 sec sbc b sta _16 lda _16+1 sbc b+1 sta _16+1 lda _16 sta m+2 lda _16+1 sta m+3 b1: lda b+1 bpl b2 lda m+2 sta _13 lda m+3 sta _13+1 lda _17 sec sbc a sta _17 lda _17+1 sbc a+1 sta _17+1 lda _17 sta m+2 lda _17+1 sta m+3 b2: rts } // Fast multiply two unsigned words to a double word result // Done in assembler to utilize fast addition A+X // mulf16u(word zeropage($15) a, word zeropage($17) b) mulf16u: { .label memA = $f8 .label memB = $fa .label memR = $fc .label return = $11 .label a = $15 .label b = $17 lda a sta memA lda a+1 sta memA+1 lda b sta memB lda b+1 sta memB+1 lda memA sta sm1a+1 sta sm3a+1 sta sm5a+1 sta sm7a+1 eor #$ff sta sm2a+1 sta sm4a+1 sta sm6a+1 sta sm8a+1 lda memA+1 sta sm1b+1 sta sm3b+1 sta sm5b+1 sta sm7b+1 eor #$ff sta sm2b+1 sta sm4b+1 sta sm6b+1 sta sm8b+1 ldx memB sec sm1a: lda mulf_sqr1_lo,x sm2a: sbc mulf_sqr2_lo,x sta memR+0 sm3a: lda mulf_sqr1_hi,x sm4a: sbc mulf_sqr2_hi,x sta _AA+1 sec sm1b: lda mulf_sqr1_lo,x sm2b: sbc mulf_sqr2_lo,x sta _cc+1 sm3b: lda mulf_sqr1_hi,x sm4b: sbc mulf_sqr2_hi,x sta _CC+1 ldx memB+1 sec sm5a: lda mulf_sqr1_lo,x sm6a: sbc mulf_sqr2_lo,x sta _bb+1 sm7a: lda mulf_sqr1_hi,x sm8a: sbc mulf_sqr2_hi,x sta _BB+1 sec sm5b: lda mulf_sqr1_lo,x sm6b: sbc mulf_sqr2_lo,x sta _dd+1 sm7b: lda mulf_sqr1_hi,x sm8b: sbc mulf_sqr2_hi,x sta memR+3 clc _AA: lda #0 _bb: adc #0 sta memR+1 _BB: lda #0 _CC: adc #0 sta memR+2 bcc !+ inc memR+3 clc !: _cc: lda #0 adc memR+1 sta memR+1 _dd: lda #0 adc memR+2 sta memR+2 bcc !+ inc memR+3 !: lda memR sta return lda memR+1 sta return+1 lda memR+2 sta return+2 lda memR+3 sta return+3 rts } // Multiply of two signed words to a signed double word // Fixes offsets introduced by using unsigned multiplication // mul16s(signed word zeropage(3) a, signed word zeropage(5) b) mul16s: { .label _9 = $3c .label _13 = $3e .label _16 = $3c .label _17 = $3e .label m = $19 .label return = $19 .label a = 3 .label b = 5 lda b sta mul16u.mb lda b+1 sta mul16u.mb+1 lda #0 sta mul16u.mb+2 sta mul16u.mb+3 lda a sta mul16u.a lda a+1 sta mul16u.a+1 jsr mul16u lda a+1 bpl b1 lda m+2 sta _9 lda m+3 sta _9+1 lda _16 sec sbc b sta _16 lda _16+1 sbc b+1 sta _16+1 lda _16 sta m+2 lda _16+1 sta m+3 b1: lda b+1 bpl b2 lda m+2 sta _13 lda m+3 sta _13+1 lda _17 sec sbc a sta _17 lda _17+1 sbc a+1 sta _17+1 lda _17 sta m+2 lda _17+1 sta m+3 b2: rts } // Perform binary multiplication of two unsigned 16-bit words into a 32-bit unsigned double word // mul16u(word zeropage($1d) a, word zeropage($17) b) mul16u: { .label mb = $1f .label a = $1d .label res = $19 .label return = $19 .label b = $17 lda #0 sta res sta res+1 sta res+2 sta res+3 b1: lda a bne b2 lda a+1 bne b2 rts b2: lda a and #1 cmp #0 beq b3 lda res clc adc mb sta res lda res+1 adc mb+1 sta res+1 lda res+2 adc mb+2 sta res+2 lda res+3 adc mb+3 sta res+3 b3: clc ror a+1 ror a asl mb rol mb+1 rol mb+2 rol mb+3 jmp b1 } // Slow multiplication of signed words // Perform a signed multiplication by repeated addition/subtraction // muls16s(signed word zeropage(3) a, signed word zeropage(5) b) muls16s: { .label m = $b .label j = $23 .label return = $b .label i = $25 .label a = 3 .label b = 5 lda a+1 bmi b6 bmi b5 bne !+ lda a beq b5 !: lda #0 sta j sta j+1 sta m sta m+1 sta m+2 sta m+3 b3: lda b+1 ora #$7f bmi !+ lda #0 !: sta $ff lda m clc adc b sta m lda m+1 adc b+1 sta m+1 lda m+2 adc $ff sta m+2 lda m+3 adc $ff sta m+3 inc j bne !+ inc j+1 !: lda j+1 cmp a+1 bne b3 lda j cmp a bne b3 rts b5: lda #0 sta return sta return+1 sta return+2 sta return+3 rts b6: lda #0 sta i sta i+1 sta m sta m+1 sta m+2 sta m+3 b4: lda b+1 ora #$7f bmi !+ lda #0 !: sta $ff sec lda m sbc b sta m lda m+1 sbc b+1 sta m+1 lda m+2 sbc $ff sta m+2 lda m+3 sbc $ff sta m+3 lda i bne !+ dec i+1 !: dec i lda i+1 cmp a+1 bne b4 lda i cmp a bne b4 rts } // Perform many possible word multiplications (slow and fast) and compare the results mul16u_compare: { .label a = $15 .label b = $17 .label ms = $b .label mn = $19 .label mf = $11 .label i = $27 lda #0 sta i sta b sta b+1 sta a sta a+1 lda #<$400 sta print_char_cursor lda #>$400 sta print_char_cursor+1 b1: lda #str sta print_str.str+1 jsr print_str ldy #0 b2: clc lda a adc #<$d2b sta a lda a+1 adc #>$d2b sta a+1 clc lda b adc #<$ffd sta b lda b+1 adc #>$ffd sta b+1 jsr muls16u lda a sta mul16u.a lda a+1 sta mul16u.a+1 lda mul16u.b sta mul16u.mb lda mul16u.b+1 sta mul16u.mb+1 lda #0 sta mul16u.mb+2 sta mul16u.mb+3 jsr mul16u jsr mulf16u lda ms cmp mf bne !+ lda ms+1 cmp mf+1 bne !+ lda ms+2 cmp mf+2 bne !+ lda ms+3 cmp mf+3 beq b6 !: ldx #0 jmp b3 b6: ldx #1 b3: lda ms cmp mn bne !+ lda ms+1 cmp mn+1 bne !+ lda ms+2 cmp mn+2 bne !+ lda ms+3 cmp mn+3 beq b4 !: ldx #0 b4: cpx #0 bne b5 lda #2 sta BGCOL lda a sta mul16u_error.a lda a+1 sta mul16u_error.a+1 jsr mul16u_error rts b5: iny cpy #$10 beq !b2+ jmp b2 !b2: inc i lda #$10 cmp i beq !b1+ jmp b1 !b1: lda #<$400 sta print_line_cursor lda #>$400 sta print_line_cursor+1 jsr print_ln lda print_line_cursor sta print_char_cursor lda print_line_cursor+1 sta print_char_cursor+1 lda #str1 sta print_str.str+1 jsr print_str jsr print_ln rts str1: .text "word multiply results match!@" } // mul16u_error(word zeropage(3) a, word zeropage($17) b, dword zeropage($b) ms, dword zeropage($19) mn, dword zeropage($11) mf) mul16u_error: { .label a = 3 .label b = $17 .label ms = $b .label mn = $19 .label mf = $11 lda #str sta print_str.str+1 jsr print_str jsr print_word lda #str1 sta print_str.str+1 jsr print_str lda b sta print_word.w lda b+1 sta print_word.w+1 jsr print_word lda #str2 sta print_str.str+1 jsr print_str jsr print_dword lda #str3 sta print_str.str+1 jsr print_str lda mn sta print_dword.dw lda mn+1 sta print_dword.dw+1 lda mn+2 sta print_dword.dw+2 lda mn+3 sta print_dword.dw+3 jsr print_dword lda #str4 sta print_str.str+1 jsr print_str lda mf sta print_dword.dw lda mf+1 sta print_dword.dw+1 lda mf+2 sta print_dword.dw+2 lda mf+3 sta print_dword.dw+3 jsr print_dword lda #<$400 sta print_line_cursor lda #>$400 sta print_line_cursor+1 jsr print_ln rts str: .text "multiply mismatch @" } // Slow multiplication of unsigned words // Calculate an unsigned multiplication by repeated addition // muls16u(word zeropage($15) a, word zeropage($17) b) muls16u: { .label return = $b .label m = $b .label i = $28 .label a = $15 .label b = $17 lda a bne !+ lda a+1 beq b3 !: lda #0 sta i sta i+1 sta m sta m+1 sta m+2 sta m+3 b2: lda m clc adc b sta m lda m+1 adc b+1 sta m+1 lda m+2 adc #0 sta m+2 lda m+3 adc #0 sta m+3 inc i bne !+ inc i+1 !: lda i+1 cmp a+1 bne b2 lda i cmp a bne b2 rts b3: lda #0 sta return sta return+1 sta return+2 sta return+3 rts } // Initialize the mulf_sqr multiplication tables with f(x)=int(x*x/4) mulf_init: { .label sqr1_hi = $2c .label sqr = $2f .label sqr1_lo = $2a .label x_2 = $2e .label sqr2_hi = $33 .label sqr2_lo = $31 .label dir = $35 lda #0 sta x_2 lda #mulf_sqr1_hi+1 sta sqr1_hi+1 lda #mulf_sqr1_lo+1 sta sqr1_lo+1 lda #0 sta sqr sta sqr+1 tax b1: inx txa and #1 cmp #0 bne b2 inc x_2 inc sqr bne !+ inc sqr+1 !: b2: lda sqr ldy #0 sta (sqr1_lo),y lda sqr+1 sta (sqr1_hi),y inc sqr1_hi bne !+ inc sqr1_hi+1 !: lda x_2 clc adc sqr sta sqr bcc !+ inc sqr+1 !: inc sqr1_lo bne !+ inc sqr1_lo+1 !: lda sqr1_lo+1 cmp #>mulf_sqr1_lo+$200 bne b1 lda sqr1_lo cmp #mulf_sqr2_hi sta sqr2_hi+1 lda #mulf_sqr2_lo sta sqr2_lo+1 ldx #-1 b4: lda mulf_sqr1_lo,x ldy #0 sta (sqr2_lo),y lda mulf_sqr1_hi,x sta (sqr2_hi),y inc sqr2_hi bne !+ inc sqr2_hi+1 !: txa clc adc dir tax cpx #0 bne b5 lda #1 sta dir b5: inc sqr2_lo bne !+ inc sqr2_lo+1 !: lda sqr2_lo+1 cmp #>mulf_sqr2_lo+$1ff bne b4 lda sqr2_lo cmp #$400 sta sc+1 b1: lda #' ' ldy #0 sta (sc),y inc sc bne !+ inc sc+1 !: lda sc+1 cmp #>$400+$3e8 bne b1 lda sc cmp #<$400+$3e8 bne b1 rts } print_hextab: .text "0123456789abcdef" // mulf_sqr tables will contain f(x)=int(x*x/4) and g(x) = f(x-255). // f(x) = >(( x * x )/4) .align $100 mulf_sqr1_hi: .fill $200, 0 // g(x) = >((( x - 255) * ( x - 255 ))/4) .align $100 mulf_sqr2_hi: .fill $200, 0 str: .text ".@" str1: .text "*@" str2: .text " slow:@" str3: .text " / normal:@" str4: .text " / fast:@"