From 783e26d3697e7d1e8604aa8a2e06c8bce0bb62d5 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Fri, 24 Nov 2017 14:40:50 -0500 Subject: [PATCH] tfv: update cycle counts --- gr-sim/tfv_flying_6502.c | 51 ++++++++++--------- tfv/OPTIMIZATION | 32 ++++++++++++ tfv/tfv_multiply.s | 103 +++++++++++++++++++++++---------------- tfv/tfv_zp.inc | 74 ++++++++++++++++------------ 4 files changed, 164 insertions(+), 96 deletions(-) create mode 100644 tfv/OPTIMIZATION diff --git a/gr-sim/tfv_flying_6502.c b/gr-sim/tfv_flying_6502.c index edeea155..c27fe8a7 100644 --- a/gr-sim/tfv_flying_6502.c +++ b/gr-sim/tfv_flying_6502.c @@ -83,7 +83,7 @@ static unsigned char water_map[32]={ #define LOWRES_W 40 #define LOWRES_H 40 -static int displayed=0; +static int displayed=1; struct cycle_counts { int flying; @@ -229,18 +229,11 @@ static void fixed_mul(unsigned char x_i, unsigned char x_f, num1h=x_i; num1l=x_f; - negate=0; // lda #0 2 - // sta NEGATE 4 - - // lda NUM1H 4 - - cycles.multiply+=13; - - - if (!(num1h&0x80)) goto check_num2; // bpl check_num2 2nt/3 + negate=0; + cycles.multiply+=10; + if (!(num1h&0x80)) goto check_num2; cycles.multiply--; - - negate++; // inc NEGATE 6 + negate++; num1l=~num1l; num1h=~num1h; @@ -254,13 +247,12 @@ static void fixed_mul(unsigned char x_i, unsigned char x_f, num1l&=0xff; num1h&=0xff; - // total=26 - cycles.multiply+=26; + cycles.multiply+=25; check_num2: num2h=y_i; num2l=y_f; - cycles.multiply+=7; + cycles.multiply+=6; if (!(num2h&0x80)) goto unsigned_multiply; cycles.multiply--; negate++; @@ -277,7 +269,7 @@ check_num2: num2l&=0xff; num2h&=0xff; - cycles.multiply+=30; + cycles.multiply+=25; unsigned_multiply: // if (debug) { @@ -290,7 +282,7 @@ unsigned_multiply: aa=0; // lda #0 (sz) result2=aa; // sta result+2 xx=16; // ldx #16 (sz) - cycles.multiply+=8; + cycles.multiply+=7; multiply_mainloop: cc=(num2h&1); //lsr NUM2+1 (szc) num2h>>=1; @@ -306,7 +298,7 @@ multiply_mainloop: num2l|=(cc<<7); cc=cc2; - cycles.multiply+=15; + 
cycles.multiply+=13; if (cc==0) goto shift_output; // bcc L2 cycles.multiply--; @@ -321,7 +313,7 @@ multiply_mainloop: aa=aa+cc+num1h; // adc NUM1+1 cc=!!(aa&0x100); aa=aa&0xff; - cycles.multiply+=22; + cycles.multiply+=18; shift_output: cc2=aa&1; aa=aa>>1; @@ -348,7 +340,7 @@ shift_output: cc=cc2; // ror result+0 xx--; // dex - cycles.multiply+=25; + cycles.multiply+=22; if (xx!=0) goto multiply_mainloop; // bne L1 cycles.multiply--; result3=aa&0xff; // sta result+3 @@ -358,7 +350,7 @@ shift_output: // printf("RAW RESULT = %02x:%02x:%02x:%02x\n", // result3&0xff,result2&0xff,result1&0xff,result0&0xff); // } - cycles.multiply+=13; + cycles.multiply+=11; if (negate&1) { // printf("NEGATING!\n"); cycles.multiply--; @@ -383,7 +375,7 @@ shift_output: aa-=result3+cc; cc=!!(aa&0x100); result3=aa; - cycles.multiply+=50; + cycles.multiply+=42; } *z_i=result2&0xff; @@ -619,6 +611,21 @@ int flying(void) { /* Flying */ /************************************************/ + /* Benchmark the multiply */ + memset(&cycles,0,sizeof(cycles)); + fixed_mul(0x1,0x0, + 0x2,0x0, + &ram[FACTOR_I],&ram[FACTOR_F]); + printf("Multiplying 1.0 * 2.0 = %d.%d, took %d cycles\n", + ram[FACTOR_I],ram[FACTOR_F],cycles.multiply); + + memset(&cycles,0,sizeof(cycles)); + fixed_mul(0xff,0xff, + 0xff,0xff, + &ram[FACTOR_I],&ram[FACTOR_F]); + printf("Multiplying ff.ff * ff.ff = %d.%d, took %d cycles\n", + ram[FACTOR_I],ram[FACTOR_F],cycles.multiply); + gr(); clear_bottom(PAGE0); clear_bottom(PAGE1); diff --git a/tfv/OPTIMIZATION b/tfv/OPTIMIZATION new file mode 100644 index 00000000..f0c31bdc --- /dev/null +++ b/tfv/OPTIMIZATION @@ -0,0 +1,32 @@ +Original implementation: + Multiplying 1.0 * 2.0 = 2.0, took 707 cycles + Multiplying ff.ff * ff.ff = 0.0, took 761 cycles + + Cycles: flying= 162 + Cycles: getkey= 46 + Cycles: page_flip= 26 + Cycles: multiply= 88,179 + Cycles: mode7= 76,077 + Cycles: lookup_map= 33,920 + Cycles: put_sprite= 2,561 + ================================== + Total = 200,971 + Frame 
Rate = 4.98 fps + +Update Multiply to use zero page addresses: + Multiplying 1.0 * 2.0 = 2.0, took 616 cycles + Multiplying ff.ff * ff.ff = 0.0, took 664 cycles + + Cycles: flying= 162 + Cycles: getkey= 46 + Cycles: page_flip= 26 + Cycles: multiply= 76,561 + Cycles: mode7= 76,077 + Cycles: lookup_map= 33,920 + Cycles: put_sprite= 2,561 + =================================== + Total = 189,353 + Frame Rate = 5.28 fps + + + diff --git a/tfv/tfv_multiply.s b/tfv/tfv_multiply.s index 10e80a36..c1801ab3 100644 --- a/tfv/tfv_multiply.s +++ b/tfv/tfv_multiply.s @@ -3,12 +3,12 @@ ; NUM2 is zeroed out ; result is in RESULT3:RESULT2:RESULT1:RESULT0 -NUM1L: .byte 0 -NUM1H: .byte 0 -NUM2L: .byte 0 -NUM2H: .byte 0 -RESULT: .byte 0,0,0,0 -NEGATE: .byte 0 +;NUM1L: .byte 0 +;NUM1H: .byte 0 +;NUM2L: .byte 0 +;NUM2H: .byte 0 +;RESULT: .byte 0,0,0,0 +;NEGATE: .byte 0 ; If we have 2k to spare we should check out ; http://codebase64.org/doku.php?id=base:seriously_fast_multiplication @@ -16,94 +16,113 @@ NEGATE: .byte 0 multiply: lda #$0 ; 2 - sta NEGATE ; 4 + sta NEGATE ; 3 ; Handle Signed - lda NUM1H ; 4 + lda NUM1H ; 3 bpl check_num2 ; 2nt/3 + ;============== + ; 10 (review: 2+3+3+3 = 11 with the branch counted as taken, as the other subtotals do) - inc NEGATE ; 4 + inc NEGATE ; 3 (review: INC zero-page is 5 cycles on a 6502) clc ; 2s-complement NUM1H/NUM1L ; 2 - lda NUM1L ; 4 + lda NUM1L ; 3 eor #$ff ; 2 adc #$1 ; 2 - sta NUM1L ; 4 + sta NUM1L ; 3 - lda NUM1H ; 4 + lda NUM1H ; 3 eor #$ff ; 2 adc #$0 ; 2 - sta NUM1H ; 4 - + sta NUM1H ; 3 + ;=========== + ; 25 (review: 27 once inc is counted as 5) check_num2: - lda NUM2H ; 4 + lda NUM2H ; 3 bpl unsigned_multiply ; 2nt/3 + ;============== + ; 6 - inc NEGATE ; 4 + inc NEGATE ; 3 (review: INC zero-page is 5 cycles on a 6502) clc ; 2 - lda NUM2L ; 4 + lda NUM2L ; 3 eor #$ff ; 2 adc #$1 ; 2 - sta NUM2L ; 4 + sta NUM2L ; 3 - lda NUM2H ; 4 + lda NUM2H ; 3 eor #$ff ; 2 adc #$0 ; 2 - sta NUM2H ; 4 - + sta NUM2H ; 3 + ;============= + ; 25 (review: 27 once inc is counted as 5) unsigned_multiply: lda #0 ; Initialize RESULT to 0 ; 2 - sta RESULT+2 ; 4 + sta RESULT+2 ; 3 ldx #16 ; 16x16 multiply ; 2 + ;============ + ; 7 multiply_mainloop: - lsr NUM2H ; Shift right 16-bit NUM2 ; 6 - ror NUM2L ; 
low bit goes into carry ; 6 + lsr NUM2H ; Shift right 16-bit NUM2 ; 5 + ror NUM2L ; low bit goes into carry ; 5 bcc shift_output ; 0 or 1? ; 2nt/3 + ;============ + ; 13 + tay ; If 1, add NUM1 (hi byte RESULT in A) ; 2 clc ; 2 - lda NUM1L ; 4 - adc RESULT+2 ; 4 - sta RESULT+2 ; 4 + lda NUM1L ; 3 + adc RESULT+2 ; 3 + sta RESULT+2 ; 3 tya ; 2 - adc NUM1H ; 4 + adc NUM1H ; 3 + ;============ + ; 18 shift_output: ror A ; "Stairstep" shift ; 2 - ror RESULT+2 ; 6 - ror RESULT+1 ; 6 - ror RESULT ; 6 + ror RESULT+2 ; 5 + ror RESULT+1 ; 5 + ror RESULT ; 5 dex ; 2 bne multiply_mainloop ; 2nt/3 - sta RESULT+3 ; 4 + ;============= + ; 22 + + sta RESULT+3 ; 3 ;; Negate if necessary - lda NEGATE ; 4 + lda NEGATE ; 3 and #$1 ; 2 beq positive ; 2nt/3 + ;============== + ; 11 clc ; 2 - lda RESULT+0 ; 4 + lda RESULT+0 ; 3 eor #$ff ; 2 adc #$1 ; 2 - sta RESULT+0 ; 4 + sta RESULT+0 ; 3 - lda RESULT+1 ; 4 + lda RESULT+1 ; 3 eor #$ff ; 2 adc #$0 ; 2 - sta RESULT+1 ; 4 + sta RESULT+1 ; 3 - lda RESULT+2 ; 4 + lda RESULT+2 ; 3 eor #$ff ; 2 adc #$0 ; 2 - sta RESULT+2 ; 4 + sta RESULT+2 ; 3 - lda RESULT+3 ; 4 + lda RESULT+3 ; 3 eor #$ff ; 2 adc #$0 ; 2 - sta RESULT+3 ; 4 - + sta RESULT+3 ; 3 + ;=========== + ; 42 positive: rts ; 6 diff --git a/tfv/tfv_zp.inc b/tfv/tfv_zp.inc index 592b1f78..df8f6d0e 100644 --- a/tfv/tfv_zp.inc +++ b/tfv/tfv_zp.inc @@ -1,6 +1,7 @@ .define EQU = ;; Zero page monitor routines addresses + WNDLFT EQU $20 WNDWDTH EQU $21 WNDTOP EQU $22 @@ -17,6 +18,40 @@ MASK EQU $2E COLOR EQU $30 INVFLG EQU $32 +; More zero-page addresses +; we try not to conflict with anything DOS, MONITOR or BASIC related + +COLOR1 EQU $E0 +COLOR2 EQU $E1 +MATCH EQU $E2 +XX EQU $E3 +YY EQU $E4 +YADD EQU $E5 +LOOP EQU $E6 +MEMPTRL EQU $E7 +MEMPTRH EQU $E8 +NAMEL EQU $E9 +NAMEH EQU $EA +NAMEX EQU $EB +CHAR EQU $EC +DISP_PAGE EQU $ED +DRAW_PAGE EQU $EE + +FIRST EQU $F0 +LASTKEY EQU $F1 +PADDLE_STATUS EQU $F2 +XPOS EQU $F3 +YPOS EQU $F4 +TEMP EQU $FA +RUN EQU $FA +TEMP2 EQU $FB +TEMPY EQU $FB +INL 
EQU $FC +INH EQU $FD +OUTL EQU $FE +OUTH EQU $FF + + ;; Flying Routine Only TURNING EQU $60 @@ -49,6 +84,13 @@ DRAW_SPLASH EQU $7A SPEED EQU $7B SPLASH_COUNT EQU $7C OVER_WATER EQU $7D +NUM1L EQU $7E +NUM1H EQU $7F +NUM2L EQU $80 +NUM2H EQU $81 +RESULT EQU $82 ; 4 bytes, $82-$85 (RESULT+0..RESULT+3) +NEGATE EQU $86 + SHIPY EQU $E4 ;; World Map Only @@ -110,38 +152,6 @@ COUT1 EQU $FDF0 ;; output A to screen -; Our zero-page addresses -; we try not to conflict with anything DOS, MONITOR or BASIC related - -COLOR1 EQU $E0 -COLOR2 EQU $E1 -MATCH EQU $E2 -XX EQU $E3 -YY EQU $E4 -YADD EQU $E5 -LOOP EQU $E6 -MEMPTRL EQU $E7 -MEMPTRH EQU $E8 -NAMEL EQU $E9 -NAMEH EQU $EA -NAMEX EQU $EB -CHAR EQU $EC -DISP_PAGE EQU $ED -DRAW_PAGE EQU $EE - -FIRST EQU $F0 -LASTKEY EQU $F1 -PADDLE_STATUS EQU $F2 -XPOS EQU $F3 -YPOS EQU $F4 -TEMP EQU $FA -RUN EQU $FA -TEMP2 EQU $FB -TEMPY EQU $FB -INL EQU $FC -INH EQU $FD -OUTL EQU $FE -OUTH EQU $FF