From 91706259a7249b713aea0aa74e8468d85ba5f26b Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Thu, 30 Nov 2017 00:37:57 -0500 Subject: [PATCH] tfv: use the re-arranged multiply register results to optimize cycle count actually gets worse, but that was due to a bug in the cycle counting missing two of the add routines --- gr-sim/tfv_flying_6502.c | 6 +++-- tfv/OPTIMIZATION | 15 ++++++++++++ tfv/tfv_flying.s | 50 ++++++++++++++++++++-------------------- 3 files changed, 44 insertions(+), 27 deletions(-) diff --git a/gr-sim/tfv_flying_6502.c b/gr-sim/tfv_flying_6502.c index eb02e20c..dbe54eb5 100644 --- a/gr-sim/tfv_flying_6502.c +++ b/gr-sim/tfv_flying_6502.c @@ -756,11 +756,12 @@ static void draw_background_mode7(void) { fixed_mul(ram[SPACEX_I],ram[SPACEX_F], ram[TEMP_I],ram[TEMP_F], &ram[SPACEX_I],&ram[SPACEX_F],0); - cycles.mode7+=26; + cycles.mode7+=20; fixed_add(ram[SPACEX_I],ram[SPACEX_F], ram[CX_I],ram[CX_F], &ram[SPACEX_I],&ram[SPACEX_F]); + cycles.mode7+=16; ram[TEMP_I]=fixed_sin[ram[ANGLE]&0xf].i; @@ -770,11 +771,12 @@ static void draw_background_mode7(void) { fixed_mul(ram[SPACEY_I],ram[SPACEY_F], ram[TEMP_I],ram[TEMP_F], &ram[SPACEY_I],&ram[SPACEY_F],0); - cycles.mode7+=26; + cycles.mode7+=20; fixed_add(ram[SPACEY_I],ram[SPACEY_F], ram[CY_I],ram[CY_F], &ram[SPACEY_I],&ram[SPACEY_F]); + cycles.mode7+=16; // mul7 diff --git a/tfv/OPTIMIZATION b/tfv/OPTIMIZATION index d0a86fc4..f4042697 100644 --- a/tfv/OPTIMIZATION +++ b/tfv/OPTIMIZATION @@ -210,6 +210,21 @@ Move spacez updates out of line and also do some self modifying code Total = 181,077 Frame Rate = 5.52 fps +Re-arranged multiply result register to allow more optimization. +This looks like a pessimization, but it's because the cycle counting code + had been undercounting and missed a few add routines :( + + Cycles: flying= 187 + Cycles: getkey= 46 + Cycles: page_flip= 26 + Cycles: multiply= 40,680 + Cycles: mode7= 115,470 + Cycles: lookup_map= 22,747 + Cycles: put_sprite= 2,561 + ================================ + Total = 181,717 + Frame Rate = 5.50 fps + Each cycle removed from inner X loop saves 32*40=1280 cycles diff --git a/tfv/tfv_flying.s b/tfv/tfv_flying.s index 1badc98f..0f92ef2c 100644 --- a/tfv/tfv_flying.s +++ b/tfv/tfv_flying.s @@ -828,34 +828,35 @@ spacez_shifted: ; NUM2H:NUM2L already set above sec ; don't reuse previous NUM1 ; 2 jsr multiply ; 6 - stx SPACEX_I ; 3 - sta SPACEX_F ; 3 + ; SPACEX_I in X ; + ; SPACEX_F in A ; ;========== - ; 26 + ; 20 ; fixed_add(&space_x,&cx,&space_x); clc ; 2 - lda SPACEX_F ; 3 + ; SPACEX_F still in A ; adc CX_F ; 3 sta SPACEX_F ; 3 - lda SPACEX_I ; 3 + txa ; SPACEX_I was in X ; 2 adc CX_I ; 3 sta SPACEX_I ; 3 + ;=========== + ; 16 - ; brk ; space_x = 06:bc - - lda ANGLE ; temp.i=fixed_sin[angle&0xf].i; ; 3 + ; temp.i=fixed_sin[angle&0xf].i; // sin() + lda ANGLE ; 3 and #$f ; 2 asl ; 2 tay ; 2 lda fixed_sin,Y ; 4 -; sta TEMP_I ; - sta NUM2H ; 3 - iny ; fixed_temp.f=fixed_sin[angle&0xf].f; ; 2 + sta NUM2H ; store for next mul ; 3 + + ; fixed_temp.f=fixed_sin[angle&0xf].f; // sin() + iny ; 2 lda fixed_sin,Y ; 4 - sta TEMP_F ; - sta NUM2L ; 3 + sta NUM2L ; store for next mul ; 3 ;========== ; 25 @@ -865,25 +866,24 @@ spacez_shifted: sta NUM1H ; 3 lda SPACEY_F ; 3 sta NUM1L ; 3 -; lda TEMP_I ; -; sta NUM2H ; -; lda TEMP_F ; -; sta NUM2L ; - sec ; 2 + ; NUM2H:NUM2L already set + sec ; don't reuse previous num1 ; 2 jsr multiply ; 6 - stx SPACEY_I ; 3 - sta SPACEY_F ; 3 + ; SPACEY_I in X ; + ; SPACEY_F in A ; ;========== - ; 26 + ; 20 - clc ; fixed_add(&space_y,&cy,&space_y); ; 2 - lda SPACEY_F ; 3 + ; fixed_add(&space_y,&cy,&space_y); + clc ; 2 + ; SPACEY_F in A adc CY_F ; 3 sta SPACEY_F ; 3 - lda SPACEY_I ; 3 + txa ; SPACEY_I in X ; 2 adc CY_I ; 3 sta SPACEY_I ; 3 - + ;========== + ; 16 ; mul7 ; fixed_mul(&temp,&dx,&temp); lda #CONST_LOWRES_HALF_I ; 3