tfv: use the re-arranged multiply register results to optimize

The cycle count actually gets worse, but only because of a bug in the
cycle counting: it had been missing two of the add routines.
This commit is contained in:
Vince Weaver 2017-11-30 00:37:57 -05:00
parent 8155ff8408
commit 91706259a7
3 changed files with 44 additions and 27 deletions

View File

@ -756,11 +756,12 @@ static void draw_background_mode7(void) {
fixed_mul(ram[SPACEX_I],ram[SPACEX_F], fixed_mul(ram[SPACEX_I],ram[SPACEX_F],
ram[TEMP_I],ram[TEMP_F], ram[TEMP_I],ram[TEMP_F],
&ram[SPACEX_I],&ram[SPACEX_F],0); &ram[SPACEX_I],&ram[SPACEX_F],0);
cycles.mode7+=26; cycles.mode7+=20;
fixed_add(ram[SPACEX_I],ram[SPACEX_F], fixed_add(ram[SPACEX_I],ram[SPACEX_F],
ram[CX_I],ram[CX_F], ram[CX_I],ram[CX_F],
&ram[SPACEX_I],&ram[SPACEX_F]); &ram[SPACEX_I],&ram[SPACEX_F]);
cycles.mode7+=16;
ram[TEMP_I]=fixed_sin[ram[ANGLE]&0xf].i; ram[TEMP_I]=fixed_sin[ram[ANGLE]&0xf].i;
@ -770,11 +771,12 @@ static void draw_background_mode7(void) {
fixed_mul(ram[SPACEY_I],ram[SPACEY_F], fixed_mul(ram[SPACEY_I],ram[SPACEY_F],
ram[TEMP_I],ram[TEMP_F], ram[TEMP_I],ram[TEMP_F],
&ram[SPACEY_I],&ram[SPACEY_F],0); &ram[SPACEY_I],&ram[SPACEY_F],0);
cycles.mode7+=26; cycles.mode7+=20;
fixed_add(ram[SPACEY_I],ram[SPACEY_F], fixed_add(ram[SPACEY_I],ram[SPACEY_F],
ram[CY_I],ram[CY_F], ram[CY_I],ram[CY_F],
&ram[SPACEY_I],&ram[SPACEY_F]); &ram[SPACEY_I],&ram[SPACEY_F]);
cycles.mode7+=16;
// mul7 // mul7

View File

@ -210,6 +210,21 @@ Move spacez updates out of line and also do some self modifying code
Total = 181,077 Total = 181,077
Frame Rate = 5.52 fps Frame Rate = 5.52 fps
Re-arranged the multiply result registers to allow more optimization.
This looks like a pessimization, but only because the cycle-counting code
had been undercounting: it missed two of the add routines :(
Cycles: flying= 187
Cycles: getkey= 46
Cycles: page_flip= 26
Cycles: multiply= 40,680
Cycles: mode7= 115,470
Cycles: lookup_map= 22,747
Cycles: put_sprite= 2,561
================================
Total = 181,717
Frame Rate = 5.50 fps
Each cycle removed from inner X loop saves Each cycle removed from inner X loop saves
32*40=1280 cycles 32*40=1280 cycles

View File

@ -828,34 +828,35 @@ spacez_shifted:
; NUM2H:NUM2L already set above ; NUM2H:NUM2L already set above
sec ; don't reuse previous NUM1 ; 2 sec ; don't reuse previous NUM1 ; 2
jsr multiply ; 6 jsr multiply ; 6
stx SPACEX_I ; 3 ; SPACEX_I in X ;
sta SPACEX_F ; 3 ; SPACEX_F in A ;
;========== ;==========
; 26 ; 20
; fixed_add(&space_x,&cx,&space_x); ; fixed_add(&space_x,&cx,&space_x);
clc ; 2 clc ; 2
lda SPACEX_F ; 3 ; SPACEX_F still in A ;
adc CX_F ; 3 adc CX_F ; 3
sta SPACEX_F ; 3 sta SPACEX_F ; 3
lda SPACEX_I ; 3 txa ; SPACEX_I was in X ; 2
adc CX_I ; 3 adc CX_I ; 3
sta SPACEX_I ; 3 sta SPACEX_I ; 3
;===========
; 16
; brk ; space_x = 06:bc ; temp.i=fixed_sin[angle&0xf].i; // sin()
lda ANGLE ; 3
lda ANGLE ; temp.i=fixed_sin[angle&0xf].i; ; 3
and #$f ; 2 and #$f ; 2
asl ; 2 asl ; 2
tay ; 2 tay ; 2
lda fixed_sin,Y ; 4 lda fixed_sin,Y ; 4
; sta TEMP_I ; sta NUM2H ; store for next mul ; 3
sta NUM2H ; 3
iny ; fixed_temp.f=fixed_sin[angle&0xf].f; ; 2 ; fixed_temp.f=fixed_sin[angle&0xf].f; // sin()
iny ; 2
lda fixed_sin,Y ; 4 lda fixed_sin,Y ; 4
sta TEMP_F ; sta NUM2L ; store for next mul ; 3
sta NUM2L ; 3
;========== ;==========
; 25 ; 25
@ -865,25 +866,24 @@ spacez_shifted:
sta NUM1H ; 3 sta NUM1H ; 3
lda SPACEY_F ; 3 lda SPACEY_F ; 3
sta NUM1L ; 3 sta NUM1L ; 3
; lda TEMP_I ; ; NUM2H:NUM2L already set
; sta NUM2H ; sec ; don't reuse previous num1 ; 2
; lda TEMP_F ;
; sta NUM2L ;
sec ; 2
jsr multiply ; 6 jsr multiply ; 6
stx SPACEY_I ; 3 ; SPACEY_I in X ;
sta SPACEY_F ; 3 ; SPACEY_F in A ;
;========== ;==========
; 26 ; 20
clc ; fixed_add(&space_y,&cy,&space_y); ; 2 ; fixed_add(&space_y,&cy,&space_y);
lda SPACEY_F ; 3 clc ; 2
; SPACEY_F in A
adc CY_F ; 3 adc CY_F ; 3
sta SPACEY_F ; 3 sta SPACEY_F ; 3
lda SPACEY_I ; 3 txa ; SPACEY_I in X ; 2
adc CY_I ; 3 adc CY_I ; 3
sta SPACEY_I ; 3 sta SPACEY_I ; 3
;==========
; 16
; mul7 ; mul7
; fixed_mul(&temp,&dx,&temp); ; fixed_mul(&temp,&dx,&temp);
lda #CONST_LOWRES_HALF_I ; 3 lda #CONST_LOWRES_HALF_I ; 3