tfv: use the re-arranged multiply register results to optimize

The reported cycle count actually gets worse, but only because of a bug in
the cycle counting: it had been missing two of the add routines.
This commit is contained in:
Vince Weaver 2017-11-30 00:37:57 -05:00
parent 8155ff8408
commit 91706259a7
3 changed files with 44 additions and 27 deletions

View File

@ -756,11 +756,12 @@ static void draw_background_mode7(void) {
fixed_mul(ram[SPACEX_I],ram[SPACEX_F],
ram[TEMP_I],ram[TEMP_F],
&ram[SPACEX_I],&ram[SPACEX_F],0);
cycles.mode7+=26;
cycles.mode7+=20;
fixed_add(ram[SPACEX_I],ram[SPACEX_F],
ram[CX_I],ram[CX_F],
&ram[SPACEX_I],&ram[SPACEX_F]);
cycles.mode7+=16;
ram[TEMP_I]=fixed_sin[ram[ANGLE]&0xf].i;
@ -770,11 +771,12 @@ static void draw_background_mode7(void) {
fixed_mul(ram[SPACEY_I],ram[SPACEY_F],
ram[TEMP_I],ram[TEMP_F],
&ram[SPACEY_I],&ram[SPACEY_F],0);
cycles.mode7+=26;
cycles.mode7+=20;
fixed_add(ram[SPACEY_I],ram[SPACEY_F],
ram[CY_I],ram[CY_F],
&ram[SPACEY_I],&ram[SPACEY_F]);
cycles.mode7+=16;
// mul7

View File

@ -210,6 +210,21 @@ Move spacez updates out of line and also do some self modifying code
Total = 181,077
Frame Rate = 5.52 fps
Re-arranged the multiply result registers to allow more optimization.
This looks like a pessimization, but only because the cycle-counting code
had been undercounting: it missed two of the add routines :(
Cycles: flying= 187
Cycles: getkey= 46
Cycles: page_flip= 26
Cycles: multiply= 40,680
Cycles: mode7= 115,470
Cycles: lookup_map= 22,747
Cycles: put_sprite= 2,561
================================
Total = 181,717
Frame Rate = 5.50 fps
Each cycle removed from inner X loop saves
32*40=1280 cycles

View File

@ -828,34 +828,35 @@ spacez_shifted:
; NUM2H:NUM2L already set above
sec ; don't reuse previous NUM1 ; 2
jsr multiply ; 6
stx SPACEX_I ; 3
sta SPACEX_F ; 3
; SPACEX_I in X ;
; SPACEX_F in A ;
;==========
; 26
; 20
; fixed_add(&space_x,&cx,&space_x);
clc ; 2
lda SPACEX_F ; 3
; SPACEX_F still in A ;
adc CX_F ; 3
sta SPACEX_F ; 3
lda SPACEX_I ; 3
txa ; SPACEX_I was in X ; 2
adc CX_I ; 3
sta SPACEX_I ; 3
;===========
; 16
; brk ; space_x = 06:bc
lda ANGLE ; temp.i=fixed_sin[angle&0xf].i; ; 3
; temp.i=fixed_sin[angle&0xf].i; // sin()
lda ANGLE ; 3
and #$f ; 2
asl ; 2
tay ; 2
lda fixed_sin,Y ; 4
; sta TEMP_I ;
sta NUM2H ; 3
iny ; fixed_temp.f=fixed_sin[angle&0xf].f; ; 2
sta NUM2H ; store for next mul ; 3
; fixed_temp.f=fixed_sin[angle&0xf].f; // sin()
iny ; 2
lda fixed_sin,Y ; 4
sta TEMP_F ;
sta NUM2L ; 3
sta NUM2L ; store for next mul ; 3
;==========
; 25
@ -865,25 +866,24 @@ spacez_shifted:
sta NUM1H ; 3
lda SPACEY_F ; 3
sta NUM1L ; 3
; lda TEMP_I ;
; sta NUM2H ;
; lda TEMP_F ;
; sta NUM2L ;
sec ; 2
; NUM2H:NUM2L already set
sec ; don't reuse previous num1 ; 2
jsr multiply ; 6
stx SPACEY_I ; 3
sta SPACEY_F ; 3
; SPACEY_I in X ;
; SPACEY_F in A ;
;==========
; 26
; 20
clc ; fixed_add(&space_y,&cy,&space_y); ; 2
lda SPACEY_F ; 3
; fixed_add(&space_y,&cy,&space_y);
clc ; 2
; SPACEY_F in A
adc CY_F ; 3
sta SPACEY_F ; 3
lda SPACEY_I ; 3
txa ; SPACEY_I in X ; 2
adc CY_I ; 3
sta SPACEY_I ; 3
;==========
; 16
; mul7
; fixed_mul(&temp,&dx,&temp);
lda #CONST_LOWRES_HALF_I ; 3