tfv: more optimization, move SCREEN_X to register X

This commit is contained in:
Vince Weaver 2017-11-26 21:58:58 -05:00
parent 5cee1db0b1
commit 4d86457d96
3 changed files with 69 additions and 48 deletions

View File

@ -109,16 +109,14 @@ struct cycle_counts {
int put_sprite; int put_sprite;
} cycles; } cycles;
static int last_color=0,last_xx=0,last_yy=0;
static int lookup_map(int xx, int yy) { static int lookup_map(int xx, int yy) {
int color,offset; int color,offset;
last_xx=xx; ram[LAST_SPACEX_I]=xx;
xx=xx&MASK_X; xx=xx&MASK_X;
last_yy=yy; ram[LAST_SPACEY_I]=yy;
yy=yy&MASK_Y; yy=yy&MASK_Y;
@ -132,7 +130,7 @@ static int lookup_map(int xx, int yy) {
offset=yy<<3; offset=yy<<3;
offset+=xx; offset+=xx;
cycles.lookup_map+=37; cycles.lookup_map+=39;
if ((yy>7) || (xx>7)) { if ((yy>7) || (xx>7)) {
cycles.lookup_map+=14; cycles.lookup_map+=14;
@ -156,8 +154,8 @@ static int lookup_map(int xx, int yy) {
update_cache: update_cache:
cycles.lookup_map+=9; cycles.lookup_map+=10;
last_color=color; ram[LAST_MAP_COLOR]=color;
return color; return color;
} }
@ -789,29 +787,18 @@ static void draw_background_mode7(void) {
fixed_add(ram[SPACEY_I],ram[SPACEY_F], fixed_add(ram[SPACEY_I],ram[SPACEY_F],
ram[TEMP_I],ram[TEMP_F], ram[TEMP_I],ram[TEMP_F],
&ram[SPACEY_I],&ram[SPACEY_F]); &ram[SPACEY_I],&ram[SPACEY_F]);
cycles.mode7+=25; cycles.mode7+=22;
if (!displayed) { if (!displayed) {
printf("SPACEY! %x:%x\n",ram[SPACEY_I],ram[SPACEY_F]); printf("SPACEY! %x:%x\n",ram[SPACEY_I],ram[SPACEY_F]);
} }
// go through all points in this screen line // go through all points in this screen line
for (ram[SCREEN_X] = 0; ram[SCREEN_X] = 0;
ram[SCREEN_X] < LOWRES_W;
ram[SCREEN_X]++) {
while(1) {
// get a pixel from the tile and put it on the screen // get a pixel from the tile and put it on the screen
/* cache last value */ nomatch:
cycles.mode7+=9;
if (ram[SPACEY_I]==last_yy) {
cycles.mode7+=8;
if (ram[SPACEX_I]==last_xx) {
cycles.mode7+=6;
map_color=last_color;
goto match;
}
}
map_color=lookup_map(ram[SPACEX_I],ram[SPACEY_I]); map_color=lookup_map(ram[SPACEX_I],ram[SPACEY_I]);
cycles.mode7+=6; cycles.mode7+=6;
match: match:
@ -839,7 +826,24 @@ match:
fixed_add(ram[SPACEY_I],ram[SPACEY_F], fixed_add(ram[SPACEY_I],ram[SPACEY_F],
ram[DY_I],ram[DY_F], ram[DY_I],ram[DY_F],
&ram[SPACEY_I],&ram[SPACEY_F]); &ram[SPACEY_I],&ram[SPACEY_F]);
cycles.mode7+=49;
ram[SCREEN_X]++;
if (ram[SCREEN_X] >= LOWRES_W) break;
cycles.mode7+=43;
/* cache last value */
cycles.mode7+=8;
if (ram[SPACEY_I]==ram[LAST_SPACEY_I]) {
cycles.mode7+=7;
if (ram[SPACEX_I]==ram[LAST_SPACEX_I]) {
cycles.mode7+=4;
map_color=ram[LAST_MAP_COLOR];
goto match;
}
}
goto nomatch;
} }
ram[SCREEN_Y]+=1; ram[SCREEN_Y]+=1;

View File

@ -146,5 +146,17 @@ Add some self-modifying code to inner loop:
Total = 205,761 Total = 205,761
Frame Rate = 4.86 fps Frame Rate = 4.86 fps
More self-modifying code; also move SCREEN_X to the X register:
Cycles: flying= 187
Cycles: getkey= 46
Cycles: page_flip= 26
Cycles: multiply= 49,613
Cycles: mode7= 118,034
Cycles: lookup_map= 22,747
Cycles: put_sprite= 2,561
================================
Total = 193,214
Frame Rate = 5.18 fps
Each cycle removed from the inner X loop saves 32*40 = 1,280 cycles
(the earlier figure of 12800 was an arithmetic slip).

View File

@ -919,7 +919,7 @@ odd_branch:
;========== ;==========
; 28 ; 28
clc ; fixed_add(&space_y,&temp,&space_y); ; 2 clc ; fixed_add(&space_y,&temp,&space_y); ; 2
lda SPACEY_F ; 3 lda SPACEY_F ; 3
; adc TEMP_F ; ; adc TEMP_F ;
adc RESULT+1 ; 3 adc RESULT+1 ; 3
@ -931,23 +931,12 @@ odd_branch:
; brk ; space_y = f7:04 ; brk ; space_y = f7:04
lda #0 ; 2 ldx #0 ; was SCREEN_X ; 2
sta SCREEN_X ; 3
;========== ;==========
; 25 ; 22
screenx_loop: screenx_loop:
; cache color and return if same as last time
lda SPACEY_I ; 3
cmp LAST_SPACEY_I ; 3
bne nomatch ; 2nt/3
lda SPACEX_I ; 3
cmp LAST_SPACEX_I ; 3
bne nomatch ; 2nt/3
lda LAST_MAP_COLOR ; 3
jmp match ; 3
;===========
; 22
nomatch: nomatch:
; do a full lookup, takes much longer ; do a full lookup, takes much longer
jsr lookup_map ; get color in A ; 6 jsr lookup_map ; get color in A ; 6
@ -1000,13 +989,29 @@ dyi_label:
adc #0 ; 2 adc #0 ; 2
sta SPACEY_I ; 3 sta SPACEY_I ; 3
inc SCREEN_X ; 5 inx ;inc SCREEN_X ; 2
lda SCREEN_X ; 3 cpx #40 ; LOWRES width ; 2
cmp #40 ; LOWRES width ; 2 beq done_screenx_loop ; 2nt/3
bne screenx_loop ; 2nt/3
;============= ;=============
; 49 ; 43
; cache color and return if same as last time
lda SPACEY_I ; 3
spacey_label:
cmp #0 ; LAST_SPACEY_I ; 2
bne nomatch ; 2nt/3
lda SPACEX_I ; 3
spacex_label:
cmp #0 ; LAST_SPACEX_I ; 2
bne nomatch ; 2nt/3
map_color_label:
lda #0 ; LAST_MAP_COLOR ; 2
jmp match ; 3
;===========
; max 19
done_screenx_loop:
inc SCREEN_Y ; 5 inc SCREEN_Y ; 5
lda SCREEN_Y ; 3 lda SCREEN_Y ; 3
cmp #40 ; LOWRES height ; 2 cmp #40 ; LOWRES height ; 2
@ -1041,13 +1046,13 @@ lookup_map:
;nomatch: ;nomatch:
lda SPACEX_I ; 3 lda SPACEX_I ; 3
;nomatch2: ;nomatch2:
sta LAST_SPACEX_I ; 3 sta spacex_label+1 ; LAST_SPACEX_I ; 4
and #CONST_MAP_MASK_X ; 2 and #CONST_MAP_MASK_X ; 2
sta SPACEX_I ; 3 sta SPACEX_I ; 3
tay ; 2 tay ; 2
lda SPACEY_I ; 3 lda SPACEY_I ; 3
sta LAST_SPACEY_I ; 3 sta spacey_label+1 ; LAST_SPACEY_I ; 4
and #CONST_MAP_MASK_Y ; wrap to 64x64 grid ; 2 and #CONST_MAP_MASK_Y ; wrap to 64x64 grid ; 2
sta SPACEY_I ; 3 sta SPACEY_I ; 3
@ -1062,7 +1067,7 @@ lookup_map:
; SPACEX_I is in y ; SPACEX_I is in y
cpy #$8 ; 2 cpy #$8 ; 2
;============ ;============
; 37 ; 39
bcs ocean_color ; bgt 8 ;^2nt/3 bcs ocean_color ; bgt 8 ;^2nt/3
ldy SPACEY_I ; 3 ldy SPACEY_I ; 3
@ -1080,7 +1085,7 @@ ocean_color:
lda water_map,Y ; the color of the sea ; 4 lda water_map,Y ; the color of the sea ; 4
update_cache: update_cache:
sta LAST_MAP_COLOR ; 3 sta map_color_label+1 ; self-modifying ; 4
rts ; 6 rts ; 6
flying_map: flying_map: