tfv: update cycle counts

This commit is contained in:
Vince Weaver 2017-11-24 14:40:50 -05:00
parent 9f8fdadc4f
commit 783e26d369
4 changed files with 164 additions and 96 deletions

View File

@ -83,7 +83,7 @@ static unsigned char water_map[32]={
#define LOWRES_W 40 #define LOWRES_W 40
#define LOWRES_H 40 #define LOWRES_H 40
static int displayed=0; static int displayed=1;
struct cycle_counts { struct cycle_counts {
int flying; int flying;
@ -229,18 +229,11 @@ static void fixed_mul(unsigned char x_i, unsigned char x_f,
num1h=x_i; num1h=x_i;
num1l=x_f; num1l=x_f;
negate=0; // lda #0 2 negate=0;
// sta NEGATE 4 cycles.multiply+=10;
if (!(num1h&0x80)) goto check_num2;
// lda NUM1H 4
cycles.multiply+=13;
if (!(num1h&0x80)) goto check_num2; // bpl check_num2 2nt/3
cycles.multiply--; cycles.multiply--;
negate++;
negate++; // inc NEGATE 6
num1l=~num1l; num1l=~num1l;
num1h=~num1h; num1h=~num1h;
@ -254,13 +247,12 @@ static void fixed_mul(unsigned char x_i, unsigned char x_f,
num1l&=0xff; num1l&=0xff;
num1h&=0xff; num1h&=0xff;
// total=26 cycles.multiply+=25;
cycles.multiply+=26;
check_num2: check_num2:
num2h=y_i; num2h=y_i;
num2l=y_f; num2l=y_f;
cycles.multiply+=7; cycles.multiply+=6;
if (!(num2h&0x80)) goto unsigned_multiply; if (!(num2h&0x80)) goto unsigned_multiply;
cycles.multiply--; cycles.multiply--;
negate++; negate++;
@ -277,7 +269,7 @@ check_num2:
num2l&=0xff; num2l&=0xff;
num2h&=0xff; num2h&=0xff;
cycles.multiply+=30; cycles.multiply+=25;
unsigned_multiply: unsigned_multiply:
// if (debug) { // if (debug) {
@ -290,7 +282,7 @@ unsigned_multiply:
aa=0; // lda #0 (sz) aa=0; // lda #0 (sz)
result2=aa; // sta result+2 result2=aa; // sta result+2
xx=16; // ldx #16 (sz) xx=16; // ldx #16 (sz)
cycles.multiply+=8; cycles.multiply+=7;
multiply_mainloop: multiply_mainloop:
cc=(num2h&1); //lsr NUM2+1 (szc) cc=(num2h&1); //lsr NUM2+1 (szc)
num2h>>=1; num2h>>=1;
@ -306,7 +298,7 @@ multiply_mainloop:
num2l|=(cc<<7); num2l|=(cc<<7);
cc=cc2; cc=cc2;
cycles.multiply+=15; cycles.multiply+=13;
if (cc==0) goto shift_output; // bcc L2 if (cc==0) goto shift_output; // bcc L2
cycles.multiply--; cycles.multiply--;
@ -321,7 +313,7 @@ multiply_mainloop:
aa=aa+cc+num1h; // adc NUM1+1 aa=aa+cc+num1h; // adc NUM1+1
cc=!!(aa&0x100); cc=!!(aa&0x100);
aa=aa&0xff; aa=aa&0xff;
cycles.multiply+=22; cycles.multiply+=18;
shift_output: shift_output:
cc2=aa&1; cc2=aa&1;
aa=aa>>1; aa=aa>>1;
@ -348,7 +340,7 @@ shift_output:
cc=cc2; // ror result+0 cc=cc2; // ror result+0
xx--; // dex xx--; // dex
cycles.multiply+=25; cycles.multiply+=22;
if (xx!=0) goto multiply_mainloop; // bne L1 if (xx!=0) goto multiply_mainloop; // bne L1
cycles.multiply--; cycles.multiply--;
result3=aa&0xff; // sta result+3 result3=aa&0xff; // sta result+3
@ -358,7 +350,7 @@ shift_output:
// printf("RAW RESULT = %02x:%02x:%02x:%02x\n", // printf("RAW RESULT = %02x:%02x:%02x:%02x\n",
// result3&0xff,result2&0xff,result1&0xff,result0&0xff); // result3&0xff,result2&0xff,result1&0xff,result0&0xff);
// } // }
cycles.multiply+=13; cycles.multiply+=11;
if (negate&1) { if (negate&1) {
// printf("NEGATING!\n"); // printf("NEGATING!\n");
cycles.multiply--; cycles.multiply--;
@ -383,7 +375,7 @@ shift_output:
aa-=result3+cc; aa-=result3+cc;
cc=!!(aa&0x100); cc=!!(aa&0x100);
result3=aa; result3=aa;
cycles.multiply+=50; cycles.multiply+=42;
} }
*z_i=result2&0xff; *z_i=result2&0xff;
@ -619,6 +611,21 @@ int flying(void) {
/* Flying */ /* Flying */
/************************************************/ /************************************************/
/* Benchmark the multiply */
memset(&cycles,0,sizeof(cycles));
fixed_mul(0x1,0x0,
0x2,0x0,
&ram[FACTOR_I],&ram[FACTOR_F]);
printf("Multiplying 1.0 * 2.0 = %d.%d, took %d cycles\n",
ram[FACTOR_I],ram[FACTOR_F],cycles.multiply);
memset(&cycles,0,sizeof(cycles));
fixed_mul(0xff,0xff,
0xff,0xff,
&ram[FACTOR_I],&ram[FACTOR_F]);
printf("Multiplying ff.ff * ff.ff = %d.%d, took %d cycles\n",
ram[FACTOR_I],ram[FACTOR_F],cycles.multiply);
gr(); gr();
clear_bottom(PAGE0); clear_bottom(PAGE0);
clear_bottom(PAGE1); clear_bottom(PAGE1);

32
tfv/OPTIMIZATION Normal file
View File

@ -0,0 +1,32 @@
Original implementation:
Multiplying 1.0 * 2.0 = 2.0, took 707 cycles
Multiplying ff.ff * ff.ff = 0.0, took 761 cycles
Cycles: flying= 162
Cycles: getkey= 46
Cycles: page_flip= 26
Cycles: multiply= 88,179
Cycles: mode7= 76,077
Cycles: lookup_map= 33,920
Cycles: put_sprite= 2,561
==================================
Total = 200,971
Frame Rate = 4.98 fps
Update Multiply to use zero page addresses:
Multiplying 1.0 * 2.0 = 2.0, took 616 cycles
Multiplying ff.ff * ff.ff = 0.0, took 664 cycles
Cycles: flying= 162
Cycles: getkey= 46
Cycles: page_flip= 26
Cycles: multiply= 76,561
Cycles: mode7= 76,077
Cycles: lookup_map= 33,920
Cycles: put_sprite= 2,561
===================================
Total = 189,353
Frame Rate = 5.28 fps

View File

@ -3,12 +3,12 @@
; NUM2 is zeroed out ; NUM2 is zeroed out
; result is in RESULT3:RESULT2:RESULT1:RESULT0 ; result is in RESULT3:RESULT2:RESULT1:RESULT0
NUM1L: .byte 0 ;NUM1L: .byte 0
NUM1H: .byte 0 ;NUM1H: .byte 0
NUM2L: .byte 0 ;NUM2L: .byte 0
NUM2H: .byte 0 ;NUM2H: .byte 0
RESULT: .byte 0,0,0,0 ;RESULT: .byte 0,0,0,0
NEGATE: .byte 0 ;NEGATE: .byte 0
; If we have 2k to spare we should check out ; If we have 2k to spare we should check out
; http://codebase64.org/doku.php?id=base:seriously_fast_multiplication ; http://codebase64.org/doku.php?id=base:seriously_fast_multiplication
@ -16,94 +16,113 @@ NEGATE: .byte 0
multiply: multiply:
lda #$0 ; 2 lda #$0 ; 2
sta NEGATE ; 4 sta NEGATE ; 3
; Handle Signed ; Handle Signed
lda NUM1H ; 4 lda NUM1H ; 3
bpl check_num2 ; 2nt/3 bpl check_num2 ; 2nt/3
;==============
; 10 when the bpl is not taken (11 when taken; the later subtotals count their branch as taken)
inc NEGATE ; 4 inc NEGATE ; 3
clc ; 2s-complement NUM1H/NUM1L ; 2 clc ; 2s-complement NUM1H/NUM1L ; 2
lda NUM1L ; 4 lda NUM1L ; 3
eor #$ff ; 2 eor #$ff ; 2
adc #$1 ; 2 adc #$1 ; 2
sta NUM1L ; 4 sta NUM1L ; 3
lda NUM1H ; 4 lda NUM1H ; 3
eor #$ff ; 2 eor #$ff ; 2
adc #$0 ; 2 adc #$0 ; 2
sta NUM1H ; 4 sta NUM1H ; 3
;===========
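; 25 as annotated; NOTE: inc with a zero-page operand takes 5 cycles on the 6502 (not 3), which makes this path 27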
check_num2: check_num2:
lda NUM2H ; 4 lda NUM2H ; 3
bpl unsigned_multiply ; 2nt/3 bpl unsigned_multiply ; 2nt/3
;==============
; 6
inc NEGATE ; 4 inc NEGATE ; 3
clc ; 2 clc ; 2
lda NUM2L ; 4 lda NUM2L ; 3
eor #$ff ; 2 eor #$ff ; 2
adc #$1 ; 2 adc #$1 ; 2
sta NUM2L ; 4 sta NUM2L ; 3
lda NUM2H ; 4 lda NUM2H ; 3
eor #$ff ; 2 eor #$ff ; 2
adc #$0 ; 2 adc #$0 ; 2
sta NUM2H ; 4 sta NUM2H ; 3
;=============
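; 25 as annotated; NOTE: inc with a zero-page operand takes 5 cycles on the 6502 (not 3), which makes this path 27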
unsigned_multiply: unsigned_multiply:
lda #0 ; Initialize RESULT to 0 ; 2 lda #0 ; Initialize RESULT to 0 ; 2
sta RESULT+2 ; 4 sta RESULT+2 ; 3
ldx #16 ; 16x16 multiply ; 2 ldx #16 ; 16x16 multiply ; 2
;============
; 7
multiply_mainloop: multiply_mainloop:
lsr NUM2H ; Shift right 16-bit NUM2 ; 6 lsr NUM2H ; Shift right 16-bit NUM2 ; 5
ror NUM2L ; low bit goes into carry ; 6 ror NUM2L ; low bit goes into carry ; 5
bcc shift_output ; 0 or 1? ; 2nt/3 bcc shift_output ; 0 or 1? ; 2nt/3
;============
; 13
tay ; If 1, add NUM1 (hi byte RESULT in A) ; 2 tay ; If 1, add NUM1 (hi byte RESULT in A) ; 2
clc ; 2 clc ; 2
lda NUM1L ; 4 lda NUM1L ; 3
adc RESULT+2 ; 4 adc RESULT+2 ; 3
sta RESULT+2 ; 4 sta RESULT+2 ; 3
tya ; 2 tya ; 2
adc NUM1H ; 4 adc NUM1H ; 3
;============
; 18
shift_output: shift_output:
ror A ; "Stairstep" shift ; 2 ror A ; "Stairstep" shift ; 2
ror RESULT+2 ; 6 ror RESULT+2 ; 5
ror RESULT+1 ; 6 ror RESULT+1 ; 5
ror RESULT ; 6 ror RESULT ; 5
dex ; 2 dex ; 2
bne multiply_mainloop ; 2nt/3 bne multiply_mainloop ; 2nt/3
sta RESULT+3 ; 4 ;=============
; 22
sta RESULT+3 ; 3
;; Negate if necessary ;; Negate if necessary
lda NEGATE ; 4 lda NEGATE ; 3
and #$1 ; 2 and #$1 ; 2
beq positive ; 2nt/3 beq positive ; 2nt/3
;==============
; 11
clc ; 2 clc ; 2
lda RESULT+0 ; 4 lda RESULT+0 ; 3
eor #$ff ; 2 eor #$ff ; 2
adc #$1 ; 2 adc #$1 ; 2
sta RESULT+0 ; 4 sta RESULT+0 ; 3
lda RESULT+1 ; 4 lda RESULT+1 ; 3
eor #$ff ; 2 eor #$ff ; 2
adc #$0 ; 2 adc #$0 ; 2
sta RESULT+1 ; 4 sta RESULT+1 ; 3
lda RESULT+2 ; 4 lda RESULT+2 ; 3
eor #$ff ; 2 eor #$ff ; 2
adc #$0 ; 2 adc #$0 ; 2
sta RESULT+2 ; 4 sta RESULT+2 ; 3
lda RESULT+3 ; 4 lda RESULT+3 ; 3
eor #$ff ; 2 eor #$ff ; 2
adc #$0 ; 2 adc #$0 ; 2
sta RESULT+3 ; 4 sta RESULT+3 ; 3
;===========
; 42
positive: positive:
rts ; 6 rts ; 6

View File

@ -1,6 +1,7 @@
.define EQU = .define EQU =
;; Zero page monitor routines addresses ;; Zero page monitor routines addresses
WNDLFT EQU $20 WNDLFT EQU $20
WNDWDTH EQU $21 WNDWDTH EQU $21
WNDTOP EQU $22 WNDTOP EQU $22
@ -17,6 +18,40 @@ MASK EQU $2E
COLOR EQU $30 COLOR EQU $30
INVFLG EQU $32 INVFLG EQU $32
; More zero-page addresses
; we try not to conflict with anything DOS, MONITOR or BASIC related
COLOR1 EQU $E0
COLOR2 EQU $E1
MATCH EQU $E2
XX EQU $E3
YY EQU $E4
YADD EQU $E5
LOOP EQU $E6
MEMPTRL EQU $E7
MEMPTRH EQU $E8
NAMEL EQU $E9
NAMEH EQU $EA
NAMEX EQU $EB
CHAR EQU $EC
DISP_PAGE EQU $ED
DRAW_PAGE EQU $EE
FIRST EQU $F0
LASTKEY EQU $F1
PADDLE_STATUS EQU $F2
XPOS EQU $F3
YPOS EQU $F4
TEMP EQU $FA
RUN EQU $FA
TEMP2 EQU $FB
TEMPY EQU $FB
INL EQU $FC
INH EQU $FD
OUTL EQU $FE
OUTH EQU $FF
;; Flying Routine Only ;; Flying Routine Only
TURNING EQU $60 TURNING EQU $60
@ -49,6 +84,13 @@ DRAW_SPLASH EQU $7A
SPEED EQU $7B SPEED EQU $7B
SPLASH_COUNT EQU $7C SPLASH_COUNT EQU $7C
OVER_WATER EQU $7D OVER_WATER EQU $7D
NUM1L EQU $7E
NUM1H EQU $7F
NUM2L EQU $80
NUM2H EQU $81
RESULT EQU $82 ; 83,84,85
NEGATE EQU $86
SHIPY EQU $E4 SHIPY EQU $E4
;; World Map Only ;; World Map Only
@ -110,38 +152,6 @@ COUT1 EQU $FDF0 ;; output A to screen
; Our zero-page addresses
; we try not to conflict with anything DOS, MONITOR or BASIC related
COLOR1 EQU $E0
COLOR2 EQU $E1
MATCH EQU $E2
XX EQU $E3
YY EQU $E4
YADD EQU $E5
LOOP EQU $E6
MEMPTRL EQU $E7
MEMPTRH EQU $E8
NAMEL EQU $E9
NAMEH EQU $EA
NAMEX EQU $EB
CHAR EQU $EC
DISP_PAGE EQU $ED
DRAW_PAGE EQU $EE
FIRST EQU $F0
LASTKEY EQU $F1
PADDLE_STATUS EQU $F2
XPOS EQU $F3
YPOS EQU $F4
TEMP EQU $FA
RUN EQU $FA
TEMP2 EQU $FB
TEMPY EQU $FB
INL EQU $FC
INH EQU $FD
OUTL EQU $FE
OUTH EQU $FF