tfv: update cycle counts

This commit is contained in:
Vince Weaver 2017-11-24 14:40:50 -05:00
parent 9f8fdadc4f
commit 783e26d369
4 changed files with 164 additions and 96 deletions

View File

@ -83,7 +83,7 @@ static unsigned char water_map[32]={
#define LOWRES_W 40
#define LOWRES_H 40
static int displayed=0;
static int displayed=1;
struct cycle_counts {
int flying;
@ -229,18 +229,11 @@ static void fixed_mul(unsigned char x_i, unsigned char x_f,
num1h=x_i;
num1l=x_f;
negate=0; // lda #0 2
// sta NEGATE 4
// lda NUM1H 4
cycles.multiply+=13;
if (!(num1h&0x80)) goto check_num2; // bpl check_num2 2nt/3
negate=0;
cycles.multiply+=10;
if (!(num1h&0x80)) goto check_num2;
cycles.multiply--;
negate++; // inc NEGATE 6
negate++;
num1l=~num1l;
num1h=~num1h;
@ -254,13 +247,12 @@ static void fixed_mul(unsigned char x_i, unsigned char x_f,
num1l&=0xff;
num1h&=0xff;
// total=25
cycles.multiply+=26;
cycles.multiply+=25;
check_num2:
num2h=y_i;
num2l=y_f;
cycles.multiply+=7;
cycles.multiply+=6;
if (!(num2h&0x80)) goto unsigned_multiply;
cycles.multiply--;
negate++;
@ -277,7 +269,7 @@ check_num2:
num2l&=0xff;
num2h&=0xff;
cycles.multiply+=30;
cycles.multiply+=25;
unsigned_multiply:
// if (debug) {
@ -290,7 +282,7 @@ unsigned_multiply:
aa=0; // lda #0 (sz)
result2=aa; // sta result+2
xx=16; // ldx #16 (sz)
cycles.multiply+=8;
cycles.multiply+=7;
multiply_mainloop:
cc=(num2h&1); //lsr NUM2+1 (szc)
num2h>>=1;
@ -306,7 +298,7 @@ multiply_mainloop:
num2l|=(cc<<7);
cc=cc2;
cycles.multiply+=15;
cycles.multiply+=13;
if (cc==0) goto shift_output; // bcc L2
cycles.multiply--;
@ -321,7 +313,7 @@ multiply_mainloop:
aa=aa+cc+num1h; // adc NUM1+1
cc=!!(aa&0x100);
aa=aa&0xff;
cycles.multiply+=22;
cycles.multiply+=18;
shift_output:
cc2=aa&1;
aa=aa>>1;
@ -348,7 +340,7 @@ shift_output:
cc=cc2; // ror result+0
xx--; // dex
cycles.multiply+=25;
cycles.multiply+=22;
if (xx!=0) goto multiply_mainloop; // bne L1
cycles.multiply--;
result3=aa&0xff; // sta result+3
@ -358,7 +350,7 @@ shift_output:
// printf("RAW RESULT = %02x:%02x:%02x:%02x\n",
// result3&0xff,result2&0xff,result1&0xff,result0&0xff);
// }
cycles.multiply+=13;
cycles.multiply+=11;
if (negate&1) {
// printf("NEGATING!\n");
cycles.multiply--;
@ -383,7 +375,7 @@ shift_output:
aa-=result3+cc;
cc=!!(aa&0x100);
result3=aa;
cycles.multiply+=50;
cycles.multiply+=42;
}
*z_i=result2&0xff;
@ -619,6 +611,21 @@ int flying(void) {
/* Flying */
/************************************************/
/* Benchmark the multiply */
memset(&cycles,0,sizeof(cycles));
fixed_mul(0x1,0x0,
0x2,0x0,
&ram[FACTOR_I],&ram[FACTOR_F]);
printf("Multiplying 1.0 * 2.0 = %d.%d, took %d cycles\n",
ram[FACTOR_I],ram[FACTOR_F],cycles.multiply);
memset(&cycles,0,sizeof(cycles));
fixed_mul(0xff,0xff,
0xff,0xff,
&ram[FACTOR_I],&ram[FACTOR_F]);
printf("Multiplying ff.ff * ff.ff = %d.%d, took %d cycles\n",
ram[FACTOR_I],ram[FACTOR_F],cycles.multiply);
gr();
clear_bottom(PAGE0);
clear_bottom(PAGE1);

32
tfv/OPTIMIZATION Normal file
View File

@ -0,0 +1,32 @@
Original implementation:
Multiplying 1.0 * 2.0 = 2.0, took 707 cycles
Multiplying ff.ff * ff.ff = 0.0, took 761 cycles
Cycles: flying= 162
Cycles: getkey= 46
Cycles: page_flip= 26
Cycles: multiply= 88,179
Cycles: mode7= 76,077
Cycles: lookup_map= 33,920
Cycles: put_sprite= 2,561
==================================
Total = 200,971
Frame Rate = 4.98 fps
Update Multiply to use zero page addresses:
Multiplying 1.0 * 2.0 = 2.0, took 616 cycles
Multiplying ff.ff * ff.ff = 0.0, took 664 cycles
Cycles: flying= 162
Cycles: getkey= 46
Cycles: page_flip= 26
Cycles: multiply= 76,561
Cycles: mode7= 76,077
Cycles: lookup_map= 33,920
Cycles: put_sprite= 2,561
===================================
Total = 189,353
Frame Rate = 5.28 fps

View File

@ -3,12 +3,12 @@
; NUM2 is zeroed out
; result is in RESULT3:RESULT2:RESULT1:RESULT0
NUM1L: .byte 0
NUM1H: .byte 0
NUM2L: .byte 0
NUM2H: .byte 0
RESULT: .byte 0,0,0,0
NEGATE: .byte 0
;NUM1L: .byte 0
;NUM1H: .byte 0
;NUM2L: .byte 0
;NUM2H: .byte 0
;RESULT: .byte 0,0,0,0
;NEGATE: .byte 0
; If we have 2k to spare we should check out
; http://codebase64.org/doku.php?id=base:seriously_fast_multiplication
@ -16,94 +16,113 @@ NEGATE: .byte 0
; multiply: signed 16-bit x 16-bit multiply (shift-and-add), 32-bit result.
;   Inputs:  NUM1H:NUM1L and NUM2H:NUM2L, treated as two's-complement values.
;   Output:  RESULT+3 (high byte) down to RESULT+0 (low byte).
;   Side effects: A, X and Y are overwritten; a negative NUM1 is replaced
;            by its magnitude; NUM2 is shifted out to zero; NEGATE holds
;            the number of negative inputs (0, 1 or 2).
;   The number at the end of each line is that instruction's cycle count;
;   "2nt/3" means 2 cycles if the branch is not taken, 3 if it is taken.
multiply:
lda #$0 ; 2
sta NEGATE ; 4
sta NEGATE ; 3
; Handle Signed
; Each negative operand is negated in place and counted in NEGATE, so the
; main loop only ever multiplies magnitudes.
lda NUM1H ; 4
lda NUM1H ; 3
bpl check_num2 ; 2nt/3
;==============
; 10
; NOTE(review): this total only reaches 10 with the bpl counted as not
; taken (2); the other section totals in this routine count their
; closing branch as taken (3) -- confirm which convention is intended.
; NOTE(review): INC is documented as 5 cycles zero-page / 6 absolute on
; the 6502, not 3 / 4 as annotated below; if so this section totals 27
; rather than 25 -- confirm against a 6502 reference.
inc NEGATE ; 4
inc NEGATE ; 3
clc ; 2s-complement NUM1H/NUM1L ; 2
lda NUM1L ; 4
lda NUM1L ; 3
eor #$ff ; 2
adc #$1 ; 2
sta NUM1L ; 4
sta NUM1L ; 3
lda NUM1H ; 4
lda NUM1H ; 3
eor #$ff ; 2
adc #$0 ; 2
sta NUM1H ; 4
sta NUM1H ; 3
;===========
; 25
check_num2:
lda NUM2H ; 4
lda NUM2H ; 3
bpl unsigned_multiply ; 2nt/3
;==============
; 6
; NOTE(review): same INC cycle-count question as for NUM1 above.
inc NEGATE ; 4
inc NEGATE ; 3
clc ; 2
lda NUM2L ; 4
lda NUM2L ; 3
eor #$ff ; 2
adc #$1 ; 2
sta NUM2L ; 4
sta NUM2L ; 3
lda NUM2H ; 4
lda NUM2H ; 3
eor #$ff ; 2
adc #$0 ; 2
sta NUM2H ; 4
sta NUM2H ; 3
;=============
; 25
unsigned_multiply:
; Only RESULT+2 (and A, which stands in for RESULT+3 during the loop) is
; cleared: the 16 right-rotations below push every original bit of
; RESULT+1 and RESULT+0 out, so their initial contents do not matter.
lda #0 ; Initialize RESULT to 0 ; 2
sta RESULT+2 ; 4
sta RESULT+2 ; 3
ldx #16 ; 16x16 multiply ; 2
;============
; 7
multiply_mainloop:
lsr NUM2H ; Shift right 16-bit NUM2 ; 6
ror NUM2L ; low bit goes into carry ; 6
lsr NUM2H ; Shift right 16-bit NUM2 ; 5
ror NUM2L ; low bit goes into carry ; 5
bcc shift_output ; 0 or 1? ; 2nt/3
;============
; 13
; Y preserves the running high byte (A) while A is used for the low-byte add.
tay ; If 1, add NUM1 (hi byte RESULT in A) ; 2
clc ; 2
lda NUM1L ; 4
adc RESULT+2 ; 4
sta RESULT+2 ; 4
lda NUM1L ; 3
adc RESULT+2 ; 3
sta RESULT+2 ; 3
tya ; 2
adc NUM1H ; 4
adc NUM1H ; 3
;============
; 18
shift_output:
; Carry on entry is either the carry out of the add above or 0 (bcc path);
; it becomes the new top bit as the 32-bit value A:RESULT+2..+0 shifts right.
ror A ; "Stairstep" shift ; 2
ror RESULT+2 ; 6
ror RESULT+1 ; 6
ror RESULT ; 6
ror RESULT+2 ; 5
ror RESULT+1 ; 5
ror RESULT ; 5
dex ; 2
bne multiply_mainloop ; 2nt/3
sta RESULT+3 ; 4
;=============
; 22
sta RESULT+3 ; 3
;; Negate if necessary
; An odd NEGATE count means exactly one input was negative, so the 32-bit
; product is two's-complemented; zero or two negatives leave it positive.
lda NEGATE ; 4
lda NEGATE ; 3
and #$1 ; 2
beq positive ; 2nt/3
;==============
; 11
clc ; 2
lda RESULT+0 ; 4
lda RESULT+0 ; 3
eor #$ff ; 2
adc #$1 ; 2
sta RESULT+0 ; 4
sta RESULT+0 ; 3
lda RESULT+1 ; 4
lda RESULT+1 ; 3
eor #$ff ; 2
adc #$0 ; 2
sta RESULT+1 ; 4
sta RESULT+1 ; 3
lda RESULT+2 ; 4
lda RESULT+2 ; 3
eor #$ff ; 2
adc #$0 ; 2
sta RESULT+2 ; 4
sta RESULT+2 ; 3
lda RESULT+3 ; 4
lda RESULT+3 ; 3
eor #$ff ; 2
adc #$0 ; 2
sta RESULT+3 ; 4
sta RESULT+3 ; 3
;===========
; 42
positive:
rts ; 6

View File

@ -1,6 +1,7 @@
.define EQU =
;; Zero page monitor routines addresses
WNDLFT EQU $20
WNDWDTH EQU $21
WNDTOP EQU $22
@ -17,6 +18,40 @@ MASK EQU $2E
COLOR EQU $30
INVFLG EQU $32
; More zero-page addresses
; we try not to conflict with anything DOS, MONITOR or BASIC related
; Symbolic names for single zero-page bytes in the $E0-$FF range.
; Names ending in L/H are presumably the low/high halves of 16-bit
; values (MEMPTR, NAME, IN, OUT) -- confirm against their users.
COLOR1 EQU $E0
COLOR2 EQU $E1
MATCH EQU $E2
XX EQU $E3
YY EQU $E4
YADD EQU $E5
LOOP EQU $E6
MEMPTRL EQU $E7
MEMPTRH EQU $E8
NAMEL EQU $E9
NAMEH EQU $EA
NAMEX EQU $EB
CHAR EQU $EC
DISP_PAGE EQU $ED
DRAW_PAGE EQU $EE
FIRST EQU $F0
LASTKEY EQU $F1
PADDLE_STATUS EQU $F2
XPOS EQU $F3
YPOS EQU $F4
; TEMP and RUN are two names for the same byte ($FA), as are TEMP2 and
; TEMPY ($FB); writing one overwrites the other.
TEMP EQU $FA
RUN EQU $FA
TEMP2 EQU $FB
TEMPY EQU $FB
INL EQU $FC
INH EQU $FD
OUTL EQU $FE
OUTH EQU $FF
;; Flying Routine Only
TURNING EQU $60
@ -49,6 +84,13 @@ DRAW_SPLASH EQU $7A
SPEED EQU $7B
SPLASH_COUNT EQU $7C
OVER_WATER EQU $7D
; Workspace for the multiply routine: two 16-bit operands, a 4-byte
; result and the negative-input counter, all in zero page.
NUM1L EQU $7E
NUM1H EQU $7F
NUM2L EQU $80
NUM2H EQU $81
; RESULT names the first of four consecutive bytes ($82-$85).
RESULT EQU $82 ; 83,84,85
NEGATE EQU $86
; SHIPY uses $E4, the same address as YY in the general zero-page list.
SHIPY EQU $E4
;; World Map Only
@ -110,38 +152,6 @@ COUT1 EQU $FDF0 ;; output A to screen
; Our zero-page addresses
; we try not to conflict with anything DOS, MONITOR or BASIC related
COLOR1 EQU $E0
COLOR2 EQU $E1
MATCH EQU $E2
XX EQU $E3
YY EQU $E4
YADD EQU $E5
LOOP EQU $E6
MEMPTRL EQU $E7
MEMPTRH EQU $E8
NAMEL EQU $E9
NAMEH EQU $EA
NAMEX EQU $EB
CHAR EQU $EC
DISP_PAGE EQU $ED
DRAW_PAGE EQU $EE
FIRST EQU $F0
LASTKEY EQU $F1
PADDLE_STATUS EQU $F2
XPOS EQU $F3
YPOS EQU $F4
TEMP EQU $FA
RUN EQU $FA
TEMP2 EQU $FB
TEMPY EQU $FB
INL EQU $FC
INH EQU $FD
OUTL EQU $FE
OUTH EQU $FF