diff --git a/gr-sim/tfv_flying_6502.c b/gr-sim/tfv_flying_6502.c index 9d9650ff..0c2ead25 100644 --- a/gr-sim/tfv_flying_6502.c +++ b/gr-sim/tfv_flying_6502.c @@ -398,7 +398,7 @@ static int fixed_mul_unsigned( a&=0xff; // product[0]=a; // sta PRODUCT+0 ; - _aa=a; +// _aa=a; //sm3a: a=square1_hi[sm3a+x]; // lda square1_hi,x ; 4 @@ -569,27 +569,27 @@ static void fixed_mul(unsigned char x_i, unsigned char x_f, a=(x_i&0xff); // lda T1+1 ; 3 cycles.multiply+=12; - if ((a&0x80)==0) goto x_positive; // bpl :+ ; 3/2nt - cycles.multiply--; + if ((a&0x80)==0) goto x_positive; // bpl :+ ;^3/2nt + c=1; // sec ; 2 a=product[2]; // lda PRODUCT+2 ; 3 a+=(~y_f)+c; // sbc T2+0 ; 3 c=!(a&0x100); a&=0xff; product[2]=a; // sta PRODUCT+2 ; 3 - a=product[3]; // lda PRODUCT+3 ; 3 - a+=(~y_i)+c; // sbc T2+1 ; 3 - c=!(a&0x100); - a&=0xff; - product[3]=a; // sta PRODUCT+3 ; 3 - cycles.multiply+=20; +// a=product[3]; // lda PRODUCT+3 ; +// a+=(~y_i)+c; // sbc T2+1 ; +// c=!(a&0x100); +// a&=0xff; +// product[3]=a; // sta PRODUCT+3 ; + cycles.multiply+=10; x_positive: a=(y_i&0xff); // lda T2+1 ; 3 cycles.multiply+=6; - if ((a&0x80)==0) goto y_positive; // bpl :+ ; 3/2nt - cycles.multiply--; + if ((a&0x80)==0) goto y_positive; // bpl :+ ;^3/2nt + c=1; // sec ; 2 a=product[2]; // lda PRODUCT+2 ; 3 @@ -597,12 +597,12 @@ x_positive: c=!(a&0x100); a&=0xff; product[2]=a; // sta PRODUCT+2 ; 3 - a=product[3]; // lda PRODUCT+3 ; 3 - a+=(~x_i)+c; // sbc T1+1 ; 3 - c=!(a&0x100); - a&=0xff; - product[3]=a; // sta PRODUCT+3 ; 3 - cycles.multiply+=20; +// a=product[3]; // lda PRODUCT+3 ; +// a+=(~x_i)+c; // sbc T1+1 ; +// c=!(a&0x100); +// a&=0xff; +// product[3]=a; // sta PRODUCT+3 ; + cycles.multiply+=10; y_positive: *z_i=product[2]; *z_f=product[1]; diff --git a/tfv/OPTIMIZATION b/tfv/OPTIMIZATION index 6e6eeff0..e1d75c05 100644 --- a/tfv/OPTIMIZATION +++ b/tfv/OPTIMIZATION @@ -162,14 +162,13 @@ Remove unneeded precision in the 8.8 x 8.8 fixed point multiply Cycles: flying= 187 Cycles: getkey= 46 Cycles: page_flip= 26 - Cycles: multiply= 44,785 + Cycles: multiply= 43,588 Cycles: mode7= 118,034 Cycles: lookup_map= 22,747 Cycles: put_sprite= 2,561 ================================ - Total = 188,386 - Frame Rate = 5.31 fps - + Total = 187,189 + Frame Rate = 5.34 fps Each cycle removed from inner X loop saves 32*40=1280 cycles diff --git a/tfv/tfv_multiply.s b/tfv/tfv_multiply.s index c969474d..8130db9c 100644 --- a/tfv/tfv_multiply.s +++ b/tfv/tfv_multiply.s @@ -312,29 +312,30 @@ multiply: lda RESULT+2 ; 3 sbc NUM2L ; 3 sta RESULT+2 ; 3 - lda RESULT+3 ; 3 - sbc NUM2H ; 3 - sta RESULT+3 ; 3 +; lda RESULT+3 ; +; sbc NUM2H ; +; sta RESULT+3 ; ;============ - ; 19 + ; 10 x_positive: lda NUM2H ; y_i ; 3 ;============ ; ; 6 - bpl y_positive ; 3/2nt + + bpl y_positive ;^3/2nt sec ; 2 lda RESULT+2 ; 3 sbc NUM1L ; 3 sta RESULT+2 ; 3 - lda RESULT+3 ; 3 - sbc NUM1H ; 3 - sta RESULT+3 ; 3 +; lda RESULT+3 ; +; sbc NUM1H ; +; sta RESULT+3 ; ;=========== - ; 19 + ; 10 y_positive: lda RESULT+2 ; *z_i=product[2]; ; 3