From 8c4fd13ca37fd5e21059006a0832e6bccfa8d774 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Tue, 12 Sep 2023 01:21:37 -0400 Subject: [PATCH] plasmag: cycle counting --- graphics/gr/plasmagoria/README | 35 +++++ graphics/gr/plasmagoria/plasmag_hgr.s | 193 +++++++++++++++----------- 2 files changed, 145 insertions(+), 83 deletions(-) create mode 100644 graphics/gr/plasmagoria/README diff --git a/graphics/gr/plasmagoria/README b/graphics/gr/plasmagoria/README new file mode 100644 index 00000000..cb15e137 --- /dev/null +++ b/graphics/gr/plasmagoria/README @@ -0,0 +1,35 @@ +Apple II display is 65 cycles per row + 65*192=12480(screen)+ 4550 more for vblank = 17030 cycles + + 1023000/17030 = 60fps + +Theoretical maximum screen update + completely unrolled 192*40*4 cycles = 30720 cycles + + 1023000 / 30720 = 33 fps + + + + + +17030 cycles + + ; Total of 12480 cycles to draw screen + ; Vertical blank = 4550 cycles (70 scan lines) + ; Total of 17030 cycles to get back to where it was + +To fill the hi-res screen with a single color you need to update 7680 +bytes. If you completely unroll the loop this takes 30720 cycles, or +roughly 33fps. + +This plasma algorithm, if drawing the whole screen (192 lines), manages +only 3fps. In the video here I am skipping half the lines which gives +5.5fps. I have a version drawing every 4th line which still looks OK +that can do 9fps. + +The code is already heavily optimized with lots of self-modifying code +and table lookups. I'm losing some cycles to doing page flipping, and +part of the problem is the Apple II's bizarre framebuffer layout. 
I +also waste cycles because I'm doing table lookups to get the dithered +patterns, and again due to Apple II oddness (3.5 colored pixels per +byte) I need separate lookups for odd/even columns. diff --git a/graphics/gr/plasmagoria/plasmag_hgr.s b/graphics/gr/plasmagoria/plasmag_hgr.s index 39e57bdf..91de7388 100644 --- a/graphics/gr/plasmagoria/plasmag_hgr.s +++ b/graphics/gr/plasmagoria/plasmag_hgr.s @@ -193,9 +193,15 @@ done_init_hires_colors_odd_l1: ; ============================================================================ +; 26+(24*(11+(40*(39+(38*2))))) = 110,690 +2174 = 112,864 = 9fps +; 26+(24*(11+(40*(39+(38*4))))) = 183,650 +2174 = 185,824 = 5.5fps +; 26+(24*(11+(40*(39+(38*8))))) = 329,570 +2174 = 331,744 = 3fps + do_plasma: ; init + + ; lda #02 ; ldx #5 ;init_loop: @@ -209,142 +215,163 @@ BP3: ; Precalculate some values (inlined) ; ROUTINES PRE CALCUL ; ============================================================================ +; +; cycles = 30 + (40*53) -1 + 25 = 2174 cycles + precalc: - lda PARAM1 ; self modify various parts - sta pc_off1+1 - lda PARAM2 - sta pc_off2+1 - lda PARAM3 - sta pc_off3+1 - lda PARAM4 - sta pc_off4+1 +; 0 + lda PARAM1 ; self modify various parts ; 3 + sta pc_off1+1 ; 4 + lda PARAM2 ; 3 + sta pc_off2+1 ; 4 + lda PARAM3 ; 3 + sta pc_off3+1 ; 4 + lda PARAM4 ; 3 + sta pc_off4+1 ; 4 + +; 28 ; Table1(X) = sin1(PARAM1+X)+sin2(PARAM1+X) ; Table2(X) = sin3(PARAM3+X)+sin1(PARAM4+X) - ldx #$28 ; 40 -pc_b1: + ldx #$28 ; 40 ; 2 +; 30 + +precalc_loop: + pc_off1: - lda sin1 + lda sin1 ; 4 pc_off2: - adc sin2 - sta Table1,X + adc sin2 ; 4 + sta Table1,X ; 4 +; 12 + pc_off3: - lda sin3 + lda sin3 ; 4 pc_off4: - adc sin1 - sta Table2,X + adc sin1 ; 4 + sta Table2,X ; 4 +; 24 - inc pc_off1+1 - inc pc_off2+1 - inc pc_off3+1 - inc pc_off4+1 + inc pc_off1+1 ; 6 + inc pc_off2+1 ; 6 + inc pc_off3+1 ; 6 + inc pc_off4+1 ; 6 +; 48 + dex ; 2 + bpl precalc_loop ; 2/3 +; 53 - dex - bpl pc_b1 - - inc PARAM1 - inc PARAM1 - dec PARAM2 - inc PARAM3 - dec PARAM4 + 
inc PARAM1 ; 5 + inc PARAM1 ; 5 + dec PARAM2 ; 5 + inc PARAM3 ; 5 + dec PARAM4 ; 5 +; 25 ; ============================================================================ ; Display Routines ; ROUTINES AFFICHAGES ; ============================================================================ +; 26+(24*(11+(40*(39+(38*2))))) = 110,690 +; 26+(24*(11+(40*(39+(38*4))))) = 183,650 +; 26+(24*(11+(40*(39+(38*8))))) = 329,570 ; Display "Normal" ; AFFICHAGE "NORMAL" display_normal: - ldx #23 ; lines 0-23 lignes 0-23 + ldx #23 ; lines 0-23 lignes 0-23 ; 2 display_line_loop: +; 0 + lda hposn_low_div8,X ; setup line pointer ; 4 + sta GBASL ; 3 +; 7 + ldy #39 ; col 0-39 ; 2 - lda hposn_low_div8,X - sta GBASL + lda Table2,X ; setup base sine value for row ; 4 + sta display_row_sin_smc+1 ; 4 +; 17 - ldy #39 ; col 0-39 - - lda Table2,X ; setup base sine value for row - sta display_row_sin_smc+1 display_col_loop: - lda Table1,Y ; load in column sine value + + lda Table1,Y ; load in column sine value ; 4 display_row_sin_smc: - adc #00 ; add in row value - sta display_lookup_smc+1 ; patch in low byte of lookup - + adc #00 ; add in row value ; 2 + sta display_lookup_smc+1 ; patch in low byte of lookup ; 4 +; 8 ; pick 0/1 for odd even - lda display_lookup_smc+2 - eor #$01 - sta display_lookup_smc+2 - - - - - + lda display_lookup_smc+2 ; 4 + eor #$01 ; 2 + sta display_lookup_smc+2 ; 4 +; 18 ; lda hires_colors_even_l0 ; attention: must be aligned ; sta color_smc+1 - lda hposn_high_div8,X - clc - adc PAGE - sta GBASH - - lda #1 - sta COUNT + lda hposn_high_div8,X ; 4 + clc ; 2 + adc PAGE ; 3 + sta GBASH ; 3 +; 30 + lda #1 ; 2 + sta COUNT ; 3 +; 35 store_loop: color_smc: - lda display_lookup_smc+2 - eor #$02 - sta display_lookup_smc+2 + lda display_lookup_smc+2 ; 4 + eor #$02 ; 2 + sta display_lookup_smc+2 ; 4 +; 10 display_lookup_smc: - lda hires_colors_even_l0 ; attention: must be aligned + lda hires_colors_even_l0 ; attention: must be aligned ; 4 ; lda #$fe - sta (GBASL),Y - clc - lda #$10 - 
adc GBASH - sta GBASH - dec COUNT - bpl store_loop - dey - bpl display_col_loop + sta (GBASL),Y ; 6 + clc ; 2 + lda #$10 ; 2 + adc GBASH ; 3 + sta GBASH ; 3 + dec COUNT ; 5 + bpl store_loop ; 2/3 +; 38 - dex - bpl display_line_loop + dey ; 2 + bpl display_col_loop ; 2/3 + + dex ; 2 + bpl display_line_loop ; 2/3 ; ============================================================================ - lda PAGE - beq was_page1 + lda PAGE ; 3 + beq was_page1 ; 2/3 was_page2: - bit PAGE2 - lda #0 - beq done_pageflip + bit PAGE2 ; 4 + lda #0 ; 2 + beq done_pageflip ; 2/3 was_page1: - bit PAGE1 - lda #$20 + bit PAGE1 ; 4 + lda #$20 ; 2 done_pageflip: - sta PAGE + sta PAGE ; 3 +; 15? + inc COMPT1 ; 6 + beq display_done2 ; 2/3 - inc COMPT1 - beq display_done2 ; bne BP3 - jmp BP3 + jmp BP3 ; 3 display_done2: - dec COMPT2 - beq display_done + dec COMPT2 ; 6 + beq display_done ; 2/3 ; bne BP3 - jmp BP3 + jmp BP3 ; 3 display_done: jmp do_plasma