Optimization & refinement

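- Sprite compiler now tracks the cycle count of each generated blitter and emits it as a comment on the final rts
- More interesting sample app
- Better VBL sync macro (polls $C019 for the end, then the start, of vertical blanking)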
This commit is contained in:
Quinn Dunki 2017-05-29 13:14:37 -07:00
parent b3f9fdfd7d
commit ca2bd7c586
6 changed files with 54 additions and 58 deletions

View File

@ -51,11 +51,19 @@ def main(argv):
# Blitting functions
print "\n"
for shift in range(0,7):
# Track cycle count of the blitter. We start with fixed overhead:
# SAVE_AXY + RESTORE_AXY + rts + sprite jump table
cycleCount = 9 + 12 + 6 + 3 + 4 + 6
print "%s_SHIFT%d:" % (niceName,shift)
print "\tldx PARAM1"
print rowStartCalculatorCode();
spriteChunks = layoutSpriteChunk(pixelData,width,height,shift,xdraw)
cycleCount += 3
rowStartCode,extraCycles = rowStartCalculatorCode();
print rowStartCode
cycleCount += extraCycles
spriteChunks = layoutSpriteChunk(pixelData,width,height,shift,xdraw,cycleCount)
for row in range(height):
for chunkIndex in range(len(spriteChunks)):
@ -64,11 +72,11 @@ def main(argv):
print "\n"
def layoutSpriteChunk(pixelData,width,height,shift,xdraw):
def layoutSpriteChunk(pixelData,width,height,shift,xdraw,cycleCount):
colorStreams = byteStreamsFromPixels(pixelData,width,height,shift,bitsForColor,highBitForColor)
maskStreams = byteStreamsFromPixels(pixelData,width,height,shift,bitsForMask,highBitForMask)
code = generateBlitter(colorStreams,maskStreams,height,xdraw)
code = generateBlitter(colorStreams,maskStreams,height,xdraw,cycleCount)
return code
@ -121,11 +129,14 @@ def byteStreamsFromPixels(pixelData,width,height,shift,bitDelegate,highBitDelega
return byteStreams
def generateBlitter(colorStreams,maskStreams,height,xdraw):
def generateBlitter(colorStreams,maskStreams,height,xdraw,baseCycleCount):
byteWidth = len(colorStreams[0])
spriteChunks = [["" for y in range(height)] for x in range(byteWidth)]
cycleCount = baseCycleCount
optimizationCount = 0
for row in range(height):
byteSplits = colorStreams[row]
@ -143,23 +154,30 @@ def generateBlitter(colorStreams,maskStreams,height,xdraw):
"\tlda (SCRATCH0),y\n" + \
"\teor #%%%s\n" % byteSplits[chunkIndex] + \
"\tsta (SCRATCH0),y\n";
cycleCount += 5 + 2 + 6
else:
spriteChunks[chunkIndex][row] = \
"\tlda #%%%s\n" % byteSplits[chunkIndex] + \
"\tsta (SCRATCH0),y\n";
cycleCount += 2 + 6
else:
optimizationCount += 1
# Increment indices
if chunkIndex == len(byteSplits)-1:
spriteChunks[chunkIndex][row] += "\n"
else:
spriteChunks[chunkIndex][row] += "\tiny"
cycleCount += 2
# Finish the row
if row<height-1:
spriteChunks[chunkIndex][row] += "\tinx\n" + rowStartCalculatorCode();
rowStartCode,extraCycles = rowStartCalculatorCode()
spriteChunks[chunkIndex][row] += "\tinx\n" + rowStartCode;
cycleCount += 2 + extraCycles
else:
spriteChunks[chunkIndex][row] += "\tRESTORE_AXY\n"
spriteChunks[chunkIndex][row] += "\trts\n"
spriteChunks[chunkIndex][row] += "\trts\t;Cycle count: %d, Optimized %d rows." % (cycleCount,optimizationCount) + "\n"
return spriteChunks
@ -172,7 +190,7 @@ def rowStartCalculatorCode():
"\tsta SCRATCH0\n" + \
"\tldy PARAM0\n" + \
"\tlda DIV7_2,y\n" + \
"\ttay\n";
"\ttay\n", 4 + 3 + 4 + 3 + 3 + 4 + 2;
def fillOutByte(numBits):

Binary file not shown.

View File

@ -30,7 +30,7 @@ SCRATCH1 = $1a
SPRITEPTR_L = $1b
SPRITEPTR_H = $1c
MAXSPRITEINDEX = 19 ; Sprite count - 1
MAXSPRITEINDEX = 5 ; Sprite count - 1
MAXPOSX = 127 ; This demo doesn't wanna do 16 bit math
MAXPOSY = 127
@ -79,6 +79,7 @@ main:
; sta PARAM1
; jsr BloadHires
mainLoop:
; Draw sprites
renderLoop:
@ -120,7 +121,9 @@ restartList:
lda #MAXSPRITEINDEX
sta spriteNum
jsr delayShort
VBL_SYNC
;jsr delayShort
; Background restore
backgroundLoop:
@ -160,6 +163,7 @@ backgroundLoop:
backgroundRestartList:
lda #MAXSPRITEINDEX
sta spriteNum
jmp mainLoop ; Skip movement
movementLoop:
; Find our sprite pointer

View File

@ -122,44 +122,11 @@
; Rendering macros
;
.macro LDY_AVIEW
asl ; Find our new view record
asl
asl
asl ; Records are 16 bytes wide
tay
.endmacro
.macro LDY_ACTIVEVIEW
lda WG_ACTIVEVIEW ; Find our new view record
LDY_AVIEW
.endmacro
.macro LDX_ACTIVEVIEW
lda WG_ACTIVEVIEW ; Find our new view record
asl
asl
asl
asl ; Records are 16 bytes wide
tax
.endmacro
.macro LDY_FOCUSVIEW
lda WG_FOCUSVIEW ; Find our new view record
LDY_AVIEW
.endmacro
.macro VBL_SYNC ; Synchronize with vertical blanking
lda #$80
;macroWaitVBLToFinish:
; bit RDVBLBAR
; bmi macroWaitVBLToFinish
.macro VBL_SYNC ; Synchronize with vertical blanking
; Busy-waits on the soft switch at $C019 in two phases, so execution
; continues only after bit 7 has been seen high and then low again.
; Side effects: A holds the last value read from $C019, and the N/Z flags
; reflect that value.
; NOTE(review): per the label names, bit 7 = 0 is treated as "inside vertical
; blank"; that polarity presumably matches the Apple IIe - confirm for other
; target machines.
; NOTE(review): the @-labels look like ca65 cheap local labels - confirm that
; expanding this macro more than once between two regular labels is safe.
;
; Phase 1: spin while bit 7 of $C019 reads 0 (bpl is taken while N is clear).
@macroWaitVBLToFinish:
lda $C019
bpl @macroWaitVBLToFinish
; Phase 2: spin while bit 7 of $C019 reads 1 (bmi is taken while N is set).
@macroWaitVBLToStart:
; bit RDVBLBAR
; bpl @macroWaitVBLToStart
lda $C019
bmi @macroWaitVBLToStart
.endmacro

View File

@ -38,6 +38,7 @@ META_BUFFERS:
.addr metaBuffer17
.addr metaBuffer18
.addr metaBuffer19
.addr metaBuffer20
metaBuffer0:
.byte 80 ; X pos
@ -159,6 +160,12 @@ metaBuffer19:
.byte -3 ; X vec
.byte -5 ; Y vec
metaBuffer20:
.byte 61 ; X pos
.byte 18 ; Y pos
.byte -1 ; X vec
.byte +1 ; Y vec
bgBuffer0:
.repeat 48

View File

@ -184,7 +184,7 @@ BOXW_MAG_SHIFT0:
iny
RESTORE_AXY
rts
rts ;Cycle count: 497, Optimized 26 rows.
@ -354,7 +354,7 @@ BOXW_MAG_SHIFT1:
iny
RESTORE_AXY
rts
rts ;Cycle count: 497, Optimized 26 rows.
@ -524,7 +524,7 @@ BOXW_MAG_SHIFT2:
iny
RESTORE_AXY
rts
rts ;Cycle count: 497, Optimized 26 rows.
@ -694,7 +694,7 @@ BOXW_MAG_SHIFT3:
iny
RESTORE_AXY
rts
rts ;Cycle count: 497, Optimized 26 rows.
@ -864,7 +864,7 @@ BOXW_MAG_SHIFT4:
iny
RESTORE_AXY
rts
rts ;Cycle count: 497, Optimized 26 rows.
@ -1034,7 +1034,7 @@ BOXW_MAG_SHIFT5:
iny
RESTORE_AXY
rts
rts ;Cycle count: 497, Optimized 26 rows.
@ -1204,7 +1204,7 @@ BOXW_MAG_SHIFT6:
iny
RESTORE_AXY
rts
rts ;Cycle count: 497, Optimized 26 rows.