Optimizations

- Added black rectangle compiling
- Got batched rendering working
This commit is contained in:
Quinn Dunki 2017-06-19 21:53:57 -07:00
parent 34e34055d4
commit 69e669054b
8 changed files with 1386 additions and 130 deletions

1
.gitignore vendored
View File

@ -5,3 +5,4 @@ HGR.xcodeproj/project.xcworkspace/xcuserdata/qd.xcuserdatad/UserInterfaceState.x
/hisprite.lst
/HiSprite.xcodeproj/project.xcworkspace/xcuserdata/qd.xcuserdatad/UserInterfaceState.xcuserstate
/HiSprite.xcodeproj/xcuserdata/qd.xcuserdatad/xcdebugger
/DataDrawCycleCounts.txt

View File

@ -19,9 +19,14 @@ def main(argv):
pngfile = sys.argv[1]
xdraw = 0
if len(argv)>1 and sys.argv[2] == "--xdraw":
xdraw = 1
black = 0
if len(argv)>1:
if sys.argv[2] == "--xdraw":
xdraw = 1
else:
if sys.argv[2] == "--black":
black = 1
reader = png.Reader(pngfile)
try:
pngdata = reader.asRGB8()
@ -33,7 +38,10 @@ def main(argv):
pixelData = list(pngdata[2])
byteWidth = width/2+1+1 # TODO: Calculate a power of two for this
niceName = os.path.splitext(pngfile)[0].upper()
if black:
niceName = "BLACK"
disclaimer()
# Prologue
@ -63,7 +71,7 @@ def main(argv):
print rowStartCode
cycleCount += extraCycles
spriteChunks = layoutSpriteChunk(pixelData,width,height,shift,xdraw,cycleCount)
spriteChunks = layoutSpriteChunk(pixelData,width,height,shift,xdraw,black,cycleCount)
for row in range(height):
for chunkIndex in range(len(spriteChunks)):
@ -72,11 +80,11 @@ def main(argv):
print "\n"
def layoutSpriteChunk(pixelData,width,height,shift,xdraw,cycleCount):
def layoutSpriteChunk(pixelData,width,height,shift,xdraw,black,cycleCount):
colorStreams = byteStreamsFromPixels(pixelData,width,height,shift,bitsForColor,highBitForColor)
maskStreams = byteStreamsFromPixels(pixelData,width,height,shift,bitsForMask,highBitForMask)
code = generateBlitter(colorStreams,maskStreams,height,xdraw,cycleCount)
code = generateBlitter(colorStreams,maskStreams,height,xdraw,black,cycleCount)
return code
@ -129,7 +137,7 @@ def byteStreamsFromPixels(pixelData,width,height,shift,bitDelegate,highBitDelega
return byteStreams
def generateBlitter(colorStreams,maskStreams,height,xdraw,baseCycleCount):
def generateBlitter(colorStreams,maskStreams,height,xdraw,black,baseCycleCount):
byteWidth = len(colorStreams[0])
spriteChunks = [["" for y in range(height)] for x in range(byteWidth)]
@ -149,17 +157,23 @@ def generateBlitter(colorStreams,maskStreams,height,xdraw,baseCycleCount):
byteSplits[chunkIndex] != "10000000":
# Store byte into video memory
if xdraw:
if xdraw: # Option to exclusive-or pixels into background
spriteChunks[chunkIndex][row] = \
"\tlda (SCRATCH0),y\n" + \
"\teor #%%%s\n" % byteSplits[chunkIndex] + \
"\tsta (SCRATCH0),y\n";
cycleCount += 5 + 2 + 6
else:
spriteChunks[chunkIndex][row] = \
"\tlda #%%%s\n" % byteSplits[chunkIndex] + \
"\tsta (SCRATCH0),y\n";
cycleCount += 2 + 6
if black: # Option to write "black-out" pixels instead
spriteChunks[chunkIndex][row] = \
"\tlda #0\n" + \
"\tsta (SCRATCH0),y\n";
cycleCount += 2 + 6
else: # Regular blitting
spriteChunks[chunkIndex][row] = \
"\tlda #%%%s\n" % byteSplits[chunkIndex] + \
"\tsta (SCRATCH0),y\n";
cycleCount += 2 + 6
else:
optimizationCount += 1

View File

@ -7,6 +7,7 @@
objects = {
/* Begin PBXFileReference section */
700F8D261EF8CFA100F103DE /* spritegen0b.s */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.asm; path = spritegen0b.s; sourceTree = "<group>"; };
7018A7A81E883A3A00C57515 /* spriteBuffers.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = spriteBuffers.s; sourceTree = "<group>"; };
701B5E0E1D84810000E6D330 /* macros.s */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.asm; path = macros.s; sourceTree = "<group>"; };
701B5E0F1D84810000E6D330 /* Makefile */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.make; path = Makefile; sourceTree = "<group>"; };
@ -35,6 +36,7 @@
701B5E121D8481C800E6D330 /* hgrtableX.s */,
701B5E171D84824400E6D330 /* hgrtableY.s */,
701B5E131D84820100E6D330 /* spritegen0.s */,
700F8D261EF8CFA100F103DE /* spritegen0b.s */,
701B5E141D84823300E6D330 /* spritegen1.s */,
701B5E151D84823300E6D330 /* spritegen2.s */,
701B5E161D84823300E6D330 /* spritegen3.s */,

View File

@ -192,72 +192,73 @@ restoreBackground_smc5:
; PARAM0: X pos
; PARAM1: Y pos
;
; Assumes 4-byte-wide, 8px-high sprites
; Assumes 6-byte-wide, 8px-high sprites
; 1099 cycles per call
;
BlackRect:
SAVE_AX
lda #0
pha
SAVE_AX ; 6
lda #0 ; 2
pha ; 3 9 setup
blackRect_loop:
clc
pla
pha
adc PARAM1 ; Calculate Y line
tax
clc ; 2
pla ; 4
pha ; 3
adc PARAM1 ; 3 ; Calculate Y line
tax ; 2
lda HGRROWS_H1,x ; Compute hires row
sta blackRect_smc0+2
sta blackRect_smc1+2
sta blackRect_smc2+2
sta blackRect_smc3+2
sta blackRect_smc4+2
sta blackRect_smc5+2
lda HGRROWS_L,x
sta blackRect_smc0+1
sta blackRect_smc1+1
sta blackRect_smc2+1
sta blackRect_smc3+1
sta blackRect_smc4+1
sta blackRect_smc5+1
ldx PARAM0 ; Compute hires column
lda DIV7_2,x
tax
lda HGRROWS_H1,x ; 4 ; Compute hires row
sta blackRect_smc0+2 ; 4
sta blackRect_smc1+2 ; 4
sta blackRect_smc2+2 ; 4
sta blackRect_smc3+2 ; 4
sta blackRect_smc4+2 ; 4
sta blackRect_smc5+2 ; 4
lda HGRROWS_L,x ; 4
sta blackRect_smc0+1 ; 4
sta blackRect_smc1+1 ; 4
sta blackRect_smc2+1 ; 4
sta blackRect_smc3+1 ; 4
sta blackRect_smc4+1 ; 4
sta blackRect_smc5+1 ; 4
ldx PARAM0 ; 3 ; Compute hires column
lda DIV7_2,x ; 4
tax ; 2
; 79
blackRect_smc0:
stz $2000,x
inx
stz $2000,x ; 5
inx ; 2
blackRect_smc1:
stz $2000,x
inx
stz $2000,x ; 5
inx ; 2
blackRect_smc2:
stz $2000,x
inx
stz $2000,x ; 5
inx ; 2
blackRect_smc3:
stz $2000,x
inx
stz $2000,x ; 5
inx ; 2
blackRect_smc4:
stz $2000,x
inx
stz $2000,x ; 5
inx ; 2
blackRect_smc5:
stz $2000,x
stz $2000,x ; 5
pla
inc
pha
pla ; 4
inc ; 2
pha ; 3
cmp #8
bne blackRect_loop
cmp #8 ; 2
bne blackRect_loop ; 2 134 per row
pla
RESTORE_AX
rts
pla ; 4
RESTORE_AX ; 8
rts ; 6 18 cleanup
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Binary file not shown.

View File

@ -30,9 +30,11 @@ SCRATCH1 = $1a
SPRITEPTR_L = $1b
SPRITEPTR_H = $1c
MAXSPRITEINDEX = 5 ; Sprite count - 1
MAXPOSX = 127 ; This demo doesn't wanna do 16 bit math
MAXPOSY = 127
MAXSPRITEINDEX = 3 ; Sprite count - 1
MAXPOSX = 127 ; This demo doesn't wanna do 16 bit math
MAXPOSY = 127
MAXLOCALBATCHINDEX = 3 ; Sprites in batch - 1
MAXBATCHINDEX = 0 ; Number of batches - 1
; Macros
.macro BLITBYTE xPos,yPos,addr
@ -75,39 +77,43 @@ main:
mainLoop:
; Draw sprites
renderLoop:
; Find our sprite pointer
lda spriteNum
asl
tax
lda META_BUFFERS+1,x
sta SPRITEPTR_H
lda META_BUFFERS,x
sta SPRITEPTR_L
lda spriteNum ; 4
asl ; 2
tax ; 2
lda META_BUFFERS+1,x ; 4
sta SPRITEPTR_H ; 3
lda META_BUFFERS,x ; 4
sta SPRITEPTR_L ; 3
; Find Y coordinate
ldy #1
lda (SPRITEPTR_L),y
sta PARAM1
ldy #1 ; 2
lda (SPRITEPTR_L),y ; 5
sta PARAM1 ; 3
; Find X coordinate
ldy #0
lda (SPRITEPTR_L),y
sta PARAM0
ldy #0 ; 2
lda (SPRITEPTR_L),y ; 5
sta PARAM0 ; 3
jsr BOXW_MAG
jsr BOXW_MAG ; 6 48 cycles overhead to here
; Next sprite
dec spriteNum
bmi restartList
jmp renderLoop
dec spriteNum ; 6
dec batchLocalIndex ; 6
bmi restartList ; 2
jmp renderLoop ; 3 65 cycles overhead per sprite
restartList:
lda #MAXSPRITEINDEX
lda batchMaxIndex
sta spriteNum
lda #MAXLOCALBATCHINDEX
sta batchLocalIndex
; jmp batchLoop
VBL_SYNC
@ -133,17 +139,21 @@ backgroundLoop:
lda (SPRITEPTR_L),y
sta PARAM0
jsr BlackRect
jsr BLACK
; Next sprite
dec spriteNum
dec batchLocalIndex
bmi backgroundRestartList
jmp backgroundLoop
jmp backgroundLoop ; 65 cycles overhead per rect
backgroundRestartList:
lda #MAXSPRITEINDEX
sta spriteNum
jmp mainLoop ; Skip movement
; lda batchMaxIndex
; sta spriteNum
lda #MAXLOCALBATCHINDEX
sta batchLocalIndex
jmp batchLoop ; Skip movement
movementLoop:
; Find our sprite pointer
@ -204,9 +214,18 @@ flipY:
bra continueMovementList
movementRestartList:
batchLoop:
dec batchIndex
bpl batchContinue
lda #MAXBATCHINDEX
sta batchIndex
lda #MAXSPRITEINDEX
sta spriteNum
jmp renderLoop
batchContinue:
jmp mainLoop
rts
@ -242,6 +261,14 @@ delayShortInner:
spriteNum:
.byte MAXSPRITEINDEX
batchIndex:
.byte MAXBATCHINDEX
batchMaxIndex:
.byte MAXSPRITEINDEX
batchLocalIndex:
.byte MAXLOCALBATCHINDEX
bgFilename:
.byte "KOL",0
@ -250,6 +277,7 @@ bgFilename:
.include "hgrtableY.s"
.include "spriteBuffers.s"
.include "spritegen0.s"
.include "spritegen0b.s"
;.include "spritegen1.s"
;.include "spritegen2.s"
;.include "spritegen3.s"

View File

@ -29,94 +29,94 @@ META_BUFFERS:
.addr metaBuffer20
metaBuffer0:
.byte 80 ; X pos
.byte 116 ; Y pos
.byte 0 ; X pos
.byte 0 ;116 ; Y pos
.byte -1 ; X vec
.byte -3 ; Y vec
.byte -1 ; Y vec
metaBuffer1:
.byte 64 ; X pos
.byte 126 ; Y pos
.byte 4 ; X vec
.byte 3 ; Y vec
.byte 20 ; X pos
.byte 0; 126 ; Y pos
.byte 1 ; X vec
.byte 1 ; Y vec
metaBuffer2:
.byte 42 ; X pos
.byte 67 ; Y pos
.byte -3 ; X vec
.byte -3 ; Y vec
.byte 40 ; X pos
.byte 0; 67 ; Y pos
.byte -1 ; X vec
.byte -1 ; Y vec
metaBuffer3:
.byte 72 ; X pos
.byte 70 ; Y pos
.byte -3 ; X vec
.byte 5 ; Y vec
.byte 60 ; X pos
.byte 0; 70 ; Y pos
.byte -1 ; X vec
.byte 1 ; Y vec
metaBuffer4:
.byte 75 ; X pos
.byte 49 ; Y pos
.byte -5 ; X vec
.byte 3 ; Y vec
.byte 80 ; X pos
.byte 0; 49 ; Y pos
.byte -1 ; X vec
.byte 1 ; Y vec
metaBuffer5:
.byte 42 ; X pos
.byte 101 ; Y pos
.byte 4 ; X vec
.byte 2 ; Y vec
.byte 100 ; X pos
.byte 0; 101 ; Y pos
.byte 1 ; X vec
.byte 1 ; Y vec
metaBuffer6:
.byte 40 ; X pos
.byte 74 ; Y pos
.byte 4 ; X vec
.byte -5 ; Y vec
.byte 120 ; X pos
.byte 0; 74 ; Y pos
.byte 1 ; X vec
.byte -1 ; Y vec
metaBuffer7:
.byte 71 ; X pos
.byte 58 ; Y pos
.byte 5 ; X vec
.byte 5 ; Y vec
.byte 0; 58 ; Y pos
.byte 2 ; X vec
.byte 2 ; Y vec
metaBuffer8:
.byte 20 ; X pos
.byte 51 ; Y pos
.byte -5 ; X vec
.byte 3 ; Y vec
.byte 0; 51 ; Y pos
.byte -2 ; X vec
.byte 1 ; Y vec
metaBuffer9:
.byte 74 ; X pos
.byte 61 ; Y pos
.byte 0; 61 ; Y pos
.byte 1 ; X vec
.byte -2 ; Y vec
metaBuffer10:
.byte 115 ; X pos
.byte 86 ; Y pos
.byte 5 ; X vec
.byte 2 ; X vec
.byte -2 ; Y vec
metaBuffer11:
.byte 104 ; X pos
.byte 53 ; Y pos
.byte -5 ; X vec
.byte -2 ; X vec
.byte 1 ; Y vec
metaBuffer12:
.byte 21 ; X pos
.byte 2 ; Y pos
.byte 1 ; X vec
.byte 3 ; Y vec
.byte 2 ; Y vec
metaBuffer13:
.byte 2 ; X pos
.byte 127 ; Y pos
.byte 1 ; X vec
.byte -5 ; Y vec
.byte -2 ; Y vec
metaBuffer14:
.byte 97 ; X pos
.byte 123 ; Y pos
.byte 1 ; X vec
.byte -5 ; Y vec
.byte -2 ; Y vec
metaBuffer15:
.byte 5 ; X pos
@ -127,13 +127,13 @@ metaBuffer15:
metaBuffer16:
.byte 4 ; X pos
.byte 40 ; Y pos
.byte -3 ; X vec
.byte -1 ; X vec
.byte 4 ; Y vec
metaBuffer17:
.byte 43 ; X pos
.byte 67 ; Y pos
.byte 4 ; X vec
.byte 1 ; X vec
.byte -1 ; Y vec
metaBuffer18:
@ -145,8 +145,8 @@ metaBuffer18:
metaBuffer19:
.byte 75 ; X pos
.byte 4 ; Y pos
.byte -3 ; X vec
.byte -5 ; Y vec
.byte -1 ; X vec
.byte -2 ; Y vec
metaBuffer20:
.byte 61 ; X pos

1210
spritegen0b.s Normal file

File diff suppressed because it is too large Load Diff