diff --git a/docs/source/todo.rst b/docs/source/todo.rst
index 4c7b4b80f..6032da960 100644
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -2,6 +2,9 @@
 TODO
 ====
 
+- optimize word multiplication if the constant multiplier is a multiple of 256
+- move current rnd() to fastrnd() and add new rnd() based on the 16-bits rndw() for better results
+
 - optimize assigning array and struct variables (multi-element assings -> memcopy)
 - hoist all variable declarations up to the subroutine scope *before* even the constant folding takes place (to avoid undefined symbol errors when referring to a variable from another nested scope in the subroutine)
 - optimize swap of two memread values with index, using the same pointer expression/variable, like swap(@(ptr+1), @(ptr+2))
diff --git a/examples/cx16/bobs.p8 b/examples/cx16/bobs.p8
new file mode 100644
index 000000000..1d64a9920
--- /dev/null
+++ b/examples/cx16/bobs.p8
@@ -0,0 +1,370 @@
+%target cx16
+%import palette
+%import conv
+%import textio
+; Demo: blits an ever growing number of copies ("bobs") of one 16x16 image into a ring of bitmap backbuffers.
+
+main {
+    sub start() {       ; shows the intro text, sets up VERA and the buffers, then leaves all drawing to irq()
+        cx16.screen_set_mode(0)
+        txt.print("\n\n how many sprites does\n the commander x16 have?\n")
+        sys.wait(180)
+        txt.print("\n\n the manual says: '128'.\n")
+        sys.wait(80)
+        txt.print("\n\n but that's just a manual...\n")
+        sys.wait(80)
+        txt.print("\n\n let's find out for ourselves,\n shall we?")
+        sys.wait(180)
+
+        ; enable bitmap mode 320x240, 1 bpp, only layer 1
+        cx16.VERA_DC_VIDEO = (cx16.VERA_DC_VIDEO & %11001111) | %00100000
+        cx16.VERA_DC_HSCALE = 64
+        cx16.VERA_DC_VSCALE = 64
+        cx16.VERA_L1_CONFIG = %00000100
+        cx16.VERA_L1_MAPBASE = 0
+        cx16.VERA_L1_TILEBASE = 0
+
+        ; limit display height to 200 pixels to have enough vram for 14 backbuffers
+        const ubyte vstart = 20
+        const ubyte vheight = 200
+        cx16.VERA_CTRL = %00000010      ; NOTE(review): presumably selects the register set (DCSEL) that exposes VSTART/VSTOP - confirm
+        cx16.VERA_DC_VSTART = vstart
+        cx16.VERA_DC_VSTOP = vstart + vheight - 1
+
+        init_buffers()
+        palette.set_color(0, $000)
+        palette.set_color(1, $af8)
+
+        cx16.set_irq(&irq, false)       ; irq() does all the drawing. NOTE(review): meaning of 'false' is not visible here
+
+        repeat {
+            ; don't exit: the main program only waits for the next interrupt, over and over
+            %asm {{
+                wai
+            }}
+        }
+    }
+
+    const ubyte num_backbuffers = 12        ; there is vram space for 14 backbuffers. reduce to make tighter "loops"
+    uword num_bobs = 0      ; the counter that draw_number() puts on screen; +2 every time 'backbuffer' wraps around
+    ubyte backbuffer = num_backbuffers-1    ; index of the buffer that irq() selects for display
+    ubyte blitbuffer = 0    ; index of the buffer that receives the next bob
+    uword anim1 = $0432     ; anim1..anim4: animation counters; blit() feeds their msb into cos8u/sin8u
+    uword anim2 = $0123
+    uword anim3 = $4321
+    uword anim4 = $8500
+
+    sub irq() {
+        ; interrupt handler: blits one bob into each of the next two blit buffers, then flips to the next backbuffer
+        ; palette.set_color(0, $f00)    ; debug rastertime
+
+        ; draw 2 bobs per frame to speed up bob count
+        ubyte vmembase = blitbuffer*4       ; in units of 2048 bytes (see blit); 2048 * 4 per backbuffer
+        blit(vmembase)
+        blitbuffer++
+        if blitbuffer==num_backbuffers
+            blitbuffer=0
+        vmembase = blitbuffer*4       ; in units of 2048 bytes (see blit); 2048 * 4 per backbuffer
+        blit(vmembase)
+        blitbuffer++
+        if blitbuffer==num_backbuffers
+            blitbuffer=0
+
+        backbuffer++
+        if backbuffer==num_backbuffers {
+            backbuffer=0
+            num_bobs+=2
+        }
+
+        vmembase = backbuffer*4       ; in units of 2048 bytes (see draw_number); 2048 * 4 per backbuffer
+        draw_number(vmembase, num_bobs)
+        cx16.VERA_L1_TILEBASE = vmembase << 2           ; flip to next backbuffer
+
+        ; palette.set_color(0, $000)
+    }
+
+    sub init_buffers() {
+        ; erase vram: writes $ffff + $f960 zero bytes from address 0 on (assumes vaddr's last argument enables auto-increment - confirm)
+        cx16.vaddr(0, 0, 0, true)
+        repeat $ffff
+            cx16.VERA_DATA0 = 0
+        repeat $f960
+            cx16.VERA_DATA0 = 0
+    }
+
+    sub blit(ubyte vmembase) {      ; draws one bob into the buffer at vram address vmembase*2048, then advances anim1..anim4
+        ubyte bank = vmembase>=32       ; 32*2048 = 64 KB: from there on the address no longer fits in the 16 bits of vmem
+        uword vmem = vmembase * 2048        ; mkword(vmembase,0) * 8
+        uword blit_x = (cos8u(msb(anim1)) as uword) + sin8u(msb(anim2))/5
+        ubyte blit_y = sin8u(msb(anim3))/2 + cos8u(msb(anim4))/5
+        vmem += blit_x/8 + (blit_y as uword) * 40       ; 8 pixels per byte, 40 bytes per bitmap line
+
+        bitshift(lsb(blit_x) & 7)       ; fill shifted_sprite/shifted_mask for the pixel offset within the byte
+
+        ; left column of the (shifted)sprite
+        ; TODO don't call vaddr, inline it here
+        cx16.vaddr(bank, vmem, 0, false)
+        cx16.VERA_ADDR_H &= 1
+        cx16.VERA_ADDR_H |= %10110000     ; increment 40 for read (next line)
+        cx16.vaddr(bank, vmem, 1, false)
+        cx16.VERA_ADDR_H &= 1
+        cx16.VERA_ADDR_H |= %10110000     ; increment 40 for write (next line)
+        ubyte ix
+        for ix in 0 to len(shifted_sprite)-1 step 3 {       ; indexes 0,3,6,...: the first of the 3 bytes of every row
+            ;cx16.VERA_DATA1 = cx16.VERA_DATA0 & shifted_mask[ix] | shifted_sprite[ix]
+            %asm {{
+                ldy  ix
+                lda  cx16.VERA_DATA0
+                and  shifted_mask,y
+                ora  shifted_sprite,y
+                sta  cx16.VERA_DATA1
+            }}
+        }
+        ; middle column of the (shifted)sprite
+        cx16.vaddr(bank, vmem+1, 0, false)
+        cx16.VERA_ADDR_H &= 1
+        cx16.VERA_ADDR_H |= %10110000     ; increment 40 for read (next line)
+        cx16.vaddr(bank, vmem+1, 1, false)
+        cx16.VERA_ADDR_H &= 1
+        cx16.VERA_ADDR_H |= %10110000     ; increment 40 for write (next line)
+        for ix in 1 to len(shifted_sprite)-1 step 3 {       ; indexes 1,4,7,...: the second byte of every row
+            ;cx16.VERA_DATA1 = cx16.VERA_DATA0 & shifted_mask[ix] | shifted_sprite[ix]
+            %asm {{
+                ldy  ix
+                lda  cx16.VERA_DATA0
+                and  shifted_mask,y
+                ora  shifted_sprite,y
+                sta  cx16.VERA_DATA1
+            }}
+        }
+        ; right column of the (shifted)sprite
+        cx16.vaddr(bank, vmem+2, 0, false)
+        cx16.VERA_ADDR_H &= 1
+        cx16.VERA_ADDR_H |= %10110000     ; increment 40 for read (next line)
+        cx16.vaddr(bank, vmem+2, 1, false)
+        cx16.VERA_ADDR_H &= 1
+        cx16.VERA_ADDR_H |= %10110000     ; increment 40 for write (next line)
+        for ix in 2 to len(shifted_sprite)-1 step 3       ; indexes 2,5,8,...: the third byte of every row (loop body is the single asm block)
+            ;cx16.VERA_DATA1 = cx16.VERA_DATA0 & shifted_mask[ix] | shifted_sprite[ix]
+            %asm {{
+                ldy  ix
+                lda  cx16.VERA_DATA0
+                and  shifted_mask,y
+                ora  shifted_sprite,y
+                sta  cx16.VERA_DATA1
+            }}
+
+        anim1 += 217        ; step the animation counters so the next bob lands somewhere else
+        anim2 += 190
+        anim3 += 222
+        anim4 += 195
+;        anim1 += 107
+;        anim2 += 80
+;        anim3 += 122
+;        anim4 += 93
+    }
+
+    sub bitshift(ubyte shift) {     ; fills shifted_sprite/shifted_mask: all 16 rows shifted right by 'shift' pixels, 3 bytes per row
+        ubyte yix       ; write index into the shifted arrays. NOTE(review): no initializer, presumably starts at 0 on every call - confirm
+        ubyte yy
+        for yy in 0 to 15 {
+            uword @zp sprw = sprite[yy]
+            uword @zp maskw = mask[yy]
+            ubyte @zp sprite_3 = 0      ; third byte of the row for the image bits
+            ubyte @zp mask_3 = 255      ; third byte of the row for the mask, starts as all 1 bits
+            repeat shift {
+                sprw >>= 1
+                ror(sprite_3)       ; NOTE(review): presumably depends on the carry left by the shift above - confirm
+                sys.set_carry()     ; carry set right before rotating the mask, presumably so a 1 bit enters maskw on the left - confirm
+                ror(maskw)
+                ror(mask_3)
+            }
+            shifted_sprite[yix] = msb(sprw)
+            shifted_mask[yix] = msb(maskw)
+            yix++
+            shifted_sprite[yix] = lsb(sprw)
+            shifted_mask[yix] = lsb(maskw)
+            yix++
+            shifted_sprite[yix] = sprite_3
+            shifted_mask[yix] = mask_3
+            yix++
+        }
+    }
+
+    sub draw_number(ubyte vmembase, uword number) {     ; writes 4 decimal digits of 'number' into the buffer at vram address vmembase*2048
+        uword vmem = vmembase * 2048        ; mkword(vmembase,0) * 8
+        ubyte bank = vmembase>=32
+        vmem += 35      ; the first digit goes at byte offset 35 of the buffer, the others follow at 36, 37 and 38
+        ubyte thousands
+        ubyte hundreds
+        ubyte tens
+        ubyte ones
+        void conv.uword2decimal(number)     ; the digits are picked up below from the routine's own variables
+        %asm {{
+            lda  conv.uword2decimal.decThousands
+            and  #15        ; keep the low nibble only; presumably turns an ascii digit into 0-9 - confirm
+            sta  thousands
+            lda  conv.uword2decimal.decHundreds
+            and  #15
+            sta  hundreds
+            lda  conv.uword2decimal.decTens
+            and  #15
+            sta  tens
+            lda  conv.uword2decimal.decOnes
+            and  #15
+            sta  ones
+        }}
+
+        uword pixelsptr = &numberpixels + thousands*7
+        ubyte pix
+        cx16.vaddr(bank, vmem, 0, false)
+        cx16.VERA_ADDR_H &= 1
+        cx16.VERA_ADDR_H |= %10110000     ; increment 40 for write (next line)
+        for pix in 0 to 6       ; 7 bytes per digit, one per bitmap line
+            cx16.VERA_DATA0 = pixelsptr[pix]
+        vmem++
+        cx16.vaddr(bank, vmem, 0, false)
+        cx16.VERA_ADDR_H &= 1
+        cx16.VERA_ADDR_H |= %10110000     ; increment 40 for write (next line)
+        pixelsptr = &numberpixels + hundreds*7
+        for pix in 0 to 6
+            cx16.VERA_DATA0 = pixelsptr[pix]
+        vmem++
+        cx16.vaddr(bank, vmem, 0, false)
+        cx16.VERA_ADDR_H &= 1
+        cx16.VERA_ADDR_H |= %10110000     ; increment 40 for write (next line)
+        pixelsptr = &numberpixels + tens*7
+        for pix in 0 to 6
+            cx16.VERA_DATA0 = pixelsptr[pix]
+        vmem++
+        cx16.vaddr(bank, vmem, 0, false)
+        cx16.VERA_ADDR_H &= 1
+        cx16.VERA_ADDR_H |= %10110000     ; increment 40 for write (next line)
+        pixelsptr = &numberpixels + ones*7
+        for pix in 0 to 6
+            cx16.VERA_DATA0 = pixelsptr[pix]
+    }
+
+    ubyte[10*7] numberpixels = [        ; pixel data for the digits 0-9: 7 bytes per digit, indexed as digit*7 by draw_number()
+        %00111000,
+        %01000100,
+        %10000100,
+        %10000100,
+        %10000100,
+        %01111000,
+        %00000000,
+
+        %00010000,
+        %00110000,
+        %01010000,
+        %00010000,
+        %00010000,
+        %01111100,
+        %00000000,
+
+        %01111000,
+        %10000100,
+        %00011000,
+        %00110000,
+        %01100000,
+        %11111100,
+        %00000000,
+
+        %01111000,
+        %00000100,
+        %00111000,
+        %00000100,
+        %00000100,
+        %11111000,
+        %00000000,
+
+        %00010100,
+        %00100100,
+        %01000100,
+        %11111100,
+        %00000100,
+        %00000100,
+        %00000000,
+
+        %11111000,
+        %10000000,
+        %11111000,
+        %00000100,
+        %00000100,
+        %11111000,
+        %00000000,
+
+        %01111000,
+        %10000000,
+        %11111000,
+        %10000100,
+        %10000100,
+        %01111000,
+        %00000000,
+
+        %11111100,
+        %00001000,
+        %00010000,
+        %00010000,
+        %00010000,
+        %00010000,
+        %00000000,
+
+        %01111000,
+        %10000100,
+        %01111000,
+        %10000100,
+        %10000100,
+        %01111000,
+        %00000000,
+
+        %01111000,
+        %10000100,
+        %01111100,
+        %00000100,
+        %10000100,
+        %01111000,
+        %00000000
+    ]
+
+    uword[16] sprite = [        ; the bob image: 16 rows of 16 pixels, one word per row; these bits get or-ed into the bitmap by blit()
+        %0000000000000000,
+        %0110001110000000,
+        %0101001001000000,
+        %0100111001000000,
+        %0100000000100000,
+        %0101001000100000,
+        %0101001000110000,
+        %0100100000101000,
+        %0101111000101000,
+        %0010000001010100,
+        %0001111110010100,
+        %0001000000010000,
+        %0001000000010000,
+        %0001010111010000,
+        %0001101100110000,
+        %0000000000000000
+    ]
+
+    uword[16] mask = [      ; and-mask for the bob, one word per row: blit() ands the existing bitmap byte with it before or-ing the image
+        %1111111111111111,
+        %1000000001111111,
+        %1000000000111111,
+        %1000000000111111,
+        %1000000000011111,
+        %1000000000011111,
+        %1000000000001111,
+        %1000000000000111,
+        %1000000000000111,
+        %1100000000001011,
+        %1110000000001011,
+        %1110000000001111,
+        %1110000000001111,
+        %1110000000001111,
+        %1110010011001111,
+        %1111111111111111
+    ]
+
+    ubyte[16*3] shifted_sprite      ; written by bitshift(), read by blit(): 16 rows of 3 bytes
+    ubyte[16*3] shifted_mask        ; written by bitshift(), read by blit(): 16 rows of 3 bytes
+}