diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt index b87bd1a83..04637c475 100644 --- a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt +++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt @@ -279,20 +279,61 @@ internal class AssignmentAsmGen(private val program: PtProgram, is PtPrefix -> { if(assign.target.array==null) { if(assign.source.datatype==assign.target.datatype) { - // First assign the value to the target then apply the operator in place on the target. - // This saves a temporary variable - translateNormalAssignment( - AsmAssignment( - AsmAssignSource.fromAstSource(value.value, program, asmgen), - assign.target, program.memsizer, assign.position - ), scope - ) - when (value.operator) { - "+" -> {} - "-" -> inplaceNegate(assign, true, scope) - "~" -> inplaceInvert(assign, scope) - "not" -> throw AssemblyError("not should have been replaced in the Ast by ==0") - else -> throw AssemblyError("invalid prefix operator") + if(assign.source.datatype in IntegerDatatypes) { + val signed = assign.source.datatype in SignedDatatypes + if(assign.source.datatype in ByteDatatypes) { + assignExpressionToRegister(value.value, RegisterOrPair.A, signed) + when(value.operator) { + "+" -> {} + "-" -> { + if(asmgen.isTargetCpu(CpuType.CPU65c02)) + asmgen.out(" eor #255 | ina") + else + asmgen.out(" eor #255 | clc | adc #1") + } + "~" -> asmgen.out(" eor #255") + "not" -> throw AssemblyError("not should have been replaced in the Ast by ==0") + else -> throw AssemblyError("invalid prefix operator") + } + assignRegisterByte(assign.target, CpuRegister.A, signed) + } else { + assignExpressionToRegister(value.value, RegisterOrPair.AY, signed) + when(value.operator) { + "+" -> {} + "-" -> { + asmgen.out(""" + sec + eor #255 + adc #0 + pha + tya + eor #255 + adc #0 + tay + pla""") + } + "~" -> asmgen.out(" pha | tya | eor #255 | tay | pla | eor #255") + "not" -> throw AssemblyError("not should have been replaced in the Ast by ==0") + else -> throw AssemblyError("invalid prefix operator") + } + assignRegisterpairWord(assign.target, RegisterOrPair.AY) + } + } else { + // First assign the value to the target then apply the operator in place on the target. + // This saves a temporary variable + translateNormalAssignment( + AsmAssignment( + AsmAssignSource.fromAstSource(value.value, program, asmgen), + assign.target, program.memsizer, assign.position + ), scope + ) + when (value.operator) { + "+" -> {} + "-" -> inplaceNegate(assign, true, scope) + "~" -> inplaceInvert(assign, scope) + "not" -> throw AssemblyError("not should have been replaced in the Ast by ==0") + else -> throw AssemblyError("invalid prefix operator") + } } } else { // use a temporary variable @@ -3419,11 +3460,18 @@ internal class AssignmentAsmGen(private val program: PtProgram, DataType.BYTE -> { when (target.kind) { TargetStorageKind.VARIABLE -> { - asmgen.out(""" - lda #0 - sec - sbc ${target.asmVarname} - sta ${target.asmVarname}""") + if(asmgen.isTargetCpu(CpuType.CPU65c02)) + asmgen.out(""" + lda ${target.asmVarname} + eor #255 + ina + sta ${target.asmVarname}""") + else + asmgen.out(""" + lda #0 + sec + sbc ${target.asmVarname} + sta ${target.asmVarname}""") } TargetStorageKind.REGISTER -> { when(target.register!!) { @@ -3432,7 +3480,6 @@ internal class AssignmentAsmGen(private val program: PtProgram, asmgen.out(" eor #255 | ina") else asmgen.out(" eor #255 | clc | adc #1") - } RegisterOrPair.X -> asmgen.out(" txa | eor #255 | tax | inx") RegisterOrPair.Y -> asmgen.out(" tya | eor #255 | tay | iny") diff --git a/compiler/res/prog8lib/cx16/gfx2.p8 b/compiler/res/prog8lib/cx16/gfx2.p8 index 927eb5e4f..550a25b1f 100644 --- a/compiler/res/prog8lib/cx16/gfx2.p8 +++ b/compiler/res/prog8lib/cx16/gfx2.p8 @@ -13,13 +13,14 @@ ; SCREEN MODE LIST: ; mode 0 = reset back to default text mode ; mode 1 = bitmap 320 x 240 monochrome -; mode 2 = bitmap 320 x 240 x 4c (TODO not yet implemented) -; mode 3 = bitmap 320 x 240 x 16c (TODO not yet implemented) +; mode 2 = bitmap 320 x 240 x 4c (not yet implemented: just use 256c, there's enough vram for that) +; mode 3 = bitmap 320 x 240 x 16c (not yet implemented: just use 256c, there's enough vram for that) ; mode 4 = bitmap 320 x 240 x 256c (like SCREEN $80 but using this api instead of kernal) ; mode 5 = bitmap 640 x 480 monochrome ; mode 6 = bitmap 640 x 480 x 4c ; higher color dephts in highres are not supported due to lack of VRAM +; TODO remove the phx/plx pairs in non-stack compiler version gfx2 { @@ -46,7 +47,7 @@ gfx2 { height = 240 bpp = 1 } - ; TODO modes 2, 3 not yet implemented + ; TODO modes 2, 3 4 -> { ; lores 256c cx16.VERA_DC_VIDEO = (cx16.VERA_DC_VIDEO & %11001111) | %00100000 ; enable only layer 1 @@ -109,7 +110,7 @@ gfx2 { repeat 240/2/8 cs_innerloop640() } - ; TODO mode 2, 3 + ; TODO modes 2, 3 4 -> { ; lores 256c repeat 240/2 @@ -239,8 +240,7 @@ _done }} } 6 -> { - ; highres 4c - ; TODO also mostly usable for lores 4c? + ; highres 4c ....also mostly usable for mode 2, lores 4c? color &= 3 ubyte[4] colorbits ubyte ii @@ -594,7 +594,7 @@ _done }} } } - ; TODO mode 2,3 + ; TODO modes 2, 3 4 -> { ; lores 256c void addr_mul_24_for_lores_256c(y, x) ; 24 bits result is in r0 and r1L (highest byte) @@ -646,8 +646,7 @@ _done } } 6 -> { - ; highres 4c - ; TODO also mostly usable for lores 4c? + ; highres 4c ....also mostly usable for mode 2, lores 4c? void addr_mul_24_for_highres_4c(y, x) ; 24 bits result is in r0 and r1L (highest byte) cx16.r2L = lsb(x) & 3 ; xbits ; color &= 3 @@ -701,7 +700,7 @@ _done + }} } - ; TODO mode 2 and 3 + ; TODO modes 2, 3 4 -> { ; lores 256c void addr_mul_24_for_lores_256c(y, x) ; 24 bits result is in r0 and r1L (highest byte) @@ -875,19 +874,18 @@ skip: } sub position(uword @zp x, uword y) { - ubyte bank when active_mode { 1 -> { ; lores monochrome cx16.r0 = y*(320/8) + x/8 cx16.vaddr(0, cx16.r0, 0, 1) } - ; TODO modes 2,3 + ; TODO modes 2, 3 4 -> { ; lores 256c void addr_mul_24_for_lores_256c(y, x) ; 24 bits result is in r0 and r1L (highest byte) - bank = lsb(cx16.r1) - cx16.vaddr(bank, cx16.r0, 0, 1) + cx16.r2L = cx16.r1L + cx16.vaddr(cx16.r2L, cx16.r0, 0, 1) } 5 -> { ; highres monochrome @@ -897,24 +895,16 @@ skip: 6 -> { ; highres 4c void addr_mul_24_for_highres_4c(y, x) ; 24 bits result is in r0 and r1L (highest byte) - bank = lsb(cx16.r1) - cx16.vaddr(bank, cx16.r0, 0, 1) + cx16.r2L = cx16.r1L + cx16.vaddr(cx16.r2L, cx16.r0, 0, 1) } } } sub position2(uword @zp x, uword y, bool also_port_1) { position(x, y) - if also_port_1 { - when active_mode { - 1, 5 -> cx16.vaddr(0, cx16.r0, 1, 1) - ; TODO modes 2, 3 - 4, 6 -> { - ubyte bank = lsb(cx16.r1) - cx16.vaddr(bank, cx16.r0, 1, 1) - } - } - } + if also_port_1 + cx16.vaddr_clone(0) } inline asmsub next_pixel(ubyte color @A) { @@ -986,48 +976,96 @@ skip: sub text(uword @zp x, uword y, ubyte color, uword sctextptr) { ; -- Write some text at the given pixel position. The text string must be in screencode encoding (not petscii!). ; You must also have called text_charset() first to select and prepare the character set to use. - ; NOTE: in monochrome (1bpp) screen modes, x position is currently constrained to multiples of 8 ! TODO allow per-pixel horizontal positioning - ; TODO draw whole horizontal spans using vera auto increment if possible, instead of per-character columns uword chardataptr + ubyte[8] @shared char_bitmap_bytes_left + ubyte[8] @shared char_bitmap_bytes_right + when active_mode { 1, 5 -> { ; monochrome mode, either resolution - cx16.r2 = 40 - if active_mode==5 - cx16.r2 = 80 - while @(sctextptr) { - chardataptr = charset_addr + (@(sctextptr) as uword)*8 - cx16.vaddr(charset_bank, chardataptr, 1, 1) - position(x,y) + cx16.r3 = sctextptr + while @(cx16.r3) { + chardataptr = charset_addr + @(cx16.r3) * $0008 + ; copy the character bitmap into RAM + cx16.vaddr_autoincr(charset_bank, chardataptr, 0, 1) %asm {{ - lda cx16.VERA_ADDR_H - and #%111 ; don't auto-increment, we have to do that manually because of the ora - sta cx16.VERA_ADDR_H - lda color + ; pre-shift the bits + phx ; TODO remove in non-stack version + lda text.x + and #7 sta P8ZP_SCRATCH_B1 - ldy #8 -- lda P8ZP_SCRATCH_B1 - bne + ; white color, plot normally - lda cx16.VERA_DATA1 - eor #255 ; black color, keep only the other pixels - and cx16.VERA_DATA0 - bra ++ -+ lda cx16.VERA_DATA0 - ora cx16.VERA_DATA1 -+ sta cx16.VERA_DATA0 - lda cx16.VERA_ADDR_L - clc - adc cx16.r2 - sta cx16.VERA_ADDR_L - bcc + - inc cx16.VERA_ADDR_M -+ inc x - bne + - inc x+1 -+ dey + ldy #0 +- lda cx16.VERA_DATA0 + stz P8ZP_SCRATCH_REG + ldx P8ZP_SCRATCH_B1 + cpx #0 + beq + +- lsr a + ror P8ZP_SCRATCH_REG + dex bne - ++ sta char_bitmap_bytes_left,y + lda P8ZP_SCRATCH_REG + sta char_bitmap_bytes_right,y + iny + cpy #8 + bne -- + plx ; TODO remove in non-stack version }} - sctextptr++ + ; left part of shifted char + position2(x, y, true) + set_autoincrs_mode1_or_5() + if color { + %asm {{ + ldy #0 +- lda char_bitmap_bytes_left,y + ora cx16.VERA_DATA1 + sta cx16.VERA_DATA0 + iny + cpy #8 + bne - + }} + } else { + %asm {{ + ldy #0 +- lda char_bitmap_bytes_left,y + eor #255 + and cx16.VERA_DATA1 + sta cx16.VERA_DATA0 + iny + cpy #8 + bne - + }} + } + ; right part of shifted char + if lsb(x) & 7 { + position2(x+8, y, true) + set_autoincrs_mode1_or_5() + if color { + %asm {{ + ldy #0 + - lda char_bitmap_bytes_right,y + ora cx16.VERA_DATA1 + sta cx16.VERA_DATA0 + iny + cpy #8 + bne - + }} + } else { + %asm {{ + ldy #0 + - lda char_bitmap_bytes_right,y + eor #255 + and cx16.VERA_DATA1 + sta cx16.VERA_DATA0 + iny + cpy #8 + bne - + }} + } + } + cx16.r3++ + x += 8 } } 4 -> { @@ -1061,30 +1099,80 @@ skip: ; hires 4c ; we're going to use a few cx16 registers to make sure every variable is in zeropage in the inner loop. cx16.r11L = color - cx16.r8 = y while @(sctextptr) { chardataptr = charset_addr + (@(sctextptr) as uword)*8 cx16.vaddr(charset_bank, chardataptr, 1, true) ; for reading the chardata from Vera data channel 1 + position(x, y) ; only calculated once, we update vera address in the loop instead + cx16.VERA_ADDR_H &= $0f ; no auto increment repeat 8 { - ; TODO rewrite this inner loop partly in assembly: - ; requires expanding the charbits to 2-bits per pixel (based on color) - ; also it's way more efficient to draw whole horizontal spans instead of per-character - cx16.r9L = cx16.VERA_DATA1 ; get the next 8 horizontal character bits + cx16.r10L = cx16.VERA_DATA1 ; get the next 8 horizontal character bits cx16.r7 = x repeat 8 { - cx16.r9L <<= 1 - if_cs - plot(cx16.r7, cx16.r8, cx16.r11L) + cx16.r10L <<= 1 + if_cs { + cx16.r2L = cx16.r7L & 3 ; xbits + when cx16.r11L & 3 { + 1 -> cx16.r12L = gfx2.plot.shiftedleft_4c_1[cx16.r2L] + 2 -> cx16.r12L = gfx2.plot.shiftedleft_4c_2[cx16.r2L] + 3 -> cx16.r12L = gfx2.plot.shiftedleft_4c_3[cx16.r2L] + else -> cx16.r12L = 0 + } + cx16.VERA_DATA0 = cx16.VERA_DATA0 & gfx2.plot.mask4c[cx16.r2L] | cx16.r12L + } cx16.r7++ + if (cx16.r7 & 3) == 0 { + ; increment the pixel address by one + %asm {{ + stz cx16.VERA_CTRL + clc + lda cx16.VERA_ADDR_L + adc #1 + sta cx16.VERA_ADDR_L + lda cx16.VERA_ADDR_M + adc #0 + sta cx16.VERA_ADDR_M + lda cx16.VERA_ADDR_H + adc #0 + sta cx16.VERA_ADDR_H + }} + } } - cx16.r8++ + + ; increment pixel address to the next line + %asm {{ + stz cx16.VERA_CTRL + clc + lda cx16.VERA_ADDR_L + adc #(640-8)/4 + sta cx16.VERA_ADDR_L + lda cx16.VERA_ADDR_M + adc #0 + sta cx16.VERA_ADDR_M + lda cx16.VERA_ADDR_H + adc #0 + sta cx16.VERA_ADDR_H + }} } x+=8 - cx16.r8-=8 sctextptr++ } } } + + sub set_autoincrs_mode1_or_5() { + ; set autoincrements to go to next pixel row (40 or 80 increment) + if active_mode==1 { + cx16.VERA_CTRL = 0 + cx16.VERA_ADDR_H = cx16.VERA_ADDR_H & $0f | (11<<4) + cx16.VERA_CTRL = 1 + cx16.VERA_ADDR_H = cx16.VERA_ADDR_H & $0f | (11<<4) + } else { + cx16.VERA_CTRL = 0 + cx16.VERA_ADDR_H = cx16.VERA_ADDR_H & $0f | (12<<4) + cx16.VERA_CTRL = 1 + cx16.VERA_ADDR_H = cx16.VERA_ADDR_H & $0f | (12<<4) + } + } } asmsub cs_innerloop640() clobbers(Y) { diff --git a/compiler/res/prog8lib/cx16/syslib.p8 b/compiler/res/prog8lib/cx16/syslib.p8 index afe508a0b..bd2821a62 100644 --- a/compiler/res/prog8lib/cx16/syslib.p8 +++ b/compiler/res/prog8lib/cx16/syslib.p8 @@ -554,6 +554,25 @@ asmsub vaddr(ubyte bank @A, uword address @R0, ubyte addrsel @R1, byte autoIncrO }} } +asmsub vaddr_clone(ubyte port @A) clobbers (A,X,Y) { + ; -- clones Vera addresses from the given source port to the other one. + ; leaves CTRL on the destination port. + %asm {{ + sta VERA_CTRL + ldx VERA_ADDR_L + ldy VERA_ADDR_H + phy + ldy VERA_ADDR_M + eor #1 + sta VERA_CTRL + stx VERA_ADDR_L + sty VERA_ADDR_M + ply + sty VERA_ADDR_H + rts + }} +} + asmsub vaddr_autoincr(ubyte bank @A, uword address @R0, ubyte addrsel @R1, uword autoIncrAmount @R2) clobbers(A,Y) { ; -- setup the VERA's data address register 0 or 1 ; including setting up optional auto increment amount. diff --git a/docs/source/programming.rst b/docs/source/programming.rst index 05aa391f2..289c64f90 100644 --- a/docs/source/programming.rst +++ b/docs/source/programming.rst @@ -324,7 +324,8 @@ This way you can set the second character on the second row from the top like th An uword variable can be used in limited scenarios as a 'pointer' to a byte in memory at a specific, dynamic, location. You can use array indexing on a pointer variable to use it as a byte array at a dynamic location in memory: currently this is equivalent to directly referencing the bytes in -memory at the given index. See also :ref:`pointervars_programming` +memory at the given index. In contrast to a real array variable, the index value can be the size of a word. +See also :ref:`pointervars_programming` **LSB/MSB split word arrays:** For (u)word arrays, you can make the compiler layout the array in memory as two separate arrays, @@ -445,7 +446,7 @@ without defining a memory mapped location, you can do so by enclosing the addres This is the official syntax to 'dereference a pointer' as it is often named in other languages. You can actually also use the array indexing notation for this. It will be silently converted into -the direct memory access expression as explained above. Note that this also means that unlike regular arrays, +the direct memory access expression as explained above. Note that unlike regular arrays, the index is not limited to an ubyte value. You can use a full uword to index a pointer variable like this:: pointervar[999] = 0 ; set memory byte to zero at location pointervar + 999. diff --git a/docs/source/syntaxreference.rst b/docs/source/syntaxreference.rst index 1f7bf2c8e..0d70806e3 100644 --- a/docs/source/syntaxreference.rst +++ b/docs/source/syntaxreference.rst @@ -414,7 +414,8 @@ directly access the memory. Enclose a numeric expression or literal with ``@(... @($d020) = 0 ; set the c64 screen border to black ("poke 53280,0") @(vic+$20) = 6 ; a dynamic expression to 'calculate' the address -The array indexing notation on a uword 'pointer variable' is syntactic sugar for such a direct memory access expression:: +The array indexing notation on a uword 'pointer variable' is syntactic sugar for such a direct memory access expression, +and the index value can be larger than a byte in this case:: pointervar[999] = 0 ; equivalent to @(pointervar+999) = 0 @@ -466,7 +467,7 @@ Syntax is familiar with brackets: ``arrayvar[x]`` :: Note: you can also use array indexing on a 'pointer variable', which is basically an uword variable containing a memory address. Currently this is equivalent to directly referencing the bytes in -memory at the given index. See :ref:`pointervars` +memory at the given index (and allows index values of word size). See :ref:`pointervars` String ^^^^^^ diff --git a/docs/source/todo.rst b/docs/source/todo.rst index badf159fd..8fd4a6dba 100644 --- a/docs/source/todo.rst +++ b/docs/source/todo.rst @@ -1,7 +1,8 @@ TODO ==== -- IR: reduce the number of branch instructions (gradually), replace with CMP(I) + status branch instruction +- IR: reduce the number of branch instructions such as BEQ, BEQR, etc (gradually), replace with CMP(I) + status branch instruction +- IR: reduce amount of CMP/CMPI after instructions that set the status bits correctly (LOADs? INC? etc), but only after setting the status bits is verified! ... @@ -32,7 +33,6 @@ Compiler: - ir: the @split arrays are currently also split in _lsb/_msb arrays in the IR, and operations take multiple (byte) instructions that may lead to verbose and slow operation and machine code generation down the line. - ir: for expressions with array indexes that occur multiple times, can we avoid loading them into new virtualregs everytime and just reuse a single virtualreg as indexer? (simple form of common subexpression elimination) - PtAst/IR: more complex common subexpression eliminations -- can we get rid of pieces of asmgen.AssignmentAsmGen by just reusing the AugmentableAssignment ? generated code should not suffer - [problematic due to using 64tass:] better support for building library programs, where unused .proc shouldn't be deleted from the assembly? Perhaps replace all uses of .proc/.pend/.endproc by .block/.bend will fix that with a compiler flag? But all library code written in asm uses .proc already..... (textual search/replace when writing the actual asm?) @@ -46,8 +46,6 @@ Libraries: - fix the problems in atari target, and flesh out its libraries. - c128 target: make syslib more complete (missing kernal routines)? - c64: make the graphics.BITMAP_ADDRESS configurable (VIC banking) -- optimize several inner loops in gfx2 even further? -- actually implement modes 3 and perhaps even 2 to gfx2 (lores 16 color and 4 color) Optimizations: @@ -64,15 +62,8 @@ STRUCTS again? What if we were to re-introduce Structs in prog8? Some thoughts: - can contain only numeric types (byte,word,float) - no nested structs, no reference types (strings, arrays) inside structs -- is just some syntactic sugar for a scoped set of variables -> ast transform to do exactly this before codegen. Codegen doesn't know about struct. -- no arrays of struct -- because too slow on 6502 to access those, rather use struct of arrays instead. - can we make this a compiler/codegen only issue? i.e. syntax is just as if it was an array of structs? - or make it explicit in the syntax so that it is clear what the memory layout of it is. -- ability to assign struct variable to another? this is slow but can be quite handy sometimes. - however how to handle this in a function that gets the struct passed as reference? Don't allow it there? (there's no pointer dereferencing concept in prog8) -- ability to be passed as argument to a function (by reference)? - however there is no typed pointer in prog8 at the moment so this can't be implemented in a meaningful way yet, - because there is no way to reference it as the struct type again. (current ast gets the by-reference parameter - type replaced by uword) - So-- maybe don't replace the parameter type in the ast? Should fix that for str and array types as well then - +- only as a reference type (uword pointer). This removes a lot of the problems related to introducing a variable length value type. +- arrays of struct is just an array of uword pointers. Can even be @split? +- need to introduce typed pointer datatype in prog8 +- str is then syntactic sugar for pointer to character/byte? +- arrays are then syntactic sugar for pointer to byte/word/float? diff --git a/examples/cx16/amiga.p8 b/examples/cx16/amiga.p8 index e3a9a13d2..605a72b4f 100644 --- a/examples/cx16/amiga.p8 +++ b/examples/cx16/amiga.p8 @@ -175,7 +175,7 @@ widget { const ubyte height = 11 widget.highlightedrect(x+widget.window_close_icon.width, y, width-64, height, true, active) gfx2.plot(x+widget.window_close_icon.width, y+height-1, 1) ; correct bottom left corner - gfx2.text(x+32, y+1, 1, titlestr) + gfx2.text(x+26, y+1, 1, titlestr) widget.window_close_icon(x, y, active) widget.window_order_icon(x+width-22, y, active) widget.window_flipsize_icon(x+width-44, y, active) diff --git a/gradle.properties b/gradle.properties index e9d0482de..e74490277 100644 --- a/gradle.properties +++ b/gradle.properties @@ -5,4 +5,4 @@ org.gradle.daemon=true kotlin.code.style=official javaVersion=11 kotlinVersion=1.9.0 -version=9.2-SNAPSHOT +version=9.2