diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmGen.kt
index 08fe02d71..d16269a2c 100644
--- a/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmGen.kt
+++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmGen.kt
@@ -1322,34 +1322,19 @@ $repeatLabel""")
             }
             if(constOffset!=null) {
-                println("MEMWRITE POINTER - $constOffset ${addressExpr.position}")     // TODO
-/*
                 val pointervar = ptrAndIndex.first as? PtIdentifier
                 if(pointervar!=null && isZpVar(pointervar)) {
-                    val saveA = evalBytevalueWillClobberA(ptrAndIndex.second)
-                    if(saveA) out("  pha")
-                    assignExpressionToRegister(ptrAndIndex.second, RegisterOrPair.Y)
-                    if(saveA) out("  pla")
-                    out("  sta  (${asmSymbolName(pointervar)}),y")
+                    val varname = asmSymbolName(pointervar)
+                    out("  ldy  #${256-constOffset}    ; negative offset $constOffset")
+                    out("  dec  $varname+1 |  sta  ($varname),y |  inc  $varname+1")    // temporarily make MSB 1 less
+                    return true
                 } else {
                     // copy the pointer var to zp first
-                    val saveA = evalBytevalueWillClobberA(ptrAndIndex.first) || evalBytevalueWillClobberA(ptrAndIndex.second)
-                    if(saveA) out("  pha")
-                    if(ptrAndIndex.second.isSimple()) {
-                        assignExpressionToVariable(ptrAndIndex.first, "P8ZP_SCRATCH_W2", DataType.forDt(BaseDataType.UWORD))
-                        assignExpressionToRegister(ptrAndIndex.second, RegisterOrPair.Y)
-                        if(saveA) out("  pla")
-                        out("  sta  (P8ZP_SCRATCH_W2),y")
-                    } else {
-                        pushCpuStack(BaseDataType.UBYTE, ptrAndIndex.second)
-                        assignExpressionToVariable(ptrAndIndex.first, "P8ZP_SCRATCH_W2", DataType.forDt(BaseDataType.UWORD))
-                        restoreRegisterStack(CpuRegister.Y, true)
-                        if(saveA) out("  pla")
-                        out("  sta  (P8ZP_SCRATCH_W2),y")
-                    }
+                    assignExpressionToVariable(ptrAndIndex.first, "P8ZP_SCRATCH_W2", DataType.forDt(BaseDataType.UWORD))
+                    out("  ldy  #${256-constOffset}    ; negative offset $constOffset")
+                    out("  dec  P8ZP_SCRATCH_W2+1 |  sta  (P8ZP_SCRATCH_W2),y |  inc  P8ZP_SCRATCH_W2+1")    // temporarily make MSB 1 less
+                    return true
                 }
-                return true
-*/
             }
         }
         else {
@@ -1367,8 +1352,19 @@ $repeatLabel""")
             }
             if(constOffset!=null) {
-                println("MEMREAD POINTER - $constOffset ${addressExpr.position}")  // TODO
-                // TODO optimize more cases
+                val pointervar = ptrAndIndex.first as? PtIdentifier
+                if(pointervar!=null && isZpVar(pointervar)) {
+                    val varname = asmSymbolName(pointervar)
+                    out("  ldy  #${256-constOffset}    ; negative offset $constOffset")
+                    out("  dec  $varname+1 |  lda  ($varname),y |  inc  $varname+1")    // temporarily make MSB 1 less
+                    return true
+                } else {
+                    // copy the pointer var to zp first
+                    assignExpressionToVariable(ptrAndIndex.first, "P8ZP_SCRATCH_W2", DataType.forDt(BaseDataType.UWORD))
+                    out("  ldy  #${256-constOffset}    ; negative offset $constOffset")
+                    out("  dec  P8ZP_SCRATCH_W2+1 |  lda  (P8ZP_SCRATCH_W2),y |  inc  P8ZP_SCRATCH_W2+1")    // temporarily make MSB 1 less
+                    return true
+                }
             }
         }
     }
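Note (illustration, not part of the patch): both hunks above emit the same trick. tryOptimizedPointerAccessWithA() works with the byte value already in A; for a constant negative offset it now indexes backwards by temporarily lowering the pointer's MSB one page. Assuming a zeropage pointer variable named p8v_ptr (hypothetical name, following prog8's p8v_ symbol prefix) and a write like @(ptr-2) = value, the emitted code would look roughly like this (the | separators in the out() strings become separate assembly lines):

        ldy  #254             ; Y = 256-2, the "negative offset"
        dec  p8v_ptr+1        ; temporarily make the MSB 1 less
        sta  (p8v_ptr),y      ; effective address: (ptr-256)+254 = ptr-2
        inc  p8v_ptr+1        ; restore the MSB

The memread hunk is identical except it uses lda instead of sta, and a pointer that doesn't live in zeropage is first copied to P8ZP_SCRATCH_W2 so indirect-indexed addressing can be used at all.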
diff --git a/docs/source/todo.rst b/docs/source/todo.rst
index 7b41e39f6..d11f8acce 100644
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -1,7 +1,7 @@
 TODO
 ====
 
-- optimize @(cell_ptr-offset) to use DEC pointer_msb ; LDY #255 ; INC pointer_msb instead. See tryOptimizedPointerAccessWithA()
+- min/max(x1,x2) lots of code in a temp word, can sometimes just use the existing variables x1 and x2
 
 - add paypal donation button as well?
 
@@ -21,7 +21,7 @@ Future Things and Ideas
     Add a -library $xxxx command line option (and/or some directive) to preselect every setting that is required
     to make a library at $xxxx rather than a normal loadable and runnable program?
     Need to add some way to generate a stable jump table at a given address.
     Need library to not call init_system AND init_system_phase2 not either.
-    Library must not include prog8_program_start stuff either.
+    Library must not include prog8_program_start stuff either. Must not require 'start' entrypoint either? Although they need some initialization entry point?
 
 - [problematic due to using 64tass:] better support for building library programs, where unused .proc are NOT deleted from the assembly.
   Perhaps replace all uses of .proc/.pend/.endproc by .block/.bend will fix that with a compiler flag?
   But all library code written in asm uses .proc already..... (textual search/replace when writing the actual asm?)
 
@@ -86,15 +86,15 @@ Optimizations
 - if magicwall_enabled and (jiffy_counter & 3 == 1)  sounds.magicwall() -> generates shortcut jump to another jump, why not immediately after the if
 - explode(x, y+1) pushes x on the stack and pops it, could simply load it in reverse order and not use the stack
 - return mkword(attrs[cx16.r2L], object[cx16.r2L]) same as the explode() above
-- x = y + z  more efficient if rewritten to x=y; x+=z ?
 - return peekw(table+64+pos*2) .... or rather .. return -> can this be optimized by using a temporary variable and chop up the expression?
   likewise cx16.r0 = (gfx_lores.WIDTH-bmx.width)/2 + (gfx_lores.HEIGHT-bmx.height)/2*gfx_lores.WIDTH  a lot of register juggling
-- is there a trick to make @(pointer-1) = v  more efficient? (like @(pointer+1)=v  using Y indexed)
 - if sv=="aa" else if sv=="bb" else if sv=="cc" -> needs much code, allow when(stringvar) too to avoid reloading both strings for every case (rockrunner bdcff.p8)
 - if cx16.r0L>=97 and cx16.r0L<=122 {...} -> treats the boolean condition as a byte 0/1 result, can't it somehow just act on the carry bit alone? same with if x1!=x2 or y1!=y2..... but it's because of the way boolean expressions are handled... can this be optimized?
-- min(x1,x2) lots of code in a temp word, can just use the existing variables x1 and x2
+- this generates empty lines in the resulting asm, why?:
+    cx16.r0L = cx16.r1L
+    cx16.r0L += cx16.r2L
 - Optimize the IfExpression code generation to be more like regular if-else code. (both 6502 and IR) search for "TODO don't store condition as expression"
 - VariableAllocator: can we think of a smarter strategy for allocating variables into zeropage, rather than first-come-first-served?
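Note (illustration, not part of the patch): the min/max item moved to the top of the TODO is what examples/test.p8 below now exercises. For the unsigned byte case no temporary is needed at all; a minimal sketch of the desired output for bb0 = min(bb1, 100), assuming hypothetical p8v_-prefixed symbols and 64tass anonymous labels:

        lda  p8v_bb1      ; A = bb1
        cmp  #100
        bcc  +            ; bb1 < 100: keep it
        lda  #100         ; otherwise clamp to the constant
+       sta  p8v_bb0      ; bb0 = min(bb1, 100)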
diff --git a/examples/test.p8 b/examples/test.p8
index 9981afd02..cd2edaacc 100644
--- a/examples/test.p8
+++ b/examples/test.p8
@@ -4,43 +4,35 @@ main {
 
     sub start() {
-        ubyte[] array = [11,22,33,44,55,66,77,88,99]
-        uword @shared ptr = &array[5]
-        ubyte @shared offset
+        ubyte @shared bb0, bb1, bb2, bb3, bb4
+        uword @shared ww0, ww1, ww2, ww3, ww4
 
+        bb0 = min(bb1, 100)
+        bb1 = min(bb1, 100)
+        bb0 = min(100, bb1)
+        bb1 = min(100, bb1)
+        bb0 = min(bb1, bb2)
+        bb2 = min(bb1+bb2, bb3+bb4)
 
-        cx16.r0L = @(&start + 1)
-        cx16.r1L = @(&start - 1)
-        @(&start+1) = 99
-        @(&start-1) = 99
+        bb0 = max(bb1, 100)
+        bb1 = max(bb1, 100)
+        bb0 = max(100, bb1)
+        bb1 = max(100, bb1)
+        bb0 = max(bb1, bb2)
+        bb2 = max(bb1+bb2, bb3+bb4)
 
-;        @(ptr+1) = cx16.r0L
-;        @(ptr+2) = cx16.r0L
-;        @(ptr+offset) = cx16.r0L
-;        @(ptr-1) = cx16.r0L
-;        @(ptr-2) = cx16.r0L
-;        @(ptr-offset) = cx16.r0L
-
-
-;        cx16.r0L = @(ptr+1)
-;        cx16.r1L = @(ptr+2)
-;        cx16.r2L = @(ptr+offset)
-;        cx16.r3L = @(ptr-1)
-;        cx16.r4L = @(ptr-2)
-;        cx16.r5L = @(ptr-offset)
-
-
-
-;        %asm {{
-;            dec  p8v_ptr+1
-;            ldy  #255
-;            lda  (p8v_ptr),y
-;            inc  p8v_ptr+1
-;            sta  cx16.r0L
-;        }}
-
-        repeat {
-        }
+        ww0 = min(ww1, 100)
+        ww1 = min(ww1, 100)
+        ww0 = min(100, ww1)
+        ww1 = min(100, ww1)
+        ww0 = min(ww1, ww2)
+        ww2 = min(ww1+ww2, ww3+ww4)
 
+        ww0 = max(ww1, 100)
+        ww1 = max(ww1, 100)
+        ww0 = max(100, ww1)
+        ww1 = max(100, ww1)
+        ww0 = max(ww1, ww2)
+        ww2 = max(ww1+ww2, ww3+ww4)
     }
 }
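Note (illustration, not part of the patch): the uword cases in this test are the ones that currently produce "lots of code in a temp word". A sketch of what ww0 = min(ww1, 100) could compile to using only the operand variables (same assumed p8v_ naming; the standard unsigned 16-bit compare via cmp/sbc):

        lda  p8v_ww1        ; ww0 = ww1 to start with
        sta  p8v_ww0
        lda  p8v_ww1+1
        sta  p8v_ww0+1
        lda  p8v_ww1        ; unsigned 16-bit compare: ww1 vs 100
        cmp  #<100
        lda  p8v_ww1+1
        sbc  #>100
        bcc  +              ; ww1 < 100: ww0 is already correct
        lda  #<100          ; otherwise clamp ww0 to the constant
        sta  p8v_ww0
        lda  #>100
        sta  p8v_ww0+1
+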