diff --git a/codeCore/src/prog8/code/ast/AstExpressions.kt b/codeCore/src/prog8/code/ast/AstExpressions.kt
index 9c58e9910..897523c68 100644
--- a/codeCore/src/prog8/code/ast/AstExpressions.kt
+++ b/codeCore/src/prog8/code/ast/AstExpressions.kt
@@ -379,6 +379,21 @@ class PtString(val value: String, val encoding: Encoding, position: Position) :
 class PtTypeCast(type: BaseDataType, position: Position) : PtExpression(DataType.forDt(type), position) {
     val value: PtExpression
         get() = children.single() as PtExpression
+
+    /**
+     * Returns a new typecast node with the same target type and position as this one,
+     * wrapping a copy of the operand.
+     * Only an operand that is a plain identifier can be copied for now; anything else
+     * ends in a NotImplementedError (via TODO).
+     */
+    fun copy(): PtTypeCast {
+        val operand = children[0]
+        if(operand !is PtIdentifier)
+            TODO("cannot copy node $operand")
+        val copy = PtTypeCast(type.base, position)
+        copy.add(operand.copy())
+        return copy
+    }
 }
 
 
diff --git a/codeCore/src/prog8/code/optimize/Optimizer.kt b/codeCore/src/prog8/code/optimize/Optimizer.kt
index 77d3ef3ff..a3c09776e 100644
--- a/codeCore/src/prog8/code/optimize/Optimizer.kt
+++ b/codeCore/src/prog8/code/optimize/Optimizer.kt
@@ -4,12 +4,14 @@ import prog8.code.StExtSub
 import prog8.code.SymbolTable
 import prog8.code.ast.*
 import prog8.code.core.*
+import prog8.code.target.VMTarget
 
 
 fun optimizeSimplifiedAst(program: PtProgram, options: CompilationOptions, st: SymbolTable, errors: IErrorReporter) {
     if (!options.optimize)
         return
-    while (errors.noErrors() && optimizeAssignTargets(program, st) > 0) {
+    while (errors.noErrors() &&
+        optimizeAssignTargets(program, st) + optimizeWordPlusTimesTwo(program, options) > 0) {
         // keep rolling
     }
 }
@@ -96,3 +98,56 @@ internal fun isSame(identifier: PtIdentifier, type: DataType, returnedRegister:
     }
     return false    // there are no identifiers directly corresponding to cpu registers
 }
+
+
+/**
+ * Rewrites the doubling of a word-typecasted identifier that is added to or subtracted from
+ * a word expression into two separate additions/subtractions of that typecasted identifier:
+ * `word + ((x as uword) << 1)` becomes `(word + (x as uword)) + (x as uword)`.
+ * Nothing is changed when compiling for the VM target.
+ *
+ * @return the number of expressions that were rewritten
+ */
+private fun optimizeWordPlusTimesTwo(program: PtProgram, options: CompilationOptions): Int {
+    if(options.compTarget.name==VMTarget.NAME)
+        return 0
+    var changes = 0
+    walkAst(program) { node: PtNode, _: Int ->
+        if (node is PtBinaryExpression && isTimesTwo(node)) {
+            val typecast = node.left as? PtTypeCast
+            if(typecast!=null && typecast.type.isWord && typecast.value is PtIdentifier) {
+                val addition = node.parent as? PtBinaryExpression
+                // word - x*2 can be split into word - x - x, but x*2 - word can not:
+                // for a subtraction the doubled value has to be the right operand.
+                if(addition!=null && addition.type.isWord &&
+                    (addition.operator=="+" || (addition.operator=="-" && addition.right===node))) {
+                    // word + (byte<<1 as uword) (== word + byte*2)  -->  (word + (byte as word)) + (byte as word)
+                    val other = if(addition.left===node) addition.right else addition.left
+                    val addFirst = PtBinaryExpression(addition.operator, addition.type, addition.position)
+                    addFirst.add(other)
+                    addFirst.add(typecast)
+                    val addSecond = PtBinaryExpression(addition.operator, addition.type, addition.position)
+                    addSecond.add(addFirst)
+                    addSecond.add(typecast.copy())
+                    val parent = addition.parent
+                    val index = parent.children.indexOf(addition)
+                    parent.children[index] = addSecond
+                    addSecond.parent = parent
+                    changes++
+                }
+            }
+        }
+        true
+    }
+    return changes
+}
+
+/**
+ * Tells whether [expr] doubles its left operand: either `x << 1` or `x * 2`.
+ * A multiplication by two is treated like the shift instead of aborting the compilation with a TODO.
+ */
+private fun isTimesTwo(expr: PtBinaryExpression): Boolean = when(expr.operator) {
+    "<<" -> expr.right.asConstValue()==1.0
+    "*" -> expr.right.asConstValue()==2.0
+    else -> false
+}
diff --git a/docs/source/todo.rst b/docs/source/todo.rst
index 4f620d10b..8198d489d 100644
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -1,9 +1,6 @@
 TODO
 ====
 
-- word+byte*2 -> word +byte +byte, word-byte*2 -> word-byte-byte (check that it gets properly word-extended!)
-- optimize pokew and peekw to no longer do a jsr
-
 - Make some of the target machine config externally configurable (for 1 new target, the existing ones should stay as they are for the time being)
 
 - add paypal donation button as well?
@@ -71,7 +68,7 @@ IR/VM
 
 Libraries
 ---------
-- Sorting module gnomesort_uw could be optimized more, rewrite in asm? Shellshort seems consistently faster even if most of the words are already sorted.
+- Sorting module gnomesort_uw could be optimized more by fully rewriting it in asm? 
Shellsort seems consistently faster even if most of the words are already sorted. - Add split-word array sorting routines to sorting module? - add even more general raster irq routines to build some sort of "copper list" , like Oscar64 has? - pet32 target: make syslib more complete (missing kernal routines)? diff --git a/examples/test.p8 b/examples/test.p8 index 3211c3401..b657dfc08 100644 --- a/examples/test.p8 +++ b/examples/test.p8 @@ -1,3 +1,4 @@ +;%import emudbg %import textio %option no_sysinit %zeropage basicsafe @@ -5,51 +6,146 @@ main { - - ubyte @nozp @shared staticvar=51 - sub start() { - ubyte x,y + uword @shared w1, w2 + ubyte @shared b - x = 88 - y = 99 - ubyte a,b,c,d = multi4() - ubyte e,f = multi2() - - txt.print_ub(a) - txt.spc() - txt.print_ub(b) - txt.spc() - txt.print_ub(c) - txt.spc() - txt.print_ub(d) - txt.spc() - txt.print_ub(e) - txt.spc() - txt.print_ub(f) - txt.nl() + w1 = w2 + b*$0002 + w1 = w2 + b + b +; w2 = (w1 + b as uword) + (b as uword) - } - - sub single() -> ubyte { - return cx16.r0L+cx16.r1L - } - asmsub multi1() -> ubyte @A, ubyte @Y { - %asm {{ - lda #1 - ldy #2 - rts - }} - } - - sub multi2() -> ubyte, ubyte { - cx16.r0++ - return 33,44 - } - - sub multi4() -> ubyte, ubyte, ubyte, ubyte { - cx16.r0++ - return 3,4,5,6 +; cx16.r0 = cx16.r1 + cx16.r0L*2 +; cx16.r0 = cx16.r1 + cx16.r0L*$0002 +; cx16.r0 = cx16.r1 + cx16.r0L + cx16.r0L +; cx16.r0 = cx16.r1 - cx16.r0L*2 +; cx16.r0 = cx16.r1 - cx16.r0L*$0002 +; cx16.r0 = cx16.r1 - cx16.r0L - cx16.r0L } } + + +/* +mainxxx { + + uword[50] @nosplit warray1 + uword[50] @nosplit warray2 + + sub fill_arrays() { + math.rndseed(999,1234) + for cx16.r0L in 0 to len(warray1)-1 { + warray1[cx16.r0L] = math.rndw() + warray2[cx16.r0L] = cx16.r0L * (100 as uword) + } + warray2[40] = 9900 + warray2[44] = 9910 + warray2[48] = 9920 + } + + sub perf_reset() { + emudbg.reset_cpu_cycles() + } + + sub perf_print() { + cx16.r4, cx16.r5 = emudbg.cpu_cycles() + txt.print_uwhex(cx16.r5, true) + 
txt.print_uwhex(cx16.r4, false) + txt.nl() + } + + sub start() { + sys.set_irqd() + fill_arrays() + + txt.print("\ngnomesort (words):\n") + perf_reset() + gnomesort_uw(warray1, len(warray1)) + perf_print() + for cx16.r0L in 0 to len(warray1)-1 { + txt.print_uw(warray1[cx16.r0L]) + txt.chrout(',') + } + txt.nl() + + txt.print("\ngnomesort (words) almost sorted:\n") + perf_reset() + gnomesort_uw(warray2, len(warray2)) + perf_print() + for cx16.r0L in 0 to len(warray2)-1 { + txt.print_uw(warray2[cx16.r0L]) + txt.chrout(',') + } + txt.nl() + txt.nl() + + fill_arrays() + + txt.print("\ngnomesort_opt (words):\n") + perf_reset() + gnomesort_uw_opt(warray1, len(warray1)) + perf_print() + for cx16.r0L in 0 to len(warray1)-1 { + txt.print_uw(warray1[cx16.r0L]) + txt.chrout(',') + } + txt.nl() + + txt.print("\ngnomesort_opt (words) almost sorted:\n") + perf_reset() + gnomesort_uw_opt(warray2, len(warray2)) + perf_print() + for cx16.r0L in 0 to len(warray2)-1 { + txt.print_uw(warray2[cx16.r0L]) + txt.chrout(',') + } + txt.nl() + sys.clear_irqd() + repeat { + } + } + + + sub gnomesort_uw(uword values, ubyte num_elements) { + ; TODO optimize this more, rewrite in asm? + ubyte @zp pos = 1 + while pos != num_elements { + uword @requirezp ptr = values+(pos*$0002) + cx16.r0 = peekw(ptr-2) + cx16.r1 = peekw(ptr) + if cx16.r0<=cx16.r1 + pos++ + else { + ; swap elements + pokew(ptr-2, cx16.r1) + pokew(ptr, cx16.r0) + pos-- + if_z + pos++ + } + } + } + + sub gnomesort_uw_opt(uword values, ubyte num_elements) { + ; TODO optimize this more, rewrite in asm? + ubyte @zp pos = 1 + uword @requirezp ptr = values+2 + while pos != num_elements { + cx16.r0 = peekw(ptr-2) + cx16.r1 = peekw(ptr) + if cx16.r0<=cx16.r1 { + pos++ + ptr+=2 + } + else { + ; swap elements + pokew(ptr-2, cx16.r1) + pokew(ptr, cx16.r0) + if pos>1 { + pos-- + ptr-=2 + } + } + } + } +} +*/