diff --git a/compiler/src/prog8/compiler/target/c64/codegen/BuiltinFunctionsAsmGen.kt b/compiler/src/prog8/compiler/target/c64/codegen/BuiltinFunctionsAsmGen.kt index 48b1b47a3..61a33b64b 100644 --- a/compiler/src/prog8/compiler/target/c64/codegen/BuiltinFunctionsAsmGen.kt +++ b/compiler/src/prog8/compiler/target/c64/codegen/BuiltinFunctionsAsmGen.kt @@ -568,6 +568,38 @@ internal class BuiltinFunctionsAsmGen(private val program: Program, private val private fun funcSwap(fcall: IFunctionCall) { val first = fcall.args[0] val second = fcall.args[1] + if(first is IdentifierReference && second is IdentifierReference) { + val firstName = asmgen.asmIdentifierName(first) + val secondName = asmgen.asmIdentifierName(second) + val dt = first.inferType(program) + if(dt.istype(DataType.BYTE) || dt.istype(DataType.UBYTE)) { + asmgen.out(" ldy $firstName | lda $secondName | sta $firstName | tya | sta $secondName") + return + } + if(dt.istype(DataType.WORD) || dt.istype(DataType.UWORD)) { + asmgen.out(""" + ldy $firstName + lda $secondName + sta $firstName + tya + sta $secondName + ldy $firstName+1 + lda $secondName+1 + sta $firstName+1 + tya + sta $secondName+1 + """) + return + } + // TODO optimized case for swapping 2 float vars-- asm subroutine. + // Until that exists, float vars must NOT hit Kotlin's TODO() here: it throws + // NotImplementedError, which would abort compilation of every float swap. + // They fall through to the evaluation-stack code below instead. + } + + // TODO more optimized cases? for instance swapping elements of array vars? + + // suboptimal code via the evaluation stack... 
asmgen.translateExpression(first) asmgen.translateExpression(second) // pop in reverse order diff --git a/compiler/src/prog8/optimizer/ExpressionSimplifier.kt b/compiler/src/prog8/optimizer/ExpressionSimplifier.kt index 9332f4b36..26739401a 100644 --- a/compiler/src/prog8/optimizer/ExpressionSimplifier.kt +++ b/compiler/src/prog8/optimizer/ExpressionSimplifier.kt @@ -22,6 +22,11 @@ import kotlin.math.pow x < 0 (for word, byte as well?): just test the most significant bit for 1 x >= 0 (for word, byte as well?): just test the most significant bit for 0 + (assignment) x += y + 1 -> x += y , x++ (add another x++ for +2) + (assignment) x += y - 1 -> x += y , x-- + (assignment) x -= y + 1 -> x -= y , x-- + (assignment) x -= y - 1 -> x -= y , x++ + Investigate what optimizations binaryen has, also see https://egorbo.com/peephole-optimizations.html */ diff --git a/examples/c64graphics.p8 b/examples/c64graphics.p8 index 84d14321f..f1bbd09e2 100644 --- a/examples/c64graphics.p8 +++ b/examples/c64graphics.p8 @@ -18,133 +18,75 @@ graphics { } sub line(uword x1, ubyte y1, uword x2, ubyte y2) { - ; Bresenham algorithm - ; This code is a bit long because each of the 8 different octants has a dedicated loop. - ; This minimizes the number of actual math operations, and allows usins simple ++ and -- operations. - ; TODO sort X/Y coordinates to eliminate some of the special cases + ; Bresenham algorithm. + ; This code special cases various quadrant loops to allow simple ++ and -- operations. 
+ if y1>y2 { + ; make sure dy is always positive to avoid 8 instead of just 4 special cases + swap(x1, x2) + swap(y1, y2) + } word d = 0 ubyte positive_ix = true - ubyte positive_iy = true word dx = x2 - x1 as word word dy = y2 as word - y1 as word if dx < 0 { dx = -dx positive_ix = false } - if dy < 0 { - dy = -dy - positive_iy = false - } dx *= 2 dy *= 2 plotx = x1 if dx >= dy { if positive_ix { - if positive_iy { - forever { - graphics.plot(y1) - if plotx==x2 - return - plotx++ - d += dy - if d > dx { - y1++ - d -= dx - } - } - } else { - forever { - graphics.plot(y1) - if plotx==x2 - return - plotx++ - d += dy - if d > dx { - y1-- - d -= dx - } + forever { + plot(y1) + if plotx==x2 + return + plotx++ + d += dy + if d > dx { + y1++ + d -= dx } } } else { - if positive_iy { - forever { - graphics.plot(y1) - if plotx==x2 - return - plotx-- - d += dy - if d > dx { - y1++ - d -= dx - } - } - } else { - forever { - graphics.plot(y1) - if plotx==x2 - return - plotx-- - d += dy - if d > dx { - y1-- - d -= dx - } + forever { + plot(y1) + if plotx==x2 + return + plotx-- + d += dy + if d > dx { + y1++ + d -= dx } } } } else { - if positive_iy { - if positive_ix { - forever { - plot(y1) - if y1 == y2 - return - y1++ - d += dx - if d > dy { - plotx++ - d -= dy - } - } - } else { - forever { - plot(y1) - if y1 == y2 - return - y1++ - d += dx - if d > dy { - plotx-- - d -= dy - } + if positive_ix { + forever { + plot(y1) + if y1 == y2 + return + y1++ + d += dx + if d > dy { + plotx++ + d -= dy } } } else { - if positive_ix { - forever { - plot(y1) - if y1 == y2 - return - y1-- - d += dx - if d > dy { - plotx++ - d -= dy - } - } - } else { - forever { - plot(y1) - if y1 == y2 - return - y1-- - d += dx - if d > dy { - plotx-- - d -= dy - } + forever { + plot(y1) + if y1 == y2 + return + y1++ + d += dx + if d > dy { + plotx-- + d -= dy } } } @@ -272,7 +214,7 @@ _ormask .byte 128, 64, 32, 16, 8, 4, 2, 1 ; note: this can be even faster if we also have a 256 byte x-lookup table, 
but hey. ; see http://codebase64.org/doku.php?id=base:various_techniques_to_calculate_adresses_fast_common_screen_formats_for_pixel_graphics -; the y lookup tables encode this formula: bitmap_address + 320*(py>>3) + (py & 7) (y from 0..199) +; the y lookup tables encode this formula: bitmap_address + 320*(py>>3) + (py & 7) (y from 0..199) _y_lookup_hi .byte $20, $20, $20, $20, $20, $20, $20, $20, $21, $21, $21, $21, $21, $21, $21, $21 .byte $22, $22, $22, $22, $22, $22, $22, $22, $23, $23, $23, $23, $23, $23, $23, $23