also optimize BRA+RTS into just BRA

release 12.0
change some single use float global constants to their asm proc
2025-11-24 06:17:39 +00:00 · 2025-11-23 15:18:05 +01:00 · 2025-11-21 21:35:51 +01:00 · 2025-11-21 00:57:43 +01:00
10 changed files with 94 additions and 49 deletions
--- a/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmOptimizer.kt
+++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmOptimizer.kt
@@ -544,7 +544,7 @@ private fun optimizeJsrRtsAndOtherCombinations(linesByFour: Sequence<List<Indexe
        val third = lines[2].value
        if(!haslabel(second)) {
-            if ((" jmp" in first || "\tjmp" in first ) && (" rts" in second || "\trts" in second)) {
+            if ((" jmp" in first || "\tjmp" in first || " bra" in first || "\tbra" in first ) && (" rts" in second || "\trts" in second)) {
                mods += Modification(lines[1].index, true, null)
            }
            else if ((" jsr" in first || "\tjsr" in first ) && (" rts" in second || "\trts" in second)) {
--- a/compiler/res/prog8lib/c64/floats.asm
+++ b/compiler/res/prog8lib/c64/floats.asm
@@ -2,8 +2,7 @@
 FL_ONE_const	.byte  129     			; 1.0
 FL_ZERO_const	.byte  0,0,0,0,0		; 0.0
-FL_LOG2_const	.byte  $80, $31, $72, $17, $f8	; log(2)
+; note: don't add too many constants here because they all end up in the resulting program
 FL_65536_const  .byte  $91, $00, $00, $00, $00  ; 65536.0
 		.section BSS
@@ -159,6 +158,9 @@ cast_from_long          .proc
        ldx  cx16.r0L
        ldy  cx16.r0H
        jmp  MOVMF
 FL_65536_const  .byte  $91, $00, $00, $00, $00  ; 65536.0
        ; !notreached!
        .pend
 cast_as_long            .proc
--- a/compiler/res/prog8lib/c64/floats_funcs.asm
+++ b/compiler/res/prog8lib/c64/floats_funcs.asm
@@ -2,8 +2,11 @@
 func_sign_f_into_A	.proc
 		; sign in A, also sets status flags
 		jsr  MOVFM
-		jmp  SIGN
+		jsr  SIGN
 		cmp  #0
 		rts
 		.pend
--- a/compiler/res/prog8lib/shared_floats_functions.p8
+++ b/compiler/res/prog8lib/shared_floats_functions.p8
@@ -160,6 +160,9 @@ sub log2(float value) -> float {
        ldy  #>FL_LOG2_const
        jsr  MOVFM
        jmp  FDIVT
 FL_LOG2_const	.byte  $80, $31, $72, $17, $f8	; log(2)
        ; !notreached!
    }}
 }
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -1,7 +1,12 @@
 TODO
 ====
- before final release: test all examples and programs again with final version of the compiler!
+
 Weird Heisenbug
 ^^^^^^^^^^^^^^^
 - examples/cube3d-float crashes with a division-by-zero error on C64 (works on cx16; already broken in v11, v10 still worked)
  caused by the RTS after JMP removal in optimizeJsrRtsAndOtherCombinations (replacing it with a NOP makes the problem disappear !??!?)
 Future Things and Ideas
@@ -59,7 +64,6 @@ Future Things and Ideas
 IR/VM
 -----
 - optimize bool b = sgn(value)<0: still does a compare with 0 even though SGN sets all status bits. What is the code when a BIT instruction is used?
 - optimize float<0 float==0 float>0 to use SGN instruction?  Check what code is generated for other data types.
 - getting it in shape for code generation: the IR file should be able to encode every detail about a prog8 program (the VM doesn't have to actually be able to run all of it though!)
 - fix call() return value handling (... what's wrong with it again?)
@@ -115,20 +119,21 @@ Libraries
 Optimizations
 -------------
- change float<0, float==0, float>0 to use sgn(float) instead? (also see IR)
+- (6502) optimize if sgn(value)<0: still does a compare with 0 even though SGN sets all status bits.
 - longvar = lptr^^ ,  lptr2^^=lptr^^  now go via temporary registers, optimize this to avoid using temps.  (seems like it is dereferencing the pointer first and then assigning the intermediate value)
 - optimize inplaceLongShiftRight() for byte aligned cases
 - more optimized operator handling of different types, for example uword a ^ byte b now does a type cast of b to word first
- optimize longEqualsValue() for const and variable operands to not assign needlessly to R0-R3.
+- optimize longEqualsValue() for long const and variable operands to not assign needlessly to R0-R3.
- optimize optimizedBitwiseExpr()  for const and variable operands to not assign needlessly to R0-R3.
+- optimize optimizedBitwiseExpr()  for long const and variable operands to not assign needlessly to R0-R3.
 - optimize inplacemodificationLongWithLiteralval() for more shift values such as 8, 16, 24 etc but take sign bit into account!
 - optimize simple cases in funcPeekL and funcPokeL
 - bind types in the Ast much sooner than the simplifiedAst creation, so that we maybe could get rid of InferredType ?
 - longvar = lptr^^  now goes via temporary registers, optimize this to avoid using temps. Also check lptr^^ = lvar.
 - Port more benchmarks from https://thred.github.io/c-bench-64/  to prog8 and see how it stacks up. (see benchmark-c/ directory)
 - Since fixing the missing zp-var initialization, programs grew in size again because STZ's reappeared. Can we add more intelligent (and correct!) optimizations to remove those STZs that might be redundant again?
 - in Identifier: use typedarray of strings instead of listOf? Other places?
 - Compilation speed regression: test/comparisons/test_word_lte.p8 compilation takes twice as long as with prog8 10.5
 - Compilation speed: try to join multiple modifications in 1 result in the AST processors instead of returning it straight away every time
- Optimize the IfExpression code generation to be more like regular if-else code.  (both 6502 and IR) search for "TODO don't store condition as expression"
+- Optimize the IfExpression code generation to be more like regular if-else code.  (both 6502 and IR) search for "TODO don't store condition as expression" ... but maybe postpone until codegen from IR, where it seems solved?
 - optimize floats.cast_from_long and floats.cast_as_long by directly accessing FAC bits?
 - VariableAllocator: can we think of a smarter strategy for allocating variables into zeropage, rather than first-come-first-served?
  for instance, vars used inside loops first, then loopvars, then uwords used as pointers (or these first??), then the rest
--- a/examples/customtarget/readme.txt
+++ b/examples/customtarget/readme.txt
@@ -8,4 +8,4 @@ Look in the Makefile to see how to build or run the various programs.
 and example program, you can find those efforts here on GitHub: https://github.com/adiee5/prog8-nes-target
 *gillham* has been working on a few other compilation targets, such as VIC-20 (various editions), Foenix, and CX16OS.
-you can find them here on GitHub: https://github.com/gillham/prog8targets
+These will be much more complete than the examples here. You can find them on GitHub: https://github.com/gillham/prog8targets
--- a/examples/pointers/fountain-cx16.p8
+++ b/examples/pointers/fountain-cx16.p8
@@ -8,6 +8,7 @@
 ;       PRG size by a lot because they embed a large multiplication lookup table.
 %import textio
 %import math
 %import syslib
@@ -38,6 +39,8 @@ main  {
        repeat {
            clear_particles()
            update_particles()
            txt.home()
            txt.print_uw(active_particles)
            sys.waitvsync()
            sys.waitvsync()
        }
--- a/examples/test.p8
+++ b/examples/test.p8
@@ -4,46 +4,50 @@
 main {
    sub start() {
-        long @shared lv = -1
+        float @shared fv
        word @shared wv = -1
        byte @shared bv = -1
        float @shared fv = -1.1
        bool b1, b2, b3, b4 = false
-        b1 = bv<0
+        fv = -22
-        b2 = wv<0
+        compares()
-        b3 = lv<0
+        fv=0
-        b4 = fv<0
+        compares()
-        txt.print_bool(b1)
+        fv=33
-        txt.print_bool(b2)
+        compares()
        txt.print_bool(b3)
        txt.print_bool(b4)
        txt.nl()
-        b1=b2=b3=b4=false
+        fv = -22
-        b1 = sgn(bv)<0
+        signs()
-        b2 = sgn(wv)<0
+        fv=0
-        b3 = sgn(lv)<0
+        signs()
-        b4 = sgn(fv)<0
+        fv=33
-        txt.print_bool(b1)
+        signs()
        txt.print_bool(b2)
        txt.print_bool(b3)
        txt.print_bool(b4)
        txt.nl()
-        bv = 1
+        sub compares() {
-        wv = 1
+            txt.print("compares\n")
-        lv = 1
+            if fv==0
-        fv = 1.1
+                txt.print(" ==0\n")
-        b1 = sgn(bv)<0
+            if fv<=0
-        b2 = sgn(wv)<0
+                txt.print(" <=0\n")
-        b3 = sgn(lv)<0
+            if fv<0
-        b4 = sgn(fv)<0
+                txt.print(" <0\n")
-        txt.print_bool(b1)
+            if fv>=0
-        txt.print_bool(b2)
+                txt.print(" >=0\n")
-        txt.print_bool(b3)
+            if fv>0
-        txt.print_bool(b4)
+                txt.print(" >0\n")
-        txt.nl()
+        }
        sub signs() {
            txt.print("signs\n")
            if sgn(fv)==0
                txt.print(" ==0\n")
            if sgn(fv)<=0
                txt.print(" <=0\n")
            if sgn(fv)<0
                txt.print(" <0\n")
            if sgn(fv)>=0
                txt.print(" >=0\n")
            if sgn(fv)>0
                txt.print(" >0\n")
        }
    }
--- a/gradle.properties
+++ b/gradle.properties
@@ -4,4 +4,4 @@ org.gradle.parallel=true
 org.gradle.daemon=true
 org.gradle.configuration-cache=false
 kotlin.code.style=official
-version=12.0-BETA7
+version=12.0
--- a/simpleAst/src/prog8/code/optimize/Optimizer.kt
+++ b/simpleAst/src/prog8/code/optimize/Optimizer.kt
@@ -13,6 +13,7 @@ fun optimizeSimplifiedAst(program: PtProgram, options: CompilationOptions, st: S
        return
    while (errors.noErrors() &&
        optimizeAssignTargets(program, st)
        + optimizeFloatComparesToZero(program)
        + optimizeBinaryExpressions(program, options) > 0) {
        // keep rolling
    }
@@ -152,3 +153,27 @@ private fun optimizeBinaryExpressions(program: PtProgram, options: CompilationOp
    }
    return changes
 }
 private fun optimizeFloatComparesToZero(program: PtProgram): Int {
    var changes = 0
    walkAst(program) { node: PtNode, depth: Int ->
        if (node is PtBinaryExpression) {
            val constvalue = node.right.asConstValue()
            if(node.type.isBool && constvalue==0.0 && node.left.type.isFloat && node.operator in ComparisonOperators) {
                // float == 0 --> sgn(float) == 0
                val sign = PtBuiltinFunctionCall("sgn", false, true, DataType.BYTE, node.position)
                sign.add(node.left)
                val replacement = PtBinaryExpression(node.operator, DataType.BOOL, node.position)
                replacement.add(sign)
                replacement.add(PtNumber(BaseDataType.BYTE, 0.0, node.position))
                replacement.parent = node.parent
                val index = node.parent.children.indexOf(node)
                node.parent.children[index] = replacement
                changes++
            }
        }
        true
    }
    return changes
 }
Author	SHA1	Message	Date
Irmen de Jong	df1a2a1611	also optimize BRA+RTS into just BRA release 12.0	2025-11-23 15:18:05 +01:00
Irmen de Jong	d19a3af9ed	change some single use float global constants to their asm proc	2025-11-21 21:35:51 +01:00
Irmen de Jong	352c11ad9f	optimize float<>0 into sgn(float)<>0	2025-11-21 00:57:43 +01:00