Compare commits

...

3 Commits

Author SHA1 Message Date
Irmen de Jong
df1a2a1611 also optimize BRA+RTS into just BRA
release 12.0
2025-11-23 15:18:05 +01:00
Irmen de Jong
d19a3af9ed change some single use float global constants to their asm proc 2025-11-21 21:35:51 +01:00
Irmen de Jong
352c11ad9f optimize float<>0 into sgn(float)<>0 2025-11-21 00:57:43 +01:00
10 changed files with 94 additions and 49 deletions

View File

@@ -544,7 +544,7 @@ private fun optimizeJsrRtsAndOtherCombinations(linesByFour: Sequence<List<Indexe
val third = lines[2].value val third = lines[2].value
if(!haslabel(second)) { if(!haslabel(second)) {
if ((" jmp" in first || "\tjmp" in first ) && (" rts" in second || "\trts" in second)) { if ((" jmp" in first || "\tjmp" in first || " bra" in first || "\tbra" in first ) && (" rts" in second || "\trts" in second)) {
mods += Modification(lines[1].index, true, null) mods += Modification(lines[1].index, true, null)
} }
else if ((" jsr" in first || "\tjsr" in first ) && (" rts" in second || "\trts" in second)) { else if ((" jsr" in first || "\tjsr" in first ) && (" rts" in second || "\trts" in second)) {

View File

@@ -2,8 +2,7 @@
FL_ONE_const .byte 129 ; 1.0 FL_ONE_const .byte 129 ; 1.0
FL_ZERO_const .byte 0,0,0,0,0 ; 0.0 FL_ZERO_const .byte 0,0,0,0,0 ; 0.0
FL_LOG2_const .byte $80, $31, $72, $17, $f8 ; log(2) ; note: don't add too many constants here because they all end up in the resulting program
FL_65536_const .byte $91, $00, $00, $00, $00 ; 65536.0
.section BSS .section BSS
@@ -159,6 +158,9 @@ cast_from_long .proc
ldx cx16.r0L ldx cx16.r0L
ldy cx16.r0H ldy cx16.r0H
jmp MOVMF jmp MOVMF
FL_65536_const .byte $91, $00, $00, $00, $00 ; 65536.0
; !notreached!
.pend .pend
cast_as_long .proc cast_as_long .proc

View File

@@ -2,8 +2,11 @@
func_sign_f_into_A .proc func_sign_f_into_A .proc
; sign in A, also sets status flags
jsr MOVFM jsr MOVFM
jmp SIGN jsr SIGN
cmp #0
rts
.pend .pend

View File

@@ -160,6 +160,9 @@ sub log2(float value) -> float {
ldy #>FL_LOG2_const ldy #>FL_LOG2_const
jsr MOVFM jsr MOVFM
jmp FDIVT jmp FDIVT
FL_LOG2_const .byte $80, $31, $72, $17, $f8 ; log(2)
; !notreached!
}} }}
} }

View File

@@ -1,7 +1,12 @@
TODO TODO
==== ====
- before final release: test all examples and programs again with final version of the compiler!
Weird Heisenbug
^^^^^^^^^^^^^^^
- examples/cube3d-float crashes with a division by zero error on C64 (works on cx16; already broken in v11, v10 still worked)
caused by the RTS after JMP removal in optimizeJsrRtsAndOtherCombinations (replacing it with a NOP makes the problem disappear !??!?)
Future Things and Ideas Future Things and Ideas
@@ -59,7 +64,6 @@ Future Things and Ideas
IR/VM IR/VM
----- -----
- optimize bool b = sgn(value)<0: still does a compare with 0 even though SGN sets all status bits. What is the code when a BIT instruction is used?
- optimize float<0 float==0 float>0 to use SGN instruction? Check what code is generated for other data types. - optimize float<0 float==0 float>0 to use SGN instruction? Check what code is generated for other data types.
- getting it in shape for code generation: the IR file should be able to encode every detail about a prog8 program (the VM doesn't have to actually be able to run all of it though!) - getting it in shape for code generation: the IR file should be able to encode every detail about a prog8 program (the VM doesn't have to actually be able to run all of it though!)
- fix call() return value handling (... what's wrong with it again?) - fix call() return value handling (... what's wrong with it again?)
@@ -115,20 +119,21 @@ Libraries
Optimizations Optimizations
------------- -------------
- change float<0, float==0, float>0 to use sgn(float) instead? (also see IR) - (6502) optimize if sgn(value)<0: still does a compare with 0 even though SGN sets all status bits.
- longvar = lptr^^ , lptr2^^=lptr^^ now go via temporary registers, optimize this to avoid using temps. (seems like it is dereferencing the pointer first and then assigning the intermediate value)
- optimize inplaceLongShiftRight() for byte aligned cases - optimize inplaceLongShiftRight() for byte aligned cases
- more optimized operator handling of different types, for example uword a ^ byte b now does a type cast of b to word first - more optimized operator handling of different types, for example uword a ^ byte b now does a type cast of b to word first
- optimize longEqualsValue() for const and variable operands to not assign needlessly to R0-R3. - optimize longEqualsValue() for long const and variable operands to not assign needlessly to R0-R3.
- optimize optimizedBitwiseExpr() for const and variable operands to not assign needlessly to R0-R3. - optimize optimizedBitwiseExpr() for long const and variable operands to not assign needlessly to R0-R3.
- optimize inplacemodificationLongWithLiteralval() for more shift values such as 8, 16, 24 etc but take sign bit into account! - optimize inplacemodificationLongWithLiteralval() for more shift values such as 8, 16, 24 etc but take sign bit into account!
- optimize simple cases in funcPeekL and funcPokeL - optimize simple cases in funcPeekL and funcPokeL
- bind types in the Ast much sooner than the simplifiedAst creation, so that we maybe could get rid of InferredType ? - bind types in the Ast much sooner than the simplifiedAst creation, so that we maybe could get rid of InferredType ?
- longvar = lptr^^ now goes via temporary registers, optimize this to avoid using temps. Also check lptr^^ = lvar.
- Port more benchmarks from https://thred.github.io/c-bench-64/ to prog8 and see how it stacks up. (see benchmark-c/ directory) - Port more benchmarks from https://thred.github.io/c-bench-64/ to prog8 and see how it stacks up. (see benchmark-c/ directory)
- Since fixing the missing zp-var initialization, programs grew in size again because STZ's reappeared. Can we add more intelligent (and correct!) optimizations to remove those STZs that might be redundant again? - Since fixing the missing zp-var initialization, programs grew in size again because STZ's reappeared. Can we add more intelligent (and correct!) optimizations to remove those STZs that might be redundant again?
- in Identifier: use typedarray of strings instead of listOf? Other places? - in Identifier: use typedarray of strings instead of listOf? Other places?
- Compilation speed regression: test/comparisons/test_word_lte.p8 compilation takes twice as long as with prog8 10.5
- Compilation speed: try to join multiple modifications in 1 result in the AST processors instead of returning it straight away every time - Compilation speed: try to join multiple modifications in 1 result in the AST processors instead of returning it straight away every time
- Optimize the IfExpression code generation to be more like regular if-else code. (both 6502 and IR) search for "TODO don't store condition as expression" - Optimize the IfExpression code generation to be more like regular if-else code. (both 6502 and IR) search for "TODO don't store condition as expression" ... but maybe postpone until codegen from IR, where it seems solved?
- optimize floats.cast_from_long and floats.cast_as_long by directly accessing FAC bits? - optimize floats.cast_from_long and floats.cast_as_long by directly accessing FAC bits?
- VariableAllocator: can we think of a smarter strategy for allocating variables into zeropage, rather than first-come-first-served? - VariableAllocator: can we think of a smarter strategy for allocating variables into zeropage, rather than first-come-first-served?
for instance, vars used inside loops first, then loopvars, then uwords used as pointers (or these first??), then the rest for instance, vars used inside loops first, then loopvars, then uwords used as pointers (or these first??), then the rest

View File

@@ -8,4 +8,4 @@ Look in the Makefile to see how to build or run the various programs.
and example program, you can find those efforts here on GitHub: https://github.com/adiee5/prog8-nes-target and example program, you can find those efforts here on GitHub: https://github.com/adiee5/prog8-nes-target
*gillham* has been working on a few other compilation targets, such as VIC-20 (various editions), Foenix, and CX16OS. *gillham* has been working on a few other compilation targets, such as VIC-20 (various editions), Foenix, and CX16OS.
you can find them here on GitHub: https://github.com/gillham/prog8targets These will be much more complete than the examples here. You can find them on GitHub: https://github.com/gillham/prog8targets

View File

@@ -8,6 +8,7 @@
; PRG size by a lot because they embed a large multiplication lookup table. ; PRG size by a lot because they embed a large multiplication lookup table.
%import textio
%import math %import math
%import syslib %import syslib
@@ -38,6 +39,8 @@ main {
repeat { repeat {
clear_particles() clear_particles()
update_particles() update_particles()
txt.home()
txt.print_uw(active_particles)
sys.waitvsync() sys.waitvsync()
sys.waitvsync() sys.waitvsync()
} }

View File

@@ -4,46 +4,50 @@
main { main {
sub start() { sub start() {
long @shared lv = -1 float @shared fv
word @shared wv = -1
byte @shared bv = -1
float @shared fv = -1.1
bool b1, b2, b3, b4 = false
b1 = bv<0 fv = -22
b2 = wv<0 compares()
b3 = lv<0 fv=0
b4 = fv<0 compares()
txt.print_bool(b1) fv=33
txt.print_bool(b2) compares()
txt.print_bool(b3)
txt.print_bool(b4)
txt.nl() txt.nl()
b1=b2=b3=b4=false fv = -22
b1 = sgn(bv)<0 signs()
b2 = sgn(wv)<0 fv=0
b3 = sgn(lv)<0 signs()
b4 = sgn(fv)<0 fv=33
txt.print_bool(b1) signs()
txt.print_bool(b2)
txt.print_bool(b3)
txt.print_bool(b4)
txt.nl()
bv = 1 sub compares() {
wv = 1 txt.print("compares\n")
lv = 1 if fv==0
fv = 1.1 txt.print(" ==0\n")
b1 = sgn(bv)<0 if fv<=0
b2 = sgn(wv)<0 txt.print(" <=0\n")
b3 = sgn(lv)<0 if fv<0
b4 = sgn(fv)<0 txt.print(" <0\n")
txt.print_bool(b1) if fv>=0
txt.print_bool(b2) txt.print(" >=0\n")
txt.print_bool(b3) if fv>0
txt.print_bool(b4) txt.print(" >0\n")
txt.nl() }
sub signs() {
txt.print("signs\n")
if sgn(fv)==0
txt.print(" ==0\n")
if sgn(fv)<=0
txt.print(" <=0\n")
if sgn(fv)<0
txt.print(" <0\n")
if sgn(fv)>=0
txt.print(" >=0\n")
if sgn(fv)>0
txt.print(" >0\n")
}
} }

View File

@@ -4,4 +4,4 @@ org.gradle.parallel=true
org.gradle.daemon=true org.gradle.daemon=true
org.gradle.configuration-cache=false org.gradle.configuration-cache=false
kotlin.code.style=official kotlin.code.style=official
version=12.0-BETA7 version=12.0

View File

@@ -13,6 +13,7 @@ fun optimizeSimplifiedAst(program: PtProgram, options: CompilationOptions, st: S
return return
while (errors.noErrors() && while (errors.noErrors() &&
optimizeAssignTargets(program, st) optimizeAssignTargets(program, st)
+ optimizeFloatComparesToZero(program)
+ optimizeBinaryExpressions(program, options) > 0) { + optimizeBinaryExpressions(program, options) > 0) {
// keep rolling // keep rolling
} }
@@ -152,3 +153,27 @@ private fun optimizeBinaryExpressions(program: PtProgram, options: CompilationOp
} }
return changes return changes
} }
/**
 * Rewrites boolean comparisons of a float expression against the constant 0.0
 * so that they compare the byte result of the `sgn` builtin against 0 instead,
 * for example `float == 0` becomes `sgn(float) == 0`.
 *
 * Each matching [PtBinaryExpression] is replaced in its parent's child list by a
 * newly built expression that keeps the original operator and source position.
 *
 * @param program the program whose AST is walked and modified in place
 * @return the number of comparisons that were rewritten
 */
private fun optimizeFloatComparesToZero(program: PtProgram): Int {
    var rewritten = 0
    walkAst(program) { node: PtNode, _: Int ->
        val comparison = node as? PtBinaryExpression
        if (comparison != null) {
            val rightConst = comparison.right.asConstValue()
            val comparesFloatWithZero = comparison.type.isBool &&
                rightConst == 0.0 &&
                comparison.left.type.isFloat &&
                comparison.operator in ComparisonOperators
            if (comparesFloatWithZero) {
                // Build sgn(<left operand>) with a BYTE result type.
                val signCall = PtBuiltinFunctionCall("sgn", false, true, DataType.BYTE, comparison.position)
                signCall.add(comparison.left)

                // Build "<sgn call> <same operator> 0" as the new boolean expression.
                val signCompare = PtBinaryExpression(comparison.operator, DataType.BOOL, comparison.position)
                signCompare.add(signCall)
                signCompare.add(PtNumber(BaseDataType.BYTE, 0.0, comparison.position))

                // Put the new expression in the slot the old comparison occupied.
                val owner = comparison.parent
                signCompare.parent = owner
                owner.children[owner.children.indexOf(comparison)] = signCompare
                rewritten++
            }
        }
        // NOTE(review): presumably 'true' tells walkAst to keep traversing - confirm against walkAst.
        true
    }
    return rewritten
}