optimize word+byte*2 expression to word+byte+byte (more efficient in 6502 codegen)

This commit is contained in:
Irmen de Jong 2025-01-24 21:00:00 +01:00
parent 8583a96519
commit 1e17df5296
4 changed files with 191 additions and 47 deletions

View File

@ -379,6 +379,16 @@ class PtString(val value: String, val encoding: Encoding, position: Position) :
/**
 * Expression node that casts its single child expression to the given base type.
 */
class PtTypeCast(type: BaseDataType, position: Position) : PtExpression(DataType.forDt(type), position) {
    /** The expression being cast; fails unless this node has exactly one child. */
    val value: PtExpression
        get() = children.single() as PtExpression

    /**
     * Builds a new cast node with the same base type and position, holding a copy of the operand.
     *
     * Only a [PtIdentifier] operand is supported; any other operand kind ends up in `TODO()`,
     * which throws [NotImplementedError].
     */
    fun copy(): PtTypeCast {
        val duplicate = PtTypeCast(type.base, position)
        when (val operand = children[0]) {
            is PtIdentifier -> duplicate.add(operand.copy())
            else -> TODO("cannot copy node ${children[0]}")
        }
        return duplicate
    }
}

View File

@ -4,12 +4,14 @@ import prog8.code.StExtSub
import prog8.code.SymbolTable
import prog8.code.ast.*
import prog8.code.core.*
import prog8.code.target.VMTarget
// Runs the optimization passes over the simplified AST.
// Returns immediately when options.optimize is not set; otherwise keeps repeating the passes
// for as long as no errors have been reported and a pass still reports changes.
fun optimizeSimplifiedAst(program: PtProgram, options: CompilationOptions, st: SymbolTable, errors: IErrorReporter) {
if (!options.optimize)
return
// NOTE(review): two loop headers follow but only one of them is closed below; presumably the
// first is the pre-change condition that the second one replaces -- confirm against the real file.
while (errors.noErrors() && optimizeAssignTargets(program, st) > 0) {
// The change counts are combined with `+`, so once errors.noErrors() holds, both passes
// run in every iteration (no short-circuit between them).
while (errors.noErrors() &&
optimizeAssignTargets(program, st) + optimizeWordPlusTimesTwo(program, options) > 0) {
// keep rolling
}
}
@ -96,3 +98,42 @@ internal fun isSame(identifier: PtIdentifier, type: DataType, returnedRegister:
}
return false // there are no identifiers directly corresponding to cpu registers
}
/**
 * Rewrites `word ± ((byte as word) << 1)` into `(word ± (byte as word)) ± (byte as word)`.
 *
 * A shift node is rewritten only when its left operand is a word-typed [PtTypeCast] of a
 * [PtIdentifier] and its parent is a word-typed `+` or `-` expression. Nothing is done when
 * the compilation target is the VM target.
 *
 * For `-` the rewrite is applied only when the shifted term is the right-hand operand:
 * subtraction is not commutative, so `(byte << 1) - word` must not become `(word - byte) - byte`.
 *
 * A word multiplication by the constant 2 is left untouched; this pass previously aborted
 * on it through `TODO()`, which throws [NotImplementedError].
 *
 * @param program the program whose tree is walked and modified in place
 * @param options used to read the name of the compilation target
 * @return the number of expressions that were rewritten
 */
private fun optimizeWordPlusTimesTwo(program: PtProgram, options: CompilationOptions): Int {
    if (options.compTarget.name == VMTarget.NAME)
        return 0

    var changes = 0
    walkAst(program) { node: PtNode, _: Int ->
        if (node is PtBinaryExpression && node.operator == "<<" && node.right.asConstValue() == 1.0) {
            val typecast = node.left as? PtTypeCast
            if (typecast != null && typecast.type.isWord && typecast.value is PtIdentifier) {
                val addition = node.parent as? PtBinaryExpression
                if (addition != null && addition.type.isWord) {
                    val shiftedIsLeft = addition.left === node
                    // '+' commutes, so the shifted term may be on either side;
                    // '-' is only safe when the shifted term is what gets subtracted.
                    val rewritable = addition.operator == "+" ||
                            (addition.operator == "-" && !shiftedIsLeft)
                    if (rewritable) {
                        // word + (byte<<1 as uword)  (== word + byte*2)  -->  (word + (byte as word)) + (byte as word)
                        val parent = addition.parent
                        val index = parent.children.indexOf(addition)
                        val addFirst = PtBinaryExpression(addition.operator, addition.type, addition.position)
                        val addSecond = PtBinaryExpression(addition.operator, addition.type, addition.position)
                        // the operand that is not the shifted term becomes the left side of the inner expression
                        addFirst.add(if (shiftedIsLeft) addition.right else addition.left)
                        addFirst.add(typecast)
                        addSecond.add(addFirst)
                        // the second use of the byte needs its own node, hence the copy
                        addSecond.add(typecast.copy())
                        parent.children[index] = addSecond
                        addSecond.parent = parent
                        changes++
                    }
                }
            }
        }
        true
    }
    return changes
}

View File

@ -1,9 +1,6 @@
TODO
====
- word+byte*2 -> word +byte +byte, word-byte*2 -> word-byte-byte (check that it gets properly word-extended!)
- optimize pokew and peekw to no longer do a jsr
- Make some of the target machine config externally configurable (for 1 new target, the existing ones should stay as they are for the time being)
- add paypal donation button as well?
@ -71,7 +68,7 @@ IR/VM
Libraries
---------
- Sorting module gnomesort_uw could be optimized more, rewrite in asm? Shellsort seems consistently faster even if most of the words are already sorted.
- Sorting module gnomesort_uw could be optimized more by fully rewriting it in asm? Shellsort seems consistently faster even if most of the words are already sorted.
- Add split-word array sorting routines to sorting module?
- add even more general raster irq routines to build some sort of "copper list", like Oscar64 has?
- pet32 target: make syslib more complete (missing kernal routines)?

View File

@ -1,3 +1,4 @@
;%import emudbg
%import textio
%option no_sysinit
%zeropage basicsafe
@ -5,51 +6,146 @@
; Scratch test program.
; NOTE(review): this listing appears to interleave removed and added diff lines
; (for example, b is declared twice inside start) - confirm against the actual file.
main {
ubyte @nozp @shared staticvar=51
sub start() {
ubyte x,y
uword @shared w1, w2
ubyte @shared b
x = 88
y = 99
ubyte a,b,c,d = multi4()
ubyte e,f = multi2()
; print the six returned values separated by spaces, followed by a newline
txt.print_ub(a)
txt.spc()
txt.print_ub(b)
txt.spc()
txt.print_ub(c)
txt.spc()
txt.print_ub(d)
txt.spc()
txt.print_ub(e)
txt.spc()
txt.print_ub(f)
txt.nl()
; the same sum written once as byte*2 (constant written as $0002) and once as two byte additions
w1 = w2 + b*$0002
w1 = w2 + b + b
; w2 = (w1 + b as uword) + (b as uword)
}
; returns the sum of cx16.r0L and cx16.r1L
sub single() -> ubyte {
return cx16.r0L+cx16.r1L
}
; returns 1 in register A and 2 in register Y
asmsub multi1() -> ubyte @A, ubyte @Y {
%asm {{
lda #1
ldy #2
rts
}}
}
; increments cx16.r0 and returns the two values 33 and 44
sub multi2() -> ubyte, ubyte {
cx16.r0++
return 33,44
}
; increments cx16.r0 and returns the four values 3, 4, 5 and 6
sub multi4() -> ubyte, ubyte, ubyte, ubyte {
cx16.r0++
return 3,4,5,6
; commented-out variants of word +/- byte*2 and the equivalent repeated add/subtract forms
; cx16.r0 = cx16.r1 + cx16.r0L*2
; cx16.r0 = cx16.r1 + cx16.r0L*$0002
; cx16.r0 = cx16.r1 + cx16.r0L + cx16.r0L
; cx16.r0 = cx16.r1 - cx16.r0L*2
; cx16.r0 = cx16.r1 - cx16.r0L*$0002
; cx16.r0 = cx16.r1 - cx16.r0L - cx16.r0L
}
}
/*
mainxxx {
uword[50] @nosplit warray1
uword[50] @nosplit warray2
sub fill_arrays() {
math.rndseed(999,1234)
for cx16.r0L in 0 to len(warray1)-1 {
warray1[cx16.r0L] = math.rndw()
warray2[cx16.r0L] = cx16.r0L * (100 as uword)
}
warray2[40] = 9900
warray2[44] = 9910
warray2[48] = 9920
}
sub perf_reset() {
emudbg.reset_cpu_cycles()
}
sub perf_print() {
cx16.r4, cx16.r5 = emudbg.cpu_cycles()
txt.print_uwhex(cx16.r5, true)
txt.print_uwhex(cx16.r4, false)
txt.nl()
}
sub start() {
sys.set_irqd()
fill_arrays()
txt.print("\ngnomesort (words):\n")
perf_reset()
gnomesort_uw(warray1, len(warray1))
perf_print()
for cx16.r0L in 0 to len(warray1)-1 {
txt.print_uw(warray1[cx16.r0L])
txt.chrout(',')
}
txt.nl()
txt.print("\ngnomesort (words) almost sorted:\n")
perf_reset()
gnomesort_uw(warray2, len(warray2))
perf_print()
for cx16.r0L in 0 to len(warray2)-1 {
txt.print_uw(warray2[cx16.r0L])
txt.chrout(',')
}
txt.nl()
txt.nl()
fill_arrays()
txt.print("\ngnomesort_opt (words):\n")
perf_reset()
gnomesort_uw_opt(warray1, len(warray1))
perf_print()
for cx16.r0L in 0 to len(warray1)-1 {
txt.print_uw(warray1[cx16.r0L])
txt.chrout(',')
}
txt.nl()
txt.print("\ngnomesort_opt (words) almost sorted:\n")
perf_reset()
gnomesort_uw_opt(warray2, len(warray2))
perf_print()
for cx16.r0L in 0 to len(warray2)-1 {
txt.print_uw(warray2[cx16.r0L])
txt.chrout(',')
}
txt.nl()
sys.clear_irqd()
repeat {
}
}
sub gnomesort_uw(uword values, ubyte num_elements) {
; TODO optimize this more, rewrite in asm?
ubyte @zp pos = 1
while pos != num_elements {
uword @requirezp ptr = values+(pos*$0002)
cx16.r0 = peekw(ptr-2)
cx16.r1 = peekw(ptr)
if cx16.r0<=cx16.r1
pos++
else {
; swap elements
pokew(ptr-2, cx16.r1)
pokew(ptr, cx16.r0)
pos--
if_z
pos++
}
}
}
sub gnomesort_uw_opt(uword values, ubyte num_elements) {
; TODO optimize this more, rewrite in asm?
ubyte @zp pos = 1
uword @requirezp ptr = values+2
while pos != num_elements {
cx16.r0 = peekw(ptr-2)
cx16.r1 = peekw(ptr)
if cx16.r0<=cx16.r1 {
pos++
ptr+=2
}
else {
; swap elements
pokew(ptr-2, cx16.r1)
pokew(ptr, cx16.r0)
if pos>1 {
pos--
ptr-=2
}
}
}
}
}
*/