mirror of
https://github.com/irmen/prog8.git
synced 2025-02-21 10:29:03 +00:00
optimize word+byte*2 expression to word+byte+byte (more efficient in 6502 codegen)
This commit is contained in:
parent
8583a96519
commit
1e17df5296
@ -379,6 +379,16 @@ class PtString(val value: String, val encoding: Encoding, position: Position) :
|
||||
/**
 * Type cast expression node.
 *
 * The node's own type is derived from the given base type via [DataType.forDt];
 * the expression being cast is stored as the node's only child.
 */
class PtTypeCast(type: BaseDataType, position: Position) : PtExpression(DataType.forDt(type), position) {
    /** The expression being cast: the single child of this node. */
    val value: PtExpression
        get() = children.single() as PtExpression

    /**
     * Builds a new cast node with the same base type and position, holding a copy of the operand.
     *
     * Only an identifier operand can be copied; any other kind of operand ends up in the TODO below.
     */
    fun copy(): PtTypeCast {
        val duplicate = PtTypeCast(type.base, position)
        when (val operand = children[0]) {
            is PtIdentifier -> duplicate.add(operand.copy())
            else -> TODO("cannot copy node ${children[0]}")
        }
        return duplicate
    }
}
|
||||
|
||||
|
||||
|
@ -4,12 +4,14 @@ import prog8.code.StExtSub
|
||||
import prog8.code.SymbolTable
|
||||
import prog8.code.ast.*
|
||||
import prog8.code.core.*
|
||||
import prog8.code.target.VMTarget
|
||||
|
||||
|
||||
/**
 * Repeatedly runs the optimization passes over the simplified AST.
 *
 * Does nothing when [CompilationOptions.optimize] is off. Otherwise every round runs
 * both passes and the loop stops as soon as a round reports zero changes in total,
 * or as soon as [errors] no longer reports "no errors".
 */
fun optimizeSimplifiedAst(program: PtProgram, options: CompilationOptions, st: SymbolTable, errors: IErrorReporter) {
    if (!options.optimize)
        return
    while (errors.noErrors() &&
        optimizeAssignTargets(program, st) + optimizeWordPlusTimesTwo(program, options) > 0) {
        // keep rolling
    }
}
|
||||
@ -96,3 +98,42 @@ internal fun isSame(identifier: PtIdentifier, type: DataType, returnedRegister:
|
||||
}
|
||||
return false // there are no identifiers directly corresponding to cpu registers
|
||||
}
|
||||
|
||||
|
||||
/**
 * Rewrites `word + ((byte as word) << 1)` into `(word + (byte as word)) + (byte as word)`,
 * and `word - ((byte as word) << 1)` into `(word - (byte as word)) - (byte as word)`.
 *
 * The rewrite is only applied when the shifted operand is a word typecast of a plain
 * identifier and the enclosing `+`/`-` expression is word-typed. For `-` the shift must be
 * the right-hand operand, because `(byte<<1) - word` is not equal to `(word - byte) - byte`.
 *
 * The pass is skipped entirely when the compilation target is the VM target.
 *
 * @return the number of expressions that were rewritten.
 */
private fun optimizeWordPlusTimesTwo(program: PtProgram, options: CompilationOptions): Int {
    if (options.compTarget.name == VMTarget.NAME)
        return 0

    var changes = 0
    walkAst(program) { node: PtNode, _: Int ->
        // A literal `* 2` with a word-typed constant right operand is deliberately left alone here:
        // it is usually already replaced by a `<< 1` before this pass runs, and skipping an
        // optimization is always safe, whereas a TODO() would abort the whole compilation.
        // TODO: also optimize the remaining word + byte*2 form.
        if (node is PtBinaryExpression && node.operator == "<<" && node.right.asConstValue() == 1.0) {
            val typecast = node.left as? PtTypeCast
            if (typecast != null && typecast.type.isWord && typecast.value is PtIdentifier) {
                val addition = node.parent as? PtBinaryExpression
                if (addition != null && addition.type.isWord) {
                    val shiftIsLeftOperand = addition.left === node
                    val rewritable = when (addition.operator) {
                        "+" -> true                  // commutative: the shift may be on either side
                        "-" -> !shiftIsLeftOperand   // only word - byte*2, never byte*2 - word
                        else -> false
                    }
                    if (rewritable) {
                        // word + (byte<<1 as uword) (== word + byte*2)  -->  (word + (byte as word)) + (byte as word)
                        val parent = addition.parent
                        val index = parent.children.indexOf(addition)
                        val otherOperand = if (shiftIsLeftOperand) addition.right else addition.left
                        val addFirst = PtBinaryExpression(addition.operator, addition.type, addition.position)
                        val addSecond = PtBinaryExpression(addition.operator, addition.type, addition.position)
                        addFirst.add(otherOperand)
                        addFirst.add(typecast)
                        addSecond.add(addFirst)
                        // the typecast node is already used in addFirst, so the second term needs its own copy
                        addSecond.add(typecast.copy())
                        parent.children[index] = addSecond
                        addSecond.parent = parent
                        changes++
                    }
                }
            }
        }
        true
    }
    return changes
}
|
||||
|
@ -1,9 +1,6 @@
|
||||
TODO
|
||||
====
|
||||
|
||||
- word+byte*2 -> word +byte +byte, word-byte*2 -> word-byte-byte (check that it gets properly word-extended!)
|
||||
- optimize pokew and peekw to no longer do a jsr
|
||||
|
||||
- Make some of the target machine config externally configurable (for 1 new target, the existing ones should stay as they are for the time being)
|
||||
|
||||
- add paypal donation button as well?
|
||||
@ -71,7 +68,7 @@ IR/VM
|
||||
|
||||
Libraries
|
||||
---------
|
||||
- Sorting module gnomesort_uw could be optimized more, rewrite in asm? Shellsort seems consistently faster even if most of the words are already sorted.
|
||||
- Sorting module gnomesort_uw could be optimized more by fully rewriting it in asm? Shellsort seems consistently faster even if most of the words are already sorted.
|
||||
- Add split-word array sorting routines to sorting module?
|
||||
- add even more general raster irq routines to build some sort of "copper list", like Oscar64 has?
|
||||
- pet32 target: make syslib more complete (missing kernal routines)?
|
||||
|
180
examples/test.p8
180
examples/test.p8
@ -1,3 +1,4 @@
|
||||
;%import emudbg
|
||||
%import textio
|
||||
%option no_sysinit
|
||||
%zeropage basicsafe
|
||||
@ -5,51 +6,146 @@
|
||||
|
||||
|
||||
main {
|
||||
|
||||
ubyte @nozp @shared staticvar=51
|
||||
|
||||
sub start() {
|
||||
ubyte x,y
|
||||
uword @shared w1, w2
|
||||
ubyte @shared b
|
||||
|
||||
x = 88
|
||||
y = 99
|
||||
ubyte a,b,c,d = multi4()
|
||||
ubyte e,f = multi2()
|
||||
|
||||
txt.print_ub(a)
|
||||
txt.spc()
|
||||
txt.print_ub(b)
|
||||
txt.spc()
|
||||
txt.print_ub(c)
|
||||
txt.spc()
|
||||
txt.print_ub(d)
|
||||
txt.spc()
|
||||
txt.print_ub(e)
|
||||
txt.spc()
|
||||
txt.print_ub(f)
|
||||
txt.nl()
|
||||
w1 = w2 + b*$0002
|
||||
w1 = w2 + b + b
|
||||
; w2 = (w1 + b as uword) + (b as uword)
|
||||
|
||||
|
||||
}
|
||||
|
||||
sub single() -> ubyte {
|
||||
return cx16.r0L+cx16.r1L
|
||||
}
|
||||
asmsub multi1() -> ubyte @A, ubyte @Y {
|
||||
%asm {{
|
||||
lda #1
|
||||
ldy #2
|
||||
rts
|
||||
}}
|
||||
}
|
||||
|
||||
sub multi2() -> ubyte, ubyte {
|
||||
cx16.r0++
|
||||
return 33,44
|
||||
}
|
||||
|
||||
sub multi4() -> ubyte, ubyte, ubyte, ubyte {
|
||||
cx16.r0++
|
||||
return 3,4,5,6
|
||||
; cx16.r0 = cx16.r1 + cx16.r0L*2
|
||||
; cx16.r0 = cx16.r1 + cx16.r0L*$0002
|
||||
; cx16.r0 = cx16.r1 + cx16.r0L + cx16.r0L
|
||||
; cx16.r0 = cx16.r1 - cx16.r0L*2
|
||||
; cx16.r0 = cx16.r1 - cx16.r0L*$0002
|
||||
; cx16.r0 = cx16.r1 - cx16.r0L - cx16.r0L
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
mainxxx {
|
||||
|
||||
uword[50] @nosplit warray1
|
||||
uword[50] @nosplit warray2
|
||||
|
||||
sub fill_arrays() {
|
||||
math.rndseed(999,1234)
|
||||
for cx16.r0L in 0 to len(warray1)-1 {
|
||||
warray1[cx16.r0L] = math.rndw()
|
||||
warray2[cx16.r0L] = cx16.r0L * (100 as uword)
|
||||
}
|
||||
warray2[40] = 9900
|
||||
warray2[44] = 9910
|
||||
warray2[48] = 9920
|
||||
}
|
||||
|
||||
sub perf_reset() {
|
||||
emudbg.reset_cpu_cycles()
|
||||
}
|
||||
|
||||
sub perf_print() {
|
||||
cx16.r4, cx16.r5 = emudbg.cpu_cycles()
|
||||
txt.print_uwhex(cx16.r5, true)
|
||||
txt.print_uwhex(cx16.r4, false)
|
||||
txt.nl()
|
||||
}
|
||||
|
||||
sub start() {
|
||||
sys.set_irqd()
|
||||
fill_arrays()
|
||||
|
||||
txt.print("\ngnomesort (words):\n")
|
||||
perf_reset()
|
||||
gnomesort_uw(warray1, len(warray1))
|
||||
perf_print()
|
||||
for cx16.r0L in 0 to len(warray1)-1 {
|
||||
txt.print_uw(warray1[cx16.r0L])
|
||||
txt.chrout(',')
|
||||
}
|
||||
txt.nl()
|
||||
|
||||
txt.print("\ngnomesort (words) almost sorted:\n")
|
||||
perf_reset()
|
||||
gnomesort_uw(warray2, len(warray2))
|
||||
perf_print()
|
||||
for cx16.r0L in 0 to len(warray2)-1 {
|
||||
txt.print_uw(warray2[cx16.r0L])
|
||||
txt.chrout(',')
|
||||
}
|
||||
txt.nl()
|
||||
txt.nl()
|
||||
|
||||
fill_arrays()
|
||||
|
||||
txt.print("\ngnomesort_opt (words):\n")
|
||||
perf_reset()
|
||||
gnomesort_uw_opt(warray1, len(warray1))
|
||||
perf_print()
|
||||
for cx16.r0L in 0 to len(warray1)-1 {
|
||||
txt.print_uw(warray1[cx16.r0L])
|
||||
txt.chrout(',')
|
||||
}
|
||||
txt.nl()
|
||||
|
||||
txt.print("\ngnomesort_opt (words) almost sorted:\n")
|
||||
perf_reset()
|
||||
gnomesort_uw_opt(warray2, len(warray2))
|
||||
perf_print()
|
||||
for cx16.r0L in 0 to len(warray2)-1 {
|
||||
txt.print_uw(warray2[cx16.r0L])
|
||||
txt.chrout(',')
|
||||
}
|
||||
txt.nl()
|
||||
sys.clear_irqd()
|
||||
repeat {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
sub gnomesort_uw(uword values, ubyte num_elements) {
|
||||
; TODO optimize this more, rewrite in asm?
|
||||
ubyte @zp pos = 1
|
||||
while pos != num_elements {
|
||||
uword @requirezp ptr = values+(pos*$0002)
|
||||
cx16.r0 = peekw(ptr-2)
|
||||
cx16.r1 = peekw(ptr)
|
||||
if cx16.r0<=cx16.r1
|
||||
pos++
|
||||
else {
|
||||
; swap elements
|
||||
pokew(ptr-2, cx16.r1)
|
||||
pokew(ptr, cx16.r0)
|
||||
pos--
|
||||
if_z
|
||||
pos++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub gnomesort_uw_opt(uword values, ubyte num_elements) {
|
||||
; TODO optimize this more, rewrite in asm?
|
||||
ubyte @zp pos = 1
|
||||
uword @requirezp ptr = values+2
|
||||
while pos != num_elements {
|
||||
cx16.r0 = peekw(ptr-2)
|
||||
cx16.r1 = peekw(ptr)
|
||||
if cx16.r0<=cx16.r1 {
|
||||
pos++
|
||||
ptr+=2
|
||||
}
|
||||
else {
|
||||
; swap elements
|
||||
pokew(ptr-2, cx16.r1)
|
||||
pokew(ptr, cx16.r0)
|
||||
if pos>1 {
|
||||
pos--
|
||||
ptr-=2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
Loading…
x
Reference in New Issue
Block a user