mirror of
https://github.com/irmen/prog8.git
synced 2025-01-11 13:29:45 +00:00
code optimization for bytearray[x] +/- bytearray[y]
use adc array,y or sbc array,y instead of tempvar
This commit is contained in:
parent
7e5a9474fe
commit
eb018ae660
@ -739,15 +739,31 @@ internal class AssignmentAsmGen(private val program: PtProgram,
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
else -> {
|
else -> {
|
||||||
|
val rightArrayIndexer = expr.right as? PtArrayIndexer
|
||||||
|
if(rightArrayIndexer!=null && rightArrayIndexer.type in ByteDatatypes && left.type in ByteDatatypes) {
|
||||||
|
// special optimization for bytevalue +/- bytearr[y] : no need to use a tempvar, just use adc array,y or sbc array,y
|
||||||
assignExpressionToRegister(left, RegisterOrPair.A, left.type==DataType.BYTE)
|
assignExpressionToRegister(left, RegisterOrPair.A, left.type==DataType.BYTE)
|
||||||
asmgen.out(" pha")
|
asmgen.out(" pha")
|
||||||
assignExpressionToVariable(right, "P8ZP_SCRATCH_B1", right.type)
|
asmgen.assignExpressionToRegister(rightArrayIndexer.index, RegisterOrPair.Y, false)
|
||||||
asmgen.out(" pla")
|
asmgen.out(" pla")
|
||||||
|
val arrayvarname = if(rightArrayIndexer.usesPointerVariable)
|
||||||
|
"(${rightArrayIndexer.variable.name})"
|
||||||
|
else
|
||||||
|
asmgen.asmSymbolName(rightArrayIndexer.variable)
|
||||||
|
if (expr.operator == "+")
|
||||||
|
asmgen.out(" clc | adc $arrayvarname,y")
|
||||||
|
else
|
||||||
|
asmgen.out(" sec | sbc $arrayvarname,y")
|
||||||
|
assignRegisterByte(target, CpuRegister.A, dt in SignedDatatypes)
|
||||||
|
} else {
|
||||||
|
assignExpressionToVariable(right, "P8ZP_SCRATCH_B1", right.type)
|
||||||
|
assignExpressionToRegister(left, RegisterOrPair.A, left.type==DataType.BYTE)
|
||||||
if (expr.operator == "+")
|
if (expr.operator == "+")
|
||||||
asmgen.out(" clc | adc P8ZP_SCRATCH_B1")
|
asmgen.out(" clc | adc P8ZP_SCRATCH_B1")
|
||||||
else
|
else
|
||||||
asmgen.out(" sec | sbc P8ZP_SCRATCH_B1")
|
asmgen.out(" sec | sbc P8ZP_SCRATCH_B1")
|
||||||
assignRegisterByte(target, CpuRegister.A, dt in SignedDatatypes)
|
assignRegisterByte(target, CpuRegister.A, dt in SignedDatatypes)
|
||||||
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1857,7 +1873,7 @@ internal class AssignmentAsmGen(private val program: PtProgram,
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// No more special optmized cases yet. Do the rest via more complex evaluation
|
// No more special optimized cases yet. Do the rest via more complex evaluation
|
||||||
// note: cannot use assignTypeCastedValue because that is ourselves :P
|
// note: cannot use assignTypeCastedValue because that is ourselves :P
|
||||||
// NOTE: THIS MAY TURN INTO A STACK OVERFLOW ERROR IF IT CAN'T SIMPLIFY THE TYPECAST..... :-/
|
// NOTE: THIS MAY TURN INTO A STACK OVERFLOW ERROR IF IT CAN'T SIMPLIFY THE TYPECAST..... :-/
|
||||||
asmgen.assignExpressionTo(origTypeCastExpression, target)
|
asmgen.assignExpressionTo(origTypeCastExpression, target)
|
||||||
|
@ -78,6 +78,16 @@ internal class BeforeAsmAstChanger(val program: Program, private val options: Co
|
|||||||
val typeCast = binExpr.left as? TypecastExpression
|
val typeCast = binExpr.left as? TypecastExpression
|
||||||
if(typeCast!=null && typeCast.expression isSameAs assignment.target)
|
if(typeCast!=null && typeCast.expression isSameAs assignment.target)
|
||||||
return noModifications
|
return noModifications
|
||||||
|
|
||||||
|
if(binExpr.operator in "+-") {
|
||||||
|
val leftDt = binExpr.left.inferType(program)
|
||||||
|
val rightDt = binExpr.right.inferType(program)
|
||||||
|
if(leftDt==rightDt && leftDt.isInteger && rightDt.isInteger && binExpr.right is ArrayIndexedExpression) {
|
||||||
|
// don't split <expr> +/- array[i] when both operands have the same integer type (the codegen has an optimized path for the byte-typed case)
|
||||||
|
return noModifications
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
val sourceDt = binExpr.left.inferType(program).getOrElse { throw AssemblyError("unknown dt") }
|
val sourceDt = binExpr.left.inferType(program).getOrElse { throw AssemblyError("unknown dt") }
|
||||||
val (_, left) = binExpr.left.typecastTo(assignment.target.inferType(program).getOrElse { throw AssemblyError(
|
val (_, left) = binExpr.left.typecastTo(assignment.target.inferType(program).getOrElse { throw AssemblyError(
|
||||||
"unknown dt"
|
"unknown dt"
|
||||||
|
@ -1,9 +1,6 @@
|
|||||||
TODO
|
TODO
|
||||||
====
|
====
|
||||||
|
|
||||||
- add special optimization for @(screen+i) = xbuf[x] + ybuf[y] and @(screen+i) = xbuf[x] - ybuf[y]
|
|
||||||
(noticable in plasma.p8 and cube examples?)
|
|
||||||
|
|
||||||
- prefix prog8 subroutines with p8s_ instead of p8_ to not let them clash with variables in the asm??
|
- prefix prog8 subroutines with p8s_ instead of p8_ to not let them clash with variables in the asm??
|
||||||
- [on branch: shortcircuit] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 ....
|
- [on branch: shortcircuit] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 ....
|
||||||
- IR: reduce the number of branch instructions such as BEQ, BEQR, etc (gradually), replace with CMP(I) + status branch instruction
|
- IR: reduce the number of branch instructions such as BEQ, BEQR, etc (gradually), replace with CMP(I) + status branch instruction
|
||||||
|
@ -88,14 +88,10 @@ main {
|
|||||||
c2A += 2
|
c2A += 2
|
||||||
c2B -= 3
|
c2B -= 3
|
||||||
|
|
||||||
for y in 24 downto 0 {
|
for y in 0 to 24 {
|
||||||
for x in 39 downto 0 {
|
for x in 0 to 39 {
|
||||||
; split the array expression to avoid a prog8 temporary var inefficiency
|
@(screen+x) = xbuf[x] + ybuf[y]
|
||||||
; this pure prog8 version achieves ~17 fps
|
; the maximally optimized asm for this line is the following (achieving ~21 fps on the C64):
|
||||||
ubyte @zp tmp = ybuf[y]
|
|
||||||
@(screen+x) = xbuf[x] + tmp
|
|
||||||
; prog8 at this time needs a temp variable to calculate the above expression.
|
|
||||||
; in optimized asm, this is the fastest way to do this line (achieving ~21 fps on the C64):
|
|
||||||
; %asm {{
|
; %asm {{
|
||||||
; ldy p8_y
|
; ldy p8_y
|
||||||
; lda p8_ybuf,y
|
; lda p8_ybuf,y
|
||||||
|
@ -10,13 +10,13 @@ main {
|
|||||||
ubyte j = 4
|
ubyte j = 4
|
||||||
uword screen
|
uword screen
|
||||||
|
|
||||||
ubyte result = xx[i] + yy[j] ; TODO optimize to use add addr,y
|
ubyte result = xx[i] + yy[j]
|
||||||
txt.print_ub(result) ; 149
|
txt.print_ub(result) ; 149
|
||||||
txt.nl()
|
txt.nl()
|
||||||
result = xx[i] + yy[i] ; TODO optimize to use add addr,y
|
result = xx[i] + yy[i]
|
||||||
txt.print_ub(result) ; 148
|
txt.print_ub(result) ; 148
|
||||||
txt.nl()
|
txt.nl()
|
||||||
@(screen+i) = xx[i] + yy[i] ; TODO why is this using P8ZP_SCRATCH_B1?
|
@(screen+i) = xx[i] + yy[i]
|
||||||
|
|
||||||
; ubyte index = 100
|
; ubyte index = 100
|
||||||
; ubyte[] t_index = [1,2,3,4,5]
|
; ubyte[] t_index = [1,2,3,4,5]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user