code optimization for bytearray[x] +/- bytearray[y]

use adc array,y or sbc array,y instead of tempvar
This commit is contained in:
Irmen de Jong 2023-09-04 21:07:49 +02:00
parent 7e5a9474fe
commit eb018ae660
5 changed files with 43 additions and 24 deletions

View File

@ -739,15 +739,31 @@ internal class AssignmentAsmGen(private val program: PtProgram,
return true return true
} }
else -> { else -> {
val rightArrayIndexer = expr.right as? PtArrayIndexer
if(rightArrayIndexer!=null && rightArrayIndexer.type in ByteDatatypes && left.type in ByteDatatypes) {
// special optimization for bytevalue +/- bytearr[y] : no need to use a tempvar, just use adc array,y or sbc array,y
assignExpressionToRegister(left, RegisterOrPair.A, left.type==DataType.BYTE) assignExpressionToRegister(left, RegisterOrPair.A, left.type==DataType.BYTE)
asmgen.out(" pha") asmgen.out(" pha")
assignExpressionToVariable(right, "P8ZP_SCRATCH_B1", right.type) asmgen.assignExpressionToRegister(rightArrayIndexer.index, RegisterOrPair.Y, false)
asmgen.out(" pla") asmgen.out(" pla")
val arrayvarname = if(rightArrayIndexer.usesPointerVariable)
"(${rightArrayIndexer.variable.name})"
else
asmgen.asmSymbolName(rightArrayIndexer.variable)
if (expr.operator == "+")
asmgen.out(" clc | adc $arrayvarname,y")
else
asmgen.out(" sec | sbc $arrayvarname,y")
assignRegisterByte(target, CpuRegister.A, dt in SignedDatatypes)
} else {
assignExpressionToVariable(right, "P8ZP_SCRATCH_B1", right.type)
assignExpressionToRegister(left, RegisterOrPair.A, left.type==DataType.BYTE)
if (expr.operator == "+") if (expr.operator == "+")
asmgen.out(" clc | adc P8ZP_SCRATCH_B1") asmgen.out(" clc | adc P8ZP_SCRATCH_B1")
else else
asmgen.out(" sec | sbc P8ZP_SCRATCH_B1") asmgen.out(" sec | sbc P8ZP_SCRATCH_B1")
assignRegisterByte(target, CpuRegister.A, dt in SignedDatatypes) assignRegisterByte(target, CpuRegister.A, dt in SignedDatatypes)
}
return true return true
} }
} }
@ -1857,7 +1873,7 @@ internal class AssignmentAsmGen(private val program: PtProgram,
return return
} }
// No more special optmized cases yet. Do the rest via more complex evaluation // No more special optimized cases yet. Do the rest via more complex evaluation
// note: cannot use assignTypeCastedValue because that is ourselves :P // note: cannot use assignTypeCastedValue because that is ourselves :P
// NOTE: THIS MAY TURN INTO A STACK OVERFLOW ERROR IF IT CAN'T SIMPLIFY THE TYPECAST..... :-/ // NOTE: THIS MAY TURN INTO A STACK OVERFLOW ERROR IF IT CAN'T SIMPLIFY THE TYPECAST..... :-/
asmgen.assignExpressionTo(origTypeCastExpression, target) asmgen.assignExpressionTo(origTypeCastExpression, target)

View File

@ -78,6 +78,16 @@ internal class BeforeAsmAstChanger(val program: Program, private val options: Co
val typeCast = binExpr.left as? TypecastExpression val typeCast = binExpr.left as? TypecastExpression
if(typeCast!=null && typeCast.expression isSameAs assignment.target) if(typeCast!=null && typeCast.expression isSameAs assignment.target)
return noModifications return noModifications
if(binExpr.operator in "+-") {
val leftDt = binExpr.left.inferType(program)
val rightDt = binExpr.right.inferType(program)
if(leftDt==rightDt && leftDt.isInteger && rightDt.isInteger && binExpr.right is ArrayIndexedExpression) {
// don't split <integer expr> +/- array[i] when both operands have the same integer type (the codegen has an optimized path for this)
return noModifications
}
}
val sourceDt = binExpr.left.inferType(program).getOrElse { throw AssemblyError("unknown dt") } val sourceDt = binExpr.left.inferType(program).getOrElse { throw AssemblyError("unknown dt") }
val (_, left) = binExpr.left.typecastTo(assignment.target.inferType(program).getOrElse { throw AssemblyError( val (_, left) = binExpr.left.typecastTo(assignment.target.inferType(program).getOrElse { throw AssemblyError(
"unknown dt" "unknown dt"

View File

@ -1,9 +1,6 @@
TODO TODO
==== ====
- add special optimization for @(screen+i) = xbuf[x] + ybuf[y] and @(screen+i) = xbuf[x] - ybuf[y]
(noticeable in plasma.p8 and cube examples?)
- prefix prog8 subroutines with p8s_ instead of p8_ to not let them clash with variables in the asm?? - prefix prog8 subroutines with p8s_ instead of p8_ to not let them clash with variables in the asm??
- [on branch: shortcircuit] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 .... - [on branch: shortcircuit] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 ....
- IR: reduce the number of branch instructions such as BEQ, BEQR, etc (gradually), replace with CMP(I) + status branch instruction - IR: reduce the number of branch instructions such as BEQ, BEQR, etc (gradually), replace with CMP(I) + status branch instruction

View File

@ -88,14 +88,10 @@ main {
c2A += 2 c2A += 2
c2B -= 3 c2B -= 3
for y in 24 downto 0 { for y in 0 to 24 {
for x in 39 downto 0 { for x in 0 to 39 {
; split the array expression to avoid a prog8 temporary var inefficiency @(screen+x) = xbuf[x] + ybuf[y]
; this pure prog8 version achieves ~17 fps ; max optimized asm is this: (achieving ~21 fps on the C64):
ubyte @zp tmp = ybuf[y]
@(screen+x) = xbuf[x] + tmp
; prog8 at this time needs a temp variable to calculate the above expression.
; in optimized asm, this is the fastest way to do this line (achieving ~21 fps on the C64):
; %asm {{ ; %asm {{
; ldy p8_y ; ldy p8_y
; lda p8_ybuf,y ; lda p8_ybuf,y

View File

@ -10,13 +10,13 @@ main {
ubyte j = 4 ubyte j = 4
uword screen uword screen
ubyte result = xx[i] + yy[j] ; TODO optimize to use add addr,y ubyte result = xx[i] + yy[j]
txt.print_ub(result) ; 149 txt.print_ub(result) ; 149
txt.nl() txt.nl()
result = xx[i] + yy[i] ; TODO optimize to use add addr,y result = xx[i] + yy[i]
txt.print_ub(result) ; 148 txt.print_ub(result) ; 148
txt.nl() txt.nl()
@(screen+i) = xx[i] + yy[i] ; TODO why is this using P8ZP_SCRATCH_B1? @(screen+i) = xx[i] + yy[i]
; ubyte index = 100 ; ubyte index = 100
; ubyte[] t_index = [1,2,3,4,5] ; ubyte[] t_index = [1,2,3,4,5]