From f14ea1b3dead376d14529fce0f2940e8dcddaa7d Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Fri, 1 Sep 2023 20:27:39 +0200 Subject: [PATCH] micro optimization to save 2 cycles: change some pha+pla into tax+txa --- .../codegen/cpu6502/BuiltinFunctionsAsmGen.kt | 8 +- .../prog8/codegen/cpu6502/ForLoopsAsmGen.kt | 4 +- .../codegen/cpu6502/FunctionCallAsmGen.kt | 4 +- .../cpu6502/assignment/AssignmentAsmGen.kt | 95 ++++++++++--------- .../assignment/AugmentableAssignmentAsmGen.kt | 29 +++--- compiler/res/prog8lib/cx16/sprites.p8 | 1 + docs/source/todo.rst | 6 +- 7 files changed, 74 insertions(+), 73 deletions(-) diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/BuiltinFunctionsAsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/BuiltinFunctionsAsmGen.kt index a33951db0..6f643f893 100644 --- a/codeGenCpu6502/src/prog8/codegen/cpu6502/BuiltinFunctionsAsmGen.kt +++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/BuiltinFunctionsAsmGen.kt @@ -742,11 +742,11 @@ internal class BuiltinFunctionsAsmGen(private val program: PtProgram, asmgen.out(""" ldy #0 lda ($varname),y - pha + tax iny lda ($varname),y tay - pla""") + txa""") } } else fallback() } @@ -759,11 +759,11 @@ internal class BuiltinFunctionsAsmGen(private val program: PtProgram, asmgen.assignExpressionToRegister(result.second, RegisterOrPair.Y) asmgen.out(""" lda ($varname),y - pha + tax iny lda ($varname),y tay - pla""") + txa""") } else fallback() } else -> fallback() diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/ForLoopsAsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/ForLoopsAsmGen.kt index 4477a9509..715838b0e 100644 --- a/codeGenCpu6502/src/prog8/codegen/cpu6502/ForLoopsAsmGen.kt +++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/ForLoopsAsmGen.kt @@ -255,11 +255,11 @@ $loopLabel""") sec sbc #<${stepsize.absoluteValue} sta $varname - pha + tax lda $varname+1 sbc #>${stepsize.absoluteValue} sta $varname+1 - pla + txa $modifiedLabel2 cmp #0 ; modified lda $varname+1 $modifiedLabel sbc #0 ; modified 
diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/FunctionCallAsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/FunctionCallAsmGen.kt index d9d06f376..25eaba7cd 100644 --- a/codeGenCpu6502/src/prog8/codegen/cpu6502/FunctionCallAsmGen.kt +++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/FunctionCallAsmGen.kt @@ -142,12 +142,12 @@ internal class FunctionCallAsmGen(private val program: PtProgram, private val as is PtIdentifier -> { val sourceName = asmgen.asmVariableName(value) asmgen.out(""" - pha + tax clc lda $sourceName beq + sec -+ pla""") ++ txa""") } else -> { asmgen.assignExpressionToRegister(value, RegisterOrPair.A) diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt index f8088e594..7897110b9 100644 --- a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt +++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt @@ -88,7 +88,7 @@ internal class AssignmentAsmGen(private val program: PtProgram, assignRegisterpairWord(assign.target, RegisterOrPair.AY) } else { asmgen.loadScaledArrayIndexIntoRegister(value, elementDt, CpuRegister.Y) - asmgen.out(" lda ${arrayVarName}_lsb,y | pha | lda ${arrayVarName}_msb,y | tay | pla") + asmgen.out(" lda ${arrayVarName}_lsb,y | tax | lda ${arrayVarName}_msb,y | tay | txa") assignRegisterpairWord(assign.target, RegisterOrPair.AY) } return @@ -122,7 +122,7 @@ internal class AssignmentAsmGen(private val program: PtProgram, } in WordDatatypes -> { asmgen.loadScaledArrayIndexIntoRegister(value, elementDt, CpuRegister.Y) - asmgen.out(" lda $arrayVarName,y | pha | lda $arrayVarName+1,y | tay | pla") + asmgen.out(" lda $arrayVarName,y | tax | lda $arrayVarName+1,y | tay | txa") assignRegisterpairWord(assign.target, RegisterOrPair.AY) } DataType.FLOAT -> { @@ -257,13 +257,13 @@ internal class AssignmentAsmGen(private val program: PtProgram, when (assign.target.datatype) { DataType.STR -> 
{ asmgen.out(""" - pha - lda #<${assign.target.asmVarname} - sta P8ZP_SCRATCH_W1 - lda #>${assign.target.asmVarname} - sta P8ZP_SCRATCH_W1+1 - pla - jsr prog8_lib.strcpy""") + tax + lda #<${assign.target.asmVarname} + sta P8ZP_SCRATCH_W1 + lda #>${assign.target.asmVarname} + sta P8ZP_SCRATCH_W1+1 + txa + jsr prog8_lib.strcpy""") } DataType.UWORD -> assignRegisterpairWord(assign.target, RegisterOrPair.AY) else -> throw AssemblyError("str return value type mismatch with target") @@ -306,14 +306,14 @@ internal class AssignmentAsmGen(private val program: PtProgram, sec eor #255 adc #0 - pha + tax tya eor #255 adc #0 tay - pla""") + txa""") } - "~" -> asmgen.out(" pha | tya | eor #255 | tay | pla | eor #255") + "~" -> asmgen.out(" tax | tya | eor #255 | tay | txa | eor #255") "not" -> throw AssemblyError("not should have been replaced in the Ast by ==0") else -> throw AssemblyError("invalid prefix operator") } @@ -759,20 +759,20 @@ internal class AssignmentAsmGen(private val program: PtProgram, asmgen.out(""" clc adc P8ZP_SCRATCH_W1 - pha + tax tya adc P8ZP_SCRATCH_W1+1 tay - pla""") + txa""") else asmgen.out(""" sec sbc P8ZP_SCRATCH_W1 - pha + tax tya sbc P8ZP_SCRATCH_W1+1 tay - pla""") + txa""") assignRegisterpairWord(target, RegisterOrPair.AY) } @@ -784,20 +784,20 @@ internal class AssignmentAsmGen(private val program: PtProgram, asmgen.out(""" clc adc #<$symbol - pha + tax tya adc #>$symbol tay - pla""") + txa""") else asmgen.out(""" sec sbc #<$symbol - pha + tax tya sbc #>$symbol tay - pla""") + txa""") assignRegisterpairWord(target, RegisterOrPair.AY) return true } @@ -808,20 +808,20 @@ internal class AssignmentAsmGen(private val program: PtProgram, asmgen.out(""" clc adc $symname - pha + tax tya adc $symname+1 tay - pla""") + txa""") else asmgen.out(""" sec sbc $symname - pha + tax tya sbc $symname+1 tay - pla""") + txa""") assignRegisterpairWord(target, RegisterOrPair.AY) return true } @@ -831,20 +831,20 @@ internal class AssignmentAsmGen(private val program: 
PtProgram, asmgen.out(""" clc adc #<${right.number.toHex()} - pha + tax tya adc #>${right.number.toHex()} tay - pla""") + txa""") } else if(expr.operator=="-") { asmgen.out(""" sec sbc #<${right.number.toHex()} - pha + tax tya sbc #>${right.number.toHex()} tay - pla""") + txa""") } assignRegisterpairWord(target, RegisterOrPair.AY) return true @@ -926,9 +926,9 @@ internal class AssignmentAsmGen(private val program: PtProgram, } asmgen.assignWordOperandsToAYAndVar(expr.left, expr.right, "P8ZP_SCRATCH_W1") when (expr.operator) { - "&", "and" -> asmgen.out(" and P8ZP_SCRATCH_W1 | pha | tya | and P8ZP_SCRATCH_W1+1 | tay | pla") - "|", "or" -> asmgen.out(" ora P8ZP_SCRATCH_W1 | pha | tya | ora P8ZP_SCRATCH_W1+1 | tay | pla") - "^", "xor" -> asmgen.out(" eor P8ZP_SCRATCH_W1 | pha | tya | eor P8ZP_SCRATCH_W1+1 | tay | pla") + "&", "and" -> asmgen.out(" and P8ZP_SCRATCH_W1 | tax | tya | and P8ZP_SCRATCH_W1+1 | tay | txa") + "|", "or" -> asmgen.out(" ora P8ZP_SCRATCH_W1 | tax | tya | ora P8ZP_SCRATCH_W1+1 | tay | txa") + "^", "xor" -> asmgen.out(" eor P8ZP_SCRATCH_W1 | tax | tya | eor P8ZP_SCRATCH_W1+1 | tay | txa") else -> throw AssemblyError("invalid operator") } assignRegisterpairWord(target, RegisterOrPair.AY) @@ -1489,18 +1489,18 @@ internal class AssignmentAsmGen(private val program: PtProgram, is PtNumber -> { val number = right.number.toHex() when (operator) { - "&", "and" -> asmgen.out(" and #<$number | pha | tya | and #>$number | tay | pla") - "|", "or" -> asmgen.out(" ora #<$number | pha | tya | ora #>$number | tay | pla") - "^", "xor" -> asmgen.out(" eor #<$number | pha | tya | eor #>$number | tay | pla") + "&", "and" -> asmgen.out(" and #<$number | tax | tya | and #>$number | tay | txa") + "|", "or" -> asmgen.out(" ora #<$number | tax | tya | ora #>$number | tay | txa") + "^", "xor" -> asmgen.out(" eor #<$number | tax | tya | eor #>$number | tay | txa") else -> throw AssemblyError("invalid operator") } } is PtIdentifier -> { val name = 
asmgen.asmSymbolName(right) when (operator) { - "&", "and" -> asmgen.out(" and $name | pha | tya | and $name+1 | tay | pla") - "|", "or" -> asmgen.out(" ora $name | pha | tya | ora $name+1 | tay | pla") - "^", "xor" -> asmgen.out(" eor $name | pha | tya | eor $name+1 | tay | pla") + "&", "and" -> asmgen.out(" and $name | tax | tya | and $name+1 | tay | txa") + "|", "or" -> asmgen.out(" ora $name | tax | tya | ora $name+1 | tay | txa") + "^", "xor" -> asmgen.out(" eor $name | tax | tya | eor $name+1 | tay | txa") else -> throw AssemblyError("invalid operator") } } @@ -1510,6 +1510,11 @@ internal class AssignmentAsmGen(private val program: PtProgram, } private fun attemptAssignToByteCompareZero(expr: PtBinaryExpression, assign: AsmAssignment): Boolean { + // TODO optimized code for (word1 & word2) == 0 : +// if(expr.left.type in WordDatatypes && (expr.operator=="==" || expr.operator=="!=") && expr.left is PtBinaryExpression) { +// ... +// } + when (expr.operator) { "==" -> { when(val dt = expr.left.type) { @@ -2095,12 +2100,12 @@ internal class AssignmentAsmGen(private val program: PtProgram, if(regs!=RegisterOrPair.AY) throw AssemblyError("only supports AY here") asmgen.out(""" - pha + tax lda #<$targetAsmVarName sta P8ZP_SCRATCH_W2 lda #>$targetAsmVarName sta P8ZP_SCRATCH_W2+1 - pla + txa jsr floats.cast_from_uw""") } else -> throw AssemblyError("weird type") @@ -2123,12 +2128,12 @@ internal class AssignmentAsmGen(private val program: PtProgram, if(regs!=RegisterOrPair.AY) throw AssemblyError("only supports AY here") asmgen.out(""" - pha + tax lda #<$targetAsmVarName sta P8ZP_SCRATCH_W2 lda #>$targetAsmVarName sta P8ZP_SCRATCH_W2+1 - pla + txa jsr floats.cast_from_w""") } else -> throw AssemblyError("weird type") @@ -2562,12 +2567,12 @@ internal class AssignmentAsmGen(private val program: PtProgram, pla""") RegisterOrPair.AY -> asmgen.out(""" lda $sourceName - pha + tax ora #$7f bmi + lda #0 + tay - pla""") + txa""") RegisterOrPair.XY -> asmgen.out(""" lda 
$sourceName tax @@ -3770,12 +3775,12 @@ internal class AssignmentAsmGen(private val program: PtProgram, sec eor #255 adc #0 - pha + tax tya eor #255 adc #0 tay - pla""") + txa""") } RegisterOrPair.XY -> { asmgen.out(""" diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt index 008fe38e7..24dadd533 100644 --- a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt +++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt @@ -415,8 +415,7 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram, } } asmgen.out(""" - lda P8ZP_SCRATCH_W1 - pha + ldx P8ZP_SCRATCH_W1 lda P8ZP_SCRATCH_W1+1 pha lda #<$tempvar @@ -424,7 +423,7 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram, sta P8ZP_SCRATCH_W1 sty P8ZP_SCRATCH_W1+1 ply - pla + txa jsr floats.copy_float""") // copy from array into float temp var, clobbers A,Y } else -> throw AssemblyError("weird type to do in-place modification on ${target.datatype}") @@ -445,11 +444,11 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram, asmgen.out(""" clc adc $variable - pha + tay txa adc $variable+1 tax - pla""") + tya""") true } else { asmgen.out(""" @@ -469,11 +468,11 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram, asmgen.out(""" sec sbc $variable - pha + tay txa sbc $variable+1 tax - pla""") + tya""") true } else { asmgen.out(""" @@ -508,11 +507,11 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram, asmgen.out(""" clc adc #<$number - pha + tay txa adc #>$number tax - pla""") + tya""") true } } @@ -529,11 +528,11 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram, asmgen.out(""" sec sbc #<$number - pha + tay txa sbc #>$number tax - pla""") + tya""") true } } @@ -1990,12 +1989,12 @@ internal class 
AugmentableAssignmentAsmGen(private val program: PtProgram, fun divideVarByWordInAY() { asmgen.out(""" - pha + tax lda $name sta P8ZP_SCRATCH_W1 lda $name+1 sta P8ZP_SCRATCH_W1+1 - pla""") + txa""") if (dt == DataType.WORD) asmgen.out(" jsr math.divmod_w_asm") else @@ -2007,12 +2006,12 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram, if(dt==DataType.WORD) throw AssemblyError("remainder of signed integers is not properly defined/implemented, use unsigned instead") asmgen.out(""" - pha + tax lda $name sta P8ZP_SCRATCH_W1 lda $name+1 sta P8ZP_SCRATCH_W1+1 - pla + txa jsr math.divmod_uw_asm lda P8ZP_SCRATCH_W2 ldy P8ZP_SCRATCH_W2+1 diff --git a/compiler/res/prog8lib/cx16/sprites.p8 b/compiler/res/prog8lib/cx16/sprites.p8 index bd0181470..3a1ce7e07 100644 --- a/compiler/res/prog8lib/cx16/sprites.p8 +++ b/compiler/res/prog8lib/cx16/sprites.p8 @@ -41,6 +41,7 @@ sprites { } sub pos_batch(ubyte first_spritenum, ubyte num_sprites, uword xpositions_ptr, uword ypositions_ptr) { + ; -- note: the x and y positions word arrays must be regular arrays, they cannot be @split arrays! sprite_reg = VERA_SPRITEREGS + 2 + first_spritenum*$0008 cx16.vaddr_autoincr(1, sprite_reg, 0, 8) cx16.vaddr_autoincr(1, sprite_reg+1, 1, 8) diff --git a/docs/source/todo.rst b/docs/source/todo.rst index 11798b48f..b39bf7e97 100644 --- a/docs/source/todo.rst +++ b/docs/source/todo.rst @@ -1,11 +1,6 @@ TODO ==== -- optimize assembly output for ( word1 & word2 ==0) ... (no need for stack pushes) - -- opimize assembly where pha/pla can be converted into tax/txa (saves 2 cycles, clobbers X) - - prefix prog8 subroutines with p8s_ instead of p8_ to not let them clash with variables in the asm? - - allow 'chained' array indexing for expressions: value = ptrarray[0][0] - allow 'chained' array indexing for assign targets: ptrarray[0][0] = 42 this is just evaluating the lhs as a uword pointer expression - [on branch: shortcircuit] investigate McCarthy evaluation again? 
this may also reduce code size perhaps for things like if a>4 or a<2 .... @@ -61,6 +56,7 @@ Libraries: Optimizations: +- optimize assembly output for (word1 & word2) == 0 ... (no need for stack pushes); see attemptAssignToByteCompareZero(). - VariableAllocator: can we think of a smarter strategy for allocating variables into zeropage, rather than first-come-first-served? for instance, vars used inside loops first, then loopvars, then uwords used as pointers, then the rest - various optimizers skip stuff if compTarget.name==VMTarget.NAME. Once 6502-codegen is done from IR code,