optimized some +1/-1 code

This commit is contained in:
Irmen de Jong 2019-01-09 01:43:32 +01:00
parent 46c282d6d5
commit 71e6497eed
7 changed files with 135 additions and 83 deletions

View File

@ -191,10 +191,12 @@ asmsub GETADRAY () -> clobbers(X) -> (uword @ AY) {
}}
}
asmsub print_f (float value @ AY) -> clobbers(A, Y) -> () {
sub print_f (float value) {
; ---- prints the floating point value (without a newline) using basic rom routines.
%asm {{
stx c64.SCRATCH_ZPREGX
lda #<print_f_value
ldy #>print_f_value
jsr c64flt.MOVFM ; load float into fac1
jsr c64flt.FOUT ; fac1 to string in A/Y
jsr c64.STROUT ; print string in A/Y
@ -203,10 +205,12 @@ asmsub print_f (float value @ AY) -> clobbers(A, Y) -> () {
}}
}
asmsub print_fln (float value @ AY) -> clobbers(A, Y) -> () {
sub print_fln (float value) {
; ---- prints the floating point value (with a newline at the end) using basic rom routines
%asm {{
stx c64.SCRATCH_ZPREGX
lda #<print_fln_value
ldy #>print_fln_value
jsr c64flt.MOVFM ; load float into fac1
jsr c64flt.FPRINTLN ; print fac1 with newline
ldx c64.SCRATCH_ZPREGX
@ -459,8 +463,8 @@ inc_var_f .proc
sty c64.SCRATCH_ZPWORD1+1
stx c64.SCRATCH_ZPREGX
jsr c64flt.MOVFM
lda #<c64.FL_FONE
ldy #>c64.FL_FONE
lda #<FL_FONE
ldy #>FL_FONE
jsr c64flt.FADD
ldx c64.SCRATCH_ZPWORD1
ldy c64.SCRATCH_ZPWORD1+1
@ -474,8 +478,8 @@ dec_var_f .proc
sta c64.SCRATCH_ZPWORD1
sty c64.SCRATCH_ZPWORD1+1
stx c64.SCRATCH_ZPREGX
lda #<c64.FL_FONE
ldy #>c64.FL_FONE
lda #<FL_FONE
ldy #>FL_FONE
jsr c64flt.MOVFM
lda c64.SCRATCH_ZPWORD1
ldy c64.SCRATCH_ZPWORD1+1
@ -790,8 +794,8 @@ func_ceil .proc
jsr c64flt.FCOMP
cmp #0
beq +
lda #<c64.FL_FONE
ldy #>c64.FL_FONE
lda #<FL_FONE
ldy #>FL_FONE
jsr c64flt.FADD
+ jmp push_fac1_as_result
.pend

View File

@ -1507,7 +1507,7 @@ private class StatementTranslator(private val prog: IntermediateProgram,
val vardecl = value.targetStatement(namespace) as VarDecl
prog.instr(Opcode.PUSH_ADDR_HEAPVAR, callLabel = vardecl.scopedname)
}
else -> throw CompilerException("literal float value or float variable expected")
// Fallback branch: the value is none of the kinds handled by the branches above.
else -> throw CompilerException("can only take address of a float as constant literal or variable")
}
}

View File

@ -185,6 +185,7 @@ class Value(val type: DataType, numericvalueOrHeapId: Number) {
DataType.BYTE -> return Value(DataType.BYTE, 127)
DataType.UWORD -> return Value(DataType.UWORD, 65535)
DataType.WORD -> return Value(DataType.WORD, 32767)
else -> {}
}
}
val result = v1.toDouble() / v2.toDouble()

View File

@ -655,7 +655,7 @@ class AsmGen(val options: CompilationOptions, val program: IntermediateProgram,
else -> " inc ${ins.callLabel}"
}
}
Opcode.INC_VAR_UW -> {
Opcode.INC_VAR_UW, Opcode.INC_VAR_W -> {
" inc ${ins.callLabel} | bne + | inc ${ins.callLabel}+1 |+"
}
Opcode.INC_VAR_F -> {
@ -693,7 +693,7 @@ class AsmGen(val options: CompilationOptions, val program: IntermediateProgram,
else -> " dec ${ins.callLabel}"
}
}
Opcode.DEC_VAR_UW -> {
Opcode.DEC_VAR_UW, Opcode.DEC_VAR_W -> {
" lda ${ins.callLabel} | bne + | dec ${ins.callLabel}+1 |+ | dec ${ins.callLabel}"
}
Opcode.DEC_VAR_F -> {
@ -2975,45 +2975,6 @@ class AsmGen(val options: CompilationOptions, val program: IntermediateProgram,
" lda ${segment[0].callLabel} | eor #${hexVal(segment[1])} | sta ${ESTACK_LO.toHex()},x | dex "
},
// 16 bit addition avoiding excessive stack usage
// @todo optimize this even more with longer asmpatterns (avoid stack use altogether on most common operations)
AsmPattern(listOf(Opcode.PUSH_VAR_WORD, Opcode.ADD_UW),
listOf(Opcode.PUSH_VAR_WORD, Opcode.ADD_W)) { segment ->
"""
clc
lda ${segment[0].callLabel}
adc ${(ESTACK_LO+1).toHex()},x
sta ${(ESTACK_LO+1).toHex()},x
lda ${segment[0].callLabel}+1
adc ${(ESTACK_HI+1).toHex()},x
sta ${(ESTACK_HI+1).toHex()},x
"""
},
AsmPattern(listOf(Opcode.PUSH_MEM_UW, Opcode.ADD_UW),
listOf(Opcode.PUSH_MEM_W, Opcode.ADD_W)) { segment ->
"""
clc
lda ${hexVal(segment[0])}
adc ${(ESTACK_LO + 1).toHex()},x
sta ${(ESTACK_LO + 1).toHex()},x
lda ${hexValPlusOne(segment[0])}
adc ${(ESTACK_HI + 1).toHex()},x
sta ${(ESTACK_HI + 1).toHex()},x
"""
},
AsmPattern(listOf(Opcode.PUSH_WORD, Opcode.ADD_UW),
listOf(Opcode.PUSH_WORD, Opcode.ADD_W)) { segment ->
"""
clc
lda #<${hexVal(segment[0])}
adc ${(ESTACK_LO+1).toHex()},x
sta ${(ESTACK_LO+1).toHex()},x
lda #>${hexVal(segment[0])}
adc ${(ESTACK_HI+1).toHex()},x
sta ${(ESTACK_HI+1).toHex()},x
"""
},
AsmPattern(listOf(Opcode.PUSH_VAR_BYTE, Opcode.PUSH_VAR_BYTE, Opcode.MKWORD)) { segment ->
"""
lda ${segment[0].callLabel}
@ -3072,7 +3033,82 @@ class AsmGen(val options: CompilationOptions, val program: IntermediateProgram,
lda ${segment[1].callLabel}
sta ${segment[4].callLabel}+1
"""
},
// Cheaper x+N / x-N for a small constant N: modify the most recently pushed stack slot in place
// instead of pushing the constant and running the generic add/subtract.
// @todo what about 1+x? reorder? what about x+ (-1) and x-(-1)? is that rewritten already?
// Byte addition: one INC of the slot at ESTACK_LO+1,x per unit of the constant.
AsmPattern(listOf(Opcode.PUSH_BYTE, Opcode.ADD_B), listOf(Opcode.PUSH_BYTE, Opcode.ADD_UB)) { segment ->
    val count = segment[0].arg!!.integerValue()
    when (count) {
        in 1..8 -> " inc ${(ESTACK_LO + 1).toHex()},x | ".repeat(count)
        // outside 1..8 this pattern yields no code (presumably a more general pattern is used instead - confirm)
        else -> null
    }
},
// Word addition of a small constant: increment the 16-bit value held in ESTACK_LO+1,x / ESTACK_HI+1,x in place.
AsmPattern(listOf(Opcode.PUSH_WORD, Opcode.ADD_UW), listOf(Opcode.PUSH_WORD, Opcode.ADD_W)) { segment ->
    val count = segment[0].arg!!.integerValue()
    if(count !in 1..8)
        null    // outside 1..8 this pattern yields no code
    else {
        // low byte first; the high byte is only incremented when the low byte wrapped around to zero
        " inc ${(ESTACK_LO + 1).toHex()},x | bne + | inc ${(ESTACK_HI + 1).toHex()},x |+ | ".repeat(count)
    }
},
// Byte subtraction of a small constant: one DEC of the slot at ESTACK_LO+1,x per unit of the constant.
AsmPattern(listOf(Opcode.PUSH_BYTE, Opcode.SUB_B), listOf(Opcode.PUSH_BYTE, Opcode.SUB_UB)) { segment ->
    val count = segment[0].arg!!.integerValue()
    val singleDec = " dec ${(ESTACK_LO + 1).toHex()},x | "
    // outside 1..8 this pattern yields no code
    if(count in 1..8) singleDec.repeat(count) else null
},
// Word subtraction of a small constant: decrement the 16-bit value held in ESTACK_LO+1,x / ESTACK_HI+1,x in place.
AsmPattern(listOf(Opcode.PUSH_WORD, Opcode.SUB_UW), listOf(Opcode.PUSH_WORD, Opcode.SUB_W)) { segment ->
    val count = segment[0].arg!!.integerValue()
    val lo = (ESTACK_LO + 1).toHex()
    val hi = (ESTACK_HI + 1).toHex()
    // a zero low byte is about to wrap, so the high byte is decremented first; then the low byte
    val singleDec = " lda ${lo},x | bne + | dec ${hi},x |+ | dec ${lo},x | "
    // outside 1..8 this pattern yields no code
    if(count in 1..8) singleDec.repeat(count) else null
},
// 16 bit addition avoiding excessive stack usage
// @todo optimize this even more with longer asmpatterns (avoid stack use altogether on most common operations)
AsmPattern(listOf(Opcode.PUSH_VAR_WORD, Opcode.ADD_UW),
listOf(Opcode.PUSH_VAR_WORD, Opcode.ADD_W)) { segment ->
"""
clc
lda ${segment[0].callLabel}
adc ${(ESTACK_LO+1).toHex()},x
sta ${(ESTACK_LO+1).toHex()},x
lda ${segment[0].callLabel}+1
adc ${(ESTACK_HI+1).toHex()},x
sta ${(ESTACK_HI+1).toHex()},x
"""
},
AsmPattern(listOf(Opcode.PUSH_MEM_UW, Opcode.ADD_UW),
listOf(Opcode.PUSH_MEM_W, Opcode.ADD_W)) { segment ->
"""
clc
lda ${hexVal(segment[0])}
adc ${(ESTACK_LO + 1).toHex()},x
sta ${(ESTACK_LO + 1).toHex()},x
lda ${hexValPlusOne(segment[0])}
adc ${(ESTACK_HI + 1).toHex()},x
sta ${(ESTACK_HI + 1).toHex()},x
"""
},
AsmPattern(listOf(Opcode.PUSH_WORD, Opcode.ADD_UW),
listOf(Opcode.PUSH_WORD, Opcode.ADD_W)) { segment ->
"""
clc
lda #<${hexVal(segment[0])}
adc ${(ESTACK_LO+1).toHex()},x
sta ${(ESTACK_LO+1).toHex()},x
lda #>${hexVal(segment[0])}
adc ${(ESTACK_HI+1).toHex()},x
sta ${(ESTACK_HI+1).toHex()},x
"""
}
)
}

View File

@ -12,6 +12,7 @@ import kotlin.math.log2
X*Y - Y -> Y*(X-1)
-X + A -> A - X
X+ (-A) -> X - A
X- (-A) -> X + A
X % 1 -> constant 0 (if X is byte/word)
X % 2 -> X and 1 (if X is byte/word)

View File

@ -3,6 +3,7 @@ package prog8.optimizing
import prog8.ast.*
import prog8.compiler.HeapValues
import prog8.functions.BuiltinFunctions
import kotlin.math.floor
/*
@ -16,7 +17,6 @@ import prog8.functions.BuiltinFunctions
todo remove if/while/repeat/for statements with empty statement blocks
todo replace if statements with only else block
todo regular subroutines that have 1 or 2 (u)byte or 1 (u)word parameters -> change to asmsub to accept these in A/Y registers instead of on stack
todo statement optimization: X+=1, X-=1 --> X++/X-- (to 3? 4? incs/decs in a row, after that use arithmetic)
todo optimize integer addition with self into shift 1 (A+=A -> A<<=1)
todo analyse for unreachable code and remove that (f.i. code after goto or return that has no label so can never be jumped to)
todo merge sequence of assignments into one to avoid repeated value loads (as long as the value is a constant and the target not a MEMORY type!)
@ -181,13 +181,31 @@ class StatementOptimizer(private val namespace: INameScope, private val heap: He
val cv = bexpr.right.constValue(namespace, heap)?.asNumericValue?.toDouble()
if(cv!=null) {
when (bexpr.operator) {
"+" -> if (cv==0.0) {
optimizationsDone++
return NopStatement(assignment.position)
"+" -> {
if (cv==0.0) {
optimizationsDone++
return NopStatement(assignment.position)
} else if(cv in 1.0..8.0 && targetDt in IntegerDatatypes && floor(cv)==cv) {
// replace by several INCs
val decs = AnonymousScope(mutableListOf(), assignment.position)
repeat(cv.toInt()) {
decs.statements.add(PostIncrDecr(target, "++", assignment.position))
}
return decs
}
}
"-" -> if (cv==0.0) {
optimizationsDone++
return NopStatement(assignment.position)
"-" -> {
if (cv==0.0) {
optimizationsDone++
return NopStatement(assignment.position)
} else if(cv in 1.0..8.0 && targetDt in IntegerDatatypes && floor(cv)==cv) {
// replace by several DECs
val decs = AnonymousScope(mutableListOf(), assignment.position)
repeat(cv.toInt()) {
decs.statements.add(PostIncrDecr(target, "--", assignment.position))
}
return decs
}
}
"*" -> if (cv==1.0) {
optimizationsDone++

View File

@ -4,42 +4,34 @@
sub start() {
; @todo more efficient +1/-1 additions in expressions
; @todo '/' with two integer operands should result in integer again instead of having to use '//' all the time?
ubyte ub = 10
byte b = -10
uword uw = 10
word w = -10
ubyte lsbb = $aa
ubyte msbb = $44
uword[4] uwarr
b = b + (-1)
b = b - (-1)
b = 1+b
b = (-1) + b
uword uw = (msbb as uword)*256 + lsbb
c64scr.print_uwhex(0, uw)
c64scr.print_uw(uw+1)
c64.CHROUT('\n')
uw = mkword(lsbb, msbb)
c64scr.print_uwhex(0, uw)
c64scr.print_uw(uw+2)
c64.CHROUT('\n')
uw = mkword($aa, $44)
c64scr.print_uwhex(0, uw)
c64scr.print_uw(uw+3)
c64.CHROUT('\n')
c64scr.print_uw(uw+4)
c64.CHROUT('\n')
uw = mkword(lsbb, $44)
c64scr.print_uwhex(0, uw)
c64scr.print_uw(uw-1)
c64.CHROUT('\n')
uw = mkword($aa, msbb)
c64scr.print_uwhex(0, uw)
c64scr.print_uw(uw-2)
c64.CHROUT('\n')
uwarr[2] = mkword(lsbb, msbb)
c64scr.print_uwhex(0, uwarr[2])
c64scr.print_uw(uw-3)
c64.CHROUT('\n')
uwarr[2] = mkword(lsbb, $44)
c64scr.print_uwhex(0, uwarr[2])
c64.CHROUT('\n')
uwarr[2] = mkword($aa, msbb)
c64scr.print_uwhex(0, uwarr[2])
c64.CHROUT('\n')
word w = mkword(lsbb,msbb) as word
c64scr.print_w(w)
c64scr.print_uw(uw-4)
c64.CHROUT('\n')
}