mirror of
https://github.com/irmen/prog8.git
synced 2024-11-26 11:49:22 +00:00
optimized byte multiplications
This commit is contained in:
parent
487faf3a08
commit
2a08c22b0f
@ -1254,4 +1254,195 @@ _magiceors .word $3f1d, $3f81, $3fa5, $3fc5, $4075, $409d, $40cd, $4109
|
||||
}}
|
||||
}
|
||||
|
||||
|
||||
%asm {{
|
||||
|
||||
mul_byte_3 .proc
        ; Multiply the byte at c64.ESTACK_LO+1,x by 3, in place.
        ; Computed as 2*n + n; the result is 8 bits wide and wraps modulo 256.
        ; Uses the accumulator and changes the processor flags.
        lda  c64.ESTACK_LO+1,x      ; A = n
        asl  a                      ; A = 2*n
        clc                         ; discard the bit shifted out by asl
        adc  c64.ESTACK_LO+1,x      ; A = 2*n + n
        sta  c64.ESTACK_LO+1,x      ; overwrite the operand with the product
        rts
        .pend
|
||||
|
||||
mul_byte_5 .proc
        ; Multiply the byte at c64.ESTACK_LO+1,x by 5, in place.
        ; Computed as 4*n + n; the result is 8 bits wide and wraps modulo 256.
        ; Uses the accumulator and changes the processor flags.
        lda  c64.ESTACK_LO+1,x      ; A = n
        asl  a
        asl  a                      ; A = 4*n
        clc                         ; discard the bit shifted out by asl
        adc  c64.ESTACK_LO+1,x      ; A = 4*n + n
        sta  c64.ESTACK_LO+1,x      ; overwrite the operand with the product
        rts
        .pend
|
||||
|
||||
mul_byte_6 .proc
        ; Multiply the byte at c64.ESTACK_LO+1,x by 6, in place.
        ; Computed as 4*n + 2*n; the result is 8 bits wide and wraps modulo 256.
        ; Uses the accumulator, overwrites c64.SCRATCH_ZPREG and changes the flags.
        lda  c64.ESTACK_LO+1,x      ; A = n
        asl  a                      ; A = 2*n
        sta  c64.SCRATCH_ZPREG      ; remember 2*n
        asl  a                      ; A = 4*n
        clc                         ; discard the bit shifted out by asl
        adc  c64.SCRATCH_ZPREG      ; A = 4*n + 2*n
        sta  c64.ESTACK_LO+1,x      ; overwrite the operand with the product
        rts
        .pend
|
||||
|
||||
mul_byte_7 .proc
        ; Multiply the byte at c64.ESTACK_LO+1,x by 7, in place.
        ; Computed as 8*n - n; the result is 8 bits wide and wraps modulo 256.
        ; Uses the accumulator and changes the processor flags.
        lda  c64.ESTACK_LO+1,x      ; A = n
        asl  a
        asl  a
        asl  a                      ; A = 8*n
        sec                         ; carry set = "no borrow" for the subtraction
        sbc  c64.ESTACK_LO+1,x      ; A = 8*n - n
        sta  c64.ESTACK_LO+1,x      ; overwrite the operand with the product
        rts
        .pend
|
||||
|
||||
mul_byte_9 .proc
        ; Multiply the byte at c64.ESTACK_LO+1,x by 9, in place.
        ; Computed as 8*n + n; the result is 8 bits wide and wraps modulo 256.
        ; Uses the accumulator and changes the processor flags.
        lda  c64.ESTACK_LO+1,x      ; A = n
        asl  a
        asl  a
        asl  a                      ; A = 8*n
        clc                         ; discard the bit shifted out by asl
        adc  c64.ESTACK_LO+1,x      ; A = 8*n + n
        sta  c64.ESTACK_LO+1,x      ; overwrite the operand with the product
        rts
        .pend
|
||||
|
||||
mul_byte_10 .proc
        ; Multiply the byte at c64.ESTACK_LO+1,x by 10, in place.
        ; Computed as (4*n + n) * 2; the result is 8 bits wide and wraps modulo 256.
        ; Uses the accumulator and changes the processor flags.
        ;
        ; The earlier form (8*n, then two adc in a row after a single clc) let a
        ; carry out of the first addition leak into the second one, giving a
        ; result that is one too high whenever the intermediate sum wrapped
        ; (for instance with negative signed operands). Here every addition is
        ; preceded by its own clc, so the result is exact modulo 256.
        lda  c64.ESTACK_LO+1,x      ; A = n
        asl  a
        asl  a                      ; A = 4*n
        clc                         ; discard the bit shifted out by asl
        adc  c64.ESTACK_LO+1,x      ; A = 5*n
        asl  a                      ; A = 10*n
        sta  c64.ESTACK_LO+1,x      ; overwrite the operand with the product
        rts
        .pend
|
||||
|
||||
mul_byte_11 .proc
        ; Multiply the byte at c64.ESTACK_LO+1,x by 11, in place.
        ; Computed as (4*n + n) * 2 + n; the result is 8 bits wide and wraps
        ; modulo 256. Uses the accumulator and changes the processor flags.
        ;
        ; The earlier form chained three adc instructions after a single clc,
        ; so a carry out of one addition was added into the next and the result
        ; came out too high whenever an intermediate sum wrapped (for instance
        ; with negative signed operands). Each addition now has its own clc.
        ; As a side benefit c64.SCRATCH_ZPREG is no longer needed.
        lda  c64.ESTACK_LO+1,x      ; A = n
        asl  a
        asl  a                      ; A = 4*n
        clc                         ; discard the bit shifted out by asl
        adc  c64.ESTACK_LO+1,x      ; A = 5*n
        asl  a                      ; A = 10*n
        clc                         ; discard the bit shifted out by asl
        adc  c64.ESTACK_LO+1,x      ; A = 11*n
        sta  c64.ESTACK_LO+1,x      ; overwrite the operand with the product
        rts
        .pend
|
||||
|
||||
mul_byte_12 .proc
        ; Multiply the byte at c64.ESTACK_LO+1,x by 12, in place.
        ; Computed as 8*n + 4*n; the result is 8 bits wide and wraps modulo 256.
        ; Uses the accumulator, overwrites c64.SCRATCH_ZPREG and changes the flags.
        lda  c64.ESTACK_LO+1,x      ; A = n
        asl  a
        asl  a                      ; A = 4*n
        sta  c64.SCRATCH_ZPREG      ; remember 4*n
        asl  a                      ; A = 8*n
        clc                         ; discard the bit shifted out by asl
        adc  c64.SCRATCH_ZPREG      ; A = 8*n + 4*n
        sta  c64.ESTACK_LO+1,x      ; overwrite the operand with the product
        rts
        .pend
|
||||
|
||||
mul_byte_13 .proc
        ; Multiply the byte at c64.ESTACK_LO+1,x by 13, in place.
        ; Computed as (2*n + n) * 4 + n; the result is 8 bits wide and wraps
        ; modulo 256. Uses the accumulator and changes the processor flags.
        ;
        ; The earlier form computed 16*n and then chained three sbc instructions
        ; after a single sec. Because 16*n wraps in 8 bits for n >= 16, the first
        ; subtraction borrows (carry clear) and the following sbc then subtracts
        ; one extra: 16 * 13 came out as 207 instead of 208. Building the product
        ; from additions that each have their own clc is exact modulo 256 and
        ; no longer needs c64.SCRATCH_ZPREG.
        lda  c64.ESTACK_LO+1,x      ; A = n
        asl  a                      ; A = 2*n
        clc                         ; discard the bit shifted out by asl
        adc  c64.ESTACK_LO+1,x      ; A = 3*n
        asl  a
        asl  a                      ; A = 12*n
        clc                         ; discard the bit shifted out by asl
        adc  c64.ESTACK_LO+1,x      ; A = 13*n
        sta  c64.ESTACK_LO+1,x      ; overwrite the operand with the product
        rts
        .pend
|
||||
|
||||
mul_byte_14 .proc
        ; Multiply the byte at c64.ESTACK_LO+1,x by 14, in place.
        ; Computed as (8*n - n) * 2; the result is 8 bits wide and wraps
        ; modulo 256. Uses the accumulator and changes the processor flags.
        ;
        ; The earlier form computed 16*n and then ran two sbc instructions after
        ; a single sec. When 16*n wrapped, the first subtraction borrowed and the
        ; second sbc subtracted one extra: 16 * 14 came out as 223 instead of 224.
        ; With only one subtraction directly after sec the result is exact
        ; modulo 256.
        lda  c64.ESTACK_LO+1,x      ; A = n
        asl  a
        asl  a
        asl  a                      ; A = 8*n
        sec                         ; carry set = "no borrow" for the subtraction
        sbc  c64.ESTACK_LO+1,x      ; A = 7*n
        asl  a                      ; A = 14*n
        sta  c64.ESTACK_LO+1,x      ; overwrite the operand with the product
        rts
        .pend
|
||||
|
||||
mul_byte_15 .proc
        ; Multiply the byte at c64.ESTACK_LO+1,x by 15, in place.
        ; Computed as 16*n - n; the result is 8 bits wide and wraps modulo 256.
        ; Uses the accumulator and changes the processor flags.
        ;
        ; The store of n into c64.SCRATCH_ZPREG that used to sit after the lda
        ; has been removed: nothing in this routine ever read it back.
        lda  c64.ESTACK_LO+1,x      ; A = n
        asl  a
        asl  a
        asl  a
        asl  a                      ; A = 16*n
        sec                         ; carry set = "no borrow" for the subtraction
        sbc  c64.ESTACK_LO+1,x      ; A = 16*n - n
        sta  c64.ESTACK_LO+1,x      ; overwrite the operand with the product
        rts
        .pend
|
||||
|
||||
mul_byte_20 .proc
        ; Multiply the byte at c64.ESTACK_LO+1,x by 20, in place.
        ; Computed as 16*n + 4*n; the result is 8 bits wide and wraps modulo 256.
        ; Uses the accumulator, overwrites c64.SCRATCH_ZPREG and changes the flags.
        lda  c64.ESTACK_LO+1,x      ; A = n
        asl  a
        asl  a                      ; A = 4*n
        sta  c64.SCRATCH_ZPREG      ; remember 4*n
        asl  a
        asl  a                      ; A = 16*n
        clc                         ; discard the bit shifted out by asl
        adc  c64.SCRATCH_ZPREG      ; A = 16*n + 4*n
        sta  c64.ESTACK_LO+1,x      ; overwrite the operand with the product
        rts
        .pend
|
||||
|
||||
mul_byte_25 .proc
        ; Multiply the byte at c64.ESTACK_LO+1,x by 25, in place.
        ; Computed as 5*n * 4 + 5*n, where 5*n = 4*n + n; the result is 8 bits
        ; wide and wraps modulo 256.
        ; Uses the accumulator, overwrites c64.SCRATCH_ZPREG and changes the flags.
        ;
        ; The earlier form added 8*n and then n to 16*n with two adc in a row
        ; after a single clc, so a carry out of the first addition was added
        ; into the second and the result came out one too high whenever the
        ; intermediate sum wrapped (for instance with negative signed operands).
        ; Each addition now has its own clc.
        lda  c64.ESTACK_LO+1,x      ; A = n
        asl  a
        asl  a                      ; A = 4*n
        clc                         ; discard the bit shifted out by asl
        adc  c64.ESTACK_LO+1,x      ; A = 5*n
        sta  c64.SCRATCH_ZPREG      ; remember 5*n
        asl  a
        asl  a                      ; A = 20*n
        clc                         ; discard the bit shifted out by asl
        adc  c64.SCRATCH_ZPREG      ; A = 25*n
        sta  c64.ESTACK_LO+1,x      ; overwrite the operand with the product
        rts
        .pend
|
||||
|
||||
mul_byte_40 .proc
        ; Multiply the byte at c64.ESTACK_LO+1,x by 40, in place.
        ; Computed as 32*n + 8*n; the result is 8 bits wide and wraps modulo 256.
        ; Uses the accumulator, overwrites c64.SCRATCH_ZPREG and changes the flags.
        lda  c64.ESTACK_LO+1,x      ; A = n
        asl  a
        asl  a
        asl  a                      ; A = 8*n
        sta  c64.SCRATCH_ZPREG      ; remember 8*n
        asl  a
        asl  a                      ; A = 32*n
        clc                         ; discard the bit shifted out by asl
        adc  c64.SCRATCH_ZPREG      ; A = 32*n + 8*n
        sta  c64.ESTACK_LO+1,x      ; overwrite the operand with the product
        rts
        .pend
|
||||
|
||||
}}
|
||||
|
||||
}
|
||||
|
@ -807,9 +807,9 @@ class AsmGen(val options: CompilationOptions, val program: IntermediateProgram,
|
||||
Opcode.SUB_W, Opcode.SUB_UW -> " jsr prog8_lib.sub_w"
|
||||
Opcode.MUL_B, Opcode.MUL_UB -> " jsr prog8_lib.mul_byte"
|
||||
Opcode.MUL_W, Opcode.MUL_UW -> " jsr prog8_lib.mul_word"
|
||||
Opcode.MUL_F -> " jsr c64flt.mul_f"
|
||||
Opcode.ADD_F -> " jsr c64flt.add_f"
|
||||
Opcode.SUB_F -> " jsr c64flt.sub_f"
|
||||
Opcode.MUL_F -> " jsr c64flt.mul_f"
|
||||
Opcode.DIV_F -> " jsr c64flt.div_f"
|
||||
Opcode.IDIV_UB -> " jsr prog8_lib.idiv_ub"
|
||||
Opcode.IDIV_B -> " jsr prog8_lib.idiv_b"
|
||||
@ -877,6 +877,40 @@ class AsmGen(val options: CompilationOptions, val program: IntermediateProgram,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the assembly fragment that multiplies the value on the stack by the
 * constant [amount] via a dedicated library routine, or `null` when no such
 * routine applies and the caller has to emit the generic multiplication.
 *
 * Only the byte and word multiply opcodes are considered; any other opcode
 * yields `null`. For the signed opcodes (`MUL_B`, `MUL_W`) a negative [amount]
 * whose absolute value is supported is handled by negating the operand first
 * and then multiplying by the positive factor.
 *
 * NOTE(review): the `math.mul_word_N` routines named here are not visible next
 * to the `math.mul_byte_N` ones - confirm that they exist before relying on
 * the word variants.
 *
 * @param mulIns the multiply instruction; only its opcode is inspected.
 * @param amount the constant factor.
 * @throws AssemblyError when [amount] is one of 0, 1, 2, 4, ..., 256 for a
 *         byte or word multiply: those should already have been rewritten
 *         into a shift.
 */
private fun optimizedIntMultiplicationsOnStack(mulIns: Instruction, amount: Int): String? {
    val powersOfTwo = setOf(0, 1, 2, 4, 8, 16, 32, 64, 128, 256)
    val optimizedFactors = setOf(3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 20, 25, 40)

    val opcode = mulIns.opcode
    val isByte = opcode == Opcode.MUL_B || opcode == Opcode.MUL_UB
    val isWord = opcode == Opcode.MUL_W || opcode == Opcode.MUL_UW
    if (!isByte && !isWord)
        return null

    if (amount in powersOfTwo)
        throw AssemblyError("multiplication by power of 2 should have been converted into a left shift instruction already")

    return when {
        amount in optimizedFactors ->
            if (isByte) " jsr math.mul_byte_$amount" else " jsr math.mul_word_$amount"
        // negative factors only make sense for the signed variants
        opcode == Opcode.MUL_B && -amount in optimizedFactors ->
            " jsr prog8_lib.neg_b | jsr math.mul_byte_${-amount}"
        opcode == Opcode.MUL_W && -amount in optimizedFactors ->
            " jsr prog8_lib.neg_w | jsr math.mul_word_${-amount}"
        else -> null
    }
}
|
||||
|
||||
private fun findPatterns(segment: List<Instruction>): List<AsmFragment> {
|
||||
val opcodes = segment.map { it.opcode }
|
||||
val result = mutableListOf<AsmFragment>()
|
||||
@ -3124,6 +3158,22 @@ class AsmGen(val options: CompilationOptions, val program: IntermediateProgram,
|
||||
lda #0
|
||||
+
|
||||
"""
|
||||
},
|
||||
|
||||
|
||||
// various optimizable integer multiplications
|
||||
AsmPattern(listOf(Opcode.PUSH_BYTE, Opcode.MUL_B), listOf(Opcode.PUSH_BYTE, Opcode.MUL_UB)) { segment ->
|
||||
val amount=segment[0].arg!!.integerValue()
|
||||
val result = optimizedIntMultiplicationsOnStack(segment[1], amount)
|
||||
result ?: " lda #${hexVal(segment[0])} | sta ${ESTACK_LO.toHex()},x | dex | jsr prog8_lib.mul_byte"
|
||||
},
|
||||
AsmPattern(listOf(Opcode.PUSH_WORD, Opcode.MUL_W), listOf(Opcode.PUSH_WORD, Opcode.MUL_UW)) { segment ->
|
||||
val amount=segment[0].arg!!.integerValue()
|
||||
val result = optimizedIntMultiplicationsOnStack(segment[1], amount)
|
||||
if (result != null) result else {
|
||||
val value = hexVal(segment[0])
|
||||
" lda #<$value | sta ${ESTACK_LO.toHex()},x | lda #>$value | sta ${ESTACK_HI.toHex()},x | dex | jsr prog8_lib.mul_word"
|
||||
}
|
||||
}
|
||||
|
||||
)
|
||||
|
@ -17,22 +17,6 @@ import kotlin.math.log2
|
||||
X % 2 -> X and 1 (if X is byte/word)
|
||||
|
||||
|
||||
todo often used multiplications to factors that are more efficiently calculated (via shifts)
|
||||
|
||||
X*3 -> X*2+X
|
||||
X*5 -> X*4+X
|
||||
X*6 -> X*2+X*2+X*2
|
||||
X*7 -> X*4+X*2+X
|
||||
X*9 -> X*8 + X
|
||||
X*10 -> X*8 + X*2
|
||||
X*11 -> X*8 + X*2 +X
|
||||
X*12 -> X*8 + X*4
|
||||
X*13 -> X*8 + X*4 +X
|
||||
X*14 -> X*8 + X*4 + X*2
|
||||
X*15 -> X*8 + X*4 + X*2 + X
|
||||
(and negatives)
|
||||
|
||||
|
||||
todo expression optimization: common (sub) expression elimination (turn common expressions into single subroutine call + introduce variable to hold it)
|
||||
|
||||
*/
|
||||
@ -396,6 +380,7 @@ class SimplifyExpressions(private val namespace: INameScope, private val heap: H
|
||||
// divided by a power of two => shift right
|
||||
optimizationsDone++
|
||||
val numshifts = log2(cv)
|
||||
println("DIV: SHIFT RIGHT $cv -> $numshifts") // TODO
|
||||
return BinaryExpression(expr.left, ">>", LiteralValue.optimalInteger(numshifts, expr.position), expr.position)
|
||||
}
|
||||
}
|
||||
@ -404,6 +389,7 @@ class SimplifyExpressions(private val namespace: INameScope, private val heap: H
|
||||
// divided by a negative power of two => negate, then shift right
|
||||
optimizationsDone++
|
||||
val numshifts = log2(-cv)
|
||||
println("DIV: SHIFT RIGHT $cv -> $numshifts") // TODO
|
||||
return BinaryExpression(PrefixExpression("-", expr.left, expr.position), ">>", LiteralValue.optimalInteger(numshifts, expr.position), expr.position)
|
||||
}
|
||||
}
|
||||
@ -467,7 +453,7 @@ class SimplifyExpressions(private val namespace: INameScope, private val heap: H
|
||||
if(leftValue.resultingDatatype(namespace, heap) in IntegerDatatypes) {
|
||||
// times a power of two => shift left
|
||||
optimizationsDone++
|
||||
val numshifts = log2(cv)
|
||||
val numshifts = log2(cv).toInt()
|
||||
return BinaryExpression(expr.left, "<<", LiteralValue.optimalInteger(numshifts, expr.position), expr.position)
|
||||
}
|
||||
}
|
||||
@ -475,7 +461,7 @@ class SimplifyExpressions(private val namespace: INameScope, private val heap: H
|
||||
if(leftValue.resultingDatatype(namespace, heap) in IntegerDatatypes) {
|
||||
// times a negative power of two => negate, then shift left
|
||||
optimizationsDone++
|
||||
val numshifts = log2(-cv)
|
||||
val numshifts = log2(-cv).toInt()
|
||||
return BinaryExpression(PrefixExpression("-", expr.left, expr.position), "<<", LiteralValue.optimalInteger(numshifts, expr.position), expr.position)
|
||||
}
|
||||
}
|
||||
|
191
examples/test.p8
191
examples/test.p8
@ -4,27 +4,186 @@
|
||||
|
||||
sub start() {
|
||||
|
||||
ubyte i
|
||||
byte j
|
||||
uword uw
|
||||
word w
|
||||
ubyte i = 10
|
||||
ubyte ub2
|
||||
byte j = 5
|
||||
byte b2
|
||||
uword uw = 1000
|
||||
uword uw2
|
||||
word w = 1000
|
||||
word w2
|
||||
|
||||
for i in 5 to 0 step -1 {
|
||||
c64scr.print_ub(i)
|
||||
c64.CHROUT('\n')
|
||||
}
|
||||
ub2=i*1
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*2
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*3
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*4
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*5
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*6
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*7
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*8
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*9
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*10
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*11
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*12
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*13
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*14
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*15
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*16
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*17
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*18
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*19
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*20
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*21
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*22
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*23
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*24
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
ub2=i*25
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
|
||||
for j in 5 to 0 step -1 {
|
||||
c64scr.print_b(j)
|
||||
c64.CHROUT('\n')
|
||||
}
|
||||
i=5
|
||||
ub2=i*40
|
||||
c64scr.print_ub(ub2)
|
||||
c64.CHROUT('\n')
|
||||
|
||||
for j in -5 to 0 {
|
||||
c64scr.print_b(j)
|
||||
c64.CHROUT('\n')
|
||||
}
|
||||
c64.CHROUT('\n')
|
||||
|
||||
|
||||
b2=j*1
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*2
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*3
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*4
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*5
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*6
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*7
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*8
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*9
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*10
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*11
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*12
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*13
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*14
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*15
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*16
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*17
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*18
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*19
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*20
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*21
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*22
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*23
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*24
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
b2=j*25
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
|
||||
j=3
|
||||
b2=j*40
|
||||
c64scr.print_b(b2)
|
||||
c64.CHROUT('\n')
|
||||
|
||||
c64.CHROUT('\n')
|
||||
|
||||
|
||||
;@todo multiplication by negative values
|
||||
|
||||
;@todo the same, for uword and word
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -37,16 +37,17 @@
|
||||
c64.SPRPTR[i] = $0a00/64
|
||||
}
|
||||
c64.SPENA = 255 ; enable all sprites
|
||||
c64utils.set_rasterirq(270) ; enable animation
|
||||
c64utils.set_rasterirq(220) ; enable animation
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
~ irq {
|
||||
|
||||
ubyte angle=0
|
||||
|
||||
sub irq() {
|
||||
ubyte angle ; no initialization value so it keeps the previous one.
|
||||
|
||||
c64.EXTCOL--
|
||||
|
||||
angle++
|
||||
|
Loading…
Reference in New Issue
Block a user