added some more optimized mul_byte asm routines

This commit is contained in:
Irmen de Jong 2020-09-14 22:06:40 +02:00
parent e39a38b0d9
commit d260182ef3
4 changed files with 331 additions and 15 deletions

View File

@ -312,6 +312,7 @@ _seed .word $2c9e
.pend
; ----------- optimized multiplications (stack) : ---------
stack_mul_byte_3 .proc
; X + X*2
lda P8ESTACK_LO+1,x
@ -689,15 +690,10 @@ stack_mul_word_25 .proc
.pend
stack_mul_byte_40 .proc
; Replaces the byte at P8ESTACK_LO+1,x with (byte & 7) * 40, read from the
; mul_byte_40._forties table (same scheme as stack_mul_byte_50/80/100).
; Clobbers A and Y.
; The previous shift-and-add sequence must not precede the lookup: after
; three left shifts the low 3 bits are always zero, so the index would
; always be 0 and the result would always be 0.
; NOTE(review): only the low 3 bits of the operand select the table entry,
; so operands of 8 or more alias to 0..7 instead of giving (X*40) & 255 -
; confirm that callers never rely on the wrapped product.
lda P8ESTACK_LO+1,x
and #7
tay
lda mul_byte_40._forties,y
sta P8ESTACK_LO+1,x
rts
.pend
@ -726,6 +722,309 @@ stack_mul_word_40 .proc
rts
.pend
stack_mul_byte_50 .proc
; Replaces the byte at P8ESTACK_LO+1,x with the mul_byte_50._fifties entry
; selected by its low 3 bits, i.e. (byte & 7) * 50 kept to 8 bits.
; Clobbers A and Y.
; NOTE(review): operands of 8 or more alias to 0..7 because of the mask -
; confirm that this is intended.
lda P8ESTACK_LO+1,x
and #7
tay
lda mul_byte_50._fifties, y
sta P8ESTACK_LO+1,x
rts
.pend
stack_mul_word_50 .proc
; TODO word * 50
; Not implemented yet. Warn at assembly time and return explicitly, so that
; a call does not run on into whatever code happens to follow this proc.
.warn "todo stack_mul_word_50"
rts
.pend
stack_mul_byte_80 .proc
; Replaces the byte at P8ESTACK_LO+1,x with the mul_byte_80._eighties entry
; selected by its low 2 bits, i.e. (byte & 3) * 80.
; Clobbers A and Y.
; NOTE(review): operands of 4 or more alias to 0..3 because of the mask -
; confirm that this is intended.
lda P8ESTACK_LO+1,x
and #3
tay
lda mul_byte_80._eighties, y
sta P8ESTACK_LO+1,x
rts
.pend
stack_mul_word_80 .proc
; TODO word * 80
; Not implemented yet. Warn at assembly time and return explicitly, so that
; a call does not run on into whatever code happens to follow this proc.
.warn "todo stack_mul_word_80"
rts
.pend
stack_mul_byte_100 .proc
; Replaces the byte at P8ESTACK_LO+1,x with the mul_byte_100._hundreds entry
; selected by its low 2 bits, i.e. (byte & 3) * 100 kept to 8 bits.
; Clobbers A and Y.
; NOTE(review): operands of 4 or more alias to 0..3 because of the mask -
; confirm that this is intended.
lda P8ESTACK_LO+1,x
and #3
tay
lda mul_byte_100._hundreds, y
sta P8ESTACK_LO+1,x
rts
.pend
stack_mul_word_100 .proc
; TODO word * 100
; Not implemented yet. Warn at assembly time and return explicitly, so that
; a call does not run on into whatever code happens to follow this proc.
.warn "todo stack_mul_word_100"
rts
.pend
; ----------- optimized multiplications (in-place: byte operand and result in A; word register convention still undecided) : ---------
mul_byte_3 .proc
; A = A + A*2
; In: A = byte operand. Out: A = low 8 bits of operand*3.
; Overwrites P8ZP_SCRATCH_REG with the original operand.
sta P8ZP_SCRATCH_REG
asl a
; the shift may have set carry; clear it so adc adds exactly the saved operand
clc
; must read back the same scratch location that was written above
adc P8ZP_SCRATCH_REG
rts
.pend
mul_word_3 .proc
; Placeholder for the in-place word multiply by 3: nothing is computed.
; Only issues an assembler warning and returns.
.warn "todo mul_word_3"
rts
.pend
mul_byte_5 .proc
; A = A*4 + A
; In: A = byte operand. Out: A = low 8 bits of operand*5.
; Overwrites P8ZP_SCRATCH_REG with the original operand.
sta P8ZP_SCRATCH_REG
asl a
asl a
; the shifts may have set carry; clear it so adc adds exactly the saved operand
clc
adc P8ZP_SCRATCH_REG
rts
.pend
mul_word_5 .proc
; Placeholder for the in-place word multiply by 5: nothing is computed.
; Only issues an assembler warning and returns.
.warn "todo mul_word_5"
rts
.pend
mul_byte_6 .proc
; A = (A*2 + A)*2
; In: A = byte operand. Out: A = low 8 bits of operand*6.
; Overwrites P8ZP_SCRATCH_REG with the original operand.
sta P8ZP_SCRATCH_REG
asl a
; the shift may have set carry; clear it so adc adds exactly the saved operand
clc
adc P8ZP_SCRATCH_REG
; A now holds operand*3; double it
asl a
rts
.pend
mul_word_6 .proc
; Placeholder for the in-place word multiply by 6: nothing is computed.
; Only issues an assembler warning and returns.
.warn "todo mul_word_6"
rts
.pend
mul_byte_7 .proc
; A = A*8 - A
; In: A = byte operand. Out: A = low 8 bits of operand*7.
; Overwrites P8ZP_SCRATCH_REG with the original operand.
sta P8ZP_SCRATCH_REG
asl a
asl a
asl a
; set carry so sbc subtracts exactly the saved operand (no borrow in)
sec
sbc P8ZP_SCRATCH_REG
rts
.pend
mul_word_7 .proc
; Placeholder for the in-place word multiply by 7: nothing is computed.
; Only issues an assembler warning and returns.
.warn "todo mul_word_7"
rts
.pend
mul_byte_9 .proc
; A = A*8 + A
; In: A = byte operand. Out: A = low 8 bits of operand*9.
; Overwrites P8ZP_SCRATCH_REG with the original operand.
sta P8ZP_SCRATCH_REG
asl a
asl a
asl a
; the shifts may have set carry; clear it so adc adds exactly the saved operand
clc
adc P8ZP_SCRATCH_REG
rts
.pend
mul_word_9 .proc
; Placeholder for the in-place word multiply by 9: nothing is computed.
; Only issues an assembler warning and returns.
.warn "todo mul_word_9"
rts
.pend
mul_byte_10 .proc
; A=(A*4 + A)*2
; In: A = byte operand. Out: A = low 8 bits of operand*10.
; Overwrites P8ZP_SCRATCH_REG with the original operand.
sta P8ZP_SCRATCH_REG
asl a
asl a
; the shifts may have set carry; clear it so adc adds exactly the saved operand
clc
adc P8ZP_SCRATCH_REG
; A now holds operand*5; double it
asl a
rts
.pend
mul_word_10 .proc
; Placeholder for the in-place word multiply by 10: nothing is computed.
; Only issues an assembler warning and returns.
.warn "todo mul_word_10"
rts
.pend
mul_byte_11 .proc
; A=(A*2 + A)*4 - A
; In: A = byte operand. Out: A = low 8 bits of operand*11.
; Overwrites P8ZP_SCRATCH_REG with the original operand.
sta P8ZP_SCRATCH_REG
asl a
; the shift may have set carry; clear it so adc adds exactly the saved operand
clc
adc P8ZP_SCRATCH_REG
; A holds operand*3; two shifts make it operand*12
asl a
asl a
; set carry so sbc subtracts exactly the saved operand (no borrow in)
sec
sbc P8ZP_SCRATCH_REG
rts
.pend
; mul_word_11 is skipped (too much code)
mul_byte_12 .proc
; A=(A*2 + A)*4
; In: A = byte operand. Out: A = low 8 bits of operand*12.
; Overwrites P8ZP_SCRATCH_REG with the original operand.
sta P8ZP_SCRATCH_REG
asl a
; the shift may have set carry; clear it so adc adds exactly the saved operand
clc
adc P8ZP_SCRATCH_REG
; A holds operand*3; two shifts make it operand*12
asl a
asl a
rts
.pend
mul_word_12 .proc
; Placeholder for the in-place word multiply by 12: nothing is computed.
; Only issues an assembler warning and returns.
.warn "todo mul_word_12"
rts
.pend
mul_byte_13 .proc
; A=(A*2 + A)*4 + A
; In: A = byte operand. Out: A = low 8 bits of operand*13.
; Overwrites P8ZP_SCRATCH_REG with the original operand.
sta P8ZP_SCRATCH_REG
asl a
; the shift may have set carry; clear it so adc adds exactly the saved operand
clc
adc P8ZP_SCRATCH_REG
; A holds operand*3; two shifts make it operand*12
asl a
asl a
; clear carry again before the final add of the saved operand
clc
adc P8ZP_SCRATCH_REG
rts
.pend
; mul_word_13 is skipped (too much code)
mul_byte_14 .proc
; A=(A*8 - A)*2
; In: A = byte operand. Out: A = low 8 bits of operand*14.
; Overwrites P8ZP_SCRATCH_REG with the original operand.
sta P8ZP_SCRATCH_REG
asl a
asl a
asl a
; set carry so sbc subtracts exactly the saved operand (no borrow in)
sec
sbc P8ZP_SCRATCH_REG
; A now holds operand*7; double it
asl a
rts
.pend
; mul_word_14 is skipped (too much code)
mul_byte_15 .proc
; A=A*16 - A
; In: A = byte operand. Out: A = low 8 bits of operand*15.
; Overwrites P8ZP_SCRATCH_REG with the original operand.
sta P8ZP_SCRATCH_REG
asl a
asl a
asl a
asl a
; set carry so sbc subtracts exactly the saved operand (no borrow in)
sec
sbc P8ZP_SCRATCH_REG
rts
.pend
mul_word_15 .proc
; Placeholder for the in-place word multiply by 15: nothing is computed.
; Only issues an assembler warning and returns.
.warn "todo mul_word_15"
rts
.pend
mul_byte_20 .proc
; A=(A*4 + A)*4
; In: A = byte operand. Out: A = low 8 bits of operand*20.
; Overwrites P8ZP_SCRATCH_REG with the original operand.
sta P8ZP_SCRATCH_REG
asl a
asl a
; the shifts may have set carry; clear it so adc adds exactly the saved operand
clc
adc P8ZP_SCRATCH_REG
; A holds operand*5; two shifts make it operand*20
asl a
asl a
rts
.pend
mul_word_20 .proc
; Placeholder for the in-place word multiply by 20: nothing is computed.
; Only issues an assembler warning and returns.
.warn "todo mul_word_20"
rts
.pend
mul_byte_25 .proc
; A=(A*2 + A)*8 + A
; In: A = byte operand. Out: A = low 8 bits of operand*25.
; Overwrites P8ZP_SCRATCH_REG with the original operand.
sta P8ZP_SCRATCH_REG
asl a
; the shift may have set carry; clear it so adc adds exactly the saved operand
clc
adc P8ZP_SCRATCH_REG
; A holds operand*3; three shifts make it operand*24
asl a
asl a
asl a
; clear carry again before the final add of the saved operand
clc
adc P8ZP_SCRATCH_REG
rts
.pend
mul_word_25 .proc
; Placeholder for the in-place word multiply by 25: nothing is computed.
; Only issues an assembler warning and returns.
.warn "todo mul_word_25"
rts
.pend
mul_byte_40 .proc
; A = _forties[A & 7], i.e. (A & 7) * 40 kept to 8 bits, via table lookup.
; Clobbers Y (used as the table index).
; NOTE(review): only the low 3 bits of A select the entry, so operands of 8
; or more alias to 0..7 instead of giving (A*40) & 255 - confirm intended.
and #7
tay
lda _forties,y
rts
; products 0..6 fit in a byte; 7*40 is reduced with & 255
_forties .byte 0*40, 1*40, 2*40, 3*40, 4*40, 5*40, 6*40, 7*40 & 255
.pend
mul_word_40 .proc
; Placeholder for the in-place word multiply by 40: nothing is computed.
; Only issues an assembler warning and returns.
.warn "todo mul_word_40"
rts
.pend
mul_byte_50 .proc
; A = _fifties[A & 7], i.e. (A & 7) * 50 kept to 8 bits, via table lookup.
; Clobbers Y (used as the table index).
; NOTE(review): only the low 3 bits of A select the entry, so operands of 8
; or more alias to 0..7 instead of giving (A*50) & 255 - confirm intended.
and #7
tay
lda _fifties, y
rts
; products 0..5 fit in a byte; 6*50 and 7*50 are reduced with & 255
_fifties .byte 0*50, 1*50, 2*50, 3*50, 4*50, 5*50, 6*50 & 255, 7*50 & 255
.pend
mul_word_50 .proc
; Placeholder for the in-place word multiply by 50: nothing is computed.
; Only issues an assembler warning and returns.
.warn "todo mul_word_50"
rts
.pend
mul_byte_80 .proc
; A = _eighties[A & 3], i.e. (A & 3) * 80, via table lookup.
; Clobbers Y (used as the table index).
; NOTE(review): only the low 2 bits of A select the entry, so operands of 4
; or more alias to 0..3 instead of giving (A*80) & 255 - confirm intended.
and #3
tay
lda _eighties, y
rts
; all four products (0, 80, 160, 240) fit in a byte
_eighties .byte 0*80, 1*80, 2*80, 3*80
.pend
mul_word_80 .proc
; Placeholder for the in-place word multiply by 80: nothing is computed.
; Only issues an assembler warning and returns.
.warn "todo mul_word_80"
rts
.pend
mul_byte_100 .proc
; A = _hundreds[A & 3], i.e. (A & 3) * 100 kept to 8 bits, via table lookup.
; Clobbers Y (used as the table index).
; NOTE(review): only the low 2 bits of A select the entry, so operands of 4
; or more alias to 0..3 instead of giving (A*100) & 255 - confirm intended.
and #3
tay
lda _hundreds, y
rts
; products 0..2 fit in a byte; 3*100 is reduced with & 255
_hundreds .byte 0*100, 1*100, 2*100, 3*100 & 255
.pend
mul_word_100 .proc
; Placeholder for the in-place word multiply by 100: nothing is computed.
; Only issues an assembler warning and returns.
.warn "todo mul_word_100"
rts
.pend
; ----------- end optimized multiplications -----------
sign_b .proc
lda P8ESTACK_LO+1,x
beq _sign_zero

View File

@ -36,9 +36,8 @@ internal class AsmGen(private val program: Program,
private val outputDir: Path): IAssemblyGenerator {
// for expressions and augmented assignments:
// TODO add 50, 80 and 100 as well (especially for the commanderX16)
val optimizedByteMultiplications = setOf(3,5,6,7,9,10,11,12,13,14,15,20,25,40)
val optimizedWordMultiplications = setOf(3,5,6,7,9,10,12,15,20,25,40)
val optimizedByteMultiplications = setOf(3,5,6,7,9,10,11,12,13,14,15,20,25,40,50,80,100)
val optimizedWordMultiplications = setOf(3,5,6,7,9,10,12,15,20,25,40,50,80,100)
private val assemblyLines = mutableListOf<String>()
private val globalFloatConsts = mutableMapOf<Double, String>() // all float values in the entire program (value -> varname)

View File

@ -583,14 +583,14 @@ internal class AugmentableAssignmentAsmGen(private val program: Program,
"*" -> {
if(dt == DataType.UBYTE) {
if(value in asmgen.optimizedByteMultiplications) {
TODO("optimized var mul ubyte litval $value")
asmgen.out(" lda $name | jsr math.mul_byte_$value | sta $name")
} else {
TODO("var mul ubyte litval $value")
// asmgen.out(" jsr prog8_lib.mul_byte")
}
} else {
if(value.absoluteValue in asmgen.optimizedByteMultiplications) {
TODO("optimized var mul sbyte litval $value")
asmgen.out(" lda $name | jsr math.mul_byte_$value | sta $name")
} else {
TODO("var mul sbyte litval $value")
// asmgen.out(" jsr prog8_lib.mul_byte")

View File

@ -1,7 +1,7 @@
%import c64textio
;%import c64flt
;%option enable_floats
; %zeropage kernalsafe
%zeropage basicsafe
; TODO system reset should also work when kernal is paged out
@ -10,7 +10,25 @@ main {
sub start() {
c64.CHROUT('*')
ubyte ub1
ubyte ii
for ii in 0 to 10 {
; ub1 = ii
; ub1 *= 40 ; TODO implement non-stack optimized muls
; todo a = EXPRESSION * const -> is that optimized?
ub1 = 1+ii * 40
txt.print_ub(ub1)
c64.CHROUT(',')
ub1 = 1+ii * 50
txt.print_ub(ub1)
c64.CHROUT(',')
ub1 = 1+ii * 80
txt.print_ub(ub1)
c64.CHROUT(',')
ub1 = 1+ii * 100
txt.print_ub(ub1)
c64.CHROUT('\n')
}
;asmsub clear_screen (ubyte char @ A, ubyte color @ Y) clobbers(A) { ...}
; TODO dont cause name conflict if we define sub or sub with param 'color' or even a var 'color' later.