optimized memset and memcopy on CX16, memcopy can deal with any size now

This commit is contained in:
Irmen de Jong 2020-11-01 07:36:40 +01:00
parent e05ea887f6
commit 431f2a2088
6 changed files with 132 additions and 36 deletions

View File

@ -1388,8 +1388,8 @@ func_rndw .proc
.pend .pend
func_memcopy .proc func_memcopy255 .proc
; note: clobbers A,Y ; fast memcopy of up to 255 bytes, note: clobbers A,Y
inx inx
stx P8ZP_SCRATCH_REG stx P8ZP_SCRATCH_REG
lda P8ESTACK_LO+2,x lda P8ESTACK_LO+2,x
@ -1414,6 +1414,50 @@ func_memcopy .proc
rts rts
.pend .pend
func_memcopy .proc
; memcopy of any number of bytes, note: clobbers A,Y
; Inputs are taken from the evaluation stack (P8ESTACK_LO/HI indexed by X, after the inx below):
;   slot +2 : address that bytes are read from   -> P8ZP_SCRATCH_W1
;   slot +1 : address that bytes are written to  -> P8ZP_SCRATCH_W2
;   slot +0 : 16-bit byte count (HI = number of whole 256-byte pages, LO = leftover bytes)
; The copy runs from low to high addresses, one byte at a time.
; X is needed as the loop counter, so its (incremented) value is parked in P8ZP_SCRATCH_REG.
inx
stx P8ZP_SCRATCH_REG
lda P8ESTACK_LO+2,x
sta P8ZP_SCRATCH_W1
lda P8ESTACK_HI+2,x
sta P8ZP_SCRATCH_W1+1
lda P8ESTACK_LO+1,x
sta P8ZP_SCRATCH_W2
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_W2+1
; both count bytes go onto the CPU stack because X is about to be reused as counter;
; the low byte is pushed first so that the high byte (page count) is pulled first.
lda P8ESTACK_LO,x
pha
lda P8ESTACK_HI,x
pha
ldy #0
; pla/tax come after the ldy so that the zero flag tested by beq reflects the page count, not Y
pla
tax
beq _remain
- lda (P8ZP_SCRATCH_W1),y ; move a page at a time
sta (P8ZP_SCRATCH_W2),y
iny
; inner loop: Y wraps from 255 back to 0 after 256 bytes
bne -
; advance both pointers to the next page, then repeat while pages remain
inc P8ZP_SCRATCH_W1+1
inc P8ZP_SCRATCH_W2+1
dex
bne -
; Y is 0 here on both paths (fresh from ldy #0, or wrapped around by the page loop)
_remain pla
tax
; low byte of the count is zero: nothing left to copy
beq _done
- lda (P8ZP_SCRATCH_W1),y ; move the remaining bytes
sta (P8ZP_SCRATCH_W2),y
iny
dex
bne -
; restore the saved X and add 2 more: X ends up 3 higher than on entry.
; NOTE(review): presumably this drops the three argument slots from the evaluation stack - confirm against the stack convention.
_done ldx P8ZP_SCRATCH_REG
inx
inx
rts
.pend
func_memset .proc func_memset .proc
; note: clobbers A,Y ; note: clobbers A,Y
inx inx
@ -1439,7 +1483,6 @@ func_memsetw .proc
; -- fill memory from (SCRATCH_ZPWORD1) number of words in SCRATCH_ZPWORD2, with word value in AY. ; -- fill memory from (SCRATCH_ZPWORD1) number of words in SCRATCH_ZPWORD2, with word value in AY.
inx inx
stx P8ZP_SCRATCH_REG
lda P8ESTACK_LO+2,x lda P8ESTACK_LO+2,x
sta P8ZP_SCRATCH_W1 sta P8ZP_SCRATCH_W1
lda P8ESTACK_HI+2,x lda P8ESTACK_HI+2,x
@ -1448,10 +1491,13 @@ func_memsetw .proc
sta P8ZP_SCRATCH_W2 sta P8ZP_SCRATCH_W2
lda P8ESTACK_HI+1,x lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_W2+1 sta P8ZP_SCRATCH_W2+1
txa
pha
lda P8ESTACK_LO,x lda P8ESTACK_LO,x
ldy P8ESTACK_HI,x ldy P8ESTACK_HI,x
jsr memsetw jsr memsetw
ldx P8ZP_SCRATCH_REG pla
tax
inx inx
inx inx
rts rts

View File

@ -7,6 +7,8 @@ import prog8.ast.expressions.*
import prog8.ast.statements.DirectMemoryWrite import prog8.ast.statements.DirectMemoryWrite
import prog8.ast.statements.FunctionCallStatement import prog8.ast.statements.FunctionCallStatement
import prog8.compiler.AssemblyError import prog8.compiler.AssemblyError
import prog8.compiler.target.CompilationTarget
import prog8.compiler.target.Cx16Target
import prog8.compiler.target.c64.codegen.assignment.AsmAssignSource import prog8.compiler.target.c64.codegen.assignment.AsmAssignSource
import prog8.compiler.target.c64.codegen.assignment.AsmAssignTarget import prog8.compiler.target.c64.codegen.assignment.AsmAssignTarget
import prog8.compiler.target.c64.codegen.assignment.AsmAssignment import prog8.compiler.target.c64.codegen.assignment.AsmAssignment
@ -78,8 +80,8 @@ internal class BuiltinFunctionsAsmGen(private val program: Program, private val
"set_irqd" -> asmgen.out(" sei") "set_irqd" -> asmgen.out(" sei")
"strlen" -> funcStrlen(fcall, resultToStack) "strlen" -> funcStrlen(fcall, resultToStack)
"strcmp" -> funcStrcmp(fcall, func, resultToStack) "strcmp" -> funcStrcmp(fcall, func, resultToStack)
"substr", "leftstr", "rightstr", "memcopy", "memset", "memsetw" -> funcMemSetCopy(fcall, func, functionName)
"memcopy", "memset", "memsetw" -> { "substr", "leftstr", "rightstr" -> {
translateArguments(fcall.args, func) translateArguments(fcall.args, func)
asmgen.out(" jsr prog8_lib.func_$functionName") asmgen.out(" jsr prog8_lib.func_$functionName")
} }
@ -88,6 +90,76 @@ internal class BuiltinFunctionsAsmGen(private val program: Program, private val
} }
} }
/**
 * Emits the assembly for a call to one of the builtin functions `memset`, `memsetw` or `memcopy`.
 *
 * Strategy:
 * - `memcopy` with a byte count that is known to fit in a single byte always calls the
 *   short library routine `prog8_lib.func_memcopy255`, regardless of the compilation target.
 * - On the Cx16 target, `memset` and the remaining `memcopy` calls load their arguments into
 *   `cx16.r0`..`cx16.r2` (and A for the fill value) and call `cx16.memory_fill` / `cx16.memory_copy`;
 *   `memsetw` calls the library routine `prog8_lib.func_memsetw`.
 * - On every other target the arguments are translated via [translateArguments] and
 *   `prog8_lib.func_<functionName>` is called.
 *
 * @param fcall the builtin function call being translated
 * @param func the signature of the builtin function, passed on to [translateArguments]
 * @param functionName name of the builtin; on the Cx16 target any name other than
 *                     `memset`, `memcopy` or `memsetw` produces no output
 */
private fun funcMemSetCopy(fcall: IFunctionCall, func: FSignature, functionName: String) {
    if(functionName=="memcopy" && memcopyCountFitsInByte(fcall)) {
        // fast memcopy of up to 255
        translateArguments(fcall.args, func)
        asmgen.out(" jsr prog8_lib.func_memcopy255")
        return
    }

    if(CompilationTarget.instance !is Cx16Target) {
        translateArguments(fcall.args, func)
        asmgen.out(" jsr prog8_lib.func_$functionName")
        return
    }

    when(functionName) {
        "memset" -> {
            // use the ROM function of the Cx16
            assignArgToCx16WordVariable(fcall, 0, "cx16.r0")
            assignArgToCx16WordVariable(fcall, 1, "cx16.r1")
            // the fill value is loaded last, so the earlier assignments cannot overwrite A
            val fillValue = AsmAssignSource.fromAstSource(fcall.args[2], program, asmgen)
            val registerA = AsmAssignTarget(TargetStorageKind.REGISTER, program, asmgen, DataType.UBYTE, null, register = RegisterOrPair.A)
            asmgen.translateNormalAssignment(AsmAssignment(fillValue, registerA, false, Position.DUMMY))
            callCx16RomRoutinePreservingX(fcall, "cx16.memory_fill")
        }
        "memcopy" -> {
            // use the ROM function of the Cx16
            assignArgToCx16WordVariable(fcall, 0, "cx16.r0")
            assignArgToCx16WordVariable(fcall, 1, "cx16.r1")
            assignArgToCx16WordVariable(fcall, 2, "cx16.r2")
            callCx16RomRoutinePreservingX(fcall, "cx16.memory_copy")
        }
        "memsetw" -> {
            translateArguments(fcall.args, func)
            asmgen.out(" jsr prog8_lib.func_memsetw")
        }
    }
}

/** Tells whether the byte count (third argument) of a memcopy call is a constant of at most 255, or has a byte datatype. */
private fun memcopyCountFitsInByte(fcall: IFunctionCall): Boolean {
    val count = fcall.args[2].constValue(program)?.number?.toInt()
    val countDt = fcall.args[2].inferType(program)
    return (count!=null && count <= 255) || countDt.istype(DataType.UBYTE) || countDt.istype(DataType.BYTE)
}

/** Emits an assignment of argument number [argIndex] of [fcall] to the UWORD assembly variable [variableAsmName]. */
private fun assignArgToCx16WordVariable(fcall: IFunctionCall, argIndex: Int, variableAsmName: String) {
    val src = AsmAssignSource.fromAstSource(fcall.args[argIndex], program, asmgen)
    val tgt = AsmAssignTarget(TargetStorageKind.VARIABLE, program, asmgen, DataType.UWORD, null, variableAsmName = variableAsmName)
    asmgen.translateNormalAssignment(AsmAssignment(src, tgt, false, Position.DUMMY))
}

/** Emits a `jsr` to [routineName], wrapped in a save and restore of the X register. */
private fun callCx16RomRoutinePreservingX(fcall: IFunctionCall, routineName: String) {
    // saving X needs the subroutine that contains the call; fail with a clear message
    // instead of an anonymous ClassCastException/NullPointerException when it is unavailable
    val sub = checkNotNull((fcall as? FunctionCallStatement)?.definingSubroutine()) {
        "memset/memcopy must be called as a statement inside a subroutine"
    }
    asmgen.saveRegister(CpuRegister.X, false, sub)
    asmgen.out(" jsr $routineName")
    asmgen.restoreRegister(CpuRegister.X, false)
}
private fun funcStrcmp(fcall: IFunctionCall, func: FSignature, resultToStack: Boolean) { private fun funcStrcmp(fcall: IFunctionCall, func: FSignature, resultToStack: Boolean) {
translateArguments(fcall.args, func) translateArguments(fcall.args, func)
if(resultToStack) if(resultToStack)

View File

@ -79,7 +79,7 @@ val BuiltinFunctions = mapOf(
"memcopy" to FSignature(false, listOf( "memcopy" to FSignature(false, listOf(
FParam("from", IterableDatatypes + DataType.UWORD), FParam("from", IterableDatatypes + DataType.UWORD),
FParam("to", IterableDatatypes + DataType.UWORD), FParam("to", IterableDatatypes + DataType.UWORD),
FParam("numbytes", setOf(DataType.UBYTE))), null), FParam("numbytes", setOf(DataType.UBYTE, DataType.UWORD))), null),
"memset" to FSignature(false, listOf( "memset" to FSignature(false, listOf(
FParam("address", IterableDatatypes + DataType.UWORD), FParam("address", IterableDatatypes + DataType.UWORD),
FParam("numbytes", setOf(DataType.UWORD)), FParam("numbytes", setOf(DataType.UWORD)),

View File

@ -765,12 +765,12 @@ sort(array)
Strings and memory blocks Strings and memory blocks
^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^
memcopy(from, to, numbytes) memcopy(from, to, numbytes)
Efficiently copy a number of bytes (1 - 256) from a memory location to another. Efficiently copy a number of bytes from a memory location to another.
NOTE: 'to' must NOT overlap with 'from', unless it is *before* 'from'. NOTE: 'to' must NOT overlap with 'from', unless it is *before* 'from'.
Because this function imposes some overhead to handle the parameters, Because this function imposes some overhead to handle the parameters,
it is only faster if the number of bytes is larger than a certain threshold. it is only faster if the number of bytes is larger than a certain threshold.
Compare the generated code to see if it was beneficial or not. Compare the generated code to see if it was beneficial or not.
The most efficient will always be to write a specialized copy routine in assembly yourself! The most efficient will often be to write a specialized copy routine in assembly yourself!
memset(address, numbytes, bytevalue) memset(address, numbytes, bytevalue)
Efficiently set a part of memory to the given (u)byte value. Efficiently set a part of memory to the given (u)byte value.

View File

@ -2,8 +2,6 @@
TODO TODO
==== ====
- make memset(w) and memcopy able to work with >256 bytes
- after that: make memset and memcopy use the ROM routines on the CX16
- calling convention for builtin functions no longer via stack but via statically allocated vars inside the subroutine proc (just as normal subroutines) - calling convention for builtin functions no longer via stack but via statically allocated vars inside the subroutine proc (just as normal subroutines)
- make it possible to use cpu opcodes such as 'nop' as variable names by prefixing all asm vars with something such as '_' - make it possible to use cpu opcodes such as 'nop' as variable names by prefixing all asm vars with something such as '_'
- option to load the built-in library files from a directory instead of the embedded ones (for easier library development/debugging) - option to load the built-in library files from a directory instead of the embedded ones (for easier library development/debugging)

View File

@ -9,36 +9,16 @@
main { main {
sub start() { sub start() {
float fl const uword ADDR = $0400
fl = getfloat() memset(ADDR, 40*25, 100)
floats.print_f(fl) memsetw(ADDR, 20*10, $3031)
txt.chrout('\n') memcopy(ADDR, ADDR+40*12, 20*10*2)
;memcopy(ADDR, ADDR+40*12, 255)
testX() testX()
} }
sub chrin() -> ubyte {
return 99
}
sub getstr() -> str {
@($d020)++
return "foobar"
}
sub getfloat() -> float {
float xx
xx = 123.456789
return xx
}
sub mcp(uword from, uword dest, ubyte length) {
txt.print_uw(from)
txt.print_uw(dest)
txt.print_ub(length)
}
asmsub testX() { asmsub testX() {
%asm {{ %asm {{
stx _saveX stx _saveX