optimized memset and memcopy on CX16, memcopy can deal with any size now

This commit is contained in:
Irmen de Jong 2020-11-01 07:36:40 +01:00
parent e05ea887f6
commit 431f2a2088
6 changed files with 132 additions and 36 deletions

View File

@ -1388,8 +1388,8 @@ func_rndw .proc
.pend
func_memcopy .proc
; note: clobbers A,Y
func_memcopy255 .proc
; fast memcopy of up to 255 bytes, note: clobbers A,Y
inx
stx P8ZP_SCRATCH_REG
lda P8ESTACK_LO+2,x
@ -1414,6 +1414,50 @@ func_memcopy .proc
rts
.pend
func_memcopy .proc
; memcopy of any number of bytes, note: clobbers A,Y
;
; Copies a 16-bit number of bytes from the address placed in P8ZP_SCRATCH_W1
; to the address placed in P8ZP_SCRATCH_W2. All three values are read from
; P8ESTACK_LO/P8ESTACK_HI, indexed by X:
;   slot +2 : source address      -> P8ZP_SCRATCH_W1
;   slot +1 : destination address -> P8ZP_SCRATCH_W2
;   slot +0 : byte count (lo/hi)
; The copy runs from low to high addresses: first 'count hi' full pages of 256
; bytes, then the remaining 'count lo' bytes.
; Besides A and Y, P8ZP_SCRATCH_W1, P8ZP_SCRATCH_W2 and P8ZP_SCRATCH_REG are
; overwritten. On return X is 3 higher than it was on entry.
; NOTE(review): X presumably is the evaluation stack pointer, and the +3 drops
; the three arguments -- confirm against the calling convention.
inx
; remember the adjusted X; X itself is reused as loop counter below
stx P8ZP_SCRATCH_REG
; source address -> W1
lda P8ESTACK_LO+2,x
sta P8ZP_SCRATCH_W1
lda P8ESTACK_HI+2,x
sta P8ZP_SCRATCH_W1+1
; destination address -> W2
lda P8ESTACK_LO+1,x
sta P8ZP_SCRATCH_W2
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_W2+1
; park the low byte of the count on the cpu stack until the page loop is done
lda P8ESTACK_LO,x
pha
; NOTE(review): the high byte is pushed and pulled straight back; the pha/pla
; pair around 'ldy #0' looks redundant -- confirm before simplifying.
lda P8ESTACK_HI,x
pha
ldy #0
pla
; X = number of full pages to copy; skip the page loop when it is zero
tax
beq _remain
- lda (P8ZP_SCRATCH_W1),y ; move a page at a time
sta (P8ZP_SCRATCH_W2),y
iny
; Y wraps around to 0 after 256 bytes, which ends the inner loop
bne -
; advance both pointers to the next page
inc P8ZP_SCRATCH_W1+1
inc P8ZP_SCRATCH_W2+1
dex
bne -
; Y is 0 here, both when the page loop was skipped and after it has finished
_remain pla
; X = low byte of the count = number of bytes left; nothing to do when zero
tax
beq _done
- lda (P8ZP_SCRATCH_W1),y ; move the remaining bytes
sta (P8ZP_SCRATCH_W2),y
iny
dex
bne -
; restore the saved X and step it past the two remaining slots
_done ldx P8ZP_SCRATCH_REG
inx
inx
rts
.pend
func_memset .proc
; note: clobbers A,Y
inx
@ -1439,7 +1483,6 @@ func_memsetw .proc
; -- fill memory from (SCRATCH_ZPWORD1) number of words in SCRATCH_ZPWORD2, with word value in AY.
inx
stx P8ZP_SCRATCH_REG
lda P8ESTACK_LO+2,x
sta P8ZP_SCRATCH_W1
lda P8ESTACK_HI+2,x
@ -1448,10 +1491,13 @@ func_memsetw .proc
sta P8ZP_SCRATCH_W2
lda P8ESTACK_HI+1,x
sta P8ZP_SCRATCH_W2+1
txa
pha
lda P8ESTACK_LO,x
ldy P8ESTACK_HI,x
jsr memsetw
ldx P8ZP_SCRATCH_REG
pla
tax
inx
inx
rts

View File

@ -7,6 +7,8 @@ import prog8.ast.expressions.*
import prog8.ast.statements.DirectMemoryWrite
import prog8.ast.statements.FunctionCallStatement
import prog8.compiler.AssemblyError
import prog8.compiler.target.CompilationTarget
import prog8.compiler.target.Cx16Target
import prog8.compiler.target.c64.codegen.assignment.AsmAssignSource
import prog8.compiler.target.c64.codegen.assignment.AsmAssignTarget
import prog8.compiler.target.c64.codegen.assignment.AsmAssignment
@ -78,8 +80,8 @@ internal class BuiltinFunctionsAsmGen(private val program: Program, private val
"set_irqd" -> asmgen.out(" sei")
"strlen" -> funcStrlen(fcall, resultToStack)
"strcmp" -> funcStrcmp(fcall, func, resultToStack)
"substr", "leftstr", "rightstr",
"memcopy", "memset", "memsetw" -> {
"memcopy", "memset", "memsetw" -> funcMemSetCopy(fcall, func, functionName)
"substr", "leftstr", "rightstr" -> {
translateArguments(fcall.args, func)
asmgen.out(" jsr prog8_lib.func_$functionName")
}
@ -88,6 +90,76 @@ internal class BuiltinFunctionsAsmGen(private val program: Program, private val
}
}
/**
 * Generates the code for a call to one of the builtin functions `memset`, `memsetw` or `memcopy`.
 *
 * Strategy, in order:
 * 1. `memcopy` whose byte count fits in a single byte (see [memcopyCountFitsInByte]) is handled by
 *    `prog8_lib.func_memcopy255`, on every target.
 * 2. On the Cx16 target, `memset` and the remaining `memcopy` calls use the ROM functions
 *    `cx16.memory_fill` / `cx16.memory_copy`: the arguments are assigned to `cx16.r0`, `cx16.r1` and
 *    either register A (`memset`) or `cx16.r2` (`memcopy`), and X is saved/restored around the call.
 * 3. Everything else pushes its arguments with [translateArguments] and calls
 *    `prog8_lib.func_<functionName>`.
 *
 * @param fcall the call being translated; its three arguments are read positionally.
 * @param func signature of the builtin, passed on to [translateArguments].
 * @param functionName name of the builtin: `memset`, `memsetw` or `memcopy`.
 */
private fun funcMemSetCopy(fcall: IFunctionCall, func: FSignature, functionName: String) {
    if(functionName=="memcopy" && memcopyCountFitsInByte(fcall)) {
        // fast memcopy of up to 255
        translateArguments(fcall.args, func)
        asmgen.out(" jsr prog8_lib.func_memcopy255")
        return
    }

    val onCx16 = CompilationTarget.instance is Cx16Target
    when {
        onCx16 && functionName=="memset" -> {
            // use the ROM function of the Cx16
            assignMemFuncArgToWordVar(fcall, 0, "cx16.r0")
            assignMemFuncArgToWordVar(fcall, 1, "cx16.r1")
            assignMemFuncArgToRegisterA(fcall, 2)
            callCx16MemRoutine(fcall, "cx16.memory_fill")
        }
        onCx16 && functionName=="memcopy" -> {
            // use the ROM function of the Cx16
            assignMemFuncArgToWordVar(fcall, 0, "cx16.r0")
            assignMemFuncArgToWordVar(fcall, 1, "cx16.r1")
            assignMemFuncArgToWordVar(fcall, 2, "cx16.r2")
            callCx16MemRoutine(fcall, "cx16.memory_copy")
        }
        else -> {
            // generic library routine; this also covers memsetw on the Cx16
            translateArguments(fcall.args, func)
            asmgen.out(" jsr prog8_lib.func_$functionName")
        }
    }
}

/** True when the third argument of [fcall] is a constant of at most 255, or has a byte datatype. */
private fun memcopyCountFitsInByte(fcall: IFunctionCall): Boolean {
    val count = fcall.args[2].constValue(program)?.number?.toInt()
    val countDt = fcall.args[2].inferType(program)
    return (count!=null && count <= 255) || countDt.istype(DataType.UBYTE) || countDt.istype(DataType.BYTE)
}

/** Emits the assignment of argument [argIndex] of [fcall] to the UWORD variable [variableAsmName]. */
private fun assignMemFuncArgToWordVar(fcall: IFunctionCall, argIndex: Int, variableAsmName: String) {
    val src = AsmAssignSource.fromAstSource(fcall.args[argIndex], program, asmgen)
    val tgt = AsmAssignTarget(TargetStorageKind.VARIABLE, program, asmgen, DataType.UWORD, null, variableAsmName = variableAsmName)
    asmgen.translateNormalAssignment(AsmAssignment(src, tgt, false, Position.DUMMY))
}

/** Emits the assignment of argument [argIndex] of [fcall] to register A, as an UBYTE. */
private fun assignMemFuncArgToRegisterA(fcall: IFunctionCall, argIndex: Int) {
    val src = AsmAssignSource.fromAstSource(fcall.args[argIndex], program, asmgen)
    val tgt = AsmAssignTarget(TargetStorageKind.REGISTER, program, asmgen, DataType.UBYTE, null, register = RegisterOrPair.A)
    asmgen.translateNormalAssignment(AsmAssignment(src, tgt, false, Position.DUMMY))
}

/**
 * Emits `jsr` [routine] with the X register saved before and restored after the call.
 * NOTE(review): presumably needed because the routine may change X -- confirm against the Cx16 documentation.
 */
private fun callCx16MemRoutine(fcall: IFunctionCall, routine: String) {
    val sub = checkNotNull((fcall as FunctionCallStatement).definingSubroutine()) {
        "call to $routine is not located inside a subroutine"
    }
    asmgen.saveRegister(CpuRegister.X, false, sub)
    asmgen.out(" jsr $routine")
    asmgen.restoreRegister(CpuRegister.X, false)
}
private fun funcStrcmp(fcall: IFunctionCall, func: FSignature, resultToStack: Boolean) {
translateArguments(fcall.args, func)
if(resultToStack)

View File

@ -79,7 +79,7 @@ val BuiltinFunctions = mapOf(
"memcopy" to FSignature(false, listOf(
FParam("from", IterableDatatypes + DataType.UWORD),
FParam("to", IterableDatatypes + DataType.UWORD),
FParam("numbytes", setOf(DataType.UBYTE))), null),
FParam("numbytes", setOf(DataType.UBYTE, DataType.UWORD))), null),
"memset" to FSignature(false, listOf(
FParam("address", IterableDatatypes + DataType.UWORD),
FParam("numbytes", setOf(DataType.UWORD)),

View File

@ -765,12 +765,12 @@ sort(array)
Strings and memory blocks
^^^^^^^^^^^^^^^^^^^^^^^^^
memcopy(from, to, numbytes)
Efficiently copy a number of bytes (1 - 256) from a memory location to another.
Efficiently copy a number of bytes from a memory location to another.
NOTE: 'to' must NOT overlap with 'from', unless it is *before* 'from'.
Because this function imposes some overhead to handle the parameters,
it is only faster if the number of bytes is larger than a certain threshold.
Compare the generated code to see if it was beneficial or not.
The most efficient will always be to write a specialized copy routine in assembly yourself!
The most efficient will often be to write a specialized copy routine in assembly yourself!
memset(address, numbytes, bytevalue)
Efficiently set a part of memory to the given (u)byte value.

View File

@ -2,8 +2,6 @@
TODO
====
- make memset(w) and memcopy able to work with >256 bytes
- after that: make memset and memcopy use the ROM routines on the CX16
- calling convention for builtin functions no longer via stack but via statically allocated vars inside the subroutine proc (just as normal subroutines)
- make it possible to use cpu opcodes such as 'nop' as variable names by prefixing all asm vars with something such as '_'
- option to load the built-in library files from a directory instead of the embedded ones (for easier library development/debugging)

View File

@ -9,36 +9,16 @@
main {
sub start() {
float fl
const uword ADDR = $0400
fl = getfloat()
floats.print_f(fl)
txt.chrout('\n')
memset(ADDR, 40*25, 100)
memsetw(ADDR, 20*10, $3031)
memcopy(ADDR, ADDR+40*12, 20*10*2)
;memcopy(ADDR, ADDR+40*12, 255)
testX()
}
sub chrin() -> ubyte {
return 99
}
sub getstr() -> str {
@($d020)++
return "foobar"
}
sub getfloat() -> float {
float xx
xx = 123.456789
return xx
}
sub mcp(uword from, uword dest, ubyte length) {
txt.print_uw(from)
txt.print_uw(dest)
txt.print_ub(length)
}
asmsub testX() {
%asm {{
stx _saveX