mirror of
https://github.com/irmen/prog8.git
synced 2024-11-26 11:49:22 +00:00
optimized memset and memcopy on CX16, memcopy can deal with any size now
This commit is contained in:
parent
e05ea887f6
commit
431f2a2088
@ -1388,8 +1388,8 @@ func_rndw .proc
|
|||||||
.pend
|
.pend
|
||||||
|
|
||||||
|
|
||||||
func_memcopy .proc
|
func_memcopy255 .proc
|
||||||
; note: clobbers A,Y
|
; fast memcopy of up to 255 bytes, note: clobbers A,Y
|
||||||
inx
|
inx
|
||||||
stx P8ZP_SCRATCH_REG
|
stx P8ZP_SCRATCH_REG
|
||||||
lda P8ESTACK_LO+2,x
|
lda P8ESTACK_LO+2,x
|
||||||
@ -1414,6 +1414,50 @@ func_memcopy .proc
|
|||||||
rts
|
rts
|
||||||
.pend
|
.pend
|
||||||
|
|
||||||
|
func_memcopy	.proc
	; -- copy a memory block of arbitrary size (16-bit byte count).
	;    The three arguments are read from the evaluation stack (indexed by X):
	;    source address at +2, destination address at +1, byte count at +0.
	;    note: clobbers A,Y
		inx
		stx  P8ZP_SCRATCH_REG		; X doubles as loop counter below; reloaded at _done
		lda  P8ESTACK_LO+2,x
		sta  P8ZP_SCRATCH_W1		; W1 = source pointer
		lda  P8ESTACK_HI+2,x
		sta  P8ZP_SCRATCH_W1+1
		lda  P8ESTACK_LO+1,x
		sta  P8ZP_SCRATCH_W2		; W2 = destination pointer
		lda  P8ESTACK_HI+1,x
		sta  P8ZP_SCRATCH_W2+1
		lda  P8ESTACK_LO,x
		pha				; park count low byte (the tail length) until _remain
		ldy  #0
		lda  P8ESTACK_HI,x
		tax				; X = count high byte = number of whole 256-byte pages
		beq  _remain			; fewer than 256 bytes: only the tail loop runs

_page		lda  (P8ZP_SCRATCH_W1),y	; copy one full page; Y wraps back to 0 when done
		sta  (P8ZP_SCRATCH_W2),y
		iny
		bne  _page
		inc  P8ZP_SCRATCH_W1+1		; advance both pointers to the next page
		inc  P8ZP_SCRATCH_W2+1
		dex
		bne  _page

_remain		pla
		tax				; X = number of bytes left after the whole pages
		beq  _done
_tail		lda  (P8ZP_SCRATCH_W1),y	; copy the remaining bytes, Y starts at 0 here
		sta  (P8ZP_SCRATCH_W2),y
		iny
		dex
		bne  _tail

_done		ldx  P8ZP_SCRATCH_REG		; back to the X value saved after the first inx
		inx				; together with the first inx this steps X past
		inx				; all three argument slots
		rts
		.pend
|
||||||
|
|
||||||
func_memset .proc
|
func_memset .proc
|
||||||
; note: clobbers A,Y
|
; note: clobbers A,Y
|
||||||
inx
|
inx
|
||||||
@ -1439,7 +1483,6 @@ func_memsetw .proc
|
|||||||
; -- fill memory from (SCRATCH_ZPWORD1) number of words in SCRATCH_ZPWORD2, with word value in AY.
|
; -- fill memory from (SCRATCH_ZPWORD1) number of words in SCRATCH_ZPWORD2, with word value in AY.
|
||||||
|
|
||||||
inx
|
inx
|
||||||
stx P8ZP_SCRATCH_REG
|
|
||||||
lda P8ESTACK_LO+2,x
|
lda P8ESTACK_LO+2,x
|
||||||
sta P8ZP_SCRATCH_W1
|
sta P8ZP_SCRATCH_W1
|
||||||
lda P8ESTACK_HI+2,x
|
lda P8ESTACK_HI+2,x
|
||||||
@ -1448,10 +1491,13 @@ func_memsetw .proc
|
|||||||
sta P8ZP_SCRATCH_W2
|
sta P8ZP_SCRATCH_W2
|
||||||
lda P8ESTACK_HI+1,x
|
lda P8ESTACK_HI+1,x
|
||||||
sta P8ZP_SCRATCH_W2+1
|
sta P8ZP_SCRATCH_W2+1
|
||||||
|
txa
|
||||||
|
pha
|
||||||
lda P8ESTACK_LO,x
|
lda P8ESTACK_LO,x
|
||||||
ldy P8ESTACK_HI,x
|
ldy P8ESTACK_HI,x
|
||||||
jsr memsetw
|
jsr memsetw
|
||||||
ldx P8ZP_SCRATCH_REG
|
pla
|
||||||
|
tax
|
||||||
inx
|
inx
|
||||||
inx
|
inx
|
||||||
rts
|
rts
|
||||||
|
@ -7,6 +7,8 @@ import prog8.ast.expressions.*
|
|||||||
import prog8.ast.statements.DirectMemoryWrite
|
import prog8.ast.statements.DirectMemoryWrite
|
||||||
import prog8.ast.statements.FunctionCallStatement
|
import prog8.ast.statements.FunctionCallStatement
|
||||||
import prog8.compiler.AssemblyError
|
import prog8.compiler.AssemblyError
|
||||||
|
import prog8.compiler.target.CompilationTarget
|
||||||
|
import prog8.compiler.target.Cx16Target
|
||||||
import prog8.compiler.target.c64.codegen.assignment.AsmAssignSource
|
import prog8.compiler.target.c64.codegen.assignment.AsmAssignSource
|
||||||
import prog8.compiler.target.c64.codegen.assignment.AsmAssignTarget
|
import prog8.compiler.target.c64.codegen.assignment.AsmAssignTarget
|
||||||
import prog8.compiler.target.c64.codegen.assignment.AsmAssignment
|
import prog8.compiler.target.c64.codegen.assignment.AsmAssignment
|
||||||
@ -78,8 +80,8 @@ internal class BuiltinFunctionsAsmGen(private val program: Program, private val
|
|||||||
"set_irqd" -> asmgen.out(" sei")
|
"set_irqd" -> asmgen.out(" sei")
|
||||||
"strlen" -> funcStrlen(fcall, resultToStack)
|
"strlen" -> funcStrlen(fcall, resultToStack)
|
||||||
"strcmp" -> funcStrcmp(fcall, func, resultToStack)
|
"strcmp" -> funcStrcmp(fcall, func, resultToStack)
|
||||||
"substr", "leftstr", "rightstr",
|
"memcopy", "memset", "memsetw" -> funcMemSetCopy(fcall, func, functionName)
|
||||||
"memcopy", "memset", "memsetw" -> {
|
"substr", "leftstr", "rightstr" -> {
|
||||||
translateArguments(fcall.args, func)
|
translateArguments(fcall.args, func)
|
||||||
asmgen.out(" jsr prog8_lib.func_$functionName")
|
asmgen.out(" jsr prog8_lib.func_$functionName")
|
||||||
}
|
}
|
||||||
@ -88,6 +90,76 @@ internal class BuiltinFunctionsAsmGen(private val program: Program, private val
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Generates the code for the memory block builtins `memcopy`, `memset` and `memsetw`.
 *
 * - A `memcopy` whose byte count is a constant of at most 255, or whose count expression
 *   is byte typed, always calls `prog8_lib.func_memcopy255`, on every target.
 * - On the Cx16 target, `memset` and the remaining `memcopy` cases load their arguments
 *   into `cx16.r0`, `cx16.r1` (and `cx16.r2` or the A register) and call the ROM
 *   routines `cx16.memory_fill` / `cx16.memory_copy`.
 * - Everything else evaluates the arguments via [translateArguments] and calls
 *   `prog8_lib.func_<functionName>`.
 *
 * @param fcall the builtin function call being translated
 * @param func signature of the builtin, passed through to [translateArguments]
 * @param functionName one of "memcopy", "memset" or "memsetw"
 * @throws AssemblyError on the Cx16 target when [functionName] is not one of the three
 *   supported names, or when a ROM routine call is not a statement inside a subroutine
 */
private fun funcMemSetCopy(fcall: IFunctionCall, func: FSignature, functionName: String) {
    // The short-copy special case is identical for all targets, so decide it once up front.
    if(functionName=="memcopy" && isByteSizedMemcopy(fcall)) {
        translateArguments(fcall.args, func)
        asmgen.out(" jsr prog8_lib.func_memcopy255")
        return
    }

    if(CompilationTarget.instance !is Cx16Target) {
        translateArguments(fcall.args, func)
        asmgen.out(" jsr prog8_lib.func_$functionName")
        return
    }

    when(functionName) {
        "memset" -> {
            // use the ROM function of the Cx16
            assignArgToWordVariable(fcall, 0, "cx16.r0")
            assignArgToWordVariable(fcall, 1, "cx16.r1")
            assignArgToRegisterA(fcall, 2)
            callRomRoutinePreservingX(fcall, "cx16.memory_fill")
        }
        "memcopy" -> {
            // use the ROM function of the Cx16
            assignArgToWordVariable(fcall, 0, "cx16.r0")
            assignArgToWordVariable(fcall, 1, "cx16.r1")
            assignArgToWordVariable(fcall, 2, "cx16.r2")
            callRomRoutinePreservingX(fcall, "cx16.memory_copy")
        }
        "memsetw" -> {
            translateArguments(fcall.args, func)
            asmgen.out(" jsr prog8_lib.func_memsetw")
        }
        // Previously an unexpected name fell through the `when` and silently emitted no code.
        else -> throw AssemblyError("funcMemSetCopy cannot handle builtin function $functionName")
    }
}

/**
 * Tells whether the byte count (third argument) of a memcopy call is known to fit in a byte:
 * either a compile-time constant of at most 255, or an expression of type UBYTE or BYTE.
 */
private fun isByteSizedMemcopy(fcall: IFunctionCall): Boolean {
    val countArg = fcall.args[2]
    val constCount = countArg.constValue(program)?.number?.toInt()
    val countType = countArg.inferType(program)
    return (constCount!=null && constCount <= 255)
            || countType.istype(DataType.UBYTE)
            || countType.istype(DataType.BYTE)
}

/** Emits an assignment of call argument [argIndex] to the UWORD assembly variable [asmVariable]. */
private fun assignArgToWordVariable(fcall: IFunctionCall, argIndex: Int, asmVariable: String) {
    val source = AsmAssignSource.fromAstSource(fcall.args[argIndex], program, asmgen)
    val target = AsmAssignTarget(TargetStorageKind.VARIABLE, program, asmgen, DataType.UWORD, null, variableAsmName = asmVariable)
    asmgen.translateNormalAssignment(AsmAssignment(source, target, false, Position.DUMMY))
}

/** Emits an assignment of call argument [argIndex], as UBYTE, to the A register. */
private fun assignArgToRegisterA(fcall: IFunctionCall, argIndex: Int) {
    val source = AsmAssignSource.fromAstSource(fcall.args[argIndex], program, asmgen)
    val target = AsmAssignTarget(TargetStorageKind.REGISTER, program, asmgen, DataType.UBYTE, null, register = RegisterOrPair.A)
    asmgen.translateNormalAssignment(AsmAssignment(source, target, false, Position.DUMMY))
}

/**
 * Emits `jsr <romRoutine>` wrapped in a save/restore of the X register.
 * NOTE(review): presumably needed because the ROM routine may change X while the generated
 * code relies on it - confirm against the Cx16 ROM documentation.
 */
private fun callRomRoutinePreservingX(fcall: IFunctionCall, romRoutine: String) {
    val sub = (fcall as? FunctionCallStatement)?.definingSubroutine()
            ?: throw AssemblyError("call to $romRoutine must be a statement inside a subroutine")
    asmgen.saveRegister(CpuRegister.X, false, sub)
    asmgen.out(" jsr $romRoutine")
    asmgen.restoreRegister(CpuRegister.X, false)
}
|
||||||
|
|
||||||
private fun funcStrcmp(fcall: IFunctionCall, func: FSignature, resultToStack: Boolean) {
|
private fun funcStrcmp(fcall: IFunctionCall, func: FSignature, resultToStack: Boolean) {
|
||||||
translateArguments(fcall.args, func)
|
translateArguments(fcall.args, func)
|
||||||
if(resultToStack)
|
if(resultToStack)
|
||||||
|
@ -79,7 +79,7 @@ val BuiltinFunctions = mapOf(
|
|||||||
"memcopy" to FSignature(false, listOf(
|
"memcopy" to FSignature(false, listOf(
|
||||||
FParam("from", IterableDatatypes + DataType.UWORD),
|
FParam("from", IterableDatatypes + DataType.UWORD),
|
||||||
FParam("to", IterableDatatypes + DataType.UWORD),
|
FParam("to", IterableDatatypes + DataType.UWORD),
|
||||||
FParam("numbytes", setOf(DataType.UBYTE))), null),
|
FParam("numbytes", setOf(DataType.UBYTE, DataType.UWORD))), null),
|
||||||
"memset" to FSignature(false, listOf(
|
"memset" to FSignature(false, listOf(
|
||||||
FParam("address", IterableDatatypes + DataType.UWORD),
|
FParam("address", IterableDatatypes + DataType.UWORD),
|
||||||
FParam("numbytes", setOf(DataType.UWORD)),
|
FParam("numbytes", setOf(DataType.UWORD)),
|
||||||
|
@ -765,12 +765,12 @@ sort(array)
|
|||||||
Strings and memory blocks
|
Strings and memory blocks
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
memcopy(from, to, numbytes)
|
memcopy(from, to, numbytes)
|
||||||
Efficiently copy a number of bytes (1 - 256) from a memory location to another.
|
Efficiently copy a number of bytes from one memory location to another.
|
||||||
NOTE: 'to' must NOT overlap with 'from', unless it is *before* 'from'.
|
NOTE: 'to' must NOT overlap with 'from', unless it is *before* 'from'.
|
||||||
Because this function imposes some overhead to handle the parameters,
|
Because this function imposes some overhead to handle the parameters,
|
||||||
it is only faster if the number of bytes is larger than a certain threshold.
|
it is only faster if the number of bytes is larger than a certain threshold.
|
||||||
Compare the generated code to see if it was beneficial or not.
|
Compare the generated code to see if it was beneficial or not.
|
||||||
The most efficient will always be to write a specialized copy routine in assembly yourself!
|
The most efficient will often be to write a specialized copy routine in assembly yourself!
|
||||||
|
|
||||||
memset(address, numbytes, bytevalue)
|
memset(address, numbytes, bytevalue)
|
||||||
Efficiently set a part of memory to the given (u)byte value.
|
Efficiently set a part of memory to the given (u)byte value.
|
||||||
|
@ -2,8 +2,6 @@
|
|||||||
TODO
|
TODO
|
||||||
====
|
====
|
||||||
|
|
||||||
- make memset(w) and memcopy able to work with >256 bytes
|
|
||||||
- after that: make memset and memcopy use the ROM routines on the CX16
|
|
||||||
- calling convention for builtin functions no longer via stack but via statically allocated vars inside the subroutine proc (just as normal subroutines)
|
- calling convention for builtin functions no longer via stack but via statically allocated vars inside the subroutine proc (just as normal subroutines)
|
||||||
- make it possible to use cpu opcodes such as 'nop' as variable names by prefixing all asm vars with something such as '_'
|
- make it possible to use cpu opcodes such as 'nop' as variable names by prefixing all asm vars with something such as '_'
|
||||||
- option to load the built-in library files from a directory instead of the embedded ones (for easier library development/debugging)
|
- option to load the built-in library files from a directory instead of the embedded ones (for easier library development/debugging)
|
||||||
|
@ -9,36 +9,16 @@
|
|||||||
main {
|
main {
|
||||||
|
|
||||||
sub start() {
|
sub start() {
|
||||||
float fl
|
const uword ADDR = $0400
|
||||||
|
|
||||||
fl = getfloat()
|
memset(ADDR, 40*25, 100)
|
||||||
floats.print_f(fl)
|
memsetw(ADDR, 20*10, $3031)
|
||||||
txt.chrout('\n')
|
memcopy(ADDR, ADDR+40*12, 20*10*2)
|
||||||
|
;memcopy(ADDR, ADDR+40*12, 255)
|
||||||
|
|
||||||
testX()
|
testX()
|
||||||
}
|
}
|
||||||
|
|
||||||
sub chrin() -> ubyte {
|
|
||||||
return 99
|
|
||||||
}
|
|
||||||
|
|
||||||
sub getstr() -> str {
|
|
||||||
@($d020)++
|
|
||||||
return "foobar"
|
|
||||||
}
|
|
||||||
|
|
||||||
sub getfloat() -> float {
|
|
||||||
float xx
|
|
||||||
xx = 123.456789
|
|
||||||
return xx
|
|
||||||
}
|
|
||||||
|
|
||||||
sub mcp(uword from, uword dest, ubyte length) {
|
|
||||||
txt.print_uw(from)
|
|
||||||
txt.print_uw(dest)
|
|
||||||
txt.print_ub(length)
|
|
||||||
}
|
|
||||||
|
|
||||||
asmsub testX() {
|
asmsub testX() {
|
||||||
%asm {{
|
%asm {{
|
||||||
stx _saveX
|
stx _saveX
|
||||||
|
Loading…
Reference in New Issue
Block a user