mirror of
https://github.com/irmen/prog8.git
synced 2025-04-07 16:41:46 +00:00
faster array copy and fix for length 256
This commit is contained in:
parent
a4f697bae1
commit
88458f5355
@ -96,26 +96,24 @@ internal class BuiltinFunctionsAsmGen(private val program: PtProgram,
|
||||
asmgen.out("""
|
||||
lda #<${sourceAsm}_lsb
|
||||
ldy #>${sourceAsm}_lsb
|
||||
sta cx16.r0L
|
||||
sty cx16.r0H
|
||||
sta P8ZP_SCRATCH_W1
|
||||
sty P8ZP_SCRATCH_W1+1
|
||||
lda #<${targetAsm}_lsb
|
||||
ldy #>${targetAsm}_lsb
|
||||
sta cx16.r1L
|
||||
sty cx16.r1H
|
||||
lda #<${numElements}
|
||||
ldy #>${numElements}
|
||||
jsr sys.memcopy
|
||||
sta P8ZP_SCRATCH_W2
|
||||
sty P8ZP_SCRATCH_W2+1
|
||||
ldy #${numElements and 255}
|
||||
jsr prog8_lib.memcopy_small
|
||||
lda #<${sourceAsm}_msb
|
||||
ldy #>${sourceAsm}_msb
|
||||
sta cx16.r0L
|
||||
sty cx16.r0H
|
||||
sta P8ZP_SCRATCH_W1
|
||||
sty P8ZP_SCRATCH_W1+1
|
||||
lda #<${targetAsm}_msb
|
||||
ldy #>${targetAsm}_msb
|
||||
sta cx16.r1L
|
||||
sty cx16.r1H
|
||||
lda #<${numElements}
|
||||
ldy #>${numElements}
|
||||
jsr sys.memcopy""")
|
||||
sta P8ZP_SCRATCH_W2
|
||||
sty P8ZP_SCRATCH_W2+1
|
||||
ldy #${numElements and 255}
|
||||
jsr prog8_lib.memcopy_small""")
|
||||
}
|
||||
else if(source.type in SplitWordArrayTypes) {
|
||||
// split word array to normal word array (copy lsb and msb arrays separately)
|
||||
@ -158,15 +156,14 @@ internal class BuiltinFunctionsAsmGen(private val program: PtProgram,
|
||||
asmgen.out("""
|
||||
lda #<${sourceAsm}
|
||||
ldy #>${sourceAsm}
|
||||
sta cx16.r0L
|
||||
sty cx16.r0H
|
||||
sta P8ZP_SCRATCH_W1
|
||||
sty P8ZP_SCRATCH_W1+1
|
||||
lda #<${targetAsm}
|
||||
ldy #>${targetAsm}
|
||||
sta cx16.r1L
|
||||
sty cx16.r1H
|
||||
lda #<${numBytes}
|
||||
ldy #>${numBytes}
|
||||
jsr sys.memcopy""")
|
||||
sta P8ZP_SCRATCH_W2
|
||||
sty P8ZP_SCRATCH_W2+1
|
||||
ldy #${numBytes and 255}
|
||||
jsr prog8_lib.memcopy_small""")
|
||||
}
|
||||
}
|
||||
|
||||
@ -1408,8 +1405,7 @@ internal class BuiltinFunctionsAsmGen(private val program: PtProgram,
|
||||
ldy #>$identifierName
|
||||
sta P8ZP_SCRATCH_W1
|
||||
sty P8ZP_SCRATCH_W1+1
|
||||
lda #$numElements
|
||||
""")
|
||||
lda #${numElements and 255}""")
|
||||
}
|
||||
|
||||
private fun translateArguments(call: PtBuiltinFunctionCall, scope: IPtSubroutine?) {
|
||||
|
@ -66,11 +66,11 @@ internal class BuiltinFuncGen(private val codeGen: IRCodeGen, private val exprGe
|
||||
it += IRInstruction(Opcode.LOAD, IRDataType.WORD, reg1=fromReg, labelSymbol = source.name+"_lsb")
|
||||
it += IRInstruction(Opcode.LOAD, IRDataType.WORD, reg1=toReg, labelSymbol = target.name+"_lsb")
|
||||
it += IRInstruction(Opcode.LOAD, IRDataType.WORD, reg1=countReg, immediate = sourceLength)
|
||||
it += codeGen.makeSyscall(IMSyscall.MEMCOPY, listOf(IRDataType.WORD to fromReg, IRDataType.WORD to toReg, IRDataType.WORD to countReg), returns = null)
|
||||
it += codeGen.makeSyscall(IMSyscall.MEMCOPY_SMALL, listOf(IRDataType.WORD to fromReg, IRDataType.WORD to toReg, IRDataType.BYTE to (countReg and 255)), returns = null)
|
||||
it += IRInstruction(Opcode.LOAD, IRDataType.WORD, reg1=fromReg, labelSymbol = source.name+"_msb")
|
||||
it += IRInstruction(Opcode.LOAD, IRDataType.WORD, reg1=toReg, labelSymbol = target.name+"_msb")
|
||||
it += IRInstruction(Opcode.LOAD, IRDataType.WORD, reg1=countReg, immediate = sourceLength)
|
||||
it += codeGen.makeSyscall(IMSyscall.MEMCOPY, listOf(IRDataType.WORD to fromReg, IRDataType.WORD to toReg, IRDataType.WORD to countReg), returns = null)
|
||||
it += codeGen.makeSyscall(IMSyscall.MEMCOPY_SMALL, listOf(IRDataType.WORD to fromReg, IRDataType.WORD to toReg, IRDataType.BYTE to (countReg and 255)), returns = null)
|
||||
}
|
||||
}
|
||||
else if(source.type in SplitWordArrayTypes) {
|
||||
@ -105,7 +105,7 @@ internal class BuiltinFuncGen(private val codeGen: IRCodeGen, private val exprGe
|
||||
it += IRInstruction(Opcode.LOAD, IRDataType.WORD, reg1=toReg, labelSymbol = target.name)
|
||||
it += IRInstruction(Opcode.LOAD, IRDataType.WORD, reg1=countReg, immediate = sourceLength * eltsize)
|
||||
}
|
||||
result += codeGen.makeSyscall(IMSyscall.MEMCOPY, listOf(IRDataType.WORD to fromReg, IRDataType.WORD to toReg, IRDataType.WORD to countReg), returns = null)
|
||||
result += codeGen.makeSyscall(IMSyscall.MEMCOPY_SMALL, listOf(IRDataType.WORD to fromReg, IRDataType.WORD to toReg, IRDataType.BYTE to (countReg and 255)), returns = null)
|
||||
}
|
||||
|
||||
return ExpressionCodeResult(result, IRDataType.BYTE, -1, -1)
|
||||
@ -244,7 +244,7 @@ internal class BuiltinFuncGen(private val codeGen: IRCodeGen, private val exprGe
|
||||
addInstr(result, IRInstruction(Opcode.PREPARECALL, immediate = 2), null)
|
||||
val tr = exprGen.translateExpression(arrayName)
|
||||
addToResult(result, tr, tr.resultReg, -1)
|
||||
addInstr(result, IRInstruction(Opcode.LOAD, IRDataType.BYTE, reg1 = lengthReg, immediate = arrayLength), null)
|
||||
addInstr(result, IRInstruction(Opcode.LOAD, IRDataType.BYTE, reg1 = lengthReg, immediate = arrayLength!! and 255), null)
|
||||
result += codeGen.makeSyscall(syscall, listOf(IRDataType.WORD to tr.resultReg, IRDataType.BYTE to lengthReg), IRDataType.BYTE to tr.resultReg)
|
||||
return ExpressionCodeResult(result, IRDataType.BYTE, tr.resultReg, -1)
|
||||
}
|
||||
@ -272,7 +272,7 @@ internal class BuiltinFuncGen(private val codeGen: IRCodeGen, private val exprGe
|
||||
val tr = exprGen.translateExpression(arrayName)
|
||||
addToResult(result, tr, tr.resultReg, -1)
|
||||
val lengthReg = codeGen.registers.nextFree()
|
||||
addInstr(result, IRInstruction(Opcode.LOAD, IRDataType.BYTE, reg1 = lengthReg, immediate = arrayLength), null)
|
||||
addInstr(result, IRInstruction(Opcode.LOAD, IRDataType.BYTE, reg1 = lengthReg, immediate = arrayLength!! and 255), null)
|
||||
result += codeGen.makeSyscall(syscall, listOf(IRDataType.WORD to tr.resultReg, IRDataType.BYTE to lengthReg), IRDataType.BYTE to tr.resultReg)
|
||||
return ExpressionCodeResult(result, IRDataType.BYTE, tr.resultReg, -1)
|
||||
}
|
||||
|
@ -99,20 +99,14 @@ sys {
|
||||
rts ; nothing to copy
|
||||
|
||||
_copyshort
|
||||
; decrease source and target pointers so we can simply index by Y
|
||||
lda P8ZP_SCRATCH_W1
|
||||
bne +
|
||||
dec P8ZP_SCRATCH_W1+1
|
||||
+ dec P8ZP_SCRATCH_W1
|
||||
lda P8ZP_SCRATCH_W2
|
||||
bne +
|
||||
dec P8ZP_SCRATCH_W2+1
|
||||
+ dec P8ZP_SCRATCH_W2
|
||||
|
||||
dey
|
||||
beq +
|
||||
- lda (P8ZP_SCRATCH_W1),y
|
||||
sta (P8ZP_SCRATCH_W2),y
|
||||
dey
|
||||
bne -
|
||||
+ lda (P8ZP_SCRATCH_W1),y
|
||||
sta (P8ZP_SCRATCH_W2),y
|
||||
rts
|
||||
|
||||
_longcopy
|
||||
|
@ -641,20 +641,14 @@ _loop lda P8ZP_SCRATCH_W1
|
||||
rts ; nothing to copy
|
||||
|
||||
_copyshort
|
||||
; decrease source and target pointers so we can simply index by Y
|
||||
lda P8ZP_SCRATCH_W1
|
||||
bne +
|
||||
dec P8ZP_SCRATCH_W1+1
|
||||
+ dec P8ZP_SCRATCH_W1
|
||||
lda P8ZP_SCRATCH_W2
|
||||
bne +
|
||||
dec P8ZP_SCRATCH_W2+1
|
||||
+ dec P8ZP_SCRATCH_W2
|
||||
|
||||
dey
|
||||
beq +
|
||||
- lda (P8ZP_SCRATCH_W1),y
|
||||
sta (P8ZP_SCRATCH_W2),y
|
||||
dey
|
||||
bne -
|
||||
+ lda (P8ZP_SCRATCH_W1),y
|
||||
sta (P8ZP_SCRATCH_W2),y
|
||||
rts
|
||||
|
||||
_longcopy
|
||||
|
@ -639,20 +639,14 @@ _loop lda P8ZP_SCRATCH_W1
|
||||
rts ; nothing to copy
|
||||
|
||||
_copyshort
|
||||
; decrease source and target pointers so we can simply index by Y
|
||||
lda P8ZP_SCRATCH_W1
|
||||
bne +
|
||||
dec P8ZP_SCRATCH_W1+1
|
||||
+ dec P8ZP_SCRATCH_W1
|
||||
lda P8ZP_SCRATCH_W2
|
||||
bne +
|
||||
dec P8ZP_SCRATCH_W2+1
|
||||
+ dec P8ZP_SCRATCH_W2
|
||||
|
||||
dey
|
||||
beq +
|
||||
- lda (P8ZP_SCRATCH_W1),y
|
||||
sta (P8ZP_SCRATCH_W2),y
|
||||
dey
|
||||
bne -
|
||||
+ lda (P8ZP_SCRATCH_W1),y
|
||||
sta (P8ZP_SCRATCH_W2),y
|
||||
rts
|
||||
|
||||
_longcopy
|
||||
|
@ -1492,20 +1492,14 @@ _loop lda P8ZP_SCRATCH_W1
|
||||
rts ; nothing to copy
|
||||
|
||||
_copyshort
|
||||
; decrease source and target pointers so we can simply index by Y
|
||||
lda cx16.r0
|
||||
bne +
|
||||
dec cx16.r0+1
|
||||
+ dec cx16.r0
|
||||
lda cx16.r1
|
||||
bne +
|
||||
dec cx16.r1+1
|
||||
+ dec cx16.r1
|
||||
|
||||
dey
|
||||
beq +
|
||||
- lda (cx16.r0),y
|
||||
sta (cx16.r1),y
|
||||
dey
|
||||
bne -
|
||||
+ lda (cx16.r0),y
|
||||
sta (cx16.r1),y
|
||||
rts
|
||||
|
||||
_longcopy
|
||||
|
@ -194,20 +194,14 @@ _loop lda P8ZP_SCRATCH_W1
|
||||
rts ; nothing to copy
|
||||
|
||||
_copyshort
|
||||
; decrease source and target pointers so we can simply index by Y
|
||||
lda P8ZP_SCRATCH_W1
|
||||
bne +
|
||||
dec P8ZP_SCRATCH_W1+1
|
||||
+ dec P8ZP_SCRATCH_W1
|
||||
lda P8ZP_SCRATCH_W2
|
||||
bne +
|
||||
dec P8ZP_SCRATCH_W2+1
|
||||
+ dec P8ZP_SCRATCH_W2
|
||||
|
||||
dey
|
||||
beq +
|
||||
- lda (P8ZP_SCRATCH_W1),y
|
||||
sta (P8ZP_SCRATCH_W2),y
|
||||
dey
|
||||
bne -
|
||||
+ lda (P8ZP_SCRATCH_W1),y
|
||||
sta (P8ZP_SCRATCH_W2),y
|
||||
rts
|
||||
|
||||
_longcopy
|
||||
|
@ -405,3 +405,27 @@ _modsrcmsb lda $ffff ; modified msb read
|
||||
bne _modsrclsb
|
||||
rts
|
||||
.pend
|
||||
|
||||
memcopy_small .proc
|
||||
; copy up to a single page (256 bytes) of memory.
|
||||
; note: only works for NON-OVERLAPPING memory regions!
|
||||
; P8ZP_SCRATCH_W1 = from address
|
||||
; P8ZP_SCRATCH_W2 = destination address
|
||||
; Y = number of bytes to copy (where 0 means 256)
; A and Y are modified; Y is 0 on return from every path.
; Bytes are copied from the highest offset downwards, using Y as the index.
|
||||
; Y == 0 requests a full 256-byte page, which is handled separately below.
cpy #0
|
||||
beq _fullpage
|
||||
; Turn the count into the offset of the last byte (count-1).
dey
|
||||
||||
; A count of 1 leaves only offset 0 to copy.
beq _lastbyte
|
||||
; Copy offsets count-1 down to 1; the loop stops once Y reaches 0.
_loop lda (P8ZP_SCRATCH_W1),y
|
||||
sta (P8ZP_SCRATCH_W2),y
|
||||
dey
|
||||
bne _loop
|
||||
; Offset 0 is not covered by the loop above, so copy it here.
_lastbyte lda (P8ZP_SCRATCH_W1),y
|
||||
sta (P8ZP_SCRATCH_W2),y
|
||||
rts
|
||||
; Full page: the first pass copies offset 0, then dey wraps Y from 0 to 255
; and the loop copies offsets 255 down to 1, ending when Y is 0 again.
_fullpage lda (P8ZP_SCRATCH_W1),y
|
||||
sta (P8ZP_SCRATCH_W2),y
|
||||
dey
|
||||
bne _fullpage
|
||||
rts
|
||||
.pend
|
||||
|
@ -5,14 +5,37 @@
|
||||
|
||||
main {
|
||||
sub start() {
|
||||
ubyte[5] cave_times
|
||||
ubyte[5] diamonds_needed
|
||||
ubyte[256] @shared arr1 = 99
|
||||
ubyte[256] @shared arr2 = 0
|
||||
uword[128] @shared warr1 = 9999
|
||||
uword[128] @shared warr2 = 0
|
||||
|
||||
cave_times = [1,2,3,4,5]
|
||||
diamonds_needed = [1,2,3,4,5]
|
||||
txt.print_ub(all(arr2))
|
||||
txt.nl()
|
||||
txt.print_ub(all(warr2))
|
||||
txt.nl()
|
||||
arr2 = arr1
|
||||
warr2 = warr1
|
||||
txt.print_ub(all(arr2))
|
||||
txt.nl()
|
||||
txt.print_ub(all(warr2))
|
||||
txt.nl()
|
||||
|
||||
|
||||
uword[] @split cave_times = [1111,2222,3333,4444]
|
||||
cave_times = [9999,8888,7777,6666]
|
||||
|
||||
for cx16.r0L in 0 to len(cave_times)-1 {
|
||||
txt.print_ub(cave_times[cx16.r0L])
|
||||
txt.print_uw(cave_times[cx16.r0L])
|
||||
txt.spc()
|
||||
}
|
||||
txt.nl()
|
||||
|
||||
ubyte[] cave_times2 = [11,22,33,44]
|
||||
cave_times2 = [99,88,77,66]
|
||||
|
||||
for cx16.r0L in 0 to len(cave_times2)-1 {
|
||||
txt.print_ub(cave_times2[cx16.r0L])
|
||||
txt.spc()
|
||||
}
|
||||
txt.nl()
|
||||
|
@ -30,6 +30,7 @@ enum class IMSyscall(val number: Int) {
|
||||
CLAMP_FLOAT(0x1016),
|
||||
CALLFAR(0x1017),
|
||||
MEMCOPY(0x1018),
|
||||
ARRAYCOPY_SPLITW_TO_NORMAL(0x1019),
|
||||
ARRAYCOPY_NORMAL_TO_SPLITW(0x101a),
|
||||
MEMCOPY_SMALL(0x1019),
|
||||
ARRAYCOPY_SPLITW_TO_NORMAL(0x101a),
|
||||
ARRAYCOPY_NORMAL_TO_SPLITW(0x101b),
|
||||
}
|
||||
|
@ -62,6 +62,7 @@ SYSCALLS:
|
||||
52 = stringcopy
|
||||
53 = ARRAYCOPY_SPLITW_TO_NORMAL
|
||||
54 = ARRAYCOPY_NORMAL_TO_SPLITW
|
||||
55 = memcopy_small
|
||||
*/
|
||||
|
||||
enum class Syscall {
|
||||
@ -120,6 +121,7 @@ enum class Syscall {
|
||||
STRINGCOPY,
|
||||
ARRAYCOPY_SPLITW_TO_NORMAL,
|
||||
ARRAYCOPY_NORMAL_TO_SPLITW,
|
||||
MEMCOPY_SMALL
|
||||
;
|
||||
|
||||
companion object {
|
||||
@ -306,7 +308,8 @@ object SysCalls {
|
||||
val (addressV, lengthV) = getArgValues(callspec.arguments, vm)
|
||||
val address = (addressV as UShort).toInt()
|
||||
val length = (lengthV as UByte).toInt()
|
||||
val addresses = IntProgression.fromClosedRange(address, address+length-1, 1)
|
||||
val endAddressExcl = address + if(length==0) 256 else length
|
||||
val addresses = IntProgression.fromClosedRange(address, endAddressExcl-1, 1)
|
||||
if(addresses.any { vm.memory.getUB(it).toInt()!=0 })
|
||||
returnValue(callspec.returns!!, 1, vm)
|
||||
else
|
||||
@ -316,7 +319,8 @@ object SysCalls {
|
||||
val (addressV, lengthV) = getArgValues(callspec.arguments, vm)
|
||||
val address = (addressV as UShort).toInt()
|
||||
val length = (lengthV as UByte).toInt()
|
||||
val addresses = IntProgression.fromClosedRange(address, address+length*2-2, 2)
|
||||
val endAddressExcl = address + if(length==0) 256*2 else length*2
|
||||
val addresses = IntProgression.fromClosedRange(address, endAddressExcl-2, 2)
|
||||
if(addresses.any { vm.memory.getUW(it).toInt()!=0 })
|
||||
returnValue(callspec.returns!!, 1, vm)
|
||||
else
|
||||
@ -326,7 +330,8 @@ object SysCalls {
|
||||
val (addressV, lengthV) = getArgValues(callspec.arguments, vm)
|
||||
val address = (addressV as UShort).toInt()
|
||||
val length = (lengthV as UByte).toInt()
|
||||
val addresses = IntProgression.fromClosedRange(address, address+length*4-2, 4)
|
||||
val endAddressExcl = address + (if(length==0) 256*vm.machinedef.FLOAT_MEM_SIZE else length*vm.machinedef.FLOAT_MEM_SIZE)
|
||||
val addresses = IntProgression.fromClosedRange(address, endAddressExcl-vm.machinedef.FLOAT_MEM_SIZE, 4)
|
||||
if(addresses.any { vm.memory.getFloat(it).toInt()!=0 })
|
||||
returnValue(callspec.returns!!, 1, vm)
|
||||
else
|
||||
@ -336,7 +341,8 @@ object SysCalls {
|
||||
val (addressV, lengthV) = getArgValues(callspec.arguments, vm)
|
||||
val address = (addressV as UShort).toInt()
|
||||
val length = (lengthV as UByte).toInt()
|
||||
val addresses = IntProgression.fromClosedRange(address, address+length-1, 1)
|
||||
val endAddressExcl = address + if(length==0) 256 else length
|
||||
val addresses = IntProgression.fromClosedRange(address, endAddressExcl-1, 1)
|
||||
if(addresses.all { vm.memory.getUB(it).toInt()!=0 })
|
||||
returnValue(callspec.returns!!, 1, vm)
|
||||
else
|
||||
@ -346,7 +352,8 @@ object SysCalls {
|
||||
val (addressV, lengthV) = getArgValues(callspec.arguments, vm)
|
||||
val address = (addressV as UShort).toInt()
|
||||
val length = (lengthV as UByte).toInt()
|
||||
val addresses = IntProgression.fromClosedRange(address, address+length*2-2, 2)
|
||||
val endAddressExcl = address + if(length==0) 256*2 else length*2
|
||||
val addresses = IntProgression.fromClosedRange(address, endAddressExcl-2, 2)
|
||||
if(addresses.all { vm.memory.getUW(it).toInt()!=0 })
|
||||
returnValue(callspec.returns!!, 1, vm)
|
||||
else
|
||||
@ -356,7 +363,8 @@ object SysCalls {
|
||||
val (addressV, lengthV) = getArgValues(callspec.arguments, vm)
|
||||
val address = (addressV as UShort).toInt()
|
||||
val length = (lengthV as UByte).toInt()
|
||||
val addresses = IntProgression.fromClosedRange(address, address+length*4-2, 4)
|
||||
val endAddressExcl = address + (if(length==0) 256*vm.machinedef.FLOAT_MEM_SIZE else length*vm.machinedef.FLOAT_MEM_SIZE)
|
||||
val addresses = IntProgression.fromClosedRange(address, endAddressExcl-vm.machinedef.FLOAT_MEM_SIZE, 4)
|
||||
if(addresses.all { vm.memory.getFloat(it).toInt()!=0 })
|
||||
returnValue(callspec.returns!!, 1, vm)
|
||||
else
|
||||
@ -553,6 +561,16 @@ object SysCalls {
|
||||
vm.memory.setUB(to+offset, vm.memory.getUB(from+offset))
|
||||
}
|
||||
}
|
||||
Syscall.MEMCOPY_SMALL -> {
|
||||
val (fromA, toA, countA) = getArgValues(callspec.arguments, vm)
|
||||
val from = (fromA as UShort).toInt()
|
||||
val to = (toA as UShort).toInt()
|
||||
val countV = (countA as UByte).toInt()
|
||||
val count = if(countV==0) 256 else countV
|
||||
for(offset in 0..<count) {
|
||||
vm.memory.setUB(to+offset, vm.memory.getUB(from+offset))
|
||||
}
|
||||
}
|
||||
Syscall.MEMSET -> {
|
||||
val (memA, numbytesA, valueA) = getArgValues(callspec.arguments, vm)
|
||||
val mem = (memA as UShort).toInt()
|
||||
|
@ -127,6 +127,7 @@ class VmProgramLoader {
|
||||
IMSyscall.CLAMP_FLOAT.number -> Syscall.CLAMP_FLOAT
|
||||
IMSyscall.CALLFAR.number -> throw IRParseException("vm doesn't support the callfar() syscall")
|
||||
IMSyscall.MEMCOPY.number -> Syscall.MEMCOPY
|
||||
IMSyscall.MEMCOPY_SMALL.number -> Syscall.MEMCOPY_SMALL
|
||||
IMSyscall.ARRAYCOPY_SPLITW_TO_NORMAL.number -> Syscall.ARRAYCOPY_SPLITW_TO_NORMAL
|
||||
IMSyscall.ARRAYCOPY_NORMAL_TO_SPLITW.number -> Syscall.ARRAYCOPY_NORMAL_TO_SPLITW
|
||||
else -> null
|
||||
|
Loading…
x
Reference in New Issue
Block a user