1
0
mirror of https://github.com/cc65/cc65.git synced 2024-06-01 13:41:34 +00:00
cc65/libsrc/common/memmove.s
Colin Leroy-Mira a310192da4 Use downwards copy on memcpy
Faster (5%), but larger memcpy (29 bytes) (no change for memmove)
2024-02-14 20:00:12 +01:00

59 lines
1.8 KiB
ArmAsm

;
; 2003-08-20, Ullrich von Bassewitz
; 2009-09-13, Christian Krueger -- performance increase (about 20%), 2013-07-25 improved unrolling
; 2015-10-23, Greg King
;
; void* __fastcall__ memmove (void* dest, const void* src, size_t size);
;
; NOTE: This function uses entry points from memcpy!
;
.export _memmove, memcpy_upwards
.import memcpy_getparams, memcpy_downwards, popax
.importzp ptr1, ptr2, ptr3, ptr4, tmp1
.macpack generic
.macpack longbranch
; ----------------------------------------------------------------------
_memmove:
jsr memcpy_getparams
; Check for the copy direction. If dest < src, we must copy upwards (start at
; low addresses and increase pointers), otherwise we must copy downwards
; (start at high addresses and decrease pointers).
cmp ptr1
txa
sbc ptr1+1
jcs memcpy_downwards ; Branch if dest < src (upwards copy)
memcpy_upwards:
ldx ptr3+1 ; Get high byte of n
beq L2 ; Jump if zero
L1: .repeat 2 ; Unroll this a bit to make it faster...
lda (ptr1),Y ; copy a byte
sta (ptr2),Y
iny
.endrepeat
bne L1
inc ptr1+1
inc ptr2+1
dex ; Next 256 byte block
bne L1 ; Repeat if any
L2: ; assert Y = 0
ldx ptr3 ; Get the low byte of n
beq done ; something to copy
L3: lda (ptr1),Y ; copy a byte
sta (ptr2),Y
iny
dex
bne L3
; Done, return dest
done: jmp popax ; Pop ptr and return as result