1
0
mirror of https://github.com/cc65/cc65.git synced 2025-01-01 03:30:20 +00:00
cc65/libsrc/common/memcpy.s

80 lines
2.7 KiB
ArmAsm

;
; Ullrich von Bassewitz, 2003-08-20
; Performance increase (about 20%) by
; Christian Krueger, 2009-09-13
;
; void* __fastcall__ memcpy (void* dest, const void* src, size_t n);
;
; NOTE: This function contains entry points for memmove, which will ressort
; to memcpy for an upwards copy. Don't change this module without looking
; at memmove!
;
.export _memcpy, memcpy_upwards, memcpy_getparams
.import popax, popptr1
.importzp sp, ptr1, ptr2, ptr3
; ----------------------------------------------------------------------
_memcpy:
jsr memcpy_getparams
memcpy_upwards: ; assert Y = 0
ldx ptr3+1 ; Get high byte of n
beq L2 ; Jump if zero
L1: .repeat 2 ; Unroll this a bit to make it faster...
lda (ptr1),Y ; copy a byte
sta (ptr2),Y
iny
.endrepeat
bne L1
inc ptr1+1
inc ptr2+1
dex ; Next 256 byte block
bne L1 ; Repeat if any
; the following section could be 10% faster if we were able to copy
; back to front - unfortunately we are forced to copy strict from
; low to high since this function is also used for
; memmove and blocks could be overlapping!
; {
L2: ; assert Y = 0
ldx ptr3 ; Get the low byte of n
beq done ; something to copy
L3: lda (ptr1),Y ; copy a byte
sta (ptr2),Y
iny
dex
bne L3
; }
done: jmp popax ; Pop ptr and return as result
; ----------------------------------------------------------------------
; Get the parameters from stack as follows:
;
; size --> ptr3
; src --> ptr1
; dest --> ptr2
; First argument (dest) will remain on stack and is returned in a/x!
memcpy_getparams: ; IMPORTANT! Function has to leave with Y=0!
sta ptr3
stx ptr3+1 ; save n to ptr3
jsr popptr1 ; save src to ptr1
; save dest to ptr2
iny ; Y=0 guaranteed by popptr1, we need '1' here...
; (direct stack access is three cycles faster
; (total cycle count with return))
lda (sp),y
tax
stx ptr2+1 ; save high byte of ptr2
dey ; Y = 0
lda (sp),y ; Get ptr2 low
sta ptr2
rts