;
; 2003-08-20, Ullrich von Bassewitz
; 2009-09-13, Christian Krueger -- performance increase (about 20%), 2013-07-25 improved unrolling
; 2015-10-23, Greg King
;
; void* __fastcall__ memmove (void* dest, const void* src, size_t size);
;
; NOTE: This function uses entry points from memcpy!
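;
; Unlike memcpy, memmove must work even when the two regions overlap.
; The rule used below: if dest lies below src, a forward (low to high)
; copy never overwrites bytes that are still to be read, so the memcpy
; loop can be reused; in every other case the copy is done backwards.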
;
        .export         _memmove
        .import         memcpy_getparams, memcpy_upwards, popax
        .importzp       ptr1, ptr2, ptr3, ptr4, tmp1

        .macpack        generic
        .macpack        longbranch
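
; The generic macpack provides the add macro used below (it expands to
; clc followed by adc); longbranch provides jcc, which assembles to a
; plain bcc when the target is near enough, and to the inverse branch
; around a jmp when it is not.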
; ----------------------------------------------------------------------
_memmove:
        jsr     memcpy_getparams
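
; memcpy_getparams (in memcpy.s) fetches the arguments: ptr1 = src,
; ptr2 = dest, ptr3 = size. As the code below relies on, it returns
; with A/X holding dest, and dest stays on the parameter stack so the
; final popax at "done" can pop it as the return value.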
; Check for the copy direction. If dest < src, we must copy upwards (start at
; low addresses and increase pointers), otherwise we must copy downwards
; (start at high addresses and decrease pointers).
        cmp     ptr1
        txa
        sbc     ptr1+1
        jcc     memcpy_upwards  ; Branch if dest < src (upwards copy)
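;
; The three instructions above form a 16-bit unsigned compare: cmp sets
; the borrow from (dest_lo - src_lo), txa leaves the carry untouched,
; and sbc then subtracts the high bytes with that borrow, so carry ends
; up clear exactly when dest < src. A short trace with dest = $1234 and
; src = $4000:
;
;       cmp     ptr1            ; $34 - $00, no borrow: carry set
;       txa                     ; A = $12 (carry unchanged)
;       sbc     ptr1+1          ; $12 - $40 borrows: carry clear
;       jcc     memcpy_upwards  ; taken, dest < src: copy upwards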
; Copy downwards. Adjust the pointers to the end of the memory regions.
        lda     ptr1+1
        add     ptr3+1
        sta     ptr1+1

        lda     ptr2+1
        add     ptr3+1
        sta     ptr2+1
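;
; Only the high bytes change: ptr1 and ptr2 now point size_hi pages past
; the start of each region, and Y supplies the offset into the tail.
; Example: src = $2000, size = $0234 gives ptr1 = $2200, so offsets
; $00..$33 cover the final $34 bytes ($2200..$2233); the page loop below
; then steps ptr1 down to $2100 and $2000 for the two full pages.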
; handle fractions of a page size first
        ldy     ptr3            ; count, low byte
        bne     @entry          ; something to copy?
        beq     PageSizeCopy    ; always taken here, like a bra

@copyByte:
        lda     (ptr1),y
        sta     (ptr2),y
@entry:
        dey
        bne     @copyByte
        lda     (ptr1),y        ; copy remaining byte
        sta     (ptr2),y
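;
; Entering at @entry makes the first dey turn the low count byte into
; the highest offset: the loop copies bytes size_lo-1 down to 1, the bne
; falls through at Y = 0, and the lda/sta pair above then copies byte 0.
; That moves exactly size_lo bytes with no extra zero test in the loop.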
PageSizeCopy:                   ; assert Y = 0
        ldx     ptr3+1          ; number of pages
        beq     done            ; none? -> done
@initBase:
        dec     ptr1+1          ; adjust base...
        dec     ptr2+1
        dey                     ; in entry case: 0 -> FF
@copyBytes:
        .repeat 3               ; unroll this a bit to make it faster...
        lda     (ptr1),y        ; important: unrolling three times gives a nice
        sta     (ptr2),y        ; 255/3 = 85 loop which ends at 0
        dey
        .endrepeat
@copyEntry:                     ; in entry case: 0 -> FF
        bne     @copyBytes

        lda     (ptr1),y        ; Y = 0, copy last byte
        sta     (ptr2),y
        dex                     ; one page to copy less
        bne     @initBase       ; still a page to copy?
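;
; Per full page: dey wraps Y from 0 to $FF, the unrolled loop then moves
; bytes $FF down to $01 (three per pass; 255 = 3 * 85, so the bne falls
; through exactly at Y = 0), and the lda/sta pair above picks up byte
; $00; 256 bytes in all, with the two dec instructions stepping both
; pointers down one page per iteration of @initBase.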
; Done, return dest

done:   jmp     popax           ; Pop ptr and return as result