diff --git a/libsrc/common/memcpy.s b/libsrc/common/memcpy.s index fd090c788..6b77a609b 100644 --- a/libsrc/common/memcpy.s +++ b/libsrc/common/memcpy.s @@ -10,45 +10,61 @@ ; at memmove! ; - .export _memcpy, memcpy_upwards, memcpy_getparams + .export _memcpy, memcpy_downwards, memcpy_getparams .import popax, popptr1 .importzp sp, ptr1, ptr2, ptr3 + .macpack generic ; ---------------------------------------------------------------------- _memcpy: jsr memcpy_getparams -memcpy_upwards: ; assert Y = 0 - ldx ptr3+1 ; Get high byte of n - beq L2 ; Jump if zero +memcpy_downwards: +; Copy downwards. Adjust the pointers to the end of the memory regions. -L1: .repeat 2 ; Unroll this a bit to make it faster... - lda (ptr1),Y ; copy a byte - sta (ptr2),Y - iny + lda ptr1+1 + add ptr3+1 + sta ptr1+1 + + lda ptr2+1 + add ptr3+1 + sta ptr2+1 + +; handle fractions of a page size first + + ldy ptr3 ; count, low byte + bne @entry ; something to copy? + beq PageSizeCopy ; here like bra... + +@copyByte: + lda (ptr1),y + sta (ptr2),y +@entry: + dey + bne @copyByte + lda (ptr1),y ; copy remaining byte + sta (ptr2),y + +PageSizeCopy: ; assert Y = 0 + ldx ptr3+1 ; number of pages + beq done ; none? -> done + +@initBase: + dec ptr1+1 ; adjust base... + dec ptr2+1 + dey ; in entry case: 0 -> FF +@copyBytes: + .repeat 3 ; unroll this a bit to make it faster... + lda (ptr1),y ; important: unrolling three times gives a nice + sta (ptr2),y ; 255/3 = 85 loop which ends at 0 + dey .endrepeat - bne L1 - inc ptr1+1 - inc ptr2+1 - dex ; Next 256 byte block - bne L1 ; Repeat if any - - ; the following section could be 10% faster if we were able to copy - ; back to front - unfortunately we are forced to copy strict from - ; low to high since this function is also used for - ; memmove and blocks could be overlapping! - ; { -L2: ; assert Y = 0 - ldx ptr3 ; Get the low byte of n - beq done ; something to copy - -L3: lda (ptr1),Y ; copy a byte - sta (ptr2),Y - iny - dex - bne L3 - - ; } +@copyEntry: ; in entry case: 0 -> FF + bne @copyBytes + lda (ptr1),y ; Y = 0, copy last byte + sta (ptr2),y + dex ; one page to copy less + bne @initBase ; still a page to copy? done: jmp popax ; Pop ptr and return as result diff --git a/libsrc/common/memmove.s b/libsrc/common/memmove.s index 666447cd8..5693179cd 100644 --- a/libsrc/common/memmove.s +++ b/libsrc/common/memmove.s @@ -8,8 +8,8 @@ ; NOTE: This function uses entry points from memcpy! ; - .export _memmove - .import memcpy_getparams, memcpy_upwards, popax + .export _memmove, memcpy_upwards + .import memcpy_getparams, memcpy_downwards, popax .importzp ptr1, ptr2, ptr3, ptr4, tmp1 .macpack generic @@ -26,53 +26,32 @@ _memmove: cmp ptr1 txa sbc ptr1+1 - jcc memcpy_upwards ; Branch if dest < src (upwards copy) + jcs memcpy_downwards ; Branch if dest < src (upwards copy) -; Copy downwards. Adjust the pointers to the end of the memory regions. +memcpy_upwards: + ldx ptr3+1 ; Get high byte of n + beq L2 ; Jump if zero - lda ptr1+1 - add ptr3+1 - sta ptr1+1 - - lda ptr2+1 - add ptr3+1 - sta ptr2+1 - -; handle fractions of a page size first - - ldy ptr3 ; count, low byte - bne @entry ; something to copy? - beq PageSizeCopy ; here like bra... - -@copyByte: - lda (ptr1),y - sta (ptr2),y -@entry: - dey - bne @copyByte - lda (ptr1),y ; copy remaining byte - sta (ptr2),y - -PageSizeCopy: ; assert Y = 0 - ldx ptr3+1 ; number of pages - beq done ; none? -> done - -@initBase: - dec ptr1+1 ; adjust base... - dec ptr2+1 - dey ; in entry case: 0 -> FF -@copyBytes: - .repeat 3 ; unroll this a bit to make it faster... - lda (ptr1),y ; important: unrolling three times gives a nice - sta (ptr2),y ; 255/3 = 85 loop which ends at 0 - dey +L1: .repeat 2 ; Unroll this a bit to make it faster... + lda (ptr1),Y ; copy a byte + sta (ptr2),Y + iny .endrepeat -@copyEntry: ; in entry case: 0 -> FF - bne @copyBytes - lda (ptr1),y ; Y = 0, copy last byte - sta (ptr2),y - dex ; one page to copy less - bne @initBase ; still a page to copy? + bne L1 + inc ptr1+1 + inc ptr2+1 + dex ; Next 256 byte block + bne L1 ; Repeat if any + +L2: ; assert Y = 0 + ldx ptr3 ; Get the low byte of n + beq done ; something to copy + +L3: lda (ptr1),Y ; copy a byte + sta (ptr2),Y + iny + dex + bne L3 ; Done, return dest diff --git a/test/val/lib_common_memcpy.c b/test/val/lib_common_memcpy.c new file mode 100644 index 000000000..6385bef71 --- /dev/null +++ b/test/val/lib_common_memcpy.c @@ -0,0 +1,37 @@ +#include +#include "unittest.h" + +#define BufferSize 384 // test correct page passing (>256, multiple of 128 here) + +static char Buffer1[BufferSize]; // +1 to move up (and down) +static char Buffer2[BufferSize]; // +1 to move up (and down) + + +TEST +{ + unsigned i, v; + char* p; + + for (i=0; i < BufferSize; ++i) { + Buffer1[i] = i; + Buffer2[i] = ~i; + } + + memcpy(Buffer2, Buffer1, sizeof(Buffer1)); + if (memcmp(Buffer1, Buffer2, sizeof(Buffer1))) { + printf("First memcpy failed\n"); + exit(1); + } + + for (i=0; i < BufferSize; ++i) { + Buffer1[i] = i; + Buffer2[i] = ~i; + } + + memcpy(Buffer1, Buffer2, sizeof(Buffer1)); + if (memcmp(Buffer2, Buffer1, sizeof(Buffer1))) { + printf("Second memcpy failed\n"); + exit(1); + } +} +ENDTEST