mirror of
https://github.com/cc65/cc65.git
synced 2024-06-07 23:29:39 +00:00
Use downwards copy on memcpy
Faster (5%), but larger memcpy (29 bytes) (no change for memmove)
This commit is contained in:
parent
3dfe033000
commit
a310192da4
|
@ -10,45 +10,61 @@
|
|||
; at memmove!
|
||||
;
|
||||
|
||||
.export _memcpy, memcpy_upwards, memcpy_getparams
|
||||
.export _memcpy, memcpy_downwards, memcpy_getparams
|
||||
.import popax, popptr1
|
||||
.importzp sp, ptr1, ptr2, ptr3
|
||||
.macpack generic
|
||||
|
||||
; ----------------------------------------------------------------------
|
||||
_memcpy:
|
||||
jsr memcpy_getparams
|
||||
|
||||
memcpy_upwards: ; assert Y = 0
|
||||
ldx ptr3+1 ; Get high byte of n
|
||||
beq L2 ; Jump if zero
|
||||
memcpy_downwards:
|
||||
; Copy downwards. Adjust the pointers to the end of the memory regions.
|
||||
|
||||
L1: .repeat 2 ; Unroll this a bit to make it faster...
|
||||
lda (ptr1),Y ; copy a byte
|
||||
sta (ptr2),Y
|
||||
iny
|
||||
lda ptr1+1
|
||||
add ptr3+1
|
||||
sta ptr1+1
|
||||
|
||||
lda ptr2+1
|
||||
add ptr3+1
|
||||
sta ptr2+1
|
||||
|
||||
; handle fractions of a page size first
|
||||
|
||||
ldy ptr3 ; count, low byte
|
||||
bne @entry ; something to copy?
|
||||
beq PageSizeCopy ; here like bra...
|
||||
|
||||
@copyByte:
|
||||
lda (ptr1),y
|
||||
sta (ptr2),y
|
||||
@entry:
|
||||
dey
|
||||
bne @copyByte
|
||||
lda (ptr1),y ; copy remaining byte
|
||||
sta (ptr2),y
|
||||
|
||||
PageSizeCopy: ; assert Y = 0
|
||||
ldx ptr3+1 ; number of pages
|
||||
beq done ; none? -> done
|
||||
|
||||
@initBase:
|
||||
dec ptr1+1 ; adjust base...
|
||||
dec ptr2+1
|
||||
dey ; in entry case: 0 -> FF
|
||||
@copyBytes:
|
||||
.repeat 3 ; unroll this a bit to make it faster...
|
||||
lda (ptr1),y ; important: unrolling three times gives a nice
|
||||
sta (ptr2),y ; 255/3 = 85 loop which ends at 0
|
||||
dey
|
||||
.endrepeat
|
||||
bne L1
|
||||
inc ptr1+1
|
||||
inc ptr2+1
|
||||
dex ; Next 256 byte block
|
||||
bne L1 ; Repeat if any
|
||||
|
||||
; the following section could be 10% faster if we were able to copy
|
||||
; back to front - unfortunately we are forced to copy strict from
|
||||
; low to high since this function is also used for
|
||||
; memmove and blocks could be overlapping!
|
||||
; {
|
||||
L2: ; assert Y = 0
|
||||
ldx ptr3 ; Get the low byte of n
|
||||
beq done ; something to copy
|
||||
|
||||
L3: lda (ptr1),Y ; copy a byte
|
||||
sta (ptr2),Y
|
||||
iny
|
||||
dex
|
||||
bne L3
|
||||
|
||||
; }
|
||||
@copyEntry: ; in entry case: 0 -> FF
|
||||
bne @copyBytes
|
||||
lda (ptr1),y ; Y = 0, copy last byte
|
||||
sta (ptr2),y
|
||||
dex ; one page to copy less
|
||||
bne @initBase ; still a page to copy?
|
||||
|
||||
done: jmp popax ; Pop ptr and return as result
|
||||
|
||||
|
|
|
@ -8,8 +8,8 @@
|
|||
; NOTE: This function uses entry points from memcpy!
|
||||
;
|
||||
|
||||
.export _memmove
|
||||
.import memcpy_getparams, memcpy_upwards, popax
|
||||
.export _memmove, memcpy_upwards
|
||||
.import memcpy_getparams, memcpy_downwards, popax
|
||||
.importzp ptr1, ptr2, ptr3, ptr4, tmp1
|
||||
|
||||
.macpack generic
|
||||
|
@ -26,53 +26,32 @@ _memmove:
|
|||
cmp ptr1
|
||||
txa
|
||||
sbc ptr1+1
|
||||
jcc memcpy_upwards ; Branch if dest < src (upwards copy)
|
||||
jcs memcpy_downwards ; Branch if dest >= src (downwards copy)
|
||||
|
||||
; Copy downwards. Adjust the pointers to the end of the memory regions.
|
||||
memcpy_upwards: ; assert Y = 0
|
||||
ldx ptr3+1 ; Get high byte of n
|
||||
beq L2 ; Jump if zero
|
||||
|
||||
lda ptr1+1
|
||||
add ptr3+1
|
||||
sta ptr1+1
|
||||
|
||||
lda ptr2+1
|
||||
add ptr3+1
|
||||
sta ptr2+1
|
||||
|
||||
; handle fractions of a page size first
|
||||
|
||||
ldy ptr3 ; count, low byte
|
||||
bne @entry ; something to copy?
|
||||
beq PageSizeCopy ; here like bra...
|
||||
|
||||
@copyByte:
|
||||
lda (ptr1),y
|
||||
sta (ptr2),y
|
||||
@entry:
|
||||
dey
|
||||
bne @copyByte
|
||||
lda (ptr1),y ; copy remaining byte
|
||||
sta (ptr2),y
|
||||
|
||||
PageSizeCopy: ; assert Y = 0
|
||||
ldx ptr3+1 ; number of pages
|
||||
beq done ; none? -> done
|
||||
|
||||
@initBase:
|
||||
dec ptr1+1 ; adjust base...
|
||||
dec ptr2+1
|
||||
dey ; in entry case: 0 -> FF
|
||||
@copyBytes:
|
||||
.repeat 3 ; unroll this a bit to make it faster...
|
||||
lda (ptr1),y ; important: unrolling three times gives a nice
|
||||
sta (ptr2),y ; 255/3 = 85 loop which ends at 0
|
||||
dey
|
||||
L1: .repeat 2 ; Unroll this a bit to make it faster...
|
||||
lda (ptr1),Y ; copy a byte
|
||||
sta (ptr2),Y
|
||||
iny
|
||||
.endrepeat
|
||||
@copyEntry: ; in entry case: 0 -> FF
|
||||
bne @copyBytes
|
||||
lda (ptr1),y ; Y = 0, copy last byte
|
||||
sta (ptr2),y
|
||||
dex ; one page to copy less
|
||||
bne @initBase ; still a page to copy?
|
||||
bne L1
|
||||
inc ptr1+1
|
||||
inc ptr2+1
|
||||
dex ; Next 256 byte block
|
||||
bne L1 ; Repeat if any
|
||||
|
||||
L2: ; assert Y = 0
|
||||
ldx ptr3 ; Get the low byte of n
|
||||
beq done ; nothing to copy? -> done
|
||||
|
||||
L3: lda (ptr1),Y ; copy a byte
|
||||
sta (ptr2),Y
|
||||
iny
|
||||
dex
|
||||
bne L3
|
||||
|
||||
; Done, return dest
|
||||
|
||||
|
|
37
test/val/lib_common_memcpy.c
Normal file
37
test/val/lib_common_memcpy.c
Normal file
|
@ -0,0 +1,37 @@
|
|||
#include <string.h>
|
||||
#include "unittest.h"
|
||||
|
||||
#define BufferSize 384 // test correct page passing (>256, multiple of 128 here)
|
||||
|
||||
static char Buffer1[BufferSize]; // source of the first memcpy, destination of the second
|
||||
static char Buffer2[BufferSize]; // destination of the first memcpy, source of the second
|
||||
|
||||
|
||||
TEST
|
||||
{
|
||||
unsigned i, v;
|
||||
char* p;
|
||||
|
||||
for (i=0; i < BufferSize; ++i) {
|
||||
Buffer1[i] = i;
|
||||
Buffer2[i] = ~i;
|
||||
}
|
||||
|
||||
memcpy(Buffer2, Buffer1, sizeof(Buffer1));
|
||||
if (memcmp(Buffer1, Buffer2, sizeof(Buffer1))) {
|
||||
printf("First memcpy failed\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (i=0; i < BufferSize; ++i) {
|
||||
Buffer1[i] = i;
|
||||
Buffer2[i] = ~i;
|
||||
}
|
||||
|
||||
memcpy(Buffer1, Buffer2, sizeof(Buffer1));
|
||||
if (memcmp(Buffer2, Buffer1, sizeof(Buffer1))) {
|
||||
printf("Second memcpy failed\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
ENDTEST
|
Loading…
Reference in New Issue
Block a user