1
0
mirror of https://github.com/cc65/cc65.git synced 2024-06-15 17:30:06 +00:00

Added pce-specific substitutes for memcpy() and memmove().

They are smaller and faster because they take advantage of the pce CPU's block-copy instructions.

Also, made a small improvement to the common memmove(), so that it is similar to the pce version.
This commit is contained in:
Greg King 2015-10-23 18:02:21 -04:00
parent 43bda6928d
commit 8e8d112029
3 changed files with 161 additions and 6 deletions

View File

@ -1,7 +1,7 @@
;
; Ullrich von Bassewitz, 2003-08-20
; Performance increase (about 20%) by
; Christian Krueger, 2009-09-13
; 2003-08-20, Ullrich von Bassewitz
; 2009-09-13, Christian Krueger -- performance increase (about 20%)
; 2015-10-23, Greg King
;
; void* __fastcall__ memmove (void* dest, const void* src, size_t size);
;
@ -23,8 +23,7 @@ _memmove:
; low addresses and increase pointers), otherwise we must copy downwards
; (start at high addresses and decrease pointers).
sec
sbc ptr1
cmp ptr1
txa
sbc ptr1+1
jcc memcpy_upwards ; Branch if dest < src (upwards copy)
@ -81,4 +80,3 @@ PageSizeCopy: ; assert Y = 0
; Done, return dest
done: jmp popax ; Pop ptr and return as result

94
libsrc/pce/memcpy.s Normal file
View File

@ -0,0 +1,94 @@
;
; This file, instead of "common/memcpy.s", will be assembled for the pce
; target. This version is smaller and faster because it uses the HuC6280's
; block-copy instructions.
;
; 2003-08-20, Ullrich von Bassewitz
; 2015-10-11, Greg King
;
; void* __fastcall__ memcpy (void* dest, const void* src, size_t size);
;
; NOTE: This function contains entry points for memmove, which will resort
; to memcpy for an incrementing copy. Don't change this module without looking
; at "pce/memmove.s"!
;
.export _memcpy
.export memcpy_increment, memcpy_transfer, memcpy_getparams
.import popax
.importzp sp, ptr1, ptr2, ptr3
; The structure of the transfer instructions
;
; This anonymous struct only defines byte offsets into the 7-byte instruction
; that is assembled at the label "transfer"; they are used as
; "transfer+opcode", "transfer+source", etc., when the instruction is patched.
.struct
opcode .byte ; offset 0: instruction opcode (e.g., $73 = TII)
source .addr ; offset 1: 16-bit source address
destination .addr ; offset 3: 16-bit destination address
length .word ; offset 5: 16-bit byte count
.endstruct
; ----------------------------------------------------------------------
; void* __fastcall__ memcpy (void* dest, const void* src, size_t size);
;
; In:   .XA = size; src and dest are on the C stack.
; Out:  .XA = dest (popped from the C stack by popax).
_memcpy:
        jsr     memcpy_getparams        ; ptr1 = src, ptr2 = dest, ptr3 = size

; Entry point for an incrementing copy (low addresses first).
; In:   ptr1 = source, ptr2 = destination, ptr3 = length;
;       dest is still on the C stack.
memcpy_increment:
        ldy     #$73                    ; TII opcode

; Entry point for callers that select the transfer opcode themselves.
; In:   .Y = opcode of the block-transfer instruction,
;       ptr1 = source, ptr2 = destination, ptr3 = length;
;       dest is still on the C stack.
memcpy_transfer:

; The HuC6280's block-transfer instructions cannot copy $0000 bytes; a length
; of zero makes them copy $10000 (64K) bytes instead.  Therefore, a zero size
; must copy nothing; just return the dest address.

        lda     ptr3
        ora     ptr3+1
        bne     @copy
        jmp     popax                   ; get pointer; and, return it as result

; Patch the opcode and the three inline arguments into the instruction that
; lives in the DATA segment; then, execute it.

@copy:  sty     transfer+opcode

        lda     ptr1
        ldx     ptr1+1
        sta     transfer+source
        stx     transfer+source+1

        lda     ptr2
        ldx     ptr2+1
        sta     transfer+destination
        stx     transfer+destination+1

        lda     ptr3
        ldx     ptr3+1
        sta     transfer+length
        stx     transfer+length+1

        jmp     transfer                ; copy; then, pop and return dest
; ----------------------------------------------------------------------
; Fetch the three arguments of memcpy()/memmove() into zero-page pointers:
;
;       size (in .XA on entry)          --> ptr3
;       src  (top of the C stack)       --> ptr1
;       dest (next on the C stack)      --> ptr2
;
; src is popped from the C stack.  dest is only read; it stays on the stack,
; and is also handed back to the caller in .XA.

memcpy_getparams:
        stx     ptr3+1                  ; size, high byte
        sta     ptr3                    ; size, low byte

        jsr     popax                   ; pop src into .XA
        stx     ptr1+1
        sta     ptr1

; Read dest directly through the stack pointer, without popping it.
; (Direct stack access is four cycles faster [total cycle count].)

        ldy     #1
        lda     (sp),y                  ; dest, high byte
        sta     ptr2+1
        tax
        lda     (sp)                    ; dest, low byte
        sta     ptr2
        rts                             ; .XA = dest address (for memmove)
; ----------------------------------------------------------------------
; The transfer instructions use inline arguments.
; Therefore, we must build the instruction in the DATA segment.
;
; The opcode and the three operands that are assembled here are only
; placeholders that reserve the seven bytes of the instruction;
; memcpy_transfer overwrites all of them (through the offsets opcode, source,
; destination, and length) before it jumps here.
; NOTE(review): presumably DATA is used because it is writable at run time
; (the CODE segment might be in ROM) -- confirm against the linker config.
.data
transfer:
tii $FFFF, $FFFF, $0001
; dest was left on the C stack by memcpy_getparams; pop it as the return value.
jmp popax ; get pointer; and, return it as result

63
libsrc/pce/memmove.s Normal file
View File

@ -0,0 +1,63 @@
;
; This file, instead of "common/memmove.s", will be assembled for the pce
; target. This version is smaller and faster because it uses the HuC6280's
; block-copy instructions.
;
; 2003-08-20, Ullrich von Bassewitz
; 2015-10-23, Greg King
;
; void* __fastcall__ memmove (void* dest, const void* src, size_t size);
;
; NOTE: This function uses entry points from "pce/memcpy.s"!
;
.export _memmove
.import memcpy_getparams, memcpy_increment, memcpy_transfer
.importzp ptr1, ptr2, ptr3
.macpack generic
.macpack longbranch
; ----------------------------------------------------------------------
; Turn the zero-page pointer <pointer> from "first byte of the region" into
; "last byte of the region":  pointer = pointer + ptr3 - 1  (16 bits).

.macro  seek_last_byte  pointer
        .local  no_borrow
        clc
        lda     pointer
        adc     ptr3
        sta     pointer
        lda     pointer+1
        adc     ptr3+1
        sta     pointer+1
        lda     pointer                 ; 16-bit decrement
        bne     no_borrow
        dec     pointer+1               ; low byte is $00: borrow from high byte
no_borrow:
        dec     pointer
.endmacro

; void* __fastcall__ memmove (void* dest, const void* src, size_t size);
;
; The size is not examined here; it is handed on to pce/memcpy.s as-is.

_memmove:
        jsr     memcpy_getparams        ; ptr1 = src, ptr2 = dest, ptr3 = size;
                                        ; .XA = dest

; Choose the copy direction with an unsigned 16-bit compare of dest with src.
; If dest < src, copy forwards (start at the low addresses, and increment the
; pointers); that is exactly what memcpy does.  Otherwise, copy backwards
; (start at the high addresses, and decrement the pointers), so that an
; overlapping source isn't overwritten before it has been read.

        cmp     ptr1                    ; carry = (dest low >= src low)
        txa
        sbc     ptr1+1                  ; carry clear if dest < src
        bcs     @backward
        jmp     memcpy_increment        ; dest < src: incrementing copy

; Decrementing copy:  the transfer starts at the last byte of each region.

@backward:
        seek_last_byte  ptr1            ; point to last byte of source
        seek_last_byte  ptr2            ; point to last byte of target

        ldy     #$C3                    ; TDD opcode
        jmp     memcpy_transfer