mirror of
https://github.com/cc65/cc65.git
synced 2025-03-01 11:29:27 +00:00
Added pce-specific substitutes for memcpy() and memmove().
They are smaller and faster because they take advantage of the pce CPU's block-copy instructions. Also, made a small improvement to the common memmove(), so that it is similar to the pce version.
This commit is contained in:
parent
43bda6928d
commit
8e8d112029
@ -1,7 +1,7 @@
|
|||||||
;
|
;
|
||||||
; Ullrich von Bassewitz, 2003-08-20
|
; 2003-08-20, Ullrich von Bassewitz
|
||||||
; Performance increase (about 20%) by
|
; 2009-09-13, Christian Krueger -- performance increase (about 20%)
|
||||||
; Christian Krueger, 2009-09-13
|
; 2015-10-23, Greg King
|
||||||
;
|
;
|
||||||
; void* __fastcall__ memmove (void* dest, const void* src, size_t size);
|
; void* __fastcall__ memmove (void* dest, const void* src, size_t size);
|
||||||
;
|
;
|
||||||
@ -23,8 +23,7 @@ _memmove:
|
|||||||
; low addresses and increase pointers), otherwise we must copy downwards
|
; low addresses and increase pointers), otherwise we must copy downwards
|
||||||
; (start at high addresses and decrease pointers).
|
; (start at high addresses and decrease pointers).
|
||||||
|
|
||||||
sec
|
cmp ptr1
|
||||||
sbc ptr1
|
|
||||||
txa
|
txa
|
||||||
sbc ptr1+1
|
sbc ptr1+1
|
||||||
jcc memcpy_upwards ; Branch if dest < src (upwards copy)
|
jcc memcpy_upwards ; Branch if dest < src (upwards copy)
|
||||||
@ -81,4 +80,3 @@ PageSizeCopy: ; assert Y = 0
|
|||||||
; Done, return dest
|
; Done, return dest
|
||||||
|
|
||||||
done: jmp popax ; Pop ptr and return as result
|
done: jmp popax ; Pop ptr and return as result
|
||||||
|
|
||||||
|
94
libsrc/pce/memcpy.s
Normal file
94
libsrc/pce/memcpy.s
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
;
|
||||||
|
; This file, instead of "common/memcpy.s", will be assembled for the pce
|
||||||
|
; target. This version is smaller and faster because it uses the HuC6280's
|
||||||
|
; block-copy instructions.
|
||||||
|
;
|
||||||
|
; 2003-08-20, Ullrich von Bassewitz
|
||||||
|
; 2015-10-11, Greg King
|
||||||
|
;
|
||||||
|
; void* __fastcall__ memcpy (void* dest, const void* src, size_t size);
|
||||||
|
;
|
||||||
|
; NOTE: This function contains entry points for memmove, which will resort
|
||||||
|
; to memcpy for an incrementing copy. Don't change this module without looking
|
||||||
|
; at "pce/memmove.s"!
|
||||||
|
;
|
||||||
|
|
||||||
|
.export _memcpy
|
||||||
|
.export memcpy_increment, memcpy_transfer, memcpy_getparams
|
||||||
|
|
||||||
|
.import popax
|
||||||
|
.importzp sp, ptr1, ptr2, ptr3
|
||||||
|
|
||||||
|
|
||||||
|
; Layout of a block-transfer instruction as it sits in memory.
; The field names below are used as offsets from the "transfer" label
; when the opcode and the operands are patched in.

        .struct
opcode          .byte                   ; which transfer instruction to run
source          .addr                   ; address to copy from
destination     .addr                   ; address to copy to
length          .word                   ; number of bytes to copy
        .endstruct
|
||||||
|
|
||||||
|
; ----------------------------------------------------------------------
|
||||||
|
; _memcpy: copy ptr3 (size) bytes from ptr1 (src) to ptr2 (dest) with a single
; block-transfer instruction; then, return dest.
;
; Additional entry points (see the .export list):
;
;       memcpy_increment -- ptr1/ptr2/ptr3 are loaded already; copy
;                           incrementing (TII).
;       memcpy_transfer  -- as above; but, the caller passes the opcode of
;                           the transfer instruction in .Y.
;
; In all three cases, dest must still be on the C stack; it is popped, and
; returned in .XA.
;
; A size of zero is caught here, before the transfer instruction is reached.
; (The HuC6280's transfer instructions can't copy $0000 bytes; they would
; copy $10000 [64K] bytes instead.)

_memcpy:
        jsr     memcpy_getparams        ; size -> ptr3, src -> ptr1, dest -> ptr2

memcpy_increment:
        ldy     #$73                    ; TII opcode

memcpy_transfer:
        lda     ptr3
        ora     ptr3+1
        beq     @return                 ; size == 0: copy nothing

        sty     transfer+opcode

        lda     ptr1
        ldx     ptr1+1
        sta     transfer+source
        stx     transfer+source+1

        lda     ptr2
        ldx     ptr2+1
        sta     transfer+destination
        stx     transfer+destination+1

        lda     ptr3
        ldx     ptr3+1
        sta     transfer+length
        stx     transfer+length+1

        jmp     transfer                ; run the patched instruction

@return:
        jmp     popax                   ; get pointer; and, return it as result
|
||||||
|
|
||||||
|
; ----------------------------------------------------------------------
|
||||||
|
; Fetch the arguments of memcpy()/memmove() into zero page, as follows:
;
;       size --> ptr3   (passed in .XA)
;       src  --> ptr1   (popped from the C stack)
;       dest --> ptr2   (read from the C stack, but left there)
;
; On return, .XA holds dest; and, dest is still the top entry of the C stack.

memcpy_getparams:
        sta     ptr3                    ; size, low byte
        stx     ptr3+1                  ; size, high byte

        jsr     popax                   ; src
        sta     ptr1
        stx     ptr1+1

; Peek at dest without popping it.
; (Direct stack access is four cycles faster [total cycle count].)

        ldy     #1
        lda     (sp),y                  ; dest, high byte
        sta     ptr2+1
        tax
        lda     (sp)                    ; dest, low byte
        sta     ptr2
        rts                             ; .XA = dest address (for memmove)
|
||||||
|
|
||||||
|
; ----------------------------------------------------------------------
|
||||||
|
; A transfer instruction takes its source, destination, and length as inline
; operands.  Therefore, the instruction is built in the DATA segment, where
; memcpy_transfer can overwrite its opcode and operands before jumping to it.
; The operands that are assembled here are only placeholders.

        .data

transfer:
        tii     $FFFF, $FFFF, $0001     ; patched before every use
        jmp     popax                   ; get pointer; and, return it as result
|
63
libsrc/pce/memmove.s
Normal file
63
libsrc/pce/memmove.s
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
;
|
||||||
|
; This file, instead of "common/memmove.s", will be assembled for the pce
|
||||||
|
; target. This version is smaller and faster because it uses the HuC6280's
|
||||||
|
; block-copy instructions.
|
||||||
|
;
|
||||||
|
; 2003-08-20, Ullrich von Bassewitz
|
||||||
|
; 2015-10-23, Greg King
|
||||||
|
;
|
||||||
|
; void* __fastcall__ memmove (void* dest, const void* src, size_t size);
|
||||||
|
;
|
||||||
|
; NOTE: This function uses entry points from "pce/memcpy.s"!
|
||||||
|
;
|
||||||
|
|
||||||
|
.export _memmove
|
||||||
|
|
||||||
|
.import memcpy_getparams, memcpy_increment, memcpy_transfer
|
||||||
|
.importzp ptr1, ptr2, ptr3
|
||||||
|
|
||||||
|
.macpack generic
|
||||||
|
.macpack longbranch
|
||||||
|
|
||||||
|
|
||||||
|
; ----------------------------------------------------------------------
|
||||||
|
; _memmove: copy size bytes from src to dest, choosing the copy direction
; from the order of the two addresses.
;
; memcpy_getparams stores size in ptr3, src in ptr1, and dest in ptr2; and,
; it returns dest in .XA.

_memmove:
        jsr     memcpy_getparams

; Choose the copy direction.  If dest < src, copy incrementing (start at the
; low addresses, and increase the pointers); otherwise, copy decrementing
; (start at the high addresses, and decrease the pointers).

        cmp     ptr1                    ; 16-bit dest - src; only the
        txa                             ; carry is wanted
        sbc     ptr1+1
        bcs     @decrement
        jmp     memcpy_increment        ; dest < src

; Copy decrementing; move both pointers to the last byte of their regions
; (pointer + size - 1).

@decrement:
        clc
        lda     ptr1
        adc     ptr3
        sta     ptr1
        lda     ptr1+1
        adc     ptr3+1
        sta     ptr1+1

        lda     ptr1                    ; point to last byte of source
        bne     @src_ok
        dec     ptr1+1                  ; borrow from the high byte
@src_ok:
        dec     ptr1

        clc
        lda     ptr2
        adc     ptr3
        sta     ptr2
        lda     ptr2+1
        adc     ptr3+1
        sta     ptr2+1

        lda     ptr2                    ; point to last byte of target
        bne     @dest_ok
        dec     ptr2+1                  ; borrow from the high byte
@dest_ok:
        dec     ptr2

        ldy     #$C3                    ; TDD opcode
        jmp     memcpy_transfer
|
Loading…
x
Reference in New Issue
Block a user