From 8e8d1120290c743ea97d8e8889c039341f3f514d Mon Sep 17 00:00:00 2001
From: Greg King
Date: Fri, 23 Oct 2015 18:02:21 -0400
Subject: [PATCH] Added pce-specific substitutes for memcpy() and memmove().

They are smaller and faster because they take advantage of the pce CPU's block-copy instructions.
Also, made a small improvement to the common memmove(), so that it is similar to the pce version.
---
 libsrc/common/memmove.s | 10 ++---
 libsrc/pce/memcpy.s | 94 +++++++++++++++++++++++++++++++++++++++++
 libsrc/pce/memmove.s | 63 +++++++++++++++++++++++++++
 3 files changed, 161 insertions(+), 6 deletions(-)
 create mode 100644 libsrc/pce/memcpy.s
 create mode 100644 libsrc/pce/memmove.s

diff --git a/libsrc/common/memmove.s b/libsrc/common/memmove.s
index 94ad7d102..9c33124f1 100644
--- a/libsrc/common/memmove.s
+++ b/libsrc/common/memmove.s
@@ -1,7 +1,7 @@
 ;
-; Ullrich von Bassewitz, 2003-08-20
-; Performance increase (about 20%) by
-; Christian Krueger, 2009-09-13
+; 2003-08-20, Ullrich von Bassewitz
+; 2009-09-13, Christian Krueger -- performance increase (about 20%)
+; 2015-10-23, Greg King
 ;
 ; void* __fastcall__ memmove (void* dest, const void* src, size_t size);
 ;
@@ -23,8 +23,7 @@ _memmove:
 ; low addresses and increase pointers), otherwise we must copy downwards
 ; (start at high addresses and decrease pointers).
 
-        sec
-        sbc     ptr1
+        cmp     ptr1                    ; same carry as sec/sbc; .A is overwritten by txa next
         txa
         sbc     ptr1+1
         jcc     memcpy_upwards          ; Branch if dest < src (upwards copy)
@@ -81,4 +80,3 @@ PageSizeCopy: ; assert Y = 0
 ; Done, return dest
 
 done:   jmp     popax                   ; Pop ptr and return as result
-
diff --git a/libsrc/pce/memcpy.s b/libsrc/pce/memcpy.s
new file mode 100644
index 000000000..b06dad645
--- /dev/null
+++ b/libsrc/pce/memcpy.s
@@ -0,0 +1,94 @@
+;
+; This file, instead of "common/memcpy.s", will be assembled for the pce
+; target.  This version is smaller and faster because it uses the HuC6280's
+; block-copy instructions.
+;
+; 2003-08-20, Ullrich von Bassewitz
+; 2015-10-11, Greg King
+;
+; void* __fastcall__ memcpy (void* dest, const void* src, size_t size);
+;
+; NOTE:  memmove() enters this module at memcpy_getparams, memcpy_increment,
+; and memcpy_transfer.  Don't change this module without looking
+; at "pce/memmove.s"!
+;
+
+        .export         _memcpy
+        .export         memcpy_increment, memcpy_transfer, memcpy_getparams
+
+        .import         popax
+        .importzp       sp, ptr1, ptr2, ptr3
+
+
+; Layout of a block-transfer instruction: one opcode byte, then three 16-bit operands
+
+        .struct
+opcode          .byte
+source          .addr
+destination     .addr
+length          .word
+        .endstruct
+
+; ----------------------------------------------------------------------
+_memcpy:
+        jsr     memcpy_getparams        ; size -> ptr3, src -> ptr1, dest -> ptr2
+
+memcpy_increment:
+        ldy     #$73                    ; TII opcode (same instruction as assembled at "transfer")
+
+memcpy_transfer:
+        sty     transfer+opcode         ; patch the opcode passed in .Y into the instruction
+
+        lda     ptr1                    ; source address
+        ldx     ptr1+1
+        sta     transfer+source
+        stx     transfer+source+1
+
+        lda     ptr2                    ; destination address
+        ldx     ptr2+1
+        sta     transfer+destination
+        stx     transfer+destination+1
+
+        lda     ptr3                    ; length, stored unchecked -- NOTE(review): confirm what the CPU does for 0
+        ldx     ptr3+1
+        sta     transfer+length
+        stx     transfer+length+1
+
+        jmp     transfer                ; run the patched instruction; it ends with jmp popax
+
+; ----------------------------------------------------------------------
+; Get the parameters from the stack, as follows:
+;
+;       size --> ptr3
+;       src  --> ptr1
+;       dest --> ptr2
+;
+; dest is only read from the stack, not popped; it is also returned in .XA!
+
+memcpy_getparams:
+        sta     ptr3
+        stx     ptr3+1                  ; save size
+
+        jsr     popax                   ; pop src into .XA
+        sta     ptr1
+        stx     ptr1+1                  ; save src
+
+; (Direct stack access is four cycles faster [total cycle count].)
+
+        ldy     #1                      ; save dest
+        lda     (sp),y                  ; get high byte
+        tax
+        lda     (sp)                    ; get low byte
+        sta     ptr2
+        stx     ptr2+1
+        rts                             ; return dest address (for memmove)
+
+; ----------------------------------------------------------------------
+; The transfer instructions use inline arguments.
+; Therefore, we must build the instruction in the DATA segment.
+
+.data
+
+transfer:
+        tii     $FFFF, $FFFF, $0001     ; placeholder; opcode and operands are overwritten by memcpy_transfer
+        jmp     popax                   ; get pointer; and, return it as result
diff --git a/libsrc/pce/memmove.s b/libsrc/pce/memmove.s
new file mode 100644
index 000000000..9a7feebf5
--- /dev/null
+++ b/libsrc/pce/memmove.s
@@ -0,0 +1,63 @@
+;
+; This file, instead of "common/memmove.s", will be assembled for the pce
+; target.  This version is smaller and faster because it uses the HuC6280's
+; block-copy instructions.
+;
+; 2003-08-20, Ullrich von Bassewitz
+; 2015-10-23, Greg King
+;
+; void* __fastcall__ memmove (void* dest, const void* src, size_t size);
+;
+; NOTE:  This function uses entry points from "pce/memcpy.s"!
+;
+
+        .export         _memmove
+
+        .import         memcpy_getparams, memcpy_increment, memcpy_transfer
+        .importzp       ptr1, ptr2, ptr3
+
+        .macpack        generic         ; provides the "add" macro used below
+        .macpack        longbranch      ; provides the "jcc" long branch used below
+
+
+; ----------------------------------------------------------------------
+_memmove:
+        jsr     memcpy_getparams        ; size -> ptr3, src -> ptr1, dest -> ptr2 and .XA
+
+; Check for the copy direction.  If dest < src, we copy incrementing (start at
+; low addresses, and increase pointers); otherwise, we copy decrementing
+; (start at high addresses, and decrease pointers).
+
+        cmp     ptr1                    ; 16-bit compare of dest (.XA) with src: low byte
+        txa
+        sbc     ptr1+1                  ; high byte; only the resulting carry is used
+        jcc     memcpy_increment        ; Branch if dest < src
+
+; Copy decrementing; adjust the pointers to the end of the memory regions.
+
+        lda     ptr1                    ; src += size
+        add     ptr3
+        sta     ptr1
+        lda     ptr1+1
+        adc     ptr3+1
+        sta     ptr1+1
+
+        lda     ptr1                    ; point to last byte of source (16-bit decrement)
+        bne     @L1
+        dec     ptr1+1
+@L1:    dec     ptr1
+
+        lda     ptr2                    ; dest += size
+        add     ptr3
+        sta     ptr2
+        lda     ptr2+1
+        adc     ptr3+1
+        sta     ptr2+1
+
+        lda     ptr2                    ; point to last byte of target (16-bit decrement)
+        bne     @L2
+        dec     ptr2+1
+@L2:    dec     ptr2
+
+        ldy     #$C3                    ; TDD opcode, for the decrementing copy
+        jmp     memcpy_transfer         ; patch it into the transfer instruction and run it