mirror of
https://github.com/cc65/cc65.git
synced 2026-04-26 13:18:31 +00:00
Improved memset/memcpy/memmove functions by Christian Krueger.
git-svn-id: svn://svn.cc65.org/cc65/trunk@4200 b7a2c559-68d2-44c3-8de9-860c34a00d81
This commit is contained in:
+50
-26
@@ -1,9 +1,11 @@
|
||||
;
|
||||
; void* memset (void* ptr, int c, size_t n);
|
||||
; void* _bzero (void* ptr, size_t n);
|
||||
; void bzero (void* ptr, size_t n);
|
||||
; void* __fastcall__ memset (void* ptr, int c, size_t n);
|
||||
; void* __fastcall__ _bzero (void* ptr, size_t n);
|
||||
; void __fastcall__ bzero (void* ptr, size_t n);
|
||||
;
|
||||
; Ullrich von Bassewitz, 29.05.1998
|
||||
; Performance increase (about 20%) by
|
||||
; Christian Krueger, 12.09.2009
|
||||
;
|
||||
; NOTE: bzero will return it's first argument as memset does. It is no problem
|
||||
; to declare the return value as void, since it may be ignored. _bzero
|
||||
@@ -15,57 +17,79 @@
|
||||
|
||||
.export _memset, _bzero, __bzero
|
||||
.import popax
|
||||
.importzp sp, ptr1, ptr2, ptr3, tmp1
|
||||
.importzp sp, ptr1, ptr2, ptr3
|
||||
|
||||
_bzero:
|
||||
__bzero:
|
||||
sta ptr3
|
||||
stx ptr3+1 ; Save n
|
||||
lda #0 ; Fill with zeros
|
||||
ldx #0 ; Fill with zeros
|
||||
beq common
|
||||
|
||||
|
||||
_memset:
|
||||
sta ptr3 ; Save n
|
||||
stx ptr3+1
|
||||
jsr popax ; Get c
|
||||
tax
|
||||
|
||||
; Common stuff for memset and bzero from here
|
||||
|
||||
common: sta tmp1 ; Save the fill value
|
||||
ldy #1
|
||||
common: ; Fill value is in X!
|
||||
ldy #1
|
||||
lda (sp),y
|
||||
tax
|
||||
dey
|
||||
sta ptr1+1 ; save high byte of ptr
|
||||
dey ; Y = 0
|
||||
lda (sp),y ; Get ptr
|
||||
sta ptr1
|
||||
stx ptr1+1 ; Save work copy
|
||||
|
||||
lda tmp1 ; Load fill value
|
||||
ldy #0
|
||||
lsr ptr3+1 ; divide number of
|
||||
ror ptr3 ; bytes by two to increase
|
||||
bcc evenCount ; speed (ptr3 = ptr3/2)
|
||||
oddCount:
|
||||
; y is still 0 here
|
||||
txa ; restore fill value
|
||||
sta (ptr1),y ; save value and increase
|
||||
inc ptr1 ; dest. pointer
|
||||
bne evenCount
|
||||
inc ptr1+1
|
||||
evenCount:
|
||||
lda ptr1 ; build second pointer section
|
||||
clc
|
||||
adc ptr3 ; ptr2 = ptr1 + (length/2) <- ptr3
|
||||
sta ptr2
|
||||
lda ptr1+1
|
||||
adc ptr3+1
|
||||
sta ptr2+1
|
||||
|
||||
txa ; restore fill value
|
||||
ldx ptr3+1 ; Get high byte of n
|
||||
beq L2 ; Jump if zero
|
||||
|
||||
; Set 256 byte blocks
|
||||
|
||||
; Set 256/512 byte blocks
|
||||
; y is still 0 here
|
||||
L1: .repeat 2 ; Unroll this a bit to make it faster
|
||||
sta (ptr1),y ; Set one byte
|
||||
iny
|
||||
sta (ptr1),y ; Set byte in lower section
|
||||
sta (ptr2),y ; Set byte in upper section
|
||||
iny
|
||||
.endrepeat
|
||||
bne L1
|
||||
inc ptr1+1
|
||||
inc ptr2+1
|
||||
dex ; Next 256 byte block
|
||||
bne L1 ; Repeat if any
|
||||
|
||||
; Set the remaining bytes if any
|
||||
|
||||
L2: ldx ptr3 ; Get the low byte of n
|
||||
beq L9 ; Low byte is zero
|
||||
|
||||
L3: sta (ptr1),y ; Set one byte
|
||||
iny
|
||||
dex ; Done?
|
||||
bne L3
|
||||
|
||||
L9: jmp popax ; Pop ptr and return as result
|
||||
L2: ldy ptr3 ; Get the low byte of n
|
||||
bne L3 ; something to set?
|
||||
jmp popax ; no -> Pop ptr and return as result
|
||||
|
||||
L3a: sta (ptr1),y ; set bytes in low
|
||||
sta (ptr2),y ; and high section
|
||||
L3: dey
|
||||
bne L3a
|
||||
sta (ptr1),y ; Set remaining byte(s)
|
||||
sta (ptr2),y
|
||||
jmp popax ; Pop ptr and return as result
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user