mirror of
https://github.com/cc65/cc65.git
synced 2024-06-07 23:29:39 +00:00
Little strdup optimisation
-12 bytes on disk, -20 cycles per strdup, -6 cycles per strlen called from strdup
This commit is contained in:
parent
169c9c0da0
commit
3a439e0e1b
|
@ -7,13 +7,13 @@
|
||||||
|
|
||||||
.export _strcspn
|
.export _strcspn
|
||||||
.import popptr1, _strlen
|
.import popptr1, _strlen
|
||||||
.importzp ptr1, ptr2, tmp1, tmp2
|
.importzp ptr1, ptr4, tmp1, tmp2
|
||||||
|
|
||||||
_strcspn:
|
_strcspn:
|
||||||
jsr _strlen ; get length in a/x and transfer s2 to ptr2
|
jsr _strlen ; get length in a/x and transfer s2 to ptr4
|
||||||
; Note: It does not make sense to
|
; Note: It does not make sense to
|
||||||
; have more than 255 test chars, so
|
; have more than 255 test chars, so
|
||||||
; we don't support a high byte here! (ptr2+1 is
|
; we don't support a high byte here! (ptr4+1 is
|
||||||
; also unchanged in strlen then (important!))
|
; also unchanged in strlen then (important!))
|
||||||
; -> the original implementation also
|
; -> the original implementation also
|
||||||
; ignored this case
|
; ignored this case
|
||||||
|
@ -38,7 +38,7 @@ checkNext:
|
||||||
iny
|
iny
|
||||||
check: cpy tmp1 ; compare with length of test character string
|
check: cpy tmp1 ; compare with length of test character string
|
||||||
beq endOfTestChars
|
beq endOfTestChars
|
||||||
cmp (ptr2),y ; found matching char?
|
cmp (ptr4),y ; found matching char?
|
||||||
bne checkNext
|
bne checkNext
|
||||||
|
|
||||||
leave: txa ; restore position of finding
|
leave: txa ; restore position of finding
|
||||||
|
|
|
@ -1,85 +1,62 @@
|
||||||
;
|
;
|
||||||
; Ullrich von Bassewitz, 18.07.2000
|
; Ullrich von Bassewitz, 18.07.2000
|
||||||
|
; Colin Leroy-Mira, 05.01.2024
|
||||||
;
|
;
|
||||||
; char* __fastcall__ strdup (const char* S);
|
; char* __fastcall__ strdup (const char* S);
|
||||||
;
|
;
|
||||||
; Note: The code knows which zero page locations are used by malloc.
|
; Note: The code knows which zero page locations are used by malloc,
|
||||||
|
; memcpy and strlen.
|
||||||
;
|
;
|
||||||
|
|
||||||
.importzp sp, tmp1, ptr4
|
.importzp ptr2, ptr3, ptr4, tmp1, tmp2, tmp3
|
||||||
.import pushax, decsp4, incsp4
|
.import _strlen_ptr4, _malloc, _memcpy, pushax
|
||||||
.import _strlen, _malloc, _memcpy
|
|
||||||
.export _strdup
|
.export _strdup
|
||||||
|
|
||||||
.macpack cpu
|
.macpack cpu
|
||||||
.macpack generic
|
|
||||||
|
|
||||||
_strdup:
|
_strdup:
|
||||||
|
; Get length (and store source in ptr4)
|
||||||
|
sta ptr4
|
||||||
|
stx ptr4+1
|
||||||
|
stx tmp1 ; Backup high byte, which
|
||||||
|
jsr _strlen_ptr4 ; strlen may increment
|
||||||
|
|
||||||
; Since we need some place to store the intermediate results, allocate a
|
; Add null byte for terminator
|
||||||
; stack frame. To make this somewhat more efficient, create the stackframe
|
.if (.cpu .bitand ::CPU_ISET_65SC02)
|
||||||
; as needed for the final call to the memcpy function.
|
inc a
|
||||||
|
|
||||||
pha ; decsp will destroy A (but not X)
|
|
||||||
jsr decsp4 ; Target/source
|
|
||||||
|
|
||||||
; Store the pointer into the source slot
|
|
||||||
|
|
||||||
ldy #1
|
|
||||||
txa
|
|
||||||
sta (sp),y
|
|
||||||
pla
|
|
||||||
.if (.cpu .bitand CPU_ISET_65SC02)
|
|
||||||
sta (sp)
|
|
||||||
.else
|
.else
|
||||||
dey
|
clc
|
||||||
sta (sp),y
|
adc #1
|
||||||
.endif
|
.endif
|
||||||
|
bne :+
|
||||||
; Get length of S (which is still in a/x)
|
|
||||||
|
|
||||||
jsr _strlen
|
|
||||||
|
|
||||||
; Calculate strlen(S)+1 (the space needed)
|
|
||||||
|
|
||||||
add #1
|
|
||||||
bcc @L1
|
|
||||||
inx
|
inx
|
||||||
|
|
||||||
; Save the space we're about to allocate in ptr4
|
; Store length
|
||||||
|
: sta tmp2
|
||||||
@L1: sta ptr4
|
stx tmp3
|
||||||
stx ptr4+1
|
|
||||||
|
|
||||||
; Allocate memory. _malloc will not use ptr4
|
|
||||||
|
|
||||||
|
; Allocate memory
|
||||||
jsr _malloc
|
jsr _malloc
|
||||||
|
|
||||||
; Store the result into the target stack slot
|
; Check for NULL
|
||||||
|
bne :+
|
||||||
ldy #2
|
cpx #$00
|
||||||
sta (sp),y ; Store low byte
|
|
||||||
sta tmp1
|
|
||||||
txa ; Get high byte
|
|
||||||
iny
|
|
||||||
sta (sp),y ; Store high byte
|
|
||||||
|
|
||||||
; Check for a NULL pointer
|
|
||||||
|
|
||||||
ora tmp1
|
|
||||||
beq OutOfMemory
|
beq OutOfMemory
|
||||||
|
|
||||||
; Copy the string. memcpy will return the target string which is exactly
|
; Push dest
|
||||||
; what we need here. It will also drop the allocated stack frame.
|
: jsr pushax
|
||||||
|
|
||||||
|
; Push source
|
||||||
lda ptr4
|
lda ptr4
|
||||||
ldx ptr4+1 ; Load size
|
ldx tmp1
|
||||||
jmp _memcpy ; Copy string, drop stackframe
|
jsr pushax
|
||||||
|
|
||||||
; Out of memory, return NULL (A = 0)
|
; Push length
|
||||||
|
lda tmp2
|
||||||
|
ldx tmp3
|
||||||
|
|
||||||
|
; Copy and return the dest pointer
|
||||||
|
jmp _memcpy
|
||||||
|
|
||||||
OutOfMemory:
|
OutOfMemory:
|
||||||
tax
|
rts
|
||||||
jmp incsp4 ; Drop stack frame
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,19 +2,20 @@
|
||||||
; Ullrich von Bassewitz, 31.05.1998
|
; Ullrich von Bassewitz, 31.05.1998
|
||||||
;
|
;
|
||||||
; Note: strspn & strcspn call this function internally and rely on
|
; Note: strspn & strcspn call this function internally and rely on
|
||||||
; the usage of only ptr2 here! Keep in mind when applying changes
|
; the usage of only ptr4 here! Keep in mind when applying changes
|
||||||
; and check the other implementations too!
|
; and check the other implementations too!
|
||||||
;
|
;
|
||||||
; size_t __fastcall__ strlen (const char* s);
|
; size_t __fastcall__ strlen (const char* s);
|
||||||
;
|
;
|
||||||
|
|
||||||
.export _strlen
|
.export _strlen, _strlen_ptr4
|
||||||
.importzp ptr2
|
.importzp ptr4
|
||||||
.macpack cpu
|
.macpack cpu
|
||||||
|
|
||||||
_strlen:
|
_strlen:
|
||||||
sta ptr2 ; Save s
|
sta ptr4 ; Save s
|
||||||
stx ptr2+1
|
stx ptr4+1
|
||||||
|
_strlen_ptr4:
|
||||||
.if (.cpu .bitand ::CPU_ISET_HUC6280)
|
.if (.cpu .bitand ::CPU_ISET_HUC6280)
|
||||||
clx
|
clx
|
||||||
cly
|
cly
|
||||||
|
@ -27,11 +28,11 @@ _strlen:
|
||||||
.endif
|
.endif
|
||||||
.endif
|
.endif
|
||||||
|
|
||||||
L1: lda (ptr2),y
|
L1: lda (ptr4),y
|
||||||
beq L9
|
beq L9
|
||||||
iny
|
iny
|
||||||
bne L1
|
bne L1
|
||||||
inc ptr2+1
|
inc ptr4+1
|
||||||
inx
|
inx
|
||||||
bne L1
|
bne L1
|
||||||
|
|
||||||
|
|
|
@ -7,13 +7,13 @@
|
||||||
|
|
||||||
.export _strspn
|
.export _strspn
|
||||||
.import popptr1, _strlen
|
.import popptr1, _strlen
|
||||||
.importzp ptr1, ptr2, tmp1, tmp2
|
.importzp ptr1, ptr4, tmp1, tmp2
|
||||||
|
|
||||||
_strspn:
|
_strspn:
|
||||||
jsr _strlen ; get length in a/x and transfer s2 to ptr2
|
jsr _strlen ; get length in a/x and transfer s2 to ptr4
|
||||||
; Note: It does not make sense to
|
; Note: It does not make sense to
|
||||||
; have more than 255 test chars, so
|
; have more than 255 test chars, so
|
||||||
; we don't support a high byte here! (ptr2+1 is
|
; we don't support a high byte here! (ptr4+1 is
|
||||||
; also unchanged in strlen then (important!))
|
; also unchanged in strlen then (important!))
|
||||||
; -> the original implementation also
|
; -> the original implementation also
|
||||||
; ignored this case
|
; ignored this case
|
||||||
|
@ -38,7 +38,7 @@ checkNext:
|
||||||
iny
|
iny
|
||||||
check: cpy tmp1 ; compare with length of test character string
|
check: cpy tmp1 ; compare with length of test character string
|
||||||
beq leave
|
beq leave
|
||||||
cmp (ptr2),y ; found matching char?
|
cmp (ptr4),y ; found matching char?
|
||||||
bne checkNext
|
bne checkNext
|
||||||
|
|
||||||
foundTestChar:
|
foundTestChar:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user