mirror of
https://github.com/cc65/cc65.git
synced 2025-04-06 04:41:08 +00:00
Merge pull request #657 from IrgendwerA8/VariousSpeedSizeOptimizations
Various speed size optimizations
This commit is contained in:
commit
f485be1b84
@ -6,11 +6,11 @@
|
||||
;
|
||||
|
||||
.export _ltoa, _ultoa
|
||||
.import popax
|
||||
.import popax, popptr1, negeax
|
||||
.import __hextab, __longminstr
|
||||
.importzp sreg, ptr1, ptr2, ptr3, tmp1
|
||||
|
||||
|
||||
.macpack cpu
|
||||
|
||||
.code
|
||||
|
||||
@ -19,17 +19,15 @@
|
||||
;
|
||||
|
||||
dopop: sta tmp1 ; will lose high byte
|
||||
jsr popax ; get s
|
||||
sta ptr1
|
||||
stx ptr1+1
|
||||
sta sreg ; save for return
|
||||
stx sreg+1
|
||||
jsr popax ; get low word of value
|
||||
jsr popax ; get s to ptr2
|
||||
sta ptr2
|
||||
stx ptr2+1
|
||||
jsr popax ; get high word of value
|
||||
sta ptr3
|
||||
sta ptr3 ; save for return
|
||||
stx ptr3+1
|
||||
jsr popptr1 ; get low word of value to ptr1
|
||||
jsr popax ; get high word of value to sreg
|
||||
sta sreg
|
||||
stx sreg+1
|
||||
rts
|
||||
|
||||
;
|
||||
@ -41,20 +39,20 @@ _ltoa: jsr dopop ; pop the arguments
|
||||
; We must handle $80000000 in a special way, since it is the only negative
|
||||
; number that has no positive 32-bit counterpart
|
||||
|
||||
ldx ptr3+1 ; get high byte
|
||||
ldx sreg+1 ; get high byte
|
||||
ldy tmp1 ; get radix
|
||||
cpy #10
|
||||
bne ultoa
|
||||
lda ptr3
|
||||
ora ptr2+1
|
||||
ora ptr2
|
||||
lda sreg
|
||||
ora ptr1+1
|
||||
ora ptr1
|
||||
bne L2
|
||||
cpx #$80
|
||||
bne L2
|
||||
|
||||
ldy #11
|
||||
L1: lda __longminstr,y ; copy -2147483648
|
||||
sta (ptr1),y
|
||||
sta (ptr2),y
|
||||
dey
|
||||
bpl L1
|
||||
jmp L10
|
||||
@ -65,29 +63,25 @@ L1: lda __longminstr,y ; copy -2147483648
|
||||
L2: txa ; get high byte
|
||||
bpl ultoa
|
||||
lda #'-'
|
||||
ldy #0
|
||||
sta (ptr1),y ; store sign
|
||||
inc ptr1
|
||||
bne L3
|
||||
inc ptr1+1
|
||||
|
||||
L3: lda ptr2 ; negate val
|
||||
eor #$FF
|
||||
clc
|
||||
adc #$01
|
||||
sta ptr2
|
||||
lda ptr2+1
|
||||
eor #$FF
|
||||
adc #$00
|
||||
sta ptr2+1
|
||||
lda ptr3
|
||||
eor #$FF
|
||||
adc #$00
|
||||
sta ptr3
|
||||
lda ptr3+1
|
||||
eor #$FF
|
||||
adc #$00
|
||||
sta ptr3+1
|
||||
.if (.cpu .bitand CPU_ISET_65SC02)
|
||||
sta (ptr2)
|
||||
.else
|
||||
ldy #0
|
||||
sta (ptr2),y ; store sign
|
||||
.endif
|
||||
|
||||
inc ptr2
|
||||
bne L3
|
||||
inc ptr2+1
|
||||
|
||||
L3: lda ptr1 ; negate val
|
||||
ldx ptr1+1
|
||||
|
||||
jsr negeax
|
||||
|
||||
sta ptr1
|
||||
stx ptr1+1
|
||||
jmp ultoa
|
||||
|
||||
;
|
||||
@ -105,15 +99,15 @@ ultoa: lda #$00
|
||||
|
||||
L5: ldy #32 ; 32 bit
|
||||
lda #0 ; remainder
|
||||
L6: asl ptr2
|
||||
rol ptr2+1
|
||||
rol ptr3
|
||||
rol ptr3+1
|
||||
L6: asl ptr1
|
||||
rol ptr1+1
|
||||
rol sreg
|
||||
rol sreg+1
|
||||
rol a
|
||||
cmp tmp1
|
||||
bcc L7
|
||||
sbc tmp1
|
||||
inc ptr2
|
||||
inc ptr1
|
||||
L7: dey
|
||||
bne L6
|
||||
|
||||
@ -121,25 +115,25 @@ L7: dey
|
||||
lda __hextab,y ; get hex character
|
||||
pha ; save char value on stack
|
||||
|
||||
lda ptr2
|
||||
ora ptr2+1
|
||||
ora ptr3
|
||||
ora ptr3+1
|
||||
lda ptr1
|
||||
ora ptr1+1
|
||||
ora sreg
|
||||
ora sreg+1
|
||||
bne L5
|
||||
|
||||
; Get the characters from the stack into the string
|
||||
|
||||
ldy #0
|
||||
L9: pla
|
||||
sta (ptr1),y
|
||||
sta (ptr2),y
|
||||
beq L10 ; jump if sentinel
|
||||
iny
|
||||
bne L9 ; jump always
|
||||
|
||||
; Done! Return the target string
|
||||
|
||||
L10: lda sreg
|
||||
ldx sreg+1
|
||||
L10: lda ptr3
|
||||
ldx ptr3+1
|
||||
rts
|
||||
|
||||
|
||||
|
@ -6,40 +6,39 @@
|
||||
;
|
||||
|
||||
.export _strcspn
|
||||
.import popax, _strlen
|
||||
.import popptr1, _strlen
|
||||
.importzp ptr1, ptr2, tmp1, tmp2
|
||||
|
||||
_strcspn:
|
||||
jsr _strlen ; get length in a/x and transfer s2 to ptr1
|
||||
jsr _strlen ; get length in a/x and transfer s2 to ptr2
|
||||
; Note: It does not make sense to
|
||||
; have more than 255 test chars, so
|
||||
; we don't support a high byte here! (ptr1+1 is
|
||||
; we don't support a high byte here! (ptr2+1 is
|
||||
; also unchanged in strlen then (important!))
|
||||
; -> the original implementation also
|
||||
; ignored this case
|
||||
|
||||
sta tmp1 ; tmp1 = strlen of test chars
|
||||
jsr popax ; get and save s1
|
||||
sta ptr2 ; to ptr2
|
||||
stx ptr2+1
|
||||
jsr popptr1 ; get and save s1 to ptr1
|
||||
|
||||
ldx #0 ; low counter byte
|
||||
stx tmp2 ; high counter byte
|
||||
|
||||
loadChar:
|
||||
ldy #0
|
||||
lda (ptr2),y ; get next char from s1
|
||||
lda (ptr1),y ; get next char from s1
|
||||
beq leave ; leave at the terminating zero byte of s1
|
||||
advance:
|
||||
inc ptr2 ; advance string position to test
|
||||
inc ptr1 ; advance string position to test
|
||||
bne check
|
||||
inc ptr2+1
|
||||
inc ptr1+1
|
||||
dey ; correct next iny (faster/shorter than bne...)
|
||||
|
||||
checkNext:
|
||||
iny
|
||||
check: cpy tmp1 ; compare with length of test character string
|
||||
beq endOfTestChars
|
||||
cmp (ptr1),y ; found matching char?
|
||||
cmp (ptr2),y ; found matching char?
|
||||
bne checkNext
|
||||
|
||||
leave: txa ; restore position of finding
|
||||
|
@ -2,26 +2,26 @@
|
||||
; Ullrich von Bassewitz, 31.05.1998
|
||||
;
|
||||
; Note: strspn & strcspn call this function internally and rely on
|
||||
; the usage of only ptr1 here! Keep this in mind when applying changes
|
||||
; the usage of only ptr2 here! Keep this in mind when applying changes
|
||||
; and check the other implementations too!
|
||||
;
|
||||
; int strlen (const char* s);
|
||||
;
|
||||
|
||||
.export _strlen
|
||||
.importzp ptr1
|
||||
.importzp ptr2
|
||||
|
||||
_strlen:
|
||||
sta ptr1 ; Save s
|
||||
stx ptr1+1
|
||||
sta ptr2 ; Save s
|
||||
stx ptr2+1
|
||||
ldx #0 ; YX used as counter
|
||||
ldy #0
|
||||
|
||||
L1: lda (ptr1),y
|
||||
L1: lda (ptr2),y
|
||||
beq L9
|
||||
iny
|
||||
bne L1
|
||||
inc ptr1+1
|
||||
inc ptr2+1
|
||||
inx
|
||||
bne L1
|
||||
|
||||
|
@ -6,40 +6,39 @@
|
||||
;
|
||||
|
||||
.export _strspn
|
||||
.import popax, _strlen
|
||||
.import popptr1, _strlen
|
||||
.importzp ptr1, ptr2, tmp1, tmp2
|
||||
|
||||
_strspn:
|
||||
jsr _strlen ; get length in a/x and transfer s2 to ptr1
|
||||
jsr _strlen ; get length in a/x and transfer s2 to ptr2
|
||||
; Note: It does not make sense to
|
||||
; have more than 255 test chars, so
|
||||
; we don't support a high byte here! (ptr1+1 is
|
||||
; we don't support a high byte here! (ptr2+1 is
|
||||
; also unchanged in strlen then (important!))
|
||||
; -> the original implementation also
|
||||
; ignored this case
|
||||
|
||||
sta tmp1 ; tmp1 = strlen of test chars
|
||||
jsr popax ; get and save s1
|
||||
sta ptr2 ; to ptr2
|
||||
stx ptr2+1
|
||||
jsr popptr1 ; get and save s1 to ptr1
|
||||
|
||||
ldx #0 ; low counter byte
|
||||
stx tmp2 ; high counter byte
|
||||
|
||||
loadChar:
|
||||
ldy #0
|
||||
lda (ptr2),y ; get next char from s1
|
||||
lda (ptr1),y ; get next char from s1
|
||||
beq leave ; leave at the terminating zero byte of s1
|
||||
advance:
|
||||
inc ptr2 ; advance string position to test
|
||||
inc ptr1 ; advance string position to test
|
||||
bne check
|
||||
inc ptr2+1
|
||||
inc ptr1+1
|
||||
dey ; correct next iny (faster/shorter than bne...)
|
||||
|
||||
checkNext:
|
||||
iny
|
||||
check: cpy tmp1 ; compare with length of test character string
|
||||
beq leave
|
||||
cmp (ptr1),y ; found matching char?
|
||||
cmp (ptr2),y ; found matching char?
|
||||
bne checkNext
|
||||
|
||||
foundTestChar:
|
||||
|
@ -6,29 +6,30 @@
|
||||
|
||||
.export _screensize
|
||||
|
||||
.import popsreg
|
||||
.import popptr1
|
||||
.import screensize
|
||||
.importzp ptr1, sreg
|
||||
.importzp ptr1, ptr2
|
||||
|
||||
.macpack cpu
|
||||
|
||||
.proc _screensize
|
||||
|
||||
sta ptr1 ; Store the y pointer
|
||||
stx ptr1+1
|
||||
jsr popsreg ; Get the x pointer into sreg
|
||||
sta ptr2 ; Store the y pointer
|
||||
stx ptr2+1
|
||||
jsr popptr1 ; Get the x pointer into ptr1
|
||||
jsr screensize ; Get screensize into X/Y
|
||||
tya ; Get Y size into A
|
||||
|
||||
.IFP02
|
||||
ldy #0
|
||||
sta (ptr1),y
|
||||
.if (.cpu .bitand ::CPU_ISET_65SC02)
|
||||
sta (ptr2)
|
||||
txa
|
||||
sta (sreg),y
|
||||
.ELSE
|
||||
sta (ptr1)
|
||||
.else
|
||||
ldy #0
|
||||
sta (ptr2),y
|
||||
txa
|
||||
sta (sreg)
|
||||
.ENDIF
|
||||
|
||||
sta (ptr1),y
|
||||
.endif
|
||||
rts
|
||||
|
||||
.endproc
|
||||
|
@ -6,8 +6,8 @@
|
||||
|
||||
.export tosumulax, tosmulax
|
||||
.import mul8x16, mul8x16a ; in mul8.s
|
||||
.import popsreg
|
||||
.importzp sreg, tmp1, ptr4
|
||||
.import popptr1
|
||||
.importzp tmp1, ptr1, ptr4
|
||||
|
||||
|
||||
;---------------------------------------------------------------------------
|
||||
@ -19,12 +19,12 @@ tosumulax:
|
||||
txa ; High byte zero
|
||||
beq @L3 ; Do 8x16 multiplication if high byte zero
|
||||
stx ptr4+1 ; Save right operand
|
||||
jsr popsreg ; Get left operand
|
||||
jsr popptr1 ; Get left operand (Y=0 by popptr1)
|
||||
|
||||
; Do ptr4:ptr4+1 * sreg:sreg+1 --> AX
|
||||
; Do ptr4:ptr4+1 * ptr1:ptr1+1 --> AX
|
||||
|
||||
lda #0
|
||||
ldx sreg+1 ; Get high byte into register for speed
|
||||
tya ; A = 0
|
||||
ldx ptr1+1 ; check if lhs is 8 bit only
|
||||
beq @L4 ; -> we can do 8x16 after swap
|
||||
sta tmp1
|
||||
ldy #16 ; Number of bits
|
||||
@ -34,12 +34,12 @@ tosumulax:
|
||||
@L0: bcc @L1
|
||||
|
||||
clc
|
||||
adc sreg
|
||||
pha
|
||||
txa ; hi byte of left op
|
||||
adc ptr1
|
||||
tax
|
||||
lda ptr1+1 ; hi byte of left op
|
||||
adc tmp1
|
||||
sta tmp1
|
||||
pla
|
||||
txa
|
||||
|
||||
@L1: ror tmp1
|
||||
ror a
|
||||
@ -59,10 +59,11 @@ tosumulax:
|
||||
; If the high byte of rhs is zero, swap the operands and use the 8x16
|
||||
; routine. On entry, A and X are zero
|
||||
|
||||
@L4: ldy sreg ; Save right operand (8 bit)
|
||||
@L4: ldy ptr1 ; Save right operand (8 bit)
|
||||
ldx ptr4 ; Copy left 16 bit operand to right
|
||||
stx sreg
|
||||
ldx ptr4+1 ; Don't store, this is done later
|
||||
stx ptr1
|
||||
ldx ptr4+1 ; swap high-byte too
|
||||
stx ptr1+1
|
||||
sty ptr4 ; Copy low 8 bit of right op to left
|
||||
ldy #8
|
||||
jmp mul8x16a
|
||||
|
@ -6,8 +6,8 @@
|
||||
|
||||
.export tosumula0, tosmula0
|
||||
.export mul8x16, mul8x16a
|
||||
.import popsreg
|
||||
.importzp sreg, ptr4
|
||||
.import popptr1
|
||||
.importzp ptr1, ptr4
|
||||
|
||||
|
||||
;---------------------------------------------------------------------------
|
||||
@ -16,11 +16,11 @@
|
||||
tosmula0:
|
||||
tosumula0:
|
||||
sta ptr4
|
||||
mul8x16:jsr popsreg ; Get left operand
|
||||
mul8x16:jsr popptr1 ; Get left operand (Y=0 by popptr1)
|
||||
|
||||
lda #0 ; Clear byte 1
|
||||
tya ; Clear byte 1
|
||||
ldy #8 ; Number of bits
|
||||
ldx sreg+1 ; Get into register for speed
|
||||
ldx ptr1+1 ; check if lhs is 8 bit only
|
||||
beq mul8x8 ; Do 8x8 multiplication if high byte zero
|
||||
mul8x16a:
|
||||
sta ptr4+1 ; Clear byte 2
|
||||
@ -29,12 +29,12 @@ mul8x16a:
|
||||
@L0: bcc @L1
|
||||
|
||||
clc
|
||||
adc sreg
|
||||
pha
|
||||
txa ; hi byte of left op
|
||||
adc ptr1
|
||||
tax
|
||||
lda ptr1+1 ; hi byte of left op
|
||||
adc ptr4+1
|
||||
sta ptr4+1
|
||||
pla
|
||||
txa
|
||||
|
||||
@L1: ror ptr4+1
|
||||
ror a
|
||||
@ -52,7 +52,7 @@ mul8x8:
|
||||
lsr ptr4 ; Get first bit into carry
|
||||
@L0: bcc @L1
|
||||
clc
|
||||
adc sreg
|
||||
adc ptr1
|
||||
@L1: ror
|
||||
ror ptr4
|
||||
dey
|
||||
|
@ -3,6 +3,7 @@
|
||||
;
|
||||
; CC65 runtime: Multiply the primary register by 5
|
||||
;
|
||||
; Don't touch the Y-register here, the optimizer relies on it!
|
||||
|
||||
.export mulax5
|
||||
.importzp ptr1
|
||||
|
@ -4,6 +4,7 @@
|
||||
;
|
||||
; CC65 runtime: Multiply the primary register by 7
|
||||
;
|
||||
; Don't touch the Y-register here, the optimizer relies on it!
|
||||
|
||||
.export mulax7
|
||||
.importzp ptr1
|
||||
|
@ -4,6 +4,7 @@
|
||||
;
|
||||
; CC65 runtime: Multiply the primary register by 9
|
||||
;
|
||||
; Don't touch the Y-register here, the optimizer relies on it!
|
||||
|
||||
.export mulax9
|
||||
.importzp ptr1
|
||||
|
Loading…
x
Reference in New Issue
Block a user