tenfourfox/nsprpub/lib/libc/src/vmx_strchr.S

307 lines
5.7 KiB
ArmAsm

; VMX (AltiVec) version of strchr in assembly
;
; C equivalent: char *vmx_strchr(const char *p, int ch)
;
; Does not make stack frames or update the stack pointer.
;
; It uses the Darwin red zone to load/store vector values
; (the 16 bytes at -16(r1) are used as scratch space),
; and part of the code assumes memory loads are BE ordered.
;
; In:  r3: const void *p (input param)
;      r4: int ch (input param, only the low 8 bits are used)
; Out: r3: address of the first byte equal to ch, or 0 when the
;      terminating zero byte is reached first. When ch is 0 the
;      address of the terminating zero byte is returned.
;
; Registers written: r3-r12, CTR, CR0, CR1, CR6, v0-v6.
; VRSAVE is changed while the routine runs and is restored from
; r12 on every return path.
;
; All GPRs used are volatile.
;
; Strategy:
;   1. Scan byte by byte until the pointer is 16-byte aligned.
;   2. Scan 16 bytes at a time with aligned vector loads.
;   3. Store the compare result in the red zone and locate the
;      first hit with count-leading-zeros.

; SPR number used with mfspr/mtspr to access the VRSAVE register.
#define VRSAVE 256
; CR bit numbers inside CR6, as written by the recording form of a
; vector compare: bit 26 is set when no element compared true,
; bit 24 is set when every element compared true.
#define VMX_ALL_NE 26
#define VMX_ALL_EQ 24
#ifdef _PPC970_
#warning using 64-bit code
.machine ppc970
#else
.machine ppc7400
#endif
.text
.align 4
.globl _vmx_strchr
_vmx_strchr:
	;truncate value to 8bit, update CR0 (EQ is set when ch is 0)
	clrlwi. r4,r4,24
	neg r6,r3
	mfspr r12,VRSAVE ;get old VRSAVE
	li r11,-16 ;r11 = offset of the red zone scratch slot
	stb r4,-16(r1) ;first scratch byte = ch
	;prefix length in r6 (bytes before the next 16-byte boundary)
	clrlwi r6,r6,28 ;logical AND 0xF
	oris r9,r12,0xFE00 ;VR0-VR6 = 0xFE000000
	mtspr VRSAVE,r9
	vspltisb v0,0 ;v0 = all bytes 0x00 (end-of-string pattern)
	vspltisb v6,7 ;v6 = all bytes 0x07 (end-of-string marker mask)
	addi r6,r6,1
	mtctr r6 ;CTR = prefix length + 1
	;If we search for NULL, use specific code
	beq cr0,L_NULL_ONLY
	;Load ch back from the scratch slot and replicate it.
	;NOTE(review): splatting element 0 relies on r1 being 16-byte
	;aligned so that lvebx places the byte in element 0.
	lvebx v3,r1,r11
	vspltb v3,v3,0
	;check if there is a prefix first
	bdz L_prefix_end
	lbz r5,0(r3)
.align 4
	;Byte loop: r5 holds the byte at the old r3, r3 is advanced
	;first, so a hit is at r3 - 1 (see L_found).
L_prefix_loop:
	addi r3,r3,1
	cmpw cr0,r4,r5
	cmpwi cr1,r5,0
	beq cr0,L_found
	beq cr1,L_notfound_1
	lbz r5,0(r3)
	bdnz L_prefix_loop
	;The end-of-string and searchByte vectors
	;are compared against memory. Both result vectors are
	;then compared together. If neither an end-of-string
	;nor a searchByte is found, both result vectors are
	;filled with zeros and therefore compare equal.
	;
	;If anything is found there will be an inequality between
	;the two.
	;
	;(Not applicable if the searchByte in question is NULL)
	;
	;r3 is 16-byte aligned from here on. r10 is the offset of the
	;NEXT block to load, so the block just tested is at
	;r3 + r10 - 16.
L_prefix_end:
	lvx v1,0,r3
	vcmpequb v4,v1,v0 ;v4 = 0xFF where the byte is zero
	vcmpequb v2,v1,v3 ;v2 = 0xFF where the byte equals ch
	li r10,16
	vcmpequb. v5,v2,v4
	bf VMX_ALL_EQ,L_foundsomething
.align 4
	;Main loop, unrolled five times.
L_vmx_loop:
	lvx v1,r3,r10
	vcmpequb v4,v1,v0
	vcmpequb v2,v1,v3
	la r10,16(r10)
	vcmpequb. v5,v2,v4
	bf VMX_ALL_EQ,L_foundsomething
	lvx v1,r3,r10
	vcmpequb v4,v1,v0
	vcmpequb v2,v1,v3
	la r10,16(r10)
	vcmpequb. v5,v2,v4
	bf VMX_ALL_EQ,L_foundsomething
	lvx v1,r3,r10
	vcmpequb v4,v1,v0
	vcmpequb v2,v1,v3
	la r10,16(r10)
	vcmpequb. v5,v2,v4
	bf VMX_ALL_EQ,L_foundsomething
	lvx v1,r3,r10
	vcmpequb v4,v1,v0
	vcmpequb v2,v1,v3
	la r10,16(r10)
	vcmpequb. v5,v2,v4
	bf VMX_ALL_EQ,L_foundsomething
	lvx v1,r3,r10
	vcmpequb v4,v1,v0
	vcmpequb v2,v1,v3
	la r10,16(r10)
	vcmpequb. v5,v2,v4
	bt VMX_ALL_EQ,L_vmx_loop
L_foundsomething:
	;end-of-string bytes will have a 0x07 pattern
	;in case of equality instead of 0xFF.
	vand v4,v4,v6
	;OR the two result vectors together.
	vor v2,v2,v4
	;Shared tail. On entry v2 is the result vector and r10 is
	;16 past the offset of the block that produced it.
L_found_NULL:
	stvx v2,r1,r11
	add r3,r3,r10
	subi r3,r3,16 ;r3 = address of the block that was hit
	mtspr VRSAVE,r12
	;both result vectors OR'd together are stored at -16(r1).
	;load the vector in GPRs and stop at the first nonzero GPR.
	;Count leading zeros: a 0xFF byte gives a multiple of 8 and
	;a 0x07 byte gives a multiple of 8 plus 5. If the result is
	;odd the first hit is the end-of-string.
	;Otherwise the first hit is the "good" byte.
	;
	;If we searched for NULL then the first hit is always the
	;good byte.
#ifdef _PPC970_
	ld r5,-16(r1)
	ld r6,-8(r1)
	cmpldi cr0,r5,0
	cntlzd r4,r5
	bne cr0,L_bytefound ;0-7
	addi r3,r3,8 ;8-15
	cntlzd r4,r6
#else
	;PPC32
	lwz r5,-16(r1)
	lwz r6,-12(r1)
	cmplwi cr0,r5,0
	cntlzw r4,r5
	cmplwi cr1,r6,0
	lwz r7,-8(r1)
	bne cr0,L_bytefound ;0-3
	addi r3,r3,4
	cntlzw r4,r6
	bne cr1,L_bytefound ;4-7
	cmplwi cr0,r7,0
	addi r3,r3,4
	cntlzw r4,r7
	lwz r8,-4(r1)
	bne cr0,L_bytefound ;8-11
	cntlzw r4,r8 ;12-15
	addi r3,r3,4
#endif
	;r4 = leading zero count inside the GPR that holds the first
	;hit, r3 = address of the first byte covered by that GPR.
L_bytefound:
	clrlwi r7,r4,31 ;r7 = low bit of the count (1 when odd)
	srwi r6,r4,3 ;r6 = count / 8 = byte index inside the GPR
	cmplwi cr0,r7,0
	nop
	bne cr0,L_notfound ;odd: end-of-string comes first
	add r3,r6,r3 ;update r3 to point to the good byte
	blr
.align 4
	;Code to execute when searching for NULL
	;Byte loop until r3 is 16-byte aligned (CTR = prefix + 1).
L_NULL_ONLY:
	bdz L_NULLVMX
	lbz r5,0(r3)
	addi r3,r3,1
	cmplwi cr0,r5,0
	bne cr0,L_NULL_ONLY ;prefix loop
	la r3,-1(r3) ;step back onto the zero byte
	mtspr VRSAVE,r12
	blr
	;Vector scan for a zero byte. The bc forms below use BO=4
	;(branch when the CR bit is clear) and BO=12 (branch when the
	;CR bit is set) on the all-not-equal bit, so a branch to
	;L_found_NULL is taken as soon as any byte of the block is 0.
L_NULLVMX:
	lvx v1,0,r3
	vcmpequb. v2,v1,v0
	li r10,16
	bc 4,VMX_ALL_NE,L_found_NULL
	;Main loop, unrolled six times.
L_NULL_LOOP:
	lvx v1,r3,r10
	vcmpequb. v2,v1,v0
	la r10,16(r10)
	bc 4,VMX_ALL_NE,L_found_NULL
	lvx v1,r3,r10
	vcmpequb. v2,v1,v0
	la r10,16(r10)
	bc 4,VMX_ALL_NE,L_found_NULL
	lvx v1,r3,r10
	vcmpequb. v2,v1,v0
	la r10,16(r10)
	bc 4,VMX_ALL_NE,L_found_NULL
	lvx v1,r3,r10
	vcmpequb. v2,v1,v0
	la r10,16(r10)
	bc 4,VMX_ALL_NE,L_found_NULL
	lvx v1,r3,r10
	vcmpequb. v2,v1,v0
	la r10,16(r10)
	bc 4,VMX_ALL_NE,L_found_NULL
	lvx v1,r3,r10
	vcmpequb. v2,v1,v0
	la r10,16(r10)
	bc+ 12,VMX_ALL_NE,L_NULL_LOOP
	b L_found_NULL
	;End-of-string hit in the byte prefix loop: VRSAVE has not
	;been restored yet on this path.
L_notfound_1:
	mtspr VRSAVE,r12
	;Return 0 (VRSAVE already restored by the caller of this label).
L_notfound:
	li r3,0
	blr
	;ch hit in the byte prefix loop: r3 is one past the match.
L_found:
	mtspr VRSAVE,r12
	la r3,-1(r3)
	blr
.subsections_via_symbols