mirror of
https://github.com/classilla/tenfourfox.git
synced 2024-11-19 02:13:04 +00:00
#440: Raphael's assembly VMX haschr/memchr/strchr plus NSPR build changes
This commit is contained in:
parent
2dba79e825
commit
fa5198b512
@ -134,6 +134,7 @@ endif
|
||||
|
||||
ifndef OBJS
|
||||
OBJS = $(addprefix $(OBJDIR)/,$(CSRCS:.c=.$(OBJ_SUFFIX))) \
|
||||
$(addprefix $(OBJDIR)/,$(SSRCS:.S=.$(OBJ_SUFFIX))) \
|
||||
$(addprefix $(OBJDIR)/,$(ASFILES:.$(ASM_SUFFIX)=.$(OBJ_SUFFIX)))
|
||||
endif
|
||||
|
||||
@ -458,6 +459,13 @@ endif
|
||||
endif
|
||||
endif
|
||||
|
||||
$(OBJDIR)/%.$(OBJ_SUFFIX): %.S
|
||||
@$(MAKE_OBJDIR)
|
||||
ifdef NEED_ABSOLUTE_PATH
|
||||
$(CC) -o $@ -c $(CFLAGS) $(call pr_abspath,$<)
|
||||
else
|
||||
$(CC) -o $@ -c $(CFLAGS) $<
|
||||
endif
|
||||
|
||||
$(OBJDIR)/%.$(OBJ_SUFFIX): %.s
|
||||
@$(MAKE_OBJDIR)
|
||||
@ -509,7 +517,7 @@ endif
|
||||
# hundreds of built-in suffix rules for stuff we don't need.
|
||||
#
|
||||
.SUFFIXES:
|
||||
.SUFFIXES: .a .$(OBJ_SUFFIX) .c .cpp .s .h .i .pl
|
||||
.SUFFIXES: .a .$(OBJ_SUFFIX) .c .cpp .s .h .i .pl .S
|
||||
|
||||
#
|
||||
# Fake targets. Always run these rules, even if a file/directory with that
|
||||
|
@ -40,8 +40,11 @@ LIBRARY_VERSION = $(MOD_MAJOR_VERSION)
|
||||
RELEASE_LIBS = $(TARGETS)
|
||||
|
||||
ifeq ($(TENFOURFOX_VMX),1)
|
||||
CSRCS += plvmx.c
|
||||
CFLAGS += -faltivec
|
||||
SSRCS += \
|
||||
vmx_haschr.S \
|
||||
vmx_memchr.S \
|
||||
vmx_strchr.S \
|
||||
$(NULL)
|
||||
endif
|
||||
|
||||
ifeq ($(OS_ARCH),WINNT)
|
||||
|
229
nsprpub/lib/libc/src/vmx_haschr.S
Normal file
229
nsprpub/lib/libc/src/vmx_haschr.S
Normal file
@ -0,0 +1,229 @@
|
||||
|
||||
; VMX version of haschr in assembly: r3 = non-zero if byte c occurs in the first length bytes of b, 0 otherwise
|
||||
;
|
||||
; Does not make stack frames or update the stack pointer.
|
||||
;
|
||||
; It uses Darwin's red zone to load/store vector values,
|
||||
; and part of the code assumes memory loads are BE ordered.
|
||||
;
|
||||
; r3: const void *b (input param)
|
||||
; r4: int c (input param)
|
||||
; r5: size_t length (input param)
|
||||
;
|
||||
; All GPRs used are volatile.
|
||||
|
||||
|
||||
#define VRSAVE 256
|
||||
#define VMX_ALL_NE 26
|
||||
#define VMX_ALL_EQ 24
|
||||
|
||||
#ifdef _PPC970_
|
||||
|
||||
#define PADDING nop
|
||||
|
||||
#warning using 64-bit code
|
||||
|
||||
.machine ppc970
|
||||
|
||||
#else
|
||||
|
||||
#define PADDING
|
||||
|
||||
.machine ppc7400
|
||||
|
||||
#endif
|
||||
|
||||
.text
|
||||
|
||||
.globl _vmx_haschr
|
||||
|
||||
.align 4
|
||||
|
||||
_vmx_haschr:
|
||||
|
||||
mfspr r12,VRSAVE ;Get old VRSAVE
|
||||
|
||||
cmplwi cr0,r5,16
|
||||
|
||||
neg r11,r3
|
||||
add r7,r3,r5
|
||||
|
||||
;prefix length in r6, suffix length in r7
|
||||
clrlwi r6,r11,28
|
||||
clrlwi r7,r7,28 ;logical AND 0xF
|
||||
|
||||
;total length - prefix length
|
||||
sub r8,r5,r6
|
||||
|
||||
;the prefix and no-VMX loops run bdz before each load, so bias both counts by 1
|
||||
addi r6,r6,1
|
||||
addi r9,r5,1
|
||||
|
||||
;bytes into quadwords
|
||||
srwi r8,r8,4 ;suffix is < 16bytes, no need to subtract it.
|
||||
|
||||
;if total length is < 16, skip vmx
|
||||
blt cr0,L_novmx
|
||||
|
||||
cmplwi cr1,r8,0
|
||||
|
||||
;new VRSAVE
|
||||
oris r0,r12,0xE000 ;VR0-VR2 = 0xE0000000
|
||||
|
||||
li r11,-16
|
||||
stb r4,-16(r1) ;store searchByte in red zone
|
||||
|
||||
mtctr r6
|
||||
|
||||
beq cr1,L_novmx
|
||||
|
||||
;VMX is used
|
||||
mtspr VRSAVE,r0
|
||||
|
||||
;load, splat searchByte in VR0
|
||||
lvebx v0,r1,r11
|
||||
vspltb v0,v0,0
|
||||
|
||||
;truncate int to uchar
|
||||
clrlwi r4,r4,24
|
||||
|
||||
L_prefix_loop: ;prefix loop
|
||||
|
||||
;check for a prefix before actually looping
|
||||
bdz L_prefix_end
|
||||
|
||||
lbz r9,0(r3)
|
||||
addi r3,r3,1
|
||||
cmplw cr1,r4,r9
|
||||
|
||||
bne cr1,L_prefix_loop
|
||||
|
||||
mtspr VRSAVE,r12 ;found the byte. c'ya ;-)
|
||||
li r3,1
|
||||
|
||||
blr
|
||||
|
||||
|
||||
L_prefix_end:
|
||||
|
||||
;check if there is a suffix
|
||||
cmplwi cr0,r7,0
|
||||
|
||||
|
||||
mtctr r8
|
||||
|
||||
;first VMX iteration is outside the loop
|
||||
lvx v1,0,r3
|
||||
li r10,16
|
||||
vcmpequb. v2,v1,v0
|
||||
|
||||
bdz L_vmx_end
|
||||
|
||||
.align 4
|
||||
|
||||
L_vmx_loop: ;vector loop
|
||||
|
||||
lvx v1,r3,r10
|
||||
la r10,16(r10)
|
||||
PADDING
|
||||
bf VMX_ALL_NE,L_found
|
||||
PADDING
|
||||
vcmpequb. v2,v1,v0
|
||||
bdz L_vmx_end
|
||||
|
||||
lvx v1,r3,r10
|
||||
la r10,16(r10)
|
||||
PADDING
|
||||
bf VMX_ALL_NE,L_found
|
||||
PADDING
|
||||
vcmpequb. v2,v1,v0
|
||||
bdz L_vmx_end
|
||||
|
||||
lvx v1,r3,r10
|
||||
la r10,16(r10)
|
||||
PADDING
|
||||
bf VMX_ALL_NE,L_found
|
||||
PADDING
|
||||
vcmpequb. v2,v1,v0
|
||||
bdz L_vmx_end
|
||||
|
||||
lvx v1,r3,r10
|
||||
la r10,16(r10)
|
||||
PADDING
|
||||
bf VMX_ALL_NE,L_found
|
||||
PADDING
|
||||
vcmpequb. v2,v1,v0
|
||||
bdz L_vmx_end
|
||||
|
||||
lvx v1,r3,r10
|
||||
la r10,16(r10)
|
||||
PADDING
|
||||
bf VMX_ALL_NE,L_found
|
||||
PADDING
|
||||
vcmpequb. v2,v1,v0
|
||||
bdz L_vmx_end
|
||||
|
||||
lvx v1,r3,r10
|
||||
la r10,16(r10)
|
||||
PADDING
|
||||
bf VMX_ALL_NE,L_found
|
||||
PADDING
|
||||
vcmpequb. v2,v1,v0
|
||||
bdnz L_vmx_loop
|
||||
|
||||
L_vmx_end:
|
||||
|
||||
add r3,r3,r10
|
||||
bf VMX_ALL_NE,L_found
|
||||
|
||||
mtctr r7
|
||||
|
||||
;skip suffix if nonexistent
|
||||
beq cr0,L_notfound
|
||||
|
||||
.align 4
|
||||
|
||||
L_suffix_loop: ;suffix loop
|
||||
|
||||
lbz r5,0(r3)
|
||||
addi r3,r3,1
|
||||
cmplw cr0,r4,r5
|
||||
beq cr0,L_found
|
||||
bdnz L_suffix_loop
|
||||
|
||||
L_notfound:
|
||||
|
||||
mtspr VRSAVE,r12
|
||||
li r3,0
|
||||
blr
|
||||
|
||||
L_found:
|
||||
|
||||
mtspr VRSAVE,r12
|
||||
li r3,1
|
||||
blr
|
||||
|
||||
|
||||
;Path that skips VMX
|
||||
|
||||
L_novmx:
	;Scalar fallback of _vmx_haschr. Taken when length < 16, or when no
	;whole aligned quadword remains once the prefix is removed.
	;Both branches to this label happen before VRSAVE is written,
	;so there is nothing to restore on the way out.
	;
	;In:  r3 = pointer to the first byte, r4 = c, r9 = length + 1
	;Out: r3 = 1 if the byte was found, 0 otherwise

	mtctr	r9		;CTR = length + 1, bdz runs before each load
	clrlwi	r4,r4,24	;truncate int to uchar

L_novmx_loop:
	bdz	L_notfound_1	;every byte examined without a match
	lbz	r5,0(r3)
	addi	r3,r3,1
	cmpw	cr0,r4,r5
	bne	cr0,L_novmx_loop

L_found_1:
	;Return the same value as the prefix, VMX and suffix paths (1),
	;not the address of the byte as the memchr variant does.
	li	r3,1
	blr

L_notfound_1:
	li	r3,0
	blr
|
||||
|
||||
.subsections_via_symbols
|
||||
|
311
nsprpub/lib/libc/src/vmx_memchr.S
Normal file
311
nsprpub/lib/libc/src/vmx_memchr.S
Normal file
@ -0,0 +1,311 @@
|
||||
|
||||
; VMX version of memchr in assembly
|
||||
;
|
||||
; Does not make stack frames or update the stack pointer.
|
||||
;
|
||||
; It uses Darwin's red zone to load/store vector values,
|
||||
; and part of the code assumes memory loads are BE ordered.
|
||||
;
|
||||
; r3: const void *b (input param)
|
||||
; r4: int c (input param)
|
||||
; r5: size_t length (input param)
|
||||
;
|
||||
; All GPRs used are volatile.
|
||||
|
||||
|
||||
#define VRSAVE 256
|
||||
#define VMX_ALL_NE 26
|
||||
#define VMX_ALL_EQ 24
|
||||
|
||||
#ifdef _PPC970_
|
||||
|
||||
#define PADDING nop
|
||||
|
||||
#warning using 64-bit code
|
||||
|
||||
.machine ppc970
|
||||
|
||||
#else
|
||||
|
||||
#define PADDING
|
||||
|
||||
.machine ppc7400
|
||||
|
||||
#endif
|
||||
|
||||
.text
|
||||
|
||||
.globl _vmx_memchr
|
||||
|
||||
.align 4
|
||||
|
||||
_vmx_memchr:
|
||||
|
||||
mfspr r12,VRSAVE ;Get old VRSAVE
|
||||
|
||||
cmplwi cr0,r5,16
|
||||
|
||||
neg r11,r3
|
||||
add r7,r3,r5
|
||||
|
||||
;prefix length in r6, suffix length in r7
|
||||
clrlwi r6,r11,28
|
||||
clrlwi r7,r7,28 ;logical AND 0xF
|
||||
|
||||
;total length - prefix length
|
||||
sub r8,r5,r6
|
||||
|
||||
;the prefix and no-VMX loops run bdz before each load, so bias both counts by 1
|
||||
addi r6,r6,1
|
||||
addi r9,r5,1
|
||||
|
||||
;bytes into quadwords
|
||||
srwi r8,r8,4 ;suffix is < 16bytes, no need to subtract it.
|
||||
|
||||
;if total length is < 16, skip vmx
|
||||
blt cr0,L_novmx
|
||||
|
||||
cmplwi cr1,r8,0
|
||||
|
||||
;new VRSAVE
|
||||
oris r0,r12,0xE000 ;VR0-VR2 = 0xE0000000
|
||||
|
||||
li r11,-16
|
||||
stb r4,-16(r1) ;store searchByte in red zone
|
||||
|
||||
mtctr r6
|
||||
|
||||
beq cr1,L_novmx
|
||||
|
||||
;VMX is used
|
||||
mtspr VRSAVE,r0
|
||||
|
||||
;load, splat searchByte in VR0
|
||||
lvebx v0,r1,r11
|
||||
vspltb v0,v0,0
|
||||
|
||||
;truncate int to uchar
|
||||
clrlwi r4,r4,24
|
||||
|
||||
L_prefix_loop: ;prefix loop
|
||||
|
||||
;check for a prefix before actually looping
|
||||
bdz L_prefix_end
|
||||
|
||||
lbz r9,0(r3)
|
||||
addi r3,r3,1
|
||||
cmplw cr1,r4,r9
|
||||
|
||||
bne cr1,L_prefix_loop
|
||||
|
||||
mtspr VRSAVE,r12 ;found the byte. c'ya ;-)
|
||||
la r3,-1(r3)
|
||||
|
||||
blr
|
||||
|
||||
|
||||
L_prefix_end:
|
||||
|
||||
;check if there is a suffix
|
||||
cmplwi cr0,r7,0
|
||||
|
||||
|
||||
mtctr r8
|
||||
|
||||
;first VMX iteration is outside the loop
|
||||
lvx v1,0,r3
|
||||
li r10,16
|
||||
vcmpequb. v2,v1,v0
|
||||
|
||||
bdz L_vmx_end
|
||||
|
||||
.align 4
|
||||
|
||||
L_vmx_loop: ;vector loop
|
||||
|
||||
lvx v1,r3,r10
|
||||
la r10,16(r10)
|
||||
PADDING
|
||||
bf VMX_ALL_NE,L_foundvmx
|
||||
PADDING
|
||||
vcmpequb. v2,v1,v0
|
||||
bdz L_vmx_end
|
||||
|
||||
lvx v1,r3,r10
|
||||
la r10,16(r10)
|
||||
PADDING
|
||||
bf VMX_ALL_NE,L_foundvmx
|
||||
PADDING
|
||||
vcmpequb. v2,v1,v0
|
||||
bdz L_vmx_end
|
||||
|
||||
lvx v1,r3,r10
|
||||
la r10,16(r10)
|
||||
PADDING
|
||||
bf VMX_ALL_NE,L_foundvmx
|
||||
PADDING
|
||||
vcmpequb. v2,v1,v0
|
||||
bdz L_vmx_end
|
||||
|
||||
lvx v1,r3,r10
|
||||
la r10,16(r10)
|
||||
PADDING
|
||||
bf VMX_ALL_NE,L_foundvmx
|
||||
PADDING
|
||||
vcmpequb. v2,v1,v0
|
||||
bdz L_vmx_end
|
||||
|
||||
lvx v1,r3,r10
|
||||
la r10,16(r10)
|
||||
PADDING
|
||||
bf VMX_ALL_NE,L_foundvmx
|
||||
PADDING
|
||||
vcmpequb. v2,v1,v0
|
||||
bdz L_vmx_end
|
||||
|
||||
lvx v1,r3,r10
|
||||
la r10,16(r10)
|
||||
PADDING
|
||||
bf VMX_ALL_NE,L_foundvmx
|
||||
PADDING
|
||||
vcmpequb. v2,v1,v0
|
||||
bdnz L_vmx_loop
|
||||
|
||||
L_vmx_end:
|
||||
|
||||
add r3,r3,r10
|
||||
bf VMX_ALL_NE,L_foundvmx_1
|
||||
|
||||
mtctr r7
|
||||
|
||||
;skip suffix if nonexistent
|
||||
beq cr0,L_notfound
|
||||
|
||||
.align 4
|
||||
|
||||
L_suffix_loop: ;suffix loop
|
||||
|
||||
lbz r5,0(r3)
|
||||
addi r3,r3,1
|
||||
cmplw cr0,r4,r5
|
||||
beq cr0,L_found
|
||||
bdnz L_suffix_loop
|
||||
|
||||
L_notfound:
|
||||
|
||||
mtspr VRSAVE,r12
|
||||
li r3,0
|
||||
blr
|
||||
|
||||
L_found:
|
||||
|
||||
mtspr VRSAVE,r12
|
||||
la r3,-1(r3)
|
||||
blr
|
||||
|
||||
.align 4
|
||||
|
||||
L_foundvmx:
|
||||
|
||||
;r10 is already one quadword past the match: make r3 the end of the matching quadword
|
||||
subi r3,r3,16
|
||||
add r3,r3,r10
|
||||
|
||||
L_foundvmx_1:
|
||||
|
||||
stvx v2,r1,r11 ;store result vector in the red zone
|
||||
|
||||
mtspr VRSAVE,r12 ;restore VRSAVE
|
||||
|
||||
;make r3 point the "good" quadword
|
||||
subi r3,r3,16
|
||||
|
||||
|
||||
;From here, the result vector from last executed vcmpequb is stored in -16(r1).
|
||||
;
|
||||
;Following part searches the 'good' byte in GPRs (32 or 64 bits GPRS according to cpu)
|
||||
;It loads the vector containing the 'good' byte in GPRs, searches for first non-zero
|
||||
;GPR, counts the leading 0's and divides this by 8.
|
||||
;This gives the 'good' byte's position inside the GPR from MS Byte to LS BYTE
|
||||
;(0 MS, 3 LS).
|
||||
;This cuts the number of checks to do from 16 to 4 (32bit GPRs) or 2 (64bit GPRs).
|
||||
|
||||
#ifdef _PPC970_
|
||||
|
||||
ld r5,-16(r1)
|
||||
ld r6,-8(r1)
|
||||
|
||||
cmpldi cr0,r5,0
|
||||
cntlzd r4,r5
|
||||
|
||||
bne cr0,L_bytefound ;0-7
|
||||
|
||||
addi r3,r3,8 ;8-15
|
||||
cntlzd r4,r6
|
||||
|
||||
#else
|
||||
|
||||
;PPC32
|
||||
|
||||
lwz r5,-16(r1)
|
||||
lwz r6,-12(r1)
|
||||
|
||||
cmplwi cr0,r5,0
|
||||
cntlzw r4,r5
|
||||
|
||||
cmplwi cr1,r6,0
|
||||
|
||||
lwz r7,-8(r1)
|
||||
|
||||
bne cr0,L_bytefound ;0-3
|
||||
|
||||
addi r3,r3,4
|
||||
cntlzw r4,r6
|
||||
|
||||
bne cr1,L_bytefound ;4-7
|
||||
|
||||
cmplwi cr0,r7,0
|
||||
|
||||
addi r3,r3,4
|
||||
cntlzw r4,r7
|
||||
lwz r8,-4(r1)
|
||||
|
||||
bne cr0,L_bytefound ;8-11
|
||||
|
||||
cntlzw r4,r8 ;12-15
|
||||
addi r3,r3,4
|
||||
|
||||
#endif
|
||||
|
||||
L_bytefound:
|
||||
|
||||
PADDING
|
||||
srwi r6,r4,3
|
||||
PADDING
|
||||
add r3,r6,r3
|
||||
blr
|
||||
|
||||
|
||||
;Path that skips VMX
|
||||
|
||||
L_novmx:
|
||||
|
||||
mtctr r9
|
||||
clrlwi r4,r4,24
|
||||
|
||||
L_novmx_loop:
|
||||
bdz L_notfound_1
|
||||
lbz r5,0(r3)
|
||||
addi r3,r3,1
|
||||
cmpw cr0,r4,r5
|
||||
bne cr0,L_novmx_loop
|
||||
|
||||
L_found_1:
|
||||
la r3,-1(r3)
|
||||
blr
|
||||
L_notfound_1:
|
||||
li r3,0
|
||||
blr
|
||||
|
||||
.subsections_via_symbols
|
||||
|
306
nsprpub/lib/libc/src/vmx_strchr.S
Normal file
306
nsprpub/lib/libc/src/vmx_strchr.S
Normal file
@ -0,0 +1,306 @@
|
||||
|
||||
; VMX version of strchr in assembly
|
||||
;
|
||||
; Does not make stack frames or update the stack pointer.
|
||||
;
|
||||
; It uses Darwin's red zone to load/store vector values,
|
||||
; and part of the code assumes memory loads are BE ordered.
|
||||
;
|
||||
; r3: const void *p (input param)
|
||||
; r4: int ch (input param)
|
||||
;
|
||||
; All GPRs used are volatile.
|
||||
|
||||
#define VRSAVE 256
|
||||
#define VMX_ALL_NE 26
|
||||
#define VMX_ALL_EQ 24
|
||||
|
||||
#ifdef _PPC970_
|
||||
|
||||
#warning using 64-bit code
|
||||
|
||||
.machine ppc970
|
||||
|
||||
|
||||
#else
|
||||
|
||||
.machine ppc7400
|
||||
|
||||
#endif
|
||||
|
||||
.text
|
||||
|
||||
.align 4
|
||||
|
||||
.globl _vmx_strchr
|
||||
|
||||
_vmx_strchr:
|
||||
|
||||
;truncate value to 8bit, update CR0
|
||||
clrlwi. r4,r4,24
|
||||
|
||||
neg r6,r3
|
||||
|
||||
mfspr r12,VRSAVE ;get old VRSAVE
|
||||
|
||||
li r11,-16
|
||||
stb r4,-16(r1)
|
||||
|
||||
;prefix length in r6
|
||||
clrlwi r6,r6,28 ;logical AND 0xF
|
||||
|
||||
oris r9,r12,0xFE00 ;VR0-VR6 = 0xFE000000
|
||||
|
||||
mtspr VRSAVE,r9
|
||||
|
||||
vspltisb v0,0
|
||||
vspltisb v6,7
|
||||
|
||||
addi r6,r6,1
|
||||
mtctr r6
|
||||
|
||||
;If we search for NULL, use specific code
|
||||
beq cr0,L_NULL_ONLY
|
||||
|
||||
lvebx v3,r1,r11
|
||||
vspltb v3,v3,0
|
||||
|
||||
;check if there is a prefix first
|
||||
bdz L_prefix_end
|
||||
|
||||
lbz r5,0(r3)
|
||||
|
||||
.align 4
|
||||
|
||||
L_prefix_loop:
|
||||
|
||||
addi r3,r3,1
|
||||
|
||||
cmpw cr0,r4,r5
|
||||
cmpwi cr1,r5,0
|
||||
beq cr0,L_found
|
||||
beq cr1,L_notfound_1
|
||||
lbz r5,0(r3)
|
||||
|
||||
bdnz L_prefix_loop
|
||||
|
||||
;The end-of-string and searchByte vectors
|
||||
;are compared against memory. Both result vectors are
|
||||
;then compared together. If there's no end-of-string
|
||||
;or searchByte found, both result vectors should be
|
||||
;filled with 0's.
|
||||
;
|
||||
;If anything is found there will be an inequality between
|
||||
;the two.
|
||||
;
|
||||
;(Not applicable if the searchByte in question is NULL)
|
||||
|
||||
L_prefix_end:
	;First aligned quadword, checked outside the unrolled loop.
	;v0 = all zero bytes and v3 = searchByte splat, both set up on entry.

	lvx	v1,0,r3
	vcmpequb	v4,v1,v0	;v4 = 0xFF in every byte where the string has a NUL
	vcmpequb	v2,v1,v3	;v2 = 0xFF in every byte equal to searchByte
	li	r10,16		;offset of the next quadword from r3
	vcmpequb.	v5,v2,v4	;CR6 all-equal only when v2 and v4 are both all zero
	bf	VMX_ALL_EQ,L_foundsomething	;named CR6 bit, same as the loop below
|
||||
|
||||
.align 4
|
||||
|
||||
L_vmx_loop:
|
||||
|
||||
lvx v1,r3,r10
|
||||
vcmpequb v4,v1,v0
|
||||
vcmpequb v2,v1,v3
|
||||
la r10,16(r10)
|
||||
vcmpequb. v5,v2,v4
|
||||
bf VMX_ALL_EQ,L_foundsomething
|
||||
|
||||
lvx v1,r3,r10
|
||||
vcmpequb v4,v1,v0
|
||||
vcmpequb v2,v1,v3
|
||||
la r10,16(r10)
|
||||
vcmpequb. v5,v2,v4
|
||||
bf VMX_ALL_EQ,L_foundsomething
|
||||
|
||||
lvx v1,r3,r10
|
||||
vcmpequb v4,v1,v0
|
||||
vcmpequb v2,v1,v3
|
||||
la r10,16(r10)
|
||||
vcmpequb. v5,v2,v4
|
||||
bf VMX_ALL_EQ,L_foundsomething
|
||||
|
||||
lvx v1,r3,r10
|
||||
vcmpequb v4,v1,v0
|
||||
vcmpequb v2,v1,v3
|
||||
la r10,16(r10)
|
||||
vcmpequb. v5,v2,v4
|
||||
bf VMX_ALL_EQ,L_foundsomething
|
||||
|
||||
lvx v1,r3,r10
|
||||
vcmpequb v4,v1,v0
|
||||
vcmpequb v2,v1,v3
|
||||
la r10,16(r10)
|
||||
vcmpequb. v5,v2,v4
|
||||
bt VMX_ALL_EQ,L_vmx_loop
|
||||
|
||||
L_foundsomething:
|
||||
|
||||
;end-of-string bytes will have a 0x07 pattern
|
||||
;in case of equality instead of 0xFF.
|
||||
vand v4,v4,v6
|
||||
|
||||
;OR the two result vectors together.
|
||||
vor v2,v2,v4
|
||||
|
||||
L_found_NULL:
|
||||
|
||||
stvx v2,r1,r11
|
||||
add r3,r3,r10
|
||||
subi r3,r3,16
|
||||
mtspr VRSAVE,r12
|
||||
|
||||
;both result vectors OR'd together are stored at -16(r1).
|
||||
;load the vector in GPRs and stop at the first nonzero GPR.
|
||||
;Count the leading zero bits; if the count is odd, end-of-string came first.
|
||||
;Otherwise it's the "good" byte.
|
||||
;
|
||||
;If we searched for NULL then it'll always be the good byte.
|
||||
|
||||
#ifdef _PPC970_
|
||||
|
||||
ld r5,-16(r1)
|
||||
ld r6,-8(r1)
|
||||
|
||||
cmpldi cr0,r5,0
|
||||
cntlzd r4,r5
|
||||
|
||||
bne cr0,L_bytefound ;0-7
|
||||
|
||||
addi r3,r3,8 ;8-15
|
||||
cntlzd r4,r6
|
||||
|
||||
#else
|
||||
|
||||
;PPC32
|
||||
|
||||
lwz r5,-16(r1)
|
||||
lwz r6,-12(r1)
|
||||
|
||||
cmplwi cr0,r5,0
|
||||
cntlzw r4,r5
|
||||
|
||||
cmplwi cr1,r6,0
|
||||
|
||||
lwz r7,-8(r1)
|
||||
|
||||
bne cr0,L_bytefound ;0-3
|
||||
|
||||
addi r3,r3,4
|
||||
cntlzw r4,r6
|
||||
|
||||
bne cr1,L_bytefound ;4-7
|
||||
|
||||
cmplwi cr0,r7,0
|
||||
|
||||
addi r3,r3,4
|
||||
cntlzw r4,r7
|
||||
lwz r8,-4(r1)
|
||||
|
||||
bne cr0,L_bytefound ;8-11
|
||||
|
||||
cntlzw r4,r8 ;12-15
|
||||
addi r3,r3,4
|
||||
|
||||
#endif
|
||||
|
||||
L_bytefound:
|
||||
|
||||
clrlwi r7,r4,31 ;look if it's an odd number
|
||||
srwi r6,r4,3
|
||||
cmplwi cr0,r7,0
|
||||
nop
|
||||
|
||||
bne cr0,L_notfound
|
||||
|
||||
add r3,r6,r3 ;update r3 to point to the good byte
|
||||
blr
|
||||
|
||||
.align 4
|
||||
|
||||
|
||||
|
||||
;Code to execute when searching for NULL
|
||||
|
||||
L_NULL_ONLY:
|
||||
|
||||
bdz L_NULLVMX
|
||||
lbz r5,0(r3)
|
||||
addi r3,r3,1
|
||||
cmplwi cr0,r5,0
|
||||
|
||||
bne cr0,L_NULL_ONLY ;prefix loop
|
||||
|
||||
la r3,-1(r3)
|
||||
mtspr VRSAVE,r12
|
||||
|
||||
blr
|
||||
|
||||
|
||||
L_NULLVMX:
|
||||
|
||||
lvx v1,0,r3
|
||||
vcmpequb. v2,v1,v0
|
||||
li r10,16
|
||||
bc 4,26,L_found_NULL
|
||||
|
||||
L_NULL_LOOP:
|
||||
|
||||
lvx v1,r3,r10
|
||||
vcmpequb. v2,v1,v0
|
||||
la r10,16(r10)
|
||||
bc 4,26,L_found_NULL
|
||||
|
||||
lvx v1,r3,r10
|
||||
vcmpequb. v2,v1,v0
|
||||
la r10,16(r10)
|
||||
bc 4,26,L_found_NULL
|
||||
|
||||
lvx v1,r3,r10
|
||||
vcmpequb. v2,v1,v0
|
||||
la r10,16(r10)
|
||||
bc 4,26,L_found_NULL
|
||||
|
||||
lvx v1,r3,r10
|
||||
vcmpequb. v2,v1,v0
|
||||
la r10,16(r10)
|
||||
bc 4,26,L_found_NULL
|
||||
|
||||
lvx v1,r3,r10
|
||||
vcmpequb. v2,v1,v0
|
||||
la r10,16(r10)
|
||||
bc 4,26,L_found_NULL
|
||||
|
||||
lvx v1,r3,r10
|
||||
vcmpequb. v2,v1,v0
|
||||
la r10,16(r10)
|
||||
bc+ 12,26,L_NULL_LOOP
|
||||
|
||||
b L_found_NULL
|
||||
|
||||
L_notfound_1:
|
||||
|
||||
mtspr VRSAVE,r12
|
||||
|
||||
L_notfound:
|
||||
|
||||
li r3,0
|
||||
blr
|
||||
|
||||
L_found:
|
||||
|
||||
mtspr VRSAVE,r12
|
||||
la r3,-1(r3)
|
||||
blr
|
||||
|
||||
.subsections_via_symbols
|
||||
|
Loading…
Reference in New Issue
Block a user