#406: improvements to vmx_memchr(), M1371215

This commit is contained in:
Cameron Kaiser 2017-06-27 17:47:01 -07:00
parent ca165994c4
commit e5108cf1ca
2 changed files with 43 additions and 66 deletions

View File

@ -1167,56 +1167,15 @@ FirstCharMatcher8bit(const char* text, uint32_t n, const char pat)
#endif
}
/*
 * Locate the first occurrence of the 16-bit character |pat| within the first
 * |n| characters of |text|. Returns a pointer to the matching character, or
 * nullptr when there is no match.
 */
static const char16_t*
FirstCharMatcher16bit(const char16_t* text, uint32_t n, const char16_t pat)
{
#if defined(XP_DARWIN) || defined(XP_WIN)
    /*
     * memchr performs very poorly on OS X. Windows fares better, but the
     * unrolled matcher is still the better choice there.
     */
    return FirstCharMatcherUnrolled<char16_t, char16_t>(text, n, pat);
#else
    /*
     * Elsewhere (Linux) the fastest approach is to lean on the 8-bit matcher:
     * view both the text and the pattern character as raw bytes, scan for the
     * pattern's first byte, and then confirm the second byte by hand.
     */
    const char* const bytes = reinterpret_cast<const char*>(text);
    const char* const patBytes = reinterpret_cast<const char*>(&pat);
    const char firstByte = patBytes[0];
    const char secondByte = patBytes[1];

    /* The length is doubled below to count bytes; it must not overflow. */
    MOZ_ASSERT(n < UINT32_MAX/2);
    const uint32_t byteLen = n * 2;

    for (uint32_t offset = 0; offset < byteLen; ) {
        /* Look for the pattern's first byte in the remaining bytes. */
        const char* hit = FirstCharMatcher8bit(bytes + offset, byteLen - offset, firstByte);
        if (!hit)
            return nullptr;

        offset = static_cast<uint32_t>(hit - bytes);

        if (offset & 1) {
            /* Hit the second byte of a 16-bit unit: not a real match. */
            ++offset;
        } else if (bytes[offset + 1] == secondByte) {
            /* Both bytes of an aligned 16-bit unit match. */
            return text + (offset / 2);
        } else {
            /* First byte matched but the second did not; skip this unit. */
            offset += 2;
        }
    }
    return nullptr;
#endif
}
template <class InnerMatch, typename TextChar, typename PatChar>
static int
Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat, uint32_t patlen)
{
MOZ_ASSERT(patlen > 0);
if (sizeof(TextChar) == 1 && sizeof(PatChar) > 1 && pat[0] > 0xff)
return -1;
const typename InnerMatch::Extent extent = InnerMatch::computeExtent(pat, patlen);
uint32_t i = 0;
@ -1224,12 +1183,12 @@ Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat, uint32_t pat
while (i < n) {
const TextChar* pos;
if (sizeof(TextChar) == 2 && sizeof(PatChar) == 2)
pos = (TextChar*) FirstCharMatcher16bit((char16_t*)text + i, n - i, pat[0]);
else if (sizeof(TextChar) == 1 && sizeof(PatChar) == 1)
if (sizeof(TextChar) == 1) {
MOZ_ASSERT(pat[0] <= 0xff);
pos = (TextChar*) FirstCharMatcher8bit((char*) text + i, n - i, pat[0]);
else
pos = (TextChar*) FirstCharMatcherUnrolled<TextChar, PatChar>(text + i, n - i, pat[0]);
} else {
pos = FirstCharMatcherUnrolled(text + i, n - i, char16_t(pat[0]));
}
if (pos == nullptr)
return -1;
@ -1294,9 +1253,10 @@ StringMatch(const TextChar* text, uint32_t textLen, const PatChar* pat, uint32_t
* use memcmp if one of the strings is TwoByte and the other is Latin1.
*
* FIXME: Linux memcmp performance is sad and the manual loop is faster.
* memcmp() is also pretty bad on PPC OSX, so we just use our fast memchr.
*/
return
#if !defined(__linux__)
#if (0) // !defined(__linux__)
(patLen > 128 && IsSame<TextChar, PatChar>::value)
? Matcher<MemCmp<TextChar, PatChar>, TextChar, PatChar>(text, textLen, pat, patLen)
:

View File

@ -1,12 +1,13 @@
#include <stdio.h>
#include "plvmx.h"
void *vmx_memchr(const void *b, int c, size_t length) {
/* VMX/AltiVec specific libc functions for TenFourFox. */
void *vmx_memchr(const void *b, int c, size_t length) {
/* From:
https://github.com/ridiculousfish/HexFiend/blob/4d5bcee5715f5f288649f7471d1da5bd06376f46/framework/sources/HFFastMemchr.m
with some optimizations */
with some optimizations and fixes. */
const unsigned char *haystack = (const unsigned char *)b;
unsigned char needle = (unsigned char)c;
@ -16,7 +17,7 @@ https://github.com/ridiculousfish/HexFiend/blob/4d5bcee5715f5f288649f7471d1da5bd
// It is possible for altivecLength to be < 0 for short strings.
int altivecLength = length - prefixLength - suffixLength;
if (altivecLength < 16) {
if (altivecLength < 32) { // not worth the setup
while (length--) {
if (*haystack == needle) return (void *)haystack;
haystack++;
@ -46,19 +47,35 @@ https://github.com/ridiculousfish/HexFiend/blob/4d5bcee5715f5f288649f7471d1da5bd
foundResult:
;
/* some byte has the result - look in groups of 4 to find which it is */
unsigned numWords = 4;
unsigned int val;
while (numWords--) {
val = *(unsigned int*)haystack;
if (((val >> 24) & 0xFF) == needle) return (void *)haystack;
if (((val >> 16) & 0xFF) == needle) return (void *)(1 + haystack);
if (((val >> 8) & 0xFF) == needle) return (void *)(2 + haystack);
if ((val & 0xFF) == needle) return (void *)(3 + haystack);
haystack += 4;
}
/* should never get here */
// Some byte has the result; look in groups of 4 to find which one.
// Unroll the loop.
val = *(unsigned int*)haystack;
if (((val >> 24) & 0xFF) == needle) return (void *)haystack;
if (((val >> 16) & 0xFF) == needle) return (void *)(1 + haystack);
if (((val >> 8) & 0xFF) == needle) return (void *)(2 + haystack);
if ((val & 0xFF) == needle) return (void *)(3 + haystack);
haystack += 4;
val = *(unsigned int*)haystack;
if (((val >> 24) & 0xFF) == needle) return (void *)haystack;
if (((val >> 16) & 0xFF) == needle) return (void *)(1 + haystack);
if (((val >> 8) & 0xFF) == needle) return (void *)(2 + haystack);
if ((val & 0xFF) == needle) return (void *)(3 + haystack);
haystack += 4;
val = *(unsigned int*)haystack;
if (((val >> 24) & 0xFF) == needle) return (void *)haystack;
if (((val >> 16) & 0xFF) == needle) return (void *)(1 + haystack);
if (((val >> 8) & 0xFF) == needle) return (void *)(2 + haystack);
if ((val & 0xFF) == needle) return (void *)(3 + haystack);
haystack += 4;
val = *(unsigned int*)haystack;
if (((val >> 24) & 0xFF) == needle) return (void *)haystack;
if (((val >> 16) & 0xFF) == needle) return (void *)(1 + haystack);
if (((val >> 8) & 0xFF) == needle) return (void *)(2 + haystack);
if ((val & 0xFF) == needle) return (void *)(3 + haystack);
// unreachable
fprintf(stderr, "failed vmx_memchr()\n");
return NULL;
}