tenfourfox/intl/uconv/nsUTF8ToUnicodeSSE2.cpp

97 lines
2.7 KiB
C++

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// This file should only be compiled if you're on x86 or x86_64. Additionally,
// you'll need to compile this file with -msse2 if you're using gcc.
#include <emmintrin.h>
#include "nscore.h"
namespace mozilla {
namespace SSE2 {
// Widens a leading run of ASCII bytes from |src| into UTF-16 code units at
// |dst|.
//
// At most |len| bytes are examined. Conversion stops at the first byte whose
// high bit (0x80) is set, or once |len| bytes have been copied. Both |src| and
// |dst| are passed by reference and are left pointing just past the data that
// was consumed / produced, so the caller can resume from there.
//
// No bounds check is done on |dst|; it is assumed to have room for |len|
// code units -- TODO confirm against callers.
void
Convert_ascii_run(const char *&src,
                  char16_t *&dst,
                  int32_t len)
{
  // The vector path is only worth entering when at least one full 16-byte
  // chunk could be available.
  if (len >= 16) {
    // Copy single bytes until |src| reaches a 16-byte boundary, so that the
    // aligned loads below are legal. Hitting a non-ASCII byte here means the
    // run is over, so returning straight away is fine.
    for (; (NS_PTR_TO_UINT32(src) & 15) != 0 && len > 0; --len) {
      if (*src & 0x80U) {
        return;
      }
      *dst++ = (char16_t) *src++;
    }

    const __m128i zero = _mm_setzero_si128();

    // Choose the store strategy from where |dst| sits within a 16-byte line.
    //
    // In every variant: load 16 bytes, and if any of them has its high bit
    // set (non-zero movemask) leave the loop with |break| -- never |return| --
    // so the byte-at-a-time loop at the bottom can still pick up the ASCII
    // bytes that precede the non-ASCII one. Interleaving with zero bytes
    // turns each 8-bit value into a 16-bit one.
    switch (NS_PTR_TO_UINT32(dst) & 15) {
      case 0:
        // |dst| is 16-byte aligned: write both halves with non-temporal
        // (streaming) stores.
        // NOTE(review): no store fence is issued after these streaming
        // stores; confirm callers do not rely on one.
        for (; len >= 16; src += 16, dst += 16, len -= 16) {
          const __m128i block = _mm_load_si128((const __m128i *) src);
          if (_mm_movemask_epi8(block) != 0) {
            break;
          }
          _mm_stream_si128((__m128i *) dst,
                           _mm_unpacklo_epi8(block, zero));
          _mm_stream_si128((__m128i *) (dst + 8),
                           _mm_unpackhi_epi8(block, zero));
        }
        break;

      case 8:
        // |dst| is 8 bytes past a 16-byte boundary: emit the two widened
        // vectors as four separate 8-byte halves (4 code units each).
        for (; len >= 16; src += 16, dst += 16, len -= 16) {
          const __m128i block = _mm_load_si128((const __m128i *) src);
          if (_mm_movemask_epi8(block) != 0) {
            break;
          }
          const __m128i lo = _mm_unpacklo_epi8(block, zero);
          const __m128i hi = _mm_unpackhi_epi8(block, zero);
          _mm_storel_epi64((__m128i *) dst, lo);
          _mm_storel_epi64((__m128i *) (dst + 8), hi);
          // The upper halves go out through the double-precision "store
          // high" intrinsic; the cast only reinterprets the register bits.
          _mm_storeh_pd((double *) (dst + 4), _mm_castsi128_pd(lo));
          _mm_storeh_pd((double *) (dst + 12), _mm_castsi128_pd(hi));
        }
        break;

      default:
        // Any other offset: fall back to unaligned 16-byte stores.
        for (; len >= 16; src += 16, dst += 16, len -= 16) {
          const __m128i block = _mm_load_si128((const __m128i *) src);
          if (_mm_movemask_epi8(block) != 0) {
            break;
          }
          _mm_storeu_si128((__m128i *) dst,
                           _mm_unpacklo_epi8(block, zero));
          _mm_storeu_si128((__m128i *) (dst + 8),
                           _mm_unpackhi_epi8(block, zero));
        }
        break;
    }
  }

  // Scalar tail: handles short inputs, the remainder after the vector loops,
  // and the ASCII bytes in front of a non-ASCII byte inside a rejected chunk.
  for (; len > 0 && !(*src & 0x80U); --len) {
    *dst++ = (char16_t) *src++;
  }
}
} // namespace SSE2
} // namespace mozilla