mirror of
https://github.com/classilla/tenfourfox.git
synced 2025-01-12 20:30:18 +00:00
97 lines
2.7 KiB
C++
97 lines
2.7 KiB
C++
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
// This file should only be compiled if you're on x86 or x86_64. Additionally,
|
|
// you'll need to compile this file with -msse2 if you're using gcc.
|
|
|
|
#include <emmintrin.h>
|
|
#include "nscore.h"
|
|
|
|
namespace mozilla {
|
|
namespace SSE2 {
|
|
|
|
void
|
|
Convert_ascii_run(const char *&src,
|
|
char16_t *&dst,
|
|
int32_t len)
|
|
{
|
|
if (len > 15) {
|
|
__m128i in, out1, out2;
|
|
__m128d *outp1, *outp2;
|
|
__m128i zeroes;
|
|
uint32_t offset;
|
|
|
|
// align input to 16 bytes
|
|
while ((NS_PTR_TO_UINT32(src) & 15) && len > 0) {
|
|
if (*src & 0x80U)
|
|
return;
|
|
*dst++ = (char16_t) *src++;
|
|
len--;
|
|
}
|
|
|
|
zeroes = _mm_setzero_si128();
|
|
|
|
offset = NS_PTR_TO_UINT32(dst) & 15;
|
|
|
|
// Note: all these inner loops have to break, not return; we need
|
|
// to let the single-char loop below catch any leftover
|
|
// byte-at-a-time ASCII chars, since this function must consume
|
|
// all available ASCII chars before it returns
|
|
|
|
if (offset == 0) {
|
|
while (len > 15) {
|
|
in = _mm_load_si128((__m128i *) src);
|
|
if (_mm_movemask_epi8(in))
|
|
break;
|
|
out1 = _mm_unpacklo_epi8(in, zeroes);
|
|
out2 = _mm_unpackhi_epi8(in, zeroes);
|
|
_mm_stream_si128((__m128i *) dst, out1);
|
|
_mm_stream_si128((__m128i *) (dst + 8), out2);
|
|
dst += 16;
|
|
src += 16;
|
|
len -= 16;
|
|
}
|
|
} else if (offset == 8) {
|
|
outp1 = (__m128d *) &out1;
|
|
outp2 = (__m128d *) &out2;
|
|
while (len > 15) {
|
|
in = _mm_load_si128((__m128i *) src);
|
|
if (_mm_movemask_epi8(in))
|
|
break;
|
|
out1 = _mm_unpacklo_epi8(in, zeroes);
|
|
out2 = _mm_unpackhi_epi8(in, zeroes);
|
|
_mm_storel_epi64((__m128i *) dst, out1);
|
|
_mm_storel_epi64((__m128i *) (dst + 8), out2);
|
|
_mm_storeh_pd((double *) (dst + 4), *outp1);
|
|
_mm_storeh_pd((double *) (dst + 12), *outp2);
|
|
src += 16;
|
|
dst += 16;
|
|
len -= 16;
|
|
}
|
|
} else {
|
|
while (len > 15) {
|
|
in = _mm_load_si128((__m128i *) src);
|
|
if (_mm_movemask_epi8(in))
|
|
break;
|
|
out1 = _mm_unpacklo_epi8(in, zeroes);
|
|
out2 = _mm_unpackhi_epi8(in, zeroes);
|
|
_mm_storeu_si128((__m128i *) dst, out1);
|
|
_mm_storeu_si128((__m128i *) (dst + 8), out2);
|
|
src += 16;
|
|
dst += 16;
|
|
len -= 16;
|
|
}
|
|
}
|
|
}
|
|
|
|
// finish off a byte at a time
|
|
|
|
while (len-- > 0 && (*src & 0x80U) == 0) {
|
|
*dst++ = (char16_t) *src++;
|
|
}
|
|
}
|
|
|
|
} // namespace SSE2
|
|
} // namespace mozilla
|