tenfourfox/intl/uconv/nsNCRFallbackEncoderWrapper...

153 lines
5.1 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsNCRFallbackEncoderWrapper.h"
#include "mozilla/dom/EncodingUtils.h"
/**
 * Constructs the wrapper for the encoding named by aEncoding.
 *
 * The encoder returned by EncodingUtils::EncoderForEncoding() is stored in
 * mEncoder as-is; the result is not validated here.
 */
nsNCRFallbackEncoderWrapper::nsNCRFallbackEncoderWrapper(
  const nsACString& aEncoding)
  : mEncoder(mozilla::dom::EncodingUtils::EncoderForEncoding(aEncoding))
{}
// No explicit teardown is performed here; the destructor is merely defined
// out of line in this translation unit.
nsNCRFallbackEncoderWrapper::~nsNCRFallbackEncoderWrapper() = default;
/**
 * Writes the decimal numeric character reference "&#N;" (N = aUnmappable)
 * into aBytes starting at byte offset aDstWritten.
 *
 * @param aBytes      Output buffer. It is lengthened (fallibly) when the
 *                    reference would extend past its current length; it is
 *                    never shortened here.
 * @param aDstWritten In/out write offset into aBytes; advanced by the length
 *                    of the reference on success, left untouched on failure.
 * @param aUnmappable Value rendered in decimal between "&#" and ";".
 * @return true on success; false if the resulting offset would exceed
 *         INT32_MAX or if growing aBytes fails.
 */
bool
nsNCRFallbackEncoderWrapper::WriteNCR(nsACString& aBytes,
                                      uint32_t& aDstWritten,
                                      int32_t aUnmappable)
{
  // Format into a scratch string first so that aBytes is not shrunk by the
  // formatting step and then grown back again.
  nsAutoCString reference("&#");
  reference.AppendInt(aUnmappable);
  reference.Append(';');

  const uint32_t referenceLen = reference.Length();
  const uint32_t endOffset = aDstWritten + referenceLen;
  if (endOffset > INT32_MAX) {
    return false;
  }

  // Only resize when the reference does not fit in the current length.
  if (endOffset > aBytes.Length()) {
    if (!aBytes.SetLength(endOffset, mozilla::fallible_t())) {
      return false;
    }
  }

  char* out = aBytes.BeginWriting() + aDstWritten;
  memcpy(out, reference.BeginReading(), referenceLen);
  aDstWritten = endOffset;
  return true;
}
/**
 * Encodes aUtf16 into aBytes using mEncoder, replacing every code point the
 * encoder reports as unmappable with a decimal numeric character reference
 * ("&#N;") written by WriteNCR().
 *
 * Surrogate handling for unmappable input: a high surrogate followed by a low
 * surrogate is combined into one code point; an unpaired high or low
 * surrogate is written as the reference for U+FFFD.
 *
 * @param aUtf16 Input text. Inputs longer than INT32_MAX code units are
 *               rejected.
 * @param aBytes Output buffer. On success its length is set to the number of
 *               bytes written. On failure its contents and length are
 *               whatever the partial conversion left behind.
 * @return true on success; false if there is no encoder, a size limit is
 *         exceeded, an allocation fails, or the encoder reports an
 *         unexpected result.
 */
bool
nsNCRFallbackEncoderWrapper::Encode(const nsAString& aUtf16,
                                    nsACString& aBytes)
{
  // The constructor stores the result of EncoderForEncoding() without
  // checking it, so refuse to run rather than dereference a null encoder.
  if (!mEncoder) {
    return false;
  }
  // nsIUnicodeEncoder uses int32_t for sizes :-(
  if (aUtf16.Length() > INT32_MAX) {
    return false;
  }
  const char16_t* src = aUtf16.BeginReading();
  const char16_t* srcEnd = aUtf16.EndReading();
  uint32_t dstWritten = 0;
  // Each pass converts as much of the remaining input as possible. The loop
  // only repeats after an unmappable code point has been replaced by an NCR.
  for (;;) {
    int32_t srcLen = srcEnd - src;
    int32_t dstLen = 0;
    nsresult rv = mEncoder->GetMaxLength(src, srcLen, &dstLen);
    if (NS_FAILED(rv)) {
      return false;
    }
    uint32_t needed = dstWritten + dstLen;
    if (needed > INT32_MAX) {
      return false;
    }
    // Behind the scenes SetLength() makes the underlying allocation not have
    // slop, so we don't need to round up here.
    if (needed > aBytes.Length() && !aBytes.SetLength(needed,
                                                     mozilla::fallible_t())) {
      return false;
    }
    // We need to re-obtain the destination pointer on every iteration, because
    // SetLength() invalidates it.
    char* dst = aBytes.BeginWriting() + dstWritten;
    dstLen = aBytes.Length() - dstWritten;
    mEncoder->Reset();
    rv = mEncoder->Convert(src, &srcLen, dst, &dstLen);
    // Update state tracking: Convert() reports the consumed/produced counts
    // through srcLen/dstLen.
    src += srcLen;
    dstWritten += dstLen;
    if (rv == NS_OK_UENC_MOREOUTPUT) {
      MOZ_ASSERT_UNREACHABLE("GetMaxLength must have returned a bogus length.");
      return false;
    }
    if (rv == NS_ERROR_UENC_NOMAPPING) {
      int32_t unmappable;
      // The unmappable code unit or the first half of an unmappable surrogate
      // pair is consumed by the encoder.
      MOZ_ASSERT(srcLen > 0, "Encoder should have consumed some input.");
      if (srcLen <= 0) {
        // Runtime guard for builds where the assertion above is compiled
        // out: without consumed input, src[-1] below would read before the
        // current position and the loop would make no forward progress.
        return false;
      }
      char16_t codeUnit = src[-1];
      // Let's see if it is a surrogate
      size_t highBits = (codeUnit & 0xFC00);
      if (highBits == 0xD800) {
        // high surrogate
        // Let's see if we actually have a surrogate pair.
        char16_t next;
        if (src < srcEnd && NS_IS_LOW_SURROGATE((next = *src))) {
          src++; // consume the low surrogate
          unmappable = SURROGATE_TO_UCS4(codeUnit, next);
        } else {
          // unpaired surrogate.
          unmappable = 0xFFFD;
        }
      } else if (highBits == 0xDC00) {
        // low surrogate
        // This must be an unpaired surrogate.
        unmappable = 0xFFFD;
      } else {
        // not a surrogate
        unmappable = codeUnit;
      }
      // If we are encoding to ISO-2022-JP, we need to let the encoder
      // generate a transition to the ASCII state if not already there.
      dst = aBytes.BeginWriting() + dstWritten;
      dstLen = aBytes.Length() - dstWritten;
      rv = mEncoder->Finish(dst, &dstLen);
      dstWritten += dstLen;
      if (rv != NS_OK) {
        // Failures should be impossible if GetMaxLength works. Big5 is the
        // only case where Finish() may return NS_ERROR_UENC_NOMAPPING but
        // that should never happen right after Convert() has returned it.
        MOZ_ASSERT_UNREACHABLE("Broken encoder.");
        return false;
      }
      if (!WriteNCR(aBytes, dstWritten, unmappable)) {
        return false;
      }
      // Resume converting the input that follows the unmappable code point.
      continue;
    }
    if (!(rv == NS_OK || rv == NS_OK_UENC_MOREINPUT)) {
      return false;
    }
    MOZ_ASSERT(src == srcEnd, "Converter did not consume all input.");
    // All input has been handed to the encoder; let it flush whatever it
    // still holds.
    dst = aBytes.BeginWriting() + dstWritten;
    dstLen = aBytes.Length() - dstWritten;
    rv = mEncoder->Finish(dst, &dstLen);
    dstWritten += dstLen;
    if (rv == NS_OK_UENC_MOREOUTPUT) {
      MOZ_ASSERT_UNREACHABLE("GetMaxLength must have returned a bogus length.");
      return false;
    }
    if (rv == NS_ERROR_UENC_NOMAPPING) {
      // Big5
      if (!WriteNCR(aBytes, dstWritten, 0xFFFD)) {
        return false;
      }
    }
    // Trim the buffer down to the bytes actually produced.
    return aBytes.SetLength(dstWritten, mozilla::fallible_t());
  }
}