/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "nsUnicodeToBIG5.h" NS_IMPL_ADDREF(nsUnicodeToBIG5) NS_IMPL_RELEASE(nsUnicodeToBIG5) NS_IMPL_QUERY_INTERFACE(nsUnicodeToBIG5, nsIUnicodeEncoder) nsUnicodeToBIG5::nsUnicodeToBIG5() : mUtf16Lead(0) , mPendingTrail(0) , mSignal(true) // as in nsEncoderSupport { } NS_IMETHODIMP nsUnicodeToBIG5::Convert(const char16_t* aSrc, int32_t* aSrcLength, char* aDest, int32_t * aDestLength) { const char16_t* in = aSrc; const char16_t* inEnd = in + *aSrcLength; uint8_t* out = reinterpret_cast(aDest); uint8_t* outEnd = out + *aDestLength; MOZ_ASSERT(!(mPendingTrail && mUtf16Lead), "Can't have both pending output and pending input."); if (mPendingTrail) { if (out == outEnd) { *aSrcLength = 0; *aDestLength = 0; return NS_OK_UENC_MOREOUTPUT; } *out++ = mPendingTrail; mPendingTrail = 0; } for (;;) { if (in == inEnd) { *aSrcLength = in - aSrc; *aDestLength = out - reinterpret_cast(aDest); return mUtf16Lead ? NS_OK_UENC_MOREINPUT : NS_OK; } if (out == outEnd) { *aSrcLength = in - aSrc; *aDestLength = out - reinterpret_cast(aDest); return NS_OK_UENC_MOREOUTPUT; } bool isAstral; // true means Plane 2, false means BMP char16_t lowBits; // The low 16 bits of the code point char16_t codeUnit = *in++; size_t highBits = (codeUnit & 0xFC00); if (highBits == 0xD800) { // high surrogate if (mUtf16Lead) { // High surrogate follows another high surrogate. The // *previous* code unit is in error. if (mSignal) { mUtf16Lead = 0; // NOTE: Encode API differs from decode API! --in; *aSrcLength = in - aSrc; *aDestLength = out - reinterpret_cast(aDest); return NS_ERROR_UENC_NOMAPPING; } *out++ = '?'; } mUtf16Lead = codeUnit; continue; } if (highBits == 0xDC00) { // low surrogate if (!mUtf16Lead) { // Got low surrogate without a previous high surrogate if (mSignal) { // NOTE: Encode API differs from decode API! *aSrcLength = in - aSrc; *aDestLength = out - reinterpret_cast(aDest); return NS_ERROR_UENC_NOMAPPING; } *out++ = '?'; continue; } size_t codePoint = (mUtf16Lead << 10) + codeUnit - (((0xD800 << 10) - 0x10000) + 0xDC00); mUtf16Lead = 0; // Plane 2 is the only astral plane that has potentially // Big5-encodable characters. if ((0xFF0000 & codePoint) != 0x20000) { if (mSignal) { // NOTE: Encode API differs from decode API! // nsSaveAsCharset wants us to back up on step in the case of a // surrogate pair. --in; *aSrcLength = in - aSrc; *aDestLength = out - reinterpret_cast(aDest); return NS_ERROR_UENC_NOMAPPING; } *out++ = '?'; continue; } isAstral = true; lowBits = (char16_t)(codePoint & 0xFFFF); } else { // not a surrogate if (mUtf16Lead) { // Non-surrogate follows a high surrogate. The *previous* // code unit is in error. mUtf16Lead = 0; if (mSignal) { // NOTE: Encode API differs from decode API! --in; *aSrcLength = in - aSrc; *aDestLength = out - reinterpret_cast(aDest); return NS_ERROR_UENC_NOMAPPING; } *out++ = '?'; // Let's unconsume this code unit and reloop in order to // re-check if the output buffer still has space. --in; continue; } isAstral = false; lowBits = codeUnit; } // isAstral now tells us if we have a Plane 2 or a BMP character. // lowBits tells us the low 16 bits. // After all the above setup to deal with UTF-16, we are now // finally ready to follow the spec. if (!isAstral && lowBits <= 0x7F) { *out++ = (uint8_t)lowBits; continue; } size_t pointer = nsBIG5Data::FindPointer(lowBits, isAstral); if (!pointer) { if (mSignal) { // NOTE: Encode API differs from decode API! if (isAstral) { // nsSaveAsCharset wants us to back up on step in the case of a // surrogate pair. --in; } *aSrcLength = in - aSrc; *aDestLength = out - reinterpret_cast(aDest); return NS_ERROR_UENC_NOMAPPING; } *out++ = '?'; continue; } uint8_t lead = (uint8_t)(pointer / 157 + 0x81); uint8_t trail = (uint8_t)(pointer % 157); if (trail < 0x3F) { trail += 0x40; } else { trail += 0x62; } *out++ = lead; if (out == outEnd) { mPendingTrail = trail; *aSrcLength = in - aSrc; *aDestLength = out - reinterpret_cast(aDest); return NS_OK_UENC_MOREOUTPUT; } *out++ = trail; continue; } } NS_IMETHODIMP nsUnicodeToBIG5::Finish(char* aDest, int32_t* aDestLength) { MOZ_ASSERT(!(mPendingTrail && mUtf16Lead), "Can't have both pending output and pending input."); uint8_t* out = reinterpret_cast(aDest); if (mPendingTrail) { if (*aDestLength < 1) { *aDestLength = 0; return NS_OK_UENC_MOREOUTPUT; } *out = mPendingTrail; mPendingTrail = 0; *aDestLength = 1; return NS_OK; } if (mUtf16Lead) { if (*aDestLength < 1) { *aDestLength = 0; return NS_OK_UENC_MOREOUTPUT; } mUtf16Lead = 0; if (mSignal) { *aDestLength = 0; return NS_ERROR_UENC_NOMAPPING; } *out = '?'; *aDestLength = 1; return NS_OK; } *aDestLength = 0; return NS_OK; } NS_IMETHODIMP nsUnicodeToBIG5::GetMaxLength(const char16_t* aSrc, int32_t aSrcLength, int32_t* aDestLength) { mozilla::CheckedInt32 length = aSrcLength; length *= 2; if (mPendingTrail) { length += 1; } // If the lead ends up being paired, the bytes produced // are already included above. // If not, it produces a single '?'. if (mUtf16Lead) { length += 1; } if (!length.isValid()) { return NS_ERROR_OUT_OF_MEMORY; } *aDestLength = length.value(); return NS_OK; } NS_IMETHODIMP nsUnicodeToBIG5::Reset() { mUtf16Lead = 0; mPendingTrail = 0; return NS_OK; } NS_IMETHODIMP nsUnicodeToBIG5::SetOutputErrorBehavior(int32_t aBehavior, nsIUnicharEncoder* aEncoder, char16_t aChar) { switch (aBehavior) { case kOnError_Signal: mSignal = true; break; case kOnError_Replace: mSignal = false; MOZ_ASSERT(aChar == '?', "Unsupported replacement."); break; case kOnError_CallBack: MOZ_ASSERT_UNREACHABLE("kOnError_CallBack is supposed to be unused."); break; default: MOZ_ASSERT_UNREACHABLE("Non-existent enum item."); break; } return NS_OK; }