tenfourfox/intl/uconv/nsScriptableUConv.cpp

333 lines
9.5 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsString.h"
#include "nsIScriptableUConv.h"
#include "nsScriptableUConv.h"
#include "nsIStringStream.h"
#include "nsComponentManagerUtils.h"
#include "nsIUnicodeDecoder.h"
#include "nsIUnicodeEncoder.h"
#include "mozilla/dom/EncodingUtils.h"
#include "mozilla/CheckedInt.h"
using mozilla::dom::EncodingUtils;
/* Implementation file */
// XPCOM boilerplate: names nsIScriptableUnicodeConverter as the interface
// implemented by nsScriptableUnicodeConverter.
NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter)
// Starts with mIsInternal cleared, so InitConverter() skips the legacy
// contract-ID lookup unless SetIsInternal(true) is called first.
nsScriptableUnicodeConverter::nsScriptableUnicodeConverter()
  : mIsInternal(false)
{}
// Nothing is released explicitly here; the body is intentionally empty.
nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() {}
// Encodes aSrc with the current encoder into a newly malloc()ed buffer.
//
// On success returns NS_OK, stores the buffer in *_retval (with a '\0'
// written just past the converted data) and leaves the length reported by
// the encoder's Convert() in *aOutLen. The buffer comes from malloc(), so it
// has to be released with free().
//
// Returns NS_ERROR_FAILURE when no encoder is set, or when GetMaxLength() or
// Convert() fail (in those two cases *_retval is reset to nullptr), and
// NS_ERROR_OUT_OF_MEMORY when the buffer size overflows or malloc() fails.
nsresult
nsScriptableUnicodeConverter::ConvertFromUnicodeWithLength(const nsAString& aSrc,
                                                           int32_t* aOutLen,
                                                           char **_retval)
{
  if (!mEncoder) {
    return NS_ERROR_FAILURE;
  }

  int32_t srcLength = aSrc.Length();
  const nsAFlatString& flat = PromiseFlatString(aSrc);

  // Ask the encoder for an upper bound on the output size.
  nsresult rv = mEncoder->GetMaxLength(flat.get(), srcLength, aOutLen);
  if (NS_FAILED(rv)) {
    *_retval = nullptr;
    return NS_ERROR_FAILURE;
  }

  // One extra byte for the trailing '\0'; guard the addition against overflow.
  mozilla::CheckedInt<int32_t> needed(*aOutLen);
  needed += 1;
  if (MOZ_UNLIKELY(!needed.isValid())) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  *_retval = static_cast<char*>(malloc(needed.value()));
  if (MOZ_UNLIKELY(!*_retval)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  rv = mEncoder->Convert(flat.get(), &srcLength, *_retval, aOutLen);
  if (NS_FAILED(rv)) {
    // Conversion failed: release the buffer and report a generic failure.
    free(*_retval);
    *_retval = nullptr;
    return NS_ERROR_FAILURE;
  }

  (*_retval)[*aOutLen] = '\0';
  return NS_OK;
}
// Encodes aSrc with the current encoder and copies the result into _retval.
// Propagates any failure from ConvertFromUnicodeWithLength(); returns
// NS_ERROR_OUT_OF_MEMORY if the copy into _retval cannot be allocated.
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc,
                                                 nsACString& _retval)
{
  char* encoded;
  int32_t encodedLen;
  nsresult rv = ConvertFromUnicodeWithLength(aSrc, &encodedLen, &encoded);
  if (NS_FAILED(rv)) {
    return rv;
  }

  // No Adopt on nsACString :( -- so copy the bytes, then drop our buffer.
  const bool copied = _retval.Assign(encoded, encodedLen, mozilla::fallible);
  free(encoded);
  if (!copied) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  return rv;
}
// Calls Finish() on the current encoder, collecting whatever it emits into a
// newly malloc()ed buffer.
//
// On success *_retval holds the buffer (release it with free()) and *aLength
// the length reported by the encoder. The buffer is not NUL-terminated here.
//
// Returns NS_ERROR_FAILURE when no encoder is set, NS_ERROR_OUT_OF_MEMORY
// when malloc() fails, or the encoder's own error code when Finish() fails.
// After a malloc() or Finish() failure *_retval is nullptr, so callers are
// never left holding a pointer to freed memory.
nsresult
nsScriptableUnicodeConverter::FinishWithLength(char **_retval, int32_t* aLength)
{
  if (!mEncoder)
    return NS_ERROR_FAILURE;

  // Capacity offered to Finish(); on return finLength holds the length the
  // encoder reports.
  int32_t finLength = 32;
  *_retval = static_cast<char*>(malloc(finLength));
  if (!*_retval)
    return NS_ERROR_OUT_OF_MEMORY;

  nsresult rv = mEncoder->Finish(*_retval, &finLength);
  if (NS_FAILED(rv)) {
    free(*_retval);
    // Do not hand a dangling pointer back through the out-param.
    *_retval = nullptr;
    return rv;
  }

  *aLength = finLength;
  return rv;
}
// Flushes the encoder and returns its trailing output in _retval.
// With no encoder set, _retval is truncated and NS_OK is returned.
NS_IMETHODIMP
nsScriptableUnicodeConverter::Finish(nsACString& _retval)
{
  // The documentation for this method says it should be called after
  // ConvertFromUnicode(). However, our own tests called it after
  // convertFromByteArray(), i.e. when *decoding*.
  // Assuming that there are extensions that similarly call this at the
  // wrong time, let's deal. In general, it is a design error for this
  // class to handle conversions in both directions.
  if (!mEncoder) {
    _retval.Truncate();
    return NS_OK;
  }

  char* tail;
  int32_t tailLen;
  nsresult rv = FinishWithLength(&tail, &tailLen);
  if (NS_FAILED(rv)) {
    return rv;
  }

  // No Adopt on nsACString :( -- so copy the bytes, then drop our buffer.
  const bool copied = _retval.Assign(tail, tailLen, mozilla::fallible);
  free(tail);
  if (!copied) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  return rv;
}
// Decodes the bytes of aSrc into _retval by forwarding them, viewed as a
// uint8_t array, to ConvertFromByteArray().
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc, nsAString& _retval)
{
  nsACString::const_iterator start;
  aSrc.BeginReading(start);

  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(start.get());
  const uint32_t byteCount = aSrc.Length();
  return ConvertFromByteArray(bytes, byteCount, _retval);
}
// Decodes aCount bytes starting at aData with the current decoder and
// assigns the resulting UTF-16 text to _retval.
//
// Returns NS_ERROR_FAILURE when no decoder is set or GetMaxLength() fails,
// NS_ERROR_OUT_OF_MEMORY when aCount does not fit the decoder's signed
// 32-bit length, when the buffer size overflows, or when an allocation
// fails, and otherwise the result of the decoder's Convert().
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertFromByteArray(const uint8_t* aData,
                                                   uint32_t aCount,
                                                   nsAString& _retval)
{
  if (!mDecoder)
    return NS_ERROR_FAILURE;

  // The decoder takes its input length as int32_t. Refuse counts that do not
  // fit instead of letting the conversion wrap to a negative length.
  mozilla::CheckedInt<int32_t> checkedCount(aCount);
  if (MOZ_UNLIKELY(!checkedCount.isValid())) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  int32_t inLength = checkedCount.value();

  const char* src = reinterpret_cast<const char*>(aData);
  int32_t outLength;
  nsresult rv = mDecoder->GetMaxLength(src, inLength, &outLength);
  if (NS_FAILED(rv))
    return NS_ERROR_FAILURE;

  // Room for outLength code units plus a terminating 0, measured in bytes;
  // every step of the computation is overflow-checked.
  mozilla::CheckedInt<nsACString::size_type> needed(outLength);
  needed += 1;
  needed *= sizeof(char16_t);
  if (MOZ_UNLIKELY(!needed.isValid())) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  char16_t* buf = static_cast<char16_t*>(malloc(needed.value()));
  if (MOZ_UNLIKELY(!buf))
    return NS_ERROR_OUT_OF_MEMORY;

  rv = mDecoder->Convert(src, &inLength, buf, &outLength);
  if (NS_SUCCEEDED(rv)) {
    buf[outLength] = 0;
    if (!_retval.Assign(buf, outLength, mozilla::fallible)) {
      rv = NS_ERROR_OUT_OF_MEMORY;
    }
  }

  // _retval holds its own copy (or the conversion failed); either way the
  // scratch buffer is no longer needed.
  free(buf);
  return rv;
}
// Encodes aString, appends the encoder's Finish() output, and returns the
// combined bytes in a newly malloc()ed array (*_aData) of length *aLen.
// Failures from the conversion or finish step are propagated; returns
// NS_ERROR_OUT_OF_MEMORY if the result array cannot be allocated.
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertToByteArray(const nsAString& aString,
                                                 uint32_t* aLen,
                                                 uint8_t** _aData)
{
  char* chunk;
  int32_t chunkLen;

  nsresult rv = ConvertFromUnicodeWithLength(aString, &chunkLen, &chunk);
  if (NS_FAILED(rv)) {
    return rv;
  }

  // NOTE: This uses the XPIDLCString as a byte array. The converted buffer
  // is handed over to it via Adopt(), so it is not freed separately here.
  nsXPIDLCString bytes;
  bytes.Adopt(chunk, chunkLen);

  rv = FinishWithLength(&chunk, &chunkLen);
  if (NS_FAILED(rv)) {
    return rv;
  }
  bytes.Append(chunk, chunkLen);
  free(chunk);

  // NOTE: this being a byte array, it needs no null termination
  *_aData = reinterpret_cast<uint8_t*>(malloc(bytes.Length()));
  if (!*_aData) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  memcpy(*_aData, bytes.get(), bytes.Length());
  *aLen = bytes.Length();
  return NS_OK;
}
// Encodes aString and wraps the resulting bytes in a string input stream,
// returned (with a reference added) through *_retval.
// Failures from creating the stream, converting the text, or AdoptData()
// are propagated unchanged.
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertToInputStream(const nsAString& aString,
                                                   nsIInputStream** _retval)
{
  nsresult rv;
  nsCOMPtr<nsIStringInputStream> stream =
    do_CreateInstance("@mozilla.org/io/string-input-stream;1", &rv);
  if (NS_FAILED(rv)) {
    return rv;
  }

  uint8_t* bytes;
  uint32_t byteCount;
  rv = ConvertToByteArray(aString, &byteCount, &bytes);
  if (NS_FAILED(rv)) {
    return rv;
  }

  rv = stream->AdoptData(reinterpret_cast<char*>(bytes), byteCount);
  if (NS_FAILED(rv)) {
    // AdoptData() failed, so the buffer is released here.
    free(bytes);
    return rv;
  }

  NS_ADDREF(*_retval = stream);
  return rv;
}
// Returns a newly allocated C-string copy of mCharset in *aCharset, or
// NS_ERROR_OUT_OF_MEMORY (with *aCharset null) if the copy cannot be made.
NS_IMETHODIMP
nsScriptableUnicodeConverter::GetCharset(char * *aCharset)
{
  char* copy = ToNewCString(mCharset);
  *aCharset = copy;
  if (copy) {
    return NS_OK;
  }
  return NS_ERROR_OUT_OF_MEMORY;
}
// Stores aCharset as the current charset label and rebuilds the encoder and
// decoder for it; the result of InitConverter() is returned as-is.
NS_IMETHODIMP
nsScriptableUnicodeConverter::SetCharset(const char * aCharset)
{
  mCharset.Assign(aCharset);

  const nsresult initResult = InitConverter();
  return initResult;
}
// Reports the current value of the mIsInternal flag; always succeeds.
NS_IMETHODIMP
nsScriptableUnicodeConverter::GetIsInternal(bool *aIsInternal)
{
  const bool current = mIsInternal;
  *aIsInternal = current;
  return NS_OK;
}
// Updates the mIsInternal flag; always succeeds. The flag is only consulted
// by InitConverter(), which this setter does not call.
NS_IMETHODIMP
nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal)
{
  mIsInternal = aIsInternal;

  return NS_OK;
}
// (Re)creates mEncoder and mDecoder for the label stored in mCharset.
//
// When mIsInternal is set, the trimmed label is first tried directly as a
// converter contract-ID suffix, in lower case and then in upper case. If
// that yields no decoder (or mIsInternal is clear), the label is resolved
// through EncodingUtils instead.
//
// Returns NS_ERROR_UCONV_NOCONV for the "replacement" label (internal mode)
// or for a label EncodingUtils cannot resolve; NS_OK when there is no
// encoder to configure; otherwise the result of configuring the encoder's
// output error behaviour.
nsresult
nsScriptableUnicodeConverter::InitConverter()
{
  // Drop whatever converters a previous charset left behind.
  mEncoder = nullptr;
  mDecoder = nullptr;

  // Only filled in by the EncodingUtils lookup further down.
  nsAutoCString encoding;

  if (mIsInternal) {
    // For compatibility with legacy extensions, let's try to see if the label
    // happens to be ASCII-case-insensitively an encoding. This should allow
    // for things like "utf-7" and "x-Mac-Hebrew".
    nsAutoCString label(mCharset);
    EncodingUtils::TrimSpaceCharacters(label);

    // Try lower case first. E.g. x-mac-ce and x-imap4-modified-utf7 are all
    // lower case.
    ToLowerCase(label);
    if (label.EqualsLiteral("replacement")) {
      // reject "replacement"
      return NS_ERROR_UCONV_NOCONV;
    }

    nsAutoCString encoderId;
    encoderId.AssignLiteral(NS_UNICODEENCODER_CONTRACTID_BASE);
    encoderId.Append(label);
    mEncoder = do_CreateInstance(encoderId.get());

    nsAutoCString decoderId;
    decoderId.AssignLiteral(NS_UNICODEDECODER_CONTRACTID_BASE);
    decoderId.Append(label);
    mDecoder = do_CreateInstance(decoderId.get());

    if (!mDecoder) {
      // The old code seemed to want both a decoder and an encoder. Since some
      // internal encodings will be decoder-only in the future, let's relax
      // this. Note that the other methods check mEncoder for null anyway.
      // Let's try the upper case. E.g. UTF-7 and ISO-2022-CN have upper
      // case Gecko-canonical names.
      ToUpperCase(label);

      encoderId.AssignLiteral(NS_UNICODEENCODER_CONTRACTID_BASE);
      encoderId.Append(label);
      mEncoder = do_CreateInstance(encoderId.get());

      decoderId.AssignLiteral(NS_UNICODEDECODER_CONTRACTID_BASE);
      decoderId.Append(label);
      mDecoder = do_CreateInstance(decoderId.get());
      // If still no decoder, use the normal non-internal case below.
    }
  }

  if (!mDecoder) {
    const bool known =
      EncodingUtils::FindEncodingForLabelNoReplacement(mCharset, encoding);
    if (!known) {
      return NS_ERROR_UCONV_NOCONV;
    }
    mEncoder = EncodingUtils::EncoderForEncoding(encoding);
    mDecoder = EncodingUtils::DecoderForEncoding(encoding);
  }

  // The UTF-8 decoder used to throw regardless of the error behavior.
  // Simulating the old behavior for compatibility with legacy callers
  // (including addons). If callers want control over the behavior,
  // they should switch to TextDecoder.
  if (encoding.EqualsLiteral("UTF-8")) {
    mDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
  }

  if (mEncoder) {
    // Unencodable characters are replaced with '?'.
    return mEncoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace,
                                            nullptr,
                                            (char16_t)'?');
  }
  return NS_OK;
}