diff --git a/lib/Support/ConvertUTFWrapper.cpp b/lib/Support/ConvertUTFWrapper.cpp index 8f77bff4668..4feff01b08e 100644 --- a/lib/Support/ConvertUTFWrapper.cpp +++ b/lib/Support/ConvertUTFWrapper.cpp @@ -109,8 +109,9 @@ bool convertUTF16ToUTF8String(ArrayRef SrcBytes, std::string &Out) { if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_NATIVE) Src++; - // Just allocate enough space up front. We'll shrink it later. - Out.resize(SrcBytes.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT); + // Just allocate enough space up front. We'll shrink it later. Allocate + // enough that we can fit a null terminator without reallocating. + Out.resize(SrcBytes.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1); UTF8 *Dst = reinterpret_cast(&Out[0]); UTF8 *DstEnd = Dst + Out.size(); @@ -124,6 +125,8 @@ bool convertUTF16ToUTF8String(ArrayRef SrcBytes, std::string &Out) { } Out.resize(reinterpret_cast(Dst) - &Out[0]); + Out.push_back(0); + Out.pop_back(); return true; } @@ -140,8 +143,10 @@ bool convertUTF8ToUTF16String(StringRef SrcUTF8, // Allocate the same number of UTF-16 code units as UTF-8 code units. Encoding // as UTF-16 should always require the same amount or less code units than the - // UTF-8 encoding. - DstUTF16.resize(SrcUTF8.size()); + // UTF-8 encoding. Allocate one extra code unit for the null terminator, + // so that someone calling DstUTF16.data() gets a null-terminated string. + // We resize down later so we don't have to worry that this over-allocates. + DstUTF16.resize(SrcUTF8.size()+1); UTF16 *Dst = &DstUTF16[0]; UTF16 *DstEnd = Dst + DstUTF16.size(); @@ -155,6 +160,8 @@ bool convertUTF8ToUTF16String(StringRef SrcUTF8, } DstUTF16.resize(Dst - &DstUTF16[0]); + DstUTF16.push_back(0); + DstUTF16.pop_back(); return true; }