From 7af97d2dfdb0efb6da48e9c19ce05d09e0d7d191 Mon Sep 17 00:00:00 2001 From: Cameron Kaiser Date: Fri, 6 Jul 2018 17:49:00 -0700 Subject: [PATCH] closes #502: CVE-2016-6293/7415 CVE-2017-7867/7868/14952/15422 --- intl/icu/source/common/locid.cpp | 78 ++++++++++++------------ intl/icu/source/common/uloc.cpp | 98 ++++++++++--------------------- intl/icu/source/common/utext.cpp | 28 +++++++-- intl/icu/source/i18n/gregoimp.cpp | 5 ++ intl/icu/source/i18n/gregoimp.h | 11 ++++ intl/icu/source/i18n/persncal.cpp | 2 +- intl/icu/source/i18n/zonemeta.cpp | 1 - 7 files changed, 112 insertions(+), 111 deletions(-) diff --git a/intl/icu/source/common/locid.cpp b/intl/icu/source/common/locid.cpp index 27e403065..2ee1eaaec 100644 --- a/intl/icu/source/common/locid.cpp +++ b/intl/icu/source/common/locid.cpp @@ -42,6 +42,7 @@ #include "uhash.h" #include "ucln_cmn.h" #include "ustr_imp.h" +#include "charstr.h" U_CDECL_BEGIN static UBool U_CALLCONV locale_cleanup(void); @@ -57,6 +58,12 @@ static UMutex gDefaultLocaleMutex = U_MUTEX_INITIALIZER; static UHashtable *gDefaultLocalesHashT = NULL; static Locale *gDefaultLocale = NULL; +/** + * \def ULOC_STRING_LIMIT + * strings beyond this value crash in CharString + */ +#define ULOC_STRING_LIMIT 357913941 + U_NAMESPACE_END typedef enum ELocalePos { @@ -283,13 +290,12 @@ Locale::Locale( const char * newLanguage, } else { - MaybeStackArray togo; + UErrorCode status = U_ZERO_ERROR; int32_t size = 0; int32_t lsize = 0; int32_t csize = 0; int32_t vsize = 0; int32_t ksize = 0; - char *p; // Calculate the size of the resulting string. @@ -297,13 +303,23 @@ Locale::Locale( const char * newLanguage, if ( newLanguage != NULL ) { lsize = (int32_t)uprv_strlen(newLanguage); + if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap + setToBogus(); + return; + } size = lsize; } + CharString togo(newLanguage, lsize, status); // start with newLanguage + // _Country if ( newCountry != NULL ) { csize = (int32_t)uprv_strlen(newCountry); + if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap + setToBogus(); + return; + } size += csize; } @@ -318,6 +334,10 @@ Locale::Locale( const char * newLanguage, // remove trailing _'s vsize = (int32_t)uprv_strlen(newVariant); + if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap + setToBogus(); + return; + } while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) ) { vsize--; @@ -342,70 +362,56 @@ Locale::Locale( const char * newLanguage, if ( newKeywords != NULL) { ksize = (int32_t)uprv_strlen(newKeywords); + if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) { + setToBogus(); + return; + } size += ksize + 1; } - // NOW we have the full locale string.. - - /*if the whole string is longer than our internal limit, we need - to go to the heap for temporary buffers*/ - if (size >= togo.getCapacity()) - { - // If togo_heap could not be created, initialize with default settings. - if (togo.resize(size+1) == NULL) { - init(NULL, FALSE); - } - } - - togo[0] = 0; - // Now, copy it back. - p = togo.getAlias(); - if ( lsize != 0 ) - { - uprv_strcpy(p, newLanguage); - p += lsize; - } + + // newLanguage is already copied if ( ( vsize != 0 ) || (csize != 0) ) // at least: __v { // ^ - *p++ = SEP_CHAR; + togo.append(SEP_CHAR, status); } if ( csize != 0 ) { - uprv_strcpy(p, newCountry); - p += csize; + togo.append(newCountry, status); } if ( vsize != 0) { - *p++ = SEP_CHAR; // at least: __v - - uprv_strncpy(p, newVariant, vsize); // Must use strncpy because - p += vsize; // of trimming (above). - *p = 0; // terminate + togo.append(SEP_CHAR, status) + .append(newVariant, vsize, status); } if ( ksize != 0) { if (uprv_strchr(newKeywords, '=')) { - *p++ = '@'; /* keyword parsing */ + togo.append('@', status); /* keyword parsing */ } else { - *p++ = '_'; /* Variant parsing with a script */ + togo.append('_', status); /* Variant parsing with a script */ if ( vsize == 0) { - *p++ = '_'; /* No country found */ + togo.append('_', status); /* No country found */ } } - uprv_strcpy(p, newKeywords); - p += ksize; + togo.append(newKeywords, status); } + if (U_FAILURE(status)) { + // Something went wrong with appending, etc. + setToBogus(); + return; + } // Parse it, because for example 'language' might really be a complete // string. - init(togo.getAlias(), FALSE); + init(togo.data(), FALSE); } } diff --git a/intl/icu/source/common/uloc.cpp b/intl/icu/source/common/uloc.cpp index bdfd0c9a6..00198439e 100644 --- a/intl/icu/source/common/uloc.cpp +++ b/intl/icu/source/common/uloc.cpp @@ -2246,7 +2246,7 @@ _uloc_strtod(const char *start, char **end) { typedef struct { float q; int32_t dummy; /* to avoid uninitialized memory copy from qsort */ - char *locale; + char locale[ULOC_FULLNAME_CAPACITY+1]; } _acceptLangItem; static int32_t U_CALLCONV @@ -2288,9 +2288,7 @@ uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult UEnumeration* availableLocales, UErrorCode *status) { - _acceptLangItem *j; - _acceptLangItem smallBuffer[30]; - char **strs; + icu::MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items. char tmp[ULOC_FULLNAME_CAPACITY +1]; int32_t n = 0; const char *itemEnd; @@ -2300,11 +2298,7 @@ uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult int32_t res; int32_t i; int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage); - int32_t jSize; - char *tempstr; /* Use for null pointer check */ - j = smallBuffer; - jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]); if(U_FAILURE(*status)) { return -1; } @@ -2332,93 +2326,61 @@ uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult while(isspace(*t)) { t++; } - j[n].q = (float)_uloc_strtod(t,NULL); + items[n].q = (float)_uloc_strtod(t,NULL); } else { /* no semicolon - it's 1.0 */ - j[n].q = 1.0f; + items[n].q = 1.0f; paramEnd = itemEnd; } - j[n].dummy=0; + items[n].dummy=0; /* eat spaces prior to semi */ for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--) ; - /* Check for null pointer from uprv_strndup */ - tempstr = uprv_strndup(s,(int32_t)((t+1)-s)); - if (tempstr == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return -1; + int32_t slen = ((t+1)-s); + if(slen > ULOC_FULLNAME_CAPACITY) { + *status = U_BUFFER_OVERFLOW_ERROR; + return -1; // too big } - j[n].locale = tempstr; - uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status); - if(strcmp(j[n].locale,tmp)) { - uprv_free(j[n].locale); - j[n].locale=uprv_strdup(tmp); + uprv_strncpy(items[n].locale, s, slen); + items[n].locale[slen]=0; // terminate + int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status); + if(U_FAILURE(*status)) return -1; + if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) { + // canonicalization had an effect- copy back + uprv_strncpy(items[n].locale, tmp, clen); + items[n].locale[clen] = 0; // terminate } #if defined(ULOC_DEBUG) - /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/ + /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, items[n].locale, items[n].q);*/ #endif n++; s = itemEnd; while(*s==',') { /* eat duplicate commas */ s++; } - if(n>=jSize) { - if(j==smallBuffer) { /* overflowed the small buffer. */ - j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2))); - if(j!=NULL) { - uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize); - } + if(n>=items.getCapacity()) { // If we need more items + if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) { + *status = U_MEMORY_ALLOCATION_ERROR; + return -1; + } #if defined(ULOC_DEBUG) - fprintf(stderr,"malloced at size %d\n", jSize); + fprintf(stderr,"malloced at size %d\n", items.getCapacity()); #endif - } else { - j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2)); -#if defined(ULOC_DEBUG) - fprintf(stderr,"re-alloced at size %d\n", jSize); -#endif - } - jSize *= 2; - if(j==NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return -1; - } } } - uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status); + uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status); + icu::LocalArray strs(new const char*[n], *status); if(U_FAILURE(*status)) { - if(j != smallBuffer) { -#if defined(ULOC_DEBUG) - fprintf(stderr,"freeing j %p\n", j); -#endif - uprv_free(j); - } - return -1; - } - strs = static_cast(uprv_malloc((size_t)(sizeof(strs[0])*n))); - /* Check for null pointer */ - if (strs == NULL) { - uprv_free(j); /* Free to avoid memory leak */ - *status = U_MEMORY_ALLOCATION_ERROR; - return -1; + return -1; } for(i=0;i q <%g>\n", i, j[i].locale, j[i].q);*/ + /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, items[i].locale, items[i].q);*/ #endif - strs[i]=j[i].locale; + strs[i]=items[i].locale; } res = uloc_acceptLanguage(result, resultAvailable, outResult, - (const char**)strs, n, availableLocales, status); - for(i=0;ip; // the current buffer mapIndex = ix - u8b->toUCharsMapStart; + U_ASSERT(mapIndex < (int32_t)SIZEOF_MAPTOUCHARS); ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; return TRUE; @@ -1296,6 +1304,10 @@ fillReverse: // Can only do this if the incoming index is somewhere in the interior of the string. // If index is at the end, there is no character there to look at. if (ix != ut->b) { + // Note: this function will only move the index back if it is on a trail byte + // and there is a preceding lead byte and the sequence from the lead + // through this trail could be part of a valid UTF-8 sequence + // Otherwise the index remains unchanged. U8_SET_CP_START(s8, 0, ix); } @@ -1309,7 +1321,10 @@ fillReverse: UChar *buf = u8b->buf; uint8_t *mapToNative = u8b->mapToNative; uint8_t *mapToUChars = u8b->mapToUChars; - int32_t toUCharsMapStart = ix - (UTF8_TEXT_CHUNK_SIZE*3 + 1); + int32_t toUCharsMapStart = ix - SIZEOF_MAPTOUCHARS + 1; + // Note that toUCharsMapStart can be negative. Happens when the remaining + // text from current position to the beginning is less than the buffer size. + // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry. int32_t destIx = UTF8_TEXT_CHUNK_SIZE+2; // Start in the overflow region // at end of buffer to leave room // for a surrogate pair at the @@ -1336,6 +1351,7 @@ fillReverse: if (c<0x80) { // Special case ASCII range for speed. buf[destIx] = (UChar)c; + U_ASSERT(toUCharsMapStart <= srcIx); mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx; mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); } else { @@ -1365,6 +1381,7 @@ fillReverse: do { mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx; } while (sIx >= srcIx); + U_ASSERT(toUCharsMapStart <= (srcIx+1)); // Set native indexing limit to be the current position. // We are processing a non-ascii, non-native-indexing char now; @@ -1539,6 +1556,7 @@ utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) { U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit); U_ASSERT(index<=ut->chunkNativeLimit); int32_t mapIndex = index - u8b->toUCharsMapStart; + U_ASSERT(mapIndex < (int32_t)SIZEOF_MAPTOUCHARS); int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; U_ASSERT(offset>=0 && offset<=ut->chunkLength); return offset; diff --git a/intl/icu/source/i18n/gregoimp.cpp b/intl/icu/source/i18n/gregoimp.cpp index 08a3fbcef..30def0ef2 100644 --- a/intl/icu/source/i18n/gregoimp.cpp +++ b/intl/icu/source/i18n/gregoimp.cpp @@ -29,6 +29,11 @@ int32_t ClockMath::floorDivide(int32_t numerator, int32_t denominator) { numerator / denominator : ((numerator + 1) / denominator) - 1; } +int64_t ClockMath::floorDivide(int64_t numerator, int64_t denominator) { + return (numerator >= 0) ? + numerator / denominator : ((numerator + 1) / denominator) - 1; +} + int32_t ClockMath::floorDivide(double numerator, int32_t denominator, int32_t& remainder) { double quotient; diff --git a/intl/icu/source/i18n/gregoimp.h b/intl/icu/source/i18n/gregoimp.h index f65d14101..3f4d821a1 100644 --- a/intl/icu/source/i18n/gregoimp.h +++ b/intl/icu/source/i18n/gregoimp.h @@ -38,6 +38,17 @@ class ClockMath { */ static int32_t floorDivide(int32_t numerator, int32_t denominator); + /** + * Divide two integers, returning the floor of the quotient. + * Unlike the built-in division, this is mathematically + * well-behaved. E.g., -1/4 => 0 but + * floorDivide(-1,4) => -1. + * @param numerator the numerator + * @param denominator a divisor which must be != 0 + * @return the floor of the quotient + */ + static int64_t floorDivide(int64_t numerator, int64_t denominator); + /** * Divide two numbers, returning the floor of the quotient. * Unlike the built-in division, this is mathematically diff --git a/intl/icu/source/i18n/persncal.cpp b/intl/icu/source/i18n/persncal.cpp index bcc1411ea..775a493c2 100644 --- a/intl/icu/source/i18n/persncal.cpp +++ b/intl/icu/source/i18n/persncal.cpp @@ -211,7 +211,7 @@ void PersianCalendar::handleComputeFields(int32_t julianDay, UErrorCode &/*statu int32_t year, month, dayOfMonth, dayOfYear; int32_t daysSinceEpoch = julianDay - PERSIAN_EPOCH; - year = 1 + ClockMath::floorDivide(33 * daysSinceEpoch + 3, 12053); + year = 1 + (int32_t)ClockMath::floorDivide(33 * (int64_t)daysSinceEpoch + 3, (int64_t)12053); int32_t farvardin1 = 365 * (year - 1) + ClockMath::floorDivide(8 * year + 21, 33); dayOfYear = (daysSinceEpoch - farvardin1); // 0-based diff --git a/intl/icu/source/i18n/zonemeta.cpp b/intl/icu/source/i18n/zonemeta.cpp index e2c75e557..d78e2acc0 100644 --- a/intl/icu/source/i18n/zonemeta.cpp +++ b/intl/icu/source/i18n/zonemeta.cpp @@ -681,7 +681,6 @@ ZoneMeta::createMetazoneMappings(const UnicodeString &tzid) { mzMappings = new UVector(deleteOlsonToMetaMappingEntry, NULL, status); if (U_FAILURE(status)) { delete mzMappings; - deleteOlsonToMetaMappingEntry(entry); uprv_free(entry); break; }