closes #502: CVE-2016-6293/7415 CVE-2017-7867/7868/14952/15422

2018-07-06 17:49:00 -07:00 · 2018-07-06 17:49:00 -07:00 · 7af97d2dfd
parent 8a97adc59a
commit 7af97d2dfd
7 changed files with 112 additions and 111 deletions
--- a/intl/icu/source/common/locid.cpp
+++ b/intl/icu/source/common/locid.cpp
@ -42,6 +42,7 @@
 #include "uhash.h"
 #include "ucln_cmn.h"
 #include "ustr_imp.h"
+#include "charstr.h"

 U_CDECL_BEGIN
 static UBool U_CALLCONV locale_cleanup(void);
@ -57,6 +58,12 @@ static UMutex gDefaultLocaleMutex = U_MUTEX_INITIALIZER;
 static UHashtable *gDefaultLocalesHashT = NULL;
 static Locale *gDefaultLocale = NULL;

+/**
+ * \def ULOC_STRING_LIMIT
+ * strings beyond this value crash in CharString
+ */
+#define ULOC_STRING_LIMIT 357913941
+
 U_NAMESPACE_END

 typedef enum ELocalePos {
@ -283,13 +290,12 @@ Locale::Locale( const   char * newLanguage,
    }
    else
    {
-        MaybeStackArray<char, ULOC_FULLNAME_CAPACITY> togo;
+        UErrorCode status = U_ZERO_ERROR;
        int32_t size = 0;
        int32_t lsize = 0;
        int32_t csize = 0;
        int32_t vsize = 0;
        int32_t ksize = 0;
-        char    *p;

        // Calculate the size of the resulting string.

@ -297,13 +303,23 @@ Locale::Locale( const   char * newLanguage,
        if ( newLanguage != NULL )
        {
            lsize = (int32_t)uprv_strlen(newLanguage);
+            if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap
+                setToBogus();
+                return;
+            }
            size = lsize;
        }

+        CharString togo(newLanguage, lsize, status); // start with newLanguage
+
        // _Country
        if ( newCountry != NULL )
        {
            csize = (int32_t)uprv_strlen(newCountry);
+            if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap
+                setToBogus();
+                return;
+            }
            size += csize;
        }

@ -318,6 +334,10 @@ Locale::Locale( const   char * newLanguage,

            // remove trailing _'s
            vsize = (int32_t)uprv_strlen(newVariant);
+            if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap
+                setToBogus();
+                return;
+            }
            while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) )
            {
                vsize--;
@ -342,70 +362,56 @@ Locale::Locale( const   char * newLanguage,
        if ( newKeywords != NULL)
        {
            ksize = (int32_t)uprv_strlen(newKeywords);
+            if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) {
+              setToBogus();
+              return;
+            }
            size += ksize + 1;
        }

-
        //  NOW we have the full locale string..
-
-        /*if the whole string is longer than our internal limit, we need
-        to go to the heap for temporary buffers*/
-        if (size >= togo.getCapacity())
-        {
-            // If togo_heap could not be created, initialize with default settings.
-            if (togo.resize(size+1) == NULL) {
-                init(NULL, FALSE);
-            }
-        }
-
-        togo[0] = 0;
-
        // Now, copy it back.
-        p = togo.getAlias();
-        if ( lsize != 0 )
-        {
-            uprv_strcpy(p, newLanguage);
-            p += lsize;
-        }
+
+        // newLanguage is already copied

        if ( ( vsize != 0 ) || (csize != 0) )  // at least:  __v
        {                                      //            ^
-            *p++ = SEP_CHAR;
+            togo.append(SEP_CHAR, status);
        }

        if ( csize != 0 )
        {
-            uprv_strcpy(p, newCountry);
-            p += csize;
+            togo.append(newCountry, status);
        }

        if ( vsize != 0)
        {
-            *p++ = SEP_CHAR; // at least: __v
-
-            uprv_strncpy(p, newVariant, vsize);  // Must use strncpy because
-            p += vsize;                          // of trimming (above).
-            *p = 0; // terminate
+            togo.append(SEP_CHAR, status)
+                .append(newVariant, vsize, status);
        }

        if ( ksize != 0)
        {
            if (uprv_strchr(newKeywords, '=')) {
-                *p++ = '@'; /* keyword parsing */
+                togo.append('@', status); /* keyword parsing */
            }
            else {
-                *p++ = '_'; /* Variant parsing with a script */
+                togo.append('_', status); /* Variant parsing with a script */
                if ( vsize == 0) {
-                    *p++ = '_'; /* No country found */
+                    togo.append('_', status); /* No country found */
                }
            }
-            uprv_strcpy(p, newKeywords);
-            p += ksize;
+            togo.append(newKeywords, status);
        }

+        if (U_FAILURE(status)) {
+            // Something went wrong with appending, etc.
+            setToBogus();
+            return;
+        }
        // Parse it, because for example 'language' might really be a complete
        // string.
-        init(togo.getAlias(), FALSE);
+        init(togo.data(), FALSE);
    }
 }

--- a/intl/icu/source/common/uloc.cpp
+++ b/intl/icu/source/common/uloc.cpp
@ -2246,7 +2246,7 @@ _uloc_strtod(const char *start, char **end) {
 typedef struct { 
    float q;
    int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
-    char *locale;
+    char locale[ULOC_FULLNAME_CAPACITY+1];
 } _acceptLangItem;

 static int32_t U_CALLCONV
@ -2288,9 +2288,7 @@ uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult
                            UEnumeration* availableLocales,
                            UErrorCode *status)
 {
-    _acceptLangItem *j;
-    _acceptLangItem smallBuffer[30];
-    char **strs;
+  icu::MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items. 
    char tmp[ULOC_FULLNAME_CAPACITY +1];
    int32_t n = 0;
    const char *itemEnd;
@ -2300,11 +2298,7 @@ uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult
    int32_t res;
    int32_t i;
    int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
-    int32_t jSize;
-    char *tempstr; /* Use for null pointer check */

-    j = smallBuffer;
-    jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
    if(U_FAILURE(*status)) {
        return -1;
    }
@ -2332,93 +2326,61 @@ uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult
            while(isspace(*t)) {
                t++;
            }
-            j[n].q = (float)_uloc_strtod(t,NULL);
+            items[n].q = (float)_uloc_strtod(t,NULL);
        } else {
            /* no semicolon - it's 1.0 */
-            j[n].q = 1.0f;
+            items[n].q = 1.0f;
            paramEnd = itemEnd;
        }
-        j[n].dummy=0;
+        items[n].dummy=0;
        /* eat spaces prior to semi */
        for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
            ;
-        /* Check for null pointer from uprv_strndup */
-        tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
-        if (tempstr == NULL) {
-            *status = U_MEMORY_ALLOCATION_ERROR;
-            return -1;
+        int32_t slen = ((t+1)-s);
+        if(slen > ULOC_FULLNAME_CAPACITY) {
+          *status = U_BUFFER_OVERFLOW_ERROR;
+          return -1; // too big
        }
-        j[n].locale = tempstr;
-        uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
-        if(strcmp(j[n].locale,tmp)) {
-            uprv_free(j[n].locale);
-            j[n].locale=uprv_strdup(tmp);
+        uprv_strncpy(items[n].locale, s, slen);
+        items[n].locale[slen]=0; // terminate
+        int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status);
+        if(U_FAILURE(*status)) return -1;
+        if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) {
+            // canonicalization had an effect- copy back
+            uprv_strncpy(items[n].locale, tmp, clen);
+            items[n].locale[clen] = 0; // terminate
        }
 #if defined(ULOC_DEBUG)
-        /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
+        /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, items[n].locale, items[n].q);*/
 #endif
        n++;
        s = itemEnd;
        while(*s==',') { /* eat duplicate commas */
            s++;
        }
-        if(n>=jSize) {
-            if(j==smallBuffer) {  /* overflowed the small buffer. */
-                j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2)));
-                if(j!=NULL) {
-                    uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
-                }
+        if(n>=items.getCapacity()) { // If we need more items
+          if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) {
+              *status = U_MEMORY_ALLOCATION_ERROR;
+              return -1;
+          }
 #if defined(ULOC_DEBUG)
-                fprintf(stderr,"malloced at size %d\n", jSize);
+          fprintf(stderr,"malloced at size %d\n", items.getCapacity());
 #endif
-            } else {
-                j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2));
-#if defined(ULOC_DEBUG)
-                fprintf(stderr,"re-alloced at size %d\n", jSize);
-#endif
-            }
-            jSize *= 2;
-            if(j==NULL) {
-                *status = U_MEMORY_ALLOCATION_ERROR;
-                return -1;
-            }
        }
    }
-    uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
+    uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
+    icu::LocalArray<const char*> strs(new const char*[n], *status);
    if(U_FAILURE(*status)) {
-        if(j != smallBuffer) {
-#if defined(ULOC_DEBUG)
-            fprintf(stderr,"freeing j %p\n", j);
-#endif
-            uprv_free(j);
-        }
-        return -1;
-    }
-    strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n)));
-    /* Check for null pointer */
-    if (strs == NULL) {
-        uprv_free(j); /* Free to avoid memory leak */
-        *status = U_MEMORY_ALLOCATION_ERROR;
-        return -1;
+      return -1;
    }
    for(i=0;i<n;i++) {
 #if defined(ULOC_DEBUG)
-        /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
+        /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, items[i].locale, items[i].q);*/
 #endif
-        strs[i]=j[i].locale;
+        strs[i]=items[i].locale;
    }
    res =  uloc_acceptLanguage(result, resultAvailable, outResult, 
-        (const char**)strs, n, availableLocales, status);
-    for(i=0;i<n;i++) {
-        uprv_free(strs[i]);
-    }
-    uprv_free(strs);
-    if(j != smallBuffer) {
-#if defined(ULOC_DEBUG)
-        fprintf(stderr,"freeing j %p\n", j);
-#endif
-        uprv_free(j);
-    }
+                               strs.getAlias(), n, availableLocales, status);
    return res;
 }

--- a/intl/icu/source/common/utext.cpp
+++ b/intl/icu/source/common/utext.cpp
@ -845,11 +845,18 @@ U_CDECL_END
 //------------------------------------------------------------------------------

 // Chunk size.
-//     Must be less than 85, because of byte mapping from UChar indexes to native indexes.
-//     Worst case is three native bytes to one UChar.  (Supplemenaries are 4 native bytes
-//     to two UChars.)
+//     Must be less than 42  (256/6), because of byte mapping from UChar indexes to native indexes.
+//     Worst case there are six UTF-8 bytes per UChar.
+//         obsolete 6 byte form fd + 5 trails maps to fffd
+//         obsolete 5 byte form fc + 4 trails maps to fffd
+//         non-shortest 4 byte forms maps to fffd
+//         normal supplementaries map to a pair of utf-16, two utf8 bytes per utf-16 unit
+//     mapToUChars array size must allow for the worst case, 6.
+//     This could be brought down to 4, by treating fd and fc as pure illegal,
+//     rather than obsolete lead bytes. But that is not compatible with the utf-8 access macros.
 //
 enum { UTF8_TEXT_CHUNK_SIZE=32 };
+enum { SIZEOF_MAPTOUCHARS=UTF8_TEXT_CHUNK_SIZE*6+6 };

 //
 // UTF8Buf  Two of these structs will be set up in the UText's extra allocated space.
@ -887,7 +894,7 @@ struct UTF8Buf {
                                                     //  Requires two extra slots,
                                                     //    one for a supplementary starting in the last normal position,
                                                     //    and one for an entry for the buffer limit position.
-    uint8_t   mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to
+    uint8_t   mapToUChars[SIZEOF_MAPTOUCHARS]; // Map native offset from bufNativeStart to
                                                     //   correspoding offset in filled part of buf.
    int32_t   align;
 };
@ -1030,6 +1037,7 @@ utf8TextAccess(UText *ut, int64_t index, UBool forward) {
            // Requested index is in this buffer.
            u8b = (UTF8Buf *)ut->p;   // the current buffer
            mapIndex = ix - u8b->toUCharsMapStart;
+            U_ASSERT(mapIndex < (int32_t)SIZEOF_MAPTOUCHARS);
            ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
            return TRUE;

@ -1296,6 +1304,10 @@ fillReverse:
        // Can only do this if the incoming index is somewhere in the interior of the string.
        //   If index is at the end, there is no character there to look at.
        if (ix != ut->b) {
+            // Note: this function will only move the index back if it is on a trail byte
+            //       and there is a preceding lead byte and the sequence from the lead 
+            //       through this trail could be part of a valid UTF-8 sequence
+            //       Otherwise the index remains unchanged.
            U8_SET_CP_START(s8, 0, ix);
        }

@ -1309,7 +1321,10 @@ fillReverse:
        UChar   *buf = u8b->buf;
        uint8_t *mapToNative = u8b->mapToNative;
        uint8_t *mapToUChars = u8b->mapToUChars;
-        int32_t  toUCharsMapStart = ix - (UTF8_TEXT_CHUNK_SIZE*3 + 1);
+        int32_t  toUCharsMapStart = ix - SIZEOF_MAPTOUCHARS + 1;
+        // Note that toUCharsMapStart can be negative. Happens when the remaining
+        // text from current position to the beginning is less than the buffer size.
+        // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry.
        int32_t  destIx = UTF8_TEXT_CHUNK_SIZE+2;   // Start in the overflow region
                                                    //   at end of buffer to leave room
                                                    //   for a surrogate pair at the
@ -1336,6 +1351,7 @@ fillReverse:
            if (c<0x80) {
                // Special case ASCII range for speed.
                buf[destIx] = (UChar)c;
+                U_ASSERT(toUCharsMapStart <= srcIx);
                mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
                mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
            } else {
@ -1365,6 +1381,7 @@ fillReverse:
                do {
                    mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx;
                } while (sIx >= srcIx);
+                U_ASSERT(toUCharsMapStart <= (srcIx+1));

                // Set native indexing limit to be the current position.
                //   We are processing a non-ascii, non-native-indexing char now;
@ -1539,6 +1556,7 @@ utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) {
    U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit);
    U_ASSERT(index<=ut->chunkNativeLimit);
    int32_t mapIndex = index - u8b->toUCharsMapStart;
+    U_ASSERT(mapIndex < (int32_t)SIZEOF_MAPTOUCHARS);
    int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
    U_ASSERT(offset>=0 && offset<=ut->chunkLength);
    return offset;
--- a/intl/icu/source/i18n/gregoimp.cpp
+++ b/intl/icu/source/i18n/gregoimp.cpp
@ -29,6 +29,11 @@ int32_t ClockMath::floorDivide(int32_t numerator, int32_t denominator) {
        numerator / denominator : ((numerator + 1) / denominator) - 1;
 }

+int64_t ClockMath::floorDivide(int64_t numerator, int64_t denominator) {
+    return (numerator >= 0) ?
+        numerator / denominator : ((numerator + 1) / denominator) - 1;
+}
+
 int32_t ClockMath::floorDivide(double numerator, int32_t denominator,
                          int32_t& remainder) {
    double quotient;
--- a/intl/icu/source/i18n/gregoimp.h
+++ b/intl/icu/source/i18n/gregoimp.h
@ -38,6 +38,17 @@ class ClockMath {
     */
    static int32_t floorDivide(int32_t numerator, int32_t denominator);

+    /**
+     * Divide two integers, returning the floor of the quotient.
+     * Unlike the built-in division, this is mathematically
+     * well-behaved.  E.g., <code>-1/4</code> => 0 but
+     * <code>floorDivide(-1,4)</code> => -1.
+     * @param numerator the numerator
+     * @param denominator a divisor which must be != 0
+     * @return the floor of the quotient
+     */
+    static int64_t floorDivide(int64_t numerator, int64_t denominator);
+
    /**
     * Divide two numbers, returning the floor of the quotient.
     * Unlike the built-in division, this is mathematically
--- a/intl/icu/source/i18n/persncal.cpp
+++ b/intl/icu/source/i18n/persncal.cpp
@ -211,7 +211,7 @@ void PersianCalendar::handleComputeFields(int32_t julianDay, UErrorCode &/*statu
    int32_t year, month, dayOfMonth, dayOfYear;

    int32_t daysSinceEpoch = julianDay - PERSIAN_EPOCH;
-    year = 1 + ClockMath::floorDivide(33 * daysSinceEpoch + 3, 12053);
+    year = 1 + (int32_t)ClockMath::floorDivide(33 * (int64_t)daysSinceEpoch + 3, (int64_t)12053);

    int32_t farvardin1 = 365 * (year - 1) + ClockMath::floorDivide(8 * year + 21, 33);
    dayOfYear = (daysSinceEpoch - farvardin1); // 0-based
--- a/intl/icu/source/i18n/zonemeta.cpp
+++ b/intl/icu/source/i18n/zonemeta.cpp
@ -681,7 +681,6 @@ ZoneMeta::createMetazoneMappings(const UnicodeString &tzid) {
                    mzMappings = new UVector(deleteOlsonToMetaMappingEntry, NULL, status);
                    if (U_FAILURE(status)) {
                        delete mzMappings;
-                        deleteOlsonToMetaMappingEntry(entry);
                        uprv_free(entry);
                        break;
                    }