/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /* * utf8.c * * This file contains some additional utility routines required for * handling UTF8 strings. */ #ifndef BASE_H #include "base.h" #endif /* BASE_H */ #include "plstr.h" /* * NOTES: * * There's an "is hex string" function in pki1/atav.c. If we need * it in more places, pull that one out. */ /* * nssUTF8_CaseIgnoreMatch * * Returns true if the two UTF8-encoded strings pointed to by the * two specified NSSUTF8 pointers differ only in typcase. * * The error may be one of the following values: * NSS_ERROR_INVALID_POINTER * * Return value: * PR_TRUE if the strings match, ignoring case * PR_FALSE if they don't * PR_FALSE upon error */ NSS_IMPLEMENT PRBool nssUTF8_CaseIgnoreMatch ( const NSSUTF8 *a, const NSSUTF8 *b, PRStatus *statusOpt ) { #ifdef NSSDEBUG if( ((const NSSUTF8 *)NULL == a) || ((const NSSUTF8 *)NULL == b) ) { nss_SetError(NSS_ERROR_INVALID_POINTER); if( (PRStatus *)NULL != statusOpt ) { *statusOpt = PR_FAILURE; } return PR_FALSE; } #endif /* NSSDEBUG */ if( (PRStatus *)NULL != statusOpt ) { *statusOpt = PR_SUCCESS; } /* * XXX fgmr * * This is, like, so wrong! */ if( 0 == PL_strcasecmp((const char *)a, (const char *)b) ) { return PR_TRUE; } else { return PR_FALSE; } } /* * nssUTF8_PrintableMatch * * Returns true if the two Printable strings pointed to by the * two specified NSSUTF8 pointers match when compared with the * rules for Printable String (leading and trailing spaces are * disregarded, extents of whitespace match irregardless of length, * and case is not significant), then PR_TRUE will be returned. * Otherwise, PR_FALSE will be returned. Upon failure, PR_FALSE * will be returned. If the optional statusOpt argument is not * NULL, then PR_SUCCESS or PR_FAILURE will be stored in that * location. * * The error may be one of the following values: * NSS_ERROR_INVALID_POINTER * * Return value: * PR_TRUE if the strings match, ignoring case * PR_FALSE if they don't * PR_FALSE upon error */ NSS_IMPLEMENT PRBool nssUTF8_PrintableMatch ( const NSSUTF8 *a, const NSSUTF8 *b, PRStatus *statusOpt ) { PRUint8 *c; PRUint8 *d; #ifdef NSSDEBUG if( ((const NSSUTF8 *)NULL == a) || ((const NSSUTF8 *)NULL == b) ) { nss_SetError(NSS_ERROR_INVALID_POINTER); if( (PRStatus *)NULL != statusOpt ) { *statusOpt = PR_FAILURE; } return PR_FALSE; } #endif /* NSSDEBUG */ if( (PRStatus *)NULL != statusOpt ) { *statusOpt = PR_SUCCESS; } c = (PRUint8 *)a; d = (PRUint8 *)b; while( ' ' == *c ) { c++; } while( ' ' == *d ) { d++; } while( ('\0' != *c) && ('\0' != *d) ) { PRUint8 e, f; e = *c; f = *d; if( ('a' <= e) && (e <= 'z') ) { e -= ('a' - 'A'); } if( ('a' <= f) && (f <= 'z') ) { f -= ('a' - 'A'); } if( e != f ) { return PR_FALSE; } c++; d++; if( ' ' == *c ) { while( ' ' == *c ) { c++; } c--; } if( ' ' == *d ) { while( ' ' == *d ) { d++; } d--; } } while( ' ' == *c ) { c++; } while( ' ' == *d ) { d++; } if( *c == *d ) { /* And both '\0', btw */ return PR_TRUE; } else { return PR_FALSE; } } /* * nssUTF8_Duplicate * * This routine duplicates the UTF8-encoded string pointed to by the * specified NSSUTF8 pointer. If the optional arenaOpt argument is * not null, the memory required will be obtained from that arena; * otherwise, the memory required will be obtained from the heap. * A pointer to the new string will be returned. In case of error, * an error will be placed on the error stack and NULL will be * returned. * * The error may be one of the following values: * NSS_ERROR_INVALID_POINTER * NSS_ERROR_INVALID_ARENA * NSS_ERROR_NO_MEMORY */ NSS_IMPLEMENT NSSUTF8 * nssUTF8_Duplicate ( const NSSUTF8 *s, NSSArena *arenaOpt ) { NSSUTF8 *rv; PRUint32 len; #ifdef NSSDEBUG if( (const NSSUTF8 *)NULL == s ) { nss_SetError(NSS_ERROR_INVALID_POINTER); return (NSSUTF8 *)NULL; } if( (NSSArena *)NULL != arenaOpt ) { if( PR_SUCCESS != nssArena_verifyPointer(arenaOpt) ) { return (NSSUTF8 *)NULL; } } #endif /* NSSDEBUG */ len = PL_strlen((const char *)s); #ifdef PEDANTIC if( '\0' != ((const char *)s)[ len ] ) { /* must have wrapped, e.g., too big for PRUint32 */ nss_SetError(NSS_ERROR_NO_MEMORY); return (NSSUTF8 *)NULL; } #endif /* PEDANTIC */ len++; /* zero termination */ rv = nss_ZAlloc(arenaOpt, len); if( (void *)NULL == rv ) { return (NSSUTF8 *)NULL; } (void)nsslibc_memcpy(rv, s, len); return rv; } /* * nssUTF8_Size * * This routine returns the length in bytes (including the terminating * null) of the UTF8-encoded string pointed to by the specified * NSSUTF8 pointer. Zero is returned on error. * * The error may be one of the following values: * NSS_ERROR_INVALID_POINTER * NSS_ERROR_VALUE_TOO_LARGE * * Return value: * 0 on error * nonzero length of the string. */ NSS_IMPLEMENT PRUint32 nssUTF8_Size ( const NSSUTF8 *s, PRStatus *statusOpt ) { PRUint32 sv; #ifdef NSSDEBUG if( (const NSSUTF8 *)NULL == s ) { nss_SetError(NSS_ERROR_INVALID_POINTER); if( (PRStatus *)NULL != statusOpt ) { *statusOpt = PR_FAILURE; } return 0; } #endif /* NSSDEBUG */ sv = PL_strlen((const char *)s) + 1; #ifdef PEDANTIC if( '\0' != ((const char *)s)[ sv-1 ] ) { /* wrapped */ nss_SetError(NSS_ERROR_VALUE_TOO_LARGE); if( (PRStatus *)NULL != statusOpt ) { *statusOpt = PR_FAILURE; } return 0; } #endif /* PEDANTIC */ if( (PRStatus *)NULL != statusOpt ) { *statusOpt = PR_SUCCESS; } return sv; } /* * nssUTF8_Length * * This routine returns the length in characters (not including the * terminating null) of the UTF8-encoded string pointed to by the * specified NSSUTF8 pointer. * * The error may be one of the following values: * NSS_ERROR_INVALID_POINTER * NSS_ERROR_VALUE_TOO_LARGE * NSS_ERROR_INVALID_STRING * * Return value: * length of the string (which may be zero) * 0 on error */ NSS_IMPLEMENT PRUint32 nssUTF8_Length ( const NSSUTF8 *s, PRStatus *statusOpt ) { PRUint32 l = 0; const PRUint8 *c = (const PRUint8 *)s; #ifdef NSSDEBUG if( (const NSSUTF8 *)NULL == s ) { nss_SetError(NSS_ERROR_INVALID_POINTER); goto loser; } #endif /* NSSDEBUG */ /* * From RFC 2044: * * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx */ while( 0 != *c ) { PRUint32 incr; if( (*c & 0x80) == 0 ) { incr = 1; } else if( (*c & 0xE0) == 0xC0 ) { incr = 2; } else if( (*c & 0xF0) == 0xE0 ) { incr = 3; } else if( (*c & 0xF8) == 0xF0 ) { incr = 4; } else if( (*c & 0xFC) == 0xF8 ) { incr = 5; } else if( (*c & 0xFE) == 0xFC ) { incr = 6; } else { nss_SetError(NSS_ERROR_INVALID_STRING); goto loser; } l += incr; #ifdef PEDANTIC if( l < incr ) { /* Wrapped-- too big */ nss_SetError(NSS_ERROR_VALUE_TOO_LARGE); goto loser; } { PRUint8 *d; for( d = &c[1]; d < &c[incr]; d++ ) { if( (*d & 0xC0) != 0xF0 ) { nss_SetError(NSS_ERROR_INVALID_STRING); goto loser; } } } #endif /* PEDANTIC */ c += incr; } if( (PRStatus *)NULL != statusOpt ) { *statusOpt = PR_SUCCESS; } return l; loser: if( (PRStatus *)NULL != statusOpt ) { *statusOpt = PR_FAILURE; } return 0; } /* * nssUTF8_Create * * This routine creates a UTF8 string from a string in some other * format. Some types of string may include embedded null characters, * so for them the length parameter must be used. For string types * that are null-terminated, the length parameter is optional; if it * is zero, it will be ignored. If the optional arena argument is * non-null, the memory used for the new string will be obtained from * that arena, otherwise it will be obtained from the heap. This * routine may return NULL upon error, in which case it will have * placed an error on the error stack. * * The error may be one of the following: * NSS_ERROR_INVALID_POINTER * NSS_ERROR_NO_MEMORY * NSS_ERROR_UNSUPPORTED_TYPE * * Return value: * NULL upon error * A non-null pointer to a new UTF8 string otherwise */ extern const NSSError NSS_ERROR_INTERNAL_ERROR; /* XXX fgmr */ NSS_IMPLEMENT NSSUTF8 * nssUTF8_Create ( NSSArena *arenaOpt, nssStringType type, const void *inputString, PRUint32 size /* in bytes, not characters */ ) { NSSUTF8 *rv = NULL; #ifdef NSSDEBUG if( (NSSArena *)NULL != arenaOpt ) { if( PR_SUCCESS != nssArena_verifyPointer(arenaOpt) ) { return (NSSUTF8 *)NULL; } } if( (const void *)NULL == inputString ) { nss_SetError(NSS_ERROR_INVALID_POINTER); return (NSSUTF8 *)NULL; } #endif /* NSSDEBUG */ switch( type ) { case nssStringType_DirectoryString: /* This is a composite type requiring BER */ nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE); break; case nssStringType_TeletexString: /* * draft-ietf-pkix-ipki-part1-11 says in part: * * In addition, many legacy implementations support names encoded * in the ISO 8859-1 character set (Latin1String) but tag them as * TeletexString. The Latin1String includes characters used in * Western European countries which are not part of the * TeletexString charcter set. Implementations that process * TeletexString SHOULD be prepared to handle the entire ISO * 8859-1 character set.[ISO 8859-1]. */ nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; case nssStringType_PrintableString: /* * PrintableString consists of A-Za-z0-9 ,()+,-./:=? * This is a subset of ASCII, which is a subset of UTF8. * So we can just duplicate the string over. */ if( 0 == size ) { rv = nssUTF8_Duplicate((const NSSUTF8 *)inputString, arenaOpt); } else { rv = nss_ZAlloc(arenaOpt, size+1); if( (NSSUTF8 *)NULL == rv ) { return (NSSUTF8 *)NULL; } (void)nsslibc_memcpy(rv, inputString, size); } break; case nssStringType_UniversalString: /* 4-byte unicode */ nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; case nssStringType_BMPString: /* Base Multilingual Plane of Unicode */ nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; case nssStringType_UTF8String: if( 0 == size ) { rv = nssUTF8_Duplicate((const NSSUTF8 *)inputString, arenaOpt); } else { rv = nss_ZAlloc(arenaOpt, size+1); if( (NSSUTF8 *)NULL == rv ) { return (NSSUTF8 *)NULL; } (void)nsslibc_memcpy(rv, inputString, size); } break; case nssStringType_PHGString: /* * PHGString is an IA5String (with case-insensitive comparisons). * IA5 is ~almost~ ascii; ascii has dollar-sign where IA5 has * currency symbol. */ nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; case nssStringType_GeneralString: nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; default: nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE); break; } return rv; } NSS_IMPLEMENT NSSItem * nssUTF8_GetEncoding ( NSSArena *arenaOpt, NSSItem *rvOpt, nssStringType type, NSSUTF8 *string ) { NSSItem *rv = (NSSItem *)NULL; PRStatus status = PR_SUCCESS; #ifdef NSSDEBUG if( (NSSArena *)NULL != arenaOpt ) { if( PR_SUCCESS != nssArena_verifyPointer(arenaOpt) ) { return (NSSItem *)NULL; } } if( (NSSUTF8 *)NULL == string ) { nss_SetError(NSS_ERROR_INVALID_POINTER); return (NSSItem *)NULL; } #endif /* NSSDEBUG */ switch( type ) { case nssStringType_DirectoryString: nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; case nssStringType_TeletexString: nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; case nssStringType_PrintableString: nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; case nssStringType_UniversalString: nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; case nssStringType_BMPString: nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; case nssStringType_UTF8String: { NSSUTF8 *dup = nssUTF8_Duplicate(string, arenaOpt); if( (NSSUTF8 *)NULL == dup ) { return (NSSItem *)NULL; } if( (NSSItem *)NULL == rvOpt ) { rv = nss_ZNEW(arenaOpt, NSSItem); if( (NSSItem *)NULL == rv ) { (void)nss_ZFreeIf(dup); return (NSSItem *)NULL; } } else { rv = rvOpt; } rv->data = dup; dup = (NSSUTF8 *)NULL; rv->size = nssUTF8_Size(rv->data, &status); if( (0 == rv->size) && (PR_SUCCESS != status) ) { if( (NSSItem *)NULL == rvOpt ) { (void)nss_ZFreeIf(rv); } return (NSSItem *)NULL; } } break; case nssStringType_PHGString: nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; default: nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE); break; } return rv; } /* * nssUTF8_CopyIntoFixedBuffer * * This will copy a UTF8 string into a fixed-length buffer, making * sure that the all characters are valid. Any remaining space will * be padded with the specified ASCII character, typically either * null or space. * * Blah, blah, blah. */ NSS_IMPLEMENT PRStatus nssUTF8_CopyIntoFixedBuffer ( NSSUTF8 *string, char *buffer, PRUint32 bufferSize, char pad ) { PRUint32 stringSize = 0; #ifdef NSSDEBUG if( (char *)NULL == buffer ) { nss_SetError(NSS_ERROR_INVALID_POINTER); return PR_FALSE; } if( 0 == bufferSize ) { nss_SetError(NSS_ERROR_INVALID_ARGUMENT); return PR_FALSE; } if( (pad & 0x80) != 0x00 ) { nss_SetError(NSS_ERROR_INVALID_ARGUMENT); return PR_FALSE; } #endif /* NSSDEBUG */ if( (NSSUTF8 *)NULL == string ) { string = (NSSUTF8 *) ""; } stringSize = nssUTF8_Size(string, (PRStatus *)NULL); stringSize--; /* don't count the trailing null */ if( stringSize > bufferSize ) { PRUint32 bs = bufferSize; (void)nsslibc_memcpy(buffer, string, bufferSize); if( ( ((buffer[ bs-1 ] & 0x80) == 0x00)) || ((bs > 1) && ((buffer[ bs-2 ] & 0xE0) == 0xC0)) || ((bs > 2) && ((buffer[ bs-3 ] & 0xF0) == 0xE0)) || ((bs > 3) && ((buffer[ bs-4 ] & 0xF8) == 0xF0)) || ((bs > 4) && ((buffer[ bs-5 ] & 0xFC) == 0xF8)) || ((bs > 5) && ((buffer[ bs-6 ] & 0xFE) == 0xFC)) ) { /* It fit exactly */ return PR_SUCCESS; } /* Too long. We have to trim the last character */ for( /*bs*/; bs != 0; bs-- ) { if( (buffer[bs-1] & 0xC0) != 0x80 ) { buffer[bs-1] = pad; break; } else { buffer[bs-1] = pad; } } } else { (void)nsslibc_memset(buffer, pad, bufferSize); (void)nsslibc_memcpy(buffer, string, stringSize); } return PR_SUCCESS; } /* * nssUTF8_Equal * */ NSS_IMPLEMENT PRBool nssUTF8_Equal ( const NSSUTF8 *a, const NSSUTF8 *b, PRStatus *statusOpt ) { PRUint32 la, lb; #ifdef NSSDEBUG if( ((const NSSUTF8 *)NULL == a) || ((const NSSUTF8 *)NULL == b) ) { nss_SetError(NSS_ERROR_INVALID_POINTER); if( (PRStatus *)NULL != statusOpt ) { *statusOpt = PR_FAILURE; } return PR_FALSE; } #endif /* NSSDEBUG */ la = nssUTF8_Size(a, statusOpt); if( 0 == la ) { return PR_FALSE; } lb = nssUTF8_Size(b, statusOpt); if( 0 == lb ) { return PR_FALSE; } if( la != lb ) { return PR_FALSE; } return nsslibc_memequal(a, b, la, statusOpt); }