1 line
59 KiB
C
Executable File
1 line
59 KiB
C
Executable File
/* Copyright (c) 2017, Computer History Museum
|
|
All rights reserved.
|
|
Redistribution and use in source and binary forms, with or without modification, are permitted (subject to
|
|
the limitations in the disclaimer below) provided that the following conditions are met:
|
|
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
|
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
|
|
disclaimer in the documentation and/or other materials provided with the distribution.
|
|
* Neither the name of Computer History Museum nor the names of its contributors may be used to endorse or promote products
|
|
derived from this software without specific prior written permission.
|
|
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE
|
|
COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
|
DAMAGE. */
|
|
|
|
#define MAX_UTF8_CHAR_LEN (((sizeof(UniChar) * 8) / 6) + ((6 - ((sizeof(UniChar) * 8) / 6)) < ((sizeof(UniChar) * 8) % 6) ? 2 : 1))
|
|
|
|
#if __profile__
|
|
#include "unicodeprofile.h"
|
|
#endif
|
|
|
|
typedef enum
|
|
{
|
|
umFlowedDir,
|
|
umCharsetDir,
|
|
umHtmlDir,
|
|
umRichDir,
|
|
umDirCount
|
|
} umDirectives;
|
|
|
|
typedef enum
|
|
{
|
|
umHeaderState,
|
|
umFlowedState,
|
|
umCharsetState,
|
|
umHtmlState,
|
|
umRichState,
|
|
umTextState,
|
|
} umStates;
|
|
|
|
typedef struct {
|
|
Byte xState;
|
|
Byte tState;
|
|
Byte cState;
|
|
Byte oState;
|
|
} UniGlobalsState;
|
|
|
|
static struct UniGlobals {
|
|
TECObjectRef internetToUTF8;
|
|
TextEncoding internetToUTF8Encoding;
|
|
Boolean maclatin1;
|
|
UnicodeToTextRunInfo UTF8ToMac;
|
|
IntlConverter quickConverter;
|
|
ByteCount origTextLen;
|
|
TextPtr *origText;
|
|
ByteCount convTextLen;
|
|
TextPtr *convText;
|
|
ItemCount convRunsCount;
|
|
TextEncodingRun **convRuns;
|
|
FormatOrderPtr *order;
|
|
UInt16 tecVersion;
|
|
Boolean hasTextRunFlag;
|
|
Boolean has88591VariantsFlag;
|
|
} uGlobals = {0};
|
|
|
|
OSStatus ConvertUTF8Text(BytePtr theText, ByteCount bufLen);
|
|
OSStatus DrawConvertedUTF8(short width, ScriptFontInfo fonts);
|
|
OSStatus MeasureConvertedUTF8(short *width, FontInfo *maxFont, Boolean *rightToLeft, ScriptFontInfo fonts);
|
|
pascal long MyVisibleLength(Ptr textPtr, long len, short direction);
|
|
pascal Boolean MyRlDirProc(short theFormat, void *dirParam);
|
|
long EnsureHandleSize(Handle h, Size s);
|
|
void NoPurgeUniGlobals(UniGlobalsState *uniState);
|
|
void ResetUniGlobals(UniGlobalsState *uniState);
|
|
OSStatus GetUnicodeHint(TextEncoding encoding, StringPtr lang, UniChar *hint);
|
|
OSStatus ClearIntlConverterContext(UnicodeToTextRunInfo converter, Boolean utf8);
|
|
OSStatus UpdateIntlConverterLo(IntlConverter *converter, StringPtr charset, TextEncoding encoding);
|
|
OSStatus UpdateTECConverter(TECObjectRef *converter, StringPtr charset, TextEncoding newEncoding, TextEncoding *fromEncoding, TextEncoding toEncoding, Boolean *maclatin1, UniChar *hint);
|
|
UniCharCount UTF8CharCount(BytePtr utf8, ByteCount bufLen);
|
|
OSErr AccuEnsureSize(AccuPtr a, long len, long incr);
|
|
short GetIntlFont(ScriptCode script, ScriptFontInfo fonts);
|
|
short GetIntlSize(ScriptCode script, ScriptFontInfo fonts);
|
|
void CleanISO2022(TextPtr text, long len, IntlConverter *converter);
|
|
OSStatus InsertIntlHeaders(Handle text, long len, long tOff, AccuPtr a, TextEncoding encoding, PETEHandle pte, long *pOff);
|
|
long ParseCharset(Ptr textPtr, long len, PStr charset, umDirectives c);
|
|
OSErr PeteGetStyleRun(PETEHandle pte, long offset, long *len, PETEStyleInfoPtr style, long validBits);
|
|
OSStatus MyTECConvertText(TECObjectRef encodingConverter, ConstTextPtr inputBuffer, ByteCount inputBufferLength, ByteCount *actualInputLength, TextPtr outputBuffer, ByteCount outputBufferLength, ByteCount *actualOutputLength, Boolean maclatin1);
|
|
long UnicodeMappingCount(TextEncoding encoding);
|
|
|
|
OSStatus InitUnicode()
|
|
{
|
|
UnicodeMapping tempMapping;
|
|
TECInfoHandle info;
|
|
OSStatus statusCode;
|
|
|
|
Zero(uGlobals);
|
|
|
|
if((long)TECGetInfo == kUnresolvedCFragSymbolAddress)
|
|
{
|
|
Log(-1, "\pNo Unicode");
|
|
return noErr;
|
|
}
|
|
|
|
statusCode = TECGetInfo(&info);
|
|
if(statusCode != noErr) {
|
|
Log(-1, "\pTECGetInfo failed");
|
|
return noErr;
|
|
}
|
|
|
|
uGlobals.tecVersion = (**info).tecVersion;
|
|
uGlobals.hasTextRunFlag = !(!((**info).tecUnicodeConverterFeatures & kTECAddTextRunHeuristicsMask));
|
|
uGlobals.has88591VariantsFlag = UnicodeMappingCount(DefaultEncoding(kTextEncodingMacRomanLatin1)) > 1;
|
|
tempMapping.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault, kTextEncodingDefaultVariant, kUnicodeUTF8Format);
|
|
statusCode = CreateUnicodeToTextRunInfo(0, &tempMapping, &uGlobals.UTF8ToMac);
|
|
if(statusCode != noErr) {
|
|
Log(-1, "\pUTF8 To Mac failed");
|
|
return statusCode;
|
|
}
|
|
|
|
statusCode = CreateIntlConverter(&uGlobals.quickConverter, kTextEncodingUnknown);
|
|
if(statusCode == noErr)
|
|
{
|
|
if(((uGlobals.origText = NuHTempBetter(0)) == nil) ||
|
|
((uGlobals.convText = NuHTempBetter(0)) == nil) ||
|
|
((uGlobals.convRuns = NuHTempBetter(2 * sizeof(TextEncodingRun))) == nil) ||
|
|
((uGlobals.order = NuHTempBetter(0)) == nil)) {
|
|
statusCode = memFullErr;
|
|
} else {
|
|
HPurge((Handle)uGlobals.origText);
|
|
HPurge((Handle)uGlobals.convText);
|
|
HPurge((Handle)uGlobals.convRuns);
|
|
HPurge((Handle)uGlobals.order);
|
|
}
|
|
}
|
|
if(statusCode != noErr)
|
|
CleanupUnicode();
|
|
return statusCode;
|
|
}
|
|
|
|
void CleanupUnicode()
|
|
{
|
|
if(uGlobals.tecVersion > 0)
|
|
return;
|
|
|
|
DisposeUnicodeToTextRunInfo(&uGlobals.UTF8ToMac);
|
|
if(uGlobals.internetToUTF8) {
|
|
TECDisposeConverter(uGlobals.internetToUTF8);
|
|
uGlobals.internetToUTF8 = 0;
|
|
}
|
|
DisposeIntlConverter(uGlobals.quickConverter);
|
|
ZapHandle(uGlobals.origText);
|
|
ZapHandle(uGlobals.convText);
|
|
ZapHandle(uGlobals.convRuns);
|
|
ZapHandle(uGlobals.order);
|
|
Zero(uGlobals);
|
|
}
|
|
|
|
Boolean UTF8ToRoman(BytePtr theText, ByteCount *textLen, ByteCount bufLen)
|
|
{
|
|
if(ConvertUTF8Text(theText, *textLen) != noErr) return false;
|
|
if(uGlobals.convRunsCount != 1) return false;
|
|
if(ResolveDefaultTextEncoding((*uGlobals.convRuns)[0].textEncoding) != ResolveDefaultTextEncoding(DefaultEncoding(kTextEncodingMacRoman))) return false;
|
|
*textLen = MIN(bufLen,uGlobals.convTextLen);
|
|
BMD(*(uGlobals.convText), theText, *textLen);
|
|
return true;
|
|
}
|
|
|
|
Boolean RomanToUTF8(PStr s, long strSize)
|
|
{
|
|
UniChar temp[128];
|
|
TextToUnicodeInfo info;
|
|
UnicodeMapping tempMapping;
|
|
OSStatus statusCode;
|
|
ByteCount len;
|
|
|
|
Zero(tempMapping);
|
|
tempMapping.mappingVersion = kUnicodeUseLatestMapping;
|
|
tempMapping.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault, kTextEncodingDefaultVariant, kUnicodeUTF8Format);
|
|
tempMapping.otherEncoding = CreateTextEncoding(kTextEncodingMacRoman, kTextEncodingDefaultVariant, kTextEncodingDefaultFormat);
|
|
statusCode = CreateTextToUnicodeInfo(&tempMapping, &info);
|
|
if(statusCode) return statusCode;
|
|
statusCode = ConvertFromPStringToUnicode(info, s, strSize-1, &len, temp);
|
|
if(statusCode && statusCode != kTECOutputBufferFullStatus && statusCode != kTECUsedFallbacksStatus) return statusCode;
|
|
BlockMoveData(&temp, &s[1], len);
|
|
s[0] = len;
|
|
return noErr;
|
|
}
|
|
|
|
OSStatus DrawUTF8Text(BytePtr theText, ByteCount bufLen, short width, ScriptFontInfo fonts)
|
|
{
|
|
OSStatus statusCode;
|
|
|
|
statusCode = ConvertUTF8Text(theText, bufLen);
|
|
if(statusCode != noErr) {
|
|
return statusCode;
|
|
}
|
|
return DrawConvertedUTF8(width, fonts);
|
|
}
|
|
|
|
OSStatus DrawConvertedUTF8(short width, ScriptFontInfo fonts)
|
|
{
|
|
ItemCount run, runDisp;
|
|
OSStatus statusCode;
|
|
ScriptCode script;
|
|
short fontID, oldFont, oldSize;
|
|
GrafPtr port;
|
|
Boolean lineRight;
|
|
ByteOffset offset;
|
|
ByteCount runLen;
|
|
UniGlobalsState uniState;
|
|
|
|
NoPurgeUniGlobals(&uniState);
|
|
GetPort(&port);
|
|
oldFont = GetPortTextFont(port);
|
|
oldSize = GetPortTextSize(port);
|
|
for(statusCode = noErr, run = 0; statusCode == noErr && run < uGlobals.convRunsCount; ++run) {
|
|
runDisp = (**uGlobals.order)[run];
|
|
statusCode = RevertTextEncodingToScriptInfo((*uGlobals.convRuns)[runDisp].textEncoding, &script, nil, GlobalTemp);
|
|
if(statusCode == noErr) {
|
|
if(GlobalTemp[0] != 0) {
|
|
GetFNum(GlobalTemp, &fontID);
|
|
} else {
|
|
fontID = GetIntlFont(script, fonts);
|
|
}
|
|
TextFont(fontID);
|
|
TextSize(GetIntlSize(script, fonts));
|
|
if(run == 0) {
|
|
lineRight = !(!(Boolean)GetScriptVariable(script, smScriptRight));
|
|
if(lineRight) {
|
|
short fullWidth;
|
|
|
|
statusCode = MeasureConvertedUTF8(&fullWidth, nil, nil, fonts);
|
|
if(statusCode != noErr) break;
|
|
if(width < fullWidth) {
|
|
Move(width - fullWidth, 0);
|
|
}
|
|
}
|
|
}
|
|
offset = (*uGlobals.convRuns)[runDisp].offset;
|
|
runLen = ((runDisp == uGlobals.convRunsCount - 1) ? uGlobals.convTextLen : (*uGlobals.convRuns)[runDisp + 1].offset) - offset;
|
|
HLock((Handle)uGlobals.convText);
|
|
if((**uGlobals.order)[run] == uGlobals.convRunsCount - 1) {
|
|
runLen = MyVisibleLength(*uGlobals.convText + offset, runLen, lineRight ? rightCaret : leftCaret);
|
|
}
|
|
DrawText(*uGlobals.convText, offset, runLen);
|
|
HUnlock((Handle)uGlobals.convText);
|
|
}
|
|
}
|
|
TextSize(oldSize);
|
|
TextFont(oldFont);
|
|
ResetUniGlobals(&uniState);
|
|
return statusCode;
|
|
}
|
|
|
|
OSStatus MeasureUTF8Text(BytePtr theText, ByteCount bufLen, short *width, FontInfo *maxFont, Boolean *rightToLeft, ScriptFontInfo fonts)
|
|
{
|
|
OSStatus statusCode;
|
|
|
|
statusCode = ConvertUTF8Text(theText, bufLen);
|
|
if(statusCode != noErr) {
|
|
return statusCode;
|
|
}
|
|
return MeasureConvertedUTF8(width, maxFont, rightToLeft, fonts);
|
|
}
|
|
|
|
OSStatus MeasureConvertedUTF8(short *width, FontInfo *maxFont, Boolean *rightToLeft, ScriptFontInfo fonts)
|
|
{
|
|
GrafPtr port;
|
|
short fontID, oldFont, oldSize;
|
|
OSStatus statusCode;
|
|
ItemCount run;
|
|
ByteOffset offset;
|
|
ByteCount runLen;
|
|
ScriptCode script;
|
|
FontInfo fInfo;
|
|
Boolean lineRight;
|
|
UniGlobalsState uniState;
|
|
|
|
NoPurgeUniGlobals(&uniState);
|
|
if(width != nil) *width = 0;
|
|
if(maxFont != nil) Zero(*maxFont);
|
|
if(rightToLeft != nil) *rightToLeft = false;
|
|
GetPort(&port);
|
|
oldFont = GetPortTextFont(port);
|
|
oldSize = GetPortTextSize(port);
|
|
for(statusCode = noErr, run = 0; statusCode == noErr && run < uGlobals.convRunsCount; ++run) {
|
|
statusCode = RevertTextEncodingToScriptInfo((*uGlobals.convRuns)[run].textEncoding, &script, nil, GlobalTemp);
|
|
if(statusCode == noErr) {
|
|
if(GlobalTemp[0] != 0) {
|
|
GetFNum(GlobalTemp, &fontID);
|
|
} else {
|
|
fontID = GetIntlFont(script, fonts);
|
|
}
|
|
TextFont(fontID);
|
|
TextSize(GetIntlSize(script, fonts));
|
|
if(maxFont != nil) {
|
|
GetFontInfo(&fInfo);
|
|
if(fInfo.ascent > maxFont->ascent) {
|
|
maxFont->ascent = fInfo.ascent;
|
|
}
|
|
if(fInfo.descent > maxFont->descent) {
|
|
maxFont->descent = fInfo.descent;
|
|
}
|
|
if(fInfo.widMax > maxFont->widMax) {
|
|
maxFont->widMax = fInfo.widMax;
|
|
}
|
|
if(fInfo.leading > maxFont->leading) {
|
|
maxFont->leading = fInfo.leading;
|
|
}
|
|
}
|
|
if(run == 0) {
|
|
lineRight = !(!(Boolean)GetScriptVariable(script, smScriptRight));
|
|
if(rightToLeft != nil) *rightToLeft = lineRight;
|
|
}
|
|
if(width != nil) {
|
|
offset = (*uGlobals.convRuns)[run].offset;
|
|
runLen = ((run == uGlobals.convRunsCount - 1) ? uGlobals.convTextLen : (*uGlobals.convRuns)[run + 1].offset) - offset;
|
|
HLock((Handle)uGlobals.convText);
|
|
if((**uGlobals.order)[run] == uGlobals.convRunsCount - 1) {
|
|
runLen = MyVisibleLength(*uGlobals.convText + offset, runLen, lineRight ? rightCaret : leftCaret);
|
|
}
|
|
*width += TextWidth(*uGlobals.convText, offset, runLen);
|
|
HUnlock((Handle)uGlobals.convText);
|
|
} else if(maxFont == nil) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
TextSize(oldSize);
|
|
TextFont(oldFont);
|
|
ResetUniGlobals(&uniState);
|
|
return statusCode;
|
|
}
|
|
|
|
OSStatus ConvertUTF8Text(BytePtr theText, ByteCount bufLen)
|
|
{
|
|
OSStatus statusCode = noErr;
|
|
long textSize, runCount;
|
|
ByteCount inputRead, readTotal, outputLen, tempLen;
|
|
ItemCount numRuns;
|
|
OptionBits flags;
|
|
ScriptCode script;
|
|
UniGlobalsState uniState;
|
|
BytePtr tempText;
|
|
DECLARE_UPP(MyRlDirProc,StyleRunDirection);
|
|
|
|
INIT_UPP(MyRlDirProc,StyleRunDirection);
|
|
flags = kUnicodeKeepInfoMask | kUnicodeUseFallbacksMask | kUnicodeDefaultDirectionMask | kUnicodeLooseMappingsMask | kUnicodeTextRunMask;
|
|
if(uGlobals.hasTextRunFlag) flags |= /*kUnicodeKeepSameEncodingMask | */kUnicodeTextRunHeuristicsMask;
|
|
if((*uGlobals.convText != nil) &&
|
|
(*uGlobals.convRuns != nil) &&
|
|
(*uGlobals.order != nil) &&
|
|
(*uGlobals.origText != nil) &&
|
|
(bufLen == uGlobals.origTextLen) &&
|
|
(!memcmp(*uGlobals.origText, theText, bufLen))) {
|
|
return noErr;
|
|
}
|
|
|
|
NoPurgeUniGlobals(&uniState);
|
|
|
|
uGlobals.convTextLen = 0;
|
|
uGlobals.convRunsCount = 0;
|
|
readTotal = 0;
|
|
|
|
if((uGlobals.tecVersion == 0x0180) && (GetOSVersion() >= 0x1020)) {
|
|
// The 10.2 version of the TEC is converting 16-bit Unicode, not UTF-8
|
|
// So, convert the UTF-8 to 16-bit Unicode and put it into uGlobals.origText temporarily
|
|
tempText = theText;
|
|
tempLen = UTF8CharCount(tempText, bufLen) * sizeof(UniChar);
|
|
statusCode = EnsureHandleSize((Handle)uGlobals.origText, tempLen);
|
|
if(statusCode > noErr) {
|
|
UniChar *curUniChar = (UniChar *)*(uGlobals.origText);
|
|
UniCharCount c = tempLen / sizeof(UniChar);
|
|
|
|
while(c--) {
|
|
*curUniChar++ = UTF8ToUniChar(tempText, &tempText);
|
|
}
|
|
statusCode = noErr;
|
|
}
|
|
} else {
|
|
tempLen = GoodUTF8Len(theText, bufLen);
|
|
}
|
|
runCount = EnsureHandleSize((Handle)uGlobals.convRuns, 2 * sizeof(TextEncodingRun)) / sizeof(TextEncodingRun);
|
|
textSize = EnsureHandleSize((Handle)uGlobals.convText, bufLen);
|
|
|
|
if(!statusCode && ((statusCode = runCount) > noErr) && ((statusCode = textSize) > noErr))
|
|
do {
|
|
HLock((Handle)uGlobals.convText);
|
|
HLock((Handle)uGlobals.convRuns);
|
|
|
|
if((uGlobals.tecVersion == 0x0180) && (GetOSVersion() >= 0x1020)) {
|
|
HLock((Handle)uGlobals.origText);
|
|
tempText = *(uGlobals.origText);
|
|
} else {
|
|
tempText = theText;
|
|
}
|
|
|
|
|
|
statusCode = ConvertFromUnicodeToTextRun(uGlobals.UTF8ToMac,
|
|
tempLen - readTotal,
|
|
(ConstUniCharArrayPtr)(tempText + readTotal),
|
|
flags,
|
|
0, nil, nil, nil,
|
|
textSize - uGlobals.convTextLen,
|
|
&inputRead,
|
|
&outputLen,
|
|
*uGlobals.convText + uGlobals.convTextLen,
|
|
runCount - uGlobals.convRunsCount,
|
|
&numRuns,
|
|
*uGlobals.convRuns + uGlobals.convRunsCount);
|
|
|
|
HUnlock((Handle)uGlobals.convText);
|
|
HUnlock((Handle)uGlobals.convRuns);
|
|
if((uGlobals.tecVersion == 0x0180) && (GetOSVersion() >= 0x1020))
|
|
HUnlock((Handle)uGlobals.origText);
|
|
|
|
uGlobals.convTextLen += outputLen;
|
|
uGlobals.convRunsCount += numRuns;
|
|
readTotal += inputRead;
|
|
|
|
if(statusCode == kTECArrayFullErr) {
|
|
SetHandleSize((Handle)uGlobals.convRuns, ++runCount * sizeof(TextEncodingRun));
|
|
goto CheckError;
|
|
} else if(statusCode == kTECOutputBufferFullStatus) {
|
|
ResizeBuffer :
|
|
SetHandleSize((Handle)uGlobals.convText, textSize += 1 K);
|
|
CheckError :
|
|
if((statusCode = MemError()) != noErr) {
|
|
break;
|
|
}
|
|
} else {
|
|
if(statusCode == kTECUsedFallbacksStatus)
|
|
statusCode = noErr;
|
|
break;
|
|
}
|
|
|
|
} while(true);
|
|
|
|
if((statusCode == noErr) && ((statusCode = EnsureHandleSize((Handle)uGlobals.order, sizeof(FormatOrder) * uGlobals.convRunsCount)) > noErr)) {
|
|
if(uGlobals.convRunsCount == 1) {
|
|
(**uGlobals.order)[0] = 0;
|
|
statusCode = noErr;
|
|
} else {
|
|
statusCode = RevertTextEncodingToScriptInfo((*uGlobals.convRuns)[0].textEncoding, &script, nil, nil);
|
|
if(statusCode == noErr) {
|
|
HLock((Handle)uGlobals.order);
|
|
GetFormatOrder(**uGlobals.order, 0, uGlobals.convRunsCount - 1, !(!(Boolean)GetScriptVariable(script, smScriptRight)), MyRlDirProcUPP, (Ptr)uGlobals.convRuns);
|
|
HUnlock((Handle)uGlobals.order);
|
|
}
|
|
}
|
|
}
|
|
if((statusCode == noErr) && ((textSize = EnsureHandleSize((Handle)uGlobals.origText, bufLen)) > noErr)) {
|
|
uGlobals.origTextLen = bufLen;
|
|
BMD(theText, *uGlobals.origText, bufLen);
|
|
} else {
|
|
uGlobals.origTextLen = 0L;
|
|
}
|
|
ClearIntlConverterContext(uGlobals.UTF8ToMac, true);
|
|
ResetUniGlobals(&uniState);
|
|
return statusCode;
|
|
}
|
|
|
|
pascal Boolean MyRlDirProc(short theFormat, void *dirParam)
|
|
{
|
|
ScriptCode script;
|
|
TextEncodingRun **convRuns = (TextEncodingRun **)dirParam;
|
|
|
|
return((RevertTextEncodingToScriptInfo((*convRuns)[theFormat].textEncoding, &script, nil, nil) == noErr) && (Boolean)GetScriptVariable(script, smScriptRight));
|
|
}
|
|
|
|
long EnsureHandleSize(Handle h, Size s)
|
|
{
|
|
OSErr errCode;
|
|
long s2;
|
|
|
|
if(h == nil)
|
|
return nilHandleErr;
|
|
|
|
if(*h == nil)
|
|
ReallocateHandle(h,s);
|
|
else
|
|
{
|
|
s2 = InlineGetHandleSize(h);
|
|
if(s2 < s)
|
|
{
|
|
SetHandleSize(h,s);
|
|
} else s = s2;
|
|
}
|
|
errCode = MemError();
|
|
return (errCode == noErr) ? s : errCode;
|
|
}
|
|
|
|
void NoPurgeUniGlobals(UniGlobalsState *uniState)
|
|
{
|
|
uniState->xState = HGetState((Handle)uGlobals.origText);
|
|
HNoPurge((Handle)uGlobals.convText);
|
|
uniState->tState = HGetState((Handle)uGlobals.convText);
|
|
HNoPurge((Handle)uGlobals.convText);
|
|
uniState->cState = HGetState((Handle)uGlobals.convRuns);
|
|
HNoPurge((Handle)uGlobals.convRuns);
|
|
uniState->oState = HGetState((Handle)uGlobals.order);
|
|
HNoPurge((Handle)uGlobals.order);
|
|
}
|
|
|
|
void ResetUniGlobals(UniGlobalsState *uniState)
|
|
{
|
|
HSetState((Handle)uGlobals.origText, uniState->xState);
|
|
HSetState((Handle)uGlobals.convText, uniState->tState);
|
|
HSetState((Handle)uGlobals.convRuns, uniState->cState);
|
|
HSetState((Handle)uGlobals.order, uniState->oState);
|
|
}
|
|
|
|
/* Pass -1 for inOff if inText is a pointer instead of a handle. Pass either charset or encoding. */
|
|
OSStatus InternetToUTF8Text(StringPtr charset, TextEncoding encoding, ConstTextPtr *inText, long inOff, ByteCount inLen, AccuPtr a, Boolean hint)
|
|
{
|
|
OSStatus err;
|
|
Byte iState, aState;
|
|
ByteCount usedILen, usedOLen;
|
|
UniChar hintChar;
|
|
ConstTextPtr textPtr;
|
|
|
|
err = UpdateTECConverter(&uGlobals.internetToUTF8, charset, encoding, &uGlobals.internetToUTF8Encoding, UTF8_ENCODING, &uGlobals.maclatin1, &hintChar);
|
|
if(err && err != kTECNoConversionPathErr) return err;
|
|
if(hint && hintChar) {
|
|
unsigned char hint8[MAX_UTF8_CHAR_LEN];
|
|
ByteCount len;
|
|
|
|
len = UniCharToUTF8(hintChar, hint8, sizeof(hint8));
|
|
AccuAddPtr(a, hint8, len);
|
|
}
|
|
if(err == kTECNoConversionPathErr)
|
|
{
|
|
if(inOff >= 0)
|
|
{
|
|
err = AccuAddFromHandle(a, (Handle)inText, inOff, inLen);
|
|
}
|
|
else
|
|
{
|
|
err = AccuAddPtr(a, (Ptr)inText, inLen);
|
|
}
|
|
return err;
|
|
}
|
|
aState = HGetState(a->data);
|
|
if(inOff >= 0)
|
|
{
|
|
iState = HGetState((Handle)inText);
|
|
}
|
|
else
|
|
{
|
|
textPtr = (ConstTextPtr)inText;
|
|
}
|
|
do {
|
|
if((err = AccuEnsureSize(a, MAX(inLen, 1 K), 1 K)) == noErr) {
|
|
HLock(a->data);
|
|
if(inOff >= 0)
|
|
{
|
|
textPtr = LDRef(inText) + inOff;
|
|
}
|
|
err = MyTECConvertText(uGlobals.internetToUTF8, textPtr, inLen, &usedILen, *(a->data) + a->offset, a->size - a->offset, &usedOLen, uGlobals.maclatin1);
|
|
if(inOff >= 0)
|
|
{
|
|
HSetState((Handle)inText, iState);
|
|
}
|
|
HSetState(a->data, aState);
|
|
if(!err || (err == kTECOutputBufferFullStatus)) {
|
|
a->offset += usedOLen;
|
|
inLen -= usedILen;
|
|
if(inOff >= 0)
|
|
{
|
|
inOff += usedILen;
|
|
}
|
|
else
|
|
{
|
|
textPtr += usedILen;
|
|
}
|
|
}
|
|
if(!err) {
|
|
do {
|
|
if((err = AccuEnsureSize(a, kUnicodeMinBufSize, 1 K)) == noErr) {
|
|
HLock(a->data);
|
|
err = TECFlushText(uGlobals.internetToUTF8, *(a->data) + a->offset, a->size - a->offset, &usedOLen);
|
|
HSetState(a->data, aState);
|
|
if(!err || (err == kTECOutputBufferFullStatus)) {
|
|
a->offset += usedOLen;
|
|
}
|
|
}
|
|
} while(err == kTECOutputBufferFullStatus);
|
|
}
|
|
}
|
|
} while(err == kTECOutputBufferFullStatus);
|
|
TECClearConverterContextInfo(uGlobals.internetToUTF8);
|
|
return err;
|
|
}
|
|
|
|
OSStatus GetUnicodeHint(TextEncoding encoding, StringPtr lang, UniChar *hint)
|
|
{
|
|
ScriptCode script;
|
|
OSStatus err;
|
|
|
|
// LocaleStringToLangAndRegionCodes(string, &langCode, nil);
|
|
|
|
if(uGlobals.tecVersion < 0x0150)
|
|
return kTextUnsupportedEncodingErr;
|
|
|
|
err = NearestMacTextEncodings(encoding, &encoding, nil);
|
|
if(err) return err;
|
|
|
|
err = RevertTextEncodingToScriptInfo(encoding, &script, nil, nil);
|
|
if(!err) switch(script) {
|
|
case smJapanese :
|
|
*hint = kUnicodeSourceHintJapanese;
|
|
break;
|
|
case smTradChinese :
|
|
*hint = kUnicodeSourceHintChineseTraditional;
|
|
break;
|
|
case smKorean :
|
|
*hint = kUnicodeSourceHintKorean;
|
|
break;
|
|
case smSimpChinese :
|
|
*hint = kUnicodeSourceHintChineseSimplified;
|
|
break;
|
|
default :
|
|
*hint = kUnicodeSourceHintDefault;
|
|
}
|
|
return err;
|
|
}
|
|
|
|
OSStatus CreateIntlConverter(IntlConverter *converter, TextEncoding encoding)
|
|
{
|
|
OSStatus err;
|
|
|
|
converter->uniCount = 0L;
|
|
converter->lastCharScript = smSystemScript;
|
|
converter->lastCharType = 0;
|
|
converter->flags = kUnicodeDefaultDirectionMask;
|
|
converter->uniHandle = NuHTempBetter(129);
|
|
if(!converter->uniHandle)
|
|
{
|
|
Log(-1, "\pCreate uniHandle failed");
|
|
return MemError();
|
|
}
|
|
converter->inToUnicode = converter->unicodeToMac = 0;
|
|
if(uGlobals.tecVersion == 0)
|
|
{
|
|
err = noErr;
|
|
} else {
|
|
converter->inToUnicodeEncoding = (encoding == kTextEncodingUnknown) ? CreateSystemRomanEncoding() : encoding;
|
|
err = UpdateIntlConverter(converter, nil);
|
|
if(err) goto HDispose;
|
|
err = CreateUnicodeToTextRunInfo(0, nil, &converter->unicodeToMac);
|
|
}
|
|
if(err)
|
|
{
|
|
Log(-1L, "\pUnicode To Mac failed");
|
|
TECDisposeConverter(converter->inToUnicode);
|
|
HDispose :
|
|
ZapHandle(converter->uniHandle);
|
|
}
|
|
return err;
|
|
}
|
|
|
|
OSStatus ConvertIntlText(IntlConverter *converter, StringPtr inText, ByteCount *inLen, StringPtr outText, ByteCount *outLen, TextEncoding *encoding, WordSpaceEnum addSpace, ByteOffset *spaceOffset)
|
|
{
|
|
OptionBits flags;
|
|
OSStatus err;
|
|
ByteCount origILen, origOLen, usedOLen, actILen, usedUniLen, uniBufSize;
|
|
Boolean full;
|
|
ItemCount runCount, spaceCount;
|
|
ByteOffset iSpaceOffset, oSpaceOffset;
|
|
TextEncodingRun encodingRun;
|
|
// ScriptCode script;
|
|
Byte hState;
|
|
|
|
if(converter == nil) {
|
|
return paramErr;
|
|
}
|
|
|
|
if(converter->unicodeToMac == nil) {
|
|
*encoding = kTextEncodingUnknown;
|
|
if(spaceOffset != nil) {
|
|
*spaceOffset = ULONG_MAX;
|
|
}
|
|
if(addSpace != dontAddWordSpace) {
|
|
*outText++ = ' ';
|
|
--*outLen;
|
|
}
|
|
err = (*inLen <= *outLen) ? noErr : kTECOutputBufferFullStatus;
|
|
*inLen = *outLen = MIN(*outLen, *inLen);
|
|
BMD(inText, outText, *outLen);
|
|
if(addSpace != dontAddWordSpace) {
|
|
++*outLen;
|
|
}
|
|
if(converter->table != NO_TABLE)
|
|
{
|
|
TransLitRes(outText, *outLen, converter->table);
|
|
}
|
|
return err;
|
|
}
|
|
|
|
hState = HGetState(converter->uniHandle);
|
|
HLock(converter->uniHandle);
|
|
uniBufSize = GetHandleSize(converter->uniHandle);
|
|
|
|
origILen = *inLen;
|
|
origOLen = *outLen;
|
|
*outLen = *inLen = usedOLen = 0L;
|
|
|
|
flags = converter->flags | kUnicodeUseFallbacksMask | kUnicodeLooseMappingsMask | kUnicodeTextRunMask | kUnicodeKeepInfoMask;
|
|
if(uGlobals.hasTextRunFlag) flags |= /*kUnicodeKeepSameEncodingMask | */kUnicodeTextRunHeuristicsMask;
|
|
|
|
do {
|
|
if(addSpace != dontAddWordSpace) {
|
|
spaceCount = 1;
|
|
iSpaceOffset = (converter->uniCount * sizeof(UniChar));
|
|
(*converter->uniHandle)[converter->uniCount++] = kUnicodeSpaceChar;
|
|
addSpace = dontAddWordSpace;
|
|
} else {
|
|
spaceCount = 0;
|
|
}
|
|
if((*inLen <= origILen || origILen == 0L) && (converter->uniCount < ((uniBufSize - sizeof(UniChar)) / sizeof(UniChar)))) {
|
|
UniChar *curBuf = (*converter->uniHandle) + converter->uniCount;
|
|
ByteCount curSize = uniBufSize - ((converter->uniCount + 1) * sizeof(UniChar));
|
|
|
|
if(converter->inToUnicode == nil) {
|
|
actILen = usedUniLen = MIN(origILen - *inLen, curSize);
|
|
if(usedUniLen > curSize)
|
|
err = kTECOutputBufferFullStatus;
|
|
else
|
|
err = noErr;
|
|
BMD(inText + *inLen, curBuf, usedUniLen);
|
|
} else if((origILen != 0L) && (origILen != *inLen)) {
|
|
if(converter->iso2022) CleanISO2022(inText + *inLen, origILen - *inLen, converter);
|
|
err = MyTECConvertText(converter->inToUnicode, inText + *inLen, origILen - *inLen, &actILen, curBuf, curSize, &usedUniLen, converter->maclatin1);
|
|
} else {
|
|
actILen = 0L;
|
|
err = TECFlushText(converter->inToUnicode, curBuf, curSize, &usedUniLen);
|
|
}
|
|
|
|
full = ((err == kTECOutputBufferFullStatus) || (err == kTECBufferBelowMinimumSizeErr));
|
|
if(!full && err) break;
|
|
|
|
converter->uniCount += (usedUniLen / sizeof(UniChar));
|
|
*inLen += actILen;
|
|
} else {
|
|
full = (origILen != 0L);
|
|
}
|
|
ConvertUnicode :
|
|
err = ConvertFromUnicodeToTextRun(converter->unicodeToMac, converter->uniCount * sizeof(UniChar), *converter->uniHandle, flags, spaceCount, &iSpaceOffset, &spaceCount, &oSpaceOffset, origOLen - *outLen, &usedUniLen, &usedOLen, outText + *outLen, 1, &runCount, &encodingRun);
|
|
*encoding = encodingRun.textEncoding;
|
|
switch(err) {
|
|
case kTECUsedFallbacksStatus :
|
|
err = noErr;
|
|
goto MoveIt;
|
|
case kTECBufferBelowMinimumSizeErr :
|
|
err = kTECOutputBufferFullStatus;
|
|
case kTECOutputBufferFullStatus :
|
|
case kTECArrayFullErr :
|
|
case noErr :
|
|
MoveIt :
|
|
if((runCount > 0) && (usedOLen > 0))
|
|
{
|
|
/* RevertTextEncodingToScriptInfo(encodingRun.textEncoding, &script, nil, nil);
|
|
if(spaceCount != 0) {
|
|
short charType;
|
|
|
|
charType = CharacterType(outText, 0, script);
|
|
}
|
|
|
|
converter->lastCharScript = script;
|
|
converter->lastCharType = CharacterType(outText, *outLen - 1, script);
|
|
*/
|
|
if(spaceOffset != nil) {
|
|
if(spaceCount != 0) {
|
|
*spaceOffset = oSpaceOffset + *outLen;
|
|
} else {
|
|
*spaceOffset = ULONG_MAX;
|
|
}
|
|
}
|
|
}
|
|
converter->uniCount -= usedUniLen / sizeof(UniChar);
|
|
if(converter->uniCount > 0) {
|
|
BMD((*converter->uniHandle) + (usedUniLen / sizeof(UniChar)), *converter->uniHandle, converter->uniCount * sizeof(UniChar));
|
|
if((usedOLen == 0) && (err == kTECArrayFullErr))
|
|
goto ConvertUnicode;
|
|
}
|
|
|
|
*outLen += usedOLen;
|
|
break;
|
|
default :
|
|
if(converter->inToUnicode == nil) {
|
|
converter->uniCount -= (actILen / sizeof(UniChar)) + spaceCount;
|
|
}
|
|
}
|
|
|
|
} while(full && !err);
|
|
if(!err && (inText == (StringPtr)-1L) && (converter->unicodeToMac != nil)) {
|
|
ClearIntlConverterContext(converter->unicodeToMac, false);
|
|
}
|
|
HSetState(converter->uniHandle, hState);
|
|
return err;
|
|
}
|
|
|
|
void DisposeIntlConverter(IntlConverter converter)
|
|
{
|
|
if(converter.unicodeToMac != nil) {
|
|
TECDisposeConverter(converter.inToUnicode);
|
|
DisposeUnicodeToTextRunInfo(&converter.unicodeToMac);
|
|
converter.inToUnicode = converter.unicodeToMac = nil;
|
|
}
|
|
ZapHandle(converter.uniHandle);
|
|
}
|
|
|
|
OSStatus UpdateIntlConverter(IntlConverter *converter, StringPtr charset)
|
|
{
|
|
OSStatus err;
|
|
UniChar hint;
|
|
|
|
if(uGlobals.tecVersion == 0)
|
|
{
|
|
if((charset == nil) || (charset[0] == 0) || EqualStrRes(charset,MIME_MAC))
|
|
{
|
|
converter->table = NO_TABLE;
|
|
return noErr;
|
|
} else {
|
|
converter->table = FindMIMECharset(charset);
|
|
return (converter->table == NO_TABLE) ? kTextUnsupportedEncodingErr : noErr;
|
|
}
|
|
}
|
|
|
|
err = UpdateTECConverter(&converter->inToUnicode, charset, charset ? kTextEncodingUnknown : converter->inToUnicodeEncoding, &converter->inToUnicodeEncoding, CreateTextEncoding(kTextEncodingUnicodeDefault, kTextEncodingDefaultVariant, kUnicode16BitFormat), &converter->maclatin1, &hint);
|
|
if(!err) {
|
|
TextEncodingBase base;
|
|
|
|
if(hint)
|
|
{
|
|
(*converter->uniHandle)[converter->uniCount++] = hint;
|
|
}
|
|
base = GetTextEncodingBase(converter->inToUnicodeEncoding);
|
|
converter->iso2022 = (base <= kTextEncodingISO_2022_KR && base >= kTextEncodingISO_2022_JP);
|
|
converter->inDouble = false;
|
|
} else {
|
|
converter->iso2022 = false;
|
|
}
|
|
|
|
// If the transfer code has already taken a stab at
|
|
// transliteration, it will pass in a charset of
|
|
// orginal-charset/new-charset. Right now, we take
|
|
// this as a signal we'd better not do further transliteration
|
|
// At some future date, we may be able to undo some or all
|
|
// of the damage that was done if this original transliteration
|
|
// proved to be in error
|
|
if (charset && PIndex(charset,'/')) converter->alreadyTransliterated = true;
|
|
return err;
|
|
}
|
|
|
|
OSStatus EncodingPlusPeteStyle(TextEncoding encoding, PETEStyleEntry *pse, ScriptCode *outScript)
|
|
{
|
|
ScriptCode script, oldScript;
|
|
LangCode lang;
|
|
OSStatus err;
|
|
|
|
if(uGlobals.tecVersion == 0)
|
|
return noErr;
|
|
err = RevertTextEncodingToScriptInfo(encoding, &script, &lang, GlobalTemp);
|
|
if(err) return err;
|
|
|
|
if(GlobalTemp[0] != 0) {
|
|
GetFNum(GlobalTemp, &pse->psStyle.textStyle.tsFont);
|
|
pse->psStyle.textStyle.tsLang = langUnspecified;
|
|
script = smUninterp;
|
|
} else {
|
|
Boolean defaultFont = (pse->psStyle.textStyle.tsFont == kPETEDefaultFont) || (pse->psStyle.textStyle.tsFont == kPETEDefaultFixed);
|
|
|
|
if(pse->psStyle.textStyle.tsLang == langUnspecified) {
|
|
oldScript = smUninterp;
|
|
} else if(!defaultFont) {
|
|
oldScript = FontToScript(pse->psStyle.textStyle.tsFont);
|
|
pse->psStyle.textStyle.tsLang = (short)GetScriptVariable(oldScript, smScriptLang);
|
|
} else {
|
|
TextEncoding tempEncoding;
|
|
|
|
err = UpgradeScriptInfoToTextEncoding(kTextScriptDontCare, pse->psStyle.textStyle.tsLang, kTextRegionDontCare, nil, &tempEncoding);
|
|
if(err) return err;
|
|
|
|
err = RevertTextEncodingToScriptInfo(tempEncoding, &oldScript, nil, nil);
|
|
if(err) return err;
|
|
}
|
|
if(lang != kTextLanguageDontCare) {
|
|
if(pse->psStyle.textStyle.tsLang != lang) {
|
|
pse->psStyle.textStyle.tsLang = lang;
|
|
if(!defaultFont) {
|
|
pse->psStyle.textStyle.tsFont = kPETEDefaultFont;
|
|
}
|
|
}
|
|
} else {
|
|
if(script != oldScript) {
|
|
pse->psStyle.textStyle.tsLang = (short)GetScriptVariable(script, smScriptLang);
|
|
if(!defaultFont) {
|
|
pse->psStyle.textStyle.tsFont = kPETEDefaultFont;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if(outScript != nil) *outScript = script;
|
|
return noErr;
|
|
}
|
|
|
|
OSStatus ClearIntlConverterContext(UnicodeToTextRunInfo converter, Boolean utf8)
|
|
{
|
|
UniChar twoChars[2];
|
|
unsigned char twoUTF8Chars[2 * MAX_UTF8_CHAR_LEN];
|
|
void *iUnicodeStr;
|
|
unsigned char buffer[kUnicodeMinBufSize];
|
|
TextEncodingRun encodingRun;
|
|
ByteCount oLen, iLen;
|
|
ItemCount runs;
|
|
OptionBits flags;
|
|
|
|
flags = kUnicodeUseFallbacksMask | kUnicodeDefaultDirectionMask | kUnicodeLooseMappingsMask | kUnicodeTextRunMask;
|
|
if(uGlobals.hasTextRunFlag) flags |= /*kUnicodeKeepSameEncodingMask | */kUnicodeTextRunHeuristicsMask;
|
|
if(utf8 && (uGlobals.tecVersion != 0x0180 || (GetOSVersion() < 0x1020)))
|
|
{
|
|
iLen = UniCharToUTF8(kUnicodeSourceHintDefault, &twoUTF8Chars[0], sizeof(twoUTF8Chars));
|
|
iLen += UniCharToUTF8(kUnicodeSpaceChar, &twoUTF8Chars[iLen], sizeof(twoUTF8Chars) - iLen);
|
|
iUnicodeStr = &twoUTF8Chars[0];
|
|
}
|
|
else
|
|
{
|
|
twoChars[0] = kUnicodeSourceHintDefault;
|
|
twoChars[1] = kUnicodeSpaceChar;
|
|
iUnicodeStr = &twoChars[0];
|
|
iLen = 2 * sizeof(UniChar);
|
|
}
|
|
return ConvertFromUnicodeToTextRun(converter, iLen, iUnicodeStr, flags, 0, nil, 0, nil, sizeof(buffer), &iLen, &oLen, buffer, 1, &runs, &encodingRun);
|
|
}
|
|
|
|
OSStatus UpdateTECConverter(TECObjectRef *converter, StringPtr charset, TextEncoding newEncoding, TextEncoding *fromEncoding, TextEncoding toEncoding, Boolean *maclatin1, UniChar *hint)
|
|
{
|
|
TECObjectRef tempConverter;
|
|
OSStatus err = noErr;
|
|
UPtr spot = nil;
|
|
|
|
*maclatin1 = false;
|
|
if(charset && charset[0] != 0)
|
|
{
|
|
Str255 string;
|
|
|
|
spot = &charset[1];
|
|
PTokenPtr(&charset[1], charset[0], string, &spot, "*");
|
|
if(EqualStrRes(string, MIME_ISO_LATIN1)) {
|
|
|
|
newEncoding = DefaultEncoding(kTextEncodingWindowsLatin1);
|
|
} else {
|
|
TextEncoding tempEncoding;
|
|
|
|
err = TECGetTextEncodingFromInternetName(&tempEncoding, string);
|
|
if(err)
|
|
{
|
|
if(newEncoding == kTextEncodingUnknown)
|
|
return err;
|
|
}
|
|
else
|
|
newEncoding = tempEncoding;
|
|
|
|
// Work around another Apple bug
|
|
if((uGlobals.tecVersion >= 0x0170 && uGlobals.tecVersion < 0x0190) &&
|
|
((newEncoding & 0x0000FF00) == 0x00000100)) // It's Unicode
|
|
newEncoding &= 0xFFFFFF00; // Make it default Unicode instead of explicit version
|
|
}
|
|
}
|
|
if(!*converter || newEncoding != *fromEncoding)
|
|
{
|
|
if(toEncoding != kTextEncodingUnknown)
|
|
err = TECCreateConverter(&tempConverter, newEncoding, toEncoding);
|
|
else
|
|
err = TECCreateOneToManyConverter(&tempConverter, newEncoding, 0, nil);
|
|
if(!err)
|
|
{
|
|
if(*converter) TECDisposeConverter(*converter);
|
|
*converter = tempConverter;
|
|
*fromEncoding = newEncoding;
|
|
}
|
|
}
|
|
if(!err && hint)
|
|
{
|
|
Str255 string;
|
|
|
|
if(GetUnicodeHint(newEncoding, (spot && PTokenPtr(&charset[1],charset[0],string,&spot,"*")) ? string : nil, hint))
|
|
*hint = 0;
|
|
}
|
|
return err;
|
|
}
|
|
|
|
ByteCount UniCharToUTF8(UniChar c, unsigned char *utf8, ByteCount maxLen)
|
|
{
|
|
/* Bonehead! */
|
|
if(maxLen == 0) return 0;
|
|
|
|
/* Plain ASCII just return it */
|
|
if(c <= 0x7F) {
|
|
*utf8++ = c;
|
|
/* Null terminate if there's room */
|
|
if(maxLen > 1) *utf8 = 0;
|
|
return 1;
|
|
} else {
|
|
unsigned long lc;
|
|
long mask = 0xFFFFFFE0;
|
|
ByteCount len;
|
|
|
|
/*
|
|
* Count the bytes needed; trim off low six bits and see if the
|
|
* remainder will fit in the first byte
|
|
*/
|
|
for(lc = c, len = 2; (lc >>=6) & mask; mask >>= 1, ++len)
|
|
;
|
|
|
|
/* More than can fit? */
|
|
if(len > maxLen) return 0;
|
|
|
|
/* Fill it in from last byte to first */
|
|
utf8 += len;
|
|
/* Null terminate if there's room */
|
|
if(len < maxLen) *utf8 = 0;
|
|
lc = c;
|
|
do {
|
|
/* Drop in the low six bits and set the high bit */
|
|
*--utf8 = ((lc & 0x3F) | 0x80);
|
|
lc >>= 6;
|
|
} while(lc & mask);
|
|
/* Put the rest of the bits in the first byte */
|
|
*--utf8 = (((mask << 1) & 0x000000FF) | lc);
|
|
return len;
|
|
}
|
|
}
|
|
|
|
UniChar UTF8ToUniChar(unsigned char *utf8, unsigned char **next)
|
|
{
|
|
UniChar r;
|
|
|
|
/* Plain ASCII just return it */
|
|
if(*utf8 <= 0x7F)
|
|
{
|
|
r = *utf8++;
|
|
} else {
|
|
int c;
|
|
unsigned char b, m;
|
|
|
|
/*
|
|
* Count how many bytes there are; check how many high bits are set
|
|
* and that'll be the byte count. Also, form the mask to grab the
|
|
* low bits out of the first byte.
|
|
*/
|
|
for(c = 0, b = *utf8, m = 0x3F; ((b <<= 1) & 0x80); ++c)
|
|
m >>= 1;
|
|
|
|
/* Grab the low bits of the first byte */
|
|
r = *utf8++ & m;
|
|
while(c--)
|
|
{
|
|
/* Grab the low six bits of each byte and shift them into r */
|
|
r <<= 6;
|
|
r |= (*utf8++ & 0x3F);
|
|
}
|
|
}
|
|
if(next) *next = utf8;
|
|
return r;
|
|
}
|
|
|
|
UniCharCount UTF8CharCount(BytePtr utf8, ByteCount bufLen)
|
|
{
|
|
UniCharCount count = 0;
|
|
UTF8Char b;
|
|
ByteCount tempLen;
|
|
|
|
while(bufLen) {
|
|
tempLen = 1;
|
|
if((b = *utf8++) > 0x7F) {
|
|
while((b <<= 1) & 0x80) {
|
|
++tempLen;
|
|
}
|
|
if(tempLen > bufLen) break;
|
|
|
|
utf8 += tempLen;
|
|
}
|
|
bufLen -= tempLen;
|
|
++count;
|
|
}
|
|
return count;
|
|
}
|
|
|
|
/* Return the length of the buffer that contains legal UTF-8 (remove trailing partials) */
|
|
ByteCount GoodUTF8Len(BytePtr utf8, ByteCount bufLen)
|
|
{
|
|
UTF8Char b;
|
|
ByteCount newLen = 0, tempLen;
|
|
|
|
while(bufLen) {
|
|
// It's at least 1 if it's US-ASCII
|
|
tempLen = 1;
|
|
|
|
// If it's not US-ASCII....
|
|
if((b = *utf8++) > 0x7F) {
|
|
/*
|
|
* The first byte of any UTF-8 sequence has the high bit set followed by
|
|
* a bit set for each subsequent byte in the sequence. So, count how many
|
|
* bits are set from high to low (not including the first one) and that
|
|
* tells how many bytes follow this one
|
|
*/
|
|
while((b <<= 1) & 0x80) {
|
|
++tempLen;
|
|
}
|
|
|
|
/*
|
|
* If the first byte said there are more than the remaining buffer in the
|
|
* sequence, don't add that to the length.
|
|
*/
|
|
if(tempLen > bufLen) break;
|
|
|
|
// Move past this sequence of UTF-8 bytes (we already moved past the first)
|
|
utf8 += tempLen - 1;
|
|
}
|
|
// Add the length used to the total, and subtract it from what's left
|
|
newLen += tempLen;
|
|
bufLen -= tempLen;
|
|
}
|
|
return newLen;
|
|
}
|
|
|
|
OSErr AccuEnsureSize(AccuPtr a, long len, long incr)
|
|
{
|
|
if(a->data && (a->size - a->offset >= len)) return noErr;
|
|
if (!a->data)
|
|
{
|
|
a->offset = 0;
|
|
a->size = MAX(len, 1 K);
|
|
a->data = NuHTempBetter(a->size);
|
|
}
|
|
else
|
|
{
|
|
a->size += len + incr;
|
|
SetHandleBig_(a->data,a->size);
|
|
}
|
|
return(a->err=MemError());
|
|
}
|
|
|
|
short GetIntlFont(ScriptCode script, ScriptFontInfo fonts)
|
|
{
|
|
if(fonts[script].fontID > applFont)
|
|
return fonts[script].fontID;
|
|
else
|
|
return LoWord(GetScriptVariable(script, smScriptAppFondSize));
|
|
}
|
|
|
|
short GetIntlSize(ScriptCode script, ScriptFontInfo fonts)
|
|
{
|
|
if(fonts[script].fontSize > 0)
|
|
return fonts[script].fontSize;
|
|
else
|
|
return HiWord(GetScriptVariable(script, smScriptAppFondSize));
|
|
}
|
|
|
|
void CleanISO2022(TextPtr text, long len, IntlConverter *converter)
|
|
{
|
|
// state = 2022AnyByte;
|
|
|
|
for(; --len >= 0; ++text) {
|
|
/* switch state :
|
|
case 2022AnyByte :
|
|
if(text[offset] == 0x1B) {
|
|
state = 2022Esc;
|
|
break;
|
|
}
|
|
if(text[offset] <= 0x7F)
|
|
break;
|
|
*/
|
|
|
|
if(*text == 0x1B) {
|
|
if(++text, --len >= 0) {
|
|
--len;
|
|
switch(*text++) {
|
|
case '$' :
|
|
converter->inDouble = true;
|
|
break;
|
|
case '(' :
|
|
converter->inDouble = false;
|
|
}
|
|
}
|
|
} else {
|
|
if(converter->inDouble && (len > 0)) {
|
|
--len;
|
|
if((*text++ < 0x21) || (*(text - 1) > 0x7E) || (*text < 0x21) || (*text > 0x7E)) {
|
|
*(text - 1) = 0x21;
|
|
*text = 0x21;
|
|
}
|
|
} else if(*text > 0x7F) {
|
|
*text = '?';
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
Boolean Find2047(UPtr chars, long len, long *ewOff, long *ewTextOff, long *ewEndOff, long *ewWSLen, Boolean *qp);
|
|
|
|
OSStatus PeteInsertHeader(PETEHandle pte, long *pOff, Handle text, long len, long tOff)
|
|
{
|
|
return InsertIntlHeaders(text, len, tOff, nil, kTextEncodingUnknown, pte, pOff);
|
|
}
|
|
|
|
/*
|
|
* Pass encoding and a or if you want UTF8 in an accumulator; pass pte and pOff if you want a PETE handle
|
|
* Pass -1 for tOff if Text is pointer instead of handle
|
|
*/
|
|
OSStatus InsertIntlHeaders(Handle text, long len, long tOff, AccuPtr a, TextEncoding encoding, PETEHandle pte, long *pOff)
|
|
{
|
|
long curOff, // Current offset into the text
|
|
ewOff, // Offset of the encoded word
|
|
ewTextOff, // Offset of the text portion of the encoded word
|
|
ewEndOff, // Offset of the last question mark in the encoded word
|
|
ewWSLen, // Whitespace after the encoded word
|
|
ewLastWSLen, // Whitespace after previous encoded word
|
|
skipOff; // Stuff that looked like an encoded word but wasn't
|
|
OSStatus err = noErr;
|
|
Boolean qp; // TRUE=quoted-printable; FALSE=base64
|
|
Ptr textPtr; // temp pointer
|
|
long inTOff; // Offset into the text passed in
|
|
|
|
inTOff = tOff;
|
|
// < 0 means it was a pointer
|
|
if(tOff < 0) tOff = 0;
|
|
if(len == 0) return noErr;
|
|
// Current offset into the text
|
|
curOff = tOff;
|
|
ewWSLen = 0;
|
|
skipOff = 0;
|
|
|
|
/*
|
|
* Go through a single header. Find an encoded word. Output what came before the
|
|
* encoded word. Convert the encoded word. Output the converted stuff.
|
|
*/
|
|
do
|
|
{
|
|
// Deref if it's a handle
|
|
textPtr = inTOff < 0 ? (Ptr)text : *text;
|
|
|
|
// Save whether there was any trailing WS on the last encoded word
|
|
ewLastWSLen = ewWSLen;
|
|
if(Find2047(textPtr + curOff + skipOff, len - ((curOff + skipOff) - tOff), &ewOff, &ewTextOff, &ewEndOff, &ewWSLen, &qp))
|
|
{
|
|
// If we immediately find an encoded word after the last one, ignore the WS
|
|
if(ewOff == ewLastWSLen)
|
|
{
|
|
curOff += ewOff;
|
|
ewTextOff -= ewOff;
|
|
ewEndOff -= ewOff;
|
|
ewOff = 0;
|
|
}
|
|
}
|
|
|
|
// If there was some text before the encoded word (or there was some skipped text)...
|
|
if(ewOff + skipOff > 0)
|
|
{
|
|
// Add that text to the accumulator or document
|
|
if(a)
|
|
{
|
|
if(encoding == kTextEncodingUnknown) encoding = CreateSystemRomanEncoding();
|
|
err = InternetToUTF8Text(nil, encoding, ((Ptr)text) + (inTOff < 0 ? curOff : 0), inTOff < 0 ? inTOff : curOff, ewOff + skipOff, a, false);
|
|
}
|
|
else
|
|
{
|
|
if(inTOff < 0)
|
|
{
|
|
err = PETEInsertTextPtr(PETE,pte,*pOff,textPtr+curOff,ewOff+skipOff,nil);
|
|
} else {
|
|
err = PETEInsertTextHandle(PETE,pte,*pOff,text,ewOff+skipOff,curOff,nil);
|
|
}
|
|
}
|
|
}
|
|
|
|
if(!err)
|
|
{
|
|
// If we need to keep track of the PETE offset, add what we just inserted
|
|
if(pOff && *pOff != -1)
|
|
*pOff += ewOff + skipOff;
|
|
// Move curOff past what we just inserted
|
|
curOff += ewOff + skipOff;
|
|
skipOff = 0;
|
|
// Make the offsets relative to curOff
|
|
ewTextOff -= ewOff;
|
|
ewEndOff -= ewOff;
|
|
ewOff = 0;
|
|
|
|
// At the end?
|
|
if(ewEndOff != 0)
|
|
{
|
|
Str255 word;
|
|
|
|
// Get the text of the encoded word and un-quoted-printable or un-base64
|
|
textPtr = inTOff < 0 ? (Ptr)text : *text;
|
|
MakePStr(word, textPtr + curOff + ewTextOff, (ewEndOff - 2) - ewTextOff);
|
|
if((qp && (PseudoQP(word), true) || !DecodeB64String(word)))
|
|
{
|
|
Str31 charset;
|
|
long iOff, oOff;
|
|
|
|
// Get the charset name
|
|
MakePStr(charset, textPtr + curOff + 2, ewTextOff - 5);
|
|
if (EqualStrRes(charset,UNKNOWN_CHARSET_NAME)) GetRString(charset,UNSPECIFIED_CHARSET);
|
|
|
|
if(a) {
|
|
// Adding the text to the accumulator using the charset
|
|
iOff = a->offset;
|
|
if(InternetToUTF8Text(charset, kTextEncodingUnknown, &word[1], -1L, word[0], a, false) != noErr)
|
|
{
|
|
a->offset = iOff;
|
|
goto BadConversion;
|
|
}
|
|
else
|
|
{
|
|
curOff += ewEndOff;
|
|
}
|
|
}
|
|
// If we add to the document, we update the converter using the charset
|
|
else if(UpdateIntlConverter(&uGlobals.quickConverter, charset) == noErr)
|
|
{
|
|
// Adding the text to the document with the converter
|
|
if((iOff = *pOff) < 0L) PeteGetTextAndSelection(pte,nil,&iOff,nil);
|
|
err = PeteInsertIntlText(pte, pOff, &word[1], -1L, word[0], &uGlobals.quickConverter, kTextEncodingUnknown, false, true);
|
|
if(!err)
|
|
{
|
|
curOff += ewEndOff;
|
|
}
|
|
else if(EncodingError(err))
|
|
{
|
|
if((oOff = *pOff) < 0L) PeteGetTextAndSelection(pte,nil,&oOff,nil);
|
|
PeteDelete(pte, iOff, oOff);
|
|
goto BadConversion;
|
|
}
|
|
}
|
|
else goto BadConversion;
|
|
}
|
|
else
|
|
{
|
|
BadConversion :
|
|
/*
|
|
* Couldn't convert, so add the entire encoded word to the skip so that
|
|
* it can be added before the next encoded word.
|
|
*/
|
|
curOff -= ewLastWSLen;
|
|
skipOff = ewEndOff + ewWSLen + ewLastWSLen;
|
|
ewWSLen = 0;
|
|
}
|
|
}
|
|
}
|
|
} while((!err || EncodingError(err)) && curOff < tOff + len);
|
|
return err;
|
|
}
|
|
|
|
Boolean Find2047(UPtr chars, long len, long *ewOff, long *ewTextOff, long *ewEndOff, long *ewWSLen, Boolean *qp)
|
|
{
|
|
UPtr q[4]; // Pointers to the 4 questions marks in the encoded word
|
|
UPtr end=chars+len; // Past the end of the text
|
|
UPtr spot=chars; // Start out at the start of the text
|
|
long ewEncOff; // Offset of the encoding type (Q or B)
|
|
|
|
while(true)
|
|
{
|
|
// If we don't have enough text left for an encoded word, break out
|
|
if(end - spot < 8)
|
|
{
|
|
*ewOff = *ewTextOff = *ewEndOff = len;
|
|
*ewWSLen = 0;
|
|
break;
|
|
}
|
|
|
|
// Encoded words start with "=?"
|
|
if((*spot++ == '=') && (*spot == '?'))
|
|
{
|
|
int i;
|
|
|
|
// Find 3 more questions marks, break out if we hit the end
|
|
for(q[0] = spot++, i = 1; i < 4; ++i)
|
|
{
|
|
q[i] = q[i-1];
|
|
while(q[i] < end && *++(q[i]) != '?') ;
|
|
if(q[i] >= end) break;
|
|
}
|
|
|
|
// If we found 4 question marks and the next thing is an "="...
|
|
if((i == 4) && (q[3] < end) && (q[3][1] == '='))
|
|
{
|
|
// Encoded word starts one before the 1st '?'
|
|
*ewOff = q[0] - chars - 1;
|
|
// The encoding char is one after the 2nd '?'
|
|
ewEncOff = q[1] - chars + 1;
|
|
// The text is one after the 3rd '?'
|
|
*ewTextOff = q[2] - chars + 1;
|
|
// The end is two after the last '?'
|
|
*ewEndOff = q[3] - chars + 2;
|
|
|
|
// Get the length of WS after the encoded word to ignore
|
|
for(spot = q[3] + 2; spot < end; ++spot)
|
|
{
|
|
switch(*spot)
|
|
{
|
|
case ' ' :
|
|
case '\t' :
|
|
case '\012' :
|
|
case '\014' :
|
|
case '\015' :
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
*ewWSLen = spot - (q[3] + 2);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// See if we've got quoted-printable or base64
|
|
if(*ewTextOff - ewEncOff == 2)
|
|
{
|
|
switch(chars[ewEncOff])
|
|
{
|
|
default :
|
|
goto badEncoding;
|
|
case 'Q' :
|
|
case 'q' :
|
|
*qp = true;
|
|
break;
|
|
case 'B' :
|
|
case 'b' :
|
|
*qp = false;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
badEncoding :
|
|
ewOff = ewEndOff;
|
|
}
|
|
return (*ewOff < *ewEndOff);
|
|
}
|
|
|
|
OSErr PeteSetIntlText(PETEHandle pte, Handle text, long start, long end, IntlConverter *converter, TextEncoding encoding)
|
|
{
|
|
OSErr err;
|
|
long offset = -1;
|
|
|
|
PeteDelete(pte,0,0x7fffffff);
|
|
PeteScroll(pte,0,pseCenterSelection);
|
|
err = PeteInsertIntlText(pte, &offset, text, start, end, converter, encoding, false, true);
|
|
PeteSetURLRescan(pte,0);
|
|
PeteNickScan (pte);
|
|
return err;
|
|
}
|
|
|
|
OSErr PeteInsertIntlText(PETEHandle pte, long *offset, Handle text, long start, long end, IntlConverter *converter, TextEncoding encoding, Boolean needSpace, Boolean flush)
|
|
{
|
|
PETEStyleEntry pse;
|
|
OSErr err = noErr;
|
|
Str255 outText;
|
|
long outLen, inLen, usedInLen = 0;
|
|
Byte hState;
|
|
Ptr textPtr;
|
|
Boolean bufRep, hand;
|
|
|
|
if(converter == nil)
|
|
{
|
|
converter = &uGlobals.quickConverter;
|
|
err = UpdateTECConverter(&converter->inToUnicode, nil, encoding, &converter->inToUnicodeEncoding, CreateTextEncoding(kTextEncodingUnicodeDefault, kTextEncodingDefaultVariant, kUnicode16BitFormat), &converter->maclatin1, nil);
|
|
if(err) return err;
|
|
err = UpdateIntlConverter(converter, nil);
|
|
if(err) return err;
|
|
}
|
|
|
|
PETEGetStyle(PETE,pte,kPETECurrentStyle,nil,&pse);
|
|
hand = ((text!=nil) && (start >= 0));
|
|
if(hand)
|
|
{
|
|
hState = HGetState(text);
|
|
} else {
|
|
textPtr = (Ptr)text;
|
|
}
|
|
flush = flush && (text != nil);
|
|
do {
|
|
do {
|
|
bufRep = false;
|
|
inLen = end - usedInLen;
|
|
if(start >= 0) inLen -= start;
|
|
outLen = sizeof(outText);
|
|
if(hand) textPtr = LDRef(text) + start;
|
|
err = ConvertIntlText(converter,textPtr?textPtr+usedInLen:(Ptr)-1L,&inLen,outText,&outLen,&encoding,needSpace?addWordSpaceConditional:dontAddWordSpace, nil);
|
|
if(hand) HSetState(text, hState);
|
|
if(outLen == 0) break;
|
|
needSpace = false;
|
|
switch(err)
|
|
{
|
|
case kTECOutputBufferFullStatus :
|
|
case kTECArrayFullErr :
|
|
bufRep = true;
|
|
case noErr :
|
|
err = EncodingPlusPeteStyle(encoding, &pse, nil);
|
|
if(err == noErr)
|
|
{
|
|
**Pslh = pse;
|
|
err = PETEInsertTextPtr(PETE,pte,offset==nil?kPETECurrentSelection:*offset,outText,outLen,Pslh);
|
|
if((err == noErr) && (offset != nil) && (*offset >= 0L)) *offset+=outLen;
|
|
}
|
|
}
|
|
usedInLen += inLen;
|
|
} while((err == noErr) && bufRep);
|
|
textPtr = nil;
|
|
hand = false;
|
|
start = end = usedInLen = 0L;
|
|
} while((err == noErr) && !(flush = !flush));
|
|
if(!err && (converter->unicodeToMac != nil))
|
|
ClearIntlConverterContext(converter->unicodeToMac, false);
|
|
return err;
|
|
}
|
|
|
|
Boolean EncodingError(OSStatus err)
|
|
{
|
|
switch(err) {
|
|
case kTextUnsupportedEncodingErr :
|
|
case kTextMalformedInputErr :
|
|
case kTextUndefinedElementErr :
|
|
case kTECPartialCharErr :
|
|
case kTECUnmappableElementErr :
|
|
case kTECIncompleteElementErr :
|
|
return true;
|
|
default :
|
|
return false;
|
|
}
|
|
}
|
|
|
|
Boolean HasUnicode()
|
|
{
|
|
return uGlobals.tecVersion != 0;
|
|
}
|
|
|
|
// End buffer *before* the CR
|
|
OSStatus MessageToUTF8(Handle inText, long inOff, ByteCount inLen, AccuPtr a, int *context)
|
|
{
|
|
static Str31 charset;
|
|
static Str15 dirs[umDirCount] = {0};
|
|
Str15 temp;
|
|
ByteOffset next = inOff, last;
|
|
long offset;
|
|
Byte hState;
|
|
TextEncoding encoding;
|
|
OSStatus err = noErr;
|
|
|
|
if(inLen == 0) return noErr;
|
|
|
|
if(!HasUnicode()) return paramErr;
|
|
|
|
err = UpgradeScriptInfoToTextEncoding(FontToScript(FontID), kTextLanguageDontCare, kTextRegionDontCare, nil, &encoding);
|
|
if(err) return err;
|
|
|
|
if(dirs[umFlowedDir][0] == 0)
|
|
GetRString(dirs[umFlowedDir],EnrichedStrn+enXFlowed);
|
|
if(dirs[umCharsetDir][0] == 0)
|
|
GetRString(dirs[umCharsetDir],EnrichedStrn+enXCharset);
|
|
if(dirs[umHtmlDir][0] == 0)
|
|
GetRString(dirs[umHtmlDir],EnrichedStrn+enXHTML);
|
|
if(dirs[umRichDir][0] == 0)
|
|
GetRString(dirs[umRichDir],EnrichedStrn+enXRich);
|
|
hState = HGetState(inText);
|
|
|
|
while((err == noErr) && (next < inOff + inLen))
|
|
{
|
|
switch(*context)
|
|
{
|
|
case umHeaderState :
|
|
charset[0] = 0;
|
|
temp[0] = temp[1] = 13;
|
|
offset = SearchPtrPtr(temp, 2, LDRef(inText), next, inLen - (next - inOff), false, false, nil);
|
|
HSetState(inText, hState);
|
|
last = next;
|
|
if(offset < 0)
|
|
{
|
|
next = inLen + inOff;
|
|
}
|
|
else
|
|
{
|
|
next += offset;
|
|
*context = umTextState;
|
|
}
|
|
err = InsertIntlHeaders(inText, last, next - last, a, encoding, nil, nil);
|
|
break;
|
|
case umTextState :
|
|
charset[0] = 0;
|
|
temp[0] = 13;
|
|
temp[1] = '<';
|
|
offset = SearchPtrPtr(temp, 2, LDRef(inText), next, inLen - (next - inOff), false, false, nil);
|
|
HSetState(inText, hState);
|
|
last = next;
|
|
if(offset < 0)
|
|
{
|
|
next = inLen + inOff;
|
|
offset = 0;
|
|
}
|
|
else
|
|
{
|
|
int i;
|
|
|
|
// Move by the return
|
|
next += offset + 1;
|
|
for(i = umFlowedDir; i < umDirCount; ++i)
|
|
{
|
|
if((inLen - (next - inOff) > dirs[i][0]) &&
|
|
(PPtrFindSub(dirs[i], *inText + next + 1, dirs[i][0]) != nil))
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
if(i < umDirCount)
|
|
{
|
|
*context = i;
|
|
offset = next + dirs[i][0] + 1;
|
|
offset = ParseCharset(*inText + offset, inLen - (offset - inOff), charset, i);
|
|
} else offset = 0;
|
|
}
|
|
err = InternetToUTF8Text(nil, encoding, inText, last, next - last, a, false);
|
|
next += offset;
|
|
break;
|
|
case umFlowedState :
|
|
case umCharsetState :
|
|
case umHtmlState :
|
|
case umRichState :
|
|
temp[0] = 3;
|
|
temp[1] = 13;
|
|
temp[2] = '<';
|
|
temp[3] = '/';
|
|
PCat(temp, dirs[*context]);
|
|
temp[++temp[0]] = '>';
|
|
offset = SearchPtrPtr(&temp[1], temp[0], LDRef(inText), next, inLen - (next - inOff), false, false, nil);
|
|
HSetState(inText, hState);
|
|
last = next;
|
|
if(offset < 0)
|
|
{
|
|
next = inLen + inOff;
|
|
offset = 0;
|
|
}
|
|
else
|
|
{
|
|
// Move by the return
|
|
next += offset + 1;
|
|
*context = umTextState;
|
|
offset = temp[0];
|
|
}
|
|
err = InternetToUTF8Text(charset, encoding, inText, last, next - last, a, false);
|
|
next += offset;
|
|
}
|
|
}
|
|
return err;
|
|
}
|
|
|
|
long ParseCharset(Ptr textPtr, long len, PStr charset, umDirectives dir)
|
|
{
|
|
static Str15 paramDir = {0}, charsetAttr = {0};
|
|
Ptr tempPtr;
|
|
long offset;
|
|
Boolean done = false;
|
|
|
|
if(paramDir[0] == 0)
|
|
ComposeRString(paramDir, MIME_RICH_ON, EnrichedStrn+enParam);
|
|
if(charsetAttr[0] == 0)
|
|
{
|
|
charsetAttr[++charsetAttr[0]] = ' ';
|
|
PCatR(charsetAttr,HTMLAttributeStrn+htmlCharsetAttr);
|
|
charsetAttr[++charsetAttr[0]] = '=';
|
|
}
|
|
|
|
charset[0] = 0;
|
|
offset = 0;
|
|
switch(dir)
|
|
{
|
|
case umHtmlDir :
|
|
tempPtr = memchr(textPtr, '>', len);
|
|
if(tempPtr)
|
|
{
|
|
offset = (tempPtr - textPtr) + 1;
|
|
tempPtr = PPtrFindSub(charsetAttr, textPtr, offset);
|
|
if(tempPtr)
|
|
{
|
|
len -= textPtr - tempPtr;
|
|
while((len > 0) && (charset[0] < 32) && ((charset[charset[0] + 1] = *++tempPtr) != '"'))
|
|
{
|
|
--len;
|
|
++charset[0];
|
|
}
|
|
done = (*tempPtr == '"');
|
|
}
|
|
}
|
|
break;
|
|
case umRichDir :
|
|
while((len > 0) && (*textPtr != '>'))
|
|
{
|
|
--len;
|
|
++textPtr;
|
|
++offset;
|
|
}
|
|
--len;
|
|
++textPtr;
|
|
++offset;
|
|
if((len > paramDir[0]) && PPtrFindSub(paramDir, textPtr, paramDir[0]))
|
|
{
|
|
textPtr += paramDir[0];
|
|
offset += paramDir[0];
|
|
len -= paramDir[0];
|
|
while((len > 0) && (charset[0] < 32) && ((charset[charset[0] + 1] = *++textPtr) != '<'))
|
|
{
|
|
--len;
|
|
++offset;
|
|
++charset[0];
|
|
}
|
|
if((*textPtr++ == '<') && (*textPtr++ == '/') && !strincmp(textPtr, ¶mDir[2], paramDir[0] - 1))
|
|
{
|
|
done = true;
|
|
offset += paramDir[0] + 1;
|
|
}
|
|
}
|
|
break;
|
|
case umFlowedDir :
|
|
case umCharsetDir :
|
|
while((len > 0) && (*textPtr != ' ') && (*textPtr != '>'))
|
|
{
|
|
--len;
|
|
++textPtr;
|
|
++offset;
|
|
}
|
|
++offset;
|
|
if(*textPtr == ' ')
|
|
{
|
|
while((len > 0) && (charset[0] < 32) && ((charset[charset[0] + 1] = *++textPtr) != '>'))
|
|
{
|
|
--len;
|
|
++offset;
|
|
++charset[0];
|
|
}
|
|
}
|
|
done = (*textPtr == '>');
|
|
}
|
|
if(!done)
|
|
{
|
|
offset = 0;
|
|
charset[0] = 0;
|
|
}
|
|
return offset;
|
|
}
|
|
|
|
OSStatus PeteGetUTF8Text(PETEHandle pte, long offset, long iLen, long *iUsed, UPtr out, long oLen, long *oUsed)
|
|
{
|
|
long runLen, usedIn, usedOut;
|
|
OSStatus err = noErr;
|
|
PETEStyleInfo style;
|
|
LangCode lang;
|
|
StringPtr fontName;
|
|
UHandle text;
|
|
TextEncoding encoding;
|
|
Byte hState;
|
|
|
|
*iUsed = 0;
|
|
*oUsed = 0;
|
|
runLen = 0;
|
|
err = PETEGetRawText(PETE,pte,&text);
|
|
if(err) return err;
|
|
|
|
hState = HGetState(text);
|
|
if(iLen > PeteLen(pte) - offset) iLen = PeteLen(pte) - offset;
|
|
while((iLen > 0) && (oLen > 0) && !err)
|
|
{
|
|
if(runLen == 0)
|
|
{
|
|
err = PeteGetStyleRun(pte, offset + *iUsed, &runLen, &style, peFontValid|peLangValid);
|
|
if(err) return err;
|
|
}
|
|
if(runLen > iLen) runLen = iLen;
|
|
|
|
if((style.textStyle.tsFont != kPETEDefaultFont) && (style.textStyle.tsFont != kPETEDefaultFixed))
|
|
{
|
|
lang = kTextLanguageDontCare;
|
|
fontName = GlobalTemp;
|
|
GetFontName(style.textStyle.tsFont, fontName);
|
|
}
|
|
else
|
|
{
|
|
lang = style.textStyle.tsLang;
|
|
fontName = nil;
|
|
}
|
|
err = UpgradeScriptInfoToTextEncoding(kTextScriptDontCare, lang, kTextRegionDontCare, fontName, &encoding);
|
|
if(err) return err;
|
|
|
|
err = UpdateTECConverter(&uGlobals.internetToUTF8, nil, encoding, &uGlobals.internetToUTF8Encoding, UTF8_ENCODING, &uGlobals.maclatin1, nil);
|
|
if(err) return err;
|
|
|
|
HLock(text);
|
|
err = MyTECConvertText(uGlobals.internetToUTF8, *text + offset + *iUsed, runLen, &usedIn, out + *oUsed, oLen, &usedOut, uGlobals.maclatin1);
|
|
HSetState(text, hState);
|
|
if(err && err != kTECOutputBufferFullStatus) return err;
|
|
|
|
oLen -= usedOut;
|
|
*oUsed += usedOut;
|
|
iLen -= usedIn;
|
|
*iUsed += usedIn;
|
|
runLen -= usedIn;
|
|
}
|
|
return err;
|
|
}
|
|
|
|
OSErr PeteGetStyleRun(PETEHandle pte, long offset, long *len, PETEStyleInfoPtr style, long validBits)
|
|
{
|
|
OSErr err;
|
|
long runLen, fullLen;
|
|
PETEStyleEntry pse;
|
|
|
|
*len = 0;
|
|
fullLen = PETEGetTextLen(PETE, pte);
|
|
err = PETEGetStyle(PETE, pte, offset, &runLen, &pse);
|
|
if(err) return err;
|
|
*style = pse.psStyle;
|
|
runLen -= (offset - pse.psStartChar);
|
|
while((runLen + offset + *len) < fullLen)
|
|
{
|
|
long tempLen, diffBits;
|
|
PETEStyleEntry pse2;
|
|
|
|
err = PETEGetStyle(PETE, pte, offset + *len + runLen, &tempLen, &pse2);
|
|
if(err) break;
|
|
|
|
err = PETECompareStyles(PETE, pte, &pse, &pse2, validBits, false, &diffBits);
|
|
if(err) break;
|
|
|
|
if(diffBits) break;
|
|
|
|
*len += runLen;
|
|
runLen = tempLen - pse2.psStartChar;
|
|
}
|
|
*len += runLen;
|
|
return err;
|
|
}
|
|
|
|
TextEncoding CreateSystemRomanEncoding()
|
|
{
|
|
TextEncoding encoding;
|
|
|
|
if(noErr != UpgradeScriptInfoToTextEncoding(smRoman, (LangCode)LoWord(GetScriptVariable(smRoman, smScriptLang)), kTextRegionDontCare, nil, &encoding))
|
|
return DefaultEncoding(kTextEncodingMacRoman);
|
|
else
|
|
return encoding;
|
|
}
|
|
|
|
OSStatus MyTECConvertText (TECObjectRef encodingConverter,
|
|
ConstTextPtr inputBuffer,
|
|
ByteCount inputBufferLength,
|
|
ByteCount * actualInputLength,
|
|
TextPtr outputBuffer,
|
|
ByteCount outputBufferLength,
|
|
ByteCount * actualOutputLength,
|
|
Boolean maclatin1)
|
|
{
|
|
OSStatus err;
|
|
|
|
if(!maclatin1)
|
|
{
|
|
err = TECConvertText(encodingConverter, inputBuffer, inputBufferLength, actualInputLength, outputBuffer, outputBufferLength, actualOutputLength);
|
|
if (err == kTECUsedFallbacksStatus) err = noErr;
|
|
}
|
|
else
|
|
{
|
|
if(actualInputLength) *actualInputLength = 0;
|
|
if(actualOutputLength) *actualOutputLength = 0;
|
|
do
|
|
{
|
|
ByteCount curLen, usedILen, usedOLen;
|
|
|
|
curLen = MIN(inputBufferLength, sizeof(GlobalTemp));
|
|
BMD(inputBuffer, GlobalTemp, curLen);
|
|
TransLitRes(GlobalTemp, curLen, TRANS_IN_TABL);
|
|
err = TECConvertText(encodingConverter, GlobalTemp, curLen, &usedILen, outputBuffer, outputBufferLength, &usedOLen);
|
|
if (err == kTECUsedFallbacksStatus) err = noErr;
|
|
if(actualInputLength) *actualInputLength += usedILen;
|
|
if(actualOutputLength) *actualOutputLength = usedOLen;
|
|
inputBufferLength -= usedILen;
|
|
outputBufferLength -= usedOLen;
|
|
inputBuffer += usedILen;
|
|
outputBuffer += usedOLen;
|
|
} while(inputBufferLength && !err);
|
|
}
|
|
return err;
|
|
}
|
|
|
|
long UnicodeMappingCount(TextEncoding encoding)
|
|
{
|
|
OptionBits matchFilter;
|
|
UnicodeMapping matchMapping;
|
|
ItemCount foundCount;
|
|
|
|
matchMapping.unicodeEncoding = DefaultEncoding(kTextEncodingUnicodeDefault);
|
|
matchMapping.otherEncoding = encoding;
|
|
matchMapping.mappingVersion = kUnicodeUseLatestMapping;
|
|
|
|
matchFilter = ( kUnicodeMatchUnicodeBaseMask |
|
|
kUnicodeMatchUnicodeVariantMask |
|
|
kUnicodeMatchUnicodeFormatMask |
|
|
kUnicodeMatchOtherBaseMask |
|
|
kUnicodeMatchOtherFormatMask );
|
|
|
|
return CountUnicodeMappings(matchFilter, &matchMapping, &foundCount) == noErr ? foundCount : 0;
|
|
}
|
|
|
|
/************************************************************************
|
|
* SniffAndConvertHandleToRoman - figure out what sort of text is in a handle and Romanize it
|
|
************************************************************************/
|
|
OSErr SniffAndConvertHandleToRoman(Handle *hp)
|
|
{
|
|
OSErr err;
|
|
uLong snifferCount = 0;
|
|
uLong textSize = GetHandleSize(*hp);
|
|
unsigned char utf8Magic[] = { 0xef,0xbb,0xbf };
|
|
|
|
// if it's unicode, it may have a byte order mark at the start, which is fffe (intel), feff (network), or efbbbf (utf-8)
|
|
// Handle these specially
|
|
if (textSize>2 && (*(uShort *)**hp==0xfffe || *(uShort *)**hp==0xfeff))
|
|
return ConvertHandleToRoman(hp,CreateTextEncoding(kTextEncodingUnicodeDefault, kTextEncodingDefaultVariant, kUnicode16BitFormat),0);
|
|
else if (textSize>3 && !memcmp(**hp,utf8Magic,3))
|
|
return ConvertHandleToRoman(hp,CreateTextEncoding(kTextEncodingUnicodeDefault, kTextEncodingDefaultVariant, kUnicodeUTF8Format),3);
|
|
|
|
// if it's ascii now, leave it
|
|
if (!AnyFunny(*hp,0)) return noErr;
|
|
|
|
// sniffers are available in tec 1.2 & up
|
|
// don't bother sniffing an empty handle
|
|
if (uGlobals.tecVersion >= 0x0120 && textSize)
|
|
// count all the sniffers we can; we'll sniff for everything
|
|
if (!TECCountAvailableSniffers(&snifferCount))
|
|
if (snifferCount)
|
|
{
|
|
TextEncoding *encodings = NuPtr(snifferCount*sizeof(TextEncoding));
|
|
uLong *errors = NuPtr(snifferCount*sizeof(uLong));
|
|
uLong *features = NuPtr(snifferCount*sizeof(uLong));
|
|
if (encodings && errors && features)
|
|
{
|
|
// ok, we have room for everything!
|
|
TECSnifferObjectRef theSniffer;
|
|
if (!TECGetAvailableSniffers(encodings,snifferCount,&snifferCount))
|
|
if (snifferCount)
|
|
if (!TECCreateSniffer(&theSniffer,encodings,snifferCount))
|
|
{
|
|
// Finally, we've made the sniffer, so let's sniff!
|
|
TECSniffTextEncoding(theSniffer,LDRef(*hp),textSize,encodings,snifferCount,errors,textSize/10,features,textSize/10);
|
|
TECDisposeSniffer(theSniffer);
|
|
UL(*hp);
|
|
|
|
// Ok, the "best" encoding will be on top. See if it's good enough
|
|
if (errors[0]<textSize/10)
|
|
{
|
|
// Now what?
|
|
err = ConvertHandleToRoman(hp,encodings[0],0);
|
|
}
|
|
else
|
|
{
|
|
// No. We can't figure out what the thing is. Bail.
|
|
err = kTextUnsupportedEncodingErr;
|
|
}
|
|
}
|
|
ZapPtr(encodings);
|
|
ZapPtr(errors);
|
|
ZapPtr(features);
|
|
}
|
|
}
|
|
return(err);
|
|
}
|
|
|
|
/************************************************************************
|
|
* ConvertHandleToRoman - Romanize text in a handle, if we can
|
|
************************************************************************/
|
|
OSErr ConvertHandleToRoman(Handle *hp,TextEncoding encoding,uLong offset)
|
|
{
|
|
TECObjectRef converter;
|
|
OSErr err = TECCreateConverter(&converter,encoding,kTextEncodingMacRoman);
|
|
uLong inLen = GetHandleSize(*hp)-offset;
|
|
uLong outLen;
|
|
UHandle newHandle;
|
|
|
|
if (!err)
|
|
{
|
|
if (newHandle = NuHandle(inLen))
|
|
{
|
|
err = TECConvertText(converter,LDRef(*hp)+offset,inLen,&inLen,LDRef(newHandle),inLen,&outLen);
|
|
if (err) ZapHandle(newHandle);
|
|
UL(*hp);
|
|
UL(newHandle);
|
|
}
|
|
else err = MemError();
|
|
TECDisposeConverter(converter);
|
|
}
|
|
|
|
if (!err)
|
|
{
|
|
ZapHandle(*hp);
|
|
SetHandleSize(newHandle,outLen);
|
|
*hp = newHandle;
|
|
}
|
|
|
|
return err;
|
|
}
|