Quick stab at Unicode for AWGS/Teach

This tweaks the RTF output of the AWGS and Teach Text converters so that
characters are converted from Mac OS Roman to Unicode, rather than to
Windows code page 1252.

It would be slightly more efficient (and possibly a bit more legible
in the RTF file) if we left the "good" conversions alone, e.g. by
continuing to use the CP1252 character for "E with acute" instead of
converting it to U+00C9.  That might leave us at the mercy of the code
page converter in some random RTF reader, though, so it's probably
best to just use the official Unicode values.
This commit is contained in:
Andy McFadden 2014-12-18 17:25:46 -08:00
parent 13c38be4aa
commit 4dd0c9339d
8 changed files with 316 additions and 29 deletions

View File

@ -250,7 +250,7 @@ void ContentList::RestoreSelection(const long* savedSel, long selCount)
{
/* match! */
if (SetItemState(i, LVIS_SELECTED, LVIS_SELECTED) == FALSE) {
LOGI("WHOA: unable to set selected on item=%d", i);
LOGW("WHOA: unable to set selected on item=%d", i);
}
break;
}

View File

@ -129,7 +129,7 @@ public:
* Get the aux type display string.
*
* "buf" must be able to hold at least 5 characters plus the NUL (i.e. 6).
* Use kFileTypeBufLen.
* Use kAuxTypeBufLen.
*/
static void MakeAuxTypeDisplayString(const GenericEntry* pEntry,
WCHAR* buf);
@ -180,12 +180,12 @@ private:
// implementation relies on the top left pixel color.)
// Load the header-bar bitmap strip (IDB_HDRBAR) into fHdrImageList and
// set the list's background color to the system button-face color.
//
// A failed Create() is only logged (as a warning); the background color
// is still set afterwards, exactly as before.
void LoadHeaderImages(void) {
    // Braces keep the warning tied to the failure case only.
    if (!fHdrImageList.Create(IDB_HDRBAR, 16, 1, CLR_DEFAULT)) {
        LOGW("GLITCH: header list create failed");
    }
    fHdrImageList.SetBkColor(::GetSysColor(COLOR_BTNFACE));
}
// Load the list-icon bitmap strip (IDB_LIST_PICS) into fListImageList and
// set the list's background color to the system window color.
//
// A failed Create() is only logged (as a warning); the background color
// is still set afterwards, exactly as before.
void LoadListImages(void) {
    // Braces keep the warning tied to the failure case only.
    if (!fListImageList.Create(IDB_LIST_PICS, 16, 1, CLR_DEFAULT)) {
        LOGW("GLITCH: list image create failed");
    }
    fListImageList.SetBkColor(::GetSysColor(COLOR_WINDOW));
}
enum { // defs for IDB_LIST_PICS

View File

@ -185,8 +185,7 @@ public:
// ever need to be reloaded. Value must be nonzero to be used.
virtual long GetSelectionSerial(void) const = 0;
/* are we allowed to change the file/aux type of this entry? */
/* (may need to generalize this to "changeable attrs" bitmask) */
/* what operations are possible with this entry? */
virtual bool GetFeatureFlag(Feature feature) const = 0;
// Accessor: returns the current value of fIndex.
long GetIndex(void) const
{
    return fIndex;
}

View File

@ -86,6 +86,7 @@ BOOL MyApp::InitInstance(void)
LogModuleLocation(L"riched.dll");
LogModuleLocation(L"riched20.dll");
LogModuleLocation(L"riched32.dll");
LogModuleLocation(L"msftedit.dll");
// This causes functions like SetProfileInt to use the registry rather
// than a .INI file. The registry key is "usually the name of a company".

View File

@ -388,7 +388,7 @@ int ReformatAWGS_WP::PrintParagraph(const uint8_t* ptr, long maxLen)
RTFTab();
break;
default:
RTFPrintExtChar(ConvertGSChar(uch));
RTFPrintExtChar(ConvertMacRomanToUTF16(uch));
break;
}
}

View File

@ -18,15 +18,11 @@
*/
/*
* Convert IIgs high-ASCII characters to Windows equivalents (when
* available).
*
* Also found this:
* http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMAN.TXT
* Convert Mac OS Roman to Windows CP1252.
*/
const int kUnk = 0x3f; // for unmappable chars, use '?'
/*static*/ const uint8_t ReformatText::kGSCharConv[128] = {
/*static*/ const uint8_t ReformatText::kCP1252Conv[128] = {
0xc4, // 0x80 A + umlaut (diaeresis?)
0xc5, // 0x81 A + overcircle
0xc7, // 0x82 C + cedilla
@ -159,6 +155,273 @@ const int kUnk = 0x3f; // for unmappable chars, use '?'
kUnk, // 0xff caron
};
/*
 * Convert Mac OS Roman to Unicode.  The table is indexed by the full
 * 8-bit Mac OS Roman value (0x00-0xff) and yields one 16-bit code unit
 * per entry.  Mapping comes from:
 *
 *  http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMAN.TXT
 *
 * We use the "Control Pictures" block (U+2400..U+2421) for the control
 * characters (0x00-0x1f, 0x7f) rather than the raw control codes.
 * 0x20-0x7e map to the identical ASCII values.
 *
 * Two entries deserve attention: 0xDB uses U+00A4 (currency sign) instead
 * of the euro sign, and 0xF0 (Apple logo) maps to U+F8FF, which lies in
 * the Unicode private-use range.
 *
 * Every entry is expected to be unique; CheckGSCharConv() asserts this
 * in debug builds.
 */
/*static*/ const uint16_t ReformatText::kUTF16Conv[256] = {
/*0x00*/ 0x2400, // [control] NULL
/*0x01*/ 0x2401, // [control] START OF HEADING
/*0x02*/ 0x2402, // [control] START OF TEXT
/*0x03*/ 0x2403, // [control] END OF TEXT
/*0x04*/ 0x2404, // [control] END OF TRANSMISSION
/*0x05*/ 0x2405, // [control] ENQUIRY
/*0x06*/ 0x2406, // [control] ACKNOWLEDGE
/*0x07*/ 0x2407, // [control] BELL
/*0x08*/ 0x2408, // [control] BACKSPACE
/*0x09*/ 0x2409, // [control] HORIZONTAL TABULATION
/*0x0a*/ 0x240a, // [control] LINE FEED
/*0x0b*/ 0x240b, // [control] VERTICAL TABULATION
/*0x0c*/ 0x240c, // [control] FORM FEED
/*0x0d*/ 0x240d, // [control] CARRIAGE RETURN
/*0x0e*/ 0x240e, // [control] SHIFT OUT
/*0x0f*/ 0x240f, // [control] SHIFT IN
/*0x10*/ 0x2410, // [control] DATA LINK ESCAPE
/*0x11*/ 0x2411, // [control] DEVICE CONTROL ONE
/*0x12*/ 0x2412, // [control] DEVICE CONTROL TWO
/*0x13*/ 0x2413, // [control] DEVICE CONTROL THREE
/*0x14*/ 0x2414, // [control] DEVICE CONTROL FOUR
/*0x15*/ 0x2415, // [control] NEGATIVE ACKNOWLEDGE
/*0x16*/ 0x2416, // [control] SYNCHRONOUS IDLE
/*0x17*/ 0x2417, // [control] END OF TRANSMISSION BLOCK
/*0x18*/ 0x2418, // [control] CANCEL
/*0x19*/ 0x2419, // [control] END OF MEDIUM
/*0x1a*/ 0x241a, // [control] SUBSTITUTE
/*0x1b*/ 0x241b, // [control] ESCAPE
/*0x1c*/ 0x241c, // [control] FILE SEPARATOR
/*0x1d*/ 0x241d, // [control] GROUP SEPARATOR
/*0x1e*/ 0x241e, // [control] RECORD SEPARATOR
/*0x1f*/ 0x241f, // [control] UNIT SEPARATOR
/*0x20*/ 0x0020, // SPACE
/*0x21*/ 0x0021, // EXCLAMATION MARK
/*0x22*/ 0x0022, // QUOTATION MARK
/*0x23*/ 0x0023, // NUMBER SIGN
/*0x24*/ 0x0024, // DOLLAR SIGN
/*0x25*/ 0x0025, // PERCENT SIGN
/*0x26*/ 0x0026, // AMPERSAND
/*0x27*/ 0x0027, // APOSTROPHE
/*0x28*/ 0x0028, // LEFT PARENTHESIS
/*0x29*/ 0x0029, // RIGHT PARENTHESIS
/*0x2A*/ 0x002A, // ASTERISK
/*0x2B*/ 0x002B, // PLUS SIGN
/*0x2C*/ 0x002C, // COMMA
/*0x2D*/ 0x002D, // HYPHEN-MINUS
/*0x2E*/ 0x002E, // FULL STOP
/*0x2F*/ 0x002F, // SOLIDUS
/*0x30*/ 0x0030, // DIGIT ZERO
/*0x31*/ 0x0031, // DIGIT ONE
/*0x32*/ 0x0032, // DIGIT TWO
/*0x33*/ 0x0033, // DIGIT THREE
/*0x34*/ 0x0034, // DIGIT FOUR
/*0x35*/ 0x0035, // DIGIT FIVE
/*0x36*/ 0x0036, // DIGIT SIX
/*0x37*/ 0x0037, // DIGIT SEVEN
/*0x38*/ 0x0038, // DIGIT EIGHT
/*0x39*/ 0x0039, // DIGIT NINE
/*0x3A*/ 0x003A, // COLON
/*0x3B*/ 0x003B, // SEMICOLON
/*0x3C*/ 0x003C, // LESS-THAN SIGN
/*0x3D*/ 0x003D, // EQUALS SIGN
/*0x3E*/ 0x003E, // GREATER-THAN SIGN
/*0x3F*/ 0x003F, // QUESTION MARK
/*0x40*/ 0x0040, // COMMERCIAL AT
/*0x41*/ 0x0041, // LATIN CAPITAL LETTER A
/*0x42*/ 0x0042, // LATIN CAPITAL LETTER B
/*0x43*/ 0x0043, // LATIN CAPITAL LETTER C
/*0x44*/ 0x0044, // LATIN CAPITAL LETTER D
/*0x45*/ 0x0045, // LATIN CAPITAL LETTER E
/*0x46*/ 0x0046, // LATIN CAPITAL LETTER F
/*0x47*/ 0x0047, // LATIN CAPITAL LETTER G
/*0x48*/ 0x0048, // LATIN CAPITAL LETTER H
/*0x49*/ 0x0049, // LATIN CAPITAL LETTER I
/*0x4A*/ 0x004A, // LATIN CAPITAL LETTER J
/*0x4B*/ 0x004B, // LATIN CAPITAL LETTER K
/*0x4C*/ 0x004C, // LATIN CAPITAL LETTER L
/*0x4D*/ 0x004D, // LATIN CAPITAL LETTER M
/*0x4E*/ 0x004E, // LATIN CAPITAL LETTER N
/*0x4F*/ 0x004F, // LATIN CAPITAL LETTER O
/*0x50*/ 0x0050, // LATIN CAPITAL LETTER P
/*0x51*/ 0x0051, // LATIN CAPITAL LETTER Q
/*0x52*/ 0x0052, // LATIN CAPITAL LETTER R
/*0x53*/ 0x0053, // LATIN CAPITAL LETTER S
/*0x54*/ 0x0054, // LATIN CAPITAL LETTER T
/*0x55*/ 0x0055, // LATIN CAPITAL LETTER U
/*0x56*/ 0x0056, // LATIN CAPITAL LETTER V
/*0x57*/ 0x0057, // LATIN CAPITAL LETTER W
/*0x58*/ 0x0058, // LATIN CAPITAL LETTER X
/*0x59*/ 0x0059, // LATIN CAPITAL LETTER Y
/*0x5A*/ 0x005A, // LATIN CAPITAL LETTER Z
/*0x5B*/ 0x005B, // LEFT SQUARE BRACKET
/*0x5C*/ 0x005C, // REVERSE SOLIDUS
/*0x5D*/ 0x005D, // RIGHT SQUARE BRACKET
/*0x5E*/ 0x005E, // CIRCUMFLEX ACCENT
/*0x5F*/ 0x005F, // LOW LINE
/*0x60*/ 0x0060, // GRAVE ACCENT
/*0x61*/ 0x0061, // LATIN SMALL LETTER A
/*0x62*/ 0x0062, // LATIN SMALL LETTER B
/*0x63*/ 0x0063, // LATIN SMALL LETTER C
/*0x64*/ 0x0064, // LATIN SMALL LETTER D
/*0x65*/ 0x0065, // LATIN SMALL LETTER E
/*0x66*/ 0x0066, // LATIN SMALL LETTER F
/*0x67*/ 0x0067, // LATIN SMALL LETTER G
/*0x68*/ 0x0068, // LATIN SMALL LETTER H
/*0x69*/ 0x0069, // LATIN SMALL LETTER I
/*0x6A*/ 0x006A, // LATIN SMALL LETTER J
/*0x6B*/ 0x006B, // LATIN SMALL LETTER K
/*0x6C*/ 0x006C, // LATIN SMALL LETTER L
/*0x6D*/ 0x006D, // LATIN SMALL LETTER M
/*0x6E*/ 0x006E, // LATIN SMALL LETTER N
/*0x6F*/ 0x006F, // LATIN SMALL LETTER O
/*0x70*/ 0x0070, // LATIN SMALL LETTER P
/*0x71*/ 0x0071, // LATIN SMALL LETTER Q
/*0x72*/ 0x0072, // LATIN SMALL LETTER R
/*0x73*/ 0x0073, // LATIN SMALL LETTER S
/*0x74*/ 0x0074, // LATIN SMALL LETTER T
/*0x75*/ 0x0075, // LATIN SMALL LETTER U
/*0x76*/ 0x0076, // LATIN SMALL LETTER V
/*0x77*/ 0x0077, // LATIN SMALL LETTER W
/*0x78*/ 0x0078, // LATIN SMALL LETTER X
/*0x79*/ 0x0079, // LATIN SMALL LETTER Y
/*0x7A*/ 0x007A, // LATIN SMALL LETTER Z
/*0x7B*/ 0x007B, // LEFT CURLY BRACKET
/*0x7C*/ 0x007C, // VERTICAL LINE
/*0x7D*/ 0x007D, // RIGHT CURLY BRACKET
/*0x7E*/ 0x007E, // TILDE
/*0x7f*/ 0x2421, // [control] DELETE
/*0x80*/ 0x00C4, // LATIN CAPITAL LETTER A WITH DIAERESIS
/*0x81*/ 0x00C5, // LATIN CAPITAL LETTER A WITH RING ABOVE
/*0x82*/ 0x00C7, // LATIN CAPITAL LETTER C WITH CEDILLA
/*0x83*/ 0x00C9, // LATIN CAPITAL LETTER E WITH ACUTE
/*0x84*/ 0x00D1, // LATIN CAPITAL LETTER N WITH TILDE
/*0x85*/ 0x00D6, // LATIN CAPITAL LETTER O WITH DIAERESIS
/*0x86*/ 0x00DC, // LATIN CAPITAL LETTER U WITH DIAERESIS
/*0x87*/ 0x00E1, // LATIN SMALL LETTER A WITH ACUTE
/*0x88*/ 0x00E0, // LATIN SMALL LETTER A WITH GRAVE
/*0x89*/ 0x00E2, // LATIN SMALL LETTER A WITH CIRCUMFLEX
/*0x8A*/ 0x00E4, // LATIN SMALL LETTER A WITH DIAERESIS
/*0x8B*/ 0x00E3, // LATIN SMALL LETTER A WITH TILDE
/*0x8C*/ 0x00E5, // LATIN SMALL LETTER A WITH RING ABOVE
/*0x8D*/ 0x00E7, // LATIN SMALL LETTER C WITH CEDILLA
/*0x8E*/ 0x00E9, // LATIN SMALL LETTER E WITH ACUTE
/*0x8F*/ 0x00E8, // LATIN SMALL LETTER E WITH GRAVE
/*0x90*/ 0x00EA, // LATIN SMALL LETTER E WITH CIRCUMFLEX
/*0x91*/ 0x00EB, // LATIN SMALL LETTER E WITH DIAERESIS
/*0x92*/ 0x00ED, // LATIN SMALL LETTER I WITH ACUTE
/*0x93*/ 0x00EC, // LATIN SMALL LETTER I WITH GRAVE
/*0x94*/ 0x00EE, // LATIN SMALL LETTER I WITH CIRCUMFLEX
/*0x95*/ 0x00EF, // LATIN SMALL LETTER I WITH DIAERESIS
/*0x96*/ 0x00F1, // LATIN SMALL LETTER N WITH TILDE
/*0x97*/ 0x00F3, // LATIN SMALL LETTER O WITH ACUTE
/*0x98*/ 0x00F2, // LATIN SMALL LETTER O WITH GRAVE
/*0x99*/ 0x00F4, // LATIN SMALL LETTER O WITH CIRCUMFLEX
/*0x9A*/ 0x00F6, // LATIN SMALL LETTER O WITH DIAERESIS
/*0x9B*/ 0x00F5, // LATIN SMALL LETTER O WITH TILDE
/*0x9C*/ 0x00FA, // LATIN SMALL LETTER U WITH ACUTE
/*0x9D*/ 0x00F9, // LATIN SMALL LETTER U WITH GRAVE
/*0x9E*/ 0x00FB, // LATIN SMALL LETTER U WITH CIRCUMFLEX
/*0x9F*/ 0x00FC, // LATIN SMALL LETTER U WITH DIAERESIS
/*0xA0*/ 0x2020, // DAGGER
/*0xA1*/ 0x00B0, // DEGREE SIGN
/*0xA2*/ 0x00A2, // CENT SIGN
/*0xA3*/ 0x00A3, // POUND SIGN
/*0xA4*/ 0x00A7, // SECTION SIGN
/*0xA5*/ 0x2022, // BULLET
/*0xA6*/ 0x00B6, // PILCROW SIGN
/*0xA7*/ 0x00DF, // LATIN SMALL LETTER SHARP S
/*0xA8*/ 0x00AE, // REGISTERED SIGN
/*0xA9*/ 0x00A9, // COPYRIGHT SIGN
/*0xAA*/ 0x2122, // TRADE MARK SIGN
/*0xAB*/ 0x00B4, // ACUTE ACCENT
/*0xAC*/ 0x00A8, // DIAERESIS
/*0xAD*/ 0x2260, // NOT EQUAL TO
/*0xAE*/ 0x00C6, // LATIN CAPITAL LETTER AE
/*0xAF*/ 0x00D8, // LATIN CAPITAL LETTER O WITH STROKE
/*0xB0*/ 0x221E, // INFINITY
/*0xB1*/ 0x00B1, // PLUS-MINUS SIGN
/*0xB2*/ 0x2264, // LESS-THAN OR EQUAL TO
/*0xB3*/ 0x2265, // GREATER-THAN OR EQUAL TO
/*0xB4*/ 0x00A5, // YEN SIGN
/*0xB5*/ 0x00B5, // MICRO SIGN
/*0xB6*/ 0x2202, // PARTIAL DIFFERENTIAL
/*0xB7*/ 0x2211, // N-ARY SUMMATION
/*0xB8*/ 0x220F, // N-ARY PRODUCT
/*0xB9*/ 0x03C0, // GREEK SMALL LETTER PI
/*0xBA*/ 0x222B, // INTEGRAL
/*0xBB*/ 0x00AA, // FEMININE ORDINAL INDICATOR
/*0xBC*/ 0x00BA, // MASCULINE ORDINAL INDICATOR
/*0xBD*/ 0x03A9, // GREEK CAPITAL LETTER OMEGA
/*0xBE*/ 0x00E6, // LATIN SMALL LETTER AE
/*0xBF*/ 0x00F8, // LATIN SMALL LETTER O WITH STROKE
/*0xC0*/ 0x00BF, // INVERTED QUESTION MARK
/*0xC1*/ 0x00A1, // INVERTED EXCLAMATION MARK
/*0xC2*/ 0x00AC, // NOT SIGN
/*0xC3*/ 0x221A, // SQUARE ROOT
/*0xC4*/ 0x0192, // LATIN SMALL LETTER F WITH HOOK
/*0xC5*/ 0x2248, // ALMOST EQUAL TO
/*0xC6*/ 0x2206, // INCREMENT
/*0xC7*/ 0x00AB, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
/*0xC8*/ 0x00BB, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
/*0xC9*/ 0x2026, // HORIZONTAL ELLIPSIS
/*0xCA*/ 0x00A0, // NO-BREAK SPACE
/*0xCB*/ 0x00C0, // LATIN CAPITAL LETTER A WITH GRAVE
/*0xCC*/ 0x00C3, // LATIN CAPITAL LETTER A WITH TILDE
/*0xCD*/ 0x00D5, // LATIN CAPITAL LETTER O WITH TILDE
/*0xCE*/ 0x0152, // LATIN CAPITAL LIGATURE OE
/*0xCF*/ 0x0153, // LATIN SMALL LIGATURE OE
/*0xD0*/ 0x2013, // EN DASH
/*0xD1*/ 0x2014, // EM DASH
/*0xD2*/ 0x201C, // LEFT DOUBLE QUOTATION MARK
/*0xD3*/ 0x201D, // RIGHT DOUBLE QUOTATION MARK
/*0xD4*/ 0x2018, // LEFT SINGLE QUOTATION MARK
/*0xD5*/ 0x2019, // RIGHT SINGLE QUOTATION MARK
/*0xD6*/ 0x00F7, // DIVISION SIGN
/*0xD7*/ 0x25CA, // LOZENGE
/*0xD8*/ 0x00FF, // LATIN SMALL LETTER Y WITH DIAERESIS
/*0xD9*/ 0x0178, // LATIN CAPITAL LETTER Y WITH DIAERESIS
/*0xDA*/ 0x2044, // FRACTION SLASH
/*0xDB*/ 0x00A4, // CURRENCY SIGN (ROMAN.TXT lists U+20AC EURO SIGN here; 0xDB was the currency sign before Mac OS 8.5)
/*0xDC*/ 0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
/*0xDD*/ 0x203A, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
/*0xDE*/ 0xFB01, // LATIN SMALL LIGATURE FI
/*0xDF*/ 0xFB02, // LATIN SMALL LIGATURE FL
/*0xE0*/ 0x2021, // DOUBLE DAGGER
/*0xE1*/ 0x00B7, // MIDDLE DOT
/*0xE2*/ 0x201A, // SINGLE LOW-9 QUOTATION MARK
/*0xE3*/ 0x201E, // DOUBLE LOW-9 QUOTATION MARK
/*0xE4*/ 0x2030, // PER MILLE SIGN
/*0xE5*/ 0x00C2, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
/*0xE6*/ 0x00CA, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
/*0xE7*/ 0x00C1, // LATIN CAPITAL LETTER A WITH ACUTE
/*0xE8*/ 0x00CB, // LATIN CAPITAL LETTER E WITH DIAERESIS
/*0xE9*/ 0x00C8, // LATIN CAPITAL LETTER E WITH GRAVE
/*0xEA*/ 0x00CD, // LATIN CAPITAL LETTER I WITH ACUTE
/*0xEB*/ 0x00CE, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
/*0xEC*/ 0x00CF, // LATIN CAPITAL LETTER I WITH DIAERESIS
/*0xED*/ 0x00CC, // LATIN CAPITAL LETTER I WITH GRAVE
/*0xEE*/ 0x00D3, // LATIN CAPITAL LETTER O WITH ACUTE
/*0xEF*/ 0x00D4, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
/*0xF0*/ 0xF8FF, // Apple logo (Unicode private-use code point)
/*0xF1*/ 0x00D2, // LATIN CAPITAL LETTER O WITH GRAVE
/*0xF2*/ 0x00DA, // LATIN CAPITAL LETTER U WITH ACUTE
/*0xF3*/ 0x00DB, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
/*0xF4*/ 0x00D9, // LATIN CAPITAL LETTER U WITH GRAVE
/*0xF5*/ 0x0131, // LATIN SMALL LETTER DOTLESS I
/*0xF6*/ 0x02C6, // MODIFIER LETTER CIRCUMFLEX ACCENT
/*0xF7*/ 0x02DC, // SMALL TILDE
/*0xF8*/ 0x00AF, // MACRON
/*0xF9*/ 0x02D8, // BREVE
/*0xFA*/ 0x02D9, // DOT ABOVE
/*0xFB*/ 0x02DA, // RING ABOVE
/*0xFC*/ 0x00B8, // CEDILLA
/*0xFD*/ 0x02DD, // DOUBLE ACUTE ACCENT
/*0xFE*/ 0x02DB, // OGONEK
/*0xFF*/ 0x02C7, // CARON
};
/*
* Quick sanity check on contents of array.
*
@ -169,18 +432,29 @@ const int kUnk = 0x3f; // for unmappable chars, use '?'
// Debug-build sanity check of the two Mac OS Roman conversion tables.
//
// Walks kCP1252Conv and then kUTF16Conv, logging a warning and asserting
// if any output value appears more than once.  kUnk may legitimately
// repeat in kCP1252Conv because it stands in for every unmappable
// character.  Compiles to an empty function when _DEBUG is not defined.
void ReformatText::CheckGSCharConv(void)
{
#ifdef _DEBUG
    // One "seen" flag per possible 16-bit output value; large enough for
    // both tables, so the same scratch buffer is reused.
    const size_t kNumValues = 65536;
    bool* seen = (bool*) malloc(kNumValues * sizeof(bool));
    if (seen == NULL) {
        // Nothing to verify without the scratch buffer; don't crash a
        // debug run over a failed diagnostic allocation.
        LOGW("CheckGSCharConv: unable to allocate scratch buffer");
        return;
    }

    // kCP1252Conv covers Mac OS Roman 0x80-0xff, so entry i describes
    // source character i+128.
    const int cp1252Count = (int) NELEM(kCP1252Conv);
    memset(seen, 0, kNumValues * sizeof(bool));
    for (int i = 0; i < cp1252Count; i++) {
        if (seen[kCP1252Conv[i]] && kCP1252Conv[i] != kUnk) {
            LOGW("Character used twice: 0x%02x at %d (0x%02x)",
                kCP1252Conv[i], i, i+128);
            assert(false);
        }
        seen[kCP1252Conv[i]] = true;
    }

    // kUTF16Conv covers the whole 0x00-0xff range, so the index already
    // is the source character, and the output needs four hex digits.
    const int utf16Count = (int) NELEM(kUTF16Conv);
    memset(seen, 0, kNumValues * sizeof(bool));
    for (int i = 0; i < utf16Count; i++) {
        if (seen[kUTF16Conv[i]]) {
            LOGW("Character used twice: 0x%04x at %d (0x%02x)",
                kUTF16Conv[i], i, i);
            assert(false);
        }
        seen[kUTF16Conv[i]] = true;
    }

    free(seen);
#endif
}

View File

@ -346,15 +346,19 @@ protected:
}
// output an RTF-escaped char, allowing high ASCII
// (only use this if we're in RTF mode)
inline void RTFPrintExtChar(uint8_t ch) {
inline void RTFPrintExtChar(uint16_t ch) {
if (ch == '\\')
fExpBuf.Printf("\\\\");
else if (ch == '{')
fExpBuf.Printf("\\{");
else if (ch == '}')
fExpBuf.Printf("\\}");
else
else if (ch < 256) {
// TODO: should be \'xx for 0x80-ff?
fExpBuf.Printf("%c", ch);
} else {
fExpBuf.Printf("\\u%d?", ch);
}
}
// output a char, doubling up double quotes (for .CSV)
inline void BufPrintQChar(uint8_t ch) {
@ -364,13 +368,21 @@ protected:
fExpBuf.Printf("%c", ch);
}
// convert IIgs documents
uint8_t ConvertGSChar(uint8_t ch) {
// Convert a Mac OS Roman character value (from a IIgs document) to
// an 8-bit Windows CP1252 equivalent.
uint8_t ConvertMacRomanTo1252(uint8_t ch) {
if (ch < 128)
return ch;
else
return kGSCharConv[ch-128];
return kCP1252Conv[ch-128];
}
// Map a Mac OS Roman byte (as found in a IIgs document) to the matching
// UTF-16 value by direct lookup in the 256-entry kUTF16Conv table.
uint16_t ConvertMacRomanToUTF16(uint8_t ch) {
    const uint16_t utf16 = kUTF16Conv[ch];
    return utf16;
}
void CheckGSCharConv(void);
private:
@ -378,7 +390,8 @@ private:
int CreateWorkBuf(void);
enum { kRTFUnitsPerInch = 1440 }; // TWIPS
static const uint8_t kGSCharConv[];
static const uint8_t kCP1252Conv[];
static const uint16_t kUTF16Conv[];
int fLeftMargin, fRightMargin; // for documents, in 1/10th inch
int fPointSize;

View File

@ -67,7 +67,7 @@ int ReformatGWP::Process(const ReformatHolder* pHolder,
BufPrintf("\r\n");
} else {
// RTF is always off, so just use BufPrintf
BufPrintf("%c", ConvertGSChar(ch));
BufPrintf("%c", ConvertMacRomanTo1252(ch));
}
}
@ -206,7 +206,7 @@ int ReformatTeach::Process(const ReformatHolder* pHolder,
} else if (uch == '\t') {
RTFTab();
} else {
RTFPrintExtChar(ConvertGSChar(uch));
RTFPrintExtChar(ConvertMacRomanToUTF16(uch));
}
dataBuf++;
dataLen--;