ciderpress/reformat/ReformatBase.cpp

/*
 * CiderPress
 * Copyright (C) 2007 by faddenSoft, LLC.  All Rights Reserved.
 * See the file LICENSE for distribution terms.
 */
/*
 * Reformatter base class implementation.
 */
#include "StdAfx.h"
#include "ReformatBase.h"
#include <math.h>


/*
 * ==========================================================================
 *      ReformatText
 * ==========================================================================
 */

/*
 * Convert Mac OS Roman to Windows CP1252.
 */
const int kUnk = 0x3f;      // for unmappable chars, use '?'

/*static*/ const uint8_t ReformatText::kCP1252Conv[128] = {
    0xc4,   // 0x80 A + umlaut (diaeresis?)
    0xc5,   // 0x81 A + overcircle
    0xc7,   // 0x82 C + cedilla
    0xc9,   // 0x83 E + acute
    0xd1,   // 0x84 N + tilde
    0xd6,   // 0x85 O + umlaut
    0xdc,   // 0x86 U + umlaut
    0xe1,   // 0x87 a + acute
    0xe0,   // 0x88 a + grave
    0xe2,   // 0x89 a + circumflex
    0xe4,   // 0x8a a + umlaut
    0xe3,   // 0x8b a + tilde
    0xe5,   // 0x8c a + overcircle
    0xe7,   // 0x8d c + cedilla
    0xe9,   // 0x8e e + acute
    0xe8,   // 0x8f e + grave
    0xea,   // 0x90 e + circumflex
    0xeb,   // 0x91 e + umlaut
    0xed,   // 0x92 i + acute
    0xec,   // 0x93 i + grave
    0xee,   // 0x94 i + circumflex
    0xef,   // 0x95 i + umlaut
    0xf1,   // 0x96 n + tilde
    0xf3,   // 0x97 o + acute
    0xf2,   // 0x98 o + grave
    0xf4,   // 0x99 o + circumflex
    0xf6,   // 0x9a o + umlaut
    0xf5,   // 0x9b o + tilde
    0xfa,   // 0x9c u + acute
    0xf9,   // 0x9d u + grave
    0xfb,   // 0x9e u + circumflex
    0xfc,   // 0x9f u + umlaut
    0x87,   // 0xa0 double cross (dagger)
    0xb0,   // 0xa1 degrees
    0xa2,   // 0xa2 cents
    0xa3,   // 0xa3 pounds (UK$)
    0xa7,   // 0xa4 section start
    0x95,   // 0xa5 small square (bullet)  [using fat bullet]
    0xb6,   // 0xa6 paragraph (pilcrow)
    0xdf,   // 0xa7 curly B (latin small letter sharp S)
    0xae,   // 0xa8 raised 'R' (registered)
    0xa9,   // 0xa9 raised 'C' (copyright)
    0x99,   // 0xaa raised 'TM' (trademark)
    0xb4,   // 0xab acute accent
    0xa8,   // 0xac umlaut (diaeresis)
    kUnk,   // 0xad not-equal
    0xc6,   // 0xae merged AE
    0xd8,   // 0xaf O + slash (upper-case nil?)
    kUnk,   // 0xb0 infinity
    0xb1,   // 0xb1 +/-
    kUnk,   // 0xb2 <=
    kUnk,   // 0xb3 >=
    0xa5,   // 0xb4 Yen (Japan$)
    0xb5,   // 0xb5 mu (micro)
    kUnk,   // 0xb6 delta (partial differentiation) [could use D-bar 0xd0]
    kUnk,   // 0xb7 epsilon (N-ary summation) [could use C-double-bar 0x80]
    kUnk,   // 0xb8 PI (N-ary product)
    kUnk,   // 0xb9 pi
    kUnk,   // 0xba integral
    0xaa,   // 0xbb a underbar (feminine ordinal)  [using raised a]
    0xba,   // 0xbc o underbar (masculine ordinal)  [using raised o]
    kUnk,   // 0xbd omega (Ohm)
    0xe6,   // 0xbe merged ae
    0xf8,   // 0xbf o + slash (lower-case NULL?)
    0xbf,   // 0xc0 upside-down question mark
    0xa1,   // 0xc1 upside-down exclamation point
    0xac,   // 0xc2 rotated L ("not" sign)
    0xb7,   // 0xc3 checkmark (square root) [using small bullet]
    0x83,   // 0xc4 script f
    kUnk,   // 0xc5 approximately equal
    kUnk,   // 0xc6 delta (triangle / increment)
    0xab,   // 0xc7 much less than
    0xbb,   // 0xc8 much greater than
    0x85,   // 0xc9 ellipsis
    0xa0,   // 0xca blank (sticky space)
    0xc0,   // 0xcb A + grave
    0xc3,   // 0xcc A + tilde
    0xd5,   // 0xcd O + tilde
    0x8c,   // 0xce merged OE
    0x9c,   // 0xcf merged oe
    0x96,   // 0xd0 short hyphen (en dash)
    0x97,   // 0xd1 long hyphen (em dash)
    0x93,   // 0xd2 smart double-quote start
    0x94,   // 0xd3 smart double-quote end
    0x91,   // 0xd4 smart single-quote start
    0x92,   // 0xd5 smart single-quote end
    0xf7,   // 0xd6 divide
    0xa4,   // 0xd7 diamond (lozenge)  [using spiky circle]
    0xff,   // 0xd8 y + umlaut
    // [nothing below here is part of standard Windows-ASCII?]
    // remaining descriptions based on hfsutils' "charset.txt"
    kUnk,   // 0xd9 Y + umlaut
    kUnk,   // 0xda fraction slash
    kUnk,   // 0xdb currency sign
    kUnk,   // 0xdc single left-pointing angle quotation mark
    kUnk,   // 0xdd single right-pointing angle quotation mark
    kUnk,   // 0xde merged fi
    kUnk,   // 0xdf merged FL
    kUnk,   // 0xe0 double dagger
    kUnk,   // 0xe1 middle dot
    kUnk,   // 0xe2 single low-9 quotation mark
    kUnk,   // 0xe3 double low-9 quotation mark
    kUnk,   // 0xe4 per mille sign
    kUnk,   // 0xe5 A + circumflex
    kUnk,   // 0xe6 E + circumflex
    kUnk,   // 0xe7 A + acute accent
    kUnk,   // 0xe8 E + diaeresis
    kUnk,   // 0xe9 E + grave accent
    kUnk,   // 0xea I + acute accent
    kUnk,   // 0xeb I + circumflex
    kUnk,   // 0xec I + diaeresis
    kUnk,   // 0xed I + grave accent
    kUnk,   // 0xee O + acute accent
    kUnk,   // 0xef O + circumflex
    kUnk,   // 0xf0 apple logo
    kUnk,   // 0xf1 O + grave accent
    kUnk,   // 0xf2 U + acute accent
    kUnk,   // 0xf3 U + circumflex
    kUnk,   // 0xf4 U + grave accent
    kUnk,   // 0xf5 i without dot
    kUnk,   // 0xf6 modifier letter circumflex accent
    kUnk,   // 0xf7 small tilde
    kUnk,   // 0xf8 macron
    kUnk,   // 0xf9 breve
    kUnk,   // 0xfa dot above
    kUnk,   // 0xfb ring above
    kUnk,   // 0xfc cedilla
    kUnk,   // 0xfd double acute accent
    kUnk,   // 0xfe ogonek
    kUnk,   // 0xff caron
};

/*
 * Convert Mac OS Roman to Unicode.  Mapping comes from:
 *
 * http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMAN.TXT
 *
 * We use the "Control Pictures" block for the control characters
 * (0x00-0x1f, 0x7f).
 */
/*static*/ const uint16_t ReformatText::kUTF16Conv[256] = {
    /*0x00*/  0x2400,   // [control] NULL
    /*0x01*/  0x2401,   // [control] START OF HEADING
    /*0x02*/  0x2402,   // [control] START OF TEXT
    /*0x03*/  0x2403,   // [control] END OF TEXT
    /*0x04*/  0x2404,   // [control] END OF TRANSMISSION
    /*0x05*/  0x2405,   // [control] ENQUIRY
    /*0x06*/  0x2406,   // [control] ACKNOWLEDGE
    /*0x07*/  0x2407,   // [control] BELL
    /*0x08*/  0x2408,   // [control] BACKSPACE
    /*0x09*/  0x2409,   // [control] HORIZONTAL TABULATION
    /*0x0a*/  0x240a,   // [control] LINE FEED
    /*0x0b*/  0x240b,   // [control] VERTICAL TABULATION
    /*0x0c*/  0x240c,   // [control] FORM FEED
    /*0x0d*/  0x240d,   // [control] CARRIAGE RETURN
    /*0x0e*/  0x240e,   // [control] SHIFT OUT
    /*0x0f*/  0x240f,   // [control] SHIFT IN
    /*0x10*/  0x2410,   // [control] DATA LINK ESCAPE
    /*0x11*/  0x2411,   // [control] DEVICE CONTROL ONE
    /*0x12*/  0x2412,   // [control] DEVICE CONTROL TWO
    /*0x13*/  0x2413,   // [control] DEVICE CONTROL THREE
    /*0x14*/  0x2414,   // [control] DEVICE CONTROL FOUR
    /*0x15*/  0x2415,   // [control] NEGATIVE ACKNOWLEDGE
    /*0x16*/  0x2416,   // [control] SYNCHRONOUS IDLE
    /*0x17*/  0x2417,   // [control] END OF TRANSMISSION BLOCK
    /*0x18*/  0x2418,   // [control] CANCEL
    /*0x19*/  0x2419,   // [control] END OF MEDIUM
    /*0x1a*/  0x241a,   // [control] SUBSTITUTE
    /*0x1b*/  0x241b,   // [control] ESCAPE
    /*0x1c*/  0x241c,   // [control] FILE SEPARATOR
    /*0x1d*/  0x241d,   // [control] GROUP SEPARATOR
    /*0x1e*/  0x241e,   // [control] RECORD SEPARATOR
    /*0x1f*/  0x241f,   // [control] UNIT SEPARATOR
    /*0x20*/  0x0020,   // SPACE
    /*0x21*/  0x0021,   // EXCLAMATION MARK
    /*0x22*/  0x0022,   // QUOTATION MARK
    /*0x23*/  0x0023,   // NUMBER SIGN
    /*0x24*/  0x0024,   // DOLLAR SIGN
    /*0x25*/  0x0025,   // PERCENT SIGN
    /*0x26*/  0x0026,   // AMPERSAND
    /*0x27*/  0x0027,   // APOSTROPHE
    /*0x28*/  0x0028,   // LEFT PARENTHESIS
    /*0x29*/  0x0029,   // RIGHT PARENTHESIS
    /*0x2A*/  0x002A,   // ASTERISK
    /*0x2B*/  0x002B,   // PLUS SIGN
    /*0x2C*/  0x002C,   // COMMA
    /*0x2D*/  0x002D,   // HYPHEN-MINUS
    /*0x2E*/  0x002E,   // FULL STOP
    /*0x2F*/  0x002F,   // SOLIDUS
    /*0x30*/  0x0030,   // DIGIT ZERO
    /*0x31*/  0x0031,   // DIGIT ONE
    /*0x32*/  0x0032,   // DIGIT TWO
    /*0x33*/  0x0033,   // DIGIT THREE
    /*0x34*/  0x0034,   // DIGIT FOUR
    /*0x35*/  0x0035,   // DIGIT FIVE
    /*0x36*/  0x0036,   // DIGIT SIX
    /*0x37*/  0x0037,   // DIGIT SEVEN
    /*0x38*/  0x0038,   // DIGIT EIGHT
    /*0x39*/  0x0039,   // DIGIT NINE
    /*0x3A*/  0x003A,   // COLON
    /*0x3B*/  0x003B,   // SEMICOLON
    /*0x3C*/  0x003C,   // LESS-THAN SIGN
    /*0x3D*/  0x003D,   // EQUALS SIGN
    /*0x3E*/  0x003E,   // GREATER-THAN SIGN
    /*0x3F*/  0x003F,   // QUESTION MARK
    /*0x40*/  0x0040,   // COMMERCIAL AT
    /*0x41*/  0x0041,   // LATIN CAPITAL LETTER A
    /*0x42*/  0x0042,   // LATIN CAPITAL LETTER B
    /*0x43*/  0x0043,   // LATIN CAPITAL LETTER C
    /*0x44*/  0x0044,   // LATIN CAPITAL LETTER D
    /*0x45*/  0x0045,   // LATIN CAPITAL LETTER E
    /*0x46*/  0x0046,   // LATIN CAPITAL LETTER F
    /*0x47*/  0x0047,   // LATIN CAPITAL LETTER G
    /*0x48*/  0x0048,   // LATIN CAPITAL LETTER H
    /*0x49*/  0x0049,   // LATIN CAPITAL LETTER I
    /*0x4A*/  0x004A,   // LATIN CAPITAL LETTER J
    /*0x4B*/  0x004B,   // LATIN CAPITAL LETTER K
    /*0x4C*/  0x004C,   // LATIN CAPITAL LETTER L
    /*0x4D*/  0x004D,   // LATIN CAPITAL LETTER M
    /*0x4E*/  0x004E,   // LATIN CAPITAL LETTER N
    /*0x4F*/  0x004F,   // LATIN CAPITAL LETTER O
    /*0x50*/  0x0050,   // LATIN CAPITAL LETTER P
    /*0x51*/  0x0051,   // LATIN CAPITAL LETTER Q
    /*0x52*/  0x0052,   // LATIN CAPITAL LETTER R
    /*0x53*/  0x0053,   // LATIN CAPITAL LETTER S
    /*0x54*/  0x0054,   // LATIN CAPITAL LETTER T
    /*0x55*/  0x0055,   // LATIN CAPITAL LETTER U
    /*0x56*/  0x0056,   // LATIN CAPITAL LETTER V
    /*0x57*/  0x0057,   // LATIN CAPITAL LETTER W
    /*0x58*/  0x0058,   // LATIN CAPITAL LETTER X
    /*0x59*/  0x0059,   // LATIN CAPITAL LETTER Y
    /*0x5A*/  0x005A,   // LATIN CAPITAL LETTER Z
    /*0x5B*/  0x005B,   // LEFT SQUARE BRACKET
    /*0x5C*/  0x005C,   // REVERSE SOLIDUS
    /*0x5D*/  0x005D,   // RIGHT SQUARE BRACKET
    /*0x5E*/  0x005E,   // CIRCUMFLEX ACCENT
    /*0x5F*/  0x005F,   // LOW LINE
    /*0x60*/  0x0060,   // GRAVE ACCENT
    /*0x61*/  0x0061,   // LATIN SMALL LETTER A
    /*0x62*/  0x0062,   // LATIN SMALL LETTER B
    /*0x63*/  0x0063,   // LATIN SMALL LETTER C
    /*0x64*/  0x0064,   // LATIN SMALL LETTER D
    /*0x65*/  0x0065,   // LATIN SMALL LETTER E
    /*0x66*/  0x0066,   // LATIN SMALL LETTER F
    /*0x67*/  0x0067,   // LATIN SMALL LETTER G
    /*0x68*/  0x0068,   // LATIN SMALL LETTER H
    /*0x69*/  0x0069,   // LATIN SMALL LETTER I
    /*0x6A*/  0x006A,   // LATIN SMALL LETTER J
    /*0x6B*/  0x006B,   // LATIN SMALL LETTER K
    /*0x6C*/  0x006C,   // LATIN SMALL LETTER L
    /*0x6D*/  0x006D,   // LATIN SMALL LETTER M
    /*0x6E*/  0x006E,   // LATIN SMALL LETTER N
    /*0x6F*/  0x006F,   // LATIN SMALL LETTER O
    /*0x70*/  0x0070,   // LATIN SMALL LETTER P
    /*0x71*/  0x0071,   // LATIN SMALL LETTER Q
    /*0x72*/  0x0072,   // LATIN SMALL LETTER R
    /*0x73*/  0x0073,   // LATIN SMALL LETTER S
    /*0x74*/  0x0074,   // LATIN SMALL LETTER T
    /*0x75*/  0x0075,   // LATIN SMALL LETTER U
    /*0x76*/  0x0076,   // LATIN SMALL LETTER V
    /*0x77*/  0x0077,   // LATIN SMALL LETTER W
    /*0x78*/  0x0078,   // LATIN SMALL LETTER X
    /*0x79*/  0x0079,   // LATIN SMALL LETTER Y
    /*0x7A*/  0x007A,   // LATIN SMALL LETTER Z
    /*0x7B*/  0x007B,   // LEFT CURLY BRACKET
    /*0x7C*/  0x007C,   // VERTICAL LINE
    /*0x7D*/  0x007D,   // RIGHT CURLY BRACKET
    /*0x7E*/  0x007E,   // TILDE
    /*0x7f*/  0x2421,   // [control] DELETE
    /*0x80*/  0x00C4,   // LATIN CAPITAL LETTER A WITH DIAERESIS
    /*0x81*/  0x00C5,   // LATIN CAPITAL LETTER A WITH RING ABOVE
    /*0x82*/  0x00C7,   // LATIN CAPITAL LETTER C WITH CEDILLA
    /*0x83*/  0x00C9,   // LATIN CAPITAL LETTER E WITH ACUTE
    /*0x84*/  0x00D1,   // LATIN CAPITAL LETTER N WITH TILDE
    /*0x85*/  0x00D6,   // LATIN CAPITAL LETTER O WITH DIAERESIS
    /*0x86*/  0x00DC,   // LATIN CAPITAL LETTER U WITH DIAERESIS
    /*0x87*/  0x00E1,   // LATIN SMALL LETTER A WITH ACUTE
    /*0x88*/  0x00E0,   // LATIN SMALL LETTER A WITH GRAVE
    /*0x89*/  0x00E2,   // LATIN SMALL LETTER A WITH CIRCUMFLEX
    /*0x8A*/  0x00E4,   // LATIN SMALL LETTER A WITH DIAERESIS
    /*0x8B*/  0x00E3,   // LATIN SMALL LETTER A WITH TILDE
    /*0x8C*/  0x00E5,   // LATIN SMALL LETTER A WITH RING ABOVE
    /*0x8D*/  0x00E7,   // LATIN SMALL LETTER C WITH CEDILLA
    /*0x8E*/  0x00E9,   // LATIN SMALL LETTER E WITH ACUTE
    /*0x8F*/  0x00E8,   // LATIN SMALL LETTER E WITH GRAVE
    /*0x90*/  0x00EA,   // LATIN SMALL LETTER E WITH CIRCUMFLEX
    /*0x91*/  0x00EB,   // LATIN SMALL LETTER E WITH DIAERESIS
    /*0x92*/  0x00ED,   // LATIN SMALL LETTER I WITH ACUTE
    /*0x93*/  0x00EC,   // LATIN SMALL LETTER I WITH GRAVE
    /*0x94*/  0x00EE,   // LATIN SMALL LETTER I WITH CIRCUMFLEX
    /*0x95*/  0x00EF,   // LATIN SMALL LETTER I WITH DIAERESIS
    /*0x96*/  0x00F1,   // LATIN SMALL LETTER N WITH TILDE
    /*0x97*/  0x00F3,   // LATIN SMALL LETTER O WITH ACUTE
    /*0x98*/  0x00F2,   // LATIN SMALL LETTER O WITH GRAVE
    /*0x99*/  0x00F4,   // LATIN SMALL LETTER O WITH CIRCUMFLEX
    /*0x9A*/  0x00F6,   // LATIN SMALL LETTER O WITH DIAERESIS
    /*0x9B*/  0x00F5,   // LATIN SMALL LETTER O WITH TILDE
    /*0x9C*/  0x00FA,   // LATIN SMALL LETTER U WITH ACUTE
    /*0x9D*/  0x00F9,   // LATIN SMALL LETTER U WITH GRAVE
    /*0x9E*/  0x00FB,   // LATIN SMALL LETTER U WITH CIRCUMFLEX
    /*0x9F*/  0x00FC,   // LATIN SMALL LETTER U WITH DIAERESIS
    /*0xA0*/  0x2020,   // DAGGER
    /*0xA1*/  0x00B0,   // DEGREE SIGN
    /*0xA2*/  0x00A2,   // CENT SIGN
    /*0xA3*/  0x00A3,   // POUND SIGN
    /*0xA4*/  0x00A7,   // SECTION SIGN
    /*0xA5*/  0x2022,   // BULLET
    /*0xA6*/  0x00B6,   // PILCROW SIGN
    /*0xA7*/  0x00DF,   // LATIN SMALL LETTER SHARP S
    /*0xA8*/  0x00AE,   // REGISTERED SIGN
    /*0xA9*/  0x00A9,   // COPYRIGHT SIGN
    /*0xAA*/  0x2122,   // TRADE MARK SIGN
    /*0xAB*/  0x00B4,   // ACUTE ACCENT
    /*0xAC*/  0x00A8,   // DIAERESIS
    /*0xAD*/  0x2260,   // NOT EQUAL TO
    /*0xAE*/  0x00C6,   // LATIN CAPITAL LETTER AE
    /*0xAF*/  0x00D8,   // LATIN CAPITAL LETTER O WITH STROKE
    /*0xB0*/  0x221E,   // INFINITY
    /*0xB1*/  0x00B1,   // PLUS-MINUS SIGN
    /*0xB2*/  0x2264,   // LESS-THAN OR EQUAL TO
    /*0xB3*/  0x2265,   // GREATER-THAN OR EQUAL TO
    /*0xB4*/  0x00A5,   // YEN SIGN
    /*0xB5*/  0x00B5,   // MICRO SIGN
    /*0xB6*/  0x2202,   // PARTIAL DIFFERENTIAL
    /*0xB7*/  0x2211,   // N-ARY SUMMATION
    /*0xB8*/  0x220F,   // N-ARY PRODUCT
    /*0xB9*/  0x03C0,   // GREEK SMALL LETTER PI
    /*0xBA*/  0x222B,   // INTEGRAL
    /*0xBB*/  0x00AA,   // FEMININE ORDINAL INDICATOR
    /*0xBC*/  0x00BA,   // MASCULINE ORDINAL INDICATOR
    /*0xBD*/  0x03A9,   // GREEK CAPITAL LETTER OMEGA
    /*0xBE*/  0x00E6,   // LATIN SMALL LETTER AE
    /*0xBF*/  0x00F8,   // LATIN SMALL LETTER O WITH STROKE
    /*0xC0*/  0x00BF,   // INVERTED QUESTION MARK
    /*0xC1*/  0x00A1,   // INVERTED EXCLAMATION MARK
    /*0xC2*/  0x00AC,   // NOT SIGN
    /*0xC3*/  0x221A,   // SQUARE ROOT
    /*0xC4*/  0x0192,   // LATIN SMALL LETTER F WITH HOOK
    /*0xC5*/  0x2248,   // ALMOST EQUAL TO
    /*0xC6*/  0x2206,   // INCREMENT
    /*0xC7*/  0x00AB,   // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    /*0xC8*/  0x00BB,   // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    /*0xC9*/  0x2026,   // HORIZONTAL ELLIPSIS
    /*0xCA*/  0x00A0,   // NO-BREAK SPACE
    /*0xCB*/  0x00C0,   // LATIN CAPITAL LETTER A WITH GRAVE
    /*0xCC*/  0x00C3,   // LATIN CAPITAL LETTER A WITH TILDE
    /*0xCD*/  0x00D5,   // LATIN CAPITAL LETTER O WITH TILDE
    /*0xCE*/  0x0152,   // LATIN CAPITAL LIGATURE OE
    /*0xCF*/  0x0153,   // LATIN SMALL LIGATURE OE
    /*0xD0*/  0x2013,   // EN DASH
    /*0xD1*/  0x2014,   // EM DASH
    /*0xD2*/  0x201C,   // LEFT DOUBLE QUOTATION MARK
    /*0xD3*/  0x201D,   // RIGHT DOUBLE QUOTATION MARK
    /*0xD4*/  0x2018,   // LEFT SINGLE QUOTATION MARK
    /*0xD5*/  0x2019,   // RIGHT SINGLE QUOTATION MARK
    /*0xD6*/  0x00F7,   // DIVISION SIGN
    /*0xD7*/  0x25CA,   // LOZENGE
    /*0xD8*/  0x00FF,   // LATIN SMALL LETTER Y WITH DIAERESIS
    /*0xD9*/  0x0178,   // LATIN CAPITAL LETTER Y WITH DIAERESIS
    /*0xDA*/  0x2044,   // FRACTION SLASH
    /*0xDB*/  0x00A4,   // CURRENCY SIGN (was EURO SIGN)
    /*0xDC*/  0x2039,   // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    /*0xDD*/  0x203A,   // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    /*0xDE*/  0xFB01,   // LATIN SMALL LIGATURE FI
    /*0xDF*/  0xFB02,   // LATIN SMALL LIGATURE FL
    /*0xE0*/  0x2021,   // DOUBLE DAGGER
    /*0xE1*/  0x00B7,   // MIDDLE DOT
    /*0xE2*/  0x201A,   // SINGLE LOW-9 QUOTATION MARK
    /*0xE3*/  0x201E,   // DOUBLE LOW-9 QUOTATION MARK
    /*0xE4*/  0x2030,   // PER MILLE SIGN
    /*0xE5*/  0x00C2,   // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    /*0xE6*/  0x00CA,   // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    /*0xE7*/  0x00C1,   // LATIN CAPITAL LETTER A WITH ACUTE
    /*0xE8*/  0x00CB,   // LATIN CAPITAL LETTER E WITH DIAERESIS
    /*0xE9*/  0x00C8,   // LATIN CAPITAL LETTER E WITH GRAVE
    /*0xEA*/  0x00CD,   // LATIN CAPITAL LETTER I WITH ACUTE
    /*0xEB*/  0x00CE,   // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    /*0xEC*/  0x00CF,   // LATIN CAPITAL LETTER I WITH DIAERESIS
    /*0xED*/  0x00CC,   // LATIN CAPITAL LETTER I WITH GRAVE
    /*0xEE*/  0x00D3,   // LATIN CAPITAL LETTER O WITH ACUTE
    /*0xEF*/  0x00D4,   // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    /*0xF0*/  0xF8FF,   // Apple logo
    /*0xF1*/  0x00D2,   // LATIN CAPITAL LETTER O WITH GRAVE
    /*0xF2*/  0x00DA,   // LATIN CAPITAL LETTER U WITH ACUTE
    /*0xF3*/  0x00DB,   // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    /*0xF4*/  0x00D9,   // LATIN CAPITAL LETTER U WITH GRAVE
    /*0xF5*/  0x0131,   // LATIN SMALL LETTER DOTLESS I
    /*0xF6*/  0x02C6,   // MODIFIER LETTER CIRCUMFLEX ACCENT
    /*0xF7*/  0x02DC,   // SMALL TILDE
    /*0xF8*/  0x00AF,   // MACRON
    /*0xF9*/  0x02D8,   // BREVE
    /*0xFA*/  0x02D9,   // DOT ABOVE
    /*0xFB*/  0x02DA,   // RING ABOVE
    /*0xFC*/  0x00B8,   // CEDILLA
    /*0xFD*/  0x02DD,   // DOUBLE ACUTE ACCENT
    /*0xFE*/  0x02DB,   // OGONEK
    /*0xFF*/  0x02C7,   // CARON
};

/*
 * Quick sanity check on contents of array.
 *
 * No two characters should map to the same thing.  This isn't vital, but
 * if we want to have a reversible transformation someday, it'll make our
 * lives easier then.
 */
void ReformatText::CheckGSCharConv(void)
{
#ifdef _DEBUG
    bool* test = (bool*) malloc(65536 * sizeof(bool));

    memset(test, 0, 65536 * sizeof(bool));
    for (int i = 0; i < NELEM(kCP1252Conv); i++) {
        if (test[kCP1252Conv[i]] && kCP1252Conv[i] != kUnk) {
            LOGW("Character used twice: 0x%02x at %d (0x%02x)",
                kCP1252Conv[i], i, i+128);
            assert(false);
        }
        test[kCP1252Conv[i]] = true;
    }

    memset(test, 0, 65536 * sizeof(bool));
    for (int i = 0; i < NELEM(kUTF16Conv); i++) {
        if (test[kUTF16Conv[i]]) {
            LOGW("Character used twice: 0x%02x at %d (0x%02x)",
                kUTF16Conv[i], i, i+128);
            assert(false);
        }
        test[kUTF16Conv[i]] = true;
    }

    free(test);
#endif
}

/*
 * Set the output format and buffer.
 *
 * Clears our work buffer pointer so we don't free it.
 */
void ReformatText::SetResultBuffer(ReformatOutput* pOutput, bool multiFont)
{
    char* buf;
    long len;
    fExpBuf.SeizeBuffer(&buf, &len);
    pOutput->SetTextBuf(buf, len, true);

    if (pOutput->GetTextBuf() == NULL) {
        /*
         * Force "raw" mode if there's no output.  This can happen if we,
         * say, try to format an empty file as a hex dump.  We never
         * produce any output, so no buffer gets allocated.
         *
         * We set the mode to "raw" so that applications can assume that
         * results of type "text" actually have text to look at -- though
         * it's possible the length will be zero, we promise that there'll
         * be a buffer there.  I'm not sure it's important to do this,
         * but it does reduce the #of situations in which we have to
         * worry about NULL pointers.
         */
        pOutput->SetOutputKind(ReformatOutput::kOutputRaw);
        LOGI("ReformatText returning a null pointer");
    } else {
        if (fUseRTF)
            pOutput->SetOutputKind(ReformatOutput::kOutputRTF);
        else
            pOutput->SetOutputKind(ReformatOutput::kOutputText);
    }

    if (fUseRTF && multiFont)
        pOutput->SetMultipleFontsFlag(true);
}

/*
 * Output the RTF header.
 *
 * The color table is the standard MS Word color table, except that entry
 * #17 (dark grey) has been lightened from (51,51,51) because it's nearly
 * indistinguishable from black on the screen.
 *
 * The default font is Courier New (\f0) at 10 points (\fs20).
 */
void ReformatText::RTFBegin(int flags)
{
//  static const char* rtfHdr =
//"{\\rtf1\\ansi\\ansicpg1252\\deff0\\deflang1033{\\fonttbl{\\f0\\fmodern\\fprq1\\fcharset0 Courier New;}}\r\n"
//"\\viewkind4\\uc1\\pard\\f0\\fs20 ";

    static const char* rtfHdrStart =
"{\\rtf1\\ansi\\ansicpg1252\\deff0\\deflang1033\\deflangfe1033{\\fonttbl"
    "{\\f0\\fmodern\\fprq1\\fcharset0 Courier New;}"
    "{\\f1\\froman\\fprq2\\fcharset0 Times New Roman;}"
    "{\\f2\\fswiss\\fprq2\\fcharset0 Arial;}"
    "{\\f3\\froman\\fprq2\\fcharset2 Symbol;}"
    "}\r\n";

    static const char* rtfColorTable =
"{\\colortbl;"
    "\\red0\\green0\\blue0;\\red0\\green0\\blue255;\\red0\\green255\\blue255;\\red0\\green255\\blue0;"
    "\\red255\\green0\\blue255;\\red255\\green0\\blue0;\\red255\\green255\\blue0;\\red255\\green255\\blue255;"
    "\\red0\\green0\\blue128;\\red0\\green128\\blue128;\\red0\\green128\\blue0;\r\n"
    "\\red128\\green0\\blue128;\\red128\\green0\\blue0;\\red128\\green128\\blue0;\\red128\\green128\\blue128;"
    "\\red192\\green192\\blue192;\\red64\\green64\\blue64;\\red255\\green153\\blue0;}\r\n";

    static const char* rtfHdrEnd =
"\\viewkind4\\uc1\\pard\\f0\\fs20 ";

    if (fUseRTF) {
        BufPrintf("%s", rtfHdrStart);
        if ((flags & kRTFFlagColorTable) != 0)
            BufPrintf("%s", rtfColorTable);
        BufPrintf("%s", rtfHdrEnd);
    }

    fPointSize = 10;
}

/*
 * Output the RTF footer.
 */
void ReformatText::RTFEnd(void)
{
    if (fUseRTF) BufPrintf("}\r\n%c", '\0');
}

/*
 * Output RTF paragraph definition marker.  Do this every time we change some
 * aspect of paragraph formatting, such as margins or justification.
 */
void ReformatText::RTFSetPara(void)
{
    if (!fUseRTF)
        return;

    BufPrintf("\\pard\\nowidctlpar");

    if (fLeftMargin != 0 || fRightMargin != 0) {
        /* looks like RTF thinks we're getting 12 chars per inch? */
        if (fLeftMargin != 0)
            BufPrintf("\\li%d",
                (int) (fLeftMargin * (kRTFUnitsPerInch/12)));
        if (fLeftMargin != 0)
            BufPrintf("\\ri%d",
                (int) (fRightMargin * (kRTFUnitsPerInch/12)));
    }

    switch (fJustified) {
    case kJustifyLeft:                          break;
    case kJustifyRight:     BufPrintf("\\qr");  break;
    case kJustifyCenter:    BufPrintf("\\qc");  break;
    case kJustifyFull:      BufPrintf("\\qj");  break;
    default:
        assert(false);
        break;
    }

    // Ideally we'd suppress this if the next thing is an RTF
    //  formatting command, esp. "\\par".
    BufPrintf(" ");
}

/*
 * Output a new paragraph marker.
 *
 * If you're producing RTF output, this is the right way to output an
 * end-of-line character.
 */
void ReformatText::RTFNewPara(void)
{
    if (fUseRTF)
        BufPrintf("\\par\r\n");
    else
        BufPrintf("\r\n");
}


/*
 * Insert a page break.  This isn't supported by the Rich Edit control,
 * so it won't appear in CiderPress or WordPad, but it will come out in
 * Microsoft Word if you extract to a file.
 */
void ReformatText::RTFPageBreak(void)
{
    if (fUseRTF)
        BufPrintf("\\page ");
}

/*
 * RTF tab character.
 */
void ReformatText::RTFTab(void)
{
    if (fUseRTF)
        BufPrintf("\\tab ");
}

/*
 * Minor formatting.
 */
void ReformatText::RTFBoldOn(void)
{
    if (fBoldEnabled)
        return;
    if (fUseRTF) {
        BufPrintf("\\b ");
        fBoldEnabled = true;
    }
}

void ReformatText::RTFBoldOff(void)
{
    if (!fBoldEnabled)
        return;
    if (fUseRTF) {
        BufPrintf("\\b0 ");
        fBoldEnabled = false;
    }
}

void ReformatText::RTFItalicOn(void)
{
    if (fItalicEnabled)
        return;
    if (fUseRTF) {
        BufPrintf("\\i ");
        fItalicEnabled = true;
    }
}

void ReformatText::RTFItalicOff(void)
{
    if (!fItalicEnabled)
        return;
    if (fUseRTF) {
        BufPrintf("\\i0 ");
        fItalicEnabled = false;
    }
}

void ReformatText::RTFUnderlineOn(void)
{
    if (fUnderlineEnabled)
        return;
    if (fUseRTF) {
        BufPrintf("\\ul ");
        fUnderlineEnabled = true;
    }
}

void ReformatText::RTFUnderlineOff(void)
{
    if (!fUnderlineEnabled)
        return;
    if (fUseRTF) {
        BufPrintf("\\ulnone ");
        fUnderlineEnabled = false;
    }
}

void ReformatText::RTFOutlineOn(void)
{
    if (fOutlineEnabled)
        return;
    if (fUseRTF) {
        BufPrintf("\\outl ");
        fOutlineEnabled = true;
    }
}

void ReformatText::RTFOutlineOff(void)
{
    if (!fOutlineEnabled)
        return;
    if (fUseRTF) {
        BufPrintf("\\outl0 ");
        fOutlineEnabled = false;
    }
}

void ReformatText::RTFShadowOn(void)
{
    if (fShadowEnabled)
        return;
    if (fUseRTF) {
        BufPrintf("\\shad ");
        fShadowEnabled = true;
    }
}

void ReformatText::RTFShadowOff(void)
{
    if (!fShadowEnabled)
        return;
    if (fUseRTF) {
        BufPrintf("\\shad0 ");
        fShadowEnabled = false;
    }
}

void ReformatText::RTFSubscriptOn(void)
{
    if (fSubscriptEnabled)
        return;
    if (fUseRTF) {
        BufPrintf("\\sub ");
        fSubscriptEnabled = true;
    }
}

void ReformatText::RTFSubscriptOff(void)
{
    if (!fSubscriptEnabled)
        return;
    if (fUseRTF) {
        BufPrintf("\\nosupersub ");
        fSubscriptEnabled = false;
    }
}

void ReformatText::RTFSuperscriptOn(void)
{
    if (fSuperscriptEnabled)
        return;
    if (fUseRTF) {
        BufPrintf("\\super ");
        fSuperscriptEnabled = true;
    }
}

void ReformatText::RTFSuperscriptOff(void)
{
    if (!fSuperscriptEnabled)
        return;
    if (fUseRTF) {
        BufPrintf("\\nosupersub ");
        fSuperscriptEnabled = false;
    }
}

void ReformatText::RTFSetColor(TextColor color)
{
    if (color == fTextColor)
        return;
    if (fUseRTF) {
        BufPrintf("\\cf%d ", color);
        fTextColor = color;
    }
}

/*
 * Change paragraph formatting.
 */
void ReformatText::RTFParaLeft(void)
{
    if (fJustified != kJustifyLeft) {
        fJustified = kJustifyLeft;
        RTFSetPara();
    }
}

void ReformatText::RTFParaRight(void)
{
    if (fJustified != kJustifyRight) {
        fJustified = kJustifyRight;
        RTFSetPara();
    }
}

void ReformatText::RTFParaCenter(void)
{
    if (fJustified != kJustifyCenter) {
        fJustified = kJustifyCenter;
        RTFSetPara();
    }
}

void ReformatText::RTFParaJustify(void)
{
    if (fJustified != kJustifyFull) {
        fJustified = kJustifyFull;
        RTFSetPara();
    }
}

/*
 * Page margins, in 1/10th inches.
 */
void ReformatText::RTFLeftMargin(int margin)
{
    //LOGI("+++ Left margin now %d", margin);
    fLeftMargin = margin;
    RTFSetPara();
}

void ReformatText::RTFRightMargin(int margin)
{
    //LOGI("+++ Right margin now %d", margin);
    fRightMargin = margin;
    RTFSetPara();
}

/*
 * Switch to a different font size.
 */
void ReformatText::RTFSetFontSize(int points)
{
    if (fUseRTF && fPointSize != points)
        BufPrintf("\\fs%d ", points * 2);
    fPointSize = points;
}
/*
 * Switch to a different font.
 */
void ReformatText::RTFSetFont(RTFFont font)
{
    if (fUseRTF)
        BufPrintf("\\f%d ", font);
}

/*
 * Set the font by specifying a IIgs QuickDraw II font family number.
 */
void ReformatText::RTFSetGSFont(uint16_t family)
{
    float newMult;

    if (!fUseRTF)
        return;

    /*
     * Apple II fonts seem to be about 1.5x in a WYSIWYG way, except
     * for Times, which is about 1:1.
     */
    switch (family) {
    case kGSFontTimes:
        RTFSetFont(kFontTimesRoman);
        newMult = 0.9f;
        break;
    case kGSFontNewYork:
        RTFSetFont(kFontTimesRoman);
        newMult = 1.1f;
        break;

    case kGSFontSymbol:
        RTFSetFont(kFontSymbol);
        newMult = 1.0f;
        break;

    case kGSFontMonaco:
        RTFSetFont(kFontCourierNew);
        newMult = 0.80f;
        break;
    case kGSFontCourier:
    case kGSFontPCMonospace:
    case kGSFontAppleM:
    case kGSFontGenesys:
        RTFSetFont(kFontCourierNew);
        newMult = 1.5f;
        break;

    case kGSFontClassical:
    case kGSFontGenoa:
    case kGSFontWestern:
        RTFSetFont(kFontArial);
        newMult = 0.80f;
        break;
    case kGSFontChicago:
    case kGSFontVenice:
    case kGSFontGeneva:
    case kGSFontStarfleet:
    case kGSFontUnknown1:
    case kGSFontUnknown2:
        RTFSetFont(kFontArial);
        newMult = 1.0f;
        break;
    case kGSFontLondon:
    case kGSFontAthens:
    case kGSFontSanFran:
    case kGSFontShaston:
    case kGSFontToronto:
    case kGSFontCairo:
    case kGSFontLosAngeles:
    case kGSFontHelvetica:
    case kGSFontTaliesin:
        RTFSetFont(kFontArial);
        newMult = 1.5f;
        break;
    default:
        LOGI("Unrecognized font family 0x%04x, using Arial", family);
        RTFSetFont(kFontArial);
        newMult = 1.0f;
        break;
    }

    if (newMult != fGSFontSizeMult) {
        fGSFontSizeMult = newMult;
        RTFSetGSFontSize(fPreMultPointSize);
    }
}

/*
 * Set the font size of a IIgs font.  We factor the size multiplier in.
 *
 * BUG: we should track the state of the "underline" mode, and turn it
 * on and off based on the font size (8-point fonts aren't underlined).
 */
void ReformatText::RTFSetGSFontSize(int points)
{
    RTFSetFontSize((int) roundf(points * fGSFontSizeMult));

    fPreMultPointSize = points;
}

/*
 * Set bold/italic/underline etc.
 *
 * Note that "Teach" does not show underlining on text that is 8 points
 * or smaller.  We have to emulate this behavior or some documents, such
 * as ModZap's "MZ.Manual", look terrible.
 *
 * Set the font size before calling here.
 *
 * Some characters, such as '=' in Shaston 8, look the same in
 * bold as they do in plain.  This doesn't hold true for Windows
 * fonts, so we're going to look different in some circumstances.
 */
void ReformatText::RTFSetGSFontStyle(uint8_t qdStyle)
{
    if (!fUseRTF)
        return;

    if ((qdStyle & kQDStyleBold) != 0) {
        RTFBoldOn();
    } else {
        RTFBoldOff();
    }
    if ((qdStyle & kQDStyleItalic) != 0) {
        RTFItalicOn();
    } else {
        RTFItalicOff();
    }
    if ((qdStyle & kQDStyleUnderline) != 0 && fPreMultPointSize > 8) {
        RTFUnderlineOn();
    } else {
        RTFUnderlineOff();
    }
    if ((qdStyle & kQDStyleOutline) != 0) {
        RTFOutlineOn();
    } else {
        RTFOutlineOff();
    }
    if ((qdStyle & kQDStyleShadow) != 0) {
        RTFShadowOn();
    } else {
        RTFShadowOff();
    }
    if ((qdStyle & kQDStyleSuperscript) != 0) {
        RTFSuperscriptOn();
    } else {
        RTFSuperscriptOff();
    }
    if ((qdStyle & kQDStyleSubscript) != 0) {
        RTFSubscriptOn();
    } else {
        RTFSubscriptOff();
    }
}


#if 0
void
ReformatText::RTFProportionalOn(void) {
    if (fUseRTF)
        BufPrintf("\\f%d ", kFontTimesRoman);
}
void
ReformatText::RTFProportionalOff(void) {
    if (fUseRTF)
        BufPrintf("\\f%d ", kFontCourierNew);
}
#endif


/*
 * Convert the EOL markers in a buffer.  The output is written to the work
 * buffer.  The input buffer may be CR, LF, or CRLF.
 *
 * If "stripHiBits" is set, the high bit of each character is cleared before
 * the value is considered.
 */
void ReformatText::ConvertEOL(const uint8_t* srcBuf, long srcLen,
    bool stripHiBits)
{
    /* Compatibility - assume we're not stripping nulls */
    ConvertEOL(srcBuf, srcLen, stripHiBits, false);
}

/*
 * Convert the EOL markers in a buffer.  The output is written to the work
 * buffer.  The input buffer may be CR, LF, or CRLF.
 *
 * If "stripHiBits" is set, the high bit of each character is cleared before
 * the value is considered.
 *2
 * If "stripNulls" is true, no null values will make it through.
 */
void ReformatText::ConvertEOL(const uint8_t* srcBuf, long srcLen,
    bool stripHiBits, bool stripNulls)
{
    uint8_t ch;
    int mask;

    assert(!fUseRTF);   // else we have to use RTFPrintChar

    if (stripHiBits)
        mask = 0x7f;
    else
        mask = 0xff;

    /*
     * Could probably speed this up by taking things a line at a time,
     * but this is fast enough and much more straightforward.
     */
    while (srcLen) {
        ch = (*srcBuf++) & mask;
        srcLen--;

        if (ch == '\r') {
            /* got CR, check for CRLF */
            if (srcLen != 0 && ((*srcBuf) & mask) == '\n') {
                srcBuf++;
                srcLen--;
            }
            BufPrintf("\r\n");
        } else if (ch == '\n') {
            BufPrintf("\r\n");
        } else {
            /* Strip out null bytes if requested */
            if ((stripNulls && ch != 0x00) || !stripNulls)
                BufPrintf("%c", ch);
        }
    }
}

/*
 * Write a hex dump into the buffer.
 */
void ReformatText::BufHexDump(const uint8_t* srcBuf, long srcLen)
{
    const uint8_t* origSrcBuf = srcBuf;
    char chBuf[17];
    int i, remLen;

    ASSERT(srcBuf != NULL);
    ASSERT(srcLen >= 0);

    chBuf[16] = '\0';

    while (srcLen > 0) {
        BufPrintf("%08lx: ", srcBuf - origSrcBuf);

        if (srcLen >= 16) {
            if (!fUseRTF) {
                /* the really easy (and relatively fast) way */
                BufPrintf("%02x %02x %02x %02x %02x %02x %02x %02x "
                          "%02x %02x %02x %02x %02x %02x %02x %02x ",
                    srcBuf[0], srcBuf[1], srcBuf[2], srcBuf[3],
                    srcBuf[4], srcBuf[5], srcBuf[6], srcBuf[7],
                    srcBuf[8], srcBuf[9], srcBuf[10], srcBuf[11],
                    srcBuf[12], srcBuf[13], srcBuf[14], srcBuf[15]);
            } else {
                /* the fairly easy (and fairly fast) way */
                RTFBoldOn();
                BufPrintf("%02x %02x %02x %02x ",
                    srcBuf[0], srcBuf[1], srcBuf[2], srcBuf[3]);
                RTFBoldOff();
                BufPrintf("%02x %02x %02x %02x ",
                    srcBuf[4], srcBuf[5], srcBuf[6], srcBuf[7]);
                RTFBoldOn();
                BufPrintf("%02x %02x %02x %02x ",
                    srcBuf[8], srcBuf[9], srcBuf[10], srcBuf[11]);
                RTFBoldOff();
                BufPrintf("%02x %02x %02x %02x ",
                    srcBuf[12], srcBuf[13], srcBuf[14], srcBuf[15]);
            }
        } else {
            /* the not-so-easy (and not-so-fast) way */
            remLen = srcLen;

            for (i = 0; i < remLen; i++) {
                if (i == 0 || i == 8)
                    RTFBoldOn();
                else if (i == 4 || i == 12)
                    RTFBoldOff();
                BufPrintf("%02x ", srcBuf[i]);
            }
            RTFBoldOff();
            for ( ; i < 16; i++)
                BufPrintf("   ");

            /* blank out the char buf, since we're only filling part in */
            for (i = 0; i < 16; i++)
                chBuf[i] = ' ';
        }

        bool hosed = false;
        remLen = srcLen;
        if (remLen > 16)
            remLen = 16;
        int i;
        for (i = 0; i < remLen; i++) {
            chBuf[i] = PrintableChar(srcBuf[i]);
            if (fUseRTF &&
                (chBuf[i] == '\\' || chBuf[i] == '{' || chBuf[i] == '}'))
            {
                hosed = true;
                break;
            }
        }

        if (!hosed) {
            BufPrintf(" %s", chBuf);
        } else {
            /* escaped chars in RTF mode; have to do this one the hard way */
            ASSERT(fUseRTF);
            BufPrintf(" ");
            for (i = 0; i < remLen; i++) {
                RTFPrintChar(srcBuf[i]);
            }
        }

        RTFNewPara();

        srcBuf += 16;
        srcLen -= 16;
    }
}


/*
 * ==========================================================================
 *      ReformatGraphics
 * ==========================================================================
 */

/*
 * Initialize the Apple II color palette, used for Hi-Res and DHR
 * conversions.  Could also be used for lo-res mode.
 */
void ReformatGraphics::InitPalette(void)
{
    ASSERT(kPaletteSize == 16);

    static const RGBQUAD stdPalette[kPaletteSize] = {
        /* blue, green, red, reserved */
        { 0x00, 0x00, 0x00 },   // $0 black
        { 0x33, 0x00, 0xdd },   // $1 red (magenta)
        { 0x99, 0x00, 0x00 },   // $2 dark blue
        { 0xdd, 0x22, 0xdd },   // $3 purple (violet)
        { 0x22, 0x77, 0x00 },   // $4 dark green
        { 0x55, 0x55, 0x55 },   // $5 grey1 (dark)
        { 0xff, 0x22, 0x22 },   // $6 medium blue
        { 0xff, 0xaa, 0x66 },   // $7 light blue
        { 0x00, 0x55, 0x88 },   // $8 brown
        { 0x00, 0x66, 0xff },   // $9 orange
        { 0xaa, 0xaa, 0xaa },   // $A grey2 (light)
        { 0x88, 0x99, 0xff },   // $B pink
        { 0x00, 0xdd, 0x11 },   // $C green (a/k/a light green)
        { 0x00, 0xff, 0xff },   // $D yellow
        { 0x99, 0xff, 0x44 },   // $E aqua
        { 0xff, 0xff, 0xff },   // $F white
    };

    memcpy(fPalette, stdPalette, sizeof(fPalette));
}

/*
 * Stuff out DIB into the output fields, and set the appropriate flags.
 */
void ReformatGraphics::SetResultBuffer(ReformatOutput* pOutput, MyDIBitmap* pDib)
{
    ASSERT(pOutput != NULL);
    ASSERT(pDib != NULL);
    pOutput->SetOutputKind(ReformatOutput::kOutputBitmap);
    pOutput->SetDIB(pDib);
}

/*
 * Unpack the Apple PackBytes format.
 *
 * Format is:
 *  <flag><data> ...
 *
 * Flag values (first 6 bits of flag byte):
 *  00xxxxxx: (0-63) 1 to 64 bytes follow, all different
 *  01xxxxxx: (0-63) 1 to 64 repeats of next byte
 *  10xxxxxx: (0-63) 1 to 64 repeats of next 4 bytes
 *  11xxxxxx: (0-63) 1 to 64 repeats of next byte taken as 4 bytes
 *              (as in 10xxxxxx case)
 *
 * Pass the destination buffer in "dst", source buffer in "src", source
 * length in "srcLen", and expected sizes of output in "dstRem".
 *
 * Returns 0 on success, nonzero if the buffer is overfilled or underfilled.
 */
int ReformatGraphics::UnpackBytes(uint8_t* dst, const uint8_t* src,
    long dstRem, long srcLen)
{
    while (srcLen > 0) {
        uint8_t flag = *src++;
        int count = (flag & 0x3f) +1;
        uint8_t val;
        uint8_t valSet[4];
        int i;

        srcLen--;

        switch (flag & 0xc0) {
        case 0x00:
            for (i = 0; i < count; i++) {
                if (srcLen == 0 || dstRem == 0) {
                    LOGI(" SHR unpack overrun1 (srcLen=%ld dstRem=%ld)",
                        srcLen, dstRem);
                    return -1;
                }
                *dst++ = *src++;
                srcLen--;
                dstRem--;
            }
            break;
        case 0x40:
            //if (count != 3 || count != 5 || count != 6 || count != 7) {
            //  LOGI(" SHR unpack funky len %d?", count);
            //}
            if (srcLen == 0) {
                LOGI(" SHR unpack underrun2");
                return -1;
            }
            val = *src++;
            srcLen--;
            for (i = 0; i < count; i++) {
                if (dstRem == 0) {
                    LOGI(" SHR unpack overrun2 (srcLen=%d, i=%d of %d)",
                        srcLen, i, count);
                    return -1;
                }
                *dst++ = val;
                dstRem--;
            }
            break;
        case 0x80:
            if (srcLen < 4) {
                LOGI(" SHR unpack underrun3");
                return -1;
            }
            valSet[0] = *src++;
            valSet[1] = *src++;
            valSet[2] = *src++;
            valSet[3] = *src++;
            srcLen -= 4;
            for (i = 0; i < count; i++) {
                if (dstRem < 4) {
                    LOGI(" SHR unpack overrun3 (srcLen=%ld dstRem=%ld)",
                        srcLen, dstRem);
                    return -1;
                }
                *dst++ = valSet[0];
                *dst++ = valSet[1];
                *dst++ = valSet[2];
                *dst++ = valSet[3];
                dstRem -= 4;
            }
            break;
        case 0xc0:
            if (srcLen == 0) {
                LOGI(" SHR unpack underrun4");
                return -1;
            }
            val = *src++;
            srcLen--;
            for (i = 0; i < count; i++) {
                if (dstRem < 4) {
                    LOGI(" SHR unpack overrun4 (srcLen=%ld dstRem=%ld count=%d)",
                        srcLen, dstRem, count);
                    return -1;
                }
                *dst++ = val;
                *dst++ = val;
                *dst++ = val;
                *dst++ = val;
                dstRem -= 4;
            }
            break;
        default:
            ASSERT(false);
            break;
        }
    }

    ASSERT(srcLen == 0);

    /* require that we completely fill the buffer */
    if (dstRem != 0) {
        LOGI(" SHR unpack dstRem at %d", dstRem);
        return -1;
    }

    return 0;
}

/*
 * Unpack Macintosh PackBits format.  See Technical Note TN1023.
 *
 * Read a byte.
 * If the high bit is set, count is 2s complement +1 (i.e. count = (-byte)+1).
 *   Read the next byte, then write that byte 'count' times.
 * If the high bit is clear, count is 1+value (i.e. count = byte+1).  Read and
 *   copy that many bytes.
 * After "destLen" bytes have been written, return (even if in the middle of
 * a run).
 *
 * NOTE: if the count byte is 0x80, Apple says it's an invalid value and
 * should be skipped over.  Use the following byte as the count byte.  This
 * is probably because PackBits is only supposed to crunch 127 bytes, though
 * that suggests 0x81 and 0x7f are also impossible.
 *
 * We have to watch for underruns on the input and overruns on the output.
 */
void ReformatGraphics::UnPackBits(const uint8_t** pSrcBuf, long* pSrcLen,
    uint8_t** pOutPtr, long dstLen, uint8_t xorVal)
{
    const uint8_t* srcBuf = *pSrcBuf;
    long length = *pSrcLen;
    uint8_t* outPtr = *pOutPtr;
    int pixByte = 0;

    while (pixByte < dstLen && length > 0) {
        uint8_t countByte;
        int count;

        countByte = *srcBuf++;
        length--;
        if (countByte & 0x80) {
            /* RLE string */
            uint8_t ch;
            count = (countByte ^ 0xff)+1 +1;
            ch = *srcBuf++;
            length--;
            while (count-- && pixByte < dstLen) {
                *outPtr++ = ch ^ xorVal;
                pixByte++;
            }
        } else {
            /* series of bytes */
            count = countByte +1;
            while (count && pixByte < dstLen && length > 0) {
                *outPtr++ = *srcBuf++ ^ xorVal;
                count--;
                length--;
                pixByte++;
            }
        }
    }
    if (pixByte != 72) {
        /* can happen if we run out of input early */
        LOGI("  MP unexpected pixByte=%d", pixByte);
        /* keep going */
    }

    *pSrcBuf = srcBuf;
    *pSrcLen = length;
    *pOutPtr = outPtr;
}