mirror of
https://github.com/fadden/ciderpress.git
synced 2025-01-12 06:30:41 +00:00
Fix subvol charset handling
We weren't doing a MOR-to-UNI conversion on the sub-volume name, so HFS volumes with non-ASCII characters didn't look right. This also relocates the character-conversion code to a new source file. It's currently part of the reformat lib, though it arguably belongs in util (but that would introduce a new dependency between reformat and util).
This commit is contained in:
parent
591eef6882
commit
bb24f51ccb
@ -16,6 +16,7 @@
|
||||
#include "RenameEntryDialog.h"
|
||||
#include "ConfirmOverwriteDialog.h"
|
||||
#include "../diskimg/DiskImgDetail.h"
|
||||
#include "../reformat/Charset.h"
|
||||
|
||||
static const char kEmptyFolderMarker[] = ".$$EmptyFolder";
|
||||
|
||||
@ -748,7 +749,7 @@ CString DiskArchive::New(const WCHAR* fileName, const void* vOptions)
|
||||
* want to do it under Win2K/XP because it can be slow for larger
|
||||
* volumes.
|
||||
*/
|
||||
fileNameA = fileName;
|
||||
fileNameA = fileName; // TODO(Unicode)
|
||||
if (numBlocks > 0) {
|
||||
dierr = fDiskImg.CreateImage(fileNameA, NULL,
|
||||
DiskImg::kOuterFormatNone,
|
||||
@ -797,6 +798,7 @@ CString DiskArchive::New(const WCHAR* fileName, const void* vOptions)
|
||||
volName.MakeUpper();
|
||||
|
||||
/* format it */
|
||||
// TODO(Unicode): for HFS, we need to convert Unicode to MOR
|
||||
volNameA = volName;
|
||||
dierr = fDiskImg.FormatImage(pOptions->base.format, volNameA);
|
||||
if (dierr != kDIErrNone) {
|
||||
@ -1086,18 +1088,21 @@ int DiskArchive::LoadDiskFSContents(DiskFS* pDiskFS, const WCHAR* volName)
|
||||
*/
|
||||
pSubVol = pDiskFS->GetNextSubVolume(NULL);
|
||||
while (pSubVol != NULL) {
|
||||
CStringA subVolNameMOR;
|
||||
CString concatSubVolName;
|
||||
const char* subVolName;
|
||||
int ret;
|
||||
|
||||
subVolName = pSubVol->GetDiskFS()->GetVolumeName();
|
||||
if (subVolName == NULL)
|
||||
subVolName = "+++"; // call it *something*
|
||||
subVolNameMOR = pSubVol->GetDiskFS()->GetVolumeName();
|
||||
if (subVolNameMOR.IsEmpty()) {
|
||||
subVolNameMOR = "+++"; // call it *something*
|
||||
}
|
||||
CString subVolName(Charset::ConvertMORToUNI(subVolNameMOR));
|
||||
|
||||
if (volName[0] == '\0')
|
||||
concatSubVolName.Format(L"_%hs", subVolName);
|
||||
else
|
||||
concatSubVolName.Format(L"%ls_%hs", volName, subVolName);
|
||||
if (volName[0] == '\0') {
|
||||
concatSubVolName.Format(L"_%ls", (LPCWSTR) subVolName);
|
||||
} else {
|
||||
concatSubVolName.Format(L"%ls_%ls", volName, (LPCWSTR) subVolName);
|
||||
}
|
||||
ret = LoadDiskFSContents(pSubVol->GetDiskFS(), concatSubVolName);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
@ -139,21 +139,6 @@ void GenericEntry::SetSubVolName(const WCHAR* name)
|
||||
fSubVolName = name;
|
||||
}
|
||||
|
||||
// Simple Mac OS Roman to Unicode conversion.
|
||||
static CString ConvertMORToUNI(const CStringA& strMOR)
|
||||
{
|
||||
// We know that all MOR characters are represented in Unicode with a
|
||||
// single BMP code point, so we know that strlen(MOR) == wcslen(UNI).
|
||||
const int len = strMOR.GetLength();
|
||||
CString strUNI;
|
||||
WCHAR* uniBuf = strUNI.GetBuffer(len);
|
||||
for (int i = 0; i < len; i++) {
|
||||
uniBuf[i] = ReformatText::ConvertMacRomanToUTF16(strMOR[i]);
|
||||
}
|
||||
strUNI.ReleaseBuffer(len);
|
||||
return strUNI;
|
||||
}
|
||||
|
||||
const CString& GenericEntry::GetDisplayName(void) const
|
||||
{
|
||||
ASSERT(!fPathNameMOR.IsEmpty());
|
||||
@ -164,7 +149,7 @@ const CString& GenericEntry::GetDisplayName(void) const
|
||||
if (!fSubVolName.IsEmpty()) {
|
||||
fDisplayName = fSubVolName + (WCHAR) DiskFS::kDIFssep;
|
||||
}
|
||||
fDisplayName += ConvertMORToUNI(fPathNameMOR);
|
||||
fDisplayName += Charset::ConvertMORToUNI(fPathNameMOR);
|
||||
return fDisplayName;
|
||||
}
|
||||
|
||||
@ -1024,8 +1009,8 @@ void GenericArchive::LocalFileDetails::GenerateStoragePathName()
|
||||
// TODO(Unicode): generate MOR name from Unicode, instead of just
|
||||
// doing a generic CP-1252 conversion. We need to do this on both
|
||||
// sides though, so until we can extract MOR->Unicode we don't
|
||||
// want to add Unicode->MOR. And it all depends on NufxLib and
|
||||
// DiskImg being able to handle UTF-16 filenames.
|
||||
// want to add Unicode->MOR. For this all to work well we need NufxLib
|
||||
// and DiskImgLib to be able to handle UTF-16 filenames.
|
||||
fStoragePathNameMOR = fStrippedLocalPathName;
|
||||
}
|
||||
|
||||
|
@ -36,7 +36,7 @@ int ReformatAWGS_WP::Process(const ReformatHolder* pHolder,
|
||||
Chunk doc, header, footer;
|
||||
uint16_t val;
|
||||
|
||||
CheckGSCharConv();
|
||||
Charset::CheckGSCharConv();
|
||||
|
||||
/* must at least have the doc header and globals */
|
||||
if (srcLen < kMinExpectedLen) {
|
||||
@ -388,7 +388,7 @@ int ReformatAWGS_WP::PrintParagraph(const uint8_t* ptr, long maxLen)
|
||||
RTFTab();
|
||||
break;
|
||||
default:
|
||||
RTFPrintUTF16Char(ConvertMacRomanToUTF16(uch));
|
||||
RTFPrintUTF16Char(Charset::ConvertMacRomanToUTF16(uch));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
451
reformat/Charset.cpp
Normal file
451
reformat/Charset.cpp
Normal file
@ -0,0 +1,451 @@
|
||||
/*
|
||||
* CiderPress
|
||||
* Copyright (C) 2015 by faddenSoft. All Rights Reserved.
|
||||
* See the file LICENSE for distribution terms.
|
||||
*/
|
||||
/*
|
||||
* Reformatter base class implementation.
|
||||
*/
|
||||
#include "StdAfx.h"
|
||||
#include "Charset.h"
|
||||
|
||||
/*
|
||||
* Convert Mac OS Roman to Windows CP1252.
|
||||
*/
|
||||
const int kUnk = 0x3f; // for unmappable chars, use '?'
|
||||
|
||||
/*static*/ const uint8_t Charset::kCP1252Conv[128] = {
|
||||
0xc4, // 0x80 A + umlaut (diaeresis?)
|
||||
0xc5, // 0x81 A + overcircle
|
||||
0xc7, // 0x82 C + cedilla
|
||||
0xc9, // 0x83 E + acute
|
||||
0xd1, // 0x84 N + tilde
|
||||
0xd6, // 0x85 O + umlaut
|
||||
0xdc, // 0x86 U + umlaut
|
||||
0xe1, // 0x87 a + acute
|
||||
0xe0, // 0x88 a + grave
|
||||
0xe2, // 0x89 a + circumflex
|
||||
0xe4, // 0x8a a + umlaut
|
||||
0xe3, // 0x8b a + tilde
|
||||
0xe5, // 0x8c a + overcircle
|
||||
0xe7, // 0x8d c + cedilla
|
||||
0xe9, // 0x8e e + acute
|
||||
0xe8, // 0x8f e + grave
|
||||
0xea, // 0x90 e + circumflex
|
||||
0xeb, // 0x91 e + umlaut
|
||||
0xed, // 0x92 i + acute
|
||||
0xec, // 0x93 i + grave
|
||||
0xee, // 0x94 i + circumflex
|
||||
0xef, // 0x95 i + umlaut
|
||||
0xf1, // 0x96 n + tilde
|
||||
0xf3, // 0x97 o + acute
|
||||
0xf2, // 0x98 o + grave
|
||||
0xf4, // 0x99 o + circumflex
|
||||
0xf6, // 0x9a o + umlaut
|
||||
0xf5, // 0x9b o + tilde
|
||||
0xfa, // 0x9c u + acute
|
||||
0xf9, // 0x9d u + grave
|
||||
0xfb, // 0x9e u + circumflex
|
||||
0xfc, // 0x9f u + umlaut
|
||||
0x87, // 0xa0 double cross (dagger)
|
||||
0xb0, // 0xa1 degrees
|
||||
0xa2, // 0xa2 cents
|
||||
0xa3, // 0xa3 pounds (UK$)
|
||||
0xa7, // 0xa4 section start
|
||||
0x95, // 0xa5 small square (bullet) [using fat bullet]
|
||||
0xb6, // 0xa6 paragraph (pilcrow)
|
||||
0xdf, // 0xa7 curly B (latin small letter sharp S)
|
||||
0xae, // 0xa8 raised 'R' (registered)
|
||||
0xa9, // 0xa9 raised 'C' (copyright)
|
||||
0x99, // 0xaa raised 'TM' (trademark)
|
||||
0xb4, // 0xab acute accent
|
||||
0xa8, // 0xac umlaut (diaeresis)
|
||||
kUnk, // 0xad not-equal
|
||||
0xc6, // 0xae merged AE
|
||||
0xd8, // 0xaf O + slash (upper-case nil?)
|
||||
kUnk, // 0xb0 infinity
|
||||
0xb1, // 0xb1 +/-
|
||||
kUnk, // 0xb2 <=
|
||||
kUnk, // 0xb3 >=
|
||||
0xa5, // 0xb4 Yen (Japan$)
|
||||
0xb5, // 0xb5 mu (micro)
|
||||
kUnk, // 0xb6 delta (partial differentiation) [could use D-bar 0xd0]
|
||||
kUnk, // 0xb7 epsilon (N-ary summation) [could use C-double-bar 0x80]
|
||||
kUnk, // 0xb8 PI (N-ary product)
|
||||
kUnk, // 0xb9 pi
|
||||
kUnk, // 0xba integral
|
||||
0xaa, // 0xbb a underbar (feminine ordinal) [using raised a]
|
||||
0xba, // 0xbc o underbar (masculine ordinal) [using raised o]
|
||||
kUnk, // 0xbd omega (Ohm)
|
||||
0xe6, // 0xbe merged ae
|
||||
0xf8, // 0xbf o + slash (lower-case NULL?)
|
||||
0xbf, // 0xc0 upside-down question mark
|
||||
0xa1, // 0xc1 upside-down exclamation point
|
||||
0xac, // 0xc2 rotated L ("not" sign)
|
||||
0xb7, // 0xc3 checkmark (square root) [using small bullet]
|
||||
0x83, // 0xc4 script f
|
||||
kUnk, // 0xc5 approximately equal
|
||||
kUnk, // 0xc6 delta (triangle / increment)
|
||||
0xab, // 0xc7 much less than
|
||||
0xbb, // 0xc8 much greater than
|
||||
0x85, // 0xc9 ellipsis
|
||||
0xa0, // 0xca blank (sticky space)
|
||||
0xc0, // 0xcb A + grave
|
||||
0xc3, // 0xcc A + tilde
|
||||
0xd5, // 0xcd O + tilde
|
||||
0x8c, // 0xce merged OE
|
||||
0x9c, // 0xcf merged oe
|
||||
0x96, // 0xd0 short hyphen (en dash)
|
||||
0x97, // 0xd1 long hyphen (em dash)
|
||||
0x93, // 0xd2 smart double-quote start
|
||||
0x94, // 0xd3 smart double-quote end
|
||||
0x91, // 0xd4 smart single-quote start
|
||||
0x92, // 0xd5 smart single-quote end
|
||||
0xf7, // 0xd6 divide
|
||||
0xa4, // 0xd7 diamond (lozenge) [using spiky circle]
|
||||
0xff, // 0xd8 y + umlaut
|
||||
// [nothing below here is part of standard Windows-ASCII?]
|
||||
// remaining descriptions based on hfsutils' "charset.txt"
|
||||
kUnk, // 0xd9 Y + umlaut
|
||||
kUnk, // 0xda fraction slash
|
||||
kUnk, // 0xdb currency sign
|
||||
kUnk, // 0xdc single left-pointing angle quotation mark
|
||||
kUnk, // 0xdd single right-pointing angle quotation mark
|
||||
kUnk, // 0xde merged fi
|
||||
kUnk, // 0xdf merged FL
|
||||
kUnk, // 0xe0 double dagger
|
||||
kUnk, // 0xe1 middle dot
|
||||
kUnk, // 0xe2 single low-9 quotation mark
|
||||
kUnk, // 0xe3 double low-9 quotation mark
|
||||
kUnk, // 0xe4 per mille sign
|
||||
kUnk, // 0xe5 A + circumflex
|
||||
kUnk, // 0xe6 E + circumflex
|
||||
kUnk, // 0xe7 A + acute accent
|
||||
kUnk, // 0xe8 E + diaeresis
|
||||
kUnk, // 0xe9 E + grave accent
|
||||
kUnk, // 0xea I + acute accent
|
||||
kUnk, // 0xeb I + circumflex
|
||||
kUnk, // 0xec I + diaeresis
|
||||
kUnk, // 0xed I + grave accent
|
||||
kUnk, // 0xee O + acute accent
|
||||
kUnk, // 0xef O + circumflex
|
||||
kUnk, // 0xf0 apple logo
|
||||
kUnk, // 0xf1 O + grave accent
|
||||
kUnk, // 0xf2 U + acute accent
|
||||
kUnk, // 0xf3 U + circumflex
|
||||
kUnk, // 0xf4 U + grave accent
|
||||
kUnk, // 0xf5 i without dot
|
||||
kUnk, // 0xf6 modifier letter circumflex accent
|
||||
kUnk, // 0xf7 small tilde
|
||||
kUnk, // 0xf8 macron
|
||||
kUnk, // 0xf9 breve
|
||||
kUnk, // 0xfa dot above
|
||||
kUnk, // 0xfb ring above
|
||||
kUnk, // 0xfc cedilla
|
||||
kUnk, // 0xfd double acute accent
|
||||
kUnk, // 0xfe ogonek
|
||||
kUnk, // 0xff caron
|
||||
};
|
||||
|
||||
/*
|
||||
* Convert Mac OS Roman to Unicode. Mapping comes from:
|
||||
*
|
||||
* http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMAN.TXT
|
||||
*
|
||||
* We use the "Control Pictures" block for the control characters
|
||||
* (0x00-0x1f, 0x7f).
|
||||
*/
|
||||
/*static*/ const uint16_t Charset::kUTF16Conv[256] = {
|
||||
/*0x00*/ 0x2400, // [control] NULL
|
||||
/*0x01*/ 0x2401, // [control] START OF HEADING
|
||||
/*0x02*/ 0x2402, // [control] START OF TEXT
|
||||
/*0x03*/ 0x2403, // [control] END OF TEXT
|
||||
/*0x04*/ 0x2404, // [control] END OF TRANSMISSION
|
||||
/*0x05*/ 0x2405, // [control] ENQUIRY
|
||||
/*0x06*/ 0x2406, // [control] ACKNOWLEDGE
|
||||
/*0x07*/ 0x2407, // [control] BELL
|
||||
/*0x08*/ 0x2408, // [control] BACKSPACE
|
||||
/*0x09*/ 0x2409, // [control] HORIZONTAL TABULATION
|
||||
/*0x0a*/ 0x240a, // [control] LINE FEED
|
||||
/*0x0b*/ 0x240b, // [control] VERTICAL TABULATION
|
||||
/*0x0c*/ 0x240c, // [control] FORM FEED
|
||||
/*0x0d*/ 0x240d, // [control] CARRIAGE RETURN
|
||||
/*0x0e*/ 0x240e, // [control] SHIFT OUT
|
||||
/*0x0f*/ 0x240f, // [control] SHIFT IN
|
||||
/*0x10*/ 0x2410, // [control] DATA LINK ESCAPE
|
||||
/*0x11*/ 0x2411, // [control] DEVICE CONTROL ONE
|
||||
/*0x12*/ 0x2412, // [control] DEVICE CONTROL TWO
|
||||
/*0x13*/ 0x2413, // [control] DEVICE CONTROL THREE
|
||||
/*0x14*/ 0x2414, // [control] DEVICE CONTROL FOUR
|
||||
/*0x15*/ 0x2415, // [control] NEGATIVE ACKNOWLEDGE
|
||||
/*0x16*/ 0x2416, // [control] SYNCHRONOUS IDLE
|
||||
/*0x17*/ 0x2417, // [control] END OF TRANSMISSION BLOCK
|
||||
/*0x18*/ 0x2418, // [control] CANCEL
|
||||
/*0x19*/ 0x2419, // [control] END OF MEDIUM
|
||||
/*0x1a*/ 0x241a, // [control] SUBSTITUTE
|
||||
/*0x1b*/ 0x241b, // [control] ESCAPE
|
||||
/*0x1c*/ 0x241c, // [control] FILE SEPARATOR
|
||||
/*0x1d*/ 0x241d, // [control] GROUP SEPARATOR
|
||||
/*0x1e*/ 0x241e, // [control] RECORD SEPARATOR
|
||||
/*0x1f*/ 0x241f, // [control] UNIT SEPARATOR
|
||||
/*0x20*/ 0x0020, // SPACE
|
||||
/*0x21*/ 0x0021, // EXCLAMATION MARK
|
||||
/*0x22*/ 0x0022, // QUOTATION MARK
|
||||
/*0x23*/ 0x0023, // NUMBER SIGN
|
||||
/*0x24*/ 0x0024, // DOLLAR SIGN
|
||||
/*0x25*/ 0x0025, // PERCENT SIGN
|
||||
/*0x26*/ 0x0026, // AMPERSAND
|
||||
/*0x27*/ 0x0027, // APOSTROPHE
|
||||
/*0x28*/ 0x0028, // LEFT PARENTHESIS
|
||||
/*0x29*/ 0x0029, // RIGHT PARENTHESIS
|
||||
/*0x2A*/ 0x002A, // ASTERISK
|
||||
/*0x2B*/ 0x002B, // PLUS SIGN
|
||||
/*0x2C*/ 0x002C, // COMMA
|
||||
/*0x2D*/ 0x002D, // HYPHEN-MINUS
|
||||
/*0x2E*/ 0x002E, // FULL STOP
|
||||
/*0x2F*/ 0x002F, // SOLIDUS
|
||||
/*0x30*/ 0x0030, // DIGIT ZERO
|
||||
/*0x31*/ 0x0031, // DIGIT ONE
|
||||
/*0x32*/ 0x0032, // DIGIT TWO
|
||||
/*0x33*/ 0x0033, // DIGIT THREE
|
||||
/*0x34*/ 0x0034, // DIGIT FOUR
|
||||
/*0x35*/ 0x0035, // DIGIT FIVE
|
||||
/*0x36*/ 0x0036, // DIGIT SIX
|
||||
/*0x37*/ 0x0037, // DIGIT SEVEN
|
||||
/*0x38*/ 0x0038, // DIGIT EIGHT
|
||||
/*0x39*/ 0x0039, // DIGIT NINE
|
||||
/*0x3A*/ 0x003A, // COLON
|
||||
/*0x3B*/ 0x003B, // SEMICOLON
|
||||
/*0x3C*/ 0x003C, // LESS-THAN SIGN
|
||||
/*0x3D*/ 0x003D, // EQUALS SIGN
|
||||
/*0x3E*/ 0x003E, // GREATER-THAN SIGN
|
||||
/*0x3F*/ 0x003F, // QUESTION MARK
|
||||
/*0x40*/ 0x0040, // COMMERCIAL AT
|
||||
/*0x41*/ 0x0041, // LATIN CAPITAL LETTER A
|
||||
/*0x42*/ 0x0042, // LATIN CAPITAL LETTER B
|
||||
/*0x43*/ 0x0043, // LATIN CAPITAL LETTER C
|
||||
/*0x44*/ 0x0044, // LATIN CAPITAL LETTER D
|
||||
/*0x45*/ 0x0045, // LATIN CAPITAL LETTER E
|
||||
/*0x46*/ 0x0046, // LATIN CAPITAL LETTER F
|
||||
/*0x47*/ 0x0047, // LATIN CAPITAL LETTER G
|
||||
/*0x48*/ 0x0048, // LATIN CAPITAL LETTER H
|
||||
/*0x49*/ 0x0049, // LATIN CAPITAL LETTER I
|
||||
/*0x4A*/ 0x004A, // LATIN CAPITAL LETTER J
|
||||
/*0x4B*/ 0x004B, // LATIN CAPITAL LETTER K
|
||||
/*0x4C*/ 0x004C, // LATIN CAPITAL LETTER L
|
||||
/*0x4D*/ 0x004D, // LATIN CAPITAL LETTER M
|
||||
/*0x4E*/ 0x004E, // LATIN CAPITAL LETTER N
|
||||
/*0x4F*/ 0x004F, // LATIN CAPITAL LETTER O
|
||||
/*0x50*/ 0x0050, // LATIN CAPITAL LETTER P
|
||||
/*0x51*/ 0x0051, // LATIN CAPITAL LETTER Q
|
||||
/*0x52*/ 0x0052, // LATIN CAPITAL LETTER R
|
||||
/*0x53*/ 0x0053, // LATIN CAPITAL LETTER S
|
||||
/*0x54*/ 0x0054, // LATIN CAPITAL LETTER T
|
||||
/*0x55*/ 0x0055, // LATIN CAPITAL LETTER U
|
||||
/*0x56*/ 0x0056, // LATIN CAPITAL LETTER V
|
||||
/*0x57*/ 0x0057, // LATIN CAPITAL LETTER W
|
||||
/*0x58*/ 0x0058, // LATIN CAPITAL LETTER X
|
||||
/*0x59*/ 0x0059, // LATIN CAPITAL LETTER Y
|
||||
/*0x5A*/ 0x005A, // LATIN CAPITAL LETTER Z
|
||||
/*0x5B*/ 0x005B, // LEFT SQUARE BRACKET
|
||||
/*0x5C*/ 0x005C, // REVERSE SOLIDUS
|
||||
/*0x5D*/ 0x005D, // RIGHT SQUARE BRACKET
|
||||
/*0x5E*/ 0x005E, // CIRCUMFLEX ACCENT
|
||||
/*0x5F*/ 0x005F, // LOW LINE
|
||||
/*0x60*/ 0x0060, // GRAVE ACCENT
|
||||
/*0x61*/ 0x0061, // LATIN SMALL LETTER A
|
||||
/*0x62*/ 0x0062, // LATIN SMALL LETTER B
|
||||
/*0x63*/ 0x0063, // LATIN SMALL LETTER C
|
||||
/*0x64*/ 0x0064, // LATIN SMALL LETTER D
|
||||
/*0x65*/ 0x0065, // LATIN SMALL LETTER E
|
||||
/*0x66*/ 0x0066, // LATIN SMALL LETTER F
|
||||
/*0x67*/ 0x0067, // LATIN SMALL LETTER G
|
||||
/*0x68*/ 0x0068, // LATIN SMALL LETTER H
|
||||
/*0x69*/ 0x0069, // LATIN SMALL LETTER I
|
||||
/*0x6A*/ 0x006A, // LATIN SMALL LETTER J
|
||||
/*0x6B*/ 0x006B, // LATIN SMALL LETTER K
|
||||
/*0x6C*/ 0x006C, // LATIN SMALL LETTER L
|
||||
/*0x6D*/ 0x006D, // LATIN SMALL LETTER M
|
||||
/*0x6E*/ 0x006E, // LATIN SMALL LETTER N
|
||||
/*0x6F*/ 0x006F, // LATIN SMALL LETTER O
|
||||
/*0x70*/ 0x0070, // LATIN SMALL LETTER P
|
||||
/*0x71*/ 0x0071, // LATIN SMALL LETTER Q
|
||||
/*0x72*/ 0x0072, // LATIN SMALL LETTER R
|
||||
/*0x73*/ 0x0073, // LATIN SMALL LETTER S
|
||||
/*0x74*/ 0x0074, // LATIN SMALL LETTER T
|
||||
/*0x75*/ 0x0075, // LATIN SMALL LETTER U
|
||||
/*0x76*/ 0x0076, // LATIN SMALL LETTER V
|
||||
/*0x77*/ 0x0077, // LATIN SMALL LETTER W
|
||||
/*0x78*/ 0x0078, // LATIN SMALL LETTER X
|
||||
/*0x79*/ 0x0079, // LATIN SMALL LETTER Y
|
||||
/*0x7A*/ 0x007A, // LATIN SMALL LETTER Z
|
||||
/*0x7B*/ 0x007B, // LEFT CURLY BRACKET
|
||||
/*0x7C*/ 0x007C, // VERTICAL LINE
|
||||
/*0x7D*/ 0x007D, // RIGHT CURLY BRACKET
|
||||
/*0x7E*/ 0x007E, // TILDE
|
||||
/*0x7f*/ 0x2421, // [control] DELETE
|
||||
/*0x80*/ 0x00C4, // LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
/*0x81*/ 0x00C5, // LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
/*0x82*/ 0x00C7, // LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
/*0x83*/ 0x00C9, // LATIN CAPITAL LETTER E WITH ACUTE
|
||||
/*0x84*/ 0x00D1, // LATIN CAPITAL LETTER N WITH TILDE
|
||||
/*0x85*/ 0x00D6, // LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
/*0x86*/ 0x00DC, // LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
/*0x87*/ 0x00E1, // LATIN SMALL LETTER A WITH ACUTE
|
||||
/*0x88*/ 0x00E0, // LATIN SMALL LETTER A WITH GRAVE
|
||||
/*0x89*/ 0x00E2, // LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
/*0x8A*/ 0x00E4, // LATIN SMALL LETTER A WITH DIAERESIS
|
||||
/*0x8B*/ 0x00E3, // LATIN SMALL LETTER A WITH TILDE
|
||||
/*0x8C*/ 0x00E5, // LATIN SMALL LETTER A WITH RING ABOVE
|
||||
/*0x8D*/ 0x00E7, // LATIN SMALL LETTER C WITH CEDILLA
|
||||
/*0x8E*/ 0x00E9, // LATIN SMALL LETTER E WITH ACUTE
|
||||
/*0x8F*/ 0x00E8, // LATIN SMALL LETTER E WITH GRAVE
|
||||
/*0x90*/ 0x00EA, // LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
/*0x91*/ 0x00EB, // LATIN SMALL LETTER E WITH DIAERESIS
|
||||
/*0x92*/ 0x00ED, // LATIN SMALL LETTER I WITH ACUTE
|
||||
/*0x93*/ 0x00EC, // LATIN SMALL LETTER I WITH GRAVE
|
||||
/*0x94*/ 0x00EE, // LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
/*0x95*/ 0x00EF, // LATIN SMALL LETTER I WITH DIAERESIS
|
||||
/*0x96*/ 0x00F1, // LATIN SMALL LETTER N WITH TILDE
|
||||
/*0x97*/ 0x00F3, // LATIN SMALL LETTER O WITH ACUTE
|
||||
/*0x98*/ 0x00F2, // LATIN SMALL LETTER O WITH GRAVE
|
||||
/*0x99*/ 0x00F4, // LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
/*0x9A*/ 0x00F6, // LATIN SMALL LETTER O WITH DIAERESIS
|
||||
/*0x9B*/ 0x00F5, // LATIN SMALL LETTER O WITH TILDE
|
||||
/*0x9C*/ 0x00FA, // LATIN SMALL LETTER U WITH ACUTE
|
||||
/*0x9D*/ 0x00F9, // LATIN SMALL LETTER U WITH GRAVE
|
||||
/*0x9E*/ 0x00FB, // LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
/*0x9F*/ 0x00FC, // LATIN SMALL LETTER U WITH DIAERESIS
|
||||
/*0xA0*/ 0x2020, // DAGGER
|
||||
/*0xA1*/ 0x00B0, // DEGREE SIGN
|
||||
/*0xA2*/ 0x00A2, // CENT SIGN
|
||||
/*0xA3*/ 0x00A3, // POUND SIGN
|
||||
/*0xA4*/ 0x00A7, // SECTION SIGN
|
||||
/*0xA5*/ 0x2022, // BULLET
|
||||
/*0xA6*/ 0x00B6, // PILCROW SIGN
|
||||
/*0xA7*/ 0x00DF, // LATIN SMALL LETTER SHARP S
|
||||
/*0xA8*/ 0x00AE, // REGISTERED SIGN
|
||||
/*0xA9*/ 0x00A9, // COPYRIGHT SIGN
|
||||
/*0xAA*/ 0x2122, // TRADE MARK SIGN
|
||||
/*0xAB*/ 0x00B4, // ACUTE ACCENT
|
||||
/*0xAC*/ 0x00A8, // DIAERESIS
|
||||
/*0xAD*/ 0x2260, // NOT EQUAL TO
|
||||
/*0xAE*/ 0x00C6, // LATIN CAPITAL LETTER AE
|
||||
/*0xAF*/ 0x00D8, // LATIN CAPITAL LETTER O WITH STROKE
|
||||
/*0xB0*/ 0x221E, // INFINITY
|
||||
/*0xB1*/ 0x00B1, // PLUS-MINUS SIGN
|
||||
/*0xB2*/ 0x2264, // LESS-THAN OR EQUAL TO
|
||||
/*0xB3*/ 0x2265, // GREATER-THAN OR EQUAL TO
|
||||
/*0xB4*/ 0x00A5, // YEN SIGN
|
||||
/*0xB5*/ 0x00B5, // MICRO SIGN
|
||||
/*0xB6*/ 0x2202, // PARTIAL DIFFERENTIAL
|
||||
/*0xB7*/ 0x2211, // N-ARY SUMMATION
|
||||
/*0xB8*/ 0x220F, // N-ARY PRODUCT
|
||||
/*0xB9*/ 0x03C0, // GREEK SMALL LETTER PI
|
||||
/*0xBA*/ 0x222B, // INTEGRAL
|
||||
/*0xBB*/ 0x00AA, // FEMININE ORDINAL INDICATOR
|
||||
/*0xBC*/ 0x00BA, // MASCULINE ORDINAL INDICATOR
|
||||
/*0xBD*/ 0x03A9, // GREEK CAPITAL LETTER OMEGA
|
||||
/*0xBE*/ 0x00E6, // LATIN SMALL LETTER AE
|
||||
/*0xBF*/ 0x00F8, // LATIN SMALL LETTER O WITH STROKE
|
||||
/*0xC0*/ 0x00BF, // INVERTED QUESTION MARK
|
||||
/*0xC1*/ 0x00A1, // INVERTED EXCLAMATION MARK
|
||||
/*0xC2*/ 0x00AC, // NOT SIGN
|
||||
/*0xC3*/ 0x221A, // SQUARE ROOT
|
||||
/*0xC4*/ 0x0192, // LATIN SMALL LETTER F WITH HOOK
|
||||
/*0xC5*/ 0x2248, // ALMOST EQUAL TO
|
||||
/*0xC6*/ 0x2206, // INCREMENT
|
||||
/*0xC7*/ 0x00AB, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
/*0xC8*/ 0x00BB, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
/*0xC9*/ 0x2026, // HORIZONTAL ELLIPSIS
|
||||
/*0xCA*/ 0x00A0, // NO-BREAK SPACE
|
||||
/*0xCB*/ 0x00C0, // LATIN CAPITAL LETTER A WITH GRAVE
|
||||
/*0xCC*/ 0x00C3, // LATIN CAPITAL LETTER A WITH TILDE
|
||||
/*0xCD*/ 0x00D5, // LATIN CAPITAL LETTER O WITH TILDE
|
||||
/*0xCE*/ 0x0152, // LATIN CAPITAL LIGATURE OE
|
||||
/*0xCF*/ 0x0153, // LATIN SMALL LIGATURE OE
|
||||
/*0xD0*/ 0x2013, // EN DASH
|
||||
/*0xD1*/ 0x2014, // EM DASH
|
||||
/*0xD2*/ 0x201C, // LEFT DOUBLE QUOTATION MARK
|
||||
/*0xD3*/ 0x201D, // RIGHT DOUBLE QUOTATION MARK
|
||||
/*0xD4*/ 0x2018, // LEFT SINGLE QUOTATION MARK
|
||||
/*0xD5*/ 0x2019, // RIGHT SINGLE QUOTATION MARK
|
||||
/*0xD6*/ 0x00F7, // DIVISION SIGN
|
||||
/*0xD7*/ 0x25CA, // LOZENGE
|
||||
/*0xD8*/ 0x00FF, // LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
/*0xD9*/ 0x0178, // LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
/*0xDA*/ 0x2044, // FRACTION SLASH
|
||||
/*0xDB*/ 0x00A4, // CURRENCY SIGN (was EURO SIGN)
|
||||
/*0xDC*/ 0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
/*0xDD*/ 0x203A, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
/*0xDE*/ 0xFB01, // LATIN SMALL LIGATURE FI
|
||||
/*0xDF*/ 0xFB02, // LATIN SMALL LIGATURE FL
|
||||
/*0xE0*/ 0x2021, // DOUBLE DAGGER
|
||||
/*0xE1*/ 0x00B7, // MIDDLE DOT
|
||||
/*0xE2*/ 0x201A, // SINGLE LOW-9 QUOTATION MARK
|
||||
/*0xE3*/ 0x201E, // DOUBLE LOW-9 QUOTATION MARK
|
||||
/*0xE4*/ 0x2030, // PER MILLE SIGN
|
||||
/*0xE5*/ 0x00C2, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
/*0xE6*/ 0x00CA, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
/*0xE7*/ 0x00C1, // LATIN CAPITAL LETTER A WITH ACUTE
|
||||
/*0xE8*/ 0x00CB, // LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
/*0xE9*/ 0x00C8, // LATIN CAPITAL LETTER E WITH GRAVE
|
||||
/*0xEA*/ 0x00CD, // LATIN CAPITAL LETTER I WITH ACUTE
|
||||
/*0xEB*/ 0x00CE, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
/*0xEC*/ 0x00CF, // LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
/*0xED*/ 0x00CC, // LATIN CAPITAL LETTER I WITH GRAVE
|
||||
/*0xEE*/ 0x00D3, // LATIN CAPITAL LETTER O WITH ACUTE
|
||||
/*0xEF*/ 0x00D4, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
/*0xF0*/ 0xF8FF, // Apple logo
|
||||
/*0xF1*/ 0x00D2, // LATIN CAPITAL LETTER O WITH GRAVE
|
||||
/*0xF2*/ 0x00DA, // LATIN CAPITAL LETTER U WITH ACUTE
|
||||
/*0xF3*/ 0x00DB, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
/*0xF4*/ 0x00D9, // LATIN CAPITAL LETTER U WITH GRAVE
|
||||
/*0xF5*/ 0x0131, // LATIN SMALL LETTER DOTLESS I
|
||||
/*0xF6*/ 0x02C6, // MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
/*0xF7*/ 0x02DC, // SMALL TILDE
|
||||
/*0xF8*/ 0x00AF, // MACRON
|
||||
/*0xF9*/ 0x02D8, // BREVE
|
||||
/*0xFA*/ 0x02D9, // DOT ABOVE
|
||||
/*0xFB*/ 0x02DA, // RING ABOVE
|
||||
/*0xFC*/ 0x00B8, // CEDILLA
|
||||
/*0xFD*/ 0x02DD, // DOUBLE ACUTE ACCENT
|
||||
/*0xFE*/ 0x02DB, // OGONEK
|
||||
/*0xFF*/ 0x02C7, // CARON
|
||||
};
|
||||
|
||||
/*
|
||||
* Quick sanity check on contents of array.
|
||||
*
|
||||
* No two characters should map to the same thing. This isn't vital, but
|
||||
* if we want to have a reversible transformation someday, it'll make our
|
||||
* lives easier then.
|
||||
*/
|
||||
/*static*/ void Charset::CheckGSCharConv(void)
|
||||
{
|
||||
#ifdef _DEBUG
|
||||
bool* test = (bool*) malloc(65536 * sizeof(bool));
|
||||
|
||||
memset(test, 0, 65536 * sizeof(bool));
|
||||
for (int i = 0; i < NELEM(kCP1252Conv); i++) {
|
||||
if (test[kCP1252Conv[i]] && kCP1252Conv[i] != kUnk) {
|
||||
LOGW("Character used twice: 0x%02x at %d (0x%02x)",
|
||||
kCP1252Conv[i], i, i+128);
|
||||
assert(false);
|
||||
}
|
||||
test[kCP1252Conv[i]] = true;
|
||||
}
|
||||
|
||||
memset(test, 0, 65536 * sizeof(bool));
|
||||
for (int i = 0; i < NELEM(kUTF16Conv); i++) {
|
||||
if (test[kUTF16Conv[i]]) {
|
||||
LOGW("Character used twice: 0x%02x at %d (0x%02x)",
|
||||
kUTF16Conv[i], i, i+128);
|
||||
assert(false);
|
||||
}
|
||||
test[kUTF16Conv[i]] = true;
|
||||
}
|
||||
|
||||
free(test);
|
||||
#endif
|
||||
}
|
52
reformat/Charset.h
Normal file
52
reformat/Charset.h
Normal file
@ -0,0 +1,52 @@
|
||||
/*
|
||||
* CiderPress
|
||||
* Copyright (C) 2015 by faddenSoft. All Rights Reserved.
|
||||
* See the file LICENSE for distribution terms.
|
||||
*/
|
||||
#ifndef REFORMAT_CHARSET_H
|
||||
#define REFORMAT_CHARSET_H
|
||||
|
||||
/*
|
||||
* Character set conversions.
|
||||
*/
|
||||
class Charset {
|
||||
public:
|
||||
// Convert a Mac OS Roman character value (from a IIgs document) to
|
||||
// its UTF-16 Unicode equivalent. This also includes a conversion
|
||||
// for the control characters. The transformation is reversible.
|
||||
static uint16_t ConvertMacRomanToUTF16(uint8_t ch) {
|
||||
return kUTF16Conv[ch];
|
||||
}
|
||||
|
||||
// Convert a Mac OS Roman character value an 8-bit Windows CP1252
|
||||
// equivalent. The transformation is NOT reversible.
|
||||
static uint8_t ConvertMacRomanTo1252(uint8_t ch) {
|
||||
if (ch < 128)
|
||||
return ch;
|
||||
else
|
||||
return kCP1252Conv[ch-128];
|
||||
}
|
||||
|
||||
// Simple Mac OS Roman to Unicode string conversion.
|
||||
static CString ConvertMORToUNI(const CStringA& strMOR)
|
||||
{
|
||||
// We know that all MOR characters are represented in Unicode with a
|
||||
// single BMP code point, so we know that strlen(MOR) == wcslen(UNI).
|
||||
const int len = strMOR.GetLength();
|
||||
CString strUNI;
|
||||
WCHAR* uniBuf = strUNI.GetBuffer(len);
|
||||
for (int i = 0; i < len; i++) {
|
||||
uniBuf[i] = Charset::ConvertMacRomanToUTF16(strMOR[i]);
|
||||
}
|
||||
strUNI.ReleaseBuffer(len);
|
||||
return strUNI;
|
||||
}
|
||||
|
||||
static void CheckGSCharConv(void);
|
||||
|
||||
private:
|
||||
static const uint8_t kCP1252Conv[];
|
||||
static const uint16_t kUTF16Conv[];
|
||||
};
|
||||
|
||||
#endif /*REFORMAT_CHARSET_H*/
|
@ -17,447 +17,6 @@
|
||||
* ==========================================================================
|
||||
*/
|
||||
|
||||
/*
|
||||
* Convert Mac OS Roman to Windows CP1252.
|
||||
*/
|
||||
const int kUnk = 0x3f; // for unmappable chars, use '?'
|
||||
|
||||
/*static*/ const uint8_t ReformatText::kCP1252Conv[128] = {
|
||||
0xc4, // 0x80 A + umlaut (diaeresis?)
|
||||
0xc5, // 0x81 A + overcircle
|
||||
0xc7, // 0x82 C + cedilla
|
||||
0xc9, // 0x83 E + acute
|
||||
0xd1, // 0x84 N + tilde
|
||||
0xd6, // 0x85 O + umlaut
|
||||
0xdc, // 0x86 U + umlaut
|
||||
0xe1, // 0x87 a + acute
|
||||
0xe0, // 0x88 a + grave
|
||||
0xe2, // 0x89 a + circumflex
|
||||
0xe4, // 0x8a a + umlaut
|
||||
0xe3, // 0x8b a + tilde
|
||||
0xe5, // 0x8c a + overcircle
|
||||
0xe7, // 0x8d c + cedilla
|
||||
0xe9, // 0x8e e + acute
|
||||
0xe8, // 0x8f e + grave
|
||||
0xea, // 0x90 e + circumflex
|
||||
0xeb, // 0x91 e + umlaut
|
||||
0xed, // 0x92 i + acute
|
||||
0xec, // 0x93 i + grave
|
||||
0xee, // 0x94 i + circumflex
|
||||
0xef, // 0x95 i + umlaut
|
||||
0xf1, // 0x96 n + tilde
|
||||
0xf3, // 0x97 o + acute
|
||||
0xf2, // 0x98 o + grave
|
||||
0xf4, // 0x99 o + circumflex
|
||||
0xf6, // 0x9a o + umlaut
|
||||
0xf5, // 0x9b o + tilde
|
||||
0xfa, // 0x9c u + acute
|
||||
0xf9, // 0x9d u + grave
|
||||
0xfb, // 0x9e u + circumflex
|
||||
0xfc, // 0x9f u + umlaut
|
||||
0x87, // 0xa0 double cross (dagger)
|
||||
0xb0, // 0xa1 degrees
|
||||
0xa2, // 0xa2 cents
|
||||
0xa3, // 0xa3 pounds (UK$)
|
||||
0xa7, // 0xa4 section start
|
||||
0x95, // 0xa5 small square (bullet) [using fat bullet]
|
||||
0xb6, // 0xa6 paragraph (pilcrow)
|
||||
0xdf, // 0xa7 curly B (latin small letter sharp S)
|
||||
0xae, // 0xa8 raised 'R' (registered)
|
||||
0xa9, // 0xa9 raised 'C' (copyright)
|
||||
0x99, // 0xaa raised 'TM' (trademark)
|
||||
0xb4, // 0xab acute accent
|
||||
0xa8, // 0xac umlaut (diaeresis)
|
||||
kUnk, // 0xad not-equal
|
||||
0xc6, // 0xae merged AE
|
||||
0xd8, // 0xaf O + slash (upper-case nil?)
|
||||
kUnk, // 0xb0 infinity
|
||||
0xb1, // 0xb1 +/-
|
||||
kUnk, // 0xb2 <=
|
||||
kUnk, // 0xb3 >=
|
||||
0xa5, // 0xb4 Yen (Japan$)
|
||||
0xb5, // 0xb5 mu (micro)
|
||||
kUnk, // 0xb6 delta (partial differentiation) [could use D-bar 0xd0]
|
||||
kUnk, // 0xb7 epsilon (N-ary summation) [could use C-double-bar 0x80]
|
||||
kUnk, // 0xb8 PI (N-ary product)
|
||||
kUnk, // 0xb9 pi
|
||||
kUnk, // 0xba integral
|
||||
0xaa, // 0xbb a underbar (feminine ordinal) [using raised a]
|
||||
0xba, // 0xbc o underbar (masculine ordinal) [using raised o]
|
||||
kUnk, // 0xbd omega (Ohm)
|
||||
0xe6, // 0xbe merged ae
|
||||
0xf8, // 0xbf o + slash (lower-case NULL?)
|
||||
0xbf, // 0xc0 upside-down question mark
|
||||
0xa1, // 0xc1 upside-down exclamation point
|
||||
0xac, // 0xc2 rotated L ("not" sign)
|
||||
0xb7, // 0xc3 checkmark (square root) [using small bullet]
|
||||
0x83, // 0xc4 script f
|
||||
kUnk, // 0xc5 approximately equal
|
||||
kUnk, // 0xc6 delta (triangle / increment)
|
||||
0xab, // 0xc7 much less than
|
||||
0xbb, // 0xc8 much greater than
|
||||
0x85, // 0xc9 ellipsis
|
||||
0xa0, // 0xca blank (sticky space)
|
||||
0xc0, // 0xcb A + grave
|
||||
0xc3, // 0xcc A + tilde
|
||||
0xd5, // 0xcd O + tilde
|
||||
0x8c, // 0xce merged OE
|
||||
0x9c, // 0xcf merged oe
|
||||
0x96, // 0xd0 short hyphen (en dash)
|
||||
0x97, // 0xd1 long hyphen (em dash)
|
||||
0x93, // 0xd2 smart double-quote start
|
||||
0x94, // 0xd3 smart double-quote end
|
||||
0x91, // 0xd4 smart single-quote start
|
||||
0x92, // 0xd5 smart single-quote end
|
||||
0xf7, // 0xd6 divide
|
||||
0xa4, // 0xd7 diamond (lozenge) [using spiky circle]
|
||||
0xff, // 0xd8 y + umlaut
|
||||
// [nothing below here is part of standard Windows-ASCII?]
|
||||
// remaining descriptions based on hfsutils' "charset.txt"
|
||||
kUnk, // 0xd9 Y + umlaut
|
||||
kUnk, // 0xda fraction slash
|
||||
kUnk, // 0xdb currency sign
|
||||
kUnk, // 0xdc single left-pointing angle quotation mark
|
||||
kUnk, // 0xdd single right-pointing angle quotation mark
|
||||
kUnk, // 0xde merged fi
|
||||
kUnk, // 0xdf merged FL
|
||||
kUnk, // 0xe0 double dagger
|
||||
kUnk, // 0xe1 middle dot
|
||||
kUnk, // 0xe2 single low-9 quotation mark
|
||||
kUnk, // 0xe3 double low-9 quotation mark
|
||||
kUnk, // 0xe4 per mille sign
|
||||
kUnk, // 0xe5 A + circumflex
|
||||
kUnk, // 0xe6 E + circumflex
|
||||
kUnk, // 0xe7 A + acute accent
|
||||
kUnk, // 0xe8 E + diaeresis
|
||||
kUnk, // 0xe9 E + grave accent
|
||||
kUnk, // 0xea I + acute accent
|
||||
kUnk, // 0xeb I + circumflex
|
||||
kUnk, // 0xec I + diaeresis
|
||||
kUnk, // 0xed I + grave accent
|
||||
kUnk, // 0xee O + acute accent
|
||||
kUnk, // 0xef O + circumflex
|
||||
kUnk, // 0xf0 apple logo
|
||||
kUnk, // 0xf1 O + grave accent
|
||||
kUnk, // 0xf2 U + acute accent
|
||||
kUnk, // 0xf3 U + circumflex
|
||||
kUnk, // 0xf4 U + grave accent
|
||||
kUnk, // 0xf5 i without dot
|
||||
kUnk, // 0xf6 modifier letter circumflex accent
|
||||
kUnk, // 0xf7 small tilde
|
||||
kUnk, // 0xf8 macron
|
||||
kUnk, // 0xf9 breve
|
||||
kUnk, // 0xfa dot above
|
||||
kUnk, // 0xfb ring above
|
||||
kUnk, // 0xfc cedilla
|
||||
kUnk, // 0xfd double acute accent
|
||||
kUnk, // 0xfe ogonek
|
||||
kUnk, // 0xff caron
|
||||
};
|
||||
|
||||
/*
|
||||
* Convert Mac OS Roman to Unicode. Mapping comes from:
|
||||
*
|
||||
* http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMAN.TXT
|
||||
*
|
||||
* We use the "Control Pictures" block for the control characters
|
||||
* (0x00-0x1f, 0x7f).
|
||||
*/
|
||||
/*static*/ const uint16_t ReformatText::kUTF16Conv[256] = {
|
||||
/*0x00*/ 0x2400, // [control] NULL
|
||||
/*0x01*/ 0x2401, // [control] START OF HEADING
|
||||
/*0x02*/ 0x2402, // [control] START OF TEXT
|
||||
/*0x03*/ 0x2403, // [control] END OF TEXT
|
||||
/*0x04*/ 0x2404, // [control] END OF TRANSMISSION
|
||||
/*0x05*/ 0x2405, // [control] ENQUIRY
|
||||
/*0x06*/ 0x2406, // [control] ACKNOWLEDGE
|
||||
/*0x07*/ 0x2407, // [control] BELL
|
||||
/*0x08*/ 0x2408, // [control] BACKSPACE
|
||||
/*0x09*/ 0x2409, // [control] HORIZONTAL TABULATION
|
||||
/*0x0a*/ 0x240a, // [control] LINE FEED
|
||||
/*0x0b*/ 0x240b, // [control] VERTICAL TABULATION
|
||||
/*0x0c*/ 0x240c, // [control] FORM FEED
|
||||
/*0x0d*/ 0x240d, // [control] CARRIAGE RETURN
|
||||
/*0x0e*/ 0x240e, // [control] SHIFT OUT
|
||||
/*0x0f*/ 0x240f, // [control] SHIFT IN
|
||||
/*0x10*/ 0x2410, // [control] DATA LINK ESCAPE
|
||||
/*0x11*/ 0x2411, // [control] DEVICE CONTROL ONE
|
||||
/*0x12*/ 0x2412, // [control] DEVICE CONTROL TWO
|
||||
/*0x13*/ 0x2413, // [control] DEVICE CONTROL THREE
|
||||
/*0x14*/ 0x2414, // [control] DEVICE CONTROL FOUR
|
||||
/*0x15*/ 0x2415, // [control] NEGATIVE ACKNOWLEDGE
|
||||
/*0x16*/ 0x2416, // [control] SYNCHRONOUS IDLE
|
||||
/*0x17*/ 0x2417, // [control] END OF TRANSMISSION BLOCK
|
||||
/*0x18*/ 0x2418, // [control] CANCEL
|
||||
/*0x19*/ 0x2419, // [control] END OF MEDIUM
|
||||
/*0x1a*/ 0x241a, // [control] SUBSTITUTE
|
||||
/*0x1b*/ 0x241b, // [control] ESCAPE
|
||||
/*0x1c*/ 0x241c, // [control] FILE SEPARATOR
|
||||
/*0x1d*/ 0x241d, // [control] GROUP SEPARATOR
|
||||
/*0x1e*/ 0x241e, // [control] RECORD SEPARATOR
|
||||
/*0x1f*/ 0x241f, // [control] UNIT SEPARATOR
|
||||
/*0x20*/ 0x0020, // SPACE
|
||||
/*0x21*/ 0x0021, // EXCLAMATION MARK
|
||||
/*0x22*/ 0x0022, // QUOTATION MARK
|
||||
/*0x23*/ 0x0023, // NUMBER SIGN
|
||||
/*0x24*/ 0x0024, // DOLLAR SIGN
|
||||
/*0x25*/ 0x0025, // PERCENT SIGN
|
||||
/*0x26*/ 0x0026, // AMPERSAND
|
||||
/*0x27*/ 0x0027, // APOSTROPHE
|
||||
/*0x28*/ 0x0028, // LEFT PARENTHESIS
|
||||
/*0x29*/ 0x0029, // RIGHT PARENTHESIS
|
||||
/*0x2A*/ 0x002A, // ASTERISK
|
||||
/*0x2B*/ 0x002B, // PLUS SIGN
|
||||
/*0x2C*/ 0x002C, // COMMA
|
||||
/*0x2D*/ 0x002D, // HYPHEN-MINUS
|
||||
/*0x2E*/ 0x002E, // FULL STOP
|
||||
/*0x2F*/ 0x002F, // SOLIDUS
|
||||
/*0x30*/ 0x0030, // DIGIT ZERO
|
||||
/*0x31*/ 0x0031, // DIGIT ONE
|
||||
/*0x32*/ 0x0032, // DIGIT TWO
|
||||
/*0x33*/ 0x0033, // DIGIT THREE
|
||||
/*0x34*/ 0x0034, // DIGIT FOUR
|
||||
/*0x35*/ 0x0035, // DIGIT FIVE
|
||||
/*0x36*/ 0x0036, // DIGIT SIX
|
||||
/*0x37*/ 0x0037, // DIGIT SEVEN
|
||||
/*0x38*/ 0x0038, // DIGIT EIGHT
|
||||
/*0x39*/ 0x0039, // DIGIT NINE
|
||||
/*0x3A*/ 0x003A, // COLON
|
||||
/*0x3B*/ 0x003B, // SEMICOLON
|
||||
/*0x3C*/ 0x003C, // LESS-THAN SIGN
|
||||
/*0x3D*/ 0x003D, // EQUALS SIGN
|
||||
/*0x3E*/ 0x003E, // GREATER-THAN SIGN
|
||||
/*0x3F*/ 0x003F, // QUESTION MARK
|
||||
/*0x40*/ 0x0040, // COMMERCIAL AT
|
||||
/*0x41*/ 0x0041, // LATIN CAPITAL LETTER A
|
||||
/*0x42*/ 0x0042, // LATIN CAPITAL LETTER B
|
||||
/*0x43*/ 0x0043, // LATIN CAPITAL LETTER C
|
||||
/*0x44*/ 0x0044, // LATIN CAPITAL LETTER D
|
||||
/*0x45*/ 0x0045, // LATIN CAPITAL LETTER E
|
||||
/*0x46*/ 0x0046, // LATIN CAPITAL LETTER F
|
||||
/*0x47*/ 0x0047, // LATIN CAPITAL LETTER G
|
||||
/*0x48*/ 0x0048, // LATIN CAPITAL LETTER H
|
||||
/*0x49*/ 0x0049, // LATIN CAPITAL LETTER I
|
||||
/*0x4A*/ 0x004A, // LATIN CAPITAL LETTER J
|
||||
/*0x4B*/ 0x004B, // LATIN CAPITAL LETTER K
|
||||
/*0x4C*/ 0x004C, // LATIN CAPITAL LETTER L
|
||||
/*0x4D*/ 0x004D, // LATIN CAPITAL LETTER M
|
||||
/*0x4E*/ 0x004E, // LATIN CAPITAL LETTER N
|
||||
/*0x4F*/ 0x004F, // LATIN CAPITAL LETTER O
|
||||
/*0x50*/ 0x0050, // LATIN CAPITAL LETTER P
|
||||
/*0x51*/ 0x0051, // LATIN CAPITAL LETTER Q
|
||||
/*0x52*/ 0x0052, // LATIN CAPITAL LETTER R
|
||||
/*0x53*/ 0x0053, // LATIN CAPITAL LETTER S
|
||||
/*0x54*/ 0x0054, // LATIN CAPITAL LETTER T
|
||||
/*0x55*/ 0x0055, // LATIN CAPITAL LETTER U
|
||||
/*0x56*/ 0x0056, // LATIN CAPITAL LETTER V
|
||||
/*0x57*/ 0x0057, // LATIN CAPITAL LETTER W
|
||||
/*0x58*/ 0x0058, // LATIN CAPITAL LETTER X
|
||||
/*0x59*/ 0x0059, // LATIN CAPITAL LETTER Y
|
||||
/*0x5A*/ 0x005A, // LATIN CAPITAL LETTER Z
|
||||
/*0x5B*/ 0x005B, // LEFT SQUARE BRACKET
|
||||
/*0x5C*/ 0x005C, // REVERSE SOLIDUS
|
||||
/*0x5D*/ 0x005D, // RIGHT SQUARE BRACKET
|
||||
/*0x5E*/ 0x005E, // CIRCUMFLEX ACCENT
|
||||
/*0x5F*/ 0x005F, // LOW LINE
|
||||
/*0x60*/ 0x0060, // GRAVE ACCENT
|
||||
/*0x61*/ 0x0061, // LATIN SMALL LETTER A
|
||||
/*0x62*/ 0x0062, // LATIN SMALL LETTER B
|
||||
/*0x63*/ 0x0063, // LATIN SMALL LETTER C
|
||||
/*0x64*/ 0x0064, // LATIN SMALL LETTER D
|
||||
/*0x65*/ 0x0065, // LATIN SMALL LETTER E
|
||||
/*0x66*/ 0x0066, // LATIN SMALL LETTER F
|
||||
/*0x67*/ 0x0067, // LATIN SMALL LETTER G
|
||||
/*0x68*/ 0x0068, // LATIN SMALL LETTER H
|
||||
/*0x69*/ 0x0069, // LATIN SMALL LETTER I
|
||||
/*0x6A*/ 0x006A, // LATIN SMALL LETTER J
|
||||
/*0x6B*/ 0x006B, // LATIN SMALL LETTER K
|
||||
/*0x6C*/ 0x006C, // LATIN SMALL LETTER L
|
||||
/*0x6D*/ 0x006D, // LATIN SMALL LETTER M
|
||||
/*0x6E*/ 0x006E, // LATIN SMALL LETTER N
|
||||
/*0x6F*/ 0x006F, // LATIN SMALL LETTER O
|
||||
/*0x70*/ 0x0070, // LATIN SMALL LETTER P
|
||||
/*0x71*/ 0x0071, // LATIN SMALL LETTER Q
|
||||
/*0x72*/ 0x0072, // LATIN SMALL LETTER R
|
||||
/*0x73*/ 0x0073, // LATIN SMALL LETTER S
|
||||
/*0x74*/ 0x0074, // LATIN SMALL LETTER T
|
||||
/*0x75*/ 0x0075, // LATIN SMALL LETTER U
|
||||
/*0x76*/ 0x0076, // LATIN SMALL LETTER V
|
||||
/*0x77*/ 0x0077, // LATIN SMALL LETTER W
|
||||
/*0x78*/ 0x0078, // LATIN SMALL LETTER X
|
||||
/*0x79*/ 0x0079, // LATIN SMALL LETTER Y
|
||||
/*0x7A*/ 0x007A, // LATIN SMALL LETTER Z
|
||||
/*0x7B*/ 0x007B, // LEFT CURLY BRACKET
|
||||
/*0x7C*/ 0x007C, // VERTICAL LINE
|
||||
/*0x7D*/ 0x007D, // RIGHT CURLY BRACKET
|
||||
/*0x7E*/ 0x007E, // TILDE
|
||||
/*0x7f*/ 0x2421, // [control] DELETE
|
||||
/*0x80*/ 0x00C4, // LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
/*0x81*/ 0x00C5, // LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
/*0x82*/ 0x00C7, // LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
/*0x83*/ 0x00C9, // LATIN CAPITAL LETTER E WITH ACUTE
|
||||
/*0x84*/ 0x00D1, // LATIN CAPITAL LETTER N WITH TILDE
|
||||
/*0x85*/ 0x00D6, // LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
/*0x86*/ 0x00DC, // LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
/*0x87*/ 0x00E1, // LATIN SMALL LETTER A WITH ACUTE
|
||||
/*0x88*/ 0x00E0, // LATIN SMALL LETTER A WITH GRAVE
|
||||
/*0x89*/ 0x00E2, // LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
/*0x8A*/ 0x00E4, // LATIN SMALL LETTER A WITH DIAERESIS
|
||||
/*0x8B*/ 0x00E3, // LATIN SMALL LETTER A WITH TILDE
|
||||
/*0x8C*/ 0x00E5, // LATIN SMALL LETTER A WITH RING ABOVE
|
||||
/*0x8D*/ 0x00E7, // LATIN SMALL LETTER C WITH CEDILLA
|
||||
/*0x8E*/ 0x00E9, // LATIN SMALL LETTER E WITH ACUTE
|
||||
/*0x8F*/ 0x00E8, // LATIN SMALL LETTER E WITH GRAVE
|
||||
/*0x90*/ 0x00EA, // LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
/*0x91*/ 0x00EB, // LATIN SMALL LETTER E WITH DIAERESIS
|
||||
/*0x92*/ 0x00ED, // LATIN SMALL LETTER I WITH ACUTE
|
||||
/*0x93*/ 0x00EC, // LATIN SMALL LETTER I WITH GRAVE
|
||||
/*0x94*/ 0x00EE, // LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
/*0x95*/ 0x00EF, // LATIN SMALL LETTER I WITH DIAERESIS
|
||||
/*0x96*/ 0x00F1, // LATIN SMALL LETTER N WITH TILDE
|
||||
/*0x97*/ 0x00F3, // LATIN SMALL LETTER O WITH ACUTE
|
||||
/*0x98*/ 0x00F2, // LATIN SMALL LETTER O WITH GRAVE
|
||||
/*0x99*/ 0x00F4, // LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
/*0x9A*/ 0x00F6, // LATIN SMALL LETTER O WITH DIAERESIS
|
||||
/*0x9B*/ 0x00F5, // LATIN SMALL LETTER O WITH TILDE
|
||||
/*0x9C*/ 0x00FA, // LATIN SMALL LETTER U WITH ACUTE
|
||||
/*0x9D*/ 0x00F9, // LATIN SMALL LETTER U WITH GRAVE
|
||||
/*0x9E*/ 0x00FB, // LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
/*0x9F*/ 0x00FC, // LATIN SMALL LETTER U WITH DIAERESIS
|
||||
/*0xA0*/ 0x2020, // DAGGER
|
||||
/*0xA1*/ 0x00B0, // DEGREE SIGN
|
||||
/*0xA2*/ 0x00A2, // CENT SIGN
|
||||
/*0xA3*/ 0x00A3, // POUND SIGN
|
||||
/*0xA4*/ 0x00A7, // SECTION SIGN
|
||||
/*0xA5*/ 0x2022, // BULLET
|
||||
/*0xA6*/ 0x00B6, // PILCROW SIGN
|
||||
/*0xA7*/ 0x00DF, // LATIN SMALL LETTER SHARP S
|
||||
/*0xA8*/ 0x00AE, // REGISTERED SIGN
|
||||
/*0xA9*/ 0x00A9, // COPYRIGHT SIGN
|
||||
/*0xAA*/ 0x2122, // TRADE MARK SIGN
|
||||
/*0xAB*/ 0x00B4, // ACUTE ACCENT
|
||||
/*0xAC*/ 0x00A8, // DIAERESIS
|
||||
/*0xAD*/ 0x2260, // NOT EQUAL TO
|
||||
/*0xAE*/ 0x00C6, // LATIN CAPITAL LETTER AE
|
||||
/*0xAF*/ 0x00D8, // LATIN CAPITAL LETTER O WITH STROKE
|
||||
/*0xB0*/ 0x221E, // INFINITY
|
||||
/*0xB1*/ 0x00B1, // PLUS-MINUS SIGN
|
||||
/*0xB2*/ 0x2264, // LESS-THAN OR EQUAL TO
|
||||
/*0xB3*/ 0x2265, // GREATER-THAN OR EQUAL TO
|
||||
/*0xB4*/ 0x00A5, // YEN SIGN
|
||||
/*0xB5*/ 0x00B5, // MICRO SIGN
|
||||
/*0xB6*/ 0x2202, // PARTIAL DIFFERENTIAL
|
||||
/*0xB7*/ 0x2211, // N-ARY SUMMATION
|
||||
/*0xB8*/ 0x220F, // N-ARY PRODUCT
|
||||
/*0xB9*/ 0x03C0, // GREEK SMALL LETTER PI
|
||||
/*0xBA*/ 0x222B, // INTEGRAL
|
||||
/*0xBB*/ 0x00AA, // FEMININE ORDINAL INDICATOR
|
||||
/*0xBC*/ 0x00BA, // MASCULINE ORDINAL INDICATOR
|
||||
/*0xBD*/ 0x03A9, // GREEK CAPITAL LETTER OMEGA
|
||||
/*0xBE*/ 0x00E6, // LATIN SMALL LETTER AE
|
||||
/*0xBF*/ 0x00F8, // LATIN SMALL LETTER O WITH STROKE
|
||||
/*0xC0*/ 0x00BF, // INVERTED QUESTION MARK
|
||||
/*0xC1*/ 0x00A1, // INVERTED EXCLAMATION MARK
|
||||
/*0xC2*/ 0x00AC, // NOT SIGN
|
||||
/*0xC3*/ 0x221A, // SQUARE ROOT
|
||||
/*0xC4*/ 0x0192, // LATIN SMALL LETTER F WITH HOOK
|
||||
/*0xC5*/ 0x2248, // ALMOST EQUAL TO
|
||||
/*0xC6*/ 0x2206, // INCREMENT
|
||||
/*0xC7*/ 0x00AB, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
/*0xC8*/ 0x00BB, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
/*0xC9*/ 0x2026, // HORIZONTAL ELLIPSIS
|
||||
/*0xCA*/ 0x00A0, // NO-BREAK SPACE
|
||||
/*0xCB*/ 0x00C0, // LATIN CAPITAL LETTER A WITH GRAVE
|
||||
/*0xCC*/ 0x00C3, // LATIN CAPITAL LETTER A WITH TILDE
|
||||
/*0xCD*/ 0x00D5, // LATIN CAPITAL LETTER O WITH TILDE
|
||||
/*0xCE*/ 0x0152, // LATIN CAPITAL LIGATURE OE
|
||||
/*0xCF*/ 0x0153, // LATIN SMALL LIGATURE OE
|
||||
/*0xD0*/ 0x2013, // EN DASH
|
||||
/*0xD1*/ 0x2014, // EM DASH
|
||||
/*0xD2*/ 0x201C, // LEFT DOUBLE QUOTATION MARK
|
||||
/*0xD3*/ 0x201D, // RIGHT DOUBLE QUOTATION MARK
|
||||
/*0xD4*/ 0x2018, // LEFT SINGLE QUOTATION MARK
|
||||
/*0xD5*/ 0x2019, // RIGHT SINGLE QUOTATION MARK
|
||||
/*0xD6*/ 0x00F7, // DIVISION SIGN
|
||||
/*0xD7*/ 0x25CA, // LOZENGE
|
||||
/*0xD8*/ 0x00FF, // LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
/*0xD9*/ 0x0178, // LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
/*0xDA*/ 0x2044, // FRACTION SLASH
|
||||
/*0xDB*/ 0x00A4, // CURRENCY SIGN (was EURO SIGN)
|
||||
/*0xDC*/ 0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
/*0xDD*/ 0x203A, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
/*0xDE*/ 0xFB01, // LATIN SMALL LIGATURE FI
|
||||
/*0xDF*/ 0xFB02, // LATIN SMALL LIGATURE FL
|
||||
/*0xE0*/ 0x2021, // DOUBLE DAGGER
|
||||
/*0xE1*/ 0x00B7, // MIDDLE DOT
|
||||
/*0xE2*/ 0x201A, // SINGLE LOW-9 QUOTATION MARK
|
||||
/*0xE3*/ 0x201E, // DOUBLE LOW-9 QUOTATION MARK
|
||||
/*0xE4*/ 0x2030, // PER MILLE SIGN
|
||||
/*0xE5*/ 0x00C2, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
/*0xE6*/ 0x00CA, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
/*0xE7*/ 0x00C1, // LATIN CAPITAL LETTER A WITH ACUTE
|
||||
/*0xE8*/ 0x00CB, // LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
/*0xE9*/ 0x00C8, // LATIN CAPITAL LETTER E WITH GRAVE
|
||||
/*0xEA*/ 0x00CD, // LATIN CAPITAL LETTER I WITH ACUTE
|
||||
/*0xEB*/ 0x00CE, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
/*0xEC*/ 0x00CF, // LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
/*0xED*/ 0x00CC, // LATIN CAPITAL LETTER I WITH GRAVE
|
||||
/*0xEE*/ 0x00D3, // LATIN CAPITAL LETTER O WITH ACUTE
|
||||
/*0xEF*/ 0x00D4, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
/*0xF0*/ 0xF8FF, // Apple logo
|
||||
/*0xF1*/ 0x00D2, // LATIN CAPITAL LETTER O WITH GRAVE
|
||||
/*0xF2*/ 0x00DA, // LATIN CAPITAL LETTER U WITH ACUTE
|
||||
/*0xF3*/ 0x00DB, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
/*0xF4*/ 0x00D9, // LATIN CAPITAL LETTER U WITH GRAVE
|
||||
/*0xF5*/ 0x0131, // LATIN SMALL LETTER DOTLESS I
|
||||
/*0xF6*/ 0x02C6, // MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
/*0xF7*/ 0x02DC, // SMALL TILDE
|
||||
/*0xF8*/ 0x00AF, // MACRON
|
||||
/*0xF9*/ 0x02D8, // BREVE
|
||||
/*0xFA*/ 0x02D9, // DOT ABOVE
|
||||
/*0xFB*/ 0x02DA, // RING ABOVE
|
||||
/*0xFC*/ 0x00B8, // CEDILLA
|
||||
/*0xFD*/ 0x02DD, // DOUBLE ACUTE ACCENT
|
||||
/*0xFE*/ 0x02DB, // OGONEK
|
||||
/*0xFF*/ 0x02C7, // CARON
|
||||
};
|
||||
|
||||
/*
|
||||
* Quick sanity check on contents of array.
|
||||
*
|
||||
* No two characters should map to the same thing. This isn't vital, but
|
||||
* if we want to have a reversible transformation someday, it'll make our
|
||||
* lives easier then.
|
||||
*/
|
||||
void ReformatText::CheckGSCharConv(void)
|
||||
{
|
||||
#ifdef _DEBUG
|
||||
bool* test = (bool*) malloc(65536 * sizeof(bool));
|
||||
|
||||
memset(test, 0, 65536 * sizeof(bool));
|
||||
for (int i = 0; i < NELEM(kCP1252Conv); i++) {
|
||||
if (test[kCP1252Conv[i]] && kCP1252Conv[i] != kUnk) {
|
||||
LOGW("Character used twice: 0x%02x at %d (0x%02x)",
|
||||
kCP1252Conv[i], i, i+128);
|
||||
assert(false);
|
||||
}
|
||||
test[kCP1252Conv[i]] = true;
|
||||
}
|
||||
|
||||
memset(test, 0, 65536 * sizeof(bool));
|
||||
for (int i = 0; i < NELEM(kUTF16Conv); i++) {
|
||||
if (test[kUTF16Conv[i]]) {
|
||||
LOGW("Character used twice: 0x%02x at %d (0x%02x)",
|
||||
kUTF16Conv[i], i, i+128);
|
||||
assert(false);
|
||||
}
|
||||
test[kUTF16Conv[i]] = true;
|
||||
}
|
||||
|
||||
free(test);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the output format and buffer.
|
||||
*
|
||||
|
@ -11,10 +11,11 @@
|
||||
* that, but we'd have to figure out what that means when extracting a file
|
||||
* (i.e. figure out the RTF embedded bitmap format).
|
||||
*/
|
||||
#ifndef REFORMAT_REFORMATBASE
|
||||
#define REFORMAT_REFORMATBASE
|
||||
#ifndef REFORMAT_REFORMATBASE_H
|
||||
#define REFORMAT_REFORMATBASE_H
|
||||
|
||||
#include "Reformat.h"
|
||||
#include "Charset.h"
|
||||
|
||||
#define BufPrintf fExpBuf.Printf
|
||||
|
||||
@ -290,13 +291,6 @@ public:
|
||||
kRTFFlagColorTable = 1, // include color table
|
||||
};
|
||||
|
||||
// Convert a Mac OS Roman character value (from a IIgs document) to
|
||||
// its UTF-16 Unicode equivalent. This also includes a conversion
|
||||
// for the control characters.
|
||||
static uint16_t ConvertMacRomanToUTF16(uint8_t ch) {
|
||||
return kUTF16Conv[ch];
|
||||
}
|
||||
|
||||
protected:
|
||||
void RTFBegin(int flags = 0);
|
||||
void RTFEnd(void);
|
||||
@ -387,25 +381,11 @@ protected:
|
||||
fExpBuf.Printf("%c", ch);
|
||||
}
|
||||
|
||||
// Convert a Mac OS Roman character value (from a IIgs document) to
|
||||
// an 8-bit Windows CP1252 equivalent.
|
||||
static uint8_t ConvertMacRomanTo1252(uint8_t ch) {
|
||||
if (ch < 128)
|
||||
return ch;
|
||||
else
|
||||
return kCP1252Conv[ch-128];
|
||||
}
|
||||
|
||||
void CheckGSCharConv(void);
|
||||
|
||||
private:
|
||||
DECLARE_COPY_AND_OPEQ(ReformatText)
|
||||
int CreateWorkBuf(void);
|
||||
enum { kRTFUnitsPerInch = 1440 }; // TWIPS
|
||||
|
||||
static const uint8_t kCP1252Conv[];
|
||||
static const uint16_t kUTF16Conv[];
|
||||
|
||||
int fLeftMargin, fRightMargin; // for documents, in 1/10th inch
|
||||
int fPointSize;
|
||||
int fPreMultPointSize;
|
||||
@ -421,4 +401,4 @@ private:
|
||||
TextColor fTextColor;
|
||||
};
|
||||
|
||||
#endif /*REFORMAT_REFORMATBASE*/
|
||||
#endif /*REFORMAT_REFORMATBASE_H*/
|
||||
|
@ -47,7 +47,7 @@ int ReformatGWP::Process(const ReformatHolder* pHolder,
|
||||
long srcLen = pHolder->GetSourceLen(part);
|
||||
fUseRTF = false;
|
||||
|
||||
CheckGSCharConv();
|
||||
Charset::CheckGSCharConv();
|
||||
RTFBegin();
|
||||
|
||||
/* convert EOL markers and IIgs characters */
|
||||
@ -67,7 +67,7 @@ int ReformatGWP::Process(const ReformatHolder* pHolder,
|
||||
BufPrintf("\r\n");
|
||||
} else {
|
||||
// RTF is always off, so just use BufPrintf
|
||||
BufPrintf("%c", ConvertMacRomanTo1252(ch));
|
||||
BufPrintf("%c", Charset::ConvertMacRomanTo1252(ch));
|
||||
}
|
||||
}
|
||||
|
||||
@ -124,7 +124,7 @@ int ReformatTeach::Process(const ReformatHolder* pHolder,
|
||||
LOGI("Teach reformatter missing one fork of the file");
|
||||
return -1;
|
||||
}
|
||||
CheckGSCharConv();
|
||||
Charset::CheckGSCharConv();
|
||||
|
||||
/* find the rStyleBlock */
|
||||
if (!ReformatResourceFork::GetResource(rsrcBuf, rsrcLen, 0x8012, 0x0001,
|
||||
@ -206,7 +206,7 @@ int ReformatTeach::Process(const ReformatHolder* pHolder,
|
||||
} else if (uch == '\t') {
|
||||
RTFTab();
|
||||
} else {
|
||||
RTFPrintUTF16Char(ConvertMacRomanToUTF16(uch));
|
||||
RTFPrintUTF16Char(Charset::ConvertMacRomanToUTF16(uch));
|
||||
}
|
||||
dataBuf++;
|
||||
dataLen--;
|
||||
|
@ -108,6 +108,7 @@
|
||||
<ClInclude Include="Asm.h" />
|
||||
<ClInclude Include="AWGS.h" />
|
||||
<ClInclude Include="BASIC.h" />
|
||||
<ClInclude Include="Charset.h" />
|
||||
<ClInclude Include="CPMFiles.h" />
|
||||
<ClInclude Include="Directory.h" />
|
||||
<ClInclude Include="Disasm.h" />
|
||||
@ -130,6 +131,7 @@
|
||||
<ClCompile Include="Asm.cpp" />
|
||||
<ClCompile Include="AWGS.cpp" />
|
||||
<ClCompile Include="BASIC.cpp" />
|
||||
<ClCompile Include="Charset.cpp" />
|
||||
<ClCompile Include="CPMFiles.cpp" />
|
||||
<ClCompile Include="Directory.cpp" />
|
||||
<ClCompile Include="Disasm.cpp" />
|
||||
|
@ -71,6 +71,9 @@
|
||||
<ClInclude Include="Text8.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Charset.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="AppleWorks.cpp">
|
||||
@ -142,5 +145,8 @@
|
||||
<ClCompile Include="Text8.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Charset.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
</Project>
|
Loading…
x
Reference in New Issue
Block a user