/* * hfsutils - tools for reading and writing Macintosh HFS volumes * Copyright (C) 1996-1998 Robert Leslie * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * $Id: charset.c,v 1.5 1998/11/02 22:08:23 rob Exp $ */ # ifdef HAVE_CONFIG_H # include "config.h" # endif # include # include # include "charset.h" static const UCS2 macroman[256] = { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, 0x00c4, 0x00c5, 0x00c7, 0x00c9, 0x00d1, 0x00d6, 0x00dc, 0x00e1, 0x00e0, 0x00e2, 0x00e4, 0x00e3, 0x00e5, 0x00e7, 0x00e9, 0x00e8, 0x00ea, 0x00eb, 0x00ed, 0x00ec, 0x00ee, 0x00ef, 0x00f1, 0x00f3, 0x00f2, 0x00f4, 0x00f6, 0x00f5, 0x00fa, 0x00f9, 0x00fb, 0x00fc, 0x2020, 0x00b0, 0x00a2, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x00df, 0x00ae, 0x00a9, 0x2122, 0x00b4, 0x00a8, 0x2260, 0x00c6, 0x00d8, 0x221e, 0x00b1, 0x2264, 0x2265, 0x00a5, 0x00b5, 0x2202, 0x2211, 0x220f, 0x03c0, 0x222b, 0x00aa, 0x00ba, 0x2126, 0x00e6, 0x00f8, 0x00bf, 0x00a1, 0x00ac, 0x221a, 0x0192, 0x2248, 0x2206, 0x00ab, 0x00bb, 0x2026, 0x00a0, 0x00c0, 0x00c3, 0x00d5, 0x0152, 0x0153, 0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x25ca, 0x00ff, 0x0178, 0x2044, 0x00a4, 0x2039, 0x203a, 0xfb01, 0xfb02, 0x2021, 0x00b7, 0x201a, 0x201e, 0x2030, 0x00c2, 0x00ca, 0x00c1, 0x00cb, 0x00c8, 0x00cd, 0x00ce, 0x00cf, 0x00cc, 0x00d3, 0x00d4, 0xf8ff, 0x00d2, 0x00da, 0x00db, 0x00d9, 0x0131, 0x02c6, 0x02dc, 0x00af, 0x02d8, 0x02d9, 0x02da, 0x00b8, 0x02dd, 0x02db, 0x02c7 }; static const char *macroman_subst[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80 - 0x8f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90 - 0x9f */ "+", 0, 0, 0, 0, "*", 0, 0, /* 0xa0 - 0xa7 */ 0, 0, "[tm]", 0, 0, "!=", 0, 0, /* 0xa8 - 0xaf */ "[inf]", 0, "<=", ">=", 0, 0, "[partial]", "[Sum]", /* 0xb0 - 0xb7 */ "[Prod]", "[pi]", "[Integral]", 0, 0, "[ohm]", 0, 0, /* 0xb8 - 0xbf */ 0, 0, 0, "[Sqrt]", "[f]", "~=", "[delta]", 0, /* 0xc0 - 0xc7 */ 0, "...", 0, 0, 0, 0, "OE", "oe", /* 0xc8 - 0xcf */ "-", "--", "``", "''", "`", "'", 0, "#", /* 0xd0 - 0xd7 */ 0, "Y", "/", 0, "<", ">", "fi", "fl", /* 0xd8 - 0xdf */ "++", 0, ",", ",,", "o/oo", 0, 0, 0, /* 0xe0 - 0xe7 */ 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe8 - 0xef */ "[Apple]", 0, 0, 0, 0, "i", "^", "~", /* 0xf0 - 0xf7 */ 0, "-", "\267", "\260", 0, "\"", "-", "^" /* 0xf8 - 0xff */ }; static unsigned char latin1[256]; static const char *latin1_subst[128] = { "", "", "", "", "", "", "", "", /* 0x80 - 0x87 */ "", "", "", "", "", "", "", "", /* 0x88 - 0x8f */ "", "", "", "", "", "", "", "", /* 0x90 - 0x97 */ "", "", "", "", "", "", "", "", /* 0x98 - 0x9f */ 0, 0, 0, 0, 0, 0, "|", 0, /* 0xa0 - 0xa7 */ 0, 0, 0, 0, 0, "-", 0, 0, /* 0xa8 - 0xaf */ 0, 0, "[^2]", "[^3]", 0, 0, 0, 0, /* 0xb0 - 0xb7 */ 0, "[^1]", 0, 0, "[1/4]", "[1/2]", "[3/4]", 0, /* 0xb8 - 0xbf */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0 - 0xcf */ "D", 0, 0, 0, 0, 0, 0, "x", /* 0xd0 - 0xd7 */ 0, 0, 0, 0, 0, "Y", "P", 0, /* 0xd8 - 0xdf */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0 - 0xef */ "d", 0, 0, 0, 0, 0, 0, 0, /* 0xf0 - 0xf7 */ 0, 0, 0, 0, 0, "y", "p", 0 /* 0xf8 - 0xff */ }; /* * NAME: charset->unicode() * DESCRIPTION: return a Unicode string for MacOS Standard Roman */ UCS2 *cs_unicode(char *mstr, int *lenptr) { int len, i; UCS2 *unicode, *ptr; len = lenptr ? *lenptr : strlen(mstr); unicode = malloc((len + 1) * sizeof(UCS2)); if (unicode == 0) return 0; ptr = unicode; for (i = 0; i < len; ++i) *ptr++ = macroman[*(unsigned char *) mstr++]; *ptr = 0; return unicode; } /* * NAME: charset->latin1() * DESCRIPTION: return a Latin-1 (ISO 8859-1) string for MacOS Standard Roman */ char *cs_latin1(char *mstr, int *lenptr) { int ilen, olen, i; char *latin1, *ptr; const char *subst; unsigned char ch; UCS2 unicode; ilen = lenptr ? *lenptr : strlen(mstr); olen = 0; for (i = 0; i < ilen; ++i) { ch = *((unsigned char *) mstr + i); unicode = macroman[ch]; if (unicode & 0xff00) olen += strlen(macroman_subst[ch & 0x7f]); else ++olen; } latin1 = malloc(olen + 1); if (latin1 == 0) return 0; ptr = latin1; for (i = 0; i < ilen; ++i) { ch = *(unsigned char *) mstr++; unicode = macroman[ch]; if (unicode & 0xff00) { /* substitute similar Latin-1 character(s) */ subst = macroman_subst[ch & 0x7f]; strcpy(ptr, subst); ptr += strlen(subst); } else *ptr++ = unicode; } *ptr = 0; if (lenptr) *lenptr = olen; return latin1; } /* * NAME: charset->mktable() * DESCRIPTION: construct latin1[] table from macroman[] */ static void mktable(void) { int i; for (i = 0x00; i <= 0xff; ++i) latin1[i] = 0xff; for (i = 0x00; i <= 0xff; ++i) { UCS2 unicode = macroman[i]; if (! (unicode & 0xff00)) latin1[unicode] = i; } } /* * NAME: charset->macroman() * DESCRIPTION: return a MacOS Standard Roman string for Latin-1 (ISO 8859-1) */ char *cs_macroman(char *lstr, int *lenptr) { int ilen, olen, i; char *macroman, *ptr; const char *subst; unsigned char ch, msr; if (latin1[0x80] != 0xff) mktable(); ilen = lenptr ? *lenptr : strlen(lstr); olen = 0; for (i = 0; i < ilen; ++i) { ch = *((unsigned char *) lstr + i); msr = latin1[ch]; if (msr == 0xff) olen += strlen(latin1_subst[ch & 0x7f]); else ++olen; } macroman = malloc(olen + 1); if (macroman == 0) return 0; ptr = macroman; for (i = 0; i < ilen; ++i) { ch = *(unsigned char *) lstr++; msr = latin1[ch]; if (msr == 0xff) { /* substitute similar MacOS Standard Roman character(s) */ subst = latin1_subst[ch & 0x7f]; strcpy(ptr, subst); ptr += strlen(subst); } else *ptr++ = msr; } *ptr = 0; if (lenptr) *lenptr = olen; return macroman; }