Fill in all of the entities, mapping each one to the GS's character set (which is based on the classic Mac OS character set).

This commit is contained in:
Jeremy Rand 2021-04-22 00:56:13 -04:00
parent a642513db3
commit beb063b56f
2 changed files with 262 additions and 12 deletions

View File

@ -52,6 +52,7 @@ typedef struct tEntity
{
// Entity text as it appears in the input; printEntity() compares it against
// the entity text it is handed.
const char * entityString;
// Byte that printEntity() writes to the output file when this entry matches.
char entityChar;
// Code point compared against the value parsed from a numeric character
// reference, so numeric references can select this entry as well.
uint32_t unicodeChar;
} tEntity;
// Forward declarations
@ -89,8 +90,229 @@ tBlockListItem * blockList = NULL;
FILE * outputFile;
// Table of character entities understood by printEntity().
//
// Each row is { entityString, entityChar, unicodeChar }:
//   entityString - the named reference, spelled exactly as it appears in the
//                  input including the leading '&' and trailing ';'.  The
//                  comparison in printEntity() is case-sensitive, which is why
//                  both "&quot;" and "&QUOT;" (and similar pairs) are listed.
//   entityChar   - the byte emitted for the entity in the output character
//                  set (the classic Mac OS Roman layout).
//   unicodeChar  - the Unicode code point, used to resolve numeric references.
//
// Several rows share the same output byte because HTML5 defines multiple
// names for the same character.
tEntity entities[] = {
    // Control characters.  A newline is emitted as a carriage return (0x0d).
    { "&Tab;", 0x09, 0x09 },
    { "&NewLine;", 0x0d, 0x0a },

    // Printable ASCII punctuation - identical in both character sets.
    { "&excl;", 0x21, 0x21 },
    { "&quot;", 0x22, 0x22 },
    { "&QUOT;", 0x22, 0x22 },
    { "&num;", 0x23, 0x23 },
    { "&dollar;", 0x24, 0x24 },
    { "&percnt;", 0x25, 0x25 },
    { "&amp;", 0x26, 0x26 },
    { "&apos;", 0x27, 0x27 },
    { "&lpar;", 0x28, 0x28 },
    { "&rpar;", 0x29, 0x29 },
    { "&ast;", 0x2a, 0x2a },
    { "&midast;", 0x2a, 0x2a },
    { "&plus;", 0x2b, 0x2b },
    { "&comma;", 0x2c, 0x2c },
    { "&period;", 0x2e, 0x2e },
    { "&sol;", 0x2f, 0x2f },
    { "&colon;", 0x3a, 0x3a },
    { "&semi;", 0x3b, 0x3b },
    { "&lt;", 0x3c, 0x3c },
    { "&LT;", 0x3c, 0x3c },
    { "&equals;", 0x3d, 0x3d },
    { "&gt;", 0x3e, 0x3e },
    { "&GT;", 0x3e, 0x3e },
    { "&quest;", 0x3f, 0x3f },
    { "&commat;", 0x40, 0x40 },
    { "&lsqb;", 0x5b, 0x5b },
    { "&lbrack;", 0x5b, 0x5b },
    { "&bsol;", 0x5c, 0x5c },
    { "&rsqb;", 0x5d, 0x5d },
    { "&rbrack;", 0x5d, 0x5d },
    { "&Hat;", 0x5e, 0x5e },
    { "&lowbar;", 0x5f, 0x5f },
    { "&grave;", 0x60, 0x60 },
    { "&DiacriticalGrave;", 0x60, 0x60 },
    { "&lcub;", 0x7b, 0x7b },
    { "&lbrace;", 0x7b, 0x7b },
    { "&verbar;", 0x7c, 0x7c },
    { "&vert;", 0x7c, 0x7c },
    { "&VerticalLine;", 0x7c, 0x7c },
    { "&rcub;", 0x7d, 0x7d },
    { "&rbrace;", 0x7d, 0x7d },

    // Latin-1 symbols (U+00A0 - U+00BF).
    { "&nbsp;", 0xca, 0xa0 },
    { "&NonBreakingSpace;", 0xca, 0xa0 },
    { "&iexcl;", 0xc1, 0xa1 },
    { "&cent;", 0xa2, 0xa2 },
    { "&pound;", 0xa3, 0xa3 },
    { "&curren;", 0xdb, 0xa4 },
    { "&yen;", 0xb4, 0xa5 },
    { "&sect;", 0xa4, 0xa7 },
    { "&Dot;", 0xac, 0xa8 },
    { "&die;", 0xac, 0xa8 },
    { "&DoubleDot;", 0xac, 0xa8 },
    { "&uml;", 0xac, 0xa8 },
    { "&copy;", 0xa9, 0xa9 },
    { "&COPY;", 0xa9, 0xa9 },
    { "&ordf;", 0xbb, 0xaa },
    { "&laquo;", 0xc7, 0xab },
    { "&not;", 0xc2, 0xac },
    { "&reg;", 0xa8, 0xae },
    { "&circledR;", 0xa8, 0xae },
    { "&REG;", 0xa8, 0xae },
    { "&macr;", 0xf8, 0xaf },
    // The overline (U+203E) has no glyph of its own, so it shares the macron.
    { "&oline;", 0xf8, 0x203e },
    { "&strns;", 0xf8, 0xaf },
    { "&deg;", 0xa1, 0xb0 },
    { "&plusmn;", 0xb1, 0xb1 },
    { "&pm;", 0xb1, 0xb1 },
    { "&PlusMinus;", 0xb1, 0xb1 },
    { "&acute;", 0xab, 0xb4 },
    { "&DiacriticalAcute;", 0xab, 0xb4 },
    { "&micro;", 0xb5, 0xb5 },
    { "&para;", 0xa6, 0xb6 },
    { "&middot;", 0xe1, 0xb7 },
    { "&centerdot;", 0xe1, 0xb7 },
    { "&CenterDot;", 0xe1, 0xb7 },
    { "&cedil;", 0xfc, 0xb8 },
    { "&Cedilla;", 0xfc, 0xb8 },
    { "&ordm;", 0xbc, 0xba },
    { "&raquo;", 0xc8, 0xbb },
    { "&iquest;", 0xc0, 0xbf },

    // Latin-1 upper-case letters.
    { "&Agrave;", 0xcb, 0xc0 },
    { "&Aacute;", 0xe7, 0xc1 },
    { "&Acirc;", 0xe5, 0xc2 },
    { "&Atilde;", 0xcc, 0xc3 },
    { "&Auml;", 0x80, 0xc4 },
    { "&Aring;", 0x81, 0xc5 },
    { "&AElig;", 0xae, 0xc6 },
    { "&Ccedil;", 0x82, 0xc7 },
    { "&Egrave;", 0xe9, 0xc8 },
    { "&Eacute;", 0x83, 0xc9 },
    { "&Ecirc;", 0xe6, 0xca },
    { "&Euml;", 0xe8, 0xcb },
    { "&Igrave;", 0xed, 0xcc },
    { "&Iacute;", 0xea, 0xcd },
    { "&Icirc;", 0xeb, 0xce },
    { "&Iuml;", 0xec, 0xcf },
    { "&Ntilde;", 0x84, 0xd1 },
    { "&Ograve;", 0xf1, 0xd2 },
    { "&Oacute;", 0xee, 0xd3 },
    { "&Ocirc;", 0xef, 0xd4 },
    { "&Otilde;", 0xcd, 0xd5 },
    { "&Ouml;", 0x85, 0xd6 },
    { "&Oslash;", 0xaf, 0xd8 },
    { "&Ugrave;", 0xf4, 0xd9 },
    { "&Uacute;", 0xf2, 0xda },
    { "&Ucirc;", 0xf3, 0xdb },
    { "&Uuml;", 0x86, 0xdc },

    // Latin-1 lower-case letters.
    { "&szlig;", 0xa7, 0xdf },
    { "&agrave;", 0x88, 0xe0 },
    { "&aacute;", 0x87, 0xe1 },
    { "&acirc;", 0x89, 0xe2 },
    { "&atilde;", 0x8b, 0xe3 },
    { "&auml;", 0x8a, 0xe4 },
    { "&aring;", 0x8c, 0xe5 },
    { "&aelig;", 0xbe, 0xe6 },
    { "&ccedil;", 0x8d, 0xe7 },
    { "&egrave;", 0x8f, 0xe8 },
    { "&eacute;", 0x8e, 0xe9 },
    { "&ecirc;", 0x90, 0xea },
    { "&euml;", 0x91, 0xeb },
    { "&igrave;", 0x93, 0xec },
    { "&iacute;", 0x92, 0xed },
    { "&icirc;", 0x94, 0xee },
    { "&iuml;", 0x95, 0xef },
    { "&ntilde;", 0x96, 0xf1 },
    { "&ograve;", 0x98, 0xf2 },
    { "&oacute;", 0x97, 0xf3 },
    { "&ocirc;", 0x99, 0xf4 },
    { "&otilde;", 0x9b, 0xf5 },
    { "&ouml;", 0x9a, 0xf6 },
    { "&divide;", 0xd6, 0xf7 },
    { "&div;", 0xd6, 0xf7 },
    { "&oslash;", 0xbf, 0xf8 },
    { "&ugrave;", 0x9d, 0xf9 },
    { "&uacute;", 0x9c, 0xfa },
    { "&ucirc;", 0x9e, 0xfb },
    { "&uuml;", 0x9f, 0xfc },
    { "&yuml;", 0xd8, 0xff },

    // Characters outside Latin-1: typographic, mathematical and Greek.
    { "&dagger;", 0xa0, 0x2020 },
    { "&bull;", 0xa5, 0x2022 },
    { "&bullet;", 0xa5, 0x2022 },
    { "&trade;", 0xaa, 0x2122 },
    { "&TRADE;", 0xaa, 0x2122 },
    { "&ne;", 0xad, 0x2260 },
    { "&NotEqual;", 0xad, 0x2260 },
    { "&infin;", 0xb0, 0x221e },
    { "&le;", 0xb2, 0x2264 },
    { "&leq;", 0xb2, 0x2264 },
    // NOTE(review): "&LessEqual;" does not appear to be a standard HTML5 name
    // (unlike "&GreaterEqual;"); kept because it is harmless - confirm.
    { "&LessEqual;", 0xb2, 0x2264 },
    { "&ge;", 0xb3, 0x2265 },
    { "&geq;", 0xb3, 0x2265 },
    { "&GreaterEqual;", 0xb3, 0x2265 },
    { "&part;", 0xb6, 0x2202 },
    { "&PartialD;", 0xb6, 0x2202 },
    { "&sum;", 0xb7, 0x2211 },
    { "&Sum;", 0xb7, 0x2211 },
    { "&prod;", 0xb8, 0x220f },
    { "&Product;", 0xb8, 0x220f },
    { "&pi;", 0xb9, 0x3c0 },
    { "&int;", 0xba, 0x222b },
    { "&Integral;", 0xba, 0x222b },
    { "&Omega;", 0xbd, 0x3a9 },
    { "&radic;", 0xc3, 0x221a },
    { "&Sqrt;", 0xc3, 0x221a },
    { "&fnof;", 0xc4, 0x192 },
    { "&asymp;", 0xc5, 0x2248 },
    { "&ap;", 0xc5, 0x2248 },
    { "&TildeTilde;", 0xc5, 0x2248 },
    { "&approx;", 0xc5, 0x2248 },
    { "&thkap;", 0xc5, 0x2248 },
    { "&thickapprox;", 0xc5, 0x2248 },
    { "&Delta;", 0xc6, 0x394 },
    { "&hellip;", 0xc9, 0x2026 },
    { "&mldr;", 0xc9, 0x2026 },
    { "&OElig;", 0xce, 0x152 },
    { "&oelig;", 0xcf, 0x153 },
    { "&ndash;", 0xd0, 0x2013 },
    { "&mdash;", 0xd1, 0x2014 },
    { "&ldquo;", 0xd2, 0x201c },
    { "&OpenCurlyDoubleQuote;", 0xd2, 0x201c },
    { "&rdquo;", 0xd3, 0x201d },
    { "&rdquor;", 0xd3, 0x201d },
    { "&CloseCurlyDoubleQuote;", 0xd3, 0x201d },
    { "&lsquo;", 0xd4, 0x2018 },
    { "&OpenCurlyQuote;", 0xd4, 0x2018 },
    { "&rsquo;", 0xd5, 0x2019 },
    { "&rsquor;", 0xd5, 0x2019 },
    { "&CloseCurlyQuote;", 0xd5, 0x2019 },
    { "&loz;", 0xd7, 0x25ca },
    { "&lozenge;", 0xd7, 0x25ca },
    { "&Yuml;", 0xd9, 0x178 },
    { "&frasl;", 0xda, 0x2044 },
    { "&lsaquo;", 0xdc, 0x2039 },
    { "&rsaquo;", 0xdd, 0x203a },
    { "&filig;", 0xde, 0xfb01 },
    { "&fllig;", 0xdf, 0xfb02 },
    { "&Dagger;", 0xe0, 0x2021 },
    { "&ddagger;", 0xe0, 0x2021 },
    { "&sbquo;", 0xe2, 0x201a },
    { "&lsquor;", 0xe2, 0x201a },
    { "&bdquo;", 0xe3, 0x201e },
    { "&ldquor;", 0xe3, 0x201e },
    { "&permil;", 0xe4, 0x2030 },
    // Apple logo: a private-use code point with no named entity, so the name
    // is left empty and the row can only be selected by its numeric value.
    { "", 0xf0, 0xf8ff },
    { "&imath;", 0xf5, 0x131 },
    { "&inodot;", 0xf5, 0x131 },

    // Spacing diacritics.
    { "&circ;", 0xf6, 0x2c6 },
    { "&tilde;", 0xf7, 0x2dc },
    { "&DiacriticalTilde;", 0xf7, 0x2dc },
    { "&breve;", 0xf9, 0x2d8 },
    { "&Breve;", 0xf9, 0x2d8 },
    { "&dot;", 0xfa, 0x2d9 },
    { "&DiacriticalDot;", 0xfa, 0x2d9 },
    { "&ring;", 0xfb, 0x2da },
    { "&dblac;", 0xfd, 0x2dd },
    { "&DiacriticalDoubleAcute;", 0xfd, 0x2dd },
    { "&ogon;", 0xfe, 0x2db },
    { "&caron;", 0xff, 0x2c7 },
    { "&Hacek;", 0xff, 0x2c7 },
    // GS_TODO - Test each of these entities.
};
// Implementation
@ -185,7 +407,7 @@ static int enterBlockHook(MD_BLOCKTYPE type, void * detail, void * userdata)
fprintf(outputFile, "%u%c ", enclosingBlock->u.olDetail.start, enclosingBlock->u.olDetail.mark_delimiter);
enclosingBlock->u.olDetail.start++;
} else {
fprintf(outputFile, "%c ", 0xd7); // 0xd7 is a diamond looking character which is good for a bullet
fprintf(outputFile, "%c ", 0xa5); // 0xa5 is a bullet character
}
break;
@ -408,16 +630,44 @@ static int leaveSpanHook(MD_SPANTYPE type, void * detail, void * userdata)
static void printEntity(const MD_CHAR * text, MD_SIZE size)
{
int entityNum;
uint32_t unicodeChar = 0;
if (size < 4)
return;
if (text[0] != '&')
return;
if (text[size - 1] != ';')
return;
if (text[1] == '#') {
char * end;
unicodeChar = strtoul(text + 2, &end, 10);
if (end != text + size - 1)
unicodeChar = 0;
if ((unicodeChar > 0) &&
(unicodeChar < 128)) {
fputc(unicodeChar, outputFile);
return;
}
}
if (text[1] == 'x') {
char * end;
unicodeChar = strtoul(text + 2, &end, 16);
if (end != text + size - 1)
unicodeChar = 0;
if ((unicodeChar > 0) &&
(unicodeChar < 128)) {
fputc(unicodeChar, outputFile);
return;
}
}
for (entityNum = 0; entityNum < (sizeof(entities) / sizeof(entities[0])); entityNum++) {
int offset;
char * entityString = entities[entityNum].entityString;
for (offset = 0; offset < size; offset++) {
if (tolower(text[offset]) != entityString[offset])
break;
}
if (offset >= size) {
if ((unicodeChar == entities[entityNum].unicodeChar) ||
(strncmp(entities[entityNum].entityString, text, size) == 0)) {
fputc(entities[entityNum].entityChar, outputFile);
return;
}

View File

@ -29,7 +29,7 @@ fenced code
[ref]: /url
paragraph
&copy; &#1234; &#xabcd;
&#65;&#169;&xA9;&copy; &#1234; &#xabcd;
`code`
*emph* **strong** ***strong emph***
_emph_ __strong__ ___strong emph___