diff --git a/md2teach/main.c b/md2teach/main.c index f7cbf52..a677911 100644 --- a/md2teach/main.c +++ b/md2teach/main.c @@ -52,6 +52,7 @@ typedef struct tEntity { const char * entityString; char entityChar; + uint32_t unicodeChar; } tEntity; // Forward declarations @@ -89,8 +90,229 @@ tBlockListItem * blockList = NULL; FILE * outputFile; tEntity entities[] = { - { "©", 0xa9 } - // GS_TODO - Add more to the entity table to fill out the extended character set of the GS. + { " ", 0x9, 0x9 }, + { " ", 0x13, 0x10 }, + { "!", 0x21, 0x21 }, + { """, 0x22, 0x22 }, + { """, 0x22, 0x22 }, + { "#", 0x23, 0x23 }, + { "$", 0x24, 0x24 }, + { "%", 0x25, 0x25 }, + { "&", 0x26, 0x26 }, + { "'", 0x27, 0x27 }, + { "(", 0x28, 0x28 }, + { ")", 0x29, 0x29 }, + { "*", 0x2a, 0x2a }, + { "*", 0x2a, 0x2a }, + { "+", 0x2b, 0x2b }, + { ",", 0x2c, 0x2c }, + { ".", 0x2e, 0x2e }, + { "/", 0x2f, 0x2f }, + { ":", 0x3a, 0x3a }, + { ";", 0x3b, 0x3b }, + { "<", 0x3c, 0x3c }, + { "<", 0x3c, 0x3c }, + { "=", 0x3d, 0x3d }, + { ">", 0x3e, 0x3e }, + { ">", 0x3e, 0x3e }, + { "?", 0x3f, 0x3f }, + { "@", 0x40, 0x40 }, + { "[", 0x5b, 0x5b }, + { "[", 0x5b, 0x5b }, + { "\", 0x5c, 0x5c }, + { "]", 0x5d, 0x5d }, + { "]", 0x5d, 0x5d }, + { "^", 0x5e, 0x5e }, + { "_", 0x5f, 0x5f }, + { "`", 0x60, 0x60 }, + { "`", 0x60, 0x60 }, + { "{", 0x7b, 0x7b }, + { "{", 0x7b, 0x7b }, + { "|", 0x7c, 0x7c }, + { "|", 0x7c, 0x7c }, + { "|", 0x7c, 0x7c }, + { "}", 0x7d, 0x7d }, + { "}", 0x7d, 0x7d }, + { " ", 0xca, 0xa0 }, + { " ", 0xca, 0xa0 }, + { "¡", 0xc1, 0xa1 }, + { "¢", 0xa2, 0xa2 }, + { "£", 0xa3, 0xa3 }, + { "¤", 0xdb, 0xa4 }, + { "¥", 0xb4, 0xa5 }, + { "§", 0xa4, 0xa7 }, + { "¨", 0xac, 0xa8 }, + { "¨", 0xac, 0xa8 }, + { "¨", 0xac, 0xa8 }, + { "¨", 0xac, 0xa8 }, + { "©", 0xa9, 0xa9 }, + { "©", 0xa9, 0xa9 }, + { "ª", 0xbb, 0xaa }, + { "«", 0xc7, 0xab }, + { "¬", 0xc2, 0xac }, + { "®", 0xa8, 0xae }, + { "&circleR;", 0xa8, 0xae }, + { "®", 0xa8, 0xae }, + { "¯", 0xf8, 0xaf }, + { "‾", 0xf8, 0xaf }, + { "¯", 0xf8, 0xaf }, + 
{ "°", 0xa1, 0xb0 }, + { "±", 0xb1, 0xb1 }, + { "±", 0xb1, 0xb1 }, + { "±", 0xb1, 0xb1 }, + { "´", 0xab, 0xb4 }, + { "´", 0xab, 0xb4 }, + { "µ", 0xb5, 0xb5 }, + { "¶", 0xa6, 0xb6 }, + { "·", 0xe1, 0xb7 }, + { "·", 0xe1, 0xb7 }, + { "·", 0xe1, 0xb7 }, + { "¸", 0xfc, 0xb8 }, + { "¸", 0xfc, 0xb8 }, + { "º", 0xbc, 0xba }, + { "»", 0xc8, 0xbb }, + { "¿", 0xc0, 0xbf }, + { "À", 0xcb, 0xc0 }, + { "Á", 0xe7, 0xc1 }, + { "Â", 0xe5, 0xc2 }, + { "Ã", 0xcc, 0xc3 }, + { "Ä", 0x80, 0xc4 }, + { "Å", 0x81, 0xc5 }, + { "Æ", 0xae, 0xc6 }, + { "Ç", 0x82, 0xc7 }, + { "È", 0xe9, 0xc8 }, + { "É", 0x83, 0xc9 }, + { "Ê", 0xe6, 0xca }, + { "Ë", 0xe8, 0xcb }, + { "Ì", 0xed, 0xcc }, + { "Í", 0xea, 0xcd }, + { "Î", 0xeb, 0xce }, + { "Ï", 0xec, 0xcf }, + { "Ñ", 0x84, 0xd1 }, + { "Ò", 0xf1, 0xd2 }, + { "Ó", 0xee, 0xd3 }, + { "Ô", 0xef, 0xd4 }, + { "Õ", 0xcd, 0xd5 }, + { "Ö", 0x85, 0xd6 }, + { "Ø", 0xaf, 0xd8 }, + { "Ù", 0xf4, 0xd9 }, + { "Ú", 0xf2, 0xda }, + { "Û", 0xf3, 0xdb }, + { "Ü", 0x86, 0xdc }, + { "ß", 0xa7, 0xdf }, + { "à", 0x88, 0xe0 }, + { "á", 0x87, 0xe1 }, + { "â", 0x89, 0xe2 }, + { "ã", 0x8b, 0xe3 }, + { "ä", 0x8a, 0xe4 }, + { "å", 0x8c, 0xe5 }, + { "æ", 0xbe, 0xe6 }, + { "ç", 0x8d, 0xe7 }, + { "è", 0x8f, 0xe8 }, + { "é", 0x8e, 0xe9 }, + { "ê", 0x90, 0xea }, + { "ë", 0x91, 0xeb }, + { "ì", 0x93, 0xec }, + { "í", 0x92, 0xed }, + { "î", 0x94, 0xee }, + { "ï", 0x95, 0xef }, + { "ñ", 0x96, 0xf1 }, + { "ò", 0x98, 0xf2 }, + { "ó", 0x97, 0xf3 }, + { "ô", 0x99, 0xf4 }, + { "õ", 0x9b, 0xf5 }, + { "ö", 0x9a, 0xf6 }, + { "÷", 0xd6, 0xf7 }, + { "÷", 0xd6, 0xf7 }, + { "ø", 0xbf, 0xf8 }, + { "ù", 0x9d, 0xf9 }, + { "ú", 0x9c, 0xfa }, + { "û", 0x9e, 0xfb }, + { "ü", 0x9f, 0xfc }, + { "ÿ", 0xd8, 0xff }, + { "†", 0xa0, 0x2020 }, + { "•", 0xa5, 0x2022 }, + { "•", 0xa5, 0x2022 }, + { "™", 0xaa, 0x2122 }, + { "™", 0xaa, 0x2122 }, + { "≠", 0xad, 0x2260 }, + { "≠", 0xad, 0x2260 }, + { "∞", 0xb0, 0x221e }, + { "≤", 0xb2, 0x2264 }, + { "≤", 0xb2, 0x2264 }, + { "&LessEqual;", 0xb2, 0x2264 }, + { "≥", 0xb3, 
0x2265 }, + { "≥", 0xb3, 0x2265 }, + { "≥", 0xb3, 0x2265 }, + { "∂", 0xb6, 0x2202 }, + { "∂", 0xb6, 0x2202 }, + { "∑", 0xb7, 0x2211 }, + { "∑", 0xb7, 0x2211 }, + { "∏", 0xb8, 0x220f }, + { "∏", 0xb8, 0x220f }, + { "π", 0xb9, 0x3c0 }, + { "∫", 0xba, 0x222b }, + { "∫", 0xba, 0x222b }, + { "Ω", 0xbd, 0x3a9 }, + { "√", 0xc3, 0x221a }, + { "√", 0xc3, 0x221a }, + { "ƒ", 0xc4, 0x192 }, + { "≈", 0xc5, 0x2248 }, + { "≈", 0xc5, 0x2248 }, + { "≈", 0xc5, 0x2248 }, + { "≈", 0xc5, 0x2248 }, + { "≈", 0xc5, 0x2248 }, + { "≈", 0xc5, 0x2248 }, + { "Δ", 0xc6, 0x394 }, + { "…", 0xc9, 0x2026 }, + { "…", 0xc9, 0x2026 }, + { "Œ", 0xce, 0x152 }, + { "œ", 0xcf, 0x153 }, + { "–", 0xd0, 0x2013 }, + { "—", 0xd1, 0x2014 }, + { "“", 0xd2, 0x201c }, + { "“", 0xd2, 0x201c }, + { "”", 0xd3, 0x201d }, + { "”", 0xd3, 0x201d }, + { "”", 0xd3, 0x201d }, + { "‘", 0xd4, 0x2018 }, + { "‘", 0xd4, 0x2018 }, + { "’", 0xd5, 0x2019 }, + { "’", 0xd5, 0x2019 }, + { "’", 0xd5, 0x2019 }, + { "◊", 0xd7, 0x25ca }, + { "◊", 0xd7, 0x25ca }, + { "Ÿ", 0xd9, 0x178 }, + { "⁄", 0xda, 0x2044 }, + { "‹", 0xdc, 0x2039 }, + { "›", 0xdd, 0x203a }, + { "fi", 0xde, 0xfb01 }, + { "fl", 0xdf, 0xfb02 }, + { "‡", 0xe0, 0x2021 }, + { "‡", 0xe0, 0x2021 }, + { "‚", 0xe2, 0x201a }, + { "‚", 0xe2, 0x201a }, + { "„", 0xe3, 0x201e }, + { "„", 0xe3, 0x201e }, + { "‰", 0xe4, 0x2030 }, + { "", 0xf0, 0xf8ff }, + { "ı", 0xf5, 0x131 }, + { "ı", 0xf5, 0x131 }, + { "ˆ", 0xf6, 0x2c6 }, + { "˜", 0xf7, 0x2dc }, + { "˜", 0xf7, 0x2dc }, + { "˘", 0xf9, 0x2d8 }, + { "˘", 0xf9, 0x2d8 }, + { "˙", 0xfa, 0x2d9 }, + { "˙", 0xfa, 0x2d9 }, + { "˚", 0xfb, 0x2da }, + { "˝", 0xfd, 0x2dd }, + { "˝", 0xfd, 0x2dd }, + { "˛", 0xfe, 0x2db }, + { "ˇ", 0xff, 0x2c7 }, + { "ˇ", 0xff, 0x2c7 }, + + // GS_TODO - Test each of these entities. 
}; // Implementation
@@ -185,7 +407,7 @@ static int enterBlockHook(MD_BLOCKTYPE type, void * detail, void * userdata)
                     fprintf(outputFile, "%u%c ", enclosingBlock->u.olDetail.start, enclosingBlock->u.olDetail.mark_delimiter);
                     enclosingBlock->u.olDetail.start++;
                 } else {
-                    fprintf(outputFile, "%c ", 0xd7); // 0xd7 is a diamond looking character which is good for a bullet
+                    fprintf(outputFile, "%c ", 0xa5); // 0xa5 is a bullet character
                 }
                 break;
 
@@ -408,16 +630,70 @@ static int leaveSpanHook(MD_SPANTYPE type, void * detail, void * userdata)
 static void printEntity(const MD_CHAR * text, MD_SIZE size)
 {
     int entityNum;
+    uint32_t unicodeChar = 0;
+
+    // Nothing shorter than four characters (e.g. "&lt;") can be an entity.
+    if (size < 4)
+        return;
+
+    if (text[0] != '&')
+        return;
+
+    if (text[size - 1] != ';')
+        return;
+
+    // Numeric character references are written "&#NNN;" in decimal or
+    // "&#xHHH;" in hexadecimal.  The digits are accumulated by hand, bounded
+    // by size, so the scan never reads past the entity and never accepts a
+    // sign, white space or "0x" prefix the way strtoul() would.  An empty
+    // or malformed digit string, or a value beyond the Unicode range,
+    // leaves unicodeChar at 0 so the code point lookup below is skipped.
+    if (text[1] == '#') {
+        MD_SIZE offset = 2;
+        uint32_t base = 10;
+
+        if ((text[2] == 'x') ||
+            (text[2] == 'X')) {
+            base = 16;
+            offset = 3;
+        }
+
+        for (; offset < size - 1; offset++) {
+            MD_CHAR ch = text[offset];
+            uint32_t digit;
+
+            if ((ch >= '0') && (ch <= '9'))
+                digit = ch - '0';
+            else if ((base == 16) && (ch >= 'a') && (ch <= 'f'))
+                digit = ch - 'a' + 10;
+            else if ((base == 16) && (ch >= 'A') && (ch <= 'F'))
+                digit = ch - 'A' + 10;
+            else {
+                unicodeChar = 0;
+                break;
+            }
+
+            unicodeChar = unicodeChar * base + digit;
+            if (unicodeChar > 0x10ffff) {
+                unicodeChar = 0;
+                break;
+            }
+        }
+
+        // Code points below 128 are written to the output unchanged.
+        if ((unicodeChar > 0) &&
+            (unicodeChar < 128)) {
+            fputc((int)unicodeChar, outputFile);
+            return;
+        }
+    }
 
     for (entityNum = 0; entityNum < (sizeof(entities) / sizeof(entities[0])); entityNum++) {
-        int offset;
-        char * entityString = entities[entityNum].entityString;
-
-        for (offset = 0; offset < size; offset++) {
-            if (tolower(text[offset]) != entityString[offset])
-                break;
-        }
-        if (offset >= size) {
+        // A numeric reference is looked up by code point, a named one by its
+        // exact (case sensitive) entity string.
+        if (((unicodeChar != 0) &&
+             (unicodeChar == entities[entityNum].unicodeChar)) ||
+            (strncmp(entities[entityNum].entityString, text, size) == 0)) {
             fputc(entities[entityNum].entityChar, outputFile);
             return;
         }
diff --git a/md2teach/test.md b/md2teach/test.md
index 4c9fb1c..b74fc55 100644
--- a/md2teach/test.md
+++ b/md2teach/test.md
@@ -29,7 +29,7 @@ fenced code
 [ref]: /url
 
 paragraph
-© Ӓ ꯍ
+A©&#xA9;© Ӓ ꯍ
 
 `code` *emph* **strong** ***strong emph*** _emph_ __strong__ ___strong emph___
 