/* * translate.c * md2teach * * Created by Jeremy Rand on 2021-04-24. * */ #include #include #include "translate.h" #include "io.h" #include "main.h" #include "style.h" // Typedefs typedef struct tEntity { const char * entityString; char entityChar; uint32_t unicodeChar; } tEntity; typedef struct tBlockListItem { MD_BLOCKTYPE type; union { MD_BLOCK_UL_DETAIL ulDetail; MD_BLOCK_OL_DETAIL olDetail; MD_BLOCK_H_DETAIL hDetail; MD_BLOCK_CODE_DETAIL codeDetail; } u; int numTabs; tStyleType styleType; struct tBlockListItem * next; } tBlockListItem; // Forward declarations static int enterBlockHook(MD_BLOCKTYPE type, void * detail, void * userdata); static int leaveBlockHook(MD_BLOCKTYPE type, void * detail, void * userdata); static int enterSpanHook(MD_SPANTYPE type, void * detail, void * userdata); static int leaveSpanHook(MD_SPANTYPE type, void * detail, void * userdata); static int textHook(MD_TEXTTYPE type, const MD_CHAR * text, MD_SIZE size, void * userdata); static void debugLogHook(const char * message, void * userdata); // Globals static MD_PARSER parser = { 0, // abi_version MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS, // flags enterBlockHook, leaveBlockHook, enterSpanHook, leaveSpanHook, textHook, debugLogHook, NULL // syntax }; static int debugIndentLevel = 0; static tBlockListItem * blockList = NULL; static uint16_t textStyleMask = STYLE_TEXT_PLAIN; static tEntity entities[] = { { " ", 0x9, 0x9 }, { " ", 0x13, 0x10 }, { "!", 0x21, 0x21 }, { """, 0x22, 0x22 }, { """, 0x22, 0x22 }, { "#", 0x23, 0x23 }, { "$", 0x24, 0x24 }, { "%", 0x25, 0x25 }, { "&", 0x26, 0x26 }, { "'", 0x27, 0x27 }, { "(", 0x28, 0x28 }, { ")", 0x29, 0x29 }, { "*", 0x2a, 0x2a }, { "*", 0x2a, 0x2a }, { "+", 0x2b, 0x2b }, { ",", 0x2c, 0x2c }, { ".", 0x2e, 0x2e }, { "/", 0x2f, 0x2f }, { ":", 0x3a, 0x3a }, { ";", 0x3b, 0x3b }, { "<", 0x3c, 0x3c }, { "<", 0x3c, 0x3c }, { "=", 0x3d, 0x3d }, { ">", 0x3e, 0x3e }, { ">", 0x3e, 0x3e }, { "?", 0x3f, 0x3f }, { "@", 0x40, 0x40 }, { "[", 0x5b, 0x5b }, { "[", 0x5b, 0x5b }, { "\", 0x5c, 0x5c }, { "]", 0x5d, 0x5d }, { "]", 0x5d, 0x5d }, { "^", 0x5e, 0x5e }, { "_", 0x5f, 0x5f }, { "`", 0x60, 0x60 }, { "`", 0x60, 0x60 }, { "{", 0x7b, 0x7b }, { "{", 0x7b, 0x7b }, { "|", 0x7c, 0x7c }, { "|", 0x7c, 0x7c }, { "|", 0x7c, 0x7c }, { "}", 0x7d, 0x7d }, { "}", 0x7d, 0x7d }, { " ", 0xca, 0xa0 }, { " ", 0xca, 0xa0 }, { "¡", 0xc1, 0xa1 }, { "¢", 0xa2, 0xa2 }, { "£", 0xa3, 0xa3 }, { "¤", 0xdb, 0xa4 }, { "¥", 0xb4, 0xa5 }, { "§", 0xa4, 0xa7 }, { "¨", 0xac, 0xa8 }, { "¨", 0xac, 0xa8 }, { "¨", 0xac, 0xa8 }, { "¨", 0xac, 0xa8 }, { "©", 0xa9, 0xa9 }, { "©", 0xa9, 0xa9 }, { "ª", 0xbb, 0xaa }, { "«", 0xc7, 0xab }, { "¬", 0xc2, 0xac }, { "®", 0xa8, 0xae }, { "&circleR;", 0xa8, 0xae }, { "®", 0xa8, 0xae }, { "¯", 0xf8, 0xaf }, { "‾", 0xf8, 0xaf }, { "¯", 0xf8, 0xaf }, { "°", 0xa1, 0xb0 }, { "±", 0xb1, 0xb1 }, { "±", 0xb1, 0xb1 }, { "±", 0xb1, 0xb1 }, { "´", 0xab, 0xb4 }, { "´", 0xab, 0xb4 }, { "µ", 0xb5, 0xb5 }, { "¶", 0xa6, 0xb6 }, { "·", 0xe1, 0xb7 }, { "·", 0xe1, 0xb7 }, { "·", 0xe1, 0xb7 }, { "¸", 0xfc, 0xb8 }, { "¸", 0xfc, 0xb8 }, { "º", 0xbc, 0xba }, { "»", 0xc8, 0xbb }, { "¿", 0xc0, 0xbf }, { "À", 0xcb, 0xc0 }, { "Á", 0xe7, 0xc1 }, { "Â", 0xe5, 0xc2 }, { "Ã", 0xcc, 0xc3 }, { "Ä", 0x80, 0xc4 }, { "Å", 0x81, 0xc5 }, { "Æ", 0xae, 0xc6 }, { "Ç", 0x82, 0xc7 }, { "È", 0xe9, 0xc8 }, { "É", 0x83, 0xc9 }, { "Ê", 0xe6, 0xca }, { "Ë", 0xe8, 0xcb }, { "Ì", 0xed, 0xcc }, { "Í", 0xea, 0xcd }, { "Î", 0xeb, 0xce }, { "Ï", 0xec, 0xcf }, { "Ñ", 0x84, 0xd1 }, { "Ò", 0xf1, 0xd2 }, { "Ó", 0xee, 0xd3 }, { "Ô", 0xef, 0xd4 }, { "Õ", 0xcd, 0xd5 }, { "Ö", 0x85, 0xd6 }, { "Ø", 0xaf, 0xd8 }, { "Ù", 0xf4, 0xd9 }, { "Ú", 0xf2, 0xda }, { "Û", 0xf3, 0xdb }, { "Ü", 0x86, 0xdc }, { "ß", 0xa7, 0xdf }, { "à", 0x88, 0xe0 }, { "á", 0x87, 0xe1 }, { "â", 0x89, 0xe2 }, { "ã", 0x8b, 0xe3 }, { "ä", 0x8a, 0xe4 }, { "å", 0x8c, 0xe5 }, { "æ", 0xbe, 0xe6 }, { "ç", 0x8d, 0xe7 }, { "è", 0x8f, 0xe8 }, { "é", 0x8e, 0xe9 }, { "ê", 0x90, 0xea }, { "ë", 0x91, 0xeb }, { "ì", 0x93, 0xec }, { "í", 0x92, 0xed }, { "î", 0x94, 0xee }, { "ï", 0x95, 0xef }, { "ñ", 0x96, 0xf1 }, { "ò", 0x98, 0xf2 }, { "ó", 0x97, 0xf3 }, { "ô", 0x99, 0xf4 }, { "õ", 0x9b, 0xf5 }, { "ö", 0x9a, 0xf6 }, { "÷", 0xd6, 0xf7 }, { "÷", 0xd6, 0xf7 }, { "ø", 0xbf, 0xf8 }, { "ù", 0x9d, 0xf9 }, { "ú", 0x9c, 0xfa }, { "û", 0x9e, 0xfb }, { "ü", 0x9f, 0xfc }, { "ÿ", 0xd8, 0xff }, { "†", 0xa0, 0x2020 }, { "•", 0xa5, 0x2022 }, { "•", 0xa5, 0x2022 }, { "™", 0xaa, 0x2122 }, { "™", 0xaa, 0x2122 }, { "≠", 0xad, 0x2260 }, { "≠", 0xad, 0x2260 }, { "∞", 0xb0, 0x221e }, { "≤", 0xb2, 0x2264 }, { "≤", 0xb2, 0x2264 }, { "&LessEqual;", 0xb2, 0x2264 }, { "≥", 0xb3, 0x2265 }, { "≥", 0xb3, 0x2265 }, { "≥", 0xb3, 0x2265 }, { "∂", 0xb6, 0x2202 }, { "∂", 0xb6, 0x2202 }, { "∑", 0xb7, 0x2211 }, { "∑", 0xb7, 0x2211 }, { "∏", 0xb8, 0x220f }, { "∏", 0xb8, 0x220f }, { "π", 0xb9, 0x3c0 }, { "∫", 0xba, 0x222b }, { "∫", 0xba, 0x222b }, { "Ω", 0xbd, 0x3a9 }, { "√", 0xc3, 0x221a }, { "√", 0xc3, 0x221a }, { "ƒ", 0xc4, 0x192 }, { "≈", 0xc5, 0x2248 }, { "≈", 0xc5, 0x2248 }, { "≈", 0xc5, 0x2248 }, { "≈", 0xc5, 0x2248 }, { "≈", 0xc5, 0x2248 }, { "≈", 0xc5, 0x2248 }, { "Δ", 0xc6, 0x394 }, { "…", 0xc9, 0x2026 }, { "…", 0xc9, 0x2026 }, { "Œ", 0xce, 0x152 }, { "œ", 0xcf, 0x153 }, { "–", 0xd0, 0x2013 }, { "—", 0xd1, 0x2014 }, { "“", 0xd2, 0x201c }, { "“", 0xd2, 0x201c }, { "”", 0xd3, 0x201d }, { "”", 0xd3, 0x201d }, { "”", 0xd3, 0x201d }, { "‘", 0xd4, 0x2018 }, { "‘", 0xd4, 0x2018 }, { "’", 0xd5, 0x2019 }, { "’", 0xd5, 0x2019 }, { "’", 0xd5, 0x2019 }, { "◊", 0xd7, 0x25ca }, { "◊", 0xd7, 0x25ca }, { "Ÿ", 0xd9, 0x178 }, { "⁄", 0xda, 0x2044 }, { "‹", 0xdc, 0x2039 }, { "›", 0xdd, 0x203a }, { "fi", 0xde, 0xfb01 }, { "fl", 0xdf, 0xfb02 }, { "‡", 0xe0, 0x2021 }, { "‡", 0xe0, 0x2021 }, { "‚", 0xe2, 0x201a }, { "‚", 0xe2, 0x201a }, { "„", 0xe3, 0x201e }, { "„", 0xe3, 0x201e }, { "‰", 0xe4, 0x2030 }, { "", 0xf0, 0xf8ff }, { "ı", 0xf5, 0x131 }, { "ı", 0xf5, 0x131 }, { "ˆ", 0xf6, 0x2c6 }, { "˜", 0xf7, 0x2dc }, { "˜", 0xf7, 0x2dc }, { "˘", 0xf9, 0x2d8 }, { "˘", 0xf9, 0x2d8 }, { "˙", 0xfa, 0x2d9 }, { "˙", 0xfa, 0x2d9 }, { "˚", 0xfb, 0x2da }, { "˝", 0xfd, 0x2dd }, { "˝", 0xfd, 0x2dd }, { "˛", 0xfe, 0x2db }, { "ˇ", 0xff, 0x2c7 }, { "ˇ", 0xff, 0x2c7 }, // GS_TODO - Test each of these entities. }; // Implementation static int enterBlockHook(MD_BLOCKTYPE type, void * detail, void * userdata) { static int isFirstNonDocumentBlock = 1; int shouldInsertCR = 1; uint16_t headerSize = 0; tBlockListItem * newBlock = malloc(sizeof(tBlockListItem)); if (newBlock == NULL) { fprintf(stderr, "%s: Out of memory", commandName); return 1; } newBlock->type = type; if (blockList == NULL) { newBlock->numTabs = 0; newBlock->styleType = STYLE_TYPE_TEXT; } else { newBlock->numTabs = blockList->numTabs; if (blockList->styleType == STYLE_TYPE_QUOTE) newBlock->styleType = STYLE_TYPE_QUOTE; newBlock->styleType = STYLE_TYPE_TEXT; } newBlock->next = blockList; blockList = newBlock; if ((detail != NULL) && (detail < lowestStackSeen)) lowestStackSeen = detail; switch (type) { case MD_BLOCK_DOC: if (debugEnabled) fprintf(stderr, "%*sDOC {\n", debugIndentLevel, ""); newBlock->styleType = STYLE_TYPE_TEXT; shouldInsertCR = 0; break; case MD_BLOCK_QUOTE: if (debugEnabled) fprintf(stderr, "%*sQUOTE {\n", debugIndentLevel, ""); newBlock->styleType = STYLE_TYPE_QUOTE; shouldInsertCR = 0; break; case MD_BLOCK_UL: { MD_BLOCK_UL_DETAIL * ulDetail = (MD_BLOCK_UL_DETAIL *)detail; if (debugEnabled) fprintf(stderr, "%*sUL (is_tight=%d, mark=%c) {\n", debugIndentLevel, "", ulDetail->is_tight, ulDetail->mark); memcpy(&(newBlock->u.ulDetail), ulDetail, sizeof(*ulDetail)); newBlock->numTabs++; break; } case MD_BLOCK_OL: { MD_BLOCK_OL_DETAIL * olDetail = (MD_BLOCK_OL_DETAIL *)detail; if (debugEnabled) fprintf(stderr, "%*sOL (start=%u, is_tight=%d, mark_delimiter=%c) {\n", debugIndentLevel, "", olDetail->start, olDetail->is_tight, olDetail->mark_delimiter); memcpy(&(newBlock->u.olDetail), olDetail, sizeof(*olDetail)); newBlock->numTabs++; break; } case MD_BLOCK_LI: { int i; tBlockListItem * enclosingBlock = newBlock->next; if (debugEnabled) fprintf(stderr, "%*sLI {\n", debugIndentLevel, ""); if (enclosingBlock == NULL) { fprintf(stderr, "%s: Got a list item block without an enclosing block\n", commandName); return 1; } if (enclosingBlock->type == MD_BLOCK_OL) { shouldInsertCR = !enclosingBlock->u.olDetail.is_tight; } else if (enclosingBlock->type == MD_BLOCK_UL) { shouldInsertCR = !enclosingBlock->u.ulDetail.is_tight; } break; } case MD_BLOCK_HR: if (debugEnabled) fprintf(stderr, "%*sHR {\n", debugIndentLevel, ""); break; case MD_BLOCK_H: { MD_BLOCK_H_DETAIL * hDetail = (MD_BLOCK_H_DETAIL *)detail; if (debugEnabled) fprintf(stderr, "%*sH (level=%u) {\n", debugIndentLevel, "", hDetail->level); memcpy(&(newBlock->u.hDetail), hDetail, sizeof(*hDetail)); setStyle(STYLE_TYPE_TEXT, textStyleMask, headerSize); if (!isFirstNonDocumentBlock) writeChar('\r'); headerSize = hDetail->level; shouldInsertCR = 0; newBlock->styleType = STYLE_TYPE_HEADER; break; } case MD_BLOCK_CODE: { MD_BLOCK_CODE_DETAIL * codeDetail = (MD_BLOCK_CODE_DETAIL *)detail; if (debugEnabled) { fprintf(stderr, "%*sCODE ", debugIndentLevel, ""); if (codeDetail->fence_char != '\0') { fprintf(stderr, "(fence_char=%c) ", codeDetail->fence_char); } fprintf(stderr, "{\n"); } memcpy(&(newBlock->u.codeDetail), codeDetail, sizeof(*codeDetail)); newBlock->styleType = STYLE_TYPE_CODE; break; } case MD_BLOCK_P: if (debugEnabled) fprintf(stderr, "%*sP {\n", debugIndentLevel, ""); break; default: fprintf(stderr, "%s: Invalid block type (%d)\n", commandName, (int)type); return 1; break; } setStyle(newBlock->styleType, textStyleMask, headerSize); if ((!isFirstNonDocumentBlock) && (shouldInsertCR)) writeChar('\r'); switch (type) { case MD_BLOCK_LI: { int i; tBlockListItem * enclosingBlock = newBlock->next; static char str[16]; for (i = 0; i < newBlock->numTabs; i++) writeChar('\t'); if (enclosingBlock->type == MD_BLOCK_OL) { sprintf(str, "%u%c ", enclosingBlock->u.olDetail.start, enclosingBlock->u.olDetail.mark_delimiter); enclosingBlock->u.olDetail.start++; } else { sprintf(str, "%c ", 0xa5); // 0xa5 is a bullet character } writeString(str, strlen(str)); break; } case MD_BLOCK_HR: { int i; for (i = 0; i < 30; i++) writeChar('_'); break; } case MD_BLOCK_DOC: case MD_BLOCK_QUOTE: case MD_BLOCK_UL: case MD_BLOCK_OL: case MD_BLOCK_H: case MD_BLOCK_CODE: case MD_BLOCK_P: break; default: fprintf(stderr, "%s: Invalid block type (%d)\n", commandName, (int)type); return 1; break; } if (type != MD_BLOCK_DOC) isFirstNonDocumentBlock = 0; debugIndentLevel+=2; return 0; } static int leaveBlockHook(MD_BLOCKTYPE type, void * detail, void * userdata) { tBlockListItem * oldBlock = blockList; if (oldBlock == NULL) { fprintf(stderr, "%s: Block list is empty but leaving block of type %d\n", commandName, (int)type); return 1; } if (oldBlock->type != type) { fprintf(stderr, "%s: Expected to leave block of type %d but got type %d\n", commandName, (int)oldBlock->type, (int)type); return 1; } blockList = oldBlock->next; free(oldBlock); if ((detail != NULL) && (detail < lowestStackSeen)) lowestStackSeen = detail; switch (type) { case MD_BLOCK_DOC: break; case MD_BLOCK_QUOTE: break; case MD_BLOCK_UL: writeChar('\r'); break; case MD_BLOCK_OL: writeChar('\r'); break; case MD_BLOCK_LI: writeChar('\r'); break; case MD_BLOCK_HR: writeChar('\r'); break; case MD_BLOCK_H: writeChar('\r'); break; case MD_BLOCK_CODE: writeChar('\r'); break; case MD_BLOCK_P: writeChar('\r'); break; default: fprintf(stderr, "%s: Invalid block type (%d)\n", commandName, (int)type); return 1; break; } if (blockList != NULL) setStyle(blockList->styleType, textStyleMask, 0); debugIndentLevel-=2; if (debugEnabled) fprintf(stderr, "%*s}\n", debugIndentLevel, ""); return 0; } static int enterSpanHook(MD_SPANTYPE type, void * detail, void * userdata) { if ((detail != NULL) && (detail < lowestStackSeen)) lowestStackSeen = detail; switch (type) { case MD_SPAN_EM: if (debugEnabled) fprintf(stderr, "%*sEM {\n", debugIndentLevel, ""); textStyleMask |= STYLE_TEXT_MASK_EMPHASIZED; setStyle(blockList->styleType, textStyleMask, blockList->u.hDetail.level); break; case MD_SPAN_STRONG: if (debugEnabled) fprintf(stderr, "%*sSTRONG {\n", debugIndentLevel, ""); textStyleMask |= STYLE_TEXT_MASK_STRONG; setStyle(blockList->styleType, textStyleMask, blockList->u.hDetail.level); break; case MD_SPAN_A: if (debugEnabled) fprintf(stderr, "%*sA {\n", debugIndentLevel, ""); break; case MD_SPAN_IMG: if (debugEnabled) fprintf(stderr, "%*sIMG {\n", debugIndentLevel, ""); break; case MD_SPAN_CODE: if (debugEnabled) fprintf(stderr, "%*sCODE {\n", debugIndentLevel, ""); setStyle(STYLE_TYPE_CODE, STYLE_TEXT_PLAIN, 0); break; default: fprintf(stderr, "%s: Invalid span type (%d)\n", commandName, (int)type); return 1; break; } debugIndentLevel+=2; return 0; } static int leaveSpanHook(MD_SPANTYPE type, void * detail, void * userdata) { if ((detail != NULL) && (detail < lowestStackSeen)) lowestStackSeen = detail; switch (type) { case MD_SPAN_EM: textStyleMask &= ~STYLE_TEXT_MASK_EMPHASIZED; setStyle(blockList->styleType, textStyleMask, blockList->u.hDetail.level); break; case MD_SPAN_STRONG: textStyleMask &= ~STYLE_TEXT_MASK_STRONG; setStyle(blockList->styleType, textStyleMask, blockList->u.hDetail.level); break; case MD_SPAN_A: break; case MD_SPAN_IMG: break; case MD_SPAN_CODE: setStyle(blockList->styleType, textStyleMask, blockList->u.hDetail.level); break; default: fprintf(stderr, "%s: Invalid span type (%d)\n", commandName, (int)type); return 1; break; } debugIndentLevel-=2; if (debugEnabled) fprintf(stderr, "%*s}\n", debugIndentLevel, ""); return 0; } static void printEntity(const MD_CHAR * text, MD_SIZE size) { int entityNum; uint32_t unicodeChar = 0; if (size < 4) return; if (text[0] != '&') return; if (text[size - 1] != ';') return; if (text[1] == '#') { char * end; unicodeChar = strtoul(text + 2, &end, 10); if (end != text + size - 1) unicodeChar = 0; if ((unicodeChar > 0) && (unicodeChar < 128)) { writeChar(unicodeChar); return; } } if (text[1] == 'x') { char * end; unicodeChar = strtoul(text + 2, &end, 16); if (end != text + size - 1) unicodeChar = 0; if ((unicodeChar > 0) && (unicodeChar < 128)) { writeChar(unicodeChar); return; } } for (entityNum = 0; entityNum < (sizeof(entities) / sizeof(entities[0])); entityNum++) { if ((unicodeChar == entities[entityNum].unicodeChar) || (strncmp(entities[entityNum].entityString, text, size) == 0)) { writeChar(entities[entityNum].entityChar); return; } } } static int textHook(MD_TEXTTYPE type, const MD_CHAR * text, MD_SIZE size, void * userdata) { switch (type) { case MD_TEXT_NORMAL: if (debugEnabled) fprintf(stderr, "%*sText: \"", debugIndentLevel, ""); break; case MD_TEXT_NULLCHAR: fprintf(stderr, "%s: Null character encountered on input\n", commandName); return 1; case MD_TEXT_BR: if (debugEnabled) fprintf(stderr, "%*sBR\n", debugIndentLevel, ""); writeChar('\n'); return 0; case MD_TEXT_SOFTBR: if (debugEnabled) fprintf(stderr, "%*sSOFT BR\n", debugIndentLevel, ""); return 0; case MD_TEXT_ENTITY: if (debugEnabled) { fprintf(stderr, "%*sEntity: \"", debugIndentLevel, ""); fwrite(text, sizeof(MD_CHAR), size, stderr); } printEntity(text, size); text = ""; size = 0; break; case MD_TEXT_CODE: if (debugEnabled) fprintf(stderr, "%*sCode: \"", debugIndentLevel, ""); break; default: fprintf(stderr, "%s: Invalid text type (%d)\n", commandName, (int)type); return 1; break; } if (debugEnabled) { fwrite(text, sizeof(MD_CHAR), size, stderr); fprintf(stderr, "\"\n"); } if (size > 0) writeString(text, size); return 0; } static void debugLogHook(const char * message, void * userdata) { if (debugEnabled) fprintf(stderr, "DEBUG: %s\n", message); } int parse(const MD_CHAR* text, MD_SIZE size) { int result; if (styleInit() != 0) return 1; result = md_parse(text, size, &parser, NULL); closeStyle(); return result; }