From 2c6c191af857fb48faaced47b63fb2de71993e5c Mon Sep 17 00:00:00 2001 From: Jeremy Rand Date: Sat, 24 Apr 2021 16:18:30 -0400 Subject: [PATCH] Add this missing file. --- md2teach/translate.c | 724 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 724 insertions(+) create mode 100644 md2teach/translate.c diff --git a/md2teach/translate.c b/md2teach/translate.c new file mode 100644 index 0000000..206d8b9 --- /dev/null +++ b/md2teach/translate.c @@ -0,0 +1,724 @@ +/* + * translate.c + * md2teach + * + * Created by Jeremy Rand on 2021-04-24. + * + */ + +#include +#include + +#include "translate.h" +#include "io.h" +#include "main.h" + + +// Typedefs + +typedef struct tEntity +{ + const char * entityString; + char entityChar; + uint32_t unicodeChar; +} tEntity; + +typedef struct tBlockListItem +{ + MD_BLOCKTYPE type; + union { + MD_BLOCK_UL_DETAIL ulDetail; + MD_BLOCK_OL_DETAIL olDetail; + MD_BLOCK_H_DETAIL hDetail; + MD_BLOCK_CODE_DETAIL codeDetail; + } u; + int numTabs; + + struct tBlockListItem * next; +} tBlockListItem; + + +// Forward declarations + +static int enterBlockHook(MD_BLOCKTYPE type, void * detail, void * userdata); +static int leaveBlockHook(MD_BLOCKTYPE type, void * detail, void * userdata); +static int enterSpanHook(MD_SPANTYPE type, void * detail, void * userdata); +static int leaveSpanHook(MD_SPANTYPE type, void * detail, void * userdata); +static int textHook(MD_TEXTTYPE type, const MD_CHAR * text, MD_SIZE size, void * userdata); +static void debugLogHook(const char * message, void * userdata); + + +// Globals + +static MD_PARSER parser = { + 0, // abi_version + MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS, // flags + enterBlockHook, + leaveBlockHook, + enterSpanHook, + leaveSpanHook, + textHook, + debugLogHook, + NULL // syntax +}; + +static int debugIndentLevel = 0; + +static tBlockListItem * blockList = NULL; + +static tEntity entities[] = { + { " ", 0x9, 0x9 }, + { " ", 0x13, 0x10 }, + { "!", 0x21, 0x21 }, + { """, 0x22, 0x22 }, + { """, 0x22, 0x22 }, + { "#", 0x23, 0x23 }, + { "$", 0x24, 0x24 }, + { "%", 0x25, 0x25 }, + { "&", 0x26, 0x26 }, + { "'", 0x27, 0x27 }, + { "(", 0x28, 0x28 }, + { ")", 0x29, 0x29 }, + { "*", 0x2a, 0x2a }, + { "*", 0x2a, 0x2a }, + { "+", 0x2b, 0x2b }, + { ",", 0x2c, 0x2c }, + { ".", 0x2e, 0x2e }, + { "/", 0x2f, 0x2f }, + { ":", 0x3a, 0x3a }, + { ";", 0x3b, 0x3b }, + { "<", 0x3c, 0x3c }, + { "<", 0x3c, 0x3c }, + { "=", 0x3d, 0x3d }, + { ">", 0x3e, 0x3e }, + { ">", 0x3e, 0x3e }, + { "?", 0x3f, 0x3f }, + { "@", 0x40, 0x40 }, + { "[", 0x5b, 0x5b }, + { "[", 0x5b, 0x5b }, + { "\", 0x5c, 0x5c }, + { "]", 0x5d, 0x5d }, + { "]", 0x5d, 0x5d }, + { "^", 0x5e, 0x5e }, + { "_", 0x5f, 0x5f }, + { "`", 0x60, 0x60 }, + { "`", 0x60, 0x60 }, + { "{", 0x7b, 0x7b }, + { "{", 0x7b, 0x7b }, + { "|", 0x7c, 0x7c }, + { "|", 0x7c, 0x7c }, + { "|", 0x7c, 0x7c }, + { "}", 0x7d, 0x7d }, + { "}", 0x7d, 0x7d }, + { " ", 0xca, 0xa0 }, + { " ", 0xca, 0xa0 }, + { "¡", 0xc1, 0xa1 }, + { "¢", 0xa2, 0xa2 }, + { "£", 0xa3, 0xa3 }, + { "¤", 0xdb, 0xa4 }, + { "¥", 0xb4, 0xa5 }, + { "§", 0xa4, 0xa7 }, + { "¨", 0xac, 0xa8 }, + { "¨", 0xac, 0xa8 }, + { "¨", 0xac, 0xa8 }, + { "¨", 0xac, 0xa8 }, + { "©", 0xa9, 0xa9 }, + { "©", 0xa9, 0xa9 }, + { "ª", 0xbb, 0xaa }, + { "«", 0xc7, 0xab }, + { "¬", 0xc2, 0xac }, + { "®", 0xa8, 0xae }, + { "&circleR;", 0xa8, 0xae }, + { "®", 0xa8, 0xae }, + { "¯", 0xf8, 0xaf }, + { "‾", 0xf8, 0xaf }, + { "¯", 0xf8, 0xaf }, + { "°", 0xa1, 0xb0 }, + { "±", 0xb1, 0xb1 }, + { "±", 0xb1, 0xb1 }, + { "±", 0xb1, 0xb1 }, + { "´", 0xab, 0xb4 }, + { "´", 0xab, 0xb4 }, + { "µ", 0xb5, 0xb5 }, + { "¶", 0xa6, 0xb6 }, + { "·", 0xe1, 0xb7 }, + { "·", 0xe1, 0xb7 }, + { "·", 0xe1, 0xb7 }, + { "¸", 0xfc, 0xb8 }, + { "¸", 0xfc, 0xb8 }, + { "º", 0xbc, 0xba }, + { "»", 0xc8, 0xbb }, + { "¿", 0xc0, 0xbf }, + { "À", 0xcb, 0xc0 }, + { "Á", 0xe7, 0xc1 }, + { "Â", 0xe5, 0xc2 }, + { "Ã", 0xcc, 0xc3 }, + { "Ä", 0x80, 0xc4 }, + { "Å", 0x81, 0xc5 }, + { "Æ", 0xae, 0xc6 }, + { "Ç", 0x82, 0xc7 }, + { "È", 0xe9, 0xc8 }, + { "É", 0x83, 0xc9 }, + { "Ê", 0xe6, 0xca }, + { "Ë", 0xe8, 0xcb }, + { "Ì", 0xed, 0xcc }, + { "Í", 0xea, 0xcd }, + { "Î", 0xeb, 0xce }, + { "Ï", 0xec, 0xcf }, + { "Ñ", 0x84, 0xd1 }, + { "Ò", 0xf1, 0xd2 }, + { "Ó", 0xee, 0xd3 }, + { "Ô", 0xef, 0xd4 }, + { "Õ", 0xcd, 0xd5 }, + { "Ö", 0x85, 0xd6 }, + { "Ø", 0xaf, 0xd8 }, + { "Ù", 0xf4, 0xd9 }, + { "Ú", 0xf2, 0xda }, + { "Û", 0xf3, 0xdb }, + { "Ü", 0x86, 0xdc }, + { "ß", 0xa7, 0xdf }, + { "à", 0x88, 0xe0 }, + { "á", 0x87, 0xe1 }, + { "â", 0x89, 0xe2 }, + { "ã", 0x8b, 0xe3 }, + { "ä", 0x8a, 0xe4 }, + { "å", 0x8c, 0xe5 }, + { "æ", 0xbe, 0xe6 }, + { "ç", 0x8d, 0xe7 }, + { "è", 0x8f, 0xe8 }, + { "é", 0x8e, 0xe9 }, + { "ê", 0x90, 0xea }, + { "ë", 0x91, 0xeb }, + { "ì", 0x93, 0xec }, + { "í", 0x92, 0xed }, + { "î", 0x94, 0xee }, + { "ï", 0x95, 0xef }, + { "ñ", 0x96, 0xf1 }, + { "ò", 0x98, 0xf2 }, + { "ó", 0x97, 0xf3 }, + { "ô", 0x99, 0xf4 }, + { "õ", 0x9b, 0xf5 }, + { "ö", 0x9a, 0xf6 }, + { "÷", 0xd6, 0xf7 }, + { "÷", 0xd6, 0xf7 }, + { "ø", 0xbf, 0xf8 }, + { "ù", 0x9d, 0xf9 }, + { "ú", 0x9c, 0xfa }, + { "û", 0x9e, 0xfb }, + { "ü", 0x9f, 0xfc }, + { "ÿ", 0xd8, 0xff }, + { "†", 0xa0, 0x2020 }, + { "•", 0xa5, 0x2022 }, + { "•", 0xa5, 0x2022 }, + { "™", 0xaa, 0x2122 }, + { "™", 0xaa, 0x2122 }, + { "≠", 0xad, 0x2260 }, + { "≠", 0xad, 0x2260 }, + { "∞", 0xb0, 0x221e }, + { "≤", 0xb2, 0x2264 }, + { "≤", 0xb2, 0x2264 }, + { "&LessEqual;", 0xb2, 0x2264 }, + { "≥", 0xb3, 0x2265 }, + { "≥", 0xb3, 0x2265 }, + { "≥", 0xb3, 0x2265 }, + { "∂", 0xb6, 0x2202 }, + { "∂", 0xb6, 0x2202 }, + { "∑", 0xb7, 0x2211 }, + { "∑", 0xb7, 0x2211 }, + { "∏", 0xb8, 0x220f }, + { "∏", 0xb8, 0x220f }, + { "π", 0xb9, 0x3c0 }, + { "∫", 0xba, 0x222b }, + { "∫", 0xba, 0x222b }, + { "Ω", 0xbd, 0x3a9 }, + { "√", 0xc3, 0x221a }, + { "√", 0xc3, 0x221a }, + { "ƒ", 0xc4, 0x192 }, + { "≈", 0xc5, 0x2248 }, + { "≈", 0xc5, 0x2248 }, + { "≈", 0xc5, 0x2248 }, + { "≈", 0xc5, 0x2248 }, + { "≈", 0xc5, 0x2248 }, + { "≈", 0xc5, 0x2248 }, + { "Δ", 0xc6, 0x394 }, + { "…", 0xc9, 0x2026 }, + { "…", 0xc9, 0x2026 }, + { "Œ", 0xce, 0x152 }, + { "œ", 0xcf, 0x153 }, + { "–", 0xd0, 0x2013 }, + { "—", 0xd1, 0x2014 }, + { "“", 0xd2, 0x201c }, + { "“", 0xd2, 0x201c }, + { "”", 0xd3, 0x201d }, + { "”", 0xd3, 0x201d }, + { "”", 0xd3, 0x201d }, + { "‘", 0xd4, 0x2018 }, + { "‘", 0xd4, 0x2018 }, + { "’", 0xd5, 0x2019 }, + { "’", 0xd5, 0x2019 }, + { "’", 0xd5, 0x2019 }, + { "◊", 0xd7, 0x25ca }, + { "◊", 0xd7, 0x25ca }, + { "Ÿ", 0xd9, 0x178 }, + { "⁄", 0xda, 0x2044 }, + { "‹", 0xdc, 0x2039 }, + { "›", 0xdd, 0x203a }, + { "fi", 0xde, 0xfb01 }, + { "fl", 0xdf, 0xfb02 }, + { "‡", 0xe0, 0x2021 }, + { "‡", 0xe0, 0x2021 }, + { "‚", 0xe2, 0x201a }, + { "‚", 0xe2, 0x201a }, + { "„", 0xe3, 0x201e }, + { "„", 0xe3, 0x201e }, + { "‰", 0xe4, 0x2030 }, + { "", 0xf0, 0xf8ff }, + { "ı", 0xf5, 0x131 }, + { "ı", 0xf5, 0x131 }, + { "ˆ", 0xf6, 0x2c6 }, + { "˜", 0xf7, 0x2dc }, + { "˜", 0xf7, 0x2dc }, + { "˘", 0xf9, 0x2d8 }, + { "˘", 0xf9, 0x2d8 }, + { "˙", 0xfa, 0x2d9 }, + { "˙", 0xfa, 0x2d9 }, + { "˚", 0xfb, 0x2da }, + { "˝", 0xfd, 0x2dd }, + { "˝", 0xfd, 0x2dd }, + { "˛", 0xfe, 0x2db }, + { "ˇ", 0xff, 0x2c7 }, + { "ˇ", 0xff, 0x2c7 }, + + // GS_TODO - Test each of these entities. +}; + +// Implementation + +static int enterBlockHook(MD_BLOCKTYPE type, void * detail, void * userdata) +{ + static int isFirstNonDocumentBlock = 1; + tBlockListItem * newBlock = malloc(sizeof(tBlockListItem)); + + if (newBlock == NULL) { + fprintf(stderr, "%s: Out of memory", commandName); + return 1; + } + + newBlock->type = type; + if (blockList == NULL) + newBlock->numTabs = 0; + else + newBlock->numTabs = blockList->numTabs; + newBlock->next = blockList; + blockList = newBlock; + + if ((detail != NULL) && + (detail < lowestStackSeen)) + lowestStackSeen = detail; + + switch (type) { + case MD_BLOCK_DOC: + if (debugEnabled) + fprintf(stderr, "%*sDOC {\n", debugIndentLevel, ""); + break; + + case MD_BLOCK_QUOTE: + if (debugEnabled) + fprintf(stderr, "%*sQUOTE {\n", debugIndentLevel, ""); + + break; + + case MD_BLOCK_UL: { + MD_BLOCK_UL_DETAIL * ulDetail = (MD_BLOCK_UL_DETAIL *)detail; + if (debugEnabled) + fprintf(stderr, "%*sUL (is_tight=%d, mark=%c) {\n", debugIndentLevel, "", ulDetail->is_tight, ulDetail->mark); + + memcpy(&(newBlock->u.ulDetail), ulDetail, sizeof(*ulDetail)); + newBlock->numTabs++; + + if (!isFirstNonDocumentBlock) + writeChar('\r'); + break; + } + + case MD_BLOCK_OL: { + MD_BLOCK_OL_DETAIL * olDetail = (MD_BLOCK_OL_DETAIL *)detail; + if (debugEnabled) + fprintf(stderr, "%*sOL (start=%u, is_tight=%d, mark_delimiter=%c) {\n", debugIndentLevel, "", olDetail->start, olDetail->is_tight, olDetail->mark_delimiter); + + memcpy(&(newBlock->u.olDetail), olDetail, sizeof(*olDetail)); + newBlock->numTabs++; + + if (!isFirstNonDocumentBlock) + writeChar('\r'); + break; + } + + case MD_BLOCK_LI: { + int i; + tBlockListItem * enclosingBlock = newBlock->next; + int isNumbered = 0; + static char str[16]; + + if (debugEnabled) + fprintf(stderr, "%*sLI {\n", debugIndentLevel, ""); + + if (enclosingBlock == NULL) { + fprintf(stderr, "%s: Got a list item block without an enclosing block\n", commandName); + return 1; + } + + if (enclosingBlock->type == MD_BLOCK_OL) { + isNumbered = 1; + if ((!enclosingBlock->u.olDetail.is_tight) && + (!isFirstNonDocumentBlock)) + writeChar('\r'); + } else if (enclosingBlock->type == MD_BLOCK_UL) { + if ((!enclosingBlock->u.ulDetail.is_tight) && + (!isFirstNonDocumentBlock)) + writeChar('\r'); + } + + for (i = 0; i < newBlock->numTabs; i++) + writeChar('\t'); + + if (isNumbered) { + sprintf(str, "%u%c ", enclosingBlock->u.olDetail.start, enclosingBlock->u.olDetail.mark_delimiter); + enclosingBlock->u.olDetail.start++; + } else { + sprintf(str, "%c ", 0xa5); // 0xa5 is a bullet character + } + writeString(str, strlen(str)); + + break; + } + + case MD_BLOCK_HR: { + int i; + + if (debugEnabled) + fprintf(stderr, "%*sHR {\n", debugIndentLevel, ""); + + if (!isFirstNonDocumentBlock) + writeChar('\r'); + + for (i = 0; i < 30; i++) + writeChar('_'); + break; + } + + case MD_BLOCK_H: { + MD_BLOCK_H_DETAIL * hDetail = (MD_BLOCK_H_DETAIL *)detail; + if (debugEnabled) + fprintf(stderr, "%*sH (level=%u) {\n", debugIndentLevel, "", hDetail->level); + + memcpy(&(newBlock->u.hDetail), hDetail, sizeof(*hDetail)); + + if (!isFirstNonDocumentBlock) + writeChar('\r'); + break; + } + + case MD_BLOCK_CODE: { + MD_BLOCK_CODE_DETAIL * codeDetail = (MD_BLOCK_CODE_DETAIL *)detail; + if (debugEnabled) { + fprintf(stderr, "%*sCODE ", debugIndentLevel, ""); + if (codeDetail->fence_char != '\0') { + fprintf(stderr, "(fence_char=%c) ", codeDetail->fence_char); + } + fprintf(stderr, "{\n"); + } + + memcpy(&(newBlock->u.codeDetail), codeDetail, sizeof(*codeDetail)); + + if (!isFirstNonDocumentBlock) + writeChar('\r'); + break; + } + + case MD_BLOCK_P: + if (debugEnabled) + fprintf(stderr, "%*sP {\n", debugIndentLevel, ""); + + if (!isFirstNonDocumentBlock) + writeChar('\r'); + break; + + default: + fprintf(stderr, "%s: Invalid block type (%d)\n", commandName, (int)type); + return 1; + break; + } + + + if (type != MD_BLOCK_DOC) + isFirstNonDocumentBlock = 0; + + debugIndentLevel+=2; + return 0; +} + + +static int leaveBlockHook(MD_BLOCKTYPE type, void * detail, void * userdata) +{ + tBlockListItem * oldBlock = blockList; + + if (oldBlock == NULL) { + fprintf(stderr, "%s: Block list is empty but leaving block of type %d\n", commandName, (int)type); + return 1; + } + + if (oldBlock->type != type) { + fprintf(stderr, "%s: Expected to leave block of type %d but got type %d\n", commandName, (int)oldBlock->type, (int)type); + return 1; + } + + blockList = oldBlock->next; + free(oldBlock); + + if ((detail != NULL) && + (detail < lowestStackSeen)) + lowestStackSeen = detail; + + switch (type) { + case MD_BLOCK_DOC: + break; + + case MD_BLOCK_QUOTE: + break; + + case MD_BLOCK_UL: + writeChar('\r'); + break; + + case MD_BLOCK_OL: + writeChar('\r'); + break; + + case MD_BLOCK_LI: + writeChar('\r'); + break; + + case MD_BLOCK_HR: + writeChar('\r'); + break; + + case MD_BLOCK_H: + writeChar('\r'); + break; + + case MD_BLOCK_CODE: + writeChar('\r'); + break; + + case MD_BLOCK_P: + writeChar('\r'); + break; + + default: + fprintf(stderr, "%s: Invalid block type (%d)\n", commandName, (int)type); + return 1; + break; + } + + debugIndentLevel-=2; + if (debugEnabled) + fprintf(stderr, "%*s}\n", debugIndentLevel, ""); + + return 0; +} + + +static int enterSpanHook(MD_SPANTYPE type, void * detail, void * userdata) +{ + if ((detail != NULL) && + (detail < lowestStackSeen)) + lowestStackSeen = detail; + + switch (type) { + case MD_SPAN_EM: + if (debugEnabled) + fprintf(stderr, "%*sEM {\n", debugIndentLevel, ""); + break; + + case MD_SPAN_STRONG: + if (debugEnabled) + fprintf(stderr, "%*sSTRONG {\n", debugIndentLevel, ""); + break; + + case MD_SPAN_A: + if (debugEnabled) + fprintf(stderr, "%*sA {\n", debugIndentLevel, ""); + break; + + case MD_SPAN_IMG: + if (debugEnabled) + fprintf(stderr, "%*sIMG {\n", debugIndentLevel, ""); + break; + + case MD_SPAN_CODE: + if (debugEnabled) + fprintf(stderr, "%*sCODE {\n", debugIndentLevel, ""); + break; + + default: + fprintf(stderr, "%s: Invalid span type (%d)\n", commandName, (int)type); + return 1; + break; + } + + debugIndentLevel+=2; + return 0; +} + + +static int leaveSpanHook(MD_SPANTYPE type, void * detail, void * userdata) +{ + if ((detail != NULL) && + (detail < lowestStackSeen)) + lowestStackSeen = detail; + + switch (type) { + case MD_SPAN_EM: + break; + + case MD_SPAN_STRONG: + break; + + case MD_SPAN_A: + break; + + case MD_SPAN_IMG: + break; + + case MD_SPAN_CODE: + break; + + default: + fprintf(stderr, "%s: Invalid span type (%d)\n", commandName, (int)type); + return 1; + break; + } + + debugIndentLevel-=2; + if (debugEnabled) + fprintf(stderr, "%*s}\n", debugIndentLevel, ""); + + return 0; +} + +static void printEntity(const MD_CHAR * text, MD_SIZE size) +{ + int entityNum; + uint32_t unicodeChar = 0; + + if (size < 4) + return; + + if (text[0] != '&') + return; + + if (text[size - 1] != ';') + return; + + if (text[1] == '#') { + char * end; + unicodeChar = strtoul(text + 2, &end, 10); + if (end != text + size - 1) + unicodeChar = 0; + if ((unicodeChar > 0) && + (unicodeChar < 128)) { + writeChar(unicodeChar); + return; + } + } + + if (text[1] == 'x') { + char * end; + unicodeChar = strtoul(text + 2, &end, 16); + if (end != text + size - 1) + unicodeChar = 0; + if ((unicodeChar > 0) && + (unicodeChar < 128)) { + writeChar(unicodeChar); + return; + } + } + + for (entityNum = 0; entityNum < (sizeof(entities) / sizeof(entities[0])); entityNum++) { + if ((unicodeChar == entities[entityNum].unicodeChar) || + (strncmp(entities[entityNum].entityString, text, size) == 0)) { + writeChar(entities[entityNum].entityChar); + return; + } + } +} + +static int textHook(MD_TEXTTYPE type, const MD_CHAR * text, MD_SIZE size, void * userdata) +{ + switch (type) { + case MD_TEXT_NORMAL: + if (debugEnabled) + fprintf(stderr, "%*sText: \"", debugIndentLevel, ""); + break; + + case MD_TEXT_NULLCHAR: + fprintf(stderr, "%s: Null character encountered on input\n", commandName); + return 1; + + case MD_TEXT_BR: + if (debugEnabled) + fprintf(stderr, "%*sBR\n", debugIndentLevel, ""); + putchar('\n'); + return 0; + + case MD_TEXT_SOFTBR: + if (debugEnabled) + fprintf(stderr, "%*sSOFT BR\n", debugIndentLevel, ""); + return 0; + + case MD_TEXT_ENTITY: + if (debugEnabled) { + fprintf(stderr, "%*sEntity: \"", debugIndentLevel, ""); + fwrite(text, sizeof(MD_CHAR), size, stderr); + } + + printEntity(text, size); + text = ""; + size = 0; + break; + + case MD_TEXT_CODE: + if (debugEnabled) + fprintf(stderr, "%*sCode: \"", debugIndentLevel, ""); + break; + + default: + fprintf(stderr, "%s: Invalid text type (%d)\n", commandName, (int)type); + return 1; + break; + } + + if (debugEnabled) { + fwrite(text, sizeof(MD_CHAR), size, stderr); + fprintf(stderr, "\"\n"); + } + + if (size > 0) + writeString(text, size); + + return 0; +} + + +static void debugLogHook(const char * message, void * userdata) +{ + if (debugEnabled) + fprintf(stderr, "DEBUG: %s\n", message); +} + + +int parse(const MD_CHAR* text, MD_SIZE size) +{ + return md_parse(text, size, &parser, NULL); +}