package millfork

/**
  * Lookup table of non-ASCII characters that are visually confusable with ASCII text.
  *
  * Each key is a single UTF-16 code unit; each value is the text it is most easily
  * mistaken for. The parser uses this table to print a "Did you mean" hint when it
  * stumbles on an unexpected character in the source file.
  *
  * Characters that look like a backslash map to a real backslash (written as an escaped
  * backslash in the string literals below, because a Unicode escape for U+005C cannot be
  * used inside a Scala 2 string literal). Characters that look like a double quote map
  * to an escaped quotation mark for the same reason.
  *
  * Based on https://www.unicode.org/Public/security/13.0.0/confusables.txt
  * @author Karol Stasiak
  */
object Confusables {

  // Kept lazy: the table is only needed after a parse failure, so successful compilations never build it.
  lazy val map: Map[Char, String] = Map(
    '\u0060' -> "\u0027",
    '\u00A0' -> "\u0020",
    '\u00B4' -> "\u0027",
    '\u00B8' -> "\u002C",
    '\u00C6' -> "\u0041\u0045",
    '\u00D7' -> "\u0078",
    '\u00E6' -> "\u0061\u0065",
    '\u0131' -> "\u0069",
    '\u0132' -> "\u006C\u004A",
    '\u0133' -> "\u0069\u006A",
    '\u0149' -> "\u0027\u006E",
    '\u0152' -> "\u004F\u0045",
    '\u0153' -> "\u006F\u0065",
    '\u017F' -> "\u0066",
    '\u0181' -> "\u0027\u0042",
    '\u0184' -> "\u0062",
    '\u0187' -> "\u0043\u0027",
    '\u018A' -> "\u0027\u0044",
    '\u018D' -> "\u0067",
    '\u0193' -> "\u0047\u0027",
    '\u0196' -> "\u006C",
    '\u0198' -> "\u004B\u0027",
    '\u01A0' -> "\u004F\u0027",
    '\u01A1' -> "\u006F\u0027",
    '\u01A4' -> "\u0027\u0050",
    '\u01A6' -> "\u0052",
    '\u01A7' -> "\u0032",
    '\u01AC' -> "\u0027\u0054",
    '\u01B3' -> "\u0027\u0059",
    '\u01B7' -> "\u0033",
    '\u01BC' -> "\u0035",
    '\u01BD' -> "\u0073",
    '\u01C0' -> "\u006C",
    '\u01C1' -> "\u006C\u006C",
    '\u01C3' -> "\u0021",
    '\u01C7' -> "\u004C\u004A",
    '\u01C8' -> "\u004C\u006A",
    '\u01C9' -> "\u006C\u006A",
    '\u01CA' -> "\u004E\u004A",
    '\u01CB' -> "\u004E\u006A",
    '\u01CC' -> "\u006E\u006A",
    '\u01F1' -> "\u0044\u005A",
    '\u01F2' -> "\u0044\u007A",
    '\u01F3' -> "\u0064\u007A",
    '\u021C' -> "\u0033",
    '\u0222' -> "\u0038",
    '\u0223' -> "\u0038",
    '\u0241' -> "\u003F",
    '\u0251' -> "\u0061",
    '\u0261' -> "\u0067",
    '\u0263' -> "\u0079",
    '\u0269' -> "\u0069",
    '\u026A' -> "\u0069",
    '\u026F' -> "\u0077",
    '\u028B' -> "\u0075",
    '\u028F' -> "\u0079",
    '\u0294' -> "\u003F",
    '\u02A3' -> "\u0064\u007A",
    '\u02A6' -> "\u0074\u0073",
    '\u02AA' -> "\u006C\u0073",
    '\u02AB' -> "\u006C\u007A",
    '\u02B9' -> "\u0027",
    '\u02BA' -> "\"",
    '\u02BB' -> "\u0027",
    '\u02BC' -> "\u0027",
    '\u02BD' -> "\u0027",
    '\u02BE' -> "\u0027",
    '\u02C2' -> "\u003C",
    '\u02C3' -> "\u003E",
    '\u02C4' -> "\u005E",
    '\u02C6' -> "\u005E",
    '\u02C8' -> "\u0027",
    '\u02CA' -> "\u0027",
    '\u02CB' -> "\u0027",
    '\u02D0' -> "\u003A",
    '\u02D7' -> "\u002D",
    '\u02DB' -> "\u0069",
    '\u02DC' -> "\u007E",
    '\u02DD' -> "\"",
    '\u02EE' -> "\"",
    '\u02F4' -> "\u0027",
    '\u02F6' -> "\"",
    '\u02F8' -> "\u003A",
    '\u0374' -> "\u0027",
    '\u037A' -> "\u0069",
    '\u037E' -> "\u003B",
    '\u037F' -> "\u004A",
    '\u0384' -> "\u0027",
    '\u0391' -> "\u0041",
    '\u0392' -> "\u0042",
    '\u0395' -> "\u0045",
    '\u0396' -> "\u005A",
    '\u0397' -> "\u0048",
    '\u0399' -> "\u006C",
    '\u039A' -> "\u004B",
    '\u039C' -> "\u004D",
    '\u039D' -> "\u004E",
    '\u039F' -> "\u004F",
    '\u03A1' -> "\u0050",
    '\u03A4' -> "\u0054",
    '\u03A5' -> "\u0059",
    '\u03A7' -> "\u0058",
    '\u03B1' -> "\u0061",
    '\u03B3' -> "\u0079",
    '\u03B9' -> "\u0069",
    '\u03BD' -> "\u0076",
    '\u03BF' -> "\u006F",
    '\u03C1' -> "\u0070",
    '\u03C3' -> "\u006F",
    '\u03C5' -> "\u0075",
    '\u03D2' -> "\u0059",
    '\u03DC' -> "\u0046",
    '\u03E8' -> "\u0032",
    '\u03F1' -> "\u0070",
    '\u03F2' -> "\u0063",
    '\u03F3' -> "\u006A",
    '\u03F9' -> "\u0043",
    '\u03FA' -> "\u004D",
    '\u0405' -> "\u0053",
    '\u0406' -> "\u006C",
    '\u0408' -> "\u004A",
    '\u0410' -> "\u0041",
    '\u0412' -> "\u0042",
    '\u0415' -> "\u0045",
    '\u0417' -> "\u0033",
    '\u041A' -> "\u004B",
    '\u041C' -> "\u004D",
    '\u041D' -> "\u0048",
    '\u041E' -> "\u004F",
    '\u0420' -> "\u0050",
    '\u0421' -> "\u0043",
    '\u0422' -> "\u0054",
    '\u0423' -> "\u0059",
    '\u0425' -> "\u0058",
    '\u042B' -> "\u0062\u006C",
    '\u042C' -> "\u0062",
    '\u042E' -> "\u006C\u004F",
    '\u0430' -> "\u0061",
    '\u0431' -> "\u0036",
    '\u0433' -> "\u0072",
    '\u0435' -> "\u0065",
    '\u043E' -> "\u006F",
    '\u0440' -> "\u0070",
    '\u0441' -> "\u0063",
    '\u0443' -> "\u0079",
    '\u0445' -> "\u0078",
    '\u0455' -> "\u0073",
    '\u0456' -> "\u0069",
    '\u0458' -> "\u006A",
    '\u0461' -> "\u0077",
    '\u0474' -> "\u0056",
    '\u0475' -> "\u0076",
    '\u0491' -> "\u0072\u0027",
    '\u04AE' -> "\u0059",
    '\u04AF' -> "\u0079",
    '\u04BB' -> "\u0068",
    '\u04BD' -> "\u0065",
    '\u04C0' -> "\u006C",
    '\u04CF' -> "\u0069",
    '\u04D4' -> "\u0041\u0045",
    '\u04D5' -> "\u0061\u0065",
    '\u04E0' -> "\u0033",
    '\u0501' -> "\u0064",
    '\u050C' -> "\u0047",
    '\u051B' -> "\u0071",
    '\u051C' -> "\u0057",
    '\u051D' -> "\u0077",
    '\u054D' -> "\u0055",
    '\u054F' -> "\u0053",
    '\u0555' -> "\u004F",
    '\u055A' -> "\u0027",
    '\u055D' -> "\u0027",
    '\u0561' -> "\u0077",
    '\u0563' -> "\u0071",
    '\u0566' -> "\u0071",
    '\u0570' -> "\u0068",
    '\u0578' -> "\u006E",
    '\u057C' -> "\u006E",
    '\u057D' -> "\u0075",
    '\u0581' -> "\u0067",
    '\u0584' -> "\u0066",
    '\u0585' -> "\u006F",
    '\u0589' -> "\u003A",
    '\u05C0' -> "\u006C",
    '\u05C3' -> "\u003A",
    '\u05D5' -> "\u006C",
    '\u05D8' -> "\u0076",
    '\u05D9' -> "\u0027",
    '\u05DF' -> "\u006C",
    '\u05E1' -> "\u006F",
    '\u05F0' -> "\u006C\u006C",
    '\u05F1' -> "\u006C\u0027",
    '\u05F2' -> "\"",
    '\u05F3' -> "\u0027",
    '\u05F4' -> "\"",
    '\u060D' -> "\u002C",
    '\u0627' -> "\u006C",
    '\u0647' -> "\u006F",
    '\u0660' -> "\u002E",
    '\u0661' -> "\u006C",
    '\u0665' -> "\u006F",
    '\u0667' -> "\u0056",
    '\u066B' -> "\u002C",
    '\u066D' -> "\u002A",
    '\u06BE' -> "\u006F",
    '\u06C1' -> "\u006F",
    '\u06D4' -> "\u002D",
    '\u06D5' -> "\u006F",
    '\u06F0' -> "\u002E",
    '\u06F1' -> "\u006C",
    '\u06F5' -> "\u006F",
    '\u06F7' -> "\u0056",
    '\u0701' -> "\u002E",
    '\u0702' -> "\u002E",
    '\u0703' -> "\u003A",
    '\u0704' -> "\u003A",
    '\u07C0' -> "\u004F",
    '\u07CA' -> "\u006C",
    '\u07F4' -> "\u0027",
    '\u07F5' -> "\u0027",
    '\u07FA' -> "\u005F",
    '\u0903' -> "\u003A",
    '\u0966' -> "\u006F",
    '\u097D' -> "\u003F",
    '\u09E6' -> "\u004F",
    '\u09EA' -> "\u0038",
    '\u09ED' -> "\u0039",
    '\u0A66' -> "\u006F",
    '\u0A67' -> "\u0039",
    '\u0A6A' -> "\u0038",
    '\u0A83' -> "\u003A",
    '\u0AE6' -> "\u006F",
    '\u0B03' -> "\u0038",
    '\u0B20' -> "\u004F",
    '\u0B66' -> "\u004F",
    '\u0B68' -> "\u0039",
    '\u0BE6' -> "\u006F",
    '\u0C02' -> "\u006F",
    '\u0C66' -> "\u006F",
    '\u0C82' -> "\u006F",
    '\u0CE6' -> "\u006F",
    '\u0D02' -> "\u006F",
    '\u0D20' -> "\u006F",
    '\u0D66' -> "\u006F",
    '\u0D6D' -> "\u0039",
    '\u0D82' -> "\u006F",
    '\u0E50' -> "\u006F",
    '\u0ED0' -> "\u006F",
    '\u101D' -> "\u006F",
    '\u1040' -> "\u006F",
    '\u10E7' -> "\u0079",
    '\u10FF' -> "\u006F",
    '\u1200' -> "\u0055",
    '\u12D0' -> "\u004F",
    '\u13A0' -> "\u0044",
    '\u13A1' -> "\u0052",
    '\u13A2' -> "\u0054",
    '\u13A4' -> "\u004F\u0027",
    '\u13A5' -> "\u0069",
    '\u13A9' -> "\u0059",
    '\u13AA' -> "\u0041",
    '\u13AB' -> "\u004A",
    '\u13AC' -> "\u0045",
    '\u13AE' -> "\u003F",
    '\u13B3' -> "\u0057",
    '\u13B7' -> "\u004D",
    '\u13BB' -> "\u0048",
    '\u13BD' -> "\u0059",
    '\u13C0' -> "\u0047",
    '\u13C2' -> "\u0068",
    '\u13C3' -> "\u005A",
    '\u13CE' -> "\u0034",
    '\u13CF' -> "\u0062",
    '\u13D2' -> "\u0052",
    '\u13D4' -> "\u0057",
    '\u13D5' -> "\u0053",
    '\u13D9' -> "\u0056",
    '\u13DA' -> "\u0053",
    '\u13DE' -> "\u004C",
    '\u13DF' -> "\u0043",
    '\u13E2' -> "\u0050",
    '\u13E6' -> "\u004B",
    '\u13E7' -> "\u0064",
    '\u13EE' -> "\u0036",
    '\u13F3' -> "\u0047",
    '\u13F4' -> "\u0042",
    '\u1400' -> "\u003D",
    '\u142F' -> "\u0056",
    '\u1433' -> "\u003E",
    '\u1438' -> "\u003C",
    '\u144A' -> "\u0027",
    '\u144C' -> "\u0055",
    '\u1467' -> "\u0055\u0027",
    '\u146D' -> "\u0050",
    '\u146F' -> "\u0064",
    '\u1472' -> "\u0062",
    '\u1486' -> "\u0050\u0027",
    '\u1487' -> "\u0064\u0027",
    '\u1488' -> "\u0062\u0027",
    '\u148D' -> "\u004A",
    '\u14AA' -> "\u004C",
    '\u14BF' -> "\u0032",
    '\u1541' -> "\u0078",
    '\u157C' -> "\u0048",
    '\u157D' -> "\u0078",
    '\u1587' -> "\u0052",
    '\u15AF' -> "\u0062",
    '\u15B4' -> "\u0046",
    '\u15C5' -> "\u0041",
    '\u15DE' -> "\u0044",
    '\u15EA' -> "\u0044",
    '\u15F0' -> "\u004D",
    '\u15F7' -> "\u0042",
    '\u166D' -> "\u0058",
    '\u166E' -> "\u0078",
    '\u1680' -> "\u0020",
    '\u16B2' -> "\u003C",
    '\u16B7' -> "\u0058",
    '\u16C1' -> "\u006C",
    '\u16CC' -> "\u0027",
    '\u16D5' -> "\u004B",
    '\u16D6' -> "\u004D",
    '\u16EC' -> "\u003A",
    '\u16ED' -> "\u002B",
    '\u1735' -> "\u002F",
    '\u1803' -> "\u003A",
    '\u1809' -> "\u003A",
    '\u1CD3' -> "\"",
    '\u1D04' -> "\u0063",
    '\u1D0F' -> "\u006F",
    '\u1D11' -> "\u006F",
    '\u1D1C' -> "\u0075",
    '\u1D20' -> "\u0076",
    '\u1D21' -> "\u0077",
    '\u1D22' -> "\u007A",
    '\u1D26' -> "\u0072",
    '\u1D6B' -> "\u0075\u0065",
    '\u1D83' -> "\u0067",
    '\u1D8C' -> "\u0079",
    '\u1E9D' -> "\u0066",
    '\u1EFF' -> "\u0079",
    '\u1FBD' -> "\u0027",
    '\u1FBE' -> "\u0069",
    '\u1FBF' -> "\u0027",
    '\u1FC0' -> "\u007E",
    '\u1FEF' -> "\u0027",
    '\u1FFD' -> "\u0027",
    '\u1FFE' -> "\u0027",
    '\u2000' -> "\u0020",
    '\u2001' -> "\u0020",
    '\u2002' -> "\u0020",
    '\u2003' -> "\u0020",
    '\u2004' -> "\u0020",
    '\u2005' -> "\u0020",
    '\u2006' -> "\u0020",
    '\u2007' -> "\u0020",
    '\u2008' -> "\u0020",
    '\u2009' -> "\u0020",
    '\u200A' -> "\u0020",
    '\u2010' -> "\u002D",
    '\u2011' -> "\u002D",
    '\u2012' -> "\u002D",
    '\u2013' -> "\u002D",
    '\u2016' -> "\u006C\u006C",
    '\u2018' -> "\u0027",
    '\u2019' -> "\u0027",
    '\u201A' -> "\u002C",
    '\u201B' -> "\u0027",
    '\u201C' -> "\"",
    '\u201D' -> "\"",
    '\u201F' -> "\"",
    '\u2024' -> "\u002E",
    '\u2025' -> "\u002E\u002E",
    '\u2026' -> "\u002E\u002E\u002E",
    '\u2028' -> "\u0020",
    '\u2029' -> "\u0020",
    '\u202F' -> "\u0020",
    '\u2032' -> "\u0027",
    '\u2033' -> "\"",
    '\u2035' -> "\u0027",
    '\u2036' -> "\"",
    '\u2039' -> "\u003C",
    '\u203A' -> "\u003E",
    '\u203C' -> "\u0021\u0021",
    '\u2041' -> "\u002F",
    '\u2043' -> "\u002D",
    '\u2044' -> "\u002F",
    '\u2047' -> "\u003F\u003F",
    '\u2048' -> "\u003F\u0021",
    '\u2049' -> "\u0021\u003F",
    '\u204E' -> "\u002A",
    '\u2053' -> "\u007E",
    '\u2057' -> "\"\"",
    '\u205A' -> "\u003A",
    '\u205F' -> "\u0020",
    '\u20A8' -> "\u0052\u0073",
    '\u20B6' -> "\u006C\u0074",
    '\u2100' -> "\u0061\u002F\u0063",
    '\u2101' -> "\u0061\u002F\u0073",
    '\u2102' -> "\u0043",
    '\u2105' -> "\u0063\u002F\u006F",
    '\u2106' -> "\u0063\u002F\u0075",
    '\u210A' -> "\u0067",
    '\u210B' -> "\u0048",
    '\u210C' -> "\u0048",
    '\u210D' -> "\u0048",
    '\u210E' -> "\u0068",
    '\u2110' -> "\u006C",
    '\u2111' -> "\u006C",
    '\u2112' -> "\u004C",
    '\u2113' -> "\u006C",
    '\u2115' -> "\u004E",
    '\u2116' -> "\u004E\u006F",
    '\u2119' -> "\u0050",
    '\u211A' -> "\u0051",
    '\u211B' -> "\u0052",
    '\u211C' -> "\u0052",
    '\u211D' -> "\u0052",
    '\u2121' -> "\u0054\u0045\u004C",
    '\u2124' -> "\u005A",
    '\u2128' -> "\u005A",
    '\u212A' -> "\u004B",
    '\u212C' -> "\u0042",
    '\u212D' -> "\u0043",
    '\u212E' -> "\u0065",
    '\u212F' -> "\u0065",
    '\u2130' -> "\u0045",
    '\u2131' -> "\u0046",
    '\u2133' -> "\u004D",
    '\u2134' -> "\u006F",
    '\u2139' -> "\u0069",
    '\u213B' -> "\u0046\u0041\u0058",
    '\u213D' -> "\u0079",
    '\u2145' -> "\u0044",
    '\u2146' -> "\u0064",
    '\u2147' -> "\u0065",
    '\u2148' -> "\u0069",
    '\u2149' -> "\u006A",
    '\u2160' -> "\u006C",
    '\u2161' -> "\u006C\u006C",
    '\u2162' -> "\u006C\u006C\u006C",
    '\u2163' -> "\u006C\u0056",
    '\u2164' -> "\u0056",
    '\u2165' -> "\u0056\u006C",
    '\u2166' -> "\u0056\u006C\u006C",
    '\u2167' -> "\u0056\u006C\u006C\u006C",
    '\u2168' -> "\u006C\u0058",
    '\u2169' -> "\u0058",
    '\u216A' -> "\u0058\u006C",
    '\u216B' -> "\u0058\u006C\u006C",
    '\u216C' -> "\u004C",
    '\u216D' -> "\u0043",
    '\u216E' -> "\u0044",
    '\u216F' -> "\u004D",
    '\u2170' -> "\u0069",
    '\u2171' -> "\u0069\u0069",
    '\u2172' -> "\u0069\u0069\u0069",
    '\u2173' -> "\u0069\u0076",
    '\u2174' -> "\u0076",
    '\u2175' -> "\u0076\u0069",
    '\u2176' -> "\u0076\u0069\u0069",
    '\u2177' -> "\u0076\u0069\u0069\u0069",
    '\u2178' -> "\u0069\u0078",
    '\u2179' -> "\u0078",
    '\u217A' -> "\u0078\u0069",
    '\u217B' -> "\u0078\u0069\u0069",
    '\u217C' -> "\u006C",
    '\u217D' -> "\u0063",
    '\u217E' -> "\u0064",
    '\u217F' -> "\u0072\u006E",
    '\u2212' -> "\u002D",
    '\u2215' -> "\u002F",
    '\u2216' -> "\\",
    '\u2217' -> "\u002A",
    '\u221E' -> "\u006F\u006F",
    '\u2223' -> "\u006C",
    '\u2225' -> "\u006C\u006C",
    '\u2228' -> "\u0076",
    '\u222A' -> "\u0055",
    '\u2236' -> "\u003A",
    '\u223C' -> "\u007E",
    '\u226A' -> "\u003C\u003C",
    '\u226B' -> "\u003E\u003E",
    '\u22A4' -> "\u0054",
    '\u22C1' -> "\u0076",
    '\u22C3' -> "\u0055",
    '\u22D8' -> "\u003C\u003C\u003C",
    '\u22D9' -> "\u003E\u003E\u003E",
    '\u22FF' -> "\u0045",
    '\u2373' -> "\u0069",
    '\u2374' -> "\u0070",
    '\u237A' -> "\u0061",
    '\u23FD' -> "\u006C",
    '\u244A' -> "\\\\",
    '\u2474' -> "\u0028\u006C\u0029",
    '\u2475' -> "\u0028\u0032\u0029",
    '\u2476' -> "\u0028\u0033\u0029",
    '\u2477' -> "\u0028\u0034\u0029",
    '\u2478' -> "\u0028\u0035\u0029",
    '\u2479' -> "\u0028\u0036\u0029",
    '\u247A' -> "\u0028\u0037\u0029",
    '\u247B' -> "\u0028\u0038\u0029",
    '\u247C' -> "\u0028\u0039\u0029",
    '\u247D' -> "\u0028\u006C\u004F\u0029",
    '\u247E' -> "\u0028\u006C\u006C\u0029",
    '\u247F' -> "\u0028\u006C\u0032\u0029",
    '\u2480' -> "\u0028\u006C\u0033\u0029",
    '\u2481' -> "\u0028\u006C\u0034\u0029",
    '\u2482' -> "\u0028\u006C\u0035\u0029",
    '\u2483' -> "\u0028\u006C\u0036\u0029",
    '\u2484' -> "\u0028\u006C\u0037\u0029",
    '\u2485' -> "\u0028\u006C\u0038\u0029",
    '\u2486' -> "\u0028\u006C\u0039\u0029",
    '\u2487' -> "\u0028\u0032\u004F\u0029",
    '\u2488' -> "\u006C\u002E",
    '\u2489' -> "\u0032\u002E",
    '\u248A' -> "\u0033\u002E",
    '\u248B' -> "\u0034\u002E",
    '\u248C' -> "\u0035\u002E",
    '\u248D' -> "\u0036\u002E",
    '\u248E' -> "\u0037\u002E",
    '\u248F' -> "\u0038\u002E",
    '\u2490' -> "\u0039\u002E",
    '\u2491' -> "\u006C\u004F\u002E",
    '\u2492' -> "\u006C\u006C\u002E",
    '\u2493' -> "\u006C\u0032\u002E",
    '\u2494' -> "\u006C\u0033\u002E",
    '\u2495' -> "\u006C\u0034\u002E",
    '\u2496' -> "\u006C\u0035\u002E",
    '\u2497' -> "\u006C\u0036\u002E",
    '\u2498' -> "\u006C\u0037\u002E",
    '\u2499' -> "\u006C\u0038\u002E",
    '\u249A' -> "\u006C\u0039\u002E",
    '\u249B' -> "\u0032\u004F\u002E",
    '\u249C' -> "\u0028\u0061\u0029",
    '\u249D' -> "\u0028\u0062\u0029",
    '\u249E' -> "\u0028\u0063\u0029",
    '\u249F' -> "\u0028\u0064\u0029",
    '\u24A0' -> "\u0028\u0065\u0029",
    '\u24A1' -> "\u0028\u0066\u0029",
    '\u24A2' -> "\u0028\u0067\u0029",
    '\u24A3' -> "\u0028\u0068\u0029",
    '\u24A4' -> "\u0028\u0069\u0029",
    '\u24A5' -> "\u0028\u006A\u0029",
    '\u24A6' -> "\u0028\u006B\u0029",
    '\u24A7' -> "\u0028\u006C\u0029",
    '\u24A8' -> "\u0028\u0072\u006E\u0029",
    '\u24A9' -> "\u0028\u006E\u0029",
    '\u24AA' -> "\u0028\u006F\u0029",
    '\u24AB' -> "\u0028\u0070\u0029",
    '\u24AC' -> "\u0028\u0071\u0029",
    '\u24AD' -> "\u0028\u0072\u0029",
    '\u24AE' -> "\u0028\u0073\u0029",
    '\u24AF' -> "\u0028\u0074\u0029",
    '\u24B0' -> "\u0028\u0075\u0029",
    '\u24B1' -> "\u0028\u0076\u0029",
    '\u24B2' -> "\u0028\u0077\u0029",
    '\u24B3' -> "\u0028\u0078\u0029",
    '\u24B4' -> "\u0028\u0079\u0029",
    '\u24B5' -> "\u0028\u007A\u0029",
    '\u2571' -> "\u002F",
    '\u2573' -> "\u0058",
    '\u2768' -> "\u0028",
    '\u2769' -> "\u0029",
    '\u276E' -> "\u003C",
    '\u276F' -> "\u003E",
    '\u2772' -> "\u0028",
    '\u2773' -> "\u0029",
    '\u2774' -> "\u007B",
    '\u2775' -> "\u007D",
    '\u2795' -> "\u002B",
    '\u2796' -> "\u002D",
    '\u27CB' -> "\u002F",
    '\u27CD' -> "\\",
    '\u27D9' -> "\u0054",
    '\u292B' -> "\u0078",
    '\u292C' -> "\u0078",
    '\u29F5' -> "\\",
    '\u29F8' -> "\u002F",
    '\u29F9' -> "\\",
    '\u2A20' -> "\u003E\u003E",
    '\u2A2F' -> "\u0078",
    '\u2A74' -> "\u003A\u003A\u003D",
    '\u2A75' -> "\u003D\u003D",
    '\u2A76' -> "\u003D\u003D\u003D",
    '\u2AA5' -> "\u003E\u003C",
    '\u2AFB' -> "\u002F\u002F\u002F",
    '\u2AFD' -> "\u002F\u002F",
    '\u2C85' -> "\u0072",
    '\u2C8E' -> "\u0048",
    '\u2C92' -> "\u006C",
    '\u2C94' -> "\u004B",
    '\u2C98' -> "\u004D",
    '\u2C9A' -> "\u004E",
    '\u2C9E' -> "\u004F",
    '\u2C9F' -> "\u006F",
    '\u2CA2' -> "\u0050",
    '\u2CA3' -> "\u0070",
    '\u2CA4' -> "\u0043",
    '\u2CA5' -> "\u0063",
    '\u2CA6' -> "\u0054",
    '\u2CA8' -> "\u0059",
    '\u2CAC' -> "\u0058",
    '\u2CB4' -> "\u003C\u00B7",
    '\u2CBA' -> "\u002D",
    '\u2CC6' -> "\u002F",
    '\u2CCA' -> "\u0039",
    '\u2CCC' -> "\u0033",
    '\u2CD0' -> "\u004C",
    '\u2CD2' -> "\u0036",
    '\u2CF9' -> "\\\\",
    '\u2D38' -> "\u0056",
    '\u2D39' -> "\u0045",
    '\u2D4F' -> "\u006C",
    '\u2D51' -> "\u0021",
    '\u2D54' -> "\u004F",
    '\u2D55' -> "\u0051",
    '\u2D5D' -> "\u0058",
    '\u2E28' -> "\u0028\u0028",
    '\u2E29' -> "\u0029\u0029",
    '\u2E40' -> "\u003D",
    '\u2F02' -> "\\",
    '\u2F03' -> "\u002F",
    '\u3003' -> "\"",
    '\u3007' -> "\u004F",
    '\u3014' -> "\u0028",
    '\u3015' -> "\u0029",
    '\u3033' -> "\u002F",
    '\u30A0' -> "\u003D",
    '\u30CE' -> "\u002F",
    '\u31D3' -> "\u002F",
    '\u31D4' -> "\\",
    '\u4E36' -> "\\",
    '\u4E3F' -> "\u002F",
    '\uA4D0' -> "\u0042",
    '\uA4D1' -> "\u0050",
    '\uA4D2' -> "\u0064",
    '\uA4D3' -> "\u0044",
    '\uA4D4' -> "\u0054",
    '\uA4D6' -> "\u0047",
    '\uA4D7' -> "\u004B",
    '\uA4D9' -> "\u004A",
    '\uA4DA' -> "\u0043",
    '\uA4DC' -> "\u005A",
    '\uA4DD' -> "\u0046",
    '\uA4DF' -> "\u004D",
    '\uA4E0' -> "\u004E",
    '\uA4E1' -> "\u004C",
    '\uA4E2' -> "\u0053",
    '\uA4E3' -> "\u0052",
    '\uA4E6' -> "\u0056",
    '\uA4E7' -> "\u0048",
    '\uA4EA' -> "\u0057",
    '\uA4EB' -> "\u0058",
    '\uA4EC' -> "\u0059",
    '\uA4EE' -> "\u0041",
    '\uA4F0' -> "\u0045",
    '\uA4F2' -> "\u006C",
    '\uA4F3' -> "\u004F",
    '\uA4F4' -> "\u0055",
    '\uA4F8' -> "\u002E",
    '\uA4F9' -> "\u002C",
    '\uA4FA' -> "\u002E\u002E",
    '\uA4FB' -> "\u002E\u002C",
    '\uA4FD' -> "\u003A",
    '\uA4FE' -> "\u002D\u002E",
    '\uA4FF' -> "\u003D",
    '\uA60E' -> "\u002E",
    '\uA644' -> "\u0032",
    '\uA647' -> "\u0069",
    '\uA698' -> "\u004F\u004F",
    '\uA699' -> "\u006F\u006F",
    '\uA6DF' -> "\u0056",
    '\uA6EB' -> "\u003F",
    '\uA6EF' -> "\u0032",
    '\uA728' -> "\u0054\u0033",
    '\uA731' -> "\u0073",
    '\uA732' -> "\u0041\u0041",
    '\uA733' -> "\u0061\u0061",
    '\uA734' -> "\u0041\u004F",
    '\uA735' -> "\u0061\u006F",
    '\uA736' -> "\u0041\u0055",
    '\uA737' -> "\u0061\u0075",
    '\uA738' -> "\u0041\u0056",
    '\uA739' -> "\u0061\u0076",
    '\uA73A' -> "\u0041\u0056",
    '\uA73B' -> "\u0061\u0076",
    '\uA73C' -> "\u0041\u0059",
    '\uA73D' -> "\u0061\u0079",
    '\uA74E' -> "\u004F\u004F",
    '\uA74F' -> "\u006F\u006F",
    '\uA75A' -> "\u0032",
    '\uA76A' -> "\u0033",
    '\uA76E' -> "\u0039",
    '\uA777' -> "\u0074\u0066",
    '\uA778' -> "\u0026",
    '\uA789' -> "\u003A",
    '\uA78C' -> "\u0027",
    '\uA798' -> "\u0046",
    '\uA799' -> "\u0066",
    '\uA79F' -> "\u0075",
    '\uA7AB' -> "\u0033",
    '\uA7B2' -> "\u004A",
    '\uA7B3' -> "\u0058",
    '\uA7B4' -> "\u0042",
    '\uAB32' -> "\u0065",
    '\uAB35' -> "\u0066",
    '\uAB3D' -> "\u006F",
    '\uAB47' -> "\u0072",
    '\uAB48' -> "\u0072",
    '\uAB4E' -> "\u0075",
    '\uAB52' -> "\u0075",
    '\uAB5A' -> "\u0079",
    '\uAB63' -> "\u0075\u006F",
    '\uAB75' -> "\u0069",
    '\uAB81' -> "\u0072",
    '\uAB83' -> "\u0077",
    '\uAB93' -> "\u007A",
    '\uABA9' -> "\u0076",
    '\uABAA' -> "\u0073",
    '\uABAF' -> "\u0063",
    '\uFB00' -> "\u0066\u0066",
    '\uFB01' -> "\u0066\u0069",
    '\uFB02' -> "\u0066\u006C",
    '\uFB03' -> "\u0066\u0066\u0069",
    '\uFB04' -> "\u0066\u0066\u006C",
    '\uFB06' -> "\u0073\u0074",
    '\uFBA6' -> "\u006F",
    '\uFBA7' -> "\u006F",
    '\uFBA8' -> "\u006F",
    '\uFBA9' -> "\u006F",
    '\uFBAA' -> "\u006F",
    '\uFBAB' -> "\u006F",
    '\uFBAC' -> "\u006F",
    '\uFBAD' -> "\u006F",
    '\uFD3E' -> "\u0028",
    '\uFD3F' -> "\u0029",
    '\uFE30' -> "\u003A",
    '\uFE4D' -> "\u005F",
    '\uFE4E' -> "\u005F",
    '\uFE4F' -> "\u005F",
    '\uFE58' -> "\u002D",
    '\uFE68' -> "\\",
    '\uFE8D' -> "\u006C",
    '\uFE8E' -> "\u006C",
    '\uFEE9' -> "\u006F",
    '\uFEEA' -> "\u006F",
    '\uFEEB' -> "\u006F",
    '\uFEEC' -> "\u006F",
    '\uFF01' -> "\u0021",
    '\uFF02' -> "\"",
    '\uFF07' -> "\u0027",
    '\uFF1A' -> "\u003A",
    '\uFF21' -> "\u0041",
    '\uFF22' -> "\u0042",
    '\uFF23' -> "\u0043",
    '\uFF25' -> "\u0045",
    '\uFF28' -> "\u0048",
    '\uFF29' -> "\u006C",
    '\uFF2A' -> "\u004A",
    '\uFF2B' -> "\u004B",
    '\uFF2D' -> "\u004D",
    '\uFF2E' -> "\u004E",
    '\uFF2F' -> "\u004F",
    '\uFF30' -> "\u0050",
    '\uFF33' -> "\u0053",
    '\uFF34' -> "\u0054",
    '\uFF38' -> "\u0058",
    '\uFF39' -> "\u0059",
    '\uFF3A' -> "\u005A",
    '\uFF3B' -> "\u0028",
    '\uFF3C' -> "\\",
    '\uFF3D' -> "\u0029",
    '\uFF40' -> "\u0027",
    '\uFF41' -> "\u0061",
    '\uFF43' -> "\u0063",
    '\uFF45' -> "\u0065",
    '\uFF47' -> "\u0067",
    '\uFF48' -> "\u0068",
    '\uFF49' -> "\u0069",
    '\uFF4A' -> "\u006A",
    '\uFF4C' -> "\u006C",
    '\uFF4F' -> "\u006F",
    '\uFF50' -> "\u0070",
    '\uFF53' -> "\u0073",
    '\uFF56' -> "\u0076",
    '\uFF58' -> "\u0078",
    '\uFF59' -> "\u0079",
    '\uFFE8' -> "\u006C"
  )
}
/**
  * Parses the whole input into a program AST.
  *
  * The input is padded with three trailing newlines before parsing. When parsing fails,
  * the character at the last recorded position is inspected: if it lies outside the
  * printable Latin-1 range (code unit >= 0x100 or < 0x20) or is a backtick, an
  * "Invalid character" error is logged, followed by a "Did you mean" hint when
  * [[Confusables.map]] knows a lookalike for it.
  *
  * @return the unmodified result of the parser, whether success or failure
  */
def toAst: Parsed[Program] = {
  val parse = program.parse(input + "\n\n\n")
  parse match {
    case _: Failure[_, _] =>
      val cursor = lastPosition.cursor
      // The parser ran over `input` plus padding, so the cursor is not guaranteed to index into
      // the unpadded `input` (e.g. a failure in the padding, or empty input). Skip the diagnostic
      // rather than throw and hide the real parse failure.
      if (cursor >= 0 && cursor < input.length) {
        val c = input(cursor)
        // NOTE(review): `c < 0x20` also matches tab/CR/LF; presumably lastPosition never rests on those — confirm.
        if (c >= 0x100 || c < 0x20 || c == '`') {
          // Character.getName yields null for unassigned code points; avoid printing "null".
          val name = Option(Character.getName(c)).getOrElse("(unnamed)")
          log.error("Invalid character U+%04X %s".format(c.toInt, name), Some(lastPosition))
          Confusables.map.get(c).foreach { ascii =>
            log.info(s"Did you mean: $ascii")
          }
        }
      }
    case _ =>
  }
  parse
}
CpuFamily.forType(cpu) match { + case CpuFamily.M6809 => Map(CompilationFlag.LenientTextEncoding -> true, CompilationFlag.UseUForStack -> true) + case _ => Map(CompilationFlag.LenientTextEncoding -> true) + } + val options = CompilationOptions(platform, flags, None, platform.zpRegisterSize, Map(), EmuPlatform.textCodecRepository, JobContext(log, new LabelGenerator)) log.hasErrors = false log.verbosity = 999 var effectiveSource = source diff --git a/src/test/scala/millfork/test/emu/ShouldNotParse.scala b/src/test/scala/millfork/test/emu/ShouldNotParse.scala index 2b25ee01..59fd04b1 100644 --- a/src/test/scala/millfork/test/emu/ShouldNotParse.scala +++ b/src/test/scala/millfork/test/emu/ShouldNotParse.scala @@ -22,7 +22,11 @@ object ShouldNotParse extends Matchers { val log = TestErrorReporting.log println(source) val platform = EmuPlatform.get(cpu) - val options = CompilationOptions(platform, Map(CompilationFlag.LenientTextEncoding -> true), None, platform.zpRegisterSize, Map(), EmuPlatform.textCodecRepository, JobContext(log, new LabelGenerator)) + val flags = CpuFamily.forType(cpu) match { + case CpuFamily.M6809 => Map(CompilationFlag.LenientTextEncoding -> true, CompilationFlag.UseUForStack -> true) + case _ => Map(CompilationFlag.LenientTextEncoding -> true) + } + val options = CompilationOptions(platform, flags, None, platform.zpRegisterSize, Map(), EmuPlatform.textCodecRepository, JobContext(log, new LabelGenerator)) log.hasErrors = false log.verbosity = 999 var effectiveSource = source