2016-07-21 15:14:27 -04:00
|
|
|
|
|
|
|
|
|
|
|
#include <string>
|
|
|
|
#include <algorithm>
|
2016-07-23 15:21:13 -04:00
|
|
|
#include <stdint.h>
|
2016-07-23 16:05:05 -04:00
|
|
|
#include <array>
|
|
|
|
|
|
|
|
|
|
|
|
struct mu_pair {
|
|
|
|
uint16_t unicode;
|
|
|
|
uint8_t macroman;
|
|
|
|
|
|
|
|
constexpr bool operator<(const mu_pair &rhs) const noexcept {
|
|
|
|
return unicode < rhs.unicode;
|
|
|
|
}
|
|
|
|
|
|
|
|
constexpr bool operator==(const mu_pair &rhs) const noexcept {
|
|
|
|
return unicode == rhs.unicode;
|
|
|
|
}
|
|
|
|
|
|
|
|
constexpr bool operator<(uint16_t c) const noexcept {
|
|
|
|
return unicode < c;
|
|
|
|
}
|
|
|
|
|
|
|
|
constexpr bool operator==(uint16_t c) const noexcept {
|
|
|
|
return unicode == c;
|
|
|
|
}
|
|
|
|
};
|
2016-07-21 15:14:27 -04:00
|
|
|
|
2016-07-23 15:21:13 -04:00
|
|
|
uint8_t unicode_to_macroman(uint16_t c){
|
2016-07-21 15:14:27 -04:00
|
|
|
|
2016-07-23 15:21:13 -04:00
|
|
|
|
2016-07-23 16:05:05 -04:00
|
|
|
static std::array< mu_pair, 0x80> table = {{
|
2016-07-21 15:14:27 -04:00
|
|
|
#undef _
|
2016-07-23 16:05:05 -04:00
|
|
|
#define _(macroman, unicode, comment) mu_pair{ unicode, macroman } ,
|
2016-07-23 15:21:13 -04:00
|
|
|
#include "macroman.x"
|
2016-07-21 15:14:27 -04:00
|
|
|
#undef _
|
2016-07-23 16:05:05 -04:00
|
|
|
}};
|
|
|
|
|
|
|
|
static bool init = false;
|
|
|
|
|
|
|
|
if (c < 0x80) return c;
|
|
|
|
|
|
|
|
if (!init) {
|
|
|
|
init = true;
|
|
|
|
std::sort(table.begin(), table.end());
|
|
|
|
init = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
auto iter = std::lower_bound(table.begin(), table.end(), c);
|
|
|
|
if (iter != table.end() && iter->unicode == c) return iter->macroman;
|
2016-07-21 15:14:27 -04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2016-07-23 15:21:13 -04:00
|
|
|
uint16_t macroman_to_unicode(uint8_t c) {
|
2016-07-21 15:14:27 -04:00
|
|
|
|
2016-07-23 16:05:05 -04:00
|
|
|
static std::array<uint16_t, 0x80> table = {{
|
2016-07-23 15:21:13 -04:00
|
|
|
#undef _
|
|
|
|
#define _(macroman,unicode,comment) unicode ,
|
|
|
|
#include "macroman.x"
|
|
|
|
#undef _
|
2016-07-23 16:05:05 -04:00
|
|
|
}};
|
2016-07-23 15:21:13 -04:00
|
|
|
|
|
|
|
if (c < 0x80) return c;
|
|
|
|
return table[c - 0x80];
|
|
|
|
|
|
|
|
}
|
2016-07-21 15:14:27 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::string utf8_to_macroman(const std::string &s) {
|
|
|
|
|
|
|
|
|
|
|
|
if (std::find_if(s.begin(), s.end(), [](unsigned char c){ return c & 0x80; }) == s.end())
|
|
|
|
return s;
|
|
|
|
|
|
|
|
std::string rv;
|
2016-07-23 15:21:13 -04:00
|
|
|
rv.reserve(s.size());
|
2016-07-21 15:14:27 -04:00
|
|
|
|
|
|
|
unsigned cs = 0;
|
|
|
|
uint16_t tmp;
|
|
|
|
|
|
|
|
for (unsigned char c : s) {
|
|
|
|
switch(cs) {
|
|
|
|
case 0:
|
|
|
|
if (c <= 0x7f) {
|
|
|
|
rv.push_back(c);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if ((c & 0b11100000) == 0b11000000) {
|
|
|
|
(tmp = c & 0b00011111);
|
|
|
|
cs = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if ((c & 0b11110000) == 0b11100000) {
|
|
|
|
(tmp = c & 0b00001111);
|
|
|
|
cs = 2;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if ((c & 0b11111000) == 0b11110000) {
|
|
|
|
(tmp = c & 0b00000111);
|
|
|
|
cs = 3;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
// not utf8...
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
case 2:
|
|
|
|
case 3:
|
|
|
|
if ((c & 0b11000000) != 0b10000000) {
|
|
|
|
//not utf8...
|
|
|
|
}
|
|
|
|
tmp = (tmp << 6) + (c & 0b00111111);
|
2016-07-26 16:07:51 -04:00
|
|
|
if (--cs == 0) {
|
2016-07-21 15:14:27 -04:00
|
|
|
c = unicode_to_macroman(tmp);
|
|
|
|
if (c) rv.push_back(c);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return rv;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2016-07-23 15:21:13 -04:00
|
|
|
std::string macroman_to_utf8(const std::string &s) {
|
|
|
|
|
|
|
|
if (std::find_if(s.begin(), s.end(), [](unsigned char c){ return c & 0x80; }) == s.end())
|
|
|
|
return s;
|
|
|
|
|
|
|
|
std::string rv;
|
|
|
|
rv.reserve(s.size());
|
|
|
|
|
|
|
|
for (uint8_t c : s) {
|
|
|
|
if (c <= 0x7f) { rv.push_back(c); continue; }
|
|
|
|
|
|
|
|
uint16_t tmp = macroman_to_unicode(c);
|
|
|
|
|
|
|
|
if (tmp <= 0x7f) {
|
|
|
|
rv.push_back(tmp);
|
|
|
|
} else if (tmp <= 0x07ff) {
|
|
|
|
uint8_t a,b;
|
|
|
|
b = tmp & 0b00111111; tmp >>= 6;
|
|
|
|
a = tmp;
|
|
|
|
rv.push_back(0b11000000 | a);
|
|
|
|
rv.push_back(0b10000000 | b);
|
|
|
|
} else { // tmp <= 0xffff
|
|
|
|
uint8_t a,b,c;
|
|
|
|
c = tmp & 0b00111111; tmp >>= 6;
|
|
|
|
b = tmp & 0b00111111; tmp >>= 6;
|
|
|
|
a = tmp;
|
|
|
|
rv.push_back(0b11100000 | a);
|
|
|
|
rv.push_back(0b10000000 | b);
|
|
|
|
rv.push_back(0b10000000 | c);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return rv;
|
|
|
|
}
|
|
|
|
|
2016-07-21 15:14:27 -04:00
|
|
|
|