Mercurial > code
view tools/mkunicode/Unicode-after.cpp @ 401:ca5e4360f79a
Js:
- Add support of constants map (js::Map<T>)
- Add support for vectors (std::vector<T>)
- Add index based getProperty/putProperty
author | David Demelier <markand@malikania.fr> |
---|---|
date | Sat, 03 Oct 2015 11:27:49 +0200 |
parents | b78d6d8f2872 |
children | f083259de5e6 |
line wrap: on
line source
void encode(char32_t c, char res[5]) noexcept { switch (nbytesPoint(c)) { case 1: res[0] = c; res[1] = '\0'; break; case 2: res[0] = 0xC0 | ((c >> 6) & 0x1F); res[1] = 0x80 | (c & 0x3F); res[2] = '\0'; break; case 3: res[0] = 0xE0 | ((c >> 12) & 0xF ); res[1] = 0x80 | ((c >> 6) & 0x3F); res[2] = 0x80 | (c & 0x3F); res[3] = '\0'; break; case 4: res[0] = 0xF0 | ((c >> 18) & 0x7 ); res[1] = 0x80 | ((c >> 12) & 0x3F); res[2] = 0x80 | ((c >> 6) & 0x3F); res[3] = 0x80 | (c & 0x3F); res[4] = '\0'; break; default: break; } } void decode(char32_t &c, const char *res) noexcept { c = 0; switch (nbytesUtf8(res[0])) { case 1: c = res[0]; break; case 2: c = (res[0] & 0x1f) << 6; c |= (res[1] & 0x3f); break; case 3: c = (res[0] & 0x0f) << 12; c |= (res[1] & 0x3f) << 6; c |= (res[2] & 0x3f); break; case 4: c = (res[0] & 0x07) << 16; c |= (res[1] & 0x3f) << 12; c |= (res[2] & 0x3f) << 6; c |= (res[3] & 0x3f); default: break; } } int nbytesUtf8(char c) noexcept { if ((c & 0xE0) == 0xC0) return 2; if ((c & 0xF0) == 0xE0) return 3; if ((c & 0xF8) == 0xF0) return 4; return 1; } int nbytesPoint(char32_t c) noexcept { if (c <= 0x7F) return 1; if (c <= 0x7FF) return 2; if (c <= 0xFFFF) return 3; if (c <= 0x1FFFFF) return 4; return -1; } int length(const std::string &str) { int total = 0; forEach(str, [&] (char32_t) { ++ total; }); return total; } std::string toUtf8(const std::u32string &array) { std::string res; for (size_t i = 0; i < array.size(); ++i) { char tmp[5]; int size = nbytesPoint(array[i]); if (size < 0) { throw std::invalid_argument("invalid sequence"); } encode(array[i], tmp); res.insert(res.length(), tmp); } return res; } std::u32string toUtf32(const std::string &str) { std::u32string res; forEach(str, [&] (char32_t code) { res.push_back(code); }); return res; } } // !unicode