Mercurial > libunicode
view generator/make-unicode/unicode-after.cpp @ 2:84765c6f4872
New style
author | David Demelier <markand@malikania.fr> |
---|---|
date | Thu, 02 Feb 2017 18:07:27 +0100 |
parents | f94206b2e05e |
children | d9d3406c1250 |
line wrap: on
line source
void encode(char32_t c, char res[5]) noexcept { switch (nbytes_point(c)) { case 1: res[0] = static_cast<char>(c); res[1] = '\0'; break; case 2: res[0] = 0xC0 | ((c >> 6) & 0x1F); res[1] = 0x80 | (c & 0x3F); res[2] = '\0'; break; case 3: res[0] = 0xE0 | ((c >> 12) & 0xF ); res[1] = 0x80 | ((c >> 6) & 0x3F); res[2] = 0x80 | (c & 0x3F); res[3] = '\0'; break; case 4: res[0] = 0xF0 | ((c >> 18) & 0x7 ); res[1] = 0x80 | ((c >> 12) & 0x3F); res[2] = 0x80 | ((c >> 6) & 0x3F); res[3] = 0x80 | (c & 0x3F); res[4] = '\0'; break; default: break; } } void decode(char32_t& c, const char* res) noexcept { c = 0; switch (nbytes_utf8(res[0])) { case 1: c = res[0]; break; case 2: c = (res[0] & 0x1f) << 6; c |= (res[1] & 0x3f); break; case 3: c = (res[0] & 0x0f) << 12; c |= (res[1] & 0x3f) << 6; c |= (res[2] & 0x3f); break; case 4: c = (res[0] & 0x07) << 16; c |= (res[1] & 0x3f) << 12; c |= (res[2] & 0x3f) << 6; c |= (res[3] & 0x3f); default: break; } } int nbytes_utf8(char c) noexcept { if (static_cast<unsigned char>(c) <= 127) { return 1; } if ((c & 0xE0) == 0xC0) { return 2; } if ((c & 0xF0) == 0xE0) { return 3; } if ((c & 0xF8) == 0xF0) { return 4; } return -1; } int nbytes_point(char32_t c) noexcept { if (c <= 0x7F) { return 1; } if (c <= 0x7FF) { return 2; } if (c <= 0xFFFF) { return 3; } if (c <= 0x1FFFFF) { return 4; } return -1; } unsigned length(const std::string& str) { unsigned total = 0; for_each(str, [&] (char32_t) { ++ total; }); return total; } std::string to_utf8(const std::u32string& array) { std::string res; for (size_t i = 0; i < array.size(); ++i) { char tmp[5]; int size = nbytes_point(array[i]); if (size < 0) { throw std::invalid_argument("invalid sequence"); } encode(array[i], tmp); res.insert(res.length(), tmp); } return res; } std::u32string to_utf32(const std::string& str) { std::u32string res; for_each(str, [&] (char32_t code) { res.push_back(code); }); return res; } } // !unicode