Mercurial > code
changeset 396:30788c97c58c
Merge branch 'master' of malikania.fr:code
author | David Demelier <markand@malikania.fr> |
---|---|
date | Mon, 28 Sep 2015 19:40:26 +0200 |
parents | 69adcefe73ae (current diff) b78d6d8f2872 (diff) |
children | 6b2db5425836 |
files | |
diffstat | 13 files changed, 5443 insertions(+), 5441 deletions(-) [+] |
line wrap: on
line diff
--- a/C++/modules/Base64/Base64.cpp Mon Sep 28 19:40:16 2015 +0200 +++ b/C++/modules/Base64/Base64.cpp Mon Sep 28 19:40:26 2015 +0200 @@ -21,14 +21,16 @@ #include "Base64.h" -char Base64::lookup(int value) noexcept +namespace base64 { + +char lookup(int value) noexcept { static const char table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; return table[value]; } -int Base64::rlookup(char ch) +int rlookup(char ch) { if (ch == '+') return 62; @@ -45,7 +47,7 @@ throw std::invalid_argument("not a valid base64 string"); } -std::string Base64::encode(const std::string &input) +std::string encode(const std::string &input) { std::string result; std::istringstream iss(input, std::istringstream::in); @@ -55,7 +57,7 @@ return result; } -std::string Base64::decode(const std::string &input) +std::string decode(const std::string &input) { std::string result; std::istringstream iss(input, std::istringstream::in); @@ -63,4 +65,6 @@ decode(std::istreambuf_iterator<char>(iss), std::istreambuf_iterator<char>(), std::back_inserter(result)); return result; -} \ No newline at end of file +} + +} // !base64
--- a/C++/modules/Base64/Base64.h Mon Sep 28 19:40:16 2015 +0200 +++ b/C++/modules/Base64/Base64.h Mon Sep 28 19:40:26 2015 +0200 @@ -27,108 +27,105 @@ #include <stdexcept> #include <string> +namespace base64 { + /** - * @class Base64 - * @brief Encode and decode Base64 data + * Get the base 64 character from the 6-bits value. + * + * @param value the value */ -class Base64 { -public: - /** - * Get the base 64 character from the 6-bits value. - * - * @param value the value - */ - static char lookup(int value) noexcept; +char lookup(int value) noexcept; - /** - * Get the integer value from the base 64 character. - * - * @param ch the base64 character - */ - static int rlookup(char ch); +/** + * Get the integer value from the base 64 character. + * + * @param ch the base64 character + */ +int rlookup(char ch); - /** - * Encode the input to the output. Requirements: - * InputIt must be InputIterator - * OutputIt must be OutputIterator - * - * @param input the beginning - * @param end the end of the data - * @param output the output destination - * @return output - */ - template <typename InputIt, typename OutputIt> - static OutputIt encode(InputIt input, InputIt end, OutputIt output) - { - while (input != end) { - char inputbuf[3] = { 0, 0, 0 }; - int count; +/** + * Encode the input to the output. 
Requirements: + * InputIt must be InputIterator + * OutputIt must be OutputIterator + * + * @param input the beginning + * @param end the end of the data + * @param output the output destination + * @return output + */ +template <typename InputIt, typename OutputIt> +OutputIt encode(InputIt input, InputIt end, OutputIt output) +{ + while (input != end) { + char inputbuf[3] = { 0, 0, 0 }; + int count; - for (count = 0; count < 3 && input != end; ++count) - inputbuf[count] = *input++; + for (count = 0; count < 3 && input != end; ++count) + inputbuf[count] = *input++; - *output++ = lookup(inputbuf[0] >> 2 & 0x3f); - *output++ = lookup((inputbuf[0] << 4 & 0x3f) | (inputbuf[1] >> 4 & 0x0f)); - *output++ = (count < 2) ? '=' : lookup((inputbuf[1] << 2 & 0x3c) | (inputbuf[2] >> 6 & 0x03)); - *output++ = (count < 3) ? '=' : lookup(inputbuf[2] & 0x3f); - } - - return output; + *output++ = lookup(inputbuf[0] >> 2 & 0x3f); + *output++ = lookup((inputbuf[0] << 4 & 0x3f) | (inputbuf[1] >> 4 & 0x0f)); + *output++ = (count < 2) ? '=' : lookup((inputbuf[1] << 2 & 0x3c) | (inputbuf[2] >> 6 & 0x03)); + *output++ = (count < 3) ? '=' : lookup(inputbuf[2] & 0x3f); } - /** - * Decode the input to the output. Requirements: - * InputIt must be InputIterator - * OutputIt must be OutputIterator - * - * @param input the beginning - * @param end the end of the data - * @param output the output destination - * @return output - * @throw std::invalid_argument on bad base64 string - */ - template <typename InputIt, typename OutputIt> - static OutputIt decode(InputIt input, InputIt end, OutputIt output) - { - while (input != end) { - char inputbuf[4] = { 0, 0, 0, 0 }; - int count; + return output; +} - for (count = 0; count < 4 && input != end; ++count) { - inputbuf[count] = (*input == '=') ? '=' : rlookup(*input); - input++; - } +/** + * Decode the input to the output. 
Requirements: + * InputIt must be InputIterator + * OutputIt must be OutputIterator + * + * @param input the beginning + * @param end the end of the data + * @param output the output destination + * @return output + * @throw std::invalid_argument on bad base64 string + */ +template <typename InputIt, typename OutputIt> +OutputIt decode(InputIt input, InputIt end, OutputIt output) +{ + while (input != end) { + char inputbuf[4] = { 0, 0, 0, 0 }; + int count; - if (count != 4) - throw std::invalid_argument("truncated string"); - - *output++ = (inputbuf[0] << 2 & 0xfc) | (inputbuf[1] >> 4 & 0x03); - - if (inputbuf[2] != '=') - *output++ = (inputbuf[1] << 4 & 0xf0) | (inputbuf[2] >> 2 & 0x0f); - if (inputbuf[3] != '=') - *output++ = (inputbuf[2] << 6 & 0xc0) | (inputbuf[3] & 0x3f); + for (count = 0; count < 4 && input != end; ++count) { + inputbuf[count] = (*input == '=') ? '=' : rlookup(*input); + input++; } - return output; + if (count != 4) + throw std::invalid_argument("truncated string"); + + *output++ = (inputbuf[0] << 2 & 0xfc) | (inputbuf[1] >> 4 & 0x03); + + if (inputbuf[2] != '=') + *output++ = (inputbuf[1] << 4 & 0xf0) | (inputbuf[2] >> 2 & 0x0f); + if (inputbuf[3] != '=') + *output++ = (inputbuf[2] << 6 & 0xc0) | (inputbuf[3] & 0x3f); } - /** - * Encode a string. - * - * @param input the input string - * @return the base64 formatted string - */ - static std::string encode(const std::string &input); + return output; +} + +/** + * Encode a string. + * + * @param input the input string + * @return the base64 formatted string + */ +std::string encode(const std::string &input); - /** - * Decode a string. - * - * @param input the base64 formatted string - * @return the original string - * @throw std::invalid_argument on bad base64 string - */ - static std::string decode(const std::string &input); -}; +/** + * Decode a string. 
+ * + * @param input the base64 formatted string + * @return the original string + * @throw std::invalid_argument on bad base64 string + */ +std::string decode(const std::string &input); + +} // !base64 #endif // !_BASE_64_H_
--- a/C++/modules/Hash/Hash.cpp Mon Sep 28 19:40:16 2015 +0200 +++ b/C++/modules/Hash/Hash.cpp Mon Sep 28 19:40:26 2015 +0200 @@ -21,22 +21,56 @@ #include <openssl/sha.h> #include <openssl/md5.h> -std::string Hash::md5(const std::string &input) +namespace hash { + +namespace { + +template <typename Context> +using Init = int (*)(Context *); + +template <typename Context> +using Update = int (*)(Context *, const void *, size_t); + +template <typename Context> +using Final = int (*)(unsigned char *, Context *); + +template <typename Context, size_t Length> +std::string convert(const std::string &input, Init<Context> init, Update<Context> update, Final<Context> finalize) +{ + unsigned char digest[Length]; + char hash[Length * 2 + 1]; + + Context ctx; + init(&ctx); + update(&ctx, input.c_str(), input.length()); + finalize(digest, &ctx); + + for (unsigned long i = 0; i < Length; i++) + sprintf(&hash[i * 2], "%02x", (unsigned int)digest[i]); + + return std::string(hash); +} + +} // !namespace + +std::string md5(const std::string &input) { return convert<MD5_CTX, MD5_DIGEST_LENGTH>(input, MD5_Init, MD5_Update, MD5_Final); } -std::string Hash::sha1(const std::string &input) +std::string sha1(const std::string &input) { return convert<SHA_CTX, SHA_DIGEST_LENGTH>(input, SHA1_Init, SHA1_Update, SHA1_Final); } -std::string Hash::sha256(const std::string &input) +std::string sha256(const std::string &input) { return convert<SHA256_CTX, SHA256_DIGEST_LENGTH>(input, SHA256_Init, SHA256_Update, SHA256_Final); } -std::string Hash::sha512(const std::string &input) +std::string sha512(const std::string &input) { return convert<SHA512_CTX, SHA512_DIGEST_LENGTH>(input, SHA512_Init, SHA512_Update, SHA512_Final); } + +} // !hash
--- a/C++/modules/Hash/Hash.h Mon Sep 28 19:40:16 2015 +0200 +++ b/C++/modules/Hash/Hash.h Mon Sep 28 19:40:26 2015 +0200 @@ -26,75 +26,40 @@ #include <string> +namespace hash { + /** - * @class Hash - * @brief Hash functions + * Hash using MD5. * - * Provide support for MD5, SHA1, SHA256 and SHA512. + * @param input the input string + * @return the hashed string */ -class Hash { -private: - template <typename Context> - using Init = int (*)(Context *); +std::string md5(const std::string &input); - template <typename Context> - using Update = int (*)(Context *, const void *, size_t); - - template <typename Context> - using Final = int (*)(unsigned char *, Context *); +/** + * Hash using SHA1. + * + * @param input the input string + * @return the hashed string + */ +std::string sha1(const std::string &input); - template <typename Context, size_t Length> - static std::string convert(const std::string &input, - Init<Context> init, - Update<Context> update, - Final<Context> finalize) - { - unsigned char digest[Length]; - char hash[Length * 2 + 1]; - - Context ctx; - init(&ctx); - update(&ctx, input.c_str(), input.length()); - finalize(digest, &ctx); - - for (unsigned long i = 0; i < Length; i++) - sprintf(&hash[i * 2], "%02x", (unsigned int)digest[i]); - - return std::string(hash); - } +/** + * Hash using SHA256. + * + * @param input the input string + * @return the hashed string + */ +std::string sha256(const std::string &input); -public: - /** - * Hash using MD5. - * - * @param input the input string - * @return the hashed string - */ - static std::string md5(const std::string &input); - - /** - * Hash using SHA1. - * - * @param input the input string - * @return the hashed string - */ - static std::string sha1(const std::string &input); +/** + * Hash using SHA512. + * + * @param input the input string + * @return the hashed string + */ +std::string sha512(const std::string &input); - /** - * Hash using SHA256. 
- * - * @param input the input string - * @return the hashed string - */ - static std::string sha256(const std::string &input); - - /** - * Hash using SHA512. - * - * @param input the input string - * @return the hashed string - */ - static std::string sha512(const std::string &input); -}; +} // !hash #endif // !_HASH_H_
--- a/C++/modules/Unicode/Unicode.cpp Mon Sep 28 19:40:16 2015 +0200 +++ b/C++/modules/Unicode/Unicode.cpp Mon Sep 28 19:40:26 2015 +0200 @@ -1,4703 +1,4708 @@ -/* - * Unicode.cpp -- UTF-8 to UTF-32 conversions and various operations - * - * Copyright (c) 2013, 2014, 2015 David Demelier <markand@malikania.fr> - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include "Unicode.h" - -/* - * The following code has been generated from Go mkrunetype adapted to our - * needs. 
- */ -#define nelem(x) (sizeof (x) / sizeof ((x)[0])) - -char32_t *rbsearch(char32_t c, char32_t *t, int n, int ne) noexcept -{ - char32_t *p; - int m; - - while (n > 1) { - m = n >> 1; - p = t + m * ne; - - if (c >= p[0]) { - t = p; - n = n - m; - } else { - n = m; - } - } - - if (n && c >= t[0]) - return t; - - return nullptr; -} - -static char32_t isspacer[] = { - 0x0009, 0x000d, - 0x0020, 0x0020, - 0x0085, 0x0085, - 0x00a0, 0x00a0, - 0x1680, 0x1680, - 0x2000, 0x200a, - 0x2028, 0x2029, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000, - 0xfeff, 0xfeff, -}; - -bool Unicode::isspace(char32_t c) noexcept -{ - char32_t *p; - - p = rbsearch(c, isspacer, nelem (isspacer)/2, 2); - if (p && c >= p[0] && c <= p[1]) - return true; - - return false; -} - -static char32_t isdigitr[] = { - 0x0030, 0x0039, - 0x0660, 0x0669, - 0x06f0, 0x06f9, - 0x07c0, 0x07c9, - 0x0966, 0x096f, - 0x09e6, 0x09ef, - 0x0a66, 0x0a6f, - 0x0ae6, 0x0aef, - 0x0b66, 0x0b6f, - 0x0be6, 0x0bef, - 0x0c66, 0x0c6f, - 0x0ce6, 0x0cef, - 0x0d66, 0x0d6f, - 0x0de6, 0x0def, - 0x0e50, 0x0e59, - 0x0ed0, 0x0ed9, - 0x0f20, 0x0f29, - 0x1040, 0x1049, - 0x1090, 0x1099, - 0x17e0, 0x17e9, - 0x1810, 0x1819, - 0x1946, 0x194f, - 0x19d0, 0x19d9, - 0x1a80, 0x1a89, - 0x1a90, 0x1a99, - 0x1b50, 0x1b59, - 0x1bb0, 0x1bb9, - 0x1c40, 0x1c49, - 0x1c50, 0x1c59, - 0xa620, 0xa629, - 0xa8d0, 0xa8d9, - 0xa900, 0xa909, - 0xa9d0, 0xa9d9, - 0xa9f0, 0xa9f9, - 0xaa50, 0xaa59, - 0xabf0, 0xabf9, - 0xff10, 0xff19, - 0x104a0, 0x104a9, - 0x11066, 0x1106f, - 0x110f0, 0x110f9, - 0x11136, 0x1113f, - 0x111d0, 0x111d9, - 0x112f0, 0x112f9, - 0x114d0, 0x114d9, - 0x11650, 0x11659, - 0x116c0, 0x116c9, - 0x118e0, 0x118e9, - 0x16a60, 0x16a69, - 0x16b50, 0x16b59, - 0x1d7ce, 0x1d7ff, -}; - -bool Unicode::isdigit(char32_t c) noexcept -{ - char32_t *p; - - p = rbsearch(c, isdigitr, nelem (isdigitr)/2, 2); - if (p && c >= p[0] && c <= p[1]) - return true; - - return false; -} - -static char32_t isalphar[] = { - 0x0041, 0x005a, - 0x0061, 0x007a, - 0x00c0, 0x00d6, 
- 0x00d8, 0x00f6, - 0x00f8, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x0370, 0x0374, - 0x0376, 0x0377, - 0x037a, 0x037d, - 0x0388, 0x038a, - 0x038e, 0x03a1, - 0x03a3, 0x03f5, - 0x03f7, 0x0481, - 0x048a, 0x052f, - 0x0531, 0x0556, - 0x0561, 0x0587, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0620, 0x064a, - 0x066e, 0x066f, - 0x0671, 0x06d3, - 0x06e5, 0x06e6, - 0x06ee, 0x06ef, - 0x06fa, 0x06fc, - 0x0712, 0x072f, - 0x074d, 0x07a5, - 0x07ca, 0x07ea, - 0x07f4, 0x07f5, - 0x0800, 0x0815, - 0x0840, 0x0858, - 0x08a0, 0x08b2, - 0x0904, 0x0939, - 0x0958, 0x0961, - 0x0971, 0x0980, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b6, 0x09b9, - 0x09dc, 0x09dd, - 0x09df, 0x09e1, - 0x09f0, 0x09f1, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a59, 0x0a5c, - 0x0a72, 0x0a74, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0ae0, 0x0ae1, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb9, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c39, - 0x0c58, 0x0c59, - 0x0c60, 0x0c61, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0ce0, 0x0ce1, - 0x0cf1, 0x0cf2, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d3a, - 0x0d60, 0x0d61, - 0x0d7a, 0x0d7f, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dc0, 0x0dc6, - 0x0e01, 0x0e30, - 0x0e32, 0x0e33, - 0x0e40, 0x0e46, - 0x0e81, 0x0e82, - 0x0e87, 0x0e88, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb0, - 0x0eb2, 0x0eb3, - 0x0ec0, 0x0ec4, - 0x0edc, 0x0edf, - 0x0f40, 0x0f47, - 0x0f49, 0x0f6c, - 0x0f88, 0x0f8c, - 0x1000, 0x102a, - 
0x1050, 0x1055, - 0x105a, 0x105d, - 0x1065, 0x1066, - 0x106e, 0x1070, - 0x1075, 0x1081, - 0x10a0, 0x10c5, - 0x10d0, 0x10fa, - 0x10fc, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x1380, 0x138f, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x167f, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x16f1, 0x16f8, - 0x1700, 0x170c, - 0x170e, 0x1711, - 0x1720, 0x1731, - 0x1740, 0x1751, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1780, 0x17b3, - 0x1820, 0x1877, - 0x1880, 0x18a8, - 0x18b0, 0x18f5, - 0x1900, 0x191e, - 0x1950, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19ab, - 0x19c1, 0x19c7, - 0x1a00, 0x1a16, - 0x1a20, 0x1a54, - 0x1b05, 0x1b33, - 0x1b45, 0x1b4b, - 0x1b83, 0x1ba0, - 0x1bae, 0x1baf, - 0x1bba, 0x1be5, - 0x1c00, 0x1c23, - 0x1c4d, 0x1c4f, - 0x1c5a, 0x1c7d, - 0x1ce9, 0x1cec, - 0x1cee, 0x1cf1, - 0x1cf5, 0x1cf6, - 0x1d00, 0x1dbf, - 0x1e00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x2090, 0x209c, - 0x210a, 0x2113, - 0x2119, 0x211d, - 0x212a, 0x212d, - 0x212f, 0x2139, - 0x213c, 0x213f, - 0x2145, 0x2149, - 0x2183, 0x2184, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c60, 0x2ce4, - 0x2ceb, 0x2cee, - 0x2cf2, 0x2cf3, - 0x2d00, 0x2d25, - 0x2d30, 0x2d67, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x3005, 0x3006, - 0x3031, 0x3035, - 0x303b, 0x303c, - 0x3041, 0x3096, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, - 0x3105, 0x312d, - 0x3131, 0x318e, - 0x31a0, 0x31ba, - 0x31f0, 0x31ff, - 0x3400, 0x4db5, - 0x4e00, 0x9fcc, - 0xa000, 0xa48c, - 0xa4d0, 0xa4fd, - 
0xa500, 0xa60c, - 0xa610, 0xa61f, - 0xa62a, 0xa62b, - 0xa640, 0xa66e, - 0xa67f, 0xa69d, - 0xa6a0, 0xa6e5, - 0xa717, 0xa71f, - 0xa722, 0xa788, - 0xa78b, 0xa78e, - 0xa790, 0xa7ad, - 0xa7b0, 0xa7b1, - 0xa7f7, 0xa801, - 0xa803, 0xa805, - 0xa807, 0xa80a, - 0xa80c, 0xa822, - 0xa840, 0xa873, - 0xa882, 0xa8b3, - 0xa8f2, 0xa8f7, - 0xa90a, 0xa925, - 0xa930, 0xa946, - 0xa960, 0xa97c, - 0xa984, 0xa9b2, - 0xa9e0, 0xa9e4, - 0xa9e6, 0xa9ef, - 0xa9fa, 0xa9fe, - 0xaa00, 0xaa28, - 0xaa40, 0xaa42, - 0xaa44, 0xaa4b, - 0xaa60, 0xaa76, - 0xaa7e, 0xaaaf, - 0xaab5, 0xaab6, - 0xaab9, 0xaabd, - 0xaadb, 0xaadd, - 0xaae0, 0xaaea, - 0xaaf2, 0xaaf4, - 0xab01, 0xab06, - 0xab09, 0xab0e, - 0xab11, 0xab16, - 0xab20, 0xab26, - 0xab28, 0xab2e, - 0xab30, 0xab5a, - 0xab5c, 0xab5f, - 0xab64, 0xab65, - 0xabc0, 0xabe2, - 0xac00, 0xd7a3, - 0xd7b0, 0xd7c6, - 0xd7cb, 0xd7fb, - 0xf900, 0xfa6d, - 0xfa70, 0xfad9, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1f, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff21, 0xff3a, - 0xff41, 0xff5a, - 0xff66, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10280, 0x1029c, - 0x102a0, 0x102d0, - 0x10300, 0x1031f, - 0x10330, 0x10340, - 0x10342, 0x10349, - 0x10350, 0x10375, - 0x10380, 0x1039d, - 0x103a0, 0x103c3, - 0x103c8, 0x103cf, - 0x10400, 0x1049d, - 0x10500, 0x10527, - 0x10530, 0x10563, - 0x10600, 0x10736, - 0x10740, 0x10755, - 0x10760, 0x10767, - 0x10800, 0x10805, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083f, 0x10855, - 0x10860, 0x10876, - 0x10880, 0x1089e, - 0x10900, 0x10915, - 0x10920, 0x10939, - 0x10980, 0x109b7, - 0x109be, 0x109bf, - 0x10a10, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a60, 0x10a7c, - 0x10a80, 0x10a9c, - 
0x10ac0, 0x10ac7, - 0x10ac9, 0x10ae4, - 0x10b00, 0x10b35, - 0x10b40, 0x10b55, - 0x10b60, 0x10b72, - 0x10b80, 0x10b91, - 0x10c00, 0x10c48, - 0x11003, 0x11037, - 0x11083, 0x110af, - 0x110d0, 0x110e8, - 0x11103, 0x11126, - 0x11150, 0x11172, - 0x11183, 0x111b2, - 0x111c1, 0x111c4, - 0x11200, 0x11211, - 0x11213, 0x1122b, - 0x112b0, 0x112de, - 0x11305, 0x1130c, - 0x1130f, 0x11310, - 0x11313, 0x11328, - 0x1132a, 0x11330, - 0x11332, 0x11333, - 0x11335, 0x11339, - 0x1135d, 0x11361, - 0x11480, 0x114af, - 0x114c4, 0x114c5, - 0x11580, 0x115ae, - 0x11600, 0x1162f, - 0x11680, 0x116aa, - 0x118a0, 0x118df, - 0x11ac0, 0x11af8, - 0x12000, 0x12398, - 0x13000, 0x1342e, - 0x16800, 0x16a38, - 0x16a40, 0x16a5e, - 0x16ad0, 0x16aed, - 0x16b00, 0x16b2f, - 0x16b40, 0x16b43, - 0x16b63, 0x16b77, - 0x16b7d, 0x16b8f, - 0x16f00, 0x16f44, - 0x16f93, 0x16f9f, - 0x1b000, 0x1b001, - 0x1bc00, 0x1bc6a, - 0x1bc70, 0x1bc7c, - 0x1bc80, 0x1bc88, - 0x1bc90, 0x1bc99, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7cb, - 0x1e800, 0x1e8c4, - 0x1ee00, 0x1ee03, - 0x1ee05, 0x1ee1f, - 0x1ee21, 0x1ee22, - 0x1ee29, 0x1ee32, - 0x1ee34, 0x1ee37, - 0x1ee4d, 0x1ee4f, - 0x1ee51, 0x1ee52, - 0x1ee61, 0x1ee62, - 0x1ee67, 0x1ee6a, - 0x1ee6c, 0x1ee72, - 0x1ee74, 0x1ee77, - 0x1ee79, 0x1ee7c, - 0x1ee80, 0x1ee89, - 0x1ee8b, 0x1ee9b, - 0x1eea1, 0x1eea3, - 0x1eea5, 0x1eea9, - 0x1eeab, 0x1eebb, - 0x20000, 0x2a6d6, - 0x2a700, 0x2b734, - 0x2b740, 0x2b81d, - 0x2f800, 0x2fa1d, -}; - -static char32_t isalphas[] = { - 0x00aa, - 0x00b5, - 0x00ba, - 0x02ec, - 
0x02ee, - 0x037f, - 0x0386, - 0x038c, - 0x0559, - 0x06d5, - 0x06ff, - 0x0710, - 0x07b1, - 0x07fa, - 0x081a, - 0x0824, - 0x0828, - 0x093d, - 0x0950, - 0x09b2, - 0x09bd, - 0x09ce, - 0x0a5e, - 0x0abd, - 0x0ad0, - 0x0b3d, - 0x0b71, - 0x0b83, - 0x0b9c, - 0x0bd0, - 0x0c3d, - 0x0cbd, - 0x0cde, - 0x0d3d, - 0x0d4e, - 0x0dbd, - 0x0e84, - 0x0e8a, - 0x0e8d, - 0x0ea5, - 0x0ea7, - 0x0ebd, - 0x0ec6, - 0x0f00, - 0x103f, - 0x1061, - 0x108e, - 0x10c7, - 0x10cd, - 0x1258, - 0x12c0, - 0x17d7, - 0x17dc, - 0x18aa, - 0x1aa7, - 0x1f59, - 0x1f5b, - 0x1f5d, - 0x1fbe, - 0x2071, - 0x207f, - 0x2102, - 0x2107, - 0x2115, - 0x2124, - 0x2126, - 0x2128, - 0x214e, - 0x2d27, - 0x2d2d, - 0x2d6f, - 0x2e2f, - 0xa8fb, - 0xa9cf, - 0xaa7a, - 0xaab1, - 0xaac0, - 0xaac2, - 0xfb1d, - 0xfb3e, - 0x10808, - 0x1083c, - 0x10a00, - 0x11176, - 0x111da, - 0x1133d, - 0x114c7, - 0x11644, - 0x118ff, - 0x16f50, - 0x1d4a2, - 0x1d4bb, - 0x1d546, - 0x1ee24, - 0x1ee27, - 0x1ee39, - 0x1ee3b, - 0x1ee42, - 0x1ee47, - 0x1ee49, - 0x1ee4b, - 0x1ee54, - 0x1ee57, - 0x1ee59, - 0x1ee5b, - 0x1ee5d, - 0x1ee5f, - 0x1ee64, - 0x1ee7e, -}; - -bool Unicode::isalpha(char32_t c) noexcept -{ - char32_t *p; - - p = rbsearch(c, isalphar, nelem (isalphar)/2, 2); - if (p && c >= p[0] && c <= p[1]) - return true; - - p = rbsearch(c, isalphas, nelem (isalphas), 1); - if (p && c == p[0]) - return true; - - return false; -} - -static char32_t isupperr[] = { - 0x0041, 0x005a, - 0x00c0, 0x00d6, - 0x00d8, 0x00de, - 0x0178, 0x0179, - 0x0181, 0x0182, - 0x0186, 0x0187, - 0x0189, 0x018b, - 0x018e, 0x0191, - 0x0193, 0x0194, - 0x0196, 0x0198, - 0x019c, 0x019d, - 0x019f, 0x01a0, - 0x01a6, 0x01a7, - 0x01ae, 0x01af, - 0x01b1, 0x01b3, - 0x01b7, 0x01b8, - 0x01f6, 0x01f8, - 0x023a, 0x023b, - 0x023d, 0x023e, - 0x0243, 0x0246, - 0x0388, 0x038a, - 0x038e, 0x038f, - 0x0391, 0x03a1, - 0x03a3, 0x03ab, - 0x03d2, 0x03d4, - 0x03f9, 0x03fa, - 0x03fd, 0x042f, - 0x04c0, 0x04c1, - 0x0531, 0x0556, - 0x10a0, 0x10c5, - 0x1f08, 0x1f0f, - 0x1f18, 0x1f1d, - 0x1f28, 0x1f2f, - 0x1f38, 
0x1f3f, - 0x1f48, 0x1f4d, - 0x1f68, 0x1f6f, - 0x1f88, 0x1f8f, - 0x1f98, 0x1f9f, - 0x1fa8, 0x1faf, - 0x1fb8, 0x1fbc, - 0x1fc8, 0x1fcc, - 0x1fd8, 0x1fdb, - 0x1fe8, 0x1fec, - 0x1ff8, 0x1ffc, - 0x210b, 0x210d, - 0x2110, 0x2112, - 0x2119, 0x211d, - 0x212a, 0x212d, - 0x2130, 0x2133, - 0x213e, 0x213f, - 0x2160, 0x216f, - 0x24b6, 0x24cf, - 0x2c00, 0x2c2e, - 0x2c62, 0x2c64, - 0x2c6d, 0x2c70, - 0x2c7e, 0x2c80, - 0xa77d, 0xa77e, - 0xa7aa, 0xa7ad, - 0xa7b0, 0xa7b1, - 0xff21, 0xff3a, - 0x10400, 0x10427, - 0x118a0, 0x118bf, - 0x1d400, 0x1d419, - 0x1d434, 0x1d44d, - 0x1d468, 0x1d481, - 0x1d49e, 0x1d49f, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b5, - 0x1d4d0, 0x1d4e9, - 0x1d504, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d538, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d54a, 0x1d550, - 0x1d56c, 0x1d585, - 0x1d5a0, 0x1d5b9, - 0x1d5d4, 0x1d5ed, - 0x1d608, 0x1d621, - 0x1d63c, 0x1d655, - 0x1d670, 0x1d689, - 0x1d6a8, 0x1d6c0, - 0x1d6e2, 0x1d6fa, - 0x1d71c, 0x1d734, - 0x1d756, 0x1d76e, - 0x1d790, 0x1d7a8, -}; - -static char32_t isuppers[] = { - 0x0100, - 0x0102, - 0x0104, - 0x0106, - 0x0108, - 0x010a, - 0x010c, - 0x010e, - 0x0110, - 0x0112, - 0x0114, - 0x0116, - 0x0118, - 0x011a, - 0x011c, - 0x011e, - 0x0120, - 0x0122, - 0x0124, - 0x0126, - 0x0128, - 0x012a, - 0x012c, - 0x012e, - 0x0130, - 0x0132, - 0x0134, - 0x0136, - 0x0139, - 0x013b, - 0x013d, - 0x013f, - 0x0141, - 0x0143, - 0x0145, - 0x0147, - 0x014a, - 0x014c, - 0x014e, - 0x0150, - 0x0152, - 0x0154, - 0x0156, - 0x0158, - 0x015a, - 0x015c, - 0x015e, - 0x0160, - 0x0162, - 0x0164, - 0x0166, - 0x0168, - 0x016a, - 0x016c, - 0x016e, - 0x0170, - 0x0172, - 0x0174, - 0x0176, - 0x017b, - 0x017d, - 0x0184, - 0x01a2, - 0x01a4, - 0x01a9, - 0x01ac, - 0x01b5, - 0x01bc, - 0x01c4, - 0x01c7, - 0x01ca, - 0x01cd, - 0x01cf, - 0x01d1, - 0x01d3, - 0x01d5, - 0x01d7, - 0x01d9, - 0x01db, - 0x01de, - 0x01e0, - 0x01e2, - 0x01e4, - 0x01e6, - 0x01e8, - 0x01ea, - 0x01ec, - 0x01ee, - 0x01f1, - 0x01f4, - 
0x01fa, - 0x01fc, - 0x01fe, - 0x0200, - 0x0202, - 0x0204, - 0x0206, - 0x0208, - 0x020a, - 0x020c, - 0x020e, - 0x0210, - 0x0212, - 0x0214, - 0x0216, - 0x0218, - 0x021a, - 0x021c, - 0x021e, - 0x0220, - 0x0222, - 0x0224, - 0x0226, - 0x0228, - 0x022a, - 0x022c, - 0x022e, - 0x0230, - 0x0232, - 0x0241, - 0x0248, - 0x024a, - 0x024c, - 0x024e, - 0x0370, - 0x0372, - 0x0376, - 0x037f, - 0x0386, - 0x038c, - 0x03cf, - 0x03d8, - 0x03da, - 0x03dc, - 0x03de, - 0x03e0, - 0x03e2, - 0x03e4, - 0x03e6, - 0x03e8, - 0x03ea, - 0x03ec, - 0x03ee, - 0x03f4, - 0x03f7, - 0x0460, - 0x0462, - 0x0464, - 0x0466, - 0x0468, - 0x046a, - 0x046c, - 0x046e, - 0x0470, - 0x0472, - 0x0474, - 0x0476, - 0x0478, - 0x047a, - 0x047c, - 0x047e, - 0x0480, - 0x048a, - 0x048c, - 0x048e, - 0x0490, - 0x0492, - 0x0494, - 0x0496, - 0x0498, - 0x049a, - 0x049c, - 0x049e, - 0x04a0, - 0x04a2, - 0x04a4, - 0x04a6, - 0x04a8, - 0x04aa, - 0x04ac, - 0x04ae, - 0x04b0, - 0x04b2, - 0x04b4, - 0x04b6, - 0x04b8, - 0x04ba, - 0x04bc, - 0x04be, - 0x04c3, - 0x04c5, - 0x04c7, - 0x04c9, - 0x04cb, - 0x04cd, - 0x04d0, - 0x04d2, - 0x04d4, - 0x04d6, - 0x04d8, - 0x04da, - 0x04dc, - 0x04de, - 0x04e0, - 0x04e2, - 0x04e4, - 0x04e6, - 0x04e8, - 0x04ea, - 0x04ec, - 0x04ee, - 0x04f0, - 0x04f2, - 0x04f4, - 0x04f6, - 0x04f8, - 0x04fa, - 0x04fc, - 0x04fe, - 0x0500, - 0x0502, - 0x0504, - 0x0506, - 0x0508, - 0x050a, - 0x050c, - 0x050e, - 0x0510, - 0x0512, - 0x0514, - 0x0516, - 0x0518, - 0x051a, - 0x051c, - 0x051e, - 0x0520, - 0x0522, - 0x0524, - 0x0526, - 0x0528, - 0x052a, - 0x052c, - 0x052e, - 0x10c7, - 0x10cd, - 0x1e00, - 0x1e02, - 0x1e04, - 0x1e06, - 0x1e08, - 0x1e0a, - 0x1e0c, - 0x1e0e, - 0x1e10, - 0x1e12, - 0x1e14, - 0x1e16, - 0x1e18, - 0x1e1a, - 0x1e1c, - 0x1e1e, - 0x1e20, - 0x1e22, - 0x1e24, - 0x1e26, - 0x1e28, - 0x1e2a, - 0x1e2c, - 0x1e2e, - 0x1e30, - 0x1e32, - 0x1e34, - 0x1e36, - 0x1e38, - 0x1e3a, - 0x1e3c, - 0x1e3e, - 0x1e40, - 0x1e42, - 0x1e44, - 0x1e46, - 0x1e48, - 0x1e4a, - 0x1e4c, - 0x1e4e, - 0x1e50, - 0x1e52, - 0x1e54, - 0x1e56, - 0x1e58, - 
0x1e5a, - 0x1e5c, - 0x1e5e, - 0x1e60, - 0x1e62, - 0x1e64, - 0x1e66, - 0x1e68, - 0x1e6a, - 0x1e6c, - 0x1e6e, - 0x1e70, - 0x1e72, - 0x1e74, - 0x1e76, - 0x1e78, - 0x1e7a, - 0x1e7c, - 0x1e7e, - 0x1e80, - 0x1e82, - 0x1e84, - 0x1e86, - 0x1e88, - 0x1e8a, - 0x1e8c, - 0x1e8e, - 0x1e90, - 0x1e92, - 0x1e94, - 0x1e9e, - 0x1ea0, - 0x1ea2, - 0x1ea4, - 0x1ea6, - 0x1ea8, - 0x1eaa, - 0x1eac, - 0x1eae, - 0x1eb0, - 0x1eb2, - 0x1eb4, - 0x1eb6, - 0x1eb8, - 0x1eba, - 0x1ebc, - 0x1ebe, - 0x1ec0, - 0x1ec2, - 0x1ec4, - 0x1ec6, - 0x1ec8, - 0x1eca, - 0x1ecc, - 0x1ece, - 0x1ed0, - 0x1ed2, - 0x1ed4, - 0x1ed6, - 0x1ed8, - 0x1eda, - 0x1edc, - 0x1ede, - 0x1ee0, - 0x1ee2, - 0x1ee4, - 0x1ee6, - 0x1ee8, - 0x1eea, - 0x1eec, - 0x1eee, - 0x1ef0, - 0x1ef2, - 0x1ef4, - 0x1ef6, - 0x1ef8, - 0x1efa, - 0x1efc, - 0x1efe, - 0x1f59, - 0x1f5b, - 0x1f5d, - 0x1f5f, - 0x2102, - 0x2107, - 0x2115, - 0x2124, - 0x2126, - 0x2128, - 0x2145, - 0x2183, - 0x2c60, - 0x2c67, - 0x2c69, - 0x2c6b, - 0x2c72, - 0x2c75, - 0x2c82, - 0x2c84, - 0x2c86, - 0x2c88, - 0x2c8a, - 0x2c8c, - 0x2c8e, - 0x2c90, - 0x2c92, - 0x2c94, - 0x2c96, - 0x2c98, - 0x2c9a, - 0x2c9c, - 0x2c9e, - 0x2ca0, - 0x2ca2, - 0x2ca4, - 0x2ca6, - 0x2ca8, - 0x2caa, - 0x2cac, - 0x2cae, - 0x2cb0, - 0x2cb2, - 0x2cb4, - 0x2cb6, - 0x2cb8, - 0x2cba, - 0x2cbc, - 0x2cbe, - 0x2cc0, - 0x2cc2, - 0x2cc4, - 0x2cc6, - 0x2cc8, - 0x2cca, - 0x2ccc, - 0x2cce, - 0x2cd0, - 0x2cd2, - 0x2cd4, - 0x2cd6, - 0x2cd8, - 0x2cda, - 0x2cdc, - 0x2cde, - 0x2ce0, - 0x2ce2, - 0x2ceb, - 0x2ced, - 0x2cf2, - 0xa640, - 0xa642, - 0xa644, - 0xa646, - 0xa648, - 0xa64a, - 0xa64c, - 0xa64e, - 0xa650, - 0xa652, - 0xa654, - 0xa656, - 0xa658, - 0xa65a, - 0xa65c, - 0xa65e, - 0xa660, - 0xa662, - 0xa664, - 0xa666, - 0xa668, - 0xa66a, - 0xa66c, - 0xa680, - 0xa682, - 0xa684, - 0xa686, - 0xa688, - 0xa68a, - 0xa68c, - 0xa68e, - 0xa690, - 0xa692, - 0xa694, - 0xa696, - 0xa698, - 0xa69a, - 0xa722, - 0xa724, - 0xa726, - 0xa728, - 0xa72a, - 0xa72c, - 0xa72e, - 0xa732, - 0xa734, - 0xa736, - 0xa738, - 0xa73a, - 0xa73c, - 0xa73e, - 
0xa740, - 0xa742, - 0xa744, - 0xa746, - 0xa748, - 0xa74a, - 0xa74c, - 0xa74e, - 0xa750, - 0xa752, - 0xa754, - 0xa756, - 0xa758, - 0xa75a, - 0xa75c, - 0xa75e, - 0xa760, - 0xa762, - 0xa764, - 0xa766, - 0xa768, - 0xa76a, - 0xa76c, - 0xa76e, - 0xa779, - 0xa77b, - 0xa780, - 0xa782, - 0xa784, - 0xa786, - 0xa78b, - 0xa78d, - 0xa790, - 0xa792, - 0xa796, - 0xa798, - 0xa79a, - 0xa79c, - 0xa79e, - 0xa7a0, - 0xa7a2, - 0xa7a4, - 0xa7a6, - 0xa7a8, - 0x1d49c, - 0x1d4a2, - 0x1d546, - 0x1d7ca, -}; - -bool Unicode::isupper(char32_t c) noexcept -{ - char32_t *p; - - p = rbsearch(c, isupperr, nelem (isupperr)/2, 2); - if (p && c >= p[0] && c <= p[1]) - return true; - - p = rbsearch(c, isuppers, nelem (isuppers), 1); - if (p && c == p[0]) - return true; - - return false; -} - -static char32_t islowerr[] = { - 0x0061, 0x007a, - 0x00df, 0x00f6, - 0x00f8, 0x00ff, - 0x0137, 0x0138, - 0x0148, 0x0149, - 0x017e, 0x0180, - 0x018c, 0x018d, - 0x0199, 0x019b, - 0x01aa, 0x01ab, - 0x01b9, 0x01ba, - 0x01bd, 0x01bf, - 0x01dc, 0x01dd, - 0x01ef, 0x01f0, - 0x0233, 0x0239, - 0x023f, 0x0240, - 0x024f, 0x0293, - 0x0295, 0x02af, - 0x037b, 0x037d, - 0x03ac, 0x03ce, - 0x03d0, 0x03d1, - 0x03d5, 0x03d7, - 0x03ef, 0x03f3, - 0x03fb, 0x03fc, - 0x0430, 0x045f, - 0x04ce, 0x04cf, - 0x0561, 0x0587, - 0x1d00, 0x1d2b, - 0x1d6b, 0x1d77, - 0x1d79, 0x1d9a, - 0x1e95, 0x1e9d, - 0x1eff, 0x1f07, - 0x1f10, 0x1f15, - 0x1f20, 0x1f27, - 0x1f30, 0x1f37, - 0x1f40, 0x1f45, - 0x1f50, 0x1f57, - 0x1f60, 0x1f67, - 0x1f70, 0x1f7d, - 0x1f80, 0x1f87, - 0x1f90, 0x1f97, - 0x1fa0, 0x1fa7, - 0x1fb0, 0x1fb4, - 0x1fb6, 0x1fb7, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fc7, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fd7, - 0x1fe0, 0x1fe7, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ff7, - 0x210e, 0x210f, - 0x213c, 0x213d, - 0x2146, 0x2149, - 0x2170, 0x217f, - 0x24d0, 0x24e9, - 0x2c30, 0x2c5e, - 0x2c65, 0x2c66, - 0x2c73, 0x2c74, - 0x2c76, 0x2c7b, - 0x2ce3, 0x2ce4, - 0x2d00, 0x2d25, - 0xa72f, 0xa731, - 0xa771, 0xa778, - 0xa793, 0xa795, - 0xab30, 0xab5a, - 0xab64, 0xab65, - 0xfb00, 
0xfb06, - 0xfb13, 0xfb17, - 0xff41, 0xff5a, - 0x10428, 0x1044f, - 0x118c0, 0x118df, - 0x1d41a, 0x1d433, - 0x1d44e, 0x1d454, - 0x1d456, 0x1d467, - 0x1d482, 0x1d49b, - 0x1d4b6, 0x1d4b9, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d4cf, - 0x1d4ea, 0x1d503, - 0x1d51e, 0x1d537, - 0x1d552, 0x1d56b, - 0x1d586, 0x1d59f, - 0x1d5ba, 0x1d5d3, - 0x1d5ee, 0x1d607, - 0x1d622, 0x1d63b, - 0x1d656, 0x1d66f, - 0x1d68a, 0x1d6a5, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6e1, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d71b, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d755, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d78f, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, -}; - -static char32_t islowers[] = { - 0x00b5, - 0x0101, - 0x0103, - 0x0105, - 0x0107, - 0x0109, - 0x010b, - 0x010d, - 0x010f, - 0x0111, - 0x0113, - 0x0115, - 0x0117, - 0x0119, - 0x011b, - 0x011d, - 0x011f, - 0x0121, - 0x0123, - 0x0125, - 0x0127, - 0x0129, - 0x012b, - 0x012d, - 0x012f, - 0x0131, - 0x0133, - 0x0135, - 0x013a, - 0x013c, - 0x013e, - 0x0140, - 0x0142, - 0x0144, - 0x0146, - 0x014b, - 0x014d, - 0x014f, - 0x0151, - 0x0153, - 0x0155, - 0x0157, - 0x0159, - 0x015b, - 0x015d, - 0x015f, - 0x0161, - 0x0163, - 0x0165, - 0x0167, - 0x0169, - 0x016b, - 0x016d, - 0x016f, - 0x0171, - 0x0173, - 0x0175, - 0x0177, - 0x017a, - 0x017c, - 0x0183, - 0x0185, - 0x0188, - 0x0192, - 0x0195, - 0x019e, - 0x01a1, - 0x01a3, - 0x01a5, - 0x01a8, - 0x01ad, - 0x01b0, - 0x01b4, - 0x01b6, - 0x01c6, - 0x01c9, - 0x01cc, - 0x01ce, - 0x01d0, - 0x01d2, - 0x01d4, - 0x01d6, - 0x01d8, - 0x01da, - 0x01df, - 0x01e1, - 0x01e3, - 0x01e5, - 0x01e7, - 0x01e9, - 0x01eb, - 0x01ed, - 0x01f3, - 0x01f5, - 0x01f9, - 0x01fb, - 0x01fd, - 0x01ff, - 0x0201, - 0x0203, - 0x0205, - 0x0207, - 0x0209, - 0x020b, - 0x020d, - 0x020f, - 0x0211, - 0x0213, - 0x0215, - 0x0217, - 0x0219, - 0x021b, - 0x021d, - 0x021f, - 0x0221, - 0x0223, - 0x0225, - 0x0227, - 0x0229, - 0x022b, - 0x022d, - 0x022f, - 0x0231, - 0x023c, - 0x0242, - 0x0247, - 0x0249, - 0x024b, - 0x024d, - 0x0371, - 0x0373, - 0x0377, - 0x0390, - 0x03d9, - 0x03db, - 
0x03dd, - 0x03df, - 0x03e1, - 0x03e3, - 0x03e5, - 0x03e7, - 0x03e9, - 0x03eb, - 0x03ed, - 0x03f5, - 0x03f8, - 0x0461, - 0x0463, - 0x0465, - 0x0467, - 0x0469, - 0x046b, - 0x046d, - 0x046f, - 0x0471, - 0x0473, - 0x0475, - 0x0477, - 0x0479, - 0x047b, - 0x047d, - 0x047f, - 0x0481, - 0x048b, - 0x048d, - 0x048f, - 0x0491, - 0x0493, - 0x0495, - 0x0497, - 0x0499, - 0x049b, - 0x049d, - 0x049f, - 0x04a1, - 0x04a3, - 0x04a5, - 0x04a7, - 0x04a9, - 0x04ab, - 0x04ad, - 0x04af, - 0x04b1, - 0x04b3, - 0x04b5, - 0x04b7, - 0x04b9, - 0x04bb, - 0x04bd, - 0x04bf, - 0x04c2, - 0x04c4, - 0x04c6, - 0x04c8, - 0x04ca, - 0x04cc, - 0x04d1, - 0x04d3, - 0x04d5, - 0x04d7, - 0x04d9, - 0x04db, - 0x04dd, - 0x04df, - 0x04e1, - 0x04e3, - 0x04e5, - 0x04e7, - 0x04e9, - 0x04eb, - 0x04ed, - 0x04ef, - 0x04f1, - 0x04f3, - 0x04f5, - 0x04f7, - 0x04f9, - 0x04fb, - 0x04fd, - 0x04ff, - 0x0501, - 0x0503, - 0x0505, - 0x0507, - 0x0509, - 0x050b, - 0x050d, - 0x050f, - 0x0511, - 0x0513, - 0x0515, - 0x0517, - 0x0519, - 0x051b, - 0x051d, - 0x051f, - 0x0521, - 0x0523, - 0x0525, - 0x0527, - 0x0529, - 0x052b, - 0x052d, - 0x052f, - 0x1e01, - 0x1e03, - 0x1e05, - 0x1e07, - 0x1e09, - 0x1e0b, - 0x1e0d, - 0x1e0f, - 0x1e11, - 0x1e13, - 0x1e15, - 0x1e17, - 0x1e19, - 0x1e1b, - 0x1e1d, - 0x1e1f, - 0x1e21, - 0x1e23, - 0x1e25, - 0x1e27, - 0x1e29, - 0x1e2b, - 0x1e2d, - 0x1e2f, - 0x1e31, - 0x1e33, - 0x1e35, - 0x1e37, - 0x1e39, - 0x1e3b, - 0x1e3d, - 0x1e3f, - 0x1e41, - 0x1e43, - 0x1e45, - 0x1e47, - 0x1e49, - 0x1e4b, - 0x1e4d, - 0x1e4f, - 0x1e51, - 0x1e53, - 0x1e55, - 0x1e57, - 0x1e59, - 0x1e5b, - 0x1e5d, - 0x1e5f, - 0x1e61, - 0x1e63, - 0x1e65, - 0x1e67, - 0x1e69, - 0x1e6b, - 0x1e6d, - 0x1e6f, - 0x1e71, - 0x1e73, - 0x1e75, - 0x1e77, - 0x1e79, - 0x1e7b, - 0x1e7d, - 0x1e7f, - 0x1e81, - 0x1e83, - 0x1e85, - 0x1e87, - 0x1e89, - 0x1e8b, - 0x1e8d, - 0x1e8f, - 0x1e91, - 0x1e93, - 0x1e9f, - 0x1ea1, - 0x1ea3, - 0x1ea5, - 0x1ea7, - 0x1ea9, - 0x1eab, - 0x1ead, - 0x1eaf, - 0x1eb1, - 0x1eb3, - 0x1eb5, - 0x1eb7, - 0x1eb9, - 0x1ebb, - 0x1ebd, - 0x1ebf, - 
0x1ec1, - 0x1ec3, - 0x1ec5, - 0x1ec7, - 0x1ec9, - 0x1ecb, - 0x1ecd, - 0x1ecf, - 0x1ed1, - 0x1ed3, - 0x1ed5, - 0x1ed7, - 0x1ed9, - 0x1edb, - 0x1edd, - 0x1edf, - 0x1ee1, - 0x1ee3, - 0x1ee5, - 0x1ee7, - 0x1ee9, - 0x1eeb, - 0x1eed, - 0x1eef, - 0x1ef1, - 0x1ef3, - 0x1ef5, - 0x1ef7, - 0x1ef9, - 0x1efb, - 0x1efd, - 0x1fbe, - 0x210a, - 0x2113, - 0x212f, - 0x2134, - 0x2139, - 0x214e, - 0x2184, - 0x2c61, - 0x2c68, - 0x2c6a, - 0x2c6c, - 0x2c71, - 0x2c81, - 0x2c83, - 0x2c85, - 0x2c87, - 0x2c89, - 0x2c8b, - 0x2c8d, - 0x2c8f, - 0x2c91, - 0x2c93, - 0x2c95, - 0x2c97, - 0x2c99, - 0x2c9b, - 0x2c9d, - 0x2c9f, - 0x2ca1, - 0x2ca3, - 0x2ca5, - 0x2ca7, - 0x2ca9, - 0x2cab, - 0x2cad, - 0x2caf, - 0x2cb1, - 0x2cb3, - 0x2cb5, - 0x2cb7, - 0x2cb9, - 0x2cbb, - 0x2cbd, - 0x2cbf, - 0x2cc1, - 0x2cc3, - 0x2cc5, - 0x2cc7, - 0x2cc9, - 0x2ccb, - 0x2ccd, - 0x2ccf, - 0x2cd1, - 0x2cd3, - 0x2cd5, - 0x2cd7, - 0x2cd9, - 0x2cdb, - 0x2cdd, - 0x2cdf, - 0x2ce1, - 0x2cec, - 0x2cee, - 0x2cf3, - 0x2d27, - 0x2d2d, - 0xa641, - 0xa643, - 0xa645, - 0xa647, - 0xa649, - 0xa64b, - 0xa64d, - 0xa64f, - 0xa651, - 0xa653, - 0xa655, - 0xa657, - 0xa659, - 0xa65b, - 0xa65d, - 0xa65f, - 0xa661, - 0xa663, - 0xa665, - 0xa667, - 0xa669, - 0xa66b, - 0xa66d, - 0xa681, - 0xa683, - 0xa685, - 0xa687, - 0xa689, - 0xa68b, - 0xa68d, - 0xa68f, - 0xa691, - 0xa693, - 0xa695, - 0xa697, - 0xa699, - 0xa69b, - 0xa723, - 0xa725, - 0xa727, - 0xa729, - 0xa72b, - 0xa72d, - 0xa733, - 0xa735, - 0xa737, - 0xa739, - 0xa73b, - 0xa73d, - 0xa73f, - 0xa741, - 0xa743, - 0xa745, - 0xa747, - 0xa749, - 0xa74b, - 0xa74d, - 0xa74f, - 0xa751, - 0xa753, - 0xa755, - 0xa757, - 0xa759, - 0xa75b, - 0xa75d, - 0xa75f, - 0xa761, - 0xa763, - 0xa765, - 0xa767, - 0xa769, - 0xa76b, - 0xa76d, - 0xa76f, - 0xa77a, - 0xa77c, - 0xa77f, - 0xa781, - 0xa783, - 0xa785, - 0xa787, - 0xa78c, - 0xa78e, - 0xa791, - 0xa797, - 0xa799, - 0xa79b, - 0xa79d, - 0xa79f, - 0xa7a1, - 0xa7a3, - 0xa7a5, - 0xa7a7, - 0xa7a9, - 0xa7fa, - 0x1d4bb, - 0x1d7cb, -}; - -bool Unicode::islower(char32_t c) noexcept 
-{ - char32_t *p; - - p = rbsearch(c, islowerr, nelem (islowerr)/2, 2); - if (p && c >= p[0] && c <= p[1]) - return true; - - p = rbsearch(c, islowers, nelem (islowers), 1); - if (p && c == p[0]) - return true; - - return false; -} - -static char32_t istitler[] = { - 0x0041, 0x005a, - 0x00c0, 0x00d6, - 0x00d8, 0x00de, - 0x0178, 0x0179, - 0x0181, 0x0182, - 0x0186, 0x0187, - 0x0189, 0x018b, - 0x018e, 0x0191, - 0x0193, 0x0194, - 0x0196, 0x0198, - 0x019c, 0x019d, - 0x019f, 0x01a0, - 0x01a6, 0x01a7, - 0x01ae, 0x01af, - 0x01b1, 0x01b3, - 0x01b7, 0x01b8, - 0x01f6, 0x01f8, - 0x023a, 0x023b, - 0x023d, 0x023e, - 0x0243, 0x0246, - 0x0388, 0x038a, - 0x038e, 0x038f, - 0x0391, 0x03a1, - 0x03a3, 0x03ab, - 0x03f9, 0x03fa, - 0x03fd, 0x042f, - 0x04c0, 0x04c1, - 0x0531, 0x0556, - 0x10a0, 0x10c5, - 0x1f08, 0x1f0f, - 0x1f18, 0x1f1d, - 0x1f28, 0x1f2f, - 0x1f38, 0x1f3f, - 0x1f48, 0x1f4d, - 0x1f68, 0x1f6f, - 0x1f88, 0x1f8f, - 0x1f98, 0x1f9f, - 0x1fa8, 0x1faf, - 0x1fb8, 0x1fbc, - 0x1fc8, 0x1fcc, - 0x1fd8, 0x1fdb, - 0x1fe8, 0x1fec, - 0x1ff8, 0x1ffc, - 0x2160, 0x216f, - 0x24b6, 0x24cf, - 0x2c00, 0x2c2e, - 0x2c62, 0x2c64, - 0x2c6d, 0x2c70, - 0x2c7e, 0x2c80, - 0xa77d, 0xa77e, - 0xa7aa, 0xa7ad, - 0xa7b0, 0xa7b1, - 0xff21, 0xff3a, - 0x10400, 0x10427, - 0x118a0, 0x118bf, -}; - -static char32_t istitles[] = { - 0x0100, - 0x0102, - 0x0104, - 0x0106, - 0x0108, - 0x010a, - 0x010c, - 0x010e, - 0x0110, - 0x0112, - 0x0114, - 0x0116, - 0x0118, - 0x011a, - 0x011c, - 0x011e, - 0x0120, - 0x0122, - 0x0124, - 0x0126, - 0x0128, - 0x012a, - 0x012c, - 0x012e, - 0x0132, - 0x0134, - 0x0136, - 0x0139, - 0x013b, - 0x013d, - 0x013f, - 0x0141, - 0x0143, - 0x0145, - 0x0147, - 0x014a, - 0x014c, - 0x014e, - 0x0150, - 0x0152, - 0x0154, - 0x0156, - 0x0158, - 0x015a, - 0x015c, - 0x015e, - 0x0160, - 0x0162, - 0x0164, - 0x0166, - 0x0168, - 0x016a, - 0x016c, - 0x016e, - 0x0170, - 0x0172, - 0x0174, - 0x0176, - 0x017b, - 0x017d, - 0x0184, - 0x01a2, - 0x01a4, - 0x01a9, - 0x01ac, - 0x01b5, - 0x01bc, - 0x01c5, - 0x01c8, - 0x01cb, - 
0x01cd, - 0x01cf, - 0x01d1, - 0x01d3, - 0x01d5, - 0x01d7, - 0x01d9, - 0x01db, - 0x01de, - 0x01e0, - 0x01e2, - 0x01e4, - 0x01e6, - 0x01e8, - 0x01ea, - 0x01ec, - 0x01ee, - 0x01f2, - 0x01f4, - 0x01fa, - 0x01fc, - 0x01fe, - 0x0200, - 0x0202, - 0x0204, - 0x0206, - 0x0208, - 0x020a, - 0x020c, - 0x020e, - 0x0210, - 0x0212, - 0x0214, - 0x0216, - 0x0218, - 0x021a, - 0x021c, - 0x021e, - 0x0220, - 0x0222, - 0x0224, - 0x0226, - 0x0228, - 0x022a, - 0x022c, - 0x022e, - 0x0230, - 0x0232, - 0x0241, - 0x0248, - 0x024a, - 0x024c, - 0x024e, - 0x0370, - 0x0372, - 0x0376, - 0x037f, - 0x0386, - 0x038c, - 0x03cf, - 0x03d8, - 0x03da, - 0x03dc, - 0x03de, - 0x03e0, - 0x03e2, - 0x03e4, - 0x03e6, - 0x03e8, - 0x03ea, - 0x03ec, - 0x03ee, - 0x03f7, - 0x0460, - 0x0462, - 0x0464, - 0x0466, - 0x0468, - 0x046a, - 0x046c, - 0x046e, - 0x0470, - 0x0472, - 0x0474, - 0x0476, - 0x0478, - 0x047a, - 0x047c, - 0x047e, - 0x0480, - 0x048a, - 0x048c, - 0x048e, - 0x0490, - 0x0492, - 0x0494, - 0x0496, - 0x0498, - 0x049a, - 0x049c, - 0x049e, - 0x04a0, - 0x04a2, - 0x04a4, - 0x04a6, - 0x04a8, - 0x04aa, - 0x04ac, - 0x04ae, - 0x04b0, - 0x04b2, - 0x04b4, - 0x04b6, - 0x04b8, - 0x04ba, - 0x04bc, - 0x04be, - 0x04c3, - 0x04c5, - 0x04c7, - 0x04c9, - 0x04cb, - 0x04cd, - 0x04d0, - 0x04d2, - 0x04d4, - 0x04d6, - 0x04d8, - 0x04da, - 0x04dc, - 0x04de, - 0x04e0, - 0x04e2, - 0x04e4, - 0x04e6, - 0x04e8, - 0x04ea, - 0x04ec, - 0x04ee, - 0x04f0, - 0x04f2, - 0x04f4, - 0x04f6, - 0x04f8, - 0x04fa, - 0x04fc, - 0x04fe, - 0x0500, - 0x0502, - 0x0504, - 0x0506, - 0x0508, - 0x050a, - 0x050c, - 0x050e, - 0x0510, - 0x0512, - 0x0514, - 0x0516, - 0x0518, - 0x051a, - 0x051c, - 0x051e, - 0x0520, - 0x0522, - 0x0524, - 0x0526, - 0x0528, - 0x052a, - 0x052c, - 0x052e, - 0x10c7, - 0x10cd, - 0x1e00, - 0x1e02, - 0x1e04, - 0x1e06, - 0x1e08, - 0x1e0a, - 0x1e0c, - 0x1e0e, - 0x1e10, - 0x1e12, - 0x1e14, - 0x1e16, - 0x1e18, - 0x1e1a, - 0x1e1c, - 0x1e1e, - 0x1e20, - 0x1e22, - 0x1e24, - 0x1e26, - 0x1e28, - 0x1e2a, - 0x1e2c, - 0x1e2e, - 0x1e30, - 0x1e32, - 0x1e34, - 
0x1e36, - 0x1e38, - 0x1e3a, - 0x1e3c, - 0x1e3e, - 0x1e40, - 0x1e42, - 0x1e44, - 0x1e46, - 0x1e48, - 0x1e4a, - 0x1e4c, - 0x1e4e, - 0x1e50, - 0x1e52, - 0x1e54, - 0x1e56, - 0x1e58, - 0x1e5a, - 0x1e5c, - 0x1e5e, - 0x1e60, - 0x1e62, - 0x1e64, - 0x1e66, - 0x1e68, - 0x1e6a, - 0x1e6c, - 0x1e6e, - 0x1e70, - 0x1e72, - 0x1e74, - 0x1e76, - 0x1e78, - 0x1e7a, - 0x1e7c, - 0x1e7e, - 0x1e80, - 0x1e82, - 0x1e84, - 0x1e86, - 0x1e88, - 0x1e8a, - 0x1e8c, - 0x1e8e, - 0x1e90, - 0x1e92, - 0x1e94, - 0x1ea0, - 0x1ea2, - 0x1ea4, - 0x1ea6, - 0x1ea8, - 0x1eaa, - 0x1eac, - 0x1eae, - 0x1eb0, - 0x1eb2, - 0x1eb4, - 0x1eb6, - 0x1eb8, - 0x1eba, - 0x1ebc, - 0x1ebe, - 0x1ec0, - 0x1ec2, - 0x1ec4, - 0x1ec6, - 0x1ec8, - 0x1eca, - 0x1ecc, - 0x1ece, - 0x1ed0, - 0x1ed2, - 0x1ed4, - 0x1ed6, - 0x1ed8, - 0x1eda, - 0x1edc, - 0x1ede, - 0x1ee0, - 0x1ee2, - 0x1ee4, - 0x1ee6, - 0x1ee8, - 0x1eea, - 0x1eec, - 0x1eee, - 0x1ef0, - 0x1ef2, - 0x1ef4, - 0x1ef6, - 0x1ef8, - 0x1efa, - 0x1efc, - 0x1efe, - 0x1f59, - 0x1f5b, - 0x1f5d, - 0x1f5f, - 0x2132, - 0x2183, - 0x2c60, - 0x2c67, - 0x2c69, - 0x2c6b, - 0x2c72, - 0x2c75, - 0x2c82, - 0x2c84, - 0x2c86, - 0x2c88, - 0x2c8a, - 0x2c8c, - 0x2c8e, - 0x2c90, - 0x2c92, - 0x2c94, - 0x2c96, - 0x2c98, - 0x2c9a, - 0x2c9c, - 0x2c9e, - 0x2ca0, - 0x2ca2, - 0x2ca4, - 0x2ca6, - 0x2ca8, - 0x2caa, - 0x2cac, - 0x2cae, - 0x2cb0, - 0x2cb2, - 0x2cb4, - 0x2cb6, - 0x2cb8, - 0x2cba, - 0x2cbc, - 0x2cbe, - 0x2cc0, - 0x2cc2, - 0x2cc4, - 0x2cc6, - 0x2cc8, - 0x2cca, - 0x2ccc, - 0x2cce, - 0x2cd0, - 0x2cd2, - 0x2cd4, - 0x2cd6, - 0x2cd8, - 0x2cda, - 0x2cdc, - 0x2cde, - 0x2ce0, - 0x2ce2, - 0x2ceb, - 0x2ced, - 0x2cf2, - 0xa640, - 0xa642, - 0xa644, - 0xa646, - 0xa648, - 0xa64a, - 0xa64c, - 0xa64e, - 0xa650, - 0xa652, - 0xa654, - 0xa656, - 0xa658, - 0xa65a, - 0xa65c, - 0xa65e, - 0xa660, - 0xa662, - 0xa664, - 0xa666, - 0xa668, - 0xa66a, - 0xa66c, - 0xa680, - 0xa682, - 0xa684, - 0xa686, - 0xa688, - 0xa68a, - 0xa68c, - 0xa68e, - 0xa690, - 0xa692, - 0xa694, - 0xa696, - 0xa698, - 0xa69a, - 0xa722, - 0xa724, - 0xa726, - 
0xa728, - 0xa72a, - 0xa72c, - 0xa72e, - 0xa732, - 0xa734, - 0xa736, - 0xa738, - 0xa73a, - 0xa73c, - 0xa73e, - 0xa740, - 0xa742, - 0xa744, - 0xa746, - 0xa748, - 0xa74a, - 0xa74c, - 0xa74e, - 0xa750, - 0xa752, - 0xa754, - 0xa756, - 0xa758, - 0xa75a, - 0xa75c, - 0xa75e, - 0xa760, - 0xa762, - 0xa764, - 0xa766, - 0xa768, - 0xa76a, - 0xa76c, - 0xa76e, - 0xa779, - 0xa77b, - 0xa780, - 0xa782, - 0xa784, - 0xa786, - 0xa78b, - 0xa78d, - 0xa790, - 0xa792, - 0xa796, - 0xa798, - 0xa79a, - 0xa79c, - 0xa79e, - 0xa7a0, - 0xa7a2, - 0xa7a4, - 0xa7a6, - 0xa7a8, -}; - -bool Unicode::istitle(char32_t c) noexcept -{ - char32_t *p; - - p = rbsearch(c, istitler, nelem (istitler)/2, 2); - if (p && c >= p[0] && c <= p[1]) - return true; - - p = rbsearch(c, istitles, nelem (istitles), 1); - if (p && c == p[0]) - return true; - - return false; -} - -char32_t toupperr[] = { - 0x0061, 0x007a, 1048544, - 0x00e0, 0x00f6, 1048544, - 0x00f8, 0x00fe, 1048544, - 0x023f, 0x0240, 1059391, - 0x0256, 0x0257, 1048371, - 0x028a, 0x028b, 1048359, - 0x037b, 0x037d, 1048706, - 0x03ad, 0x03af, 1048539, - 0x03b1, 0x03c1, 1048544, - 0x03c3, 0x03cb, 1048544, - 0x03cd, 0x03ce, 1048513, - 0x0430, 0x044f, 1048544, - 0x0450, 0x045f, 1048496, - 0x0561, 0x0586, 1048528, - 0x1f00, 0x1f07, 1048584, - 0x1f10, 0x1f15, 1048584, - 0x1f20, 0x1f27, 1048584, - 0x1f30, 0x1f37, 1048584, - 0x1f40, 0x1f45, 1048584, - 0x1f60, 0x1f67, 1048584, - 0x1f70, 0x1f71, 1048650, - 0x1f72, 0x1f75, 1048662, - 0x1f76, 0x1f77, 1048676, - 0x1f78, 0x1f79, 1048704, - 0x1f7a, 0x1f7b, 1048688, - 0x1f7c, 0x1f7d, 1048702, - 0x1f80, 0x1f87, 1048584, - 0x1f90, 0x1f97, 1048584, - 0x1fa0, 0x1fa7, 1048584, - 0x1fb0, 0x1fb1, 1048584, - 0x1fd0, 0x1fd1, 1048584, - 0x1fe0, 0x1fe1, 1048584, - 0x2170, 0x217f, 1048560, - 0x24d0, 0x24e9, 1048550, - 0x2c30, 0x2c5e, 1048528, - 0x2d00, 0x2d25, 1041312, - 0xff41, 0xff5a, 1048544, - 0x10428, 0x1044f, 1048536, - 0x118c0, 0x118df, 1048544, -}; - -static char32_t touppers[] = { - 0x00b5, 1049319, - 0x00ff, 1048697, - 0x0101, 
1048575, - 0x0103, 1048575, - 0x0105, 1048575, - 0x0107, 1048575, - 0x0109, 1048575, - 0x010b, 1048575, - 0x010d, 1048575, - 0x010f, 1048575, - 0x0111, 1048575, - 0x0113, 1048575, - 0x0115, 1048575, - 0x0117, 1048575, - 0x0119, 1048575, - 0x011b, 1048575, - 0x011d, 1048575, - 0x011f, 1048575, - 0x0121, 1048575, - 0x0123, 1048575, - 0x0125, 1048575, - 0x0127, 1048575, - 0x0129, 1048575, - 0x012b, 1048575, - 0x012d, 1048575, - 0x012f, 1048575, - 0x0131, 1048344, - 0x0133, 1048575, - 0x0135, 1048575, - 0x0137, 1048575, - 0x013a, 1048575, - 0x013c, 1048575, - 0x013e, 1048575, - 0x0140, 1048575, - 0x0142, 1048575, - 0x0144, 1048575, - 0x0146, 1048575, - 0x0148, 1048575, - 0x014b, 1048575, - 0x014d, 1048575, - 0x014f, 1048575, - 0x0151, 1048575, - 0x0153, 1048575, - 0x0155, 1048575, - 0x0157, 1048575, - 0x0159, 1048575, - 0x015b, 1048575, - 0x015d, 1048575, - 0x015f, 1048575, - 0x0161, 1048575, - 0x0163, 1048575, - 0x0165, 1048575, - 0x0167, 1048575, - 0x0169, 1048575, - 0x016b, 1048575, - 0x016d, 1048575, - 0x016f, 1048575, - 0x0171, 1048575, - 0x0173, 1048575, - 0x0175, 1048575, - 0x0177, 1048575, - 0x017a, 1048575, - 0x017c, 1048575, - 0x017e, 1048575, - 0x017f, 1048276, - 0x0180, 1048771, - 0x0183, 1048575, - 0x0185, 1048575, - 0x0188, 1048575, - 0x018c, 1048575, - 0x0192, 1048575, - 0x0195, 1048673, - 0x0199, 1048575, - 0x019a, 1048739, - 0x019e, 1048706, - 0x01a1, 1048575, - 0x01a3, 1048575, - 0x01a5, 1048575, - 0x01a8, 1048575, - 0x01ad, 1048575, - 0x01b0, 1048575, - 0x01b4, 1048575, - 0x01b6, 1048575, - 0x01b9, 1048575, - 0x01bd, 1048575, - 0x01bf, 1048632, - 0x01c5, 1048575, - 0x01c6, 1048574, - 0x01c8, 1048575, - 0x01c9, 1048574, - 0x01cb, 1048575, - 0x01cc, 1048574, - 0x01ce, 1048575, - 0x01d0, 1048575, - 0x01d2, 1048575, - 0x01d4, 1048575, - 0x01d6, 1048575, - 0x01d8, 1048575, - 0x01da, 1048575, - 0x01dc, 1048575, - 0x01dd, 1048497, - 0x01df, 1048575, - 0x01e1, 1048575, - 0x01e3, 1048575, - 0x01e5, 1048575, - 0x01e7, 1048575, - 0x01e9, 1048575, - 0x01eb, 
1048575, - 0x01ed, 1048575, - 0x01ef, 1048575, - 0x01f2, 1048575, - 0x01f3, 1048574, - 0x01f5, 1048575, - 0x01f9, 1048575, - 0x01fb, 1048575, - 0x01fd, 1048575, - 0x01ff, 1048575, - 0x0201, 1048575, - 0x0203, 1048575, - 0x0205, 1048575, - 0x0207, 1048575, - 0x0209, 1048575, - 0x020b, 1048575, - 0x020d, 1048575, - 0x020f, 1048575, - 0x0211, 1048575, - 0x0213, 1048575, - 0x0215, 1048575, - 0x0217, 1048575, - 0x0219, 1048575, - 0x021b, 1048575, - 0x021d, 1048575, - 0x021f, 1048575, - 0x0223, 1048575, - 0x0225, 1048575, - 0x0227, 1048575, - 0x0229, 1048575, - 0x022b, 1048575, - 0x022d, 1048575, - 0x022f, 1048575, - 0x0231, 1048575, - 0x0233, 1048575, - 0x023c, 1048575, - 0x0242, 1048575, - 0x0247, 1048575, - 0x0249, 1048575, - 0x024b, 1048575, - 0x024d, 1048575, - 0x024f, 1048575, - 0x0250, 1059359, - 0x0251, 1059356, - 0x0252, 1059358, - 0x0253, 1048366, - 0x0254, 1048370, - 0x0259, 1048374, - 0x025b, 1048373, - 0x025c, 1090895, - 0x0260, 1048371, - 0x0261, 1090891, - 0x0263, 1048369, - 0x0265, 1090856, - 0x0266, 1090884, - 0x0268, 1048367, - 0x0269, 1048365, - 0x026b, 1059319, - 0x026c, 1090881, - 0x026f, 1048365, - 0x0271, 1059325, - 0x0272, 1048363, - 0x0275, 1048362, - 0x027d, 1059303, - 0x0280, 1048358, - 0x0283, 1048358, - 0x0287, 1090858, - 0x0288, 1048358, - 0x0289, 1048507, - 0x028c, 1048505, - 0x0292, 1048357, - 0x029e, 1090834, - 0x0345, 1048660, - 0x0371, 1048575, - 0x0373, 1048575, - 0x0377, 1048575, - 0x03ac, 1048538, - 0x03c2, 1048545, - 0x03cc, 1048512, - 0x03d0, 1048514, - 0x03d1, 1048519, - 0x03d5, 1048529, - 0x03d6, 1048522, - 0x03d7, 1048568, - 0x03d9, 1048575, - 0x03db, 1048575, - 0x03dd, 1048575, - 0x03df, 1048575, - 0x03e1, 1048575, - 0x03e3, 1048575, - 0x03e5, 1048575, - 0x03e7, 1048575, - 0x03e9, 1048575, - 0x03eb, 1048575, - 0x03ed, 1048575, - 0x03ef, 1048575, - 0x03f0, 1048490, - 0x03f1, 1048496, - 0x03f2, 1048583, - 0x03f3, 1048460, - 0x03f5, 1048480, - 0x03f8, 1048575, - 0x03fb, 1048575, - 0x0461, 1048575, - 0x0463, 1048575, - 0x0465, 
1048575, - 0x0467, 1048575, - 0x0469, 1048575, - 0x046b, 1048575, - 0x046d, 1048575, - 0x046f, 1048575, - 0x0471, 1048575, - 0x0473, 1048575, - 0x0475, 1048575, - 0x0477, 1048575, - 0x0479, 1048575, - 0x047b, 1048575, - 0x047d, 1048575, - 0x047f, 1048575, - 0x0481, 1048575, - 0x048b, 1048575, - 0x048d, 1048575, - 0x048f, 1048575, - 0x0491, 1048575, - 0x0493, 1048575, - 0x0495, 1048575, - 0x0497, 1048575, - 0x0499, 1048575, - 0x049b, 1048575, - 0x049d, 1048575, - 0x049f, 1048575, - 0x04a1, 1048575, - 0x04a3, 1048575, - 0x04a5, 1048575, - 0x04a7, 1048575, - 0x04a9, 1048575, - 0x04ab, 1048575, - 0x04ad, 1048575, - 0x04af, 1048575, - 0x04b1, 1048575, - 0x04b3, 1048575, - 0x04b5, 1048575, - 0x04b7, 1048575, - 0x04b9, 1048575, - 0x04bb, 1048575, - 0x04bd, 1048575, - 0x04bf, 1048575, - 0x04c2, 1048575, - 0x04c4, 1048575, - 0x04c6, 1048575, - 0x04c8, 1048575, - 0x04ca, 1048575, - 0x04cc, 1048575, - 0x04ce, 1048575, - 0x04cf, 1048561, - 0x04d1, 1048575, - 0x04d3, 1048575, - 0x04d5, 1048575, - 0x04d7, 1048575, - 0x04d9, 1048575, - 0x04db, 1048575, - 0x04dd, 1048575, - 0x04df, 1048575, - 0x04e1, 1048575, - 0x04e3, 1048575, - 0x04e5, 1048575, - 0x04e7, 1048575, - 0x04e9, 1048575, - 0x04eb, 1048575, - 0x04ed, 1048575, - 0x04ef, 1048575, - 0x04f1, 1048575, - 0x04f3, 1048575, - 0x04f5, 1048575, - 0x04f7, 1048575, - 0x04f9, 1048575, - 0x04fb, 1048575, - 0x04fd, 1048575, - 0x04ff, 1048575, - 0x0501, 1048575, - 0x0503, 1048575, - 0x0505, 1048575, - 0x0507, 1048575, - 0x0509, 1048575, - 0x050b, 1048575, - 0x050d, 1048575, - 0x050f, 1048575, - 0x0511, 1048575, - 0x0513, 1048575, - 0x0515, 1048575, - 0x0517, 1048575, - 0x0519, 1048575, - 0x051b, 1048575, - 0x051d, 1048575, - 0x051f, 1048575, - 0x0521, 1048575, - 0x0523, 1048575, - 0x0525, 1048575, - 0x0527, 1048575, - 0x0529, 1048575, - 0x052b, 1048575, - 0x052d, 1048575, - 0x052f, 1048575, - 0x1d79, 1083908, - 0x1d7d, 1052390, - 0x1e01, 1048575, - 0x1e03, 1048575, - 0x1e05, 1048575, - 0x1e07, 1048575, - 0x1e09, 1048575, - 0x1e0b, 
1048575, - 0x1e0d, 1048575, - 0x1e0f, 1048575, - 0x1e11, 1048575, - 0x1e13, 1048575, - 0x1e15, 1048575, - 0x1e17, 1048575, - 0x1e19, 1048575, - 0x1e1b, 1048575, - 0x1e1d, 1048575, - 0x1e1f, 1048575, - 0x1e21, 1048575, - 0x1e23, 1048575, - 0x1e25, 1048575, - 0x1e27, 1048575, - 0x1e29, 1048575, - 0x1e2b, 1048575, - 0x1e2d, 1048575, - 0x1e2f, 1048575, - 0x1e31, 1048575, - 0x1e33, 1048575, - 0x1e35, 1048575, - 0x1e37, 1048575, - 0x1e39, 1048575, - 0x1e3b, 1048575, - 0x1e3d, 1048575, - 0x1e3f, 1048575, - 0x1e41, 1048575, - 0x1e43, 1048575, - 0x1e45, 1048575, - 0x1e47, 1048575, - 0x1e49, 1048575, - 0x1e4b, 1048575, - 0x1e4d, 1048575, - 0x1e4f, 1048575, - 0x1e51, 1048575, - 0x1e53, 1048575, - 0x1e55, 1048575, - 0x1e57, 1048575, - 0x1e59, 1048575, - 0x1e5b, 1048575, - 0x1e5d, 1048575, - 0x1e5f, 1048575, - 0x1e61, 1048575, - 0x1e63, 1048575, - 0x1e65, 1048575, - 0x1e67, 1048575, - 0x1e69, 1048575, - 0x1e6b, 1048575, - 0x1e6d, 1048575, - 0x1e6f, 1048575, - 0x1e71, 1048575, - 0x1e73, 1048575, - 0x1e75, 1048575, - 0x1e77, 1048575, - 0x1e79, 1048575, - 0x1e7b, 1048575, - 0x1e7d, 1048575, - 0x1e7f, 1048575, - 0x1e81, 1048575, - 0x1e83, 1048575, - 0x1e85, 1048575, - 0x1e87, 1048575, - 0x1e89, 1048575, - 0x1e8b, 1048575, - 0x1e8d, 1048575, - 0x1e8f, 1048575, - 0x1e91, 1048575, - 0x1e93, 1048575, - 0x1e95, 1048575, - 0x1e9b, 1048517, - 0x1ea1, 1048575, - 0x1ea3, 1048575, - 0x1ea5, 1048575, - 0x1ea7, 1048575, - 0x1ea9, 1048575, - 0x1eab, 1048575, - 0x1ead, 1048575, - 0x1eaf, 1048575, - 0x1eb1, 1048575, - 0x1eb3, 1048575, - 0x1eb5, 1048575, - 0x1eb7, 1048575, - 0x1eb9, 1048575, - 0x1ebb, 1048575, - 0x1ebd, 1048575, - 0x1ebf, 1048575, - 0x1ec1, 1048575, - 0x1ec3, 1048575, - 0x1ec5, 1048575, - 0x1ec7, 1048575, - 0x1ec9, 1048575, - 0x1ecb, 1048575, - 0x1ecd, 1048575, - 0x1ecf, 1048575, - 0x1ed1, 1048575, - 0x1ed3, 1048575, - 0x1ed5, 1048575, - 0x1ed7, 1048575, - 0x1ed9, 1048575, - 0x1edb, 1048575, - 0x1edd, 1048575, - 0x1edf, 1048575, - 0x1ee1, 1048575, - 0x1ee3, 1048575, - 0x1ee5, 
1048575, - 0x1ee7, 1048575, - 0x1ee9, 1048575, - 0x1eeb, 1048575, - 0x1eed, 1048575, - 0x1eef, 1048575, - 0x1ef1, 1048575, - 0x1ef3, 1048575, - 0x1ef5, 1048575, - 0x1ef7, 1048575, - 0x1ef9, 1048575, - 0x1efb, 1048575, - 0x1efd, 1048575, - 0x1eff, 1048575, - 0x1f51, 1048584, - 0x1f53, 1048584, - 0x1f55, 1048584, - 0x1f57, 1048584, - 0x1fb3, 1048585, - 0x1fbe, 1041371, - 0x1fc3, 1048585, - 0x1fe5, 1048583, - 0x1ff3, 1048585, - 0x214e, 1048548, - 0x2184, 1048575, - 0x2c61, 1048575, - 0x2c65, 1037781, - 0x2c66, 1037784, - 0x2c68, 1048575, - 0x2c6a, 1048575, - 0x2c6c, 1048575, - 0x2c73, 1048575, - 0x2c76, 1048575, - 0x2c81, 1048575, - 0x2c83, 1048575, - 0x2c85, 1048575, - 0x2c87, 1048575, - 0x2c89, 1048575, - 0x2c8b, 1048575, - 0x2c8d, 1048575, - 0x2c8f, 1048575, - 0x2c91, 1048575, - 0x2c93, 1048575, - 0x2c95, 1048575, - 0x2c97, 1048575, - 0x2c99, 1048575, - 0x2c9b, 1048575, - 0x2c9d, 1048575, - 0x2c9f, 1048575, - 0x2ca1, 1048575, - 0x2ca3, 1048575, - 0x2ca5, 1048575, - 0x2ca7, 1048575, - 0x2ca9, 1048575, - 0x2cab, 1048575, - 0x2cad, 1048575, - 0x2caf, 1048575, - 0x2cb1, 1048575, - 0x2cb3, 1048575, - 0x2cb5, 1048575, - 0x2cb7, 1048575, - 0x2cb9, 1048575, - 0x2cbb, 1048575, - 0x2cbd, 1048575, - 0x2cbf, 1048575, - 0x2cc1, 1048575, - 0x2cc3, 1048575, - 0x2cc5, 1048575, - 0x2cc7, 1048575, - 0x2cc9, 1048575, - 0x2ccb, 1048575, - 0x2ccd, 1048575, - 0x2ccf, 1048575, - 0x2cd1, 1048575, - 0x2cd3, 1048575, - 0x2cd5, 1048575, - 0x2cd7, 1048575, - 0x2cd9, 1048575, - 0x2cdb, 1048575, - 0x2cdd, 1048575, - 0x2cdf, 1048575, - 0x2ce1, 1048575, - 0x2ce3, 1048575, - 0x2cec, 1048575, - 0x2cee, 1048575, - 0x2cf3, 1048575, - 0x2d27, 1041312, - 0x2d2d, 1041312, - 0xa641, 1048575, - 0xa643, 1048575, - 0xa645, 1048575, - 0xa647, 1048575, - 0xa649, 1048575, - 0xa64b, 1048575, - 0xa64d, 1048575, - 0xa64f, 1048575, - 0xa651, 1048575, - 0xa653, 1048575, - 0xa655, 1048575, - 0xa657, 1048575, - 0xa659, 1048575, - 0xa65b, 1048575, - 0xa65d, 1048575, - 0xa65f, 1048575, - 0xa661, 1048575, - 0xa663, 
1048575, - 0xa665, 1048575, - 0xa667, 1048575, - 0xa669, 1048575, - 0xa66b, 1048575, - 0xa66d, 1048575, - 0xa681, 1048575, - 0xa683, 1048575, - 0xa685, 1048575, - 0xa687, 1048575, - 0xa689, 1048575, - 0xa68b, 1048575, - 0xa68d, 1048575, - 0xa68f, 1048575, - 0xa691, 1048575, - 0xa693, 1048575, - 0xa695, 1048575, - 0xa697, 1048575, - 0xa699, 1048575, - 0xa69b, 1048575, - 0xa723, 1048575, - 0xa725, 1048575, - 0xa727, 1048575, - 0xa729, 1048575, - 0xa72b, 1048575, - 0xa72d, 1048575, - 0xa72f, 1048575, - 0xa733, 1048575, - 0xa735, 1048575, - 0xa737, 1048575, - 0xa739, 1048575, - 0xa73b, 1048575, - 0xa73d, 1048575, - 0xa73f, 1048575, - 0xa741, 1048575, - 0xa743, 1048575, - 0xa745, 1048575, - 0xa747, 1048575, - 0xa749, 1048575, - 0xa74b, 1048575, - 0xa74d, 1048575, - 0xa74f, 1048575, - 0xa751, 1048575, - 0xa753, 1048575, - 0xa755, 1048575, - 0xa757, 1048575, - 0xa759, 1048575, - 0xa75b, 1048575, - 0xa75d, 1048575, - 0xa75f, 1048575, - 0xa761, 1048575, - 0xa763, 1048575, - 0xa765, 1048575, - 0xa767, 1048575, - 0xa769, 1048575, - 0xa76b, 1048575, - 0xa76d, 1048575, - 0xa76f, 1048575, - 0xa77a, 1048575, - 0xa77c, 1048575, - 0xa77f, 1048575, - 0xa781, 1048575, - 0xa783, 1048575, - 0xa785, 1048575, - 0xa787, 1048575, - 0xa78c, 1048575, - 0xa791, 1048575, - 0xa793, 1048575, - 0xa797, 1048575, - 0xa799, 1048575, - 0xa79b, 1048575, - 0xa79d, 1048575, - 0xa79f, 1048575, - 0xa7a1, 1048575, - 0xa7a3, 1048575, - 0xa7a5, 1048575, - 0xa7a7, 1048575, - 0xa7a9, 1048575, -}; - -char32_t Unicode::toupper(char32_t c) noexcept -{ - char32_t *p; - - p = rbsearch(c, toupperr, nelem (toupperr)/3, 3); - if (p && c >= p[0] && c <= p[1]) - return c + p[2] - 1048576; - - p = rbsearch(c, touppers, nelem (touppers)/2, 2); - if (p && c == p[0]) - return c + p[1] - 1048576; - - return c; -} - -char32_t tolowerr[] = { - 0x0041, 0x005a, 1048608, - 0x00c0, 0x00d6, 1048608, - 0x00d8, 0x00de, 1048608, - 0x0189, 0x018a, 1048781, - 0x01b1, 0x01b2, 1048793, - 0x0388, 0x038a, 1048613, - 0x038e, 0x038f, 1048639, 
- 0x0391, 0x03a1, 1048608, - 0x03a3, 0x03ab, 1048608, - 0x03fd, 0x03ff, 1048446, - 0x0400, 0x040f, 1048656, - 0x0410, 0x042f, 1048608, - 0x0531, 0x0556, 1048624, - 0x10a0, 0x10c5, 1055840, - 0x1f08, 0x1f0f, 1048568, - 0x1f18, 0x1f1d, 1048568, - 0x1f28, 0x1f2f, 1048568, - 0x1f38, 0x1f3f, 1048568, - 0x1f48, 0x1f4d, 1048568, - 0x1f68, 0x1f6f, 1048568, - 0x1f88, 0x1f8f, 1048568, - 0x1f98, 0x1f9f, 1048568, - 0x1fa8, 0x1faf, 1048568, - 0x1fb8, 0x1fb9, 1048568, - 0x1fba, 0x1fbb, 1048502, - 0x1fc8, 0x1fcb, 1048490, - 0x1fd8, 0x1fd9, 1048568, - 0x1fda, 0x1fdb, 1048476, - 0x1fe8, 0x1fe9, 1048568, - 0x1fea, 0x1feb, 1048464, - 0x1ff8, 0x1ff9, 1048448, - 0x1ffa, 0x1ffb, 1048450, - 0x2160, 0x216f, 1048592, - 0x24b6, 0x24cf, 1048602, - 0x2c00, 0x2c2e, 1048624, - 0x2c7e, 0x2c7f, 1037761, - 0xff21, 0xff3a, 1048608, - 0x10400, 0x10427, 1048616, - 0x118a0, 0x118bf, 1048608, -}; - -static char32_t tolowers[] = { - 0x0100, 1048577, - 0x0102, 1048577, - 0x0104, 1048577, - 0x0106, 1048577, - 0x0108, 1048577, - 0x010a, 1048577, - 0x010c, 1048577, - 0x010e, 1048577, - 0x0110, 1048577, - 0x0112, 1048577, - 0x0114, 1048577, - 0x0116, 1048577, - 0x0118, 1048577, - 0x011a, 1048577, - 0x011c, 1048577, - 0x011e, 1048577, - 0x0120, 1048577, - 0x0122, 1048577, - 0x0124, 1048577, - 0x0126, 1048577, - 0x0128, 1048577, - 0x012a, 1048577, - 0x012c, 1048577, - 0x012e, 1048577, - 0x0130, 1048377, - 0x0132, 1048577, - 0x0134, 1048577, - 0x0136, 1048577, - 0x0139, 1048577, - 0x013b, 1048577, - 0x013d, 1048577, - 0x013f, 1048577, - 0x0141, 1048577, - 0x0143, 1048577, - 0x0145, 1048577, - 0x0147, 1048577, - 0x014a, 1048577, - 0x014c, 1048577, - 0x014e, 1048577, - 0x0150, 1048577, - 0x0152, 1048577, - 0x0154, 1048577, - 0x0156, 1048577, - 0x0158, 1048577, - 0x015a, 1048577, - 0x015c, 1048577, - 0x015e, 1048577, - 0x0160, 1048577, - 0x0162, 1048577, - 0x0164, 1048577, - 0x0166, 1048577, - 0x0168, 1048577, - 0x016a, 1048577, - 0x016c, 1048577, - 0x016e, 1048577, - 0x0170, 1048577, - 0x0172, 1048577, - 0x0174, 
1048577, - 0x0176, 1048577, - 0x0178, 1048455, - 0x0179, 1048577, - 0x017b, 1048577, - 0x017d, 1048577, - 0x0181, 1048786, - 0x0182, 1048577, - 0x0184, 1048577, - 0x0186, 1048782, - 0x0187, 1048577, - 0x018b, 1048577, - 0x018e, 1048655, - 0x018f, 1048778, - 0x0190, 1048779, - 0x0191, 1048577, - 0x0193, 1048781, - 0x0194, 1048783, - 0x0196, 1048787, - 0x0197, 1048785, - 0x0198, 1048577, - 0x019c, 1048787, - 0x019d, 1048789, - 0x019f, 1048790, - 0x01a0, 1048577, - 0x01a2, 1048577, - 0x01a4, 1048577, - 0x01a6, 1048794, - 0x01a7, 1048577, - 0x01a9, 1048794, - 0x01ac, 1048577, - 0x01ae, 1048794, - 0x01af, 1048577, - 0x01b3, 1048577, - 0x01b5, 1048577, - 0x01b7, 1048795, - 0x01b8, 1048577, - 0x01bc, 1048577, - 0x01c4, 1048578, - 0x01c5, 1048577, - 0x01c7, 1048578, - 0x01c8, 1048577, - 0x01ca, 1048578, - 0x01cb, 1048577, - 0x01cd, 1048577, - 0x01cf, 1048577, - 0x01d1, 1048577, - 0x01d3, 1048577, - 0x01d5, 1048577, - 0x01d7, 1048577, - 0x01d9, 1048577, - 0x01db, 1048577, - 0x01de, 1048577, - 0x01e0, 1048577, - 0x01e2, 1048577, - 0x01e4, 1048577, - 0x01e6, 1048577, - 0x01e8, 1048577, - 0x01ea, 1048577, - 0x01ec, 1048577, - 0x01ee, 1048577, - 0x01f1, 1048578, - 0x01f2, 1048577, - 0x01f4, 1048577, - 0x01f6, 1048479, - 0x01f7, 1048520, - 0x01f8, 1048577, - 0x01fa, 1048577, - 0x01fc, 1048577, - 0x01fe, 1048577, - 0x0200, 1048577, - 0x0202, 1048577, - 0x0204, 1048577, - 0x0206, 1048577, - 0x0208, 1048577, - 0x020a, 1048577, - 0x020c, 1048577, - 0x020e, 1048577, - 0x0210, 1048577, - 0x0212, 1048577, - 0x0214, 1048577, - 0x0216, 1048577, - 0x0218, 1048577, - 0x021a, 1048577, - 0x021c, 1048577, - 0x021e, 1048577, - 0x0220, 1048446, - 0x0222, 1048577, - 0x0224, 1048577, - 0x0226, 1048577, - 0x0228, 1048577, - 0x022a, 1048577, - 0x022c, 1048577, - 0x022e, 1048577, - 0x0230, 1048577, - 0x0232, 1048577, - 0x023a, 1059371, - 0x023b, 1048577, - 0x023d, 1048413, - 0x023e, 1059368, - 0x0241, 1048577, - 0x0243, 1048381, - 0x0244, 1048645, - 0x0245, 1048647, - 0x0246, 1048577, - 0x0248, 
1048577, - 0x024a, 1048577, - 0x024c, 1048577, - 0x024e, 1048577, - 0x0370, 1048577, - 0x0372, 1048577, - 0x0376, 1048577, - 0x037f, 1048692, - 0x0386, 1048614, - 0x038c, 1048640, - 0x03cf, 1048584, - 0x03d8, 1048577, - 0x03da, 1048577, - 0x03dc, 1048577, - 0x03de, 1048577, - 0x03e0, 1048577, - 0x03e2, 1048577, - 0x03e4, 1048577, - 0x03e6, 1048577, - 0x03e8, 1048577, - 0x03ea, 1048577, - 0x03ec, 1048577, - 0x03ee, 1048577, - 0x03f4, 1048516, - 0x03f7, 1048577, - 0x03f9, 1048569, - 0x03fa, 1048577, - 0x0460, 1048577, - 0x0462, 1048577, - 0x0464, 1048577, - 0x0466, 1048577, - 0x0468, 1048577, - 0x046a, 1048577, - 0x046c, 1048577, - 0x046e, 1048577, - 0x0470, 1048577, - 0x0472, 1048577, - 0x0474, 1048577, - 0x0476, 1048577, - 0x0478, 1048577, - 0x047a, 1048577, - 0x047c, 1048577, - 0x047e, 1048577, - 0x0480, 1048577, - 0x048a, 1048577, - 0x048c, 1048577, - 0x048e, 1048577, - 0x0490, 1048577, - 0x0492, 1048577, - 0x0494, 1048577, - 0x0496, 1048577, - 0x0498, 1048577, - 0x049a, 1048577, - 0x049c, 1048577, - 0x049e, 1048577, - 0x04a0, 1048577, - 0x04a2, 1048577, - 0x04a4, 1048577, - 0x04a6, 1048577, - 0x04a8, 1048577, - 0x04aa, 1048577, - 0x04ac, 1048577, - 0x04ae, 1048577, - 0x04b0, 1048577, - 0x04b2, 1048577, - 0x04b4, 1048577, - 0x04b6, 1048577, - 0x04b8, 1048577, - 0x04ba, 1048577, - 0x04bc, 1048577, - 0x04be, 1048577, - 0x04c0, 1048591, - 0x04c1, 1048577, - 0x04c3, 1048577, - 0x04c5, 1048577, - 0x04c7, 1048577, - 0x04c9, 1048577, - 0x04cb, 1048577, - 0x04cd, 1048577, - 0x04d0, 1048577, - 0x04d2, 1048577, - 0x04d4, 1048577, - 0x04d6, 1048577, - 0x04d8, 1048577, - 0x04da, 1048577, - 0x04dc, 1048577, - 0x04de, 1048577, - 0x04e0, 1048577, - 0x04e2, 1048577, - 0x04e4, 1048577, - 0x04e6, 1048577, - 0x04e8, 1048577, - 0x04ea, 1048577, - 0x04ec, 1048577, - 0x04ee, 1048577, - 0x04f0, 1048577, - 0x04f2, 1048577, - 0x04f4, 1048577, - 0x04f6, 1048577, - 0x04f8, 1048577, - 0x04fa, 1048577, - 0x04fc, 1048577, - 0x04fe, 1048577, - 0x0500, 1048577, - 0x0502, 1048577, - 0x0504, 
1048577, - 0x0506, 1048577, - 0x0508, 1048577, - 0x050a, 1048577, - 0x050c, 1048577, - 0x050e, 1048577, - 0x0510, 1048577, - 0x0512, 1048577, - 0x0514, 1048577, - 0x0516, 1048577, - 0x0518, 1048577, - 0x051a, 1048577, - 0x051c, 1048577, - 0x051e, 1048577, - 0x0520, 1048577, - 0x0522, 1048577, - 0x0524, 1048577, - 0x0526, 1048577, - 0x0528, 1048577, - 0x052a, 1048577, - 0x052c, 1048577, - 0x052e, 1048577, - 0x10c7, 1055840, - 0x10cd, 1055840, - 0x1e00, 1048577, - 0x1e02, 1048577, - 0x1e04, 1048577, - 0x1e06, 1048577, - 0x1e08, 1048577, - 0x1e0a, 1048577, - 0x1e0c, 1048577, - 0x1e0e, 1048577, - 0x1e10, 1048577, - 0x1e12, 1048577, - 0x1e14, 1048577, - 0x1e16, 1048577, - 0x1e18, 1048577, - 0x1e1a, 1048577, - 0x1e1c, 1048577, - 0x1e1e, 1048577, - 0x1e20, 1048577, - 0x1e22, 1048577, - 0x1e24, 1048577, - 0x1e26, 1048577, - 0x1e28, 1048577, - 0x1e2a, 1048577, - 0x1e2c, 1048577, - 0x1e2e, 1048577, - 0x1e30, 1048577, - 0x1e32, 1048577, - 0x1e34, 1048577, - 0x1e36, 1048577, - 0x1e38, 1048577, - 0x1e3a, 1048577, - 0x1e3c, 1048577, - 0x1e3e, 1048577, - 0x1e40, 1048577, - 0x1e42, 1048577, - 0x1e44, 1048577, - 0x1e46, 1048577, - 0x1e48, 1048577, - 0x1e4a, 1048577, - 0x1e4c, 1048577, - 0x1e4e, 1048577, - 0x1e50, 1048577, - 0x1e52, 1048577, - 0x1e54, 1048577, - 0x1e56, 1048577, - 0x1e58, 1048577, - 0x1e5a, 1048577, - 0x1e5c, 1048577, - 0x1e5e, 1048577, - 0x1e60, 1048577, - 0x1e62, 1048577, - 0x1e64, 1048577, - 0x1e66, 1048577, - 0x1e68, 1048577, - 0x1e6a, 1048577, - 0x1e6c, 1048577, - 0x1e6e, 1048577, - 0x1e70, 1048577, - 0x1e72, 1048577, - 0x1e74, 1048577, - 0x1e76, 1048577, - 0x1e78, 1048577, - 0x1e7a, 1048577, - 0x1e7c, 1048577, - 0x1e7e, 1048577, - 0x1e80, 1048577, - 0x1e82, 1048577, - 0x1e84, 1048577, - 0x1e86, 1048577, - 0x1e88, 1048577, - 0x1e8a, 1048577, - 0x1e8c, 1048577, - 0x1e8e, 1048577, - 0x1e90, 1048577, - 0x1e92, 1048577, - 0x1e94, 1048577, - 0x1e9e, 1040961, - 0x1ea0, 1048577, - 0x1ea2, 1048577, - 0x1ea4, 1048577, - 0x1ea6, 1048577, - 0x1ea8, 1048577, - 0x1eaa, 
1048577, - 0x1eac, 1048577, - 0x1eae, 1048577, - 0x1eb0, 1048577, - 0x1eb2, 1048577, - 0x1eb4, 1048577, - 0x1eb6, 1048577, - 0x1eb8, 1048577, - 0x1eba, 1048577, - 0x1ebc, 1048577, - 0x1ebe, 1048577, - 0x1ec0, 1048577, - 0x1ec2, 1048577, - 0x1ec4, 1048577, - 0x1ec6, 1048577, - 0x1ec8, 1048577, - 0x1eca, 1048577, - 0x1ecc, 1048577, - 0x1ece, 1048577, - 0x1ed0, 1048577, - 0x1ed2, 1048577, - 0x1ed4, 1048577, - 0x1ed6, 1048577, - 0x1ed8, 1048577, - 0x1eda, 1048577, - 0x1edc, 1048577, - 0x1ede, 1048577, - 0x1ee0, 1048577, - 0x1ee2, 1048577, - 0x1ee4, 1048577, - 0x1ee6, 1048577, - 0x1ee8, 1048577, - 0x1eea, 1048577, - 0x1eec, 1048577, - 0x1eee, 1048577, - 0x1ef0, 1048577, - 0x1ef2, 1048577, - 0x1ef4, 1048577, - 0x1ef6, 1048577, - 0x1ef8, 1048577, - 0x1efa, 1048577, - 0x1efc, 1048577, - 0x1efe, 1048577, - 0x1f59, 1048568, - 0x1f5b, 1048568, - 0x1f5d, 1048568, - 0x1f5f, 1048568, - 0x1fbc, 1048567, - 0x1fcc, 1048567, - 0x1fec, 1048569, - 0x1ffc, 1048567, - 0x2126, 1041059, - 0x212a, 1040193, - 0x212b, 1040314, - 0x2132, 1048604, - 0x2183, 1048577, - 0x2c60, 1048577, - 0x2c62, 1037833, - 0x2c63, 1044762, - 0x2c64, 1037849, - 0x2c67, 1048577, - 0x2c69, 1048577, - 0x2c6b, 1048577, - 0x2c6d, 1037796, - 0x2c6e, 1037827, - 0x2c6f, 1037793, - 0x2c70, 1037794, - 0x2c72, 1048577, - 0x2c75, 1048577, - 0x2c80, 1048577, - 0x2c82, 1048577, - 0x2c84, 1048577, - 0x2c86, 1048577, - 0x2c88, 1048577, - 0x2c8a, 1048577, - 0x2c8c, 1048577, - 0x2c8e, 1048577, - 0x2c90, 1048577, - 0x2c92, 1048577, - 0x2c94, 1048577, - 0x2c96, 1048577, - 0x2c98, 1048577, - 0x2c9a, 1048577, - 0x2c9c, 1048577, - 0x2c9e, 1048577, - 0x2ca0, 1048577, - 0x2ca2, 1048577, - 0x2ca4, 1048577, - 0x2ca6, 1048577, - 0x2ca8, 1048577, - 0x2caa, 1048577, - 0x2cac, 1048577, - 0x2cae, 1048577, - 0x2cb0, 1048577, - 0x2cb2, 1048577, - 0x2cb4, 1048577, - 0x2cb6, 1048577, - 0x2cb8, 1048577, - 0x2cba, 1048577, - 0x2cbc, 1048577, - 0x2cbe, 1048577, - 0x2cc0, 1048577, - 0x2cc2, 1048577, - 0x2cc4, 1048577, - 0x2cc6, 1048577, - 0x2cc8, 
1048577, - 0x2cca, 1048577, - 0x2ccc, 1048577, - 0x2cce, 1048577, - 0x2cd0, 1048577, - 0x2cd2, 1048577, - 0x2cd4, 1048577, - 0x2cd6, 1048577, - 0x2cd8, 1048577, - 0x2cda, 1048577, - 0x2cdc, 1048577, - 0x2cde, 1048577, - 0x2ce0, 1048577, - 0x2ce2, 1048577, - 0x2ceb, 1048577, - 0x2ced, 1048577, - 0x2cf2, 1048577, - 0xa640, 1048577, - 0xa642, 1048577, - 0xa644, 1048577, - 0xa646, 1048577, - 0xa648, 1048577, - 0xa64a, 1048577, - 0xa64c, 1048577, - 0xa64e, 1048577, - 0xa650, 1048577, - 0xa652, 1048577, - 0xa654, 1048577, - 0xa656, 1048577, - 0xa658, 1048577, - 0xa65a, 1048577, - 0xa65c, 1048577, - 0xa65e, 1048577, - 0xa660, 1048577, - 0xa662, 1048577, - 0xa664, 1048577, - 0xa666, 1048577, - 0xa668, 1048577, - 0xa66a, 1048577, - 0xa66c, 1048577, - 0xa680, 1048577, - 0xa682, 1048577, - 0xa684, 1048577, - 0xa686, 1048577, - 0xa688, 1048577, - 0xa68a, 1048577, - 0xa68c, 1048577, - 0xa68e, 1048577, - 0xa690, 1048577, - 0xa692, 1048577, - 0xa694, 1048577, - 0xa696, 1048577, - 0xa698, 1048577, - 0xa69a, 1048577, - 0xa722, 1048577, - 0xa724, 1048577, - 0xa726, 1048577, - 0xa728, 1048577, - 0xa72a, 1048577, - 0xa72c, 1048577, - 0xa72e, 1048577, - 0xa732, 1048577, - 0xa734, 1048577, - 0xa736, 1048577, - 0xa738, 1048577, - 0xa73a, 1048577, - 0xa73c, 1048577, - 0xa73e, 1048577, - 0xa740, 1048577, - 0xa742, 1048577, - 0xa744, 1048577, - 0xa746, 1048577, - 0xa748, 1048577, - 0xa74a, 1048577, - 0xa74c, 1048577, - 0xa74e, 1048577, - 0xa750, 1048577, - 0xa752, 1048577, - 0xa754, 1048577, - 0xa756, 1048577, - 0xa758, 1048577, - 0xa75a, 1048577, - 0xa75c, 1048577, - 0xa75e, 1048577, - 0xa760, 1048577, - 0xa762, 1048577, - 0xa764, 1048577, - 0xa766, 1048577, - 0xa768, 1048577, - 0xa76a, 1048577, - 0xa76c, 1048577, - 0xa76e, 1048577, - 0xa779, 1048577, - 0xa77b, 1048577, - 0xa77d, 1013244, - 0xa77e, 1048577, - 0xa780, 1048577, - 0xa782, 1048577, - 0xa784, 1048577, - 0xa786, 1048577, - 0xa78b, 1048577, - 0xa78d, 1006296, - 0xa790, 1048577, - 0xa792, 1048577, - 0xa796, 1048577, - 0xa798, 
1048577, - 0xa79a, 1048577, - 0xa79c, 1048577, - 0xa79e, 1048577, - 0xa7a0, 1048577, - 0xa7a2, 1048577, - 0xa7a4, 1048577, - 0xa7a6, 1048577, - 0xa7a8, 1048577, - 0xa7aa, 1006268, - 0xa7ab, 1006257, - 0xa7ac, 1006261, - 0xa7ad, 1006271, - 0xa7b0, 1006318, - 0xa7b1, 1006294, -}; - -char32_t Unicode::tolower(char32_t c) noexcept -{ - char32_t *p; - - p = rbsearch(c, tolowerr, nelem (tolowerr)/3, 3); - if (p && c >= p[0] && c <= p[1]) - return c + p[2] - 1048576; - - p = rbsearch(c, tolowers, nelem (tolowers)/2, 2); - if (p && c == p[0]) - return c + p[1] - 1048576; - - return c; -} - -char32_t totitler[] = { - 0x0061, 0x007a, 1048544, - 0x00e0, 0x00f6, 1048544, - 0x00f8, 0x00fe, 1048544, - 0x023f, 0x0240, 1059391, - 0x0256, 0x0257, 1048371, - 0x028a, 0x028b, 1048359, - 0x037b, 0x037d, 1048706, - 0x03ad, 0x03af, 1048539, - 0x03b1, 0x03c1, 1048544, - 0x03c3, 0x03cb, 1048544, - 0x03cd, 0x03ce, 1048513, - 0x0430, 0x044f, 1048544, - 0x0450, 0x045f, 1048496, - 0x0561, 0x0586, 1048528, - 0x1f00, 0x1f07, 1048584, - 0x1f10, 0x1f15, 1048584, - 0x1f20, 0x1f27, 1048584, - 0x1f30, 0x1f37, 1048584, - 0x1f40, 0x1f45, 1048584, - 0x1f60, 0x1f67, 1048584, - 0x1f70, 0x1f71, 1048650, - 0x1f72, 0x1f75, 1048662, - 0x1f76, 0x1f77, 1048676, - 0x1f78, 0x1f79, 1048704, - 0x1f7a, 0x1f7b, 1048688, - 0x1f7c, 0x1f7d, 1048702, - 0x1f80, 0x1f87, 1048584, - 0x1f90, 0x1f97, 1048584, - 0x1fa0, 0x1fa7, 1048584, - 0x1fb0, 0x1fb1, 1048584, - 0x1fd0, 0x1fd1, 1048584, - 0x1fe0, 0x1fe1, 1048584, - 0x2170, 0x217f, 1048560, - 0x24d0, 0x24e9, 1048550, - 0x2c30, 0x2c5e, 1048528, - 0x2d00, 0x2d25, 1041312, - 0xff41, 0xff5a, 1048544, - 0x10428, 0x1044f, 1048536, - 0x118c0, 0x118df, 1048544, -}; - -static char32_t totitles[] = { - 0x00b5, 1049319, - 0x00ff, 1048697, - 0x0101, 1048575, - 0x0103, 1048575, - 0x0105, 1048575, - 0x0107, 1048575, - 0x0109, 1048575, - 0x010b, 1048575, - 0x010d, 1048575, - 0x010f, 1048575, - 0x0111, 1048575, - 0x0113, 1048575, - 0x0115, 1048575, - 0x0117, 1048575, - 0x0119, 1048575, - 
0x011b, 1048575, - 0x011d, 1048575, - 0x011f, 1048575, - 0x0121, 1048575, - 0x0123, 1048575, - 0x0125, 1048575, - 0x0127, 1048575, - 0x0129, 1048575, - 0x012b, 1048575, - 0x012d, 1048575, - 0x012f, 1048575, - 0x0131, 1048344, - 0x0133, 1048575, - 0x0135, 1048575, - 0x0137, 1048575, - 0x013a, 1048575, - 0x013c, 1048575, - 0x013e, 1048575, - 0x0140, 1048575, - 0x0142, 1048575, - 0x0144, 1048575, - 0x0146, 1048575, - 0x0148, 1048575, - 0x014b, 1048575, - 0x014d, 1048575, - 0x014f, 1048575, - 0x0151, 1048575, - 0x0153, 1048575, - 0x0155, 1048575, - 0x0157, 1048575, - 0x0159, 1048575, - 0x015b, 1048575, - 0x015d, 1048575, - 0x015f, 1048575, - 0x0161, 1048575, - 0x0163, 1048575, - 0x0165, 1048575, - 0x0167, 1048575, - 0x0169, 1048575, - 0x016b, 1048575, - 0x016d, 1048575, - 0x016f, 1048575, - 0x0171, 1048575, - 0x0173, 1048575, - 0x0175, 1048575, - 0x0177, 1048575, - 0x017a, 1048575, - 0x017c, 1048575, - 0x017e, 1048575, - 0x017f, 1048276, - 0x0180, 1048771, - 0x0183, 1048575, - 0x0185, 1048575, - 0x0188, 1048575, - 0x018c, 1048575, - 0x0192, 1048575, - 0x0195, 1048673, - 0x0199, 1048575, - 0x019a, 1048739, - 0x019e, 1048706, - 0x01a1, 1048575, - 0x01a3, 1048575, - 0x01a5, 1048575, - 0x01a8, 1048575, - 0x01ad, 1048575, - 0x01b0, 1048575, - 0x01b4, 1048575, - 0x01b6, 1048575, - 0x01b9, 1048575, - 0x01bd, 1048575, - 0x01bf, 1048632, - 0x01c4, 1048577, - 0x01c6, 1048575, - 0x01c7, 1048577, - 0x01c9, 1048575, - 0x01ca, 1048577, - 0x01cc, 1048575, - 0x01ce, 1048575, - 0x01d0, 1048575, - 0x01d2, 1048575, - 0x01d4, 1048575, - 0x01d6, 1048575, - 0x01d8, 1048575, - 0x01da, 1048575, - 0x01dc, 1048575, - 0x01dd, 1048497, - 0x01df, 1048575, - 0x01e1, 1048575, - 0x01e3, 1048575, - 0x01e5, 1048575, - 0x01e7, 1048575, - 0x01e9, 1048575, - 0x01eb, 1048575, - 0x01ed, 1048575, - 0x01ef, 1048575, - 0x01f1, 1048577, - 0x01f3, 1048575, - 0x01f5, 1048575, - 0x01f9, 1048575, - 0x01fb, 1048575, - 0x01fd, 1048575, - 0x01ff, 1048575, - 0x0201, 1048575, - 0x0203, 1048575, - 0x0205, 1048575, - 
0x0207, 1048575, - 0x0209, 1048575, - 0x020b, 1048575, - 0x020d, 1048575, - 0x020f, 1048575, - 0x0211, 1048575, - 0x0213, 1048575, - 0x0215, 1048575, - 0x0217, 1048575, - 0x0219, 1048575, - 0x021b, 1048575, - 0x021d, 1048575, - 0x021f, 1048575, - 0x0223, 1048575, - 0x0225, 1048575, - 0x0227, 1048575, - 0x0229, 1048575, - 0x022b, 1048575, - 0x022d, 1048575, - 0x022f, 1048575, - 0x0231, 1048575, - 0x0233, 1048575, - 0x023c, 1048575, - 0x0242, 1048575, - 0x0247, 1048575, - 0x0249, 1048575, - 0x024b, 1048575, - 0x024d, 1048575, - 0x024f, 1048575, - 0x0250, 1059359, - 0x0251, 1059356, - 0x0252, 1059358, - 0x0253, 1048366, - 0x0254, 1048370, - 0x0259, 1048374, - 0x025b, 1048373, - 0x025c, 1090895, - 0x0260, 1048371, - 0x0261, 1090891, - 0x0263, 1048369, - 0x0265, 1090856, - 0x0266, 1090884, - 0x0268, 1048367, - 0x0269, 1048365, - 0x026b, 1059319, - 0x026c, 1090881, - 0x026f, 1048365, - 0x0271, 1059325, - 0x0272, 1048363, - 0x0275, 1048362, - 0x027d, 1059303, - 0x0280, 1048358, - 0x0283, 1048358, - 0x0287, 1090858, - 0x0288, 1048358, - 0x0289, 1048507, - 0x028c, 1048505, - 0x0292, 1048357, - 0x029e, 1090834, - 0x0345, 1048660, - 0x0371, 1048575, - 0x0373, 1048575, - 0x0377, 1048575, - 0x03ac, 1048538, - 0x03c2, 1048545, - 0x03cc, 1048512, - 0x03d0, 1048514, - 0x03d1, 1048519, - 0x03d5, 1048529, - 0x03d6, 1048522, - 0x03d7, 1048568, - 0x03d9, 1048575, - 0x03db, 1048575, - 0x03dd, 1048575, - 0x03df, 1048575, - 0x03e1, 1048575, - 0x03e3, 1048575, - 0x03e5, 1048575, - 0x03e7, 1048575, - 0x03e9, 1048575, - 0x03eb, 1048575, - 0x03ed, 1048575, - 0x03ef, 1048575, - 0x03f0, 1048490, - 0x03f1, 1048496, - 0x03f2, 1048583, - 0x03f3, 1048460, - 0x03f5, 1048480, - 0x03f8, 1048575, - 0x03fb, 1048575, - 0x0461, 1048575, - 0x0463, 1048575, - 0x0465, 1048575, - 0x0467, 1048575, - 0x0469, 1048575, - 0x046b, 1048575, - 0x046d, 1048575, - 0x046f, 1048575, - 0x0471, 1048575, - 0x0473, 1048575, - 0x0475, 1048575, - 0x0477, 1048575, - 0x0479, 1048575, - 0x047b, 1048575, - 0x047d, 1048575, - 
0x047f, 1048575, - 0x0481, 1048575, - 0x048b, 1048575, - 0x048d, 1048575, - 0x048f, 1048575, - 0x0491, 1048575, - 0x0493, 1048575, - 0x0495, 1048575, - 0x0497, 1048575, - 0x0499, 1048575, - 0x049b, 1048575, - 0x049d, 1048575, - 0x049f, 1048575, - 0x04a1, 1048575, - 0x04a3, 1048575, - 0x04a5, 1048575, - 0x04a7, 1048575, - 0x04a9, 1048575, - 0x04ab, 1048575, - 0x04ad, 1048575, - 0x04af, 1048575, - 0x04b1, 1048575, - 0x04b3, 1048575, - 0x04b5, 1048575, - 0x04b7, 1048575, - 0x04b9, 1048575, - 0x04bb, 1048575, - 0x04bd, 1048575, - 0x04bf, 1048575, - 0x04c2, 1048575, - 0x04c4, 1048575, - 0x04c6, 1048575, - 0x04c8, 1048575, - 0x04ca, 1048575, - 0x04cc, 1048575, - 0x04ce, 1048575, - 0x04cf, 1048561, - 0x04d1, 1048575, - 0x04d3, 1048575, - 0x04d5, 1048575, - 0x04d7, 1048575, - 0x04d9, 1048575, - 0x04db, 1048575, - 0x04dd, 1048575, - 0x04df, 1048575, - 0x04e1, 1048575, - 0x04e3, 1048575, - 0x04e5, 1048575, - 0x04e7, 1048575, - 0x04e9, 1048575, - 0x04eb, 1048575, - 0x04ed, 1048575, - 0x04ef, 1048575, - 0x04f1, 1048575, - 0x04f3, 1048575, - 0x04f5, 1048575, - 0x04f7, 1048575, - 0x04f9, 1048575, - 0x04fb, 1048575, - 0x04fd, 1048575, - 0x04ff, 1048575, - 0x0501, 1048575, - 0x0503, 1048575, - 0x0505, 1048575, - 0x0507, 1048575, - 0x0509, 1048575, - 0x050b, 1048575, - 0x050d, 1048575, - 0x050f, 1048575, - 0x0511, 1048575, - 0x0513, 1048575, - 0x0515, 1048575, - 0x0517, 1048575, - 0x0519, 1048575, - 0x051b, 1048575, - 0x051d, 1048575, - 0x051f, 1048575, - 0x0521, 1048575, - 0x0523, 1048575, - 0x0525, 1048575, - 0x0527, 1048575, - 0x0529, 1048575, - 0x052b, 1048575, - 0x052d, 1048575, - 0x052f, 1048575, - 0x1d79, 1083908, - 0x1d7d, 1052390, - 0x1e01, 1048575, - 0x1e03, 1048575, - 0x1e05, 1048575, - 0x1e07, 1048575, - 0x1e09, 1048575, - 0x1e0b, 1048575, - 0x1e0d, 1048575, - 0x1e0f, 1048575, - 0x1e11, 1048575, - 0x1e13, 1048575, - 0x1e15, 1048575, - 0x1e17, 1048575, - 0x1e19, 1048575, - 0x1e1b, 1048575, - 0x1e1d, 1048575, - 0x1e1f, 1048575, - 0x1e21, 1048575, - 0x1e23, 1048575, - 
0x1e25, 1048575, - 0x1e27, 1048575, - 0x1e29, 1048575, - 0x1e2b, 1048575, - 0x1e2d, 1048575, - 0x1e2f, 1048575, - 0x1e31, 1048575, - 0x1e33, 1048575, - 0x1e35, 1048575, - 0x1e37, 1048575, - 0x1e39, 1048575, - 0x1e3b, 1048575, - 0x1e3d, 1048575, - 0x1e3f, 1048575, - 0x1e41, 1048575, - 0x1e43, 1048575, - 0x1e45, 1048575, - 0x1e47, 1048575, - 0x1e49, 1048575, - 0x1e4b, 1048575, - 0x1e4d, 1048575, - 0x1e4f, 1048575, - 0x1e51, 1048575, - 0x1e53, 1048575, - 0x1e55, 1048575, - 0x1e57, 1048575, - 0x1e59, 1048575, - 0x1e5b, 1048575, - 0x1e5d, 1048575, - 0x1e5f, 1048575, - 0x1e61, 1048575, - 0x1e63, 1048575, - 0x1e65, 1048575, - 0x1e67, 1048575, - 0x1e69, 1048575, - 0x1e6b, 1048575, - 0x1e6d, 1048575, - 0x1e6f, 1048575, - 0x1e71, 1048575, - 0x1e73, 1048575, - 0x1e75, 1048575, - 0x1e77, 1048575, - 0x1e79, 1048575, - 0x1e7b, 1048575, - 0x1e7d, 1048575, - 0x1e7f, 1048575, - 0x1e81, 1048575, - 0x1e83, 1048575, - 0x1e85, 1048575, - 0x1e87, 1048575, - 0x1e89, 1048575, - 0x1e8b, 1048575, - 0x1e8d, 1048575, - 0x1e8f, 1048575, - 0x1e91, 1048575, - 0x1e93, 1048575, - 0x1e95, 1048575, - 0x1e9b, 1048517, - 0x1ea1, 1048575, - 0x1ea3, 1048575, - 0x1ea5, 1048575, - 0x1ea7, 1048575, - 0x1ea9, 1048575, - 0x1eab, 1048575, - 0x1ead, 1048575, - 0x1eaf, 1048575, - 0x1eb1, 1048575, - 0x1eb3, 1048575, - 0x1eb5, 1048575, - 0x1eb7, 1048575, - 0x1eb9, 1048575, - 0x1ebb, 1048575, - 0x1ebd, 1048575, - 0x1ebf, 1048575, - 0x1ec1, 1048575, - 0x1ec3, 1048575, - 0x1ec5, 1048575, - 0x1ec7, 1048575, - 0x1ec9, 1048575, - 0x1ecb, 1048575, - 0x1ecd, 1048575, - 0x1ecf, 1048575, - 0x1ed1, 1048575, - 0x1ed3, 1048575, - 0x1ed5, 1048575, - 0x1ed7, 1048575, - 0x1ed9, 1048575, - 0x1edb, 1048575, - 0x1edd, 1048575, - 0x1edf, 1048575, - 0x1ee1, 1048575, - 0x1ee3, 1048575, - 0x1ee5, 1048575, - 0x1ee7, 1048575, - 0x1ee9, 1048575, - 0x1eeb, 1048575, - 0x1eed, 1048575, - 0x1eef, 1048575, - 0x1ef1, 1048575, - 0x1ef3, 1048575, - 0x1ef5, 1048575, - 0x1ef7, 1048575, - 0x1ef9, 1048575, - 0x1efb, 1048575, - 0x1efd, 1048575, - 
0x1eff, 1048575, - 0x1f51, 1048584, - 0x1f53, 1048584, - 0x1f55, 1048584, - 0x1f57, 1048584, - 0x1fb3, 1048585, - 0x1fbe, 1041371, - 0x1fc3, 1048585, - 0x1fe5, 1048583, - 0x1ff3, 1048585, - 0x214e, 1048548, - 0x2184, 1048575, - 0x2c61, 1048575, - 0x2c65, 1037781, - 0x2c66, 1037784, - 0x2c68, 1048575, - 0x2c6a, 1048575, - 0x2c6c, 1048575, - 0x2c73, 1048575, - 0x2c76, 1048575, - 0x2c81, 1048575, - 0x2c83, 1048575, - 0x2c85, 1048575, - 0x2c87, 1048575, - 0x2c89, 1048575, - 0x2c8b, 1048575, - 0x2c8d, 1048575, - 0x2c8f, 1048575, - 0x2c91, 1048575, - 0x2c93, 1048575, - 0x2c95, 1048575, - 0x2c97, 1048575, - 0x2c99, 1048575, - 0x2c9b, 1048575, - 0x2c9d, 1048575, - 0x2c9f, 1048575, - 0x2ca1, 1048575, - 0x2ca3, 1048575, - 0x2ca5, 1048575, - 0x2ca7, 1048575, - 0x2ca9, 1048575, - 0x2cab, 1048575, - 0x2cad, 1048575, - 0x2caf, 1048575, - 0x2cb1, 1048575, - 0x2cb3, 1048575, - 0x2cb5, 1048575, - 0x2cb7, 1048575, - 0x2cb9, 1048575, - 0x2cbb, 1048575, - 0x2cbd, 1048575, - 0x2cbf, 1048575, - 0x2cc1, 1048575, - 0x2cc3, 1048575, - 0x2cc5, 1048575, - 0x2cc7, 1048575, - 0x2cc9, 1048575, - 0x2ccb, 1048575, - 0x2ccd, 1048575, - 0x2ccf, 1048575, - 0x2cd1, 1048575, - 0x2cd3, 1048575, - 0x2cd5, 1048575, - 0x2cd7, 1048575, - 0x2cd9, 1048575, - 0x2cdb, 1048575, - 0x2cdd, 1048575, - 0x2cdf, 1048575, - 0x2ce1, 1048575, - 0x2ce3, 1048575, - 0x2cec, 1048575, - 0x2cee, 1048575, - 0x2cf3, 1048575, - 0x2d27, 1041312, - 0x2d2d, 1041312, - 0xa641, 1048575, - 0xa643, 1048575, - 0xa645, 1048575, - 0xa647, 1048575, - 0xa649, 1048575, - 0xa64b, 1048575, - 0xa64d, 1048575, - 0xa64f, 1048575, - 0xa651, 1048575, - 0xa653, 1048575, - 0xa655, 1048575, - 0xa657, 1048575, - 0xa659, 1048575, - 0xa65b, 1048575, - 0xa65d, 1048575, - 0xa65f, 1048575, - 0xa661, 1048575, - 0xa663, 1048575, - 0xa665, 1048575, - 0xa667, 1048575, - 0xa669, 1048575, - 0xa66b, 1048575, - 0xa66d, 1048575, - 0xa681, 1048575, - 0xa683, 1048575, - 0xa685, 1048575, - 0xa687, 1048575, - 0xa689, 1048575, - 0xa68b, 1048575, - 0xa68d, 1048575, - 
0xa68f, 1048575, - 0xa691, 1048575, - 0xa693, 1048575, - 0xa695, 1048575, - 0xa697, 1048575, - 0xa699, 1048575, - 0xa69b, 1048575, - 0xa723, 1048575, - 0xa725, 1048575, - 0xa727, 1048575, - 0xa729, 1048575, - 0xa72b, 1048575, - 0xa72d, 1048575, - 0xa72f, 1048575, - 0xa733, 1048575, - 0xa735, 1048575, - 0xa737, 1048575, - 0xa739, 1048575, - 0xa73b, 1048575, - 0xa73d, 1048575, - 0xa73f, 1048575, - 0xa741, 1048575, - 0xa743, 1048575, - 0xa745, 1048575, - 0xa747, 1048575, - 0xa749, 1048575, - 0xa74b, 1048575, - 0xa74d, 1048575, - 0xa74f, 1048575, - 0xa751, 1048575, - 0xa753, 1048575, - 0xa755, 1048575, - 0xa757, 1048575, - 0xa759, 1048575, - 0xa75b, 1048575, - 0xa75d, 1048575, - 0xa75f, 1048575, - 0xa761, 1048575, - 0xa763, 1048575, - 0xa765, 1048575, - 0xa767, 1048575, - 0xa769, 1048575, - 0xa76b, 1048575, - 0xa76d, 1048575, - 0xa76f, 1048575, - 0xa77a, 1048575, - 0xa77c, 1048575, - 0xa77f, 1048575, - 0xa781, 1048575, - 0xa783, 1048575, - 0xa785, 1048575, - 0xa787, 1048575, - 0xa78c, 1048575, - 0xa791, 1048575, - 0xa793, 1048575, - 0xa797, 1048575, - 0xa799, 1048575, - 0xa79b, 1048575, - 0xa79d, 1048575, - 0xa79f, 1048575, - 0xa7a1, 1048575, - 0xa7a3, 1048575, - 0xa7a5, 1048575, - 0xa7a7, 1048575, - 0xa7a9, 1048575, -}; - -char32_t Unicode::totitle(char32_t c) noexcept -{ - char32_t *p; - - p = rbsearch(c, totitler, nelem (totitler)/3, 3); - if (p && c >= p[0] && c <= p[1]) - return c + p[2] - 1048576; - - p = rbsearch(c, totitles, nelem (totitles)/2, 2); - if (p && c == p[0]) - return c + p[1] - 1048576; - - return c; -} - -void Unicode::encode(char32_t c, char res[5]) noexcept -{ - switch (nbytesPoint(c)) { - case 1: - res[0] = c; - res[1] = '\0'; - break; - case 2: - res[0] = 0xC0 | ((c >> 6) & 0x1F); - res[1] = 0x80 | (c & 0x3F); - res[2] = '\0'; - break; - case 3: - res[0] = 0xE0 | ((c >> 12) & 0xF ); - res[1] = 0x80 | ((c >> 6) & 0x3F); - res[2] = 0x80 | (c & 0x3F); - res[3] = '\0'; - break; - case 4: - res[0] = 0xF0 | ((c >> 18) & 0x7 ); - res[1] = 0x80 | ((c 
>> 12) & 0x3F); - res[2] = 0x80 | ((c >> 6) & 0x3F); - res[3] = 0x80 | (c & 0x3F); - res[4] = '\0'; - break; - default: - break; - } -} - -void Unicode::decode(char32_t &c, const char *res) noexcept -{ - c = 0; - - switch (nbytesUtf8(res[0])) { - case 1: - c = res[0]; - break; - case 2: - c = (res[0] & 0x1f) << 6; - c |= (res[1] & 0x3f); - break; - case 3: - c = (res[0] & 0x0f) << 12; - c |= (res[1] & 0x3f) << 6; - c |= (res[2] & 0x3f); - break; - case 4: - c = (res[0] & 0x07) << 16; - c |= (res[1] & 0x3f) << 12; - c |= (res[2] & 0x3f) << 6; - c |= (res[3] & 0x3f); - default: - break; - } -} - -int Unicode::nbytesUtf8(char c) noexcept -{ - if ((c & 0xE0) == 0xC0) - return 2; - if ((c & 0xF0) == 0xE0) - return 3; - if ((c & 0xF8) == 0xF0) - return 4; - - return 1; -} - -int Unicode::nbytesPoint(char32_t c) noexcept -{ - if (c <= 0x7F) - return 1; - if (c <= 0x7FF) - return 2; - if (c <= 0xFFFF) - return 3; - if (c <= 0x1FFFFF) - return 4; - - return -1; -} - -int Unicode::length(const std::string &str) -{ - int total = 0; - - forEach(str, [&] (char32_t) { - ++ total; - }); - - return total; -} - -std::string Unicode::toUtf8(const std::u32string &array) -{ - std::string res; - - for (size_t i = 0; i < array.size(); ++i) { - char tmp[5]; - int size = nbytesPoint(array[i]); - - if (size < 0) { - throw std::invalid_argument("invalid sequence"); - } - - encode(array[i], tmp); - res.insert(res.length(), tmp); - } - - return res; -} - -std::u32string Unicode::toUtf32(const std::string &str) -{ - std::u32string res; - - forEach(str, [&] (char32_t code) { - res.push_back(code); - }); - - return res; -} \ No newline at end of file +/* + * Unicode.cpp -- UTF-8 to UTF-32 conversions and various operations + * + * Copyright (c) 2013, 2014, 2015 David Demelier <markand@malikania.fr> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission 
notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "Unicode.h" + +/* + * The following code has been generated from Go mkrunetype adapted to our + * needs. + */ + +namespace unicode { + +#define nelem(x) (sizeof (x) / sizeof ((x)[0])) + +char32_t *rbsearch(char32_t c, char32_t *t, int n, int ne) noexcept +{ + char32_t *p; + int m; + + while (n > 1) { + m = n >> 1; + p = t + m * ne; + + if (c >= p[0]) { + t = p; + n = n - m; + } else { + n = m; + } + } + + if (n && c >= t[0]) + return t; + + return nullptr; +} + +static char32_t isspacer[] = { + 0x0009, 0x000d, + 0x0020, 0x0020, + 0x0085, 0x0085, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x2000, 0x200a, + 0x2028, 0x2029, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000, + 0xfeff, 0xfeff, +}; + +bool isspace(char32_t c) noexcept +{ + char32_t *p; + + p = rbsearch(c, isspacer, nelem (isspacer)/2, 2); + if (p && c >= p[0] && c <= p[1]) + return true; + + return false; +} + +static char32_t isdigitr[] = { + 0x0030, 0x0039, + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x07c0, 0x07c9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be6, 0x0bef, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0de6, 0x0def, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f29, + 0x1040, 0x1049, + 0x1090, 0x1099, + 0x17e0, 0x17e9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19d9, + 0x1a80, 0x1a89, + 0x1a90, 0x1a99, + 0x1b50, 0x1b59, + 0x1bb0, 0x1bb9, + 0x1c40, 0x1c49, + 0x1c50, 0x1c59, 
+ 0xa620, 0xa629, + 0xa8d0, 0xa8d9, + 0xa900, 0xa909, + 0xa9d0, 0xa9d9, + 0xa9f0, 0xa9f9, + 0xaa50, 0xaa59, + 0xabf0, 0xabf9, + 0xff10, 0xff19, + 0x104a0, 0x104a9, + 0x11066, 0x1106f, + 0x110f0, 0x110f9, + 0x11136, 0x1113f, + 0x111d0, 0x111d9, + 0x112f0, 0x112f9, + 0x114d0, 0x114d9, + 0x11650, 0x11659, + 0x116c0, 0x116c9, + 0x118e0, 0x118e9, + 0x16a60, 0x16a69, + 0x16b50, 0x16b59, + 0x1d7ce, 0x1d7ff, +}; + +bool isdigit(char32_t c) noexcept +{ + char32_t *p; + + p = rbsearch(c, isdigitr, nelem (isdigitr)/2, 2); + if (p && c >= p[0] && c <= p[1]) + return true; + + return false; +} + +static char32_t isalphar[] = { + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x0370, 0x0374, + 0x0376, 0x0377, + 0x037a, 0x037d, + 0x0388, 0x038a, + 0x038e, 0x03a1, + 0x03a3, 0x03f5, + 0x03f7, 0x0481, + 0x048a, 0x052f, + 0x0531, 0x0556, + 0x0561, 0x0587, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0620, 0x064a, + 0x066e, 0x066f, + 0x0671, 0x06d3, + 0x06e5, 0x06e6, + 0x06ee, 0x06ef, + 0x06fa, 0x06fc, + 0x0712, 0x072f, + 0x074d, 0x07a5, + 0x07ca, 0x07ea, + 0x07f4, 0x07f5, + 0x0800, 0x0815, + 0x0840, 0x0858, + 0x08a0, 0x08b2, + 0x0904, 0x0939, + 0x0958, 0x0961, + 0x0971, 0x0980, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b6, 0x09b9, + 0x09dc, 0x09dd, + 0x09df, 0x09e1, + 0x09f0, 0x09f1, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a59, 0x0a5c, + 0x0a72, 0x0a74, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0ae0, 0x0ae1, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0c05, 0x0c0c, + 0x0c0e, 
0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c39, + 0x0c58, 0x0c59, + 0x0c60, 0x0c61, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0ce0, 0x0ce1, + 0x0cf1, 0x0cf2, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d3a, + 0x0d60, 0x0d61, + 0x0d7a, 0x0d7f, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dc0, 0x0dc6, + 0x0e01, 0x0e30, + 0x0e32, 0x0e33, + 0x0e40, 0x0e46, + 0x0e81, 0x0e82, + 0x0e87, 0x0e88, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb0, + 0x0eb2, 0x0eb3, + 0x0ec0, 0x0ec4, + 0x0edc, 0x0edf, + 0x0f40, 0x0f47, + 0x0f49, 0x0f6c, + 0x0f88, 0x0f8c, + 0x1000, 0x102a, + 0x1050, 0x1055, + 0x105a, 0x105d, + 0x1065, 0x1066, + 0x106e, 0x1070, + 0x1075, 0x1081, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x167f, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16f1, 0x16f8, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x1820, 0x1877, + 0x1880, 0x18a8, + 0x18b0, 0x18f5, + 0x1900, 0x191e, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19c1, 0x19c7, + 0x1a00, 0x1a16, + 0x1a20, 0x1a54, + 0x1b05, 0x1b33, + 0x1b45, 0x1b4b, + 0x1b83, 0x1ba0, + 0x1bae, 0x1baf, + 0x1bba, 0x1be5, + 0x1c00, 0x1c23, + 0x1c4d, 0x1c4f, + 0x1c5a, 0x1c7d, + 0x1ce9, 0x1cec, + 0x1cee, 0x1cf1, + 0x1cf5, 0x1cf6, + 0x1d00, 0x1dbf, + 0x1e00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2090, 0x209c, + 0x210a, 
0x2113, + 0x2119, 0x211d, + 0x212a, 0x212d, + 0x212f, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x2183, 0x2184, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2ce4, + 0x2ceb, 0x2cee, + 0x2cf2, 0x2cf3, + 0x2d00, 0x2d25, + 0x2d30, 0x2d67, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3005, 0x3006, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x318e, + 0x31a0, 0x31ba, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fcc, + 0xa000, 0xa48c, + 0xa4d0, 0xa4fd, + 0xa500, 0xa60c, + 0xa610, 0xa61f, + 0xa62a, 0xa62b, + 0xa640, 0xa66e, + 0xa67f, 0xa69d, + 0xa6a0, 0xa6e5, + 0xa717, 0xa71f, + 0xa722, 0xa788, + 0xa78b, 0xa78e, + 0xa790, 0xa7ad, + 0xa7b0, 0xa7b1, + 0xa7f7, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa822, + 0xa840, 0xa873, + 0xa882, 0xa8b3, + 0xa8f2, 0xa8f7, + 0xa90a, 0xa925, + 0xa930, 0xa946, + 0xa960, 0xa97c, + 0xa984, 0xa9b2, + 0xa9e0, 0xa9e4, + 0xa9e6, 0xa9ef, + 0xa9fa, 0xa9fe, + 0xaa00, 0xaa28, + 0xaa40, 0xaa42, + 0xaa44, 0xaa4b, + 0xaa60, 0xaa76, + 0xaa7e, 0xaaaf, + 0xaab5, 0xaab6, + 0xaab9, 0xaabd, + 0xaadb, 0xaadd, + 0xaae0, 0xaaea, + 0xaaf2, 0xaaf4, + 0xab01, 0xab06, + 0xab09, 0xab0e, + 0xab11, 0xab16, + 0xab20, 0xab26, + 0xab28, 0xab2e, + 0xab30, 0xab5a, + 0xab5c, 0xab5f, + 0xab64, 0xab65, + 0xabc0, 0xabe2, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xf900, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 
0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10280, 0x1029c, + 0x102a0, 0x102d0, + 0x10300, 0x1031f, + 0x10330, 0x10340, + 0x10342, 0x10349, + 0x10350, 0x10375, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10400, 0x1049d, + 0x10500, 0x10527, + 0x10530, 0x10563, + 0x10600, 0x10736, + 0x10740, 0x10755, + 0x10760, 0x10767, + 0x10800, 0x10805, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083f, 0x10855, + 0x10860, 0x10876, + 0x10880, 0x1089e, + 0x10900, 0x10915, + 0x10920, 0x10939, + 0x10980, 0x109b7, + 0x109be, 0x109bf, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a60, 0x10a7c, + 0x10a80, 0x10a9c, + 0x10ac0, 0x10ac7, + 0x10ac9, 0x10ae4, + 0x10b00, 0x10b35, + 0x10b40, 0x10b55, + 0x10b60, 0x10b72, + 0x10b80, 0x10b91, + 0x10c00, 0x10c48, + 0x11003, 0x11037, + 0x11083, 0x110af, + 0x110d0, 0x110e8, + 0x11103, 0x11126, + 0x11150, 0x11172, + 0x11183, 0x111b2, + 0x111c1, 0x111c4, + 0x11200, 0x11211, + 0x11213, 0x1122b, + 0x112b0, 0x112de, + 0x11305, 0x1130c, + 0x1130f, 0x11310, + 0x11313, 0x11328, + 0x1132a, 0x11330, + 0x11332, 0x11333, + 0x11335, 0x11339, + 0x1135d, 0x11361, + 0x11480, 0x114af, + 0x114c4, 0x114c5, + 0x11580, 0x115ae, + 0x11600, 0x1162f, + 0x11680, 0x116aa, + 0x118a0, 0x118df, + 0x11ac0, 0x11af8, + 0x12000, 0x12398, + 0x13000, 0x1342e, + 0x16800, 0x16a38, + 0x16a40, 0x16a5e, + 0x16ad0, 0x16aed, + 0x16b00, 0x16b2f, + 0x16b40, 0x16b43, + 0x16b63, 0x16b77, + 0x16b7d, 0x16b8f, + 0x16f00, 0x16f44, + 0x16f93, 0x16f9f, + 0x1b000, 0x1b001, + 0x1bc00, 0x1bc6a, + 0x1bc70, 0x1bc7c, + 0x1bc80, 0x1bc88, + 0x1bc90, 0x1bc99, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 
0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7cb, + 0x1e800, 0x1e8c4, + 0x1ee00, 0x1ee03, + 0x1ee05, 0x1ee1f, + 0x1ee21, 0x1ee22, + 0x1ee29, 0x1ee32, + 0x1ee34, 0x1ee37, + 0x1ee4d, 0x1ee4f, + 0x1ee51, 0x1ee52, + 0x1ee61, 0x1ee62, + 0x1ee67, 0x1ee6a, + 0x1ee6c, 0x1ee72, + 0x1ee74, 0x1ee77, + 0x1ee79, 0x1ee7c, + 0x1ee80, 0x1ee89, + 0x1ee8b, 0x1ee9b, + 0x1eea1, 0x1eea3, + 0x1eea5, 0x1eea9, + 0x1eeab, 0x1eebb, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2b740, 0x2b81d, + 0x2f800, 0x2fa1d, +}; + +static char32_t isalphas[] = { + 0x00aa, + 0x00b5, + 0x00ba, + 0x02ec, + 0x02ee, + 0x037f, + 0x0386, + 0x038c, + 0x0559, + 0x06d5, + 0x06ff, + 0x0710, + 0x07b1, + 0x07fa, + 0x081a, + 0x0824, + 0x0828, + 0x093d, + 0x0950, + 0x09b2, + 0x09bd, + 0x09ce, + 0x0a5e, + 0x0abd, + 0x0ad0, + 0x0b3d, + 0x0b71, + 0x0b83, + 0x0b9c, + 0x0bd0, + 0x0c3d, + 0x0cbd, + 0x0cde, + 0x0d3d, + 0x0d4e, + 0x0dbd, + 0x0e84, + 0x0e8a, + 0x0e8d, + 0x0ea5, + 0x0ea7, + 0x0ebd, + 0x0ec6, + 0x0f00, + 0x103f, + 0x1061, + 0x108e, + 0x10c7, + 0x10cd, + 0x1258, + 0x12c0, + 0x17d7, + 0x17dc, + 0x18aa, + 0x1aa7, + 0x1f59, + 0x1f5b, + 0x1f5d, + 0x1fbe, + 0x2071, + 0x207f, + 0x2102, + 0x2107, + 0x2115, + 0x2124, + 0x2126, + 0x2128, + 0x214e, + 0x2d27, + 0x2d2d, + 0x2d6f, + 0x2e2f, + 0xa8fb, + 0xa9cf, + 0xaa7a, + 0xaab1, + 0xaac0, + 0xaac2, + 0xfb1d, + 0xfb3e, + 0x10808, + 0x1083c, + 0x10a00, + 0x11176, + 0x111da, + 0x1133d, + 0x114c7, + 0x11644, + 0x118ff, + 0x16f50, + 0x1d4a2, + 0x1d4bb, + 0x1d546, + 0x1ee24, + 0x1ee27, + 0x1ee39, + 0x1ee3b, + 0x1ee42, + 0x1ee47, + 0x1ee49, + 0x1ee4b, + 0x1ee54, + 0x1ee57, + 0x1ee59, + 0x1ee5b, + 0x1ee5d, + 0x1ee5f, + 0x1ee64, + 0x1ee7e, +}; + +bool isalpha(char32_t c) noexcept +{ + char32_t *p; + + p = rbsearch(c, isalphar, nelem (isalphar)/2, 2); + if (p && c >= p[0] && c <= p[1]) + return true; + + p = rbsearch(c, isalphas, nelem (isalphas), 1); + if (p && c == p[0]) + 
return true; + + return false; +} + +static char32_t isupperr[] = { + 0x0041, 0x005a, + 0x00c0, 0x00d6, + 0x00d8, 0x00de, + 0x0178, 0x0179, + 0x0181, 0x0182, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a6, 0x01a7, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b7, 0x01b8, + 0x01f6, 0x01f8, + 0x023a, 0x023b, + 0x023d, 0x023e, + 0x0243, 0x0246, + 0x0388, 0x038a, + 0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03d2, 0x03d4, + 0x03f9, 0x03fa, + 0x03fd, 0x042f, + 0x04c0, 0x04c1, + 0x0531, 0x0556, + 0x10a0, 0x10c5, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f68, 0x1f6f, + 0x1f88, 0x1f8f, + 0x1f98, 0x1f9f, + 0x1fa8, 0x1faf, + 0x1fb8, 0x1fbc, + 0x1fc8, 0x1fcc, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffc, + 0x210b, 0x210d, + 0x2110, 0x2112, + 0x2119, 0x211d, + 0x212a, 0x212d, + 0x2130, 0x2133, + 0x213e, 0x213f, + 0x2160, 0x216f, + 0x24b6, 0x24cf, + 0x2c00, 0x2c2e, + 0x2c62, 0x2c64, + 0x2c6d, 0x2c70, + 0x2c7e, 0x2c80, + 0xa77d, 0xa77e, + 0xa7aa, 0xa7ad, + 0xa7b0, 0xa7b1, + 0xff21, 0xff3a, + 0x10400, 0x10427, + 0x118a0, 0x118bf, + 0x1d400, 0x1d419, + 0x1d434, 0x1d44d, + 0x1d468, 0x1d481, + 0x1d49e, 0x1d49f, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b5, + 0x1d4d0, 0x1d4e9, + 0x1d504, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d538, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d54a, 0x1d550, + 0x1d56c, 0x1d585, + 0x1d5a0, 0x1d5b9, + 0x1d5d4, 0x1d5ed, + 0x1d608, 0x1d621, + 0x1d63c, 0x1d655, + 0x1d670, 0x1d689, + 0x1d6a8, 0x1d6c0, + 0x1d6e2, 0x1d6fa, + 0x1d71c, 0x1d734, + 0x1d756, 0x1d76e, + 0x1d790, 0x1d7a8, +}; + +static char32_t isuppers[] = { + 0x0100, + 0x0102, + 0x0104, + 0x0106, + 0x0108, + 0x010a, + 0x010c, + 0x010e, + 0x0110, + 0x0112, + 0x0114, + 0x0116, + 0x0118, + 0x011a, + 0x011c, + 0x011e, + 0x0120, + 0x0122, + 0x0124, + 0x0126, + 0x0128, + 0x012a, + 0x012c, + 
0x012e, + 0x0130, + 0x0132, + 0x0134, + 0x0136, + 0x0139, + 0x013b, + 0x013d, + 0x013f, + 0x0141, + 0x0143, + 0x0145, + 0x0147, + 0x014a, + 0x014c, + 0x014e, + 0x0150, + 0x0152, + 0x0154, + 0x0156, + 0x0158, + 0x015a, + 0x015c, + 0x015e, + 0x0160, + 0x0162, + 0x0164, + 0x0166, + 0x0168, + 0x016a, + 0x016c, + 0x016e, + 0x0170, + 0x0172, + 0x0174, + 0x0176, + 0x017b, + 0x017d, + 0x0184, + 0x01a2, + 0x01a4, + 0x01a9, + 0x01ac, + 0x01b5, + 0x01bc, + 0x01c4, + 0x01c7, + 0x01ca, + 0x01cd, + 0x01cf, + 0x01d1, + 0x01d3, + 0x01d5, + 0x01d7, + 0x01d9, + 0x01db, + 0x01de, + 0x01e0, + 0x01e2, + 0x01e4, + 0x01e6, + 0x01e8, + 0x01ea, + 0x01ec, + 0x01ee, + 0x01f1, + 0x01f4, + 0x01fa, + 0x01fc, + 0x01fe, + 0x0200, + 0x0202, + 0x0204, + 0x0206, + 0x0208, + 0x020a, + 0x020c, + 0x020e, + 0x0210, + 0x0212, + 0x0214, + 0x0216, + 0x0218, + 0x021a, + 0x021c, + 0x021e, + 0x0220, + 0x0222, + 0x0224, + 0x0226, + 0x0228, + 0x022a, + 0x022c, + 0x022e, + 0x0230, + 0x0232, + 0x0241, + 0x0248, + 0x024a, + 0x024c, + 0x024e, + 0x0370, + 0x0372, + 0x0376, + 0x037f, + 0x0386, + 0x038c, + 0x03cf, + 0x03d8, + 0x03da, + 0x03dc, + 0x03de, + 0x03e0, + 0x03e2, + 0x03e4, + 0x03e6, + 0x03e8, + 0x03ea, + 0x03ec, + 0x03ee, + 0x03f4, + 0x03f7, + 0x0460, + 0x0462, + 0x0464, + 0x0466, + 0x0468, + 0x046a, + 0x046c, + 0x046e, + 0x0470, + 0x0472, + 0x0474, + 0x0476, + 0x0478, + 0x047a, + 0x047c, + 0x047e, + 0x0480, + 0x048a, + 0x048c, + 0x048e, + 0x0490, + 0x0492, + 0x0494, + 0x0496, + 0x0498, + 0x049a, + 0x049c, + 0x049e, + 0x04a0, + 0x04a2, + 0x04a4, + 0x04a6, + 0x04a8, + 0x04aa, + 0x04ac, + 0x04ae, + 0x04b0, + 0x04b2, + 0x04b4, + 0x04b6, + 0x04b8, + 0x04ba, + 0x04bc, + 0x04be, + 0x04c3, + 0x04c5, + 0x04c7, + 0x04c9, + 0x04cb, + 0x04cd, + 0x04d0, + 0x04d2, + 0x04d4, + 0x04d6, + 0x04d8, + 0x04da, + 0x04dc, + 0x04de, + 0x04e0, + 0x04e2, + 0x04e4, + 0x04e6, + 0x04e8, + 0x04ea, + 0x04ec, + 0x04ee, + 0x04f0, + 0x04f2, + 0x04f4, + 0x04f6, + 0x04f8, + 0x04fa, + 0x04fc, + 0x04fe, + 0x0500, + 0x0502, + 0x0504, + 0x0506, + 
0x0508, + 0x050a, + 0x050c, + 0x050e, + 0x0510, + 0x0512, + 0x0514, + 0x0516, + 0x0518, + 0x051a, + 0x051c, + 0x051e, + 0x0520, + 0x0522, + 0x0524, + 0x0526, + 0x0528, + 0x052a, + 0x052c, + 0x052e, + 0x10c7, + 0x10cd, + 0x1e00, + 0x1e02, + 0x1e04, + 0x1e06, + 0x1e08, + 0x1e0a, + 0x1e0c, + 0x1e0e, + 0x1e10, + 0x1e12, + 0x1e14, + 0x1e16, + 0x1e18, + 0x1e1a, + 0x1e1c, + 0x1e1e, + 0x1e20, + 0x1e22, + 0x1e24, + 0x1e26, + 0x1e28, + 0x1e2a, + 0x1e2c, + 0x1e2e, + 0x1e30, + 0x1e32, + 0x1e34, + 0x1e36, + 0x1e38, + 0x1e3a, + 0x1e3c, + 0x1e3e, + 0x1e40, + 0x1e42, + 0x1e44, + 0x1e46, + 0x1e48, + 0x1e4a, + 0x1e4c, + 0x1e4e, + 0x1e50, + 0x1e52, + 0x1e54, + 0x1e56, + 0x1e58, + 0x1e5a, + 0x1e5c, + 0x1e5e, + 0x1e60, + 0x1e62, + 0x1e64, + 0x1e66, + 0x1e68, + 0x1e6a, + 0x1e6c, + 0x1e6e, + 0x1e70, + 0x1e72, + 0x1e74, + 0x1e76, + 0x1e78, + 0x1e7a, + 0x1e7c, + 0x1e7e, + 0x1e80, + 0x1e82, + 0x1e84, + 0x1e86, + 0x1e88, + 0x1e8a, + 0x1e8c, + 0x1e8e, + 0x1e90, + 0x1e92, + 0x1e94, + 0x1e9e, + 0x1ea0, + 0x1ea2, + 0x1ea4, + 0x1ea6, + 0x1ea8, + 0x1eaa, + 0x1eac, + 0x1eae, + 0x1eb0, + 0x1eb2, + 0x1eb4, + 0x1eb6, + 0x1eb8, + 0x1eba, + 0x1ebc, + 0x1ebe, + 0x1ec0, + 0x1ec2, + 0x1ec4, + 0x1ec6, + 0x1ec8, + 0x1eca, + 0x1ecc, + 0x1ece, + 0x1ed0, + 0x1ed2, + 0x1ed4, + 0x1ed6, + 0x1ed8, + 0x1eda, + 0x1edc, + 0x1ede, + 0x1ee0, + 0x1ee2, + 0x1ee4, + 0x1ee6, + 0x1ee8, + 0x1eea, + 0x1eec, + 0x1eee, + 0x1ef0, + 0x1ef2, + 0x1ef4, + 0x1ef6, + 0x1ef8, + 0x1efa, + 0x1efc, + 0x1efe, + 0x1f59, + 0x1f5b, + 0x1f5d, + 0x1f5f, + 0x2102, + 0x2107, + 0x2115, + 0x2124, + 0x2126, + 0x2128, + 0x2145, + 0x2183, + 0x2c60, + 0x2c67, + 0x2c69, + 0x2c6b, + 0x2c72, + 0x2c75, + 0x2c82, + 0x2c84, + 0x2c86, + 0x2c88, + 0x2c8a, + 0x2c8c, + 0x2c8e, + 0x2c90, + 0x2c92, + 0x2c94, + 0x2c96, + 0x2c98, + 0x2c9a, + 0x2c9c, + 0x2c9e, + 0x2ca0, + 0x2ca2, + 0x2ca4, + 0x2ca6, + 0x2ca8, + 0x2caa, + 0x2cac, + 0x2cae, + 0x2cb0, + 0x2cb2, + 0x2cb4, + 0x2cb6, + 0x2cb8, + 0x2cba, + 0x2cbc, + 0x2cbe, + 0x2cc0, + 0x2cc2, + 0x2cc4, + 0x2cc6, + 0x2cc8, + 
0x2cca, + 0x2ccc, + 0x2cce, + 0x2cd0, + 0x2cd2, + 0x2cd4, + 0x2cd6, + 0x2cd8, + 0x2cda, + 0x2cdc, + 0x2cde, + 0x2ce0, + 0x2ce2, + 0x2ceb, + 0x2ced, + 0x2cf2, + 0xa640, + 0xa642, + 0xa644, + 0xa646, + 0xa648, + 0xa64a, + 0xa64c, + 0xa64e, + 0xa650, + 0xa652, + 0xa654, + 0xa656, + 0xa658, + 0xa65a, + 0xa65c, + 0xa65e, + 0xa660, + 0xa662, + 0xa664, + 0xa666, + 0xa668, + 0xa66a, + 0xa66c, + 0xa680, + 0xa682, + 0xa684, + 0xa686, + 0xa688, + 0xa68a, + 0xa68c, + 0xa68e, + 0xa690, + 0xa692, + 0xa694, + 0xa696, + 0xa698, + 0xa69a, + 0xa722, + 0xa724, + 0xa726, + 0xa728, + 0xa72a, + 0xa72c, + 0xa72e, + 0xa732, + 0xa734, + 0xa736, + 0xa738, + 0xa73a, + 0xa73c, + 0xa73e, + 0xa740, + 0xa742, + 0xa744, + 0xa746, + 0xa748, + 0xa74a, + 0xa74c, + 0xa74e, + 0xa750, + 0xa752, + 0xa754, + 0xa756, + 0xa758, + 0xa75a, + 0xa75c, + 0xa75e, + 0xa760, + 0xa762, + 0xa764, + 0xa766, + 0xa768, + 0xa76a, + 0xa76c, + 0xa76e, + 0xa779, + 0xa77b, + 0xa780, + 0xa782, + 0xa784, + 0xa786, + 0xa78b, + 0xa78d, + 0xa790, + 0xa792, + 0xa796, + 0xa798, + 0xa79a, + 0xa79c, + 0xa79e, + 0xa7a0, + 0xa7a2, + 0xa7a4, + 0xa7a6, + 0xa7a8, + 0x1d49c, + 0x1d4a2, + 0x1d546, + 0x1d7ca, +}; + +bool isupper(char32_t c) noexcept +{ + char32_t *p; + + p = rbsearch(c, isupperr, nelem (isupperr)/2, 2); + if (p && c >= p[0] && c <= p[1]) + return true; + + p = rbsearch(c, isuppers, nelem (isuppers), 1); + if (p && c == p[0]) + return true; + + return false; +} + +static char32_t islowerr[] = { + 0x0061, 0x007a, + 0x00df, 0x00f6, + 0x00f8, 0x00ff, + 0x0137, 0x0138, + 0x0148, 0x0149, + 0x017e, 0x0180, + 0x018c, 0x018d, + 0x0199, 0x019b, + 0x01aa, 0x01ab, + 0x01b9, 0x01ba, + 0x01bd, 0x01bf, + 0x01dc, 0x01dd, + 0x01ef, 0x01f0, + 0x0233, 0x0239, + 0x023f, 0x0240, + 0x024f, 0x0293, + 0x0295, 0x02af, + 0x037b, 0x037d, + 0x03ac, 0x03ce, + 0x03d0, 0x03d1, + 0x03d5, 0x03d7, + 0x03ef, 0x03f3, + 0x03fb, 0x03fc, + 0x0430, 0x045f, + 0x04ce, 0x04cf, + 0x0561, 0x0587, + 0x1d00, 0x1d2b, + 0x1d6b, 0x1d77, + 0x1d79, 0x1d9a, + 0x1e95, 0x1e9d, 
+ 0x1eff, 0x1f07, + 0x1f10, 0x1f15, + 0x1f20, 0x1f27, + 0x1f30, 0x1f37, + 0x1f40, 0x1f45, + 0x1f50, 0x1f57, + 0x1f60, 0x1f67, + 0x1f70, 0x1f7d, + 0x1f80, 0x1f87, + 0x1f90, 0x1f97, + 0x1fa0, 0x1fa7, + 0x1fb0, 0x1fb4, + 0x1fb6, 0x1fb7, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fc7, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, + 0x1fe0, 0x1fe7, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ff7, + 0x210e, 0x210f, + 0x213c, 0x213d, + 0x2146, 0x2149, + 0x2170, 0x217f, + 0x24d0, 0x24e9, + 0x2c30, 0x2c5e, + 0x2c65, 0x2c66, + 0x2c73, 0x2c74, + 0x2c76, 0x2c7b, + 0x2ce3, 0x2ce4, + 0x2d00, 0x2d25, + 0xa72f, 0xa731, + 0xa771, 0xa778, + 0xa793, 0xa795, + 0xab30, 0xab5a, + 0xab64, 0xab65, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff41, 0xff5a, + 0x10428, 0x1044f, + 0x118c0, 0x118df, + 0x1d41a, 0x1d433, + 0x1d44e, 0x1d454, + 0x1d456, 0x1d467, + 0x1d482, 0x1d49b, + 0x1d4b6, 0x1d4b9, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d4cf, + 0x1d4ea, 0x1d503, + 0x1d51e, 0x1d537, + 0x1d552, 0x1d56b, + 0x1d586, 0x1d59f, + 0x1d5ba, 0x1d5d3, + 0x1d5ee, 0x1d607, + 0x1d622, 0x1d63b, + 0x1d656, 0x1d66f, + 0x1d68a, 0x1d6a5, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6e1, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d71b, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d755, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d78f, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, +}; + +static char32_t islowers[] = { + 0x00b5, + 0x0101, + 0x0103, + 0x0105, + 0x0107, + 0x0109, + 0x010b, + 0x010d, + 0x010f, + 0x0111, + 0x0113, + 0x0115, + 0x0117, + 0x0119, + 0x011b, + 0x011d, + 0x011f, + 0x0121, + 0x0123, + 0x0125, + 0x0127, + 0x0129, + 0x012b, + 0x012d, + 0x012f, + 0x0131, + 0x0133, + 0x0135, + 0x013a, + 0x013c, + 0x013e, + 0x0140, + 0x0142, + 0x0144, + 0x0146, + 0x014b, + 0x014d, + 0x014f, + 0x0151, + 0x0153, + 0x0155, + 0x0157, + 0x0159, + 0x015b, + 0x015d, + 0x015f, + 0x0161, + 0x0163, + 0x0165, + 0x0167, + 0x0169, + 0x016b, + 0x016d, + 0x016f, + 0x0171, + 0x0173, + 0x0175, + 0x0177, + 0x017a, + 0x017c, + 0x0183, + 0x0185, + 0x0188, + 0x0192, + 0x0195, + 0x019e, + 0x01a1, + 0x01a3, + 0x01a5, + 0x01a8, 
+ 0x01ad, + 0x01b0, + 0x01b4, + 0x01b6, + 0x01c6, + 0x01c9, + 0x01cc, + 0x01ce, + 0x01d0, + 0x01d2, + 0x01d4, + 0x01d6, + 0x01d8, + 0x01da, + 0x01df, + 0x01e1, + 0x01e3, + 0x01e5, + 0x01e7, + 0x01e9, + 0x01eb, + 0x01ed, + 0x01f3, + 0x01f5, + 0x01f9, + 0x01fb, + 0x01fd, + 0x01ff, + 0x0201, + 0x0203, + 0x0205, + 0x0207, + 0x0209, + 0x020b, + 0x020d, + 0x020f, + 0x0211, + 0x0213, + 0x0215, + 0x0217, + 0x0219, + 0x021b, + 0x021d, + 0x021f, + 0x0221, + 0x0223, + 0x0225, + 0x0227, + 0x0229, + 0x022b, + 0x022d, + 0x022f, + 0x0231, + 0x023c, + 0x0242, + 0x0247, + 0x0249, + 0x024b, + 0x024d, + 0x0371, + 0x0373, + 0x0377, + 0x0390, + 0x03d9, + 0x03db, + 0x03dd, + 0x03df, + 0x03e1, + 0x03e3, + 0x03e5, + 0x03e7, + 0x03e9, + 0x03eb, + 0x03ed, + 0x03f5, + 0x03f8, + 0x0461, + 0x0463, + 0x0465, + 0x0467, + 0x0469, + 0x046b, + 0x046d, + 0x046f, + 0x0471, + 0x0473, + 0x0475, + 0x0477, + 0x0479, + 0x047b, + 0x047d, + 0x047f, + 0x0481, + 0x048b, + 0x048d, + 0x048f, + 0x0491, + 0x0493, + 0x0495, + 0x0497, + 0x0499, + 0x049b, + 0x049d, + 0x049f, + 0x04a1, + 0x04a3, + 0x04a5, + 0x04a7, + 0x04a9, + 0x04ab, + 0x04ad, + 0x04af, + 0x04b1, + 0x04b3, + 0x04b5, + 0x04b7, + 0x04b9, + 0x04bb, + 0x04bd, + 0x04bf, + 0x04c2, + 0x04c4, + 0x04c6, + 0x04c8, + 0x04ca, + 0x04cc, + 0x04d1, + 0x04d3, + 0x04d5, + 0x04d7, + 0x04d9, + 0x04db, + 0x04dd, + 0x04df, + 0x04e1, + 0x04e3, + 0x04e5, + 0x04e7, + 0x04e9, + 0x04eb, + 0x04ed, + 0x04ef, + 0x04f1, + 0x04f3, + 0x04f5, + 0x04f7, + 0x04f9, + 0x04fb, + 0x04fd, + 0x04ff, + 0x0501, + 0x0503, + 0x0505, + 0x0507, + 0x0509, + 0x050b, + 0x050d, + 0x050f, + 0x0511, + 0x0513, + 0x0515, + 0x0517, + 0x0519, + 0x051b, + 0x051d, + 0x051f, + 0x0521, + 0x0523, + 0x0525, + 0x0527, + 0x0529, + 0x052b, + 0x052d, + 0x052f, + 0x1e01, + 0x1e03, + 0x1e05, + 0x1e07, + 0x1e09, + 0x1e0b, + 0x1e0d, + 0x1e0f, + 0x1e11, + 0x1e13, + 0x1e15, + 0x1e17, + 0x1e19, + 0x1e1b, + 0x1e1d, + 0x1e1f, + 0x1e21, + 0x1e23, + 0x1e25, + 0x1e27, + 0x1e29, + 0x1e2b, + 0x1e2d, + 0x1e2f, + 0x1e31, + 0x1e33, 
+ 0x1e35, + 0x1e37, + 0x1e39, + 0x1e3b, + 0x1e3d, + 0x1e3f, + 0x1e41, + 0x1e43, + 0x1e45, + 0x1e47, + 0x1e49, + 0x1e4b, + 0x1e4d, + 0x1e4f, + 0x1e51, + 0x1e53, + 0x1e55, + 0x1e57, + 0x1e59, + 0x1e5b, + 0x1e5d, + 0x1e5f, + 0x1e61, + 0x1e63, + 0x1e65, + 0x1e67, + 0x1e69, + 0x1e6b, + 0x1e6d, + 0x1e6f, + 0x1e71, + 0x1e73, + 0x1e75, + 0x1e77, + 0x1e79, + 0x1e7b, + 0x1e7d, + 0x1e7f, + 0x1e81, + 0x1e83, + 0x1e85, + 0x1e87, + 0x1e89, + 0x1e8b, + 0x1e8d, + 0x1e8f, + 0x1e91, + 0x1e93, + 0x1e9f, + 0x1ea1, + 0x1ea3, + 0x1ea5, + 0x1ea7, + 0x1ea9, + 0x1eab, + 0x1ead, + 0x1eaf, + 0x1eb1, + 0x1eb3, + 0x1eb5, + 0x1eb7, + 0x1eb9, + 0x1ebb, + 0x1ebd, + 0x1ebf, + 0x1ec1, + 0x1ec3, + 0x1ec5, + 0x1ec7, + 0x1ec9, + 0x1ecb, + 0x1ecd, + 0x1ecf, + 0x1ed1, + 0x1ed3, + 0x1ed5, + 0x1ed7, + 0x1ed9, + 0x1edb, + 0x1edd, + 0x1edf, + 0x1ee1, + 0x1ee3, + 0x1ee5, + 0x1ee7, + 0x1ee9, + 0x1eeb, + 0x1eed, + 0x1eef, + 0x1ef1, + 0x1ef3, + 0x1ef5, + 0x1ef7, + 0x1ef9, + 0x1efb, + 0x1efd, + 0x1fbe, + 0x210a, + 0x2113, + 0x212f, + 0x2134, + 0x2139, + 0x214e, + 0x2184, + 0x2c61, + 0x2c68, + 0x2c6a, + 0x2c6c, + 0x2c71, + 0x2c81, + 0x2c83, + 0x2c85, + 0x2c87, + 0x2c89, + 0x2c8b, + 0x2c8d, + 0x2c8f, + 0x2c91, + 0x2c93, + 0x2c95, + 0x2c97, + 0x2c99, + 0x2c9b, + 0x2c9d, + 0x2c9f, + 0x2ca1, + 0x2ca3, + 0x2ca5, + 0x2ca7, + 0x2ca9, + 0x2cab, + 0x2cad, + 0x2caf, + 0x2cb1, + 0x2cb3, + 0x2cb5, + 0x2cb7, + 0x2cb9, + 0x2cbb, + 0x2cbd, + 0x2cbf, + 0x2cc1, + 0x2cc3, + 0x2cc5, + 0x2cc7, + 0x2cc9, + 0x2ccb, + 0x2ccd, + 0x2ccf, + 0x2cd1, + 0x2cd3, + 0x2cd5, + 0x2cd7, + 0x2cd9, + 0x2cdb, + 0x2cdd, + 0x2cdf, + 0x2ce1, + 0x2cec, + 0x2cee, + 0x2cf3, + 0x2d27, + 0x2d2d, + 0xa641, + 0xa643, + 0xa645, + 0xa647, + 0xa649, + 0xa64b, + 0xa64d, + 0xa64f, + 0xa651, + 0xa653, + 0xa655, + 0xa657, + 0xa659, + 0xa65b, + 0xa65d, + 0xa65f, + 0xa661, + 0xa663, + 0xa665, + 0xa667, + 0xa669, + 0xa66b, + 0xa66d, + 0xa681, + 0xa683, + 0xa685, + 0xa687, + 0xa689, + 0xa68b, + 0xa68d, + 0xa68f, + 0xa691, + 0xa693, + 0xa695, + 0xa697, + 0xa699, + 0xa69b, 
+ 0xa723, + 0xa725, + 0xa727, + 0xa729, + 0xa72b, + 0xa72d, + 0xa733, + 0xa735, + 0xa737, + 0xa739, + 0xa73b, + 0xa73d, + 0xa73f, + 0xa741, + 0xa743, + 0xa745, + 0xa747, + 0xa749, + 0xa74b, + 0xa74d, + 0xa74f, + 0xa751, + 0xa753, + 0xa755, + 0xa757, + 0xa759, + 0xa75b, + 0xa75d, + 0xa75f, + 0xa761, + 0xa763, + 0xa765, + 0xa767, + 0xa769, + 0xa76b, + 0xa76d, + 0xa76f, + 0xa77a, + 0xa77c, + 0xa77f, + 0xa781, + 0xa783, + 0xa785, + 0xa787, + 0xa78c, + 0xa78e, + 0xa791, + 0xa797, + 0xa799, + 0xa79b, + 0xa79d, + 0xa79f, + 0xa7a1, + 0xa7a3, + 0xa7a5, + 0xa7a7, + 0xa7a9, + 0xa7fa, + 0x1d4bb, + 0x1d7cb, +}; + +bool islower(char32_t c) noexcept +{ + char32_t *p; + + p = rbsearch(c, islowerr, nelem (islowerr)/2, 2); + if (p && c >= p[0] && c <= p[1]) + return true; + + p = rbsearch(c, islowers, nelem (islowers), 1); + if (p && c == p[0]) + return true; + + return false; +} + +static char32_t istitler[] = { + 0x0041, 0x005a, + 0x00c0, 0x00d6, + 0x00d8, 0x00de, + 0x0178, 0x0179, + 0x0181, 0x0182, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a6, 0x01a7, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b7, 0x01b8, + 0x01f6, 0x01f8, + 0x023a, 0x023b, + 0x023d, 0x023e, + 0x0243, 0x0246, + 0x0388, 0x038a, + 0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03f9, 0x03fa, + 0x03fd, 0x042f, + 0x04c0, 0x04c1, + 0x0531, 0x0556, + 0x10a0, 0x10c5, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f68, 0x1f6f, + 0x1f88, 0x1f8f, + 0x1f98, 0x1f9f, + 0x1fa8, 0x1faf, + 0x1fb8, 0x1fbc, + 0x1fc8, 0x1fcc, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffc, + 0x2160, 0x216f, + 0x24b6, 0x24cf, + 0x2c00, 0x2c2e, + 0x2c62, 0x2c64, + 0x2c6d, 0x2c70, + 0x2c7e, 0x2c80, + 0xa77d, 0xa77e, + 0xa7aa, 0xa7ad, + 0xa7b0, 0xa7b1, + 0xff21, 0xff3a, + 0x10400, 0x10427, + 0x118a0, 0x118bf, +}; + +static char32_t istitles[] = { + 0x0100, + 0x0102, + 0x0104, + 0x0106, + 0x0108, + 
0x010a, + 0x010c, + 0x010e, + 0x0110, + 0x0112, + 0x0114, + 0x0116, + 0x0118, + 0x011a, + 0x011c, + 0x011e, + 0x0120, + 0x0122, + 0x0124, + 0x0126, + 0x0128, + 0x012a, + 0x012c, + 0x012e, + 0x0132, + 0x0134, + 0x0136, + 0x0139, + 0x013b, + 0x013d, + 0x013f, + 0x0141, + 0x0143, + 0x0145, + 0x0147, + 0x014a, + 0x014c, + 0x014e, + 0x0150, + 0x0152, + 0x0154, + 0x0156, + 0x0158, + 0x015a, + 0x015c, + 0x015e, + 0x0160, + 0x0162, + 0x0164, + 0x0166, + 0x0168, + 0x016a, + 0x016c, + 0x016e, + 0x0170, + 0x0172, + 0x0174, + 0x0176, + 0x017b, + 0x017d, + 0x0184, + 0x01a2, + 0x01a4, + 0x01a9, + 0x01ac, + 0x01b5, + 0x01bc, + 0x01c5, + 0x01c8, + 0x01cb, + 0x01cd, + 0x01cf, + 0x01d1, + 0x01d3, + 0x01d5, + 0x01d7, + 0x01d9, + 0x01db, + 0x01de, + 0x01e0, + 0x01e2, + 0x01e4, + 0x01e6, + 0x01e8, + 0x01ea, + 0x01ec, + 0x01ee, + 0x01f2, + 0x01f4, + 0x01fa, + 0x01fc, + 0x01fe, + 0x0200, + 0x0202, + 0x0204, + 0x0206, + 0x0208, + 0x020a, + 0x020c, + 0x020e, + 0x0210, + 0x0212, + 0x0214, + 0x0216, + 0x0218, + 0x021a, + 0x021c, + 0x021e, + 0x0220, + 0x0222, + 0x0224, + 0x0226, + 0x0228, + 0x022a, + 0x022c, + 0x022e, + 0x0230, + 0x0232, + 0x0241, + 0x0248, + 0x024a, + 0x024c, + 0x024e, + 0x0370, + 0x0372, + 0x0376, + 0x037f, + 0x0386, + 0x038c, + 0x03cf, + 0x03d8, + 0x03da, + 0x03dc, + 0x03de, + 0x03e0, + 0x03e2, + 0x03e4, + 0x03e6, + 0x03e8, + 0x03ea, + 0x03ec, + 0x03ee, + 0x03f7, + 0x0460, + 0x0462, + 0x0464, + 0x0466, + 0x0468, + 0x046a, + 0x046c, + 0x046e, + 0x0470, + 0x0472, + 0x0474, + 0x0476, + 0x0478, + 0x047a, + 0x047c, + 0x047e, + 0x0480, + 0x048a, + 0x048c, + 0x048e, + 0x0490, + 0x0492, + 0x0494, + 0x0496, + 0x0498, + 0x049a, + 0x049c, + 0x049e, + 0x04a0, + 0x04a2, + 0x04a4, + 0x04a6, + 0x04a8, + 0x04aa, + 0x04ac, + 0x04ae, + 0x04b0, + 0x04b2, + 0x04b4, + 0x04b6, + 0x04b8, + 0x04ba, + 0x04bc, + 0x04be, + 0x04c3, + 0x04c5, + 0x04c7, + 0x04c9, + 0x04cb, + 0x04cd, + 0x04d0, + 0x04d2, + 0x04d4, + 0x04d6, + 0x04d8, + 0x04da, + 0x04dc, + 0x04de, + 0x04e0, + 0x04e2, + 0x04e4, + 0x04e6, + 
0x04e8, + 0x04ea, + 0x04ec, + 0x04ee, + 0x04f0, + 0x04f2, + 0x04f4, + 0x04f6, + 0x04f8, + 0x04fa, + 0x04fc, + 0x04fe, + 0x0500, + 0x0502, + 0x0504, + 0x0506, + 0x0508, + 0x050a, + 0x050c, + 0x050e, + 0x0510, + 0x0512, + 0x0514, + 0x0516, + 0x0518, + 0x051a, + 0x051c, + 0x051e, + 0x0520, + 0x0522, + 0x0524, + 0x0526, + 0x0528, + 0x052a, + 0x052c, + 0x052e, + 0x10c7, + 0x10cd, + 0x1e00, + 0x1e02, + 0x1e04, + 0x1e06, + 0x1e08, + 0x1e0a, + 0x1e0c, + 0x1e0e, + 0x1e10, + 0x1e12, + 0x1e14, + 0x1e16, + 0x1e18, + 0x1e1a, + 0x1e1c, + 0x1e1e, + 0x1e20, + 0x1e22, + 0x1e24, + 0x1e26, + 0x1e28, + 0x1e2a, + 0x1e2c, + 0x1e2e, + 0x1e30, + 0x1e32, + 0x1e34, + 0x1e36, + 0x1e38, + 0x1e3a, + 0x1e3c, + 0x1e3e, + 0x1e40, + 0x1e42, + 0x1e44, + 0x1e46, + 0x1e48, + 0x1e4a, + 0x1e4c, + 0x1e4e, + 0x1e50, + 0x1e52, + 0x1e54, + 0x1e56, + 0x1e58, + 0x1e5a, + 0x1e5c, + 0x1e5e, + 0x1e60, + 0x1e62, + 0x1e64, + 0x1e66, + 0x1e68, + 0x1e6a, + 0x1e6c, + 0x1e6e, + 0x1e70, + 0x1e72, + 0x1e74, + 0x1e76, + 0x1e78, + 0x1e7a, + 0x1e7c, + 0x1e7e, + 0x1e80, + 0x1e82, + 0x1e84, + 0x1e86, + 0x1e88, + 0x1e8a, + 0x1e8c, + 0x1e8e, + 0x1e90, + 0x1e92, + 0x1e94, + 0x1ea0, + 0x1ea2, + 0x1ea4, + 0x1ea6, + 0x1ea8, + 0x1eaa, + 0x1eac, + 0x1eae, + 0x1eb0, + 0x1eb2, + 0x1eb4, + 0x1eb6, + 0x1eb8, + 0x1eba, + 0x1ebc, + 0x1ebe, + 0x1ec0, + 0x1ec2, + 0x1ec4, + 0x1ec6, + 0x1ec8, + 0x1eca, + 0x1ecc, + 0x1ece, + 0x1ed0, + 0x1ed2, + 0x1ed4, + 0x1ed6, + 0x1ed8, + 0x1eda, + 0x1edc, + 0x1ede, + 0x1ee0, + 0x1ee2, + 0x1ee4, + 0x1ee6, + 0x1ee8, + 0x1eea, + 0x1eec, + 0x1eee, + 0x1ef0, + 0x1ef2, + 0x1ef4, + 0x1ef6, + 0x1ef8, + 0x1efa, + 0x1efc, + 0x1efe, + 0x1f59, + 0x1f5b, + 0x1f5d, + 0x1f5f, + 0x2132, + 0x2183, + 0x2c60, + 0x2c67, + 0x2c69, + 0x2c6b, + 0x2c72, + 0x2c75, + 0x2c82, + 0x2c84, + 0x2c86, + 0x2c88, + 0x2c8a, + 0x2c8c, + 0x2c8e, + 0x2c90, + 0x2c92, + 0x2c94, + 0x2c96, + 0x2c98, + 0x2c9a, + 0x2c9c, + 0x2c9e, + 0x2ca0, + 0x2ca2, + 0x2ca4, + 0x2ca6, + 0x2ca8, + 0x2caa, + 0x2cac, + 0x2cae, + 0x2cb0, + 0x2cb2, + 0x2cb4, + 0x2cb6, + 
0x2cb8, + 0x2cba, + 0x2cbc, + 0x2cbe, + 0x2cc0, + 0x2cc2, + 0x2cc4, + 0x2cc6, + 0x2cc8, + 0x2cca, + 0x2ccc, + 0x2cce, + 0x2cd0, + 0x2cd2, + 0x2cd4, + 0x2cd6, + 0x2cd8, + 0x2cda, + 0x2cdc, + 0x2cde, + 0x2ce0, + 0x2ce2, + 0x2ceb, + 0x2ced, + 0x2cf2, + 0xa640, + 0xa642, + 0xa644, + 0xa646, + 0xa648, + 0xa64a, + 0xa64c, + 0xa64e, + 0xa650, + 0xa652, + 0xa654, + 0xa656, + 0xa658, + 0xa65a, + 0xa65c, + 0xa65e, + 0xa660, + 0xa662, + 0xa664, + 0xa666, + 0xa668, + 0xa66a, + 0xa66c, + 0xa680, + 0xa682, + 0xa684, + 0xa686, + 0xa688, + 0xa68a, + 0xa68c, + 0xa68e, + 0xa690, + 0xa692, + 0xa694, + 0xa696, + 0xa698, + 0xa69a, + 0xa722, + 0xa724, + 0xa726, + 0xa728, + 0xa72a, + 0xa72c, + 0xa72e, + 0xa732, + 0xa734, + 0xa736, + 0xa738, + 0xa73a, + 0xa73c, + 0xa73e, + 0xa740, + 0xa742, + 0xa744, + 0xa746, + 0xa748, + 0xa74a, + 0xa74c, + 0xa74e, + 0xa750, + 0xa752, + 0xa754, + 0xa756, + 0xa758, + 0xa75a, + 0xa75c, + 0xa75e, + 0xa760, + 0xa762, + 0xa764, + 0xa766, + 0xa768, + 0xa76a, + 0xa76c, + 0xa76e, + 0xa779, + 0xa77b, + 0xa780, + 0xa782, + 0xa784, + 0xa786, + 0xa78b, + 0xa78d, + 0xa790, + 0xa792, + 0xa796, + 0xa798, + 0xa79a, + 0xa79c, + 0xa79e, + 0xa7a0, + 0xa7a2, + 0xa7a4, + 0xa7a6, + 0xa7a8, +}; + +bool istitle(char32_t c) noexcept +{ + char32_t *p; + + p = rbsearch(c, istitler, nelem (istitler)/2, 2); + if (p && c >= p[0] && c <= p[1]) + return true; + + p = rbsearch(c, istitles, nelem (istitles), 1); + if (p && c == p[0]) + return true; + + return false; +} + +char32_t toupperr[] = { + 0x0061, 0x007a, 1048544, + 0x00e0, 0x00f6, 1048544, + 0x00f8, 0x00fe, 1048544, + 0x023f, 0x0240, 1059391, + 0x0256, 0x0257, 1048371, + 0x028a, 0x028b, 1048359, + 0x037b, 0x037d, 1048706, + 0x03ad, 0x03af, 1048539, + 0x03b1, 0x03c1, 1048544, + 0x03c3, 0x03cb, 1048544, + 0x03cd, 0x03ce, 1048513, + 0x0430, 0x044f, 1048544, + 0x0450, 0x045f, 1048496, + 0x0561, 0x0586, 1048528, + 0x1f00, 0x1f07, 1048584, + 0x1f10, 0x1f15, 1048584, + 0x1f20, 0x1f27, 1048584, + 0x1f30, 0x1f37, 1048584, + 0x1f40, 
0x1f45, 1048584, + 0x1f60, 0x1f67, 1048584, + 0x1f70, 0x1f71, 1048650, + 0x1f72, 0x1f75, 1048662, + 0x1f76, 0x1f77, 1048676, + 0x1f78, 0x1f79, 1048704, + 0x1f7a, 0x1f7b, 1048688, + 0x1f7c, 0x1f7d, 1048702, + 0x1f80, 0x1f87, 1048584, + 0x1f90, 0x1f97, 1048584, + 0x1fa0, 0x1fa7, 1048584, + 0x1fb0, 0x1fb1, 1048584, + 0x1fd0, 0x1fd1, 1048584, + 0x1fe0, 0x1fe1, 1048584, + 0x2170, 0x217f, 1048560, + 0x24d0, 0x24e9, 1048550, + 0x2c30, 0x2c5e, 1048528, + 0x2d00, 0x2d25, 1041312, + 0xff41, 0xff5a, 1048544, + 0x10428, 0x1044f, 1048536, + 0x118c0, 0x118df, 1048544, +}; + +static char32_t touppers[] = { + 0x00b5, 1049319, + 0x00ff, 1048697, + 0x0101, 1048575, + 0x0103, 1048575, + 0x0105, 1048575, + 0x0107, 1048575, + 0x0109, 1048575, + 0x010b, 1048575, + 0x010d, 1048575, + 0x010f, 1048575, + 0x0111, 1048575, + 0x0113, 1048575, + 0x0115, 1048575, + 0x0117, 1048575, + 0x0119, 1048575, + 0x011b, 1048575, + 0x011d, 1048575, + 0x011f, 1048575, + 0x0121, 1048575, + 0x0123, 1048575, + 0x0125, 1048575, + 0x0127, 1048575, + 0x0129, 1048575, + 0x012b, 1048575, + 0x012d, 1048575, + 0x012f, 1048575, + 0x0131, 1048344, + 0x0133, 1048575, + 0x0135, 1048575, + 0x0137, 1048575, + 0x013a, 1048575, + 0x013c, 1048575, + 0x013e, 1048575, + 0x0140, 1048575, + 0x0142, 1048575, + 0x0144, 1048575, + 0x0146, 1048575, + 0x0148, 1048575, + 0x014b, 1048575, + 0x014d, 1048575, + 0x014f, 1048575, + 0x0151, 1048575, + 0x0153, 1048575, + 0x0155, 1048575, + 0x0157, 1048575, + 0x0159, 1048575, + 0x015b, 1048575, + 0x015d, 1048575, + 0x015f, 1048575, + 0x0161, 1048575, + 0x0163, 1048575, + 0x0165, 1048575, + 0x0167, 1048575, + 0x0169, 1048575, + 0x016b, 1048575, + 0x016d, 1048575, + 0x016f, 1048575, + 0x0171, 1048575, + 0x0173, 1048575, + 0x0175, 1048575, + 0x0177, 1048575, + 0x017a, 1048575, + 0x017c, 1048575, + 0x017e, 1048575, + 0x017f, 1048276, + 0x0180, 1048771, + 0x0183, 1048575, + 0x0185, 1048575, + 0x0188, 1048575, + 0x018c, 1048575, + 0x0192, 1048575, + 0x0195, 1048673, + 0x0199, 1048575, + 0x019a, 
1048739, + 0x019e, 1048706, + 0x01a1, 1048575, + 0x01a3, 1048575, + 0x01a5, 1048575, + 0x01a8, 1048575, + 0x01ad, 1048575, + 0x01b0, 1048575, + 0x01b4, 1048575, + 0x01b6, 1048575, + 0x01b9, 1048575, + 0x01bd, 1048575, + 0x01bf, 1048632, + 0x01c5, 1048575, + 0x01c6, 1048574, + 0x01c8, 1048575, + 0x01c9, 1048574, + 0x01cb, 1048575, + 0x01cc, 1048574, + 0x01ce, 1048575, + 0x01d0, 1048575, + 0x01d2, 1048575, + 0x01d4, 1048575, + 0x01d6, 1048575, + 0x01d8, 1048575, + 0x01da, 1048575, + 0x01dc, 1048575, + 0x01dd, 1048497, + 0x01df, 1048575, + 0x01e1, 1048575, + 0x01e3, 1048575, + 0x01e5, 1048575, + 0x01e7, 1048575, + 0x01e9, 1048575, + 0x01eb, 1048575, + 0x01ed, 1048575, + 0x01ef, 1048575, + 0x01f2, 1048575, + 0x01f3, 1048574, + 0x01f5, 1048575, + 0x01f9, 1048575, + 0x01fb, 1048575, + 0x01fd, 1048575, + 0x01ff, 1048575, + 0x0201, 1048575, + 0x0203, 1048575, + 0x0205, 1048575, + 0x0207, 1048575, + 0x0209, 1048575, + 0x020b, 1048575, + 0x020d, 1048575, + 0x020f, 1048575, + 0x0211, 1048575, + 0x0213, 1048575, + 0x0215, 1048575, + 0x0217, 1048575, + 0x0219, 1048575, + 0x021b, 1048575, + 0x021d, 1048575, + 0x021f, 1048575, + 0x0223, 1048575, + 0x0225, 1048575, + 0x0227, 1048575, + 0x0229, 1048575, + 0x022b, 1048575, + 0x022d, 1048575, + 0x022f, 1048575, + 0x0231, 1048575, + 0x0233, 1048575, + 0x023c, 1048575, + 0x0242, 1048575, + 0x0247, 1048575, + 0x0249, 1048575, + 0x024b, 1048575, + 0x024d, 1048575, + 0x024f, 1048575, + 0x0250, 1059359, + 0x0251, 1059356, + 0x0252, 1059358, + 0x0253, 1048366, + 0x0254, 1048370, + 0x0259, 1048374, + 0x025b, 1048373, + 0x025c, 1090895, + 0x0260, 1048371, + 0x0261, 1090891, + 0x0263, 1048369, + 0x0265, 1090856, + 0x0266, 1090884, + 0x0268, 1048367, + 0x0269, 1048365, + 0x026b, 1059319, + 0x026c, 1090881, + 0x026f, 1048365, + 0x0271, 1059325, + 0x0272, 1048363, + 0x0275, 1048362, + 0x027d, 1059303, + 0x0280, 1048358, + 0x0283, 1048358, + 0x0287, 1090858, + 0x0288, 1048358, + 0x0289, 1048507, + 0x028c, 1048505, + 0x0292, 1048357, + 0x029e, 
1090834, + 0x0345, 1048660, + 0x0371, 1048575, + 0x0373, 1048575, + 0x0377, 1048575, + 0x03ac, 1048538, + 0x03c2, 1048545, + 0x03cc, 1048512, + 0x03d0, 1048514, + 0x03d1, 1048519, + 0x03d5, 1048529, + 0x03d6, 1048522, + 0x03d7, 1048568, + 0x03d9, 1048575, + 0x03db, 1048575, + 0x03dd, 1048575, + 0x03df, 1048575, + 0x03e1, 1048575, + 0x03e3, 1048575, + 0x03e5, 1048575, + 0x03e7, 1048575, + 0x03e9, 1048575, + 0x03eb, 1048575, + 0x03ed, 1048575, + 0x03ef, 1048575, + 0x03f0, 1048490, + 0x03f1, 1048496, + 0x03f2, 1048583, + 0x03f3, 1048460, + 0x03f5, 1048480, + 0x03f8, 1048575, + 0x03fb, 1048575, + 0x0461, 1048575, + 0x0463, 1048575, + 0x0465, 1048575, + 0x0467, 1048575, + 0x0469, 1048575, + 0x046b, 1048575, + 0x046d, 1048575, + 0x046f, 1048575, + 0x0471, 1048575, + 0x0473, 1048575, + 0x0475, 1048575, + 0x0477, 1048575, + 0x0479, 1048575, + 0x047b, 1048575, + 0x047d, 1048575, + 0x047f, 1048575, + 0x0481, 1048575, + 0x048b, 1048575, + 0x048d, 1048575, + 0x048f, 1048575, + 0x0491, 1048575, + 0x0493, 1048575, + 0x0495, 1048575, + 0x0497, 1048575, + 0x0499, 1048575, + 0x049b, 1048575, + 0x049d, 1048575, + 0x049f, 1048575, + 0x04a1, 1048575, + 0x04a3, 1048575, + 0x04a5, 1048575, + 0x04a7, 1048575, + 0x04a9, 1048575, + 0x04ab, 1048575, + 0x04ad, 1048575, + 0x04af, 1048575, + 0x04b1, 1048575, + 0x04b3, 1048575, + 0x04b5, 1048575, + 0x04b7, 1048575, + 0x04b9, 1048575, + 0x04bb, 1048575, + 0x04bd, 1048575, + 0x04bf, 1048575, + 0x04c2, 1048575, + 0x04c4, 1048575, + 0x04c6, 1048575, + 0x04c8, 1048575, + 0x04ca, 1048575, + 0x04cc, 1048575, + 0x04ce, 1048575, + 0x04cf, 1048561, + 0x04d1, 1048575, + 0x04d3, 1048575, + 0x04d5, 1048575, + 0x04d7, 1048575, + 0x04d9, 1048575, + 0x04db, 1048575, + 0x04dd, 1048575, + 0x04df, 1048575, + 0x04e1, 1048575, + 0x04e3, 1048575, + 0x04e5, 1048575, + 0x04e7, 1048575, + 0x04e9, 1048575, + 0x04eb, 1048575, + 0x04ed, 1048575, + 0x04ef, 1048575, + 0x04f1, 1048575, + 0x04f3, 1048575, + 0x04f5, 1048575, + 0x04f7, 1048575, + 0x04f9, 1048575, + 0x04fb, 
1048575, + 0x04fd, 1048575, + 0x04ff, 1048575, + 0x0501, 1048575, + 0x0503, 1048575, + 0x0505, 1048575, + 0x0507, 1048575, + 0x0509, 1048575, + 0x050b, 1048575, + 0x050d, 1048575, + 0x050f, 1048575, + 0x0511, 1048575, + 0x0513, 1048575, + 0x0515, 1048575, + 0x0517, 1048575, + 0x0519, 1048575, + 0x051b, 1048575, + 0x051d, 1048575, + 0x051f, 1048575, + 0x0521, 1048575, + 0x0523, 1048575, + 0x0525, 1048575, + 0x0527, 1048575, + 0x0529, 1048575, + 0x052b, 1048575, + 0x052d, 1048575, + 0x052f, 1048575, + 0x1d79, 1083908, + 0x1d7d, 1052390, + 0x1e01, 1048575, + 0x1e03, 1048575, + 0x1e05, 1048575, + 0x1e07, 1048575, + 0x1e09, 1048575, + 0x1e0b, 1048575, + 0x1e0d, 1048575, + 0x1e0f, 1048575, + 0x1e11, 1048575, + 0x1e13, 1048575, + 0x1e15, 1048575, + 0x1e17, 1048575, + 0x1e19, 1048575, + 0x1e1b, 1048575, + 0x1e1d, 1048575, + 0x1e1f, 1048575, + 0x1e21, 1048575, + 0x1e23, 1048575, + 0x1e25, 1048575, + 0x1e27, 1048575, + 0x1e29, 1048575, + 0x1e2b, 1048575, + 0x1e2d, 1048575, + 0x1e2f, 1048575, + 0x1e31, 1048575, + 0x1e33, 1048575, + 0x1e35, 1048575, + 0x1e37, 1048575, + 0x1e39, 1048575, + 0x1e3b, 1048575, + 0x1e3d, 1048575, + 0x1e3f, 1048575, + 0x1e41, 1048575, + 0x1e43, 1048575, + 0x1e45, 1048575, + 0x1e47, 1048575, + 0x1e49, 1048575, + 0x1e4b, 1048575, + 0x1e4d, 1048575, + 0x1e4f, 1048575, + 0x1e51, 1048575, + 0x1e53, 1048575, + 0x1e55, 1048575, + 0x1e57, 1048575, + 0x1e59, 1048575, + 0x1e5b, 1048575, + 0x1e5d, 1048575, + 0x1e5f, 1048575, + 0x1e61, 1048575, + 0x1e63, 1048575, + 0x1e65, 1048575, + 0x1e67, 1048575, + 0x1e69, 1048575, + 0x1e6b, 1048575, + 0x1e6d, 1048575, + 0x1e6f, 1048575, + 0x1e71, 1048575, + 0x1e73, 1048575, + 0x1e75, 1048575, + 0x1e77, 1048575, + 0x1e79, 1048575, + 0x1e7b, 1048575, + 0x1e7d, 1048575, + 0x1e7f, 1048575, + 0x1e81, 1048575, + 0x1e83, 1048575, + 0x1e85, 1048575, + 0x1e87, 1048575, + 0x1e89, 1048575, + 0x1e8b, 1048575, + 0x1e8d, 1048575, + 0x1e8f, 1048575, + 0x1e91, 1048575, + 0x1e93, 1048575, + 0x1e95, 1048575, + 0x1e9b, 1048517, + 0x1ea1, 
1048575, + 0x1ea3, 1048575, + 0x1ea5, 1048575, + 0x1ea7, 1048575, + 0x1ea9, 1048575, + 0x1eab, 1048575, + 0x1ead, 1048575, + 0x1eaf, 1048575, + 0x1eb1, 1048575, + 0x1eb3, 1048575, + 0x1eb5, 1048575, + 0x1eb7, 1048575, + 0x1eb9, 1048575, + 0x1ebb, 1048575, + 0x1ebd, 1048575, + 0x1ebf, 1048575, + 0x1ec1, 1048575, + 0x1ec3, 1048575, + 0x1ec5, 1048575, + 0x1ec7, 1048575, + 0x1ec9, 1048575, + 0x1ecb, 1048575, + 0x1ecd, 1048575, + 0x1ecf, 1048575, + 0x1ed1, 1048575, + 0x1ed3, 1048575, + 0x1ed5, 1048575, + 0x1ed7, 1048575, + 0x1ed9, 1048575, + 0x1edb, 1048575, + 0x1edd, 1048575, + 0x1edf, 1048575, + 0x1ee1, 1048575, + 0x1ee3, 1048575, + 0x1ee5, 1048575, + 0x1ee7, 1048575, + 0x1ee9, 1048575, + 0x1eeb, 1048575, + 0x1eed, 1048575, + 0x1eef, 1048575, + 0x1ef1, 1048575, + 0x1ef3, 1048575, + 0x1ef5, 1048575, + 0x1ef7, 1048575, + 0x1ef9, 1048575, + 0x1efb, 1048575, + 0x1efd, 1048575, + 0x1eff, 1048575, + 0x1f51, 1048584, + 0x1f53, 1048584, + 0x1f55, 1048584, + 0x1f57, 1048584, + 0x1fb3, 1048585, + 0x1fbe, 1041371, + 0x1fc3, 1048585, + 0x1fe5, 1048583, + 0x1ff3, 1048585, + 0x214e, 1048548, + 0x2184, 1048575, + 0x2c61, 1048575, + 0x2c65, 1037781, + 0x2c66, 1037784, + 0x2c68, 1048575, + 0x2c6a, 1048575, + 0x2c6c, 1048575, + 0x2c73, 1048575, + 0x2c76, 1048575, + 0x2c81, 1048575, + 0x2c83, 1048575, + 0x2c85, 1048575, + 0x2c87, 1048575, + 0x2c89, 1048575, + 0x2c8b, 1048575, + 0x2c8d, 1048575, + 0x2c8f, 1048575, + 0x2c91, 1048575, + 0x2c93, 1048575, + 0x2c95, 1048575, + 0x2c97, 1048575, + 0x2c99, 1048575, + 0x2c9b, 1048575, + 0x2c9d, 1048575, + 0x2c9f, 1048575, + 0x2ca1, 1048575, + 0x2ca3, 1048575, + 0x2ca5, 1048575, + 0x2ca7, 1048575, + 0x2ca9, 1048575, + 0x2cab, 1048575, + 0x2cad, 1048575, + 0x2caf, 1048575, + 0x2cb1, 1048575, + 0x2cb3, 1048575, + 0x2cb5, 1048575, + 0x2cb7, 1048575, + 0x2cb9, 1048575, + 0x2cbb, 1048575, + 0x2cbd, 1048575, + 0x2cbf, 1048575, + 0x2cc1, 1048575, + 0x2cc3, 1048575, + 0x2cc5, 1048575, + 0x2cc7, 1048575, + 0x2cc9, 1048575, + 0x2ccb, 1048575, + 0x2ccd, 
1048575, + 0x2ccf, 1048575, + 0x2cd1, 1048575, + 0x2cd3, 1048575, + 0x2cd5, 1048575, + 0x2cd7, 1048575, + 0x2cd9, 1048575, + 0x2cdb, 1048575, + 0x2cdd, 1048575, + 0x2cdf, 1048575, + 0x2ce1, 1048575, + 0x2ce3, 1048575, + 0x2cec, 1048575, + 0x2cee, 1048575, + 0x2cf3, 1048575, + 0x2d27, 1041312, + 0x2d2d, 1041312, + 0xa641, 1048575, + 0xa643, 1048575, + 0xa645, 1048575, + 0xa647, 1048575, + 0xa649, 1048575, + 0xa64b, 1048575, + 0xa64d, 1048575, + 0xa64f, 1048575, + 0xa651, 1048575, + 0xa653, 1048575, + 0xa655, 1048575, + 0xa657, 1048575, + 0xa659, 1048575, + 0xa65b, 1048575, + 0xa65d, 1048575, + 0xa65f, 1048575, + 0xa661, 1048575, + 0xa663, 1048575, + 0xa665, 1048575, + 0xa667, 1048575, + 0xa669, 1048575, + 0xa66b, 1048575, + 0xa66d, 1048575, + 0xa681, 1048575, + 0xa683, 1048575, + 0xa685, 1048575, + 0xa687, 1048575, + 0xa689, 1048575, + 0xa68b, 1048575, + 0xa68d, 1048575, + 0xa68f, 1048575, + 0xa691, 1048575, + 0xa693, 1048575, + 0xa695, 1048575, + 0xa697, 1048575, + 0xa699, 1048575, + 0xa69b, 1048575, + 0xa723, 1048575, + 0xa725, 1048575, + 0xa727, 1048575, + 0xa729, 1048575, + 0xa72b, 1048575, + 0xa72d, 1048575, + 0xa72f, 1048575, + 0xa733, 1048575, + 0xa735, 1048575, + 0xa737, 1048575, + 0xa739, 1048575, + 0xa73b, 1048575, + 0xa73d, 1048575, + 0xa73f, 1048575, + 0xa741, 1048575, + 0xa743, 1048575, + 0xa745, 1048575, + 0xa747, 1048575, + 0xa749, 1048575, + 0xa74b, 1048575, + 0xa74d, 1048575, + 0xa74f, 1048575, + 0xa751, 1048575, + 0xa753, 1048575, + 0xa755, 1048575, + 0xa757, 1048575, + 0xa759, 1048575, + 0xa75b, 1048575, + 0xa75d, 1048575, + 0xa75f, 1048575, + 0xa761, 1048575, + 0xa763, 1048575, + 0xa765, 1048575, + 0xa767, 1048575, + 0xa769, 1048575, + 0xa76b, 1048575, + 0xa76d, 1048575, + 0xa76f, 1048575, + 0xa77a, 1048575, + 0xa77c, 1048575, + 0xa77f, 1048575, + 0xa781, 1048575, + 0xa783, 1048575, + 0xa785, 1048575, + 0xa787, 1048575, + 0xa78c, 1048575, + 0xa791, 1048575, + 0xa793, 1048575, + 0xa797, 1048575, + 0xa799, 1048575, + 0xa79b, 1048575, + 0xa79d, 
1048575, + 0xa79f, 1048575, + 0xa7a1, 1048575, + 0xa7a3, 1048575, + 0xa7a5, 1048575, + 0xa7a7, 1048575, + 0xa7a9, 1048575, +}; + +char32_t toupper(char32_t c) noexcept +{ + char32_t *p; + + p = rbsearch(c, toupperr, nelem (toupperr)/3, 3); + if (p && c >= p[0] && c <= p[1]) + return c + p[2] - 1048576; + + p = rbsearch(c, touppers, nelem (touppers)/2, 2); + if (p && c == p[0]) + return c + p[1] - 1048576; + + return c; +} + +char32_t tolowerr[] = { + 0x0041, 0x005a, 1048608, + 0x00c0, 0x00d6, 1048608, + 0x00d8, 0x00de, 1048608, + 0x0189, 0x018a, 1048781, + 0x01b1, 0x01b2, 1048793, + 0x0388, 0x038a, 1048613, + 0x038e, 0x038f, 1048639, + 0x0391, 0x03a1, 1048608, + 0x03a3, 0x03ab, 1048608, + 0x03fd, 0x03ff, 1048446, + 0x0400, 0x040f, 1048656, + 0x0410, 0x042f, 1048608, + 0x0531, 0x0556, 1048624, + 0x10a0, 0x10c5, 1055840, + 0x1f08, 0x1f0f, 1048568, + 0x1f18, 0x1f1d, 1048568, + 0x1f28, 0x1f2f, 1048568, + 0x1f38, 0x1f3f, 1048568, + 0x1f48, 0x1f4d, 1048568, + 0x1f68, 0x1f6f, 1048568, + 0x1f88, 0x1f8f, 1048568, + 0x1f98, 0x1f9f, 1048568, + 0x1fa8, 0x1faf, 1048568, + 0x1fb8, 0x1fb9, 1048568, + 0x1fba, 0x1fbb, 1048502, + 0x1fc8, 0x1fcb, 1048490, + 0x1fd8, 0x1fd9, 1048568, + 0x1fda, 0x1fdb, 1048476, + 0x1fe8, 0x1fe9, 1048568, + 0x1fea, 0x1feb, 1048464, + 0x1ff8, 0x1ff9, 1048448, + 0x1ffa, 0x1ffb, 1048450, + 0x2160, 0x216f, 1048592, + 0x24b6, 0x24cf, 1048602, + 0x2c00, 0x2c2e, 1048624, + 0x2c7e, 0x2c7f, 1037761, + 0xff21, 0xff3a, 1048608, + 0x10400, 0x10427, 1048616, + 0x118a0, 0x118bf, 1048608, +}; + +static char32_t tolowers[] = { + 0x0100, 1048577, + 0x0102, 1048577, + 0x0104, 1048577, + 0x0106, 1048577, + 0x0108, 1048577, + 0x010a, 1048577, + 0x010c, 1048577, + 0x010e, 1048577, + 0x0110, 1048577, + 0x0112, 1048577, + 0x0114, 1048577, + 0x0116, 1048577, + 0x0118, 1048577, + 0x011a, 1048577, + 0x011c, 1048577, + 0x011e, 1048577, + 0x0120, 1048577, + 0x0122, 1048577, + 0x0124, 1048577, + 0x0126, 1048577, + 0x0128, 1048577, + 0x012a, 1048577, + 0x012c, 1048577, + 0x012e, 
1048577, + 0x0130, 1048377, + 0x0132, 1048577, + 0x0134, 1048577, + 0x0136, 1048577, + 0x0139, 1048577, + 0x013b, 1048577, + 0x013d, 1048577, + 0x013f, 1048577, + 0x0141, 1048577, + 0x0143, 1048577, + 0x0145, 1048577, + 0x0147, 1048577, + 0x014a, 1048577, + 0x014c, 1048577, + 0x014e, 1048577, + 0x0150, 1048577, + 0x0152, 1048577, + 0x0154, 1048577, + 0x0156, 1048577, + 0x0158, 1048577, + 0x015a, 1048577, + 0x015c, 1048577, + 0x015e, 1048577, + 0x0160, 1048577, + 0x0162, 1048577, + 0x0164, 1048577, + 0x0166, 1048577, + 0x0168, 1048577, + 0x016a, 1048577, + 0x016c, 1048577, + 0x016e, 1048577, + 0x0170, 1048577, + 0x0172, 1048577, + 0x0174, 1048577, + 0x0176, 1048577, + 0x0178, 1048455, + 0x0179, 1048577, + 0x017b, 1048577, + 0x017d, 1048577, + 0x0181, 1048786, + 0x0182, 1048577, + 0x0184, 1048577, + 0x0186, 1048782, + 0x0187, 1048577, + 0x018b, 1048577, + 0x018e, 1048655, + 0x018f, 1048778, + 0x0190, 1048779, + 0x0191, 1048577, + 0x0193, 1048781, + 0x0194, 1048783, + 0x0196, 1048787, + 0x0197, 1048785, + 0x0198, 1048577, + 0x019c, 1048787, + 0x019d, 1048789, + 0x019f, 1048790, + 0x01a0, 1048577, + 0x01a2, 1048577, + 0x01a4, 1048577, + 0x01a6, 1048794, + 0x01a7, 1048577, + 0x01a9, 1048794, + 0x01ac, 1048577, + 0x01ae, 1048794, + 0x01af, 1048577, + 0x01b3, 1048577, + 0x01b5, 1048577, + 0x01b7, 1048795, + 0x01b8, 1048577, + 0x01bc, 1048577, + 0x01c4, 1048578, + 0x01c5, 1048577, + 0x01c7, 1048578, + 0x01c8, 1048577, + 0x01ca, 1048578, + 0x01cb, 1048577, + 0x01cd, 1048577, + 0x01cf, 1048577, + 0x01d1, 1048577, + 0x01d3, 1048577, + 0x01d5, 1048577, + 0x01d7, 1048577, + 0x01d9, 1048577, + 0x01db, 1048577, + 0x01de, 1048577, + 0x01e0, 1048577, + 0x01e2, 1048577, + 0x01e4, 1048577, + 0x01e6, 1048577, + 0x01e8, 1048577, + 0x01ea, 1048577, + 0x01ec, 1048577, + 0x01ee, 1048577, + 0x01f1, 1048578, + 0x01f2, 1048577, + 0x01f4, 1048577, + 0x01f6, 1048479, + 0x01f7, 1048520, + 0x01f8, 1048577, + 0x01fa, 1048577, + 0x01fc, 1048577, + 0x01fe, 1048577, + 0x0200, 1048577, + 0x0202, 
1048577, + 0x0204, 1048577, + 0x0206, 1048577, + 0x0208, 1048577, + 0x020a, 1048577, + 0x020c, 1048577, + 0x020e, 1048577, + 0x0210, 1048577, + 0x0212, 1048577, + 0x0214, 1048577, + 0x0216, 1048577, + 0x0218, 1048577, + 0x021a, 1048577, + 0x021c, 1048577, + 0x021e, 1048577, + 0x0220, 1048446, + 0x0222, 1048577, + 0x0224, 1048577, + 0x0226, 1048577, + 0x0228, 1048577, + 0x022a, 1048577, + 0x022c, 1048577, + 0x022e, 1048577, + 0x0230, 1048577, + 0x0232, 1048577, + 0x023a, 1059371, + 0x023b, 1048577, + 0x023d, 1048413, + 0x023e, 1059368, + 0x0241, 1048577, + 0x0243, 1048381, + 0x0244, 1048645, + 0x0245, 1048647, + 0x0246, 1048577, + 0x0248, 1048577, + 0x024a, 1048577, + 0x024c, 1048577, + 0x024e, 1048577, + 0x0370, 1048577, + 0x0372, 1048577, + 0x0376, 1048577, + 0x037f, 1048692, + 0x0386, 1048614, + 0x038c, 1048640, + 0x03cf, 1048584, + 0x03d8, 1048577, + 0x03da, 1048577, + 0x03dc, 1048577, + 0x03de, 1048577, + 0x03e0, 1048577, + 0x03e2, 1048577, + 0x03e4, 1048577, + 0x03e6, 1048577, + 0x03e8, 1048577, + 0x03ea, 1048577, + 0x03ec, 1048577, + 0x03ee, 1048577, + 0x03f4, 1048516, + 0x03f7, 1048577, + 0x03f9, 1048569, + 0x03fa, 1048577, + 0x0460, 1048577, + 0x0462, 1048577, + 0x0464, 1048577, + 0x0466, 1048577, + 0x0468, 1048577, + 0x046a, 1048577, + 0x046c, 1048577, + 0x046e, 1048577, + 0x0470, 1048577, + 0x0472, 1048577, + 0x0474, 1048577, + 0x0476, 1048577, + 0x0478, 1048577, + 0x047a, 1048577, + 0x047c, 1048577, + 0x047e, 1048577, + 0x0480, 1048577, + 0x048a, 1048577, + 0x048c, 1048577, + 0x048e, 1048577, + 0x0490, 1048577, + 0x0492, 1048577, + 0x0494, 1048577, + 0x0496, 1048577, + 0x0498, 1048577, + 0x049a, 1048577, + 0x049c, 1048577, + 0x049e, 1048577, + 0x04a0, 1048577, + 0x04a2, 1048577, + 0x04a4, 1048577, + 0x04a6, 1048577, + 0x04a8, 1048577, + 0x04aa, 1048577, + 0x04ac, 1048577, + 0x04ae, 1048577, + 0x04b0, 1048577, + 0x04b2, 1048577, + 0x04b4, 1048577, + 0x04b6, 1048577, + 0x04b8, 1048577, + 0x04ba, 1048577, + 0x04bc, 1048577, + 0x04be, 1048577, + 0x04c0, 
1048591, + 0x04c1, 1048577, + 0x04c3, 1048577, + 0x04c5, 1048577, + 0x04c7, 1048577, + 0x04c9, 1048577, + 0x04cb, 1048577, + 0x04cd, 1048577, + 0x04d0, 1048577, + 0x04d2, 1048577, + 0x04d4, 1048577, + 0x04d6, 1048577, + 0x04d8, 1048577, + 0x04da, 1048577, + 0x04dc, 1048577, + 0x04de, 1048577, + 0x04e0, 1048577, + 0x04e2, 1048577, + 0x04e4, 1048577, + 0x04e6, 1048577, + 0x04e8, 1048577, + 0x04ea, 1048577, + 0x04ec, 1048577, + 0x04ee, 1048577, + 0x04f0, 1048577, + 0x04f2, 1048577, + 0x04f4, 1048577, + 0x04f6, 1048577, + 0x04f8, 1048577, + 0x04fa, 1048577, + 0x04fc, 1048577, + 0x04fe, 1048577, + 0x0500, 1048577, + 0x0502, 1048577, + 0x0504, 1048577, + 0x0506, 1048577, + 0x0508, 1048577, + 0x050a, 1048577, + 0x050c, 1048577, + 0x050e, 1048577, + 0x0510, 1048577, + 0x0512, 1048577, + 0x0514, 1048577, + 0x0516, 1048577, + 0x0518, 1048577, + 0x051a, 1048577, + 0x051c, 1048577, + 0x051e, 1048577, + 0x0520, 1048577, + 0x0522, 1048577, + 0x0524, 1048577, + 0x0526, 1048577, + 0x0528, 1048577, + 0x052a, 1048577, + 0x052c, 1048577, + 0x052e, 1048577, + 0x10c7, 1055840, + 0x10cd, 1055840, + 0x1e00, 1048577, + 0x1e02, 1048577, + 0x1e04, 1048577, + 0x1e06, 1048577, + 0x1e08, 1048577, + 0x1e0a, 1048577, + 0x1e0c, 1048577, + 0x1e0e, 1048577, + 0x1e10, 1048577, + 0x1e12, 1048577, + 0x1e14, 1048577, + 0x1e16, 1048577, + 0x1e18, 1048577, + 0x1e1a, 1048577, + 0x1e1c, 1048577, + 0x1e1e, 1048577, + 0x1e20, 1048577, + 0x1e22, 1048577, + 0x1e24, 1048577, + 0x1e26, 1048577, + 0x1e28, 1048577, + 0x1e2a, 1048577, + 0x1e2c, 1048577, + 0x1e2e, 1048577, + 0x1e30, 1048577, + 0x1e32, 1048577, + 0x1e34, 1048577, + 0x1e36, 1048577, + 0x1e38, 1048577, + 0x1e3a, 1048577, + 0x1e3c, 1048577, + 0x1e3e, 1048577, + 0x1e40, 1048577, + 0x1e42, 1048577, + 0x1e44, 1048577, + 0x1e46, 1048577, + 0x1e48, 1048577, + 0x1e4a, 1048577, + 0x1e4c, 1048577, + 0x1e4e, 1048577, + 0x1e50, 1048577, + 0x1e52, 1048577, + 0x1e54, 1048577, + 0x1e56, 1048577, + 0x1e58, 1048577, + 0x1e5a, 1048577, + 0x1e5c, 1048577, + 0x1e5e, 
1048577, + 0x1e60, 1048577, + 0x1e62, 1048577, + 0x1e64, 1048577, + 0x1e66, 1048577, + 0x1e68, 1048577, + 0x1e6a, 1048577, + 0x1e6c, 1048577, + 0x1e6e, 1048577, + 0x1e70, 1048577, + 0x1e72, 1048577, + 0x1e74, 1048577, + 0x1e76, 1048577, + 0x1e78, 1048577, + 0x1e7a, 1048577, + 0x1e7c, 1048577, + 0x1e7e, 1048577, + 0x1e80, 1048577, + 0x1e82, 1048577, + 0x1e84, 1048577, + 0x1e86, 1048577, + 0x1e88, 1048577, + 0x1e8a, 1048577, + 0x1e8c, 1048577, + 0x1e8e, 1048577, + 0x1e90, 1048577, + 0x1e92, 1048577, + 0x1e94, 1048577, + 0x1e9e, 1040961, + 0x1ea0, 1048577, + 0x1ea2, 1048577, + 0x1ea4, 1048577, + 0x1ea6, 1048577, + 0x1ea8, 1048577, + 0x1eaa, 1048577, + 0x1eac, 1048577, + 0x1eae, 1048577, + 0x1eb0, 1048577, + 0x1eb2, 1048577, + 0x1eb4, 1048577, + 0x1eb6, 1048577, + 0x1eb8, 1048577, + 0x1eba, 1048577, + 0x1ebc, 1048577, + 0x1ebe, 1048577, + 0x1ec0, 1048577, + 0x1ec2, 1048577, + 0x1ec4, 1048577, + 0x1ec6, 1048577, + 0x1ec8, 1048577, + 0x1eca, 1048577, + 0x1ecc, 1048577, + 0x1ece, 1048577, + 0x1ed0, 1048577, + 0x1ed2, 1048577, + 0x1ed4, 1048577, + 0x1ed6, 1048577, + 0x1ed8, 1048577, + 0x1eda, 1048577, + 0x1edc, 1048577, + 0x1ede, 1048577, + 0x1ee0, 1048577, + 0x1ee2, 1048577, + 0x1ee4, 1048577, + 0x1ee6, 1048577, + 0x1ee8, 1048577, + 0x1eea, 1048577, + 0x1eec, 1048577, + 0x1eee, 1048577, + 0x1ef0, 1048577, + 0x1ef2, 1048577, + 0x1ef4, 1048577, + 0x1ef6, 1048577, + 0x1ef8, 1048577, + 0x1efa, 1048577, + 0x1efc, 1048577, + 0x1efe, 1048577, + 0x1f59, 1048568, + 0x1f5b, 1048568, + 0x1f5d, 1048568, + 0x1f5f, 1048568, + 0x1fbc, 1048567, + 0x1fcc, 1048567, + 0x1fec, 1048569, + 0x1ffc, 1048567, + 0x2126, 1041059, + 0x212a, 1040193, + 0x212b, 1040314, + 0x2132, 1048604, + 0x2183, 1048577, + 0x2c60, 1048577, + 0x2c62, 1037833, + 0x2c63, 1044762, + 0x2c64, 1037849, + 0x2c67, 1048577, + 0x2c69, 1048577, + 0x2c6b, 1048577, + 0x2c6d, 1037796, + 0x2c6e, 1037827, + 0x2c6f, 1037793, + 0x2c70, 1037794, + 0x2c72, 1048577, + 0x2c75, 1048577, + 0x2c80, 1048577, + 0x2c82, 1048577, + 0x2c84, 
1048577, + 0x2c86, 1048577, + 0x2c88, 1048577, + 0x2c8a, 1048577, + 0x2c8c, 1048577, + 0x2c8e, 1048577, + 0x2c90, 1048577, + 0x2c92, 1048577, + 0x2c94, 1048577, + 0x2c96, 1048577, + 0x2c98, 1048577, + 0x2c9a, 1048577, + 0x2c9c, 1048577, + 0x2c9e, 1048577, + 0x2ca0, 1048577, + 0x2ca2, 1048577, + 0x2ca4, 1048577, + 0x2ca6, 1048577, + 0x2ca8, 1048577, + 0x2caa, 1048577, + 0x2cac, 1048577, + 0x2cae, 1048577, + 0x2cb0, 1048577, + 0x2cb2, 1048577, + 0x2cb4, 1048577, + 0x2cb6, 1048577, + 0x2cb8, 1048577, + 0x2cba, 1048577, + 0x2cbc, 1048577, + 0x2cbe, 1048577, + 0x2cc0, 1048577, + 0x2cc2, 1048577, + 0x2cc4, 1048577, + 0x2cc6, 1048577, + 0x2cc8, 1048577, + 0x2cca, 1048577, + 0x2ccc, 1048577, + 0x2cce, 1048577, + 0x2cd0, 1048577, + 0x2cd2, 1048577, + 0x2cd4, 1048577, + 0x2cd6, 1048577, + 0x2cd8, 1048577, + 0x2cda, 1048577, + 0x2cdc, 1048577, + 0x2cde, 1048577, + 0x2ce0, 1048577, + 0x2ce2, 1048577, + 0x2ceb, 1048577, + 0x2ced, 1048577, + 0x2cf2, 1048577, + 0xa640, 1048577, + 0xa642, 1048577, + 0xa644, 1048577, + 0xa646, 1048577, + 0xa648, 1048577, + 0xa64a, 1048577, + 0xa64c, 1048577, + 0xa64e, 1048577, + 0xa650, 1048577, + 0xa652, 1048577, + 0xa654, 1048577, + 0xa656, 1048577, + 0xa658, 1048577, + 0xa65a, 1048577, + 0xa65c, 1048577, + 0xa65e, 1048577, + 0xa660, 1048577, + 0xa662, 1048577, + 0xa664, 1048577, + 0xa666, 1048577, + 0xa668, 1048577, + 0xa66a, 1048577, + 0xa66c, 1048577, + 0xa680, 1048577, + 0xa682, 1048577, + 0xa684, 1048577, + 0xa686, 1048577, + 0xa688, 1048577, + 0xa68a, 1048577, + 0xa68c, 1048577, + 0xa68e, 1048577, + 0xa690, 1048577, + 0xa692, 1048577, + 0xa694, 1048577, + 0xa696, 1048577, + 0xa698, 1048577, + 0xa69a, 1048577, + 0xa722, 1048577, + 0xa724, 1048577, + 0xa726, 1048577, + 0xa728, 1048577, + 0xa72a, 1048577, + 0xa72c, 1048577, + 0xa72e, 1048577, + 0xa732, 1048577, + 0xa734, 1048577, + 0xa736, 1048577, + 0xa738, 1048577, + 0xa73a, 1048577, + 0xa73c, 1048577, + 0xa73e, 1048577, + 0xa740, 1048577, + 0xa742, 1048577, + 0xa744, 1048577, + 0xa746, 
1048577, + 0xa748, 1048577, + 0xa74a, 1048577, + 0xa74c, 1048577, + 0xa74e, 1048577, + 0xa750, 1048577, + 0xa752, 1048577, + 0xa754, 1048577, + 0xa756, 1048577, + 0xa758, 1048577, + 0xa75a, 1048577, + 0xa75c, 1048577, + 0xa75e, 1048577, + 0xa760, 1048577, + 0xa762, 1048577, + 0xa764, 1048577, + 0xa766, 1048577, + 0xa768, 1048577, + 0xa76a, 1048577, + 0xa76c, 1048577, + 0xa76e, 1048577, + 0xa779, 1048577, + 0xa77b, 1048577, + 0xa77d, 1013244, + 0xa77e, 1048577, + 0xa780, 1048577, + 0xa782, 1048577, + 0xa784, 1048577, + 0xa786, 1048577, + 0xa78b, 1048577, + 0xa78d, 1006296, + 0xa790, 1048577, + 0xa792, 1048577, + 0xa796, 1048577, + 0xa798, 1048577, + 0xa79a, 1048577, + 0xa79c, 1048577, + 0xa79e, 1048577, + 0xa7a0, 1048577, + 0xa7a2, 1048577, + 0xa7a4, 1048577, + 0xa7a6, 1048577, + 0xa7a8, 1048577, + 0xa7aa, 1006268, + 0xa7ab, 1006257, + 0xa7ac, 1006261, + 0xa7ad, 1006271, + 0xa7b0, 1006318, + 0xa7b1, 1006294, +}; + +char32_t tolower(char32_t c) noexcept +{ + char32_t *p; + + p = rbsearch(c, tolowerr, nelem (tolowerr)/3, 3); + if (p && c >= p[0] && c <= p[1]) + return c + p[2] - 1048576; + + p = rbsearch(c, tolowers, nelem (tolowers)/2, 2); + if (p && c == p[0]) + return c + p[1] - 1048576; + + return c; +} + +char32_t totitler[] = { + 0x0061, 0x007a, 1048544, + 0x00e0, 0x00f6, 1048544, + 0x00f8, 0x00fe, 1048544, + 0x023f, 0x0240, 1059391, + 0x0256, 0x0257, 1048371, + 0x028a, 0x028b, 1048359, + 0x037b, 0x037d, 1048706, + 0x03ad, 0x03af, 1048539, + 0x03b1, 0x03c1, 1048544, + 0x03c3, 0x03cb, 1048544, + 0x03cd, 0x03ce, 1048513, + 0x0430, 0x044f, 1048544, + 0x0450, 0x045f, 1048496, + 0x0561, 0x0586, 1048528, + 0x1f00, 0x1f07, 1048584, + 0x1f10, 0x1f15, 1048584, + 0x1f20, 0x1f27, 1048584, + 0x1f30, 0x1f37, 1048584, + 0x1f40, 0x1f45, 1048584, + 0x1f60, 0x1f67, 1048584, + 0x1f70, 0x1f71, 1048650, + 0x1f72, 0x1f75, 1048662, + 0x1f76, 0x1f77, 1048676, + 0x1f78, 0x1f79, 1048704, + 0x1f7a, 0x1f7b, 1048688, + 0x1f7c, 0x1f7d, 1048702, + 0x1f80, 0x1f87, 1048584, + 0x1f90, 0x1f97, 
1048584, + 0x1fa0, 0x1fa7, 1048584, + 0x1fb0, 0x1fb1, 1048584, + 0x1fd0, 0x1fd1, 1048584, + 0x1fe0, 0x1fe1, 1048584, + 0x2170, 0x217f, 1048560, + 0x24d0, 0x24e9, 1048550, + 0x2c30, 0x2c5e, 1048528, + 0x2d00, 0x2d25, 1041312, + 0xff41, 0xff5a, 1048544, + 0x10428, 0x1044f, 1048536, + 0x118c0, 0x118df, 1048544, +}; + +static char32_t totitles[] = { + 0x00b5, 1049319, + 0x00ff, 1048697, + 0x0101, 1048575, + 0x0103, 1048575, + 0x0105, 1048575, + 0x0107, 1048575, + 0x0109, 1048575, + 0x010b, 1048575, + 0x010d, 1048575, + 0x010f, 1048575, + 0x0111, 1048575, + 0x0113, 1048575, + 0x0115, 1048575, + 0x0117, 1048575, + 0x0119, 1048575, + 0x011b, 1048575, + 0x011d, 1048575, + 0x011f, 1048575, + 0x0121, 1048575, + 0x0123, 1048575, + 0x0125, 1048575, + 0x0127, 1048575, + 0x0129, 1048575, + 0x012b, 1048575, + 0x012d, 1048575, + 0x012f, 1048575, + 0x0131, 1048344, + 0x0133, 1048575, + 0x0135, 1048575, + 0x0137, 1048575, + 0x013a, 1048575, + 0x013c, 1048575, + 0x013e, 1048575, + 0x0140, 1048575, + 0x0142, 1048575, + 0x0144, 1048575, + 0x0146, 1048575, + 0x0148, 1048575, + 0x014b, 1048575, + 0x014d, 1048575, + 0x014f, 1048575, + 0x0151, 1048575, + 0x0153, 1048575, + 0x0155, 1048575, + 0x0157, 1048575, + 0x0159, 1048575, + 0x015b, 1048575, + 0x015d, 1048575, + 0x015f, 1048575, + 0x0161, 1048575, + 0x0163, 1048575, + 0x0165, 1048575, + 0x0167, 1048575, + 0x0169, 1048575, + 0x016b, 1048575, + 0x016d, 1048575, + 0x016f, 1048575, + 0x0171, 1048575, + 0x0173, 1048575, + 0x0175, 1048575, + 0x0177, 1048575, + 0x017a, 1048575, + 0x017c, 1048575, + 0x017e, 1048575, + 0x017f, 1048276, + 0x0180, 1048771, + 0x0183, 1048575, + 0x0185, 1048575, + 0x0188, 1048575, + 0x018c, 1048575, + 0x0192, 1048575, + 0x0195, 1048673, + 0x0199, 1048575, + 0x019a, 1048739, + 0x019e, 1048706, + 0x01a1, 1048575, + 0x01a3, 1048575, + 0x01a5, 1048575, + 0x01a8, 1048575, + 0x01ad, 1048575, + 0x01b0, 1048575, + 0x01b4, 1048575, + 0x01b6, 1048575, + 0x01b9, 1048575, + 0x01bd, 1048575, + 0x01bf, 1048632, + 0x01c4, 
1048577, + 0x01c6, 1048575, + 0x01c7, 1048577, + 0x01c9, 1048575, + 0x01ca, 1048577, + 0x01cc, 1048575, + 0x01ce, 1048575, + 0x01d0, 1048575, + 0x01d2, 1048575, + 0x01d4, 1048575, + 0x01d6, 1048575, + 0x01d8, 1048575, + 0x01da, 1048575, + 0x01dc, 1048575, + 0x01dd, 1048497, + 0x01df, 1048575, + 0x01e1, 1048575, + 0x01e3, 1048575, + 0x01e5, 1048575, + 0x01e7, 1048575, + 0x01e9, 1048575, + 0x01eb, 1048575, + 0x01ed, 1048575, + 0x01ef, 1048575, + 0x01f1, 1048577, + 0x01f3, 1048575, + 0x01f5, 1048575, + 0x01f9, 1048575, + 0x01fb, 1048575, + 0x01fd, 1048575, + 0x01ff, 1048575, + 0x0201, 1048575, + 0x0203, 1048575, + 0x0205, 1048575, + 0x0207, 1048575, + 0x0209, 1048575, + 0x020b, 1048575, + 0x020d, 1048575, + 0x020f, 1048575, + 0x0211, 1048575, + 0x0213, 1048575, + 0x0215, 1048575, + 0x0217, 1048575, + 0x0219, 1048575, + 0x021b, 1048575, + 0x021d, 1048575, + 0x021f, 1048575, + 0x0223, 1048575, + 0x0225, 1048575, + 0x0227, 1048575, + 0x0229, 1048575, + 0x022b, 1048575, + 0x022d, 1048575, + 0x022f, 1048575, + 0x0231, 1048575, + 0x0233, 1048575, + 0x023c, 1048575, + 0x0242, 1048575, + 0x0247, 1048575, + 0x0249, 1048575, + 0x024b, 1048575, + 0x024d, 1048575, + 0x024f, 1048575, + 0x0250, 1059359, + 0x0251, 1059356, + 0x0252, 1059358, + 0x0253, 1048366, + 0x0254, 1048370, + 0x0259, 1048374, + 0x025b, 1048373, + 0x025c, 1090895, + 0x0260, 1048371, + 0x0261, 1090891, + 0x0263, 1048369, + 0x0265, 1090856, + 0x0266, 1090884, + 0x0268, 1048367, + 0x0269, 1048365, + 0x026b, 1059319, + 0x026c, 1090881, + 0x026f, 1048365, + 0x0271, 1059325, + 0x0272, 1048363, + 0x0275, 1048362, + 0x027d, 1059303, + 0x0280, 1048358, + 0x0283, 1048358, + 0x0287, 1090858, + 0x0288, 1048358, + 0x0289, 1048507, + 0x028c, 1048505, + 0x0292, 1048357, + 0x029e, 1090834, + 0x0345, 1048660, + 0x0371, 1048575, + 0x0373, 1048575, + 0x0377, 1048575, + 0x03ac, 1048538, + 0x03c2, 1048545, + 0x03cc, 1048512, + 0x03d0, 1048514, + 0x03d1, 1048519, + 0x03d5, 1048529, + 0x03d6, 1048522, + 0x03d7, 1048568, + 0x03d9, 
1048575, + 0x03db, 1048575, + 0x03dd, 1048575, + 0x03df, 1048575, + 0x03e1, 1048575, + 0x03e3, 1048575, + 0x03e5, 1048575, + 0x03e7, 1048575, + 0x03e9, 1048575, + 0x03eb, 1048575, + 0x03ed, 1048575, + 0x03ef, 1048575, + 0x03f0, 1048490, + 0x03f1, 1048496, + 0x03f2, 1048583, + 0x03f3, 1048460, + 0x03f5, 1048480, + 0x03f8, 1048575, + 0x03fb, 1048575, + 0x0461, 1048575, + 0x0463, 1048575, + 0x0465, 1048575, + 0x0467, 1048575, + 0x0469, 1048575, + 0x046b, 1048575, + 0x046d, 1048575, + 0x046f, 1048575, + 0x0471, 1048575, + 0x0473, 1048575, + 0x0475, 1048575, + 0x0477, 1048575, + 0x0479, 1048575, + 0x047b, 1048575, + 0x047d, 1048575, + 0x047f, 1048575, + 0x0481, 1048575, + 0x048b, 1048575, + 0x048d, 1048575, + 0x048f, 1048575, + 0x0491, 1048575, + 0x0493, 1048575, + 0x0495, 1048575, + 0x0497, 1048575, + 0x0499, 1048575, + 0x049b, 1048575, + 0x049d, 1048575, + 0x049f, 1048575, + 0x04a1, 1048575, + 0x04a3, 1048575, + 0x04a5, 1048575, + 0x04a7, 1048575, + 0x04a9, 1048575, + 0x04ab, 1048575, + 0x04ad, 1048575, + 0x04af, 1048575, + 0x04b1, 1048575, + 0x04b3, 1048575, + 0x04b5, 1048575, + 0x04b7, 1048575, + 0x04b9, 1048575, + 0x04bb, 1048575, + 0x04bd, 1048575, + 0x04bf, 1048575, + 0x04c2, 1048575, + 0x04c4, 1048575, + 0x04c6, 1048575, + 0x04c8, 1048575, + 0x04ca, 1048575, + 0x04cc, 1048575, + 0x04ce, 1048575, + 0x04cf, 1048561, + 0x04d1, 1048575, + 0x04d3, 1048575, + 0x04d5, 1048575, + 0x04d7, 1048575, + 0x04d9, 1048575, + 0x04db, 1048575, + 0x04dd, 1048575, + 0x04df, 1048575, + 0x04e1, 1048575, + 0x04e3, 1048575, + 0x04e5, 1048575, + 0x04e7, 1048575, + 0x04e9, 1048575, + 0x04eb, 1048575, + 0x04ed, 1048575, + 0x04ef, 1048575, + 0x04f1, 1048575, + 0x04f3, 1048575, + 0x04f5, 1048575, + 0x04f7, 1048575, + 0x04f9, 1048575, + 0x04fb, 1048575, + 0x04fd, 1048575, + 0x04ff, 1048575, + 0x0501, 1048575, + 0x0503, 1048575, + 0x0505, 1048575, + 0x0507, 1048575, + 0x0509, 1048575, + 0x050b, 1048575, + 0x050d, 1048575, + 0x050f, 1048575, + 0x0511, 1048575, + 0x0513, 1048575, + 0x0515, 
1048575, + 0x0517, 1048575, + 0x0519, 1048575, + 0x051b, 1048575, + 0x051d, 1048575, + 0x051f, 1048575, + 0x0521, 1048575, + 0x0523, 1048575, + 0x0525, 1048575, + 0x0527, 1048575, + 0x0529, 1048575, + 0x052b, 1048575, + 0x052d, 1048575, + 0x052f, 1048575, + 0x1d79, 1083908, + 0x1d7d, 1052390, + 0x1e01, 1048575, + 0x1e03, 1048575, + 0x1e05, 1048575, + 0x1e07, 1048575, + 0x1e09, 1048575, + 0x1e0b, 1048575, + 0x1e0d, 1048575, + 0x1e0f, 1048575, + 0x1e11, 1048575, + 0x1e13, 1048575, + 0x1e15, 1048575, + 0x1e17, 1048575, + 0x1e19, 1048575, + 0x1e1b, 1048575, + 0x1e1d, 1048575, + 0x1e1f, 1048575, + 0x1e21, 1048575, + 0x1e23, 1048575, + 0x1e25, 1048575, + 0x1e27, 1048575, + 0x1e29, 1048575, + 0x1e2b, 1048575, + 0x1e2d, 1048575, + 0x1e2f, 1048575, + 0x1e31, 1048575, + 0x1e33, 1048575, + 0x1e35, 1048575, + 0x1e37, 1048575, + 0x1e39, 1048575, + 0x1e3b, 1048575, + 0x1e3d, 1048575, + 0x1e3f, 1048575, + 0x1e41, 1048575, + 0x1e43, 1048575, + 0x1e45, 1048575, + 0x1e47, 1048575, + 0x1e49, 1048575, + 0x1e4b, 1048575, + 0x1e4d, 1048575, + 0x1e4f, 1048575, + 0x1e51, 1048575, + 0x1e53, 1048575, + 0x1e55, 1048575, + 0x1e57, 1048575, + 0x1e59, 1048575, + 0x1e5b, 1048575, + 0x1e5d, 1048575, + 0x1e5f, 1048575, + 0x1e61, 1048575, + 0x1e63, 1048575, + 0x1e65, 1048575, + 0x1e67, 1048575, + 0x1e69, 1048575, + 0x1e6b, 1048575, + 0x1e6d, 1048575, + 0x1e6f, 1048575, + 0x1e71, 1048575, + 0x1e73, 1048575, + 0x1e75, 1048575, + 0x1e77, 1048575, + 0x1e79, 1048575, + 0x1e7b, 1048575, + 0x1e7d, 1048575, + 0x1e7f, 1048575, + 0x1e81, 1048575, + 0x1e83, 1048575, + 0x1e85, 1048575, + 0x1e87, 1048575, + 0x1e89, 1048575, + 0x1e8b, 1048575, + 0x1e8d, 1048575, + 0x1e8f, 1048575, + 0x1e91, 1048575, + 0x1e93, 1048575, + 0x1e95, 1048575, + 0x1e9b, 1048517, + 0x1ea1, 1048575, + 0x1ea3, 1048575, + 0x1ea5, 1048575, + 0x1ea7, 1048575, + 0x1ea9, 1048575, + 0x1eab, 1048575, + 0x1ead, 1048575, + 0x1eaf, 1048575, + 0x1eb1, 1048575, + 0x1eb3, 1048575, + 0x1eb5, 1048575, + 0x1eb7, 1048575, + 0x1eb9, 1048575, + 0x1ebb, 
1048575, + 0x1ebd, 1048575, + 0x1ebf, 1048575, + 0x1ec1, 1048575, + 0x1ec3, 1048575, + 0x1ec5, 1048575, + 0x1ec7, 1048575, + 0x1ec9, 1048575, + 0x1ecb, 1048575, + 0x1ecd, 1048575, + 0x1ecf, 1048575, + 0x1ed1, 1048575, + 0x1ed3, 1048575, + 0x1ed5, 1048575, + 0x1ed7, 1048575, + 0x1ed9, 1048575, + 0x1edb, 1048575, + 0x1edd, 1048575, + 0x1edf, 1048575, + 0x1ee1, 1048575, + 0x1ee3, 1048575, + 0x1ee5, 1048575, + 0x1ee7, 1048575, + 0x1ee9, 1048575, + 0x1eeb, 1048575, + 0x1eed, 1048575, + 0x1eef, 1048575, + 0x1ef1, 1048575, + 0x1ef3, 1048575, + 0x1ef5, 1048575, + 0x1ef7, 1048575, + 0x1ef9, 1048575, + 0x1efb, 1048575, + 0x1efd, 1048575, + 0x1eff, 1048575, + 0x1f51, 1048584, + 0x1f53, 1048584, + 0x1f55, 1048584, + 0x1f57, 1048584, + 0x1fb3, 1048585, + 0x1fbe, 1041371, + 0x1fc3, 1048585, + 0x1fe5, 1048583, + 0x1ff3, 1048585, + 0x214e, 1048548, + 0x2184, 1048575, + 0x2c61, 1048575, + 0x2c65, 1037781, + 0x2c66, 1037784, + 0x2c68, 1048575, + 0x2c6a, 1048575, + 0x2c6c, 1048575, + 0x2c73, 1048575, + 0x2c76, 1048575, + 0x2c81, 1048575, + 0x2c83, 1048575, + 0x2c85, 1048575, + 0x2c87, 1048575, + 0x2c89, 1048575, + 0x2c8b, 1048575, + 0x2c8d, 1048575, + 0x2c8f, 1048575, + 0x2c91, 1048575, + 0x2c93, 1048575, + 0x2c95, 1048575, + 0x2c97, 1048575, + 0x2c99, 1048575, + 0x2c9b, 1048575, + 0x2c9d, 1048575, + 0x2c9f, 1048575, + 0x2ca1, 1048575, + 0x2ca3, 1048575, + 0x2ca5, 1048575, + 0x2ca7, 1048575, + 0x2ca9, 1048575, + 0x2cab, 1048575, + 0x2cad, 1048575, + 0x2caf, 1048575, + 0x2cb1, 1048575, + 0x2cb3, 1048575, + 0x2cb5, 1048575, + 0x2cb7, 1048575, + 0x2cb9, 1048575, + 0x2cbb, 1048575, + 0x2cbd, 1048575, + 0x2cbf, 1048575, + 0x2cc1, 1048575, + 0x2cc3, 1048575, + 0x2cc5, 1048575, + 0x2cc7, 1048575, + 0x2cc9, 1048575, + 0x2ccb, 1048575, + 0x2ccd, 1048575, + 0x2ccf, 1048575, + 0x2cd1, 1048575, + 0x2cd3, 1048575, + 0x2cd5, 1048575, + 0x2cd7, 1048575, + 0x2cd9, 1048575, + 0x2cdb, 1048575, + 0x2cdd, 1048575, + 0x2cdf, 1048575, + 0x2ce1, 1048575, + 0x2ce3, 1048575, + 0x2cec, 1048575, + 0x2cee, 
1048575, + 0x2cf3, 1048575, + 0x2d27, 1041312, + 0x2d2d, 1041312, + 0xa641, 1048575, + 0xa643, 1048575, + 0xa645, 1048575, + 0xa647, 1048575, + 0xa649, 1048575, + 0xa64b, 1048575, + 0xa64d, 1048575, + 0xa64f, 1048575, + 0xa651, 1048575, + 0xa653, 1048575, + 0xa655, 1048575, + 0xa657, 1048575, + 0xa659, 1048575, + 0xa65b, 1048575, + 0xa65d, 1048575, + 0xa65f, 1048575, + 0xa661, 1048575, + 0xa663, 1048575, + 0xa665, 1048575, + 0xa667, 1048575, + 0xa669, 1048575, + 0xa66b, 1048575, + 0xa66d, 1048575, + 0xa681, 1048575, + 0xa683, 1048575, + 0xa685, 1048575, + 0xa687, 1048575, + 0xa689, 1048575, + 0xa68b, 1048575, + 0xa68d, 1048575, + 0xa68f, 1048575, + 0xa691, 1048575, + 0xa693, 1048575, + 0xa695, 1048575, + 0xa697, 1048575, + 0xa699, 1048575, + 0xa69b, 1048575, + 0xa723, 1048575, + 0xa725, 1048575, + 0xa727, 1048575, + 0xa729, 1048575, + 0xa72b, 1048575, + 0xa72d, 1048575, + 0xa72f, 1048575, + 0xa733, 1048575, + 0xa735, 1048575, + 0xa737, 1048575, + 0xa739, 1048575, + 0xa73b, 1048575, + 0xa73d, 1048575, + 0xa73f, 1048575, + 0xa741, 1048575, + 0xa743, 1048575, + 0xa745, 1048575, + 0xa747, 1048575, + 0xa749, 1048575, + 0xa74b, 1048575, + 0xa74d, 1048575, + 0xa74f, 1048575, + 0xa751, 1048575, + 0xa753, 1048575, + 0xa755, 1048575, + 0xa757, 1048575, + 0xa759, 1048575, + 0xa75b, 1048575, + 0xa75d, 1048575, + 0xa75f, 1048575, + 0xa761, 1048575, + 0xa763, 1048575, + 0xa765, 1048575, + 0xa767, 1048575, + 0xa769, 1048575, + 0xa76b, 1048575, + 0xa76d, 1048575, + 0xa76f, 1048575, + 0xa77a, 1048575, + 0xa77c, 1048575, + 0xa77f, 1048575, + 0xa781, 1048575, + 0xa783, 1048575, + 0xa785, 1048575, + 0xa787, 1048575, + 0xa78c, 1048575, + 0xa791, 1048575, + 0xa793, 1048575, + 0xa797, 1048575, + 0xa799, 1048575, + 0xa79b, 1048575, + 0xa79d, 1048575, + 0xa79f, 1048575, + 0xa7a1, 1048575, + 0xa7a3, 1048575, + 0xa7a5, 1048575, + 0xa7a7, 1048575, + 0xa7a9, 1048575, +}; + +char32_t totitle(char32_t c) noexcept +{ + char32_t *p; + + p = rbsearch(c, totitler, nelem (totitler)/3, 3); + if (p && 
c >= p[0] && c <= p[1]) + return c + p[2] - 1048576; + + p = rbsearch(c, totitles, nelem (totitles)/2, 2); + if (p && c == p[0]) + return c + p[1] - 1048576; + + return c; +} +
+/**
+ * Encode a unicode point as a NUL-terminated UTF-8 sequence.
+ *
+ * The sequence length comes from nbytesPoint(); when that reports an
+ * invalid point the buffer is set to the empty string.
+ *
+ * @param c the unicode point
+ * @param res the destination buffer (up to 4 bytes plus the terminator)
+ */
+void encode(char32_t c, char res[5]) noexcept
+{
+	switch (nbytesPoint(c)) {
+	case 1:
+		res[0] = c;
+		res[1] = '\0';
+		break;
+	case 2:
+		res[0] = 0xC0 | ((c >> 6) & 0x1F);
+		res[1] = 0x80 | (c & 0x3F);
+		res[2] = '\0';
+		break;
+	case 3:
+		res[0] = 0xE0 | ((c >> 12) & 0xF );
+		res[1] = 0x80 | ((c >> 6) & 0x3F);
+		res[2] = 0x80 | (c & 0x3F);
+		res[3] = '\0';
+		break;
+	case 4:
+		res[0] = 0xF0 | ((c >> 18) & 0x7 );
+		res[1] = 0x80 | ((c >> 12) & 0x3F);
+		res[2] = 0x80 | ((c >> 6) & 0x3F);
+		res[3] = 0x80 | (c & 0x3F);
+		res[4] = '\0';
+		break;
+	default:
+		// Invalid point: hand back an empty string instead of an
+		// uninitialized buffer.
+		res[0] = '\0';
+		break;
+	}
+}
+
+/**
+ * Decode the UTF-8 sequence starting at res into a unicode point.
+ *
+ * The number of bytes read is the one nbytesUtf8() reports for res[0];
+ * the caller must make sure that many bytes are readable.
+ *
+ * @param c the decoded point (reset to 0 first)
+ * @param res the first byte of the sequence
+ */
+void decode(char32_t &c, const char *res) noexcept
+{
+	c = 0;
+
+	switch (nbytesUtf8(res[0])) {
+	case 1:
+		c = res[0];
+		break;
+	case 2:
+		c = (res[0] & 0x1f) << 6;
+		c |= (res[1] & 0x3f);
+		break;
+	case 3:
+		c = (res[0] & 0x0f) << 12;
+		c |= (res[1] & 0x3f) << 6;
+		c |= (res[2] & 0x3f);
+		break;
+	case 4:
+		// The lead byte of a 4-byte sequence holds bits 18-20 of the
+		// point (encode() stores them with ">> 18"), so it must be
+		// shifted by 18, not 16.
+		c = (res[0] & 0x07) << 18;
+		c |= (res[1] & 0x3f) << 12;
+		c |= (res[2] & 0x3f) << 6;
+		c |= (res[3] & 0x3f);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * Get the sequence length announced by a UTF-8 lead byte.
+ *
+ * Any byte that does not match a 2, 3 or 4 bytes lead pattern is
+ * reported as a single byte.
+ *
+ * @param c the first byte of the sequence
+ * @return the number of bytes [1-4]
+ */
+int nbytesUtf8(char c) noexcept
+{
+	if ((c & 0xE0) == 0xC0)
+		return 2;
+	if ((c & 0xF0) == 0xE0)
+		return 3;
+	if ((c & 0xF8) == 0xF0)
+		return 4;
+
+	return 1;
+}
+
+/**
+ * Get the number of UTF-8 bytes needed for the unicode point.
+ *
+ * @param c the unicode point
+ * @return the number of bytes [1-4] or -1 if the point is above 0x1FFFFF
+ */
+int nbytesPoint(char32_t c) noexcept
+{
+	if (c <= 0x7F)
+		return 1;
+	if (c <= 0x7FF)
+		return 2;
+	if (c <= 0xFFFF)
+		return 3;
+	if (c <= 0x1FFFFF)
+		return 4;
+
+	return -1;
+}
+
+/**
+ * Count the unicode points in the UTF-8 string by walking it with
+ * forEach().
+ *
+ * @param str the UTF-8 string
+ * @return the number of points
+ */
+int length(const std::string &str)
+{
+	int total = 0;
+
+	forEach(str, [&] (char32_t) {
+		++ total;
+	});
+
+	return total;
+}
+
+/**
+ * Convert a UTF-32 string to UTF-8.
+ *
+ * @param array the UTF-32 string
+ * @return the UTF-8 string
+ * @throw std::invalid_argument if a point is rejected by nbytesPoint()
+ */
+std::string toUtf8(const std::u32string &array)
+{
+	std::string res;
+
+	for (char32_t point : array) {
+		char tmp[5];
+
+		if (nbytesPoint(point) < 0) {
+			throw std::invalid_argument("invalid sequence");
+		}
+
+		encode(point, tmp);
+		res += tmp;
+	}
+
+	return res;
+}
+
+/**
+ * Convert a UTF-8 string to UTF-32 by collecting every point that
+ * forEach() yields.
+ *
+ * @param str the UTF-8 string
+ * @return the UTF-32 string
+ */
+std::u32string toUtf32(const std::string &str)
+{
+	std::u32string res;
+
+	forEach(str, [&] (char32_t code) {
+		res.push_back(code);
+	});
+
+	return res;
+}
+
+} // !unicode
--- a/C++/modules/Unicode/Unicode.h Mon Sep 28 19:40:16 2015 +0200 +++ b/C++/modules/Unicode/Unicode.h Mon Sep 28 19:40:26 2015 +0200 @@ -27,215 +27,211 @@ #include <stdexcept> #include <string> +namespace unicode { + +void encode(char32_t point, char res[5]) noexcept; +void decode(char32_t &c, const char *res) noexcept; + /** - * @class Unicode - * @brief Conversion between UTF-8 and UTF-32 + * Get the number of bytes for the first multi byte character from a + * utf-8 string. + * + * This can be used to iterate a valid UTF-8 string to jump to the next + * real character. + * + * @param c the first multi byte character + * @return the number of bytes [1-4] */ -class Unicode { -private: - static void encode(char32_t point, char res[5]) noexcept; - static void decode(char32_t &c, const char *res) noexcept; +int nbytesUtf8(char c) noexcept; + +/** + * Get the number of bytes for the unicode point. + * + * @param point the unicode point + * @return the number of bytes [1-4] or -1 on invalid + */ +int nbytesPoint(char32_t point) noexcept; + +/** + * Get real number of character in a string. + * + * @param str the string + * @return the length + * @throw std::invalid_argument on invalid sequence + */ +int length(const std::string &str); -public: - /** - * Get the number of bytes for the first multi byte character from a - * utf-8 string. - * - * This can be used to iterate a valid UTF-8 string to jump to the next - * real character. - * - * @param c the first multi byte character - * @return the number of bytes [1-4] - */ - static int nbytesUtf8(char c) noexcept; +/** + * Iterate over all real characters in the UTF-8 string. 
+ * + * The function must have the following signature: + * void f(char32_t ch) + * + * @param str the UTF-8 string + * @throw std::invalid_argument on invalid or truncated sequence + */ +template <typename Func> +void forEach(const std::string &str, Func function) +{ + for (size_t i = 0; i < str.size(); ) { + char32_t point = 0; + int size = nbytesUtf8(str[i]); + + // A lead byte may announce more bytes than the string still holds; + // reject it here so decode() never reads past the end of the buffer. + if (size < 0 || str.size() - i < static_cast<size_t>(size)) { + throw std::invalid_argument("invalid sequence"); + } - /** - * Get the number of bytes for the unicode point. - * - * @param point the unicode point - * @return the number of bytes [1-4] or -1 on invalid - */ - static int nbytesPoint(char32_t point) noexcept; + decode(point, str.data() + i); + function(point); + + i += size; + } +} + +/** + * Convert a UTF-32 string to UTF-8 string. + * + * @param array the UTF-32 string + * @return the UTF-8 string + * @throw std::invalid_argument on invalid sequence + */ +std::string toUtf8(const std::u32string &array); + +/** + * Convert a UTF-8 string to UTF-32 string. + * + * @param str the UTF-8 string + * @return the UTF-32 string + * @throw std::invalid_argument on invalid sequence + */ +std::u32string toUtf32(const std::string &str); - /** - * Get real number of character in a string. - * - * @param str the string - * @return the length - * @throw std::invalid_argument on invalid sequence - */ - static int length(const std::string &str); +/** + * Check if the unicode character is space. + * + * @param c the character + * @return true if space + */ +bool isspace(char32_t c) noexcept; + +/** + * Check if the unicode character is digit. + * + * @param c the character + * @return true if digit + */ +bool isdigit(char32_t c) noexcept; + +/** + * Check if the unicode character is alpha category. + * + * @param c the character + * @return true if alpha + */ +bool isalpha(char32_t c) noexcept; + +/** + * Check if the unicode character is upper case. 
+ * + * @param c the character + * @return true if upper case + */ +bool isupper(char32_t c) noexcept; + +/** + * Check if the unicode character is lower case. + * + * @param c the character + * @return true if lower case + */ +bool islower(char32_t c) noexcept; - /** - * Iterate over all real characters in the UTF-8 string. - * - * The function must have the following signature: - * void f(char ch) - * - * @param str the UTF-8 string - * @throw std::invalid_argument on invalid sequence - */ - template <typename Func> - static void forEach(const std::string &str, Func function) - { - for (size_t i = 0; i < str.size(); ) { - char32_t point = 0; - int size = nbytesUtf8(str[i]); +/** + * Check if the unicode character is title case. + * + * @param c the character + * @return true if title case + */ +bool istitle(char32_t c) noexcept; + +/** + * Convert to upper case. + * + * @param c the character + * @return the upper case character + */ +char32_t toupper(char32_t c) noexcept; - if (size < 0) { - throw std::invalid_argument("invalid sequence"); - } +/** + * Convert to lower case. + * + * @param c the character + * @return the lower case character + */ +char32_t tolower(char32_t c) noexcept; - decode(point, str.data() + i); - function(point); +/** + * Convert to title case. + * + * @param c the character + * @return the title case character + */ +char32_t totitle(char32_t c) noexcept; - i += size; - } +/** + * Convert the UTF-32 string to upper case. + * + * @param str the str + * @return the upper case string + */ +inline std::u32string toupper(std::u32string str) +{ + for (size_t i = 0; i < str.size(); ++i) { + str[i] = toupper(str[i]); } - /** - * Convert a UTF-32 string to UTF-8 string. - * - * @param array the UTF-32 string - * @return the UTF-8 string - * @throw std::invalid_argument on invalid sequence - */ - static std::string toUtf8(const std::u32string &array); - - /** - * Convert a UTF-8 string to UTF-32 string. 
- * - * @param str the UTF-8 string - * @return the UTF-32 string - * @throw std::invalid_argument on invalid sequence - */ - static std::u32string toUtf32(const std::string &str); - - /** - * Check if the unicode character is space. - * - * @param c the character - * @return true if space - */ - static bool isspace(char32_t c) noexcept; - - /** - * Check if the unicode character is digit. - * - * @param c the character - * @return true if digit - */ - static bool isdigit(char32_t c) noexcept; - - /** - * Check if the unicode character is alpha category. - * - * @param c the character - * @return true if alpha - */ - static bool isalpha(char32_t c) noexcept; - - /** - * Check if the unicode character is upper case. - * - * @param c the character - * @return true if upper case - */ - static bool isupper(char32_t c) noexcept; + return str; +} - /** - * Check if the unicode character is lower case. - * - * @param c the character - * @return true if lower case - */ - static bool islower(char32_t c) noexcept; - - /** - * Check if the unicode character is title case. - * - * @param c the character - * @return true if title case - */ - static bool istitle(char32_t c) noexcept; - - /** - * Convert to upper case. - * - * @param c the character - * @return the upper case character - */ - static char32_t toupper(char32_t c) noexcept; +/** + * Convert the UTF-8 string to upper case. + * + * @param str the str + * @return the upper case string + * @warning very slow at the moment + */ +inline std::string toupper(const std::string &str) +{ + return toUtf8(toupper(toUtf32(str))); +} - /** - * Convert to lower case. - * - * @param c the character - * @return the lower case character - */ - static char32_t tolower(char32_t c) noexcept; - - /** - * Convert to title case. - * - * @param c the character - * @return the title case character - */ - static char32_t totitle(char32_t c) noexcept; - - /** - * Convert the UTF-8 string to upper case. 
- * - * @param str the str - * @return the upper case string - * @warning very slow at the moment - */ - static inline std::string toupper(const std::string &str) - { - return toUtf8(toupper(toUtf32(str))); +/** + * Convert the UTF-32 string to lower case. + * + * @param str the str + * @return the lower case string + */ +inline std::u32string tolower(std::u32string str) +{ + for (size_t i = 0; i < str.size(); ++i) { + str[i] = tolower(str[i]); } - /** - * Convert the UTF-32 string to upper case. - * - * @param str the str - * @return the upper case string - */ - static inline std::u32string toupper(std::u32string str) - { - for (size_t i = 0; i < str.size(); ++i) { - str[i] = toupper(str[i]); - } - - return str; - } + return str; +} - /** - * Convert the UTF-8 string to lower case. - * - * @param str the str - * @return the lower case string - * @warning very slow at the moment - */ - static inline std::string tolower(const std::string &str) - { - return toUtf8(tolower(toUtf32(str))); - } +/** + * Convert the UTF-8 string to lower case. + * + * @param str the str + * @return the lower case string + * @warning very slow at the moment + */ +inline std::string tolower(const std::string &str) +{ + return toUtf8(tolower(toUtf32(str))); +} - /** - * Convert the UTF-32 string to lower case. - * - * @param str the str - * @return the lower case string - */ - static inline std::u32string tolower(std::u32string str) - { - for (size_t i = 0; i < str.size(); ++i) { - str[i] = tolower(str[i]); - } - - return str; - } -}; +} // !unicode #endif // !_UTF8_H_
--- a/C++/tests/Base64/main.cpp Mon Sep 28 19:40:16 2015 +0200 +++ b/C++/tests/Base64/main.cpp Mon Sep 28 19:40:26 2015 +0200 @@ -22,158 +22,158 @@ TEST(Lookup, lookup) { - ASSERT_EQ('A', Base64::lookup(0b000000)); - ASSERT_EQ('B', Base64::lookup(0b000001)); - ASSERT_EQ('C', Base64::lookup(0b000010)); - ASSERT_EQ('D', Base64::lookup(0b000011)); - ASSERT_EQ('E', Base64::lookup(0b000100)); - ASSERT_EQ('F', Base64::lookup(0b000101)); - ASSERT_EQ('G', Base64::lookup(0b000110)); - ASSERT_EQ('H', Base64::lookup(0b000111)); - ASSERT_EQ('I', Base64::lookup(0b001000)); - ASSERT_EQ('J', Base64::lookup(0b001001)); - ASSERT_EQ('K', Base64::lookup(0b001010)); - ASSERT_EQ('L', Base64::lookup(0b001011)); - ASSERT_EQ('M', Base64::lookup(0b001100)); - ASSERT_EQ('N', Base64::lookup(0b001101)); - ASSERT_EQ('O', Base64::lookup(0b001110)); - ASSERT_EQ('P', Base64::lookup(0b001111)); - ASSERT_EQ('Q', Base64::lookup(0b010000)); - ASSERT_EQ('R', Base64::lookup(0b010001)); - ASSERT_EQ('S', Base64::lookup(0b010010)); - ASSERT_EQ('T', Base64::lookup(0b010011)); - ASSERT_EQ('U', Base64::lookup(0b010100)); - ASSERT_EQ('V', Base64::lookup(0b010101)); - ASSERT_EQ('W', Base64::lookup(0b010110)); - ASSERT_EQ('X', Base64::lookup(0b010111)); - ASSERT_EQ('Y', Base64::lookup(0b011000)); - ASSERT_EQ('Z', Base64::lookup(0b011001)); - ASSERT_EQ('a', Base64::lookup(0b011010)); - ASSERT_EQ('b', Base64::lookup(0b011011)); - ASSERT_EQ('c', Base64::lookup(0b011100)); - ASSERT_EQ('d', Base64::lookup(0b011101)); - ASSERT_EQ('e', Base64::lookup(0b011110)); - ASSERT_EQ('f', Base64::lookup(0b011111)); - ASSERT_EQ('g', Base64::lookup(0b100000)); - ASSERT_EQ('h', Base64::lookup(0b100001)); - ASSERT_EQ('i', Base64::lookup(0b100010)); - ASSERT_EQ('j', Base64::lookup(0b100011)); - ASSERT_EQ('k', Base64::lookup(0b100100)); - ASSERT_EQ('l', Base64::lookup(0b100101)); - ASSERT_EQ('m', Base64::lookup(0b100110)); - ASSERT_EQ('n', Base64::lookup(0b100111)); - ASSERT_EQ('o', Base64::lookup(0b101000)); - ASSERT_EQ('p', 
Base64::lookup(0b101001)); - ASSERT_EQ('q', Base64::lookup(0b101010)); - ASSERT_EQ('r', Base64::lookup(0b101011)); - ASSERT_EQ('s', Base64::lookup(0b101100)); - ASSERT_EQ('t', Base64::lookup(0b101101)); - ASSERT_EQ('u', Base64::lookup(0b101110)); - ASSERT_EQ('v', Base64::lookup(0b101111)); - ASSERT_EQ('w', Base64::lookup(0b110000)); - ASSERT_EQ('x', Base64::lookup(0b110001)); - ASSERT_EQ('y', Base64::lookup(0b110010)); - ASSERT_EQ('z', Base64::lookup(0b110011)); - ASSERT_EQ('0', Base64::lookup(0b110100)); - ASSERT_EQ('1', Base64::lookup(0b110101)); - ASSERT_EQ('2', Base64::lookup(0b110110)); - ASSERT_EQ('3', Base64::lookup(0b110111)); - ASSERT_EQ('4', Base64::lookup(0b111000)); - ASSERT_EQ('5', Base64::lookup(0b111001)); - ASSERT_EQ('6', Base64::lookup(0b111010)); - ASSERT_EQ('7', Base64::lookup(0b111011)); - ASSERT_EQ('8', Base64::lookup(0b111100)); - ASSERT_EQ('9', Base64::lookup(0b111101)); - ASSERT_EQ('+', Base64::lookup(0b111110)); - ASSERT_EQ('/', Base64::lookup(0b111111)); + ASSERT_EQ('A', base64::lookup(0b000000)); + ASSERT_EQ('B', base64::lookup(0b000001)); + ASSERT_EQ('C', base64::lookup(0b000010)); + ASSERT_EQ('D', base64::lookup(0b000011)); + ASSERT_EQ('E', base64::lookup(0b000100)); + ASSERT_EQ('F', base64::lookup(0b000101)); + ASSERT_EQ('G', base64::lookup(0b000110)); + ASSERT_EQ('H', base64::lookup(0b000111)); + ASSERT_EQ('I', base64::lookup(0b001000)); + ASSERT_EQ('J', base64::lookup(0b001001)); + ASSERT_EQ('K', base64::lookup(0b001010)); + ASSERT_EQ('L', base64::lookup(0b001011)); + ASSERT_EQ('M', base64::lookup(0b001100)); + ASSERT_EQ('N', base64::lookup(0b001101)); + ASSERT_EQ('O', base64::lookup(0b001110)); + ASSERT_EQ('P', base64::lookup(0b001111)); + ASSERT_EQ('Q', base64::lookup(0b010000)); + ASSERT_EQ('R', base64::lookup(0b010001)); + ASSERT_EQ('S', base64::lookup(0b010010)); + ASSERT_EQ('T', base64::lookup(0b010011)); + ASSERT_EQ('U', base64::lookup(0b010100)); + ASSERT_EQ('V', base64::lookup(0b010101)); + ASSERT_EQ('W', 
base64::lookup(0b010110)); + ASSERT_EQ('X', base64::lookup(0b010111)); + ASSERT_EQ('Y', base64::lookup(0b011000)); + ASSERT_EQ('Z', base64::lookup(0b011001)); + ASSERT_EQ('a', base64::lookup(0b011010)); + ASSERT_EQ('b', base64::lookup(0b011011)); + ASSERT_EQ('c', base64::lookup(0b011100)); + ASSERT_EQ('d', base64::lookup(0b011101)); + ASSERT_EQ('e', base64::lookup(0b011110)); + ASSERT_EQ('f', base64::lookup(0b011111)); + ASSERT_EQ('g', base64::lookup(0b100000)); + ASSERT_EQ('h', base64::lookup(0b100001)); + ASSERT_EQ('i', base64::lookup(0b100010)); + ASSERT_EQ('j', base64::lookup(0b100011)); + ASSERT_EQ('k', base64::lookup(0b100100)); + ASSERT_EQ('l', base64::lookup(0b100101)); + ASSERT_EQ('m', base64::lookup(0b100110)); + ASSERT_EQ('n', base64::lookup(0b100111)); + ASSERT_EQ('o', base64::lookup(0b101000)); + ASSERT_EQ('p', base64::lookup(0b101001)); + ASSERT_EQ('q', base64::lookup(0b101010)); + ASSERT_EQ('r', base64::lookup(0b101011)); + ASSERT_EQ('s', base64::lookup(0b101100)); + ASSERT_EQ('t', base64::lookup(0b101101)); + ASSERT_EQ('u', base64::lookup(0b101110)); + ASSERT_EQ('v', base64::lookup(0b101111)); + ASSERT_EQ('w', base64::lookup(0b110000)); + ASSERT_EQ('x', base64::lookup(0b110001)); + ASSERT_EQ('y', base64::lookup(0b110010)); + ASSERT_EQ('z', base64::lookup(0b110011)); + ASSERT_EQ('0', base64::lookup(0b110100)); + ASSERT_EQ('1', base64::lookup(0b110101)); + ASSERT_EQ('2', base64::lookup(0b110110)); + ASSERT_EQ('3', base64::lookup(0b110111)); + ASSERT_EQ('4', base64::lookup(0b111000)); + ASSERT_EQ('5', base64::lookup(0b111001)); + ASSERT_EQ('6', base64::lookup(0b111010)); + ASSERT_EQ('7', base64::lookup(0b111011)); + ASSERT_EQ('8', base64::lookup(0b111100)); + ASSERT_EQ('9', base64::lookup(0b111101)); + ASSERT_EQ('+', base64::lookup(0b111110)); + ASSERT_EQ('/', base64::lookup(0b111111)); } TEST(Lookup, rlookup) { - ASSERT_EQ(0b000000, Base64::rlookup('A')); - ASSERT_EQ(0b000001, Base64::rlookup('B')); - ASSERT_EQ(0b000010, Base64::rlookup('C')); - 
ASSERT_EQ(0b000011, Base64::rlookup('D')); - ASSERT_EQ(0b000100, Base64::rlookup('E')); - ASSERT_EQ(0b000101, Base64::rlookup('F')); - ASSERT_EQ(0b000110, Base64::rlookup('G')); - ASSERT_EQ(0b000111, Base64::rlookup('H')); - ASSERT_EQ(0b001000, Base64::rlookup('I')); - ASSERT_EQ(0b001001, Base64::rlookup('J')); - ASSERT_EQ(0b001010, Base64::rlookup('K')); - ASSERT_EQ(0b001011, Base64::rlookup('L')); - ASSERT_EQ(0b001100, Base64::rlookup('M')); - ASSERT_EQ(0b001101, Base64::rlookup('N')); - ASSERT_EQ(0b001110, Base64::rlookup('O')); - ASSERT_EQ(0b001111, Base64::rlookup('P')); - ASSERT_EQ(0b010000, Base64::rlookup('Q')); - ASSERT_EQ(0b010001, Base64::rlookup('R')); - ASSERT_EQ(0b010010, Base64::rlookup('S')); - ASSERT_EQ(0b010011, Base64::rlookup('T')); - ASSERT_EQ(0b010100, Base64::rlookup('U')); - ASSERT_EQ(0b010101, Base64::rlookup('V')); - ASSERT_EQ(0b010110, Base64::rlookup('W')); - ASSERT_EQ(0b010111, Base64::rlookup('X')); - ASSERT_EQ(0b011000, Base64::rlookup('Y')); - ASSERT_EQ(0b011001, Base64::rlookup('Z')); - ASSERT_EQ(0b011010, Base64::rlookup('a')); - ASSERT_EQ(0b011011, Base64::rlookup('b')); - ASSERT_EQ(0b011100, Base64::rlookup('c')); - ASSERT_EQ(0b011101, Base64::rlookup('d')); - ASSERT_EQ(0b011110, Base64::rlookup('e')); - ASSERT_EQ(0b011111, Base64::rlookup('f')); - ASSERT_EQ(0b100000, Base64::rlookup('g')); - ASSERT_EQ(0b100001, Base64::rlookup('h')); - ASSERT_EQ(0b100010, Base64::rlookup('i')); - ASSERT_EQ(0b100011, Base64::rlookup('j')); - ASSERT_EQ(0b100100, Base64::rlookup('k')); - ASSERT_EQ(0b100101, Base64::rlookup('l')); - ASSERT_EQ(0b100110, Base64::rlookup('m')); - ASSERT_EQ(0b100111, Base64::rlookup('n')); - ASSERT_EQ(0b101000, Base64::rlookup('o')); - ASSERT_EQ(0b101001, Base64::rlookup('p')); - ASSERT_EQ(0b101010, Base64::rlookup('q')); - ASSERT_EQ(0b101011, Base64::rlookup('r')); - ASSERT_EQ(0b101100, Base64::rlookup('s')); - ASSERT_EQ(0b101101, Base64::rlookup('t')); - ASSERT_EQ(0b101110, Base64::rlookup('u')); - ASSERT_EQ(0b101111, 
Base64::rlookup('v')); - ASSERT_EQ(0b110000, Base64::rlookup('w')); - ASSERT_EQ(0b110001, Base64::rlookup('x')); - ASSERT_EQ(0b110010, Base64::rlookup('y')); - ASSERT_EQ(0b110011, Base64::rlookup('z')); - ASSERT_EQ(0b110100, Base64::rlookup('0')); - ASSERT_EQ(0b110101, Base64::rlookup('1')); - ASSERT_EQ(0b110110, Base64::rlookup('2')); - ASSERT_EQ(0b110111, Base64::rlookup('3')); - ASSERT_EQ(0b111000, Base64::rlookup('4')); - ASSERT_EQ(0b111001, Base64::rlookup('5')); - ASSERT_EQ(0b111010, Base64::rlookup('6')); - ASSERT_EQ(0b111011, Base64::rlookup('7')); - ASSERT_EQ(0b111100, Base64::rlookup('8')); - ASSERT_EQ(0b111101, Base64::rlookup('9')); - ASSERT_EQ(0b111110, Base64::rlookup('+')); - ASSERT_EQ(0b111111, Base64::rlookup('/')); + ASSERT_EQ(0b000000, base64::rlookup('A')); + ASSERT_EQ(0b000001, base64::rlookup('B')); + ASSERT_EQ(0b000010, base64::rlookup('C')); + ASSERT_EQ(0b000011, base64::rlookup('D')); + ASSERT_EQ(0b000100, base64::rlookup('E')); + ASSERT_EQ(0b000101, base64::rlookup('F')); + ASSERT_EQ(0b000110, base64::rlookup('G')); + ASSERT_EQ(0b000111, base64::rlookup('H')); + ASSERT_EQ(0b001000, base64::rlookup('I')); + ASSERT_EQ(0b001001, base64::rlookup('J')); + ASSERT_EQ(0b001010, base64::rlookup('K')); + ASSERT_EQ(0b001011, base64::rlookup('L')); + ASSERT_EQ(0b001100, base64::rlookup('M')); + ASSERT_EQ(0b001101, base64::rlookup('N')); + ASSERT_EQ(0b001110, base64::rlookup('O')); + ASSERT_EQ(0b001111, base64::rlookup('P')); + ASSERT_EQ(0b010000, base64::rlookup('Q')); + ASSERT_EQ(0b010001, base64::rlookup('R')); + ASSERT_EQ(0b010010, base64::rlookup('S')); + ASSERT_EQ(0b010011, base64::rlookup('T')); + ASSERT_EQ(0b010100, base64::rlookup('U')); + ASSERT_EQ(0b010101, base64::rlookup('V')); + ASSERT_EQ(0b010110, base64::rlookup('W')); + ASSERT_EQ(0b010111, base64::rlookup('X')); + ASSERT_EQ(0b011000, base64::rlookup('Y')); + ASSERT_EQ(0b011001, base64::rlookup('Z')); + ASSERT_EQ(0b011010, base64::rlookup('a')); + ASSERT_EQ(0b011011, 
base64::rlookup('b')); + ASSERT_EQ(0b011100, base64::rlookup('c')); + ASSERT_EQ(0b011101, base64::rlookup('d')); + ASSERT_EQ(0b011110, base64::rlookup('e')); + ASSERT_EQ(0b011111, base64::rlookup('f')); + ASSERT_EQ(0b100000, base64::rlookup('g')); + ASSERT_EQ(0b100001, base64::rlookup('h')); + ASSERT_EQ(0b100010, base64::rlookup('i')); + ASSERT_EQ(0b100011, base64::rlookup('j')); + ASSERT_EQ(0b100100, base64::rlookup('k')); + ASSERT_EQ(0b100101, base64::rlookup('l')); + ASSERT_EQ(0b100110, base64::rlookup('m')); + ASSERT_EQ(0b100111, base64::rlookup('n')); + ASSERT_EQ(0b101000, base64::rlookup('o')); + ASSERT_EQ(0b101001, base64::rlookup('p')); + ASSERT_EQ(0b101010, base64::rlookup('q')); + ASSERT_EQ(0b101011, base64::rlookup('r')); + ASSERT_EQ(0b101100, base64::rlookup('s')); + ASSERT_EQ(0b101101, base64::rlookup('t')); + ASSERT_EQ(0b101110, base64::rlookup('u')); + ASSERT_EQ(0b101111, base64::rlookup('v')); + ASSERT_EQ(0b110000, base64::rlookup('w')); + ASSERT_EQ(0b110001, base64::rlookup('x')); + ASSERT_EQ(0b110010, base64::rlookup('y')); + ASSERT_EQ(0b110011, base64::rlookup('z')); + ASSERT_EQ(0b110100, base64::rlookup('0')); + ASSERT_EQ(0b110101, base64::rlookup('1')); + ASSERT_EQ(0b110110, base64::rlookup('2')); + ASSERT_EQ(0b110111, base64::rlookup('3')); + ASSERT_EQ(0b111000, base64::rlookup('4')); + ASSERT_EQ(0b111001, base64::rlookup('5')); + ASSERT_EQ(0b111010, base64::rlookup('6')); + ASSERT_EQ(0b111011, base64::rlookup('7')); + ASSERT_EQ(0b111100, base64::rlookup('8')); + ASSERT_EQ(0b111101, base64::rlookup('9')); + ASSERT_EQ(0b111110, base64::rlookup('+')); + ASSERT_EQ(0b111111, base64::rlookup('/')); } TEST(Encode, basic) { - ASSERT_EQ("YQ==", Base64::encode("a")); - ASSERT_EQ("YWI=", Base64::encode("ab")); - ASSERT_EQ("YWJj", Base64::encode("abc")); + ASSERT_EQ("YQ==", base64::encode("a")); + ASSERT_EQ("YWI=", base64::encode("ab")); + ASSERT_EQ("YWJj", base64::encode("abc")); - ASSERT_EQ("aGVsbG8=", Base64::encode("hello")); - 
ASSERT_EQ("dGhpcyBpcyBhIGxvbmcgc2VudGVuY2U=", Base64::encode("this is a long sentence")); + ASSERT_EQ("aGVsbG8=", base64::encode("hello")); + ASSERT_EQ("dGhpcyBpcyBhIGxvbmcgc2VudGVuY2U=", base64::encode("this is a long sentence")); } TEST(Decode, basic) { - ASSERT_EQ("a", Base64::decode("YQ==")); - ASSERT_EQ("ab", Base64::decode("YWI=")); - ASSERT_EQ("abc", Base64::decode("YWJj")); + ASSERT_EQ("a", base64::decode("YQ==")); + ASSERT_EQ("ab", base64::decode("YWI=")); + ASSERT_EQ("abc", base64::decode("YWJj")); - ASSERT_EQ("hello", Base64::decode("aGVsbG8=")); - ASSERT_EQ("this is a long sentence", Base64::decode("dGhpcyBpcyBhIGxvbmcgc2VudGVuY2U=")); + ASSERT_EQ("hello", base64::decode("aGVsbG8=")); + ASSERT_EQ("this is a long sentence", base64::decode("dGhpcyBpcyBhIGxvbmcgc2VudGVuY2U=")); } int main(int argc, char **argv)
--- a/C++/tests/Hash/main.cpp Mon Sep 28 19:40:16 2015 +0200 +++ b/C++/tests/Hash/main.cpp Mon Sep 28 19:40:26 2015 +0200 @@ -27,7 +27,7 @@ TEST(Hash, md5) { std::string expected = "b10a8db164e0754105b7a99be72e3fe5"; - std::string output = Hash::md5("Hello World"); + std::string output = hash::md5("Hello World"); ASSERT_EQ(expected, output); } @@ -35,7 +35,7 @@ TEST(Hash, sha1) { std::string expected = "0a4d55a8d778e5022fab701977c5d840bbc486d0"; - std::string output = Hash::sha1("Hello World"); + std::string output = hash::sha1("Hello World"); ASSERT_EQ(expected, output); } @@ -43,7 +43,7 @@ TEST(Hash, sha256) { std::string expected = "a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e"; - std::string output = Hash::sha256("Hello World"); + std::string output = hash::sha256("Hello World"); ASSERT_EQ(expected, output); } @@ -51,7 +51,7 @@ TEST(Hash, sha512) { std::string expected = "2c74fd17edafd80e8447b0d46741ee243b7eb74dd2149a0ab1b9246fb30382f27e853d8585719e0e67cbda0daa8f51671064615d645ae27acb15bfb1447f459b"; - std::string output = Hash::sha512("Hello World"); + std::string output = hash::sha512("Hello World"); ASSERT_EQ(expected, output); }
--- a/C++/tests/Unicode/main.cpp Mon Sep 28 19:40:16 2015 +0200 +++ b/C++/tests/Unicode/main.cpp Mon Sep 28 19:40:26 2015 +0200 @@ -34,7 +34,7 @@ { try { std::u32string u32{'a', 'b', 'c'}; - std::string s = Unicode::toUtf8(u32); + std::string s = unicode::toUtf8(u32); ASSERT_EQ("abc", s); } catch (const std::exception &ex) { @@ -46,7 +46,7 @@ { try { std::u32string u32{'a', U'é', 'c', U'𠀀'}; - std::string s = Unicode::toUtf8(u32); + std::string s = unicode::toUtf8(u32); ASSERT_EQ("aéc𠀀", s); } catch (const std::exception &ex) { @@ -58,7 +58,7 @@ { try { std::u32string u32{'a', 0xFFFFFFFF, 'c'}; - std::string s = Unicode::toUtf8(u32); + std::string s = unicode::toUtf8(u32); FAIL() << "expected a failure"; } catch (const std::exception &ex) { @@ -75,7 +75,7 @@ try { std::string s{"abc"}; std::u32string expected{'a', 'b', 'c'}; - std::u32string result = Unicode::toUtf32(s); + std::u32string result = unicode::toUtf32(s); ASSERT_EQ(expected, result); } catch (const std::exception &ex) { @@ -88,7 +88,7 @@ try { std::string s{"aéc𠀀"}; std::u32string expected{'a', U'é', 'c', U'𠀀'}; - std::u32string result = Unicode::toUtf32(s); + std::u32string result = unicode::toUtf32(s); ASSERT_EQ(expected, result); } catch (const std::exception &ex) { @@ -105,7 +105,7 @@ try { std::u32string u32{'a', 'b', 'c'}; std::u32string expected{'A', 'B', 'C'}; - std::u32string result = Unicode::toupper(u32); + std::u32string result = unicode::toupper(u32); ASSERT_EQ(expected, result); } catch (const std::exception &ex) { @@ -118,7 +118,7 @@ try { std::u32string u32{U'ä', U'ç', U'ë'}; std::u32string expected{U'Ä', U'Ç', U'Ë'}; - std::u32string result = Unicode::toupper(u32); + std::u32string result = unicode::toupper(u32); ASSERT_EQ(expected, result); } catch (const std::exception &ex) { @@ -131,7 +131,7 @@ try { std::u32string u32{'a', 0xFFFFFFFF, 'b'}; std::u32string expected{'A', 0xFFFFFFFF, 'B'}; - std::u32string result = Unicode::toupper(u32); + std::u32string result = unicode::toupper(u32); 
ASSERT_EQ(expected, result); } catch (const std::exception &ex) { @@ -148,7 +148,7 @@ try { std::u32string u32{'A', 'B', 'C'}; std::u32string expected{'a', 'b', 'c'}; - std::u32string result = Unicode::tolower(u32); + std::u32string result = unicode::tolower(u32); ASSERT_EQ(expected, result); } catch (const std::exception &ex) { @@ -161,7 +161,7 @@ try { std::u32string u32{U'Ä', U'Ç', U'Ë'}; std::u32string expected{U'ä', U'ç', U'ë'}; - std::u32string result = Unicode::tolower(u32); + std::u32string result = unicode::tolower(u32); ASSERT_EQ(expected, result); } catch (const std::exception &ex) { @@ -174,7 +174,7 @@ try { std::u32string u32{'A', 0xFFFFFFFF, 'B'}; std::u32string expected{'a', 0xFFFFFFFF, 'b'}; - std::u32string result = Unicode::tolower(u32); + std::u32string result = unicode::tolower(u32); ASSERT_EQ(expected, result); } catch (const std::exception &ex) { @@ -190,7 +190,7 @@ { try { std::string s{"abc"}; - std::string r = Unicode::toupper(s); + std::string r = unicode::toupper(s); ASSERT_EQ("ABC", r); } catch (const std::exception &ex) { @@ -202,7 +202,7 @@ { try { std::string s{"aéc"}; - std::string r = Unicode::toupper(s); + std::string r = unicode::toupper(s); ASSERT_EQ("AÉC", r); } catch (const std::exception &ex) { @@ -214,7 +214,7 @@ { try { std::string s{"a" "\xFF""b"}; - std::string r = Unicode::toupper(s); + std::string r = unicode::toupper(s); FAIL() << "expected a failure"; } catch (const std::exception &ex) { @@ -230,7 +230,7 @@ { try { std::string s{"ABC"}; - std::string r = Unicode::tolower(s); + std::string r = unicode::tolower(s); ASSERT_EQ("abc", r); } catch (const std::exception &ex) { @@ -242,7 +242,7 @@ { try { std::string s{"AÉC"}; - std::string r = Unicode::tolower(s); + std::string r = unicode::tolower(s); ASSERT_EQ("aéc", r); } catch (const std::exception &ex) { @@ -254,7 +254,7 @@ { try { std::string s{"A" "\xFF""B"}; - std::string r = Unicode::tolower(s); + std::string r = unicode::tolower(s); FAIL() << "expected a failure"; } 
catch (const std::exception &ex) { @@ -268,30 +268,30 @@ TEST(Check, isspace) { - ASSERT_TRUE(Unicode::isspace(' ')); - ASSERT_FALSE(Unicode::isspace(/* é */ 233)); + ASSERT_TRUE(unicode::isspace(' ')); + ASSERT_FALSE(unicode::isspace(/* é */ 233)); } TEST(Check, isalpha) { - ASSERT_TRUE(Unicode::isalpha(U'é')); - ASSERT_FALSE(Unicode::isalpha(U'€')); + ASSERT_TRUE(unicode::isalpha(U'é')); + ASSERT_FALSE(unicode::isalpha(U'€')); } TEST(Check, isupper) { - ASSERT_FALSE(Unicode::isupper('a')); - ASSERT_FALSE(Unicode::isupper(U'é')); - ASSERT_TRUE(Unicode::isupper('A')); - ASSERT_TRUE(Unicode::isupper(U'É')); + ASSERT_FALSE(unicode::isupper('a')); + ASSERT_FALSE(unicode::isupper(U'é')); + ASSERT_TRUE(unicode::isupper('A')); + ASSERT_TRUE(unicode::isupper(U'É')); } TEST(Check, islower) { - ASSERT_TRUE(Unicode::islower('a')); - ASSERT_TRUE(Unicode::islower(U'é')); - ASSERT_FALSE(Unicode::islower('A')); - ASSERT_FALSE(Unicode::islower(U'É')); + ASSERT_TRUE(unicode::islower('a')); + ASSERT_TRUE(unicode::islower(U'é')); + ASSERT_FALSE(unicode::islower('A')); + ASSERT_FALSE(unicode::islower(U'É')); } /* -------------------------------------------------------- @@ -300,10 +300,10 @@ TEST(Misc, nbytesPoint) { - ASSERT_EQ(1, Unicode::nbytesPoint('a')); - ASSERT_EQ(2, Unicode::nbytesPoint(U'é')); - ASSERT_EQ(3, Unicode::nbytesPoint(U'€')); - ASSERT_EQ(4, Unicode::nbytesPoint(U'𠀀')); + ASSERT_EQ(1, unicode::nbytesPoint('a')); + ASSERT_EQ(2, unicode::nbytesPoint(U'é')); + ASSERT_EQ(3, unicode::nbytesPoint(U'€')); + ASSERT_EQ(4, unicode::nbytesPoint(U'𠀀')); } TEST(Misc, nbytesUtf8) @@ -313,10 +313,10 @@ std::string s3{"€"}; std::string s4{"𠀀"}; - ASSERT_EQ(1, Unicode::nbytesUtf8(s1[0])); - ASSERT_EQ(2, Unicode::nbytesUtf8(s2[0])); - ASSERT_EQ(3, Unicode::nbytesUtf8(s3[0])); - ASSERT_EQ(4, Unicode::nbytesUtf8(s4[0])); + ASSERT_EQ(1, unicode::nbytesUtf8(s1[0])); + ASSERT_EQ(2, unicode::nbytesUtf8(s2[0])); + ASSERT_EQ(3, unicode::nbytesUtf8(s3[0])); + ASSERT_EQ(4, 
unicode::nbytesUtf8(s4[0])); } TEST(Misc, forEach) @@ -324,7 +324,7 @@ std::string s{"aé€𠀀"}; int current = 0; - Unicode::forEach(s, [&] (char32_t code) { + unicode::forEach(s, [&] (char32_t code) { if (current == 0) { ASSERT_EQ(U'a', code); } else if (current == 1) {
--- a/tools/mkunicode/Unicode-after.cpp Mon Sep 28 19:40:16 2015 +0200 +++ b/tools/mkunicode/Unicode-after.cpp Mon Sep 28 19:40:26 2015 +0200 @@ -1,4 +1,4 @@ -void Unicode::encode(char32_t c, char res[5]) noexcept +void encode(char32_t c, char res[5]) noexcept { switch (nbytesPoint(c)) { case 1: @@ -28,7 +28,7 @@ } } -void Unicode::decode(char32_t &c, const char *res) noexcept +void decode(char32_t &c, const char *res) noexcept { c = 0; @@ -55,7 +55,7 @@ } } -int Unicode::nbytesUtf8(char c) noexcept +int nbytesUtf8(char c) noexcept { if ((c & 0xE0) == 0xC0) return 2; @@ -67,7 +67,7 @@ return 1; } -int Unicode::nbytesPoint(char32_t c) noexcept +int nbytesPoint(char32_t c) noexcept { if (c <= 0x7F) return 1; @@ -81,7 +81,7 @@ return -1; } -int Unicode::length(const std::string &str) +int length(const std::string &str) { int total = 0; @@ -92,7 +92,7 @@ return total; } -std::string Unicode::toUtf8(const std::u32string &array) +std::string toUtf8(const std::u32string &array) { std::string res; @@ -111,7 +111,7 @@ return res; } -std::u32string Unicode::toUtf32(const std::string &str) +std::u32string toUtf32(const std::string &str) { std::u32string res; @@ -120,4 +120,6 @@ }); return res; -} \ No newline at end of file +} + +} // !unicode
--- a/tools/mkunicode/Unicode-before.cpp Mon Sep 28 19:40:16 2015 +0200 +++ b/tools/mkunicode/Unicode-before.cpp Mon Sep 28 19:40:26 2015 +0200 @@ -22,3 +22,6 @@ * The following code has been generated from Go mkrunetype adapted to our * needs. */ + +namespace unicode { +
--- a/tools/mkunicode/Unicode.h Mon Sep 28 19:40:16 2015 +0200 +++ b/tools/mkunicode/Unicode.h Mon Sep 28 19:40:26 2015 +0200 @@ -27,215 +27,211 @@ #include <stdexcept> #include <string> +namespace unicode { + +void encode(char32_t point, char res[5]) noexcept; +void decode(char32_t &c, const char *res) noexcept; + /** - * @class Unicode - * @brief Conversion between UTF-8 and UTF-32 + * Get the number of bytes for the first multi byte character from a + * utf-8 string. + * + * This can be used to iterate a valid UTF-8 string to jump to the next + * real character. + * + * @param c the first multi byte character + * @return the number of bytes [1-4] */ -class Unicode { -private: - static void encode(char32_t point, char res[5]) noexcept; - static void decode(char32_t &c, const char *res) noexcept; +int nbytesUtf8(char c) noexcept; + +/** + * Get the number of bytes for the unicode point. + * + * @param point the unicode point + * @return the number of bytes [1-4] or -1 on invalid + */ +int nbytesPoint(char32_t point) noexcept; + +/** + * Get real number of character in a string. + * + * @param str the string + * @return the length + * @throw std::invalid_argument on invalid sequence + */ +int length(const std::string &str); -public: - /** - * Get the number of bytes for the first multi byte character from a - * utf-8 string. - * - * This can be used to iterate a valid UTF-8 string to jump to the next - * real character. - * - * @param c the first multi byte character - * @return the number of bytes [1-4] - */ - static int nbytesUtf8(char c) noexcept; +/** + * Iterate over all real characters in the UTF-8 string. 
+ * + * The function must have the following signature: + * void f(char ch) + * + * @param str the UTF-8 string + * @throw std::invalid_argument on invalid sequence + */ +template <typename Func> +void forEach(const std::string &str, Func function) +{ + for (size_t i = 0; i < str.size(); ) { + char32_t point = 0; + int size = nbytesUtf8(str[i]); + + if (size < 0) { + throw std::invalid_argument("invalid sequence"); + } - /** - * Get the number of bytes for the unicode point. - * - * @param point the unicode point - * @return the number of bytes [1-4] or -1 on invalid - */ - static int nbytesPoint(char32_t point) noexcept; + decode(point, str.data() + i); + function(point); + + i += size; + } +} + +/** + * Convert a UTF-32 string to UTF-8 string. + * + * @param array the UTF-32 string + * @return the UTF-8 string + * @throw std::invalid_argument on invalid sequence + */ +std::string toUtf8(const std::u32string &array); + +/** + * Convert a UTF-8 string to UTF-32 string. + * + * @param str the UTF-8 string + * @return the UTF-32 string + * @throw std::invalid_argument on invalid sequence + */ +std::u32string toUtf32(const std::string &str); - /** - * Get real number of character in a string. - * - * @param str the string - * @return the length - * @throw std::invalid_argument on invalid sequence - */ - static int length(const std::string &str); +/** + * Check if the unicode character is space. + * + * @param c the character + * @return true if space + */ +bool isspace(char32_t c) noexcept; + +/** + * Check if the unicode character is digit. + * + * @param c the character + * @return true if digit + */ +bool isdigit(char32_t c) noexcept; + +/** + * Check if the unicode character is alpha category. + * + * @param c the character + * @return true if alpha + */ +bool isalpha(char32_t c) noexcept; + +/** + * Check if the unicode character is upper case. 
+ * + * @param c the character + * @return true if upper case + */ +bool isupper(char32_t c) noexcept; + +/** + * Check if the unicode character is lower case. + * + * @param c the character + * @return true if lower case + */ +bool islower(char32_t c) noexcept; - /** - * Iterate over all real characters in the UTF-8 string. - * - * The function must have the following signature: - * void f(char ch) - * - * @param str the UTF-8 string - * @throw std::invalid_argument on invalid sequence - */ - template <typename Func> - static void forEach(const std::string &str, Func function) - { - for (size_t i = 0; i < str.size(); ) { - char32_t point = 0; - int size = nbytesUtf8(str[i]); +/** + * Check if the unicode character is title case. + * + * @param c the character + * @return true if title case + */ +bool istitle(char32_t c) noexcept; + +/** + * Convert to upper case. + * + * @param c the character + * @return the upper case character + */ +char32_t toupper(char32_t c) noexcept; - if (size < 0) { - throw std::invalid_argument("invalid sequence"); - } +/** + * Convert to lower case. + * + * @param c the character + * @return the lower case character + */ +char32_t tolower(char32_t c) noexcept; - decode(point, str.data() + i); - function(point); +/** + * Convert to title case. + * + * @param c the character + * @return the title case character + */ +char32_t totitle(char32_t c) noexcept; - i += size; - } +/** + * Convert the UTF-32 string to upper case. + * + * @param str the str + * @return the upper case string + */ +inline std::u32string toupper(std::u32string str) +{ + for (size_t i = 0; i < str.size(); ++i) { + str[i] = toupper(str[i]); } - /** - * Convert a UTF-32 string to UTF-8 string. - * - * @param array the UTF-32 string - * @return the UTF-8 string - * @throw std::invalid_argument on invalid sequence - */ - static std::string toUtf8(const std::u32string &array); - - /** - * Convert a UTF-8 string to UTF-32 string. 
- * - * @param str the UTF-8 string - * @return the UTF-32 string - * @throw std::invalid_argument on invalid sequence - */ - static std::u32string toUtf32(const std::string &str); - - /** - * Check if the unicode character is space. - * - * @param c the character - * @return true if space - */ - static bool isspace(char32_t c) noexcept; - - /** - * Check if the unicode character is digit. - * - * @param c the character - * @return true if digit - */ - static bool isdigit(char32_t c) noexcept; - - /** - * Check if the unicode character is alpha category. - * - * @param c the character - * @return true if alpha - */ - static bool isalpha(char32_t c) noexcept; - - /** - * Check if the unicode character is upper case. - * - * @param c the character - * @return true if upper case - */ - static bool isupper(char32_t c) noexcept; + return str; +} - /** - * Check if the unicode character is lower case. - * - * @param c the character - * @return true if lower case - */ - static bool islower(char32_t c) noexcept; - - /** - * Check if the unicode character is title case. - * - * @param c the character - * @return true if title case - */ - static bool istitle(char32_t c) noexcept; - - /** - * Convert to upper case. - * - * @param c the character - * @return the upper case character - */ - static char32_t toupper(char32_t c) noexcept; +/** + * Convert the UTF-8 string to upper case. + * + * @param str the str + * @return the upper case string + * @warning very slow at the moment + */ +inline std::string toupper(const std::string &str) +{ + return toUtf8(toupper(toUtf32(str))); +} - /** - * Convert to lower case. - * - * @param c the character - * @return the lower case character - */ - static char32_t tolower(char32_t c) noexcept; - - /** - * Convert to title case. - * - * @param c the character - * @return the title case character - */ - static char32_t totitle(char32_t c) noexcept; - - /** - * Convert the UTF-8 string to upper case. 
- * - * @param str the str - * @return the upper case string - * @warning very slow at the moment - */ - static inline std::string toupper(const std::string &str) - { - return toUtf8(toupper(toUtf32(str))); +/** + * Convert the UTF-32 string to lower case. + * + * @param str the str + * @return the lower case string + */ +inline std::u32string tolower(std::u32string str) +{ + for (size_t i = 0; i < str.size(); ++i) { + str[i] = tolower(str[i]); } - /** - * Convert the UTF-32 string to upper case. - * - * @param str the str - * @return the upper case string - */ - static inline std::u32string toupper(std::u32string str) - { - for (size_t i = 0; i < str.size(); ++i) { - str[i] = toupper(str[i]); - } - - return str; - } + return str; +} - /** - * Convert the UTF-8 string to lower case. - * - * @param str the str - * @return the lower case string - * @warning very slow at the moment - */ - static inline std::string tolower(const std::string &str) - { - return toUtf8(tolower(toUtf32(str))); - } +/** + * Convert the UTF-8 string to lower case. + * + * @param str the str + * @return the lower case string + * @warning very slow at the moment + */ +inline std::string tolower(const std::string &str) +{ + return toUtf8(tolower(toUtf32(str))); +} - /** - * Convert the UTF-32 string to lower case. - * - * @param str the str - * @return the lower case string - */ - static inline std::u32string tolower(std::u32string str) - { - for (size_t i = 0; i < str.size(); ++i) { - str[i] = tolower(str[i]); - } - - return str; - } -}; +} // !unicode #endif // !_UTF8_H_
--- a/tools/mkunicode/src/mkunicode.c Mon Sep 28 19:40:16 2015 +0200 +++ b/tools/mkunicode/src/mkunicode.c Mon Sep 28 19:40:26 2015 +0200 @@ -365,7 +365,7 @@ iss = mkissingle(label, prop); printf( - "bool Unicode::is%s(char32_t c) noexcept\n" + "bool is%s(char32_t c) noexcept\n" "{\n" " char32_t *p;\n" "\n", @@ -526,7 +526,7 @@ tos = mktosingle(label, map); printf( - "char32_t Unicode::to%s(char32_t c) noexcept\n" + "char32_t to%s(char32_t c) noexcept\n" "{\n" " char32_t *p;\n" "\n", @@ -566,7 +566,7 @@ { mkisrange(label, prop, 1); printf( - "bool Unicode::is%s(char32_t c) noexcept\n" + "bool is%s(char32_t c) noexcept\n" "{\n" " char32_t *p;\n" "\n" @@ -703,4 +703,4 @@ fprintf(stderr, "\n"); exit(1); -} \ No newline at end of file +}