Mercurial > code
diff tools/mkunicode/Unicode.h @ 395:b78d6d8f2872
Unicode: remove class, use namespace
author | David Demelier <markand@malikania.fr> |
---|---|
date | Mon, 28 Sep 2015 15:55:46 +0200 |
parents | 7fe8d4094983 |
children | d5ec1174b707 |
line wrap: on
line diff
```diff
--- a/tools/mkunicode/Unicode.h Mon Sep 28 15:45:35 2015 +0200
+++ b/tools/mkunicode/Unicode.h Mon Sep 28 15:55:46 2015 +0200
@@ -27,215 +27,211 @@
 #include <stdexcept>
 #include <string>
 
+namespace unicode {
+
+void encode(char32_t point, char res[5]) noexcept;
+void decode(char32_t &c, const char *res) noexcept;
+
 /**
- * @class Unicode
- * @brief Conversion between UTF-8 and UTF-32
+ * Get the number of bytes for the first multi byte character from a
+ * utf-8 string.
+ *
+ * This can be used to iterate a valid UTF-8 string to jump to the next
+ * real character.
+ *
+ * @param c the first multi byte character
+ * @return the number of bytes [1-4]
  */
-class Unicode {
-private:
-	static void encode(char32_t point, char res[5]) noexcept;
-	static void decode(char32_t &c, const char *res) noexcept;
+int nbytesUtf8(char c) noexcept;
+
+/**
+ * Get the number of bytes for the unicode point.
+ *
+ * @param point the unicode point
+ * @return the number of bytes [1-4] or -1 on invalid
+ */
+int nbytesPoint(char32_t point) noexcept;
+
+/**
+ * Get real number of character in a string.
+ *
+ * @param str the string
+ * @return the length
+ * @throw std::invalid_argument on invalid sequence
+ */
+int length(const std::string &str);
 
-public:
-	/**
-	 * Get the number of bytes for the first multi byte character from a
-	 * utf-8 string.
-	 *
-	 * This can be used to iterate a valid UTF-8 string to jump to the next
-	 * real character.
-	 *
-	 * @param c the first multi byte character
-	 * @return the number of bytes [1-4]
-	 */
-	static int nbytesUtf8(char c) noexcept;
+/**
+ * Iterate over all real characters in the UTF-8 string.
+ *
+ * The function must have the following signature:
+ *	void f(char ch)
+ *
+ * @param str the UTF-8 string
+ * @throw std::invalid_argument on invalid sequence
+ */
+template <typename Func>
+void forEach(const std::string &str, Func function)
+{
+	for (size_t i = 0; i < str.size(); ) {
+		char32_t point = 0;
+		int size = nbytesUtf8(str[i]);
+
+		if (size < 0) {
+			throw std::invalid_argument("invalid sequence");
+		}
 
-	/**
-	 * Get the number of bytes for the unicode point.
-	 *
-	 * @param point the unicode point
-	 * @return the number of bytes [1-4] or -1 on invalid
-	 */
-	static int nbytesPoint(char32_t point) noexcept;
+		decode(point, str.data() + i);
+		function(point);
+
+		i += size;
+	}
+}
+
+/**
+ * Convert a UTF-32 string to UTF-8 string.
+ *
+ * @param array the UTF-32 string
+ * @return the UTF-8 string
+ * @throw std::invalid_argument on invalid sequence
+ */
+std::string toUtf8(const std::u32string &array);
+
+/**
+ * Convert a UTF-8 string to UTF-32 string.
+ *
+ * @param str the UTF-8 string
+ * @return the UTF-32 string
+ * @throw std::invalid_argument on invalid sequence
+ */
+std::u32string toUtf32(const std::string &str);
 
-	/**
-	 * Get real number of character in a string.
-	 *
-	 * @param str the string
-	 * @return the length
-	 * @throw std::invalid_argument on invalid sequence
-	 */
-	static int length(const std::string &str);
+/**
+ * Check if the unicode character is space.
+ *
+ * @param c the character
+ * @return true if space
+ */
+bool isspace(char32_t c) noexcept;
+
+/**
+ * Check if the unicode character is digit.
+ *
+ * @param c the character
+ * @return true if digit
+ */
+bool isdigit(char32_t c) noexcept;
+
+/**
+ * Check if the unicode character is alpha category.
+ *
+ * @param c the character
+ * @return true if alpha
+ */
+bool isalpha(char32_t c) noexcept;
+
+/**
+ * Check if the unicode character is upper case.
+ *
+ * @param c the character
+ * @return true if upper case
+ */
+bool isupper(char32_t c) noexcept;
+
+/**
+ * Check if the unicode character is lower case.
+ *
+ * @param c the character
+ * @return true if lower case
+ */
+bool islower(char32_t c) noexcept;
 
-	/**
-	 * Iterate over all real characters in the UTF-8 string.
-	 *
-	 * The function must have the following signature:
-	 *	void f(char ch)
-	 *
-	 * @param str the UTF-8 string
-	 * @throw std::invalid_argument on invalid sequence
-	 */
-	template <typename Func>
-	static void forEach(const std::string &str, Func function)
-	{
-		for (size_t i = 0; i < str.size(); ) {
-			char32_t point = 0;
-			int size = nbytesUtf8(str[i]);
+/**
+ * Check if the unicode character is title case.
+ *
+ * @param c the character
+ * @return true if title case
+ */
+bool istitle(char32_t c) noexcept;
+
+/**
+ * Convert to upper case.
+ *
+ * @param c the character
+ * @return the upper case character
+ */
+char32_t toupper(char32_t c) noexcept;
 
-			if (size < 0) {
-				throw std::invalid_argument("invalid sequence");
-			}
+/**
+ * Convert to lower case.
+ *
+ * @param c the character
+ * @return the lower case character
+ */
+char32_t tolower(char32_t c) noexcept;
 
-			decode(point, str.data() + i);
-			function(point);
+/**
+ * Convert to title case.
+ *
+ * @param c the character
+ * @return the title case character
+ */
+char32_t totitle(char32_t c) noexcept;
 
-			i += size;
-		}
+/**
+ * Convert the UTF-32 string to upper case.
+ *
+ * @param str the str
+ * @return the upper case string
+ */
+inline std::u32string toupper(std::u32string str)
+{
+	for (size_t i = 0; i < str.size(); ++i) {
+		str[i] = toupper(str[i]);
 	}
 
-	/**
-	 * Convert a UTF-32 string to UTF-8 string.
-	 *
-	 * @param array the UTF-32 string
-	 * @return the UTF-8 string
-	 * @throw std::invalid_argument on invalid sequence
-	 */
-	static std::string toUtf8(const std::u32string &array);
-
-	/**
-	 * Convert a UTF-8 string to UTF-32 string.
-	 *
-	 * @param str the UTF-8 string
-	 * @return the UTF-32 string
-	 * @throw std::invalid_argument on invalid sequence
-	 */
-	static std::u32string toUtf32(const std::string &str);
-
-	/**
-	 * Check if the unicode character is space.
-	 *
-	 * @param c the character
-	 * @return true if space
-	 */
-	static bool isspace(char32_t c) noexcept;
-
-	/**
-	 * Check if the unicode character is digit.
-	 *
-	 * @param c the character
-	 * @return true if digit
-	 */
-	static bool isdigit(char32_t c) noexcept;
-
-	/**
-	 * Check if the unicode character is alpha category.
-	 *
-	 * @param c the character
-	 * @return true if alpha
-	 */
-	static bool isalpha(char32_t c) noexcept;
-
-	/**
-	 * Check if the unicode character is upper case.
-	 *
-	 * @param c the character
-	 * @return true if upper case
-	 */
-	static bool isupper(char32_t c) noexcept;
+	return str;
+}
 
-	/**
-	 * Check if the unicode character is lower case.
-	 *
-	 * @param c the character
-	 * @return true if lower case
-	 */
-	static bool islower(char32_t c) noexcept;
-
-	/**
-	 * Check if the unicode character is title case.
-	 *
-	 * @param c the character
-	 * @return true if title case
-	 */
-	static bool istitle(char32_t c) noexcept;
-
-	/**
-	 * Convert to upper case.
-	 *
-	 * @param c the character
-	 * @return the upper case character
-	 */
-	static char32_t toupper(char32_t c) noexcept;
+/**
+ * Convert the UTF-8 string to upper case.
+ *
+ * @param str the str
+ * @return the upper case string
+ * @warning very slow at the moment
+ */
+inline std::string toupper(const std::string &str)
+{
+	return toUtf8(toupper(toUtf32(str)));
+}
 
-	/**
-	 * Convert to lower case.
-	 *
-	 * @param c the character
-	 * @return the lower case character
-	 */
-	static char32_t tolower(char32_t c) noexcept;
-
-	/**
-	 * Convert to title case.
-	 *
-	 * @param c the character
-	 * @return the title case character
-	 */
-	static char32_t totitle(char32_t c) noexcept;
-
-	/**
-	 * Convert the UTF-8 string to upper case.
-	 *
-	 * @param str the str
-	 * @return the upper case string
-	 * @warning very slow at the moment
-	 */
-	static inline std::string toupper(const std::string &str)
-	{
-		return toUtf8(toupper(toUtf32(str)));
+/**
+ * Convert the UTF-32 string to lower case.
+ *
+ * @param str the str
+ * @return the lower case string
+ */
+inline std::u32string tolower(std::u32string str)
+{
+	for (size_t i = 0; i < str.size(); ++i) {
+		str[i] = tolower(str[i]);
 	}
 
-	/**
-	 * Convert the UTF-32 string to upper case.
-	 *
-	 * @param str the str
-	 * @return the upper case string
-	 */
-	static inline std::u32string toupper(std::u32string str)
-	{
-		for (size_t i = 0; i < str.size(); ++i) {
-			str[i] = toupper(str[i]);
-		}
-
-		return str;
-	}
+	return str;
+}
 
-	/**
-	 * Convert the UTF-8 string to lower case.
-	 *
-	 * @param str the str
-	 * @return the lower case string
-	 * @warning very slow at the moment
-	 */
-	static inline std::string tolower(const std::string &str)
-	{
-		return toUtf8(tolower(toUtf32(str)));
-	}
+/**
+ * Convert the UTF-8 string to lower case.
+ *
+ * @param str the str
+ * @return the lower case string
+ * @warning very slow at the moment
+ */
+inline std::string tolower(const std::string &str)
+{
+	return toUtf8(tolower(toUtf32(str)));
+}
 
-	/**
-	 * Convert the UTF-32 string to lower case.
-	 *
-	 * @param str the str
-	 * @return the lower case string
-	 */
-	static inline std::u32string tolower(std::u32string str)
-	{
-		for (size_t i = 0; i < str.size(); ++i) {
-			str[i] = tolower(str[i]);
-		}
-
-		return str;
-	}
-};
+} // !unicode
 
 #endif // !_UTF8_H_
```