Mercurial > code
view tools/mkunicode/Unicode.h @ 395:b78d6d8f2872
Unicode: remove class, use namespace
author | David Demelier <markand@malikania.fr> |
---|---|
date | Mon, 28 Sep 2015 15:55:46 +0200 |
parents | 7fe8d4094983 |
children | d5ec1174b707 |
line wrap: on
line source
/* * Unicode.h -- UTF-8 to UTF-32 conversions and various operations * * Copyright (c) 2013, 2014, 2015 David Demelier <markand@malikania.fr> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #ifndef _UNICODE_H_ #define _UNICODE_H_ /** * @file Unicode.h * @brief UTF-8 to UTF-32 conversions */ #include <stdexcept> #include <string> namespace unicode { void encode(char32_t point, char res[5]) noexcept; void decode(char32_t &c, const char *res) noexcept; /** * Get the number of bytes for the first multi byte character from a * utf-8 string. * * This can be used to iterate a valid UTF-8 string to jump to the next * real character. * * @param c the first multi byte character * @return the number of bytes [1-4] */ int nbytesUtf8(char c) noexcept; /** * Get the number of bytes for the unicode point. * * @param point the unicode point * @return the number of bytes [1-4] or -1 on invalid */ int nbytesPoint(char32_t point) noexcept; /** * Get real number of character in a string. * * @param str the string * @return the length * @throw std::invalid_argument on invalid sequence */ int length(const std::string &str); /** * Iterate over all real characters in the UTF-8 string. * * The function must have the following signature: * void f(char ch) * * @param str the UTF-8 string * @throw std::invalid_argument on invalid sequence */ template <typename Func> void forEach(const std::string &str, Func function) { for (size_t i = 0; i < str.size(); ) { char32_t point = 0; int size = nbytesUtf8(str[i]); if (size < 0) { throw std::invalid_argument("invalid sequence"); } decode(point, str.data() + i); function(point); i += size; } } /** * Convert a UTF-32 string to UTF-8 string. * * @param array the UTF-32 string * @return the UTF-8 string * @throw std::invalid_argument on invalid sequence */ std::string toUtf8(const std::u32string &array); /** * Convert a UTF-8 string to UTF-32 string. * * @param str the UTF-8 string * @return the UTF-32 string * @throw std::invalid_argument on invalid sequence */ std::u32string toUtf32(const std::string &str); /** * Check if the unicode character is space. * * @param c the character * @return true if space */ bool isspace(char32_t c) noexcept; /** * Check if the unicode character is digit. * * @param c the character * @return true if digit */ bool isdigit(char32_t c) noexcept; /** * Check if the unicode character is alpha category. * * @param c the character * @return true if alpha */ bool isalpha(char32_t c) noexcept; /** * Check if the unicode character is upper case. * * @param c the character * @return true if upper case */ bool isupper(char32_t c) noexcept; /** * Check if the unicode character is lower case. * * @param c the character * @return true if lower case */ bool islower(char32_t c) noexcept; /** * Check if the unicode character is title case. * * @param c the character * @return true if title case */ bool istitle(char32_t c) noexcept; /** * Convert to upper case. * * @param c the character * @return the upper case character */ char32_t toupper(char32_t c) noexcept; /** * Convert to lower case. * * @param c the character * @return the lower case character */ char32_t tolower(char32_t c) noexcept; /** * Convert to title case. * * @param c the character * @return the title case character */ char32_t totitle(char32_t c) noexcept; /** * Convert the UTF-32 string to upper case. * * @param str the str * @return the upper case string */ inline std::u32string toupper(std::u32string str) { for (size_t i = 0; i < str.size(); ++i) { str[i] = toupper(str[i]); } return str; } /** * Convert the UTF-8 string to upper case. * * @param str the str * @return the upper case string * @warning very slow at the moment */ inline std::string toupper(const std::string &str) { return toUtf8(toupper(toUtf32(str))); } /** * Convert the UTF-32 string to lower case. * * @param str the str * @return the lower case string */ inline std::u32string tolower(std::u32string str) { for (size_t i = 0; i < str.size(); ++i) { str[i] = tolower(str[i]); } return str; } /** * Convert the UTF-8 string to lower case. * * @param str the str * @return the lower case string * @warning very slow at the moment */ inline std::string tolower(const std::string &str) { return toUtf8(tolower(toUtf32(str))); } } // !unicode #endif // !_UTF8_H_