Mercurial > libunicode
view unicode.hpp @ 7:6ecc84c922b2
Switch to C++17
author | David Demelier <markand@malikania.fr> |
---|---|
date | Mon, 09 Jul 2018 23:34:00 +0200 |
parents | d9c9a35cb4b2 |
children | d9309daa0d7b |
line wrap: on
line source
/*
 * unicode.hpp -- UTF-8 to UTF-32 conversions and various operations
 *
 * Copyright (c) 2013-2018 David Demelier <markand@malikania.fr>
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#ifndef UNICODE_HPP
#define UNICODE_HPP

/**
 * \file unicode.hpp
 * \brief UTF-8 to UTF-32 conversions
 * \author David Demelier <markand@malikania.fr>
 * \warning These files are auto-generated!
 */

#include <cstddef>
#include <stdexcept>
#include <string>
#include <string_view>

/**
 * \brief Unicode namespace.
 */
namespace unicode {

/**
 * Encode the unicode code point into multibyte string.
 *
 * \param point the unicode code point
 * \param res the output buffer (declared as an array of 5 chars)
 */
void encode(char32_t point, char res[5]) noexcept;

/**
 * Decode the multibyte buffer into a unicode code point.
 *
 * \param c the code point destination
 * \param res the multibyte string
 * \note no length is passed: the caller must make sure that the whole
 *       multibyte sequence is readable from res
 */
void decode(char32_t& c, const char* res) noexcept;

/**
 * Get the number of bytes for the first multi byte character from a
 * utf-8 string.
 *
 * This can be used to iterate a valid UTF-8 string to jump to the next
 * real character.
 *
 * \param c the first multi byte character
 * \return the number of bytes [1-4] or -1 if invalid
 */
auto nbytes_utf8(char c) noexcept -> int;

/**
 * Get the number of bytes for the unicode point.
 *
 * \param point the unicode point
 * \return the number of bytes [1-4] or -1 if invalid
 */
auto nbytes_point(char32_t point) noexcept -> int;

/**
 * Get real number of character in a string.
 *
 * \param str the string
 * \return the length
 * \throw std::invalid_argument on invalid sequence
 */
auto length(std::string_view str) -> unsigned;

/**
 * Iterate over all real characters in the UTF-8 string.
 *
 * The function must have the following signature:
 *  void f(char32_t ch)
 *
 * The callback receives each decoded code point, in order.
 *
 * \param str the UTF-8 string
 * \param function the function callback
 * \throw std::invalid_argument on invalid sequence, including a multibyte
 *        sequence that is cut short by the end of the string
 */
template <typename Func>
void for_each(std::string_view str, Func function)
{
    for (std::size_t i = 0; i < str.size(); ) {
        const int size = nbytes_utf8(str[i]);

        // Invalid leading byte. A zero size is rejected as well because it
        // would never advance the index.
        if (size < 1)
            throw std::invalid_argument("invalid sequence");

        const auto count = static_cast<std::size_t>(size);

        // decode() only gets a pointer, so it cannot stop at the end of the
        // view by itself: refuse sequences that announce more bytes than what
        // is left instead of reading out of bounds.
        if (count > str.size() - i)
            throw std::invalid_argument("invalid sequence");

        char32_t point = 0;

        decode(point, str.data() + i);
        function(point);
        i += count;
    }
}

/**
 * Convert a UTF-32 string to UTF-8 string.
 *
 * \param array the UTF-32 string
 * \return the UTF-8 string
 * \throw std::invalid_argument on invalid sequence
 */
auto to_utf8(std::u32string_view array) -> std::string;

/**
 * Convert a UTF-8 string to UTF-32 string.
 *
 * \param str the UTF-8 string
 * \return the UTF-32 string
 * \throw std::invalid_argument on invalid sequence
 */
auto to_utf32(std::string_view str) -> std::u32string;

/**
 * Check if the unicode character is space.
 *
 * \param c the character
 * \return true if space
 */
auto isspace(char32_t c) noexcept -> bool;

/**
 * Check if the unicode character is digit.
 *
 * \param c the character
 * \return true if digit
 */
auto isdigit(char32_t c) noexcept -> bool;

/**
 * Check if the unicode character is alpha category.
 *
 * \param c the character
 * \return true if alpha
 */
auto isalpha(char32_t c) noexcept -> bool;

/**
 * Check if the unicode character is upper case.
 *
 * \param c the character
 * \return true if upper case
 */
auto isupper(char32_t c) noexcept -> bool;

/**
 * Check if the unicode character is lower case.
 *
 * \param c the character
 * \return true if lower case
 */
auto islower(char32_t c) noexcept -> bool;

/**
 * Check if the unicode character is title case.
 *
 * \param c the character
 * \return true if title case
 */
auto istitle(char32_t c) noexcept -> bool;

/**
 * Convert to upper case.
 *
 * \param c the character
 * \return the upper case character
 */
auto toupper(char32_t c) noexcept -> char32_t;

/**
 * Convert to lower case.
 *
 * \param c the character
 * \return the lower case character
 */
auto tolower(char32_t c) noexcept -> char32_t;

/**
 * Convert to title case.
 *
 * \param c the character
 * \return the title case character
 */
auto totitle(char32_t c) noexcept -> char32_t;

/**
 * Convert the UTF-32 string to upper case.
 *
 * \param str the string
 * \return the upper case string
 */
auto toupper(std::u32string_view str) -> std::u32string;

/**
 * Convert the UTF-8 string to upper case.
 *
 * \param str the string
 * \return the upper case string
 * \warning very slow at the moment
 */
auto toupper(std::string_view str) -> std::string;

/**
 * Convert the UTF-32 string to lower case.
 *
 * \param str the string
 * \return the lower case string
 */
auto tolower(std::u32string_view str) -> std::u32string;

/**
 * Convert the UTF-8 string to lower case.
 *
 * \param str the string
 * \return the lower case string
 * \warning very slow at the moment
 */
auto tolower(std::string_view str) -> std::string;

} // !unicode

#endif // !UNICODE_HPP