Mercurial > libunicode
diff unicode.hpp @ 14:153c09cc6dcb
misc: miscellaneous cleanups for 2021
- Removal of Doxygen,
- Increase copyright years,
- Replace bool with ints.
author | David Demelier <markand@malikania.fr> |
---|---|
date | Wed, 03 Feb 2021 15:29:06 +0100 |
parents | ae1003c2a284 |
children |
line wrap: on
line diff
```diff
--- a/unicode.hpp Wed Mar 25 17:14:07 2020 +0100
+++ b/unicode.hpp Wed Feb 03 15:29:06 2021 +0100
@@ -1,7 +1,7 @@
 /*
  * unicode.hpp -- UTF-8 to UTF-32 conversions and various operations
  *
- * Copyright (c) 2013-2020 David Demelier <markand@malikania.fr>
+ * Copyright (c) 2013-2021 David Demelier <markand@malikania.fr>
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -19,76 +19,22 @@
 #ifndef UNICODE_HPP
 #define UNICODE_HPP
 
-/**
- * \file unicode.hpp
- * \brief UTF-8 to UTF-32 conversions
- * \author David Demelier <markand@malikania.fr>
- */
-
 #include <stdexcept>
 #include <string>
 #include <string_view>
 
-/**
- * \brief Unicode namespace.
- */
 namespace unicode {
 
-/**
- * Encode the unicode code point into multibyte string.
- *
- * \param point the unicode code point
- * \param res the output buffer
- */
 void encode(char32_t point, char res[5]) noexcept;
 
-/**
- * Decode the multibyte buffer into an unicode code point.
- *
- * \param c the code point destination
- * \param res the multibyte string.
- */
 void decode(char32_t& c, const char* res) noexcept;
 
-/**
- * Get the number of bytes for the first multi byte character from a
- * utf-8 string.
- *
- * This can be used to iterate a valid UTF-8 string to jump to the next
- * real character.
- *
- * \param c the first multi byte character
- * \return the number of bytes [1-4] or -1 if invalid
- */
 auto nbytes_utf8(char c) noexcept -> int;
 
-/**
- * Get the number of bytes for the unicode point.
- *
- * \param point the unicode point
- * \return the number of bytes [1-4] or -1 if invalid
- */
 auto nbytes_point(char32_t point) noexcept -> int;
 
-/**
- * Get real number of character in a string.
- *
- * \param str the string
- * \return the length
- * \throw std::invalid_argument on invalid sequence
- */
 auto length(std::string_view str) -> unsigned;
 
-/**
- * Iterate over all real characters in the UTF-8 string.
- *
- * The function must have the following signature:
- * void f(char32_t ch)
- *
- * \param str the UTF-8 string
- * \param function the function callback
- * \throw std::invalid_argument on invalid sequence
- */
 template <typename Func>
 void for_each(std::string_view str, Func function)
 {
@@ -106,128 +52,34 @@
 	}
 }
 
-/**
- * Convert a UTF-32 string to UTF-8 string.
- *
- * \param array the UTF-32 string
- * \return the UTF-8 string
- * \throw std::invalid_argument on invalid sequence
- */
 auto to_utf8(std::u32string_view array) -> std::string;
 
-/**
- * Convert a UTF-8 string to UTF-32 string.
- *
- * \param str the UTF-8 string
- * \return the UTF-32 string
- * \throw std::invalid_argument on invalid sequence
- */
 auto to_utf32(std::string_view str) -> std::u32string;
 
-/**
- * Check if the unicode character is space.
- *
- * \param c the character
- * \return true if space
- */
 auto isspace(char32_t c) noexcept -> bool;
 
-/**
- * Check if the unicode character is digit.
- *
- * \param c the character
- * \return true if digit
- */
 auto isdigit(char32_t c) noexcept -> bool;
 
-/**
- * Check if the unicode character is alpha category.
- *
- * \param c the character
- * \return true if alpha
- */
 auto isalpha(char32_t c) noexcept -> bool;
 
-/**
- * Check if the unicode character is upper case.
- *
- * \param c the character
- * \return true if upper case
- */
 auto isupper(char32_t c) noexcept -> bool;
 
-/**
- * Check if the unicode character is lower case.
- *
- * \param c the character
- * \return true if lower case
- */
 auto islower(char32_t c) noexcept -> bool;
 
-/**
- * Check if the unicode character is title case.
- *
- * \param c the character
- * \return true if title case
- */
 auto istitle(char32_t c) noexcept -> bool;
 
-/**
- * Convert to upper case.
- *
- * \param c the character
- * \return the upper case character
- */
 auto toupper(char32_t c) noexcept -> char32_t;
 
-/**
- * Convert to lower case.
- *
- * \param c the character
- * \return the lower case character
- */
 auto tolower(char32_t c) noexcept -> char32_t;
 
-/**
- * Convert to title case.
- *
- * \param c the character
- * \return the title case character
- */
 auto totitle(char32_t c) noexcept -> char32_t;
 
-/**
- * Convert the UTF-32 string to upper case.
- *
- * \param str the string
- * \return the upper case string
- */
 auto toupper(std::u32string_view str) -> std::u32string;
 
-/**
- * Convert the UTF-8 string to upper case.
- *
- * \param str the string
- * \return the upper case string
- * \warning very slow at the moment
- */
 auto toupper(std::string_view str) -> std::string;
 
-/**
- * Convert the UTF-32 string to lower case.
- *
- * \param str the string
- * \return the lower case string
- */
 auto tolower(std::u32string_view str) -> std::u32string;
 
-/**
- * Convert the UTF-8 string to lower case.
- *
- * \param str the string
- * \return the lower case string
- * \warning very slow at the moment
- */
 auto tolower(std::string_view str) -> std::string;
 
 } // !unicode
```