Mercurial > libunicode
annotate unicode.hpp @ 8:e7a1a3c4f850
Use doxygen_add_docs from FindDoxygen
author | David Demelier <markand@malikania.fr> |
---|---|
date | Mon, 09 Jul 2018 23:40:56 +0200 |
parents | 6ecc84c922b2 |
children | d9309daa0d7b |
rev | line source |
---|---|
0 | 1 /* |
2 * unicode.hpp -- UTF-8 to UTF-32 conversions and various operations | |
3 * | |
5 | 4 * Copyright (c) 2013-2018 David Demelier <markand@malikania.fr> |
0 | 5 * |
6 * Permission to use, copy, modify, and/or distribute this software for any | |
7 * purpose with or without fee is hereby granted, provided that the above | |
8 * copyright notice and this permission notice appear in all copies. | |
9 * | |
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
17 */ | |
18 | |
19 #ifndef UNICODE_HPP | |
20 #define UNICODE_HPP | |
21 | |
22 /** | |
23 * \file unicode.hpp | |
24 * \brief UTF-8 to UTF-32 conversions | |
25 * \author David Demelier <markand@malikania.fr> | |
26 * \warning These files are auto-generated! | |
27 */ | |
28 | |
29 #include <stdexcept> | |
30 #include <string> | |
7 | 31 #include <string_view> |
0 | 32 |
33 /** | |
34 * \brief Unicode namespace. | |
35 */ | |
36 namespace unicode { | |
37 | |
38 /** | |
39 * Encode the Unicode code point into a multibyte string. | |
40 * | |
41 * \param point the unicode code point | |
42 * \param res the output buffer (must hold at least 5 bytes) | |
43 */ | |
6
d9c9a35cb4b2
Get rid of export macro, use CMake
David Demelier <markand@malikania.fr>
parents:
5
diff
changeset
|
44 void encode(char32_t point, char res[5]) noexcept; |
0 | 45 |
46 /** | |
47 * Decode the multibyte buffer into a Unicode code point. | |
48 * | |
49 * \param c the code point destination | |
50 * \param res the multibyte string. | |
51 */ | |
6
d9c9a35cb4b2
Get rid of export macro, use CMake
David Demelier <markand@malikania.fr>
parents:
5
diff
changeset
|
52 void decode(char32_t& c, const char* res) noexcept; |
0 | 53 |
54 /** | |
55 * Get the number of bytes for the first multi byte character from a | |
56 * utf-8 string. | |
57 * | |
58 * This can be used to iterate a valid UTF-8 string to jump to the next | |
59 * real character. | |
60 * | |
61 * \param c the first byte of the multi-byte character | |
62 * \return the number of bytes [1-4] or -1 if invalid | |
63 */ | |
7 | 64 auto nbytes_utf8(char c) noexcept -> int; |
0 | 65 |
66 /** | |
67 * Get the number of bytes for the unicode point. | |
68 * | |
69 * \param point the unicode point | |
70 * \return the number of bytes [1-4] or -1 if invalid | |
71 */ | |
7 | 72 auto nbytes_point(char32_t point) noexcept -> int; |
0 | 73 |
74 /** | |
75 * Get the real number of characters in a string. | |
76 * | |
77 * \param str the string | |
78 * \return the length | |
79 * \throw std::invalid_argument on invalid sequence | |
80 */ | |
7 | 81 auto length(std::string_view str) -> unsigned; |
0 | 82 |
83 /** | |
84 * Iterate over all real characters in the UTF-8 string. | |
85 * | |
86 * The function must have the following signature: | |
87 * void f(char ch) | |
88 * | |
89 * \param str the UTF-8 string | |
90 * \param function the function callback | |
91 * \throw std::invalid_argument on invalid sequence | |
92 */ | |
93 template <typename Func> | |
7 | 94 void for_each(std::string_view str, Func function) |
0 | 95 { |
96 for (size_t i = 0; i < str.size(); ) { | |
97 char32_t point = 0; | |
2 | 98 int size = nbytes_utf8(str[i]); |
0 | 99 |
3 | 100 if (size < 0) |
0 | 101 throw std::invalid_argument("invalid sequence"); |
102 | |
103 decode(point, str.data() + i); | |
104 function(point); | |
105 | |
106 i += size; | |
107 } | |
108 } | |
109 | |
110 /** | |
111 * Convert a UTF-32 string to UTF-8 string. | |
112 * | |
113 * \param array the UTF-32 string | |
114 * \return the UTF-8 string | |
115 * \throw std::invalid_argument on invalid sequence | |
116 */ | |
7 | 117 auto to_utf8(std::u32string_view array) -> std::string; |
0 | 118 |
119 /** | |
120 * Convert a UTF-8 string to UTF-32 string. | |
121 * | |
122 * \param str the UTF-8 string | |
123 * \return the UTF-32 string | |
124 * \throw std::invalid_argument on invalid sequence | |
125 */ | |
7 | 126 auto to_utf32(std::string_view str) -> std::u32string; |
0 | 127 |
128 /** | |
129 * Check if the unicode character is space. | |
130 * | |
131 * \param c the character | |
132 * \return true if space | |
133 */ | |
7 | 134 auto isspace(char32_t c) noexcept -> bool; |
0 | 135 |
136 /** | |
137 * Check if the unicode character is digit. | |
138 * | |
139 * \param c the character | |
140 * \return true if digit | |
141 */ | |
7 | 142 auto isdigit(char32_t c) noexcept -> bool; |
0 | 143 |
144 /** | |
145 * Check if the unicode character is alpha category. | |
146 * | |
147 * \param c the character | |
148 * \return true if alpha | |
149 */ | |
7 | 150 auto isalpha(char32_t c) noexcept -> bool; |
0 | 151 |
152 /** | |
153 * Check if the unicode character is upper case. | |
154 * | |
155 * \param c the character | |
156 * \return true if upper case | |
157 */ | |
7 | 158 auto isupper(char32_t c) noexcept -> bool; |
0 | 159 |
160 /** | |
161 * Check if the unicode character is lower case. | |
162 * | |
163 * \param c the character | |
164 * \return true if lower case | |
165 */ | |
7 | 166 auto islower(char32_t c) noexcept -> bool; |
0 | 167 |
168 /** | |
169 * Check if the unicode character is title case. | |
170 * | |
171 * \param c the character | |
172 * \return true if title case | |
173 */ | |
7 | 174 auto istitle(char32_t c) noexcept -> bool; |
0 | 175 |
176 /** | |
177 * Convert to upper case. | |
178 * | |
179 * \param c the character | |
180 * \return the upper case character | |
181 */ | |
7 | 182 auto toupper(char32_t c) noexcept -> char32_t; |
0 | 183 |
184 /** | |
185 * Convert to lower case. | |
186 * | |
187 * \param c the character | |
188 * \return the lower case character | |
189 */ | |
7 | 190 auto tolower(char32_t c) noexcept -> char32_t; |
0 | 191 |
192 /** | |
193 * Convert to title case. | |
194 * | |
195 * \param c the character | |
196 * \return the title case character | |
197 */ | |
7 | 198 auto totitle(char32_t c) noexcept -> char32_t; |
0 | 199 |
200 /** | |
201 * Convert the UTF-32 string to upper case. | |
202 * | |
7 | 203 * \param str the string |
0 | 204 * \return the upper case string |
205 */ | |
7 | 206 auto toupper(std::u32string_view str) -> std::u32string; |
0 | 207 |
208 /** | |
209 * Convert the UTF-8 string to upper case. | |
210 * | |
7 | 211 * \param str the string |
0 | 212 * \return the upper case string |
213 * \warning very slow at the moment | |
214 */ | |
7 | 215 auto toupper(std::string_view str) -> std::string; |
0 | 216 |
217 /** | |
218 * Convert the UTF-32 string to lower case. | |
219 * | |
7 | 220 * \param str the string |
0 | 221 * \return the lower case string |
222 */ | |
7 | 223 auto tolower(std::u32string_view str) -> std::u32string; |
0 | 224 |
225 /** | |
226 * Convert the UTF-8 string to lower case. | |
227 * | |
7 | 228 * \param str the string |
0 | 229 * \return the lower case string |
230 * \warning very slow at the moment | |
231 */ | |
7 | 232 auto tolower(std::string_view str) -> std::string; |
0 | 233 |
234 } // !unicode | |
235 | |
236 #endif // !UNICODE_HPP |