Mercurial > libunicode
annotate generator/make-unicode/unicode.hpp @ 6:d9c9a35cb4b2
Get rid of export macro, use CMake
author | David Demelier <markand@malikania.fr> |
---|---|
date | Wed, 09 May 2018 08:56:34 +0200 |
parents | ebcc8c9c8831 |
children | 6ecc84c922b2 |
rev | line source |
---|---|
0 | 1 /* |
2 * unicode.hpp -- UTF-8 to UTF-32 conversions and various operations | |
3 * | |
5 | 4 * Copyright (c) 2013-2018 David Demelier <markand@malikania.fr> |
0 | 5 * |
6 * Permission to use, copy, modify, and/or distribute this software for any | |
7 * purpose with or without fee is hereby granted, provided that the above | |
8 * copyright notice and this permission notice appear in all copies. | |
9 * | |
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
17 */ | |
18 | |
19 #ifndef UNICODE_HPP | |
20 #define UNICODE_HPP | |
21 | |
22 /** | |
23 * \file unicode.hpp | |
24 * \brief UTF-8 to UTF-32 conversions | |
25 * \author David Demelier <markand@malikania.fr> | |
26 * \warning These files are auto-generated! | |
27 */ | |
28 | |
29 #include <stdexcept> | |
30 #include <string> | |
31 | |
32 /** | |
33 * \brief Unicode namespace. | |
34 */ | |
35 namespace unicode { | |
36 | |
37 /** | |
38 * Encode the Unicode code point into a multibyte string. | |
39 * | |
40 * \param point the unicode code point | |
41 * \param res the output buffer | |
42 */ | |
6
d9c9a35cb4b2
Get rid of export macro, use CMake
David Demelier <markand@malikania.fr>
parents:
5
diff
changeset
|
43 void encode(char32_t point, char res[5]) noexcept; |
0 | 44 |
45 /** | |
46 * Decode the multibyte buffer into a Unicode code point. | |
47 * | |
48 * \param c the code point destination | |
49 * \param res the multibyte string. | |
50 */ | |
6
d9c9a35cb4b2
Get rid of export macro, use CMake
David Demelier <markand@malikania.fr>
parents:
5
diff
changeset
|
51 void decode(char32_t& c, const char* res) noexcept; |
0 | 52 |
53 /** | |
54 * Get the number of bytes for the first multi byte character from a | |
55 * utf-8 string. | |
56 * | |
57 * This can be used to iterate a valid UTF-8 string to jump to the next | |
58 * real character. | |
59 * | |
60 * \param c the first multi byte character | |
61 * \return the number of bytes [1-4] or -1 if invalid | |
62 */ | |
6
d9c9a35cb4b2
Get rid of export macro, use CMake
David Demelier <markand@malikania.fr>
parents:
5
diff
changeset
|
63 int nbytes_utf8(char c) noexcept; |
0 | 64 |
65 /** | |
66 * Get the number of bytes for the unicode point. | |
67 * | |
68 * \param point the unicode point | |
69 * \return the number of bytes [1-4] or -1 if invalid | |
70 */ | |
6
d9c9a35cb4b2
Get rid of export macro, use CMake
David Demelier <markand@malikania.fr>
parents:
5
diff
changeset
|
71 int nbytes_point(char32_t point) noexcept; |
0 | 72 |
73 /** | |
74 * Get the real number of characters in a string. | |
75 * | |
76 * \param str the string | |
77 * \return the length | |
78 * \throw std::invalid_argument on invalid sequence | |
79 */ | |
6
d9c9a35cb4b2
Get rid of export macro, use CMake
David Demelier <markand@malikania.fr>
parents:
5
diff
changeset
|
80 unsigned length(const std::string& str); |
0 | 81 |
82 /** | |
83 * Iterate over all real characters in the UTF-8 string. | |
84 * | |
85 * The function must have the following signature: | |
86 * void f(char ch) | |
87 * | |
88 * \param str the UTF-8 string | |
89 * \param function the function callback | |
90 * \throw std::invalid_argument on invalid sequence | |
91 */ | |
92 template <typename Func> | |
2 | 93 void for_each(const std::string& str, Func function) |
0 | 94 { |
95 for (size_t i = 0; i < str.size(); ) { | |
96 char32_t point = 0; | |
2 | 97 int size = nbytes_utf8(str[i]); |
0 | 98 |
3 | 99 if (size < 0) |
0 | 100 throw std::invalid_argument("invalid sequence"); |
101 | |
102 decode(point, str.data() + i); | |
103 function(point); | |
104 | |
105 i += size; | |
106 } | |
107 } | |
108 | |
109 /** | |
110 * Convert a UTF-32 string to UTF-8 string. | |
111 * | |
112 * \param array the UTF-32 string | |
113 * \return the UTF-8 string | |
114 * \throw std::invalid_argument on invalid sequence | |
115 */ | |
6
d9c9a35cb4b2
Get rid of export macro, use CMake
David Demelier <markand@malikania.fr>
parents:
5
diff
changeset
|
116 std::string to_utf8(const std::u32string& array); |
0 | 117 |
118 /** | |
119 * Convert a UTF-8 string to UTF-32 string. | |
120 * | |
121 * \param str the UTF-8 string | |
122 * \return the UTF-32 string | |
123 * \throw std::invalid_argument on invalid sequence | |
124 */ | |
6
d9c9a35cb4b2
Get rid of export macro, use CMake
David Demelier <markand@malikania.fr>
parents:
5
diff
changeset
|
125 std::u32string to_utf32(const std::string& str); |
0 | 126 |
127 /** | |
128 * Check if the unicode character is space. | |
129 * | |
130 * \param c the character | |
131 * \return true if space | |
132 */ | |
6
d9c9a35cb4b2
Get rid of export macro, use CMake
David Demelier <markand@malikania.fr>
parents:
5
diff
changeset
|
133 bool isspace(char32_t c) noexcept; |
0 | 134 |
135 /** | |
136 * Check if the unicode character is digit. | |
137 * | |
138 * \param c the character | |
139 * \return true if digit | |
140 */ | |
6
d9c9a35cb4b2
Get rid of export macro, use CMake
David Demelier <markand@malikania.fr>
parents:
5
diff
changeset
|
141 bool isdigit(char32_t c) noexcept; |
0 | 142 |
143 /** | |
144 * Check if the unicode character is alpha category. | |
145 * | |
146 * \param c the character | |
147 * \return true if alpha | |
148 */ | |
6
d9c9a35cb4b2
Get rid of export macro, use CMake
David Demelier <markand@malikania.fr>
parents:
5
diff
changeset
|
149 bool isalpha(char32_t c) noexcept; |
0 | 150 |
151 /** | |
152 * Check if the unicode character is upper case. | |
153 * | |
154 * \param c the character | |
155 * \return true if upper case | |
156 */ | |
6
d9c9a35cb4b2
Get rid of export macro, use CMake
David Demelier <markand@malikania.fr>
parents:
5
diff
changeset
|
157 bool isupper(char32_t c) noexcept; |
0 | 158 |
159 /** | |
160 * Check if the unicode character is lower case. | |
161 * | |
162 * \param c the character | |
163 * \return true if lower case | |
164 */ | |
6
d9c9a35cb4b2
Get rid of export macro, use CMake
David Demelier <markand@malikania.fr>
parents:
5
diff
changeset
|
165 bool islower(char32_t c) noexcept; |
0 | 166 |
167 /** | |
168 * Check if the unicode character is title case. | |
169 * | |
170 * \param c the character | |
171 * \return true if title case | |
172 */ | |
6
d9c9a35cb4b2
Get rid of export macro, use CMake
David Demelier <markand@malikania.fr>
parents:
5
diff
changeset
|
173 bool istitle(char32_t c) noexcept; |
0 | 174 |
175 /** | |
176 * Convert to upper case. | |
177 * | |
178 * \param c the character | |
179 * \return the upper case character | |
180 */ | |
6
d9c9a35cb4b2
Get rid of export macro, use CMake
David Demelier <markand@malikania.fr>
parents:
5
diff
changeset
|
181 char32_t toupper(char32_t c) noexcept; |
0 | 182 |
183 /** | |
184 * Convert to lower case. | |
185 * | |
186 * \param c the character | |
187 * \return the lower case character | |
188 */ | |
6
d9c9a35cb4b2
Get rid of export macro, use CMake
David Demelier <markand@malikania.fr>
parents:
5
diff
changeset
|
189 char32_t tolower(char32_t c) noexcept; |
0 | 190 |
191 /** | |
192 * Convert to title case. | |
193 * | |
194 * \param c the character | |
195 * \return the title case character | |
196 */ | |
6
d9c9a35cb4b2
Get rid of export macro, use CMake
David Demelier <markand@malikania.fr>
parents:
5
diff
changeset
|
197 char32_t totitle(char32_t c) noexcept; |
0 | 198 |
/**
 * Upper-case every code point of a UTF-32 string.
 *
 * The argument is taken by value, so the caller's string is left untouched;
 * each element of the copy is replaced by the result of the single code
 * point toupper() overload.
 *
 * \param str the UTF-32 string
 * \return the converted copy
 */
inline std::u32string toupper(std::u32string str)
{
    for (char32_t& ch : str)
        ch = toupper(ch);

    return str;
}
212 | |
213 /** | |
214 * Convert the UTF-8 string to upper case. | |
215 * | |
216 * \param str the str | |
217 * \return the upper case string | |
218 * \warning very slow at the moment | |
219 */ | |
2 | 220 inline std::string toupper(const std::string& str) |
0 | 221 { |
222 std::string result; | |
223 char buffer[5]; | |
224 | |
2 | 225 for_each(str, [&] (char32_t code) { |
0 | 226 encode(toupper(code), buffer); |
227 result += buffer; | |
228 }); | |
229 | |
230 return result; | |
231 } | |
232 | |
/**
 * Lower-case every code point of a UTF-32 string.
 *
 * The argument is taken by value, so the caller's string is left untouched;
 * each element of the copy is replaced by the result of the single code
 * point tolower() overload.
 *
 * \param str the UTF-32 string
 * \return the converted copy
 */
inline std::u32string tolower(std::u32string str)
{
    for (char32_t& ch : str)
        ch = tolower(ch);

    return str;
}
246 | |
247 /** | |
248 * Convert the UTF-8 string to lower case. | |
249 * | |
250 * \param str the str | |
251 * \return the lower case string | |
252 * \warning very slow at the moment | |
253 */ | |
2 | 254 inline std::string tolower(const std::string& str) |
0 | 255 { |
256 std::string result; | |
257 char buffer[5]; | |
258 | |
2 | 259 for_each(str, [&] (char32_t code) { |
0 | 260 encode(tolower(code), buffer); |
261 result += buffer; | |
262 }); | |
263 | |
264 return result; | |
265 } | |
266 | |
267 } // !unicode | |
268 | |
269 #endif // !UNICODE_HPP |