Mercurial > libunicode
comparison generator/make-unicode/unicode.hpp @ 2:84765c6f4872
New style
author | David Demelier <markand@malikania.fr> |
---|---|
date | Thu, 02 Feb 2017 18:07:27 +0100 |
parents | 0d9603b420c2 |
children | d9d3406c1250 |
comparison
equal
deleted
inserted
replaced
1:0d9603b420c2 | 2:84765c6f4872 |
---|---|
29 /** | 29 /** |
30 * \page unicode Basic unicode management. | 30 * \page unicode Basic unicode management. |
31 * | 31 * |
32 * ## Export macros | 32 * ## Export macros |
33 * | 33 * |
34 * You must define `UNICODE_DLL` globally and `UNICODE_BUILDING_DLL` when compiling the library if you want a DLL; alternatively, you can provide | 34 * You must define `UNICODE_DLL` globally and `UNICODE_BUILDING_DLL` when |
35 * compiling the library if you want a DLL; alternatively, you can provide | |
35 * your own `UNICODE_EXPORT` macro instead. | 36 * your own `UNICODE_EXPORT` macro instead. |
36 */ | 37 */ |
37 | 38 |
38 /** | 39 /** |
39 * \cond UNICODE_HIDDEN_SYMBOLS | 40 * \cond UNICODE_HIDDEN_SYMBOLS |
79 * Decode the multibyte buffer into a Unicode code point. | 80 * Decode the multibyte buffer into a Unicode code point. |
80 * | 81 * |
81 * \param c the code point destination | 82 * \param c the code point destination |
82 * \param res the multibyte string. | 83 * \param res the multibyte string. |
83 */ | 84 */ |
84 UNICODE_EXPORT void decode(char32_t &c, const char *res) noexcept; | 85 UNICODE_EXPORT void decode(char32_t& c, const char* res) noexcept; |
85 | 86 |
86 /** | 87 /** |
87 * Get the number of bytes for the first multibyte character from a | 88 * Get the number of bytes for the first multibyte character from a |
88 * UTF-8 string. | 89 * UTF-8 string. |
89 * | 90 * |
91 * real character. | 92 * real character. |
92 * | 93 * |
93 * \param c the first multi byte character | 94 * \param c the first multi byte character |
94 * \return the number of bytes [1-4] or -1 if invalid | 95 * \return the number of bytes [1-4] or -1 if invalid |
95 */ | 96 */ |
96 UNICODE_EXPORT int nbytesUtf8(char c) noexcept; | 97 UNICODE_EXPORT int nbytes_utf8(char c) noexcept; |
97 | 98 |
98 /** | 99 /** |
99 * Get the number of bytes for the unicode point. | 100 * Get the number of bytes for the unicode point. |
100 * | 101 * |
101 * \param point the unicode point | 102 * \param point the unicode point |
102 * \return the number of bytes [1-4] or -1 if invalid | 103 * \return the number of bytes [1-4] or -1 if invalid |
103 */ | 104 */ |
104 UNICODE_EXPORT int nbytesPoint(char32_t point) noexcept; | 105 UNICODE_EXPORT int nbytes_point(char32_t point) noexcept; |
105 | 106 |
106 /** | 107 /** |
107 * Get the real number of characters in a string. | 108 * Get the real number of characters in a string. |
108 * | 109 * |
109 * \param str the string | 110 * \param str the string |
110 * \return the length | 111 * \return the length |
111 * \throw std::invalid_argument on invalid sequence | 112 * \throw std::invalid_argument on invalid sequence |
112 */ | 113 */ |
113 UNICODE_EXPORT unsigned length(const std::string &str); | 114 UNICODE_EXPORT unsigned length(const std::string& str); |
114 | 115 |
115 /** | 116 /** |
116 * Iterate over all real characters in the UTF-8 string. | 117 * Iterate over all real characters in the UTF-8 string. |
117 * | 118 * |
118 * The function must have the following signature: | 119 * The function must have the following signature: |
121 * \param str the UTF-8 string | 122 * \param str the UTF-8 string |
122 * \param function the function callback | 123 * \param function the function callback |
123 * \throw std::invalid_argument on invalid sequence | 124 * \throw std::invalid_argument on invalid sequence |
124 */ | 125 */ |
125 template <typename Func> | 126 template <typename Func> |
126 void forEach(const std::string &str, Func function) | 127 void for_each(const std::string& str, Func function) |
127 { | 128 { |
128 for (size_t i = 0; i < str.size(); ) { | 129 for (size_t i = 0; i < str.size(); ) { |
129 char32_t point = 0; | 130 char32_t point = 0; |
130 int size = nbytesUtf8(str[i]); | 131 int size = nbytes_utf8(str[i]); |
131 | 132 |
132 if (size < 0) | 133 if (size < 0) { |
133 throw std::invalid_argument("invalid sequence"); | 134 throw std::invalid_argument("invalid sequence"); |
135 } | |
134 | 136 |
135 decode(point, str.data() + i); | 137 decode(point, str.data() + i); |
136 function(point); | 138 function(point); |
137 | 139 |
138 i += size; | 140 i += size; |
144 * | 146 * |
145 * \param array the UTF-32 string | 147 * \param array the UTF-32 string |
146 * \return the UTF-8 string | 148 * \return the UTF-8 string |
147 * \throw std::invalid_argument on invalid sequence | 149 * \throw std::invalid_argument on invalid sequence |
148 */ | 150 */ |
149 UNICODE_EXPORT std::string toUtf8(const std::u32string &array); | 151 UNICODE_EXPORT std::string to_utf8(const std::u32string& array); |
150 | 152 |
151 /** | 153 /** |
152 * Convert a UTF-8 string to UTF-32 string. | 154 * Convert a UTF-8 string to UTF-32 string. |
153 * | 155 * |
154 * \param str the UTF-8 string | 156 * \param str the UTF-8 string |
155 * \return the UTF-32 string | 157 * \return the UTF-32 string |
156 * \throw std::invalid_argument on invalid sequence | 158 * \throw std::invalid_argument on invalid sequence |
157 */ | 159 */ |
158 UNICODE_EXPORT std::u32string toUtf32(const std::string &str); | 160 UNICODE_EXPORT std::u32string to_utf32(const std::string& str); |
159 | 161 |
160 /** | 162 /** |
161 * Check if the Unicode character is a space. | 163 * Check if the Unicode character is a space. |
162 * | 164 * |
163 * \param c the character | 165 * \param c the character |
235 * \param str the string | 237 * \param str the string |
236 * \return the upper case string | 238 * \return the upper case string |
237 */ | 239 */ |
238 inline std::u32string toupper(std::u32string str) | 240 inline std::u32string toupper(std::u32string str) |
239 { | 241 { |
240 for (size_t i = 0; i < str.size(); ++i) | 242 for (size_t i = 0; i < str.size(); ++i) { |
241 str[i] = toupper(str[i]); | 243 str[i] = toupper(str[i]); |
244 } | |
242 | 245 |
243 return str; | 246 return str; |
244 } | 247 } |
245 | 248 |
246 /** | 249 /** |
248 * | 251 * |
249 * \param str the string | 252 * \param str the string |
250 * \return the upper case string | 253 * \return the upper case string |
251 * \warning very slow at the moment | 254 * \warning very slow at the moment |
252 */ | 255 */ |
253 inline std::string toupper(const std::string &str) | 256 inline std::string toupper(const std::string& str) |
254 { | 257 { |
255 std::string result; | 258 std::string result; |
256 char buffer[5]; | 259 char buffer[5]; |
257 | 260 |
258 forEach(str, [&] (char32_t code) { | 261 for_each(str, [&] (char32_t code) { |
259 encode(toupper(code), buffer); | 262 encode(toupper(code), buffer); |
260 result += buffer; | 263 result += buffer; |
261 }); | 264 }); |
262 | 265 |
263 return result; | 266 return result; |
269 * \param str the string | 272 * \param str the string |
270 * \return the lower case string | 273 * \return the lower case string |
271 */ | 274 */ |
272 inline std::u32string tolower(std::u32string str) | 275 inline std::u32string tolower(std::u32string str) |
273 { | 276 { |
274 for (size_t i = 0; i < str.size(); ++i) | 277 for (size_t i = 0; i < str.size(); ++i) { |
275 str[i] = tolower(str[i]); | 278 str[i] = tolower(str[i]); |
279 } | |
276 | 280 |
277 return str; | 281 return str; |
278 } | 282 } |
279 | 283 |
280 /** | 284 /** |
282 * | 286 * |
283 * \param str the string | 287 * \param str the string |
284 * \return the lower case string | 288 * \return the lower case string |
285 * \warning very slow at the moment | 289 * \warning very slow at the moment |
286 */ | 290 */ |
287 inline std::string tolower(const std::string &str) | 291 inline std::string tolower(const std::string& str) |
288 { | 292 { |
289 std::string result; | 293 std::string result; |
290 char buffer[5]; | 294 char buffer[5]; |
291 | 295 |
292 forEach(str, [&] (char32_t code) { | 296 for_each(str, [&] (char32_t code) { |
293 encode(tolower(code), buffer); | 297 encode(tolower(code), buffer); |
294 result += buffer; | 298 result += buffer; |
295 }); | 299 }); |
296 | 300 |
297 return result; | 301 return result; |