Mercurial > code
annotate tools/mkunicode/Unicode.h @ 352:7fe8d4094983
Utf8:
- Fix invalid decoding from UTF-8 to UTF-32
- Add all files
author | David Demelier <markand@malikania.fr> |
---|---|
date | Wed, 08 Apr 2015 12:33:45 +0200 |
parents | C++/modules/Utf8/Utf8.h@0b576ee64d45 |
children | b78d6d8f2872 |
rev | line source |
---|---|
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
1 /* |
352 | 2 * Unicode.h -- UTF-8 to UTF-32 conversions and various operations |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
3 * |
352 | 4 * Copyright (c) 2013, 2014, 2015 David Demelier <markand@malikania.fr> |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
5 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
6 * Permission to use, copy, modify, and/or distribute this software for any |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
7 * purpose with or without fee is hereby granted, provided that the above |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
8 * copyright notice and this permission notice appear in all copies. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
9 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
17 */ |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
18 |
352 | 19 #ifndef _UNICODE_H_ |
20 #define _UNICODE_H_ | |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
21 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
22 /** |
352 | 23 * @file Unicode.h |
307 | 24 * @brief UTF-8 to UTF-32 conversions |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
25 */ |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
26 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
27 #include <stdexcept> |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
28 #include <string> |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
29 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
30 /** |
352 | 31 * @class Unicode |
307 | 32 * @brief Conversion between UTF-8 and UTF-32 |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
33 */ |
352 | 34 class Unicode { |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
35 private: |
352 | 36 static void encode(char32_t point, char res[5]) noexcept; |
37 static void decode(char32_t &c, const char *res) noexcept; | |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
38 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
39 public: |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
40 /** |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
41 * Get the number of bytes for the first multi byte character from a |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
42 * utf-8 string. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
43 * |
352 | 44 * This can be used to iterate a valid UTF-8 string to jump to the next |
45 * real character. | |
46 * | |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
47 * @param c the first byte of the multi byte character |
352 | 48 * @return the number of bytes [1-4] |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
49 */ |
352 | 50 static int nbytesUtf8(char c) noexcept; |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
51 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
52 /** |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
53 * Get the number of bytes for the unicode point. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
54 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
55 * @param point the unicode point |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
56 * @return the number of bytes [1-4] or -1 on invalid |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
57 */ |
352 | 58 static int nbytesPoint(char32_t point) noexcept; |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
59 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
60 /** |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
61 * Get the real number of characters in a string. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
62 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
63 * @param str the string |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
64 * @return the length |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
65 * @throw std::invalid_argument on invalid sequence |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
66 */ |
352 | 67 static int length(const std::string &str); |
68 | |
69 /** | |
70 * Iterate over all real characters in the UTF-8 string. | |
71 * | |
72 * The function must have the following signature: | |
73 * void f(char ch) | |
74 * | |
75 * @param str the UTF-8 string | |
76 * @throw std::invalid_argument on invalid sequence | |
77 */ | |
78 template <typename Func> | |
79 static void forEach(const std::string &str, Func function) | |
80 { | |
81 for (size_t i = 0; i < str.size(); ) { | |
82 char32_t point = 0; | |
83 int size = nbytesUtf8(str[i]); | |
84 | |
85 if (size < 0) { | |
86 throw std::invalid_argument("invalid sequence"); | |
87 } | |
88 | |
89 decode(point, str.data() + i); | |
90 function(point); | |
91 | |
92 i += size; | |
93 } | |
94 } | |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
95 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
96 /** |
307 | 97 * Convert a UTF-32 string to UTF-8 string. |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
98 * |
307 | 99 * @param array the UTF-32 string |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
100 * @return the UTF-8 string |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
101 * @throw std::invalid_argument on invalid sequence |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
102 */ |
352 | 103 static std::string toUtf8(const std::u32string &array); |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
104 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
105 /** |
307 | 106 * Convert a UTF-8 string to UTF-32 string. |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
107 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
108 * @param str the UTF-8 string |
307 | 109 * @return the UTF-32 string |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
110 * @throw std::invalid_argument on invalid sequence |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
111 */ |
352 | 112 static std::u32string toUtf32(const std::string &str); |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
113 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
114 /** |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
115 * Check if the unicode character is space. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
116 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
117 * @param c the character |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
118 * @return true if space |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
119 */ |
352 | 120 static bool isspace(char32_t c) noexcept; |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
121 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
122 /** |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
123 * Check if the unicode character is digit. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
124 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
125 * @param c the character |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
126 * @return true if digit |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
127 */ |
352 | 128 static bool isdigit(char32_t c) noexcept; |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
129 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
130 /** |
352 | 131 * Check if the unicode character is in the alpha category. |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
132 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
133 * @param c the character |
352 | 134 * @return true if alpha |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
135 */ |
352 | 136 static bool isalpha(char32_t c) noexcept; |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
137 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
138 /** |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
139 * Check if the unicode character is upper case. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
140 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
141 * @param c the character |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
142 * @return true if upper case |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
143 */ |
352 | 144 static bool isupper(char32_t c) noexcept; |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
145 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
146 /** |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
147 * Check if the unicode character is lower case. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
148 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
149 * @param c the character |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
150 * @return true if lower case |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
151 */ |
352 | 152 static bool islower(char32_t c) noexcept; |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
153 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
154 /** |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
155 * Check if the unicode character is title case. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
156 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
157 * @param c the character |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
158 * @return true if title case |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
159 */ |
352 | 160 static bool istitle(char32_t c) noexcept; |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
161 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
162 /** |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
163 * Convert to upper case. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
164 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
165 * @param c the character |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
166 * @return the upper case character |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
167 */ |
352 | 168 static char32_t toupper(char32_t c) noexcept; |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
169 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
170 /** |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
171 * Convert to lower case. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
172 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
173 * @param c the character |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
174 * @return the lower case character |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
175 */ |
352 | 176 static char32_t tolower(char32_t c) noexcept; |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
177 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
178 /** |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
179 * Convert to title case. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
180 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
181 * @param c the character |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
182 * @return the title case character |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
183 */ |
352 | 184 static char32_t totitle(char32_t c) noexcept; |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
185 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
186 /** |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
187 * Convert the UTF-8 string to upper case. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
188 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
189 * @param str the UTF-8 string |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
190 * @return the upper case string |
352 | 191 * @warning very slow at the moment |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
192 */ |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
193 static inline std::string toupper(const std::string &str) |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
194 { |
352 | 195 return toUtf8(toupper(toUtf32(str))); |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
196 } |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
197 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
198 /** |
307 | 199 * Convert the UTF-32 string to upper case. |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
200 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
201 * @param str the UTF-32 string |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
202 * @return the upper case string |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
203 */ |
352 | 204 static inline std::u32string toupper(std::u32string str) |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
205 { |
352 | 206 for (size_t i = 0; i < str.size(); ++i) { |
207 str[i] = toupper(str[i]); | |
208 } | |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
209 |
352 | 210 return str; |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
211 } |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
212 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
213 /** |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
214 * Convert the UTF-8 string to lower case. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
215 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
216 * @param str the UTF-8 string |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
217 * @return the lower case string |
352 | 218 * @warning very slow at the moment |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
219 */ |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
220 static inline std::string tolower(const std::string &str) |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
221 { |
352 | 222 return toUtf8(tolower(toUtf32(str))); |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
223 } |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
224 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
225 /** |
307 | 226 * Convert the UTF-32 string to lower case. |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
227 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
228 * @param str the UTF-32 string |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
229 * @return the lower case string |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
230 */ |
352 | 231 static inline std::u32string tolower(std::u32string str) |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
232 { |
352 | 233 for (size_t i = 0; i < str.size(); ++i) { |
234 str[i] = tolower(str[i]); | |
235 } | |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
236 |
352 | 237 return str; |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
238 } |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
239 }; |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
240 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
241 #endif // !_UNICODE_H_ |