Mercurial > code
comparison tools/mkunicode/Unicode.h @ 395:b78d6d8f2872
Unicode: remove class, use namespace
author | David Demelier <markand@malikania.fr> |
---|---|
date | Mon, 28 Sep 2015 15:55:46 +0200 |
parents | 7fe8d4094983 |
children | d5ec1174b707 |
comparison
equal
deleted
inserted
replaced
394:fdceef4be88b | 395:b78d6d8f2872 |
---|---|
25 */ | 25 */ |
26 | 26 |
27 #include <stdexcept> | 27 #include <stdexcept> |
28 #include <string> | 28 #include <string> |
29 | 29 |
30 /** | 30 namespace unicode { |
31 * @class Unicode | 31 |
32 * @brief Conversion between UTF-8 and UTF-32 | 32 void encode(char32_t point, char res[5]) noexcept; |
33 */ | 33 void decode(char32_t &c, const char *res) noexcept; |
34 class Unicode { | 34 |
35 private: | 35 /** |
36 static void encode(char32_t point, char res[5]) noexcept; | 36 * Get the number of bytes for the first multi byte character from a |
37 static void decode(char32_t &c, const char *res) noexcept; | 37 * utf-8 string. |
38 | 38 * |
39 public: | 39 * This can be used to iterate a valid UTF-8 string to jump to the next |
40 /** | 40 * real character. |
41 * Get the number of bytes for the first multi byte character from a | 41 * |
42 * utf-8 string. | 42 * @param c the first byte of the multi byte character |
43 * | 43 * @return the number of bytes [1-4] |
44 * This can be used to iterate a valid UTF-8 string to jump to the next | 44 */ |
45 * real character. | 45 int nbytesUtf8(char c) noexcept; |
46 * | 46 |
47 * @param c the first byte of the multi byte character | 47 /** |
48 * @return the number of bytes [1-4] | 48 * Get the number of bytes for the unicode point. |
49 */ | 49 * |
50 static int nbytesUtf8(char c) noexcept; | 50 * @param point the unicode point |
51 | 51 * @return the number of bytes [1-4] or -1 on invalid |
52 /** | 52 */ |
53 * Get the number of bytes for the unicode point. | 53 int nbytesPoint(char32_t point) noexcept; |
54 * | 54 |
55 * @param point the unicode point | 55 /** |
56 * @return the number of bytes [1-4] or -1 on invalid | 56 * Get the real number of characters in a string. |
57 */ | 57 * |
58 static int nbytesPoint(char32_t point) noexcept; | 58 * @param str the string |
59 | 59 * @return the length |
60 /** | 60 * @throw std::invalid_argument on invalid sequence |
61 * Get the real number of characters in a string. | 61 */ |
62 * | 62 int length(const std::string &str); |
63 * @param str the string | 63 |
64 * @return the length | 64 /** |
65 * @throw std::invalid_argument on invalid sequence | 65 * Iterate over all real characters in the UTF-8 string. |
66 */ | 66 * |
67 static int length(const std::string &str); | 67 * The function must have the following signature: |
68 | 68 * void f(char32_t ch) |
69 /** | 69 * |
70 * Iterate over all real characters in the UTF-8 string. | 70 * @param str the UTF-8 string |
71 * | 71 * @throw std::invalid_argument on invalid sequence |
72 * The function must have the following signature: | 72 */ |
73 * void f(char32_t ch) | 73 template <typename Func> |
74 * | 74 void forEach(const std::string &str, Func function) |
75 * @param str the UTF-8 string | 75 { |
76 * @throw std::invalid_argument on invalid sequence | 76 for (size_t i = 0; i < str.size(); ) { |
77 */ | 77 char32_t point = 0; |
78 template <typename Func> | 78 int size = nbytesUtf8(str[i]); |
79 static void forEach(const std::string &str, Func function) | 79 |
80 { | 80 if (size < 0) { |
81 for (size_t i = 0; i < str.size(); ) { | 81 throw std::invalid_argument("invalid sequence"); |
82 char32_t point = 0; | |
83 int size = nbytesUtf8(str[i]); | |
84 | |
85 if (size < 0) { | |
86 throw std::invalid_argument("invalid sequence"); | |
87 } | |
88 | |
89 decode(point, str.data() + i); | |
90 function(point); | |
91 | |
92 i += size; | |
93 } | 82 } |
83 | |
84 decode(point, str.data() + i); | |
85 function(point); | |
86 | |
87 i += size; | |
94 } | 88 } |
95 | 89 } |
96 /** | 90 |
97 * Convert a UTF-32 string to UTF-8 string. | 91 /** |
98 * | 92 * Convert a UTF-32 string to UTF-8 string. |
99 * @param array the UTF-32 string | 93 * |
100 * @return the UTF-8 string | 94 * @param array the UTF-32 string |
101 * @throw std::invalid_argument on invalid sequence | 95 * @return the UTF-8 string |
102 */ | 96 * @throw std::invalid_argument on invalid sequence |
103 static std::string toUtf8(const std::u32string &array); | 97 */ |
104 | 98 std::string toUtf8(const std::u32string &array); |
105 /** | 99 |
106 * Convert a UTF-8 string to UTF-32 string. | 100 /** |
107 * | 101 * Convert a UTF-8 string to UTF-32 string. |
108 * @param str the UTF-8 string | 102 * |
109 * @return the UTF-32 string | 103 * @param str the UTF-8 string |
110 * @throw std::invalid_argument on invalid sequence | 104 * @return the UTF-32 string |
111 */ | 105 * @throw std::invalid_argument on invalid sequence |
112 static std::u32string toUtf32(const std::string &str); | 106 */ |
113 | 107 std::u32string toUtf32(const std::string &str); |
114 /** | 108 |
115 * Check if the unicode character is space. | 109 /** |
116 * | 110 * Check if the unicode character is space. |
117 * @param c the character | 111 * |
118 * @return true if space | 112 * @param c the character |
119 */ | 113 * @return true if space |
120 static bool isspace(char32_t c) noexcept; | 114 */ |
121 | 115 bool isspace(char32_t c) noexcept; |
122 /** | 116 |
123 * Check if the unicode character is digit. | 117 /** |
124 * | 118 * Check if the unicode character is digit. |
125 * @param c the character | 119 * |
126 * @return true if digit | 120 * @param c the character |
127 */ | 121 * @return true if digit |
128 static bool isdigit(char32_t c) noexcept; | 122 */ |
129 | 123 bool isdigit(char32_t c) noexcept; |
130 /** | 124 |
131 * Check if the unicode character is alpha category. | 125 /** |
132 * | 126 * Check if the unicode character is alpha category. |
133 * @param c the character | 127 * |
134 * @return true if alpha | 128 * @param c the character |
135 */ | 129 * @return true if alpha |
136 static bool isalpha(char32_t c) noexcept; | 130 */ |
137 | 131 bool isalpha(char32_t c) noexcept; |
138 /** | 132 |
139 * Check if the unicode character is upper case. | 133 /** |
140 * | 134 * Check if the unicode character is upper case. |
141 * @param c the character | 135 * |
142 * @return true if upper case | 136 * @param c the character |
143 */ | 137 * @return true if upper case |
144 static bool isupper(char32_t c) noexcept; | 138 */ |
145 | 139 bool isupper(char32_t c) noexcept; |
146 /** | 140 |
147 * Check if the unicode character is lower case. | 141 /** |
148 * | 142 * Check if the unicode character is lower case. |
149 * @param c the character | 143 * |
150 * @return true if lower case | 144 * @param c the character |
151 */ | 145 * @return true if lower case |
152 static bool islower(char32_t c) noexcept; | 146 */ |
153 | 147 bool islower(char32_t c) noexcept; |
154 /** | 148 |
155 * Check if the unicode character is title case. | 149 /** |
156 * | 150 * Check if the unicode character is title case. |
157 * @param c the character | 151 * |
158 * @return true if title case | 152 * @param c the character |
159 */ | 153 * @return true if title case |
160 static bool istitle(char32_t c) noexcept; | 154 */ |
161 | 155 bool istitle(char32_t c) noexcept; |
162 /** | 156 |
163 * Convert to upper case. | 157 /** |
164 * | 158 * Convert to upper case. |
165 * @param c the character | 159 * |
166 * @return the upper case character | 160 * @param c the character |
167 */ | 161 * @return the upper case character |
168 static char32_t toupper(char32_t c) noexcept; | 162 */ |
169 | 163 char32_t toupper(char32_t c) noexcept; |
170 /** | 164 |
171 * Convert to lower case. | 165 /** |
172 * | 166 * Convert to lower case. |
173 * @param c the character | 167 * |
174 * @return the lower case character | 168 * @param c the character |
175 */ | 169 * @return the lower case character |
176 static char32_t tolower(char32_t c) noexcept; | 170 */ |
177 | 171 char32_t tolower(char32_t c) noexcept; |
178 /** | 172 |
179 * Convert to title case. | 173 /** |
180 * | 174 * Convert to title case. |
181 * @param c the character | 175 * |
182 * @return the title case character | 176 * @param c the character |
183 */ | 177 * @return the title case character |
184 static char32_t totitle(char32_t c) noexcept; | 178 */ |
185 | 179 char32_t totitle(char32_t c) noexcept; |
186 /** | 180 |
187 * Convert the UTF-8 string to upper case. | 181 /** |
188 * | 182 * Convert the UTF-32 string to upper case. |
189 * @param str the str | 183 * |
190 * @return the upper case string | 184 * @param str the str |
191 * @warning very slow at the moment | 185 * @return the upper case string |
192 */ | 186 */ |
193 static inline std::string toupper(const std::string &str) | 187 inline std::u32string toupper(std::u32string str) |
194 { | 188 { |
195 return toUtf8(toupper(toUtf32(str))); | 189 for (size_t i = 0; i < str.size(); ++i) { |
190 str[i] = toupper(str[i]); | |
196 } | 191 } |
197 | 192 |
198 /** | 193 return str; |
199 * Convert the UTF-32 string to upper case. | 194 } |
200 * | 195 |
201 * @param str the str | 196 /** |
202 * @return the upper case string | 197 * Convert the UTF-8 string to upper case. |
203 */ | 198 * |
204 static inline std::u32string toupper(std::u32string str) | 199 * @param str the str |
205 { | 200 * @return the upper case string |
206 for (size_t i = 0; i < str.size(); ++i) { | 201 * @warning very slow at the moment |
207 str[i] = toupper(str[i]); | 202 */ |
208 } | 203 inline std::string toupper(const std::string &str) |
209 | 204 { |
210 return str; | 205 return toUtf8(toupper(toUtf32(str))); |
206 } | |
207 | |
208 /** | |
209 * Convert the UTF-32 string to lower case. | |
210 * | |
211 * @param str the str | |
212 * @return the lower case string | |
213 */ | |
214 inline std::u32string tolower(std::u32string str) | |
215 { | |
216 for (size_t i = 0; i < str.size(); ++i) { | |
217 str[i] = tolower(str[i]); | |
211 } | 218 } |
212 | 219 |
213 /** | 220 return str; |
214 * Convert the UTF-8 string to lower case. | 221 } |
215 * | 222 |
216 * @param str the str | 223 /** |
217 * @return the lower case string | 224 * Convert the UTF-8 string to lower case. |
218 * @warning very slow at the moment | 225 * |
219 */ | 226 * @param str the str |
220 static inline std::string tolower(const std::string &str) | 227 * @return the lower case string |
221 { | 228 * @warning very slow at the moment |
222 return toUtf8(tolower(toUtf32(str))); | 229 */ |
223 } | 230 inline std::string tolower(const std::string &str) |
224 | 231 { |
225 /** | 232 return toUtf8(tolower(toUtf32(str))); |
226 * Convert the UTF-32 string to lower case. | 233 } |
227 * | 234 |
228 * @param str the str | 235 } // !unicode |
229 * @return the lower case string | |
230 */ | |
231 static inline std::u32string tolower(std::u32string str) | |
232 { | |
233 for (size_t i = 0; i < str.size(); ++i) { | |
234 str[i] = tolower(str[i]); | |
235 } | |
236 | |
237 return str; | |
238 } | |
239 }; | |
240 | 236 |
241 #endif // !_UTF8_H_ | 237 #endif // !_UTF8_H_ |