comparison C++/modules/Unicode/Unicode.h @ 395:b78d6d8f2872

Unicode: remove class, use namespace
author David Demelier <markand@malikania.fr>
date Mon, 28 Sep 2015 15:55:46 +0200
parents 7fe8d4094983
children d5ec1174b707
comparison
equal deleted inserted replaced
394:fdceef4be88b 395:b78d6d8f2872
25 */ 25 */
26 26
27 #include <stdexcept> 27 #include <stdexcept>
28 #include <string> 28 #include <string>
29 29
30 /** 30 namespace unicode {
31 * @class Unicode 31
32 * @brief Conversion between UTF-8 and UTF-32 32 void encode(char32_t point, char res[5]) noexcept;
33 */ 33 void decode(char32_t &c, const char *res) noexcept;
34 class Unicode { 34
35 private: 35 /**
36 static void encode(char32_t point, char res[5]) noexcept; 36 * Get the number of bytes for the first multi byte character from a
37 static void decode(char32_t &c, const char *res) noexcept; 37 * utf-8 string.
38 38 *
39 public: 39 * This can be used to iterate a valid UTF-8 string to jump to the next
40 /** 40 * real character.
41 * Get the number of bytes for the first multi byte character from a 41 *
42 * utf-8 string. 42 * @param c the first byte of the multi byte character
43 * 43 * @return the number of bytes [1-4]
44 * This can be used to iterate a valid UTF-8 string to jump to the next 44 */
45 * real character. 45 int nbytesUtf8(char c) noexcept;
46 * 46
47 * @param c the first byte of the multi byte character 47 /**
48 * @return the number of bytes [1-4] 48 * Get the number of bytes for the unicode point.
49 */ 49 *
50 static int nbytesUtf8(char c) noexcept; 50 * @param point the unicode point
51 51 * @return the number of bytes [1-4] or -1 on invalid
52 /** 52 */
53 * Get the number of bytes for the unicode point. 53 int nbytesPoint(char32_t point) noexcept;
54 * 54
55 * @param point the unicode point 55 /**
56 * @return the number of bytes [1-4] or -1 on invalid 56 * Get the real number of characters in a string.
57 */ 57 *
58 static int nbytesPoint(char32_t point) noexcept; 58 * @param str the string
59 59 * @return the length
60 /** 60 * @throw std::invalid_argument on invalid sequence
61 * Get the real number of characters in a string. 61 */
62 * 62 int length(const std::string &str);
63 * @param str the string 63
64 * @return the length 64 /**
65 * @throw std::invalid_argument on invalid sequence 65 * Iterate over all real characters in the UTF-8 string.
66 */ 66 *
67 static int length(const std::string &str); 67 * The function must have the following signature:
68 * void f(char32_t ch)
69 /** 69 *
70 * Iterate over all real characters in the UTF-8 string. 70 * @param str the UTF-8 string
71 * 71 * @throw std::invalid_argument on invalid sequence
72 * The function must have the following signature: 72 */
73 * void f(char32_t ch) 73 template <typename Func>
74 * 74 void forEach(const std::string &str, Func function)
75 * @param str the UTF-8 string 75 {
76 * @throw std::invalid_argument on invalid sequence 76 for (size_t i = 0; i < str.size(); ) {
77 */ 77 char32_t point = 0;
78 template <typename Func> 78 int size = nbytesUtf8(str[i]);
79 static void forEach(const std::string &str, Func function) 79
80 { 80 if (size < 0) {
81 for (size_t i = 0; i < str.size(); ) { 81 throw std::invalid_argument("invalid sequence");
82 char32_t point = 0;
83 int size = nbytesUtf8(str[i]);
84
85 if (size < 0) {
86 throw std::invalid_argument("invalid sequence");
87 }
88
89 decode(point, str.data() + i);
90 function(point);
91
92 i += size;
93 } 82 }
83
84 decode(point, str.data() + i);
85 function(point);
86
87 i += size;
94 } 88 }
95 89 }
96 /** 90
97 * Convert a UTF-32 string to UTF-8 string. 91 /**
98 * 92 * Convert a UTF-32 string to UTF-8 string.
99 * @param array the UTF-32 string 93 *
100 * @return the UTF-8 string 94 * @param array the UTF-32 string
101 * @throw std::invalid_argument on invalid sequence 95 * @return the UTF-8 string
102 */ 96 * @throw std::invalid_argument on invalid sequence
103 static std::string toUtf8(const std::u32string &array); 97 */
104 98 std::string toUtf8(const std::u32string &array);
105 /** 99
106 * Convert a UTF-8 string to UTF-32 string. 100 /**
107 * 101 * Convert a UTF-8 string to UTF-32 string.
108 * @param str the UTF-8 string 102 *
109 * @return the UTF-32 string 103 * @param str the UTF-8 string
110 * @throw std::invalid_argument on invalid sequence 104 * @return the UTF-32 string
111 */ 105 * @throw std::invalid_argument on invalid sequence
112 static std::u32string toUtf32(const std::string &str); 106 */
113 107 std::u32string toUtf32(const std::string &str);
114 /** 108
115 * Check if the unicode character is space. 109 /**
116 * 110 * Check if the unicode character is space.
117 * @param c the character 111 *
118 * @return true if space 112 * @param c the character
119 */ 113 * @return true if space
120 static bool isspace(char32_t c) noexcept; 114 */
121 115 bool isspace(char32_t c) noexcept;
122 /** 116
123 * Check if the unicode character is digit. 117 /**
124 * 118 * Check if the unicode character is digit.
125 * @param c the character 119 *
126 * @return true if digit 120 * @param c the character
127 */ 121 * @return true if digit
128 static bool isdigit(char32_t c) noexcept; 122 */
129 123 bool isdigit(char32_t c) noexcept;
130 /** 124
131 * Check if the unicode character is alpha category. 125 /**
132 * 126 * Check if the unicode character is alpha category.
133 * @param c the character 127 *
134 * @return true if alpha 128 * @param c the character
135 */ 129 * @return true if alpha
136 static bool isalpha(char32_t c) noexcept; 130 */
137 131 bool isalpha(char32_t c) noexcept;
138 /** 132
139 * Check if the unicode character is upper case. 133 /**
140 * 134 * Check if the unicode character is upper case.
141 * @param c the character 135 *
142 * @return true if upper case 136 * @param c the character
143 */ 137 * @return true if upper case
144 static bool isupper(char32_t c) noexcept; 138 */
145 139 bool isupper(char32_t c) noexcept;
146 /** 140
147 * Check if the unicode character is lower case. 141 /**
148 * 142 * Check if the unicode character is lower case.
149 * @param c the character 143 *
150 * @return true if lower case 144 * @param c the character
151 */ 145 * @return true if lower case
152 static bool islower(char32_t c) noexcept; 146 */
153 147 bool islower(char32_t c) noexcept;
154 /** 148
155 * Check if the unicode character is title case. 149 /**
156 * 150 * Check if the unicode character is title case.
157 * @param c the character 151 *
158 * @return true if title case 152 * @param c the character
159 */ 153 * @return true if title case
160 static bool istitle(char32_t c) noexcept; 154 */
161 155 bool istitle(char32_t c) noexcept;
162 /** 156
163 * Convert to upper case. 157 /**
164 * 158 * Convert to upper case.
165 * @param c the character 159 *
166 * @return the upper case character 160 * @param c the character
167 */ 161 * @return the upper case character
168 static char32_t toupper(char32_t c) noexcept; 162 */
169 163 char32_t toupper(char32_t c) noexcept;
170 /** 164
171 * Convert to lower case. 165 /**
172 * 166 * Convert to lower case.
173 * @param c the character 167 *
174 * @return the lower case character 168 * @param c the character
175 */ 169 * @return the lower case character
176 static char32_t tolower(char32_t c) noexcept; 170 */
177 171 char32_t tolower(char32_t c) noexcept;
178 /** 172
179 * Convert to title case. 173 /**
180 * 174 * Convert to title case.
181 * @param c the character 175 *
182 * @return the title case character 176 * @param c the character
183 */ 177 * @return the title case character
184 static char32_t totitle(char32_t c) noexcept; 178 */
185 179 char32_t totitle(char32_t c) noexcept;
186 /** 180
187 * Convert the UTF-8 string to upper case. 181 /**
188 * 182 * Convert the UTF-32 string to upper case.
189 * @param str the str 183 *
190 * @return the upper case string 184 * @param str the str
191 * @warning very slow at the moment 185 * @return the upper case string
192 */ 186 */
193 static inline std::string toupper(const std::string &str) 187 inline std::u32string toupper(std::u32string str)
194 { 188 {
195 return toUtf8(toupper(toUtf32(str))); 189 for (size_t i = 0; i < str.size(); ++i) {
190 str[i] = toupper(str[i]);
196 } 191 }
197 192
198 /** 193 return str;
199 * Convert the UTF-32 string to upper case. 194 }
200 * 195
201 * @param str the str 196 /**
202 * @return the upper case string 197 * Convert the UTF-8 string to upper case.
203 */ 198 *
204 static inline std::u32string toupper(std::u32string str) 199 * @param str the str
205 { 200 * @return the upper case string
206 for (size_t i = 0; i < str.size(); ++i) { 201 * @warning very slow at the moment
207 str[i] = toupper(str[i]); 202 */
208 } 203 inline std::string toupper(const std::string &str)
209 204 {
210 return str; 205 return toUtf8(toupper(toUtf32(str)));
206 }
207
208 /**
209 * Convert the UTF-32 string to lower case.
210 *
211 * @param str the str
212 * @return the lower case string
213 */
214 inline std::u32string tolower(std::u32string str)
215 {
216 for (size_t i = 0; i < str.size(); ++i) {
217 str[i] = tolower(str[i]);
211 } 218 }
212 219
213 /** 220 return str;
214 * Convert the UTF-8 string to lower case. 221 }
215 * 222
216 * @param str the str 223 /**
217 * @return the lower case string 224 * Convert the UTF-8 string to lower case.
218 * @warning very slow at the moment 225 *
219 */ 226 * @param str the str
220 static inline std::string tolower(const std::string &str) 227 * @return the lower case string
221 { 228 * @warning very slow at the moment
222 return toUtf8(tolower(toUtf32(str))); 229 */
223 } 230 inline std::string tolower(const std::string &str)
224 231 {
225 /** 232 return toUtf8(tolower(toUtf32(str)));
226 * Convert the UTF-32 string to lower case. 233 }
227 * 234
228 * @param str the str 235 } // !unicode
229 * @return the lower case string
230 */
231 static inline std::u32string tolower(std::u32string str)
232 {
233 for (size_t i = 0; i < str.size(); ++i) {
234 str[i] = tolower(str[i]);
235 }
236
237 return str;
238 }
239 };
240 236
241 #endif // !_UTF8_H_ 237 #endif // !_UTF8_H_