Mercurial > code
annotate tools/mkunicode/Unicode.h @ 401:ca5e4360f79a
Js:
- Add support of constants map (js::Map<T>)
- Add supports for vectors (std::vector<T>)
- Add index based getProperty/putProperty
author | David Demelier <markand@malikania.fr> |
---|---|
date | Sat, 03 Oct 2015 11:27:49 +0200 |
parents | b78d6d8f2872 |
children | d5ec1174b707 |
rev | line source |
---|---|
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
1 /* |
352 | 2 * Unicode.h -- UTF-8 to UTF-32 conversions and various operations |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
3 * |
352 | 4 * Copyright (c) 2013, 2014, 2015 David Demelier <markand@malikania.fr> |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
5 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
6 * Permission to use, copy, modify, and/or distribute this software for any |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
7 * purpose with or without fee is hereby granted, provided that the above |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
8 * copyright notice and this permission notice appear in all copies. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
9 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
17 */ |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
18 |
352 | 19 #ifndef _UNICODE_H_ |
20 #define _UNICODE_H_ | |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
21 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
22 /** |
352 | 23 * @file Unicode.h |
307 | 24 * @brief UTF-8 to UTF-32 conversions |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
25 */ |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
26 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
27 #include <stdexcept> |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
28 #include <string> |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
29 |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
30 namespace unicode { |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
31 |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
32 void encode(char32_t point, char res[5]) noexcept; |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
33 void decode(char32_t &c, const char *res) noexcept; |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
34 |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
35 /** |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
36 * Get the number of bytes for the first multi byte character from a |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
37 * utf-8 string. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
38 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
39 * This can be used to iterate a valid UTF-8 string to jump to the next |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
40 * real character. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
41 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
42 * @param c the first multi byte character |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
43 * @return the number of bytes [1-4] |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
44 */ |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
45 int nbytesUtf8(char c) noexcept; |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
46 |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
47 /** |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
48 * Get the number of bytes for the unicode point. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
49 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
50 * @param point the unicode point |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
51 * @return the number of bytes [1-4] or -1 on invalid |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
52 */ |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
53 int nbytesPoint(char32_t point) noexcept; |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
54 |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
55 /** |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
56 * Get real number of character in a string. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
57 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
58 * @param str the string |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
59 * @return the length |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
60 * @throw std::invalid_argument on invalid sequence |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
61 */ |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
62 int length(const std::string &str); |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
63 |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
64 /** |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
65 * Iterate over all real characters in the UTF-8 string. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
66 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
67 * The function must have the following signature: |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
68 * void f(char32_t point) |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
69 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
70 * @param str the UTF-8 string |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
71 * @throw std::invalid_argument on invalid sequence |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
72 */ |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
73 template <typename Func> |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
74 void forEach(const std::string &str, Func function) |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
75 { |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
76 for (size_t i = 0; i < str.size(); ) { |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
77 char32_t point = 0; |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
78 int size = nbytesUtf8(str[i]); |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
79 |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
80 if (size < 0) { |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
81 throw std::invalid_argument("invalid sequence"); |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
82 } |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
83 |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
84 decode(point, str.data() + i); |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
85 function(point); |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
86 |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
87 i += size; |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
88 } |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
89 } |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
90 |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
91 /** |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
92 * Convert a UTF-32 string to UTF-8 string. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
93 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
94 * @param array the UTF-32 string |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
95 * @return the UTF-8 string |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
96 * @throw std::invalid_argument on invalid sequence |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
97 */ |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
98 std::string toUtf8(const std::u32string &array); |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
99 |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
100 /** |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
101 * Convert a UTF-8 string to UTF-32 string. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
102 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
103 * @param str the UTF-8 string |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
104 * @return the UTF-32 string |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
105 * @throw std::invalid_argument on invalid sequence |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
106 */ |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
107 std::u32string toUtf32(const std::string &str); |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
108 |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
109 /** |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
110 * Check if the unicode character is space. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
111 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
112 * @param c the character |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
113 * @return true if space |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
114 */ |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
115 bool isspace(char32_t c) noexcept; |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
116 |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
117 /** |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
118 * Check if the unicode character is digit. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
119 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
120 * @param c the character |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
121 * @return true if digit |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
122 */ |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
123 bool isdigit(char32_t c) noexcept; |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
124 |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
125 /** |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
126 * Check if the unicode character is alpha category. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
127 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
128 * @param c the character |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
129 * @return true if alpha |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
130 */ |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
131 bool isalpha(char32_t c) noexcept; |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
132 |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
133 /** |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
134 * Check if the unicode character is upper case. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
135 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
136 * @param c the character |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
137 * @return true if upper case |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
138 */ |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
139 bool isupper(char32_t c) noexcept; |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
140 |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
141 /** |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
142 * Check if the unicode character is lower case. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
143 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
144 * @param c the character |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
145 * @return true if lower case |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
146 */ |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
147 bool islower(char32_t c) noexcept; |
352 | 148 |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
149 /** |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
150 * Check if the unicode character is title case. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
151 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
152 * @param c the character |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
153 * @return true if title case |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
154 */ |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
155 bool istitle(char32_t c) noexcept; |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
156 |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
157 /** |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
158 * Convert to upper case. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
159 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
160 * @param c the character |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
161 * @return the upper case character |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
162 */ |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
163 char32_t toupper(char32_t c) noexcept; |
352 | 164 |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
165 /** |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
166 * Convert to lower case. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
167 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
168 * @param c the character |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
169 * @return the lower case character |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
170 */ |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
171 char32_t tolower(char32_t c) noexcept; |
352 | 172 |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
173 /** |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
174 * Convert to title case. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
175 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
176 * @param c the character |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
177 * @return the title case character |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
178 */ |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
179 char32_t totitle(char32_t c) noexcept; |
352 | 180 |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
181 /** |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
182 * Convert the UTF-32 string to upper case. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
183 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
184 * @param str the str |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
185 * @return the upper case string |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
186 */ |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
inline std::u32string toupper(std::u32string str)
{
	// The string is received by value: convert each code point in place
	// with the single character overload, then hand the copy back.
	for (char32_t &point : str) {
		point = toupper(point);
	}

	return str;
}
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
195 |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
196 /** |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
197 * Convert the UTF-8 string to upper case. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
198 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
199 * @param str the str |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
200 * @return the upper case string |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
201 * @warning very slow at the moment |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
202 */ |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
203 inline std::string toupper(const std::string &str) |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
204 { |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
205 return toUtf8(toupper(toUtf32(str))); |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
206 } |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
207 |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
208 /** |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
209 * Convert the UTF-32 string to lower case. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
210 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
211 * @param str the str |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
212 * @return the lower case string |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
213 */ |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
inline std::u32string tolower(std::u32string str)
{
	// The string is received by value: convert each code point in place
	// with the single character overload, then hand the copy back.
	for (char32_t &point : str) {
		point = tolower(point);
	}

	return str;
}
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
222 |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
223 /** |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
224 * Convert the UTF-8 string to lower case. |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
225 * |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
226 * @param str the str |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
227 * @return the lower case string |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
228 * @warning very slow at the moment |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
229 */ |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
230 inline std::string tolower(const std::string &str) |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
231 { |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
232 return toUtf8(tolower(toUtf32(str))); |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
233 } |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
234 |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
235 } // !unicode |
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
236 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
237 #endif // !_UNICODE_H_