Mercurial > code
annotate tools/mkunicode/Unicode-after.cpp @ 395:b78d6d8f2872
Unicode: remove class, use namespace
author | David Demelier <markand@malikania.fr> |
---|---|
date | Mon, 28 Sep 2015 15:55:46 +0200 |
parents | 7fe8d4094983 |
children | f083259de5e6 |
rev | line source |
---|---|
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
1 void encode(char32_t c, char res[5]) noexcept |
352 | 2 { |
3 switch (nbytesPoint(c)) { | |
4 case 1: | |
5 res[0] = c; | |
6 res[1] = '\0'; | |
7 break; | |
8 case 2: | |
9 res[0] = 0xC0 | ((c >> 6) & 0x1F); | |
10 res[1] = 0x80 | (c & 0x3F); | |
11 res[2] = '\0'; | |
12 break; | |
13 case 3: | |
14 res[0] = 0xE0 | ((c >> 12) & 0xF ); | |
15 res[1] = 0x80 | ((c >> 6) & 0x3F); | |
16 res[2] = 0x80 | (c & 0x3F); | |
17 res[3] = '\0'; | |
18 break; | |
19 case 4: | |
20 res[0] = 0xF0 | ((c >> 18) & 0x7 ); | |
21 res[1] = 0x80 | ((c >> 12) & 0x3F); | |
22 res[2] = 0x80 | ((c >> 6) & 0x3F); | |
23 res[3] = 0x80 | (c & 0x3F); | |
24 res[4] = '\0'; | |
25 break; | |
26 default: | |
27 break; | |
28 } | |
29 } | |
30 | |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
31 void decode(char32_t &c, const char *res) noexcept |
352 | 32 { |
33 c = 0; | |
34 | |
35 switch (nbytesUtf8(res[0])) { | |
36 case 1: | |
37 c = res[0]; | |
38 break; | |
39 case 2: | |
40 c = (res[0] & 0x1f) << 6; | |
41 c |= (res[1] & 0x3f); | |
42 break; | |
43 case 3: | |
44 c = (res[0] & 0x0f) << 12; | |
45 c |= (res[1] & 0x3f) << 6; | |
46 c |= (res[2] & 0x3f); | |
47 break; | |
48 case 4: | |
49 c = (res[0] & 0x07) << 16; | |
50 c |= (res[1] & 0x3f) << 12; | |
51 c |= (res[2] & 0x3f) << 6; | |
52 c |= (res[3] & 0x3f); | |
53 default: | |
54 break; | |
55 } | |
56 } | |
57 | |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
/*
 * Tell how many bytes a UTF-8 sequence occupies, judging by its first byte.
 *
 * Returns 2, 3 or 4 when the high bits of c are 110, 1110 or 11110
 * respectively, and 1 for every other value.
 */
int nbytesUtf8(char c) noexcept
{
    const unsigned char lead = static_cast<unsigned char>(c);

    // 110xxxxx
    if ((lead >> 5) == 0x06) {
        return 2;
    }

    // 1110xxxx
    if ((lead >> 4) == 0x0E) {
        return 3;
    }

    // 11110xxx
    if ((lead >> 3) == 0x1E) {
        return 4;
    }

    return 1;
}
69 | |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
/*
 * Tell how many UTF-8 bytes are needed to store the code point c.
 *
 * Returns 1 to 4 for values up to 0x7F, 0x7FF, 0xFFFF and 0x1FFFFF
 * respectively, or -1 when c is larger than 0x1FFFFF.
 */
int nbytesPoint(char32_t c) noexcept
{
    // Largest value representable with 1, 2, 3 and 4 bytes, in that order.
    constexpr char32_t limits[] = { 0x7F, 0x7FF, 0xFFFF, 0x1FFFFF };

    int count = 1;

    for (const char32_t limit : limits) {
        if (c <= limit) {
            return count;
        }

        ++count;
    }

    return -1;
}
83 | |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
84 int length(const std::string &str) |
352 | 85 { |
86 int total = 0; | |
87 | |
88 forEach(str, [&] (char32_t) { | |
89 ++ total; | |
90 }); | |
91 | |
92 return total; | |
93 } | |
94 | |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
95 std::string toUtf8(const std::u32string &array) |
352 | 96 { |
97 std::string res; | |
98 | |
99 for (size_t i = 0; i < array.size(); ++i) { | |
100 char tmp[5]; | |
101 int size = nbytesPoint(array[i]); | |
102 | |
103 if (size < 0) { | |
104 throw std::invalid_argument("invalid sequence"); | |
105 } | |
106 | |
107 encode(array[i], tmp); | |
108 res.insert(res.length(), tmp); | |
109 } | |
110 | |
111 return res; | |
112 } | |
113 | |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
114 std::u32string toUtf32(const std::string &str) |
352 | 115 { |
116 std::u32string res; | |
117 | |
118 forEach(str, [&] (char32_t code) { | |
119 res.push_back(code); | |
120 }); | |
121 | |
122 return res; | |
395
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
123 } |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
124 |
b78d6d8f2872
Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents:
352
diff
changeset
|
125 } // !unicode |