annotate tools/mkunicode/Unicode.h @ 352:7fe8d4094983

Utf8: - Fix invalid decoding from UTF-8 to UTF-32 - Add all files
author David Demelier <markand@malikania.fr>
date Wed, 08 Apr 2015 12:33:45 +0200
parents C++/modules/Utf8/Utf8.h@0b576ee64d45
children b78d6d8f2872
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
1 /*
352
David Demelier <markand@malikania.fr>
parents: 334
diff changeset
2 * Unicode.h -- UTF-8 to UTF-32 conversions and various operations
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
3 *
352
David Demelier <markand@malikania.fr>
parents: 334
diff changeset
4 * Copyright (c) 2013, 2014, 2015 David Demelier <markand@malikania.fr>
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
5 *
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
6 * Permission to use, copy, modify, and/or distribute this software for any
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
7 * purpose with or without fee is hereby granted, provided that the above
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
8 * copyright notice and this permission notice appear in all copies.
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
9 *
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
17 */
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
18
352
David Demelier <markand@malikania.fr>
parents: 334
diff changeset
19 #ifndef _UNICODE_H_
David Demelier <markand@malikania.fr>
parents: 334
diff changeset
20 #define _UNICODE_H_
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
21
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
22 /**
352
David Demelier <markand@malikania.fr>
parents: 334
diff changeset
23 * @file Unicode.h
307
David Demelier <markand@malikania.fr>
parents: 212
diff changeset
24 * @brief UTF-8 to UTF-32 conversions
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
25 */
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
26
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
27 #include <stdexcept>
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
28 #include <string>
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
29
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
30 /**
352
David Demelier <markand@malikania.fr>
parents: 334
diff changeset
31 * @class Unicode
307
David Demelier <markand@malikania.fr>
parents: 212
diff changeset
32 * @brief Conversion between UTF-8 and UTF-32
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
33 */
352
David Demelier <markand@malikania.fr>
parents: 334
diff changeset
class Unicode {
private:
	/*
	 * Low-level helpers implemented outside of this header.
	 *
	 * NOTE(review): presumably encode() writes the UTF-8 form of point into
	 * res (up to 4 bytes plus a terminator) and decode() reads one UTF-8
	 * sequence starting at res into c -- confirm against the implementation.
	 */
	static void encode(char32_t point, char res[5]) noexcept;
	static void decode(char32_t &c, const char *res) noexcept;

public:
	/**
	 * Get the number of bytes for the first multi byte character from a
	 * UTF-8 string.
	 *
	 * This can be used to iterate a valid UTF-8 string to jump to the next
	 * real character.
	 *
	 * @param c the first byte of the multi byte character
	 * @return the number of bytes [1-4]
	 */
	static int nbytesUtf8(char c) noexcept;

	/**
	 * Get the number of bytes needed for the unicode point.
	 *
	 * @param point the unicode point
	 * @return the number of bytes [1-4] or -1 on invalid
	 */
	static int nbytesPoint(char32_t point) noexcept;

	/**
	 * Get the real number of characters in a string.
	 *
	 * @param str the UTF-8 string
	 * @return the length
	 * @throw std::invalid_argument on invalid sequence
	 */
	static int length(const std::string &str);

	/**
	 * Iterate over all real characters in the UTF-8 string.
	 *
	 * The function must have the following signature:
	 *	void f(char32_t ch)
	 *
	 * It is called once per decoded code point, in string order.
	 *
	 * @param str the UTF-8 string
	 * @param function the callable invoked with every code point
	 * @throw std::invalid_argument on invalid sequence, including a multi
	 *        byte sequence that is cut short by the end of the string
	 */
	template <typename Func>
	static void forEach(const std::string &str, Func function)
	{
		for (std::string::size_type i = 0; i < str.size(); ) {
			const int size = nbytesUtf8(str[i]);

			// A zero or negative size would never advance the index.
			if (size <= 0) {
				throw std::invalid_argument("invalid sequence");
			}

			const std::string::size_type step = static_cast<std::string::size_type>(size);

			// Reject truncated sequences so that decode() is never handed
			// a sequence that extends past the end of the string.
			if (step > str.size() - i) {
				throw std::invalid_argument("invalid sequence");
			}

			char32_t point = 0;

			decode(point, str.data() + i);
			function(point);

			i += step;
		}
	}

	/**
	 * Convert a UTF-32 string to UTF-8 string.
	 *
	 * @param array the UTF-32 string
	 * @return the UTF-8 string
	 * @throw std::invalid_argument on invalid sequence
	 */
	static std::string toUtf8(const std::u32string &array);

	/**
	 * Convert a UTF-8 string to UTF-32 string.
	 *
	 * @param str the UTF-8 string
	 * @return the UTF-32 string
	 * @throw std::invalid_argument on invalid sequence
	 */
	static std::u32string toUtf32(const std::string &str);

	/**
	 * Check if the unicode character is space.
	 *
	 * @param c the character
	 * @return true if space
	 */
	static bool isspace(char32_t c) noexcept;

	/**
	 * Check if the unicode character is digit.
	 *
	 * @param c the character
	 * @return true if digit
	 */
	static bool isdigit(char32_t c) noexcept;

	/**
	 * Check if the unicode character is alpha category.
	 *
	 * @param c the character
	 * @return true if alpha
	 */
	static bool isalpha(char32_t c) noexcept;

	/**
	 * Check if the unicode character is upper case.
	 *
	 * @param c the character
	 * @return true if upper case
	 */
	static bool isupper(char32_t c) noexcept;

	/**
	 * Check if the unicode character is lower case.
	 *
	 * @param c the character
	 * @return true if lower case
	 */
	static bool islower(char32_t c) noexcept;

	/**
	 * Check if the unicode character is title case.
	 *
	 * @param c the character
	 * @return true if title case
	 */
	static bool istitle(char32_t c) noexcept;

	/**
	 * Convert to upper case.
	 *
	 * @param c the character
	 * @return the upper case character
	 */
	static char32_t toupper(char32_t c) noexcept;

	/**
	 * Convert to lower case.
	 *
	 * @param c the character
	 * @return the lower case character
	 */
	static char32_t tolower(char32_t c) noexcept;

	/**
	 * Convert to title case.
	 *
	 * @param c the character
	 * @return the title case character
	 */
	static char32_t totitle(char32_t c) noexcept;

	/**
	 * Convert the UTF-8 string to upper case.
	 *
	 * The string is converted to UTF-32, transformed and converted back.
	 *
	 * @param str the UTF-8 string
	 * @return the upper case string
	 * @warning very slow at the moment
	 */
	static std::string toupper(const std::string &str)
	{
		return toUtf8(toupper(toUtf32(str)));
	}

	/**
	 * Convert the UTF-32 string to upper case.
	 *
	 * @param str the UTF-32 string (taken by value and transformed in place)
	 * @return the upper case string
	 */
	static std::u32string toupper(std::u32string str)
	{
		for (char32_t &c : str) {
			c = toupper(c);
		}

		return str;
	}

	/**
	 * Convert the UTF-8 string to lower case.
	 *
	 * The string is converted to UTF-32, transformed and converted back.
	 *
	 * @param str the UTF-8 string
	 * @return the lower case string
	 * @warning very slow at the moment
	 */
	static std::string tolower(const std::string &str)
	{
		return toUtf8(tolower(toUtf32(str)));
	}

	/**
	 * Convert the UTF-32 string to lower case.
	 *
	 * @param str the UTF-32 string (taken by value and transformed in place)
	 * @return the lower case string
	 */
	static std::u32string tolower(std::u32string str)
	{
		for (char32_t &c : str) {
			c = tolower(c);
		}

		return str;
	}
};
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
240
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
241 #endif // !_UNICODE_H_