annotate C++/modules/Unicode/Unicode.h @ 485:898d8b29a4f1

Switch to lowercase filenames
author David Demelier <markand@malikania.fr>
date Thu, 12 Nov 2015 21:53:36 +0100
parents f083259de5e6
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
1 /*
352
David Demelier <markand@malikania.fr>
parents: 334
diff changeset
2 * Unicode.h -- UTF-8 to UTF-32 conversions and various operations
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
3 *
403
d5ec1174b707 Massive cleanup
David Demelier <markand@malikania.fr>
parents: 395
diff changeset
4 * Copyright (c) 2013-2015 David Demelier <markand@malikania.fr>
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
5 *
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
6 * Permission to use, copy, modify, and/or distribute this software for any
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
7 * purpose with or without fee is hereby granted, provided that the above
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
8 * copyright notice and this permission notice appear in all copies.
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
9 *
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
17 */
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
18
352
David Demelier <markand@malikania.fr>
parents: 334
diff changeset
19 #ifndef _UNICODE_H_
David Demelier <markand@malikania.fr>
parents: 334
diff changeset
20 #define _UNICODE_H_
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
21
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
22 /**
352
David Demelier <markand@malikania.fr>
parents: 334
diff changeset
23 * @file Unicode.h
307
David Demelier <markand@malikania.fr>
parents: 212
diff changeset
24 * @brief UTF-8 to UTF-32 conversions
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
25 */
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
26
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
27 #include <stdexcept>
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
28 #include <string>
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
29
395
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
30 namespace unicode {
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
31
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
32 void encode(char32_t point, char res[5]) noexcept;
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
33 void decode(char32_t &c, const char *res) noexcept;
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
34
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
35 /**
395
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
36 * Get the number of bytes for the first multi byte character from a
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
37 * utf-8 string.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
38 *
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
39 * This can be used to iterate a valid UTF-8 string to jump to the next
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
40 * real character.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
41 *
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
42 * @param c the first byte of the multi byte character
408
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
43 * @return the number of bytes [1-4] or -1 if invalid
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
44 */
395
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
45 int nbytesUtf8(char c) noexcept;
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
46
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
47 /**
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
48 * Get the number of bytes for the unicode point.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
49 *
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
50 * @param point the unicode point
408
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
51 * @return the number of bytes [1-4] or -1 if invalid
395
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
52 */
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
53 int nbytesPoint(char32_t point) noexcept;
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
54
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
55 /**
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
56 * Get the real number of characters in a string.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
57 *
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
58 * @param str the string
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
59 * @return the length
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
60 * @throw std::invalid_argument on invalid sequence
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
61 */
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
62 int length(const std::string &str);
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
63
395
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
64 /**
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
65 * Iterate over all real characters in the UTF-8 string.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
66 *
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
67 * The function must have the following signature:
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
68 * void f(char ch)
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
69 *
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
70 * @param str the UTF-8 string
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
71 * @throw std::invalid_argument on invalid sequence
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
72 */
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
73 template <typename Func>
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
74 void forEach(const std::string &str, Func function)
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
75 {
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
76 for (size_t i = 0; i < str.size(); ) {
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
77 char32_t point = 0;
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
78 int size = nbytesUtf8(str[i]);
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
79
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
80 if (size < 0) {
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
81 throw std::invalid_argument("invalid sequence");
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
82 }
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
83
395
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
84 decode(point, str.data() + i);
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
85 function(point);
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
86
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
87 i += size;
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
88 }
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
89 }
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
90
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
91 /**
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
92 * Convert a UTF-32 string to UTF-8 string.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
93 *
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
94 * @param array the UTF-32 string
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
95 * @return the UTF-8 string
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
96 * @throw std::invalid_argument on invalid sequence
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
97 */
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
98 std::string toUtf8(const std::u32string &array);
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
99
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
100 /**
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
101 * Convert a UTF-8 string to UTF-32 string.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
102 *
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
103 * @param str the UTF-8 string
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
104 * @return the UTF-32 string
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
105 * @throw std::invalid_argument on invalid sequence
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
106 */
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
107 std::u32string toUtf32(const std::string &str);
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
108
395
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
109 /**
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
110 * Check if the unicode character is space.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
111 *
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
112 * @param c the character
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
113 * @return true if space
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
114 */
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
115 bool isspace(char32_t c) noexcept;
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
116
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
117 /**
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
118 * Check if the unicode character is digit.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
119 *
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
120 * @param c the character
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
121 * @return true if digit
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
122 */
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
123 bool isdigit(char32_t c) noexcept;
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
124
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
125 /**
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
126 * Check if the unicode character is alpha category.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
127 *
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
128 * @param c the character
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
129 * @return true if alpha
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
130 */
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
131 bool isalpha(char32_t c) noexcept;
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
132
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
133 /**
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
134 * Check if the unicode character is upper case.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
135 *
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
136 * @param c the character
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
137 * @return true if upper case
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
138 */
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
139 bool isupper(char32_t c) noexcept;
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
140
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
141 /**
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
142 * Check if the unicode character is lower case.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
143 *
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
144 * @param c the character
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
145 * @return true if lower case
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
146 */
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
147 bool islower(char32_t c) noexcept;
352
David Demelier <markand@malikania.fr>
parents: 334
diff changeset
148
395
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
149 /**
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
150 * Check if the unicode character is title case.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
151 *
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
152 * @param c the character
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
153 * @return true if title case
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
154 */
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
155 bool istitle(char32_t c) noexcept;
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
156
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
157 /**
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
158 * Convert to upper case.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
159 *
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
160 * @param c the character
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
161 * @return the upper case character
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
162 */
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
163 char32_t toupper(char32_t c) noexcept;
352
David Demelier <markand@malikania.fr>
parents: 334
diff changeset
164
395
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
165 /**
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
166 * Convert to lower case.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
167 *
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
168 * @param c the character
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
169 * @return the lower case character
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
170 */
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
171 char32_t tolower(char32_t c) noexcept;
352
David Demelier <markand@malikania.fr>
parents: 334
diff changeset
172
395
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
173 /**
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
174 * Convert to title case.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
175 *
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
176 * @param c the character
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
177 * @return the title case character
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
178 */
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
179 char32_t totitle(char32_t c) noexcept;
352
David Demelier <markand@malikania.fr>
parents: 334
diff changeset
180
395
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
/**
 * Build the upper case version of a UTF-32 string.
 *
 * Every code point is mapped individually through toupper(char32_t).
 *
 * @param str the string to convert (taken by value, converted in place)
 * @return the upper case string
 */
inline std::u32string toupper(std::u32string str)
{
    for (char32_t &point : str) {
        point = toupper(point);
    }

    return str;
}
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
195
395
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
196 /**
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
197 * Convert the UTF-8 string to upper case.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
198 *
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
199 * @param str the str
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
200 * @return the upper case string
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
201 * @warning very slow at the moment
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
202 */
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
203 inline std::string toupper(const std::string &str)
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
204 {
408
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
205 std::string result;
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
206 char buffer[5];
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
207
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
208 forEach(str, [&] (char32_t code) {
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
209 encode(toupper(code), buffer);
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
210 result += buffer;
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
211 });
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
212
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
213 return result;
395
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
214 }
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
215
395
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
/**
 * Build the lower case version of a UTF-32 string.
 *
 * Every code point is mapped individually through tolower(char32_t).
 *
 * @param str the string to convert (taken by value, converted in place)
 * @return the lower case string
 */
inline std::u32string tolower(std::u32string str)
{
    for (char32_t &point : str) {
        point = tolower(point);
    }

    return str;
}
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
230
395
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
231 /**
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
232 * Convert the UTF-8 string to lower case.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
233 * Each code point handed to the callback by forEach() is lower-cased, re-encoded with encode() and appended to a new string.
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
234 * @param str the UTF-8 string to convert (not modified)
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
235 * @return a new string holding the lower case result
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
236 * @warning very slow at the moment
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
237 */
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
238 inline std::string tolower(const std::string &str)
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
239 {
408
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
240 std::string result;
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
241 char buffer[5]; // scratch for one encoded code point; presumably 4 UTF-8 bytes + NUL -- confirm against encode()
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
242
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
243 forEach(str, [&] (char32_t code) {
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
244 encode(tolower(code), buffer);
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
245 result += buffer; // appended as a C string: relies on encode() NUL-terminating buffer -- TODO confirm
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
246 });
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
247
f083259de5e6 Unicode: fix forEach and nbytesPoint on invalid, improve tolower/toupper for UTF-8 while here
David Demelier <markand@malikania.fr>
parents: 403
diff changeset
248 return result;
395
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
249 }
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
250
395
b78d6d8f2872 Unicode: remove class, use namespace
David Demelier <markand@malikania.fr>
parents: 352
diff changeset
251 } // !unicode
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
252
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
253 #endif // !_UTF8_H_