Mercurial > code
annotate C++/Utf8.h @ 212:35e34b0b80d4
Utf8: remove namespace
author | David Demelier <markand@malikania.fr> |
---|---|
date | Sat, 22 Mar 2014 22:05:58 +0100 |
parents | e8ab4c7b8a25 |
children | e2a8cbf2dd79 |
rev | line source |
---|---|
208
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
1 /* |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
2 * Utf8.h -- UTF-8 to UCS-4 conversions |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
3 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
4 * Copyright (c) 2013, 2014 David Demelier <markand@malikania.fr> |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
5 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
6 * Permission to use, copy, modify, and/or distribute this software for any |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
7 * purpose with or without fee is hereby granted, provided that the above |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
8 * copyright notice and this permission notice appear in all copies. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
9 * |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
17 */ |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
18 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
19 #ifndef _UTF8_H_ |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
20 #define _UTF8_H_ |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
21 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
22 /** |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
23 * @file Utf8.h |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
24 * @brief UTF-8 to UCS-4 conversions |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
25 */ |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
26 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
27 #include <cstdint> |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
28 #include <stdexcept> |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
29 #include <string> |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
30 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
31 /** |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
32 * @class Utf8 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
33 * @brief Conversion between UTF-8 and UCS-4 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
34 */ |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
class Utf8 {
private:
	/*
	 * Low-level helpers; their definitions are not part of this header.
	 * NOTE(review): presumably encode() writes the UTF-8 form of a single
	 * code point into res and decode() reads one code point from res into
	 * c -- confirm against the implementation file.
	 */
	static void encode(uint32_t point, char res[5]);
	static void decode(uint32_t &c, const char *res);

public:
	/**
	 * Tell how many bytes the first multi byte character of a UTF-8
	 * string occupies.
	 *
	 * @param c the first multi byte character
	 * @return the byte count in the range [1-4], or -1 if invalid
	 */
	static int8_t nbytesUtf8(uint8_t c);

	/**
	 * Tell how many bytes are needed for a unicode point.
	 *
	 * @param point the unicode point
	 * @return the byte count in the range [1-4], or -1 if invalid
	 */
	static int8_t nbytesPoint(uint32_t point);

	/**
	 * Compute the real number of characters contained in a string.
	 *
	 * @param str the string
	 * @return the length
	 * @throw std::invalid_argument on invalid sequence
	 */
	static size_t length(const std::string &str);

	/**
	 * Build a UTF-8 string out of a UCS-4 string.
	 *
	 * @param array the UCS-4 string
	 * @return the UTF-8 string
	 * @throw std::invalid_argument on invalid sequence
	 */
	static std::string toutf8(const std::u32string &array);

	/**
	 * Build a UCS-4 string out of a UTF-8 string.
	 *
	 * @param str the UTF-8 string
	 * @return the UCS-4 string
	 * @throw std::invalid_argument on invalid sequence
	 */
	static std::u32string toucs(const std::string &str);

	/**
	 * Tell whether the unicode character is a space.
	 *
	 * @param c the character
	 * @return true if space
	 */
	static bool isspace(uint32_t c);

	/**
	 * Tell whether the unicode character is a digit.
	 *
	 * @param c the character
	 * @return true if digit
	 */
	static bool isdigit(uint32_t c);

	/**
	 * Tell whether the unicode character is a letter.
	 *
	 * @param c the character
	 * @return true if letter
	 */
	static bool isletter(uint32_t c);

	/**
	 * Tell whether the unicode character is upper case.
	 *
	 * @param c the character
	 * @return true if upper case
	 */
	static bool isupper(uint32_t c);

	/**
	 * Tell whether the unicode character is lower case.
	 *
	 * @param c the character
	 * @return true if lower case
	 */
	static bool islower(uint32_t c);

	/**
	 * Tell whether the unicode character is title case.
	 *
	 * @param c the character
	 * @return true if title case
	 */
	static bool istitle(uint32_t c);

	/**
	 * Map a single character to upper case.
	 *
	 * @param c the character
	 * @return the upper case character
	 */
	static uint32_t toupper(uint32_t c);

	/**
	 * Map a single character to lower case.
	 *
	 * @param c the character
	 * @return the lower case character
	 */
	static uint32_t tolower(uint32_t c);

	/**
	 * Map a single character to title case.
	 *
	 * @param c the character
	 * @return the title case character
	 */
	static uint32_t totitle(uint32_t c);

	/**
	 * Upper case a UTF-8 string.
	 *
	 * The input goes through toucs(), the UCS-4 overload of toupper()
	 * and finally toutf8(), so whatever those functions throw is
	 * propagated to the caller.
	 *
	 * @param str the UTF-8 string
	 * @return the upper case string
	 */
	static inline std::string toupper(const std::string &str)
	{
		const std::u32string upper = toupper(toucs(str));

		return toutf8(upper);
	}

	/**
	 * Upper case a UCS-4 string.
	 *
	 * Each element is passed through the single character toupper().
	 *
	 * @param str the UCS-4 string
	 * @return the upper case string
	 */
	static inline std::u32string toupper(const std::u32string &str)
	{
		std::u32string result(str);

		// Work in place on the duplicate; the argument stays untouched.
		for (auto &ch : result)
			ch = toupper(ch);

		return result;
	}

	/**
	 * Lower case a UTF-8 string.
	 *
	 * The input goes through toucs(), the UCS-4 overload of tolower()
	 * and finally toutf8(), so whatever those functions throw is
	 * propagated to the caller.
	 *
	 * @param str the UTF-8 string
	 * @return the lower case string
	 */
	static inline std::string tolower(const std::string &str)
	{
		const std::u32string lower = tolower(toucs(str));

		return toutf8(lower);
	}

	/**
	 * Lower case a UCS-4 string.
	 *
	 * Each element is passed through the single character tolower().
	 *
	 * @param str the UCS-4 string
	 * @return the lower case string
	 */
	static inline std::u32string tolower(const std::u32string &str)
	{
		std::u32string result(str);

		// Work in place on the duplicate; the argument stays untouched.
		for (auto &ch : result)
			ch = tolower(ch);

		return result;
	}
};
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
211 |
e8ab4c7b8a25
Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
212 #endif // !_UTF8_H_ |