annotate C++/Utf8.h @ 212:35e34b0b80d4

Utf8: remove namespace
author David Demelier <markand@malikania.fr>
date Sat, 22 Mar 2014 22:05:58 +0100
parents e8ab4c7b8a25
children e2a8cbf2dd79
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
208
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
1 /*
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
2 * Utf8.h -- UTF-8 to UCS-4 conversions
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
3 *
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
4 * Copyright (c) 2013, 2014 David Demelier <markand@malikania.fr>
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
5 *
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
6 * Permission to use, copy, modify, and/or distribute this software for any
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
7 * purpose with or without fee is hereby granted, provided that the above
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
8 * copyright notice and this permission notice appear in all copies.
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
9 *
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
17 */
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
18
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
19 #ifndef _UTF8_H_
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
20 #define _UTF8_H_
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
21
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
22 /**
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
23 * @file Utf8.h
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
24 * @brief UTF-8 to UCS-4 conversions
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
25 */
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
26
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <string>
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
30
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
/**
 * @class Utf8
 * @brief Conversion between UTF-8 and UCS-4
 *
 * Every member is static, the class only acts as a scope for the
 * functions. UTF-8 text is carried in std::string and UCS-4 text in
 * std::u32string.
 */
class Utf8 {
private:
	/*
	 * Single code point helpers, defined outside of this header.
	 *
	 * NOTE(review): presumably encode() writes the UTF-8 bytes of `point`
	 * into `res` (the 5 byte array suggests up to 4 bytes plus a
	 * terminator) and decode() reads one sequence from `res` into `c` --
	 * confirm against the implementation file.
	 */
	static void encode(std::uint32_t point, char res[5]);
	static void decode(std::uint32_t &c, const char *res);

public:
	/**
	 * Get the number of bytes of a multi byte character from the first
	 * byte of its UTF-8 sequence.
	 *
	 * @param c the first byte of the sequence
	 * @return the number of bytes [1-4] or -1 on invalid
	 */
	static std::int8_t nbytesUtf8(std::uint8_t c);

	/**
	 * Get the number of bytes needed for the unicode point.
	 *
	 * @param point the unicode point
	 * @return the number of bytes [1-4] or -1 on invalid
	 */
	static std::int8_t nbytesPoint(std::uint32_t point);

	/**
	 * Get the real number of characters in a UTF-8 string, as opposed to
	 * its number of bytes.
	 *
	 * @param str the string
	 * @return the length
	 * @throw std::invalid_argument on invalid sequence
	 */
	static std::size_t length(const std::string &str);

	/**
	 * Convert a UCS-4 string to UTF-8 string.
	 *
	 * @param array the UCS-4 string
	 * @return the UTF-8 string
	 * @throw std::invalid_argument on invalid sequence
	 */
	static std::string toutf8(const std::u32string &array);

	/**
	 * Convert a UTF-8 string to UCS-4 string.
	 *
	 * @param str the UTF-8 string
	 * @return the UCS-4 string
	 * @throw std::invalid_argument on invalid sequence
	 */
	static std::u32string toucs(const std::string &str);

	/**
	 * Check if the unicode character is space.
	 *
	 * @param c the character
	 * @return true if space
	 */
	static bool isspace(std::uint32_t c);

	/**
	 * Check if the unicode character is digit.
	 *
	 * @param c the character
	 * @return true if digit
	 */
	static bool isdigit(std::uint32_t c);

	/**
	 * Check if the unicode character is letter.
	 *
	 * @param c the character
	 * @return true if letter
	 */
	static bool isletter(std::uint32_t c);

	/**
	 * Check if the unicode character is upper case.
	 *
	 * @param c the character
	 * @return true if upper case
	 */
	static bool isupper(std::uint32_t c);

	/**
	 * Check if the unicode character is lower case.
	 *
	 * @param c the character
	 * @return true if lower case
	 */
	static bool islower(std::uint32_t c);

	/**
	 * Check if the unicode character is title case.
	 *
	 * @param c the character
	 * @return true if title case
	 */
	static bool istitle(std::uint32_t c);

	/**
	 * Convert to upper case.
	 *
	 * @param c the character
	 * @return the upper case character
	 */
	static std::uint32_t toupper(std::uint32_t c);

	/**
	 * Convert to lower case.
	 *
	 * @param c the character
	 * @return the lower case character
	 */
	static std::uint32_t tolower(std::uint32_t c);

	/**
	 * Convert to title case.
	 *
	 * @param c the character
	 * @return the title case character
	 */
	static std::uint32_t totitle(std::uint32_t c);

	/**
	 * Convert the UTF-8 string to upper case.
	 *
	 * The string is converted to UCS-4 with toucs(), upper cased and
	 * converted back with toutf8().
	 *
	 * @param str the UTF-8 string
	 * @return the upper case UTF-8 string
	 * @throw std::invalid_argument on invalid sequence (from toucs / toutf8)
	 */
	static std::string toupper(const std::string &str)
	{
		return toutf8(toupper(toucs(str)));
	}

	/**
	 * Convert the UCS-4 string to upper case.
	 *
	 * @param str the UCS-4 string
	 * @return an upper case copy of the string
	 */
	static std::u32string toupper(const std::u32string &str)
	{
		auto copy = str;

		// A char32_t argument selects the single character overload.
		for (auto &c : copy)
			c = toupper(c);

		return copy;
	}

	/**
	 * Convert the UTF-8 string to lower case.
	 *
	 * The string is converted to UCS-4 with toucs(), lower cased and
	 * converted back with toutf8().
	 *
	 * @param str the UTF-8 string
	 * @return the lower case UTF-8 string
	 * @throw std::invalid_argument on invalid sequence (from toucs / toutf8)
	 */
	static std::string tolower(const std::string &str)
	{
		return toutf8(tolower(toucs(str)));
	}

	/**
	 * Convert the UCS-4 string to lower case.
	 *
	 * @param str the UCS-4 string
	 * @return a lower case copy of the string
	 */
	static std::u32string tolower(const std::u32string &str)
	{
		auto copy = str;

		// A char32_t argument selects the single character overload.
		for (auto &c : copy)
			c = tolower(c);

		return copy;
	}
};
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
211
e8ab4c7b8a25 Utf8: UTF-8 to UCS-4 and others
David Demelier <markand@malikania.fr>
parents:
diff changeset
212 #endif // !_UTF8_H_