Mercurial > libunicode
annotate unicode.h @ 11:43a9d763656b
unicode: improve C API, removing dynamic allocations
author | David Demelier <markand@malikania.fr> |
---|---|
date | Wed, 25 Mar 2020 14:33:03 +0100 |
parents | ae1003c2a284 |
children | 153c09cc6dcb |
rev | line source |
---|---|
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
1 /* |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
2 * unicode.h -- UTF-8 to UTF-32 conversions and various operations |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
3 * |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
4 * Copyright (c) 2013-2020 David Demelier <markand@malikania.fr> |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
5 * |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
6 * Permission to use, copy, modify, and/or distribute this software for any |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
7 * purpose with or without fee is hereby granted, provided that the above |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
8 * copyright notice and this permission notice appear in all copies. |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
9 * |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
17 */ |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
18 |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
19 #ifndef UNICODE_H |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
20 #define UNICODE_H |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
21 |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
22 /** |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
23 * \file unicode.h |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
24 * \brief UTF-8 to UTF-32 conversions |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
25 * \author David Demelier <markand@malikania.fr> |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
26 */ |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
27 |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
28 #include <stdbool.h> |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
29 #include <stddef.h> |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
30 #include <stdint.h> |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
31 |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
32 /** |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
33 * Encode the unicode code point into multibyte string. |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
34 * |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
35 * To make sure that buffer is always large enough, you may pass a buffer of |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
36 * size 4 as it's the longest UTF-8 encoding of a single code point for now. |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
37 * |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
38 * \pre dst != NULL |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
39 * \param dst the UTF-8 buffer destination |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
40 * \param dstsz the size available in dst |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
41 * \param point the unicode character |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
42 * \return The number of bytes written (no NUL terminator is appended) or -1 on |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
43 * error and sets errno. |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
44 * \warning The destination is **not** NUL terminated. |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
45 */ |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
46 size_t |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
47 uni8_encode(uint8_t dst[], size_t dstsz, uint32_t point); |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
48 |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
49 /** |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
50 * Decode the multibyte buffer into a Unicode code point. |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
51 * |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
52 * \pre src != NULL |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
53 * \pre point != NULL |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
54 * \param src the UTF-8 source string |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
55 * \param point the unicode character destination |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
56 * \return The number of bytes parsed in src or -1 on error and sets errno. |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
57 */ |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
58 size_t |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
59 uni8_decode(const uint8_t src[], uint32_t *point); |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
60 |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
61 /** |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
62 * Get the number of bytes that follow this UTF-8 character. |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
63 * |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
64 * This can be used to iterate a valid UTF-8 string to jump to the next real |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
65 * character. |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
66 * |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
67 * \param c the first multi byte character |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
68 * \return The number of bytes [1-4] or -1 if invalid and sets errno. |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
69 * \warning You may still need to verify that following characters are valid as |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
70 * this function only returns the number of bytes that *should* |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
71 * exist after this one. |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
72 */ |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
73 size_t |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
74 uni8_sizeof(uint8_t c); |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
75 |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
76 /** |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
77 * Get the real number of Unicode characters in a string. |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
78 * |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
79 * \pre src != NULL |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
80 * \param src the UTF-8 string |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
81 * \return The number of unicode characters or -1 on error and sets errno. |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
82 */ |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
83 size_t |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
84 uni8_length(const uint8_t src[]); |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
85 |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
86 /** |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
87 * Convert a UTF-8 string to UTF-32 string. |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
88 * |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
89 * This function will write at most dstsz bytes in dst including the NUL |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
90 * terminator. The caller is responsible for providing an area large enough to store |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
91 * the required number of unicode characters plus the NUL terminator. |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
92 * |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
93 * Use \ref uni8_length to determine the number of characters required. |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
94 * |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
95 * \pre src != NULL |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
96 * \pre dst != NULL |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
97 * \param src the UTF-8 string |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
98 * \param dst the UTF-32 destination |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
99 * \param dstsz the size of the destination |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
100 * \return The number of bytes written (excluding the null terminator) or -1 on |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
101 * error and sets errno. |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
102 * \see \ref uni8_length |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
103 */ |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
104 size_t |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
105 uni8_to32(const uint8_t src[], uint32_t dst[], size_t dstsz); |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
106 |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
107 /** |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
108 * Get the number of bytes required for the unicode point. |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
109 * |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
110 * \param point the unicode point |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
111 * \return The number of bytes [1-4] or -1 on error and sets errno. |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
112 */ |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
113 size_t |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
114 uni32_sizeof(uint32_t point); |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
115 |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
116 /** |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
117 * Get the number of characters in src. |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
118 * |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
119 * \pre src != NULL |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
120 * \param src the NUL terminated UTF-32 string |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
121 * \return The number of unicode characters. |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
122 */ |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
123 size_t |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
124 uni32_length(const uint32_t src[]); |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
125 |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
126 /** |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
127 * Determine the number of UTF-8 bytes excluding the NUL terminator that |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
128 * are needed to convert this UTF-32 string to UTF-8. |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
129 * |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
130 * \pre src != NULL |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
131 * \param src the UTF-32 source string |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
132 * \return The number of bytes required excluding the NUL terminator or -1 on |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
133 * error and sets errno. |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
134 */ |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
135 size_t |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
136 uni32_requires(const uint32_t src[]); |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
137 |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
138 /** |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
139 * Convert a UTF-32 string to UTF-8 string. |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
140 * |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
141 * The output buffer will be filled with at most `dstsz` bytes including the |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
142 * nul terminator. The function \ref uni32_requires can be used to determine |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
143 * the number of bytes required. |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
144 * |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
145 * \pre src != NULL |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
146 * \pre dst != NULL |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
147 * \param src the UTF-32 string |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
148 * \param dst the string destination |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
149 * \param dstsz the number of bytes available in dst |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
150 * \return the number of bytes written or -1 on error and sets errno |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
151 * accordingly. |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
152 */ |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
153 size_t |
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
154 uni32_to8(const uint32_t src[], uint8_t dst[], size_t dstsz); |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
155 |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
156 /** |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
157 * Check if the Unicode character is in the alpha category. |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
158 * |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
159 * \param c the character |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
160 * \return True if alpha. |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
161 */ |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
162 bool |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
163 uni_isalpha(uint32_t c); |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
164 |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
165 /** |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
166 * Check if the unicode character is digit. |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
167 * |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
168 * \param c the character |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
169 * \return True if digit. |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
170 */ |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
171 bool |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
172 uni_isdigit(uint32_t c); |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
173 |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
174 /** |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
175 * Check if the unicode character is lower case. |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
176 * |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
177 * \param c the character |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
178 * \return True if lower case. |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
179 */ |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
180 bool |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
181 uni_islower(uint32_t c); |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
182 |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
183 /** |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
184 * Check if the unicode character is space. |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
185 * |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
186 * \param c the character |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
187 * \return True if space. |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
188 */ |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
189 bool |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
190 uni_isspace(uint32_t c); |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
191 |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
192 /** |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
193 * Check if the unicode character is title case. |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
194 * |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
195 * \param c the character |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
196 * \return True if title case. |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
197 */ |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
198 bool |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
199 uni_istitle(uint32_t c); |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
200 |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
201 /** |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
202 * Check if the unicode character is upper case. |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
203 * |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
204 * \param c the character |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
205 * \return True if upper case. |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
206 */ |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
207 bool |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
208 uni_isupper(uint32_t c); |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
209 |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
210 /** |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
211 * Convert to upper case. |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
212 * |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
213 * \param c the character |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
214 * \return The upper case character. |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
215 */ |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
216 uint32_t |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
217 uni_toupper(uint32_t c); |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
218 |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
219 /** |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
220 * Convert to lower case. |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
221 * |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
222 * \param c the character |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
223 * \return The lower case character. |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
224 */ |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
225 uint32_t |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
226 uni_tolower(uint32_t c); |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
227 |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
228 /** |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
229 * Convert to title case. |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
230 * |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
231 * \param c the character |
11
43a9d763656b
unicode: improve C API, removing dynamic allocations
David Demelier <markand@malikania.fr>
parents:
10
diff
changeset
|
232 * \return The title case character. |
10
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
233 */ |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
234 uint32_t |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
235 uni_totitle(uint32_t c); |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
236 |
ae1003c2a284
misc: extreme simplification
David Demelier <markand@malikania.fr>
parents:
diff
changeset
|
237 #endif // !UNICODE_H |