Mercurial > libunicode
changeset 14:153c09cc6dcb
misc: miscellaneous cleanups for 2021
- Removal of Doxygen,
- Bump copyright years,
- Replace bool with int.
author | David Demelier <markand@malikania.fr> |
---|---|
date | Wed, 03 Feb 2021 15:29:06 +0100 |
parents | 5c917a8cd011 |
children | aa7817402878 |
files | Doxyfile LICENSE.md Makefile gen/src/mkunicode-c.c gen/unicode-before.c gen/unicode-before.cpp test/unicode++.cpp test/unicode.c unicode.c unicode.cpp unicode.h unicode.hpp |
diffstat | 12 files changed, 58 insertions(+), 404 deletions(-) [+] |
line wrap: on
line diff
--- a/Doxyfile Wed Mar 25 17:14:07 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ -# -# Doxyfile -- generate API documentation for libunicode -# -# Copyright (c) 2013-2020 David Demelier <markand@malikania.fr> -# -# Permission to use, copy, modify, and/or distribute this software for any -# purpose with or without fee is hereby granted, provided that the above -# copyright notice and this permission notice appear in all copies. -# -# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -# - -DOXYFILE_ENCODING = UTF-8 -PROJECT_NAME = "libunicode" -PROJECT_NUMBER = "0.1.0" -PROJECT_BRIEF = "UTF-8 to UTF-32 conversions and various operations" -PROJECT_LOGO = -OUTPUT_DIRECTORY = doxygen -ALLOW_UNICODE_NAMES = YES -STRIP_FROM_PATH = ./ -TAB_SIZE = 8 -OPTIMIZE_OUTPUT_FOR_C = YES -AUTOLINK_SUPPORT = NO -QUIET = YES -WARNINGS = YES -INPUT = unicode.h unicode.hpp -INPUT_ENCODING = UTF-8 -RECURSIVE = NO -GENERATE_LATEX = NO -GENERATE_MAN = NO -MAX_INITIALIZER_LINES = 0
--- a/LICENSE.md Wed Mar 25 17:14:07 2020 +0100 +++ b/LICENSE.md Wed Feb 03 15:29:06 2021 +0100 @@ -1,7 +1,7 @@ libunicode -- license ===================== -Copyright (c) 2013-2020 David Demelier <markand@malikania.fr> +Copyright (c) 2013-2021 David Demelier <markand@malikania.fr> Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above
--- a/Makefile Wed Mar 25 17:14:07 2020 +0100 +++ b/Makefile Wed Feb 03 15:29:06 2021 +0100 @@ -1,7 +1,7 @@ # # Makefile -- basic Makefile for libunicode # -# Copyright (c) 2013-2020 David Demelier <markand@malikania.fr> +# Copyright (c) 2013-2021 David Demelier <markand@malikania.fr> # # Permission to use, copy, modify, and/or distribute this software for any # purpose with or without fee is hereby granted, provided that the above @@ -46,17 +46,14 @@ ${CC} ${INCS} ${CFLAGS} -o test/unicode unicode.c test/unicode.c ${LDFLAGS} test/unicode++: unicode.cpp unicode.hpp test/unicode++.cpp - ${CPP} ${INCS} ${CFLAGS} -o test/unicode++ unicode.cpp test/unicode++.cpp ${LDFLAGS} + ${CPP} ${INCS} ${CXXFLAGS} -o test/unicode++ unicode.cpp test/unicode++.cpp ${LDFLAGS} tests: test/unicode test/unicode++ test/unicode test/unicode++ -doxygen: - doxygen Doxyfile - clean: rm -f gen/src/mkunicode-c gen/src/mkunicode-cpp rm -f test/unicode test/unicode++ -.PHONY: all clean doxygen tests +.PHONY: all clean tests
--- a/gen/src/mkunicode-c.c Wed Mar 25 17:14:07 2020 +0100 +++ b/gen/src/mkunicode-c.c Wed Feb 03 15:29:06 2021 +0100 @@ -376,7 +376,7 @@ iss = mkissingle(label, prop); fprintf(out, - "bool\n" + "int\n" "uni_is%s(uint32_t c)\n" "{\n" "\tconst uint32_t *p;\n" @@ -387,7 +387,7 @@ fprintf(out, "\tp = search(c, is%sr, nelem (is%sr) / 2, 2);\n\n" "\tif (p && c >= p[0] && c <= p[1])\n" - "\t\treturn true;\n", + "\t\treturn 1;\n", label, label); if(isp) @@ -395,7 +395,7 @@ "\n" "\tp = search(c, is%sp, nelem (is%sp) / 2, 2);\n\n" "\tif (p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n" - "\t\treturn true;\n", + "\t\treturn 1;\n", label, label); if(iss) @@ -403,13 +403,13 @@ "\n" "\tp = search(c, is%ss, nelem (is%ss), 1);\n\n" "\tif (p && c == p[0])\n" - "\t\treturn true;\n", + "\t\treturn 1;\n", label, label); fprintf(out, "\n" - "\treturn false;\n" + "\treturn 0;\n" "}\n" "\n" ); @@ -590,15 +590,15 @@ { mkisrange(label, prop, 1); fprintf(out, - "bool\n" + "int\n" "uni_is%s(uint32_t c)\n" "{\n" "\tconst uint32_t *p;\n" "\n" "\tp = search(c, is%sr, nelem (is%sr) / 2, 2);\n\n" "\tif (p && c >= p[0] && c <= p[1])\n" - "\t\treturn true;\n\n" - "\treturn false;\n" + "\t\treturn 1;\n\n" + "\treturn 0;\n" "}\n" "\n", label, label, label);
--- a/gen/unicode-before.c Wed Mar 25 17:14:07 2020 +0100 +++ b/gen/unicode-before.c Wed Feb 03 15:29:06 2021 +0100 @@ -1,7 +1,7 @@ /* * unicode.c -- UTF-8 to UTF-32 conversions and various operations * - * Copyright (c) 2013-2020 David Demelier <markand@malikania.fr> + * Copyright (c) 2013-2021 David Demelier <markand@malikania.fr> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above
--- a/gen/unicode-before.cpp Wed Mar 25 17:14:07 2020 +0100 +++ b/gen/unicode-before.cpp Wed Feb 03 15:29:06 2021 +0100 @@ -1,7 +1,7 @@ /* * unicode.cpp -- UTF-8 to UTF-32 conversions and various operations * - * Copyright (c) 2013-2020 David Demelier <markand@malikania.fr> + * Copyright (c) 2013-2021 David Demelier <markand@malikania.fr> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above
--- a/test/unicode++.cpp Wed Mar 25 17:14:07 2020 +0100 +++ b/test/unicode++.cpp Wed Feb 03 15:29:06 2021 +0100 @@ -1,7 +1,7 @@ /* * unicode++.cpp -- main test file for unicode * - * Copyright (c) 2013-2020 David Demelier <markand@malikania.fr> + * Copyright (c) 2013-2021 David Demelier <markand@malikania.fr> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above
--- a/test/unicode.c Wed Mar 25 17:14:07 2020 +0100 +++ b/test/unicode.c Wed Feb 03 15:29:06 2021 +0100 @@ -1,7 +1,7 @@ /* * unicode.c -- main test file for unicode (C version) * - * Copyright (c) 2013-2020 David Demelier <markand@malikania.fr> + * Copyright (c) 2013-2021 David Demelier <markand@malikania.fr> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -38,7 +38,7 @@ return t; } -static bool +static int u32cmp(const uint32_t *s1, const uint32_t *s2) { const size_t l1 = u32len(s1);
--- a/unicode.c Wed Mar 25 17:14:07 2020 +0100 +++ b/unicode.c Wed Feb 03 15:29:06 2021 +0100 @@ -1,7 +1,7 @@ /* * unicode.c -- UTF-8 to UTF-32 conversions and various operations * - * Copyright (c) 2013-2020 David Demelier <markand@malikania.fr> + * Copyright (c) 2013-2021 David Demelier <markand@malikania.fr> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -66,7 +66,7 @@ 0xfeff, 0xfeff, }; -bool +int uni_isspace(uint32_t c) { const uint32_t *p; @@ -74,9 +74,9 @@ p = search(c, isspacer, nelem (isspacer) / 2, 2); if (p && c >= p[0] && c <= p[1]) - return true; + return 1; - return false; + return 0; } static const uint32_t isdigitr[] = { @@ -132,7 +132,7 @@ 0x1d7ce, 0x1d7ff, }; -bool +int uni_isdigit(uint32_t c) { const uint32_t *p; @@ -140,9 +140,9 @@ p = search(c, isdigitr, nelem (isdigitr) / 2, 2); if (p && c >= p[0] && c <= p[1]) - return true; + return 1; - return false; + return 0; } static const uint32_t isalphar[] = { @@ -688,7 +688,7 @@ 0x1ee7e, }; -bool +int uni_isalpha(uint32_t c) { const uint32_t *p; @@ -696,14 +696,14 @@ p = search(c, isalphar, nelem (isalphar) / 2, 2); if (p && c >= p[0] && c <= p[1]) - return true; + return 1; p = search(c, isalphas, nelem (isalphas), 1); if (p && c == p[0]) - return true; + return 1; - return false; + return 0; } static const uint32_t isupperr[] = { @@ -1339,7 +1339,7 @@ 0x1d7ca, }; -bool +int uni_isupper(uint32_t c) { const uint32_t *p; @@ -1347,14 +1347,14 @@ p = search(c, isupperr, nelem (isupperr) / 2, 2); if (p && c >= p[0] && c <= p[1]) - return true; + return 1; p = search(c, isuppers, nelem (isuppers), 1); if (p && c == p[0]) - return true; + return 1; - return false; + return 0; } static const uint32_t islowerr[] = { @@ -1990,7 +1990,7 @@ 0x1d7cb, }; -bool +int uni_islower(uint32_t c) { const uint32_t *p; @@ -1998,14 +1998,14 @@ p = search(c, islowerr, nelem (islowerr) / 2, 2); if (p && c >= p[0] && c <= 
p[1]) - return true; + return 1; p = search(c, islowers, nelem (islowers), 1); if (p && c == p[0]) - return true; + return 1; - return false; + return 0; } static const uint32_t istitler[] = { @@ -2594,7 +2594,7 @@ 0xa7a8, }; -bool +int uni_istitle(uint32_t c) { const uint32_t *p; @@ -2602,14 +2602,14 @@ p = search(c, istitler, nelem (istitler) / 2, 2); if (p && c >= p[0] && c <= p[1]) - return true; + return 1; p = search(c, istitles, nelem (istitles), 1); if (p && c == p[0]) - return true; + return 1; - return false; + return 0; } static const uint32_t toupperr[] = {
--- a/unicode.cpp Wed Mar 25 17:14:07 2020 +0100 +++ b/unicode.cpp Wed Feb 03 15:29:06 2021 +0100 @@ -1,7 +1,7 @@ /* * unicode.cpp -- UTF-8 to UTF-32 conversions and various operations * - * Copyright (c) 2013-2020 David Demelier <markand@malikania.fr> + * Copyright (c) 2013-2021 David Demelier <markand@malikania.fr> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above
--- a/unicode.h Wed Mar 25 17:14:07 2020 +0100 +++ b/unicode.h Wed Feb 03 15:29:06 2021 +0100 @@ -1,7 +1,7 @@ /* * unicode.h -- UTF-8 to UTF-32 conversions and various operations * - * Copyright (c) 2013-2020 David Demelier <markand@malikania.fr> + * Copyright (c) 2013-2021 David Demelier <markand@malikania.fr> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -19,218 +19,60 @@ #ifndef UNICODE_H #define UNICODE_H -/** - * \file unicode.h - * \brief UTF-8 to UTF-32 conversions - * \author David Demelier <markand@malikania.fr> - */ - -#include <stdbool.h> #include <stddef.h> #include <stdint.h> -/** - * Encode the unicode code point into multibyte string. - * - * To make sure that buffer is always large enough, you may pass a buffer of - * size 4 as it's the largest UTF-8 string for now. - * - * \pre dst != NULL - * \param dst the UTF-8 buffer destination - * \param dstsz the size available in dst - * \param point the unicode character - * \return The number of bytes written (excluding the null terminator) or -1 on - * error and sets errno. - * \warning The destination is **not** NUL terminated. - */ size_t -uni8_encode(uint8_t dst[], size_t dstsz, uint32_t point); +uni8_encode(uint8_t *dst, size_t dstsz, uint32_t point); -/** - * Decode the multibyte buffer into an unicode code point. - * - * \pre src != NULL - * \pre point != NULL - * \param src UTF-8 the source string - * \param point the unicode character destination - * \return The number of bytes parsed in src or -1 on error and sets errno. - */ size_t -uni8_decode(const uint8_t src[], uint32_t *point); +uni8_decode(const uint8_t *src, uint32_t *point); -/** - * Get the number of bytes that follow this UTF-8 character. - * - * This can be used to iterate a valid UTF-8 string to jump to the next real - * character. 
- * - * \param c the first multi byte character - * \return The number of bytes [1-4] or -1 if invalid and sets errno. - * \warning You may still need to verify that following characters are valid as - * this function only returns the number of bytes that *should* - * exists after this one. - */ size_t uni8_sizeof(uint8_t c); -/** - * Get real number of unicode character in a string. - * - * \pre src != NULL - * \param src the UTF-8 string - * \return The number of unicode characters or -1 on error and sets errno. - */ size_t -uni8_length(const uint8_t src[]); +uni8_length(const uint8_t *src); -/** - * Convert a UTF-8 string to UTF-32 string. - * - * This function will write at most dstsz bytes in dst including the NUL - * terminator. Caller is responsible to provide an area large enough to store - * the required number of unicode characters plus the NUL terminator. - * - * Use \ref uni8_length to determine the number of characters required. - * - * \pre src != NULL - * \pre dst != NULL - * \param src the UTF-8 string - * \param dst the UTF-32 destination - * \param dstsz the size of the destination - * \return The number of bytes written (excluding the null terminator) or -1 on - * error and sets errno. - * \see \ref uni8_length - */ size_t -uni8_to32(const uint8_t src[], uint32_t dst[], size_t dstsz); +uni8_to32(const uint8_t *src, uint32_t *dst, size_t dstsz); -/** - * Get the number of bytes required for the unicode point. - * - * \param point the unicode point - * \return The number of bytes [1-4] or -1 on error and sets errno. - */ size_t uni32_sizeof(uint32_t point); -/** - * Get the number of characters in src. - * - * \pre src != NULL - * \param src the NUL terminated UTF-32 string - * \return The number of unicode characters. - */ size_t -uni32_length(const uint32_t src[]); - -/** - * Determine the number of UTF-8 characters excluding the NUL terminator that - * are needed to convert this UTF-32 string to UTF-8. 
- * - * \pre src != NULL - * \param src the UTF-32 source string - * \return The number of bytes required excluding the NUL terminator or -1 on - * error and sets errno. - */ -size_t -uni32_requires(const uint32_t src[]); +uni32_length(const uint32_t *src); -/** - * Convert a UTF-32 string to UTF-8 string. - * - * The output buffer will be filled with at most `dstsize` bytes including the - * nul terminator. The function \ref uni32_requires can be used to determine - * the number of codepoints required. - * - * \pre src != NULL - * \pre dst != NULL - * \param src the UTF-32 string - * \param dst the string destination - * \param dstsz the number of bytes available in dst - * \return the number of bytes written or -1 on error and sets errno - * accordingly. - */ +size_t +uni32_requires(const uint32_t *src); + size_t -uni32_to8(const uint32_t src[], uint8_t dst[], size_t dstsz); +uni32_to8(const uint32_t *src, uint8_t *dst, size_t dstsz); -/** - * Check if the unicode character is alpha category. - * - * \param c the character - * \return True if alpha. - */ -bool +int uni_isalpha(uint32_t c); -/** - * Check if the unicode character is digit. - * - * \param c the character - * \return True if digit. - */ -bool +int uni_isdigit(uint32_t c); -/** - * Check if the unicode character is lower case. - * - * \param c the character - * \return True if lower case. - */ -bool +int uni_islower(uint32_t c); -/** - * Check if the unicode character is space. - * - * \param c the character - * \return True if space. - */ -bool +int uni_isspace(uint32_t c); -/** - * Check if the unicode character is title case. - * - * \param c the character - * \return True if title case. - */ -bool +int uni_istitle(uint32_t c); -/** - * Check if the unicode character is upper case. - * - * \param c the character - * \return True if upper case. - */ -bool +int uni_isupper(uint32_t c); -/** - * Convert to upper case. - * - * \param c the character - * \return The upper case character. 
- */ uint32_t uni_toupper(uint32_t c); -/** - * Convert to lower case. - * - * \param c the character - * \return The lower case character. - */ uint32_t uni_tolower(uint32_t c); -/** - * Convert to title case. - * - * \param c the character - * \return The title case character. - */ uint32_t uni_totitle(uint32_t c);
--- a/unicode.hpp Wed Mar 25 17:14:07 2020 +0100 +++ b/unicode.hpp Wed Feb 03 15:29:06 2021 +0100 @@ -1,7 +1,7 @@ /* * unicode.hpp -- UTF-8 to UTF-32 conversions and various operations * - * Copyright (c) 2013-2020 David Demelier <markand@malikania.fr> + * Copyright (c) 2013-2021 David Demelier <markand@malikania.fr> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -19,76 +19,22 @@ #ifndef UNICODE_HPP #define UNICODE_HPP -/** - * \file unicode.hpp - * \brief UTF-8 to UTF-32 conversions - * \author David Demelier <markand@malikania.fr> - */ - #include <stdexcept> #include <string> #include <string_view> -/** - * \brief Unicode namespace. - */ namespace unicode { -/** - * Encode the unicode code point into multibyte string. - * - * \param point the unicode code point - * \param res the output buffer - */ void encode(char32_t point, char res[5]) noexcept; -/** - * Decode the multibyte buffer into an unicode code point. - * - * \param c the code point destination - * \param res the multibyte string. - */ void decode(char32_t& c, const char* res) noexcept; -/** - * Get the number of bytes for the first multi byte character from a - * utf-8 string. - * - * This can be used to iterate a valid UTF-8 string to jump to the next - * real character. - * - * \param c the first multi byte character - * \return the number of bytes [1-4] or -1 if invalid - */ auto nbytes_utf8(char c) noexcept -> int; -/** - * Get the number of bytes for the unicode point. - * - * \param point the unicode point - * \return the number of bytes [1-4] or -1 if invalid - */ auto nbytes_point(char32_t point) noexcept -> int; -/** - * Get real number of character in a string. - * - * \param str the string - * \return the length - * \throw std::invalid_argument on invalid sequence - */ auto length(std::string_view str) -> unsigned; -/** - * Iterate over all real characters in the UTF-8 string. 
- * - * The function must have the following signature: - * void f(char32_t ch) - * - * \param str the UTF-8 string - * \param function the function callback - * \throw std::invalid_argument on invalid sequence - */ template <typename Func> void for_each(std::string_view str, Func function) { @@ -106,128 +52,34 @@ } } -/** - * Convert a UTF-32 string to UTF-8 string. - * - * \param array the UTF-32 string - * \return the UTF-8 string - * \throw std::invalid_argument on invalid sequence - */ auto to_utf8(std::u32string_view array) -> std::string; -/** - * Convert a UTF-8 string to UTF-32 string. - * - * \param str the UTF-8 string - * \return the UTF-32 string - * \throw std::invalid_argument on invalid sequence - */ auto to_utf32(std::string_view str) -> std::u32string; -/** - * Check if the unicode character is space. - * - * \param c the character - * \return true if space - */ auto isspace(char32_t c) noexcept -> bool; -/** - * Check if the unicode character is digit. - * - * \param c the character - * \return true if digit - */ auto isdigit(char32_t c) noexcept -> bool; -/** - * Check if the unicode character is alpha category. - * - * \param c the character - * \return true if alpha - */ auto isalpha(char32_t c) noexcept -> bool; -/** - * Check if the unicode character is upper case. - * - * \param c the character - * \return true if upper case - */ auto isupper(char32_t c) noexcept -> bool; -/** - * Check if the unicode character is lower case. - * - * \param c the character - * \return true if lower case - */ auto islower(char32_t c) noexcept -> bool; -/** - * Check if the unicode character is title case. - * - * \param c the character - * \return true if title case - */ auto istitle(char32_t c) noexcept -> bool; -/** - * Convert to upper case. - * - * \param c the character - * \return the upper case character - */ auto toupper(char32_t c) noexcept -> char32_t; -/** - * Convert to lower case. 
- * - * \param c the character - * \return the lower case character - */ auto tolower(char32_t c) noexcept -> char32_t; -/** - * Convert to title case. - * - * \param c the character - * \return the title case character - */ auto totitle(char32_t c) noexcept -> char32_t; -/** - * Convert the UTF-32 string to upper case. - * - * \param str the string - * \return the upper case string - */ auto toupper(std::u32string_view str) -> std::u32string; -/** - * Convert the UTF-8 string to upper case. - * - * \param str the string - * \return the upper case string - * \warning very slow at the moment - */ auto toupper(std::string_view str) -> std::string; -/** - * Convert the UTF-32 string to lower case. - * - * \param str the string - * \return the lower case string - */ auto tolower(std::u32string_view str) -> std::u32string; -/** - * Convert the UTF-8 string to lower case. - * - * \param str the string - * \return the lower case string - * \warning very slow at the moment - */ auto tolower(std::string_view str) -> std::string; } // !unicode