# HG changeset patch # User David Demelier # Date 1647850694 -3600 # Node ID 887a8fd73d1ea269dd627780c0197561864f8b95 # Parent 496cd52a50ec45374629faa91d0cf29ede659f9f cmake: add support diff -r 496cd52a50ec -r 887a8fd73d1e .hgignore --- a/.hgignore Mon Mar 21 09:00:42 2022 +0100 +++ b/.hgignore Mon Mar 21 09:18:14 2022 +0100 @@ -3,16 +3,16 @@ \.swp$ \.swo$ +# Temporary files. +\.a$ +\.d$ +\.o$ + +# Test files. +^tests/test-unicode$ + # Doxygen. ^doxygen/ -# Generator files. -^gen/src/mkunicode-c$ -^gen/src/mkunicode-cpp$ - -# Test files. -^test/unicode$ -^test/unicode\+\+$ - # macOS specific. \.DS_Store diff -r 496cd52a50ec -r 887a8fd73d1e CMakeLists.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CMakeLists.txt Mon Mar 21 09:18:14 2022 +0100 @@ -0,0 +1,90 @@ +# +# CMakeLists.txt -- basic CMake build for libunicode +# +# Copyright (c) 2013-2022 David Demelier +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+# + +cmake_minimum_required(VERSION 3.20) +project( + libunicode + VERSION "1.0.0" + DESCRIPTION "UTF-8 to UTF-32 conversions and various operations" + HOMEPAGE_URL "http://projects.malikania.fr/libunicode" + LANGUAGES C +) + +include(CMakePackageConfigHelpers) +include(GNUInstallDirs) + +add_library(libunicode-static STATIC unicode.c unicode.h) +target_include_directories(libunicode-static PUBLIC $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}> $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>) +install( + TARGETS libunicode-static + EXPORT unicode-targets + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} +) + +if (NOT CMAKE_C_COMPILER_ID MATCHES "MSVC" OR NOT BUILD_SHARED_LIBS) + set_target_properties(libunicode-static PROPERTIES OUTPUT_NAME unicode) +else () + set_target_properties(libunicode-static PROPERTIES OUTPUT_NAME unicode-static) +endif () + +if (BUILD_SHARED_LIBS) + add_library(libunicode-shared SHARED unicode.c unicode.h unicode.def) + target_include_directories(libunicode-shared PUBLIC $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}> $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>) + set_target_properties( + libunicode-shared + PROPERTIES + OUTPUT_NAME unicode + VERSION ${PROJECT_VERSION} + SOVERSION ${PROJECT_VERSION_MAJOR} + ) + install( + TARGETS libunicode-shared + EXPORT unicode-targets + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + ) +endif () + +configure_file( + ${PROJECT_SOURCE_DIR}/unicode.pc.in + ${PROJECT_BINARY_DIR}/unicode.pc + @ONLY +) + +write_basic_package_version_file( + ${PROJECT_BINARY_DIR}/unicode-config-version.cmake + VERSION ${PROJECT_VERSION} + COMPATIBILITY SameMajorVersion +) + +install(FILES ${PROJECT_SOURCE_DIR}/unicode.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +install(FILES ${PROJECT_BINARY_DIR}/unicode.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) +install(FILES ${PROJECT_SOURCE_DIR}/libunicode.3 DESTINATION ${CMAKE_INSTALL_MANDIR}/man3) +install( + EXPORT unicode-targets + FILE unicode-targets.cmake + NAMESPACE unicode:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/unicode +) +install( + FILES + 
${PROJECT_BINARY_DIR}/unicode-config-version.cmake + ${PROJECT_SOURCE_DIR}/unicode-config.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/unicode +) diff -r 496cd52a50ec -r 887a8fd73d1e INSTALL.md --- a/INSTALL.md Mon Mar 21 09:00:42 2022 +0100 +++ b/INSTALL.md Mon Mar 21 09:18:14 2022 +0100 @@ -6,14 +6,10 @@ - C99 (at least `EILSEQ` has to be available as errno constant). - C11 (for running tests). - -Installation (C++ variant) --------------------------- +- `uint32_t` and `uint8_t` types. -Just copy the files unicode.cpp and unicode.hpp and add them to your project. - -Installation ------------- +Embed +----- Copy unicode.h and unicode.c to your project. @@ -22,3 +18,47 @@ The file unicode.c is generated from gen/ subdirectory. Edit the appropriate files and run `make` in top level directory to regenerate them. + +Installation +------------ + +The module is small enough to be incorporated verbatim into your project, but it +is still possible to install it system-wide. + +### Using CMake (recommended) + +Using [CMake][cmake] you get proper CMake package configuration files, shared +libraries and `pkg-config` files. + + $ cmake -S . -B build -DBUILD_SHARED_LIBS=On + $ cmake --build build + # cmake --build build --target install + +Turn `BUILD_SHARED_LIBS` to *Off* if you don't want shared libraries. + +Then, you can import `unicode` and use one of the imported targets: + +- `unicode::libunicode`: shared if available, static otherwise, +- `unicode::libunicode-shared`: shared version, +- `unicode::libunicode-static`: static version. 
+ +Example: + + cmake_minimum_required(VERSION 3.20) + project(example) + find_package(unicode REQUIRED) + add_executable(example example.c) + target_link_libraries(example unicode::libunicode) + +### Using POSIX make (not recommended) + +POSIX make (not recommended, only static library): + + $ make + # make install + +The test suite is available using: + + $ make tests + +[cmake]: http://cmake.org diff -r 496cd52a50ec -r 887a8fd73d1e Makefile --- a/Makefile Mon Mar 21 09:00:42 2022 +0100 +++ b/Makefile Mon Mar 21 09:18:14 2022 +0100 @@ -18,18 +18,35 @@ .POSIX: -CC= cc -CFLAGS= -O3 -DNDEBUG +CC= cc + +PREFIX= /usr/local +INCDIR= ${PREFIX}/include +LIBDIR= ${PREFIX}/lib +MANDIR= ${PREFIX}/share/man -INCS= -Iextern/librexo -I. +VERSION= 1.0.0 + +LIB_SRCS= unicode.c +LIB_OBJS= ${LIB_SRCS:.c=.o} +LIB_DEPS= ${LIB_SRCS:.c=.d} +LIB= libunicode.a + +TESTS_SRCS= tests/test-unicode.c +TESTS_OBJS= ${TESTS_SRCS:.c=} .SUFFIXES: -.SUFFIXES: .c +.SUFFIXES: .c .o -all: unicode.c +all: ${LIB} + +-include ${LIB_DEPS} .c: - ${CC} ${CFLAGS} $< -o $@ ${LDFLAGS} + ${CC} ${CFLAGS} -Iextern/librexo -I. $< -o $@ ${LIB} ${LDFLAGS} + +.c.o: + ${CC} ${CFLAGS} -Iextern/librexo -I. 
-MMD -c $< -o $@ ${LDFLAGS} gen/UnicodeData.txt: curl http://unicode.org/Public/UCD/latest/ucd/UnicodeData.txt -o $@ @@ -39,13 +56,23 @@ cat gen/UnicodeData.txt | awk -f gen/mkutf.awk >> unicode.c cat gen/unicode-after.c >> unicode.c -test/unicode: unicode.c unicode.h test/unicode.c - ${CC} ${INCS} ${CFLAGS} -o test/unicode unicode.c test/unicode.c ${LDFLAGS} +${LIB}: ${LIB_OBJS} + ${AR} -rc $@ ${LIB_OBJS} + +${TESTS_OBJS}: ${LIB} + +tests: ${TESTS_OBJS} + for t in ${TESTS_OBJS}; do ./$$t; done -tests: test/unicode - test/unicode +install: + mkdir -p ${DESTDIR}${LIBDIR} + cp libunicode.a ${DESTDIR}${LIBDIR} + mkdir -p ${DESTDIR}${INCDIR} + cp unicode.h ${DESTDIR}${INCDIR} + mkdir -p ${DESTDIR}${MANDIR}/man3 + cp libunicode.3 ${DESTDIR}${MANDIR}/man3 clean: - rm -f test/unicode + rm -f ${LIB} ${LIB_DEPS} ${LIB_OBJS} ${TESTS_OBJS} -.PHONY: all clean tests +.PHONY: all clean install tests diff -r 496cd52a50ec -r 887a8fd73d1e README.md --- a/README.md Mon Mar 21 09:00:42 2022 +0100 +++ b/README.md Mon Mar 21 09:18:14 2022 +0100 @@ -6,7 +6,7 @@ Conversions and unicode inspection in C99 -It is currently based on unicode 13.0.0. +It is currently based on unicode 14.0.0. Features -------- @@ -19,6 +19,4 @@ Documentation ------------- -See the libunicode(3) manual page. - - man ./libunicode.3 +See the `libunicode(3)` manual page. diff -r 496cd52a50ec -r 887a8fd73d1e test/unicode.c --- a/test/unicode.c Mon Mar 21 09:00:42 2022 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,329 +0,0 @@ -/* - * unicode.c -- main test file for unicode - * - * Copyright (c) 2013-2022 David Demelier - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. 
- * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include - -#include - -#include "unicode.h" - -/* - * /!\ Be sure to keep this file with UTF-8 encoding /!\ - */ - -static size_t -u32len(const uint32_t *s) -{ - size_t t = 0; - - while (*s++) - ++t; - - return t; -} - -static int -u32cmp(const uint32_t *s1, const uint32_t *s2) -{ - const size_t l1 = u32len(s1); - const size_t l2 = u32len(s2); - - return l1 == l2 && memcmp(s1, s2, l1) == 0; -} - -RX_TEST_CASE(uni8_encode, simple) -{ - size_t r; - - /* a -> 1 bytes. */ - { - uint8_t buffer[5] = { 0 }; - - r = uni8_encode(buffer, sizeof (buffer), U'a'); - RX_INT_REQUIRE_EQUAL(r, 1); - RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"a"); - } - - /* é -> 2 bytes. */ - { - uint8_t buffer[5] = { 0 }; - - r = uni8_encode(buffer, sizeof (buffer), U'é'); - RX_INT_REQUIRE_EQUAL(r, 2); - RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"é"); - } -} - -RX_TEST_CASE(uni8_encode, invalid) -{ - size_t r; - uint8_t buffer[5] = { 0 }; - - r = uni8_encode(buffer, sizeof (buffer), 0xffffffff); - RX_UINT_REQUIRE_EQUAL(r, (size_t)-1); - RX_INT_REQUIRE_EQUAL(errno, EILSEQ); -} - -RX_TEST_CASE(uni8_encode, toosmall) -{ - size_t r; - uint8_t buffer[1] = { 0 }; - - r = uni8_encode(buffer, sizeof (buffer), U'é'); - RX_UINT_REQUIRE_EQUAL(r, (size_t)-1); - RX_INT_REQUIRE_EQUAL(errno, ERANGE); -} - -RX_TEST_CASE(unit8_decode, simple) -{ - size_t r; - - /* a -> 1 bytes. 
*/ - { - uint32_t code = -1; - - r = uni8_decode((const uint8_t *)u8"a", &code); - RX_UINT_REQUIRE_EQUAL(r, 1U); - RX_INT_REQUIRE_EQUAL(code, 'a'); - } - - /* é -> 2 bytes. */ - { - uint32_t code = -1; - - r = uni8_decode((const uint8_t *)u8"é", &code); - RX_UINT_REQUIRE_EQUAL(r, 2U); - RX_INT_REQUIRE_EQUAL(code, U'é'); - } -} - -RX_TEST_CASE(uni8_decode, invalid) -{ - size_t r; - - /* Invalid UTF-8 sequence. */ - { - uint32_t code = -1; - - r = uni8_decode((const uint8_t *)u8"\xff""a", &code); - RX_UINT_REQUIRE_EQUAL(r, (size_t)-1); - RX_UINT_REQUIRE_EQUAL(code, (uint32_t)-1); - RX_INT_REQUIRE_EQUAL(errno, EILSEQ); - } - - /* Valid "€" but unfinished sequence. */ - { - uint32_t code = -1; - - r = uni8_decode((const uint8_t []){ -30, 0 }, &code); - RX_UINT_REQUIRE_EQUAL(r, (size_t)-1); - RX_UINT_REQUIRE_EQUAL(code, (uint32_t)-1); - RX_INT_REQUIRE_EQUAL(errno, EILSEQ); - } -} - -RX_TEST_CASE(uni8_sizeof, simple) -{ - RX_INT_REQUIRE_EQUAL(uni8_sizeof(u8"a"[0]), 1U); - RX_INT_REQUIRE_EQUAL(uni8_sizeof(u8"é"[0]), 2U); - RX_INT_REQUIRE_EQUAL(uni8_sizeof(u8"€"[0]), 3U); - RX_INT_REQUIRE_EQUAL(uni8_sizeof(u8"𐍈"[0]), 4U); -} - -RX_TEST_CASE(uni8_sizeof, invalid) -{ - RX_UINT_REQUIRE_EQUAL((size_t)-1, uni8_sizeof(u8"\xff"[0])); - RX_INT_REQUIRE_EQUAL(errno, EILSEQ); -} - -RX_TEST_CASE(uni8_length, simple) -{ - RX_UINT_REQUIRE_EQUAL(uni8_length((const uint8_t *)"abc"), 3U); - RX_UINT_REQUIRE_EQUAL(uni8_length((const uint8_t *)"5€"), 2U); -} - -RX_TEST_CASE(uni8_length, invalid) -{ - RX_UINT_REQUIRE_EQUAL((size_t)-1, uni8_length((const uint8_t *)"a""\xff""b")); - RX_INT_REQUIRE_EQUAL(errno, EILSEQ); -} - -RX_TEST_CASE(uni8_to32, simple) -{ - size_t r; - - { - uint32_t buffer[10] = { 0 }; - uint32_t expected[] = { U'a', U'b', U'c', 0 }; - - r = uni8_to32((const uint8_t *)"abc", buffer, 10); - RX_UINT_REQUIRE_EQUAL(r, 3U); - RX_REQUIRE(u32cmp(buffer, expected)); - } - - { - uint32_t buffer[10] = { 0 }; - uint32_t expected[] = { U'a', U'é', U'c', 0 }; - - r = uni8_to32((const 
uint8_t *)"aéc", buffer, 10); - RX_UINT_REQUIRE_EQUAL(r, 3); - RX_REQUIRE(u32cmp(buffer, expected)); - } -} - -RX_TEST_CASE(uni8_to32, invalid) -{ - size_t r; - uint32_t buffer[10] = { 0 }; - - /* Invalid UTF-8 sequence. */ - r = uni8_to32((const uint8_t *)u8"\xff""a", buffer, 10); - RX_UINT_REQUIRE_EQUAL(r, (size_t)-1); - RX_INT_REQUIRE_EQUAL(errno, EILSEQ); - - /* Valid "€" but unfinished sequence. */ - r = uni8_to32((const uint8_t []){ -30, 0 }, buffer, 10); - RX_UINT_REQUIRE_EQUAL(r, (size_t)-1); - RX_INT_REQUIRE_EQUAL(errno, EILSEQ); -} - -RX_TEST_CASE(uni8_to32, toosmall) -{ - size_t r; - uint32_t buffer[4] = { 0 }; - - r = uni8_to32((const uint8_t *)u8"bonjour à tous", buffer, 1); - RX_UINT_REQUIRE_EQUAL(r, (size_t)-1); - RX_INT_REQUIRE_EQUAL(errno, ERANGE); -} - -RX_TEST_CASE(uni32_sizeof, simple) -{ - RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'a'), 1); - RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'é'), 2); - RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'€'), 3); - RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'𐍈'), 4); -} - -RX_TEST_CASE(uni32_sizeof, invalid) -{ - RX_UINT_REQUIRE_EQUAL((size_t)-1, uni32_sizeof(0xffffffff)); - RX_INT_REQUIRE_EQUAL(errno, EILSEQ); -} - -RX_TEST_CASE(uni32_length, simple) -{ - RX_UINT_REQUIRE_EQUAL(uni32_length((const uint32_t []){ U'a', U'é', U'c', 0 }), 3U); -} - -RX_TEST_CASE(uni32_requires, simple) -{ - RX_UINT_REQUIRE_EQUAL(uni32_requires(U"abc"), 3U); - RX_UINT_REQUIRE_EQUAL(uni32_requires(U"é€𐍈"), 9U); -} - -RX_TEST_CASE(uni32_requires, invalid) -{ - RX_UINT_REQUIRE_EQUAL((size_t)-1, uni32_requires(U"\xffffffff")); - RX_INT_REQUIRE_EQUAL(errno, EILSEQ); -} - -RX_TEST_CASE(uni32_to8, simple) -{ - size_t r; - - { - uint8_t buffer[10] = { 0 }; - - r = uni32_to8(U"abc", buffer, sizeof (buffer)); - RX_UINT_REQUIRE_EQUAL(r, 3U); - RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"abc"); - } - - { - uint8_t buffer[20] = { 0 }; - - r = uni32_to8(U"ça va, 5€ ?", buffer, sizeof (buffer)); - RX_UINT_REQUIRE_EQUAL(r, 14U); - 
RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"ça va, 5€ ?"); - } -} - -RX_TEST_CASE(uni32_to8, invalid) -{ - uint8_t buffer[10] = { 0 }; - - RX_INT_REQUIRE_EQUAL(uni32_to8(U"\xffffffff", buffer, sizeof (buffer)), (size_t)-1); - RX_UINT_REQUIRE_EQUAL(errno, EILSEQ); -} - -RX_TEST_CASE(uni32_to8, toosmall) -{ - size_t r; - uint8_t buffer[3] = { 0 }; - - r = uni32_to8(U"ça va ?", buffer, sizeof (buffer)); - RX_UINT_REQUIRE_EQUAL(r, (size_t)-1); - RX_INT_REQUIRE_EQUAL(errno, ERANGE); -} - -RX_TEST_CASE(misc, isalpha) -{ - RX_REQUIRE(uni_isalpha(U'é')); - RX_REQUIRE(!uni_isalpha(U'€')); -} - -RX_TEST_CASE(misc, isdigit) -{ - RX_REQUIRE(uni_isdigit(U'۱')); - RX_REQUIRE(!uni_isdigit(U'€')); -} - -RX_TEST_CASE(misc, islower) -{ - RX_REQUIRE(uni_islower(U'a')); - RX_REQUIRE(uni_islower(U'é')); - RX_REQUIRE(!uni_islower(U'A')); - RX_REQUIRE(!uni_islower(U'É')); -} - -RX_TEST_CASE(misc, isspace) -{ - RX_REQUIRE(uni_isspace(U' ')); - RX_REQUIRE(!uni_isspace(U'é')); -} - -RX_TEST_CASE(misc, istitle) -{ - RX_REQUIRE(uni_istitle(U'Dž')); - RX_REQUIRE(!uni_istitle(U'€')); -} - -RX_TEST_CASE(misc, isupper) -{ - RX_REQUIRE(!uni_isupper('a')); - RX_REQUIRE(!uni_isupper(U'é')); - RX_REQUIRE(uni_isupper('A')); - RX_REQUIRE(uni_isupper(U'É')); -} - -int -main(int argc, char **argv) -{ - return rx_main(0, NULL, argc, (const char **)argv) == RX_SUCCESS ? 0 : 1; -} diff -r 496cd52a50ec -r 887a8fd73d1e tests/test-unicode.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test-unicode.c Mon Mar 21 09:18:14 2022 +0100 @@ -0,0 +1,329 @@ +/* + * unicode.c -- main test file for unicode + * + * Copyright (c) 2013-2022 David Demelier + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <errno.h> + +#include <rexo.h> + +#include "unicode.h" + +/* + * /!\ Be sure to keep this file with UTF-8 encoding /!\ + */ + +static size_t +u32len(const uint32_t *s) +{ + size_t t = 0; + + while (*s++) + ++t; + + return t; +} + +static int +u32cmp(const uint32_t *s1, const uint32_t *s2) +{ + const size_t l1 = u32len(s1); + const size_t l2 = u32len(s2); + + return l1 == l2 && memcmp(s1, s2, l1 * sizeof (*s1)) == 0; /* memcmp() counts bytes, not code points */ +} + +RX_TEST_CASE(uni8_encode, simple) +{ + size_t r; + + /* a -> 1 bytes. */ + { + uint8_t buffer[5] = { 0 }; + + r = uni8_encode(buffer, sizeof (buffer), U'a'); + RX_INT_REQUIRE_EQUAL(r, 1); + RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"a"); + } + + /* é -> 2 bytes. */ + { + uint8_t buffer[5] = { 0 }; + + r = uni8_encode(buffer, sizeof (buffer), U'é'); + RX_INT_REQUIRE_EQUAL(r, 2); + RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"é"); + } +} + +RX_TEST_CASE(uni8_encode, invalid) +{ + size_t r; + uint8_t buffer[5] = { 0 }; + + r = uni8_encode(buffer, sizeof (buffer), 0xffffffff); + RX_UINT_REQUIRE_EQUAL(r, (size_t)-1); + RX_INT_REQUIRE_EQUAL(errno, EILSEQ); +} + +RX_TEST_CASE(uni8_encode, toosmall) +{ + size_t r; + uint8_t buffer[1] = { 0 }; + + r = uni8_encode(buffer, sizeof (buffer), U'é'); + RX_UINT_REQUIRE_EQUAL(r, (size_t)-1); + RX_INT_REQUIRE_EQUAL(errno, ERANGE); +} + +RX_TEST_CASE(uni8_decode, simple) +{ + size_t r; + + /* a -> 1 bytes. 
*/ + { + uint32_t code = -1; + + r = uni8_decode((const uint8_t *)u8"a", &code); + RX_UINT_REQUIRE_EQUAL(r, 1U); + RX_INT_REQUIRE_EQUAL(code, 'a'); + } + + /* é -> 2 bytes. */ + { + uint32_t code = -1; + + r = uni8_decode((const uint8_t *)u8"é", &code); + RX_UINT_REQUIRE_EQUAL(r, 2U); + RX_INT_REQUIRE_EQUAL(code, U'é'); + } +} + +RX_TEST_CASE(uni8_decode, invalid) +{ + size_t r; + + /* Invalid UTF-8 sequence. */ + { + uint32_t code = -1; + + r = uni8_decode((const uint8_t *)u8"\xff""a", &code); + RX_UINT_REQUIRE_EQUAL(r, (size_t)-1); + RX_UINT_REQUIRE_EQUAL(code, (uint32_t)-1); + RX_INT_REQUIRE_EQUAL(errno, EILSEQ); + } + + /* Valid "€" but unfinished sequence. */ + { + uint32_t code = -1; + + r = uni8_decode((const uint8_t []){ -30, 0 }, &code); + RX_UINT_REQUIRE_EQUAL(r, (size_t)-1); + RX_UINT_REQUIRE_EQUAL(code, (uint32_t)-1); + RX_INT_REQUIRE_EQUAL(errno, EILSEQ); + } +} + +RX_TEST_CASE(uni8_sizeof, simple) +{ + RX_INT_REQUIRE_EQUAL(uni8_sizeof(u8"a"[0]), 1U); + RX_INT_REQUIRE_EQUAL(uni8_sizeof(u8"é"[0]), 2U); + RX_INT_REQUIRE_EQUAL(uni8_sizeof(u8"€"[0]), 3U); + RX_INT_REQUIRE_EQUAL(uni8_sizeof(u8"𐍈"[0]), 4U); +} + +RX_TEST_CASE(uni8_sizeof, invalid) +{ + RX_UINT_REQUIRE_EQUAL((size_t)-1, uni8_sizeof(u8"\xff"[0])); + RX_INT_REQUIRE_EQUAL(errno, EILSEQ); +} + +RX_TEST_CASE(uni8_length, simple) +{ + RX_UINT_REQUIRE_EQUAL(uni8_length((const uint8_t *)"abc"), 3U); + RX_UINT_REQUIRE_EQUAL(uni8_length((const uint8_t *)"5€"), 2U); +} + +RX_TEST_CASE(uni8_length, invalid) +{ + RX_UINT_REQUIRE_EQUAL((size_t)-1, uni8_length((const uint8_t *)"a""\xff""b")); + RX_INT_REQUIRE_EQUAL(errno, EILSEQ); +} + +RX_TEST_CASE(uni8_to32, simple) +{ + size_t r; + + { + uint32_t buffer[10] = { 0 }; + uint32_t expected[] = { U'a', U'b', U'c', 0 }; + + r = uni8_to32((const uint8_t *)"abc", buffer, 10); + RX_UINT_REQUIRE_EQUAL(r, 3U); + RX_REQUIRE(u32cmp(buffer, expected)); + } + + { + uint32_t buffer[10] = { 0 }; + uint32_t expected[] = { U'a', U'é', U'c', 0 }; + + r = uni8_to32((const 
uint8_t *)"aéc", buffer, 10); + RX_UINT_REQUIRE_EQUAL(r, 3); + RX_REQUIRE(u32cmp(buffer, expected)); + } +} + +RX_TEST_CASE(uni8_to32, invalid) +{ + size_t r; + uint32_t buffer[10] = { 0 }; + + /* Invalid UTF-8 sequence. */ + r = uni8_to32((const uint8_t *)u8"\xff""a", buffer, 10); + RX_UINT_REQUIRE_EQUAL(r, (size_t)-1); + RX_INT_REQUIRE_EQUAL(errno, EILSEQ); + + /* Valid "€" but unfinished sequence. */ + r = uni8_to32((const uint8_t []){ -30, 0 }, buffer, 10); + RX_UINT_REQUIRE_EQUAL(r, (size_t)-1); + RX_INT_REQUIRE_EQUAL(errno, EILSEQ); +} + +RX_TEST_CASE(uni8_to32, toosmall) +{ + size_t r; + uint32_t buffer[4] = { 0 }; + + r = uni8_to32((const uint8_t *)u8"bonjour à tous", buffer, 1); + RX_UINT_REQUIRE_EQUAL(r, (size_t)-1); + RX_INT_REQUIRE_EQUAL(errno, ERANGE); +} + +RX_TEST_CASE(uni32_sizeof, simple) +{ + RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'a'), 1); + RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'é'), 2); + RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'€'), 3); + RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'𐍈'), 4); +} + +RX_TEST_CASE(uni32_sizeof, invalid) +{ + RX_UINT_REQUIRE_EQUAL((size_t)-1, uni32_sizeof(0xffffffff)); + RX_INT_REQUIRE_EQUAL(errno, EILSEQ); +} + +RX_TEST_CASE(uni32_length, simple) +{ + RX_UINT_REQUIRE_EQUAL(uni32_length((const uint32_t []){ U'a', U'é', U'c', 0 }), 3U); +} + +RX_TEST_CASE(uni32_requires, simple) +{ + RX_UINT_REQUIRE_EQUAL(uni32_requires(U"abc"), 3U); + RX_UINT_REQUIRE_EQUAL(uni32_requires(U"é€𐍈"), 9U); +} + +RX_TEST_CASE(uni32_requires, invalid) +{ + RX_UINT_REQUIRE_EQUAL((size_t)-1, uni32_requires(U"\xffffffff")); + RX_INT_REQUIRE_EQUAL(errno, EILSEQ); +} + +RX_TEST_CASE(uni32_to8, simple) +{ + size_t r; + + { + uint8_t buffer[10] = { 0 }; + + r = uni32_to8(U"abc", buffer, sizeof (buffer)); + RX_UINT_REQUIRE_EQUAL(r, 3U); + RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"abc"); + } + + { + uint8_t buffer[20] = { 0 }; + + r = uni32_to8(U"ça va, 5€ ?", buffer, sizeof (buffer)); + RX_UINT_REQUIRE_EQUAL(r, 14U); + 
RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"ça va, 5€ ?"); + } +} + +RX_TEST_CASE(uni32_to8, invalid) +{ + uint8_t buffer[10] = { 0 }; + + RX_INT_REQUIRE_EQUAL(uni32_to8(U"\xffffffff", buffer, sizeof (buffer)), (size_t)-1); + RX_UINT_REQUIRE_EQUAL(errno, EILSEQ); +} + +RX_TEST_CASE(uni32_to8, toosmall) +{ + size_t r; + uint8_t buffer[3] = { 0 }; + + r = uni32_to8(U"ça va ?", buffer, sizeof (buffer)); + RX_UINT_REQUIRE_EQUAL(r, (size_t)-1); + RX_INT_REQUIRE_EQUAL(errno, ERANGE); +} + +RX_TEST_CASE(misc, isalpha) +{ + RX_REQUIRE(uni_isalpha(U'é')); + RX_REQUIRE(!uni_isalpha(U'€')); +} + +RX_TEST_CASE(misc, isdigit) +{ + RX_REQUIRE(uni_isdigit(U'۱')); + RX_REQUIRE(!uni_isdigit(U'€')); +} + +RX_TEST_CASE(misc, islower) +{ + RX_REQUIRE(uni_islower(U'a')); + RX_REQUIRE(uni_islower(U'é')); + RX_REQUIRE(!uni_islower(U'A')); + RX_REQUIRE(!uni_islower(U'É')); +} + +RX_TEST_CASE(misc, isspace) +{ + RX_REQUIRE(uni_isspace(U' ')); + RX_REQUIRE(!uni_isspace(U'é')); +} + +RX_TEST_CASE(misc, istitle) +{ + RX_REQUIRE(uni_istitle(U'Dž')); + RX_REQUIRE(!uni_istitle(U'€')); +} + +RX_TEST_CASE(misc, isupper) +{ + RX_REQUIRE(!uni_isupper('a')); + RX_REQUIRE(!uni_isupper(U'é')); + RX_REQUIRE(uni_isupper('A')); + RX_REQUIRE(uni_isupper(U'É')); +} + +int +main(int argc, char **argv) +{ + return rx_main(0, NULL, argc, (const char **)argv) == RX_SUCCESS ? 0 : 1; +} diff -r 496cd52a50ec -r 887a8fd73d1e unicode-config.cmake --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/unicode-config.cmake Mon Mar 21 09:18:14 2022 +0100 @@ -0,0 +1,8 @@ +include("${CMAKE_CURRENT_LIST_DIR}/unicode-targets.cmake") + +# Prefer shared version if found. 
+if (NOT TARGET unicode::libunicode AND TARGET unicode::libunicode-shared) # skip if an earlier find_package(unicode) already created the alias + add_library(unicode::libunicode ALIAS unicode::libunicode-shared) +elseif (NOT TARGET unicode::libunicode) + add_library(unicode::libunicode ALIAS unicode::libunicode-static) +endif () diff -r 496cd52a50ec -r 887a8fd73d1e unicode.def --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/unicode.def Mon Mar 21 09:18:14 2022 +0100 @@ -0,0 +1,20 @@ +EXPORTS + uni32_length + uni32_requires + uni32_sizeof + uni32_to8 + uni8_decode + uni8_encode + uni8_length + uni8_sizeof + uni8_to32 + uni_isalpha + uni_iscontrol + uni_isdigit + uni_islower + uni_isspace + uni_istitle + uni_isupper + uni_tolower + uni_totitle + uni_toupper diff -r 496cd52a50ec -r 887a8fd73d1e unicode.h --- a/unicode.h Mon Mar 21 09:00:42 2022 +0100 +++ b/unicode.h Mon Mar 21 09:18:14 2022 +0100 @@ -22,61 +22,69 @@ #include #include -size_t -uni8_encode(uint8_t *dst, size_t dstsz, uint32_t point); +#if defined(__cplusplus) +extern "C" { +#endif size_t -uni8_decode(const uint8_t *src, uint32_t *point); +uni8_encode(uint8_t *, size_t, uint32_t); size_t -uni8_sizeof(uint8_t c); +uni8_decode(const uint8_t *, uint32_t *); size_t -uni8_length(const uint8_t *src); +uni8_sizeof(uint8_t); size_t -uni8_to32(const uint8_t *src, uint32_t *dst, size_t dstsz); +uni8_length(const uint8_t *); size_t -uni32_sizeof(uint32_t point); +uni8_to32(const uint8_t *, uint32_t *, size_t); size_t -uni32_length(const uint32_t *src); +uni32_sizeof(uint32_t); size_t -uni32_requires(const uint32_t *src); +uni32_length(const uint32_t *); size_t -uni32_to8(const uint32_t *src, uint8_t *dst, size_t dstsz); +uni32_requires(const uint32_t *); + +size_t +uni32_to8(const uint32_t *, uint8_t *, size_t); int -uni_isalpha(uint32_t c); +uni_isalpha(uint32_t); int -uni_iscontrol(uint32_t c); +uni_iscontrol(uint32_t); int -uni_isdigit(uint32_t c); +uni_isdigit(uint32_t); int -uni_islower(uint32_t c); +uni_islower(uint32_t); int -uni_isspace(uint32_t c); +uni_isspace(uint32_t); int -uni_istitle(uint32_t c); +uni_istitle(uint32_t); int 
-uni_isupper(uint32_t c); +uni_isupper(uint32_t); + +uint32_t +uni_toupper(uint32_t); uint32_t -uni_toupper(uint32_t c); +uni_tolower(uint32_t); uint32_t -uni_tolower(uint32_t c); +uni_totitle(uint32_t); -uint32_t -uni_totitle(uint32_t c); +#if defined(__cplusplus) +} +#endif -#endif // !UNICODE_H +#endif /* !UNICODE_H */ diff -r 496cd52a50ec -r 887a8fd73d1e unicode.pc.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/unicode.pc.in Mon Mar 21 09:18:14 2022 +0100 @@ -0,0 +1,6 @@ +Name: @PROJECT_NAME@ +Description: @PROJECT_DESCRIPTION@ +Version: @PROJECT_VERSION@ +URL: @PROJECT_HOMEPAGE_URL@ +Libs: -L@CMAKE_INSTALL_FULL_LIBDIR@ -lunicode +Cflags: -I@CMAKE_INSTALL_FULL_INCLUDEDIR@