Mercurial > libunicode

--- a/.hgignore	Mon Mar 21 09:00:42 2022 +0100
+++ b/.hgignore	Mon Mar 21 09:18:14 2022 +0100
@@ -3,16 +3,16 @@
 \.swp$
 \.swo$

+# Temporary files.
+\.a$
+\.d$
+\.o$
+
+# Test files.
+^tests/test-unicode$
+
 # Doxygen.
 ^doxygen/

-# Generator files.
-^gen/src/mkunicode-c$
-^gen/src/mkunicode-cpp$
-
-# Test files.
-^test/unicode$
-^test/unicode\+\+$
-
 # macOS specific.
 \.DS_Store
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CMakeLists.txt	Mon Mar 21 09:18:14 2022 +0100
@@ -0,0 +1,108 @@
+#
+# CMakeLists.txt -- basic CMake build for libunicode
+#
+# Copyright (c) 2013-2022 David Demelier <markand@malikania.fr>
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+#
+
+cmake_minimum_required(VERSION 3.20)
+project(
+	libunicode
+	VERSION "1.0.0"
+	DESCRIPTION "UTF-8 to UTF-32 conversions and various operations"
+	HOMEPAGE_URL "http://projects.malikania.fr/libunicode"
+	LANGUAGES C
+)
+
+include(CMakePackageConfigHelpers)
+include(GNUInstallDirs)
+
+# Static library, always built. unicode.h lives next to this file, so in-tree
+# consumers (add_subdirectory) get the source directory as include path.
+add_library(libunicode-static STATIC unicode.c unicode.h)
+target_include_directories(
+	libunicode-static
+	PUBLIC
+		$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>
+		$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+)
+install(
+	TARGETS libunicode-static
+	EXPORT unicode-targets
+	ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+)
+
+# The static archive is named "unicode" unless MSVC also builds the shared
+# library; presumably this avoids a clash with the DLL import library.
+if (NOT CMAKE_C_COMPILER_ID MATCHES "MSVC" OR NOT BUILD_SHARED_LIBS)
+	set_target_properties(libunicode-static PROPERTIES OUTPUT_NAME unicode)
+else ()
+	set_target_properties(libunicode-static PROPERTIES OUTPUT_NAME unicode-static)
+endif ()
+
+# Optional shared library; unicode.def is its module definition (export) file.
+if (BUILD_SHARED_LIBS)
+	add_library(libunicode-shared SHARED unicode.c unicode.h unicode.def)
+	target_include_directories(
+		libunicode-shared
+		PUBLIC
+			$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>
+			$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+	)
+	set_target_properties(
+		libunicode-shared
+		PROPERTIES
+			OUTPUT_NAME unicode
+			VERSION ${PROJECT_VERSION}
+			SOVERSION ${PROJECT_VERSION_MAJOR}
+	)
+	install(
+		TARGETS libunicode-shared
+		EXPORT unicode-targets
+		ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+		LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+		RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+	)
+endif ()
+
+# pkg-config file, expanded from the @VAR@ template.
+configure_file(
+	${PROJECT_SOURCE_DIR}/unicode.pc.in
+	${PROJECT_BINARY_DIR}/unicode.pc
+	@ONLY
+)
+
+# Version file consulted by find_package(unicode <version>).
+write_basic_package_version_file(
+	${PROJECT_BINARY_DIR}/unicode-config-version.cmake
+	VERSION ${PROJECT_VERSION}
+	COMPATIBILITY SameMajorVersion
+)
+
+# Header, pkg-config file, manual page and CMake package files.
+install(FILES ${PROJECT_SOURCE_DIR}/unicode.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+install(FILES ${PROJECT_BINARY_DIR}/unicode.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
+install(FILES ${PROJECT_SOURCE_DIR}/libunicode.3 DESTINATION ${CMAKE_INSTALL_MANDIR}/man3)
+install(
+	EXPORT unicode-targets
+	FILE unicode-targets.cmake
+	NAMESPACE unicode::
+	DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/unicode
+)
+install(
+	FILES
+		${PROJECT_BINARY_DIR}/unicode-config-version.cmake
+		${PROJECT_SOURCE_DIR}/unicode-config.cmake
+	DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/unicode
+)
--- a/INSTALL.md	Mon Mar 21 09:00:42 2022 +0100
+++ b/INSTALL.md	Mon Mar 21 09:18:14 2022 +0100
@@ -6,14 +6,10 @@

 - C99 (at least `EILSEQ` has to be available as errno constant).
 - C11 (for running tests).
-
-Installation (C++ variant)
---------------------------
+- `uint32_t` and `uint8_t` types.

-Just copy the files unicode.cpp and unicode.hpp and add them to your project.
-
-Installation
-------------
+Embed
+-----

 Copy unicode.h and unicode.c to your project.

@@ -22,3 +18,47 @@

 The file unicode.c is generated from gen/ subdirectory. Edit the appropriate
 files and run `make` in top level directory to regenerate them.
+
+Installation
+------------
+
+The module is small enough to be incorporated verbatim into your project, but it
+is still possible to install it system-wide.
+
+### Using CMake (recommended)
+
+Using [CMake][cmake] you get proper CMake package configuration files, shared
+libraries and `pkg-config` files.
+
+	$ cmake -S . -B build -DBUILD_SHARED_LIBS=On
+	$ cmake --build build
+	# cmake --build build --target install
+
+Turn `BUILD_SHARED_LIBS` to *Off* if you don't want shared libraries.
+
+Then, you can import `unicode` and use one of the imported targets:
+
+- `unicode::libunicode`: shared if available, static otherwise,
+- `unicode::libunicode-shared`: shared version,
+- `unicode::libunicode-static`: static version.
+
+Example:
+
+	cmake_minimum_required(VERSION 3.20)
+	project(example)
+	find_package(unicode REQUIRED)
+	add_executable(example example.c)
+	target_link_libraries(example unicode::libunicode)
+
+### Using POSIX make (not recommended)
+
+POSIX make (not recommended, only static library):
+
+	$ make
+	# make install
+
+The test suite is available using:
+
+	$ make tests
+
+[cmake]: http://cmake.org
--- a/Makefile	Mon Mar 21 09:00:42 2022 +0100
+++ b/Makefile	Mon Mar 21 09:18:14 2022 +0100
@@ -18,18 +18,35 @@

 .POSIX:

-CC=     cc
-CFLAGS= -O3 -DNDEBUG
+CC=             cc
+
+PREFIX=         /usr/local
+INCDIR=         ${PREFIX}/include
+LIBDIR=         ${PREFIX}/lib
+MANDIR=         ${PREFIX}/share/man

-INCS=   -Iextern/librexo -I.
+VERSION=        1.0.0
+
+LIB_SRCS=       unicode.c
+LIB_OBJS=       ${LIB_SRCS:.c=.o}
+LIB_DEPS=       ${LIB_SRCS:.c=.d}
+LIB=            libunicode.a
+
+TESTS_SRCS=     tests/test-unicode.c
+TESTS_OBJS=     ${TESTS_SRCS:.c=}

 .SUFFIXES:
-.SUFFIXES: .c
+.SUFFIXES: .c .o

-all: unicode.c
+all: ${LIB}
+
+-include ${LIB_DEPS}

 .c:
-	${CC} ${CFLAGS} $< -o $@ ${LDFLAGS}
+	${CC} ${CFLAGS} -Iextern/librexo -I. $< -o $@ ${LIB} ${LDFLAGS}
+# Compile only: -MMD writes the .d dependency file, no linker flags needed.
+.c.o:
+	${CC} ${CFLAGS} -Iextern/librexo -I. -MMD -c $< -o $@

 gen/UnicodeData.txt:
 	curl http://unicode.org/Public/UCD/latest/ucd/UnicodeData.txt -o $@
@@ -39,13 +56,23 @@
 	cat gen/UnicodeData.txt | awk -f gen/mkutf.awk >> unicode.c
 	cat gen/unicode-after.c >> unicode.c

-test/unicode: unicode.c unicode.h test/unicode.c
-	${CC} ${INCS} ${CFLAGS} -o test/unicode unicode.c test/unicode.c ${LDFLAGS}
+# Static archive of the library objects; the test programs link against it.
+${LIB}: ${LIB_OBJS}
+	${AR} -rc $@ ${LIB_OBJS}
+${TESTS_OBJS}: ${LIB}
+# Run each test program and stop at the first one that fails.
+tests: ${TESTS_OBJS}
+	for t in ${TESTS_OBJS}; do ./$$t || exit 1; done

-tests: test/unicode
-	test/unicode
+# Copy the archive, header and manual page into LIBDIR, INCDIR and MANDIR/man3
+# (all prefixed by DESTDIR). No prerequisite: run `make` first.
+install:
+	mkdir -p ${DESTDIR}${LIBDIR} ${DESTDIR}${INCDIR} ${DESTDIR}${MANDIR}/man3
+	cp ${LIB} ${DESTDIR}${LIBDIR}
+	cp unicode.h ${DESTDIR}${INCDIR}
+	cp libunicode.3 ${DESTDIR}${MANDIR}/man3

 clean:
-	rm -f test/unicode
+	rm -f ${LIB} ${LIB_DEPS} ${LIB_OBJS} ${TESTS_OBJS}

-.PHONY: all clean tests
+.PHONY: all clean install tests
--- a/README.md	Mon Mar 21 09:00:42 2022 +0100
+++ b/README.md	Mon Mar 21 09:18:14 2022 +0100
@@ -6,7 +6,7 @@

 Conversions and unicode inspection in C99

-It is currently based on unicode 13.0.0.
+It is currently based on unicode 14.0.0.

 Features
 --------
@@ -19,6 +19,4 @@
 Documentation
 -------------

-See the libunicode(3) manual page.
-
-	man ./libunicode.3
+See the `libunicode(3)` manual page.
--- a/test/unicode.c	Mon Mar 21 09:00:42 2022 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,329 +0,0 @@
-/*
- * unicode.c -- main test file for unicode
- *
- * Copyright (c) 2013-2022 David Demelier <markand@malikania.fr>
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <errno.h>
-
-#include <rexo.h>
-
-#include "unicode.h"
-
-/*
- * /!\ Be sure to keep this file with UTF-8 encoding /!\
- */
-
-static size_t
-u32len(const uint32_t *s)
-{
-	size_t t = 0;
-
-	while (*s++)
-		++t;
-
-	return t;
-}
-
-static int
-u32cmp(const uint32_t *s1, const uint32_t *s2)
-{
-	const size_t l1 = u32len(s1);
-	const size_t l2 = u32len(s2);
-
-	return l1 == l2 && memcmp(s1, s2, l1) == 0;
-}
-
-RX_TEST_CASE(uni8_encode, simple)
-{
-	size_t r;
-
-	/* a -> 1 bytes. */
-	{
-		uint8_t buffer[5] = { 0 };
-
-		r = uni8_encode(buffer, sizeof (buffer), U'a');
-		RX_INT_REQUIRE_EQUAL(r, 1);
-		RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"a");
-	}
-
-	/* é -> 2 bytes. */
-	{
-		uint8_t buffer[5] = { 0 };
-
-		r = uni8_encode(buffer, sizeof (buffer), U'é');
-		RX_INT_REQUIRE_EQUAL(r, 2);
-		RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"é");
-	}
-}
-
-RX_TEST_CASE(uni8_encode, invalid)
-{
-	size_t r;
-	uint8_t buffer[5] = { 0 };
-
-	r = uni8_encode(buffer, sizeof (buffer), 0xffffffff);
-	RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
-	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
-}
-
-RX_TEST_CASE(uni8_encode, toosmall)
-{
-	size_t r;
-	uint8_t buffer[1] = { 0 };
-
-	r = uni8_encode(buffer, sizeof (buffer), U'é');
-	RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
-	RX_INT_REQUIRE_EQUAL(errno, ERANGE);
-}
-
-RX_TEST_CASE(unit8_decode, simple)
-{
-	size_t r;
-
-	/* a -> 1 bytes. */
-	{
-		uint32_t code = -1;
-
-		r = uni8_decode((const uint8_t *)u8"a", &code);
-		RX_UINT_REQUIRE_EQUAL(r, 1U);
-		RX_INT_REQUIRE_EQUAL(code, 'a');
-	}
-
-	/* é -> 2 bytes. */
-	{
-		uint32_t code = -1;
-
-		r = uni8_decode((const uint8_t *)u8"é", &code);
-		RX_UINT_REQUIRE_EQUAL(r, 2U);
-		RX_INT_REQUIRE_EQUAL(code, U'é');
-	}
-}
-
-RX_TEST_CASE(uni8_decode, invalid)
-{
-	size_t r;
-
-	/* Invalid UTF-8 sequence. */
-	{
-		uint32_t code = -1;
-
-		r = uni8_decode((const uint8_t *)u8"\xff""a", &code);
-		RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
-		RX_UINT_REQUIRE_EQUAL(code, (uint32_t)-1);
-		RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
-	}
-
-	/* Valid "€" but unfinished sequence. */
-	{
-		uint32_t code = -1;
-
-		r = uni8_decode((const uint8_t []){ -30, 0 }, &code);
-		RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
-		RX_UINT_REQUIRE_EQUAL(code, (uint32_t)-1);
-		RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
-	}
-}
-
-RX_TEST_CASE(uni8_sizeof, simple)
-{
-	RX_INT_REQUIRE_EQUAL(uni8_sizeof(u8"a"[0]), 1U);
-	RX_INT_REQUIRE_EQUAL(uni8_sizeof(u8"é"[0]), 2U);
-	RX_INT_REQUIRE_EQUAL(uni8_sizeof(u8"€"[0]), 3U);
-	RX_INT_REQUIRE_EQUAL(uni8_sizeof(u8"𐍈"[0]), 4U);
-}
-
-RX_TEST_CASE(uni8_sizeof, invalid)
-{
-	RX_UINT_REQUIRE_EQUAL((size_t)-1, uni8_sizeof(u8"\xff"[0]));
-	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
-}
-
-RX_TEST_CASE(uni8_length, simple)
-{
-	RX_UINT_REQUIRE_EQUAL(uni8_length((const uint8_t *)"abc"), 3U);
-	RX_UINT_REQUIRE_EQUAL(uni8_length((const uint8_t *)"5€"), 2U);
-}
-
-RX_TEST_CASE(uni8_length, invalid)
-{
-	RX_UINT_REQUIRE_EQUAL((size_t)-1, uni8_length((const uint8_t *)"a""\xff""b"));
-	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
-}
-
-RX_TEST_CASE(uni8_to32, simple)
-{
-	size_t r;
-
-	{
-		uint32_t buffer[10] = { 0 };
-		uint32_t expected[] = { U'a', U'b', U'c', 0 };
-
-		r = uni8_to32((const uint8_t *)"abc", buffer, 10);
-		RX_UINT_REQUIRE_EQUAL(r, 3U);
-		RX_REQUIRE(u32cmp(buffer, expected));
-	}
-
-	{
-		uint32_t buffer[10] = { 0 };
-		uint32_t expected[] = { U'a', U'é', U'c', 0 };
-
-		r = uni8_to32((const uint8_t *)"aéc", buffer, 10);
-		RX_UINT_REQUIRE_EQUAL(r, 3);
-		RX_REQUIRE(u32cmp(buffer, expected));
-	}
-}
-
-RX_TEST_CASE(uni8_to32, invalid)
-{
-	size_t r;
-	uint32_t buffer[10] = { 0 };
-
-	/* Invalid UTF-8 sequence. */
-	r = uni8_to32((const uint8_t *)u8"\xff""a", buffer, 10);
-	RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
-	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
-
-	/* Valid "€" but unfinished sequence. */
-	r = uni8_to32((const uint8_t []){ -30, 0 }, buffer, 10);
-	RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
-	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
-}
-
-RX_TEST_CASE(uni8_to32, toosmall)
-{
-	size_t r;
-	uint32_t buffer[4] = { 0 };
-
-	r = uni8_to32((const uint8_t *)u8"bonjour à tous", buffer, 1);
-	RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
-	RX_INT_REQUIRE_EQUAL(errno, ERANGE);
-}
-
-RX_TEST_CASE(uni32_sizeof, simple)
-{
-	RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'a'), 1);
-	RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'é'), 2);
-	RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'€'), 3);
-	RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'𐍈'), 4);
-}
-
-RX_TEST_CASE(uni32_sizeof, invalid)
-{
-	RX_UINT_REQUIRE_EQUAL((size_t)-1, uni32_sizeof(0xffffffff));
-	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
-}
-
-RX_TEST_CASE(uni32_length, simple)
-{
-	RX_UINT_REQUIRE_EQUAL(uni32_length((const uint32_t []){ U'a', U'é', U'c', 0 }), 3U);
-}
-
-RX_TEST_CASE(uni32_requires, simple)
-{
-	RX_UINT_REQUIRE_EQUAL(uni32_requires(U"abc"), 3U);
-	RX_UINT_REQUIRE_EQUAL(uni32_requires(U"é€𐍈"), 9U);
-}
-
-RX_TEST_CASE(uni32_requires, invalid)
-{
-	RX_UINT_REQUIRE_EQUAL((size_t)-1, uni32_requires(U"\xffffffff"));
-	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
-}
-
-RX_TEST_CASE(uni32_to8, simple)
-{
-	size_t r;
-
-	{
-		uint8_t buffer[10] = { 0 };
-
-		r = uni32_to8(U"abc", buffer, sizeof (buffer));
-		RX_UINT_REQUIRE_EQUAL(r, 3U);
-		RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"abc");
-	}
-
-	{
-		uint8_t buffer[20] = { 0 };
-
-		r = uni32_to8(U"ça va, 5€ ?", buffer, sizeof (buffer));
-		RX_UINT_REQUIRE_EQUAL(r, 14U);
-		RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"ça va, 5€ ?");
-	}
-}
-
-RX_TEST_CASE(uni32_to8, invalid)
-{
-	uint8_t buffer[10] = { 0 };
-
-	RX_INT_REQUIRE_EQUAL(uni32_to8(U"\xffffffff", buffer, sizeof (buffer)), (size_t)-1);
-	RX_UINT_REQUIRE_EQUAL(errno, EILSEQ);
-}
-
-RX_TEST_CASE(uni32_to8, toosmall)
-{
-	size_t r;
-	uint8_t buffer[3] = { 0 };
-
-	r = uni32_to8(U"ça va ?", buffer, sizeof (buffer));
-	RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
-	RX_INT_REQUIRE_EQUAL(errno, ERANGE);
-}
-
-RX_TEST_CASE(misc, isalpha)
-{
-	RX_REQUIRE(uni_isalpha(U'é'));
-	RX_REQUIRE(!uni_isalpha(U'€'));
-}
-
-RX_TEST_CASE(misc, isdigit)
-{
-	RX_REQUIRE(uni_isdigit(U'۱'));
-	RX_REQUIRE(!uni_isdigit(U'€'));
-}
-
-RX_TEST_CASE(misc, islower)
-{
-	RX_REQUIRE(uni_islower(U'a'));
-	RX_REQUIRE(uni_islower(U'é'));
-	RX_REQUIRE(!uni_islower(U'A'));
-	RX_REQUIRE(!uni_islower(U'É'));
-}
-
-RX_TEST_CASE(misc, isspace)
-{
-	RX_REQUIRE(uni_isspace(U' '));
-	RX_REQUIRE(!uni_isspace(U'é'));
-}
-
-RX_TEST_CASE(misc, istitle)
-{
-	RX_REQUIRE(uni_istitle(U'ǅ'));
-	RX_REQUIRE(!uni_istitle(U'€'));
-}
-
-RX_TEST_CASE(misc, isupper)
-{
-	RX_REQUIRE(!uni_isupper('a'));
-	RX_REQUIRE(!uni_isupper(U'é'));
-	RX_REQUIRE(uni_isupper('A'));
-	RX_REQUIRE(uni_isupper(U'É'));
-}
-
-int
-main(int argc, char **argv)
-{
-	return rx_main(0, NULL, argc, (const char **)argv) == RX_SUCCESS ? 0 : 1;
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-unicode.c	Mon Mar 21 09:18:14 2022 +0100
@@ -0,0 +1,329 @@
+/*
+ * test-unicode.c -- main test file for unicode
+ *
+ * Copyright (c) 2013-2022 David Demelier <markand@malikania.fr>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <errno.h>
+
+#include <rexo.h>
+
+#include "unicode.h"
+
+/*
+ * /!\ Be sure to keep this file with UTF-8 encoding /!\
+ */
+
+/* Number of code points stored in s before its terminating 0. */
+static size_t
+u32len(const uint32_t *s)
+{
+	const uint32_t *end = s;
+
+	while (*end != 0)
+		++end;
+	return (size_t)(end - s);
+}
+
+/* Equality test (1 if equal, 0 otherwise); memcmp wants a size in bytes. */
+static int
+u32cmp(const uint32_t *s1, const uint32_t *s2)
+{
+	const size_t len = u32len(s1);
+
+	return len == u32len(s2) && memcmp(s1, s2, len * sizeof (*s1)) == 0;
+}
+
+RX_TEST_CASE(uni8_encode, simple)
+{
+	size_t r;
+
+	/* a -> 1 byte; the zero-filled buffer lets the result compare as a C string. */
+	{
+		uint8_t buffer[5] = { 0 };
+
+		r = uni8_encode(buffer, sizeof (buffer), U'a');
+		RX_UINT_REQUIRE_EQUAL(r, 1U);
+		RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"a");
+	}
+
+	/* é -> 2 bytes. */
+	{
+		uint8_t buffer[5] = { 0 };
+
+		r = uni8_encode(buffer, sizeof (buffer), U'é');
+		RX_UINT_REQUIRE_EQUAL(r, 2U);
+		RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"é");
+	}
+}
+
+RX_TEST_CASE(uni8_encode, invalid)
+{
+	size_t r;
+	uint8_t buffer[5] = { 0 };
+	/* 0xffffffff must be refused: (size_t)-1 is expected, with errno EILSEQ. */
+	r = uni8_encode(buffer, sizeof (buffer), 0xffffffff);
+	RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
+	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
+}
+
+RX_TEST_CASE(uni8_encode, toosmall)
+{
+	size_t r;
+	uint8_t buffer[1] = { 0 };
+	/* A single byte cannot hold é: (size_t)-1 is expected, with errno ERANGE. */
+	r = uni8_encode(buffer, sizeof (buffer), U'é');
+	RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
+	RX_INT_REQUIRE_EQUAL(errno, ERANGE);
+}
+
+RX_TEST_CASE(uni8_decode, simple)
+{
+	size_t r;
+
+	/* a -> 1 byte; code starts as -1 so an untouched output would be noticed. */
+	{
+		uint32_t code = -1;
+
+		r = uni8_decode((const uint8_t *)u8"a", &code);
+		RX_UINT_REQUIRE_EQUAL(r, 1U);
+		RX_INT_REQUIRE_EQUAL(code, 'a');
+	}
+
+	/* é -> 2 bytes. */
+	{
+		uint32_t code = -1;
+
+		r = uni8_decode((const uint8_t *)u8"é", &code);
+		RX_UINT_REQUIRE_EQUAL(r, 2U);
+		RX_INT_REQUIRE_EQUAL(code, U'é');
+	}
+}
+
+RX_TEST_CASE(uni8_decode, invalid)
+{
+	size_t r;
+	/* Each failure must leave code at (uint32_t)-1 and set errno to EILSEQ. */
+	/* Invalid UTF-8 sequence. */
+	{
+		uint32_t code = -1;
+
+		r = uni8_decode((const uint8_t *)u8"\xff""a", &code);
+		RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
+		RX_UINT_REQUIRE_EQUAL(code, (uint32_t)-1);
+		RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
+	}
+
+	/* Valid "€" but unfinished sequence: -30 converts to 0xe2, its first byte. */
+	{
+		uint32_t code = -1;
+
+		r = uni8_decode((const uint8_t []){ -30, 0 }, &code);
+		RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
+		RX_UINT_REQUIRE_EQUAL(code, (uint32_t)-1);
+		RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
+	}
+}
+
+RX_TEST_CASE(uni8_sizeof, simple)
+{
+	RX_UINT_REQUIRE_EQUAL(uni8_sizeof(u8"a"[0]), 1U);	/* only the first byte is passed */
+	RX_UINT_REQUIRE_EQUAL(uni8_sizeof(u8"é"[0]), 2U);
+	RX_UINT_REQUIRE_EQUAL(uni8_sizeof(u8"€"[0]), 3U);
+	RX_UINT_REQUIRE_EQUAL(uni8_sizeof(u8"𐍈"[0]), 4U);
+}
+
+RX_TEST_CASE(uni8_sizeof, invalid)
+{
+	RX_UINT_REQUIRE_EQUAL((size_t)-1, uni8_sizeof(u8"\xff"[0]));	/* 0xff must be rejected */
+	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
+}
+
+RX_TEST_CASE(uni8_length, simple)
+{
+	RX_UINT_REQUIRE_EQUAL(uni8_length((const uint8_t *)"abc"), 3U);
+	RX_UINT_REQUIRE_EQUAL(uni8_length((const uint8_t *)"5€"), 2U);	/* code points, not bytes */
+}
+
+RX_TEST_CASE(uni8_length, invalid)
+{
+	RX_UINT_REQUIRE_EQUAL((size_t)-1, uni8_length((const uint8_t *)"a""\xff""b"));	/* bad byte in the middle */
+	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
+}
+
+RX_TEST_CASE(uni8_to32, simple)
+{
+	size_t r;
+	/* The capacity is passed as an element count (10); r is checked against the number of code points. */
+	{
+		uint32_t buffer[10] = { 0 };
+		uint32_t expected[] = { U'a', U'b', U'c', 0 };
+
+		r = uni8_to32((const uint8_t *)"abc", buffer, 10);
+		RX_UINT_REQUIRE_EQUAL(r, 3U);
+		RX_REQUIRE(u32cmp(buffer, expected));
+	}
+
+	{
+		uint32_t buffer[10] = { 0 };
+		uint32_t expected[] = { U'a', U'é', U'c', 0 };
+
+		r = uni8_to32((const uint8_t *)"aéc", buffer, 10);
+		RX_UINT_REQUIRE_EQUAL(r, 3);
+		RX_REQUIRE(u32cmp(buffer, expected));
+	}
+}
+
+RX_TEST_CASE(uni8_to32, invalid)
+{
+	size_t r;
+	uint32_t buffer[10] = { 0 };
+	/* Both inputs must fail with (size_t)-1 and errno EILSEQ. */
+	/* Invalid UTF-8 sequence. */
+	r = uni8_to32((const uint8_t *)u8"\xff""a", buffer, 10);
+	RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
+	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
+
+	/* Valid "€" but unfinished sequence. */
+	r = uni8_to32((const uint8_t []){ -30, 0 }, buffer, 10);
+	RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
+	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
+}
+
+RX_TEST_CASE(uni8_to32, toosmall)
+{
+	size_t r;
+	uint32_t buffer[4] = { 0 };
+	/* The capacity is announced as 1 although buffer has 4 slots: ERANGE expected. */
+	r = uni8_to32((const uint8_t *)u8"bonjour à tous", buffer, 1);
+	RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
+	RX_INT_REQUIRE_EQUAL(errno, ERANGE);
+}
+
+RX_TEST_CASE(uni32_sizeof, simple)
+{
+	RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'a'), 1);	/* UTF-8 bytes needed for the code point */
+	RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'é'), 2);
+	RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'€'), 3);
+	RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'𐍈'), 4);
+}
+
+RX_TEST_CASE(uni32_sizeof, invalid)
+{
+	RX_UINT_REQUIRE_EQUAL((size_t)-1, uni32_sizeof(0xffffffff));	/* 0xffffffff must be rejected */
+	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
+}
+
+RX_TEST_CASE(uni32_length, simple)
+{
+	RX_UINT_REQUIRE_EQUAL(uni32_length((const uint32_t []){ U'a', U'é', U'c', 0 }), 3U);	/* the final 0 is not counted */
+}
+
+RX_TEST_CASE(uni32_requires, simple)
+{
+	RX_UINT_REQUIRE_EQUAL(uni32_requires(U"abc"), 3U);
+	RX_UINT_REQUIRE_EQUAL(uni32_requires(U"é€𐍈"), 9U);	/* 2 + 3 + 4 bytes */
+}
+
+RX_TEST_CASE(uni32_requires, invalid)
+{
+	RX_UINT_REQUIRE_EQUAL((size_t)-1, uni32_requires(U"\xffffffff"));	/* one element, 0xffffffff */
+	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
+}
+
+RX_TEST_CASE(uni32_to8, simple)
+{
+	size_t r;
+	/* r is checked against the UTF-8 byte count; buffers are zero-filled and compared as C strings. */
+	{
+		uint8_t buffer[10] = { 0 };
+
+		r = uni32_to8(U"abc", buffer, sizeof (buffer));
+		RX_UINT_REQUIRE_EQUAL(r, 3U);
+		RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"abc");
+	}
+
+	{
+		uint8_t buffer[20] = { 0 };
+
+		r = uni32_to8(U"ça va, 5€ ?", buffer, sizeof (buffer));
+		RX_UINT_REQUIRE_EQUAL(r, 14U);
+		RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"ça va, 5€ ?");
+	}
+}
+
+RX_TEST_CASE(uni32_to8, invalid)
+{
+	uint8_t buffer[10] = { 0 };
+	/* The size_t result uses the unsigned check, errno (an int) the signed one. */
+	RX_UINT_REQUIRE_EQUAL(uni32_to8(U"\xffffffff", buffer, sizeof (buffer)), (size_t)-1);
+	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
+}
+
+RX_TEST_CASE(uni32_to8, toosmall)
+{
+	size_t r;
+	uint8_t buffer[3] = { 0 };
+	/* Three bytes cannot hold the whole string: (size_t)-1 with errno ERANGE. */
+	r = uni32_to8(U"ça va ?", buffer, sizeof (buffer));
+	RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
+	RX_INT_REQUIRE_EQUAL(errno, ERANGE);
+}
+
+RX_TEST_CASE(misc, isalpha)
+{
+	RX_REQUIRE(uni_isalpha(U'é'));	/* non-ASCII letter */
+	RX_REQUIRE(!uni_isalpha(U'€'));	/* currency sign, not a letter */
+}
+
+RX_TEST_CASE(misc, isdigit)
+{
+	RX_REQUIRE(uni_isdigit(U'۱'));	/* U+06F1, a non-ASCII digit */
+	RX_REQUIRE(!uni_isdigit(U'€'));
+}
+
+RX_TEST_CASE(misc, islower)
+{
+	RX_REQUIRE(uni_islower(U'a'));	/* ASCII and non-ASCII, both cases */
+	RX_REQUIRE(uni_islower(U'é'));
+	RX_REQUIRE(!uni_islower(U'A'));
+	RX_REQUIRE(!uni_islower(U'É'));
+}
+
+RX_TEST_CASE(misc, isspace)
+{
+	RX_REQUIRE(uni_isspace(U' '));	/* plain ASCII space */
+	RX_REQUIRE(!uni_isspace(U'é'));
+}
+
+RX_TEST_CASE(misc, istitle)
+{
+	RX_REQUIRE(uni_istitle(U'ǅ'));	/* U+01C5, a titlecase digraph */
+	RX_REQUIRE(!uni_istitle(U'€'));
+}
+
+RX_TEST_CASE(misc, isupper)
+{
+	RX_REQUIRE(!uni_isupper('a'));	/* ASCII and non-ASCII, both cases */
+	RX_REQUIRE(!uni_isupper(U'é'));
+	RX_REQUIRE(uni_isupper('A'));
+	RX_REQUIRE(uni_isupper(U'É'));
+}
+
+int
+main(int argc, char **argv)
+{
+	return rx_main(0, NULL, argc, (const char **)argv) != RX_SUCCESS;	/* 0 on success, 1 otherwise */
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/unicode-config.cmake	Mon Mar 21 09:18:14 2022 +0100
@@ -0,0 +1,12 @@
+# Package configuration file loaded by find_package(unicode).
+include("${CMAKE_CURRENT_LIST_DIR}/unicode-targets.cmake")
+
+# Prefer shared version if found. The guard keeps a repeated find_package()
+# call from failing on an already existing alias.
+if (NOT TARGET unicode::libunicode)
+	if (TARGET unicode::libunicode-shared)
+		add_library(unicode::libunicode ALIAS unicode::libunicode-shared)
+	else ()
+		add_library(unicode::libunicode ALIAS unicode::libunicode-static)
+	endif ()
+endif ()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/unicode.def	Mon Mar 21 09:18:14 2022 +0100
@@ -0,0 +1,20 @@
+EXPORTS
+	uni32_length
+	uni32_requires
+	uni32_sizeof
+	uni32_to8
+	uni8_decode
+	uni8_encode
+	uni8_length
+	uni8_sizeof
+	uni8_to32
+	uni_isalpha
+	uni_iscontrol
+	uni_isdigit
+	uni_islower
+	uni_isspace
+	uni_istitle
+	uni_isupper
+	uni_tolower
+	uni_totitle
+	uni_toupper
--- a/unicode.h	Mon Mar 21 09:00:42 2022 +0100
+++ b/unicode.h	Mon Mar 21 09:18:14 2022 +0100
@@ -22,61 +22,69 @@
 #include <stddef.h>
 #include <stdint.h>

-size_t
-uni8_encode(uint8_t *dst, size_t dstsz, uint32_t point);
+#if defined(__cplusplus)
+extern "C" {
+#endif

 size_t
-uni8_decode(const uint8_t *src, uint32_t *point);
+uni8_encode(uint8_t *, size_t, uint32_t);

 size_t
-uni8_sizeof(uint8_t c);
+uni8_decode(const uint8_t *, uint32_t *);

 size_t
-uni8_length(const uint8_t *src);
+uni8_sizeof(uint8_t);

 size_t
-uni8_to32(const uint8_t *src, uint32_t *dst, size_t dstsz);
+uni8_length(const uint8_t *);

 size_t
-uni32_sizeof(uint32_t point);
+uni8_to32(const uint8_t *, uint32_t *, size_t);

 size_t
-uni32_length(const uint32_t *src);
+uni32_sizeof(uint32_t);

 size_t
-uni32_requires(const uint32_t *src);
+uni32_length(const uint32_t *);

 size_t
-uni32_to8(const uint32_t *src, uint8_t *dst, size_t dstsz);
+uni32_requires(const uint32_t *);
+
+size_t
+uni32_to8(const uint32_t *, uint8_t *, size_t);

 int
-uni_isalpha(uint32_t c);
+uni_isalpha(uint32_t);

 int
-uni_iscontrol(uint32_t c);
+uni_iscontrol(uint32_t);

 int
-uni_isdigit(uint32_t c);
+uni_isdigit(uint32_t);

 int
-uni_islower(uint32_t c);
+uni_islower(uint32_t);

 int
-uni_isspace(uint32_t c);
+uni_isspace(uint32_t);

 int
-uni_istitle(uint32_t c);
+uni_istitle(uint32_t);

 int
-uni_isupper(uint32_t c);
+uni_isupper(uint32_t);
+
+uint32_t
+uni_toupper(uint32_t);

 uint32_t
-uni_toupper(uint32_t c);
+uni_tolower(uint32_t);

 uint32_t
-uni_tolower(uint32_t c);
+uni_totitle(uint32_t);

-uint32_t
-uni_totitle(uint32_t c);
+#if defined(__cplusplus)
+}
+#endif

-#endif // !UNICODE_H
+#endif /* !UNICODE_H */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/unicode.pc.in	Mon Mar 21 09:18:14 2022 +0100
@@ -0,0 +1,6 @@
+Name: @PROJECT_NAME@
+Description: @PROJECT_DESCRIPTION@
+Version: @PROJECT_VERSION@
+URL: @PROJECT_HOMEPAGE_URL@
+Libs: -L@CMAKE_INSTALL_FULL_LIBDIR@ -lunicode
+Cflags: -I@CMAKE_INSTALL_FULL_INCLUDEDIR@