view tests/test-unicode.c @ 30:303403de1314 default tip @

misc: update copyright years
author David Demelier <markand@malikania.fr>
date Thu, 04 Jan 2024 10:43:47 +0100
parents f06312a7432b
children
line wrap: on
line source

/*
 * test-unicode.c -- main test file for unicode
 *
 * Copyright (c) 2013-2024 David Demelier <markand@malikania.fr>
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <errno.h>

#include <dt.h>

#include "unicode.h"

/*
 * /!\ Be sure to keep this file with UTF-8 encoding /!\
 */

/*
 * Count the code points of the zero-terminated UTF-32 string s, the
 * terminator itself not being counted.
 */
static size_t
u32len(const uint32_t *s)
{
	const uint32_t *end = s;

	/* Walk up to the terminating zero. */
	while (*end != 0)
		++end;

	return (size_t)(end - s);
}

/*
 * Tell whether the zero-terminated UTF-32 strings s1 and s2 are equal.
 *
 * Returns non-zero when both strings have the same length and hold the same
 * code points, 0 otherwise. Despite its name this is an equality predicate,
 * not a strcmp-like ordering function.
 */
static int
u32cmp(const uint32_t *s1, const uint32_t *s2)
{
	const size_t l1 = u32len(s1);
	const size_t l2 = u32len(s2);

	/*
	 * memcmp takes a size in bytes while u32len counts code points, so the
	 * length must be scaled or only the first quarter would be compared.
	 */
	return l1 == l2 && memcmp(s1, s2, l1 * sizeof (*s1)) == 0;
}

/*
 * Encode single code points with uni8_encode and check both the returned
 * size and the bytes stored in the buffer.
 *
 * The buffers are zero-initialized so that the encoded bytes can be compared
 * as C strings.
 */
static void
uni8_encode_simple(void)
{
	size_t r;

	/* a -> 1 byte. */
	{
		uint8_t buffer[5] = { 0 };

		r = uni8_encode(buffer, sizeof (buffer), U'a');
		/* r is a size_t: compare it as a size like the other tests do. */
		DT_EQ_SIZE(r, 1U);
		DT_EQ_STR((const char *)buffer, (const char *)u8"a");
	}

	/* é -> 2 bytes. */
	{
		uint8_t buffer[5] = { 0 };

		r = uni8_encode(buffer, sizeof (buffer), U'é');
		DT_EQ_SIZE(r, 2U);
		DT_EQ_STR((const char *)buffer, (const char *)u8"é");
	}
}

/*
 * Encoding 0xffffffff is expected to fail: uni8_encode must return
 * (size_t)-1 and set errno to EILSEQ.
 */
static void
uni8_encode_invalid(void)
{
	size_t r;
	uint8_t buffer[5] = { 0 };

	/*
	 * Clear errno first, otherwise a value left by a previous test could
	 * satisfy the check even if uni8_encode never sets it.
	 */
	errno = 0;
	r = uni8_encode(buffer, sizeof (buffer), 0xffffffff);
	DT_EQ_SIZE(r, (size_t)-1);
	DT_EQ_INT(errno, EILSEQ);
}

/*
 * Encoding 'é' into a one byte buffer is expected to fail: uni8_encode must
 * return (size_t)-1 and set errno to ERANGE.
 */
static void
uni8_encode_toosmall(void)
{
	size_t r;
	uint8_t buffer[1] = { 0 };

	/* Clear errno so that a stale value cannot hide a missing assignment. */
	errno = 0;
	r = uni8_encode(buffer, sizeof (buffer), U'é');
	DT_EQ_SIZE(r, (size_t)-1);
	DT_EQ_INT(errno, ERANGE);
}

/*
 * Decode the first code point of valid UTF-8 strings with uni8_decode and
 * check both the returned size and the decoded value.
 *
 * The output variable starts at (uint32_t)-1 so that a decoder which leaves
 * it untouched cannot pass by accident.
 */
static void
uni8_decode_simple(void)
{
	/* 'a' is expected to take a single byte. */
	{
		uint32_t code = -1;
		const size_t r = uni8_decode((const uint8_t *)u8"a", &code);

		DT_EQ_SIZE(r, 1U);
		DT_EQ_INT(code, 'a');
	}

	/* 'é' is expected to take two bytes. */
	{
		uint32_t code = -1;
		const size_t r = uni8_decode((const uint8_t *)u8"é", &code);

		DT_EQ_SIZE(r, 2U);
		DT_EQ_INT(code, U'é');
	}
}

/*
 * Decoding malformed input is expected to fail: uni8_decode must return
 * (size_t)-1, set errno to EILSEQ and leave the output code point untouched
 * (it is still (uint32_t)-1 afterwards).
 */
static void
uni8_decode_invalid(void)
{
	size_t r;

	/* Invalid UTF-8 sequence. */
	{
		uint32_t code = -1;

		/* Clear errno so that a stale value cannot satisfy the check. */
		errno = 0;
		r = uni8_decode((const uint8_t *)u8"\xff""a", &code);
		DT_EQ_SIZE(r, (size_t)-1);
		DT_EQ_SIZE(code, (uint32_t)-1);
		DT_EQ_INT(errno, EILSEQ);
	}

	/* Valid "€" but unfinished sequence. */
	{
		uint32_t code = -1;

		/*
		 * 0xe2 is the first of the three bytes encoding "€"; the string
		 * ends right after it. Reset errno again because the previous
		 * case already left EILSEQ in it.
		 */
		errno = 0;
		r = uni8_decode((const uint8_t []){ 0xe2, 0 }, &code);
		DT_EQ_SIZE(r, (size_t)-1);
		DT_EQ_SIZE(code, (uint32_t)-1);
		DT_EQ_INT(errno, EILSEQ);
	}
}

/*
 * Check uni8_sizeof against the first byte of characters whose UTF-8
 * encoding takes 1, 2, 3 and 4 bytes.
 *
 * The results are compared as sizes, like the other uni8_sizeof and
 * uni32_sizeof checks in this file.
 */
static void
uni8_sizeof_simple(void)
{
	DT_EQ_SIZE(uni8_sizeof(u8"a"[0]), 1U);
	DT_EQ_SIZE(uni8_sizeof(u8"é"[0]), 2U);
	DT_EQ_SIZE(uni8_sizeof(u8"€"[0]), 3U);
	DT_EQ_SIZE(uni8_sizeof(u8"𐍈"[0]), 4U);
}

/*
 * 0xff is expected to be rejected as a first byte: uni8_sizeof must return
 * (size_t)-1 and set errno to EILSEQ.
 */
static void
uni8_sizeof_invalid(void)
{
	/* Clear errno so that a stale value cannot satisfy the check. */
	errno = 0;
	DT_EQ_SIZE((size_t)-1, uni8_sizeof(u8"\xff"[0]));
	DT_EQ_INT(errno, EILSEQ);
}

/*
 * Check that uni8_length counts characters rather than bytes: "5€" is
 * expected to yield 2 although "€" takes several bytes in UTF-8.
 *
 * The inputs use the u8 prefix so that they are UTF-8 whatever the execution
 * character set of the compiler is.
 */
static void
uni8_length_simple(void)
{
	DT_EQ_SIZE(uni8_length((const uint8_t *)u8"abc"), 3U);
	DT_EQ_SIZE(uni8_length((const uint8_t *)u8"5€"), 2U);
}

/*
 * A string containing the invalid byte 0xff is expected to be rejected:
 * uni8_length must return (size_t)-1 and set errno to EILSEQ.
 */
static void
uni8_length_invalid(void)
{
	/* Clear errno so that a stale value cannot satisfy the check. */
	errno = 0;
	DT_EQ_SIZE((size_t)-1, uni8_length((const uint8_t *)"a""\xff""b"));
	DT_EQ_INT(errno, EILSEQ);
}

/*
 * Convert UTF-8 strings to UTF-32 with uni8_to32 and check both the returned
 * length and the converted code points.
 *
 * The destination buffers are zero-initialized, which is what lets u32cmp
 * find a terminator after the converted code points. The inputs use the u8
 * prefix so that they are UTF-8 whatever the execution character set is.
 */
static void
uni8_to32_simple(void)
{
	size_t r;

	/* ASCII only. */
	{
		uint32_t buffer[10] = { 0 };
		uint32_t expected[] = { U'a', U'b', U'c', 0 };

		r = uni8_to32((const uint8_t *)u8"abc", buffer, 10);
		DT_EQ_SIZE(r, 3U);
		DT_ASSERT(u32cmp(buffer, expected));
	}

	/* With a multibyte character in the middle. */
	{
		uint32_t buffer[10] = { 0 };
		uint32_t expected[] = { U'a', U'é', U'c', 0 };

		r = uni8_to32((const uint8_t *)u8"aéc", buffer, 10);
		DT_EQ_SIZE(r, 3U);
		DT_ASSERT(u32cmp(buffer, expected));
	}
}

/*
 * Converting malformed UTF-8 is expected to fail: uni8_to32 must return
 * (size_t)-1 and set errno to EILSEQ.
 */
static void
uni8_to32_invalid(void)
{
	size_t r;
	uint32_t buffer[10] = { 0 };

	/* Invalid UTF-8 sequence. */
	errno = 0;
	r = uni8_to32((const uint8_t *)u8"\xff""a", buffer, 10);
	DT_EQ_SIZE(r, (size_t)-1);
	DT_EQ_INT(errno, EILSEQ);

	/*
	 * Valid "€" but unfinished sequence: 0xe2 is the first of its three
	 * bytes. errno is cleared again because the previous call already left
	 * EILSEQ in it, which would make this check pass unconditionally.
	 */
	errno = 0;
	r = uni8_to32((const uint8_t []){ 0xe2, 0 }, buffer, 10);
	DT_EQ_SIZE(r, (size_t)-1);
	DT_EQ_INT(errno, EILSEQ);
}

/*
 * Converting a string into a destination announced as holding a single
 * element is expected to fail: uni8_to32 must return (size_t)-1 and set
 * errno to ERANGE.
 */
static void
uni8_to32_toosmall(void)
{
	size_t r;
	uint32_t buffer[4] = { 0 };

	/* Clear errno so that a stale value cannot satisfy the check. */
	errno = 0;
	r = uni8_to32((const uint8_t *)u8"bonjour à tous", buffer, 1);
	DT_EQ_SIZE(r, (size_t)-1);
	DT_EQ_INT(errno, ERANGE);
}

/*
 * Check uni32_sizeof with one code point for each expected result from 1 to
 * 4; the expected values are the UTF-8 encoded lengths of these characters.
 */
static void
uni32_sizeof_simple(void)
{
	DT_EQ_SIZE(uni32_sizeof(U'a'), 1);
	DT_EQ_SIZE(uni32_sizeof(U'é'), 2);
	DT_EQ_SIZE(uni32_sizeof(U'€'), 3);
	DT_EQ_SIZE(uni32_sizeof(U'𐍈'), 4);
}

/*
 * 0xffffffff is expected to be rejected: uni32_sizeof must return (size_t)-1
 * and set errno to EILSEQ.
 */
static void
uni32_sizeof_invalid(void)
{
	/* Clear errno so that a stale value cannot satisfy the check. */
	errno = 0;
	DT_EQ_SIZE((size_t)-1, uni32_sizeof(0xffffffff));
	DT_EQ_INT(errno, EILSEQ);
}

/*
 * Check that uni32_length reports 3 for a zero-terminated UTF-32 string made
 * of three code points.
 */
static void
uni32_length_simple(void)
{
	DT_EQ_SIZE(uni32_length((const uint32_t []){ U'a', U'é', U'c', 0 }), 3U);
}

/*
 * Check uni32_requires on valid UTF-32 strings; the expected values are the
 * number of bytes of the same text once encoded in UTF-8.
 */
static void
uni32_requires_simple(void)
{
	/* Three ASCII characters: 1 + 1 + 1. */
	DT_EQ_SIZE(uni32_requires(U"abc"), 3U);
	/* 2 + 3 + 4, the sizes expected by uni32_sizeof_simple for these. */
	DT_EQ_SIZE(uni32_requires(U"é€𐍈"), 9U);
}

/*
 * A string holding the value 0xffffffff is expected to be rejected:
 * uni32_requires must return (size_t)-1 and set errno to EILSEQ.
 */
static void
uni32_requires_invalid(void)
{
	/* Clear errno so that a stale value cannot satisfy the check. */
	errno = 0;
	DT_EQ_SIZE((size_t)-1, uni32_requires(U"\xffffffff"));
	DT_EQ_INT(errno, EILSEQ);
}

/*
 * Convert UTF-32 strings to UTF-8 with uni32_to8 and check both the returned
 * size and the produced bytes.
 *
 * The destination buffers are zero-initialized so that the output can be
 * compared as a C string.
 */
static void
uni32_to8_simple(void)
{
	/* ASCII only: three bytes are expected for three characters. */
	{
		uint8_t buffer[10] = { 0 };
		const size_t r = uni32_to8(U"abc", buffer, sizeof (buffer));

		DT_EQ_SIZE(r, 3U);
		DT_EQ_STR((const char *)buffer, (const char *)u8"abc");
	}

	/*
	 * Eleven characters among which "ç" takes 2 bytes and "€" takes 3 in
	 * UTF-8, hence the 14 expected bytes.
	 */
	{
		uint8_t buffer[20] = { 0 };
		const size_t r = uni32_to8(U"ça va, 5€ ?", buffer, sizeof (buffer));

		DT_EQ_SIZE(r, 14U);
		DT_EQ_STR((const char *)buffer, (const char *)u8"ça va, 5€ ?");
	}
}

/*
 * Converting a string holding the value 0xffffffff is expected to fail:
 * uni32_to8 must return (size_t)-1 and set errno to EILSEQ.
 */
static void
uni32_to8_invalid(void)
{
	uint8_t buffer[10] = { 0 };

	/* Clear errno so that a stale value cannot satisfy the check. */
	errno = 0;
	DT_EQ_SIZE(uni32_to8(U"\xffffffff", buffer, sizeof (buffer)), (size_t)-1);
	DT_EQ_INT(errno, EILSEQ);
}

/*
 * Converting a string that cannot fit into a three byte buffer is expected
 * to fail: uni32_to8 must return (size_t)-1 and set errno to ERANGE.
 */
static void
uni32_to8_toosmall(void)
{
	size_t r;
	uint8_t buffer[3] = { 0 };

	/* Clear errno so that a stale value cannot satisfy the check. */
	errno = 0;
	r = uni32_to8(U"ça va ?", buffer, sizeof (buffer));
	DT_EQ_SIZE(r, (size_t)-1);
	DT_EQ_INT(errno, ERANGE);
}

/*
 * Check uni_isalpha on a non-ASCII letter (expected true) and on a currency
 * sign (expected false).
 */
static void
misc_isalpha(void)
{
	DT_ASSERT(uni_isalpha(U'é'));
	DT_ASSERT(!uni_isalpha(U'€'));
}

/*
 * Check uni_isdigit on a non-ASCII character expected to be a digit and on a
 * currency sign (expected false).
 *
 * NOTE(review): the first character looks like an Arabic-script digit one --
 * confirm its exact code point if this test ever needs to be retyped.
 */
static void
misc_isdigit(void)
{
	DT_ASSERT(uni_isdigit(U'۱'));
	DT_ASSERT(!uni_isdigit(U'€'));
}

/*
 * Check uni_islower on ASCII and non-ASCII letters: lowercase ones are
 * expected to match, uppercase ones are not.
 */
static void
misc_islower(void)
{
	DT_ASSERT(uni_islower(U'a'));
	DT_ASSERT(uni_islower(U'é'));
	DT_ASSERT(!uni_islower(U'A'));
	DT_ASSERT(!uni_islower(U'É'));
}

/*
 * Check uni_isspace on the ASCII space (expected true) and on a letter
 * (expected false).
 */
static void
misc_isspace(void)
{
	DT_ASSERT(uni_isspace(U' '));
	DT_ASSERT(!uni_isspace(U'é'));
}

/*
 * Check uni_istitle on a character expected to be titlecase and on a
 * currency sign (expected false).
 *
 * NOTE(review): the first literal must be a single code point, presumably the
 * titlecase digraph U+01C5 -- confirm that the file encoding preserved it.
 */
static void
misc_istitle(void)
{
	DT_ASSERT(uni_istitle(U'Dž'));
	DT_ASSERT(!uni_istitle(U'€'));
}

/*
 * Check uni_isupper on ASCII and non-ASCII letters: uppercase ones are
 * expected to match, lowercase ones are not.
 *
 * Every sample is written as a U'' literal, as in misc_islower.
 */
static void
misc_isupper(void)
{
	DT_ASSERT(!uni_isupper(U'a'));
	DT_ASSERT(!uni_isupper(U'é'));
	DT_ASSERT(uni_isupper(U'A'));
	DT_ASSERT(uni_isupper(U'É'));
}

/*
 * Test entry point: run every test function of this file through DT_RUN,
 * then invoke DT_SUMMARY.
 *
 * NOTE(review): there is no explicit return statement, so main returns 0 when
 * control reaches its end; whether failed checks are reflected in the exit
 * status depends on what DT_SUMMARY does -- confirm against dt.h.
 */
int
main(int argc, char **argv)
{
	/* UTF-8 encoding and decoding. */
	DT_RUN(uni8_encode_simple);
	DT_RUN(uni8_encode_invalid);
	DT_RUN(uni8_encode_toosmall);
	DT_RUN(uni8_decode_simple);
	DT_RUN(uni8_decode_invalid);
	DT_RUN(uni8_sizeof_simple);
	DT_RUN(uni8_sizeof_invalid);
	DT_RUN(uni8_length_simple);
	DT_RUN(uni8_length_invalid);
	DT_RUN(uni8_to32_simple);
	DT_RUN(uni8_to32_invalid);
	DT_RUN(uni8_to32_toosmall);
	/* UTF-32 helpers and conversion to UTF-8. */
	DT_RUN(uni32_sizeof_simple);
	DT_RUN(uni32_sizeof_invalid);
	DT_RUN(uni32_length_simple);
	DT_RUN(uni32_requires_simple);
	DT_RUN(uni32_requires_invalid);
	DT_RUN(uni32_to8_simple);
	DT_RUN(uni32_to8_invalid);
	DT_RUN(uni32_to8_toosmall);
	/* Character classification. */
	DT_RUN(misc_isalpha);
	DT_RUN(misc_isdigit);
	DT_RUN(misc_islower);
	DT_RUN(misc_isspace);
	DT_RUN(misc_istitle);
	DT_RUN(misc_isupper);
	DT_SUMMARY();
}