view tests/test-unicode.c @ 23:4983392b356f

unicode: add totitle conversions
author David Demelier <markand@malikania.fr>
date Wed, 23 Mar 2022 11:46:41 +0100
parents 887a8fd73d1e
children 23ceab03a393
line wrap: on
line source

/*
 * test-unicode.c -- main test file for unicode
 *
 * Copyright (c) 2013-2022 David Demelier <markand@malikania.fr>
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <errno.h>

#include <rexo.h>

#include "unicode.h"

/*
 * /!\ Be sure to keep this file with UTF-8 encoding /!\
 */

/*
 * Count the code points stored in the zero-terminated array `s`, not
 * including the terminating 0 element.
 */
static size_t
u32len(const uint32_t *s)
{
	const uint32_t *end = s;

	/* Walk forward until the terminating 0 element is reached. */
	for (; *end != 0; ++end)
		continue;

	return (size_t)(end - s);
}

/*
 * Compare two zero-terminated arrays of code points.
 *
 * Returns non-zero when both arrays have the same length and the same
 * contents, 0 otherwise. Note that this is the opposite of the strcmp
 * convention: a "true" result means equal.
 */
static int
u32cmp(const uint32_t *s1, const uint32_t *s2)
{
	const size_t l1 = u32len(s1);
	const size_t l2 = u32len(s2);

	/*
	 * memcmp takes a size in bytes, so the element count has to be scaled
	 * by the element size; otherwise only a fraction of the data is
	 * actually compared.
	 */
	return l1 == l2 && memcmp(s1, s2, l1 * sizeof (*s1)) == 0;
}

/*
 * Encode single code points with uni8_encode and check both the returned
 * size and the bytes written into the buffer.
 */
RX_TEST_CASE(uni8_encode, simple)
{
	size_t r;

	/* a -> 1 byte. */
	{
		/* Zero-filled so the result can be compared as a C string. */
		uint8_t buffer[5] = { 0 };

		r = uni8_encode(buffer, sizeof (buffer), U'a');
		RX_UINT_REQUIRE_EQUAL(r, 1U);
		RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"a");
	}

	/* é -> 2 bytes. */
	{
		uint8_t buffer[5] = { 0 };

		r = uni8_encode(buffer, sizeof (buffer), U'é');
		RX_UINT_REQUIRE_EQUAL(r, 2U);
		RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"é");
	}
}

/*
 * Encoding 0xffffffff is expected to fail: uni8_encode must return
 * (size_t)-1 and leave errno set to EILSEQ.
 */
RX_TEST_CASE(uni8_encode, invalid)
{
	size_t r;
	uint8_t buffer[5] = { 0 };

	/* Reset errno so a stale value from an earlier call cannot pass the check. */
	errno = 0;
	r = uni8_encode(buffer, sizeof (buffer), 0xffffffff);
	RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
}

/*
 * Encoding 'é' into a one byte buffer is expected to fail: uni8_encode must
 * return (size_t)-1 and leave errno set to ERANGE.
 */
RX_TEST_CASE(uni8_encode, toosmall)
{
	size_t r;
	uint8_t buffer[1] = { 0 };

	/* Reset errno so a stale value from an earlier call cannot pass the check. */
	errno = 0;
	r = uni8_encode(buffer, sizeof (buffer), U'é');
	RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
	RX_INT_REQUIRE_EQUAL(errno, ERANGE);
}

RX_TEST_CASE(unit8_decode, simple)
{
	size_t r;

	/* a -> 1 bytes. */
	{
		uint32_t code = -1;

		r = uni8_decode((const uint8_t *)u8"a", &code);
		RX_UINT_REQUIRE_EQUAL(r, 1U);
		RX_INT_REQUIRE_EQUAL(code, 'a');
	}

	/* é -> 2 bytes. */
	{
		uint32_t code = -1;

		r = uni8_decode((const uint8_t *)u8"é", &code);
		RX_UINT_REQUIRE_EQUAL(r, 2U);
		RX_INT_REQUIRE_EQUAL(code, U'é');
	}
}

/*
 * Malformed input is expected to make uni8_decode return (size_t)-1, set
 * errno to EILSEQ and leave the output code point untouched.
 */
RX_TEST_CASE(uni8_decode, invalid)
{
	size_t r;

	/* Invalid UTF-8 sequence. */
	{
		uint32_t code = -1;

		/* Reset errno so a stale value cannot pass the check below. */
		errno = 0;
		r = uni8_decode((const uint8_t *)u8"\xff""a", &code);
		RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
		RX_UINT_REQUIRE_EQUAL(code, (uint32_t)-1);
		RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
	}

	/* Valid "€" but unfinished sequence. */
	{
		uint32_t code = -1;

		/*
		 * Reset again: the previous sub-case left EILSEQ behind, which
		 * would otherwise satisfy this check on its own.
		 */
		errno = 0;
		/* 0xe2 is the lead byte of "€"; the continuation bytes are missing. */
		r = uni8_decode((const uint8_t []){ 0xe2, 0 }, &code);
		RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
		RX_UINT_REQUIRE_EQUAL(code, (uint32_t)-1);
		RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
	}
}

/*
 * Pass the first byte of 1, 2, 3 and 4 byte UTF-8 sequences to uni8_sizeof
 * and check the reported sequence size.
 */
RX_TEST_CASE(uni8_sizeof, simple)
{
	RX_UINT_REQUIRE_EQUAL(uni8_sizeof(u8"a"[0]), 1U);
	RX_UINT_REQUIRE_EQUAL(uni8_sizeof(u8"é"[0]), 2U);
	RX_UINT_REQUIRE_EQUAL(uni8_sizeof(u8"€"[0]), 3U);
	RX_UINT_REQUIRE_EQUAL(uni8_sizeof(u8"𐍈"[0]), 4U);
}

/*
 * The byte 0xff is expected to be rejected: uni8_sizeof must return
 * (size_t)-1 and leave errno set to EILSEQ.
 */
RX_TEST_CASE(uni8_sizeof, invalid)
{
	/* Reset errno so a stale value from an earlier call cannot pass the check. */
	errno = 0;
	RX_UINT_REQUIRE_EQUAL((size_t)-1, uni8_sizeof(u8"\xff"[0]));
	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
}

/*
 * uni8_length is expected to count code points rather than bytes: "5€" is
 * written with two characters and must be reported as 2.
 */
RX_TEST_CASE(uni8_length, simple)
{
	/* u8 literals keep the input UTF-8 whatever the execution charset is. */
	RX_UINT_REQUIRE_EQUAL(uni8_length((const uint8_t *)u8"abc"), 3U);
	RX_UINT_REQUIRE_EQUAL(uni8_length((const uint8_t *)u8"5€"), 2U);
}

/*
 * A string containing the byte 0xff is expected to be rejected:
 * uni8_length must return (size_t)-1 and leave errno set to EILSEQ.
 */
RX_TEST_CASE(uni8_length, invalid)
{
	/* Reset errno so a stale value from an earlier call cannot pass the check. */
	errno = 0;
	RX_UINT_REQUIRE_EQUAL((size_t)-1, uni8_length((const uint8_t *)"a""\xff""b"));
	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
}

/*
 * Convert short UTF-8 strings with uni8_to32 and check the returned count
 * as well as the resulting code points.
 */
RX_TEST_CASE(uni8_to32, simple)
{
	size_t r;

	/* ASCII only input. */
	{
		/* Zero-filled so u32cmp finds a terminator after the result. */
		uint32_t buffer[10] = { 0 };
		uint32_t expected[] = { U'a', U'b', U'c', 0 };

		r = uni8_to32((const uint8_t *)u8"abc", buffer, 10);
		RX_UINT_REQUIRE_EQUAL(r, 3U);
		RX_REQUIRE(u32cmp(buffer, expected));
	}

	/* Input containing a character written with several bytes. */
	{
		uint32_t buffer[10] = { 0 };
		uint32_t expected[] = { U'a', U'é', U'c', 0 };

		/* u8 literal keeps the input UTF-8 whatever the execution charset is. */
		r = uni8_to32((const uint8_t *)u8"aéc", buffer, 10);
		RX_UINT_REQUIRE_EQUAL(r, 3U);
		RX_REQUIRE(u32cmp(buffer, expected));
	}
}

/*
 * Malformed UTF-8 input is expected to make uni8_to32 return (size_t)-1
 * and set errno to EILSEQ.
 */
RX_TEST_CASE(uni8_to32, invalid)
{
	size_t r;
	uint32_t buffer[10] = { 0 };

	/* Invalid UTF-8 sequence. */
	errno = 0;
	r = uni8_to32((const uint8_t *)u8"\xff""a", buffer, 10);
	RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);

	/*
	 * Valid "€" but unfinished sequence.
	 *
	 * errno is reset first: the previous call left EILSEQ behind, which
	 * would otherwise satisfy the check on its own. 0xe2 is the lead
	 * byte of "€".
	 */
	errno = 0;
	r = uni8_to32((const uint8_t []){ 0xe2, 0 }, buffer, 10);
	RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
}

/*
 * Converting a long string while announcing a destination size of 1 is
 * expected to fail: uni8_to32 must return (size_t)-1 and leave errno set
 * to ERANGE.
 */
RX_TEST_CASE(uni8_to32, toosmall)
{
	size_t r;
	uint32_t buffer[4] = { 0 };

	/* Reset errno so a stale value from an earlier call cannot pass the check. */
	errno = 0;
	r = uni8_to32((const uint8_t *)u8"bonjour à tous", buffer, 1);
	RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
	RX_INT_REQUIRE_EQUAL(errno, ERANGE);
}

/*
 * uni32_sizeof is expected to report 1, 2, 3 and 4 for 'a', 'é', '€' and
 * '𐍈' respectively (presumably the size in bytes once encoded as UTF-8;
 * confirm against unicode.h).
 */
RX_TEST_CASE(uni32_sizeof, simple)
{
	RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'a'), 1);
	RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'é'), 2);
	RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'€'), 3);
	RX_UINT_REQUIRE_EQUAL(uni32_sizeof(U'𐍈'), 4);
}

/*
 * The value 0xffffffff is expected to be rejected: uni32_sizeof must
 * return (size_t)-1 and leave errno set to EILSEQ.
 */
RX_TEST_CASE(uni32_sizeof, invalid)
{
	/* Reset errno so a stale value from an earlier call cannot pass the check. */
	errno = 0;
	RX_UINT_REQUIRE_EQUAL((size_t)-1, uni32_sizeof(0xffffffff));
	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
}

/*
 * uni32_length is expected to report 3 for a zero-terminated array holding
 * three code points.
 */
RX_TEST_CASE(uni32_length, simple)
{
	RX_UINT_REQUIRE_EQUAL(uni32_length((const uint32_t []){ U'a', U'é', U'c', 0 }), 3U);
}

/*
 * uni32_requires is expected to report 3 for "abc" and 9 for "é€𐍈"
 * (presumably the number of bytes needed to store the string as UTF-8,
 * i.e. 2 + 3 + 4 for the second one; confirm against unicode.h).
 */
RX_TEST_CASE(uni32_requires, simple)
{
	RX_UINT_REQUIRE_EQUAL(uni32_requires(U"abc"), 3U);
	RX_UINT_REQUIRE_EQUAL(uni32_requires(U"é€𐍈"), 9U);
}

/*
 * A string holding the value 0xffffffff is expected to be rejected:
 * uni32_requires must return (size_t)-1 and leave errno set to EILSEQ.
 */
RX_TEST_CASE(uni32_requires, invalid)
{
	/* Reset errno so a stale value from an earlier call cannot pass the check. */
	errno = 0;
	RX_UINT_REQUIRE_EQUAL((size_t)-1, uni32_requires(U"\xffffffff"));
	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
}

/*
 * Convert UTF-32 strings with uni32_to8 and check the returned size as well
 * as the bytes written into the buffer.
 */
RX_TEST_CASE(uni32_to8, simple)
{
	size_t r;

	/* ASCII only input. */
	{
		/* Zero-filled so the result can be compared as a C string. */
		uint8_t buffer[10] = { 0 };

		r = uni32_to8(U"abc", buffer, sizeof (buffer));
		RX_UINT_REQUIRE_EQUAL(r, 3U);
		RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"abc");
	}

	/* 11 characters; 'ç' takes 2 bytes and '€' takes 3, hence 14 bytes. */
	{
		uint8_t buffer[20] = { 0 };

		r = uni32_to8(U"ça va, 5€ ?", buffer, sizeof (buffer));
		RX_UINT_REQUIRE_EQUAL(r, 14U);
		RX_STR_REQUIRE_EQUAL((const char *)buffer, (const char *)u8"ça va, 5€ ?");
	}
}

/*
 * A string holding the value 0xffffffff is expected to be rejected:
 * uni32_to8 must return (size_t)-1 and leave errno set to EILSEQ.
 */
RX_TEST_CASE(uni32_to8, invalid)
{
	uint8_t buffer[10] = { 0 };

	/* Reset errno so a stale value from an earlier call cannot pass the check. */
	errno = 0;
	/* The result is compared against (size_t)-1, errno is a plain int. */
	RX_UINT_REQUIRE_EQUAL(uni32_to8(U"\xffffffff", buffer, sizeof (buffer)), (size_t)-1);
	RX_INT_REQUIRE_EQUAL(errno, EILSEQ);
}

/*
 * Converting "ça va ?" into a three byte buffer is expected to fail:
 * uni32_to8 must return (size_t)-1 and leave errno set to ERANGE.
 */
RX_TEST_CASE(uni32_to8, toosmall)
{
	size_t r;
	uint8_t buffer[3] = { 0 };

	/* Reset errno so a stale value from an earlier call cannot pass the check. */
	errno = 0;
	r = uni32_to8(U"ça va ?", buffer, sizeof (buffer));
	RX_UINT_REQUIRE_EQUAL(r, (size_t)-1);
	RX_INT_REQUIRE_EQUAL(errno, ERANGE);
}

/*
 * uni_isalpha is expected to accept 'é' and to reject the '€' sign.
 */
RX_TEST_CASE(misc, isalpha)
{
	RX_REQUIRE(uni_isalpha(U'é'));
	RX_REQUIRE(!uni_isalpha(U'€'));
}

/*
 * uni_isdigit is expected to accept the non-ASCII digit '۱' and to reject
 * the '€' sign.
 */
RX_TEST_CASE(misc, isdigit)
{
	RX_REQUIRE(uni_isdigit(U'۱'));
	RX_REQUIRE(!uni_isdigit(U'€'));
}

/*
 * uni_islower is expected to accept 'a' and 'é' and to reject their
 * uppercase counterparts.
 */
RX_TEST_CASE(misc, islower)
{
	RX_REQUIRE(uni_islower(U'a'));
	RX_REQUIRE(uni_islower(U'é'));
	RX_REQUIRE(!uni_islower(U'A'));
	RX_REQUIRE(!uni_islower(U'É'));
}

/*
 * uni_isspace is expected to accept the ASCII space and to reject 'é'.
 */
RX_TEST_CASE(misc, isspace)
{
	RX_REQUIRE(uni_isspace(U' '));
	RX_REQUIRE(!uni_isspace(U'é'));
}

RX_TEST_CASE(misc, istitle)
{
	RX_REQUIRE(uni_istitle(U'Dž'));
	RX_REQUIRE(!uni_istitle(U'€'));
}

/*
 * uni_isupper is expected to accept 'A' and 'É' and to reject their
 * lowercase counterparts.
 */
RX_TEST_CASE(misc, isupper)
{
	/* All inputs are written as U'' constants, like in the islower test. */
	RX_REQUIRE(!uni_isupper(U'a'));
	RX_REQUIRE(!uni_isupper(U'é'));
	RX_REQUIRE(uni_isupper(U'A'));
	RX_REQUIRE(uni_isupper(U'É'));
}

/*
 * uni_totitle is expected to convert 's' into 'S'.
 *
 * The test case is named after the function it actually exercises.
 */
RX_TEST_CASE(misc, totitle)
{
	RX_UINT_REQUIRE_EQUAL(uni_totitle(U's'), U'S');
}

/*
 * Entry point: hand the command line over to rx_main and turn its result
 * into a process exit status (0 when it returns RX_SUCCESS, 1 otherwise).
 */
int
main(int argc, char **argv)
{
	const char **args = (const char **)argv;

	if (rx_main(0, NULL, argc, args) != RX_SUCCESS)
		return 1;

	return 0;
}