view gen/unicode-after.c @ 24:23ceab03a393 1.0.0

unicode: remove uni_totitle. While here, add more consts.
author David Demelier <markand@malikania.fr>
date Wed, 23 Mar 2022 13:17:10 +0100
parents 6fa530cc1188
children
line wrap: on
line source

/*
 * Encode the code point `point` as UTF-8 into dst, which has room for dstsz
 * bytes. No NUL terminator is appended.
 *
 * Returns the number of bytes stored. On failure returns (size_t)-1: errno is
 * set to ERANGE when dst is too small, otherwise the failure (and errno) come
 * from uni32_sizeof() rejecting the code point.
 */
size_t
uni8_encode(uint8_t dst[], size_t dstsz, uint32_t point)
{
	/* Lead byte marker and payload mask, indexed by sequence length. */
	static const uint8_t lead_tag[]  = { 0, 0, 0xC0, 0xE0, 0xF0 };
	static const uint8_t lead_mask[] = { 0, 0, 0x1F, 0x0F, 0x07 };

	assert(dst);

	const size_t len = uni32_sizeof(point);

	/* Anything outside 1..4, including the error value, is passed through. */
	if (len < 1 || len > 4)
		return len;

	if (dstsz < len) {
		errno = ERANGE;
		return (size_t)-1;
	}

	if (len == 1) {
		dst[0] = (uint8_t)point;
		return len;
	}

	/* Continuation bytes carry six bits each; fill them from the tail. */
	uint32_t bits = point;

	for (size_t i = len - 1; i > 0; --i) {
		dst[i] = (uint8_t)(0x80 | (bits & 0x3F));
		bits >>= 6;
	}

	/* Whatever is left goes into the lead byte. */
	dst[0] = (uint8_t)(lead_tag[len] | (bits & lead_mask[len]));

	return len;
}

/*
 * Decode the UTF-8 sequence starting at src into a single code point.
 *
 * The sequence length is derived from the lead byte through uni8_sizeof().
 * Every expected continuation byte is checked for being non-NUL before it is
 * used, so a string truncated in the middle of a sequence is never read past
 * its terminator.
 *
 * Returns the number of bytes consumed (1 to 4) and stores the code point in
 * *point. On failure returns (size_t)-1 and leaves *point untouched; errno is
 * EILSEQ, set either by uni8_sizeof() for an invalid lead byte or here when a
 * NUL cuts the sequence short.
 */
size_t
uni8_decode(const uint8_t src[], uint32_t *point)
{
	assert(src);
	assert(point);

	size_t parsed;

	switch ((parsed = uni8_sizeof(*src))) {
	case 1:
		*point = src[0];
		break;
	case 2:
		if (!src[1])
			goto eilseq;

		*point =  (uint32_t)(src[0] & 0x1f) << 6;
		*point |= (uint32_t)(src[1] & 0x3f);
		break;
	case 3:
		if (!src[1] || !src[2])
			goto eilseq;

		*point =  (uint32_t)(src[0] & 0x0f) << 12;
		*point |= (uint32_t)(src[1] & 0x3f) << 6;
		*point |= (uint32_t)(src[2] & 0x3f);
		break;
	case 4:
		if (!src[1] || !src[2] || !src[3])
			goto eilseq;

		/*
		 * The three continuation bytes hold 18 bits, so the lead byte
		 * payload starts at bit 18 (mirrors `point >> 18` in
		 * uni8_encode).
		 */
		*point =  (uint32_t)(src[0] & 0x07) << 18;
		*point |= (uint32_t)(src[1] & 0x3f) << 12;
		*point |= (uint32_t)(src[2] & 0x3f) << 6;
		*point |= (uint32_t)(src[3] & 0x3f);
		break;
	default:
		/* Invalid lead byte: propagate the uni8_sizeof() error as is. */
		break;
	}

	return parsed;

eilseq:
	errno = EILSEQ;

	return -1;
}

/*
 * Tell how many bytes a UTF-8 sequence occupies, judging by its lead byte c.
 *
 * Returns 1 to 4, or (size_t)-1 with errno set to EILSEQ when c cannot start
 * a sequence (continuation byte or a pattern with five or more leading ones).
 */
size_t
uni8_sizeof(uint8_t c)
{
	size_t len;

	if ((c & 0x80) == 0x00) {
		/* 0xxxxxxx: plain ASCII. */
		len = 1;
	} else if ((c & 0xE0) == 0xC0) {
		/* 110xxxxx */
		len = 2;
	} else if ((c & 0xF0) == 0xE0) {
		/* 1110xxxx */
		len = 3;
	} else if ((c & 0xF8) == 0xF0) {
		/* 11110xxx */
		len = 4;
	} else {
		errno = EILSEQ;
		len = (size_t)-1;
	}

	return len;
}

/*
 * Count the code points in the NUL-terminated UTF-8 string src.
 *
 * Only lead bytes are interpreted (through uni8_sizeof()); continuation bytes
 * are merely required to be present, i.e. non-NUL.
 *
 * Returns the number of code points, or (size_t)-1 with errno set to EILSEQ
 * when a lead byte is invalid or the string ends in the middle of a sequence.
 */
size_t
uni8_length(const uint8_t src[])
{
	assert(src);

	size_t total = 0, gap;

	while (*src) {
		if ((gap = uni8_sizeof(*src)) == (size_t)-1)
			return -1;

		/*
		 * Make sure the whole sequence lies before the terminator,
		 * otherwise the jump below would step over the NUL and keep
		 * reading past the end of the string.
		 */
		for (size_t i = 1; i < gap; ++i) {
			if (!src[i]) {
				errno = EILSEQ;
				return -1;
			}
		}

		total += 1;
		src += gap;
	}

	return total;
}

/*
 * Convert the NUL-terminated UTF-8 string src into UTF-32 code points stored
 * in dst, which has room for dstsz elements including the terminating 0.
 *
 * Returns the number of code points written, terminator excluded. On failure
 * returns (size_t)-1: errno is ERANGE when dst cannot hold the result plus
 * its terminator, otherwise the error comes from uni8_decode().
 */
size_t
uni8_to32(const uint8_t src[], uint32_t dst[], size_t dstsz)
{
	assert(src);
	assert(dst);

	size_t count = 0;

	while (*src != 0 && count < dstsz) {
		const size_t step = uni8_decode(src, &dst[count]);

		if (step == (size_t)-1)
			return (size_t)-1;

		src += step;
		count++;
	}

	/* Every slot is taken: nowhere left to put the terminator. */
	if (count == dstsz) {
		errno = ERANGE;
		return (size_t)-1;
	}

	dst[count] = 0;

	return count;
}

/*
 * Tell how many UTF-8 bytes are needed to encode the code point c.
 *
 * Returns 1 to 4, or (size_t)-1 with errno set to EILSEQ when c does not fit
 * in the 21 bits a four byte sequence can carry.
 */
size_t
uni32_sizeof(uint32_t c)
{
	/* Largest value representable with 1, 2, 3 and 4 bytes respectively. */
	static const uint32_t limits[] = { 0x7F, 0x7FF, 0xFFFF, 0x1FFFFF };

	for (size_t i = 0; i < sizeof (limits) / sizeof (limits[0]); ++i) {
		if (c <= limits[i])
			return i + 1;
	}

	errno = EILSEQ;

	return (size_t)-1;
}

/*
 * Count the code points in src up to, and not including, the terminating 0.
 */
size_t
uni32_length(const uint32_t src[])
{
	assert(src);

	const uint32_t *end = src;

	/* Walk to the terminator, the distance travelled is the length. */
	while (*end != 0)
		++end;

	return (size_t)(end - src);
}

/*
 * Compute how many bytes the 0-terminated UTF-32 string src takes once
 * encoded as UTF-8, terminator not included.
 *
 * Returns that byte count. On failure returns (size_t)-1: errno is ERANGE
 * when the sum would reach SIZE_MAX, otherwise the error comes from
 * uni32_sizeof() rejecting a code point.
 */
size_t
uni32_requires(const uint32_t src[])
{
	assert(src);

	size_t bytes = 0;

	for (const uint32_t *it = src; *it != 0; ++it) {
		const size_t width = uni32_sizeof(*it);

		if (width == (size_t)-1)
			return (size_t)-1;

		/* Keep the sum strictly below SIZE_MAX. */
		if (SIZE_MAX - bytes <= width) {
			errno = ERANGE;
			return (size_t)-1;
		}

		bytes += width;
	}

	return bytes;
}

/*
 * Convert the 0-terminated UTF-32 string src into UTF-8 stored in dst, which
 * has room for dstsz bytes including the terminating NUL.
 *
 * Returns the number of bytes written, terminator excluded. On failure
 * returns (size_t)-1: errno is ERANGE when no byte is left for the
 * terminator, otherwise the error comes from uni8_encode().
 */
size_t
uni32_to8(const uint32_t src[], uint8_t dst[], size_t dstsz)
{
	assert(src);
	assert(dst);

	size_t used = 0;

	while (*src != 0 && used < dstsz) {
		const size_t step = uni8_encode(dst + used, dstsz - used, *src);

		if (step == (size_t)-1)
			return (size_t)-1;

		++src;
		used += step;
	}

	/* Buffer completely filled: the terminator does not fit. */
	if (used == dstsz) {
		errno = ERANGE;
		return (size_t)-1;
	}

	dst[used] = 0;

	return used;
}