view tools/mkunicode/Unicode-after.cpp @ 362:ff5b46474895

- Fix WITH_DYNLIB=Off - Fix generate-unicode on Linux
author David Demelier <markand@malikania.fr>
date Tue, 28 Apr 2015 11:50:44 +0200
parents 7fe8d4094983
children b78d6d8f2872
line wrap: on
line source

/*
 * Encode the code point c as a NUL-terminated UTF-8 sequence into res.
 *
 * The number of bytes written before the terminator is nbytesPoint(c),
 * that is 1 to 4, so res must have room for at least 5 chars.
 *
 * If nbytesPoint() rejects the code point (returns a value outside 1..4),
 * res is set to the empty string so that the caller never reads an
 * uninitialized buffer.
 */
void Unicode::encode(char32_t c, char res[5]) noexcept
{
	switch (nbytesPoint(c)) {
	case 1:
		/* 0xxxxxxx */
		res[0] = static_cast<char>(c);
		res[1] = '\0';
		break;
	case 2:
		/* 110xxxxx 10xxxxxx */
		res[0] = static_cast<char>(0xC0 | ((c >> 6)  & 0x1F));
		res[1] = static_cast<char>(0x80 | (c & 0x3F));
		res[2] = '\0';
		break;
	case 3:
		/* 1110xxxx 10xxxxxx 10xxxxxx */
		res[0] = static_cast<char>(0xE0 | ((c >> 12) & 0xF ));
		res[1] = static_cast<char>(0x80 | ((c >> 6)  & 0x3F));
		res[2] = static_cast<char>(0x80 | (c & 0x3F));
		res[3] = '\0';
		break;
	case 4:
		/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		res[0] = static_cast<char>(0xF0 | ((c >> 18) & 0x7 ));
		res[1] = static_cast<char>(0x80 | ((c >> 12) & 0x3F));
		res[2] = static_cast<char>(0x80 | ((c >> 6)  & 0x3F));
		res[3] = static_cast<char>(0x80 | (c & 0x3F));
		res[4] = '\0';
		break;
	default:
		/* Not encodable: hand back an empty, terminated string. */
		res[0] = '\0';
		break;
	}
}

/*
 * Decode the first UTF-8 sequence found at res into the code point c.
 *
 * The sequence length is taken from the first byte through nbytesUtf8();
 * that many bytes are then read from res. Continuation bytes are not
 * validated and no check is made for an early terminator, so res must
 * point to at least as many readable bytes as the lead byte announces.
 *
 * c is reset to 0 before decoding.
 */
void Unicode::decode(char32_t &c, const char *res) noexcept
{
	c = 0;

	switch (nbytesUtf8(res[0])) {
	case 1:
		/*
		 * Also reached for bytes that are not a valid lead byte, since
		 * nbytesUtf8() reports 1 for them; the byte is stored as-is.
		 */
		c = res[0];
		break;
	case 2:
		/* 110xxxxx 10xxxxxx: 5 + 6 payload bits */
		c =  (res[0] & 0x1f) << 6;
		c |= (res[1] & 0x3f);
		break;
	case 3:
		/* 1110xxxx 10xxxxxx 10xxxxxx: 4 + 6 + 6 payload bits */
		c =  (res[0] & 0x0f) << 12;
		c |= (res[1] & 0x3f) << 6;
		c |= (res[2] & 0x3f);
		break;
	case 4:
		/*
		 * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: 3 + 6 + 6 + 6 payload bits.
		 * The lead byte sits above the 18 bits carried by the three
		 * continuation bytes, which mirrors the (c >> 18) used by encode().
		 */
		c =  (res[0] & 0x07) << 18;
		c |= (res[1] & 0x3f) << 12;
		c |= (res[2] & 0x3f) << 6;
		c |= (res[3] & 0x3f);
		break;
	default:
		break;
	}
}

/*
 * Return the length in bytes of the UTF-8 sequence announced by the lead
 * byte c: 2, 3 or 4 when c carries the matching multi-byte prefix, and 1
 * for every other byte value.
 */
int Unicode::nbytesUtf8(char c) noexcept
{
	/* Work on the raw byte value so the prefix tests are sign-independent. */
	const unsigned lead = static_cast<unsigned char>(c);

	if ((lead >> 5) == 0x06) {
		/* 110xxxxx */
		return 2;
	}
	if ((lead >> 4) == 0x0E) {
		/* 1110xxxx */
		return 3;
	}
	if ((lead >> 3) == 0x1E) {
		/* 11110xxx */
		return 4;
	}

	return 1;
}

/*
 * Return how many UTF-8 bytes are needed to encode the code point c
 * (1 to 4), or -1 when c is greater than 0x1FFFFF and therefore does not
 * fit in a 4-byte sequence.
 */
int Unicode::nbytesPoint(char32_t c) noexcept
{
	/* Tested from the widest range down; each bound is the previous maximum. */
	if (c > 0x1FFFFF) {
		return -1;
	}
	if (c > 0xFFFF) {
		return 4;
	}
	if (c > 0x7FF) {
		return 3;
	}

	return (c > 0x7F) ? 2 : 1;
}

/*
 * Return the number of times forEach() invokes its callback for str,
 * which is expected to be once per decoded code point (forEach is defined
 * outside this section).
 */
int Unicode::length(const std::string &str)
{
	int count = 0;

	forEach(str, [&count] (char32_t) {
		count += 1;
	});

	return count;
}

/*
 * Convert a UTF-32 string to UTF-8.
 *
 * Every code point of array is encoded with encode() and appended to the
 * result, including U+0000 which yields a single NUL byte.
 *
 * Throws std::invalid_argument if nbytesPoint() rejects a code point.
 */
std::string Unicode::toUtf8(const std::u32string &array)
{
	std::string res;

	for (const char32_t point : array) {
		const int size = nbytesPoint(point);

		if (size < 0) {
			throw std::invalid_argument("invalid sequence");
		}

		char tmp[5];

		encode(point, tmp);

		/*
		 * Append by explicit length rather than as a C string: the
		 * encoding of U+0000 is itself a NUL byte and would otherwise
		 * be dropped from the output.
		 */
		res.append(tmp, static_cast<std::string::size_type>(size));
	}

	return res;
}

/*
 * Convert a UTF-8 string to UTF-32 by collecting, in order, every code
 * point that forEach() passes to its callback for str (forEach is defined
 * outside this section).
 */
std::u32string Unicode::toUtf32(const std::string &str)
{
	std::u32string decoded;

	forEach(str, [&decoded] (char32_t point) {
		decoded += point;
	});

	return decoded;
}