view generator/make-unicode/unicode-after.cpp @ 2:84765c6f4872

New style
author David Demelier <markand@malikania.fr>
date Thu, 02 Feb 2017 18:07:27 +0100
parents f94206b2e05e
children d9d3406c1250
line wrap: on
line source

// Encode the code point `c` as a NUL-terminated UTF-8 sequence into `res`.
//
// `res` must have room for at least 5 bytes (up to 4 payload bytes plus the
// terminator). The number of payload bytes is chosen by nbytes_point(c).
//
// If nbytes_point() rejects the code point (returns something other than
// 1..4), `res` is set to the empty string so the caller never reads
// indeterminate bytes from the buffer.
void encode(char32_t c, char res[5]) noexcept
{
    switch (nbytes_point(c)) {
    case 1:
        // 0xxxxxxx
        res[0] = static_cast<char>(c);
        res[1] = '\0';
        break;
    case 2:
        // 110xxxxx 10xxxxxx
        res[0] = static_cast<char>(0xC0 | ((c >> 6)  & 0x1F));
        res[1] = static_cast<char>(0x80 | (c & 0x3F));
        res[2] = '\0';
        break;
    case 3:
        // 1110xxxx 10xxxxxx 10xxxxxx
        res[0] = static_cast<char>(0xE0 | ((c >> 12) & 0xF ));
        res[1] = static_cast<char>(0x80 | ((c >> 6)  & 0x3F));
        res[2] = static_cast<char>(0x80 | (c & 0x3F));
        res[3] = '\0';
        break;
    case 4:
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        res[0] = static_cast<char>(0xF0 | ((c >> 18) & 0x7 ));
        res[1] = static_cast<char>(0x80 | ((c >> 12) & 0x3F));
        res[2] = static_cast<char>(0x80 | ((c >> 6)  & 0x3F));
        res[3] = static_cast<char>(0x80 | (c & 0x3F));
        res[4] = '\0';
        break;
    default:
        // Not encodable: hand back an empty string rather than leaving the
        // output buffer untouched.
        res[0] = '\0';
        break;
    }
}

// Decode the UTF-8 sequence starting at `res` into the code point `c`.
//
// The sequence length is derived from the lead byte via nbytes_utf8(). When
// the lead byte is not a valid UTF-8 lead byte, `c` is set to 0.
//
// The caller must guarantee that `res` holds as many bytes as the lead byte
// announces; continuation bytes are not validated here.
void decode(char32_t& c, const char* res) noexcept
{
    c = 0;

    switch (nbytes_utf8(res[0])) {
    case 1:
        // 0xxxxxxx
        c = res[0];
        break;
    case 2:
        // 110xxxxx 10xxxxxx
        c =  (res[0] & 0x1f) << 6;
        c |= (res[1] & 0x3f);
        break;
    case 3:
        // 1110xxxx 10xxxxxx 10xxxxxx
        c =  (res[0] & 0x0f) << 12;
        c |= (res[1] & 0x3f) << 6;
        c |= (res[2] & 0x3f);
        break;
    case 4:
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        // The three lead-byte bits sit above 3 * 6 = 18 continuation bits,
        // mirroring the `c >> 18` used by encode().
        c =  (res[0] & 0x07) << 18;
        c |= (res[1] & 0x3f) << 12;
        c |= (res[2] & 0x3f) << 6;
        c |= (res[3] & 0x3f);
        break;
    default:
        break;
    }
}

// Return how many bytes the UTF-8 sequence introduced by lead byte `c`
// occupies (1 to 4), or -1 when `c` cannot start a sequence.
int nbytes_utf8(char c) noexcept
{
    // Work on the raw 8-bit pattern so the prefix tests are sign-independent.
    const unsigned lead = static_cast<unsigned char>(c);

    // Compare the leading bits against the prefixes 0, 110, 1110 and 11110.
    return (lead >> 7) == 0x00 ? 1
         : (lead >> 5) == 0x06 ? 2
         : (lead >> 4) == 0x0E ? 3
         : (lead >> 3) == 0x1E ? 4
         : -1;
}

// Return how many UTF-8 bytes are needed to encode the code point `c`
// (1 to 4), or -1 when `c` is above 0x1FFFFF.
int nbytes_point(char32_t c) noexcept
{
    // Largest code point representable with 1, 2, 3 and 4 bytes respectively.
    static constexpr char32_t limits[] = { 0x7F, 0x7FF, 0xFFFF, 0x1FFFFF };

    int count = 0;

    for (const char32_t limit : limits) {
        ++count;

        if (c <= limit) {
            return count;
        }
    }

    return -1;
}

// Count the code points that for_each() reports for `str`.
unsigned length(const std::string& str)
{
    unsigned count{0};

    // Only the number of callback invocations matters, not the values.
    for_each(str, [&count] (char32_t) { count += 1; });

    return count;
}

// Convert the UTF-32 string `array` to a UTF-8 encoded std::string.
//
// Throws std::invalid_argument("invalid sequence") as soon as a code point
// is rejected by nbytes_point().
std::string to_utf8(const std::u32string& array)
{
    std::string res;

    // Every code point produces at least one byte.
    res.reserve(array.size());

    for (const char32_t point : array) {
        const int size = nbytes_point(point);

        if (size < 0) {
            throw std::invalid_argument("invalid sequence");
        }

        char tmp[5];

        encode(point, tmp);

        // Append by explicit length instead of as a C string, otherwise
        // U+0000 (encoded as a single '\0' byte) would be silently dropped.
        res.append(tmp, static_cast<std::string::size_type>(size));
    }

    return res;
}

// Build a UTF-32 string from the code points that for_each() reports for
// `str`, in the order they are reported.
std::u32string to_utf32(const std::string& str)
{
    std::u32string points;

    for_each(str, [&points] (char32_t point) { points += point; });

    return points;
}

} // !unicode