Mercurial > libunicode
changeset 2:84765c6f4872
New style
author | David Demelier <markand@malikania.fr> |
---|---|
date | Thu, 02 Feb 2017 18:07:27 +0100 |
parents | 0d9603b420c2 |
children | d9d3406c1250 |
files | generator/cat/cat.cpp generator/make-unicode/CMakeLists.txt generator/make-unicode/src/mkunicode.c generator/make-unicode/unicode-after.cpp generator/make-unicode/unicode.hpp test/main.cpp unicode.cpp unicode.hpp |
diffstat | 8 files changed, 332 insertions(+), 266 deletions(-) [+] |
line wrap: on
line diff
--- a/generator/cat/cat.cpp Mon Jan 02 20:43:34 2017 +0100 +++ b/generator/cat/cat.cpp Thu Feb 02 18:07:27 2017 +0100 @@ -1,5 +1,5 @@ /* - * cccat.cpp -- very basic cat replacement for portability + * cat.cpp -- very basic cat replacement for portability * * Copyright (c) 2013-2017 David Demelier <markand@malikania.fr> * @@ -22,7 +22,7 @@ #include <iterator> #include <string> -int main(int argc, char **argv) +int main(int argc, char** argv) { -- argc; ++ argv; @@ -38,8 +38,9 @@ std::ifstream input(argv[i]); std::string line; - while (std::getline(input, line)) + while (std::getline(input, line)) { output << line << "\n"; + } } return 0;
--- a/generator/make-unicode/CMakeLists.txt Mon Jan 02 20:43:34 2017 +0100 +++ b/generator/make-unicode/CMakeLists.txt Thu Feb 02 18:07:27 2017 +0100 @@ -26,7 +26,9 @@ add_custom_target( generate-unicode COMMAND - $<TARGET_FILE:generator> ${unicode_BINARY_DIR}/unicode-tmp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/UnicodeData.txt + $<TARGET_FILE:generator> + ${unicode_BINARY_DIR}/unicode-tmp.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/UnicodeData.txt COMMAND $<TARGET_FILE:cat> ${unicode_SOURCE_DIR}/unicode.cpp @@ -37,6 +39,10 @@ ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/unicode.hpp ${unicode_SOURCE_DIR} COMMAND ${CMAKE_COMMAND} -E remove ${unicode_BINARY_DIR}/unicode-tmp.cpp + SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/unicode-before.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/unicode-after.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/unicode.hpp DEPENDS generator ${CMAKE_BINARY_DIR}/cat
--- a/generator/make-unicode/src/mkunicode.c Mon Jan 02 20:43:34 2017 +0100 +++ b/generator/make-unicode/src/mkunicode.c Thu Feb 02 18:07:27 2017 +0100 @@ -381,37 +381,43 @@ isp = mkispair(label, prop); iss = mkissingle(label, prop); - fprintf(out, + fprintf(out, "bool is%s(char32_t c) noexcept\n" "{\n" - " const char32_t *p;\n" + " const char32_t* p;\n" "\n", label); if(isr) - fprintf(out, - " p = rbsearch(c, is%sr, nelem (is%sr)/2, 2);\n\n" - " if (p && c >= p[0] && c <= p[1])\n" - " return true;\n", + fprintf(out, + " p = rbsearch(c, is%sr, nelem (is%sr) / 2, 2);\n\n" + " if (p && c >= p[0] && c <= p[1]) {\n" + " return true;\n" + " }\n", label, label); if(isp) - fprintf(out, - "\n p = rbsearch(c, is%sp, nelem (is%sp)/2, 2);\n\n" - " if (p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n" - " return true;\n", + fprintf(out, + "\n" + " p = rbsearch(c, is%sp, nelem (is%sp) / 2, 2);\n\n" + " if (p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1)) {\n" + " return true;\n" + " }\n", label, label); if(iss) - fprintf(out, - "\n p = rbsearch(c, is%ss, nelem (is%ss), 1);\n\n" - " if (p && c == p[0])\n" - " return true;\n", + fprintf(out, + "\n" + " p = rbsearch(c, is%ss, nelem (is%ss), 1);\n\n" + " if (p && c == p[0]) {\n" + " return true;\n" + " }\n", label, label); - fprintf(out, - "\n return false;\n" + fprintf(out, + "\n" + " return false;\n" "}\n" "\n" ); @@ -554,36 +560,41 @@ top = mktopair(label, map); tos = mktosingle(label, map); - fprintf(out, + fprintf(out, "char32_t to%s(char32_t c) noexcept\n" "{\n" - " const char32_t *p;\n" + " const char32_t* p;\n" "\n", label); if(tor) - fprintf(out, - " p = rbsearch(c, to%sr, nelem (to%sr)/3, 3);\n\n" - " if (p && c >= p[0] && c <= p[1])\n" - " return c + p[2] - %d;\n", + fprintf(out, + " p = rbsearch(c, to%sr, nelem (to%sr) / 3, 3);\n\n" + " if (p && c >= p[0] && c <= p[1]) {\n" + " return c + p[2] - %d;\n" + " }\n", label, label, TO_OFFSET); if(top) - fprintf(out, - "\n p = rbsearch(c, to%sp, nelem (to%sp)/3, 3);\n\n" - " if (p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n" - " return c + p[2] - %d;\n", + fprintf(out, + "\n" + " p = rbsearch(c, to%sp, nelem (to%sp) / 3, 3);\n\n" + " if (p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1)) {\n" + " return c + p[2] - %d;\n" + " }\n", label, label, TO_OFFSET); if(tos) - fprintf(out, - "\n p = rbsearch(c, to%ss, nelem (to%ss)/2, 2);\n\n" - " if (p && c == p[0])\n" - " return c + p[1] - %d;\n\n", + fprintf(out, + "\n" + " p = rbsearch(c, to%ss, nelem (to%ss) / 2, 2);\n\n" + " if (p && c == p[0]) {\n" + " return c + p[1] - %d;\n" + " }\n\n", label, label, TO_OFFSET); - fprintf(out, - " return c;\n" + fprintf(out, + " return c;\n" "}\n" "\n" ); @@ -594,15 +605,16 @@ mkisronly(const char* label, char* prop) { mkisrange(label, prop, 1); - fprintf(out, + fprintf(out, "bool is%s(char32_t c) noexcept\n" "{\n" - " const char32_t *p;\n" + " const char32_t* p;\n" "\n" - " p = rbsearch(c, is%sr, nelem (is%sr)/2, 2);\n\n" - " if (p && c >= p[0] && c <= p[1])\n" - " return true;\n\n" - " return false;\n" + " p = rbsearch(c, is%sr, nelem (is%sr) / 2, 2);\n\n" + " if (p && c >= p[0] && c <= p[1]) {\n" + " return true;\n" + " }\n\n" + " return false;\n" "}\n" "\n", label, label, label); @@ -616,29 +628,31 @@ mktables(char *src, int usepairs) { /* Add nelem macro */ - fprintf(out, + fprintf(out, "#define nelem(x) (sizeof (x) / sizeof ((x)[0]))\n\n" ); /* Add the rbsearch function */ - fprintf(out, + fprintf(out, "namespace {\n\n" - "const char32_t *rbsearch(char32_t c, const char32_t *t, int n, int ne) noexcept\n" + "const char32_t *rbsearch(char32_t c, const char32_t* t, int n, int ne) noexcept\n" "{\n" - " const char32_t *p;\n" - " int m;\n\n" - " while (n > 1) {\n" - " m = n >> 1;\n" - " p = t + m * ne;\n\n" - " if (c >= p[0]) {\n" - " t = p;\n" - " n = n - m;\n" - " } else\n" - " n = m;\n" - " }\n\n" - " if (n && c >= t[0])\n" - " return t;\n\n" - " return nullptr;\n" + " const char32_t* p;\n" + " int m;\n\n" + " while (n > 1) {\n" + " m = n >> 1;\n" + " p = t + m * ne;\n\n" + " if (c >= p[0]) {\n" + " t = p;\n" + " n = n - m;\n" + " } else {\n" + " n = m;\n" + " }\n" + " }\n\n" + " if (n && c >= t[0]) {\n" + " return t;\n" + " }\n\n" + " return nullptr;\n" "}\n\n" "} // !namespace\n\n" );
--- a/generator/make-unicode/unicode-after.cpp Mon Jan 02 20:43:34 2017 +0100 +++ b/generator/make-unicode/unicode-after.cpp Thu Feb 02 18:07:27 2017 +0100 @@ -1,6 +1,6 @@ void encode(char32_t c, char res[5]) noexcept { - switch (nbytesPoint(c)) { + switch (nbytes_point(c)) { case 1: res[0] = static_cast<char>(c); res[1] = '\0'; @@ -28,11 +28,11 @@ } } -void decode(char32_t &c, const char *res) noexcept +void decode(char32_t& c, const char* res) noexcept { c = 0; - switch (nbytesUtf8(res[0])) { + switch (nbytes_utf8(res[0])) { case 1: c = res[0]; break; @@ -55,55 +55,64 @@ } } -int nbytesUtf8(char c) noexcept +int nbytes_utf8(char c) noexcept { - if (static_cast<unsigned char>(c) <= 127) + if (static_cast<unsigned char>(c) <= 127) { return 1; - if ((c & 0xE0) == 0xC0) + } + if ((c & 0xE0) == 0xC0) { return 2; - if ((c & 0xF0) == 0xE0) + } + if ((c & 0xF0) == 0xE0) { return 3; - if ((c & 0xF8) == 0xF0) + } + if ((c & 0xF8) == 0xF0) { return 4; + } return -1; } -int nbytesPoint(char32_t c) noexcept +int nbytes_point(char32_t c) noexcept { - if (c <= 0x7F) + if (c <= 0x7F) { return 1; - if (c <= 0x7FF) + } + if (c <= 0x7FF) { return 2; - if (c <= 0xFFFF) + } + if (c <= 0xFFFF) { return 3; - if (c <= 0x1FFFFF) + } + if (c <= 0x1FFFFF) { return 4; + } return -1; } -unsigned length(const std::string &str) +unsigned length(const std::string& str) { unsigned total = 0; - forEach(str, [&] (char32_t) { + for_each(str, [&] (char32_t) { ++ total; }); return total; } -std::string toUtf8(const std::u32string &array) +std::string to_utf8(const std::u32string& array) { std::string res; for (size_t i = 0; i < array.size(); ++i) { char tmp[5]; - int size = nbytesPoint(array[i]); + int size = nbytes_point(array[i]); - if (size < 0) + if (size < 0) { throw std::invalid_argument("invalid sequence"); + } encode(array[i], tmp); res.insert(res.length(), tmp); @@ -112,11 +121,11 @@ return res; } -std::u32string toUtf32(const std::string &str) +std::u32string to_utf32(const std::string& str) { std::u32string res; - forEach(str, [&] (char32_t code) { + for_each(str, [&] (char32_t code) { res.push_back(code); });
--- a/generator/make-unicode/unicode.hpp Mon Jan 02 20:43:34 2017 +0100 +++ b/generator/make-unicode/unicode.hpp Thu Feb 02 18:07:27 2017 +0100 @@ -31,7 +31,8 @@ * * ## Export macros * - * You must define `UNICODE_DLL` globally and `UNICODE_BUILDING_DLL` when compiling the library if you want a DLL, alternatively you can provide + * You must define `UNICODE_DLL` globally and `UNICODE_BUILDING_DLL` when + * compiling the library if you want a DLL, alternatively you can provide * your own `UNICODE_EXPORT` macro instead. */ @@ -81,7 +82,7 @@ * \param c the code point destination * \param res the multibyte string. */ -UNICODE_EXPORT void decode(char32_t &c, const char *res) noexcept; +UNICODE_EXPORT void decode(char32_t& c, const char* res) noexcept; /** * Get the number of bytes for the first multi byte character from a @@ -93,7 +94,7 @@ * \param c the first multi byte character * \return the number of bytes [1-4] or -1 if invalid */ -UNICODE_EXPORT int nbytesUtf8(char c) noexcept; +UNICODE_EXPORT int nbytes_utf8(char c) noexcept; /** * Get the number of bytes for the unicode point. @@ -101,7 +102,7 @@ * \param point the unicode point * \return the number of bytes [1-4] or -1 if invalid */ -UNICODE_EXPORT int nbytesPoint(char32_t point) noexcept; +UNICODE_EXPORT int nbytes_point(char32_t point) noexcept; /** * Get real number of character in a string. @@ -110,7 +111,7 @@ * \return the length * \throw std::invalid_argument on invalid sequence */ -UNICODE_EXPORT unsigned length(const std::string &str); +UNICODE_EXPORT unsigned length(const std::string& str); /** * Iterate over all real characters in the UTF-8 string. @@ -123,14 +124,15 @@ * \throw std::invalid_argument on invalid sequence */ template <typename Func> -void forEach(const std::string &str, Func function) +void for_each(const std::string& str, Func function) { for (size_t i = 0; i < str.size(); ) { char32_t point = 0; - int size = nbytesUtf8(str[i]); + int size = nbytes_utf8(str[i]); - if (size < 0) + if (size < 0) { throw std::invalid_argument("invalid sequence"); + } decode(point, str.data() + i); function(point); @@ -146,7 +148,7 @@ * \return the UTF-8 string * \throw std::invalid_argument on invalid sequence */ -UNICODE_EXPORT std::string toUtf8(const std::u32string &array); +UNICODE_EXPORT std::string to_utf8(const std::u32string& array); /** * Convert a UTF-8 string to UTF-32 string. @@ -155,7 +157,7 @@ * \return the UTF-32 string * \throw std::invalid_argument on invalid sequence */ -UNICODE_EXPORT std::u32string toUtf32(const std::string &str); +UNICODE_EXPORT std::u32string to_utf32(const std::string& str); /** * Check if the unicode character is space. @@ -237,8 +239,9 @@ */ inline std::u32string toupper(std::u32string str) { - for (size_t i = 0; i < str.size(); ++i) + for (size_t i = 0; i < str.size(); ++i) { str[i] = toupper(str[i]); + } return str; } @@ -250,12 +253,12 @@ * \return the upper case string * \warning very slow at the moment */ -inline std::string toupper(const std::string &str) +inline std::string toupper(const std::string& str) { std::string result; char buffer[5]; - forEach(str, [&] (char32_t code) { + for_each(str, [&] (char32_t code) { encode(toupper(code), buffer); result += buffer; }); @@ -271,8 +274,9 @@ */ inline std::u32string tolower(std::u32string str) { - for (size_t i = 0; i < str.size(); ++i) + for (size_t i = 0; i < str.size(); ++i) { str[i] = tolower(str[i]); + } return str; } @@ -284,12 +288,12 @@ * \return the lower case string * \warning very slow at the moment */ -inline std::string tolower(const std::string &str) +inline std::string tolower(const std::string& str) { std::string result; char buffer[5]; - forEach(str, [&] (char32_t code) { + for_each(str, [&] (char32_t code) { encode(tolower(code), buffer); result += buffer; });
--- a/test/main.cpp Mon Jan 02 20:43:34 2017 +0100 +++ b/test/main.cpp Thu Feb 02 18:07:27 2017 +0100 @@ -1,4 +1,4 @@ -/* +/* * main.cpp -- main test file for unicode * * Copyright (c) 2013-2017 David Demelier <markand@malikania.fr> @@ -31,11 +31,11 @@ * ------------------------------------------------------------------ */ -TEST(Conversion32to8, ascii) +TEST(conversion_32_to_8, ascii) { try { std::u32string u32{'a', 'b', 'c'}; - std::string s = unicode::toUtf8(u32); + std::string s = unicode::to_utf8(u32); ASSERT_EQ("abc", s); } catch (const std::exception &ex) { @@ -43,11 +43,11 @@ } } -TEST(Conversion32to8, valid) +TEST(conversion_32_to_8, valid) { try { std::u32string u32{'a', U'é', 'c', U'𠀀'}; - std::string s = unicode::toUtf8(u32); + std::string s = unicode::to_utf8(u32); std::string expected = u8"aéc𠀀"; ASSERT_EQ(expected, s); @@ -56,11 +56,11 @@ } } -TEST(Conversion32to8, invalid) +TEST(conversion_32_to_8, invalid) { std::u32string u32{'a', 0xFFFFFFFF, 'c'}; - ASSERT_ANY_THROW(unicode::toUtf8(u32)); + ASSERT_ANY_THROW(unicode::to_utf8(u32)); } /* @@ -68,12 +68,12 @@ * ------------------------------------------------------------------ */ -TEST(Conversion8to32, ascii) +TEST(Conversion_8_to_32, ascii) { try { std::string s{"abc"}; std::u32string expected{'a', 'b', 'c'}; - std::u32string result = unicode::toUtf32(s); + std::u32string result = unicode::to_utf32(s); ASSERT_EQ(expected, result); } catch (const std::exception &ex) { @@ -81,12 +81,12 @@ } } -TEST(Conversion8to32, valid) +TEST(Conversion_8_to_32, valid) { try { std::string s{u8"aéc𠀀"}; std::u32string expected{'a', U'é', 'c', U'𠀀'}; - std::u32string result = unicode::toUtf32(s); + std::u32string result = unicode::to_utf32(s); ASSERT_EQ(expected, result); } catch (const std::exception &ex) { @@ -99,7 +99,7 @@ * ------------------------------------------------------------------ */ -TEST(Toupper32, ascii) +TEST(toupper_32, ascii) { try { std::u32string u32{'a', 'b', 'c'}; @@ -112,7 +112,7 @@ } } -TEST(Toupper32, valid) +TEST(toupper_32, valid) { try { std::u32string u32{U'ä', U'ç', U'ë'}; @@ -125,7 +125,7 @@ } } -TEST(Toupper32, invalid) +TEST(toupper_32, invalid) { try { std::u32string u32{'a', 0xFFFFFFFF, 'b'}; @@ -143,7 +143,7 @@ * ------------------------------------------------------------------ */ -TEST(Tolower32, ascii) +TEST(tolower_32, ascii) { try { std::u32string u32{'A', 'B', 'C'}; @@ -156,7 +156,7 @@ } } -TEST(Tolower32, valid) +TEST(tolower_32, valid) { try { std::u32string u32{U'Ä', U'Ç', U'Ë'}; @@ -169,7 +169,7 @@ } } -TEST(Tolower32, invalid) +TEST(tolower_32, invalid) { try { std::u32string u32{'A', 0xFFFFFFFF, 'B'}; @@ -187,7 +187,7 @@ * ------------------------------------------------------------------ */ -TEST(Toupper8, ascii) +TEST(toupper_8, ascii) { try { std::string s{"abc"}; @@ -199,7 +199,7 @@ } } -TEST(Toupper8, valid) +TEST(toupper_8, valid) { try { std::string s{u8"aéc"}; @@ -211,7 +211,7 @@ } } -TEST(Toupper8, invalid) +TEST(toupper_8, invalid) { std::string s{"a" "\xFF""b"}; @@ -223,7 +223,7 @@ * ------------------------------------------------------------------ */ -TEST(Tolower8, ascii) +TEST(tolower_8, ascii) { try { std::string s{"ABC"}; @@ -235,7 +235,7 @@ } } -TEST(Tolower8, valid) +TEST(tolower_8, valid) { try { std::string s{u8"AÉC"}; @@ -247,7 +247,7 @@ } } -TEST(Tolower8, invalid) +TEST(tolower_8, invalid) { std::string s{"A" "\xFF""B"}; @@ -255,23 +255,23 @@ } /* - * Check functions + * Checks functions * ------------------------------------------------------------------ */ -TEST(Check, isspace) +TEST(checks, isspace) { ASSERT_TRUE(unicode::isspace(' ')); ASSERT_FALSE(unicode::isspace(/* é */ 233)); } -TEST(Check, isalpha) +TEST(checks, isalpha) { ASSERT_TRUE(unicode::isalpha(U'é')); ASSERT_FALSE(unicode::isalpha(U'€')); } -TEST(Check, isupper) +TEST(checks, isupper) { ASSERT_FALSE(unicode::isupper('a')); ASSERT_FALSE(unicode::isupper(U'é')); @@ -279,7 +279,7 @@ ASSERT_TRUE(unicode::isupper(U'É')); } -TEST(Check, islower) +TEST(checks, islower) { ASSERT_TRUE(unicode::islower('a')); ASSERT_TRUE(unicode::islower(U'é')); @@ -292,41 +292,42 @@ * ------------------------------------------------------------------ */ -TEST(Misc, nbytesPoint) +TEST(misc, nbytes_point) { - ASSERT_EQ(1, unicode::nbytesPoint('a')); - ASSERT_EQ(2, unicode::nbytesPoint(U'é')); - ASSERT_EQ(3, unicode::nbytesPoint(U'€')); - ASSERT_EQ(4, unicode::nbytesPoint(U'𠀀')); + ASSERT_EQ(1, unicode::nbytes_point('a')); + ASSERT_EQ(2, unicode::nbytes_point(U'é')); + ASSERT_EQ(3, unicode::nbytes_point(U'€')); + ASSERT_EQ(4, unicode::nbytes_point(U'𠀀')); } -TEST(Misc, nbytesUtf8) +TEST(misc, nbytes_utf8) { std::string s1{u8"a"}; std::string s2{u8"é"}; std::string s3{u8"€"}; std::string s4{u8"𠀀"}; - ASSERT_EQ(1, unicode::nbytesUtf8(s1[0])); - ASSERT_EQ(2, unicode::nbytesUtf8(s2[0])); - ASSERT_EQ(3, unicode::nbytesUtf8(s3[0])); - ASSERT_EQ(4, unicode::nbytesUtf8(s4[0])); + ASSERT_EQ(1, unicode::nbytes_utf8(s1[0])); + ASSERT_EQ(2, unicode::nbytes_utf8(s2[0])); + ASSERT_EQ(3, unicode::nbytes_utf8(s3[0])); + ASSERT_EQ(4, unicode::nbytes_utf8(s4[0])); } -TEST(Misc, forEach) +TEST(misc, for_each) { std::string s{u8"aé€𠀀"}; int current = 0; - unicode::forEach(s, [&] (char32_t code) { - if (current == 0) + unicode::for_each(s, [&] (char32_t code) { + if (current == 0) { ASSERT_EQ(U'a', code); - else if (current == 1) + } else if (current == 1) { ASSERT_EQ(U'é', code); - else if (current == 2) + } else if (current == 2) { ASSERT_EQ(U'€', code); - else if (current == 3) + } else if (current == 3) { ASSERT_EQ(U'𠀀', code); + } current++; }); @@ -334,14 +335,14 @@ ASSERT_EQ(4, current); } -TEST(Misc, forEachInvalid) +TEST(misc, for_each_invalid) { std::string s{"a" "\xFF" "b"}; - ASSERT_ANY_THROW(unicode::forEach(s, [&] (char32_t) { })); + ASSERT_ANY_THROW(unicode::for_each(s, [&] (char32_t) { })); } -int main(int argc, char **argv) +int main(int argc, char** argv) { InitGoogleTest(&argc, argv);
--- a/unicode.cpp Mon Jan 02 20:43:34 2017 +0100 +++ b/unicode.cpp Thu Feb 02 18:07:27 2017 +0100 @@ -29,26 +29,28 @@ namespace { -const char32_t *rbsearch(char32_t c, const char32_t *t, int n, int ne) noexcept +const char32_t *rbsearch(char32_t c, const char32_t* t, int n, int ne) noexcept { - const char32_t *p; - int m; + const char32_t* p; + int m; - while (n > 1) { - m = n >> 1; - p = t + m * ne; + while (n > 1) { + m = n >> 1; + p = t + m * ne; - if (c >= p[0]) { - t = p; - n = n - m; - } else - n = m; - } + if (c >= p[0]) { + t = p; + n = n - m; + } else { + n = m; + } + } - if (n && c >= t[0]) - return t; + if (n && c >= t[0]) { + return t; + } - return nullptr; + return nullptr; } } // !namespace @@ -73,14 +75,15 @@ bool isspace(char32_t c) noexcept { - const char32_t *p; + const char32_t* p; - p = rbsearch(c, isspacer, nelem (isspacer)/2, 2); + p = rbsearch(c, isspacer, nelem (isspacer) / 2, 2); - if (p && c >= p[0] && c <= p[1]) - return true; + if (p && c >= p[0] && c <= p[1]) { + return true; + } - return false; + return false; } namespace { @@ -142,14 +145,15 @@ bool isdigit(char32_t c) noexcept { - const char32_t *p; + const char32_t* p; - p = rbsearch(c, isdigitr, nelem (isdigitr)/2, 2); + p = rbsearch(c, isdigitr, nelem (isdigitr) / 2, 2); - if (p && c >= p[0] && c <= p[1]) - return true; + if (p && c >= p[0] && c <= p[1]) { + return true; + } - return false; + return false; } namespace { @@ -705,19 +709,21 @@ bool isalpha(char32_t c) noexcept { - const char32_t *p; + const char32_t* p; + + p = rbsearch(c, isalphar, nelem (isalphar) / 2, 2); - p = rbsearch(c, isalphar, nelem (isalphar)/2, 2); - - if (p && c >= p[0] && c <= p[1]) - return true; + if (p && c >= p[0] && c <= p[1]) { + return true; + } - p = rbsearch(c, isalphas, nelem (isalphas), 1); + p = rbsearch(c, isalphas, nelem (isalphas), 1); - if (p && c == p[0]) - return true; + if (p && c == p[0]) { + return true; + } - return false; + return false; } namespace { @@ -1363,19 +1369,21 @@ bool isupper(char32_t c) noexcept { - const char32_t *p; + const char32_t* p; + + p = rbsearch(c, isupperr, nelem (isupperr) / 2, 2); - p = rbsearch(c, isupperr, nelem (isupperr)/2, 2); - - if (p && c >= p[0] && c <= p[1]) - return true; + if (p && c >= p[0] && c <= p[1]) { + return true; + } - p = rbsearch(c, isuppers, nelem (isuppers), 1); + p = rbsearch(c, isuppers, nelem (isuppers), 1); - if (p && c == p[0]) - return true; + if (p && c == p[0]) { + return true; + } - return false; + return false; } namespace { @@ -2021,19 +2029,21 @@ bool islower(char32_t c) noexcept { - const char32_t *p; + const char32_t* p; + + p = rbsearch(c, islowerr, nelem (islowerr) / 2, 2); - p = rbsearch(c, islowerr, nelem (islowerr)/2, 2); - - if (p && c >= p[0] && c <= p[1]) - return true; + if (p && c >= p[0] && c <= p[1]) { + return true; + } - p = rbsearch(c, islowers, nelem (islowers), 1); + p = rbsearch(c, islowers, nelem (islowers), 1); - if (p && c == p[0]) - return true; + if (p && c == p[0]) { + return true; + } - return false; + return false; } namespace { @@ -2632,19 +2642,21 @@ bool istitle(char32_t c) noexcept { - const char32_t *p; + const char32_t* p; + + p = rbsearch(c, istitler, nelem (istitler) / 2, 2); - p = rbsearch(c, istitler, nelem (istitler)/2, 2); - - if (p && c >= p[0] && c <= p[1]) - return true; + if (p && c >= p[0] && c <= p[1]) { + return true; + } - p = rbsearch(c, istitles, nelem (istitles), 1); + p = rbsearch(c, istitles, nelem (istitles), 1); - if (p && c == p[0]) - return true; + if (p && c == p[0]) { + return true; + } - return false; + return false; } namespace { @@ -3307,19 +3319,21 @@ char32_t toupper(char32_t c) noexcept { - const char32_t *p; + const char32_t* p; + + p = rbsearch(c, toupperr, nelem (toupperr) / 3, 3); - p = rbsearch(c, toupperr, nelem (toupperr)/3, 3); - - if (p && c >= p[0] && c <= p[1]) - return c + p[2] - 1048576; + if (p && c >= p[0] && c <= p[1]) { + return c + p[2] - 1048576; + } - p = rbsearch(c, touppers, nelem (touppers)/2, 2); + p = rbsearch(c, touppers, nelem (touppers) / 2, 2); - if (p && c == p[0]) - return c + p[1] - 1048576; + if (p && c == p[0]) { + return c + p[1] - 1048576; + } - return c; + return c; } namespace { @@ -3974,19 +3988,21 @@ char32_t tolower(char32_t c) noexcept { - const char32_t *p; + const char32_t* p; + + p = rbsearch(c, tolowerr, nelem (tolowerr) / 3, 3); - p = rbsearch(c, tolowerr, nelem (tolowerr)/3, 3); - - if (p && c >= p[0] && c <= p[1]) - return c + p[2] - 1048576; + if (p && c >= p[0] && c <= p[1]) { + return c + p[2] - 1048576; + } - p = rbsearch(c, tolowers, nelem (tolowers)/2, 2); + p = rbsearch(c, tolowers, nelem (tolowers) / 2, 2); - if (p && c == p[0]) - return c + p[1] - 1048576; + if (p && c == p[0]) { + return c + p[1] - 1048576; + } - return c; + return c; } namespace { @@ -4649,24 +4665,26 @@ char32_t totitle(char32_t c) noexcept { - const char32_t *p; + const char32_t* p; + + p = rbsearch(c, totitler, nelem (totitler) / 3, 3); - p = rbsearch(c, totitler, nelem (totitler)/3, 3); - - if (p && c >= p[0] && c <= p[1]) - return c + p[2] - 1048576; + if (p && c >= p[0] && c <= p[1]) { + return c + p[2] - 1048576; + } - p = rbsearch(c, totitles, nelem (totitles)/2, 2); + p = rbsearch(c, totitles, nelem (totitles) / 2, 2); - if (p && c == p[0]) - return c + p[1] - 1048576; + if (p && c == p[0]) { + return c + p[1] - 1048576; + } - return c; + return c; } void encode(char32_t c, char res[5]) noexcept { - switch (nbytesPoint(c)) { + switch (nbytes_point(c)) { case 1: res[0] = static_cast<char>(c); res[1] = '\0'; @@ -4694,11 +4712,11 @@ } } -void decode(char32_t &c, const char *res) noexcept +void decode(char32_t& c, const char* res) noexcept { c = 0; - switch (nbytesUtf8(res[0])) { + switch (nbytes_utf8(res[0])) { case 1: c = res[0]; break; @@ -4721,55 +4739,64 @@ } } -int nbytesUtf8(char c) noexcept +int nbytes_utf8(char c) noexcept { - if (static_cast<unsigned char>(c) <= 127) + if (static_cast<unsigned char>(c) <= 127) { return 1; - if ((c & 0xE0) == 0xC0) + } + if ((c & 0xE0) == 0xC0) { return 2; - if ((c & 0xF0) == 0xE0) + } + if ((c & 0xF0) == 0xE0) { return 3; - if ((c & 0xF8) == 0xF0) + } + if ((c & 0xF8) == 0xF0) { return 4; + } return -1; } -int nbytesPoint(char32_t c) noexcept +int nbytes_point(char32_t c) noexcept { - if (c <= 0x7F) + if (c <= 0x7F) { return 1; - if (c <= 0x7FF) + } + if (c <= 0x7FF) { return 2; - if (c <= 0xFFFF) + } + if (c <= 0xFFFF) { return 3; - if (c <= 0x1FFFFF) + } + if (c <= 0x1FFFFF) { return 4; + } return -1; } -unsigned length(const std::string &str) +unsigned length(const std::string& str) { unsigned total = 0; - forEach(str, [&] (char32_t) { + for_each(str, [&] (char32_t) { ++ total; }); return total; } -std::string toUtf8(const std::u32string &array) +std::string to_utf8(const std::u32string& array) { std::string res; for (size_t i = 0; i < array.size(); ++i) { char tmp[5]; - int size = nbytesPoint(array[i]); + int size = nbytes_point(array[i]); - if (size < 0) + if (size < 0) { throw std::invalid_argument("invalid sequence"); + } encode(array[i], tmp); res.insert(res.length(), tmp); @@ -4778,11 +4805,11 @@ return res; } -std::u32string toUtf32(const std::string &str) +std::u32string to_utf32(const std::string& str) { std::u32string res; - forEach(str, [&] (char32_t code) { + for_each(str, [&] (char32_t code) { res.push_back(code); });
--- a/unicode.hpp Mon Jan 02 20:43:34 2017 +0100 +++ b/unicode.hpp Thu Feb 02 18:07:27 2017 +0100 @@ -31,7 +31,8 @@ * * ## Export macros * - * You must define `UNICODE_DLL` globally and `UNICODE_BUILDING_DLL` when compiling the library if you want a DLL, alternatively you can provide + * You must define `UNICODE_DLL` globally and `UNICODE_BUILDING_DLL` when + * compiling the library if you want a DLL, alternatively you can provide * your own `UNICODE_EXPORT` macro instead. */ @@ -81,7 +82,7 @@ * \param c the code point destination * \param res the multibyte string. */ -UNICODE_EXPORT void decode(char32_t &c, const char *res) noexcept; +UNICODE_EXPORT void decode(char32_t& c, const char* res) noexcept; /** * Get the number of bytes for the first multi byte character from a @@ -93,7 +94,7 @@ * \param c the first multi byte character * \return the number of bytes [1-4] or -1 if invalid */ -UNICODE_EXPORT int nbytesUtf8(char c) noexcept; +UNICODE_EXPORT int nbytes_utf8(char c) noexcept; /** * Get the number of bytes for the unicode point. @@ -101,7 +102,7 @@ * \param point the unicode point * \return the number of bytes [1-4] or -1 if invalid */ -UNICODE_EXPORT int nbytesPoint(char32_t point) noexcept; +UNICODE_EXPORT int nbytes_point(char32_t point) noexcept; /** * Get real number of character in a string. @@ -110,7 +111,7 @@ * \return the length * \throw std::invalid_argument on invalid sequence */ -UNICODE_EXPORT unsigned length(const std::string &str); +UNICODE_EXPORT unsigned length(const std::string& str); /** * Iterate over all real characters in the UTF-8 string. @@ -123,14 +124,15 @@ * \throw std::invalid_argument on invalid sequence */ template <typename Func> -void forEach(const std::string &str, Func function) +void for_each(const std::string& str, Func function) { for (size_t i = 0; i < str.size(); ) { char32_t point = 0; - int size = nbytesUtf8(str[i]); + int size = nbytes_utf8(str[i]); - if (size < 0) + if (size < 0) { throw std::invalid_argument("invalid sequence"); + } decode(point, str.data() + i); function(point); @@ -146,7 +148,7 @@ * \return the UTF-8 string * \throw std::invalid_argument on invalid sequence */ -UNICODE_EXPORT std::string toUtf8(const std::u32string &array); +UNICODE_EXPORT std::string to_utf8(const std::u32string& array); /** * Convert a UTF-8 string to UTF-32 string. @@ -155,7 +157,7 @@ * \return the UTF-32 string * \throw std::invalid_argument on invalid sequence */ -UNICODE_EXPORT std::u32string toUtf32(const std::string &str); +UNICODE_EXPORT std::u32string to_utf32(const std::string& str); /** * Check if the unicode character is space. @@ -237,8 +239,9 @@ */ inline std::u32string toupper(std::u32string str) { - for (size_t i = 0; i < str.size(); ++i) + for (size_t i = 0; i < str.size(); ++i) { str[i] = toupper(str[i]); + } return str; } @@ -250,12 +253,12 @@ * \return the upper case string * \warning very slow at the moment */ -inline std::string toupper(const std::string &str) +inline std::string toupper(const std::string& str) { std::string result; char buffer[5]; - forEach(str, [&] (char32_t code) { + for_each(str, [&] (char32_t code) { encode(toupper(code), buffer); result += buffer; }); @@ -271,8 +274,9 @@ */ inline std::u32string tolower(std::u32string str) { - for (size_t i = 0; i < str.size(); ++i) + for (size_t i = 0; i < str.size(); ++i) { str[i] = tolower(str[i]); + } return str; } @@ -284,12 +288,12 @@ * \return the lower case string * \warning very slow at the moment */ -inline std::string tolower(const std::string &str) +inline std::string tolower(const std::string& str) { std::string result; char buffer[5]; - forEach(str, [&] (char32_t code) { + for_each(str, [&] (char32_t code) { encode(tolower(code), buffer); result += buffer; });