Mercurial > code
changeset 307:e2a8cbf2dd79
Utf8:
* Add unit tests
* Rename Utf8::toucs -> Utf8::toutf32 (and update the comments from "UCS-4" to "UTF-32")
author | David Demelier <markand@malikania.fr> |
---|---|
date | Wed, 10 Dec 2014 10:39:38 +0100 |
parents | 4fac25f2b251 |
children | b4dcf2e0013e |
files | C++/Tests/Utf8/CMakeLists.txt C++/Tests/Utf8/main.cpp C++/Utf8.cpp C++/Utf8.h CMakeLists.txt |
diffstat | 5 files changed, 371 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/C++/Tests/Utf8/CMakeLists.txt Wed Dec 10 10:39:38 2014 +0100 @@ -0,0 +1,26 @@ +# +# CMakeLists.txt -- tests for Utf8 +# +# Copyright (c) 2013, 2014 David Demelier <markand@malikania.fr> +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +# + +set( + SOURCES + ${code_SOURCE_DIR}/C++/Utf8.cpp + ${code_SOURCE_DIR}/C++/Utf8.h + main.cpp +) + +define_test(utf8 "${SOURCES}")
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/C++/Tests/Utf8/main.cpp Wed Dec 10 10:39:38 2014 +0100 @@ -0,0 +1,327 @@ +/* + * main.cpp -- main test file for Utf8 + * + * Copyright (c) 2013, 2014 David Demelier <markand@malikania.fr> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * /!\ Be sure to keep this file with UTF-8 encoding /!\ + */ + +#include <gtest/gtest.h> + +#include <Utf8.h> + +using namespace testing; + +/* -------------------------------------------------------- + * Conversion UTF32 -> UTF8 + * -------------------------------------------------------- */ + +TEST(Conversion32to8, ascii) +{ + try { + std::u32string u32{'a', 'b', 'c'}; + std::string s = Utf8::toutf8(u32); + + ASSERT_EQ("abc", s); + } catch (const std::exception &ex) { + FAIL() << ex.what(); + } +} + +TEST(Conversion32to8, valid) +{ + try { + std::u32string u32{'a', /* é */ 233, 'c'}; + std::string s = Utf8::toutf8(u32); + + ASSERT_EQ("aéc", s); + } catch (const std::exception &ex) { + FAIL() << ex.what(); + } +} + +TEST(Conversion32to8, invalid) +{ + try { + std::u32string u32{'a', 0xFFFFFFFF, 'c'}; + std::string s = Utf8::toutf8(u32); + + FAIL() << "expected a failure"; + } catch (const std::exception &ex) { + SUCCEED(); + } +} + +/* 
-------------------------------------------------------- + * Conversion UTF8 -> UTF32 + * -------------------------------------------------------- */ + +TEST(Conversion8to32, ascii) +{ + try { + std::string s{"abc"}; + std::u32string expected{'a', 'b', 'c'}; + std::u32string result = Utf8::toutf32(s); + + ASSERT_EQ(expected, result); + } catch (const std::exception &ex) { + FAIL() << ex.what(); + } +} + +TEST(Conversion8to32, valid) +{ + try { + std::string s{"aéc"}; + std::u32string expected{'a', /* é */ 233, 'c'}; + std::u32string result = Utf8::toutf32(s); + + ASSERT_EQ(expected, result); + } catch (const std::exception &ex) { + FAIL() << ex.what(); + } +} + +/* -------------------------------------------------------- + * UTF32 to upper + * -------------------------------------------------------- */ + +TEST(Toupper32, ascii) +{ + try { + std::u32string u32{'a', 'b', 'c'}; + std::u32string expected{'A', 'B', 'C'}; + std::u32string result = Utf8::toupper(u32); + + ASSERT_EQ(expected, result); + } catch (const std::exception &ex) { + FAIL() << ex.what(); + } +} + +TEST(Toupper32, valid) +{ + try { + std::u32string u32{/* ä */ 228, /* ç */ 231, /* ë */ 235}; + std::u32string expected{/* Ä */ 196, /* Ç */ 199, /* Ë */ 203}; + std::u32string result = Utf8::toupper(u32); + + ASSERT_EQ(expected, result); + } catch (const std::exception &ex) { + FAIL() << ex.what(); + } +} + +TEST(Toupper32, invalid) +{ + try { + std::u32string u32{'a', 0xFFFFFFFF, 'b'}; + std::u32string expected{'A', 0xFFFFFFFF, 'B'}; + std::u32string result = Utf8::toupper(u32); + + ASSERT_EQ(expected, result); + } catch (const std::exception &ex) { + FAIL() << ex.what(); + } +} + +/* -------------------------------------------------------- + * UTF32 to lower + * -------------------------------------------------------- */ + +TEST(Tolower32, ascii) +{ + try { + std::u32string u32{'A', 'B', 'C'}; + std::u32string expected{'a', 'b', 'c'}; + std::u32string result = Utf8::tolower(u32); + + 
ASSERT_EQ(expected, result); + } catch (const std::exception &ex) { + FAIL() << ex.what(); + } +} + +TEST(Tolower32, valid) +{ + try { + std::u32string u32{/* Ä */ 196, /* Ç */ 199, /* Ë */ 203}; + std::u32string expected{/* ä */ 228, /* ç */ 231, /* ë */ 235}; + std::u32string result = Utf8::tolower(u32); + + ASSERT_EQ(expected, result); + } catch (const std::exception &ex) { + FAIL() << ex.what(); + } +} + +TEST(Tolower32, invalid) +{ + try { + std::u32string u32{'A', 0xFFFFFFFF, 'B'}; + std::u32string expected{'a', 0xFFFFFFFF, 'b'}; + std::u32string result = Utf8::tolower(u32); + + ASSERT_EQ(expected, result); + } catch (const std::exception &ex) { + FAIL() << ex.what(); + } +} + +/* -------------------------------------------------------- + * UTF8 to upper + * -------------------------------------------------------- */ + +TEST(Toupper8, ascii) +{ + try { + std::string s{"abc"}; + std::string r = Utf8::toupper(s); + + ASSERT_EQ("ABC", r); + } catch (const std::exception &ex) { + FAIL() << ex.what(); + } +} + +TEST(Toupper8, valid) +{ + try { + std::string s{"aéc"}; + std::string r = Utf8::toupper(s); + + ASSERT_EQ("AÉC", r); + } catch (const std::exception &ex) { + FAIL() << ex.what(); + } +} + +TEST(Toupper8, invalid) +{ + try { + std::string s{"a" "\xFF""b"}; + std::string r = Utf8::toupper(s); + + FAIL() << "expected a failure"; + } catch (const std::exception &ex) { + SUCCEED(); + } +} + +/* -------------------------------------------------------- + * UTF8 to lower + * -------------------------------------------------------- */ + +TEST(Tolower8, ascii) +{ + try { + std::string s{"ABC"}; + std::string r = Utf8::tolower(s); + + ASSERT_EQ("abc", r); + } catch (const std::exception &ex) { + FAIL() << ex.what(); + } +} + +TEST(Tolower8, valid) +{ + try { + std::string s{"AÉC"}; + std::string r = Utf8::tolower(s); + + ASSERT_EQ("aéc", r); + } catch (const std::exception &ex) { + FAIL() << ex.what(); + } +} + +TEST(Tolower8, invalid) +{ + try { + std::string s{"A" 
"\xFF""B"}; + std::string r = Utf8::tolower(s); + + FAIL() << "expected a failure"; + } catch (const std::exception &ex) { + SUCCEED(); + } +} + +/* -------------------------------------------------------- + * Check functions + * -------------------------------------------------------- */ + +TEST(Check, isspace) +{ + ASSERT_TRUE(Utf8::isspace(' ')); + ASSERT_FALSE(Utf8::isspace(/* é */ 233)); +} + +TEST(Check, isletter) +{ + ASSERT_TRUE(Utf8::isletter(/* é */ 233)); + ASSERT_FALSE(Utf8::isletter(/* € */ 8364)); +} + +TEST(Check, isupper) +{ + ASSERT_FALSE(Utf8::isupper('a')); + ASSERT_FALSE(Utf8::isupper(/* é */ 233)); + ASSERT_TRUE(Utf8::isupper('A')); + ASSERT_TRUE(Utf8::isupper(/* É */ 201)); +} + +TEST(Check, islower) +{ + ASSERT_TRUE(Utf8::islower('a')); + ASSERT_TRUE(Utf8::islower(/* é */ 233)); + ASSERT_FALSE(Utf8::islower('A')); + ASSERT_FALSE(Utf8::islower(/* É */ 201)); +} + +/* -------------------------------------------------------- + * Miscellaneous + * -------------------------------------------------------- */ + +TEST(Misc, nbytesPoint) +{ + ASSERT_EQ(1, Utf8::nbytesPoint('a')); + ASSERT_EQ(2, Utf8::nbytesPoint(/* é */ 233)); + ASSERT_EQ(3, Utf8::nbytesPoint(/* € */ 8364)); + ASSERT_EQ(4, Utf8::nbytesPoint(/* 𠀀 */ 131072)); +} + +TEST(Misc, nbytesUtf8) +{ + std::string s1{"a"}; + std::string s2{"é"}; + std::string s3{"€"}; + std::string s4{"𠀀"}; + + ASSERT_EQ(1, Utf8::nbytesUtf8(s1[0])); + ASSERT_EQ(2, Utf8::nbytesUtf8(s2[0])); + ASSERT_EQ(3, Utf8::nbytesUtf8(s3[0])); + ASSERT_EQ(4, Utf8::nbytesUtf8(s4[0])); +} + +int main(int argc, char **argv) +{ + InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +}
--- a/C++/Utf8.cpp Mon Nov 17 20:29:09 2014 +0100 +++ b/C++/Utf8.cpp Wed Dec 10 10:39:38 2014 +0100 @@ -1,5 +1,5 @@ /* - * Utf8.cpp -- UTF-8 to UCS-4 conversions + * Utf8.cpp -- UTF-8 to UTF-32 conversions * * Copyright (c) 2013, 2014 David Demelier <markand@malikania.fr> * @@ -4472,7 +4472,7 @@ return res; } -std::u32string Utf8::toucs(const std::string &str) +std::u32string Utf8::toutf32(const std::string &str) { std::u32string res;
--- a/C++/Utf8.h Mon Nov 17 20:29:09 2014 +0100 +++ b/C++/Utf8.h Wed Dec 10 10:39:38 2014 +0100 @@ -1,5 +1,5 @@ /* - * Utf8.h -- UTF-8 to UCS-4 conversions + * Utf8.h -- UTF-8 to UTF-32 conversions * * Copyright (c) 2013, 2014 David Demelier <markand@malikania.fr> * @@ -21,7 +21,7 @@ /** * @file Utf8.h - * @brief UTF-8 to UCS-4 conversions + * @brief UTF-8 to UTF-32 conversions */ #include <cstdint> @@ -30,7 +30,7 @@ /** * @class Utf8 - * @brief Conversion between UTF-8 and UCS-4 + * @brief Conversion between UTF-8 and UTF-32 */ class Utf8 { private: @@ -65,22 +65,22 @@ static size_t length(const std::string &str); /** - * Convert a UCS-4 string to UTF-8 string. + * Convert a UTF-32 string to UTF-8 string. * - * @param array the UCS-4 string + * @param array the UTF-32 string * @return the UTF-8 string * @throw std::invalid_argument on invalid sequence */ static std::string toutf8(const std::u32string &array); /** - * Convert a UTF-8 string to UCS-4 string. + * Convert a UTF-8 string to UTF-32 string. * * @param str the UTF-8 string - * @return the UCS-4 string + * @return the UTF-32 string * @throw std::invalid_argument on invalid sequence */ - static std::u32string toucs(const std::string &str); + static std::u32string toutf32(const std::string &str); /** * Check if the unicode character is space. @@ -162,11 +162,11 @@ */ static inline std::string toupper(const std::string &str) { - return toutf8(toupper(toucs(str))); + return toutf8(toupper(toutf32(str))); } /** - * Convert the UCS-4 string to upper case. + * Convert the UTF-32 string to upper case. * * @param str the str * @return the upper case string @@ -189,11 +189,11 @@ */ static inline std::string tolower(const std::string &str) { - return toutf8(tolower(toucs(str))); + return toutf8(tolower(toutf32(str))); } /** - * Convert the UCS-4 string to lower case. + * Convert the UTF-32 string to lower case. * * @param str the str * @return the lower case string