changeset 307:e2a8cbf2dd79

Utf8: * Add unit tests * Rename toucs -> toutf32
author David Demelier <markand@malikania.fr>
date Wed, 10 Dec 2014 10:39:38 +0100
parents 4fac25f2b251
children b4dcf2e0013e
files C++/Tests/Utf8/CMakeLists.txt C++/Tests/Utf8/main.cpp C++/Utf8.cpp C++/Utf8.h CMakeLists.txt
diffstat 5 files changed, 371 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/C++/Tests/Utf8/CMakeLists.txt	Wed Dec 10 10:39:38 2014 +0100
@@ -0,0 +1,26 @@
+#
+# CMakeLists.txt -- tests for Utf8
+#
+# Copyright (c) 2013, 2014 David Demelier <markand@malikania.fr>
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+#
+
+# Files handed to define_test(): the Utf8 sources plus the test's main.cpp.
+set(
+	SOURCES
+	${code_SOURCE_DIR}/C++/Utf8.cpp
+	${code_SOURCE_DIR}/C++/Utf8.h
+	main.cpp
+)
+define_test(utf8 "${SOURCES}")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/C++/Tests/Utf8/main.cpp	Wed Dec 10 10:39:38 2014 +0100
@@ -0,0 +1,327 @@
+/*
+ * main.cpp -- main test file for Utf8
+ *
+ * Copyright (c) 2013, 2014 David Demelier <markand@malikania.fr>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * /!\ Be sure to keep this file with UTF-8 encoding /!\
+ */
+
+#include <stdexcept>
+#include <gtest/gtest.h>
+#include <Utf8.h>
+
+using namespace testing;
+
+/* --------------------------------------------------------
+ * Conversion UTF32 -> UTF8
+ * -------------------------------------------------------- */
+
+/* ASCII code points are expected to encode to the very same bytes. */
+TEST(Conversion32to8, ascii)
+{
+	try {
+		const std::u32string codepoints = {'a', 'b', 'c'};
+		const std::string encoded = Utf8::toutf8(codepoints);
+		ASSERT_EQ("abc", encoded);
+	} catch (const std::exception &ex) {
+		FAIL() << ex.what();
+	}
+}
+
+/* A code point above the ASCII range (é) must come out as its UTF-8 bytes. */
+TEST(Conversion32to8, valid)
+{
+	try {
+		const std::u32string codepoints = {'a', 0xE9 /* é */, 'c'};
+		const std::string encoded = Utf8::toutf8(codepoints);
+		ASSERT_EQ("aéc", encoded);
+	} catch (const std::exception &ex) {
+		FAIL() << ex.what();
+	}
+}
+
+/*
+ * 0xFFFFFFFF cannot be encoded.  Utf8.h documents std::invalid_argument for
+ * an invalid sequence, so require exactly that type instead of accepting any
+ * std::exception (which would also hide unrelated failures).
+ */
+TEST(Conversion32to8, invalid)
+{
+	const std::u32string u32{'a', 0xFFFFFFFF, 'c'};
+
+	ASSERT_THROW(Utf8::toutf8(u32), std::invalid_argument);
+}
+
+/* --------------------------------------------------------
+ * Conversion UTF8 -> UTF32
+ * -------------------------------------------------------- */
+
+/* Decoding plain ASCII is expected to give one code point per byte. */
+TEST(Conversion8to32, ascii)
+{
+	try {
+		const std::string bytes("abc");
+		const std::u32string wanted = {'a', 'b', 'c'};
+		const std::u32string decoded = Utf8::toutf32(bytes);
+		ASSERT_EQ(wanted, decoded);
+	} catch (const std::exception &ex) {
+		FAIL() << ex.what();
+	}
+}
+
+/* The UTF-8 bytes of é must decode to the single code point 233 (0xE9). */
+TEST(Conversion8to32, valid)
+{
+	try {
+		const std::string bytes("aéc");
+		const std::u32string wanted = {'a', 0xE9 /* é */, 'c'};
+		const std::u32string decoded = Utf8::toutf32(bytes);
+		ASSERT_EQ(wanted, decoded);
+	} catch (const std::exception &ex) {
+		FAIL() << ex.what();
+	}
+}
+
+/* --------------------------------------------------------
+ * UTF32 to upper
+ * -------------------------------------------------------- */
+
+/* Upper-casing ASCII letters held in a UTF-32 string. */
+TEST(Toupper32, ascii)
+{
+	try {
+		const std::u32string lower = {'a', 'b', 'c'};
+		const std::u32string wanted = {'A', 'B', 'C'};
+		const std::u32string converted = Utf8::toupper(lower);
+		ASSERT_EQ(wanted, converted);
+	} catch (const std::exception &ex) {
+		FAIL() << ex.what();
+	}
+}
+
+/* Upper-casing accented letters (ä, ç, ë) held in a UTF-32 string. */
+TEST(Toupper32, valid)
+{
+	try {
+		const std::u32string lower = {0xE4 /* ä */, 0xE7 /* ç */, 0xEB /* ë */};
+		const std::u32string wanted = {0xC4 /* Ä */, 0xC7 /* Ç */, 0xCB /* Ë */};
+		const std::u32string converted = Utf8::toupper(lower);
+		ASSERT_EQ(wanted, converted);
+	} catch (const std::exception &ex) {
+		FAIL() << ex.what();
+	}
+}
+
+/* An out-of-range value must pass through unchanged and must not throw. */
+TEST(Toupper32, invalid)
+{
+	try {
+		const std::u32string lower = {'a', 0xFFFFFFFF, 'b'};
+		const std::u32string wanted = {'A', 0xFFFFFFFF, 'B'};
+		const std::u32string converted = Utf8::toupper(lower);
+		ASSERT_EQ(wanted, converted);
+	} catch (const std::exception &ex) {
+		FAIL() << ex.what();
+	}
+}
+
+/* --------------------------------------------------------
+ * UTF32 to lower
+ * -------------------------------------------------------- */
+
+/* Lower-casing ASCII letters held in a UTF-32 string. */
+TEST(Tolower32, ascii)
+{
+	try {
+		const std::u32string upper = {'A', 'B', 'C'};
+		const std::u32string wanted = {'a', 'b', 'c'};
+		const std::u32string converted = Utf8::tolower(upper);
+		ASSERT_EQ(wanted, converted);
+	} catch (const std::exception &ex) {
+		FAIL() << ex.what();
+	}
+}
+
+/* Lower-casing accented letters (Ä, Ç, Ë) held in a UTF-32 string. */
+TEST(Tolower32, valid)
+{
+	try {
+		const std::u32string upper = {0xC4 /* Ä */, 0xC7 /* Ç */, 0xCB /* Ë */};
+		const std::u32string wanted = {0xE4 /* ä */, 0xE7 /* ç */, 0xEB /* ë */};
+		const std::u32string converted = Utf8::tolower(upper);
+		ASSERT_EQ(wanted, converted);
+	} catch (const std::exception &ex) {
+		FAIL() << ex.what();
+	}
+}
+
+/* An out-of-range value must pass through unchanged and must not throw. */
+TEST(Tolower32, invalid)
+{
+	try {
+		const std::u32string upper = {'A', 0xFFFFFFFF, 'B'};
+		const std::u32string wanted = {'a', 0xFFFFFFFF, 'b'};
+		const std::u32string converted = Utf8::tolower(upper);
+		ASSERT_EQ(wanted, converted);
+	} catch (const std::exception &ex) {
+		FAIL() << ex.what();
+	}
+}
+
+/* --------------------------------------------------------
+ * UTF8 to upper
+ * -------------------------------------------------------- */
+
+/* Upper-casing an ASCII-only UTF-8 string. */
+TEST(Toupper8, ascii)
+{
+	try {
+		const std::string lower("abc");
+		const std::string converted = Utf8::toupper(lower);
+		ASSERT_EQ("ABC", converted);
+	} catch (const std::exception &ex) {
+		FAIL() << ex.what();
+	}
+}
+
+/* Upper-casing a UTF-8 string that contains the multi-byte letter é. */
+TEST(Toupper8, valid)
+{
+	try {
+		const std::string lower("aéc");
+		const std::string converted = Utf8::toupper(lower);
+		ASSERT_EQ("AÉC", converted);
+	} catch (const std::exception &ex) {
+		FAIL() << ex.what();
+	}
+}
+
+/*
+ * 0xFF can never start a UTF-8 sequence.  The std::string overload decodes
+ * through toutf32(), which Utf8.h documents as throwing
+ * std::invalid_argument, so require that type instead of any std::exception.
+ */
+TEST(Toupper8, invalid)
+{
+	const std::string s{"a" "\xFF""b"};
+
+	ASSERT_THROW(Utf8::toupper(s), std::invalid_argument);
+}
+
+/* --------------------------------------------------------
+ * UTF8 to lower
+ * -------------------------------------------------------- */
+
+/* Lower-casing an ASCII-only UTF-8 string. */
+TEST(Tolower8, ascii)
+{
+	try {
+		const std::string upper("ABC");
+		const std::string converted = Utf8::tolower(upper);
+		ASSERT_EQ("abc", converted);
+	} catch (const std::exception &ex) {
+		FAIL() << ex.what();
+	}
+}
+
+/* Lower-casing a UTF-8 string that contains the multi-byte letter É. */
+TEST(Tolower8, valid)
+{
+	try {
+		const std::string upper("AÉC");
+		const std::string converted = Utf8::tolower(upper);
+		ASSERT_EQ("aéc", converted);
+	} catch (const std::exception &ex) {
+		FAIL() << ex.what();
+	}
+}
+
+/*
+ * 0xFF can never start a UTF-8 sequence.  The std::string overload decodes
+ * through toutf32(), which Utf8.h documents as throwing
+ * std::invalid_argument, so require that type instead of any std::exception.
+ */
+TEST(Tolower8, invalid)
+{
+	const std::string s{"A" "\xFF""B"};
+
+	ASSERT_THROW(Utf8::tolower(s), std::invalid_argument);
+}
+
+/* --------------------------------------------------------
+ * Check functions
+ * -------------------------------------------------------- */
+
+TEST(Check, isspace)	/* the ASCII blank is a space, the letter é is not */
+{
+	ASSERT_TRUE(Utf8::isspace(' '));
+	ASSERT_FALSE(Utf8::isspace(0xE9 /* é */));
+}
+
+TEST(Check, isletter)	/* é counts as a letter, the euro sign does not */
+{
+	ASSERT_TRUE(Utf8::isletter(0xE9 /* é */));
+	ASSERT_FALSE(Utf8::isletter(0x20AC /* € */));
+}
+
+TEST(Check, isupper)	/* ASCII and accented letters, both cases */
+{
+	ASSERT_FALSE(Utf8::isupper('a'));
+	ASSERT_FALSE(Utf8::isupper(0xE9 /* é */));
+	ASSERT_TRUE(Utf8::isupper('A'));
+	ASSERT_TRUE(Utf8::isupper(0xC9 /* É */));
+}
+
+TEST(Check, islower)	/* ASCII and accented letters, both cases */
+{
+	ASSERT_TRUE(Utf8::islower('a'));
+	ASSERT_TRUE(Utf8::islower(0xE9 /* é */));
+	ASSERT_FALSE(Utf8::islower('A'));
+	ASSERT_FALSE(Utf8::islower(0xC9 /* É */));
+}
+
+/* --------------------------------------------------------
+ * Miscellaneous
+ * -------------------------------------------------------- */
+
+TEST(Misc, nbytesPoint)	/* one sample code point per encoded length, 1 to 4 */
+{
+	ASSERT_EQ(1, Utf8::nbytesPoint('a'));
+	ASSERT_EQ(2, Utf8::nbytesPoint(0xE9 /* é */));
+	ASSERT_EQ(3, Utf8::nbytesPoint(0x20AC /* € */));
+	ASSERT_EQ(4, Utf8::nbytesPoint(0x20000 /* 𠀀 */));
+}
+
+/* Each call is given only the first byte of a 1- to 4-byte sequence. */
+TEST(Misc, nbytesUtf8)
+{
+	std::string oneByte("a");
+	std::string twoBytes("é");
+	std::string threeBytes("€");
+	std::string fourBytes("𠀀");
+	ASSERT_EQ(1, Utf8::nbytesUtf8(oneByte[0]));
+	ASSERT_EQ(2, Utf8::nbytesUtf8(twoBytes[0]));
+	ASSERT_EQ(3, Utf8::nbytesUtf8(threeBytes[0]));
+	ASSERT_EQ(4, Utf8::nbytesUtf8(fourBytes[0]));
+}
+
+/* Entry point: hand the command line to Google Test, then run every test. */
+int main(int argc, char **argv)
+{
+	::testing::InitGoogleTest(&argc, argv);
+	return RUN_ALL_TESTS();
+}
--- a/C++/Utf8.cpp	Mon Nov 17 20:29:09 2014 +0100
+++ b/C++/Utf8.cpp	Wed Dec 10 10:39:38 2014 +0100
@@ -1,5 +1,5 @@
 /*
- * Utf8.cpp -- UTF-8 to UCS-4 conversions
+ * Utf8.cpp -- UTF-8 to UTF-32 conversions
  *
  * Copyright (c) 2013, 2014 David Demelier <markand@malikania.fr>
  *
@@ -4472,7 +4472,7 @@
 	return res;
 }
 
-std::u32string Utf8::toucs(const std::string &str)
+std::u32string Utf8::toutf32(const std::string &str)
 {
 	std::u32string res;
 
--- a/C++/Utf8.h	Mon Nov 17 20:29:09 2014 +0100
+++ b/C++/Utf8.h	Wed Dec 10 10:39:38 2014 +0100
@@ -1,5 +1,5 @@
 /*
- * Utf8.h -- UTF-8 to UCS-4 conversions
+ * Utf8.h -- UTF-8 to UTF-32 conversions
  *
  * Copyright (c) 2013, 2014 David Demelier <markand@malikania.fr>
  *
@@ -21,7 +21,7 @@
 
 /**
  * @file Utf8.h
- * @brief UTF-8 to UCS-4 conversions
+ * @brief UTF-8 to UTF-32 conversions
  */
 
 #include <cstdint>
@@ -30,7 +30,7 @@
 
 /**
  * @class Utf8
- * @brief Conversion between UTF-8 and UCS-4
+ * @brief Conversion between UTF-8 and UTF-32
  */
 class Utf8 {
 private:
@@ -65,22 +65,22 @@
 	static size_t length(const std::string &str);
 
 	/**
-	 * Convert a UCS-4 string to UTF-8 string.
+	 * Convert a UTF-32 string to UTF-8 string.
 	 *
-	 * @param array the UCS-4 string
+	 * @param array the UTF-32 string
 	 * @return the UTF-8 string
 	 * @throw std::invalid_argument on invalid sequence
 	 */
 	static std::string toutf8(const std::u32string &array);
 
 	/**
-	 * Convert a UTF-8 string to UCS-4 string.
+	 * Convert a UTF-8 string to UTF-32 string.
 	 *
 	 * @param str the UTF-8 string
-	 * @return the UCS-4 string
+	 * @return the UTF-32 string
 	 * @throw std::invalid_argument on invalid sequence
 	 */
-	static std::u32string toucs(const std::string &str);
+	static std::u32string toutf32(const std::string &str);
 
 	/**
 	 * Check if the unicode character is space.
@@ -162,11 +162,11 @@
 	 */
 	static inline std::string toupper(const std::string &str)
 	{
-		return toutf8(toupper(toucs(str)));
+		return toutf8(toupper(toutf32(str)));
 	}
 
 	/**
-	 * Convert the UCS-4 string to upper case.
+	 * Convert the UTF-32 string to upper case.
 	 *
 	 * @param str the str
 	 * @return the upper case string
@@ -189,11 +189,11 @@
 	 */
 	static inline std::string tolower(const std::string &str)
 	{
-		return toutf8(tolower(toucs(str)));
+		return toutf8(tolower(toutf32(str)));
 	}
 
 	/**
-	 * Convert the UCS-4 string to lower case.
+	 * Convert the UTF-32 string to lower case.
 	 *
 	 * @param str the str
 	 * @return the lower case string
--- a/CMakeLists.txt	Mon Nov 17 20:29:09 2014 +0100
+++ b/CMakeLists.txt	Wed Dec 10 10:39:38 2014 +0100
@@ -105,6 +105,10 @@
 	add_subdirectory(C++/Tests/TreeNode)
 endif ()
 
+if (WITH_UTF8)	# add the Utf8 test directory only when WITH_UTF8 is set
+	add_subdirectory(C++/Tests/Utf8)
+endif ()
+
 if (WITH_XDG AND UNIX)
 	add_subdirectory(C++/Tests/Xdg)
 endif ()