changeset 24:23ceab03a393 1.0.0

unicode: remove uni_totitle. While here, add more consts.
author David Demelier <markand@malikania.fr>
date Wed, 23 Mar 2022 13:17:10 +0100
parents 4983392b356f
children a70dbbce88eb
files gen/mkutf.awk libunicode.3 tests/test-unicode.c unicode.c unicode.h
diffstat 5 files changed, 47 insertions(+), 85 deletions(-) [+]
line wrap: on
line diff
--- a/gen/mkutf.awk	Wed Mar 23 11:46:41 2022 +0100
+++ b/gen/mkutf.awk	Wed Mar 23 13:17:10 2022 +0100
@@ -81,7 +81,7 @@
 $3 == "Cc" { cntrlv[cntrlc++] = $1; }
 $3 == "Lu" { upperv[upperc++] = $1; tolowerv[uppercc++] = ($14 == "") ? $1 : $14; }
 $3 == "Ll" { lowerv[lowerc++] = $1; toupperv[lowercc++] = ($13 == "") ? $1 : $13; }
-$3 == "Lt" { titlev[titlec++] = $1; totitlev[lotitlecc++] = ($13 == "") ? $1 : $13;}
+$3 == "Lt" { titlev[titlec++] = $1; }
 $3 == "Nd" { digitv[digitc++] = $1; }
 
 END {
@@ -90,7 +90,7 @@
 	mkis("control", cntrlv, cntrlc, q, "");
 	mkis("upper", upperv, upperc, tolowerv, "lower");
 	mkis("lower", lowerv, lowerc, toupperv, "upper");
-	mkis("title", titlev, titlec, totitlev, "title");
+	mkis("title", titlev, titlec, q, "");
 	mkis("digit", digitv, digitc, q, "");
 }
 
@@ -104,7 +104,7 @@
 	return x;
 }
 
-# generate 'uni_is<name>' unicode lookup function
+# generate 'is<name>' unicode lookup function
 function mkis(name, runev, runec, casev, casename) {
 	rune1c = 0;
 	rune2c = 0;
@@ -189,7 +189,7 @@
 
 	#generate list of laces 1
 	if(rune3c > 0) {
-		print "static uint32_t "name"3[][2] = {";
+		print "static const uint32_t "name"3[][2] = {";
 		for(j = 0; j < rune3c; j++) {
 			print "\t{ 0x"rune3v0[j]", 0x"rune3v1[j]" },";
 		}
@@ -198,7 +198,7 @@
 
 	#generate list of laces 2
 	if(rune4c > 0) {
-		print "static uint32_t "name"4[][2] = {";
+		print "static const uint32_t "name"4[][2] = {";
 		for(j = 0; j < rune4c; j++) {
 			print "\t{ 0x"rune4v0[j]", 0x"rune4v1[j]" },";
 		}
@@ -208,12 +208,12 @@
 	# generate list of ranges
 	if(rune2c > 0) {
 		if(length(casev) > 0) {
-			print "static uint32_t "name"2[][3] = {";
+			print "static const uint32_t "name"2[][3] = {";
 			for(j = 0; j < rune2c; j++) {
 				print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]", 0x"case2v[j]" },";
 			}
 		} else {
-			print "static uint32_t "name"2[][2] = {"
+			print "static const uint32_t "name"2[][2] = {"
 			for(j = 0; j < rune2c; j++) {
 				print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]" },";
 			}
@@ -224,12 +224,12 @@
 	# generate list of singletons
 	if(rune1c > 0) {
 		if(length(casev) > 0) {
-			print "static uint32_t "name"1[][2] = {";
+			print "static const uint32_t "name"1[][2] = {";
 			for(j = 0; j < rune1c; j++) {
 				print "\t{ 0x"rune1v[j]", 0x"case1v[j]" },";
 			}
 		} else {
-			print "static uint32_t "name"1[] = {";
+			print "static const uint32_t "name"1[] = {";
 			for(j = 0; j < rune1c; j++) {
 				print "\t0x"rune1v[j]",";
 			}
@@ -260,25 +260,21 @@
 	if(length(casev) > 0) {
 		print "uint32_t\nuni_to"casename"(uint32_t r)\n{\n\tuint32_t *match;\n";
 		if(rune4c > 0) {
-			print "\tmatch = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &cmp2);\n";
-			print "\tif (match)";
+			print "\tif ((match = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &cmp2)))\n";
 			print "\t\treturn ((r - match[0]) % 2) ? r : r - 1;";
 		}
 		if(rune3c > 0) {
-			print "\tmatch = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &cmp2);\n";
-			print "\tif (match)";
+			print "\tif ((match = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &cmp2)))\n";
 			print "\t\treturn ((r - match[0]) % 2) ? r : r + 1;";
 		}
 		if(rune2c > 0) {
-			print "\tmatch = bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &cmp2);\n";
-			print "\tif (match)";
+			print "\tif ((match = bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &cmp2)))\n";
 			print "\t\treturn match[2] + (r - match[0]);";
 		}
 		if(rune1c > 0) {
-			print "\tmatch = bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &cmp1);\n";
-			print "\tif (match)";
+			print "\tif ((match = bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &cmp1)))\n";
 			print "\t\treturn match[1];";
 		}
-		print "\treturn r;\n}\n";
+		print "\n\treturn r;\n}\n";
 	}
 }
--- a/libunicode.3	Wed Mar 23 11:46:41 2022 +0100
+++ b/libunicode.3	Wed Mar 23 13:17:10 2022 +0100
@@ -59,8 +59,6 @@
 .Fn uni_toupper "uint32_t c"
 .Ft uint32_t
 .Fn uni_tolower "uint32_t c"
-.Ft uint32_t
-.Fn uni_totitle "uint32_t c"
 .\" DESCRIPTION
 .Sh DESCRIPTION
 This set of functions allows back-and-forth conversions between UTF-8 and
@@ -208,10 +206,6 @@
 returns the lower case variant of the unicode character
 .Fa c .
 .Pp
-The
-.Fn uni_totitle
-returns the title case variant of the unicode character
-.Fa c .
 .\" RETURN VALUES
 .Sh RETURN VALUES
 The
--- a/tests/test-unicode.c	Wed Mar 23 11:46:41 2022 +0100
+++ b/tests/test-unicode.c	Wed Mar 23 13:17:10 2022 +0100
@@ -322,11 +322,6 @@
 	RX_REQUIRE(uni_isupper(U'É'));
 }
 
-RX_TEST_CASE(misc, toupper)
-{
-	RX_INT_REQUIRE_EQUAL(uni_totitle(U's'), 'S');
-}
-
 int
 main(int argc, char **argv)
 {
--- a/unicode.c	Wed Mar 23 11:46:41 2022 +0100
+++ b/unicode.c	Wed Mar 23 13:17:10 2022 +0100
@@ -43,7 +43,7 @@
 		return r - p[0];
 }
 
-static uint32_t alpha3[][2] = {
+static const uint32_t alpha3[][2] = {
 	{ 0x00D6, 0x00D8 },
 	{ 0x00F6, 0x00F8 },
 	{ 0x02EC, 0x02EE },
@@ -238,7 +238,7 @@
 	{ 0x1EEA9, 0x1EEAB },
 };
 
-static uint32_t alpha2[][2] = {
+static const uint32_t alpha2[][2] = {
 	{ 0x0041, 0x005A },
 	{ 0x0061, 0x007A },
 	{ 0x00C0, 0x00D6 },
@@ -745,7 +745,7 @@
 	{ 0x2F800, 0x2FA1D },
 };
 
-static uint32_t alpha1[] = {
+static const uint32_t alpha1[] = {
 	0x00AA,
 	0x00B5,
 	0x00BA,
@@ -856,14 +856,14 @@
 	return 0;
 }
 
-static uint32_t space2[][2] = {
+static const uint32_t space2[][2] = {
 	{ 0x0009, 0x000D },
 	{ 0x001C, 0x0020 },
 	{ 0x2000, 0x200A },
 	{ 0x2028, 0x2029 },
 };
 
-static uint32_t space1[] = {
+static const uint32_t space1[] = {
 	0x0085,
 	0x00A0,
 	0x1680,
@@ -883,7 +883,7 @@
 	return 0;
 }
 
-static uint32_t control2[][2] = {
+static const uint32_t control2[][2] = {
 	{ 0x0000, 0x001F },
 	{ 0x007F, 0x009F },
 };
@@ -897,7 +897,7 @@
 	return 0;
 }
 
-static uint32_t upper3[][2] = {
+static const uint32_t upper3[][2] = {
 	{ 0x0100, 0x012E },
 	{ 0x0132, 0x0136 },
 	{ 0x0139, 0x0147 },
@@ -935,7 +935,7 @@
 	{ 0xA7D6, 0xA7D8 },
 };
 
-static uint32_t upper2[][3] = {
+static const uint32_t upper2[][3] = {
 	{ 0x0041, 0x005A, 0x0061 },
 	{ 0x00C0, 0x00D6, 0x00E0 },
 	{ 0x00D8, 0x00DE, 0x00F8 },
@@ -1018,7 +1018,7 @@
 	{ 0x1E900, 0x1E921, 0x1E922 },
 };
 
-static uint32_t upper1[][2] = {
+static const uint32_t upper1[][2] = {
 	{ 0x0130, 0x0069 },
 	{ 0x0178, 0x00FF },
 	{ 0x0181, 0x0253 },
@@ -1146,22 +1146,20 @@
 {
 	uint32_t *match;
 
-	match = bsearch(&r, upper3, nelem(upper3), sizeof *upper3, &cmp2);
+	if ((match = bsearch(&r, upper3, nelem(upper3), sizeof *upper3, &cmp2)))
 
-	if (match)
 		return ((r - match[0]) % 2) ? r : r + 1;
-	match = bsearch(&r, upper2, nelem(upper2), sizeof *upper2, &cmp2);
+	if ((match = bsearch(&r, upper2, nelem(upper2), sizeof *upper2, &cmp2)))
 
-	if (match)
 		return match[2] + (r - match[0]);
-	match = bsearch(&r, upper1, nelem(upper1), sizeof *upper1, &cmp1);
+	if ((match = bsearch(&r, upper1, nelem(upper1), sizeof *upper1, &cmp1)))
 
-	if (match)
 		return match[1];
+
 	return r;
 }
 
-static uint32_t lower4[][2] = {
+static const uint32_t lower4[][2] = {
 	{ 0x0101, 0x012F },
 	{ 0x0133, 0x0137 },
 	{ 0x013A, 0x0148 },
@@ -1199,7 +1197,7 @@
 	{ 0xA7D7, 0xA7D9 },
 };
 
-static uint32_t lower2[][3] = {
+static const uint32_t lower2[][3] = {
 	{ 0x0061, 0x007A, 0x0041 },
 	{ 0x00E0, 0x00F6, 0x00C0 },
 	{ 0x00F8, 0x00FE, 0x00D8 },
@@ -1317,7 +1315,7 @@
 	{ 0x1E922, 0x1E943, 0x1E900 },
 };
 
-static uint32_t lower1[][2] = {
+static const uint32_t lower1[][2] = {
 	{ 0x00B5, 0x039C },
 	{ 0x00DF, 0x00DF },
 	{ 0x00FF, 0x0178 },
@@ -1500,35 +1498,33 @@
 {
 	uint32_t *match;
 
-	match = bsearch(&r, lower4, nelem(lower4), sizeof *lower4, &cmp2);
+	if ((match = bsearch(&r, lower4, nelem(lower4), sizeof *lower4, &cmp2)))
 
-	if (match)
 		return ((r - match[0]) % 2) ? r : r - 1;
-	match = bsearch(&r, lower2, nelem(lower2), sizeof *lower2, &cmp2);
+	if ((match = bsearch(&r, lower2, nelem(lower2), sizeof *lower2, &cmp2)))
 
-	if (match)
 		return match[2] + (r - match[0]);
-	match = bsearch(&r, lower1, nelem(lower1), sizeof *lower1, &cmp1);
+	if ((match = bsearch(&r, lower1, nelem(lower1), sizeof *lower1, &cmp1)))
 
-	if (match)
 		return match[1];
+
 	return r;
 }
 
-static uint32_t title2[][3] = {
-	{ 0x1F88, 0x1F8F, 0x1F88 },
-	{ 0x1F98, 0x1F9F, 0x1F98 },
-	{ 0x1FA8, 0x1FAF, 0x1FA8 },
+static const uint32_t title2[][2] = {
+	{ 0x1F88, 0x1F8F },
+	{ 0x1F98, 0x1F9F },
+	{ 0x1FA8, 0x1FAF },
 };
 
-static uint32_t title1[][2] = {
-	{ 0x01C5, 0x01C4 },
-	{ 0x01C8, 0x01C7 },
-	{ 0x01CB, 0x01CA },
-	{ 0x01F2, 0x01F1 },
-	{ 0x1FBC, 0x1FBC },
-	{ 0x1FCC, 0x1FCC },
-	{ 0x1FFC, 0x1FFC },
+static const uint32_t title1[] = {
+	0x01C5,
+	0x01C8,
+	0x01CB,
+	0x01F2,
+	0x1FBC,
+	0x1FCC,
+	0x1FFC,
 };
 
 int
@@ -1542,23 +1538,7 @@
 	return 0;
 }
 
-uint32_t
-uni_totitle(uint32_t r)
-{
-	uint32_t *match;
-
-	match = bsearch(&r, title2, nelem(title2), sizeof *title2, &cmp2);
-
-	if (match)
-		return match[2] + (r - match[0]);
-	match = bsearch(&r, title1, nelem(title1), sizeof *title1, &cmp1);
-
-	if (match)
-		return match[1];
-	return r;
-}
-
-static uint32_t digit2[][2] = {
+static const uint32_t digit2[][2] = {
 	{ 0x0030, 0x0039 },
 	{ 0x0660, 0x0669 },
 	{ 0x06F0, 0x06F9 },
--- a/unicode.h	Wed Mar 23 11:46:41 2022 +0100
+++ b/unicode.h	Wed Mar 23 13:17:10 2022 +0100
@@ -80,9 +80,6 @@
 uint32_t
 uni_tolower(uint32_t);
 
-uint32_t
-uni_totitle(uint32_t);
-
 #if defined(__cplusplus)
 }
 #endif