Mercurial > libunicode
changeset 24:23ceab03a393 1.0.0
unicode: remove uni_totitle
While here, add more consts.
author | David Demelier <markand@malikania.fr> |
---|---|
date | Wed, 23 Mar 2022 13:17:10 +0100 |
parents | 4983392b356f |
children | a70dbbce88eb |
files | gen/mkutf.awk libunicode.3 tests/test-unicode.c unicode.c unicode.h |
diffstat | 5 files changed, 47 insertions(+), 85 deletions(-) [+] |
line wrap: on
line diff
--- a/gen/mkutf.awk Wed Mar 23 11:46:41 2022 +0100 +++ b/gen/mkutf.awk Wed Mar 23 13:17:10 2022 +0100 @@ -81,7 +81,7 @@ $3 == "Cc" { cntrlv[cntrlc++] = $1; } $3 == "Lu" { upperv[upperc++] = $1; tolowerv[uppercc++] = ($14 == "") ? $1 : $14; } $3 == "Ll" { lowerv[lowerc++] = $1; toupperv[lowercc++] = ($13 == "") ? $1 : $13; } -$3 == "Lt" { titlev[titlec++] = $1; totitlev[lotitlecc++] = ($13 == "") ? $1 : $13;} +$3 == "Lt" { titlev[titlec++] = $1; } $3 == "Nd" { digitv[digitc++] = $1; } END { @@ -90,7 +90,7 @@ mkis("control", cntrlv, cntrlc, q, ""); mkis("upper", upperv, upperc, tolowerv, "lower"); mkis("lower", lowerv, lowerc, toupperv, "upper"); - mkis("title", titlev, titlec, totitlev, "title"); + mkis("title", titlev, titlec, q, ""); mkis("digit", digitv, digitc, q, ""); } @@ -104,7 +104,7 @@ return x; } -# generate 'uni_is<name>' unicode lookup function +# generate 'is<name>' unicode lookup function function mkis(name, runev, runec, casev, casename) { rune1c = 0; rune2c = 0; @@ -189,7 +189,7 @@ #generate list of laces 1 if(rune3c > 0) { - print "static uint32_t "name"3[][2] = {"; + print "static const uint32_t "name"3[][2] = {"; for(j = 0; j < rune3c; j++) { print "\t{ 0x"rune3v0[j]", 0x"rune3v1[j]" },"; } @@ -198,7 +198,7 @@ #generate list of laces 2 if(rune4c > 0) { - print "static uint32_t "name"4[][2] = {"; + print "static const uint32_t "name"4[][2] = {"; for(j = 0; j < rune4c; j++) { print "\t{ 0x"rune4v0[j]", 0x"rune4v1[j]" },"; } @@ -208,12 +208,12 @@ # generate list of ranges if(rune2c > 0) { if(length(casev) > 0) { - print "static uint32_t "name"2[][3] = {"; + print "static const uint32_t "name"2[][3] = {"; for(j = 0; j < rune2c; j++) { print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]", 0x"case2v[j]" },"; } } else { - print "static uint32_t "name"2[][2] = {" + print "static const uint32_t "name"2[][2] = {" for(j = 0; j < rune2c; j++) { print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]" },"; } @@ -224,12 +224,12 @@ # generate list of singletons if(rune1c > 0) { if(length(casev) > 0) { - print "static uint32_t "name"1[][2] = {"; + print "static const uint32_t "name"1[][2] = {"; for(j = 0; j < rune1c; j++) { print "\t{ 0x"rune1v[j]", 0x"case1v[j]" },"; } } else { - print "static uint32_t "name"1[] = {"; + print "static const uint32_t "name"1[] = {"; for(j = 0; j < rune1c; j++) { print "\t0x"rune1v[j]","; } @@ -260,25 +260,21 @@ if(length(casev) > 0) { print "uint32_t\nuni_to"casename"(uint32_t r)\n{\n\tuint32_t *match;\n"; if(rune4c > 0) { - print "\tmatch = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &cmp2);\n"; - print "\tif (match)"; + print "\tif ((match = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &cmp2)))\n"; print "\t\treturn ((r - match[0]) % 2) ? r : r - 1;"; } if(rune3c > 0) { - print "\tmatch = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &cmp2);\n"; - print "\tif (match)"; + print "\tif ((match = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &cmp2)))\n"; print "\t\treturn ((r - match[0]) % 2) ? r : r + 1;"; } if(rune2c > 0) { - print "\tmatch = bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &cmp2);\n"; - print "\tif (match)"; + print "\tif ((match = bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &cmp2)))\n"; print "\t\treturn match[2] + (r - match[0]);"; } if(rune1c > 0) { - print "\tmatch = bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &cmp1);\n"; - print "\tif (match)"; + print "\tif ((match = bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &cmp1)))\n"; print "\t\treturn match[1];"; } - print "\treturn r;\n}\n"; + print "\n\treturn r;\n}\n"; } }
--- a/libunicode.3 Wed Mar 23 11:46:41 2022 +0100 +++ b/libunicode.3 Wed Mar 23 13:17:10 2022 +0100 @@ -59,8 +59,6 @@ .Fn uni_toupper "uint32_t c" .Ft uint32_t .Fn uni_tolower "uint32_t c" -.Ft uint32_t -.Fn uni_totitle "uint32_t c" .\" DESCRIPTION .Sh DESCRIPTION This set of functions allows back-and-forth conversions between UTF-8 and @@ -208,10 +206,6 @@ returns the lower case variant of the unicode character .Fa c . .Pp -The -.Fn uni_totitle -returns the title case variant of the unicode character -.Fa c . .\" RETURN VALUES .Sh RETURN VALUES The
--- a/tests/test-unicode.c Wed Mar 23 11:46:41 2022 +0100 +++ b/tests/test-unicode.c Wed Mar 23 13:17:10 2022 +0100 @@ -322,11 +322,6 @@ RX_REQUIRE(uni_isupper(U'É')); } -RX_TEST_CASE(misc, toupper) -{ - RX_INT_REQUIRE_EQUAL(uni_totitle(U's'), 'S'); -} - int main(int argc, char **argv) {
--- a/unicode.c Wed Mar 23 11:46:41 2022 +0100 +++ b/unicode.c Wed Mar 23 13:17:10 2022 +0100 @@ -43,7 +43,7 @@ return r - p[0]; } -static uint32_t alpha3[][2] = { +static const uint32_t alpha3[][2] = { { 0x00D6, 0x00D8 }, { 0x00F6, 0x00F8 }, { 0x02EC, 0x02EE }, @@ -238,7 +238,7 @@ { 0x1EEA9, 0x1EEAB }, }; -static uint32_t alpha2[][2] = { +static const uint32_t alpha2[][2] = { { 0x0041, 0x005A }, { 0x0061, 0x007A }, { 0x00C0, 0x00D6 }, @@ -745,7 +745,7 @@ { 0x2F800, 0x2FA1D }, }; -static uint32_t alpha1[] = { +static const uint32_t alpha1[] = { 0x00AA, 0x00B5, 0x00BA, @@ -856,14 +856,14 @@ return 0; } -static uint32_t space2[][2] = { +static const uint32_t space2[][2] = { { 0x0009, 0x000D }, { 0x001C, 0x0020 }, { 0x2000, 0x200A }, { 0x2028, 0x2029 }, }; -static uint32_t space1[] = { +static const uint32_t space1[] = { 0x0085, 0x00A0, 0x1680, @@ -883,7 +883,7 @@ return 0; } -static uint32_t control2[][2] = { +static const uint32_t control2[][2] = { { 0x0000, 0x001F }, { 0x007F, 0x009F }, }; @@ -897,7 +897,7 @@ return 0; } -static uint32_t upper3[][2] = { +static const uint32_t upper3[][2] = { { 0x0100, 0x012E }, { 0x0132, 0x0136 }, { 0x0139, 0x0147 }, @@ -935,7 +935,7 @@ { 0xA7D6, 0xA7D8 }, }; -static uint32_t upper2[][3] = { +static const uint32_t upper2[][3] = { { 0x0041, 0x005A, 0x0061 }, { 0x00C0, 0x00D6, 0x00E0 }, { 0x00D8, 0x00DE, 0x00F8 }, @@ -1018,7 +1018,7 @@ { 0x1E900, 0x1E921, 0x1E922 }, }; -static uint32_t upper1[][2] = { +static const uint32_t upper1[][2] = { { 0x0130, 0x0069 }, { 0x0178, 0x00FF }, { 0x0181, 0x0253 }, @@ -1146,22 +1146,20 @@ { uint32_t *match; - match = bsearch(&r, upper3, nelem(upper3), sizeof *upper3, &cmp2); + if ((match = bsearch(&r, upper3, nelem(upper3), sizeof *upper3, &cmp2))) - if (match) return ((r - match[0]) % 2) ? r : r + 1; - match = bsearch(&r, upper2, nelem(upper2), sizeof *upper2, &cmp2); + if ((match = bsearch(&r, upper2, nelem(upper2), sizeof *upper2, &cmp2))) - if (match) return match[2] + (r - match[0]); - match = bsearch(&r, upper1, nelem(upper1), sizeof *upper1, &cmp1); + if ((match = bsearch(&r, upper1, nelem(upper1), sizeof *upper1, &cmp1))) - if (match) return match[1]; + return r; } -static uint32_t lower4[][2] = { +static const uint32_t lower4[][2] = { { 0x0101, 0x012F }, { 0x0133, 0x0137 }, { 0x013A, 0x0148 }, @@ -1199,7 +1197,7 @@ { 0xA7D7, 0xA7D9 }, }; -static uint32_t lower2[][3] = { +static const uint32_t lower2[][3] = { { 0x0061, 0x007A, 0x0041 }, { 0x00E0, 0x00F6, 0x00C0 }, { 0x00F8, 0x00FE, 0x00D8 }, @@ -1317,7 +1315,7 @@ { 0x1E922, 0x1E943, 0x1E900 }, }; -static uint32_t lower1[][2] = { +static const uint32_t lower1[][2] = { { 0x00B5, 0x039C }, { 0x00DF, 0x00DF }, { 0x00FF, 0x0178 }, @@ -1500,35 +1498,33 @@ { uint32_t *match; - match = bsearch(&r, lower4, nelem(lower4), sizeof *lower4, &cmp2); + if ((match = bsearch(&r, lower4, nelem(lower4), sizeof *lower4, &cmp2))) - if (match) return ((r - match[0]) % 2) ? r : r - 1; - match = bsearch(&r, lower2, nelem(lower2), sizeof *lower2, &cmp2); + if ((match = bsearch(&r, lower2, nelem(lower2), sizeof *lower2, &cmp2))) - if (match) return match[2] + (r - match[0]); - match = bsearch(&r, lower1, nelem(lower1), sizeof *lower1, &cmp1); + if ((match = bsearch(&r, lower1, nelem(lower1), sizeof *lower1, &cmp1))) - if (match) return match[1]; + return r; } -static uint32_t title2[][3] = { - { 0x1F88, 0x1F8F, 0x1F88 }, - { 0x1F98, 0x1F9F, 0x1F98 }, - { 0x1FA8, 0x1FAF, 0x1FA8 }, +static const uint32_t title2[][2] = { + { 0x1F88, 0x1F8F }, + { 0x1F98, 0x1F9F }, + { 0x1FA8, 0x1FAF }, }; -static uint32_t title1[][2] = { - { 0x01C5, 0x01C4 }, - { 0x01C8, 0x01C7 }, - { 0x01CB, 0x01CA }, - { 0x01F2, 0x01F1 }, - { 0x1FBC, 0x1FBC }, - { 0x1FCC, 0x1FCC }, - { 0x1FFC, 0x1FFC }, +static const uint32_t title1[] = { + 0x01C5, + 0x01C8, + 0x01CB, + 0x01F2, + 0x1FBC, + 0x1FCC, + 0x1FFC, }; int @@ -1542,23 +1538,7 @@ return 0; } -uint32_t -uni_totitle(uint32_t r) -{ - uint32_t *match; - - match = bsearch(&r, title2, nelem(title2), sizeof *title2, &cmp2); - - if (match) - return match[2] + (r - match[0]); - match = bsearch(&r, title1, nelem(title1), sizeof *title1, &cmp1); - - if (match) - return match[1]; - return r; -} - -static uint32_t digit2[][2] = { +static const uint32_t digit2[][2] = { { 0x0030, 0x0039 }, { 0x0660, 0x0669 }, { 0x06F0, 0x06F9 },