Mercurial > libunicode
view gen/mkutf.awk @ 24:23ceab03a393 1.0.0
unicode: remove uni_totitle
While here, add more consts.
author | David Demelier <markand@malikania.fr> |
---|---|
date | Wed, 23 Mar 2022 13:17:10 +0100 |
parents | 4983392b356f |
children |
line wrap: on
line source
#
# This file comes from sbase (https://git.suckless.org/sbase/file/libutf/Makefile.html)
# and has been modified to match libunicode's API.
#
# Original license is as following:
#
# MIT/X Consortium License
#
# © 2011 Connor Lane Smith <cls@lubutu.com>
# © 2011-2016 Dimitris Papastamos <sin@2f30.org>
# © 2014-2016 Laslo Hunhold <dev@frign.de>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
# Authors/contributors include:
#
# © 2011 Kamil Cholewiński <harry666t@gmail.com>
# © 2011 Rob Pilling <robpilling@gmail.com>
# © 2011 Hiltjo Posthuma <hiltjo@codemadness.org>
# © 2011 pancake <pancake@youterm.com>
# © 2011 Random832 <random832@fastmail.us>
# © 2012 William Haddon <william@haddonthethird.net>
# © 2012 Kurt H. Maier <khm@sciops.net>
# © 2012 Christoph Lohmann <20h@r-36.net>
# © 2012 David Galos <galosd83@students.rowan.edu>
# © 2012 Robert Ransom <rransom.8774@gmail.com>
# © 2013 Jakob Kramer <jakob.kramer@gmx.de>
# © 2013 Anselm R Garbe <anselm@garbe.us>
# © 2013 Truls Becken <truls.becken@gmail.com>
# © 2013 dsp <dsp@2f30.org>
# © 2013 Markus Teich <markus.teich@stusta.mhn.de>
# © 2013 Jesse Ogle <jesse.p.ogle@gmail.com>
# © 2013 Lorenzo Cogotti <miciamail@hotmail.it>
# © 2013 Federico G. Benavento <benavento@gmail.com>
# © 2013 Roberto E. Vargas Caballero <k0ga@shike2.com>
# © 2013 Christian Hesse <mail@eworm.de>
# © 2013 Markus Wichmann <nullplan@gmx.net>
# © 2014 Silvan Jegen <s.jegen@gmail.com>
# © 2014 Daniel Bainton <dpb@driftaway.org>
# © 2014 Tuukka Kataja <stuge@xor.fi>
# © 2014 Jeffrey Picard <jeff@jeffreypicard.com>
# © 2014 Evan Gates <evan.gates@gmail.com>
# © 2014 Michael Forney <mforney@mforney.org>
# © 2014 Ari Malinen <ari.malinen@gmail.com>
# © 2014 Brandon Mulcahy <brandon@jangler.info>
# © 2014 Adria Garriga <rhaps0dy@installgentoo.com>
# © 2014-2015 Greg Reagle <greg.reagle@umbc.edu>
# © 2015 Tai Chi Minh Ralph Eastwood <tcmreastwood@gmail.com>
# © 2015 Quentin Rameau <quinq@quinq.eu.org>
# © 2015 Dionysis Grigoropoulos <info@erethon.com>
# © 2015 Wolfgang Corcoran-Mathe <wcm@sigwinch.xyz>
# © 2016 Mattias Andrée <maandree@kth.se>
# © 2016 Eivind Uggedal <eivind@uggedal.com>
#

# Reads ';'-separated records and prints C source: lookup tables plus one
# uni_is<name>() predicate per class and, for the upper/lower classes, a
# uni_to<other-case>() conversion function.
#
# NOTE(review): the field numbers used below (3, 5, 13, 14) look like the
# UnicodeData.txt layout (category, bidi class, upper mapping, lower
# mapping) -- confirm against the file actually fed to this script. If so,
# "First>/Last>" range pairs are recorded as two isolated code points only.

BEGIN {
	FS = ";"

	# Map each uppercase hexadecimal digit to its value, for code().
	for (i = 0; i < 16; i++)
		hex[sprintf("%X", i)] = i;
}

# Collect the first field of every record into the per-class lists, in input
# order. The case lists run parallel to upperv/lowerv and fall back to the
# rune itself when the mapping field is empty.
$3 ~ /^L/ { alphav[alphac++] = $1; }
($3 ~ /^Z/) || ($5 == "WS") || ($5 == "S") || ($5 == "B") { spacev[spacec++] = $1; }
$3 == "Cc" { cntrlv[cntrlc++] = $1; }
$3 == "Lu" { upperv[upperc++] = $1; tolowerv[uppercc++] = ($14 == "") ? $1 : $14; }
$3 == "Ll" { lowerv[lowerc++] = $1; toupperv[lowercc++] = ($13 == "") ? $1 : $13; }
$3 == "Lt" { titlev[titlec++] = $1; }
$3 == "Nd" { digitv[digitc++] = $1; }

END {
	# 'q' is never assigned: it is passed as an empty case list for the
	# classes that have no case conversion.
	mkis("alpha", alphav, alphac, q, "");
	mkis("space", spacev, spacec, q, "");
	mkis("control", cntrlv, cntrlc, q, "");
	mkis("upper", upperv, upperc, tolowerv, "lower");
	mkis("lower", lowerv, lowerc, toupperv, "upper");
	mkis("title", titlev, titlec, q, "");
	mkis("digit", digitv, digitc, q, "");
}

# Parse a hexadecimal rune index into a number.
#
#   s  string of hexadecimal digits; only the uppercase digits registered in
#      hex[] contribute, any other character counts as 0
#
# Returns the numeric value (0 for the empty string).
# 'value' and 'pos' are locals, declared the awk way as extra parameters.
function code(s,    value, pos) {
	value = 0;
	for (pos = 1; pos <= length(s); pos++)
		value = (value * 16) + hex[substr(s, pos, 1)];
	return value;
}

# Generate the 'uni_is<name>' lookup function and its tables and, when a case
# list is given, the 'uni_to<casename>' conversion function.
#
#   name      suffix of the generated identifiers (tables <name>1..<name>4)
#   runev     runes (hex strings) indexed from 0, walked in index order
#   runec     number of entries in runev
#   casev     case mapping of each rune, parallel to runev; empty for none
#   casename  suffix of the generated conversion function
#
# Runes are grouped into four kinds of tables:
#   <name>1  singletons
#   <name>2  ranges: consecutive runes (with consecutive mappings)
#   <name>3  "lace 1": every second rune, each mapping to rune + 1
#   <name>4  "lace 2": every second rune, each mapping to rune - 1
#
# The generated C code refers to bsearch, nelem, cmp1 and cmp2, none of which
# is defined here; they must be supplied by whatever includes the output.
#
# Everything after 'casename' is a local variable or scratch array, so
# nothing leaks between calls.
function mkis(name, runev, runec, casev, casename,
              hascase, mode, kind, j, cur, gap,
              rune1c, rune2c, rune3c, rune4c,
              rune1v, case1v, rune2v0, rune2v1, case2v,
              rune3v0, rune3v1, rune4v0, rune4v1) {
	rune1c = 0;
	rune2c = 0;
	rune3c = 0;
	rune4c = 0;
	hascase = (length(casev) > 0);

	# mode: kind of run currently open (1 = none, 2 = range, 3 = lace 1,
	# 4 = lace 2).
	mode = 1;

	# sort rune groups into singletons, ranges and laces
	for (j = 0; j < runec; j++) {
		cur = code(runev[j]);

		# Distance to the next rune; stays 0 on the last entry so that
		# nothing past the end of the lists is ever read.
		gap = 0;
		if (j + 1 < runec)
			gap = code(runev[j + 1]) - cur;

		# kind: how this rune relates to the next one, tried in the
		# order range, lace 1, lace 2; 1 means the run (if any) ends.
		kind = 1;
		if (gap == 1 &&
		    (!hascase || code(casev[j + 1]) == code(casev[j]) + 1)) {
			kind = 2;
		} else if (gap == 2 &&
		    (!hascase || (code(casev[j + 1]) == code(runev[j + 1]) + 1 &&
		                  code(casev[j]) == cur + 1))) {
			kind = 3;
		} else if (gap == 2 &&
		    (!hascase || (code(casev[j + 1]) == code(runev[j + 1]) - 1 &&
		                  code(casev[j]) == cur - 1))) {
			kind = 4;
		}

		# Still inside the run that is already open.
		if (kind != 1 && kind == mode)
			continue;

		# Close the open run with this rune as its last member. Each
		# table is indexed by its own counter.
		if (mode == 2) {
			rune2v1[rune2c] = runev[j];
			rune2c++;
		} else if (mode == 3) {
			rune3v1[rune3c] = runev[j];
			rune3c++;
		} else if (mode == 4) {
			rune4v1[rune4c] = runev[j];
			rune4c++;
		} else if (kind == 1) {
			# No run open and none starting: a singleton.
			rune1v[rune1c] = runev[j];
			if (hascase)
				case1v[rune1c] = casev[j];
			rune1c++;
		}

		# Open the new run; this rune is its first member too.
		if (kind == 2) {
			rune2v0[rune2c] = runev[j];
			if (hascase)
				case2v[rune2c] = casev[j];
		} else if (kind == 3) {
			rune3v0[rune3c] = runev[j];
		} else if (kind == 4) {
			rune4v0[rune4c] = runev[j];
		}
		mode = kind;
	}

	# generate list of laces 1
	if (rune3c > 0) {
		print "static const uint32_t "name"3[][2] = {";
		for (j = 0; j < rune3c; j++)
			print "\t{ 0x"rune3v0[j]", 0x"rune3v1[j]" },";
		print "};\n";
	}

	# generate list of laces 2
	if (rune4c > 0) {
		print "static const uint32_t "name"4[][2] = {";
		for (j = 0; j < rune4c; j++)
			print "\t{ 0x"rune4v0[j]", 0x"rune4v1[j]" },";
		print "};\n";
	}

	# generate list of ranges; with a case list each entry also carries
	# the mapping of the first rune of the range
	if (rune2c > 0) {
		if (hascase) {
			print "static const uint32_t "name"2[][3] = {";
			for (j = 0; j < rune2c; j++)
				print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]", 0x"case2v[j]" },";
		} else {
			print "static const uint32_t "name"2[][2] = {";
			for (j = 0; j < rune2c; j++)
				print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]" },";
		}
		print "};\n";
	}

	# generate list of singletons
	if (rune1c > 0) {
		if (hascase) {
			print "static const uint32_t "name"1[][2] = {";
			for (j = 0; j < rune1c; j++)
				print "\t{ 0x"rune1v[j]", 0x"case1v[j]" },";
		} else {
			print "static const uint32_t "name"1[] = {";
			for (j = 0; j < rune1c; j++)
				print "\t0x"rune1v[j]",";
		}
		print "};\n";
	}

	# generate lookup function
	print "int\nuni_is"name"(uint32_t r)\n{";
	if (rune4c > 0 || rune3c > 0)
		print "\tconst uint32_t *match;\n";
	if (rune4c > 0) {
		print "\tif ((match = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &cmp2)))";
		print "\t\treturn !((r - match[0]) % 2);";
	}
	if (rune3c > 0) {
		print "\tif ((match = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &cmp2)))";
		print "\t\treturn !((r - match[0]) % 2);";
	}
	if (rune2c > 0)
		print "\tif (bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &cmp2))\n\t\treturn 1;";
	if (rune1c > 0)
		print "\tif (bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &cmp1))\n\t\treturn 1;";
	print "\n\treturn 0;\n}\n";

	# generate case conversion function
	if (hascase) {
		# The tables are 'static const', so the match pointer is const
		# as well, like in the lookup function above.
		print "uint32_t\nuni_to"casename"(uint32_t r)\n{\n\tconst uint32_t *match;\n";
		if (rune4c > 0) {
			print "\tif ((match = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &cmp2)))\n";
			print "\t\treturn ((r - match[0]) % 2) ? r : r - 1;";
		}
		if (rune3c > 0) {
			print "\tif ((match = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &cmp2)))\n";
			print "\t\treturn ((r - match[0]) % 2) ? r : r + 1;";
		}
		if (rune2c > 0) {
			print "\tif ((match = bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &cmp2)))\n";
			print "\t\treturn match[2] + (r - match[0]);";
		}
		if (rune1c > 0) {
			print "\tif ((match = bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &cmp1)))\n";
			print "\t\treturn match[1];";
		}
		print "\n\treturn r;\n}\n";
	}
}