0
|
1 /* |
|
2 * Tool to create our unicode.cpp and unicode.h file. |
|
3 * |
|
4 * Current version: 7.0.0 |
|
5 * |
|
6 * Based on mkrunetype from the Go language. |
|
7 * |
|
8 * Adapted to generate C++ code. |
|
9 */ |
|
10 |
|
11 // Copyright 2009 The Go Authors. All rights reserved. |
|
12 // Use of this source code is governed by a BSD-style |
|
13 // license that can be found in the LICENSE file. |
|
14 |
|
15 /* |
|
16 * make is(upper|lower|title|space|alpha)rune and |
|
17 * to(upper|lower|title)rune from a UnicodeData.txt file. |
|
18 * these can be found at unicode.org |
|
19 * |
|
20 * with -c, runs a check of the existing runetype functions vs. |
|
21 * those extracted from UnicodeData. |
|
22 * |
|
23 * with -p, generates tables for pairs of chars, as well as for ranges |
|
24 * and singletons. |
|
25 * |
|
26 * UnicodeData defines 4 fields of interest: |
|
27 * 1) a category |
|
28 * 2) an upper case mapping |
|
29 * 3) a lower case mapping |
|
30 * 4) a title case mapping |
|
31 * |
|
32 * toupper, tolower, and totitle are defined directly from the mapping. |
|
33 * |
|
34 * isalpharune(c) is true iff c is a "letter" category |
|
35 * isupperrune(c) is true iff c is the target of toupperrune, |
|
36 * or is in the uppercase letter category |
|
37 * similarly for islowerrune and istitlerune. |
|
38 * isspacerune is true for space category chars, "C" locale white space chars, |
|
39 * and two additions: |
|
40 * 0085 "next line" control char |
|
41 * feff "zero-width non-break space" |
|
42 * isdigitrune is true iff c is a numeric-digit category. |
|
43 */ |
|
44 |
|
45 #include <stdarg.h> |
|
46 #include <stdio.h> |
|
47 #include <stdlib.h> |
|
48 #include <string.h> |
|
49 |
|
50 #include "utf.h" |
|
51 #include "utfdef.h" |
|
52 |
|
/* number of elements in a true array (not a pointer) */
#define nelem(x) (sizeof(x)/sizeof((x)[0]))

enum {
	/*
	 * semicolon-separated columns of a UnicodeData.txt line,
	 * in the order they appear
	 */
	FIELD_CODE = 0,
	FIELD_NAME = 1,
	FIELD_CATEGORY = 2,
	FIELD_COMBINING = 3,
	FIELD_BIDIR = 4,
	FIELD_DECOMP = 5,
	FIELD_DECIMAL_DIG = 6,
	FIELD_DIG = 7,
	FIELD_NUMERIC_VAL = 8,
	FIELD_MIRRORED = 9,
	FIELD_UNICODE_1_NAME = 10,
	FIELD_COMMENT = 11,
	FIELD_UPPER = 12,
	FIELD_LOWER = 13,
	FIELD_TITLE = 14,
	NFIELDS = 15,

	/* size of the buffers that hold one input line */
	MAX_LINE = 1024,

	/* bias added to every mapping delta by TO_DELTA */
	TO_OFFSET = 1 << 20,

	/* size of every per-code-point table */
	NRUNES = 1 << 21
};

/* biased distance from code point x to its mapped value xmapped */
#define TO_DELTA(xmapped,x) (TO_OFFSET + (xmapped) - (x))
|
84 |
|
85 static FILE *out; |
|
86 static char myisspace[NRUNES]; |
|
87 static char myisalpha[NRUNES]; |
|
88 static char myisdigit[NRUNES]; |
|
89 static char myisupper[NRUNES]; |
|
90 static char myislower[NRUNES]; |
|
91 static char myistitle[NRUNES]; |
|
92 |
|
93 static int mytoupper[NRUNES]; |
|
94 static int mytolower[NRUNES]; |
|
95 static int mytotitle[NRUNES]; |
|
96 |
|
97 static void check(void); |
|
98 static void mktables(char *src, int usepairs); |
|
99 static void fatal(const char *fmt, ...); |
|
100 static int mygetfields(char **fields, int nfields, char *str, const char *delim); |
|
101 static int getunicodeline(FILE *in, char **fields, char *buf); |
|
102 static int getcode(char *s); |
|
103 |
|
/*
 * print the command-line synopsis on stderr and exit with status 1.
 */
static void
usage(void)
{
	fputs("usage: mktables [-cp] output UnicodeData.txt\n", stderr);
	exit(1);
}
|
110 |
|
111 int |
|
112 main(int argc, char *argv[]) |
|
113 { |
|
114 FILE *in; |
|
115 char buf[MAX_LINE], buf2[MAX_LINE]; |
|
116 char *fields[NFIELDS + 1], *fields2[NFIELDS + 1]; |
|
117 char *p; |
|
118 int i, code, last, usepairs; |
|
119 |
|
120 usepairs = 0; |
|
121 |
|
122 --argc; |
|
123 ++argv; |
|
124 |
|
125 if (argc != 2){ |
|
126 usage(); |
|
127 } |
|
128 |
|
129 out = fopen(argv[0], "w"); |
|
130 if (out == NULL) { |
|
131 fatal("can't open %s", argv[0]); |
|
132 } |
|
133 |
|
134 in = fopen(argv[1], "r"); |
|
135 |
|
136 if (in == NULL) { |
|
137 fatal("can't open %s", argv[1]); |
|
138 } |
|
139 |
|
140 for(i = 0; i < NRUNES; i++){ |
|
141 mytoupper[i] = i; |
|
142 mytolower[i] = i; |
|
143 mytotitle[i] = i; |
|
144 } |
|
145 |
|
146 /* |
|
147 * make sure isspace has all of the "C" locale whitespace chars |
|
148 */ |
|
149 myisspace['\t'] = 1; |
|
150 myisspace['\n'] = 1; |
|
151 myisspace['\r'] = 1; |
|
152 myisspace['\f'] = 1; |
|
153 myisspace['\v'] = 1; |
|
154 |
|
155 /* |
|
156 * a couple of other exceptions |
|
157 */ |
|
158 myisspace[0x85] = 1; /* control char, "next line" */ |
|
159 myisspace[0xfeff] = 1; /* zero-width non-break space */ |
|
160 |
|
161 last = -1; |
|
162 while(getunicodeline(in, fields, buf)){ |
|
163 code = getcode(fields[FIELD_CODE]); |
|
164 if (code >= NRUNES) |
|
165 fatal("code-point value too big: %x", code); |
|
166 if(code <= last) |
|
167 fatal("bad code sequence: %x then %x", last, code); |
|
168 last = code; |
|
169 |
|
170 /* |
|
171 * check for ranges |
|
172 */ |
|
173 p = fields[FIELD_CATEGORY]; |
|
174 if(strstr(fields[FIELD_NAME], ", First>") != NULL){ |
|
175 if(!getunicodeline(in, fields2, buf2)) |
|
176 fatal("range start at eof"); |
|
177 if (strstr(fields2[FIELD_NAME], ", Last>") == NULL) |
|
178 fatal("range start not followed by range end"); |
|
179 last = getcode(fields2[FIELD_CODE]); |
|
180 if(last <= code) |
|
181 fatal("range out of sequence: %x then %x", code, last); |
|
182 if(strcmp(p, fields2[FIELD_CATEGORY]) != 0) |
|
183 fatal("range with mismatched category"); |
|
184 } |
|
185 |
|
186 /* |
|
187 * set properties and conversions |
|
188 */ |
|
189 for (; code <= last; code++){ |
|
190 if(p[0] == 'L') |
|
191 myisalpha[code] = 1; |
|
192 if(p[0] == 'Z') |
|
193 myisspace[code] = 1; |
|
194 |
|
195 if(strcmp(p, "Lu") == 0) |
|
196 myisupper[code] = 1; |
|
197 if(strcmp(p, "Ll") == 0) |
|
198 myislower[code] = 1; |
|
199 |
|
200 if(strcmp(p, "Lt") == 0) |
|
201 myistitle[code] = 1; |
|
202 |
|
203 if(strcmp(p, "Nd") == 0) |
|
204 myisdigit[code] = 1; |
|
205 |
|
206 /* |
|
207 * when finding conversions, also need to mark |
|
208 * upper/lower case, since some chars, like |
|
209 * "III" (0x2162), aren't defined as letters but have a |
|
210 * lower case mapping ("iii" (0x2172)). |
|
211 */ |
|
212 if(fields[FIELD_UPPER][0] != '\0'){ |
|
213 mytoupper[code] = getcode(fields[FIELD_UPPER]); |
|
214 } |
|
215 if(fields[FIELD_LOWER][0] != '\0'){ |
|
216 mytolower[code] = getcode(fields[FIELD_LOWER]); |
|
217 } |
|
218 if(fields[FIELD_TITLE][0] != '\0'){ |
|
219 mytotitle[code] = getcode(fields[FIELD_TITLE]); |
|
220 } |
|
221 } |
|
222 } |
|
223 |
|
224 fclose(in); |
|
225 |
|
226 /* |
|
227 * check for codes with no totitle mapping but a toupper mapping. |
|
228 * these appear in UnicodeData-2.0.14.txt, but are almost certainly |
|
229 * erroneous. |
|
230 */ |
|
231 for(i = 0; i < NRUNES; i++){ |
|
232 if(mytotitle[i] == i |
|
233 && mytoupper[i] != i |
|
234 && !myistitle[i]) |
|
235 fprintf(stderr, "warning: code=%.4x not istitle, totitle is same, toupper=%.4x\n", i, mytoupper[i]); |
|
236 } |
|
237 |
|
238 /* |
|
239 * make sure isupper[c] is true if for some x toupper[x] == c |
|
240 * ditto for islower and istitle |
|
241 */ |
|
242 for(i = 0; i < NRUNES; i++) { |
|
243 if(mytoupper[i] != i) |
|
244 myisupper[mytoupper[i]] = 1; |
|
245 if(mytolower[i] != i) |
|
246 myislower[mytolower[i]] = 1; |
|
247 if(mytotitle[i] != i) |
|
248 myistitle[mytotitle[i]] = 1; |
|
249 } |
|
250 |
|
251 mktables(argv[0], usepairs); |
|
252 exit(0); |
|
253 } |
|
254 |
|
255 /* |
|
256 * generate a properties array for ranges, clearing those cases covered. |
|
257 * if force, generate one-entry ranges for singletons. |
|
258 */ |
|
259 static int |
|
260 mkisrange(const char* label, char* prop, int force) |
|
261 { |
|
262 int start, stop, some; |
|
263 |
|
264 /* |
|
265 * first, the ranges |
|
266 */ |
|
267 some = 0; |
|
268 for(start = 0; start < NRUNES; ) { |
|
269 if(!prop[start]){ |
|
270 start++; |
|
271 continue; |
|
272 } |
|
273 |
|
274 for(stop = start + 1; stop < NRUNES; stop++){ |
|
275 if(!prop[stop]){ |
|
276 break; |
|
277 } |
|
278 prop[stop] = 0; |
|
279 } |
|
280 if(force || stop != start + 1){ |
|
281 if(!some){ |
|
282 fprintf(out, "namespace {\n\n"); |
|
283 fprintf(out, "const char32_t is%sr[] = {\n", label); |
|
284 some = 1; |
|
285 } |
|
286 prop[start] = 0; |
|
287 fprintf(out, " 0x%.4x, 0x%.4x,\n", start, stop - 1); |
|
288 } |
|
289 |
|
290 start = stop; |
|
291 } |
|
292 if(some) { |
|
293 fprintf(out, "};\n\n"); |
|
294 fprintf(out, "} // !namespace\n\n"); |
|
295 } |
|
296 |
|
297 return some; |
|
298 } |
|
299 |
|
300 /* |
|
301 * generate a mapping array for pairs with a skip between, |
|
302 * clearing those entries covered. |
|
303 */ |
|
304 static int |
|
305 mkispair(const char *label, char *prop) |
|
306 { |
|
307 int start, stop, some; |
|
308 |
|
309 some = 0; |
|
310 for(start = 0; start + 2 < NRUNES; ) { |
|
311 if(!prop[start]){ |
|
312 start++; |
|
313 continue; |
|
314 } |
|
315 |
|
316 for(stop = start + 2; stop < NRUNES; stop += 2){ |
|
317 if(!prop[stop]){ |
|
318 break; |
|
319 } |
|
320 prop[stop] = 0; |
|
321 } |
|
322 if(stop != start + 2){ |
|
323 if(!some){ |
|
324 fprintf(out, "namespace {\n\n"); |
|
325 fprintf(out, "const char32_t is%sp[] = {\n", label); |
|
326 some = 1; |
|
327 } |
|
328 prop[start] = 0; |
|
329 fprintf(out, " 0x%.4x, 0x%.4x,\n", start, stop - 2); |
|
330 } |
|
331 |
|
332 start = stop; |
|
333 } |
|
334 if(some) { |
|
335 fprintf(out, "};\n\n"); |
|
336 fprintf(out, "} // !namespace\n\n"); |
|
337 } |
|
338 return some; |
|
339 } |
|
340 |
|
341 /* |
|
342 * generate a properties array for singletons, clearing those cases covered. |
|
343 */ |
|
344 static int |
|
345 mkissingle(const char *label, char *prop) |
|
346 { |
|
347 int start, some; |
|
348 |
|
349 some = 0; |
|
350 for(start = 0; start < NRUNES; start++) { |
|
351 if(!prop[start]){ |
|
352 continue; |
|
353 } |
|
354 |
|
355 if(!some){ |
|
356 fprintf(out, "namespace {\n\n"); |
|
357 fprintf(out, "const char32_t is%ss[] = {\n", label); |
|
358 some = 1; |
|
359 } |
|
360 prop[start] = 0; |
|
361 fprintf(out, " 0x%.4x,\n", start); |
|
362 } |
|
363 if(some) { |
|
364 fprintf(out, "};\n\n"); |
|
365 fprintf(out, "} // !namespace\n\n"); |
|
366 } |
|
367 return some; |
|
368 } |
|
369 |
|
370 /* |
|
371 * generate tables and a function for is<label>rune |
|
372 */ |
|
373 static void |
|
374 mkis(const char* label, char* prop, int usepairs) |
|
375 { |
|
376 int isr, isp, iss; |
|
377 |
|
378 isr = mkisrange(label, prop, 0); |
|
379 isp = 0; |
|
380 if(usepairs) |
|
381 isp = mkispair(label, prop); |
|
382 iss = mkissingle(label, prop); |
|
383 |
2
|
384 fprintf(out, |
0
|
385 "bool is%s(char32_t c) noexcept\n" |
|
386 "{\n" |
2
|
387 " const char32_t* p;\n" |
0
|
388 "\n", |
|
389 label); |
|
390 |
|
391 if(isr) |
2
|
392 fprintf(out, |
|
393 " p = rbsearch(c, is%sr, nelem (is%sr) / 2, 2);\n\n" |
3
|
394 " if (p && c >= p[0] && c <= p[1])\n" |
|
395 " return true;\n", |
0
|
396 label, label); |
|
397 |
|
398 if(isp) |
2
|
399 fprintf(out, |
|
400 "\n" |
|
401 " p = rbsearch(c, is%sp, nelem (is%sp) / 2, 2);\n\n" |
3
|
402 " if (p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n" |
|
403 " return true;\n", |
0
|
404 label, label); |
|
405 |
|
406 if(iss) |
2
|
407 fprintf(out, |
|
408 "\n" |
|
409 " p = rbsearch(c, is%ss, nelem (is%ss), 1);\n\n" |
3
|
410 " if (p && c == p[0])\n" |
|
411 " return true;\n", |
0
|
412 label, label); |
|
413 |
|
414 |
2
|
415 fprintf(out, |
|
416 "\n" |
|
417 " return false;\n" |
0
|
418 "}\n" |
|
419 "\n" |
|
420 ); |
|
421 } |
|
422 |
|
423 /* |
|
424 * generate a mapping array for ranges, clearing those entries covered. |
|
425 * if force, generate one-entry ranges for singletons. |
|
426 */ |
|
427 static int |
|
428 mktorange(const char* label, int* map, int force) |
|
429 { |
|
430 int start, stop, delta, some; |
|
431 |
|
432 some = 0; |
|
433 for(start = 0; start < NRUNES; ) { |
|
434 if(map[start] == start){ |
|
435 start++; |
|
436 continue; |
|
437 } |
|
438 |
|
439 delta = TO_DELTA(map[start], start); |
|
440 if(delta != (Rune)delta) |
|
441 fatal("bad map delta %d", delta); |
|
442 for(stop = start + 1; stop < NRUNES; stop++){ |
|
443 if(TO_DELTA(map[stop], stop) != delta){ |
|
444 break; |
|
445 } |
|
446 map[stop] = stop; |
|
447 } |
|
448 if(stop != start + 1){ |
|
449 if(!some){ |
|
450 fprintf(out, "namespace {\n\n"); |
|
451 fprintf(out, "const char32_t to%sr[] = {\n", label); |
|
452 some = 1; |
|
453 } |
|
454 map[start] = start; |
|
455 fprintf(out, " 0x%.4x, 0x%.4x, %d,\n", start, stop - 1, delta); |
|
456 } |
|
457 |
|
458 start = stop; |
|
459 } |
|
460 if(some) { |
|
461 fprintf(out, "};\n\n"); |
|
462 fprintf(out, "} // !namespace\n\n"); |
|
463 } |
|
464 |
|
465 return some; |
|
466 } |
|
467 |
|
468 /* |
|
469 * generate a mapping array for pairs with a skip between, |
|
470 * clearing those entries covered. |
|
471 */ |
|
472 static int |
|
473 mktopair(const char* label, int* map) |
|
474 { |
|
475 int start, stop, delta, some; |
|
476 |
|
477 some = 0; |
|
478 for(start = 0; start + 2 < NRUNES; ) { |
|
479 if(map[start] == start){ |
|
480 start++; |
|
481 continue; |
|
482 } |
|
483 |
|
484 delta = TO_DELTA(map[start], start); |
|
485 if(delta != (Rune)delta) |
|
486 fatal("bad map delta %d", delta); |
|
487 for(stop = start + 2; stop < NRUNES; stop += 2){ |
|
488 if(TO_DELTA(map[stop], stop) != delta){ |
|
489 break; |
|
490 } |
|
491 map[stop] = stop; |
|
492 } |
|
493 if(stop != start + 2){ |
|
494 if(!some){ |
|
495 fprintf(out, "namespace {\n\n"); |
|
496 fprintf(out, "const char32_t to%sp[] = {\n", label); |
|
497 some = 1; |
|
498 } |
|
499 map[start] = start; |
|
500 fprintf(out, " 0x%.4x, 0x%.4x, %d,\n", start, stop - 2, delta); |
|
501 } |
|
502 |
|
503 start = stop; |
|
504 } |
|
505 if(some) { |
|
506 fprintf(out, "};\n\n"); |
|
507 fprintf(out, "} // !namespace\n\n"); |
|
508 } |
|
509 |
|
510 return some; |
|
511 } |
|
512 |
|
513 /* |
|
514 * generate a mapping array for singletons, clearing those entries covered. |
|
515 */ |
|
516 static int |
|
517 mktosingle(const char* label, int* map) |
|
518 { |
|
519 int start, delta, some; |
|
520 |
|
521 some = 0; |
|
522 for(start = 0; start < NRUNES; start++) { |
|
523 if(map[start] == start){ |
|
524 continue; |
|
525 } |
|
526 |
|
527 delta = TO_DELTA(map[start], start); |
|
528 if(delta != (Rune)delta) |
|
529 fatal("bad map delta %d", delta); |
|
530 if(!some){ |
|
531 fprintf(out, "namespace {\n\n"); |
|
532 fprintf(out, "const char32_t to%ss[] = {\n", label); |
|
533 some = 1; |
|
534 } |
|
535 map[start] = start; |
|
536 fprintf(out, " 0x%.4x, %d,\n", start, delta); |
|
537 } |
|
538 if(some) { |
|
539 fprintf(out, "};\n\n"); |
|
540 fprintf(out, "} // !namespace\n\n"); |
|
541 } |
|
542 |
|
543 return some; |
|
544 } |
|
545 |
|
546 /* |
|
547 * generate tables and a function for to<label>rune |
|
548 */ |
|
549 static void |
|
550 mkto(const char* label, int* map, int usepairs) |
|
551 { |
|
552 int tor, top, tos; |
|
553 |
|
554 tor = mktorange(label, map, 0); |
|
555 top = 0; |
|
556 if(usepairs) |
|
557 top = mktopair(label, map); |
|
558 tos = mktosingle(label, map); |
|
559 |
2
|
560 fprintf(out, |
0
|
561 "char32_t to%s(char32_t c) noexcept\n" |
|
562 "{\n" |
2
|
563 " const char32_t* p;\n" |
0
|
564 "\n", |
|
565 label); |
|
566 |
|
567 if(tor) |
2
|
568 fprintf(out, |
|
569 " p = rbsearch(c, to%sr, nelem (to%sr) / 3, 3);\n\n" |
3
|
570 " if (p && c >= p[0] && c <= p[1])\n" |
|
571 " return c + p[2] - %d;\n", |
0
|
572 label, label, TO_OFFSET); |
|
573 |
|
574 if(top) |
2
|
575 fprintf(out, |
|
576 "\n" |
|
577 " p = rbsearch(c, to%sp, nelem (to%sp) / 3, 3);\n\n" |
3
|
578 " if (p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n" |
|
579 " return c + p[2] - %d;\n", |
0
|
580 label, label, TO_OFFSET); |
|
581 |
|
582 if(tos) |
2
|
583 fprintf(out, |
|
584 "\n" |
|
585 " p = rbsearch(c, to%ss, nelem (to%ss) / 2, 2);\n\n" |
3
|
586 " if (p && c == p[0])\n" |
|
587 " return c + p[1] - %d;\n\n", |
0
|
588 label, label, TO_OFFSET); |
|
589 |
2
|
590 fprintf(out, |
|
591 " return c;\n" |
0
|
592 "}\n" |
|
593 "\n" |
|
594 ); |
|
595 } |
|
596 |
|
597 // Make only range tables and a function for is<label>rune. |
|
598 static void |
|
599 mkisronly(const char* label, char* prop) |
|
600 { |
|
601 mkisrange(label, prop, 1); |
2
|
602 fprintf(out, |
0
|
603 "bool is%s(char32_t c) noexcept\n" |
|
604 "{\n" |
2
|
605 " const char32_t* p;\n" |
0
|
606 "\n" |
2
|
607 " p = rbsearch(c, is%sr, nelem (is%sr) / 2, 2);\n\n" |
3
|
608 " if (p && c >= p[0] && c <= p[1])\n" |
|
609 " return true;\n\n" |
2
|
610 " return false;\n" |
0
|
611 "}\n" |
|
612 "\n", |
|
613 label, label, label); |
|
614 } |
|
615 |
|
616 /* |
|
617 * generate the body of runetype. |
|
618 * assumes there is a function Rune* rbsearch(Rune c, Rune *t, int n, int ne); |
|
619 */ |
|
620 static void |
|
621 mktables(char *src, int usepairs) |
|
622 { |
|
623 /* Add nelem macro */ |
2
|
624 fprintf(out, |
0
|
625 "#define nelem(x) (sizeof (x) / sizeof ((x)[0]))\n\n" |
|
626 ); |
|
627 |
|
628 /* Add the rbsearch function */ |
2
|
629 fprintf(out, |
0
|
630 "namespace {\n\n" |
2
|
631 "const char32_t *rbsearch(char32_t c, const char32_t* t, int n, int ne) noexcept\n" |
0
|
632 "{\n" |
2
|
633 " const char32_t* p;\n" |
|
634 " int m;\n\n" |
|
635 " while (n > 1) {\n" |
|
636 " m = n >> 1;\n" |
|
637 " p = t + m * ne;\n\n" |
|
638 " if (c >= p[0]) {\n" |
|
639 " t = p;\n" |
|
640 " n = n - m;\n" |
3
|
641 " } else\n" |
2
|
642 " n = m;\n" |
|
643 " }\n\n" |
3
|
644 " if (n && c >= t[0])\n" |
|
645 " return t;\n\n" |
2
|
646 " return nullptr;\n" |
0
|
647 "}\n\n" |
|
648 "} // !namespace\n\n" |
|
649 ); |
|
650 |
|
651 /* |
|
652 * we special case the space and digit tables, since they are assumed |
|
653 * to be small with several ranges. |
|
654 */ |
|
655 mkisronly("space", myisspace); |
|
656 mkisronly("digit", myisdigit); |
|
657 |
|
658 mkis("alpha", myisalpha, 0); |
|
659 mkis("upper", myisupper, usepairs); |
|
660 mkis("lower", myislower, usepairs); |
|
661 mkis("title", myistitle, usepairs); |
|
662 |
|
663 mkto("upper", mytoupper, usepairs); |
|
664 mkto("lower", mytolower, usepairs); |
|
665 mkto("title", mytotitle, usepairs); |
|
666 } |
|
667 |
|
/*
 * split str in place into at most nfields fields.
 *
 * every character of str that appears in delim is overwritten with
 * '\0' and the following character starts the next field.  once
 * nfields fields have been found splitting stops, so the last field
 * keeps any remaining delimiters.  fields[0] is always str itself.
 *
 * returns the number of entries stored in fields (at least 1).
 */
static int
mygetfields(char **fields, int nfields, char *str, const char *delim)
{
	int count;
	char *cur;

	fields[0] = str;
	count = 1;

	cur = str;
	while(count < nfields && *cur != '\0'){
		if(strchr(delim, *cur) != NULL){
			*cur = '\0';
			fields[count] = cur + 1;
			count++;
		}
		cur++;
	}

	return count;
}
|
688 |
|
689 static int |
|
690 getunicodeline(FILE *in, char **fields, char *buf) |
|
691 { |
|
692 char *p; |
|
693 |
|
694 if(fgets(buf, MAX_LINE, in) == NULL) |
|
695 return 0; |
|
696 |
|
697 p = strchr(buf, '\n'); |
|
698 if (p == NULL) |
|
699 fatal("line too long"); |
|
700 *p = '\0'; |
|
701 |
|
702 if (mygetfields(fields, NFIELDS + 1, buf, ";") != NFIELDS) |
|
703 fatal("bad number of fields"); |
|
704 |
|
705 return 1; |
|
706 } |
|
707 |
|
708 static int |
|
709 getcode(char *s) |
|
710 { |
|
711 int i, code; |
|
712 |
|
713 code = 0; |
|
714 i = 0; |
|
715 /* Parse a hex number */ |
|
716 while(s[i]) { |
|
717 code <<= 4; |
|
718 if(s[i] >= '0' && s[i] <= '9') |
|
719 code += s[i] - '0'; |
|
720 else if(s[i] >= 'A' && s[i] <= 'F') |
|
721 code += s[i] - 'A' + 10; |
|
722 else |
|
723 fatal("bad code char '%c'", s[i]); |
|
724 i++; |
|
725 } |
|
726 return code; |
|
727 } |
|
728 |
|
/*
 * print "mkunicode: fatal error: ", the printf-style message and a
 * newline on stderr, then exit with status 1.  does not return.
 */
static void
fatal(const char *fmt, ...)
{
	va_list ap;

	fputs("mkunicode: fatal error: ", stderr);

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	fputc('\n', stderr);

	exit(1);
}