Mercurial > embed
comparison jansson/src/load.c @ 0:0047655db1aa
jansson: import 2.7
author | David Demelier <markand@malikania.fr> |
---|---|
date | Wed, 24 Feb 2016 20:50:05 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0047655db1aa |
---|---|
1 /* | |
2 * Copyright (c) 2009-2014 Petri Lehtinen <petri@digip.org> | |
3 * | |
4 * Jansson is free software; you can redistribute it and/or modify | |
5 * it under the terms of the MIT license. See LICENSE for details. | |
6 */ | |
7 | |
8 #ifndef _GNU_SOURCE | |
9 #define _GNU_SOURCE | |
10 #endif | |
11 | |
12 #include <errno.h> | |
13 #include <limits.h> | |
14 #include <stdio.h> | |
15 #include <stdlib.h> | |
16 #include <string.h> | |
17 #include <assert.h> | |
18 | |
19 #include "jansson.h" | |
20 #include "jansson_private.h" | |
21 #include "strbuffer.h" | |
22 #include "utf.h" | |
23 | |
/* Values kept in stream_t.state. The two non-OK values are also what
   stream_get() returns in place of a byte once the stream has ended
   or failed. */
24 #define STREAM_STATE_OK 0 | |
25 #define STREAM_STATE_EOF -1 | |
26 #define STREAM_STATE_ERROR -2 | |
27 | |
/* Token codes kept in lex_t.token. lex_scan() stores structural
   characters ('{', '}', '[', ']', ':', ',') as their own character
   value; the named multi-character tokens start at 256, presumably so
   they cannot collide with a character value. */
28 #define TOKEN_INVALID -1 | |
29 #define TOKEN_EOF 0 | |
30 #define TOKEN_STRING 256 | |
31 #define TOKEN_INTEGER 257 | |
32 #define TOKEN_REAL 258 | |
33 #define TOKEN_TRUE 259 | |
34 #define TOKEN_FALSE 260 | |
35 #define TOKEN_NULL 261 | |
36 | |
37 /* Locale independent versions of isxxx() functions */ | |
/* These macros evaluate their argument more than once, so do not pass
   an expression with side effects. */
38 #define l_isupper(c) ('A' <= (c) && (c) <= 'Z') | |
39 #define l_islower(c) ('a' <= (c) && (c) <= 'z') | |
40 #define l_isalpha(c) (l_isupper(c) || l_islower(c)) | |
41 #define l_isdigit(c) ('0' <= (c) && (c) <= '9') | |
42 #define l_isxdigit(c) \ | |
43 (l_isdigit(c) || ('A' <= (c) && (c) <= 'F') || ('a' <= (c) && (c) <= 'f')) | |
44 | |
/* Input source callback: read one byte, convert it to unsigned char
   and then to int, and return it. Return EOF when the input is
   exhausted. This mirrors the contract of fgetc(). */
typedef int (*get_func)(void *data);

/* Byte stream with a small look-behind buffer and position tracking. */
typedef struct {
    /* source callback and its opaque argument */
    get_func get;
    void *data;

    /* bytes of the character currently being handed out, terminated
       by '\0'; buffer_pos indexes the next byte to return */
    char buffer[5];
    size_t buffer_pos;

    /* one of the STREAM_STATE_* values */
    int state;

    /* current line number (stream_init() starts it at 1) */
    int line;

    /* column within the current line, counted per character start */
    int column;

    /* column value saved when a newline is consumed, so that
       stream_unget() can restore it */
    int last_column;

    /* number of bytes returned so far */
    size_t position;
} stream_t;
60 | |
/* Lexer state: the input stream, the raw text of the token being
   scanned, the current token code and the token's decoded value. */
61 typedef struct { | |
62 stream_t stream; | |
/* raw source text of the current token; cleared at the start of
   every lex_scan() call and used for error context */
63 strbuffer_t saved_text; | |
/* a TOKEN_* code or a structural character such as '{' */
64 int token; | |
/* which member is meaningful depends on token: string for
   TOKEN_STRING, integer for TOKEN_INTEGER, real for TOKEN_REAL */
65 union { | |
66 struct { | |
/* decoded string allocated by lex_scan_string(); released by
   lex_free_string() unless ownership was taken elsewhere */
67 char *val; | |
68 size_t len; | |
69 } string; | |
70 json_int_t integer; | |
71 double real; | |
72 } value; | |
73 } lex_t; | |
74 | |
/* Recover the enclosing lex_t from a pointer to its stream member.
   NOTE(review): container_of is not defined in this file; presumably
   it comes from jansson_private.h - confirm. */
75 #define stream_to_lex(stream) container_of(stream, lex_t, stream) | |
76 | |
77 | |
78 /*** error reporting ***/ | |
79 | |
80 static void error_set(json_error_t *error, const lex_t *lex, | |
81 const char *msg, ...) | |
82 { | |
83 va_list ap; | |
84 char msg_text[JSON_ERROR_TEXT_LENGTH]; | |
85 char msg_with_context[JSON_ERROR_TEXT_LENGTH]; | |
86 | |
87 int line = -1, col = -1; | |
88 size_t pos = 0; | |
89 const char *result = msg_text; | |
90 | |
91 if(!error) | |
92 return; | |
93 | |
94 va_start(ap, msg); | |
95 vsnprintf(msg_text, JSON_ERROR_TEXT_LENGTH, msg, ap); | |
96 msg_text[JSON_ERROR_TEXT_LENGTH - 1] = '\0'; | |
97 va_end(ap); | |
98 | |
99 if(lex) | |
100 { | |
101 const char *saved_text = strbuffer_value(&lex->saved_text); | |
102 | |
103 line = lex->stream.line; | |
104 col = lex->stream.column; | |
105 pos = lex->stream.position; | |
106 | |
107 if(saved_text && saved_text[0]) | |
108 { | |
109 if(lex->saved_text.length <= 20) { | |
110 snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH, | |
111 "%s near '%s'", msg_text, saved_text); | |
112 msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0'; | |
113 result = msg_with_context; | |
114 } | |
115 } | |
116 else | |
117 { | |
118 if(lex->stream.state == STREAM_STATE_ERROR) { | |
119 /* No context for UTF-8 decoding errors */ | |
120 result = msg_text; | |
121 } | |
122 else { | |
123 snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH, | |
124 "%s near end of file", msg_text); | |
125 msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0'; | |
126 result = msg_with_context; | |
127 } | |
128 } | |
129 } | |
130 | |
131 jsonp_error_set(error, line, col, pos, "%s", result); | |
132 } | |
133 | |
134 | |
135 /*** lexical analyzer ***/ | |
136 | |
137 static void | |
138 stream_init(stream_t *stream, get_func get, void *data) | |
139 { | |
140 stream->get = get; | |
141 stream->data = data; | |
142 stream->buffer[0] = '\0'; | |
143 stream->buffer_pos = 0; | |
144 | |
145 stream->state = STREAM_STATE_OK; | |
146 stream->line = 1; | |
147 stream->column = 0; | |
148 stream->position = 0; | |
149 } | |
150 | |
151 static int stream_get(stream_t *stream, json_error_t *error) | |
152 { | |
153 int c; | |
154 | |
155 if(stream->state != STREAM_STATE_OK) | |
156 return stream->state; | |
157 | |
158 if(!stream->buffer[stream->buffer_pos]) | |
159 { | |
160 c = stream->get(stream->data); | |
161 if(c == EOF) { | |
162 stream->state = STREAM_STATE_EOF; | |
163 return STREAM_STATE_EOF; | |
164 } | |
165 | |
166 stream->buffer[0] = c; | |
167 stream->buffer_pos = 0; | |
168 | |
169 if(0x80 <= c && c <= 0xFF) | |
170 { | |
171 /* multi-byte UTF-8 sequence */ | |
172 int i, count; | |
173 | |
174 count = utf8_check_first(c); | |
175 if(!count) | |
176 goto out; | |
177 | |
178 assert(count >= 2); | |
179 | |
180 for(i = 1; i < count; i++) | |
181 stream->buffer[i] = stream->get(stream->data); | |
182 | |
183 if(!utf8_check_full(stream->buffer, count, NULL)) | |
184 goto out; | |
185 | |
186 stream->buffer[count] = '\0'; | |
187 } | |
188 else | |
189 stream->buffer[1] = '\0'; | |
190 } | |
191 | |
192 c = stream->buffer[stream->buffer_pos++]; | |
193 | |
194 stream->position++; | |
195 if(c == '\n') { | |
196 stream->line++; | |
197 stream->last_column = stream->column; | |
198 stream->column = 0; | |
199 } | |
200 else if(utf8_check_first(c)) { | |
201 /* track the Unicode character column, so increment only if | |
202 this is the first character of a UTF-8 sequence */ | |
203 stream->column++; | |
204 } | |
205 | |
206 return c; | |
207 | |
208 out: | |
209 stream->state = STREAM_STATE_ERROR; | |
210 error_set(error, stream_to_lex(stream), "unable to decode byte 0x%x", c); | |
211 return STREAM_STATE_ERROR; | |
212 } | |
213 | |
214 static void stream_unget(stream_t *stream, int c) | |
215 { | |
216 if(c == STREAM_STATE_EOF || c == STREAM_STATE_ERROR) | |
217 return; | |
218 | |
219 stream->position--; | |
220 if(c == '\n') { | |
221 stream->line--; | |
222 stream->column = stream->last_column; | |
223 } | |
224 else if(utf8_check_first(c)) | |
225 stream->column--; | |
226 | |
227 assert(stream->buffer_pos > 0); | |
228 stream->buffer_pos--; | |
229 assert(stream->buffer[stream->buffer_pos] == c); | |
230 } | |
231 | |
232 | |
233 static int lex_get(lex_t *lex, json_error_t *error) | |
234 { | |
235 return stream_get(&lex->stream, error); | |
236 } | |
237 | |
238 static void lex_save(lex_t *lex, int c) | |
239 { | |
240 strbuffer_append_byte(&lex->saved_text, c); | |
241 } | |
242 | |
243 static int lex_get_save(lex_t *lex, json_error_t *error) | |
244 { | |
245 int c = stream_get(&lex->stream, error); | |
246 if(c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR) | |
247 lex_save(lex, c); | |
248 return c; | |
249 } | |
250 | |
251 static void lex_unget(lex_t *lex, int c) | |
252 { | |
253 stream_unget(&lex->stream, c); | |
254 } | |
255 | |
256 static void lex_unget_unsave(lex_t *lex, int c) | |
257 { | |
258 if(c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR) { | |
259 /* Since we treat warnings as errors, when assertions are turned | |
260 * off the "d" variable would be set but never used. Which is | |
261 * treated as an error by GCC. | |
262 */ | |
263 #ifndef NDEBUG | |
264 char d; | |
265 #endif | |
266 stream_unget(&lex->stream, c); | |
267 #ifndef NDEBUG | |
268 d = | |
269 #endif | |
270 strbuffer_pop(&lex->saved_text); | |
271 assert(c == d); | |
272 } | |
273 } | |
274 | |
275 static void lex_save_cached(lex_t *lex) | |
276 { | |
277 while(lex->stream.buffer[lex->stream.buffer_pos] != '\0') | |
278 { | |
279 lex_save(lex, lex->stream.buffer[lex->stream.buffer_pos]); | |
280 lex->stream.buffer_pos++; | |
281 lex->stream.position++; | |
282 } | |
283 } | |
284 | |
285 static void lex_free_string(lex_t *lex) | |
286 { | |
287 jsonp_free(lex->value.string.val); | |
288 lex->value.string.val = NULL; | |
289 lex->value.string.len = 0; | |
290 } | |
291 | |
/*
 * Decode the four hex digits of a \uXXXX escape.
 *
 * str  points at the 'u'; the four characters after it are read
 *
 * Returns the decoded value (0..0xFFFF), or -1 if any of the four
 * characters is not a hex digit. A terminating '\0' among them is
 * also rejected, so a short string is never read past its end.
 */
static int32_t decode_unicode_escape(const char *str)
{
    int i;
    int32_t value = 0;

    assert(str[0] == 'u');

    for(i = 1; i <= 4; i++) {
        char c = str[i];
        int digit;

        /* Accept only 0-9, a-f and A-F. Testing for "any letter"
           would turn e.g. 'z' into a bogus digit value instead of
           reporting failure. */
        if('0' <= c && c <= '9')
            digit = c - '0';
        else if('a' <= c && c <= 'f')
            digit = c - 'a' + 10;
        else if('A' <= c && c <= 'F')
            digit = c - 'A' + 10;
        else
            return -1;

        value = (value << 4) + digit;
    }

    return value;
}
315 | |
316 static void lex_scan_string(lex_t *lex, json_error_t *error) | |
317 { | |
318 int c; | |
319 const char *p; | |
320 char *t; | |
321 int i; | |
322 | |
323 lex->value.string.val = NULL; | |
324 lex->token = TOKEN_INVALID; | |
325 | |
326 c = lex_get_save(lex, error); | |
327 | |
328 while(c != '"') { | |
329 if(c == STREAM_STATE_ERROR) | |
330 goto out; | |
331 | |
332 else if(c == STREAM_STATE_EOF) { | |
333 error_set(error, lex, "premature end of input"); | |
334 goto out; | |
335 } | |
336 | |
337 else if(0 <= c && c <= 0x1F) { | |
338 /* control character */ | |
339 lex_unget_unsave(lex, c); | |
340 if(c == '\n') | |
341 error_set(error, lex, "unexpected newline", c); | |
342 else | |
343 error_set(error, lex, "control character 0x%x", c); | |
344 goto out; | |
345 } | |
346 | |
347 else if(c == '\\') { | |
348 c = lex_get_save(lex, error); | |
349 if(c == 'u') { | |
350 c = lex_get_save(lex, error); | |
351 for(i = 0; i < 4; i++) { | |
352 if(!l_isxdigit(c)) { | |
353 error_set(error, lex, "invalid escape"); | |
354 goto out; | |
355 } | |
356 c = lex_get_save(lex, error); | |
357 } | |
358 } | |
359 else if(c == '"' || c == '\\' || c == '/' || c == 'b' || | |
360 c == 'f' || c == 'n' || c == 'r' || c == 't') | |
361 c = lex_get_save(lex, error); | |
362 else { | |
363 error_set(error, lex, "invalid escape"); | |
364 goto out; | |
365 } | |
366 } | |
367 else | |
368 c = lex_get_save(lex, error); | |
369 } | |
370 | |
371 /* the actual value is at most of the same length as the source | |
372 string, because: | |
373 - shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte | |
374 - a single \uXXXX escape (length 6) is converted to at most 3 bytes | |
375 - two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair | |
376 are converted to 4 bytes | |
377 */ | |
378 t = jsonp_malloc(lex->saved_text.length + 1); | |
379 if(!t) { | |
380 /* this is not very nice, since TOKEN_INVALID is returned */ | |
381 goto out; | |
382 } | |
383 lex->value.string.val = t; | |
384 | |
385 /* + 1 to skip the " */ | |
386 p = strbuffer_value(&lex->saved_text) + 1; | |
387 | |
388 while(*p != '"') { | |
389 if(*p == '\\') { | |
390 p++; | |
391 if(*p == 'u') { | |
392 size_t length; | |
393 int32_t value; | |
394 | |
395 value = decode_unicode_escape(p); | |
396 if(value < 0) { | |
397 error_set(error, lex, "invalid Unicode escape '%.6s'", p - 1); | |
398 goto out; | |
399 } | |
400 p += 5; | |
401 | |
402 if(0xD800 <= value && value <= 0xDBFF) { | |
403 /* surrogate pair */ | |
404 if(*p == '\\' && *(p + 1) == 'u') { | |
405 int32_t value2 = decode_unicode_escape(++p); | |
406 if(value2 < 0) { | |
407 error_set(error, lex, "invalid Unicode escape '%.6s'", p - 1); | |
408 goto out; | |
409 } | |
410 p += 5; | |
411 | |
412 if(0xDC00 <= value2 && value2 <= 0xDFFF) { | |
413 /* valid second surrogate */ | |
414 value = | |
415 ((value - 0xD800) << 10) + | |
416 (value2 - 0xDC00) + | |
417 0x10000; | |
418 } | |
419 else { | |
420 /* invalid second surrogate */ | |
421 error_set(error, lex, | |
422 "invalid Unicode '\\u%04X\\u%04X'", | |
423 value, value2); | |
424 goto out; | |
425 } | |
426 } | |
427 else { | |
428 /* no second surrogate */ | |
429 error_set(error, lex, "invalid Unicode '\\u%04X'", | |
430 value); | |
431 goto out; | |
432 } | |
433 } | |
434 else if(0xDC00 <= value && value <= 0xDFFF) { | |
435 error_set(error, lex, "invalid Unicode '\\u%04X'", value); | |
436 goto out; | |
437 } | |
438 | |
439 if(utf8_encode(value, t, &length)) | |
440 assert(0); | |
441 t += length; | |
442 } | |
443 else { | |
444 switch(*p) { | |
445 case '"': case '\\': case '/': | |
446 *t = *p; break; | |
447 case 'b': *t = '\b'; break; | |
448 case 'f': *t = '\f'; break; | |
449 case 'n': *t = '\n'; break; | |
450 case 'r': *t = '\r'; break; | |
451 case 't': *t = '\t'; break; | |
452 default: assert(0); | |
453 } | |
454 t++; | |
455 p++; | |
456 } | |
457 } | |
458 else | |
459 *(t++) = *(p++); | |
460 } | |
461 *t = '\0'; | |
462 lex->value.string.len = t - lex->value.string.val; | |
463 lex->token = TOKEN_STRING; | |
464 return; | |
465 | |
466 out: | |
467 lex_free_string(lex); | |
468 } | |
469 | |
/* json_strtoint is the string-to-integer routine lex_scan_number()
   calls. It is selected to match the width of json_int_t: a 64-bit
   routine when JSON_INTEGER_IS_LONG_LONG is set, strtol otherwise.
   NOTE(review): when JANSSON_USING_CMAKE is defined nothing here
   defines json_strtoint; presumably the CMake-generated configuration
   header does - confirm. */
470 #ifndef JANSSON_USING_CMAKE /* disabled if using cmake */ | |
471 #if JSON_INTEGER_IS_LONG_LONG | |
472 #ifdef _MSC_VER /* Microsoft Visual Studio */ | |
473 #define json_strtoint _strtoi64 | |
474 #else | |
475 #define json_strtoint strtoll | |
476 #endif | |
477 #else | |
478 #define json_strtoint strtol | |
479 #endif | |
480 #endif | |
481 | |
482 static int lex_scan_number(lex_t *lex, int c, json_error_t *error) | |
483 { | |
484 const char *saved_text; | |
485 char *end; | |
486 double doubleval; | |
487 | |
488 lex->token = TOKEN_INVALID; | |
489 | |
490 if(c == '-') | |
491 c = lex_get_save(lex, error); | |
492 | |
493 if(c == '0') { | |
494 c = lex_get_save(lex, error); | |
495 if(l_isdigit(c)) { | |
496 lex_unget_unsave(lex, c); | |
497 goto out; | |
498 } | |
499 } | |
500 else if(l_isdigit(c)) { | |
501 c = lex_get_save(lex, error); | |
502 while(l_isdigit(c)) | |
503 c = lex_get_save(lex, error); | |
504 } | |
505 else { | |
506 lex_unget_unsave(lex, c); | |
507 goto out; | |
508 } | |
509 | |
510 if(c != '.' && c != 'E' && c != 'e') { | |
511 json_int_t intval; | |
512 | |
513 lex_unget_unsave(lex, c); | |
514 | |
515 saved_text = strbuffer_value(&lex->saved_text); | |
516 | |
517 errno = 0; | |
518 intval = json_strtoint(saved_text, &end, 10); | |
519 if(errno == ERANGE) { | |
520 if(intval < 0) | |
521 error_set(error, lex, "too big negative integer"); | |
522 else | |
523 error_set(error, lex, "too big integer"); | |
524 goto out; | |
525 } | |
526 | |
527 assert(end == saved_text + lex->saved_text.length); | |
528 | |
529 lex->token = TOKEN_INTEGER; | |
530 lex->value.integer = intval; | |
531 return 0; | |
532 } | |
533 | |
534 if(c == '.') { | |
535 c = lex_get(lex, error); | |
536 if(!l_isdigit(c)) { | |
537 lex_unget(lex, c); | |
538 goto out; | |
539 } | |
540 lex_save(lex, c); | |
541 | |
542 c = lex_get_save(lex, error); | |
543 while(l_isdigit(c)) | |
544 c = lex_get_save(lex, error); | |
545 } | |
546 | |
547 if(c == 'E' || c == 'e') { | |
548 c = lex_get_save(lex, error); | |
549 if(c == '+' || c == '-') | |
550 c = lex_get_save(lex, error); | |
551 | |
552 if(!l_isdigit(c)) { | |
553 lex_unget_unsave(lex, c); | |
554 goto out; | |
555 } | |
556 | |
557 c = lex_get_save(lex, error); | |
558 while(l_isdigit(c)) | |
559 c = lex_get_save(lex, error); | |
560 } | |
561 | |
562 lex_unget_unsave(lex, c); | |
563 | |
564 if(jsonp_strtod(&lex->saved_text, &doubleval)) { | |
565 error_set(error, lex, "real number overflow"); | |
566 goto out; | |
567 } | |
568 | |
569 lex->token = TOKEN_REAL; | |
570 lex->value.real = doubleval; | |
571 return 0; | |
572 | |
573 out: | |
574 return -1; | |
575 } | |
576 | |
577 static int lex_scan(lex_t *lex, json_error_t *error) | |
578 { | |
579 int c; | |
580 | |
581 strbuffer_clear(&lex->saved_text); | |
582 | |
583 if(lex->token == TOKEN_STRING) | |
584 lex_free_string(lex); | |
585 | |
586 c = lex_get(lex, error); | |
587 while(c == ' ' || c == '\t' || c == '\n' || c == '\r') | |
588 c = lex_get(lex, error); | |
589 | |
590 if(c == STREAM_STATE_EOF) { | |
591 lex->token = TOKEN_EOF; | |
592 goto out; | |
593 } | |
594 | |
595 if(c == STREAM_STATE_ERROR) { | |
596 lex->token = TOKEN_INVALID; | |
597 goto out; | |
598 } | |
599 | |
600 lex_save(lex, c); | |
601 | |
602 if(c == '{' || c == '}' || c == '[' || c == ']' || c == ':' || c == ',') | |
603 lex->token = c; | |
604 | |
605 else if(c == '"') | |
606 lex_scan_string(lex, error); | |
607 | |
608 else if(l_isdigit(c) || c == '-') { | |
609 if(lex_scan_number(lex, c, error)) | |
610 goto out; | |
611 } | |
612 | |
613 else if(l_isalpha(c)) { | |
614 /* eat up the whole identifier for clearer error messages */ | |
615 const char *saved_text; | |
616 | |
617 c = lex_get_save(lex, error); | |
618 while(l_isalpha(c)) | |
619 c = lex_get_save(lex, error); | |
620 lex_unget_unsave(lex, c); | |
621 | |
622 saved_text = strbuffer_value(&lex->saved_text); | |
623 | |
624 if(strcmp(saved_text, "true") == 0) | |
625 lex->token = TOKEN_TRUE; | |
626 else if(strcmp(saved_text, "false") == 0) | |
627 lex->token = TOKEN_FALSE; | |
628 else if(strcmp(saved_text, "null") == 0) | |
629 lex->token = TOKEN_NULL; | |
630 else | |
631 lex->token = TOKEN_INVALID; | |
632 } | |
633 | |
634 else { | |
635 /* save the rest of the input UTF-8 sequence to get an error | |
636 message of valid UTF-8 */ | |
637 lex_save_cached(lex); | |
638 lex->token = TOKEN_INVALID; | |
639 } | |
640 | |
641 out: | |
642 return lex->token; | |
643 } | |
644 | |
645 static char *lex_steal_string(lex_t *lex, size_t *out_len) | |
646 { | |
647 char *result = NULL; | |
648 if(lex->token == TOKEN_STRING) { | |
649 result = lex->value.string.val; | |
650 *out_len = lex->value.string.len; | |
651 lex->value.string.val = NULL; | |
652 lex->value.string.len = 0; | |
653 } | |
654 return result; | |
655 } | |
656 | |
657 static int lex_init(lex_t *lex, get_func get, void *data) | |
658 { | |
659 stream_init(&lex->stream, get, data); | |
660 if(strbuffer_init(&lex->saved_text)) | |
661 return -1; | |
662 | |
663 lex->token = TOKEN_INVALID; | |
664 return 0; | |
665 } | |
666 | |
667 static void lex_close(lex_t *lex) | |
668 { | |
669 if(lex->token == TOKEN_STRING) | |
670 lex_free_string(lex); | |
671 strbuffer_close(&lex->saved_text); | |
672 } | |
673 | |
674 | |
675 /*** parser ***/ | |
676 | |
677 static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error); | |
678 | |
679 static json_t *parse_object(lex_t *lex, size_t flags, json_error_t *error) | |
680 { | |
681 json_t *object = json_object(); | |
682 if(!object) | |
683 return NULL; | |
684 | |
685 lex_scan(lex, error); | |
686 if(lex->token == '}') | |
687 return object; | |
688 | |
689 while(1) { | |
690 char *key; | |
691 size_t len; | |
692 json_t *value; | |
693 | |
694 if(lex->token != TOKEN_STRING) { | |
695 error_set(error, lex, "string or '}' expected"); | |
696 goto error; | |
697 } | |
698 | |
699 key = lex_steal_string(lex, &len); | |
700 if(!key) | |
701 return NULL; | |
702 if (memchr(key, '\0', len)) { | |
703 jsonp_free(key); | |
704 error_set(error, lex, "NUL byte in object key not supported"); | |
705 goto error; | |
706 } | |
707 | |
708 if(flags & JSON_REJECT_DUPLICATES) { | |
709 if(json_object_get(object, key)) { | |
710 jsonp_free(key); | |
711 error_set(error, lex, "duplicate object key"); | |
712 goto error; | |
713 } | |
714 } | |
715 | |
716 lex_scan(lex, error); | |
717 if(lex->token != ':') { | |
718 jsonp_free(key); | |
719 error_set(error, lex, "':' expected"); | |
720 goto error; | |
721 } | |
722 | |
723 lex_scan(lex, error); | |
724 value = parse_value(lex, flags, error); | |
725 if(!value) { | |
726 jsonp_free(key); | |
727 goto error; | |
728 } | |
729 | |
730 if(json_object_set_nocheck(object, key, value)) { | |
731 jsonp_free(key); | |
732 json_decref(value); | |
733 goto error; | |
734 } | |
735 | |
736 json_decref(value); | |
737 jsonp_free(key); | |
738 | |
739 lex_scan(lex, error); | |
740 if(lex->token != ',') | |
741 break; | |
742 | |
743 lex_scan(lex, error); | |
744 } | |
745 | |
746 if(lex->token != '}') { | |
747 error_set(error, lex, "'}' expected"); | |
748 goto error; | |
749 } | |
750 | |
751 return object; | |
752 | |
753 error: | |
754 json_decref(object); | |
755 return NULL; | |
756 } | |
757 | |
758 static json_t *parse_array(lex_t *lex, size_t flags, json_error_t *error) | |
759 { | |
760 json_t *array = json_array(); | |
761 if(!array) | |
762 return NULL; | |
763 | |
764 lex_scan(lex, error); | |
765 if(lex->token == ']') | |
766 return array; | |
767 | |
768 while(lex->token) { | |
769 json_t *elem = parse_value(lex, flags, error); | |
770 if(!elem) | |
771 goto error; | |
772 | |
773 if(json_array_append(array, elem)) { | |
774 json_decref(elem); | |
775 goto error; | |
776 } | |
777 json_decref(elem); | |
778 | |
779 lex_scan(lex, error); | |
780 if(lex->token != ',') | |
781 break; | |
782 | |
783 lex_scan(lex, error); | |
784 } | |
785 | |
786 if(lex->token != ']') { | |
787 error_set(error, lex, "']' expected"); | |
788 goto error; | |
789 } | |
790 | |
791 return array; | |
792 | |
793 error: | |
794 json_decref(array); | |
795 return NULL; | |
796 } | |
797 | |
798 static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error) | |
799 { | |
800 json_t *json; | |
801 double value; | |
802 | |
803 switch(lex->token) { | |
804 case TOKEN_STRING: { | |
805 const char *value = lex->value.string.val; | |
806 size_t len = lex->value.string.len; | |
807 | |
808 if(!(flags & JSON_ALLOW_NUL)) { | |
809 if(memchr(value, '\0', len)) { | |
810 error_set(error, lex, "\\u0000 is not allowed without JSON_ALLOW_NUL"); | |
811 return NULL; | |
812 } | |
813 } | |
814 | |
815 json = jsonp_stringn_nocheck_own(value, len); | |
816 if(json) { | |
817 lex->value.string.val = NULL; | |
818 lex->value.string.len = 0; | |
819 } | |
820 break; | |
821 } | |
822 | |
823 case TOKEN_INTEGER: { | |
824 if (flags & JSON_DECODE_INT_AS_REAL) { | |
825 if(jsonp_strtod(&lex->saved_text, &value)) { | |
826 error_set(error, lex, "real number overflow"); | |
827 return NULL; | |
828 } | |
829 json = json_real(value); | |
830 } else { | |
831 json = json_integer(lex->value.integer); | |
832 } | |
833 break; | |
834 } | |
835 | |
836 case TOKEN_REAL: { | |
837 json = json_real(lex->value.real); | |
838 break; | |
839 } | |
840 | |
841 case TOKEN_TRUE: | |
842 json = json_true(); | |
843 break; | |
844 | |
845 case TOKEN_FALSE: | |
846 json = json_false(); | |
847 break; | |
848 | |
849 case TOKEN_NULL: | |
850 json = json_null(); | |
851 break; | |
852 | |
853 case '{': | |
854 json = parse_object(lex, flags, error); | |
855 break; | |
856 | |
857 case '[': | |
858 json = parse_array(lex, flags, error); | |
859 break; | |
860 | |
861 case TOKEN_INVALID: | |
862 error_set(error, lex, "invalid token"); | |
863 return NULL; | |
864 | |
865 default: | |
866 error_set(error, lex, "unexpected token"); | |
867 return NULL; | |
868 } | |
869 | |
870 if(!json) | |
871 return NULL; | |
872 | |
873 return json; | |
874 } | |
875 | |
876 static json_t *parse_json(lex_t *lex, size_t flags, json_error_t *error) | |
877 { | |
878 json_t *result; | |
879 | |
880 lex_scan(lex, error); | |
881 if(!(flags & JSON_DECODE_ANY)) { | |
882 if(lex->token != '[' && lex->token != '{') { | |
883 error_set(error, lex, "'[' or '{' expected"); | |
884 return NULL; | |
885 } | |
886 } | |
887 | |
888 result = parse_value(lex, flags, error); | |
889 if(!result) | |
890 return NULL; | |
891 | |
892 if(!(flags & JSON_DISABLE_EOF_CHECK)) { | |
893 lex_scan(lex, error); | |
894 if(lex->token != TOKEN_EOF) { | |
895 error_set(error, lex, "end of file expected"); | |
896 json_decref(result); | |
897 return NULL; | |
898 } | |
899 } | |
900 | |
901 if(error) { | |
902 /* Save the position even though there was no error */ | |
903 error->position = lex->stream.position; | |
904 } | |
905 | |
906 return result; | |
907 } | |
908 | |
/* Read cursor over a '\0'-terminated string, used by json_loads(). */
typedef struct
{
    const char *data;
    /* index of the next byte to read; size_t so that inputs longer
       than INT_MAX cannot overflow it */
    size_t pos;
} string_data_t;

/*
 * get_func implementation for '\0'-terminated strings.
 *
 * data  points to a string_data_t
 *
 * Returns the next byte as an unsigned char value and advances the
 * cursor, or returns EOF at the terminating '\0' without advancing.
 */
static int string_get(void *data)
{
    string_data_t *stream = data;
    char c = stream->data[stream->pos];

    if(c == '\0')
        return EOF;

    stream->pos++;
    return (unsigned char)c;
}
928 | |
929 json_t *json_loads(const char *string, size_t flags, json_error_t *error) | |
930 { | |
931 lex_t lex; | |
932 json_t *result; | |
933 string_data_t stream_data; | |
934 | |
935 jsonp_error_init(error, "<string>"); | |
936 | |
937 if (string == NULL) { | |
938 error_set(error, NULL, "wrong arguments"); | |
939 return NULL; | |
940 } | |
941 | |
942 stream_data.data = string; | |
943 stream_data.pos = 0; | |
944 | |
945 if(lex_init(&lex, string_get, (void *)&stream_data)) | |
946 return NULL; | |
947 | |
948 result = parse_json(&lex, flags, error); | |
949 | |
950 lex_close(&lex); | |
951 return result; | |
952 } | |
953 | |
/* Read cursor over a byte buffer of known length, used by
   json_loadb(). */
typedef struct
{
    const char *data;
    size_t len; /* total number of bytes in data */
    size_t pos; /* index of the next byte to read */
} buffer_data_t;

/*
 * get_func implementation for length-delimited buffers.
 *
 * data  points to a buffer_data_t
 *
 * Returns the next byte as an unsigned char value and advances the
 * cursor, or EOF once pos has reached len. Embedded '\0' bytes are
 * returned like any other byte.
 */
static int buffer_get(void *data)
{
    buffer_data_t *source = data;

    if(source->pos < source->len)
        return (unsigned char)source->data[source->pos++];

    return EOF;
}
972 | |
973 json_t *json_loadb(const char *buffer, size_t buflen, size_t flags, json_error_t *error) | |
974 { | |
975 lex_t lex; | |
976 json_t *result; | |
977 buffer_data_t stream_data; | |
978 | |
979 jsonp_error_init(error, "<buffer>"); | |
980 | |
981 if (buffer == NULL) { | |
982 error_set(error, NULL, "wrong arguments"); | |
983 return NULL; | |
984 } | |
985 | |
986 stream_data.data = buffer; | |
987 stream_data.pos = 0; | |
988 stream_data.len = buflen; | |
989 | |
990 if(lex_init(&lex, buffer_get, (void *)&stream_data)) | |
991 return NULL; | |
992 | |
993 result = parse_json(&lex, flags, error); | |
994 | |
995 lex_close(&lex); | |
996 return result; | |
997 } | |
998 | |
999 json_t *json_loadf(FILE *input, size_t flags, json_error_t *error) | |
1000 { | |
1001 lex_t lex; | |
1002 const char *source; | |
1003 json_t *result; | |
1004 | |
1005 if(input == stdin) | |
1006 source = "<stdin>"; | |
1007 else | |
1008 source = "<stream>"; | |
1009 | |
1010 jsonp_error_init(error, source); | |
1011 | |
1012 if (input == NULL) { | |
1013 error_set(error, NULL, "wrong arguments"); | |
1014 return NULL; | |
1015 } | |
1016 | |
1017 if(lex_init(&lex, (get_func)fgetc, input)) | |
1018 return NULL; | |
1019 | |
1020 result = parse_json(&lex, flags, error); | |
1021 | |
1022 lex_close(&lex); | |
1023 return result; | |
1024 } | |
1025 | |
1026 json_t *json_load_file(const char *path, size_t flags, json_error_t *error) | |
1027 { | |
1028 json_t *result; | |
1029 FILE *fp; | |
1030 | |
1031 jsonp_error_init(error, path); | |
1032 | |
1033 if (path == NULL) { | |
1034 error_set(error, NULL, "wrong arguments"); | |
1035 return NULL; | |
1036 } | |
1037 | |
1038 fp = fopen(path, "rb"); | |
1039 if(!fp) | |
1040 { | |
1041 error_set(error, NULL, "unable to open %s: %s", | |
1042 path, strerror(errno)); | |
1043 return NULL; | |
1044 } | |
1045 | |
1046 result = json_loadf(fp, flags, error); | |
1047 | |
1048 fclose(fp); | |
1049 return result; | |
1050 } | |
1051 | |
1052 #define MAX_BUF_LEN 1024 | |
1053 | |
1054 typedef struct | |
1055 { | |
1056 char data[MAX_BUF_LEN]; | |
1057 size_t len; | |
1058 size_t pos; | |
1059 json_load_callback_t callback; | |
1060 void *arg; | |
1061 } callback_data_t; | |
1062 | |
1063 static int callback_get(void *data) | |
1064 { | |
1065 char c; | |
1066 callback_data_t *stream = data; | |
1067 | |
1068 if(stream->pos >= stream->len) { | |
1069 stream->pos = 0; | |
1070 stream->len = stream->callback(stream->data, MAX_BUF_LEN, stream->arg); | |
1071 if(stream->len == 0 || stream->len == (size_t)-1) | |
1072 return EOF; | |
1073 } | |
1074 | |
1075 c = stream->data[stream->pos]; | |
1076 stream->pos++; | |
1077 return (unsigned char)c; | |
1078 } | |
1079 | |
1080 json_t *json_load_callback(json_load_callback_t callback, void *arg, size_t flags, json_error_t *error) | |
1081 { | |
1082 lex_t lex; | |
1083 json_t *result; | |
1084 | |
1085 callback_data_t stream_data; | |
1086 | |
1087 memset(&stream_data, 0, sizeof(stream_data)); | |
1088 stream_data.callback = callback; | |
1089 stream_data.arg = arg; | |
1090 | |
1091 jsonp_error_init(error, "<callback>"); | |
1092 | |
1093 if (callback == NULL) { | |
1094 error_set(error, NULL, "wrong arguments"); | |
1095 return NULL; | |
1096 } | |
1097 | |
1098 if(lex_init(&lex, (get_func)callback_get, &stream_data)) | |
1099 return NULL; | |
1100 | |
1101 result = parse_json(&lex, flags, error); | |
1102 | |
1103 lex_close(&lex); | |
1104 return result; | |
1105 } |