1 | /*
2 | * Copyright (c) 2007-2014, Lloyd Hilaiel <me@lloyd.io>
3 | *
4 | * Permission to use, copy, modify, and/or distribute this software for any
5 | * purpose with or without fee is hereby granted, provided that the above
6 | * copyright notice and this permission notice appear in all copies.
7 | *
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 | */
16 |
17 | #include "yajl/yajl_parse.h"
18 | #include "yajl_lex.h"
19 | #include "yajl_parser.h"
20 | #include "yajl_encode.h"
21 | #include "yajl_bytestack.h"
22 |
23 | #include <stdlib.h>
24 | #include <limits.h>
25 | #include <errno.h>
26 | #include <stdio.h>
27 | #include <string.h>
28 | #include <ctype.h>
29 | #include <assert.h>
30 | #include <math.h>
31 |
/* Largest magnitude that can still be multiplied by 10 without exceeding
 * LLONG_MAX.  Anything above it overflows before the next digit is added. */
#define MAX_VALUE_TO_MULTIPLY (LLONG_MAX / 10)

/*
 * Convert the first `length` bytes of `number` to a long long.
 *
 * The text may start with a '-' and/or a '+' sign, followed by decimal
 * digits only; the buffer does not need to be NUL terminated and nothing
 * beyond `length` bytes is read.
 *
 * Returns the parsed value.  The full long long range is accepted,
 * including LLONG_MIN.  On overflow, or when a non-digit byte is found,
 * errno is set to ERANGE and LLONG_MAX (or LLONG_MIN for negative input)
 * is returned, in the manner of strtoll.  errno is left untouched on
 * success, so callers should clear it before the call.
 */
long long
yajl_parse_integer(const unsigned char *number, size_t length)
{
    const unsigned char *pos = number;
    const unsigned char *end = number + length;
    /* The magnitude is accumulated unsigned so that |LLONG_MIN|, which is
     * LLONG_MAX + 1, is representable and no signed overflow can occur. */
    unsigned long long magnitude = 0;
    unsigned long long limit = (unsigned long long) LLONG_MAX;
    int negative = 0;

    if (pos < end && *pos == '-') {
        pos++;
        negative = 1;
        limit += 1ULL; /* negative numbers may reach LLONG_MAX + 1 */
    }
    if (pos < end && *pos == '+') {
        pos++;
    }

    for (; pos < end; pos++) {
        unsigned long long digit;

        if (*pos < '0' || *pos > '9') {
            errno = ERANGE;
            return negative ? LLONG_MIN : LLONG_MAX;
        }
        digit = (unsigned long long) (*pos - '0');

        /* The first test keeps the multiplication itself from wrapping;
         * the second checks that magnitude * 10 + digit stays <= limit. */
        if (magnitude > (unsigned long long) MAX_VALUE_TO_MULTIPLY ||
            magnitude * 10 > limit - digit)
        {
            errno = ERANGE;
            return negative ? LLONG_MIN : LLONG_MAX;
        }
        magnitude = magnitude * 10 + digit;
    }

    if (!negative) {
        return (long long) magnitude;
    }
    if (magnitude == limit) {
        /* exactly LLONG_MIN; -(long long) magnitude would overflow */
        return LLONG_MIN;
    }
    return -(long long) magnitude;
}
63 |
64 | unsigned char *
65 | yajl_render_error_string(yajl_handle hand, const unsigned char * jsonText,
66 | size_t jsonTextLen, int verbose)
67 | {
68 | size_t offset = hand->bytesConsumed;
69 | unsigned char * str;
70 | const char * errorType = NULL;
71 | const char * errorText = NULL;
72 | unsigned char text[72];
73 | const char * arrow = " (right here) ------^\n";
74 |
75 | if (yajl_bs_current(hand->stateStack) == yajl_state_parse_error) {
76 | errorType = "parse";
77 | errorText = hand->parseError;
78 | } else if (yajl_bs_current(hand->stateStack) == yajl_state_lexical_error) {
79 | errorType = "lexical";
80 | errorText = yajl_lex_error_to_string(yajl_lex_get_error(hand->lexer));
81 | } else {
82 | errorType = "unknown";
83 | }
84 |
85 | {
86 | size_t memneeded = 0;
87 | memneeded += strlen(errorType);
88 | memneeded += strlen(" error");
89 | if (errorText != NULL) {
90 | memneeded += strlen(": ");
91 | memneeded += strlen(errorText);
92 | }
93 | str = (unsigned char *) YA_MALLOC(&(hand->alloc), memneeded + 2);
94 | if (!str) return NULL;
95 | str[0] = 0;
96 | strcat((char *) str, errorType);
97 | strcat((char *) str, " error");
98 | if (errorText != NULL) {
99 | strcat((char *) str, ": ");
100 | strcat((char *) str, errorText);
101 | }
102 | strcat((char *) str, "\n");
103 | }
104 |
105 | /* now, if verbose was specified, we append as many spaces as needed to make
106 | * sure the error falls at char 40 */
107 | if (verbose) {
108 | size_t start, end, i;
109 | size_t spacesNeeded;
110 |
111 | /* xxx this doesn't seem to be working right.... */
112 | spacesNeeded = (offset < 30 ? 40 - offset : 10);
113 | start = (offset >= 30 ? offset - 30 : 0);
114 | end = (offset + 30 > jsonTextLen ? jsonTextLen : offset + 30);
115 |
116 | for (i=0; i < spacesNeeded; i++) {
117 | text[i] = ' ';
118 | }
119 |
120 | for (; start < end; start++, i++) {
121 | if (jsonText[start] != '\n' && jsonText[start] != '\r') {
122 | text[i] = jsonText[start];
123 | } else {
124 | text[i] = ' ';
125 | }
126 | }
127 | assert(i <= 71);
128 | text[i++] = '\n';
129 | text[i] = 0;
130 | {
131 | char * newStr = (char *)
132 | YA_MALLOC(&(hand->alloc), (size_t)(strlen((char *) str) +
133 | strlen((char *) text) +
134 | strlen(arrow) + 1));
135 | if (newStr) {
136 | newStr[0] = 0;
137 | strcat((char *) newStr, (char *) str);
138 | strcat((char *) newStr, (char *) text);
139 | strcat((char *) newStr, arrow);
140 | }
141 | YA_FREE(&(hand->alloc), str);
142 | str = (unsigned char *) newStr;
143 | }
144 | }
145 | return str;
146 | }
147 |
/*
 * Check for client cancellation.
 *
 * `x` is the return value of a client callback.  When it is zero the
 * parser is put into the parse-error state with an explanatory message and
 * the *enclosing function* returns yajl_status_client_canceled.  The macro
 * expects a variable named `hand` to be in scope.
 *
 * Wrapped in do { } while (0) so that `_CC_CHK(...);` is a single statement
 * and cannot capture a following `else`.
 */
#define _CC_CHK(x)                                                      \
    do {                                                                \
        if (!(x)) {                                                     \
            yajl_bs_set(hand->stateStack, yajl_state_parse_error);      \
            hand->parseError =                                          \
                "client cancelled parse via callback return value";     \
            return yajl_status_client_canceled;                         \
        }                                                               \
    } while (0)
156 |
157 |
158 | yajl_status
159 | yajl_do_finish(yajl_handle hand)
160 | {
161 | yajl_status stat;
162 | stat = yajl_do_parse(hand,(const unsigned char *) " ",(size_t) 1);
163 |
164 | if (stat != yajl_status_ok) return stat;
165 |
166 | switch(yajl_bs_current(hand->stateStack))
167 | {
168 | case yajl_state_parse_error:
169 | case yajl_state_lexical_error:
170 | return yajl_status_error;
171 | case yajl_state_got_value:
172 | case yajl_state_parse_complete:
173 | return yajl_status_ok;
174 | default:
175 | if (!(hand->flags & yajl_allow_partial_values))
176 | {
177 | yajl_bs_set(hand->stateStack, yajl_state_parse_error);
178 | hand->parseError = "premature EOF";
179 | return yajl_status_error;
180 | }
181 | return yajl_status_ok;
182 | }
183 | }
184 |
/*
 * Run the parser state machine over one chunk of JSON text.
 *
 * Tokens are pulled from hand->lexer one at a time and dispatched on the
 * state found on top of hand->stateStack.  Client callbacks (when
 * hand->callbacks and the specific callback are non-NULL) are invoked for
 * every value, map key and container start/end.  After each step control
 * jumps back to `around_again` to re-read the (possibly changed) state.
 *
 * hand        parser handle; its state stack, lexer, decode buffer, flags,
 *             parseError and bytesConsumed fields are read and updated
 * jsonText    the chunk of input to parse
 * jsonTextLen number of bytes in jsonText
 *
 * Returns:
 *   yajl_status_ok              the lexer reported end of input for this
 *                               chunk, or the parse is complete
 *   yajl_status_error           the parser is in (or just entered) the
 *                               parse-error or lexical-error state;
 *                               hand->parseError holds the message for
 *                               parse errors
 *   yajl_status_client_canceled a callback returned zero (see _CC_CHK)
 *
 * Note on token names as used here: the *_bracket tokens open/close maps
 * and the *_brace tokens open/close arrays (see the callbacks invoked for
 * each below).
 */
yajl_status
yajl_do_parse(yajl_handle hand, const unsigned char * jsonText,
              size_t jsonTextLen)
{
    yajl_tok tok;
    const unsigned char * buf;
    size_t bufLen;
    /* position within jsonText; handed to the lexer on every call and
     * stored in the handle, where yajl_render_error_string() reads it */
    size_t * offset = &(hand->bytesConsumed);

    /* every call starts at the beginning of the supplied chunk */
    *offset = 0;

  around_again:
    switch (yajl_bs_current(hand->stateStack)) {
        case yajl_state_parse_complete:
            /* with multiple values allowed, a finished value simply
             * re-arms the parser for the next top-level value */
            if (hand->flags & yajl_allow_multiple_values) {
                yajl_bs_set(hand->stateStack, yajl_state_got_value);
                goto around_again;
            }
            /* otherwise anything but end-of-input after the value is an
             * error, unless trailing garbage is explicitly allowed */
            if (!(hand->flags & yajl_allow_trailing_garbage)) {
                if (*offset != jsonTextLen) {
                    tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                                       offset, &buf, &bufLen);
                    if (tok != yajl_tok_eof) {
                        yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                        hand->parseError = "trailing garbage";
                    }
                    goto around_again;
                }
            }
            return yajl_status_ok;
        case yajl_state_lexical_error:
        case yajl_state_parse_error:
            /* error states are sticky: nothing more is consumed */
            return yajl_status_error;
        case yajl_state_start:
        case yajl_state_got_value:
        case yajl_state_map_need_val:
        case yajl_state_array_need_val:
        case yajl_state_array_start:  {
            /* for arrays and maps, we advance the state for this
             * depth, then push the state of the next depth.
             * If an error occurs during the parsing of the nesting
             * entity, the state at this level will not matter.
             * a state that needs pushing will be anything other
             * than state_start */

            yajl_state stateToPush = yajl_state_start;

            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);

            switch (tok) {
                case yajl_tok_eof:
                    /* chunk exhausted; state is kept for the next call */
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                case yajl_tok_string:
                    /* no escapes: the lexer's bytes are passed as-is */
                    if (hand->callbacks && hand->callbacks->yajl_string) {
                        _CC_CHK(hand->callbacks->yajl_string(hand->ctx,
                                                             buf, bufLen));
                    }
                    break;
                case yajl_tok_string_with_escapes:
                    /* decode escapes into decodeBuf before the callback */
                    if (hand->callbacks && hand->callbacks->yajl_string) {
                        yajl_buf_clear(hand->decodeBuf);
                        yajl_string_decode(hand->decodeBuf, buf, bufLen);
                        _CC_CHK(hand->callbacks->yajl_string(
                                    hand->ctx, yajl_buf_data(hand->decodeBuf),
                                    yajl_buf_len(hand->decodeBuf)));
                    }
                    break;
                case yajl_tok_bool:
                    /* the first byte tells the two literals apart */
                    if (hand->callbacks && hand->callbacks->yajl_boolean) {
                        _CC_CHK(hand->callbacks->yajl_boolean(hand->ctx,
                                                              *buf == 't'));
                    }
                    break;
                case yajl_tok_null:
                    if (hand->callbacks && hand->callbacks->yajl_null) {
                        _CC_CHK(hand->callbacks->yajl_null(hand->ctx));
                    }
                    break;
                case yajl_tok_left_bracket:
                    /* opens a map */
                    if (hand->callbacks && hand->callbacks->yajl_start_map) {
                        _CC_CHK(hand->callbacks->yajl_start_map(hand->ctx));
                    }
                    stateToPush = yajl_state_map_start;
                    break;
                case yajl_tok_left_brace:
                    /* opens an array */
                    if (hand->callbacks && hand->callbacks->yajl_start_array) {
                        _CC_CHK(hand->callbacks->yajl_start_array(hand->ctx));
                    }
                    stateToPush = yajl_state_array_start;
                    break;
                case yajl_tok_integer:
                    /* yajl_number, when set, takes precedence and receives
                     * the raw text; otherwise the text is converted and
                     * passed to yajl_integer */
                    if (hand->callbacks) {
                        if (hand->callbacks->yajl_number) {
                            _CC_CHK(hand->callbacks->yajl_number(
                                        hand->ctx,(const char *) buf, bufLen));
                        } else if (hand->callbacks->yajl_integer) {
                            long long int i = 0;
                            errno = 0;
                            i = yajl_parse_integer(buf, bufLen);
                            if ((i == LLONG_MIN || i == LLONG_MAX) &&
                                errno == ERANGE)
                            {
                                yajl_bs_set(hand->stateStack,
                                            yajl_state_parse_error);
                                hand->parseError = "integer overflow" ;
                                /* try to restore error offset */
                                if (*offset >= bufLen) *offset -= bufLen;
                                else *offset = 0;
                                goto around_again;
                            }
                            _CC_CHK(hand->callbacks->yajl_integer(hand->ctx,
                                                                  i));
                        }
                    }
                    break;
                case yajl_tok_double:
                    /* same precedence rule as for integers */
                    if (hand->callbacks) {
                        if (hand->callbacks->yajl_number) {
                            _CC_CHK(hand->callbacks->yajl_number(
                                        hand->ctx, (const char *) buf, bufLen));
                        } else if (hand->callbacks->yajl_double) {
                            double d = 0.0;
                            /* the token is copied into decodeBuf and
                             * strtod() is run on that copy
                             * NOTE(review): presumably because the copy is
                             * NUL terminated while the lexer's slice is
                             * not -- confirm against yajl_buf */
                            yajl_buf_clear(hand->decodeBuf);
                            yajl_buf_append(hand->decodeBuf, buf, bufLen);
                            buf = yajl_buf_data(hand->decodeBuf);
                            errno = 0;
                            d = strtod((const char *) buf, NULL);
                            if ((d == HUGE_VAL || d == -HUGE_VAL) &&
                                errno == ERANGE)
                            {
                                yajl_bs_set(hand->stateStack,
                                            yajl_state_parse_error);
                                hand->parseError = "numeric (floating point) "
                                    "overflow";
                                /* try to restore error offset */
                                if (*offset >= bufLen) *offset -= bufLen;
                                else *offset = 0;
                                goto around_again;
                            }
                            _CC_CHK(hand->callbacks->yajl_double(hand->ctx,
                                                                 d));
                        }
                    }
                    break;
                case yajl_tok_right_brace: {
                    /* closing an array where a value was expected is only
                     * legal right after the array was opened (empty array) */
                    if (yajl_bs_current(hand->stateStack) ==
                        yajl_state_array_start)
                    {
                        if (hand->callbacks &&
                            hand->callbacks->yajl_end_array)
                        {
                            _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx));
                        }
                        yajl_bs_pop(hand->stateStack);
                        goto around_again;
                    }
                    /* FALLTHROUGH */
                }
                case yajl_tok_colon:
                case yajl_tok_comma:
                case yajl_tok_right_bracket:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError =
                        "unallowed token at this point in JSON text";
                    goto around_again;
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError = "invalid token, internal error";
                    goto around_again;
            }
            /* got a value.  transition depends on the state we're in. */
            {
                yajl_state s = (yajl_state) yajl_bs_current(hand->stateStack);
                if (s == yajl_state_start || s == yajl_state_got_value) {
                    yajl_bs_set(hand->stateStack, yajl_state_parse_complete);
                } else if (s == yajl_state_map_need_val) {
                    yajl_bs_set(hand->stateStack, yajl_state_map_got_val);
                } else {
                    yajl_bs_set(hand->stateStack, yajl_state_array_got_val);
                }
            }
            /* a container was opened: descend one level */
            if (stateToPush != yajl_state_start) {
                yajl_bs_push(hand->stateStack, stateToPush);
            }

            goto around_again;
        }
        case yajl_state_map_start:
        case yajl_state_map_need_key: {
            /* only difference between these two states is that in
             * start '}' is valid, whereas in need_key, we've parsed
             * a comma, and a string key _must_ follow */
            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);
            switch (tok) {
                case yajl_tok_eof:
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                case yajl_tok_string_with_escapes:
                    /* decode the key, then share the plain-string path */
                    if (hand->callbacks && hand->callbacks->yajl_map_key) {
                        yajl_buf_clear(hand->decodeBuf);
                        yajl_string_decode(hand->decodeBuf, buf, bufLen);
                        buf = yajl_buf_data(hand->decodeBuf);
                        bufLen = yajl_buf_len(hand->decodeBuf);
                    }
                    /* FALLTHROUGH */
                case yajl_tok_string:
                    if (hand->callbacks && hand->callbacks->yajl_map_key) {
                        _CC_CHK(hand->callbacks->yajl_map_key(hand->ctx, buf,
                                                              bufLen));
                    }
                    yajl_bs_set(hand->stateStack, yajl_state_map_sep);
                    goto around_again;
                case yajl_tok_right_bracket:
                    /* empty map: only valid directly after it was opened */
                    if (yajl_bs_current(hand->stateStack) ==
                        yajl_state_map_start)
                    {
                        if (hand->callbacks && hand->callbacks->yajl_end_map) {
                            _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
                        }
                        yajl_bs_pop(hand->stateStack);
                        goto around_again;
                    }
                    /* FALLTHROUGH */
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError =
                        "invalid object key (must be a string)";
                    goto around_again;
            }
        }
        case yajl_state_map_sep: {
            /* a key was read; only ':' may follow */
            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);
            switch (tok) {
                case yajl_tok_colon:
                    yajl_bs_set(hand->stateStack, yajl_state_map_need_val);
                    goto around_again;
                case yajl_tok_eof:
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError = "object key and value must "
                        "be separated by a colon (':')";
                    goto around_again;
            }
        }
        case yajl_state_map_got_val: {
            /* a key/value pair is complete: expect the map's end or ',' */
            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);
            switch (tok) {
                case yajl_tok_right_bracket:
                    if (hand->callbacks && hand->callbacks->yajl_end_map) {
                        _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
                    }
                    yajl_bs_pop(hand->stateStack);
                    goto around_again;
                case yajl_tok_comma:
                    yajl_bs_set(hand->stateStack, yajl_state_map_need_key);
                    goto around_again;
                case yajl_tok_eof:
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError = "after key and value, inside map, "
                                       "I expect ',' or '}'";
                    /* try to restore error offset */
                    if (*offset >= bufLen) *offset -= bufLen;
                    else *offset = 0;
                    goto around_again;
            }
        }
        case yajl_state_array_got_val: {
            /* an element is complete: expect the array's end or ',' */
            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);
            switch (tok) {
                case yajl_tok_right_brace:
                    if (hand->callbacks && hand->callbacks->yajl_end_array) {
                        _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx));
                    }
                    yajl_bs_pop(hand->stateStack);
                    goto around_again;
                case yajl_tok_comma:
                    yajl_bs_set(hand->stateStack, yajl_state_array_need_val);
                    goto around_again;
                case yajl_tok_eof:
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError =
                        "after array element, I expect ',' or ']'";
                    goto around_again;
            }
        }
    }

    /* every case above returns or jumps back to around_again, so this
     * point is reached only for a state value the switch does not handle */
    abort();
    /* NOTREACHED */
    return yajl_status_error;
}
499 |