1    | /*
2    |  * Copyright (c) 2007-2014, Lloyd Hilaiel <me@lloyd.io>
3    |  *
4    |  * Permission to use, copy, modify, and/or distribute this software for any
5    |  * purpose with or without fee is hereby granted, provided that the above
6    |  * copyright notice and this permission notice appear in all copies.
7    |  *
8    |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9    |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10   |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11   |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12   |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13   |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14   |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15   |  */
16   | 
17   | #include "yajl/yajl_parse.h"
18   | #include "yajl_lex.h"
19   | #include "yajl_parser.h"
20   | #include "yajl_encode.h"
21   | #include "yajl_bytestack.h"
22   | 
23   | #include <stdlib.h>
24   | #include <limits.h>
25   | #include <errno.h>
26   | #include <stdio.h>
27   | #include <string.h>
28   | #include <ctype.h>
29   | #include <assert.h>
30   | #include <math.h>
31   | 
/* Largest magnitude that can still be multiplied by 10 without leaving the
 * range of long long. */
#define MAX_VALUE_TO_MULTIPLY (LLONG_MAX / 10)

/*
 * Convert the decimal text in number[0 .. length-1] to a long long.
 *
 * The text does not need to be NUL terminated; exactly `length` bytes are
 * examined.  An optional leading '-' (and then an optional '+') is accepted
 * before the digits.  An input without digits yields 0.
 *
 * Error reporting follows strtol: when the value does not fit in a
 * long long, or a byte that is not a decimal digit is encountered, errno is
 * set to ERANGE and LLONG_MAX (LLONG_MIN for negative input) is returned.
 * errno is never cleared, so callers must set it to 0 before the call in
 * order to tell a saturated result from a genuine LLONG_MAX / LLONG_MIN.
 */
long long
yajl_parse_integer(const unsigned char *number, size_t length)
{
    const unsigned char *pos = number;
    const unsigned char *end = number + length;
    /* The value is accumulated as a non-positive number because the negative
     * range of long long is one larger than the positive range; this is what
     * allows LLONG_MIN itself to be parsed.  `limit` is the most negative
     * accumulator value that is still representable for the parsed sign. */
    long long limit = -LLONG_MAX;
    long long acc = 0;
    int negative = 0;

    if (pos < end && *pos == '-') { pos++; negative = 1; limit = LLONG_MIN; }
    if (pos < end && *pos == '+') { pos++; }

    while (pos < end) {
        int digit;

        if (*pos < '0' || *pos > '9') goto out_of_range;
        digit = *pos++ - '0';

        /* check before multiplying: signed overflow is undefined behavior */
        if (acc < -MAX_VALUE_TO_MULTIPLY) goto out_of_range;
        acc *= 10;

        /* same as "acc - digit < limit", written so it cannot overflow */
        if (acc < limit + digit) goto out_of_range;
        acc -= digit;
    }

    return negative ? acc : -acc;

  out_of_range:
    errno = ERANGE;
    return negative ? LLONG_MIN : LLONG_MAX;
}
63   | 
64   | unsigned char *
65   | yajl_render_error_string(yajl_handle hand, const unsigned char * jsonText,
66   |                          size_t jsonTextLen, int verbose)
67   | {
68   |     size_t offset = hand->bytesConsumed;
69   |     unsigned char * str;
70   |     const char * errorType = NULL;
71   |     const char * errorText = NULL;
72   |     unsigned char text[72];
73   |     const char * arrow = "                    (right here) ------^\n";
74   | 
75   |     if (yajl_bs_current(hand->stateStack) == yajl_state_parse_error) {
76   |         errorType = "parse";
77   |         errorText = hand->parseError;
78   |     } else if (yajl_bs_current(hand->stateStack) == yajl_state_lexical_error) {
79   |         errorType = "lexical";
80   |         errorText = yajl_lex_error_to_string(yajl_lex_get_error(hand->lexer));
81   |     } else {
82   |         errorType = "unknown";
83   |     }
84   | 
85   |     {
86   |         size_t memneeded = 0;
87   |         memneeded += strlen(errorType);
88   |         memneeded += strlen(" error");
89   |         if (errorText != NULL) {
90   |             memneeded += strlen(": ");
91   |             memneeded += strlen(errorText);
92   |         }
93   |         str = (unsigned char *) YA_MALLOC(&(hand->alloc), memneeded + 2);
94   |         if (!str) return NULL;
95   |         str[0] = 0;
96   |         strcat((char *) str, errorType);
97   |         strcat((char *) str, " error");
98   |         if (errorText != NULL) {
99   |             strcat((char *) str, ": ");
100  |             strcat((char *) str, errorText);
101  |         }
102  |         strcat((char *) str, "\n");
103  |     }
104  | 
105  |     /* now, if verbose was specified, we append as many spaces as needed to make
106  |      * sure the error falls at char 40 */
107  |     if (verbose) {
108  |         size_t start, end, i;
109  |         size_t spacesNeeded;
110  | 
111  |         /* xxx this doesn't seem to be working right.... */
112  |         spacesNeeded = (offset < 30 ? 40 - offset : 10);
113  |         start = (offset >= 30 ? offset - 30 : 0);
114  |         end = (offset + 30 > jsonTextLen ? jsonTextLen : offset + 30);
115  | 
116  |         for (i=0; i < spacesNeeded; i++) {
117  |             text[i] = ' ';
118  |         }
119  | 
120  |         for (; start < end; start++, i++) {
121  |             if (jsonText[start] != '\n' && jsonText[start] != '\r') {
122  |                 text[i] = jsonText[start];
123  |             } else {
124  |                 text[i] = ' ';
125  |             }
126  |         }
127  |         assert(i <= 71);
128  |         text[i++] = '\n';
129  |         text[i] = 0;
130  |         {
131  |             char * newStr = (char *)
132  |                 YA_MALLOC(&(hand->alloc), (size_t)(strlen((char *) str) +
133  |                                                    strlen((char *) text) +
134  |                                                    strlen(arrow) + 1));
135  |             if (newStr) {
136  |                 newStr[0] = 0;
137  |                 strcat((char *) newStr, (char *) str);
138  |                 strcat((char *) newStr, (char *) text);
139  |                 strcat((char *) newStr, arrow);
140  |             }
141  |             YA_FREE(&(hand->alloc), str);
142  |             str = (unsigned char *) newStr;
143  |         }
144  |     }
145  |     return str;
146  | }
147  | 
/*
 * Check for client cancellation.
 *
 * `x` is the result of a client callback.  When it is zero the top of
 * hand->stateStack is replaced with yajl_state_parse_error, a fixed message
 * is stored in hand->parseError and the *enclosing function* returns
 * yajl_status_client_canceled.  A variable named `hand` must be in scope at
 * the point of use.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe in an unbraced if/else); invoke it with a trailing ';'.
 */
#define _CC_CHK(x)                                                    \
    do {                                                              \
        if (!(x)) {                                                   \
            yajl_bs_set(hand->stateStack, yajl_state_parse_error);    \
            hand->parseError =                                        \
                "client cancelled parse via callback return value";   \
            return yajl_status_client_canceled;                       \
        }                                                             \
    } while (0)
156  | 
157  | 
158  | yajl_status
159  | yajl_do_finish(yajl_handle hand)
160  | {
161  |     yajl_status stat;
162  |     stat = yajl_do_parse(hand,(const unsigned char *) " ",(size_t) 1);
163  | 
164  |     if (stat != yajl_status_ok) return stat;
165  | 
166  |     switch(yajl_bs_current(hand->stateStack))
167  |     {
168  |         case yajl_state_parse_error:
169  |         case yajl_state_lexical_error:
170  |             return yajl_status_error;
171  |         case yajl_state_got_value:
172  |         case yajl_state_parse_complete:
173  |             return yajl_status_ok;
174  |         default:
175  |             if (!(hand->flags & yajl_allow_partial_values))
176  |             {
177  |                 yajl_bs_set(hand->stateStack, yajl_state_parse_error);
178  |                 hand->parseError = "premature EOF";
179  |                 return yajl_status_error;
180  |             }
181  |             return yajl_status_ok;
182  |     }
183  | }
184  | 
/*
 * Run the parser state machine over jsonText[0 .. jsonTextLen-1].
 *
 * hand->bytesConsumed is reset to zero on entry and handed to the lexer as
 * the running offset into jsonText.  Each pass through `around_again`
 * inspects the state on top of hand->stateStack, pulls one token from the
 * lexer and either invokes the matching client callback, changes / pushes /
 * pops a state, or records an error.
 *
 * Returns:
 *   yajl_status_ok              - the lexer returned yajl_tok_eof, or the
 *                                 parse is complete (the state stack is kept
 *                                 as is, so a later call continues from it);
 *   yajl_status_error           - the top state is a lexical or parse error
 *                                 (hand->parseError holds the message for
 *                                 parse errors);
 *   yajl_status_client_canceled - a callback returned zero (see _CC_CHK).
 *
 * abort() is called if the top state is none of the states handled below.
 */
yajl_status
yajl_do_parse(yajl_handle hand, const unsigned char * jsonText,
              size_t jsonTextLen)
{
    yajl_tok tok;
    const unsigned char * buf;
    size_t bufLen;
    size_t * offset = &(hand->bytesConsumed);

    *offset = 0;

  around_again:
    switch (yajl_bs_current(hand->stateStack)) {
        case yajl_state_parse_complete:
            /* with multiple values allowed, a completed value simply puts
             * us back into a state that accepts another one */
            if (hand->flags & yajl_allow_multiple_values) {
                yajl_bs_set(hand->stateStack, yajl_state_got_value);
                goto around_again;
            }
            /* otherwise any remaining input must lex to EOF, unless the
             * client asked for trailing garbage to be tolerated */
            if (!(hand->flags & yajl_allow_trailing_garbage)) {
                if (*offset != jsonTextLen) {
                    tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                                       offset, &buf, &bufLen);
                    if (tok != yajl_tok_eof) {
                        yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                        hand->parseError = "trailing garbage";
                    }
                    goto around_again;
                }
            }
            return yajl_status_ok;
        case yajl_state_lexical_error:
        case yajl_state_parse_error:
            /* error states are sticky: nothing further is lexed */
            return yajl_status_error;
        case yajl_state_start:
        case yajl_state_got_value:
        case yajl_state_map_need_val:
        case yajl_state_array_need_val:
        case yajl_state_array_start:  {
            /* for arrays and maps, we advance the state for this
             * depth, then push the state of the next depth.
             * If an error occurs during the parsing of the nesting
             * entity, the state at this level will not matter.
             * a state that needs pushing will be anything other
             * than state_start */

            yajl_state stateToPush = yajl_state_start;

            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);

            switch (tok) {
                case yajl_tok_eof:
                    /* lexer reported yajl_tok_eof: return ok and leave the
                     * state stack unchanged */
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                case yajl_tok_string:
                    if (hand->callbacks && hand->callbacks->yajl_string) {
                        _CC_CHK(hand->callbacks->yajl_string(hand->ctx,
                                                             buf, bufLen));
                    }
                    break;
                case yajl_tok_string_with_escapes:
                    /* decode into hand->decodeBuf and pass the decoded
                     * bytes to the callback instead of the raw token */
                    if (hand->callbacks && hand->callbacks->yajl_string) {
                        yajl_buf_clear(hand->decodeBuf);
                        yajl_string_decode(hand->decodeBuf, buf, bufLen);
                        _CC_CHK(hand->callbacks->yajl_string(
                                    hand->ctx, yajl_buf_data(hand->decodeBuf),
                                    yajl_buf_len(hand->decodeBuf)));
                    }
                    break;
                case yajl_tok_bool:
                    /* the callback receives non-zero when the token text
                     * starts with 't' */
                    if (hand->callbacks && hand->callbacks->yajl_boolean) {
                        _CC_CHK(hand->callbacks->yajl_boolean(hand->ctx,
                                                              *buf == 't'));
                    }
                    break;
                case yajl_tok_null:
                    if (hand->callbacks && hand->callbacks->yajl_null) {
                        _CC_CHK(hand->callbacks->yajl_null(hand->ctx));
                    }
                    break;
                /* note the token naming used here: left_bracket opens a
                 * map, left_brace opens an array */
                case yajl_tok_left_bracket:
                    if (hand->callbacks && hand->callbacks->yajl_start_map) {
                        _CC_CHK(hand->callbacks->yajl_start_map(hand->ctx));
                    }
                    stateToPush = yajl_state_map_start;
                    break;
                case yajl_tok_left_brace:
                    if (hand->callbacks && hand->callbacks->yajl_start_array) {
                        _CC_CHK(hand->callbacks->yajl_start_array(hand->ctx));
                    }
                    stateToPush = yajl_state_array_start;
                    break;
                case yajl_tok_integer:
                    /* yajl_number, when set, takes precedence and receives
                     * the raw token text; yajl_integer is only consulted
                     * otherwise */
                    if (hand->callbacks) {
                        if (hand->callbacks->yajl_number) {
                            _CC_CHK(hand->callbacks->yajl_number(
                                        hand->ctx,(const char *) buf, bufLen));
                        } else if (hand->callbacks->yajl_integer) {
                            long long int i = 0;
                            /* errno must be cleared first so a saturated
                             * result can be told apart from a genuine
                             * LLONG_MIN / LLONG_MAX */
                            errno = 0;
                            i = yajl_parse_integer(buf, bufLen);
                            if ((i == LLONG_MIN || i == LLONG_MAX) &&
                                errno == ERANGE)
                            {
                                yajl_bs_set(hand->stateStack,
                                            yajl_state_parse_error);
                                hand->parseError = "integer overflow" ;
                                /* try to restore error offset */
                                if (*offset >= bufLen) *offset -= bufLen;
                                else *offset = 0;
                                goto around_again;
                            }
                            _CC_CHK(hand->callbacks->yajl_integer(hand->ctx,
                                                                  i));
                        }
                    }
                    break;
                case yajl_tok_double:
                    /* same precedence rule as for integers */
                    if (hand->callbacks) {
                        if (hand->callbacks->yajl_number) {
                            _CC_CHK(hand->callbacks->yajl_number(
                                        hand->ctx, (const char *) buf, bufLen));
                        } else if (hand->callbacks->yajl_double) {
                            double d = 0.0;
                            /* the token is copied into decodeBuf and strtod
                             * reads from that copy -- presumably because the
                             * copy is NUL terminated while the token is not
                             * (confirm against yajl_buf_append) */
                            yajl_buf_clear(hand->decodeBuf);
                            yajl_buf_append(hand->decodeBuf, buf, bufLen);
                            buf = yajl_buf_data(hand->decodeBuf);
                            errno = 0;
                            d = strtod((const char *) buf, NULL);
                            if ((d == HUGE_VAL || d == -HUGE_VAL) &&
                                errno == ERANGE)
                            {
                                yajl_bs_set(hand->stateStack,
                                            yajl_state_parse_error);
                                hand->parseError = "numeric (floating point) "
                                    "overflow";
                                /* try to restore error offset */
                                if (*offset >= bufLen) *offset -= bufLen;
                                else *offset = 0;
                                goto around_again;
                            }
                            _CC_CHK(hand->callbacks->yajl_double(hand->ctx,
                                                                 d));
                        }
                    }
                    break;
                case yajl_tok_right_brace:
                    /* an array may be closed here only if it is still
                     * empty (state array_start); otherwise this token is
                     * an error like the ones below */
                    if (yajl_bs_current(hand->stateStack) ==
                        yajl_state_array_start)
                    {
                        if (hand->callbacks &&
                            hand->callbacks->yajl_end_array)
                        {
                            _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx));
                        }
                        yajl_bs_pop(hand->stateStack);
                        goto around_again;
                    }
                    /* FALLTHROUGH */
                case yajl_tok_colon:
                case yajl_tok_comma:
                case yajl_tok_right_bracket:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError =
                        "unallowed token at this point in JSON text";
                    goto around_again;
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError = "invalid token, internal error";
                    goto around_again;
            }
            /* got a value.  transition depends on the state we're in. */
            {
                yajl_state s = (yajl_state) yajl_bs_current(hand->stateStack);
                if (s == yajl_state_start || s == yajl_state_got_value) {
                    yajl_bs_set(hand->stateStack, yajl_state_parse_complete);
                } else if (s == yajl_state_map_need_val) {
                    yajl_bs_set(hand->stateStack, yajl_state_map_got_val);
                } else {
                    yajl_bs_set(hand->stateStack, yajl_state_array_got_val);
                }
            }
            /* a container was opened: its state goes on top of the one
             * just updated for the current depth */
            if (stateToPush != yajl_state_start) {
                yajl_bs_push(hand->stateStack, stateToPush);
            }

            goto around_again;
        }
        case yajl_state_map_start:
        case yajl_state_map_need_key: {
            /* only difference between these two states is that in
             * start '}' is valid, whereas in need_key, we've parsed
             * a comma, and a string key _must_ follow */
            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);
            switch (tok) {
                case yajl_tok_eof:
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                case yajl_tok_string_with_escapes:
                    /* decode the key first, then share the callback
                     * invocation with the plain string case below */
                    if (hand->callbacks && hand->callbacks->yajl_map_key) {
                        yajl_buf_clear(hand->decodeBuf);
                        yajl_string_decode(hand->decodeBuf, buf, bufLen);
                        buf = yajl_buf_data(hand->decodeBuf);
                        bufLen = yajl_buf_len(hand->decodeBuf);
                    }
                    /* FALLTHROUGH */
                case yajl_tok_string:
                    if (hand->callbacks && hand->callbacks->yajl_map_key) {
                        _CC_CHK(hand->callbacks->yajl_map_key(hand->ctx, buf,
                                                              bufLen));
                    }
                    yajl_bs_set(hand->stateStack, yajl_state_map_sep);
                    goto around_again;
                case yajl_tok_right_bracket:
                    /* closing is accepted only for a still-empty map */
                    if (yajl_bs_current(hand->stateStack) ==
                        yajl_state_map_start)
                    {
                        if (hand->callbacks && hand->callbacks->yajl_end_map) {
                            _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
                        }
                        yajl_bs_pop(hand->stateStack);
                        goto around_again;
                    }
                    /* FALLTHROUGH */
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError =
                        "invalid object key (must be a string)"; 
                    goto around_again;
            }
        }
        case yajl_state_map_sep: {
            /* a key has been read; only a colon may follow */
            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);
            switch (tok) {
                case yajl_tok_colon:
                    yajl_bs_set(hand->stateStack, yajl_state_map_need_val);
                    goto around_again;
                case yajl_tok_eof:
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError = "object key and value must "
                        "be separated by a colon (':')";
                    goto around_again;
            }
        }
        case yajl_state_map_got_val: {
            /* a key/value pair has been read; the map either ends or
             * continues with another key after a comma */
            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);
            switch (tok) {
                case yajl_tok_right_bracket:
                    if (hand->callbacks && hand->callbacks->yajl_end_map) {
                        _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
                    }
                    yajl_bs_pop(hand->stateStack);
                    goto around_again;
                case yajl_tok_comma:
                    yajl_bs_set(hand->stateStack, yajl_state_map_need_key);
                    goto around_again;
                case yajl_tok_eof:
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError = "after key and value, inside map, "
                                       "I expect ',' or '}'";
                    /* try to restore error offset */
                    if (*offset >= bufLen) *offset -= bufLen;
                    else *offset = 0;
                    goto around_again;
            }
        }
        case yajl_state_array_got_val: {
            /* an element has been read; the array either ends or
             * continues with another value after a comma */
            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);
            switch (tok) {
                case yajl_tok_right_brace:
                    if (hand->callbacks && hand->callbacks->yajl_end_array) {
                        _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx));
                    }
                    yajl_bs_pop(hand->stateStack);
                    goto around_again;
                case yajl_tok_comma:
                    yajl_bs_set(hand->stateStack, yajl_state_array_need_val);
                    goto around_again;
                case yajl_tok_eof:
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError =
                        "after array element, I expect ',' or ']'";
                    goto around_again;
            }
        }
    }

    /* every handled state returns or jumps back to around_again, so this
     * point is reached only when the top state matched no case above */
    abort();
    /* NOTREACHED */
    return yajl_status_error;
}
499  |