1 | /* 2 | * Copyright (c) 2007-2014, Lloyd Hilaiel <me@lloyd.io> 3 | * 4 | * Permission to use, copy, modify, and/or distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | #include "yajl/yajl_parse.h" 18 | #include "yajl_lex.h" 19 | #include "yajl_parser.h" 20 | #include "yajl_encode.h" 21 | #include "yajl_bytestack.h" 22 | 23 | #include <stdlib.h> 24 | #include <limits.h> 25 | #include <errno.h> 26 | #include <stdio.h> 27 | #include <string.h> 28 | #include <ctype.h> 29 | #include <assert.h> 30 | #include <math.h> 31 | 32 | #define MAX_VALUE_TO_MULTIPLY ((LLONG_MAX / 10) + (LLONG_MAX % 10)) 33 | 34 | /* same semantics as strtol */ 35 | long long 36 | yajl_parse_integer(const unsigned char *number, size_t length) 37 | { 38 | long long ret = 0; 39 | long sign = 1; 40 | const unsigned char *pos = number; 41 | if (*pos == '-') { pos++; sign = -1; } 42 | if (*pos == '+') { pos++; } 43 | 44 | while (pos < number + length) { 45 | if ( ret > MAX_VALUE_TO_MULTIPLY ) { 46 | errno = ERANGE; 47 | return sign == 1 ? LLONG_MAX : LLONG_MIN; 48 | } 49 | ret *= 10; 50 | if (LLONG_MAX - ret < (*pos - '0')) { 51 | errno = ERANGE; 52 | return sign == 1 ? LLONG_MAX : LLONG_MIN; 53 | } 54 | if (*pos < '0' || *pos > '9') { 55 | errno = ERANGE; 56 | return sign == 1 ? LLONG_MAX : LLONG_MIN; 57 | } 58 | ret += (*pos++ - '0'); 59 | } 60 | 61 | return sign * ret; 62 | } 63 | 64 | unsigned char * 65 | yajl_render_error_string(yajl_handle hand, const unsigned char * jsonText, 66 | size_t jsonTextLen, int verbose) 67 | { 68 | size_t offset = hand->bytesConsumed; 69 | unsigned char * str; 70 | const char * errorType = NULL; 71 | const char * errorText = NULL; 72 | unsigned char text[72]; 73 | const char * arrow = " (right here) ------^\n"; 74 | 75 | if (yajl_bs_current(hand->stateStack) == yajl_state_parse_error) { 76 | errorType = "parse"; 77 | errorText = hand->parseError; 78 | } else if (yajl_bs_current(hand->stateStack) == yajl_state_lexical_error) { 79 | errorType = "lexical"; 80 | errorText = yajl_lex_error_to_string(yajl_lex_get_error(hand->lexer)); 81 | } else { 82 | errorType = "unknown"; 83 | } 84 | 85 | { 86 | size_t memneeded = 0; 87 | memneeded += strlen(errorType); 88 | memneeded += strlen(" error"); 89 | if (errorText != NULL) { 90 | memneeded += strlen(": "); 91 | memneeded += strlen(errorText); 92 | } 93 | str = (unsigned char *) YA_MALLOC(&(hand->alloc), memneeded + 2); 94 | if (!str) return NULL; 95 | str[0] = 0; 96 | strcat((char *) str, errorType); 97 | strcat((char *) str, " error"); 98 | if (errorText != NULL) { 99 | strcat((char *) str, ": "); 100 | strcat((char *) str, errorText); 101 | } 102 | strcat((char *) str, "\n"); 103 | } 104 | 105 | /* now, if verbose was specified, we append as many spaces as needed to make 106 | * sure the error falls at char 40 */ 107 | if (verbose) { 108 | size_t start, end, i; 109 | size_t spacesNeeded; 110 | 111 | /* xxx this doesn't seem to be working right.... */ 112 | spacesNeeded = (offset < 30 ? 40 - offset : 10); 113 | start = (offset >= 30 ? offset - 30 : 0); 114 | end = (offset + 30 > jsonTextLen ? jsonTextLen : offset + 30); 115 | 116 | for (i=0; i < spacesNeeded; i++) { 117 | text[i] = ' '; 118 | } 119 | 120 | for (; start < end; start++, i++) { 121 | if (jsonText[start] != '\n' && jsonText[start] != '\r') { 122 | text[i] = jsonText[start]; 123 | } else { 124 | text[i] = ' '; 125 | } 126 | } 127 | assert(i <= 71); 128 | text[i++] = '\n'; 129 | text[i] = 0; 130 | { 131 | char * newStr = (char *) 132 | YA_MALLOC(&(hand->alloc), (size_t)(strlen((char *) str) + 133 | strlen((char *) text) + 134 | strlen(arrow) + 1)); 135 | if (newStr) { 136 | newStr[0] = 0; 137 | strcat((char *) newStr, (char *) str); 138 | strcat((char *) newStr, (char *) text); 139 | strcat((char *) newStr, arrow); 140 | } 141 | YA_FREE(&(hand->alloc), str); 142 | str = (unsigned char *) newStr; 143 | } 144 | } 145 | return str; 146 | } 147 | 148 | /* check for client cancelation */ 149 | #define _CC_CHK(x) \ 150 | if (!(x)) { \ 151 | yajl_bs_set(hand->stateStack, yajl_state_parse_error); \ 152 | hand->parseError = \ 153 | "client cancelled parse via callback return value"; \ 154 | return yajl_status_client_canceled; \ 155 | } 156 | 157 | 158 | yajl_status 159 | yajl_do_finish(yajl_handle hand) 160 | { 161 | yajl_status stat; 162 | stat = yajl_do_parse(hand,(const unsigned char *) " ",(size_t) 1); 163 | 164 | if (stat != yajl_status_ok) return stat; 165 | 166 | switch(yajl_bs_current(hand->stateStack)) 167 | { 168 | case yajl_state_parse_error: 169 | case yajl_state_lexical_error: 170 | return yajl_status_error; 171 | case yajl_state_got_value: 172 | case yajl_state_parse_complete: 173 | return yajl_status_ok; 174 | default: 175 | if (!(hand->flags & yajl_allow_partial_values)) 176 | { 177 | yajl_bs_set(hand->stateStack, yajl_state_parse_error); 178 | hand->parseError = "premature EOF"; 179 | return yajl_status_error; 180 | } 181 | return yajl_status_ok; 182 | } 183 | } 184 | 185 | yajl_status 186 | yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, 187 | size_t jsonTextLen) 188 | { 189 | yajl_tok tok; 190 | const unsigned char * buf; 191 | size_t bufLen; 192 | size_t * offset = &(hand->bytesConsumed); 193 | 194 | *offset = 0; 195 | 196 | around_again: 197 | switch (yajl_bs_current(hand->stateStack)) { 198 | case yajl_state_parse_complete: 199 | if (hand->flags & yajl_allow_multiple_values) { 200 | yajl_bs_set(hand->stateStack, yajl_state_got_value); 201 | goto around_again; 202 | } 203 | if (!(hand->flags & yajl_allow_trailing_garbage)) { 204 | if (*offset != jsonTextLen) { 205 | tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen, 206 | offset, &buf, &bufLen); 207 | if (tok != yajl_tok_eof) { 208 | yajl_bs_set(hand->stateStack, yajl_state_parse_error); 209 | hand->parseError = "trailing garbage"; 210 | } 211 | goto around_again; 212 | } 213 | } 214 | return yajl_status_ok; 215 | case yajl_state_lexical_error: 216 | case yajl_state_parse_error: 217 | return yajl_status_error; 218 | case yajl_state_start: 219 | case yajl_state_got_value: 220 | case yajl_state_map_need_val: 221 | case yajl_state_array_need_val: 222 | case yajl_state_array_start: { 223 | /* for arrays and maps, we advance the state for this 224 | * depth, then push the state of the next depth. 225 | * If an error occurs during the parsing of the nesting 226 | * enitity, the state at this level will not matter. 227 | * a state that needs pushing will be anything other 228 | * than state_start */ 229 | 230 | yajl_state stateToPush = yajl_state_start; 231 | 232 | tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen, 233 | offset, &buf, &bufLen); 234 | 235 | switch (tok) { 236 | case yajl_tok_eof: 237 | return yajl_status_ok; 238 | case yajl_tok_error: 239 | yajl_bs_set(hand->stateStack, yajl_state_lexical_error); 240 | goto around_again; 241 | case yajl_tok_string: 242 | if (hand->callbacks && hand->callbacks->yajl_string) { 243 | _CC_CHK(hand->callbacks->yajl_string(hand->ctx, 244 | buf, bufLen)); 245 | } 246 | break; 247 | case yajl_tok_string_with_escapes: 248 | if (hand->callbacks && hand->callbacks->yajl_string) { 249 | yajl_buf_clear(hand->decodeBuf); 250 | yajl_string_decode(hand->decodeBuf, buf, bufLen); 251 | _CC_CHK(hand->callbacks->yajl_string( 252 | hand->ctx, yajl_buf_data(hand->decodeBuf), 253 | yajl_buf_len(hand->decodeBuf))); 254 | } 255 | break; 256 | case yajl_tok_bool: 257 | if (hand->callbacks && hand->callbacks->yajl_boolean) { 258 | _CC_CHK(hand->callbacks->yajl_boolean(hand->ctx, 259 | *buf == 't')); 260 | } 261 | break; 262 | case yajl_tok_null: 263 | if (hand->callbacks && hand->callbacks->yajl_null) { 264 | _CC_CHK(hand->callbacks->yajl_null(hand->ctx)); 265 | } 266 | break; 267 | case yajl_tok_left_bracket: 268 | if (hand->callbacks && hand->callbacks->yajl_start_map) { 269 | _CC_CHK(hand->callbacks->yajl_start_map(hand->ctx)); 270 | } 271 | stateToPush = yajl_state_map_start; 272 | break; 273 | case yajl_tok_left_brace: 274 | if (hand->callbacks && hand->callbacks->yajl_start_array) { 275 | _CC_CHK(hand->callbacks->yajl_start_array(hand->ctx)); 276 | } 277 | stateToPush = yajl_state_array_start; 278 | break; 279 | case yajl_tok_integer: 280 | if (hand->callbacks) { 281 | if (hand->callbacks->yajl_number) { 282 | _CC_CHK(hand->callbacks->yajl_number( 283 | hand->ctx,(const char *) buf, bufLen)); 284 | } else if (hand->callbacks->yajl_integer) { 285 | long long int i = 0; 286 | errno = 0; 287 | i = yajl_parse_integer(buf, bufLen); 288 | if ((i == LLONG_MIN || i == LLONG_MAX) && 289 | errno == ERANGE) 290 | { 291 | yajl_bs_set(hand->stateStack, 292 | yajl_state_parse_error); 293 | hand->parseError = "integer overflow" ; 294 | /* try to restore error offset */ 295 | if (*offset >= bufLen) *offset -= bufLen; 296 | else *offset = 0; 297 | goto around_again; 298 | } 299 | _CC_CHK(hand->callbacks->yajl_integer(hand->ctx, 300 | i)); 301 | } 302 | } 303 | break; 304 | case yajl_tok_double: 305 | if (hand->callbacks) { 306 | if (hand->callbacks->yajl_number) { 307 | _CC_CHK(hand->callbacks->yajl_number( 308 | hand->ctx, (const char *) buf, bufLen)); 309 | } else if (hand->callbacks->yajl_double) { 310 | double d = 0.0; 311 | yajl_buf_clear(hand->decodeBuf); 312 | yajl_buf_append(hand->decodeBuf, buf, bufLen); 313 | buf = yajl_buf_data(hand->decodeBuf); 314 | errno = 0; 315 | d = strtod((const char *) buf, NULL); 316 | if ((d == HUGE_VAL || d == -HUGE_VAL) && 317 | errno == ERANGE) 318 | { 319 | yajl_bs_set(hand->stateStack, 320 | yajl_state_parse_error); 321 | hand->parseError = "numeric (floating point) " 322 | "overflow"; 323 | /* try to restore error offset */ 324 | if (*offset >= bufLen) *offset -= bufLen; 325 | else *offset = 0; 326 | goto around_again; 327 | } 328 | _CC_CHK(hand->callbacks->yajl_double(hand->ctx, 329 | d)); 330 | } 331 | } 332 | break; 333 | case yajl_tok_right_brace: 334 | if (yajl_bs_current(hand->stateStack) == 335 | yajl_state_array_start) 336 | { 337 | if (hand->callbacks && 338 | hand->callbacks->yajl_end_array) 339 | { 340 | _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx)); 341 | } 342 | yajl_bs_pop(hand->stateStack); 343 | goto around_again; 344 | } 345 | /* FALLTHROUGH */ 346 | case yajl_tok_colon: 347 | case yajl_tok_comma: 348 | case yajl_tok_right_bracket: 349 | yajl_bs_set(hand->stateStack, yajl_state_parse_error); 350 | hand->parseError = 351 | "unallowed token at this point in JSON text"; 352 | goto around_again; 353 | default: 354 | yajl_bs_set(hand->stateStack, yajl_state_parse_error); 355 | hand->parseError = "invalid token, internal error"; 356 | goto around_again; 357 | } 358 | /* got a value. transition depends on the state we're in. */ 359 | { 360 | yajl_state s = (yajl_state) yajl_bs_current(hand->stateStack); 361 | if (s == yajl_state_start || s == yajl_state_got_value) { 362 | yajl_bs_set(hand->stateStack, yajl_state_parse_complete); 363 | } else if (s == yajl_state_map_need_val) { 364 | yajl_bs_set(hand->stateStack, yajl_state_map_got_val); 365 | } else { 366 | yajl_bs_set(hand->stateStack, yajl_state_array_got_val); 367 | } 368 | } 369 | if (stateToPush != yajl_state_start) { 370 | yajl_bs_push(hand->stateStack, stateToPush); 371 | } 372 | 373 | goto around_again; 374 | } 375 | case yajl_state_map_start: 376 | case yajl_state_map_need_key: { 377 | /* only difference between these two states is that in 378 | * start '}' is valid, whereas in need_key, we've parsed 379 | * a comma, and a string key _must_ follow */ 380 | tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen, 381 | offset, &buf, &bufLen); 382 | switch (tok) { 383 | case yajl_tok_eof: 384 | return yajl_status_ok; 385 | case yajl_tok_error: 386 | yajl_bs_set(hand->stateStack, yajl_state_lexical_error); 387 | goto around_again; 388 | case yajl_tok_string_with_escapes: 389 | if (hand->callbacks && hand->callbacks->yajl_map_key) { 390 | yajl_buf_clear(hand->decodeBuf); 391 | yajl_string_decode(hand->decodeBuf, buf, bufLen); 392 | buf = yajl_buf_data(hand->decodeBuf); 393 | bufLen = yajl_buf_len(hand->decodeBuf); 394 | } 395 | /* FALLTHROUGH */ 396 | case yajl_tok_string: 397 | if (hand->callbacks && hand->callbacks->yajl_map_key) { 398 | _CC_CHK(hand->callbacks->yajl_map_key(hand->ctx, buf, 399 | bufLen)); 400 | } 401 | yajl_bs_set(hand->stateStack, yajl_state_map_sep); 402 | goto around_again; 403 | case yajl_tok_right_bracket: 404 | if (yajl_bs_current(hand->stateStack) == 405 | yajl_state_map_start) 406 | { 407 | if (hand->callbacks && hand->callbacks->yajl_end_map) { 408 | _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx)); 409 | } 410 | yajl_bs_pop(hand->stateStack); 411 | goto around_again; 412 | } 413 | /* FALLTHROUGH */ 414 | default: 415 | yajl_bs_set(hand->stateStack, yajl_state_parse_error); 416 | hand->parseError = 417 | "invalid object key (must be a string)"; 418 | goto around_again; 419 | } 420 | } 421 | case yajl_state_map_sep: { 422 | tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen, 423 | offset, &buf, &bufLen); 424 | switch (tok) { 425 | case yajl_tok_colon: 426 | yajl_bs_set(hand->stateStack, yajl_state_map_need_val); 427 | goto around_again; 428 | case yajl_tok_eof: 429 | return yajl_status_ok; 430 | case yajl_tok_error: 431 | yajl_bs_set(hand->stateStack, yajl_state_lexical_error); 432 | goto around_again; 433 | default: 434 | yajl_bs_set(hand->stateStack, yajl_state_parse_error); 435 | hand->parseError = "object key and value must " 436 | "be separated by a colon (':')"; 437 | goto around_again; 438 | } 439 | } 440 | case yajl_state_map_got_val: { 441 | tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen, 442 | offset, &buf, &bufLen); 443 | switch (tok) { 444 | case yajl_tok_right_bracket: 445 | if (hand->callbacks && hand->callbacks->yajl_end_map) { 446 | _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx)); 447 | } 448 | yajl_bs_pop(hand->stateStack); 449 | goto around_again; 450 | case yajl_tok_comma: 451 | yajl_bs_set(hand->stateStack, yajl_state_map_need_key); 452 | goto around_again; 453 | case yajl_tok_eof: 454 | return yajl_status_ok; 455 | case yajl_tok_error: 456 | yajl_bs_set(hand->stateStack, yajl_state_lexical_error); 457 | goto around_again; 458 | default: 459 | yajl_bs_set(hand->stateStack, yajl_state_parse_error); 460 | hand->parseError = "after key and value, inside map, " 461 | "I expect ',' or '}'"; 462 | /* try to restore error offset */ 463 | if (*offset >= bufLen) *offset -= bufLen; 464 | else *offset = 0; 465 | goto around_again; 466 | } 467 | } 468 | case yajl_state_array_got_val: { 469 | tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen, 470 | offset, &buf, &bufLen); 471 | switch (tok) { 472 | case yajl_tok_right_brace: 473 | if (hand->callbacks && hand->callbacks->yajl_end_array) { 474 | _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx)); 475 | } 476 | yajl_bs_pop(hand->stateStack); 477 | goto around_again; 478 | case yajl_tok_comma: 479 | yajl_bs_set(hand->stateStack, yajl_state_array_need_val); 480 | goto around_again; 481 | case yajl_tok_eof: 482 | return yajl_status_ok; 483 | case yajl_tok_error: 484 | yajl_bs_set(hand->stateStack, yajl_state_lexical_error); 485 | goto around_again; 486 | default: 487 | yajl_bs_set(hand->stateStack, yajl_state_parse_error); 488 | hand->parseError = 489 | "after array element, I expect ',' or ']'"; 490 | goto around_again; 491 | } 492 | } 493 | } 494 | 495 | abort(); 496 | /* NOTREACHED */ 497 | return yajl_status_error; 498 | } 499 |