Files
epics-base/modules/libcom/src/yajl/yajl_parser.c
Andrew Johnson e2256d0663 Accept unquoted identifiers as map keys
Adds another lexer entry point for lexing map keys only, and
adjusts the parser to use it instead of the general lexer.
Also defines another lexer token for internal use only.
2020-08-09 00:33:07 -05:00

525 lines
21 KiB
C

/*
* Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdlib.h>
#include <limits.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
#include <math.h>
#include "yajl_parse.h"
#include "yajl_lex.h"
#include "yajl_parser.h"
#include "yajl_encode.h"
#include "yajl_bytestack.h"
/* Fallback for toolchains whose <limits.h> (included above) does not
 * provide the long long range macros.  The values written here are the
 * limits of a 64-bit two's-complement long long. */
#ifndef LLONG_MAX
#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL
#define LLONG_MIN (-0x7FFFFFFFFFFFFFFFLL - 1)
#endif
/*
 * Convert the text of an integer token to a long long.
 *
 * number  first byte of the token; it need not be NUL-terminated.
 * length  number of bytes belonging to the token.  No byte at or beyond
 *         number + length is examined.
 *
 * Accepted form: an optional '+' or '-' sign, an optional "0x"/"0X"
 * prefix selecting base 16 (base 10 otherwise), then the digits.  Digits
 * are not validated here.
 *
 * Returns the converted value.  If the value does not fit in a long long,
 * errno is set to ERANGE and LLONG_MAX (positive input) or LLONG_MIN
 * (negative input) is returned.  errno is left untouched on success, so a
 * caller that tests errno must clear it before calling.  The whole range
 * LLONG_MIN..LLONG_MAX converts without error.
 */
long long
yajl_parse_integer(const unsigned char *number, size_t length)
{
    unsigned long long magnitude = 0;
    /* largest magnitude representable for the sign in effect */
    unsigned long long limit = (unsigned long long) LLONG_MAX;
    unsigned int base = 10;
    int negative = 0;
    const unsigned char *pos = number;
    const unsigned char *end = number + length;

    if (pos < end && (*pos == '-' || *pos == '+')) {
        negative = (*pos == '-');
        pos++;
    }
    if (negative) {
        /* |LLONG_MIN| is one more than LLONG_MAX */
        limit++;
    }

    /* only look for the hex prefix if both of its bytes are in range */
    if (end - pos >= 2 &&
        pos[0] == '0' && (pos[1] == 'x' || pos[1] == 'X')) {
        base = 16;
        pos += 2;
    }

    while (pos < end) {
        unsigned int digit = (unsigned int) *pos++ - '0';

        /* Don't have to check for non-digit characters,
         * the lexer has already rejected any bad digits.
         * Masking folds 'a'-'f' onto the same values as 'A'-'F'.
         */
        if (digit > 9)
            digit = (digit - ('A' - '0') + 10) & 0xf;

        /* would magnitude * base + digit exceed limit? */
        if (magnitude > (limit - digit) / base) {
            errno = ERANGE;
            return negative ? LLONG_MIN : LLONG_MAX;
        }
        magnitude = magnitude * base + digit;
    }

    if (negative) {
        /* negating LLONG_MAX + 1 as a long long would overflow */
        return magnitude == limit ? LLONG_MIN : -(long long) magnitude;
    }
    return (long long) magnitude;
}
/*
 * Build a human-readable description of the error the parser is in.
 *
 * hand         parser handle.  The top of its state stack selects the
 *              error category ("parse", "lexical" or "unknown") and
 *              hand->bytesConsumed is used as the error offset.
 * jsonText     text that was being parsed; only read when verbose is
 *              non-zero.
 * jsonTextLen  number of bytes available at jsonText.
 * verbose      when non-zero, two extra lines are appended: an excerpt of
 *              jsonText around the error offset, and a marker line whose
 *              '^' points at the error position.
 *
 * Returns a NUL-terminated string allocated with YA_MALLOC from the
 * handle's allocator, or NULL if an allocation fails.
 * NOTE(review): the caller is presumably expected to release the string
 * through the matching free routine - confirm against the public API.
 */
unsigned char *
yajl_render_error_string(yajl_handle hand, const unsigned char * jsonText,
                         size_t jsonTextLen, int verbose)
{
    size_t offset = hand->bytesConsumed;
    unsigned char * str;
    const char * errorType = NULL;
    const char * errorText = NULL;
    /* 40 columns of padding/context + up to 30 bytes after the error
     * offset + '\n' + NUL = 72 */
    char text[72];
    /* The excerpt line places the byte at `offset` in column 41, so the
     * '^' must be in column 41 too: 21 blanks (10 + 10 + 1) followed by
     * the 19 characters of "(right here) ------". */
    const char * arrow = "          " "          " " "
                         "(right here) ------^\n";

    if (yajl_bs_current(hand->stateStack) == yajl_state_parse_error) {
        errorType = "parse";
        errorText = hand->parseError;
    } else if (yajl_bs_current(hand->stateStack) == yajl_state_lexical_error) {
        errorType = "lexical";
        errorText = yajl_lex_error_to_string(yajl_lex_get_error(hand->lexer));
    } else {
        errorType = "unknown";
    }

    /* first line: "<type> error[: <text>]\n" */
    {
        size_t memneeded = 0;
        memneeded += strlen(errorType);
        memneeded += strlen(" error");
        if (errorText != NULL) {
            memneeded += strlen(": ");
            memneeded += strlen(errorText);
        }
        /* + 2 for the trailing newline and the NUL terminator */
        str = (unsigned char *) YA_MALLOC(&(hand->alloc), memneeded + 2);
        if (!str) return NULL;
        str[0] = 0;
        strcat((char *) str, errorType);
        strcat((char *) str, " error");
        if (errorText != NULL) {
            strcat((char *) str, ": ");
            strcat((char *) str, errorText);
        }
        strcat((char *) str, "\n");
    }

    /* now we append as many spaces as needed to make sure the error
     * falls at char 41, if verbose was specified */
    if (verbose) {
        size_t start, end, i;
        size_t spacesNeeded;

        /* show up to 30 bytes either side of the error offset, left-padded
         * so that the byte at `offset` always lands in the same column */
        spacesNeeded = (offset < 30 ? 40 - offset : 10);
        start = (offset >= 30 ? offset - 30 : 0);
        end = (offset + 30 > jsonTextLen ? jsonTextLen : offset + 30);

        for (i=0;i<spacesNeeded;i++) text[i] = ' ';

        for (;start < end;start++, i++) {
            /* line breaks would split the excerpt; show them as blanks */
            if (jsonText[start] != '\n' && jsonText[start] != '\r')
            {
                text[i] = jsonText[start];
            }
            else
            {
                text[i] = ' ';
            }
        }
        /* two more bytes ('\n' and NUL) are still to be stored */
        assert(i <= sizeof(text) - 2);
        text[i++] = '\n';
        text[i] = 0;

        /* replace str with header + excerpt + marker */
        {
            char * newStr = (char *)
                YA_MALLOC(&(hand->alloc), strlen((char *) str) +
                                          strlen((char *) text) +
                                          strlen(arrow) + 1);
            if (newStr) {
                newStr[0] = 0;
                strcat((char *) newStr, (char *) str);
                strcat((char *) newStr, text);
                strcat((char *) newStr, arrow);
            }
            /* the short form is released even if the long one could not
             * be allocated, in which case NULL is returned */
            YA_FREE(&(hand->alloc), str);
            str = (unsigned char *) newStr;
        }
    }
    return str;
}
/* check for client cancelation
 *
 * x is the return value of a client callback.  When it is zero the parser
 * is put into the parse-error state, the error message is recorded and the
 * ENCLOSING FUNCTION returns yajl_status_client_canceled.  The macro
 * expects a variable named `hand` to be in scope.
 *
 * Wrapped in do { } while (0) so that `_CC_CHK(...);` is exactly one
 * statement and cannot capture a following `else`. */
#define _CC_CHK(x)                                                    \
    do {                                                              \
        if (!(x)) {                                                   \
            yajl_bs_set(hand->stateStack, yajl_state_parse_error);    \
            hand->parseError =                                        \
                "client cancelled parse via callback return value";   \
            return yajl_status_client_canceled;                       \
        }                                                             \
    } while (0)
/*
 * Signal end of input to the parser and report the final status.
 *
 * A single space is pushed through yajl_do_parse() first; any status
 * other than yajl_status_ok from that call is returned unchanged.
 * NOTE(review): the space presumably lets the lexer terminate a token
 * that was still pending at the end of the last chunk - confirm in the
 * lexer.
 *
 * Afterwards the state on top of the stack decides the result:
 *   - an error state                -> yajl_status_error
 *   - got_value / parse_complete    -> yajl_status_ok
 *   - anything else (input stopped mid-document) -> yajl_status_ok if
 *     yajl_allow_partial_values is set, otherwise the handle is moved to
 *     the parse-error state with "premature EOF" and yajl_status_error
 *     is returned.
 */
yajl_status
yajl_do_finish(yajl_handle hand)
{
    yajl_status rc;
    yajl_state top;

    rc = yajl_do_parse(hand, (const unsigned char *) " ", 1);
    if (rc != yajl_status_ok) {
        return rc;
    }

    top = yajl_bs_current(hand->stateStack);

    if (top == yajl_state_parse_error || top == yajl_state_lexical_error) {
        return yajl_status_error;
    }
    if (top == yajl_state_got_value || top == yajl_state_parse_complete) {
        return yajl_status_ok;
    }

    /* the document is incomplete */
    if (hand->flags & yajl_allow_partial_values) {
        return yajl_status_ok;
    }
    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
    hand->parseError = "premature EOF";
    return yajl_status_error;
}
/*
 * Run the parser state machine over one chunk of JSON text.
 *
 * hand         parser handle holding the state stack, lexer, flags,
 *              callbacks and decode buffer.
 * jsonText     chunk of input to parse.
 * jsonTextLen  number of bytes in the chunk.
 *
 * hand->bytesConsumed is reset to zero on entry and is handed to the lexer
 * as its position within this chunk.
 *
 * The function loops (via `around_again`) on the state at the top of the
 * state stack, pulling one token per iteration, invoking the matching
 * client callback if one is registered, and updating the stack.
 *
 * Returns
 *   yajl_status_ok               the lexer reported end of the chunk, or
 *                                the parse is complete;
 *   yajl_status_error            the handle is in (or has just entered) a
 *                                lexical- or parse-error state;
 *   yajl_status_client_canceled  a callback returned zero (see _CC_CHK).
 *
 * For numbers, a registered yajl_number callback receives the raw text and
 * takes precedence over yajl_integer / yajl_double.
 */
yajl_status
yajl_do_parse(yajl_handle hand, const unsigned char * jsonText,
              size_t jsonTextLen)
{
    yajl_tok tok;
    const unsigned char * buf;
    size_t bufLen;
    size_t * offset = &(hand->bytesConsumed);

    *offset = 0;

  around_again:
    switch (yajl_bs_current(hand->stateStack)) {
        case yajl_state_parse_complete:
            if (hand->flags & yajl_allow_multiple_values) {
                /* another top-level value may follow */
                yajl_bs_set(hand->stateStack, yajl_state_got_value);
                goto around_again;
            }
            if (!(hand->flags & yajl_allow_trailing_garbage)) {
                if (*offset != jsonTextLen) {
                    /* anything other than EOF after the value is an error */
                    tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                                       offset, &buf, &bufLen);
                    if (tok != yajl_tok_eof) {
                        yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                        hand->parseError = "trailing garbage";
                    }
                    goto around_again;
                }
            }
            return yajl_status_ok;
        case yajl_state_lexical_error:
        case yajl_state_parse_error:
            return yajl_status_error;
        case yajl_state_start:
        case yajl_state_got_value:
        case yajl_state_map_need_val:
        case yajl_state_array_need_val:
        case yajl_state_array_start:  {
            /* for arrays and maps, we advance the state for this
             * depth, then push the state of the next depth.
             * If an error occurs during the parsing of the nesting
             * entity, the state at this level will not matter.
             * a state that needs pushing will be anything other
             * than state_start */

            yajl_state stateToPush = yajl_state_start;

            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);

            switch (tok) {
                case yajl_tok_eof:
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                case yajl_tok_string:
                    if (hand->callbacks && hand->callbacks->yajl_string) {
                        _CC_CHK(hand->callbacks->yajl_string(hand->ctx,
                                                             buf, bufLen));
                    }
                    break;
                case yajl_tok_string_with_escapes:
                    if (hand->callbacks && hand->callbacks->yajl_string) {
                        /* hand the client the decoded form */
                        yajl_buf_clear(hand->decodeBuf);
                        yajl_string_decode(hand->decodeBuf, buf, bufLen);
                        _CC_CHK(hand->callbacks->yajl_string(
                                    hand->ctx, yajl_buf_data(hand->decodeBuf),
                                    yajl_buf_len(hand->decodeBuf)));
                    }
                    break;
                case yajl_tok_bool:
                    if (hand->callbacks && hand->callbacks->yajl_boolean) {
                        /* a bool token starting with 't' is "true" */
                        _CC_CHK(hand->callbacks->yajl_boolean(hand->ctx,
                                                              *buf == 't'));
                    }
                    break;
                case yajl_tok_null:
                    if (hand->callbacks && hand->callbacks->yajl_null) {
                        _CC_CHK(hand->callbacks->yajl_null(hand->ctx));
                    }
                    break;
                case yajl_tok_left_brace:
                    if (hand->callbacks && hand->callbacks->yajl_start_map) {
                        _CC_CHK(hand->callbacks->yajl_start_map(hand->ctx));
                    }
                    stateToPush = yajl_state_map_start;
                    break;
                case yajl_tok_left_bracket:
                    if (hand->callbacks && hand->callbacks->yajl_start_array) {
                        _CC_CHK(hand->callbacks->yajl_start_array(hand->ctx));
                    }
                    stateToPush = yajl_state_array_start;
                    break;
                case yajl_tok_integer:
                    if (hand->callbacks) {
                        if (hand->callbacks->yajl_number) {
                            _CC_CHK(hand->callbacks->yajl_number(
                                        hand->ctx,(const char *) buf, bufLen));
                        } else if (hand->callbacks->yajl_integer) {
                            long long int i = 0;
                            /* Clear errno first: LLONG_MAX and LLONG_MIN
                             * are also valid results, so a stale ERANGE
                             * left by earlier code would otherwise be
                             * mistaken for an overflow. */
                            errno = 0;
                            i = yajl_parse_integer(buf, bufLen);
                            if ((i == LLONG_MIN || i == LLONG_MAX) &&
                                errno == ERANGE)
                            {
                                yajl_bs_set(hand->stateStack,
                                            yajl_state_parse_error);
                                hand->parseError = "integer overflow" ;
                                /* try to restore error offset */
                                if (*offset >= bufLen) *offset -= bufLen;
                                else *offset = 0;
                                goto around_again;
                            }
                            _CC_CHK(hand->callbacks->yajl_integer(hand->ctx,
                                                                  i));
                        }
                    }
                    break;
                case yajl_tok_double:
                    if (hand->callbacks) {
                        if (hand->callbacks->yajl_number) {
                            _CC_CHK(hand->callbacks->yajl_number(
                                        hand->ctx, (const char *) buf, bufLen));
                        } else if (hand->callbacks->yajl_double) {
                            double d = 0.0;
                            /* copy the token into decodeBuf so strtod is
                             * given the buffer's own data pointer rather
                             * than a slice of the input */
                            yajl_buf_clear(hand->decodeBuf);
                            yajl_buf_append(hand->decodeBuf, buf, bufLen);
                            buf = yajl_buf_data(hand->decodeBuf);
                            /* Clear errno so that a stale ERANGE cannot be
                             * mistaken for an overflow reported by this
                             * conversion. */
                            errno = 0;
                            d = strtod((char *) buf, NULL);
                            if ((d == HUGE_VAL || d == -HUGE_VAL) &&
                                errno == ERANGE)
                            {
                                yajl_bs_set(hand->stateStack,
                                            yajl_state_parse_error);
                                hand->parseError = "numeric (floating point) "
                                    "overflow";
                                /* try to restore error offset */
                                if (*offset >= bufLen) *offset -= bufLen;
                                else *offset = 0;
                                goto around_again;
                            }
                            _CC_CHK(hand->callbacks->yajl_double(hand->ctx,
                                                                 d));
                        }
                    }
                    break;
                case yajl_tok_right_bracket: {
                    /* ']' closes an array that is still empty, or - with
                     * JSON5 enabled - one whose last element was followed
                     * by a comma */
                    yajl_state s = yajl_bs_current(hand->stateStack);
                    if (s == yajl_state_array_start ||
                        ((hand->flags & yajl_allow_json5) &&
                         (s == yajl_state_array_need_val)))
                    {
                        if (hand->callbacks &&
                            hand->callbacks->yajl_end_array)
                        {
                            _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx));
                        }
                        yajl_bs_pop(hand->stateStack);
                        goto around_again;
                    }
                    /* intentional fall-through */
                }
                case yajl_tok_colon:
                case yajl_tok_comma:
                case yajl_tok_right_brace:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError =
                        "unallowed token at this point in JSON text";
                    goto around_again;
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError = "invalid token, internal error";
                    goto around_again;
            }
            /* got a value.  transition depends on the state we're in. */
            {
                yajl_state s = yajl_bs_current(hand->stateStack);
                if (s == yajl_state_start || s == yajl_state_got_value) {
                    yajl_bs_set(hand->stateStack, yajl_state_parse_complete);
                } else if (s == yajl_state_map_need_val) {
                    yajl_bs_set(hand->stateStack, yajl_state_map_got_val);
                } else {
                    yajl_bs_set(hand->stateStack, yajl_state_array_got_val);
                }
            }
            if (stateToPush != yajl_state_start) {
                yajl_bs_push(hand->stateStack, stateToPush);
            }

            goto around_again;
        }
        case yajl_state_map_start:
        case yajl_state_map_need_key: {
            /* only difference between these two states is that in
             * start '}' is valid, whereas in need_key, we've parsed
             * a comma, so unless this is JSON5 a key _must_ follow. */
            tok = yajl_lex_key(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);
            switch (tok) {
                case yajl_tok_eof:
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                case yajl_tok_string_with_escapes:
                    if (hand->callbacks && hand->callbacks->yajl_map_key) {
                        /* decode, then share the callback code below */
                        yajl_buf_clear(hand->decodeBuf);
                        yajl_string_decode(hand->decodeBuf, buf, bufLen);
                        buf = yajl_buf_data(hand->decodeBuf);
                        bufLen = yajl_buf_len(hand->decodeBuf);
                    }
                    /* intentional fall-through */
                case yajl_tok_string:
                    if (hand->callbacks && hand->callbacks->yajl_map_key) {
                        _CC_CHK(hand->callbacks->yajl_map_key(hand->ctx, buf,
                                                              bufLen));
                    }
                    yajl_bs_set(hand->stateStack, yajl_state_map_sep);
                    goto around_again;
                case yajl_tok_right_brace: {
                    yajl_state s = yajl_bs_current(hand->stateStack);
                    if (s == yajl_state_map_start ||
                        ((hand->flags & yajl_allow_json5) &&
                         (s == yajl_state_map_need_key))) {
                        if (hand->callbacks && hand->callbacks->yajl_end_map) {
                            _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
                        }
                        yajl_bs_pop(hand->stateStack);
                        goto around_again;
                    }
                    /* intentional fall-through: '}' is not allowed here */
                }
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError = hand->flags & yajl_allow_json5 ?
                        "invalid object key (must be a string or identifier)" :
                        "invalid object key (must be a string)";
                    goto around_again;
            }
        }
        case yajl_state_map_sep: {
            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);
            switch (tok) {
                case yajl_tok_colon:
                    yajl_bs_set(hand->stateStack, yajl_state_map_need_val);
                    goto around_again;
                case yajl_tok_eof:
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError = "object key and value must "
                        "be separated by a colon (':')";
                    goto around_again;
            }
        }
        case yajl_state_map_got_val: {
            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);
            switch (tok) {
                case yajl_tok_right_brace:
                    if (hand->callbacks && hand->callbacks->yajl_end_map) {
                        _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
                    }
                    yajl_bs_pop(hand->stateStack);
                    goto around_again;
                case yajl_tok_comma:
                    yajl_bs_set(hand->stateStack, yajl_state_map_need_key);
                    goto around_again;
                case yajl_tok_eof:
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError = "after key and value, inside map, "
                                       "I expect ',' or '}'";
                    /* try to restore error offset */
                    if (*offset >= bufLen) *offset -= bufLen;
                    else *offset = 0;
                    goto around_again;
            }
        }
        case yajl_state_array_got_val: {
            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);
            switch (tok) {
                case yajl_tok_right_bracket:
                    if (hand->callbacks && hand->callbacks->yajl_end_array) {
                        _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx));
                    }
                    yajl_bs_pop(hand->stateStack);
                    goto around_again;
                case yajl_tok_comma:
                    yajl_bs_set(hand->stateStack, yajl_state_array_need_val);
                    goto around_again;
                case yajl_tok_eof:
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError =
                        "after array element, I expect ',' or ']'";
                    goto around_again;
            }
        }
    }

    /* every state handled above either returns or jumps back to
     * around_again, so this point is only reached for a state value the
     * switch does not know */
    abort();
    return yajl_status_error;
}