Add JSON5 support for hexadecimal integers
Includes tests for YAJL. Note: yajl_parse_integer still cannot handle LLONG_MIN in either base 10 or base 16.
This commit is contained in:
@@ -384,6 +384,7 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
|
||||
* _beyond_ in order to know that they are complete. There
|
||||
* is an ambiguous case for integers at EOF. */
|
||||
|
||||
const char hexDigits[] = "0123456789abcdefABCDEF";
|
||||
unsigned char c;
|
||||
int numRd = 0;
|
||||
|
||||
@@ -398,11 +399,16 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
|
||||
c = readChar(lexer, jsonText, offset);
|
||||
}
|
||||
|
||||
/* a single zero, or a series of integers */
|
||||
/* a single zero, hex number, or a series of decimal digits */
|
||||
if (c == '0') {
|
||||
numRd++;
|
||||
RETURN_IF_EOF;
|
||||
c = readChar(lexer, jsonText, offset);
|
||||
if (c == 'x' || c == 'X') {
|
||||
if (lexer->allowJson5) goto got_hex;
|
||||
lexer->error = yajl_lex_unallowed_hex_integer;
|
||||
return yajl_tok_error;
|
||||
}
|
||||
} else if (c >= '1' && c <= '9') {
|
||||
do {
|
||||
numRd++;
|
||||
@@ -419,7 +425,7 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
|
||||
|
||||
/* optional fraction (indicates this is floating point) */
|
||||
if (c == '.') {
|
||||
got_decimal:
|
||||
got_decimal:
|
||||
RETURN_IF_EOF;
|
||||
c = readChar(lexer, jsonText, offset);
|
||||
if (!lexer->allowJson5) numRd = 0;
|
||||
@@ -462,6 +468,25 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
|
||||
tok = yajl_tok_double;
|
||||
}
|
||||
|
||||
goto end_number;
|
||||
|
||||
got_hex:
|
||||
RETURN_IF_EOF;
|
||||
c = readChar(lexer, jsonText, offset);
|
||||
|
||||
if (strchr(hexDigits, c)) {
|
||||
do {
|
||||
RETURN_IF_EOF;
|
||||
c = readChar(lexer, jsonText, offset);
|
||||
} while (strchr(hexDigits, c));
|
||||
}
|
||||
else {
|
||||
unreadChar(lexer, offset);
|
||||
lexer->error = yajl_lex_missing_hex_digit_after_0x;
|
||||
return yajl_tok_error;
|
||||
}
|
||||
|
||||
end_number:
|
||||
/* we always go "one too far" */
|
||||
unreadChar(lexer, offset);
|
||||
|
||||
@@ -736,6 +761,10 @@ yajl_lex_error_to_string(yajl_lex_error error)
|
||||
case yajl_lex_unallowed_comment:
|
||||
return "probable comment found in input text, comments are "
|
||||
"not enabled.";
|
||||
case yajl_lex_missing_hex_digit_after_0x:
|
||||
return "malformed number, a hex digit is required after the 0x/0X.";
|
||||
case yajl_lex_unallowed_hex_integer:
|
||||
return "probable hex number found, JSON5 is not enabled.";
|
||||
}
|
||||
return "unknown error code";
|
||||
}
|
||||
|
||||
@@ -100,7 +100,9 @@ typedef enum {
|
||||
yajl_lex_missing_integer_after_decimal,
|
||||
yajl_lex_missing_integer_after_exponent,
|
||||
yajl_lex_missing_integer_after_minus,
|
||||
yajl_lex_unallowed_comment
|
||||
yajl_lex_unallowed_comment,
|
||||
yajl_lex_missing_hex_digit_after_0x,
|
||||
yajl_lex_unallowed_hex_integer,
|
||||
} yajl_lex_error;
|
||||
|
||||
const char * yajl_lex_error_to_string(yajl_lex_error error);
|
||||
|
||||
@@ -34,29 +34,52 @@
|
||||
#define LLONG_MIN (-0x7FFFFFFFFFFFFFFFLL - 1)
|
||||
#endif
|
||||
|
||||
#define MAX_VALUE_TO_MULTIPLY ((LLONG_MAX / 10) + (LLONG_MAX % 10))
|
||||
|
||||
/* same semantics as strtol */
|
||||
long long
|
||||
yajl_parse_integer(const unsigned char *number, size_t length)
|
||||
{
|
||||
long long ret = 0;
|
||||
long sign = 1;
|
||||
long base = 10;
|
||||
long long max = LLONG_MAX / base;
|
||||
const unsigned char *pos = number;
|
||||
if (*pos == '-') { pos++; sign = -1; }
|
||||
if (*pos == '+') { pos++; }
|
||||
const unsigned char *end = number + length;
|
||||
|
||||
while (pos < number + length) {
|
||||
if ( ret > MAX_VALUE_TO_MULTIPLY ) {
|
||||
if (*pos == '-') {
|
||||
pos++;
|
||||
sign = -1;
|
||||
}
|
||||
else if (*pos == '+') {
|
||||
pos++;
|
||||
}
|
||||
|
||||
if (*pos == '0' &&
|
||||
(pos[1] == 'x' || pos[1] == 'X')) {
|
||||
base = 16;
|
||||
max = LLONG_MAX / base;
|
||||
pos += 2;
|
||||
}
|
||||
|
||||
while (pos < end) {
|
||||
int digit;
|
||||
|
||||
if (ret > max) {
|
||||
errno = ERANGE;
|
||||
return sign == 1 ? LLONG_MAX : LLONG_MIN;
|
||||
}
|
||||
ret *= 10;
|
||||
if (LLONG_MAX - ret < (*pos - '0')) {
|
||||
|
||||
ret *= base;
|
||||
digit = *pos++ - '0';
|
||||
/* Don't have to check for non-digit characters,
|
||||
* the lexer has already rejected any bad digits.
|
||||
*/
|
||||
if (digit > 9)
|
||||
digit = (digit - ('A' - '0') + 10) & 0xf;
|
||||
|
||||
if (LLONG_MAX - ret < digit) {
|
||||
errno = ERANGE;
|
||||
return sign == 1 ? LLONG_MAX : LLONG_MIN;
|
||||
}
|
||||
ret += (*pos++ - '0');
|
||||
ret += digit;
|
||||
}
|
||||
|
||||
return sign * ret;
|
||||
|
||||
@@ -32,9 +32,16 @@ sub cases {
|
||||
-5
|
||||
],
|
||||
input => [
|
||||
"[ +1,+2,+3,+4,+5,+6,+7,",
|
||||
"[ +1,+2,+3,+4,+5,+6,+7,+8,+9,",
|
||||
" 0x1,0x2,0x3,0x4,0x5,0x6,0x7,0x8,0x9,",
|
||||
" 0xa,0xb,0xc,0xd,0xe,0xf,",
|
||||
" 0xA,0xB,0xC,0xD,0xE,0xF,",
|
||||
" +0xfedcba98, -0x6789ABCD,",
|
||||
" +123456789 , -123456789,",
|
||||
" +2147483647, -2147483647 ]",
|
||||
" +2147483647, -2147483648,",
|
||||
" 0x7fffFFFFffffFFFF, -0x7FFFffffFFFFffff,",
|
||||
" 9223372036854775807, -9223372036854775807",
|
||||
"]",
|
||||
""
|
||||
],
|
||||
gives => [
|
||||
@@ -46,10 +53,39 @@ sub cases {
|
||||
"integer: 5",
|
||||
"integer: 6",
|
||||
"integer: 7",
|
||||
"integer: 8",
|
||||
"integer: 9",
|
||||
"integer: 1",
|
||||
"integer: 2",
|
||||
"integer: 3",
|
||||
"integer: 4",
|
||||
"integer: 5",
|
||||
"integer: 6",
|
||||
"integer: 7",
|
||||
"integer: 8",
|
||||
"integer: 9",
|
||||
"integer: 10",
|
||||
"integer: 11",
|
||||
"integer: 12",
|
||||
"integer: 13",
|
||||
"integer: 14",
|
||||
"integer: 15",
|
||||
"integer: 10",
|
||||
"integer: 11",
|
||||
"integer: 12",
|
||||
"integer: 13",
|
||||
"integer: 14",
|
||||
"integer: 15",
|
||||
"integer: 4275878552",
|
||||
"integer: -1737075661",
|
||||
"integer: 123456789",
|
||||
"integer: -123456789",
|
||||
"integer: 2147483647",
|
||||
"integer: -2147483647",
|
||||
"integer: -2147483648",
|
||||
"integer: 9223372036854775807",
|
||||
"integer: -9223372036854775807",
|
||||
"integer: 9223372036854775807",
|
||||
"integer: -9223372036854775807",
|
||||
"array close ']'",
|
||||
"memory leaks:\t0"
|
||||
]
|
||||
@@ -2739,6 +2775,18 @@ sub cases {
|
||||
"memory leaks:\t0"
|
||||
]
|
||||
},
|
||||
{
|
||||
name => "hex",
|
||||
opts => [],
|
||||
input => [
|
||||
"0x1",
|
||||
""
|
||||
],
|
||||
gives => [
|
||||
"lexical error: probable hex number found, JSON5 is not enabled.",
|
||||
"memory leaks:\t0"
|
||||
]
|
||||
},
|
||||
{
|
||||
name => "high_overflow",
|
||||
opts => [],
|
||||
|
||||
Reference in New Issue
Block a user