Add JSON5 support for hexadecimal integers

Includes tests for YAJL.

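Note: yajl_parse_integer still cannot parse LLONG_MIN in either base 10 or base 16. The magnitude is accumulated as a positive long long before the sign is applied, so it exceeds LLONG_MAX and is reported as out of range (errno = ERANGE).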
This commit is contained in:
Andrew Johnson
2020-07-07 23:10:07 -05:00
parent 00ac15cec5
commit 98a358437f
4 changed files with 118 additions and 16 deletions

View File

@@ -384,6 +384,7 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
* _beyond_ in order to know that they are complete. There
* is an ambiguous case for integers at EOF. */
const char hexDigits[] = "0123456789abcdefABCDEF";
unsigned char c;
int numRd = 0;
@@ -398,11 +399,16 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
c = readChar(lexer, jsonText, offset);
}
/* a single zero, or a series of integers */
/* a single zero, hex number, or a series of decimal digits */
if (c == '0') {
numRd++;
RETURN_IF_EOF;
c = readChar(lexer, jsonText, offset);
if (c == 'x' || c == 'X') {
if (lexer->allowJson5) goto got_hex;
lexer->error = yajl_lex_unallowed_hex_integer;
return yajl_tok_error;
}
} else if (c >= '1' && c <= '9') {
do {
numRd++;
@@ -419,7 +425,7 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
/* optional fraction (indicates this is floating point) */
if (c == '.') {
got_decimal:
got_decimal:
RETURN_IF_EOF;
c = readChar(lexer, jsonText, offset);
if (!lexer->allowJson5) numRd = 0;
@@ -462,6 +468,25 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
tok = yajl_tok_double;
}
goto end_number;
got_hex:
RETURN_IF_EOF;
c = readChar(lexer, jsonText, offset);
if (strchr(hexDigits, c)) {
do {
RETURN_IF_EOF;
c = readChar(lexer, jsonText, offset);
} while (strchr(hexDigits, c));
}
else {
unreadChar(lexer, offset);
lexer->error = yajl_lex_missing_hex_digit_after_0x;
return yajl_tok_error;
}
end_number:
/* we always go "one too far" */
unreadChar(lexer, offset);
@@ -736,6 +761,10 @@ yajl_lex_error_to_string(yajl_lex_error error)
case yajl_lex_unallowed_comment:
return "probable comment found in input text, comments are "
"not enabled.";
case yajl_lex_missing_hex_digit_after_0x:
return "malformed number, a hex digit is required after the 0x/0X.";
case yajl_lex_unallowed_hex_integer:
return "probable hex number found, JSON5 is not enabled.";
}
return "unknown error code";
}

View File

@@ -100,7 +100,9 @@ typedef enum {
yajl_lex_missing_integer_after_decimal,
yajl_lex_missing_integer_after_exponent,
yajl_lex_missing_integer_after_minus,
yajl_lex_unallowed_comment
yajl_lex_unallowed_comment,
yajl_lex_missing_hex_digit_after_0x,
yajl_lex_unallowed_hex_integer,
} yajl_lex_error;
const char * yajl_lex_error_to_string(yajl_lex_error error);

View File

@@ -34,29 +34,52 @@
#define LLONG_MIN (-0x7FFFFFFFFFFFFFFFLL - 1)
#endif
#define MAX_VALUE_TO_MULTIPLY ((LLONG_MAX / 10) + (LLONG_MAX % 10))
/* same semantics as strtol */
long long
yajl_parse_integer(const unsigned char *number, size_t length)
{
long long ret = 0;
long sign = 1;
long base = 10;
long long max = LLONG_MAX / base;
const unsigned char *pos = number;
if (*pos == '-') { pos++; sign = -1; }
if (*pos == '+') { pos++; }
const unsigned char *end = number + length;
while (pos < number + length) {
if ( ret > MAX_VALUE_TO_MULTIPLY ) {
if (*pos == '-') {
pos++;
sign = -1;
}
else if (*pos == '+') {
pos++;
}
if (*pos == '0' &&
(pos[1] == 'x' || pos[1] == 'X')) {
base = 16;
max = LLONG_MAX / base;
pos += 2;
}
while (pos < end) {
int digit;
if (ret > max) {
errno = ERANGE;
return sign == 1 ? LLONG_MAX : LLONG_MIN;
}
ret *= 10;
if (LLONG_MAX - ret < (*pos - '0')) {
ret *= base;
digit = *pos++ - '0';
/* Don't have to check for non-digit characters,
* the lexer has already rejected any bad digits.
*/
if (digit > 9)
digit = (digit - ('A' - '0') + 10) & 0xf;
if (LLONG_MAX - ret < digit) {
errno = ERANGE;
return sign == 1 ? LLONG_MAX : LLONG_MIN;
}
ret += (*pos++ - '0');
ret += digit;
}
return sign * ret;

View File

@@ -32,9 +32,16 @@ sub cases {
-5
],
input => [
"[ +1,+2,+3,+4,+5,+6,+7,",
"[ +1,+2,+3,+4,+5,+6,+7,+8,+9,",
" 0x1,0x2,0x3,0x4,0x5,0x6,0x7,0x8,0x9,",
" 0xa,0xb,0xc,0xd,0xe,0xf,",
" 0xA,0xB,0xC,0xD,0xE,0xF,",
" +0xfedcba98, -0x6789ABCD,",
" +123456789 , -123456789,",
" +2147483647, -2147483647 ]",
" +2147483647, -2147483648,",
" 0x7fffFFFFffffFFFF, -0x7FFFffffFFFFffff,",
" 9223372036854775807, -9223372036854775807",
"]",
""
],
gives => [
@@ -46,10 +53,39 @@ sub cases {
"integer: 5",
"integer: 6",
"integer: 7",
"integer: 8",
"integer: 9",
"integer: 1",
"integer: 2",
"integer: 3",
"integer: 4",
"integer: 5",
"integer: 6",
"integer: 7",
"integer: 8",
"integer: 9",
"integer: 10",
"integer: 11",
"integer: 12",
"integer: 13",
"integer: 14",
"integer: 15",
"integer: 10",
"integer: 11",
"integer: 12",
"integer: 13",
"integer: 14",
"integer: 15",
"integer: 4275878552",
"integer: -1737075661",
"integer: 123456789",
"integer: -123456789",
"integer: 2147483647",
"integer: -2147483647",
"integer: -2147483648",
"integer: 9223372036854775807",
"integer: -9223372036854775807",
"integer: 9223372036854775807",
"integer: -9223372036854775807",
"array close ']'",
"memory leaks:\t0"
]
@@ -2739,6 +2775,18 @@ sub cases {
"memory leaks:\t0"
]
},
{
name => "hex",
opts => [],
input => [
"0x1",
""
],
gives => [
"lexical error: probable hex number found, JSON5 is not enabled.",
"memory leaks:\t0"
]
},
{
name => "high_overflow",
opts => [],