JSON5: Modified lexer for some number support

If configured for JSON5, the lexer now allows a leading or trailing
decimal point on doubles, and an explicit leading + sign on integer
or double numbers.

Includes test cases.
This commit is contained in:
Andrew Johnson
2020-07-07 00:34:50 -05:00
parent 91c5b2fee2
commit 456e774d85
2 changed files with 61 additions and 5 deletions

View File

@@ -117,7 +117,7 @@ yajl_lex_alloc(yajl_alloc_funcs * alloc,
lxr->buf = yajl_buf_alloc(alloc);
lxr->allowComments = allowComments;
lxr->validateUTF8 = validateUTF8;
lxr->allowJson5 = allowJson5;
lxr->allowJson5 = !!allowJson5;
lxr->alloc = alloc;
return lxr;
}
@@ -385,27 +385,32 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
* is an ambiguous case for integers at EOF. */
unsigned char c;
int numRd = 0;
yajl_tok tok = yajl_tok_integer;
RETURN_IF_EOF;
c = readChar(lexer, jsonText, offset);
/* optional leading minus */
if (c == '-') {
/* optional leading plus/minus */
if (c == '-' || (lexer->allowJson5 && c == '+')) {
RETURN_IF_EOF;
c = readChar(lexer, jsonText, offset);
}
/* a single zero, or a series of integers */
if (c == '0') {
numRd++;
RETURN_IF_EOF;
c = readChar(lexer, jsonText, offset);
} else if (c >= '1' && c <= '9') {
do {
numRd++;
RETURN_IF_EOF;
c = readChar(lexer, jsonText, offset);
} while (c >= '0' && c <= '9');
} else if (lexer->allowJson5 && c == '.') {
goto got_decimal;
} else {
unreadChar(lexer, offset);
lexer->error = yajl_lex_missing_integer_after_minus;
@@ -414,10 +419,10 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
/* optional fraction (indicates this is floating point) */
if (c == '.') {
int numRd = 0;
got_decimal:
RETURN_IF_EOF;
c = readChar(lexer, jsonText, offset);
if (!lexer->allowJson5) numRd = 0;
while (c >= '0' && c <= '9') {
numRd++;
@@ -607,6 +612,9 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
jsonTextLen, offset);
goto lexed;
}
case '+': case '.':
if (!lexer->allowJson5)
goto invalid;
case '-':
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': {
@@ -645,6 +653,7 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
/* hit error or eof, bail */
goto lexed;
default:
invalid:
lexer->error = yajl_lex_invalid_char;
tok = yajl_tok_error;
goto lexed;

View File

@@ -7,6 +7,53 @@
sub cases {
my $VAR1 = [
{
name => "doubles",
opts => [
-5
],
input => [
"[ .1e2, 10., +3.141569, -.1e4]",
""
],
gives => [
"array open '['",
"double: 10",
"double: 10",
"double: 3.14157",
"double: -1000",
"array close ']'",
"memory leaks:\t0"
]
},
{
name => "integers",
opts => [
-5
],
input => [
"[ +1,+2,+3,+4,+5,+6,+7,",
" +123456789 , -123456789,",
" +2147483647, -2147483647 ]",
""
],
gives => [
"array open '['",
"integer: 1",
"integer: 2",
"integer: 3",
"integer: 4",
"integer: 5",
"integer: 6",
"integer: 7",
"integer: 123456789",
"integer: -123456789",
"integer: 2147483647",
"integer: -2147483647",
"array close ']'",
"memory leaks:\t0"
]
},
{
name => "trailing_commas",
opts => [