Support for JSON5 character escapes in strings

Any character other than the digits 1-9 may be preceded by a
reverse solidus '\', and unless the combination has an explicitly
defined expansion the character is included without the solidus.
JSON5 adds \', \0 and \v to the set of defined escapes, and an
escaped line ending (LF, CR or CR+LF) is omitted from the string.

In the test case, Perl uses \13 instead of \v in the output,
but it is the same character (13 octal = 11 decimal = '\v').
This commit is contained in:
Andrew Johnson
2020-07-10 19:21:57 -05:00
parent e2256d0663
commit 55f4e55383
3 changed files with 57 additions and 4 deletions

View File

@@ -128,8 +128,6 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str,
case 'r': unescaped = "\r"; break;
case 'n': unescaped = "\n"; break;
case '\\': unescaped = "\\"; break;
case '/': unescaped = "/"; break;
case '"': unescaped = "\""; break;
case 'f': unescaped = "\f"; break;
case 'b': unescaped = "\b"; break;
case 't': unescaped = "\t"; break;
@@ -165,8 +163,24 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str,
break;
}
/* The following escapes are only valid when parsing JSON5.
* The lexer catches them when allowJson5 is not set.
*/
case '\n': beg = ++end; continue;
case '\r':
if (str[++end] == '\n') ++end;
beg = end;
continue;
case '0':
utf8Buf[0] = '\0';
yajl_buf_append(buf, utf8Buf, 1);
beg = ++end;
continue;
case 'v': unescaped = "\v"; break;
default:
assert("this should never happen" == NULL);
utf8Buf[0] = str[end];
utf8Buf[1] = 0;
unescaped = utf8Buf;
}
yajl_buf_append(buf, unescaped, (unsigned int)strlen(unescaped));
beg = ++end;

View File

@@ -336,12 +336,20 @@ yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText,
goto finish_string_lex;
}
}
} else if (!(charLookupTable[curChar] & VEC)) {
}
else if (lexer->allowJson5 ? (curChar >= '1' && curChar <= '9')
: !(charLookupTable[curChar] & VEC)) {
/* back up to offending char */
unreadChar(lexer, offset);
lexer->error = yajl_lex_string_invalid_escaped_char;
goto finish_string_lex;
}
else if (lexer->allowJson5 && curChar == '\r') {
STR_CHECK_EOF;
curChar = readChar(lexer, jsonText, offset);
if (curChar != '\n')
unreadChar(lexer, offset);
}
}
/* when not validating UTF8 it's a simple table lookup to determine
* if the present character is invalid */

View File

@@ -168,6 +168,37 @@ sub cases {
"memory leaks:\t0"
]
},
{
name => "strings",
opts => [
-5
],
input => [
"[",
" \"Hello\\!\",",
" \"\\\"Evenin\\',\\\" said the barman.\",",
" // The following string has 3 different escaped line-endings,",
" // LF, CR, and CR+LF, which all disappear from the final string.",
" \"Well \\",
"hi \\\rthere \\\r",
"y'all!\",",
" \"\\b\\f\\n\\r\\t\\v\\\\\",",
" \"\\A\\C\\/\\D\\C\",",
"]",
""
],
gives => [
"array open '['",
"string: 'Hello!'",
"string: '\"Evenin',\" said the barman.'",
"string: 'Well hi there y'all!'",
"string: '\b\f",
"\r\t\13\\'",
"string: 'AC/DC'",
"array close ']'",
"memory leaks:\t0"
]
},
{
name => "trailing_commas",
opts => [