From 55f4e55383080ed78f0d5000d0124646f04b5c3e Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Fri, 10 Jul 2020 19:21:57 -0500 Subject: [PATCH] Support for JSON5 character escapes in strings Any character other than the digits 1-9 may be preceded by a reverse solidus '\', and unless the combination has an explicitly defined expansion the character is included without the solidus. JSON5 adds \', \0 and \v to the set of defined escapes, and an escaped line ending (LF, CR or CR+LF) is omitted from the string. In the test case the expected output is written as \13 rather than \v because Perl has no \v string escape; the octal escape denotes the same character (13 octal = 11 decimal = '\v'). --- modules/libcom/src/yajl/yajl_encode.c | 20 ++++++++++++++--- modules/libcom/src/yajl/yajl_lex.c | 10 ++++++++- modules/libcom/test/yajlTestCases.pm | 31 +++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/modules/libcom/src/yajl/yajl_encode.c b/modules/libcom/src/yajl/yajl_encode.c index 082e28d33..947dce1d7 100644 --- a/modules/libcom/src/yajl/yajl_encode.c +++ b/modules/libcom/src/yajl/yajl_encode.c @@ -128,8 +128,6 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str, case 'r': unescaped = "\r"; break; case 'n': unescaped = "\n"; break; case '\\': unescaped = "\\"; break; - case '/': unescaped = "/"; break; - case '"': unescaped = "\""; break; case 'f': unescaped = "\f"; break; case 'b': unescaped = "\b"; break; case 't': unescaped = "\t"; break; @@ -165,8 +163,24 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str, break; } + /* The following escapes are only valid when parsing JSON5. + * The lexer catches them when allowJson5 is not set. 
+ */ + case '\n': beg = ++end; continue; + case '\r': + if (str[++end] == '\n') ++end; + beg = end; + continue; + case '0': + utf8Buf[0] = '\0'; + yajl_buf_append(buf, utf8Buf, 1); + beg = ++end; + continue; + case 'v': unescaped = "\v"; break; default: - assert("this should never happen" == NULL); + utf8Buf[0] = str[end]; + utf8Buf[1] = 0; + unescaped = utf8Buf; } yajl_buf_append(buf, unescaped, (unsigned int)strlen(unescaped)); beg = ++end; diff --git a/modules/libcom/src/yajl/yajl_lex.c b/modules/libcom/src/yajl/yajl_lex.c index ad5171488..dca39a55a 100644 --- a/modules/libcom/src/yajl/yajl_lex.c +++ b/modules/libcom/src/yajl/yajl_lex.c @@ -336,12 +336,20 @@ yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText, goto finish_string_lex; } } - } else if (!(charLookupTable[curChar] & VEC)) { + } + else if (lexer->allowJson5 ? (curChar >= '1' && curChar <= '9') + : !(charLookupTable[curChar] & VEC)) { /* back up to offending char */ unreadChar(lexer, offset); lexer->error = yajl_lex_string_invalid_escaped_char; goto finish_string_lex; } + else if (lexer->allowJson5 && curChar == '\r') { + STR_CHECK_EOF; + curChar = readChar(lexer, jsonText, offset); + if (curChar != '\n') + unreadChar(lexer, offset); + } } /* when not validating UTF8 it's a simple table lookup to determine * if the present character is invalid */ diff --git a/modules/libcom/test/yajlTestCases.pm b/modules/libcom/test/yajlTestCases.pm index 03f00b206..2eef474b1 100644 --- a/modules/libcom/test/yajlTestCases.pm +++ b/modules/libcom/test/yajlTestCases.pm @@ -168,6 +168,37 @@ sub cases { "memory leaks:\t0" ] }, + { + name => "strings", + opts => [ + -5 + ], + input => [ + "[", + " \"Hello\\!\",", + " \"\\\"Evenin\\',\\\" said the barman.\",", + " // The following string has 3 different escaped line-endings,", + " // LF, CR, and CR+LF, which all disappear from the final string.", + " \"Well \\", + "hi \\\rthere \\\r", + "y'all!\",", + " \"\\b\\f\\n\\r\\t\\v\\\\\",", + " \"\\A\\C\\/\\D\\C\",", 
+ "]", + "" + ], + gives => [ + "array open '['", + "string: 'Hello!'", + "string: '\"Evenin',\" said the barman.'", + "string: 'Well hi there y'all!'", + "string: '\b\f", + "\r\t\13\\'", + "string: 'AC/DC'", + "array close ']'", + "memory leaks:\t0" + ] + }, { name => "trailing_commas", opts => [