diff --git a/modules/libcom/src/yajl/yajl_encode.c b/modules/libcom/src/yajl/yajl_encode.c index 947dce1d7..732f451f9 100644 --- a/modules/libcom/src/yajl/yajl_encode.c +++ b/modules/libcom/src/yajl/yajl_encode.c @@ -33,13 +33,22 @@ yajl_string_encode(const yajl_print_t print, void * ctx, const unsigned char * str, size_t len, - int escape_solidus) + int escape_solidus, + int output_json5) { size_t beg = 0; size_t end = 0; char hexBuf[7]; - hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0'; - hexBuf[6] = 0; + char *hexAt; + if (output_json5) { + hexBuf[0] = '\\'; hexBuf[1] = 'x'; + hexBuf[4] = 0; + hexAt = &hexBuf[2]; + } else { + hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0'; + hexBuf[6] = 0; + hexAt = &hexBuf[4]; + } while (end < len) { const char * escaped = NULL; @@ -57,9 +66,20 @@ yajl_string_encode(const yajl_print_t print, case '\f': escaped = "\\f"; break; case '\b': escaped = "\\b"; break; case '\t': escaped = "\\t"; break; + case '\0': + if (output_json5) { + escaped = "\\0"; break; + } + goto ashex; + case '\v': + if (output_json5) { + escaped = "\\v"; break; + } + goto ashex; default: if ((unsigned char) str[end] < 32) { - CharToHex(str[end], hexBuf + 4); + ashex: + CharToHex(str[end], hexAt); escaped = hexBuf; } break; @@ -75,10 +95,10 @@ yajl_string_encode(const yajl_print_t print, print(ctx, (const char *) (str + beg), end - beg); } -static void hexToDigit(unsigned int * val, const unsigned char * hex) +static void hexToDigit(unsigned int * val, unsigned int len, const unsigned char * hex) { unsigned int i; - for (i=0;i<4;i++) { + for (i=0;i= 'A') c = (c & ~0x20) - 7; c -= '0'; @@ -133,14 +153,14 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str, case 't': unescaped = "\t"; break; case 'u': { unsigned int codepoint = 0; - hexToDigit(&codepoint, str + ++end); + hexToDigit(&codepoint, 4, str + ++end); end+=3; /* check if this is a surrogate */ if ((codepoint & 0xFC00) == 0xD800) { end++; if (str[end] == '\\' && str[end + 1] == 'u') { unsigned int surrogate = 0; - hexToDigit(&surrogate, str + end + 2); + hexToDigit(&surrogate, 4, str + end + 2); codepoint = (((codepoint & 0x3F) << 10) | ((((codepoint >> 6) & 0xF) + 1) << 16) | @@ -177,6 +197,15 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str, beg = ++end; continue; case 'v': unescaped = "\v"; break; + case 'x': { + unsigned int codepoint = 0; + hexToDigit(&codepoint, 2, str + ++end); + end++; + utf8Buf[0] = (char) codepoint; + yajl_buf_append(buf, utf8Buf, 1); + beg = ++end; + continue; + } default: utf8Buf[0] = str[end]; utf8Buf[1] = 0; diff --git a/modules/libcom/src/yajl/yajl_encode.h b/modules/libcom/src/yajl/yajl_encode.h index cb3895f9a..fd58dec9c 100644 --- a/modules/libcom/src/yajl/yajl_encode.h +++ b/modules/libcom/src/yajl/yajl_encode.h @@ -28,7 +28,8 @@ void yajl_string_encode(const yajl_print_t printer, void * ctx, const unsigned char * str, size_t length, - int escape_solidus); + int escape_solidus, + int output_json5); void yajl_string_decode(yajl_buf buf, const unsigned char * str, size_t length); diff --git a/modules/libcom/src/yajl/yajl_gen.c b/modules/libcom/src/yajl/yajl_gen.c index d5f5fdcd7..7d86ec8c7 100644 --- a/modules/libcom/src/yajl/yajl_gen.c +++ b/modules/libcom/src/yajl/yajl_gen.c @@ -270,7 +270,8 @@ yajl_gen_string(yajl_gen g, const unsigned char * str, } else { g->print(g->ctx, "\"", 1); - yajl_string_encode(g->print, g->ctx, str, len, g->flags & yajl_gen_escape_solidus); + yajl_string_encode(g->print, g->ctx, str, len, g->flags & yajl_gen_escape_solidus, + g->flags & yajl_gen_json5); g->print(g->ctx, "\"", 1); } APPENDED_ATOM; diff --git a/modules/libcom/src/yajl/yajl_lex.c b/modules/libcom/src/yajl/yajl_lex.c index dca39a55a..f780a3b0a 100644 --- a/modules/libcom/src/yajl/yajl_lex.c +++ b/modules/libcom/src/yajl/yajl_lex.c @@ -332,7 +332,21 @@ yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText, if (!(charLookupTable[curChar] & VHC)) { /* back up to offending char */ unreadChar(lexer, offset); - lexer->error = yajl_lex_string_invalid_hex_char; + lexer->error = yajl_lex_string_invalid_hex_u_char; + goto finish_string_lex; + } + } + } + else if (lexer->allowJson5 && curChar == 'x') { + unsigned int i = 0; + + for (i=0;i<2;i++) { + STR_CHECK_EOF; + curChar = readChar(lexer, jsonText, offset); + if (!(charLookupTable[curChar] & VHC)) { + /* back up to offending char */ + unreadChar(lexer, offset); + lexer->error = yajl_lex_string_invalid_hex_x_char; goto finish_string_lex; } } @@ -905,9 +919,12 @@ yajl_lex_error_to_string(yajl_lex_error error) "which it may not."; case yajl_lex_string_invalid_json_char: return "invalid character inside string."; - case yajl_lex_string_invalid_hex_char: + case yajl_lex_string_invalid_hex_u_char: return "invalid (non-hex) character occurs after '\\u' inside " "string."; + case yajl_lex_string_invalid_hex_x_char: + return "invalid (non-hex) character occurs after '\\x' inside " + "string."; case yajl_lex_invalid_char: return "invalid char in json text."; case yajl_lex_invalid_string: diff --git a/modules/libcom/src/yajl/yajl_lex.h b/modules/libcom/src/yajl/yajl_lex.h index ebe647bd2..7c2a6b9d8 100644 --- a/modules/libcom/src/yajl/yajl_lex.h +++ b/modules/libcom/src/yajl/yajl_lex.h @@ -107,7 +107,8 @@ typedef enum { yajl_lex_string_invalid_utf8, yajl_lex_string_invalid_escaped_char, yajl_lex_string_invalid_json_char, - yajl_lex_string_invalid_hex_char, + yajl_lex_string_invalid_hex_u_char, + yajl_lex_string_invalid_hex_x_char, yajl_lex_invalid_char, yajl_lex_invalid_string, yajl_lex_missing_integer_after_decimal, diff --git a/modules/libcom/test/yajlTestCases.pm b/modules/libcom/test/yajlTestCases.pm index 2eef474b1..f2e7a8f93 100644 --- a/modules/libcom/test/yajlTestCases.pm +++ b/modules/libcom/test/yajlTestCases.pm @@ -7,6 +7,21 @@ sub cases { my $VAR1 = [ + { + name => "codepoints_from_hex", + opts => [ + -5 + ], + input => [ + "\"\\x0a\\x07\\x21\\x40\\x7c\"", + "" + ], + gives => [ + "string: '", + "\a!\@|'", + "memory leaks:\t0" + ] + }, { name => "doubles", opts => [ @@ -94,6 +109,20 @@ sub cases { "memory leaks:\t0" ] }, + { + name => "invalid_hex_char", + opts => [ + -5 + ], + input => [ + "\"yabba dabba do \\x1g !!\"", + "" + ], + gives => [ + "lexical error: invalid (non-hex) character occurs after '\\x' inside string.", + "memory leaks:\t0" + ] + }, { name => "map_identifiers", opts => [