Support for JSON5 character escapes in strings

In JSON5 strings, any character other than the digits 1-9 may be preceded by a reverse solidus '\'; unless the combination has an explicitly defined expansion, the character is included in the string without the solidus. JSON5 adds \', \0 and \v to the set of defined escapes, and an escaped line ending (LF, CR or CR+LF) is omitted from the string. In the test case, Perl writes \13 instead of \v in the expected output, but this is the same character (13 octal = 11 decimal = '\v').
2020-07-10 19:21:57 -05:00
parent e2256d0663
commit 55f4e55383
3 changed files with 57 additions and 4 deletions
--- a/modules/libcom/src/yajl/yajl_encode.c
+++ b/modules/libcom/src/yajl/yajl_encode.c
@@ -128,8 +128,6 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str,
                case 'r': unescaped = "\r"; break;
                case 'n': unescaped = "\n"; break;
                case '\\': unescaped = "\\"; break;
-                case '/': unescaped = "/"; break;
-                case '"': unescaped = "\""; break;
                case 'f': unescaped = "\f"; break;
                case 'b': unescaped = "\b"; break;
                case 't': unescaped = "\t"; break;
@@ -165,8 +163,24 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str,

                    break;
                }
+                /* The following escapes are only valid when parsing JSON5.
+                 * The lexer catches them when allowJson5 is not set.
+                 */
+                case '\n': beg = ++end; continue;
+                case '\r':
+                    if (str[++end] == '\n') ++end;
+                    beg = end;
+                    continue;
+                case '0':
+                    utf8Buf[0] = '\0';
+                    yajl_buf_append(buf, utf8Buf, 1);
+                    beg = ++end;
+                    continue;
+                case 'v': unescaped = "\v"; break;
                default:
-                    assert("this should never happen" == NULL);
+                    utf8Buf[0] = str[end];
+                    utf8Buf[1] = 0;
+                    unescaped = utf8Buf;
            }
            yajl_buf_append(buf, unescaped, (unsigned int)strlen(unescaped));
            beg = ++end;
--- a/modules/libcom/src/yajl/yajl_lex.c
+++ b/modules/libcom/src/yajl/yajl_lex.c
@@ -336,12 +336,20 @@ yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText,
                        goto finish_string_lex;
                    }
                }
-            } else if (!(charLookupTable[curChar] & VEC)) {
+            }
+            else if (lexer->allowJson5 ? (curChar >= '1' && curChar <= '9')
+                : !(charLookupTable[curChar] & VEC)) {
                /* back up to offending char */
                unreadChar(lexer, offset);
                lexer->error = yajl_lex_string_invalid_escaped_char;
                goto finish_string_lex;
            }
+            else if (lexer->allowJson5 && curChar == '\r') {
+                STR_CHECK_EOF;
+                curChar = readChar(lexer, jsonText, offset);
+                if (curChar != '\n')
+                    unreadChar(lexer, offset);
+            }
        }
        /* when not validating UTF8 it's a simple table lookup to determine
         * if the present character is invalid */
--- a/modules/libcom/test/yajlTestCases.pm
+++ b/modules/libcom/test/yajlTestCases.pm
@@ -168,6 +168,37 @@ sub cases {
        "memory leaks:\t0"
      ]
    },
+    {
+      name => "strings",
+      opts => [
+        -5
+      ],
+      input => [
+        "[",
+        "  \"Hello\\!\",",
+        "  \"\\\"Evenin\\',\\\" said the barman.\",",
+        "  // The following string has 3 different escaped line-endings,",
+        "  // LF, CR, and CR+LF, which all disappear from the final string.",
+        "  \"Well \\",
+        "hi \\\rthere \\\r",
+        "y'all!\",",
+        "  \"\\b\\f\\n\\r\\t\\v\\\\\",",
+        "  \"\\A\\C\\/\\D\\C\",",
+        "]",
+        ""
+      ],
+      gives => [
+        "array open '['",
+        "string: 'Hello!'",
+        "string: '\"Evenin',\" said the barman.'",
+        "string: 'Well hi there y'all!'",
+        "string: '\b\f",
+        "\r\t\13\\'",
+        "string: 'AC/DC'",
+        "array close ']'",
+        "memory leaks:\t0"
+      ]
+    },
    {
      name => "trailing_commas",
      opts => [