From 98a358437f37ddfdff56079b4729fe27aef46f14 Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Tue, 7 Jul 2020 23:10:07 -0500 Subject: [PATCH] Add JSON5 support for hexadecimal integers With tests for YAJL. Note yajl_parse_integer still can't handle LLONG_MIN in base 10 or 16. --- modules/libcom/src/yajl/yajl_lex.c | 33 +++++++++++++++- modules/libcom/src/yajl/yajl_lex.h | 4 +- modules/libcom/src/yajl/yajl_parser.c | 43 ++++++++++++++++----- modules/libcom/test/yajlTestCases.pm | 54 +++++++++++++++++++++++++-- 4 files changed, 118 insertions(+), 16 deletions(-) diff --git a/modules/libcom/src/yajl/yajl_lex.c b/modules/libcom/src/yajl/yajl_lex.c index 781d8def0..2439d46a7 100644 --- a/modules/libcom/src/yajl/yajl_lex.c +++ b/modules/libcom/src/yajl/yajl_lex.c @@ -384,6 +384,7 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText, * _beyond_ in order to know that they are complete. There * is an ambiguous case for integers at EOF. */ + const char hexDigits[] = "0123456789abcdefABCDEF"; unsigned char c; int numRd = 0; @@ -398,11 +399,16 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText, c = readChar(lexer, jsonText, offset); } - /* a single zero, or a series of integers */ + /* a single zero, hex number, or a series of decimal digits */ if (c == '0') { numRd++; RETURN_IF_EOF; c = readChar(lexer, jsonText, offset); + if (c == 'x' || c == 'X') { + if (lexer->allowJson5) goto got_hex; + lexer->error = yajl_lex_unallowed_hex_integer; + return yajl_tok_error; + } } else if (c >= '1' && c <= '9') { do { numRd++; @@ -419,7 +425,7 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText, /* optional fraction (indicates this is floating point) */ if (c == '.') { - got_decimal: + got_decimal: RETURN_IF_EOF; c = readChar(lexer, jsonText, offset); if (!lexer->allowJson5) numRd = 0; @@ -462,6 +468,25 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText, tok = yajl_tok_double; } + goto end_number; + + got_hex: + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + + if (strchr(hexDigits, c)) { + do { + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + } while (strchr(hexDigits, c)); + } + else { + unreadChar(lexer, offset); + lexer->error = yajl_lex_missing_hex_digit_after_0x; + return yajl_tok_error; + } + + end_number: /* we always go "one too far" */ unreadChar(lexer, offset); @@ -736,6 +761,10 @@ yajl_lex_error_to_string(yajl_lex_error error) case yajl_lex_unallowed_comment: return "probable comment found in input text, comments are " "not enabled."; + case yajl_lex_missing_hex_digit_after_0x: + return "malformed number, a hex digit is required after the 0x/0X."; + case yajl_lex_unallowed_hex_integer: + return "probable hex number found, JSON5 is not enabled."; } return "unknown error code"; } diff --git a/modules/libcom/src/yajl/yajl_lex.h b/modules/libcom/src/yajl/yajl_lex.h index 7112b8765..87b7069f3 100644 --- a/modules/libcom/src/yajl/yajl_lex.h +++ b/modules/libcom/src/yajl/yajl_lex.h @@ -100,7 +100,9 @@ typedef enum { yajl_lex_missing_integer_after_decimal, yajl_lex_missing_integer_after_exponent, yajl_lex_missing_integer_after_minus, - yajl_lex_unallowed_comment + yajl_lex_unallowed_comment, + yajl_lex_missing_hex_digit_after_0x, + yajl_lex_unallowed_hex_integer, } yajl_lex_error; const char * yajl_lex_error_to_string(yajl_lex_error error); diff --git a/modules/libcom/src/yajl/yajl_parser.c b/modules/libcom/src/yajl/yajl_parser.c index 795378704..3f62932b6 100644 --- a/modules/libcom/src/yajl/yajl_parser.c +++ b/modules/libcom/src/yajl/yajl_parser.c @@ -34,29 +34,52 @@ #define LLONG_MIN (-0x7FFFFFFFFFFFFFFFLL - 1) #endif -#define MAX_VALUE_TO_MULTIPLY ((LLONG_MAX / 10) + (LLONG_MAX % 10)) - - /* same semantics as strtol */ long long yajl_parse_integer(const unsigned char *number, size_t length) { long long ret = 0; long sign = 1; + long base = 10; + long long max = LLONG_MAX / base; const unsigned char *pos = number; - if (*pos == '-') { pos++; sign = -1; } - if (*pos == '+') { pos++; } + const unsigned char *end = number + length; - while (pos < number + length) { - if ( ret > MAX_VALUE_TO_MULTIPLY ) { + if (*pos == '-') { + pos++; + sign = -1; + } + else if (*pos == '+') { + pos++; + } + + if (*pos == '0' && + (pos[1] == 'x' || pos[1] == 'X')) { + base = 16; + max = LLONG_MAX / base; + pos += 2; + } + + while (pos < end) { + int digit; + + if (ret > max) { errno = ERANGE; return sign == 1 ? LLONG_MAX : LLONG_MIN; } - ret *= 10; - if (LLONG_MAX - ret < (*pos - '0')) { + + ret *= base; + digit = *pos++ - '0'; + /* Don't have to check for non-digit characters, + * the lexer has already rejected any bad digits. + */ + if (digit > 9) + digit = (digit - ('A' - '0') + 10) & 0xf; + + if (LLONG_MAX - ret < digit) { errno = ERANGE; return sign == 1 ? LLONG_MAX : LLONG_MIN; } - ret += (*pos++ - '0'); + ret += digit; } return sign * ret; diff --git a/modules/libcom/test/yajlTestCases.pm b/modules/libcom/test/yajlTestCases.pm index 9b5530338..0a89c97dc 100644 --- a/modules/libcom/test/yajlTestCases.pm +++ b/modules/libcom/test/yajlTestCases.pm @@ -32,9 +32,16 @@ sub cases { -5 ], input => [ - "[ +1,+2,+3,+4,+5,+6,+7,", + "[ +1,+2,+3,+4,+5,+6,+7,+8,+9,", + " 0x1,0x2,0x3,0x4,0x5,0x6,0x7,0x8,0x9,", + " 0xa,0xb,0xc,0xd,0xe,0xf,", + " 0xA,0xB,0xC,0xD,0xE,0xF,", + " +0xfedcba98, -0x6789ABCD,", " +123456789 , -123456789,", - " +2147483647, -2147483647 ]", + " +2147483647, -2147483648,", + " 0x7fffFFFFffffFFFF, -0x7FFFffffFFFFffff,", + " 9223372036854775807, -9223372036854775807", + "]", "" ], gives => [ @@ -46,10 +53,39 @@ sub cases { "integer: 5", "integer: 6", "integer: 7", + "integer: 8", + "integer: 9", + "integer: 1", + "integer: 2", + "integer: 3", + "integer: 4", + "integer: 5", + "integer: 6", + "integer: 7", + "integer: 8", + "integer: 9", + "integer: 10", + "integer: 11", + "integer: 12", + "integer: 13", + "integer: 14", + "integer: 15", + "integer: 10", + "integer: 11", + "integer: 12", + "integer: 13", + "integer: 14", + "integer: 15", + "integer: 4275878552", + "integer: -1737075661", "integer: 123456789", "integer: -123456789", "integer: 2147483647", - "integer: -2147483647", + "integer: -2147483648", + "integer: 9223372036854775807", + "integer: -9223372036854775807", + "integer: 9223372036854775807", + "integer: -9223372036854775807", "array close ']'", "memory leaks:\t0" ] @@ -2739,6 +2775,18 @@ sub cases { "memory leaks:\t0" ] }, + { + name => "hex", + opts => [], + input => [ + "0x1", + "" + ], + gives => [ + "lexical error: probable hex number found, JSON5 is not enabled.", + "memory leaks:\t0" + ] + }, { name => "high_overflow", opts => [],