Merge branch 'develop' into feature/user_defined_string_to_int
diff --git a/README.md b/README.md
index 5849067..45cba01 100644
--- a/README.md
+++ b/README.md
@@ -505,7 +505,7 @@
$ ./json_unit "*"
===============================================================================
-All tests passed (8905012 assertions in 32 test cases)
+All tests passed (5568737 assertions in 32 test cases)
```
For more information, have a look at the file [.travis.yml](https://github.com/nlohmann/json/blob/master/.travis.yml).
diff --git a/src/json.hpp b/src/json.hpp
index 9d2305d..43502d0 100644
--- a/src/json.hpp
+++ b/src/json.hpp
@@ -7371,6 +7371,8 @@
literal_false, ///< the `false` literal
literal_null, ///< the `null` literal
value_string, ///< a string -- use get_string() for actual value
+ value_uint, ///< a uint number -- use get_number_uint() for actual value
+ value_int, ///< an int number -- use get_number_int() for actual value
value_number, ///< a number -- use get_number() for actual value
begin_array, ///< the character for array begin `[`
begin_object, ///< the character for object begin `{`
@@ -7516,6 +7518,8 @@
return "null literal";
case token_type::value_string:
return "string literal";
+ case token_type::value_uint:
+ case token_type::value_int:
case token_type::value_number:
return "number literal";
case token_type::begin_array:
@@ -7781,11 +7785,11 @@
}
if (yych <= '0')
{
- goto basic_json_parser_13;
+ goto basic_json_parser_37;
}
if (yych <= '9')
{
- goto basic_json_parser_15;
+ goto basic_json_parser_39;
}
goto basic_json_parser_5;
basic_json_parser_13:
@@ -7795,23 +7799,23 @@
{
if (yych == '.')
{
- goto basic_json_parser_37;
+ goto basic_json_parser_41;
}
}
else
{
if (yych <= 'E')
{
- goto basic_json_parser_38;
+ goto basic_json_parser_42;
}
if (yych == 'e')
{
- goto basic_json_parser_38;
+ goto basic_json_parser_42;
}
}
basic_json_parser_14:
{
- last_token_type = token_type::value_number;
+ last_token_type = token_type::value_uint;
break;
}
basic_json_parser_15:
@@ -7830,7 +7834,7 @@
{
if (yych == '.')
{
- goto basic_json_parser_37;
+ goto basic_json_parser_41;
}
goto basic_json_parser_14;
}
@@ -7838,11 +7842,11 @@
{
if (yych <= 'E')
{
- goto basic_json_parser_38;
+ goto basic_json_parser_42;
}
if (yych == 'e')
{
- goto basic_json_parser_38;
+ goto basic_json_parser_42;
}
goto basic_json_parser_14;
}
@@ -7869,7 +7873,7 @@
yych = *(m_marker = ++m_cursor);
if (yych == 'a')
{
- goto basic_json_parser_39;
+ goto basic_json_parser_43;
}
goto basic_json_parser_5;
basic_json_parser_24:
@@ -7877,7 +7881,7 @@
yych = *(m_marker = ++m_cursor);
if (yych == 'u')
{
- goto basic_json_parser_40;
+ goto basic_json_parser_44;
}
goto basic_json_parser_5;
basic_json_parser_25:
@@ -7885,7 +7889,7 @@
yych = *(m_marker = ++m_cursor);
if (yych == 'r')
{
- goto basic_json_parser_41;
+ goto basic_json_parser_45;
}
goto basic_json_parser_5;
basic_json_parser_26:
@@ -7905,7 +7909,7 @@
yych = *(m_marker = ++m_cursor);
if (yych == 0xBB)
{
- goto basic_json_parser_42;
+ goto basic_json_parser_46;
}
goto basic_json_parser_5;
basic_json_parser_31:
@@ -7931,13 +7935,27 @@
goto basic_json_parser_36;
basic_json_parser_33:
m_cursor = m_marker;
- if (yyaccept == 0)
+ if (yyaccept <= 1)
{
- goto basic_json_parser_5;
+ if (yyaccept == 0)
+ {
+ goto basic_json_parser_5;
+ }
+ else
+ {
+ goto basic_json_parser_14;
+ }
}
else
{
- goto basic_json_parser_14;
+ if (yyaccept == 2)
+ {
+ goto basic_json_parser_38;
+ }
+ else
+ {
+ goto basic_json_parser_50;
+ }
}
basic_json_parser_34:
++m_cursor;
@@ -8018,13 +8036,78 @@
}
if (yych <= 'u')
{
- goto basic_json_parser_43;
+ goto basic_json_parser_47;
}
goto basic_json_parser_33;
}
}
}
basic_json_parser_37:
+ yyaccept = 2;
+ yych = *(m_marker = ++m_cursor);
+ if (yych <= 'D')
+ {
+ if (yych == '.')
+ {
+ goto basic_json_parser_41;
+ }
+ }
+ else
+ {
+ if (yych <= 'E')
+ {
+ goto basic_json_parser_42;
+ }
+ if (yych == 'e')
+ {
+ goto basic_json_parser_42;
+ }
+ }
+basic_json_parser_38:
+ {
+ last_token_type = token_type::value_int;
+ break;
+ }
+basic_json_parser_39:
+ yyaccept = 2;
+ m_marker = ++m_cursor;
+ if ((m_limit - m_cursor) < 3)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+ if (yych <= '9')
+ {
+ if (yych == '.')
+ {
+ goto basic_json_parser_41;
+ }
+ if (yych <= '/')
+ {
+ goto basic_json_parser_38;
+ }
+ goto basic_json_parser_39;
+ }
+ else
+ {
+ if (yych <= 'E')
+ {
+ if (yych <= 'D')
+ {
+ goto basic_json_parser_38;
+ }
+ goto basic_json_parser_42;
+ }
+ else
+ {
+ if (yych == 'e')
+ {
+ goto basic_json_parser_42;
+ }
+ goto basic_json_parser_38;
+ }
+ }
+basic_json_parser_41:
yych = *++m_cursor;
if (yych <= '/')
{
@@ -8032,16 +8115,16 @@
}
if (yych <= '9')
{
- goto basic_json_parser_44;
+ goto basic_json_parser_48;
}
goto basic_json_parser_33;
-basic_json_parser_38:
+basic_json_parser_42:
yych = *++m_cursor;
if (yych <= ',')
{
if (yych == '+')
{
- goto basic_json_parser_46;
+ goto basic_json_parser_51;
}
goto basic_json_parser_33;
}
@@ -8049,7 +8132,7 @@
{
if (yych <= '-')
{
- goto basic_json_parser_46;
+ goto basic_json_parser_51;
}
if (yych <= '/')
{
@@ -8057,39 +8140,39 @@
}
if (yych <= '9')
{
- goto basic_json_parser_47;
+ goto basic_json_parser_52;
}
goto basic_json_parser_33;
}
-basic_json_parser_39:
+basic_json_parser_43:
yych = *++m_cursor;
if (yych == 'l')
{
- goto basic_json_parser_49;
+ goto basic_json_parser_54;
}
goto basic_json_parser_33;
-basic_json_parser_40:
+basic_json_parser_44:
yych = *++m_cursor;
if (yych == 'l')
{
- goto basic_json_parser_50;
+ goto basic_json_parser_55;
}
goto basic_json_parser_33;
-basic_json_parser_41:
+basic_json_parser_45:
yych = *++m_cursor;
if (yych == 'u')
{
- goto basic_json_parser_51;
+ goto basic_json_parser_56;
}
goto basic_json_parser_33;
-basic_json_parser_42:
+basic_json_parser_46:
yych = *++m_cursor;
if (yych == 0xBF)
{
- goto basic_json_parser_52;
+ goto basic_json_parser_57;
}
goto basic_json_parser_33;
-basic_json_parser_43:
+basic_json_parser_47:
++m_cursor;
if (m_limit <= m_cursor)
{
@@ -8104,7 +8187,7 @@
}
if (yych <= '9')
{
- goto basic_json_parser_54;
+ goto basic_json_parser_59;
}
goto basic_json_parser_33;
}
@@ -8112,7 +8195,7 @@
{
if (yych <= 'F')
{
- goto basic_json_parser_54;
+ goto basic_json_parser_59;
}
if (yych <= '`')
{
@@ -8120,12 +8203,12 @@
}
if (yych <= 'f')
{
- goto basic_json_parser_54;
+ goto basic_json_parser_59;
}
goto basic_json_parser_33;
}
-basic_json_parser_44:
- yyaccept = 1;
+basic_json_parser_48:
+ yyaccept = 3;
m_marker = ++m_cursor;
if ((m_limit - m_cursor) < 3)
{
@@ -8136,27 +8219,30 @@
{
if (yych <= '/')
{
- goto basic_json_parser_14;
+ goto basic_json_parser_50;
}
if (yych <= '9')
{
- goto basic_json_parser_44;
+ goto basic_json_parser_48;
}
- goto basic_json_parser_14;
}
else
{
if (yych <= 'E')
{
- goto basic_json_parser_38;
+ goto basic_json_parser_42;
}
if (yych == 'e')
{
- goto basic_json_parser_38;
+ goto basic_json_parser_42;
}
- goto basic_json_parser_14;
}
-basic_json_parser_46:
+basic_json_parser_50:
+ {
+ last_token_type = token_type::value_number;
+ break;
+ }
+basic_json_parser_51:
yych = *++m_cursor;
if (yych <= '/')
{
@@ -8166,7 +8252,7 @@
{
goto basic_json_parser_33;
}
-basic_json_parser_47:
+basic_json_parser_52:
++m_cursor;
if (m_limit <= m_cursor)
{
@@ -8175,94 +8261,40 @@
yych = *m_cursor;
if (yych <= '/')
{
- goto basic_json_parser_14;
+ goto basic_json_parser_50;
}
if (yych <= '9')
{
- goto basic_json_parser_47;
+ goto basic_json_parser_52;
}
- goto basic_json_parser_14;
-basic_json_parser_49:
+ goto basic_json_parser_50;
+basic_json_parser_54:
yych = *++m_cursor;
if (yych == 's')
{
- goto basic_json_parser_55;
+ goto basic_json_parser_60;
}
goto basic_json_parser_33;
-basic_json_parser_50:
- yych = *++m_cursor;
- if (yych == 'l')
- {
- goto basic_json_parser_56;
- }
- goto basic_json_parser_33;
-basic_json_parser_51:
- yych = *++m_cursor;
- if (yych == 'e')
- {
- goto basic_json_parser_58;
- }
- goto basic_json_parser_33;
-basic_json_parser_52:
- ++m_cursor;
- {
- continue;
- }
-basic_json_parser_54:
- ++m_cursor;
- if (m_limit <= m_cursor)
- {
- yyfill(); // LCOV_EXCL_LINE;
- }
- yych = *m_cursor;
- if (yych <= '@')
- {
- if (yych <= '/')
- {
- goto basic_json_parser_33;
- }
- if (yych <= '9')
- {
- goto basic_json_parser_60;
- }
- goto basic_json_parser_33;
- }
- else
- {
- if (yych <= 'F')
- {
- goto basic_json_parser_60;
- }
- if (yych <= '`')
- {
- goto basic_json_parser_33;
- }
- if (yych <= 'f')
- {
- goto basic_json_parser_60;
- }
- goto basic_json_parser_33;
- }
basic_json_parser_55:
yych = *++m_cursor;
- if (yych == 'e')
+ if (yych == 'l')
{
goto basic_json_parser_61;
}
goto basic_json_parser_33;
basic_json_parser_56:
+ yych = *++m_cursor;
+ if (yych == 'e')
+ {
+ goto basic_json_parser_63;
+ }
+ goto basic_json_parser_33;
+basic_json_parser_57:
++m_cursor;
{
- last_token_type = token_type::literal_null;
- break;
+ continue;
}
-basic_json_parser_58:
- ++m_cursor;
- {
- last_token_type = token_type::literal_true;
- break;
- }
-basic_json_parser_60:
+basic_json_parser_59:
++m_cursor;
if (m_limit <= m_cursor)
{
@@ -8277,7 +8309,7 @@
}
if (yych <= '9')
{
- goto basic_json_parser_63;
+ goto basic_json_parser_65;
}
goto basic_json_parser_33;
}
@@ -8285,7 +8317,7 @@
{
if (yych <= 'F')
{
- goto basic_json_parser_63;
+ goto basic_json_parser_65;
}
if (yych <= '`')
{
@@ -8293,17 +8325,71 @@
}
if (yych <= 'f')
{
- goto basic_json_parser_63;
+ goto basic_json_parser_65;
}
goto basic_json_parser_33;
}
+basic_json_parser_60:
+ yych = *++m_cursor;
+ if (yych == 'e')
+ {
+ goto basic_json_parser_66;
+ }
+ goto basic_json_parser_33;
basic_json_parser_61:
++m_cursor;
{
+ last_token_type = token_type::literal_null;
+ break;
+ }
+basic_json_parser_63:
+ ++m_cursor;
+ {
+ last_token_type = token_type::literal_true;
+ break;
+ }
+basic_json_parser_65:
+ ++m_cursor;
+ if (m_limit <= m_cursor)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+ if (yych <= '@')
+ {
+ if (yych <= '/')
+ {
+ goto basic_json_parser_33;
+ }
+ if (yych <= '9')
+ {
+ goto basic_json_parser_68;
+ }
+ goto basic_json_parser_33;
+ }
+ else
+ {
+ if (yych <= 'F')
+ {
+ goto basic_json_parser_68;
+ }
+ if (yych <= '`')
+ {
+ goto basic_json_parser_33;
+ }
+ if (yych <= 'f')
+ {
+ goto basic_json_parser_68;
+ }
+ goto basic_json_parser_33;
+ }
+basic_json_parser_66:
+ ++m_cursor;
+ {
last_token_type = token_type::literal_false;
break;
}
-basic_json_parser_63:
+basic_json_parser_68:
++m_cursor;
if (m_limit <= m_cursor)
{
@@ -8562,19 +8648,7 @@
}
/*!
- @brief parse floating point number
-
- This function (and its overloads) serves to select the most approprate
- standard floating point number parsing function based on the type
- supplied via the first parameter. Set this to @a
- static_cast<number_float_t*>(nullptr).
-
- @param[in] type the @ref number_float_t in use
-
- @param[in,out] endptr recieves a pointer to the first character after
- the number
-
- @return the floating point number
+ @copydoc str_to_float_t()
*/
double str_to_float_t(double* /* type */, char** endptr) const
{
@@ -8582,19 +8656,7 @@
}
/*!
- @brief parse floating point number
-
- This function (and its overloads) serves to select the most approprate
- standard floating point number parsing function based on the type
- supplied via the first parameter. Set this to @a
- static_cast<number_float_t*>(nullptr).
-
- @param[in] type the @ref number_float_t in use
-
- @param[in,out] endptr recieves a pointer to the first character after
- the number
-
- @return the floating point number
+ @copydoc str_to_float_t()
*/
float str_to_float_t(float* /* type */, char** endptr) const
{
@@ -8602,109 +8664,155 @@
}
/*!
- @brief return number value for number tokens
+ @brief parse floating-point number
- This function translates the last token into the most appropriate
- number type (either integer, unsigned integer or floating point),
- which is passed back to the caller via the result parameter.
-
- This function parses the integer component up to the radix point or
- exponent while collecting information about the 'floating point
- representation', which it stores in the result parameter. If there is
- no radix point or exponent, and the number can fit into a @ref
- number_integer_t or @ref number_unsigned_t then it sets the result
- parameter accordingly.
-
- If the number is a floating point number the number is then parsed
- using @a std:strtod (or @a std:strtof or @a std::strtold).
-
- @param[out] result @ref basic_json object to receive the number, or
+ @param[in,out] result @ref basic_json object to receive the number, or
NAN if the conversion read past the current token. The latter case
needs to be treated by the caller function.
*/
- void get_number(basic_json& result) const
+ void get_number_float(basic_json& result) const
{
assert(m_start != nullptr);
- const lexer::lexer_char_t* curptr = m_start;
-
- // accumulate the integer conversion result (unsigned for now)
- number_unsigned_t value = 0;
-
- // maximum absolute value of the relevant integer type
- number_unsigned_t max;
-
- // temporarily store the type to avoid unecessary bitfield access
- value_t type;
-
- // look for sign
- if (*curptr == '-')
- {
- type = value_t::number_integer;
- max = static_cast<uint64_t>((std::numeric_limits<number_integer_t>::max)()) + 1;
- curptr++;
- }
- else
- {
- type = value_t::number_unsigned;
- max = static_cast<uint64_t>((std::numeric_limits<number_unsigned_t>::max)());
- }
-
- // count the significant figures
- for (; curptr < m_cursor; curptr++)
- {
- // quickly skip tests if a digit
- if (*curptr < '0' || *curptr > '9')
- {
- if (*curptr == '.')
- {
- // don't count '.' but change to float
- type = value_t::number_float;
- continue;
- }
- // assume exponent (if not then will fail parse): change to
- // float, stop counting and record exponent details
- type = value_t::number_float;
- break;
- }
-
- // skip if definitely not an integer
- if (type != value_t::number_float)
- {
- // multiply last value by ten and add the new digit
- auto temp = value * 10 + *curptr - '0';
-
- // test for overflow
- if (temp < value || temp > max)
- {
- // overflow
- type = value_t::number_float;
- }
- else
- {
- // no overflow - save it
- value = temp;
- }
- }
- }
-
- // save the value (if not a float)
- if (type == value_t::number_unsigned)
- {
- result.m_value.number_unsigned = value;
- }
- else if (type == value_t::number_integer)
- {
- result.m_value.number_integer = -static_cast<number_integer_t>(value);
- }
- else
- {
- // parse with strtod
- result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), NULL);
- }
+ // parse with std::strtof, std::strtod, or std::strtold
+ result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), NULL);
// save the type
- result.m_type = type;
+ result.m_type = value_t::number_float;
+ }
+
+ /*!
+ @brief string to unsigned integer conversion
+
+ Converts the last string read by the lexer to an unsigned integer
+ number assuming a base of 10. Negative numbers shall be converted by
+ ignoring the leading '-' by setting @a skip_first to true. Negating
+ the value is the responsibility of the caller.
+
+ @param[out] result the result of the conversion
+ @param skip_first whether the first character should be skipped
+
+ @tparam T an unsigned integer type
+
+ @return true iff overflow occurred; false if conversion succeeded
+
+ @pre The re2c lexer has successfully parsed an integer number. Only
+ two cases are valid: (1) all characters between m_start and m_cursor
+ are digits (unsigned integer) and @a skip_first is set to false, or
+ (2) the character at m_start is a '-' which will be ignored (@a
+ skip_first is set to true) and all other characters until m_cursor
+ are digits (signed integer).
+
+ @post Either the string between m_start and m_cursor is processed
+ completely, @a result contains the parsed number, and the function
+ returns false; or parsing was interrupted prematurely, because an
+ overflow was detected. Then, the function returns true.
+ */
+ template<typename T>
+ bool get_uint(T& result, bool skip_first) const
+ {
+ assert(m_start != nullptr);
+ assert(m_start != m_cursor);
+
+ // initially, set the result to 0
+ result = 0;
+
+ // maximal possible value for type T
+ static constexpr T max_value = std::numeric_limits<T>::max();
+ // maximal intermediate value before multiplying by 10
+ static constexpr T max_intermediate = max_value / 10;
+
+ /// pointer to the first digit (possibly skipping a '-')
+ const auto first_digit = skip_first ? (m_start + 1) : m_start;
+
+ // after re2c successfully parsed a number, the digits are between
+ // first_digit and m_cursor
+ for (auto p = first_digit; p != m_cursor; ++p)
+ {
+ // check if multiplying by 10 would result in overflow
+ if (result > max_intermediate)
+ {
+ // signal overflow
+ return true;
+ }
+
+ result *= 10;
+
+ // the current digit
+ const T digit = static_cast<T>(*reinterpret_cast<typename string_t::const_pointer>(p) - '0');
+
+ // check if adding the digit would result in overflow
+ if (digit > (max_value - result))
+ {
+ // signal overflow
+ return true;
+ }
+
+ result += digit;
+ }
+
+ // signal absence of overflow
+ return false;
+ }
+
+
+ /*!
+ @brief parse unsigned integer number
+
+ @param[in,out] result @ref basic_json object to receive the number, or
+ NAN if the conversion read past the current token. The latter case
+ needs to be treated by the caller function.
+
+ @note If an overflow wrt. type @ref number_unsigned_t occurs, the
+ number is processed as floating-point by @ref get_number_float().
+ */
+ void get_number_uint(basic_json& result) const
+ {
+ // optimistically set result to unsigned int
+ result.m_type = value_t::number_unsigned;
+
+ // parse the number
+ const bool overflow = get_uint(result.m_value.number_unsigned, false);
+
+ // if an overflow occurred, try again parsing the number to float
+ if (overflow)
+ {
+ get_number_float(result);
+ }
+ }
+
+ /*!
+ @brief parse signed integer number
+
+ @param[in,out] result @ref basic_json object to receive the number, or
+ NAN if the conversion read past the current token. The latter case
+ needs to be treated by the caller function.
+
+ @note If an overflow wrt. type @ref number_integer_t occurs, the
+ number is processed as floating-point by @ref get_number_float().
+ */
+ void get_number_int(basic_json& result) const
+ {
+ // optimistically set result to int
+ result.m_type = value_t::number_integer;
+
+ // parse as unsigned integer while skipping the leading '-'
+ number_unsigned_t unsigned_value = 0;
+ const bool overflow = get_uint(unsigned_value, true);
+
+ // store negated number
+ result.m_value.number_integer = static_cast<number_integer_t>(-unsigned_value);
+
+ // if an overflow occurred, try again parsing the number to float;
+ // note that, assuming min = -(max + 1), we have an underflow
+ // iff -unsigned_value < min
+ // iff -unsigned_value < -(max + 1)
+ // iff unsigned_value > max + 1
+ // iff unsigned_value - 1 > max
+ if (overflow or (unsigned_value - 1) > std::numeric_limits<number_integer_t>::max())
+ {
+ get_number_float(result);
+ }
}
private:
@@ -8934,9 +9042,23 @@
break;
}
+ case lexer::token_type::value_uint:
+ {
+ m_lexer.get_number_uint(result);
+ get_token();
+ break;
+ }
+
+ case lexer::token_type::value_int:
+ {
+ m_lexer.get_number_int(result);
+ get_token();
+ break;
+ }
+
case lexer::token_type::value_number:
{
- m_lexer.get_number(result);
+ m_lexer.get_number_float(result);
get_token();
break;
}
diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c
index 537d4f4..b2a070f 100644
--- a/src/json.hpp.re2c
+++ b/src/json.hpp.re2c
@@ -7371,6 +7371,8 @@
literal_false, ///< the `false` literal
literal_null, ///< the `null` literal
value_string, ///< a string -- use get_string() for actual value
+ value_uint, ///< a uint number -- use get_number_uint() for actual value
+ value_int, ///< an int number -- use get_number_int() for actual value
value_number, ///< a number -- use get_number() for actual value
begin_array, ///< the character for array begin `[`
begin_object, ///< the character for object begin `{`
@@ -7516,6 +7518,8 @@
return "null literal";
case token_type::value_string:
return "string literal";
+ case token_type::value_uint:
+ case token_type::value_int:
case token_type::value_number:
return "number literal";
case token_type::begin_array:
@@ -7616,7 +7620,11 @@
zero = "0";
exp = e (minus | plus)? digit+;
frac = decimal_point digit+;
- int = (zero | digit_1_9 digit*);
+ int = (zero|digit_1_9 digit*);
+ number_uint = int;
+ number_uint { last_token_type = token_type::value_uint; break; }
+ number_int = minus int;
+ number_int { last_token_type = token_type::value_int; break; }
number = minus? int frac? exp?;
number { last_token_type = token_type::value_number; break; }
@@ -7859,19 +7867,7 @@
}
/*!
- @brief parse floating point number
-
- This function (and its overloads) serves to select the most approprate
- standard floating point number parsing function based on the type
- supplied via the first parameter. Set this to @a
- static_cast<number_float_t*>(nullptr).
-
- @param[in] type the @ref number_float_t in use
-
- @param[in,out] endptr recieves a pointer to the first character after
- the number
-
- @return the floating point number
+ @copydoc str_to_float_t()
*/
double str_to_float_t(double* /* type */, char** endptr) const
{
@@ -7879,19 +7875,7 @@
}
/*!
- @brief parse floating point number
-
- This function (and its overloads) serves to select the most approprate
- standard floating point number parsing function based on the type
- supplied via the first parameter. Set this to @a
- static_cast<number_float_t*>(nullptr).
-
- @param[in] type the @ref number_float_t in use
-
- @param[in,out] endptr recieves a pointer to the first character after
- the number
-
- @return the floating point number
+ @copydoc str_to_float_t()
*/
float str_to_float_t(float* /* type */, char** endptr) const
{
@@ -7899,109 +7883,155 @@
}
/*!
- @brief return number value for number tokens
+ @brief parse floating-point number
- This function translates the last token into the most appropriate
- number type (either integer, unsigned integer or floating point),
- which is passed back to the caller via the result parameter.
-
- This function parses the integer component up to the radix point or
- exponent while collecting information about the 'floating point
- representation', which it stores in the result parameter. If there is
- no radix point or exponent, and the number can fit into a @ref
- number_integer_t or @ref number_unsigned_t then it sets the result
- parameter accordingly.
-
- If the number is a floating point number the number is then parsed
- using @a std:strtod (or @a std:strtof or @a std::strtold).
-
- @param[out] result @ref basic_json object to receive the number, or
+ @param[in,out] result @ref basic_json object to receive the number, or
NAN if the conversion read past the current token. The latter case
needs to be treated by the caller function.
*/
- void get_number(basic_json& result) const
+ void get_number_float(basic_json& result) const
{
assert(m_start != nullptr);
- const lexer::lexer_char_t* curptr = m_start;
-
- // accumulate the integer conversion result (unsigned for now)
- number_unsigned_t value = 0;
-
- // maximum absolute value of the relevant integer type
- number_unsigned_t max;
-
- // temporarily store the type to avoid unecessary bitfield access
- value_t type;
-
- // look for sign
- if (*curptr == '-')
- {
- type = value_t::number_integer;
- max = static_cast<uint64_t>((std::numeric_limits<number_integer_t>::max)()) + 1;
- curptr++;
- }
- else
- {
- type = value_t::number_unsigned;
- max = static_cast<uint64_t>((std::numeric_limits<number_unsigned_t>::max)());
- }
-
- // count the significant figures
- for (; curptr < m_cursor; curptr++)
- {
- // quickly skip tests if a digit
- if (*curptr < '0' || *curptr > '9')
- {
- if (*curptr == '.')
- {
- // don't count '.' but change to float
- type = value_t::number_float;
- continue;
- }
- // assume exponent (if not then will fail parse): change to
- // float, stop counting and record exponent details
- type = value_t::number_float;
- break;
- }
-
- // skip if definitely not an integer
- if (type != value_t::number_float)
- {
- // multiply last value by ten and add the new digit
- auto temp = value * 10 + *curptr - '0';
-
- // test for overflow
- if (temp < value || temp > max)
- {
- // overflow
- type = value_t::number_float;
- }
- else
- {
- // no overflow - save it
- value = temp;
- }
- }
- }
-
- // save the value (if not a float)
- if (type == value_t::number_unsigned)
- {
- result.m_value.number_unsigned = value;
- }
- else if (type == value_t::number_integer)
- {
- result.m_value.number_integer = -static_cast<number_integer_t>(value);
- }
- else
- {
- // parse with strtod
- result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), NULL);
- }
+ // parse with std::strtof, std::strtod, or std::strtold
+ result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), NULL);
// save the type
- result.m_type = type;
+ result.m_type = value_t::number_float;
+ }
+
+ /*!
+ @brief string to unsigned integer conversion
+
+ Converts the last string read by the lexer to an unsigned integer
+ number assuming a base of 10. Negative numbers shall be converted by
+ ignoring the leading '-' by setting @a skip_first to true. Negating
+ the value is the responsibility of the caller.
+
+ @param[out] result the result of the conversion
+ @param skip_first whether the first character should be skipped
+
+ @tparam T an unsigned integer type
+
+ @return true iff overflow occurred; false if conversion succeeded
+
+ @pre The re2c lexer has successfully parsed an integer number. Only
+ two cases are valid: (1) all characters between m_start and m_cursor
+ are digits (unsigned integer) and @a skip_first is set to false, or
+ (2) the character at m_start is a '-' which will be ignored (@a
+ skip_first is set to true) and all other characters until m_cursor
+ are digits (signed integer).
+
+ @post Either the string between m_start and m_cursor is processed
+ completely, @a result contains the parsed number, and the function
+ returns false; or parsing was interrupted prematurely, because an
+ overflow was detected. Then, the function returns true.
+ */
+ template<typename T>
+ bool get_uint(T& result, bool skip_first) const
+ {
+ assert(m_start != nullptr);
+ assert(m_start != m_cursor);
+
+ // initially, set the result to 0
+ result = 0;
+
+ // maximal possible value for type T
+ static constexpr T max_value = std::numeric_limits<T>::max();
+ // maximal intermediate value before multiplying by 10
+ static constexpr T max_intermediate = max_value / 10;
+
+ /// pointer to the first digit (possibly skipping a '-')
+ const auto first_digit = skip_first ? (m_start + 1) : m_start;
+
+ // after re2c successfully parsed a number, the digits are between
+ // first_digit and m_cursor
+ for (auto p = first_digit; p != m_cursor; ++p)
+ {
+ // check if multiplying by 10 would result in overflow
+ if (result > max_intermediate)
+ {
+ // signal overflow
+ return true;
+ }
+
+ result *= 10;
+
+ // the current digit
+ const T digit = static_cast<T>(*reinterpret_cast<typename string_t::const_pointer>(p) - '0');
+
+ // check if adding the digit would result in overflow
+ if (digit > (max_value - result))
+ {
+ // signal overflow
+ return true;
+ }
+
+ result += digit;
+ }
+
+ // signal absence of overflow
+ return false;
+ }
+
+
+ /*!
+ @brief parse unsigned integer number
+
+ @param[in,out] result @ref basic_json object to receive the number, or
+ NAN if the conversion read past the current token. The latter case
+ needs to be treated by the caller function.
+
+ @note If an overflow wrt. type @ref number_unsigned_t occurs, the
+ number is processed as floating-point by @ref get_number_float().
+ */
+ void get_number_uint(basic_json& result) const
+ {
+ // optimistically set result to unsigned int
+ result.m_type = value_t::number_unsigned;
+
+ // parse the number
+ const bool overflow = get_uint(result.m_value.number_unsigned, false);
+
+ // if an overflow occurred, try again parsing the number to float
+ if (overflow)
+ {
+ get_number_float(result);
+ }
+ }
+
+ /*!
+ @brief parse signed integer number
+
+ @param[in,out] result @ref basic_json object to receive the number, or
+ NAN if the conversion read past the current token. The latter case
+ needs to be treated by the caller function.
+
+ @note If an overflow wrt. type @ref number_integer_t occurs, the
+ number is processed as floating-point by @ref get_number_float().
+ */
+ void get_number_int(basic_json& result) const
+ {
+ // optimistically set result to int
+ result.m_type = value_t::number_integer;
+
+ // parse as unsigned integer while skipping the leading '-'
+ number_unsigned_t unsigned_value = 0;
+ const bool overflow = get_uint(unsigned_value, true);
+
+ // store negated number
+ result.m_value.number_integer = static_cast<number_integer_t>(-unsigned_value);
+
+ // if an overflow occurred, try again parsing the number to float;
+ // note that, assuming min = -(max + 1), we have an underflow
+ // iff -unsigned_value < min
+ // iff -unsigned_value < -(max + 1)
+ // iff unsigned_value > max + 1
+ // iff unsigned_value - 1 > max
+ if (overflow or (unsigned_value - 1) > std::numeric_limits<number_integer_t>::max())
+ {
+ get_number_float(result);
+ }
}
private:
@@ -8231,9 +8261,23 @@
break;
}
+ case lexer::token_type::value_uint:
+ {
+ m_lexer.get_number_uint(result);
+ get_token();
+ break;
+ }
+
+ case lexer::token_type::value_int:
+ {
+ m_lexer.get_number_int(result);
+ get_token();
+ break;
+ }
+
case lexer::token_type::value_number:
{
- m_lexer.get_number(result);
+ m_lexer.get_number_float(result);
get_token();
break;
}
diff --git a/test/src/unit.cpp b/test/src/unit.cpp
index edbafac..8c631fc 100644
--- a/test/src/unit.cpp
+++ b/test/src/unit.cpp
@@ -9681,16 +9681,40 @@
SECTION("numbers")
{
- CHECK(json::lexer("0").scan() == json::lexer::token_type::value_number);
- CHECK(json::lexer("1").scan() == json::lexer::token_type::value_number);
- CHECK(json::lexer("2").scan() == json::lexer::token_type::value_number);
- CHECK(json::lexer("3").scan() == json::lexer::token_type::value_number);
- CHECK(json::lexer("4").scan() == json::lexer::token_type::value_number);
- CHECK(json::lexer("5").scan() == json::lexer::token_type::value_number);
- CHECK(json::lexer("6").scan() == json::lexer::token_type::value_number);
- CHECK(json::lexer("7").scan() == json::lexer::token_type::value_number);
- CHECK(json::lexer("8").scan() == json::lexer::token_type::value_number);
- CHECK(json::lexer("9").scan() == json::lexer::token_type::value_number);
+ // unsigned integer
+ CHECK(json::lexer("0").scan() == json::lexer::token_type::value_uint);
+ CHECK(json::lexer("1").scan() == json::lexer::token_type::value_uint);
+ CHECK(json::lexer("2").scan() == json::lexer::token_type::value_uint);
+ CHECK(json::lexer("3").scan() == json::lexer::token_type::value_uint);
+ CHECK(json::lexer("4").scan() == json::lexer::token_type::value_uint);
+ CHECK(json::lexer("5").scan() == json::lexer::token_type::value_uint);
+ CHECK(json::lexer("6").scan() == json::lexer::token_type::value_uint);
+ CHECK(json::lexer("7").scan() == json::lexer::token_type::value_uint);
+ CHECK(json::lexer("8").scan() == json::lexer::token_type::value_uint);
+ CHECK(json::lexer("9").scan() == json::lexer::token_type::value_uint);
+
+ // signed integer
+ CHECK(json::lexer("-1").scan() == json::lexer::token_type::value_int);
+ CHECK(json::lexer("-2").scan() == json::lexer::token_type::value_int);
+ CHECK(json::lexer("-3").scan() == json::lexer::token_type::value_int);
+ CHECK(json::lexer("-4").scan() == json::lexer::token_type::value_int);
+ CHECK(json::lexer("-5").scan() == json::lexer::token_type::value_int);
+ CHECK(json::lexer("-6").scan() == json::lexer::token_type::value_int);
+ CHECK(json::lexer("-7").scan() == json::lexer::token_type::value_int);
+ CHECK(json::lexer("-8").scan() == json::lexer::token_type::value_int);
+ CHECK(json::lexer("-9").scan() == json::lexer::token_type::value_int);
+
+ // floating-point
+ CHECK(json::lexer("0.0").scan() == json::lexer::token_type::value_number);
+ CHECK(json::lexer("0.1").scan() == json::lexer::token_type::value_number);
+ CHECK(json::lexer("0.2").scan() == json::lexer::token_type::value_number);
+ CHECK(json::lexer("0.3").scan() == json::lexer::token_type::value_number);
+ CHECK(json::lexer("0.4").scan() == json::lexer::token_type::value_number);
+ CHECK(json::lexer("0.5").scan() == json::lexer::token_type::value_number);
+ CHECK(json::lexer("0.6").scan() == json::lexer::token_type::value_number);
+ CHECK(json::lexer("0.7").scan() == json::lexer::token_type::value_number);
+ CHECK(json::lexer("0.8").scan() == json::lexer::token_type::value_number);
+ CHECK(json::lexer("0.9").scan() == json::lexer::token_type::value_number);
}
SECTION("whitespace")