Merge branch 'develop' into feature/user_defined_string_to_int

commit: a1ee1987748680016cc508282ba79c6fca13181b [log] [tgz]
author: Niels <niels.lohmann@gmail.com> Sat Jul 30 10:59:32 2016 +0200
committer: Niels <niels.lohmann@gmail.com> Sat Jul 30 10:59:32 2016 +0200
tree: 00d06a81076fbb78ed87bfbaaab08ba8c4a84c8b
parent: 0c6ebd495ddc6a2776d30cb8293a002eb4e7fd67 [diff]
parent: 5405ae860117f55f9b5af287a7006dd64cef9eaa [diff]
diff --git a/README.md b/README.md
index 5849067..45cba01 100644
--- a/README.md
+++ b/README.md

@@ -505,7 +505,7 @@
 $ ./json_unit "*"
 
 ===============================================================================
-All tests passed (8905012 assertions in 32 test cases)
+All tests passed (5568737 assertions in 32 test cases)
 ```
 
 For more information, have a look at the file [.travis.yml](https://github.com/nlohmann/json/blob/master/.travis.yml).

diff --git a/src/json.hpp b/src/json.hpp
index 9d2305d..43502d0 100644
--- a/src/json.hpp
+++ b/src/json.hpp

@@ -7371,6 +7371,8 @@
             literal_false,   ///< the `false` literal
             literal_null,    ///< the `null` literal
             value_string,    ///< a string -- use get_string() for actual value
+            value_uint,      ///< a uint number -- use get_number_uint() for actual value
+            value_int,       ///< an int number -- use get_number_int() for actual value
             value_number,    ///< a number -- use get_number() for actual value
             begin_array,     ///< the character for array begin `[`
             begin_object,    ///< the character for object begin `{`
@@ -7516,6 +7518,8 @@
                     return "null literal";
                 case token_type::value_string:
                     return "string literal";
+                case token_type::value_uint:
+                case token_type::value_int:
                 case token_type::value_number:
                     return "number literal";
                 case token_type::begin_array:
@@ -7781,11 +7785,11 @@
                     }
                     if (yych <= '0')
                     {
-                        goto basic_json_parser_13;
+                        goto basic_json_parser_37;
                     }
                     if (yych <= '9')
                     {
-                        goto basic_json_parser_15;
+                        goto basic_json_parser_39;
                     }
                     goto basic_json_parser_5;
 basic_json_parser_13:
@@ -7795,23 +7799,23 @@
                     {
                         if (yych == '.')
                         {
-                            goto basic_json_parser_37;
+                            goto basic_json_parser_41;
                         }
                     }
                     else
                     {
                         if (yych <= 'E')
                         {
-                            goto basic_json_parser_38;
+                            goto basic_json_parser_42;
                         }
                         if (yych == 'e')
                         {
-                            goto basic_json_parser_38;
+                            goto basic_json_parser_42;
                         }
                     }
 basic_json_parser_14:
                     {
-                        last_token_type = token_type::value_number;
+                        last_token_type = token_type::value_uint;
                         break;
                     }
 basic_json_parser_15:
@@ -7830,7 +7834,7 @@
                     {
                         if (yych == '.')
                         {
-                            goto basic_json_parser_37;
+                            goto basic_json_parser_41;
                         }
                         goto basic_json_parser_14;
                     }
@@ -7838,11 +7842,11 @@
                     {
                         if (yych <= 'E')
                         {
-                            goto basic_json_parser_38;
+                            goto basic_json_parser_42;
                         }
                         if (yych == 'e')
                         {
-                            goto basic_json_parser_38;
+                            goto basic_json_parser_42;
                         }
                         goto basic_json_parser_14;
                     }
@@ -7869,7 +7873,7 @@
                     yych = *(m_marker = ++m_cursor);
                     if (yych == 'a')
                     {
-                        goto basic_json_parser_39;
+                        goto basic_json_parser_43;
                     }
                     goto basic_json_parser_5;
 basic_json_parser_24:
@@ -7877,7 +7881,7 @@
                     yych = *(m_marker = ++m_cursor);
                     if (yych == 'u')
                     {
-                        goto basic_json_parser_40;
+                        goto basic_json_parser_44;
                     }
                     goto basic_json_parser_5;
 basic_json_parser_25:
@@ -7885,7 +7889,7 @@
                     yych = *(m_marker = ++m_cursor);
                     if (yych == 'r')
                     {
-                        goto basic_json_parser_41;
+                        goto basic_json_parser_45;
                     }
                     goto basic_json_parser_5;
 basic_json_parser_26:
@@ -7905,7 +7909,7 @@
                     yych = *(m_marker = ++m_cursor);
                     if (yych == 0xBB)
                     {
-                        goto basic_json_parser_42;
+                        goto basic_json_parser_46;
                     }
                     goto basic_json_parser_5;
 basic_json_parser_31:
@@ -7931,13 +7935,27 @@
                     goto basic_json_parser_36;
 basic_json_parser_33:
                     m_cursor = m_marker;
-                    if (yyaccept == 0)
+                    if (yyaccept <= 1)
                     {
-                        goto basic_json_parser_5;
+                        if (yyaccept == 0)
+                        {
+                            goto basic_json_parser_5;
+                        }
+                        else
+                        {
+                            goto basic_json_parser_14;
+                        }
                     }
                     else
                     {
-                        goto basic_json_parser_14;
+                        if (yyaccept == 2)
+                        {
+                            goto basic_json_parser_38;
+                        }
+                        else
+                        {
+                            goto basic_json_parser_50;
+                        }
                     }
 basic_json_parser_34:
                     ++m_cursor;
@@ -8018,13 +8036,78 @@
                                 }
                                 if (yych <= 'u')
                                 {
-                                    goto basic_json_parser_43;
+                                    goto basic_json_parser_47;
                                 }
                                 goto basic_json_parser_33;
                             }
                         }
                     }
 basic_json_parser_37:
+                    yyaccept = 2;
+                    yych = *(m_marker = ++m_cursor);
+                    if (yych <= 'D')
+                    {
+                        if (yych == '.')
+                        {
+                            goto basic_json_parser_41;
+                        }
+                    }
+                    else
+                    {
+                        if (yych <= 'E')
+                        {
+                            goto basic_json_parser_42;
+                        }
+                        if (yych == 'e')
+                        {
+                            goto basic_json_parser_42;
+                        }
+                    }
+basic_json_parser_38:
+                    {
+                        last_token_type = token_type::value_int;
+                        break;
+                    }
+basic_json_parser_39:
+                    yyaccept = 2;
+                    m_marker = ++m_cursor;
+                    if ((m_limit - m_cursor) < 3)
+                    {
+                        yyfill();    // LCOV_EXCL_LINE;
+                    }
+                    yych = *m_cursor;
+                    if (yych <= '9')
+                    {
+                        if (yych == '.')
+                        {
+                            goto basic_json_parser_41;
+                        }
+                        if (yych <= '/')
+                        {
+                            goto basic_json_parser_38;
+                        }
+                        goto basic_json_parser_39;
+                    }
+                    else
+                    {
+                        if (yych <= 'E')
+                        {
+                            if (yych <= 'D')
+                            {
+                                goto basic_json_parser_38;
+                            }
+                            goto basic_json_parser_42;
+                        }
+                        else
+                        {
+                            if (yych == 'e')
+                            {
+                                goto basic_json_parser_42;
+                            }
+                            goto basic_json_parser_38;
+                        }
+                    }
+basic_json_parser_41:
                     yych = *++m_cursor;
                     if (yych <= '/')
                     {
@@ -8032,16 +8115,16 @@
                     }
                     if (yych <= '9')
                     {
-                        goto basic_json_parser_44;
+                        goto basic_json_parser_48;
                     }
                     goto basic_json_parser_33;
-basic_json_parser_38:
+basic_json_parser_42:
                     yych = *++m_cursor;
                     if (yych <= ',')
                     {
                         if (yych == '+')
                         {
-                            goto basic_json_parser_46;
+                            goto basic_json_parser_51;
                         }
                         goto basic_json_parser_33;
                     }
@@ -8049,7 +8132,7 @@
                     {
                         if (yych <= '-')
                         {
-                            goto basic_json_parser_46;
+                            goto basic_json_parser_51;
                         }
                         if (yych <= '/')
                         {
@@ -8057,39 +8140,39 @@
                         }
                         if (yych <= '9')
                         {
-                            goto basic_json_parser_47;
+                            goto basic_json_parser_52;
                         }
                         goto basic_json_parser_33;
                     }
-basic_json_parser_39:
+basic_json_parser_43:
                     yych = *++m_cursor;
                     if (yych == 'l')
                     {
-                        goto basic_json_parser_49;
+                        goto basic_json_parser_54;
                     }
                     goto basic_json_parser_33;
-basic_json_parser_40:
+basic_json_parser_44:
                     yych = *++m_cursor;
                     if (yych == 'l')
                     {
-                        goto basic_json_parser_50;
+                        goto basic_json_parser_55;
                     }
                     goto basic_json_parser_33;
-basic_json_parser_41:
+basic_json_parser_45:
                     yych = *++m_cursor;
                     if (yych == 'u')
                     {
-                        goto basic_json_parser_51;
+                        goto basic_json_parser_56;
                     }
                     goto basic_json_parser_33;
-basic_json_parser_42:
+basic_json_parser_46:
                     yych = *++m_cursor;
                     if (yych == 0xBF)
                     {
-                        goto basic_json_parser_52;
+                        goto basic_json_parser_57;
                     }
                     goto basic_json_parser_33;
-basic_json_parser_43:
+basic_json_parser_47:
                     ++m_cursor;
                     if (m_limit <= m_cursor)
                     {
@@ -8104,7 +8187,7 @@
                         }
                         if (yych <= '9')
                         {
-                            goto basic_json_parser_54;
+                            goto basic_json_parser_59;
                         }
                         goto basic_json_parser_33;
                     }
@@ -8112,7 +8195,7 @@
                     {
                         if (yych <= 'F')
                         {
-                            goto basic_json_parser_54;
+                            goto basic_json_parser_59;
                         }
                         if (yych <= '`')
                         {
@@ -8120,12 +8203,12 @@
                         }
                         if (yych <= 'f')
                         {
-                            goto basic_json_parser_54;
+                            goto basic_json_parser_59;
                         }
                         goto basic_json_parser_33;
                     }
-basic_json_parser_44:
-                    yyaccept = 1;
+basic_json_parser_48:
+                    yyaccept = 3;
                     m_marker = ++m_cursor;
                     if ((m_limit - m_cursor) < 3)
                     {
@@ -8136,27 +8219,30 @@
                     {
                         if (yych <= '/')
                         {
-                            goto basic_json_parser_14;
+                            goto basic_json_parser_50;
                         }
                         if (yych <= '9')
                         {
-                            goto basic_json_parser_44;
+                            goto basic_json_parser_48;
                         }
-                        goto basic_json_parser_14;
                     }
                     else
                     {
                         if (yych <= 'E')
                         {
-                            goto basic_json_parser_38;
+                            goto basic_json_parser_42;
                         }
                         if (yych == 'e')
                         {
-                            goto basic_json_parser_38;
+                            goto basic_json_parser_42;
                         }
-                        goto basic_json_parser_14;
                     }
-basic_json_parser_46:
+basic_json_parser_50:
+                    {
+                        last_token_type = token_type::value_number;
+                        break;
+                    }
+basic_json_parser_51:
                     yych = *++m_cursor;
                     if (yych <= '/')
                     {
@@ -8166,7 +8252,7 @@
                     {
                         goto basic_json_parser_33;
                     }
-basic_json_parser_47:
+basic_json_parser_52:
                     ++m_cursor;
                     if (m_limit <= m_cursor)
                     {
@@ -8175,94 +8261,40 @@
                     yych = *m_cursor;
                     if (yych <= '/')
                     {
-                        goto basic_json_parser_14;
+                        goto basic_json_parser_50;
                     }
                     if (yych <= '9')
                     {
-                        goto basic_json_parser_47;
+                        goto basic_json_parser_52;
                     }
-                    goto basic_json_parser_14;
-basic_json_parser_49:
+                    goto basic_json_parser_50;
+basic_json_parser_54:
                     yych = *++m_cursor;
                     if (yych == 's')
                     {
-                        goto basic_json_parser_55;
+                        goto basic_json_parser_60;
                     }
                     goto basic_json_parser_33;
-basic_json_parser_50:
-                    yych = *++m_cursor;
-                    if (yych == 'l')
-                    {
-                        goto basic_json_parser_56;
-                    }
-                    goto basic_json_parser_33;
-basic_json_parser_51:
-                    yych = *++m_cursor;
-                    if (yych == 'e')
-                    {
-                        goto basic_json_parser_58;
-                    }
-                    goto basic_json_parser_33;
-basic_json_parser_52:
-                    ++m_cursor;
-                    {
-                        continue;
-                    }
-basic_json_parser_54:
-                    ++m_cursor;
-                    if (m_limit <= m_cursor)
-                    {
-                        yyfill();    // LCOV_EXCL_LINE;
-                    }
-                    yych = *m_cursor;
-                    if (yych <= '@')
-                    {
-                        if (yych <= '/')
-                        {
-                            goto basic_json_parser_33;
-                        }
-                        if (yych <= '9')
-                        {
-                            goto basic_json_parser_60;
-                        }
-                        goto basic_json_parser_33;
-                    }
-                    else
-                    {
-                        if (yych <= 'F')
-                        {
-                            goto basic_json_parser_60;
-                        }
-                        if (yych <= '`')
-                        {
-                            goto basic_json_parser_33;
-                        }
-                        if (yych <= 'f')
-                        {
-                            goto basic_json_parser_60;
-                        }
-                        goto basic_json_parser_33;
-                    }
 basic_json_parser_55:
                     yych = *++m_cursor;
-                    if (yych == 'e')
+                    if (yych == 'l')
                     {
                         goto basic_json_parser_61;
                     }
                     goto basic_json_parser_33;
 basic_json_parser_56:
+                    yych = *++m_cursor;
+                    if (yych == 'e')
+                    {
+                        goto basic_json_parser_63;
+                    }
+                    goto basic_json_parser_33;
+basic_json_parser_57:
                     ++m_cursor;
                     {
-                        last_token_type = token_type::literal_null;
-                        break;
+                        continue;
                     }
-basic_json_parser_58:
-                    ++m_cursor;
-                    {
-                        last_token_type = token_type::literal_true;
-                        break;
-                    }
-basic_json_parser_60:
+basic_json_parser_59:
                     ++m_cursor;
                     if (m_limit <= m_cursor)
                     {
@@ -8277,7 +8309,7 @@
                         }
                         if (yych <= '9')
                         {
-                            goto basic_json_parser_63;
+                            goto basic_json_parser_65;
                         }
                         goto basic_json_parser_33;
                     }
@@ -8285,7 +8317,7 @@
                     {
                         if (yych <= 'F')
                         {
-                            goto basic_json_parser_63;
+                            goto basic_json_parser_65;
                         }
                         if (yych <= '`')
                         {
@@ -8293,17 +8325,71 @@
                         }
                         if (yych <= 'f')
                         {
-                            goto basic_json_parser_63;
+                            goto basic_json_parser_65;
                         }
                         goto basic_json_parser_33;
                     }
+basic_json_parser_60:
+                    yych = *++m_cursor;
+                    if (yych == 'e')
+                    {
+                        goto basic_json_parser_66;
+                    }
+                    goto basic_json_parser_33;
 basic_json_parser_61:
                     ++m_cursor;
                     {
+                        last_token_type = token_type::literal_null;
+                        break;
+                    }
+basic_json_parser_63:
+                    ++m_cursor;
+                    {
+                        last_token_type = token_type::literal_true;
+                        break;
+                    }
+basic_json_parser_65:
+                    ++m_cursor;
+                    if (m_limit <= m_cursor)
+                    {
+                        yyfill();    // LCOV_EXCL_LINE;
+                    }
+                    yych = *m_cursor;
+                    if (yych <= '@')
+                    {
+                        if (yych <= '/')
+                        {
+                            goto basic_json_parser_33;
+                        }
+                        if (yych <= '9')
+                        {
+                            goto basic_json_parser_68;
+                        }
+                        goto basic_json_parser_33;
+                    }
+                    else
+                    {
+                        if (yych <= 'F')
+                        {
+                            goto basic_json_parser_68;
+                        }
+                        if (yych <= '`')
+                        {
+                            goto basic_json_parser_33;
+                        }
+                        if (yych <= 'f')
+                        {
+                            goto basic_json_parser_68;
+                        }
+                        goto basic_json_parser_33;
+                    }
+basic_json_parser_66:
+                    ++m_cursor;
+                    {
                         last_token_type = token_type::literal_false;
                         break;
                     }
-basic_json_parser_63:
+basic_json_parser_68:
                     ++m_cursor;
                     if (m_limit <= m_cursor)
                     {
@@ -8562,19 +8648,7 @@
         }
 
         /*!
-        @brief parse floating point number
-
-        This function (and its overloads) serves to select the most approprate
-        standard floating point number parsing function based on the type
-        supplied via the first parameter.  Set this to @a
-        static_cast<number_float_t*>(nullptr).
-
-        @param[in] type  the @ref number_float_t in use
-
-        @param[in,out] endptr  recieves a pointer to the first character after
-        the number
-
-        @return the floating point number
+        @copydoc str_to_float_t()
         */
         double str_to_float_t(double* /* type */, char** endptr) const
         {
@@ -8582,19 +8656,7 @@
         }
 
         /*!
-        @brief parse floating point number
-
-        This function (and its overloads) serves to select the most approprate
-        standard floating point number parsing function based on the type
-        supplied via the first parameter.  Set this to @a
-        static_cast<number_float_t*>(nullptr).
-
-        @param[in] type  the @ref number_float_t in use
-
-        @param[in,out] endptr  recieves a pointer to the first character after
-        the number
-
-        @return the floating point number
+        @copydoc str_to_float_t()
         */
         float str_to_float_t(float* /* type */, char** endptr) const
         {
@@ -8602,109 +8664,155 @@
         }
 
         /*!
-        @brief return number value for number tokens
+        @brief parse floating-point number
 
-        This function translates the last token into the most appropriate
-        number type (either integer, unsigned integer or floating point),
-        which is passed back to the caller via the result parameter.
-
-        This function parses the integer component up to the radix point or
-        exponent while collecting information about the 'floating point
-        representation', which it stores in the result parameter. If there is
-        no radix point or exponent, and the number can fit into a @ref
-        number_integer_t or @ref number_unsigned_t then it sets the result
-        parameter accordingly.
-
-        If the number is a floating point number the number is then parsed
-        using @a std:strtod (or @a std:strtof or @a std::strtold).
-
-        @param[out] result  @ref basic_json object to receive the number, or
+        @param[in,out] result @ref basic_json object to receive the number, or
         NAN if the conversion read past the current token. The latter case
         needs to be treated by the caller function.
         */
-        void get_number(basic_json& result) const
+        void get_number_float(basic_json& result) const
         {
             assert(m_start != nullptr);
 
-            const lexer::lexer_char_t* curptr = m_start;
-
-            // accumulate the integer conversion result (unsigned for now)
-            number_unsigned_t value = 0;
-
-            // maximum absolute value of the relevant integer type
-            number_unsigned_t max;
-
-            // temporarily store the type to avoid unecessary bitfield access
-            value_t type;
-
-            // look for sign
-            if (*curptr == '-')
-            {
-                type = value_t::number_integer;
-                max = static_cast<uint64_t>((std::numeric_limits<number_integer_t>::max)()) + 1;
-                curptr++;
-            }
-            else
-            {
-                type = value_t::number_unsigned;
-                max = static_cast<uint64_t>((std::numeric_limits<number_unsigned_t>::max)());
-            }
-
-            // count the significant figures
-            for (; curptr < m_cursor; curptr++)
-            {
-                // quickly skip tests if a digit
-                if (*curptr < '0' || *curptr > '9')
-                {
-                    if (*curptr == '.')
-                    {
-                        // don't count '.' but change to float
-                        type = value_t::number_float;
-                        continue;
-                    }
-                    // assume exponent (if not then will fail parse): change to
-                    // float, stop counting and record exponent details
-                    type = value_t::number_float;
-                    break;
-                }
-
-                // skip if definitely not an integer
-                if (type != value_t::number_float)
-                {
-                    // multiply last value by ten and add the new digit
-                    auto temp = value * 10 + *curptr - '0';
-
-                    // test for overflow
-                    if (temp < value || temp > max)
-                    {
-                        // overflow
-                        type = value_t::number_float;
-                    }
-                    else
-                    {
-                        // no overflow - save it
-                        value = temp;
-                    }
-                }
-            }
-
-            // save the value (if not a float)
-            if (type == value_t::number_unsigned)
-            {
-                result.m_value.number_unsigned = value;
-            }
-            else if (type == value_t::number_integer)
-            {
-                result.m_value.number_integer = -static_cast<number_integer_t>(value);
-            }
-            else
-            {
-                // parse with strtod
-                result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), NULL);
-            }
+            // parse with std::strtof, std::strtod, or std::strtold
+            result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), NULL);
 
             // save the type
-            result.m_type = type;
+            result.m_type = value_t::number_float;
+        }
+
+        /*!
+        @brief string to unsigned integer conversion
+
+        Converts the last string read by the lexer to an unsigned integer
+        number assuming a base of 10. Negative numbers shall be converted by
+        ignoring the leading '-' by setting @a skip_first to false. Negating
+        the value is in the responsability of the caller.
+
+        @param[out] result  the result of the conversion
+        @param skip_first  whether the first character should be skipped
+
+        @tparam T  an unsigned integer type
+
+        @return true iff overflow occurred; false if conversion succeeded
+
+        @pre The re2c lexer has successfully parsed an integer number. Only
+        two cases are valid: (1) all characters between m_start and m_cursor
+        are digits (unsigned integer) and @a skip_first is set to false, or
+        (2) the character at m_start is a '-' which will be ignored (@a
+        skip_first is set to true) and all other characters until m_cursor
+        are digits (signed integer).
+
+        @post Either the string between m_start and m_cursor is processed
+        completely, @a result contains the parsed number, and the function
+        returns false; or parsing was interrupted prematurely, because an
+        overflow was detected. Then, the function returns true.
+        */
+        template<typename T>
+        bool get_uint(T& result, bool skip_first) const
+        {
+            assert(m_start != nullptr);
+            assert(m_start != m_cursor);
+
+            // initially, set the result to 0
+            result = 0;
+
+            // maximal possible value for number_unsigned_t
+            static constexpr T max_value = std::numeric_limits<T>::max();
+            // maximal intermediate value before multiplying by 10
+            static constexpr T max_intermediate = max_value / 10;
+
+            /// pointer to the first digit (possibly skipping a '-')
+            const auto first_digit = skip_first ? (m_start + 1) : m_start;
+
+            // after re2c successfully parsed a number, the digits are between
+            // first_digit and m_cursor
+            for (auto p = first_digit; p != m_cursor; ++p)
+            {
+                // check if multiplying by 10 would result in overflow
+                if (result > max_intermediate)
+                {
+                    // signal overflow
+                    return true;
+                }
+
+                result *= 10;
+
+                // the current digit
+                const T digit = static_cast<T>(*reinterpret_cast<typename string_t::const_pointer>(p) - '0');
+
+                // check if adding the digit would result in overflow
+                if (digit > (max_value - result))
+                {
+                    // signal overflow
+                    return true;
+                }
+
+                result += digit;
+            }
+
+            // signal absence of overflow
+            return false;
+        }
+
+
+        /*!
+        @brief parse unsigned integer number
+
+        @param[in,out] result @ref basic_json object to receive the number, or
+        NAN if the conversion read past the current token. The latter case
+        needs to be treated by the caller function.
+
+        @note If an overflow wrt. type @ref number_unsigned_t occurs, the
+        number is processed as floating-point by @ref get_number_float().
+        */
+        void get_number_uint(basic_json& result) const
+        {
+            // optimistically set result to unsigned int
+            result.m_type = value_t::number_unsigned;
+
+            // parse the number
+            const bool overflow = get_uint(result.m_value.number_unsigned, false);
+
+            // if an overflow occurred, try again parsing the number to float
+            if (overflow)
+            {
+                get_number_float(result);
+            }
+        }
+
+        /*!
+        @brief parse signed integer number
+
+        @param[in,out] result @ref basic_json object to receive the number, or
+        NAN if the conversion read past the current token. The latter case
+        needs to be treated by the caller function.
+
+        @note If an overflow wrt. type @ref number_integer_t occurs, the
+        number is processed as floating-point by @ref get_number_float().
+        */
+        void get_number_int(basic_json& result) const
+        {
+            // optimistically set result to int
+            result.m_type = value_t::number_integer;
+
+            // parse as unsigned integer while skipping the leading '-'
+            number_unsigned_t unsigned_value = 0;
+            const bool overflow = get_uint(unsigned_value, true);
+
+            // store negated number
+            result.m_value.number_integer = static_cast<number_integer_t>(-unsigned_value);
+
+            // if an overflow occurred, try again parsing the number to float;
+            // note that, assuming min = -(max + 1), we have an underflow
+            // iff  -unsigned_value     < min
+            // iff  -unsigned_value     < -(max + 1)
+            // iff   unsigned_value     > max + 1
+            // iff   unsigned_value - 1 > max
+            if (overflow or (unsigned_value - 1) > std::numeric_limits<number_integer_t>::max())
+            {
+                get_number_float(result);
+            }
         }
 
       private:
@@ -8934,9 +9042,23 @@
                     break;
                 }
 
+                case lexer::token_type::value_uint:
+                {
+                    m_lexer.get_number_uint(result);
+                    get_token();
+                    break;
+                }
+
+                case lexer::token_type::value_int:
+                {
+                    m_lexer.get_number_int(result);
+                    get_token();
+                    break;
+                }
+
                 case lexer::token_type::value_number:
                 {
-                    m_lexer.get_number(result);
+                    m_lexer.get_number_float(result);
                     get_token();
                     break;
                 }

diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c
index 537d4f4..b2a070f 100644
--- a/src/json.hpp.re2c
+++ b/src/json.hpp.re2c

@@ -7371,6 +7371,8 @@
             literal_false,   ///< the `false` literal
             literal_null,    ///< the `null` literal
             value_string,    ///< a string -- use get_string() for actual value
+            value_uint,      ///< a uint number -- use get_number_uint() for actual value
+            value_int,       ///< an int number -- use get_number_int() for actual value
             value_number,    ///< a number -- use get_number() for actual value
             begin_array,     ///< the character for array begin `[`
             begin_object,    ///< the character for object begin `{`
@@ -7516,6 +7518,8 @@
                     return "null literal";
                 case token_type::value_string:
                     return "string literal";
+                case token_type::value_uint:
+                case token_type::value_int:
                 case token_type::value_number:
                     return "number literal";
                 case token_type::begin_array:
@@ -7616,7 +7620,11 @@
                     zero          = "0";
                     exp           = e (minus | plus)? digit+;
                     frac          = decimal_point digit+;
-                    int           = (zero | digit_1_9 digit*);
+                    int           = (zero|digit_1_9 digit*);
+                    number_uint   = int;
+                    number_uint   { last_token_type = token_type::value_uint; break; }
+                    number_int    = minus int;
+                    number_int    { last_token_type = token_type::value_int; break; }
                     number        = minus? int frac? exp?;
                     number        { last_token_type = token_type::value_number; break; }
 
@@ -7859,19 +7867,7 @@
         }
 
         /*!
-        @brief parse floating point number
-
-        This function (and its overloads) serves to select the most approprate
-        standard floating point number parsing function based on the type
-        supplied via the first parameter.  Set this to @a
-        static_cast<number_float_t*>(nullptr).
-
-        @param[in] type  the @ref number_float_t in use
-
-        @param[in,out] endptr  recieves a pointer to the first character after
-        the number
-
-        @return the floating point number
+        @copydoc str_to_float_t()
         */
         double str_to_float_t(double* /* type */, char** endptr) const
         {
@@ -7879,19 +7875,7 @@
         }
 
         /*!
-        @brief parse floating point number
-
-        This function (and its overloads) serves to select the most approprate
-        standard floating point number parsing function based on the type
-        supplied via the first parameter.  Set this to @a
-        static_cast<number_float_t*>(nullptr).
-
-        @param[in] type  the @ref number_float_t in use
-
-        @param[in,out] endptr  recieves a pointer to the first character after
-        the number
-
-        @return the floating point number
+        @copydoc str_to_float_t()
         */
         float str_to_float_t(float* /* type */, char** endptr) const
         {
@@ -7899,109 +7883,155 @@
         }
 
         /*!
-        @brief return number value for number tokens
+        @brief parse floating-point number
 
-        This function translates the last token into the most appropriate
-        number type (either integer, unsigned integer or floating point),
-        which is passed back to the caller via the result parameter.
-
-        This function parses the integer component up to the radix point or
-        exponent while collecting information about the 'floating point
-        representation', which it stores in the result parameter. If there is
-        no radix point or exponent, and the number can fit into a @ref
-        number_integer_t or @ref number_unsigned_t then it sets the result
-        parameter accordingly.
-
-        If the number is a floating point number the number is then parsed
-        using @a std:strtod (or @a std:strtof or @a std::strtold).
-
-        @param[out] result  @ref basic_json object to receive the number, or
+        @param[in,out] result @ref basic_json object to receive the number, or
         NAN if the conversion read past the current token. The latter case
         needs to be treated by the caller function.
         */
-        void get_number(basic_json& result) const
+        void get_number_float(basic_json& result) const
         {
             assert(m_start != nullptr);
 
-            const lexer::lexer_char_t* curptr = m_start;
-
-            // accumulate the integer conversion result (unsigned for now)
-            number_unsigned_t value = 0;
-
-            // maximum absolute value of the relevant integer type
-            number_unsigned_t max;
-
-            // temporarily store the type to avoid unecessary bitfield access
-            value_t type;
-
-            // look for sign
-            if (*curptr == '-')
-            {
-                type = value_t::number_integer;
-                max = static_cast<uint64_t>((std::numeric_limits<number_integer_t>::max)()) + 1;
-                curptr++;
-            }
-            else
-            {
-                type = value_t::number_unsigned;
-                max = static_cast<uint64_t>((std::numeric_limits<number_unsigned_t>::max)());
-            }
-
-            // count the significant figures
-            for (; curptr < m_cursor; curptr++)
-            {
-                // quickly skip tests if a digit
-                if (*curptr < '0' || *curptr > '9')
-                {
-                    if (*curptr == '.')
-                    {
-                        // don't count '.' but change to float
-                        type = value_t::number_float;
-                        continue;
-                    }
-                    // assume exponent (if not then will fail parse): change to
-                    // float, stop counting and record exponent details
-                    type = value_t::number_float;
-                    break;
-                }
-
-                // skip if definitely not an integer
-                if (type != value_t::number_float)
-                {
-                    // multiply last value by ten and add the new digit
-                    auto temp = value * 10 + *curptr - '0';
-
-                    // test for overflow
-                    if (temp < value || temp > max)
-                    {
-                        // overflow
-                        type = value_t::number_float;
-                    }
-                    else
-                    {
-                        // no overflow - save it
-                        value = temp;
-                    }
-                }
-            }
-
-            // save the value (if not a float)
-            if (type == value_t::number_unsigned)
-            {
-                result.m_value.number_unsigned = value;
-            }
-            else if (type == value_t::number_integer)
-            {
-                result.m_value.number_integer = -static_cast<number_integer_t>(value);
-            }
-            else
-            {
-                // parse with strtod
-                result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), NULL);
-            }
+            // parse with std::strtof, std::strtod, or std::strtold
+            result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), NULL);
 
             // save the type
-            result.m_type = type;
+            result.m_type = value_t::number_float;
+        }
+
+        /*!
+        @brief string to unsigned integer conversion
+
+        Converts the last string read by the lexer to an unsigned integer
+        number assuming a base of 10. Negative numbers shall be converted by
+        ignoring the leading '-' by setting @a skip_first to false. Negating
+        the value is in the responsability of the caller.
+
+        @param[out] result  the result of the conversion
+        @param skip_first  whether the first character should be skipped
+
+        @tparam T  an unsigned integer type
+
+        @return true iff overflow occurred; false if conversion succeeded
+
+        @pre The re2c lexer has successfully parsed an integer number. Only
+        two cases are valid: (1) all characters between m_start and m_cursor
+        are digits (unsigned integer) and @a skip_first is set to false, or
+        (2) the character at m_start is a '-' which will be ignored (@a
+        skip_first is set to true) and all other characters until m_cursor
+        are digits (signed integer).
+
+        @post Either the string between m_start and m_cursor is processed
+        completely, @a result contains the parsed number, and the function
+        returns false; or parsing was interrupted prematurely, because an
+        overflow was detected. Then, the function returns true.
+        */
+        template<typename T>
+        bool get_uint(T& result, bool skip_first) const
+        {
+            assert(m_start != nullptr);
+            assert(m_start != m_cursor);
+
+            // initially, set the result to 0
+            result = 0;
+
+            // maximal possible value for number_unsigned_t
+            static constexpr T max_value = std::numeric_limits<T>::max();
+            // maximal intermediate value before multiplying by 10
+            static constexpr T max_intermediate = max_value / 10;
+
+            /// pointer to the first digit (possibly skipping a '-')
+            const auto first_digit = skip_first ? (m_start + 1) : m_start;
+
+            // after re2c successfully parsed a number, the digits are between
+            // first_digit and m_cursor
+            for (auto p = first_digit; p != m_cursor; ++p)
+            {
+                // check if multiplying by 10 would result in overflow
+                if (result > max_intermediate)
+                {
+                    // signal overflow
+                    return true;
+                }
+
+                result *= 10;
+
+                // the current digit
+                const T digit = static_cast<T>(*reinterpret_cast<typename string_t::const_pointer>(p) - '0');
+
+                // check if adding the digit would result in overflow
+                if (digit > (max_value - result))
+                {
+                    // signal overflow
+                    return true;
+                }
+
+                result += digit;
+            }
+
+            // signal absence of overflow
+            return false;
+        }
+
+
+        /*!
+        @brief parse unsigned integer number
+
+        @param[in,out] result @ref basic_json object to receive the number, or
+        NAN if the conversion read past the current token. The latter case
+        needs to be treated by the caller function.
+
+        @note If an overflow wrt. type @ref number_unsigned_t occurs, the
+        number is processed as floating-point by @ref get_number_float().
+        */
+        void get_number_uint(basic_json& result) const
+        {
+            // optimistically set result to unsigned int
+            result.m_type = value_t::number_unsigned;
+
+            // parse the number
+            const bool overflow = get_uint(result.m_value.number_unsigned, false);
+
+            // if an overflow occurred, try again parsing the number to float
+            if (overflow)
+            {
+                get_number_float(result);
+            }
+        }
+
+        /*!
+        @brief parse signed integer number
+
+        @param[in,out] result @ref basic_json object to receive the number, or
+        NAN if the conversion read past the current token. The latter case
+        needs to be treated by the caller function.
+
+        @note If an overflow wrt. type @ref number_integer_t occurs, the
+        number is processed as floating-point by @ref get_number_float().
+        */
+        void get_number_int(basic_json& result) const
+        {
+            // optimistically set result to int
+            result.m_type = value_t::number_integer;
+
+            // parse as unsigned integer while skipping the leading '-'
+            number_unsigned_t unsigned_value = 0;
+            const bool overflow = get_uint(unsigned_value, true);
+
+            // store negated number
+            result.m_value.number_integer = static_cast<number_integer_t>(-unsigned_value);
+
+            // if an overflow occurred, try again parsing the number to float;
+            // note that, assuming min = -(max + 1), we have an underflow
+            // iff  -unsigned_value     < min
+            // iff  -unsigned_value     < -(max + 1)
+            // iff   unsigned_value     > max + 1
+            // iff   unsigned_value - 1 > max
+            if (overflow or (unsigned_value - 1) > std::numeric_limits<number_integer_t>::max())
+            {
+                get_number_float(result);
+            }
         }
 
       private:
@@ -8231,9 +8261,23 @@
                     break;
                 }
 
+                case lexer::token_type::value_uint:
+                {
+                    m_lexer.get_number_uint(result);
+                    get_token();
+                    break;
+                }
+
+                case lexer::token_type::value_int:
+                {
+                    m_lexer.get_number_int(result);
+                    get_token();
+                    break;
+                }
+
                 case lexer::token_type::value_number:
                 {
-                    m_lexer.get_number(result);
+                    m_lexer.get_number_float(result);
                     get_token();
                     break;
                 }

diff --git a/test/src/unit.cpp b/test/src/unit.cpp
index edbafac..8c631fc 100644
--- a/test/src/unit.cpp
+++ b/test/src/unit.cpp

@@ -9681,16 +9681,40 @@
 
         SECTION("numbers")
         {
-            CHECK(json::lexer("0").scan() == json::lexer::token_type::value_number);
-            CHECK(json::lexer("1").scan() == json::lexer::token_type::value_number);
-            CHECK(json::lexer("2").scan() == json::lexer::token_type::value_number);
-            CHECK(json::lexer("3").scan() == json::lexer::token_type::value_number);
-            CHECK(json::lexer("4").scan() == json::lexer::token_type::value_number);
-            CHECK(json::lexer("5").scan() == json::lexer::token_type::value_number);
-            CHECK(json::lexer("6").scan() == json::lexer::token_type::value_number);
-            CHECK(json::lexer("7").scan() == json::lexer::token_type::value_number);
-            CHECK(json::lexer("8").scan() == json::lexer::token_type::value_number);
-            CHECK(json::lexer("9").scan() == json::lexer::token_type::value_number);
+            // unsigned integer
+            CHECK(json::lexer("0").scan() == json::lexer::token_type::value_uint);
+            CHECK(json::lexer("1").scan() == json::lexer::token_type::value_uint);
+            CHECK(json::lexer("2").scan() == json::lexer::token_type::value_uint);
+            CHECK(json::lexer("3").scan() == json::lexer::token_type::value_uint);
+            CHECK(json::lexer("4").scan() == json::lexer::token_type::value_uint);
+            CHECK(json::lexer("5").scan() == json::lexer::token_type::value_uint);
+            CHECK(json::lexer("6").scan() == json::lexer::token_type::value_uint);
+            CHECK(json::lexer("7").scan() == json::lexer::token_type::value_uint);
+            CHECK(json::lexer("8").scan() == json::lexer::token_type::value_uint);
+            CHECK(json::lexer("9").scan() == json::lexer::token_type::value_uint);
+
+            // signed integer
+            CHECK(json::lexer("-1").scan() == json::lexer::token_type::value_int);
+            CHECK(json::lexer("-2").scan() == json::lexer::token_type::value_int);
+            CHECK(json::lexer("-3").scan() == json::lexer::token_type::value_int);
+            CHECK(json::lexer("-4").scan() == json::lexer::token_type::value_int);
+            CHECK(json::lexer("-5").scan() == json::lexer::token_type::value_int);
+            CHECK(json::lexer("-6").scan() == json::lexer::token_type::value_int);
+            CHECK(json::lexer("-7").scan() == json::lexer::token_type::value_int);
+            CHECK(json::lexer("-8").scan() == json::lexer::token_type::value_int);
+            CHECK(json::lexer("-9").scan() == json::lexer::token_type::value_int);
+
+            // floating-point
+            CHECK(json::lexer("0.0").scan() == json::lexer::token_type::value_number);
+            CHECK(json::lexer("0.1").scan() == json::lexer::token_type::value_number);
+            CHECK(json::lexer("0.2").scan() == json::lexer::token_type::value_number);
+            CHECK(json::lexer("0.3").scan() == json::lexer::token_type::value_number);
+            CHECK(json::lexer("0.4").scan() == json::lexer::token_type::value_number);
+            CHECK(json::lexer("0.5").scan() == json::lexer::token_type::value_number);
+            CHECK(json::lexer("0.6").scan() == json::lexer::token_type::value_number);
+            CHECK(json::lexer("0.7").scan() == json::lexer::token_type::value_number);
+            CHECK(json::lexer("0.8").scan() == json::lexer::token_type::value_number);
+            CHECK(json::lexer("0.9").scan() == json::lexer::token_type::value_number);
         }
 
         SECTION("whitespace")
commit	a1ee1987748680016cc508282ba79c6fca13181b	[log] [tgz]
author	Niels <niels.lohmann@gmail.com>	Sat Jul 30 10:59:32 2016 +0200
committer	Niels <niels.lohmann@gmail.com>	Sat Jul 30 10:59:32 2016 +0200
tree	00d06a81076fbb78ed87bfbaaab08ba8c4a84c8b
parent	0c6ebd495ddc6a2776d30cb8293a002eb4e7fd67 [diff]
parent	5405ae860117f55f9b5af287a7006dd64cef9eaa [diff]