Split Strtod() (#106) Add the `StrtodTrimmed` function, exposing a later stage of the conversion pipeline: the conversion of an already-trimmed digit buffer to a double. Some tools can do the first stage (trimming the digits) outside of the double-conversion library and would prefer not to pay the cost of doing it again.
diff --git a/Changelog b/Changelog index 66a24b4..6fc5245 100644 --- a/Changelog +++ b/Changelog
@@ -2,6 +2,7 @@ Changed all macros to use DOUBLE_CONVERSION_ as prefix. Renamed ALLOW_CASE_INSENSIBILITY to ALLOW_CASE_INSENSITIVITY, the old name is still available but officially deprecated. + Created and exposed new intermediate function StrtodTrimmed(). 2019-05-25: Fix `0x` for string->double conversion when Hex Floats are allowed.
diff --git a/double-conversion/strtod.cc b/double-conversion/strtod.cc index 7feb374..d0bb7f7 100644 --- a/double-conversion/strtod.cc +++ b/double-conversion/strtod.cc
@@ -446,18 +446,31 @@ return false; } -double Strtod(Vector<const char> buffer, int exponent) { - char copy_buffer[kMaxSignificantDecimalDigits]; - Vector<const char> trimmed; - int updated_exponent; - TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits, - &trimmed, &updated_exponent); - exponent = updated_exponent; +static bool IsDigit(const char d) { /* True iff d is a decimal digit character, 0 through 9. */ + return ('0' <= d) && (d <= '9'); +} +static bool IsNonZeroDigit(const char d) { /* True iff d is a decimal digit character other than 0, i.e. 1 through 9. */ + return ('1' <= d) && (d <= '9'); +} + +static bool AssertTrimmedDigits(const Vector<const char>& buffer) { /* Despite the name this does not assert by itself; it returns a bool that StrtodTrimmed() feeds to DOUBLE_CONVERSION_ASSERT. True iff every character of buffer is a decimal digit and buffer is either empty or has a non-zero first and last digit. */ + for(int i = 0; i < buffer.length(); ++i) { + if(!IsDigit(buffer[i])) { + return false; + } + } + return (buffer.length() == 0) || (IsNonZeroDigit(buffer[0]) && IsNonZeroDigit(buffer[buffer.length()-1])); +} + +double StrtodTrimmed(Vector<const char> trimmed, int exponent) { /* Second stage of Strtod(): takes the digits in trimmed together with a decimal exponent and returns a double. The two asserts below spell out the preconditions on trimmed: at most kMaxSignificantDecimalDigits characters, digits only, no leading or trailing zero. */ + DOUBLE_CONVERSION_ASSERT(trimmed.length() <= kMaxSignificantDecimalDigits); + DOUBLE_CONVERSION_ASSERT(AssertTrimmedDigits(trimmed)); double guess; - bool is_correct = ComputeGuess(trimmed, exponent, &guess); - if (is_correct) return guess; - + const bool is_correct = ComputeGuess(trimmed, exponent, &guess); + if (is_correct) { /* ComputeGuess() reported its guess as correct, so no boundary comparison is needed. */ + return guess; + } DiyFp upper_boundary = Double(guess).UpperBoundary(); int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary); if (comparison < 0) { @@ -472,6 +485,15 @@ } } +double Strtod(Vector<const char> buffer, int exponent) { /* First stage plus delegation: runs TrimAndCut() on buffer (handing it copy_buffer of kMaxSignificantDecimalDigits chars and receiving trimmed and updated_exponent), then returns StrtodTrimmed(trimmed, updated_exponent). */ + char copy_buffer[kMaxSignificantDecimalDigits]; + Vector<const char> trimmed; + int updated_exponent; + TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits, + &trimmed, &updated_exponent); + return StrtodTrimmed(trimmed, updated_exponent); +} + static float SanitizedDoubletof(double d) { DOUBLE_CONVERSION_ASSERT(d >= 0.0); // ASAN has a sanitize check that disallows casting doubles to floats if
diff --git a/double-conversion/strtod.h b/double-conversion/strtod.h index ed0293b..ff0ee47 100644 --- a/double-conversion/strtod.h +++ b/double-conversion/strtod.h
@@ -40,6 +40,11 @@ // contain a dot or a sign. It must not start with '0', and must not be empty. float Strtof(Vector<const char> buffer, int exponent); +// For special use cases the core of Strtod() is also available separately. +// 'trimmed' must be as produced by TrimAndCut(): digits only, possibly empty, +// no leading/trailing zeros, at most kMaxSignificantDecimalDigits digits. +double StrtodTrimmed(Vector<const char> trimmed, int exponent); + } // namespace double_conversion #endif // DOUBLE_CONVERSION_STRTOD_H_
diff --git a/test/cctest/test-strtod.cc b/test/cctest/test-strtod.cc index 1a6ac3b..62badbd 100644 --- a/test/cctest/test-strtod.cc +++ b/test/cctest/test-strtod.cc
@@ -21,6 +21,11 @@ } +static double StrtodTrimmedChar(const char* str, int exponent) { + return StrtodTrimmed(StringToVector(str), exponent); +} + + static float StrtofChar(const char* str, int exponent) { return Strtof(StringToVector(str), exponent); } @@ -350,6 +355,276 @@ } +TEST(StrtodTrimmed) { + Vector<const char> vector; + + vector = StringToVector("1"); + CHECK_EQ(1.0, StrtodTrimmed(vector, 0)); + CHECK_EQ(10.0, StrtodTrimmed(vector, 1)); + CHECK_EQ(100.0, StrtodTrimmed(vector, 2)); + CHECK_EQ(1e20, StrtodTrimmed(vector, 20)); + CHECK_EQ(1e22, StrtodTrimmed(vector, 22)); + CHECK_EQ(1e23, StrtodTrimmed(vector, 23)); + CHECK_EQ(1e35, StrtodTrimmed(vector, 35)); + CHECK_EQ(1e36, StrtodTrimmed(vector, 36)); + CHECK_EQ(1e37, StrtodTrimmed(vector, 37)); + CHECK_EQ(1e-1, StrtodTrimmed(vector, -1)); + CHECK_EQ(1e-2, StrtodTrimmed(vector, -2)); + CHECK_EQ(1e-5, StrtodTrimmed(vector, -5)); + CHECK_EQ(1e-20, StrtodTrimmed(vector, -20)); + CHECK_EQ(1e-22, StrtodTrimmed(vector, -22)); + CHECK_EQ(1e-23, StrtodTrimmed(vector, -23)); + CHECK_EQ(1e-25, StrtodTrimmed(vector, -25)); + CHECK_EQ(1e-39, StrtodTrimmed(vector, -39)); + + vector = StringToVector("2"); + CHECK_EQ(2.0, StrtodTrimmed(vector, 0)); + CHECK_EQ(20.0, StrtodTrimmed(vector, 1)); + CHECK_EQ(200.0, StrtodTrimmed(vector, 2)); + CHECK_EQ(2e20, StrtodTrimmed(vector, 20)); + CHECK_EQ(2e22, StrtodTrimmed(vector, 22)); + CHECK_EQ(2e23, StrtodTrimmed(vector, 23)); + CHECK_EQ(2e35, StrtodTrimmed(vector, 35)); + CHECK_EQ(2e36, StrtodTrimmed(vector, 36)); + CHECK_EQ(2e37, StrtodTrimmed(vector, 37)); + CHECK_EQ(2e-1, StrtodTrimmed(vector, -1)); + CHECK_EQ(2e-2, StrtodTrimmed(vector, -2)); + CHECK_EQ(2e-5, StrtodTrimmed(vector, -5)); + CHECK_EQ(2e-20, StrtodTrimmed(vector, -20)); + CHECK_EQ(2e-22, StrtodTrimmed(vector, -22)); + CHECK_EQ(2e-23, StrtodTrimmed(vector, -23)); + CHECK_EQ(2e-25, StrtodTrimmed(vector, -25)); + CHECK_EQ(2e-39, StrtodTrimmed(vector, -39)); + + vector = StringToVector("9"); + CHECK_EQ(9.0, 
StrtodTrimmed(vector, 0)); + CHECK_EQ(90.0, StrtodTrimmed(vector, 1)); + CHECK_EQ(900.0, StrtodTrimmed(vector, 2)); + CHECK_EQ(9e20, StrtodTrimmed(vector, 20)); + CHECK_EQ(9e22, StrtodTrimmed(vector, 22)); + CHECK_EQ(9e23, StrtodTrimmed(vector, 23)); + CHECK_EQ(9e35, StrtodTrimmed(vector, 35)); + CHECK_EQ(9e36, StrtodTrimmed(vector, 36)); + CHECK_EQ(9e37, StrtodTrimmed(vector, 37)); + CHECK_EQ(9e-1, StrtodTrimmed(vector, -1)); + CHECK_EQ(9e-2, StrtodTrimmed(vector, -2)); + CHECK_EQ(9e-5, StrtodTrimmed(vector, -5)); + CHECK_EQ(9e-20, StrtodTrimmed(vector, -20)); + CHECK_EQ(9e-22, StrtodTrimmed(vector, -22)); + CHECK_EQ(9e-23, StrtodTrimmed(vector, -23)); + CHECK_EQ(9e-25, StrtodTrimmed(vector, -25)); + CHECK_EQ(9e-39, StrtodTrimmed(vector, -39)); + + vector = StringToVector("12345"); + CHECK_EQ(12345.0, StrtodTrimmed(vector, 0)); + CHECK_EQ(123450.0, StrtodTrimmed(vector, 1)); + CHECK_EQ(1234500.0, StrtodTrimmed(vector, 2)); + CHECK_EQ(12345e20, StrtodTrimmed(vector, 20)); + CHECK_EQ(12345e22, StrtodTrimmed(vector, 22)); + CHECK_EQ(12345e23, StrtodTrimmed(vector, 23)); + CHECK_EQ(12345e30, StrtodTrimmed(vector, 30)); + CHECK_EQ(12345e31, StrtodTrimmed(vector, 31)); + CHECK_EQ(12345e32, StrtodTrimmed(vector, 32)); + CHECK_EQ(12345e35, StrtodTrimmed(vector, 35)); + CHECK_EQ(12345e36, StrtodTrimmed(vector, 36)); + CHECK_EQ(12345e37, StrtodTrimmed(vector, 37)); + CHECK_EQ(12345e-1, StrtodTrimmed(vector, -1)); + CHECK_EQ(12345e-2, StrtodTrimmed(vector, -2)); + CHECK_EQ(12345e-5, StrtodTrimmed(vector, -5)); + CHECK_EQ(12345e-20, StrtodTrimmed(vector, -20)); + CHECK_EQ(12345e-22, StrtodTrimmed(vector, -22)); + CHECK_EQ(12345e-23, StrtodTrimmed(vector, -23)); + CHECK_EQ(12345e-25, StrtodTrimmed(vector, -25)); + CHECK_EQ(12345e-39, StrtodTrimmed(vector, -39)); + + vector = StringToVector("12345678901234"); + CHECK_EQ(12345678901234.0, StrtodTrimmed(vector, 0)); + CHECK_EQ(123456789012340.0, StrtodTrimmed(vector, 1)); + CHECK_EQ(1234567890123400.0, StrtodTrimmed(vector, 2)); 
+ CHECK_EQ(12345678901234e20, StrtodTrimmed(vector, 20)); + CHECK_EQ(12345678901234e22, StrtodTrimmed(vector, 22)); + CHECK_EQ(12345678901234e23, StrtodTrimmed(vector, 23)); + CHECK_EQ(12345678901234e30, StrtodTrimmed(vector, 30)); + CHECK_EQ(12345678901234e31, StrtodTrimmed(vector, 31)); + CHECK_EQ(12345678901234e32, StrtodTrimmed(vector, 32)); + CHECK_EQ(12345678901234e35, StrtodTrimmed(vector, 35)); + CHECK_EQ(12345678901234e36, StrtodTrimmed(vector, 36)); + CHECK_EQ(12345678901234e37, StrtodTrimmed(vector, 37)); + CHECK_EQ(12345678901234e-1, StrtodTrimmed(vector, -1)); + CHECK_EQ(12345678901234e-2, StrtodTrimmed(vector, -2)); + CHECK_EQ(12345678901234e-5, StrtodTrimmed(vector, -5)); + CHECK_EQ(12345678901234e-20, StrtodTrimmed(vector, -20)); + CHECK_EQ(12345678901234e-22, StrtodTrimmed(vector, -22)); + CHECK_EQ(12345678901234e-23, StrtodTrimmed(vector, -23)); + CHECK_EQ(12345678901234e-25, StrtodTrimmed(vector, -25)); + CHECK_EQ(12345678901234e-39, StrtodTrimmed(vector, -39)); + + vector = StringToVector("123456789012345"); + CHECK_EQ(123456789012345.0, StrtodTrimmed(vector, 0)); + CHECK_EQ(1234567890123450.0, StrtodTrimmed(vector, 1)); + CHECK_EQ(12345678901234500.0, StrtodTrimmed(vector, 2)); + CHECK_EQ(123456789012345e20, StrtodTrimmed(vector, 20)); + CHECK_EQ(123456789012345e22, StrtodTrimmed(vector, 22)); + CHECK_EQ(123456789012345e23, StrtodTrimmed(vector, 23)); + CHECK_EQ(123456789012345e35, StrtodTrimmed(vector, 35)); + CHECK_EQ(123456789012345e36, StrtodTrimmed(vector, 36)); + CHECK_EQ(123456789012345e37, StrtodTrimmed(vector, 37)); + CHECK_EQ(123456789012345e39, StrtodTrimmed(vector, 39)); + CHECK_EQ(123456789012345e-1, StrtodTrimmed(vector, -1)); + CHECK_EQ(123456789012345e-2, StrtodTrimmed(vector, -2)); + CHECK_EQ(123456789012345e-5, StrtodTrimmed(vector, -5)); + CHECK_EQ(123456789012345e-20, StrtodTrimmed(vector, -20)); + CHECK_EQ(123456789012345e-22, StrtodTrimmed(vector, -22)); + CHECK_EQ(123456789012345e-23, StrtodTrimmed(vector, -23)); + 
CHECK_EQ(123456789012345e-25, StrtodTrimmed(vector, -25)); + CHECK_EQ(123456789012345e-39, StrtodTrimmed(vector, -39)); + + CHECK_EQ(0.0, StrtodTrimmedChar("", 1324)); + CHECK_EQ(0.0, StrtodTrimmedChar("2", -324)); + CHECK_EQ(4e-324, StrtodTrimmedChar("3", -324)); + // It would be more readable to put non-zero literals on the left side (i.e. + // CHECK_EQ(1e-325, StrtodTrimmedChar("1", -325))), but then GCC complains + // that they are truncated to zero. + CHECK_EQ(0.0, StrtodTrimmedChar("1", -325)); + CHECK_EQ(0.0, StrtodTrimmedChar("1", -325)); + + // It would be more readable to put the literals (and not Double::Infinity()) + // on the left side (i.e. CHECK_EQ(1e309, StrtodTrimmedChar("1", 309))), but + // then GCC complains that the floating constant exceeds range of 'double'. + CHECK_EQ(Double::Infinity(), StrtodTrimmedChar("1", 309)); + CHECK_EQ(1e308, StrtodTrimmedChar("1", 308)); + CHECK_EQ(1234e305, StrtodTrimmedChar("1234", 305)); + CHECK_EQ(1234e304, StrtodTrimmedChar("1234", 304)); + CHECK_EQ(Double::Infinity(), StrtodTrimmedChar("18", 307)); + CHECK_EQ(17e307, StrtodTrimmedChar("17", 307)); + + CHECK_EQ(1.7976931348623157E+308, StrtodTrimmedChar("17976931348623157", 292)); + CHECK_EQ(1.7976931348623158E+308, StrtodTrimmedChar("17976931348623158", 292)); + CHECK_EQ(Double::Infinity(), StrtodTrimmedChar("17976931348623159", 292)); + + // The following number is the result of 89255.0/1e22. Both floating-point + // numbers can be accurately represented with doubles. However on Linux/x86 + // the floating-point stack is set to 80 bits and the double-rounding + // introduces an error. + CHECK_EQ(89255e-22, StrtodTrimmedChar("89255", -22)); + + // Some random values.
+ CHECK_EQ(358416272e-33, StrtodTrimmedChar("358416272", -33)); + CHECK_EQ(104110013277974872254e-225, + StrtodTrimmedChar("104110013277974872254", -225)); + + CHECK_EQ(123456789e108, StrtodTrimmedChar("123456789", 108)); + CHECK_EQ(123456789e109, StrtodTrimmedChar("123456789", 109)); + CHECK_EQ(123456789e110, StrtodTrimmedChar("123456789", 110)); + CHECK_EQ(123456789e111, StrtodTrimmedChar("123456789", 111)); + CHECK_EQ(123456789e112, StrtodTrimmedChar("123456789", 112)); + CHECK_EQ(123456789e113, StrtodTrimmedChar("123456789", 113)); + CHECK_EQ(123456789e114, StrtodTrimmedChar("123456789", 114)); + CHECK_EQ(123456789e115, StrtodTrimmedChar("123456789", 115)); + CHECK_EQ(1234567890123456789012345e108, + StrtodTrimmedChar("1234567890123456789012345", 108)); + CHECK_EQ(1234567890123456789012345e109, + StrtodTrimmedChar("1234567890123456789012345", 109)); + CHECK_EQ(1234567890123456789012345e110, + StrtodTrimmedChar("1234567890123456789012345", 110)); + CHECK_EQ(1234567890123456789012345e111, + StrtodTrimmedChar("1234567890123456789012345", 111)); + CHECK_EQ(1234567890123456789012345e112, + StrtodTrimmedChar("1234567890123456789012345", 112)); + CHECK_EQ(1234567890123456789012345e113, + StrtodTrimmedChar("1234567890123456789012345", 113)); + CHECK_EQ(1234567890123456789012345e114, + StrtodTrimmedChar("1234567890123456789012345", 114)); + CHECK_EQ(1234567890123456789012345e115, + StrtodTrimmedChar("1234567890123456789012345", 115)); + + CHECK_EQ(1234567890123456789052345e108, + StrtodTrimmedChar("1234567890123456789052345", 108)); + CHECK_EQ(1234567890123456789052345e109, + StrtodTrimmedChar("1234567890123456789052345", 109)); + CHECK_EQ(1234567890123456789052345e110, + StrtodTrimmedChar("1234567890123456789052345", 110)); + CHECK_EQ(1234567890123456789052345e111, + StrtodTrimmedChar("1234567890123456789052345", 111)); + CHECK_EQ(1234567890123456789052345e112, + StrtodTrimmedChar("1234567890123456789052345", 112)); + CHECK_EQ(1234567890123456789052345e113, + 
StrtodTrimmedChar("1234567890123456789052345", 113)); + CHECK_EQ(1234567890123456789052345e114, + StrtodTrimmedChar("1234567890123456789052345", 114)); + CHECK_EQ(1234567890123456789052345e115, + StrtodTrimmedChar("1234567890123456789052345", 115)); + + // Boundary cases. Boundaries themselves should round to even. + // + // 0x1FFFFFFFFFFFF * 2^3 = 72057594037927928 + // next: 72057594037927936 + // boundary: 72057594037927932 should round up. + CHECK_EQ(72057594037927928.0, StrtodTrimmedChar("72057594037927928", 0)); + CHECK_EQ(72057594037927936.0, StrtodTrimmedChar("72057594037927936", 0)); + CHECK_EQ(72057594037927936.0, StrtodTrimmedChar("72057594037927932", 0)); + CHECK_EQ(72057594037927928.0, StrtodTrimmedChar("7205759403792793199999", -5)); + CHECK_EQ(72057594037927936.0, StrtodTrimmedChar("7205759403792793200001", -5)); + + // 0x1FFFFFFFFFFFF * 2^10 = 9223372036854774784 + // next: 9223372036854775808 + // boundary: 9223372036854775296 should round up. + CHECK_EQ(9223372036854774784.0, StrtodTrimmedChar("9223372036854774784", 0)); + CHECK_EQ(9223372036854775808.0, StrtodTrimmedChar("9223372036854775808", 0)); + CHECK_EQ(9223372036854775808.0, StrtodTrimmedChar("9223372036854775296", 0)); + CHECK_EQ(9223372036854774784.0, StrtodTrimmedChar("922337203685477529599999", -5)); + CHECK_EQ(9223372036854775808.0, StrtodTrimmedChar("922337203685477529600001", -5)); + + // 0x1FFFFFFFFFFFF * 2^50 = 10141204801825834086073718800384 + // next: 10141204801825835211973625643008 + // boundary: 10141204801825834649023672221696 should round up. 
+ CHECK_EQ(10141204801825834086073718800384.0, + StrtodTrimmedChar("10141204801825834086073718800384", 0)); + CHECK_EQ(10141204801825835211973625643008.0, + StrtodTrimmedChar("10141204801825835211973625643008", 0)); + CHECK_EQ(10141204801825835211973625643008.0, + StrtodTrimmedChar("10141204801825834649023672221696", 0)); + CHECK_EQ(10141204801825834086073718800384.0, + StrtodTrimmedChar("1014120480182583464902367222169599999", -5)); + CHECK_EQ(10141204801825835211973625643008.0, + StrtodTrimmedChar("1014120480182583464902367222169600001", -5)); + + // 0x1FFFFFFFFFFFF * 2^99 = 5708990770823838890407843763683279797179383808 + // next: 5708990770823839524233143877797980545530986496 + // boundary: 5708990770823839207320493820740630171355185152 + // The boundary should round up. + CHECK_EQ(5708990770823838890407843763683279797179383808.0, + StrtodTrimmedChar("5708990770823838890407843763683279797179383808", 0)); + CHECK_EQ(5708990770823839524233143877797980545530986496.0, + StrtodTrimmedChar("5708990770823839524233143877797980545530986496", 0)); + CHECK_EQ(5708990770823839524233143877797980545530986496.0, + StrtodTrimmedChar("5708990770823839207320493820740630171355185152", 0)); + CHECK_EQ(5708990770823838890407843763683279797179383808.0, + StrtodTrimmedChar("5708990770823839207320493820740630171355185151999", -3)); + CHECK_EQ(5708990770823839524233143877797980545530986496.0, + StrtodTrimmedChar("5708990770823839207320493820740630171355185152001", -3)); + + // The following test-cases got some public attention in early 2011 when they + // sent Java and PHP into an infinite loop. 
+ CHECK_EQ(2.225073858507201e-308, StrtodTrimmedChar("22250738585072011", -324)); + CHECK_EQ(2.22507385850720138309e-308, + StrtodTrimmedChar("22250738585072011360574097967091319759348195463516456480" + "23426109724822222021076945516529523908135087914149158913" + "03962110687008643869459464552765720740782062174337998814" + "10632673292535522868813721490129811224514518898490572223" + "07285255133155755015914397476397983411801999323962548289" + "01710708185069063066665599493827577257201576306269066333" + "26475653000092458883164330377797918696120494973903778297" + "04905051080609940730262937128958950003583799967207254304" + "36028407889577179615094551674824347103070260914462157228" + "98802581825451803257070188608721131280795122334262883686" + "22321503775666622503982534335974568884423900265498198385" + "48794829220689472168983109969836584681402285424333066033" + "98508864458040010349339704275671864433837704860378616227" + "71738545623065874679014086723327636718751", -1076)); +} + + TEST(Strtof) { Vector<const char> vector;