src/libcore/num/dec2flt/algorithm.rs - third_party/rust - Git at Google

 //! The various algorithms from the paper.

 use crate::cmp::min;
 use crate::cmp::Ordering::{Equal, Greater, Less};
 use crate::num::dec2flt::num::{self, Big};
 use crate::num::dec2flt::rawfp::{self, fp_to_float, next_float, prev_float, RawFloat, Unpacked};
 use crate::num::dec2flt::table;
 use crate::num::diy_float::Fp;

 /// Number of significand bits in Fp
 const P: u32 = 64;

 // We simply store the best approximation for *all* exponents, so the variable "h" and the
 // associated conditions can be omitted. This trades performance for a couple kilobytes of space.

 fn power_of_ten(e: i16) -> Fp {
     assert!(e >= table::MIN_E);
     let i = e - table::MIN_E;
     let sig = table::POWERS.0[i as usize];
     let exp = table::POWERS.1[i as usize];
     Fp { f: sig, e: exp }
 }

 // In most architectures, floating point operations have an explicit bit size, therefore the
 // precision of the computation is determined on a per-operation basis.
 #[cfg(any(not(target_arch = "x86"), target_feature = "sse2"))]
 mod fpu_precision {
     pub fn set_precision<T>() {}
 }

 // On x86, the x87 FPU is used for float operations if the SSE/SSE2 extensions are not available.
 // The x87 FPU operates with 80 bits of precision by default, which means that operations will
 // round to 80 bits causing double rounding to happen when values are eventually represented as
 // 32/64 bit float values. To overcome this, the FPU control word can be set so that the
 // computations are performed in the desired precision.
 #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
 mod fpu_precision {
     use crate::mem::size_of;

     /// A structure used to preserve the original value of the FPU control word, so that it can be
     /// restored when the structure is dropped.
     ///
     /// The x87 FPU is a 16-bits register whose fields are as follows:
     ///
     /// | 12-15 | 10-11 | 8-9 | 6-7 |  5 |  4 |  3 |  2 |  1 |  0 |
     /// |------:|------:|----:|----:|---:|---:|---:|---:|---:|---:|
     /// |       | RC    | PC  |     | PM | UM | OM | ZM | DM | IM |
     ///
     /// The documentation for all of the fields is available in the IA-32 Architectures Software
     /// Developer's Manual (Volume 1).
     ///
     /// The only field which is relevant for the following code is PC, Precision Control. This
     /// field determines the precision of the operations performed by the  FPU. It can be set to:
     ///  - 0b00, single precision i.e., 32-bits
     ///  - 0b10, double precision i.e., 64-bits
     ///  - 0b11, double extended precision i.e., 80-bits (default state)
     /// The 0b01 value is reserved and should not be used.
     pub struct FPUControlWord(u16);

     fn set_cw(cw: u16) {
         // SAFETY: the `fldcw` instruction has been audited to be able to work correctly with
         // any `u16`
         unsafe { asm!("fldcw $0" :: "m" (cw) :: "volatile") }
     }

     /// Sets the precision field of the FPU to `T` and returns a `FPUControlWord`.
     pub fn set_precision<T>() -> FPUControlWord {
         let cw = 0u16;

         // Compute the value for the Precision Control field that is appropriate for `T`.
         let cw_precision = match size_of::<T>() {
             4 => 0x0000, // 32 bits
             8 => 0x0200, // 64 bits
             _ => 0x0300, // default, 80 bits
         };

         // Get the original value of the control word to restore it later, when the
         // `FPUControlWord` structure is dropped
         // SAFETY: the `fnstcw` instruction has been audited to be able to work correctly with
         // any `u16`
         unsafe { asm!("fnstcw $0" : "=*m" (&cw) ::: "volatile") }

         // Set the control word to the desired precision. This is achieved by masking away the old
         // precision (bits 8 and 9, 0x300) and replacing it with the precision flag computed above.
         set_cw((cw & 0xFCFF) | cw_precision);

         FPUControlWord(cw)
     }

     impl Drop for FPUControlWord {
         fn drop(&mut self) {
             set_cw(self.0)
         }
     }
 }

 /// The fast path of Bellerophon using machine-sized integers and floats.
 ///
 /// This is extracted into a separate function so that it can be attempted before constructing
 /// a bignum.
 pub fn fast_path<T: RawFloat>(integral: &[u8], fractional: &[u8], e: i64) -> Option<T> {
     let num_digits = integral.len() + fractional.len();
     // log_10(f64::MAX_SIG) ~ 15.95. We compare the exact value to MAX_SIG near the end,
     // this is just a quick, cheap rejection (and also frees the rest of the code from
     // worrying about underflow).
     if num_digits > 16 {
         return None;
     }
     if e.abs() >= T::CEIL_LOG5_OF_MAX_SIG as i64 {
         return None;
     }
     let f = num::from_str_unchecked(integral.iter().chain(fractional.iter()));
     if f > T::MAX_SIG {
         return None;
     }

     // The fast path crucially depends on arithmetic being rounded to the correct number of bits
     // without any intermediate rounding. On x86 (without SSE or SSE2) this requires the precision
     // of the x87 FPU stack to be changed so that it directly rounds to 64/32 bit.
     // The `set_precision` function takes care of setting the precision on architectures which
     // require setting it by changing the global state (like the control word of the x87 FPU).
     let _cw = fpu_precision::set_precision::<T>();

     // The case e < 0 cannot be folded into the other branch. Negative powers result in
     // a repeating fractional part in binary, which are rounded, which causes real
     // (and occasionally quite significant!) errors in the final result.
     if e >= 0 {
         Some(T::from_int(f) * T::short_fast_pow10(e as usize))
     } else {
         Some(T::from_int(f) / T::short_fast_pow10(e.abs() as usize))
     }
 }

 /// Algorithm Bellerophon is trivial code justified by non-trivial numeric analysis.
 ///
 /// It rounds ``f`` to a float with 64 bit significand and multiplies it by the best approximation
 /// of `10^e` (in the same floating point format). This is often enough to get the correct result.
 /// However, when the result is close to halfway between two adjacent (ordinary) floats, the
 /// compound rounding error from multiplying two approximation means the result may be off by a
 /// few bits. When this happens, the iterative Algorithm R fixes things up.
 ///
 /// The hand-wavy "close to halfway" is made precise by the numeric analysis in the paper.
 /// In the words of Clinger:
 ///
 /// > Slop, expressed in units of the least significant bit, is an inclusive bound for the error
 /// > accumulated during the floating point calculation of the approximation to f * 10^e. (Slop is
 /// > not a bound for the true error, but bounds the difference between the approximation z and
 /// > the best possible approximation that uses p bits of significand.)
 pub fn bellerophon<T: RawFloat>(f: &Big, e: i16) -> T {
     let slop = if f <= &Big::from_u64(T::MAX_SIG) {
         // The cases abs(e) < log5(2^N) are in fast_path()
         if e >= 0 { 0 } else { 3 }
     } else {
         if e >= 0 { 1 } else { 4 }
     };
     let z = rawfp::big_to_fp(f).mul(&power_of_ten(e)).normalize();
     let exp_p_n = 1 << (P - T::SIG_BITS as u32);
     let lowbits: i64 = (z.f % exp_p_n) as i64;
     // Is the slop large enough to make a difference when
     // rounding to n bits?
     if (lowbits - exp_p_n as i64 / 2).abs() <= slop {
         algorithm_r(f, e, fp_to_float(z))
     } else {
         fp_to_float(z)
     }
 }

 /// An iterative algorithm that improves a floating point approximation of `f * 10^e`.
 ///
 /// Each iteration gets one unit in the last place closer, which of course takes terribly long to
 /// converge if `z0` is even mildly off. Luckily, when used as fallback for Bellerophon, the
 /// starting approximation is off by at most one ULP.
 fn algorithm_r<T: RawFloat>(f: &Big, e: i16, z0: T) -> T {
     let mut z = z0;
     loop {
         let raw = z.unpack();
         let (m, k) = (raw.sig, raw.k);
         let mut x = f.clone();
         let mut y = Big::from_u64(m);

         // Find positive integers `x`, `y` such that `x / y` is exactly `(f * 10^e) / (m * 2^k)`.
         // This not only avoids dealing with the signs of `e` and `k`, we also eliminate the
         // power of two common to `10^e` and `2^k` to make the numbers smaller.
         make_ratio(&mut x, &mut y, e, k);

         let m_digits = [(m & 0xFF_FF_FF_FF) as u32, (m >> 32) as u32];
         // This is written a bit awkwardly because our bignums don't support
         // negative numbers, so we use the absolute value + sign information.
         // The multiplication with m_digits can't overflow. If `x` or `y` are large enough that
         // we need to worry about overflow, then they are also large enough that `make_ratio` has
         // reduced the fraction by a factor of 2^64 or more.
         let (d2, d_negative) = if x >= y {
             // Don't need x any more, save a clone().
             x.sub(&y).mul_pow2(1).mul_digits(&m_digits);
             (x, false)
         } else {
             // Still need y - make a copy.
             let mut y = y.clone();
             y.sub(&x).mul_pow2(1).mul_digits(&m_digits);
             (y, true)
         };

         if d2 < y {
             let mut d2_double = d2;
             d2_double.mul_pow2(1);
             if m == T::MIN_SIG && d_negative && d2_double > y {
                 z = prev_float(z);
             } else {
                 return z;
             }
         } else if d2 == y {
             if m % 2 == 0 {
                 if m == T::MIN_SIG && d_negative {
                     z = prev_float(z);
                 } else {
                     return z;
                 }
             } else if d_negative {
                 z = prev_float(z);
             } else {
                 z = next_float(z);
             }
         } else if d_negative {
             z = prev_float(z);
         } else {
             z = next_float(z);
         }
     }
 }

 /// Given `x = f` and `y = m` where `f` represent input decimal digits as usual and `m` is the
 /// significand of a floating point approximation, make the ratio `x / y` equal to
 /// `(f * 10^e) / (m * 2^k)`, possibly reduced by a power of two both have in common.
 fn make_ratio(x: &mut Big, y: &mut Big, e: i16, k: i16) {
     let (e_abs, k_abs) = (e.abs() as usize, k.abs() as usize);
     if e >= 0 {
         if k >= 0 {
             // x = f * 10^e, y = m * 2^k, except that we reduce the fraction by some power of two.
             let common = min(e_abs, k_abs);
             x.mul_pow5(e_abs).mul_pow2(e_abs - common);
             y.mul_pow2(k_abs - common);
         } else {
             // x = f * 10^e * 2^abs(k), y = m
             // This can't overflow because it requires positive `e` and negative `k`, which can
             // only happen for values extremely close to 1, which means that `e` and `k` will be
             // comparatively tiny.
             x.mul_pow5(e_abs).mul_pow2(e_abs + k_abs);
         }
     } else {
         if k >= 0 {
             // x = f, y = m * 10^abs(e) * 2^k
             // This can't overflow either, see above.
             y.mul_pow5(e_abs).mul_pow2(k_abs + e_abs);
         } else {
             // x = f * 2^abs(k), y = m * 10^abs(e), again reducing by a common power of two.
             let common = min(e_abs, k_abs);
             x.mul_pow2(k_abs - common);
             y.mul_pow5(e_abs).mul_pow2(e_abs - common);
         }
     }
 }

 /// Conceptually, Algorithm M is the simplest way to convert a decimal to a float.
 ///
 /// We form a ratio that is equal to `f * 10^e`, then throwing in powers of two until it gives
 /// a valid float significand. The binary exponent `k` is the number of times we multiplied
 /// numerator or denominator by two, i.e., at all times `f * 10^e` equals `(u / v) * 2^k`.
 /// When we have found out significand, we only need to round by inspecting the remainder of the
 /// division, which is done in helper functions further below.
 ///
 /// This algorithm is super slow, even with the optimization described in `quick_start()`.
 /// However, it's the simplest of the algorithms to adapt for overflow, underflow, and subnormal
 /// results. This implementation takes over when Bellerophon and Algorithm R are overwhelmed.
 /// Detecting underflow and overflow is easy: The ratio still isn't an in-range significand,
 /// yet the minimum/maximum exponent has been reached. In the case of overflow, we simply return
 /// infinity.
 ///
 /// Handling underflow and subnormals is trickier. One big problem is that, with the minimum
 /// exponent, the ratio might still be too large for a significand. See underflow() for details.
 pub fn algorithm_m<T: RawFloat>(f: &Big, e: i16) -> T {
     let mut u;
     let mut v;
     let e_abs = e.abs() as usize;
     let mut k = 0;
     if e < 0 {
         u = f.clone();
         v = Big::from_small(1);
         v.mul_pow5(e_abs).mul_pow2(e_abs);
     } else {
         // FIXME possible optimization: generalize big_to_fp so that we can do the equivalent of
         // fp_to_float(big_to_fp(u)) here, only without the double rounding.
         u = f.clone();
         u.mul_pow5(e_abs).mul_pow2(e_abs);
         v = Big::from_small(1);
     }
     quick_start::<T>(&mut u, &mut v, &mut k);
     let mut rem = Big::from_small(0);
     let mut x = Big::from_small(0);
     let min_sig = Big::from_u64(T::MIN_SIG);
     let max_sig = Big::from_u64(T::MAX_SIG);
     loop {
         u.div_rem(&v, &mut x, &mut rem);
         if k == T::MIN_EXP_INT {
             // We have to stop at the minimum exponent, if we wait until `k < T::MIN_EXP_INT`,
             // then we'd be off by a factor of two. Unfortunately this means we have to special-
             // case normal numbers with the minimum exponent.
             // FIXME find a more elegant formulation, but run the `tiny-pow10` test to make sure
             // that it's actually correct!
             if x >= min_sig && x <= max_sig {
                 break;
             }
             return underflow(x, v, rem);
         }
         if k > T::MAX_EXP_INT {
             return T::INFINITY;
         }
         if x < min_sig {
             u.mul_pow2(1);
             k -= 1;
         } else if x > max_sig {
             v.mul_pow2(1);
             k += 1;
         } else {
             break;
         }
     }
     let q = num::to_u64(&x);
     let z = rawfp::encode_normal(Unpacked::new(q, k));
     round_by_remainder(v, rem, q, z)
 }

 /// Skips over most Algorithm M iterations by checking the bit length.
 fn quick_start<T: RawFloat>(u: &mut Big, v: &mut Big, k: &mut i16) {
     // The bit length is an estimate of the base two logarithm, and log(u / v) = log(u) - log(v).
     // The estimate is off by at most 1, but always an under-estimate, so the error on log(u)
     // and log(v) are of the same sign and cancel out (if both are large). Therefore the error
     // for log(u / v) is at most one as well.
     // The target ratio is one where u/v is in an in-range significand. Thus our termination
     // condition is log2(u / v) being the significand bits, plus/minus one.
     // FIXME Looking at the second bit could improve the estimate and avoid some more divisions.
     let target_ratio = T::SIG_BITS as i16;
     let log2_u = u.bit_length() as i16;
     let log2_v = v.bit_length() as i16;
     let mut u_shift: i16 = 0;
     let mut v_shift: i16 = 0;
     assert!(*k == 0);
     loop {
         if *k == T::MIN_EXP_INT {
             // Underflow or subnormal. Leave it to the main function.
             break;
         }
         if *k == T::MAX_EXP_INT {
             // Overflow. Leave it to the main function.
             break;
         }
         let log2_ratio = (log2_u + u_shift) - (log2_v + v_shift);
         if log2_ratio < target_ratio - 1 {
             u_shift += 1;
             *k -= 1;
         } else if log2_ratio > target_ratio + 1 {
             v_shift += 1;
             *k += 1;
         } else {
             break;
         }
     }
     u.mul_pow2(u_shift as usize);
     v.mul_pow2(v_shift as usize);
 }

 fn underflow<T: RawFloat>(x: Big, v: Big, rem: Big) -> T {
     if x < Big::from_u64(T::MIN_SIG) {
         let q = num::to_u64(&x);
         let z = rawfp::encode_subnormal(q);
         return round_by_remainder(v, rem, q, z);
     }
     // Ratio isn't an in-range significand with the minimum exponent, so we need to round off
     // excess bits and adjust the exponent accordingly. The real value now looks like this:
     //
     //        x        lsb
     // /--------------\/
     // 1010101010101010.10101010101010 * 2^k
     // \-----/\-------/ \------------/
     //    q     trunc.    (represented by rem)
     //
     // Therefore, when the rounded-off bits are != 0.5 ULP, they decide the rounding
     // on their own. When they are equal and the remainder is non-zero, the value still
     // needs to be rounded up. Only when the rounded off bits are 1/2 and the remainder
     // is zero, we have a half-to-even situation.
     let bits = x.bit_length();
     let lsb = bits - T::SIG_BITS as usize;
     let q = num::get_bits(&x, lsb, bits);
     let k = T::MIN_EXP_INT + lsb as i16;
     let z = rawfp::encode_normal(Unpacked::new(q, k));
     let q_even = q % 2 == 0;
     match num::compare_with_half_ulp(&x, lsb) {
         Greater => next_float(z),
         Less => z,
         Equal if rem.is_zero() && q_even => z,
         Equal => next_float(z),
     }
 }

 /// Ordinary round-to-even, obfuscated by having to round based on the remainder of a division.
 fn round_by_remainder<T: RawFloat>(v: Big, r: Big, q: u64, z: T) -> T {
     let mut v_minus_r = v;
     v_minus_r.sub(&r);
     if r < v_minus_r {
         z
     } else if r > v_minus_r {
         next_float(z)
     } else if q % 2 == 0 {
         z
     } else {
         next_float(z)
     }
 }
	//! The various algorithms from the paper.

	use crate::cmp::min;
	use crate::cmp::Ordering::{Equal, Greater, Less};
	use crate::num::dec2flt::num::{self, Big};
	use crate::num::dec2flt::rawfp::{self, fp_to_float, next_float, prev_float, RawFloat, Unpacked};
	use crate::num::dec2flt::table;
	use crate::num::diy_float::Fp;

	/// Number of significand bits in Fp
	const P: u32 = 64;

	// We simply store the best approximation for all exponents, so the variable "h" and the
	// associated conditions can be omitted. This trades performance for a couple kilobytes of space.

	fn power_of_ten(e: i16) -> Fp {
	assert!(e >= table::MIN_E);
	let i = e - table::MIN_E;
	let sig = table::POWERS.0[i as usize];
	let exp = table::POWERS.1[i as usize];
	Fp { f: sig, e: exp }
	}

	// In most architectures, floating point operations have an explicit bit size, therefore the
	// precision of the computation is determined on a per-operation basis.
	#[cfg(any(not(target_arch = "x86"), target_feature = "sse2"))]
	mod fpu_precision {
	pub fn set_precision<T>() {}
	}

	// On x86, the x87 FPU is used for float operations if the SSE/SSE2 extensions are not available.
	// The x87 FPU operates with 80 bits of precision by default, which means that operations will
	// round to 80 bits causing double rounding to happen when values are eventually represented as
	// 32/64 bit float values. To overcome this, the FPU control word can be set so that the
	// computations are performed in the desired precision.
	#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
	mod fpu_precision {
	use crate::mem::size_of;

	/// A structure used to preserve the original value of the FPU control word, so that it can be
	/// restored when the structure is dropped.
	///
	/// The x87 FPU is a 16-bits register whose fields are as follows:
	///
	/// \| 12-15 \| 10-11 \| 8-9 \| 6-7 \| 5 \| 4 \| 3 \| 2 \| 1 \| 0 \|
	/// \|------:\|------:\|----:\|----:\|---:\|---:\|---:\|---:\|---:\|---:\|
	/// \| \| RC \| PC \| \| PM \| UM \| OM \| ZM \| DM \| IM \|
	///
	/// The documentation for all of the fields is available in the IA-32 Architectures Software
	/// Developer's Manual (Volume 1).
	///
	/// The only field which is relevant for the following code is PC, Precision Control. This
	/// field determines the precision of the operations performed by the FPU. It can be set to:
	/// - 0b00, single precision i.e., 32-bits
	/// - 0b10, double precision i.e., 64-bits
	/// - 0b11, double extended precision i.e., 80-bits (default state)
	/// The 0b01 value is reserved and should not be used.
	pub struct FPUControlWord(u16);

	fn set_cw(cw: u16) {
	// SAFETY: the `fldcw` instruction has been audited to be able to work correctly with
	// any `u16`
	unsafe { asm!("fldcw $0" :: "m" (cw) :: "volatile") }
	}

	/// Sets the precision field of the FPU to `T` and returns a `FPUControlWord`.
	pub fn set_precision<T>() -> FPUControlWord {
	let cw = 0u16;

	// Compute the value for the Precision Control field that is appropriate for `T`.
	let cw_precision = match size_of::<T>() {
	4 => 0x0000, // 32 bits
	8 => 0x0200, // 64 bits
	_ => 0x0300, // default, 80 bits
	};

	// Get the original value of the control word to restore it later, when the
	// `FPUControlWord` structure is dropped
	// SAFETY: the `fnstcw` instruction has been audited to be able to work correctly with
	// any `u16`
	unsafe { asm!("fnstcw $0" : "=*m" (&cw) ::: "volatile") }

	// Set the control word to the desired precision. This is achieved by masking away the old
	// precision (bits 8 and 9, 0x300) and replacing it with the precision flag computed above.
	set_cw((cw & 0xFCFF) \| cw_precision);

	FPUControlWord(cw)
	}

	impl Drop for FPUControlWord {
	fn drop(&mut self) {
	set_cw(self.0)
	}
	}
	}

	/// The fast path of Bellerophon using machine-sized integers and floats.
	///
	/// This is extracted into a separate function so that it can be attempted before constructing
	/// a bignum.
	pub fn fast_path<T: RawFloat>(integral: &[u8], fractional: &[u8], e: i64) -> Option<T> {
	let num_digits = integral.len() + fractional.len();
	// log_10(f64::MAX_SIG) ~ 15.95. We compare the exact value to MAX_SIG near the end,
	// this is just a quick, cheap rejection (and also frees the rest of the code from
	// worrying about underflow).
	if num_digits > 16 {
	return None;
	}
	if e.abs() >= T::CEIL_LOG5_OF_MAX_SIG as i64 {
	return None;
	}
	let f = num::from_str_unchecked(integral.iter().chain(fractional.iter()));
	if f > T::MAX_SIG {
	return None;
	}

	// The fast path crucially depends on arithmetic being rounded to the correct number of bits
	// without any intermediate rounding. On x86 (without SSE or SSE2) this requires the precision
	// of the x87 FPU stack to be changed so that it directly rounds to 64/32 bit.
	// The `set_precision` function takes care of setting the precision on architectures which
	// require setting it by changing the global state (like the control word of the x87 FPU).
	let _cw = fpu_precision::set_precision::<T>();

	// The case e < 0 cannot be folded into the other branch. Negative powers result in
	// a repeating fractional part in binary, which are rounded, which causes real
	// (and occasionally quite significant!) errors in the final result.
	if e >= 0 {
	Some(T::from_int(f) * T::short_fast_pow10(e as usize))
	} else {
	Some(T::from_int(f) / T::short_fast_pow10(e.abs() as usize))
	}
	}

	/// Algorithm Bellerophon is trivial code justified by non-trivial numeric analysis.
	///
	/// It rounds ``f`` to a float with 64 bit significand and multiplies it by the best approximation
	/// of `10^e` (in the same floating point format). This is often enough to get the correct result.
	/// However, when the result is close to halfway between two adjacent (ordinary) floats, the
	/// compound rounding error from multiplying two approximation means the result may be off by a
	/// few bits. When this happens, the iterative Algorithm R fixes things up.
	///
	/// The hand-wavy "close to halfway" is made precise by the numeric analysis in the paper.
	/// In the words of Clinger:
	///
	/// > Slop, expressed in units of the least significant bit, is an inclusive bound for the error
	/// > accumulated during the floating point calculation of the approximation to f * 10^e. (Slop is
	/// > not a bound for the true error, but bounds the difference between the approximation z and
	/// > the best possible approximation that uses p bits of significand.)
	pub fn bellerophon<T: RawFloat>(f: &Big, e: i16) -> T {
	let slop = if f <= &Big::from_u64(T::MAX_SIG) {
	// The cases abs(e) < log5(2^N) are in fast_path()
	if e >= 0 { 0 } else { 3 }
	} else {
	if e >= 0 { 1 } else { 4 }
	};
	let z = rawfp::big_to_fp(f).mul(&power_of_ten(e)).normalize();
	let exp_p_n = 1 << (P - T::SIG_BITS as u32);
	let lowbits: i64 = (z.f % exp_p_n) as i64;
	// Is the slop large enough to make a difference when
	// rounding to n bits?
	if (lowbits - exp_p_n as i64 / 2).abs() <= slop {
	algorithm_r(f, e, fp_to_float(z))
	} else {
	fp_to_float(z)
	}
	}

	/// An iterative algorithm that improves a floating point approximation of `f * 10^e`.
	///
	/// Each iteration gets one unit in the last place closer, which of course takes terribly long to
	/// converge if `z0` is even mildly off. Luckily, when used as fallback for Bellerophon, the
	/// starting approximation is off by at most one ULP.
	fn algorithm_r<T: RawFloat>(f: &Big, e: i16, z0: T) -> T {
	let mut z = z0;
	loop {
	let raw = z.unpack();
	let (m, k) = (raw.sig, raw.k);
	let mut x = f.clone();
	let mut y = Big::from_u64(m);

	// Find positive integers `x`, `y` such that `x / y` is exactly `(f * 10^e) / (m * 2^k)`.
	// This not only avoids dealing with the signs of `e` and `k`, we also eliminate the
	// power of two common to `10^e` and `2^k` to make the numbers smaller.
	make_ratio(&mut x, &mut y, e, k);

	let m_digits = [(m & 0xFF_FF_FF_FF) as u32, (m >> 32) as u32];
	// This is written a bit awkwardly because our bignums don't support
	// negative numbers, so we use the absolute value + sign information.
	// The multiplication with m_digits can't overflow. If `x` or `y` are large enough that
	// we need to worry about overflow, then they are also large enough that `make_ratio` has
	// reduced the fraction by a factor of 2^64 or more.
	let (d2, d_negative) = if x >= y {
	// Don't need x any more, save a clone().
	x.sub(&y).mul_pow2(1).mul_digits(&m_digits);
	(x, false)
	} else {
	// Still need y - make a copy.
	let mut y = y.clone();
	y.sub(&x).mul_pow2(1).mul_digits(&m_digits);
	(y, true)
	};

	if d2 < y {
	let mut d2_double = d2;
	d2_double.mul_pow2(1);
	if m == T::MIN_SIG && d_negative && d2_double > y {
	z = prev_float(z);
	} else {
	return z;
	}
	} else if d2 == y {
	if m % 2 == 0 {
	if m == T::MIN_SIG && d_negative {
	z = prev_float(z);
	} else {
	return z;
	}
	} else if d_negative {
	z = prev_float(z);
	} else {
	z = next_float(z);
	}
	} else if d_negative {
	z = prev_float(z);
	} else {
	z = next_float(z);
	}
	}
	}

	/// Given `x = f` and `y = m` where `f` represent input decimal digits as usual and `m` is the
	/// significand of a floating point approximation, make the ratio `x / y` equal to
	/// `(f * 10^e) / (m * 2^k)`, possibly reduced by a power of two both have in common.
	fn make_ratio(x: &mut Big, y: &mut Big, e: i16, k: i16) {
	let (e_abs, k_abs) = (e.abs() as usize, k.abs() as usize);
	if e >= 0 {
	if k >= 0 {
	// x = f * 10^e, y = m * 2^k, except that we reduce the fraction by some power of two.
	let common = min(e_abs, k_abs);
	x.mul_pow5(e_abs).mul_pow2(e_abs - common);
	y.mul_pow2(k_abs - common);
	} else {
	// x = f * 10^e * 2^abs(k), y = m
	// This can't overflow because it requires positive `e` and negative `k`, which can
	// only happen for values extremely close to 1, which means that `e` and `k` will be
	// comparatively tiny.
	x.mul_pow5(e_abs).mul_pow2(e_abs + k_abs);
	}
	} else {
	if k >= 0 {
	// x = f, y = m * 10^abs(e) * 2^k
	// This can't overflow either, see above.
	y.mul_pow5(e_abs).mul_pow2(k_abs + e_abs);
	} else {
	// x = f * 2^abs(k), y = m * 10^abs(e), again reducing by a common power of two.
	let common = min(e_abs, k_abs);
	x.mul_pow2(k_abs - common);
	y.mul_pow5(e_abs).mul_pow2(e_abs - common);
	}
	}
	}

	/// Conceptually, Algorithm M is the simplest way to convert a decimal to a float.
	///
	/// We form a ratio that is equal to `f * 10^e`, then throwing in powers of two until it gives
	/// a valid float significand. The binary exponent `k` is the number of times we multiplied
	/// numerator or denominator by two, i.e., at all times `f * 10^e` equals `(u / v) * 2^k`.
	/// When we have found out significand, we only need to round by inspecting the remainder of the
	/// division, which is done in helper functions further below.
	///
	/// This algorithm is super slow, even with the optimization described in `quick_start()`.
	/// However, it's the simplest of the algorithms to adapt for overflow, underflow, and subnormal
	/// results. This implementation takes over when Bellerophon and Algorithm R are overwhelmed.
	/// Detecting underflow and overflow is easy: The ratio still isn't an in-range significand,
	/// yet the minimum/maximum exponent has been reached. In the case of overflow, we simply return
	/// infinity.
	///
	/// Handling underflow and subnormals is trickier. One big problem is that, with the minimum
	/// exponent, the ratio might still be too large for a significand. See underflow() for details.
	pub fn algorithm_m<T: RawFloat>(f: &Big, e: i16) -> T {
	let mut u;
	let mut v;
	let e_abs = e.abs() as usize;
	let mut k = 0;
	if e < 0 {
	u = f.clone();
	v = Big::from_small(1);
	v.mul_pow5(e_abs).mul_pow2(e_abs);
	} else {
	// FIXME possible optimization: generalize big_to_fp so that we can do the equivalent of
	// fp_to_float(big_to_fp(u)) here, only without the double rounding.
	u = f.clone();
	u.mul_pow5(e_abs).mul_pow2(e_abs);
	v = Big::from_small(1);
	}
	quick_start::<T>(&mut u, &mut v, &mut k);
	let mut rem = Big::from_small(0);
	let mut x = Big::from_small(0);
	let min_sig = Big::from_u64(T::MIN_SIG);
	let max_sig = Big::from_u64(T::MAX_SIG);
	loop {
	u.div_rem(&v, &mut x, &mut rem);
	if k == T::MIN_EXP_INT {
	// We have to stop at the minimum exponent, if we wait until `k < T::MIN_EXP_INT`,
	// then we'd be off by a factor of two. Unfortunately this means we have to special-
	// case normal numbers with the minimum exponent.
	// FIXME find a more elegant formulation, but run the `tiny-pow10` test to make sure
	// that it's actually correct!
	if x >= min_sig && x <= max_sig {
	break;
	}
	return underflow(x, v, rem);
	}
	if k > T::MAX_EXP_INT {
	return T::INFINITY;
	}
	if x < min_sig {
	u.mul_pow2(1);
	k -= 1;
	} else if x > max_sig {
	v.mul_pow2(1);
	k += 1;
	} else {
	break;
	}
	}
	let q = num::to_u64(&x);
	let z = rawfp::encode_normal(Unpacked::new(q, k));
	round_by_remainder(v, rem, q, z)
	}

	/// Skips over most Algorithm M iterations by checking the bit length.
	fn quick_start<T: RawFloat>(u: &mut Big, v: &mut Big, k: &mut i16) {
	// The bit length is an estimate of the base two logarithm, and log(u / v) = log(u) - log(v).
	// The estimate is off by at most 1, but always an under-estimate, so the error on log(u)
	// and log(v) are of the same sign and cancel out (if both are large). Therefore the error
	// for log(u / v) is at most one as well.
	// The target ratio is one where u/v is in an in-range significand. Thus our termination
	// condition is log2(u / v) being the significand bits, plus/minus one.
	// FIXME Looking at the second bit could improve the estimate and avoid some more divisions.
	let target_ratio = T::SIG_BITS as i16;
	let log2_u = u.bit_length() as i16;
	let log2_v = v.bit_length() as i16;
	let mut u_shift: i16 = 0;
	let mut v_shift: i16 = 0;
	assert!(*k == 0);
	loop {
	if *k == T::MIN_EXP_INT {
	// Underflow or subnormal. Leave it to the main function.
	break;
	}
	if *k == T::MAX_EXP_INT {
	// Overflow. Leave it to the main function.
	break;
	}
	let log2_ratio = (log2_u + u_shift) - (log2_v + v_shift);
	if log2_ratio < target_ratio - 1 {
	u_shift += 1;
	*k -= 1;
	} else if log2_ratio > target_ratio + 1 {
	v_shift += 1;
	*k += 1;
	} else {
	break;
	}
	}
	u.mul_pow2(u_shift as usize);
	v.mul_pow2(v_shift as usize);
	}

	fn underflow<T: RawFloat>(x: Big, v: Big, rem: Big) -> T {
	if x < Big::from_u64(T::MIN_SIG) {
	let q = num::to_u64(&x);
	let z = rawfp::encode_subnormal(q);
	return round_by_remainder(v, rem, q, z);
	}
	// Ratio isn't an in-range significand with the minimum exponent, so we need to round off
	// excess bits and adjust the exponent accordingly. The real value now looks like this:
	//
	// x lsb
	// /--------------\/
	// 1010101010101010.10101010101010 * 2^k
	// \-----/\-------/ \------------/
	// q trunc. (represented by rem)
	//
	// Therefore, when the rounded-off bits are != 0.5 ULP, they decide the rounding
	// on their own. When they are equal and the remainder is non-zero, the value still
	// needs to be rounded up. Only when the rounded off bits are 1/2 and the remainder
	// is zero, we have a half-to-even situation.
	let bits = x.bit_length();
	let lsb = bits - T::SIG_BITS as usize;
	let q = num::get_bits(&x, lsb, bits);
	let k = T::MIN_EXP_INT + lsb as i16;
	let z = rawfp::encode_normal(Unpacked::new(q, k));
	let q_even = q % 2 == 0;
	match num::compare_with_half_ulp(&x, lsb) {
	Greater => next_float(z),
	Less => z,
	Equal if rem.is_zero() && q_even => z,
	Equal => next_float(z),
	}
	}

	/// Ordinary round-to-even, obfuscated by having to round based on the remainder of a division.
	fn round_by_remainder<T: RawFloat>(v: Big, r: Big, q: u64, z: T) -> T {
	let mut v_minus_r = v;
	v_minus_r.sub(&r);
	if r < v_minus_r {
	z
	} else if r > v_minus_r {
	next_float(z)
	} else if q % 2 == 0 {
	z
	} else {
	next_float(z)
	}
	}