| #ifndef _TCUFLOAT_HPP |
| #define _TCUFLOAT_HPP |
| /*------------------------------------------------------------------------- |
| * drawElements Quality Program Tester Core |
| * ---------------------------------------- |
| * |
| * Copyright 2014 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| *//*! |
| * \file |
| * \brief Reconfigurable floating-point value template. |
| *//*--------------------------------------------------------------------*/ |
| |
| #include "tcuDefs.hpp" |
| |
| // For memcpy(). |
| #include <string.h> |
| |
| namespace tcu |
| { |
| |
/*--------------------------------------------------------------------*//*!
 * \brief Capability bits for the Flags template argument of Float
 *
 * The values are single bits and may be OR'ed together.
 *//*--------------------------------------------------------------------*/
enum FloatFlags
{
	FLOAT_HAS_SIGN			= 1 << 0,	//!< Format has a sign bit. Without it, convert() turns negative inputs into +0.
	FLOAT_SUPPORT_DENORM	= 1 << 1	//!< Format can hold denormalized values. Without it, convert() flushes underflowing results to zero.
};
| |
/*--------------------------------------------------------------------*//*!
 * \brief Rounding applied when a conversion has to drop mantissa bits
 *//*--------------------------------------------------------------------*/
enum RoundingDirection
{
	ROUND_TO_EVEN	= 0,	//!< To nearest representable value, ties to even.
	ROUND_DOWNWARD	= 1,	//!< Towards -Inf.
	ROUND_UPWARD	= 2		//!< Towards +Inf.
};
| |
| /*--------------------------------------------------------------------*//*! |
| * \brief Floating-point format template |
| * |
| * This template implements arbitrary floating-point handling. Template |
| * can be used for conversion between different formats and checking |
| * various properties of floating-point values. |
| *//*--------------------------------------------------------------------*/ |
| template <typename StorageType_, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> |
| class Float |
| { |
| public: |
| typedef StorageType_ StorageType; |
| |
| enum |
| { |
| EXPONENT_BITS = ExponentBits, |
| MANTISSA_BITS = MantissaBits, |
| EXPONENT_BIAS = ExponentBias, |
| FLAGS = Flags, |
| }; |
| |
| Float (void); |
| explicit Float (StorageType value); |
| explicit Float (float v, RoundingDirection rd = ROUND_TO_EVEN); |
| explicit Float (double v, RoundingDirection rd = ROUND_TO_EVEN); |
| |
| template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags> |
| static Float convert (const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& src, RoundingDirection rd = ROUND_TO_EVEN); |
| |
| static inline Float convert (const Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>& src, RoundingDirection = ROUND_TO_EVEN) { return src; } |
| |
| /*--------------------------------------------------------------------*//*! |
| * \brief Construct floating point value |
| * \param sign Sign. Must be +1/-1 |
| * \param exponent Exponent in range [1-ExponentBias, ExponentBias+1] |
| * \param mantissa Mantissa bits with implicit leading bit explicitly set |
| * \return The specified float |
| * |
| * This function constructs a floating point value from its inputs. |
| * The normally implicit leading bit of the mantissa must be explicitly set. |
| * The exponent normally used for zero/subnormals is an invalid input. Such |
| * values are specified with the leading mantissa bit of zero and the lowest |
| * normal exponent (1-ExponentBias). Additionally having both exponent and |
| * mantissa set to zero is a shorthand notation for the correctly signed |
| * floating point zero. Inf and NaN must be specified directly with an |
| * exponent of ExponentBias+1 and the appropriate mantissa (with leading |
| * bit set) |
| *//*--------------------------------------------------------------------*/ |
| static inline Float construct (int sign, int exponent, StorageType mantissa); |
| |
| /*--------------------------------------------------------------------*//*! |
| * \brief Construct floating point value. Explicit version |
| * \param sign Sign. Must be +1/-1 |
| * \param exponent Exponent in range [-ExponentBias, ExponentBias+1] |
| * \param mantissa Mantissa bits |
| * \return The specified float |
| * |
| * This function constructs a floating point value from its inputs with |
| * minimal intervention. |
| * The sign is turned into a sign bit and the exponent bias is added. |
| * See IEEE-754 for additional information on the inputs and |
| * the encoding of special values. |
| *//*--------------------------------------------------------------------*/ |
| static Float constructBits (int sign, int exponent, StorageType mantissaBits); |
| |
| StorageType bits (void) const { return m_value; } |
| float asFloat (void) const; |
| double asDouble (void) const; |
| |
| inline int signBit (void) const { return (int)(m_value >> (ExponentBits+MantissaBits)) & 1; } |
| inline StorageType exponentBits (void) const { return (m_value >> MantissaBits) & ((StorageType(1)<<ExponentBits)-1); } |
| inline StorageType mantissaBits (void) const { return m_value & ((StorageType(1)<<MantissaBits)-1); } |
| |
| inline int sign (void) const { return signBit() ? -1 : 1; } |
| inline int exponent (void) const { return isDenorm() ? 1 - ExponentBias : (int)exponentBits() - ExponentBias; } |
| inline StorageType mantissa (void) const { return isZero() || isDenorm() ? mantissaBits() : (mantissaBits() | (StorageType(1)<<MantissaBits)); } |
| |
| inline bool isInf (void) const { return exponentBits() == ((1<<ExponentBits)-1) && mantissaBits() == 0; } |
| inline bool isNaN (void) const { return exponentBits() == ((1<<ExponentBits)-1) && mantissaBits() != 0; } |
| inline bool isZero (void) const { return exponentBits() == 0 && mantissaBits() == 0; } |
| inline bool isDenorm (void) const { return exponentBits() == 0 && mantissaBits() != 0; } |
| |
| static Float zero (int sign); |
| static Float inf (int sign); |
| static Float nan (void); |
| |
| static Float largestNormal (int sign); |
| static Float smallestNormal (int sign); |
| |
| private: |
| StorageType m_value; |
| } DE_WARN_UNUSED_TYPE; |
| |
| // Common floating-point types. |
| typedef Float<deUint16, 5, 10, 15, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM> Float16; //!< IEEE 754-2008 16-bit floating-point value |
| typedef Float<deUint32, 8, 23, 127, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM> Float32; //!< IEEE 754 32-bit floating-point value |
| typedef Float<deUint64, 11, 52, 1023, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM> Float64; //!< IEEE 754 64-bit floating-point value |
| |
| typedef Float<deUint16, 5, 10, 15, FLOAT_HAS_SIGN> Float16Denormless; //!< IEEE 754-2008 16-bit floating-point value without denormalized support |
| |
| template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> |
| inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (void) |
| : m_value(0) |
| { |
| } |
| |
| template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> |
| inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (StorageType value) |
| : m_value(value) |
| { |
| } |
| |
| template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> |
| inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (float value, RoundingDirection rd) |
| : m_value(0) |
| { |
| deUint32 u32; |
| memcpy(&u32, &value, sizeof(deUint32)); |
| *this = convert(Float32(u32), rd); |
| } |
| |
| template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> |
| inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (double value, RoundingDirection rd) |
| : m_value(0) |
| { |
| deUint64 u64; |
| memcpy(&u64, &value, sizeof(deUint64)); |
| *this = convert(Float64(u64), rd); |
| } |
| |
| template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> |
| inline float Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asFloat (void) const |
| { |
| float v; |
| deUint32 u32 = Float32::convert(*this).bits(); |
| memcpy(&v, &u32, sizeof(deUint32)); |
| return v; |
| } |
| |
| template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> |
| inline double Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asDouble (void) const |
| { |
| double v; |
| deUint64 u64 = Float64::convert(*this).bits(); |
| memcpy(&v, &u64, sizeof(deUint64)); |
| return v; |
| } |
| |
| template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> |
| inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::zero (int sign) |
| { |
| DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1)); |
| return Float(StorageType((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits))); |
| } |
| |
| template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> |
| inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::inf (int sign) |
| { |
| DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1)); |
| return Float(StorageType(((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)) | (((1ull<<ExponentBits)-1) << MantissaBits))); |
| } |
| |
| template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> |
| inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::nan (void) |
| { |
| return Float(StorageType((1ull<<(ExponentBits+MantissaBits))-1)); |
| } |
| |
| template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> |
| inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::largestNormal (int sign) |
| { |
| DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1)); |
| return Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct(sign, ExponentBias, (static_cast<StorageType>(1) << (MantissaBits + 1)) - 1); |
| } |
| |
| template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> |
| inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::smallestNormal (int sign) |
| { |
| DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1)); |
| return Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct(sign, 1 - ExponentBias, (static_cast<StorageType>(1) << MantissaBits)); |
| } |
| |
| template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> |
| Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> |
| Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct |
| (int sign, int exponent, StorageType mantissa) |
| { |
| // Repurpose this otherwise invalid input as a shorthand notation for zero (no need for caller to care about internal representation) |
| const bool isShorthandZero = exponent == 0 && mantissa == 0; |
| |
| // Handles the typical notation for zero (min exponent, mantissa 0). Note that the exponent usually used exponent (-ExponentBias) for zero/subnormals is not used. |
| // Instead zero/subnormals have the (normally implicit) leading mantissa bit set to zero. |
| const bool isDenormOrZero = (exponent == 1 - ExponentBias) && (mantissa >> MantissaBits == 0); |
| const StorageType s = StorageType((StorageType(sign < 0 ? 1 : 0)) << (StorageType(ExponentBits+MantissaBits))); |
| const StorageType exp = (isShorthandZero || isDenormOrZero) ? StorageType(0) : StorageType(exponent + ExponentBias); |
| |
| DE_ASSERT(sign == +1 || sign == -1); |
| DE_ASSERT(isShorthandZero || isDenormOrZero || mantissa >> MantissaBits == 1); |
| DE_ASSERT(exp >> ExponentBits == 0); |
| |
| return Float(StorageType(s | (exp << MantissaBits) | (mantissa & ((StorageType(1)<<MantissaBits)-1)))); |
| } |
| |
| template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> |
| Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> |
| Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::constructBits |
| (int sign, int exponent, StorageType mantissaBits) |
| { |
| const StorageType signBit = static_cast<StorageType>(sign < 0 ? 1 : 0); |
| const StorageType exponentBits = static_cast<StorageType>(exponent + ExponentBias); |
| |
| DE_ASSERT(sign == +1 || sign == -1 ); |
| DE_ASSERT(exponentBits >> ExponentBits == 0); |
| DE_ASSERT(mantissaBits >> MantissaBits == 0); |
| |
| return Float(StorageType((signBit << (ExponentBits+MantissaBits)) | (exponentBits << MantissaBits) | (mantissaBits))); |
| } |
| |
| template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> |
| template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags> |
| Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> |
| Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::convert |
| (const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& other, RoundingDirection rd) |
| { |
| if (!(Flags & FLOAT_HAS_SIGN) && other.sign() < 0) |
| { |
| // Negative number, truncate to zero. |
| return zero(+1); |
| } |
| |
| if (other.isInf()) |
| { |
| return inf(other.sign()); |
| } |
| |
| if (other.isNaN()) |
| { |
| return nan(); |
| } |
| |
| if (other.isZero()) |
| { |
| return zero(other.sign()); |
| } |
| |
| const int eMin = 1 - ExponentBias; |
| const int eMax = ((1<<ExponentBits)-2) - ExponentBias; |
| |
| const StorageType s = StorageType((StorageType(other.signBit())) << (StorageType(ExponentBits+MantissaBits))); // \note Not sign, but sign bit. |
| int e = other.exponent(); |
| deUint64 m = other.mantissa(); |
| |
| // Normalize denormalized values prior to conversion. |
| while (!(m & (1ull<<OtherMantissaBits))) |
| { |
| m <<= 1; |
| e -= 1; |
| } |
| |
| if (e < eMin) |
| { |
| // Underflow. |
| if ((Flags & FLOAT_SUPPORT_DENORM) && (eMin-e-1 <= MantissaBits)) |
| { |
| // Shift and round. |
| int bitDiff = (OtherMantissaBits-MantissaBits) + (eMin-e); |
| deUint64 lastBitsMask = (1ull << bitDiff) - 1ull; |
| deUint64 lastBits = (static_cast<deUint64>(m) & lastBitsMask); |
| deUint64 half = (1ull << (bitDiff - 1)) - 1; |
| deUint64 bias = (m >> bitDiff) & 1; |
| |
| switch (rd) |
| { |
| case ROUND_TO_EVEN: |
| return Float(StorageType(s | (m + half + bias) >> bitDiff)); |
| |
| case ROUND_DOWNWARD: |
| m = (m >> bitDiff); |
| if (lastBits != 0ull && other.sign() < 0) |
| { |
| m += 1; |
| } |
| return Float(StorageType(s | m)); |
| |
| case ROUND_UPWARD: |
| m = (m >> bitDiff); |
| if (lastBits != 0ull && other.sign() > 0) |
| { |
| m += 1; |
| } |
| return Float(StorageType(s | m)); |
| |
| default: |
| DE_ASSERT(false); |
| break; |
| } |
| } |
| |
| return zero(other.sign()); |
| } |
| |
| // Remove leading 1. |
| m = m & ~(1ull<<OtherMantissaBits); |
| |
| if (MantissaBits < OtherMantissaBits) |
| { |
| // Round mantissa. |
| int bitDiff = OtherMantissaBits-MantissaBits; |
| deUint64 lastBitsMask = (1ull << bitDiff) - 1ull; |
| deUint64 lastBits = (static_cast<deUint64>(m) & lastBitsMask); |
| deUint64 half = (1ull << (bitDiff - 1)) - 1; |
| deUint64 bias = (m >> bitDiff) & 1; |
| |
| switch (rd) |
| { |
| case ROUND_TO_EVEN: |
| m = (m + half + bias) >> bitDiff; |
| break; |
| |
| case ROUND_DOWNWARD: |
| m = (m >> bitDiff); |
| if (lastBits != 0ull && other.sign() < 0) |
| { |
| m += 1; |
| } |
| break; |
| |
| case ROUND_UPWARD: |
| m = (m >> bitDiff); |
| if (lastBits != 0ull && other.sign() > 0) |
| { |
| m += 1; |
| } |
| break; |
| |
| default: |
| DE_ASSERT(false); |
| break; |
| } |
| |
| if (m & (1ull<<MantissaBits)) |
| { |
| // Overflow in mantissa. |
| m = 0; |
| e += 1; |
| } |
| } |
| else |
| { |
| int bitDiff = MantissaBits-OtherMantissaBits; |
| m = m << bitDiff; |
| } |
| |
| if (e > eMax) |
| { |
| // Overflow. |
| return (((other.sign() < 0 && rd == ROUND_UPWARD) || (other.sign() > 0 && rd == ROUND_DOWNWARD)) ? largestNormal(other.sign()) : inf(other.sign())); |
| } |
| |
| DE_ASSERT(de::inRange(e, eMin, eMax)); |
| DE_ASSERT(((e + ExponentBias) & ~((1ull<<ExponentBits)-1)) == 0); |
| DE_ASSERT((m & ~((1ull<<MantissaBits)-1)) == 0); |
| |
| return Float(StorageType(s | (StorageType(e + ExponentBias) << MantissaBits) | m)); |
| } |
| |
| } // tcu |
| |
| #endif // _TCUFLOAT_HPP |