/* Source blob: 06d6ed3d99330a997b20007dc670143d3d16d902 */
#include <math.h>
#include <stdint.h>
/*
 * Floating-point remainder of x / y, computed with integer arithmetic on the
 * raw bit patterns (the code assumes the binary64 layout: 1 sign bit,
 * 11 exponent bits, 52 fraction bits).
 *
 * Returns:
 *   - (x*y)/(x*y), i.e. NaN, when y is zero, y is NaN, or x has an all-ones
 *     exponent field (infinity or NaN);
 *   - x unchanged when |x| < |y|;
 *   - a zero carrying the sign of x when the remainder is zero;
 *   - otherwise the remainder, carrying the sign of x.
 */
double fmod(double x, double y)
{
    union {
        double f;
        uint64_t i;
    } ux = {x}, uy = {y};
    const uint64_t implicit_bit = UINT64_C(1) << 52;
    const uint64_t frac_mask = UINT64_MAX >> 12;
    const uint64_t sign_x = ux.i & (UINT64_C(1) << 63);
    int exp_x = (int)((ux.i >> 52) & 0x7ff);
    int exp_y = (int)((uy.i >> 52) & 0x7ff);
    /*
     * Work on plain integer copies of the bit patterns instead of the union
     * members: per the original author's note, gcc otherwise adds float
     * loads/stores to the inner loops, hurting performance and code size.
     */
    uint64_t mx = ux.i;
    uint64_t my = uy.i;
    uint64_t t;

    /* Shifting left by one discards the sign bit, leaving the magnitude. */
    if ((my << 1) == 0 || isnan(y) || exp_x == 0x7ff) {
        return (x * y) / (x * y);
    }
    if ((mx << 1) <= (my << 1)) {
        /* |x| == |y| gives a signed zero; |x| < |y| gives x itself. */
        return ((mx << 1) == (my << 1)) ? 0 * x : x;
    }

    /*
     * Normalize x: end up with the significand's leading 1 at bit 52 and
     * everything above it cleared. For a subnormal, count the leading zeros
     * of the fraction by walking a scratch copy, lowering the exponent once
     * per zero.
     */
    if (exp_x != 0) {
        mx = (mx & frac_mask) | implicit_bit;
    } else {
        t = mx << 12;
        while ((t >> 63) == 0) {
            exp_x--;
            t <<= 1;
        }
        mx <<= 1 - exp_x;
    }

    /* Normalize y the same way. */
    if (exp_y != 0) {
        my = (my & frac_mask) | implicit_bit;
    } else {
        t = my << 12;
        while ((t >> 63) == 0) {
            exp_y--;
            t <<= 1;
        }
        my <<= 1 - exp_y;
    }

    /*
     * Shift-and-subtract reduction: one conditional subtraction per exponent
     * step, plus a final one once the exponents agree.
     */
    for (;;) {
        t = mx - my;
        if ((t >> 63) == 0) {
            /* mx >= my, so the subtraction did not wrap. */
            if (t == 0) {
                return 0 * x;
            }
            mx = t;
        }
        if (exp_x <= exp_y) {
            break;
        }
        mx <<= 1;
        exp_x--;
    }

    /* Bring the remainder's leading 1 back up to bit 52. */
    while ((mx >> 52) == 0) {
        mx <<= 1;
        exp_x--;
    }

    /* Reassemble: normal result if the exponent is positive, else subnormal. */
    if (exp_x > 0) {
        mx = (mx - implicit_bit) | ((uint64_t)exp_x << 52);
    } else {
        mx >>= 1 - exp_x;
    }

    ux.i = mx | sign_x;
    return ux.f;
}