math.ha (26382B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 4 // Sections of the code below, in particular log() and exp(), are based on Go's 5 // implementation, which is, in turn, based on FreeBSD's. The original C code, 6 // as well as the respective comments and constants are from 7 // /usr/src/lib/msun/src/{e_log,e_exp}.c. 8 // 9 // The FreeBSD copyright notice: 10 // ==================================================== 11 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 12 // 13 // Developed at SunPro, a Sun Microsystems, Inc. business. 14 // Permission to use, copy, modify, and distribute this 15 // software is freely granted, provided that this notice 16 // is preserved. 17 // ==================================================== 18 // 19 // The Go copyright notice: 20 // ==================================================== 21 // Copyright (c) 2009 The Go Authors. All rights reserved. 22 // 23 // Redistribution and use in source and binary forms, with or without 24 // modification, are permitted provided that the following conditions are 25 // met: 26 // 27 // * Redistributions of source code must retain the above copyright 28 // notice, this list of conditions and the following disclaimer. 29 // * Redistributions in binary form must reproduce the above 30 // copyright notice, this list of conditions and the following disclaimer 31 // in the documentation and/or other materials provided with the 32 // distribution. 33 // * Neither the name of Google Inc. nor the names of its 34 // contributors may be used to endorse or promote products derived from 35 // this software without specific prior written permission. 36 // 37 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 38 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 39 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 40 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 41 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 42 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 43 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 45 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 46 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 47 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 // ==================================================== 49 50 use types; 51 52 // The standard tolerance used by [[isclosef32]] and [[isclosef64]], which is 53 // just an arbitrary way to measure whether two floating-point numbers are 54 // "sufficiently" close to each other. 55 export def STANDARD_TOL = 1e-14; 56 57 // Returns whether x and y are within tol of each other. 58 export fn isclosef64(x: f64, y: f64, tol: f64 = STANDARD_TOL) bool = { 59 if (isnan(x) || isnan(y) || isnan(tol)) { 60 return false; 61 }; 62 return absf64(x - y) < tol; 63 }; 64 65 // Returns whether x and y are within tol of each other. 66 export fn isclosef32(x: f32, y: f32, tol: f32 = STANDARD_TOL) bool = { 67 if (isnan(x) || isnan(y) || isnan(tol)) { 68 return false; 69 }; 70 return absf32(x - y) < tol; 71 }; 72 73 // e - https://oeis.org/A001113 74 export def E: f64 = 2.71828182845904523536028747135266249775724709369995957496696763; 75 // pi - https://oeis.org/A000796 76 export def PI: f64 = 3.14159265358979323846264338327950288419716939937510582097494459; 77 // tau - https://oeis.org/A019692 78 export def TAU: f64 = 6.2831853071795864769252867665590057683943387987502116419498892; 79 // phi - https://oeis.org/A001622 80 export def PHI: f64 = 1.61803398874989484820458683436563811772030917980576286213544862; 81 // sqrt(2) - https://oeis.org/A002193 82 export def SQRT_2: f64 = 1.41421356237309504880168872420969807856967187537694807317667974; 83 // sqrt(e) - https://oeis.org/A019774 84 export def SQRT_E: f64 = 1.64872127070012814684865078781416357165377610071014801157507931; 85 // sqrt(pi) - https://oeis.org/A002161 86 export def SQRT_PI: f64 = 1.77245385090551602729816748334114518279754945612238712821380779; 87 // sqrt(phi) - https://oeis.org/A139339 88 export def SQRT_PHI: f64 = 1.27201964951406896425242246173749149171560804184009624861664038; 89 // ln(2) - https://oeis.org/A002162 90 export def LN_2: f64 = 0.693147180559945309417232121458176568075500134360255254120680009; 91 // ln(2) - https://oeis.org/A002162 92 export def LN2_HI: f64 = 6.93147180369123816490e-01; 93 // ln(2) - https://oeis.org/A002162 94 export def LN2_LO: f64 = 1.90821492927058770002e-10; 95 // log_{2}(e) 96 export def LOG2_E: f64 = 1f64 / LN_2; 97 // ln(10) - https://oeis.org/A002392 98 export def LN_10: f64 = 2.30258509299404568401799145468436420760110148862877297603332790; 99 // log_{10}(e) 100 export def LOG10_E: f64 = 1f64 / LN_10; 101 102 // __ieee754_log(x) 103 // Return the logarithm of x 104 // 105 // Method : 106 // 1. Argument Reduction: find k and f such that 107 // x = 2**k * (1+f), 108 // where sqrt(2)/2 < 1+f < sqrt(2) . 109 // 110 // 2. Approximation of log(1+f). 111 // Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) 112 // = 2s + 2/3 s**3 + 2/5 s**5 + ....., 113 // = 2s + s*R 114 // We use a special Reme algorithm on [0,0.1716] to generate 115 // a polynomial of degree 14 to approximate R. The maximum error 116 // of this polynomial approximation is bounded by 2**-58.45. In 117 // other words, 118 // 2 4 6 8 10 12 14 119 // R(z) ~ L1*s +L2*s +L3*s +L4*s +L5*s +L6*s +L7*s 120 // (the values of L1 to L7 are listed in the program) and 121 // | 2 14 | -58.45 122 // | L1*s +...+L7*s - R(z) | <= 2 123 // | | 124 // Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. 125 // In order to guarantee error in log below 1ulp, we compute log by 126 // log(1+f) = f - s*(f - R) (if f is not too large) 127 // log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) 128 // 129 // 3. Finally, log(x) = k*Ln2 + log(1+f). 130 // = k*Ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*Ln2_lo))) 131 // Here Ln2 is split into two floating point number: 132 // Ln2_hi + Ln2_lo, 133 // where n*Ln2_hi is always exact for |n| < 2000. 134 // 135 // Special cases: 136 // log(x) is NaN with signal if x < 0 (including -INF) ; 137 // log(+INF) is +INF; log(0) is -INF with signal; 138 // log(NaN) is that NaN with no signal. 139 // 140 // Accuracy: 141 // according to an error analysis, the error is always less than 142 // 1 ulp (unit in the last place). 143 // 144 // Constants: 145 // The hexadecimal values are the intended ones for the following 146 // constants. The decimal values may be used, provided that the 147 // compiler will convert from decimal to binary accurately enough 148 // to produce the hexadecimal values shown. 149 150 // Returns the natural logarithm of x. 151 export fn logf64(x: f64) f64 = { 152 const L1 = 6.666666666666735130e-01; // 3fe55555 55555593 153 const L2 = 3.999999999940941908e-01; // 3fd99999 9997fa04 154 const L3 = 2.857142874366239149e-01; // 3fd24924 94229359 155 const L4 = 2.222219843214978396e-01; // 3fcc71c5 1d8e78af 156 const L5 = 1.818357216161805012e-01; // 3fc74664 96cb03de 157 const L6 = 1.531383769920937332e-01; // 3fc39a09 d078c69f 158 const L7 = 1.479819860511658591e-01; // 3fc2f112 df3e5244 159 160 // special cases 161 if (isnan(x) || x == INF) { 162 return x; 163 } else if (x < 0f64) { 164 return NAN; 165 } else if (x == 0f64) { 166 return -INF; 167 }; 168 169 // Reduce 170 const (f1, ki) = frexpf64(x); 171 if (f1 < (SQRT_2 / 2f64)) { 172 f1 *= 2f64; 173 ki -= 1i64; 174 }; 175 let f = f1 - 1f64; 176 let k = (ki: f64); 177 178 // Compute 179 const s = f / (2f64 + f); 180 const s2 = s * s; 181 const s4 = s2 * s2; 182 const t1 = s2 * (L1 + s4 * (L3 + s4 * (L5 + s4 * L7))); 183 const t2 = s4 * (L2 + s4 * (L4 + s4 * L6)); 184 const R = t1 + t2; 185 const hfsq = 0.5f64 * f * f; 186 return k * LN2_HI - ((hfsq - (s * (hfsq + R) + k * LN2_LO)) - f); 187 }; 188 189 // Returns the decimal logarithm of x. 190 export fn log10f64(x: f64) f64 = { 191 return logf64(x) * (1f64 / LN_10); 192 }; 193 194 // Returns the binary logarithm of x. 195 export fn log2f64(x: f64) f64 = { 196 const (frac, exp) = frexpf64(x); 197 // Make sure exact powers of two give an exact answer. 198 // Don't depend on log(0.5) * (1 / LN_2) + exp being exactly exp - 1. 199 if (frac == 0.5f64) { 200 return ((exp - 1): f64); 201 }; 202 return logf64(frac) * (1f64 / LN_2) + (exp: f64); 203 }; 204 205 // double log1p(double x) 206 // 207 // Method : 208 // 1. Argument Reduction: find k and f such that 209 // 1+x = 2**k * (1+f), 210 // where sqrt(2)/2 < 1+f < sqrt(2) . 211 // 212 // Note. If k=0, then f=x is exact. However, if k!=0, then f 213 // may not be representable exactly. In that case, a correction 214 // term is need. Let u=1+x rounded. Let c = (1+x)-u, then 215 // log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u), 216 // and add back the correction term c/u. 217 // (Note: when x > 2**53, one can simply return log(x)) 218 // 219 // 2. Approximation of log1p(f). 220 // Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) 221 // = 2s + 2/3 s**3 + 2/5 s**5 + ....., 222 // = 2s + s*R 223 // We use a special Reme algorithm on [0,0.1716] to generate 224 // a polynomial of degree 14 to approximate R The maximum error 225 // of this polynomial approximation is bounded by 2**-58.45. In 226 // other words, 227 // 2 4 6 8 10 12 14 228 // R(z) ~ LP1*s +LP2*s +LP3*s +LP4*s +LP5*s +LP6*s +LP7*s 229 // (the values of LP1 to LP7 are listed in the program) 230 // and 231 // | 2 14 | -58.45 232 // | LP1*s +...+LP7*s - R(z) | <= 2 233 // | | 234 // Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. 235 // In order to guarantee error in log below 1ulp, we compute log 236 // by 237 // log1p(f) = f - (hfsq - s*(hfsq+R)). 238 // 239 // 3. Finally, log1p(x) = k*ln2 + log1p(f). 240 // = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) 241 // Here ln2 is split into two floating point number: 242 // ln2_hi + ln2_lo, 243 // where n*ln2_hi is always exact for |n| < 2000. 244 // 245 // Special cases: 246 // log1p(x) is NaN with signal if x < -1 (including -INF) ; 247 // log1p(+INF) is +INF; log1p(-1) is -INF with signal; 248 // log1p(NaN) is that NaN with no signal. 249 // 250 // Accuracy: 251 // according to an error analysis, the error is always less than 252 // 1 ulp (unit in the last place). 253 // 254 // Constants: 255 // The hexadecimal values are the intended ones for the following 256 // constants. The decimal values may be used, provided that the 257 // compiler will convert from decimal to binary accurately enough 258 // to produce the hexadecimal values shown. 259 // 260 // Note: Assuming log() return accurate answer, the following 261 // algorithm can be used to compute log1p(x) to within a few ULP: 262 // 263 // u = 1+x; 264 // if(u==1.0) return x ; else 265 // return log(u)*(x/(u-1.0)); 266 // 267 // See HP-15C Advanced Functions Handbook, p.193. 268 269 // Returns the natural logarithm of 1 plus its argument x. 270 // It is more accurate than log(1 + x) when x is near zero. 271 export fn log1pf64(x: f64) f64 = { 272 // sqrt(2) - 1 273 const SQRT2M1 = 4.142135623730950488017e-01; // 0x3fda827999fcef34 274 // sqrt(2) / 2 - 1 275 const SQRT2HALFM1 = -2.928932188134524755992e-01; // 0xbfd2bec333018866 276 const SMALL = 1f64 / ((1i64 << 29): f64); // 2**-29 277 const TINY = 1f64 / ((1i64 << 54): f64); // 2**-54 278 const TWO53 = ((1i64 << 53): f64); // 2**53 279 const LN2HI = 6.93147180369123816490e-01; // 3fe62e42fee00000 280 const LN2LO = 1.90821492927058770002e-10; // 3dea39ef35793c76 281 const LP1 = 6.666666666666735130e-01; // 3fe5555555555593 282 const LP2 = 3.999999999940941908e-01; // 3fd999999997fa04 283 const LP3 = 2.857142874366239149e-01; // 3fd2492494229359 284 const LP4 = 2.222219843214978396e-01; // 3fcc71c51d8e78af 285 const LP5 = 1.818357216161805012e-01; // 3fc7466496cb03de 286 const LP6 = 1.531383769920937332e-01; // 3fc39a09d078c69f 287 const LP7 = 1.479819860511658591e-01; // 3fc2f112df3e5244 288 289 if (x < -1f64 || isnan(x)) { 290 return NAN; 291 } else if (x == -1f64) { 292 return -INF; 293 } else if (x == INF) { 294 return INF; 295 }; 296 297 const absx = absf64(x); 298 299 let f = 0f64; 300 let iu = 0u64; 301 let k = 1i64; 302 if (absx < SQRT2M1) { // |x| < Sqrt(2)-1 303 if (absx < SMALL) { // |x| < 2**-29 304 if (absx < TINY) { // |x| < 2**-54 305 return x; 306 }; 307 return x - (x * x * 0.5f64); 308 }; 309 if (x > SQRT2HALFM1) { // Sqrt(2)/2-1 < x 310 // (Sqrt(2)/2-1) < x < (Sqrt(2)-1) 311 k = 0; 312 f = x; 313 iu = 1; 314 }; 315 }; 316 let c = 0f64; 317 if (k != 0) { 318 let u = 0f64; 319 if (absx < TWO53) { // 1<<53 320 u = 1.0 + x; 321 iu = f64bits(u); 322 k = (((iu >> 52) - 1023): i64); 323 // Correction term 324 if (k > 0) { 325 c = 1f64 - (u - x); 326 } else { 327 c = x - (u - 1f64); 328 }; 329 c /= u; 330 } else { 331 u = x; 332 iu = f64bits(u); 333 k = (((iu >> 52) - 1023): i64); 334 c = 0f64; 335 }; 336 iu &= 0x000fffffffffffff; 337 if (iu < 0x0006a09e667f3bcd) { // Mantissa of Sqrt(2) 338 // Normalize u 339 u = f64frombits(iu | 0x3ff0000000000000); 340 } else { 341 k += 1; 342 // Normalize u/2 343 u = f64frombits(iu | 0x3fe0000000000000); 344 iu = (0x0010000000000000 - iu) >> 2; 345 }; 346 f = u - 1f64; // Sqrt(2)/2 < u < Sqrt(2) 347 }; 348 const hfsq = 0.5 * f * f; 349 let s = 0f64; 350 let R = 0f64; 351 let z = 0f64; 352 if (iu == 0) { // |f| < 2**-20 353 if (f == 0f64) { 354 if (k == 0) { 355 return 0f64; 356 }; 357 c += (k: f64) * LN2LO; 358 return (k: f64) * LN2HI + c; 359 }; 360 R = hfsq * (1.0 - 0.66666666666666666 * f); // Avoid division 361 if (k == 0) { 362 return f - R; 363 }; 364 return (k: f64) * LN2HI - ((R - ((k: f64) * LN2LO + c)) - f); 365 }; 366 s = f / (2f64 + f); 367 z = s * s; 368 R = z * (LP1 + 369 z * (LP2 + 370 z * (LP3 + 371 z * (LP4 + 372 z * (LP5 + 373 z * (LP6 + 374 z * LP7)))))); 375 if (k == 0) { 376 return f - (hfsq - s * (hfsq + R)); 377 }; 378 return (k: f64) * LN2HI - 379 ((hfsq - (s * (hfsq + R) + ((k: f64) * LN2LO + c))) - f); 380 }; 381 382 // exp(x) 383 // Returns the exponential of x. 384 // 385 // Method 386 // 1. Argument reduction: 387 // Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. 388 // Given x, find r and integer k such that 389 // 390 // x = k*ln2 + r, |r| <= 0.5*ln2. 391 // 392 // Here r will be represented as r = hi-lo for better 393 // accuracy. 394 // 395 // 2. Approximation of exp(r) by a special rational function on 396 // the interval [0,0.34658]: 397 // Write 398 // R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... 399 // We use a special Remez algorithm on [0,0.34658] to generate 400 // a polynomial of degree 5 to approximate R. The maximum error 401 // of this polynomial approximation is bounded by 2**-59. In 402 // other words, 403 // R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 404 // (where z=r*r, and the values of P1 to P5 are listed below) 405 // and 406 // | 5 | -59 407 // | 2.0+P1*z+...+P5*z - R(z) | <= 2 408 // | | 409 // The computation of exp(r) thus becomes 410 // 2*r 411 // exp(r) = 1 + ------- 412 // R - r 413 // r*R1(r) 414 // = 1 + r + ----------- (for better accuracy) 415 // 2 - R1(r) 416 // where 417 // 2 4 10 418 // R1(r) = r - (P1*r + P2*r + ... + P5*r ). 419 // 420 // 3. Scale back to obtain exp(x): 421 // From step 1, we have 422 // exp(x) = 2**k * exp(r) 423 // 424 // Special cases: 425 // exp(INF) is INF, exp(NaN) is NaN; 426 // exp(-INF) is 0, and 427 // for finite argument, only exp(0)=1 is exact. 428 // 429 // Accuracy: 430 // according to an error analysis, the error is always less than 431 // 1 ulp (unit in the last place). 432 // 433 // Misc. info. 434 // For IEEE double 435 // if x > 7.09782712893383973096e+02 then exp(x) overflow 436 // if x < -7.45133219101941108420e+02 then exp(x) underflow 437 // 438 // Constants: 439 // The hexadecimal values are the intended ones for the following 440 // constants. The decimal values may be used, provided that the 441 // compiler will convert from decimal to binary accurately enough 442 // to produce the hexadecimal values shown. 443 444 // Returns e^r * 2^k where r = hi - lo and |r| <= (ln(2) / 2). 445 export fn expmultif64(hi: f64, lo: f64, k: i64) f64 = { 446 const P1 = 1.66666666666666657415e-01; // 0x3fc55555; 0x55555555 447 const P2 = -2.77777777770155933842e-03; // 0xbf66c16c; 0X16bebd9n 448 const P3 = 6.61375632143793436117e-05; // 0x3f11566a; 0Xaf25de2c 449 const P4 = -1.65339022054652515390e-06; // 0xbebbbd41; 0Xc5d26bf1 450 const P5 = 4.13813679705723846039e-08; // 0x3e663769; 0X72bea4d0 451 452 let r = hi - lo; 453 let t = r * r; 454 let c = r - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); 455 const y = 1f64 - ((lo - (r * c) / (2f64 - c)) - hi); 456 return ldexpf64(y, k); 457 }; 458 459 // Returns e^x. 460 export fn expf64(x: f64) f64 = { 461 const overflow = 7.09782712893383973096e+02; 462 const underflow = -7.45133219101941108420e+02; 463 const near_zero = 1f64 / ((1i64 << 28i64): f64); 464 465 if (isnan(x) || x == INF) { 466 return x; 467 } else if (x == -INF) { 468 return 0f64; 469 } else if (x > overflow) { 470 return INF; 471 } else if (x < underflow) { 472 return 0f64; 473 } else if (-near_zero < x && x < near_zero) { 474 return 1f64 + x; 475 }; 476 477 // Reduce; computed as r = hi - lo for extra precision. 478 let k = 0i64; 479 if (x < 0f64) { 480 k = (((LOG2_E * x) - 0.5): i64); 481 } else if (x > 0f64) { 482 k = (((LOG2_E * x) + 0.5): i64); 483 }; 484 const hi = x - ((k: f64) * LN2_HI); 485 const lo = (k: f64) * LN2_LO; 486 487 // Compute 488 return expmultif64(hi, lo, k); 489 }; 490 491 // Returns 2^x. 492 export fn exp2f64(x: f64) f64 = { 493 const overflow = 1.0239999999999999e+03; 494 const underflow = -1.0740e+03; 495 496 if (isnan(x) || x == INF) { 497 return x; 498 } else if (x == -INF) { 499 return 0f64; 500 } else if (x > overflow) { 501 return INF; 502 } else if (x < underflow) { 503 return 0f64; 504 }; 505 506 // Argument reduction; x = r×lg(e) + k with |r| ≤ ln(2)/2. 507 // Computed as r = hi - lo for extra precision. 508 let k = 0i64; 509 if (x > 0f64) { 510 k = ((x + 0.5): i64); 511 } else if (x < 0f64) { 512 k = ((x - 0.5): i64); 513 }; 514 const t = x - (k: f64); 515 const hi = t * LN2_HI; 516 const lo = -t * LN2_LO; 517 518 // Compute 519 return expmultif64(hi, lo, k); 520 }; 521 522 // __ieee754_sqrt(x) 523 // Return correctly rounded sqrt. 524 // ----------------------------------------- 525 // | Use the hardware sqrt if you have one | 526 // ----------------------------------------- 527 // Method: 528 // Bit by bit method using integer arithmetic. (Slow, but portable) 529 // 1. Normalization 530 // Scale x to y in [1,4) with even powers of 2: 531 // find an integer k such that 1 <= (y=x*2**(2k)) < 4, then 532 // sqrt(x) = 2**k * sqrt(y) 533 // 2. Bit by bit computation 534 // Let q = sqrt(y) truncated to i bit after binary point (q = 1), 535 // i 0 536 // i+1 2 537 // s = 2*q , and y = 2 * ( y - q ). (1) 538 // i i i i 539 // 540 // To compute q from q , one checks whether 541 // i+1 i 542 // 543 // -(i+1) 2 544 // (q + 2 ) <= y. (2) 545 // i 546 // -(i+1) 547 // If (2) is false, then q = q ; otherwise q = q + 2 . 548 // i+1 i i+1 i 549 // 550 // With some algebraic manipulation, it is not difficult to see 551 // that (2) is equivalent to 552 // -(i+1) 553 // s + 2 <= y (3) 554 // i i 555 // 556 // The advantage of (3) is that s and y can be computed by 557 // i i 558 // the following recurrence formula: 559 // if (3) is false 560 // 561 // s = s , y = y ; (4) 562 // i+1 i i+1 i 563 // 564 // otherwise, 565 // -i -(i+1) 566 // s = s + 2 , y = y - s - 2 (5) 567 // i+1 i i+1 i i 568 // 569 // One may easily use induction to prove (4) and (5). 570 // Note. Since the left hand side of (3) contain only i+2 bits, 571 // it is not necessary to do a full (53-bit) comparison 572 // in (3). 573 // 3. Final rounding 574 // After generating the 53 bits result, we compute one more bit. 575 // Together with the remainder, we can decide whether the 576 // result is exact, bigger than 1/2ulp, or less than 1/2ulp 577 // (it will never equal to 1/2ulp). 578 // The rounding mode can be detected by checking whether 579 // huge + tiny is equal to huge, and whether huge - tiny is 580 // equal to huge for some floating point number "huge" and "tiny". 581 582 // Returns the square root of x. 583 export fn sqrtf64(x: f64) f64 = { 584 if (x == 0f64) { 585 return x; 586 } else if (isnan(x) || x == INF) { 587 return x; 588 } else if (x < 0f64) { 589 return NAN; 590 }; 591 592 let bits = f64bits(x); 593 594 // Normalize x 595 let exp = (((bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64); 596 if (exp == 0i64) { 597 // Subnormal x 598 for (bits & (1 << F64_MANTISSA_BITS) == 0) { 599 bits <<= 1; 600 exp -= 1; 601 }; 602 exp += 1; 603 }; 604 // Unbias exponent 605 exp -= (F64_EXPONENT_BIAS: i64); 606 bits = bits & ~(F64_EXPONENT_MASK << F64_MANTISSA_BITS); 607 bits = bits | (1u64 << (F64_MANTISSA_BITS: u64)); 608 // Odd exp, double x to make it even 609 if (exp & 1i64 == 1i64) { 610 bits <<= 1; 611 }; 612 // exp = exp/2, exponent of square root 613 exp >>= 1; 614 // Generate sqrt(x) bit by bit 615 bits <<= 1; 616 // q = sqrt(x) 617 let q = 0u64; 618 let s = 0u64; 619 // r = moving bit from MSB to LSB 620 let r = ((1u64 << (F64_MANTISSA_BITS + 1u64)): u64); 621 for (r != 0) { 622 const t = s + r; 623 if (t <= bits) { 624 s = t + r; 625 bits -= t; 626 q += r; 627 }; 628 bits <<= 1u64; 629 r >>= 1u64; 630 }; 631 // Final rounding 632 if (bits != 0) { 633 // Remainder, result not exact 634 // Round according to extra bit 635 q += q & 1; 636 }; 637 // significand + biased exponent 638 bits = (q >> 1) + ( 639 ((exp - 1i64 + (F64_EXPONENT_BIAS: i64)): u64) << 640 F64_MANTISSA_BITS); 641 return f64frombits(bits); 642 }; 643 644 fn is_f64_odd_int(x: f64) bool = { 645 const (x_int, x_frac) = modfracf64(x); 646 const has_no_frac = (x_frac == 0f64); 647 const is_odd = ((x_int: i64 & 1i64) == 1i64); 648 return has_no_frac && is_odd; 649 }; 650 651 // Returns x^p. 652 export fn powf64(x: f64, p: f64) f64 = { 653 if (x == 1f64 || p == 0f64) { 654 return 1f64; 655 } else if (p == 1f64) { 656 return x; 657 } else if (isnan(x)) { 658 return NAN; 659 } else if (isnan(p)) { 660 return NAN; 661 } else if (x == 0f64) { 662 if (p < 0f64) { 663 if (is_f64_odd_int(p)) { 664 return copysignf64(INF, x); 665 } else { 666 return INF; 667 }; 668 } else if (p > 0f64) { 669 if (is_f64_odd_int(p)) { 670 return x; 671 } else { 672 return 0f64; 673 }; 674 }; 675 } else if (isinf(p)) { 676 if (x == -1f64) { 677 return 1f64; 678 } else if ((absf64(x) < 1f64) == (p == INF)) { 679 return 0f64; 680 }; 681 return INF; 682 } else if (isinf(x)) { 683 if (x == -INF) { 684 return powf64(-0f64, -p); 685 } else if (p < 0f64) { 686 return 0f64; 687 } else if (p > 0f64) { 688 return INF; 689 }; 690 } else if (p == 0.5f64) { 691 return sqrtf64(x); 692 } else if (p == -0.5f64) { 693 return 1f64 / sqrtf64(x); 694 }; 695 696 let (p_int, p_frac) = modfracf64(absf64(p)); 697 if (p_frac != 0f64 && x < 0f64) { 698 return NAN; 699 }; 700 if (p_int > types::I64_MAX: f64) { 701 if (x == -1f64) { 702 return 1f64; 703 } else if ((absf64(x) < 1f64) == (p > 0f64)) { 704 return 0f64; 705 } else { 706 return INF; 707 }; 708 }; 709 710 let res_mantissa = 1f64; 711 let res_exp = 0i64; 712 713 // The method used later in this function doesn't apply to fractional 714 // powers, so we have to handle these separately with 715 // x^p = e^{p * ln(x)} 716 if (p_frac != 0f64) { 717 if (p_frac > 0.5f64) { 718 p_frac -= 1f64; 719 p_int += 1f64; 720 }; 721 res_mantissa = expf64(p_frac * logf64(x)); 722 }; 723 724 // Repeatedly square our number x, for each bit in our power p. 725 // If the current bit is 1 in p, add the respective power of x to our 726 // result. 727 let (x_mantissa, x_exp) = frexpf64(x); 728 for (let i = p_int: i64; i != 0; i >>= 1) { 729 // Check for over/underflow. 730 if (x_exp <= -1i64 << (F64_EXPONENT_BITS: i64)) { 731 return 0f64; 732 }; 733 if (x_exp >= 1i64 << (F64_EXPONENT_BITS: i64)) { 734 return INF; 735 }; 736 // Perform squaring. 737 if (i & 1i64 == 1i64) { 738 res_mantissa *= x_mantissa; 739 res_exp += x_exp; 740 }; 741 x_mantissa *= x_mantissa; 742 x_exp <<= 1; 743 // Correct mantisa to be in [0.5, 1). 744 if (x_mantissa < 0.5f64) { 745 x_mantissa += x_mantissa; 746 x_exp -= 1; 747 }; 748 }; 749 750 if (p < 0f64) { 751 res_mantissa = 1f64 / res_mantissa; 752 res_exp = -res_exp; 753 }; 754 755 return ldexpf64(res_mantissa, res_exp); 756 }; 757 758 // Returns the greatest integer value less than or equal to x. 759 export fn floorf64(x: f64) f64 = { 760 if (x == 0f64 || isnan(x) || isinf(x)) { 761 return x; 762 }; 763 if (x < 0f64) { 764 let (int_part, frac_part) = modfracf64(-x); 765 if (frac_part != 0f64) { 766 int_part += 1f64; 767 }; 768 return -int_part; 769 }; 770 return modfracf64(x).0; 771 }; 772 773 // Returns the least integer value greater than or equal to x. 774 export fn ceilf64(x: f64) f64 = -floorf64(-x); 775 776 // Returns the integer value of x. 777 export fn truncf64(x: f64) f64 = { 778 if (x == 0f64 || isnan(x) || isinf(x)) { 779 return x; 780 }; 781 return modfracf64(x).0; 782 }; 783 784 // Returns the nearest integer, rounding half away from zero. 785 export fn roundf64(x: f64) f64 = { 786 let bits = f64bits(x); 787 let e = (bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK; 788 if (e < F64_EXPONENT_BIAS) { 789 // Round abs(x) < 1 including denormals. 790 bits &= F64_SIGN_MASK; // +-0 791 if (e == F64_EXPONENT_BIAS - 1) { 792 bits |= F64_ONE; // +-1 793 }; 794 } else if (e < F64_EXPONENT_BIAS + F64_MANTISSA_BITS) { 795 // Round any abs(x) >= 1 containing a fractional component 796 // [0,1). 797 // Numbers with larger exponents are returned unchanged since 798 // they must be either an integer, infinity, or NaN. 799 const half = 1u64 << (F64_MANTISSA_BITS - 1); 800 e -= F64_EXPONENT_BIAS; 801 bits += half >> e; 802 bits = bits & ~(F64_MANTISSA_MASK >> e); 803 }; 804 return f64frombits(bits); 805 }; 806 807 // Returns the floating-point remainder of x / y. The magnitude of the result 808 // is less than y and its sign agrees with that of x. 809 export fn modf64(x: f64, y: f64) f64 = { 810 if (y == 0f64) { 811 return NAN; 812 }; 813 if (isinf(x) || isnan(x) || isnan(y)) { 814 return NAN; 815 }; 816 817 y = absf64(y); 818 819 const (y_frac, y_exp) = frexpf64(y); 820 let r = x; 821 if (x < 0f64) { 822 r = -x; 823 }; 824 825 for (r >= y) { 826 const (r_frac, r_exp) = frexpf64(r); 827 if (r_frac < y_frac) { 828 r_exp -= 1i64; 829 }; 830 r = r - ldexpf64(y, r_exp - y_exp); 831 }; 832 if (x < 0f64) { 833 r = -r; 834 }; 835 return r; 836 };