math.ha (27411B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 4 // Sections of the code below, in particular log() and exp(), are based on Go's 5 // implementation, which is, in turn, based on FreeBSD's. The original C code, 6 // as well as the respective comments and constants are from 7 // /usr/src/lib/msun/src/{e_log,e_exp}.c. 8 // 9 // The FreeBSD copyright notice: 10 // ==================================================== 11 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 12 // 13 // Developed at SunPro, a Sun Microsystems, Inc. business. 14 // Permission to use, copy, modify, and distribute this 15 // software is freely granted, provided that this notice 16 // is preserved. 17 // ==================================================== 18 // 19 // The Go copyright notice: 20 // ==================================================== 21 // Copyright (c) 2009 The Go Authors. All rights reserved. 22 // 23 // Redistribution and use in source and binary forms, with or without 24 // modification, are permitted provided that the following conditions are 25 // met: 26 // 27 // * Redistributions of source code must retain the above copyright 28 // notice, this list of conditions and the following disclaimer. 29 // * Redistributions in binary form must reproduce the above 30 // copyright notice, this list of conditions and the following disclaimer 31 // in the documentation and/or other materials provided with the 32 // distribution. 33 // * Neither the name of Google Inc. nor the names of its 34 // contributors may be used to endorse or promote products derived from 35 // this software without specific prior written permission. 36 // 37 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 38 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 39 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 40 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 41 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 42 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 43 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 45 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 46 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 47 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 // ==================================================== 49 50 use types; 51 52 // The standard tolerance used by isclose(), which is just an arbitrary way to 53 // measure whether two floating-point numbers are "sufficiently" close to each 54 // other. 55 export def STANDARD_TOL: f64 = 1e-14; 56 57 // Returns whether x and y are within tol of each other. 58 export fn eqwithinf64(x: f64, y: f64, tol: f64) bool = { 59 if (isnan(x) || isnan(y) || isnan(tol)) { 60 return false; 61 }; 62 return absf64(x - y) < tol; 63 }; 64 65 // Returns whether x and y are within tol of each other. 66 export fn eqwithinf32(x: f32, y: f32, tol: f32) bool = { 67 if (isnan(x) || isnan(y) || isnan(tol)) { 68 return false; 69 }; 70 return absf32(x - y) < tol; 71 }; 72 73 // Returns whether x and y are within tol of each other. 74 export fn eqwithin( 75 x: types::floating, 76 y: types::floating, 77 tol: types::floating, 78 ) bool = { 79 match (x) { 80 case let n: f64 => 81 return eqwithinf64(n, y as f64, tol as f64); 82 case let n: f32 => 83 return eqwithinf32(n, y as f32, tol as f32); 84 }; 85 }; 86 87 // Returns whether x and y are within [[STANDARD_TOL]] of each other. 88 export fn isclosef64(x: f64, y: f64) bool = { 89 return eqwithinf64(x, y, STANDARD_TOL); 90 }; 91 92 // Returns whether x and y are within [[STANDARD_TOL]] of each other. 93 export fn isclosef32(x: f32, y: f32) bool = { 94 return eqwithinf32(x, y, STANDARD_TOL: f32); 95 }; 96 97 // Returns whether x and y are within [[STANDARD_TOL]] of each other. 98 export fn isclose(x: types::floating, y: types::floating) bool = { 99 match (x) { 100 case let n: f64 => 101 return isclosef64(n, y as f64); 102 case let n: f32 => 103 return isclosef32(n, y as f32); 104 }; 105 }; 106 107 // e - https://oeis.org/A001113 108 export def E: f64 = 2.71828182845904523536028747135266249775724709369995957496696763; 109 // pi - https://oeis.org/A000796 110 export def PI: f64 = 3.14159265358979323846264338327950288419716939937510582097494459; 111 // tau - https://oeis.org/A019692 112 export def TAU: f64 = 6.2831853071795864769252867665590057683943387987502116419498892; 113 // phi - https://oeis.org/A001622 114 export def PHI: f64 = 1.61803398874989484820458683436563811772030917980576286213544862; 115 // sqrt(2) - https://oeis.org/A002193 116 export def SQRT_2: f64 = 1.41421356237309504880168872420969807856967187537694807317667974; 117 // sqrt(e) - https://oeis.org/A019774 118 export def SQRT_E: f64 = 1.64872127070012814684865078781416357165377610071014801157507931; 119 // sqrt(pi) - https://oeis.org/A002161 120 export def SQRT_PI: f64 = 1.77245385090551602729816748334114518279754945612238712821380779; 121 // sqrt(phi) - https://oeis.org/A139339 122 export def SQRT_PHI: f64 = 1.27201964951406896425242246173749149171560804184009624861664038; 123 // ln(2) - https://oeis.org/A002162 124 export def LN_2: f64 = 0.693147180559945309417232121458176568075500134360255254120680009; 125 // ln(2) - https://oeis.org/A002162 126 export def LN2_HI: f64 = 6.93147180369123816490e-01; 127 // ln(2) - https://oeis.org/A002162 128 export def LN2_LO: f64 = 1.90821492927058770002e-10; 129 // log_{2}(e) 130 export def LOG2_E: f64 = 1f64 / LN_2; 131 // ln(10) - https://oeis.org/A002392 132 export def LN_10: f64 = 2.30258509299404568401799145468436420760110148862877297603332790; 133 // log_{10}(e) 134 export def LOG10_E: f64 = 1f64 / LN_10; 135 136 // __ieee754_log(x) 137 // Return the logarithm of x 138 // 139 // Method : 140 // 1. Argument Reduction: find k and f such that 141 // x = 2**k * (1+f), 142 // where sqrt(2)/2 < 1+f < sqrt(2) . 143 // 144 // 2. Approximation of log(1+f). 145 // Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) 146 // = 2s + 2/3 s**3 + 2/5 s**5 + ....., 147 // = 2s + s*R 148 // We use a special Reme algorithm on [0,0.1716] to generate 149 // a polynomial of degree 14 to approximate R. The maximum error 150 // of this polynomial approximation is bounded by 2**-58.45. In 151 // other words, 152 // 2 4 6 8 10 12 14 153 // R(z) ~ L1*s +L2*s +L3*s +L4*s +L5*s +L6*s +L7*s 154 // (the values of L1 to L7 are listed in the program) and 155 // | 2 14 | -58.45 156 // | L1*s +...+L7*s - R(z) | <= 2 157 // | | 158 // Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. 159 // In order to guarantee error in log below 1ulp, we compute log by 160 // log(1+f) = f - s*(f - R) (if f is not too large) 161 // log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) 162 // 163 // 3. Finally, log(x) = k*Ln2 + log(1+f). 164 // = k*Ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*Ln2_lo))) 165 // Here Ln2 is split into two floating point number: 166 // Ln2_hi + Ln2_lo, 167 // where n*Ln2_hi is always exact for |n| < 2000. 168 // 169 // Special cases: 170 // log(x) is NaN with signal if x < 0 (including -INF) ; 171 // log(+INF) is +INF; log(0) is -INF with signal; 172 // log(NaN) is that NaN with no signal. 173 // 174 // Accuracy: 175 // according to an error analysis, the error is always less than 176 // 1 ulp (unit in the last place). 177 // 178 // Constants: 179 // The hexadecimal values are the intended ones for the following 180 // constants. The decimal values may be used, provided that the 181 // compiler will convert from decimal to binary accurately enough 182 // to produce the hexadecimal values shown. 183 184 // Returns the natural logarithm of x. 185 export fn logf64(x: f64) f64 = { 186 const L1 = 6.666666666666735130e-01; // 3fe55555 55555593 187 const L2 = 3.999999999940941908e-01; // 3fd99999 9997fa04 188 const L3 = 2.857142874366239149e-01; // 3fd24924 94229359 189 const L4 = 2.222219843214978396e-01; // 3fcc71c5 1d8e78af 190 const L5 = 1.818357216161805012e-01; // 3fc74664 96cb03de 191 const L6 = 1.531383769920937332e-01; // 3fc39a09 d078c69f 192 const L7 = 1.479819860511658591e-01; // 3fc2f112 df3e5244 193 194 // special cases 195 if (isnan(x) || x == INF) { 196 return x; 197 } else if (x < 0f64) { 198 return NAN; 199 } else if (x == 0f64) { 200 return -INF; 201 }; 202 203 // Reduce 204 const f1_and_ki = frexp(x); 205 let f1 = f1_and_ki.0; 206 let ki = f1_and_ki.1; 207 if (f1 < (SQRT_2 / 2f64)) { 208 f1 *= 2f64; 209 ki -= 1i64; 210 }; 211 let f = f1 - 1f64; 212 let k = (ki: f64); 213 214 // Compute 215 const s = f / (2f64 + f); 216 const s2 = s * s; 217 const s4 = s2 * s2; 218 const t1 = s2 * (L1 + s4 * (L3 + s4 * (L5 + s4 * L7))); 219 const t2 = s4 * (L2 + s4 * (L4 + s4 * L6)); 220 const R = t1 + t2; 221 const hfsq = 0.5f64 * f * f; 222 return k * LN2_HI - ((hfsq - (s * (hfsq + R) + k * LN2_LO)) - f); 223 }; 224 225 // Returns the decimal logarithm of x. 226 export fn log10f64(x: f64) f64 = { 227 return logf64(x) * (1f64 / LN_10); 228 }; 229 230 // Returns the binary logarithm of x. 231 export fn log2f64(x: f64) f64 = { 232 const frexp_res = frexpf64(x); 233 let frac = frexp_res.0; 234 let exp = frexp_res.1; 235 // Make sure exact powers of two give an exact answer. 236 // Don't depend on log(0.5) * (1 / LN_2) + exp being exactly exp - 1. 237 if (frac == 0.5f64) { 238 return ((exp - 1): f64); 239 }; 240 return logf64(frac) * (1f64 / LN_2) + (exp: f64); 241 }; 242 243 // double log1p(double x) 244 // 245 // Method : 246 // 1. Argument Reduction: find k and f such that 247 // 1+x = 2**k * (1+f), 248 // where sqrt(2)/2 < 1+f < sqrt(2) . 249 // 250 // Note. If k=0, then f=x is exact. However, if k!=0, then f 251 // may not be representable exactly. In that case, a correction 252 // term is need. Let u=1+x rounded. Let c = (1+x)-u, then 253 // log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u), 254 // and add back the correction term c/u. 255 // (Note: when x > 2**53, one can simply return log(x)) 256 // 257 // 2. Approximation of log1p(f). 258 // Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) 259 // = 2s + 2/3 s**3 + 2/5 s**5 + ....., 260 // = 2s + s*R 261 // We use a special Reme algorithm on [0,0.1716] to generate 262 // a polynomial of degree 14 to approximate R The maximum error 263 // of this polynomial approximation is bounded by 2**-58.45. In 264 // other words, 265 // 2 4 6 8 10 12 14 266 // R(z) ~ LP1*s +LP2*s +LP3*s +LP4*s +LP5*s +LP6*s +LP7*s 267 // (the values of LP1 to LP7 are listed in the program) 268 // and 269 // | 2 14 | -58.45 270 // | LP1*s +...+LP7*s - R(z) | <= 2 271 // | | 272 // Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. 273 // In order to guarantee error in log below 1ulp, we compute log 274 // by 275 // log1p(f) = f - (hfsq - s*(hfsq+R)). 276 // 277 // 3. Finally, log1p(x) = k*ln2 + log1p(f). 278 // = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) 279 // Here ln2 is split into two floating point number: 280 // ln2_hi + ln2_lo, 281 // where n*ln2_hi is always exact for |n| < 2000. 282 // 283 // Special cases: 284 // log1p(x) is NaN with signal if x < -1 (including -INF) ; 285 // log1p(+INF) is +INF; log1p(-1) is -INF with signal; 286 // log1p(NaN) is that NaN with no signal. 287 // 288 // Accuracy: 289 // according to an error analysis, the error is always less than 290 // 1 ulp (unit in the last place). 291 // 292 // Constants: 293 // The hexadecimal values are the intended ones for the following 294 // constants. The decimal values may be used, provided that the 295 // compiler will convert from decimal to binary accurately enough 296 // to produce the hexadecimal values shown. 297 // 298 // Note: Assuming log() return accurate answer, the following 299 // algorithm can be used to compute log1p(x) to within a few ULP: 300 // 301 // u = 1+x; 302 // if(u==1.0) return x ; else 303 // return log(u)*(x/(u-1.0)); 304 // 305 // See HP-15C Advanced Functions Handbook, p.193. 306 307 // Returns the natural logarithm of 1 plus its argument x. 308 // It is more accurate than log(1 + x) when x is near zero. 309 export fn log1pf64(x: f64) f64 = { 310 // sqrt(2) - 1 311 const SQRT2M1 = 4.142135623730950488017e-01; // 0x3fda827999fcef34 312 // sqrt(2) / 2 - 1 313 const SQRT2HALFM1 = -2.928932188134524755992e-01; // 0xbfd2bec333018866 314 const SMALL = 1f64 / ((1i64 << 29): f64); // 2**-29 315 const TINY = 1f64 / ((1i64 << 54): f64); // 2**-54 316 const TWO53 = ((1i64 << 53): f64); // 2**53 317 const LN2HI = 6.93147180369123816490e-01; // 3fe62e42fee00000 318 const LN2LO = 1.90821492927058770002e-10; // 3dea39ef35793c76 319 const LP1 = 6.666666666666735130e-01; // 3fe5555555555593 320 const LP2 = 3.999999999940941908e-01; // 3fd999999997fa04 321 const LP3 = 2.857142874366239149e-01; // 3fd2492494229359 322 const LP4 = 2.222219843214978396e-01; // 3fcc71c51d8e78af 323 const LP5 = 1.818357216161805012e-01; // 3fc7466496cb03de 324 const LP6 = 1.531383769920937332e-01; // 3fc39a09d078c69f 325 const LP7 = 1.479819860511658591e-01; // 3fc2f112df3e5244 326 327 if (x < -1f64 || isnan(x)) { 328 return NAN; 329 } else if (x == -1f64) { 330 return -INF; 331 } else if (x == INF) { 332 return INF; 333 }; 334 335 const absx = absf64(x); 336 337 let f = 0f64; 338 let iu = 0u64; 339 let k = 1i64; 340 if (absx < SQRT2M1) { // |x| < Sqrt(2)-1 341 if (absx < SMALL) { // |x| < 2**-29 342 if (absx < TINY) { // |x| < 2**-54 343 return x; 344 }; 345 return x - (x * x * 0.5f64); 346 }; 347 if (x > SQRT2HALFM1) { // Sqrt(2)/2-1 < x 348 // (Sqrt(2)/2-1) < x < (Sqrt(2)-1) 349 k = 0; 350 f = x; 351 iu = 1; 352 }; 353 }; 354 let c = 0f64; 355 if (k != 0) { 356 let u = 0f64; 357 if (absx < TWO53) { // 1<<53 358 u = 1.0 + x; 359 iu = f64bits(u); 360 k = (((iu >> 52) - 1023): i64); 361 // Correction term 362 if (k > 0) { 363 c = 1f64 - (u - x); 364 } else { 365 c = x - (u - 1f64); 366 }; 367 c /= u; 368 } else { 369 u = x; 370 iu = f64bits(u); 371 k = (((iu >> 52) - 1023): i64); 372 c = 0f64; 373 }; 374 iu &= 0x000fffffffffffff; 375 if (iu < 0x0006a09e667f3bcd) { // Mantissa of Sqrt(2) 376 // Normalize u 377 u = f64frombits(iu | 0x3ff0000000000000); 378 } else { 379 k += 1; 380 // Normalize u/2 381 u = f64frombits(iu | 0x3fe0000000000000); 382 iu = (0x0010000000000000 - iu) >> 2; 383 }; 384 f = u - 1f64; // Sqrt(2)/2 < u < Sqrt(2) 385 }; 386 const hfsq = 0.5 * f * f; 387 let s = 0f64; 388 let R = 0f64; 389 let z = 0f64; 390 if (iu == 0) { // |f| < 2**-20 391 if (f == 0f64) { 392 if (k == 0) { 393 return 0f64; 394 }; 395 c += (k: f64) * LN2LO; 396 return (k: f64) * LN2HI + c; 397 }; 398 R = hfsq * (1.0 - 0.66666666666666666 * f); // Avoid division 399 if (k == 0) { 400 return f - R; 401 }; 402 return (k: f64) * LN2HI - ((R - ((k: f64) * LN2LO + c)) - f); 403 }; 404 s = f / (2f64 + f); 405 z = s * s; 406 R = z * (LP1 + 407 z * (LP2 + 408 z * (LP3 + 409 z * (LP4 + 410 z * (LP5 + 411 z * (LP6 + 412 z * LP7)))))); 413 if (k == 0) { 414 return f - (hfsq - s * (hfsq + R)); 415 }; 416 return (k: f64) * LN2HI - 417 ((hfsq - (s * (hfsq + R) + ((k: f64) * LN2LO + c))) - f); 418 }; 419 420 // exp(x) 421 // Returns the exponential of x. 422 // 423 // Method 424 // 1. Argument reduction: 425 // Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. 426 // Given x, find r and integer k such that 427 // 428 // x = k*ln2 + r, |r| <= 0.5*ln2. 429 // 430 // Here r will be represented as r = hi-lo for better 431 // accuracy. 432 // 433 // 2. Approximation of exp(r) by a special rational function on 434 // the interval [0,0.34658]: 435 // Write 436 // R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... 437 // We use a special Remez algorithm on [0,0.34658] to generate 438 // a polynomial of degree 5 to approximate R. The maximum error 439 // of this polynomial approximation is bounded by 2**-59. In 440 // other words, 441 // R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 442 // (where z=r*r, and the values of P1 to P5 are listed below) 443 // and 444 // | 5 | -59 445 // | 2.0+P1*z+...+P5*z - R(z) | <= 2 446 // | | 447 // The computation of exp(r) thus becomes 448 // 2*r 449 // exp(r) = 1 + ------- 450 // R - r 451 // r*R1(r) 452 // = 1 + r + ----------- (for better accuracy) 453 // 2 - R1(r) 454 // where 455 // 2 4 10 456 // R1(r) = r - (P1*r + P2*r + ... + P5*r ). 457 // 458 // 3. Scale back to obtain exp(x): 459 // From step 1, we have 460 // exp(x) = 2**k * exp(r) 461 // 462 // Special cases: 463 // exp(INF) is INF, exp(NaN) is NaN; 464 // exp(-INF) is 0, and 465 // for finite argument, only exp(0)=1 is exact. 466 // 467 // Accuracy: 468 // according to an error analysis, the error is always less than 469 // 1 ulp (unit in the last place). 470 // 471 // Misc. info. 472 // For IEEE double 473 // if x > 7.09782712893383973096e+02 then exp(x) overflow 474 // if x < -7.45133219101941108420e+02 then exp(x) underflow 475 // 476 // Constants: 477 // The hexadecimal values are the intended ones for the following 478 // constants. The decimal values may be used, provided that the 479 // compiler will convert from decimal to binary accurately enough 480 // to produce the hexadecimal values shown. 481 482 // Returns e^r * 2^k where r = hi - lo and |r| <= (ln(2) / 2). 483 export fn expmultif64(hi: f64, lo: f64, k: i64) f64 = { 484 const P1 = 1.66666666666666657415e-01; // 0x3fc55555; 0x55555555 485 const P2 = -2.77777777770155933842e-03; // 0xbf66c16c; 0X16bebd9n 486 const P3 = 6.61375632143793436117e-05; // 0x3f11566a; 0Xaf25de2c 487 const P4 = -1.65339022054652515390e-06; // 0xbebbbd41; 0Xc5d26bf1 488 const P5 = 4.13813679705723846039e-08; // 0x3e663769; 0X72bea4d0 489 490 let r = hi - lo; 491 let t = r * r; 492 let c = r - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); 493 const y = 1f64 - ((lo - (r * c) / (2f64 - c)) - hi); 494 return ldexpf64(y, k); 495 }; 496 497 // Returns e^x. 498 export fn expf64(x: f64) f64 = { 499 const overflow = 7.09782712893383973096e+02; 500 const underflow = -7.45133219101941108420e+02; 501 const near_zero = 1f64 / ((1i64 << 28i64): f64); 502 503 if (isnan(x) || x == INF) { 504 return x; 505 } else if (x == -INF) { 506 return 0f64; 507 } else if (x > overflow) { 508 return INF; 509 } else if (x < underflow) { 510 return 0f64; 511 } else if (-near_zero < x && x < near_zero) { 512 return 1f64 + x; 513 }; 514 515 // Reduce; computed as r = hi - lo for extra precision. 516 let k = 0i64; 517 if (x < 0f64) { 518 k = (((LOG2_E * x) - 0.5): i64); 519 } else if (x > 0f64) { 520 k = (((LOG2_E * x) + 0.5): i64); 521 }; 522 const hi = x - ((k: f64) * LN2_HI); 523 const lo = (k: f64) * LN2_LO; 524 525 // Compute 526 return expmultif64(hi, lo, k); 527 }; 528 529 // Returns 2^x. 530 export fn exp2f64(x: f64) f64 = { 531 const overflow = 1.0239999999999999e+03; 532 const underflow = -1.0740e+03; 533 534 if (isnan(x) || x == INF) { 535 return x; 536 } else if (x == -INF) { 537 return 0f64; 538 } else if (x > overflow) { 539 return INF; 540 } else if (x < underflow) { 541 return 0f64; 542 }; 543 544 // Argument reduction; x = r×lg(e) + k with |r| ≤ ln(2)/2. 545 // Computed as r = hi - lo for extra precision. 546 let k = 0i64; 547 if (x > 0f64) { 548 k = ((x + 0.5): i64); 549 } else if (x < 0f64) { 550 k = ((x - 0.5): i64); 551 }; 552 const t = x - (k: f64); 553 const hi = t * LN2_HI; 554 const lo = -t * LN2_LO; 555 556 // Compute 557 return expmultif64(hi, lo, k); 558 }; 559 560 // __ieee754_sqrt(x) 561 // Return correctly rounded sqrt. 562 // ----------------------------------------- 563 // | Use the hardware sqrt if you have one | 564 // ----------------------------------------- 565 // Method: 566 // Bit by bit method using integer arithmetic. (Slow, but portable) 567 // 1. Normalization 568 // Scale x to y in [1,4) with even powers of 2: 569 // find an integer k such that 1 <= (y=x*2**(2k)) < 4, then 570 // sqrt(x) = 2**k * sqrt(y) 571 // 2. Bit by bit computation 572 // Let q = sqrt(y) truncated to i bit after binary point (q = 1), 573 // i 0 574 // i+1 2 575 // s = 2*q , and y = 2 * ( y - q ). (1) 576 // i i i i 577 // 578 // To compute q from q , one checks whether 579 // i+1 i 580 // 581 // -(i+1) 2 582 // (q + 2 ) <= y. (2) 583 // i 584 // -(i+1) 585 // If (2) is false, then q = q ; otherwise q = q + 2 . 586 // i+1 i i+1 i 587 // 588 // With some algebraic manipulation, it is not difficult to see 589 // that (2) is equivalent to 590 // -(i+1) 591 // s + 2 <= y (3) 592 // i i 593 // 594 // The advantage of (3) is that s and y can be computed by 595 // i i 596 // the following recurrence formula: 597 // if (3) is false 598 // 599 // s = s , y = y ; (4) 600 // i+1 i i+1 i 601 // 602 // otherwise, 603 // -i -(i+1) 604 // s = s + 2 , y = y - s - 2 (5) 605 // i+1 i i+1 i i 606 // 607 // One may easily use induction to prove (4) and (5). 608 // Note. Since the left hand side of (3) contain only i+2 bits, 609 // it is not necessary to do a full (53-bit) comparison 610 // in (3). 611 // 3. Final rounding 612 // After generating the 53 bits result, we compute one more bit. 613 // Together with the remainder, we can decide whether the 614 // result is exact, bigger than 1/2ulp, or less than 1/2ulp 615 // (it will never equal to 1/2ulp). 616 // The rounding mode can be detected by checking whether 617 // huge + tiny is equal to huge, and whether huge - tiny is 618 // equal to huge for some floating point number "huge" and "tiny". 619 620 // Returns the square root of x. 621 export fn sqrtf64(x: f64) f64 = { 622 if (x == 0f64) { 623 return x; 624 } else if (isnan(x) || x == INF) { 625 return x; 626 } else if (x < 0f64) { 627 return NAN; 628 }; 629 630 let bits = f64bits(x); 631 632 // Normalize x 633 let exp = (((bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64); 634 if (exp == 0i64) { 635 // Subnormal x 636 for (bits & (1 << F64_MANTISSA_BITS) == 0) { 637 bits <<= 1; 638 exp -= 1; 639 }; 640 exp += 1; 641 }; 642 // Unbias exponent 643 exp -= (F64_EXPONENT_BIAS: i64); 644 bits = bits & ~(F64_EXPONENT_MASK << F64_MANTISSA_BITS); 645 bits = bits | (1u64 << (F64_MANTISSA_BITS: u64)); 646 // Odd exp, double x to make it even 647 if (exp & 1i64 == 1i64) { 648 bits <<= 1; 649 }; 650 // exp = exp/2, exponent of square root 651 exp >>= 1; 652 // Generate sqrt(x) bit by bit 653 bits <<= 1; 654 // q = sqrt(x) 655 let q = 0u64; 656 let s = 0u64; 657 // r = moving bit from MSB to LSB 658 let r = ((1u64 << (F64_MANTISSA_BITS + 1u64)): u64); 659 for (r != 0) { 660 const t = s + r; 661 if (t <= bits) { 662 s = t + r; 663 bits -= t; 664 q += r; 665 }; 666 bits <<= 1u64; 667 r >>= 1u64; 668 }; 669 // Final rounding 670 if (bits != 0) { 671 // Remainder, result not exact 672 // Round according to extra bit 673 q += q & 1; 674 }; 675 // significand + biased exponent 676 bits = (q >> 1) + ( 677 ((exp - 1i64 + (F64_EXPONENT_BIAS: i64)): u64) << 678 F64_MANTISSA_BITS); 679 return f64frombits(bits); 680 }; 681 682 fn is_f64_odd_int(x: f64) bool = { 683 const (x_int, x_frac) = modfracf64(x); 684 const has_no_frac = (x_frac == 0f64); 685 const is_odd = ((x_int: i64 & 1i64) == 1i64); 686 return has_no_frac && is_odd; 687 }; 688 689 // Returns x^p. 690 export fn powf64(x: f64, p: f64) f64 = { 691 if (x == 1f64 || p == 0f64) { 692 return 1f64; 693 } else if (p == 1f64) { 694 return x; 695 } else if (isnan(x)) { 696 return NAN; 697 } else if (isnan(p)) { 698 return NAN; 699 } else if (x == 0f64) { 700 if (p < 0f64) { 701 if (is_f64_odd_int(p)) { 702 return copysignf64(INF, x); 703 } else { 704 return INF; 705 }; 706 } else if (p > 0f64) { 707 if (is_f64_odd_int(p)) { 708 return x; 709 } else { 710 return 0f64; 711 }; 712 }; 713 } else if (isinf(p)) { 714 if (x == -1f64) { 715 return 1f64; 716 } else if ((absf64(x) < 1f64) == (p == INF)) { 717 return 0f64; 718 }; 719 return INF; 720 } else if (isinf(x)) { 721 if (x == -INF) { 722 return powf64(-0f64, -p); 723 } else if (p < 0f64) { 724 return 0f64; 725 } else if (p > 0f64) { 726 return INF; 727 }; 728 } else if (p == 0.5f64) { 729 return sqrtf64(x); 730 } else if (p == -0.5f64) { 731 return 1f64 / sqrtf64(x); 732 }; 733 734 let (p_int, p_frac) = modfracf64(absf64(p)); 735 if (p_frac != 0f64 && x < 0f64) { 736 return NAN; 737 }; 738 if (p_int > types::I64_MAX: f64) { 739 if (x == -1f64) { 740 return 1f64; 741 } else if ((absf64(x) < 1f64) == (p > 0f64)) { 742 return 0f64; 743 } else { 744 return INF; 745 }; 746 }; 747 748 let res_mantissa = 1f64; 749 let res_exp = 0i64; 750 751 // The method used later in this function doesn't apply to fractional 752 // powers, so we have to handle these separately with 753 // x^p = e^{p * ln(x)} 754 if (p_frac != 0f64) { 755 if (p_frac > 0.5f64) { 756 p_frac -= 1f64; 757 p_int += 1f64; 758 }; 759 res_mantissa = expf64(p_frac * logf64(x)); 760 }; 761 762 // Repeatedly square our number x, for each bit in our power p. 763 // If the current bit is 1 in p, add the respective power of x to our 764 // result. 765 let (x_mantissa, x_exp) = frexp(x); 766 for (let i = p_int: i64; i != 0; i >>= 1) { 767 // Check for over/underflow. 768 if (x_exp <= -1i64 << (F64_EXPONENT_BITS: i64)) { 769 return 0f64; 770 }; 771 if (x_exp >= 1i64 << (F64_EXPONENT_BITS: i64)) { 772 return INF; 773 }; 774 // Perform squaring. 775 if (i & 1i64 == 1i64) { 776 res_mantissa *= x_mantissa; 777 res_exp += x_exp; 778 }; 779 x_mantissa *= x_mantissa; 780 x_exp <<= 1; 781 // Correct mantisa to be in [0.5, 1). 782 if (x_mantissa < 0.5f64) { 783 x_mantissa += x_mantissa; 784 x_exp -= 1; 785 }; 786 }; 787 788 if (p < 0f64) { 789 res_mantissa = 1f64 / res_mantissa; 790 res_exp = -res_exp; 791 }; 792 793 return ldexpf64(res_mantissa, res_exp); 794 }; 795 796 // Returns the greatest integer value less than or equal to x. 797 export fn floorf64(x: f64) f64 = { 798 if (x == 0f64 || isnan(x) || isinf(x)) { 799 return x; 800 }; 801 if (x < 0f64) { 802 let (int_part, frac_part) = modfracf64(-x); 803 if (frac_part != 0f64) { 804 int_part += 1f64; 805 }; 806 return -int_part; 807 }; 808 return modfracf64(x).0; 809 }; 810 811 // Returns the least integer value greater than or equal to x. 812 export fn ceilf64(x: f64) f64 = -floorf64(-x); 813 814 // Returns the integer value of x. 815 export fn truncf64(x: f64) f64 = { 816 if (x == 0f64 || isnan(x) || isinf(x)) { 817 return x; 818 }; 819 return modfracf64(x).0; 820 }; 821 822 // Returns the nearest integer, rounding half away from zero. 823 export fn roundf64(x: f64) f64 = { 824 let bits = f64bits(x); 825 let e = (bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK; 826 if (e < F64_EXPONENT_BIAS) { 827 // Round abs(x) < 1 including denormals. 828 bits &= F64_SIGN_MASK; // +-0 829 if (e == F64_EXPONENT_BIAS - 1) { 830 bits |= F64_ONE; // +-1 831 }; 832 } else if (e < F64_EXPONENT_BIAS + F64_MANTISSA_BITS) { 833 // Round any abs(x) >= 1 containing a fractional component 834 // [0,1). 835 // Numbers with larger exponents are returned unchanged since 836 // they must be either an integer, infinity, or NaN. 837 const half = 1 << (F64_MANTISSA_BITS - 1); 838 e -= F64_EXPONENT_BIAS; 839 bits += half >> e; 840 bits = bits & ~(F64_MANTISSA_MASK >> e); 841 }; 842 return f64frombits(bits); 843 }; 844 845 // Returns the floating-point remainder of x / y. The magnitude of the result 846 // is less than y and its sign agrees with that of x. 847 export fn modf64(x: f64, y: f64) f64 = { 848 if (y == 0f64) { 849 return NAN; 850 }; 851 if (isinf(x) || isnan(x) || isnan(y)) { 852 return NAN; 853 }; 854 855 y = absf64(y); 856 857 const y_frexp = frexpf64(y); 858 const y_frac = y_frexp.0; 859 const y_exp = y_frexp.1; 860 let r = x; 861 if (x < 0f64) { 862 r = -x; 863 }; 864 865 for (r >= y) { 866 const r_frexp = frexpf64(r); 867 const r_frac = r_frexp.0; 868 let r_exp = r_frexp.1; 869 if (r_frac < y_frac) { 870 r_exp -= 1i64; 871 }; 872 r = r - ldexpf64(y, r_exp - y_exp); 873 }; 874 if (x < 0f64) { 875 r = -r; 876 }; 877 return r; 878 };