math.ha (35907B)
1 // License: MPL-2.0 2 // (c) 2021 Drew DeVault <sir@cmpwn.com> 3 // (c) 2021 Eyal Sawady <ecs@d2evs.net> 4 // (c) 2021 Vlad-Stefan Harbuz <vlad@vladh.net> 5 6 // Sections of the code below, in particular log() and exp(), are based on Go's 7 // implementation, which is, in turn, based on FreeBSD's. The original C code, 8 // as well as the respective comments and constants are from 9 // /usr/src/lib/msun/src/{e_log,e_exp}.c. 10 // 11 // The FreeBSD copyright notice: 12 // ==================================================== 13 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 14 // 15 // Developed at SunPro, a Sun Microsystems, Inc. business. 16 // Permission to use, copy, modify, and distribute this 17 // software is freely granted, provided that this notice 18 // is preserved. 19 // ==================================================== 20 // 21 // The Go copyright notice: 22 // ==================================================== 23 // Copyright (c) 2009 The Go Authors. All rights reserved. 24 // 25 // Redistribution and use in source and binary forms, with or without 26 // modification, are permitted provided that the following conditions are 27 // met: 28 // 29 // * Redistributions of source code must retain the above copyright 30 // notice, this list of conditions and the following disclaimer. 31 // * Redistributions in binary form must reproduce the above 32 // copyright notice, this list of conditions and the following disclaimer 33 // in the documentation and/or other materials provided with the 34 // distribution. 35 // * Neither the name of Google Inc. nor the names of its 36 // contributors may be used to endorse or promote products derived from 37 // this software without specific prior written permission. 38 // 39 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 40 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 41 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 42 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 43 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 44 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 45 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 46 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 47 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 48 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 49 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 50 // ==================================================== 51 52 use types; 53 54 // The standard tolerance used by isclose(), which is just an arbitrary way to 55 // measure whether two floating-point numbers are "sufficiently" close to each 56 // other. 57 export def STANDARD_TOL: f64 = 1e-14; 58 59 // Returns whether x and y are within tol of each other. 60 export fn eqwithinf64(x: f64, y: f64, tol: f64) bool = { 61 if (isnan(x) || isnan(y) || isnan(tol)) { 62 return false; 63 }; 64 return absf64(x - y) < tol; 65 }; 66 67 // Returns whether x and y are within tol of each other. 68 export fn eqwithinf32(x: f32, y: f32, tol: f32) bool = { 69 if (isnan(x) || isnan(y) || isnan(tol)) { 70 return false; 71 }; 72 return absf32(x - y) < tol; 73 }; 74 75 // Returns whether x and y are within tol of each other. 76 export fn eqwithin( 77 x: types::floating, 78 y: types::floating, 79 tol: types::floating, 80 ) bool = { 81 match (x) { 82 case let n: f64 => 83 return eqwithinf64(n, y as f64, tol as f64); 84 case let n: f32 => 85 return eqwithinf32(n, y as f32, tol as f32); 86 }; 87 }; 88 89 // Returns whether x and y are within [[STANDARD_TOL]] of each other. 90 export fn isclosef64(x: f64, y: f64) bool = { 91 return eqwithinf64(x, y, STANDARD_TOL); 92 }; 93 94 // Returns whether x and y are within [[STANDARD_TOL]] of each other. 95 export fn isclosef32(x: f32, y: f32) bool = { 96 return eqwithinf32(x, y, STANDARD_TOL); 97 }; 98 99 // Returns whether x and y are within [[STANDARD_TOL]] of each other. 100 export fn isclose(x: types::floating, y: types::floating) bool = { 101 match (x) { 102 case let n: f64 => 103 return isclosef64(n, y as f64); 104 case let n: f32 => 105 return isclosef32(n, y as f32); 106 }; 107 }; 108 109 @test fn eqwithin() void = { 110 assert(eqwithin(1f64, 2f64, 2f64)); 111 assert(eqwithin(1.0f32, 2.0f32, 2.0f32)); 112 assert(!eqwithin(1.0005f32, 1.0004f32, 0.00001f32)); 113 assert(isclose(1f64, 1.0000000000000000000000000001f64)); 114 assert(isclose(1.0f32, 1.0000000000000000000000000001f32)); 115 assert(!isclose(1.0005f32, 1.0004f32)); 116 }; 117 118 // e - https://oeis.org/A001113 119 export def E: f64 = 2.71828182845904523536028747135266249775724709369995957496696763; 120 // pi - https://oeis.org/A000796 121 export def PI: f64 = 3.14159265358979323846264338327950288419716939937510582097494459; 122 // phi - https://oeis.org/A001622 123 export def PHI: f64 = 1.61803398874989484820458683436563811772030917980576286213544862; 124 // sqrt(2) - https://oeis.org/A002193 125 export def SQRT_2: f64 = 1.41421356237309504880168872420969807856967187537694807317667974; 126 // sqrt(e) - https://oeis.org/A019774 127 export def SQRT_E: f64 = 1.64872127070012814684865078781416357165377610071014801157507931; 128 // sqrt(pi) - https://oeis.org/A002161 129 export def SQRT_PI: f64 = 1.77245385090551602729816748334114518279754945612238712821380779; 130 // sqrt(phi) - https://oeis.org/A139339 131 export def SQRT_PHI: f64 = 1.27201964951406896425242246173749149171560804184009624861664038; 132 // ln(2) - https://oeis.org/A002162 133 export def LN_2: f64 = 0.693147180559945309417232121458176568075500134360255254120680009; 134 // ln(2) - https://oeis.org/A002162 135 export def LN2_HI: f64 = 6.93147180369123816490e-01; 136 // ln(2) - https://oeis.org/A002162 137 export def LN2_LO: f64 = 1.90821492927058770002e-10; 138 // log_{2}(e) 139 export def LOG2_E: f64 = 1f64 / LN_2; 140 // ln(10) - https://oeis.org/A002392 141 export def LN_10: f64 = 2.30258509299404568401799145468436420760110148862877297603332790; 142 // log_{10}(e) 143 export def LOG10_E: f64 = 1f64 / LN_10; 144 145 // __ieee754_log(x) 146 // Return the logarithm of x 147 // 148 // Method : 149 // 1. Argument Reduction: find k and f such that 150 // x = 2**k * (1+f), 151 // where sqrt(2)/2 < 1+f < sqrt(2) . 152 // 153 // 2. Approximation of log(1+f). 154 // Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) 155 // = 2s + 2/3 s**3 + 2/5 s**5 + ....., 156 // = 2s + s*R 157 // We use a special Reme algorithm on [0,0.1716] to generate 158 // a polynomial of degree 14 to approximate R. The maximum error 159 // of this polynomial approximation is bounded by 2**-58.45. In 160 // other words, 161 // 2 4 6 8 10 12 14 162 // R(z) ~ L1*s +L2*s +L3*s +L4*s +L5*s +L6*s +L7*s 163 // (the values of L1 to L7 are listed in the program) and 164 // | 2 14 | -58.45 165 // | L1*s +...+L7*s - R(z) | <= 2 166 // | | 167 // Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. 168 // In order to guarantee error in log below 1ulp, we compute log by 169 // log(1+f) = f - s*(f - R) (if f is not too large) 170 // log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) 171 // 172 // 3. Finally, log(x) = k*Ln2 + log(1+f). 173 // = k*Ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*Ln2_lo))) 174 // Here Ln2 is split into two floating point number: 175 // Ln2_hi + Ln2_lo, 176 // where n*Ln2_hi is always exact for |n| < 2000. 177 // 178 // Special cases: 179 // log(x) is NaN with signal if x < 0 (including -INF) ; 180 // log(+INF) is +INF; log(0) is -INF with signal; 181 // log(NaN) is that NaN with no signal. 182 // 183 // Accuracy: 184 // according to an error analysis, the error is always less than 185 // 1 ulp (unit in the last place). 186 // 187 // Constants: 188 // The hexadecimal values are the intended ones for the following 189 // constants. The decimal values may be used, provided that the 190 // compiler will convert from decimal to binary accurately enough 191 // to produce the hexadecimal values shown. 192 193 // Returns the natural logarithm of x. 194 export fn logf64(x: f64) f64 = { 195 const L1 = 6.666666666666735130e-01; // 3fe55555 55555593 196 const L2 = 3.999999999940941908e-01; // 3fd99999 9997fa04 197 const L3 = 2.857142874366239149e-01; // 3fd24924 94229359 198 const L4 = 2.222219843214978396e-01; // 3fcc71c5 1d8e78af 199 const L5 = 1.818357216161805012e-01; // 3fc74664 96cb03de 200 const L6 = 1.531383769920937332e-01; // 3fc39a09 d078c69f 201 const L7 = 1.479819860511658591e-01; // 3fc2f112 df3e5244 202 203 // special cases 204 if (isnan(x) || x == INF) { 205 return x; 206 } else if (x < 0f64) { 207 return NAN; 208 } else if (x == 0f64) { 209 return -INF; 210 }; 211 212 // Reduce 213 const f1_and_ki = frexp(x); 214 let f1 = f1_and_ki.0; 215 let ki = f1_and_ki.1; 216 if (f1 < (SQRT_2 / 2f64)) { 217 f1 *= 2f64; 218 ki -= 1i64; 219 }; 220 let f = f1 - 1f64; 221 let k = (ki: f64); 222 223 // Compute 224 const s = f / (2f64 + f); 225 const s2 = s * s; 226 const s4 = s2 * s2; 227 const t1 = s2 * (L1 + s4 * (L3 + s4 * (L5 + s4 * L7))); 228 const t2 = s4 * (L2 + s4 * (L4 + s4 * L6)); 229 const R = t1 + t2; 230 const hfsq = 0.5f64 * f * f; 231 return k * LN2_HI - ((hfsq - (s * (hfsq + R) + k * LN2_LO)) - f); 232 }; 233 234 @test fn logf64() void = { 235 for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) { 236 assert(isclose( 237 logf64(absf64(TEST_INPUTS[idx])), 238 TEST_LOG[idx])); 239 }; 240 assert(logf64(E) == 1f64); 241 assert(logf64(54.598150033144239078110261202860878402790f64) == 4f64); 242 assert(isnan(logf64(-1f64))); 243 assert(logf64(INF) == INF); 244 assert(logf64(0f64) == -INF); 245 assert(isnan(logf64(NAN))); 246 }; 247 248 // Returns the decimal logarithm of x. 249 export fn log10f64(x: f64) f64 = { 250 return logf64(x) * (1f64 / LN_10); 251 }; 252 253 @test fn log10f64() void = { 254 for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) { 255 assert(isclose( 256 log10f64(absf64(TEST_INPUTS[idx])), 257 TEST_LOG10[idx])); 258 }; 259 }; 260 261 // Returns the binary logarithm of x. 262 export fn log2f64(x: f64) f64 = { 263 const frexp_res = frexpf64(x); 264 let frac = frexp_res.0; 265 let exp = frexp_res.1; 266 // Make sure exact powers of two give an exact answer. 267 // Don't depend on log(0.5) * (1 / LN_2) + exp being exactly exp - 1. 268 if (frac == 0.5f64) { 269 return ((exp - 1): f64); 270 }; 271 return logf64(frac) * (1f64 / LN_2) + (exp: f64); 272 }; 273 274 @test fn log2f64() void = { 275 for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) { 276 assert(isclose( 277 log2f64(absf64(TEST_INPUTS[idx])), 278 TEST_LOG2[idx])); 279 }; 280 }; 281 282 // double log1p(double x) 283 // 284 // Method : 285 // 1. Argument Reduction: find k and f such that 286 // 1+x = 2**k * (1+f), 287 // where sqrt(2)/2 < 1+f < sqrt(2) . 288 // 289 // Note. If k=0, then f=x is exact. However, if k!=0, then f 290 // may not be representable exactly. In that case, a correction 291 // term is need. Let u=1+x rounded. Let c = (1+x)-u, then 292 // log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u), 293 // and add back the correction term c/u. 294 // (Note: when x > 2**53, one can simply return log(x)) 295 // 296 // 2. Approximation of log1p(f). 297 // Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) 298 // = 2s + 2/3 s**3 + 2/5 s**5 + ....., 299 // = 2s + s*R 300 // We use a special Reme algorithm on [0,0.1716] to generate 301 // a polynomial of degree 14 to approximate R The maximum error 302 // of this polynomial approximation is bounded by 2**-58.45. In 303 // other words, 304 // 2 4 6 8 10 12 14 305 // R(z) ~ LP1*s +LP2*s +LP3*s +LP4*s +LP5*s +LP6*s +LP7*s 306 // (the values of LP1 to LP7 are listed in the program) 307 // and 308 // | 2 14 | -58.45 309 // | LP1*s +...+LP7*s - R(z) | <= 2 310 // | | 311 // Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. 312 // In order to guarantee error in log below 1ulp, we compute log 313 // by 314 // log1p(f) = f - (hfsq - s*(hfsq+R)). 315 // 316 // 3. Finally, log1p(x) = k*ln2 + log1p(f). 317 // = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) 318 // Here ln2 is split into two floating point number: 319 // ln2_hi + ln2_lo, 320 // where n*ln2_hi is always exact for |n| < 2000. 321 // 322 // Special cases: 323 // log1p(x) is NaN with signal if x < -1 (including -INF) ; 324 // log1p(+INF) is +INF; log1p(-1) is -INF with signal; 325 // log1p(NaN) is that NaN with no signal. 326 // 327 // Accuracy: 328 // according to an error analysis, the error is always less than 329 // 1 ulp (unit in the last place). 330 // 331 // Constants: 332 // The hexadecimal values are the intended ones for the following 333 // constants. The decimal values may be used, provided that the 334 // compiler will convert from decimal to binary accurately enough 335 // to produce the hexadecimal values shown. 336 // 337 // Note: Assuming log() return accurate answer, the following 338 // algorithm can be used to compute log1p(x) to within a few ULP: 339 // 340 // u = 1+x; 341 // if(u==1.0) return x ; else 342 // return log(u)*(x/(u-1.0)); 343 // 344 // See HP-15C Advanced Functions Handbook, p.193. 345 346 // Returns the natural logarithm of 1 plus its argument x. 347 // It is more accurate than log(1 + x) when x is near zero. 348 export fn log1pf64(x: f64) f64 = { 349 // sqrt(2) - 1 350 const SQRT2M1 = 4.142135623730950488017e-01; // 0x3fda827999fcef34 351 // sqrt(2) / 2 - 1 352 const SQRT2HALFM1 = -2.928932188134524755992e-01; // 0xbfd2bec333018866 353 const SMALL = 1f64 / ((1i64 << 29): f64); // 2**-29 354 const TINY = 1f64 / ((1i64 << 54): f64); // 2**-54 355 const TWO53 = ((1i64 << 53): f64); // 2**53 356 const LN2HI = 6.93147180369123816490e-01; // 3fe62e42fee00000 357 const LN2LO = 1.90821492927058770002e-10; // 3dea39ef35793c76 358 const LP1 = 6.666666666666735130e-01; // 3fe5555555555593 359 const LP2 = 3.999999999940941908e-01; // 3fd999999997fa04 360 const LP3 = 2.857142874366239149e-01; // 3fd2492494229359 361 const LP4 = 2.222219843214978396e-01; // 3fcc71c51d8e78af 362 const LP5 = 1.818357216161805012e-01; // 3fc7466496cb03de 363 const LP6 = 1.531383769920937332e-01; // 3fc39a09d078c69f 364 const LP7 = 1.479819860511658591e-01; // 3fc2f112df3e5244 365 366 if (x < -1f64 || isnan(x)) { 367 return NAN; 368 } else if (x == -1f64) { 369 return -INF; 370 } else if (x == INF) { 371 return INF; 372 }; 373 374 const absx = absf64(x); 375 376 let f = 0f64; 377 let iu = 0u64; 378 let k = 1i64; 379 if (absx < SQRT2M1) { // |x| < Sqrt(2)-1 380 if (absx < SMALL) { // |x| < 2**-29 381 if (absx < TINY) { // |x| < 2**-54 382 return x; 383 }; 384 return x - (x * x * 0.5f64); 385 }; 386 if (x > SQRT2HALFM1) { // Sqrt(2)/2-1 < x 387 // (Sqrt(2)/2-1) < x < (Sqrt(2)-1) 388 k = 0; 389 f = x; 390 iu = 1; 391 }; 392 }; 393 let c = 0f64; 394 if (k != 0) { 395 let u = 0f64; 396 if (absx < TWO53) { // 1<<53 397 u = 1.0 + x; 398 iu = f64bits(u); 399 k = (((iu >> 52) - 1023): i64); 400 // Correction term 401 if (k > 0) { 402 c = 1f64 - (u - x); 403 } else { 404 c = x - (u - 1f64); 405 }; 406 c /= u; 407 } else { 408 u = x; 409 iu = f64bits(u); 410 k = (((iu >> 52) - 1023): i64); 411 c = 0f64; 412 }; 413 iu &= 0x000fffffffffffff; 414 if (iu < 0x0006a09e667f3bcd) { // Mantissa of Sqrt(2) 415 // Normalize u 416 u = f64frombits(iu | 0x3ff0000000000000); 417 } else { 418 k += 1; 419 // Normalize u/2 420 u = f64frombits(iu | 0x3fe0000000000000); 421 iu = (0x0010000000000000 - iu) >> 2; 422 }; 423 f = u - 1f64; // Sqrt(2)/2 < u < Sqrt(2) 424 }; 425 const hfsq = 0.5 * f * f; 426 let s = 0f64; 427 let R = 0f64; 428 let z = 0f64; 429 if (iu == 0) { // |f| < 2**-20 430 if (f == 0f64) { 431 if (k == 0) { 432 return 0f64; 433 }; 434 c += (k: f64) * LN2LO; 435 return (k: f64) * LN2HI + c; 436 }; 437 R = hfsq * (1.0 - 0.66666666666666666 * f); // Avoid division 438 if (k == 0) { 439 return f - R; 440 }; 441 return (k: f64) * LN2HI - ((R - ((k: f64) * LN2LO + c)) - f); 442 }; 443 s = f / (2f64 + f); 444 z = s * s; 445 R = z * (LP1 + 446 z * (LP2 + 447 z * (LP3 + 448 z * (LP4 + 449 z * (LP5 + 450 z * (LP6 + 451 z * LP7)))))); 452 if (k == 0) { 453 return f - (hfsq - s * (hfsq + R)); 454 }; 455 return (k: f64) * LN2HI - 456 ((hfsq - (s * (hfsq + R) + ((k: f64) * LN2LO + c))) - f); 457 }; 458 459 @test fn log1p() void = { 460 for (let idx = 0z; idx < 10; idx += 1) { 461 assert(isclose( 462 log1pf64(TEST_INPUTS[idx] / 100f64), 463 TEST_LOG1P[idx])); 464 }; 465 assert(isnan(log1pf64(-INF))); 466 assert(isnan(log1pf64(-PI))); 467 assert(log1pf64(-1f64) == -INF); 468 assert(log1pf64(-0f64) == -0f64); 469 assert(log1pf64(0f64) == 0f64); 470 assert(log1pf64(INF) == INF); 471 assert(isnan(log1pf64(NAN))); 472 }; 473 474 // exp(x) 475 // Returns the exponential of x. 476 // 477 // Method 478 // 1. Argument reduction: 479 // Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. 480 // Given x, find r and integer k such that 481 // 482 // x = k*ln2 + r, |r| <= 0.5*ln2. 483 // 484 // Here r will be represented as r = hi-lo for better 485 // accuracy. 486 // 487 // 2. Approximation of exp(r) by a special rational function on 488 // the interval [0,0.34658]: 489 // Write 490 // R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... 491 // We use a special Remez algorithm on [0,0.34658] to generate 492 // a polynomial of degree 5 to approximate R. The maximum error 493 // of this polynomial approximation is bounded by 2**-59. In 494 // other words, 495 // R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 496 // (where z=r*r, and the values of P1 to P5 are listed below) 497 // and 498 // | 5 | -59 499 // | 2.0+P1*z+...+P5*z - R(z) | <= 2 500 // | | 501 // The computation of exp(r) thus becomes 502 // 2*r 503 // exp(r) = 1 + ------- 504 // R - r 505 // r*R1(r) 506 // = 1 + r + ----------- (for better accuracy) 507 // 2 - R1(r) 508 // where 509 // 2 4 10 510 // R1(r) = r - (P1*r + P2*r + ... + P5*r ). 511 // 512 // 3. Scale back to obtain exp(x): 513 // From step 1, we have 514 // exp(x) = 2**k * exp(r) 515 // 516 // Special cases: 517 // exp(INF) is INF, exp(NaN) is NaN; 518 // exp(-INF) is 0, and 519 // for finite argument, only exp(0)=1 is exact. 520 // 521 // Accuracy: 522 // according to an error analysis, the error is always less than 523 // 1 ulp (unit in the last place). 524 // 525 // Misc. info. 526 // For IEEE double 527 // if x > 7.09782712893383973096e+02 then exp(x) overflow 528 // if x < -7.45133219101941108420e+02 then exp(x) underflow 529 // 530 // Constants: 531 // The hexadecimal values are the intended ones for the following 532 // constants. The decimal values may be used, provided that the 533 // compiler will convert from decimal to binary accurately enough 534 // to produce the hexadecimal values shown. 535 536 // Returns e^r * 2^k where r = hi - lo and |r| <= (ln(2) / 2). 537 export fn expmultif64(hi: f64, lo: f64, k: i64) f64 = { 538 const P1 = 1.66666666666666657415e-01; // 0x3fc55555; 0x55555555 539 const P2 = -2.77777777770155933842e-03; // 0xbf66c16c; 0X16bebd9n 540 const P3 = 6.61375632143793436117e-05; // 0x3f11566a; 0Xaf25de2c 541 const P4 = -1.65339022054652515390e-06; // 0xbebbbd41; 0Xc5d26bf1 542 const P5 = 4.13813679705723846039e-08; // 0x3e663769; 0X72bea4d0 543 544 let r = hi - lo; 545 let t = r * r; 546 let c = r - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); 547 const y = 1f64 - ((lo - (r * c) / (2f64 - c)) - hi); 548 return ldexpf64(y, k); 549 }; 550 551 // Returns e^x. 552 export fn expf64(x: f64) f64 = { 553 const overflow = 7.09782712893383973096e+02; 554 const underflow = -7.45133219101941108420e+02; 555 const near_zero = 1f64 / ((1i64 << 28i64): f64); 556 557 if (isnan(x) || x == INF) { 558 return x; 559 } else if (x == -INF) { 560 return 0f64; 561 } else if (x > overflow) { 562 return INF; 563 } else if (x < underflow) { 564 return 0f64; 565 } else if (-near_zero < x && x < near_zero) { 566 return 1f64 + x; 567 }; 568 569 // Reduce; computed as r = hi - lo for extra precision. 570 let k = 0i64; 571 if (x < 0f64) { 572 k = (((LOG2_E * x) - 0.5): i64); 573 } else if (x > 0f64) { 574 k = (((LOG2_E * x) + 0.5): i64); 575 }; 576 const hi = x - ((k: f64) * LN2_HI); 577 const lo = (k: f64) * LN2_LO; 578 579 // Compute 580 return expmultif64(hi, lo, k); 581 }; 582 583 // Returns 2^x. 584 export fn exp2f64(x: f64) f64 = { 585 const overflow = 1.0239999999999999e+03; 586 const underflow = -1.0740e+03; 587 588 if (isnan(x) || x == INF) { 589 return x; 590 } else if (x == -INF) { 591 return 0f64; 592 } else if (x > overflow) { 593 return INF; 594 } else if (x < underflow) { 595 return 0f64; 596 }; 597 598 // Argument reduction; x = r×lg(e) + k with |r| ≤ ln(2)/2. 599 // Computed as r = hi - lo for extra precision. 600 let k = 0i64; 601 if (x > 0f64) { 602 k = ((x + 0.5): i64); 603 } else if (x < 0f64) { 604 k = ((x - 0.5): i64); 605 }; 606 const t = x - (k: f64); 607 const hi = t * LN2_HI; 608 const lo = -t * LN2_LO; 609 610 // Compute 611 return expmultif64(hi, lo, k); 612 }; 613 614 @test fn expf64() void = { 615 for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) { 616 assert(isclose(expf64(TEST_INPUTS[idx]), TEST_EXP[idx])); 617 }; 618 assert(expf64(1f64) == E); 619 assert(isnan(expf64(NAN))); 620 assert(isinf(expf64(INF))); 621 assert(expf64(-INF) == 0f64); 622 assert(isinf(expf64(99999f64))); 623 assert(expf64(-99999f64) == 0f64); 624 assert(expf64(0.5e-20) == 1f64); 625 }; 626 627 @test fn exp2f64() void = { 628 for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) { 629 assert(isclose(exp2f64(TEST_INPUTS[idx]), TEST_EXP2[idx])); 630 }; 631 assert(exp2f64(0f64) == 1f64); 632 assert(exp2f64(3f64) == 8f64); 633 assert(exp2f64(-2f64) == 0.25f64); 634 assert(!isinf(exp2f64(256f64))); 635 assert(isinf(exp2f64(99999f64))); 636 assert(exp2f64(-99999f64) == 0f64); 637 assert(isnan(exp2f64(NAN))); 638 assert(isinf(exp2f64(INF))); 639 assert(exp2f64(-INF) == 0f64); 640 }; 641 642 // __ieee754_sqrt(x) 643 // Return correctly rounded sqrt. 644 // ----------------------------------------- 645 // | Use the hardware sqrt if you have one | 646 // ----------------------------------------- 647 // Method: 648 // Bit by bit method using integer arithmetic. (Slow, but portable) 649 // 1. Normalization 650 // Scale x to y in [1,4) with even powers of 2: 651 // find an integer k such that 1 <= (y=x*2**(2k)) < 4, then 652 // sqrt(x) = 2**k * sqrt(y) 653 // 2. Bit by bit computation 654 // Let q = sqrt(y) truncated to i bit after binary point (q = 1), 655 // i 0 656 // i+1 2 657 // s = 2*q , and y = 2 * ( y - q ). (1) 658 // i i i i 659 // 660 // To compute q from q , one checks whether 661 // i+1 i 662 // 663 // -(i+1) 2 664 // (q + 2 ) <= y. (2) 665 // i 666 // -(i+1) 667 // If (2) is false, then q = q ; otherwise q = q + 2 . 668 // i+1 i i+1 i 669 // 670 // With some algebraic manipulation, it is not difficult to see 671 // that (2) is equivalent to 672 // -(i+1) 673 // s + 2 <= y (3) 674 // i i 675 // 676 // The advantage of (3) is that s and y can be computed by 677 // i i 678 // the following recurrence formula: 679 // if (3) is false 680 // 681 // s = s , y = y ; (4) 682 // i+1 i i+1 i 683 // 684 // otherwise, 685 // -i -(i+1) 686 // s = s + 2 , y = y - s - 2 (5) 687 // i+1 i i+1 i i 688 // 689 // One may easily use induction to prove (4) and (5). 690 // Note. Since the left hand side of (3) contain only i+2 bits, 691 // it is not necessary to do a full (53-bit) comparison 692 // in (3). 693 // 3. Final rounding 694 // After generating the 53 bits result, we compute one more bit. 695 // Together with the remainder, we can decide whether the 696 // result is exact, bigger than 1/2ulp, or less than 1/2ulp 697 // (it will never equal to 1/2ulp). 698 // The rounding mode can be detected by checking whether 699 // huge + tiny is equal to huge, and whether huge - tiny is 700 // equal to huge for some floating point number "huge" and "tiny". 701 702 // Returns the square root of x. 703 export fn sqrtf64(x: f64) f64 = { 704 if (x == 0f64) { 705 return x; 706 } else if (isnan(x) || x == INF) { 707 return x; 708 } else if (x < 0f64) { 709 return NAN; 710 }; 711 712 let bits = f64bits(x); 713 714 // Normalize x 715 let exp = (((bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64); 716 if (exp == 0i64) { 717 // Subnormal x 718 for (bits & (1 << F64_MANTISSA_BITS) == 0) { 719 bits <<= 1; 720 exp -= 1; 721 }; 722 exp += 1; 723 }; 724 // Unbias exponent 725 exp -= (F64_EXPONENT_BIAS: i64); 726 bits = bits & ~(F64_EXPONENT_MASK << F64_MANTISSA_BITS); 727 bits = bits | (1u64 << (F64_MANTISSA_BITS: u64)); 728 // Odd exp, double x to make it even 729 if (exp & 1i64 == 1i64) { 730 bits <<= 1; 731 }; 732 // exp = exp/2, exponent of square root 733 exp >>= 1; 734 // Generate sqrt(x) bit by bit 735 bits <<= 1; 736 // q = sqrt(x) 737 let q = 0u64; 738 let s = 0u64; 739 // r = moving bit from MSB to LSB 740 let r = ((1u64 << (F64_MANTISSA_BITS + 1u64)): u64); 741 for (r != 0) { 742 const t = s + r; 743 if (t <= bits) { 744 s = t + r; 745 bits -= t; 746 q += r; 747 }; 748 bits <<= 1u64; 749 r >>= 1u64; 750 }; 751 // Final rounding 752 if (bits != 0) { 753 // Remainder, result not exact 754 // Round according to extra bit 755 q += q & 1; 756 }; 757 // significand + biased exponent 758 bits = (q >> 1) + ( 759 ((exp - 1i64 + (F64_EXPONENT_BIAS: i64)): u64) << 760 F64_MANTISSA_BITS); 761 return f64frombits(bits); 762 }; 763 764 @test fn sqrt() void = { 765 for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) { 766 assert(isclose( 767 sqrtf64(absf64(TEST_INPUTS[idx])), 768 TEST_SQRT[idx])); 769 }; 770 assert(sqrtf64(2f64) == SQRT_2); 771 assert(sqrtf64(4f64) == 2f64); 772 assert(sqrtf64(16f64) == 4f64); 773 assert(sqrtf64(65536f64) == 256f64); 774 assert(sqrtf64(powf64(123f64, 2f64)) == 123f64); 775 assert(sqrtf64(0f64) == 0f64); 776 assert(isnan(sqrtf64(NAN))); 777 assert(sqrtf64(INF) == INF); 778 assert(isnan(sqrtf64(-2f64))); 779 }; 780 781 fn is_f64_odd_int(x: f64) bool = { 782 const x_int_frac = modfracf64(x); 783 const x_int = x_int_frac.0; 784 const x_frac = x_int_frac.1; 785 const has_no_frac = (x_frac == 0f64); 786 const is_odd = ((x_int & 1i64) == 1i64); 787 return has_no_frac && is_odd; 788 }; 789 790 // Returns x^p. 791 export fn powf64(x: f64, p: f64) f64 = { 792 if (x == 1f64 || p == 0f64) { 793 return 1f64; 794 } else if (p == 1f64) { 795 return x; 796 } else if (isnan(x)) { 797 return NAN; 798 } else if (isnan(p)) { 799 return NAN; 800 } else if (x == 0f64) { 801 if (p < 0f64) { 802 if (is_f64_odd_int(p)) { 803 return copysignf64(INF, x); 804 } else { 805 return INF; 806 }; 807 } else if (p > 0f64) { 808 if (is_f64_odd_int(p)) { 809 return x; 810 } else { 811 return 0f64; 812 }; 813 }; 814 } else if (isinf(p)) { 815 if (x == -1f64) { 816 return 1f64; 817 } else if ((absf64(x) < 1f64) == (p == INF)) { 818 return 0f64; 819 }; 820 return INF; 821 } else if (isinf(x)) { 822 if (x == -INF) { 823 return powf64(-0f64, -p); 824 } else if (p < 0f64) { 825 return 0f64; 826 } else if (p > 0f64) { 827 return INF; 828 }; 829 } else if (p == 0.5f64) { 830 return sqrtf64(x); 831 } else if (p == -0.5f64) { 832 return 1f64 / sqrtf64(x); 833 }; 834 835 const x_parts = frexp(x); 836 let x_mantissa = x_parts.0; 837 let x_exp = x_parts.1; 838 839 const p_int_frac = modfracf64(absf64(p)); 840 let p_int = p_int_frac.0; 841 let p_frac = p_int_frac.1; 842 843 let res_mantissa = 1f64; 844 let res_exp = 0i64; 845 846 // The method used later in this function doesn't apply to fractional 847 // powers, so we have to handle these separately with 848 // x^p = e^{p * ln(x)} 849 if (p_frac != 0f64) { 850 if (p_frac > 0.5f64) { 851 p_frac -= 1f64; 852 p_int += 1i64; 853 }; 854 res_mantissa = expf64(p_frac * logf64(x)); 855 }; 856 857 // Repeatedly square our number x, for each bit in our power p. 858 // If the current bit is 1 in p, add the respective power of x to our 859 // result. 860 for (let i = p_int; i != 0; i >>= 1) { 861 // Check for over/underflow. 862 if (x_exp <= -1i64 << (F64_EXPONENT_BITS: i64)) { 863 return 0f64; 864 }; 865 if (x_exp >= 1i64 << (F64_EXPONENT_BITS: i64)) { 866 return INF; 867 }; 868 // Perform squaring. 869 if (i & 1i64 == 1i64) { 870 res_mantissa *= x_mantissa; 871 res_exp += x_exp; 872 }; 873 x_mantissa *= x_mantissa; 874 x_exp <<= 1; 875 // Correct mantisa to be in [0.5, 1). 876 if (x_mantissa < 0.5f64) { 877 x_mantissa += x_mantissa; 878 x_exp -= 1; 879 }; 880 }; 881 882 if (p < 0f64) { 883 res_mantissa = 1f64 / res_mantissa; 884 res_exp = -res_exp; 885 }; 886 887 return ldexpf64(res_mantissa, res_exp); 888 }; 889 890 @test fn powf64() void = { 891 for (let idx = 0z; idx < len(TEST_INPUTS); idx += 1) { 892 assert(eqwithin( 893 powf64(10f64, TEST_INPUTS[idx]), 894 TEST_POW[idx], 895 1e-8f64)); 896 }; 897 // Positive integer 898 assert(powf64(2f64, 2f64) == 4f64); 899 assert(powf64(3f64, 3f64) == 27f64); 900 // Very large positive integer 901 assert(!isinf(powf64(2f64, 1020f64))); 902 assert(isinf(powf64(2f64, 1050f64))); 903 // Negative integer 904 assert(powf64(2f64, -1f64) == 0.5f64); 905 assert(powf64(2f64, -2f64) == 0.25f64); 906 // Very small negative integer 907 assert(powf64(2f64, -1020f64) > 0f64); 908 assert(powf64(2f64, -1080f64) == 0f64); 909 // Positive fractional powers 910 assert(eqwithin(powf64(2f64, 1.5f64), 911 2.8284271247461900976033774f64, 912 1e-10f64)); 913 assert(eqwithin(powf64(2f64, 5.5f64), 914 45.254833995939041561654039f64, 915 1e-10f64)); 916 // Negative fractional powers 917 assert(eqwithin(powf64(2f64, -1.5f64), 918 0.3535533905932737622004221f64, 919 1e-10f64)); 920 assert(eqwithin(powf64(2f64, -5.5f64), 921 0.0220970869120796101375263f64, 922 1e-10f64)); 923 924 // Special cases 925 // pow(x, ±0) = 1 for any x 926 assert(powf64(123f64, 0f64) == 1f64); 927 // pow(1, y) = 1 for any y 928 assert(powf64(1f64, 123f64) == 1f64); 929 // pow(x, 1) = x for any x 930 assert(powf64(123f64, 1f64) == 123f64); 931 // pow(NaN, y) = NaN 932 assert(isnan(powf64(NAN, 123f64))); 933 // pow(x, NaN) = NaN 934 assert(isnan(powf64(123f64, NAN))); 935 // pow(±0, y) = ±Inf for y an odd integer < 0 936 assert(powf64(0f64, -3f64) == INF); 937 assert(powf64(-0f64, -3f64) == -INF); 938 // pow(±0, -Inf) = +Inf 939 assert(powf64(0f64, -INF) == INF); 940 assert(powf64(-0f64, -INF) == INF); 941 // pow(±0, +Inf) = +0 942 assert(powf64(0f64, INF) == 0f64); 943 assert(powf64(-0f64, INF) == 0f64); 944 // pow(±0, y) = +Inf for finite y < 0 and not an odd integer 945 assert(powf64(0f64, -2f64) == INF); 946 assert(powf64(0f64, -2f64) == INF); 947 //pow(±0, y) = ±0 for y an odd integer > 0 948 assert(powf64(0f64, 123f64) == 0f64); 949 const neg_zero = powf64(-0f64, 123f64); 950 assert(neg_zero == 0f64); 951 assert(isnegative(neg_zero)); 952 // pow(±0, y) = +0 for finite y > 0 and not an odd integer 953 assert(powf64(0f64, 8f64) == 0f64); 954 // pow(-1, ±Inf) = 1 955 assert(powf64(-1f64, INF) == 1f64); 956 assert(powf64(-1f64, -INF) == 1f64); 957 // pow(x, +Inf) = +Inf for |x| > 1 958 assert(powf64(123f64, INF) == INF); 959 // pow(x, -Inf) = +0 for |x| > 1 960 assert(powf64(123f64, -INF) == 0f64); 961 // pow(x, +Inf) = +0 for |x| < 1 962 assert(powf64(0.5f64, INF) == 0f64); 963 assert(powf64(-0.5f64, INF) == 0f64); 964 // pow(x, -Inf) = +Inf for |x| < 1 965 assert(powf64(0.5f64, -INF) == INF); 966 assert(powf64(-0.5f64, -INF) == INF); 967 // pow(+Inf, y) = +Inf for y > 0 968 assert(powf64(INF, 123f64) == INF); 969 // pow(+Inf, y) = +0 for y < 0 970 assert(powf64(INF, -1f64) == 0f64); 971 // pow(-Inf, y) = pow(-0, -y) 972 assert(powf64(-INF, 123f64) == powf64(-0f64, -123f64)); 973 // pow(x, y) = NaN for finite x < 0 and finite non-integer y 974 assert(isnan(powf64(-2f64, 1.23f64))); 975 // sqrt 976 assert(powf64(4f64, 0.5f64) == sqrtf64(4f64)); 977 assert(powf64(4f64, 0.5f64) == 2f64); 978 assert(powf64(4f64, -0.5f64) == (1f64 / sqrtf64(4f64))); 979 assert(powf64(4f64, -0.5f64) == (1f64 / 2f64)); 980 }; 981 982 // Returns the greatest integer value less than or equal to x. 983 export fn floorf64(x: f64) f64 = { 984 if (x == 0f64 || isnan(x) || isinf(x)) { 985 return x; 986 }; 987 if (x < 0f64) { 988 const modfrac_res = modfracf64(-x); 989 let int_part = modfrac_res.0; 990 const frac_part = modfrac_res.1; 991 if (frac_part != 0f64) { 992 int_part += 1; 993 }; 994 return -(int_part: f64); 995 }; 996 const modfrac_res = modfracf64(x); 997 return (modfrac_res.0: f64); 998 }; 999 1000 @test fn floor() void = { 1001 for (let idx = 0z; idx < 10; idx += 1) { 1002 assert(isclose( 1003 floorf64(TEST_INPUTS[idx]), 1004 TEST_FLOOR[idx])); 1005 }; 1006 assert(floorf64(-INF) == -INF); 1007 assert(floorf64(-0f64) == -0f64); 1008 assert(floorf64(0f64) == 0f64); 1009 assert(floorf64(INF) == INF); 1010 assert(isnan(floorf64(NAN))); 1011 }; 1012 1013 // Returns the least integer value greater than or equal to x. 1014 export fn ceilf64(x: f64) f64 = -floorf64(-x); 1015 1016 @test fn ceil() void = { 1017 for (let idx = 0z; idx < 10; idx += 1) { 1018 assert(isclose( 1019 ceilf64(TEST_INPUTS[idx]), 1020 TEST_CEIL[idx])); 1021 }; 1022 assert(ceilf64(-INF) == -INF); 1023 assert(ceilf64(-0f64) == -0f64); 1024 assert(ceilf64(0f64) == 0f64); 1025 assert(ceilf64(INF) == INF); 1026 assert(isnan(ceilf64(NAN))); 1027 }; 1028 1029 // Returns the integer value of x. 1030 export fn truncf64(x: f64) f64 = { 1031 if (x == 0f64 || isnan(x) || isinf(x)) { 1032 return x; 1033 }; 1034 const modfrac_res = modfracf64(x); 1035 return (modfrac_res.0: f64); 1036 }; 1037 1038 @test fn trunc() void = { 1039 for (let idx = 0z; idx < 10; idx += 1) { 1040 assert(isclose( 1041 truncf64(TEST_INPUTS[idx]), 1042 TEST_TRUNC[idx])); 1043 }; 1044 assert(truncf64(-INF) == -INF); 1045 assert(truncf64(-0f64) == -0f64); 1046 assert(truncf64(0f64) == 0f64); 1047 assert(truncf64(INF) == INF); 1048 assert(isnan(truncf64(NAN))); 1049 }; 1050 1051 // Returns the nearest integer, rounding half away from zero. 1052 export fn roundf64(x: f64) f64 = { 1053 let bits = f64bits(x); 1054 let e = (bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK; 1055 if (e < F64_EXPONENT_BIAS) { 1056 // Round abs(x) < 1 including denormals. 1057 bits &= F64_SIGN_MASK; // +-0 1058 if (e == F64_EXPONENT_BIAS - 1) { 1059 bits |= F64_ONE; // +-1 1060 }; 1061 } else if (e < F64_EXPONENT_BIAS + F64_MANTISSA_BITS) { 1062 // Round any abs(x) >= 1 containing a fractional component 1063 // [0,1). 1064 // Numbers with larger exponents are returned unchanged since 1065 // they must be either an integer, infinity, or NaN. 1066 const half = 1 << (F64_MANTISSA_BITS - 1); 1067 e -= F64_EXPONENT_BIAS; 1068 bits += half >> e; 1069 bits = bits & ~(F64_MANTISSA_MASK >> e); 1070 }; 1071 return f64frombits(bits); 1072 }; 1073 1074 @test fn round() void = { 1075 for (let idx = 0z; idx < 10; idx += 1) { 1076 assert(isclose( 1077 roundf64(TEST_INPUTS[idx]), 1078 TEST_ROUND[idx])); 1079 }; 1080 assert(roundf64(-INF) == -INF); 1081 assert(roundf64(-0f64) == -0f64); 1082 assert(roundf64(0f64) == 0f64); 1083 assert(roundf64(INF) == INF); 1084 assert(isnan(roundf64(NAN))); 1085 }; 1086 1087 // Returns the floating-point remainder of x / y. The magnitude of the result 1088 // is less than y and its sign agrees with that of x. 1089 export fn modf64(x: f64, y: f64) f64 = { 1090 if (y == 0f64) { 1091 return NAN; 1092 }; 1093 if (isinf(x) || isnan(x) || isnan(y)) { 1094 return NAN; 1095 }; 1096 1097 y = absf64(y); 1098 1099 const y_frexp = frexpf64(y); 1100 const y_frac = y_frexp.0; 1101 const y_exp = y_frexp.1; 1102 let r = x; 1103 if (x < 0f64) { 1104 r = -x; 1105 }; 1106 1107 for (r >= y) { 1108 const r_frexp = frexpf64(r); 1109 const r_frac = r_frexp.0; 1110 let r_exp = r_frexp.1; 1111 if (r_frac < y_frac) { 1112 r_exp -= 1i64; 1113 }; 1114 r = r - ldexpf64(y, r_exp - y_exp); 1115 }; 1116 if (x < 0f64) { 1117 r = -r; 1118 }; 1119 return r; 1120 }; 1121 1122 @test fn modf64() void = { 1123 for (let idx = 0z; idx < 10; idx += 1) { 1124 assert(isclose( 1125 modf64(10f64, TEST_INPUTS[idx]), 1126 TEST_MODF[idx])); 1127 }; 1128 1129 assert(isnan(modf64(-INF, -INF))); 1130 assert(isnan(modf64(-INF, -PI))); 1131 assert(isnan(modf64(-INF, 0f64))); 1132 assert(isnan(modf64(-INF, PI))); 1133 assert(isnan(modf64(-INF, INF))); 1134 assert(isnan(modf64(-INF, NAN))); 1135 assert(modf64(-PI, -INF) == -PI); 1136 assert(isnan(modf64(-PI, 0f64))); 1137 assert(modf64(-PI, INF) == -PI); 1138 assert(isnan(modf64(-PI, NAN))); 1139 assert(modf64(-0f64, -INF) == -0f64); 1140 assert(isnan(modf64(-0f64, 0f64))); 1141 assert(modf64(-0f64, INF) == -0f64); 1142 assert(isnan(modf64(-0f64, NAN))); 1143 assert(modf64(0f64, -INF) == 0f64); 1144 assert(isnan(modf64(0f64, 0f64))); 1145 assert(modf64(0f64, INF) == 0f64); 1146 assert(isnan(modf64(0f64, NAN))); 1147 assert(modf64(PI, -INF) == PI); 1148 assert(isnan(modf64(PI, 0f64))); 1149 assert(modf64(PI, INF) == PI); 1150 assert(isnan(modf64(PI, NAN))); 1151 assert(isnan(modf64(INF, -INF))); 1152 assert(isnan(modf64(INF, -PI))); 1153 assert(isnan(modf64(INF, 0f64))); 1154 assert(isnan(modf64(INF, PI))); 1155 assert(isnan(modf64(INF, INF))); 1156 assert(isnan(modf64(INF, NAN))); 1157 assert(isnan(modf64(NAN, -INF))); 1158 assert(isnan(modf64(NAN, -PI))); 1159 assert(isnan(modf64(NAN, 0f64))); 1160 assert(isnan(modf64(NAN, PI))); 1161 assert(isnan(modf64(NAN, INF))); 1162 assert(isnan(modf64(NAN, NAN))); 1163 assert(modf64(5.9790119248836734e+200, 1.1258465975523544) == 1164 0.6447968302508578); 1165 };