hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

math.ha (27411B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 // Sections of the code below, in particular log() and exp(), are based on Go's
      5 // implementation, which is, in turn, based on FreeBSD's. The original C code,
      6 // as well as the respective comments and constants are from
      7 // /usr/src/lib/msun/src/{e_log,e_exp}.c.
      8 //
      9 // The FreeBSD copyright notice:
     10 // ====================================================
     11 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
     12 //
     13 // Developed at SunPro, a Sun Microsystems, Inc. business.
     14 // Permission to use, copy, modify, and distribute this
     15 // software is freely granted, provided that this notice
     16 // is preserved.
     17 // ====================================================
     18 //
     19 // The Go copyright notice:
     20 // ====================================================
     21 // Copyright (c) 2009 The Go Authors. All rights reserved.
     22 //
     23 // Redistribution and use in source and binary forms, with or without
     24 // modification, are permitted provided that the following conditions are
     25 // met:
     26 //
     27 //    * Redistributions of source code must retain the above copyright
     28 // notice, this list of conditions and the following disclaimer.
     29 //    * Redistributions in binary form must reproduce the above
     30 // copyright notice, this list of conditions and the following disclaimer
     31 // in the documentation and/or other materials provided with the
     32 // distribution.
     33 //    * Neither the name of Google Inc. nor the names of its
     34 // contributors may be used to endorse or promote products derived from
     35 // this software without specific prior written permission.
     36 //
     37 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     38 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     39 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     40 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     41 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     42 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     43 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     44 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     45 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     46 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     47 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     48 // ====================================================
     49 
     50 use types;
     51 
     52 // The standard tolerance used by isclose(), which is just an arbitrary way to
     53 // measure whether two floating-point numbers are "sufficiently" close to each
     54 // other.
     55 export def STANDARD_TOL: f64 = 1e-14;
     56 
     57 // Returns whether x and y are within tol of each other.
     58 export fn eqwithinf64(x: f64, y: f64, tol: f64) bool = {
     59 	if (isnan(x) || isnan(y) || isnan(tol)) {
     60 		return false;
     61 	};
     62 	return absf64(x - y) < tol;
     63 };
     64 
     65 // Returns whether x and y are within tol of each other.
     66 export fn eqwithinf32(x: f32, y: f32, tol: f32) bool = {
     67 	if (isnan(x) || isnan(y) || isnan(tol)) {
     68 		return false;
     69 	};
     70 	return absf32(x - y) < tol;
     71 };
     72 
     73 // Returns whether x and y are within tol of each other.
     74 export fn eqwithin(
     75 	x: types::floating,
     76 	y: types::floating,
     77 	tol: types::floating,
     78 ) bool = {
     79 	match (x) {
     80 	case let n: f64 =>
     81 		return eqwithinf64(n, y as f64, tol as f64);
     82 	case let n: f32 =>
     83 		return eqwithinf32(n, y as f32, tol as f32);
     84 	};
     85 };
     86 
     87 // Returns whether x and y are within [[STANDARD_TOL]] of each other.
     88 export fn isclosef64(x: f64, y: f64) bool = {
     89 	return eqwithinf64(x, y, STANDARD_TOL);
     90 };
     91 
     92 // Returns whether x and y are within [[STANDARD_TOL]] of each other.
     93 export fn isclosef32(x: f32, y: f32) bool = {
     94 	return eqwithinf32(x, y, STANDARD_TOL: f32);
     95 };
     96 
     97 // Returns whether x and y are within [[STANDARD_TOL]] of each other.
     98 export fn isclose(x: types::floating, y: types::floating) bool = {
     99 	match (x) {
    100 	case let n: f64 =>
    101 		return isclosef64(n, y as f64);
    102 	case let n: f32 =>
    103 		return isclosef32(n, y as f32);
    104 	};
    105 };
    106 
    107 // e - https://oeis.org/A001113
    108 export def E: f64 = 2.71828182845904523536028747135266249775724709369995957496696763;
    109 // pi - https://oeis.org/A000796
    110 export def PI: f64 = 3.14159265358979323846264338327950288419716939937510582097494459;
    111 // tau - https://oeis.org/A019692
    112 export def TAU: f64 = 6.2831853071795864769252867665590057683943387987502116419498892;
    113 // phi - https://oeis.org/A001622
    114 export def PHI: f64 = 1.61803398874989484820458683436563811772030917980576286213544862;
    115 // sqrt(2) - https://oeis.org/A002193
    116 export def SQRT_2: f64 = 1.41421356237309504880168872420969807856967187537694807317667974;
    117 // sqrt(e) - https://oeis.org/A019774
    118 export def SQRT_E: f64 = 1.64872127070012814684865078781416357165377610071014801157507931;
    119 // sqrt(pi) - https://oeis.org/A002161
    120 export def SQRT_PI: f64 = 1.77245385090551602729816748334114518279754945612238712821380779;
    121 // sqrt(phi) - https://oeis.org/A139339
    122 export def SQRT_PHI: f64 = 1.27201964951406896425242246173749149171560804184009624861664038;
    123 // ln(2) - https://oeis.org/A002162
    124 export def LN_2: f64 = 0.693147180559945309417232121458176568075500134360255254120680009;
    125 // ln(2) - https://oeis.org/A002162
    126 export def LN2_HI: f64 = 6.93147180369123816490e-01;
    127 // ln(2) - https://oeis.org/A002162
    128 export def LN2_LO: f64 = 1.90821492927058770002e-10;
    129 // log_{2}(e)
    130 export def LOG2_E: f64 = 1f64 / LN_2;
    131 // ln(10) - https://oeis.org/A002392
    132 export def LN_10: f64 = 2.30258509299404568401799145468436420760110148862877297603332790;
    133 // log_{10}(e)
    134 export def LOG10_E: f64 = 1f64 / LN_10;
    135 
    136 // __ieee754_log(x)
    137 // Return the logarithm of x
    138 //
    139 // Method :
    140 //   1. Argument Reduction: find k and f such that
    141 //			x = 2**k * (1+f),
    142 //	   where  sqrt(2)/2 < 1+f < sqrt(2) .
    143 //
    144 //   2. Approximation of log(1+f).
    145 //	Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
    146 //		 = 2s + 2/3 s**3 + 2/5 s**5 + .....,
    147 //	     	 = 2s + s*R
    148 //      We use a special Reme algorithm on [0,0.1716] to generate
    149 //	a polynomial of degree 14 to approximate R.  The maximum error
    150 //	of this polynomial approximation is bounded by 2**-58.45. In
    151 //	other words,
    152 //		        2      4      6      8      10      12      14
    153 //	    R(z) ~ L1*s +L2*s +L3*s +L4*s +L5*s  +L6*s  +L7*s
    154 //	(the values of L1 to L7 are listed in the program) and
    155 //	    |      2          14          |     -58.45
    156 //	    | L1*s +...+L7*s    -  R(z) | <= 2
    157 //	    |                             |
    158 //	Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
    159 //	In order to guarantee error in log below 1ulp, we compute log by
    160 //		log(1+f) = f - s*(f - R)		(if f is not too large)
    161 //		log(1+f) = f - (hfsq - s*(hfsq+R)).	(better accuracy)
    162 //
    163 //	3. Finally,  log(x) = k*Ln2 + log(1+f).
    164 //			    = k*Ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*Ln2_lo)))
    165 //	   Here Ln2 is split into two floating point number:
    166 //			Ln2_hi + Ln2_lo,
    167 //	   where n*Ln2_hi is always exact for |n| < 2000.
    168 //
    169 // Special cases:
    170 //	log(x) is NaN with signal if x < 0 (including -INF) ;
    171 //	log(+INF) is +INF; log(0) is -INF with signal;
    172 //	log(NaN) is that NaN with no signal.
    173 //
    174 // Accuracy:
    175 //	according to an error analysis, the error is always less than
    176 //	1 ulp (unit in the last place).
    177 //
    178 // Constants:
    179 // The hexadecimal values are the intended ones for the following
    180 // constants. The decimal values may be used, provided that the
    181 // compiler will convert from decimal to binary accurately enough
    182 // to produce the hexadecimal values shown.
    183 
    184 // Returns the natural logarithm of x.
    185 export fn logf64(x: f64) f64 = {
    186 	const L1 = 6.666666666666735130e-01; // 3fe55555 55555593
    187 	const L2 = 3.999999999940941908e-01; // 3fd99999 9997fa04
    188 	const L3 = 2.857142874366239149e-01; // 3fd24924 94229359
    189 	const L4 = 2.222219843214978396e-01; // 3fcc71c5 1d8e78af
    190 	const L5 = 1.818357216161805012e-01; // 3fc74664 96cb03de
    191 	const L6 = 1.531383769920937332e-01; // 3fc39a09 d078c69f
    192 	const L7 = 1.479819860511658591e-01; // 3fc2f112 df3e5244
    193 
    194 	// special cases
    195 	if (isnan(x) || x == INF) {
    196 		return x;
    197 	} else if (x < 0f64) {
    198 		return NAN;
    199 	} else if (x == 0f64) {
    200 		return -INF;
    201 	};
    202 
    203 	// Reduce
    204 	const f1_and_ki = frexp(x);
    205 	let f1 = f1_and_ki.0;
    206 	let ki = f1_and_ki.1;
    207 	if (f1 < (SQRT_2 / 2f64)) {
    208 		f1 *= 2f64;
    209 		ki -= 1i64;
    210 	};
    211 	let f = f1 - 1f64;
    212 	let k = (ki: f64);
    213 
    214 	// Compute
    215 	const s = f / (2f64 + f);
    216 	const s2 = s * s;
    217 	const s4 = s2 * s2;
    218 	const t1 = s2 * (L1 + s4 * (L3 + s4 * (L5 + s4 * L7)));
    219 	const t2 = s4 * (L2 + s4 * (L4 + s4 * L6));
    220 	const R = t1 + t2;
    221 	const hfsq = 0.5f64 * f * f;
    222 	return k * LN2_HI - ((hfsq - (s * (hfsq + R) + k * LN2_LO)) - f);
    223 };
    224 
    225 // Returns the decimal logarithm of x.
    226 export fn log10f64(x: f64) f64 = {
    227 	return logf64(x) * (1f64 / LN_10);
    228 };
    229 
    230 // Returns the binary logarithm of x.
    231 export fn log2f64(x: f64) f64 = {
    232 	const frexp_res = frexpf64(x);
    233 	let frac = frexp_res.0;
    234 	let exp = frexp_res.1;
    235 	// Make sure exact powers of two give an exact answer.
    236 	// Don't depend on log(0.5) * (1 / LN_2) + exp being exactly exp - 1.
    237 	if (frac == 0.5f64) {
    238 		return ((exp - 1): f64);
    239 	};
    240 	return logf64(frac) * (1f64 / LN_2) + (exp: f64);
    241 };
    242 
    243 // double log1p(double x)
    244 //
    245 // Method :
    246 //   1. Argument Reduction: find k and f such that
    247 //                      1+x = 2**k * (1+f),
    248 //         where  sqrt(2)/2 < 1+f < sqrt(2) .
    249 //
    250 //      Note. If k=0, then f=x is exact. However, if k!=0, then f
    251 //      may not be representable exactly. In that case, a correction
    252 //      term is need. Let u=1+x rounded. Let c = (1+x)-u, then
    253 //      log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
    254 //      and add back the correction term c/u.
    255 //      (Note: when x > 2**53, one can simply return log(x))
    256 //
    257 //   2. Approximation of log1p(f).
    258 //      Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
    259 //               = 2s + 2/3 s**3 + 2/5 s**5 + .....,
    260 //               = 2s + s*R
    261 //      We use a special Reme algorithm on [0,0.1716] to generate
    262 //      a polynomial of degree 14 to approximate R The maximum error
    263 //      of this polynomial approximation is bounded by 2**-58.45. In
    264 //      other words,
    265 //                      2      4      6      8      10      12      14
    266 //          R(z) ~ LP1*s +LP2*s +LP3*s +LP4*s +LP5*s  +LP6*s  +LP7*s
    267 //      (the values of LP1 to LP7 are listed in the program)
    268 //      and
    269 //          |      2          14          |     -58.45
    270 //          | LP1*s +...+LP7*s    -  R(z) | <= 2
    271 //          |                             |
    272 //      Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
    273 //      In order to guarantee error in log below 1ulp, we compute log
    274 //      by
    275 //              log1p(f) = f - (hfsq - s*(hfsq+R)).
    276 //
    277 //   3. Finally, log1p(x) = k*ln2 + log1p(f).
    278 //                        = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
    279 //      Here ln2 is split into two floating point number:
    280 //                   ln2_hi + ln2_lo,
    281 //      where n*ln2_hi is always exact for |n| < 2000.
    282 //
    283 // Special cases:
    284 //      log1p(x) is NaN with signal if x < -1 (including -INF) ;
    285 //      log1p(+INF) is +INF; log1p(-1) is -INF with signal;
    286 //      log1p(NaN) is that NaN with no signal.
    287 //
    288 // Accuracy:
    289 //      according to an error analysis, the error is always less than
    290 //      1 ulp (unit in the last place).
    291 //
    292 // Constants:
    293 // The hexadecimal values are the intended ones for the following
    294 // constants. The decimal values may be used, provided that the
    295 // compiler will convert from decimal to binary accurately enough
    296 // to produce the hexadecimal values shown.
    297 //
    298 // Note: Assuming log() return accurate answer, the following
    299 //       algorithm can be used to compute log1p(x) to within a few ULP:
    300 //
    301 //              u = 1+x;
    302 //              if(u==1.0) return x ; else
    303 //                         return log(u)*(x/(u-1.0));
    304 //
    305 //       See HP-15C Advanced Functions Handbook, p.193.
    306 
    307 // Returns the natural logarithm of 1 plus its argument x.
    308 // It is more accurate than log(1 + x) when x is near zero.
    309 export fn log1pf64(x: f64) f64 = {
    310 	// sqrt(2) - 1
    311 	const SQRT2M1 = 4.142135623730950488017e-01; // 0x3fda827999fcef34
    312 	// sqrt(2) / 2 - 1
    313 	const SQRT2HALFM1 = -2.928932188134524755992e-01; // 0xbfd2bec333018866
    314 	const SMALL = 1f64 / ((1i64 << 29): f64); // 2**-29
    315 	const TINY = 1f64 / ((1i64 << 54): f64); // 2**-54
    316 	const TWO53 = ((1i64 << 53): f64); // 2**53
    317 	const LN2HI = 6.93147180369123816490e-01; // 3fe62e42fee00000
    318 	const LN2LO = 1.90821492927058770002e-10; // 3dea39ef35793c76
    319 	const LP1 = 6.666666666666735130e-01; // 3fe5555555555593
    320 	const LP2 = 3.999999999940941908e-01; // 3fd999999997fa04
    321 	const LP3 = 2.857142874366239149e-01; // 3fd2492494229359
    322 	const LP4 = 2.222219843214978396e-01; // 3fcc71c51d8e78af
    323 	const LP5 = 1.818357216161805012e-01; // 3fc7466496cb03de
    324 	const LP6 = 1.531383769920937332e-01; // 3fc39a09d078c69f
    325 	const LP7 = 1.479819860511658591e-01; // 3fc2f112df3e5244
    326 
    327 	if (x < -1f64 || isnan(x)) {
    328 		return NAN;
    329 	} else if (x == -1f64) {
    330 		return -INF;
    331 	} else if (x == INF) {
    332 		return INF;
    333 	};
    334 
    335 	const absx = absf64(x);
    336 
    337 	let f = 0f64;
    338 	let iu = 0u64;
    339 	let k = 1i64;
    340 	if (absx < SQRT2M1) { //  |x| < Sqrt(2)-1
    341 		if (absx < SMALL) { // |x| < 2**-29
    342 			if (absx < TINY) { // |x| < 2**-54
    343 				return x;
    344 			};
    345 			return x - (x * x * 0.5f64);
    346 		};
    347 		if (x > SQRT2HALFM1) { // Sqrt(2)/2-1 < x
    348 			// (Sqrt(2)/2-1) < x < (Sqrt(2)-1)
    349 			k = 0;
    350 			f = x;
    351 			iu = 1;
    352 		};
    353 	};
    354 	let c = 0f64;
    355 	if (k != 0) {
    356 		let u = 0f64;
    357 		if (absx < TWO53) { // 1<<53
    358 			u = 1.0 + x;
    359 			iu = f64bits(u);
    360 			k = (((iu >> 52) - 1023): i64);
    361 			// Correction term
    362 			if (k > 0) {
    363 				c = 1f64 - (u - x);
    364 			} else {
    365 				c = x - (u - 1f64);
    366 			};
    367 			c /= u;
    368 		} else {
    369 			u = x;
    370 			iu = f64bits(u);
    371 			k = (((iu >> 52) - 1023): i64);
    372 			c = 0f64;
    373 		};
    374 		iu &= 0x000fffffffffffff;
    375 		if (iu < 0x0006a09e667f3bcd) { // Mantissa of Sqrt(2)
    376 			// Normalize u
    377 			u = f64frombits(iu | 0x3ff0000000000000);
    378 		} else {
    379 			k += 1;
    380 			// Normalize u/2
    381 			u = f64frombits(iu | 0x3fe0000000000000);
    382 			iu = (0x0010000000000000 - iu) >> 2;
    383 		};
    384 		f = u - 1f64; // Sqrt(2)/2 < u < Sqrt(2)
    385 	};
    386 	const hfsq = 0.5 * f * f;
    387 	let s = 0f64;
    388 	let R = 0f64;
    389 	let z = 0f64;
    390 	if (iu == 0) { // |f| < 2**-20
    391 		if (f == 0f64) {
    392 			if (k == 0) {
    393 				return 0f64;
    394 			};
    395 			c += (k: f64) * LN2LO;
    396 			return (k: f64) * LN2HI + c;
    397 		};
    398 		R = hfsq * (1.0 - 0.66666666666666666 * f); // Avoid division
    399 		if (k == 0) {
    400 			return f - R;
    401 		};
    402 		return (k: f64) * LN2HI - ((R - ((k: f64) * LN2LO + c)) - f);
    403 	};
    404 	s = f / (2f64 + f);
    405 	z = s * s;
    406 	R = z * (LP1 +
    407 		z * (LP2 +
    408 		z * (LP3 +
    409 		z * (LP4 +
    410 		z * (LP5 +
    411 		z * (LP6 +
    412 		z * LP7))))));
    413 	if (k == 0) {
    414 		return f - (hfsq - s * (hfsq + R));
    415 	};
    416 	return (k: f64) * LN2HI -
    417 		((hfsq - (s * (hfsq + R) + ((k: f64) * LN2LO + c))) - f);
    418 };
    419 
    420 // exp(x)
    421 // Returns the exponential of x.
    422 //
    423 // Method
    424 //   1. Argument reduction:
    425 //      Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
    426 //      Given x, find r and integer k such that
    427 //
    428 //               x = k*ln2 + r,  |r| <= 0.5*ln2.
    429 //
    430 //      Here r will be represented as r = hi-lo for better
    431 //      accuracy.
    432 //
    433 //   2. Approximation of exp(r) by a special rational function on
    434 //      the interval [0,0.34658]:
    435 //      Write
    436 //          R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
    437 //      We use a special Remez algorithm on [0,0.34658] to generate
    438 //      a polynomial of degree 5 to approximate R. The maximum error
    439 //      of this polynomial approximation is bounded by 2**-59. In
    440 //      other words,
    441 //          R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
    442 //      (where z=r*r, and the values of P1 to P5 are listed below)
    443 //      and
    444 //          |                  5          |     -59
    445 //          | 2.0+P1*z+...+P5*z   -  R(z) | <= 2
    446 //          |                             |
    447 //      The computation of exp(r) thus becomes
    448 //                             2*r
    449 //              exp(r) = 1 + -------
    450 //                            R - r
    451 //                                 r*R1(r)
    452 //                     = 1 + r + ----------- (for better accuracy)
    453 //                                2 - R1(r)
    454 //      where
    455 //                               2       4             10
    456 //              R1(r) = r - (P1*r  + P2*r  + ... + P5*r   ).
    457 //
    458 //   3. Scale back to obtain exp(x):
    459 //      From step 1, we have
    460 //         exp(x) = 2**k * exp(r)
    461 //
    462 // Special cases:
    463 //      exp(INF) is INF, exp(NaN) is NaN;
    464 //      exp(-INF) is 0, and
    465 //      for finite argument, only exp(0)=1 is exact.
    466 //
    467 // Accuracy:
    468 //      according to an error analysis, the error is always less than
    469 //      1 ulp (unit in the last place).
    470 //
    471 // Misc. info.
    472 //      For IEEE double
    473 //          if x >  7.09782712893383973096e+02 then exp(x) overflow
    474 //          if x < -7.45133219101941108420e+02 then exp(x) underflow
    475 //
    476 // Constants:
    477 // The hexadecimal values are the intended ones for the following
    478 // constants. The decimal values may be used, provided that the
    479 // compiler will convert from decimal to binary accurately enough
    480 // to produce the hexadecimal values shown.
    481 
    482 // Returns e^r * 2^k where r = hi - lo and |r| <= (ln(2) / 2).
    483 export fn expmultif64(hi: f64, lo: f64, k: i64) f64 = {
    484 	const P1 = 1.66666666666666657415e-01; // 0x3fc55555; 0x55555555
    485 	const P2 = -2.77777777770155933842e-03; // 0xbf66c16c; 0X16bebd9n
    486 	const P3 = 6.61375632143793436117e-05; // 0x3f11566a; 0Xaf25de2c
    487 	const P4 = -1.65339022054652515390e-06; // 0xbebbbd41; 0Xc5d26bf1
    488 	const P5 = 4.13813679705723846039e-08; // 0x3e663769; 0X72bea4d0
    489 
    490 	let r = hi - lo;
    491 	let t = r * r;
    492 	let c = r - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
    493 	const y = 1f64 - ((lo - (r * c) / (2f64 - c)) - hi);
    494 	return ldexpf64(y, k);
    495 };
    496 
    497 // Returns e^x.
    498 export fn expf64(x: f64) f64 = {
    499 	const overflow = 7.09782712893383973096e+02;
    500 	const underflow = -7.45133219101941108420e+02;
    501 	const near_zero = 1f64 / ((1i64 << 28i64): f64);
    502 
    503 	if (isnan(x) || x == INF) {
    504 		return x;
    505 	} else if (x == -INF) {
    506 		return 0f64;
    507 	} else if (x > overflow) {
    508 		return INF;
    509 	} else if (x < underflow) {
    510 		return 0f64;
    511 	} else if (-near_zero < x && x < near_zero) {
    512 		return 1f64 + x;
    513 	};
    514 
    515 	// Reduce; computed as r = hi - lo for extra precision.
    516 	let k = 0i64;
    517 	if (x < 0f64) {
    518 		k = (((LOG2_E * x) - 0.5): i64);
    519 	} else if (x > 0f64) {
    520 		k = (((LOG2_E * x) + 0.5): i64);
    521 	};
    522 	const hi = x - ((k: f64) * LN2_HI);
    523 	const lo = (k: f64) * LN2_LO;
    524 
    525 	// Compute
    526 	return expmultif64(hi, lo, k);
    527 };
    528 
    529 // Returns 2^x.
    530 export fn exp2f64(x: f64) f64 = {
    531 	const overflow = 1.0239999999999999e+03;
    532 	const underflow = -1.0740e+03;
    533 
    534 	if (isnan(x) || x == INF) {
    535 		return x;
    536 	} else if (x == -INF) {
    537 		return 0f64;
    538 	} else if (x > overflow) {
    539 		return INF;
    540 	} else if (x < underflow) {
    541 		return 0f64;
    542 	};
    543 
    544 	// Argument reduction; x = r×lg(e) + k with |r| ≤ ln(2)/2.
    545 	// Computed as r = hi - lo for extra precision.
    546 	let k = 0i64;
    547 	if (x > 0f64) {
    548 		k = ((x + 0.5): i64);
    549 	} else if (x < 0f64) {
    550 		k = ((x - 0.5): i64);
    551 	};
    552 	const t = x - (k: f64);
    553 	const hi = t * LN2_HI;
    554 	const lo = -t * LN2_LO;
    555 
    556 	// Compute
    557 	return expmultif64(hi, lo, k);
    558 };
    559 
    560 // __ieee754_sqrt(x)
    561 // Return correctly rounded sqrt.
    562 //           -----------------------------------------
    563 //           | Use the hardware sqrt if you have one |
    564 //           -----------------------------------------
    565 // Method:
    566 //   Bit by bit method using integer arithmetic. (Slow, but portable)
    567 //   1. Normalization
    568 //      Scale x to y in [1,4) with even powers of 2:
    569 //      find an integer k such that  1 <= (y=x*2**(2k)) < 4, then
    570 //              sqrt(x) = 2**k * sqrt(y)
    571 //   2. Bit by bit computation
    572 //      Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
    573 //           i                                                   0
    574 //                                     i+1         2
    575 //          s  = 2*q , and      y  =  2   * ( y - q  ).          (1)
    576 //           i      i            i                 i
    577 //
    578 //      To compute q    from q , one checks whether
    579 //                  i+1       i
    580 //
    581 //                            -(i+1) 2
    582 //                      (q + 2      )  <= y.                     (2)
    583 //                        i
    584 //                                                            -(i+1)
    585 //      If (2) is false, then q   = q ; otherwise q   = q  + 2      .
    586 //                             i+1   i             i+1   i
    587 //
    588 //      With some algebraic manipulation, it is not difficult to see
    589 //      that (2) is equivalent to
    590 //                             -(i+1)
    591 //                      s  +  2       <= y                       (3)
    592 //                       i                i
    593 //
    594 //      The advantage of (3) is that s  and y  can be computed by
    595 //                                    i      i
    596 //      the following recurrence formula:
    597 //          if (3) is false
    598 //
    599 //          s     =  s  ,       y    = y   ;                     (4)
    600 //           i+1      i          i+1    i
    601 //
    602 //      otherwise,
    603 //                         -i                      -(i+1)
    604 //          s     =  s  + 2  ,  y    = y  -  s  - 2              (5)
    605 //           i+1      i          i+1    i     i
    606 //
    607 //      One may easily use induction to prove (4) and (5).
    608 //      Note. Since the left hand side of (3) contain only i+2 bits,
    609 //            it is not necessary to do a full (53-bit) comparison
    610 //            in (3).
    611 //   3. Final rounding
    612 //      After generating the 53 bits result, we compute one more bit.
    613 //      Together with the remainder, we can decide whether the
    614 //      result is exact, bigger than 1/2ulp, or less than 1/2ulp
    615 //      (it will never equal to 1/2ulp).
    616 //      The rounding mode can be detected by checking whether
    617 //      huge + tiny is equal to huge, and whether huge - tiny is
    618 //      equal to huge for some floating point number "huge" and "tiny".
    619 
    620 // Returns the square root of x.
    621 export fn sqrtf64(x: f64) f64 = {
    622 	if (x == 0f64) {
    623 		return x;
    624 	} else if (isnan(x) || x == INF) {
    625 		return x;
    626 	} else if (x < 0f64) {
    627 		return NAN;
    628 	};
    629 
    630 	let bits = f64bits(x);
    631 
    632 	// Normalize x
    633 	let exp = (((bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64);
    634 	if (exp == 0i64) {
    635 		// Subnormal x
    636 		for (bits & (1 << F64_MANTISSA_BITS) == 0) {
    637 			bits <<= 1;
    638 			exp -= 1;
    639 		};
    640 		exp += 1;
    641 	};
    642 	// Unbias exponent
    643 	exp -= (F64_EXPONENT_BIAS: i64);
    644 	bits = bits & ~(F64_EXPONENT_MASK << F64_MANTISSA_BITS);
    645 	bits = bits | (1u64 << (F64_MANTISSA_BITS: u64));
    646 	// Odd exp, double x to make it even
    647 	if (exp & 1i64 == 1i64) {
    648 		bits <<= 1;
    649 	};
    650 	// exp = exp/2, exponent of square root
    651 	exp >>= 1;
    652 	// Generate sqrt(x) bit by bit
    653 	bits <<= 1;
    654 	// q = sqrt(x)
    655 	let q = 0u64;
    656 	let s = 0u64;
    657 	// r = moving bit from MSB to LSB
    658 	let r = ((1u64 << (F64_MANTISSA_BITS + 1u64)): u64);
    659 	for (r != 0) {
    660 		const t = s + r;
    661 		if (t <= bits) {
    662 			s = t + r;
    663 			bits -= t;
    664 			q += r;
    665 		};
    666 		bits <<= 1u64;
    667 		r >>= 1u64;
    668 	};
    669 	// Final rounding
    670 	if (bits != 0) {
    671 		// Remainder, result not exact
    672 		// Round according to extra bit
    673 		q += q & 1;
    674 	};
    675 	// significand + biased exponent
    676 	bits = (q >> 1) + (
    677 		((exp - 1i64 + (F64_EXPONENT_BIAS: i64)): u64) <<
    678 		F64_MANTISSA_BITS);
    679 	return f64frombits(bits);
    680 };
    681 
    682 fn is_f64_odd_int(x: f64) bool = {
    683 	const (x_int, x_frac) = modfracf64(x);
    684 	const has_no_frac = (x_frac == 0f64);
    685 	const is_odd = ((x_int: i64 & 1i64) == 1i64);
    686 	return has_no_frac && is_odd;
    687 };
    688 
    689 // Returns x^p.
    690 export fn powf64(x: f64, p: f64) f64 = {
    691 	if (x == 1f64 || p == 0f64) {
    692 		return 1f64;
    693 	} else if (p == 1f64) {
    694 		return x;
    695 	} else if (isnan(x)) {
    696 		return NAN;
    697 	} else if (isnan(p)) {
    698 		return NAN;
    699 	} else if (x == 0f64) {
    700 		if (p < 0f64) {
    701 			if (is_f64_odd_int(p)) {
    702 				return copysignf64(INF, x);
    703 			} else {
    704 				return INF;
    705 			};
    706 		} else if (p > 0f64) {
    707 			if (is_f64_odd_int(p)) {
    708 				return x;
    709 			} else {
    710 				return 0f64;
    711 			};
    712 		};
    713 	} else if (isinf(p)) {
    714 		if (x == -1f64) {
    715 			return 1f64;
    716 		} else if ((absf64(x) < 1f64) == (p == INF)) {
    717 			return 0f64;
    718 		};
    719 		return INF;
    720 	} else if (isinf(x)) {
    721 		if (x == -INF) {
    722 			return powf64(-0f64, -p);
    723 		} else if (p < 0f64) {
    724 			return 0f64;
    725 		} else if (p > 0f64) {
    726 			return INF;
    727 		};
    728 	} else if (p == 0.5f64) {
    729 		return sqrtf64(x);
    730 	} else if (p == -0.5f64) {
    731 		return 1f64 / sqrtf64(x);
    732 	};
    733 
    734 	let (p_int, p_frac) = modfracf64(absf64(p));
    735 	if (p_frac != 0f64 && x < 0f64) {
    736 		return NAN;
    737 	};
    738 	if (p_int > types::I64_MAX: f64) {
    739 		if (x == -1f64) {
    740 			return 1f64;
    741 		} else if ((absf64(x) < 1f64) == (p > 0f64)) {
    742 			return 0f64;
    743 		} else {
    744 			return INF;
    745 		};
    746 	};
    747 
    748 	let res_mantissa = 1f64;
    749 	let res_exp = 0i64;
    750 
    751 	// The method used later in this function doesn't apply to fractional
    752 	// powers, so we have to handle these separately with
    753 	// x^p = e^{p * ln(x)}
    754 	if (p_frac != 0f64) {
    755 		if (p_frac > 0.5f64) {
    756 			p_frac -= 1f64;
    757 			p_int += 1f64;
    758 		};
    759 		res_mantissa = expf64(p_frac * logf64(x));
    760 	};
    761 
    762 	// Repeatedly square our number x, for each bit in our power p.
    763 	// If the current bit is 1 in p, add the respective power of x to our
    764 	// result.
    765 	let (x_mantissa, x_exp) = frexp(x);
    766 	for (let i = p_int: i64; i != 0; i >>= 1) {
    767 		// Check for over/underflow.
    768 		if (x_exp <= -1i64 << (F64_EXPONENT_BITS: i64)) {
    769 			return 0f64;
    770 		};
    771 		if (x_exp >= 1i64 << (F64_EXPONENT_BITS: i64)) {
    772 			return INF;
    773 		};
    774 		// Perform squaring.
    775 		if (i & 1i64 == 1i64) {
    776 			res_mantissa *= x_mantissa;
    777 			res_exp += x_exp;
    778 		};
    779 		x_mantissa *= x_mantissa;
    780 		x_exp <<= 1;
    781 		// Correct mantisa to be in [0.5, 1).
    782 		if (x_mantissa < 0.5f64) {
    783 			x_mantissa += x_mantissa;
    784 			x_exp -= 1;
    785 		};
    786 	};
    787 
    788 	if (p < 0f64) {
    789 		res_mantissa = 1f64 / res_mantissa;
    790 		res_exp = -res_exp;
    791 	};
    792 
    793 	return ldexpf64(res_mantissa, res_exp);
    794 };
    795 
    796 // Returns the greatest integer value less than or equal to x.
    797 export fn floorf64(x: f64) f64 = {
    798 	if (x == 0f64 || isnan(x) || isinf(x)) {
    799 		return x;
    800 	};
    801 	if (x < 0f64) {
    802 		let (int_part, frac_part) = modfracf64(-x);
    803 		if (frac_part != 0f64) {
    804 			int_part += 1f64;
    805 		};
    806 		return -int_part;
    807 	};
    808 	return modfracf64(x).0;
    809 };
    810 
    811 // Returns the least integer value greater than or equal to x.
    812 export fn ceilf64(x: f64) f64 = -floorf64(-x);
    813 
    814 // Returns the integer value of x.
    815 export fn truncf64(x: f64) f64 = {
    816 	if (x == 0f64 || isnan(x) || isinf(x)) {
    817 		return x;
    818 	};
    819 	return modfracf64(x).0;
    820 };
    821 
    822 // Returns the nearest integer, rounding half away from zero.
    823 export fn roundf64(x: f64) f64 = {
    824 	let bits = f64bits(x);
    825 	let e = (bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK;
    826 	if (e < F64_EXPONENT_BIAS) {
    827 		// Round abs(x) < 1 including denormals.
    828 		bits &= F64_SIGN_MASK; // +-0
    829 		if (e == F64_EXPONENT_BIAS - 1) {
    830 			bits |= F64_ONE; // +-1
    831 		};
    832 	} else if (e < F64_EXPONENT_BIAS + F64_MANTISSA_BITS) {
    833 		// Round any abs(x) >= 1 containing a fractional component
    834 		// [0,1).
    835 		// Numbers with larger exponents are returned unchanged since
    836 		// they must be either an integer, infinity, or NaN.
    837 		const half = 1 << (F64_MANTISSA_BITS - 1);
    838 		e -= F64_EXPONENT_BIAS;
    839 		bits += half >> e;
    840 		bits = bits & ~(F64_MANTISSA_MASK >> e);
    841 	};
    842 	return f64frombits(bits);
    843 };
    844 
    845 // Returns the floating-point remainder of x / y. The magnitude of the result
    846 // is less than y and its sign agrees with that of x.
    847 export fn modf64(x: f64, y: f64) f64 = {
    848 	if (y == 0f64) {
    849 		return NAN;
    850 	};
    851 	if (isinf(x) || isnan(x) || isnan(y)) {
    852 		return NAN;
    853 	};
    854 
    855 	y = absf64(y);
    856 
    857 	const y_frexp = frexpf64(y);
    858 	const y_frac = y_frexp.0;
    859 	const y_exp = y_frexp.1;
    860 	let r = x;
    861 	if (x < 0f64) {
    862 		r = -x;
    863 	};
    864 
    865 	for (r >= y) {
    866 		const r_frexp = frexpf64(r);
    867 		const r_frac = r_frexp.0;
    868 		let r_exp = r_frexp.1;
    869 		if (r_frac < y_frac) {
    870 			r_exp -= 1i64;
    871 		};
    872 		r = r - ldexpf64(y, r_exp - y_exp);
    873 	};
    874 	if (x < 0f64) {
    875 		r = -r;
    876 	};
    877 	return r;
    878 };