math.ha - hare - [hare] The Hare programming language

math.ha (26382B)
      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 // Sections of the code below, in particular log() and exp(), are based on Go's
      5 // implementation, which is, in turn, based on FreeBSD's. The original C code,
      6 // as well as the respective comments and constants are from
      7 // /usr/src/lib/msun/src/{e_log,e_exp}.c.
      8 //
      9 // The FreeBSD copyright notice:
     10 // ====================================================
     11 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
     12 //
     13 // Developed at SunPro, a Sun Microsystems, Inc. business.
     14 // Permission to use, copy, modify, and distribute this
     15 // software is freely granted, provided that this notice
     16 // is preserved.
     17 // ====================================================
     18 //
     19 // The Go copyright notice:
     20 // ====================================================
     21 // Copyright (c) 2009 The Go Authors. All rights reserved.
     22 //
     23 // Redistribution and use in source and binary forms, with or without
     24 // modification, are permitted provided that the following conditions are
     25 // met:
     26 //
     27 //    * Redistributions of source code must retain the above copyright
     28 // notice, this list of conditions and the following disclaimer.
     29 //    * Redistributions in binary form must reproduce the above
     30 // copyright notice, this list of conditions and the following disclaimer
     31 // in the documentation and/or other materials provided with the
     32 // distribution.
     33 //    * Neither the name of Google Inc. nor the names of its
     34 // contributors may be used to endorse or promote products derived from
     35 // this software without specific prior written permission.
     36 //
     37 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     38 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     39 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     40 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     41 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     42 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     43 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     44 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     45 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     46 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     47 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     48 // ====================================================
     49 
     50 use types;
     51 
     52 // The standard tolerance used by [[isclosef32]] and [[isclosef64]], which is
     53 // just an arbitrary way to measure whether two floating-point numbers are
     54 // "sufficiently" close to each other.
     55 export def STANDARD_TOL = 1e-14;
     56 
     57 // Returns whether x and y are within tol of each other.
     58 export fn isclosef64(x: f64, y: f64, tol: f64 = STANDARD_TOL) bool = {
     59 	if (isnan(x) || isnan(y) || isnan(tol)) {
     60 		return false;
     61 	};
     62 	return absf64(x - y) < tol;
     63 };
     64 
     65 // Returns whether x and y are within tol of each other.
     66 export fn isclosef32(x: f32, y: f32, tol: f32 = STANDARD_TOL) bool = {
     67 	if (isnan(x) || isnan(y) || isnan(tol)) {
     68 		return false;
     69 	};
     70 	return absf32(x - y) < tol;
     71 };
     72 
     73 // e - https://oeis.org/A001113
     74 export def E: f64 = 2.71828182845904523536028747135266249775724709369995957496696763;
     75 // pi - https://oeis.org/A000796
     76 export def PI: f64 = 3.14159265358979323846264338327950288419716939937510582097494459;
     77 // tau - https://oeis.org/A019692
     78 export def TAU: f64 = 6.2831853071795864769252867665590057683943387987502116419498892;
     79 // phi - https://oeis.org/A001622
     80 export def PHI: f64 = 1.61803398874989484820458683436563811772030917980576286213544862;
     81 // sqrt(2) - https://oeis.org/A002193
     82 export def SQRT_2: f64 = 1.41421356237309504880168872420969807856967187537694807317667974;
     83 // sqrt(e) - https://oeis.org/A019774
     84 export def SQRT_E: f64 = 1.64872127070012814684865078781416357165377610071014801157507931;
     85 // sqrt(pi) - https://oeis.org/A002161
     86 export def SQRT_PI: f64 = 1.77245385090551602729816748334114518279754945612238712821380779;
     87 // sqrt(phi) - https://oeis.org/A139339
     88 export def SQRT_PHI: f64 = 1.27201964951406896425242246173749149171560804184009624861664038;
     89 // ln(2) - https://oeis.org/A002162
     90 export def LN_2: f64 = 0.693147180559945309417232121458176568075500134360255254120680009;
     91 // ln(2) - https://oeis.org/A002162
     92 export def LN2_HI: f64 = 6.93147180369123816490e-01;
     93 // ln(2) - https://oeis.org/A002162
     94 export def LN2_LO: f64 = 1.90821492927058770002e-10;
     95 // log_{2}(e)
     96 export def LOG2_E: f64 = 1f64 / LN_2;
     97 // ln(10) - https://oeis.org/A002392
     98 export def LN_10: f64 = 2.30258509299404568401799145468436420760110148862877297603332790;
     99 // log_{10}(e)
    100 export def LOG10_E: f64 = 1f64 / LN_10;
    101 
    102 // __ieee754_log(x)
    103 // Return the logarithm of x
    104 //
    105 // Method :
    106 //   1. Argument Reduction: find k and f such that
    107 //			x = 2**k * (1+f),
    108 //	   where  sqrt(2)/2 < 1+f < sqrt(2) .
    109 //
    110 //   2. Approximation of log(1+f).
    111 //	Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
    112 //		 = 2s + 2/3 s**3 + 2/5 s**5 + .....,
    113 //	     	 = 2s + s*R
    114 //      We use a special Reme algorithm on [0,0.1716] to generate
    115 //	a polynomial of degree 14 to approximate R.  The maximum error
    116 //	of this polynomial approximation is bounded by 2**-58.45. In
    117 //	other words,
    118 //		        2      4      6      8      10      12      14
    119 //	    R(z) ~ L1*s +L2*s +L3*s +L4*s +L5*s  +L6*s  +L7*s
    120 //	(the values of L1 to L7 are listed in the program) and
    121 //	    |      2          14          |     -58.45
    122 //	    | L1*s +...+L7*s    -  R(z) | <= 2
    123 //	    |                             |
    124 //	Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
    125 //	In order to guarantee error in log below 1ulp, we compute log by
    126 //		log(1+f) = f - s*(f - R)		(if f is not too large)
    127 //		log(1+f) = f - (hfsq - s*(hfsq+R)).	(better accuracy)
    128 //
    129 //	3. Finally,  log(x) = k*Ln2 + log(1+f).
    130 //			    = k*Ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*Ln2_lo)))
    131 //	   Here Ln2 is split into two floating point number:
    132 //			Ln2_hi + Ln2_lo,
    133 //	   where n*Ln2_hi is always exact for |n| < 2000.
    134 //
    135 // Special cases:
    136 //	log(x) is NaN with signal if x < 0 (including -INF) ;
    137 //	log(+INF) is +INF; log(0) is -INF with signal;
    138 //	log(NaN) is that NaN with no signal.
    139 //
    140 // Accuracy:
    141 //	according to an error analysis, the error is always less than
    142 //	1 ulp (unit in the last place).
    143 //
    144 // Constants:
    145 // The hexadecimal values are the intended ones for the following
    146 // constants. The decimal values may be used, provided that the
    147 // compiler will convert from decimal to binary accurately enough
    148 // to produce the hexadecimal values shown.
    149 
    150 // Returns the natural logarithm of x.
    151 export fn logf64(x: f64) f64 = {
    152 	const L1 = 6.666666666666735130e-01; // 3fe55555 55555593
    153 	const L2 = 3.999999999940941908e-01; // 3fd99999 9997fa04
    154 	const L3 = 2.857142874366239149e-01; // 3fd24924 94229359
    155 	const L4 = 2.222219843214978396e-01; // 3fcc71c5 1d8e78af
    156 	const L5 = 1.818357216161805012e-01; // 3fc74664 96cb03de
    157 	const L6 = 1.531383769920937332e-01; // 3fc39a09 d078c69f
    158 	const L7 = 1.479819860511658591e-01; // 3fc2f112 df3e5244
    159 
    160 	// special cases
    161 	if (isnan(x) || x == INF) {
    162 		return x;
    163 	} else if (x < 0f64) {
    164 		return NAN;
    165 	} else if (x == 0f64) {
    166 		return -INF;
    167 	};
    168 
    169 	// Reduce
    170 	const (f1, ki) = frexpf64(x);
    171 	if (f1 < (SQRT_2 / 2f64)) {
    172 		f1 *= 2f64;
    173 		ki -= 1i64;
    174 	};
    175 	let f = f1 - 1f64;
    176 	let k = (ki: f64);
    177 
    178 	// Compute
    179 	const s = f / (2f64 + f);
    180 	const s2 = s * s;
    181 	const s4 = s2 * s2;
    182 	const t1 = s2 * (L1 + s4 * (L3 + s4 * (L5 + s4 * L7)));
    183 	const t2 = s4 * (L2 + s4 * (L4 + s4 * L6));
    184 	const R = t1 + t2;
    185 	const hfsq = 0.5f64 * f * f;
    186 	return k * LN2_HI - ((hfsq - (s * (hfsq + R) + k * LN2_LO)) - f);
    187 };
    188 
    189 // Returns the decimal logarithm of x.
    190 export fn log10f64(x: f64) f64 = {
    191 	return logf64(x) * (1f64 / LN_10);
    192 };
    193 
    194 // Returns the binary logarithm of x.
    195 export fn log2f64(x: f64) f64 = {
    196 	const (frac, exp) = frexpf64(x);
    197 	// Make sure exact powers of two give an exact answer.
    198 	// Don't depend on log(0.5) * (1 / LN_2) + exp being exactly exp - 1.
    199 	if (frac == 0.5f64) {
    200 		return ((exp - 1): f64);
    201 	};
    202 	return logf64(frac) * (1f64 / LN_2) + (exp: f64);
    203 };
    204 
    205 // double log1p(double x)
    206 //
    207 // Method :
    208 //   1. Argument Reduction: find k and f such that
    209 //                      1+x = 2**k * (1+f),
    210 //         where  sqrt(2)/2 < 1+f < sqrt(2) .
    211 //
    212 //      Note. If k=0, then f=x is exact. However, if k!=0, then f
    213 //      may not be representable exactly. In that case, a correction
    214 //      term is need. Let u=1+x rounded. Let c = (1+x)-u, then
    215 //      log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
    216 //      and add back the correction term c/u.
    217 //      (Note: when x > 2**53, one can simply return log(x))
    218 //
    219 //   2. Approximation of log1p(f).
    220 //      Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
    221 //               = 2s + 2/3 s**3 + 2/5 s**5 + .....,
    222 //               = 2s + s*R
    223 //      We use a special Reme algorithm on [0,0.1716] to generate
    224 //      a polynomial of degree 14 to approximate R The maximum error
    225 //      of this polynomial approximation is bounded by 2**-58.45. In
    226 //      other words,
    227 //                      2      4      6      8      10      12      14
    228 //          R(z) ~ LP1*s +LP2*s +LP3*s +LP4*s +LP5*s  +LP6*s  +LP7*s
    229 //      (the values of LP1 to LP7 are listed in the program)
    230 //      and
    231 //          |      2          14          |     -58.45
    232 //          | LP1*s +...+LP7*s    -  R(z) | <= 2
    233 //          |                             |
    234 //      Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
    235 //      In order to guarantee error in log below 1ulp, we compute log
    236 //      by
    237 //              log1p(f) = f - (hfsq - s*(hfsq+R)).
    238 //
    239 //   3. Finally, log1p(x) = k*ln2 + log1p(f).
    240 //                        = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
    241 //      Here ln2 is split into two floating point number:
    242 //                   ln2_hi + ln2_lo,
    243 //      where n*ln2_hi is always exact for |n| < 2000.
    244 //
    245 // Special cases:
    246 //      log1p(x) is NaN with signal if x < -1 (including -INF) ;
    247 //      log1p(+INF) is +INF; log1p(-1) is -INF with signal;
    248 //      log1p(NaN) is that NaN with no signal.
    249 //
    250 // Accuracy:
    251 //      according to an error analysis, the error is always less than
    252 //      1 ulp (unit in the last place).
    253 //
    254 // Constants:
    255 // The hexadecimal values are the intended ones for the following
    256 // constants. The decimal values may be used, provided that the
    257 // compiler will convert from decimal to binary accurately enough
    258 // to produce the hexadecimal values shown.
    259 //
    260 // Note: Assuming log() return accurate answer, the following
    261 //       algorithm can be used to compute log1p(x) to within a few ULP:
    262 //
    263 //              u = 1+x;
    264 //              if(u==1.0) return x ; else
    265 //                         return log(u)*(x/(u-1.0));
    266 //
    267 //       See HP-15C Advanced Functions Handbook, p.193.
    268 
    269 // Returns the natural logarithm of 1 plus its argument x.
    270 // It is more accurate than log(1 + x) when x is near zero.
    271 export fn log1pf64(x: f64) f64 = {
    272 	// sqrt(2) - 1
    273 	const SQRT2M1 = 4.142135623730950488017e-01; // 0x3fda827999fcef34
    274 	// sqrt(2) / 2 - 1
    275 	const SQRT2HALFM1 = -2.928932188134524755992e-01; // 0xbfd2bec333018866
    276 	const SMALL = 1f64 / ((1i64 << 29): f64); // 2**-29
    277 	const TINY = 1f64 / ((1i64 << 54): f64); // 2**-54
    278 	const TWO53 = ((1i64 << 53): f64); // 2**53
    279 	const LN2HI = 6.93147180369123816490e-01; // 3fe62e42fee00000
    280 	const LN2LO = 1.90821492927058770002e-10; // 3dea39ef35793c76
    281 	const LP1 = 6.666666666666735130e-01; // 3fe5555555555593
    282 	const LP2 = 3.999999999940941908e-01; // 3fd999999997fa04
    283 	const LP3 = 2.857142874366239149e-01; // 3fd2492494229359
    284 	const LP4 = 2.222219843214978396e-01; // 3fcc71c51d8e78af
    285 	const LP5 = 1.818357216161805012e-01; // 3fc7466496cb03de
    286 	const LP6 = 1.531383769920937332e-01; // 3fc39a09d078c69f
    287 	const LP7 = 1.479819860511658591e-01; // 3fc2f112df3e5244
    288 
    289 	if (x < -1f64 || isnan(x)) {
    290 		return NAN;
    291 	} else if (x == -1f64) {
    292 		return -INF;
    293 	} else if (x == INF) {
    294 		return INF;
    295 	};
    296 
    297 	const absx = absf64(x);
    298 
    299 	let f = 0f64;
    300 	let iu = 0u64;
    301 	let k = 1i64;
    302 	if (absx < SQRT2M1) { //  |x| < Sqrt(2)-1
    303 		if (absx < SMALL) { // |x| < 2**-29
    304 			if (absx < TINY) { // |x| < 2**-54
    305 				return x;
    306 			};
    307 			return x - (x * x * 0.5f64);
    308 		};
    309 		if (x > SQRT2HALFM1) { // Sqrt(2)/2-1 < x
    310 			// (Sqrt(2)/2-1) < x < (Sqrt(2)-1)
    311 			k = 0;
    312 			f = x;
    313 			iu = 1;
    314 		};
    315 	};
    316 	let c = 0f64;
    317 	if (k != 0) {
    318 		let u = 0f64;
    319 		if (absx < TWO53) { // 1<<53
    320 			u = 1.0 + x;
    321 			iu = f64bits(u);
    322 			k = (((iu >> 52) - 1023): i64);
    323 			// Correction term
    324 			if (k > 0) {
    325 				c = 1f64 - (u - x);
    326 			} else {
    327 				c = x - (u - 1f64);
    328 			};
    329 			c /= u;
    330 		} else {
    331 			u = x;
    332 			iu = f64bits(u);
    333 			k = (((iu >> 52) - 1023): i64);
    334 			c = 0f64;
    335 		};
    336 		iu &= 0x000fffffffffffff;
    337 		if (iu < 0x0006a09e667f3bcd) { // Mantissa of Sqrt(2)
    338 			// Normalize u
    339 			u = f64frombits(iu | 0x3ff0000000000000);
    340 		} else {
    341 			k += 1;
    342 			// Normalize u/2
    343 			u = f64frombits(iu | 0x3fe0000000000000);
    344 			iu = (0x0010000000000000 - iu) >> 2;
    345 		};
    346 		f = u - 1f64; // Sqrt(2)/2 < u < Sqrt(2)
    347 	};
    348 	const hfsq = 0.5 * f * f;
    349 	let s = 0f64;
    350 	let R = 0f64;
    351 	let z = 0f64;
    352 	if (iu == 0) { // |f| < 2**-20
    353 		if (f == 0f64) {
    354 			if (k == 0) {
    355 				return 0f64;
    356 			};
    357 			c += (k: f64) * LN2LO;
    358 			return (k: f64) * LN2HI + c;
    359 		};
    360 		R = hfsq * (1.0 - 0.66666666666666666 * f); // Avoid division
    361 		if (k == 0) {
    362 			return f - R;
    363 		};
    364 		return (k: f64) * LN2HI - ((R - ((k: f64) * LN2LO + c)) - f);
    365 	};
    366 	s = f / (2f64 + f);
    367 	z = s * s;
    368 	R = z * (LP1 +
    369 		z * (LP2 +
    370 		z * (LP3 +
    371 		z * (LP4 +
    372 		z * (LP5 +
    373 		z * (LP6 +
    374 		z * LP7))))));
    375 	if (k == 0) {
    376 		return f - (hfsq - s * (hfsq + R));
    377 	};
    378 	return (k: f64) * LN2HI -
    379 		((hfsq - (s * (hfsq + R) + ((k: f64) * LN2LO + c))) - f);
    380 };
    381 
    382 // exp(x)
    383 // Returns the exponential of x.
    384 //
    385 // Method
    386 //   1. Argument reduction:
    387 //      Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
    388 //      Given x, find r and integer k such that
    389 //
    390 //               x = k*ln2 + r,  |r| <= 0.5*ln2.
    391 //
    392 //      Here r will be represented as r = hi-lo for better
    393 //      accuracy.
    394 //
    395 //   2. Approximation of exp(r) by a special rational function on
    396 //      the interval [0,0.34658]:
    397 //      Write
    398 //          R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
    399 //      We use a special Remez algorithm on [0,0.34658] to generate
    400 //      a polynomial of degree 5 to approximate R. The maximum error
    401 //      of this polynomial approximation is bounded by 2**-59. In
    402 //      other words,
    403 //          R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
    404 //      (where z=r*r, and the values of P1 to P5 are listed below)
    405 //      and
    406 //          |                  5          |     -59
    407 //          | 2.0+P1*z+...+P5*z   -  R(z) | <= 2
    408 //          |                             |
    409 //      The computation of exp(r) thus becomes
    410 //                             2*r
    411 //              exp(r) = 1 + -------
    412 //                            R - r
    413 //                                 r*R1(r)
    414 //                     = 1 + r + ----------- (for better accuracy)
    415 //                                2 - R1(r)
    416 //      where
    417 //                               2       4             10
    418 //              R1(r) = r - (P1*r  + P2*r  + ... + P5*r   ).
    419 //
    420 //   3. Scale back to obtain exp(x):
    421 //      From step 1, we have
    422 //         exp(x) = 2**k * exp(r)
    423 //
    424 // Special cases:
    425 //      exp(INF) is INF, exp(NaN) is NaN;
    426 //      exp(-INF) is 0, and
    427 //      for finite argument, only exp(0)=1 is exact.
    428 //
    429 // Accuracy:
    430 //      according to an error analysis, the error is always less than
    431 //      1 ulp (unit in the last place).
    432 //
    433 // Misc. info.
    434 //      For IEEE double
    435 //          if x >  7.09782712893383973096e+02 then exp(x) overflow
    436 //          if x < -7.45133219101941108420e+02 then exp(x) underflow
    437 //
    438 // Constants:
    439 // The hexadecimal values are the intended ones for the following
    440 // constants. The decimal values may be used, provided that the
    441 // compiler will convert from decimal to binary accurately enough
    442 // to produce the hexadecimal values shown.
    443 
    444 // Returns e^r * 2^k where r = hi - lo and |r| <= (ln(2) / 2).
    445 export fn expmultif64(hi: f64, lo: f64, k: i64) f64 = {
    446 	const P1 = 1.66666666666666657415e-01; // 0x3fc55555; 0x55555555
    447 	const P2 = -2.77777777770155933842e-03; // 0xbf66c16c; 0X16bebd9n
    448 	const P3 = 6.61375632143793436117e-05; // 0x3f11566a; 0Xaf25de2c
    449 	const P4 = -1.65339022054652515390e-06; // 0xbebbbd41; 0Xc5d26bf1
    450 	const P5 = 4.13813679705723846039e-08; // 0x3e663769; 0X72bea4d0
    451 
    452 	let r = hi - lo;
    453 	let t = r * r;
    454 	let c = r - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
    455 	const y = 1f64 - ((lo - (r * c) / (2f64 - c)) - hi);
    456 	return ldexpf64(y, k);
    457 };
    458 
    459 // Returns e^x.
    460 export fn expf64(x: f64) f64 = {
    461 	const overflow = 7.09782712893383973096e+02;
    462 	const underflow = -7.45133219101941108420e+02;
    463 	const near_zero = 1f64 / ((1i64 << 28i64): f64);
    464 
    465 	if (isnan(x) || x == INF) {
    466 		return x;
    467 	} else if (x == -INF) {
    468 		return 0f64;
    469 	} else if (x > overflow) {
    470 		return INF;
    471 	} else if (x < underflow) {
    472 		return 0f64;
    473 	} else if (-near_zero < x && x < near_zero) {
    474 		return 1f64 + x;
    475 	};
    476 
    477 	// Reduce; computed as r = hi - lo for extra precision.
    478 	let k = 0i64;
    479 	if (x < 0f64) {
    480 		k = (((LOG2_E * x) - 0.5): i64);
    481 	} else if (x > 0f64) {
    482 		k = (((LOG2_E * x) + 0.5): i64);
    483 	};
    484 	const hi = x - ((k: f64) * LN2_HI);
    485 	const lo = (k: f64) * LN2_LO;
    486 
    487 	// Compute
    488 	return expmultif64(hi, lo, k);
    489 };
    490 
    491 // Returns 2^x.
    492 export fn exp2f64(x: f64) f64 = {
    493 	const overflow = 1.0239999999999999e+03;
    494 	const underflow = -1.0740e+03;
    495 
    496 	if (isnan(x) || x == INF) {
    497 		return x;
    498 	} else if (x == -INF) {
    499 		return 0f64;
    500 	} else if (x > overflow) {
    501 		return INF;
    502 	} else if (x < underflow) {
    503 		return 0f64;
    504 	};
    505 
    506 	// Argument reduction; x = r×lg(e) + k with |r| ≤ ln(2)/2.
    507 	// Computed as r = hi - lo for extra precision.
    508 	let k = 0i64;
    509 	if (x > 0f64) {
    510 		k = ((x + 0.5): i64);
    511 	} else if (x < 0f64) {
    512 		k = ((x - 0.5): i64);
    513 	};
    514 	const t = x - (k: f64);
    515 	const hi = t * LN2_HI;
    516 	const lo = -t * LN2_LO;
    517 
    518 	// Compute
    519 	return expmultif64(hi, lo, k);
    520 };
    521 
    522 // __ieee754_sqrt(x)
    523 // Return correctly rounded sqrt.
    524 //           -----------------------------------------
    525 //           | Use the hardware sqrt if you have one |
    526 //           -----------------------------------------
    527 // Method:
    528 //   Bit by bit method using integer arithmetic. (Slow, but portable)
    529 //   1. Normalization
    530 //      Scale x to y in [1,4) with even powers of 2:
    531 //      find an integer k such that  1 <= (y=x*2**(2k)) < 4, then
    532 //              sqrt(x) = 2**k * sqrt(y)
    533 //   2. Bit by bit computation
    534 //      Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
    535 //           i                                                   0
    536 //                                     i+1         2
    537 //          s  = 2*q , and      y  =  2   * ( y - q  ).          (1)
    538 //           i      i            i                 i
    539 //
    540 //      To compute q    from q , one checks whether
    541 //                  i+1       i
    542 //
    543 //                            -(i+1) 2
    544 //                      (q + 2      )  <= y.                     (2)
    545 //                        i
    546 //                                                            -(i+1)
    547 //      If (2) is false, then q   = q ; otherwise q   = q  + 2      .
    548 //                             i+1   i             i+1   i
    549 //
    550 //      With some algebraic manipulation, it is not difficult to see
    551 //      that (2) is equivalent to
    552 //                             -(i+1)
    553 //                      s  +  2       <= y                       (3)
    554 //                       i                i
    555 //
    556 //      The advantage of (3) is that s  and y  can be computed by
    557 //                                    i      i
    558 //      the following recurrence formula:
    559 //          if (3) is false
    560 //
    561 //          s     =  s  ,       y    = y   ;                     (4)
    562 //           i+1      i          i+1    i
    563 //
    564 //      otherwise,
    565 //                         -i                      -(i+1)
    566 //          s     =  s  + 2  ,  y    = y  -  s  - 2              (5)
    567 //           i+1      i          i+1    i     i
    568 //
    569 //      One may easily use induction to prove (4) and (5).
    570 //      Note. Since the left hand side of (3) contain only i+2 bits,
    571 //            it is not necessary to do a full (53-bit) comparison
    572 //            in (3).
    573 //   3. Final rounding
    574 //      After generating the 53 bits result, we compute one more bit.
    575 //      Together with the remainder, we can decide whether the
    576 //      result is exact, bigger than 1/2ulp, or less than 1/2ulp
    577 //      (it will never equal to 1/2ulp).
    578 //      The rounding mode can be detected by checking whether
    579 //      huge + tiny is equal to huge, and whether huge - tiny is
    580 //      equal to huge for some floating point number "huge" and "tiny".
    581 
    582 // Returns the square root of x.
    583 export fn sqrtf64(x: f64) f64 = {
    584 	if (x == 0f64) {
    585 		return x;
    586 	} else if (isnan(x) || x == INF) {
    587 		return x;
    588 	} else if (x < 0f64) {
    589 		return NAN;
    590 	};
    591 
    592 	let bits = f64bits(x);
    593 
    594 	// Normalize x
    595 	let exp = (((bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64);
    596 	if (exp == 0i64) {
    597 		// Subnormal x
    598 		for (bits & (1 << F64_MANTISSA_BITS) == 0) {
    599 			bits <<= 1;
    600 			exp -= 1;
    601 		};
    602 		exp += 1;
    603 	};
    604 	// Unbias exponent
    605 	exp -= (F64_EXPONENT_BIAS: i64);
    606 	bits = bits & ~(F64_EXPONENT_MASK << F64_MANTISSA_BITS);
    607 	bits = bits | (1u64 << (F64_MANTISSA_BITS: u64));
    608 	// Odd exp, double x to make it even
    609 	if (exp & 1i64 == 1i64) {
    610 		bits <<= 1;
    611 	};
    612 	// exp = exp/2, exponent of square root
    613 	exp >>= 1;
    614 	// Generate sqrt(x) bit by bit
    615 	bits <<= 1;
    616 	// q = sqrt(x)
    617 	let q = 0u64;
    618 	let s = 0u64;
    619 	// r = moving bit from MSB to LSB
    620 	let r = ((1u64 << (F64_MANTISSA_BITS + 1u64)): u64);
    621 	for (r != 0) {
    622 		const t = s + r;
    623 		if (t <= bits) {
    624 			s = t + r;
    625 			bits -= t;
    626 			q += r;
    627 		};
    628 		bits <<= 1u64;
    629 		r >>= 1u64;
    630 	};
    631 	// Final rounding
    632 	if (bits != 0) {
    633 		// Remainder, result not exact
    634 		// Round according to extra bit
    635 		q += q & 1;
    636 	};
    637 	// significand + biased exponent
    638 	bits = (q >> 1) + (
    639 		((exp - 1i64 + (F64_EXPONENT_BIAS: i64)): u64) <<
    640 		F64_MANTISSA_BITS);
    641 	return f64frombits(bits);
    642 };
    643 
    644 fn is_f64_odd_int(x: f64) bool = {
    645 	const (x_int, x_frac) = modfracf64(x);
    646 	const has_no_frac = (x_frac == 0f64);
    647 	const is_odd = ((x_int: i64 & 1i64) == 1i64);
    648 	return has_no_frac && is_odd;
    649 };
    650 
    651 // Returns x^p.
    652 export fn powf64(x: f64, p: f64) f64 = {
    653 	if (x == 1f64 || p == 0f64) {
    654 		return 1f64;
    655 	} else if (p == 1f64) {
    656 		return x;
    657 	} else if (isnan(x)) {
    658 		return NAN;
    659 	} else if (isnan(p)) {
    660 		return NAN;
    661 	} else if (x == 0f64) {
    662 		if (p < 0f64) {
    663 			if (is_f64_odd_int(p)) {
    664 				return copysignf64(INF, x);
    665 			} else {
    666 				return INF;
    667 			};
    668 		} else if (p > 0f64) {
    669 			if (is_f64_odd_int(p)) {
    670 				return x;
    671 			} else {
    672 				return 0f64;
    673 			};
    674 		};
    675 	} else if (isinf(p)) {
    676 		if (x == -1f64) {
    677 			return 1f64;
    678 		} else if ((absf64(x) < 1f64) == (p == INF)) {
    679 			return 0f64;
    680 		};
    681 		return INF;
    682 	} else if (isinf(x)) {
    683 		if (x == -INF) {
    684 			return powf64(-0f64, -p);
    685 		} else if (p < 0f64) {
    686 			return 0f64;
    687 		} else if (p > 0f64) {
    688 			return INF;
    689 		};
    690 	} else if (p == 0.5f64) {
    691 		return sqrtf64(x);
    692 	} else if (p == -0.5f64) {
    693 		return 1f64 / sqrtf64(x);
    694 	};
    695 
    696 	let (p_int, p_frac) = modfracf64(absf64(p));
    697 	if (p_frac != 0f64 && x < 0f64) {
    698 		return NAN;
    699 	};
    700 	if (p_int > types::I64_MAX: f64) {
    701 		if (x == -1f64) {
    702 			return 1f64;
    703 		} else if ((absf64(x) < 1f64) == (p > 0f64)) {
    704 			return 0f64;
    705 		} else {
    706 			return INF;
    707 		};
    708 	};
    709 
    710 	let res_mantissa = 1f64;
    711 	let res_exp = 0i64;
    712 
    713 	// The method used later in this function doesn't apply to fractional
    714 	// powers, so we have to handle these separately with
    715 	// x^p = e^{p * ln(x)}
    716 	if (p_frac != 0f64) {
    717 		if (p_frac > 0.5f64) {
    718 			p_frac -= 1f64;
    719 			p_int += 1f64;
    720 		};
    721 		res_mantissa = expf64(p_frac * logf64(x));
    722 	};
    723 
    724 	// Repeatedly square our number x, for each bit in our power p.
    725 	// If the current bit is 1 in p, add the respective power of x to our
    726 	// result.
    727 	let (x_mantissa, x_exp) = frexpf64(x);
    728 	for (let i = p_int: i64; i != 0; i >>= 1) {
    729 		// Check for over/underflow.
    730 		if (x_exp <= -1i64 << (F64_EXPONENT_BITS: i64)) {
    731 			return 0f64;
    732 		};
    733 		if (x_exp >= 1i64 << (F64_EXPONENT_BITS: i64)) {
    734 			return INF;
    735 		};
    736 		// Perform squaring.
    737 		if (i & 1i64 == 1i64) {
    738 			res_mantissa *= x_mantissa;
    739 			res_exp += x_exp;
    740 		};
    741 		x_mantissa *= x_mantissa;
    742 		x_exp <<= 1;
    743 		// Correct mantisa to be in [0.5, 1).
    744 		if (x_mantissa < 0.5f64) {
    745 			x_mantissa += x_mantissa;
    746 			x_exp -= 1;
    747 		};
    748 	};
    749 
    750 	if (p < 0f64) {
    751 		res_mantissa = 1f64 / res_mantissa;
    752 		res_exp = -res_exp;
    753 	};
    754 
    755 	return ldexpf64(res_mantissa, res_exp);
    756 };
    757 
    758 // Returns the greatest integer value less than or equal to x.
    759 export fn floorf64(x: f64) f64 = {
    760 	if (x == 0f64 || isnan(x) || isinf(x)) {
    761 		return x;
    762 	};
    763 	if (x < 0f64) {
    764 		let (int_part, frac_part) = modfracf64(-x);
    765 		if (frac_part != 0f64) {
    766 			int_part += 1f64;
    767 		};
    768 		return -int_part;
    769 	};
    770 	return modfracf64(x).0;
    771 };
    772 
    773 // Returns the least integer value greater than or equal to x.
    774 export fn ceilf64(x: f64) f64 = -floorf64(-x);
    775 
    776 // Returns the integer value of x.
    777 export fn truncf64(x: f64) f64 = {
    778 	if (x == 0f64 || isnan(x) || isinf(x)) {
    779 		return x;
    780 	};
    781 	return modfracf64(x).0;
    782 };
    783 
    784 // Returns the nearest integer, rounding half away from zero.
    785 export fn roundf64(x: f64) f64 = {
    786 	let bits = f64bits(x);
    787 	let e = (bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK;
    788 	if (e < F64_EXPONENT_BIAS) {
    789 		// Round abs(x) < 1 including denormals.
    790 		bits &= F64_SIGN_MASK; // +-0
    791 		if (e == F64_EXPONENT_BIAS - 1) {
    792 			bits |= F64_ONE; // +-1
    793 		};
    794 	} else if (e < F64_EXPONENT_BIAS + F64_MANTISSA_BITS) {
    795 		// Round any abs(x) >= 1 containing a fractional component
    796 		// [0,1).
    797 		// Numbers with larger exponents are returned unchanged since
    798 		// they must be either an integer, infinity, or NaN.
    799 		const half = 1u64 << (F64_MANTISSA_BITS - 1);
    800 		e -= F64_EXPONENT_BIAS;
    801 		bits += half >> e;
    802 		bits = bits & ~(F64_MANTISSA_MASK >> e);
    803 	};
    804 	return f64frombits(bits);
    805 };
    806 
    807 // Returns the floating-point remainder of x / y. The magnitude of the result
    808 // is less than y and its sign agrees with that of x.
    809 export fn modf64(x: f64, y: f64) f64 = {
    810 	if (y == 0f64) {
    811 		return NAN;
    812 	};
    813 	if (isinf(x) || isnan(x) || isnan(y)) {
    814 		return NAN;
    815 	};
    816 
    817 	y = absf64(y);
    818 
    819 	const (y_frac, y_exp) = frexpf64(y);
    820 	let r = x;
    821 	if (x < 0f64) {
    822 		r = -x;
    823 	};
    824 
    825 	for (r >= y) {
    826 		const (r_frac, r_exp) = frexpf64(r);
    827 		if (r_frac < y_frac) {
    828 			r_exp -= 1i64;
    829 		};
    830 		r = r - ldexpf64(y, r_exp - y_exp);
    831 	};
    832 	if (x < 0f64) {
    833 		r = -r;
    834 	};
    835 	return r;
    836 };
	hare [hare] The Hare programming language
	git clone https://git.torresjrjr.com/hare.git
	Log \| Files \| Refs \| README \| LICENSE