hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

uints.ha (17273B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 // Sections of the code below are based on Go's implementation, in particular
      5 // https://raw.githubusercontent.com/golang/go/master/src/math/bits/bits.go.
      6 //
      7 // The Go copyright notice:
      8 // ====================================================
      9 // Copyright (c) 2009 The Go Authors. All rights reserved.
     10 //
     11 // Redistribution and use in source and binary forms, with or without
     12 // modification, are permitted provided that the following conditions are
     13 // met:
     14 //
     15 //    * Redistributions of source code must retain the above copyright
     16 // notice, this list of conditions and the following disclaimer.
     17 //    * Redistributions in binary form must reproduce the above
     18 // copyright notice, this list of conditions and the following disclaimer
     19 // in the documentation and/or other materials provided with the
     20 // distribution.
     21 //    * Neither the name of Google Inc. nor the names of its
     22 // contributors may be used to endorse or promote products derived from
     23 // this software without specific prior written permission.
     24 //
     25 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     26 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     27 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     28 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     29 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     30 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     31 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     32 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     33 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     34 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     35 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     36 // ====================================================
     37 
     38 // The number of bits required to represent the first 256 numbers.
     39 const LEN8TAB: [256]u8 = [
     40 	0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04,
     41 	0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
     42 	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06,
     43 	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
     44 	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
     45 	0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
     46 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
     47 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
     48 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
     49 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
     50 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08,
     51 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     52 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     53 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     54 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     55 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     56 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     57 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     58 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     59 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     60 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     61 	0x08, 0x08, 0x08, 0x08,
     62 ];
     63 
     64 const NTZ8TAB: [256]u8 = [
     65 	0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00,
     66 	0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     67 	0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x05, 0x00, 0x01, 0x00,
     68 	0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     69 	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00,
     70 	0x02, 0x00, 0x01, 0x00, 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     71 	0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00,
     72 	0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     73 	0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00,
     74 	0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     75 	0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x07, 0x00, 0x01, 0x00,
     76 	0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     77 	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00,
     78 	0x02, 0x00, 0x01, 0x00, 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     79 	0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00,
     80 	0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     81 	0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00,
     82 	0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     83 	0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x05, 0x00, 0x01, 0x00,
     84 	0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     85 	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00,
     86 	0x02, 0x00, 0x01, 0x00,
     87 ];
     88 
     89 // See https://web.archive.org/web/20240314060050/http://supertech.csail.mit.edu/papers/debruijn.pdf
     90 def DEBRUIJN32: u32 = 0x077CB531;
     91 
     92 const DEBRUIJN32TAB: [32]u8 = [
     93 	0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
     94 	31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
     95 ];
     96 
     97 def DEBRUIJN64: u64 = 0x03f79d71b4ca8b09;
     98 
     99 const DEBRUIJN64TAB: [64]u8 = [
    100 	0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
    101 	62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
    102 	63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
    103 	54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
    104 ];
    105 
    106 // Returns the minimum number of bits required to represent x.
    107 export fn bit_size(x: u64) u8 = {
    108 	let res = 0u8;
    109 	if (x >= 1u64 << 32) {
    110 		x >>= 32;
    111 		res += 32;
    112 	};
    113 	if (x >= 1u64 << 16) {
    114 		x >>= 16;
    115 		res += 16;
    116 	};
    117 	if (x >= 1u64 << 8) {
    118 		x >>= 8;
    119 		res += 8;
    120 	};
    121 	return res + LEN8TAB[x];
    122 };
    123 
    124 @test fn bit_size() void = {
    125 	assert(bit_size(0) == 0);
    126 	assert(bit_size(1) == 1);
    127 	assert(bit_size(2) == 2);
    128 	assert(bit_size(5) == 3);
    129 	assert(bit_size(31111) == 15);
    130 	assert(bit_size(536870911) == 29);
    131 	assert(bit_size(8589934591) == 33);
    132 };
    133 
    134 // Returns the number of leading zero bits in x
    135 // The result is size(uint) * 8 for x == 0.
    136 export fn leading_zeros_u(x: uint) u8 = size(uint): u8 * 8 - bit_size(x);
    137 
    138 // Returns the number of leading zero bits in x
    139 // The result is 8 for x == 0.
    140 export fn leading_zeros_u8(x: u8) u8 = 8 - bit_size(x);
    141 
    142 // Returns the number of leading zero bits in x
    143 // The result is 16 for x == 0.
    144 export fn leading_zeros_u16(x: u16) u8 = 16 - bit_size(x);
    145 
    146 // Returns the number of leading zero bits in x
    147 // The result is 32 for x == 0.
    148 export fn leading_zeros_u32(x: u32) u8 = 32 - bit_size(x);
    149 
    150 // Returns the number of leading zero bits in x
    151 // The result is 64 for x == 0.
    152 export fn leading_zeros_u64(x: u64) u8 = 64 - bit_size(x);
    153 
    154 @test fn leading_zeros_u() void = {
    155 	assert(leading_zeros_u(0) == size(uint) * 8);
    156 	assert(leading_zeros_u(1) == size(uint) * 8 - 1);
    157 	assert(leading_zeros_u8(0) == 8);
    158 	assert(leading_zeros_u8(1) == 8 - 1);
    159 	assert(leading_zeros_u16(0) == 16);
    160 	assert(leading_zeros_u16(1) == 16 - 1);
    161 	assert(leading_zeros_u32(0) == 32);
    162 	assert(leading_zeros_u32(1) == 32 - 1);
    163 	assert(leading_zeros_u64(0) == 64);
    164 	assert(leading_zeros_u64(1) == 64 - 1);
    165 };
    166 
    167 // Returns the number of trailing zero bits in x
    168 // The result is size(uint) * 8 for x == 0.
    169 export fn trailing_zeros_u(x: uint) u8 = {
    170 	if (size(uint) == 4) {
    171 		return trailing_zeros_u32(x: u32);
    172 	};
    173 	return trailing_zeros_u64(x: u64);
    174 };
    175 
    176 // Returns the number of trailing zero bits in x
    177 // The result is 8 for x == 0.
    178 export fn trailing_zeros_u8(x: u8) u8 = NTZ8TAB[x];
    179 
    180 // Returns the number of trailing zero bits in x
    181 // The result is 16 for x == 0.
    182 export fn trailing_zeros_u16(x: u16) u8 = {
    183 	if (x == 0) {
    184 		return 16;
    185 	};
    186 	return DEBRUIJN32TAB[(x & -x): u32 * DEBRUIJN32 >> (32 - 5)];
    187 };
    188 
    189 // Returns the number of trailing zero bits in x
    190 // The result is 32 for x == 0.
    191 export fn trailing_zeros_u32(x: u32) u8 = {
    192 	if (x == 0) {
    193 		return 32;
    194 	};
    195 	return DEBRUIJN32TAB[(x & -x) * DEBRUIJN32 >> (32 - 5)];
    196 };
    197 
    198 // Returns the number of trailing zero bits in x
    199 // The result is 64 for x == 0.
    200 export fn trailing_zeros_u64(x: u64) u8 = {
    201 	if (x == 0) {
    202 		return 64;
    203 	};
    204 	return DEBRUIJN64TAB[(x & -x) * DEBRUIJN64 >> (64 - 6)];
    205 };
    206 
    207 @test fn trailing_zeros_u() void = {
    208 	assert(trailing_zeros_u8(0) == 8);
    209 	for (let x: u8 = 1 << 7, i = 7u8; x > 0; x >>= 1) {
    210 		assert(trailing_zeros_u8(x) == i);
    211 		i -= 1;
    212 	};
    213 
    214 	assert(trailing_zeros_u16(0) == 16);
    215 	for (let x: u16 = 1 << 15, i = 15u8; x > 0; x >>= 1) {
    216 		assert(trailing_zeros_u16(x) == i);
    217 		i -= 1;
    218 	};
    219 
    220 	assert(trailing_zeros_u32(0) == 32);
    221 	for (let x: u32 = 1 << 31, i = 31u8; x > 0; x >>= 1) {
    222 		assert(trailing_zeros_u32(x) == i);
    223 		i -= 1;
    224 	};
    225 
    226 	assert(trailing_zeros_u64(0) == 64);
    227 	for (let x: u64 = 1 << 63, i = 63u8; x > 0; x >>= 1) {
    228 		assert(trailing_zeros_u64(x) == i);
    229 		i -= 1;
    230 	};
    231 
    232 	assert(trailing_zeros_u(0) == size(uint) * 8);
    233 	assert(trailing_zeros_u(1) == 0);
    234 };
    235 
    236 // Returns the number of bits set (the population count) of x.
    237 export fn popcount(x: u64) u8 = {
    238 	let i = 0u8;
    239 	for (x != 0; x >>= 1) {
    240 		if (x & 1 == 1) {
    241 			i += 1;
    242 		};
    243 	};
    244 	return i;
    245 };
    246 
    247 @test fn popcount() void = {
    248 	assert(popcount(0) == 0);
    249 	assert(popcount(0b11010110) == 5);
    250 	assert(popcount(~0) == 64);
    251 };
    252 
    253 // Returns the 64-bit product of x and y: (hi, lo) = x * y
    254 // with the product bits' upper half returned in hi and the lower
    255 // half returned in lo.
    256 export fn mulu32(x: u32, y: u32) (u32, u32) = {
    257 	const product = (x: u64) * (y: u64);
    258 	const hi = ((product >> 32): u32);
    259 	const lo = (product: u32);
    260 	return (hi, lo);
    261 };
    262 
    263 // Returns the 128-bit product of x and y: (hi, lo) = x * y
    264 // with the product bits' upper half returned in hi and the lower
    265 // half returned in lo.
    266 export fn mulu64(x: u64, y: u64) (u64, u64) = {
    267 	const mask32 = (1u64 << 32) - 1;
    268 	const x0 = x & mask32;
    269 	const x1 = x >> 32;
    270 	const y0 = y & mask32;
    271 	const y1 = y >> 32;
    272 	const w0 = x0 * y0;
    273 	const t = (x1 * y0) + (w0 >> 32);
    274 	let w1 = t & mask32;
    275 	const w2 = t >> 32;
    276 	w1 += x0 * y1;
    277 	const hi = (x1 * y1) + w2 + (w1 >> 32);
    278 	const lo = x * y;
    279 	return (hi, lo);
    280 };
    281 
    282 // Returns the product of x and y: (hi, lo) = x * y
    283 // with the product bits' upper half returned in hi and the lower
    284 // half returned in lo.
    285 export fn mulu(x: uint, y: uint) (uint, uint) = {
    286 	if (size(uint) == 4) {
    287 		const res = mulu32((x: u32), (y: u32));
    288 		return ((res.0: uint), (res.1: uint));
    289 	};
    290 	const res = mulu64((x: u64), (y: u64));
    291 	return ((res.0: uint), (res.1: uint));
    292 };
    293 
    294 @test fn mulu() void = {
    295 	// 32
    296 	let res = mulu32(2u32, 3u32);
    297 	assert(res.0 == 0u32);
    298 	assert(res.1 == 6u32);
    299 	let res = mulu32(~0u32, 2u32);
    300 	assert(res.0 == 1u32);
    301 	assert(res.1 == ~0u32 - 1);
    302 
    303 	// 64
    304 	let res = mulu64(2u64, 3u64);
    305 	assert(res.0 == 0u64);
    306 	assert(res.1 == 6u64);
    307 	let res = mulu64(~0u64, 2u64);
    308 	assert(res.0 == 1u64);
    309 	assert(res.1 == ~0u64 - 1);
    310 
    311 	// mulu()
    312 	let res = mulu(2u, 3u);
    313 	assert(res.0 == 0u);
    314 	assert(res.1 == 6u);
    315 	let res = mulu(~0u, 2u);
    316 	assert(res.0 == 1u);
    317 	assert(res.1 == ~0u - 1);
    318 };
    319 
    320 // Returns the quotient and remainder of (hi, lo) divided by y:
    321 // quo = (hi, lo) / y, rem = (hi, lo) % y with the dividend bits' upper
    322 // half in parameter hi and the lower half in parameter lo.
    323 // Aborts if y == 0 (division by zero) or y <= hi (quotient overflow).
    324 export fn divu32(hi: u32, lo: u32, y: u32) (u32, u32) = {
    325 	assert(y != 0, "division by zero");
    326 	assert(y > hi, "quotient overflow");
    327 	const z = (hi: u64) << 32 | (lo: u64);
    328 	const quo = ((z / (y: u64)): u32);
    329 	const rem = ((z % (y: u64)): u32);
    330 	return (quo, rem);
    331 };
    332 
    333 // Returns the quotient and remainder of (hi, lo) divided by y:
    334 // quo = (hi, lo) / y, rem = (hi, lo) % y with the dividend bits' upper
    335 // half in parameter hi and the lower half in parameter lo.
    336 // Aborts if y == 0 (division by zero) or y <= hi (quotient overflow).
    337 export fn divu64(hi: u64, lo: u64, y: u64) (u64, u64) = {
    338 	const two32 = 1u64 << 32;
    339 	const mask32 = two32 - 1;
    340 	assert(y != 0, "division by zero");
    341 	assert(y > hi, "quotient overflow");
    342 
    343 	const s = leading_zeros_u64(y);
    344 	y <<= s;
    345 
    346 	const yn1 = y >> 32;
    347 	const yn0 = y & mask32;
    348 	const un32 = (hi << s) | (lo >> (64 - s));
    349 	const un10 = lo << s;
    350 	const un1 = un10 >> 32;
    351 	const un0 = un10 & mask32;
    352 	let q1 = un32 / yn1;
    353 	let rhat = un32 - (q1 * yn1);
    354 
    355 	for (q1 >= two32 || (q1 * yn0) > ((two32 * rhat) + un1)) {
    356 		q1 -= 1;
    357 		rhat += yn1;
    358 		if (rhat >= two32) {
    359 			break;
    360 		};
    361 	};
    362 
    363 	const un21 = (un32 * two32) + un1 - (q1 * y);
    364 	let q0 = un21 / yn1;
    365 	rhat = un21 - (q0 * yn1);
    366 
    367 	for (q0 >= two32 || (q0 * yn0) > ((two32 * rhat) + un0)) {
    368 		q0 -= 1;
    369 		rhat += yn1;
    370 		if (rhat >= two32) {
    371 			break;
    372 		};
    373 	};
    374 
    375 	const quo = (q1 * two32) + q0;
    376 	const rem = ((un21 * two32) + un0 - (q0 * y)) >> s;
    377 	return (quo, rem);
    378 };
    379 
    380 // Returns the quotient and remainder of (hi, lo) divided by y:
    381 // quo = (hi, lo) / y, rem = (hi, lo) % y with the dividend bits' upper
    382 // half in parameter hi and the lower half in parameter lo.
    383 // Aborts if y == 0 (division by zero) or y <= hi (quotient overflow).
    384 export fn divu(hi: uint, lo: uint, y: uint) (uint, uint) = {
    385 	if (size(uint) == 4) {
    386 		const res = divu32((hi: u32), (lo: u32), (y: u32));
    387 		return ((res.0: uint), (res.1: uint));
    388 	};
    389 	const res = divu64((hi: u64), (lo: u64), (y: u64));
    390 	return ((res.0: uint), (res.1: uint));
    391 };
    392 
    393 @test fn divu() void = {
    394 	// 32
    395 	let res = divu32(0u32, 4u32, 2u32);
    396 	assert(res.0 == 2u32);
    397 	assert(res.1 == 0u32);
    398 	let res = divu32(0u32, 5u32, 2u32);
    399 	assert(res.0 == 2u32);
    400 	assert(res.1 == 1u32);
    401 	let res = divu32(1u32, 0u32, 2u32);
    402 	assert(res.0 == (1u32 << 31));
    403 	assert(res.1 == 0u32);
    404 	// These should abort.
    405 	// let res = divu32(1u32, 1u32, 0u32);
    406 	// let res = divu32(1u32, 0u32, 1u32);
    407 
    408 	// 64
    409 	let res = divu64(0u64, 4u64, 2u64);
    410 	assert(res.0 == 2u64);
    411 	assert(res.1 == 0u64);
    412 	let res = divu64(0u64, 5u64, 2u64);
    413 	assert(res.0 == 2u64);
    414 	assert(res.1 == 1u64);
    415 	let res = divu64(1u64, 0u64, 2u64);
    416 	assert(res.0 == (1u64 << 63));
    417 	assert(res.1 == 0u64);
    418 	// These should abort.
    419 	// let res = divu64(1u64, 1u64, 0u64);
    420 	// let res = divu64(1u64, 0u64, 1u64);
    421 
    422 	// divu()
    423 	let res = divu(0u, 4u, 2u);
    424 	assert(res.0 == 2u);
    425 	assert(res.1 == 0u);
    426 	let res = divu(0u, 5u, 2u);
    427 	assert(res.0 == 2u);
    428 	assert(res.1 == 1u);
    429 	let res = divu(1u, 0u, 2u);
    430 	assert(res.0 == (1u << 31));
    431 	assert(res.1 == 0u);
    432 	// These should abort.
    433 	// divu(1u, 1u, 0u);
    434 	// divu(1u, 0u, 1u);
    435 };
    436 
    437 // Returns the remainder of (hi, lo) divided by y.
    438 // Aborts if y == 0 (division by zero) but, unlike [[divu32]], it doesn't abort
    439 // on a quotient overflow.
    440 export fn remu32(hi: u32, lo: u32, y: u32) u32 = {
    441 	assert(y != 0, "division by zero");
    442 	const res = ((hi: u64) << 32 | (lo: u64)) % (y: u64);
    443 	return (res: u32);
    444 };
    445 
    446 // Returns the remainder of (hi, lo) divided by y.
    447 // Aborts if y == 0 (division by zero) but, unlike [[divu64]], it doesn't abort
    448 // on a quotient overflow.
    449 export fn remu64(hi: u64, lo: u64, y: u64) u64 = {
    450 	assert(y != 0, "division by zero");
    451 	// We scale down hi so that hi < y, then use divu() to compute the
    452 	// rem with the guarantee that it won't abort on quotient overflow.
    453 	// Given that
    454 	//   hi ≡ hi%y    (mod y)
    455 	// we have
    456 	//   hi<<64 + lo ≡ (hi%y)<<64 + lo    (mod y)
    457 	const res = divu64(hi % y, lo, y);
    458 	return res.1;
    459 };
    460 
    461 // Returns the remainder of (hi, lo) divided by y.
    462 // Aborts if y == 0 (division by zero) but, unlike [[divu]], it doesn't abort on
    463 // a quotient overflow.
    464 export fn remu(hi: uint, lo: uint, y: uint) uint = {
    465 	if (size(uint) == 4) {
    466 		return (remu32((hi: u32), (lo: u32), (y: u32)): uint);
    467 	};
    468 	return (remu64((hi: u64), (lo: u64), (y: u64)): uint);
    469 };
    470 
    471 @test fn remu() void = {
    472 	// 32
    473 	assert(remu32(0u32, 4u32, 2u32) == 0u32);
    474 	assert(remu32(0u32, 5u32, 2u32) == 1u32);
    475 	assert(remu32(0u32, 5u32, 3u32) == 2u32);
    476 	assert(remu32(1u32, 1u32, 2u32) == 1u32);
    477 	// These should abort.
    478 	// remu32(0u32, 4u32, 0u32);
    479 
    480 	// 64
    481 	assert(remu64(0u64, 4u64, 2u64) == 0u64);
    482 	assert(remu64(0u64, 5u64, 2u64) == 1u64);
    483 	assert(remu64(0u64, 5u64, 3u64) == 2u64);
    484 	assert(remu64(1u32, 1u32, 2u32) == 1u32);
    485 	// These should abort.
    486 	// remu64(0u64, 4u64, 0u64);
    487 
    488 	// remu()
    489 	assert(remu(0u, 4u, 2u) == 0u);
    490 	assert(remu(0u, 5u, 2u) == 1u);
    491 	assert(remu(0u, 5u, 3u) == 2u);
    492 	assert(remu(1u, 1u, 2u) == 1u);
    493 	// These should abort.
    494 	// remu(0u, 4u, 0u);
    495 };
    496 
    497 // Returns the greatest common divisor of a and b.
    498 export fn gcd(a: u64, b: u64) u64 = {
    499 	if (a == b) {
    500 		return a;
    501 	};
    502 	if (a == 0) {
    503 		return b;
    504 	};
    505 	if (b == 0) {
    506 		return a;
    507 	};
    508 
    509 	const i = trailing_zeros_u64(a);
    510 	const j = trailing_zeros_u64(b);
    511 	a >>= i;
    512 	b >>= j;
    513 	const k = if (i < j) i else j;
    514 
    515 	for (true) {
    516 		if (a > b) {
    517 			const t = a;
    518 			a = b;
    519 			b = t;
    520 		};
    521 
    522 		b -= a;
    523 		if (b == 0) {
    524 			return a << k;
    525 		};
    526 		b >>= trailing_zeros_u64(b);
    527 	};
    528 };
    529 
    530 @test fn gcd() void = {
    531 	assert(gcd(2 * 3 * 5, 3 * 7) == 3);
    532 	assert(gcd(2, 7) == 1);
    533 	assert(gcd(2, 0) == 2);
    534 	assert(gcd(0, 2) == 2);
    535 	// gcd(123 * 2^55 * 3, 123 * 7)
    536 	assert(gcd((123 << 55) * 3, 123 * 7) == 123);
    537 };