hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

uints.ha (22213B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 // Sections of the code below are based on Go's implementation, in particular
      5 // https://raw.githubusercontent.com/golang/go/master/src/math/bits/bits.go.
      6 //
      7 // The Go copyright notice:
      8 // ====================================================
      9 // Copyright (c) 2009 The Go Authors. All rights reserved.
     10 //
     11 // Redistribution and use in source and binary forms, with or without
     12 // modification, are permitted provided that the following conditions are
     13 // met:
     14 //
     15 //    * Redistributions of source code must retain the above copyright
     16 // notice, this list of conditions and the following disclaimer.
     17 //    * Redistributions in binary form must reproduce the above
     18 // copyright notice, this list of conditions and the following disclaimer
     19 // in the documentation and/or other materials provided with the
     20 // distribution.
     21 //    * Neither the name of Google Inc. nor the names of its
     22 // contributors may be used to endorse or promote products derived from
     23 // this software without specific prior written permission.
     24 //
     25 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     26 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     27 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     28 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     29 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     30 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     31 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     32 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     33 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     34 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     35 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     36 // ====================================================
     37 
     38 // The number of bits required to represent the first 256 numbers.
     39 const LEN8TAB: [256]u8 = [
     40 	0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04,
     41 	0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
     42 	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06,
     43 	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
     44 	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
     45 	0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
     46 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
     47 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
     48 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
     49 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
     50 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08,
     51 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     52 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     53 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     54 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     55 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     56 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     57 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     58 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     59 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     60 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
     61 	0x08, 0x08, 0x08, 0x08,
     62 ];
     63 
     64 const NTZ8TAB: [256]u8 = [
     65 	0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00,
     66 	0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     67 	0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x05, 0x00, 0x01, 0x00,
     68 	0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     69 	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00,
     70 	0x02, 0x00, 0x01, 0x00, 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     71 	0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00,
     72 	0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     73 	0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00,
     74 	0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     75 	0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x07, 0x00, 0x01, 0x00,
     76 	0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     77 	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00,
     78 	0x02, 0x00, 0x01, 0x00, 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     79 	0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00,
     80 	0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     81 	0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00,
     82 	0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     83 	0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x05, 0x00, 0x01, 0x00,
     84 	0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
     85 	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00,
     86 	0x02, 0x00, 0x01, 0x00,
     87 ];
     88 
     89 // See http://supertech.csail.mit.edu/papers/debruijn.pdf
     90 def DEBRUIJN32: u32 = 0x077CB531;
     91 
     92 const DEBRUIJN32TAB: [32]u8 = [
     93 	0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
     94 	31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
     95 ];
     96 
     97 def DEBRUIJN64: u64 = 0x03f79d71b4ca8b09;
     98 
     99 const DEBRUIJN64TAB: [64]u8 = [
    100 	0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
    101 	62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
    102 	63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
    103 	54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
    104 ];
    105 
    106 // The size of an unsigned int: 32 or 64
    107 def UINT_SIZE: u8 = (size(uint): u8) * 8;
    108 
    109 // Returns the minimum number of bits required to represent x.
    110 export fn bit_size_u8(x: u8) u8 = {
    111 	return LEN8TAB[x];
    112 };
    113 
    114 // Returns the minimum number of bits required to represent x.
    115 export fn bit_size_u16(x: u16) u8 = {
    116 	let res = 0u8;
    117 	if (x >= 1u16 << 8) {
    118 		x >>= 8;
    119 		res += 8;
    120 	};
    121 	return res + LEN8TAB[x];
    122 };
    123 
    124 // Returns the minimum number of bits required to represent x.
    125 export fn bit_size_u32(x: u32) u8 = {
    126 	let res = 0u8;
    127 	if (x >= 1u32 << 16) {
    128 		x >>= 16;
    129 		res += 16;
    130 	};
    131 	if (x >= 1u32 << 8) {
    132 		x >>= 8;
    133 		res += 8;
    134 	};
    135 	return res + LEN8TAB[x];
    136 };
    137 
    138 // Returns the minimum number of bits required to represent x.
    139 export fn bit_size_u64(x: u64) u8 = {
    140 	let res = 0u8;
    141 	if (x >= 1u64 << 32) {
    142 		x >>= 32;
    143 		res += 32;
    144 	};
    145 	if (x >= 1u64 << 16) {
    146 		x >>= 16;
    147 		res += 16;
    148 	};
    149 	if (x >= 1u64 << 8) {
    150 		x >>= 8;
    151 		res += 8;
    152 	};
    153 	return res + LEN8TAB[x];
    154 };
    155 
    156 // Returns the minimum number of bits required to represent x.
    157 export fn bit_size_u(x: uint) u8 = {
    158 	if (UINT_SIZE == 32) {
    159 		return bit_size_u32(x: u32);
    160 	};
    161 	return bit_size_u64(x: u64);
    162 };
    163 
    164 @test fn bit_size_u() void = {
    165 	assert(bit_size_u8(0) == 0);
    166 	assert(bit_size_u8(2) == 2);
    167 	assert(bit_size_u8(5) == 3);
    168 	assert(bit_size_u16(5) == 3);
    169 	assert(bit_size_u32(5) == 3);
    170 	assert(bit_size_u64(5) == 3);
    171 	assert(bit_size_u16(31111) == 15);
    172 	assert(bit_size_u32(536870911) == 29);
    173 	assert(bit_size_u64(8589934591) == 33);
    174 	assert(bit_size_u(0) == 0);
    175 	assert(bit_size_u(1) == 1);
    176 };
    177 
    178 
    179 // Returns the number of leading zero bits in x
    180 // The result is UINT_SIZE for x == 0.
    181 export fn leading_zeros_u(x: uint) u8 = UINT_SIZE - bit_size_u(x);
    182 
    183 // Returns the number of leading zero bits in x
    184 // The result is 8 for x == 0.
    185 export fn leading_zeros_u8(x: u8) u8 = 8 - bit_size_u8(x);
    186 
    187 // Returns the number of leading zero bits in x
    188 // The result is 16 for x == 0.
    189 export fn leading_zeros_u16(x: u16) u8 = 16 - bit_size_u16(x);
    190 
    191 // Returns the number of leading zero bits in x
    192 // The result is 32 for x == 0.
    193 export fn leading_zeros_u32(x: u32) u8 = 32 - bit_size_u32(x);
    194 
    195 // Returns the number of leading zero bits in x
    196 // The result is 64 for x == 0.
    197 export fn leading_zeros_u64(x: u64) u8 = 64 - bit_size_u64(x);
    198 
    199 @test fn leading_zeros_u() void = {
    200 	assert(leading_zeros_u(0) == UINT_SIZE);
    201 	assert(leading_zeros_u(1) == UINT_SIZE - 1);
    202 	assert(leading_zeros_u8(0) == 8);
    203 	assert(leading_zeros_u8(1) == 8 - 1);
    204 	assert(leading_zeros_u16(0) == 16);
    205 	assert(leading_zeros_u16(1) == 16 - 1);
    206 	assert(leading_zeros_u32(0) == 32);
    207 	assert(leading_zeros_u32(1) == 32 - 1);
    208 	assert(leading_zeros_u64(0) == 64);
    209 	assert(leading_zeros_u64(1) == 64 - 1);
    210 };
    211 
    212 // Returns the number of trailing zero bits in x
    213 // The result is UINT_SIZE for x == 0.
    214 export fn trailing_zeros_u(x: uint) u8 = {
    215 	if (UINT_SIZE == 32) {
    216 		return trailing_zeros_u32(x: u32);
    217 	};
    218 	return trailing_zeros_u64(x: u64);
    219 };
    220 
    221 // Returns the number of trailing zero bits in x
    222 // The result is 8 for x == 0.
    223 export fn trailing_zeros_u8(x: u8) u8 = NTZ8TAB[x];
    224 
    225 // Returns the number of trailing zero bits in x
    226 // The result is 16 for x == 0.
    227 export fn trailing_zeros_u16(x: u16) u8 = {
    228 	if (x == 0) {
    229 		return 16;
    230 	};
    231 	return DEBRUIJN32TAB[(x & -x): u32 * DEBRUIJN32 >> (32 - 5)];
    232 };
    233 
    234 // Returns the number of trailing zero bits in x
    235 // The result is 32 for x == 0.
    236 export fn trailing_zeros_u32(x: u32) u8 = {
    237 	if (x == 0) {
    238 		return 32;
    239 	};
    240 	return DEBRUIJN32TAB[(x & -x) * DEBRUIJN32 >> (32 - 5)];
    241 };
    242 
    243 // Returns the number of trailing zero bits in x
    244 // The result is 64 for x == 0.
    245 export fn trailing_zeros_u64(x: u64) u8 = {
    246 	if (x == 0) {
    247 		return 64;
    248 	};
    249 	return DEBRUIJN64TAB[(x & -x) * DEBRUIJN64 >> (64 - 6)];
    250 };
    251 
    252 @test fn trailing_zeros_u() void = {
    253 	assert(trailing_zeros_u8(0) == 8);
    254 	for (let x: u8 = 1 << 7, i = 7u8; x > 0; x >>= 1) {
    255 		assert(trailing_zeros_u8(x) == i);
    256 		i -= 1;
    257 	};
    258 
    259 	assert(trailing_zeros_u16(0) == 16);
    260 	for (let x: u16 = 1 << 15, i = 15u8; x > 0; x >>= 1) {
    261 		assert(trailing_zeros_u16(x) == i);
    262 		i -= 1;
    263 	};
    264 
    265 	assert(trailing_zeros_u32(0) == 32);
    266 	for (let x: u32 = 1 << 31, i = 31u8; x > 0; x >>= 1) {
    267 		assert(trailing_zeros_u32(x) == i);
    268 		i -= 1;
    269 	};
    270 
    271 	assert(trailing_zeros_u64(0) == 64);
    272 	for (let x: u64 = 1 << 63, i = 63u8; x > 0; x >>= 1) {
    273 		assert(trailing_zeros_u64(x) == i);
    274 		i -= 1;
    275 	};
    276 
    277 	assert(trailing_zeros_u(0) == UINT_SIZE);
    278 	assert(trailing_zeros_u(1) == 0);
    279 };
    280 
    281 // Returns the number of bits set (the population count) of x.
    282 export fn popcount(x: u64) u8 = {
    283 	let i = 0u8;
    284 	for (x != 0; x >>= 1) {
    285 		if (x & 1 == 1) {
    286 			i += 1;
    287 		};
    288 	};
    289 	return i;
    290 };
    291 
    292 @test fn popcount() void = {
    293 	assert(popcount(0) == 0);
    294 	assert(popcount(0b11010110) == 5);
    295 	assert(popcount(~0) == 64);
    296 };
    297 
    298 // Returns the sum with carry of x, y and carry: sum = x + y + carry.
    299 // The carry input must be 0 or 1, otherwise the behavior is undefined.
    300 // The carry_out output is guaranteed to be 0 or 1.
    301 export fn addu32(x: u32, y: u32, carry: u32) (u32, u32) = {
    302 	const sum64 = (x: u64) + (y: u64) + (carry: u64);
    303 	const sum = (sum64: u32);
    304 	const carry_out = ((sum64 >> 32): u32);
    305 	return (sum, carry_out);
    306 };
    307 
    308 // Returns the sum with carry of x, y and carry: sum = x + y + carry.
    309 // The carry input must be 0 or 1, otherwise the behavior is undefined.
    310 // The carry_out output is guaranteed to be 0 or 1.
    311 export fn addu64(x: u64, y: u64, carry: u64) (u64, u64) = {
    312 	const sum = x + y + carry;
    313 	// The sum will overflow if both top bits are set (x & y) or if one of
    314 	// them is (x | y), and a carry from the lower place happened. If such a
    315 	// carry happens, the top bit will be 1 + 0 + 1 = 0 (& ~sum).
    316 	const carry_out = ((x & y) | ((x | y) & ~sum)) >> 63;
    317 	return (sum, carry_out);
    318 };
    319 
    320 // Calls either addu32() or addu64() depending on size(uint).
    321 export fn addu(x: uint, y: uint, carry: uint) (uint, uint) = {
    322 	if (UINT_SIZE == 32) {
    323 		const res = addu32((x: u32), (y: u32), (carry: u32));
    324 		return ((res.0: uint), (res.1: uint));
    325 	};
    326 	const res = addu64((x: u64), (y: u64), (carry: u64));
    327 	return ((res.0: uint), (res.1: uint));
    328 };
    329 
    330 @test fn addu() void = {
    331 	// 32
    332 	let res = addu32(2u32, 2u32, 0u32);
    333 	assert(res.0 == 4u32);
    334 	assert(res.1 == 0u32);
    335 	let res = addu32(2u32, 2u32, 1u32);
    336 	assert(res.0 == 5u32);
    337 	assert(res.1 == 0u32);
    338 	let res = addu32(~0u32, 0u32, 0u32);
    339 	assert(res.0 == ~0u32);
    340 	assert(res.1 == 0u32);
    341 	let res = addu32(~0u32, 1u32, 0u32);
    342 	assert(res.0 == 0u32);
    343 	assert(res.1 == 1u32);
    344 
    345 	// 64
    346 	let res = addu64(2u64, 2u64, 0u64);
    347 	assert(res.0 == 4u64);
    348 	assert(res.1 == 0u64);
    349 	let res = addu64(2u64, 2u64, 1u64);
    350 	assert(res.0 == 5u64);
    351 	assert(res.1 == 0u64);
    352 	let res = addu64(~0u64, 0u64, 0u64);
    353 	assert(res.0 == ~0u64);
    354 	assert(res.1 == 0u64);
    355 	let res = addu64(~0u64, 1u64, 0u64);
    356 	assert(res.0 == 0u64);
    357 	assert(res.1 == 1u64);
    358 
    359 	// addu()
    360 	let res = addu(2u, 2u, 0u);
    361 	assert(res.0 == 4u);
    362 	assert(res.1 == 0u);
    363 	let res = addu(2u, 2u, 1u);
    364 	assert(res.0 == 5u);
    365 	assert(res.1 == 0u);
    366 };
    367 
    368 // Returns the difference of x, y and borrow, diff = x - y - borrow.
    369 // The borrow input must be 0 or 1, otherwise the behavior is undefined.
    370 // The borrow_out output is guaranteed to be 0 or 1.
    371 export fn subu32(x: u32, y: u32, borrow: u32) (u32, u32) = {
    372 	const diff = x - y - borrow;
    373 	// The difference will underflow if the top bit of x is not set and the
    374 	// top bit of y is set (^x & y) or if they are the same (^(x ^ y)) and a
    375 	// borrow from the lower place happens. If that borrow happens, the
    376 	// result will be 1 - 1 - 1 = 0 - 0 - 1 = 1 (& diff).
    377 	const borrow_out = ((~x & y) | (~(x ^ y) & diff)) >> 31;
    378 	return (diff, borrow_out);
    379 };
    380 
    381 // Returns the difference of x, y and borrow, diff = x - y - borrow.
    382 // The borrow input must be 0 or 1, otherwise the behavior is undefined.
    383 // The borrow_out output is guaranteed to be 0 or 1.
    384 export fn subu64(x: u64, y: u64, borrow: u64) (u64, u64) = {
    385 	const diff = x - y - borrow;
    386 	// See subu32 for the bit logic.
    387 	const borrow_out = ((~x & y) | (~(x ^ y) & diff)) >> 63;
    388 	return (diff, borrow_out);
    389 };
    390 
    391 // Calls either mulu32() or mulu64() depending on size(uint).
    392 export fn subu(x: uint, y: uint, carry: uint) (uint, uint) = {
    393 	if (UINT_SIZE == 32) {
    394 		const res = subu32((x: u32), (y: u32), (carry: u32));
    395 		return ((res.0: uint), (res.1: uint));
    396 	};
    397 	const res = subu64((x: u64), (y: u64), (carry: u64));
    398 	return ((res.0: uint), (res.1: uint));
    399 };
    400 
    401 @test fn subu() void = {
    402 	// 32
    403 	let res = subu32(4u32, 2u32, 0u32);
    404 	assert(res.0 == 2u32);
    405 	assert(res.1 == 0u32);
    406 	let res = subu32(4u32, 2u32, 1u32);
    407 	assert(res.0 == 1u32);
    408 	assert(res.1 == 0u32);
    409 	let res = subu32(0u32, 0u32, 0u32);
    410 	assert(res.0 == 0u32);
    411 	assert(res.1 == 0u32);
    412 	let res = subu32(0u32, 1u32, 0u32);
    413 	assert(res.0 == ~0u32);
    414 	assert(res.1 == 1u32);
    415 
    416 	// 64
    417 	let res = subu64(4u64, 2u64, 0u64);
    418 	assert(res.0 == 2u64);
    419 	assert(res.1 == 0u64);
    420 	let res = subu64(4u64, 2u64, 1u64);
    421 	assert(res.0 == 1u64);
    422 	assert(res.1 == 0u64);
    423 	let res = subu64(0u64, 0u64, 0u64);
    424 	assert(res.0 == 0u64);
    425 	assert(res.1 == 0u64);
    426 	let res = subu64(0u64, 1u64, 0u64);
    427 	assert(res.0 == ~0u64);
    428 	assert(res.1 == 1u64);
    429 
    430 	// subu()
    431 	let res = subu(4u, 2u, 0u);
    432 	assert(res.0 == 2u);
    433 	assert(res.1 == 0u);
    434 	let res = subu(4u, 2u, 1u);
    435 	assert(res.0 == 1u);
    436 	assert(res.1 == 0u);
    437 };
    438 
    439 // Returns the 64-bit product of x and y: (hi, lo) = x * y
    440 // with the product bits' upper half returned in hi and the lower
    441 // half returned in lo.
    442 export fn mulu32(x: u32, y: u32) (u32, u32) = {
    443 	const product = (x: u64) * (y: u64);
    444 	const hi = ((product >> 32): u32);
    445 	const lo = (product: u32);
    446 	return (hi, lo);
    447 };
    448 
    449 // Returns the 128-bit product of x and y: (hi, lo) = x * y
    450 // with the product bits' upper half returned in hi and the lower
    451 // half returned in lo.
    452 export fn mulu64(x: u64, y: u64) (u64, u64) = {
    453 	const mask32 = (1u64 << 32) - 1;
    454 	const x0 = x & mask32;
    455 	const x1 = x >> 32;
    456 	const y0 = y & mask32;
    457 	const y1 = y >> 32;
    458 	const w0 = x0 * y0;
    459 	const t = (x1 * y0) + (w0 >> 32);
    460 	let w1 = t & mask32;
    461 	const w2 = t >> 32;
    462 	w1 += x0 * y1;
    463 	const hi = (x1 * y1) + w2 + (w1 >> 32);
    464 	const lo = x * y;
    465 	return (hi, lo);
    466 };
    467 
    468 // Calls either mulu32() or mulu64() depending on size(uint).
    469 export fn mulu(x: uint, y: uint) (uint, uint) = {
    470 	if (UINT_SIZE == 32) {
    471 		const res = mulu32((x: u32), (y: u32));
    472 		return ((res.0: uint), (res.1: uint));
    473 	};
    474 	const res = mulu64((x: u64), (y: u64));
    475 	return ((res.0: uint), (res.1: uint));
    476 };
    477 
    478 @test fn mulu() void = {
    479 	// 32
    480 	let res = mulu32(2u32, 3u32);
    481 	assert(res.0 == 0u32);
    482 	assert(res.1 == 6u32);
    483 	let res = mulu32(~0u32, 2u32);
    484 	assert(res.0 == 1u32);
    485 	assert(res.1 == ~0u32 - 1);
    486 
    487 	// 64
    488 	let res = mulu64(2u64, 3u64);
    489 	assert(res.0 == 0u64);
    490 	assert(res.1 == 6u64);
    491 	let res = mulu64(~0u64, 2u64);
    492 	assert(res.0 == 1u64);
    493 	assert(res.1 == ~0u64 - 1);
    494 
    495 	// mulu()
    496 	let res = mulu(2u, 3u);
    497 	assert(res.0 == 0u);
    498 	assert(res.1 == 6u);
    499 	let res = mulu(~0u, 2u);
    500 	assert(res.0 == 1u);
    501 	assert(res.1 == ~0u - 1);
    502 };
    503 
    504 // Returns the quotient and remainder of (hi, lo) divided by y:
    505 // quo = (hi, lo) / y, rem = (hi, lo) % y with the dividend bits' upper
    506 // half in parameter hi and the lower half in parameter lo.
    507 // Panics for y == 0 (division by zero) or y <= hi (quotient overflow).
    508 export fn divu32(hi: u32, lo: u32, y: u32) (u32, u32) = {
    509 	assert(y != 0);
    510 	assert(y > hi);
    511 	const z = (hi: u64) << 32 | (lo: u64);
    512 	const quo = ((z / (y: u64)): u32);
    513 	const rem = ((z % (y: u64)): u32);
    514 	return (quo, rem);
    515 };
    516 
    517 // Returns the quotient and remainder of (hi, lo) divided by y:
    518 // quo = (hi, lo) / y, rem = (hi, lo) % y with the dividend bits' upper
    519 // half in parameter hi and the lower half in parameter lo.
    520 // Panics for y == 0 (division by zero) or y <= hi (quotient overflow).
    521 export fn divu64(hi: u64, lo: u64, y: u64) (u64, u64) = {
    522 	const two32 = 1u64 << 32;
    523 	const mask32 = two32 - 1;
    524 	assert(y != 0);
    525 	assert(y > hi);
    526 
    527 	const s = leading_zeros_u64(y);
    528 	y <<= s;
    529 
    530 	const yn1 = y >> 32;
    531 	const yn0 = y & mask32;
    532 	const un32 = (hi << s) | (lo >> (64 - s));
    533 	const un10 = lo << s;
    534 	const un1 = un10 >> 32;
    535 	const un0 = un10 & mask32;
    536 	let q1 = un32 / yn1;
    537 	let rhat = un32 - (q1 * yn1);
    538 
    539 	for (q1 >= two32 || (q1 * yn0) > ((two32 * rhat) + un1)) {
    540 		q1 -= 1;
    541 		rhat += yn1;
    542 		if (rhat >= two32) {
    543 			break;
    544 		};
    545 	};
    546 
    547 	const un21 = (un32 * two32) + un1 - (q1 * y);
    548 	let q0 = un21 / yn1;
    549 	rhat = un21 - (q0 * yn1);
    550 
    551 	for (q0 >= two32 || (q0 * yn0) > ((two32 * rhat) + un0)) {
    552 		q0 -= 1;
    553 		rhat += yn1;
    554 		if (rhat >= two32) {
    555 			break;
    556 		};
    557 	};
    558 
    559 	const quo = (q1 * two32) + q0;
    560 	const rem = ((un21 * two32) + un0 - (q0 * y)) >> s;
    561 	return (quo, rem);
    562 };
    563 
    564 // Calls either divu32() or divu64() depending on size(uint).
    565 export fn divu(hi: uint, lo: uint, y: uint) (uint, uint) = {
    566 	if (UINT_SIZE == 32) {
    567 		const res = divu32((hi: u32), (lo: u32), (y: u32));
    568 		return ((res.0: uint), (res.1: uint));
    569 	};
    570 	const res = divu64((hi: u64), (lo: u64), (y: u64));
    571 	return ((res.0: uint), (res.1: uint));
    572 };
    573 
    574 @test fn divu() void = {
    575 	// 32
    576 	let res = divu32(0u32, 4u32, 2u32);
    577 	assert(res.0 == 2u32);
    578 	assert(res.1 == 0u32);
    579 	let res = divu32(0u32, 5u32, 2u32);
    580 	assert(res.0 == 2u32);
    581 	assert(res.1 == 1u32);
    582 	let res = divu32(1u32, 0u32, 2u32);
    583 	assert(res.0 == (1u32 << 31));
    584 	assert(res.1 == 0u32);
    585 	// These should panic.
    586 	// let res = divu32(1u32, 1u32, 0u32);
    587 	// let res = divu32(1u32, 0u32, 1u32);
    588 
    589 	// 64
    590 	let res = divu64(0u64, 4u64, 2u64);
    591 	assert(res.0 == 2u64);
    592 	assert(res.1 == 0u64);
    593 	let res = divu64(0u64, 5u64, 2u64);
    594 	assert(res.0 == 2u64);
    595 	assert(res.1 == 1u64);
    596 	let res = divu64(1u64, 0u64, 2u64);
    597 	assert(res.0 == (1u64 << 63));
    598 	assert(res.1 == 0u64);
    599 	// These should panic.
    600 	// let res = divu64(1u64, 1u64, 0u64);
    601 	// let res = divu64(1u64, 0u64, 1u64);
    602 
    603 	// divu()
    604 	let res = divu(0u, 4u, 2u);
    605 	assert(res.0 == 2u);
    606 	assert(res.1 == 0u);
    607 	let res = divu(0u, 5u, 2u);
    608 	assert(res.0 == 2u);
    609 	assert(res.1 == 1u);
    610 	let res = divu(1u, 0u, 2u);
    611 	assert(res.0 == (1u << 31));
    612 	assert(res.1 == 0u);
    613 	// These should panic.
    614 	// divu(1u, 1u, 0u);
    615 	// divu(1u, 0u, 1u);
    616 };
    617 
    618 // Returns the remainder of (hi, lo) divided by y.
    619 // Panics for y == 0 (division by zero) but, unlike divu(), it doesn't panic on
    620 // a quotient overflow.
    621 export fn remu32(hi: u32, lo: u32, y: u32) u32 = {
    622 	assert(y != 0);
    623 	const res = ((hi: u64) << 32 | (lo: u64)) % (y: u64);
    624 	return (res: u32);
    625 };
    626 
    627 // Returns the remainder of (hi, lo) divided by y.
    628 // Panics for y == 0 (division by zero) but, unlike divu(), it doesn't panic on
    629 // a quotient overflow.
    630 export fn remu64(hi: u64, lo: u64, y: u64) u64 = {
    631 	assert(y != 0);
    632 	// We scale down hi so that hi < y, then use divu() to compute the
    633 	// rem with the guarantee that it won't panic on quotient overflow.
    634 	// Given that
    635 	//   hi ≡ hi%y    (mod y)
    636 	// we have
    637 	//   hi<<64 + lo ≡ (hi%y)<<64 + lo    (mod y)
    638 	const res = divu64(hi % y, lo, y);
    639 	return res.1;
    640 };
    641 
    642 // Calls either remu32() or remu64() depending on size(uint).
    643 export fn remu(hi: uint, lo: uint, y: uint) uint = {
    644 	if (UINT_SIZE == 32) {
    645 		return (remu32((hi: u32), (lo: u32), (y: u32)): uint);
    646 	};
    647 	return (remu64((hi: u64), (lo: u64), (y: u64)): uint);
    648 };
    649 
    650 @test fn remu() void = {
    651 	// 32
    652 	assert(remu32(0u32, 4u32, 2u32) == 0u32);
    653 	assert(remu32(0u32, 5u32, 2u32) == 1u32);
    654 	assert(remu32(0u32, 5u32, 3u32) == 2u32);
    655 	assert(remu32(1u32, 1u32, 2u32) == 1u32);
    656 	// These should panic.
    657 	// remu32(0u32, 4u32, 0u32);
    658 
    659 	// 64
    660 	assert(remu64(0u64, 4u64, 2u64) == 0u64);
    661 	assert(remu64(0u64, 5u64, 2u64) == 1u64);
    662 	assert(remu64(0u64, 5u64, 3u64) == 2u64);
    663 	assert(remu64(1u32, 1u32, 2u32) == 1u32);
    664 	// These should panic.
    665 	// remu64(0u64, 4u64, 0u64);
    666 
    667 	// remu()
    668 	assert(remu(0u, 4u, 2u) == 0u);
    669 	assert(remu(0u, 5u, 2u) == 1u);
    670 	assert(remu(0u, 5u, 3u) == 2u);
    671 	assert(remu(1u, 1u, 2u) == 1u);
    672 	// These should panic.
    673 	// remu(0u, 4u, 0u);
    674 };
    675 
    676 // Returns the greatest common divisor of a and b.
    677 export fn gcd(a: u64, b: u64) u64 = {
    678 	if (a == b) {
    679 		return a;
    680 	};
    681 	if (a == 0) {
    682 		return b;
    683 	};
    684 	if (b == 0) {
    685 		return a;
    686 	};
    687 
    688 	const i = trailing_zeros_u64(a);
    689 	const j = trailing_zeros_u64(b);
    690 	a >>= i;
    691 	b >>= j;
    692 	const k = if (i < j) i else j;
    693 
    694 	for (true) {
    695 		if (a > b) {
    696 			const t = a;
    697 			a = b;
    698 			b = t;
    699 		};
    700 
    701 		b -= a;
    702 		if (b == 0) {
    703 			return a << k;
    704 		};
    705 		b >>= trailing_zeros_u64(b);
    706 	};
    707 };
    708 
    709 @test fn gcd() void = {
    710 	assert(gcd(2 * 3 * 5, 3 * 7) == 3);
    711 	assert(gcd(2, 7) == 1);
    712 	assert(gcd(2, 0) == 2);
    713 	assert(gcd(0, 2) == 2);
    714 	// gcd(123 * 2^55 * 3, 123 * 7)
    715 	assert(gcd((123 << 55) * 3, 123 * 7) == 123);
    716 };