uints.ha (22213B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 4 // Sections of the code below are based on Go's implementation, in particular 5 // https://raw.githubusercontent.com/golang/go/master/src/math/bits/bits.go. 6 // 7 // The Go copyright notice: 8 // ==================================================== 9 // Copyright (c) 2009 The Go Authors. All rights reserved. 10 // 11 // Redistribution and use in source and binary forms, with or without 12 // modification, are permitted provided that the following conditions are 13 // met: 14 // 15 // * Redistributions of source code must retain the above copyright 16 // notice, this list of conditions and the following disclaimer. 17 // * Redistributions in binary form must reproduce the above 18 // copyright notice, this list of conditions and the following disclaimer 19 // in the documentation and/or other materials provided with the 20 // distribution. 21 // * Neither the name of Google Inc. nor the names of its 22 // contributors may be used to endorse or promote products derived from 23 // this software without specific prior written permission. 24 // 25 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 // ==================================================== 37 38 // The number of bits required to represent the first 256 numbers. 39 const LEN8TAB: [256]u8 = [ 40 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 41 0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 42 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 43 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 44 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 45 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 46 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 47 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 48 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 49 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 50 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08, 51 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 52 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 53 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 54 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 55 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 56 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 57 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 58 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 59 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 60 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 61 0x08, 0x08, 0x08, 0x08, 62 ]; 63 64 const NTZ8TAB: [256]u8 = [ 65 0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 66 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 67 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x05, 0x00, 0x01, 0x00, 68 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 69 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 70 0x02, 0x00, 0x01, 0x00, 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 71 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 72 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 73 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 74 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 75 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x07, 0x00, 0x01, 0x00, 76 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 77 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 78 0x02, 0x00, 0x01, 0x00, 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 79 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 80 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 81 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 82 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 83 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x05, 0x00, 0x01, 0x00, 84 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 85 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 86 0x02, 0x00, 0x01, 0x00, 87 ]; 88 89 // See http://supertech.csail.mit.edu/papers/debruijn.pdf 90 def DEBRUIJN32: u32 = 0x077CB531; 91 92 const DEBRUIJN32TAB: [32]u8 = [ 93 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 94 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9, 95 ]; 96 97 def DEBRUIJN64: u64 = 0x03f79d71b4ca8b09; 98 99 const DEBRUIJN64TAB: [64]u8 = [ 100 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4, 101 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5, 102 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11, 103 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6, 104 ]; 105 106 // The size of an unsigned int: 32 or 64 107 def UINT_SIZE: u8 = (size(uint): u8) * 8; 108 109 // Returns the minimum number of bits required to represent x. 110 export fn bit_size_u8(x: u8) u8 = { 111 return LEN8TAB[x]; 112 }; 113 114 // Returns the minimum number of bits required to represent x. 115 export fn bit_size_u16(x: u16) u8 = { 116 let res = 0u8; 117 if (x >= 1u16 << 8) { 118 x >>= 8; 119 res += 8; 120 }; 121 return res + LEN8TAB[x]; 122 }; 123 124 // Returns the minimum number of bits required to represent x. 125 export fn bit_size_u32(x: u32) u8 = { 126 let res = 0u8; 127 if (x >= 1u32 << 16) { 128 x >>= 16; 129 res += 16; 130 }; 131 if (x >= 1u32 << 8) { 132 x >>= 8; 133 res += 8; 134 }; 135 return res + LEN8TAB[x]; 136 }; 137 138 // Returns the minimum number of bits required to represent x. 139 export fn bit_size_u64(x: u64) u8 = { 140 let res = 0u8; 141 if (x >= 1u64 << 32) { 142 x >>= 32; 143 res += 32; 144 }; 145 if (x >= 1u64 << 16) { 146 x >>= 16; 147 res += 16; 148 }; 149 if (x >= 1u64 << 8) { 150 x >>= 8; 151 res += 8; 152 }; 153 return res + LEN8TAB[x]; 154 }; 155 156 // Returns the minimum number of bits required to represent x. 157 export fn bit_size_u(x: uint) u8 = { 158 if (UINT_SIZE == 32) { 159 return bit_size_u32(x: u32); 160 }; 161 return bit_size_u64(x: u64); 162 }; 163 164 @test fn bit_size_u() void = { 165 assert(bit_size_u8(0) == 0); 166 assert(bit_size_u8(2) == 2); 167 assert(bit_size_u8(5) == 3); 168 assert(bit_size_u16(5) == 3); 169 assert(bit_size_u32(5) == 3); 170 assert(bit_size_u64(5) == 3); 171 assert(bit_size_u16(31111) == 15); 172 assert(bit_size_u32(536870911) == 29); 173 assert(bit_size_u64(8589934591) == 33); 174 assert(bit_size_u(0) == 0); 175 assert(bit_size_u(1) == 1); 176 }; 177 178 179 // Returns the number of leading zero bits in x 180 // The result is UINT_SIZE for x == 0. 181 export fn leading_zeros_u(x: uint) u8 = UINT_SIZE - bit_size_u(x); 182 183 // Returns the number of leading zero bits in x 184 // The result is 8 for x == 0. 185 export fn leading_zeros_u8(x: u8) u8 = 8 - bit_size_u8(x); 186 187 // Returns the number of leading zero bits in x 188 // The result is 16 for x == 0. 189 export fn leading_zeros_u16(x: u16) u8 = 16 - bit_size_u16(x); 190 191 // Returns the number of leading zero bits in x 192 // The result is 32 for x == 0. 193 export fn leading_zeros_u32(x: u32) u8 = 32 - bit_size_u32(x); 194 195 // Returns the number of leading zero bits in x 196 // The result is 64 for x == 0. 197 export fn leading_zeros_u64(x: u64) u8 = 64 - bit_size_u64(x); 198 199 @test fn leading_zeros_u() void = { 200 assert(leading_zeros_u(0) == UINT_SIZE); 201 assert(leading_zeros_u(1) == UINT_SIZE - 1); 202 assert(leading_zeros_u8(0) == 8); 203 assert(leading_zeros_u8(1) == 8 - 1); 204 assert(leading_zeros_u16(0) == 16); 205 assert(leading_zeros_u16(1) == 16 - 1); 206 assert(leading_zeros_u32(0) == 32); 207 assert(leading_zeros_u32(1) == 32 - 1); 208 assert(leading_zeros_u64(0) == 64); 209 assert(leading_zeros_u64(1) == 64 - 1); 210 }; 211 212 // Returns the number of trailing zero bits in x 213 // The result is UINT_SIZE for x == 0. 214 export fn trailing_zeros_u(x: uint) u8 = { 215 if (UINT_SIZE == 32) { 216 return trailing_zeros_u32(x: u32); 217 }; 218 return trailing_zeros_u64(x: u64); 219 }; 220 221 // Returns the number of trailing zero bits in x 222 // The result is 8 for x == 0. 223 export fn trailing_zeros_u8(x: u8) u8 = NTZ8TAB[x]; 224 225 // Returns the number of trailing zero bits in x 226 // The result is 16 for x == 0. 227 export fn trailing_zeros_u16(x: u16) u8 = { 228 if (x == 0) { 229 return 16; 230 }; 231 return DEBRUIJN32TAB[(x & -x): u32 * DEBRUIJN32 >> (32 - 5)]; 232 }; 233 234 // Returns the number of trailing zero bits in x 235 // The result is 32 for x == 0. 236 export fn trailing_zeros_u32(x: u32) u8 = { 237 if (x == 0) { 238 return 32; 239 }; 240 return DEBRUIJN32TAB[(x & -x) * DEBRUIJN32 >> (32 - 5)]; 241 }; 242 243 // Returns the number of trailing zero bits in x 244 // The result is 64 for x == 0. 245 export fn trailing_zeros_u64(x: u64) u8 = { 246 if (x == 0) { 247 return 64; 248 }; 249 return DEBRUIJN64TAB[(x & -x) * DEBRUIJN64 >> (64 - 6)]; 250 }; 251 252 @test fn trailing_zeros_u() void = { 253 assert(trailing_zeros_u8(0) == 8); 254 for (let x: u8 = 1 << 7, i = 7u8; x > 0; x >>= 1) { 255 assert(trailing_zeros_u8(x) == i); 256 i -= 1; 257 }; 258 259 assert(trailing_zeros_u16(0) == 16); 260 for (let x: u16 = 1 << 15, i = 15u8; x > 0; x >>= 1) { 261 assert(trailing_zeros_u16(x) == i); 262 i -= 1; 263 }; 264 265 assert(trailing_zeros_u32(0) == 32); 266 for (let x: u32 = 1 << 31, i = 31u8; x > 0; x >>= 1) { 267 assert(trailing_zeros_u32(x) == i); 268 i -= 1; 269 }; 270 271 assert(trailing_zeros_u64(0) == 64); 272 for (let x: u64 = 1 << 63, i = 63u8; x > 0; x >>= 1) { 273 assert(trailing_zeros_u64(x) == i); 274 i -= 1; 275 }; 276 277 assert(trailing_zeros_u(0) == UINT_SIZE); 278 assert(trailing_zeros_u(1) == 0); 279 }; 280 281 // Returns the number of bits set (the population count) of x. 282 export fn popcount(x: u64) u8 = { 283 let i = 0u8; 284 for (x != 0; x >>= 1) { 285 if (x & 1 == 1) { 286 i += 1; 287 }; 288 }; 289 return i; 290 }; 291 292 @test fn popcount() void = { 293 assert(popcount(0) == 0); 294 assert(popcount(0b11010110) == 5); 295 assert(popcount(~0) == 64); 296 }; 297 298 // Returns the sum with carry of x, y and carry: sum = x + y + carry. 299 // The carry input must be 0 or 1, otherwise the behavior is undefined. 300 // The carry_out output is guaranteed to be 0 or 1. 301 export fn addu32(x: u32, y: u32, carry: u32) (u32, u32) = { 302 const sum64 = (x: u64) + (y: u64) + (carry: u64); 303 const sum = (sum64: u32); 304 const carry_out = ((sum64 >> 32): u32); 305 return (sum, carry_out); 306 }; 307 308 // Returns the sum with carry of x, y and carry: sum = x + y + carry. 309 // The carry input must be 0 or 1, otherwise the behavior is undefined. 310 // The carry_out output is guaranteed to be 0 or 1. 311 export fn addu64(x: u64, y: u64, carry: u64) (u64, u64) = { 312 const sum = x + y + carry; 313 // The sum will overflow if both top bits are set (x & y) or if one of 314 // them is (x | y), and a carry from the lower place happened. If such a 315 // carry happens, the top bit will be 1 + 0 + 1 = 0 (& ~sum). 316 const carry_out = ((x & y) | ((x | y) & ~sum)) >> 63; 317 return (sum, carry_out); 318 }; 319 320 // Calls either addu32() or addu64() depending on size(uint). 321 export fn addu(x: uint, y: uint, carry: uint) (uint, uint) = { 322 if (UINT_SIZE == 32) { 323 const res = addu32((x: u32), (y: u32), (carry: u32)); 324 return ((res.0: uint), (res.1: uint)); 325 }; 326 const res = addu64((x: u64), (y: u64), (carry: u64)); 327 return ((res.0: uint), (res.1: uint)); 328 }; 329 330 @test fn addu() void = { 331 // 32 332 let res = addu32(2u32, 2u32, 0u32); 333 assert(res.0 == 4u32); 334 assert(res.1 == 0u32); 335 let res = addu32(2u32, 2u32, 1u32); 336 assert(res.0 == 5u32); 337 assert(res.1 == 0u32); 338 let res = addu32(~0u32, 0u32, 0u32); 339 assert(res.0 == ~0u32); 340 assert(res.1 == 0u32); 341 let res = addu32(~0u32, 1u32, 0u32); 342 assert(res.0 == 0u32); 343 assert(res.1 == 1u32); 344 345 // 64 346 let res = addu64(2u64, 2u64, 0u64); 347 assert(res.0 == 4u64); 348 assert(res.1 == 0u64); 349 let res = addu64(2u64, 2u64, 1u64); 350 assert(res.0 == 5u64); 351 assert(res.1 == 0u64); 352 let res = addu64(~0u64, 0u64, 0u64); 353 assert(res.0 == ~0u64); 354 assert(res.1 == 0u64); 355 let res = addu64(~0u64, 1u64, 0u64); 356 assert(res.0 == 0u64); 357 assert(res.1 == 1u64); 358 359 // addu() 360 let res = addu(2u, 2u, 0u); 361 assert(res.0 == 4u); 362 assert(res.1 == 0u); 363 let res = addu(2u, 2u, 1u); 364 assert(res.0 == 5u); 365 assert(res.1 == 0u); 366 }; 367 368 // Returns the difference of x, y and borrow, diff = x - y - borrow. 369 // The borrow input must be 0 or 1, otherwise the behavior is undefined. 370 // The borrow_out output is guaranteed to be 0 or 1. 371 export fn subu32(x: u32, y: u32, borrow: u32) (u32, u32) = { 372 const diff = x - y - borrow; 373 // The difference will underflow if the top bit of x is not set and the 374 // top bit of y is set (^x & y) or if they are the same (^(x ^ y)) and a 375 // borrow from the lower place happens. If that borrow happens, the 376 // result will be 1 - 1 - 1 = 0 - 0 - 1 = 1 (& diff). 377 const borrow_out = ((~x & y) | (~(x ^ y) & diff)) >> 31; 378 return (diff, borrow_out); 379 }; 380 381 // Returns the difference of x, y and borrow, diff = x - y - borrow. 382 // The borrow input must be 0 or 1, otherwise the behavior is undefined. 383 // The borrow_out output is guaranteed to be 0 or 1. 384 export fn subu64(x: u64, y: u64, borrow: u64) (u64, u64) = { 385 const diff = x - y - borrow; 386 // See subu32 for the bit logic. 387 const borrow_out = ((~x & y) | (~(x ^ y) & diff)) >> 63; 388 return (diff, borrow_out); 389 }; 390 391 // Calls either mulu32() or mulu64() depending on size(uint). 392 export fn subu(x: uint, y: uint, carry: uint) (uint, uint) = { 393 if (UINT_SIZE == 32) { 394 const res = subu32((x: u32), (y: u32), (carry: u32)); 395 return ((res.0: uint), (res.1: uint)); 396 }; 397 const res = subu64((x: u64), (y: u64), (carry: u64)); 398 return ((res.0: uint), (res.1: uint)); 399 }; 400 401 @test fn subu() void = { 402 // 32 403 let res = subu32(4u32, 2u32, 0u32); 404 assert(res.0 == 2u32); 405 assert(res.1 == 0u32); 406 let res = subu32(4u32, 2u32, 1u32); 407 assert(res.0 == 1u32); 408 assert(res.1 == 0u32); 409 let res = subu32(0u32, 0u32, 0u32); 410 assert(res.0 == 0u32); 411 assert(res.1 == 0u32); 412 let res = subu32(0u32, 1u32, 0u32); 413 assert(res.0 == ~0u32); 414 assert(res.1 == 1u32); 415 416 // 64 417 let res = subu64(4u64, 2u64, 0u64); 418 assert(res.0 == 2u64); 419 assert(res.1 == 0u64); 420 let res = subu64(4u64, 2u64, 1u64); 421 assert(res.0 == 1u64); 422 assert(res.1 == 0u64); 423 let res = subu64(0u64, 0u64, 0u64); 424 assert(res.0 == 0u64); 425 assert(res.1 == 0u64); 426 let res = subu64(0u64, 1u64, 0u64); 427 assert(res.0 == ~0u64); 428 assert(res.1 == 1u64); 429 430 // subu() 431 let res = subu(4u, 2u, 0u); 432 assert(res.0 == 2u); 433 assert(res.1 == 0u); 434 let res = subu(4u, 2u, 1u); 435 assert(res.0 == 1u); 436 assert(res.1 == 0u); 437 }; 438 439 // Returns the 64-bit product of x and y: (hi, lo) = x * y 440 // with the product bits' upper half returned in hi and the lower 441 // half returned in lo. 442 export fn mulu32(x: u32, y: u32) (u32, u32) = { 443 const product = (x: u64) * (y: u64); 444 const hi = ((product >> 32): u32); 445 const lo = (product: u32); 446 return (hi, lo); 447 }; 448 449 // Returns the 128-bit product of x and y: (hi, lo) = x * y 450 // with the product bits' upper half returned in hi and the lower 451 // half returned in lo. 452 export fn mulu64(x: u64, y: u64) (u64, u64) = { 453 const mask32 = (1u64 << 32) - 1; 454 const x0 = x & mask32; 455 const x1 = x >> 32; 456 const y0 = y & mask32; 457 const y1 = y >> 32; 458 const w0 = x0 * y0; 459 const t = (x1 * y0) + (w0 >> 32); 460 let w1 = t & mask32; 461 const w2 = t >> 32; 462 w1 += x0 * y1; 463 const hi = (x1 * y1) + w2 + (w1 >> 32); 464 const lo = x * y; 465 return (hi, lo); 466 }; 467 468 // Calls either mulu32() or mulu64() depending on size(uint). 469 export fn mulu(x: uint, y: uint) (uint, uint) = { 470 if (UINT_SIZE == 32) { 471 const res = mulu32((x: u32), (y: u32)); 472 return ((res.0: uint), (res.1: uint)); 473 }; 474 const res = mulu64((x: u64), (y: u64)); 475 return ((res.0: uint), (res.1: uint)); 476 }; 477 478 @test fn mulu() void = { 479 // 32 480 let res = mulu32(2u32, 3u32); 481 assert(res.0 == 0u32); 482 assert(res.1 == 6u32); 483 let res = mulu32(~0u32, 2u32); 484 assert(res.0 == 1u32); 485 assert(res.1 == ~0u32 - 1); 486 487 // 64 488 let res = mulu64(2u64, 3u64); 489 assert(res.0 == 0u64); 490 assert(res.1 == 6u64); 491 let res = mulu64(~0u64, 2u64); 492 assert(res.0 == 1u64); 493 assert(res.1 == ~0u64 - 1); 494 495 // mulu() 496 let res = mulu(2u, 3u); 497 assert(res.0 == 0u); 498 assert(res.1 == 6u); 499 let res = mulu(~0u, 2u); 500 assert(res.0 == 1u); 501 assert(res.1 == ~0u - 1); 502 }; 503 504 // Returns the quotient and remainder of (hi, lo) divided by y: 505 // quo = (hi, lo) / y, rem = (hi, lo) % y with the dividend bits' upper 506 // half in parameter hi and the lower half in parameter lo. 507 // Panics for y == 0 (division by zero) or y <= hi (quotient overflow). 508 export fn divu32(hi: u32, lo: u32, y: u32) (u32, u32) = { 509 assert(y != 0); 510 assert(y > hi); 511 const z = (hi: u64) << 32 | (lo: u64); 512 const quo = ((z / (y: u64)): u32); 513 const rem = ((z % (y: u64)): u32); 514 return (quo, rem); 515 }; 516 517 // Returns the quotient and remainder of (hi, lo) divided by y: 518 // quo = (hi, lo) / y, rem = (hi, lo) % y with the dividend bits' upper 519 // half in parameter hi and the lower half in parameter lo. 520 // Panics for y == 0 (division by zero) or y <= hi (quotient overflow). 521 export fn divu64(hi: u64, lo: u64, y: u64) (u64, u64) = { 522 const two32 = 1u64 << 32; 523 const mask32 = two32 - 1; 524 assert(y != 0); 525 assert(y > hi); 526 527 const s = leading_zeros_u64(y); 528 y <<= s; 529 530 const yn1 = y >> 32; 531 const yn0 = y & mask32; 532 const un32 = (hi << s) | (lo >> (64 - s)); 533 const un10 = lo << s; 534 const un1 = un10 >> 32; 535 const un0 = un10 & mask32; 536 let q1 = un32 / yn1; 537 let rhat = un32 - (q1 * yn1); 538 539 for (q1 >= two32 || (q1 * yn0) > ((two32 * rhat) + un1)) { 540 q1 -= 1; 541 rhat += yn1; 542 if (rhat >= two32) { 543 break; 544 }; 545 }; 546 547 const un21 = (un32 * two32) + un1 - (q1 * y); 548 let q0 = un21 / yn1; 549 rhat = un21 - (q0 * yn1); 550 551 for (q0 >= two32 || (q0 * yn0) > ((two32 * rhat) + un0)) { 552 q0 -= 1; 553 rhat += yn1; 554 if (rhat >= two32) { 555 break; 556 }; 557 }; 558 559 const quo = (q1 * two32) + q0; 560 const rem = ((un21 * two32) + un0 - (q0 * y)) >> s; 561 return (quo, rem); 562 }; 563 564 // Calls either divu32() or divu64() depending on size(uint). 565 export fn divu(hi: uint, lo: uint, y: uint) (uint, uint) = { 566 if (UINT_SIZE == 32) { 567 const res = divu32((hi: u32), (lo: u32), (y: u32)); 568 return ((res.0: uint), (res.1: uint)); 569 }; 570 const res = divu64((hi: u64), (lo: u64), (y: u64)); 571 return ((res.0: uint), (res.1: uint)); 572 }; 573 574 @test fn divu() void = { 575 // 32 576 let res = divu32(0u32, 4u32, 2u32); 577 assert(res.0 == 2u32); 578 assert(res.1 == 0u32); 579 let res = divu32(0u32, 5u32, 2u32); 580 assert(res.0 == 2u32); 581 assert(res.1 == 1u32); 582 let res = divu32(1u32, 0u32, 2u32); 583 assert(res.0 == (1u32 << 31)); 584 assert(res.1 == 0u32); 585 // These should panic. 586 // let res = divu32(1u32, 1u32, 0u32); 587 // let res = divu32(1u32, 0u32, 1u32); 588 589 // 64 590 let res = divu64(0u64, 4u64, 2u64); 591 assert(res.0 == 2u64); 592 assert(res.1 == 0u64); 593 let res = divu64(0u64, 5u64, 2u64); 594 assert(res.0 == 2u64); 595 assert(res.1 == 1u64); 596 let res = divu64(1u64, 0u64, 2u64); 597 assert(res.0 == (1u64 << 63)); 598 assert(res.1 == 0u64); 599 // These should panic. 600 // let res = divu64(1u64, 1u64, 0u64); 601 // let res = divu64(1u64, 0u64, 1u64); 602 603 // divu() 604 let res = divu(0u, 4u, 2u); 605 assert(res.0 == 2u); 606 assert(res.1 == 0u); 607 let res = divu(0u, 5u, 2u); 608 assert(res.0 == 2u); 609 assert(res.1 == 1u); 610 let res = divu(1u, 0u, 2u); 611 assert(res.0 == (1u << 31)); 612 assert(res.1 == 0u); 613 // These should panic. 614 // divu(1u, 1u, 0u); 615 // divu(1u, 0u, 1u); 616 }; 617 618 // Returns the remainder of (hi, lo) divided by y. 619 // Panics for y == 0 (division by zero) but, unlike divu(), it doesn't panic on 620 // a quotient overflow. 621 export fn remu32(hi: u32, lo: u32, y: u32) u32 = { 622 assert(y != 0); 623 const res = ((hi: u64) << 32 | (lo: u64)) % (y: u64); 624 return (res: u32); 625 }; 626 627 // Returns the remainder of (hi, lo) divided by y. 628 // Panics for y == 0 (division by zero) but, unlike divu(), it doesn't panic on 629 // a quotient overflow. 630 export fn remu64(hi: u64, lo: u64, y: u64) u64 = { 631 assert(y != 0); 632 // We scale down hi so that hi < y, then use divu() to compute the 633 // rem with the guarantee that it won't panic on quotient overflow. 634 // Given that 635 // hi ≡ hi%y (mod y) 636 // we have 637 // hi<<64 + lo ≡ (hi%y)<<64 + lo (mod y) 638 const res = divu64(hi % y, lo, y); 639 return res.1; 640 }; 641 642 // Calls either remu32() or remu64() depending on size(uint). 643 export fn remu(hi: uint, lo: uint, y: uint) uint = { 644 if (UINT_SIZE == 32) { 645 return (remu32((hi: u32), (lo: u32), (y: u32)): uint); 646 }; 647 return (remu64((hi: u64), (lo: u64), (y: u64)): uint); 648 }; 649 650 @test fn remu() void = { 651 // 32 652 assert(remu32(0u32, 4u32, 2u32) == 0u32); 653 assert(remu32(0u32, 5u32, 2u32) == 1u32); 654 assert(remu32(0u32, 5u32, 3u32) == 2u32); 655 assert(remu32(1u32, 1u32, 2u32) == 1u32); 656 // These should panic. 657 // remu32(0u32, 4u32, 0u32); 658 659 // 64 660 assert(remu64(0u64, 4u64, 2u64) == 0u64); 661 assert(remu64(0u64, 5u64, 2u64) == 1u64); 662 assert(remu64(0u64, 5u64, 3u64) == 2u64); 663 assert(remu64(1u32, 1u32, 2u32) == 1u32); 664 // These should panic. 665 // remu64(0u64, 4u64, 0u64); 666 667 // remu() 668 assert(remu(0u, 4u, 2u) == 0u); 669 assert(remu(0u, 5u, 2u) == 1u); 670 assert(remu(0u, 5u, 3u) == 2u); 671 assert(remu(1u, 1u, 2u) == 1u); 672 // These should panic. 673 // remu(0u, 4u, 0u); 674 }; 675 676 // Returns the greatest common divisor of a and b. 677 export fn gcd(a: u64, b: u64) u64 = { 678 if (a == b) { 679 return a; 680 }; 681 if (a == 0) { 682 return b; 683 }; 684 if (b == 0) { 685 return a; 686 }; 687 688 const i = trailing_zeros_u64(a); 689 const j = trailing_zeros_u64(b); 690 a >>= i; 691 b >>= j; 692 const k = if (i < j) i else j; 693 694 for (true) { 695 if (a > b) { 696 const t = a; 697 a = b; 698 b = t; 699 }; 700 701 b -= a; 702 if (b == 0) { 703 return a << k; 704 }; 705 b >>= trailing_zeros_u64(b); 706 }; 707 }; 708 709 @test fn gcd() void = { 710 assert(gcd(2 * 3 * 5, 3 * 7) == 3); 711 assert(gcd(2, 7) == 1); 712 assert(gcd(2, 0) == 2); 713 assert(gcd(0, 2) == 2); 714 // gcd(123 * 2^55 * 3, 123 * 7) 715 assert(gcd((123 << 55) * 3, 123 * 7) == 123); 716 };