commit fff2addff294dd1df0a6cf64f1261c294ce8869b
parent 9f9bc489ce2ba58e5cd08c42abc2b0ec08457c8c
Author: Bor Grošelj Simić <bgs@turminal.net>
Date: Mon, 16 May 2022 17:56:16 +0200
hash::siphash: new module
Signed-off-by: Bor Grošelj Simić <bgs@turminal.net>
Diffstat:
6 files changed, 269 insertions(+), 1 deletion(-)
diff --git a/hash/fnv/README b/hash/fnv/README
@@ -1,2 +1,3 @@
Implements the Fowler–Noll–Vo (FNV) hash function. This hash is recommended for
-hash map keys and similar applications. It is a non-cryptographic hash.
+hash map keys and similar applications when hash flooding isn't an issue. It is
+a non-cryptographic hash.
diff --git a/hash/siphash/+test.ha b/hash/siphash/+test.ha
@@ -0,0 +1,89 @@
+use endian;
+use fmt;
+use hash;
+use io;
+use strio;
+use strings;
+
+@test fn siphash() void = {
+ // same data and test procedure as the reference implementation
+ const vectors: [64]u64 = [
+ 0x726fdb47dd0e0e31, 0x74f839c593dc67fd,
+ 0x0d6c8009d9a94f5a, 0x85676696d7fb7e2d,
+ 0xcf2794e0277187b7, 0x18765564cd99a68d,
+ 0xcbc9466e58fee3ce, 0xab0200f58b01d137,
+ 0x93f5f5799a932462, 0x9e0082df0ba9e4b0,
+ 0x7a5dbbc594ddb9f3, 0xf4b32f46226bada7,
+ 0x751e8fbc860ee5fb, 0x14ea5627c0843d90,
+ 0xf723ca908e7af2ee, 0xa129ca6149be45e5,
+ 0x3f2acc7f57c29bdb, 0x699ae9f52cbe4794,
+ 0x4bc1b3f0968dd39c, 0xbb6dc91da77961bd,
+ 0xbed65cf21aa2ee98, 0xd0f2cbb02e3b67c7,
+ 0x93536795e3a33e88, 0xa80c038ccd5ccec8,
+ 0xb8ad50c6f649af94, 0xbce192de8a85b8ea,
+ 0x17d835b85bbb15f3, 0x2f2e6163076bcfad,
+ 0xde4daaaca71dc9a5, 0xa6a2506687956571,
+ 0xad87a3535c49ef28, 0x32d892fad841c342,
+ 0x7127512f72f27cce, 0xa7f32346f95978e3,
+ 0x12e0b01abb051238, 0x15e034d40fa197ae,
+ 0x314dffbe0815a3b4, 0x027990f029623981,
+ 0xcadcd4e59ef40c4d, 0x9abfd8766a33735c,
+ 0x0e3ea96b5304a7d0, 0xad0c42d6fc585992,
+ 0x187306c89bc215a9, 0xd4a60abcf3792b95,
+ 0xf935451de4f21df2, 0xa9538f0419755787,
+ 0xdb9acddff56ca510, 0xd06c98cd5c0975eb,
+ 0xe612a3cb9ecba951, 0xc766e62cfcadaf96,
+ 0xee64435a9752fe72, 0xa192d576b245165a,
+ 0x0a8787bf8ecb74b2, 0x81b3e73d20b49b6f,
+ 0x7fa8220ba3b2ecea, 0x245731c13ca42499,
+ 0xb78dbfaf3a8d83bd, 0xea1ad565322a1a0b,
+ 0x60e61c23a3795013, 0x6606d7e446282b93,
+ 0x6ca4ecb15c5f91e1, 0x9f626da15c9625f3,
+ 0xe51b38608ef25f57, 0x958a324ceb064572,
+ ];
+
+ let key: [16]u8 = [0...];
+ for (let i = 0u8; i < 16; i += 1) {
+ key[i] = i;
+ };
+ let sip = siphash(2, 4, &key);
+ defer hash::close(&sip);
+
+ let buf: [8]u8 = [0...];
+ for (let i = 0u8; i < 64; i += 1) {
+ hash::sum(&sip, &buf);
+ assert(endian::legetu64(buf) == vectors[i]);
+ hash::write(&sip, [i]);
+ };
+
+ const vectors: [_](str, u64) = [
+ ("", 0x726fdb47dd0e0e31),
+ ("abc", 0x5dbcfa53aa2007a5),
+ ("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 0x62da29967a36b79f),
+ ("abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 0x3ddf92b5dac6728b),
+ ("hello world", 0xed5159c956cd5602),
+ ("Hare is a cool language", 0xb459a8d06410857e),
+ ("'UNIX was not designed to stop its users from doing stupid things, as that would also stop them from doing clever things' - Doug Gwyn", 0x8f2787e694a9cced),
+ ("'Life is too short to run proprietary software' - Bdale Garbee", 0xfe97befe3823b15c),
+ ("'The central enemy of reliability is complexity.' - Geer et al", 0x61ea79c94e39ed29),
+ ];
+
+ for (let i = 0z; i < len(vectors); i += 1) {
+ const vector = vectors[i];
+ let sip = siphash(2, 4, &key);
+ defer hash::close(&sip);
+ hash::write(&sip, strings::toutf8(vector.0));
+ assert(sum(&sip) == vector.1);
+ };
+
+ // Uncomment this to run the 1G test vector (I promise it works):
+
+ // const input = "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmno";
+ // const expected = 0x439f9847c050d927u64;
+ // let sip = siphash(2, 4, &key);
+ // defer hash::close(&sip);
+ // for (let i = 0z; i < 16777216; i += 1) {
+ // hash::write(&sip, strings::toutf8(input));
+ // };
+ // assert(sum(&sip) == expected);
+};
diff --git a/hash/siphash/README b/hash/siphash/README
@@ -0,0 +1,3 @@
+Implements the SipHash keyed hash function. Output of SipHash(secret, input)
+for any input does not reveal anything about the secret used, which makes it a
+good choice for use cases that require resistance against hash flooding.
diff --git a/hash/siphash/siphash.ha b/hash/siphash/siphash.ha
@@ -0,0 +1,130 @@
+use crypto::math::{rotl64};
+use endian::{leputu64, legetu64};
+use hash;
+use io;
+
+export type state = struct {
+ hash::hash,
+ v: [4]u64,
+ x: [CHUNKSZ]u8,
+ x_len: u8,
+ c: u8,
+ d: u8,
+ ln: size,
+};
+
+def CHUNKSZ: size = 8;
+
+const sip_vtable: io::vtable = io::vtable {
+ writer = &write,
+ closer = &close,
+ ...
+};
+
+// Creates a [hash::hash] that computes SipHash-c-d with the given 16 byte key,
+// where c denotes number of compression rounds and d denotes number of
+// finalization rounds. Recommended values for c and d are 2 and 4 respectively.
+export fn siphash(c: u8, d: u8, key: *[16]u8) state = {
+ let h = state {
+ stream = &sip_vtable,
+ sum = &sum64_bytes,
+ sz = CHUNKSZ,
+ c = c,
+ d = d,
+ ...
+ };
+ let s = legetu64(key[..8]);
+ h.v[0] = 0x736f6d6570736575 ^ s;
+ h.v[2] = 0x6c7967656e657261 ^ s;
+ let s = legetu64(key[8..]);
+ h.v[1] = 0x646f72616e646f6d ^ s;
+ h.v[3] = 0x7465646279746573 ^ s;
+ return h;
+};
+
+fn write(s: *io::stream, buf: const []u8) (size | io::error) = {
+ let h = s: *state;
+ h.ln += len(buf);
+ let n = CHUNKSZ - h.x_len;
+
+ if (len(buf) < n) {
+ // not enough data to fill a chunk
+ h.x[h.x_len..h.x_len + len(buf)] = buf;
+ h.x_len += len(buf): u8;
+ return len(buf);
+ };
+
+ h.x[h.x_len..] = buf[..n];
+ let b = legetu64(h.x);
+ h.v[3] ^= b;
+ for (let i = 0u8; i < h.c; i += 1) {
+ round(h);
+ };
+ h.v[0] ^= b;
+
+ let buf = buf[n..];
+ for (len(buf) >= CHUNKSZ) {
+ let b = legetu64(buf);
+ h.v[3] ^= b;
+ for (let i = 0u8; i < h.c; i += 1) {
+ round(h);
+ };
+ h.v[0] ^= b;
+ buf = buf[CHUNKSZ..];
+ };
+
+ h.x_len = len(buf): u8;
+ h.x[..h.x_len] = buf;
+ return len(buf);
+};
+
+// Returns the sum as a u64
+export fn sum(h: *state) u64 = {
+ let h = *h;
+
+ for (let i = h.x_len; i < 7; i += 1) {
+ h.x[i] = 0;
+ };
+ h.x[7] = h.ln: u8;
+
+ let b = legetu64(h.x);
+ h.v[3] ^= b;
+ for (let i = 0u8; i < h.c; i += 1) {
+ round(&h);
+ };
+ h.v[0] ^= b;
+
+ h.v[2] ^= 0xff;
+ for (let i = 0u8; i < h.d; i+= 1) {
+ round(&h);
+ };
+ return h.v[0] ^ h.v[1] ^ h.v[2] ^ h.v[3];
+};
+
+fn sum64_bytes(h: *hash::hash, buf: []u8) void = leputu64(buf, sum(h: *state));
+
+fn round(h: *state) void = {
+ h.v[0] += h.v[1];
+ h.v[1] = rotl64(h.v[1], 13);
+ h.v[1] ^= h.v[0];
+ h.v[0] = rotl64(h.v[0], 32);
+ h.v[2] += h.v[3];
+ h.v[3] = rotl64(h.v[3], 16);
+ h.v[3] ^= h.v[2];
+ h.v[0] += h.v[3];
+ h.v[3] = rotl64(h.v[3], 21);
+ h.v[3] ^= h.v[0];
+ h.v[2] += h.v[1];
+ h.v[1] = rotl64(h.v[1], 17);
+ h.v[1] ^= h.v[2];
+ h.v[2] = rotl64(h.v[2], 32);
+};
+
+fn close(h: *io::stream) (void | io::error) = {
+ let h = h: *state;
+ h.v = [0...];
+ h.x = [0...];
+ h.ln = 0;
+ h.x_len = 0;
+};
+
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -806,6 +806,17 @@ hash_fnv() {
gen_ssa hash::fnv hash io strings
}
+hash_siphash() {
+ if [ $testing -eq 0 ]
+ then
+ gen_srcs hash::siphash siphash.ha
+ gen_ssa hash::siphash hash io endian
+ else
+ gen_srcs hash::siphash siphash.ha +test.ha
+ gen_ssa hash::siphash hash io endian fmt strio strings
+ fi
+}
+
gensrcs_io() {
gen_srcs -plinux io \
'arch+$(ARCH).ha' \
@@ -1413,6 +1424,7 @@ hash::crc16
hash::crc32
hash::crc64
hash::fnv
+hash::siphash
io linux freebsd
linux linux
linux::keyctl linux
diff --git a/stdlib.mk b/stdlib.mk
@@ -458,6 +458,12 @@ stdlib_deps_any += $(stdlib_hash_fnv_any)
stdlib_hash_fnv_linux = $(stdlib_hash_fnv_any)
stdlib_hash_fnv_freebsd = $(stdlib_hash_fnv_any)
+# gen_lib hash::siphash (any)
+stdlib_hash_siphash_any = $(HARECACHE)/hash/siphash/hash_siphash-any.o
+stdlib_deps_any += $(stdlib_hash_siphash_any)
+stdlib_hash_siphash_linux = $(stdlib_hash_siphash_any)
+stdlib_hash_siphash_freebsd = $(stdlib_hash_siphash_any)
+
# gen_lib io (linux)
stdlib_io_linux = $(HARECACHE)/io/io-linux.o
stdlib_deps_linux += $(stdlib_io_linux)
@@ -1359,6 +1365,16 @@ $(HARECACHE)/hash/fnv/hash_fnv-any.ssa: $(stdlib_hash_fnv_any_srcs) $(stdlib_rt)
@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nhash::fnv \
-t$(HARECACHE)/hash/fnv/hash_fnv.td $(stdlib_hash_fnv_any_srcs)
+# hash::siphash (+any)
+stdlib_hash_siphash_any_srcs = \
+ $(STDLIB)/hash/siphash/siphash.ha
+
+$(HARECACHE)/hash/siphash/hash_siphash-any.ssa: $(stdlib_hash_siphash_any_srcs) $(stdlib_rt) $(stdlib_hash_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_endian_$(PLATFORM))
+ @printf 'HAREC \t$@\n'
+ @mkdir -p $(HARECACHE)/hash/siphash
+ @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nhash::siphash \
+ -t$(HARECACHE)/hash/siphash/hash_siphash.td $(stdlib_hash_siphash_any_srcs)
+
# io (+linux)
stdlib_io_linux_srcs = \
$(STDLIB)/io/arch+$(ARCH).ha \
@@ -2539,6 +2555,12 @@ testlib_deps_any += $(testlib_hash_fnv_any)
testlib_hash_fnv_linux = $(testlib_hash_fnv_any)
testlib_hash_fnv_freebsd = $(testlib_hash_fnv_any)
+# gen_lib hash::siphash (any)
+testlib_hash_siphash_any = $(TESTCACHE)/hash/siphash/hash_siphash-any.o
+testlib_deps_any += $(testlib_hash_siphash_any)
+testlib_hash_siphash_linux = $(testlib_hash_siphash_any)
+testlib_hash_siphash_freebsd = $(testlib_hash_siphash_any)
+
# gen_lib io (linux)
testlib_io_linux = $(TESTCACHE)/io/io-linux.o
testlib_deps_linux += $(testlib_io_linux)
@@ -3476,6 +3498,17 @@ $(TESTCACHE)/hash/fnv/hash_fnv-any.ssa: $(testlib_hash_fnv_any_srcs) $(testlib_r
@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nhash::fnv \
-t$(TESTCACHE)/hash/fnv/hash_fnv.td $(testlib_hash_fnv_any_srcs)
+# hash::siphash (+any)
+testlib_hash_siphash_any_srcs = \
+ $(STDLIB)/hash/siphash/siphash.ha \
+ $(STDLIB)/hash/siphash/+test.ha
+
+$(TESTCACHE)/hash/siphash/hash_siphash-any.ssa: $(testlib_hash_siphash_any_srcs) $(testlib_rt) $(testlib_hash_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_endian_$(PLATFORM)) $(testlib_fmt_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_strings_$(PLATFORM))
+ @printf 'HAREC \t$@\n'
+ @mkdir -p $(TESTCACHE)/hash/siphash
+ @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nhash::siphash \
+ -t$(TESTCACHE)/hash/siphash/hash_siphash.td $(testlib_hash_siphash_any_srcs)
+
# io (+linux)
testlib_io_linux_srcs = \
$(STDLIB)/io/arch+$(ARCH).ha \