Use user-provided buffer for hash::sum - hare - The Hare programming language

commit 68f62ed83904ee38acedc342cf0d3ff0569cf46c
parent 89d36b8310a18e9b40bb977a485102d58a952eec
Author: Kiëd Llaentenn <kiedtl@tilde.team>
Date:   Sun, 25 Apr 2021 23:00:59 +0000

Use user-provided buffer for hash::sum

Signed-off-by: Kiëd Llaentenn <kiedtl@tilde.team>

Diffstat:
M compress/zlib/reader.ha  | 8 ++++----
M crypto/md5/+test.ha  | 5 +++--
M crypto/md5/md5.ha  | 15 +++++----------
M crypto/sha1/+test.ha  | 5 +++--
M crypto/sha1/sha1.ha  | 17 ++++++-----------
M crypto/sha256/+test.ha  | 5 +++--
M crypto/sha256/sha256.ha  | 23 +++++++++--------------
M crypto/sha512/+test.ha  | 20 ++++++++------------
M crypto/sha512/sha512.ha  | 6 ++----
M hare/module/scan.ha  | 21 ++++++++++++++++++---
M hash/adler32/adler32.ha  | 15 ++++++++++-----
M hash/crc32/crc32.ha  | 22 ++++++++++------------
M hash/fnv/fnv.ha  | 15 +++++++++------
M hash/hash.ha  | 22 ++++++++++++----------

14 files changed, 102 insertions(+), 97 deletions(-)
diff --git a/compress/zlib/reader.ha b/compress/zlib/reader.ha
@@ -48,16 +48,16 @@ fn wraperror(err: decompress_err) errors::opaque = {
 };
 
 fn verifysum(s: *reader) (io::EOF | io::error) = {
-	let hash: [4]u8 = [0...];
+	let hash: [adler32::SIZE]u8 = [0...];
+
 	for (let n = 0z; n < len(hash)) {
 		match (io::read(s.source, hash[n..])?) {
 			_: io::EOF => return wraperror(decompress_err::EOF),
 			z: size => n += z,
 		};
 	};
-	let checksum = hash::sum(s.hash);
-	defer free(checksum);
-	return if (bytes::equal(checksum, hash)) io::EOF
+
+	return if (adler32::sum32(s.hash) == endian::begetu32(hash)) io::EOF
 		else wraperror(decompress_err::CHECKSUM);
 };
 
diff --git a/crypto/md5/+test.ha b/crypto/md5/+test.ha
@@ -24,8 +24,9 @@ use io;
 		const vector = vectors[i];
 		hash::reset(md5);
 		hash::write(md5, strings::toutf8(vector.0));
-		let sum = hash::sum(md5);
-		defer free(sum);
+
+		let sum: [SIZE]u8 = [0...];
+		hash::sum(md5, sum);
 
 		let hex = strio::dynamic();
 		defer io::close(hex);
diff --git a/crypto/md5/md5.ha b/crypto/md5/md5.ha
@@ -87,7 +87,7 @@ fn reset(h: *hash::hash) void = {
 	h.ln = 0;
 };
 
-fn sum(h: *hash::hash) []u8 = {
+fn sum(h: *hash::hash, buf: []u8) void = {
 	let h = h: *digest;
 	let copy = *h;
 	let h = &copy;
@@ -102,15 +102,10 @@ fn sum(h: *hash::hash) []u8 = {
 	assert(h.nx == 0);
 
 	// Where we write the digest
-	let d: [SIZE]u8 = [0...];
-	endian::leputu32(d[0..], h.h[0]);
-	endian::leputu32(d[4..], h.h[1]);
-	endian::leputu32(d[8..], h.h[2]);
-	endian::leputu32(d[12..], h.h[3]);
-
-	let slice: []u8 = alloc([], SIZE);
-	append(slice, d...);
-	return slice;
+	endian::leputu32(buf[0..], h.h[0]);
+	endian::leputu32(buf[4..], h.h[1]);
+	endian::leputu32(buf[8..], h.h[2]);
+	endian::leputu32(buf[12..], h.h[3]);
 };
 
 // A generic, pure Hare version of the MD5 block step
diff --git a/crypto/sha1/+test.ha b/crypto/sha1/+test.ha
@@ -28,8 +28,9 @@ use io;
 		const vector = vectors[i];
 		hash::reset(sha);
 		hash::write(sha, strings::toutf8(vector.0));
-		let sum = hash::sum(sha);
-		defer free(sum);
+
+		let sum: [SIZE]u8 = [0...];
+		hash::sum(sha, sum);
 
 		let hex = strio::dynamic();
 		defer io::close(hex);
diff --git a/crypto/sha1/sha1.ha b/crypto/sha1/sha1.ha
@@ -92,7 +92,7 @@ fn reset(h: *hash::hash) void = {
 	h.ln = 0;
 };
 
-fn sum(h: *hash::hash) []u8 = {
+fn sum(h: *hash::hash, buf: []u8) void = {
 	let h = h: *digest;
 	let copy = *h;
 	let h = &copy;
@@ -112,16 +112,11 @@ fn sum(h: *hash::hash) []u8 = {
 	assert(h.nx == 0);
 
 	// Where we write the digest
-	let d: [SIZE]u8 = [0...];
-	endian::beputu32(d[0..], h.h[0]);
-	endian::beputu32(d[4..], h.h[1]);
-	endian::beputu32(d[8..], h.h[2]);
-	endian::beputu32(d[12..], h.h[3]);
-	endian::beputu32(d[16..], h.h[4]);
-
-	let slice: []u8 = alloc([], SIZE);
-	append(slice, d...);
-	return slice;
+	endian::beputu32(buf[0..], h.h[0]);
+	endian::beputu32(buf[4..], h.h[1]);
+	endian::beputu32(buf[8..], h.h[2]);
+	endian::beputu32(buf[12..], h.h[3]);
+	endian::beputu32(buf[16..], h.h[4]);
 };
 
 let K0: u32 = 0x5A827999;
diff --git a/crypto/sha256/+test.ha b/crypto/sha256/+test.ha
@@ -25,8 +25,9 @@ use strio;
 		const vector = vectors[i];
 		hash::reset(sha);
 		hash::write(sha, strings::toutf8(vector.0));
-		let sum = hash::sum(sha);
-		defer free(sum);
+
+		let sum: [SIZE]u8 = [0...];
+		hash::sum(sha, sum);
 
 		let hex = strio::dynamic();
 		defer io::close(hex);
diff --git a/crypto/sha256/sha256.ha b/crypto/sha256/sha256.ha
@@ -107,7 +107,7 @@ fn close(st: *io::stream) void = {
 	free(st);
 };
 
-fn sum(h: *hash::hash) []u8 = {
+fn sum(h: *hash::hash, buf: []u8) void = {
 	let h = h: *state;
 	let copy = *h;
 	let h = &copy;
@@ -126,19 +126,14 @@ fn sum(h: *hash::hash) []u8 = {
 
 	assert(h.nx == 0);
 
-	let digest: [SIZE]u8 = [0...];
-	endian::beputu32(digest[0..], h.h[0]);
-	endian::beputu32(digest[4..], h.h[1]);
-	endian::beputu32(digest[8..], h.h[2]);
-	endian::beputu32(digest[12..], h.h[3]);
-	endian::beputu32(digest[16..], h.h[4]);
-	endian::beputu32(digest[20..], h.h[5]);
-	endian::beputu32(digest[24..], h.h[6]);
-	endian::beputu32(digest[28..], h.h[7]);
-
-	let slice: []u8 = alloc([], SIZE);
-	append(slice, digest...);
-	return slice;
+	endian::beputu32(buf[0..], h.h[0]);
+	endian::beputu32(buf[4..], h.h[1]);
+	endian::beputu32(buf[8..], h.h[2]);
+	endian::beputu32(buf[12..], h.h[3]);
+	endian::beputu32(buf[16..], h.h[4]);
+	endian::beputu32(buf[20..], h.h[5]);
+	endian::beputu32(buf[24..], h.h[6]);
+	endian::beputu32(buf[28..], h.h[7]);
 };
 
 // TODO: Rewrite me in assembly
diff --git a/crypto/sha512/+test.ha b/crypto/sha512/+test.ha
@@ -25,9 +25,8 @@ use io;
 		hash::reset(sha);
 		hash::write(sha, strings::toutf8(vector.0));
 
-		let sum = hash::sum(sha);
-		defer free(sum);
-		assert(len(sum) == SIZE, "Expected len(sum) == SIZE");
+		let sum: [SIZE]u8 = [0...];
+		hash::sum(sha, sum);
 
 		let hex = strio::dynamic();
 		defer io::close(hex);
@@ -58,9 +57,8 @@ use io;
 		hash::reset(sha);
 		hash::write(sha, strings::toutf8(vector.0));
 
-		let sum = hash::sum(sha);
-		defer free(sum);
-		assert(len(sum) == SIZE224, "Expected len(sum) == SIZE224");
+		let sum: [SIZE224]u8 = [0...];
+		hash::sum(sha, sum);
 
 		let hex = strio::dynamic();
 		defer io::close(hex);
@@ -89,9 +87,8 @@ use io;
 		hash::reset(sha);
 		hash::write(sha, strings::toutf8(vector.0));
 
-		let sum = hash::sum(sha);
-		defer free(sum);
-		assert(len(sum) == SIZE256, "Expected len(sum) == SIZE256");
+		let sum: [SIZE256]u8 = [0...];
+		hash::sum(sha, sum);
 
 		let hex = strio::dynamic();
 		defer io::close(hex);
@@ -122,9 +119,8 @@ use io;
 		hash::reset(sha);
 		hash::write(sha, strings::toutf8(vector.0));
 
-		let sum = hash::sum(sha);
-		defer free(sum);
-		assert(len(sum) == SIZE384, "Expected len(sum) == SIZE384");
+		let sum: [SIZE384]u8 = [0...];
+		hash::sum(sha, sum);
 
 		let hex = strio::dynamic();
 		defer io::close(hex);
diff --git a/crypto/sha512/sha512.ha b/crypto/sha512/sha512.ha
@@ -133,7 +133,7 @@ fn write(st: *io::stream, buf: const []u8) (size | io::error) = {
 
 fn close(st: *io::stream) void = free(st);
 
-fn sum(h: *hash::hash) []u8 = {
+fn sum(h: *hash::hash, buf: []u8) void = {
 	let d = h: *digest;
 	let copy = *d;
 	let d = &copy;
@@ -171,9 +171,7 @@ fn sum(h: *hash::hash) []u8 = {
 
         // We only copy the necessary bytes from fixed-size array into the
         // returned slice. The size is already found in the inner hash struct.
-	let slice: []u8 = alloc([], d.hash.sz);
-	append(slice, dig[..d.hash.sz]...);
-	return slice;
+	buf[..] = dig[..d.hash.sz];
 };
 
 fn reset(h: *hash::hash) void = {
diff --git a/hare/module/scan.ha b/hare/module/scan.ha
@@ -39,9 +39,13 @@ export fn scan(ctx: *context, path: str) (version | error) = {
 				...
 			};
 			append(inputs, in);
+
+			let sumbuf: [sha256::SIZE]u8 = [0...];
 			hash::write(sha, in.hash);
+			hash::finish(sha, sumbuf);
+
 			return version {
-				hash = hash::finish(sha),
+				hash = sumbuf,
 				basedir = path::dirname(fs::resolve(ctx.fs, path)),
 				depends = deps,
 				inputs = inputs,
@@ -55,7 +59,12 @@ export fn scan(ctx: *context, path: str) (version | error) = {
 		...
 	};
 	scan_directory(ctx, &ver, sha, path, iter)?;
-	ver.hash = hash::finish(sha);
+
+	let tmp: [sha256::SIZE]u8 = [0...];
+	hash::finish(sha, tmp);
+	ver.hash = alloc([], sha.sz);
+	append(ver.hash, tmp...);
+
 	return ver;
 };
 
@@ -318,7 +327,13 @@ fn scan_file(
 	};
 
 	io::copy(io::empty, tee)?; // Finish spooling out the file for the SHA
-	return hash::finish(sha);
+
+	let tmp: [sha256::SIZE]u8 = [0...];
+	hash::finish(sha, tmp);
+
+	let checksum: []u8 = alloc([], sha.sz);
+	append(checksum, tmp...);
+	return checksum;
 };
 
 fn have_ident(sl: *[]ast::ident, id: ast::ident) bool = {
diff --git a/hash/adler32/adler32.ha b/hash/adler32/adler32.ha
@@ -3,6 +3,9 @@ use hash;
 use io;
 use strings;
 
+// The size, in bytes, of an Adler-32 checksum.
+export def SIZE: size = 4;
+
 type state = struct {
 	hash: hash::hash,
 	a: u32,
@@ -42,13 +45,11 @@ fn reset(h: *hash::hash) void = {
 	h.b = 0;
 };
 
-fn sum(h: *hash::hash) []u8 = {
+fn sum(h: *hash::hash, buf: []u8) void = {
 	let h = h: *state;
-	let buf: [4]u8 = [0...];
 	// RFC 1950 specifies that Adler-32 checksums are stored in network
 	// order.
 	endian::beputu32(buf, (h.b << 16) | h.a);
-	return alloc(buf);
 };
 
 export fn sum32(h: *hash::hash) u32 = {
@@ -68,14 +69,18 @@ export fn sum32(h: *hash::hash) u32 = {
 		("'A language that doesn’t have everything is actually easier to program in than some that do.' - Dennis Ritchie", 1148528899),
 
 	];
+
 	let hash = adler32();
 	defer hash::close(hash);
+	let s: [4]u8 = [0...];
+
 	for (let i = 0z; i < len(vectors); i += 1) {
 		let vec = vectors[i];
 		hash::reset(hash);
 		hash::write(hash, strings::toutf8(vec.0));
-		let s = hash::sum(hash);
-		defer free(s);
+
+		hash::sum(hash, s);
+
 		assert(endian::begetu32(s) == vec.1);
 		assert(sum32(hash) == vec.1);
 	};
diff --git a/hash/crc32/crc32.ha b/hash/crc32/crc32.ha
@@ -2,6 +2,7 @@ use endian;
 use hash;
 use io;
 use strings;
+use fmt;
 
 // IEEE polynomial for CRC-32. Used in ethernet, SATA, MPEG-2, gzip, bsip2,
 // cksum, PNG, etc. It is by far the most common polynomial used.
@@ -217,11 +218,9 @@ fn reset(h: *hash::hash) void = {
 	h.cval = ~0u32;
 };
 
-fn sum(h: *hash::hash) []u8 = {
+fn sum(h: *hash::hash, buf: []u8) void = {
 	let h = h: *state;
-	let buf: [4]u8 = [0...];
 	endian::host.putu32(buf, ~h.cval);
-	return alloc(buf);
 };
 
 export fn sum32(h: *hash::hash) u32 = {
@@ -251,28 +250,27 @@ export fn sum32(h: *hash::hash) u32 = {
 	let crc_koopman = crc32(&KOOPMAN_TABLE);
 	defer hash::close(crc_koopman);
 
+	let buf: [4]u8 = [0...];
+
 	for (let i = 0z; i < len(vectors); i += 1) {
 		let vec = vectors[i];
 
 		hash::reset(crc_ieee);
 		hash::write(crc_ieee, strings::toutf8(vec.0));
-		let s = hash::sum(crc_ieee);
-		defer free(s);
-		assert(endian::host.getu32(s) == vec.1);
+		hash::sum(crc_ieee, buf);
+		assert(endian::host.getu32(buf) == vec.1);
 		assert(sum32(crc_ieee) == vec.1);
 
 		hash::reset(crc_castagnoli);
 		hash::write(crc_castagnoli, strings::toutf8(vec.0));
-		let s = hash::sum(crc_castagnoli);
-		defer free(s);
-		assert(endian::host.getu32(s) == vec.2);
+		hash::sum(crc_castagnoli, buf);
+		assert(endian::host.getu32(buf) == vec.2);
 		assert(sum32(crc_castagnoli) == vec.2);
 
 		hash::reset(crc_koopman);
 		hash::write(crc_koopman, strings::toutf8(vec.0));
-		let s = hash::sum(crc_koopman);
-		defer free(s);
-		assert(endian::host.getu32(s) == vec.3);
+		hash::sum(crc_koopman, buf);
+		assert(endian::host.getu32(buf) == vec.3);
 		assert(sum32(crc_koopman) == vec.3);
 	};
 };
diff --git a/hash/fnv/fnv.ha b/hash/fnv/fnv.ha
@@ -103,11 +103,9 @@ fn fnv32_reset(h: *hash::hash) void = {
 	h.v = basis32;
 };
 
-fn fnv32_sum(h: *hash::hash) []u8 = {
+fn fnv32_sum(h: *hash::hash, buf: []u8) void = {
 	let h = h: *state32;
-	let buf: [4]u8 = [0...];
 	endian::host.putu32(buf, h.v);
-	return alloc(buf);
 };
 
 fn fnv64_write(s: *io::stream, buf: const []u8) (size | io::error) = {
@@ -133,11 +131,9 @@ fn fnv64_reset(h: *hash::hash) void = {
 	h.v = basis64;
 };
 
-fn fnv64_sum(h: *hash::hash) []u8 = {
+fn fnv64_sum(h: *hash::hash, buf: []u8) void = {
 	let h = h: *state64;
-	let buf: [8]u8 = [0...];
 	endian::host.putu64(buf, h.v);
-	return alloc(buf);
 };
 
 // Returns the sum of a 32-bit FNV hash.
@@ -166,12 +162,19 @@ export fn sum64(h: *hash::hash) u64 = {
 		("'The central enemy of reliability is complexity.' - Geer et al", 3263526736),
 		("'A language that doesn’t have everything is actually easier to program in than some that do.' - Dennis Ritchie", 3069348265),
 	];
+
 	let hash = fnv32();
 	defer hash::close(hash);
+	let s: [4]u8 = [0...];
+
 	for (let i = 0z; i < len(vectors); i += 1) {
 		let vec = vectors[i];
+
 		hash::reset(hash);
 		hash::write(hash, strings::toutf8(vec.0));
+		hash::sum(hash, s);
+
+		assert(endian::host.getu32(s) == vec.1);
 		assert(sum32(hash) == vec.1);
 	};
 };
diff --git a/hash/hash.ha b/hash/hash.ha
@@ -1,5 +1,5 @@
-// TODO: Let caller supply the output buffer, to avoid the slice allocation
 use io;
+use fmt;
 
 // The general purpose interface for a hashing function.
 export type hash = struct {
@@ -7,7 +7,7 @@ export type hash = struct {
 	stream: io::stream,
 
 	// Returns the current hash.
-	sum: *fn(hash: *hash) []u8,
+	sum: *fn(hash: *hash, buf: []u8) void,
 
 	// Resets the hash function to its initial state.
 	reset: *fn(hash: *hash) void,
@@ -23,20 +23,22 @@ export fn writer(h: *hash) *io::stream = &h.stream;
 export fn write(h: *hash, buf: const []u8) size =
 	io::write(&h.stream, buf) as size;
 
-// Finalizes the hash, frees resources associated with the hash, and returns the
-// sum. The return value is heap allocated, the caller needs to free it.
-export fn finish(h: *hash) []u8 = {
-	let sum = sum(h);
+// Finalizes the hash, frees resources associated with the hash, and populates
+// buf with the sum.
+export fn finish(h: *hash, buf: []u8) void = {
+	sum(h, buf);
 	io::close(&h.stream);
-	return sum;
+	return buf;
 };
 
 // Closes a hash, freeing its resources and discarding the checksum.
 export fn close(h: *hash) void = io::close(&h.stream);
 
-// Returns the current sum. The return value is heap allocated, the caller
-// needs to free it.
-export fn sum(h: *hash) []u8 = h.sum(h);
+// Populates the user-provided buffer with the current sum.
+export fn sum(h: *hash, buf: []u8) void = {
+	assert(len(buf) >= h.sz, "hash::sum buffer does not meet minimum required size for this hash function");
+	h.sum(h, buf);
+};
 
 // Resets the hash function to its initial state.
 export fn reset(h: *hash) void = h.reset(h);

	hare The Hare programming language
	git clone https://git.torresjrjr.com/hare.git
	Log \| Files \| Refs \| README \| LICENSE

M	compress/zlib/reader.ha	\|	8	++++----
M	crypto/md5/+test.ha	\|	5	+++--
M	crypto/md5/md5.ha	\|	15	+++++----------
M	crypto/sha1/+test.ha	\|	5	+++--
M	crypto/sha1/sha1.ha	\|	17	++++++-----------
M	crypto/sha256/+test.ha	\|	5	+++--
M	crypto/sha256/sha256.ha	\|	23	+++++++++--------------
M	crypto/sha512/+test.ha	\|	20	++++++++------------
M	crypto/sha512/sha512.ha	\|	6	++----
M	hare/module/scan.ha	\|	21	++++++++++++++++++---
M	hash/adler32/adler32.ha	\|	15	++++++++++-----
M	hash/crc32/crc32.ha	\|	22	++++++++++------------
M	hash/fnv/fnv.ha	\|	15	+++++++++------
M	hash/hash.ha	\|	22	++++++++++++----------